illumos-merge Wdiff usr/src/uts/common/fs/zfs/zfs_ctldir.c

Print this page

2882 implement libzfs_core
2883 changing "canmount" property to "on" should not always remount dataset
2900 "zfs snapshot" should be able to create multiple, arbitrary snapshots at once
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Chris Siden <christopher.siden@delphix.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Bill Pijewski <wdp@joyent.com>
Reviewed by: Dan Kruchinin <dan.kruchinin@gmail.com>

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/zfs/zfs_ctldir.c
          +++ new/usr/src/uts/common/fs/zfs/zfs_ctldir.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *

↓ open down ↓

12 lines elided

↑ open up ↑

  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
       23 + * Copyright (c) 2012 by Delphix. All rights reserved.
  23   24   */
  24   25  
  25   26  /*
  26   27   * ZFS control directory (a.k.a. ".zfs")
  27   28   *
  28   29   * This directory provides a common location for all ZFS meta-objects.
  29   30   * Currently, this is only the 'snapshot' directory, but this may expand in the
  30   31   * future.  The elements are built using the GFS primitives, as the hierarchy
  31   32   * does not actually exist on disk.
  32   33   *

  33   34   * For 'snapshot', we don't want to have all snapshots always mounted, because
  34   35   * this would take up a huge amount of space in /etc/mnttab.  We have three
  35   36   * types of objects:
  36   37   *
  37   38   *      ctldir ------> snapshotdir -------> snapshot
  38   39   *                                             |
  39   40   *                                             |
  40   41   *                                             V
  41   42   *                                         mounted fs
  42   43   *
  43   44   * The 'snapshot' node contains just enough information to lookup '..' and act
  44   45   * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
  45   46   * perform an automount of the underlying filesystem and return the
  46   47   * corresponding vnode.
  47   48   *
  48   49   * All mounts are handled automatically by the kernel, but unmounts are
  49   50   * (currently) handled from user land.  The main reason is that there is no
  50   51   * reliable way to auto-unmount the filesystem when it's "no longer in use".
  51   52   * When the user unmounts a filesystem, we call zfsctl_unmount(), which
  52   53   * unmounts any snapshots within the snapshot directory.
  53   54   *
  54   55   * The '.zfs', '.zfs/snapshot', and all directories created under
  55   56   * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
  56   57   * share the same vfs_t as the head filesystem (what '.zfs' lives under).
  57   58   *
  58   59   * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
  59   60   * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
  60   61   * However, vnodes within these mounted on file systems have their v_vfsp
  61   62   * fields set to the head filesystem to make NFS happy (see
  62   63   * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t
  63   64   * so that it cannot be freed until all snapshots have been unmounted.
  64   65   */
  65   66  
  66   67  #include <fs/fs_subr.h>
  67   68  #include <sys/zfs_ctldir.h>
  68   69  #include <sys/zfs_ioctl.h>
  69   70  #include <sys/zfs_vfsops.h>
  70   71  #include <sys/vfs_opreg.h>
  71   72  #include <sys/gfs.h>
  72   73  #include <sys/stat.h>
  73   74  #include <sys/dmu.h>
  74   75  #include <sys/dsl_deleg.h>
  75   76  #include <sys/mount.h>
  76   77  #include <sys/sunddi.h>
  77   78  
  78   79  #include "zfs_namecheck.h"
  79   80  
  80   81  typedef struct zfsctl_node {
  81   82          gfs_dir_t       zc_gfs_private;
  82   83          uint64_t        zc_id;
  83   84          timestruc_t     zc_cmtime;      /* ctime and mtime, always the same */
  84   85  } zfsctl_node_t;
  85   86  
  86   87  typedef struct zfsctl_snapdir {
  87   88          zfsctl_node_t   sd_node;
  88   89          kmutex_t        sd_lock;
  89   90          avl_tree_t      sd_snaps;
  90   91  } zfsctl_snapdir_t;
  91   92  
  92   93  typedef struct {
  93   94          char            *se_name;
  94   95          vnode_t         *se_root;
  95   96          avl_node_t      se_node;
  96   97  } zfs_snapentry_t;
  97   98  
  98   99  static int
  99  100  snapentry_compare(const void *a, const void *b)
 100  101  {
 101  102          const zfs_snapentry_t *sa = a;
 102  103          const zfs_snapentry_t *sb = b;
 103  104          int ret = strcmp(sa->se_name, sb->se_name);
 104  105  
 105  106          if (ret < 0)
 106  107                  return (-1);
 107  108          else if (ret > 0)
 108  109                  return (1);
 109  110          else
 110  111                  return (0);
 111  112  }
 112  113  
 113  114  vnodeops_t *zfsctl_ops_root;
 114  115  vnodeops_t *zfsctl_ops_snapdir;
 115  116  vnodeops_t *zfsctl_ops_snapshot;
 116  117  vnodeops_t *zfsctl_ops_shares;
 117  118  vnodeops_t *zfsctl_ops_shares_dir;
 118  119  
 119  120  static const fs_operation_def_t zfsctl_tops_root[];
 120  121  static const fs_operation_def_t zfsctl_tops_snapdir[];
 121  122  static const fs_operation_def_t zfsctl_tops_snapshot[];
 122  123  static const fs_operation_def_t zfsctl_tops_shares[];
 123  124  
 124  125  static vnode_t *zfsctl_mknode_snapdir(vnode_t *);
 125  126  static vnode_t *zfsctl_mknode_shares(vnode_t *);
 126  127  static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset);
 127  128  static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *);
 128  129  
 129  130  static gfs_opsvec_t zfsctl_opsvec[] = {
 130  131          { ".zfs", zfsctl_tops_root, &zfsctl_ops_root },
 131  132          { ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },
 132  133          { ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },
 133  134          { ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir },
 134  135          { ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares },
 135  136          { NULL }
 136  137  };
 137  138  
 138  139  /*
  139  140   * Root directory elements.  We only have two entries:
 140  141   * snapshot and shares.
 141  142   */
 142  143  static gfs_dirent_t zfsctl_root_entries[] = {
 143  144          { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },
 144  145          { "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE },
 145  146          { NULL }
 146  147  };
 147  148  
  148  149  /* the NULL terminator slot plus the +1 account for . and .. */
 149  150  #define NROOT_ENTRIES   ((sizeof (zfsctl_root_entries) / \
 150  151      sizeof (gfs_dirent_t)) + 1)
 151  152  
 152  153  
 153  154  /*
 154  155   * Initialize the various GFS pieces we'll need to create and manipulate .zfs
 155  156   * directories.  This is called from the ZFS init routine, and initializes the
 156  157   * vnode ops vectors that we'll be using.
 157  158   */
 158  159  void
 159  160  zfsctl_init(void)
 160  161  {
 161  162          VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0);
 162  163  }
 163  164  
 164  165  void
 165  166  zfsctl_fini(void)
 166  167  {
 167  168          /*
  168  169           * Remove zfsctl vnode ops
 169  170           */
 170  171          if (zfsctl_ops_root)
 171  172                  vn_freevnodeops(zfsctl_ops_root);
 172  173          if (zfsctl_ops_snapdir)
 173  174                  vn_freevnodeops(zfsctl_ops_snapdir);
 174  175          if (zfsctl_ops_snapshot)
 175  176                  vn_freevnodeops(zfsctl_ops_snapshot);
 176  177          if (zfsctl_ops_shares)
 177  178                  vn_freevnodeops(zfsctl_ops_shares);
 178  179          if (zfsctl_ops_shares_dir)
 179  180                  vn_freevnodeops(zfsctl_ops_shares_dir);
 180  181  
 181  182          zfsctl_ops_root = NULL;
 182  183          zfsctl_ops_snapdir = NULL;
 183  184          zfsctl_ops_snapshot = NULL;
 184  185          zfsctl_ops_shares = NULL;
 185  186          zfsctl_ops_shares_dir = NULL;
 186  187  }
 187  188  
 188  189  boolean_t
 189  190  zfsctl_is_node(vnode_t *vp)
 190  191  {
 191  192          return (vn_matchops(vp, zfsctl_ops_root) ||
 192  193              vn_matchops(vp, zfsctl_ops_snapdir) ||
 193  194              vn_matchops(vp, zfsctl_ops_snapshot) ||
 194  195              vn_matchops(vp, zfsctl_ops_shares) ||
 195  196              vn_matchops(vp, zfsctl_ops_shares_dir));
 196  197  
 197  198  }
 198  199  
 199  200  /*
 200  201   * Return the inode number associated with the 'snapshot' or
 201  202   * 'shares' directory.
 202  203   */
 203  204  /* ARGSUSED */
 204  205  static ino64_t
 205  206  zfsctl_root_inode_cb(vnode_t *vp, int index)
 206  207  {
 207  208          zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 208  209  
 209  210          ASSERT(index <= 2);
 210  211  
 211  212          if (index == 0)
 212  213                  return (ZFSCTL_INO_SNAPDIR);
 213  214  
 214  215          return (zfsvfs->z_shares_dir);
 215  216  }
 216  217  
 217  218  /*
 218  219   * Create the '.zfs' directory.  This directory is cached as part of the VFS
 219  220   * structure.  This results in a hold on the vfs_t.  The code in zfs_umount()
 220  221   * therefore checks against a vfs_count of 2 instead of 1.  This reference
 221  222   * is removed when the ctldir is destroyed in the unmount.
 222  223   */
 223  224  void
 224  225  zfsctl_create(zfsvfs_t *zfsvfs)
 225  226  {
 226  227          vnode_t *vp, *rvp;
 227  228          zfsctl_node_t *zcp;
 228  229          uint64_t crtime[2];
 229  230  
 230  231          ASSERT(zfsvfs->z_ctldir == NULL);
 231  232  
 232  233          vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,
 233  234              zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,
 234  235              zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);
 235  236          zcp = vp->v_data;
 236  237          zcp->zc_id = ZFSCTL_INO_ROOT;
 237  238  
 238  239          VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0);
 239  240          VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
 240  241              &crtime, sizeof (crtime)));
 241  242          ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime);
 242  243          VN_RELE(rvp);
 243  244  
 244  245          /*
 245  246           * We're only faking the fact that we have a root of a filesystem for
 246  247           * the sake of the GFS interfaces.  Undo the flag manipulation it did
 247  248           * for us.
 248  249           */
 249  250          vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT);
 250  251  
 251  252          zfsvfs->z_ctldir = vp;
 252  253  }
 253  254  
 254  255  /*
 255  256   * Destroy the '.zfs' directory.  Only called when the filesystem is unmounted.
 256  257   * There might still be more references if we were force unmounted, but only
  257  258   * new zfs_inactive() calls can occur and they don't reference .zfs.
 258  259   */
 259  260  void
 260  261  zfsctl_destroy(zfsvfs_t *zfsvfs)
 261  262  {
 262  263          VN_RELE(zfsvfs->z_ctldir);
 263  264          zfsvfs->z_ctldir = NULL;
 264  265  }
 265  266  
 266  267  /*
 267  268   * Given a root znode, retrieve the associated .zfs directory.
 268  269   * Add a hold to the vnode and return it.
 269  270   */
 270  271  vnode_t *
 271  272  zfsctl_root(znode_t *zp)
 272  273  {
 273  274          ASSERT(zfs_has_ctldir(zp));
 274  275          VN_HOLD(zp->z_zfsvfs->z_ctldir);
 275  276          return (zp->z_zfsvfs->z_ctldir);
 276  277  }
 277  278  
 278  279  /*
 279  280   * Common open routine.  Disallow any write access.
 280  281   */
 281  282  /* ARGSUSED */
 282  283  static int
 283  284  zfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
 284  285  {
 285  286          if (flags & FWRITE)
 286  287                  return (EACCES);
 287  288  
 288  289          return (0);
 289  290  }
 290  291  
 291  292  /*
 292  293   * Common close routine.  Nothing to do here.
 293  294   */
 294  295  /* ARGSUSED */
 295  296  static int
 296  297  zfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off,
 297  298      cred_t *cr, caller_context_t *ct)
 298  299  {
 299  300          return (0);
 300  301  }
 301  302  
 302  303  /*
 303  304   * Common access routine.  Disallow writes.
 304  305   */
 305  306  /* ARGSUSED */
 306  307  static int
 307  308  zfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr,
 308  309      caller_context_t *ct)
 309  310  {
 310  311          if (flags & V_ACE_MASK) {
 311  312                  if (mode & ACE_ALL_WRITE_PERMS)
 312  313                          return (EACCES);
 313  314          } else {
 314  315                  if (mode & VWRITE)
 315  316                          return (EACCES);
 316  317          }
 317  318  
 318  319          return (0);
 319  320  }
 320  321  
 321  322  /*
 322  323   * Common getattr function.  Fill in basic information.
 323  324   */
 324  325  static void
 325  326  zfsctl_common_getattr(vnode_t *vp, vattr_t *vap)
 326  327  {
 327  328          timestruc_t     now;
 328  329  
 329  330          vap->va_uid = 0;
 330  331          vap->va_gid = 0;
 331  332          vap->va_rdev = 0;
 332  333          /*
 333  334           * We are a purely virtual object, so we have no
 334  335           * blocksize or allocated blocks.
 335  336           */
 336  337          vap->va_blksize = 0;
 337  338          vap->va_nblocks = 0;
 338  339          vap->va_seq = 0;
 339  340          vap->va_fsid = vp->v_vfsp->vfs_dev;
 340  341          vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP |
 341  342              S_IROTH | S_IXOTH;
 342  343          vap->va_type = VDIR;
 343  344          /*
 344  345           * We live in the now (for atime).
 345  346           */
 346  347          gethrestime(&now);
 347  348          vap->va_atime = now;
 348  349  }
 349  350  
 350  351  /*ARGSUSED*/
 351  352  static int
 352  353  zfsctl_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
 353  354  {
 354  355          zfsvfs_t        *zfsvfs = vp->v_vfsp->vfs_data;
 355  356          zfsctl_node_t   *zcp = vp->v_data;
 356  357          uint64_t        object = zcp->zc_id;
 357  358          zfid_short_t    *zfid;
 358  359          int             i;
 359  360  
 360  361          ZFS_ENTER(zfsvfs);
 361  362  
 362  363          if (fidp->fid_len < SHORT_FID_LEN) {
 363  364                  fidp->fid_len = SHORT_FID_LEN;
 364  365                  ZFS_EXIT(zfsvfs);
 365  366                  return (ENOSPC);
 366  367          }
 367  368  
 368  369          zfid = (zfid_short_t *)fidp;
 369  370  
 370  371          zfid->zf_len = SHORT_FID_LEN;
 371  372  
 372  373          for (i = 0; i < sizeof (zfid->zf_object); i++)
 373  374                  zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
 374  375  
 375  376          /* .zfs znodes always have a generation number of 0 */
 376  377          for (i = 0; i < sizeof (zfid->zf_gen); i++)
 377  378                  zfid->zf_gen[i] = 0;
 378  379  
 379  380          ZFS_EXIT(zfsvfs);
 380  381          return (0);
 381  382  }
 382  383  
 383  384  
 384  385  /*ARGSUSED*/
 385  386  static int
 386  387  zfsctl_shares_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
 387  388  {
 388  389          zfsvfs_t        *zfsvfs = vp->v_vfsp->vfs_data;
 389  390          znode_t         *dzp;
 390  391          int             error;
 391  392  
 392  393          ZFS_ENTER(zfsvfs);
 393  394  
 394  395          if (zfsvfs->z_shares_dir == 0) {
 395  396                  ZFS_EXIT(zfsvfs);
 396  397                  return (ENOTSUP);
 397  398          }
 398  399  
 399  400          if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
 400  401                  error = VOP_FID(ZTOV(dzp), fidp, ct);
 401  402                  VN_RELE(ZTOV(dzp));
 402  403          }
 403  404  
 404  405          ZFS_EXIT(zfsvfs);
 405  406          return (error);
 406  407  }
 407  408  /*
 408  409   * .zfs inode namespace
 409  410   *
 410  411   * We need to generate unique inode numbers for all files and directories
 411  412   * within the .zfs pseudo-filesystem.  We use the following scheme:
 412  413   *
 413  414   *      ENTRY                   ZFSCTL_INODE
 414  415   *      .zfs                    1
 415  416   *      .zfs/snapshot           2
 416  417   *      .zfs/snapshot/<snap>    objectid(snap)
 417  418   */
 418  419  
 419  420  #define ZFSCTL_INO_SNAP(id)     (id)
 420  421  
 421  422  /*
 422  423   * Get root directory attributes.
 423  424   */
 424  425  /* ARGSUSED */
 425  426  static int
 426  427  zfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
 427  428      caller_context_t *ct)
 428  429  {
 429  430          zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 430  431          zfsctl_node_t *zcp = vp->v_data;
 431  432  
 432  433          ZFS_ENTER(zfsvfs);
 433  434          vap->va_nodeid = ZFSCTL_INO_ROOT;
 434  435          vap->va_nlink = vap->va_size = NROOT_ENTRIES;
 435  436          vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;
 436  437  
 437  438          zfsctl_common_getattr(vp, vap);
 438  439          ZFS_EXIT(zfsvfs);
 439  440  
 440  441          return (0);
 441  442  }
 442  443  
 443  444  /*
 444  445   * Special case the handling of "..".
 445  446   */
 446  447  /* ARGSUSED */
 447  448  int
 448  449  zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
 449  450      int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
 450  451      int *direntflags, pathname_t *realpnp)
 451  452  {
 452  453          zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
 453  454          int err;
 454  455  
 455  456          /*
 456  457           * No extended attributes allowed under .zfs
 457  458           */
 458  459          if (flags & LOOKUP_XATTR)
 459  460                  return (EINVAL);
 460  461  
 461  462          ZFS_ENTER(zfsvfs);
 462  463  
 463  464          if (strcmp(nm, "..") == 0) {
 464  465                  err = VFS_ROOT(dvp->v_vfsp, vpp);
 465  466          } else {
 466  467                  err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
 467  468                      cr, ct, direntflags, realpnp);
 468  469          }
 469  470  
 470  471          ZFS_EXIT(zfsvfs);
 471  472  
 472  473          return (err);
 473  474  }
 474  475  
 475  476  static int
 476  477  zfsctl_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
 477  478      caller_context_t *ct)
 478  479  {
 479  480          /*
  480  481           * We only care about _PC_ACL_ENABLED so that libsec can
  481  482           * display ACLs correctly and not default to POSIX draft.
 482  483           */
 483  484          if (cmd == _PC_ACL_ENABLED) {
 484  485                  *valp = _ACL_ACE_ENABLED;
 485  486                  return (0);
 486  487          }
 487  488  
 488  489          return (fs_pathconf(vp, cmd, valp, cr, ct));
 489  490  }
 490  491  
 491  492  static const fs_operation_def_t zfsctl_tops_root[] = {
 492  493          { VOPNAME_OPEN,         { .vop_open = zfsctl_common_open }      },
 493  494          { VOPNAME_CLOSE,        { .vop_close = zfsctl_common_close }    },
 494  495          { VOPNAME_IOCTL,        { .error = fs_inval }                   },
 495  496          { VOPNAME_GETATTR,      { .vop_getattr = zfsctl_root_getattr }  },
 496  497          { VOPNAME_ACCESS,       { .vop_access = zfsctl_common_access }  },
 497  498          { VOPNAME_READDIR,      { .vop_readdir = gfs_vop_readdir }      },
 498  499          { VOPNAME_LOOKUP,       { .vop_lookup = zfsctl_root_lookup }    },
 499  500          { VOPNAME_SEEK,         { .vop_seek = fs_seek }                 },
 500  501          { VOPNAME_INACTIVE,     { .vop_inactive = gfs_vop_inactive }    },
 501  502          { VOPNAME_PATHCONF,     { .vop_pathconf = zfsctl_pathconf }     },
 502  503          { VOPNAME_FID,          { .vop_fid = zfsctl_common_fid  }       },
 503  504          { NULL }
 504  505  };
 505  506  
 506  507  static int
 507  508  zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname)
 508  509  {
 509  510          objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os;
 510  511  
 511  512          if (snapshot_namecheck(name, NULL, NULL) != 0)
 512  513                  return (EILSEQ);
 513  514          dmu_objset_name(os, zname);
 514  515          if (strlen(zname) + 1 + strlen(name) >= len)
 515  516                  return (ENAMETOOLONG);
 516  517          (void) strcat(zname, "@");
 517  518          (void) strcat(zname, name);
 518  519          return (0);
 519  520  }
 520  521  
 521  522  static int
 522  523  zfsctl_unmount_snap(zfs_snapentry_t *sep, int fflags, cred_t *cr)
 523  524  {
 524  525          vnode_t *svp = sep->se_root;
 525  526          int error;
 526  527  
 527  528          ASSERT(vn_ismntpt(svp));
 528  529  
 529  530          /* this will be dropped by dounmount() */
 530  531          if ((error = vn_vfswlock(svp)) != 0)
 531  532                  return (error);
 532  533  
 533  534          VN_HOLD(svp);
 534  535          error = dounmount(vn_mountedvfs(svp), fflags, cr);
 535  536          if (error) {
 536  537                  VN_RELE(svp);
 537  538                  return (error);
 538  539          }
 539  540  
 540  541          /*
 541  542           * We can't use VN_RELE(), as that will try to invoke
 542  543           * zfsctl_snapdir_inactive(), which would cause us to destroy
 543  544           * the sd_lock mutex held by our caller.
 544  545           */
 545  546          ASSERT(svp->v_count == 1);
 546  547          gfs_vop_inactive(svp, cr, NULL);
 547  548  
 548  549          kmem_free(sep->se_name, strlen(sep->se_name) + 1);
 549  550          kmem_free(sep, sizeof (zfs_snapentry_t));
 550  551  
 551  552          return (0);
 552  553  }
 553  554  
 554  555  static void
 555  556  zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm)
 556  557  {
 557  558          avl_index_t where;
 558  559          vfs_t *vfsp;
 559  560          refstr_t *pathref;
 560  561          char newpath[MAXNAMELEN];
 561  562          char *tail;
 562  563  
 563  564          ASSERT(MUTEX_HELD(&sdp->sd_lock));
 564  565          ASSERT(sep != NULL);
 565  566  
 566  567          vfsp = vn_mountedvfs(sep->se_root);
 567  568          ASSERT(vfsp != NULL);
 568  569  
 569  570          vfs_lock_wait(vfsp);
 570  571  
 571  572          /*
 572  573           * Change the name in the AVL tree.
 573  574           */
 574  575          avl_remove(&sdp->sd_snaps, sep);
 575  576          kmem_free(sep->se_name, strlen(sep->se_name) + 1);
 576  577          sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
 577  578          (void) strcpy(sep->se_name, nm);
 578  579          VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
 579  580          avl_insert(&sdp->sd_snaps, sep, where);
 580  581  
 581  582          /*
 582  583           * Change the current mountpoint info:
 583  584           *      - update the tail of the mntpoint path
 584  585           *      - update the tail of the resource path
 585  586           */
 586  587          pathref = vfs_getmntpoint(vfsp);
 587  588          (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
 588  589          VERIFY((tail = strrchr(newpath, '/')) != NULL);
 589  590          *(tail+1) = '\0';
 590  591          ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
 591  592          (void) strcat(newpath, nm);
 592  593          refstr_rele(pathref);
 593  594          vfs_setmntpoint(vfsp, newpath, 0);
 594  595  
 595  596          pathref = vfs_getresource(vfsp);
 596  597          (void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
 597  598          VERIFY((tail = strrchr(newpath, '@')) != NULL);
 598  599          *(tail+1) = '\0';
 599  600          ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
 600  601          (void) strcat(newpath, nm);
 601  602          refstr_rele(pathref);
 602  603          vfs_setresource(vfsp, newpath, 0);
 603  604  
 604  605          vfs_unlock(vfsp);
 605  606  }
 606  607  
 607  608  /*ARGSUSED*/
 608  609  static int
 609  610  zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
 610  611      cred_t *cr, caller_context_t *ct, int flags)
 611  612  {
 612  613          zfsctl_snapdir_t *sdp = sdvp->v_data;
 613  614          zfs_snapentry_t search, *sep;
 614  615          zfsvfs_t *zfsvfs;
 615  616          avl_index_t where;
 616  617          char from[MAXNAMELEN], to[MAXNAMELEN];
 617  618          char real[MAXNAMELEN];
 618  619          int err;
 619  620  
 620  621          zfsvfs = sdvp->v_vfsp->vfs_data;
 621  622          ZFS_ENTER(zfsvfs);
 622  623  
 623  624          if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
 624  625                  err = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
 625  626                      MAXNAMELEN, NULL);
 626  627                  if (err == 0) {
 627  628                          snm = real;
 628  629                  } else if (err != ENOTSUP) {
 629  630                          ZFS_EXIT(zfsvfs);
 630  631                          return (err);
 631  632                  }
 632  633          }
 633  634  
 634  635          ZFS_EXIT(zfsvfs);
 635  636  
 636  637          err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
 637  638          if (!err)
 638  639                  err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
 639  640          if (!err)
 640  641                  err = zfs_secpolicy_rename_perms(from, to, cr);
 641  642          if (err)
 642  643                  return (err);
 643  644  
 644  645          /*
 645  646           * Cannot move snapshots out of the snapdir.
 646  647           */
 647  648          if (sdvp != tdvp)
 648  649                  return (EINVAL);
 649  650  
 650  651          if (strcmp(snm, tnm) == 0)
 651  652                  return (0);
 652  653  
 653  654          mutex_enter(&sdp->sd_lock);
 654  655  
 655  656          search.se_name = (char *)snm;
 656  657          if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
 657  658                  mutex_exit(&sdp->sd_lock);
 658  659                  return (ENOENT);
 659  660          }
 660  661  
 661  662          err = dmu_objset_rename(from, to, B_FALSE);
 662  663          if (err == 0)
 663  664                  zfsctl_rename_snap(sdp, sep, tnm);
 664  665  
 665  666          mutex_exit(&sdp->sd_lock);
 666  667  
 667  668          return (err);
 668  669  }
 669  670  
 670  671  /* ARGSUSED */
 671  672  static int
 672  673  zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr,
 673  674      caller_context_t *ct, int flags)
 674  675  {
 675  676          zfsctl_snapdir_t *sdp = dvp->v_data;
 676  677          zfs_snapentry_t *sep;
 677  678          zfs_snapentry_t search;
 678  679          zfsvfs_t *zfsvfs;
 679  680          char snapname[MAXNAMELEN];
 680  681          char real[MAXNAMELEN];
 681  682          int err;
 682  683  
 683  684          zfsvfs = dvp->v_vfsp->vfs_data;
 684  685          ZFS_ENTER(zfsvfs);
 685  686  
 686  687          if ((flags & FIGNORECASE) || zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
 687  688  
 688  689                  err = dmu_snapshot_realname(zfsvfs->z_os, name, real,
 689  690                      MAXNAMELEN, NULL);
 690  691                  if (err == 0) {
 691  692                          name = real;
 692  693                  } else if (err != ENOTSUP) {
 693  694                          ZFS_EXIT(zfsvfs);
 694  695                          return (err);
 695  696                  }
 696  697          }
 697  698  
 698  699          ZFS_EXIT(zfsvfs);
 699  700  
 700  701          err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);
 701  702          if (!err)
 702  703                  err = zfs_secpolicy_destroy_perms(snapname, cr);
 703  704          if (err)
 704  705                  return (err);
 705  706  
 706  707          mutex_enter(&sdp->sd_lock);
 707  708  
 708  709          search.se_name = name;
 709  710          sep = avl_find(&sdp->sd_snaps, &search, NULL);
 710  711          if (sep) {
 711  712                  avl_remove(&sdp->sd_snaps, sep);
 712  713                  err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
 713  714                  if (err)
 714  715                          avl_add(&sdp->sd_snaps, sep);
 715  716                  else
 716  717                          err = dmu_objset_destroy(snapname, B_FALSE);
 717  718          } else {
 718  719                  err = ENOENT;
 719  720          }
 720  721  
 721  722          mutex_exit(&sdp->sd_lock);
 722  723  
 723  724          return (err);
 724  725  }
 725  726  
 726  727  /*
 727  728   * This creates a snapshot under '.zfs/snapshot'.
 728  729   */
 729  730  /* ARGSUSED */
 730  731  static int
 731  732  zfsctl_snapdir_mkdir(vnode_t *dvp, char *dirname, vattr_t *vap, vnode_t  **vpp,
 732  733      cred_t *cr, caller_context_t *cc, int flags, vsecattr_t *vsecp)
 733  734  {
 734  735          zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
 735  736          char name[MAXNAMELEN];
 736  737          int err;
 737  738          static enum symfollow follow = NO_FOLLOW;
 738  739          static enum uio_seg seg = UIO_SYSSPACE;
 739  740  
 740  741          if (snapshot_namecheck(dirname, NULL, NULL) != 0)
 741  742                  return (EILSEQ);

↓ open down ↓

709 lines elided

↑ open up ↑

 742  743  
 743  744          dmu_objset_name(zfsvfs->z_os, name);
 744  745  
 745  746          *vpp = NULL;
 746  747  
 747  748          err = zfs_secpolicy_snapshot_perms(name, cr);
 748  749          if (err)
 749  750                  return (err);
 750  751  
 751  752          if (err == 0) {
 752      -                err = dmu_objset_snapshot(name, dirname, NULL, NULL,
 753      -                    B_FALSE, B_FALSE, -1);
      753 +                err = dmu_objset_snapshot_one(name, dirname);
 754  754                  if (err)
 755  755                          return (err);
 756  756                  err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
 757  757          }
 758  758  
 759  759          return (err);
 760  760  }
 761  761  
 762  762  /*
 763  763   * Lookup entry point for the 'snapshot' directory.  Try to open the

  764  764   * snapshot if it exists, creating the pseudo filesystem vnode as necessary.
 765  765   * Perform a mount of the associated dataset on top of the vnode.
 766  766   */
 767  767  /* ARGSUSED */
 768  768  static int
 769  769  zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
 770  770      int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
 771  771      int *direntflags, pathname_t *realpnp)
 772  772  {
 773  773          zfsctl_snapdir_t *sdp = dvp->v_data;
 774  774          objset_t *snap;
 775  775          char snapname[MAXNAMELEN];
 776  776          char real[MAXNAMELEN];
 777  777          char *mountpoint;
 778  778          zfs_snapentry_t *sep, search;
 779  779          struct mounta margs;
 780  780          vfs_t *vfsp;
 781  781          size_t mountpoint_len;
 782  782          avl_index_t where;
 783  783          zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
 784  784          int err;
 785  785  
 786  786          /*
 787  787           * No extended attributes allowed under .zfs
 788  788           */
 789  789          if (flags & LOOKUP_XATTR)
 790  790                  return (EINVAL);
 791  791  
 792  792          ASSERT(dvp->v_type == VDIR);
 793  793  
 794  794          /*
 795  795           * If we get a recursive call, that means we got called
 796  796           * from the domount() code while it was trying to look up the
 797  797           * spec (which looks like a local path for zfs).  We need to
 798  798           * add some flag to domount() to tell it not to do this lookup.
 799  799           */
 800  800          if (MUTEX_HELD(&sdp->sd_lock))
 801  801                  return (ENOENT);
 802  802  
 803  803          ZFS_ENTER(zfsvfs);
 804  804  
 805  805          if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
 806  806                  ZFS_EXIT(zfsvfs);
 807  807                  return (0);
 808  808          }
 809  809  
 810  810          if (flags & FIGNORECASE) {
 811  811                  boolean_t conflict = B_FALSE;
 812  812  
 813  813                  err = dmu_snapshot_realname(zfsvfs->z_os, nm, real,
 814  814                      MAXNAMELEN, &conflict);
 815  815                  if (err == 0) {
 816  816                          nm = real;
 817  817                  } else if (err != ENOTSUP) {
 818  818                          ZFS_EXIT(zfsvfs);
 819  819                          return (err);
 820  820                  }
 821  821                  if (realpnp)
 822  822                          (void) strlcpy(realpnp->pn_buf, nm,
 823  823                              realpnp->pn_bufsize);
 824  824                  if (conflict && direntflags)
 825  825                          *direntflags = ED_CASE_CONFLICT;
 826  826          }
 827  827  
 828  828          mutex_enter(&sdp->sd_lock);
 829  829          search.se_name = (char *)nm;
 830  830          if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
 831  831                  *vpp = sep->se_root;
 832  832                  VN_HOLD(*vpp);
 833  833                  err = traverse(vpp);
 834  834                  if (err) {
 835  835                          VN_RELE(*vpp);
 836  836                          *vpp = NULL;
 837  837                  } else if (*vpp == sep->se_root) {
 838  838                          /*
 839  839                           * The snapshot was unmounted behind our backs,
 840  840                           * try to remount it.
 841  841                           */
 842  842                          goto domount;
 843  843                  } else {
 844  844                          /*
 845  845                           * VROOT was set during the traverse call.  We need
 846  846                           * to clear it since we're pretending to be part
 847  847                           * of our parent's vfs.
 848  848                           */
 849  849                          (*vpp)->v_flag &= ~VROOT;
 850  850                  }
 851  851                  mutex_exit(&sdp->sd_lock);
 852  852                  ZFS_EXIT(zfsvfs);
 853  853                  return (err);
 854  854          }
 855  855  
 856  856          /*
 857  857           * The requested snapshot is not currently mounted, look it up.
 858  858           */
 859  859          err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);
 860  860          if (err) {
 861  861                  mutex_exit(&sdp->sd_lock);
 862  862                  ZFS_EXIT(zfsvfs);
 863  863                  /*
 864  864                   * handle "ls *" or "?" in a graceful manner,
 865  865                   * forcing EILSEQ to ENOENT.
 866  866                   * Since shell ultimately passes "*" or "?" as name to lookup
 867  867                   */
 868  868                  return (err == EILSEQ ? ENOENT : err);
 869  869          }
 870  870          if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
 871  871                  mutex_exit(&sdp->sd_lock);
 872  872                  ZFS_EXIT(zfsvfs);
 873  873                  return (ENOENT);
 874  874          }
 875  875  
 876  876          sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);
 877  877          sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
 878  878          (void) strcpy(sep->se_name, nm);
 879  879          *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
 880  880          avl_insert(&sdp->sd_snaps, sep, where);
 881  881  
 882  882          dmu_objset_rele(snap, FTAG);
 883  883  domount:
 884  884          mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) +
 885  885              strlen("/.zfs/snapshot/") + strlen(nm) + 1;
 886  886          mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
 887  887          (void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s",
 888  888              refstr_value(dvp->v_vfsp->vfs_mntpt), nm);
 889  889  
 890  890          margs.spec = snapname;
 891  891          margs.dir = mountpoint;
 892  892          margs.flags = MS_SYSSPACE | MS_NOMNTTAB;
 893  893          margs.fstype = "zfs";
 894  894          margs.dataptr = NULL;
 895  895          margs.datalen = 0;
 896  896          margs.optptr = NULL;
 897  897          margs.optlen = 0;
 898  898  
 899  899          err = domount("zfs", &margs, *vpp, kcred, &vfsp);
 900  900          kmem_free(mountpoint, mountpoint_len);
 901  901  
 902  902          if (err == 0) {
 903  903                  /*
 904  904                   * Return the mounted root rather than the covered mount point.
 905  905                   * Takes the GFS vnode at .zfs/snapshot/<snapname> and returns
 906  906                   * the ZFS vnode mounted on top of the GFS node.  This ZFS
 907  907                   * vnode is the root of the newly created vfsp.
 908  908                   */
 909  909                  VFS_RELE(vfsp);
 910  910                  err = traverse(vpp);
 911  911          }
 912  912  
 913  913          if (err == 0) {
 914  914                  /*
 915  915                   * Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
 916  916                   *
 917  917                   * This is where we lie about our v_vfsp in order to
 918  918                   * make .zfs/snapshot/<snapname> accessible over NFS
 919  919                   * without requiring manual mounts of <snapname>.
 920  920                   */
 921  921                  ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
 922  922                  VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
 923  923                  (*vpp)->v_vfsp = zfsvfs->z_vfs;
 924  924                  (*vpp)->v_flag &= ~VROOT;
 925  925          }
 926  926          mutex_exit(&sdp->sd_lock);
 927  927          ZFS_EXIT(zfsvfs);
 928  928  
 929  929          /*
 930  930           * If we had an error, drop our hold on the vnode and
 931  931           * zfsctl_snapshot_inactive() will clean up.
 932  932           */
 933  933          if (err) {
 934  934                  VN_RELE(*vpp);
 935  935                  *vpp = NULL;
 936  936          }
 937  937          return (err);
 938  938  }
 939  939  
           /*
            * Lookup entry point for the shares control directory.
            *
            * "." and ".." style names are resolved by gfs_lookup_dot(); every other
            * name is forwarded to VOP_LOOKUP() on the znode identified by
            * zfsvfs->z_shares_dir.
            *
            * Returns 0 on success, ENOTSUP when the filesystem has no shares
            * directory (z_shares_dir == 0), or the error from zfs_zget() or
            * VOP_LOOKUP().
            */
 940  940  /* ARGSUSED */
 941  941  static int
 942  942  zfsctl_shares_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp,
 943  943      int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
 944  944      int *direntflags, pathname_t *realpnp)
 945  945  {
 946  946          zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
 947  947          znode_t *dzp;
 948  948          int error;
 949  949  
 950  950          ZFS_ENTER(zfsvfs);
 951  951  
 952  952          if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
 953  953                  ZFS_EXIT(zfsvfs);
 954  954                  return (0);
 955  955          }
 956  956  
 957  957          if (zfsvfs->z_shares_dir == 0) {
 958  958                  ZFS_EXIT(zfsvfs);
 959  959                  return (ENOTSUP);
 960  960          }
                   /*
                    * Only release dzp when zfs_zget() handed one back; on failure
                    * dzp is never assigned and must not be touched.
                    */
 961  961          if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
 962  962                  error = VOP_LOOKUP(ZTOV(dzp), nm, vpp, pnp,
 963  963                      flags, rdir, cr, ct, direntflags, realpnp);
 964  964                  VN_RELE(ZTOV(dzp));
 965  965          }
 966  966          ZFS_EXIT(zfsvfs);
 967  967  
 968  968          return (error);
 969  969  }
 970  970  
           /*
            * Per-entry readdir callback for '.zfs/snapshot'.
            *
            * Asks dmu_snapshot_list_next() for the snapshot that follows the cookie
            * in *offp and fills in the caller's entry buffer 'dp' with its name and
            * a ZFSCTL_INO_SNAP() inode number.  'dp' is treated as an edirent_t when
            * V_RDDIR_ENTFLAGS is set in 'flags' and as a struct dirent64 otherwise.
            * The updated cookie is stored in *nextp.
            *
            * Returns 0 on success.  ENOENT from the lister is reported as end of
            * directory (*eofp = 1, return 0); any other error is returned as is.
            */
 971  971  /* ARGSUSED */
 972  972  static int
 973  973  zfsctl_snapdir_readdir_cb(vnode_t *vp, void *dp, int *eofp,
 974  974      offset_t *offp, offset_t *nextp, void *data, int flags)
 975  975  {
 976  976          zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 977  977          char snapname[MAXNAMELEN];
 978  978          uint64_t id, cookie;
 979  979          boolean_t case_conflict;
 980  980          int error;
 981  981  
 982  982          ZFS_ENTER(zfsvfs);
 983  983  
 984  984          cookie = *offp;
 985  985          error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
 986  986              &cookie, &case_conflict);
 987  987          if (error) {
 988  988                  ZFS_EXIT(zfsvfs);
                           /* Running off the end of the list is not an error. */
 989  989                  if (error == ENOENT) {
 990  990                          *eofp = 1;
 991  991                          return (0);
 992  992                  }
 993  993                  return (error);
 994  994          }
 995  995  
 996  996          if (flags & V_RDDIR_ENTFLAGS) {
 997  997                  edirent_t *eodp = dp;
 998  998  
 999  999                  (void) strcpy(eodp->ed_name, snapname);
1000 1000                  eodp->ed_ino = ZFSCTL_INO_SNAP(id);
1001 1001                  eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
1002 1002          } else {
1003 1003                  struct dirent64 *odp = dp;
1004 1004  
1005 1005                  (void) strcpy(odp->d_name, snapname);
1006 1006                  odp->d_ino = ZFSCTL_INO_SNAP(id);
1007 1007          }
1008 1008          *nextp = cookie;
1009 1009  
1010 1010          ZFS_EXIT(zfsvfs);
1011 1011  
1012 1012          return (0);
1013 1013  }
1014 1014  
           /*
            * Readdir entry point for the shares control directory.
            *
            * Forwards the request to VOP_READDIR() on the znode identified by
            * zfsvfs->z_shares_dir.  Returns ENOTSUP when z_shares_dir is 0.  When
            * zfs_zget() fails the directory is reported as exhausted (*eofp = 1) and
            * ENOENT is returned in place of the original error.
            */
1015 1015  /* ARGSUSED */
1016 1016  static int
1017 1017  zfsctl_shares_readdir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp,
1018 1018      caller_context_t *ct, int flags)
1019 1019  {
1020 1020          zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1021 1021          znode_t *dzp;
1022 1022          int error;
1023 1023  
1024 1024          ZFS_ENTER(zfsvfs);
1025 1025  
1026 1026          if (zfsvfs->z_shares_dir == 0) {
1027 1027                  ZFS_EXIT(zfsvfs);
1028 1028                  return (ENOTSUP);
1029 1029          }
1030 1030          if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
1031 1031                  error = VOP_READDIR(ZTOV(dzp), uiop, cr, eofp, ct, flags);
1032 1032                  VN_RELE(ZTOV(dzp));
1033 1033          } else {
1034 1034                  *eofp = 1;
1035 1035                  error = ENOENT;
1036 1036          }
1037 1037  
1038 1038          ZFS_EXIT(zfsvfs);
1039 1039          return (error);
1040 1040  }
1041 1041  
1042 1042  /*
1043 1043   * pvp is the '.zfs' directory (zfsctl_node_t).
1044 1044   * Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
1045 1045   *
1046 1046   * This function is the callback to create a GFS vnode for '.zfs/snapshot'
1047 1047   * when a lookup is performed on .zfs for "snapshot".
            *
            * The new node gets the fixed id ZFSCTL_INO_SNAPDIR, inherits zc_cmtime
            * from pvp, and has its sd_lock mutex and its sd_snaps AVL tree (ordered
            * by snapentry_compare) initialized before it is returned.
1048 1048   */
1049 1049  vnode_t *
1050 1050  zfsctl_mknode_snapdir(vnode_t *pvp)
1051 1051  {
1052 1052          vnode_t *vp;
1053 1053          zfsctl_snapdir_t *sdp;
1054 1054  
1055 1055          vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp,
1056 1056              zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
1057 1057              zfsctl_snapdir_readdir_cb, NULL);
1058 1058          sdp = vp->v_data;
1059 1059          sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
1060 1060          sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
1061 1061          mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
1062 1062          avl_create(&sdp->sd_snaps, snapentry_compare,
1063 1063              sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
1064 1064          return (vp);
1065 1065  }
1066 1066  
           /*
            * Creates the GFS directory vnode that uses zfsctl_ops_shares
            * (presumably '.zfs/shares' -- confirm against the caller) as a child of
            * pvp.  The new node inherits zc_cmtime from pvp; no readdir callback is
            * registered with gfs_dir_create().
            */
1067 1067  vnode_t *
1068 1068  zfsctl_mknode_shares(vnode_t *pvp)
1069 1069  {
1070 1070          vnode_t *vp;
1071 1071          zfsctl_node_t *sdp;
1072 1072  
1073 1073          vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
1074 1074              zfsctl_ops_shares, NULL, NULL, MAXNAMELEN,
1075 1075              NULL, NULL);
1076 1076          sdp = vp->v_data;
1077 1077          sdp->zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
1078 1078          return (vp);
1079 1079  
1080 1080  }
1081 1081  
           /*
            * Getattr entry point for the shares control directory.
            *
            * Forwards the request to VOP_GETATTR() on the znode identified by
            * zfsvfs->z_shares_dir.  Returns ENOTSUP when z_shares_dir is 0, or the
            * error from zfs_zget() or VOP_GETATTR().
            */
1082 1082  /* ARGSUSED */
1083 1083  static int
1084 1084  zfsctl_shares_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1085 1085      caller_context_t *ct)
1086 1086  {
1087 1087          zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1088 1088          znode_t *dzp;
1089 1089          int error;
1090 1090  
1091 1091          ZFS_ENTER(zfsvfs);
1092 1092          if (zfsvfs->z_shares_dir == 0) {
1093 1093                  ZFS_EXIT(zfsvfs);
1094 1094                  return (ENOTSUP);
1095 1095          }
1096 1096          if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
1097 1097                  error = VOP_GETATTR(ZTOV(dzp), vap, flags, cr, ct);
1098 1098                  VN_RELE(ZTOV(dzp));
1099 1099          }
1100 1100          ZFS_EXIT(zfsvfs);
1101 1101          return (error);
1102 1102  
1103 1103  
1104 1104  }
1105 1105  
           /*
            * Getattr entry point for '.zfs/snapshot'.
            *
            * Starts from zfsctl_common_getattr() and then overrides the node id,
            * the link count and size, and the change/modify times.  Always
            * returns 0.
            */
1106 1106  /* ARGSUSED */
1107 1107  static int
1108 1108  zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1109 1109      caller_context_t *ct)
1110 1110  {
1111 1111          zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
1112 1112          zfsctl_snapdir_t *sdp = vp->v_data;
1113 1113  
1114 1114          ZFS_ENTER(zfsvfs);
1115 1115          zfsctl_common_getattr(vp, vap);
1116 1116          vap->va_nodeid = gfs_file_inode(vp);
                   /*
                    * Link count and size are the number of entries currently in
                    * sd_snaps plus two (presumably for "." and "..").
                    */
1117 1117          vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;
1118 1118          vap->va_ctime = vap->va_mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
1119 1119          ZFS_EXIT(zfsvfs);
1120 1120  
1121 1121          return (0);
1122 1122  }
1123 1123  
           /*
            * Inactive entry point for '.zfs/snapshot'.
            *
            * Hands the vnode to gfs_dir_inactive().  When that returns non-NULL the
            * value is treated as the node's zfsctl_snapdir_t: its lock and (by then
            * empty) AVL tree are destroyed and the structure is freed.  A NULL
            * return means there is nothing to tear down here.
            */
1124 1124  /* ARGSUSED */
1125 1125  static void
1126 1126  zfsctl_snapdir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1127 1127  {
1128 1128          zfsctl_snapdir_t *sdp = vp->v_data;
1129 1129          void *private;
1130 1130  
1131 1131          private = gfs_dir_inactive(vp);
1132 1132          if (private != NULL) {
1133 1133                  ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
1134 1134                  mutex_destroy(&sdp->sd_lock);
1135 1135                  avl_destroy(&sdp->sd_snaps);
1136 1136                  kmem_free(private, sizeof (zfsctl_snapdir_t));
1137 1137          }
1138 1138  }
1139 1139  
           /*
            * Vnode operation template for the '.zfs/snapshot' directory (presumably
            * the source of zfsctl_ops_snapdir -- confirm where the ops are built).
            * Snapshot-specific handlers cover getattr, rename, rmdir, mkdir, lookup
            * and inactive; the rest use the zfsctl_common_*, gfs_* or fs_* routines.
            */
1140 1140  static const fs_operation_def_t zfsctl_tops_snapdir[] = {
1141 1141          { VOPNAME_OPEN,         { .vop_open = zfsctl_common_open }      },
1142 1142          { VOPNAME_CLOSE,        { .vop_close = zfsctl_common_close }    },
1143 1143          { VOPNAME_IOCTL,        { .error = fs_inval }                   },
1144 1144          { VOPNAME_GETATTR,      { .vop_getattr = zfsctl_snapdir_getattr } },
1145 1145          { VOPNAME_ACCESS,       { .vop_access = zfsctl_common_access }  },
1146 1146          { VOPNAME_RENAME,       { .vop_rename = zfsctl_snapdir_rename } },
1147 1147          { VOPNAME_RMDIR,        { .vop_rmdir = zfsctl_snapdir_remove }  },
1148 1148          { VOPNAME_MKDIR,        { .vop_mkdir = zfsctl_snapdir_mkdir }   },
1149 1149          { VOPNAME_READDIR,      { .vop_readdir = gfs_vop_readdir }      },
1150 1150          { VOPNAME_LOOKUP,       { .vop_lookup = zfsctl_snapdir_lookup } },
1151 1151          { VOPNAME_SEEK,         { .vop_seek = fs_seek }                 },
1152 1152          { VOPNAME_INACTIVE,     { .vop_inactive = zfsctl_snapdir_inactive } },
1153 1153          { VOPNAME_FID,          { .vop_fid = zfsctl_common_fid }        },
1154 1154          { NULL }
1155 1155  };
1156 1156  
           /*
            * Vnode operation template for the shares control directory (presumably
            * the source of zfsctl_ops_shares -- confirm where the ops are built).
            * Unlike the snapshot directory template it defines no rename, rmdir or
            * mkdir entries.
            */
1157 1157  static const fs_operation_def_t zfsctl_tops_shares[] = {
1158 1158          { VOPNAME_OPEN,         { .vop_open = zfsctl_common_open }      },
1159 1159          { VOPNAME_CLOSE,        { .vop_close = zfsctl_common_close }    },
1160 1160          { VOPNAME_IOCTL,        { .error = fs_inval }                   },
1161 1161          { VOPNAME_GETATTR,      { .vop_getattr = zfsctl_shares_getattr } },
1162 1162          { VOPNAME_ACCESS,       { .vop_access = zfsctl_common_access }  },
1163 1163          { VOPNAME_READDIR,      { .vop_readdir = zfsctl_shares_readdir } },
1164 1164          { VOPNAME_LOOKUP,       { .vop_lookup = zfsctl_shares_lookup }  },
1165 1165          { VOPNAME_SEEK,         { .vop_seek = fs_seek }                 },
1166 1166          { VOPNAME_INACTIVE,     { .vop_inactive = gfs_vop_inactive } },
1167 1167          { VOPNAME_FID,          { .vop_fid = zfsctl_shares_fid } },
1168 1168          { NULL }
1169 1169  };
1170 1170  
1171 1171  /*
1172 1172   * pvp is the GFS vnode '.zfs/snapshot'.
1173 1173   *
1174 1174   * This creates a GFS node under '.zfs/snapshot' representing each
1175 1175   * snapshot.  This newly created GFS node is what we mount snapshot
1176 1176   * vfs_t's on top of.
            *
            * 'objset' is stored as the node's zc_id.  The new vnode is returned to
            * the caller.
1177 1177   */
1178 1178  static vnode_t *
1179 1179  zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
1180 1180  {
1181 1181          vnode_t *vp;
1182 1182          zfsctl_node_t *zcp;
1183 1183  
1184 1184          vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp,
1185 1185              zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
1186 1186          zcp = vp->v_data;
1187 1187          zcp->zc_id = objset;
1188 1188  
1189 1189          return (vp);
1190 1190  }
1191 1191  
           /*
            * Inactive entry point for a snapshot mount point vnode (a GFS node under
            * '.zfs/snapshot').
            *
            * Looks up the parent snapshot directory, and under its sd_lock removes
            * and frees the sd_snaps entry whose se_root is vp before handing the
            * vnode to gfs_vop_inactive() for disposal.
            *
            * If another hold on vp appeared before we got here, the vnode is still
            * in use: drop only the reference that triggered this call and leave
            * the entry in place.  Every exit path releases the hold on the parent
            * that gfs_dir_lookup() returned.
            */
1192 1192  static void
1193 1193  zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1194 1194  {
1195 1195          zfsctl_snapdir_t *sdp;
1196 1196          zfs_snapentry_t *sep, *next;
1197 1197          vnode_t *dvp;
1198 1198  
1199 1199          VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
1200 1200          sdp = dvp->v_data;
1201 1201  
1202 1202          mutex_enter(&sdp->sd_lock);
1203 1203  
                   /*
                    * v_count is protected by v_lock.  When somebody else holds the
                    * vnode, give back our reference and the parent hold instead of
                    * leaking both.
                    */
                   mutex_enter(&vp->v_lock);
1204 1204          if (vp->v_count > 1) {
                           vp->v_count--;
                           mutex_exit(&vp->v_lock);
1205 1205                  mutex_exit(&sdp->sd_lock);
                           VN_RELE(dvp);
1206 1206                  return;
1207 1207          }
                   mutex_exit(&vp->v_lock);
1208 1208          ASSERT(!vn_ismntpt(vp));
1209 1209  
1210 1210          sep = avl_first(&sdp->sd_snaps);
1211 1211          while (sep != NULL) {
1212 1212                  next = AVL_NEXT(&sdp->sd_snaps, sep);
1213 1213  
1214 1214                  if (sep->se_root == vp) {
1215 1215                          avl_remove(&sdp->sd_snaps, sep);
1216 1216                          kmem_free(sep->se_name, strlen(sep->se_name) + 1);
1217 1217                          kmem_free(sep, sizeof (zfs_snapentry_t));
1218 1218                          break;
1219 1219                  }
1220 1220                  sep = next;
1221 1221          }
                   /* Only the pointer value is tested; sep itself may be freed. */
1222 1222          ASSERT(sep != NULL);
1223 1223  
1224 1224          mutex_exit(&sdp->sd_lock);
1225 1225          VN_RELE(dvp);
1226 1226  
1227 1227          /*
1228 1228           * Dispose of the vnode for the snapshot mount point.
1229 1229           * This is safe to do because once this entry has been removed
1230 1230           * from the AVL tree, it can't be found again, so cannot become
1231 1231           * "active".  If we lookup the same name again we will end up
1232 1232           * creating a new vnode.
1233 1233           */
1234 1234          gfs_vop_inactive(vp, cr, ct);
1235 1235  }
1236 1236  
1237 1237  
1238 1238  /*
1239 1239   * These VP's should never see the light of day.  They should always
1240 1240   * be covered.
            *
            * Because the nodes are expected to stay covered by a mounted snapshot,
            * the template supplies only an inactive handler.  The initializer is
            * written without inner braces: one name/handler pair followed by the
            * NULL terminator pair.
1241 1241   */
1242 1242  static const fs_operation_def_t zfsctl_tops_snapshot[] = {
1243 1243          VOPNAME_INACTIVE, { .vop_inactive =  zfsctl_snapshot_inactive },
1244 1244          NULL, NULL
1245 1245  };
1246 1246  
           /*
            * Find the zfsvfs_t of the mounted snapshot whose objset id is
            * 'objsetid' among the entries recorded under vfsp's '.zfs/snapshot'.
            *
            * Returns 0 and stores the result in *zfsvfsp on success.  Returns the
            * error from looking up "snapshot" in the control directory or from
            * traverse(), or EINVAL when no entry matches or the matching node has
            * nothing mounted on it (traverse() handed back the same vnode).
            *
            * NOTE(review): the vnode reached through traverse() is released before
            * returning, so no hold backs the pointer stored in *zfsvfsp -- confirm
            * that callers keep the snapshot mounted by other means.
            */
1247 1247  int
1248 1248  zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp)
1249 1249  {
1250 1250          zfsvfs_t *zfsvfs = vfsp->vfs_data;
1251 1251          vnode_t *dvp, *vp;
1252 1252          zfsctl_snapdir_t *sdp;
1253 1253          zfsctl_node_t *zcp;
1254 1254          zfs_snapentry_t *sep;
1255 1255          int error;
1256 1256  
1257 1257          ASSERT(zfsvfs->z_ctldir != NULL);
1258 1258          error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
1259 1259              NULL, 0, NULL, kcred, NULL, NULL, NULL);
1260 1260          if (error != 0)
1261 1261                  return (error);
1262 1262          sdp = dvp->v_data;
1263 1263  
1264 1264          mutex_enter(&sdp->sd_lock);
                   /* Linear scan: the tree is keyed by name, not by objset id. */
1265 1265          sep = avl_first(&sdp->sd_snaps);
1266 1266          while (sep != NULL) {
1267 1267                  vp = sep->se_root;
1268 1268                  zcp = vp->v_data;
1269 1269                  if (zcp->zc_id == objsetid)
1270 1270                          break;
1271 1271  
1272 1272                  sep = AVL_NEXT(&sdp->sd_snaps, sep);
1273 1273          }
1274 1274  
1275 1275          if (sep != NULL) {
1276 1276                  VN_HOLD(vp);
1277 1277                  /*
1278 1278                   * Return the mounted root rather than the covered mount point.
1279 1279                   * Takes the GFS vnode at .zfs/snapshot/<snapshot objsetid>
1280 1280                   * and returns the ZFS vnode mounted on top of the GFS node.
1281 1281                   * This ZFS vnode is the root of the vfs for objset 'objsetid'.
1282 1282                   */
1283 1283                  error = traverse(&vp);
1284 1284                  if (error == 0) {
                                   /* Unchanged vnode: nothing is mounted here. */
1285 1285                          if (vp == sep->se_root)
1286 1286                                  error = EINVAL;
1287 1287                          else
1288 1288                                  *zfsvfsp = VTOZ(vp)->z_zfsvfs;
1289 1289                  }
1290 1290                  mutex_exit(&sdp->sd_lock);
1291 1291                  VN_RELE(vp);
1292 1292          } else {
1293 1293                  error = EINVAL;
1294 1294                  mutex_exit(&sdp->sd_lock);
1295 1295          }
1296 1296  
1297 1297          VN_RELE(dvp);
1298 1298  
1299 1299          return (error);
1300 1300  }
1301 1301  
1302 1302  /*
1303 1303   * Unmount any snapshots for the given filesystem.  This is called from
1304 1304   * zfs_umount() - if we have a ctldir, then go through and unmount all the
1305 1305   * snapshots.
            *
            * Returns 0 when every mounted snapshot was unmounted.  Returns the
            * error from looking up "snapshot" in the control directory, or from
            * the first zfsctl_unmount_snap() call that fails; in that case the
            * failing entry is put back into sd_snaps and the walk stops.
1306 1306   */
1307 1307  int
1308 1308  zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
1309 1309  {
1310 1310          zfsvfs_t *zfsvfs = vfsp->vfs_data;
1311 1311          vnode_t *dvp;
1312 1312          zfsctl_snapdir_t *sdp;
1313 1313          zfs_snapentry_t *sep, *next;
1314 1314          int error;
1315 1315  
1316 1316          ASSERT(zfsvfs->z_ctldir != NULL);
1317 1317          error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
1318 1318              NULL, 0, NULL, cr, NULL, NULL, NULL);
1319 1319          if (error != 0)
1320 1320                  return (error);
1321 1321          sdp = dvp->v_data;
1322 1322  
1323 1323          mutex_enter(&sdp->sd_lock);
1324 1324  
1325 1325          sep = avl_first(&sdp->sd_snaps);
1326 1326          while (sep != NULL) {
                           /* Fetch the successor first; sep may leave the tree below. */
1327 1327                  next = AVL_NEXT(&sdp->sd_snaps, sep);
1328 1328  
1329 1329                  /*
1330 1330                   * If this snapshot is not mounted, then it must
1331 1331                   * have just been unmounted by somebody else, and
1332 1332                   * its entry will be cleaned up by zfsctl_snapshot_inactive().
1333 1333                   */
1334 1334                  if (vn_ismntpt(sep->se_root)) {
1335 1335                          avl_remove(&sdp->sd_snaps, sep);
1336 1336                          error = zfsctl_unmount_snap(sep, fflags, cr);
1337 1337                          if (error) {
1338 1338                                  avl_add(&sdp->sd_snaps, sep);
1339 1339                                  break;
1340 1340                          }
1341 1341                  }
1342 1342                  sep = next;
1343 1343          }
1344 1344  
1345 1345          mutex_exit(&sdp->sd_lock);
1346 1346          VN_RELE(dvp);
1347 1347  
1348 1348          return (error);
1349 1349  }

↓ open down ↓

586 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX