illumos Cdiff usr/src/uts/common/fs/zfs/dsl

Print this page

OS-1566 filesystem limits for ZFS datasets


*** 43,52 ****
--- 43,53 ----
  #include <sys/zfs_znode.h>
  #include <sys/zfs_onexit.h>
  #include <sys/zvol.h>
  #include <sys/dsl_scan.h>
  #include <sys/dsl_deadlist.h>
+ #include "zfs_prop.h"
  
  static char *dsl_reaper = "the grim reaper";
  
  static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
  static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
*** 329,339 ****
                  err = zap_lookup(mos, snapobj, name, 8, 1, value);
          return (err);
  }
  
  static int
! dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
  {
          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
          uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
          matchtype_t mt;
          int err;
--- 330,341 ----
                  err = zap_lookup(mos, snapobj, name, 8, 1, value);
          return (err);
  }
  
  static int
! dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx,
!     boolean_t adj_cnt)
  {
          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
          uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
          matchtype_t mt;
          int err;
*** 346,355 ****
--- 348,361 ----
                  mt = MT_EXACT;
  
          err = zap_remove_norm(mos, snapobj, name, mt, tx);
          if (err == ENOTSUP && mt == MT_FIRST)
                  err = zap_remove(mos, snapobj, name, tx);
+ 
+         if (err == 0 && adj_cnt)
+                 dsl_snapcount_adjust(ds->ds_dir, tx, -1, B_TRUE);
+ 
          return (err);
  }
  
  static int
  dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
*** 1945,1955 ****
                              ds->ds_snapname, &val);
                          ASSERT0(err);
                          ASSERT3U(val, ==, obj);
                  }
  #endif
!                 err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
                  ASSERT(err == 0);
                  dsl_dataset_rele(ds_head, FTAG);
          }
  
          if (ds_prev && ds->ds_prev != ds_prev)
--- 1951,1962 ----
                              ds->ds_snapname, &val);
                          ASSERT0(err);
                          ASSERT3U(val, ==, obj);
                  }
  #endif
!                 err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx,
!                     B_TRUE);
                  ASSERT(err == 0);
                  dsl_dataset_rele(ds_head, FTAG);
          }
  
          if (ds_prev && ds->ds_prev != ds_prev)
*** 2010,2022 ****
                  dsl_dir_willuse_space(ds->ds_dir, asize, tx);
  
          return (0);
  }
  
  int
  dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
!     dmu_tx_t *tx)
  {
          int err;
          uint64_t value;
  
          /*
--- 2017,2144 ----
                  dsl_dir_willuse_space(ds->ds_dir, asize, tx);
  
          return (0);
  }
  
+ /*
+  * Check if adding additional snapshot(s) would exceed any snapshot limits.
+  * Note that all snapshot limits up to the root dataset (i.e. the pool itself)
+  * or the given ancestor must be satisfied. Note that it is valid for the
+  * count to exceed the limit. This can happen if a snapshot is taken by an
+  * administrative user in the global zone (e.g. a recursive snapshot by root).
+  */
  int
+ dsl_snapcount_check(dsl_dir_t *dd, uint64_t cnt, dsl_dir_t *ancestor,
+     cred_t *cr)
+ {
+         uint64_t limit;
+         int err = 0;
+ 
+         VERIFY(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
+ 
+         /* If we're allowed to change the limit, don't enforce the limit. */
+         if (dsl_secpolicy_write_prop(dd, ZFS_PROP_SNAPSHOT_LIMIT, cr) == 0)
+                 return (0);
+ 
+         /*
+          * If renaming a dataset with no snapshots, count adjustment is 0.
+          */
+         if (cnt == 0)
+                 return (0);
+ 
+         /*
+          * If an ancestor has been provided, stop checking the limit once we
+          * hit that dir. We need this during rename so that we don't overcount
+          * the check once we recurse up to the common ancestor.
+          */
+         if (ancestor == dd)
+                 return (0);
+ 
+         /*
+          * If we hit an uninitialized node while recursing up the tree, we can
+          * stop since we know the counts are not valid on this node and we
+          * know we won't touch this node's counts. We also know that the counts
+          * on the nodes above this one are uninitialized and that there cannot
+          * be a limit set on any of those nodes.
+          */
+         if (dd->dd_phys->dd_filesystem_count == 0)
+                 return (0);
+ 
+         err = dsl_prop_get_dd(dd, zfs_prop_to_name(ZFS_PROP_SNAPSHOT_LIMIT),
+             8, 1, &limit, NULL, B_FALSE);
+         if (err != 0)
+                 return (err);
+ 
+         /* Is there a snapshot limit which we've hit? */
+         if ((dd->dd_phys->dd_snapshot_count + cnt) > limit)
+                 return (EDQUOT);
+ 
+         if (dd->dd_parent != NULL)
+                 err = dsl_snapcount_check(dd->dd_parent, cnt, ancestor, cr);
+ 
+         return (err);
+ }
+ 
+ /*
+  * Adjust the snapshot count for the specified dsl_dir_t and all parents.
+  * When a new snapshot is created, increment the count on all parents, and when
+  * a snapshot is destroyed, decrement the count.
+  */
+ void
+ dsl_snapcount_adjust(dsl_dir_t *dd, dmu_tx_t *tx, int64_t delta,
+     boolean_t first)
+ {
+         if (first) {
+                 VERIFY(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
+                 VERIFY(dmu_tx_is_syncing(tx));
+         }
+ 
+         /*
+          * If we hit an uninitialized node while recursing up the tree, we can
+          * stop since we know the counts are not valid on this node and we
+          * know we shouldn't touch this node's counts. An uninitialized count
+          * on the node indicates that either the feature has not yet been
+          * activated or there are no limits on this part of the tree.
+          */
+         if (dd->dd_phys->dd_filesystem_count == 0)
+                 return;
+ 
+         /* if renaming a dataset with no snapshots, count adjustment is 0 */
+         if (delta == 0)
+                 return;
+ 
+         /*
+          * On initial entry we need to check if this feature is active, but
+          * we don't want to re-check this on each recursive call. Note: the
+          * feature cannot be active if it's not enabled. If the feature is not
+          * active, don't touch the on-disk count fields.
+          */
+         if (first) {
+                 zfeature_info_t *quota_feat =
+                     &spa_feature_table[SPA_FEATURE_FS_SS_LIMIT];
+ 
+                 if (!spa_feature_is_active(dd->dd_pool->dp_spa, quota_feat))
+                         return;
+         }
+ 
+         dmu_buf_will_dirty(dd->dd_dbuf, tx);
+ 
+         mutex_enter(&dd->dd_lock);
+ 
+         dd->dd_phys->dd_snapshot_count += delta;
+         VERIFY(dd->dd_phys->dd_snapshot_count >= 0);
+ 
+         /* Roll up this additional count into our ancestors */
+         if (dd->dd_parent != NULL)
+                 dsl_snapcount_adjust(dd->dd_parent, tx, delta, B_FALSE);
+ 
+         mutex_exit(&dd->dd_lock);
+ }
+ 
+ int
  dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
!     uint64_t cnt, dmu_tx_t *tx, cred_t *cr)
  {
          int err;
          uint64_t value;
  
          /*
*** 2040,2049 ****
--- 2162,2175 ----
           * of the dataset's length + 1 for the @-sign + snapshot name's length
           */
          if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
                  return (ENAMETOOLONG);
  
+         err = dsl_snapcount_check(ds->ds_dir, cnt, NULL, cr);
+         if (err)
+                 return (err);
+ 
          err = dsl_dataset_snapshot_reserve_space(ds, tx);
          if (err)
                  return (err);
  
          ds->ds_trysnap_txg = tx->tx_txg;
*** 2061,2070 ****
--- 2187,2198 ----
          objset_t *mos = dp->dp_meta_objset;
          int err;
  
          ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
  
+         dsl_snapcount_adjust(ds->ds_dir, tx, 1, B_TRUE);
+ 
          /*
           * The origin's ds_creation_txg has to be < TXG_INITIAL
           */
          if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
                  crtxg = 1;
*** 2434,2444 ****
  
          VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
              dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
  
          VERIFY(0 == dsl_dataset_get_snapname(ds));
!         err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
          ASSERT0(err);
          mutex_enter(&ds->ds_lock);
          (void) strcpy(ds->ds_snapname, newsnapname);
          mutex_exit(&ds->ds_lock);
          err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
--- 2562,2572 ----
  
          VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
              dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
  
          VERIFY(0 == dsl_dataset_get_snapname(ds));
!         err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx, B_FALSE);
          ASSERT0(err);
          mutex_enter(&ds->ds_lock);
          (void) strcpy(ds->ds_snapname, newsnapname);
          mutex_exit(&ds->ds_lock);
          err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
*** 2629,2638 ****
--- 2757,2767 ----
  struct promotearg {
          list_t shared_snaps, origin_snaps, clone_snaps;
          dsl_dataset_t *origin_origin;
          uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
          char *err_ds;
+         cred_t *cr;
  };
  
  static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
  static boolean_t snaplist_unstable(list_t *l);
  
*** 2716,2728 ****
                  pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
                  pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
                  pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
          }
  
!         /* Check that there is enough space here */
          err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
!             pa->used);
          if (err)
                  return (err);
  
          /*
           * Compute the amounts of space that will be used by snapshots
--- 2845,2857 ----
                  pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
                  pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
                  pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
          }
  
!         /* Check that there is enough space and limit headroom here */
          err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
!             origin_ds->ds_dir, pa->used, pa->cr);
          if (err)
                  return (err);
  
          /*
           * Compute the amounts of space that will be used by snapshots
*** 2847,2860 ****
                          ds->ds_objset = NULL;
                  }
                  /* move snap name entry */
                  VERIFY(0 == dsl_dataset_get_snapname(ds));
                  VERIFY(0 == dsl_dataset_snap_remove(origin_head,
!                     ds->ds_snapname, tx));
                  VERIFY(0 == zap_add(dp->dp_meta_objset,
                      hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
                      8, 1, &ds->ds_object, tx));
  
                  /* change containing dsl_dir */
                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
                  ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
                  ds->ds_phys->ds_dir_obj = dd->dd_object;
--- 2976,2990 ----
                          ds->ds_objset = NULL;
                  }
                  /* move snap name entry */
                  VERIFY(0 == dsl_dataset_get_snapname(ds));
                  VERIFY(0 == dsl_dataset_snap_remove(origin_head,
!                     ds->ds_snapname, tx, B_TRUE));
                  VERIFY(0 == zap_add(dp->dp_meta_objset,
                      hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
                      8, 1, &ds->ds_object, tx));
+                 dsl_snapcount_adjust(hds->ds_dir, tx, 1, B_TRUE);
  
                  /* change containing dsl_dir */
                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
                  ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
                  ds->ds_phys->ds_dir_obj = dd->dd_object;
*** 3088,3097 ****
--- 3218,3228 ----
                          goto out;
          }
  
  out:
          rw_exit(&dp->dp_config_rwlock);
+         pa.cr = CRED();
  
          /*
           * Add in 128x the snapnames zapobj size, since we will be moving
           * a bunch of snapnames to the promoted ds, and dirtying their
           * bonus buffers.