Print this page
OS-1566 filesystem limits for ZFS datasets

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/dsl_dataset.c
          +++ new/usr/src/uts/common/fs/zfs/dsl_dataset.c
↓ open down ↓ 37 lines elided ↑ open up ↑
  38   38  #include <sys/zfeature.h>
  39   39  #include <sys/unique.h>
  40   40  #include <sys/zfs_context.h>
  41   41  #include <sys/zfs_ioctl.h>
  42   42  #include <sys/spa.h>
  43   43  #include <sys/zfs_znode.h>
  44   44  #include <sys/zfs_onexit.h>
  45   45  #include <sys/zvol.h>
  46   46  #include <sys/dsl_scan.h>
  47   47  #include <sys/dsl_deadlist.h>
       48 +#include "zfs_prop.h"
  48   49  
  49   50  static char *dsl_reaper = "the grim reaper";
  50   51  
  51   52  static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
  52   53  static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
  53   54  static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
  54   55  
  55   56  #define SWITCH64(x, y) \
  56   57          { \
  57   58                  uint64_t __tmp = (x); \
↓ open down ↓ 266 lines elided ↑ open up ↑
 324  325                  mt = MT_EXACT;
 325  326  
 326  327          err = zap_lookup_norm(mos, snapobj, name, 8, 1,
 327  328              value, mt, NULL, 0, NULL);
 328  329          if (err == ENOTSUP && mt == MT_FIRST)
 329  330                  err = zap_lookup(mos, snapobj, name, 8, 1, value);
 330  331          return (err);
 331  332  }
 332  333  
 333  334  static int
 334      -dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
      335 +dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx,
      336 +    boolean_t adj_cnt)
 335  337  {
 336  338          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 337  339          uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
 338  340          matchtype_t mt;
 339  341          int err;
 340  342  
 341  343          dsl_dir_snap_cmtime_update(ds->ds_dir);
 342  344  
 343  345          if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
 344  346                  mt = MT_FIRST;
 345  347          else
 346  348                  mt = MT_EXACT;
 347  349  
 348  350          err = zap_remove_norm(mos, snapobj, name, mt, tx);
 349  351          if (err == ENOTSUP && mt == MT_FIRST)
 350  352                  err = zap_remove(mos, snapobj, name, tx);
      353 +        /* Callers that merely rename the entry pass adj_cnt == B_FALSE. */
      354 +        if (err == 0 && adj_cnt)
      355 +                dsl_snapcount_adjust(ds->ds_dir, tx, -1, B_TRUE);
      356 +
 351  357          return (err);
 352  358  }
 353  359  
 354  360  static int
 355  361  dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
 356  362      dsl_dataset_t **dsp)
 357  363  {
 358  364          objset_t *mos = dp->dp_meta_objset;
 359  365          dmu_buf_t *dbuf;
 360  366          dsl_dataset_t *ds;
↓ open down ↓ 1579 lines elided ↑ open up ↑
1940 1946  #ifdef ZFS_DEBUG
1941 1947                  {
1942 1948                          uint64_t val;
1943 1949  
1944 1950                          err = dsl_dataset_snap_lookup(ds_head,
1945 1951                              ds->ds_snapname, &val);
1946 1952                          ASSERT0(err);
1947 1953                          ASSERT3U(val, ==, obj);
1948 1954                  }
1949 1955  #endif
1950      -                err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
     1956 +                err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx,
     1957 +                    B_TRUE);
1951 1958                  ASSERT(err == 0);
1952 1959                  dsl_dataset_rele(ds_head, FTAG);
1953 1960          }
1954 1961  
1955 1962          if (ds_prev && ds->ds_prev != ds_prev)
1956 1963                  dsl_dataset_rele(ds_prev, FTAG);
1957 1964  
1958 1965          spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
1959 1966  
1960 1967          if (ds->ds_phys->ds_next_clones_obj != 0) {
↓ open down ↓ 44 lines elided ↑ open up ↑
2005 2012          /*
2006 2013           * Propagate any reserved space for this snapshot to other
2007 2014           * snapshot checks in this sync group.
2008 2015           */
2009 2016          if (asize > 0)
2010 2017                  dsl_dir_willuse_space(ds->ds_dir, asize, tx);
2011 2018  
2012 2019          return (0);
2013 2020  }
2014 2021  
     2022 +/*
     2023 + * Check whether adding cnt snapshot(s) under dd would exceed a snapshot limit.
     2024 + * Every limit from dd up to the root dataset (i.e. the pool itself), or up to
     2025 + * but not including the given ancestor, must be satisfied. Note that it is
     2026 + * valid for a count to exceed its limit. This can happen if a snapshot is taken
     2027 + * by an administrative user in the global zone (e.g. a recursive snapshot by root).
     2028 + */
2015 2029  int
     2030 +dsl_snapcount_check(dsl_dir_t *dd, uint64_t cnt, dsl_dir_t *ancestor,
     2031 +    cred_t *cr)
     2032 +{
     2033 +        uint64_t limit;
     2034 +        int err = 0;
     2035 +
     2036 +        VERIFY(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
     2037 +
     2038 +        /* If we're allowed to change the limit, don't enforce the limit. */
     2039 +        if (dsl_secpolicy_write_prop(dd, ZFS_PROP_SNAPSHOT_LIMIT, cr) == 0)
     2040 +                return (0);
     2041 +
     2042 +        /*
     2043 +         * If renaming a dataset with no snapshots, count adjustment is 0.
     2044 +         */
     2045 +        if (cnt == 0)
     2046 +                return (0);
     2047 +
     2048 +        /*
     2049 +         * If an ancestor has been provided, stop checking the limit once we
     2050 +         * hit that dir. We need this during rename so that we don't overcount
     2051 +         * the check once we recurse up to the common ancestor.
     2052 +         */
     2053 +        if (ancestor == dd)
     2054 +                return (0);
     2055 +
     2056 +        /*
     2057 +         * If we hit an uninitialized node while recursing up the tree, we can
     2058 +         * stop since we know the counts are not valid on this node and we
     2059 +         * know we won't touch this node's counts. We also know that the counts
     2060 +         * on the nodes above this one are uninitialized and that there cannot
     2061 +         * be a limit set on any of those nodes.
     2062 +         */
     2063 +        if (dd->dd_phys->dd_filesystem_count == 0)
     2064 +                return (0);
     2065 +
     2066 +        err = dsl_prop_get_dd(dd, zfs_prop_to_name(ZFS_PROP_SNAPSHOT_LIMIT),
     2067 +            8, 1, &limit, NULL, B_FALSE);
     2068 +        if (err != 0)
     2069 +                return (err);
     2070 +
     2071 +        /* Is there a snapshot limit which we've hit? */
     2072 +        if ((dd->dd_phys->dd_snapshot_count + cnt) > limit)
     2073 +                return (EDQUOT);
     2074 +
     2075 +        if (dd->dd_parent != NULL)
     2076 +                err = dsl_snapcount_check(dd->dd_parent, cnt, ancestor, cr);
     2077 +
     2078 +        return (err);
     2079 +}
     2080 +
     2081 +/*
     2082 + * Add delta to the snapshot count of dd and of its ancestors: positive when
     2083 + * snapshots are created, negative when they are destroyed. Callers pass
     2084 + * first == B_TRUE; B_FALSE is used only by the recursion into dd_parent.
     2085 + */
     2086 +void
     2087 +dsl_snapcount_adjust(dsl_dir_t *dd, dmu_tx_t *tx, int64_t delta,
     2088 +    boolean_t first)
     2089 +{
     2090 +        if (first) {
     2091 +                VERIFY(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
     2092 +                VERIFY(dmu_tx_is_syncing(tx));
     2093 +        }
     2094 +
     2095 +        /*
     2096 +         * If we hit an uninitialized node while recursing up the tree, we can
     2097 +         * stop since we know the counts are not valid on this node and we
     2098 +         * know we shouldn't touch this node's counts. An uninitialized count
     2099 +         * on the node indicates that either the feature has not yet been
     2100 +         * activated or there are no limits on this part of the tree.
     2101 +         */
     2102 +        if (dd->dd_phys->dd_filesystem_count == 0)
     2103 +                return;
     2104 +
     2105 +        /* if renaming a dataset with no snapshots, count adjustment is 0 */
     2106 +        if (delta == 0)
     2107 +                return;
     2108 +
     2109 +        /*
     2110 +         * On initial entry we need to check if this feature is active, but
     2111 +         * we don't want to re-check this on each recursive call. Note: the
     2112 +         * feature cannot be active if it's not enabled. If the feature is not
     2113 +         * active, don't touch the on-disk count fields.
     2114 +         */
     2115 +        if (first) {
     2116 +                zfeature_info_t *quota_feat =
     2117 +                    &spa_feature_table[SPA_FEATURE_FS_SS_LIMIT];
     2118 +
     2119 +                if (!spa_feature_is_active(dd->dd_pool->dp_spa, quota_feat))
     2120 +                        return;
     2121 +        }
     2122 +
     2123 +        dmu_buf_will_dirty(dd->dd_dbuf, tx);
     2124 +
     2125 +        mutex_enter(&dd->dd_lock);
     2126 +
     2127 +        /* A decrement must never take the count below zero; check first. */
     2128 +        VERIFY(delta > 0 || dd->dd_phys->dd_snapshot_count >= -delta);
     2129 +        dd->dd_phys->dd_snapshot_count += delta;
     2130 +        /* Roll up this additional count into our ancestors */
     2131 +        if (dd->dd_parent != NULL)
     2132 +                dsl_snapcount_adjust(dd->dd_parent, tx, delta, B_FALSE);
     2133 +
     2134 +        mutex_exit(&dd->dd_lock);
     2135 +}
     2136 +
     2137 +int
2016 2138  dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
2017      -    dmu_tx_t *tx)
     2139 +    uint64_t cnt, dmu_tx_t *tx, cred_t *cr)
2018 2140  {
2019 2141          int err;
2020 2142          uint64_t value;
2021 2143  
2022 2144          /*
2023 2145           * We don't allow multiple snapshots of the same txg.  If there
2024 2146           * is already one, try again.
2025 2147           */
2026 2148          if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
2027 2149                  return (EAGAIN);
↓ open down ↓ 7 lines elided ↑ open up ↑
2035 2157          if (err != ENOENT)
2036 2158                  return (err);
2037 2159  
2038 2160          /*
2039 2161           * Check that the dataset's name is not too long.  Name consists
2040 2162           * of the dataset's length + 1 for the @-sign + snapshot name's length
2041 2163           */
2042 2164          if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
2043 2165                  return (ENAMETOOLONG);
2044 2166  
     2167 +        err = dsl_snapcount_check(ds->ds_dir, cnt, NULL, cr);
     2168 +        if (err)
     2169 +                return (err);
     2170 +
2045 2171          err = dsl_dataset_snapshot_reserve_space(ds, tx);
2046 2172          if (err)
2047 2173                  return (err);
2048 2174  
2049 2175          ds->ds_trysnap_txg = tx->tx_txg;
2050 2176          return (0);
2051 2177  }
2052 2178  
2053 2179  void
2054 2180  dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
↓ open down ↓ 1 lines elided ↑ open up ↑
2056 2182  {
2057 2183          dsl_pool_t *dp = ds->ds_dir->dd_pool;
2058 2184          dmu_buf_t *dbuf;
2059 2185          dsl_dataset_phys_t *dsphys;
2060 2186          uint64_t dsobj, crtxg;
2061 2187          objset_t *mos = dp->dp_meta_objset;
2062 2188          int err;
2063 2189  
2064 2190          ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
2065 2191  
     2192 +        dsl_snapcount_adjust(ds->ds_dir, tx, 1, B_TRUE);
     2193 +
2066 2194          /*
2067 2195           * The origin's ds_creation_txg has to be < TXG_INITIAL
2068 2196           */
2069 2197          if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
2070 2198                  crtxg = 1;
2071 2199          else
2072 2200                  crtxg = tx->tx_txg;
2073 2201  
2074 2202          dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
2075 2203              DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
↓ open down ↓ 353 lines elided ↑ open up ↑
2429 2557          objset_t *mos = dd->dd_pool->dp_meta_objset;
2430 2558          dsl_dataset_t *hds;
2431 2559          int err;
2432 2560  
2433 2561          ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
2434 2562  
2435 2563          VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
2436 2564              dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
2437 2565  
2438 2566          VERIFY(0 == dsl_dataset_get_snapname(ds));
2439      -        err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
     2567 +        err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx, B_FALSE);
2440 2568          ASSERT0(err);
2441 2569          mutex_enter(&ds->ds_lock);
2442 2570          (void) strcpy(ds->ds_snapname, newsnapname);
2443 2571          mutex_exit(&ds->ds_lock);
2444 2572          err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
2445 2573              ds->ds_snapname, 8, 1, &ds->ds_object, tx);
2446 2574          ASSERT0(err);
2447 2575  
2448 2576          spa_history_log_internal_ds(ds, "rename", tx,
2449 2577              "-> @%s", newsnapname);
↓ open down ↓ 174 lines elided ↑ open up ↑
2624 2752  struct promotenode {
2625 2753          list_node_t link;
2626 2754          dsl_dataset_t *ds;
2627 2755  };
2628 2756  
2629 2757  struct promotearg {
2630 2758          list_t shared_snaps, origin_snaps, clone_snaps;
2631 2759          dsl_dataset_t *origin_origin;
2632 2760          uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
2633 2761          char *err_ds;
     2762 +        cred_t *cr;     /* CRED() taken before the sync task; for limit checks */
2634 2763  };
2635 2764  
2636 2765  static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
2637 2766  static boolean_t snaplist_unstable(list_t *l);
2638 2767  
2639 2768  static int
2640 2769  dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
2641 2770  {
2642 2771          dsl_dataset_t *hds = arg1;
2643 2772          struct promotearg *pa = arg2;
↓ open down ↓ 67 lines elided ↑ open up ↑
2711 2840          /*
2712 2841           * If we are a clone of a clone then we never reached ORIGIN,
2713 2842           * so we need to subtract out the clone origin's used space.
2714 2843           */
2715 2844          if (pa->origin_origin) {
2716 2845                  pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
2717 2846                  pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
2718 2847                  pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
2719 2848          }
2720 2849  
2721      -        /* Check that there is enough space here */
     2850 +        /* Check that there is enough space and limit headroom here */
2722 2851          err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2723      -            pa->used);
     2852 +            origin_ds->ds_dir, pa->used, pa->cr);
2724 2853          if (err)
2725 2854                  return (err);
2726 2855  
2727 2856          /*
2728 2857           * Compute the amounts of space that will be used by snapshots
2729 2858           * after the promotion (for both origin and clone).  For each,
2730 2859           * it is the amount of space that will be on all of their
2731 2860           * deadlists (that was not born before their new origin).
2732 2861           */
2733 2862          if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
↓ open down ↓ 108 lines elided ↑ open up ↑
2842 2971                  dsl_dataset_t *ds = snap->ds;
2843 2972  
2844 2973                  /* unregister props as dsl_dir is changing */
2845 2974                  if (ds->ds_objset) {
2846 2975                          dmu_objset_evict(ds->ds_objset);
2847 2976                          ds->ds_objset = NULL;
2848 2977                  }
2849 2978                  /* move snap name entry */
2850 2979                  VERIFY(0 == dsl_dataset_get_snapname(ds));
2851 2980                  VERIFY(0 == dsl_dataset_snap_remove(origin_head,
2852      -                    ds->ds_snapname, tx));
     2981 +                    ds->ds_snapname, tx, B_TRUE));
2853 2982                  VERIFY(0 == zap_add(dp->dp_meta_objset,
2854 2983                      hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
2855 2984                      8, 1, &ds->ds_object, tx));
     2985 +                dsl_snapcount_adjust(hds->ds_dir, tx, 1, B_TRUE);
2856 2986  
2857 2987                  /* change containing dsl_dir */
2858 2988                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
2859 2989                  ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
2860 2990                  ds->ds_phys->ds_dir_obj = dd->dd_object;
2861 2991                  ASSERT3P(ds->ds_dir, ==, odd);
2862 2992                  dsl_dir_close(ds->ds_dir, ds);
2863 2993                  VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
2864 2994                      NULL, ds, &ds->ds_dir));
2865 2995  
↓ open down ↓ 217 lines elided ↑ open up ↑
3083 3213          if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) {
3084 3214                  err = dsl_dataset_hold_obj(dp,
3085 3215                      snap->ds->ds_dir->dd_phys->dd_origin_obj,
3086 3216                      FTAG, &pa.origin_origin);
3087 3217                  if (err != 0)
3088 3218                          goto out;
3089 3219          }
3090 3220  
3091 3221  out:
3092 3222          rw_exit(&dp->dp_config_rwlock);
     3223 +        pa.cr = CRED();
3093 3224  
3094 3225          /*
3095 3226           * Add in 128x the snapnames zapobj size, since we will be moving
3096 3227           * a bunch of snapnames to the promoted ds, and dirtying their
3097 3228           * bonus buffers.
3098 3229           */
3099 3230          if (err == 0) {
3100 3231                  err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
3101 3232                      dsl_dataset_promote_sync, ds, &pa,
3102 3233                      2 + 2 * doi.doi_physical_blocks_512);
↓ open down ↓ 1191 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX