OS-1566 dataset quota for ZFS datasets

          --- old/usr/src/uts/common/fs/zfs/dsl_dataset.c
          +++ new/usr/src/uts/common/fs/zfs/dsl_dataset.c
... 340 lines elided ...
 341  341          dsl_dir_snap_cmtime_update(ds->ds_dir);
 342  342  
 343  343          if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
 344  344                  mt = MT_FIRST;
 345  345          else
 346  346                  mt = MT_EXACT;
 347  347  
 348  348          err = zap_remove_norm(mos, snapobj, name, mt, tx);
 349  349          if (err == ENOTSUP && mt == MT_FIRST)
 350  350                  err = zap_remove(mos, snapobj, name, tx);
      351 +
      352 +        if (err == 0)
      353 +                dsl_snapcount_adjust(ds->ds_dir, tx, -1, B_TRUE);
      354 +
 351  355          return (err);
 352  356  }
 353  357  
 354  358  static int
 355  359  dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
 356  360      dsl_dataset_t **dsp)
 357  361  {
 358  362          objset_t *mos = dp->dp_meta_objset;
 359  363          dmu_buf_t *dbuf;
 360  364          dsl_dataset_t *ds;
... 768 lines elided ...
1129 1133                          if (err) {
1130 1134                                  dsl_dir_close(dd, FTAG);
1131 1135                                  goto out;
1132 1136                          }
1133 1137                  }
1134 1138  
1135 1139                  dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
1136 1140                  dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
1137 1141                      dsl_dataset_destroy_sync, &dsda, tag, 0);
1138 1142                  dsl_sync_task_create(dstg, dsl_dir_destroy_check,
1139      -                    dsl_dir_destroy_sync, dd, FTAG, 0);
     1143 +                    dsl_dir_destroy_sync, dd, tag, 0);
1140 1144                  err = dsl_sync_task_group_wait(dstg);
1141 1145                  dsl_sync_task_group_destroy(dstg);
1142 1146  
1143 1147                  /*
1144 1148                   * We could be racing against 'zfs release' or 'zfs destroy -d'
1145 1149                   * on the origin snap, in which case we can get EBUSY if we
1146 1150                   * needed to destroy the origin snap but were not ready to
1147 1151                   * do so.
1148 1152                   */
1149 1153                  if (dsda.need_prep) {
... 855 lines elided ...
2005 2009          /*
2006 2010           * Propagate any reserved space for this snapshot to other
2007 2011           * snapshot checks in this sync group.
2008 2012           */
2009 2013          if (asize > 0)
2010 2014                  dsl_dir_willuse_space(ds->ds_dir, asize, tx);
2011 2015  
2012 2016          return (0);
2013 2017  }
2014 2018  
     2019 +/*
     2020 + * Check if adding additional snapshot(s) would exceed any snapshot quotas.
     2021 + * Note that all snapshot quotas up to the root dataset (i.e. the pool itself)
     2022 + * or the given ancestor must be satisfied. It is, however, valid for the
     2023 + * count to exceed the quota. This can happen if a recursive snapshot is taken
     2024 + * from a dataset above this one.
     2025 + */
2015 2026  int
     2027 +dsl_snapcount_check(dsl_dir_t *dd, dmu_tx_t *tx, uint64_t cnt,
     2028 +    dsl_dir_t *ancestor)
     2029 +{
     2030 +        uint64_t quota;
     2031 +        int err = 0;
     2032 +
     2033 +        /*
     2034 +         * As with dsl_dataset_set_reservation_check(), don't run this check in
     2035 +         * open context.
     2036 +         */
     2037 +        if (!dmu_tx_is_syncing(tx))
     2038 +                return (0);
     2039 +
     2040 +        /*
     2041 +         * If renaming a dataset with no snapshots, count adjustment is 0.
     2042 +         * Likewise when taking a recursive snapshot below the top-level (see
     2043 +         * the comment in snapshot_check() for more details).
     2044 +         */
     2045 +        if (cnt == 0)
     2046 +                return (0);
     2047 +
     2048 +        /*
     2049 +         * If an ancestor has been provided, stop checking the quota once we
     2050 +         * hit that dir. We need this during rename so that we don't count the
     2051 +         * moved snapshots twice once we recurse up to the common ancestor.
     2052 +         */
     2053 +        if (ancestor == dd)
     2054 +                return (0);
     2055 +
     2056 +        /*
     2057 +         * If there's no value for this property, there's no need to enforce a
     2058 +         * snapshot quota.
     2059 +         */
     2060 +        err = dsl_prop_get_dd(dd, zfs_prop_to_name(ZFS_PROP_SNAPSHOT_QUOTA),
     2061 +            8, 1, &quota, NULL, B_FALSE);
     2062 +        if (err == ENOENT)
     2063 +                return (0);
     2064 +        else if (err != 0)
     2065 +                return (err);
     2066 +
     2067 +#ifdef _KERNEL
     2068 +        extern void __dtrace_probe_zfs__ss__quota(uint64_t, uint64_t, char *);
     2069 +        __dtrace_probe_zfs__ss__quota(
     2070 +            (uint64_t)dd->dd_phys->dd_snapshot_count, (uint64_t)quota,
     2071 +            dd->dd_myname);
     2072 +#endif
     2073 +
     2074 +        if (quota > 0 && (dd->dd_phys->dd_snapshot_count + cnt) > quota)
     2075 +                return (EDQUOT);
     2076 +
     2077 +        if (dd->dd_parent != NULL)
     2078 +                err = dsl_snapcount_check(dd->dd_parent, tx, cnt, ancestor);
     2079 +
     2080 +        return (err);
     2081 +}
     2082 +
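
A minimal userland sketch of the ancestor walk above (not part of this change; the struct and function names here are hypothetical): every quota from the target dir up to the pool root, or up to an optionally supplied ancestor, must have headroom for cnt additional snapshots, and a quota of 0 means no limit.

/* Hypothetical illustration only; compiles with any C compiler. */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

struct snap_dir {
        struct snap_dir *parent;        /* NULL at the pool root */
        uint64_t snapshot_count;        /* rolled-up snapshot count */
        uint64_t quota;                 /* 0 means "no quota" */
};

static int
check_snap_quota(struct snap_dir *dd, uint64_t cnt, struct snap_dir *ancestor)
{
        if (cnt == 0 || dd == ancestor)
                return (0);
        if (dd->quota > 0 && dd->snapshot_count + cnt > dd->quota)
                return (EDQUOT);
        if (dd->parent != NULL)
                return (check_snap_quota(dd->parent, cnt, ancestor));
        return (0);
}

int
main(void)
{
        struct snap_dir pool = { NULL, 5, 0 };  /* no quota on the pool */
        struct snap_dir fs = { &pool, 4, 5 };   /* 4 of 5 snapshots used */

        printf("cnt=1 -> %d\n", check_snap_quota(&fs, 1, NULL));  /* 0: fits */
        printf("cnt=2 -> %d\n", check_snap_quota(&fs, 2, NULL));  /* EDQUOT */
        return (0);
}

As in the kernel function, the walk stops as soon as the supplied ancestor is reached, which is what keeps a rename from checking the same snapshots again above the common ancestor.
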
     2083 +/*
     2084 + * Adjust the snapshot count for the specified dsl_dir_t and all parents.
     2085 + * When a new snapshot is created, increment the count on all parents, and when
     2086 + * a snapshot is destroyed, decrement the count.
     2087 + */
     2088 +void
     2089 +dsl_snapcount_adjust(dsl_dir_t *dd, dmu_tx_t *tx, int64_t delta,
     2090 +    boolean_t first)
     2091 +{
     2092 +        /*
     2093 +         * On initial entry we need to check if this feature is active, but
     2094 +         * we don't want to re-check this on each recursive call. Note: the
     2095 +         * feature cannot be active if it's not enabled. If the feature is not
     2096 +         * active, don't touch the on-disk count fields.
     2097 +         */
     2098 +        if (first) {
     2099 +                dsl_dataset_t *ds = NULL;
     2100 +                spa_t *spa;
     2101 +                zfeature_info_t *quota_feat =
     2102 +                    &spa_feature_table[SPA_FEATURE_DS_SS_QUOTA];
     2103 +
     2104 +                VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
     2105 +                    dd->dd_phys->dd_head_dataset_obj, FTAG, &ds));
     2106 +                spa = dsl_dataset_get_spa(ds);
     2107 +                dsl_dataset_rele(ds, FTAG);
     2108 +                if (!spa_feature_is_active(spa, quota_feat))
     2109 +                        return;
     2110 +        }
     2111 +
     2112 +        /*
     2113 +         * As with dsl_dataset_set_reservation_check(), we don't want to do
     2114 +         * this adjustment in open context.
     2115 +         */
     2116 +        if (!dmu_tx_is_syncing(tx))
     2117 +                return;
     2118 +
     2119 +        /* if renaming a dataset with no snapshots, count adjustment is 0 */
     2120 +        if (delta == 0)
     2121 +                return;
     2122 +
     2123 +        /* Apply the delta to this dir's snapshot count */
     2124 +        dmu_buf_will_dirty(dd->dd_dbuf, tx);
     2125 +
     2126 +        mutex_enter(&dd->dd_lock);
     2127 +
     2128 +        /*
     2129 +         * Counts may be incorrect if dealing with an existing pool and
     2130 +         * there has never been a quota set in the dataset hierarchy.
     2131 +         * This is not an error.
     2132 +         */
     2133 +        if (delta < 0 && dd->dd_phys->dd_snapshot_count < (delta * -1)) {
     2134 +#ifdef _KERNEL
     2135 +                extern void __dtrace_probe_zfs__sscnt__adj__neg(char *);
     2136 +                __dtrace_probe_zfs__sscnt__adj__neg(dd->dd_myname);
     2137 +#endif
     2138 +                mutex_exit(&dd->dd_lock);
     2139 +                return;
     2140 +        }
     2141 +
     2142 +        dd->dd_phys->dd_snapshot_count += delta;
     2143 +
     2144 +        /* Roll up this additional count into our ancestors */
     2145 +
     2146 +        if (dd->dd_parent != NULL)
     2147 +                dsl_snapcount_adjust(dd->dd_parent, tx, delta, B_FALSE);
     2148 +
     2149 +        mutex_exit(&dd->dd_lock);
     2150 +}
     2151 +
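
The rollup that dsl_snapcount_adjust() performs can likewise be sketched in userland (again hypothetical names, not part of this change): the delta is applied to the dir itself and then to each ancestor, and a negative delta that would underflow a stale count, as on a pool that predates the feature, is dropped (the kernel code fires a DTrace probe and returns).

/* Hypothetical illustration only. */
#include <stdint.h>
#include <stdio.h>

struct snap_dir {
        struct snap_dir *parent;        /* NULL at the pool root */
        uint64_t snapshot_count;
};

static void
adjust_snap_count(struct snap_dir *dd, int64_t delta)
{
        if (delta == 0)
                return;
        /* counts on pre-existing pools may be low; this is not an error */
        if (delta < 0 && dd->snapshot_count < (uint64_t)(-delta))
                return;
        dd->snapshot_count += delta;
        if (dd->parent != NULL)
                adjust_snap_count(dd->parent, delta);
}

int
main(void)
{
        struct snap_dir pool = { NULL, 3 };
        struct snap_dir fs = { &pool, 1 };

        adjust_snap_count(&fs, 1);      /* snapshot created: fs=2, pool=4 */
        adjust_snap_count(&fs, -1);     /* snapshot destroyed: fs=1, pool=3 */
        printf("fs=%llu pool=%llu\n",
            (unsigned long long)fs.snapshot_count,
            (unsigned long long)pool.snapshot_count);
        return (0);
}
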
     2152 +int
2016 2153  dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
2017      -    dmu_tx_t *tx)
     2154 +    uint64_t cnt, dmu_tx_t *tx)
2018 2155  {
2019 2156          int err;
2020 2157          uint64_t value;
2021 2158  
2022 2159          /*
2023 2160           * We don't allow multiple snapshots of the same txg.  If there
2024 2161           * is already one, try again.
2025 2162           */
2026 2163          if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
2027 2164                  return (EAGAIN);
... 7 lines elided ...
2035 2172          if (err != ENOENT)
2036 2173                  return (err);
2037 2174  
2038 2175          /*
2039 2176           * Check that the dataset's name is not too long.  Name consists
2040 2177           * of the dataset's length + 1 for the @-sign + snapshot name's length
2041 2178           */
2042 2179          if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
2043 2180                  return (ENAMETOOLONG);
2044 2181  
     2182 +        err = dsl_snapcount_check(ds->ds_dir, tx, cnt, NULL);
     2183 +        if (err)
     2184 +                return (err);
     2185 +
2045 2186          err = dsl_dataset_snapshot_reserve_space(ds, tx);
2046 2187          if (err)
2047 2188                  return (err);
2048 2189  
2049 2190          ds->ds_trysnap_txg = tx->tx_txg;
2050 2191          return (0);
2051 2192  }
2052 2193  
2053 2194  void
2054 2195  dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
... 1 line elided ...
2056 2197  {
2057 2198          dsl_pool_t *dp = ds->ds_dir->dd_pool;
2058 2199          dmu_buf_t *dbuf;
2059 2200          dsl_dataset_phys_t *dsphys;
2060 2201          uint64_t dsobj, crtxg;
2061 2202          objset_t *mos = dp->dp_meta_objset;
2062 2203          int err;
2063 2204  
2064 2205          ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
2065 2206  
     2207 +        dsl_snapcount_adjust(ds->ds_dir, tx, 1, B_TRUE);
     2208 +
2066 2209          /*
2067 2210           * The origin's ds_creation_txg has to be < TXG_INITIAL
2068 2211           */
2069 2212          if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
2070 2213                  crtxg = 1;
2071 2214          else
2072 2215                  crtxg = tx->tx_txg;
2073 2216  
2074 2217          dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
2075 2218              DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
... 635 lines elided ...
2711 2854          /*
2712 2855           * If we are a clone of a clone then we never reached ORIGIN,
2713 2856           * so we need to subtract out the clone origin's used space.
2714 2857           */
2715 2858          if (pa->origin_origin) {
2716 2859                  pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
2717 2860                  pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
2718 2861                  pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
2719 2862          }
2720 2863  
2721      -        /* Check that there is enough space here */
     2864 +        /* Check that there is enough space and quota headroom here */
2722 2865          err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2723      -            pa->used);
     2866 +            origin_ds->ds_dir, pa->used, tx);
2724 2867          if (err)
2725 2868                  return (err);
2726 2869  
2727 2870          /*
2728 2871           * Compute the amounts of space that will be used by snapshots
2729 2872           * after the promotion (for both origin and clone).  For each,
2730 2873           * it is the amount of space that will be on all of their
2731 2874           * deadlists (that was not born before their new origin).
2732 2875           */
2733 2876          if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
... 112 lines elided ...
2846 2989                          dmu_objset_evict(ds->ds_objset);
2847 2990                          ds->ds_objset = NULL;
2848 2991                  }
2849 2992                  /* move snap name entry */
2850 2993                  VERIFY(0 == dsl_dataset_get_snapname(ds));
2851 2994                  VERIFY(0 == dsl_dataset_snap_remove(origin_head,
2852 2995                      ds->ds_snapname, tx));
2853 2996                  VERIFY(0 == zap_add(dp->dp_meta_objset,
2854 2997                      hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
2855 2998                      8, 1, &ds->ds_object, tx));
     2999 +                dsl_snapcount_adjust(hds->ds_dir, tx, 1, B_TRUE);
2856 3000  
2857 3001                  /* change containing dsl_dir */
2858 3002                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
2859 3003                  ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
2860 3004                  ds->ds_phys->ds_dir_obj = dd->dd_object;
2861 3005                  ASSERT3P(ds->ds_dir, ==, odd);
2862 3006                  dsl_dir_close(ds->ds_dir, ds);
2863 3007                  VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
2864 3008                      NULL, ds, &ds->ds_dir));
2865 3009  
... 1428 lines elided ...