Print this page
OS-1566 filesystem limits for ZFS datasets

*** 43,52 **** --- 43,53 ---- #include <sys/zfs_znode.h> #include <sys/zfs_onexit.h> #include <sys/zvol.h> #include <sys/dsl_scan.h> #include <sys/dsl_deadlist.h> + #include "zfs_prop.h" static char *dsl_reaper = "the grim reaper"; static dsl_checkfunc_t dsl_dataset_destroy_begin_check; static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
*** 346,355 **** --- 347,360 ---- mt = MT_EXACT; err = zap_remove_norm(mos, snapobj, name, mt, tx); if (err == ENOTSUP && mt == MT_FIRST) err = zap_remove(mos, snapobj, name, tx); + + if (err == 0) + dsl_snapcount_adjust(ds->ds_dir, tx, -1, B_TRUE); + return (err); } static int dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
*** 1134,1144 **** dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); dsl_sync_task_create(dstg, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, &dsda, tag, 0); dsl_sync_task_create(dstg, dsl_dir_destroy_check, ! dsl_dir_destroy_sync, dd, FTAG, 0); err = dsl_sync_task_group_wait(dstg); dsl_sync_task_group_destroy(dstg); /* * We could be racing against 'zfs release' or 'zfs destroy -d' --- 1139,1149 ---- dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool); dsl_sync_task_create(dstg, dsl_dataset_destroy_check, dsl_dataset_destroy_sync, &dsda, tag, 0); dsl_sync_task_create(dstg, dsl_dir_destroy_check, ! dsl_dir_destroy_sync, dd, tag, 0); err = dsl_sync_task_group_wait(dstg); dsl_sync_task_group_destroy(dstg); /* * We could be racing against 'zfs release' or 'zfs destroy -d'
*** 2010,2022 **** dsl_dir_willuse_space(ds->ds_dir, asize, tx); return (0); } int dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname, ! dmu_tx_t *tx) { int err; uint64_t value; /* --- 2015,2174 ---- dsl_dir_willuse_space(ds->ds_dir, asize, tx); return (0); } + /* + * Check if adding additional snapshot(s) would exceed any snapshot limits. + * Note that all snapshot limits up to the root dataset (i.e. the pool itself) + * or the given ancestor must be satisfied. Note that it is valid for the + * count to exceed the limit. This can happen if a snapshot is taken by an + * administrative user in the global zone (e.g. a recursive snapshot by root). + */ int + dsl_snapcount_check(dsl_dir_t *dd, uint64_t cnt, dsl_dir_t *ancestor) + { + uint64_t limit; + int err = 0; + + /* + * The limit is never enforced for the admin user in global zone. + * If we're not in the global zone then we need to run this check in + * open context, since that's when we know what zone we're in and + * syncing is only performed in the global zone. + */ + if (INGLOBALZONE(curproc)) + return (0); + + /* + * If renaming a dataset with no snapshots, count adjustment is 0. + */ + if (cnt == 0) + return (0); + + /* + * If an ancestor has been provided, stop checking the limit once we + * hit that dir. We need this during rename so that we don't overcount + * the check once we recurse up to the common ancestor. + */ + if (ancestor == dd) + return (0); + + /* + * If we hit an uninitialized node while recursing up the tree, we can + * stop since we know the counts are not valid on this node and we + * know we won't touch this node's counts. + */ + if (dd->dd_phys->dd_filesystem_count == 0) + return (0); + + /* + * If there's no value for this property, there's no need to enforce a + * snapshot limit. 
+ */ + err = dsl_prop_get_dd(dd, zfs_prop_to_name(ZFS_PROP_SNAPSHOT_LIMIT), + 8, 1, &limit, NULL, B_FALSE); + if (err == ENOENT) + return (0); + else if (err != 0) + return (err); + + #ifdef _KERNEL + extern void __dtrace_probe_zfs__ss__limit(uint64_t, uint64_t, char *); + __dtrace_probe_zfs__ss__limit( + (uint64_t)dd->dd_phys->dd_snapshot_count, (uint64_t)limit, + dd->dd_myname); + #endif + + if (limit != MAXLIMIT && + (dd->dd_phys->dd_snapshot_count + cnt) > limit) + return (EDQUOT); + + if (dd->dd_parent != NULL) + err = dsl_snapcount_check(dd->dd_parent, cnt, ancestor); + + return (err); + } + + /* + * Adjust the snapshot count for the specified dsl_dir_t and all parents. + * When a new snapshot is created, increment the count on all parents, and when + * a snapshot is destroyed, decrement the count. + */ + void + dsl_snapcount_adjust(dsl_dir_t *dd, dmu_tx_t *tx, int64_t delta, + boolean_t first) + { + /* + * If we hit an uninitialized node while recursing up the tree, we can + * stop since we know the counts are not valid on this node and we + * know we shouldn't touch this node's counts. An uninitialized count + * on the node indicates that either the feature has not yet been + * activated or there are no limits on this part of the tree. + */ + if (dd->dd_phys->dd_filesystem_count == 0) + return; + + /* + * The feature might have previously been active, so there could be + * non-0 counts on the nodes, but it might now be inactive. + * + * On initial entry we need to check if this feature is active, but + * we don't want to re-check this on each recursive call. Note: the + * feature cannot be active if it's not enabled. If the feature is not + * active, don't touch the on-disk count fields. 
+ */ + if (first) { + dsl_dataset_t *ds = NULL; + spa_t *spa; + zfeature_info_t *quota_feat = + &spa_feature_table[SPA_FEATURE_FS_SS_LIMIT]; + + VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, + dd->dd_phys->dd_head_dataset_obj, FTAG, &ds)); + spa = dsl_dataset_get_spa(ds); + dsl_dataset_rele(ds, FTAG); + if (!spa_feature_is_active(spa, quota_feat)) + return; + } + + /* + * As with dsl_dataset_set_reservation_check(), we don't want to run + * this check in open context. + */ + if (!dmu_tx_is_syncing(tx)) + return; + + /* if renaming a dataset with no snapshots, count adjustment is 0 */ + if (delta == 0) + return; + + /* + * If we hit an uninitialized node while recursing up the tree, we can + * stop since we know the counts are not valid on this node and we + * know we shouldn't touch this node's counts. + */ + if (dd->dd_phys->dd_filesystem_count == 0) + return; + + /* Increment count for parent */ + dmu_buf_will_dirty(dd->dd_dbuf, tx); + + mutex_enter(&dd->dd_lock); + + dd->dd_phys->dd_snapshot_count += delta; + + /* Roll up this additional count into our ancestors */ + if (dd->dd_parent != NULL) + dsl_snapcount_adjust(dd->dd_parent, tx, delta, B_FALSE); + + mutex_exit(&dd->dd_lock); + } + + int dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname, ! uint64_t cnt, dmu_tx_t *tx) { int err; uint64_t value; /*
*** 2040,2049 **** --- 2192,2205 ---- * of the dataset's length + 1 for the @-sign + snapshot name's length */ if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN) return (ENAMETOOLONG); + err = dsl_snapcount_check(ds->ds_dir, cnt, NULL); + if (err) + return (err); + err = dsl_dataset_snapshot_reserve_space(ds, tx); if (err) return (err); ds->ds_trysnap_txg = tx->tx_txg;
*** 2061,2070 **** --- 2217,2228 ---- objset_t *mos = dp->dp_meta_objset; int err; ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock)); + dsl_snapcount_adjust(ds->ds_dir, tx, 1, B_TRUE); + /* * The origin's ds_creation_txg has to be < TXG_INITIAL */ if (strcmp(snapname, ORIGIN_DIR_NAME) == 0) crtxg = 1;
*** 2716,2728 **** pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes; pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; } ! /* Check that there is enough space here */ err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, ! pa->used); if (err) return (err); /* * Compute the amounts of space that will be used by snapshots --- 2874,2886 ---- pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes; pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes; pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes; } ! /* Check that there is enough space and limit headroom here */ err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir, ! origin_ds->ds_dir, pa->used, tx); if (err) return (err); /* * Compute the amounts of space that will be used by snapshots
*** 2851,2860 **** --- 3009,3019 ---- VERIFY(0 == dsl_dataset_snap_remove(origin_head, ds->ds_snapname, tx)); VERIFY(0 == zap_add(dp->dp_meta_objset, hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname, 8, 1, &ds->ds_object, tx)); + dsl_snapcount_adjust(hds->ds_dir, tx, 1, B_TRUE); /* change containing dsl_dir */ dmu_buf_will_dirty(ds->ds_dbuf, tx); ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object); ds->ds_phys->ds_dir_obj = dd->dd_object;