28 #include <sys/dsl_dataset.h>
29 #include <sys/dsl_dir.h>
30 #include <sys/dsl_prop.h>
31 #include <sys/dsl_synctask.h>
32 #include <sys/dmu_traverse.h>
33 #include <sys/dmu_impl.h>
34 #include <sys/dmu_tx.h>
35 #include <sys/arc.h>
36 #include <sys/zio.h>
37 #include <sys/zap.h>
38 #include <sys/zfeature.h>
39 #include <sys/unique.h>
40 #include <sys/zfs_context.h>
41 #include <sys/zfs_ioctl.h>
42 #include <sys/spa.h>
43 #include <sys/zfs_znode.h>
44 #include <sys/zfs_onexit.h>
45 #include <sys/zvol.h>
46 #include <sys/dsl_scan.h>
47 #include <sys/dsl_deadlist.h>
48
49 static char *dsl_reaper = "the grim reaper";
50
51 static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
52 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
53 static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
54
/*
 * Exchange the values of two uint64_t lvalues.
 *
 * Both arguments are evaluated more than once, so they must not have
 * side effects.  The body is wrapped in do { } while (0) so that the
 * invocation together with its trailing semicolon forms exactly one
 * statement; a bare { } block would break an unbraced if/else.
 */
#define	SWITCH64(x, y) \
	do { \
		uint64_t switch64_tmp = (x); \
		(x) = (y); \
		(y) = switch64_tmp; \
	} while (0)
61
62 #define DS_REF_MAX (1ULL << 62)
63
64 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE
65
66 #define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper)
67
314 dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
315 {
316 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
317 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
318 matchtype_t mt;
319 int err;
320
321 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
322 mt = MT_FIRST;
323 else
324 mt = MT_EXACT;
325
326 err = zap_lookup_norm(mos, snapobj, name, 8, 1,
327 value, mt, NULL, 0, NULL);
328 if (err == ENOTSUP && mt == MT_FIRST)
329 err = zap_lookup(mos, snapobj, name, 8, 1, value);
330 return (err);
331 }
332
333 static int
334 dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
335 {
336 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
337 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
338 matchtype_t mt;
339 int err;
340
341 dsl_dir_snap_cmtime_update(ds->ds_dir);
342
343 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
344 mt = MT_FIRST;
345 else
346 mt = MT_EXACT;
347
348 err = zap_remove_norm(mos, snapobj, name, mt, tx);
349 if (err == ENOTSUP && mt == MT_FIRST)
350 err = zap_remove(mos, snapobj, name, tx);
351 return (err);
352 }
353
354 static int
355 dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
356 dsl_dataset_t **dsp)
357 {
358 objset_t *mos = dp->dp_meta_objset;
359 dmu_buf_t *dbuf;
360 dsl_dataset_t *ds;
361 int err;
362 dmu_object_info_t doi;
363
364 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
365 dsl_pool_sync_context(dp));
366
367 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
368 if (err)
369 return (err);
370
1930 ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
1931 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1932 ASSERT(err == 0);
1933 } else {
1934 /* remove from snapshot namespace */
1935 dsl_dataset_t *ds_head;
1936 ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
1937 VERIFY(0 == dsl_dataset_hold_obj(dp,
1938 ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
1939 VERIFY(0 == dsl_dataset_get_snapname(ds));
1940 #ifdef ZFS_DEBUG
1941 {
1942 uint64_t val;
1943
1944 err = dsl_dataset_snap_lookup(ds_head,
1945 ds->ds_snapname, &val);
1946 ASSERT0(err);
1947 ASSERT3U(val, ==, obj);
1948 }
1949 #endif
1950 err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
1951 ASSERT(err == 0);
1952 dsl_dataset_rele(ds_head, FTAG);
1953 }
1954
1955 if (ds_prev && ds->ds_prev != ds_prev)
1956 dsl_dataset_rele(ds_prev, FTAG);
1957
1958 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
1959
1960 if (ds->ds_phys->ds_next_clones_obj != 0) {
1961 uint64_t count;
1962 ASSERT(0 == zap_count(mos,
1963 ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
1964 VERIFY(0 == dmu_object_free(mos,
1965 ds->ds_phys->ds_next_clones_obj, tx));
1966 }
1967 if (ds->ds_phys->ds_props_obj != 0)
1968 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
1969 if (ds->ds_phys->ds_userrefs_obj != 0)
1970 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
1995 /*
1996 * If there's an fs-only reservation, any blocks that might become
1997 * owned by the snapshot dataset must be accommodated by space
1998 * outside of the reservation.
1999 */
2000 ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
2001 asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
2002 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
2003 return (ENOSPC);
2004
2005 /*
2006 * Propagate any reserved space for this snapshot to other
2007 * snapshot checks in this sync group.
2008 */
2009 if (asize > 0)
2010 dsl_dir_willuse_space(ds->ds_dir, asize, tx);
2011
2012 return (0);
2013 }
2014
2015 int
2016 dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
2017 dmu_tx_t *tx)
2018 {
2019 int err;
2020 uint64_t value;
2021
2022 /*
2023 * We don't allow multiple snapshots of the same txg. If there
2024 * is already one, try again.
2025 */
2026 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
2027 return (EAGAIN);
2028
2029 /*
2030 * Check for conflicting snapshot name.
2031 */
2032 err = dsl_dataset_snap_lookup(ds, snapname, &value);
2033 if (err == 0)
2034 return (EEXIST);
2035 if (err != ENOENT)
2036 return (err);
2037
2038 /*
2039 * Check that the dataset's name is not too long. Name consists
2040 * of the dataset's length + 1 for the @-sign + snapshot name's length
2041 */
2042 if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
2043 return (ENAMETOOLONG);
2044
2045 err = dsl_dataset_snapshot_reserve_space(ds, tx);
2046 if (err)
2047 return (err);
2048
2049 ds->ds_trysnap_txg = tx->tx_txg;
2050 return (0);
2051 }
2052
2053 void
2054 dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
2055 dmu_tx_t *tx)
2056 {
2057 dsl_pool_t *dp = ds->ds_dir->dd_pool;
2058 dmu_buf_t *dbuf;
2059 dsl_dataset_phys_t *dsphys;
2060 uint64_t dsobj, crtxg;
2061 objset_t *mos = dp->dp_meta_objset;
2062 int err;
2063
2064 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
2065
2066 /*
2067 * The origin's ds_creation_txg has to be < TXG_INITIAL
2068 */
2069 if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
2070 crtxg = 1;
2071 else
2072 crtxg = tx->tx_txg;
2073
2074 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
2075 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
2076 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
2077 dmu_buf_will_dirty(dbuf, tx);
2078 dsphys = dbuf->db_data;
2079 bzero(dsphys, sizeof (dsl_dataset_phys_t));
2080 dsphys->ds_dir_obj = ds->ds_dir->dd_object;
2081 dsphys->ds_fsid_guid = unique_create();
2082 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
2083 sizeof (dsphys->ds_guid));
2084 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
2085 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
2419
2420 return (err);
2421 }
2422
2423 static void
2424 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
2425 {
2426 dsl_dataset_t *ds = arg1;
2427 const char *newsnapname = arg2;
2428 dsl_dir_t *dd = ds->ds_dir;
2429 objset_t *mos = dd->dd_pool->dp_meta_objset;
2430 dsl_dataset_t *hds;
2431 int err;
2432
2433 ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
2434
2435 VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
2436 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
2437
2438 VERIFY(0 == dsl_dataset_get_snapname(ds));
2439 err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
2440 ASSERT0(err);
2441 mutex_enter(&ds->ds_lock);
2442 (void) strcpy(ds->ds_snapname, newsnapname);
2443 mutex_exit(&ds->ds_lock);
2444 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
2445 ds->ds_snapname, 8, 1, &ds->ds_object, tx);
2446 ASSERT0(err);
2447
2448 spa_history_log_internal_ds(ds, "rename", tx,
2449 "-> @%s", newsnapname);
2450 dsl_dataset_rele(hds, FTAG);
2451 }
2452
2453 struct renamesnaparg {
2454 dsl_sync_task_group_t *dstg;
2455 char failed[MAXPATHLEN];
2456 char *oldsnap;
2457 char *newsnap;
2458 };
2459
2614 err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2615 dsl_dataset_snapshot_rename_check,
2616 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
2617
2618 dsl_dataset_rele(ds, FTAG);
2619 }
2620
2621 return (err);
2622 }
2623
2624 struct promotenode {
2625 list_node_t link;
2626 dsl_dataset_t *ds;
2627 };
2628
2629 struct promotearg {
2630 list_t shared_snaps, origin_snaps, clone_snaps;
2631 dsl_dataset_t *origin_origin;
2632 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
2633 char *err_ds;
2634 };
2635
2636 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
2637 static boolean_t snaplist_unstable(list_t *l);
2638
2639 static int
2640 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
2641 {
2642 dsl_dataset_t *hds = arg1;
2643 struct promotearg *pa = arg2;
2644 struct promotenode *snap = list_head(&pa->shared_snaps);
2645 dsl_dataset_t *origin_ds = snap->ds;
2646 int err;
2647 uint64_t unused;
2648
2649 /* Check that it is a real clone */
2650 if (!dsl_dir_is_clone(hds->ds_dir))
2651 return (EINVAL);
2652
2653 /* Since this is so expensive, don't do the preliminary check */
2701 if (ds->ds_phys->ds_prev_snap_obj == 0)
2702 continue;
2703
2704 dsl_deadlist_space(&ds->ds_deadlist,
2705 &dlused, &dlcomp, &dluncomp);
2706 pa->used += dlused;
2707 pa->comp += dlcomp;
2708 pa->uncomp += dluncomp;
2709 }
2710
2711 /*
2712 * If we are a clone of a clone then we never reached ORIGIN,
2713 * so we need to subtract out the clone origin's used space.
2714 */
2715 if (pa->origin_origin) {
2716 pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
2717 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
2718 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
2719 }
2720
2721 /* Check that there is enough space here */
2722 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2723 pa->used);
2724 if (err)
2725 return (err);
2726
2727 /*
2728 * Compute the amounts of space that will be used by snapshots
2729 * after the promotion (for both origin and clone). For each,
2730 * it is the amount of space that will be on all of their
2731 * deadlists (that was not born before their new origin).
2732 */
2733 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
2734 uint64_t space;
2735
2736 /*
2737 * Note, typically this will not be a clone of a clone,
2738 * so dd_origin_txg will be < TXG_INITIAL, so
2739 * these snaplist_space() -> dsl_deadlist_space_range()
2740 * calls will be fast because they do not have to
2741 * iterate over all bps.
2742 */
2743 snap = list_head(&pa->origin_snaps);
2832 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
2833 }
2834 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
2835 dd->dd_phys->dd_clones, origin_head->ds_object, tx));
2836
2837 }
2838
2839 /* move snapshots to this dir */
2840 for (snap = list_head(&pa->shared_snaps); snap;
2841 snap = list_next(&pa->shared_snaps, snap)) {
2842 dsl_dataset_t *ds = snap->ds;
2843
2844 /* unregister props as dsl_dir is changing */
2845 if (ds->ds_objset) {
2846 dmu_objset_evict(ds->ds_objset);
2847 ds->ds_objset = NULL;
2848 }
2849 /* move snap name entry */
2850 VERIFY(0 == dsl_dataset_get_snapname(ds));
2851 VERIFY(0 == dsl_dataset_snap_remove(origin_head,
2852 ds->ds_snapname, tx));
2853 VERIFY(0 == zap_add(dp->dp_meta_objset,
2854 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
2855 8, 1, &ds->ds_object, tx));
2856
2857 /* change containing dsl_dir */
2858 dmu_buf_will_dirty(ds->ds_dbuf, tx);
2859 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
2860 ds->ds_phys->ds_dir_obj = dd->dd_object;
2861 ASSERT3P(ds->ds_dir, ==, odd);
2862 dsl_dir_close(ds->ds_dir, ds);
2863 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
2864 NULL, ds, &ds->ds_dir));
2865
2866 /* move any clone references */
2867 if (ds->ds_phys->ds_next_clones_obj &&
2868 spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
2869 zap_cursor_t zc;
2870 zap_attribute_t za;
2871
2872 for (zap_cursor_init(&zc, dp->dp_meta_objset,
2873 ds->ds_phys->ds_next_clones_obj);
2874 zap_cursor_retrieve(&zc, &za) == 0;
2875 zap_cursor_advance(&zc)) {
3073 if (err != 0)
3074 goto out;
3075
3076 snap = list_head(&pa.shared_snaps);
3077 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
3078 err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
3079 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
3080 if (err != 0)
3081 goto out;
3082
3083 if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) {
3084 err = dsl_dataset_hold_obj(dp,
3085 snap->ds->ds_dir->dd_phys->dd_origin_obj,
3086 FTAG, &pa.origin_origin);
3087 if (err != 0)
3088 goto out;
3089 }
3090
3091 out:
3092 rw_exit(&dp->dp_config_rwlock);
3093
3094 /*
3095 * Add in 128x the snapnames zapobj size, since we will be moving
3096 * a bunch of snapnames to the promoted ds, and dirtying their
3097 * bonus buffers.
3098 */
3099 if (err == 0) {
3100 err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
3101 dsl_dataset_promote_sync, ds, &pa,
3102 2 + 2 * doi.doi_physical_blocks_512);
3103 if (err && pa.err_ds && conflsnap)
3104 (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN);
3105 }
3106
3107 snaplist_destroy(&pa.shared_snaps, B_TRUE);
3108 snaplist_destroy(&pa.clone_snaps, B_FALSE);
3109 snaplist_destroy(&pa.origin_snaps, B_FALSE);
3110 if (pa.origin_origin)
3111 dsl_dataset_rele(pa.origin_origin, FTAG);
3112 dsl_dataset_rele(ds, FTAG);
|
28 #include <sys/dsl_dataset.h>
29 #include <sys/dsl_dir.h>
30 #include <sys/dsl_prop.h>
31 #include <sys/dsl_synctask.h>
32 #include <sys/dmu_traverse.h>
33 #include <sys/dmu_impl.h>
34 #include <sys/dmu_tx.h>
35 #include <sys/arc.h>
36 #include <sys/zio.h>
37 #include <sys/zap.h>
38 #include <sys/zfeature.h>
39 #include <sys/unique.h>
40 #include <sys/zfs_context.h>
41 #include <sys/zfs_ioctl.h>
42 #include <sys/spa.h>
43 #include <sys/zfs_znode.h>
44 #include <sys/zfs_onexit.h>
45 #include <sys/zvol.h>
46 #include <sys/dsl_scan.h>
47 #include <sys/dsl_deadlist.h>
48 #include "zfs_prop.h"
49
50 static char *dsl_reaper = "the grim reaper";
51
52 static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
53 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
54 static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
55
/*
 * Exchange the values of two uint64_t lvalues.
 *
 * Both arguments are evaluated more than once, so they must not have
 * side effects.  The body is wrapped in do { } while (0) so that the
 * invocation together with its trailing semicolon forms exactly one
 * statement; a bare { } block would break an unbraced if/else.
 */
#define	SWITCH64(x, y) \
	do { \
		uint64_t switch64_tmp = (x); \
		(x) = (y); \
		(y) = switch64_tmp; \
	} while (0)
62
63 #define DS_REF_MAX (1ULL << 62)
64
65 #define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE
66
67 #define DSL_DATASET_IS_DESTROYED(ds) ((ds)->ds_owner == dsl_reaper)
68
315 dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
316 {
317 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
318 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
319 matchtype_t mt;
320 int err;
321
322 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
323 mt = MT_FIRST;
324 else
325 mt = MT_EXACT;
326
327 err = zap_lookup_norm(mos, snapobj, name, 8, 1,
328 value, mt, NULL, 0, NULL);
329 if (err == ENOTSUP && mt == MT_FIRST)
330 err = zap_lookup(mos, snapobj, name, 8, 1, value);
331 return (err);
332 }
333
334 static int
335 dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx,
336 boolean_t adj_cnt)
337 {
338 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
339 uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
340 matchtype_t mt;
341 int err;
342
343 dsl_dir_snap_cmtime_update(ds->ds_dir);
344
345 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
346 mt = MT_FIRST;
347 else
348 mt = MT_EXACT;
349
350 err = zap_remove_norm(mos, snapobj, name, mt, tx);
351 if (err == ENOTSUP && mt == MT_FIRST)
352 err = zap_remove(mos, snapobj, name, tx);
353
354 if (err == 0 && adj_cnt)
355 dsl_snapcount_adjust(ds->ds_dir, tx, -1, B_TRUE);
356
357 return (err);
358 }
359
360 static int
361 dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
362 dsl_dataset_t **dsp)
363 {
364 objset_t *mos = dp->dp_meta_objset;
365 dmu_buf_t *dbuf;
366 dsl_dataset_t *ds;
367 int err;
368 dmu_object_info_t doi;
369
370 ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
371 dsl_pool_sync_context(dp));
372
373 err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
374 if (err)
375 return (err);
376
1936 ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
1937 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1938 ASSERT(err == 0);
1939 } else {
1940 /* remove from snapshot namespace */
1941 dsl_dataset_t *ds_head;
1942 ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
1943 VERIFY(0 == dsl_dataset_hold_obj(dp,
1944 ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
1945 VERIFY(0 == dsl_dataset_get_snapname(ds));
1946 #ifdef ZFS_DEBUG
1947 {
1948 uint64_t val;
1949
1950 err = dsl_dataset_snap_lookup(ds_head,
1951 ds->ds_snapname, &val);
1952 ASSERT0(err);
1953 ASSERT3U(val, ==, obj);
1954 }
1955 #endif
1956 err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx,
1957 B_TRUE);
1958 ASSERT(err == 0);
1959 dsl_dataset_rele(ds_head, FTAG);
1960 }
1961
1962 if (ds_prev && ds->ds_prev != ds_prev)
1963 dsl_dataset_rele(ds_prev, FTAG);
1964
1965 spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
1966
1967 if (ds->ds_phys->ds_next_clones_obj != 0) {
1968 uint64_t count;
1969 ASSERT(0 == zap_count(mos,
1970 ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
1971 VERIFY(0 == dmu_object_free(mos,
1972 ds->ds_phys->ds_next_clones_obj, tx));
1973 }
1974 if (ds->ds_phys->ds_props_obj != 0)
1975 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
1976 if (ds->ds_phys->ds_userrefs_obj != 0)
1977 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
2002 /*
2003 * If there's an fs-only reservation, any blocks that might become
2004 * owned by the snapshot dataset must be accommodated by space
2005 * outside of the reservation.
2006 */
2007 ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
2008 asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
2009 if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
2010 return (ENOSPC);
2011
2012 /*
2013 * Propagate any reserved space for this snapshot to other
2014 * snapshot checks in this sync group.
2015 */
2016 if (asize > 0)
2017 dsl_dir_willuse_space(ds->ds_dir, asize, tx);
2018
2019 return (0);
2020 }
2021
2022 /*
2023 * Check if adding additional snapshot(s) would exceed any snapshot limits.
2024 * Note that all snapshot limits up to the root dataset (i.e. the pool itself)
2025 * or the given ancestor must be satisfied. Note that it is valid for the
2026 * count to exceed the limit. This can happen if a snapshot is taken by an
2027 * administrative user in the global zone (e.g. a recursive snapshot by root).
2028 */
2029 int
2030 dsl_snapcount_check(dsl_dir_t *dd, uint64_t cnt, dsl_dir_t *ancestor,
2031 cred_t *cr)
2032 {
2033 uint64_t limit;
2034 int err = 0;
2035
2036 VERIFY(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
2037
2038 /* If we're allowed to change the limit, don't enforce the limit. */
2039 if (dsl_secpolicy_write_prop(dd, ZFS_PROP_SNAPSHOT_LIMIT, cr) == 0)
2040 return (0);
2041
2042 /*
2043 * If renaming a dataset with no snapshots, count adjustment is 0.
2044 */
2045 if (cnt == 0)
2046 return (0);
2047
2048 /*
2049 * If an ancestor has been provided, stop checking the limit once we
2050 * hit that dir. We need this during rename so that we don't overcount
2051 * the check once we recurse up to the common ancestor.
2052 */
2053 if (ancestor == dd)
2054 return (0);
2055
2056 /*
2057 * If we hit an uninitialized node while recursing up the tree, we can
2058 * stop since we know the counts are not valid on this node and we
2059 * know we won't touch this node's counts. We also know that the counts
2060 * on the nodes above this one are uninitialized and that there cannot
2061 * be a limit set on any of those nodes.
2062 */
2063 if (dd->dd_phys->dd_filesystem_count == 0)
2064 return (0);
2065
2066 err = dsl_prop_get_dd(dd, zfs_prop_to_name(ZFS_PROP_SNAPSHOT_LIMIT),
2067 8, 1, &limit, NULL, B_FALSE);
2068 if (err != 0)
2069 return (err);
2070
2071 /* Is there a snapshot limit which we've hit? */
2072 if ((dd->dd_phys->dd_snapshot_count + cnt) > limit)
2073 return (EDQUOT);
2074
2075 if (dd->dd_parent != NULL)
2076 err = dsl_snapcount_check(dd->dd_parent, cnt, ancestor, cr);
2077
2078 return (err);
2079 }
2080
2081 /*
2082 * Adjust the snapshot count for the specified dsl_dir_t and all parents.
2083 * When a new snapshot is created, increment the count on all parents, and when
2084 * a snapshot is destroyed, decrement the count.
2085 */
2086 void
2087 dsl_snapcount_adjust(dsl_dir_t *dd, dmu_tx_t *tx, int64_t delta,
2088 boolean_t first)
2089 {
2090 if (first) {
2091 VERIFY(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
2092 VERIFY(dmu_tx_is_syncing(tx));
2093 }
2094
2095 /*
2096 * If we hit an uninitialized node while recursing up the tree, we can
2097 * stop since we know the counts are not valid on this node and we
2098 * know we shouldn't touch this node's counts. An uninitialized count
2099 * on the node indicates that either the feature has not yet been
2100 * activated or there are no limits on this part of the tree.
2101 */
2102 if (dd->dd_phys->dd_filesystem_count == 0)
2103 return;
2104
2105 /* if renaming a dataset with no snapshots, count adjustment is 0 */
2106 if (delta == 0)
2107 return;
2108
2109 /*
2110 * On initial entry we need to check if this feature is active, but
2111 * we don't want to re-check this on each recursive call. Note: the
2112 * feature cannot be active if it's not enabled. If the feature is not
2113 * active, don't touch the on-disk count fields.
2114 */
2115 if (first) {
2116 zfeature_info_t *quota_feat =
2117 &spa_feature_table[SPA_FEATURE_FS_SS_LIMIT];
2118
2119 if (!spa_feature_is_active(dd->dd_pool->dp_spa, quota_feat))
2120 return;
2121 }
2122
2123 dmu_buf_will_dirty(dd->dd_dbuf, tx);
2124
2125 mutex_enter(&dd->dd_lock);
2126
2127 dd->dd_phys->dd_snapshot_count += delta;
2128 VERIFY(dd->dd_phys->dd_snapshot_count >= 0);
2129
2130 /* Roll up this additional count into our ancestors */
2131 if (dd->dd_parent != NULL)
2132 dsl_snapcount_adjust(dd->dd_parent, tx, delta, B_FALSE);
2133
2134 mutex_exit(&dd->dd_lock);
2135 }
2136
2137 int
2138 dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
2139 uint64_t cnt, dmu_tx_t *tx, cred_t *cr)
2140 {
2141 int err;
2142 uint64_t value;
2143
2144 /*
2145 * We don't allow multiple snapshots of the same txg. If there
2146 * is already one, try again.
2147 */
2148 if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
2149 return (EAGAIN);
2150
2151 /*
2152 * Check for conflicting snapshot name.
2153 */
2154 err = dsl_dataset_snap_lookup(ds, snapname, &value);
2155 if (err == 0)
2156 return (EEXIST);
2157 if (err != ENOENT)
2158 return (err);
2159
2160 /*
2161 * Check that the dataset's name is not too long. Name consists
2162 * of the dataset's length + 1 for the @-sign + snapshot name's length
2163 */
2164 if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
2165 return (ENAMETOOLONG);
2166
2167 err = dsl_snapcount_check(ds->ds_dir, cnt, NULL, cr);
2168 if (err)
2169 return (err);
2170
2171 err = dsl_dataset_snapshot_reserve_space(ds, tx);
2172 if (err)
2173 return (err);
2174
2175 ds->ds_trysnap_txg = tx->tx_txg;
2176 return (0);
2177 }
2178
2179 void
2180 dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
2181 dmu_tx_t *tx)
2182 {
2183 dsl_pool_t *dp = ds->ds_dir->dd_pool;
2184 dmu_buf_t *dbuf;
2185 dsl_dataset_phys_t *dsphys;
2186 uint64_t dsobj, crtxg;
2187 objset_t *mos = dp->dp_meta_objset;
2188 int err;
2189
2190 ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
2191
2192 dsl_snapcount_adjust(ds->ds_dir, tx, 1, B_TRUE);
2193
2194 /*
2195 * The origin's ds_creation_txg has to be < TXG_INITIAL
2196 */
2197 if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
2198 crtxg = 1;
2199 else
2200 crtxg = tx->tx_txg;
2201
2202 dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
2203 DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
2204 VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
2205 dmu_buf_will_dirty(dbuf, tx);
2206 dsphys = dbuf->db_data;
2207 bzero(dsphys, sizeof (dsl_dataset_phys_t));
2208 dsphys->ds_dir_obj = ds->ds_dir->dd_object;
2209 dsphys->ds_fsid_guid = unique_create();
2210 (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
2211 sizeof (dsphys->ds_guid));
2212 dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
2213 dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
2547
2548 return (err);
2549 }
2550
2551 static void
2552 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
2553 {
2554 dsl_dataset_t *ds = arg1;
2555 const char *newsnapname = arg2;
2556 dsl_dir_t *dd = ds->ds_dir;
2557 objset_t *mos = dd->dd_pool->dp_meta_objset;
2558 dsl_dataset_t *hds;
2559 int err;
2560
2561 ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
2562
2563 VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
2564 dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
2565
2566 VERIFY(0 == dsl_dataset_get_snapname(ds));
2567 err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx, B_FALSE);
2568 ASSERT0(err);
2569 mutex_enter(&ds->ds_lock);
2570 (void) strcpy(ds->ds_snapname, newsnapname);
2571 mutex_exit(&ds->ds_lock);
2572 err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
2573 ds->ds_snapname, 8, 1, &ds->ds_object, tx);
2574 ASSERT0(err);
2575
2576 spa_history_log_internal_ds(ds, "rename", tx,
2577 "-> @%s", newsnapname);
2578 dsl_dataset_rele(hds, FTAG);
2579 }
2580
2581 struct renamesnaparg {
2582 dsl_sync_task_group_t *dstg;
2583 char failed[MAXPATHLEN];
2584 char *oldsnap;
2585 char *newsnap;
2586 };
2587
2742 err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2743 dsl_dataset_snapshot_rename_check,
2744 dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
2745
2746 dsl_dataset_rele(ds, FTAG);
2747 }
2748
2749 return (err);
2750 }
2751
2752 struct promotenode {
2753 list_node_t link;
2754 dsl_dataset_t *ds;
2755 };
2756
2757 struct promotearg {
2758 list_t shared_snaps, origin_snaps, clone_snaps;
2759 dsl_dataset_t *origin_origin;
2760 uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
2761 char *err_ds;
2762 cred_t *cr;
2763 };
2764
2765 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
2766 static boolean_t snaplist_unstable(list_t *l);
2767
2768 static int
2769 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
2770 {
2771 dsl_dataset_t *hds = arg1;
2772 struct promotearg *pa = arg2;
2773 struct promotenode *snap = list_head(&pa->shared_snaps);
2774 dsl_dataset_t *origin_ds = snap->ds;
2775 int err;
2776 uint64_t unused;
2777
2778 /* Check that it is a real clone */
2779 if (!dsl_dir_is_clone(hds->ds_dir))
2780 return (EINVAL);
2781
2782 /* Since this is so expensive, don't do the preliminary check */
2830 if (ds->ds_phys->ds_prev_snap_obj == 0)
2831 continue;
2832
2833 dsl_deadlist_space(&ds->ds_deadlist,
2834 &dlused, &dlcomp, &dluncomp);
2835 pa->used += dlused;
2836 pa->comp += dlcomp;
2837 pa->uncomp += dluncomp;
2838 }
2839
2840 /*
2841 * If we are a clone of a clone then we never reached ORIGIN,
2842 * so we need to subtract out the clone origin's used space.
2843 */
2844 if (pa->origin_origin) {
2845 pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
2846 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
2847 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
2848 }
2849
2850 /* Check that there is enough space and limit headroom here */
2851 err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2852 origin_ds->ds_dir, pa->used, pa->cr);
2853 if (err)
2854 return (err);
2855
2856 /*
2857 * Compute the amounts of space that will be used by snapshots
2858 * after the promotion (for both origin and clone). For each,
2859 * it is the amount of space that will be on all of their
2860 * deadlists (that was not born before their new origin).
2861 */
2862 if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
2863 uint64_t space;
2864
2865 /*
2866 * Note, typically this will not be a clone of a clone,
2867 * so dd_origin_txg will be < TXG_INITIAL, so
2868 * these snaplist_space() -> dsl_deadlist_space_range()
2869 * calls will be fast because they do not have to
2870 * iterate over all bps.
2871 */
2872 snap = list_head(&pa->origin_snaps);
2961 DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
2962 }
2963 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
2964 dd->dd_phys->dd_clones, origin_head->ds_object, tx));
2965
2966 }
2967
2968 /* move snapshots to this dir */
2969 for (snap = list_head(&pa->shared_snaps); snap;
2970 snap = list_next(&pa->shared_snaps, snap)) {
2971 dsl_dataset_t *ds = snap->ds;
2972
2973 /* unregister props as dsl_dir is changing */
2974 if (ds->ds_objset) {
2975 dmu_objset_evict(ds->ds_objset);
2976 ds->ds_objset = NULL;
2977 }
2978 /* move snap name entry */
2979 VERIFY(0 == dsl_dataset_get_snapname(ds));
2980 VERIFY(0 == dsl_dataset_snap_remove(origin_head,
2981 ds->ds_snapname, tx, B_TRUE));
2982 VERIFY(0 == zap_add(dp->dp_meta_objset,
2983 hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
2984 8, 1, &ds->ds_object, tx));
2985 dsl_snapcount_adjust(hds->ds_dir, tx, 1, B_TRUE);
2986
2987 /* change containing dsl_dir */
2988 dmu_buf_will_dirty(ds->ds_dbuf, tx);
2989 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
2990 ds->ds_phys->ds_dir_obj = dd->dd_object;
2991 ASSERT3P(ds->ds_dir, ==, odd);
2992 dsl_dir_close(ds->ds_dir, ds);
2993 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
2994 NULL, ds, &ds->ds_dir));
2995
2996 /* move any clone references */
2997 if (ds->ds_phys->ds_next_clones_obj &&
2998 spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
2999 zap_cursor_t zc;
3000 zap_attribute_t za;
3001
3002 for (zap_cursor_init(&zc, dp->dp_meta_objset,
3003 ds->ds_phys->ds_next_clones_obj);
3004 zap_cursor_retrieve(&zc, &za) == 0;
3005 zap_cursor_advance(&zc)) {
3203 if (err != 0)
3204 goto out;
3205
3206 snap = list_head(&pa.shared_snaps);
3207 ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
3208 err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
3209 snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
3210 if (err != 0)
3211 goto out;
3212
3213 if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) {
3214 err = dsl_dataset_hold_obj(dp,
3215 snap->ds->ds_dir->dd_phys->dd_origin_obj,
3216 FTAG, &pa.origin_origin);
3217 if (err != 0)
3218 goto out;
3219 }
3220
3221 out:
3222 rw_exit(&dp->dp_config_rwlock);
3223 pa.cr = CRED();
3224
3225 /*
3226 * Add in 128x the snapnames zapobj size, since we will be moving
3227 * a bunch of snapnames to the promoted ds, and dirtying their
3228 * bonus buffers.
3229 */
3230 if (err == 0) {
3231 err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
3232 dsl_dataset_promote_sync, ds, &pa,
3233 2 + 2 * doi.doi_physical_blocks_512);
3234 if (err && pa.err_ds && conflsnap)
3235 (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN);
3236 }
3237
3238 snaplist_destroy(&pa.shared_snaps, B_TRUE);
3239 snaplist_destroy(&pa.clone_snaps, B_FALSE);
3240 snaplist_destroy(&pa.origin_snaps, B_FALSE);
3241 if (pa.origin_origin)
3242 dsl_dataset_rele(pa.origin_origin, FTAG);
3243 dsl_dataset_rele(ds, FTAG);
|