Print this page
OS-1566 filesystem limits for ZFS datasets


  28 #include <sys/dsl_dataset.h>
  29 #include <sys/dsl_dir.h>
  30 #include <sys/dsl_prop.h>
  31 #include <sys/dsl_synctask.h>
  32 #include <sys/dmu_traverse.h>
  33 #include <sys/dmu_impl.h>
  34 #include <sys/dmu_tx.h>
  35 #include <sys/arc.h>
  36 #include <sys/zio.h>
  37 #include <sys/zap.h>
  38 #include <sys/zfeature.h>
  39 #include <sys/unique.h>
  40 #include <sys/zfs_context.h>
  41 #include <sys/zfs_ioctl.h>
  42 #include <sys/spa.h>
  43 #include <sys/zfs_znode.h>
  44 #include <sys/zfs_onexit.h>
  45 #include <sys/zvol.h>
  46 #include <sys/dsl_scan.h>
  47 #include <sys/dsl_deadlist.h>

  48 
  49 static char *dsl_reaper = "the grim reaper";
  50 
  51 static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
  52 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
  53 static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
  54 
     /*
      * Exchange the values of two lvalues through a uint64_t temporary.
      * The expansion is a bare brace block (not do/while) and names each
      * argument twice, so only pass side-effect-free lvalues.
      */
  55 #define SWITCH64(x, y) \
  56         { \
  57                 uint64_t __tmp = (x); \
  58                 (x) = (y); \
  59                 (y) = __tmp; \
  60         }
  61 
     /* 2^62.  NOTE(review): its use is outside this excerpt. */
  62 #define DS_REF_MAX      (1ULL << 62)
  63 
     /* Alias for SPA_MAXBLOCKSIZE. */
  64 #define DSL_DEADLIST_BLOCKSIZE  SPA_MAXBLOCKSIZE
  65 
     /*
      * True when the dataset's ds_owner is the dsl_reaper sentinel declared
      * above.  This is a pointer comparison against that one static string,
      * not a string comparison.
      */
  66 #define DSL_DATASET_IS_DESTROYED(ds)    ((ds)->ds_owner == dsl_reaper)
  67 


 314 dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
 315 {
             /*
              * Look up `name` in the snapshot-names ZAP object of `ds`
              * (ds_snapnames_zapobj in the pool's meta objset) and store the
              * single 8-byte integer found there in *value.
              *
              * Returns the error code of the ZAP lookup; callers in this file
              * treat 0 as "found" and ENOENT as "no such snapshot".
              */
 316         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 317         uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
 318         matchtype_t mt;
 319         int err;
 320 
             /*
              * Datasets flagged DS_FLAG_CI_DATASET use MT_FIRST matching,
              * everything else uses MT_EXACT.
              * NOTE(review): "CI" presumably means case-insensitive -- confirm.
              */
 321         if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
 322                 mt = MT_FIRST;
 323         else
 324                 mt = MT_EXACT;
 325 
 326         err = zap_lookup_norm(mos, snapobj, name, 8, 1,
 327             value, mt, NULL, 0, NULL);
             /*
              * If the normalizing lookup reports ENOTSUP for an MT_FIRST
              * request, retry with the plain zap_lookup().
              */
 328         if (err == ENOTSUP && mt == MT_FIRST)
 329                 err = zap_lookup(mos, snapobj, name, 8, 1, value);
 330         return (err);
 331 }
 332 
 333 static int
 334 dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)

 335 {
             /*
              * Remove the entry `name` from the snapshot-names ZAP object of
              * `ds` as part of transaction `tx`.
              *
              * Returns the error code of the ZAP removal (callers in this file
              * expect 0).
              */
 336         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 337         uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
 338         matchtype_t mt;
 339         int err;
 340 
             /*
              * The snapshot change/modify time of the dsl_dir is updated
              * before the removal is attempted, i.e. even if it then fails.
              */
 341         dsl_dir_snap_cmtime_update(ds->ds_dir);
 342 
             /* Same match-type selection as dsl_dataset_snap_lookup(). */
 343         if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
 344                 mt = MT_FIRST;
 345         else
 346                 mt = MT_EXACT;
 347 
 348         err = zap_remove_norm(mos, snapobj, name, mt, tx);
             /* Fall back to the plain removal if MT_FIRST is not supported. */
 349         if (err == ENOTSUP && mt == MT_FIRST)
 350                 err = zap_remove(mos, snapobj, name, tx);




 351         return (err);
 352 }
 353 
 354 static int
 355 dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
 356     dsl_dataset_t **dsp)
 357 {
 358         objset_t *mos = dp->dp_meta_objset;
 359         dmu_buf_t *dbuf;
 360         dsl_dataset_t *ds;
 361         int err;
 362         dmu_object_info_t doi;
 363 
 364         ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
 365             dsl_pool_sync_context(dp));
 366 
 367         err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
 368         if (err)
 369                 return (err);
 370 


1930                 ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
1931                 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1932                 ASSERT(err == 0);
1933         } else {
1934                 /* remove from snapshot namespace */
1935                 dsl_dataset_t *ds_head;
1936                 ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
1937                 VERIFY(0 == dsl_dataset_hold_obj(dp,
1938                     ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
1939                 VERIFY(0 == dsl_dataset_get_snapname(ds));
1940 #ifdef ZFS_DEBUG
1941                 {
1942                         uint64_t val;
1943 
1944                         err = dsl_dataset_snap_lookup(ds_head,
1945                             ds->ds_snapname, &val);
1946                         ASSERT0(err);
1947                         ASSERT3U(val, ==, obj);
1948                 }
1949 #endif
1950                 err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);

1951                 ASSERT(err == 0);
1952                 dsl_dataset_rele(ds_head, FTAG);
1953         }
1954 
1955         if (ds_prev && ds->ds_prev != ds_prev)
1956                 dsl_dataset_rele(ds_prev, FTAG);
1957 
1958         spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
1959 
1960         if (ds->ds_phys->ds_next_clones_obj != 0) {
1961                 uint64_t count;
1962                 ASSERT(0 == zap_count(mos,
1963                     ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
1964                 VERIFY(0 == dmu_object_free(mos,
1965                     ds->ds_phys->ds_next_clones_obj, tx));
1966         }
1967         if (ds->ds_phys->ds_props_obj != 0)
1968                 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
1969         if (ds->ds_phys->ds_userrefs_obj != 0)
1970                 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));


1995         /*
1996          * If there's an fs-only reservation, any blocks that might become
1997          * owned by the snapshot dataset must be accommodated by space
1998          * outside of the reservation.
1999          */
2000         ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
2001         asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
2002         if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
2003                 return (ENOSPC);
2004 
2005         /*
2006          * Propagate any reserved space for this snapshot to other
2007          * snapshot checks in this sync group.
2008          */
2009         if (asize > 0)
2010                 dsl_dir_willuse_space(ds->ds_dir, asize, tx);
2011 
2012         return (0);
2013 }
2014 







2015 int












































































































2016 dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
2017     dmu_tx_t *tx)
2018 {
             /*
              * Decide whether snapshot `snapname` of `ds` may be created in
              * the txg of `tx`.
              *
              * Returns 0 if the snapshot may proceed (and records the txg in
              * ds->ds_trysnap_txg), otherwise:
              *   EAGAIN        a snapshot already exists for this or a later txg
              *   EEXIST        a snapshot of that name already exists
              *   ENAMETOOLONG  "<dataset>@<snapname>" would not fit MAXNAMELEN
              *   other         error from the name lookup or from
              *                 dsl_dataset_snapshot_reserve_space()
              */
2019         int err;
             /* Only an out-parameter for the lookup; its content is not used. */
2020         uint64_t value;
2021 
2022         /*
2023          * We don't allow multiple snapshots of the same txg.  If there
2024          * is already one, try again.
2025          */
2026         if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
2027                 return (EAGAIN);
2028 
2029         /*
2030          * Check for conflicting snapshot name.
2031          */
2032         err = dsl_dataset_snap_lookup(ds, snapname, &value);
2033         if (err == 0)
2034                 return (EEXIST);
2035         if (err != ENOENT)
2036                 return (err);
2037 
2038         /*
2039          * Check that the dataset's name is not too long.  Name consists
2040          * of the dataset's length + 1 for the @-sign + snapshot name's length
2041          */
2042         if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
2043                 return (ENAMETOOLONG);
2044 




2045         err = dsl_dataset_snapshot_reserve_space(ds, tx);
2046         if (err)
2047                 return (err);
2048 
2049         ds->ds_trysnap_txg = tx->tx_txg;
2050         return (0);
2051 }
2052 
2053 void
2054 dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
2055     dmu_tx_t *tx)
2056 {
2057         dsl_pool_t *dp = ds->ds_dir->dd_pool;
2058         dmu_buf_t *dbuf;
2059         dsl_dataset_phys_t *dsphys;
2060         uint64_t dsobj, crtxg;
2061         objset_t *mos = dp->dp_meta_objset;
2062         int err;
2063 
2064         ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
2065 


2066         /*
2067          * The origin's ds_creation_txg has to be < TXG_INITIAL
2068          */
2069         if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
2070                 crtxg = 1;
2071         else
2072                 crtxg = tx->tx_txg;
2073 
2074         dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
2075             DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
2076         VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
2077         dmu_buf_will_dirty(dbuf, tx);
2078         dsphys = dbuf->db_data;
2079         bzero(dsphys, sizeof (dsl_dataset_phys_t));
2080         dsphys->ds_dir_obj = ds->ds_dir->dd_object;
2081         dsphys->ds_fsid_guid = unique_create();
2082         (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
2083             sizeof (dsphys->ds_guid));
2084         dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
2085         dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;


2419 
2420         return (err);
2421 }
2422 
2423 static void
2424 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
2425 {
             /*
              * Sync-task callback that renames a snapshot.
              *   arg1: the snapshot dataset (dsl_dataset_t *)
              *   arg2: the new snapshot name (const char *)
              *
              * The old name entry is removed from the head dataset's
              * snapshot-names ZAP, ds->ds_snapname is overwritten, and the new
              * name is added pointing at the same ds_object.
              */
2426         dsl_dataset_t *ds = arg1;
2427         const char *newsnapname = arg2;
2428         dsl_dir_t *dd = ds->ds_dir;
2429         objset_t *mos = dd->dd_pool->dp_meta_objset;
2430         dsl_dataset_t *hds;
2431         int err;
2432 
2433         ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
2434 
             /* Hold the head dataset of this dsl_dir; released at the end. */
2435         VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
2436             dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
2437 
2438         VERIFY(0 == dsl_dataset_get_snapname(ds));
2439         err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
2440         ASSERT0(err);
             /*
              * NOTE(review): strcpy() is unbounded here; this relies on the
              * new name's length having been validated earlier (presumably in
              * dsl_dataset_snapshot_rename_check) -- confirm.
              */
2441         mutex_enter(&ds->ds_lock);
2442         (void) strcpy(ds->ds_snapname, newsnapname);
2443         mutex_exit(&ds->ds_lock);
2444         err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
2445             ds->ds_snapname, 8, 1, &ds->ds_object, tx);
2446         ASSERT0(err);
2447 
2448         spa_history_log_internal_ds(ds, "rename", tx,
2449             "-> @%s", newsnapname);
2450         dsl_dataset_rele(hds, FTAG);
2451 }
2452 
2453 struct renamesnaparg {
             /*
              * NOTE(review): the code using this struct is outside this
              * excerpt; the field notes below are inferred from names and
              * types only -- confirm against the users.
              */
2454         dsl_sync_task_group_t *dstg;    /* presumably the task group being built */
2455         char failed[MAXPATHLEN];        /* presumably the name that failed */
2456         char *oldsnap;                  /* presumably the current snapshot name */
2457         char *newsnap;                  /* presumably the requested new name */
2458 };
2459 


2614                 err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2615                     dsl_dataset_snapshot_rename_check,
2616                     dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
2617 
2618                 dsl_dataset_rele(ds, FTAG);
2619         }
2620 
2621         return (err);
2622 }
2623 
2624 struct promotenode {
             /* One element of the snapshot lists kept in struct promotearg. */
2625         list_node_t link;       /* list linkage */
2626         dsl_dataset_t *ds;      /* the dataset this node refers to */
2627 };
2628 
2629 struct promotearg {
             /*
              * State passed (as arg2) to dsl_dataset_promote_check() and
              * dsl_dataset_promote_sync().
              */
             /* Lists of struct promotenode; the head of shared_snaps is the origin. */
2630         list_t shared_snaps, origin_snaps, clone_snaps;
             /* Held only when the origin's dsl_dir itself has an origin (clone of a clone). */
2631         dsl_dataset_t *origin_origin;
             /*
              * Space totals computed by the check function; used/comp/uncomp
              * are accumulated from the deadlists of the shared snapshots.
              */
2632         uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
             /* Name reported back to the caller as the conflicting snapshot on error. */
2633         char *err_ds;

2634 };
2635 
2636 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
2637 static boolean_t snaplist_unstable(list_t *l);
2638 
2639 static int
2640 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
2641 {
2642         dsl_dataset_t *hds = arg1;
2643         struct promotearg *pa = arg2;
2644         struct promotenode *snap = list_head(&pa->shared_snaps);
2645         dsl_dataset_t *origin_ds = snap->ds;
2646         int err;
2647         uint64_t unused;
2648 
2649         /* Check that it is a real clone */
2650         if (!dsl_dir_is_clone(hds->ds_dir))
2651                 return (EINVAL);
2652 
2653         /* Since this is so expensive, don't do the preliminary check */


2701                 if (ds->ds_phys->ds_prev_snap_obj == 0)
2702                         continue;
2703 
2704                 dsl_deadlist_space(&ds->ds_deadlist,
2705                     &dlused, &dlcomp, &dluncomp);
2706                 pa->used += dlused;
2707                 pa->comp += dlcomp;
2708                 pa->uncomp += dluncomp;
2709         }
2710 
2711         /*
2712          * If we are a clone of a clone then we never reached ORIGIN,
2713          * so we need to subtract out the clone origin's used space.
2714          */
2715         if (pa->origin_origin) {
2716                 pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
2717                 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
2718                 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
2719         }
2720 
2721         /* Check that there is enough space here */
2722         err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2723             pa->used);
2724         if (err)
2725                 return (err);
2726 
2727         /*
2728          * Compute the amounts of space that will be used by snapshots
2729          * after the promotion (for both origin and clone).  For each,
2730          * it is the amount of space that will be on all of their
2731          * deadlists (that was not born before their new origin).
2732          */
2733         if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
2734                 uint64_t space;
2735 
2736                 /*
2737                  * Note, typically this will not be a clone of a clone,
2738                  * so dd_origin_txg will be < TXG_INITIAL, so
2739                  * these snaplist_space() -> dsl_deadlist_space_range()
2740                  * calls will be fast because they do not have to
2741                  * iterate over all bps.
2742                  */
2743                 snap = list_head(&pa->origin_snaps);


2832                             DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
2833                 }
2834                 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
2835                     dd->dd_phys->dd_clones, origin_head->ds_object, tx));
2836 
2837         }
2838 
2839         /* move snapshots to this dir */
2840         for (snap = list_head(&pa->shared_snaps); snap;
2841             snap = list_next(&pa->shared_snaps, snap)) {
2842                 dsl_dataset_t *ds = snap->ds;
2843 
2844                 /* unregister props as dsl_dir is changing */
2845                 if (ds->ds_objset) {
2846                         dmu_objset_evict(ds->ds_objset);
2847                         ds->ds_objset = NULL;
2848                 }
2849                 /* move snap name entry */
2850                 VERIFY(0 == dsl_dataset_get_snapname(ds));
2851                 VERIFY(0 == dsl_dataset_snap_remove(origin_head,
2852                     ds->ds_snapname, tx));
2853                 VERIFY(0 == zap_add(dp->dp_meta_objset,
2854                     hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
2855                     8, 1, &ds->ds_object, tx));

2856 
2857                 /* change containing dsl_dir */
2858                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
2859                 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
2860                 ds->ds_phys->ds_dir_obj = dd->dd_object;
2861                 ASSERT3P(ds->ds_dir, ==, odd);
2862                 dsl_dir_close(ds->ds_dir, ds);
2863                 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
2864                     NULL, ds, &ds->ds_dir));
2865 
2866                 /* move any clone references */
2867                 if (ds->ds_phys->ds_next_clones_obj &&
2868                     spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
2869                         zap_cursor_t zc;
2870                         zap_attribute_t za;
2871 
2872                         for (zap_cursor_init(&zc, dp->dp_meta_objset,
2873                             ds->ds_phys->ds_next_clones_obj);
2874                             zap_cursor_retrieve(&zc, &za) == 0;
2875                             zap_cursor_advance(&zc)) {


3073         if (err != 0)
3074                 goto out;
3075 
3076         snap = list_head(&pa.shared_snaps);
3077         ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
3078         err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
3079             snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
3080         if (err != 0)
3081                 goto out;
3082 
3083         if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) {
3084                 err = dsl_dataset_hold_obj(dp,
3085                     snap->ds->ds_dir->dd_phys->dd_origin_obj,
3086                     FTAG, &pa.origin_origin);
3087                 if (err != 0)
3088                         goto out;
3089         }
3090 
3091 out:
3092         rw_exit(&dp->dp_config_rwlock);

3093 
3094         /*
3095          * Add in 128x the snapnames zapobj size, since we will be moving
3096          * a bunch of snapnames to the promoted ds, and dirtying their
3097          * bonus buffers.
3098          */
3099         if (err == 0) {
3100                 err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
3101                     dsl_dataset_promote_sync, ds, &pa,
3102                     2 + 2 * doi.doi_physical_blocks_512);
3103                 if (err && pa.err_ds && conflsnap)
3104                         (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN);
3105         }
3106 
3107         snaplist_destroy(&pa.shared_snaps, B_TRUE);
3108         snaplist_destroy(&pa.clone_snaps, B_FALSE);
3109         snaplist_destroy(&pa.origin_snaps, B_FALSE);
3110         if (pa.origin_origin)
3111                 dsl_dataset_rele(pa.origin_origin, FTAG);
3112         dsl_dataset_rele(ds, FTAG);




  28 #include <sys/dsl_dataset.h>
  29 #include <sys/dsl_dir.h>
  30 #include <sys/dsl_prop.h>
  31 #include <sys/dsl_synctask.h>
  32 #include <sys/dmu_traverse.h>
  33 #include <sys/dmu_impl.h>
  34 #include <sys/dmu_tx.h>
  35 #include <sys/arc.h>
  36 #include <sys/zio.h>
  37 #include <sys/zap.h>
  38 #include <sys/zfeature.h>
  39 #include <sys/unique.h>
  40 #include <sys/zfs_context.h>
  41 #include <sys/zfs_ioctl.h>
  42 #include <sys/spa.h>
  43 #include <sys/zfs_znode.h>
  44 #include <sys/zfs_onexit.h>
  45 #include <sys/zvol.h>
  46 #include <sys/dsl_scan.h>
  47 #include <sys/dsl_deadlist.h>
  48 #include "zfs_prop.h"
  49 
  50 static char *dsl_reaper = "the grim reaper";
  51 
  52 static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
  53 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
  54 static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
  55 
     /*
      * Exchange the values of two lvalues through a uint64_t temporary.
      * The expansion is a bare brace block (not do/while) and names each
      * argument twice, so only pass side-effect-free lvalues.
      */
  56 #define SWITCH64(x, y) \
  57         { \
  58                 uint64_t __tmp = (x); \
  59                 (x) = (y); \
  60                 (y) = __tmp; \
  61         }
  62 
     /* 2^62.  NOTE(review): its use is outside this excerpt. */
  63 #define DS_REF_MAX      (1ULL << 62)
  64 
     /* Alias for SPA_MAXBLOCKSIZE. */
  65 #define DSL_DEADLIST_BLOCKSIZE  SPA_MAXBLOCKSIZE
  66 
     /*
      * True when the dataset's ds_owner is the dsl_reaper sentinel declared
      * above.  This is a pointer comparison against that one static string,
      * not a string comparison.
      */
  67 #define DSL_DATASET_IS_DESTROYED(ds)    ((ds)->ds_owner == dsl_reaper)
  68 


 315 dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
 316 {
             /*
              * Look up `name` in the snapshot-names ZAP object of `ds`
              * (ds_snapnames_zapobj in the pool's meta objset) and store the
              * single 8-byte integer found there in *value.
              *
              * Returns the error code of the ZAP lookup; callers in this file
              * treat 0 as "found" and ENOENT as "no such snapshot".
              */
 317         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 318         uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
 319         matchtype_t mt;
 320         int err;
 321 
             /*
              * Datasets flagged DS_FLAG_CI_DATASET use MT_FIRST matching,
              * everything else uses MT_EXACT.
              * NOTE(review): "CI" presumably means case-insensitive -- confirm.
              */
 322         if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
 323                 mt = MT_FIRST;
 324         else
 325                 mt = MT_EXACT;
 326 
 327         err = zap_lookup_norm(mos, snapobj, name, 8, 1,
 328             value, mt, NULL, 0, NULL);
             /*
              * If the normalizing lookup reports ENOTSUP for an MT_FIRST
              * request, retry with the plain zap_lookup().
              */
 329         if (err == ENOTSUP && mt == MT_FIRST)
 330                 err = zap_lookup(mos, snapobj, name, 8, 1, value);
 331         return (err);
 332 }
 333 
 334 static int
 335 dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx,
 336     boolean_t adj_cnt)
 337 {
             /*
              * Remove the entry `name` from the snapshot-names ZAP object of
              * `ds` as part of transaction `tx`.
              *
              * adj_cnt: when true and the removal succeeded, the snapshot
              * count of ds->ds_dir and its ancestors is decremented by one.
              * In this file the destroy path passes B_TRUE; the rename path
              * passes B_FALSE because the snapshot is re-added under its new
              * name.
              *
              * Returns the error code of the ZAP removal (callers in this file
              * expect 0).
              */
 338         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 339         uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
 340         matchtype_t mt;
 341         int err;
 342 
             /*
              * The snapshot change/modify time of the dsl_dir is updated
              * before the removal is attempted, i.e. even if it then fails.
              */
 343         dsl_dir_snap_cmtime_update(ds->ds_dir);
 344 
             /* Same match-type selection as dsl_dataset_snap_lookup(). */
 345         if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
 346                 mt = MT_FIRST;
 347         else
 348                 mt = MT_EXACT;
 349 
 350         err = zap_remove_norm(mos, snapobj, name, mt, tx);
             /* Fall back to the plain removal if MT_FIRST is not supported. */
 351         if (err == ENOTSUP && mt == MT_FIRST)
 352                 err = zap_remove(mos, snapobj, name, tx);
 353 
 354         if (err == 0 && adj_cnt)
 355                 dsl_snapcount_adjust(ds->ds_dir, tx, -1, B_TRUE);
 356 
 357         return (err);
 358 }
 359 
 360 static int
 361 dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
 362     dsl_dataset_t **dsp)
 363 {
 364         objset_t *mos = dp->dp_meta_objset;
 365         dmu_buf_t *dbuf;
 366         dsl_dataset_t *ds;
 367         int err;
 368         dmu_object_info_t doi;
 369 
 370         ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
 371             dsl_pool_sync_context(dp));
 372 
 373         err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
 374         if (err)
 375                 return (err);
 376 


1936                 ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
1937                 err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1938                 ASSERT(err == 0);
1939         } else {
1940                 /* remove from snapshot namespace */
1941                 dsl_dataset_t *ds_head;
1942                 ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
1943                 VERIFY(0 == dsl_dataset_hold_obj(dp,
1944                     ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
1945                 VERIFY(0 == dsl_dataset_get_snapname(ds));
1946 #ifdef ZFS_DEBUG
1947                 {
1948                         uint64_t val;
1949 
1950                         err = dsl_dataset_snap_lookup(ds_head,
1951                             ds->ds_snapname, &val);
1952                         ASSERT0(err);
1953                         ASSERT3U(val, ==, obj);
1954                 }
1955 #endif
1956                 err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx,
1957                     B_TRUE);
1958                 ASSERT(err == 0);
1959                 dsl_dataset_rele(ds_head, FTAG);
1960         }
1961 
1962         if (ds_prev && ds->ds_prev != ds_prev)
1963                 dsl_dataset_rele(ds_prev, FTAG);
1964 
1965         spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
1966 
1967         if (ds->ds_phys->ds_next_clones_obj != 0) {
1968                 uint64_t count;
1969                 ASSERT(0 == zap_count(mos,
1970                     ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
1971                 VERIFY(0 == dmu_object_free(mos,
1972                     ds->ds_phys->ds_next_clones_obj, tx));
1973         }
1974         if (ds->ds_phys->ds_props_obj != 0)
1975                 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
1976         if (ds->ds_phys->ds_userrefs_obj != 0)
1977                 VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));


2002         /*
2003          * If there's an fs-only reservation, any blocks that might become
2004          * owned by the snapshot dataset must be accommodated by space
2005          * outside of the reservation.
2006          */
2007         ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
2008         asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
2009         if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
2010                 return (ENOSPC);
2011 
2012         /*
2013          * Propagate any reserved space for this snapshot to other
2014          * snapshot checks in this sync group.
2015          */
2016         if (asize > 0)
2017                 dsl_dir_willuse_space(ds->ds_dir, asize, tx);
2018 
2019         return (0);
2020 }
2021 
2022 /*
2023  * Check if adding additional snapshot(s) would exceed any snapshot limits.
2024  * Note that all snapshot limits up to the root dataset (i.e. the pool itself)
2025  * or the given ancestor must be satisfied. Note that it is valid for the
2026  * count to exceed the limit. This can happen if a snapshot is taken by an
2027  * administrative user in the global zone (e.g. a recursive snapshot by root).
2028  */
2029 int
2030 dsl_snapcount_check(dsl_dir_t *dd, uint64_t cnt, dsl_dir_t *ancestor,
2031     cred_t *cr)
2032 {
             /*
              * dd:       directory at which to start checking; the function
              *           recurses through dd_parent.
              * cnt:      number of snapshots about to be added.
              * ancestor: directory at which to stop, or NULL to walk to the
              *           top.
              * cr:       credentials used for the "may change the limit" test.
              *
              * Returns 0 when no limit is exceeded (or none is enforced),
              * EDQUOT when dd_snapshot_count + cnt exceeds a limit, or the
              * error from reading the limit property.  The pool config rwlock
              * must be held.
              */
2033         uint64_t limit;
2034         int err = 0;
2035 
2036         VERIFY(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
2037 
             /*
              * This test is repeated for every directory visited by the
              * recursion and ends the walk at the first directory on which
              * the caller may write the property.
              */
2038         /* If we're allowed to change the limit, don't enforce the limit. */
2039         if (dsl_secpolicy_write_prop(dd, ZFS_PROP_SNAPSHOT_LIMIT, cr) == 0)
2040                 return (0);
2041 
2042         /*
2043          * If renaming a dataset with no snapshots, count adjustment is 0.
2044          */
2045         if (cnt == 0)
2046                 return (0);
2047 
2048         /*
2049          * If an ancestor has been provided, stop checking the limit once we
2050          * hit that dir. We need this during rename so that we don't overcount
2051          * the check once we recurse up to the common ancestor.
2052          */
2053         if (ancestor == dd)
2054                 return (0);
2055 
2056         /*
2057          * If we hit an uninitialized node while recursing up the tree, we can
2058          * stop since we know the counts are not valid on this node and we
2059          * know we won't touch this node's counts. We also know that the counts
2060          * on the nodes above this one are uninitialized and that there cannot
2061          * be a limit set on any of those nodes.
2062          */
             /*
              * Note that the "uninitialized" marker tested here is the
              * filesystem count, not the snapshot count.
              */
2063         if (dd->dd_phys->dd_filesystem_count == 0)
2064                 return (0);
2065 
2066         err = dsl_prop_get_dd(dd, zfs_prop_to_name(ZFS_PROP_SNAPSHOT_LIMIT),
2067             8, 1, &limit, NULL, B_FALSE);
2068         if (err != 0)
2069                 return (err);
2070 
2071         /* Is there a snapshot limit which we've hit? */
2072         if ((dd->dd_phys->dd_snapshot_count + cnt) > limit)
2073                 return (EDQUOT);
2074 
             /* This level passed; apply the same check to the parent. */
2075         if (dd->dd_parent != NULL)
2076                 err = dsl_snapcount_check(dd->dd_parent, cnt, ancestor, cr);
2077 
2078         return (err);
2079 }
2080 
2081 /*
2082  * Adjust the snapshot count for the specified dsl_dir_t and all parents.
2083  * When a new snapshot is created, increment the count on all parents, and when
2084  * a snapshot is destroyed, decrement the count.
2085  */
2086 void
2087 dsl_snapcount_adjust(dsl_dir_t *dd, dmu_tx_t *tx, int64_t delta,
2088     boolean_t first)
2089 {
             /*
              * dd:    directory whose count is adjusted; ancestors follow via
              *        recursion through dd_parent.
              * tx:    transaction used to dirty each directory's buffer; it
              *        must be a syncing tx.
              * delta: signed change to apply to dd_snapshot_count.
              * first: B_TRUE only on the outermost call; the recursive calls
              *        pass B_FALSE so that the lock/tx assertions and the
              *        feature check run once.
              */
2090         if (first) {
2091                 VERIFY(RW_LOCK_HELD(&dd->dd_pool->dp_config_rwlock));
2092                 VERIFY(dmu_tx_is_syncing(tx));
2093         }
2094 
2095         /*
2096          * If we hit an uninitialized node while recursing up the tree, we can
2097          * stop since we know the counts are not valid on this node and we
2098          * know we shouldn't touch this node's counts. An uninitialized count
2099          * on the node indicates that either the feature has not yet been
2100          * activated or there are no limits on this part of the tree.
2101          */
2102         if (dd->dd_phys->dd_filesystem_count == 0)
2103                 return;
2104 
2105         /* if renaming a dataset with no snapshots, count adjustment is 0 */
2106         if (delta == 0)
2107                 return;
2108 
2109         /*
2110          * On initial entry we need to check if this feature is active, but
2111          * we don't want to re-check this on each recursive call. Note: the
2112          * feature cannot be active if it's not enabled. If the feature is not
2113          * active, don't touch the on-disk count fields.
2114          */
2115         if (first) {
2116                 zfeature_info_t *quota_feat =
2117                     &spa_feature_table[SPA_FEATURE_FS_SS_LIMIT];
2118 
2119                 if (!spa_feature_is_active(dd->dd_pool->dp_spa, quota_feat))
2120                         return;
2121         }
2122 
2123         dmu_buf_will_dirty(dd->dd_dbuf, tx);
2124 
2125         mutex_enter(&dd->dd_lock);
2126 
2127         dd->dd_phys->dd_snapshot_count += delta;
             /*
              * NOTE(review): if dd_snapshot_count is an unsigned field this
              * VERIFY can never fire and an underflow would go unnoticed --
              * confirm the field's type.
              */
2128         VERIFY(dd->dd_phys->dd_snapshot_count >= 0);
2129 
             /*
              * The recursion happens while this directory's dd_lock is still
              * held, so locks are taken child-first up the tree.
              */
2130         /* Roll up this additional count into our ancestors */
2131         if (dd->dd_parent != NULL)
2132                 dsl_snapcount_adjust(dd->dd_parent, tx, delta, B_FALSE);
2133 
2134         mutex_exit(&dd->dd_lock);
2135 }
2136 
2137 int
2138 dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
2139     uint64_t cnt, dmu_tx_t *tx, cred_t *cr)
2140 {
             /*
              * Decide whether snapshot `snapname` of `ds` may be created in
              * the txg of `tx`.  `cnt` (number of snapshots being added) and
              * `cr` are passed through to dsl_snapcount_check().
              *
              * Returns 0 if the snapshot may proceed (and records the txg in
              * ds->ds_trysnap_txg), otherwise:
              *   EAGAIN        a snapshot already exists for this or a later txg
              *   EEXIST        a snapshot of that name already exists
              *   ENAMETOOLONG  "<dataset>@<snapname>" would not fit MAXNAMELEN
              *   other         error from the name lookup, from
              *                 dsl_snapcount_check() (e.g. EDQUOT) or from
              *                 dsl_dataset_snapshot_reserve_space()
              */
2141         int err;
             /* Only an out-parameter for the lookup; its content is not used. */
2142         uint64_t value;
2143 
2144         /*
2145          * We don't allow multiple snapshots of the same txg.  If there
2146          * is already one, try again.
2147          */
2148         if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
2149                 return (EAGAIN);
2150 
2151         /*
2152          * Check for conflicting snapshot name.
2153          */
2154         err = dsl_dataset_snap_lookup(ds, snapname, &value);
2155         if (err == 0)
2156                 return (EEXIST);
2157         if (err != ENOENT)
2158                 return (err);
2159 
2160         /*
2161          * Check that the dataset's name is not too long.  Name consists
2162          * of the dataset's length + 1 for the @-sign + snapshot name's length
2163          */
2164         if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
2165                 return (ENAMETOOLONG);
2166 
             /* No stopping ancestor: check limits all the way up the tree. */
2167         err = dsl_snapcount_check(ds->ds_dir, cnt, NULL, cr);
2168         if (err)
2169                 return (err);
2170 
2171         err = dsl_dataset_snapshot_reserve_space(ds, tx);
2172         if (err)
2173                 return (err);
2174 
2175         ds->ds_trysnap_txg = tx->tx_txg;
2176         return (0);
2177 }
2178 
2179 void
2180 dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
2181     dmu_tx_t *tx)
2182 {
2183         dsl_pool_t *dp = ds->ds_dir->dd_pool;
2184         dmu_buf_t *dbuf;
2185         dsl_dataset_phys_t *dsphys;
2186         uint64_t dsobj, crtxg;
2187         objset_t *mos = dp->dp_meta_objset;
2188         int err;
2189 
2190         ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
2191 
2192         dsl_snapcount_adjust(ds->ds_dir, tx, 1, B_TRUE);
2193 
2194         /*
2195          * The origin's ds_creation_txg has to be < TXG_INITIAL
2196          */
2197         if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
2198                 crtxg = 1;
2199         else
2200                 crtxg = tx->tx_txg;
2201 
2202         dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
2203             DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
2204         VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
2205         dmu_buf_will_dirty(dbuf, tx);
2206         dsphys = dbuf->db_data;
2207         bzero(dsphys, sizeof (dsl_dataset_phys_t));
2208         dsphys->ds_dir_obj = ds->ds_dir->dd_object;
2209         dsphys->ds_fsid_guid = unique_create();
2210         (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
2211             sizeof (dsphys->ds_guid));
2212         dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
2213         dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;


2547 
2548         return (err);
2549 }
2550 
2551 static void
2552 dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
2553 {
             /*
              * Sync-task callback that renames a snapshot.
              *   arg1: the snapshot dataset (dsl_dataset_t *)
              *   arg2: the new snapshot name (const char *)
              *
              * The old name entry is removed from the head dataset's
              * snapshot-names ZAP, ds->ds_snapname is overwritten, and the new
              * name is added pointing at the same ds_object.
              */
2554         dsl_dataset_t *ds = arg1;
2555         const char *newsnapname = arg2;
2556         dsl_dir_t *dd = ds->ds_dir;
2557         objset_t *mos = dd->dd_pool->dp_meta_objset;
2558         dsl_dataset_t *hds;
2559         int err;
2560 
2561         ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
2562 
             /* Hold the head dataset of this dsl_dir; released at the end. */
2563         VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
2564             dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
2565 
2566         VERIFY(0 == dsl_dataset_get_snapname(ds));
             /*
              * B_FALSE: do not decrement the snapshot count, since the
              * snapshot is re-added under its new name just below.
              */
2567         err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx, B_FALSE);
2568         ASSERT0(err);
             /*
              * NOTE(review): strcpy() is unbounded here; this relies on the
              * new name's length having been validated earlier (presumably in
              * dsl_dataset_snapshot_rename_check) -- confirm.
              */
2569         mutex_enter(&ds->ds_lock);
2570         (void) strcpy(ds->ds_snapname, newsnapname);
2571         mutex_exit(&ds->ds_lock);
2572         err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
2573             ds->ds_snapname, 8, 1, &ds->ds_object, tx);
2574         ASSERT0(err);
2575 
2576         spa_history_log_internal_ds(ds, "rename", tx,
2577             "-> @%s", newsnapname);
2578         dsl_dataset_rele(hds, FTAG);
2579 }
2580 
     /*
      * Argument bundle for a snapshot rename operation.
      *
      * NOTE(review): the code that fills in and consumes this structure is
      * not visible in this part of the file; the field notes below are
      * inferred from names and types and should be confirmed there.
      */
2581 struct renamesnaparg {
             /* Sync task group; presumably collects one task per dataset. */
2582         dsl_sync_task_group_t *dstg;
             /* MAXPATHLEN buffer; presumably names the dataset that failed. */
2583         char failed[MAXPATHLEN];
             /* Presumably the existing snapshot name. */
2584         char *oldsnap;
             /* Presumably the replacement snapshot name. */
2585         char *newsnap;
2586 };
2587 


2742                 err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2743                     dsl_dataset_snapshot_rename_check,
2744                     dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
2745 
2746                 dsl_dataset_rele(ds, FTAG);
2747         }
2748 
2749         return (err);
2750 }
2751 
     /*
      * One element of the snapshot lists kept in struct promotearg.  The
      * promote code walks these lists with list_head()/list_next() and
      * reads the dataset through ->ds.
      */
2752 struct promotenode {
             /* Linkage for the list_t this node is on. */
2753         list_node_t link;
             /* The dataset this node refers to. */
2754         dsl_dataset_t *ds;
2755 };
2756 
     /*
      * State shared between the promote entry point and the promote
      * check/sync callbacks; it is passed to dsl_sync_task_do() as the
      * callbacks' second argument.
      */
2757 struct promotearg {
             /*
              * Lists of struct promotenode.  The head of shared_snaps is the
              * origin snapshot of the clone being promoted, and the promote
              * code walks shared_snaps to move each snapshot into the
              * clone's dsl_dir.  origin_snaps is built with snaplist_make()
              * using the head dataset of the origin's dsl_dir.  How
              * clone_snaps is built is not visible in this part of the file.
              */
2758         list_t shared_snaps, origin_snaps, clone_snaps;
             /*
              * Held by the entry point when the origin's own dsl_dir has a
              * non-zero dd_origin_obj, and released after the sync task.
              * Tested for NULL before use.  The check callback subtracts
              * its referenced, compressed and uncompressed byte counts from
              * used, comp and uncomp.
              */
2759         dsl_dataset_t *origin_origin;
             /*
              * used, comp and uncomp are accumulated from
              * dsl_deadlist_space() results in the check callback; used is
              * then passed to dsl_dir_transfer_possible().
              *
              * NOTE(review): unique, cloneusedsnap and originusedsnap are
              * not written in the visible code; presumably the unique space
              * and the per-side snapshot-used space after promotion --
              * confirm.
              */
2760         uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
             /*
              * If set when the sync task fails, this string is copied into
              * the caller's conflsnap buffer.
              */
2761         char *err_ds;
             /*
              * Set from CRED() by the entry point and passed to
              * dsl_dir_transfer_possible() by the check callback.
              */
2762         cred_t *cr;
2763 };
2764 
     /*
      * Forward declarations of file-local helpers that take a list_t;
      * presumably one of the promotenode lists kept in struct promotearg.
      * NOTE(review): their definitions are not visible in this part of the
      * file -- confirm semantics there.
      */
2765 static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
2766 static boolean_t snaplist_unstable(list_t *l);
2767 
2768 static int
2769 dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
2770 {
2771         dsl_dataset_t *hds = arg1;
2772         struct promotearg *pa = arg2;
2773         struct promotenode *snap = list_head(&pa->shared_snaps);
2774         dsl_dataset_t *origin_ds = snap->ds;
2775         int err;
2776         uint64_t unused;
2777 
2778         /* Check that it is a real clone */
2779         if (!dsl_dir_is_clone(hds->ds_dir))
2780                 return (EINVAL);
2781 
2782         /* Since this is so expensive, don't do the preliminary check */


2830                 if (ds->ds_phys->ds_prev_snap_obj == 0)
2831                         continue;
2832 
2833                 dsl_deadlist_space(&ds->ds_deadlist,
2834                     &dlused, &dlcomp, &dluncomp);
2835                 pa->used += dlused;
2836                 pa->comp += dlcomp;
2837                 pa->uncomp += dluncomp;
2838         }
2839 
2840         /*
2841          * If we are a clone of a clone then we never reached ORIGIN,
2842          * so we need to subtract out the clone origin's used space.
2843          */
2844         if (pa->origin_origin) {
2845                 pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
2846                 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
2847                 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
2848         }
2849 
2850         /* Check that there is enough space and limit headroom here */
2851         err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2852             origin_ds->ds_dir, pa->used, pa->cr);
2853         if (err)
2854                 return (err);
2855 
2856         /*
2857          * Compute the amounts of space that will be used by snapshots
2858          * after the promotion (for both origin and clone).  For each,
2859          * it is the amount of space that will be on all of their
2860          * deadlists (that was not born before their new origin).
2861          */
2862         if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
2863                 uint64_t space;
2864 
2865                 /*
2866                  * Note, typically this will not be a clone of a clone,
2867                  * so dd_origin_txg will be < TXG_INITIAL, so
2868                  * these snaplist_space() -> dsl_deadlist_space_range()
2869                  * calls will be fast because they do not have to
2870                  * iterate over all bps.
2871                  */
2872                 snap = list_head(&pa->origin_snaps);


2961                             DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
2962                 }
2963                 VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
2964                     dd->dd_phys->dd_clones, origin_head->ds_object, tx));
2965 
2966         }
2967 
2968         /* move snapshots to this dir */
2969         for (snap = list_head(&pa->shared_snaps); snap;
2970             snap = list_next(&pa->shared_snaps, snap)) {
2971                 dsl_dataset_t *ds = snap->ds;
2972 
2973                 /* unregister props as dsl_dir is changing */
2974                 if (ds->ds_objset) {
2975                         dmu_objset_evict(ds->ds_objset);
2976                         ds->ds_objset = NULL;
2977                 }
2978                 /* move snap name entry */
2979                 VERIFY(0 == dsl_dataset_get_snapname(ds));
2980                 VERIFY(0 == dsl_dataset_snap_remove(origin_head,
2981                     ds->ds_snapname, tx, B_TRUE));
2982                 VERIFY(0 == zap_add(dp->dp_meta_objset,
2983                     hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
2984                     8, 1, &ds->ds_object, tx));
2985                 dsl_snapcount_adjust(hds->ds_dir, tx, 1, B_TRUE);
2986 
2987                 /* change containing dsl_dir */
2988                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
2989                 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
2990                 ds->ds_phys->ds_dir_obj = dd->dd_object;
2991                 ASSERT3P(ds->ds_dir, ==, odd);
2992                 dsl_dir_close(ds->ds_dir, ds);
2993                 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
2994                     NULL, ds, &ds->ds_dir));
2995 
2996                 /* move any clone references */
2997                 if (ds->ds_phys->ds_next_clones_obj &&
2998                     spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
2999                         zap_cursor_t zc;
3000                         zap_attribute_t za;
3001 
3002                         for (zap_cursor_init(&zc, dp->dp_meta_objset,
3003                             ds->ds_phys->ds_next_clones_obj);
3004                             zap_cursor_retrieve(&zc, &za) == 0;
3005                             zap_cursor_advance(&zc)) {


3203         if (err != 0)
3204                 goto out;
3205 
3206         snap = list_head(&pa.shared_snaps);
3207         ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
3208         err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
3209             snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
3210         if (err != 0)
3211                 goto out;
3212 
3213         if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) {
3214                 err = dsl_dataset_hold_obj(dp,
3215                     snap->ds->ds_dir->dd_phys->dd_origin_obj,
3216                     FTAG, &pa.origin_origin);
3217                 if (err != 0)
3218                         goto out;
3219         }
3220 
3221 out:
3222         rw_exit(&dp->dp_config_rwlock);
3223         pa.cr = CRED();
3224 
3225         /*
3226          * Add in 128x the snapnames zapobj size, since we will be moving
3227          * a bunch of snapnames to the promoted ds, and dirtying their
3228          * bonus buffers.
3229          */
3230         if (err == 0) {
3231                 err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
3232                     dsl_dataset_promote_sync, ds, &pa,
3233                     2 + 2 * doi.doi_physical_blocks_512);
3234                 if (err && pa.err_ds && conflsnap)
3235                         (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN);
3236         }
3237 
3238         snaplist_destroy(&pa.shared_snaps, B_TRUE);
3239         snaplist_destroy(&pa.clone_snaps, B_FALSE);
3240         snaplist_destroy(&pa.origin_snaps, B_FALSE);
3241         if (pa.origin_origin)
3242                 dsl_dataset_rele(pa.origin_origin, FTAG);
3243         dsl_dataset_rele(ds, FTAG);