illumos Sdiff usr/src/uts/common/fs/zfs/dsl_dataset.c

Print this page

OS-1566 filesystem limits for ZFS datasets

  28 #include <sys/dsl_dataset.h>
  29 #include <sys/dsl_dir.h>
  30 #include <sys/dsl_prop.h>
  31 #include <sys/dsl_synctask.h>
  32 #include <sys/dmu_traverse.h>
  33 #include <sys/dmu_impl.h>
  34 #include <sys/dmu_tx.h>
  35 #include <sys/arc.h>
  36 #include <sys/zio.h>
  37 #include <sys/zap.h>
  38 #include <sys/zfeature.h>
  39 #include <sys/unique.h>
  40 #include <sys/zfs_context.h>
  41 #include <sys/zfs_ioctl.h>
  42 #include <sys/spa.h>
  43 #include <sys/zfs_znode.h>
  44 #include <sys/zfs_onexit.h>
  45 #include <sys/zvol.h>
  46 #include <sys/dsl_scan.h>
  47 #include <sys/dsl_deadlist.h>

  48 /* Sentinel ds_owner value; see DSL_DATASET_IS_DESTROYED() below. */
  49 static char *dsl_reaper = "the grim reaper";
  50 /* Forward declarations of static check/sync functions. */
  51 static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
  52 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
  53 static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
  54 /* Swap two uint64_t lvalues (each argument is evaluated twice). */
  55 #define SWITCH64(x, y) \
  56         { \
  57                 uint64_t __tmp = (x); \
  58                 (x) = (y); \
  59                 (y) = __tmp; \
  60         }
  61 
  62 #define DS_REF_MAX      (1ULL << 62)
  63 
  64 #define DSL_DEADLIST_BLOCKSIZE  SPA_MAXBLOCKSIZE
  65 /* True when the dataset's ds_owner is the dsl_reaper sentinel. */
  66 #define DSL_DATASET_IS_DESTROYED(ds)    ((ds)->ds_owner == dsl_reaper)
  67

 331 }
 332 
 333 /* Remove "name" from ds's snapshot-names ZAP; returns 0 or an errno. */
 334 static int
 335 dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
 336 {
 337         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 338         uint64_t zapobj = ds->ds_phys->ds_snapnames_zapobj;
 339         matchtype_t mt;
 340         int error;
 341 
 342         dsl_dir_snap_cmtime_update(ds->ds_dir);
 343 
 344         /* Datasets flagged DS_FLAG_CI_DATASET match with MT_FIRST. */
 345         mt = (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) ?
 346             MT_FIRST : MT_EXACT;
 347         error = zap_remove_norm(mos, zapobj, name, mt, tx);
 348         if (error == ENOTSUP && mt == MT_FIRST)
 349                 error = zap_remove(mos, zapobj, name, tx);
 350 




 351         return (error);
 352 }
 353 
 354 static int
 355 dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
 356     dsl_dataset_t **dsp)
 357 {
 358         objset_t *mos = dp->dp_meta_objset;
 359         dmu_buf_t *dbuf;
 360         dsl_dataset_t *ds;
 361         int err;
 362         dmu_object_info_t doi;
 363 
 364         ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
 365             dsl_pool_sync_context(dp));
 366 
 367         err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
 368         if (err)
 369                 return (err);
 370

1119          */
1120         dsl_dataset_make_exclusive(ds, tag);
1121         /*
1122          * If we're removing a clone, we might also need to remove its
1123          * origin.
1124          */
1125         do {
1126                 dsda.need_prep = B_FALSE;
1127                 if (dsl_dir_is_clone(dd)) {
1128                         err = dsl_dataset_origin_rm_prep(&dsda, tag);
1129                         if (err) {
1130                                 dsl_dir_close(dd, FTAG);
1131                                 goto out;
1132                         }
1133                 }
1134 
1135                 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
1136                 dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
1137                     dsl_dataset_destroy_sync, &dsda, tag, 0);
1138                 dsl_sync_task_create(dstg, dsl_dir_destroy_check,
1139                     dsl_dir_destroy_sync, dd, FTAG, 0);
1140                 err = dsl_sync_task_group_wait(dstg);
1141                 dsl_sync_task_group_destroy(dstg);
1142 
1143                 /*
1144                  * We could be racing against 'zfs release' or 'zfs destroy -d'
1145                  * on the origin snap, in which case we can get EBUSY if we
1146                  * needed to destroy the origin snap but were not ready to
1147                  * do so.
1148                  */
1149                 if (dsda.need_prep) {
1150                         ASSERT(err == EBUSY);
1151                         ASSERT(dsl_dir_is_clone(dd));
1152                         ASSERT(dsda.rm_origin == NULL);
1153                 }
1154         } while (dsda.need_prep);
1155 
1156         if (dsda.rm_origin != NULL)
1157                 dsl_dataset_disown(dsda.rm_origin, tag);
1158 
1159         /* if it is successful, dsl_dir_destroy_sync will close the dd */

1995         /*
1996          * If there's an fs-only reservation, any blocks that might become
1997          * owned by the snapshot dataset must be accommodated by space
1998          * outside of the reservation.
1999          */
2000         ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
2001         asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
2002         if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
2003                 return (ENOSPC);
2004 
2005         /*
2006          * Propagate any reserved space for this snapshot to other
2007          * snapshot checks in this sync group.
2008          */
2009         if (asize > 0)
2010                 dsl_dir_willuse_space(ds->ds_dir, asize, tx);
2011 
2012         return (0);
2013 }
2014 







2015 /*
2016  * Check whether snapshot "snapname" of ds can be created in tx.
2017  *
2018  * Returns 0 if the snapshot can be taken, otherwise an errno value.
2019  */
2020 int
2021 dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
2022     dmu_tx_t *tx)
2023 {
2024         uint64_t ignored;
2025         int error;
2026 
2027         /* Only one snapshot per txg is allowed; the caller should retry. */
2028         if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
2029                 return (EAGAIN);
2030 
2031         /* The snapshot name must not be in use already. */
2032         error = dsl_dataset_snap_lookup(ds, snapname, &ignored);
2033         if (error != ENOENT)
2034                 return (error == 0 ? EEXIST : error);
2035 
2036         /*
2037          * The full name is the dataset's name, an '@' and the snapshot
2038          * name; together they must fit in MAXNAMELEN.
2039          */
2040         if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
2041                 return (ENAMETOOLONG);
2042 
2043         /* Reserve space for the snapshot. */
2044         error = dsl_dataset_snapshot_reserve_space(ds, tx);
2045         if (error != 0)
2046                 return (error);
2047 
2048         /* Record the txg in which this snapshot is being attempted. */
2049         ds->ds_trysnap_txg = tx->tx_txg;
2050         return (0);
2051 }
2052 
2053 void
2054 dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
2055     dmu_tx_t *tx)
2056 {
2057         dsl_pool_t *dp = ds->ds_dir->dd_pool;
2058         dmu_buf_t *dbuf;
2059         dsl_dataset_phys_t *dsphys;
2060         uint64_t dsobj, crtxg;
2061         objset_t *mos = dp->dp_meta_objset;
2062         int err;
2063 
2064         ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
2065 


2066         /*
2067          * The origin's ds_creation_txg has to be < TXG_INITIAL
2068          */
2069         if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
2070                 crtxg = 1;
2071         else
2072                 crtxg = tx->tx_txg;
2073 
2074         dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
2075             DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
2076         VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
2077         dmu_buf_will_dirty(dbuf, tx);
2078         dsphys = dbuf->db_data;
2079         bzero(dsphys, sizeof (dsl_dataset_phys_t));
2080         dsphys->ds_dir_obj = ds->ds_dir->dd_object;
2081         dsphys->ds_fsid_guid = unique_create();
2082         (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
2083             sizeof (dsphys->ds_guid));
2084         dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
2085         dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;

2701                 if (ds->ds_phys->ds_prev_snap_obj == 0)
2702                         continue;
2703 
2704                 dsl_deadlist_space(&ds->ds_deadlist,
2705                     &dlused, &dlcomp, &dluncomp);
2706                 pa->used += dlused;
2707                 pa->comp += dlcomp;
2708                 pa->uncomp += dluncomp;
2709         }
2710 
2711         /*
2712          * If we are a clone of a clone then we never reached ORIGIN,
2713          * so we need to subtract out the clone origin's used space.
2714          */
2715         if (pa->origin_origin) {
2716                 pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
2717                 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
2718                 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
2719         }
2720 
2721         /* Check that there is enough space here */
2722         err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2723             pa->used);
2724         if (err)
2725                 return (err);
2726 
2727         /*
2728          * Compute the amounts of space that will be used by snapshots
2729          * after the promotion (for both origin and clone).  For each,
2730          * it is the amount of space that will be on all of their
2731          * deadlists (that was not born before their new origin).
2732          */
2733         if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
2734                 uint64_t space;
2735 
2736                 /*
2737                  * Note, typically this will not be a clone of a clone,
2738                  * so dd_origin_txg will be < TXG_INITIAL, so
2739                  * these snaplist_space() -> dsl_deadlist_space_range()
2740                  * calls will be fast because they do not have to
2741                  * iterate over all bps.
2742                  */
2743                 snap = list_head(&pa->origin_snaps);

2836 
2837         }
2838 
2839         /* move snapshots to this dir */
2840         for (snap = list_head(&pa->shared_snaps); snap;
2841             snap = list_next(&pa->shared_snaps, snap)) {
2842                 dsl_dataset_t *ds = snap->ds;
2843 
2844                 /* unregister props as dsl_dir is changing */
2845                 if (ds->ds_objset) {
2846                         dmu_objset_evict(ds->ds_objset);
2847                         ds->ds_objset = NULL;
2848                 }
2849                 /* move snap name entry */
2850                 VERIFY(0 == dsl_dataset_get_snapname(ds));
2851                 VERIFY(0 == dsl_dataset_snap_remove(origin_head,
2852                     ds->ds_snapname, tx));
2853                 VERIFY(0 == zap_add(dp->dp_meta_objset,
2854                     hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
2855                     8, 1, &ds->ds_object, tx));

2856 
2857                 /* change containing dsl_dir */
2858                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
2859                 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
2860                 ds->ds_phys->ds_dir_obj = dd->dd_object;
2861                 ASSERT3P(ds->ds_dir, ==, odd);
2862                 dsl_dir_close(ds->ds_dir, ds);
2863                 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
2864                     NULL, ds, &ds->ds_dir));
2865 
2866                 /* move any clone references */
2867                 if (ds->ds_phys->ds_next_clones_obj &&
2868                     spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
2869                         zap_cursor_t zc;
2870                         zap_attribute_t za;
2871 
2872                         for (zap_cursor_init(&zc, dp->dp_meta_objset,
2873                             ds->ds_phys->ds_next_clones_obj);
2874                             zap_cursor_retrieve(&zc, &za) == 0;
2875                             zap_cursor_advance(&zc)) {

  28 #include <sys/dsl_dataset.h>
  29 #include <sys/dsl_dir.h>
  30 #include <sys/dsl_prop.h>
  31 #include <sys/dsl_synctask.h>
  32 #include <sys/dmu_traverse.h>
  33 #include <sys/dmu_impl.h>
  34 #include <sys/dmu_tx.h>
  35 #include <sys/arc.h>
  36 #include <sys/zio.h>
  37 #include <sys/zap.h>
  38 #include <sys/zfeature.h>
  39 #include <sys/unique.h>
  40 #include <sys/zfs_context.h>
  41 #include <sys/zfs_ioctl.h>
  42 #include <sys/spa.h>
  43 #include <sys/zfs_znode.h>
  44 #include <sys/zfs_onexit.h>
  45 #include <sys/zvol.h>
  46 #include <sys/dsl_scan.h>
  47 #include <sys/dsl_deadlist.h>
  48 #include "zfs_prop.h"
  49 /* Sentinel ds_owner value; see DSL_DATASET_IS_DESTROYED() below. */
  50 static char *dsl_reaper = "the grim reaper";
  51 /* Forward declarations of static check/sync functions. */
  52 static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
  53 static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
  54 static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
  55 /* Swap two uint64_t lvalues (each argument is evaluated twice). */
  56 #define SWITCH64(x, y) \
  57         { \
  58                 uint64_t __tmp = (x); \
  59                 (x) = (y); \
  60                 (y) = __tmp; \
  61         }
  62 
  63 #define DS_REF_MAX      (1ULL << 62)
  64 
  65 #define DSL_DEADLIST_BLOCKSIZE  SPA_MAXBLOCKSIZE
  66 /* True when the dataset's ds_owner is the dsl_reaper sentinel. */
  67 #define DSL_DATASET_IS_DESTROYED(ds)    ((ds)->ds_owner == dsl_reaper)
  68

 332 }
 333 
 334 /*
 335  * Remove "name" from ds's snapshot ZAP; on success, decrement the snap count.
 336  */
 337 static int
 338 dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
 339 {
 340         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 341         uint64_t zapobj = ds->ds_phys->ds_snapnames_zapobj;
 342         matchtype_t mt;
 343         int error;
 344 
 345         dsl_dir_snap_cmtime_update(ds->ds_dir);
 346 
 347         /* Datasets flagged DS_FLAG_CI_DATASET match with MT_FIRST. */
 348         mt = (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET) ?
 349             MT_FIRST : MT_EXACT;
 350         error = zap_remove_norm(mos, zapobj, name, mt, tx);
 351         if (error == ENOTSUP && mt == MT_FIRST)
 352                 error = zap_remove(mos, zapobj, name, tx);
 353         if (error != 0)
 354                 return (error);
 355         dsl_snapcount_adjust(ds->ds_dir, tx, -1, B_TRUE);
 356         return (0);
 357 }
 358 
 359 static int
 360 dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
 361     dsl_dataset_t **dsp)
 362 {
 363         objset_t *mos = dp->dp_meta_objset;
 364         dmu_buf_t *dbuf;
 365         dsl_dataset_t *ds;
 366         int err;
 367         dmu_object_info_t doi;
 368 
 369         ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
 370             dsl_pool_sync_context(dp));
 371 
 372         err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
 373         if (err)
 374                 return (err);
 375

1124          */
1125         dsl_dataset_make_exclusive(ds, tag);
1126         /*
1127          * If we're removing a clone, we might also need to remove its
1128          * origin.
1129          */
1130         do {
1131                 dsda.need_prep = B_FALSE;
1132                 if (dsl_dir_is_clone(dd)) {
1133                         err = dsl_dataset_origin_rm_prep(&dsda, tag);
1134                         if (err) {
1135                                 dsl_dir_close(dd, FTAG);
1136                                 goto out;
1137                         }
1138                 }
1139 
1140                 dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
1141                 dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
1142                     dsl_dataset_destroy_sync, &dsda, tag, 0);
1143                 dsl_sync_task_create(dstg, dsl_dir_destroy_check,
1144                     dsl_dir_destroy_sync, dd, tag, 0);
1145                 err = dsl_sync_task_group_wait(dstg);
1146                 dsl_sync_task_group_destroy(dstg);
1147 
1148                 /*
1149                  * We could be racing against 'zfs release' or 'zfs destroy -d'
1150                  * on the origin snap, in which case we can get EBUSY if we
1151                  * needed to destroy the origin snap but were not ready to
1152                  * do so.
1153                  */
1154                 if (dsda.need_prep) {
1155                         ASSERT(err == EBUSY);
1156                         ASSERT(dsl_dir_is_clone(dd));
1157                         ASSERT(dsda.rm_origin == NULL);
1158                 }
1159         } while (dsda.need_prep);
1160 
1161         if (dsda.rm_origin != NULL)
1162                 dsl_dataset_disown(dsda.rm_origin, tag);
1163 
1164         /* if it is successful, dsl_dir_destroy_sync will close the dd */

2000         /*
2001          * If there's an fs-only reservation, any blocks that might become
2002          * owned by the snapshot dataset must be accommodated by space
2003          * outside of the reservation.
2004          */
2005         ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
2006         asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
2007         if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
2008                 return (ENOSPC);
2009 
2010         /*
2011          * Propagate any reserved space for this snapshot to other
2012          * snapshot checks in this sync group.
2013          */
2014         if (asize > 0)
2015                 dsl_dir_willuse_space(ds->ds_dir, asize, tx);
2016 
2017         return (0);
2018 }
2019 
2020 /*
2021  * Decide whether "cnt" more snapshots may be created under "dd" without
2022  * violating a snapshot limit.  Every limit between dd and the pool's root
2023  * dataset (or "ancestor", exclusive, when one is given) has to be honored.
2024  *
2025  * An existing count may already be above its limit: snapshots taken from
2026  * the global zone (e.g. a recursive snapshot by root) are not restricted.
2027  *
2028  * Returns 0 if the snapshots are allowed, EDQUOT if a limit would be
2029  * exceeded, or the error from reading the snapshot-limit property.
2030  */
2031 int
2032 dsl_snapcount_check(dsl_dir_t *dd, uint64_t cnt, dsl_dir_t *ancestor)
2033 {
2034         dsl_dir_t *cur;
2035 
2036         /*
2037          * Callers in the global zone are never limited.  For other zones
2038          * this has to run in open context, because that is where the
2039          * calling zone is known; syncing only happens in the global zone.
2040          */
2041         if (INGLOBALZONE(curproc))
2042                 return (0);
2043 
2044         /* Renaming a dataset that has no snapshots adds nothing. */
2045         if (cnt == 0)
2046                 return (0);
2047 
2048         /* Walk from dd towards the root, testing each level's limit. */
2049         for (cur = dd; cur != NULL; cur = cur->dd_parent) {
2050                 uint64_t limit;
2051                 int err;
2052 
2053                 /*
2054                  * Stop at the caller-supplied ancestor.  Rename needs this
2055                  * so levels from the common ancestor up are not overcounted.
2056                  */
2057                 if (cur == ancestor)
2058                         return (0);
2059 
2060                 /*
2061                  * A zero dd_filesystem_count marks an uninitialized node;
2062                  * its counts are not valid, so the walk stops here.
2063                  */
2064                 if (cur->dd_phys->dd_filesystem_count == 0)
2065                         return (0);
2066 
2067                 /* Without a snapshot limit property, nothing to enforce. */
2068                 err = dsl_prop_get_dd(cur,
2069                     zfs_prop_to_name(ZFS_PROP_SNAPSHOT_LIMIT), 8, 1, &limit,
2070                     NULL, B_FALSE);
2071                 if (err == ENOENT)
2072                         return (0);
2073                 if (err != 0)
2074                         return (err);
2075 
2076 #ifdef _KERNEL
2077                 extern void __dtrace_probe_zfs__ss__limit(uint64_t,
2078                     uint64_t, char *);
2079                 __dtrace_probe_zfs__ss__limit(
2080                     (uint64_t)cur->dd_phys->dd_snapshot_count,
2081                     (uint64_t)limit, cur->dd_myname);
2082 #endif
2083 
2084                 if (limit != MAXLIMIT &&
2085                     (cur->dd_phys->dd_snapshot_count + cnt) > limit)
2086                         return (EDQUOT);
2087         }
2088 
2089         return (0);
2090 }
2091 
2092 /*
2093  * Add "delta" to the snapshot count of "dd" and of each of its ancestors:
2094  * +1 for every snapshot that is created, -1 for every one destroyed.
2095  *
2096  *   dd     directory whose count, and whose ancestors' counts, change
2097  *   tx     transaction; counts are only modified in syncing context
2098  *   delta  signed adjustment; 0 is a no-op
2099  *   first  B_TRUE on the initial call, B_FALSE on the recursive calls
2100  *          made here, so the feature check runs once per adjustment
2101  *
2102  * Nothing is changed when delta is 0, when tx is not syncing, when the
2103  * SPA_FEATURE_FS_SS_LIMIT feature is not active, or from the first node
2104  * upwards whose counts are uninitialized (dd_filesystem_count == 0).
2105  * No value is returned; callers cannot tell whether counts were updated.
2106  */
2107 void
2108 dsl_snapcount_adjust(dsl_dir_t *dd, dmu_tx_t *tx, int64_t delta,
2109     boolean_t first)
2110 {
2111         /* Renaming a dataset that has no snapshots yields a delta of 0. */
2112         if (delta == 0)
2113                 return;
2114 
2115         /*
2116          * As with dsl_dataset_set_reservation_check(), we don't want to
2117          * do any of this in open context; only a syncing tx may change
2118          * the counts.
2119          */
2120         if (!dmu_tx_is_syncing(tx))
2121                 return;
2122 
2123         /*
2124          * A zero dd_filesystem_count marks an uninitialized node: either
2125          * the feature has not yet been activated or there are no limits on
2126          * this part of the tree.  The counts on such a node are not valid
2127          * and must not be touched, so the walk up the tree stops here.
2128          */
2129         if (dd->dd_phys->dd_filesystem_count == 0)
2130                 return;
2131 
2132         /*
2133          * The feature might have been active earlier, leaving non-zero
2134          * counts on the nodes, and be inactive now; in that case the
2135          * on-disk count fields are left alone.  (The feature cannot be
2136          * active if it is not enabled.)  The check depends only on the
2137          * spa, so it is made once, on the initial call, and not repeated
2138          * for every ancestor.  The spa is taken from dd's pool, which
2139          * avoids holding the head dataset just to look it up.
2140          */
2141         if (first) {
2142                 zfeature_info_t *quota_feat =
2143                     &spa_feature_table[SPA_FEATURE_FS_SS_LIMIT];
2144 
2145                 if (!spa_feature_is_active(dd->dd_pool->dp_spa, quota_feat))
2146                         return;
2147         }
2148 
2149         /* Adjust the count stored on this directory. */
2150         dmu_buf_will_dirty(dd->dd_dbuf, tx);
2151 
2152         mutex_enter(&dd->dd_lock);
2153 
2154         dd->dd_phys->dd_snapshot_count += delta;
2155 
2156         /*
2157          * Roll the adjustment up into our ancestors.  dd_lock stays held
2158          * across the recursive call, so locks are taken child first.
2159          * NOTE(review): confirm that this lock order is intended.
2160          */
2161         if (dd->dd_parent != NULL)
2162                 dsl_snapcount_adjust(dd->dd_parent, tx, delta, B_FALSE);
2163 
2164         mutex_exit(&dd->dd_lock);
2165 }
2166 
2167 /*
2168  * Check whether snapshot "snapname" of ds can be created in tx.  "cnt" is
2169  * the number of snapshots passed on to the snapshot-limit check.
2170  *
2171  * Returns 0 if the snapshot can be taken, otherwise an errno value such
2172  * as EAGAIN, EEXIST or ENAMETOOLONG, or the error of a failed sub-check.
2173  */
2174 int
2175 dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
2176     uint64_t cnt, dmu_tx_t *tx)
2177 {
2178         uint64_t ignored;
2179         int error;
2180 
2181         /* Only one snapshot per txg is allowed; the caller should retry. */
2182         if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
2183                 return (EAGAIN);
2184 
2185         /* The snapshot name must not be in use already. */
2186         error = dsl_dataset_snap_lookup(ds, snapname, &ignored);
2187         if (error != ENOENT)
2188                 return (error == 0 ? EEXIST : error);
2189 
2190         /*
2191          * The full name is the dataset's name, an '@' and the snapshot
2192          * name; together they must fit in MAXNAMELEN.
2193          */
2194         if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
2195                 return (ENAMETOOLONG);
2196 
2197         /* Enforce the snapshot limits, then reserve space. */
2198         error = dsl_snapcount_check(ds->ds_dir, cnt, NULL);
2199         if (error == 0)
2200                 error = dsl_dataset_snapshot_reserve_space(ds, tx);
2201         if (error != 0)
2202                 return (error);
2203 
2204         /* Record the txg in which this snapshot is being attempted. */
2205         ds->ds_trysnap_txg = tx->tx_txg;
2206         return (0);
2207 }
2208 
2209 void
2210 dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
2211     dmu_tx_t *tx)
2212 {
2213         dsl_pool_t *dp = ds->ds_dir->dd_pool;
2214         dmu_buf_t *dbuf;
2215         dsl_dataset_phys_t *dsphys;
2216         uint64_t dsobj, crtxg;
2217         objset_t *mos = dp->dp_meta_objset;
2218         int err;
2219 
2220         ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
2221 
2222         dsl_snapcount_adjust(ds->ds_dir, tx, 1, B_TRUE);
2223 
2224         /*
2225          * The origin's ds_creation_txg has to be < TXG_INITIAL
2226          */
2227         if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
2228                 crtxg = 1;
2229         else
2230                 crtxg = tx->tx_txg;
2231 
2232         dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
2233             DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
2234         VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
2235         dmu_buf_will_dirty(dbuf, tx);
2236         dsphys = dbuf->db_data;
2237         bzero(dsphys, sizeof (dsl_dataset_phys_t));
2238         dsphys->ds_dir_obj = ds->ds_dir->dd_object;
2239         dsphys->ds_fsid_guid = unique_create();
2240         (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
2241             sizeof (dsphys->ds_guid));
2242         dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
2243         dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;

2859                 if (ds->ds_phys->ds_prev_snap_obj == 0)
2860                         continue;
2861 
2862                 dsl_deadlist_space(&ds->ds_deadlist,
2863                     &dlused, &dlcomp, &dluncomp);
2864                 pa->used += dlused;
2865                 pa->comp += dlcomp;
2866                 pa->uncomp += dluncomp;
2867         }
2868 
2869         /*
2870          * If we are a clone of a clone then we never reached ORIGIN,
2871          * so we need to subtract out the clone origin's used space.
2872          */
2873         if (pa->origin_origin) {
2874                 pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
2875                 pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
2876                 pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
2877         }
2878 
2879         /* Check that there is enough space and limit headroom here */
2880         err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2881             origin_ds->ds_dir, pa->used, tx);
2882         if (err)
2883                 return (err);
2884 
2885         /*
2886          * Compute the amounts of space that will be used by snapshots
2887          * after the promotion (for both origin and clone).  For each,
2888          * it is the amount of space that will be on all of their
2889          * deadlists (that was not born before their new origin).
2890          */
2891         if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
2892                 uint64_t space;
2893 
2894                 /*
2895                  * Note, typically this will not be a clone of a clone,
2896                  * so dd_origin_txg will be < TXG_INITIAL, so
2897                  * these snaplist_space() -> dsl_deadlist_space_range()
2898                  * calls will be fast because they do not have to
2899                  * iterate over all bps.
2900                  */
2901                 snap = list_head(&pa->origin_snaps);

2994 
2995         }
2996 
2997         /* move snapshots to this dir */
2998         for (snap = list_head(&pa->shared_snaps); snap;
2999             snap = list_next(&pa->shared_snaps, snap)) {
3000                 dsl_dataset_t *ds = snap->ds;
3001 
3002                 /* unregister props as dsl_dir is changing */
3003                 if (ds->ds_objset) {
3004                         dmu_objset_evict(ds->ds_objset);
3005                         ds->ds_objset = NULL;
3006                 }
3007                 /* move snap name entry */
3008                 VERIFY(0 == dsl_dataset_get_snapname(ds));
3009                 VERIFY(0 == dsl_dataset_snap_remove(origin_head,
3010                     ds->ds_snapname, tx));
3011                 VERIFY(0 == zap_add(dp->dp_meta_objset,
3012                     hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
3013                     8, 1, &ds->ds_object, tx));
3014                 dsl_snapcount_adjust(hds->ds_dir, tx, 1, B_TRUE);
3015 
3016                 /* change containing dsl_dir */
3017                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
3018                 ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
3019                 ds->ds_phys->ds_dir_obj = dd->dd_object;
3020                 ASSERT3P(ds->ds_dir, ==, odd);
3021                 dsl_dir_close(ds->ds_dir, ds);
3022                 VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
3023                     NULL, ds, &ds->ds_dir));
3024 
3025                 /* move any clone references */
3026                 if (ds->ds_phys->ds_next_clones_obj &&
3027                     spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
3028                         zap_cursor_t zc;
3029                         zap_attribute_t za;
3030 
3031                         for (zap_cursor_init(&zc, dp->dp_meta_objset,
3032                             ds->ds_phys->ds_next_clones_obj);
3033                             zap_cursor_retrieve(&zc, &za) == 0;
3034                             zap_cursor_advance(&zc)) {