Print this page
Optimize creation and removal of temporary "user holds" placed on
snapshots by a zfs send, by ensuring all the required holds and
releases are done in a single dsl_sync_task.
Creation now collates the required holds during a dry run and
then uses a single lzc_hold call via zfs_hold_apply instead of
processing each snapshot in turn.
Defered (on exit) cleanup by the kernel is also now done in
dsl_sync_task by reusing dsl_dataset_user_release.
On a test with 11 volumes in a tree each with 8 snapshots on a
single HDD zpool this reduces the time required to perform a full
send from 20 seconds to under 0.8 seconds.
For reference eliminating the hold entirely reduces this 0.15
seconds.
While I'm here:-
* Remove some unused structures
* Fix nvlist_t leak in zfs_release_one

@@ -145,11 +145,10 @@
         VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 
         if (minor != 0) {
                 VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
                     htag, now, tx));
-                dsl_register_onexit_hold_cleanup(ds, htag, minor);
         }
 
         spa_history_log_internal_ds(ds, "hold", tx,
             "tag=%s temp=%d refs=%llu",
             htag, minor != 0, ds->ds_userrefs);

@@ -164,10 +163,11 @@
         uint64_t now = gethrestime_sec();
 
         for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
             pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
                 dsl_dataset_t *ds;
+
                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
                 dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
                     dduha->dduha_minor, now, tx);
                 dsl_dataset_rele(ds, FTAG);
         }

@@ -184,21 +184,26 @@
 int
 dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 {
         dsl_dataset_user_hold_arg_t dduha;
         nvpair_t *pair;
+        int ret;
 
         pair = nvlist_next_nvpair(holds, NULL);
         if (pair == NULL)
                 return (0);
 
         dduha.dduha_holds = holds;
         dduha.dduha_errlist = errlist;
         dduha.dduha_minor = cleanup_minor;
 
-        return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
-            dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
+        ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
+            dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
+        if (ret == 0)
+                dsl_register_onexit_hold_cleanup(holds, cleanup_minor);
+
+        return (ret);
 }
 
 typedef struct dsl_dataset_user_release_arg {
         nvlist_t *ddura_holds;
         nvlist_t *ddura_todelete;

@@ -349,154 +354,70 @@
  */
 int
 dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
 {
         dsl_dataset_user_release_arg_t ddura;
-        nvpair_t *pair;
+        nvpair_t *pair, *pair2;
         int error;
 
         pair = nvlist_next_nvpair(holds, NULL);
         if (pair == NULL)
                 return (0);
 
+#ifdef _KERNEL
+        /*
+         * The release may cause the snapshot to be destroyed; make sure it
+         * is not mounted.
+         */
+        for (pair2 = pair; pair2 != NULL;
+            pair2 = nvlist_next_nvpair(holds, pair2)) {
+                zfs_unmount_snap(nvpair_name(pair2));
+        }
+#endif
+
         ddura.ddura_holds = holds;
         ddura.ddura_errlist = errlist;
         ddura.ddura_todelete = fnvlist_alloc();
 
         error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
             dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
         fnvlist_free(ddura.ddura_todelete);
         return (error);
 }
 
-typedef struct dsl_dataset_user_release_tmp_arg {
-        uint64_t ddurta_dsobj;
-        nvlist_t *ddurta_holds;
-        boolean_t ddurta_deleteme;
-} dsl_dataset_user_release_tmp_arg_t;
-
-static int
-dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
-{
-        dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
-        dsl_pool_t *dp = dmu_tx_pool(tx);
-        dsl_dataset_t *ds;
-        int error;
-
-        if (!dmu_tx_is_syncing(tx))
-                return (0);
-
-        error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
-        if (error)
-                return (error);
-
-        error = dsl_dataset_user_release_check_one(ds,
-            ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
-        dsl_dataset_rele(ds, FTAG);
-        return (error);
-}
-
 static void
-dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
+dsl_dataset_user_release_onexit(void *arg)
 {
-        dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
-        dsl_pool_t *dp = dmu_tx_pool(tx);
-        dsl_dataset_t *ds;
+        nvlist_t *holds = arg;
 
-        VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
-        dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
-        if (ddurta->ddurta_deleteme) {
-                ASSERT(ds->ds_userrefs == 0 &&
-                    ds->ds_phys->ds_num_children == 1 &&
-                    DS_IS_DEFER_DESTROY(ds));
-                dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
-        }
-        dsl_dataset_rele(ds, FTAG);
+        (void) dsl_dataset_user_release(holds, NULL);
+        fnvlist_free(holds);
 }
 
-/*
- * Called at spa_load time to release a stale temporary user hold.
- * Also called by the onexit code.
- */
 void
-dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
-{
-        dsl_dataset_user_release_tmp_arg_t ddurta;
-        dsl_dataset_t *ds;
-        int error;
-
-#ifdef _KERNEL
-        /* Make sure it is not mounted. */
-        dsl_pool_config_enter(dp, FTAG);
-        error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
-        if (error == 0) {
-                char name[MAXNAMELEN];
-                dsl_dataset_name(ds, name);
-                dsl_dataset_rele(ds, FTAG);
-                dsl_pool_config_exit(dp, FTAG);
-                zfs_unmount_snap(name);
-        } else {
-                dsl_pool_config_exit(dp, FTAG);
-        }
-#endif
-
-        ddurta.ddurta_dsobj = dsobj;
-        ddurta.ddurta_holds = fnvlist_alloc();
-        fnvlist_add_boolean(ddurta.ddurta_holds, htag);
-
-        (void) dsl_sync_task(spa_name(dp->dp_spa),
-            dsl_dataset_user_release_tmp_check,
-            dsl_dataset_user_release_tmp_sync, &ddurta, 1);
-        fnvlist_free(ddurta.ddurta_holds);
-}
-
-typedef struct zfs_hold_cleanup_arg {
-        char zhca_spaname[MAXNAMELEN];
-        uint64_t zhca_spa_load_guid;
-        uint64_t zhca_dsobj;
-        char zhca_htag[MAXNAMELEN];
-} zfs_hold_cleanup_arg_t;
-
-static void
-dsl_dataset_user_release_onexit(void *arg)
+dsl_register_onexit_hold_cleanup(nvlist_t *holds, minor_t minor)
 {
-        zfs_hold_cleanup_arg_t *ca = arg;
-        spa_t *spa;
-        int error;
+        nvlist_t *ca;
+        nvpair_t *pair;
+        char *htag;
 
-        error = spa_open(ca->zhca_spaname, &spa, FTAG);
-        if (error != 0) {
-                zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
-                    "because pool is no longer loaded",
-                    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
-                return;
-        }
-        if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
-                zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
-                    "because pool is no longer loaded (guid doesn't match)",
-                    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
-                spa_close(spa, FTAG);
-                return;
-        }
-
-        dsl_dataset_user_release_tmp(spa_get_dsl(spa),
-            ca->zhca_dsobj, ca->zhca_htag);
-        kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
-        spa_close(spa, FTAG);
-}
+        ca = fnvlist_alloc();
+        /*
+         * Convert from hold format: nvl of snapname -> holdname
+         * to release format: nvl of snapname -> { holdname, ... }
+         */
+        for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+            pair = nvlist_next_nvpair(holds, pair)) {
+                if (nvpair_value_string(pair, &htag) == 0) {
+                        nvlist_t *tags;
 
-void
-dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
-    minor_t minor)
-{
-        zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
-        spa_t *spa = dsl_dataset_get_spa(ds);
-        (void) strlcpy(ca->zhca_spaname, spa_name(spa),
-            sizeof (ca->zhca_spaname));
-        ca->zhca_spa_load_guid = spa_load_guid(spa);
-        ca->zhca_dsobj = ds->ds_object;
-        (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
+                        tags = fnvlist_alloc();
+                        fnvlist_add_boolean(tags, htag);
+                        fnvlist_add_nvlist(ca, nvpair_name(pair), tags);
+                        fnvlist_free(tags);
+                }
+        }
         VERIFY0(zfs_onexit_add_cb(minor,
             dsl_dataset_user_release_onexit, ca, NULL));
 }
 
 int