Print this page
Optimize creation and removal of temporary "user holds" placed on
snapshots by a zfs send, by ensuring all the required holds and
releases are done in a single dsl_sync_task.
Creation now collates the required holds during a dry run and
then uses a single lzc_hold call via zfs_hold_apply instead of
processing each snapshot in turn.
Deferred (on exit) cleanup by the kernel is also now done in a single
dsl_sync_task by reusing dsl_dataset_user_release.
On a test with 11 volumes in a tree, each with 8 snapshots, on a
single-HDD zpool, this reduces the time required to perform a full
send from 20 seconds to under 0.8 seconds.
For reference, eliminating the hold entirely reduces this to 0.15
seconds.
While I'm here:-
* Remove some unused structures
* Fix nvlist_t leak in zfs_release_one


 130         mutex_enter(&ds->ds_lock);
 131         if (ds->ds_phys->ds_userrefs_obj == 0) {
 132                 /*
 133                  * This is the first user hold for this dataset.  Create
 134                  * the userrefs zap object.
 135                  */
 136                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
 137                 zapobj = ds->ds_phys->ds_userrefs_obj =
 138                     zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
 139         } else {
 140                 zapobj = ds->ds_phys->ds_userrefs_obj;
 141         }
 142         ds->ds_userrefs++;
 143         mutex_exit(&ds->ds_lock);
 144 
 145         VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 146 
 147         if (minor != 0) {
 148                 VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 149                     htag, now, tx));
 150                 dsl_register_onexit_hold_cleanup(ds, htag, minor);
 151         }
 152 
 153         spa_history_log_internal_ds(ds, "hold", tx,
 154             "tag=%s temp=%d refs=%llu",
 155             htag, minor != 0, ds->ds_userrefs);
 156 }
 157 
 158 static void
 159 dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 160 {
             /*
              * Sync callback passed to dsl_sync_task() by
              * dsl_dataset_user_hold().  Walks every snapname -> holdname
              * pair in dduha_holds and applies the hold to that dataset.
              * A single timestamp is sampled up front and handed to every
              * per-dataset hold made in this pass.
              */
 161         dsl_dataset_user_hold_arg_t *dduha = arg;
 162         dsl_pool_t *dp = dmu_tx_pool(tx);
 163         nvpair_t *pair;
 164         uint64_t now = gethrestime_sec();
 165 
 166         for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
 167             pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
 168                 dsl_dataset_t *ds;

                     /*
                      * The lookup is wrapped in VERIFY0, so a failure here
                      * is treated as fatal rather than reported to the caller.
                      */
 169                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 170                 dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
 171                     dduha->dduha_minor, now, tx);
 172                 dsl_dataset_rele(ds, FTAG);
 173         }
 174 }
 175 
 176 /*
 177  * holds is an nvl of snapname -> holdname.
 178  * errlist will be filled in with snapname -> error.
 179  * If cleanup_minor is not 0, the holds will be temporary, cleaned up
 180  * when the process exits.
 181  *
 182  * If any hold fails, all will fail.
 183  */
 184 int
 185 dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 186 {
 187         dsl_dataset_user_hold_arg_t dduha;
 188         nvpair_t *pair;

 189 
             /* An empty holds list is a no-op that reports success. */
 190         pair = nvlist_next_nvpair(holds, NULL);
 191         if (pair == NULL)
 192                 return (0);
 193 
 194         dduha.dduha_holds = holds;
 195         dduha.dduha_errlist = errlist;
 196         dduha.dduha_minor = cleanup_minor;
 197 
             /*
              * All holds go through one sync task.  The first snapshot's
              * name is passed as the task's name argument and the number
              * of pairs in holds as its final argument; the task's result
              * is returned to the caller unchanged.
              */
 198         return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 199             dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));




 200 }
 201 
 202 typedef struct dsl_dataset_user_release_arg {
             /* Caller's nvl of snapname -> { holdname, ... } to release. */
 203         nvlist_t *ddura_holds;
             /*
              * Scratch nvlist; dsl_dataset_user_release() allocates it before
              * dispatching the sync task and frees it afterwards.
              * NOTE(review): presumably names snapshots to destroy once their
              * last hold is released - confirm against the check/sync callbacks.
              */
 204         nvlist_t *ddura_todelete;
             /* Caller's errlist (snapname -> error). */
 205         nvlist_t *ddura_errlist;
 206 } dsl_dataset_user_release_arg_t;
 207 
 208 static int
 209 dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
 210     nvlist_t *holds, boolean_t *todelete)
 211 {
 212         uint64_t zapobj;
 213         nvpair_t *pair;
 214         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 215         int error;
 216         int numholds = 0;
 217 
 218         *todelete = B_FALSE;
 219 


 334                     nvpair_name(pair))) {
 335                         ASSERT(ds->ds_userrefs == 0 &&
 336                             ds->ds_phys->ds_num_children == 1 &&
 337                             DS_IS_DEFER_DESTROY(ds));
 338                         dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 339                 }
 340                 dsl_dataset_rele(ds, FTAG);
 341         }
 342 }
 343 
 344 /*
 345  * holds is an nvl of snapname -> { holdname, ... }.
 346  * errlist will be filled in with snapname -> error.
 347  *
 348  * If any release fails, all will fail.
 349  */
 350 int
 351 dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
 352 {
 353         dsl_dataset_user_release_arg_t ddura;
 354         nvpair_t *pair;
 355         int error;
 356 
             /* An empty holds list is a no-op that reports success. */
 357         pair = nvlist_next_nvpair(holds, NULL);
 358         if (pair == NULL)
 359                 return (0);
 360 











 361         ddura.ddura_holds = holds;
 362         ddura.ddura_errlist = errlist;
             /* Scratch list owned by this function; freed after the task. */
 363         ddura.ddura_todelete = fnvlist_alloc();
 364 
             /*
              * One sync task covers every entry in holds; the first
              * snapshot's name is passed as the task's name argument.
              */
 365         error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
 366             dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
 367         fnvlist_free(ddura.ddura_todelete);
 368         return (error);
 369 }
 370 
 371 typedef struct dsl_dataset_user_release_tmp_arg {
             /* Object number of the dataset whose hold is being released. */
 372         uint64_t ddurta_dsobj;
             /* nvlist of hold tags to release (boolean entries keyed by tag). */
 373         nvlist_t *ddurta_holds;
             /*
              * Written by the check callback via
              * dsl_dataset_user_release_check_one(); when set, the sync
              * callback destroys the snapshot after releasing the hold.
              */
 374         boolean_t ddurta_deleteme;
 375 } dsl_dataset_user_release_tmp_arg_t;
 376 
 377 static int
 378 dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
 379 {
             /*
              * Check callback for the temporary-hold release sync task.
              * Returns 0 outside syncing context; otherwise returns the
              * error from looking up the dataset by object number or from
              * dsl_dataset_user_release_check_one(), which also fills in
              * ddurta_deleteme.
              */
 380         dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 381         dsl_pool_t *dp = dmu_tx_pool(tx);
 382         dsl_dataset_t *ds;
 383         int error;
 384 
             /* Validation is only performed when the tx is syncing. */
 385         if (!dmu_tx_is_syncing(tx))
 386                 return (0);
 387 
 388         error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
 389         if (error)
 390                 return (error);
 391 
 392         error = dsl_dataset_user_release_check_one(ds,
 393             ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
 394         dsl_dataset_rele(ds, FTAG);
 395         return (error);
 396 }
 397 
 398 static void
 399 dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
 400 {
             /*
              * Sync callback for the temporary-hold release sync task:
              * releases the holds in ddurta_holds on the dataset identified
              * by ddurta_dsobj and, if the check callback set
              * ddurta_deleteme, destroys the snapshot as well.
              */
 401         dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 402         dsl_pool_t *dp = dmu_tx_pool(tx);
 403         dsl_dataset_t *ds;
 404 
 405         VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
 406         dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
 407         if (ddurta->ddurta_deleteme) {
                     /*
                      * Destruction is only expected once no user refs
                      * remain, the snapshot has a single child reference
                      * and it is marked for deferred destroy.
                      */
 408                 ASSERT(ds->ds_userrefs == 0 &&
 409                     ds->ds_phys->ds_num_children == 1 &&
 410                     DS_IS_DEFER_DESTROY(ds));
 411                 dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 412         }
 413         dsl_dataset_rele(ds, FTAG);
 414 }
 415 
 416 /*
 417  * Called at spa_load time to release a stale temporary user hold.
 418  * Also called by the onexit code.
 419  *
      * dp is the pool, dsobj the object number of the held dataset and
      * htag the hold tag to release.  Nothing is returned; the result of
      * the release sync task is discarded.
      */
 420 void
 421 dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
 422 {
 423         dsl_dataset_user_release_tmp_arg_t ddurta;
 424         dsl_dataset_t *ds;
 425         int error;
 426 
 427 #ifdef _KERNEL
 428         /* Make sure it is not mounted. */
             /*
              * The dataset is only held long enough to fetch its name; both
              * the dataset and the pool config are dropped before
              * zfs_unmount_snap() is called.  A failed lookup skips the
              * unmount but does not stop the release attempt below.
              */
 429         dsl_pool_config_enter(dp, FTAG);
 430         error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
 431         if (error == 0) {
 432                 char name[MAXNAMELEN];
 433                 dsl_dataset_name(ds, name);
 434                 dsl_dataset_rele(ds, FTAG);
 435                 dsl_pool_config_exit(dp, FTAG);
 436                 zfs_unmount_snap(name);
 437         } else {
 438                 dsl_pool_config_exit(dp, FTAG);
 439         }
 440 #endif
 441 
             /* Build a one-entry holds list containing just htag. */
 442         ddurta.ddurta_dsobj = dsobj;
 443         ddurta.ddurta_holds = fnvlist_alloc();
 444         fnvlist_add_boolean(ddurta.ddurta_holds, htag);
 445 
             /* The task's return value is deliberately ignored. */
 446         (void) dsl_sync_task(spa_name(dp->dp_spa),
 447             dsl_dataset_user_release_tmp_check,
 448             dsl_dataset_user_release_tmp_sync, &ddurta, 1);
 449         fnvlist_free(ddurta.ddurta_holds);
 450 }
 451 
 452 typedef struct zfs_hold_cleanup_arg {
             /* Pool name, copied from spa_name() when the cleanup is registered. */
 453         char zhca_spaname[MAXNAMELEN];
             /* spa_load_guid() at registration; compared again at exit time. */
 454         uint64_t zhca_spa_load_guid;
             /* Object number of the dataset carrying the temporary hold. */
 455         uint64_t zhca_dsobj;
             /* Tag of the temporary hold to release. */
 456         char zhca_htag[MAXNAMELEN];
 457 } zfs_hold_cleanup_arg_t;
 458 
 459 static void
 460 dsl_dataset_user_release_onexit(void *arg)
 461 {
             /*
              * On-exit callback registered by
              * dsl_register_onexit_hold_cleanup().  arg is the
              * zfs_hold_cleanup_arg_t describing one temporary hold.  The
              * hold is released only if the named pool can still be opened
              * and its load guid matches the one recorded at registration.
              *
              * NOTE(review): ca is freed with kmem_free() only on the
              * success path at the bottom; both early returns below leave
              * without freeing it, which looks like a leak - confirm.
              */
 462         zfs_hold_cleanup_arg_t *ca = arg;
 463         spa_t *spa;
 464         int error;
 465 
 466         error = spa_open(ca->zhca_spaname, &spa, FTAG);
 467         if (error != 0) {
 468                 zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 469                     "because pool is no longer loaded",
 470                     ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 471                 return;
 472         }
             /* A different load guid means this is not the same pool load. */
 473         if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
 474                 zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 475                     "because pool is no longer loaded (guid doesn't match)",
 476                     ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 477                 spa_close(spa, FTAG);
 478                 return;
 479         }
 480 
 481         dsl_dataset_user_release_tmp(spa_get_dsl(spa),
 482             ca->zhca_dsobj, ca->zhca_htag);
 483         kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 484         spa_close(spa, FTAG);
 485 }
 486 
 487 void
 488 dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
 489     minor_t minor)
 490 {
             /*
              * Registers dsl_dataset_user_release_onexit() as an on-exit
              * callback for minor, so the hold htag on ds is released at
              * exit.  The callback argument is a heap-allocated record of
              * the pool name, pool load guid, dataset object number and
              * tag.  Registration failure trips VERIFY0.
              */
 491         zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
 492         spa_t *spa = dsl_dataset_get_spa(ds);
 493         (void) strlcpy(ca->zhca_spaname, spa_name(spa),
 494             sizeof (ca->zhca_spaname));
 495         ca->zhca_spa_load_guid = spa_load_guid(spa);
 496         ca->zhca_dsobj = ds->ds_object;
 497         (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
 498         VERIFY0(zfs_onexit_add_cb(minor,
 499             dsl_dataset_user_release_onexit, ca, NULL));
 500 }
 501 
 502 int
 503 dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 504 {
 505         dsl_pool_t *dp;
 506         dsl_dataset_t *ds;
 507         int err;
 508 
 509         err = dsl_pool_hold(dsname, FTAG, &dp);
 510         if (err != 0)
 511                 return (err);
 512         err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
 513         if (err != 0) {
 514                 dsl_pool_rele(dp, FTAG);
 515                 return (err);
 516         }
 517 


 130         mutex_enter(&ds->ds_lock);
 131         if (ds->ds_phys->ds_userrefs_obj == 0) {
 132                 /*
 133                  * This is the first user hold for this dataset.  Create
 134                  * the userrefs zap object.
 135                  */
 136                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
 137                 zapobj = ds->ds_phys->ds_userrefs_obj =
 138                     zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
 139         } else {
 140                 zapobj = ds->ds_phys->ds_userrefs_obj;
 141         }
 142         ds->ds_userrefs++;
 143         mutex_exit(&ds->ds_lock);
 144 
 145         VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 146 
 147         if (minor != 0) {
 148                 VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 149                     htag, now, tx));

 150         }
 151 
 152         spa_history_log_internal_ds(ds, "hold", tx,
 153             "tag=%s temp=%d refs=%llu",
 154             htag, minor != 0, ds->ds_userrefs);
 155 }
 156 
 157 static void
 158 dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 159 {
             /*
              * Sync callback passed to dsl_sync_task() by
              * dsl_dataset_user_hold().  Applies each snapname -> holdname
              * pair in dduha_holds, using one timestamp for the whole pass.
              */
 160         dsl_dataset_user_hold_arg_t *dduha = arg;
 161         dsl_pool_t *dp = dmu_tx_pool(tx);
 162         nvpair_t *pair;
 163         uint64_t now = gethrestime_sec();
 164 
 165         for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
 166             pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
 167                 dsl_dataset_t *ds;
 168 
                     /* Lookup failure is fatal here (VERIFY0). */
 169                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 170                 dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
 171                     dduha->dduha_minor, now, tx);
 172                 dsl_dataset_rele(ds, FTAG);
 173         }
 174 }
 175 
 176 /*
 177  * holds is an nvl of snapname -> holdname.
 178  * errlist will be filled in with snapname -> error.
 179  * If cleanup_minor is not 0, the holds will be temporary, cleaned up
 180  * when the process exits.
 181  *
 182  * If any hold fails, all will fail.  Returns 0 or the sync task's error.
 183  */
 184 int
 185 dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 186 {
 187         dsl_dataset_user_hold_arg_t dduha;
 188         nvpair_t *pair;
 189         int ret;
 190 
             /* An empty holds list is a no-op that reports success. */
 191         pair = nvlist_next_nvpair(holds, NULL);
 192         if (pair == NULL)
 193                 return (0);
 194 
 195         dduha.dduha_holds = holds;
 196         dduha.dduha_errlist = errlist;
 197         dduha.dduha_minor = cleanup_minor;
 198 
             /* All holds are placed by a single sync task. */
 199         ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 200             dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
             /*
              * Only temporary holds (non-zero cleanup_minor) get an on-exit
              * release registered; permanent holds must not be released
              * when the process exits.
              */
 201         if (ret == 0 && cleanup_minor != 0)
 202                 dsl_register_onexit_hold_cleanup(holds, cleanup_minor);
 203 
 204         return (ret);
 205 }
 206 
 207 typedef struct dsl_dataset_user_release_arg {
             /* Caller's nvl of snapname -> { holdname, ... } to release. */
 208         nvlist_t *ddura_holds;
             /*
              * Scratch nvlist allocated and freed by
              * dsl_dataset_user_release() around the sync task.
              * NOTE(review): presumably names snapshots to destroy after the
              * release - confirm against the check/sync callbacks.
              */
 209         nvlist_t *ddura_todelete;
             /* Caller's errlist (snapname -> error); may be NULL. */
 210         nvlist_t *ddura_errlist;
 211 } dsl_dataset_user_release_arg_t;
 212 
 213 static int
 214 dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
 215     nvlist_t *holds, boolean_t *todelete)
 216 {
 217         uint64_t zapobj;
 218         nvpair_t *pair;
 219         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 220         int error;
 221         int numholds = 0;
 222 
 223         *todelete = B_FALSE;
 224 


 339                     nvpair_name(pair))) {
 340                         ASSERT(ds->ds_userrefs == 0 &&
 341                             ds->ds_phys->ds_num_children == 1 &&
 342                             DS_IS_DEFER_DESTROY(ds));
 343                         dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 344                 }
 345                 dsl_dataset_rele(ds, FTAG);
 346         }
 347 }
 348 
 349 /*
 350  * holds is an nvl of snapname -> { holdname, ... }.
 351  * errlist will be filled in with snapname -> error.
 352  *
 353  * If any release fails, all will fail.
 354  */
 355 int
 356 dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
 357 {
 358         dsl_dataset_user_release_arg_t ddura;
 359         nvpair_t *pair, *pair2;
 360         int error;
 361 
             /* An empty holds list is a no-op that reports success. */
 362         pair = nvlist_next_nvpair(holds, NULL);
 363         if (pair == NULL)
 364                 return (0);
 365 
 366 #ifdef _KERNEL
 367         /*
 368          * The release may cause the snapshot to be destroyed; make sure it
 369          * is not mounted.
 370          */
             /* Every snapshot named in holds is unmounted, starting at the first. */
 371         for (pair2 = pair; pair2 != NULL;
 372             pair2 = nvlist_next_nvpair(holds, pair2)) {
 373                 zfs_unmount_snap(nvpair_name(pair2));
 374         }
 375 #endif
 376 
 377         ddura.ddura_holds = holds;
 378         ddura.ddura_errlist = errlist;
             /* Scratch list owned by this function; freed after the task. */
 379         ddura.ddura_todelete = fnvlist_alloc();
 380 
             /* One sync task covers every entry in holds. */
 381         error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
 382             dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
 383         fnvlist_free(ddura.ddura_todelete);
 384         return (error);
 385 }
 386 



























 387 static void
 388 dsl_dataset_user_release_onexit(void *arg)
 389 {
             /*
              * On-exit callback registered by
              * dsl_register_onexit_hold_cleanup().  arg is the nvlist built
              * there, already in release format
              * (snapname -> { holdname, ... }); this callback frees it.
              */
 390         nvlist_t *holds = arg;


 391 
             /* Best effort: no errlist is supplied and the result is ignored. */
 392         (void) dsl_dataset_user_release(holds, NULL);
 393         fnvlist_free(holds);







 394 }
 395 




 396 void
 397 dsl_register_onexit_hold_cleanup(nvlist_t *holds, minor_t minor)







































 398 {
             /*
              * Registers a single on-exit callback for minor that releases
              * every hold in holds.  holds is in hold format and is not
              * modified; a converted copy (ca) is built here and handed to
              * dsl_dataset_user_release_onexit(), which frees it.
              * Registration failure trips VERIFY0.
              */
 399         nvlist_t *ca;
 400         nvpair_t *pair;
 401         char *htag;
 402 
 403         ca = fnvlist_alloc();
 404         /*
 405          * Convert from hold format: nvl of snapname -> holdname
 406          * to release format: nvl of snapname -> { holdname, ... }
 407          */
 408         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 409             pair = nvlist_next_nvpair(holds, pair)) {
                     /* Pairs whose value is not a string are silently skipped. */
 410                 if (nvpair_value_string(pair, &htag) == 0) {
 411                         nvlist_t *tags;











 412 
                             /*
                              * tags is freed right after being added to ca.
                              * NOTE(review): relies on fnvlist_add_nvlist()
                              * storing its own copy - confirm.
                              */
 413                         tags = fnvlist_alloc();
 414                         fnvlist_add_boolean(tags, htag);
 415                         fnvlist_add_nvlist(ca, nvpair_name(pair), tags);
 416                         fnvlist_free(tags);
 417                 }
 418         }





 419         VERIFY0(zfs_onexit_add_cb(minor,
 420             dsl_dataset_user_release_onexit, ca, NULL));
 421 }
 422 
 423 int
 424 dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 425 {
 426         dsl_pool_t *dp;
 427         dsl_dataset_t *ds;
 428         int err;
 429 
 430         err = dsl_pool_hold(dsname, FTAG, &dp);
 431         if (err != 0)
 432                 return (err);
 433         err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
 434         if (err != 0) {
 435                 dsl_pool_rele(dp, FTAG);
 436                 return (err);
 437         }
 438