Print this page
Optimize creation and removal of temporary "user holds" placed on
snapshots by a zfs send, by ensuring all the required holds and
releases are done in a single dsl_sync_task.
Creation now collates the required holds during a dry run and
then uses a single lzc_hold call via zfs_hold_apply instead of
processing each snapshot in turn.
Deferred (on exit) cleanup by the kernel is also now done in
dsl_sync_task by reusing dsl_dataset_user_release.
On a test with 11 volumes in a tree, each with 8 snapshots, on a
single-HDD zpool, this reduces the time required to perform a full
send from 20 seconds to under 0.8 seconds.
For reference, eliminating the hold entirely reduces this to 0.15
seconds.
While I'm here:-
* Remove some unused structures
* Fix nvlist_t leak in zfs_release_one

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/dsl_userhold.c
          +++ new/usr/src/uts/common/fs/zfs/dsl_userhold.c
↓ open down ↓ 139 lines elided ↑ open up ↑
 140  140                  zapobj = ds->ds_phys->ds_userrefs_obj;
 141  141          }
 142  142          ds->ds_userrefs++;
 143  143          mutex_exit(&ds->ds_lock);
 144  144  
 145  145          VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 146  146  
 147  147          if (minor != 0) {
 148  148                  VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 149  149                      htag, now, tx));
 150      -                dsl_register_onexit_hold_cleanup(ds, htag, minor);
 151  150          }
 152  151  
 153  152          spa_history_log_internal_ds(ds, "hold", tx,
 154  153              "tag=%s temp=%d refs=%llu",
 155  154              htag, minor != 0, ds->ds_userrefs);
 156  155  }
 157  156  
 158  157  static void
 159  158  dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 160  159  {
 161  160          dsl_dataset_user_hold_arg_t *dduha = arg;
 162  161          dsl_pool_t *dp = dmu_tx_pool(tx);
 163  162          nvpair_t *pair;
 164  163          uint64_t now = gethrestime_sec();
 165  164  
 166  165          for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
 167  166              pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
 168  167                  dsl_dataset_t *ds;
      168 +
 169  169                  VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 170  170                  dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
 171  171                      dduha->dduha_minor, now, tx);
 172  172                  dsl_dataset_rele(ds, FTAG);
 173  173          }
 174  174  }
 175  175  
 176  176  /*
 177  177   * holds is nvl of snapname -> holdname
 178  178   * errlist will be filled in with snapname -> error
 179  179   * if cleanup_minor is not 0, the holds will be temporary, cleaned up
 180  180   * when the process exits.
 181  181   *
 182  182   * if any fails, all will fail.
 183  183   */
 184  184  int
 185  185  dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 186  186  {
 187  187          dsl_dataset_user_hold_arg_t dduha;
 188  188          nvpair_t *pair;
      189 +        int ret;
 189  190  
 190  191          pair = nvlist_next_nvpair(holds, NULL);
 191  192          if (pair == NULL)
 192  193                  return (0);
 193  194  
 194  195          dduha.dduha_holds = holds;
 195  196          dduha.dduha_errlist = errlist;
 196  197          dduha.dduha_minor = cleanup_minor;
 197  198  
 198      -        return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 199      -            dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
      199 +        ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
      200 +            dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
      201 +        if (ret == 0)
      202 +                dsl_register_onexit_hold_cleanup(holds, cleanup_minor);
      203 +
      204 +        return (ret);
 200  205  }
 201  206  
 202  207  typedef struct dsl_dataset_user_release_arg {
 203  208          nvlist_t *ddura_holds;
 204  209          nvlist_t *ddura_todelete;
 205  210          nvlist_t *ddura_errlist;
 206  211  } dsl_dataset_user_release_arg_t;
 207  212  
 208  213  static int
 209  214  dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
↓ open down ↓ 134 lines elided ↑ open up ↑
 344  349  /*
 345  350   * holds is nvl of snapname -> { holdname, ... }
 346  351   * errlist will be filled in with snapname -> error
 347  352   *
 348  353   * if any fails, all will fail.
 349  354   */
 350  355  int
 351  356  dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
 352  357  {
 353  358          dsl_dataset_user_release_arg_t ddura;
 354      -        nvpair_t *pair;
      359 +        nvpair_t *pair, *pair2;
 355  360          int error;
 356  361  
 357  362          pair = nvlist_next_nvpair(holds, NULL);
 358  363          if (pair == NULL)
 359  364                  return (0);
 360  365  
      366 +#ifdef _KERNEL
      367 +        /*
      368 +         * The release may cause the snapshot to be destroyed; make sure it
      369 +         * is not mounted.
      370 +         */
      371 +        for (pair2 = pair; pair2 != NULL;
      372 +            pair2 = nvlist_next_nvpair(holds, pair2)) {
      373 +                zfs_unmount_snap(nvpair_name(pair2));
      374 +        }
      375 +#endif
      376 +
 361  377          ddura.ddura_holds = holds;
 362  378          ddura.ddura_errlist = errlist;
 363  379          ddura.ddura_todelete = fnvlist_alloc();
 364  380  
 365  381          error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
 366  382              dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
 367  383          fnvlist_free(ddura.ddura_todelete);
 368  384          return (error);
 369  385  }
 370  386  
 371      -typedef struct dsl_dataset_user_release_tmp_arg {
 372      -        uint64_t ddurta_dsobj;
 373      -        nvlist_t *ddurta_holds;
 374      -        boolean_t ddurta_deleteme;
 375      -} dsl_dataset_user_release_tmp_arg_t;
 376      -
 377      -static int
 378      -dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
 379      -{
 380      -        dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 381      -        dsl_pool_t *dp = dmu_tx_pool(tx);
 382      -        dsl_dataset_t *ds;
 383      -        int error;
 384      -
 385      -        if (!dmu_tx_is_syncing(tx))
 386      -                return (0);
 387      -
 388      -        error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
 389      -        if (error)
 390      -                return (error);
 391      -
 392      -        error = dsl_dataset_user_release_check_one(ds,
 393      -            ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
 394      -        dsl_dataset_rele(ds, FTAG);
 395      -        return (error);
 396      -}
 397      -
 398  387  static void
 399      -dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
      388 +dsl_dataset_user_release_onexit(void *arg)
 400  389  {
 401      -        dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 402      -        dsl_pool_t *dp = dmu_tx_pool(tx);
 403      -        dsl_dataset_t *ds;
      390 +        nvlist_t *holds = arg;
 404  391  
 405      -        VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
 406      -        dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
 407      -        if (ddurta->ddurta_deleteme) {
 408      -                ASSERT(ds->ds_userrefs == 0 &&
 409      -                    ds->ds_phys->ds_num_children == 1 &&
 410      -                    DS_IS_DEFER_DESTROY(ds));
 411      -                dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 412      -        }
 413      -        dsl_dataset_rele(ds, FTAG);
      392 +        (void) dsl_dataset_user_release(holds, NULL);
      393 +        fnvlist_free(holds);
 414  394  }
 415  395  
 416      -/*
 417      - * Called at spa_load time to release a stale temporary user hold.
 418      - * Also called by the onexit code.
 419      - */
 420  396  void
 421      -dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
      397 +dsl_register_onexit_hold_cleanup(nvlist_t *holds, minor_t minor)
 422  398  {
 423      -        dsl_dataset_user_release_tmp_arg_t ddurta;
 424      -        dsl_dataset_t *ds;
 425      -        int error;
 426      -
 427      -#ifdef _KERNEL
 428      -        /* Make sure it is not mounted. */
 429      -        dsl_pool_config_enter(dp, FTAG);
 430      -        error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
 431      -        if (error == 0) {
 432      -                char name[MAXNAMELEN];
 433      -                dsl_dataset_name(ds, name);
 434      -                dsl_dataset_rele(ds, FTAG);
 435      -                dsl_pool_config_exit(dp, FTAG);
 436      -                zfs_unmount_snap(name);
 437      -        } else {
 438      -                dsl_pool_config_exit(dp, FTAG);
 439      -        }
 440      -#endif
 441      -
 442      -        ddurta.ddurta_dsobj = dsobj;
 443      -        ddurta.ddurta_holds = fnvlist_alloc();
 444      -        fnvlist_add_boolean(ddurta.ddurta_holds, htag);
 445      -
 446      -        (void) dsl_sync_task(spa_name(dp->dp_spa),
 447      -            dsl_dataset_user_release_tmp_check,
 448      -            dsl_dataset_user_release_tmp_sync, &ddurta, 1);
 449      -        fnvlist_free(ddurta.ddurta_holds);
 450      -}
 451      -
 452      -typedef struct zfs_hold_cleanup_arg {
 453      -        char zhca_spaname[MAXNAMELEN];
 454      -        uint64_t zhca_spa_load_guid;
 455      -        uint64_t zhca_dsobj;
 456      -        char zhca_htag[MAXNAMELEN];
 457      -} zfs_hold_cleanup_arg_t;
 458      -
 459      -static void
 460      -dsl_dataset_user_release_onexit(void *arg)
 461      -{
 462      -        zfs_hold_cleanup_arg_t *ca = arg;
 463      -        spa_t *spa;
 464      -        int error;
      399 +        nvlist_t *ca;
      400 +        nvpair_t *pair;
      401 +        char *htag;
 465  402  
 466      -        error = spa_open(ca->zhca_spaname, &spa, FTAG);
 467      -        if (error != 0) {
 468      -                zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 469      -                    "because pool is no longer loaded",
 470      -                    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 471      -                return;
 472      -        }
 473      -        if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
 474      -                zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 475      -                    "because pool is no longer loaded (guid doesn't match)",
 476      -                    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 477      -                spa_close(spa, FTAG);
 478      -                return;
 479      -        }
 480      -
 481      -        dsl_dataset_user_release_tmp(spa_get_dsl(spa),
 482      -            ca->zhca_dsobj, ca->zhca_htag);
 483      -        kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 484      -        spa_close(spa, FTAG);
 485      -}
      403 +        ca = fnvlist_alloc();
      404 +        /*
      405 +         * Convert from hold format: nvl of snapname -> holdname
      406 +         * to release format: nvl of snapname -> { holdname, ... }
      407 +         */
      408 +        for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
      409 +            pair = nvlist_next_nvpair(holds, pair)) {
      410 +                if (nvpair_value_string(pair, &htag) == 0) {
      411 +                        nvlist_t *tags;
 486  412  
 487      -void
 488      -dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
 489      -    minor_t minor)
 490      -{
 491      -        zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
 492      -        spa_t *spa = dsl_dataset_get_spa(ds);
 493      -        (void) strlcpy(ca->zhca_spaname, spa_name(spa),
 494      -            sizeof (ca->zhca_spaname));
 495      -        ca->zhca_spa_load_guid = spa_load_guid(spa);
 496      -        ca->zhca_dsobj = ds->ds_object;
 497      -        (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
      413 +                        tags = fnvlist_alloc();
      414 +                        fnvlist_add_boolean(tags, htag);
      415 +                        fnvlist_add_nvlist(ca, nvpair_name(pair), tags);
      416 +                        fnvlist_free(tags);
      417 +                }
      418 +        }
 498  419          VERIFY0(zfs_onexit_add_cb(minor,
 499  420              dsl_dataset_user_release_onexit, ca, NULL));
 500  421  }
 501  422  
 502  423  int
 503  424  dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 504  425  {
 505  426          dsl_pool_t *dp;
 506  427          dsl_dataset_t *ds;
 507  428          int err;
↓ open down ↓ 29 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX