Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>


  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013 by Delphix. All rights reserved.
  24  */
  25 
  26 #include <sys/zfs_context.h>
  27 #include <sys/dsl_userhold.h>
  28 #include <sys/dsl_dataset.h>
  29 #include <sys/dsl_destroy.h>
  30 #include <sys/dsl_synctask.h>
  31 #include <sys/dmu_tx.h>
  32 #include <sys/zfs_onexit.h>
  33 #include <sys/dsl_pool.h>
  34 #include <sys/dsl_dir.h>
  35 #include <sys/zfs_ioctl.h>
  36 #include <sys/zap.h>
  37 
  38 typedef struct dsl_dataset_user_hold_arg {

  39         nvlist_t *dduha_holds;

  40         nvlist_t *dduha_errlist;
  41         minor_t dduha_minor;

  42 } dsl_dataset_user_hold_arg_t;
  43 
  44 /*
  45  * If you add new checks here, you may need to add additional checks to the
  46  * "temporary" case in snapshot_check() in dmu_objset.c.
  47  */
  48 int
  49 dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
  50     boolean_t temphold, dmu_tx_t *tx)
  51 {
  52         dsl_pool_t *dp = dmu_tx_pool(tx);
  53         objset_t *mos = dp->dp_meta_objset;
  54         int error = 0;
  55 
  56         if (strlen(htag) > MAXNAMELEN)
  57                 return (E2BIG);
  58         /* Tempholds have a more restricted length */
  59         if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
  60                 return (E2BIG);
  61 


  67                         error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
  68                             htag, 8, 1, &value);
  69                         if (error == 0)
  70                                 error = SET_ERROR(EEXIST);
  71                         else if (error == ENOENT)
  72                                 error = 0;
  73                 }
  74                 mutex_exit(&ds->ds_lock);
  75         }
  76 
  77         return (error);
  78 }
  79 
  80 static int
  81 dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
  82 {
  83         dsl_dataset_user_hold_arg_t *dduha = arg;
  84         dsl_pool_t *dp = dmu_tx_pool(tx);
  85         nvpair_t *pair;
  86         int rv = 0;

  87 
  88         if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
  89                 return (SET_ERROR(ENOTSUP));
  90 


  91         for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
  92             pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
  93                 int error = 0;
  94                 dsl_dataset_t *ds;
  95                 char *htag;
  96 
  97                 /* must be a snapshot */
  98                 if (strchr(nvpair_name(pair), '@') == NULL)
  99                         error = SET_ERROR(EINVAL);
 100 
 101                 if (error == 0)
 102                         error = nvpair_value_string(pair, &htag);
 103                 if (error == 0) {
 104                         error = dsl_dataset_hold(dp,
 105                             nvpair_name(pair), FTAG, &ds);













 106                 }
 107                 if (error == 0) {
 108                         error = dsl_dataset_user_hold_check_one(ds, htag,
 109                             dduha->dduha_minor != 0, tx);
 110                         dsl_dataset_rele(ds, FTAG);
 111                 }
 112 
 113                 if (error != 0) {
 114                         rv = error;
 115                         fnvlist_add_int32(dduha->dduha_errlist,
 116                             nvpair_name(pair), error);
 117                 }




 118         }








 119         return (rv);
 120 }
 121 
 122 void
 123 dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
 124     minor_t minor, uint64_t now, dmu_tx_t *tx)


 125 {
 126         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 127         objset_t *mos = dp->dp_meta_objset;
 128         uint64_t zapobj;
 129 
 130         mutex_enter(&ds->ds_lock);
 131         if (ds->ds_phys->ds_userrefs_obj == 0) {
 132                 /*
 133                  * This is the first user hold for this dataset.  Create
 134                  * the userrefs zap object.
 135                  */
 136                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
 137                 zapobj = ds->ds_phys->ds_userrefs_obj =
 138                     zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
 139         } else {
 140                 zapobj = ds->ds_phys->ds_userrefs_obj;
 141         }
 142         ds->ds_userrefs++;
 143         mutex_exit(&ds->ds_lock);
 144 
 145         VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 146 
 147         if (minor != 0) {



 148                 VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 149                     htag, now, tx));
 150                 dsl_register_onexit_hold_cleanup(ds, htag, minor);










 151         }
 152 
 153         spa_history_log_internal_ds(ds, "hold", tx,
 154             "tag=%s temp=%d refs=%llu",
 155             htag, minor != 0, ds->ds_userrefs);
 156 }
 157 









































































 158 static void
 159 dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 160 {
 161         dsl_dataset_user_hold_arg_t *dduha = arg;
 162         dsl_pool_t *dp = dmu_tx_pool(tx);
 163         nvpair_t *pair;
 164         uint64_t now = gethrestime_sec();
 165 
 166         for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
 167             pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
 168                 dsl_dataset_t *ds;
 169                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 170                 dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
 171                     dduha->dduha_minor, now, tx);






 172                 dsl_dataset_rele(ds, FTAG);







 173         }


 174 }
 175 
 176 /*




 177  * holds is nvl of snapname -> holdname
 178  * errlist will be filled in with snapname -> error
 179  * if cleanup_minor is not 0, the holds will be temporary, cleaned up
 180  * when the process exits.
 181  *
 182  * if any fails, all will fail.

















 183  */
 184 int
 185 dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 186 {
 187         dsl_dataset_user_hold_arg_t dduha;
 188         nvpair_t *pair;

 189 
 190         pair = nvlist_next_nvpair(holds, NULL);
 191         if (pair == NULL)
 192                 return (0);
 193 

 194         dduha.dduha_holds = holds;

 195         dduha.dduha_errlist = errlist;
 196         dduha.dduha_minor = cleanup_minor;

 197 
 198         return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 199             dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));














 200 }
 201 



 202 typedef struct dsl_dataset_user_release_arg {

 203         nvlist_t *ddura_holds;
 204         nvlist_t *ddura_todelete;
 205         nvlist_t *ddura_errlist;

 206 } dsl_dataset_user_release_arg_t;
 207 








 208 static int
 209 dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
 210     nvlist_t *holds, boolean_t *todelete)
 211 {
 212         uint64_t zapobj;
 213         nvpair_t *pair;
 214         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 215         int error;
 216         int numholds = 0;

 217 
 218         *todelete = B_FALSE;

 219 
 220         if (!dsl_dataset_is_snapshot(ds))
 221                 return (SET_ERROR(EINVAL));
 222 
 223         zapobj = ds->ds_phys->ds_userrefs_obj;
 224         if (zapobj == 0)
 225                 return (SET_ERROR(ESRCH));
 226 
 227         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 228             pair = nvlist_next_nvpair(holds, pair)) {
 229                 /* Make sure the hold exists */
 230                 uint64_t tmp;

 231                 error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
 232                 if (error == ENOENT)
 233                         error = SET_ERROR(ESRCH);



 234                 if (error != 0)
 235                         return (error);
 236                 numholds++;
 237         }
 238 
 239         if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
 240             ds->ds_userrefs == numholds) {
 241                 /* we need to destroy the snapshot as well */
 242 
 243                 if (dsl_dataset_long_held(ds))
 244                         return (SET_ERROR(EBUSY));
 245                 *todelete = B_TRUE;
 246         }
 247         return (0);




 248 }
 249 
 250 static int
 251 dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
 252 {
 253         dsl_dataset_user_release_arg_t *ddura = arg;

 254         dsl_pool_t *dp = dmu_tx_pool(tx);
 255         nvpair_t *pair;
 256         int rv = 0;
 257 
 258         if (!dmu_tx_is_syncing(tx))
 259                 return (0);
 260 
 261         for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 262             pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 263                 const char *name = nvpair_name(pair);
 264                 int error;
 265                 dsl_dataset_t *ds;
 266                 nvlist_t *holds;
 267 
 268                 error = nvpair_value_nvlist(pair, &holds);
 269                 if (error != 0)
 270                         return (SET_ERROR(EINVAL));
 271 
 272                 error = dsl_dataset_hold(dp, name, FTAG, &ds);
 273                 if (error == 0) {
 274                         boolean_t deleteme;
 275                         error = dsl_dataset_user_release_check_one(ds,
 276                             holds, &deleteme);
 277                         if (error == 0 && deleteme) {





 278                                 fnvlist_add_boolean(ddura->ddura_todelete,
 279                                     name);
 280                         }
 281                         dsl_dataset_rele(ds, FTAG);
 282                 }
 283                 if (error != 0) {
 284                         if (ddura->ddura_errlist != NULL) {
 285                                 fnvlist_add_int32(ddura->ddura_errlist,
 286                                     name, error);
 287                         }


 288                         rv = error;
 289                 }
 290         }








 291         return (rv);
 292 }
 293 
 294 static void
 295 dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
 296     dmu_tx_t *tx)
 297 {
 298         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 299         objset_t *mos = dp->dp_meta_objset;
 300         uint64_t zapobj;
 301         int error;
 302         nvpair_t *pair;
 303 
 304         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 305             pair = nvlist_next_nvpair(holds, pair)) {
 306                 ds->ds_userrefs--;
 307                 error = dsl_pool_user_release(dp, ds->ds_object,
 308                     nvpair_name(pair), tx);





 309                 VERIFY(error == 0 || error == ENOENT);


 310                 zapobj = ds->ds_phys->ds_userrefs_obj;
 311                 VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));










 312 
 313                 spa_history_log_internal_ds(ds, "release", tx,
 314                     "tag=%s refs=%lld", nvpair_name(pair),
 315                     (longlong_t)ds->ds_userrefs);
 316         }
 317 }
 318 
 319 static void
 320 dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
 321 {
 322         dsl_dataset_user_release_arg_t *ddura = arg;

 323         dsl_pool_t *dp = dmu_tx_pool(tx);
 324         nvpair_t *pair;
 325 








 326         for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 327             pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 328                 dsl_dataset_t *ds;

 329 
 330                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 331                 dsl_dataset_user_release_sync_one(ds,




 332                     fnvpair_value_nvlist(pair), tx);
 333                 if (nvlist_exists(ddura->ddura_todelete,
 334                     nvpair_name(pair))) {
 335                         ASSERT(ds->ds_userrefs == 0 &&
 336                             ds->ds_phys->ds_num_children == 1 &&
 337                             DS_IS_DEFER_DESTROY(ds));
 338                         dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 339                 }
 340                 dsl_dataset_rele(ds, FTAG);
 341         }
 342 }
 343 
 344 /*






 345  * holds is nvl of snapname -> { holdname, ... }
 346  * errlist will be filled in with snapname -> error
 347  *
 348  * if any fails, all will fail.









 349  */
 350 int
 351 dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)

 352 {
 353         dsl_dataset_user_release_arg_t ddura;
 354         nvpair_t *pair;

 355         int error;
 356 
 357         pair = nvlist_next_nvpair(holds, NULL);
 358         if (pair == NULL)
 359                 return (0);
 360 
 361         ddura.ddura_holds = holds;
 362         ddura.ddura_errlist = errlist;
 363         ddura.ddura_todelete = fnvlist_alloc();
 364 
 365         error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
 366             dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
 367         fnvlist_free(ddura.ddura_todelete);
 368         return (error);
 369 }
 370 
 371 typedef struct dsl_dataset_user_release_tmp_arg {
 372         uint64_t ddurta_dsobj;
 373         nvlist_t *ddurta_holds;
 374         boolean_t ddurta_deleteme;
 375 } dsl_dataset_user_release_tmp_arg_t;
 376 
 377 static int
 378 dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
 379 {
 380         dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 381         dsl_pool_t *dp = dmu_tx_pool(tx);
 382         dsl_dataset_t *ds;
 383         int error;
 384 
 385         if (!dmu_tx_is_syncing(tx))
 386                 return (0);
 387 
 388         error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
 389         if (error)
 390                 return (error);
 391 
 392         error = dsl_dataset_user_release_check_one(ds,
 393             ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
 394         dsl_dataset_rele(ds, FTAG);
 395         return (error);
 396 }
 397 
 398 static void
 399 dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
 400 {
 401         dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 402         dsl_pool_t *dp = dmu_tx_pool(tx);
 403         dsl_dataset_t *ds;
 404 
 405         VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
 406         dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
 407         if (ddurta->ddurta_deleteme) {
 408                 ASSERT(ds->ds_userrefs == 0 &&
 409                     ds->ds_phys->ds_num_children == 1 &&
 410                     DS_IS_DEFER_DESTROY(ds));
 411                 dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 412         }
 413         dsl_dataset_rele(ds, FTAG);
 414 }
 415 
 416 /*
 417  * Called at spa_load time to release a stale temporary user hold.
 418  * Also called by the onexit code.
 419  */
 420 void
 421 dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
 422 {
 423         dsl_dataset_user_release_tmp_arg_t ddurta;




 424         dsl_dataset_t *ds;
 425         int error;
 426 
 427 #ifdef _KERNEL
 428         /* Make sure it is not mounted. */
 429         dsl_pool_config_enter(dp, FTAG);
 430         error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
 431         if (error == 0) {
 432                 char name[MAXNAMELEN];
 433                 dsl_dataset_name(ds, name);
 434                 dsl_dataset_rele(ds, FTAG);
 435                 dsl_pool_config_exit(dp, FTAG);
 436                 zfs_unmount_snap(name);



 437         } else {
 438                 dsl_pool_config_exit(dp, FTAG);






 439         }
 440 #endif
 441 
 442         ddurta.ddurta_dsobj = dsobj;
 443         ddurta.ddurta_holds = fnvlist_alloc();
 444         fnvlist_add_boolean(ddurta.ddurta_holds, htag);
 445 
 446         (void) dsl_sync_task(spa_name(dp->dp_spa),
 447             dsl_dataset_user_release_tmp_check,
 448             dsl_dataset_user_release_tmp_sync, &ddurta, 1);
 449         fnvlist_free(ddurta.ddurta_holds);
 450 }
 451 
 452 typedef struct zfs_hold_cleanup_arg {
 453         char zhca_spaname[MAXNAMELEN];
 454         uint64_t zhca_spa_load_guid;
 455         uint64_t zhca_dsobj;
 456         char zhca_htag[MAXNAMELEN];
 457 } zfs_hold_cleanup_arg_t;
 458 
 459 static void
 460 dsl_dataset_user_release_onexit(void *arg)
 461 {
 462         zfs_hold_cleanup_arg_t *ca = arg;
 463         spa_t *spa;
 464         int error;
 465 
 466         error = spa_open(ca->zhca_spaname, &spa, FTAG);
 467         if (error != 0) {
 468                 zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 469                     "because pool is no longer loaded",
 470                     ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 471                 return;
 472         }
 473         if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
 474                 zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 475                     "because pool is no longer loaded (guid doesn't match)",
 476                     ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 477                 spa_close(spa, FTAG);
 478                 return;
 479         }
 480 
 481         dsl_dataset_user_release_tmp(spa_get_dsl(spa),
 482             ca->zhca_dsobj, ca->zhca_htag);
 483         kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 484         spa_close(spa, FTAG);






 485 }
 486 



 487 void
 488 dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
 489     minor_t minor)
 490 {
 491         zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
 492         spa_t *spa = dsl_dataset_get_spa(ds);
 493         (void) strlcpy(ca->zhca_spaname, spa_name(spa),
 494             sizeof (ca->zhca_spaname));
 495         ca->zhca_spa_load_guid = spa_load_guid(spa);
 496         ca->zhca_dsobj = ds->ds_object;
 497         (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
 498         VERIFY0(zfs_onexit_add_cb(minor,
 499             dsl_dataset_user_release_onexit, ca, NULL));
 500 }
 501 
 502 int
 503 dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 504 {
 505         dsl_pool_t *dp;
 506         dsl_dataset_t *ds;
 507         int err;
 508 
 509         err = dsl_pool_hold(dsname, FTAG, &dp);
 510         if (err != 0)
 511                 return (err);
 512         err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
 513         if (err != 0) {
 514                 dsl_pool_rele(dp, FTAG);
 515                 return (err);
 516         }
 517 
 518         if (ds->ds_phys->ds_userrefs_obj != 0) {
 519                 zap_attribute_t *za;


  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013 by Delphix. All rights reserved.
  24  */
  25 
  26 #include <sys/zfs_context.h>
  27 #include <sys/dsl_userhold.h>
  28 #include <sys/dsl_dataset.h>
  29 #include <sys/dsl_destroy.h>
  30 #include <sys/dsl_synctask.h>
  31 #include <sys/dmu_tx.h>
  32 #include <sys/zfs_onexit.h>
  33 #include <sys/dsl_pool.h>
  34 #include <sys/dsl_dir.h>
  35 #include <sys/zfs_ioctl.h>
  36 #include <sys/zap.h>
  37 
  38 typedef struct dsl_dataset_user_hold_arg {
  39         spa_t *dduha_spa;
  40         nvlist_t *dduha_holds;
  41         nvlist_t *dduha_tmpholds;
  42         nvlist_t *dduha_errlist;
  43         minor_t dduha_minor;
  44         boolean_t dduha_holds_created;
  45 } dsl_dataset_user_hold_arg_t;
  46 
  47 /*
  48  * If you add new checks here, you may need to add additional checks to the
  49  * "temporary" case in snapshot_check() in dmu_objset.c.
  50  */
  51 int
  52 dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
  53     boolean_t temphold, dmu_tx_t *tx)
  54 {
  55         dsl_pool_t *dp = dmu_tx_pool(tx);
  56         objset_t *mos = dp->dp_meta_objset;
  57         int error = 0;
  58 
  59         if (strlen(htag) > MAXNAMELEN)
  60                 return (E2BIG);
  61         /* Tempholds have a more restricted length */
  62         if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
  63                 return (E2BIG);
  64 


  70                         error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
  71                             htag, 8, 1, &value);
  72                         if (error == 0)
  73                                 error = SET_ERROR(EEXIST);
  74                         else if (error == ENOENT)
  75                                 error = 0;
  76                 }
  77                 mutex_exit(&ds->ds_lock);
  78         }
  79 
  80         return (error);
  81 }
  82 
  83 static int
  84 dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
  85 {
  86         dsl_dataset_user_hold_arg_t *dduha = arg;
  87         dsl_pool_t *dp = dmu_tx_pool(tx);
  88         nvpair_t *pair;
  89         int rv = 0;
  90         boolean_t holds_possible;
  91 
  92         if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
  93                 return (SET_ERROR(ENOTSUP));
  94 
  95         holds_possible = B_FALSE;
  96 
  97         for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
  98             pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
  99                 int error = 0;
 100                 dsl_dataset_t *ds;
 101                 char *htag;
 102 
 103                 /* must be a snapshot */
 104                 if (strchr(nvpair_name(pair), '@') == NULL)
 105                         error = SET_ERROR(EINVAL);
 106 
 107                 if (error == 0)
 108                         error = nvpair_value_string(pair, &htag);
 109                 if (error == 0) {
 110                         error = dsl_dataset_hold(dp,
 111                             nvpair_name(pair), FTAG, &ds);
 112 
 113                         if (error == ENOENT) {
 114                                 /*
 115                                  * We register ENOENT errors so they can be
 116                                  * correctly reported if needed, such as when
 117                                  * all holds fail.
 118                                  */
 119                                 if (dduha->dduha_errlist != NULL) {
 120                                         fnvlist_add_int32(dduha->dduha_errlist,
 121                                             nvpair_name(pair), error);
 122                                 }
 123                                 continue;
 124                         }
 125                 }
 126                 if (error == 0) {
 127                         error = dsl_dataset_user_hold_check_one(ds, htag,
 128                             dduha->dduha_minor != 0, tx);
 129                         dsl_dataset_rele(ds, FTAG);
 130                 }
 131 
 132                 if (error != 0) {
 133                         if (dduha->dduha_errlist != NULL) {
 134                                 fnvlist_add_int32(dduha->dduha_errlist,
 135                                     nvpair_name(pair), error);
 136                         }
 137                         rv = error;
 138                 } else {
 139                         holds_possible = B_TRUE;
 140                 }
 141         }
 142 
 143         /*
 144          * Check that at least one hold will possibly be created,
 145          * otherwise fail.
 146          */
 147         if (rv == 0 && !holds_possible)
 148                 rv = ENOENT;
 149 
 150         return (rv);
 151 }
 152 
 153 
 154 static void
 155 dsl_dataset_user_hold_sync_one_impl(dsl_dataset_user_hold_arg_t *dduha,
 156     dsl_dataset_t *ds, const char *htag, minor_t minor, uint64_t now,
 157     dmu_tx_t *tx)
 158 {
 159         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 160         objset_t *mos = dp->dp_meta_objset;
 161         uint64_t zapobj;
 162 
 163         mutex_enter(&ds->ds_lock);
 164         if (ds->ds_phys->ds_userrefs_obj == 0) {
 165                 /*
 166                  * This is the first user hold for this dataset.  Create
 167                  * the userrefs zap object.
 168                  */
 169                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
 170                 zapobj = ds->ds_phys->ds_userrefs_obj =
 171                     zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
 172         } else {
 173                 zapobj = ds->ds_phys->ds_userrefs_obj;
 174         }
 175         ds->ds_userrefs++;
 176         mutex_exit(&ds->ds_lock);
 177 
 178         VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 179 
 180         if (minor != 0) {
 181                 char name[MAXNAMELEN];
 182                 nvlist_t *tags;
 183 
 184                 VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 185                     htag, now, tx));
 186                 (void) snprintf(name, sizeof(name), "%llx",
 187                     (u_longlong_t)ds->ds_object);
 188 
 189                 if (nvlist_lookup_nvlist(dduha->dduha_tmpholds, name, &tags) != 0) {
 190                         tags = fnvlist_alloc();
 191                         fnvlist_add_boolean(tags, htag);
 192                         fnvlist_add_nvlist(dduha->dduha_tmpholds, name, tags);
 193                         fnvlist_free(tags);
 194                 } else {
 195                         fnvlist_add_boolean(tags, htag);
 196                 }
 197         }
 198 
 199         spa_history_log_internal_ds(ds, "hold", tx,
 200             "tag=%s temp=%d refs=%llu",
 201             htag, minor != 0, ds->ds_userrefs);
 202 }
 203 
 204 typedef struct zfs_hold_cleanup_arg {
 205         char zhca_spaname[MAXNAMELEN];
 206         uint64_t zhca_spa_load_guid;
 207         nvlist_t *zhca_holds;
 208 } zfs_hold_cleanup_arg_t;
 209 
 210 static void
 211 dsl_dataset_user_release_onexit(void *arg)
 212 {
 213         zfs_hold_cleanup_arg_t *ca = (zfs_hold_cleanup_arg_t *)arg;
 214         spa_t *spa;
 215         int error;
 216 
 217         error = spa_open(ca->zhca_spaname, &spa, FTAG);
 218         if (error != 0) {
 219                 zfs_dbgmsg("couldn't release holds on pool=%s "
 220                     "because pool is no longer loaded",
 221                     ca->zhca_spaname);
 222                 return;
 223         }
 224         if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
 225                 zfs_dbgmsg("couldn't release holds on pool=%s "
 226                     "because pool is no longer loaded (guid doesn't match)",
 227                     ca->zhca_spaname);
 228                 spa_close(spa, FTAG);
 229                 return;
 230         }
 231 
 232         (void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
 233         fnvlist_free(ca->zhca_holds);
 234         kmem_free(ca, sizeof(zfs_hold_cleanup_arg_t));
 235         spa_close(spa, FTAG);
 236 }
 237 
 238 static void
 239 dsl_register_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
 240 {
 241         zfs_hold_cleanup_arg_t *ca;
 242 
 243         if (minor == 0 || nvlist_next_nvpair(holds, NULL) == NULL) {
 244                 fnvlist_free(holds);
 245                 return;
 246         }
 247 
 248         ASSERT(spa != NULL);
 249         ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
 250 
 251         (void) strlcpy(ca->zhca_spaname, spa_name(spa),
 252             sizeof (ca->zhca_spaname));
 253         ca->zhca_spa_load_guid = spa_load_guid(spa);
 254         ca->zhca_holds = holds;
 255         VERIFY0(zfs_onexit_add_cb(minor,
 256             dsl_dataset_user_release_onexit, ca, NULL));
 257 }
 258 
 259 void
 260 dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
 261     minor_t minor, uint64_t now, dmu_tx_t *tx)
 262 {
 263         dsl_dataset_user_hold_arg_t dduha;
 264 
 265         dduha.dduha_spa = NULL;
 266         dduha.dduha_holds = NULL;
 267         dduha.dduha_tmpholds = fnvlist_alloc();
 268         dduha.dduha_errlist = NULL;
 269         dduha.dduha_minor = minor;
 270         dduha.dduha_holds_created = B_FALSE;
 271 
 272         dsl_dataset_user_hold_sync_one_impl(&dduha, ds, htag, minor, now, tx);
 273         dsl_register_onexit_hold_cleanup(dsl_dataset_get_spa(ds),
 274             dduha.dduha_tmpholds, minor);
 275 }
 276 
 277 static void
 278 dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 279 {
 280         dsl_dataset_user_hold_arg_t *dduha = arg;
 281         dsl_pool_t *dp = dmu_tx_pool(tx);
 282         nvpair_t *pair;
 283         uint64_t now = gethrestime_sec();
 284 
 285         for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
 286             pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
 287                 dsl_dataset_t *ds;
 288                 char *name;
 289                 int error;
 290 
 291                 name = nvpair_name(pair);
 292                 error = dsl_dataset_hold(dp, name, FTAG, &ds);
 293                 if (error == 0) {
 294                         dsl_dataset_user_hold_sync_one_impl(dduha, ds,
 295                             fnvpair_value_string(pair), dduha->dduha_minor,
 296                             now, tx);
 297                         dsl_dataset_rele(ds, FTAG);
 298                         dduha->dduha_holds_created = B_TRUE;
 299                 } else if (dduha->dduha_errlist != NULL) {
 300                         /*
 301                          * We register ENOENT errors so they can be correctly
 302                          * reported if needed, such as when all holds fail.
 303                          */
 304                         fnvlist_add_int32(dduha->dduha_errlist, name, error);
 305                 }
 306         }
 307         dduha->dduha_spa = dp->dp_spa;
 308 }
 309 
 310 /*
 311  * The full semantics of this function are described in the comment above
 312  * lzc_hold().
 313  *
 314  * To summarize:
 315  * holds is nvl of snapname -> holdname
 316  * errlist will be filled in with snapname -> error


 317  *
 318  * The snaphosts must all be in the same pool.
 319  *
 320  * Holds for snapshots that don't exist will be skipped.
 321  *
 322  * If none of the snapshots for requested holds exist then ENOENT will be
 323  * returned.
 324  *
 325  * If cleanup_minor is not 0, the holds will be temporary, which will be cleaned
 326  * up when the process exits.
 327  *
 328  * On success all the holds, for snapshots that existed, will be created and 0
 329  * will be returned.
 330  *
 331  * On failure no holds will be created, the errlist will be filled in,
 332  * and an errno will returned.
 333  *
 334  * In all cases the errlist will contain entries for holds where the snapshot
 335  * didn't exist.
 336  */
 337 int
 338 dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 339 {
 340         dsl_dataset_user_hold_arg_t dduha;
 341         nvpair_t *pair;
 342         int ret;
 343 
 344         pair = nvlist_next_nvpair(holds, NULL);
 345         if (pair == NULL)
 346                 return (0);
 347 
 348         dduha.dduha_spa = NULL;
 349         dduha.dduha_holds = holds;
 350         dduha.dduha_tmpholds = fnvlist_alloc();
 351         dduha.dduha_errlist = errlist;
 352         dduha.dduha_minor = cleanup_minor;
 353         dduha.dduha_holds_created = B_FALSE;
 354 
 355         ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 356             dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
 357         if (ret == 0) {
 358                 /* Check we created at least one hold. */
 359                 if (dduha.dduha_holds_created) {
 360                         dsl_register_onexit_hold_cleanup(dduha.dduha_spa,
 361                             dduha.dduha_tmpholds, cleanup_minor);
 362                 } else {
 363                         fnvlist_free(dduha.dduha_tmpholds);
 364                         ret = ENOENT;
 365                 }
 366         } else {
 367                 fnvlist_free(dduha.dduha_tmpholds);
 368         }
 369 
 370         return (ret);
 371 }
 372 
 373 typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag,
 374     dsl_dataset_t **dsp);
 375 
 376 typedef struct dsl_dataset_user_release_arg {
 377         dsl_holdfunc_t *ddura_holdfunc;
 378         nvlist_t *ddura_holds;
 379         nvlist_t *ddura_todelete;
 380         nvlist_t *ddura_errlist;
 381         boolean_t ddura_holds_found;
 382 } dsl_dataset_user_release_arg_t;
 383 
 384 /* Place a dataset hold on the snapshot identified by passed dsobj string */
 385 static
 386 int dsl_dataset_hold_byobj(dsl_pool_t *dp, const char *dsobj, void *tag,
 387     dsl_dataset_t **dsp)
 388 {
 389         return dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp);
 390 }
 391 
 392 static int
 393 dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
 394     dsl_dataset_t *ds, nvlist_t *holds, boolean_t *todelete)
 395 {
 396         uint64_t zapobj;
 397         nvpair_t *pair;
 398         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 399         int error;
 400         int numholds = 0;
 401         int ret;
 402 
 403         *todelete = B_FALSE;
 404         ret = 0;
 405 
 406         if (!dsl_dataset_is_snapshot(ds))
 407                 return (SET_ERROR(EINVAL));
 408 
 409         zapobj = ds->ds_phys->ds_userrefs_obj;
 410         if (zapobj == 0)
 411                 return (SET_ERROR(ESRCH));
 412 
 413         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 414             pair = nvlist_next_nvpair(holds, pair)) {

 415                 uint64_t tmp;
 416 
 417                 error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
 418                 /* Non-existent holds aren't always fatal. */
 419                 if (error == ENOENT) {
 420                         ret = error;
 421                         continue;
 422                 }
 423                 if (error != 0)
 424                         return (error);
 425                 numholds++;
 426         }
 427 
 428         if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
 429             ds->ds_userrefs == numholds) {
 430                 /* we need to destroy the snapshot as well */
 431 
 432                 if (dsl_dataset_long_held(ds))
 433                         return (SET_ERROR(EBUSY));
 434                 *todelete = B_TRUE;
 435         }
 436 
 437         if (numholds != 0)
 438                 ddura->ddura_holds_found = B_TRUE;
 439 
 440         return (ret);
 441 }
 442 
 443 static int
 444 dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
 445 {
 446         dsl_dataset_user_release_arg_t *ddura = arg;
 447         dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
 448         dsl_pool_t *dp = dmu_tx_pool(tx);
 449         nvpair_t *pair;
 450         int rv = 0;
 451 
 452         if (!dmu_tx_is_syncing(tx))
 453                 return (0);
 454 
 455         for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 456             pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 457                 const char *name = nvpair_name(pair);
 458                 int error;
 459                 dsl_dataset_t *ds;
 460                 nvlist_t *holds;
 461 
 462                 error = nvpair_value_nvlist(pair, &holds);
 463                 if (error != 0)
 464                         return (SET_ERROR(EINVAL));
 465 
 466                 error = holdfunc(dp, name, FTAG, &ds);
 467                 if (error == 0) {
 468                         boolean_t deleteme;
 469                         error = dsl_dataset_user_release_check_one(ddura, ds,
 470                             holds, &deleteme);
 471                         /*
 472                          * Don't check for error == 0 as deleteme is only set
 473                          * to B_TRUE if it's correct to do so dispite the error
 474                          * e.g. ENOENT.
 475                          */
 476                         if (deleteme) {
 477                                 fnvlist_add_boolean(ddura->ddura_todelete,
 478                                     name);
 479                         }
 480                         dsl_dataset_rele(ds, FTAG);
 481                 }
 482                 if (error != 0) {
 483                         if (ddura->ddura_errlist != NULL) {
 484                                 fnvlist_add_int32(ddura->ddura_errlist,
 485                                     name, error);
 486                         }
 487                         /* Non-existent holds aren't always fatal. */
 488                         if (error != ENOENT)
 489                                 rv = error;
 490                 }
 491         }
 492 
 493         /*
 494          * None of the specified holds existed so avoid the overhead of a sync
 495          * and return ENOENT.
 496          */
 497         if (rv == 0 && !ddura->ddura_holds_found)
 498                 rv = ENOENT;
 499 
 500         return (rv);
 501 }
 502 
 503 static void
 504 dsl_dataset_user_release_sync_one(dsl_dataset_user_release_arg_t *ddura,
 505     dsl_dataset_t *ds, nvlist_t *holds, dmu_tx_t *tx)
 506 {
 507         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 508         objset_t *mos = dp->dp_meta_objset;


 509         nvpair_t *pair;
 510 
 511         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 512             pair = nvlist_next_nvpair(holds, pair)) {
 513                 uint64_t zapobj;
 514                 int error;
 515                 char *name;
 516 
 517                 name = nvpair_name(pair);
 518 
 519                 /* Remove temporary hold if one exists. */
 520                 error = dsl_pool_user_release(dp, ds->ds_object, name, tx);
 521                 VERIFY(error == 0 || error == ENOENT);
 522 
 523                 /* Remove user hold if one exists. */
 524                 zapobj = ds->ds_phys->ds_userrefs_obj;
 525                 error = zap_remove(mos, zapobj, name, tx);
 526                 if (error == ENOENT)
 527                         continue;
 528                 VERIFY0(error);
 529 
 530                 /* Only if we removed a hold do we decrement userrefs. */
 531                 mutex_enter(&ds->ds_lock);
 532                 ds->ds_userrefs--;
 533                 mutex_exit(&ds->ds_lock);
 534 
 535                 ddura->ddura_holds_found = B_TRUE;
 536 
 537                 spa_history_log_internal_ds(ds, "release", tx,
 538                     "tag=%s refs=%lld", nvpair_name(pair),
 539                     (longlong_t)ds->ds_userrefs);
 540         }
 541 }
 542 
 543 static void
 544 dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
 545 {
 546         dsl_dataset_user_release_arg_t *ddura = arg;
 547         dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
 548         dsl_pool_t *dp = dmu_tx_pool(tx);
 549         nvpair_t *pair;
 550 
 551         /*
 552          * Even though check suggested that at least one of our holds where
 553          * found this may have changed. Recalculate ddura_holds_found so that
 554          * we can return ENOENT from the caller in the case that no holds
 555          * where actually released.
 556          */
 557         ddura->ddura_holds_found = B_FALSE;
 558 
 559         for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 560             pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 561                 dsl_dataset_t *ds;
 562                 int error;
 563 
 564                 error = holdfunc(dp, nvpair_name(pair), FTAG, &ds);
 565                 if (error == ENOENT)
 566                         continue;
 567                 VERIFY0(error);
 568 
 569                 dsl_dataset_user_release_sync_one(ddura, ds,
 570                     fnvpair_value_nvlist(pair), tx);
 571                 if (nvlist_exists(ddura->ddura_todelete, nvpair_name(pair))) {

 572                         ASSERT(ds->ds_userrefs == 0 &&
 573                             ds->ds_phys->ds_num_children == 1 &&
 574                             DS_IS_DEFER_DESTROY(ds));
 575                         dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 576                 }
 577                 dsl_dataset_rele(ds, FTAG);
 578         }
 579 }
 580 
 581 /*
 582  * The full semantics of this function are described in the comment above
 583  * lzc_release().
 584  *
 585  * To summarize:
 586  * Releases holds specified in the nvl holds.
 587  *
 588  * holds is nvl of snapname -> { holdname, ... }
 589  * errlist will be filled in with snapname -> error
 590  * 
 591  * If tmpdp is not NULL the names for holds should be the dbobj's of snapshots,
 592  * otherwise they should be the names of shapshots.
 593  *
 594  * As a release may cause snapshots to be destroyed this trys to ensure they
 595  * aren't mounted.
 596  *
 597  * The release of non-existent holds are skipped.
 598  *
 599  * At least one hold must have been released for the this function to succeed
 600  * and return 0.
 601  */
 602 static int
 603 dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
 604     dsl_pool_t *tmpdp)
 605 {
 606         dsl_dataset_user_release_arg_t ddura;
 607         nvpair_t *pair;
 608         char *pool;
 609         int error;
 610 
 611         pair = nvlist_next_nvpair(holds, NULL);
 612         if (pair == NULL)
 613                 return (0);
 614 
 615 #ifdef _KERNEL
 616         /*
 617          * The release may cause snapshots to be destroyed; make sure they
 618          * are not mounted.






















































 619          */
 620         if (tmpdp != NULL) {
 621                 /* Temporary holds are specified by dbobj. */
 622                 ddura.ddura_holdfunc = dsl_dataset_hold_byobj;
 623                 pool = spa_name(tmpdp->dp_spa);
 624 
 625                 dsl_pool_config_enter(tmpdp, FTAG);
 626                 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 627                     pair = nvlist_next_nvpair(holds, pair)) {
 628                         dsl_dataset_t *ds;

 629 
 630                         error = dsl_dataset_hold_byobj(tmpdp, nvpair_name(pair),
 631                             FTAG, &ds);


 632                         if (error == 0) {
 633                                 char name[MAXNAMELEN];
 634                                 dsl_dataset_name(ds, name);
 635                                 dsl_dataset_rele(ds, FTAG);

 636                                 zfs_unmount_snap(name);
 637                         }
 638                 }
 639                 dsl_pool_config_exit(tmpdp, FTAG);
 640         } else {
 641                 /* Non-temporary holds are specified by name. */
 642                 ddura.ddura_holdfunc = dsl_dataset_hold;
 643                 pool = nvpair_name(pair);
 644 
 645                 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 646                     pair = nvlist_next_nvpair(holds, pair))
 647                         zfs_unmount_snap(nvpair_name(pair));
 648         }
 649 #endif
 650 
 651         ddura.ddura_holds = holds;
 652         ddura.ddura_errlist = errlist;
 653         ddura.ddura_todelete = fnvlist_alloc();
 654         ddura.ddura_holds_found = B_FALSE;





 655 
 656         error = dsl_sync_task(pool, dsl_dataset_user_release_check,
 657             dsl_dataset_user_release_sync, &ddura,
 658             fnvlist_num_pairs(holds));
 659         fnvlist_free(ddura.ddura_todelete);


 660 
 661         /* If at least one hold wasn't removed return ENOENT. */
 662         if (error == 0 && !ddura.ddura_holds_found)
 663                 error = ENOENT;



 664 
 665         return (error);
 666 }












 667 
 668 /*
 669  * holds is nvl of snapname -> { holdname, ... }
 670  * errlist will be filled in with snapname -> error
 671  *
 672  * if any fails, all will fail.
 673  */
 674 int
 675 dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
 676 {
 677         return dsl_dataset_user_release_impl(holds, errlist, NULL);
 678 }
 679 
 680 /*
 681  * holds is nvl of snapdsobj -> { holdname, ... }
 682  */
 683 void
 684 dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds)

 685 {
 686         ASSERT(dp != NULL);
 687         (void) dsl_dataset_user_release_impl(holds, NULL, dp);







 688 }
 689 
 690 int
 691 dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 692 {
 693         dsl_pool_t *dp;
 694         dsl_dataset_t *ds;
 695         int err;
 696 
 697         err = dsl_pool_hold(dsname, FTAG, &dp);
 698         if (err != 0)
 699                 return (err);
 700         err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
 701         if (err != 0) {
 702                 dsl_pool_rele(dp, FTAG);
 703                 return (err);
 704         }
 705 
 706         if (ds->ds_phys->ds_userrefs_obj != 0) {
 707                 zap_attribute_t *za;