Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013 by Delphix. All rights reserved.

  24  */
  25 
  26 #include <sys/zfs_context.h>
  27 #include <sys/dsl_userhold.h>
  28 #include <sys/dsl_dataset.h>
  29 #include <sys/dsl_destroy.h>
  30 #include <sys/dsl_synctask.h>
  31 #include <sys/dmu_tx.h>
  32 #include <sys/zfs_onexit.h>
  33 #include <sys/dsl_pool.h>
  34 #include <sys/dsl_dir.h>
  35 #include <sys/zfs_ioctl.h>
  36 #include <sys/zap.h>
  37 
  38 typedef struct dsl_dataset_user_hold_arg {
  39         nvlist_t *dduha_holds;

  40         nvlist_t *dduha_errlist;
  41         minor_t dduha_minor;
  42 } dsl_dataset_user_hold_arg_t;
  43 
  44 /*
  45  * If you add new checks here, you may need to add additional checks to the
  46  * "temporary" case in snapshot_check() in dmu_objset.c.
  47  */
  48 int
  49 dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
  50     boolean_t temphold, dmu_tx_t *tx)
  51 {
  52         dsl_pool_t *dp = dmu_tx_pool(tx);
  53         objset_t *mos = dp->dp_meta_objset;
  54         int error = 0;
  55 


  56         if (strlen(htag) > MAXNAMELEN)
  57                 return (E2BIG);
  58         /* Tempholds have a more restricted length */
  59         if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
  60                 return (E2BIG);
  61 
  62         /* tags must be unique (if ds already exists) */
  63         if (ds != NULL) {
  64                 mutex_enter(&ds->ds_lock);
  65                 if (ds->ds_phys->ds_userrefs_obj != 0) {
  66                         uint64_t value;

  67                         error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
  68                             htag, 8, 1, &value);
  69                         if (error == 0)
  70                                 error = SET_ERROR(EEXIST);
  71                         else if (error == ENOENT)
  72                                 error = 0;
  73                 }
  74                 mutex_exit(&ds->ds_lock);
  75         }
  76 
  77         return (error);
  78 }
  79 
  80 static int
  81 dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
  82 {
  83         dsl_dataset_user_hold_arg_t *dduha = arg;
  84         dsl_pool_t *dp = dmu_tx_pool(tx);
  85         nvpair_t *pair;
  86         int rv = 0;
  87 
  88         if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
  89                 return (SET_ERROR(ENOTSUP));
  90 



  91         for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
  92             pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
  93                 int error = 0;
  94                 dsl_dataset_t *ds;
  95                 char *htag;

  96 
  97                 /* must be a snapshot */
  98                 if (strchr(nvpair_name(pair), '@') == NULL)

  99                         error = SET_ERROR(EINVAL);
 100 
 101                 if (error == 0)
 102                         error = nvpair_value_string(pair, &htag);
 103                 if (error == 0) {
 104                         error = dsl_dataset_hold(dp,
 105                             nvpair_name(pair), FTAG, &ds);
 106                 }
 107                 if (error == 0) {
 108                         error = dsl_dataset_user_hold_check_one(ds, htag,
 109                             dduha->dduha_minor != 0, tx);
 110                         dsl_dataset_rele(ds, FTAG);
 111                 }
 112 
 113                 if (error != 0) {
 114                         rv = error;
 115                         fnvlist_add_int32(dduha->dduha_errlist,
 116                             nvpair_name(pair), error);






 117                 }
 118         }
 119         return (rv);





 120 }
 121 
 122 void
 123 dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
 124     minor_t minor, uint64_t now, dmu_tx_t *tx)

 125 {
 126         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 127         objset_t *mos = dp->dp_meta_objset;
 128         uint64_t zapobj;
 129 
 130         mutex_enter(&ds->ds_lock);

 131         if (ds->ds_phys->ds_userrefs_obj == 0) {
 132                 /*
 133                  * This is the first user hold for this dataset.  Create
 134                  * the userrefs zap object.
 135                  */
 136                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
 137                 zapobj = ds->ds_phys->ds_userrefs_obj =
 138                     zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
 139         } else {
 140                 zapobj = ds->ds_phys->ds_userrefs_obj;
 141         }
 142         ds->ds_userrefs++;
 143         mutex_exit(&ds->ds_lock);
 144 
 145         VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 146 
 147         if (minor != 0) {



 148                 VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 149                     htag, now, tx));
 150                 dsl_register_onexit_hold_cleanup(ds, htag, minor);










 151         }
 152 
 153         spa_history_log_internal_ds(ds, "hold", tx,
 154             "tag=%s temp=%d refs=%llu",
 155             htag, minor != 0, ds->ds_userrefs);
 156 }
 157 





































































 158 static void
 159 dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 160 {
 161         dsl_dataset_user_hold_arg_t *dduha = arg;
 162         dsl_pool_t *dp = dmu_tx_pool(tx);
 163         nvpair_t *pair;

 164         uint64_t now = gethrestime_sec();
 165 
 166         for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
 167             pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {





 168                 dsl_dataset_t *ds;

 169                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 170                 dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
 171                     dduha->dduha_minor, now, tx);
 172                 dsl_dataset_rele(ds, FTAG);
 173         }

 174 }
 175 
 176 /*




 177  * holds is nvl of snapname -> holdname
 178  * errlist will be filled in with snapname -> error
 179  * if cleanup_minor is not 0, the holds will be temporary, cleaned up
 180  * when the process exits.
 181  *
 182  * if any fails, all will fail.

















 183  */
 184 int
 185 dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 186 {
 187         dsl_dataset_user_hold_arg_t dduha;
 188         nvpair_t *pair;

 189 
 190         pair = nvlist_next_nvpair(holds, NULL);
 191         if (pair == NULL)
 192                 return (0);
 193 
 194         dduha.dduha_holds = holds;

 195         dduha.dduha_errlist = errlist;
 196         dduha.dduha_minor = cleanup_minor;
 197 
 198         return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 199             dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));



 200 }
 201 



 202 typedef struct dsl_dataset_user_release_arg {

 203         nvlist_t *ddura_holds;
 204         nvlist_t *ddura_todelete;
 205         nvlist_t *ddura_errlist;

 206 } dsl_dataset_user_release_arg_t;
 207 








 208 static int
 209 dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
 210     nvlist_t *holds, boolean_t *todelete)
 211 {
 212         uint64_t zapobj;
 213         nvpair_t *pair;

 214         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 215         int error;
 216         int numholds = 0;
 217 
 218         *todelete = B_FALSE;
 219 
 220         if (!dsl_dataset_is_snapshot(ds))
 221                 return (SET_ERROR(EINVAL));
 222 
 223         zapobj = ds->ds_phys->ds_userrefs_obj;
 224         if (zapobj == 0)
 225                 return (SET_ERROR(ESRCH));
 226 




 227         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 228             pair = nvlist_next_nvpair(holds, pair)) {
 229                 /* Make sure the hold exists */
 230                 uint64_t tmp;
 231                 error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);






 232                 if (error == ENOENT)
 233                         error = SET_ERROR(ESRCH);
 234                 if (error != 0)


 235                         return (error);



 236                 numholds++;
 237         }
 238 
 239         if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
 240             ds->ds_userrefs == numholds) {
 241                 /* we need to destroy the snapshot as well */
 242 
 243                 if (dsl_dataset_long_held(ds))
 244                         return (SET_ERROR(EBUSY));
 245                 *todelete = B_TRUE;
 246         }
 247         return (0);









 248 }
 249 
 250 static int
 251 dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
 252 {
 253         dsl_dataset_user_release_arg_t *ddura = arg;
 254         dsl_pool_t *dp = dmu_tx_pool(tx);

 255         nvpair_t *pair;
 256         int rv = 0;
 257 
 258         if (!dmu_tx_is_syncing(tx))
 259                 return (0);
 260 






 261         for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 262             pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 263                 const char *name = nvpair_name(pair);
 264                 int error;
 265                 dsl_dataset_t *ds;
 266                 nvlist_t *holds;
 267 

 268                 error = nvpair_value_nvlist(pair, &holds);
 269                 if (error != 0)
 270                         return (SET_ERROR(EINVAL));
 271 
 272                 error = dsl_dataset_hold(dp, name, FTAG, &ds);
 273                 if (error == 0) {
 274                         boolean_t deleteme;
 275                         error = dsl_dataset_user_release_check_one(ds,
 276                             holds, &deleteme);
 277                         if (error == 0 && deleteme) {
 278                                 fnvlist_add_boolean(ddura->ddura_todelete,
 279                                     name);
 280                         }
 281                         dsl_dataset_rele(ds, FTAG);
 282                 }
 283                 if (error != 0) {
 284                         if (ddura->ddura_errlist != NULL) {
 285                                 fnvlist_add_int32(ddura->ddura_errlist,
 286                                     name, error);
 287                         }
 288                         rv = error;


 289                 }
 290         }
 291         return (rv);








 292 }
 293 
 294 static void
 295 dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
 296     dmu_tx_t *tx)
 297 {
 298         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 299         objset_t *mos = dp->dp_meta_objset;
 300         uint64_t zapobj;
 301         int error;
 302         nvpair_t *pair;
 303 
 304         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 305             pair = nvlist_next_nvpair(holds, pair)) {
 306                 ds->ds_userrefs--;
 307                 error = dsl_pool_user_release(dp, ds->ds_object,
 308                     nvpair_name(pair), tx);




 309                 VERIFY(error == 0 || error == ENOENT);
 310                 zapobj = ds->ds_phys->ds_userrefs_obj;
 311                 VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));


 312 
 313                 spa_history_log_internal_ds(ds, "release", tx,
 314                     "tag=%s refs=%lld", nvpair_name(pair),
 315                     (longlong_t)ds->ds_userrefs);
 316         }
 317 }
 318 
 319 static void
 320 dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
 321 {
 322         dsl_dataset_user_release_arg_t *ddura = arg;

 323         dsl_pool_t *dp = dmu_tx_pool(tx);
 324         nvpair_t *pair;
 325 
 326         for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 327             pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {



 328                 dsl_dataset_t *ds;

 329 
 330                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 331                 dsl_dataset_user_release_sync_one(ds,


 332                     fnvpair_value_nvlist(pair), tx);
 333                 if (nvlist_exists(ddura->ddura_todelete,
 334                     nvpair_name(pair))) {
 335                         ASSERT(ds->ds_userrefs == 0 &&
 336                             ds->ds_phys->ds_num_children == 1 &&
 337                             DS_IS_DEFER_DESTROY(ds));
 338                         dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 339                 }
 340                 dsl_dataset_rele(ds, FTAG);
 341         }
 342 }
 343 
 344 /*






 345  * holds is nvl of snapname -> { holdname, ... }
 346  * errlist will be filled in with snapname -> error
 347  *
 348  * if any fails, all will fail.









 349  */
 350 int
 351 dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)

 352 {
 353         dsl_dataset_user_release_arg_t ddura;
 354         nvpair_t *pair;

 355         int error;
 356 
 357         pair = nvlist_next_nvpair(holds, NULL);
 358         if (pair == NULL)
 359                 return (0);
 360 
 361         ddura.ddura_holds = holds;
 362         ddura.ddura_errlist = errlist;
 363         ddura.ddura_todelete = fnvlist_alloc();
 364 
 365         error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
 366             dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
 367         fnvlist_free(ddura.ddura_todelete);
 368         return (error);
 369 }
 370 
 371 typedef struct dsl_dataset_user_release_tmp_arg {
 372         uint64_t ddurta_dsobj;
 373         nvlist_t *ddurta_holds;
 374         boolean_t ddurta_deleteme;
 375 } dsl_dataset_user_release_tmp_arg_t;
 376 
 377 static int
 378 dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
 379 {
 380         dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 381         dsl_pool_t *dp = dmu_tx_pool(tx);
 382         dsl_dataset_t *ds;
 383         int error;
 384 
 385         if (!dmu_tx_is_syncing(tx))
 386                 return (0);
 387 
 388         error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
 389         if (error)
 390                 return (error);
 391 
 392         error = dsl_dataset_user_release_check_one(ds,
 393             ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
 394         dsl_dataset_rele(ds, FTAG);
 395         return (error);
 396 }
 397 
 398 static void
 399 dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
 400 {
 401         dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 402         dsl_pool_t *dp = dmu_tx_pool(tx);
 403         dsl_dataset_t *ds;
 404 
 405         VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
 406         dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
 407         if (ddurta->ddurta_deleteme) {
 408                 ASSERT(ds->ds_userrefs == 0 &&
 409                     ds->ds_phys->ds_num_children == 1 &&
 410                     DS_IS_DEFER_DESTROY(ds));
 411                 dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 412         }
 413         dsl_dataset_rele(ds, FTAG);
 414 }
 415 
 416 /*
 417  * Called at spa_load time to release a stale temporary user hold.
 418  * Also called by the onexit code.
 419  */
 420 void
 421 dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
 422 {
 423         dsl_dataset_user_release_tmp_arg_t ddurta;




 424         dsl_dataset_t *ds;
 425         int error;
 426 
 427 #ifdef _KERNEL
 428         /* Make sure it is not mounted. */
 429         dsl_pool_config_enter(dp, FTAG);
 430         error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
 431         if (error == 0) {
 432                 char name[MAXNAMELEN];
 433                 dsl_dataset_name(ds, name);
 434                 dsl_dataset_rele(ds, FTAG);
 435                 dsl_pool_config_exit(dp, FTAG);
 436                 zfs_unmount_snap(name);



 437         } else {
 438                 dsl_pool_config_exit(dp, FTAG);







 439         }
 440 #endif
 441 
 442         ddurta.ddurta_dsobj = dsobj;
 443         ddurta.ddurta_holds = fnvlist_alloc();
 444         fnvlist_add_boolean(ddurta.ddurta_holds, htag);
 445 
 446         (void) dsl_sync_task(spa_name(dp->dp_spa),
 447             dsl_dataset_user_release_tmp_check,
 448             dsl_dataset_user_release_tmp_sync, &ddurta, 1);
 449         fnvlist_free(ddurta.ddurta_holds);
 450 }
 451 
 452 typedef struct zfs_hold_cleanup_arg {
 453         char zhca_spaname[MAXNAMELEN];
 454         uint64_t zhca_spa_load_guid;
 455         uint64_t zhca_dsobj;
 456         char zhca_htag[MAXNAMELEN];
 457 } zfs_hold_cleanup_arg_t;
 458 
 459 static void
 460 dsl_dataset_user_release_onexit(void *arg)
 461 {
 462         zfs_hold_cleanup_arg_t *ca = arg;
 463         spa_t *spa;
 464         int error;
 465 
 466         error = spa_open(ca->zhca_spaname, &spa, FTAG);
 467         if (error != 0) {
 468                 zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 469                     "because pool is no longer loaded",
 470                     ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 471                 return;
 472         }
 473         if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
 474                 zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 475                     "because pool is no longer loaded (guid doesn't match)",
 476                     ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 477                 spa_close(spa, FTAG);
 478                 return;
 479         }
 480 
 481         dsl_dataset_user_release_tmp(spa_get_dsl(spa),
 482             ca->zhca_dsobj, ca->zhca_htag);
 483         kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 484         spa_close(spa, FTAG);




 485 }
 486 



 487 void
 488 dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
 489     minor_t minor)
 490 {
 491         zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
 492         spa_t *spa = dsl_dataset_get_spa(ds);
 493         (void) strlcpy(ca->zhca_spaname, spa_name(spa),
 494             sizeof (ca->zhca_spaname));
 495         ca->zhca_spa_load_guid = spa_load_guid(spa);
 496         ca->zhca_dsobj = ds->ds_object;
 497         (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
 498         VERIFY0(zfs_onexit_add_cb(minor,
 499             dsl_dataset_user_release_onexit, ca, NULL));
 500 }
 501 
 502 int
 503 dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 504 {
 505         dsl_pool_t *dp;
 506         dsl_dataset_t *ds;
 507         int err;
 508 
 509         err = dsl_pool_hold(dsname, FTAG, &dp);
 510         if (err != 0)
 511                 return (err);
 512         err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
 513         if (err != 0) {
 514                 dsl_pool_rele(dp, FTAG);
 515                 return (err);
 516         }
 517 
 518         if (ds->ds_phys->ds_userrefs_obj != 0) {
 519                 zap_attribute_t *za;


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013 by Delphix. All rights reserved.
  24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  25  */
  26 
  27 #include <sys/zfs_context.h>
  28 #include <sys/dsl_userhold.h>
  29 #include <sys/dsl_dataset.h>
  30 #include <sys/dsl_destroy.h>
  31 #include <sys/dsl_synctask.h>
  32 #include <sys/dmu_tx.h>
  33 #include <sys/zfs_onexit.h>
  34 #include <sys/dsl_pool.h>
  35 #include <sys/dsl_dir.h>
  36 #include <sys/zfs_ioctl.h>
  37 #include <sys/zap.h>
  38 
  39 typedef struct dsl_dataset_user_hold_arg {
  40         nvlist_t *dduha_holds;
  41         nvlist_t *dduha_chkholds;
  42         nvlist_t *dduha_errlist;
  43         minor_t dduha_minor;
  44 } dsl_dataset_user_hold_arg_t;
  45 
  46 /*
  47  * If you add new checks here, you may need to add additional checks to the
  48  * "temporary" case in snapshot_check() in dmu_objset.c.
  49  */
  50 int
  51 dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
  52     boolean_t temphold, dmu_tx_t *tx)
  53 {
  54         dsl_pool_t *dp = dmu_tx_pool(tx);
  55         objset_t *mos = dp->dp_meta_objset;
  56         int error = 0;
  57 
  58         ASSERT(RRW_READ_HELD(&dp->dp_config_rwlock));
  59 
  60         if (strlen(htag) > MAXNAMELEN)
  61                 return (E2BIG);
  62         /* Tempholds have a more restricted length */
  63         if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
  64                 return (E2BIG);
  65 
  66         /* tags must be unique (if ds already exists) */
  67         if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) {


  68                 uint64_t value;
  69 
  70                 error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
  71                     htag, 8, 1, &value);
  72                 if (error == 0)
  73                         error = SET_ERROR(EEXIST);
  74                 else if (error == ENOENT)
  75                         error = 0;
  76         }


  77 
  78         return (error);
  79 }
  80 
  81 static int
  82 dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
  83 {
  84         dsl_dataset_user_hold_arg_t *dduha = arg;
  85         dsl_pool_t *dp = dmu_tx_pool(tx);
  86         nvpair_t *pair;

  87 
  88         if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
  89                 return (SET_ERROR(ENOTSUP));
  90 
  91         if (!dmu_tx_is_syncing(tx))
  92                 return (0);
  93 
  94         for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
  95             pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {

  96                 dsl_dataset_t *ds;
  97                 int error = 0;
  98                 char *htag, *name;
  99 
 100                 /* must be a snapshot */
 101                 name = nvpair_name(pair);
 102                 if (strchr(name, '@') == NULL)
 103                         error = SET_ERROR(EINVAL);
 104 
 105                 if (error == 0)
 106                         error = nvpair_value_string(pair, &htag);
 107 
 108                 if (error == 0)
 109                         error = dsl_dataset_hold(dp, name, FTAG, &ds);
 110 
 111                 if (error == 0) {
 112                         error = dsl_dataset_user_hold_check_one(ds, htag,
 113                             dduha->dduha_minor != 0, tx);
 114                         dsl_dataset_rele(ds, FTAG);
 115                 }
 116 
 117                 if (error == 0) {
 118                         fnvlist_add_string(dduha->dduha_chkholds, name, htag);
 119                 } else {
 120                         /*
 121                          * We register ENOENT errors so they can be correctly
 122                          * reported if needed, such as when all holds fail.
 123                          */
 124                         fnvlist_add_int32(dduha->dduha_errlist, name, error);
 125                         if (error != ENOENT)
 126                                 return (error);
 127                 }
 128         }
 129 
 130         /* Return ENOENT if no holds would be created. */
 131         if (nvlist_next_nvpair(dduha->dduha_chkholds, NULL) == NULL)
 132                 return (ENOENT);
 133 
 134         return (0);
 135 }
 136 
 137 
 138 static void
 139 dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
 140     const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx)
 141 {
 142         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 143         objset_t *mos = dp->dp_meta_objset;
 144         uint64_t zapobj;
 145 
 146         ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 147 
 148         if (ds->ds_phys->ds_userrefs_obj == 0) {
 149                 /*
 150                  * This is the first user hold for this dataset.  Create
 151                  * the userrefs zap object.
 152                  */
 153                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
 154                 zapobj = ds->ds_phys->ds_userrefs_obj =
 155                     zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
 156         } else {
 157                 zapobj = ds->ds_phys->ds_userrefs_obj;
 158         }
 159         ds->ds_userrefs++;

 160 
 161         VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 162 
 163         if (minor != 0) {
 164                 char name[MAXNAMELEN];
 165                 nvlist_t *tags;
 166 
 167                 VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 168                     htag, now, tx));
 169                 (void) snprintf(name, sizeof (name), "%llx",
 170                     (u_longlong_t)ds->ds_object);
 171 
 172                 if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) {
 173                         tags = fnvlist_alloc();
 174                         fnvlist_add_boolean(tags, htag);
 175                         fnvlist_add_nvlist(tmpholds, name, tags);
 176                         fnvlist_free(tags);
 177                 } else {
 178                         fnvlist_add_boolean(tags, htag);
 179                 }
 180         }
 181 
 182         spa_history_log_internal_ds(ds, "hold", tx,
 183             "tag=%s temp=%d refs=%llu",
 184             htag, minor != 0, ds->ds_userrefs);
 185 }
 186 
 187 typedef struct zfs_hold_cleanup_arg {
 188         char zhca_spaname[MAXNAMELEN];
 189         uint64_t zhca_spa_load_guid;
 190         nvlist_t *zhca_holds;
 191 } zfs_hold_cleanup_arg_t;
 192 
 193 static void
 194 dsl_dataset_user_release_onexit(void *arg)
 195 {
 196         zfs_hold_cleanup_arg_t *ca = (zfs_hold_cleanup_arg_t *)arg;
 197         spa_t *spa;
 198         int error;
 199 
 200         error = spa_open(ca->zhca_spaname, &spa, FTAG);
 201         if (error != 0) {
 202                 zfs_dbgmsg("couldn't release holds on pool=%s "
 203                     "because pool is no longer loaded",
 204                     ca->zhca_spaname);
 205                 return;
 206         }
 207         if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
 208                 zfs_dbgmsg("couldn't release holds on pool=%s "
 209                     "because pool is no longer loaded (guid doesn't match)",
 210                     ca->zhca_spaname);
 211                 spa_close(spa, FTAG);
 212                 return;
 213         }
 214 
 215         (void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
 216         fnvlist_free(ca->zhca_holds);
 217         kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 218         spa_close(spa, FTAG);
 219 }
 220 
 221 static void
 222 dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
 223 {
 224         zfs_hold_cleanup_arg_t *ca;
 225 
 226         if (minor == 0 || nvlist_next_nvpair(holds, NULL) == NULL) {
 227                 fnvlist_free(holds);
 228                 return;
 229         }
 230 
 231         ASSERT(spa != NULL);
 232         ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
 233 
 234         (void) strlcpy(ca->zhca_spaname, spa_name(spa),
 235             sizeof (ca->zhca_spaname));
 236         ca->zhca_spa_load_guid = spa_load_guid(spa);
 237         ca->zhca_holds = holds;
 238         VERIFY0(zfs_onexit_add_cb(minor,
 239             dsl_dataset_user_release_onexit, ca, NULL));
 240 }
 241 
 242 void
 243 dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
 244     minor_t minor, uint64_t now, dmu_tx_t *tx)
 245 {
 246         nvlist_t *tmpholds;
 247 
 248         if (minor != 0)
 249                 tmpholds = fnvlist_alloc();
 250         else
 251                 tmpholds = NULL;
 252         dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx);
 253         dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds, minor);
 254 }
 255 
 256 static void
 257 dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 258 {
 259         dsl_dataset_user_hold_arg_t *dduha = arg;
 260         dsl_pool_t *dp = dmu_tx_pool(tx);
 261         nvpair_t *pair;
 262         nvlist_t *tmpholds;
 263         uint64_t now = gethrestime_sec();
 264 
 265         if (dduha->dduha_minor != 0)
 266                 tmpholds = fnvlist_alloc();
 267         else
 268                 tmpholds = NULL;
 269         for (pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL);
 270             pair != NULL;
 271             pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) {
 272                 dsl_dataset_t *ds;
 273 
 274                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 275                 dsl_dataset_user_hold_sync_one_impl(tmpholds, ds,
 276                     fnvpair_value_string(pair), dduha->dduha_minor, now, tx);
 277                 dsl_dataset_rele(ds, FTAG);
 278         }
 279         dsl_onexit_hold_cleanup(dp->dp_spa, tmpholds, dduha->dduha_minor);
 280 }
 281 
 282 /*
 283  * The full semantics of this function are described in the comment above
 284  * lzc_hold().
 285  *
 286  * To summarize:
 287  * holds is nvl of snapname -> holdname
 288  * errlist will be filled in with snapname -> error


 289  *
 290  * The snapshots must all be in the same pool.
 291  *
 292  * Holds for snapshots that don't exist will be skipped.
 293  *
 294  * If none of the snapshots for requested holds exist then ENOENT will be
 295  * returned.
 296  *
 297  * If cleanup_minor is not 0, the holds will be temporary, which will be cleaned
 298  * up when the process exits.
 299  *
 300  * On success all the holds, for snapshots that existed, will be created and 0
 301  * will be returned.
 302  *
 303  * On failure no holds will be created, the errlist will be filled in,
 304  * and an errno will be returned.
 305  *
 306  * In all cases the errlist will contain entries for holds where the snapshot
 307  * didn't exist.
 308  */
 309 int
 310 dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 311 {
 312         dsl_dataset_user_hold_arg_t dduha;
 313         nvpair_t *pair;
 314         int ret;
 315 
 316         pair = nvlist_next_nvpair(holds, NULL);
 317         if (pair == NULL)
 318                 return (0);
 319 
 320         dduha.dduha_holds = holds;
 321         dduha.dduha_chkholds = fnvlist_alloc();
 322         dduha.dduha_errlist = errlist;
 323         dduha.dduha_minor = cleanup_minor;
 324 
 325         ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 326             dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
 327         fnvlist_free(dduha.dduha_chkholds);
 328 
 329         return (ret);
 330 }
 331 
 332 typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag,
 333     dsl_dataset_t **dsp);
 334 
 335 typedef struct dsl_dataset_user_release_arg {
 336         dsl_holdfunc_t *ddura_holdfunc;
 337         nvlist_t *ddura_holds;
 338         nvlist_t *ddura_todelete;
 339         nvlist_t *ddura_errlist;
 340         nvlist_t *ddura_chkholds;
 341 } dsl_dataset_user_release_arg_t;
 342 
 343 /* Place a dataset hold on the snapshot identified by passed dsobj string */
 344 static int
 345 dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag,
 346     dsl_dataset_t **dsp)
 347 {
 348         return (dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp));
 349 }
 350 
 351 static int
 352 dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
 353     dsl_dataset_t *ds, nvlist_t *holds, const char *name)
 354 {
 355         uint64_t zapobj;
 356         nvpair_t *pair;
 357         nvlist_t *holds_found;
 358         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 359         int ret, numholds;



 360 
 361         if (!dsl_dataset_is_snapshot(ds))
 362                 return (SET_ERROR(EINVAL));
 363 
 364         zapobj = ds->ds_phys->ds_userrefs_obj;
 365         if (zapobj == 0)
 366                 return (SET_ERROR(ESRCH));
 367 
 368         ret = 0;
 369         numholds = 0;
 370         holds_found = fnvlist_alloc();
 371 
 372         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 373             pair = nvlist_next_nvpair(holds, pair)) {

 374                 uint64_t tmp;
 375                 int error;
 376                 const char *name;
 377 
 378                 name = nvpair_name(pair);
 379                 error = zap_lookup(mos, zapobj, name, 8, 1, &tmp);
 380 
 381                 /* Non-existent holds aren't always an error. */
 382                 if (error == ENOENT)
 383                         continue;
 384 
 385                 if (error != 0) {
 386                         fnvlist_free(holds_found);
 387                         return (error);
 388                 }
 389 
 390                 fnvlist_add_boolean(holds_found, name);
 391                 numholds++;
 392         }
 393 
 394         if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
 395             ds->ds_userrefs == numholds) {
 396                 /* we need to destroy the snapshot as well */
 397                 if (dsl_dataset_long_held(ds)) {
 398                         fnvlist_free(holds_found);
 399                         return (SET_ERROR(EBUSY));

 400                 }
 401                 fnvlist_add_boolean(ddura->ddura_todelete, name);
 402         }
 403 
 404         if (numholds == 0)
 405                 ret = ENOENT;
 406         else
 407                 fnvlist_add_nvlist(ddura->ddura_chkholds, name, holds_found);
 408         fnvlist_free(holds_found);
 409 
 410         return (ret);
 411 }
 412 
 413 static int
 414 dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
 415 {
 416         dsl_dataset_user_release_arg_t *ddura;
 417         dsl_holdfunc_t *holdfunc;
 418         dsl_pool_t *dp;
 419         nvpair_t *pair;

 420 
 421         if (!dmu_tx_is_syncing(tx))
 422                 return (0);
 423 
 424         ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 425 
 426         dp = dmu_tx_pool(tx);
 427         ddura = (dsl_dataset_user_release_arg_t *)arg;
 428         holdfunc = ddura->ddura_holdfunc;
 429 
 430         for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 431             pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 432                 const char *name;
 433                 int error;
 434                 dsl_dataset_t *ds;
 435                 nvlist_t *holds;
 436 
 437                 name = nvpair_name(pair);
 438                 error = nvpair_value_nvlist(pair, &holds);
 439                 if (error != 0)
 440                         error = (SET_ERROR(EINVAL));
 441                 if (error == 0)
 442                         error = holdfunc(dp, name, FTAG, &ds);
 443                 if (error == 0) {
 444                         error = dsl_dataset_user_release_check_one(ddura, ds,
 445                             holds, name);





 446                         dsl_dataset_rele(ds, FTAG);
 447                 }
 448                 if (error != 0) {
 449                         if (ddura->ddura_errlist != NULL) {
 450                                 fnvlist_add_int32(ddura->ddura_errlist, name,
 451                                     error);
 452                         }
 453                         /* Non-existent holds aren't always an error. */
 454                         if (error != ENOENT)
 455                                 return (error);
 456                 }
 457         }
 458 
 459         /*
 460          * Return ENOENT if none of the holds existed avoiding the overhead
 461          * of a sync.
 462          */
 463         if (nvlist_next_nvpair(ddura->ddura_chkholds, NULL) == NULL)
 464                 return (ENOENT);
 465 
 466         return (0);
 467 }
 468 
 469 static void
 470 dsl_dataset_user_release_sync_one(dsl_dataset_user_release_arg_t *ddura,
 471     dsl_dataset_t *ds, nvlist_t *holds, dmu_tx_t *tx)
 472 {
 473         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 474         objset_t *mos = dp->dp_meta_objset;


 475         nvpair_t *pair;
 476 
 477         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 478             pair = nvlist_next_nvpair(holds, pair)) {
 479                 int error;
 480                 const char *name;
 481 
 482                 name = nvpair_name(pair);
 483 
 484                 /* Remove temporary hold if one exists. */
 485                 error = dsl_pool_user_release(dp, ds->ds_object, name, tx);
 486                 VERIFY(error == 0 || error == ENOENT);
 487 
 488                 VERIFY0(zap_remove(mos, ds->ds_phys->ds_userrefs_obj, name,
 489                     tx));
 490                 ds->ds_userrefs--;
 491 
 492                 spa_history_log_internal_ds(ds, "release", tx,
 493                     "tag=%s refs=%lld", name, (longlong_t)ds->ds_userrefs);

 494         }
 495 }
 496 
 497 static void
 498 dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
 499 {
 500         dsl_dataset_user_release_arg_t *ddura = arg;
 501         dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
 502         dsl_pool_t *dp = dmu_tx_pool(tx);
 503         nvpair_t *pair;
 504 
 505         ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 506 
 507         for (pair = nvlist_next_nvpair(ddura->ddura_chkholds, NULL);
 508             pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_chkholds,
 509             pair)) {
 510                 dsl_dataset_t *ds;
 511                 const char *name;
 512 
 513                 name = nvpair_name(pair);
 514                 VERIFY0(holdfunc(dp, name, FTAG, &ds));
 515 
 516                 dsl_dataset_user_release_sync_one(ddura, ds,
 517                     fnvpair_value_nvlist(pair), tx);
 518                 if (nvlist_exists(ddura->ddura_todelete, name)) {

 519                         ASSERT(ds->ds_userrefs == 0 &&
 520                             ds->ds_phys->ds_num_children == 1 &&
 521                             DS_IS_DEFER_DESTROY(ds));
 522                         dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 523                 }
 524                 dsl_dataset_rele(ds, FTAG);
 525         }
 526 }
 527 
 528 /*
 529  * The full semantics of this function are described in the comment above
 530  * lzc_release().
 531  *
 532  * To summarize:
 533  * Releases holds specified in the nvl holds.
 534  *
 535  * holds is nvl of snapname -> { holdname, ... }
 536  * errlist will be filled in with snapname -> error
 537  *
 538  * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
 539  * otherwise they should be the names of snapshots.
 540  *
 541  * As a release may cause snapshots to be destroyed this tries to ensure they
 542  * aren't mounted.
 543  *
 544  * Releases of non-existent holds are skipped.
 545  *
 546  * At least one hold must have been released for this function to succeed
 547  * and return 0.
 548  */
 549 static int
 550 dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
 551     dsl_pool_t *tmpdp)
 552 {
 553         dsl_dataset_user_release_arg_t ddura;
 554         nvpair_t *pair;
 555         char *pool;
 556         int error;
 557 
 558         pair = nvlist_next_nvpair(holds, NULL);
 559         if (pair == NULL)
 560                 return (0);
 561 
 562 #ifdef _KERNEL
 563         /*
 564          * The release may cause snapshots to be destroyed; make sure they
 565          * are not mounted.






















































 566          */
 567         if (tmpdp != NULL) {
 568                 /* Temporary holds are specified by dsobj string. */
 569                 ddura.ddura_holdfunc = dsl_dataset_hold_obj_string;
 570                 pool = spa_name(tmpdp->dp_spa);
 571 
 572                 dsl_pool_config_enter(tmpdp, FTAG);
 573                 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 574                     pair = nvlist_next_nvpair(holds, pair)) {
 575                         dsl_dataset_t *ds;

 576 
 577                         error = dsl_dataset_hold_obj_string(tmpdp,
 578                             nvpair_name(pair), FTAG, &ds);


 579                         if (error == 0) {
 580                                 char name[MAXNAMELEN];
 581                                 dsl_dataset_name(ds, name);
 582                                 dsl_dataset_rele(ds, FTAG);

 583                                 zfs_unmount_snap(name);
 584                         }
 585                 }
 586                 dsl_pool_config_exit(tmpdp, FTAG);
 587         } else {
 588                 /* Non-temporary holds are specified by name. */
 589                 ddura.ddura_holdfunc = dsl_dataset_hold;
 590                 pool = nvpair_name(pair);
 591 
 592                 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 593                     pair = nvlist_next_nvpair(holds, pair)) {
 594                         zfs_unmount_snap(nvpair_name(pair));
 595                 }
 596         }
 597 #endif
 598 
 599         ddura.ddura_holds = holds;
 600         ddura.ddura_errlist = errlist;
 601         ddura.ddura_todelete = fnvlist_alloc();
 602         ddura.ddura_chkholds = fnvlist_alloc();












 603 
 604         error = dsl_sync_task(pool, dsl_dataset_user_release_check,
 605             dsl_dataset_user_release_sync, &ddura,
 606             fnvlist_num_pairs(holds));
 607         fnvlist_free(ddura.ddura_todelete);
 608         fnvlist_free(ddura.ddura_chkholds);

 609 
 610         return (error);
 611 }












 612 
 613 /*
 614  * holds is nvl of snapname -> { holdname, ... }
 615  * errlist will be filled in with snapname -> error
 616  */
 617 int
 618 dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
 619 {
 620         return (dsl_dataset_user_release_impl(holds, errlist, NULL));
 621 }
 622 
 623 /*
 624  * holds is nvl of snapdsobj -> { holdname, ... }
 625  */
 626 void
 627 dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds)

 628 {
 629         ASSERT(dp != NULL);
 630         (void) dsl_dataset_user_release_impl(holds, NULL, dp);







 631 }
 632 
 633 int
 634 dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 635 {
 636         dsl_pool_t *dp;
 637         dsl_dataset_t *ds;
 638         int err;
 639 
 640         err = dsl_pool_hold(dsname, FTAG, &dp);
 641         if (err != 0)
 642                 return (err);
 643         err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
 644         if (err != 0) {
 645                 dsl_pool_rele(dp, FTAG);
 646                 return (err);
 647         }
 648 
 649         if (ds->ds_phys->ds_userrefs_obj != 0) {
 650                 zap_attribute_t *za;