Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013 by Delphix. All rights reserved.

  24  */
  25 
  26 #include <sys/zfs_context.h>
  27 #include <sys/dsl_userhold.h>
  28 #include <sys/dsl_dataset.h>
  29 #include <sys/dsl_destroy.h>
  30 #include <sys/dsl_synctask.h>
  31 #include <sys/dmu_tx.h>
  32 #include <sys/zfs_onexit.h>
  33 #include <sys/dsl_pool.h>
  34 #include <sys/dsl_dir.h>
  35 #include <sys/zfs_ioctl.h>
  36 #include <sys/zap.h>
  37 
  38 typedef struct dsl_dataset_user_hold_arg {
  39         nvlist_t *dduha_holds;

  40         nvlist_t *dduha_errlist;
  41         minor_t dduha_minor;
  42 } dsl_dataset_user_hold_arg_t;
  43 
  44 /*
  45  * If you add new checks here, you may need to add additional checks to the
  46  * "temporary" case in snapshot_check() in dmu_objset.c.
  47  */
  48 int
  49 dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
  50     boolean_t temphold, dmu_tx_t *tx)
  51 {
  52         dsl_pool_t *dp = dmu_tx_pool(tx);
  53         objset_t *mos = dp->dp_meta_objset;
  54         int error = 0;
  55 


  56         if (strlen(htag) > MAXNAMELEN)
  57                 return (E2BIG);
  58         /* Tempholds have a more restricted length */
  59         if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
  60                 return (E2BIG);
  61 
  62         /* tags must be unique (if ds already exists) */
  63         if (ds != NULL) {
  64                 mutex_enter(&ds->ds_lock);
  65                 if (ds->ds_phys->ds_userrefs_obj != 0) {
  66                         uint64_t value;

  67                         error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
  68                             htag, 8, 1, &value);
  69                         if (error == 0)
  70                                 error = SET_ERROR(EEXIST);
  71                         else if (error == ENOENT)
  72                                 error = 0;
  73                 }
  74                 mutex_exit(&ds->ds_lock);
  75         }
  76 
  77         return (error);
  78 }
  79 
  80 static int
  81 dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
  82 {
  83         dsl_dataset_user_hold_arg_t *dduha = arg;
  84         dsl_pool_t *dp = dmu_tx_pool(tx);
  85         nvpair_t *pair;
  86         int rv = 0;
  87 
  88         if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
  89                 return (SET_ERROR(ENOTSUP));
  90 
  91         for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
  92             pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
  93                 int error = 0;


  94                 dsl_dataset_t *ds;
  95                 char *htag;

  96 
  97                 /* must be a snapshot */
  98                 if (strchr(nvpair_name(pair), '@') == NULL)

  99                         error = SET_ERROR(EINVAL);
 100 
 101                 if (error == 0)
 102                         error = nvpair_value_string(pair, &htag);
 103                 if (error == 0) {
 104                         error = dsl_dataset_hold(dp,
 105                             nvpair_name(pair), FTAG, &ds);
 106                 }
 107                 if (error == 0) {
 108                         error = dsl_dataset_user_hold_check_one(ds, htag,
 109                             dduha->dduha_minor != 0, tx);
 110                         dsl_dataset_rele(ds, FTAG);
 111                 }
 112 
 113                 if (error != 0) {
 114                         rv = error;
 115                         fnvlist_add_int32(dduha->dduha_errlist,
 116                             nvpair_name(pair), error);






 117                 }
 118         }
 119         return (rv);





 120 }
 121 
 122 void
 123 dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
 124     minor_t minor, uint64_t now, dmu_tx_t *tx)

 125 {
 126         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 127         objset_t *mos = dp->dp_meta_objset;
 128         uint64_t zapobj;
 129 
 130         mutex_enter(&ds->ds_lock);

 131         if (ds->ds_phys->ds_userrefs_obj == 0) {
 132                 /*
 133                  * This is the first user hold for this dataset.  Create
 134                  * the userrefs zap object.
 135                  */
 136                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
 137                 zapobj = ds->ds_phys->ds_userrefs_obj =
 138                     zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
 139         } else {
 140                 zapobj = ds->ds_phys->ds_userrefs_obj;
 141         }
 142         ds->ds_userrefs++;
 143         mutex_exit(&ds->ds_lock);
 144 
 145         VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 146 
 147         if (minor != 0) {



 148                 VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 149                     htag, now, tx));
 150                 dsl_register_onexit_hold_cleanup(ds, htag, minor);










 151         }
 152 
 153         spa_history_log_internal_ds(ds, "hold", tx,
 154             "tag=%s temp=%d refs=%llu",
 155             htag, minor != 0, ds->ds_userrefs);
 156 }
 157 





































































 158 static void
 159 dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 160 {
 161         dsl_dataset_user_hold_arg_t *dduha = arg;
 162         dsl_pool_t *dp = dmu_tx_pool(tx);
 163         nvpair_t *pair;
 164         uint64_t now = gethrestime_sec();
 165 
 166         for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
 167             pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {





 168                 dsl_dataset_t *ds;

 169                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 170                 dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
 171                     dduha->dduha_minor, now, tx);
 172                 dsl_dataset_rele(ds, FTAG);
 173         }

 174 }
 175 
 176 /*




 177  * holds is nvl of snapname -> holdname
 178  * errlist will be filled in with snapname -> error
 179  * if cleanup_minor is not 0, the holds will be temporary, cleaned up
 180  * when the process exits.
 181  *
 182  * if any fails, all will fail.

















 183  */
 184 int
 185 dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 186 {
 187         dsl_dataset_user_hold_arg_t dduha;
 188         nvpair_t *pair;

 189 
 190         pair = nvlist_next_nvpair(holds, NULL);
 191         if (pair == NULL)
 192                 return (0);
 193 
 194         dduha.dduha_holds = holds;

 195         dduha.dduha_errlist = errlist;
 196         dduha.dduha_minor = cleanup_minor;
 197 
 198         return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 199             dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));



 200 }
 201 



 202 typedef struct dsl_dataset_user_release_arg {

 203         nvlist_t *ddura_holds;
 204         nvlist_t *ddura_todelete;
 205         nvlist_t *ddura_errlist;

 206 } dsl_dataset_user_release_arg_t;
 207 

 208 static int
 209 dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
 210     nvlist_t *holds, boolean_t *todelete)
 211 {
 212         uint64_t zapobj;
 213         nvpair_t *pair;
 214         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 215         int error;
 216         int numholds = 0;
 217 
 218         *todelete = B_FALSE;







 219 
 220         if (!dsl_dataset_is_snapshot(ds))
 221                 return (SET_ERROR(EINVAL));
 222 





 223         zapobj = ds->ds_phys->ds_userrefs_obj;
 224         if (zapobj == 0)
 225                 return (SET_ERROR(ESRCH));
 226 
 227         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 228             pair = nvlist_next_nvpair(holds, pair)) {
 229                 /* Make sure the hold exists */
 230                 uint64_t tmp;
 231                 error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
 232                 if (error == ENOENT)
 233                         error = SET_ERROR(ESRCH);
 234                 if (error != 0)





















 235                         return (error);



 236                 numholds++;
 237         }
 238 
 239         if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
 240             ds->ds_userrefs == numholds) {
 241                 /* we need to destroy the snapshot as well */
 242 
 243                 if (dsl_dataset_long_held(ds))
 244                         return (SET_ERROR(EBUSY));
 245                 *todelete = B_TRUE;
 246         }









 247         return (0);
 248 }
 249 
 250 static int
 251 dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
 252 {
 253         dsl_dataset_user_release_arg_t *ddura = arg;
 254         dsl_pool_t *dp = dmu_tx_pool(tx);
 255         nvpair_t *pair;
 256         int rv = 0;
 257 
 258         if (!dmu_tx_is_syncing(tx))
 259                 return (0);
 260 
 261         for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 262             pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 263                 const char *name = nvpair_name(pair);






 264                 int error;
 265                 dsl_dataset_t *ds;
 266                 nvlist_t *holds;

 267 
 268                 error = nvpair_value_nvlist(pair, &holds);
 269                 if (error != 0)
 270                         return (SET_ERROR(EINVAL));
 271 
 272                 error = dsl_dataset_hold(dp, name, FTAG, &ds);
 273                 if (error == 0) {
 274                         boolean_t deleteme;
 275                         error = dsl_dataset_user_release_check_one(ds,
 276                             holds, &deleteme);
 277                         if (error == 0 && deleteme) {
 278                                 fnvlist_add_boolean(ddura->ddura_todelete,
 279                                     name);
 280                         }
 281                         dsl_dataset_rele(ds, FTAG);
 282                 }
 283                 if (error != 0) {
 284                         if (ddura->ddura_errlist != NULL) {
 285                                 fnvlist_add_int32(ddura->ddura_errlist,
 286                                     name, error);
 287                         }
 288                         rv = error;





 289                 }
 290         }
 291         return (rv);





 292 }
 293 
 294 static void
 295 dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
 296     dmu_tx_t *tx)
 297 {
 298         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 299         objset_t *mos = dp->dp_meta_objset;
 300         uint64_t zapobj;
 301         int error;
 302         nvpair_t *pair;
 303 
 304         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 305             pair = nvlist_next_nvpair(holds, pair)) {
 306                 ds->ds_userrefs--;
 307                 error = dsl_pool_user_release(dp, ds->ds_object,
 308                     nvpair_name(pair), tx);


 309                 VERIFY(error == 0 || error == ENOENT);
 310                 zapobj = ds->ds_phys->ds_userrefs_obj;
 311                 VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));


 312 
 313                 spa_history_log_internal_ds(ds, "release", tx,
 314                     "tag=%s refs=%lld", nvpair_name(pair),
 315                     (longlong_t)ds->ds_userrefs);
 316         }
 317 }
 318 
 319 static void
 320 dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
 321 {
 322         dsl_dataset_user_release_arg_t *ddura = arg;

 323         dsl_pool_t *dp = dmu_tx_pool(tx);
 324         nvpair_t *pair;
 325 
 326         for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 327             pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {



 328                 dsl_dataset_t *ds;



 329 
 330                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 331                 dsl_dataset_user_release_sync_one(ds,
 332                     fnvpair_value_nvlist(pair), tx);
 333                 if (nvlist_exists(ddura->ddura_todelete,
 334                     nvpair_name(pair))) {
 335                         ASSERT(ds->ds_userrefs == 0 &&
 336                             ds->ds_phys->ds_num_children == 1 &&
 337                             DS_IS_DEFER_DESTROY(ds));
 338                         dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 339                 }
 340                 dsl_dataset_rele(ds, FTAG);
 341         }
 342 }
 343 
 344 /*






 345  * holds is nvl of snapname -> { holdname, ... }
 346  * errlist will be filled in with snapname -> error
 347  *
 348  * if any fails, all will fail.









 349  */
 350 int
 351 dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)

 352 {
 353         dsl_dataset_user_release_arg_t ddura;
 354         nvpair_t *pair;

 355         int error;
 356 
 357         pair = nvlist_next_nvpair(holds, NULL);
 358         if (pair == NULL)
 359                 return (0);
 360 
 361         ddura.ddura_holds = holds;
 362         ddura.ddura_errlist = errlist;
 363         ddura.ddura_todelete = fnvlist_alloc();
 364 
 365         error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
 366             dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
 367         fnvlist_free(ddura.ddura_todelete);
 368         return (error);
 369 }
 370 
 371 typedef struct dsl_dataset_user_release_tmp_arg {
 372         uint64_t ddurta_dsobj;
 373         nvlist_t *ddurta_holds;
 374         boolean_t ddurta_deleteme;
 375 } dsl_dataset_user_release_tmp_arg_t;
 376 
 377 static int
 378 dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
 379 {
 380         dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 381         dsl_pool_t *dp = dmu_tx_pool(tx);
 382         dsl_dataset_t *ds;
 383         int error;
 384 
 385         if (!dmu_tx_is_syncing(tx))
 386                 return (0);
 387 
 388         error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
 389         if (error)
 390                 return (error);
 391 
 392         error = dsl_dataset_user_release_check_one(ds,
 393             ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
 394         dsl_dataset_rele(ds, FTAG);
 395         return (error);
 396 }
 397 
 398 static void
 399 dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
 400 {
 401         dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 402         dsl_pool_t *dp = dmu_tx_pool(tx);
 403         dsl_dataset_t *ds;
 404 
 405         VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
 406         dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
 407         if (ddurta->ddurta_deleteme) {
 408                 ASSERT(ds->ds_userrefs == 0 &&
 409                     ds->ds_phys->ds_num_children == 1 &&
 410                     DS_IS_DEFER_DESTROY(ds));
 411                 dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 412         }
 413         dsl_dataset_rele(ds, FTAG);
 414 }
 415 
 416 /*
 417  * Called at spa_load time to release a stale temporary user hold.
 418  * Also called by the onexit code.
 419  */
 420 void
 421 dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
 422 {
 423         dsl_dataset_user_release_tmp_arg_t ddurta;




 424         dsl_dataset_t *ds;
 425         int error;
 426 
 427 #ifdef _KERNEL
 428         /* Make sure it is not mounted. */
 429         dsl_pool_config_enter(dp, FTAG);
 430         error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
 431         if (error == 0) {
 432                 char name[MAXNAMELEN];
 433                 dsl_dataset_name(ds, name);
 434                 dsl_dataset_rele(ds, FTAG);
 435                 dsl_pool_config_exit(dp, FTAG);
 436                 zfs_unmount_snap(name);




 437         } else {
 438                 dsl_pool_config_exit(dp, FTAG);






 439         }
 440 #endif

 441 
 442         ddurta.ddurta_dsobj = dsobj;
 443         ddurta.ddurta_holds = fnvlist_alloc();
 444         fnvlist_add_boolean(ddurta.ddurta_holds, htag);
 445 
 446         (void) dsl_sync_task(spa_name(dp->dp_spa),
 447             dsl_dataset_user_release_tmp_check,
 448             dsl_dataset_user_release_tmp_sync, &ddurta, 1);
 449         fnvlist_free(ddurta.ddurta_holds);
 450 }
 451 
 452 typedef struct zfs_hold_cleanup_arg {
 453         char zhca_spaname[MAXNAMELEN];
 454         uint64_t zhca_spa_load_guid;
 455         uint64_t zhca_dsobj;
 456         char zhca_htag[MAXNAMELEN];
 457 } zfs_hold_cleanup_arg_t;
 458 
 459 static void
 460 dsl_dataset_user_release_onexit(void *arg)
 461 {
 462         zfs_hold_cleanup_arg_t *ca = arg;
 463         spa_t *spa;
 464         int error;
 465 
 466         error = spa_open(ca->zhca_spaname, &spa, FTAG);
 467         if (error != 0) {
 468                 zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 469                     "because pool is no longer loaded",
 470                     ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 471                 return;
 472         }
 473         if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
 474                 zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 475                     "because pool is no longer loaded (guid doesn't match)",
 476                     ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 477                 spa_close(spa, FTAG);
 478                 return;
 479         }
 480 
 481         dsl_dataset_user_release_tmp(spa_get_dsl(spa),
 482             ca->zhca_dsobj, ca->zhca_htag);
 483         kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 484         spa_close(spa, FTAG);




 485 }
 486 



 487 void
 488 dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
 489     minor_t minor)
 490 {
 491         zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
 492         spa_t *spa = dsl_dataset_get_spa(ds);
 493         (void) strlcpy(ca->zhca_spaname, spa_name(spa),
 494             sizeof (ca->zhca_spaname));
 495         ca->zhca_spa_load_guid = spa_load_guid(spa);
 496         ca->zhca_dsobj = ds->ds_object;
 497         (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
 498         VERIFY0(zfs_onexit_add_cb(minor,
 499             dsl_dataset_user_release_onexit, ca, NULL));
 500 }
 501 
 502 int
 503 dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 504 {
 505         dsl_pool_t *dp;
 506         dsl_dataset_t *ds;
 507         int err;
 508 
 509         err = dsl_pool_hold(dsname, FTAG, &dp);
 510         if (err != 0)
 511                 return (err);
 512         err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
 513         if (err != 0) {
 514                 dsl_pool_rele(dp, FTAG);
 515                 return (err);
 516         }
 517 
 518         if (ds->ds_phys->ds_userrefs_obj != 0) {
 519                 zap_attribute_t *za;


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013 by Delphix. All rights reserved.
  24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  25  */
  26 
  27 #include <sys/zfs_context.h>
  28 #include <sys/dsl_userhold.h>
  29 #include <sys/dsl_dataset.h>
  30 #include <sys/dsl_destroy.h>
  31 #include <sys/dsl_synctask.h>
  32 #include <sys/dmu_tx.h>
  33 #include <sys/zfs_onexit.h>
  34 #include <sys/dsl_pool.h>
  35 #include <sys/dsl_dir.h>
  36 #include <sys/zfs_ioctl.h>
  37 #include <sys/zap.h>
  38 
  39 typedef struct dsl_dataset_user_hold_arg {
  40         nvlist_t *dduha_holds;
  41         nvlist_t *dduha_chkholds;
  42         nvlist_t *dduha_errlist;
  43         minor_t dduha_minor;
  44 } dsl_dataset_user_hold_arg_t;
  45 
  46 /*
  47  * If you add new checks here, you may need to add additional checks to the
  48  * "temporary" case in snapshot_check() in dmu_objset.c.
  49  */
  50 int
  51 dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
  52     boolean_t temphold, dmu_tx_t *tx)
  53 {
  54         dsl_pool_t *dp = dmu_tx_pool(tx);
  55         objset_t *mos = dp->dp_meta_objset;
  56         int error = 0;
  57 
  58         ASSERT(dsl_pool_config_held(dp));
  59 
  60         if (strlen(htag) > MAXNAMELEN)
  61                 return (SET_ERROR(E2BIG));
  62         /* Tempholds have a more restricted length */
  63         if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
  64                 return (SET_ERROR(E2BIG));
  65 
  66         /* tags must be unique (if ds already exists) */
  67         if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) {


  68                 uint64_t value;
  69 
  70                 error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
  71                     htag, 8, 1, &value);
  72                 if (error == 0)
  73                         error = SET_ERROR(EEXIST);
  74                 else if (error == ENOENT)
  75                         error = 0;
  76         }


  77 
  78         return (error);
  79 }
  80 
  81 static int
  82 dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
  83 {
  84         dsl_dataset_user_hold_arg_t *dduha = arg;
  85         dsl_pool_t *dp = dmu_tx_pool(tx);


  86 
  87         if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
  88                 return (SET_ERROR(ENOTSUP));
  89 
  90         if (!dmu_tx_is_syncing(tx))
  91                 return (0);
  92 
  93         for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_holds, NULL);
  94             pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
  95                 dsl_dataset_t *ds;
  96                 int error = 0;
  97                 char *htag, *name;
  98 
  99                 /* must be a snapshot */
 100                 name = nvpair_name(pair);
 101                 if (strchr(name, '@') == NULL)
 102                         error = SET_ERROR(EINVAL);
 103 
 104                 if (error == 0)
 105                         error = nvpair_value_string(pair, &htag);
 106 
 107                 if (error == 0)
 108                         error = dsl_dataset_hold(dp, name, FTAG, &ds);
 109 
 110                 if (error == 0) {
 111                         error = dsl_dataset_user_hold_check_one(ds, htag,
 112                             dduha->dduha_minor != 0, tx);
 113                         dsl_dataset_rele(ds, FTAG);
 114                 }
 115 
 116                 if (error == 0) {
 117                         fnvlist_add_string(dduha->dduha_chkholds, name, htag);
 118                 } else {
 119                         /*
 120                          * We register ENOENT errors so they can be correctly
 121                          * reported if needed, such as when all holds fail.
 122                          */
 123                         fnvlist_add_int32(dduha->dduha_errlist, name, error);
 124                         if (error != ENOENT)
 125                                 return (error);
 126                 }
 127         }
 128 
 129         /* Return ENOENT if no holds would be created. */
 130         if (nvlist_empty(dduha->dduha_chkholds))
 131                 return (SET_ERROR(ENOENT));
 132 
 133         return (0);
 134 }
 135 
 136 
 137 static void
 138 dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
 139     const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx)
 140 {
 141         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 142         objset_t *mos = dp->dp_meta_objset;
 143         uint64_t zapobj;
 144 
 145         ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 146 
 147         if (ds->ds_phys->ds_userrefs_obj == 0) {
 148                 /*
 149                  * This is the first user hold for this dataset.  Create
 150                  * the userrefs zap object.
 151                  */
 152                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
 153                 zapobj = ds->ds_phys->ds_userrefs_obj =
 154                     zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
 155         } else {
 156                 zapobj = ds->ds_phys->ds_userrefs_obj;
 157         }
 158         ds->ds_userrefs++;

 159 
 160         VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 161 
 162         if (minor != 0) {
 163                 char name[MAXNAMELEN];
 164                 nvlist_t *tags;
 165 
 166                 VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 167                     htag, now, tx));
 168                 (void) snprintf(name, sizeof (name), "%llx",
 169                     (u_longlong_t)ds->ds_object);
 170 
 171                 if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) {
 172                         tags = fnvlist_alloc();
 173                         fnvlist_add_boolean(tags, htag);
 174                         fnvlist_add_nvlist(tmpholds, name, tags);
 175                         fnvlist_free(tags);
 176                 } else {
 177                         fnvlist_add_boolean(tags, htag);
 178                 }
 179         }
 180 
 181         spa_history_log_internal_ds(ds, "hold", tx,
 182             "tag=%s temp=%d refs=%llu",
 183             htag, minor != 0, ds->ds_userrefs);
 184 }
 185 
 186 typedef struct zfs_hold_cleanup_arg {
 187         char zhca_spaname[MAXNAMELEN];
 188         uint64_t zhca_spa_load_guid;
 189         nvlist_t *zhca_holds;
 190 } zfs_hold_cleanup_arg_t;
 191 
 192 static void
 193 dsl_dataset_user_release_onexit(void *arg)
 194 {
 195         zfs_hold_cleanup_arg_t *ca = arg;
 196         spa_t *spa;
 197         int error;
 198 
 199         error = spa_open(ca->zhca_spaname, &spa, FTAG);
 200         if (error != 0) {
 201                 zfs_dbgmsg("couldn't release holds on pool=%s "
 202                     "because pool is no longer loaded",
 203                     ca->zhca_spaname);
 204                 return;
 205         }
 206         if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
 207                 zfs_dbgmsg("couldn't release holds on pool=%s "
 208                     "because pool is no longer loaded (guid doesn't match)",
 209                     ca->zhca_spaname);
 210                 spa_close(spa, FTAG);
 211                 return;
 212         }
 213 
 214         (void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
 215         fnvlist_free(ca->zhca_holds);
 216         kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 217         spa_close(spa, FTAG);
 218 }
 219 
 220 static void
 221 dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
 222 {
 223         zfs_hold_cleanup_arg_t *ca;
 224 
 225         if (minor == 0 || nvlist_empty(holds)) {
 226                 fnvlist_free(holds);
 227                 return;
 228         }
 229 
 230         ASSERT(spa != NULL);
 231         ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
 232 
 233         (void) strlcpy(ca->zhca_spaname, spa_name(spa),
 234             sizeof (ca->zhca_spaname));
 235         ca->zhca_spa_load_guid = spa_load_guid(spa);
 236         ca->zhca_holds = holds;
 237         VERIFY0(zfs_onexit_add_cb(minor,
 238             dsl_dataset_user_release_onexit, ca, NULL));
 239 }
 240 
 241 void
 242 dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
 243     minor_t minor, uint64_t now, dmu_tx_t *tx)
 244 {
 245         nvlist_t *tmpholds;
 246 
 247         if (minor != 0)
 248                 tmpholds = fnvlist_alloc();
 249         else
 250                 tmpholds = NULL;
 251         dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx);
 252         dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds, minor);
 253 }
 254 
 255 static void
 256 dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 257 {
 258         dsl_dataset_user_hold_arg_t *dduha = arg;
 259         dsl_pool_t *dp = dmu_tx_pool(tx);
 260         nvlist_t *tmpholds;
 261         uint64_t now = gethrestime_sec();
 262 
 263         if (dduha->dduha_minor != 0)
 264                 tmpholds = fnvlist_alloc();
 265         else
 266                 tmpholds = NULL;
 267         for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL);
 268             pair != NULL;
 269             pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) {
 270                 dsl_dataset_t *ds;
 271 
 272                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 273                 dsl_dataset_user_hold_sync_one_impl(tmpholds, ds,
 274                     fnvpair_value_string(pair), dduha->dduha_minor, now, tx);
 275                 dsl_dataset_rele(ds, FTAG);
 276         }
 277         dsl_onexit_hold_cleanup(dp->dp_spa, tmpholds, dduha->dduha_minor);
 278 }
 279 
 280 /*
 281  * The full semantics of this function are described in the comment above
 282  * lzc_hold().
 283  *
 284  * To summarize:
 285  * holds is nvl of snapname -> holdname
 286  * errlist will be filled in with snapname -> error


 287  *
 288  * The snaphosts must all be in the same pool.
 289  *
 290  * Holds for snapshots that don't exist will be skipped.
 291  *
 292  * If none of the snapshots for requested holds exist then ENOENT will be
 293  * returned.
 294  *
 295  * If cleanup_minor is not 0, the holds will be temporary, which will be cleaned
 296  * up when the process exits.
 297  *
 298  * On success all the holds, for snapshots that existed, will be created and 0
 299  * will be returned.
 300  *
 301  * On failure no holds will be created, the errlist will be filled in,
 302  * and an errno will returned.
 303  *
 304  * In all cases the errlist will contain entries for holds where the snapshot
 305  * didn't exist.
 306  */
 307 int
 308 dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 309 {
 310         dsl_dataset_user_hold_arg_t dduha;
 311         nvpair_t *pair;
 312         int ret;
 313 
 314         pair = nvlist_next_nvpair(holds, NULL);
 315         if (pair == NULL)
 316                 return (0);
 317 
 318         dduha.dduha_holds = holds;
 319         dduha.dduha_chkholds = fnvlist_alloc();
 320         dduha.dduha_errlist = errlist;
 321         dduha.dduha_minor = cleanup_minor;
 322 
 323         ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 324             dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
 325         fnvlist_free(dduha.dduha_chkholds);
 326 
 327         return (ret);
 328 }
 329 
 330 typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag,
 331     dsl_dataset_t **dsp);
 332 
 333 typedef struct dsl_dataset_user_release_arg {
 334         dsl_holdfunc_t *ddura_holdfunc;
 335         nvlist_t *ddura_holds;
 336         nvlist_t *ddura_todelete;
 337         nvlist_t *ddura_errlist;
 338         nvlist_t *ddura_chkholds;
 339 } dsl_dataset_user_release_arg_t;
 340 
 341 /* Place a dataset hold on the snapshot identified by passed dsobj string */
 342 static int
 343 dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag,
 344     dsl_dataset_t **dsp)
 345 {
 346         return (dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp));
 347 }



 348 
 349 static int
 350 dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
 351     dsl_dataset_t *ds, nvlist_t *holds, const char *snapname)
 352 {
 353         uint64_t zapobj;
 354         nvlist_t *holds_found;
 355         objset_t *mos;
 356         int numholds;
 357 
 358         if (!dsl_dataset_is_snapshot(ds))
 359                 return (SET_ERROR(EINVAL));
 360 
 361         if (nvlist_empty(holds))
 362                 return (0);
 363 
 364         numholds = 0;
 365         mos = ds->ds_dir->dd_pool->dp_meta_objset;
 366         zapobj = ds->ds_phys->ds_userrefs_obj;
 367         holds_found = fnvlist_alloc();

 368 
 369         for (nvpair_t *pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 370             pair = nvlist_next_nvpair(holds, pair)) {

 371                 uint64_t tmp;
 372                 int error;
 373                 const char *holdname = nvpair_name(pair);
 374 
 375                 if (zapobj != 0)
 376                         error = zap_lookup(mos, zapobj, holdname, 8, 1, &tmp);
 377                 else
 378                         error = SET_ERROR(ENOENT);
 379 
 380                 /*
 381                  * Non-existent holds are put on the errlist, but don't
 382                  * cause an overall failure.
 383                  */
 384                 if (error == ENOENT) {
 385                         if (ddura->ddura_errlist != NULL) {
 386                                 char *errtag = kmem_asprintf("%s#%s",
 387                                     snapname, holdname);
 388                                 fnvlist_add_int32(ddura->ddura_errlist, errtag,
 389                                     ENOENT);
 390                                 strfree(errtag);
 391                         }
 392                         continue;
 393                 }
 394 
 395                 if (error != 0) {
 396                         fnvlist_free(holds_found);
 397                         return (error);
 398                 }
 399 
 400                 fnvlist_add_boolean(holds_found, holdname);
 401                 numholds++;
 402         }
 403 
 404         if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
 405             ds->ds_userrefs == numholds) {
 406                 /* we need to destroy the snapshot as well */
 407                 if (dsl_dataset_long_held(ds)) {
 408                         fnvlist_free(holds_found);
 409                         return (SET_ERROR(EBUSY));

 410                 }
 411                 fnvlist_add_boolean(ddura->ddura_todelete, snapname);
 412         }
 413 
 414         if (numholds != 0) {
 415                 fnvlist_add_nvlist(ddura->ddura_chkholds, snapname,
 416                     holds_found);
 417         }
 418         fnvlist_free(holds_found);
 419 
 420         return (0);
 421 }
 422 
 423 static int
 424 dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
 425 {
 426         dsl_dataset_user_release_arg_t *ddura;
 427         dsl_holdfunc_t *holdfunc;
 428         dsl_pool_t *dp;

 429 
 430         if (!dmu_tx_is_syncing(tx))
 431                 return (0);
 432 
 433         dp = dmu_tx_pool(tx);
 434 
 435         ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 436 
 437         ddura = arg;
 438         holdfunc = ddura->ddura_holdfunc;
 439 
 440         for (nvpair_t *pair = nvlist_next_nvpair(ddura->ddura_holds, NULL);
 441             pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 442                 int error;
 443                 dsl_dataset_t *ds;
 444                 nvlist_t *holds;
 445                 const char *snapname = nvpair_name(pair);
 446 
 447                 error = nvpair_value_nvlist(pair, &holds);
 448                 if (error != 0)
 449                         error = (SET_ERROR(EINVAL));
 450                 else
 451                         error = holdfunc(dp, snapname, FTAG, &ds);
 452                 if (error == 0) {
 453                         error = dsl_dataset_user_release_check_one(ddura, ds,
 454                             holds, snapname);





 455                         dsl_dataset_rele(ds, FTAG);
 456                 }
 457                 if (error != 0) {
 458                         if (ddura->ddura_errlist != NULL) {
 459                                 fnvlist_add_int32(ddura->ddura_errlist,
 460                                     snapname, error);
 461                         }
 462                         /*
 463                          * Non-existent snapshots are put on the errlist,
 464                          * but don't cause an overall failure.
 465                          */
 466                         if (error != ENOENT)
 467                                 return (error);
 468                 }
 469         }
 470 
 471         /* Return ENOENT if none of the holds existed. */
 472         if (nvlist_empty(ddura->ddura_chkholds))
 473                 return (SET_ERROR(ENOENT));
 474 
 475         return (0);
 476 }
 477 
 478 static void
 479 dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
 480     dmu_tx_t *tx)
 481 {
 482         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 483         objset_t *mos = dp->dp_meta_objset;



 484 
 485         for (nvpair_t *pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 486             pair = nvlist_next_nvpair(holds, pair)) {
 487                 int error;
 488                 const char *holdname = nvpair_name(pair);
 489 
 490                 /* Remove temporary hold if one exists. */
 491                 error = dsl_pool_user_release(dp, ds->ds_object, holdname, tx);
 492                 VERIFY(error == 0 || error == ENOENT);
 493 
 494                 VERIFY0(zap_remove(mos, ds->ds_phys->ds_userrefs_obj, holdname,
 495                     tx));
 496                 ds->ds_userrefs--;
 497 
 498                 spa_history_log_internal_ds(ds, "release", tx,
 499                     "tag=%s refs=%lld", holdname, (longlong_t)ds->ds_userrefs);

 500         }
 501 }
 502 
 503 static void
 504 dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
 505 {
 506         dsl_dataset_user_release_arg_t *ddura = arg;
 507         dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
 508         dsl_pool_t *dp = dmu_tx_pool(tx);

 509 
 510         ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
 511 
 512         for (nvpair_t *pair = nvlist_next_nvpair(ddura->ddura_chkholds, NULL);
 513             pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_chkholds,
 514             pair)) {
 515                 dsl_dataset_t *ds;
 516                 const char *name = nvpair_name(pair);
 517 
 518                 VERIFY0(holdfunc(dp, name, FTAG, &ds));
 519 

 520                 dsl_dataset_user_release_sync_one(ds,
 521                     fnvpair_value_nvlist(pair), tx);
 522                 if (nvlist_exists(ddura->ddura_todelete, name)) {

 523                         ASSERT(ds->ds_userrefs == 0 &&
 524                             ds->ds_phys->ds_num_children == 1 &&
 525                             DS_IS_DEFER_DESTROY(ds));
 526                         dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 527                 }
 528                 dsl_dataset_rele(ds, FTAG);
 529         }
 530 }
 531 
 532 /*
 533  * The full semantics of this function are described in the comment above
 534  * lzc_release().
 535  *
 536  * To summarize:
 537  * Releases holds specified in the nvl holds.
 538  *
 539  * holds is nvl of snapname -> { holdname, ... }
 540  * errlist will be filled in with snapname -> error
 541  *
 542  * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
 543  * otherwise they should be the names of snapshots.
 544  *
 545  * As a release may cause snapshots to be destroyed this tries to ensure they
 546  * aren't mounted.
 547  *
 548  * Releases of non-existent holds are skipped.
 549  *
 550  * At least one hold must have been released for this function to succeed
 551  * and return 0.
 552  */
 553 static int
 554 dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
 555     dsl_pool_t *tmpdp)
 556 {
 557         dsl_dataset_user_release_arg_t ddura;
 558         nvpair_t *pair;
 559         char *pool;
 560         int error;
 561 
 562         pair = nvlist_next_nvpair(holds, NULL);
 563         if (pair == NULL)
 564                 return (0);
 565 
 566         /*
 567          * The release may cause snapshots to be destroyed; make sure they
 568          * are not mounted.























































 569          */
 570         if (tmpdp != NULL) {
 571                 /* Temporary holds are specified by dsobj string. */
 572                 ddura.ddura_holdfunc = dsl_dataset_hold_obj_string;
 573                 pool = spa_name(tmpdp->dp_spa);
 574 #ifdef _KERNEL
 575                 dsl_pool_config_enter(tmpdp, FTAG);
 576                 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 577                     pair = nvlist_next_nvpair(holds, pair)) {
 578                         dsl_dataset_t *ds;

 579 
 580                         error = dsl_dataset_hold_obj_string(tmpdp,
 581                             nvpair_name(pair), FTAG, &ds);


 582                         if (error == 0) {
 583                                 char name[MAXNAMELEN];
 584                                 dsl_dataset_name(ds, name);
 585                                 dsl_dataset_rele(ds, FTAG);

 586                                 zfs_unmount_snap(name);
 587                         }
 588                 }
 589                 dsl_pool_config_exit(tmpdp, FTAG);
 590 #endif
 591         } else {
 592                 /* Non-temporary holds are specified by name. */
 593                 ddura.ddura_holdfunc = dsl_dataset_hold;
 594                 pool = nvpair_name(pair);
 595 #ifdef _KERNEL
 596                 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 597                     pair = nvlist_next_nvpair(holds, pair)) {
 598                         zfs_unmount_snap(nvpair_name(pair));
 599                 }
 600 #endif
 601         }
 602 
 603         ddura.ddura_holds = holds;
 604         ddura.ddura_errlist = errlist;
 605         ddura.ddura_todelete = fnvlist_alloc();
 606         ddura.ddura_chkholds = fnvlist_alloc();












 607 
 608         error = dsl_sync_task(pool, dsl_dataset_user_release_check,
 609             dsl_dataset_user_release_sync, &ddura,
 610             fnvlist_num_pairs(holds));
 611         fnvlist_free(ddura.ddura_todelete);
 612         fnvlist_free(ddura.ddura_chkholds);

 613 
 614         return (error);
 615 }












 616 
 617 /*
 618  * holds is nvl of snapname -> { holdname, ... }
 619  * errlist will be filled in with snapname -> error
 620  */
 621 int
 622 dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
 623 {
 624         return (dsl_dataset_user_release_impl(holds, errlist, NULL));
 625 }
 626 
 627 /*
 628  * holds is nvl of snapdsobj -> { holdname, ... }
 629  */
 630 void
 631 dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds)

 632 {
 633         ASSERT(dp != NULL);
 634         (void) dsl_dataset_user_release_impl(holds, NULL, dp);







 635 }
 636 
 637 int
 638 dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 639 {
 640         dsl_pool_t *dp;
 641         dsl_dataset_t *ds;
 642         int err;
 643 
 644         err = dsl_pool_hold(dsname, FTAG, &dp);
 645         if (err != 0)
 646                 return (err);
 647         err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
 648         if (err != 0) {
 649                 dsl_pool_rele(dp, FTAG);
 650                 return (err);
 651         }
 652 
 653         if (ds->ds_phys->ds_userrefs_obj != 0) {
 654                 zap_attribute_t *za;