Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/dsl_userhold.c
          +++ new/usr/src/uts/common/fs/zfs/dsl_userhold.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2013 by Delphix. All rights reserved.
       24 + * Copyright (c) 2013 Steven Hartland. All rights reserved.
  24   25   */
  25   26  
  26   27  #include <sys/zfs_context.h>
  27   28  #include <sys/dsl_userhold.h>
  28   29  #include <sys/dsl_dataset.h>
  29   30  #include <sys/dsl_destroy.h>
  30   31  #include <sys/dsl_synctask.h>
  31   32  #include <sys/dmu_tx.h>
  32   33  #include <sys/zfs_onexit.h>
  33   34  #include <sys/dsl_pool.h>
  34   35  #include <sys/dsl_dir.h>
  35   36  #include <sys/zfs_ioctl.h>
  36   37  #include <sys/zap.h>
  37   38  
  38   39  typedef struct dsl_dataset_user_hold_arg {
  39   40          nvlist_t *dduha_holds;
       41 +        nvlist_t *dduha_chkholds;
  40   42          nvlist_t *dduha_errlist;
  41   43          minor_t dduha_minor;
  42   44  } dsl_dataset_user_hold_arg_t;
  43   45  
  44   46  /*
  45   47   * If you add new checks here, you may need to add additional checks to the
  46   48   * "temporary" case in snapshot_check() in dmu_objset.c.
  47   49   */
  48   50  int
  49   51  dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
  50   52      boolean_t temphold, dmu_tx_t *tx)
  51   53  {
  52   54          dsl_pool_t *dp = dmu_tx_pool(tx);
  53   55          objset_t *mos = dp->dp_meta_objset;
  54   56          int error = 0;
  55   57  
       58 +        ASSERT(dsl_pool_config_held(dp));
       59 +
  56   60          if (strlen(htag) > MAXNAMELEN)
  57      -                return (E2BIG);
       61 +                return (SET_ERROR(E2BIG));
  58   62          /* Tempholds have a more restricted length */
  59   63          if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
  60      -                return (E2BIG);
       64 +                return (SET_ERROR(E2BIG));
  61   65  
  62   66          /* tags must be unique (if ds already exists) */
  63      -        if (ds != NULL) {
  64      -                mutex_enter(&ds->ds_lock);
  65      -                if (ds->ds_phys->ds_userrefs_obj != 0) {
  66      -                        uint64_t value;
  67      -                        error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
  68      -                            htag, 8, 1, &value);
  69      -                        if (error == 0)
  70      -                                error = SET_ERROR(EEXIST);
  71      -                        else if (error == ENOENT)
  72      -                                error = 0;
  73      -                }
  74      -                mutex_exit(&ds->ds_lock);
       67 +        if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) {
       68 +                uint64_t value;
       69 +
       70 +                error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
       71 +                    htag, 8, 1, &value);
       72 +                if (error == 0)
       73 +                        error = SET_ERROR(EEXIST);
       74 +                else if (error == ENOENT)
       75 +                        error = 0;
  75   76          }
  76   77  
  77   78          return (error);
  78   79  }
  79   80  
  80   81  static int
  81   82  dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
  82   83  {
  83   84          dsl_dataset_user_hold_arg_t *dduha = arg;
  84   85          dsl_pool_t *dp = dmu_tx_pool(tx);
  85      -        nvpair_t *pair;
  86      -        int rv = 0;
  87   86  
  88   87          if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
  89   88                  return (SET_ERROR(ENOTSUP));
  90   89  
  91      -        for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
  92      -            pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
  93      -                int error = 0;
       90 +        if (!dmu_tx_is_syncing(tx))
       91 +                return (0);
       92 +
       93 +        for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_holds, NULL);
       94 +            pair != NULL; pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
  94   95                  dsl_dataset_t *ds;
  95      -                char *htag;
       96 +                int error = 0;
       97 +                char *htag, *name;
  96   98  
  97   99                  /* must be a snapshot */
  98      -                if (strchr(nvpair_name(pair), '@') == NULL)
      100 +                name = nvpair_name(pair);
      101 +                if (strchr(name, '@') == NULL)
  99  102                          error = SET_ERROR(EINVAL);
 100  103  
 101  104                  if (error == 0)
 102  105                          error = nvpair_value_string(pair, &htag);
 103      -                if (error == 0) {
 104      -                        error = dsl_dataset_hold(dp,
 105      -                            nvpair_name(pair), FTAG, &ds);
 106      -                }
      106 +
      107 +                if (error == 0)
      108 +                        error = dsl_dataset_hold(dp, name, FTAG, &ds);
      109 +
 107  110                  if (error == 0) {
 108  111                          error = dsl_dataset_user_hold_check_one(ds, htag,
 109  112                              dduha->dduha_minor != 0, tx);
 110  113                          dsl_dataset_rele(ds, FTAG);
 111  114                  }
 112  115  
 113      -                if (error != 0) {
 114      -                        rv = error;
 115      -                        fnvlist_add_int32(dduha->dduha_errlist,
 116      -                            nvpair_name(pair), error);
      116 +                if (error == 0) {
      117 +                        fnvlist_add_string(dduha->dduha_chkholds, name, htag);
      118 +                } else {
      119 +                        /*
      120 +                         * We register ENOENT errors so they can be correctly
      121 +                         * reported if needed, such as when all holds fail.
      122 +                         */
      123 +                        fnvlist_add_int32(dduha->dduha_errlist, name, error);
      124 +                        if (error != ENOENT)
      125 +                                return (error);
 117  126                  }
 118  127          }
 119      -        return (rv);
      128 +
      129 +        /* Return ENOENT if no holds would be created. */
      130 +        if (nvlist_empty(dduha->dduha_chkholds))
      131 +                return (SET_ERROR(ENOENT));
      132 +
      133 +        return (0);
 120  134  }
 121  135  
 122      -void
 123      -dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
 124      -    minor_t minor, uint64_t now, dmu_tx_t *tx)
      136 +
      137 +static void
      138 +dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
      139 +    const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx)
 125  140  {
 126  141          dsl_pool_t *dp = ds->ds_dir->dd_pool;
 127  142          objset_t *mos = dp->dp_meta_objset;
 128  143          uint64_t zapobj;
 129  144  
 130      -        mutex_enter(&ds->ds_lock);
      145 +        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
      146 +
 131  147          if (ds->ds_phys->ds_userrefs_obj == 0) {
 132  148                  /*
 133  149                   * This is the first user hold for this dataset.  Create
 134  150                   * the userrefs zap object.
 135  151                   */
 136  152                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
 137  153                  zapobj = ds->ds_phys->ds_userrefs_obj =
 138  154                      zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
 139  155          } else {
 140  156                  zapobj = ds->ds_phys->ds_userrefs_obj;
 141  157          }
 142  158          ds->ds_userrefs++;
 143      -        mutex_exit(&ds->ds_lock);
 144  159  
 145  160          VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 146  161  
 147  162          if (minor != 0) {
      163 +                char name[MAXNAMELEN];
      164 +                nvlist_t *tags;
      165 +
 148  166                  VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 149  167                      htag, now, tx));
 150      -                dsl_register_onexit_hold_cleanup(ds, htag, minor);
      168 +                (void) snprintf(name, sizeof (name), "%llx",
      169 +                    (u_longlong_t)ds->ds_object);
      170 +
      171 +                if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) {
      172 +                        tags = fnvlist_alloc();
      173 +                        fnvlist_add_boolean(tags, htag);
      174 +                        fnvlist_add_nvlist(tmpholds, name, tags);
      175 +                        fnvlist_free(tags);
      176 +                } else {
      177 +                        fnvlist_add_boolean(tags, htag);
      178 +                }
 151  179          }
 152  180  
 153  181          spa_history_log_internal_ds(ds, "hold", tx,
 154  182              "tag=%s temp=%d refs=%llu",
 155  183              htag, minor != 0, ds->ds_userrefs);
 156  184  }
 157  185  
      186 +typedef struct zfs_hold_cleanup_arg {
      187 +        char zhca_spaname[MAXNAMELEN];
      188 +        uint64_t zhca_spa_load_guid;
      189 +        nvlist_t *zhca_holds;
      190 +} zfs_hold_cleanup_arg_t;
      191 +
      192 +static void
      193 +dsl_dataset_user_release_onexit(void *arg)
      194 +{
      195 +        zfs_hold_cleanup_arg_t *ca = arg;
      196 +        spa_t *spa;
      197 +        int error;
      198 +
      199 +        error = spa_open(ca->zhca_spaname, &spa, FTAG);
      200 +        if (error != 0) {
      201 +                zfs_dbgmsg("couldn't release holds on pool=%s "
      202 +                    "because pool is no longer loaded",
      203 +                    ca->zhca_spaname);
      204 +                return;
      205 +        }
      206 +        if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
      207 +                zfs_dbgmsg("couldn't release holds on pool=%s "
      208 +                    "because pool is no longer loaded (guid doesn't match)",
      209 +                    ca->zhca_spaname);
      210 +                spa_close(spa, FTAG);
      211 +                return;
      212 +        }
      213 +
      214 +        (void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
      215 +        fnvlist_free(ca->zhca_holds);
      216 +        kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
      217 +        spa_close(spa, FTAG);
      218 +}
      219 +
      220 +static void
      221 +dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
      222 +{
      223 +        zfs_hold_cleanup_arg_t *ca;
      224 +
      225 +        if (minor == 0 || nvlist_empty(holds)) {
      226 +                fnvlist_free(holds);
      227 +                return;
      228 +        }
      229 +
      230 +        ASSERT(spa != NULL);
      231 +        ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
      232 +
      233 +        (void) strlcpy(ca->zhca_spaname, spa_name(spa),
      234 +            sizeof (ca->zhca_spaname));
      235 +        ca->zhca_spa_load_guid = spa_load_guid(spa);
      236 +        ca->zhca_holds = holds;
      237 +        VERIFY0(zfs_onexit_add_cb(minor,
      238 +            dsl_dataset_user_release_onexit, ca, NULL));
      239 +}
      240 +
      241 +void
      242 +dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
      243 +    minor_t minor, uint64_t now, dmu_tx_t *tx)
      244 +{
      245 +        nvlist_t *tmpholds;
      246 +
      247 +        if (minor != 0)
      248 +                tmpholds = fnvlist_alloc();
      249 +        else
      250 +                tmpholds = NULL;
      251 +        dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx);
      252 +        dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds, minor);
      253 +}
      254 +
 158  255  static void
 159  256  dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 160  257  {
 161  258          dsl_dataset_user_hold_arg_t *dduha = arg;
 162  259          dsl_pool_t *dp = dmu_tx_pool(tx);
 163      -        nvpair_t *pair;
      260 +        nvlist_t *tmpholds;
 164  261          uint64_t now = gethrestime_sec();
 165  262  
 166      -        for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
 167      -            pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
      263 +        if (dduha->dduha_minor != 0)
      264 +                tmpholds = fnvlist_alloc();
      265 +        else
      266 +                tmpholds = NULL;
      267 +        for (nvpair_t *pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL);
      268 +            pair != NULL;
      269 +            pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) {
 168  270                  dsl_dataset_t *ds;
      271 +
 169  272                  VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 170      -                dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
 171      -                    dduha->dduha_minor, now, tx);
      273 +                dsl_dataset_user_hold_sync_one_impl(tmpholds, ds,
      274 +                    fnvpair_value_string(pair), dduha->dduha_minor, now, tx);
 172  275                  dsl_dataset_rele(ds, FTAG);
 173  276          }
      277 +        dsl_onexit_hold_cleanup(dp->dp_spa, tmpholds, dduha->dduha_minor);
 174  278  }
 175  279  
 176  280  /*
      281 + * The full semantics of this function are described in the comment above
      282 + * lzc_hold().
      283 + *
      284 + * To summarize:
 177  285   * holds is nvl of snapname -> holdname
 178  286   * errlist will be filled in with snapname -> error
 179      - * if cleanup_minor is not 0, the holds will be temporary, cleaned up
 180      - * when the process exits.
 181  287   *
 182      - * if any fails, all will fail.
       288 + * The snapshots must all be in the same pool.
      289 + *
      290 + * Holds for snapshots that don't exist will be skipped.
      291 + *
      292 + * If none of the snapshots for requested holds exist then ENOENT will be
      293 + * returned.
      294 + *
      295 + * If cleanup_minor is not 0, the holds will be temporary, which will be cleaned
      296 + * up when the process exits.
      297 + *
      298 + * On success all the holds, for snapshots that existed, will be created and 0
      299 + * will be returned.
      300 + *
      301 + * On failure no holds will be created, the errlist will be filled in,
       302 + * and an errno will be returned.
      303 + *
      304 + * In all cases the errlist will contain entries for holds where the snapshot
      305 + * didn't exist.
 183  306   */
 184  307  int
 185  308  dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 186  309  {
 187  310          dsl_dataset_user_hold_arg_t dduha;
 188  311          nvpair_t *pair;
      312 +        int ret;
 189  313  
 190  314          pair = nvlist_next_nvpair(holds, NULL);
 191  315          if (pair == NULL)
 192  316                  return (0);
 193  317  
 194  318          dduha.dduha_holds = holds;
      319 +        dduha.dduha_chkholds = fnvlist_alloc();
 195  320          dduha.dduha_errlist = errlist;
 196  321          dduha.dduha_minor = cleanup_minor;
 197  322  
 198      -        return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 199      -            dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
      323 +        ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
      324 +            dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
      325 +        fnvlist_free(dduha.dduha_chkholds);
      326 +
      327 +        return (ret);
 200  328  }
 201  329  
      330 +typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag,
      331 +    dsl_dataset_t **dsp);
      332 +
 202  333  typedef struct dsl_dataset_user_release_arg {
      334 +        dsl_holdfunc_t *ddura_holdfunc;
 203  335          nvlist_t *ddura_holds;
 204  336          nvlist_t *ddura_todelete;
 205  337          nvlist_t *ddura_errlist;
      338 +        nvlist_t *ddura_chkholds;
 206  339  } dsl_dataset_user_release_arg_t;
 207  340  
      341 +/* Place a dataset hold on the snapshot identified by passed dsobj string */
 208  342  static int
 209      -dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
 210      -    nvlist_t *holds, boolean_t *todelete)
      343 +dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag,
      344 +    dsl_dataset_t **dsp)
 211  345  {
 212      -        uint64_t zapobj;
 213      -        nvpair_t *pair;
 214      -        objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 215      -        int error;
 216      -        int numholds = 0;
      346 +        return (dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp));
      347 +}
 217  348  
 218      -        *todelete = B_FALSE;
      349 +static int
      350 +dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
      351 +    dsl_dataset_t *ds, nvlist_t *holds, const char *snapname)
      352 +{
      353 +        uint64_t zapobj;
      354 +        nvlist_t *holds_found;
      355 +        objset_t *mos;
      356 +        int numholds;
 219  357  
 220  358          if (!dsl_dataset_is_snapshot(ds))
 221  359                  return (SET_ERROR(EINVAL));
 222  360  
      361 +        if (nvlist_empty(holds))
      362 +                return (0);
      363 +
      364 +        numholds = 0;
      365 +        mos = ds->ds_dir->dd_pool->dp_meta_objset;
 223  366          zapobj = ds->ds_phys->ds_userrefs_obj;
 224      -        if (zapobj == 0)
 225      -                return (SET_ERROR(ESRCH));
      367 +        holds_found = fnvlist_alloc();
 226  368  
 227      -        for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
      369 +        for (nvpair_t *pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 228  370              pair = nvlist_next_nvpair(holds, pair)) {
 229      -                /* Make sure the hold exists */
 230  371                  uint64_t tmp;
 231      -                error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
 232      -                if (error == ENOENT)
 233      -                        error = SET_ERROR(ESRCH);
 234      -                if (error != 0)
      372 +                int error;
      373 +                const char *holdname = nvpair_name(pair);
      374 +
      375 +                if (zapobj != 0)
      376 +                        error = zap_lookup(mos, zapobj, holdname, 8, 1, &tmp);
      377 +                else
      378 +                        error = SET_ERROR(ENOENT);
      379 +
      380 +                /*
      381 +                 * Non-existent holds are put on the errlist, but don't
      382 +                 * cause an overall failure.
      383 +                 */
      384 +                if (error == ENOENT) {
      385 +                        if (ddura->ddura_errlist != NULL) {
      386 +                                char *errtag = kmem_asprintf("%s#%s",
      387 +                                    snapname, holdname);
      388 +                                fnvlist_add_int32(ddura->ddura_errlist, errtag,
      389 +                                    ENOENT);
      390 +                                strfree(errtag);
      391 +                        }
      392 +                        continue;
      393 +                }
      394 +
      395 +                if (error != 0) {
      396 +                        fnvlist_free(holds_found);
 235  397                          return (error);
      398 +                }
      399 +
      400 +                fnvlist_add_boolean(holds_found, holdname);
 236  401                  numholds++;
 237  402          }
 238  403  
 239  404          if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
 240  405              ds->ds_userrefs == numholds) {
 241  406                  /* we need to destroy the snapshot as well */
 242      -
 243      -                if (dsl_dataset_long_held(ds))
      407 +                if (dsl_dataset_long_held(ds)) {
      408 +                        fnvlist_free(holds_found);
 244  409                          return (SET_ERROR(EBUSY));
 245      -                *todelete = B_TRUE;
      410 +                }
      411 +                fnvlist_add_boolean(ddura->ddura_todelete, snapname);
 246  412          }
      413 +
      414 +        if (numholds != 0) {
      415 +                fnvlist_add_nvlist(ddura->ddura_chkholds, snapname,
      416 +                    holds_found);
      417 +        }
      418 +        fnvlist_free(holds_found);
      419 +
 247  420          return (0);
 248  421  }
 249  422  
 250  423  static int
 251  424  dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
 252  425  {
 253      -        dsl_dataset_user_release_arg_t *ddura = arg;
 254      -        dsl_pool_t *dp = dmu_tx_pool(tx);
 255      -        nvpair_t *pair;
 256      -        int rv = 0;
      426 +        dsl_dataset_user_release_arg_t *ddura;
      427 +        dsl_holdfunc_t *holdfunc;
      428 +        dsl_pool_t *dp;
 257  429  
 258  430          if (!dmu_tx_is_syncing(tx))
 259  431                  return (0);
 260  432  
 261      -        for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 262      -            pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 263      -                const char *name = nvpair_name(pair);
      433 +        dp = dmu_tx_pool(tx);
      434 +
      435 +        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
      436 +
      437 +        ddura = arg;
      438 +        holdfunc = ddura->ddura_holdfunc;
      439 +
      440 +        for (nvpair_t *pair = nvlist_next_nvpair(ddura->ddura_holds, NULL);
      441 +            pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 264  442                  int error;
 265  443                  dsl_dataset_t *ds;
 266  444                  nvlist_t *holds;
      445 +                const char *snapname = nvpair_name(pair);
 267  446  
 268  447                  error = nvpair_value_nvlist(pair, &holds);
 269  448                  if (error != 0)
 270      -                        return (SET_ERROR(EINVAL));
 271      -
 272      -                error = dsl_dataset_hold(dp, name, FTAG, &ds);
      449 +                        error = (SET_ERROR(EINVAL));
      450 +                else
      451 +                        error = holdfunc(dp, snapname, FTAG, &ds);
 273  452                  if (error == 0) {
 274      -                        boolean_t deleteme;
 275      -                        error = dsl_dataset_user_release_check_one(ds,
 276      -                            holds, &deleteme);
 277      -                        if (error == 0 && deleteme) {
 278      -                                fnvlist_add_boolean(ddura->ddura_todelete,
 279      -                                    name);
 280      -                        }
      453 +                        error = dsl_dataset_user_release_check_one(ddura, ds,
      454 +                            holds, snapname);
 281  455                          dsl_dataset_rele(ds, FTAG);
 282  456                  }
 283  457                  if (error != 0) {
 284  458                          if (ddura->ddura_errlist != NULL) {
 285  459                                  fnvlist_add_int32(ddura->ddura_errlist,
 286      -                                    name, error);
      460 +                                    snapname, error);
 287  461                          }
 288      -                        rv = error;
      462 +                        /*
      463 +                         * Non-existent snapshots are put on the errlist,
      464 +                         * but don't cause an overall failure.
      465 +                         */
      466 +                        if (error != ENOENT)
      467 +                                return (error);
 289  468                  }
 290  469          }
 291      -        return (rv);
      470 +
      471 +        /* Return ENOENT if none of the holds existed. */
      472 +        if (nvlist_empty(ddura->ddura_chkholds))
      473 +                return (SET_ERROR(ENOENT));
      474 +
      475 +        return (0);
 292  476  }
 293  477  
 294  478  static void
 295  479  dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
 296  480      dmu_tx_t *tx)
 297  481  {
 298  482          dsl_pool_t *dp = ds->ds_dir->dd_pool;
 299  483          objset_t *mos = dp->dp_meta_objset;
 300      -        uint64_t zapobj;
 301      -        int error;
 302      -        nvpair_t *pair;
 303  484  
 304      -        for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
      485 +        for (nvpair_t *pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 305  486              pair = nvlist_next_nvpair(holds, pair)) {
 306      -                ds->ds_userrefs--;
 307      -                error = dsl_pool_user_release(dp, ds->ds_object,
 308      -                    nvpair_name(pair), tx);
      487 +                int error;
      488 +                const char *holdname = nvpair_name(pair);
      489 +
      490 +                /* Remove temporary hold if one exists. */
      491 +                error = dsl_pool_user_release(dp, ds->ds_object, holdname, tx);
 309  492                  VERIFY(error == 0 || error == ENOENT);
 310      -                zapobj = ds->ds_phys->ds_userrefs_obj;
 311      -                VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));
      493 +
      494 +                VERIFY0(zap_remove(mos, ds->ds_phys->ds_userrefs_obj, holdname,
      495 +                    tx));
      496 +                ds->ds_userrefs--;
 312  497  
 313  498                  spa_history_log_internal_ds(ds, "release", tx,
 314      -                    "tag=%s refs=%lld", nvpair_name(pair),
 315      -                    (longlong_t)ds->ds_userrefs);
      499 +                    "tag=%s refs=%lld", holdname, (longlong_t)ds->ds_userrefs);
 316  500          }
 317  501  }
 318  502  
 319  503  static void
 320  504  dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
 321  505  {
 322  506          dsl_dataset_user_release_arg_t *ddura = arg;
      507 +        dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
 323  508          dsl_pool_t *dp = dmu_tx_pool(tx);
 324      -        nvpair_t *pair;
 325  509  
 326      -        for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 327      -            pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
      510 +        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
      511 +
      512 +        for (nvpair_t *pair = nvlist_next_nvpair(ddura->ddura_chkholds, NULL);
      513 +            pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_chkholds,
      514 +            pair)) {
 328  515                  dsl_dataset_t *ds;
      516 +                const char *name = nvpair_name(pair);
      517 +
      518 +                VERIFY0(holdfunc(dp, name, FTAG, &ds));
 329  519  
 330      -                VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 331  520                  dsl_dataset_user_release_sync_one(ds,
 332  521                      fnvpair_value_nvlist(pair), tx);
 333      -                if (nvlist_exists(ddura->ddura_todelete,
 334      -                    nvpair_name(pair))) {
      522 +                if (nvlist_exists(ddura->ddura_todelete, name)) {
 335  523                          ASSERT(ds->ds_userrefs == 0 &&
 336  524                              ds->ds_phys->ds_num_children == 1 &&
 337  525                              DS_IS_DEFER_DESTROY(ds));
 338  526                          dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 339  527                  }
 340  528                  dsl_dataset_rele(ds, FTAG);
 341  529          }
 342  530  }
 343  531  
 344  532  /*
      533 + * The full semantics of this function are described in the comment above
      534 + * lzc_release().
      535 + *
      536 + * To summarize:
      537 + * Releases holds specified in the nvl holds.
      538 + *
 345  539   * holds is nvl of snapname -> { holdname, ... }
 346  540   * errlist will be filled in with snapname -> error
 347  541   *
 348      - * if any fails, all will fail.
      542 + * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
      543 + * otherwise they should be the names of snapshots.
      544 + *
      545 + * As a release may cause snapshots to be destroyed this tries to ensure they
      546 + * aren't mounted.
      547 + *
      548 + * The release of non-existent holds are skipped.
      549 + *
      550 + * At least one hold must have been released for this function to succeed
      551 + * and return 0.
 349  552   */
 350      -int
 351      -dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
      553 +static int
      554 +dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
      555 +    dsl_pool_t *tmpdp)
 352  556  {
 353  557          dsl_dataset_user_release_arg_t ddura;
 354  558          nvpair_t *pair;
      559 +        char *pool;
 355  560          int error;
 356  561  
 357  562          pair = nvlist_next_nvpair(holds, NULL);
 358  563          if (pair == NULL)
 359  564                  return (0);
 360  565  
      566 +        /*
      567 +         * The release may cause snapshots to be destroyed; make sure they
      568 +         * are not mounted.
      569 +         */
      570 +        if (tmpdp != NULL) {
      571 +                /* Temporary holds are specified by dsobj string. */
      572 +                ddura.ddura_holdfunc = dsl_dataset_hold_obj_string;
      573 +                pool = spa_name(tmpdp->dp_spa);
      574 +#ifdef _KERNEL
      575 +                dsl_pool_config_enter(tmpdp, FTAG);
      576 +                for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
      577 +                    pair = nvlist_next_nvpair(holds, pair)) {
      578 +                        dsl_dataset_t *ds;
      579 +
      580 +                        error = dsl_dataset_hold_obj_string(tmpdp,
      581 +                            nvpair_name(pair), FTAG, &ds);
      582 +                        if (error == 0) {
      583 +                                char name[MAXNAMELEN];
      584 +                                dsl_dataset_name(ds, name);
      585 +                                dsl_dataset_rele(ds, FTAG);
      586 +                                zfs_unmount_snap(name);
      587 +                        }
      588 +                }
      589 +                dsl_pool_config_exit(tmpdp, FTAG);
      590 +#endif
      591 +        } else {
      592 +                /* Non-temporary holds are specified by name. */
      593 +                ddura.ddura_holdfunc = dsl_dataset_hold;
      594 +                pool = nvpair_name(pair);
      595 +#ifdef _KERNEL
      596 +                for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
      597 +                    pair = nvlist_next_nvpair(holds, pair)) {
      598 +                        zfs_unmount_snap(nvpair_name(pair));
      599 +                }
      600 +#endif
      601 +        }
      602 +
 361  603          ddura.ddura_holds = holds;
 362  604          ddura.ddura_errlist = errlist;
 363  605          ddura.ddura_todelete = fnvlist_alloc();
      606 +        ddura.ddura_chkholds = fnvlist_alloc();
 364  607  
 365      -        error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
 366      -            dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
      608 +        error = dsl_sync_task(pool, dsl_dataset_user_release_check,
      609 +            dsl_dataset_user_release_sync, &ddura,
      610 +            fnvlist_num_pairs(holds));
 367  611          fnvlist_free(ddura.ddura_todelete);
 368      -        return (error);
 369      -}
 370      -
 371      -typedef struct dsl_dataset_user_release_tmp_arg {
 372      -        uint64_t ddurta_dsobj;
 373      -        nvlist_t *ddurta_holds;
 374      -        boolean_t ddurta_deleteme;
 375      -} dsl_dataset_user_release_tmp_arg_t;
 376      -
 377      -static int
 378      -dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
 379      -{
 380      -        dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 381      -        dsl_pool_t *dp = dmu_tx_pool(tx);
 382      -        dsl_dataset_t *ds;
 383      -        int error;
 384      -
 385      -        if (!dmu_tx_is_syncing(tx))
 386      -                return (0);
 387      -
 388      -        error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
 389      -        if (error)
 390      -                return (error);
      612 +        fnvlist_free(ddura.ddura_chkholds);
 391  613  
 392      -        error = dsl_dataset_user_release_check_one(ds,
 393      -            ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
 394      -        dsl_dataset_rele(ds, FTAG);
 395  614          return (error);
 396  615  }
 397  616  
 398      -static void
 399      -dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
 400      -{
 401      -        dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 402      -        dsl_pool_t *dp = dmu_tx_pool(tx);
 403      -        dsl_dataset_t *ds;
 404      -
 405      -        VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
 406      -        dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
 407      -        if (ddurta->ddurta_deleteme) {
 408      -                ASSERT(ds->ds_userrefs == 0 &&
 409      -                    ds->ds_phys->ds_num_children == 1 &&
 410      -                    DS_IS_DEFER_DESTROY(ds));
 411      -                dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 412      -        }
 413      -        dsl_dataset_rele(ds, FTAG);
 414      -}
 415      -
 416  617  /*
 417      - * Called at spa_load time to release a stale temporary user hold.
 418      - * Also called by the onexit code.
      618 + * holds is nvl of snapname -> { holdname, ... }
      619 + * errlist will be filled in with snapname -> error
 419  620   */
 420      -void
 421      -dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
 422      -{
 423      -        dsl_dataset_user_release_tmp_arg_t ddurta;
 424      -        dsl_dataset_t *ds;
 425      -        int error;
 426      -
 427      -#ifdef _KERNEL
 428      -        /* Make sure it is not mounted. */
 429      -        dsl_pool_config_enter(dp, FTAG);
 430      -        error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
 431      -        if (error == 0) {
 432      -                char name[MAXNAMELEN];
 433      -                dsl_dataset_name(ds, name);
 434      -                dsl_dataset_rele(ds, FTAG);
 435      -                dsl_pool_config_exit(dp, FTAG);
 436      -                zfs_unmount_snap(name);
 437      -        } else {
 438      -                dsl_pool_config_exit(dp, FTAG);
 439      -        }
 440      -#endif
 441      -
 442      -        ddurta.ddurta_dsobj = dsobj;
 443      -        ddurta.ddurta_holds = fnvlist_alloc();
 444      -        fnvlist_add_boolean(ddurta.ddurta_holds, htag);
 445      -
 446      -        (void) dsl_sync_task(spa_name(dp->dp_spa),
 447      -            dsl_dataset_user_release_tmp_check,
 448      -            dsl_dataset_user_release_tmp_sync, &ddurta, 1);
 449      -        fnvlist_free(ddurta.ddurta_holds);
 450      -}
 451      -
 452      -typedef struct zfs_hold_cleanup_arg {
 453      -        char zhca_spaname[MAXNAMELEN];
 454      -        uint64_t zhca_spa_load_guid;
 455      -        uint64_t zhca_dsobj;
 456      -        char zhca_htag[MAXNAMELEN];
 457      -} zfs_hold_cleanup_arg_t;
 458      -
 459      -static void
 460      -dsl_dataset_user_release_onexit(void *arg)
      621 +int
      622 +dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
 461  623  {
 462      -        zfs_hold_cleanup_arg_t *ca = arg;
 463      -        spa_t *spa;
 464      -        int error;
 465      -
 466      -        error = spa_open(ca->zhca_spaname, &spa, FTAG);
 467      -        if (error != 0) {
 468      -                zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 469      -                    "because pool is no longer loaded",
 470      -                    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 471      -                return;
 472      -        }
 473      -        if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
 474      -                zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 475      -                    "because pool is no longer loaded (guid doesn't match)",
 476      -                    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 477      -                spa_close(spa, FTAG);
 478      -                return;
 479      -        }
 480      -
 481      -        dsl_dataset_user_release_tmp(spa_get_dsl(spa),
 482      -            ca->zhca_dsobj, ca->zhca_htag);
 483      -        kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 484      -        spa_close(spa, FTAG);
      624 +        return (dsl_dataset_user_release_impl(holds, errlist, NULL));
 485  625  }
 486  626  
      627 +/*
      628 + * holds is nvl of snapdsobj -> { holdname, ... }
      629 + */
 487  630  void
 488      -dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
 489      -    minor_t minor)
      631 +dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds)
 490  632  {
 491      -        zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
 492      -        spa_t *spa = dsl_dataset_get_spa(ds);
 493      -        (void) strlcpy(ca->zhca_spaname, spa_name(spa),
 494      -            sizeof (ca->zhca_spaname));
 495      -        ca->zhca_spa_load_guid = spa_load_guid(spa);
 496      -        ca->zhca_dsobj = ds->ds_object;
 497      -        (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
 498      -        VERIFY0(zfs_onexit_add_cb(minor,
 499      -            dsl_dataset_user_release_onexit, ca, NULL));
      633 +        ASSERT(dp != NULL);
      634 +        (void) dsl_dataset_user_release_impl(holds, NULL, dp);
 500  635  }
 501  636  
 502  637  int
 503  638  dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 504  639  {
 505  640          dsl_pool_t *dp;
 506  641          dsl_dataset_t *ds;
 507  642          int err;
 508  643  
 509  644          err = dsl_pool_hold(dsname, FTAG, &dp);
↓ open down ↓ 27 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX