Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/dsl_userhold.c
          +++ new/usr/src/uts/common/fs/zfs/dsl_userhold.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2013 by Delphix. All rights reserved.
       24 + * Copyright (c) 2013 Steven Hartland. All rights reserved.
  24   25   */
  25   26  
  26   27  #include <sys/zfs_context.h>
  27   28  #include <sys/dsl_userhold.h>
  28   29  #include <sys/dsl_dataset.h>
  29   30  #include <sys/dsl_destroy.h>
  30   31  #include <sys/dsl_synctask.h>
  31   32  #include <sys/dmu_tx.h>
  32   33  #include <sys/zfs_onexit.h>
  33   34  #include <sys/dsl_pool.h>
  34   35  #include <sys/dsl_dir.h>
  35   36  #include <sys/zfs_ioctl.h>
  36   37  #include <sys/zap.h>
  37   38  
  38   39  typedef struct dsl_dataset_user_hold_arg {
  39   40          nvlist_t *dduha_holds;
       41 +        nvlist_t *dduha_chkholds;
  40   42          nvlist_t *dduha_errlist;
  41   43          minor_t dduha_minor;
  42   44  } dsl_dataset_user_hold_arg_t;
  43   45  
  44   46  /*
  45   47   * If you add new checks here, you may need to add additional checks to the
  46   48   * "temporary" case in snapshot_check() in dmu_objset.c.
  47   49   */
  48   50  int
  49   51  dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
  50   52      boolean_t temphold, dmu_tx_t *tx)
  51   53  {
  52   54          dsl_pool_t *dp = dmu_tx_pool(tx);
  53   55          objset_t *mos = dp->dp_meta_objset;
  54   56          int error = 0;
  55   57  
       58 +        ASSERT(RRW_READ_HELD(&dp->dp_config_rwlock));
       59 +
  56   60          if (strlen(htag) > MAXNAMELEN)
  57   61                  return (E2BIG);
  58   62          /* Tempholds have a more restricted length */
  59   63          if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
  60   64                  return (E2BIG);
  61   65  
  62   66          /* tags must be unique (if ds already exists) */
  63      -        if (ds != NULL) {
  64      -                mutex_enter(&ds->ds_lock);
  65      -                if (ds->ds_phys->ds_userrefs_obj != 0) {
  66      -                        uint64_t value;
  67      -                        error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
  68      -                            htag, 8, 1, &value);
  69      -                        if (error == 0)
  70      -                                error = SET_ERROR(EEXIST);
  71      -                        else if (error == ENOENT)
  72      -                                error = 0;
  73      -                }
  74      -                mutex_exit(&ds->ds_lock);
       67 +        if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) {
       68 +                uint64_t value;
       69 +
       70 +                error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
       71 +                    htag, 8, 1, &value);
       72 +                if (error == 0)
       73 +                        error = SET_ERROR(EEXIST);
       74 +                else if (error == ENOENT)
       75 +                        error = 0;
  75   76          }
  76   77  
  77   78          return (error);
  78   79  }
  79   80  
  80   81  static int
  81   82  dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
  82   83  {
  83   84          dsl_dataset_user_hold_arg_t *dduha = arg;
  84   85          dsl_pool_t *dp = dmu_tx_pool(tx);
  85   86          nvpair_t *pair;
  86      -        int rv = 0;
  87   87  
  88   88          if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
  89   89                  return (SET_ERROR(ENOTSUP));
  90   90  
       91 +        if (!dmu_tx_is_syncing(tx))
       92 +                return (0);
       93 +
  91   94          for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
  92   95              pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
  93      -                int error = 0;
  94   96                  dsl_dataset_t *ds;
  95      -                char *htag;
       97 +                int error = 0;
       98 +                char *htag, *name;
  96   99  
  97  100                  /* must be a snapshot */
  98      -                if (strchr(nvpair_name(pair), '@') == NULL)
      101 +                name = nvpair_name(pair);
      102 +                if (strchr(name, '@') == NULL)
  99  103                          error = SET_ERROR(EINVAL);
 100  104  
 101  105                  if (error == 0)
 102  106                          error = nvpair_value_string(pair, &htag);
 103      -                if (error == 0) {
 104      -                        error = dsl_dataset_hold(dp,
 105      -                            nvpair_name(pair), FTAG, &ds);
 106      -                }
      107 +
      108 +                if (error == 0)
      109 +                        error = dsl_dataset_hold(dp, name, FTAG, &ds);
      110 +
 107  111                  if (error == 0) {
 108  112                          error = dsl_dataset_user_hold_check_one(ds, htag,
 109  113                              dduha->dduha_minor != 0, tx);
 110  114                          dsl_dataset_rele(ds, FTAG);
 111  115                  }
 112  116  
 113      -                if (error != 0) {
 114      -                        rv = error;
 115      -                        fnvlist_add_int32(dduha->dduha_errlist,
 116      -                            nvpair_name(pair), error);
      117 +                if (error == 0) {
      118 +                        fnvlist_add_string(dduha->dduha_chkholds, name, htag);
      119 +                } else {
      120 +                        /*
      121 +                         * We register ENOENT errors so they can be correctly
      122 +                         * reported if needed, such as when all holds fail.
      123 +                         */
      124 +                        fnvlist_add_int32(dduha->dduha_errlist, name, error);
      125 +                        if (error != ENOENT)
      126 +                                return (error);
 117  127                  }
 118  128          }
 119      -        return (rv);
      129 +
      130 +        /* Return ENOENT if no holds would be created. */
      131 +        if (nvlist_next_nvpair(dduha->dduha_chkholds, NULL) == NULL)
      132 +                return (ENOENT);
      133 +
      134 +        return (0);
 120  135  }
 121  136  
 122      -void
 123      -dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
 124      -    minor_t minor, uint64_t now, dmu_tx_t *tx)
      137 +
      138 +static void
      139 +dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
      140 +    const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx)
 125  141  {
 126  142          dsl_pool_t *dp = ds->ds_dir->dd_pool;
 127  143          objset_t *mos = dp->dp_meta_objset;
 128  144          uint64_t zapobj;
 129  145  
 130      -        mutex_enter(&ds->ds_lock);
      146 +        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
      147 +
 131  148          if (ds->ds_phys->ds_userrefs_obj == 0) {
 132  149                  /*
 133  150                   * This is the first user hold for this dataset.  Create
 134  151                   * the userrefs zap object.
 135  152                   */
 136  153                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
 137  154                  zapobj = ds->ds_phys->ds_userrefs_obj =
 138  155                      zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
 139  156          } else {
 140  157                  zapobj = ds->ds_phys->ds_userrefs_obj;
 141  158          }
 142  159          ds->ds_userrefs++;
 143      -        mutex_exit(&ds->ds_lock);
 144  160  
 145  161          VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 146  162  
 147  163          if (minor != 0) {
      164 +                char name[MAXNAMELEN];
      165 +                nvlist_t *tags;
      166 +
 148  167                  VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 149  168                      htag, now, tx));
 150      -                dsl_register_onexit_hold_cleanup(ds, htag, minor);
      169 +                (void) snprintf(name, sizeof (name), "%llx",
      170 +                    (u_longlong_t)ds->ds_object);
      171 +
      172 +                if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) {
      173 +                        tags = fnvlist_alloc();
      174 +                        fnvlist_add_boolean(tags, htag);
      175 +                        fnvlist_add_nvlist(tmpholds, name, tags);
      176 +                        fnvlist_free(tags);
      177 +                } else {
      178 +                        fnvlist_add_boolean(tags, htag);
      179 +                }
 151  180          }
 152  181  
 153  182          spa_history_log_internal_ds(ds, "hold", tx,
 154  183              "tag=%s temp=%d refs=%llu",
 155  184              htag, minor != 0, ds->ds_userrefs);
 156  185  }
 157  186  
      187 +typedef struct zfs_hold_cleanup_arg {
      188 +        char zhca_spaname[MAXNAMELEN];
      189 +        uint64_t zhca_spa_load_guid;
      190 +        nvlist_t *zhca_holds;
      191 +} zfs_hold_cleanup_arg_t;
      192 +
      193 +static void
      194 +dsl_dataset_user_release_onexit(void *arg)
      195 +{
      196 +        zfs_hold_cleanup_arg_t *ca = (zfs_hold_cleanup_arg_t *)arg;
      197 +        spa_t *spa;
      198 +        int error;
      199 +
      200 +        error = spa_open(ca->zhca_spaname, &spa, FTAG);
      201 +        if (error != 0) {
      202 +                zfs_dbgmsg("couldn't release holds on pool=%s "
      203 +                    "because pool is no longer loaded",
      204 +                    ca->zhca_spaname);
      205 +                return;
      206 +        }
      207 +        if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
      208 +                zfs_dbgmsg("couldn't release holds on pool=%s "
      209 +                    "because pool is no longer loaded (guid doesn't match)",
      210 +                    ca->zhca_spaname);
      211 +                spa_close(spa, FTAG);
      212 +                return;
      213 +        }
      214 +
      215 +        (void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
      216 +        fnvlist_free(ca->zhca_holds);
      217 +        kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
      218 +        spa_close(spa, FTAG);
      219 +}
      220 +
      221 +static void
      222 +dsl_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
      223 +{
      224 +        zfs_hold_cleanup_arg_t *ca;
      225 +
      226 +        if (minor == 0 || nvlist_next_nvpair(holds, NULL) == NULL) {
      227 +                fnvlist_free(holds);
      228 +                return;
      229 +        }
      230 +
      231 +        ASSERT(spa != NULL);
      232 +        ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
      233 +
      234 +        (void) strlcpy(ca->zhca_spaname, spa_name(spa),
      235 +            sizeof (ca->zhca_spaname));
      236 +        ca->zhca_spa_load_guid = spa_load_guid(spa);
      237 +        ca->zhca_holds = holds;
      238 +        VERIFY0(zfs_onexit_add_cb(minor,
      239 +            dsl_dataset_user_release_onexit, ca, NULL));
      240 +}
      241 +
      242 +void
      243 +dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
      244 +    minor_t minor, uint64_t now, dmu_tx_t *tx)
      245 +{
      246 +        nvlist_t *tmpholds;
      247 +
      248 +        if (minor != 0)
      249 +                tmpholds = fnvlist_alloc();
      250 +        else
      251 +                tmpholds = NULL;
      252 +        dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx);
      253 +        dsl_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds, minor);
      254 +}
      255 +
 158  256  static void
 159  257  dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 160  258  {
 161  259          dsl_dataset_user_hold_arg_t *dduha = arg;
 162  260          dsl_pool_t *dp = dmu_tx_pool(tx);
 163  261          nvpair_t *pair;
      262 +        nvlist_t *tmpholds;
 164  263          uint64_t now = gethrestime_sec();
 165  264  
 166      -        for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
 167      -            pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
      265 +        if (dduha->dduha_minor != 0)
      266 +                tmpholds = fnvlist_alloc();
      267 +        else
      268 +                tmpholds = NULL;
      269 +        for (pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL);
      270 +            pair != NULL;
      271 +            pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) {
 168  272                  dsl_dataset_t *ds;
      273 +
 169  274                  VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 170      -                dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
 171      -                    dduha->dduha_minor, now, tx);
      275 +                dsl_dataset_user_hold_sync_one_impl(tmpholds, ds,
      276 +                    fnvpair_value_string(pair), dduha->dduha_minor, now, tx);
 172  277                  dsl_dataset_rele(ds, FTAG);
 173  278          }
      279 +        dsl_onexit_hold_cleanup(dp->dp_spa, tmpholds, dduha->dduha_minor);
 174  280  }
 175  281  
 176  282  /*
      283 + * The full semantics of this function are described in the comment above
      284 + * lzc_hold().
      285 + *
      286 + * To summarize:
 177  287   * holds is nvl of snapname -> holdname
 178  288   * errlist will be filled in with snapname -> error
 179      - * if cleanup_minor is not 0, the holds will be temporary, cleaned up
 180      - * when the process exits.
 181  289   *
 182      - * if any fails, all will fail.
      290 + * The snapshots must all be in the same pool.
      291 + *
      292 + * Holds for snapshots that don't exist will be skipped.
      293 + *
      294 + * If none of the snapshots for requested holds exist then ENOENT will be
      295 + * returned.
      296 + *
      297 + * If cleanup_minor is not 0, the holds will be temporary and will be cleaned
      298 + * up when the process exits.
      299 + *
      300 + * On success all the holds, for snapshots that existed, will be created and 0
      301 + * will be returned.
      302 + *
      303 + * On failure no holds will be created, the errlist will be filled in,
      304 + * and an errno will be returned.
      305 + *
      306 + * In all cases the errlist will contain entries for holds where the snapshot
      307 + * didn't exist.
 183  308   */
 184  309  int
 185  310  dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 186  311  {
 187  312          dsl_dataset_user_hold_arg_t dduha;
 188  313          nvpair_t *pair;
      314 +        int ret;
 189  315  
 190  316          pair = nvlist_next_nvpair(holds, NULL);
 191  317          if (pair == NULL)
 192  318                  return (0);
 193  319  
 194  320          dduha.dduha_holds = holds;
      321 +        dduha.dduha_chkholds = fnvlist_alloc();
 195  322          dduha.dduha_errlist = errlist;
 196  323          dduha.dduha_minor = cleanup_minor;
 197  324  
 198      -        return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 199      -            dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
      325 +        ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
      326 +            dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
      327 +        fnvlist_free(dduha.dduha_chkholds);
      328 +
      329 +        return (ret);
 200  330  }
 201  331  
      332 +typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag,
      333 +    dsl_dataset_t **dsp);
      334 +
 202  335  typedef struct dsl_dataset_user_release_arg {
      336 +        dsl_holdfunc_t *ddura_holdfunc;
 203  337          nvlist_t *ddura_holds;
 204  338          nvlist_t *ddura_todelete;
 205  339          nvlist_t *ddura_errlist;
      340 +        nvlist_t *ddura_chkholds;
 206  341  } dsl_dataset_user_release_arg_t;
 207  342  
      343 +/* Place a dataset hold on the snapshot identified by passed dsobj string */
      344 +static int
      345 +dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag,
      346 +    dsl_dataset_t **dsp)
      347 +{
      348 +        return (dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp));
      349 +}
      350 +
 208  351  static int
 209      -dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
 210      -    nvlist_t *holds, boolean_t *todelete)
      352 +dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
      353 +    dsl_dataset_t *ds, nvlist_t *holds, const char *name)
 211  354  {
 212  355          uint64_t zapobj;
 213  356          nvpair_t *pair;
      357 +        nvlist_t *holds_found;
 214  358          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 215      -        int error;
 216      -        int numholds = 0;
 217      -
 218      -        *todelete = B_FALSE;
      359 +        int ret, numholds;
 219  360  
 220  361          if (!dsl_dataset_is_snapshot(ds))
 221  362                  return (SET_ERROR(EINVAL));
 222  363  
 223  364          zapobj = ds->ds_phys->ds_userrefs_obj;
 224  365          if (zapobj == 0)
 225  366                  return (SET_ERROR(ESRCH));
 226  367  
      368 +        ret = 0;
      369 +        numholds = 0;
      370 +        holds_found = fnvlist_alloc();
      371 +
 227  372          for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 228  373              pair = nvlist_next_nvpair(holds, pair)) {
 229      -                /* Make sure the hold exists */
 230  374                  uint64_t tmp;
 231      -                error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
      375 +                int error;
      376 +                const char *name;
      377 +
      378 +                name = nvpair_name(pair);
      379 +                error = zap_lookup(mos, zapobj, name, 8, 1, &tmp);
      380 +
      381 +                /* Non-existent holds aren't always an error. */
 232  382                  if (error == ENOENT)
 233      -                        error = SET_ERROR(ESRCH);
 234      -                if (error != 0)
      383 +                        continue;
      384 +
      385 +                if (error != 0) {
      386 +                        fnvlist_free(holds_found);
 235  387                          return (error);
      388 +                }
      389 +
      390 +                fnvlist_add_boolean(holds_found, name);
 236  391                  numholds++;
 237  392          }
 238  393  
 239  394          if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
 240  395              ds->ds_userrefs == numholds) {
 241  396                  /* we need to destroy the snapshot as well */
 242      -
 243      -                if (dsl_dataset_long_held(ds))
      397 +                if (dsl_dataset_long_held(ds)) {
      398 +                        fnvlist_free(holds_found);
 244  399                          return (SET_ERROR(EBUSY));
 245      -                *todelete = B_TRUE;
      400 +                }
      401 +                fnvlist_add_boolean(ddura->ddura_todelete, name);
 246  402          }
 247      -        return (0);
      403 +
      404 +        if (numholds == 0)
      405 +                ret = ENOENT;
      406 +        else
      407 +                fnvlist_add_nvlist(ddura->ddura_chkholds, name, holds_found);
      408 +        fnvlist_free(holds_found);
      409 +
      410 +        return (ret);
 248  411  }
 249  412  
 250  413  static int
 251  414  dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
 252  415  {
 253      -        dsl_dataset_user_release_arg_t *ddura = arg;
 254      -        dsl_pool_t *dp = dmu_tx_pool(tx);
      416 +        dsl_dataset_user_release_arg_t *ddura;
      417 +        dsl_holdfunc_t *holdfunc;
      418 +        dsl_pool_t *dp;
 255  419          nvpair_t *pair;
 256      -        int rv = 0;
 257  420  
 258  421          if (!dmu_tx_is_syncing(tx))
 259  422                  return (0);
 260  423  
      424 +        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
      425 +
      426 +        dp = dmu_tx_pool(tx);
      427 +        ddura = (dsl_dataset_user_release_arg_t *)arg;
      428 +        holdfunc = ddura->ddura_holdfunc;
      429 +
 261  430          for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 262  431              pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 263      -                const char *name = nvpair_name(pair);
      432 +                const char *name;
 264  433                  int error;
 265  434                  dsl_dataset_t *ds;
 266  435                  nvlist_t *holds;
 267  436  
      437 +                name = nvpair_name(pair);
 268  438                  error = nvpair_value_nvlist(pair, &holds);
 269  439                  if (error != 0)
 270      -                        return (SET_ERROR(EINVAL));
 271      -
 272      -                error = dsl_dataset_hold(dp, name, FTAG, &ds);
      440 +                        error = (SET_ERROR(EINVAL));
      441 +                if (error == 0)
      442 +                        error = holdfunc(dp, name, FTAG, &ds);
 273  443                  if (error == 0) {
 274      -                        boolean_t deleteme;
 275      -                        error = dsl_dataset_user_release_check_one(ds,
 276      -                            holds, &deleteme);
 277      -                        if (error == 0 && deleteme) {
 278      -                                fnvlist_add_boolean(ddura->ddura_todelete,
 279      -                                    name);
 280      -                        }
      444 +                        error = dsl_dataset_user_release_check_one(ddura, ds,
      445 +                            holds, name);
 281  446                          dsl_dataset_rele(ds, FTAG);
 282  447                  }
 283  448                  if (error != 0) {
 284  449                          if (ddura->ddura_errlist != NULL) {
 285      -                                fnvlist_add_int32(ddura->ddura_errlist,
 286      -                                    name, error);
      450 +                                fnvlist_add_int32(ddura->ddura_errlist, name,
      451 +                                    error);
 287  452                          }
 288      -                        rv = error;
      453 +                        /* Non-existent holds aren't always an error. */
      454 +                        if (error != ENOENT)
      455 +                                return (error);
 289  456                  }
 290  457          }
 291      -        return (rv);
      458 +
      459 +        /*
      460 +         * Return ENOENT if none of the holds existed, avoiding the overhead
      461 +         * of a sync.
      462 +         */
      463 +        if (nvlist_next_nvpair(ddura->ddura_chkholds, NULL) == NULL)
      464 +                return (ENOENT);
      465 +
      466 +        return (0);
 292  467  }
 293  468  
 294  469  static void
 295      -dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
 296      -    dmu_tx_t *tx)
      470 +dsl_dataset_user_release_sync_one(dsl_dataset_user_release_arg_t *ddura,
      471 +    dsl_dataset_t *ds, nvlist_t *holds, dmu_tx_t *tx)
 297  472  {
 298  473          dsl_pool_t *dp = ds->ds_dir->dd_pool;
 299  474          objset_t *mos = dp->dp_meta_objset;
 300      -        uint64_t zapobj;
 301      -        int error;
 302  475          nvpair_t *pair;
 303  476  
 304  477          for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 305  478              pair = nvlist_next_nvpair(holds, pair)) {
 306      -                ds->ds_userrefs--;
 307      -                error = dsl_pool_user_release(dp, ds->ds_object,
 308      -                    nvpair_name(pair), tx);
      479 +                int error;
      480 +                const char *name;
      481 +
      482 +                name = nvpair_name(pair);
      483 +
      484 +                /* Remove temporary hold if one exists. */
      485 +                error = dsl_pool_user_release(dp, ds->ds_object, name, tx);
 309  486                  VERIFY(error == 0 || error == ENOENT);
 310      -                zapobj = ds->ds_phys->ds_userrefs_obj;
 311      -                VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));
      487 +
      488 +                VERIFY0(zap_remove(mos, ds->ds_phys->ds_userrefs_obj, name,
      489 +                    tx));
      490 +                ds->ds_userrefs--;
 312  491  
 313  492                  spa_history_log_internal_ds(ds, "release", tx,
 314      -                    "tag=%s refs=%lld", nvpair_name(pair),
 315      -                    (longlong_t)ds->ds_userrefs);
      493 +                    "tag=%s refs=%lld", name, (longlong_t)ds->ds_userrefs);
 316  494          }
 317  495  }
 318  496  
 319  497  static void
 320  498  dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
 321  499  {
 322  500          dsl_dataset_user_release_arg_t *ddura = arg;
      501 +        dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
 323  502          dsl_pool_t *dp = dmu_tx_pool(tx);
 324  503          nvpair_t *pair;
 325  504  
 326      -        for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 327      -            pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
      505 +        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
      506 +
      507 +        for (pair = nvlist_next_nvpair(ddura->ddura_chkholds, NULL);
      508 +            pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_chkholds,
      509 +            pair)) {
 328  510                  dsl_dataset_t *ds;
      511 +                const char *name;
 329  512  
 330      -                VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 331      -                dsl_dataset_user_release_sync_one(ds,
      513 +                name = nvpair_name(pair);
      514 +                VERIFY0(holdfunc(dp, name, FTAG, &ds));
      515 +
      516 +                dsl_dataset_user_release_sync_one(ddura, ds,
 332  517                      fnvpair_value_nvlist(pair), tx);
 333      -                if (nvlist_exists(ddura->ddura_todelete,
 334      -                    nvpair_name(pair))) {
      518 +                if (nvlist_exists(ddura->ddura_todelete, name)) {
 335  519                          ASSERT(ds->ds_userrefs == 0 &&
 336  520                              ds->ds_phys->ds_num_children == 1 &&
 337  521                              DS_IS_DEFER_DESTROY(ds));
 338  522                          dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 339  523                  }
 340  524                  dsl_dataset_rele(ds, FTAG);
 341  525          }
 342  526  }
 343  527  
 344  528  /*
      529 + * The full semantics of this function are described in the comment above
      530 + * lzc_release().
      531 + *
      532 + * To summarize:
      533 + * Releases holds specified in the nvl holds.
      534 + *
 345  535   * holds is nvl of snapname -> { holdname, ... }
 346  536   * errlist will be filled in with snapname -> error
 347  537   *
 348      - * if any fails, all will fail.
      538 + * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
      539 + * otherwise they should be the names of snapshots.
      540 + *
      541 + * As a release may cause snapshots to be destroyed this tries to ensure they
      542 + * aren't mounted.
      543 + *
      544 + * Releases of non-existent holds are skipped.
      545 + *
      546 + * At least one hold must have been released for this function to succeed
      547 + * and return 0.
 349  548   */
 350      -int
 351      -dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
      549 +static int
      550 +dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
      551 +    dsl_pool_t *tmpdp)
 352  552  {
 353  553          dsl_dataset_user_release_arg_t ddura;
 354  554          nvpair_t *pair;
      555 +        char *pool;
 355  556          int error;
 356  557  
 357  558          pair = nvlist_next_nvpair(holds, NULL);
 358  559          if (pair == NULL)
 359  560                  return (0);
 360  561  
      562 +#ifdef _KERNEL
      563 +        /*
      564 +         * The release may cause snapshots to be destroyed; make sure they
      565 +         * are not mounted.
      566 +         */
      567 +        if (tmpdp != NULL) {
      568 +                /* Temporary holds are specified by dsobj string. */
      569 +                ddura.ddura_holdfunc = dsl_dataset_hold_obj_string;
      570 +                pool = spa_name(tmpdp->dp_spa);
      571 +
      572 +                dsl_pool_config_enter(tmpdp, FTAG);
      573 +                for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
      574 +                    pair = nvlist_next_nvpair(holds, pair)) {
      575 +                        dsl_dataset_t *ds;
      576 +
      577 +                        error = dsl_dataset_hold_obj_string(tmpdp,
      578 +                            nvpair_name(pair), FTAG, &ds);
      579 +                        if (error == 0) {
      580 +                                char name[MAXNAMELEN];
      581 +                                dsl_dataset_name(ds, name);
      582 +                                dsl_dataset_rele(ds, FTAG);
      583 +                                zfs_unmount_snap(name);
      584 +                        }
      585 +                }
      586 +                dsl_pool_config_exit(tmpdp, FTAG);
      587 +        } else {
      588 +                /* Non-temporary holds are specified by name. */
      589 +                ddura.ddura_holdfunc = dsl_dataset_hold;
      590 +                pool = nvpair_name(pair);
      591 +
      592 +                for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
      593 +                    pair = nvlist_next_nvpair(holds, pair)) {
      594 +                        zfs_unmount_snap(nvpair_name(pair));
      595 +                }
      596 +        }
      597 +#endif
      598 +
 361  599          ddura.ddura_holds = holds;
 362  600          ddura.ddura_errlist = errlist;
 363  601          ddura.ddura_todelete = fnvlist_alloc();
      602 +        ddura.ddura_chkholds = fnvlist_alloc();
 364  603  
 365      -        error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
 366      -            dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
      604 +        error = dsl_sync_task(pool, dsl_dataset_user_release_check,
      605 +            dsl_dataset_user_release_sync, &ddura,
      606 +            fnvlist_num_pairs(holds));
 367  607          fnvlist_free(ddura.ddura_todelete);
 368      -        return (error);
 369      -}
      608 +        fnvlist_free(ddura.ddura_chkholds);
 370  609  
 371      -typedef struct dsl_dataset_user_release_tmp_arg {
 372      -        uint64_t ddurta_dsobj;
 373      -        nvlist_t *ddurta_holds;
 374      -        boolean_t ddurta_deleteme;
 375      -} dsl_dataset_user_release_tmp_arg_t;
 376      -
 377      -static int
 378      -dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
 379      -{
 380      -        dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 381      -        dsl_pool_t *dp = dmu_tx_pool(tx);
 382      -        dsl_dataset_t *ds;
 383      -        int error;
 384      -
 385      -        if (!dmu_tx_is_syncing(tx))
 386      -                return (0);
 387      -
 388      -        error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
 389      -        if (error)
 390      -                return (error);
 391      -
 392      -        error = dsl_dataset_user_release_check_one(ds,
 393      -            ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
 394      -        dsl_dataset_rele(ds, FTAG);
 395  610          return (error);
 396  611  }
 397  612  
 398      -static void
 399      -dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
 400      -{
 401      -        dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 402      -        dsl_pool_t *dp = dmu_tx_pool(tx);
 403      -        dsl_dataset_t *ds;
 404      -
 405      -        VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
 406      -        dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
 407      -        if (ddurta->ddurta_deleteme) {
 408      -                ASSERT(ds->ds_userrefs == 0 &&
 409      -                    ds->ds_phys->ds_num_children == 1 &&
 410      -                    DS_IS_DEFER_DESTROY(ds));
 411      -                dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 412      -        }
 413      -        dsl_dataset_rele(ds, FTAG);
 414      -}
 415      -
 416  613  /*
 417      - * Called at spa_load time to release a stale temporary user hold.
 418      - * Also called by the onexit code.
      614 + * holds is nvl of snapname -> { holdname, ... }
      615 + * errlist will be filled in with snapname -> error
 419  616   */
 420      -void
 421      -dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
 422      -{
 423      -        dsl_dataset_user_release_tmp_arg_t ddurta;
 424      -        dsl_dataset_t *ds;
 425      -        int error;
 426      -
 427      -#ifdef _KERNEL
 428      -        /* Make sure it is not mounted. */
 429      -        dsl_pool_config_enter(dp, FTAG);
 430      -        error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
 431      -        if (error == 0) {
 432      -                char name[MAXNAMELEN];
 433      -                dsl_dataset_name(ds, name);
 434      -                dsl_dataset_rele(ds, FTAG);
 435      -                dsl_pool_config_exit(dp, FTAG);
 436      -                zfs_unmount_snap(name);
 437      -        } else {
 438      -                dsl_pool_config_exit(dp, FTAG);
 439      -        }
 440      -#endif
 441      -
 442      -        ddurta.ddurta_dsobj = dsobj;
 443      -        ddurta.ddurta_holds = fnvlist_alloc();
 444      -        fnvlist_add_boolean(ddurta.ddurta_holds, htag);
 445      -
 446      -        (void) dsl_sync_task(spa_name(dp->dp_spa),
 447      -            dsl_dataset_user_release_tmp_check,
 448      -            dsl_dataset_user_release_tmp_sync, &ddurta, 1);
 449      -        fnvlist_free(ddurta.ddurta_holds);
 450      -}
 451      -
 452      -typedef struct zfs_hold_cleanup_arg {
 453      -        char zhca_spaname[MAXNAMELEN];
 454      -        uint64_t zhca_spa_load_guid;
 455      -        uint64_t zhca_dsobj;
 456      -        char zhca_htag[MAXNAMELEN];
 457      -} zfs_hold_cleanup_arg_t;
 458      -
 459      -static void
 460      -dsl_dataset_user_release_onexit(void *arg)
      617 +int
      618 +dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
 461  619  {
 462      -        zfs_hold_cleanup_arg_t *ca = arg;
 463      -        spa_t *spa;
 464      -        int error;
 465      -
 466      -        error = spa_open(ca->zhca_spaname, &spa, FTAG);
 467      -        if (error != 0) {
 468      -                zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 469      -                    "because pool is no longer loaded",
 470      -                    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 471      -                return;
 472      -        }
 473      -        if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
 474      -                zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 475      -                    "because pool is no longer loaded (guid doesn't match)",
 476      -                    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 477      -                spa_close(spa, FTAG);
 478      -                return;
 479      -        }
 480      -
 481      -        dsl_dataset_user_release_tmp(spa_get_dsl(spa),
 482      -            ca->zhca_dsobj, ca->zhca_htag);
 483      -        kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 484      -        spa_close(spa, FTAG);
      620 +        return (dsl_dataset_user_release_impl(holds, errlist, NULL));
 485  621  }
 486  622  
      623 +/*
      624 + * holds is nvl of snapdsobj -> { holdname, ... }
      625 + */
 487  626  void
 488      -dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
 489      -    minor_t minor)
      627 +dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds)
 490  628  {
 491      -        zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
 492      -        spa_t *spa = dsl_dataset_get_spa(ds);
 493      -        (void) strlcpy(ca->zhca_spaname, spa_name(spa),
 494      -            sizeof (ca->zhca_spaname));
 495      -        ca->zhca_spa_load_guid = spa_load_guid(spa);
 496      -        ca->zhca_dsobj = ds->ds_object;
 497      -        (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
 498      -        VERIFY0(zfs_onexit_add_cb(minor,
 499      -            dsl_dataset_user_release_onexit, ca, NULL));
      629 +        ASSERT(dp != NULL);
      630 +        (void) dsl_dataset_user_release_impl(holds, NULL, dp);
 500  631  }
 501  632  
 502  633  int
 503  634  dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 504  635  {
 505  636          dsl_pool_t *dp;
 506  637          dsl_dataset_t *ds;
 507  638          int err;
 508  639  
 509  640          err = dsl_pool_hold(dsname, FTAG, &dp);
↓ open down ↓ 27 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX