Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/dsl_userhold.c
          +++ new/usr/src/uts/common/fs/zfs/dsl_userhold.c
↓ open down ↓ 28 lines elided ↑ open up ↑
  29   29  #include <sys/dsl_destroy.h>
  30   30  #include <sys/dsl_synctask.h>
  31   31  #include <sys/dmu_tx.h>
  32   32  #include <sys/zfs_onexit.h>
  33   33  #include <sys/dsl_pool.h>
  34   34  #include <sys/dsl_dir.h>
  35   35  #include <sys/zfs_ioctl.h>
  36   36  #include <sys/zap.h>
  37   37  
  38   38  typedef struct dsl_dataset_user_hold_arg {
       39 +        spa_t *dduha_spa;
  39   40          nvlist_t *dduha_holds;
       41 +        nvlist_t *dduha_chkholds;
       42 +        nvlist_t *dduha_tmpholds;
  40   43          nvlist_t *dduha_errlist;
  41   44          minor_t dduha_minor;
  42   45  } dsl_dataset_user_hold_arg_t;
  43   46  
  44   47  /*
  45   48   * If you add new checks here, you may need to add additional checks to the
  46   49   * "temporary" case in snapshot_check() in dmu_objset.c.
  47   50   */
  48   51  int
  49   52  dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
  50   53      boolean_t temphold, dmu_tx_t *tx)
  51   54  {
  52   55          dsl_pool_t *dp = dmu_tx_pool(tx);
  53   56          objset_t *mos = dp->dp_meta_objset;
  54   57          int error = 0;
  55   58  
       59 +        ASSERT(RRW_READ_HELD(&dp->dp_config_rwlock));
       60 +
  56   61          if (strlen(htag) > MAXNAMELEN)
  57   62                  return (E2BIG);
  58   63          /* Tempholds have a more restricted length */
  59   64          if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
  60   65                  return (E2BIG);
  61   66  
  62   67          /* tags must be unique (if ds already exists) */
  63      -        if (ds != NULL) {
  64      -                mutex_enter(&ds->ds_lock);
  65      -                if (ds->ds_phys->ds_userrefs_obj != 0) {
  66      -                        uint64_t value;
  67      -                        error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
  68      -                            htag, 8, 1, &value);
  69      -                        if (error == 0)
  70      -                                error = SET_ERROR(EEXIST);
  71      -                        else if (error == ENOENT)
  72      -                                error = 0;
  73      -                }
  74      -                mutex_exit(&ds->ds_lock);
       68 +        if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) {
       69 +                uint64_t value;
       70 +
       71 +                error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
       72 +                    htag, 8, 1, &value);
       73 +                if (error == 0)
       74 +                        error = SET_ERROR(EEXIST);
       75 +                else if (error == ENOENT)
       76 +                        error = 0;
  75   77          }
  76   78  
  77   79          return (error);
  78   80  }
  79   81  
  80   82  static int
  81   83  dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
  82   84  {
  83   85          dsl_dataset_user_hold_arg_t *dduha = arg;
  84   86          dsl_pool_t *dp = dmu_tx_pool(tx);
  85   87          nvpair_t *pair;
  86      -        int rv = 0;
  87   88  
  88   89          if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
  89   90                  return (SET_ERROR(ENOTSUP));
  90   91  
       92 +        if (!dmu_tx_is_syncing(tx))
       93 +                return (0);
       94 +
  91   95          for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
  92   96              pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
  93      -                int error = 0;
  94   97                  dsl_dataset_t *ds;
  95      -                char *htag;
       98 +                int error = 0;
       99 +                char *htag, *name;
  96  100  
  97  101                  /* must be a snapshot */
  98      -                if (strchr(nvpair_name(pair), '@') == NULL)
      102 +                name = nvpair_name(pair);
      103 +                if (strchr(name, '@') == NULL)
  99  104                          error = SET_ERROR(EINVAL);
 100  105  
 101  106                  if (error == 0)
 102  107                          error = nvpair_value_string(pair, &htag);
 103      -                if (error == 0) {
 104      -                        error = dsl_dataset_hold(dp,
 105      -                            nvpair_name(pair), FTAG, &ds);
 106      -                }
      108 +
      109 +                if (error == 0)
      110 +                        error = dsl_dataset_hold(dp, name, FTAG, &ds);
      111 +
 107  112                  if (error == 0) {
 108  113                          error = dsl_dataset_user_hold_check_one(ds, htag,
 109  114                              dduha->dduha_minor != 0, tx);
 110  115                          dsl_dataset_rele(ds, FTAG);
 111  116                  }
 112  117  
 113      -                if (error != 0) {
 114      -                        rv = error;
 115      -                        fnvlist_add_int32(dduha->dduha_errlist,
 116      -                            nvpair_name(pair), error);
      118 +                if (error == 0) {
      119 +                        fnvlist_add_string(dduha->dduha_chkholds, name, htag);
      120 +                } else {
      121 +                        /*
      122 +                         * We register ENOENT errors so they can be correctly
      123 +                         * reported if needed, such as when all holds fail.
      124 +                         */
      125 +                        fnvlist_add_int32(dduha->dduha_errlist, name, error);
      126 +                        if (error != ENOENT)
      127 +                                return (error);
 117  128                  }
 118  129          }
 119      -        return (rv);
      130 +
      131 +        /* Return ENOENT if no holds would be created. */
      132 +        if (nvlist_next_nvpair(dduha->dduha_chkholds, NULL) == NULL)
      133 +                return (ENOENT);
      134 +
      135 +        return (0);
 120  136  }
 121  137  
 122      -void
 123      -dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
 124      -    minor_t minor, uint64_t now, dmu_tx_t *tx)
      138 +
      139 +static void
      140 +dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
      141 +    const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx)
 125  142  {
 126  143          dsl_pool_t *dp = ds->ds_dir->dd_pool;
 127  144          objset_t *mos = dp->dp_meta_objset;
 128  145          uint64_t zapobj;
 129  146  
 130      -        mutex_enter(&ds->ds_lock);
      147 +        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
      148 +
 131  149          if (ds->ds_phys->ds_userrefs_obj == 0) {
 132  150                  /*
 133  151                   * This is the first user hold for this dataset.  Create
 134  152                   * the userrefs zap object.
 135  153                   */
 136  154                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
 137  155                  zapobj = ds->ds_phys->ds_userrefs_obj =
 138  156                      zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
 139  157          } else {
 140  158                  zapobj = ds->ds_phys->ds_userrefs_obj;
 141  159          }
 142  160          ds->ds_userrefs++;
 143      -        mutex_exit(&ds->ds_lock);
 144  161  
 145  162          VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 146  163  
 147  164          if (minor != 0) {
      165 +                char name[MAXNAMELEN];
      166 +                nvlist_t *tags;
      167 +
 148  168                  VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 149  169                      htag, now, tx));
 150      -                dsl_register_onexit_hold_cleanup(ds, htag, minor);
      170 +                (void) snprintf(name, sizeof(name), "%llx",
      171 +                    (u_longlong_t)ds->ds_object);
      172 +
      173 +                if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) {
      174 +                        tags = fnvlist_alloc();
      175 +                        fnvlist_add_boolean(tags, htag);
      176 +                        fnvlist_add_nvlist(tmpholds, name, tags);
      177 +                        fnvlist_free(tags);
      178 +                } else {
      179 +                        fnvlist_add_boolean(tags, htag);
      180 +                }
 151  181          }
 152  182  
 153  183          spa_history_log_internal_ds(ds, "hold", tx,
 154  184              "tag=%s temp=%d refs=%llu",
 155  185              htag, minor != 0, ds->ds_userrefs);
 156  186  }
 157  187  
      188 +typedef struct zfs_hold_cleanup_arg {
      189 +        char zhca_spaname[MAXNAMELEN];
      190 +        uint64_t zhca_spa_load_guid;
      191 +        nvlist_t *zhca_holds;
      192 +} zfs_hold_cleanup_arg_t;
      193 +
      194 +static void
      195 +dsl_dataset_user_release_onexit(void *arg)
      196 +{
      197 +        zfs_hold_cleanup_arg_t *ca = (zfs_hold_cleanup_arg_t *)arg;
      198 +        spa_t *spa;
      199 +        int error;
      200 +
      201 +        error = spa_open(ca->zhca_spaname, &spa, FTAG);
      202 +        if (error != 0) {
      203 +                zfs_dbgmsg("couldn't release holds on pool=%s "
      204 +                    "because pool is no longer loaded",
      205 +                    ca->zhca_spaname);
      206 +                return;
      207 +        }
      208 +        if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
      209 +                zfs_dbgmsg("couldn't release holds on pool=%s "
      210 +                    "because pool is no longer loaded (guid doesn't match)",
      211 +                    ca->zhca_spaname);
      212 +                spa_close(spa, FTAG);
      213 +                return;
      214 +        }
      215 +
      216 +        (void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
      217 +        fnvlist_free(ca->zhca_holds);
      218 +        kmem_free(ca, sizeof(zfs_hold_cleanup_arg_t));
      219 +        spa_close(spa, FTAG);
      220 +}
      221 +
      222 +static void
      223 +dsl_register_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
      224 +{
      225 +        zfs_hold_cleanup_arg_t *ca;
      226 +
      227 +        if (minor == 0 || nvlist_next_nvpair(holds, NULL) == NULL) {
      228 +                fnvlist_free(holds);
      229 +                return;
      230 +        }
      231 +
      232 +        ASSERT(spa != NULL);
      233 +        ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
      234 +
      235 +        (void) strlcpy(ca->zhca_spaname, spa_name(spa),
      236 +            sizeof (ca->zhca_spaname));
      237 +        ca->zhca_spa_load_guid = spa_load_guid(spa);
      238 +        ca->zhca_holds = holds;
      239 +        VERIFY0(zfs_onexit_add_cb(minor,
      240 +            dsl_dataset_user_release_onexit, ca, NULL));
      241 +}
      242 +
      243 +void
      244 +dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
      245 +    minor_t minor, uint64_t now, dmu_tx_t *tx)
      246 +{
      247 +        nvlist_t *tmpholds;
      248 +
      249 +        tmpholds = fnvlist_alloc();
      250 +
      251 +        dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx);
      252 +        dsl_register_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds,
      253 +            minor);
      254 +}
      255 +
 158  256  static void
 159  257  dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 160  258  {
 161  259          dsl_dataset_user_hold_arg_t *dduha = arg;
 162  260          dsl_pool_t *dp = dmu_tx_pool(tx);
 163  261          nvpair_t *pair;
 164  262          uint64_t now = gethrestime_sec();
 165  263  
 166      -        for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
 167      -            pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
      264 +        for (pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL);
      265 +            pair != NULL;
      266 +            pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) {
 168  267                  dsl_dataset_t *ds;
      268 +
 169  269                  VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 170      -                dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
 171      -                    dduha->dduha_minor, now, tx);
      270 +                dsl_dataset_user_hold_sync_one_impl(dduha->dduha_tmpholds, ds,
      271 +                    fnvpair_value_string(pair), dduha->dduha_minor, now, tx);
 172  272                  dsl_dataset_rele(ds, FTAG);
 173  273          }
      274 +        dduha->dduha_spa = dp->dp_spa;
 174  275  }
 175  276  
 176  277  /*
      278 + * The full semantics of this function are described in the comment above
      279 + * lzc_hold().
      280 + *
      281 + * To summarize:
 177  282   * holds is nvl of snapname -> holdname
 178  283   * errlist will be filled in with snapname -> error
 179      - * if cleanup_minor is not 0, the holds will be temporary, cleaned up
 180      - * when the process exits.
 181  284   *
 182      - * if any fails, all will fail.
      285 + * The snapshots must all be in the same pool.
      286 + *
      287 + * Holds for snapshots that don't exist will be skipped.
      288 + *
      289 + * If none of the snapshots for requested holds exist then ENOENT will be
      290 + * returned.
      291 + *
      292 + * If cleanup_minor is not 0, the holds will be temporary, which will be cleaned
      293 + * up when the process exits.
      294 + *
      295 + * On success all the holds, for snapshots that existed, will be created and 0
      296 + * will be returned.
      297 + *
      298 + * On failure no holds will be created, the errlist will be filled in,
      299 + * and an errno will be returned.
      300 + *
      301 + * In all cases the errlist will contain entries for holds where the snapshot
      302 + * didn't exist.
 183  303   */
 184  304  int
 185  305  dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 186  306  {
 187  307          dsl_dataset_user_hold_arg_t dduha;
 188  308          nvpair_t *pair;
      309 +        int ret;
 189  310  
 190  311          pair = nvlist_next_nvpair(holds, NULL);
 191  312          if (pair == NULL)
 192  313                  return (0);
 193  314  
      315 +        dduha.dduha_spa = NULL;
 194  316          dduha.dduha_holds = holds;
      317 +        dduha.dduha_chkholds = fnvlist_alloc();
      318 +        dduha.dduha_tmpholds = fnvlist_alloc();
 195  319          dduha.dduha_errlist = errlist;
 196  320          dduha.dduha_minor = cleanup_minor;
 197  321  
 198      -        return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 199      -            dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
      322 +        ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
      323 +            dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
      324 +
      325 +        /* dsl_register_onexit_hold_cleanup() always frees the passed holds. */
      326 +        dsl_register_onexit_hold_cleanup(dduha.dduha_spa, dduha.dduha_tmpholds,
      327 +            cleanup_minor);
      328 +        fnvlist_free(dduha.dduha_chkholds);
      329 +
      330 +        return (ret);
 200  331  }
 201  332  
      333 +typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag,
      334 +    dsl_dataset_t **dsp);
      335 +
 202  336  typedef struct dsl_dataset_user_release_arg {
      337 +        dsl_holdfunc_t *ddura_holdfunc;
 203  338          nvlist_t *ddura_holds;
 204  339          nvlist_t *ddura_todelete;
 205  340          nvlist_t *ddura_errlist;
      341 +        nvlist_t *ddura_chkholds;
 206  342  } dsl_dataset_user_release_arg_t;
 207  343  
      344 +/* Place a dataset hold on the snapshot identified by passed dsobj string */
      345 +static int
      346 +dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag,
      347 +    dsl_dataset_t **dsp)
      348 +{
      349 +        return dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp);
      350 +}
      351 +
 208  352  static int
 209      -dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
 210      -    nvlist_t *holds, boolean_t *todelete)
      353 +dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
      354 +    dsl_dataset_t *ds, nvlist_t *holds, const char *name)
 211  355  {
 212  356          uint64_t zapobj;
 213  357          nvpair_t *pair;
      358 +        nvlist_t *holds_found;
 214  359          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 215      -        int error;
 216      -        int numholds = 0;
 217      -
 218      -        *todelete = B_FALSE;
      360 +        int ret, numholds;
 219  361  
 220  362          if (!dsl_dataset_is_snapshot(ds))
 221  363                  return (SET_ERROR(EINVAL));
 222  364  
 223  365          zapobj = ds->ds_phys->ds_userrefs_obj;
 224  366          if (zapobj == 0)
 225  367                  return (SET_ERROR(ESRCH));
 226  368  
      369 +        ret = 0;
      370 +        numholds = 0;
      371 +        holds_found = fnvlist_alloc();
      372 +
 227  373          for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 228  374              pair = nvlist_next_nvpair(holds, pair)) {
 229      -                /* Make sure the hold exists */
 230  375                  uint64_t tmp;
 231      -                error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
      376 +                int error;
      377 +                const char *name;
      378 +
      379 +                name = nvpair_name(pair);
      380 +                error = zap_lookup(mos, zapobj, name, 8, 1, &tmp);
      381 +
      382 +                /* Non-existent holds aren't always an error. */
 232  383                  if (error == ENOENT)
 233      -                        error = SET_ERROR(ESRCH);
 234      -                if (error != 0)
      384 +                        continue;
      385 +
      386 +                if (error != 0) {
      387 +                        fnvlist_free(holds_found);
 235  388                          return (error);
      389 +                }
      390 +
      391 +                fnvlist_add_boolean(holds_found, name);
 236  392                  numholds++;
 237  393          }
 238  394  
 239  395          if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
 240  396              ds->ds_userrefs == numholds) {
 241  397                  /* we need to destroy the snapshot as well */
 242      -
 243      -                if (dsl_dataset_long_held(ds))
      398 +                if (dsl_dataset_long_held(ds)) {
      399 +                        fnvlist_free(holds_found);
 244  400                          return (SET_ERROR(EBUSY));
 245      -                *todelete = B_TRUE;
      401 +                }
      402 +                fnvlist_add_boolean(ddura->ddura_todelete, name);
 246  403          }
 247      -        return (0);
      404 +
      405 +        if (numholds == 0)
      406 +                ret = ENOENT;
      407 +        else
      408 +                fnvlist_add_nvlist(ddura->ddura_chkholds, name, holds_found);
      409 +        fnvlist_free(holds_found);
      410 +
      411 +        return (ret);
 248  412  }
 249  413  
 250  414  static int
 251  415  dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
 252  416  {
 253      -        dsl_dataset_user_release_arg_t *ddura = arg;
 254      -        dsl_pool_t *dp = dmu_tx_pool(tx);
      417 +        dsl_dataset_user_release_arg_t *ddura;
      418 +        dsl_holdfunc_t *holdfunc;
      419 +        dsl_pool_t *dp;
 255  420          nvpair_t *pair;
 256      -        int rv = 0;
 257  421  
 258  422          if (!dmu_tx_is_syncing(tx))
 259  423                  return (0);
 260  424  
      425 +        /* Look up the pool first: the ASSERT below dereferences dp. */
      426 +        dp = dmu_tx_pool(tx);
      427 +        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
      428 +        ddura = (dsl_dataset_user_release_arg_t *)arg;
      429 +        holdfunc = ddura->ddura_holdfunc;
      430 +
 261  431          for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 262  432              pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 263      -                const char *name = nvpair_name(pair);
      433 +                const char *name;
 264  434                  int error;
 265  435                  dsl_dataset_t *ds;
 266  436                  nvlist_t *holds;
 267  437  
      438 +                name = nvpair_name(pair);
 268  439                  error = nvpair_value_nvlist(pair, &holds);
 269  440                  if (error != 0)
 270      -                        return (SET_ERROR(EINVAL));
 271      -
 272      -                error = dsl_dataset_hold(dp, name, FTAG, &ds);
      441 +                        error = (SET_ERROR(EINVAL));
      442 +                if (error == 0)
      443 +                        error = holdfunc(dp, name, FTAG, &ds);
 273  444                  if (error == 0) {
 274      -                        boolean_t deleteme;
 275      -                        error = dsl_dataset_user_release_check_one(ds,
 276      -                            holds, &deleteme);
 277      -                        if (error == 0 && deleteme) {
 278      -                                fnvlist_add_boolean(ddura->ddura_todelete,
 279      -                                    name);
 280      -                        }
      445 +                        error = dsl_dataset_user_release_check_one(ddura, ds,
      446 +                            holds, name);
 281  447                          dsl_dataset_rele(ds, FTAG);
 282  448                  }
 283  449                  if (error != 0) {
 284  450                          if (ddura->ddura_errlist != NULL) {
 285      -                                fnvlist_add_int32(ddura->ddura_errlist,
 286      -                                    name, error);
      451 +                                fnvlist_add_int32(ddura->ddura_errlist, name,
      452 +                                    error);
 287  453                          }
 288      -                        rv = error;
      454 +                        /* Non-existent holds aren't always an error. */
      455 +                        if (error != ENOENT)
      456 +                                return (error);
 289  457                  }
 290  458          }
 291      -        return (rv);
      459 +
      460 +        /*
      461 +         * Return ENOENT if none of the holds existed avoiding the overhead
      462 +         * of a sync.
      463 +         */
      464 +        if (nvlist_next_nvpair(ddura->ddura_chkholds, NULL) == NULL)
      465 +                return (ENOENT);
      466 +
      467 +        return (0);
 292  468  }
 293  469  
 294  470  static void
 295      -dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
 296      -    dmu_tx_t *tx)
      471 +dsl_dataset_user_release_sync_one(dsl_dataset_user_release_arg_t *ddura,
      472 +    dsl_dataset_t *ds, nvlist_t *holds, dmu_tx_t *tx)
 297  473  {
 298  474          dsl_pool_t *dp = ds->ds_dir->dd_pool;
 299  475          objset_t *mos = dp->dp_meta_objset;
 300      -        uint64_t zapobj;
 301      -        int error;
 302  476          nvpair_t *pair;
 303  477  
 304  478          for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 305  479              pair = nvlist_next_nvpair(holds, pair)) {
 306      -                ds->ds_userrefs--;
 307      -                error = dsl_pool_user_release(dp, ds->ds_object,
 308      -                    nvpair_name(pair), tx);
      480 +                uint64_t zapobj;
      481 +                int error;
      482 +                const char *name;
      483 +
      484 +                name = nvpair_name(pair);
      485 +
      486 +                /* Remove temporary hold if one exists. */
      487 +                error = dsl_pool_user_release(dp, ds->ds_object, name, tx);
 309  488                  VERIFY(error == 0 || error == ENOENT);
      489 +
      490 +                /* Remove user hold if one exists. */
 310  491                  zapobj = ds->ds_phys->ds_userrefs_obj;
 311      -                VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));
      492 +                error = zap_remove(mos, zapobj, name, tx);
      493 +                if (error == ENOENT)
      494 +                        continue;
      495 +                VERIFY0(error);
      496 +
      497 +                /* Only if we removed a hold do we decrement ds_userrefs. */
      498 +                ds->ds_userrefs--;
 312  499  
 313  500                  spa_history_log_internal_ds(ds, "release", tx,
 314  501                      "tag=%s refs=%lld", nvpair_name(pair),
 315  502                      (longlong_t)ds->ds_userrefs);
 316  503          }
 317  504  }
 318  505  
 319  506  static void
 320  507  dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
 321  508  {
 322  509          dsl_dataset_user_release_arg_t *ddura = arg;
      510 +        dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
 323  511          dsl_pool_t *dp = dmu_tx_pool(tx);
 324  512          nvpair_t *pair;
 325  513  
 326      -        for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 327      -            pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
      514 +        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
      515 +
      516 +        for (pair = nvlist_next_nvpair(ddura->ddura_chkholds, NULL);
      517 +            pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_chkholds,
      518 +            pair)) {
 328  519                  dsl_dataset_t *ds;
      520 +                const char *name;
 329  521  
 330      -                VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 331      -                dsl_dataset_user_release_sync_one(ds,
      522 +                name = nvpair_name(pair);
      523 +                VERIFY0(holdfunc(dp, name, FTAG, &ds));
      524 +
      525 +                dsl_dataset_user_release_sync_one(ddura, ds,
 332  526                      fnvpair_value_nvlist(pair), tx);
 333      -                if (nvlist_exists(ddura->ddura_todelete,
 334      -                    nvpair_name(pair))) {
      527 +                if (nvlist_exists(ddura->ddura_todelete, name)) {
 335  528                          ASSERT(ds->ds_userrefs == 0 &&
 336  529                              ds->ds_phys->ds_num_children == 1 &&
 337  530                              DS_IS_DEFER_DESTROY(ds));
 338  531                          dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 339  532                  }
 340  533                  dsl_dataset_rele(ds, FTAG);
 341  534          }
 342  535  }
 343  536  
 344  537  /*
      538 + * The full semantics of this function are described in the comment above
      539 + * lzc_release().
      540 + *
      541 + * To summarize:
      542 + * Releases holds specified in the nvl holds.
      543 + *
 345  544   * holds is nvl of snapname -> { holdname, ... }
 346  545   * errlist will be filled in with snapname -> error
      546 + * 
      547 + * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
      548 + * otherwise they should be the names of snapshots.
      549 + *
      550 + * As a release may cause snapshots to be destroyed this tries to ensure they
      551 + * aren't mounted.
      552 + *
      553 + * The release of non-existent holds is skipped.
 347  554   *
 348      - * if any fails, all will fail.
      555 + * At least one hold must have been released for this function to succeed
      556 + * and return 0.
 349  557   */
 350      -int
 351      -dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
      558 +static int
      559 +dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
      560 +    dsl_pool_t *tmpdp)
 352  561  {
 353  562          dsl_dataset_user_release_arg_t ddura;
 354  563          nvpair_t *pair;
      564 +        char *pool;
 355  565          int error;
 356  566  
 357  567          pair = nvlist_next_nvpair(holds, NULL);
 358  568          if (pair == NULL)
 359  569                  return (0);
 360  570  
      571 +        /*
      572 +         * The release may cause snapshots to be destroyed; make sure they
      573 +         * are not mounted (kernel only).  The hold function and pool name
      574 +         * are needed on every build, so they are set outside the #ifdefs.
      575 +         */
      576 +        if (tmpdp != NULL) {
      577 +                /* Temporary holds are specified by dsobj string. */
      578 +                ddura.ddura_holdfunc = dsl_dataset_hold_obj_string;
      579 +                pool = spa_name(tmpdp->dp_spa);
      580 +#ifdef _KERNEL
      581 +                dsl_pool_config_enter(tmpdp, FTAG);
      582 +                for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
      583 +                    pair = nvlist_next_nvpair(holds, pair)) {
      584 +                        dsl_dataset_t *ds;
      585 +                        error = dsl_dataset_hold_obj_string(tmpdp,
      586 +                            nvpair_name(pair), FTAG, &ds);
      587 +                        if (error == 0) {
      588 +                                char name[MAXNAMELEN];
      589 +                                dsl_dataset_name(ds, name);
      590 +                                dsl_dataset_rele(ds, FTAG);
      591 +                                zfs_unmount_snap(name);
      592 +                        }
      593 +                }
      594 +                dsl_pool_config_exit(tmpdp, FTAG);
      595 +#endif
      596 +        } else {
      597 +                /* Non-temporary holds are specified by name. */
      598 +                ddura.ddura_holdfunc = dsl_dataset_hold;
      599 +                pool = nvpair_name(pair);
      600 +#ifdef _KERNEL
      601 +                for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
      602 +                    pair = nvlist_next_nvpair(holds, pair)) {
      603 +                        zfs_unmount_snap(nvpair_name(pair));
      604 +                }
      605 +#endif
      606 +        }
      607 +
 361  608          ddura.ddura_holds = holds;
 362  609          ddura.ddura_errlist = errlist;
 363  610          ddura.ddura_todelete = fnvlist_alloc();
      611 +        ddura.ddura_chkholds = fnvlist_alloc();
 364  612  
 365      -        error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
 366      -            dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
      613 +        error = dsl_sync_task(pool, dsl_dataset_user_release_check,
      614 +            dsl_dataset_user_release_sync, &ddura,
      615 +            fnvlist_num_pairs(holds));
 367  616          fnvlist_free(ddura.ddura_todelete);
 368      -        return (error);
 369      -}
 370      -
 371      -typedef struct dsl_dataset_user_release_tmp_arg {
 372      -        uint64_t ddurta_dsobj;
 373      -        nvlist_t *ddurta_holds;
 374      -        boolean_t ddurta_deleteme;
 375      -} dsl_dataset_user_release_tmp_arg_t;
 376      -
 377      -static int
 378      -dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
 379      -{
 380      -        dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 381      -        dsl_pool_t *dp = dmu_tx_pool(tx);
 382      -        dsl_dataset_t *ds;
 383      -        int error;
 384      -
 385      -        if (!dmu_tx_is_syncing(tx))
 386      -                return (0);
      617 +        fnvlist_free(ddura.ddura_chkholds);
 387  618  
 388      -        error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
 389      -        if (error)
 390      -                return (error);
 391      -
 392      -        error = dsl_dataset_user_release_check_one(ds,
 393      -            ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
 394      -        dsl_dataset_rele(ds, FTAG);
 395  619          return (error);
 396  620  }
 397  621  
 398      -static void
 399      -dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
 400      -{
 401      -        dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 402      -        dsl_pool_t *dp = dmu_tx_pool(tx);
 403      -        dsl_dataset_t *ds;
 404      -
 405      -        VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
 406      -        dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
 407      -        if (ddurta->ddurta_deleteme) {
 408      -                ASSERT(ds->ds_userrefs == 0 &&
 409      -                    ds->ds_phys->ds_num_children == 1 &&
 410      -                    DS_IS_DEFER_DESTROY(ds));
 411      -                dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 412      -        }
 413      -        dsl_dataset_rele(ds, FTAG);
 414      -}
 415      -
 416  622  /*
 417      - * Called at spa_load time to release a stale temporary user hold.
 418      - * Also called by the onexit code.
      623 + * holds is nvl of snapname -> { holdname, ... }
      624 + * errlist will be filled in with snapname -> error
 419  625   */
 420      -void
 421      -dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
 422      -{
 423      -        dsl_dataset_user_release_tmp_arg_t ddurta;
 424      -        dsl_dataset_t *ds;
 425      -        int error;
 426      -
 427      -#ifdef _KERNEL
 428      -        /* Make sure it is not mounted. */
 429      -        dsl_pool_config_enter(dp, FTAG);
 430      -        error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
 431      -        if (error == 0) {
 432      -                char name[MAXNAMELEN];
 433      -                dsl_dataset_name(ds, name);
 434      -                dsl_dataset_rele(ds, FTAG);
 435      -                dsl_pool_config_exit(dp, FTAG);
 436      -                zfs_unmount_snap(name);
 437      -        } else {
 438      -                dsl_pool_config_exit(dp, FTAG);
 439      -        }
 440      -#endif
 441      -
 442      -        ddurta.ddurta_dsobj = dsobj;
 443      -        ddurta.ddurta_holds = fnvlist_alloc();
 444      -        fnvlist_add_boolean(ddurta.ddurta_holds, htag);
 445      -
 446      -        (void) dsl_sync_task(spa_name(dp->dp_spa),
 447      -            dsl_dataset_user_release_tmp_check,
 448      -            dsl_dataset_user_release_tmp_sync, &ddurta, 1);
 449      -        fnvlist_free(ddurta.ddurta_holds);
 450      -}
 451      -
 452      -typedef struct zfs_hold_cleanup_arg {
 453      -        char zhca_spaname[MAXNAMELEN];
 454      -        uint64_t zhca_spa_load_guid;
 455      -        uint64_t zhca_dsobj;
 456      -        char zhca_htag[MAXNAMELEN];
 457      -} zfs_hold_cleanup_arg_t;
 458      -
 459      -static void
 460      -dsl_dataset_user_release_onexit(void *arg)
      626 +int
      627 +dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
 461  628  {
 462      -        zfs_hold_cleanup_arg_t *ca = arg;
 463      -        spa_t *spa;
 464      -        int error;
 465      -
 466      -        error = spa_open(ca->zhca_spaname, &spa, FTAG);
 467      -        if (error != 0) {
 468      -                zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 469      -                    "because pool is no longer loaded",
 470      -                    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 471      -                return;
 472      -        }
 473      -        if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
 474      -                zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 475      -                    "because pool is no longer loaded (guid doesn't match)",
 476      -                    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 477      -                spa_close(spa, FTAG);
 478      -                return;
 479      -        }
 480      -
 481      -        dsl_dataset_user_release_tmp(spa_get_dsl(spa),
 482      -            ca->zhca_dsobj, ca->zhca_htag);
 483      -        kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 484      -        spa_close(spa, FTAG);
      629 +        return dsl_dataset_user_release_impl(holds, errlist, NULL);
 485  630  }
 486  631  
      632 +/*
      633 + * holds is nvl of snapdsobj -> { holdname, ... }
      634 + */
 487  635  void
 488      -dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
 489      -    minor_t minor)
      636 +dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds)
 490  637  {
 491      -        zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
 492      -        spa_t *spa = dsl_dataset_get_spa(ds);
 493      -        (void) strlcpy(ca->zhca_spaname, spa_name(spa),
 494      -            sizeof (ca->zhca_spaname));
 495      -        ca->zhca_spa_load_guid = spa_load_guid(spa);
 496      -        ca->zhca_dsobj = ds->ds_object;
 497      -        (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
 498      -        VERIFY0(zfs_onexit_add_cb(minor,
 499      -            dsl_dataset_user_release_onexit, ca, NULL));
      638 +        ASSERT(dp != NULL);
      639 +        (void) dsl_dataset_user_release_impl(holds, NULL, dp);
 500  640  }
 501  641  
 502  642  int
 503  643  dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 504  644  {
 505  645          dsl_pool_t *dp;
 506  646          dsl_dataset_t *ds;
 507  647          int err;
 508  648  
 509  649          err = dsl_pool_hold(dsname, FTAG, &dp);
↓ open down ↓ 27 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX