Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/dsl_userhold.c
          +++ new/usr/src/uts/common/fs/zfs/dsl_userhold.c
↓ open down ↓ 28 lines elided ↑ open up ↑
  29   29  #include <sys/dsl_destroy.h>
  30   30  #include <sys/dsl_synctask.h>
  31   31  #include <sys/dmu_tx.h>
  32   32  #include <sys/zfs_onexit.h>
  33   33  #include <sys/dsl_pool.h>
  34   34  #include <sys/dsl_dir.h>
  35   35  #include <sys/zfs_ioctl.h>
  36   36  #include <sys/zap.h>
  37   37  
  38   38  typedef struct dsl_dataset_user_hold_arg {
       39 +        spa_t *dduha_spa;
  39   40          nvlist_t *dduha_holds;
       41 +        nvlist_t *dduha_tmpholds;
  40   42          nvlist_t *dduha_errlist;
  41   43          minor_t dduha_minor;
       44 +        boolean_t dduha_holds_created;
  42   45  } dsl_dataset_user_hold_arg_t;
  43   46  
  44   47  /*
  45   48   * If you add new checks here, you may need to add additional checks to the
  46   49   * "temporary" case in snapshot_check() in dmu_objset.c.
  47   50   */
  48   51  int
  49   52  dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
  50   53      boolean_t temphold, dmu_tx_t *tx)
  51   54  {
↓ open down ↓ 25 lines elided ↑ open up ↑
  77   80          return (error);
  78   81  }
  79   82  
  80   83  static int
  81   84  dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
  82   85  {
  83   86          dsl_dataset_user_hold_arg_t *dduha = arg;
  84   87          dsl_pool_t *dp = dmu_tx_pool(tx);
  85   88          nvpair_t *pair;
  86   89          int rv = 0;
       90 +        boolean_t holds_possible;
  87   91  
  88   92          if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
  89   93                  return (SET_ERROR(ENOTSUP));
  90   94  
       95 +        holds_possible = B_FALSE;
       96 +
  91   97          for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
  92   98              pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
  93   99                  int error = 0;
  94  100                  dsl_dataset_t *ds;
  95  101                  char *htag;
  96  102  
  97  103                  /* must be a snapshot */
  98  104                  if (strchr(nvpair_name(pair), '@') == NULL)
  99  105                          error = SET_ERROR(EINVAL);
 100  106  
 101  107                  if (error == 0)
 102  108                          error = nvpair_value_string(pair, &htag);
 103  109                  if (error == 0) {
 104  110                          error = dsl_dataset_hold(dp,
 105  111                              nvpair_name(pair), FTAG, &ds);
      112 +
      113 +                        if (error == ENOENT) {
      114 +                                /*
      115 +                                 * We register ENOENT errors so they can be
      116 +                                 * correctly reported if needed, such as when
      117 +                                 * all holds fail.
      118 +                                 */
      119 +                                if (dduha->dduha_errlist != NULL) {
      120 +                                        fnvlist_add_int32(dduha->dduha_errlist,
      121 +                                            nvpair_name(pair), error);
      122 +                                }
      123 +                                continue;
      124 +                        }
 106  125                  }
 107  126                  if (error == 0) {
 108  127                          error = dsl_dataset_user_hold_check_one(ds, htag,
 109  128                              dduha->dduha_minor != 0, tx);
 110  129                          dsl_dataset_rele(ds, FTAG);
 111  130                  }
 112  131  
 113  132                  if (error != 0) {
      133 +                        if (dduha->dduha_errlist != NULL) {
      134 +                                fnvlist_add_int32(dduha->dduha_errlist,
      135 +                                    nvpair_name(pair), error);
      136 +                        }
 114  137                          rv = error;
 115      -                        fnvlist_add_int32(dduha->dduha_errlist,
 116      -                            nvpair_name(pair), error);
      138 +                } else {
      139 +                        holds_possible = B_TRUE;
 117  140                  }
 118  141          }
      142 +
      143 +        /*
      144 +         * Check that at least one hold will possibly be created,
      145 +         * otherwise fail.
      146 +         */
      147 +        if (rv == 0 && !holds_possible)
      148 +                rv = ENOENT;
      149 +
 119  150          return (rv);
 120  151  }
 121  152  
 122      -void
 123      -dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
 124      -    minor_t minor, uint64_t now, dmu_tx_t *tx)
      153 +
      154 +static void
      155 +dsl_dataset_user_hold_sync_one_impl(dsl_dataset_user_hold_arg_t *dduha,
      156 +    dsl_dataset_t *ds, const char *htag, minor_t minor, uint64_t now,
      157 +    dmu_tx_t *tx)
 125  158  {
 126  159          dsl_pool_t *dp = ds->ds_dir->dd_pool;
 127  160          objset_t *mos = dp->dp_meta_objset;
 128  161          uint64_t zapobj;
 129  162  
 130  163          mutex_enter(&ds->ds_lock);
 131  164          if (ds->ds_phys->ds_userrefs_obj == 0) {
 132  165                  /*
 133  166                   * This is the first user hold for this dataset.  Create
 134  167                   * the userrefs zap object.
↓ open down ↓ 3 lines elided ↑ open up ↑
 138  171                      zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
 139  172          } else {
 140  173                  zapobj = ds->ds_phys->ds_userrefs_obj;
 141  174          }
 142  175          ds->ds_userrefs++;
 143  176          mutex_exit(&ds->ds_lock);
 144  177  
 145  178          VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
 146  179  
 147  180          if (minor != 0) {
      181 +                char name[MAXNAMELEN];
      182 +                nvlist_t *tags;
      183 +
 148  184                  VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
 149  185                      htag, now, tx));
 150      -                dsl_register_onexit_hold_cleanup(ds, htag, minor);
      186 +                (void) snprintf(name, sizeof(name), "%llx",
      187 +                    (u_longlong_t)ds->ds_object);
      188 +
      189 +                if (nvlist_lookup_nvlist(dduha->dduha_tmpholds, name, &tags) != 0) {
      190 +                        tags = fnvlist_alloc();
      191 +                        fnvlist_add_boolean(tags, htag);
      192 +                        fnvlist_add_nvlist(dduha->dduha_tmpholds, name, tags);
      193 +                        fnvlist_free(tags);
      194 +                } else {
      195 +                        fnvlist_add_boolean(tags, htag);
      196 +                }
 151  197          }
 152  198  
 153  199          spa_history_log_internal_ds(ds, "hold", tx,
 154  200              "tag=%s temp=%d refs=%llu",
 155  201              htag, minor != 0, ds->ds_userrefs);
 156  202  }
 157  203  
      204 +typedef struct zfs_hold_cleanup_arg {
      205 +        char zhca_spaname[MAXNAMELEN];
      206 +        uint64_t zhca_spa_load_guid;
      207 +        nvlist_t *zhca_holds;
      208 +} zfs_hold_cleanup_arg_t;
      209 +
      210 +static void
      211 +dsl_dataset_user_release_onexit(void *arg)
      212 +{
      213 +        zfs_hold_cleanup_arg_t *ca = (zfs_hold_cleanup_arg_t *)arg;
      214 +        spa_t *spa;
      215 +        int error;
      216 +
      217 +        error = spa_open(ca->zhca_spaname, &spa, FTAG);
      218 +        if (error != 0) {
      219 +                zfs_dbgmsg("couldn't release holds on pool=%s "
      220 +                    "because pool is no longer loaded",
      221 +                    ca->zhca_spaname);
      222 +                return;
      223 +        }
      224 +        if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
      225 +                zfs_dbgmsg("couldn't release holds on pool=%s "
      226 +                    "because pool is no longer loaded (guid doesn't match)",
      227 +                    ca->zhca_spaname);
      228 +                spa_close(spa, FTAG);
      229 +                return;
      230 +        }
      231 +
      232 +        (void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
      233 +        fnvlist_free(ca->zhca_holds);
      234 +        kmem_free(ca, sizeof(zfs_hold_cleanup_arg_t));
      235 +        spa_close(spa, FTAG);
      236 +}
      237 +
      238 +static void
      239 +dsl_register_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
      240 +{
      241 +        zfs_hold_cleanup_arg_t *ca;
      242 +
      243 +        if (minor == 0 || nvlist_next_nvpair(holds, NULL) == NULL) {
      244 +                fnvlist_free(holds);
      245 +                return;
      246 +        }
      247 +
      248 +        ASSERT(spa != NULL);
      249 +        ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
      250 +
      251 +        (void) strlcpy(ca->zhca_spaname, spa_name(spa),
      252 +            sizeof (ca->zhca_spaname));
      253 +        ca->zhca_spa_load_guid = spa_load_guid(spa);
      254 +        ca->zhca_holds = holds;
      255 +        VERIFY0(zfs_onexit_add_cb(minor,
      256 +            dsl_dataset_user_release_onexit, ca, NULL));
      257 +}
      258 +
      259 +void
      260 +dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
      261 +    minor_t minor, uint64_t now, dmu_tx_t *tx)
      262 +{
      263 +        dsl_dataset_user_hold_arg_t dduha;
      264 +
      265 +        dduha.dduha_spa = NULL;
      266 +        dduha.dduha_holds = NULL;
      267 +        dduha.dduha_tmpholds = fnvlist_alloc();
      268 +        dduha.dduha_errlist = NULL;
      269 +        dduha.dduha_minor = minor;
      270 +        dduha.dduha_holds_created = B_FALSE;
      271 +
      272 +        dsl_dataset_user_hold_sync_one_impl(&dduha, ds, htag, minor, now, tx);
      273 +        dsl_register_onexit_hold_cleanup(dsl_dataset_get_spa(ds),
      274 +            dduha.dduha_tmpholds, minor);
      275 +}
      276 +
 158  277  static void
 159  278  dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
 160  279  {
 161  280          dsl_dataset_user_hold_arg_t *dduha = arg;
 162  281          dsl_pool_t *dp = dmu_tx_pool(tx);
 163  282          nvpair_t *pair;
 164  283          uint64_t now = gethrestime_sec();
 165  284  
 166  285          for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
 167  286              pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
 168  287                  dsl_dataset_t *ds;
 169      -                VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 170      -                dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
 171      -                    dduha->dduha_minor, now, tx);
 172      -                dsl_dataset_rele(ds, FTAG);
      288 +                char *name;
      289 +                int error;
      290 +
      291 +                name = nvpair_name(pair);
      292 +                error = dsl_dataset_hold(dp, name, FTAG, &ds);
      293 +                if (error == 0) {
      294 +                        dsl_dataset_user_hold_sync_one_impl(dduha, ds,
      295 +                            fnvpair_value_string(pair), dduha->dduha_minor,
      296 +                            now, tx);
      297 +                        dsl_dataset_rele(ds, FTAG);
      298 +                        dduha->dduha_holds_created = B_TRUE;
      299 +                } else if (dduha->dduha_errlist != NULL) {
      300 +                        /*
      301 +                         * We register ENOENT errors so they can be correctly
      302 +                         * reported if needed, such as when all holds fail.
      303 +                         */
      304 +                        fnvlist_add_int32(dduha->dduha_errlist, name, error);
      305 +                }
 173  306          }
      307 +        dduha->dduha_spa = dp->dp_spa;
 174  308  }
 175  309  
 176  310  /*
      311 + * The full semantics of this function are described in the comment above
      312 + * lzc_hold().
      313 + *
      314 + * To summarize:
 177  315   * holds is nvl of snapname -> holdname
 178  316   * errlist will be filled in with snapname -> error
 179      - * if cleanup_minor is not 0, the holds will be temporary, cleaned up
 180      - * when the process exits.
 181  317   *
 182      - * if any fails, all will fail.
      318 + * The snapshots must all be in the same pool.
      319 + *
      320 + * Holds for snapshots that don't exist will be skipped.
      321 + *
      322 + * If none of the snapshots for requested holds exist then ENOENT will be
      323 + * returned.
      324 + *
      325 + * If cleanup_minor is not 0, the holds will be temporary, which will be cleaned
      326 + * up when the process exits.
      327 + *
      328 + * On success all the holds, for snapshots that existed, will be created and 0
      329 + * will be returned.
      330 + *
      331 + * On failure no holds will be created, the errlist will be filled in,
      332 + * and an errno will be returned.
      333 + *
      334 + * In all cases the errlist will contain entries for holds where the snapshot
      335 + * didn't exist.
 183  336   */
 184  337  int
 185  338  dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
 186  339  {
 187  340          dsl_dataset_user_hold_arg_t dduha;
 188  341          nvpair_t *pair;
      342 +        int ret;
 189  343  
 190  344          pair = nvlist_next_nvpair(holds, NULL);
 191  345          if (pair == NULL)
 192  346                  return (0);
 193  347  
      348 +        dduha.dduha_spa = NULL;
 194  349          dduha.dduha_holds = holds;
      350 +        dduha.dduha_tmpholds = fnvlist_alloc();
 195  351          dduha.dduha_errlist = errlist;
 196  352          dduha.dduha_minor = cleanup_minor;
      353 +        dduha.dduha_holds_created = B_FALSE;
      354 +
      355 +        ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
      356 +            dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
      357 +        if (ret == 0) {
      358 +                /* Check we created at least one hold. */
      359 +                if (dduha.dduha_holds_created) {
      360 +                        dsl_register_onexit_hold_cleanup(dduha.dduha_spa,
      361 +                            dduha.dduha_tmpholds, cleanup_minor);
      362 +                } else {
      363 +                        fnvlist_free(dduha.dduha_tmpholds);
      364 +                        ret = ENOENT;
      365 +                }
      366 +        } else {
      367 +                fnvlist_free(dduha.dduha_tmpholds);
      368 +        }
 197  369  
 198      -        return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
 199      -            dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
      370 +        return (ret);
 200  371  }
 201  372  
      373 +typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag,
      374 +    dsl_dataset_t **dsp);
      375 +
 202  376  typedef struct dsl_dataset_user_release_arg {
      377 +        dsl_holdfunc_t *ddura_holdfunc;
 203  378          nvlist_t *ddura_holds;
 204  379          nvlist_t *ddura_todelete;
 205  380          nvlist_t *ddura_errlist;
      381 +        boolean_t ddura_holds_found;
 206  382  } dsl_dataset_user_release_arg_t;
 207  383  
      384 +/* Place a dataset hold on the snapshot identified by passed dsobj string */
      385 +static
      386 +int dsl_dataset_hold_byobj(dsl_pool_t *dp, const char *dsobj, void *tag,
      387 +    dsl_dataset_t **dsp)
      388 +{
      389 +        return dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp);
      390 +}
      391 +
 208  392  static int
 209      -dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
 210      -    nvlist_t *holds, boolean_t *todelete)
      393 +dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
      394 +    dsl_dataset_t *ds, nvlist_t *holds, boolean_t *todelete)
 211  395  {
 212  396          uint64_t zapobj;
 213  397          nvpair_t *pair;
 214  398          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 215  399          int error;
 216  400          int numholds = 0;
      401 +        int ret;
 217  402  
 218  403          *todelete = B_FALSE;
      404 +        ret = 0;
 219  405  
 220  406          if (!dsl_dataset_is_snapshot(ds))
 221  407                  return (SET_ERROR(EINVAL));
 222  408  
 223  409          zapobj = ds->ds_phys->ds_userrefs_obj;
 224  410          if (zapobj == 0)
 225  411                  return (SET_ERROR(ESRCH));
 226  412  
 227  413          for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 228  414              pair = nvlist_next_nvpair(holds, pair)) {
 229      -                /* Make sure the hold exists */
 230  415                  uint64_t tmp;
      416 +
 231  417                  error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
 232      -                if (error == ENOENT)
 233      -                        error = SET_ERROR(ESRCH);
      418 +                /* Non-existent holds aren't always fatal. */
      419 +                if (error == ENOENT) {
      420 +                        ret = error;
      421 +                        continue;
      422 +                }
 234  423                  if (error != 0)
 235  424                          return (error);
 236  425                  numholds++;
 237  426          }
 238  427  
 239  428          if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
 240  429              ds->ds_userrefs == numholds) {
 241  430                  /* we need to destroy the snapshot as well */
 242  431  
 243  432                  if (dsl_dataset_long_held(ds))
 244  433                          return (SET_ERROR(EBUSY));
 245  434                  *todelete = B_TRUE;
 246  435          }
 247      -        return (0);
      436 +
      437 +        if (numholds != 0)
      438 +                ddura->ddura_holds_found = B_TRUE;
      439 +
      440 +        return (ret);
 248  441  }
 249  442  
 250  443  static int
 251  444  dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
 252  445  {
 253  446          dsl_dataset_user_release_arg_t *ddura = arg;
      447 +        dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
 254  448          dsl_pool_t *dp = dmu_tx_pool(tx);
 255  449          nvpair_t *pair;
 256  450          int rv = 0;
 257  451  
 258  452          if (!dmu_tx_is_syncing(tx))
 259  453                  return (0);
 260  454  
 261  455          for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 262  456              pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 263  457                  const char *name = nvpair_name(pair);
 264  458                  int error;
 265  459                  dsl_dataset_t *ds;
 266  460                  nvlist_t *holds;
 267  461  
 268  462                  error = nvpair_value_nvlist(pair, &holds);
 269  463                  if (error != 0)
 270  464                          return (SET_ERROR(EINVAL));
 271  465  
 272      -                error = dsl_dataset_hold(dp, name, FTAG, &ds);
      466 +                error = holdfunc(dp, name, FTAG, &ds);
 273  467                  if (error == 0) {
 274  468                          boolean_t deleteme;
 275      -                        error = dsl_dataset_user_release_check_one(ds,
      469 +                        error = dsl_dataset_user_release_check_one(ddura, ds,
 276  470                              holds, &deleteme);
 277      -                        if (error == 0 && deleteme) {
      471 +                        /*
      472 +                         * Don't check for error == 0 as deleteme is only set
      473 +                         * to B_TRUE if it's correct to do so despite the error
      474 +                         * e.g. ENOENT.
      475 +                         */
      476 +                        if (deleteme) {
 278  477                                  fnvlist_add_boolean(ddura->ddura_todelete,
 279  478                                      name);
 280  479                          }
 281  480                          dsl_dataset_rele(ds, FTAG);
 282  481                  }
 283  482                  if (error != 0) {
 284  483                          if (ddura->ddura_errlist != NULL) {
 285  484                                  fnvlist_add_int32(ddura->ddura_errlist,
 286  485                                      name, error);
 287  486                          }
 288      -                        rv = error;
      487 +                        /* Non-existent holds aren't always fatal. */
      488 +                        if (error != ENOENT)
      489 +                                rv = error;
 289  490                  }
 290  491          }
      492 +
      493 +        /*
      494 +         * None of the specified holds existed so avoid the overhead of a sync
      495 +         * and return ENOENT.
      496 +         */
      497 +        if (rv == 0 && !ddura->ddura_holds_found)
      498 +                rv = ENOENT;
      499 +
 291  500          return (rv);
 292  501  }
 293  502  
 294  503  static void
 295      -dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
 296      -    dmu_tx_t *tx)
      504 +dsl_dataset_user_release_sync_one(dsl_dataset_user_release_arg_t *ddura,
      505 +    dsl_dataset_t *ds, nvlist_t *holds, dmu_tx_t *tx)
 297  506  {
 298  507          dsl_pool_t *dp = ds->ds_dir->dd_pool;
 299  508          objset_t *mos = dp->dp_meta_objset;
 300      -        uint64_t zapobj;
 301      -        int error;
 302  509          nvpair_t *pair;
 303  510  
 304  511          for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
 305  512              pair = nvlist_next_nvpair(holds, pair)) {
 306      -                ds->ds_userrefs--;
 307      -                error = dsl_pool_user_release(dp, ds->ds_object,
 308      -                    nvpair_name(pair), tx);
      513 +                uint64_t zapobj;
      514 +                int error;
      515 +                char *name;
      516 +
      517 +                name = nvpair_name(pair);
      518 +
      519 +                /* Remove temporary hold if one exists. */
      520 +                error = dsl_pool_user_release(dp, ds->ds_object, name, tx);
 309  521                  VERIFY(error == 0 || error == ENOENT);
      522 +
      523 +                /* Remove user hold if one exists. */
 310  524                  zapobj = ds->ds_phys->ds_userrefs_obj;
 311      -                VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));
      525 +                error = zap_remove(mos, zapobj, name, tx);
      526 +                if (error == ENOENT)
      527 +                        continue;
      528 +                VERIFY0(error);
      529 +
      530 +                /* Only if we removed a hold do we decrement userrefs. */
      531 +                mutex_enter(&ds->ds_lock);
      532 +                ds->ds_userrefs--;
      533 +                mutex_exit(&ds->ds_lock);
      534 +
      535 +                ddura->ddura_holds_found = B_TRUE;
 312  536  
 313  537                  spa_history_log_internal_ds(ds, "release", tx,
 314  538                      "tag=%s refs=%lld", nvpair_name(pair),
 315  539                      (longlong_t)ds->ds_userrefs);
 316  540          }
 317  541  }
 318  542  
 319  543  static void
 320  544  dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
 321  545  {
 322  546          dsl_dataset_user_release_arg_t *ddura = arg;
      547 +        dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
 323  548          dsl_pool_t *dp = dmu_tx_pool(tx);
 324  549          nvpair_t *pair;
 325  550  
      551 +        /*
      552 +         * Even though check suggested that at least one of our holds were
      553 +         * found this may have changed. Recalculate ddura_holds_found so that
      554 +         * we can return ENOENT from the caller in the case that no holds
      555 +         * were actually released.
      556 +         */
      557 +        ddura->ddura_holds_found = B_FALSE;
      558 +
 326  559          for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
 327  560              pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
 328  561                  dsl_dataset_t *ds;
      562 +                int error;
 329  563  
 330      -                VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 331      -                dsl_dataset_user_release_sync_one(ds,
      564 +                error = holdfunc(dp, nvpair_name(pair), FTAG, &ds);
      565 +                if (error == ENOENT)
      566 +                        continue;
      567 +                VERIFY0(error);
      568 +
      569 +                dsl_dataset_user_release_sync_one(ddura, ds,
 332  570                      fnvpair_value_nvlist(pair), tx);
 333      -                if (nvlist_exists(ddura->ddura_todelete,
 334      -                    nvpair_name(pair))) {
      571 +                if (nvlist_exists(ddura->ddura_todelete, nvpair_name(pair))) {
 335  572                          ASSERT(ds->ds_userrefs == 0 &&
 336  573                              ds->ds_phys->ds_num_children == 1 &&
 337  574                              DS_IS_DEFER_DESTROY(ds));
 338  575                          dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 339  576                  }
 340  577                  dsl_dataset_rele(ds, FTAG);
 341  578          }
 342  579  }
 343  580  
 344  581  /*
      582 + * The full semantics of this function are described in the comment above
      583 + * lzc_release().
      584 + *
      585 + * To summarize:
      586 + * Releases holds specified in the nvl holds.
      587 + *
 345  588   * holds is nvl of snapname -> { holdname, ... }
 346  589   * errlist will be filled in with snapname -> error
      590 + *
      591 + * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
      592 + * otherwise they should be the names of snapshots.
 347  593   *
 348      - * if any fails, all will fail.
      594 + * As a release may cause snapshots to be destroyed this tries to ensure they
      595 + * aren't mounted.
      596 + *
      597 + * The release of non-existent holds is skipped.
      598 + *
      599 + * At least one hold must have been released for this function to succeed
      600 + * and return 0.
 349  601   */
 350      -int
 351      -dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
      602 +static int
      603 +dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
      604 +    dsl_pool_t *tmpdp)
 352  605  {
 353  606          dsl_dataset_user_release_arg_t ddura;
 354  607          nvpair_t *pair;
      608 +        char *pool;
 355  609          int error;
 356  610  
 357  611          pair = nvlist_next_nvpair(holds, NULL);
 358  612          if (pair == NULL)
 359  613                  return (0);
 360  614  
      615 +#ifdef _KERNEL
      616 +        /*
      617 +         * The release may cause snapshots to be destroyed; make sure they
      618 +         * are not mounted.
      619 +         */
      620 +        if (tmpdp != NULL) {
      621 +                /* Temporary holds are specified by dsobj. */
      622 +                ddura.ddura_holdfunc = dsl_dataset_hold_byobj;
      623 +                pool = spa_name(tmpdp->dp_spa);
      624 +
      625 +                dsl_pool_config_enter(tmpdp, FTAG);
      626 +                for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
      627 +                    pair = nvlist_next_nvpair(holds, pair)) {
      628 +                        dsl_dataset_t *ds;
      629 +
      630 +                        error = dsl_dataset_hold_byobj(tmpdp, nvpair_name(pair),
      631 +                            FTAG, &ds);
      632 +                        if (error == 0) {
      633 +                                char name[MAXNAMELEN];
      634 +                                dsl_dataset_name(ds, name);
      635 +                                dsl_dataset_rele(ds, FTAG);
      636 +                                zfs_unmount_snap(name);
      637 +                        }
      638 +                }
      639 +                dsl_pool_config_exit(tmpdp, FTAG);
      640 +        } else {
      641 +                /* Non-temporary holds are specified by name. */
      642 +                ddura.ddura_holdfunc = dsl_dataset_hold;
      643 +                pool = nvpair_name(pair);
      644 +
      645 +                for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
      646 +                    pair = nvlist_next_nvpair(holds, pair))
      647 +                        zfs_unmount_snap(nvpair_name(pair));
      648 +        }
      649 +#endif
      650 +
 361  651          ddura.ddura_holds = holds;
 362  652          ddura.ddura_errlist = errlist;
 363  653          ddura.ddura_todelete = fnvlist_alloc();
      654 +        ddura.ddura_holds_found = B_FALSE;
 364  655  
 365      -        error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
 366      -            dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
      656 +        error = dsl_sync_task(pool, dsl_dataset_user_release_check,
      657 +            dsl_dataset_user_release_sync, &ddura,
      658 +            fnvlist_num_pairs(holds));
 367  659          fnvlist_free(ddura.ddura_todelete);
 368      -        return (error);
 369      -}
 370  660  
 371      -typedef struct dsl_dataset_user_release_tmp_arg {
 372      -        uint64_t ddurta_dsobj;
 373      -        nvlist_t *ddurta_holds;
 374      -        boolean_t ddurta_deleteme;
 375      -} dsl_dataset_user_release_tmp_arg_t;
      661 +        /* If no holds were actually removed, return ENOENT. */
      662 +        if (error == 0 && !ddura.ddura_holds_found)
      663 +                error = ENOENT;
 376  664  
 377      -static int
 378      -dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
 379      -{
 380      -        dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 381      -        dsl_pool_t *dp = dmu_tx_pool(tx);
 382      -        dsl_dataset_t *ds;
 383      -        int error;
 384      -
 385      -        if (!dmu_tx_is_syncing(tx))
 386      -                return (0);
 387      -
 388      -        error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
 389      -        if (error)
 390      -                return (error);
 391      -
 392      -        error = dsl_dataset_user_release_check_one(ds,
 393      -            ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
 394      -        dsl_dataset_rele(ds, FTAG);
 395  665          return (error);
 396  666  }
 397  667  
 398      -static void
 399      -dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
 400      -{
 401      -        dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
 402      -        dsl_pool_t *dp = dmu_tx_pool(tx);
 403      -        dsl_dataset_t *ds;
 404      -
 405      -        VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
 406      -        dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
 407      -        if (ddurta->ddurta_deleteme) {
 408      -                ASSERT(ds->ds_userrefs == 0 &&
 409      -                    ds->ds_phys->ds_num_children == 1 &&
 410      -                    DS_IS_DEFER_DESTROY(ds));
 411      -                dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
 412      -        }
 413      -        dsl_dataset_rele(ds, FTAG);
 414      -}
 415      -
 416  668  /*
 417      - * Called at spa_load time to release a stale temporary user hold.
 418      - * Also called by the onexit code.
      669 + * holds is an nvlist of snapname -> { holdname, ... }
      670 + * errlist will be filled in with snapname -> error
      671 + *
      672 + * If any release fails, all of them fail.
 419  673   */
 420      -void
 421      -dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
 422      -{
 423      -        dsl_dataset_user_release_tmp_arg_t ddurta;
 424      -        dsl_dataset_t *ds;
 425      -        int error;
 426      -
 427      -#ifdef _KERNEL
 428      -        /* Make sure it is not mounted. */
 429      -        dsl_pool_config_enter(dp, FTAG);
 430      -        error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
 431      -        if (error == 0) {
 432      -                char name[MAXNAMELEN];
 433      -                dsl_dataset_name(ds, name);
 434      -                dsl_dataset_rele(ds, FTAG);
 435      -                dsl_pool_config_exit(dp, FTAG);
 436      -                zfs_unmount_snap(name);
 437      -        } else {
 438      -                dsl_pool_config_exit(dp, FTAG);
 439      -        }
 440      -#endif
 441      -
 442      -        ddurta.ddurta_dsobj = dsobj;
 443      -        ddurta.ddurta_holds = fnvlist_alloc();
 444      -        fnvlist_add_boolean(ddurta.ddurta_holds, htag);
 445      -
 446      -        (void) dsl_sync_task(spa_name(dp->dp_spa),
 447      -            dsl_dataset_user_release_tmp_check,
 448      -            dsl_dataset_user_release_tmp_sync, &ddurta, 1);
 449      -        fnvlist_free(ddurta.ddurta_holds);
 450      -}
 451      -
 452      -typedef struct zfs_hold_cleanup_arg {
 453      -        char zhca_spaname[MAXNAMELEN];
 454      -        uint64_t zhca_spa_load_guid;
 455      -        uint64_t zhca_dsobj;
 456      -        char zhca_htag[MAXNAMELEN];
 457      -} zfs_hold_cleanup_arg_t;
 458      -
 459      -static void
 460      -dsl_dataset_user_release_onexit(void *arg)
      674 +int
      675 +dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
 461  676  {
 462      -        zfs_hold_cleanup_arg_t *ca = arg;
 463      -        spa_t *spa;
 464      -        int error;
 465      -
 466      -        error = spa_open(ca->zhca_spaname, &spa, FTAG);
 467      -        if (error != 0) {
 468      -                zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 469      -                    "because pool is no longer loaded",
 470      -                    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 471      -                return;
 472      -        }
 473      -        if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
 474      -                zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
 475      -                    "because pool is no longer loaded (guid doesn't match)",
 476      -                    ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
 477      -                spa_close(spa, FTAG);
 478      -                return;
 479      -        }
 480      -
 481      -        dsl_dataset_user_release_tmp(spa_get_dsl(spa),
 482      -            ca->zhca_dsobj, ca->zhca_htag);
 483      -        kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
 484      -        spa_close(spa, FTAG);
      677 +        return dsl_dataset_user_release_impl(holds, errlist, NULL);
 485  678  }
 486  679  
      680 +/*
      681 + * holds is nvl of snapdsobj -> { holdname, ... }
      682 + */
 487  683  void
 488      -dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
 489      -    minor_t minor)
      684 +dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds)
 490  685  {
 491      -        zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
 492      -        spa_t *spa = dsl_dataset_get_spa(ds);
 493      -        (void) strlcpy(ca->zhca_spaname, spa_name(spa),
 494      -            sizeof (ca->zhca_spaname));
 495      -        ca->zhca_spa_load_guid = spa_load_guid(spa);
 496      -        ca->zhca_dsobj = ds->ds_object;
 497      -        (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
 498      -        VERIFY0(zfs_onexit_add_cb(minor,
 499      -            dsl_dataset_user_release_onexit, ca, NULL));
      686 +        ASSERT(dp != NULL);
      687 +        (void) dsl_dataset_user_release_impl(holds, NULL, dp);
 500  688  }
 501  689  
 502  690  int
 503  691  dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
 504  692  {
 505  693          dsl_pool_t *dp;
 506  694          dsl_dataset_t *ds;
 507  695          int err;
 508  696  
 509  697          err = dsl_pool_hold(dsname, FTAG, &dp);
↓ open down ↓ 27 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX