illumos Wdiff usr/src/uts/common/fs/zfs/dsl_dataset.c

Print this page

OS-1566 dataset quota for ZFS datasets

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/zfs/dsl_dataset.c
          +++ new/usr/src/uts/common/fs/zfs/dsl_dataset.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2012 by Delphix. All rights reserved.
  24   24   * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  25   25   */
  26   26  
  27   27  #include <sys/dmu_objset.h>
  28   28  #include <sys/dsl_dataset.h>
  29   29  #include <sys/dsl_dir.h>
  30   30  #include <sys/dsl_prop.h>
  31   31  #include <sys/dsl_synctask.h>
  32   32  #include <sys/dmu_traverse.h>
  33   33  #include <sys/dmu_impl.h>
  34   34  #include <sys/dmu_tx.h>
  35   35  #include <sys/arc.h>
  36   36  #include <sys/zio.h>
  37   37  #include <sys/zap.h>
  38   38  #include <sys/zfeature.h>
  39   39  #include <sys/unique.h>
  40   40  #include <sys/zfs_context.h>
  41   41  #include <sys/zfs_ioctl.h>
  42   42  #include <sys/spa.h>
  43   43  #include <sys/zfs_znode.h>
  44   44  #include <sys/zfs_onexit.h>
  45   45  #include <sys/zvol.h>
  46   46  #include <sys/dsl_scan.h>
  47   47  #include <sys/dsl_deadlist.h>
  48   48  
  49   49  static char *dsl_reaper = "the grim reaper";
           /*
            * dsl_reaper is used as a sentinel owner: DSL_DATASET_IS_DESTROYED()
            * below compares ds_owner against this pointer by identity, so the
            * string contents themselves are never inspected by that test.
            */
  50   50  
  51   51  static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
  52   52  static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
  53   53  static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
  54   54  
           /*
            * Swap two uint64_t lvalues through a temporary.  Each argument is
            * evaluated twice and the expansion is a bare brace block, so do not
            * pass expressions with side effects and do not use it as the unbraced
            * body of an "if" that has an "else".
            */
  55   55  #define SWITCH64(x, y) \
  56   56          { \
  57   57                  uint64_t __tmp = (x); \
  58   58                  (x) = (y); \
  59   59                  (y) = __tmp; \
  60   60          }
  61   61  
           /* NOTE(review): presumably an upper bound on dataset references; users are outside this view -- confirm. */
  62   62  #define DS_REF_MAX      (1ULL << 62)
  63   63  
           /* NOTE(review): presumably the block size for deadlist objects; users are outside this view -- confirm. */
  64   64  #define DSL_DEADLIST_BLOCKSIZE  SPA_MAXBLOCKSIZE
  65   65  
           /* True when the dataset's owner is the dsl_reaper sentinel. */
  66   66  #define DSL_DATASET_IS_DESTROYED(ds)    ((ds)->ds_owner == dsl_reaper)
  67   67  
  68   68  
  69   69  /*
  70   70   * Figure out how much of this delta should be propogated to the dsl_dir
  71   71   * layer.  If there's a refreservation, that space has already been
  72   72   * partially accounted for in our ancestors.
  73   73   */
  74   74  static int64_t
  75   75  parent_delta(dsl_dataset_t *ds, int64_t delta)
  76   76  {
  77   77          uint64_t old_bytes, new_bytes;
  78   78  
  79   79          if (ds->ds_reserved == 0)
  80   80                  return (delta);
  81   81  
  82   82          old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
  83   83          new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
  84   84  
  85   85          ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
  86   86          return (new_bytes - old_bytes);
  87   87  }
  88   88  
  89   89  void
  90   90  dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
  91   91  {
                   /*
                    * Charge a newly written block to a dataset.  Must be called
                    * with a syncing tx (asserted below).  A hole is ignored.  When
                    * ds is NULL the sizes are charged through
                    * dsl_pool_mos_diduse_space() instead of to a dataset.
                    */
  92   92          int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
  93   93          int compressed = BP_GET_PSIZE(bp);
  94   94          int uncompressed = BP_GET_UCSIZE(bp);
  95   95          int64_t delta;
  96   96  
  97   97          dprintf_bp(bp, "ds=%p", ds);
  98   98  
  99   99          ASSERT(dmu_tx_is_syncing(tx));
 100  100          /* It could have been compressed away to nothing */
 101  101          if (BP_IS_HOLE(bp))
 102  102                  return;
 103  103          ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
 104  104          ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
 105  105          if (ds == NULL) {
 106  106                  dsl_pool_mos_diduse_space(tx->tx_pool,
 107  107                      used, compressed, uncompressed);
 108  108                  return;
 109  109          }
 110  110          dmu_buf_will_dirty(ds->ds_dbuf, tx);
 111  111  
                   /* Lock order here: the dir's dd_lock first, then ds_lock. */
 112  112          mutex_enter(&ds->ds_dir->dd_lock);
 113  113          mutex_enter(&ds->ds_lock);
                   /*
                    * parent_delta() reads ds_unique_bytes, so it has to run
                    * before that counter is bumped a few lines below.
                    */
 114  114          delta = parent_delta(ds, used);
 115  115          ds->ds_phys->ds_referenced_bytes += used;
 116  116          ds->ds_phys->ds_compressed_bytes += compressed;
 117  117          ds->ds_phys->ds_uncompressed_bytes += uncompressed;
 118  118          ds->ds_phys->ds_unique_bytes += used;
 119  119          mutex_exit(&ds->ds_lock);
                   /*
                    * Only "delta" is reported to the dir as new DD_USED_HEAD
                    * space; the remainder (used - delta) is moved from
                    * DD_USED_REFRSRV to DD_USED_HEAD.
                    */
 120  120          dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
 121  121              compressed, uncompressed, tx);
 122  122          dsl_dir_transfer_space(ds->ds_dir, used - delta,
 123  123              DD_USED_REFRSRV, DD_USED_HEAD, tx);
 124  124          mutex_exit(&ds->ds_dir->dd_lock);
 125  125  }
 126  126  
 127  127  int
 128  128  dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
 129  129      boolean_t async)
 130  130  {
                   /*
                    * Stop charging a block to a dataset.  Returns the value of
                    * bp_get_dsize_sync() for the block, or 0 for a hole.  Must be
                    * called with a syncing tx (asserted below).  When ds is NULL
                    * the block is freed and the sizes are subtracted through
                    * dsl_pool_mos_diduse_space().  "async" selects the pending
                    * deadlist instead of a direct dsl_deadlist_insert().
                    */
 131  131          if (BP_IS_HOLE(bp))
 132  132                  return (0);
 133  133  
 134  134          ASSERT(dmu_tx_is_syncing(tx));
 135  135          ASSERT(bp->blk_birth <= tx->tx_txg);
 136  136  
 137  137          int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
 138  138          int compressed = BP_GET_PSIZE(bp);
 139  139          int uncompressed = BP_GET_UCSIZE(bp);
 140  140  
 141  141          ASSERT(used > 0);
 142  142          if (ds == NULL) {
 143  143                  dsl_free(tx->tx_pool, tx->tx_txg, bp);
 144  144                  dsl_pool_mos_diduse_space(tx->tx_pool,
 145  145                      -used, -compressed, -uncompressed);
 146  146                  return (used);
 147  147          }
 148  148          ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
 149  149  
 150  150          ASSERT(!dsl_dataset_is_snapshot(ds));
 151  151          dmu_buf_will_dirty(ds->ds_dbuf, tx);
 152  152  
                   /*
                    * Born after ds_prev_snap_txg: free the block now and undo
                    * its unique/dir accounting.  Otherwise it goes on a deadlist.
                    */
 153  153          if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
 154  154                  int64_t delta;
 155  155  
 156  156                  dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
 157  157                  dsl_free(tx->tx_pool, tx->tx_txg, bp);
 158  158  
 159  159                  mutex_enter(&ds->ds_dir->dd_lock);
 160  160                  mutex_enter(&ds->ds_lock);
 161  161                  ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
 162  162                      !DS_UNIQUE_IS_ACCURATE(ds));
                           /* parent_delta() must see ds_unique_bytes before it shrinks. */
 163  163                  delta = parent_delta(ds, -used);
 164  164                  ds->ds_phys->ds_unique_bytes -= used;
 165  165                  mutex_exit(&ds->ds_lock);
 166  166                  dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
 167  167                      delta, -compressed, -uncompressed, tx);
 168  168                  dsl_dir_transfer_space(ds->ds_dir, -used - delta,
 169  169                      DD_USED_REFRSRV, DD_USED_HEAD, tx);
 170  170                  mutex_exit(&ds->ds_dir->dd_lock);
 171  171          } else {
 172  172                  dprintf_bp(bp, "putting on dead list: %s", "");
 173  173                  if (async) {
 174  174                          /*
 175  175                           * We are here as part of zio's write done callback,
 176  176                           * which means we're a zio interrupt thread.  We can't
 177  177                           * call dsl_deadlist_insert() now because it may block
 178  178                           * waiting for I/O.  Instead, put bp on the deferred
 179  179                           * queue and let dsl_pool_sync() finish the job.
 180  180                           */
 181  181                          bplist_append(&ds->ds_pending_deadlist, bp);
 182  182                  } else {
 183  183                          dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
 184  184                  }
 185  185                  ASSERT3U(ds->ds_prev->ds_object, ==,
 186  186                      ds->ds_phys->ds_prev_snap_obj);
 187  187                  ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
 188  188                  /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
 189  189                  if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
 190  190                      ds->ds_object && bp->blk_birth >
 191  191                      ds->ds_prev->ds_phys->ds_prev_snap_txg) {
 192  192                          dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
 193  193                          mutex_enter(&ds->ds_prev->ds_lock);
 194  194                          ds->ds_prev->ds_phys->ds_unique_bytes += used;
 195  195                          mutex_exit(&ds->ds_prev->ds_lock);
 196  196                  }
                           /* Space born after dd_origin_txg moves from HEAD to SNAP. */
 197  197                  if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
 198  198                          dsl_dir_transfer_space(ds->ds_dir, used,
 199  199                              DD_USED_HEAD, DD_USED_SNAP, tx);
 200  200                  }
 201  201          }
                   /* Both branches stop counting the block as referenced by ds. */
 202  202          mutex_enter(&ds->ds_lock);
 203  203          ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
 204  204          ds->ds_phys->ds_referenced_bytes -= used;
 205  205          ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
 206  206          ds->ds_phys->ds_compressed_bytes -= compressed;
 207  207          ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
 208  208          ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
 209  209          mutex_exit(&ds->ds_lock);
 210  210  
 211  211          return (used);
 212  212  }
 213  213  
 214  214  uint64_t
 215  215  dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
 216  216  {
 217  217          uint64_t trysnap = 0;
 218  218  
 219  219          if (ds == NULL)
 220  220                  return (0);
 221  221          /*
 222  222           * The snapshot creation could fail, but that would cause an
 223  223           * incorrect FALSE return, which would only result in an
 224  224           * overestimation of the amount of space that an operation would
 225  225           * consume, which is OK.
 226  226           *
 227  227           * There's also a small window where we could miss a pending
 228  228           * snapshot, because we could set the sync task in the quiescing
 229  229           * phase.  So this should only be used as a guess.
 230  230           */
 231  231          if (ds->ds_trysnap_txg >
 232  232              spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
 233  233                  trysnap = ds->ds_trysnap_txg;
 234  234          return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
 235  235  }
 236  236  
 237  237  boolean_t
 238  238  dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
 239  239      uint64_t blk_birth)
 240  240  {
 241  241          if (blk_birth <= dsl_dataset_prev_snap_txg(ds))
 242  242                  return (B_FALSE);
 243  243  
 244  244          ddt_prefetch(dsl_dataset_get_spa(ds), bp);
 245  245  
 246  246          return (B_TRUE);
 247  247  }
 248  248  
 249  249  /* ARGSUSED */
 250  250  static void
 251  251  dsl_dataset_evict(dmu_buf_t *db, void *dsv)
 252  252  {
 253  253          dsl_dataset_t *ds = dsv;
 254  254  
 255  255          ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));
 256  256  
 257  257          unique_remove(ds->ds_fsid_guid);
 258  258  
 259  259          if (ds->ds_objset != NULL)
 260  260                  dmu_objset_evict(ds->ds_objset);
 261  261  
 262  262          if (ds->ds_prev) {
 263  263                  dsl_dataset_drop_ref(ds->ds_prev, ds);
 264  264                  ds->ds_prev = NULL;
 265  265          }
 266  266  
 267  267          bplist_destroy(&ds->ds_pending_deadlist);
 268  268          if (db != NULL) {
 269  269                  dsl_deadlist_close(&ds->ds_deadlist);
 270  270          } else {
 271  271                  ASSERT(ds->ds_deadlist.dl_dbuf == NULL);
 272  272                  ASSERT(!ds->ds_deadlist.dl_oldfmt);
 273  273          }
 274  274          if (ds->ds_dir)
 275  275                  dsl_dir_close(ds->ds_dir, ds);
 276  276  
 277  277          ASSERT(!list_link_active(&ds->ds_synced_link));
 278  278  
 279  279          mutex_destroy(&ds->ds_lock);
 280  280          mutex_destroy(&ds->ds_recvlock);
 281  281          mutex_destroy(&ds->ds_opening_lock);
 282  282          rw_destroy(&ds->ds_rwlock);
 283  283          cv_destroy(&ds->ds_exclusive_cv);
 284  284  
 285  285          kmem_free(ds, sizeof (dsl_dataset_t));
 286  286  }
 287  287  
 288  288  static int
 289  289  dsl_dataset_get_snapname(dsl_dataset_t *ds)
 290  290  {
 291  291          dsl_dataset_phys_t *headphys;
 292  292          int err;
 293  293          dmu_buf_t *headdbuf;
 294  294          dsl_pool_t *dp = ds->ds_dir->dd_pool;
 295  295          objset_t *mos = dp->dp_meta_objset;
 296  296  
 297  297          if (ds->ds_snapname[0])
 298  298                  return (0);
 299  299          if (ds->ds_phys->ds_next_snap_obj == 0)
 300  300                  return (0);
 301  301  
 302  302          err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
 303  303              FTAG, &headdbuf);
 304  304          if (err)
 305  305                  return (err);
 306  306          headphys = headdbuf->db_data;
 307  307          err = zap_value_search(dp->dp_meta_objset,
 308  308              headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
 309  309          dmu_buf_rele(headdbuf, FTAG);
 310  310          return (err);
 311  311  }
 312  312  
 313  313  static int
 314  314  dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
 315  315  {
 316  316          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 317  317          uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
 318  318          matchtype_t mt;
 319  319          int err;
 320  320  
 321  321          if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
 322  322                  mt = MT_FIRST;
 323  323          else
 324  324                  mt = MT_EXACT;
 325  325  
 326  326          err = zap_lookup_norm(mos, snapobj, name, 8, 1,
 327  327              value, mt, NULL, 0, NULL);
 328  328          if (err == ENOTSUP && mt == MT_FIRST)
 329  329                  err = zap_lookup(mos, snapobj, name, 8, 1, value);
 330  330          return (err);
 331  331  }
 332  332  
 333  333  static int
 334  334  dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
 335  335  {
 336  336          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 337  337          uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
 338  338          matchtype_t mt;
 339  339          int err;
 340  340

↓ open down ↓

340 lines elided

↑ open up ↑

 341  341          dsl_dir_snap_cmtime_update(ds->ds_dir);
 342  342  
 343  343          if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
 344  344                  mt = MT_FIRST;
 345  345          else
 346  346                  mt = MT_EXACT;
 347  347  
 348  348          err = zap_remove_norm(mos, snapobj, name, mt, tx);
 349  349          if (err == ENOTSUP && mt == MT_FIRST)
 350  350                  err = zap_remove(mos, snapobj, name, tx);
      351 +
      352 +        if (err == 0)
      353 +                dsl_snapcount_adjust(ds->ds_dir, tx, -1, B_TRUE);
      354 +
 351  355          return (err);
 352  356  }
 353  357  
 354  358  static int
 355  359  dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
 356  360      dsl_dataset_t **dsp)
 357  361  {
 358  362          objset_t *mos = dp->dp_meta_objset;
 359  363          dmu_buf_t *dbuf;
 360  364          dsl_dataset_t *ds;

 361  365          int err;
 362  366          dmu_object_info_t doi;
 363  367  
 364  368          ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
 365  369              dsl_pool_sync_context(dp));
 366  370  
 367  371          err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
 368  372          if (err)
 369  373                  return (err);
 370  374  
 371  375          /* Make sure dsobj has the correct object type. */
 372  376          dmu_object_info_from_db(dbuf, &doi);
 373  377          if (doi.doi_type != DMU_OT_DSL_DATASET)
 374  378                  return (EINVAL);
 375  379  
 376  380          ds = dmu_buf_get_user(dbuf);
 377  381          if (ds == NULL) {
 378  382                  dsl_dataset_t *winner;
 379  383  
 380  384                  ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
 381  385                  ds->ds_dbuf = dbuf;
 382  386                  ds->ds_object = dsobj;
 383  387                  ds->ds_phys = dbuf->db_data;
 384  388  
 385  389                  mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
 386  390                  mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
 387  391                  mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
 388  392                  mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
 389  393  
 390  394                  rw_init(&ds->ds_rwlock, 0, 0, 0);
 391  395                  cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
 392  396  
 393  397                  bplist_create(&ds->ds_pending_deadlist);
 394  398                  dsl_deadlist_open(&ds->ds_deadlist,
 395  399                      mos, ds->ds_phys->ds_deadlist_obj);
 396  400  
 397  401                  list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
 398  402                      offsetof(dmu_sendarg_t, dsa_link));
 399  403  
 400  404                  if (err == 0) {
 401  405                          err = dsl_dir_open_obj(dp,
 402  406                              ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
 403  407                  }
 404  408                  if (err) {
 405  409                          mutex_destroy(&ds->ds_lock);
 406  410                          mutex_destroy(&ds->ds_recvlock);
 407  411                          mutex_destroy(&ds->ds_opening_lock);
 408  412                          rw_destroy(&ds->ds_rwlock);
 409  413                          cv_destroy(&ds->ds_exclusive_cv);
 410  414                          bplist_destroy(&ds->ds_pending_deadlist);
 411  415                          dsl_deadlist_close(&ds->ds_deadlist);
 412  416                          kmem_free(ds, sizeof (dsl_dataset_t));
 413  417                          dmu_buf_rele(dbuf, tag);
 414  418                          return (err);
 415  419                  }
 416  420  
 417  421                  if (!dsl_dataset_is_snapshot(ds)) {
 418  422                          ds->ds_snapname[0] = '\0';
 419  423                          if (ds->ds_phys->ds_prev_snap_obj) {
 420  424                                  err = dsl_dataset_get_ref(dp,
 421  425                                      ds->ds_phys->ds_prev_snap_obj,
 422  426                                      ds, &ds->ds_prev);
 423  427                          }
 424  428                  } else {
 425  429                          if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
 426  430                                  err = dsl_dataset_get_snapname(ds);
 427  431                          if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) {
 428  432                                  err = zap_count(
 429  433                                      ds->ds_dir->dd_pool->dp_meta_objset,
 430  434                                      ds->ds_phys->ds_userrefs_obj,
 431  435                                      &ds->ds_userrefs);
 432  436                          }
 433  437                  }
 434  438  
 435  439                  if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
 436  440                          /*
 437  441                           * In sync context, we're called with either no lock
 438  442                           * or with the write lock.  If we're not syncing,
 439  443                           * we're always called with the read lock held.
 440  444                           */
 441  445                          boolean_t need_lock =
 442  446                              !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
 443  447                              dsl_pool_sync_context(dp);
 444  448  
 445  449                          if (need_lock)
 446  450                                  rw_enter(&dp->dp_config_rwlock, RW_READER);
 447  451  
 448  452                          err = dsl_prop_get_ds(ds,
 449  453                              "refreservation", sizeof (uint64_t), 1,
 450  454                              &ds->ds_reserved, NULL);
 451  455                          if (err == 0) {
 452  456                                  err = dsl_prop_get_ds(ds,
 453  457                                      "refquota", sizeof (uint64_t), 1,
 454  458                                      &ds->ds_quota, NULL);
 455  459                          }
 456  460  
 457  461                          if (need_lock)
 458  462                                  rw_exit(&dp->dp_config_rwlock);
 459  463                  } else {
 460  464                          ds->ds_reserved = ds->ds_quota = 0;
 461  465                  }
 462  466  
 463  467                  if (err == 0) {
 464  468                          winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
 465  469                              dsl_dataset_evict);
 466  470                  }
 467  471                  if (err || winner) {
 468  472                          bplist_destroy(&ds->ds_pending_deadlist);
 469  473                          dsl_deadlist_close(&ds->ds_deadlist);
 470  474                          if (ds->ds_prev)
 471  475                                  dsl_dataset_drop_ref(ds->ds_prev, ds);
 472  476                          dsl_dir_close(ds->ds_dir, ds);
 473  477                          mutex_destroy(&ds->ds_lock);
 474  478                          mutex_destroy(&ds->ds_recvlock);
 475  479                          mutex_destroy(&ds->ds_opening_lock);
 476  480                          rw_destroy(&ds->ds_rwlock);
 477  481                          cv_destroy(&ds->ds_exclusive_cv);
 478  482                          kmem_free(ds, sizeof (dsl_dataset_t));
 479  483                          if (err) {
 480  484                                  dmu_buf_rele(dbuf, tag);
 481  485                                  return (err);
 482  486                          }
 483  487                          ds = winner;
 484  488                  } else {
 485  489                          ds->ds_fsid_guid =
 486  490                              unique_insert(ds->ds_phys->ds_fsid_guid);
 487  491                  }
 488  492          }
 489  493          ASSERT3P(ds->ds_dbuf, ==, dbuf);
 490  494          ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
 491  495          ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
 492  496              spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
 493  497              dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
 494  498          mutex_enter(&ds->ds_lock);
 495  499          if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
 496  500                  mutex_exit(&ds->ds_lock);
 497  501                  dmu_buf_rele(ds->ds_dbuf, tag);
 498  502                  return (ENOENT);
 499  503          }
 500  504          mutex_exit(&ds->ds_lock);
 501  505          *dsp = ds;
 502  506          return (0);
 503  507  }
 504  508  
 505  509  static int
 506  510  dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
 507  511  {
                   /*
                    * Turn a bare reference on ds into a hold.  In syncing context
                    * this is a no-op that returns 0.  Otherwise ds_rwlock is taken
                    * as READER and 0 is returned.  If the dataset turns out to be
                    * destroyed while we wait, the reference for "tag" is dropped
                    * and ENOENT is returned.
                    *
                    * NOTE(review): the wait loop exits dp_config_rwlock and
                    * re-enters it as RW_READER, so the caller presumably holds it
                    * as a reader on entry -- confirm against callers.
                    */
 508  512          dsl_pool_t *dp = ds->ds_dir->dd_pool;
 509  513  
 510  514          /*
 511  515           * In syncing context we don't want the rwlock lock: there
 512  516           * may be an existing writer waiting for sync phase to
 513  517           * finish.  We don't need to worry about such writers, since
 514  518           * sync phase is single-threaded, so the writer can't be
 515  519           * doing anything while we are active.
 516  520           */
 517  521          if (dsl_pool_sync_context(dp)) {
 518  522                  ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
 519  523                  return (0);
 520  524          }
 521  525  
 522  526          /*
 523  527           * Normal users will hold the ds_rwlock as a READER until they
 524  528           * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
 525  529           * drop their READER lock after they set the ds_owner field.
 526  530           *
 527  531           * If the dataset is being destroyed, the destroy thread will
 528  532           * obtain a WRITER lock for exclusive access after it's done its
 529  533           * open-context work and then change the ds_owner to
 530  534           * dsl_reaper once destruction is assured.  So threads
 531  535           * may block here temporarily, until the "destructability" of
 532  536           * the dataset is determined.
 533  537           */
 534  538          ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
 535  539          mutex_enter(&ds->ds_lock);
 536  540          while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
                           /* Let go of the config lock while sleeping on the cv. */
 537  541                  rw_exit(&dp->dp_config_rwlock);
 538  542                  cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
 539  543                  if (DSL_DATASET_IS_DESTROYED(ds)) {
 540  544                          mutex_exit(&ds->ds_lock);
 541  545                          dsl_dataset_drop_ref(ds, tag);
                                   /* Return with the config lock held again as a reader. */
 542  546                          rw_enter(&dp->dp_config_rwlock, RW_READER);
 543  547                          return (ENOENT);
 544  548                  }
 545  549                  /*
 546  550                   * The dp_config_rwlock lives above the ds_lock. And
 547  551                   * we need to check DSL_DATASET_IS_DESTROYED() while
 548  552                   * holding the ds_lock, so we have to drop and reacquire
 549  553                   * the ds_lock here.
 550  554                   */
 551  555                  mutex_exit(&ds->ds_lock);
 552  556                  rw_enter(&dp->dp_config_rwlock, RW_READER);
 553  557                  mutex_enter(&ds->ds_lock);
 554  558          }
 555  559          mutex_exit(&ds->ds_lock);
 556  560          return (0);
 557  561  }
 558  562  
 559  563  int
 560  564  dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
 561  565      dsl_dataset_t **dsp)
 562  566  {
 563  567          int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);
 564  568  
 565  569          if (err)
 566  570                  return (err);
 567  571          return (dsl_dataset_hold_ref(*dsp, tag));
 568  572  }
 569  573  
 570  574  int
 571  575  dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok,
 572  576      void *tag, dsl_dataset_t **dsp)
 573  577  {
 574  578          int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
 575  579          if (err)
 576  580                  return (err);
 577  581          if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
 578  582                  dsl_dataset_rele(*dsp, tag);
 579  583                  *dsp = NULL;
 580  584                  return (EBUSY);
 581  585          }
 582  586          return (0);
 583  587  }
 584  588  
 585  589  int
 586  590  dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
 587  591  {
                   /*
                    * Hold a dataset by name.  The dir is opened with
                    * dsl_dir_open_spa(), the head dataset is referenced and held
                    * under dp_config_rwlock (READER), and if a snapshot component
                    * was returned it must start with '@': the snapshot is looked
                    * up, the head is released, and the snapshot is held instead.
                    *
                    * Returns 0 with *dsp set, ENOENT when there is no head dataset
                    * or the snapshot component is malformed, or a lookup error.
                    * NOTE(review): on the snapshot failure paths *dsp is only reset
                    * to NULL when the final dsl_dataset_hold_ref() fails; otherwise
                    * it may still point at the already released head, so callers
                    * must not use *dsp when an error is returned.
                    */
 588  592          dsl_dir_t *dd;
 589  593          dsl_pool_t *dp;
 590  594          const char *snapname;
 591  595          uint64_t obj;
 592  596          int err = 0;
 593  597  
 594  598          err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
 595  599          if (err)
 596  600                  return (err);
 597  601  
 598  602          dp = dd->dd_pool;
 599  603          obj = dd->dd_phys->dd_head_dataset_obj;
 600  604          rw_enter(&dp->dp_config_rwlock, RW_READER);
 601  605          if (obj)
 602  606                  err = dsl_dataset_get_ref(dp, obj, tag, dsp);
 603  607          else
 604  608                  err = ENOENT;
 605  609          if (err)
 606  610                  goto out;
 607  611  
 608  612          err = dsl_dataset_hold_ref(*dsp, tag);
 609  613  
 610  614          /* we may be looking for a snapshot */
 611  615          if (err == 0 && snapname != NULL) {
 612  616                  dsl_dataset_t *ds = NULL;
 613  617  
 614  618                  if (*snapname++ != '@') {
 615  619                          dsl_dataset_rele(*dsp, tag);
 616  620                          err = ENOENT;
 617  621                          goto out;
 618  622                  }
 619  623  
 620  624                  dprintf("looking for snapshot '%s'\n", snapname);
 621  625                  err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
 622  626                  if (err == 0)
 623  627                          err = dsl_dataset_get_ref(dp, obj, tag, &ds);
                           /* The head is no longer needed whether or not the lookup worked. */
 624  628                  dsl_dataset_rele(*dsp, tag);
 625  629  
 626  630                  ASSERT3U((err == 0), ==, (ds != NULL));
 627  631  
 628  632                  if (ds) {
                                   /* Cache the name we just resolved if none is stored yet. */
 629  633                          mutex_enter(&ds->ds_lock);
 630  634                          if (ds->ds_snapname[0] == 0)
 631  635                                  (void) strlcpy(ds->ds_snapname, snapname,
 632  636                                      sizeof (ds->ds_snapname));
 633  637                          mutex_exit(&ds->ds_lock);
 634  638                          err = dsl_dataset_hold_ref(ds, tag);
 635  639                          *dsp = err ? NULL : ds;
 636  640                  }
 637  641          }
 638  642  out:
 639  643          rw_exit(&dp->dp_config_rwlock);
 640  644          dsl_dir_close(dd, FTAG);
 641  645          return (err);
 642  646  }
 643  647  
 644  648  int
 645  649  dsl_dataset_own(const char *name, boolean_t inconsistentok,
 646  650      void *tag, dsl_dataset_t **dsp)
 647  651  {
 648  652          int err = dsl_dataset_hold(name, tag, dsp);
 649  653          if (err)
 650  654                  return (err);
 651  655          if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
 652  656                  dsl_dataset_rele(*dsp, tag);
 653  657                  return (EBUSY);
 654  658          }
 655  659          return (0);
 656  660  }
 657  661  
 658  662  void
 659  663  dsl_dataset_name(dsl_dataset_t *ds, char *name)
 660  664  {
 661  665          if (ds == NULL) {
 662  666                  (void) strcpy(name, "mos");
 663  667          } else {
 664  668                  dsl_dir_name(ds->ds_dir, name);
 665  669                  VERIFY(0 == dsl_dataset_get_snapname(ds));
 666  670                  if (ds->ds_snapname[0]) {
 667  671                          (void) strcat(name, "@");
 668  672                          /*
 669  673                           * We use a "recursive" mutex so that we
 670  674                           * can call dprintf_ds() with ds_lock held.
 671  675                           */
 672  676                          if (!MUTEX_HELD(&ds->ds_lock)) {
 673  677                                  mutex_enter(&ds->ds_lock);
 674  678                                  (void) strcat(name, ds->ds_snapname);
 675  679                                  mutex_exit(&ds->ds_lock);
 676  680                          } else {
 677  681                                  (void) strcat(name, ds->ds_snapname);
 678  682                          }
 679  683                  }
 680  684          }
 681  685  }
 682  686  
 683  687  static int
 684  688  dsl_dataset_namelen(dsl_dataset_t *ds)
 685  689  {
 686  690          int result;
 687  691  
 688  692          if (ds == NULL) {
 689  693                  result = 3;     /* "mos" */
 690  694          } else {
 691  695                  result = dsl_dir_namelen(ds->ds_dir);
 692  696                  VERIFY(0 == dsl_dataset_get_snapname(ds));
 693  697                  if (ds->ds_snapname[0]) {
 694  698                          ++result;       /* adding one for the @-sign */
 695  699                          if (!MUTEX_HELD(&ds->ds_lock)) {
 696  700                                  mutex_enter(&ds->ds_lock);
 697  701                                  result += strlen(ds->ds_snapname);
 698  702                                  mutex_exit(&ds->ds_lock);
 699  703                          } else {
 700  704                                  result += strlen(ds->ds_snapname);
 701  705                          }
 702  706                  }
 703  707          }
 704  708  
 705  709          return (result);
 706  710  }
 707  711  
 708  712  void
 709  713  dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
 710  714  {
 711  715          dmu_buf_rele(ds->ds_dbuf, tag);
 712  716  }
 713  717  
 714  718  void
 715  719  dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
 716  720  {
 717  721          if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
 718  722                  rw_exit(&ds->ds_rwlock);
 719  723          }
 720  724          dsl_dataset_drop_ref(ds, tag);
 721  725  }
 722  726  
 723  727  void
 724  728  dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
 725  729  {
 726  730          ASSERT((ds->ds_owner == tag && ds->ds_dbuf) ||
 727  731              (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));
 728  732  
 729  733          mutex_enter(&ds->ds_lock);
 730  734          ds->ds_owner = NULL;
 731  735          if (RW_WRITE_HELD(&ds->ds_rwlock)) {
 732  736                  rw_exit(&ds->ds_rwlock);
 733  737                  cv_broadcast(&ds->ds_exclusive_cv);
 734  738          }
 735  739          mutex_exit(&ds->ds_lock);
 736  740          if (ds->ds_dbuf)
 737  741                  dsl_dataset_drop_ref(ds, tag);
 738  742          else
 739  743                  dsl_dataset_evict(NULL, ds);
 740  744  }
 741  745  
 742  746  boolean_t
 743  747  dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag)
 744  748  {
 745  749          boolean_t gotit = FALSE;
 746  750  
 747  751          mutex_enter(&ds->ds_lock);
 748  752          if (ds->ds_owner == NULL &&
 749  753              (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
 750  754                  ds->ds_owner = tag;
 751  755                  if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
 752  756                          rw_exit(&ds->ds_rwlock);
 753  757                  gotit = TRUE;
 754  758          }
 755  759          mutex_exit(&ds->ds_lock);
 756  760          return (gotit);
 757  761  }
 758  762  
 759  763  void
 760  764  dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
 761  765  {
 762  766          ASSERT3P(owner, ==, ds->ds_owner);
 763  767          if (!RW_WRITE_HELD(&ds->ds_rwlock))
 764  768                  rw_enter(&ds->ds_rwlock, RW_WRITER);
 765  769  }
 766  770  
/*
 * Allocate and initialize the on-disk state for a new head dataset of
 * the dsl_dir "dd".  Must be called in syncing context, and dd must not
 * already have a head dataset.
 *
 * dd      directory that will own the new dataset
 * origin  snapshot to clone from; when NULL, the pool's dp_origin_snap
 *         is used instead (which may itself be NULL)
 * flags   initial ds_flags for the new dataset
 * tx      syncing transaction
 *
 * Returns the object number of the newly allocated dataset.
 */
uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
    uint64_t flags, dmu_tx_t *tx)
{
        dsl_pool_t *dp = dd->dd_pool;
        dmu_buf_t *dbuf;
        dsl_dataset_phys_t *dsphys;
        uint64_t dsobj;
        objset_t *mos = dp->dp_meta_objset;

        if (origin == NULL)
                origin = dp->dp_origin_snap;

        ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
        ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
        ASSERT(dmu_tx_is_syncing(tx));
        ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);

        /*
         * Allocate the dataset object in the MOS and start from a zeroed
         * dsl_dataset_phys_t in its bonus buffer.
         */
        dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
            DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
        VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
        dmu_buf_will_dirty(dbuf, tx);
        dsphys = dbuf->db_data;
        bzero(dsphys, sizeof (dsl_dataset_phys_t));
        dsphys->ds_dir_obj = dd->dd_object;
        dsphys->ds_flags = flags;
        dsphys->ds_fsid_guid = unique_create();
        (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
            sizeof (dsphys->ds_guid));
        dsphys->ds_snapnames_zapobj =
            zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
            DMU_OT_NONE, 0, tx);
        dsphys->ds_creation_time = gethrestime_sec();
        /* A dataset created in the initial txg is recorded as txg 1. */
        dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;

        if (origin == NULL) {
                /* No origin: start with a freshly allocated deadlist. */
                dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
        } else {
                dsl_dataset_t *ohds;

                /*
                 * Cloning: the new dataset starts out referencing exactly
                 * what the origin snapshot references, so copy the
                 * origin's space accounting, block pointer and flags.
                 */
                dsphys->ds_prev_snap_obj = origin->ds_object;
                dsphys->ds_prev_snap_txg =
                    origin->ds_phys->ds_creation_txg;
                dsphys->ds_referenced_bytes =
                    origin->ds_phys->ds_referenced_bytes;
                dsphys->ds_compressed_bytes =
                    origin->ds_phys->ds_compressed_bytes;
                dsphys->ds_uncompressed_bytes =
                    origin->ds_phys->ds_uncompressed_bytes;
                dsphys->ds_bp = origin->ds_phys->ds_bp;
                dsphys->ds_flags |= origin->ds_phys->ds_flags;

                /* The origin gains one more child. */
                dmu_buf_will_dirty(origin->ds_dbuf, tx);
                origin->ds_phys->ds_num_children++;

                /*
                 * Derive the new deadlist from the deadlist of the head
                 * dataset of the origin's dsl_dir.
                 */
                VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
                    origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds));
                dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
                    dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
                dsl_dataset_rele(ohds, FTAG);

                /*
                 * On pools new enough, record the clone in the origin's
                 * next-clones ZAP, creating that ZAP on first use.
                 */
                if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
                        if (origin->ds_phys->ds_next_clones_obj == 0) {
                                origin->ds_phys->ds_next_clones_obj =
                                    zap_create(mos,
                                    DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);
                        }
                        VERIFY(0 == zap_add_int(mos,
                            origin->ds_phys->ds_next_clones_obj,
                            dsobj, tx));
                }

                /*
                 * Point dd at its origin and, on pools new enough, record
                 * the clone in the dd_clones ZAP of the origin's dsl_dir,
                 * creating that ZAP on first use.
                 */
                dmu_buf_will_dirty(dd->dd_dbuf, tx);
                dd->dd_phys->dd_origin_obj = origin->ds_object;
                if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
                        if (origin->ds_dir->dd_phys->dd_clones == 0) {
                                dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);
                                origin->ds_dir->dd_phys->dd_clones =
                                    zap_create(mos,
                                    DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
                        }
                        VERIFY3U(0, ==, zap_add_int(mos,
                            origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
                }
        }

        if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
                dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;

        dmu_buf_rele(dbuf, FTAG);

        /* Finally, make the new dataset the head dataset of dd. */
        dmu_buf_will_dirty(dd->dd_dbuf, tx);
        dd->dd_phys->dd_head_dataset_obj = dsobj;

        return (dsobj);
}
 863  867  
 864  868  uint64_t
 865  869  dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
 866  870      dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
 867  871  {
 868  872          dsl_pool_t *dp = pdd->dd_pool;
 869  873          uint64_t dsobj, ddobj;
 870  874          dsl_dir_t *dd;
 871  875  
 872  876          ASSERT(lastname[0] != '@');
 873  877  
 874  878          ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
 875  879          VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
 876  880  
 877  881          dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);
 878  882  
 879  883          dsl_deleg_set_create_perms(dd, tx, cr);
 880  884  
 881  885          dsl_dir_close(dd, FTAG);
 882  886  
 883  887          /*
 884  888           * If we are creating a clone, make sure we zero out any stale
 885  889           * data from the origin snapshots zil header.
 886  890           */
 887  891          if (origin != NULL) {
 888  892                  dsl_dataset_t *ds;
 889  893                  objset_t *os;
 890  894  
 891  895                  VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
 892  896                  VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));
 893  897                  bzero(&os->os_zil_header, sizeof (os->os_zil_header));
 894  898                  dsl_dataset_dirty(ds, tx);
 895  899                  dsl_dataset_rele(ds, FTAG);
 896  900          }
 897  901  
 898  902          return (dsobj);
 899  903  }
 900  904  
 901  905  /*
 902  906   * The snapshots must all be in the same pool.
 903  907   */
 904  908  int
 905  909  dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer,
 906  910      nvlist_t *errlist)
 907  911  {
 908  912          int err;
 909  913          dsl_sync_task_t *dst;
 910  914          spa_t *spa;
 911  915          nvpair_t *pair;
 912  916          dsl_sync_task_group_t *dstg;
 913  917  
 914  918          pair = nvlist_next_nvpair(snaps, NULL);
 915  919          if (pair == NULL)
 916  920                  return (0);
 917  921  
 918  922          err = spa_open(nvpair_name(pair), &spa, FTAG);
 919  923          if (err)
 920  924                  return (err);
 921  925          dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
 922  926  
 923  927          for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 924  928              pair = nvlist_next_nvpair(snaps, pair)) {
 925  929                  dsl_dataset_t *ds;
 926  930  
 927  931                  err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
 928  932                  if (err == 0) {
 929  933                          struct dsl_ds_destroyarg *dsda;
 930  934  
 931  935                          dsl_dataset_make_exclusive(ds, dstg);
 932  936                          dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg),
 933  937                              KM_SLEEP);
 934  938                          dsda->ds = ds;
 935  939                          dsda->defer = defer;
 936  940                          dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
 937  941                              dsl_dataset_destroy_sync, dsda, dstg, 0);
 938  942                  } else if (err == ENOENT) {
 939  943                          err = 0;
 940  944                  } else {
 941  945                          fnvlist_add_int32(errlist, nvpair_name(pair), err);
 942  946                          break;
 943  947                  }
 944  948          }
 945  949  
 946  950          if (err == 0)
 947  951                  err = dsl_sync_task_group_wait(dstg);
 948  952  
 949  953          for (dst = list_head(&dstg->dstg_tasks); dst;
 950  954              dst = list_next(&dstg->dstg_tasks, dst)) {
 951  955                  struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
 952  956                  dsl_dataset_t *ds = dsda->ds;
 953  957  
 954  958                  /*
 955  959                   * Return the snapshots that triggered the error.
 956  960                   */
 957  961                  if (dst->dst_err != 0) {
 958  962                          char name[ZFS_MAXNAMELEN];
 959  963                          dsl_dataset_name(ds, name);
 960  964                          fnvlist_add_int32(errlist, name, dst->dst_err);
 961  965                  }
 962  966                  ASSERT3P(dsda->rm_origin, ==, NULL);
 963  967                  dsl_dataset_disown(ds, dstg);
 964  968                  kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
 965  969          }
 966  970  
 967  971          dsl_sync_task_group_destroy(dstg);
 968  972          spa_close(spa, FTAG);
 969  973          return (err);
 970  974  
 971  975  }
 972  976  
 973  977  static boolean_t
 974  978  dsl_dataset_might_destroy_origin(dsl_dataset_t *ds)
 975  979  {
 976  980          boolean_t might_destroy = B_FALSE;
 977  981  
 978  982          mutex_enter(&ds->ds_lock);
 979  983          if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 &&
 980  984              DS_IS_DEFER_DESTROY(ds))
 981  985                  might_destroy = B_TRUE;
 982  986          mutex_exit(&ds->ds_lock);
 983  987  
 984  988          return (might_destroy);
 985  989  }
 986  990  
 987  991  /*
 988  992   * If we're removing a clone, and these three conditions are true:
 989  993   *      1) the clone's origin has no other children
 990  994   *      2) the clone's origin has no user references
 991  995   *      3) the clone's origin has been marked for deferred destruction
 992  996   * Then, prepare to remove the origin as part of this sync task group.
 993  997   */
 994  998  static int
 995  999  dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag)
 996 1000  {
 997 1001          dsl_dataset_t *ds = dsda->ds;
 998 1002          dsl_dataset_t *origin = ds->ds_prev;
 999 1003  
1000 1004          if (dsl_dataset_might_destroy_origin(origin)) {
1001 1005                  char *name;
1002 1006                  int namelen;
1003 1007                  int error;
1004 1008  
1005 1009                  namelen = dsl_dataset_namelen(origin) + 1;
1006 1010                  name = kmem_alloc(namelen, KM_SLEEP);
1007 1011                  dsl_dataset_name(origin, name);
1008 1012  #ifdef _KERNEL
1009 1013                  error = zfs_unmount_snap(name, NULL);
1010 1014                  if (error) {
1011 1015                          kmem_free(name, namelen);
1012 1016                          return (error);
1013 1017                  }
1014 1018  #endif
1015 1019                  error = dsl_dataset_own(name, B_TRUE, tag, &origin);
1016 1020                  kmem_free(name, namelen);
1017 1021                  if (error)
1018 1022                          return (error);
1019 1023                  dsda->rm_origin = origin;
1020 1024                  dsl_dataset_make_exclusive(origin, tag);
1021 1025          }
1022 1026  
1023 1027          return (0);
1024 1028  }
1025 1029  
1026 1030  /*
1027 1031   * ds must be opened as OWNER.  On return (whether successful or not),
1028 1032   * ds will be closed and caller can no longer dereference it.
1029 1033   */
1030 1034  int
1031 1035  dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
1032 1036  {
1033 1037          int err;
1034 1038          dsl_sync_task_group_t *dstg;
1035 1039          objset_t *os;
1036 1040          dsl_dir_t *dd;
1037 1041          uint64_t obj;
1038 1042          struct dsl_ds_destroyarg dsda = { 0 };
1039 1043  
1040 1044          dsda.ds = ds;
1041 1045  
1042 1046          if (dsl_dataset_is_snapshot(ds)) {
1043 1047                  /* Destroying a snapshot is simpler */
1044 1048                  dsl_dataset_make_exclusive(ds, tag);
1045 1049  
1046 1050                  dsda.defer = defer;
1047 1051                  err = dsl_sync_task_do(ds->ds_dir->dd_pool,
1048 1052                      dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
1049 1053                      &dsda, tag, 0);
1050 1054                  ASSERT3P(dsda.rm_origin, ==, NULL);
1051 1055                  goto out;
1052 1056          } else if (defer) {
1053 1057                  err = EINVAL;
1054 1058                  goto out;
1055 1059          }
1056 1060  
1057 1061          dd = ds->ds_dir;
1058 1062  
1059 1063          if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
1060 1064              &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
1061 1065                  /*
1062 1066                   * Check for errors and mark this ds as inconsistent, in
1063 1067                   * case we crash while freeing the objects.
1064 1068                   */
1065 1069                  err = dsl_sync_task_do(dd->dd_pool,
1066 1070                      dsl_dataset_destroy_begin_check,
1067 1071                      dsl_dataset_destroy_begin_sync, ds, NULL, 0);
1068 1072                  if (err)
1069 1073                          goto out;
1070 1074  
1071 1075                  err = dmu_objset_from_ds(ds, &os);
1072 1076                  if (err)
1073 1077                          goto out;
1074 1078  
1075 1079                  /*
1076 1080                   * Remove all objects while in the open context so that
1077 1081                   * there is less work to do in the syncing context.
1078 1082                   */
1079 1083                  for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
1080 1084                      ds->ds_phys->ds_prev_snap_txg)) {
1081 1085                          /*
1082 1086                           * Ignore errors, if there is not enough disk space
1083 1087                           * we will deal with it in dsl_dataset_destroy_sync().
1084 1088                           */
1085 1089                          (void) dmu_free_object(os, obj);
1086 1090                  }
1087 1091                  if (err != ESRCH)
1088 1092                          goto out;
1089 1093  
1090 1094                  /*
1091 1095                   * Sync out all in-flight IO.
1092 1096                   */
1093 1097                  txg_wait_synced(dd->dd_pool, 0);
1094 1098  
1095 1099                  /*
1096 1100                   * If we managed to free all the objects in open
1097 1101                   * context, the user space accounting should be zero.
1098 1102                   */
1099 1103                  if (ds->ds_phys->ds_bp.blk_fill == 0 &&
1100 1104                      dmu_objset_userused_enabled(os)) {
1101 1105                          uint64_t count;
1102 1106  
1103 1107                          ASSERT(zap_count(os, DMU_USERUSED_OBJECT,
1104 1108                              &count) != 0 || count == 0);
1105 1109                          ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT,
1106 1110                              &count) != 0 || count == 0);
1107 1111                  }
1108 1112          }
1109 1113  
1110 1114          rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
1111 1115          err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
1112 1116          rw_exit(&dd->dd_pool->dp_config_rwlock);
1113 1117  
1114 1118          if (err)
1115 1119                  goto out;
1116 1120  
1117 1121          /*
1118 1122           * Blow away the dsl_dir + head dataset.
1119 1123           */
1120 1124          dsl_dataset_make_exclusive(ds, tag);
1121 1125          /*
1122 1126           * If we're removing a clone, we might also need to remove its
1123 1127           * origin.
1124 1128           */
1125 1129          do {
1126 1130                  dsda.need_prep = B_FALSE;
1127 1131                  if (dsl_dir_is_clone(dd)) {
1128 1132                          err = dsl_dataset_origin_rm_prep(&dsda, tag);

↓ open down ↓

768 lines elided

↑ open up ↑

1129 1133                          if (err) {
1130 1134                                  dsl_dir_close(dd, FTAG);
1131 1135                                  goto out;
1132 1136                          }
1133 1137                  }
1134 1138  
1135 1139                  dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
1136 1140                  dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
1137 1141                      dsl_dataset_destroy_sync, &dsda, tag, 0);
1138 1142                  dsl_sync_task_create(dstg, dsl_dir_destroy_check,
1139      -                    dsl_dir_destroy_sync, dd, FTAG, 0);
     1143 +                    dsl_dir_destroy_sync, dd, tag, 0);
1140 1144                  err = dsl_sync_task_group_wait(dstg);
1141 1145                  dsl_sync_task_group_destroy(dstg);
1142 1146  
1143 1147                  /*
1144 1148                   * We could be racing against 'zfs release' or 'zfs destroy -d'
1145 1149                   * on the origin snap, in which case we can get EBUSY if we
1146 1150                   * needed to destroy the origin snap but were not ready to
1147 1151                   * do so.
1148 1152                   */
1149 1153                  if (dsda.need_prep) {

1150 1154                          ASSERT(err == EBUSY);
1151 1155                          ASSERT(dsl_dir_is_clone(dd));
1152 1156                          ASSERT(dsda.rm_origin == NULL);
1153 1157                  }
1154 1158          } while (dsda.need_prep);
1155 1159  
1156 1160          if (dsda.rm_origin != NULL)
1157 1161                  dsl_dataset_disown(dsda.rm_origin, tag);
1158 1162  
1159 1163          /* if it is successful, dsl_dir_destroy_sync will close the dd */
1160 1164          if (err)
1161 1165                  dsl_dir_close(dd, FTAG);
1162 1166  out:
1163 1167          dsl_dataset_disown(ds, tag);
1164 1168          return (err);
1165 1169  }
1166 1170  
1167 1171  blkptr_t *
1168 1172  dsl_dataset_get_blkptr(dsl_dataset_t *ds)
1169 1173  {
1170 1174          return (&ds->ds_phys->ds_bp);
1171 1175  }
1172 1176  
1173 1177  void
1174 1178  dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
1175 1179  {
1176 1180          ASSERT(dmu_tx_is_syncing(tx));
1177 1181          /* If it's the meta-objset, set dp_meta_rootbp */
1178 1182          if (ds == NULL) {
1179 1183                  tx->tx_pool->dp_meta_rootbp = *bp;
1180 1184          } else {
1181 1185                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
1182 1186                  ds->ds_phys->ds_bp = *bp;
1183 1187          }
1184 1188  }
1185 1189  
1186 1190  spa_t *
1187 1191  dsl_dataset_get_spa(dsl_dataset_t *ds)
1188 1192  {
1189 1193          return (ds->ds_dir->dd_pool->dp_spa);
1190 1194  }
1191 1195  
1192 1196  void
1193 1197  dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
1194 1198  {
1195 1199          dsl_pool_t *dp;
1196 1200  
1197 1201          if (ds == NULL) /* this is the meta-objset */
1198 1202                  return;
1199 1203  
1200 1204          ASSERT(ds->ds_objset != NULL);
1201 1205  
1202 1206          if (ds->ds_phys->ds_next_snap_obj != 0)
1203 1207                  panic("dirtying snapshot!");
1204 1208  
1205 1209          dp = ds->ds_dir->dd_pool;
1206 1210  
1207 1211          if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
1208 1212                  /* up the hold count until we can be written out */
1209 1213                  dmu_buf_add_ref(ds->ds_dbuf, ds);
1210 1214          }
1211 1215  }
1212 1216  
1213 1217  boolean_t
1214 1218  dsl_dataset_is_dirty(dsl_dataset_t *ds)
1215 1219  {
1216 1220          for (int t = 0; t < TXG_SIZE; t++) {
1217 1221                  if (txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
1218 1222                      ds, t))
1219 1223                          return (B_TRUE);
1220 1224          }
1221 1225          return (B_FALSE);
1222 1226  }
1223 1227  
1224 1228  /*
1225 1229   * The unique space in the head dataset can be calculated by subtracting
1226 1230   * the space used in the most recent snapshot, that is still being used
1227 1231   * in this file system, from the space currently in use.  To figure out
1228 1232   * the space in the most recent snapshot still in use, we need to take
1229 1233   * the total space used in the snapshot and subtract out the space that
1230 1234   * has been freed up since the snapshot was taken.
1231 1235   */
1232 1236  static void
1233 1237  dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
1234 1238  {
1235 1239          uint64_t mrs_used;
1236 1240          uint64_t dlused, dlcomp, dluncomp;
1237 1241  
1238 1242          ASSERT(!dsl_dataset_is_snapshot(ds));
1239 1243  
1240 1244          if (ds->ds_phys->ds_prev_snap_obj != 0)
1241 1245                  mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
1242 1246          else
1243 1247                  mrs_used = 0;
1244 1248  
1245 1249          dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);
1246 1250  
1247 1251          ASSERT3U(dlused, <=, mrs_used);
1248 1252          ds->ds_phys->ds_unique_bytes =
1249 1253              ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);
1250 1254  
1251 1255          if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
1252 1256              SPA_VERSION_UNIQUE_ACCURATE)
1253 1257                  ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
1254 1258  }
1255 1259  
/*
 * Callback argument for kill_blkptr().
 */
struct killarg {
        dsl_dataset_t *ds;      /* dataset whose blocks are being killed */
        dmu_tx_t *tx;           /* transaction the frees are charged to */
};
1260 1264  
1261 1265  /* ARGSUSED */
1262 1266  static int
1263 1267  kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
1264 1268      const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
1265 1269  {
1266 1270          struct killarg *ka = arg;
1267 1271          dmu_tx_t *tx = ka->tx;
1268 1272  
1269 1273          if (bp == NULL)
1270 1274                  return (0);
1271 1275  
1272 1276          if (zb->zb_level == ZB_ZIL_LEVEL) {
1273 1277                  ASSERT(zilog != NULL);
1274 1278                  /*
1275 1279                   * It's a block in the intent log.  It has no
1276 1280                   * accounting, so just free it.
1277 1281                   */
1278 1282                  dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
1279 1283          } else {
1280 1284                  ASSERT(zilog == NULL);
1281 1285                  ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
1282 1286                  (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
1283 1287          }
1284 1288  
1285 1289          return (0);
1286 1290  }
1287 1291  
1288 1292  /* ARGSUSED */
1289 1293  static int
1290 1294  dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
1291 1295  {
1292 1296          dsl_dataset_t *ds = arg1;
1293 1297          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1294 1298          uint64_t count;
1295 1299          int err;
1296 1300  
1297 1301          /*
1298 1302           * Can't delete a head dataset if there are snapshots of it.
1299 1303           * (Except if the only snapshots are from the branch we cloned
1300 1304           * from.)
1301 1305           */
1302 1306          if (ds->ds_prev != NULL &&
1303 1307              ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1304 1308                  return (EBUSY);
1305 1309  
1306 1310          /*
1307 1311           * This is really a dsl_dir thing, but check it here so that
1308 1312           * we'll be less likely to leave this dataset inconsistent &
1309 1313           * nearly destroyed.
1310 1314           */
1311 1315          err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
1312 1316          if (err)
1313 1317                  return (err);
1314 1318          if (count != 0)
1315 1319                  return (EEXIST);
1316 1320  
1317 1321          return (0);
1318 1322  }
1319 1323  
1320 1324  /* ARGSUSED */
1321 1325  static void
1322 1326  dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1323 1327  {
1324 1328          dsl_dataset_t *ds = arg1;
1325 1329  
1326 1330          /* Mark it as inconsistent on-disk, in case we crash */
1327 1331          dmu_buf_will_dirty(ds->ds_dbuf, tx);
1328 1332          ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
1329 1333  
1330 1334          spa_history_log_internal_ds(ds, "destroy begin", tx, "");
1331 1335  }
1332 1336  
1333 1337  static int
1334 1338  dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag,
1335 1339      dmu_tx_t *tx)
1336 1340  {
1337 1341          dsl_dataset_t *ds = dsda->ds;
1338 1342          dsl_dataset_t *ds_prev = ds->ds_prev;
1339 1343  
1340 1344          if (dsl_dataset_might_destroy_origin(ds_prev)) {
1341 1345                  struct dsl_ds_destroyarg ndsda = {0};
1342 1346  
1343 1347                  /*
1344 1348                   * If we're not prepared to remove the origin, don't remove
1345 1349                   * the clone either.
1346 1350                   */
1347 1351                  if (dsda->rm_origin == NULL) {
1348 1352                          dsda->need_prep = B_TRUE;
1349 1353                          return (EBUSY);
1350 1354                  }
1351 1355  
1352 1356                  ndsda.ds = ds_prev;
1353 1357                  ndsda.is_origin_rm = B_TRUE;
1354 1358                  return (dsl_dataset_destroy_check(&ndsda, tag, tx));
1355 1359          }
1356 1360  
1357 1361          /*
1358 1362           * If we're not going to remove the origin after all,
1359 1363           * undo the open context setup.
1360 1364           */
1361 1365          if (dsda->rm_origin != NULL) {
1362 1366                  dsl_dataset_disown(dsda->rm_origin, tag);
1363 1367                  dsda->rm_origin = NULL;
1364 1368          }
1365 1369  
1366 1370          return (0);
1367 1371  }
1368 1372  
1369 1373  /*
            * Decide whether the dataset in arg1 (a struct dsl_ds_destroyarg)
            * may be destroyed.  Returns 0 if it may, otherwise:
            *   ENOTSUP - deferred destroy was requested on a pool older than
            *             SPA_VERSION_USERREFS
            *   EBUSY   - ds_prev's next snapshot is this dataset (a head that
            *             still has snapshots), or a snapshot still has user
            *             references and we are not releasing
            *   EAGAIN  - the dataset's root block was born in this txg or later
            *   EEXIST  - a snapshot has more children than the destroy allows
            * For a clone head the result of dsl_dataset_origin_check() is
            * returned; arg2 is used only to forward to that function.
            *
1370 1374   * If you add new checks here, you may need to add
1371 1375   * additional checks to the "temporary" case in
1372 1376   * snapshot_check() in dmu_objset.c.
1373 1377   */
1374 1378  /* ARGSUSED */
1375 1379  int
1376 1380  dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
1377 1381  {
1378 1382          struct dsl_ds_destroyarg *dsda = arg1;
1379 1383          dsl_dataset_t *ds = dsda->ds;
1380 1384  
1381 1385          /* we have an owner hold, so no one else can destroy us */
1382 1386          ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
1383 1387  
1384 1388          /*
1385 1389           * Only allow deferred destroy on pools that support it.
1386 1390           * NOTE: deferred destroy is only supported on snapshots.
1387 1391           */
1388 1392          if (dsda->defer) {
1389 1393                  if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
1390 1394                      SPA_VERSION_USERREFS)
1391 1395                          return (ENOTSUP);
1392 1396                  ASSERT(dsl_dataset_is_snapshot(ds));
1393 1397                  return (0);
1394 1398          }
1395 1399  
1396 1400          /*
1397 1401           * Can't delete a head dataset if there are snapshots of it.
1398 1402           * (Except if the only snapshots are from the branch we cloned
1399 1403           * from.)
1400 1404           */
1401 1405          if (ds->ds_prev != NULL &&
1402 1406              ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1403 1407                  return (EBUSY);
1404 1408  
1405 1409          /*
1406 1410           * If we made changes this txg, traverse_dsl_dataset won't find
1407 1411           * them.  Try again.
1408 1412           */
1409 1413          if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1410 1414                  return (EAGAIN);
1411 1415  
1412 1416          if (dsl_dataset_is_snapshot(ds)) {
1413 1417                  /*
1414 1418                   * If this snapshot has an elevated user reference count,
1415 1419                   * we can't destroy it yet.
1416 1420                   */
1417 1421                  if (ds->ds_userrefs > 0 && !dsda->releasing)
1418 1422                          return (EBUSY);
1419 1423  
1420 1424                  mutex_enter(&ds->ds_lock);
1421 1425                  /*
1422 1426                   * Can't delete a branch point. However, if we're destroying
1423 1427                   * a clone and removing its origin due to it having a user
1424 1428                   * hold count of 0 and having been marked for deferred destroy,
1425 1429                   * it's OK for the origin to have a single clone.
1426 1430                   */
1427 1431                  if (ds->ds_phys->ds_num_children >
1428 1432                      (dsda->is_origin_rm ? 2 : 1)) {
1429 1433                          mutex_exit(&ds->ds_lock);
1430 1434                          return (EEXIST);
1431 1435                  }
1432 1436                  mutex_exit(&ds->ds_lock);
1433 1437          } else if (dsl_dir_is_clone(ds->ds_dir)) {
1434 1438                  return (dsl_dataset_origin_check(dsda, arg2, tx));
1435 1439          }
1436 1440  
1437 1441          /* XXX we should do some i/o error checking... */
1438 1442          return (0);
1439 1443  }
1440 1444  
/*
 * Hand-off state shared between dsl_dataset_drain_refs(), which waits, and
 * its dsl_dataset_refs_gone() callback, which signals.
 */
1441 1445  struct refsarg {
1442 1446          kmutex_t lock;          /* held while reading/writing gone */
1443 1447          boolean_t gone;         /* set to TRUE by the callback */
1444 1448          kcondvar_t cv;          /* signalled once gone is set */
1445 1449  };
1446 1450  
/*
 * Callback installed by dsl_dataset_drain_refs() through
 * dmu_buf_update_user().  argv is the waiter's struct refsarg; mark it
 * gone and wake the waiter.  db is unused.
 * NOTE(review): presumably invoked when the dbuf's user data is evicted
 * after the last hold is dropped -- confirm against the dbuf user API.
 */
1447 1451  /* ARGSUSED */
1448 1452  static void
1449 1453  dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
1450 1454  {
1451 1455          struct refsarg *arg = argv;
1452 1456  
1453 1457          mutex_enter(&arg->lock);
1454 1458          arg->gone = TRUE;
1455 1459          cv_signal(&arg->cv);
1456 1460          mutex_exit(&arg->lock);
1457 1461  }
1458 1462  
/*
 * Release the hold on ds->ds_dbuf identified by tag and block until
 * dsl_dataset_refs_gone() has run.  The dbuf's user data is first switched
 * to a stack-local struct refsarg so the callback signals this thread
 * instead of touching ds.  On return ds->ds_dbuf and ds->ds_phys are NULL.
 */
1459 1463  static void
1460 1464  dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
1461 1465  {
1462 1466          struct refsarg arg;
1463 1467  
1464 1468          mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
1465 1469          cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
1466 1470          arg.gone = FALSE;
                   /* replace the (ds, &ds->ds_phys) user with (&arg, callback) */
1467 1471          (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
1468 1472              dsl_dataset_refs_gone);
1469 1473          dmu_buf_rele(ds->ds_dbuf, tag);
1470 1474          mutex_enter(&arg.lock);
                   /* loop guards against wakeups that arrive before gone is set */
1471 1475          while (!arg.gone)
1472 1476                  cv_wait(&arg.cv, &arg.lock);
1473 1477          ASSERT(arg.gone);
1474 1478          mutex_exit(&arg.lock);
1475 1479          ds->ds_dbuf = NULL;
1476 1480          ds->ds_phys = NULL;
1477 1481          mutex_destroy(&arg.lock);
1478 1482          cv_destroy(&arg.cv);
1479 1483  }
1480 1484  
/*
 * Remove the entry for obj from ds's ds_next_clones_obj ZAP in the MOS.
 * A missing entry (ENOENT) is tolerated; any other failure trips VERIFY0.
 * Debug builds additionally assert that the ZAP does not hold more
 * entries than ds_num_children - 2.
 */
1481 1485  static void
1482 1486  remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx)
1483 1487  {
1484 1488          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1485 1489          uint64_t count;
1486 1490          int err;
1487 1491  
1488 1492          ASSERT(ds->ds_phys->ds_num_children >= 2);
1489 1493          err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx);
1490 1494          /*
1491 1495           * The err should not be ENOENT, but a bug in a previous version
1492 1496           * of the code could cause upgrade_clones_cb() to not set
1493 1497           * ds_next_snap_obj when it should, leading to a missing entry.
1494 1498           * If we knew that the pool was created after
1495 1499           * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
1496 1500           * ENOENT.  However, at least we can check that we don't have
1497 1501           * too many entries in the next_clones_obj even after failing to
1498 1502           * remove this one.
1499 1503           */
1500 1504          if (err != ENOENT) {
1501 1505                  VERIFY0(err);
1502 1506          }
                   /* count is only filled in and read inside these assertions */
1503 1507          ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
1504 1508              &count));
1505 1509          ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2);
1506 1510  }
1507 1511  
/*
 * Walk the dd_clones ZAP of ds's dsl_dir.  For every clone whose
 * dd_origin_txg is greater than mintxg, remove the mintxg key from the
 * clone's deadlist and recurse into that clone's own clones.
 */
1508 1512  static void
1509 1513  dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
1510 1514  {
1511 1515          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1512 1516          zap_cursor_t zc;
1513 1517          zap_attribute_t za;
1514 1518  
1515 1519          /*
1516 1520           * If it is the old version, dd_clones doesn't exist so we can't
1517 1521           * find the clones, but deadlist_remove_key() is a no-op so it
1518 1522           * doesn't matter.
1519 1523           */
1520 1524          if (ds->ds_dir->dd_phys->dd_clones == 0)
1521 1525                  return;
1522 1526  
1523 1527          for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones);
1524 1528              zap_cursor_retrieve(&zc, &za) == 0;
1525 1529              zap_cursor_advance(&zc)) {
1526 1530                  dsl_dataset_t *clone;
1527 1531  
                           /* za_first_integer is used as the clone's dataset object */
1528 1532                  VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
1529 1533                      za.za_first_integer, FTAG, &clone));
1530 1534                  if (clone->ds_dir->dd_origin_txg > mintxg) {
1531 1535                          dsl_deadlist_remove_key(&clone->ds_deadlist,
1532 1536                              mintxg, tx);
1533 1537                          dsl_dataset_remove_clones_key(clone, mintxg, tx);
1534 1538                  }
1535 1539                  dsl_dataset_rele(clone, FTAG);
1536 1540          }
1537 1541          zap_cursor_fini(&zc);
1538 1542  }
1539 1543  
/*
 * Context handed to process_old_cb() by process_old_deadlist().
 */
1540 1544  struct process_old_arg {
1541 1545          dsl_dataset_t *ds;              /* snapshot being destroyed */
1542 1546          dsl_dataset_t *ds_prev;         /* its previous snapshot, or NULL */
1543 1547          boolean_t after_branch_point;
1544 1548          zio_t *pio;                     /* root zio passed to dsl_free_sync() */
1545 1549          uint64_t used, comp, uncomp;    /* running totals of freed space */
1546 1550  };
1547 1551  
/*
 * Per-block callback used by process_old_deadlist() via bpobj_iterate().
 * arg is a struct process_old_arg.
 *
 * A block born at or before poa->ds's previous snapshot txg is inserted
 * into poa->ds's deadlist; if there is a previous snapshot, we are not
 * after a branch point, and the block was born after that snapshot's own
 * previous snapshot txg, its size is added to the previous snapshot's
 * unique bytes.  Any other block is freed and added to the used/comp/uncomp
 * totals.  Always returns 0.
 */
1548 1552  static int
1549 1553  process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1550 1554  {
1551 1555          struct process_old_arg *poa = arg;
1552 1556          dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
1553 1557  
1554 1558          if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) {
1555 1559                  dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
1556 1560                  if (poa->ds_prev && !poa->after_branch_point &&
1557 1561                      bp->blk_birth >
1558 1562                      poa->ds_prev->ds_phys->ds_prev_snap_txg) {
1559 1563                          poa->ds_prev->ds_phys->ds_unique_bytes +=
1560 1564                              bp_get_dsize_sync(dp->dp_spa, bp);
1561 1565                  }
1562 1566          } else {
1563 1567                  poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
1564 1568                  poa->comp += BP_GET_PSIZE(bp);
1565 1569                  poa->uncomp += BP_GET_UCSIZE(bp);
1566 1570                  dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
1567 1571          }
1568 1572          return (0);
1569 1573  }
1570 1574  
/*
 * Old-format deadlist handling for destroying snapshot ds.  Both ds and
 * ds_next must have old-format deadlists (asserted).
 *
 * Every block on ds_next's deadlist is run through process_old_cb(), the
 * resulting frees are waited for, the freed totals are subtracted from the
 * dir's DD_USED_SNAP accounting, and finally the deadlist objects of ds and
 * ds_next are exchanged and both deadlists reopened.
 */
1571 1575  static void
1572 1576  process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
1573 1577      dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
1574 1578  {
1575 1579          struct process_old_arg poa = { 0 };
1576 1580          dsl_pool_t *dp = ds->ds_dir->dd_pool;
1577 1581          objset_t *mos = dp->dp_meta_objset;
1578 1582  
1579 1583          ASSERT(ds->ds_deadlist.dl_oldfmt);
1580 1584          ASSERT(ds_next->ds_deadlist.dl_oldfmt);
1581 1585  
1582 1586          poa.ds = ds;
1583 1587          poa.ds_prev = ds_prev;
1584 1588          poa.after_branch_point = after_branch_point;
1585 1589          poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
1586 1590          VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
1587 1591              process_old_cb, &poa, tx));
1588 1592          VERIFY0(zio_wait(poa.pio));
                   /* everything the callback freed must equal ds's unique space */
1589 1593          ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);
1590 1594  
1591 1595          /* change snapused */
1592 1596          dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
1593 1597              -poa.used, -poa.comp, -poa.uncomp, tx);
1594 1598  
1595 1599          /* swap next's deadlist to our deadlist */
1596 1600          dsl_deadlist_close(&ds->ds_deadlist);
1597 1601          dsl_deadlist_close(&ds_next->ds_deadlist);
1598 1602          SWITCH64(ds_next->ds_phys->ds_deadlist_obj,
1599 1603              ds->ds_phys->ds_deadlist_obj);
1600 1604          dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
1601 1605          dsl_deadlist_open(&ds_next->ds_deadlist, mos,
1602 1606              ds_next->ds_phys->ds_deadlist_obj);
1603 1607  }
1604 1608  
/*
 * Synchronous destroy path: traverse ds from its previous snapshot txg in
 * TRAVERSE_POST order, handing each block to kill_blkptr() together with a
 * struct killarg naming ds and tx.  Returns the traverse_dataset() result,
 * which debug builds assert to be 0.
 */
1605 1609  static int
1606 1610  old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
1607 1611  {
1608 1612          int err;
1609 1613          struct killarg ka;
1610 1614  
1611 1615          /*
1612 1616           * Free everything that we point to (that's born after
1613 1617           * the previous snapshot, if we are a clone)
1614 1618           *
1615 1619           * NB: this should be very quick, because we already
1616 1620           * freed all the objects in open context.
1617 1621           */
1618 1622          ka.ds = ds;
1619 1623          ka.tx = tx;
1620 1624          err = traverse_dataset(ds,
1621 1625              ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
1622 1626              kill_blkptr, &ka);
1623 1627          ASSERT0(err);
1624 1628          ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
1625 1629  
1626 1630          return (err);
1627 1631  }
1628 1632  
/*
 * Carry out the destruction of the dataset in arg1 (a struct
 * dsl_ds_destroyarg).  tag identifies the dbuf hold that is released by
 * dsl_dataset_drain_refs() near the end.
 *
 * If this is a deferred destroy and the dataset still has user references
 * or more than one child, only DS_FLAG_DEFER_DESTROY is set and the
 * function returns.  Otherwise the dataset is:
 *   - logged, marked as owned by dsl_reaper, and stripped of its
 *     refreservation;
 *   - unlinked from its previous snapshot (next_clones / next_snap_obj);
 *   - if a snapshot: folded into the next dataset (deadlists, unique and
 *     snapused accounting, ds_prev references);
 *     if a head: its deadlist is freed and its blocks are either freed
 *     synchronously or queued on the pool's bptree (async_destroy);
 *   - removed from the namespace, and its auxiliary MOS objects and the
 *     dataset object itself are freed.
 * Finally, if dsda->rm_origin is set, the origin is destroyed by a
 * recursive call.
 */
1629 1633  void
1630 1634  dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
1631 1635  {
1632 1636          struct dsl_ds_destroyarg *dsda = arg1;
1633 1637          dsl_dataset_t *ds = dsda->ds;
1634 1638          int err;
1635 1639          int after_branch_point = FALSE;
1636 1640          dsl_pool_t *dp = ds->ds_dir->dd_pool;
1637 1641          objset_t *mos = dp->dp_meta_objset;
1638 1642          dsl_dataset_t *ds_prev = NULL;
1639 1643          boolean_t wont_destroy;
1640 1644          uint64_t obj;
1641 1645  
                   /*
                    * A deferred destroy is only recorded, not performed, while
                    * the dataset still has user references or more than one child.
                    */
1642 1646          wont_destroy = (dsda->defer &&
1643 1647              (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1));
1644 1648  
1645 1649          ASSERT(ds->ds_owner || wont_destroy);
1646 1650          ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
1647 1651          ASSERT(ds->ds_prev == NULL ||
1648 1652              ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
1649 1653          ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
1650 1654  
1651 1655          if (wont_destroy) {
1652 1656                  ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
1653 1657                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
1654 1658                  ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
1655 1659                  spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
1656 1660                  return;
1657 1661          }
1658 1662  
1659 1663          /* We need to log before removing it from the namespace. */
1660 1664          spa_history_log_internal_ds(ds, "destroy", tx, "");
1661 1665  
1662 1666          /* signal any waiters that this dataset is going away */
1663 1667          mutex_enter(&ds->ds_lock);
1664 1668          ds->ds_owner = dsl_reaper;
1665 1669          cv_broadcast(&ds->ds_exclusive_cv);
1666 1670          mutex_exit(&ds->ds_lock);
1667 1671  
1668 1672          /* Remove our reservation */
1669 1673          if (ds->ds_reserved != 0) {
1670 1674                  dsl_prop_setarg_t psa;
1671 1675                  uint64_t value = 0;
1672 1676  
1673 1677                  dsl_prop_setarg_init_uint64(&psa, "refreservation",
1674 1678                      (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
1675 1679                      &value);
1676 1680                  psa.psa_effective_value = 0;    /* predict default value */
1677 1681  
1678 1682                  dsl_dataset_set_reservation_sync(ds, &psa, tx);
1679 1683                  ASSERT0(ds->ds_reserved);
1680 1684          }
1681 1685  
1682 1686          ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
1683 1687  
1684 1688          dsl_scan_ds_destroyed(ds, tx);
1685 1689  
                   /* remember the object number; ds->ds_dbuf is released later */
1686 1690          obj = ds->ds_object;
1687 1691  
1688 1692          if (ds->ds_phys->ds_prev_snap_obj != 0) {
1689 1693                  if (ds->ds_prev) {
1690 1694                          ds_prev = ds->ds_prev;
1691 1695                  } else {
                                   /* FTAG hold; released near the end of this function */
1692 1696                          VERIFY(0 == dsl_dataset_hold_obj(dp,
1693 1697                              ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
1694 1698                  }
                           /* prev's "next" is someone else, so ds hangs off a branch */
1695 1699                  after_branch_point =
1696 1700                      (ds_prev->ds_phys->ds_next_snap_obj != obj);
1697 1701  
1698 1702                  dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1699 1703                  if (after_branch_point &&
1700 1704                      ds_prev->ds_phys->ds_next_clones_obj != 0) {
1701 1705                          remove_from_next_clones(ds_prev, obj, tx);
1702 1706                          if (ds->ds_phys->ds_next_snap_obj != 0) {
1703 1707                                  VERIFY(0 == zap_add_int(mos,
1704 1708                                      ds_prev->ds_phys->ds_next_clones_obj,
1705 1709                                      ds->ds_phys->ds_next_snap_obj, tx));
1706 1710                          }
1707 1711                  }
1708 1712                  if (after_branch_point &&
1709 1713                      ds->ds_phys->ds_next_snap_obj == 0) {
1710 1714                          /* This clone is toast. */
1711 1715                          ASSERT(ds_prev->ds_phys->ds_num_children > 1);
1712 1716                          ds_prev->ds_phys->ds_num_children--;
1713 1717  
1714 1718                          /*
1715 1719                           * If the clone's origin has no other clones, no
1716 1720                           * user holds, and has been marked for deferred
1717 1721                           * deletion, then we should have done the necessary
1718 1722                           * destroy setup for it.
1719 1723                           */
1720 1724                          if (ds_prev->ds_phys->ds_num_children == 1 &&
1721 1725                              ds_prev->ds_userrefs == 0 &&
1722 1726                              DS_IS_DEFER_DESTROY(ds_prev)) {
1723 1727                                  ASSERT3P(dsda->rm_origin, !=, NULL);
1724 1728                          } else {
1725 1729                                  ASSERT3P(dsda->rm_origin, ==, NULL);
1726 1730                          }
1727 1731                  } else if (!after_branch_point) {
1728 1732                          ds_prev->ds_phys->ds_next_snap_obj =
1729 1733                              ds->ds_phys->ds_next_snap_obj;
1730 1734                  }
1731 1735          }
1732 1736  
1733 1737          if (dsl_dataset_is_snapshot(ds)) {
1734 1738                  dsl_dataset_t *ds_next;
1735 1739                  uint64_t old_unique;
1736 1740                  uint64_t used = 0, comp = 0, uncomp = 0;
1737 1741  
1738 1742                  VERIFY(0 == dsl_dataset_hold_obj(dp,
1739 1743                      ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
1740 1744                  ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
1741 1745  
1742 1746                  old_unique = ds_next->ds_phys->ds_unique_bytes;
1743 1747  
                           /* splice ds out: next now points back at ds's predecessor */
1744 1748                  dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
1745 1749                  ds_next->ds_phys->ds_prev_snap_obj =
1746 1750                      ds->ds_phys->ds_prev_snap_obj;
1747 1751                  ds_next->ds_phys->ds_prev_snap_txg =
1748 1752                      ds->ds_phys->ds_prev_snap_txg;
1749 1753                  ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1750 1754                      ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
1751 1755  
1752 1756  
1753 1757                  if (ds_next->ds_deadlist.dl_oldfmt) {
1754 1758                          process_old_deadlist(ds, ds_prev, ds_next,
1755 1759                              after_branch_point, tx);
1756 1760                  } else {
1757 1761                          /* Adjust prev's unique space. */
1758 1762                          if (ds_prev && !after_branch_point) {
1759 1763                                  dsl_deadlist_space_range(&ds_next->ds_deadlist,
1760 1764                                      ds_prev->ds_phys->ds_prev_snap_txg,
1761 1765                                      ds->ds_phys->ds_prev_snap_txg,
1762 1766                                      &used, &comp, &uncomp);
1763 1767                                  ds_prev->ds_phys->ds_unique_bytes += used;
1764 1768                          }
1765 1769  
1766 1770                          /* Adjust snapused. */
1767 1771                          dsl_deadlist_space_range(&ds_next->ds_deadlist,
1768 1772                              ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
1769 1773                              &used, &comp, &uncomp);
1770 1774                          dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
1771 1775                              -used, -comp, -uncomp, tx);
1772 1776  
1773 1777                          /* Move blocks to be freed to pool's free list. */
1774 1778                          dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
1775 1779                              &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
1776 1780                              tx);
1777 1781                          dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
1778 1782                              DD_USED_HEAD, used, comp, uncomp, tx);
1779 1783  
1780 1784                          /* Merge our deadlist into next's and free it. */
1781 1785                          dsl_deadlist_merge(&ds_next->ds_deadlist,
1782 1786                              ds->ds_phys->ds_deadlist_obj, tx);
1783 1787                  }
1784 1788                  dsl_deadlist_close(&ds->ds_deadlist);
1785 1789                  dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
1786 1790  
1787 1791                  /* Collapse range in clone heads */
1788 1792                  dsl_dataset_remove_clones_key(ds,
1789 1793                      ds->ds_phys->ds_creation_txg, tx);
1790 1794  
1791 1795                  if (dsl_dataset_is_snapshot(ds_next)) {
1792 1796                          dsl_dataset_t *ds_nextnext;
1793 1797  
1794 1798                          /*
1795 1799                           * Update next's unique to include blocks which
1796 1800                           * were previously shared by only this snapshot
1797 1801                           * and it.  Those blocks will be born after the
1798 1802                           * prev snap and before this snap, and will have
1799 1803                           * died after the next snap and before the one
1800 1804                           * after that (ie. be on the snap after next's
1801 1805                           * deadlist).
1802 1806                           */
1803 1807                          VERIFY(0 == dsl_dataset_hold_obj(dp,
1804 1808                              ds_next->ds_phys->ds_next_snap_obj,
1805 1809                              FTAG, &ds_nextnext));
1806 1810                          dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
1807 1811                              ds->ds_phys->ds_prev_snap_txg,
1808 1812                              ds->ds_phys->ds_creation_txg,
1809 1813                              &used, &comp, &uncomp);
1810 1814                          ds_next->ds_phys->ds_unique_bytes += used;
1811 1815                          dsl_dataset_rele(ds_nextnext, FTAG);
1812 1816                          ASSERT3P(ds_next->ds_prev, ==, NULL);
1813 1817  
1814 1818                          /* Collapse range in this head. */
1815 1819                          dsl_dataset_t *hds;
1816 1820                          VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
1817 1821                              ds->ds_dir->dd_phys->dd_head_dataset_obj,
1818 1822                              FTAG, &hds));
1819 1823                          dsl_deadlist_remove_key(&hds->ds_deadlist,
1820 1824                              ds->ds_phys->ds_creation_txg, tx);
1821 1825                          dsl_dataset_rele(hds, FTAG);
1822 1826  
1823 1827                  } else {
                                   /* ds_next is a head: repoint its ds_prev reference */
1824 1828                          ASSERT3P(ds_next->ds_prev, ==, ds);
1825 1829                          dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
1826 1830                          ds_next->ds_prev = NULL;
1827 1831                          if (ds_prev) {
1828 1832                                  VERIFY(0 == dsl_dataset_get_ref(dp,
1829 1833                                      ds->ds_phys->ds_prev_snap_obj,
1830 1834                                      ds_next, &ds_next->ds_prev));
1831 1835                          }
1832 1836  
1833 1837                          dsl_dataset_recalc_head_uniq(ds_next);
1834 1838  
1835 1839                          /*
1836 1840                           * Reduce the amount of our unconsumed refreservation
1837 1841                           * being charged to our parent by the amount of
1838 1842                           * new unique data we have gained.
1839 1843                           */
1840 1844                          if (old_unique < ds_next->ds_reserved) {
1841 1845                                  int64_t mrsdelta;
1842 1846                                  uint64_t new_unique =
1843 1847                                      ds_next->ds_phys->ds_unique_bytes;
1844 1848  
1845 1849                                  ASSERT(old_unique <= new_unique);
1846 1850                                  mrsdelta = MIN(new_unique - old_unique,
1847 1851                                      ds_next->ds_reserved - old_unique);
1848 1852                                  dsl_dir_diduse_space(ds->ds_dir,
1849 1853                                      DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
1850 1854                          }
1851 1855                  }
1852 1856                  dsl_dataset_rele(ds_next, FTAG);
1853 1857          } else {
1854 1858                  zfeature_info_t *async_destroy =
1855 1859                      &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
1856 1860                  objset_t *os;
1857 1861  
1858 1862                  /*
1859 1863                   * There's no next snapshot, so this is a head dataset.
1860 1864                   * Destroy the deadlist.  Unless it's a clone, the
1861 1865                   * deadlist should be empty.  (If it's a clone, it's
1862 1866                   * safe to ignore the deadlist contents.)
1863 1867                   */
1864 1868                  dsl_deadlist_close(&ds->ds_deadlist);
1865 1869                  dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
1866 1870                  ds->ds_phys->ds_deadlist_obj = 0;
1867 1871  
1868 1872                  VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));
1869 1873  
1870 1874                  if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
                                   /*
                                    * err is not examined after this call; the callee
                                    * ASSERT0()s it and it is overwritten further down.
                                    */
1871 1875                          err = old_synchronous_dataset_destroy(ds, tx);
1872 1876                  } else {
1873 1877                          /*
1874 1878                           * Move the bptree into the pool's list of trees to
1875 1879                           * clean up and update space accounting information.
1876 1880                           */
1877 1881                          uint64_t used, comp, uncomp;
1878 1882  
1879 1883                          zil_destroy_sync(dmu_objset_zil(os), tx);
1880 1884  
                                   /* first async destroy: activate feature, create bptree */
1881 1885                          if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
1882 1886                                  spa_feature_incr(dp->dp_spa, async_destroy, tx);
1883 1887                                  dp->dp_bptree_obj = bptree_alloc(mos, tx);
1884 1888                                  VERIFY(zap_add(mos,
1885 1889                                      DMU_POOL_DIRECTORY_OBJECT,
1886 1890                                      DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
1887 1891                                      &dp->dp_bptree_obj, tx) == 0);
1888 1892                          }
1889 1893  
1890 1894                          used = ds->ds_dir->dd_phys->dd_used_bytes;
1891 1895                          comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
1892 1896                          uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
1893 1897  
1894 1898                          ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
1895 1899                              ds->ds_phys->ds_unique_bytes == used);
1896 1900  
                                   /* move the space charge from this dir to the free dir */
1897 1901                          bptree_add(mos, dp->dp_bptree_obj,
1898 1902                              &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
1899 1903                              used, comp, uncomp, tx);
1900 1904                          dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
1901 1905                              -used, -comp, -uncomp, tx);
1902 1906                          dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
1903 1907                              used, comp, uncomp, tx);
1904 1908                  }
1905 1909  
1906 1910                  if (ds->ds_prev != NULL) {
1907 1911                          if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
1908 1912                                  VERIFY3U(0, ==, zap_remove_int(mos,
1909 1913                                      ds->ds_prev->ds_dir->dd_phys->dd_clones,
1910 1914                                      ds->ds_object, tx));
1911 1915                          }
1912 1916                          dsl_dataset_rele(ds->ds_prev, ds);
1913 1917                          ds->ds_prev = ds_prev = NULL;
1914 1918                  }
1915 1919          }
1916 1920  
1917 1921          /*
1918 1922           * This must be done after the dsl_traverse(), because it will
1919 1923           * re-open the objset.
                    * NOTE(review): "dsl_traverse()" presumably means the
                    * traverse_dataset() call made by
                    * old_synchronous_dataset_destroy() -- confirm.
1920 1924           */
1921 1925          if (ds->ds_objset) {
1922 1926                  dmu_objset_evict(ds->ds_objset);
1923 1927                  ds->ds_objset = NULL;
1924 1928          }
1925 1929  
1926 1930          if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1927 1931                  /* Erase the link in the dir */
1928 1932                  dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
1929 1933                  ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
1930 1934                  ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
1931 1935                  err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1932 1936                  ASSERT(err == 0);
1933 1937          } else {
1934 1938                  /* remove from snapshot namespace */
1935 1939                  dsl_dataset_t *ds_head;
1936 1940                  ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
1937 1941                  VERIFY(0 == dsl_dataset_hold_obj(dp,
1938 1942                      ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
1939 1943                  VERIFY(0 == dsl_dataset_get_snapname(ds));
1940 1944  #ifdef ZFS_DEBUG
1941 1945                  {
1942 1946                          uint64_t val;
1943 1947  
1944 1948                          err = dsl_dataset_snap_lookup(ds_head,
1945 1949                              ds->ds_snapname, &val);
1946 1950                          ASSERT0(err);
1947 1951                          ASSERT3U(val, ==, obj);
1948 1952                  }
1949 1953  #endif
1950 1954                  err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
1951 1955                  ASSERT(err == 0);
1952 1956                  dsl_dataset_rele(ds_head, FTAG);
1953 1957          }
1954 1958  
                   /* drop the FTAG hold on prev only if we took one above */
1955 1959          if (ds_prev && ds->ds_prev != ds_prev)
1956 1960                  dsl_dataset_rele(ds_prev, FTAG);
1957 1961  
1958 1962          spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
1959 1963  
1960 1964          if (ds->ds_phys->ds_next_clones_obj != 0) {
1961 1965                  uint64_t count;
1962 1966                  ASSERT(0 == zap_count(mos,
1963 1967                      ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
1964 1968                  VERIFY(0 == dmu_object_free(mos,
1965 1969                      ds->ds_phys->ds_next_clones_obj, tx));
1966 1970          }
1967 1971          if (ds->ds_phys->ds_props_obj != 0)
1968 1972                  VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
1969 1973          if (ds->ds_phys->ds_userrefs_obj != 0)
1970 1974                  VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
1971 1975          dsl_dir_close(ds->ds_dir, ds);
1972 1976          ds->ds_dir = NULL;
                   /* ds->ds_phys is NULL after this; use the saved obj below */
1973 1977          dsl_dataset_drain_refs(ds, tag);
1974 1978          VERIFY(0 == dmu_object_free(mos, obj, tx));
1975 1979  
1976 1980          if (dsda->rm_origin) {
1977 1981                  /*
1978 1982                   * Remove the origin of the clone we just destroyed.
1979 1983                   */
1980 1984                  struct dsl_ds_destroyarg ndsda = {0};
1981 1985  
1982 1986                  ndsda.ds = dsda->rm_origin;
1983 1987                  dsl_dataset_destroy_sync(&ndsda, tag, tx);
1984 1988          }
1985 1989  }
1986 1990  
/*
 * Account for the space a new snapshot of ds may take over from the
 * dataset's refreservation.  Does nothing and returns 0 unless tx is
 * syncing.  Returns ENOSPC when MIN(unique bytes, reservation) exceeds the
 * space available to ds's dir; otherwise registers that amount with
 * dsl_dir_willuse_space() (when non-zero) and returns 0.
 */
1987 1991  static int
1988 1992  dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
1989 1993  {
1990 1994          uint64_t asize;
1991 1995  
1992 1996          if (!dmu_tx_is_syncing(tx))
1993 1997                  return (0);
1994 1998  
1995 1999          /*
1996 2000           * If there's an fs-only reservation, any blocks that might become
1997 2001           * owned by the snapshot dataset must be accommodated by space
1998 2002           * outside of the reservation.
1999 2003           */
2000 2004          ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
2001 2005          asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
2002 2006          if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
2003 2007                  return (ENOSPC);
2004 2008

↓ open down ↓

855 lines elided

↑ open up ↑

2005 2009          /*
2006 2010           * Propagate any reserved space for this snapshot to other
2007 2011           * snapshot checks in this sync group.
2008 2012           */
2009 2013          if (asize > 0)
2010 2014                  dsl_dir_willuse_space(ds->ds_dir, asize, tx);
2011 2015  
2012 2016          return (0);
2013 2017  }
2014 2018  
     2019 +/*
     2020 + * Check if adding additional snapshot(s) would exceed any snapshot quotas.
     2021 + * Note that all snapshot quotas up to the root dataset (i.e. the pool itself)
     2022 + * or the given ancestor must be satisfied. Note that it is valid for the
     2023 + * count to exceed the quota. This can happen if a recursive snapshot is taken
     2024 + * from a dataset above this one.
     2025 + */
2015 2026  int
     2027 +dsl_snapcount_check(dsl_dir_t *dd, dmu_tx_t *tx, uint64_t cnt,
     2028 +    dsl_dir_t *ancestor)
     2029 +{
     2030 +        uint64_t quota;
     2031 +        int err = 0;
     2032 +
     2033 +        /*
     2034 +         * As with dsl_dataset_set_reservation_check(), don't run this check in
     2035 +         * open context.
     2036 +         */
     2037 +        if (!dmu_tx_is_syncing(tx))
     2038 +                return (0);
     2039 +
     2040 +        /*
     2041 +         * If renaming a dataset with no snapshots, count adjustment is 0.
     2042 +         * Likewise when taking a recursive snapshot below the top-level (see
     2043 +         * the comment in snapshot_check() for more details).
     2044 +         */
     2045 +        if (cnt == 0)
     2046 +                return (0);
     2047 +
     2048 +        /*
     2049 +         * If an ancestor has been provided, stop checking the quota once we
     2050 +         * hit that dir. We need this during rename so that we don't overcount
     2051 +         * the check once we recurse up to the common ancestor.
     2052 +         */
     2053 +        if (ancestor == dd)
     2054 +                return (0);
     2055 +
     2056 +        /*
     2057 +         * If there's no value for this property, there's no need to enforce a
     2058 +         * snapshot quota.
     2059 +         */
     2060 +        err = dsl_prop_get_dd(dd, zfs_prop_to_name(ZFS_PROP_SNAPSHOT_QUOTA),
     2061 +            8, 1, &quota, NULL, B_FALSE);
     2062 +        if (err == ENOENT)
     2063 +                return (0);
     2064 +        else if (err != 0)
     2065 +                return (err);
     2066 +
     2067 +#ifdef _KERNEL
     2068 +        extern void __dtrace_probe_zfs__ss__quota(uint64_t, uint64_t, char *);
     2069 +        __dtrace_probe_zfs__ss__quota(
     2070 +            (uint64_t)dd->dd_phys->dd_snapshot_count, (uint64_t)quota,
     2071 +            dd->dd_myname);
     2072 +#endif
     2073 +
     2074 +        if (quota > 0 && (dd->dd_phys->dd_snapshot_count + cnt) > quota)
     2075 +                return (EDQUOT);
     2076 +
     2077 +        if (dd->dd_parent != NULL)
     2078 +                err = dsl_snapcount_check(dd->dd_parent, tx, cnt, ancestor);
     2079 +
     2080 +        return (err);
     2081 +}
     2082 +
     2083 +/*
     2084 + * Adjust the snapshot count for the specified dsl_dir_t and all parents.
     2085 + * When a new snapshot is created, increment the count on all parents, and when
     2086 + * a snapshot is destroyed, decrement the count.
     2087 + */
     2088 +void
     2089 +dsl_snapcount_adjust(dsl_dir_t *dd, dmu_tx_t *tx, int64_t delta,
     2090 +    boolean_t first)
     2091 +{
     2092 +        /*
     2093 +         * On initial entry we need to check if this feature is active, but
     2094 +         * we don't want to re-check this on each recursive call. Note: the
     2095 +         * feature cannot be active if its not enabled. If the feature is not
     2096 +         * active, don't touch the on-disk count fields.
     2097 +         */
     2098 +        if (first) {
     2099 +                dsl_dataset_t *ds = NULL;
     2100 +                spa_t *spa;
     2101 +                zfeature_info_t *quota_feat =
     2102 +                    &spa_feature_table[SPA_FEATURE_DS_SS_QUOTA];
     2103 +
     2104 +                VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
     2105 +                    dd->dd_phys->dd_head_dataset_obj, FTAG, &ds));
     2106 +                spa = dsl_dataset_get_spa(ds);
     2107 +                dsl_dataset_rele(ds, FTAG);
     2108 +                if (!spa_feature_is_active(spa, quota_feat))
     2109 +                        return;
     2110 +        }
     2111 +
     2112 +        /*
     2113 +         * As with dsl_dataset_set_reservation_check(), wdon't want to run
     2114 +         * this check in open context.
     2115 +         */
     2116 +        if (!dmu_tx_is_syncing(tx))
     2117 +                return;
     2118 +
     2119 +        /* if renaming a dataset with no snapshots, count adjustment is 0 */
     2120 +        if (delta == 0)
     2121 +                return;
     2122 +
     2123 +        /* Increment count for parent */
     2124 +        dmu_buf_will_dirty(dd->dd_dbuf, tx);
     2125 +
     2126 +        mutex_enter(&dd->dd_lock);
     2127 +
     2128 +        /*
     2129 +         * Counts may be incorrect if dealing with an existing pool and
     2130 +         * there has never been a quota set in the dataset hierarchy.
     2131 +         * This is not an error.
     2132 +         */
     2133 +        if (delta < 0 && dd->dd_phys->dd_snapshot_count < (delta * -1)) {
     2134 +#ifdef _KERNEL
     2135 +                extern void __dtrace_probe_zfs__sscnt__adj__neg(char *);
     2136 +                __dtrace_probe_zfs__sscnt__adj__neg(dd->dd_myname);
     2137 +#endif
     2138 +                mutex_exit(&dd->dd_lock);
     2139 +                return;
     2140 +        }
     2141 +
     2142 +        dd->dd_phys->dd_snapshot_count += delta;
     2143 +
     2144 +        /* Roll up this additional count into our ancestors */
     2145 +
     2146 +        if (dd->dd_parent != NULL)
     2147 +                dsl_snapcount_adjust(dd->dd_parent, tx, delta, B_FALSE);
     2148 +
     2149 +        mutex_exit(&dd->dd_lock);
     2150 +}
     2151 +
     2152 +int
2016 2153  dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
2017      -    dmu_tx_t *tx)
     2154 +    uint64_t cnt, dmu_tx_t *tx)
2018 2155  {
2019 2156          int err;
2020 2157          uint64_t value;
2021 2158  
2022 2159          /*
2023 2160           * We don't allow multiple snapshots of the same txg.  If there
2024 2161           * is already one, try again.
2025 2162           */
2026 2163          if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
2027 2164                  return (EAGAIN);

2028 2165  
2029 2166          /*
2030 2167           * Check for conflicting snapshot name.
2031 2168           */
2032 2169          err = dsl_dataset_snap_lookup(ds, snapname, &value);
2033 2170          if (err == 0)
2034 2171                  return (EEXIST);

↓ open down ↓

7 lines elided

↑ open up ↑

2035 2172          if (err != ENOENT)
2036 2173                  return (err);
2037 2174  
2038 2175          /*
2039 2176           * Check that the dataset's name is not too long.  Name consists
2040 2177           * of the dataset's length + 1 for the @-sign + snapshot name's length
2041 2178           */
2042 2179          if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
2043 2180                  return (ENAMETOOLONG);
2044 2181  
     2182 +        err = dsl_snapcount_check(ds->ds_dir, tx, cnt, NULL);
     2183 +        if (err)
     2184 +                return (err);
     2185 +
2045 2186          err = dsl_dataset_snapshot_reserve_space(ds, tx);
2046 2187          if (err)
2047 2188                  return (err);
2048 2189  
2049 2190          ds->ds_trysnap_txg = tx->tx_txg;
2050 2191          return (0);
2051 2192  }
2052 2193  
2053 2194  void
2054 2195  dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,

2055 2196      dmu_tx_t *tx)

↓ open down ↓

1 lines elided

↑ open up ↑

2056 2197  {
2057 2198          dsl_pool_t *dp = ds->ds_dir->dd_pool;
2058 2199          dmu_buf_t *dbuf;
2059 2200          dsl_dataset_phys_t *dsphys;
2060 2201          uint64_t dsobj, crtxg;
2061 2202          objset_t *mos = dp->dp_meta_objset;
2062 2203          int err;
2063 2204  
2064 2205          ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
2065 2206  
     2207 +        dsl_snapcount_adjust(ds->ds_dir, tx, 1, B_TRUE);
     2208 +
2066 2209          /*
2067 2210           * The origin's ds_creation_txg has to be < TXG_INITIAL
2068 2211           */
2069 2212          if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
2070 2213                  crtxg = 1;
2071 2214          else
2072 2215                  crtxg = tx->tx_txg;
2073 2216  
2074 2217          dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
2075 2218              DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);

2076 2219          VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
2077 2220          dmu_buf_will_dirty(dbuf, tx);
2078 2221          dsphys = dbuf->db_data;
2079 2222          bzero(dsphys, sizeof (dsl_dataset_phys_t));
2080 2223          dsphys->ds_dir_obj = ds->ds_dir->dd_object;
2081 2224          dsphys->ds_fsid_guid = unique_create();
2082 2225          (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
2083 2226              sizeof (dsphys->ds_guid));
2084 2227          dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
2085 2228          dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
2086 2229          dsphys->ds_next_snap_obj = ds->ds_object;
2087 2230          dsphys->ds_num_children = 1;
2088 2231          dsphys->ds_creation_time = gethrestime_sec();
2089 2232          dsphys->ds_creation_txg = crtxg;
2090 2233          dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
2091 2234          dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
2092 2235          dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
2093 2236          dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
2094 2237          dsphys->ds_flags = ds->ds_phys->ds_flags;
2095 2238          dsphys->ds_bp = ds->ds_phys->ds_bp;
2096 2239          dmu_buf_rele(dbuf, FTAG);
2097 2240  
2098 2241          ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
2099 2242          if (ds->ds_prev) {
2100 2243                  uint64_t next_clones_obj =
2101 2244                      ds->ds_prev->ds_phys->ds_next_clones_obj;
2102 2245                  ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
2103 2246                      ds->ds_object ||
2104 2247                      ds->ds_prev->ds_phys->ds_num_children > 1);
2105 2248                  if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
2106 2249                          dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
2107 2250                          ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
2108 2251                              ds->ds_prev->ds_phys->ds_creation_txg);
2109 2252                          ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
2110 2253                  } else if (next_clones_obj != 0) {
2111 2254                          remove_from_next_clones(ds->ds_prev,
2112 2255                              dsphys->ds_next_snap_obj, tx);
2113 2256                          VERIFY3U(0, ==, zap_add_int(mos,
2114 2257                              next_clones_obj, dsobj, tx));
2115 2258                  }
2116 2259          }
2117 2260  
2118 2261          /*
2119 2262           * If we have a reference-reservation on this dataset, we will
2120 2263           * need to increase the amount of refreservation being charged
2121 2264           * since our unique space is going to zero.
2122 2265           */
2123 2266          if (ds->ds_reserved) {
2124 2267                  int64_t delta;
2125 2268                  ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
2126 2269                  delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
2127 2270                  dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
2128 2271                      delta, 0, 0, tx);
2129 2272          }
2130 2273  
2131 2274          dmu_buf_will_dirty(ds->ds_dbuf, tx);
2132 2275          zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu",
2133 2276              ds->ds_dir->dd_myname, snapname, dsobj,
2134 2277              ds->ds_phys->ds_prev_snap_txg);
2135 2278          ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist,
2136 2279              UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx);
2137 2280          dsl_deadlist_close(&ds->ds_deadlist);
2138 2281          dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
2139 2282          dsl_deadlist_add_key(&ds->ds_deadlist,
2140 2283              ds->ds_phys->ds_prev_snap_txg, tx);
2141 2284  
2142 2285          ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
2143 2286          ds->ds_phys->ds_prev_snap_obj = dsobj;
2144 2287          ds->ds_phys->ds_prev_snap_txg = crtxg;
2145 2288          ds->ds_phys->ds_unique_bytes = 0;
2146 2289          if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
2147 2290                  ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
2148 2291  
2149 2292          err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
2150 2293              snapname, 8, 1, &dsobj, tx);
2151 2294          ASSERT(err == 0);
2152 2295  
2153 2296          if (ds->ds_prev)
2154 2297                  dsl_dataset_drop_ref(ds->ds_prev, ds);
2155 2298          VERIFY(0 == dsl_dataset_get_ref(dp,
2156 2299              ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
2157 2300  
2158 2301          dsl_scan_ds_snapshotted(ds, tx);
2159 2302  
2160 2303          dsl_dir_snap_cmtime_update(ds->ds_dir);
2161 2304  
2162 2305          spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, "");
2163 2306  }
2164 2307  
/*
 * Sync out the dataset 'ds' in 'tx': write back the in-core fsid guid to
 * the dataset's phys and sync its objset via dmu_objset_sync().
 *
 * Must be called in syncing context on a dataset that has an objset and
 * no next snapshot (see the assertions).
 */
void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
        ASSERT(dmu_tx_is_syncing(tx));
        ASSERT(ds->ds_objset != NULL);
        ASSERT(ds->ds_phys->ds_next_snap_obj == 0);

        /*
         * in case we had to change ds_fsid_guid when we opened it,
         * sync it out now.
         */
        dmu_buf_will_dirty(ds->ds_dbuf, tx);
        ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;

        dmu_objset_sync(ds->ds_objset, zio, tx);
}
2181 2324  
2182 2325  static void
2183 2326  get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
2184 2327  {
2185 2328          uint64_t count = 0;
2186 2329          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
2187 2330          zap_cursor_t zc;
2188 2331          zap_attribute_t za;
2189 2332          nvlist_t *propval;
2190 2333          nvlist_t *val;
2191 2334  
2192 2335          rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
2193 2336          VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2194 2337          VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2195 2338  
2196 2339          /*
2197 2340           * There may me missing entries in ds_next_clones_obj
2198 2341           * due to a bug in a previous version of the code.
2199 2342           * Only trust it if it has the right number of entries.
2200 2343           */
2201 2344          if (ds->ds_phys->ds_next_clones_obj != 0) {
2202 2345                  ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
2203 2346                      &count));
2204 2347          }
2205 2348          if (count != ds->ds_phys->ds_num_children - 1) {
2206 2349                  goto fail;
2207 2350          }
2208 2351          for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
2209 2352              zap_cursor_retrieve(&zc, &za) == 0;
2210 2353              zap_cursor_advance(&zc)) {
2211 2354                  dsl_dataset_t *clone;
2212 2355                  char buf[ZFS_MAXNAMELEN];
2213 2356                  /*
2214 2357                   * Even though we hold the dp_config_rwlock, the dataset
2215 2358                   * may fail to open, returning ENOENT.  If there is a
2216 2359                   * thread concurrently attempting to destroy this
2217 2360                   * dataset, it will have the ds_rwlock held for
2218 2361                   * RW_WRITER.  Our call to dsl_dataset_hold_obj() ->
2219 2362                   * dsl_dataset_hold_ref() will fail its
2220 2363                   * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
2221 2364                   * dp_config_rwlock, and wait for the destroy progress
2222 2365                   * and signal ds_exclusive_cv.  If the destroy was
2223 2366                   * successful, we will see that
2224 2367                   * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
2225 2368                   */
2226 2369                  if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
2227 2370                      za.za_first_integer, FTAG, &clone) != 0)
2228 2371                          continue;
2229 2372                  dsl_dir_name(clone->ds_dir, buf);
2230 2373                  VERIFY(nvlist_add_boolean(val, buf) == 0);
2231 2374                  dsl_dataset_rele(clone, FTAG);
2232 2375          }
2233 2376          zap_cursor_fini(&zc);
2234 2377          VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0);
2235 2378          VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),
2236 2379              propval) == 0);
2237 2380  fail:
2238 2381          nvlist_free(val);
2239 2382          nvlist_free(propval);
2240 2383          rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
2241 2384  }
2242 2385  
2243 2386  void
2244 2387  dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
2245 2388  {
2246 2389          uint64_t refd, avail, uobjs, aobjs, ratio;
2247 2390  
2248 2391          ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
2249 2392              (ds->ds_phys->ds_uncompressed_bytes * 100 /
2250 2393              ds->ds_phys->ds_compressed_bytes);
2251 2394  
2252 2395          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
2253 2396  
2254 2397          if (dsl_dataset_is_snapshot(ds)) {
2255 2398                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
2256 2399                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
2257 2400                      ds->ds_phys->ds_unique_bytes);
2258 2401                  get_clones_stat(ds, nv);
2259 2402          } else {
2260 2403                  dsl_dir_stats(ds->ds_dir, nv);
2261 2404          }
2262 2405  
2263 2406          dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
2264 2407          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
2265 2408          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
2266 2409  
2267 2410          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
2268 2411              ds->ds_phys->ds_creation_time);
2269 2412          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
2270 2413              ds->ds_phys->ds_creation_txg);
2271 2414          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
2272 2415              ds->ds_quota);
2273 2416          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
2274 2417              ds->ds_reserved);
2275 2418          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
2276 2419              ds->ds_phys->ds_guid);
2277 2420          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE,
2278 2421              ds->ds_phys->ds_unique_bytes);
2279 2422          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID,
2280 2423              ds->ds_object);
2281 2424          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS,
2282 2425              ds->ds_userrefs);
2283 2426          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
2284 2427              DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
2285 2428  
2286 2429          if (ds->ds_phys->ds_prev_snap_obj != 0) {
2287 2430                  uint64_t written, comp, uncomp;
2288 2431                  dsl_pool_t *dp = ds->ds_dir->dd_pool;
2289 2432                  dsl_dataset_t *prev;
2290 2433  
2291 2434                  rw_enter(&dp->dp_config_rwlock, RW_READER);
2292 2435                  int err = dsl_dataset_hold_obj(dp,
2293 2436                      ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
2294 2437                  rw_exit(&dp->dp_config_rwlock);
2295 2438                  if (err == 0) {
2296 2439                          err = dsl_dataset_space_written(prev, ds, &written,
2297 2440                              &comp, &uncomp);
2298 2441                          dsl_dataset_rele(prev, FTAG);
2299 2442                          if (err == 0) {
2300 2443                                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
2301 2444                                      written);
2302 2445                          }
2303 2446                  }
2304 2447          }
2305 2448  }
2306 2449  
2307 2450  void
2308 2451  dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
2309 2452  {
2310 2453          stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
2311 2454          stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
2312 2455          stat->dds_guid = ds->ds_phys->ds_guid;
2313 2456          stat->dds_origin[0] = '\0';
2314 2457          if (dsl_dataset_is_snapshot(ds)) {
2315 2458                  stat->dds_is_snapshot = B_TRUE;
2316 2459                  stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
2317 2460          } else {
2318 2461                  stat->dds_is_snapshot = B_FALSE;
2319 2462                  stat->dds_num_clones = 0;
2320 2463  
2321 2464                  rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
2322 2465                  if (dsl_dir_is_clone(ds->ds_dir)) {
2323 2466                          dsl_dataset_t *ods;
2324 2467  
2325 2468                          VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
2326 2469                              ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
2327 2470                          dsl_dataset_name(ods, stat->dds_origin);
2328 2471                          dsl_dataset_drop_ref(ods, FTAG);
2329 2472                  }
2330 2473                  rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
2331 2474          }
2332 2475  }
2333 2476  
/*
 * Return the in-core fsid guid (ds_fsid_guid) of 'ds'.
 */
uint64_t
dsl_dataset_fsid_guid(dsl_dataset_t *ds)
{
        return (ds->ds_fsid_guid);
}
2339 2482  
2340 2483  void
2341 2484  dsl_dataset_space(dsl_dataset_t *ds,
2342 2485      uint64_t *refdbytesp, uint64_t *availbytesp,
2343 2486      uint64_t *usedobjsp, uint64_t *availobjsp)
2344 2487  {
2345 2488          *refdbytesp = ds->ds_phys->ds_referenced_bytes;
2346 2489          *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
2347 2490          if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
2348 2491                  *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
2349 2492          if (ds->ds_quota != 0) {
2350 2493                  /*
2351 2494                   * Adjust available bytes according to refquota
2352 2495                   */
2353 2496                  if (*refdbytesp < ds->ds_quota)
2354 2497                          *availbytesp = MIN(*availbytesp,
2355 2498                              ds->ds_quota - *refdbytesp);
2356 2499                  else
2357 2500                          *availbytesp = 0;
2358 2501          }
2359 2502          *usedobjsp = ds->ds_phys->ds_bp.blk_fill;
2360 2503          *availobjsp = DN_MAX_OBJECT - *usedobjsp;
2361 2504  }
2362 2505  
2363 2506  boolean_t
2364 2507  dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
2365 2508  {
2366 2509          dsl_pool_t *dp = ds->ds_dir->dd_pool;
2367 2510  
2368 2511          ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
2369 2512              dsl_pool_sync_context(dp));
2370 2513          if (ds->ds_prev == NULL)
2371 2514                  return (B_FALSE);
2372 2515          if (ds->ds_phys->ds_bp.blk_birth >
2373 2516              ds->ds_prev->ds_phys->ds_creation_txg) {
2374 2517                  objset_t *os, *os_prev;
2375 2518                  /*
2376 2519                   * It may be that only the ZIL differs, because it was
2377 2520                   * reset in the head.  Don't count that as being
2378 2521                   * modified.
2379 2522                   */
2380 2523                  if (dmu_objset_from_ds(ds, &os) != 0)
2381 2524                          return (B_TRUE);
2382 2525                  if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0)
2383 2526                          return (B_TRUE);
2384 2527                  return (bcmp(&os->os_phys->os_meta_dnode,
2385 2528                      &os_prev->os_phys->os_meta_dnode,
2386 2529                      sizeof (os->os_phys->os_meta_dnode)) != 0);
2387 2530          }
2388 2531          return (B_FALSE);
2389 2532  }
2390 2533  
2391 2534  /* ARGSUSED */
2392 2535  static int
2393 2536  dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
2394 2537  {
2395 2538          dsl_dataset_t *ds = arg1;
2396 2539          char *newsnapname = arg2;
2397 2540          dsl_dir_t *dd = ds->ds_dir;
2398 2541          dsl_dataset_t *hds;
2399 2542          uint64_t val;
2400 2543          int err;
2401 2544  
2402 2545          err = dsl_dataset_hold_obj(dd->dd_pool,
2403 2546              dd->dd_phys->dd_head_dataset_obj, FTAG, &hds);
2404 2547          if (err)
2405 2548                  return (err);
2406 2549  
2407 2550          /* new name better not be in use */
2408 2551          err = dsl_dataset_snap_lookup(hds, newsnapname, &val);
2409 2552          dsl_dataset_rele(hds, FTAG);
2410 2553  
2411 2554          if (err == 0)
2412 2555                  err = EEXIST;
2413 2556          else if (err == ENOENT)
2414 2557                  err = 0;
2415 2558  
2416 2559          /* dataset name + 1 for the "@" + the new snapshot name must fit */
2417 2560          if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
2418 2561                  err = ENAMETOOLONG;
2419 2562  
2420 2563          return (err);
2421 2564  }
2422 2565  
2423 2566  static void
2424 2567  dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
2425 2568  {
2426 2569          dsl_dataset_t *ds = arg1;
2427 2570          const char *newsnapname = arg2;
2428 2571          dsl_dir_t *dd = ds->ds_dir;
2429 2572          objset_t *mos = dd->dd_pool->dp_meta_objset;
2430 2573          dsl_dataset_t *hds;
2431 2574          int err;
2432 2575  
2433 2576          ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
2434 2577  
2435 2578          VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
2436 2579              dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
2437 2580  
2438 2581          VERIFY(0 == dsl_dataset_get_snapname(ds));
2439 2582          err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
2440 2583          ASSERT0(err);
2441 2584          mutex_enter(&ds->ds_lock);
2442 2585          (void) strcpy(ds->ds_snapname, newsnapname);
2443 2586          mutex_exit(&ds->ds_lock);
2444 2587          err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
2445 2588              ds->ds_snapname, 8, 1, &ds->ds_object, tx);
2446 2589          ASSERT0(err);
2447 2590  
2448 2591          spa_history_log_internal_ds(ds, "rename", tx,
2449 2592              "-> @%s", newsnapname);
2450 2593          dsl_dataset_rele(hds, FTAG);
2451 2594  }
2452 2595  
/*
 * State shared by the callbacks of a recursive snapshot rename.
 */
struct renamesnaparg {
        dsl_sync_task_group_t *dstg;    /* group collecting the rename tasks */
        char failed[MAXPATHLEN];        /* name of the snapshot last tried */
                                        /* or found to have failed */
        char *oldsnap;                  /* old snapshot name (after the '@') */
        char *newsnap;                  /* new snapshot name (after the '@') */
};
2459 2602  
2460 2603  static int
2461 2604  dsl_snapshot_rename_one(const char *name, void *arg)
2462 2605  {
2463 2606          struct renamesnaparg *ra = arg;
2464 2607          dsl_dataset_t *ds = NULL;
2465 2608          char *snapname;
2466 2609          int err;
2467 2610  
2468 2611          snapname = kmem_asprintf("%s@%s", name, ra->oldsnap);
2469 2612          (void) strlcpy(ra->failed, snapname, sizeof (ra->failed));
2470 2613  
2471 2614          /*
2472 2615           * For recursive snapshot renames the parent won't be changing
2473 2616           * so we just pass name for both the to/from argument.
2474 2617           */
2475 2618          err = zfs_secpolicy_rename_perms(snapname, snapname, CRED());
2476 2619          if (err != 0) {
2477 2620                  strfree(snapname);
2478 2621                  return (err == ENOENT ? 0 : err);
2479 2622          }
2480 2623  
2481 2624  #ifdef _KERNEL
2482 2625          /*
2483 2626           * For all filesystems undergoing rename, we'll need to unmount it.
2484 2627           */
2485 2628          (void) zfs_unmount_snap(snapname, NULL);
2486 2629  #endif
2487 2630          err = dsl_dataset_hold(snapname, ra->dstg, &ds);
2488 2631          strfree(snapname);
2489 2632          if (err != 0)
2490 2633                  return (err == ENOENT ? 0 : err);
2491 2634  
2492 2635          dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
2493 2636              dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
2494 2637  
2495 2638          return (0);
2496 2639  }
2497 2640  
2498 2641  static int
2499 2642  dsl_recursive_rename(char *oldname, const char *newname)
2500 2643  {
2501 2644          int err;
2502 2645          struct renamesnaparg *ra;
2503 2646          dsl_sync_task_t *dst;
2504 2647          spa_t *spa;
2505 2648          char *cp, *fsname = spa_strdup(oldname);
2506 2649          int len = strlen(oldname) + 1;
2507 2650  
2508 2651          /* truncate the snapshot name to get the fsname */
2509 2652          cp = strchr(fsname, '@');
2510 2653          *cp = '\0';
2511 2654  
2512 2655          err = spa_open(fsname, &spa, FTAG);
2513 2656          if (err) {
2514 2657                  kmem_free(fsname, len);
2515 2658                  return (err);
2516 2659          }
2517 2660          ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
2518 2661          ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
2519 2662  
2520 2663          ra->oldsnap = strchr(oldname, '@') + 1;
2521 2664          ra->newsnap = strchr(newname, '@') + 1;
2522 2665          *ra->failed = '\0';
2523 2666  
2524 2667          err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
2525 2668              DS_FIND_CHILDREN);
2526 2669          kmem_free(fsname, len);
2527 2670  
2528 2671          if (err == 0) {
2529 2672                  err = dsl_sync_task_group_wait(ra->dstg);
2530 2673          }
2531 2674  
2532 2675          for (dst = list_head(&ra->dstg->dstg_tasks); dst;
2533 2676              dst = list_next(&ra->dstg->dstg_tasks, dst)) {
2534 2677                  dsl_dataset_t *ds = dst->dst_arg1;
2535 2678                  if (dst->dst_err) {
2536 2679                          dsl_dir_name(ds->ds_dir, ra->failed);
2537 2680                          (void) strlcat(ra->failed, "@", sizeof (ra->failed));
2538 2681                          (void) strlcat(ra->failed, ra->newsnap,
2539 2682                              sizeof (ra->failed));
2540 2683                  }
2541 2684                  dsl_dataset_rele(ds, ra->dstg);
2542 2685          }
2543 2686  
2544 2687          if (err)
2545 2688                  (void) strlcpy(oldname, ra->failed, sizeof (ra->failed));
2546 2689  
2547 2690          dsl_sync_task_group_destroy(ra->dstg);
2548 2691          kmem_free(ra, sizeof (struct renamesnaparg));
2549 2692          spa_close(spa, FTAG);
2550 2693          return (err);
2551 2694  }
2552 2695  
2553 2696  static int
2554 2697  dsl_valid_rename(const char *oldname, void *arg)
2555 2698  {
2556 2699          int delta = *(int *)arg;
2557 2700  
2558 2701          if (strlen(oldname) + delta >= MAXNAMELEN)
2559 2702                  return (ENAMETOOLONG);
2560 2703  
2561 2704          return (0);
2562 2705  }
2563 2706  
2564 2707  #pragma weak dmu_objset_rename = dsl_dataset_rename
2565 2708  int
2566 2709  dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive)
2567 2710  {
2568 2711          dsl_dir_t *dd;
2569 2712          dsl_dataset_t *ds;
2570 2713          const char *tail;
2571 2714          int err;
2572 2715  
2573 2716          err = dsl_dir_open(oldname, FTAG, &dd, &tail);
2574 2717          if (err)
2575 2718                  return (err);
2576 2719  
2577 2720          if (tail == NULL) {
2578 2721                  int delta = strlen(newname) - strlen(oldname);
2579 2722  
2580 2723                  /* if we're growing, validate child name lengths */
2581 2724                  if (delta > 0)
2582 2725                          err = dmu_objset_find(oldname, dsl_valid_rename,
2583 2726                              &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
2584 2727  
2585 2728                  if (err == 0)
2586 2729                          err = dsl_dir_rename(dd, newname);
2587 2730                  dsl_dir_close(dd, FTAG);
2588 2731                  return (err);
2589 2732          }
2590 2733  
2591 2734          if (tail[0] != '@') {
2592 2735                  /* the name ended in a nonexistent component */
2593 2736                  dsl_dir_close(dd, FTAG);
2594 2737                  return (ENOENT);
2595 2738          }
2596 2739  
2597 2740          dsl_dir_close(dd, FTAG);
2598 2741  
2599 2742          /* new name must be snapshot in same filesystem */
2600 2743          tail = strchr(newname, '@');
2601 2744          if (tail == NULL)
2602 2745                  return (EINVAL);
2603 2746          tail++;
2604 2747          if (strncmp(oldname, newname, tail - newname) != 0)
2605 2748                  return (EXDEV);
2606 2749  
2607 2750          if (recursive) {
2608 2751                  err = dsl_recursive_rename(oldname, newname);
2609 2752          } else {
2610 2753                  err = dsl_dataset_hold(oldname, FTAG, &ds);
2611 2754                  if (err)
2612 2755                          return (err);
2613 2756  
2614 2757                  err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2615 2758                      dsl_dataset_snapshot_rename_check,
2616 2759                      dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
2617 2760  
2618 2761                  dsl_dataset_rele(ds, FTAG);
2619 2762          }
2620 2763  
2621 2764          return (err);
2622 2765  }
2623 2766  
/*
 * One element of a snapshot list built by snaplist_make(): 'link' is the
 * list linkage and 'ds' is the dataset that snaplist_make() held or owned.
 */
2624 2767  struct promotenode {
2625 2768          list_node_t link;
2626 2769          dsl_dataset_t *ds;
2627 2770  };
2628 2771  
/*
 * State passed from dsl_dataset_promote() to dsl_dataset_promote_check()
 * and dsl_dataset_promote_sync().
 */
2629 2772  struct promotearg {
                   /* lists of struct promotenode built by snaplist_make() */
2630 2773          list_t shared_snaps, origin_snaps, clone_snaps;
                   /* held only when the origin's dir itself has an origin */
2631 2774          dsl_dataset_t *origin_origin;
                   /* space figures filled in by dsl_dataset_promote_check() */
2632 2775          uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
                   /* snapshot name recorded when the check's snapshot loop fails */
2633 2776          char *err_ds;
2634 2777  };
2635 2778  
/*
 * Forward declarations of snapshot-list helpers; snaplist_space() is
 * called by dsl_dataset_promote_check() ahead of its definition.
 */
2636 2779  static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
2637 2780  static boolean_t snaplist_unstable(list_t *l);
2638 2781  
/*
 * Check callback for the promote sync task started by
 * dsl_dataset_promote().  arg1 is the clone's head dataset and arg2 is
 * the struct promotearg.
 *
 * Returns EINVAL if the dataset's dir is not a clone.  When the tx is not
 * syncing, returns 0 without doing the expensive checks.  In syncing
 * context it returns EXDEV if DS_FLAG_NOPROMOTE is set, EEXIST if a
 * shared snapshot's name already exists under the clone (pa->err_ds is
 * set to that snapshot name), or an error from the lookup, transfer or
 * snaplist_space() calls.  On success pa->unique, pa->used, pa->comp,
 * pa->uncomp, pa->cloneusedsnap and pa->originusedsnap have been
 * computed for dsl_dataset_promote_sync().
 */
2639 2782  static int
2640 2783  dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
2641 2784  {
2642 2785          dsl_dataset_t *hds = arg1;
2643 2786          struct promotearg *pa = arg2;
2644 2787          struct promotenode *snap = list_head(&pa->shared_snaps);
2645 2788          dsl_dataset_t *origin_ds = snap->ds;
2646 2789          int err;
2647 2790          uint64_t unused;
2648 2791  
2649 2792          /* Check that it is a real clone */
2650 2793          if (!dsl_dir_is_clone(hds->ds_dir))
2651 2794                  return (EINVAL);
2652 2795  
2653 2796          /* Since this is so expensive, don't do the preliminary check */
2654 2797          if (!dmu_tx_is_syncing(tx))
2655 2798                  return (0);
2656 2799  
2657 2800          if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)
2658 2801                  return (EXDEV);
2659 2802  
2660 2803          /* compute origin's new unique space */
2661 2804          snap = list_tail(&pa->clone_snaps);
2662 2805          ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
2663 2806          dsl_deadlist_space_range(&snap->ds->ds_deadlist,
2664 2807              origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
2665 2808              &pa->unique, &unused, &unused);
2666 2809  
2667 2810          /*
2668 2811           * Walk the snapshots that we are moving
2669 2812           *
2670 2813           * Compute space to transfer.  Consider the incremental changes
2671 2814           * to used for each snapshot:
2672 2815           * (my used) = (prev's used) + (blocks born) - (blocks killed)
2673 2816           * So each snapshot gave birth to:
2674 2817           * (blocks born) = (my used) - (prev's used) + (blocks killed)
2675 2818           * So a sequence would look like:
2676 2819           * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
2677 2820           * Which simplifies to:
2678 2821           * uN + kN + kN-1 + ... + k1 + k0
2679 2822           * Note however, if we stop before we reach the ORIGIN we get:
2680 2823           * uN + kN + kN-1 + ... + kM - uM-1
2681 2824           */
2682 2825          pa->used = origin_ds->ds_phys->ds_referenced_bytes;
2683 2826          pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
2684 2827          pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
2685 2828          for (snap = list_head(&pa->shared_snaps); snap;
2686 2829              snap = list_next(&pa->shared_snaps, snap)) {
2687 2830                  uint64_t val, dlused, dlcomp, dluncomp;
2688 2831                  dsl_dataset_t *ds = snap->ds;
2689 2832  
2690 2833                  /* Check that the snapshot name does not conflict */
2691 2834                  VERIFY(0 == dsl_dataset_get_snapname(ds));
2692 2835                  err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
                           /* a successful lookup means the name is already taken */
2693 2836                  if (err == 0) {
2694 2837                          err = EEXIST;
2695 2838                          goto out;
2696 2839                  }
2697 2840                  if (err != ENOENT)
2698 2841                          goto out;
2699 2842  
2700 2843                  /* The very first snapshot does not have a deadlist */
2701 2844                  if (ds->ds_phys->ds_prev_snap_obj == 0)
2702 2845                          continue;
2703 2846  
2704 2847                  dsl_deadlist_space(&ds->ds_deadlist,
2705 2848                      &dlused, &dlcomp, &dluncomp);
2706 2849                  pa->used += dlused;
2707 2850                  pa->comp += dlcomp;
2708 2851                  pa->uncomp += dluncomp;
2709 2852          }
2710 2853

↓ open down ↓

635 lines elided

↑ open up ↑

2711 2854          /*
2712 2855           * If we are a clone of a clone then we never reached ORIGIN,
2713 2856           * so we need to subtract out the clone origin's used space.
2714 2857           */
2715 2858          if (pa->origin_origin) {
2716 2859                  pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
2717 2860                  pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
2718 2861                  pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
2719 2862          }
2720 2863  
2721      -        /* Check that there is enough space here */
     2864 +        /* Check that there is enough space and quota headroom here */
2722 2865          err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2723      -            pa->used);
     2866 +            origin_ds->ds_dir, pa->used, tx);
2724 2867          if (err)
2725 2868                  return (err);
2726 2869  
2727 2870          /*
2728 2871           * Compute the amounts of space that will be used by snapshots
2729 2872           * after the promotion (for both origin and clone).  For each,
2730 2873           * it is the amount of space that will be on all of their
2731 2874           * deadlists (that was not born before their new origin).
2732 2875           */
2733 2876          if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {

2734 2877                  uint64_t space;
2735 2878  
2736 2879                  /*
2737 2880                   * Note, typically this will not be a clone of a clone,
2738 2881                   * so dd_origin_txg will be < TXG_INITIAL, so
2739 2882                   * these snaplist_space() -> dsl_deadlist_space_range()
2740 2883                   * calls will be fast because they do not have to
2741 2884                   * iterate over all bps.
2742 2885                   */
2743 2886                  snap = list_head(&pa->origin_snaps);
2744 2887                  err = snaplist_space(&pa->shared_snaps,
2745 2888                      snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap);
2746 2889                  if (err)
2747 2890                          return (err);
2748 2891  
2749 2892                  err = snaplist_space(&pa->clone_snaps,
2750 2893                      snap->ds->ds_dir->dd_origin_txg, &space);
2751 2894                  if (err)
2752 2895                          return (err);
2753 2896                  pa->cloneusedsnap += space;
2754 2897          }
2755 2898          if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
2756 2899                  err = snaplist_space(&pa->origin_snaps,
2757 2900                      origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap);
2758 2901                  if (err)
2759 2902                          return (err);
2760 2903          }
2761 2904  
2762 2905          return (0);
2763 2906  out:
                   /*
                    * Only reached from inside the shared_snaps loop, so 'snap'
                    * is the snapshot whose name check failed.
                    */
2764 2907          pa->err_ds =  snap->ds->ds_snapname;
2765 2908          return (err);
2766 2909  }
2767 2910  
/*
 * Sync callback for the promote sync task started by
 * dsl_dataset_promote().  arg1 is the clone's head dataset and arg2 is
 * the struct promotearg filled in by dsl_dataset_promote_check().
 *
 * In order, it: repoints the origin snapshot's next-snapshot and
 * next-clones references, swaps the origin relationship between the
 * clone's dir (dd) and the origin's dir (odd), updates the dd_clones
 * entries, moves every shared snapshot (name entry, containing dir and
 * clone references) from odd to dd, applies the space-accounting deltas
 * to both dirs, stores the origin's new unique bytes and logs a history
 * record.
 */
2768 2911  static void
2769 2912  dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
2770 2913  {
2771 2914          dsl_dataset_t *hds = arg1;
2772 2915          struct promotearg *pa = arg2;
2773 2916          struct promotenode *snap = list_head(&pa->shared_snaps);
2774 2917          dsl_dataset_t *origin_ds = snap->ds;
2775 2918          dsl_dataset_t *origin_head;
2776 2919          dsl_dir_t *dd = hds->ds_dir;
2777 2920          dsl_pool_t *dp = hds->ds_dir->dd_pool;
2778 2921          dsl_dir_t *odd = NULL;
2779 2922          uint64_t oldnext_obj;
2780 2923          int64_t delta;
2781 2924  
2782 2925          ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
2783 2926  
2784 2927          snap = list_head(&pa->origin_snaps);
2785 2928          origin_head = snap->ds;
2786 2929  
2787 2930          /*
2788 2931           * We need to explicitly open odd, since origin_ds's dd will be
2789 2932           * changing.
2790 2933           */
2791 2934          VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
2792 2935              NULL, FTAG, &odd));
2793 2936  
2794 2937          /* change origin's next snap */
2795 2938          dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
2796 2939          oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj;
2797 2940          snap = list_tail(&pa->clone_snaps);
2798 2941          ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
2799 2942          origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object;
2800 2943  
2801 2944          /* change the origin's next clone */
2802 2945          if (origin_ds->ds_phys->ds_next_clones_obj) {
2803 2946                  remove_from_next_clones(origin_ds, snap->ds->ds_object, tx);
2804 2947                  VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
2805 2948                      origin_ds->ds_phys->ds_next_clones_obj,
2806 2949                      oldnext_obj, tx));
2807 2950          }
2808 2951  
2809 2952          /* change origin */
2810 2953          dmu_buf_will_dirty(dd->dd_dbuf, tx);
2811 2954          ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
2812 2955          dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
2813 2956          dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg;
2814 2957          dmu_buf_will_dirty(odd->dd_dbuf, tx);
2815 2958          odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
2816 2959          origin_head->ds_dir->dd_origin_txg =
2817 2960              origin_ds->ds_phys->ds_creation_txg;
2818 2961  
2819 2962          /* change dd_clone entries */
                   /*
                    * NOTE(review): pa->origin_origin is dereferenced below without
                    * a NULL check; presumably it is always set on pools at or
                    * above SPA_VERSION_DIR_CLONES -- confirm.
                    */
2820 2963          if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
2821 2964                  VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
2822 2965                      odd->dd_phys->dd_clones, hds->ds_object, tx));
2823 2966                  VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
2824 2967                      pa->origin_origin->ds_dir->dd_phys->dd_clones,
2825 2968                      hds->ds_object, tx));
2826 2969  
2827 2970                  VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
2828 2971                      pa->origin_origin->ds_dir->dd_phys->dd_clones,
2829 2972                      origin_head->ds_object, tx));
2830 2973                  if (dd->dd_phys->dd_clones == 0) {
2831 2974                          dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset,
2832 2975                              DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
2833 2976                  }
2834 2977                  VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
2835 2978                      dd->dd_phys->dd_clones, origin_head->ds_object, tx));
2836 2979  
2837 2980          }
2838 2981  
2839 2982          /* move snapshots to this dir */
2840 2983          for (snap = list_head(&pa->shared_snaps); snap;
2841 2984              snap = list_next(&pa->shared_snaps, snap)) {
2842 2985                  dsl_dataset_t *ds = snap->ds;
2843 2986  
2844 2987                  /* unregister props as dsl_dir is changing */
2845 2988                  if (ds->ds_objset) {

↓ open down ↓

112 lines elided

↑ open up ↑

2846 2989                          dmu_objset_evict(ds->ds_objset);
2847 2990                          ds->ds_objset = NULL;
2848 2991                  }
2849 2992                  /* move snap name entry */
2850 2993                  VERIFY(0 == dsl_dataset_get_snapname(ds));
2851 2994                  VERIFY(0 == dsl_dataset_snap_remove(origin_head,
2852 2995                      ds->ds_snapname, tx));
2853 2996                  VERIFY(0 == zap_add(dp->dp_meta_objset,
2854 2997                      hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
2855 2998                      8, 1, &ds->ds_object, tx));
                           /*
                            * NOTE(review): presumably counts the moved snapshot
                            * against the clone's dir -- confirm against
                            * dsl_snapcount_adjust().
                            */
     2999 +                dsl_snapcount_adjust(hds->ds_dir, tx, 1, B_TRUE);
2856 3000  
2857 3001                  /* change containing dsl_dir */
2858 3002                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
2859 3003                  ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
2860 3004                  ds->ds_phys->ds_dir_obj = dd->dd_object;
2861 3005                  ASSERT3P(ds->ds_dir, ==, odd);
2862 3006                  dsl_dir_close(ds->ds_dir, ds);
2863 3007                  VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
2864 3008                      NULL, ds, &ds->ds_dir));
2865 3009

2866 3010                  /* move any clone references */
2867 3011                  if (ds->ds_phys->ds_next_clones_obj &&
2868 3012                      spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
2869 3013                          zap_cursor_t zc;
2870 3014                          zap_attribute_t za;
2871 3015  
2872 3016                          for (zap_cursor_init(&zc, dp->dp_meta_objset,
2873 3017                              ds->ds_phys->ds_next_clones_obj);
2874 3018                              zap_cursor_retrieve(&zc, &za) == 0;
2875 3019                              zap_cursor_advance(&zc)) {
2876 3020                                  dsl_dataset_t *cnds;
2877 3021                                  uint64_t o;
2878 3022  
2879 3023                                  if (za.za_first_integer == oldnext_obj) {
2880 3024                                          /*
2881 3025                                           * We've already moved the
2882 3026                                           * origin's reference.
2883 3027                                           */
2884 3028                                          continue;
2885 3029                                  }
2886 3030  
2887 3031                                  VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
2888 3032                                      za.za_first_integer, FTAG, &cnds));
2889 3033                                  o = cnds->ds_dir->dd_phys->dd_head_dataset_obj;
2890 3034  
2891 3035                                  VERIFY3U(zap_remove_int(dp->dp_meta_objset,
2892 3036                                      odd->dd_phys->dd_clones, o, tx), ==, 0);
2893 3037                                  VERIFY3U(zap_add_int(dp->dp_meta_objset,
2894 3038                                      dd->dd_phys->dd_clones, o, tx), ==, 0);
2895 3039                                  dsl_dataset_rele(cnds, FTAG);
2896 3040                          }
2897 3041                          zap_cursor_fini(&zc);
2898 3042                  }
2899 3043  
2900 3044                  ASSERT0(dsl_prop_numcb(ds));
2901 3045          }
2902 3046  
2903 3047          /*
2904 3048           * Change space accounting.
2905 3049           * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either
2906 3050           * both be valid, or both be 0 (resulting in delta == 0).  This
2907 3051           * is true for each of {clone,origin} independently.
2908 3052           */
2909 3053  
2910 3054          delta = pa->cloneusedsnap -
2911 3055              dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
2912 3056          ASSERT3S(delta, >=, 0);
2913 3057          ASSERT3U(pa->used, >=, delta);
2914 3058          dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
2915 3059          dsl_dir_diduse_space(dd, DD_USED_HEAD,
2916 3060              pa->used - delta, pa->comp, pa->uncomp, tx);
2917 3061  
2918 3062          delta = pa->originusedsnap -
2919 3063              odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
2920 3064          ASSERT3S(delta, <=, 0);
2921 3065          ASSERT3U(pa->used, >=, -delta);
2922 3066          dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
2923 3067          dsl_dir_diduse_space(odd, DD_USED_HEAD,
2924 3068              -pa->used - delta, -pa->comp, -pa->uncomp, tx);
2925 3069  
2926 3070          origin_ds->ds_phys->ds_unique_bytes = pa->unique;
2927 3071  
2928 3072          /* log history record */
2929 3073          spa_history_log_internal_ds(hds, "promote", tx, "");
2930 3074  
2931 3075          dsl_dir_close(odd, FTAG);
2932 3076  }
2933 3077  
2934 3078  static char *snaplist_tag = "snaplist";
2935 3079  /*
2936 3080   * Make a list of dsl_dataset_t's for the snapshots between first_obj
2937 3081   * (exclusive) and last_obj (inclusive).  The list will be in reverse
2938 3082   * order (last_obj will be the list_head()).  If first_obj == 0, do all
2939 3083   * snapshots back to this dataset's origin.
2940 3084   */
2941 3085  static int
2942 3086  snaplist_make(dsl_pool_t *dp, boolean_t own,
2943 3087      uint64_t first_obj, uint64_t last_obj, list_t *l)
2944 3088  {
2945 3089          uint64_t obj = last_obj;
2946 3090  
2947 3091          ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock));
2948 3092  
2949 3093          list_create(l, sizeof (struct promotenode),
2950 3094              offsetof(struct promotenode, link));
2951 3095  
2952 3096          while (obj != first_obj) {
2953 3097                  dsl_dataset_t *ds;
2954 3098                  struct promotenode *snap;
2955 3099                  int err;
2956 3100  
2957 3101                  if (own) {
2958 3102                          err = dsl_dataset_own_obj(dp, obj,
2959 3103                              0, snaplist_tag, &ds);
2960 3104                          if (err == 0)
2961 3105                                  dsl_dataset_make_exclusive(ds, snaplist_tag);
2962 3106                  } else {
2963 3107                          err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds);
2964 3108                  }
2965 3109                  if (err == ENOENT) {
2966 3110                          /* lost race with snapshot destroy */
2967 3111                          struct promotenode *last = list_tail(l);
2968 3112                          ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj);
2969 3113                          obj = last->ds->ds_phys->ds_prev_snap_obj;
2970 3114                          continue;
2971 3115                  } else if (err) {
2972 3116                          return (err);
2973 3117                  }
2974 3118  
2975 3119                  if (first_obj == 0)
2976 3120                          first_obj = ds->ds_dir->dd_phys->dd_origin_obj;
2977 3121  
2978 3122                  snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP);
2979 3123                  snap->ds = ds;
2980 3124                  list_insert_tail(l, snap);
2981 3125                  obj = ds->ds_phys->ds_prev_snap_obj;
2982 3126          }
2983 3127  
2984 3128          return (0);
2985 3129  }
2986 3130  
2987 3131  static int
2988 3132  snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep)
2989 3133  {
2990 3134          struct promotenode *snap;
2991 3135  
2992 3136          *spacep = 0;
2993 3137          for (snap = list_head(l); snap; snap = list_next(l, snap)) {
2994 3138                  uint64_t used, comp, uncomp;
2995 3139                  dsl_deadlist_space_range(&snap->ds->ds_deadlist,
2996 3140                      mintxg, UINT64_MAX, &used, &comp, &uncomp);
2997 3141                  *spacep += used;
2998 3142          }
2999 3143          return (0);
3000 3144  }
3001 3145  
3002 3146  static void
3003 3147  snaplist_destroy(list_t *l, boolean_t own)
3004 3148  {
3005 3149          struct promotenode *snap;
3006 3150  
3007 3151          if (!l || !list_link_active(&l->list_head))
3008 3152                  return;
3009 3153  
3010 3154          while ((snap = list_tail(l)) != NULL) {
3011 3155                  list_remove(l, snap);
3012 3156                  if (own)
3013 3157                          dsl_dataset_disown(snap->ds, snaplist_tag);
3014 3158                  else
3015 3159                          dsl_dataset_rele(snap->ds, snaplist_tag);
3016 3160                  kmem_free(snap, sizeof (struct promotenode));
3017 3161          }
3018 3162          list_destroy(l);
3019 3163  }
3020 3164  
3021 3165  /*
3022 3166   * Promote a clone.  Nomenclature note:
3023 3167   * "clone" or "cds": the original clone which is being promoted
3024 3168   * "origin" or "ods": the snapshot which is originally clone's origin
3025 3169   * "origin head" or "ohds": the dataset which is the head
3026 3170   * (filesystem/volume) for the origin
3027 3171   * "origin origin": the origin of the origin's filesystem (typically
3028 3172   * NULL, indicating that the clone is not a clone of a clone).
3029 3173   */
3030 3174  int
3031 3175  dsl_dataset_promote(const char *name, char *conflsnap)
3032 3176  {
3033 3177          dsl_dataset_t *ds;
3034 3178          dsl_dir_t *dd;
3035 3179          dsl_pool_t *dp;
3036 3180          dmu_object_info_t doi;
3037 3181          struct promotearg pa = { 0 };
3038 3182          struct promotenode *snap;
3039 3183          int err;
3040 3184  
3041 3185          err = dsl_dataset_hold(name, FTAG, &ds);
3042 3186          if (err)
3043 3187                  return (err);
3044 3188          dd = ds->ds_dir;
3045 3189          dp = dd->dd_pool;
3046 3190  
3047 3191          err = dmu_object_info(dp->dp_meta_objset,
3048 3192              ds->ds_phys->ds_snapnames_zapobj, &doi);
3049 3193          if (err) {
3050 3194                  dsl_dataset_rele(ds, FTAG);
3051 3195                  return (err);
3052 3196          }
3053 3197  
3054 3198          if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) {
3055 3199                  dsl_dataset_rele(ds, FTAG);
3056 3200                  return (EINVAL);
3057 3201          }
3058 3202  
3059 3203          /*
3060 3204           * We are going to inherit all the snapshots taken before our
3061 3205           * origin (i.e., our new origin will be our parent's origin).
3062 3206           * Take ownership of them so that we can rename them into our
3063 3207           * namespace.
3064 3208           */
3065 3209          rw_enter(&dp->dp_config_rwlock, RW_READER);
3066 3210  
3067 3211          err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj,
3068 3212              &pa.shared_snaps);
3069 3213          if (err != 0)
3070 3214                  goto out;
3071 3215  
3072 3216          err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps);
3073 3217          if (err != 0)
3074 3218                  goto out;
3075 3219  
3076 3220          snap = list_head(&pa.shared_snaps);
3077 3221          ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
3078 3222          err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
3079 3223              snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
3080 3224          if (err != 0)
3081 3225                  goto out;
3082 3226  
3083 3227          if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) {
3084 3228                  err = dsl_dataset_hold_obj(dp,
3085 3229                      snap->ds->ds_dir->dd_phys->dd_origin_obj,
3086 3230                      FTAG, &pa.origin_origin);
3087 3231                  if (err != 0)
3088 3232                          goto out;
3089 3233          }
3090 3234  
3091 3235  out:
3092 3236          rw_exit(&dp->dp_config_rwlock);
3093 3237  
3094 3238          /*
3095 3239           * Add in 128x the snapnames zapobj size, since we will be moving
3096 3240           * a bunch of snapnames to the promoted ds, and dirtying their
3097 3241           * bonus buffers.
3098 3242           */
3099 3243          if (err == 0) {
3100 3244                  err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
3101 3245                      dsl_dataset_promote_sync, ds, &pa,
3102 3246                      2 + 2 * doi.doi_physical_blocks_512);
3103 3247                  if (err && pa.err_ds && conflsnap)
3104 3248                          (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN);
3105 3249          }
3106 3250  
3107 3251          snaplist_destroy(&pa.shared_snaps, B_TRUE);
3108 3252          snaplist_destroy(&pa.clone_snaps, B_FALSE);
3109 3253          snaplist_destroy(&pa.origin_snaps, B_FALSE);
3110 3254          if (pa.origin_origin)
3111 3255                  dsl_dataset_rele(pa.origin_origin, FTAG);
3112 3256          dsl_dataset_rele(ds, FTAG);
3113 3257          return (err);
3114 3258  }
3115 3259  
/*
 * Argument block read by dsl_dataset_clone_swap_check() and
 * dsl_dataset_clone_swap_sync().
 */
3116 3260  struct cloneswaparg {
3117 3261          dsl_dataset_t *cds; /* clone dataset */
3118 3262          dsl_dataset_t *ohds; /* origin's head dataset */
                   /* when set, the check skips its "ohds modified since last snap" test */
3119 3263          boolean_t force;
3120 3264          int64_t unused_refres_delta; /* change in unconsumed refreservation */
3121 3265  };
3122 3266  
3123 3267  /* ARGSUSED */
3124 3268  static int
3125 3269  dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
3126 3270  {
3127 3271          struct cloneswaparg *csa = arg1;
3128 3272  
3129 3273          /* they should both be heads */
3130 3274          if (dsl_dataset_is_snapshot(csa->cds) ||
3131 3275              dsl_dataset_is_snapshot(csa->ohds))
3132 3276                  return (EINVAL);
3133 3277  
3134 3278          /* the branch point should be just before them */
3135 3279          if (csa->cds->ds_prev != csa->ohds->ds_prev)
3136 3280                  return (EINVAL);
3137 3281  
3138 3282          /* cds should be the clone (unless they are unrelated) */
3139 3283          if (csa->cds->ds_prev != NULL &&
3140 3284              csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap &&
3141 3285              csa->ohds->ds_object !=
3142 3286              csa->cds->ds_prev->ds_phys->ds_next_snap_obj)
3143 3287                  return (EINVAL);
3144 3288  
3145 3289          /* the clone should be a child of the origin */
3146 3290          if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
3147 3291                  return (EINVAL);
3148 3292  
3149 3293          /* ohds shouldn't be modified unless 'force' */
3150 3294          if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
3151 3295                  return (ETXTBSY);
3152 3296  
3153 3297          /* adjust amount of any unconsumed refreservation */
3154 3298          csa->unused_refres_delta =
3155 3299              (int64_t)MIN(csa->ohds->ds_reserved,
3156 3300              csa->ohds->ds_phys->ds_unique_bytes) -
3157 3301              (int64_t)MIN(csa->ohds->ds_reserved,
3158 3302              csa->cds->ds_phys->ds_unique_bytes);
3159 3303  
3160 3304          if (csa->unused_refres_delta > 0 &&
3161 3305              csa->unused_refres_delta >
3162 3306              dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE))
3163 3307                  return (ENOSPC);
3164 3308  
3165 3309          if (csa->ohds->ds_quota != 0 &&
3166 3310              csa->cds->ds_phys->ds_unique_bytes > csa->ohds->ds_quota)
3167 3311                  return (EDQUOT);
3168 3312  
3169 3313          return (0);
3170 3314  }
3171 3315  
3172 3316  /* ARGSUSED */
3173 3317  static void
3174 3318  dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
3175 3319  {
3176 3320          struct cloneswaparg *csa = arg1;
3177 3321          dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
3178 3322  
3179 3323          ASSERT(csa->cds->ds_reserved == 0);
3180 3324          ASSERT(csa->ohds->ds_quota == 0 ||
3181 3325              csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota);
3182 3326  
3183 3327          dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
3184 3328          dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
3185 3329  
3186 3330          if (csa->cds->ds_objset != NULL) {
3187 3331                  dmu_objset_evict(csa->cds->ds_objset);
3188 3332                  csa->cds->ds_objset = NULL;
3189 3333          }
3190 3334  
3191 3335          if (csa->ohds->ds_objset != NULL) {
3192 3336                  dmu_objset_evict(csa->ohds->ds_objset);
3193 3337                  csa->ohds->ds_objset = NULL;
3194 3338          }
3195 3339  
3196 3340          /*
3197 3341           * Reset origin's unique bytes, if it exists.
3198 3342           */
3199 3343          if (csa->cds->ds_prev) {
3200 3344                  dsl_dataset_t *origin = csa->cds->ds_prev;
3201 3345                  uint64_t comp, uncomp;
3202 3346  
3203 3347                  dmu_buf_will_dirty(origin->ds_dbuf, tx);
3204 3348                  dsl_deadlist_space_range(&csa->cds->ds_deadlist,
3205 3349                      origin->ds_phys->ds_prev_snap_txg, UINT64_MAX,
3206 3350                      &origin->ds_phys->ds_unique_bytes, &comp, &uncomp);
3207 3351          }
3208 3352  
3209 3353          /* swap blkptrs */
3210 3354          {
3211 3355                  blkptr_t tmp;
3212 3356                  tmp = csa->ohds->ds_phys->ds_bp;
3213 3357                  csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
3214 3358                  csa->cds->ds_phys->ds_bp = tmp;
3215 3359          }
3216 3360  
3217 3361          /* set dd_*_bytes */
3218 3362          {
3219 3363                  int64_t dused, dcomp, duncomp;
3220 3364                  uint64_t cdl_used, cdl_comp, cdl_uncomp;
3221 3365                  uint64_t odl_used, odl_comp, odl_uncomp;
3222 3366  
3223 3367                  ASSERT3U(csa->cds->ds_dir->dd_phys->
3224 3368                      dd_used_breakdown[DD_USED_SNAP], ==, 0);
3225 3369  
3226 3370                  dsl_deadlist_space(&csa->cds->ds_deadlist,
3227 3371                      &cdl_used, &cdl_comp, &cdl_uncomp);
3228 3372                  dsl_deadlist_space(&csa->ohds->ds_deadlist,
3229 3373                      &odl_used, &odl_comp, &odl_uncomp);
3230 3374  
3231 3375                  dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
3232 3376                      (csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
3233 3377                  dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
3234 3378                      (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
3235 3379                  duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
3236 3380                      cdl_uncomp -
3237 3381                      (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
3238 3382  
3239 3383                  dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
3240 3384                      dused, dcomp, duncomp, tx);
3241 3385                  dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
3242 3386                      -dused, -dcomp, -duncomp, tx);
3243 3387  
3244 3388                  /*
3245 3389                   * The difference in the space used by snapshots is the
3246 3390                   * difference in snapshot space due to the head's
3247 3391                   * deadlist (since that's the only thing that's
3248 3392                   * changing that affects the snapused).
3249 3393                   */
3250 3394                  dsl_deadlist_space_range(&csa->cds->ds_deadlist,
3251 3395                      csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
3252 3396                      &cdl_used, &cdl_comp, &cdl_uncomp);
3253 3397                  dsl_deadlist_space_range(&csa->ohds->ds_deadlist,
3254 3398                      csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
3255 3399                      &odl_used, &odl_comp, &odl_uncomp);
3256 3400                  dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
3257 3401                      DD_USED_HEAD, DD_USED_SNAP, tx);
3258 3402          }
3259 3403  
3260 3404          /* swap ds_*_bytes */
3261 3405          SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
3262 3406              csa->cds->ds_phys->ds_referenced_bytes);
3263 3407          SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
3264 3408              csa->cds->ds_phys->ds_compressed_bytes);
3265 3409          SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
3266 3410              csa->cds->ds_phys->ds_uncompressed_bytes);
3267 3411          SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
3268 3412              csa->cds->ds_phys->ds_unique_bytes);
3269 3413  
3270 3414          /* apply any parent delta for change in unconsumed refreservation */
3271 3415          dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
3272 3416              csa->unused_refres_delta, 0, 0, tx);
3273 3417  
3274 3418          /*
3275 3419           * Swap deadlists.
3276 3420           */
3277 3421          dsl_deadlist_close(&csa->cds->ds_deadlist);
3278 3422          dsl_deadlist_close(&csa->ohds->ds_deadlist);
3279 3423          SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
3280 3424              csa->cds->ds_phys->ds_deadlist_obj);
3281 3425          dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
3282 3426              csa->cds->ds_phys->ds_deadlist_obj);
3283 3427          dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
3284 3428              csa->ohds->ds_phys->ds_deadlist_obj);
3285 3429  
3286 3430          dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx);
3287 3431  
3288 3432          spa_history_log_internal_ds(csa->cds, "clone swap", tx,
3289 3433              "parent=%s", csa->ohds->ds_dir->dd_myname);
3290 3434  }
3291 3435  
3292 3436  /*
3293 3437   * Swap 'clone' with its origin head datasets.  Used at the end of "zfs
3294 3438   * recv" into an existing fs to swizzle the file system to the new
3295 3439   * version, and by "zfs rollback".  Can also be used to swap two
3296 3440   * independent head datasets if neither has any snapshots.
            *
            * Both datasets must have a non-NULL ds_owner (asserted below).  Each
            * dataset's ds_rwlock is taken as writer unless this thread already
            * holds it as writer; this function does not release the locks after
            * the sync task has run.
            *
            * Returns the result of the sync task built from
            * dsl_dataset_clone_swap_check() and dsl_dataset_clone_swap_sync().
3297 3441   */
3298 3442  int
3299 3443  dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
3300 3444      boolean_t force)
3301 3445  {
3302 3446          struct cloneswaparg csa;
3303 3447          int error;
3304 3448  
3305 3449          ASSERT(clone->ds_owner);
3306 3450          ASSERT(origin_head->ds_owner);
3307 3451  retry:
3308 3452          /*
3309 3453           * Need exclusive access for the swap. If we're swapping these
3310 3454           * datasets back after an error, we already hold the locks.
3311 3455           */
3312 3456          if (!RW_WRITE_HELD(&clone->ds_rwlock))
3313 3457                  rw_enter(&clone->ds_rwlock, RW_WRITER);
                   /*
                    * If origin_head cannot be taken without blocking, drop clone,
                    * block on origin_head, then try clone again without blocking.
                    * If that fails too, release origin_head and start over.
                    * Presumably this avoids deadlocking against a thread that
                    * takes the two locks in the opposite order.
                    */
3314 3458          if (!RW_WRITE_HELD(&origin_head->ds_rwlock) &&
3315 3459              !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
3316 3460                  rw_exit(&clone->ds_rwlock);
3317 3461                  rw_enter(&origin_head->ds_rwlock, RW_WRITER);
3318 3462                  if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
3319 3463                          rw_exit(&origin_head->ds_rwlock);
3320 3464                          goto retry;
3321 3465                  }
3322 3466          }
                   /* Package the arguments for the check/sync callbacks. */
3323 3467          csa.cds = clone;
3324 3468          csa.ohds = origin_head;
3325 3469          csa.force = force;
3326 3470          error = dsl_sync_task_do(clone->ds_dir->dd_pool,
3327 3471              dsl_dataset_clone_swap_check,
3328 3472              dsl_dataset_clone_swap_sync, &csa, NULL, 9);
3329 3473          return (error);
3330 3474  }
3331 3475  
3332 3476  /*
3333 3477   * Given a pool name and a dataset object number in that pool,
3334 3478   * return the name of that dataset.
            *
            * pname - name of the pool to open
            * obj   - dataset object number to look up
            * buf   - receives the dataset name on success; no length is passed,
            *         so the caller's buffer must be large enough for any dataset
            *         name (TODO confirm the expected size with callers)
            *
            * Returns 0 on success, otherwise the error from spa_open() or
            * dsl_dataset_hold_obj(); buf is not written on failure.
3335 3479   */
3336 3480  int
3337 3481  dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
3338 3482  {
3339 3483          spa_t *spa;
3340 3484          dsl_pool_t *dp;
3341 3485          dsl_dataset_t *ds;
3342 3486          int error;
3343 3487  
3344 3488          if ((error = spa_open(pname, &spa, FTAG)) != 0)
3345 3489                  return (error);
3346 3490          dp = spa_get_dsl(spa);
                   /* The hold/name/release sequence runs under the config lock. */
3347 3491          rw_enter(&dp->dp_config_rwlock, RW_READER);
3348 3492          if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) {
3349 3493                  dsl_dataset_name(ds, buf);
3350 3494                  dsl_dataset_rele(ds, FTAG);
3351 3495          }
3352 3496          rw_exit(&dp->dp_config_rwlock);
3353 3497          spa_close(spa, FTAG);
3354 3498  
3355 3499          return (error);
3356 3500  }
3357 3501  
           /*
            * Evaluate a request for 'asize' more bytes against this dataset's
            * refreservation (ds_reserved) and refquota (ds_quota).
            *
            * ds          - dataset being charged
            * check_quota - if false, the ds_quota comparison is skipped
            * asize       - size of the request (asserted to be > 0)
            * inflight    - added to the on-disk referenced bytes for the quota
            *               comparison; presumably bytes of pending changes that
            *               have not been synced yet
            * used        - in/out: when part of the refreservation is unconsumed
            *               (ds_reserved > ds_unique_bytes), that unconsumed
            *               amount is subtracted from *used
            * ref_rsrv    - out: portion of asize that will come from unconsumed
            *               refreservation; 0 when there is none
            *
            * Returns 0 if the request is acceptable (or the quota is not
            * checked or not set), ERESTART if the caller should retry, or EDQUOT
            * if the on-disk usage is already at or over quota with nothing in
            * flight.  ds_lock is held while the dataset fields are examined.
            */
3358 3502  int
3359 3503  dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
3360 3504      uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv)
3361 3505  {
3362 3506          int error = 0;
3363 3507  
3364 3508          ASSERT3S(asize, >, 0);
3365 3509  
3366 3510          /*
3367 3511           * *ref_rsrv is the portion of asize that will come from any
3368 3512           * unconsumed refreservation space.
3369 3513           */
3370 3514          *ref_rsrv = 0;
3371 3515  
3372 3516          mutex_enter(&ds->ds_lock);
3373 3517          /*
3374 3518           * Make a space adjustment for reserved bytes.
                    * parent_delta() is defined elsewhere in this file; presumably
                    * it yields the part of the growth that is charged beyond the
                    * reservation -- confirm against its definition.
3375 3519           */
3376 3520          if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
3377 3521                  ASSERT3U(*used, >=,
3378 3522                      ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
3379 3523                  *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
3380 3524                  *ref_rsrv =
3381 3525                      asize - MIN(asize, parent_delta(ds, asize + inflight));
3382 3526          }
3383 3527  
                   /* A ds_quota of 0 means there is nothing to enforce here. */
3384 3528          if (!check_quota || ds->ds_quota == 0) {
3385 3529                  mutex_exit(&ds->ds_lock);
3386 3530                  return (0);
3387 3531          }
3388 3532          /*
3389 3533           * If they are requesting more space, and our current estimate
3390 3534           * is over quota, they get to try again unless the actual
3391 3535           * on-disk is over quota and there are no pending changes (which
3392 3536           * may free up space for us).
3393 3537           */
3394 3538          if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
3395 3539                  if (inflight > 0 ||
3396 3540                      ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
3397 3541                          error = ERESTART;
3398 3542                  else
3399 3543                          error = EDQUOT;
3400 3544          }
3401 3545          mutex_exit(&ds->ds_lock);
3402 3546  
3403 3547          return (error);
3404 3548  }
3405 3549  
           /*
            * Sync task check function used by dsl_dataset_set_quota().
            *
            * arg1 - the dataset (dsl_dataset_t *)
            * arg2 - the property set argument (dsl_prop_setarg_t *)
            * tx   - unused
            *
            * Returns ENOTSUP if the pool version predates SPA_VERSION_REFQUOTA,
            * any error from dsl_prop_predict_sync(), ENOSPC if the non-zero
            * effective value is below the dataset's referenced bytes or below
            * ds_reserved, and 0 otherwise.  An effective value of 0 is always
            * accepted.
            */
3406 3550  /* ARGSUSED */
3407 3551  static int
3408 3552  dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
3409 3553  {
3410 3554          dsl_dataset_t *ds = arg1;
3411 3555          dsl_prop_setarg_t *psa = arg2;
3412 3556          int err;
3413 3557  
3414 3558          if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
3415 3559                  return (ENOTSUP);
3416 3560  
                   /* Fills in psa->psa_effective_value, which is tested below. */
3417 3561          if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
3418 3562                  return (err);
3419 3563  
3420 3564          if (psa->psa_effective_value == 0)
3421 3565                  return (0);
3422 3566  
3423 3567          if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
3424 3568              psa->psa_effective_value < ds->ds_reserved)
3425 3569                  return (ENOSPC);
3426 3570  
3427 3571          return (0);
3428 3572  }
3429 3573  
           /* Defined outside this file; called by the *_sync functions below. */
3430 3574  extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *);
3431 3575  
           /*
            * Sync task function used by dsl_dataset_set_quota().
            *
            * arg1 - the dataset (dsl_dataset_t *)
            * arg2 - the property set argument (dsl_prop_setarg_t *)
            * tx   - transaction the change is made in
            *
            * Stores the property via dsl_prop_set_sync() and, if the effective
            * value differs from the current ds_quota, dirties the dataset's
            * dbuf and updates ds_quota.
            */
3432 3576  void
3433 3577  dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
3434 3578  {
3435 3579          dsl_dataset_t *ds = arg1;
3436 3580          dsl_prop_setarg_t *psa = arg2;
                   /* Captured before dsl_prop_set_sync() is called on psa. */
3437 3581          uint64_t effective_value = psa->psa_effective_value;
3438 3582  
3439 3583          dsl_prop_set_sync(ds, psa, tx);
3440 3584          DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);
3441 3585  
3442 3586          if (ds->ds_quota != effective_value) {
3443 3587                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
3444 3588                  ds->ds_quota = effective_value;
3445 3589          }
3446 3590  }
3447 3591  
           /*
            * Set the "refquota" property on the dataset named 'dsname'.
            *
            * dsname - name of the dataset to modify
            * source - property source passed through to the property code
            * quota  - requested value
            *
            * Returns the error from dsl_dataset_hold() if the dataset cannot be
            * held, otherwise the result of the sync task
            * (dsl_dataset_set_quota_check / dsl_dataset_set_quota_sync).
            */
3448 3592  int
3449 3593  dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota)
3450 3594  {
3451 3595          dsl_dataset_t *ds;
3452 3596          dsl_prop_setarg_t psa;
3453 3597          int err;
3454 3598  
3455 3599          dsl_prop_setarg_init_uint64(&psa, "refquota", source, &quota);
3456 3600  
3457 3601          err = dsl_dataset_hold(dsname, FTAG, &ds);
3458 3602          if (err)
3459 3603                  return (err);
3460 3604  
3461 3605          /*
3462 3606           * If someone removes a file, then tries to set the quota, we
3463 3607           * want to make sure the file freeing takes effect.
3464 3608           */
3465 3609          txg_wait_open(ds->ds_dir->dd_pool, 0);
3466 3610  
3467 3611          err = dsl_sync_task_do(ds->ds_dir->dd_pool,
3468 3612              dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync,
3469 3613              ds, &psa, 0);
3470 3614  
3471 3615          dsl_dataset_rele(ds, FTAG);
3472 3616          return (err);
3473 3617  }
3474 3618  
           /*
            * Sync task check function used by dsl_dataset_set_reservation().
            *
            * arg1 - the dataset (dsl_dataset_t *)
            * arg2 - the property set argument (dsl_prop_setarg_t *)
            * tx   - transaction; the space checks run only when it is syncing
            *
            * Returns ENOTSUP if the pool version predates
            * SPA_VERSION_REFRESERVATION, EINVAL for a snapshot, any error from
            * dsl_prop_predict_sync(), and ENOSPC if growing the reservation
            * needs more space than dsl_dir_space_available() reports or the new
            * value exceeds a non-zero ds_quota.  Returns 0 otherwise.
            */
3475 3619  static int
3476 3620  dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
3477 3621  {
3478 3622          dsl_dataset_t *ds = arg1;
3479 3623          dsl_prop_setarg_t *psa = arg2;
3480 3624          uint64_t effective_value;
3481 3625          uint64_t unique;
3482 3626          int err;
3483 3627  
3484 3628          if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
3485 3629              SPA_VERSION_REFRESERVATION)
3486 3630                  return (ENOTSUP);
3487 3631  
3488 3632          if (dsl_dataset_is_snapshot(ds))
3489 3633                  return (EINVAL);
3490 3634  
3491 3635          if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
3492 3636                  return (err);
3493 3637  
3494 3638          effective_value = psa->psa_effective_value;
3495 3639  
3496 3640          /*
3497 3641           * If we are doing the preliminary check in open context, the
3498 3642           * space estimates may be inaccurate.
3499 3643           */
3500 3644          if (!dmu_tx_is_syncing(tx))
3501 3645                  return (0);
3502 3646  
                   /* Refresh ds_unique_bytes if needed, then read it under ds_lock. */
3503 3647          mutex_enter(&ds->ds_lock);
3504 3648          if (!DS_UNIQUE_IS_ACCURATE(ds))
3505 3649                  dsl_dataset_recalc_head_uniq(ds);
3506 3650          unique = ds->ds_phys->ds_unique_bytes;
3507 3651          mutex_exit(&ds->ds_lock);
3508 3652  
                   /*
                    * Only an increase of MAX(unique, reservation) is checked;
                    * delta is the size of that increase.
                    */
3509 3653          if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) {
3510 3654                  uint64_t delta = MAX(unique, effective_value) -
3511 3655                      MAX(unique, ds->ds_reserved);
3512 3656  
3513 3657                  if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
3514 3658                          return (ENOSPC);
3515 3659                  if (ds->ds_quota > 0 &&
3516 3660                      effective_value > ds->ds_quota)
3517 3661                          return (ENOSPC);
3518 3662          }
3519 3663  
3520 3664          return (0);
3521 3665  }
3522 3666  
           /*
            * Sync task function used by dsl_dataset_set_reservation().
            *
            * arg1 - the dataset (dsl_dataset_t *)
            * arg2 - the property set argument (dsl_prop_setarg_t *)
            * tx   - transaction the change is made in
            *
            * Stores the property, sets ds_reserved to the effective value, and
            * charges the resulting change in unconsumed reservation to the
            * dataset's dsl_dir under DD_USED_REFRSRV.
            */
3523 3667  static void
3524 3668  dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
3525 3669  {
3526 3670          dsl_dataset_t *ds = arg1;
3527 3671          dsl_prop_setarg_t *psa = arg2;
3528 3672          uint64_t effective_value = psa->psa_effective_value;
3529 3673          uint64_t unique;
3530 3674          int64_t delta;
3531 3675  
3532 3676          dsl_prop_set_sync(ds, psa, tx);
3533 3677          DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);
3534 3678  
3535 3679          dmu_buf_will_dirty(ds->ds_dbuf, tx);
3536 3680  
                   /*
                    * dd_lock is taken before ds_lock and stays held across the
                    * dsl_dir_diduse_space() call below.
                    */
3537 3681          mutex_enter(&ds->ds_dir->dd_lock);
3538 3682          mutex_enter(&ds->ds_lock);
3539 3683          ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
3540 3684          unique = ds->ds_phys->ds_unique_bytes;
                   /*
                    * delta = (new reservation above unique, floored at 0) minus
                    * (old reservation above unique, floored at 0); it is negative
                    * when the unconsumed reservation shrinks.
                    */
3541 3685          delta = MAX(0, (int64_t)(effective_value - unique)) -
3542 3686              MAX(0, (int64_t)(ds->ds_reserved - unique));
3543 3687          ds->ds_reserved = effective_value;
3544 3688          mutex_exit(&ds->ds_lock);
3545 3689  
3546 3690          dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
3547 3691          mutex_exit(&ds->ds_dir->dd_lock);
3548 3692  }
3549 3693  
           /*
            * Set the "refreservation" property on the dataset named 'dsname'.
            *
            * dsname      - name of the dataset to modify
            * source      - property source passed through to the property code
            * reservation - requested value
            *
            * Returns the error from dsl_dataset_hold() if the dataset cannot be
            * held, otherwise the result of the sync task
            * (dsl_dataset_set_reservation_check / _sync).
            */
3550 3694  int
3551 3695  dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
3552 3696      uint64_t reservation)
3553 3697  {
3554 3698          dsl_dataset_t *ds;
3555 3699          dsl_prop_setarg_t psa;
3556 3700          int err;
3557 3701  
3558 3702          dsl_prop_setarg_init_uint64(&psa, "refreservation", source,
3559 3703              &reservation);
3560 3704  
3561 3705          err = dsl_dataset_hold(dsname, FTAG, &ds);
3562 3706          if (err)
3563 3707                  return (err);
3564 3708  
3565 3709          err = dsl_sync_task_do(ds->ds_dir->dd_pool,
3566 3710              dsl_dataset_set_reservation_check,
3567 3711              dsl_dataset_set_reservation_sync, ds, &psa, 0);
3568 3712  
3569 3713          dsl_dataset_rele(ds, FTAG);
3570 3714          return (err);
3571 3715  }
3572 3716  
           /*
            * Argument handed to dsl_dataset_user_release_onexit().  It identifies
            * the hold to release by pool, dataset object number and hold tag.
            * Allocated in dsl_register_onexit_hold_cleanup() and freed by the
            * callback.
            */
3573 3717  typedef struct zfs_hold_cleanup_arg {
3574 3718          dsl_pool_t *dp;
3575 3719          uint64_t dsobj;
3576 3720          char htag[MAXNAMELEN];
3577 3721  } zfs_hold_cleanup_arg_t;
3578 3722  
           /*
            * Callback registered through zfs_onexit_add_cb() by
            * dsl_register_onexit_hold_cleanup().  'arg' is a
            * zfs_hold_cleanup_arg_t; the hold it describes is released and the
            * argument is then freed.  The result of the release is deliberately
            * ignored.
            */
3579 3723  static void
3580 3724  dsl_dataset_user_release_onexit(void *arg)
3581 3725  {
3582 3726          zfs_hold_cleanup_arg_t *ca = arg;
3583 3727  
3584 3728          (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag,
3585 3729              B_TRUE);
3586 3730          kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
3587 3731  }
3588 3732  
           /*
            * Register an on-exit callback, keyed by 'minor', that releases the
            * user hold 'htag' on dataset 'ds'.
            *
            * The pool, dataset object number and a copy of the tag (truncated to
            * fit ca->htag if necessary) are saved in a freshly allocated
            * zfs_hold_cleanup_arg_t, which the callback frees.  Failure to
            * register the callback trips a VERIFY.
            */
3589 3733  void
3590 3734  dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
3591 3735      minor_t minor)
3592 3736  {
3593 3737          zfs_hold_cleanup_arg_t *ca;
3594 3738  
3595 3739          ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP);
3596 3740          ca->dp = ds->ds_dir->dd_pool;
3597 3741          ca->dsobj = ds->ds_object;
3598 3742          (void) strlcpy(ca->htag, htag, sizeof (ca->htag));
3599 3743          VERIFY3U(0, ==, zfs_onexit_add_cb(minor,
3600 3744              dsl_dataset_user_release_onexit, ca, NULL));
3601 3745  }
3602 3746  
3603 3747  /*
3604 3748   * If you add new checks here, you may need to add
3605 3749   * additional checks to the "temporary" case in
3606 3750   * snapshot_check() in dmu_objset.c.
            *
            * Sync task check function for placing a user hold on a snapshot.
            *
            * arg1 - the dataset (dsl_dataset_t *)
            * arg2 - the hold argument (struct dsl_ds_holdarg *)
            * tx   - unused
            *
            * Returns ENOTSUP if the pool version predates SPA_VERSION_USERREFS,
            * EINVAL if the dataset is not a snapshot, EEXIST if the tag is
            * already present in the dataset's userrefs object, any other error
            * from zap_lookup(), E2BIG if a temporary hold's tag is too long once
            * MAX_TAG_PREFIX_LEN is added, and 0 otherwise.
3607 3751   */
           /* ARGSUSED */
3608 3752  static int
3609 3753  dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)
3610 3754  {
3611 3755          dsl_dataset_t *ds = arg1;
3612 3756          struct dsl_ds_holdarg *ha = arg2;
3613 3757          const char *htag = ha->htag;
3614 3758          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
                   uint64_t tmp;
3615 3759          int error = 0;
3616 3760  
3617 3761          if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
3618 3762                  return (ENOTSUP);
3619 3763  
3620 3764          if (!dsl_dataset_is_snapshot(ds))
3621 3765                  return (EINVAL);
3622 3766  
3623 3767          /* tags must be unique */
3624 3768          mutex_enter(&ds->ds_lock);
3625 3769          if (ds->ds_phys->ds_userrefs_obj) {
                           /*
                            * Only the lookup's result matters, but zap_lookup()
                            * stores the 8-byte value it finds through its last
                            * argument.  Give it a scratch variable; passing 'tx'
                            * here would overwrite the start of the transaction.
                            */
3626 3770                  error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag,
3627 3771                      8, 1, &tmp);
3628 3772                  if (error == 0)
3629 3773                          error = EEXIST;
3630 3774                  else if (error == ENOENT)
3631 3775                          error = 0;
3632 3776          }
3633 3777          mutex_exit(&ds->ds_lock);
3634 3778  
3635 3779          if (error == 0 && ha->temphold &&
3636 3780              strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
3637 3781                  error = E2BIG;
3638 3782  
3639 3783          return (error);
3640 3784  }
3641 3785  
           /*
            * Sync task function that places a user hold on a dataset.
            *
            * arg1 - the dataset (dsl_dataset_t *)
            * arg2 - the hold argument (struct dsl_ds_holdarg *)
            * tx   - transaction the change is made in
            *
            * Creates the userrefs ZAP object on first use, increments
            * ds_userrefs, and adds the tag to the ZAP with the current time (in
            * seconds) as its value.  For a temporary hold, the hold is also
            * recorded with dsl_pool_user_hold().  The action is logged to the
            * pool history.
            */
3642 3786  void
3643 3787  dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)
3644 3788  {
3645 3789          dsl_dataset_t *ds = arg1;
3646 3790          struct dsl_ds_holdarg *ha = arg2;
3647 3791          const char *htag = ha->htag;
3648 3792          dsl_pool_t *dp = ds->ds_dir->dd_pool;
3649 3793          objset_t *mos = dp->dp_meta_objset;
3650 3794          uint64_t now = gethrestime_sec();
3651 3795          uint64_t zapobj;
3652 3796  
3653 3797          mutex_enter(&ds->ds_lock);
3654 3798          if (ds->ds_phys->ds_userrefs_obj == 0) {
3655 3799                  /*
3656 3800                   * This is the first user hold for this dataset.  Create
3657 3801                   * the userrefs zap object.
3658 3802                   */
3659 3803                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
3660 3804                  zapobj = ds->ds_phys->ds_userrefs_obj =
3661 3805                      zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
3662 3806          } else {
3663 3807                  zapobj = ds->ds_phys->ds_userrefs_obj;
3664 3808          }
3665 3809          ds->ds_userrefs++;
3666 3810          mutex_exit(&ds->ds_lock);
3667 3811  
                   /* The check function has already rejected duplicate tags. */
3668 3812          VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx));
3669 3813  
3670 3814          if (ha->temphold) {
3671 3815                  VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object,
3672 3816                      htag, &now, tx));
3673 3817          }
3674 3818  
3675 3819          spa_history_log_internal_ds(ds, "hold", tx,
3676 3820              "tag = %s temp = %d holds now = %llu",
3677 3821              htag, (int)ha->temphold, ds->ds_userrefs);
3678 3822  }
3679 3823  
           /*
            * Queue a user-hold sync task for the snapshot "dsname@snapname".
            * Called directly, or once per dataset through dmu_objset_find(), by
            * dsl_dataset_user_hold().
            *
            * dsname - name of the dataset whose snapshot is to be held
            * arg    - the shared hold argument (struct dsl_ds_holdarg *)
            *
            * On success the dataset stays held with tag ha->dstg and ha->gotone
            * is set; dsl_dataset_user_hold() drops that hold after the task
            * group has run.  A missing snapshot (ENOENT) is ignored in recursive
            * mode.  For any other failure dsname is recorded in ha->failed and
            * the error is returned.
            */
3680 3824  static int
3681 3825  dsl_dataset_user_hold_one(const char *dsname, void *arg)
3682 3826  {
3683 3827          struct dsl_ds_holdarg *ha = arg;
3684 3828          dsl_dataset_t *ds;
3685 3829          int error;
3686 3830          char *name;
3687 3831  
3688 3832          /* alloc a buffer to hold dsname@snapname plus terminating NULL */
3689 3833          name = kmem_asprintf("%s@%s", dsname, ha->snapname);
3690 3834          error = dsl_dataset_hold(name, ha->dstg, &ds);
3691 3835          strfree(name);
3692 3836          if (error == 0) {
3693 3837                  ha->gotone = B_TRUE;
3694 3838                  dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check,
3695 3839                      dsl_dataset_user_hold_sync, ds, ha, 0);
3696 3840          } else if (error == ENOENT && ha->recursive) {
3697 3841                  error = 0;
3698 3842          } else {
3699 3843                  (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
3700 3844          }
3701 3845          return (error);
3702 3846  }
3703 3847  
           /*
            * Place a user hold with tag 'htag' on an already-held dataset by
            * running the hold check/sync functions as a single sync task.
            *
            * ds       - dataset to hold
            * htag     - hold tag
            * temphold - whether the hold is temporary
            *
            * Only the htag and temphold fields of the hold argument are set; the
            * rest is zeroed.  Returns the result of the sync task.
            */
3704 3848  int
3705 3849  dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
3706 3850      boolean_t temphold)
3707 3851  {
3708 3852          struct dsl_ds_holdarg *ha;
3709 3853          int error;
3710 3854  
3711 3855          ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
3712 3856          ha->htag = htag;
3713 3857          ha->temphold = temphold;
3714 3858          error = dsl_sync_task_do(ds->ds_dir->dd_pool,
3715 3859              dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync,
3716 3860              ds, ha, 0);
3717 3861          kmem_free(ha, sizeof (struct dsl_ds_holdarg));
3718 3862  
3719 3863          return (error);
3720 3864  }
3721 3865  
           /*
            * Place a user hold with tag 'htag' on the snapshot
            * "dsname@snapname" and, if 'recursive', on the same-named snapshot
            * of every descendant dataset.
            *
            * dsname     - in/out: dataset name; on error it is overwritten with
            *              the name recorded in ha->failed
            * snapname   - snapshot name (the part after '@')
            * htag       - hold tag
            * recursive  - also visit child datasets (DS_FIND_CHILDREN)
            * temphold   - whether the hold is temporary
            * cleanup_fd - -1, or a file descriptor used to register release of
            *              the hold on exit; only allowed with temphold
            *
            * Returns 0 on success.  Returns EINVAL if cleanup_fd is given
            * without temphold, ENOENT if a recursive request matched no
            * snapshot, or the error from zfs_onexit_fd_hold(), spa_open(), the
            * per-dataset setup or the sync task group.
            *
            * NOTE(review): the final strlcpy() into dsname is bounded by
            * sizeof (ha->failed), not by the size of the caller's buffer, so
            * callers must pass a buffer at least that large -- confirm.
            */
3722 3866  int
3723 3867  dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
3724 3868      boolean_t recursive, boolean_t temphold, int cleanup_fd)
3725 3869  {
3726 3870          struct dsl_ds_holdarg *ha;
3727 3871          dsl_sync_task_t *dst;
3728 3872          spa_t *spa;
3729 3873          int error;
3730 3874          minor_t minor = 0;
3731 3875  
3732 3876          if (cleanup_fd != -1) {
3733 3877                  /* Currently we only support cleanup-on-exit of tempholds. */
3734 3878                  if (!temphold)
3735 3879                          return (EINVAL);
3736 3880                  error = zfs_onexit_fd_hold(cleanup_fd, &minor);
3737 3881                  if (error)
3738 3882                          return (error);
3739 3883          }
3740 3884  
3741 3885          ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
3742 3886  
3743 3887          (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
3744 3888  
3745 3889          error = spa_open(dsname, &spa, FTAG);
3746 3890          if (error) {
3747 3891                  kmem_free(ha, sizeof (struct dsl_ds_holdarg));
3748 3892                  if (cleanup_fd != -1)
3749 3893                          zfs_onexit_fd_rele(cleanup_fd);
3750 3894                  return (error);
3751 3895          }
3752 3896  
3753 3897          ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
3754 3898          ha->htag = htag;
3755 3899          ha->snapname = snapname;
3756 3900          ha->recursive = recursive;
3757 3901          ha->temphold = temphold;
3758 3902  
                   /* Queue one hold task per matching snapshot. */
3759 3903          if (recursive) {
3760 3904                  error = dmu_objset_find(dsname, dsl_dataset_user_hold_one,
3761 3905                      ha, DS_FIND_CHILDREN);
3762 3906          } else {
3763 3907                  error = dsl_dataset_user_hold_one(dsname, ha);
3764 3908          }
3765 3909          if (error == 0)
3766 3910                  error = dsl_sync_task_group_wait(ha->dstg);
3767 3911  
                   /*
                    * Walk every queued task, whether or not the group ran: note
                    * which dataset failed, register on-exit cleanup for holds
                    * that succeeded, and drop the hold taken in
                    * dsl_dataset_user_hold_one().
                    */
3768 3912          for (dst = list_head(&ha->dstg->dstg_tasks); dst;
3769 3913              dst = list_next(&ha->dstg->dstg_tasks, dst)) {
3770 3914                  dsl_dataset_t *ds = dst->dst_arg1;
3771 3915  
3772 3916                  if (dst->dst_err) {
3773 3917                          dsl_dataset_name(ds, ha->failed);
                                   /*
                                    * Strip "@snapname".  Tasks are only queued
                                    * for names built as "dsname@snapname", so an
                                    * '@' is expected to be present.
                                    */
3774 3918                          *strchr(ha->failed, '@') = '\0';
3775 3919                  } else if (error == 0 && minor != 0 && temphold) {
3776 3920                          /*
3777 3921                           * If this hold is to be released upon process exit,
3778 3922                           * register that action now.
3779 3923                           */
3780 3924                          dsl_register_onexit_hold_cleanup(ds, htag, minor);
3781 3925                  }
3782 3926                  dsl_dataset_rele(ds, ha->dstg);
3783 3927          }
3784 3928  
3785 3929          if (error == 0 && recursive && !ha->gotone)
3786 3930                  error = ENOENT;
3787 3931  
                   /* Report the failing dataset name back through dsname. */
3788 3932          if (error)
3789 3933                  (void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
3790 3934  
3791 3935          dsl_sync_task_group_destroy(ha->dstg);
3792 3936  
3793 3937          kmem_free(ha, sizeof (struct dsl_ds_holdarg));
3794 3938          spa_close(spa, FTAG);
3795 3939          if (cleanup_fd != -1)
3796 3940                  zfs_onexit_fd_rele(cleanup_fd);
3797 3941          return (error);
3798 3942  }
3799 3943  
           /*
            * Per-dataset argument for the user-release sync task.  It is built in
            * dsl_dataset_user_release_one() and read by
            * dsl_dataset_user_release_check() and dsl_dataset_user_release_sync().
            */
3800 3944  struct dsl_ds_releasearg {
3801 3945          dsl_dataset_t *ds;
3802 3946          const char *htag;
3803 3947          boolean_t own;          /* do we own or just hold ds? */
3804 3948  };
3805 3949  
           /*
            * Check that hold 'htag' exists on 'ds' and report whether releasing
            * it could trigger destruction of the dataset.
            *
            * ds            - dataset to examine
            * htag          - hold tag to look up
            * might_destroy - out: B_TRUE when this is the only remaining user
            *                 reference (ds_userrefs == 1), ds_num_children is 1
            *                 and the dataset is marked for deferred destroy;
            *                 B_FALSE otherwise, including on error
            *
            * Returns 0 if the tag exists, ESRCH if the dataset has no userrefs
            * object or the tag is not in it, or another error from zap_lookup().
            * ds_lock is held for the duration of the checks.
            */
3806 3950  static int
3807 3951  dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag,
3808 3952      boolean_t *might_destroy)
3809 3953  {
3810 3954          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
3811 3955          uint64_t zapobj;
                   /* Scratch destination for zap_lookup(); the value is not used. */
3812 3956          uint64_t tmp;
3813 3957          int error;
3814 3958  
3815 3959          *might_destroy = B_FALSE;
3816 3960  
3817 3961          mutex_enter(&ds->ds_lock);
3818 3962          zapobj = ds->ds_phys->ds_userrefs_obj;
3819 3963          if (zapobj == 0) {
3820 3964                  /* The tag can't possibly exist */
3821 3965                  mutex_exit(&ds->ds_lock);
3822 3966                  return (ESRCH);
3823 3967          }
3824 3968  
3825 3969          /* Make sure the tag exists */
3826 3970          error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp);
3827 3971          if (error) {
3828 3972                  mutex_exit(&ds->ds_lock);
3829 3973                  if (error == ENOENT)
3830 3974                          error = ESRCH;
3831 3975                  return (error);
3832 3976          }
3833 3977  
3834 3978          if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 &&
3835 3979              DS_IS_DEFER_DESTROY(ds))
3836 3980                  *might_destroy = B_TRUE;
3837 3981  
3838 3982          mutex_exit(&ds->ds_lock);
3839 3983          return (0);
3840 3984  }
3841 3985  
           /*
            * Sync task check function for releasing a user hold.
            *
            * arg1 - the release argument (struct dsl_ds_releasearg *)
            * tag  - passed through to dsl_dataset_destroy_check()
            * tx   - transaction; selects open-context or syncing behaviour
            *
            * Returns ENOTSUP if the pool version predates SPA_VERSION_USERREFS,
            * or any error from dsl_dataset_release_might_destroy().  When the
            * release might destroy the dataset, it returns EBUSY in syncing
            * context if the dataset is only held rather than owned, and
            * otherwise the result of dsl_dataset_destroy_check().  Returns 0 in
            * all other cases.
            */
3842 3986  static int
3843 3987  dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx)
3844 3988  {
3845 3989          struct dsl_ds_releasearg *ra = arg1;
3846 3990          dsl_dataset_t *ds = ra->ds;
3847 3991          boolean_t might_destroy;
3848 3992          int error;
3849 3993  
3850 3994          if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
3851 3995                  return (ENOTSUP);
3852 3996  
3853 3997          error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy);
3854 3998          if (error)
3855 3999                  return (error);
3856 4000  
3857 4001          if (might_destroy) {
3858 4002                  struct dsl_ds_destroyarg dsda = {0};
3859 4003  
3860 4004                  if (dmu_tx_is_syncing(tx)) {
3861 4005                          /*
3862 4006                           * If we're not prepared to remove the snapshot,
3863 4007                           * we can't allow the release to happen right now.
3864 4008                           */
3865 4009                          if (!ra->own)
3866 4010                                  return (EBUSY);
3867 4011                  }
                           /* Run the destroy checks as part of this release. */
3868 4012                  dsda.ds = ds;
3869 4013                  dsda.releasing = B_TRUE;
3870 4014                  return (dsl_dataset_destroy_check(&dsda, tag, tx));
3871 4015          }
3872 4016  
3873 4017          return (0);
3874 4018  }
3875 4019  
           /*
            * Sync task function for releasing a user hold.
            *
            * arg1 - the release argument (struct dsl_ds_releasearg *)
            * tag  - passed through to dsl_dataset_destroy_sync()
            * tx   - transaction the change is made in
            *
            * Decrements ds_userrefs, removes the pool-level record of the hold
            * (its absence is tolerated) and removes the tag from the dataset's
            * userrefs ZAP.  The action is logged to the pool history.  If no
            * user references remain, ds_num_children is 1 and the dataset is
            * marked for deferred destroy, the dataset is destroyed here.
            */
3876 4020  static void
3877 4021  dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx)
3878 4022  {
3879 4023          struct dsl_ds_releasearg *ra = arg1;
3880 4024          dsl_dataset_t *ds = ra->ds;
3881 4025          dsl_pool_t *dp = ds->ds_dir->dd_pool;
3882 4026          objset_t *mos = dp->dp_meta_objset;
3883 4027          uint64_t zapobj;
3884 4028          uint64_t refs;
3885 4029          int error;
3886 4030  
3887 4031          mutex_enter(&ds->ds_lock);
3888 4032          ds->ds_userrefs--;
3889 4033          refs = ds->ds_userrefs;
3890 4034          mutex_exit(&ds->ds_lock);
                   /*
                    * ENOENT is accepted here; presumably only temporary holds have
                    * a pool-level record -- confirm against dsl_pool_user_hold().
                    */
3891 4035          error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx);
3892 4036          VERIFY(error == 0 || error == ENOENT);
3893 4037          zapobj = ds->ds_phys->ds_userrefs_obj;
3894 4038          VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx));
3895 4039  
3896 4040          spa_history_log_internal_ds(ds, "release", tx,
3897 4041              "tag = %s refs now = %lld", ra->htag, (longlong_t)refs);
3898 4042  
3899 4043          if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 &&
3900 4044              DS_IS_DEFER_DESTROY(ds)) {
3901 4045                  struct dsl_ds_destroyarg dsda = {0};
3902 4046  
3903 4047                  ASSERT(ra->own);
3904 4048                  dsda.ds = ds;
3905 4049                  dsda.releasing = B_TRUE;
3906 4050                  /* We already did the destroy_check */
3907 4051                  dsl_dataset_destroy_sync(&dsda, tag, tx);
3908 4052          }
3909 4053  }
3910 4054  
           /*
            * Queue a user-release sync task for the snapshot "dsname@snapname".
            *
            * dsname - name of the dataset whose snapshot hold is to be released
            * arg    - the shared hold argument (struct dsl_ds_holdarg *)
            *
            * The snapshot is held with tag ha->dstg.  If releasing the tag might
            * destroy the snapshot, it is first unmounted (kernel builds only)
            * and then owned and made exclusive, so the sync task can destroy it.
            * On success a dsl_ds_releasearg is allocated and attached to the
            * queued task, and the hold or ownership is left in place.
            *
            * Returns 0 on success, and also when the snapshot does not exist in
            * recursive mode.  Returns EBUSY if ownership cannot be obtained, or
            * the error from dsl_dataset_hold(),
            * dsl_dataset_release_might_destroy() or zfs_unmount_snap().  On
            * every error path after the hold succeeded, the hold is dropped
            * before returning.
            */
3911 4055  static int
3912 4056  dsl_dataset_user_release_one(const char *dsname, void *arg)
3913 4057  {
3914 4058          struct dsl_ds_holdarg *ha = arg;
3915 4059          struct dsl_ds_releasearg *ra;
3916 4060          dsl_dataset_t *ds;
3917 4061          int error;
3918 4062          void *dtag = ha->dstg;
3919 4063          char *name;
3920 4064          boolean_t own = B_FALSE;
3921 4065          boolean_t might_destroy;
3922 4066  
3923 4067          /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */
3924 4068          name = kmem_asprintf("%s@%s", dsname, ha->snapname);
3925 4069          error = dsl_dataset_hold(name, dtag, &ds);
3926 4070          strfree(name);
3927 4071          if (error == ENOENT && ha->recursive)
3928 4072                  return (0);
                   /* Record the current dataset so a later failure can name it. */
3929 4073          (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
3930 4074          if (error)
3931 4075                  return (error);
3932 4076  
3933 4077          ha->gotone = B_TRUE;
3934 4078  
3935 4079          ASSERT(dsl_dataset_is_snapshot(ds));
3936 4080  
3937 4081          error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy);
3938 4082          if (error) {
3939 4083                  dsl_dataset_rele(ds, dtag);
3940 4084                  return (error);
3941 4085          }
3942 4086  
3943 4087          if (might_destroy) {
3944 4088  #ifdef _KERNEL
3945 4089                  name = kmem_asprintf("%s@%s", dsname, ha->snapname);
3946 4090                  error = zfs_unmount_snap(name, NULL);
3947 4091                  strfree(name);
3948 4092                  if (error) {
3949 4093                          dsl_dataset_rele(ds, dtag);
3950 4094                          return (error);
3951 4095                  }
3952 4096  #endif
                           /* Upgrade from a plain hold to exclusive ownership. */
3953 4097                  if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) {
3954 4098                          dsl_dataset_rele(ds, dtag);
3955 4099                          return (EBUSY);
3956 4100                  } else {
3957 4101                          own = B_TRUE;
3958 4102                          dsl_dataset_make_exclusive(ds, dtag);
3959 4103                  }
3960 4104          }
3961 4105  
                   /*
                    * ra is not freed in this function; presumably the caller
                    * frees it after the task group completes -- confirm in
                    * dsl_dataset_user_release().
                    */
3962 4106          ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP);
3963 4107          ra->ds = ds;
3964 4108          ra->htag = ha->htag;
3965 4109          ra->own = own;
3966 4110          dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check,
3967 4111              dsl_dataset_user_release_sync, ra, dtag, 0);
3968 4112  
3969 4113          return (0);
3970 4114  }
3971 4115  
3972 4116  int
3973 4117  dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
3974 4118      boolean_t recursive)
3975 4119  {
3976 4120          struct dsl_ds_holdarg *ha;
3977 4121          dsl_sync_task_t *dst;
3978 4122          spa_t *spa;
3979 4123          int error;
3980 4124  
3981 4125  top:
3982 4126          ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
3983 4127  
3984 4128          (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
3985 4129  
3986 4130          error = spa_open(dsname, &spa, FTAG);
3987 4131          if (error) {
3988 4132                  kmem_free(ha, sizeof (struct dsl_ds_holdarg));
3989 4133                  return (error);
3990 4134          }
3991 4135  
3992 4136          ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
3993 4137          ha->htag = htag;
3994 4138          ha->snapname = snapname;
3995 4139          ha->recursive = recursive;
3996 4140          if (recursive) {
3997 4141                  error = dmu_objset_find(dsname, dsl_dataset_user_release_one,
3998 4142                      ha, DS_FIND_CHILDREN);
3999 4143          } else {
4000 4144                  error = dsl_dataset_user_release_one(dsname, ha);
4001 4145          }
4002 4146          if (error == 0)
4003 4147                  error = dsl_sync_task_group_wait(ha->dstg);
4004 4148  
4005 4149          for (dst = list_head(&ha->dstg->dstg_tasks); dst;
4006 4150              dst = list_next(&ha->dstg->dstg_tasks, dst)) {
4007 4151                  struct dsl_ds_releasearg *ra = dst->dst_arg1;
4008 4152                  dsl_dataset_t *ds = ra->ds;
4009 4153  
4010 4154                  if (dst->dst_err)
4011 4155                          dsl_dataset_name(ds, ha->failed);
4012 4156  
4013 4157                  if (ra->own)
4014 4158                          dsl_dataset_disown(ds, ha->dstg);
4015 4159                  else
4016 4160                          dsl_dataset_rele(ds, ha->dstg);
4017 4161  
4018 4162                  kmem_free(ra, sizeof (struct dsl_ds_releasearg));
4019 4163          }
4020 4164  
4021 4165          if (error == 0 && recursive && !ha->gotone)
4022 4166                  error = ENOENT;
4023 4167  
4024 4168          if (error && error != EBUSY)
4025 4169                  (void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
4026 4170  
4027 4171          dsl_sync_task_group_destroy(ha->dstg);
4028 4172          kmem_free(ha, sizeof (struct dsl_ds_holdarg));
4029 4173          spa_close(spa, FTAG);
4030 4174  
4031 4175          /*
4032 4176           * We can get EBUSY if we were racing with deferred destroy and
4033 4177           * dsl_dataset_user_release_check() hadn't done the necessary
4034 4178           * open context setup.  We can also get EBUSY if we're racing
4035 4179           * with destroy and that thread is the ds_owner.  Either way
4036 4180           * the busy condition should be transient, and we should retry
4037 4181           * the release operation.
4038 4182           */
4039 4183          if (error == EBUSY)
4040 4184                  goto top;
4041 4185  
4042 4186          return (error);
4043 4187  }
4044 4188  
4045 4189  /*
4046 4190   * Called at spa_load time (with retry == B_FALSE) to release a stale
4047 4191   * temporary user hold. Also called by the onexit code (with retry == B_TRUE).
4048 4192   */
4049 4193  int
4050 4194  dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag,
4051 4195      boolean_t retry)
4052 4196  {
4053 4197          dsl_dataset_t *ds;
4054 4198          char *snap;
4055 4199          char *name;
4056 4200          int namelen;
4057 4201          int error;
4058 4202  
4059 4203          do {
4060 4204                  rw_enter(&dp->dp_config_rwlock, RW_READER);
4061 4205                  error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
4062 4206                  rw_exit(&dp->dp_config_rwlock);
4063 4207                  if (error)
4064 4208                          return (error);
4065 4209                  namelen = dsl_dataset_namelen(ds)+1;
4066 4210                  name = kmem_alloc(namelen, KM_SLEEP);
4067 4211                  dsl_dataset_name(ds, name);
4068 4212                  dsl_dataset_rele(ds, FTAG);
4069 4213  
4070 4214                  snap = strchr(name, '@');
4071 4215                  *snap = '\0';
4072 4216                  ++snap;
4073 4217                  error = dsl_dataset_user_release(name, snap, htag, B_FALSE);
4074 4218                  kmem_free(name, namelen);
4075 4219  
4076 4220                  /*
4077 4221                   * The object can't have been destroyed because we have a hold,
4078 4222                   * but it might have been renamed, resulting in ENOENT.  Retry
4079 4223                   * if we've been requested to do so.
4080 4224                   *
4081 4225                   * It would be nice if we could use the dsobj all the way
4082 4226                   * through and avoid ENOENT entirely.  But we might need to
4083 4227                   * unmount the snapshot, and there's currently no way to lookup
4084 4228                   * a vfsp using a ZFS object id.
4085 4229                   */
4086 4230          } while ((error == ENOENT) && retry);
4087 4231  
4088 4232          return (error);
4089 4233  }
4090 4234  
4091 4235  int
4092 4236  dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp)
4093 4237  {
4094 4238          dsl_dataset_t *ds;
4095 4239          int err;
4096 4240  
4097 4241          err = dsl_dataset_hold(dsname, FTAG, &ds);
4098 4242          if (err)
4099 4243                  return (err);
4100 4244  
4101 4245          VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP));
4102 4246          if (ds->ds_phys->ds_userrefs_obj != 0) {
4103 4247                  zap_attribute_t *za;
4104 4248                  zap_cursor_t zc;
4105 4249  
4106 4250                  za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
4107 4251                  for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
4108 4252                      ds->ds_phys->ds_userrefs_obj);
4109 4253                      zap_cursor_retrieve(&zc, za) == 0;
4110 4254                      zap_cursor_advance(&zc)) {
4111 4255                          VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name,
4112 4256                              za->za_first_integer));
4113 4257                  }
4114 4258                  zap_cursor_fini(&zc);
4115 4259                  kmem_free(za, sizeof (zap_attribute_t));
4116 4260          }
4117 4261          dsl_dataset_rele(ds, FTAG);
4118 4262          return (0);
4119 4263  }
4120 4264  
4121 4265  /*
4122 4266   * Note, this function is used as the callback for dmu_objset_find().  We
4123 4267   * always return 0 so that we will continue to find and process
4124 4268   * inconsistent datasets, even if we encounter an error trying to
4125 4269   * process one of them.
4126 4270   */
4127 4271  /* ARGSUSED */
4128 4272  int
4129 4273  dsl_destroy_inconsistent(const char *dsname, void *arg)
4130 4274  {
4131 4275          dsl_dataset_t *ds;
4132 4276  
4133 4277          if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) {
4134 4278                  if (DS_IS_INCONSISTENT(ds))
4135 4279                          (void) dsl_dataset_destroy(ds, FTAG, B_FALSE);
4136 4280                  else
4137 4281                          dsl_dataset_disown(ds, FTAG);
4138 4282          }
4139 4283          return (0);
4140 4284  }
4141 4285  
4142 4286  /*
4143 4287   * Return (in *usedp) the amount of space written in new that is not
4144 4288   * present in oldsnap.  New may be a snapshot or the head.  Old must be
4145 4289   * a snapshot before new, in new's filesystem (or its origin).  If not then
4146 4290   * fail and return EINVAL.
4147 4291   *
4148 4292   * The written space is calculated by considering two components:  First, we
4149 4293   * ignore any freed space, and calculate the written as new's used space
4150 4294   * minus old's used space.  Next, we add in the amount of space that was freed
4151 4295   * between the two snapshots, thus reducing new's used space relative to old's.
4152 4296   * Specifically, this is the space that was born before old->ds_creation_txg,
4153 4297   * and freed before new (ie. on new's deadlist or a previous deadlist).
4154 4298   *
4155 4299   * space freed                         [---------------------]
4156 4300   * snapshots                       ---O-------O--------O-------O------
4157 4301   *                                         oldsnap            new
4158 4302   */
4159 4303  int
4160 4304  dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
4161 4305      uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
4162 4306  {
4163 4307          int err = 0;
4164 4308          uint64_t snapobj;
4165 4309          dsl_pool_t *dp = new->ds_dir->dd_pool;
4166 4310  
4167 4311          *usedp = 0;
4168 4312          *usedp += new->ds_phys->ds_referenced_bytes;
4169 4313          *usedp -= oldsnap->ds_phys->ds_referenced_bytes;
4170 4314  
4171 4315          *compp = 0;
4172 4316          *compp += new->ds_phys->ds_compressed_bytes;
4173 4317          *compp -= oldsnap->ds_phys->ds_compressed_bytes;
4174 4318  
4175 4319          *uncompp = 0;
4176 4320          *uncompp += new->ds_phys->ds_uncompressed_bytes;
4177 4321          *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes;
4178 4322  
4179 4323          rw_enter(&dp->dp_config_rwlock, RW_READER);
4180 4324          snapobj = new->ds_object;
4181 4325          while (snapobj != oldsnap->ds_object) {
4182 4326                  dsl_dataset_t *snap;
4183 4327                  uint64_t used, comp, uncomp;
4184 4328  
4185 4329                  if (snapobj == new->ds_object) {
4186 4330                          snap = new;
4187 4331                  } else {
4188 4332                          err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
4189 4333                          if (err != 0)
4190 4334                                  break;
4191 4335                  }
4192 4336  
4193 4337                  if (snap->ds_phys->ds_prev_snap_txg ==
4194 4338                      oldsnap->ds_phys->ds_creation_txg) {
4195 4339                          /*
4196 4340                           * The blocks in the deadlist can not be born after
4197 4341                           * ds_prev_snap_txg, so get the whole deadlist space,
4198 4342                           * which is more efficient (especially for old-format
4199 4343                           * deadlists).  Unfortunately the deadlist code
4200 4344                           * doesn't have enough information to make this
4201 4345                           * optimization itself.
4202 4346                           */
4203 4347                          dsl_deadlist_space(&snap->ds_deadlist,
4204 4348                              &used, &comp, &uncomp);
4205 4349                  } else {
4206 4350                          dsl_deadlist_space_range(&snap->ds_deadlist,
4207 4351                              0, oldsnap->ds_phys->ds_creation_txg,
4208 4352                              &used, &comp, &uncomp);
4209 4353                  }
4210 4354                  *usedp += used;
4211 4355                  *compp += comp;
4212 4356                  *uncompp += uncomp;
4213 4357  
4214 4358                  /*
4215 4359                   * If we get to the beginning of the chain of snapshots
4216 4360                   * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
4217 4361                   * was not a snapshot of/before new.
4218 4362                   */
4219 4363                  snapobj = snap->ds_phys->ds_prev_snap_obj;
4220 4364                  if (snap != new)
4221 4365                          dsl_dataset_rele(snap, FTAG);
4222 4366                  if (snapobj == 0) {
4223 4367                          err = EINVAL;
4224 4368                          break;
4225 4369                  }
4226 4370  
4227 4371          }
4228 4372          rw_exit(&dp->dp_config_rwlock);
4229 4373          return (err);
4230 4374  }
4231 4375  
4232 4376  /*
4233 4377   * Return (in *usedp) the amount of space that will be reclaimed if firstsnap,
4234 4378   * lastsnap, and all snapshots in between are deleted.
4235 4379   *
4236 4380   * blocks that would be freed            [---------------------------]
4237 4381   * snapshots                       ---O-------O--------O-------O--------O
4238 4382   *                                        firstsnap        lastsnap
4239 4383   *
4240 4384   * This is the set of blocks that were born after the snap before firstsnap,
4241 4385   * (birth > firstsnap->prev_snap_txg) and died before the snap after the
4242 4386   * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
4243 4387   * We calculate this by iterating over the relevant deadlists (from the snap
4244 4388   * after lastsnap, backward to the snap after firstsnap), summing up the
4245 4389   * space on the deadlist that was born after the snap before firstsnap.
4246 4390   */
4247 4391  int
4248 4392  dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
4249 4393      dsl_dataset_t *lastsnap,
4250 4394      uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
4251 4395  {
4252 4396          int err = 0;
4253 4397          uint64_t snapobj;
4254 4398          dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
4255 4399  
4256 4400          ASSERT(dsl_dataset_is_snapshot(firstsnap));
4257 4401          ASSERT(dsl_dataset_is_snapshot(lastsnap));
4258 4402  
4259 4403          /*
4260 4404           * Check that the snapshots are in the same dsl_dir, and firstsnap
4261 4405           * is before lastsnap.
4262 4406           */
4263 4407          if (firstsnap->ds_dir != lastsnap->ds_dir ||
4264 4408              firstsnap->ds_phys->ds_creation_txg >
4265 4409              lastsnap->ds_phys->ds_creation_txg)
4266 4410                  return (EINVAL);
4267 4411  
4268 4412          *usedp = *compp = *uncompp = 0;
4269 4413  
4270 4414          rw_enter(&dp->dp_config_rwlock, RW_READER);
4271 4415          snapobj = lastsnap->ds_phys->ds_next_snap_obj;
4272 4416          while (snapobj != firstsnap->ds_object) {
4273 4417                  dsl_dataset_t *ds;
4274 4418                  uint64_t used, comp, uncomp;
4275 4419  
4276 4420                  err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);
4277 4421                  if (err != 0)
4278 4422                          break;
4279 4423  
4280 4424                  dsl_deadlist_space_range(&ds->ds_deadlist,
4281 4425                      firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX,
4282 4426                      &used, &comp, &uncomp);
4283 4427                  *usedp += used;
4284 4428                  *compp += comp;
4285 4429                  *uncompp += uncomp;
4286 4430  
4287 4431                  snapobj = ds->ds_phys->ds_prev_snap_obj;
4288 4432                  ASSERT3U(snapobj, !=, 0);
4289 4433                  dsl_dataset_rele(ds, FTAG);
4290 4434          }
4291 4435          rw_exit(&dp->dp_config_rwlock);
4292 4436          return (err);
4293 4437  }

↓ open down ↓

1428 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX