illumos-gate Wdiff usr/src/uts/common/fs/zfs/dsl_dataset.c

Print this page

3006 VERIFY[S,U,P] and ASSERT[S,U,P] frequently check if first argument is zero

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/zfs/dsl_dataset.c
          +++ new/usr/src/uts/common/fs/zfs/dsl_dataset.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2012 by Delphix. All rights reserved.
  24   24   * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  25   25   */
  26   26  
  27   27  #include <sys/dmu_objset.h>
  28   28  #include <sys/dsl_dataset.h>
  29   29  #include <sys/dsl_dir.h>
  30   30  #include <sys/dsl_prop.h>
  31   31  #include <sys/dsl_synctask.h>
  32   32  #include <sys/dmu_traverse.h>
  33   33  #include <sys/dmu_impl.h>
  34   34  #include <sys/dmu_tx.h>
  35   35  #include <sys/arc.h>
  36   36  #include <sys/zio.h>
  37   37  #include <sys/zap.h>
  38   38  #include <sys/zfeature.h>
  39   39  #include <sys/unique.h>
  40   40  #include <sys/zfs_context.h>
  41   41  #include <sys/zfs_ioctl.h>
  42   42  #include <sys/spa.h>
  43   43  #include <sys/zfs_znode.h>
  44   44  #include <sys/zfs_onexit.h>
  45   45  #include <sys/zvol.h>
  46   46  #include <sys/dsl_scan.h>
  47   47  #include <sys/dsl_deadlist.h>
  48   48  
  49   49  static char *dsl_reaper = "the grim reaper";
  50   50  
  51   51  static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
  52   52  static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
  53   53  static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
  54   54  
/*
 * Swap the contents of the two lvalues x and y through a uint64_t
 * temporary.  Each argument is expanded more than once, so callers
 * must pass expressions without side effects.
 */
#define SWITCH64(x, y) \
        { \
                uint64_t __tmp = (x); \
                (x) = (y); \
                (y) = __tmp; \
        }
  61   61  
  62   62  #define DS_REF_MAX      (1ULL << 62)
  63   63  
  64   64  #define DSL_DEADLIST_BLOCKSIZE  SPA_MAXBLOCKSIZE
  65   65  
  66   66  #define DSL_DATASET_IS_DESTROYED(ds)    ((ds)->ds_owner == dsl_reaper)
  67   67  
  68   68  
  69   69  /*
  70   70   * Figure out how much of this delta should be propogated to the dsl_dir
  71   71   * layer.  If there's a refreservation, that space has already been
  72   72   * partially accounted for in our ancestors.
  73   73   */
  74   74  static int64_t
  75   75  parent_delta(dsl_dataset_t *ds, int64_t delta)
  76   76  {
  77   77          uint64_t old_bytes, new_bytes;
  78   78  
  79   79          if (ds->ds_reserved == 0)
  80   80                  return (delta);
  81   81  
  82   82          old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
  83   83          new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
  84   84  
  85   85          ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
  86   86          return (new_bytes - old_bytes);
  87   87  }
  88   88  
/*
 * Account for a newly written block 'bp' in dataset 'ds' (or in the
 * pool's dp_mos_dir placeholder when ds is NULL).  Must be called in
 * syncing context (asserted).  Holes are ignored.
 *
 * For a real dataset the referenced, compressed, uncompressed and unique
 * byte counters in ds_phys are increased, and the dsl_dir is charged
 * with the portion of the space computed by parent_delta().
 */
void
dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
{
        int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
        int compressed = BP_GET_PSIZE(bp);
        int uncompressed = BP_GET_UCSIZE(bp);
        int64_t delta;

        dprintf_bp(bp, "ds=%p", ds);

        ASSERT(dmu_tx_is_syncing(tx));
        /* It could have been compressed away to nothing */
        if (BP_IS_HOLE(bp))
                return;
        ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
        ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
        if (ds == NULL) {
                /*
                 * Account for the meta-objset space in its placeholder
                 * dsl_dir.
                 */
                ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
                dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
                    used, compressed, uncompressed, tx);
                dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
                return;
        }
        dmu_buf_will_dirty(ds->ds_dbuf, tx);

        /* Lock order here: the dsl_dir's dd_lock first, then ds_lock. */
        mutex_enter(&ds->ds_dir->dd_lock);
        mutex_enter(&ds->ds_lock);
        /* Compute the dsl_dir share before ds_unique_bytes is updated. */
        delta = parent_delta(ds, used);
        ds->ds_phys->ds_referenced_bytes += used;
        ds->ds_phys->ds_compressed_bytes += compressed;
        ds->ds_phys->ds_uncompressed_bytes += uncompressed;
        ds->ds_phys->ds_unique_bytes += used;
        mutex_exit(&ds->ds_lock);
        dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
            compressed, uncompressed, tx);
        /*
         * The remainder (used - delta) is the part parent_delta()
         * withheld because of the refreservation; it is moved between
         * the DD_USED_REFRSRV and DD_USED_HEAD categories.
         */
        dsl_dir_transfer_space(ds->ds_dir, used - delta,
            DD_USED_REFRSRV, DD_USED_HEAD, tx);
        mutex_exit(&ds->ds_dir->dd_lock);
}
 132  132  
/*
 * Account for block 'bp' no longer being referenced by dataset 'ds' (or
 * by the meta-objset when ds is NULL).  Must be called in syncing
 * context (asserted) for a block born no later than the current txg.
 *
 * If the block was born after the dataset's previous snapshot txg it is
 * freed immediately; otherwise it is placed on the dataset's dead list
 * (or, when 'async' is set, on the pending dead list).
 *
 * Returns 0 for a hole, otherwise the number of bytes reported by
 * bp_get_dsize_sync() for the block.
 */
int
dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
    boolean_t async)
{
        if (BP_IS_HOLE(bp))
                return (0);

        ASSERT(dmu_tx_is_syncing(tx));
        ASSERT(bp->blk_birth <= tx->tx_txg);

        int used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
        int compressed = BP_GET_PSIZE(bp);
        int uncompressed = BP_GET_UCSIZE(bp);

        ASSERT(used > 0);
        if (ds == NULL) {
                /*
                 * Account for the meta-objset space in its placeholder
                 * dataset.
                 */
                dsl_free(tx->tx_pool, tx->tx_txg, bp);

                dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
                    -used, -compressed, -uncompressed, tx);
                dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
                return (used);
        }
        ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

        ASSERT(!dsl_dataset_is_snapshot(ds));
        dmu_buf_will_dirty(ds->ds_dbuf, tx);

        if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
                /* Born after the previous snapshot: free it right away. */
                int64_t delta;

                dprintf_bp(bp, "freeing ds=%llu", ds->ds_object);
                dsl_free(tx->tx_pool, tx->tx_txg, bp);

                /* Lock order here: dd_lock first, then ds_lock. */
                mutex_enter(&ds->ds_dir->dd_lock);
                mutex_enter(&ds->ds_lock);
                ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
                    !DS_UNIQUE_IS_ACCURATE(ds));
                /* Compute the dsl_dir share before adjusting unique. */
                delta = parent_delta(ds, -used);
                ds->ds_phys->ds_unique_bytes -= used;
                mutex_exit(&ds->ds_lock);
                dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
                    delta, -compressed, -uncompressed, tx);
                dsl_dir_transfer_space(ds->ds_dir, -used - delta,
                    DD_USED_REFRSRV, DD_USED_HEAD, tx);
                mutex_exit(&ds->ds_dir->dd_lock);
        } else {
                dprintf_bp(bp, "putting on dead list: %s", "");
                if (async) {
                        /*
                         * We are here as part of zio's write done callback,
                         * which means we're a zio interrupt thread.  We can't
                         * call dsl_deadlist_insert() now because it may block
                         * waiting for I/O.  Instead, put bp on the deferred
                         * queue and let dsl_pool_sync() finish the job.
                         */
                        bplist_append(&ds->ds_pending_deadlist, bp);
                } else {
                        dsl_deadlist_insert(&ds->ds_deadlist, bp, tx);
                }
                ASSERT3U(ds->ds_prev->ds_object, ==,
                    ds->ds_phys->ds_prev_snap_obj);
                ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
                /* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
                if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
                    ds->ds_object && bp->blk_birth >
                    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
                        dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
                        mutex_enter(&ds->ds_prev->ds_lock);
                        ds->ds_prev->ds_phys->ds_unique_bytes += used;
                        mutex_exit(&ds->ds_prev->ds_lock);
                }
                /*
                 * Blocks born after the dsl_dir's origin txg are moved
                 * between the DD_USED_HEAD and DD_USED_SNAP categories.
                 */
                if (bp->blk_birth > ds->ds_dir->dd_origin_txg) {
                        dsl_dir_transfer_space(ds->ds_dir, used,
                            DD_USED_HEAD, DD_USED_SNAP, tx);
                }
        }
        /* In either case the dataset itself no longer references the block. */
        mutex_enter(&ds->ds_lock);
        ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
        ds->ds_phys->ds_referenced_bytes -= used;
        ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
        ds->ds_phys->ds_compressed_bytes -= compressed;
        ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
        ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
        mutex_exit(&ds->ds_lock);

        return (used);
}
 225  225  
 226  226  uint64_t
 227  227  dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
 228  228  {
 229  229          uint64_t trysnap = 0;
 230  230  
 231  231          if (ds == NULL)
 232  232                  return (0);
 233  233          /*
 234  234           * The snapshot creation could fail, but that would cause an
 235  235           * incorrect FALSE return, which would only result in an
 236  236           * overestimation of the amount of space that an operation would
 237  237           * consume, which is OK.
 238  238           *
 239  239           * There's also a small window where we could miss a pending
 240  240           * snapshot, because we could set the sync task in the quiescing
 241  241           * phase.  So this should only be used as a guess.
 242  242           */
 243  243          if (ds->ds_trysnap_txg >
 244  244              spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
 245  245                  trysnap = ds->ds_trysnap_txg;
 246  246          return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
 247  247  }
 248  248  
 249  249  boolean_t
 250  250  dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
 251  251      uint64_t blk_birth)
 252  252  {
 253  253          if (blk_birth <= dsl_dataset_prev_snap_txg(ds))
 254  254                  return (B_FALSE);
 255  255  
 256  256          ddt_prefetch(dsl_dataset_get_spa(ds), bp);
 257  257  
 258  258          return (B_TRUE);
 259  259  }
 260  260  
 261  261  /* ARGSUSED */
 262  262  static void
 263  263  dsl_dataset_evict(dmu_buf_t *db, void *dsv)
 264  264  {
 265  265          dsl_dataset_t *ds = dsv;
 266  266  
 267  267          ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));
 268  268  
 269  269          unique_remove(ds->ds_fsid_guid);
 270  270  
 271  271          if (ds->ds_objset != NULL)
 272  272                  dmu_objset_evict(ds->ds_objset);
 273  273  
 274  274          if (ds->ds_prev) {
 275  275                  dsl_dataset_drop_ref(ds->ds_prev, ds);
 276  276                  ds->ds_prev = NULL;
 277  277          }
 278  278  
 279  279          bplist_destroy(&ds->ds_pending_deadlist);
 280  280          if (db != NULL) {
 281  281                  dsl_deadlist_close(&ds->ds_deadlist);
 282  282          } else {
 283  283                  ASSERT(ds->ds_deadlist.dl_dbuf == NULL);
 284  284                  ASSERT(!ds->ds_deadlist.dl_oldfmt);
 285  285          }
 286  286          if (ds->ds_dir)
 287  287                  dsl_dir_close(ds->ds_dir, ds);
 288  288  
 289  289          ASSERT(!list_link_active(&ds->ds_synced_link));
 290  290  
 291  291          mutex_destroy(&ds->ds_lock);
 292  292          mutex_destroy(&ds->ds_recvlock);
 293  293          mutex_destroy(&ds->ds_opening_lock);
 294  294          rw_destroy(&ds->ds_rwlock);
 295  295          cv_destroy(&ds->ds_exclusive_cv);
 296  296  
 297  297          kmem_free(ds, sizeof (dsl_dataset_t));
 298  298  }
 299  299  
 300  300  static int
 301  301  dsl_dataset_get_snapname(dsl_dataset_t *ds)
 302  302  {
 303  303          dsl_dataset_phys_t *headphys;
 304  304          int err;
 305  305          dmu_buf_t *headdbuf;
 306  306          dsl_pool_t *dp = ds->ds_dir->dd_pool;
 307  307          objset_t *mos = dp->dp_meta_objset;
 308  308  
 309  309          if (ds->ds_snapname[0])
 310  310                  return (0);
 311  311          if (ds->ds_phys->ds_next_snap_obj == 0)
 312  312                  return (0);
 313  313  
 314  314          err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
 315  315              FTAG, &headdbuf);
 316  316          if (err)
 317  317                  return (err);
 318  318          headphys = headdbuf->db_data;
 319  319          err = zap_value_search(dp->dp_meta_objset,
 320  320              headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
 321  321          dmu_buf_rele(headdbuf, FTAG);
 322  322          return (err);
 323  323  }
 324  324  
 325  325  static int
 326  326  dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
 327  327  {
 328  328          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 329  329          uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
 330  330          matchtype_t mt;
 331  331          int err;
 332  332  
 333  333          if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
 334  334                  mt = MT_FIRST;
 335  335          else
 336  336                  mt = MT_EXACT;
 337  337  
 338  338          err = zap_lookup_norm(mos, snapobj, name, 8, 1,
 339  339              value, mt, NULL, 0, NULL);
 340  340          if (err == ENOTSUP && mt == MT_FIRST)
 341  341                  err = zap_lookup(mos, snapobj, name, 8, 1, value);
 342  342          return (err);
 343  343  }
 344  344  
 345  345  static int
 346  346  dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
 347  347  {
 348  348          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 349  349          uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
 350  350          matchtype_t mt;
 351  351          int err;
 352  352  
 353  353          dsl_dir_snap_cmtime_update(ds->ds_dir);
 354  354  
 355  355          if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
 356  356                  mt = MT_FIRST;
 357  357          else
 358  358                  mt = MT_EXACT;
 359  359  
 360  360          err = zap_remove_norm(mos, snapobj, name, mt, tx);
 361  361          if (err == ENOTSUP && mt == MT_FIRST)
 362  362                  err = zap_remove(mos, snapobj, name, tx);
 363  363          return (err);
 364  364  }
 365  365  
 366  366  static int
 367  367  dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
 368  368      dsl_dataset_t **dsp)
 369  369  {
 370  370          objset_t *mos = dp->dp_meta_objset;
 371  371          dmu_buf_t *dbuf;
 372  372          dsl_dataset_t *ds;
 373  373          int err;
 374  374          dmu_object_info_t doi;
 375  375  
 376  376          ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
 377  377              dsl_pool_sync_context(dp));
 378  378  
 379  379          err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
 380  380          if (err)
 381  381                  return (err);
 382  382  
 383  383          /* Make sure dsobj has the correct object type. */
 384  384          dmu_object_info_from_db(dbuf, &doi);
 385  385          if (doi.doi_type != DMU_OT_DSL_DATASET)
 386  386                  return (EINVAL);
 387  387  
 388  388          ds = dmu_buf_get_user(dbuf);
 389  389          if (ds == NULL) {
 390  390                  dsl_dataset_t *winner;
 391  391  
 392  392                  ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
 393  393                  ds->ds_dbuf = dbuf;
 394  394                  ds->ds_object = dsobj;
 395  395                  ds->ds_phys = dbuf->db_data;
 396  396  
 397  397                  mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
 398  398                  mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
 399  399                  mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
 400  400                  mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
 401  401  
 402  402                  rw_init(&ds->ds_rwlock, 0, 0, 0);
 403  403                  cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
 404  404  
 405  405                  bplist_create(&ds->ds_pending_deadlist);
 406  406                  dsl_deadlist_open(&ds->ds_deadlist,
 407  407                      mos, ds->ds_phys->ds_deadlist_obj);
 408  408  
 409  409                  list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
 410  410                      offsetof(dmu_sendarg_t, dsa_link));
 411  411  
 412  412                  if (err == 0) {
 413  413                          err = dsl_dir_open_obj(dp,
 414  414                              ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
 415  415                  }
 416  416                  if (err) {
 417  417                          mutex_destroy(&ds->ds_lock);
 418  418                          mutex_destroy(&ds->ds_recvlock);
 419  419                          mutex_destroy(&ds->ds_opening_lock);
 420  420                          rw_destroy(&ds->ds_rwlock);
 421  421                          cv_destroy(&ds->ds_exclusive_cv);
 422  422                          bplist_destroy(&ds->ds_pending_deadlist);
 423  423                          dsl_deadlist_close(&ds->ds_deadlist);
 424  424                          kmem_free(ds, sizeof (dsl_dataset_t));
 425  425                          dmu_buf_rele(dbuf, tag);
 426  426                          return (err);
 427  427                  }
 428  428  
 429  429                  if (!dsl_dataset_is_snapshot(ds)) {
 430  430                          ds->ds_snapname[0] = '\0';
 431  431                          if (ds->ds_phys->ds_prev_snap_obj) {
 432  432                                  err = dsl_dataset_get_ref(dp,
 433  433                                      ds->ds_phys->ds_prev_snap_obj,
 434  434                                      ds, &ds->ds_prev);
 435  435                          }
 436  436                  } else {
 437  437                          if (zfs_flags & ZFS_DEBUG_SNAPNAMES)
 438  438                                  err = dsl_dataset_get_snapname(ds);
 439  439                          if (err == 0 && ds->ds_phys->ds_userrefs_obj != 0) {
 440  440                                  err = zap_count(
 441  441                                      ds->ds_dir->dd_pool->dp_meta_objset,
 442  442                                      ds->ds_phys->ds_userrefs_obj,
 443  443                                      &ds->ds_userrefs);
 444  444                          }
 445  445                  }
 446  446  
 447  447                  if (err == 0 && !dsl_dataset_is_snapshot(ds)) {
 448  448                          /*
 449  449                           * In sync context, we're called with either no lock
 450  450                           * or with the write lock.  If we're not syncing,
 451  451                           * we're always called with the read lock held.
 452  452                           */
 453  453                          boolean_t need_lock =
 454  454                              !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
 455  455                              dsl_pool_sync_context(dp);
 456  456  
 457  457                          if (need_lock)
 458  458                                  rw_enter(&dp->dp_config_rwlock, RW_READER);
 459  459  
 460  460                          err = dsl_prop_get_ds(ds,
 461  461                              "refreservation", sizeof (uint64_t), 1,
 462  462                              &ds->ds_reserved, NULL);
 463  463                          if (err == 0) {
 464  464                                  err = dsl_prop_get_ds(ds,
 465  465                                      "refquota", sizeof (uint64_t), 1,
 466  466                                      &ds->ds_quota, NULL);
 467  467                          }
 468  468  
 469  469                          if (need_lock)
 470  470                                  rw_exit(&dp->dp_config_rwlock);
 471  471                  } else {
 472  472                          ds->ds_reserved = ds->ds_quota = 0;
 473  473                  }
 474  474  
 475  475                  if (err == 0) {
 476  476                          winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
 477  477                              dsl_dataset_evict);
 478  478                  }
 479  479                  if (err || winner) {
 480  480                          bplist_destroy(&ds->ds_pending_deadlist);
 481  481                          dsl_deadlist_close(&ds->ds_deadlist);
 482  482                          if (ds->ds_prev)
 483  483                                  dsl_dataset_drop_ref(ds->ds_prev, ds);
 484  484                          dsl_dir_close(ds->ds_dir, ds);
 485  485                          mutex_destroy(&ds->ds_lock);
 486  486                          mutex_destroy(&ds->ds_recvlock);
 487  487                          mutex_destroy(&ds->ds_opening_lock);
 488  488                          rw_destroy(&ds->ds_rwlock);
 489  489                          cv_destroy(&ds->ds_exclusive_cv);
 490  490                          kmem_free(ds, sizeof (dsl_dataset_t));
 491  491                          if (err) {
 492  492                                  dmu_buf_rele(dbuf, tag);
 493  493                                  return (err);
 494  494                          }
 495  495                          ds = winner;
 496  496                  } else {
 497  497                          ds->ds_fsid_guid =
 498  498                              unique_insert(ds->ds_phys->ds_fsid_guid);
 499  499                  }
 500  500          }
 501  501          ASSERT3P(ds->ds_dbuf, ==, dbuf);
 502  502          ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
 503  503          ASSERT(ds->ds_phys->ds_prev_snap_obj != 0 ||
 504  504              spa_version(dp->dp_spa) < SPA_VERSION_ORIGIN ||
 505  505              dp->dp_origin_snap == NULL || ds == dp->dp_origin_snap);
 506  506          mutex_enter(&ds->ds_lock);
 507  507          if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
 508  508                  mutex_exit(&ds->ds_lock);
 509  509                  dmu_buf_rele(ds->ds_dbuf, tag);
 510  510                  return (ENOENT);
 511  511          }
 512  512          mutex_exit(&ds->ds_lock);
 513  513          *dsp = ds;
 514  514          return (0);
 515  515  }
 516  516  
/*
 * Convert a reference obtained from dsl_dataset_get_ref() into a hold
 * by taking ds_rwlock as READER.
 *
 * In syncing context no lock is taken and 0 is returned immediately.
 * Otherwise the caller must hold dp_config_rwlock, and not as writer
 * (asserted); that lock is dropped while waiting and is held again as
 * READER on every return path.
 *
 * Returns 0 on success.  Returns ENOENT if the dataset turned out to be
 * destroyed while we waited; in that case the caller's reference has
 * already been dropped with dsl_dataset_drop_ref(ds, tag).
 */
static int
dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
{
        dsl_pool_t *dp = ds->ds_dir->dd_pool;

        /*
         * In syncing context we don't want the rwlock lock: there
         * may be an existing writer waiting for sync phase to
         * finish.  We don't need to worry about such writers, since
         * sync phase is single-threaded, so the writer can't be
         * doing anything while we are active.
         */
        if (dsl_pool_sync_context(dp)) {
                ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
                return (0);
        }

        /*
         * Normal users will hold the ds_rwlock as a READER until they
         * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
         * drop their READER lock after they set the ds_owner field.
         *
         * If the dataset is being destroyed, the destroy thread will
         * obtain a WRITER lock for exclusive access after it's done its
         * open-context work and then change the ds_owner to
         * dsl_reaper once destruction is assured.  So threads
         * may block here temporarily, until the "destructability" of
         * the dataset is determined.
         */
        ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
        mutex_enter(&ds->ds_lock);
        while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
                /* Give up the pool config lock while we sleep. */
                rw_exit(&dp->dp_config_rwlock);
                cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
                if (DSL_DATASET_IS_DESTROYED(ds)) {
                        mutex_exit(&ds->ds_lock);
                        dsl_dataset_drop_ref(ds, tag);
                        /* Callers expect the config lock held on return. */
                        rw_enter(&dp->dp_config_rwlock, RW_READER);
                        return (ENOENT);
                }
                /*
                 * The dp_config_rwlock lives above the ds_lock. And
                 * we need to check DSL_DATASET_IS_DESTROYED() while
                 * holding the ds_lock, so we have to drop and reacquire
                 * the ds_lock here.
                 */
                mutex_exit(&ds->ds_lock);
                rw_enter(&dp->dp_config_rwlock, RW_READER);
                mutex_enter(&ds->ds_lock);
        }
        mutex_exit(&ds->ds_lock);
        return (0);
}
 570  570  
 571  571  int
 572  572  dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
 573  573      dsl_dataset_t **dsp)
 574  574  {
 575  575          int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);
 576  576  
 577  577          if (err)
 578  578                  return (err);
 579  579          return (dsl_dataset_hold_ref(*dsp, tag));
 580  580  }
 581  581  
 582  582  int
 583  583  dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, boolean_t inconsistentok,
 584  584      void *tag, dsl_dataset_t **dsp)
 585  585  {
 586  586          int err = dsl_dataset_hold_obj(dp, dsobj, tag, dsp);
 587  587          if (err)
 588  588                  return (err);
 589  589          if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
 590  590                  dsl_dataset_rele(*dsp, tag);
 591  591                  *dsp = NULL;
 592  592                  return (EBUSY);
 593  593          }
 594  594          return (0);
 595  595  }
 596  596  
/*
 * Hold the dataset called 'name', which may refer to a head dataset or,
 * with an "@snapname" suffix, to a snapshot.  On success *dsp is the
 * held dataset.
 *
 * The head dataset of the named dsl_dir is held first.  If a snapshot
 * component is present, it is looked up in the head's snapshot-names
 * ZAP, the head is released, and the snapshot is held instead.
 *
 * Returns 0 on success; ENOENT if the dsl_dir has no head dataset or
 * the trailing component does not begin with '@'; otherwise the error
 * from the underlying open/lookup/hold calls.
 *
 * NOTE(review): on some failure paths (e.g. a failed snapshot lookup)
 * *dsp appears to be left pointing at the already-released head
 * dataset; callers should not use *dsp when an error is returned.
 */
int
dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
{
        dsl_dir_t *dd;
        dsl_pool_t *dp;
        const char *snapname;
        uint64_t obj;
        int err = 0;

        err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
        if (err)
                return (err);

        dp = dd->dd_pool;
        obj = dd->dd_phys->dd_head_dataset_obj;
        /* Held as READER until 'out'; required by dsl_dataset_get_ref(). */
        rw_enter(&dp->dp_config_rwlock, RW_READER);
        if (obj)
                err = dsl_dataset_get_ref(dp, obj, tag, dsp);
        else
                err = ENOENT;
        if (err)
                goto out;

        err = dsl_dataset_hold_ref(*dsp, tag);

        /* we may be looking for a snapshot */
        if (err == 0 && snapname != NULL) {
                dsl_dataset_t *ds = NULL;

                /* Step past the '@'; anything else is not a snapshot. */
                if (*snapname++ != '@') {
                        dsl_dataset_rele(*dsp, tag);
                        err = ENOENT;
                        goto out;
                }

                dprintf("looking for snapshot '%s'\n", snapname);
                err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
                if (err == 0)
                        err = dsl_dataset_get_ref(dp, obj, tag, &ds);
                /* The head was only needed to find the snapshot. */
                dsl_dataset_rele(*dsp, tag);

                ASSERT3U((err == 0), ==, (ds != NULL));

                if (ds) {
                        /* Cache the name we just resolved, if not yet set. */
                        mutex_enter(&ds->ds_lock);
                        if (ds->ds_snapname[0] == 0)
                                (void) strlcpy(ds->ds_snapname, snapname,
                                    sizeof (ds->ds_snapname));
                        mutex_exit(&ds->ds_lock);
                        err = dsl_dataset_hold_ref(ds, tag);
                        *dsp = err ? NULL : ds;
                }
        }
out:
        rw_exit(&dp->dp_config_rwlock);
        dsl_dir_close(dd, FTAG);
        return (err);
}
 655  655  
 656  656  int
 657  657  dsl_dataset_own(const char *name, boolean_t inconsistentok,
 658  658      void *tag, dsl_dataset_t **dsp)
 659  659  {
 660  660          int err = dsl_dataset_hold(name, tag, dsp);
 661  661          if (err)
 662  662                  return (err);
 663  663          if (!dsl_dataset_tryown(*dsp, inconsistentok, tag)) {
 664  664                  dsl_dataset_rele(*dsp, tag);
 665  665                  return (EBUSY);
 666  666          }
 667  667          return (0);
 668  668  }
 669  669  
 670  670  void
 671  671  dsl_dataset_name(dsl_dataset_t *ds, char *name)
 672  672  {
 673  673          if (ds == NULL) {
 674  674                  (void) strcpy(name, "mos");
 675  675          } else {
 676  676                  dsl_dir_name(ds->ds_dir, name);
 677  677                  VERIFY(0 == dsl_dataset_get_snapname(ds));
 678  678                  if (ds->ds_snapname[0]) {
 679  679                          (void) strcat(name, "@");
 680  680                          /*
 681  681                           * We use a "recursive" mutex so that we
 682  682                           * can call dprintf_ds() with ds_lock held.
 683  683                           */
 684  684                          if (!MUTEX_HELD(&ds->ds_lock)) {
 685  685                                  mutex_enter(&ds->ds_lock);
 686  686                                  (void) strcat(name, ds->ds_snapname);
 687  687                                  mutex_exit(&ds->ds_lock);
 688  688                          } else {
 689  689                                  (void) strcat(name, ds->ds_snapname);
 690  690                          }
 691  691                  }
 692  692          }
 693  693  }
 694  694  
 695  695  static int
 696  696  dsl_dataset_namelen(dsl_dataset_t *ds)
 697  697  {
 698  698          int result;
 699  699  
 700  700          if (ds == NULL) {
 701  701                  result = 3;     /* "mos" */
 702  702          } else {
 703  703                  result = dsl_dir_namelen(ds->ds_dir);
 704  704                  VERIFY(0 == dsl_dataset_get_snapname(ds));
 705  705                  if (ds->ds_snapname[0]) {
 706  706                          ++result;       /* adding one for the @-sign */
 707  707                          if (!MUTEX_HELD(&ds->ds_lock)) {
 708  708                                  mutex_enter(&ds->ds_lock);
 709  709                                  result += strlen(ds->ds_snapname);
 710  710                                  mutex_exit(&ds->ds_lock);
 711  711                          } else {
 712  712                                  result += strlen(ds->ds_snapname);
 713  713                          }
 714  714                  }
 715  715          }
 716  716  
 717  717          return (result);
 718  718  }
 719  719  
 720  720  void
 721  721  dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
 722  722  {
 723  723          dmu_buf_rele(ds->ds_dbuf, tag);
 724  724  }
 725  725  
 726  726  void
 727  727  dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
 728  728  {
 729  729          if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
 730  730                  rw_exit(&ds->ds_rwlock);
 731  731          }
 732  732          dsl_dataset_drop_ref(ds, tag);
 733  733  }
 734  734  
 735  735  void
 736  736  dsl_dataset_disown(dsl_dataset_t *ds, void *tag)
 737  737  {
 738  738          ASSERT((ds->ds_owner == tag && ds->ds_dbuf) ||
 739  739              (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));
 740  740  
 741  741          mutex_enter(&ds->ds_lock);
 742  742          ds->ds_owner = NULL;
 743  743          if (RW_WRITE_HELD(&ds->ds_rwlock)) {
 744  744                  rw_exit(&ds->ds_rwlock);
 745  745                  cv_broadcast(&ds->ds_exclusive_cv);
 746  746          }
 747  747          mutex_exit(&ds->ds_lock);
 748  748          if (ds->ds_dbuf)
 749  749                  dsl_dataset_drop_ref(ds, tag);
 750  750          else
 751  751                  dsl_dataset_evict(NULL, ds);
 752  752  }
 753  753  
 754  754  boolean_t
 755  755  dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *tag)
 756  756  {
 757  757          boolean_t gotit = FALSE;
 758  758  
 759  759          mutex_enter(&ds->ds_lock);
 760  760          if (ds->ds_owner == NULL &&
 761  761              (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
 762  762                  ds->ds_owner = tag;
 763  763                  if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
 764  764                          rw_exit(&ds->ds_rwlock);
 765  765                  gotit = TRUE;
 766  766          }
 767  767          mutex_exit(&ds->ds_lock);
 768  768          return (gotit);
 769  769  }
 770  770  
 771  771  void
 772  772  dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
 773  773  {
 774  774          ASSERT3P(owner, ==, ds->ds_owner);
 775  775          if (!RW_WRITE_HELD(&ds->ds_rwlock))
 776  776                  rw_enter(&ds->ds_rwlock, RW_WRITER);
 777  777  }
 778  778  
 779  779  uint64_t
 780  780  dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
 781  781      uint64_t flags, dmu_tx_t *tx)
 782  782  {
 783  783          dsl_pool_t *dp = dd->dd_pool;
 784  784          dmu_buf_t *dbuf;
 785  785          dsl_dataset_phys_t *dsphys;
 786  786          uint64_t dsobj;
 787  787          objset_t *mos = dp->dp_meta_objset;
 788  788  
 789  789          if (origin == NULL)
 790  790                  origin = dp->dp_origin_snap;
 791  791  
 792  792          ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
 793  793          ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
 794  794          ASSERT(dmu_tx_is_syncing(tx));
 795  795          ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
 796  796  
 797  797          dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
 798  798              DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
 799  799          VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
 800  800          dmu_buf_will_dirty(dbuf, tx);
 801  801          dsphys = dbuf->db_data;
 802  802          bzero(dsphys, sizeof (dsl_dataset_phys_t));
 803  803          dsphys->ds_dir_obj = dd->dd_object;
 804  804          dsphys->ds_flags = flags;
 805  805          dsphys->ds_fsid_guid = unique_create();
 806  806          (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
 807  807              sizeof (dsphys->ds_guid));
 808  808          dsphys->ds_snapnames_zapobj =
 809  809              zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
 810  810              DMU_OT_NONE, 0, tx);
 811  811          dsphys->ds_creation_time = gethrestime_sec();
 812  812          dsphys->ds_creation_txg = tx->tx_txg == TXG_INITIAL ? 1 : tx->tx_txg;
 813  813  
 814  814          if (origin == NULL) {
 815  815                  dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
 816  816          } else {
 817  817                  dsl_dataset_t *ohds;
 818  818  
 819  819                  dsphys->ds_prev_snap_obj = origin->ds_object;
 820  820                  dsphys->ds_prev_snap_txg =
 821  821                      origin->ds_phys->ds_creation_txg;
 822  822                  dsphys->ds_referenced_bytes =
 823  823                      origin->ds_phys->ds_referenced_bytes;

↓ open down ↓

823 lines elided

↑ open up ↑

 824  824                  dsphys->ds_compressed_bytes =
 825  825                      origin->ds_phys->ds_compressed_bytes;
 826  826                  dsphys->ds_uncompressed_bytes =
 827  827                      origin->ds_phys->ds_uncompressed_bytes;
 828  828                  dsphys->ds_bp = origin->ds_phys->ds_bp;
 829  829                  dsphys->ds_flags |= origin->ds_phys->ds_flags;
 830  830  
 831  831                  dmu_buf_will_dirty(origin->ds_dbuf, tx);
 832  832                  origin->ds_phys->ds_num_children++;
 833  833  
 834      -                VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
      834 +                VERIFY0(dsl_dataset_hold_obj(dp,
 835  835                      origin->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ohds));
 836  836                  dsphys->ds_deadlist_obj = dsl_deadlist_clone(&ohds->ds_deadlist,
 837  837                      dsphys->ds_prev_snap_txg, dsphys->ds_prev_snap_obj, tx);
 838  838                  dsl_dataset_rele(ohds, FTAG);
 839  839  
 840  840                  if (spa_version(dp->dp_spa) >= SPA_VERSION_NEXT_CLONES) {
 841  841                          if (origin->ds_phys->ds_next_clones_obj == 0) {
 842  842                                  origin->ds_phys->ds_next_clones_obj =
 843  843                                      zap_create(mos,
 844  844                                      DMU_OT_NEXT_CLONES, DMU_OT_NONE, 0, tx);

 845  845                          }
 846  846                          VERIFY(0 == zap_add_int(mos,
 847  847                              origin->ds_phys->ds_next_clones_obj,
 848  848                              dsobj, tx));
 849  849                  }

↓ open down ↓

5 lines elided

↑ open up ↑

 850  850  
 851  851                  dmu_buf_will_dirty(dd->dd_dbuf, tx);
 852  852                  dd->dd_phys->dd_origin_obj = origin->ds_object;
 853  853                  if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
 854  854                          if (origin->ds_dir->dd_phys->dd_clones == 0) {
 855  855                                  dmu_buf_will_dirty(origin->ds_dir->dd_dbuf, tx);
 856  856                                  origin->ds_dir->dd_phys->dd_clones =
 857  857                                      zap_create(mos,
 858  858                                      DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
 859  859                          }
 860      -                        VERIFY3U(0, ==, zap_add_int(mos,
      860 +                        VERIFY0(zap_add_int(mos,
 861  861                              origin->ds_dir->dd_phys->dd_clones, dsobj, tx));
 862  862                  }
 863  863          }
 864  864  
 865  865          if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
 866  866                  dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
 867  867  
 868  868          dmu_buf_rele(dbuf, FTAG);
 869  869  
 870  870          dmu_buf_will_dirty(dd->dd_dbuf, tx);

 871  871          dd->dd_phys->dd_head_dataset_obj = dsobj;
 872  872  
 873  873          return (dsobj);
 874  874  }
 875  875  
 876  876  uint64_t
 877  877  dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
 878  878      dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
 879  879  {
 880  880          dsl_pool_t *dp = pdd->dd_pool;
 881  881          uint64_t dsobj, ddobj;
 882  882          dsl_dir_t *dd;
 883  883  
 884  884          ASSERT(lastname[0] != '@');
 885  885  
 886  886          ddobj = dsl_dir_create_sync(dp, pdd, lastname, tx);
 887  887          VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
 888  888  
 889  889          dsobj = dsl_dataset_create_sync_dd(dd, origin, flags, tx);
 890  890  
 891  891          dsl_deleg_set_create_perms(dd, tx, cr);
 892  892

↓ open down ↓

22 lines elided

↑ open up ↑

 893  893          dsl_dir_close(dd, FTAG);
 894  894  
 895  895          /*
 896  896           * If we are creating a clone, make sure we zero out any stale
 897  897           * data from the origin snapshots zil header.
 898  898           */
 899  899          if (origin != NULL) {
 900  900                  dsl_dataset_t *ds;
 901  901                  objset_t *os;
 902  902  
 903      -                VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
 904      -                VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));
      903 +                VERIFY0(dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
      904 +                VERIFY0(dmu_objset_from_ds(ds, &os));
 905  905                  bzero(&os->os_zil_header, sizeof (os->os_zil_header));
 906  906                  dsl_dataset_dirty(ds, tx);
 907  907                  dsl_dataset_rele(ds, FTAG);
 908  908          }
 909  909  
 910  910          return (dsobj);
 911  911  }
 912  912  
 913  913  /*
 914  914   * The snapshots must all be in the same pool.

 915  915   */
 916  916  int
 917  917  dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer,
 918  918      nvlist_t *errlist)
 919  919  {
 920  920          int err;
 921  921          dsl_sync_task_t *dst;
 922  922          spa_t *spa;
 923  923          nvpair_t *pair;
 924  924          dsl_sync_task_group_t *dstg;
 925  925  
 926  926          pair = nvlist_next_nvpair(snaps, NULL);
 927  927          if (pair == NULL)
 928  928                  return (0);
 929  929  
 930  930          err = spa_open(nvpair_name(pair), &spa, FTAG);
 931  931          if (err)
 932  932                  return (err);
 933  933          dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
 934  934  
 935  935          for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 936  936              pair = nvlist_next_nvpair(snaps, pair)) {
 937  937                  dsl_dataset_t *ds;
 938  938  
 939  939                  err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
 940  940                  if (err == 0) {
 941  941                          struct dsl_ds_destroyarg *dsda;
 942  942  
 943  943                          dsl_dataset_make_exclusive(ds, dstg);
 944  944                          dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg),
 945  945                              KM_SLEEP);
 946  946                          dsda->ds = ds;
 947  947                          dsda->defer = defer;
 948  948                          dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
 949  949                              dsl_dataset_destroy_sync, dsda, dstg, 0);
 950  950                  } else if (err == ENOENT) {
 951  951                          err = 0;
 952  952                  } else {
 953  953                          fnvlist_add_int32(errlist, nvpair_name(pair), err);
 954  954                          break;
 955  955                  }
 956  956          }
 957  957  
 958  958          if (err == 0)
 959  959                  err = dsl_sync_task_group_wait(dstg);
 960  960  
 961  961          for (dst = list_head(&dstg->dstg_tasks); dst;
 962  962              dst = list_next(&dstg->dstg_tasks, dst)) {
 963  963                  struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
 964  964                  dsl_dataset_t *ds = dsda->ds;
 965  965  
 966  966                  /*
 967  967                   * Return the snapshots that triggered the error.
 968  968                   */
 969  969                  if (dst->dst_err != 0) {
 970  970                          char name[ZFS_MAXNAMELEN];
 971  971                          dsl_dataset_name(ds, name);
 972  972                          fnvlist_add_int32(errlist, name, dst->dst_err);
 973  973                  }
 974  974                  ASSERT3P(dsda->rm_origin, ==, NULL);
 975  975                  dsl_dataset_disown(ds, dstg);
 976  976                  kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
 977  977          }
 978  978  
 979  979          dsl_sync_task_group_destroy(dstg);
 980  980          spa_close(spa, FTAG);
 981  981          return (err);
 982  982  
 983  983  }
 984  984  
 985  985  static boolean_t
 986  986  dsl_dataset_might_destroy_origin(dsl_dataset_t *ds)
 987  987  {
 988  988          boolean_t might_destroy = B_FALSE;
 989  989  
 990  990          mutex_enter(&ds->ds_lock);
 991  991          if (ds->ds_phys->ds_num_children == 2 && ds->ds_userrefs == 0 &&
 992  992              DS_IS_DEFER_DESTROY(ds))
 993  993                  might_destroy = B_TRUE;
 994  994          mutex_exit(&ds->ds_lock);
 995  995  
 996  996          return (might_destroy);
 997  997  }
 998  998  
 999  999  /*
1000 1000   * If we're removing a clone, and these three conditions are true:
1001 1001   *      1) the clone's origin has no other children
1002 1002   *      2) the clone's origin has no user references
1003 1003   *      3) the clone's origin has been marked for deferred destruction
1004 1004   * Then, prepare to remove the origin as part of this sync task group.
1005 1005   */
1006 1006  static int
1007 1007  dsl_dataset_origin_rm_prep(struct dsl_ds_destroyarg *dsda, void *tag)
1008 1008  {
1009 1009          dsl_dataset_t *ds = dsda->ds;
1010 1010          dsl_dataset_t *origin = ds->ds_prev;
1011 1011  
1012 1012          if (dsl_dataset_might_destroy_origin(origin)) {
1013 1013                  char *name;
1014 1014                  int namelen;
1015 1015                  int error;
1016 1016  
1017 1017                  namelen = dsl_dataset_namelen(origin) + 1;
1018 1018                  name = kmem_alloc(namelen, KM_SLEEP);
1019 1019                  dsl_dataset_name(origin, name);
1020 1020  #ifdef _KERNEL
1021 1021                  error = zfs_unmount_snap(name, NULL);
1022 1022                  if (error) {
1023 1023                          kmem_free(name, namelen);
1024 1024                          return (error);
1025 1025                  }
1026 1026  #endif
1027 1027                  error = dsl_dataset_own(name, B_TRUE, tag, &origin);
1028 1028                  kmem_free(name, namelen);
1029 1029                  if (error)
1030 1030                          return (error);
1031 1031                  dsda->rm_origin = origin;
1032 1032                  dsl_dataset_make_exclusive(origin, tag);
1033 1033          }
1034 1034  
1035 1035          return (0);
1036 1036  }
1037 1037  
1038 1038  /*
1039 1039   * ds must be opened as OWNER.  On return (whether successful or not),
1040 1040   * ds will be closed and caller can no longer dereference it.
            *
            * Snapshots are destroyed by a single sync task (honoring "defer");
            * a deferred destroy of a non-snapshot fails with EINVAL.  For a head
            * dataset the function first marks it inconsistent, frees its objects
            * in open context when async destroy is not enabled, destroys the ZIL,
            * waits for a txg sync, and then runs the dataset and dsl_dir destroy
            * tasks together in one sync task group.
            *
            * "tag" is the owner tag: it is used for dsl_dataset_make_exclusive()
            * and for the final dsl_dataset_disown().
            *
            * Returns 0 on success or the errno value of the step that failed.
1041 1041   */
1042 1042  int
1043 1043  dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
1044 1044  {
1045 1045          int err;
1046 1046          dsl_sync_task_group_t *dstg;
1047 1047          objset_t *os;
1048 1048          dsl_dir_t *dd;
1049 1049          uint64_t obj;
1050 1050          struct dsl_ds_destroyarg dsda = { 0 };
1051 1051  
1052 1052          dsda.ds = ds;
1053 1053  
1054 1054          if (dsl_dataset_is_snapshot(ds)) {
1055 1055                  /* Destroying a snapshot is simpler */
1056 1056                  dsl_dataset_make_exclusive(ds, tag);
1057 1057  
1058 1058                  dsda.defer = defer;
1059 1059                  err = dsl_sync_task_do(ds->ds_dir->dd_pool,
1060 1060                      dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
1061 1061                      &dsda, tag, 0);
1062 1062                  ASSERT3P(dsda.rm_origin, ==, NULL);
1063 1063                  goto out;
1064 1064          } else if (defer) {
                           /* deferred destroy is only accepted for snapshots */
1065 1065                  err = EINVAL;
1066 1066                  goto out;
1067 1067          }
1068 1068  
1069 1069          dd = ds->ds_dir;
1070 1070  
1071 1071          /*
1072 1072           * Check for errors and mark this ds as inconsistent, in
1073 1073           * case we crash while freeing the objects.
1074 1074           */
1075 1075          err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
1076 1076              dsl_dataset_destroy_begin_sync, ds, NULL, 0);
1077 1077          if (err)
1078 1078                  goto out;
1079 1079  
1080 1080          err = dmu_objset_from_ds(ds, &os);
1081 1081          if (err)
1082 1082                  goto out;
1083 1083  
1084 1084          /*
1085 1085           * If async destruction is not enabled try to remove all objects
1086 1086           * while in the open context so that there is less work to do in
1087 1087           * the syncing context.
1088 1088           */
1089 1089          if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
1090 1090              &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
1091 1091                  for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
1092 1092                      ds->ds_phys->ds_prev_snap_txg)) {
1093 1093                          /*
1094 1094                           * Ignore errors, if there is not enough disk space
1095 1095                           * we will deal with it in dsl_dataset_destroy_sync().
1096 1096                           */
1097 1097                          (void) dmu_free_object(os, obj);
1098 1098                  }
                           /*
                            * The loop only ends on a nonzero result from
                            * dmu_object_next(); anything other than ESRCH is
                            * returned to the caller as a failure.
                            */
1099 1099                  if (err != ESRCH)
1100 1100                          goto out;
1101 1101          }
1102 1102  
1103 1103          /*
1104 1104           * Only the ZIL knows how to free log blocks.
1105 1105           */
1106 1106          zil_destroy(dmu_objset_zil(os), B_FALSE);
1107 1107  
1108 1108          /*
1109 1109           * Sync out all in-flight IO.
1110 1110           */
1111 1111          txg_wait_synced(dd->dd_pool, 0);
1112 1112  
1113 1113          /*
1114 1114           * If we managed to free all the objects in open
1115 1115           * context, the user space accounting should be zero.
1116 1116           */
1117 1117          if (ds->ds_phys->ds_bp.blk_fill == 0 &&
1118 1118              dmu_objset_userused_enabled(os)) {
1119 1119                  uint64_t count;
1120 1120  
1121 1121                  ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
1122 1122                      count == 0);
1123 1123                  ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
1124 1124                      count == 0);
1125 1125          }
1126 1126  
                   /*
                    * Open the dsl_dir by object number under dp_config_rwlock,
                    * held with FTAG; note that dd is overwritten with the
                    * handle that dsl_dir_open_obj() hands back.
                    */
1127 1127          rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
1128 1128          err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
1129 1129          rw_exit(&dd->dd_pool->dp_config_rwlock);
1130 1130  
1131 1131          if (err)
1132 1132                  goto out;
1133 1133  
1134 1134          /*
1135 1135           * Blow away the dsl_dir + head dataset.
1136 1136           */
1137 1137          dsl_dataset_make_exclusive(ds, tag);
1138 1138          /*
1139 1139           * If we're removing a clone, we might also need to remove its
1140 1140           * origin.
1141 1141           */
1142 1142          do {
                           /*
                            * need_prep is cleared on every pass; the loop repeats
                            * only if it has been set again by the time the task
                            * group has finished.
                            */
1143 1143                  dsda.need_prep = B_FALSE;
1144 1144                  if (dsl_dir_is_clone(dd)) {
1145 1145                          err = dsl_dataset_origin_rm_prep(&dsda, tag);
1146 1146                          if (err) {
1147 1147                                  dsl_dir_close(dd, FTAG);
1148 1148                                  goto out;
1149 1149                          }
1150 1150                  }
1151 1151  
1152 1152                  dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
1153 1153                  dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
1154 1154                      dsl_dataset_destroy_sync, &dsda, tag, 0);
1155 1155                  dsl_sync_task_create(dstg, dsl_dir_destroy_check,
1156 1156                      dsl_dir_destroy_sync, dd, FTAG, 0);
1157 1157                  err = dsl_sync_task_group_wait(dstg);
1158 1158                  dsl_sync_task_group_destroy(dstg);
1159 1159  
1160 1160                  /*
1161 1161                   * We could be racing against 'zfs release' or 'zfs destroy -d'
1162 1162                   * on the origin snap, in which case we can get EBUSY if we
1163 1163                   * needed to destroy the origin snap but were not ready to
1164 1164                   * do so.
1165 1165                   */
1166 1166                  if (dsda.need_prep) {
1167 1167                          ASSERT(err == EBUSY);
1168 1168                          ASSERT(dsl_dir_is_clone(dd));
1169 1169                          ASSERT(dsda.rm_origin == NULL);
1170 1170                  }
1171 1171          } while (dsda.need_prep);
1172 1172  
1173 1173          if (dsda.rm_origin != NULL)
1174 1174                  dsl_dataset_disown(dsda.rm_origin, tag);
1175 1175  
1176 1176          /* if it is successful, dsl_dir_destroy_sync will close the dd */
1177 1177          if (err)
1178 1178                  dsl_dir_close(dd, FTAG);
1179 1179  out:
                   /* every path, successful or not, gives up ownership of ds here */
1180 1180          dsl_dataset_disown(ds, tag);
1181 1181          return (err);
1182 1182  }
1183 1183  
1184 1184  blkptr_t *
1185 1185  dsl_dataset_get_blkptr(dsl_dataset_t *ds)
1186 1186  {
1187 1187          return (&ds->ds_phys->ds_bp);
1188 1188  }
1189 1189  
1190 1190  void
1191 1191  dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
1192 1192  {
1193 1193          ASSERT(dmu_tx_is_syncing(tx));
1194 1194          /* If it's the meta-objset, set dp_meta_rootbp */
1195 1195          if (ds == NULL) {
1196 1196                  tx->tx_pool->dp_meta_rootbp = *bp;
1197 1197          } else {
1198 1198                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
1199 1199                  ds->ds_phys->ds_bp = *bp;
1200 1200          }
1201 1201  }
1202 1202  
1203 1203  spa_t *
1204 1204  dsl_dataset_get_spa(dsl_dataset_t *ds)
1205 1205  {
1206 1206          return (ds->ds_dir->dd_pool->dp_spa);
1207 1207  }
1208 1208  
1209 1209  void
1210 1210  dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
1211 1211  {
1212 1212          dsl_pool_t *dp;
1213 1213  
1214 1214          if (ds == NULL) /* this is the meta-objset */
1215 1215                  return;
1216 1216  
1217 1217          ASSERT(ds->ds_objset != NULL);
1218 1218  
1219 1219          if (ds->ds_phys->ds_next_snap_obj != 0)
1220 1220                  panic("dirtying snapshot!");
1221 1221  
1222 1222          dp = ds->ds_dir->dd_pool;
1223 1223  
1224 1224          if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
1225 1225                  /* up the hold count until we can be written out */
1226 1226                  dmu_buf_add_ref(ds->ds_dbuf, ds);
1227 1227          }
1228 1228  }
1229 1229  
1230 1230  /*
1231 1231   * The unique space in the head dataset can be calculated by subtracting
1232 1232   * the space used in the most recent snapshot, that is still being used
1233 1233   * in this file system, from the space currently in use.  To figure out
1234 1234   * the space in the most recent snapshot still in use, we need to take
1235 1235   * the total space used in the snapshot and subtract out the space that
1236 1236   * has been freed up since the snapshot was taken.
1237 1237   */
1238 1238  static void
1239 1239  dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
1240 1240  {
1241 1241          uint64_t mrs_used;
1242 1242          uint64_t dlused, dlcomp, dluncomp;
1243 1243  
1244 1244          ASSERT(!dsl_dataset_is_snapshot(ds));
1245 1245  
1246 1246          if (ds->ds_phys->ds_prev_snap_obj != 0)
1247 1247                  mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
1248 1248          else
1249 1249                  mrs_used = 0;
1250 1250  
1251 1251          dsl_deadlist_space(&ds->ds_deadlist, &dlused, &dlcomp, &dluncomp);
1252 1252  
1253 1253          ASSERT3U(dlused, <=, mrs_used);
1254 1254          ds->ds_phys->ds_unique_bytes =
1255 1255              ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);
1256 1256  
1257 1257          if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
1258 1258              SPA_VERSION_UNIQUE_ACCURATE)
1259 1259                  ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
1260 1260  }
1261 1261  
1262 1262  struct killarg {
1263 1263          dsl_dataset_t *ds;
1264 1264          dmu_tx_t *tx;
1265 1265  };
1266 1266  
1267 1267  /* ARGSUSED */
1268 1268  static int
1269 1269  kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
1270 1270      const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
1271 1271  {
1272 1272          struct killarg *ka = arg;
1273 1273          dmu_tx_t *tx = ka->tx;
1274 1274  
1275 1275          if (bp == NULL)
1276 1276                  return (0);
1277 1277  
1278 1278          if (zb->zb_level == ZB_ZIL_LEVEL) {
1279 1279                  ASSERT(zilog != NULL);
1280 1280                  /*
1281 1281                   * It's a block in the intent log.  It has no
1282 1282                   * accounting, so just free it.
1283 1283                   */
1284 1284                  dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
1285 1285          } else {
1286 1286                  ASSERT(zilog == NULL);
1287 1287                  ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
1288 1288                  (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
1289 1289          }
1290 1290  
1291 1291          return (0);
1292 1292  }
1293 1293  
1294 1294  /* ARGSUSED */
1295 1295  static int
1296 1296  dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
1297 1297  {
1298 1298          dsl_dataset_t *ds = arg1;
1299 1299          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1300 1300          uint64_t count;
1301 1301          int err;
1302 1302  
1303 1303          /*
1304 1304           * Can't delete a head dataset if there are snapshots of it.
1305 1305           * (Except if the only snapshots are from the branch we cloned
1306 1306           * from.)
1307 1307           */
1308 1308          if (ds->ds_prev != NULL &&
1309 1309              ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1310 1310                  return (EBUSY);
1311 1311  
1312 1312          /*
1313 1313           * This is really a dsl_dir thing, but check it here so that
1314 1314           * we'll be less likely to leave this dataset inconsistent &
1315 1315           * nearly destroyed.
1316 1316           */
1317 1317          err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
1318 1318          if (err)
1319 1319                  return (err);
1320 1320          if (count != 0)
1321 1321                  return (EEXIST);
1322 1322  
1323 1323          return (0);
1324 1324  }
1325 1325  
1326 1326  /* ARGSUSED */
1327 1327  static void
1328 1328  dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1329 1329  {
1330 1330          dsl_dataset_t *ds = arg1;
1331 1331  
1332 1332          /* Mark it as inconsistent on-disk, in case we crash */
1333 1333          dmu_buf_will_dirty(ds->ds_dbuf, tx);
1334 1334          ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
1335 1335  
1336 1336          spa_history_log_internal_ds(ds, "destroy begin", tx, "");
1337 1337  }
1338 1338  
1339 1339  static int
1340 1340  dsl_dataset_origin_check(struct dsl_ds_destroyarg *dsda, void *tag,
1341 1341      dmu_tx_t *tx)
1342 1342  {
1343 1343          dsl_dataset_t *ds = dsda->ds;
1344 1344          dsl_dataset_t *ds_prev = ds->ds_prev;
1345 1345  
1346 1346          if (dsl_dataset_might_destroy_origin(ds_prev)) {
1347 1347                  struct dsl_ds_destroyarg ndsda = {0};
1348 1348  
1349 1349                  /*
1350 1350                   * If we're not prepared to remove the origin, don't remove
1351 1351                   * the clone either.
1352 1352                   */
1353 1353                  if (dsda->rm_origin == NULL) {
1354 1354                          dsda->need_prep = B_TRUE;
1355 1355                          return (EBUSY);
1356 1356                  }
1357 1357  
1358 1358                  ndsda.ds = ds_prev;
1359 1359                  ndsda.is_origin_rm = B_TRUE;
1360 1360                  return (dsl_dataset_destroy_check(&ndsda, tag, tx));
1361 1361          }
1362 1362  
1363 1363          /*
1364 1364           * If we're not going to remove the origin after all,
1365 1365           * undo the open context setup.
1366 1366           */
1367 1367          if (dsda->rm_origin != NULL) {
1368 1368                  dsl_dataset_disown(dsda->rm_origin, tag);
1369 1369                  dsda->rm_origin = NULL;
1370 1370          }
1371 1371  
1372 1372          return (0);
1373 1373  }
1374 1374  
1375 1375  /*
1376 1376   * If you add new checks here, you may need to add
1377 1377   * additional checks to the "temporary" case in
1378 1378   * snapshot_check() in dmu_objset.c.
1379 1379   */
1380 1380  /* ARGSUSED */
1381 1381  int
1382 1382  dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
1383 1383  {
1384 1384          struct dsl_ds_destroyarg *dsda = arg1;
1385 1385          dsl_dataset_t *ds = dsda->ds;
1386 1386  
1387 1387          /* we have an owner hold, so noone else can destroy us */
1388 1388          ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
1389 1389  
1390 1390          /*
1391 1391           * Only allow deferred destroy on pools that support it.
1392 1392           * NOTE: deferred destroy is only supported on snapshots.
1393 1393           */
1394 1394          if (dsda->defer) {
1395 1395                  if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
1396 1396                      SPA_VERSION_USERREFS)
1397 1397                          return (ENOTSUP);
1398 1398                  ASSERT(dsl_dataset_is_snapshot(ds));
1399 1399                  return (0);
1400 1400          }
1401 1401  
1402 1402          /*
1403 1403           * Can't delete a head dataset if there are snapshots of it.
1404 1404           * (Except if the only snapshots are from the branch we cloned
1405 1405           * from.)
1406 1406           */
1407 1407          if (ds->ds_prev != NULL &&
1408 1408              ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1409 1409                  return (EBUSY);
1410 1410  
1411 1411          /*
1412 1412           * If we made changes this txg, traverse_dsl_dataset won't find
1413 1413           * them.  Try again.
1414 1414           */
1415 1415          if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1416 1416                  return (EAGAIN);
1417 1417  
1418 1418          if (dsl_dataset_is_snapshot(ds)) {
1419 1419                  /*
1420 1420                   * If this snapshot has an elevated user reference count,
1421 1421                   * we can't destroy it yet.
1422 1422                   */
1423 1423                  if (ds->ds_userrefs > 0 && !dsda->releasing)
1424 1424                          return (EBUSY);
1425 1425  
1426 1426                  mutex_enter(&ds->ds_lock);
1427 1427                  /*
1428 1428                   * Can't delete a branch point. However, if we're destroying
1429 1429                   * a clone and removing its origin due to it having a user
1430 1430                   * hold count of 0 and having been marked for deferred destroy,
1431 1431                   * it's OK for the origin to have a single clone.
1432 1432                   */
1433 1433                  if (ds->ds_phys->ds_num_children >
1434 1434                      (dsda->is_origin_rm ? 2 : 1)) {
1435 1435                          mutex_exit(&ds->ds_lock);
1436 1436                          return (EEXIST);
1437 1437                  }
1438 1438                  mutex_exit(&ds->ds_lock);
1439 1439          } else if (dsl_dir_is_clone(ds->ds_dir)) {
1440 1440                  return (dsl_dataset_origin_check(dsda, arg2, tx));
1441 1441          }
1442 1442  
1443 1443          /* XXX we should do some i/o error checking... */
1444 1444          return (0);
1445 1445  }
1446 1446  
1447 1447  struct refsarg {
1448 1448          kmutex_t lock;
1449 1449          boolean_t gone;
1450 1450          kcondvar_t cv;
1451 1451  };
1452 1452  
1453 1453  /* ARGSUSED */
1454 1454  static void
1455 1455  dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
1456 1456  {
1457 1457          struct refsarg *arg = argv;
1458 1458  
1459 1459          mutex_enter(&arg->lock);
1460 1460          arg->gone = TRUE;
1461 1461          cv_signal(&arg->cv);
1462 1462          mutex_exit(&arg->lock);
1463 1463  }
1464 1464  
1465 1465  static void
1466 1466  dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
1467 1467  {
1468 1468          struct refsarg arg;
1469 1469  
1470 1470          mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
1471 1471          cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
1472 1472          arg.gone = FALSE;
1473 1473          (void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
1474 1474              dsl_dataset_refs_gone);
1475 1475          dmu_buf_rele(ds->ds_dbuf, tag);
1476 1476          mutex_enter(&arg.lock);
1477 1477          while (!arg.gone)
1478 1478                  cv_wait(&arg.cv, &arg.lock);
1479 1479          ASSERT(arg.gone);
1480 1480          mutex_exit(&arg.lock);
1481 1481          ds->ds_dbuf = NULL;
1482 1482          ds->ds_phys = NULL;
1483 1483          mutex_destroy(&arg.lock);
1484 1484          cv_destroy(&arg.cv);
1485 1485  }
1486 1486  
1487 1487  static void
1488 1488  remove_from_next_clones(dsl_dataset_t *ds, uint64_t obj, dmu_tx_t *tx)
1489 1489  {
1490 1490          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1491 1491          uint64_t count;
1492 1492          int err;
1493 1493  
1494 1494          ASSERT(ds->ds_phys->ds_num_children >= 2);
1495 1495          err = zap_remove_int(mos, ds->ds_phys->ds_next_clones_obj, obj, tx);
1496 1496          /*

↓ open down ↓

582 lines elided

↑ open up ↑

1497 1497           * The err should not be ENOENT, but a bug in a previous version
1498 1498           * of the code could cause upgrade_clones_cb() to not set
1499 1499           * ds_next_snap_obj when it should, leading to a missing entry.
1500 1500           * If we knew that the pool was created after
1501 1501           * SPA_VERSION_NEXT_CLONES, we could assert that it isn't
1502 1502           * ENOENT.  However, at least we can check that we don't have
1503 1503           * too many entries in the next_clones_obj even after failing to
1504 1504           * remove this one.
1505 1505           */
1506 1506          if (err != ENOENT) {
1507      -                VERIFY3U(err, ==, 0);
     1507 +                VERIFY0(err);
1508 1508          }
1509      -        ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
1510      -            &count));
     1509 +        ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj, &count));
1511 1510          ASSERT3U(count, <=, ds->ds_phys->ds_num_children - 2);
1512 1511  }
1513 1512  
1514 1513  static void
1515 1514  dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
1516 1515  {
1517 1516          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1518 1517          zap_cursor_t zc;
1519 1518          zap_attribute_t za;
1520 1519

1521 1520          /*
1522 1521           * If it is the old version, dd_clones doesn't exist so we can't
1523 1522           * find the clones, but deadlist_remove_key() is a no-op so it

↓ open down ↓

3 lines elided

↑ open up ↑

1524 1523           * doesn't matter.
1525 1524           */
1526 1525          if (ds->ds_dir->dd_phys->dd_clones == 0)
1527 1526                  return;
1528 1527  
1529 1528          for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones);
1530 1529              zap_cursor_retrieve(&zc, &za) == 0;
1531 1530              zap_cursor_advance(&zc)) {
1532 1531                  dsl_dataset_t *clone;
1533 1532  
1534      -                VERIFY3U(0, ==, dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
     1533 +                VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
1535 1534                      za.za_first_integer, FTAG, &clone));
1536 1535                  if (clone->ds_dir->dd_origin_txg > mintxg) {
1537 1536                          dsl_deadlist_remove_key(&clone->ds_deadlist,
1538 1537                              mintxg, tx);
1539 1538                          dsl_dataset_remove_clones_key(clone, mintxg, tx);
1540 1539                  }
1541 1540                  dsl_dataset_rele(clone, FTAG);
1542 1541          }
1543 1542          zap_cursor_fini(&zc);
1544 1543  }

1545 1544  
1546 1545  struct process_old_arg {
1547 1546          dsl_dataset_t *ds;
1548 1547          dsl_dataset_t *ds_prev;
1549 1548          boolean_t after_branch_point;
1550 1549          zio_t *pio;
1551 1550          uint64_t used, comp, uncomp;
1552 1551  };
1553 1552  
1554 1553  static int
1555 1554  process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1556 1555  {
1557 1556          struct process_old_arg *poa = arg;
1558 1557          dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
1559 1558  
1560 1559          if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) {
1561 1560                  dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
1562 1561                  if (poa->ds_prev && !poa->after_branch_point &&
1563 1562                      bp->blk_birth >
1564 1563                      poa->ds_prev->ds_phys->ds_prev_snap_txg) {
1565 1564                          poa->ds_prev->ds_phys->ds_unique_bytes +=
1566 1565                              bp_get_dsize_sync(dp->dp_spa, bp);
1567 1566                  }
1568 1567          } else {
1569 1568                  poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
1570 1569                  poa->comp += BP_GET_PSIZE(bp);
1571 1570                  poa->uncomp += BP_GET_UCSIZE(bp);
1572 1571                  dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
1573 1572          }
1574 1573          return (0);
1575 1574  }
1576 1575  
1577 1576  static void
1578 1577  process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
1579 1578      dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
1580 1579  {
1581 1580          struct process_old_arg poa = { 0 };

↓ open down ↓

37 lines elided

↑ open up ↑

1582 1581          dsl_pool_t *dp = ds->ds_dir->dd_pool;
1583 1582          objset_t *mos = dp->dp_meta_objset;
1584 1583  
1585 1584          ASSERT(ds->ds_deadlist.dl_oldfmt);
1586 1585          ASSERT(ds_next->ds_deadlist.dl_oldfmt);
1587 1586  
1588 1587          poa.ds = ds;
1589 1588          poa.ds_prev = ds_prev;
1590 1589          poa.after_branch_point = after_branch_point;
1591 1590          poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
1592      -        VERIFY3U(0, ==, bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
     1591 +        VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
1593 1592              process_old_cb, &poa, tx));
1594      -        VERIFY3U(zio_wait(poa.pio), ==, 0);
     1593 +        VERIFY0(zio_wait(poa.pio));
1595 1594          ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);
1596 1595  
1597 1596          /* change snapused */
1598 1597          dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
1599 1598              -poa.used, -poa.comp, -poa.uncomp, tx);
1600 1599  
1601 1600          /* swap next's deadlist to our deadlist */
1602 1601          dsl_deadlist_close(&ds->ds_deadlist);
1603 1602          dsl_deadlist_close(&ds_next->ds_deadlist);
1604 1603          SWITCH64(ds_next->ds_phys->ds_deadlist_obj,

1605 1604              ds->ds_phys->ds_deadlist_obj);
1606 1605          dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
1607 1606          dsl_deadlist_open(&ds_next->ds_deadlist, mos,
1608 1607              ds_next->ds_phys->ds_deadlist_obj);
1609 1608  }
1610 1609  
1611 1610  static int
1612 1611  old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
1613 1612  {
1614 1613          int err;
1615 1614          struct killarg ka;
1616 1615  
1617 1616          /*
1618 1617           * Free everything that we point to (that's born after

↓ open down ↓

14 lines elided

↑ open up ↑

1619 1618           * the previous snapshot, if we are a clone)
1620 1619           *
1621 1620           * NB: this should be very quick, because we already
1622 1621           * freed all the objects in open context.
1623 1622           */
1624 1623          ka.ds = ds;
1625 1624          ka.tx = tx;
1626 1625          err = traverse_dataset(ds,
1627 1626              ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
1628 1627              kill_blkptr, &ka);
1629      -        ASSERT3U(err, ==, 0);
     1628 +        ASSERT0(err);
1630 1629          ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
1631 1630  
1632 1631          return (err);
1633 1632  }
1634 1633  
1635 1634  void
1636 1635  dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
1637 1636  {
1638 1637          struct dsl_ds_destroyarg *dsda = arg1;
1639 1638          dsl_dataset_t *ds = dsda->ds;

1640 1639          int err;
1641 1640          int after_branch_point = FALSE;
1642 1641          dsl_pool_t *dp = ds->ds_dir->dd_pool;
1643 1642          objset_t *mos = dp->dp_meta_objset;
1644 1643          dsl_dataset_t *ds_prev = NULL;
1645 1644          boolean_t wont_destroy;
1646 1645          uint64_t obj;
1647 1646  
1648 1647          wont_destroy = (dsda->defer &&
1649 1648              (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1));
1650 1649  
1651 1650          ASSERT(ds->ds_owner || wont_destroy);
1652 1651          ASSERT(dsda->defer || ds->ds_phys->ds_num_children <= 1);
1653 1652          ASSERT(ds->ds_prev == NULL ||
1654 1653              ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
1655 1654          ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
1656 1655  
1657 1656          if (wont_destroy) {
1658 1657                  ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
1659 1658                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
1660 1659                  ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
1661 1660                  spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
1662 1661                  return;
1663 1662          }
1664 1663  
1665 1664          /* We need to log before removing it from the namespace. */
1666 1665          spa_history_log_internal_ds(ds, "destroy", tx, "");
1667 1666  
1668 1667          /* signal any waiters that this dataset is going away */
1669 1668          mutex_enter(&ds->ds_lock);
1670 1669          ds->ds_owner = dsl_reaper;
1671 1670          cv_broadcast(&ds->ds_exclusive_cv);
1672 1671          mutex_exit(&ds->ds_lock);
1673 1672  
1674 1673          /* Remove our reservation */

↓ open down ↓

35 lines elided

↑ open up ↑

1675 1674          if (ds->ds_reserved != 0) {
1676 1675                  dsl_prop_setarg_t psa;
1677 1676                  uint64_t value = 0;
1678 1677  
1679 1678                  dsl_prop_setarg_init_uint64(&psa, "refreservation",
1680 1679                      (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
1681 1680                      &value);
1682 1681                  psa.psa_effective_value = 0;    /* predict default value */
1683 1682  
1684 1683                  dsl_dataset_set_reservation_sync(ds, &psa, tx);
1685      -                ASSERT3U(ds->ds_reserved, ==, 0);
     1684 +                ASSERT0(ds->ds_reserved);
1686 1685          }
1687 1686  
1688 1687          ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
1689 1688  
1690 1689          dsl_scan_ds_destroyed(ds, tx);
1691 1690  
1692 1691          obj = ds->ds_object;
1693 1692  
1694 1693          if (ds->ds_phys->ds_prev_snap_obj != 0) {
1695 1694                  if (ds->ds_prev) {

1696 1695                          ds_prev = ds->ds_prev;
1697 1696                  } else {
1698 1697                          VERIFY(0 == dsl_dataset_hold_obj(dp,
1699 1698                              ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
1700 1699                  }
1701 1700                  after_branch_point =
1702 1701                      (ds_prev->ds_phys->ds_next_snap_obj != obj);
1703 1702  
1704 1703                  dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1705 1704                  if (after_branch_point &&
1706 1705                      ds_prev->ds_phys->ds_next_clones_obj != 0) {
1707 1706                          remove_from_next_clones(ds_prev, obj, tx);
1708 1707                          if (ds->ds_phys->ds_next_snap_obj != 0) {
1709 1708                                  VERIFY(0 == zap_add_int(mos,
1710 1709                                      ds_prev->ds_phys->ds_next_clones_obj,
1711 1710                                      ds->ds_phys->ds_next_snap_obj, tx));
1712 1711                          }
1713 1712                  }
1714 1713                  if (after_branch_point &&
1715 1714                      ds->ds_phys->ds_next_snap_obj == 0) {
1716 1715                          /* This clone is toast. */
1717 1716                          ASSERT(ds_prev->ds_phys->ds_num_children > 1);
1718 1717                          ds_prev->ds_phys->ds_num_children--;
1719 1718  
1720 1719                          /*
1721 1720                           * If the clone's origin has no other clones, no
1722 1721                           * user holds, and has been marked for deferred
1723 1722                           * deletion, then we should have done the necessary
1724 1723                           * destroy setup for it.
1725 1724                           */
1726 1725                          if (ds_prev->ds_phys->ds_num_children == 1 &&
1727 1726                              ds_prev->ds_userrefs == 0 &&
1728 1727                              DS_IS_DEFER_DESTROY(ds_prev)) {
1729 1728                                  ASSERT3P(dsda->rm_origin, !=, NULL);
1730 1729                          } else {
1731 1730                                  ASSERT3P(dsda->rm_origin, ==, NULL);
1732 1731                          }
1733 1732                  } else if (!after_branch_point) {
1734 1733                          ds_prev->ds_phys->ds_next_snap_obj =
1735 1734                              ds->ds_phys->ds_next_snap_obj;
1736 1735                  }
1737 1736          }
1738 1737  
1739 1738          if (dsl_dataset_is_snapshot(ds)) {
1740 1739                  dsl_dataset_t *ds_next;
1741 1740                  uint64_t old_unique;
1742 1741                  uint64_t used = 0, comp = 0, uncomp = 0;
1743 1742  
1744 1743                  VERIFY(0 == dsl_dataset_hold_obj(dp,
1745 1744                      ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
1746 1745                  ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
1747 1746  
1748 1747                  old_unique = ds_next->ds_phys->ds_unique_bytes;
1749 1748  
1750 1749                  dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
1751 1750                  ds_next->ds_phys->ds_prev_snap_obj =
1752 1751                      ds->ds_phys->ds_prev_snap_obj;
1753 1752                  ds_next->ds_phys->ds_prev_snap_txg =
1754 1753                      ds->ds_phys->ds_prev_snap_txg;
1755 1754                  ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1756 1755                      ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
1757 1756  
1758 1757  
1759 1758                  if (ds_next->ds_deadlist.dl_oldfmt) {
1760 1759                          process_old_deadlist(ds, ds_prev, ds_next,
1761 1760                              after_branch_point, tx);
1762 1761                  } else {
1763 1762                          /* Adjust prev's unique space. */
1764 1763                          if (ds_prev && !after_branch_point) {
1765 1764                                  dsl_deadlist_space_range(&ds_next->ds_deadlist,
1766 1765                                      ds_prev->ds_phys->ds_prev_snap_txg,
1767 1766                                      ds->ds_phys->ds_prev_snap_txg,
1768 1767                                      &used, &comp, &uncomp);
1769 1768                                  ds_prev->ds_phys->ds_unique_bytes += used;
1770 1769                          }
1771 1770  
1772 1771                          /* Adjust snapused. */
1773 1772                          dsl_deadlist_space_range(&ds_next->ds_deadlist,
1774 1773                              ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
1775 1774                              &used, &comp, &uncomp);
1776 1775                          dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
1777 1776                              -used, -comp, -uncomp, tx);
1778 1777  
1779 1778                          /* Move blocks to be freed to pool's free list. */
1780 1779                          dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
1781 1780                              &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
1782 1781                              tx);
1783 1782                          dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
1784 1783                              DD_USED_HEAD, used, comp, uncomp, tx);
1785 1784  
1786 1785                          /* Merge our deadlist into next's and free it. */
1787 1786                          dsl_deadlist_merge(&ds_next->ds_deadlist,
1788 1787                              ds->ds_phys->ds_deadlist_obj, tx);
1789 1788                  }
1790 1789                  dsl_deadlist_close(&ds->ds_deadlist);
1791 1790                  dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
1792 1791  
1793 1792                  /* Collapse range in clone heads */
1794 1793                  dsl_dataset_remove_clones_key(ds,
1795 1794                      ds->ds_phys->ds_creation_txg, tx);
1796 1795  
1797 1796                  if (dsl_dataset_is_snapshot(ds_next)) {
1798 1797                          dsl_dataset_t *ds_nextnext;
1799 1798  
1800 1799                          /*
1801 1800                           * Update next's unique to include blocks which
1802 1801                           * were previously shared by only this snapshot
1803 1802                           * and it.  Those blocks will be born after the
1804 1803                           * prev snap and before this snap, and will have
1805 1804                           * died after the next snap and before the one
1806 1805                           * after that (i.e. be on the snap after next's
1807 1806                           * deadlist).
1808 1807                           */
1809 1808                          VERIFY(0 == dsl_dataset_hold_obj(dp,
1810 1809                              ds_next->ds_phys->ds_next_snap_obj,
1811 1810                              FTAG, &ds_nextnext));

↓ open down ↓

116 lines elided

↑ open up ↑

1812 1811                          dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
1813 1812                              ds->ds_phys->ds_prev_snap_txg,
1814 1813                              ds->ds_phys->ds_creation_txg,
1815 1814                              &used, &comp, &uncomp);
1816 1815                          ds_next->ds_phys->ds_unique_bytes += used;
1817 1816                          dsl_dataset_rele(ds_nextnext, FTAG);
1818 1817                          ASSERT3P(ds_next->ds_prev, ==, NULL);
1819 1818  
1820 1819                          /* Collapse range in this head. */
1821 1820                          dsl_dataset_t *hds;
1822      -                        VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
     1821 +                        VERIFY0(dsl_dataset_hold_obj(dp,
1823 1822                              ds->ds_dir->dd_phys->dd_head_dataset_obj,
1824 1823                              FTAG, &hds));
1825 1824                          dsl_deadlist_remove_key(&hds->ds_deadlist,
1826 1825                              ds->ds_phys->ds_creation_txg, tx);
1827 1826                          dsl_dataset_rele(hds, FTAG);
1828 1827  
1829 1828                  } else {
1830 1829                          ASSERT3P(ds_next->ds_prev, ==, ds);
1831 1830                          dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
1832 1831                          ds_next->ds_prev = NULL;

1833 1832                          if (ds_prev) {
1834 1833                                  VERIFY(0 == dsl_dataset_get_ref(dp,
1835 1834                                      ds->ds_phys->ds_prev_snap_obj,
1836 1835                                      ds_next, &ds_next->ds_prev));
1837 1836                          }
1838 1837  
1839 1838                          dsl_dataset_recalc_head_uniq(ds_next);
1840 1839  
1841 1840                          /*
1842 1841                           * Reduce the amount of our unconsumed refreservation
1843 1842                           * being charged to our parent by the amount of
1844 1843                           * new unique data we have gained.
1845 1844                           */
1846 1845                          if (old_unique < ds_next->ds_reserved) {
1847 1846                                  int64_t mrsdelta;
1848 1847                                  uint64_t new_unique =
1849 1848                                      ds_next->ds_phys->ds_unique_bytes;
1850 1849  
1851 1850                                  ASSERT(old_unique <= new_unique);
1852 1851                                  mrsdelta = MIN(new_unique - old_unique,
1853 1852                                      ds_next->ds_reserved - old_unique);
1854 1853                                  dsl_dir_diduse_space(ds->ds_dir,
1855 1854                                      DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
1856 1855                          }
1857 1856                  }
1858 1857                  dsl_dataset_rele(ds_next, FTAG);
1859 1858          } else {
1860 1859                  zfeature_info_t *async_destroy =
1861 1860                      &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
1862 1861  
1863 1862                  /*
1864 1863                   * There's no next snapshot, so this is a head dataset.
1865 1864                   * Destroy the deadlist.  Unless it's a clone, the
1866 1865                   * deadlist should be empty.  (If it's a clone, it's
1867 1866                   * safe to ignore the deadlist contents.)
1868 1867                   */
1869 1868                  dsl_deadlist_close(&ds->ds_deadlist);
1870 1869                  dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
1871 1870                  ds->ds_phys->ds_deadlist_obj = 0;
1872 1871  
1873 1872                  if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
1874 1873                          err = old_synchronous_dataset_destroy(ds, tx);
1875 1874                  } else {
1876 1875                          /*
1877 1876                           * Move the bptree into the pool's list of trees to
1878 1877                           * clean up and update space accounting information.
1879 1878                           */
1880 1879                          uint64_t used, comp, uncomp;
1881 1880  
1882 1881                          ASSERT(err == 0 || err == EBUSY);
1883 1882                          if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
1884 1883                                  spa_feature_incr(dp->dp_spa, async_destroy, tx);
1885 1884                                  dp->dp_bptree_obj = bptree_alloc(
1886 1885                                      dp->dp_meta_objset, tx);
1887 1886                                  VERIFY(zap_add(dp->dp_meta_objset,
1888 1887                                      DMU_POOL_DIRECTORY_OBJECT,
1889 1888                                      DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
1890 1889                                      &dp->dp_bptree_obj, tx) == 0);
1891 1890                          }
1892 1891  
1893 1892                          used = ds->ds_dir->dd_phys->dd_used_bytes;
1894 1893                          comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
1895 1894                          uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
1896 1895  
1897 1896                          ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
1898 1897                              ds->ds_phys->ds_unique_bytes == used);
1899 1898  
1900 1899                          bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,

↓ open down ↓

68 lines elided

↑ open up ↑

1901 1900                              &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
1902 1901                              used, comp, uncomp, tx);
1903 1902                          dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
1904 1903                              -used, -comp, -uncomp, tx);
1905 1904                          dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
1906 1905                              used, comp, uncomp, tx);
1907 1906                  }
1908 1907  
1909 1908                  if (ds->ds_prev != NULL) {
1910 1909                          if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
1911      -                                VERIFY3U(0, ==, zap_remove_int(mos,
     1910 +                                VERIFY0(zap_remove_int(mos,
1912 1911                                      ds->ds_prev->ds_dir->dd_phys->dd_clones,
1913 1912                                      ds->ds_object, tx));
1914 1913                          }
1915 1914                          dsl_dataset_rele(ds->ds_prev, ds);
1916 1915                          ds->ds_prev = ds_prev = NULL;
1917 1916                  }
1918 1917          }
1919 1918  
1920 1919          /*
1921 1920           * This must be done after the traverse_dataset(), because it will

1922 1921           * re-open the objset.
1923 1922           */
1924 1923          if (ds->ds_objset) {
1925 1924                  dmu_objset_evict(ds->ds_objset);
1926 1925                  ds->ds_objset = NULL;
1927 1926          }
1928 1927  
1929 1928          if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1930 1929                  /* Erase the link in the dir */
1931 1930                  dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
1932 1931                  ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
1933 1932                  ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
1934 1933                  err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1935 1934                  ASSERT(err == 0);
1936 1935          } else {
1937 1936                  /* remove from snapshot namespace */
1938 1937                  dsl_dataset_t *ds_head;

↓ open down ↓

17 lines elided

↑ open up ↑

1939 1938                  ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
1940 1939                  VERIFY(0 == dsl_dataset_hold_obj(dp,
1941 1940                      ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
1942 1941                  VERIFY(0 == dsl_dataset_get_snapname(ds));
1943 1942  #ifdef ZFS_DEBUG
1944 1943                  {
1945 1944                          uint64_t val;
1946 1945  
1947 1946                          err = dsl_dataset_snap_lookup(ds_head,
1948 1947                              ds->ds_snapname, &val);
1949      -                        ASSERT3U(err, ==, 0);
     1948 +                        ASSERT0(err);
1950 1949                          ASSERT3U(val, ==, obj);
1951 1950                  }
1952 1951  #endif
1953 1952                  err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
1954 1953                  ASSERT(err == 0);
1955 1954                  dsl_dataset_rele(ds_head, FTAG);
1956 1955          }
1957 1956  
1958 1957          if (ds_prev && ds->ds_prev != ds_prev)
1959 1958                  dsl_dataset_rele(ds_prev, FTAG);

1960 1959  
1961 1960          spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
1962 1961  
1963 1962          if (ds->ds_phys->ds_next_clones_obj != 0) {
1964 1963                  uint64_t count;
1965 1964                  ASSERT(0 == zap_count(mos,
1966 1965                      ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
1967 1966                  VERIFY(0 == dmu_object_free(mos,
1968 1967                      ds->ds_phys->ds_next_clones_obj, tx));
1969 1968          }
1970 1969          if (ds->ds_phys->ds_props_obj != 0)
1971 1970                  VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
1972 1971          if (ds->ds_phys->ds_userrefs_obj != 0)
1973 1972                  VERIFY(0 == zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
1974 1973          dsl_dir_close(ds->ds_dir, ds);
1975 1974          ds->ds_dir = NULL;
1976 1975          dsl_dataset_drain_refs(ds, tag);
1977 1976          VERIFY(0 == dmu_object_free(mos, obj, tx));
1978 1977  
1979 1978          if (dsda->rm_origin) {
1980 1979                  /*
1981 1980                   * Remove the origin of the clone we just destroyed.
1982 1981                   */
1983 1982                  struct dsl_ds_destroyarg ndsda = {0};
1984 1983  
1985 1984                  ndsda.ds = dsda->rm_origin;
1986 1985                  dsl_dataset_destroy_sync(&ndsda, tag, tx);
1987 1986          }
1988 1987  }
1989 1988  
/*
 * Account for the space a new snapshot of ds would take out of the
 * dataset's reservation.
 *
 * Outside of syncing context this is a no-op that returns 0.  In syncing
 * context it computes MIN(ds_unique_bytes, ds_reserved), returns ENOSPC if
 * that amount exceeds what dsl_dir_space_available() reports for the
 * dataset's dsl_dir, and otherwise (when the amount is non-zero) registers
 * it with dsl_dir_willuse_space() and returns 0.
 */
1990 1989  static int
1991 1990  dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
1992 1991  {
1993 1992          uint64_t asize;
1994 1993  
1995 1994          if (!dmu_tx_is_syncing(tx))
1996 1995                  return (0);
1997 1996  
1998 1997          /*
1999 1998           * If there's an fs-only reservation, any blocks that might become
2000 1999           * owned by the snapshot dataset must be accommodated by space
2001 2000           * outside of the reservation.
2002 2001           */
2003 2002          ASSERT(ds->ds_reserved == 0 || DS_UNIQUE_IS_ACCURATE(ds));
2004 2003          asize = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
2005 2004          if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
2006 2005                  return (ENOSPC);
2007 2006  
2008 2007          /*
2009 2008           * Propagate any reserved space for this snapshot to other
2010 2009           * snapshot checks in this sync group.
2011 2010           */
2012 2011          if (asize > 0)
2013 2012                  dsl_dir_willuse_space(ds->ds_dir, asize, tx);
2014 2013  
2015 2014          return (0);
2016 2015  }
2017 2016  
/*
 * Decide whether a snapshot named snapname may be taken of ds in tx.
 *
 * Returns:
 *   EAGAIN        ds_prev_snap_txg is already >= tx->tx_txg
 *   EEXIST        dsl_dataset_snap_lookup() found snapname
 *   ENAMETOOLONG  dataset name + '@' + snapname would reach MAXNAMELEN
 *   other errno   any lookup error other than ENOENT, or the result of
 *                 dsl_dataset_snapshot_reserve_space()
 *   0             all checks passed; ds->ds_trysnap_txg is set to tx->tx_txg
 */
2018 2017  int
2019 2018  dsl_dataset_snapshot_check(dsl_dataset_t *ds, const char *snapname,
2020 2019      dmu_tx_t *tx)
2021 2020  {
2022 2021          int err;
2023 2022          uint64_t value;
2024 2023  
2025 2024          /*
2026 2025           * We don't allow multiple snapshots of the same txg.  If there
2027 2026           * is already one, try again.
2028 2027           */
2029 2028          if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
2030 2029                  return (EAGAIN);
2031 2030  
2032 2031          /*
2033 2032           * Check for conflicting snapshot name.
2034 2033           */
2035 2034          err = dsl_dataset_snap_lookup(ds, snapname, &value);
2036 2035          if (err == 0)
2037 2036                  return (EEXIST);
2038 2037          if (err != ENOENT)
2039 2038                  return (err);
2040 2039  
2041 2040          /*
2042 2041           * Check that the dataset's name is not too long.  Name consists
2043 2042           * of the dataset's length + 1 for the @-sign + snapshot name's length
2044 2043           */
2045 2044          if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
2046 2045                  return (ENAMETOOLONG);
2047 2046  
2048 2047          err = dsl_dataset_snapshot_reserve_space(ds, tx);
2049 2048          if (err)
2050 2049                  return (err);
2051 2050  
2052 2051          ds->ds_trysnap_txg = tx->tx_txg;
2053 2052          return (0);
2054 2053  }
2055 2054  
/*
 * Create the snapshot ds@snapname in tx.
 *
 * The caller must hold dp_config_rwlock as writer (asserted below).  The
 * function:
 *   - allocates a new dataset object in the MOS and fills its
 *     dsl_dataset_phys_t from the current state of ds (the new object takes
 *     over ds's existing deadlist object);
 *   - links the new object between ds->ds_prev (if any) and ds;
 *   - charges refreservation for the unique space that is about to drop
 *     to zero;
 *   - gives ds a fresh deadlist via dsl_deadlist_clone() and resets its
 *     ds_prev_snap_obj, ds_prev_snap_txg and ds_unique_bytes;
 *   - adds snapname to ds's ds_snapnames_zapobj;
 *   - repoints ds->ds_prev at the new snapshot and logs the event.
 *
 * Returns nothing; failures of the underlying calls are caught by
 * VERIFY/ASSERT rather than reported to the caller.
 */
2056 2055  void
2057 2056  dsl_dataset_snapshot_sync(dsl_dataset_t *ds, const char *snapname,
2058 2057      dmu_tx_t *tx)
2059 2058  {
2060 2059          dsl_pool_t *dp = ds->ds_dir->dd_pool;
2061 2060          dmu_buf_t *dbuf;
2062 2061          dsl_dataset_phys_t *dsphys;
2063 2062          uint64_t dsobj, crtxg;
2064 2063          objset_t *mos = dp->dp_meta_objset;
2065 2064          int err;
2066 2065  
2067 2066          ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
2068 2067  
2069 2068          /*
2070 2069           * The origin's ds_creation_txg has to be < TXG_INITIAL
2071 2070           */
2072 2071          if (strcmp(snapname, ORIGIN_DIR_NAME) == 0)
2073 2072                  crtxg = 1;
2074 2073          else
2075 2074                  crtxg = tx->tx_txg;
2076 2075  
                   /* Allocate the snapshot's object and initialize its bonus buffer. */
2077 2076          dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
2078 2077              DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
2079 2078          VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
2080 2079          dmu_buf_will_dirty(dbuf, tx);
2081 2080          dsphys = dbuf->db_data;
2082 2081          bzero(dsphys, sizeof (dsl_dataset_phys_t));
2083 2082          dsphys->ds_dir_obj = ds->ds_dir->dd_object;
2084 2083          dsphys->ds_fsid_guid = unique_create();
2085 2084          (void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
2086 2085              sizeof (dsphys->ds_guid));
2087 2086          dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
2088 2087          dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
2089 2088          dsphys->ds_next_snap_obj = ds->ds_object;
2090 2089          dsphys->ds_num_children = 1;
2091 2090          dsphys->ds_creation_time = gethrestime_sec();
2092 2091          dsphys->ds_creation_txg = crtxg;
2093 2092          dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
2094 2093          dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
2095 2094          dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
2096 2095          dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
2097 2096          dsphys->ds_flags = ds->ds_phys->ds_flags;
2098 2097          dsphys->ds_bp = ds->ds_phys->ds_bp;
2099 2098          dmu_buf_rele(dbuf, FTAG);
2100 2099  
                   /*
                    * Splice the new snapshot in after the previous one: either
                    * the previous snapshot's "next" pointer referred to ds and
                    * is redirected to dsobj, or ds is tracked in the previous
                    * snapshot's next_clones ZAP and that entry is replaced.
                    */
2101 2100          ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
2102 2101          if (ds->ds_prev) {
2103 2102                  uint64_t next_clones_obj =
2104 2103                      ds->ds_prev->ds_phys->ds_next_clones_obj;
2105 2104                  ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==

↓ open down ↓

146 lines elided

↑ open up ↑

2106 2105                      ds->ds_object ||
2107 2106                      ds->ds_prev->ds_phys->ds_num_children > 1);
2108 2107                  if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
2109 2108                          dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
2110 2109                          ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
2111 2110                              ds->ds_prev->ds_phys->ds_creation_txg);
2112 2111                          ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
2113 2112                  } else if (next_clones_obj != 0) {
                                   /*
                                    * NOTE(review): dsphys is dereferenced here
                                    * after dmu_buf_rele(dbuf, FTAG) above; the
                                    * field read was set to ds->ds_object.
                                    * Confirm the buffer is still valid here.
                                    */
2114 2113                          remove_from_next_clones(ds->ds_prev,
2115 2114                              dsphys->ds_next_snap_obj, tx);
2116      -                        VERIFY3U(0, ==, zap_add_int(mos,
     2115 +                        VERIFY0(zap_add_int(mos,
2117 2116                              next_clones_obj, dsobj, tx));
2118 2117                  }
2119 2118          }
2120 2119  
2121 2120          /*
2122 2121           * If we have a reference-reservation on this dataset, we will
2123 2122           * need to increase the amount of refreservation being charged
2124 2123           * since our unique space is going to zero.
2125 2124           */
2126 2125          if (ds->ds_reserved) {

2127 2126                  int64_t delta;
2128 2127                  ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
2129 2128                  delta = MIN(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
2130 2129                  dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV,
2131 2130                      delta, 0, 0, tx);
2132 2131          }
2133 2132  
                   /*
                    * The snapshot keeps the old deadlist object (copied into
                    * dsphys above); ds gets a clone of it and the in-core
                    * deadlist is reopened on the new object.
                    */
2134 2133          dmu_buf_will_dirty(ds->ds_dbuf, tx);
2135 2134          zfs_dbgmsg("taking snapshot %s@%s/%llu; newkey=%llu",
2136 2135              ds->ds_dir->dd_myname, snapname, dsobj,
2137 2136              ds->ds_phys->ds_prev_snap_txg);
2138 2137          ds->ds_phys->ds_deadlist_obj = dsl_deadlist_clone(&ds->ds_deadlist,
2139 2138              UINT64_MAX, ds->ds_phys->ds_prev_snap_obj, tx);
2140 2139          dsl_deadlist_close(&ds->ds_deadlist);
2141 2140          dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
2142 2141          dsl_deadlist_add_key(&ds->ds_deadlist,
2143 2142              ds->ds_phys->ds_prev_snap_txg, tx);
2144 2143  
2145 2144          ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
2146 2145          ds->ds_phys->ds_prev_snap_obj = dsobj;
2147 2146          ds->ds_phys->ds_prev_snap_txg = crtxg;
2148 2147          ds->ds_phys->ds_unique_bytes = 0;
2149 2148          if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
2150 2149                  ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
2151 2150  
                   /*
                    * NOTE(review): the zap_add() result is checked only by
                    * ASSERT; confirm that is sufficient for non-debug builds.
                    */
2152 2151          err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
2153 2152              snapname, 8, 1, &dsobj, tx);
2154 2153          ASSERT(err == 0);
2155 2154  
                   /* Swap the in-core ds_prev reference over to the new snapshot. */
2156 2155          if (ds->ds_prev)
2157 2156                  dsl_dataset_drop_ref(ds->ds_prev, ds);
2158 2157          VERIFY(0 == dsl_dataset_get_ref(dp,
2159 2158              ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
2160 2159  
2161 2160          dsl_scan_ds_snapshotted(ds, tx);
2162 2161  
2163 2162          dsl_dir_snap_cmtime_update(ds->ds_dir);
2164 2163  
2165 2164          spa_history_log_internal_ds(ds->ds_prev, "snapshot", tx, "");
2166 2165  }
2167 2166  
/*
 * Sync out the dataset's objset for tx.
 *
 * Asserts that tx is in syncing context, that ds has an objset, and that
 * ds_next_snap_obj is 0.  Copies the in-core ds_fsid_guid into the on-disk
 * structure, dirties the owning dsl_dir, and hands the objset to
 * dmu_objset_sync() with the supplied zio.
 */
2168 2167  void
2169 2168  dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
2170 2169  {
2171 2170          ASSERT(dmu_tx_is_syncing(tx));
2172 2171          ASSERT(ds->ds_objset != NULL);
2173 2172          ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
2174 2173  
2175 2174          /*
2176 2175           * in case we had to change ds_fsid_guid when we opened it,
2177 2176           * sync it out now.
2178 2177           */
2179 2178          dmu_buf_will_dirty(ds->ds_dbuf, tx);
2180 2179          ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
2181 2180  
2182 2181          dsl_dir_dirty(ds->ds_dir, tx);
2183 2182          dmu_objset_sync(ds->ds_objset, zio, tx);
2184 2183  }
2185 2184  
/*
 * Add the ZFS_PROP_CLONES property to nv: an nvlist whose ZPROP_VALUE is
 * an nvlist containing one boolean entry per clone name found in
 * ds_next_clones_obj.
 *
 * dp_config_rwlock is held as reader for the whole function.  If the
 * number of entries counted does not equal ds_num_children - 1, nothing is
 * added to nv.  Clones that cannot be held are skipped.  Both temporary
 * nvlists are freed on every path (the success path falls through to the
 * "fail" label).
 */
2186 2185  static void
2187 2186  get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
2188 2187  {
2189 2188          uint64_t count = 0;
2190 2189          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
2191 2190          zap_cursor_t zc;
2192 2191          zap_attribute_t za;
2193 2192          nvlist_t *propval;
2194 2193          nvlist_t *val;
2195 2194

↓ open down ↓

69 lines elided

↑ open up ↑

2196 2195          rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
2197 2196          VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2198 2197          VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2199 2198  
2200 2199          /*
2201 2200           * There may be missing entries in ds_next_clones_obj
2202 2201           * due to a bug in a previous version of the code.
2203 2202           * Only trust it if it has the right number of entries.
2204 2203           */
                   /*
                    * NOTE(review): zap_count() is called from inside ASSERT0.
                    * If ASSERT0 compiles to nothing in non-debug builds, the
                    * call is dropped and count stays 0, so the comparison
                    * below would reject any dataset that has clones.
                    * Confirm; VERIFY0 may be what is wanted here.
                    */
2205 2204          if (ds->ds_phys->ds_next_clones_obj != 0) {
2206      -                ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
     2205 +                ASSERT0(zap_count(mos, ds->ds_phys->ds_next_clones_obj,
2207 2206                      &count));
2208 2207          }
2209 2208          if (count != ds->ds_phys->ds_num_children - 1) {
2210 2209                  goto fail;
2211 2210          }
2212 2211          for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
2213 2212              zap_cursor_retrieve(&zc, &za) == 0;
2214 2213              zap_cursor_advance(&zc)) {
2215 2214                  dsl_dataset_t *clone;
2216 2215                  char buf[ZFS_MAXNAMELEN];

2217 2216                  /*
2218 2217                   * Even though we hold the dp_config_rwlock, the dataset
2219 2218                   * may fail to open, returning ENOENT.  If there is a
2220 2219                   * thread concurrently attempting to destroy this
2221 2220                   * dataset, it will have the ds_rwlock held for
2222 2221                   * RW_WRITER.  Our call to dsl_dataset_hold_obj() ->
2223 2222                   * dsl_dataset_hold_ref() will fail its
2224 2223                   * rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
2225 2224                   * dp_config_rwlock, and wait for the destroy progress
2226 2225                   * and signal ds_exclusive_cv.  If the destroy was
2227 2226                   * successful, we will see that
2228 2227                   * DSL_DATASET_IS_DESTROYED(), and return ENOENT.
2229 2228                   */
2230 2229                  if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
2231 2230                      za.za_first_integer, FTAG, &clone) != 0)
2232 2231                          continue;
2233 2232                  dsl_dir_name(clone->ds_dir, buf);
2234 2233                  VERIFY(nvlist_add_boolean(val, buf) == 0);
2235 2234                  dsl_dataset_rele(clone, FTAG);
2236 2235          }
2237 2236          zap_cursor_fini(&zc);
2238 2237          VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0);
2239 2238          VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),
2240 2239              propval) == 0);
2241 2240  fail:
2242 2241          nvlist_free(val);
2243 2242          nvlist_free(propval);
2244 2243          rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
2245 2244  }
2246 2245  
/*
 * Populate nv with the statistics properties of ds.
 *
 * The ratio is uncompressed * 100 / compressed bytes, or 100 when the
 * compressed byte count is 0, and is always reported as ZFS_PROP_REFRATIO.
 * Snapshots additionally report ZFS_PROP_COMPRESSRATIO, ZFS_PROP_USED
 * (their unique bytes) and the clones list; other datasets defer to
 * dsl_dir_stats().  The remaining properties are added unconditionally.
 *
 * If ds has a previous snapshot, ZFS_PROP_WRITTEN is added as well; a
 * failure to hold that snapshot or to compute the value is silently
 * ignored and the property is simply left out.
 */
2247 2246  void
2248 2247  dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
2249 2248  {
2250 2249          uint64_t refd, avail, uobjs, aobjs, ratio;
2251 2250  
2252 2251          ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
2253 2252              (ds->ds_phys->ds_uncompressed_bytes * 100 /
2254 2253              ds->ds_phys->ds_compressed_bytes);
2255 2254  
2256 2255          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
2257 2256  
2258 2257          if (dsl_dataset_is_snapshot(ds)) {
2259 2258                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
2260 2259                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
2261 2260                      ds->ds_phys->ds_unique_bytes);
2262 2261                  get_clones_stat(ds, nv);
2263 2262          } else {
2264 2263                  dsl_dir_stats(ds->ds_dir, nv);
2265 2264          }
2266 2265  
2267 2266          dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
2268 2267          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
2269 2268          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
2270 2269  
2271 2270          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
2272 2271              ds->ds_phys->ds_creation_time);
2273 2272          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
2274 2273              ds->ds_phys->ds_creation_txg);
2275 2274          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
2276 2275              ds->ds_quota);
2277 2276          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
2278 2277              ds->ds_reserved);
2279 2278          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
2280 2279              ds->ds_phys->ds_guid);
2281 2280          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_UNIQUE,
2282 2281              ds->ds_phys->ds_unique_bytes);
2283 2282          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_OBJSETID,
2284 2283              ds->ds_object);
2285 2284          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USERREFS,
2286 2285              ds->ds_userrefs);
2287 2286          dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
2288 2287              DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
2289 2288  
2290 2289          if (ds->ds_phys->ds_prev_snap_obj != 0) {
2291 2290                  uint64_t written, comp, uncomp;
2292 2291                  dsl_pool_t *dp = ds->ds_dir->dd_pool;
2293 2292                  dsl_dataset_t *prev;
2294 2293  
                           /* The config lock is held only while taking the hold. */
2295 2294                  rw_enter(&dp->dp_config_rwlock, RW_READER);
2296 2295                  int err = dsl_dataset_hold_obj(dp,
2297 2296                      ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
2298 2297                  rw_exit(&dp->dp_config_rwlock);
2299 2298                  if (err == 0) {
2300 2299                          err = dsl_dataset_space_written(prev, ds, &written,
2301 2300                              &comp, &uncomp);
2302 2301                          dsl_dataset_rele(prev, FTAG);
2303 2302                          if (err == 0) {
2304 2303                                  dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
2305 2304                                      written);
2306 2305                          }
2307 2306                  }
2308 2307          }
2309 2308  
2310 2309  }
2311 2310  
/*
 * Fill in *stat from ds.
 *
 * Creation txg, the inconsistent flag and the guid are copied from the
 * on-disk structure.  For a snapshot, dds_num_clones is
 * ds_num_children - 1 and dds_origin is left empty.  Otherwise
 * dds_num_clones is 0 and, if the dataset's dsl_dir is a clone, the name
 * of its origin dataset is written into dds_origin while dp_config_rwlock
 * is held as reader.
 */
2312 2311  void
2313 2312  dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
2314 2313  {
2315 2314          stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
2316 2315          stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
2317 2316          stat->dds_guid = ds->ds_phys->ds_guid;
2318 2317          stat->dds_origin[0] = '\0';
2319 2318          if (dsl_dataset_is_snapshot(ds)) {
2320 2319                  stat->dds_is_snapshot = B_TRUE;
2321 2320                  stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
2322 2321          } else {
2323 2322                  stat->dds_is_snapshot = B_FALSE;
2324 2323                  stat->dds_num_clones = 0;
2325 2324  
2326 2325                  rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
2327 2326                  if (dsl_dir_is_clone(ds->ds_dir)) {
2328 2327                          dsl_dataset_t *ods;
2329 2328  
2330 2329                          VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
2331 2330                              ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
2332 2331                          dsl_dataset_name(ods, stat->dds_origin);
2333 2332                          dsl_dataset_drop_ref(ods, FTAG);
2334 2333                  }
2335 2334                  rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
2336 2335          }
2337 2336  }
2338 2337  
/*
 * Return the in-core ds_fsid_guid of ds.
 */
2339 2338  uint64_t
2340 2339  dsl_dataset_fsid_guid(dsl_dataset_t *ds)
2341 2340  {
2342 2341          return (ds->ds_fsid_guid);
2343 2342  }
2344 2343  
/*
 * Report space and object usage for ds through the four out-parameters.
 *
 *   *refdbytesp   ds_referenced_bytes
 *   *availbytesp  dsl_dir_space_available() for the dataset's dsl_dir, plus
 *                 any part of ds_reserved not yet covered by
 *                 ds_unique_bytes; when ds_quota is non-zero the result is
 *                 capped at ds_quota - referenced, or set to 0 if the
 *                 referenced bytes already reach the quota
 *   *usedobjsp    blk_fill of the dataset's block pointer
 *   *availobjsp   DN_MAX_OBJECT - *usedobjsp
 */
2345 2344  void
2346 2345  dsl_dataset_space(dsl_dataset_t *ds,
2347 2346      uint64_t *refdbytesp, uint64_t *availbytesp,
2348 2347      uint64_t *usedobjsp, uint64_t *availobjsp)
2349 2348  {
2350 2349          *refdbytesp = ds->ds_phys->ds_referenced_bytes;
2351 2350          *availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
2352 2351          if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
2353 2352                  *availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
2354 2353          if (ds->ds_quota != 0) {
2355 2354                  /*
2356 2355                   * Adjust available bytes according to refquota
2357 2356                   */
2358 2357                  if (*refdbytesp < ds->ds_quota)
2359 2358                          *availbytesp = MIN(*availbytesp,
2360 2359                              ds->ds_quota - *refdbytesp);
2361 2360                  else
2362 2361                          *availbytesp = 0;
2363 2362          }
2364 2363          *usedobjsp = ds->ds_phys->ds_bp.blk_fill;
2365 2364          *availobjsp = DN_MAX_OBJECT - *usedobjsp;
2366 2365  }
2367 2366  
/*
 * Report whether ds differs from its most recent snapshot (ds->ds_prev).
 *
 * The caller must hold dp_config_rwlock or be in pool sync context
 * (asserted).  Returns B_FALSE when there is no previous snapshot, or when
 * the dataset's block pointer was not born after that snapshot's creation
 * txg.  Otherwise the meta dnodes of the two objsets are compared with
 * bcmp(); if either objset cannot be obtained the dataset is reported as
 * modified.
 */
2368 2367  boolean_t
2369 2368  dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
2370 2369  {
2371 2370          dsl_pool_t *dp = ds->ds_dir->dd_pool;
2372 2371  
2373 2372          ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
2374 2373              dsl_pool_sync_context(dp));
2375 2374          if (ds->ds_prev == NULL)
2376 2375                  return (B_FALSE);
2377 2376          if (ds->ds_phys->ds_bp.blk_birth >
2378 2377              ds->ds_prev->ds_phys->ds_creation_txg) {
2379 2378                  objset_t *os, *os_prev;
2380 2379                  /*
2381 2380                   * It may be that only the ZIL differs, because it was
2382 2381                   * reset in the head.  Don't count that as being
2383 2382                   * modified.
2384 2383                   */
2385 2384                  if (dmu_objset_from_ds(ds, &os) != 0)
2386 2385                          return (B_TRUE);
2387 2386                  if (dmu_objset_from_ds(ds->ds_prev, &os_prev) != 0)
2388 2387                          return (B_TRUE);
2389 2388                  return (bcmp(&os->os_phys->os_meta_dnode,
2390 2389                      &os_prev->os_phys->os_meta_dnode,
2391 2390                      sizeof (os->os_phys->os_meta_dnode)) != 0);
2392 2391          }
2393 2392          return (B_FALSE);
2394 2393  }
2395 2394  
/*
 * Sync-task check function for renaming a snapshot.
 *
 * arg1 is the snapshot's dsl_dataset_t and arg2 the new snapshot name.
 * Holds the head dataset of the snapshot's dsl_dir and looks the new name
 * up there: a hit yields EEXIST, ENOENT yields 0, and any other lookup
 * error is kept.  Returns the hold error if the head cannot be held.
 *
 * NOTE(review): the final length check assigns ENAMETOOLONG
 * unconditionally, so it replaces any error recorded by the lookup above
 * (including EEXIST).  Confirm that precedence is intended.
 */
2396 2395  /* ARGSUSED */
2397 2396  static int
2398 2397  dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
2399 2398  {
2400 2399          dsl_dataset_t *ds = arg1;
2401 2400          char *newsnapname = arg2;
2402 2401          dsl_dir_t *dd = ds->ds_dir;
2403 2402          dsl_dataset_t *hds;
2404 2403          uint64_t val;
2405 2404          int err;
2406 2405  
2407 2406          err = dsl_dataset_hold_obj(dd->dd_pool,
2408 2407              dd->dd_phys->dd_head_dataset_obj, FTAG, &hds);
2409 2408          if (err)
2410 2409                  return (err);
2411 2410  
2412 2411          /* new name better not be in use */
2413 2412          err = dsl_dataset_snap_lookup(hds, newsnapname, &val);
2414 2413          dsl_dataset_rele(hds, FTAG);
2415 2414  
2416 2415          if (err == 0)
2417 2416                  err = EEXIST;
2418 2417          else if (err == ENOENT)
2419 2418                  err = 0;
2420 2419  
2421 2420          /* dataset name + 1 for the "@" + the new snapshot name must fit */
2422 2421          if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
2423 2422                  err = ENAMETOOLONG;
2424 2423  
2425 2424          return (err);
2426 2425  }
2427 2426  
/*
 * Sync-task sync function for renaming a snapshot.
 *
 * arg1 is the snapshot's dsl_dataset_t and arg2 the new snapshot name.
 * Holds the head dataset, removes the snapshot's current name from the
 * head, overwrites ds->ds_snapname under ds_lock, adds the new name to the
 * head's ds_snapnames_zapobj pointing at ds->ds_object, logs the rename
 * and releases the head.
 */
2428 2427  static void
2429 2428  dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
2430 2429  {
2431 2430          dsl_dataset_t *ds = arg1;
2432 2431          const char *newsnapname = arg2;
2433 2432          dsl_dir_t *dd = ds->ds_dir;
2434 2433          objset_t *mos = dd->dd_pool->dp_meta_objset;

↓ open down ↓

218 lines elided

↑ open up ↑

2435 2434          dsl_dataset_t *hds;
2436 2435          int err;
2437 2436  
2438 2437          ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
2439 2438  
2440 2439          VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
2441 2440              dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
2442 2441  
2443 2442          VERIFY(0 == dsl_dataset_get_snapname(ds));
2444 2443          err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
2445      -        ASSERT3U(err, ==, 0);
     2444 +        ASSERT0(err);
                   /*
                    * NOTE(review): strcpy is unbounded; this presumably relies
                    * on the length check in the rename check function.
                    * Confirm against the size of ds_snapname.
                    */
2446 2445          mutex_enter(&ds->ds_lock);
2447 2446          (void) strcpy(ds->ds_snapname, newsnapname);
2448 2447          mutex_exit(&ds->ds_lock);
2449 2448          err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
2450 2449              ds->ds_snapname, 8, 1, &ds->ds_object, tx);
2451      -        ASSERT3U(err, ==, 0);
     2450 +        ASSERT0(err);
2452 2451  
2453 2452          spa_history_log_internal_ds(ds, "rename", tx,
2454 2453              "-> @%s", newsnapname);
2455 2454          dsl_dataset_rele(hds, FTAG);
2456 2455  }
2457 2456  
/*
 * State shared between dsl_recursive_rename() and its per-dataset callback
 * dsl_snapshot_rename_one().
 */
2458 2457  struct renamesnaparg {
2459 2458          dsl_sync_task_group_t *dstg;    /* group the rename tasks join */
2460 2459          char failed[MAXPATHLEN];        /* snapshot name being/last tried */
2461 2460          char *oldsnap;                  /* snapshot name part to rename */

2462 2461          char *newsnap;                  /* snapshot name part to use */
2463 2462  };
2464 2463  
/*
 * Per-dataset callback used by dsl_recursive_rename(); arg is a
 * struct renamesnaparg.
 *
 * Builds "<name>@<oldsnap>", records it in ra->failed, and checks rename
 * permission on it.  In kernel builds it then attempts to unmount the
 * snapshot, ignoring the result.  The snapshot is held with ra->dstg as
 * the tag and a rename sync task is added to the group; the hold is left
 * in place for the caller to release.
 *
 * Returns 0 on success and also when the permission check or the hold
 * fails with ENOENT (the dataset simply has no such snapshot); any other
 * error is returned as is.
 */
2465 2464  static int
2466 2465  dsl_snapshot_rename_one(const char *name, void *arg)
2467 2466  {
2468 2467          struct renamesnaparg *ra = arg;
2469 2468          dsl_dataset_t *ds = NULL;
2470 2469          char *snapname;
2471 2470          int err;
2472 2471  
2473 2472          snapname = kmem_asprintf("%s@%s", name, ra->oldsnap);
2474 2473          (void) strlcpy(ra->failed, snapname, sizeof (ra->failed));
2475 2474  
2476 2475          /*
2477 2476           * For recursive snapshot renames the parent won't be changing
2478 2477           * so we just pass name for both the to/from argument.
2479 2478           */
2480 2479          err = zfs_secpolicy_rename_perms(snapname, snapname, CRED());
2481 2480          if (err != 0) {
2482 2481                  strfree(snapname);
2483 2482                  return (err == ENOENT ? 0 : err);
2484 2483          }
2485 2484  
2486 2485  #ifdef _KERNEL
2487 2486          /*
2488 2487           * For all filesystems undergoing rename, we'll need to unmount it.
2489 2488           */
2490 2489          (void) zfs_unmount_snap(snapname, NULL);
2491 2490  #endif
2492 2491          err = dsl_dataset_hold(snapname, ra->dstg, &ds);
2493 2492          strfree(snapname);
2494 2493          if (err != 0)
2495 2494                  return (err == ENOENT ? 0 : err);
2496 2495  
2497 2496          dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
2498 2497              dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
2499 2498  
2500 2499          return (0);
2501 2500  }
2502 2501  
/*
 * Rename the snapshot named by the part of oldname after '@' to the part
 * of newname after '@', in the filesystem named before the '@' and in all
 * of its children (DS_FIND_CHILDREN).
 *
 * A sync task is queued per matching snapshot by dsl_snapshot_rename_one()
 * and the group is waited on only if the traversal succeeded.  Afterwards
 * every queued dataset is released; for tasks that recorded an error,
 * ra->failed is rebuilt as "<dir>@<newsnap>".  On error the contents of
 * ra->failed are copied back into oldname.
 *
 * Returns 0 on success or the error from spa_open(), the traversal, or the
 * sync task group.
 *
 * NOTE(review): both strchr() results are used without a NULL check, so
 * oldname and newname must each contain '@'; the visible caller
 * dsl_dataset_rename() checks this before calling.
 *
 * NOTE(review): the strlcpy() into oldname is bounded by
 * sizeof (ra->failed) (MAXPATHLEN), not by the size of the oldname buffer.
 * Confirm that every caller passes a buffer at least that large.
 */
2503 2502  static int
2504 2503  dsl_recursive_rename(char *oldname, const char *newname)
2505 2504  {
2506 2505          int err;
2507 2506          struct renamesnaparg *ra;
2508 2507          dsl_sync_task_t *dst;
2509 2508          spa_t *spa;
2510 2509          char *cp, *fsname = spa_strdup(oldname);
2511 2510          int len = strlen(oldname) + 1;
2512 2511  
2513 2512          /* truncate the snapshot name to get the fsname */
2514 2513          cp = strchr(fsname, '@');
2515 2514          *cp = '\0';
2516 2515  
2517 2516          err = spa_open(fsname, &spa, FTAG);
2518 2517          if (err) {
2519 2518                  kmem_free(fsname, len);
2520 2519                  return (err);
2521 2520          }
2522 2521          ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
2523 2522          ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
2524 2523  
2525 2524          ra->oldsnap = strchr(oldname, '@') + 1;
2526 2525          ra->newsnap = strchr(newname, '@') + 1;
2527 2526          *ra->failed = '\0';
2528 2527  
2529 2528          err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
2530 2529              DS_FIND_CHILDREN);
2531 2530          kmem_free(fsname, len);
2532 2531  
2533 2532          if (err == 0) {
2534 2533                  err = dsl_sync_task_group_wait(ra->dstg);
2535 2534          }
2536 2535  
                   /* Release the holds taken by dsl_snapshot_rename_one(). */
2537 2536          for (dst = list_head(&ra->dstg->dstg_tasks); dst;
2538 2537              dst = list_next(&ra->dstg->dstg_tasks, dst)) {
2539 2538                  dsl_dataset_t *ds = dst->dst_arg1;
2540 2539                  if (dst->dst_err) {
2541 2540                          dsl_dir_name(ds->ds_dir, ra->failed);
2542 2541                          (void) strlcat(ra->failed, "@", sizeof (ra->failed));
2543 2542                          (void) strlcat(ra->failed, ra->newsnap,
2544 2543                              sizeof (ra->failed));
2545 2544                  }
2546 2545                  dsl_dataset_rele(ds, ra->dstg);
2547 2546          }
2548 2547  
2549 2548          if (err)
2550 2549                  (void) strlcpy(oldname, ra->failed, sizeof (ra->failed));
2551 2550  
2552 2551          dsl_sync_task_group_destroy(ra->dstg);
2553 2552          kmem_free(ra, sizeof (struct renamesnaparg));
2554 2553          spa_close(spa, FTAG);
2555 2554          return (err);
2556 2555  }
2557 2556  
/*
 * Per-dataset callback used by dsl_dataset_rename(): arg points to an int
 * holding the number of characters the rename adds to each name.  Returns
 * ENAMETOOLONG if oldname grown by that amount would reach MAXNAMELEN,
 * otherwise 0.
 */
2558 2557  static int
2559 2558  dsl_valid_rename(const char *oldname, void *arg)
2560 2559  {
2561 2560          int delta = *(int *)arg;
2562 2561  
2563 2562          if (strlen(oldname) + delta >= MAXNAMELEN)
2564 2563                  return (ENAMETOOLONG);
2565 2564  
2566 2565          return (0);
2567 2566  }
2568 2567  
/*
 * Rename a dataset or a snapshot; also exported under the weak alias
 * dmu_objset_rename.
 *
 * If oldname resolves completely to a dsl_dir (tail == NULL) the directory
 * is renamed with dsl_dir_rename(); when the new name is longer, the names
 * of all children and snapshots are first checked against MAXNAMELEN.
 *
 * Otherwise the unresolved tail must begin with '@' (ENOENT if not), and
 * newname must contain '@' (EINVAL if not) and match oldname up to and
 * including the '@' (EXDEV if not).  The snapshot is then renamed either
 * recursively via dsl_recursive_rename() or through a single sync task.
 *
 * Returns 0 on success or an errno value.
 */
2569 2568  #pragma weak dmu_objset_rename = dsl_dataset_rename
2570 2569  int
2571 2570  dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive)
2572 2571  {
2573 2572          dsl_dir_t *dd;
2574 2573          dsl_dataset_t *ds;
2575 2574          const char *tail;
2576 2575          int err;
2577 2576  
2578 2577          err = dsl_dir_open(oldname, FTAG, &dd, &tail);
2579 2578          if (err)
2580 2579                  return (err);
2581 2580  
2582 2581          if (tail == NULL) {
2583 2582                  int delta = strlen(newname) - strlen(oldname);
2584 2583  
2585 2584                  /* if we're growing, validate child name lengths */
2586 2585                  if (delta > 0)
2587 2586                          err = dmu_objset_find(oldname, dsl_valid_rename,
2588 2587                              &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
2589 2588  
2590 2589                  if (err == 0)
2591 2590                          err = dsl_dir_rename(dd, newname);
2592 2591                  dsl_dir_close(dd, FTAG);
2593 2592                  return (err);
2594 2593          }
2595 2594  
2596 2595          if (tail[0] != '@') {
2597 2596                  /* the name ended in a nonexistent component */
2598 2597                  dsl_dir_close(dd, FTAG);
2599 2598                  return (ENOENT);
2600 2599          }
2601 2600  
2602 2601          dsl_dir_close(dd, FTAG);
2603 2602  
2604 2603          /* new name must be snapshot in same filesystem */
2605 2604          tail = strchr(newname, '@');
2606 2605          if (tail == NULL)
2607 2606                  return (EINVAL);
                   /* After the increment, tail is the new snapshot name itself. */
2608 2607          tail++;
2609 2608          if (strncmp(oldname, newname, tail - newname) != 0)
2610 2609                  return (EXDEV);
2611 2610  
2612 2611          if (recursive) {
2613 2612                  err = dsl_recursive_rename(oldname, newname);
2614 2613          } else {
2615 2614                  err = dsl_dataset_hold(oldname, FTAG, &ds);
2616 2615                  if (err)
2617 2616                          return (err);
2618 2617  
2619 2618                  err = dsl_sync_task_do(ds->ds_dir->dd_pool,
2620 2619                      dsl_dataset_snapshot_rename_check,
2621 2620                      dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
2622 2621  
2623 2622                  dsl_dataset_rele(ds, FTAG);
2624 2623          }
2625 2624  
2626 2625          return (err);
2627 2626  }
2628 2627  
/*
 * One entry of a snapshot list used by the promote code: a list linkage
 * plus the dataset it refers to.
 */
2629 2628  struct promotenode {
2630 2629          list_node_t link;
2631 2630          dsl_dataset_t *ds;
2632 2631  };
2633 2632  
/*
 * Argument block for the promote sync task.  The three lists hold
 * struct promotenode entries; used/comp/uncomp and unique are filled in by
 * dsl_dataset_promote_check().
 * NOTE(review): the roles of origin_origin, cloneusedsnap, originusedsnap
 * and err_ds are not visible in this part of the file -- confirm against
 * their users before relying on the names alone.
 */
2634 2633  struct promotearg {
2635 2634          list_t shared_snaps, origin_snaps, clone_snaps;
2636 2635          dsl_dataset_t *origin_origin;
2637 2636          uint64_t used, comp, uncomp, unique, cloneusedsnap, originusedsnap;
2638 2637          char *err_ds;
2639 2638  };
2640 2639  
2641 2640  static int snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep);
2642 2641  static boolean_t snaplist_unstable(list_t *l);
2643 2642  
           /*
            * Check function for the promote sync task.  arg1 is the head dataset
            * of the clone being promoted and arg2 is the struct promotearg that
            * dsl_dataset_promote() filled in.
            *
            * Outside of syncing context this only verifies that the dataset is a
            * clone and then returns 0.  In syncing context it additionally
            * rejects datasets flagged DS_FLAG_NOPROMOTE (EXDEV), looks up every
            * shared snapshot name in the clone's snapshot namespace (EEXIST if
            * one is already present), asks dsl_dir_transfer_possible() whether
            * the space can be moved, and stores the space figures (pa->unique,
            * used, comp, uncomp, cloneusedsnap, originusedsnap) that
            * dsl_dataset_promote_sync() later applies.
            *
            * Any failure of the name check leaves through 'out', which records
            * the name of the snapshot being examined in pa->err_ds.
            */
2644 2643  static int
2645 2644  dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
2646 2645  {
2647 2646          dsl_dataset_t *hds = arg1;
2648 2647          struct promotearg *pa = arg2;
                   /* the head of shared_snaps is used as the clone's origin snapshot */
2649 2648          struct promotenode *snap = list_head(&pa->shared_snaps);
2650 2649          dsl_dataset_t *origin_ds = snap->ds;
2651 2650          int err;
2652 2651          uint64_t unused;
2653 2652  
2654 2653          /* Check that it is a real clone */
2655 2654          if (!dsl_dir_is_clone(hds->ds_dir))
2656 2655                  return (EINVAL);
2657 2656  
2658 2657          /* Since this is so expensive, don't do the preliminary check */
2659 2658          if (!dmu_tx_is_syncing(tx))
2660 2659                  return (0);
2661 2660  
2662 2661          if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)
2663 2662                  return (EXDEV);
2664 2663  
2665 2664          /* compute origin's new unique space */
2666 2665          snap = list_tail(&pa->clone_snaps);
2667 2666          ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
2668 2667          dsl_deadlist_space_range(&snap->ds->ds_deadlist,
2669 2668              origin_ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
2670 2669              &pa->unique, &unused, &unused);
2671 2670  
2672 2671          /*
2673 2672           * Walk the snapshots that we are moving
2674 2673           *
2675 2674           * Compute space to transfer.  Consider the incremental changes
2676 2675           * to used for each snapshot:
2677 2676           * (my used) = (prev's used) + (blocks born) - (blocks killed)
2678 2677           * So each snapshot gave birth to:
2679 2678           * (blocks born) = (my used) - (prev's used) + (blocks killed)
2680 2679           * So a sequence would look like:
2681 2680           * (uN - u(N-1) + kN) + ... + (u1 - u0 + k1) + (u0 - 0 + k0)
2682 2681           * Which simplifies to:
2683 2682           * uN + kN + kN-1 + ... + k1 + k0
2684 2683           * Note however, if we stop before we reach the ORIGIN we get:
2685 2684           * uN + kN + kN-1 + ... + kM - uM-1
2686 2685           */
2687 2686          pa->used = origin_ds->ds_phys->ds_referenced_bytes;
2688 2687          pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
2689 2688          pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
2690 2689          for (snap = list_head(&pa->shared_snaps); snap;
2691 2690              snap = list_next(&pa->shared_snaps, snap)) {
2692 2691                  uint64_t val, dlused, dlcomp, dluncomp;
2693 2692                  dsl_dataset_t *ds = snap->ds;
2694 2693  
2695 2694                  /* Check that the snapshot name does not conflict */
2696 2695                  VERIFY(0 == dsl_dataset_get_snapname(ds));
2697 2696                  err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
2698 2697                  if (err == 0) {
2699 2698                          err = EEXIST;
2700 2699                          goto out;
2701 2700                  }
2702 2701                  if (err != ENOENT)
2703 2702                          goto out;
2704 2703  
2705 2704                  /* The very first snapshot does not have a deadlist */
2706 2705                  if (ds->ds_phys->ds_prev_snap_obj == 0)
2707 2706                          continue;
2708 2707  
2709 2708                  dsl_deadlist_space(&ds->ds_deadlist,
2710 2709                      &dlused, &dlcomp, &dluncomp);
2711 2710                  pa->used += dlused;
2712 2711                  pa->comp += dlcomp;
2713 2712                  pa->uncomp += dluncomp;
2714 2713          }
2715 2714  
2716 2715          /*
2717 2716           * If we are a clone of a clone then we never reached ORIGIN,
2718 2717           * so we need to subtract out the clone origin's used space.
2719 2718           */
2720 2719          if (pa->origin_origin) {
2721 2720                  pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
2722 2721                  pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
2723 2722                  pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
2724 2723          }
2725 2724  
2726 2725          /* Check that there is enough space here */
2727 2726          err = dsl_dir_transfer_possible(origin_ds->ds_dir, hds->ds_dir,
2728 2727              pa->used);
2729 2728          if (err)
2730 2729                  return (err);
2731 2730  
2732 2731          /*
2733 2732           * Compute the amounts of space that will be used by snapshots
2734 2733           * after the promotion (for both origin and clone).  For each,
2735 2734           * it is the amount of space that will be on all of their
2736 2735           * deadlists (that was not born before their new origin).
2737 2736           */
2738 2737          if (hds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
2739 2738                  uint64_t space;
2740 2739  
2741 2740                  /*
2742 2741                   * Note, typically this will not be a clone of a clone,
2743 2742                   * so dd_origin_txg will be < TXG_INITIAL, so
2744 2743                   * these snaplist_space() -> dsl_deadlist_space_range()
2745 2744                   * calls will be fast because they do not have to
2746 2745                   * iterate over all bps.
2747 2746                   */
2748 2747                  snap = list_head(&pa->origin_snaps);
2749 2748                  err = snaplist_space(&pa->shared_snaps,
2750 2749                      snap->ds->ds_dir->dd_origin_txg, &pa->cloneusedsnap);
2751 2750                  if (err)
2752 2751                          return (err);
2753 2752  
2754 2753                  err = snaplist_space(&pa->clone_snaps,
2755 2754                      snap->ds->ds_dir->dd_origin_txg, &space);
2756 2755                  if (err)
2757 2756                          return (err);
2758 2757                  pa->cloneusedsnap += space;
2759 2758          }
2760 2759          if (origin_ds->ds_dir->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
2761 2760                  err = snaplist_space(&pa->origin_snaps,
2762 2761                      origin_ds->ds_phys->ds_creation_txg, &pa->originusedsnap);
2763 2762                  if (err)
2764 2763                          return (err);
2765 2764          }
2766 2765  
2767 2766          return (0);
2768 2767  out:
                   /*
                    * Only the name-check loop jumps here, so snap is the shared
                    * snapshot whose lookup produced err.
                    */
2769 2768          pa->err_ds =  snap->ds->ds_snapname;
2770 2769          return (err);
2771 2770  }
2772 2771  
           /*
            * Sync function for the promote sync task.  arg1 is the head dataset
            * of the clone being promoted and arg2 is the struct promotearg whose
            * space figures were filled in by dsl_dataset_promote_check().
            *
            * In order, this re-links the origin snapshot to the clone's snapshot
            * chain, swaps the origin relationship between the two dsl_dirs,
            * updates the dd_clones entries, moves every shared snapshot (name
            * entry, containing dsl_dir and clone references) into the clone's
            * dsl_dir, and finally applies the space accounting deltas and logs
            * a "promote" history record.
            */
2773 2772  static void
2774 2773  dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
2775 2774  {
2776 2775          dsl_dataset_t *hds = arg1;
2777 2776          struct promotearg *pa = arg2;
2778 2777          struct promotenode *snap = list_head(&pa->shared_snaps);
2779 2778          dsl_dataset_t *origin_ds = snap->ds;
2780 2779          dsl_dataset_t *origin_head;
2781 2780          dsl_dir_t *dd = hds->ds_dir;
2782 2781          dsl_pool_t *dp = hds->ds_dir->dd_pool;
2783 2782          dsl_dir_t *odd = NULL;
2784 2783          uint64_t oldnext_obj;
2785 2784          int64_t delta;
2786 2785  
2787 2786          ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
2788 2787  
                   /* the head of origin_snaps is taken as the origin's head dataset */
2789 2788          snap = list_head(&pa->origin_snaps);
2790 2789          origin_head = snap->ds;
2791 2790  
2792 2791          /*
2793 2792           * We need to explicitly open odd, since origin_ds's dd will be
2794 2793           * changing.
2795 2794           */
2796 2795          VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
2797 2796              NULL, FTAG, &odd));
2798 2797

↓ open down ↓

337 lines elided

↑ open up ↑

2799 2798          /* change origin's next snap */
2800 2799          dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
2801 2800          oldnext_obj = origin_ds->ds_phys->ds_next_snap_obj;
2802 2801          snap = list_tail(&pa->clone_snaps);
2803 2802          ASSERT3U(snap->ds->ds_phys->ds_prev_snap_obj, ==, origin_ds->ds_object);
2804 2803          origin_ds->ds_phys->ds_next_snap_obj = snap->ds->ds_object;
2805 2804  
2806 2805          /* change the origin's next clone */
2807 2806          if (origin_ds->ds_phys->ds_next_clones_obj) {
2808 2807                  remove_from_next_clones(origin_ds, snap->ds->ds_object, tx);
2809      -                VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
     2808 +                VERIFY0(zap_add_int(dp->dp_meta_objset,
2810 2809                      origin_ds->ds_phys->ds_next_clones_obj,
2811 2810                      oldnext_obj, tx));
2812 2811          }
2813 2812  
2814 2813          /* change origin */
2815 2814          dmu_buf_will_dirty(dd->dd_dbuf, tx);
2816 2815          ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
2817 2816          dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
2818 2817          dd->dd_origin_txg = origin_head->ds_dir->dd_origin_txg;
2819 2818          dmu_buf_will_dirty(odd->dd_dbuf, tx);
2820 2819          odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
2821 2820          origin_head->ds_dir->dd_origin_txg =
2822 2821              origin_ds->ds_phys->ds_creation_txg;
2823 2822  
2824 2823          /* change dd_clone entries */
2825 2824          if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
2826      -                VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
     2825 +                VERIFY0(zap_remove_int(dp->dp_meta_objset,
2827 2826                      odd->dd_phys->dd_clones, hds->ds_object, tx));
2828      -                VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
     2827 +                VERIFY0(zap_add_int(dp->dp_meta_objset,
2829 2828                      pa->origin_origin->ds_dir->dd_phys->dd_clones,
2830 2829                      hds->ds_object, tx));
2831 2830  
2832      -                VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
     2831 +                VERIFY0(zap_remove_int(dp->dp_meta_objset,
2833 2832                      pa->origin_origin->ds_dir->dd_phys->dd_clones,
2834 2833                      origin_head->ds_object, tx));
2835 2834                  if (dd->dd_phys->dd_clones == 0) {
2836 2835                          dd->dd_phys->dd_clones = zap_create(dp->dp_meta_objset,
2837 2836                              DMU_OT_DSL_CLONES, DMU_OT_NONE, 0, tx);
2838 2837                  }
2839      -                VERIFY3U(0, ==, zap_add_int(dp->dp_meta_objset,
     2838 +                VERIFY0(zap_add_int(dp->dp_meta_objset,
2840 2839                      dd->dd_phys->dd_clones, origin_head->ds_object, tx));
2841 2840  
2842 2841          }
2843 2842  
2844 2843          /* move snapshots to this dir */
2845 2844          for (snap = list_head(&pa->shared_snaps); snap;
2846 2845              snap = list_next(&pa->shared_snaps, snap)) {
2847 2846                  dsl_dataset_t *ds = snap->ds;
2848 2847  
2849 2848                  /* unregister props as dsl_dir is changing */

2850 2849                  if (ds->ds_objset) {
2851 2850                          dmu_objset_evict(ds->ds_objset);
2852 2851                          ds->ds_objset = NULL;
2853 2852                  }
2854 2853                  /* move snap name entry */
2855 2854                  VERIFY(0 == dsl_dataset_get_snapname(ds));
2856 2855                  VERIFY(0 == dsl_dataset_snap_remove(origin_head,
2857 2856                      ds->ds_snapname, tx));
2858 2857                  VERIFY(0 == zap_add(dp->dp_meta_objset,
2859 2858                      hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
2860 2859                      8, 1, &ds->ds_object, tx));
2861 2860  
2862 2861                  /* change containing dsl_dir */
2863 2862                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
2864 2863                  ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
2865 2864                  ds->ds_phys->ds_dir_obj = dd->dd_object;
2866 2865                  ASSERT3P(ds->ds_dir, ==, odd);
2867 2866                  dsl_dir_close(ds->ds_dir, ds);
2868 2867                  VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
2869 2868                      NULL, ds, &ds->ds_dir));
2870 2869  
2871 2870                  /* move any clone references */
2872 2871                  if (ds->ds_phys->ds_next_clones_obj &&
2873 2872                      spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
2874 2873                          zap_cursor_t zc;
2875 2874                          zap_attribute_t za;
2876 2875  
2877 2876                          for (zap_cursor_init(&zc, dp->dp_meta_objset,
2878 2877                              ds->ds_phys->ds_next_clones_obj);
2879 2878                              zap_cursor_retrieve(&zc, &za) == 0;
2880 2879                              zap_cursor_advance(&zc)) {
2881 2880                                  dsl_dataset_t *cnds;

↓ open down ↓

32 lines elided

↑ open up ↑

2882 2881                                  uint64_t o;
2883 2882  
2884 2883                                  if (za.za_first_integer == oldnext_obj) {
2885 2884                                          /*
2886 2885                                           * We've already moved the
2887 2886                                           * origin's reference.
2888 2887                                           */
2889 2888                                          continue;
2890 2889                                  }
2891 2890  
2892      -                                VERIFY3U(0, ==, dsl_dataset_hold_obj(dp,
     2891 +                                VERIFY0(dsl_dataset_hold_obj(dp,
2893 2892                                      za.za_first_integer, FTAG, &cnds));
2894 2893                                  o = cnds->ds_dir->dd_phys->dd_head_dataset_obj;
2895 2894  
                                           /*
                                            * NOTE(review): the two checks below also
                                            * compare a return value against zero and
                                            * could use VERIFY0() like the calls above.
                                            */
2896 2895                                  VERIFY3U(zap_remove_int(dp->dp_meta_objset,
2897 2896                                      odd->dd_phys->dd_clones, o, tx), ==, 0);
2898 2897                                  VERIFY3U(zap_add_int(dp->dp_meta_objset,
2899 2898                                      dd->dd_phys->dd_clones, o, tx), ==, 0);
2900 2899                                  dsl_dataset_rele(cnds, FTAG);
2901 2900                          }
2902 2901                          zap_cursor_fini(&zc);
2903 2902                  }
2904 2903  
2905      -                ASSERT3U(dsl_prop_numcb(ds), ==, 0);
     2904 +                ASSERT0(dsl_prop_numcb(ds));
2906 2905          }
2907 2906  
2908 2907          /*
2909 2908           * Change space accounting.
2910 2909           * Note, pa->*usedsnap and dd_used_breakdown[SNAP] will either
2911 2910           * both be valid, or both be 0 (resulting in delta == 0).  This
2912 2911           * is true for each of {clone,origin} independently.
2913 2912           */
2914 2913  
2915 2914          delta = pa->cloneusedsnap -

2916 2915              dd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
2917 2916          ASSERT3S(delta, >=, 0);
2918 2917          ASSERT3U(pa->used, >=, delta);
2919 2918          dsl_dir_diduse_space(dd, DD_USED_SNAP, delta, 0, 0, tx);
2920 2919          dsl_dir_diduse_space(dd, DD_USED_HEAD,
2921 2920              pa->used - delta, pa->comp, pa->uncomp, tx);
2922 2921  
2923 2922          delta = pa->originusedsnap -
2924 2923              odd->dd_phys->dd_used_breakdown[DD_USED_SNAP];
2925 2924          ASSERT3S(delta, <=, 0);
2926 2925          ASSERT3U(pa->used, >=, -delta);
2927 2926          dsl_dir_diduse_space(odd, DD_USED_SNAP, delta, 0, 0, tx);
2928 2927          dsl_dir_diduse_space(odd, DD_USED_HEAD,
2929 2928              -pa->used - delta, -pa->comp, -pa->uncomp, tx);
2930 2929  
2931 2930          origin_ds->ds_phys->ds_unique_bytes = pa->unique;
2932 2931  
2933 2932          /* log history record */
2934 2933          spa_history_log_internal_ds(hds, "promote", tx, "");
2935 2934  
                   /* drop the hold on odd taken with FTAG at the top of this function */
2936 2935          dsl_dir_close(odd, FTAG);
2937 2936  }
2938 2937  
2939 2938  static char *snaplist_tag = "snaplist";
2940 2939  /*
2941 2940   * Make a list of dsl_dataset_t's for the snapshots between first_obj
2942 2941   * (exclusive) and last_obj (inclusive).  The list will be in reverse
2943 2942   * order (last_obj will be the list_head()).  If first_obj == 0, do all
2944 2943   * snapshots back to this dataset's origin.
2945 2944   */
2946 2945  static int
2947 2946  snaplist_make(dsl_pool_t *dp, boolean_t own,
2948 2947      uint64_t first_obj, uint64_t last_obj, list_t *l)
2949 2948  {
2950 2949          uint64_t obj = last_obj;
2951 2950  
2952 2951          ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock));
2953 2952  
2954 2953          list_create(l, sizeof (struct promotenode),
2955 2954              offsetof(struct promotenode, link));
2956 2955  
2957 2956          while (obj != first_obj) {
2958 2957                  dsl_dataset_t *ds;
2959 2958                  struct promotenode *snap;
2960 2959                  int err;
2961 2960  
2962 2961                  if (own) {
2963 2962                          err = dsl_dataset_own_obj(dp, obj,
2964 2963                              0, snaplist_tag, &ds);
2965 2964                          if (err == 0)
2966 2965                                  dsl_dataset_make_exclusive(ds, snaplist_tag);
2967 2966                  } else {
2968 2967                          err = dsl_dataset_hold_obj(dp, obj, snaplist_tag, &ds);
2969 2968                  }
2970 2969                  if (err == ENOENT) {
2971 2970                          /* lost race with snapshot destroy */
2972 2971                          struct promotenode *last = list_tail(l);
2973 2972                          ASSERT(obj != last->ds->ds_phys->ds_prev_snap_obj);
2974 2973                          obj = last->ds->ds_phys->ds_prev_snap_obj;
2975 2974                          continue;
2976 2975                  } else if (err) {
2977 2976                          return (err);
2978 2977                  }
2979 2978  
2980 2979                  if (first_obj == 0)
2981 2980                          first_obj = ds->ds_dir->dd_phys->dd_origin_obj;
2982 2981  
2983 2982                  snap = kmem_alloc(sizeof (struct promotenode), KM_SLEEP);
2984 2983                  snap->ds = ds;
2985 2984                  list_insert_tail(l, snap);
2986 2985                  obj = ds->ds_phys->ds_prev_snap_obj;
2987 2986          }
2988 2987  
2989 2988          return (0);
2990 2989  }
2991 2990  
2992 2991  static int
2993 2992  snaplist_space(list_t *l, uint64_t mintxg, uint64_t *spacep)
2994 2993  {
2995 2994          struct promotenode *snap;
2996 2995  
2997 2996          *spacep = 0;
2998 2997          for (snap = list_head(l); snap; snap = list_next(l, snap)) {
2999 2998                  uint64_t used, comp, uncomp;
3000 2999                  dsl_deadlist_space_range(&snap->ds->ds_deadlist,
3001 3000                      mintxg, UINT64_MAX, &used, &comp, &uncomp);
3002 3001                  *spacep += used;
3003 3002          }
3004 3003          return (0);
3005 3004  }
3006 3005  
3007 3006  static void
3008 3007  snaplist_destroy(list_t *l, boolean_t own)
3009 3008  {
3010 3009          struct promotenode *snap;
3011 3010  
3012 3011          if (!l || !list_link_active(&l->list_head))
3013 3012                  return;
3014 3013  
3015 3014          while ((snap = list_tail(l)) != NULL) {
3016 3015                  list_remove(l, snap);
3017 3016                  if (own)
3018 3017                          dsl_dataset_disown(snap->ds, snaplist_tag);
3019 3018                  else
3020 3019                          dsl_dataset_rele(snap->ds, snaplist_tag);
3021 3020                  kmem_free(snap, sizeof (struct promotenode));
3022 3021          }
3023 3022          list_destroy(l);
3024 3023  }
3025 3024  
3026 3025  /*
3027 3026   * Promote a clone.  Nomenclature note:
3028 3027   * "clone" or "cds": the original clone which is being promoted
3029 3028   * "origin" or "ods": the snapshot which is originally clone's origin
3030 3029   * "origin head" or "ohds": the dataset which is the head
3031 3030   * (filesystem/volume) for the origin
3032 3031   * "origin origin": the origin of the origin's filesystem (typically
3033 3032   * NULL, indicating that the clone is not a clone of a clone).
3034 3033   */
3035 3034  int
3036 3035  dsl_dataset_promote(const char *name, char *conflsnap)
3037 3036  {
3038 3037          dsl_dataset_t *ds;
3039 3038          dsl_dir_t *dd;
3040 3039          dsl_pool_t *dp;
3041 3040          dmu_object_info_t doi;
3042 3041          struct promotearg pa = { 0 };
3043 3042          struct promotenode *snap;
3044 3043          int err;
3045 3044  
3046 3045          err = dsl_dataset_hold(name, FTAG, &ds);
3047 3046          if (err)
3048 3047                  return (err);
3049 3048          dd = ds->ds_dir;
3050 3049          dp = dd->dd_pool;
3051 3050  
3052 3051          err = dmu_object_info(dp->dp_meta_objset,
3053 3052              ds->ds_phys->ds_snapnames_zapobj, &doi);
3054 3053          if (err) {
3055 3054                  dsl_dataset_rele(ds, FTAG);
3056 3055                  return (err);
3057 3056          }
3058 3057  
3059 3058          if (dsl_dataset_is_snapshot(ds) || dd->dd_phys->dd_origin_obj == 0) {
3060 3059                  dsl_dataset_rele(ds, FTAG);
3061 3060                  return (EINVAL);
3062 3061          }
3063 3062  
3064 3063          /*
3065 3064           * We are going to inherit all the snapshots taken before our
3066 3065           * origin (i.e., our new origin will be our parent's origin).
3067 3066           * Take ownership of them so that we can rename them into our
3068 3067           * namespace.
3069 3068           */
3070 3069          rw_enter(&dp->dp_config_rwlock, RW_READER);
3071 3070  
3072 3071          err = snaplist_make(dp, B_TRUE, 0, dd->dd_phys->dd_origin_obj,
3073 3072              &pa.shared_snaps);
3074 3073          if (err != 0)
3075 3074                  goto out;
3076 3075  
3077 3076          err = snaplist_make(dp, B_FALSE, 0, ds->ds_object, &pa.clone_snaps);
3078 3077          if (err != 0)
3079 3078                  goto out;
3080 3079  
3081 3080          snap = list_head(&pa.shared_snaps);
3082 3081          ASSERT3U(snap->ds->ds_object, ==, dd->dd_phys->dd_origin_obj);
3083 3082          err = snaplist_make(dp, B_FALSE, dd->dd_phys->dd_origin_obj,
3084 3083              snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, &pa.origin_snaps);
3085 3084          if (err != 0)
3086 3085                  goto out;
3087 3086  
3088 3087          if (snap->ds->ds_dir->dd_phys->dd_origin_obj != 0) {
3089 3088                  err = dsl_dataset_hold_obj(dp,
3090 3089                      snap->ds->ds_dir->dd_phys->dd_origin_obj,
3091 3090                      FTAG, &pa.origin_origin);
3092 3091                  if (err != 0)
3093 3092                          goto out;
3094 3093          }
3095 3094  
3096 3095  out:
3097 3096          rw_exit(&dp->dp_config_rwlock);
3098 3097  
3099 3098          /*
3100 3099           * Add in 128x the snapnames zapobj size, since we will be moving
3101 3100           * a bunch of snapnames to the promoted ds, and dirtying their
3102 3101           * bonus buffers.
3103 3102           */
3104 3103          if (err == 0) {
3105 3104                  err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
3106 3105                      dsl_dataset_promote_sync, ds, &pa,
3107 3106                      2 + 2 * doi.doi_physical_blocks_512);
3108 3107                  if (err && pa.err_ds && conflsnap)
3109 3108                          (void) strncpy(conflsnap, pa.err_ds, MAXNAMELEN);
3110 3109          }
3111 3110  
3112 3111          snaplist_destroy(&pa.shared_snaps, B_TRUE);
3113 3112          snaplist_destroy(&pa.clone_snaps, B_FALSE);
3114 3113          snaplist_destroy(&pa.origin_snaps, B_FALSE);
3115 3114          if (pa.origin_origin)
3116 3115                  dsl_dataset_rele(pa.origin_origin, FTAG);
3117 3116          dsl_dataset_rele(ds, FTAG);
3118 3117          return (err);
3119 3118  }
3120 3119  
3121 3120  struct cloneswaparg {
3122 3121          dsl_dataset_t *cds; /* clone dataset */
3123 3122          dsl_dataset_t *ohds; /* origin's head dataset */
3124 3123          boolean_t force;
3125 3124          int64_t unused_refres_delta; /* change in unconsumed refreservation */
3126 3125  };
3127 3126  
3128 3127  /* ARGSUSED */
3129 3128  static int
3130 3129  dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
3131 3130  {
3132 3131          struct cloneswaparg *csa = arg1;
3133 3132  
3134 3133          /* they should both be heads */
3135 3134          if (dsl_dataset_is_snapshot(csa->cds) ||
3136 3135              dsl_dataset_is_snapshot(csa->ohds))
3137 3136                  return (EINVAL);
3138 3137  
3139 3138          /* the branch point should be just before them */
3140 3139          if (csa->cds->ds_prev != csa->ohds->ds_prev)
3141 3140                  return (EINVAL);
3142 3141  
3143 3142          /* cds should be the clone (unless they are unrelated) */
3144 3143          if (csa->cds->ds_prev != NULL &&
3145 3144              csa->cds->ds_prev != csa->cds->ds_dir->dd_pool->dp_origin_snap &&
3146 3145              csa->ohds->ds_object !=
3147 3146              csa->cds->ds_prev->ds_phys->ds_next_snap_obj)
3148 3147                  return (EINVAL);
3149 3148  
3150 3149          /* the clone should be a child of the origin */
3151 3150          if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
3152 3151                  return (EINVAL);
3153 3152  
3154 3153          /* ohds shouldn't be modified unless 'force' */
3155 3154          if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
3156 3155                  return (ETXTBSY);
3157 3156  
3158 3157          /* adjust amount of any unconsumed refreservation */
3159 3158          csa->unused_refres_delta =
3160 3159              (int64_t)MIN(csa->ohds->ds_reserved,
3161 3160              csa->ohds->ds_phys->ds_unique_bytes) -
3162 3161              (int64_t)MIN(csa->ohds->ds_reserved,
3163 3162              csa->cds->ds_phys->ds_unique_bytes);
3164 3163  
3165 3164          if (csa->unused_refres_delta > 0 &&
3166 3165              csa->unused_refres_delta >
3167 3166              dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE))
3168 3167                  return (ENOSPC);
3169 3168  
3170 3169          if (csa->ohds->ds_quota != 0 &&
3171 3170              csa->cds->ds_phys->ds_unique_bytes > csa->ohds->ds_quota)
3172 3171                  return (EDQUOT);
3173 3172  
3174 3173          return (0);
3175 3174  }
3176 3175  
           /*
            * Sync function for the clone-swap sync task.  arg1 is the struct
            * cloneswaparg; arg2 is not used.
            *
            * Exchanges the state of csa->cds and csa->ohds: their block pointers,
            * their referenced/compressed/uncompressed/unique byte counts and
            * their deadlist objects.  Before swapping it evicts any cached objset
            * of either dataset, recomputes the origin snapshot's unique bytes,
            * and moves the resulting space differences between the two dsl_dirs.
            * It finishes by notifying the scan code and logging a "clone swap"
            * history record.
            */
3177 3176  /* ARGSUSED */
3178 3177  static void
3179 3178  dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
3180 3179  {
3181 3180          struct cloneswaparg *csa = arg1;
3182 3181          dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
3183 3182  
3184 3183          ASSERT(csa->cds->ds_reserved == 0);
3185 3184          ASSERT(csa->ohds->ds_quota == 0 ||
3186 3185              csa->cds->ds_phys->ds_unique_bytes <= csa->ohds->ds_quota);
3187 3186  
3188 3187          dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
3189 3188          dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
3190 3189  
3191 3190          if (csa->cds->ds_objset != NULL) {
3192 3191                  dmu_objset_evict(csa->cds->ds_objset);
3193 3192                  csa->cds->ds_objset = NULL;
3194 3193          }
3195 3194  
3196 3195          if (csa->ohds->ds_objset != NULL) {
3197 3196                  dmu_objset_evict(csa->ohds->ds_objset);
3198 3197                  csa->ohds->ds_objset = NULL;
3199 3198          }
3200 3199  
3201 3200          /*
3202 3201           * Reset origin's unique bytes, if it exists.
3203 3202           */
3204 3203          if (csa->cds->ds_prev) {
3205 3204                  dsl_dataset_t *origin = csa->cds->ds_prev;
3206 3205                  uint64_t comp, uncomp;
3207 3206  
3208 3207                  dmu_buf_will_dirty(origin->ds_dbuf, tx);
3209 3208                  dsl_deadlist_space_range(&csa->cds->ds_deadlist,
3210 3209                      origin->ds_phys->ds_prev_snap_txg, UINT64_MAX,
3211 3210                      &origin->ds_phys->ds_unique_bytes, &comp, &uncomp);
3212 3211          }
3213 3212  
3214 3213          /* swap blkptrs */
3215 3214          {
3216 3215                  blkptr_t tmp;
3217 3216                  tmp = csa->ohds->ds_phys->ds_bp;
3218 3217                  csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
3219 3218                  csa->cds->ds_phys->ds_bp = tmp;
3220 3219          }
3221 3220  
3222 3221          /* set dd_*_bytes */
3223 3222          {
3224 3223                  int64_t dused, dcomp, duncomp;
3225 3224                  uint64_t cdl_used, cdl_comp, cdl_uncomp;
3226 3225                  uint64_t odl_used, odl_comp, odl_uncomp;
3227 3226  
                           /*
                            * NOTE(review): this is a compare-against-zero check and
                            * could be written with ASSERT0().
                            */
3228 3227                  ASSERT3U(csa->cds->ds_dir->dd_phys->
3229 3228                      dd_used_breakdown[DD_USED_SNAP], ==, 0);
3230 3229  
3231 3230                  dsl_deadlist_space(&csa->cds->ds_deadlist,
3232 3231                      &cdl_used, &cdl_comp, &cdl_uncomp);
3233 3232                  dsl_deadlist_space(&csa->ohds->ds_deadlist,
3234 3233                      &odl_used, &odl_comp, &odl_uncomp);
3235 3234  
                           /* delta = (clone referenced + deadlist) - (origin head's) */
3236 3235                  dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
3237 3236                      (csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
3238 3237                  dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
3239 3238                      (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
3240 3239                  duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
3241 3240                      cdl_uncomp -
3242 3241                      (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
3243 3242  
3244 3243                  dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_HEAD,
3245 3244                      dused, dcomp, duncomp, tx);
3246 3245                  dsl_dir_diduse_space(csa->cds->ds_dir, DD_USED_HEAD,
3247 3246                      -dused, -dcomp, -duncomp, tx);
3248 3247  
3249 3248                  /*
3250 3249                   * The difference in the space used by snapshots is the
3251 3250                   * difference in snapshot space due to the head's
3252 3251                   * deadlist (since that's the only thing that's
3253 3252                   * changing that affects the snapused).
3254 3253                   */
3255 3254                  dsl_deadlist_space_range(&csa->cds->ds_deadlist,
3256 3255                      csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
3257 3256                      &cdl_used, &cdl_comp, &cdl_uncomp);
3258 3257                  dsl_deadlist_space_range(&csa->ohds->ds_deadlist,
3259 3258                      csa->ohds->ds_dir->dd_origin_txg, UINT64_MAX,
3260 3259                      &odl_used, &odl_comp, &odl_uncomp);
3261 3260                  dsl_dir_transfer_space(csa->ohds->ds_dir, cdl_used - odl_used,
3262 3261                      DD_USED_HEAD, DD_USED_SNAP, tx);
3263 3262          }
3264 3263  
3265 3264          /* swap ds_*_bytes */
3266 3265          SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
3267 3266              csa->cds->ds_phys->ds_referenced_bytes);
3268 3267          SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
3269 3268              csa->cds->ds_phys->ds_compressed_bytes);
3270 3269          SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
3271 3270              csa->cds->ds_phys->ds_uncompressed_bytes);
3272 3271          SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
3273 3272              csa->cds->ds_phys->ds_unique_bytes);
3274 3273  
3275 3274          /* apply any parent delta for change in unconsumed refreservation */
3276 3275          dsl_dir_diduse_space(csa->ohds->ds_dir, DD_USED_REFRSRV,
3277 3276              csa->unused_refres_delta, 0, 0, tx);
3278 3277  
3279 3278          /*
3280 3279           * Swap deadlists.
3281 3280           */
                   /* both deadlists are closed, the object numbers swapped, then reopened */
3282 3281          dsl_deadlist_close(&csa->cds->ds_deadlist);
3283 3282          dsl_deadlist_close(&csa->ohds->ds_deadlist);
3284 3283          SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
3285 3284              csa->cds->ds_phys->ds_deadlist_obj);
3286 3285          dsl_deadlist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
3287 3286              csa->cds->ds_phys->ds_deadlist_obj);
3288 3287          dsl_deadlist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
3289 3288              csa->ohds->ds_phys->ds_deadlist_obj);
3290 3289  
3291 3290          dsl_scan_ds_clone_swapped(csa->ohds, csa->cds, tx);
3292 3291  
3293 3292          spa_history_log_internal_ds(csa->cds, "clone swap", tx,
3294 3293              "parent=%s", csa->ohds->ds_dir->dd_myname);
3295 3294  }
3296 3295  
3297 3296  /*
3298 3297   * Swap 'clone' with its origin head datasets.  Used at the end of "zfs
3299 3298   * recv" into an existing fs to swizzle the file system to the new
3300 3299   * version, and by "zfs rollback".  Can also be used to swap two
3301 3300   * independent head datasets if neither has any snapshots.
            *
            * Both datasets must have an owner (asserted below).  'force' is
            * handed to the check function through struct cloneswaparg.  Returns
            * the result of the clone-swap sync task.  The ds_rwlock write locks
            * on both datasets are not released by this function.
3302 3301   */
3303 3302  int
3304 3303  dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
3305 3304      boolean_t force)
3306 3305  {
3307 3306          struct cloneswaparg csa;
3308 3307          int error;
3309 3308  
3310 3309          ASSERT(clone->ds_owner);
3311 3310          ASSERT(origin_head->ds_owner);
3312 3311  retry:
3313 3312          /*
3314 3313           * Need exclusive access for the swap. If we're swapping these
3315 3314           * datasets back after an error, we already hold the locks.
3316 3315           */
3317 3316          if (!RW_WRITE_HELD(&clone->ds_rwlock))
3318 3317                  rw_enter(&clone->ds_rwlock, RW_WRITER);
                   /*
                    * If origin_head's lock cannot be taken without waiting, back
                    * off: drop the clone lock, wait for origin_head, and then only
                    * *try* for the clone lock.  If that fails, release origin_head
                    * and start over, so this path never waits on one of the two
                    * locks while holding the other.
                    */
3319 3318          if (!RW_WRITE_HELD(&origin_head->ds_rwlock) &&
3320 3319              !rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
3321 3320                  rw_exit(&clone->ds_rwlock);
3322 3321                  rw_enter(&origin_head->ds_rwlock, RW_WRITER);
3323 3322                  if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
3324 3323                          rw_exit(&origin_head->ds_rwlock);
3325 3324                          goto retry;
3326 3325                  }
3327 3326          }
3328 3327          csa.cds = clone;
3329 3328          csa.ohds = origin_head;
3330 3329          csa.force = force;
                   /* csa.unused_refres_delta is filled in by the check function */
3331 3330          error = dsl_sync_task_do(clone->ds_dir->dd_pool,
3332 3331              dsl_dataset_clone_swap_check,
3333 3332              dsl_dataset_clone_swap_sync, &csa, NULL, 9);
3334 3333          return (error);
3335 3334  }
3336 3335  
3337 3336  /*
3338 3337   * Given a pool name and a dataset object number in that pool,
3339 3338   * return the name of that dataset.
3340 3339   */
3341 3340  int
3342 3341  dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
3343 3342  {
3344 3343          spa_t *spa;
3345 3344          dsl_pool_t *dp;
3346 3345          dsl_dataset_t *ds;
3347 3346          int error;
3348 3347  
3349 3348          if ((error = spa_open(pname, &spa, FTAG)) != 0)
3350 3349                  return (error);
3351 3350          dp = spa_get_dsl(spa);
3352 3351          rw_enter(&dp->dp_config_rwlock, RW_READER);
3353 3352          if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) {
3354 3353                  dsl_dataset_name(ds, buf);
3355 3354                  dsl_dataset_rele(ds, FTAG);
3356 3355          }
3357 3356          rw_exit(&dp->dp_config_rwlock);
3358 3357          spa_close(spa, FTAG);
3359 3358  
3360 3359          return (error);
3361 3360  }
3362 3361  
3363 3362  int
3364 3363  dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
3365 3364      uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv)
3366 3365  {
3367 3366          int error = 0;
3368 3367  
3369 3368          ASSERT3S(asize, >, 0);
3370 3369  
3371 3370          /*
3372 3371           * *ref_rsrv is the portion of asize that will come from any
3373 3372           * unconsumed refreservation space.
3374 3373           */
3375 3374          *ref_rsrv = 0;
3376 3375  
3377 3376          mutex_enter(&ds->ds_lock);
3378 3377          /*
3379 3378           * Make a space adjustment for reserved bytes.
3380 3379           */
3381 3380          if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
3382 3381                  ASSERT3U(*used, >=,
3383 3382                      ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
3384 3383                  *used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
3385 3384                  *ref_rsrv =
3386 3385                      asize - MIN(asize, parent_delta(ds, asize + inflight));
3387 3386          }
3388 3387  
3389 3388          if (!check_quota || ds->ds_quota == 0) {
3390 3389                  mutex_exit(&ds->ds_lock);
3391 3390                  return (0);
3392 3391          }
3393 3392          /*
3394 3393           * If they are requesting more space, and our current estimate
3395 3394           * is over quota, they get to try again unless the actual
3396 3395           * on-disk is over quota and there are no pending changes (which
3397 3396           * may free up space for us).
3398 3397           */
3399 3398          if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
3400 3399                  if (inflight > 0 ||
3401 3400                      ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
3402 3401                          error = ERESTART;
3403 3402                  else
3404 3403                          error = EDQUOT;
3405 3404          }
3406 3405          mutex_exit(&ds->ds_lock);
3407 3406  
3408 3407          return (error);
3409 3408  }
3410 3409  
3411 3410  /* ARGSUSED */
3412 3411  static int
3413 3412  dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
3414 3413  {
3415 3414          dsl_dataset_t *ds = arg1;
3416 3415          dsl_prop_setarg_t *psa = arg2;
3417 3416          int err;
3418 3417  
3419 3418          if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
3420 3419                  return (ENOTSUP);
3421 3420  
3422 3421          if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
3423 3422                  return (err);
3424 3423  
3425 3424          if (psa->psa_effective_value == 0)
3426 3425                  return (0);
3427 3426  
3428 3427          if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
3429 3428              psa->psa_effective_value < ds->ds_reserved)
3430 3429                  return (ENOSPC);
3431 3430  
3432 3431          return (0);
3433 3432  }
3434 3433  
3435 3434  extern void dsl_prop_set_sync(void *, void *, dmu_tx_t *);
3436 3435  
3437 3436  void
3438 3437  dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
3439 3438  {
3440 3439          dsl_dataset_t *ds = arg1;
3441 3440          dsl_prop_setarg_t *psa = arg2;
3442 3441          uint64_t effective_value = psa->psa_effective_value;
3443 3442  
3444 3443          dsl_prop_set_sync(ds, psa, tx);
3445 3444          DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);
3446 3445  
3447 3446          if (ds->ds_quota != effective_value) {
3448 3447                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
3449 3448                  ds->ds_quota = effective_value;
3450 3449  
3451 3450                  spa_history_log_internal_ds(ds, "set refquota", tx,
3452 3451                      "refquota=%lld", (longlong_t)ds->ds_quota);
3453 3452          }
3454 3453  }
3455 3454  
3456 3455  int
3457 3456  dsl_dataset_set_quota(const char *dsname, zprop_source_t source, uint64_t quota)
3458 3457  {
3459 3458          dsl_dataset_t *ds;
3460 3459          dsl_prop_setarg_t psa;
3461 3460          int err;
3462 3461  
3463 3462          dsl_prop_setarg_init_uint64(&psa, "refquota", source, &quota);
3464 3463  
3465 3464          err = dsl_dataset_hold(dsname, FTAG, &ds);
3466 3465          if (err)
3467 3466                  return (err);
3468 3467  
3469 3468          /*
3470 3469           * If someone removes a file, then tries to set the quota, we
3471 3470           * want to make sure the file freeing takes effect.
3472 3471           */
3473 3472          txg_wait_open(ds->ds_dir->dd_pool, 0);
3474 3473  
3475 3474          err = dsl_sync_task_do(ds->ds_dir->dd_pool,
3476 3475              dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync,
3477 3476              ds, &psa, 0);
3478 3477  
3479 3478          dsl_dataset_rele(ds, FTAG);
3480 3479          return (err);
3481 3480  }
3482 3481  
3483 3482  static int
3484 3483  dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
3485 3484  {
3486 3485          dsl_dataset_t *ds = arg1;
3487 3486          dsl_prop_setarg_t *psa = arg2;
3488 3487          uint64_t effective_value;
3489 3488          uint64_t unique;
3490 3489          int err;
3491 3490  
3492 3491          if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
3493 3492              SPA_VERSION_REFRESERVATION)
3494 3493                  return (ENOTSUP);
3495 3494  
3496 3495          if (dsl_dataset_is_snapshot(ds))
3497 3496                  return (EINVAL);
3498 3497  
3499 3498          if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
3500 3499                  return (err);
3501 3500  
3502 3501          effective_value = psa->psa_effective_value;
3503 3502  
3504 3503          /*
3505 3504           * If we are doing the preliminary check in open context, the
3506 3505           * space estimates may be inaccurate.
3507 3506           */
3508 3507          if (!dmu_tx_is_syncing(tx))
3509 3508                  return (0);
3510 3509  
3511 3510          mutex_enter(&ds->ds_lock);
3512 3511          if (!DS_UNIQUE_IS_ACCURATE(ds))
3513 3512                  dsl_dataset_recalc_head_uniq(ds);
3514 3513          unique = ds->ds_phys->ds_unique_bytes;
3515 3514          mutex_exit(&ds->ds_lock);
3516 3515  
3517 3516          if (MAX(unique, effective_value) > MAX(unique, ds->ds_reserved)) {
3518 3517                  uint64_t delta = MAX(unique, effective_value) -
3519 3518                      MAX(unique, ds->ds_reserved);
3520 3519  
3521 3520                  if (delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
3522 3521                          return (ENOSPC);
3523 3522                  if (ds->ds_quota > 0 &&
3524 3523                      effective_value > ds->ds_quota)
3525 3524                          return (ENOSPC);
3526 3525          }
3527 3526  
3528 3527          return (0);
3529 3528  }
3530 3529  
3531 3530  static void
3532 3531  dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
3533 3532  {
3534 3533          dsl_dataset_t *ds = arg1;
3535 3534          dsl_prop_setarg_t *psa = arg2;
3536 3535          uint64_t effective_value = psa->psa_effective_value;
3537 3536          uint64_t unique;
3538 3537          int64_t delta;
3539 3538  
3540 3539          dsl_prop_set_sync(ds, psa, tx);
3541 3540          DSL_PROP_CHECK_PREDICTION(ds->ds_dir, psa);
3542 3541  
3543 3542          dmu_buf_will_dirty(ds->ds_dbuf, tx);
3544 3543  
3545 3544          mutex_enter(&ds->ds_dir->dd_lock);
3546 3545          mutex_enter(&ds->ds_lock);
3547 3546          ASSERT(DS_UNIQUE_IS_ACCURATE(ds));
3548 3547          unique = ds->ds_phys->ds_unique_bytes;
3549 3548          delta = MAX(0, (int64_t)(effective_value - unique)) -
3550 3549              MAX(0, (int64_t)(ds->ds_reserved - unique));
3551 3550          ds->ds_reserved = effective_value;
3552 3551          mutex_exit(&ds->ds_lock);
3553 3552  
3554 3553          dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
3555 3554          mutex_exit(&ds->ds_dir->dd_lock);
3556 3555  
3557 3556          spa_history_log_internal_ds(ds, "set refreservation", tx,
3558 3557              "refreservation=%lld", (longlong_t)effective_value);
3559 3558  }
3560 3559  
3561 3560  int
3562 3561  dsl_dataset_set_reservation(const char *dsname, zprop_source_t source,
3563 3562      uint64_t reservation)
3564 3563  {
3565 3564          dsl_dataset_t *ds;
3566 3565          dsl_prop_setarg_t psa;
3567 3566          int err;
3568 3567  
3569 3568          dsl_prop_setarg_init_uint64(&psa, "refreservation", source,
3570 3569              &reservation);
3571 3570  
3572 3571          err = dsl_dataset_hold(dsname, FTAG, &ds);
3573 3572          if (err)
3574 3573                  return (err);
3575 3574  
3576 3575          err = dsl_sync_task_do(ds->ds_dir->dd_pool,
3577 3576              dsl_dataset_set_reservation_check,
3578 3577              dsl_dataset_set_reservation_sync, ds, &psa, 0);
3579 3578  
3580 3579          dsl_dataset_rele(ds, FTAG);
3581 3580          return (err);
3582 3581  }
3583 3582  
3584 3583  typedef struct zfs_hold_cleanup_arg {
3585 3584          dsl_pool_t *dp;         /* pool of the held dataset */
3586 3585          uint64_t dsobj;         /* object number of the held dataset */
3587 3586          char htag[MAXNAMELEN];  /* private copy of the hold tag */
3588 3587  } zfs_hold_cleanup_arg_t;
3589 3588  
3590 3589  static void
3591 3590  dsl_dataset_user_release_onexit(void *arg)
3592 3591  {
3593 3592          zfs_hold_cleanup_arg_t *ca = arg;
3594 3593  
3595 3594          (void) dsl_dataset_user_release_tmp(ca->dp, ca->dsobj, ca->htag,
3596 3595              B_TRUE);
3597 3596          kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
3598 3597  }
3599 3598

↓ open down ↓

684 lines elided

↑ open up ↑

3600 3599  void
3601 3600  dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
3602 3601      minor_t minor)
3603 3602  {
3604 3603          zfs_hold_cleanup_arg_t *ca;
3605 3604  
3606 3605          ca = kmem_alloc(sizeof (zfs_hold_cleanup_arg_t), KM_SLEEP);
3607 3606          ca->dp = ds->ds_dir->dd_pool;
3608 3607          ca->dsobj = ds->ds_object;
3609 3608          (void) strlcpy(ca->htag, htag, sizeof (ca->htag));
3610      -        VERIFY3U(0, ==, zfs_onexit_add_cb(minor,
     3609 +        VERIFY0(zfs_onexit_add_cb(minor,
3611 3610              dsl_dataset_user_release_onexit, ca, NULL));
3612 3611  }
3613 3612  
3614 3613  /*
3615 3614   * If you add new checks here, you may need to add
3616 3615   * additional checks to the "temporary" case in
3617 3616   * snapshot_check() in dmu_objset.c.
3618 3617   */
3619 3618  static int
3620 3619  dsl_dataset_user_hold_check(void *arg1, void *arg2, dmu_tx_t *tx)

3621 3620  {
3622 3621          dsl_dataset_t *ds = arg1;
3623 3622          struct dsl_ds_holdarg *ha = arg2;
3624 3623          const char *htag = ha->htag;
3625 3624          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
3626 3625          int error = 0;
3627 3626  
3628 3627          if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
3629 3628                  return (ENOTSUP);
3630 3629  
3631 3630          if (!dsl_dataset_is_snapshot(ds))
3632 3631                  return (EINVAL);
3633 3632  
3634 3633          /* tags must be unique */
3635 3634          mutex_enter(&ds->ds_lock);
3636 3635          if (ds->ds_phys->ds_userrefs_obj) {
3637 3636                  error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj, htag,
3638 3637                      8, 1, tx);
3639 3638                  if (error == 0)
3640 3639                          error = EEXIST;
3641 3640                  else if (error == ENOENT)
3642 3641                          error = 0;
3643 3642          }
3644 3643          mutex_exit(&ds->ds_lock);
3645 3644  
3646 3645          if (error == 0 && ha->temphold &&
3647 3646              strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
3648 3647                  error = E2BIG;
3649 3648  
3650 3649          return (error);
3651 3650  }
3652 3651  
3653 3652  void
3654 3653  dsl_dataset_user_hold_sync(void *arg1, void *arg2, dmu_tx_t *tx)
3655 3654  {
3656 3655          dsl_dataset_t *ds = arg1;
3657 3656          struct dsl_ds_holdarg *ha = arg2;
3658 3657          const char *htag = ha->htag;
3659 3658          dsl_pool_t *dp = ds->ds_dir->dd_pool;
3660 3659          objset_t *mos = dp->dp_meta_objset;
3661 3660          uint64_t now = gethrestime_sec();
3662 3661          uint64_t zapobj;
3663 3662  
3664 3663          mutex_enter(&ds->ds_lock);
3665 3664          if (ds->ds_phys->ds_userrefs_obj == 0) {
3666 3665                  /*
3667 3666                   * This is the first user hold for this dataset.  Create
3668 3667                   * the userrefs zap object.
3669 3668                   */
3670 3669                  dmu_buf_will_dirty(ds->ds_dbuf, tx);
3671 3670                  zapobj = ds->ds_phys->ds_userrefs_obj =
3672 3671                      zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
3673 3672          } else {
3674 3673                  zapobj = ds->ds_phys->ds_userrefs_obj;
3675 3674          }
3676 3675          ds->ds_userrefs++;
3677 3676          mutex_exit(&ds->ds_lock);
3678 3677  
3679 3678          VERIFY(0 == zap_add(mos, zapobj, htag, 8, 1, &now, tx));
3680 3679  
3681 3680          if (ha->temphold) {
3682 3681                  VERIFY(0 == dsl_pool_user_hold(dp, ds->ds_object,
3683 3682                      htag, &now, tx));
3684 3683          }
3685 3684  
3686 3685          spa_history_log_internal_ds(ds, "hold", tx,
3687 3686              "tag = %s temp = %d holds now = %llu",
3688 3687              htag, (int)ha->temphold, ds->ds_userrefs);
3689 3688  }
3690 3689  
3691 3690  static int
3692 3691  dsl_dataset_user_hold_one(const char *dsname, void *arg)
3693 3692  {
3694 3693          struct dsl_ds_holdarg *ha = arg;
3695 3694          dsl_dataset_t *ds;
3696 3695          int error;
3697 3696          char *name;
3698 3697  
3699 3698          /* alloc a buffer to hold dsname@snapname plus terminating NULL */
3700 3699          name = kmem_asprintf("%s@%s", dsname, ha->snapname);
3701 3700          error = dsl_dataset_hold(name, ha->dstg, &ds);
3702 3701          strfree(name);
3703 3702          if (error == 0) {
3704 3703                  ha->gotone = B_TRUE;
3705 3704                  dsl_sync_task_create(ha->dstg, dsl_dataset_user_hold_check,
3706 3705                      dsl_dataset_user_hold_sync, ds, ha, 0);
3707 3706          } else if (error == ENOENT && ha->recursive) {
3708 3707                  error = 0;
3709 3708          } else {
3710 3709                  (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
3711 3710          }
3712 3711          return (error);
3713 3712  }
3714 3713  
3715 3714  int
3716 3715  dsl_dataset_user_hold_for_send(dsl_dataset_t *ds, char *htag,
3717 3716      boolean_t temphold)
3718 3717  {
3719 3718          struct dsl_ds_holdarg *ha;
3720 3719          int error;
3721 3720  
3722 3721          ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
3723 3722          ha->htag = htag;
3724 3723          ha->temphold = temphold;
3725 3724          error = dsl_sync_task_do(ds->ds_dir->dd_pool,
3726 3725              dsl_dataset_user_hold_check, dsl_dataset_user_hold_sync,
3727 3726              ds, ha, 0);
3728 3727          kmem_free(ha, sizeof (struct dsl_ds_holdarg));
3729 3728  
3730 3729          return (error);
3731 3730  }
3732 3731  
3733 3732  int
3734 3733  dsl_dataset_user_hold(char *dsname, char *snapname, char *htag,
3735 3734      boolean_t recursive, boolean_t temphold, int cleanup_fd)
3736 3735  {
3737 3736          struct dsl_ds_holdarg *ha;
3738 3737          dsl_sync_task_t *dst;
3739 3738          spa_t *spa;
3740 3739          int error;
3741 3740          minor_t minor = 0;
3742 3741  
3743 3742          if (cleanup_fd != -1) {
3744 3743                  /* Currently we only support cleanup-on-exit of tempholds. */
3745 3744                  if (!temphold)
3746 3745                          return (EINVAL);
3747 3746                  error = zfs_onexit_fd_hold(cleanup_fd, &minor);
3748 3747                  if (error)
3749 3748                          return (error);
3750 3749          }
3751 3750  
3752 3751          ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
3753 3752  
3754 3753          (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
3755 3754  
3756 3755          error = spa_open(dsname, &spa, FTAG);
3757 3756          if (error) {
3758 3757                  kmem_free(ha, sizeof (struct dsl_ds_holdarg));
3759 3758                  if (cleanup_fd != -1)
3760 3759                          zfs_onexit_fd_rele(cleanup_fd);
3761 3760                  return (error);
3762 3761          }
3763 3762  
3764 3763          ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
3765 3764          ha->htag = htag;
3766 3765          ha->snapname = snapname;
3767 3766          ha->recursive = recursive;
3768 3767          ha->temphold = temphold;
3769 3768  
3770 3769          if (recursive) {
3771 3770                  error = dmu_objset_find(dsname, dsl_dataset_user_hold_one,
3772 3771                      ha, DS_FIND_CHILDREN);
3773 3772          } else {
3774 3773                  error = dsl_dataset_user_hold_one(dsname, ha);
3775 3774          }
3776 3775          if (error == 0)
3777 3776                  error = dsl_sync_task_group_wait(ha->dstg);
3778 3777  
3779 3778          for (dst = list_head(&ha->dstg->dstg_tasks); dst;
3780 3779              dst = list_next(&ha->dstg->dstg_tasks, dst)) {
3781 3780                  dsl_dataset_t *ds = dst->dst_arg1;
3782 3781  
3783 3782                  if (dst->dst_err) {
3784 3783                          dsl_dataset_name(ds, ha->failed);
3785 3784                          *strchr(ha->failed, '@') = '\0';
3786 3785                  } else if (error == 0 && minor != 0 && temphold) {
3787 3786                          /*
3788 3787                           * If this hold is to be released upon process exit,
3789 3788                           * register that action now.
3790 3789                           */
3791 3790                          dsl_register_onexit_hold_cleanup(ds, htag, minor);
3792 3791                  }
3793 3792                  dsl_dataset_rele(ds, ha->dstg);
3794 3793          }
3795 3794  
3796 3795          if (error == 0 && recursive && !ha->gotone)
3797 3796                  error = ENOENT;
3798 3797  
3799 3798          if (error)
3800 3799                  (void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
3801 3800  
3802 3801          dsl_sync_task_group_destroy(ha->dstg);
3803 3802  
3804 3803          kmem_free(ha, sizeof (struct dsl_ds_holdarg));
3805 3804          spa_close(spa, FTAG);
3806 3805          if (cleanup_fd != -1)
3807 3806                  zfs_onexit_fd_rele(cleanup_fd);
3808 3807          return (error);
3809 3808  }
3810 3809  
3811 3810  struct dsl_ds_releasearg {
3812 3811          dsl_dataset_t *ds;      /* snapshot whose hold is released */
3813 3812          const char *htag;       /* tag naming the hold to release */
3814 3813          boolean_t own;          /* do we own or just hold ds? */
3815 3814  };
3816 3815  
3817 3816  static int
3818 3817  dsl_dataset_release_might_destroy(dsl_dataset_t *ds, const char *htag,
3819 3818      boolean_t *might_destroy)
3820 3819  {
3821 3820          objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
3822 3821          uint64_t zapobj;
3823 3822          uint64_t tmp;
3824 3823          int error;
3825 3824  
3826 3825          *might_destroy = B_FALSE;
3827 3826  
3828 3827          mutex_enter(&ds->ds_lock);
3829 3828          zapobj = ds->ds_phys->ds_userrefs_obj;
3830 3829          if (zapobj == 0) {
3831 3830                  /* The tag can't possibly exist */
3832 3831                  mutex_exit(&ds->ds_lock);
3833 3832                  return (ESRCH);
3834 3833          }
3835 3834  
3836 3835          /* Make sure the tag exists */
3837 3836          error = zap_lookup(mos, zapobj, htag, 8, 1, &tmp);
3838 3837          if (error) {
3839 3838                  mutex_exit(&ds->ds_lock);
3840 3839                  if (error == ENOENT)
3841 3840                          error = ESRCH;
3842 3841                  return (error);
3843 3842          }
3844 3843  
3845 3844          if (ds->ds_userrefs == 1 && ds->ds_phys->ds_num_children == 1 &&
3846 3845              DS_IS_DEFER_DESTROY(ds))
3847 3846                  *might_destroy = B_TRUE;
3848 3847  
3849 3848          mutex_exit(&ds->ds_lock);
3850 3849          return (0);
3851 3850  }
3852 3851  
3853 3852  static int
3854 3853  dsl_dataset_user_release_check(void *arg1, void *tag, dmu_tx_t *tx)
3855 3854  {
3856 3855          struct dsl_ds_releasearg *ra = arg1;
3857 3856          dsl_dataset_t *ds = ra->ds;
3858 3857          boolean_t might_destroy;
3859 3858          int error;
3860 3859  
3861 3860          if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_USERREFS)
3862 3861                  return (ENOTSUP);
3863 3862  
3864 3863          error = dsl_dataset_release_might_destroy(ds, ra->htag, &might_destroy);
3865 3864          if (error)
3866 3865                  return (error);
3867 3866  
3868 3867          if (might_destroy) {
3869 3868                  struct dsl_ds_destroyarg dsda = {0};
3870 3869  
3871 3870                  if (dmu_tx_is_syncing(tx)) {
3872 3871                          /*
3873 3872                           * If we're not prepared to remove the snapshot,
3874 3873                           * we can't allow the release to happen right now.
3875 3874                           */
3876 3875                          if (!ra->own)
3877 3876                                  return (EBUSY);
3878 3877                  }
3879 3878                  dsda.ds = ds;
3880 3879                  dsda.releasing = B_TRUE;
3881 3880                  return (dsl_dataset_destroy_check(&dsda, tag, tx));
3882 3881          }
3883 3882  
3884 3883          return (0);
3885 3884  }
3886 3885  
3887 3886  static void
3888 3887  dsl_dataset_user_release_sync(void *arg1, void *tag, dmu_tx_t *tx)
3889 3888  {
3890 3889          struct dsl_ds_releasearg *ra = arg1;
3891 3890          dsl_dataset_t *ds = ra->ds;
3892 3891          dsl_pool_t *dp = ds->ds_dir->dd_pool;
3893 3892          objset_t *mos = dp->dp_meta_objset;
3894 3893          uint64_t zapobj;
3895 3894          uint64_t refs;
3896 3895          int error;
3897 3896  
3898 3897          mutex_enter(&ds->ds_lock);
3899 3898          ds->ds_userrefs--;
3900 3899          refs = ds->ds_userrefs;
3901 3900          mutex_exit(&ds->ds_lock);
3902 3901          error = dsl_pool_user_release(dp, ds->ds_object, ra->htag, tx);
3903 3902          VERIFY(error == 0 || error == ENOENT);
3904 3903          zapobj = ds->ds_phys->ds_userrefs_obj;
3905 3904          VERIFY(0 == zap_remove(mos, zapobj, ra->htag, tx));
3906 3905          if (ds->ds_userrefs == 0 && ds->ds_phys->ds_num_children == 1 &&
3907 3906              DS_IS_DEFER_DESTROY(ds)) {
3908 3907                  struct dsl_ds_destroyarg dsda = {0};
3909 3908  
3910 3909                  ASSERT(ra->own);
3911 3910                  dsda.ds = ds;
3912 3911                  dsda.releasing = B_TRUE;
3913 3912                  /* We already did the destroy_check */
3914 3913                  dsl_dataset_destroy_sync(&dsda, tag, tx);
3915 3914          }
3916 3915  
3917 3916          spa_history_log_internal_ds(ds, "release", tx,
3918 3917              "tag = %s refs now = %lld", ra->htag, (longlong_t)refs);
3919 3918  }
3920 3919  
3921 3920  static int
3922 3921  dsl_dataset_user_release_one(const char *dsname, void *arg)
3923 3922  {
3924 3923          struct dsl_ds_holdarg *ha = arg;
3925 3924          struct dsl_ds_releasearg *ra;
3926 3925          dsl_dataset_t *ds;
3927 3926          int error;
3928 3927          void *dtag = ha->dstg;
3929 3928          char *name;
3930 3929          boolean_t own = B_FALSE;
3931 3930          boolean_t might_destroy;
3932 3931  
3933 3932          /* alloc a buffer to hold dsname@snapname, plus the terminating NULL */
3934 3933          name = kmem_asprintf("%s@%s", dsname, ha->snapname);
3935 3934          error = dsl_dataset_hold(name, dtag, &ds);
3936 3935          strfree(name);
3937 3936          if (error == ENOENT && ha->recursive)
3938 3937                  return (0);
3939 3938          (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
3940 3939          if (error)
3941 3940                  return (error);
3942 3941  
3943 3942          ha->gotone = B_TRUE;
3944 3943  
3945 3944          ASSERT(dsl_dataset_is_snapshot(ds));
3946 3945  
3947 3946          error = dsl_dataset_release_might_destroy(ds, ha->htag, &might_destroy);
3948 3947          if (error) {
3949 3948                  dsl_dataset_rele(ds, dtag);
3950 3949                  return (error);
3951 3950          }
3952 3951  
3953 3952          if (might_destroy) {
3954 3953  #ifdef _KERNEL
3955 3954                  name = kmem_asprintf("%s@%s", dsname, ha->snapname);
3956 3955                  error = zfs_unmount_snap(name, NULL);
3957 3956                  strfree(name);
3958 3957                  if (error) {
3959 3958                          dsl_dataset_rele(ds, dtag);
3960 3959                          return (error);
3961 3960                  }
3962 3961  #endif
3963 3962                  if (!dsl_dataset_tryown(ds, B_TRUE, dtag)) {
3964 3963                          dsl_dataset_rele(ds, dtag);
3965 3964                          return (EBUSY);
3966 3965                  } else {
3967 3966                          own = B_TRUE;
3968 3967                          dsl_dataset_make_exclusive(ds, dtag);
3969 3968                  }
3970 3969          }
3971 3970  
3972 3971          ra = kmem_alloc(sizeof (struct dsl_ds_releasearg), KM_SLEEP);
3973 3972          ra->ds = ds;
3974 3973          ra->htag = ha->htag;
3975 3974          ra->own = own;
3976 3975          dsl_sync_task_create(ha->dstg, dsl_dataset_user_release_check,
3977 3976              dsl_dataset_user_release_sync, ra, dtag, 0);
3978 3977  
3979 3978          return (0);
3980 3979  }
3981 3980  
3982 3981  int
3983 3982  dsl_dataset_user_release(char *dsname, char *snapname, char *htag,
3984 3983      boolean_t recursive)
3985 3984  {
3986 3985          struct dsl_ds_holdarg *ha;
3987 3986          dsl_sync_task_t *dst;
3988 3987          spa_t *spa;
3989 3988          int error;
3990 3989  
3991 3990  top:
3992 3991          ha = kmem_zalloc(sizeof (struct dsl_ds_holdarg), KM_SLEEP);
3993 3992  
3994 3993          (void) strlcpy(ha->failed, dsname, sizeof (ha->failed));
3995 3994  
3996 3995          error = spa_open(dsname, &spa, FTAG);
3997 3996          if (error) {
3998 3997                  kmem_free(ha, sizeof (struct dsl_ds_holdarg));
3999 3998                  return (error);
4000 3999          }
4001 4000  
4002 4001          ha->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
4003 4002          ha->htag = htag;
4004 4003          ha->snapname = snapname;
4005 4004          ha->recursive = recursive;
4006 4005          if (recursive) {
4007 4006                  error = dmu_objset_find(dsname, dsl_dataset_user_release_one,
4008 4007                      ha, DS_FIND_CHILDREN);
4009 4008          } else {
4010 4009                  error = dsl_dataset_user_release_one(dsname, ha);
4011 4010          }
4012 4011          if (error == 0)
4013 4012                  error = dsl_sync_task_group_wait(ha->dstg);
4014 4013  
4015 4014          for (dst = list_head(&ha->dstg->dstg_tasks); dst;
4016 4015              dst = list_next(&ha->dstg->dstg_tasks, dst)) {
4017 4016                  struct dsl_ds_releasearg *ra = dst->dst_arg1;
4018 4017                  dsl_dataset_t *ds = ra->ds;
4019 4018  
4020 4019                  if (dst->dst_err)
4021 4020                          dsl_dataset_name(ds, ha->failed);
4022 4021  
4023 4022                  if (ra->own)
4024 4023                          dsl_dataset_disown(ds, ha->dstg);
4025 4024                  else
4026 4025                          dsl_dataset_rele(ds, ha->dstg);
4027 4026  
4028 4027                  kmem_free(ra, sizeof (struct dsl_ds_releasearg));
4029 4028          }
4030 4029  
4031 4030          if (error == 0 && recursive && !ha->gotone)
4032 4031                  error = ENOENT;
4033 4032  
4034 4033          if (error && error != EBUSY)
4035 4034                  (void) strlcpy(dsname, ha->failed, sizeof (ha->failed));
4036 4035  
4037 4036          dsl_sync_task_group_destroy(ha->dstg);
4038 4037          kmem_free(ha, sizeof (struct dsl_ds_holdarg));
4039 4038          spa_close(spa, FTAG);
4040 4039  
4041 4040          /*
4042 4041           * We can get EBUSY if we were racing with deferred destroy and
4043 4042           * dsl_dataset_user_release_check() hadn't done the necessary
4044 4043           * open context setup.  We can also get EBUSY if we're racing
4045 4044           * with destroy and that thread is the ds_owner.  Either way
4046 4045           * the busy condition should be transient, and we should retry
4047 4046           * the release operation.
4048 4047           */
4049 4048          if (error == EBUSY)
4050 4049                  goto top;
4051 4050  
4052 4051          return (error);
4053 4052  }
4054 4053  
4055 4054  /*
4056 4055   * Called at spa_load time (with retry == B_FALSE) to release a stale
4057 4056   * temporary user hold. Also called by the onexit code (with retry == B_TRUE).
4058 4057   */
4059 4058  int
4060 4059  dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, char *htag,
4061 4060      boolean_t retry)
4062 4061  {
4063 4062          dsl_dataset_t *ds;
4064 4063          char *snap;
4065 4064          char *name;
4066 4065          int namelen;
4067 4066          int error;
4068 4067  
4069 4068          do {
4070 4069                  rw_enter(&dp->dp_config_rwlock, RW_READER);
4071 4070                  error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
4072 4071                  rw_exit(&dp->dp_config_rwlock);
4073 4072                  if (error)
4074 4073                          return (error);
4075 4074                  namelen = dsl_dataset_namelen(ds)+1;
4076 4075                  name = kmem_alloc(namelen, KM_SLEEP);
4077 4076                  dsl_dataset_name(ds, name);
4078 4077                  dsl_dataset_rele(ds, FTAG);
4079 4078  
4080 4079                  snap = strchr(name, '@');
4081 4080                  *snap = '\0';
4082 4081                  ++snap;
4083 4082                  error = dsl_dataset_user_release(name, snap, htag, B_FALSE);
4084 4083                  kmem_free(name, namelen);
4085 4084  
4086 4085                  /*
4087 4086                   * The object can't have been destroyed because we have a hold,
4088 4087                   * but it might have been renamed, resulting in ENOENT.  Retry
4089 4088                   * if we've been requested to do so.
4090 4089                   *
4091 4090                   * It would be nice if we could use the dsobj all the way
4092 4091                   * through and avoid ENOENT entirely.  But we might need to
4093 4092                   * unmount the snapshot, and there's currently no way to lookup
4094 4093                   * a vfsp using a ZFS object id.
4095 4094                   */
4096 4095          } while ((error == ENOENT) && retry);
4097 4096  
4098 4097          return (error);
4099 4098  }
4100 4099  
4101 4100  int
4102 4101  dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp)
4103 4102  {
4104 4103          dsl_dataset_t *ds;
4105 4104          int err;
4106 4105  
4107 4106          err = dsl_dataset_hold(dsname, FTAG, &ds);
4108 4107          if (err)
4109 4108                  return (err);
4110 4109  
4111 4110          VERIFY(0 == nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP));
4112 4111          if (ds->ds_phys->ds_userrefs_obj != 0) {
4113 4112                  zap_attribute_t *za;
4114 4113                  zap_cursor_t zc;
4115 4114  
4116 4115                  za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
4117 4116                  for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
4118 4117                      ds->ds_phys->ds_userrefs_obj);
4119 4118                      zap_cursor_retrieve(&zc, za) == 0;
4120 4119                      zap_cursor_advance(&zc)) {
4121 4120                          VERIFY(0 == nvlist_add_uint64(*nvp, za->za_name,
4122 4121                              za->za_first_integer));
4123 4122                  }
4124 4123                  zap_cursor_fini(&zc);
4125 4124                  kmem_free(za, sizeof (zap_attribute_t));
4126 4125          }
4127 4126          dsl_dataset_rele(ds, FTAG);
4128 4127          return (0);
4129 4128  }
4130 4129  
4131 4130  /*
4132 4131   * Note, this function is used as the callback for dmu_objset_find().  We
4133 4132   * always return 0 so that we will continue to find and process
4134 4133   * inconsistent datasets, even if we encounter an error trying to
4135 4134   * process one of them.
4136 4135   */
4137 4136  /* ARGSUSED */
4138 4137  int
4139 4138  dsl_destroy_inconsistent(const char *dsname, void *arg)
4140 4139  {
4141 4140          dsl_dataset_t *ds;
4142 4141  
4143 4142          if (dsl_dataset_own(dsname, B_TRUE, FTAG, &ds) == 0) {
4144 4143                  if (DS_IS_INCONSISTENT(ds))
4145 4144                          (void) dsl_dataset_destroy(ds, FTAG, B_FALSE);
4146 4145                  else
4147 4146                          dsl_dataset_disown(ds, FTAG);
4148 4147          }
4149 4148          return (0);
4150 4149  }
4151 4150  
4152 4151  /*
4153 4152   * Return (in *usedp) the amount of space written in new that is not
4154 4153   * present in oldsnap.  New may be a snapshot or the head.  Old must be
4155 4154   * a snapshot before new, in new's filesystem (or its origin).  If not then
4156 4155   * fail and return EINVAL.
4157 4156   *
4158 4157   * The written space is calculated by considering two components:  First, we
4159 4158   * ignore any freed space, and calculate the written as new's used space
4160 4159   * minus old's used space.  Next, we add in the amount of space that was freed
4161 4160   * between the two snapshots, thus reducing new's used space relative to old's.
4162 4161   * Specifically, this is the space that was born before old->ds_creation_txg,
4163 4162   * and freed before new (ie. on new's deadlist or a previous deadlist).
4164 4163   *
4165 4164   * space freed                         [---------------------]
4166 4165   * snapshots                       ---O-------O--------O-------O------
4167 4166   *                                         oldsnap            new
4168 4167   */
4169 4168  int
4170 4169  dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
4171 4170      uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
4172 4171  {
4173 4172          int err = 0;
4174 4173          uint64_t snapobj;
4175 4174          dsl_pool_t *dp = new->ds_dir->dd_pool;
4176 4175  
4177 4176          *usedp = 0;
4178 4177          *usedp += new->ds_phys->ds_referenced_bytes;
4179 4178          *usedp -= oldsnap->ds_phys->ds_referenced_bytes;
4180 4179  
4181 4180          *compp = 0;
4182 4181          *compp += new->ds_phys->ds_compressed_bytes;
4183 4182          *compp -= oldsnap->ds_phys->ds_compressed_bytes;
4184 4183  
4185 4184          *uncompp = 0;
4186 4185          *uncompp += new->ds_phys->ds_uncompressed_bytes;
4187 4186          *uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes;
4188 4187  
4189 4188          rw_enter(&dp->dp_config_rwlock, RW_READER);
4190 4189          snapobj = new->ds_object;
4191 4190          while (snapobj != oldsnap->ds_object) {
4192 4191                  dsl_dataset_t *snap;
4193 4192                  uint64_t used, comp, uncomp;
4194 4193  
4195 4194                  if (snapobj == new->ds_object) {
4196 4195                          snap = new;
4197 4196                  } else {
4198 4197                          err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
4199 4198                          if (err != 0)
4200 4199                                  break;
4201 4200                  }
4202 4201  
4203 4202                  if (snap->ds_phys->ds_prev_snap_txg ==
4204 4203                      oldsnap->ds_phys->ds_creation_txg) {
4205 4204                          /*
4206 4205                           * The blocks in the deadlist can not be born after
4207 4206                           * ds_prev_snap_txg, so get the whole deadlist space,
4208 4207                           * which is more efficient (especially for old-format
4209 4208                           * deadlists).  Unfortunately the deadlist code
4210 4209                           * doesn't have enough information to make this
4211 4210                           * optimization itself.
4212 4211                           */
4213 4212                          dsl_deadlist_space(&snap->ds_deadlist,
4214 4213                              &used, &comp, &uncomp);
4215 4214                  } else {
4216 4215                          dsl_deadlist_space_range(&snap->ds_deadlist,
4217 4216                              0, oldsnap->ds_phys->ds_creation_txg,
4218 4217                              &used, &comp, &uncomp);
4219 4218                  }
4220 4219                  *usedp += used;
4221 4220                  *compp += comp;
4222 4221                  *uncompp += uncomp;
4223 4222  
4224 4223                  /*
4225 4224                   * If we get to the beginning of the chain of snapshots
4226 4225                   * (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
4227 4226                   * was not a snapshot of/before new.
4228 4227                   */
4229 4228                  snapobj = snap->ds_phys->ds_prev_snap_obj;
4230 4229                  if (snap != new)
4231 4230                          dsl_dataset_rele(snap, FTAG);
4232 4231                  if (snapobj == 0) {
4233 4232                          err = EINVAL;
4234 4233                          break;
4235 4234                  }
4236 4235  
4237 4236          }
4238 4237          rw_exit(&dp->dp_config_rwlock);
4239 4238          return (err);
4240 4239  }
4241 4240  
4242 4241  /*
4243 4242   * Return (in *usedp) the amount of space that will be reclaimed if firstsnap,
4244 4243   * lastsnap, and all snapshots in between are deleted.
4245 4244   *
4246 4245   * blocks that would be freed            [---------------------------]
4247 4246   * snapshots                       ---O-------O--------O-------O--------O
4248 4247   *                                        firstsnap        lastsnap
4249 4248   *
4250 4249   * This is the set of blocks that were born after the snap before firstsnap,
4251 4250   * (birth > firstsnap->prev_snap_txg) and died before the snap after the
4252 4251   * last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
4253 4252   * We calculate this by iterating over the relevant deadlists (from the snap
4254 4253   * after lastsnap, backward to the snap after firstsnap), summing up the
4255 4254   * space on the deadlist that was born after the snap before firstsnap.
4256 4255   */
4257 4256  int
4258 4257  dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
4259 4258      dsl_dataset_t *lastsnap,
4260 4259      uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
4261 4260  {
4262 4261          int err = 0;
4263 4262          uint64_t snapobj;
4264 4263          dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
4265 4264  
4266 4265          ASSERT(dsl_dataset_is_snapshot(firstsnap));
4267 4266          ASSERT(dsl_dataset_is_snapshot(lastsnap));
4268 4267  
4269 4268          /*
4270 4269           * Check that the snapshots are in the same dsl_dir, and firstsnap
4271 4270           * is before lastsnap.
4272 4271           */
4273 4272          if (firstsnap->ds_dir != lastsnap->ds_dir ||
4274 4273              firstsnap->ds_phys->ds_creation_txg >
4275 4274              lastsnap->ds_phys->ds_creation_txg)
4276 4275                  return (EINVAL);
4277 4276  
4278 4277          *usedp = *compp = *uncompp = 0;
4279 4278  
4280 4279          rw_enter(&dp->dp_config_rwlock, RW_READER);
4281 4280          snapobj = lastsnap->ds_phys->ds_next_snap_obj;
4282 4281          while (snapobj != firstsnap->ds_object) {
4283 4282                  dsl_dataset_t *ds;
4284 4283                  uint64_t used, comp, uncomp;
4285 4284  
4286 4285                  err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);
4287 4286                  if (err != 0)
4288 4287                          break;
4289 4288  
4290 4289                  dsl_deadlist_space_range(&ds->ds_deadlist,
4291 4290                      firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX,
4292 4291                      &used, &comp, &uncomp);
4293 4292                  *usedp += used;
4294 4293                  *compp += comp;
4295 4294                  *uncompp += uncomp;
4296 4295  
4297 4296                  snapobj = ds->ds_phys->ds_prev_snap_obj;
4298 4297                  ASSERT3U(snapobj, !=, 0);
4299 4298                  dsl_dataset_rele(ds, FTAG);
4300 4299          }
4301 4300          rw_exit(&dp->dp_config_rwlock);
4302 4301          return (err);
4303 4302  }

↓ open down ↓

683 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX