1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013 by Delphix. All rights reserved.
  24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  25  */
  26 
  27 #include <sys/zfs_context.h>
  28 #include <sys/dsl_userhold.h>
  29 #include <sys/dsl_dataset.h>
  30 #include <sys/dsl_synctask.h>
  31 #include <sys/dmu_tx.h>
  32 #include <sys/dsl_pool.h>
  33 #include <sys/dsl_dir.h>
  34 #include <sys/dmu_traverse.h>
  35 #include <sys/dsl_scan.h>
  36 #include <sys/dmu_objset.h>
  37 #include <sys/zap.h>
  38 #include <sys/zfeature.h>
  39 #include <sys/zfs_ioctl.h>
  40 #include <sys/dsl_deleg.h>
  41 
/*
 * Argument bundle shared by dsl_destroy_snapshot_check() and
 * dsl_destroy_snapshot_sync().
 */
typedef struct dmu_snapshots_destroy_arg {
        /* Requested snapshots; only the pair names are read. */
        nvlist_t *dsda_snaps;
        /* Names that passed the check; the sync function destroys these. */
        nvlist_t *dsda_successful_snaps;
        /* Passed through to the per-snapshot check and sync routines. */
        boolean_t dsda_defer;
        /* Snapshot name -> int32 error, for snapshots that failed the check. */
        nvlist_t *dsda_errlist;
} dmu_snapshots_destroy_arg_t;
  48 
  49 /*
  50  * ds must be owned.
  51  */
  52 static int
  53 dsl_destroy_snapshot_check_impl(dsl_dataset_t *ds, boolean_t defer)
  54 {
  55         if (!dsl_dataset_is_snapshot(ds))
  56                 return (SET_ERROR(EINVAL));
  57 
  58         if (dsl_dataset_long_held(ds))
  59                 return (SET_ERROR(EBUSY));
  60 
  61         /*
  62          * Only allow deferred destroy on pools that support it.
  63          * NOTE: deferred destroy is only supported on snapshots.
  64          */
  65         if (defer) {
  66                 if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
  67                     SPA_VERSION_USERREFS)
  68                         return (SET_ERROR(ENOTSUP));
  69                 return (0);
  70         }
  71 
  72         /*
  73          * If this snapshot has an elevated user reference count,
  74          * we can't destroy it yet.
  75          */
  76         if (ds->ds_userrefs > 0)
  77                 return (SET_ERROR(EBUSY));
  78 
  79         /*
  80          * Can't delete a branch point.
  81          */
  82         if (ds->ds_phys->ds_num_children > 1)
  83                 return (SET_ERROR(EEXIST));
  84 
  85         return (0);
  86 }
  87 
  88 static int
  89 dsl_destroy_snapshot_check(void *arg, dmu_tx_t *tx)
  90 {
  91         dmu_snapshots_destroy_arg_t *dsda = arg;
  92         dsl_pool_t *dp = dmu_tx_pool(tx);
  93         nvpair_t *pair;
  94         int error = 0;
  95 
  96         if (!dmu_tx_is_syncing(tx))
  97                 return (0);
  98 
  99         for (pair = nvlist_next_nvpair(dsda->dsda_snaps, NULL);
 100             pair != NULL; pair = nvlist_next_nvpair(dsda->dsda_snaps, pair)) {
 101                 dsl_dataset_t *ds;
 102 
 103                 error = dsl_dataset_hold(dp, nvpair_name(pair),
 104                     FTAG, &ds);
 105 
 106                 /*
 107                  * If the snapshot does not exist, silently ignore it
 108                  * (it's "already destroyed").
 109                  */
 110                 if (error == ENOENT)
 111                         continue;
 112 
 113                 if (error == 0) {
 114                         error = dsl_destroy_snapshot_check_impl(ds,
 115                             dsda->dsda_defer);
 116                         dsl_dataset_rele(ds, FTAG);
 117                 }
 118 
 119                 if (error == 0) {
 120                         fnvlist_add_boolean(dsda->dsda_successful_snaps,
 121                             nvpair_name(pair));
 122                 } else {
 123                         fnvlist_add_int32(dsda->dsda_errlist,
 124                             nvpair_name(pair), error);
 125                 }
 126         }
 127 
 128         pair = nvlist_next_nvpair(dsda->dsda_errlist, NULL);
 129         if (pair != NULL)
 130                 return (fnvpair_value_int32(pair));
 131 
 132         if (nvlist_empty(dsda->dsda_successful_snaps))
 133                 return (SET_ERROR(ENOENT));
 134 
 135         return (0);
 136 }
 137 
/*
 * State passed from process_old_deadlist() to process_old_cb() for each
 * block pointer visited.
 */
struct process_old_arg {
        /* Dataset whose deadlist receives blocks that are kept. */
        dsl_dataset_t *ds;
        /* May be NULL; its ds_unique_bytes can be increased by the callback. */
        dsl_dataset_t *ds_prev;
        /* When set, ds_prev's unique bytes are left untouched. */
        boolean_t after_branch_point;
        /* Parent zio handed to dsl_free_sync() for freed blocks. */
        zio_t *pio;
        /* Running totals for the blocks that were freed. */
        uint64_t used, comp, uncomp;
};
 145 
 146 static int
 147 process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
 148 {
 149         struct process_old_arg *poa = arg;
 150         dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;
 151 
 152         if (bp->blk_birth <= poa->ds->ds_phys->ds_prev_snap_txg) {
 153                 dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
 154                 if (poa->ds_prev && !poa->after_branch_point &&
 155                     bp->blk_birth >
 156                     poa->ds_prev->ds_phys->ds_prev_snap_txg) {
 157                         poa->ds_prev->ds_phys->ds_unique_bytes +=
 158                             bp_get_dsize_sync(dp->dp_spa, bp);
 159                 }
 160         } else {
 161                 poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
 162                 poa->comp += BP_GET_PSIZE(bp);
 163                 poa->uncomp += BP_GET_UCSIZE(bp);
 164                 dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
 165         }
 166         return (0);
 167 }
 168 
 169 static void
 170 process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
 171     dsl_dataset_t *ds_next, boolean_t after_branch_point, dmu_tx_t *tx)
 172 {
 173         struct process_old_arg poa = { 0 };
 174         dsl_pool_t *dp = ds->ds_dir->dd_pool;
 175         objset_t *mos = dp->dp_meta_objset;
 176         uint64_t deadlist_obj;
 177 
 178         ASSERT(ds->ds_deadlist.dl_oldfmt);
 179         ASSERT(ds_next->ds_deadlist.dl_oldfmt);
 180 
 181         poa.ds = ds;
 182         poa.ds_prev = ds_prev;
 183         poa.after_branch_point = after_branch_point;
 184         poa.pio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
 185         VERIFY0(bpobj_iterate(&ds_next->ds_deadlist.dl_bpobj,
 186             process_old_cb, &poa, tx));
 187         VERIFY0(zio_wait(poa.pio));
 188         ASSERT3U(poa.used, ==, ds->ds_phys->ds_unique_bytes);
 189 
 190         /* change snapused */
 191         dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
 192             -poa.used, -poa.comp, -poa.uncomp, tx);
 193 
 194         /* swap next's deadlist to our deadlist */
 195         dsl_deadlist_close(&ds->ds_deadlist);
 196         dsl_deadlist_close(&ds_next->ds_deadlist);
 197         deadlist_obj = ds->ds_phys->ds_deadlist_obj;
 198         ds->ds_phys->ds_deadlist_obj = ds_next->ds_phys->ds_deadlist_obj;
 199         ds_next->ds_phys->ds_deadlist_obj = deadlist_obj;
 200         dsl_deadlist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
 201         dsl_deadlist_open(&ds_next->ds_deadlist, mos,
 202             ds_next->ds_phys->ds_deadlist_obj);
 203 }
 204 
 205 static void
 206 dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
 207 {
 208         objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
 209         zap_cursor_t zc;
 210         zap_attribute_t za;
 211 
 212         /*
 213          * If it is the old version, dd_clones doesn't exist so we can't
 214          * find the clones, but dsl_deadlist_remove_key() is a no-op so it
 215          * doesn't matter.
 216          */
 217         if (ds->ds_dir->dd_phys->dd_clones == 0)
 218                 return;
 219 
 220         for (zap_cursor_init(&zc, mos, ds->ds_dir->dd_phys->dd_clones);
 221             zap_cursor_retrieve(&zc, &za) == 0;
 222             zap_cursor_advance(&zc)) {
 223                 dsl_dataset_t *clone;
 224 
 225                 VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
 226                     za.za_first_integer, FTAG, &clone));
 227                 if (clone->ds_dir->dd_origin_txg > mintxg) {
 228                         dsl_deadlist_remove_key(&clone->ds_deadlist,
 229                             mintxg, tx);
 230                         dsl_dataset_remove_clones_key(clone, mintxg, tx);
 231                 }
 232                 dsl_dataset_rele(clone, FTAG);
 233         }
 234         zap_cursor_fini(&zc);
 235 }
 236 
/*
 * Destroy the snapshot 'ds', or just flag it for deferred destruction.
 *
 * Called from dsl_destroy_snapshot_sync() and, to remove a clone's
 * origin, from dsl_destroy_head_sync_impl().  The pool's config rwlock
 * is asserted to be write-held and ds is asserted to have no long holds.
 *
 * If 'defer' is set and ds still has user references or more than one
 * child, only DS_FLAG_DEFER_DESTROY is set and the function returns.
 * Otherwise the snapshot is unlinked from its neighbours (prev/next),
 * space accounting and deadlists are adjusted, the name is removed from
 * the head dataset's snapshot namespace, and the dataset object and its
 * auxiliary objects are freed.
 */
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
        int err;        /* only used inside the ZFS_DEBUG block below */
        int after_branch_point = FALSE;
        dsl_pool_t *dp = ds->ds_dir->dd_pool;
        objset_t *mos = dp->dp_meta_objset;
        dsl_dataset_t *ds_prev = NULL;
        uint64_t obj;

        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
        ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
        ASSERT(refcount_is_zero(&ds->ds_longholds));

        /*
         * Deferred destroy of a snapshot that is still in use: just set
         * the flag, log it, and leave everything else alone.
         */
        if (defer &&
            (ds->ds_userrefs > 0 || ds->ds_phys->ds_num_children > 1)) {
                ASSERT(spa_version(dp->dp_spa) >= SPA_VERSION_USERREFS);
                dmu_buf_will_dirty(ds->ds_dbuf, tx);
                ds->ds_phys->ds_flags |= DS_FLAG_DEFER_DESTROY;
                spa_history_log_internal_ds(ds, "defer_destroy", tx, "");
                return;
        }

        ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);

        /* We need to log before removing it from the namespace. */
        spa_history_log_internal_ds(ds, "destroy", tx, "");

        dsl_scan_ds_destroyed(ds, tx);

        obj = ds->ds_object;

        /*
         * Unlink from the previous snapshot, if any.  after_branch_point
         * is set when prev's ds_next_snap_obj does not point at us.
         */
        if (ds->ds_phys->ds_prev_snap_obj != 0) {
                ASSERT3P(ds->ds_prev, ==, NULL);
                VERIFY0(dsl_dataset_hold_obj(dp,
                    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
                after_branch_point =
                    (ds_prev->ds_phys->ds_next_snap_obj != obj);

                dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
                if (after_branch_point &&
                    ds_prev->ds_phys->ds_next_clones_obj != 0) {
                        /*
                         * Replace our entry in prev's next_clones with
                         * our next snapshot (when we have one).
                         */
                        dsl_dataset_remove_from_next_clones(ds_prev, obj, tx);
                        if (ds->ds_phys->ds_next_snap_obj != 0) {
                                VERIFY0(zap_add_int(mos,
                                    ds_prev->ds_phys->ds_next_clones_obj,
                                    ds->ds_phys->ds_next_snap_obj, tx));
                        }
                }
                if (!after_branch_point) {
                        ds_prev->ds_phys->ds_next_snap_obj =
                            ds->ds_phys->ds_next_snap_obj;
                }
        }

        dsl_dataset_t *ds_next;
        uint64_t old_unique;
        uint64_t used = 0, comp = 0, uncomp = 0;

        /* Point the next dataset's "prev" links past us. */
        VERIFY0(dsl_dataset_hold_obj(dp,
            ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
        ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);

        /* Remembered for the refreservation adjustment further down. */
        old_unique = ds_next->ds_phys->ds_unique_bytes;

        dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
        ds_next->ds_phys->ds_prev_snap_obj =
            ds->ds_phys->ds_prev_snap_obj;
        ds_next->ds_phys->ds_prev_snap_txg =
            ds->ds_phys->ds_prev_snap_txg;
        ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
            ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);

        if (ds_next->ds_deadlist.dl_oldfmt) {
                process_old_deadlist(ds, ds_prev, ds_next,
                    after_branch_point, tx);
        } else {
                /* Adjust prev's unique space. */
                if (ds_prev && !after_branch_point) {
                        dsl_deadlist_space_range(&ds_next->ds_deadlist,
                            ds_prev->ds_phys->ds_prev_snap_txg,
                            ds->ds_phys->ds_prev_snap_txg,
                            &used, &comp, &uncomp);
                        ds_prev->ds_phys->ds_unique_bytes += used;
                }

                /* Adjust snapused. */
                dsl_deadlist_space_range(&ds_next->ds_deadlist,
                    ds->ds_phys->ds_prev_snap_txg, UINT64_MAX,
                    &used, &comp, &uncomp);
                dsl_dir_diduse_space(ds->ds_dir, DD_USED_SNAP,
                    -used, -comp, -uncomp, tx);

                /* Move blocks to be freed to pool's free list. */
                dsl_deadlist_move_bpobj(&ds_next->ds_deadlist,
                    &dp->dp_free_bpobj, ds->ds_phys->ds_prev_snap_txg,
                    tx);
                dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
                    DD_USED_HEAD, used, comp, uncomp, tx);

                /* Merge our deadlist into next's and free it. */
                dsl_deadlist_merge(&ds_next->ds_deadlist,
                    ds->ds_phys->ds_deadlist_obj, tx);
        }
        /* In both cases our own deadlist object is now released. */
        dsl_deadlist_close(&ds->ds_deadlist);
        dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
        dmu_buf_will_dirty(ds->ds_dbuf, tx);
        ds->ds_phys->ds_deadlist_obj = 0;

        /* Collapse range in clone heads */
        dsl_dataset_remove_clones_key(ds,
            ds->ds_phys->ds_creation_txg, tx);

        if (dsl_dataset_is_snapshot(ds_next)) {
                dsl_dataset_t *ds_nextnext;

                /*
                 * Update next's unique to include blocks which
                 * were previously shared by only this snapshot
                 * and it.  Those blocks will be born after the
                 * prev snap and before this snap, and will have
                 * died after the next snap and before the one
                 * after that (ie. be on the snap after next's
                 * deadlist).
                 */
                VERIFY0(dsl_dataset_hold_obj(dp,
                    ds_next->ds_phys->ds_next_snap_obj, FTAG, &ds_nextnext));
                dsl_deadlist_space_range(&ds_nextnext->ds_deadlist,
                    ds->ds_phys->ds_prev_snap_txg,
                    ds->ds_phys->ds_creation_txg,
                    &used, &comp, &uncomp);
                ds_next->ds_phys->ds_unique_bytes += used;
                dsl_dataset_rele(ds_nextnext, FTAG);
                ASSERT3P(ds_next->ds_prev, ==, NULL);

                /* Collapse range in this head. */
                dsl_dataset_t *hds;
                VERIFY0(dsl_dataset_hold_obj(dp,
                    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &hds));
                dsl_deadlist_remove_key(&hds->ds_deadlist,
                    ds->ds_phys->ds_creation_txg, tx);
                dsl_dataset_rele(hds, FTAG);

        } else {
                /*
                 * ds_next is not a snapshot and holds us through its
                 * ds_prev pointer (tagged with ds_next).  Drop that hold
                 * and, if we had a predecessor, re-point ds_prev at it.
                 */
                ASSERT3P(ds_next->ds_prev, ==, ds);
                dsl_dataset_rele(ds_next->ds_prev, ds_next);
                ds_next->ds_prev = NULL;
                if (ds_prev) {
                        VERIFY0(dsl_dataset_hold_obj(dp,
                            ds->ds_phys->ds_prev_snap_obj,
                            ds_next, &ds_next->ds_prev));
                }

                dsl_dataset_recalc_head_uniq(ds_next);

                /*
                 * Reduce the amount of our unconsumed refreservation
                 * being charged to our parent by the amount of
                 * new unique data we have gained.
                 */
                if (old_unique < ds_next->ds_reserved) {
                        int64_t mrsdelta;
                        uint64_t new_unique =
                            ds_next->ds_phys->ds_unique_bytes;

                        ASSERT(old_unique <= new_unique);
                        mrsdelta = MIN(new_unique - old_unique,
                            ds_next->ds_reserved - old_unique);
                        dsl_dir_diduse_space(ds->ds_dir,
                            DD_USED_REFRSRV, -mrsdelta, 0, 0, tx);
                }
        }
        dsl_dataset_rele(ds_next, FTAG);

        /*
         * This must be done after the dsl_traverse(), because it will
         * re-open the objset.
         */
        if (ds->ds_objset) {
                dmu_objset_evict(ds->ds_objset);
                ds->ds_objset = NULL;
        }

        /* remove from snapshot namespace */
        dsl_dataset_t *ds_head;
        ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
        VERIFY0(dsl_dataset_hold_obj(dp,
            ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
        VERIFY0(dsl_dataset_get_snapname(ds));
#ifdef ZFS_DEBUG
        {
                /* Debug only: the name must currently map to this object. */
                uint64_t val;

                err = dsl_dataset_snap_lookup(ds_head,
                    ds->ds_snapname, &val);
                ASSERT0(err);
                ASSERT3U(val, ==, obj);
        }
#endif
        VERIFY0(dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx));
        dsl_dataset_rele(ds_head, FTAG);

        if (ds_prev != NULL)
                dsl_dataset_rele(ds_prev, FTAG);

        spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

        /* Free the auxiliary objects that hang off the dataset. */
        if (ds->ds_phys->ds_next_clones_obj != 0) {
                uint64_t count;
                /*
                 * NOTE(review): as written this assertion only trips when
                 * zap_count() returns nonzero AND count == 0; it does not
                 * verify that the ZAP is empty.  Possibly the intent was
                 * "zap_count() == 0 && count == 0" -- confirm before
                 * changing.
                 */
                ASSERT0(zap_count(mos,
                    ds->ds_phys->ds_next_clones_obj, &count) && count == 0);
                VERIFY0(dmu_object_free(mos,
                    ds->ds_phys->ds_next_clones_obj, tx));
        }
        if (ds->ds_phys->ds_props_obj != 0)
                VERIFY0(zap_destroy(mos, ds->ds_phys->ds_props_obj, tx));
        if (ds->ds_phys->ds_userrefs_obj != 0)
                VERIFY0(zap_destroy(mos, ds->ds_phys->ds_userrefs_obj, tx));
        /* Drop the dir hold (tagged with ds) before freeing the object. */
        dsl_dir_rele(ds->ds_dir, ds);
        ds->ds_dir = NULL;
        VERIFY0(dmu_object_free(mos, obj, tx));
}
 459 
 460 static void
 461 dsl_destroy_snapshot_sync(void *arg, dmu_tx_t *tx)
 462 {
 463         dmu_snapshots_destroy_arg_t *dsda = arg;
 464         dsl_pool_t *dp = dmu_tx_pool(tx);
 465         nvpair_t *pair;
 466 
 467         for (pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, NULL);
 468             pair != NULL;
 469             pair = nvlist_next_nvpair(dsda->dsda_successful_snaps, pair)) {
 470                 dsl_dataset_t *ds;
 471 
 472                 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
 473 
 474                 dsl_destroy_snapshot_sync_impl(ds, dsda->dsda_defer, tx);
 475                 dsl_dataset_rele(ds, FTAG);
 476         }
 477 }
 478 
 479 /*
 480  * The semantics of this function are described in the comment above
 481  * lzc_destroy_snaps().  To summarize:
 482  *
 483  * The snapshots must all be in the same pool.
 484  *
 485  * Snapshots that don't exist will be silently ignored (considered to be
 486  * "already deleted").
 487  *
 488  * On success, all snaps will be destroyed and this will return 0.
 489  * On failure, no snaps will be destroyed, the errlist will be filled in,
 490  * and this will return an errno.
 491  */
 492 int
 493 dsl_destroy_snapshots_nvl(nvlist_t *snaps, boolean_t defer,
 494     nvlist_t *errlist)
 495 {
 496         dmu_snapshots_destroy_arg_t dsda;
 497         int error;
 498         nvpair_t *pair;
 499 
 500         pair = nvlist_next_nvpair(snaps, NULL);
 501         if (pair == NULL)
 502                 return (0);
 503 
 504         dsda.dsda_snaps = snaps;
 505         dsda.dsda_successful_snaps = fnvlist_alloc();
 506         dsda.dsda_defer = defer;
 507         dsda.dsda_errlist = errlist;
 508 
 509         error = dsl_sync_task(nvpair_name(pair),
 510             dsl_destroy_snapshot_check, dsl_destroy_snapshot_sync,
 511             &dsda, 0);
 512         fnvlist_free(dsda.dsda_successful_snaps);
 513 
 514         return (error);
 515 }
 516 
 517 int
 518 dsl_destroy_snapshot(const char *name, boolean_t defer)
 519 {
 520         int error;
 521         nvlist_t *nvl = fnvlist_alloc();
 522         nvlist_t *errlist = fnvlist_alloc();
 523 
 524         fnvlist_add_boolean(nvl, name);
 525         error = dsl_destroy_snapshots_nvl(nvl, defer, errlist);
 526         fnvlist_free(errlist);
 527         fnvlist_free(nvl);
 528         return (error);
 529 }
 530 
/* Argument passed through traverse_dataset() to kill_blkptr(). */
struct killarg {
        dsl_dataset_t *ds;      /* dataset whose blocks are being killed */
        dmu_tx_t *tx;           /* transaction in which the frees occur */
};
 535 
 536 /* ARGSUSED */
 537 static int
 538 kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
 539     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
 540 {
 541         struct killarg *ka = arg;
 542         dmu_tx_t *tx = ka->tx;
 543 
 544         if (bp == NULL)
 545                 return (0);
 546 
 547         if (zb->zb_level == ZB_ZIL_LEVEL) {
 548                 ASSERT(zilog != NULL);
 549                 /*
 550                  * It's a block in the intent log.  It has no
 551                  * accounting, so just free it.
 552                  */
 553                 dsl_free(ka->tx->tx_pool, ka->tx->tx_txg, bp);
 554         } else {
 555                 ASSERT(zilog == NULL);
 556                 ASSERT3U(bp->blk_birth, >, ka->ds->ds_phys->ds_prev_snap_txg);
 557                 (void) dsl_dataset_block_kill(ka->ds, bp, tx, B_FALSE);
 558         }
 559 
 560         return (0);
 561 }
 562 
 563 static void
 564 old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
 565 {
 566         struct killarg ka;
 567 
 568         /*
 569          * Free everything that we point to (that's born after
 570          * the previous snapshot, if we are a clone)
 571          *
 572          * NB: this should be very quick, because we already
 573          * freed all the objects in open context.
 574          */
 575         ka.ds = ds;
 576         ka.tx = tx;
 577         VERIFY0(traverse_dataset(ds,
 578             ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
 579             kill_blkptr, &ka));
 580         ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
 581 }
 582 
/*
 * Argument shared by dsl_destroy_head_check() and dsl_destroy_head_sync().
 */
typedef struct dsl_destroy_head_arg {
        /* Dataset name; both functions pass it to dsl_dataset_hold(). */
        const char *ddha_name;
} dsl_destroy_head_arg_t;
 586 
 587 int
 588 dsl_destroy_head_check_impl(dsl_dataset_t *ds, int expected_holds)
 589 {
 590         int error;
 591         uint64_t count;
 592         objset_t *mos;
 593 
 594         if (dsl_dataset_is_snapshot(ds))
 595                 return (SET_ERROR(EINVAL));
 596 
 597         if (refcount_count(&ds->ds_longholds) != expected_holds)
 598                 return (SET_ERROR(EBUSY));
 599 
 600         mos = ds->ds_dir->dd_pool->dp_meta_objset;
 601 
 602         /*
 603          * Can't delete a head dataset if there are snapshots of it.
 604          * (Except if the only snapshots are from the branch we cloned
 605          * from.)
 606          */
 607         if (ds->ds_prev != NULL &&
 608             ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
 609                 return (SET_ERROR(EBUSY));
 610 
 611         /*
 612          * Can't delete if there are children of this fs.
 613          */
 614         error = zap_count(mos,
 615             ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
 616         if (error != 0)
 617                 return (error);
 618         if (count != 0)
 619                 return (SET_ERROR(EEXIST));
 620 
 621         if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev) &&
 622             ds->ds_prev->ds_phys->ds_num_children == 2 &&
 623             ds->ds_prev->ds_userrefs == 0) {
 624                 /* We need to remove the origin snapshot as well. */
 625                 if (!refcount_is_zero(&ds->ds_prev->ds_longholds))
 626                         return (SET_ERROR(EBUSY));
 627         }
 628         return (0);
 629 }
 630 
 631 static int
 632 dsl_destroy_head_check(void *arg, dmu_tx_t *tx)
 633 {
 634         dsl_destroy_head_arg_t *ddha = arg;
 635         dsl_pool_t *dp = dmu_tx_pool(tx);
 636         dsl_dataset_t *ds;
 637         int error;
 638 
 639         error = dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds);
 640         if (error != 0)
 641                 return (error);
 642 
 643         error = dsl_destroy_head_check_impl(ds, 0);
 644         dsl_dataset_rele(ds, FTAG);
 645         return (error);
 646 }
 647 
 648 static void
 649 dsl_dir_destroy_sync(uint64_t ddobj, dmu_tx_t *tx)
 650 {
 651         dsl_dir_t *dd;
 652         dsl_pool_t *dp = dmu_tx_pool(tx);
 653         objset_t *mos = dp->dp_meta_objset;
 654         dd_used_t t;
 655 
 656         ASSERT(RRW_WRITE_HELD(&dmu_tx_pool(tx)->dp_config_rwlock));
 657 
 658         VERIFY0(dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd));
 659 
 660         ASSERT0(dd->dd_phys->dd_head_dataset_obj);
 661 
 662         /*
 663          * Remove our reservation. The impl() routine avoids setting the
 664          * actual property, which would require the (already destroyed) ds.
 665          */
 666         dsl_dir_set_reservation_sync_impl(dd, 0, tx);
 667 
 668         ASSERT0(dd->dd_phys->dd_used_bytes);
 669         ASSERT0(dd->dd_phys->dd_reserved);
 670         for (t = 0; t < DD_USED_NUM; t++)
 671                 ASSERT0(dd->dd_phys->dd_used_breakdown[t]);
 672 
 673         VERIFY0(zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
 674         VERIFY0(zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
 675         VERIFY0(dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
 676         VERIFY0(zap_remove(mos,
 677             dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
 678 
 679         dsl_dir_rele(dd, FTAG);
 680         VERIFY0(dmu_object_free(mos, ddobj, tx));
 681 }
 682 
/*
 * Destroy the head dataset 'ds' together with its dsl_dir.
 *
 * The pool's config rwlock is asserted to be write-held, ds is asserted
 * to have at most one child, and ds is asserted to have no snapshots of
 * its own (its ds_prev, if any, must not name ds as its next snapshot).
 *
 * The dataset's blocks are either freed synchronously (async_destroy
 * feature not enabled) or handed to the pool's bptree for background
 * freeing.  If ds is a clone whose origin snapshot is flagged
 * defer-destroy, has ds_num_children == 2 and no user references, the
 * origin snapshot is destroyed as well once ds is gone.
 */
void
dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
{
        dsl_pool_t *dp = dmu_tx_pool(tx);
        objset_t *mos = dp->dp_meta_objset;
        uint64_t obj, ddobj, prevobj = 0;
        boolean_t rmorigin;

        ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
        ASSERT(ds->ds_prev == NULL ||
            ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
        ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
        ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));

        /* We need to log before removing it from the namespace. */
        spa_history_log_internal_ds(ds, "destroy", tx, "");

        /*
         * Decide now whether the origin snapshot must go too: this is
         * evaluated before the origin's ds_num_children is decremented
         * below.
         */
        rmorigin = (dsl_dir_is_clone(ds->ds_dir) &&
            DS_IS_DEFER_DESTROY(ds->ds_prev) &&
            ds->ds_prev->ds_phys->ds_num_children == 2 &&
            ds->ds_prev->ds_userrefs == 0);

        /* Remove our reservation */
        if (ds->ds_reserved != 0) {
                dsl_dataset_set_refreservation_sync_impl(ds,
                    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
                    0, tx);
                ASSERT0(ds->ds_reserved);
        }

        dsl_scan_ds_destroyed(ds, tx);

        obj = ds->ds_object;

        if (ds->ds_phys->ds_prev_snap_obj != 0) {
                /* This is a clone */
                ASSERT(ds->ds_prev != NULL);
                ASSERT3U(ds->ds_prev->ds_phys->ds_next_snap_obj, !=, obj);
                ASSERT0(ds->ds_phys->ds_next_snap_obj);

                /* Detach from the origin: next_clones entry and child count. */
                dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
                if (ds->ds_prev->ds_phys->ds_next_clones_obj != 0) {
                        dsl_dataset_remove_from_next_clones(ds->ds_prev,
                            obj, tx);
                }

                ASSERT3U(ds->ds_prev->ds_phys->ds_num_children, >, 1);
                ds->ds_prev->ds_phys->ds_num_children--;
        }

        zfeature_info_t *async_destroy =
            &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
        objset_t *os;

        /*
         * Destroy the deadlist.  Unless it's a clone, the
         * deadlist should be empty.  (If it's a clone, it's
         * safe to ignore the deadlist contents.)
         */
        dsl_deadlist_close(&ds->ds_deadlist);
        dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
        dmu_buf_will_dirty(ds->ds_dbuf, tx);
        ds->ds_phys->ds_deadlist_obj = 0;

        VERIFY0(dmu_objset_from_ds(ds, &os));

        if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
                /* Feature not enabled: free the blocks right here. */
                old_synchronous_dataset_destroy(ds, tx);
        } else {
                /*
                 * Move the bptree into the pool's list of trees to
                 * clean up and update space accounting information.
                 */
                uint64_t used, comp, uncomp;

                zil_destroy_sync(dmu_objset_zil(os), tx);

                /*
                 * First use of the feature: bump its refcount, allocate
                 * the pool's bptree object, record it in the pool
                 * directory under DMU_POOL_BPTREE_OBJ, and mark the scan
                 * as async-destroying.
                 */
                if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
                        dsl_scan_t *scn = dp->dp_scan;

                        spa_feature_incr(dp->dp_spa, async_destroy, tx);
                        dp->dp_bptree_obj = bptree_alloc(mos, tx);
                        VERIFY0(zap_add(mos,
                            DMU_POOL_DIRECTORY_OBJECT,
                            DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
                            &dp->dp_bptree_obj, tx));
                        ASSERT(!scn->scn_async_destroying);
                        scn->scn_async_destroying = B_TRUE;
                }

                used = ds->ds_dir->dd_phys->dd_used_bytes;
                comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
                uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;

                ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
                    ds->ds_phys->ds_unique_bytes == used);

                /*
                 * Queue the dataset's root block pointer on the bptree and
                 * move the dir's space charge over to the pool's free dir.
                 */
                bptree_add(mos, dp->dp_bptree_obj,
                    &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
                    used, comp, uncomp, tx);
                dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
                    -used, -comp, -uncomp, tx);
                dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
                    used, comp, uncomp, tx);
        }

        if (ds->ds_prev != NULL) {
                if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
                        VERIFY0(zap_remove_int(mos,
                            ds->ds_prev->ds_dir->dd_phys->dd_clones,
                            ds->ds_object, tx));
                }
                /*
                 * Remember the origin's object number so it can be held
                 * again for the rmorigin step after this hold is dropped.
                 */
                prevobj = ds->ds_prev->ds_object;
                dsl_dataset_rele(ds->ds_prev, ds);
                ds->ds_prev = NULL;
        }

        /*
         * This must be done after the dsl_traverse(), because it will
         * re-open the objset.
         */
        if (ds->ds_objset) {
                dmu_objset_evict(ds->ds_objset);
                ds->ds_objset = NULL;
        }

        /* Erase the link in the dir */
        dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
        ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
        /* Saved so the dir can be destroyed after ds_dir is released. */
        ddobj = ds->ds_dir->dd_object;
        ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
        VERIFY0(zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx));

        spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);

        ASSERT0(ds->ds_phys->ds_next_clones_obj);
        ASSERT0(ds->ds_phys->ds_props_obj);
        ASSERT0(ds->ds_phys->ds_userrefs_obj);
        dsl_dir_rele(ds->ds_dir, ds);
        ds->ds_dir = NULL;
        VERIFY0(dmu_object_free(mos, obj, tx));

        dsl_dir_destroy_sync(ddobj, tx);

        /* Finally take out the deferred-destroy origin, if required. */
        if (rmorigin) {
                dsl_dataset_t *prev;
                VERIFY0(dsl_dataset_hold_obj(dp, prevobj, FTAG, &prev));
                dsl_destroy_snapshot_sync_impl(prev, B_FALSE, tx);
                dsl_dataset_rele(prev, FTAG);
        }
}
 834 
 835 static void
 836 dsl_destroy_head_sync(void *arg, dmu_tx_t *tx)
 837 {
 838         dsl_destroy_head_arg_t *ddha = arg;
 839         dsl_pool_t *dp = dmu_tx_pool(tx);
 840         dsl_dataset_t *ds;
 841 
 842         VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
 843         dsl_destroy_head_sync_impl(ds, tx);
 844         dsl_dataset_rele(ds, FTAG);
 845 }
 846 
 847 static void
 848 dsl_destroy_head_begin_sync(void *arg, dmu_tx_t *tx)
 849 {
 850         dsl_destroy_head_arg_t *ddha = arg;
 851         dsl_pool_t *dp = dmu_tx_pool(tx);
 852         dsl_dataset_t *ds;
 853 
 854         VERIFY0(dsl_dataset_hold(dp, ddha->ddha_name, FTAG, &ds));
 855 
 856         /* Mark it as inconsistent on-disk, in case we crash */
 857         dmu_buf_will_dirty(ds->ds_dbuf, tx);
 858         ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
 859 
 860         spa_history_log_internal_ds(ds, "destroy begin", tx, "");
 861         dsl_dataset_rele(ds, FTAG);
 862 }
 863 
 864 int
 865 dsl_destroy_head(const char *name)
 866 {
 867         dsl_destroy_head_arg_t ddha;
 868         int error;
 869         spa_t *spa;
 870         boolean_t isenabled;
 871 
 872 #ifdef _KERNEL
 873         zfs_destroy_unmount_origin(name);
 874 #endif
 875 
 876         error = spa_open(name, &spa, FTAG);
 877         if (error != 0)
 878                 return (error);
 879         isenabled = spa_feature_is_enabled(spa,
 880             &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]);
 881         spa_close(spa, FTAG);
 882 
 883         ddha.ddha_name = name;
 884 
 885         if (!isenabled) {
 886                 objset_t *os;
 887 
 888                 error = dsl_sync_task(name, dsl_destroy_head_check,
 889                     dsl_destroy_head_begin_sync, &ddha, 0);
 890                 if (error != 0)
 891                         return (error);
 892 
 893                 /*
 894                  * Head deletion is processed in one txg on old pools;
 895                  * remove the objects from open context so that the txg sync
 896                  * is not too long.
 897                  */
 898                 error = dmu_objset_own(name, DMU_OST_ANY, B_FALSE, FTAG, &os);
 899                 if (error == 0) {
 900                         uint64_t prev_snap_txg =
 901                             dmu_objset_ds(os)->ds_phys->ds_prev_snap_txg;
 902                         for (uint64_t obj = 0; error == 0;
 903                             error = dmu_object_next(os, &obj, FALSE,
 904                             prev_snap_txg))
 905                                 (void) dmu_free_object(os, obj);
 906                         /* sync out all frees */
 907                         txg_wait_synced(dmu_objset_pool(os), 0);
 908                         dmu_objset_disown(os, FTAG);
 909                 }
 910         }
 911 
 912         return (dsl_sync_task(name, dsl_destroy_head_check,
 913             dsl_destroy_head_sync, &ddha, 0));
 914 }
 915 
 916 /*
 917  * Note, this function is used as the callback for dmu_objset_find().  We
 918  * always return 0 so that we will continue to find and process
 919  * inconsistent datasets, even if we encounter an error trying to
 920  * process one of them.
 921  */
 922 /* ARGSUSED */
 923 int
 924 dsl_destroy_inconsistent(const char *dsname, void *arg)
 925 {
 926         objset_t *os;
 927 
 928         if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
 929                 boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os));
 930                 dmu_objset_rele(os, FTAG);
 931                 if (inconsistent)
 932                         (void) dsl_destroy_head(dsname);
 933         }
 934         return (0);
 935 }