/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_pool.h>
#include <sys/dnode.h>
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu_impl.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/callb.h>

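/*
 * Maximum number of blocks the prefetch thread may have fetched ahead of
 * the main traversal (see prefetch_data_t below).
 */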
int zfs_pd_blks_max = 100;

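/*
 * State shared between the traversal thread and the prefetch thread;
 * pd_mtx and pd_cv coordinate the count of outstanding prefetched blocks.
 */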
typedef struct prefetch_data {
        kmutex_t pd_mtx;
        kcondvar_t pd_cv;
        int pd_blks_max;
        int pd_blks_fetched;
        int pd_flags;
        boolean_t pd_cancel;
        boolean_t pd_exited;
} prefetch_data_t;

typedef struct traverse_data {
        spa_t *td_spa;
        uint64_t td_objset;
        blkptr_t *td_rootbp;
        uint64_t td_min_txg;
        zbookmark_t *td_resume;
        int td_flags;
        prefetch_data_t *td_pfd;
        blkptr_cb_t *td_func;
        void *td_arg;
} traverse_data_t;

static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
    uint64_t objset, uint64_t object);
static void prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *,
    uint64_t objset, uint64_t object);

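/*
 * Callback for each block in the ZIL chain.  If no claim has been recorded
 * (claim_txg == 0), blocks born at or after this pool open's first txg are
 * not yet stable, so they are skipped.
 */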
static int
traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
{
        traverse_data_t *td = arg;
        zbookmark_t zb;

        if (bp->blk_birth == 0)
                return (0);

        if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa))
                return (0);

        SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
            bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);

        (void) td->td_func(td->td_spa, zilog, bp, &zb, NULL, td->td_arg);

        return (0);
}

static int
traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
{
        traverse_data_t *td = arg;

        if (lrc->lrc_txtype == TX_WRITE) {
                lr_write_t *lr = (lr_write_t *)lrc;
                blkptr_t *bp = &lr->lr_blkptr;
                zbookmark_t zb;

                if (bp->blk_birth == 0)
                        return (0);

                if (claim_txg == 0 || bp->blk_birth < claim_txg)
                        return (0);

                SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid,
                    ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));

                (void) td->td_func(td->td_spa, zilog, bp, &zb, NULL,
                    td->td_arg);
        }
        return (0);
}

static void
traverse_zil(traverse_data_t *td, zil_header_t *zh)
{
        uint64_t claim_txg = zh->zh_claim_txg;
        zilog_t *zilog;

        /*
         * We only want to visit blocks that have been claimed but not yet
         * replayed; plus, in read-only mode, blocks that are already stable.
         */
        if (claim_txg == 0 && spa_writeable(td->td_spa))
                return;

        zilog = zil_alloc(spa_get_dsl(td->td_spa)->dp_meta_objset, zh);

        (void) zil_parse(zilog, traverse_zil_block, traverse_zil_record, td,
            claim_txg);

        zil_free(zilog);
}

typedef enum resume_skip {
        RESUME_SKIP_ALL,
        RESUME_SKIP_NONE,
        RESUME_SKIP_CHILDREN
} resume_skip_t;

/*
 * Returns RESUME_SKIP_ALL if td indicates that we are resuming a traversal and
 * the block indicated by zb does not need to be visited at all. Returns
 * RESUME_SKIP_CHILDREN if we are resuming a post traversal and we reach the
 * resume point. This indicates that this block should be visited but not its
 * children (since they must have been visited in a previous traversal).
 * Otherwise returns RESUME_SKIP_NONE.
 */
static resume_skip_t
resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
    const zbookmark_t *zb)
{
        if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) {
                /*
                 * If we already visited this bp & everything below,
                 * don't bother doing it again.
                 */
                if (zbookmark_is_before(dnp, zb, td->td_resume))
                        return (RESUME_SKIP_ALL);

                /*
                 * If we found the block we're trying to resume from, zero
                 * the bookmark out to indicate that we have resumed.
                 */
                ASSERT3U(zb->zb_object, <=, td->td_resume->zb_object);
                if (bcmp(zb, td->td_resume, sizeof (*zb)) == 0) {
                        bzero(td->td_resume, sizeof (*zb));
                        if (td->td_flags & TRAVERSE_POST)
                                return (RESUME_SKIP_CHILDREN);
                }
        }
        return (RESUME_SKIP_NONE);
}

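/*
 * Record the current bookmark in td_resume so a paused traversal can be
 * restarted from this point.
 */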
static void
traverse_pause(traverse_data_t *td, const zbookmark_t *zb)
{
        ASSERT(td->td_resume != NULL);
        ASSERT0(zb->zb_level);
        bcopy(zb, td->td_resume, sizeof (*td->td_resume));
}

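/*
 * Issue an asynchronous, prefetch-flagged arc_read() for a metadata block
 * when TRAVERSE_PREFETCH_METADATA is set.  Level-0 blocks other than dnode
 * blocks are not metadata and are skipped.
 */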
static void
traverse_prefetch_metadata(traverse_data_t *td,
    const blkptr_t *bp, const zbookmark_t *zb)
{
        uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;

        if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
                return;
        /*
         * If we are in the process of resuming, don't prefetch, because
         * some children will not be needed (and in fact may have already
         * been freed).
         */
        if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume))
                return;
        if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg)
                return;
        if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
                return;

        (void) arc_read(NULL, td->td_spa, bp, NULL, NULL,
            ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
}

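/*
 * Recursively visit a block pointer: invoke the callback in pre- and/or
 * post-order as requested, then descend into indirect, dnode, or objset
 * blocks as appropriate.  With TRAVERSE_HARD, errors in children are
 * remembered in lasterr and the walk continues.
 */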
static int
traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
    const blkptr_t *bp, const zbookmark_t *zb)
{
        zbookmark_t czb;
        int err = 0, lasterr = 0;
        arc_buf_t *buf = NULL;
        prefetch_data_t *pd = td->td_pfd;
        boolean_t hard = td->td_flags & TRAVERSE_HARD;
        boolean_t pause = B_FALSE;

        switch (resume_skip_check(td, dnp, zb)) {
        case RESUME_SKIP_ALL:
                return (0);
        case RESUME_SKIP_CHILDREN:
                goto post;
        case RESUME_SKIP_NONE:
                break;
        default:
                ASSERT(0);
        }

        if (BP_IS_HOLE(bp)) {
                err = td->td_func(td->td_spa, NULL, NULL, zb, dnp, td->td_arg);
                return (err);
        }

        if (bp->blk_birth <= td->td_min_txg)
                return (0);

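        /*
         * If the prefetch thread is active and this block is one it will
         * fetch, wait until at least one prefetch has completed, then
         * consume one "fetched" credit.
         */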
        if (pd && !pd->pd_exited &&
            ((pd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
            BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0)) {
                mutex_enter(&pd->pd_mtx);
                ASSERT(pd->pd_blks_fetched >= 0);
                while (pd->pd_blks_fetched == 0 && !pd->pd_exited)
                        cv_wait(&pd->pd_cv, &pd->pd_mtx);
                pd->pd_blks_fetched--;
                cv_broadcast(&pd->pd_cv);
                mutex_exit(&pd->pd_mtx);
        }

        if (td->td_flags & TRAVERSE_PRE) {
                err = td->td_func(td->td_spa, NULL, bp, zb, dnp,
                    td->td_arg);
                if (err == TRAVERSE_VISIT_NO_CHILDREN)
                        return (0);
                if (err == ERESTART)
                        pause = B_TRUE; /* handle pausing at a common point */
                if (err != 0)
                        goto post;
        }

        if (BP_GET_LEVEL(bp) > 0) {
                uint32_t flags = ARC_WAIT;
                int i;
                blkptr_t *cbp;
                int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;

                err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
                    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
                if (err != 0)
                        return (err);
                cbp = buf->b_data;

                for (i = 0; i < epb; i++) {
                        SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
                            zb->zb_level - 1,
                            zb->zb_blkid * epb + i);
                        traverse_prefetch_metadata(td, &cbp[i], &czb);
                }

                /* recursively visitbp() blocks below this */
                for (i = 0; i < epb; i++) {
                        SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
                            zb->zb_level - 1,
                            zb->zb_blkid * epb + i);
                        err = traverse_visitbp(td, dnp, &cbp[i], &czb);
                        if (err != 0) {
                                if (!hard)
                                        break;
                                lasterr = err;
                        }
                }
        } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
                uint32_t flags = ARC_WAIT;
                int i;
                int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;

                err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
                    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
                if (err != 0)
                        return (err);
                dnp = buf->b_data;

                for (i = 0; i < epb; i++) {
                        prefetch_dnode_metadata(td, &dnp[i], zb->zb_objset,
                            zb->zb_blkid * epb + i);
                }

                /* recursively visitbp() blocks below this */
                for (i = 0; i < epb; i++) {
                        err = traverse_dnode(td, &dnp[i], zb->zb_objset,
                            zb->zb_blkid * epb + i);
                        if (err != 0) {
                                if (!hard)
                                        break;
                                lasterr = err;
                        }
                }
        } else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
                uint32_t flags = ARC_WAIT;
                objset_phys_t *osp;
                dnode_phys_t *dnp;

                err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
                    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
                if (err != 0)
                        return (err);

                osp = buf->b_data;
                dnp = &osp->os_meta_dnode;
                prefetch_dnode_metadata(td, dnp, zb->zb_objset,
                    DMU_META_DNODE_OBJECT);
                if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
                        prefetch_dnode_metadata(td, &osp->os_userused_dnode,
                            zb->zb_objset, DMU_USERUSED_OBJECT);
                        prefetch_dnode_metadata(td, &osp->os_groupused_dnode,
                            zb->zb_objset, DMU_GROUPUSED_OBJECT);
                }

                err = traverse_dnode(td, dnp, zb->zb_objset,
                    DMU_META_DNODE_OBJECT);
                if (err && hard) {
                        lasterr = err;
                        err = 0;
                }
                if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
                        dnp = &osp->os_userused_dnode;
                        err = traverse_dnode(td, dnp, zb->zb_objset,
                            DMU_USERUSED_OBJECT);
                }
                if (err && hard) {
                        lasterr = err;
                        err = 0;
                }
                if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
                        dnp = &osp->os_groupused_dnode;
                        err = traverse_dnode(td, dnp, zb->zb_objset,
                            DMU_GROUPUSED_OBJECT);
                }
        }

        if (buf)
                (void) arc_buf_remove_ref(buf, &buf);

post:
        if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
                err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
                if (err == ERESTART)
                        pause = B_TRUE;
        }

        if (pause && td->td_resume != NULL) {
                ASSERT3U(err, ==, ERESTART);
                ASSERT(!hard);
                traverse_pause(td, zb);
        }

        return (err != 0 ? err : lasterr);
}

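/*
 * Prefetch a dnode's top-level block pointers, plus its spill block if any.
 */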
static void
prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *dnp,
    uint64_t objset, uint64_t object)
{
        int j;
        zbookmark_t czb;

        for (j = 0; j < dnp->dn_nblkptr; j++) {
                SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
                traverse_prefetch_metadata(td, &dnp->dn_blkptr[j], &czb);
        }

        if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
                SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
                traverse_prefetch_metadata(td, &dnp->dn_spill, &czb);
        }
}

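/*
 * Visit each of a dnode's block pointers, plus its spill block if present.
 */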
static int
traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
    uint64_t objset, uint64_t object)
{
        int j, err = 0, lasterr = 0;
        zbookmark_t czb;
        boolean_t hard = (td->td_flags & TRAVERSE_HARD);

        for (j = 0; j < dnp->dn_nblkptr; j++) {
                SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
                err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb);
                if (err != 0) {
                        if (!hard)
                                break;
                        lasterr = err;
                }
        }

        if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
                SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
                err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb);
                if (err != 0) {
                        if (!hard)
                                return (err);
                        lasterr = err;
                }
        }
        return (err != 0 ? err : lasterr);
}

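/*
 * blkptr callback used by the prefetch thread: throttle to pd_blks_max
 * outstanding blocks, then issue a speculative, non-blocking arc_read().
 */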
/* ARGSUSED */
static int
traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
        prefetch_data_t *pfd = arg;
        uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;

        ASSERT(pfd->pd_blks_fetched >= 0);
        if (pfd->pd_cancel)
                return (SET_ERROR(EINTR));

        if (bp == NULL || !((pfd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
            BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0) ||
            BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
                return (0);

        mutex_enter(&pfd->pd_mtx);
        while (!pfd->pd_cancel && pfd->pd_blks_fetched >= pfd->pd_blks_max)
                cv_wait(&pfd->pd_cv, &pfd->pd_mtx);
        pfd->pd_blks_fetched++;
        cv_broadcast(&pfd->pd_cv);
        mutex_exit(&pfd->pd_mtx);

        (void) arc_read(NULL, spa, bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
            ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &aflags, zb);

        return (0);
}

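/*
 * Body of the prefetch thread: re-run the traversal with
 * traverse_prefetcher() as the callback, then signal the main thread that
 * prefetching has finished.
 */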
static void
traverse_prefetch_thread(void *arg)
{
        traverse_data_t *td_main = arg;
        traverse_data_t td = *td_main;
        zbookmark_t czb;

        td.td_func = traverse_prefetcher;
        td.td_arg = td_main->td_pfd;
        td.td_pfd = NULL;

        SET_BOOKMARK(&czb, td.td_objset,
            ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
        (void) traverse_visitbp(&td, NULL, td.td_rootbp, &czb);

        mutex_enter(&td_main->td_pfd->pd_mtx);
        td_main->td_pfd->pd_exited = B_TRUE;
        cv_broadcast(&td_main->td_pfd->pd_cv);
        mutex_exit(&td_main->td_pfd->pd_mtx);
}

/*
 * NB: dataset must not be changing on-disk (eg, is a snapshot or we are
 * in syncing context).
 */
static int
traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
    uint64_t txg_start, zbookmark_t *resume, int flags,
    blkptr_cb_t func, void *arg)
{
        traverse_data_t td;
        prefetch_data_t pd = { 0 };
        zbookmark_t czb;
        int err;

        ASSERT(ds == NULL || objset == ds->ds_object);
        ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));

        /*
         * The data prefetching mechanism (the prefetch thread) is incompatible
         * with resuming from a bookmark.
         */
        ASSERT(resume == NULL || !(flags & TRAVERSE_PREFETCH_DATA));

        td.td_spa = spa;
        td.td_objset = objset;
        td.td_rootbp = rootbp;
        td.td_min_txg = txg_start;
        td.td_resume = resume;
        td.td_func = func;
        td.td_arg = arg;
        td.td_pfd = &pd;
        td.td_flags = flags;

        pd.pd_blks_max = zfs_pd_blks_max;
        pd.pd_flags = flags;
        mutex_init(&pd.pd_mtx, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&pd.pd_cv, NULL, CV_DEFAULT, NULL);

        /* See comment on ZIL traversal in dsl_scan_visitds. */
        if (ds != NULL && !dsl_dataset_is_snapshot(ds) && !BP_IS_HOLE(rootbp)) {
                uint32_t flags = ARC_WAIT;
                objset_phys_t *osp;
                arc_buf_t *buf;

                err = arc_read(NULL, td.td_spa, rootbp,
                    arc_getbuf_func, &buf,
                    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, NULL);
                if (err != 0)
                        return (err);

                osp = buf->b_data;
                traverse_zil(&td, &osp->os_zil_header);
                (void) arc_buf_remove_ref(buf, &buf);
        }

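        /*
         * If data prefetching was requested, kick off the prefetch thread;
         * if the dispatch fails (or prefetching was not requested), mark the
         * prefetch state as already exited so we don't wait on it below.
         */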
        if (!(flags & TRAVERSE_PREFETCH_DATA) ||
            0 == taskq_dispatch(system_taskq, traverse_prefetch_thread,
            &td, TQ_NOQUEUE))
                pd.pd_exited = B_TRUE;

        SET_BOOKMARK(&czb, td.td_objset,
            ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
        err = traverse_visitbp(&td, NULL, rootbp, &czb);

        mutex_enter(&pd.pd_mtx);
        pd.pd_cancel = B_TRUE;
        cv_broadcast(&pd.pd_cv);
        while (!pd.pd_exited)
                cv_wait(&pd.pd_cv, &pd.pd_mtx);
        mutex_exit(&pd.pd_mtx);

        mutex_destroy(&pd.pd_mtx);
        cv_destroy(&pd.pd_cv);

        return (err);
}

/*
 * NB: dataset must not be changing on-disk (eg, is a snapshot or we are
 * in syncing context).
 */
int
traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
    blkptr_cb_t func, void *arg)
{
        return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
            &ds->ds_phys->ds_bp, txg_start, NULL, flags, func, arg));
}

int
traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
    uint64_t txg_start, zbookmark_t *resume, int flags,
    blkptr_cb_t func, void *arg)
{
        return (traverse_impl(spa, NULL, ZB_DESTROYED_OBJSET,
            blkptr, txg_start, resume, flags, func, arg));
}

/*
 * NB: pool must not be changing on-disk (eg, from zdb or sync context).
 */
int
traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
    blkptr_cb_t func, void *arg)
{
        int err, lasterr = 0;
        uint64_t obj;
        dsl_pool_t *dp = spa_get_dsl(spa);
        objset_t *mos = dp->dp_meta_objset;
        boolean_t hard = (flags & TRAVERSE_HARD);

        /* visit the MOS */
        err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
            txg_start, NULL, flags, func, arg);
        if (err != 0)
                return (err);

        /* visit each dataset */
        for (obj = 1; err == 0 || (err != ESRCH && hard);
            err = dmu_object_next(mos, &obj, FALSE, txg_start)) {
                dmu_object_info_t doi;

                err = dmu_object_info(mos, obj, &doi);
                if (err != 0) {
                        if (!hard)
                                return (err);
                        lasterr = err;
                        continue;
                }

                if (doi.doi_bonus_type == DMU_OT_DSL_DATASET) {
                        dsl_dataset_t *ds;
                        uint64_t txg = txg_start;

                        dsl_pool_config_enter(dp, FTAG);
                        err = dsl_dataset_hold_obj(dp, obj, FTAG, &ds);
                        dsl_pool_config_exit(dp, FTAG);
                        if (err != 0) {
                                if (!hard)
                                        return (err);
                                lasterr = err;
                                continue;
                        }
                        if (ds->ds_phys->ds_prev_snap_txg > txg)
                                txg = ds->ds_phys->ds_prev_snap_txg;
                        err = traverse_dataset(ds, txg, flags, func, arg);
                        dsl_dataset_rele(ds, FTAG);
                        if (err != 0) {
                                if (!hard)
                                        return (err);
                                lasterr = err;
                        }
                }
        }
        if (err == ESRCH)
                err = 0;
        return (err != 0 ? err : lasterr);
}