1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  25  * Copyright (c) 2014 Integros [integros.com]
  26  * Copyright 2017 Nexenta Systems, Inc.
  27  */
  28 
  29 #include <stdio.h>
  30 #include <unistd.h>
  31 #include <stdio_ext.h>
  32 #include <stdlib.h>
  33 #include <ctype.h>
  34 #include <sys/zfs_context.h>
  35 #include <sys/spa.h>
  36 #include <sys/spa_impl.h>
  37 #include <sys/dmu.h>
  38 #include <sys/zap.h>
  39 #include <sys/fs/zfs.h>
  40 #include <sys/zfs_znode.h>
  41 #include <sys/zfs_sa.h>
  42 #include <sys/sa.h>
  43 #include <sys/sa_impl.h>
  44 #include <sys/vdev.h>
  45 #include <sys/vdev_impl.h>
  46 #include <sys/metaslab_impl.h>
  47 #include <sys/dmu_objset.h>
  48 #include <sys/dsl_dir.h>
  49 #include <sys/dsl_dataset.h>
  50 #include <sys/dsl_pool.h>
  51 #include <sys/dbuf.h>
  52 #include <sys/zil.h>
  53 #include <sys/zil_impl.h>
  54 #include <sys/stat.h>
  55 #include <sys/resource.h>
  56 #include <sys/dmu_traverse.h>
  57 #include <sys/zio_checksum.h>
  58 #include <sys/zio_compress.h>
  59 #include <sys/zfs_fuid.h>
  60 #include <sys/arc.h>
  61 #include <sys/ddt.h>
  62 #include <sys/zfeature.h>
  63 #include <sys/abd.h>
  64 #include <sys/blkptr.h>
  65 #include <zfs_comutil.h>
  66 #include <libcmdutils.h>
  67 #undef verify
  68 #include <libzfs.h>
  69 
  70 #include "zdb.h"
  71 
/*
 * Bounds-checked lookup helpers.  Each macro evaluates its argument more
 * than once, so do not pass an expression with side effects.
 */

/* Name of compression function idx, or "UNKNOWN" when idx is out of range. */
#define ZDB_COMPRESS_NAME(idx) ((idx) < ZIO_COMPRESS_FUNCTIONS ?     \
        zio_compress_table[(idx)].ci_name : "UNKNOWN")
/* Name of checksum function idx, or "UNKNOWN" when idx is out of range. */
#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ?     \
        zio_checksum_table[(idx)].ci_name : "UNKNOWN")
/*
 * Name of object type idx: taken from dmu_ot[] when idx is below
 * DMU_OT_NUMTYPES, otherwise from dmu_ot_byteswap[] when DMU_OT_IS_VALID(idx)
 * holds, otherwise "UNKNOWN".
 */
#define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ?  \
        dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ?  \
        dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN")
/*
 * Collapse idx to a value in [0, DMU_OT_NUMTYPES]: idx itself when below
 * DMU_OT_NUMTYPES, DMU_OT_ZAP_OTHER for the two DMU_OTN_ZAP_* types, and
 * DMU_OT_NUMTYPES for anything else.
 */
#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) :          \
        (((idx) == DMU_OTN_ZAP_DATA || (idx) == DMU_OTN_ZAP_METADATA) ? \
        DMU_OT_ZAP_OTHER : DMU_OT_NUMTYPES))
  82 
/*
 * Variables that are defined outside this file.  A normal build only
 * declares them; when lint is defined they are defined here instead.
 */
#ifndef lint
extern int reference_tracking_enable;
extern boolean_t zfs_recover;
extern uint64_t zfs_arc_max, zfs_arc_meta_limit;
extern int zfs_vdev_async_read_max_active;
#else
int reference_tracking_enable;
boolean_t zfs_recover;
uint64_t zfs_arc_max, zfs_arc_meta_limit;
int zfs_vdev_async_read_max_active;
#endif
  94 
/* Program name used as the prefix of usage and error messages. */
static const char cmdname[] = "zdb";
/*
 * Option state indexed by option character; this file tests entries both
 * as flags (dump_opt['G']) and as levels (dump_opt['d'] < 5).
 */
uint8_t dump_opt[256];

/* Signature shared by the per-object-type dump_*() viewer functions. */
typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);

/* NOTE(review): presumably the object numbers given on the command line. */
uint64_t *zopt_object = NULL;
/* NOTE(review): presumably the number of entries in zopt_object. */
static unsigned zopt_objects = 0;
libzfs_handle_t *g_zfs;
/* NOTE(review): presumably the limit set by -I <inflight I/Os>; confirm. */
uint64_t max_inflight = 1000;

/* Forward declaration; used by dump_bpobj() before its definition. */
static void snprintf_blkptr_compact(char *, size_t, const blkptr_t *);
 106 
 107 /*
 108  * These libumem hooks provide a reasonable set of defaults for the allocator's
 109  * debugging facilities.
 110  */
 111 const char *
 112 _umem_debug_init()
 113 {
 114         return ("default,verbose"); /* $UMEM_DEBUG setting */
 115 }
 116 
 117 const char *
 118 _umem_logging_init(void)
 119 {
 120         return ("fail,contents"); /* $UMEM_LOGGING setting */
 121 }
 122 
 123 static void
 124 usage(void)
 125 {
 126         (void) fprintf(stderr,
 127             "Usage:\t%s [-AbcdDFGhiLMPsvX] [-e [-V] [-p <path> ...]] "
 128             "[-I <inflight I/Os>]\n"
 129             "\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
 130             "\t\t[<poolname> [<object> ...]]\n"
 131             "\t%s [-AdiPv] [-e [-V] [-p <path> ...]] [-U <cache>] <dataset> "
 132             "[<object> ...]\n"
 133             "\t%s -C [-A] [-U <cache>]\n"
 134             "\t%s -l [-Aqu] <device>\n"
 135             "\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
 136             "[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"
 137             "\t%s -O <dataset> <path>\n"
 138             "\t%s -R [-A] [-e [-V] [-p <path> ...]] [-U <cache>]\n"
 139             "\t\t<poolname> <vdev>:<offset>:<size>[:<flags>]\n"
 140             "\t%s -E [-A] word0:word1:...:word15\n"
 141             "\t%s -S [-AP] [-e [-V] [-p <path> ...]] [-U <cache>] "
 142             "<poolname>\n\n",
 143             cmdname, cmdname, cmdname, cmdname, cmdname, cmdname, cmdname,
 144             cmdname, cmdname);
 145 
 146         (void) fprintf(stderr, "    Dataset name must include at least one "
 147             "separator character '/' or '@'\n");
 148         (void) fprintf(stderr, "    If dataset name is specified, only that "
 149             "dataset is dumped\n");
 150         (void) fprintf(stderr, "    If object numbers are specified, only "
 151             "those objects are dumped\n\n");
 152         (void) fprintf(stderr, "    Options to control amount of output:\n");
 153         (void) fprintf(stderr, "        -b block statistics\n");
 154         (void) fprintf(stderr, "        -c checksum all metadata (twice for "
 155             "all data) blocks\n");
 156         (void) fprintf(stderr, "        -C config (or cachefile if alone)\n");
 157         (void) fprintf(stderr, "        -d dataset(s)\n");
 158         (void) fprintf(stderr, "        -D dedup statistics\n");
 159         (void) fprintf(stderr, "        -E decode and display block from an "
 160             "embedded block pointer\n");
 161         (void) fprintf(stderr, "        -h pool history\n");
 162         (void) fprintf(stderr, "        -i intent logs\n");
 163         (void) fprintf(stderr, "        -l read label contents\n");
 164         (void) fprintf(stderr, "        -L disable leak tracking (do not "
 165             "load spacemaps)\n");
 166         (void) fprintf(stderr, "        -m metaslabs\n");
 167         (void) fprintf(stderr, "        -M metaslab groups\n");
 168         (void) fprintf(stderr, "        -O perform object lookups by path\n");
 169         (void) fprintf(stderr, "        -R read and display block from a "
 170             "device\n");
 171         (void) fprintf(stderr, "        -s report stats on zdb's I/O\n");
 172         (void) fprintf(stderr, "        -S simulate dedup to measure effect\n");
 173         (void) fprintf(stderr, "        -v verbose (applies to all "
 174             "others)\n\n");
 175         (void) fprintf(stderr, "    Below options are intended for use "
 176             "with other options:\n");
 177         (void) fprintf(stderr, "        -A ignore assertions (-A), enable "
 178             "panic recovery (-AA) or both (-AAA)\n");
 179         (void) fprintf(stderr, "        -e pool is exported/destroyed/"
 180             "has altroot/not in a cachefile\n");
 181         (void) fprintf(stderr, "        -F attempt automatic rewind within "
 182             "safe range of transaction groups\n");
 183         (void) fprintf(stderr, "        -G dump zfs_dbgmsg buffer before "
 184             "exiting\n");
 185         (void) fprintf(stderr, "        -I <number of inflight I/Os> -- "
 186             "specify the maximum number of "
 187             "checksumming I/Os [default is 200]\n");
 188         (void) fprintf(stderr, "        -o <variable>=<value> set global "
 189             "variable to an unsigned 32-bit integer value\n");
 190         (void) fprintf(stderr, "        -p <path> -- use one or more with "
 191             "-e to specify path to vdev dir\n");
 192         (void) fprintf(stderr, "        -P print numbers in parseable form\n");
 193         (void) fprintf(stderr, "        -q don't print label contents\n");
 194         (void) fprintf(stderr, "        -t <txg> -- highest txg to use when "
 195             "searching for uberblocks\n");
 196         (void) fprintf(stderr, "        -u uberblock\n");
 197         (void) fprintf(stderr, "        -U <cachefile_path> -- use alternate "
 198             "cachefile\n");
 199         (void) fprintf(stderr, "        -V do verbatim import\n");
 200         (void) fprintf(stderr, "        -x <dumpdir> -- "
 201             "dump all read blocks into specified directory\n");
 202         (void) fprintf(stderr, "        -X attempt extreme rewind (does not "
 203             "work with dataset)\n\n");
 204         (void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
 205             "to make only that option verbose\n");
 206         (void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
 207         exit(1);
 208 }
 209 
 210 static void
 211 dump_debug_buffer()
 212 {
 213         if (dump_opt['G']) {
 214                 (void) printf("\n");
 215                 zfs_dbgmsg_print("zdb");
 216         }
 217 }
 218 
 219 /*
 220  * Called for usage errors that are discovered after a call to spa_open(),
 221  * dmu_bonus_hold(), or pool_match().  abort() is called for other errors.
 222  */
 223 
 224 static void
 225 fatal(const char *fmt, ...)
 226 {
 227         va_list ap;
 228 
 229         va_start(ap, fmt);
 230         (void) fprintf(stderr, "%s: ", cmdname);
 231         (void) vfprintf(stderr, fmt, ap);
 232         va_end(ap);
 233         (void) fprintf(stderr, "\n");
 234 
 235         dump_debug_buffer();
 236 
 237         exit(1);
 238 }
 239 
 240 /* ARGSUSED */
 241 static void
 242 dump_packed_nvlist(objset_t *os, uint64_t object, void *data, size_t size)
 243 {
 244         nvlist_t *nv;
 245         size_t nvsize = *(uint64_t *)data;
 246         char *packed = umem_alloc(nvsize, UMEM_NOFAIL);
 247 
 248         VERIFY(0 == dmu_read(os, object, 0, nvsize, packed, DMU_READ_PREFETCH));
 249 
 250         VERIFY(nvlist_unpack(packed, nvsize, &nv, 0) == 0);
 251 
 252         umem_free(packed, nvsize);
 253 
 254         dump_nvlist(nv, 8);
 255 
 256         nvlist_free(nv);
 257 }
 258 
 259 /* ARGSUSED */
 260 static void
 261 dump_history_offsets(objset_t *os, uint64_t object, void *data, size_t size)
 262 {
 263         spa_history_phys_t *shp = data;
 264 
 265         if (shp == NULL)
 266                 return;
 267 
 268         (void) printf("\t\tpool_create_len = %llu\n",
 269             (u_longlong_t)shp->sh_pool_create_len);
 270         (void) printf("\t\tphys_max_off = %llu\n",
 271             (u_longlong_t)shp->sh_phys_max_off);
 272         (void) printf("\t\tbof = %llu\n",
 273             (u_longlong_t)shp->sh_bof);
 274         (void) printf("\t\teof = %llu\n",
 275             (u_longlong_t)shp->sh_eof);
 276         (void) printf("\t\trecords_lost = %llu\n",
 277             (u_longlong_t)shp->sh_records_lost);
 278 }
 279 
 280 static void
 281 zdb_nicenum(uint64_t num, char *buf, size_t buflen)
 282 {
 283         if (dump_opt['P'])
 284                 (void) snprintf(buf, buflen, "%llu", (longlong_t)num);
 285         else
 286                 nicenum(num, buf, sizeof (buf));
 287 }
 288 
 289 static const char histo_stars[] = "****************************************";
 290 static const uint64_t histo_width = sizeof (histo_stars) - 1;
 291 
 292 static void
 293 dump_histogram(const uint64_t *histo, int size, int offset)
 294 {
 295         int i;
 296         int minidx = size - 1;
 297         int maxidx = 0;
 298         uint64_t max = 0;
 299 
 300         for (i = 0; i < size; i++) {
 301                 if (histo[i] > max)
 302                         max = histo[i];
 303                 if (histo[i] > 0 && i > maxidx)
 304                         maxidx = i;
 305                 if (histo[i] > 0 && i < minidx)
 306                         minidx = i;
 307         }
 308 
 309         if (max < histo_width)
 310                 max = histo_width;
 311 
 312         for (i = minidx; i <= maxidx; i++) {
 313                 (void) printf("\t\t\t%3u: %6llu %s\n",
 314                     i + offset, (u_longlong_t)histo[i],
 315                     &histo_stars[(max - histo[i]) * histo_width / max]);
 316         }
 317 }
 318 
 319 static void
 320 dump_zap_stats(objset_t *os, uint64_t object)
 321 {
 322         int error;
 323         zap_stats_t zs;
 324 
 325         error = zap_get_stats(os, object, &zs);
 326         if (error)
 327                 return;
 328 
 329         if (zs.zs_ptrtbl_len == 0) {
 330                 ASSERT(zs.zs_num_blocks == 1);
 331                 (void) printf("\tmicrozap: %llu bytes, %llu entries\n",
 332                     (u_longlong_t)zs.zs_blocksize,
 333                     (u_longlong_t)zs.zs_num_entries);
 334                 return;
 335         }
 336 
 337         (void) printf("\tFat ZAP stats:\n");
 338 
 339         (void) printf("\t\tPointer table:\n");
 340         (void) printf("\t\t\t%llu elements\n",
 341             (u_longlong_t)zs.zs_ptrtbl_len);
 342         (void) printf("\t\t\tzt_blk: %llu\n",
 343             (u_longlong_t)zs.zs_ptrtbl_zt_blk);
 344         (void) printf("\t\t\tzt_numblks: %llu\n",
 345             (u_longlong_t)zs.zs_ptrtbl_zt_numblks);
 346         (void) printf("\t\t\tzt_shift: %llu\n",
 347             (u_longlong_t)zs.zs_ptrtbl_zt_shift);
 348         (void) printf("\t\t\tzt_blks_copied: %llu\n",
 349             (u_longlong_t)zs.zs_ptrtbl_blks_copied);
 350         (void) printf("\t\t\tzt_nextblk: %llu\n",
 351             (u_longlong_t)zs.zs_ptrtbl_nextblk);
 352 
 353         (void) printf("\t\tZAP entries: %llu\n",
 354             (u_longlong_t)zs.zs_num_entries);
 355         (void) printf("\t\tLeaf blocks: %llu\n",
 356             (u_longlong_t)zs.zs_num_leafs);
 357         (void) printf("\t\tTotal blocks: %llu\n",
 358             (u_longlong_t)zs.zs_num_blocks);
 359         (void) printf("\t\tzap_block_type: 0x%llx\n",
 360             (u_longlong_t)zs.zs_block_type);
 361         (void) printf("\t\tzap_magic: 0x%llx\n",
 362             (u_longlong_t)zs.zs_magic);
 363         (void) printf("\t\tzap_salt: 0x%llx\n",
 364             (u_longlong_t)zs.zs_salt);
 365 
 366         (void) printf("\t\tLeafs with 2^n pointers:\n");
 367         dump_histogram(zs.zs_leafs_with_2n_pointers, ZAP_HISTOGRAM_SIZE, 0);
 368 
 369         (void) printf("\t\tBlocks with n*5 entries:\n");
 370         dump_histogram(zs.zs_blocks_with_n5_entries, ZAP_HISTOGRAM_SIZE, 0);
 371 
 372         (void) printf("\t\tBlocks n/10 full:\n");
 373         dump_histogram(zs.zs_blocks_n_tenths_full, ZAP_HISTOGRAM_SIZE, 0);
 374 
 375         (void) printf("\t\tEntries with n chunks:\n");
 376         dump_histogram(zs.zs_entries_using_n_chunks, ZAP_HISTOGRAM_SIZE, 0);
 377 
 378         (void) printf("\t\tBuckets with n entries:\n");
 379         dump_histogram(zs.zs_buckets_with_n_entries, ZAP_HISTOGRAM_SIZE, 0);
 380 }
 381 
/*
 * Object viewer that prints nothing; all arguments are ignored.
 */
/*ARGSUSED*/
static void
dump_none(objset_t *os, uint64_t object, void *data, size_t size)
{
}
 387 
/*
 * Object viewer that prints a fixed "UNKNOWN OBJECT TYPE" line; all
 * arguments are ignored.
 */
/*ARGSUSED*/
static void
dump_unknown(objset_t *os, uint64_t object, void *data, size_t size)
{
        (void) printf("\tUNKNOWN OBJECT TYPE\n");
}
 394 
/*
 * Object viewer with an empty body: nothing is printed and all arguments
 * are ignored.
 */
/*ARGSUSED*/
static void
dump_uint8(objset_t *os, uint64_t object, void *data, size_t size)
{
}
 400 
/*
 * Object viewer with an empty body: nothing is printed and all arguments
 * are ignored.
 */
/*ARGSUSED*/
static void
dump_uint64(objset_t *os, uint64_t object, void *data, size_t size)
{
}
 406 
 407 /*ARGSUSED*/
 408 static void
 409 dump_zap(objset_t *os, uint64_t object, void *data, size_t size)
 410 {
 411         zap_cursor_t zc;
 412         zap_attribute_t attr;
 413         void *prop;
 414         unsigned i;
 415 
 416         dump_zap_stats(os, object);
 417         (void) printf("\n");
 418 
 419         for (zap_cursor_init(&zc, os, object);
 420             zap_cursor_retrieve(&zc, &attr) == 0;
 421             zap_cursor_advance(&zc)) {
 422                 (void) printf("\t\t%s = ", attr.za_name);
 423                 if (attr.za_num_integers == 0) {
 424                         (void) printf("\n");
 425                         continue;
 426                 }
 427                 prop = umem_zalloc(attr.za_num_integers *
 428                     attr.za_integer_length, UMEM_NOFAIL);
 429                 (void) zap_lookup(os, object, attr.za_name,
 430                     attr.za_integer_length, attr.za_num_integers, prop);
 431                 if (attr.za_integer_length == 1) {
 432                         (void) printf("%s", (char *)prop);
 433                 } else {
 434                         for (i = 0; i < attr.za_num_integers; i++) {
 435                                 switch (attr.za_integer_length) {
 436                                 case 2:
 437                                         (void) printf("%u ",
 438                                             ((uint16_t *)prop)[i]);
 439                                         break;
 440                                 case 4:
 441                                         (void) printf("%u ",
 442                                             ((uint32_t *)prop)[i]);
 443                                         break;
 444                                 case 8:
 445                                         (void) printf("%lld ",
 446                                             (u_longlong_t)((int64_t *)prop)[i]);
 447                                         break;
 448                                 }
 449                         }
 450                 }
 451                 (void) printf("\n");
 452                 umem_free(prop, attr.za_num_integers * attr.za_integer_length);
 453         }
 454         zap_cursor_fini(&zc);
 455 }
 456 
 457 static void
 458 dump_bpobj(objset_t *os, uint64_t object, void *data, size_t size)
 459 {
 460         bpobj_phys_t *bpop = data;
 461         char bytes[32], comp[32], uncomp[32];
 462 
 463         /* make sure the output won't get truncated */
 464         CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
 465         CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
 466         CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
 467 
 468         if (bpop == NULL)
 469                 return;
 470 
 471         zdb_nicenum(bpop->bpo_bytes, bytes, sizeof (bytes));
 472         zdb_nicenum(bpop->bpo_comp, comp, sizeof (comp));
 473         zdb_nicenum(bpop->bpo_uncomp, uncomp, sizeof (uncomp));
 474 
 475         (void) printf("\t\tnum_blkptrs = %llu\n",
 476             (u_longlong_t)bpop->bpo_num_blkptrs);
 477         (void) printf("\t\tbytes = %s\n", bytes);
 478         if (size >= BPOBJ_SIZE_V1) {
 479                 (void) printf("\t\tcomp = %s\n", comp);
 480                 (void) printf("\t\tuncomp = %s\n", uncomp);
 481         }
 482         if (size >= sizeof (*bpop)) {
 483                 (void) printf("\t\tsubobjs = %llu\n",
 484                     (u_longlong_t)bpop->bpo_subobjs);
 485                 (void) printf("\t\tnum_subobjs = %llu\n",
 486                     (u_longlong_t)bpop->bpo_num_subobjs);
 487         }
 488 
 489         if (dump_opt['d'] < 5)
 490                 return;
 491 
 492         for (uint64_t i = 0; i < bpop->bpo_num_blkptrs; i++) {
 493                 char blkbuf[BP_SPRINTF_LEN];
 494                 blkptr_t bp;
 495 
 496                 int err = dmu_read(os, object,
 497                     i * sizeof (bp), sizeof (bp), &bp, 0);
 498                 if (err != 0) {
 499                         (void) printf("got error %u from dmu_read\n", err);
 500                         break;
 501                 }
 502                 snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), &bp);
 503                 (void) printf("\t%s\n", blkbuf);
 504         }
 505 }
 506 
 507 /* ARGSUSED */
 508 static void
 509 dump_bpobj_subobjs(objset_t *os, uint64_t object, void *data, size_t size)
 510 {
 511         dmu_object_info_t doi;
 512 
 513         VERIFY0(dmu_object_info(os, object, &doi));
 514         uint64_t *subobjs = kmem_alloc(doi.doi_max_offset, KM_SLEEP);
 515 
 516         int err = dmu_read(os, object, 0, doi.doi_max_offset, subobjs, 0);
 517         if (err != 0) {
 518                 (void) printf("got error %u from dmu_read\n", err);
 519                 kmem_free(subobjs, doi.doi_max_offset);
 520                 return;
 521         }
 522 
 523         int64_t last_nonzero = -1;
 524         for (uint64_t i = 0; i < doi.doi_max_offset / 8; i++) {
 525                 if (subobjs[i] != 0)
 526                         last_nonzero = i;
 527         }
 528 
 529         for (int64_t i = 0; i <= last_nonzero; i++) {
 530                 (void) printf("\t%llu\n", (longlong_t)subobjs[i]);
 531         }
 532         kmem_free(subobjs, doi.doi_max_offset);
 533 }
 534 
/*
 * Object viewer for DDT ZAP objects: prints only the ZAP statistics.
 * data and size are unused.
 */
/*ARGSUSED*/
static void
dump_ddt_zap(objset_t *os, uint64_t object, void *data, size_t size)
{
        dump_zap_stats(os, object);
        /* contents are printed elsewhere, properly decoded */
}
 542 
 543 /*ARGSUSED*/
 544 static void
 545 dump_sa_attrs(objset_t *os, uint64_t object, void *data, size_t size)
 546 {
 547         zap_cursor_t zc;
 548         zap_attribute_t attr;
 549 
 550         dump_zap_stats(os, object);
 551         (void) printf("\n");
 552 
 553         for (zap_cursor_init(&zc, os, object);
 554             zap_cursor_retrieve(&zc, &attr) == 0;
 555             zap_cursor_advance(&zc)) {
 556                 (void) printf("\t\t%s = ", attr.za_name);
 557                 if (attr.za_num_integers == 0) {
 558                         (void) printf("\n");
 559                         continue;
 560                 }
 561                 (void) printf(" %llx : [%d:%d:%d]\n",
 562                     (u_longlong_t)attr.za_first_integer,
 563                     (int)ATTR_LENGTH(attr.za_first_integer),
 564                     (int)ATTR_BSWAP(attr.za_first_integer),
 565                     (int)ATTR_NUM(attr.za_first_integer));
 566         }
 567         zap_cursor_fini(&zc);
 568 }
 569 
 570 /*ARGSUSED*/
 571 static void
 572 dump_sa_layouts(objset_t *os, uint64_t object, void *data, size_t size)
 573 {
 574         zap_cursor_t zc;
 575         zap_attribute_t attr;
 576         uint16_t *layout_attrs;
 577         unsigned i;
 578 
 579         dump_zap_stats(os, object);
 580         (void) printf("\n");
 581 
 582         for (zap_cursor_init(&zc, os, object);
 583             zap_cursor_retrieve(&zc, &attr) == 0;
 584             zap_cursor_advance(&zc)) {
 585                 (void) printf("\t\t%s = [", attr.za_name);
 586                 if (attr.za_num_integers == 0) {
 587                         (void) printf("\n");
 588                         continue;
 589                 }
 590 
 591                 VERIFY(attr.za_integer_length == 2);
 592                 layout_attrs = umem_zalloc(attr.za_num_integers *
 593                     attr.za_integer_length, UMEM_NOFAIL);
 594 
 595                 VERIFY(zap_lookup(os, object, attr.za_name,
 596                     attr.za_integer_length,
 597                     attr.za_num_integers, layout_attrs) == 0);
 598 
 599                 for (i = 0; i != attr.za_num_integers; i++)
 600                         (void) printf(" %d ", (int)layout_attrs[i]);
 601                 (void) printf("]\n");
 602                 umem_free(layout_attrs,
 603                     attr.za_num_integers * attr.za_integer_length);
 604         }
 605         zap_cursor_fini(&zc);
 606 }
 607 
 608 /*ARGSUSED*/
 609 static void
 610 dump_zpldir(objset_t *os, uint64_t object, void *data, size_t size)
 611 {
 612         zap_cursor_t zc;
 613         zap_attribute_t attr;
 614         const char *typenames[] = {
 615                 /* 0 */ "not specified",
 616                 /* 1 */ "FIFO",
 617                 /* 2 */ "Character Device",
 618                 /* 3 */ "3 (invalid)",
 619                 /* 4 */ "Directory",
 620                 /* 5 */ "5 (invalid)",
 621                 /* 6 */ "Block Device",
 622                 /* 7 */ "7 (invalid)",
 623                 /* 8 */ "Regular File",
 624                 /* 9 */ "9 (invalid)",
 625                 /* 10 */ "Symbolic Link",
 626                 /* 11 */ "11 (invalid)",
 627                 /* 12 */ "Socket",
 628                 /* 13 */ "Door",
 629                 /* 14 */ "Event Port",
 630                 /* 15 */ "15 (invalid)",
 631         };
 632 
 633         dump_zap_stats(os, object);
 634         (void) printf("\n");
 635 
 636         for (zap_cursor_init(&zc, os, object);
 637             zap_cursor_retrieve(&zc, &attr) == 0;
 638             zap_cursor_advance(&zc)) {
 639                 (void) printf("\t\t%s = %lld (type: %s)\n",
 640                     attr.za_name, ZFS_DIRENT_OBJ(attr.za_first_integer),
 641                     typenames[ZFS_DIRENT_TYPE(attr.za_first_integer)]);
 642         }
 643         zap_cursor_fini(&zc);
 644 }
 645 
 646 static int
 647 get_dtl_refcount(vdev_t *vd)
 648 {
 649         int refcount = 0;
 650 
 651         if (vd->vdev_ops->vdev_op_leaf) {
 652                 space_map_t *sm = vd->vdev_dtl_sm;
 653 
 654                 if (sm != NULL &&
 655                     sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
 656                         return (1);
 657                 return (0);
 658         }
 659 
 660         for (unsigned c = 0; c < vd->vdev_children; c++)
 661                 refcount += get_dtl_refcount(vd->vdev_child[c]);
 662         return (refcount);
 663 }
 664 
 665 static int
 666 get_metaslab_refcount(vdev_t *vd)
 667 {
 668         int refcount = 0;
 669 
 670         if (vd->vdev_top == vd && !vd->vdev_removing) {
 671                 for (unsigned m = 0; m < vd->vdev_ms_count; m++) {
 672                         space_map_t *sm = vd->vdev_ms[m]->ms_sm;
 673 
 674                         if (sm != NULL &&
 675                             sm->sm_dbuf->db_size == sizeof (space_map_phys_t))
 676                                 refcount++;
 677                 }
 678         }
 679         for (unsigned c = 0; c < vd->vdev_children; c++)
 680                 refcount += get_metaslab_refcount(vd->vdev_child[c]);
 681 
 682         return (refcount);
 683 }
 684 
 685 static int
 686 verify_spacemap_refcounts(spa_t *spa)
 687 {
 688         uint64_t expected_refcount = 0;
 689         uint64_t actual_refcount;
 690 
 691         (void) feature_get_refcount(spa,
 692             &spa_feature_table[SPA_FEATURE_SPACEMAP_HISTOGRAM],
 693             &expected_refcount);
 694         actual_refcount = get_dtl_refcount(spa->spa_root_vdev);
 695         actual_refcount += get_metaslab_refcount(spa->spa_root_vdev);
 696 
 697         if (expected_refcount != actual_refcount) {
 698                 (void) printf("space map refcount mismatch: expected %lld != "
 699                     "actual %lld\n",
 700                     (longlong_t)expected_refcount,
 701                     (longlong_t)actual_refcount);
 702                 return (2);
 703         }
 704         return (0);
 705 }
 706 
 707 static void
 708 dump_spacemap(objset_t *os, space_map_t *sm)
 709 {
 710         uint64_t alloc, offset, entry;
 711         const char *ddata[] = { "ALLOC", "FREE", "CONDENSE", "INVALID",
 712                             "INVALID", "INVALID", "INVALID", "INVALID" };
 713 
 714         if (sm == NULL)
 715                 return;
 716 
 717         /*
 718          * Print out the freelist entries in both encoded and decoded form.
 719          */
 720         alloc = 0;
 721         for (offset = 0; offset < space_map_length(sm);
 722             offset += sizeof (entry)) {
 723                 uint8_t mapshift = sm->sm_shift;
 724 
 725                 VERIFY0(dmu_read(os, space_map_object(sm), offset,
 726                     sizeof (entry), &entry, DMU_READ_PREFETCH));
 727                 if (SM_DEBUG_DECODE(entry)) {
 728 
 729                         (void) printf("\t    [%6llu] %s: txg %llu, pass %llu\n",
 730                             (u_longlong_t)(offset / sizeof (entry)),
 731                             ddata[SM_DEBUG_ACTION_DECODE(entry)],
 732                             (u_longlong_t)SM_DEBUG_TXG_DECODE(entry),
 733                             (u_longlong_t)SM_DEBUG_SYNCPASS_DECODE(entry));
 734                 } else {
 735                         (void) printf("\t    [%6llu]    %c  range:"
 736                             " %010llx-%010llx  size: %06llx\n",
 737                             (u_longlong_t)(offset / sizeof (entry)),
 738                             SM_TYPE_DECODE(entry) == SM_ALLOC ? 'A' : 'F',
 739                             (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
 740                             mapshift) + sm->sm_start),
 741                             (u_longlong_t)((SM_OFFSET_DECODE(entry) <<
 742                             mapshift) + sm->sm_start +
 743                             (SM_RUN_DECODE(entry) << mapshift)),
 744                             (u_longlong_t)(SM_RUN_DECODE(entry) << mapshift));
 745                         if (SM_TYPE_DECODE(entry) == SM_ALLOC)
 746                                 alloc += SM_RUN_DECODE(entry) << mapshift;
 747                         else
 748                                 alloc -= SM_RUN_DECODE(entry) << mapshift;
 749                 }
 750         }
 751         if (alloc != space_map_allocated(sm)) {
 752                 (void) printf("space_map_object alloc (%llu) INCONSISTENT "
 753                     "with space map summary (%llu)\n",
 754                     (u_longlong_t)space_map_allocated(sm), (u_longlong_t)alloc);
 755         }
 756 }
 757 
 758 static void
 759 dump_metaslab_stats(metaslab_t *msp)
 760 {
 761         char maxbuf[32];
 762         range_tree_t *rt = msp->ms_tree;
 763         avl_tree_t *t = &msp->ms_size_tree;
 764         int free_pct = range_tree_space(rt) * 100 / msp->ms_size;
 765 
 766         /* max sure nicenum has enough space */
 767         CTASSERT(sizeof (maxbuf) >= NN_NUMBUF_SZ);
 768 
 769         zdb_nicenum(metaslab_block_maxsize(msp), maxbuf, sizeof (maxbuf));
 770 
 771         (void) printf("\t %25s %10lu   %7s  %6s   %4s %4d%%\n",
 772             "segments", avl_numnodes(t), "maxsize", maxbuf,
 773             "freepct", free_pct);
 774         (void) printf("\tIn-memory histogram:\n");
 775         dump_histogram(rt->rt_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
 776 }
 777 
 778 static void
 779 dump_metaslab(metaslab_t *msp)
 780 {
 781         vdev_t *vd = msp->ms_group->mg_vd;
 782         spa_t *spa = vd->vdev_spa;
 783         space_map_t *sm = msp->ms_sm;
 784         char freebuf[32];
 785 
 786         zdb_nicenum(msp->ms_size - space_map_allocated(sm), freebuf,
 787             sizeof (freebuf));
 788 
 789         (void) printf(
 790             "\tmetaslab %6llu   offset %12llx   spacemap %6llu   free    %5s\n",
 791             (u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
 792             (u_longlong_t)space_map_object(sm), freebuf);
 793 
 794         if (dump_opt['m'] > 2 && !dump_opt['L']) {
 795                 mutex_enter(&msp->ms_lock);
 796                 metaslab_load_wait(msp);
 797                 if (!msp->ms_loaded) {
 798                         VERIFY0(metaslab_load(msp));
 799                         range_tree_stat_verify(msp->ms_tree);
 800                 }
 801                 dump_metaslab_stats(msp);
 802                 metaslab_unload(msp);
 803                 mutex_exit(&msp->ms_lock);
 804         }
 805 
 806         if (dump_opt['m'] > 1 && sm != NULL &&
 807             spa_feature_is_active(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM)) {
 808                 /*
 809                  * The space map histogram represents free space in chunks
 810                  * of sm_shift (i.e. bucket 0 refers to 2^sm_shift).
 811                  */
 812                 (void) printf("\tOn-disk histogram:\t\tfragmentation %llu\n",
 813                     (u_longlong_t)msp->ms_fragmentation);
 814                 dump_histogram(sm->sm_phys->smp_histogram,
 815                     SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
 816         }
 817 
 818         if (dump_opt['d'] > 5 || dump_opt['m'] > 3) {
 819                 ASSERT(msp->ms_size == (1ULL << vd->vdev_ms_shift));
 820 
 821                 mutex_enter(&msp->ms_lock);
 822                 dump_spacemap(spa->spa_meta_objset, msp->ms_sm);
 823                 mutex_exit(&msp->ms_lock);
 824         }
 825 }
 826 
 827 static void
 828 print_vdev_metaslab_header(vdev_t *vd)
 829 {
 830         (void) printf("\tvdev %10llu\n\t%-10s%5llu   %-19s   %-15s   %-10s\n",
 831             (u_longlong_t)vd->vdev_id,
 832             "metaslabs", (u_longlong_t)vd->vdev_ms_count,
 833             "offset", "spacemap", "free");
 834         (void) printf("\t%15s   %19s   %15s   %10s\n",
 835             "---------------", "-------------------",
 836             "---------------", "-------------");
 837 }
 838 
 839 static void
 840 dump_metaslab_groups(spa_t *spa)
 841 {
 842         vdev_t *rvd = spa->spa_root_vdev;
 843         metaslab_class_t *mc = spa_normal_class(spa);
 844         uint64_t fragmentation;
 845 
 846         metaslab_class_histogram_verify(mc);
 847 
 848         for (unsigned c = 0; c < rvd->vdev_children; c++) {
 849                 vdev_t *tvd = rvd->vdev_child[c];
 850                 metaslab_group_t *mg = tvd->vdev_mg;
 851 
 852                 if (mg->mg_class != mc)
 853                         continue;
 854 
 855                 metaslab_group_histogram_verify(mg);
 856                 mg->mg_fragmentation = metaslab_group_fragmentation(mg);
 857 
 858                 (void) printf("\tvdev %10llu\t\tmetaslabs%5llu\t\t"
 859                     "fragmentation",
 860                     (u_longlong_t)tvd->vdev_id,
 861                     (u_longlong_t)tvd->vdev_ms_count);
 862                 if (mg->mg_fragmentation == ZFS_FRAG_INVALID) {
 863                         (void) printf("%3s\n", "-");
 864                 } else {
 865                         (void) printf("%3llu%%\n",
 866                             (u_longlong_t)mg->mg_fragmentation);
 867                 }
 868                 dump_histogram(mg->mg_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
 869         }
 870 
 871         (void) printf("\tpool %s\tfragmentation", spa_name(spa));
 872         fragmentation = metaslab_class_fragmentation(mc);
 873         if (fragmentation == ZFS_FRAG_INVALID)
 874                 (void) printf("\t%3s\n", "-");
 875         else
 876                 (void) printf("\t%3llu%%\n", (u_longlong_t)fragmentation);
 877         dump_histogram(mc->mc_histogram, RANGE_TREE_HISTOGRAM_SIZE, 0);
 878 }
 879 
 880 static void
 881 dump_metaslabs(spa_t *spa)
 882 {
 883         vdev_t *vd, *rvd = spa->spa_root_vdev;
 884         uint64_t m, c = 0, children = rvd->vdev_children;
 885 
 886         (void) printf("\nMetaslabs:\n");
 887 
 888         if (!dump_opt['d'] && zopt_objects > 0) {
 889                 c = zopt_object[0];
 890 
 891                 if (c >= children)
 892                         (void) fatal("bad vdev id: %llu", (u_longlong_t)c);
 893 
 894                 if (zopt_objects > 1) {
 895                         vd = rvd->vdev_child[c];
 896                         print_vdev_metaslab_header(vd);
 897 
 898                         for (m = 1; m < zopt_objects; m++) {
 899                                 if (zopt_object[m] < vd->vdev_ms_count)
 900                                         dump_metaslab(
 901                                             vd->vdev_ms[zopt_object[m]]);
 902                                 else
 903                                         (void) fprintf(stderr, "bad metaslab "
 904                                             "number %llu\n",
 905                                             (u_longlong_t)zopt_object[m]);
 906                         }
 907                         (void) printf("\n");
 908                         return;
 909                 }
 910                 children = c + 1;
 911         }
 912         for (; c < children; c++) {
 913                 vd = rvd->vdev_child[c];
 914                 print_vdev_metaslab_header(vd);
 915 
 916                 for (m = 0; m < vd->vdev_ms_count; m++)
 917                         dump_metaslab(vd->vdev_ms[m]);
 918                 (void) printf("\n");
 919         }
 920 }
 921 
 922 static void
 923 dump_dde(const ddt_t *ddt, const ddt_entry_t *dde, uint64_t index)
 924 {
 925         const ddt_phys_t *ddp = dde->dde_phys;
 926         const ddt_key_t *ddk = &dde->dde_key;
 927         const char *types[4] = { "ditto", "single", "double", "triple" };
 928         char blkbuf[BP_SPRINTF_LEN];
 929         blkptr_t blk;
 930 
 931         for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
 932                 if (ddp->ddp_phys_birth == 0)
 933                         continue;
 934                 ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
 935                 snprintf_blkptr(blkbuf, sizeof (blkbuf), &blk);
 936                 (void) printf("index %llx refcnt %llu %s %s\n",
 937                     (u_longlong_t)index, (u_longlong_t)ddp->ddp_refcnt,
 938                     types[p], blkbuf);
 939         }
 940 }
 941 
 942 static void
 943 dump_dedup_ratio(const ddt_stat_t *dds)
 944 {
 945         double rL, rP, rD, D, dedup, compress, copies;
 946 
 947         if (dds->dds_blocks == 0)
 948                 return;
 949 
 950         rL = (double)dds->dds_ref_lsize;
 951         rP = (double)dds->dds_ref_psize;
 952         rD = (double)dds->dds_ref_dsize;
 953         D = (double)dds->dds_dsize;
 954 
 955         dedup = rD / D;
 956         compress = rL / rP;
 957         copies = rD / rP;
 958 
 959         (void) printf("dedup = %.2f, compress = %.2f, copies = %.2f, "
 960             "dedup * compress / copies = %.2f\n\n",
 961             dedup, compress, copies, dedup * compress / copies);
 962 }
 963 
 964 static void
 965 dump_ddt(ddt_t *ddt, enum ddt_type type, enum ddt_class class)
 966 {
 967         char name[DDT_NAMELEN];
 968         ddt_entry_t dde;
 969         uint64_t walk = 0;
 970         dmu_object_info_t doi;
 971         uint64_t count, dspace, mspace;
 972         int error;
 973 
 974         error = ddt_object_info(ddt, type, class, &doi);
 975 
 976         if (error == ENOENT)
 977                 return;
 978         ASSERT(error == 0);
 979 
 980         if ((count = ddt_object_count(ddt, type, class)) == 0)
 981                 return;
 982 
 983         dspace = doi.doi_physical_blocks_512 << 9;
 984         mspace = doi.doi_fill_count * doi.doi_data_block_size;
 985 
 986         ddt_object_name(ddt, type, class, name);
 987 
 988         (void) printf("%s: %llu entries, size %llu on disk, %llu in core\n",
 989             name,
 990             (u_longlong_t)count,
 991             (u_longlong_t)(dspace / count),
 992             (u_longlong_t)(mspace / count));
 993 
 994         if (dump_opt['D'] < 3)
 995                 return;
 996 
 997         zpool_dump_ddt(NULL, &ddt->ddt_histogram[type][class]);
 998 
 999         if (dump_opt['D'] < 4)
1000                 return;
1001 
1002         if (dump_opt['D'] < 5 && class == DDT_CLASS_UNIQUE)
1003                 return;
1004 
1005         (void) printf("%s contents:\n\n", name);
1006 
1007         while ((error = ddt_object_walk(ddt, type, class, &walk, &dde)) == 0)
1008                 dump_dde(ddt, &dde, walk);
1009 
1010         ASSERT(error == ENOENT);
1011 
1012         (void) printf("\n");
1013 }
1014 
1015 static void
1016 dump_all_ddts(spa_t *spa)
1017 {
1018         ddt_histogram_t ddh_total;
1019         ddt_stat_t dds_total;
1020 
1021         bzero(&ddh_total, sizeof (ddh_total));
1022         bzero(&dds_total, sizeof (dds_total));
1023 
1024         for (enum zio_checksum c = 0; c < ZIO_CHECKSUM_FUNCTIONS; c++) {
1025                 ddt_t *ddt = spa->spa_ddt[c];
1026                 for (enum ddt_type type = 0; type < DDT_TYPES; type++) {
1027                         for (enum ddt_class class = 0; class < DDT_CLASSES;
1028                             class++) {
1029                                 dump_ddt(ddt, type, class);
1030                         }
1031                 }
1032         }
1033 
1034         ddt_get_dedup_stats(spa, &dds_total);
1035 
1036         if (dds_total.dds_blocks == 0) {
1037                 (void) printf("All DDTs are empty\n");
1038                 return;
1039         }
1040 
1041         (void) printf("\n");
1042 
1043         if (dump_opt['D'] > 1) {
1044                 (void) printf("DDT histogram (aggregated over all DDTs):\n");
1045                 ddt_get_dedup_histogram(spa, &ddh_total);
1046                 zpool_dump_ddt(&dds_total, &ddh_total);
1047         }
1048 
1049         dump_dedup_ratio(&dds_total);
1050 }
1051 
1052 static void
1053 dump_dtl_seg(void *arg, uint64_t start, uint64_t size)
1054 {
1055         char *prefix = arg;
1056 
1057         (void) printf("%s [%llu,%llu) length %llu\n",
1058             prefix,
1059             (u_longlong_t)start,
1060             (u_longlong_t)(start + size),
1061             (u_longlong_t)(size));
1062 }
1063 
1064 static void
1065 dump_dtl(vdev_t *vd, int indent)
1066 {
1067         spa_t *spa = vd->vdev_spa;
1068         boolean_t required;
1069         const char *name[DTL_TYPES] = { "missing", "partial", "scrub",
1070                 "outage" };
1071         char prefix[256];
1072 
1073         spa_vdev_state_enter(spa, SCL_NONE);
1074         required = vdev_dtl_required(vd);
1075         (void) spa_vdev_state_exit(spa, NULL, 0);
1076 
1077         if (indent == 0)
1078                 (void) printf("\nDirty time logs:\n\n");
1079 
1080         (void) printf("\t%*s%s [%s]\n", indent, "",
1081             vd->vdev_path ? vd->vdev_path :
1082             vd->vdev_parent ? vd->vdev_ops->vdev_op_type : spa_name(spa),
1083             required ? "DTL-required" : "DTL-expendable");
1084 
1085         for (int t = 0; t < DTL_TYPES; t++) {
1086                 range_tree_t *rt = vd->vdev_dtl[t];
1087                 if (range_tree_space(rt) == 0)
1088                         continue;
1089                 (void) snprintf(prefix, sizeof (prefix), "\t%*s%s",
1090                     indent + 2, "", name[t]);
1091                 mutex_enter(rt->rt_lock);
1092                 range_tree_walk(rt, dump_dtl_seg, prefix);
1093                 mutex_exit(rt->rt_lock);
1094                 if (dump_opt['d'] > 5 && vd->vdev_children == 0)
1095                         dump_spacemap(spa->spa_meta_objset, vd->vdev_dtl_sm);
1096         }
1097 
1098         for (unsigned c = 0; c < vd->vdev_children; c++)
1099                 dump_dtl(vd->vdev_child[c], indent + 4);
1100 }
1101 
1102 static void
1103 dump_history(spa_t *spa)
1104 {
1105         nvlist_t **events = NULL;
1106         uint64_t resid, len, off = 0;
1107         uint_t num = 0;
1108         int error;
1109         time_t tsec;
1110         struct tm t;
1111         char tbuf[30];
1112         char internalstr[MAXPATHLEN];
1113 
1114         char *buf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
1115         do {
1116                 len = SPA_MAXBLOCKSIZE;
1117 
1118                 if ((error = spa_history_get(spa, &off, &len, buf)) != 0) {
1119                         (void) fprintf(stderr, "Unable to read history: "
1120                             "error %d\n", error);
1121                         umem_free(buf, SPA_MAXBLOCKSIZE);
1122                         return;
1123                 }
1124 
1125                 if (zpool_history_unpack(buf, len, &resid, &events, &num) != 0)
1126                         break;
1127 
1128                 off -= resid;
1129         } while (len != 0);
1130         umem_free(buf, SPA_MAXBLOCKSIZE);
1131 
1132         (void) printf("\nHistory:\n");
1133         for (unsigned i = 0; i < num; i++) {
1134                 uint64_t time, txg, ievent;
1135                 char *cmd, *intstr;
1136                 boolean_t printed = B_FALSE;
1137 
1138                 if (nvlist_lookup_uint64(events[i], ZPOOL_HIST_TIME,
1139                     &time) != 0)
1140                         goto next;
1141                 if (nvlist_lookup_string(events[i], ZPOOL_HIST_CMD,
1142                     &cmd) != 0) {
1143                         if (nvlist_lookup_uint64(events[i],
1144                             ZPOOL_HIST_INT_EVENT, &ievent) != 0)
1145                                 goto next;
1146                         verify(nvlist_lookup_uint64(events[i],
1147                             ZPOOL_HIST_TXG, &txg) == 0);
1148                         verify(nvlist_lookup_string(events[i],
1149                             ZPOOL_HIST_INT_STR, &intstr) == 0);
1150                         if (ievent >= ZFS_NUM_LEGACY_HISTORY_EVENTS)
1151                                 goto next;
1152 
1153                         (void) snprintf(internalstr,
1154                             sizeof (internalstr),
1155                             "[internal %s txg:%ju] %s",
1156                             zfs_history_event_names[ievent], (uintmax_t)txg,
1157                             intstr);
1158                         cmd = internalstr;
1159                 }
1160                 tsec = time;
1161                 (void) localtime_r(&tsec, &t);
1162                 (void) strftime(tbuf, sizeof (tbuf), "%F.%T", &t);
1163                 (void) printf("%s %s\n", tbuf, cmd);
1164                 printed = B_TRUE;
1165 
1166 next:
1167                 if (dump_opt['h'] > 1) {
1168                         if (!printed)
1169                                 (void) printf("unrecognized record:\n");
1170                         dump_nvlist(events[i], 2);
1171                 }
1172         }
1173 }
1174 
/*
 * No-op: the body is empty, so nothing is printed and every argument is
 * ignored.
 */
/*ARGSUSED*/
static void
dump_dnode(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1180 
1181 static uint64_t
1182 blkid2offset(const dnode_phys_t *dnp, const blkptr_t *bp,
1183     const zbookmark_phys_t *zb)
1184 {
1185         if (dnp == NULL) {
1186                 ASSERT(zb->zb_level < 0);
1187                 if (zb->zb_object == 0)
1188                         return (zb->zb_blkid);
1189                 return (zb->zb_blkid * BP_GET_LSIZE(bp));
1190         }
1191 
1192         ASSERT(zb->zb_level >= 0);
1193 
1194         return ((zb->zb_blkid <<
1195             (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT))) *
1196             dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
1197 }
1198 
1199 static void
1200 snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp)
1201 {
1202         const dva_t *dva = bp->blk_dva;
1203         int ndvas = dump_opt['d'] > 5 ? BP_GET_NDVAS(bp) : 1;
1204 
1205         if (dump_opt['b'] >= 6) {
1206                 snprintf_blkptr(blkbuf, buflen, bp);
1207                 return;
1208         }
1209 
1210         if (BP_IS_EMBEDDED(bp)) {
1211                 (void) sprintf(blkbuf,
1212                     "EMBEDDED et=%u %llxL/%llxP B=%llu",
1213                     (int)BPE_GET_ETYPE(bp),
1214                     (u_longlong_t)BPE_GET_LSIZE(bp),
1215                     (u_longlong_t)BPE_GET_PSIZE(bp),
1216                     (u_longlong_t)bp->blk_birth);
1217                 return;
1218         }
1219 
1220         blkbuf[0] = '\0';
1221         for (int i = 0; i < ndvas; i++)
1222                 (void) snprintf(blkbuf + strlen(blkbuf),
1223                     buflen - strlen(blkbuf), "%llu:%llx:%llx ",
1224                     (u_longlong_t)DVA_GET_VDEV(&dva[i]),
1225                     (u_longlong_t)DVA_GET_OFFSET(&dva[i]),
1226                     (u_longlong_t)DVA_GET_ASIZE(&dva[i]));
1227 
1228         if (BP_IS_HOLE(bp)) {
1229                 (void) snprintf(blkbuf + strlen(blkbuf),
1230                     buflen - strlen(blkbuf),
1231                     "%llxL B=%llu",
1232                     (u_longlong_t)BP_GET_LSIZE(bp),
1233                     (u_longlong_t)bp->blk_birth);
1234         } else {
1235                 (void) snprintf(blkbuf + strlen(blkbuf),
1236                     buflen - strlen(blkbuf),
1237                     "%llxL/%llxP F=%llu B=%llu/%llu",
1238                     (u_longlong_t)BP_GET_LSIZE(bp),
1239                     (u_longlong_t)BP_GET_PSIZE(bp),
1240                     (u_longlong_t)BP_GET_FILL(bp),
1241                     (u_longlong_t)bp->blk_birth,
1242                     (u_longlong_t)BP_PHYSICAL_BIRTH(bp));
1243         }
1244 }
1245 
1246 static void
1247 print_indirect(blkptr_t *bp, const zbookmark_phys_t *zb,
1248     const dnode_phys_t *dnp)
1249 {
1250         char blkbuf[BP_SPRINTF_LEN];
1251         int l;
1252 
1253         if (!BP_IS_EMBEDDED(bp)) {
1254                 ASSERT3U(BP_GET_TYPE(bp), ==, dnp->dn_type);
1255                 ASSERT3U(BP_GET_LEVEL(bp), ==, zb->zb_level);
1256         }
1257 
1258         (void) printf("%16llx ", (u_longlong_t)blkid2offset(dnp, bp, zb));
1259 
1260         ASSERT(zb->zb_level >= 0);
1261 
1262         for (l = dnp->dn_nlevels - 1; l >= -1; l--) {
1263                 if (l == zb->zb_level) {
1264                         (void) printf("L%llx", (u_longlong_t)zb->zb_level);
1265                 } else {
1266                         (void) printf(" ");
1267                 }
1268         }
1269 
1270         snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1271         (void) printf("%s\n", blkbuf);
1272 }
1273 
1274 static int
1275 visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
1276     blkptr_t *bp, const zbookmark_phys_t *zb)
1277 {
1278         int err = 0;
1279 
1280         if (bp->blk_birth == 0)
1281                 return (0);
1282 
1283         print_indirect(bp, zb, dnp);
1284 
1285         if (BP_GET_LEVEL(bp) > 0 && !BP_IS_HOLE(bp)) {
1286                 arc_flags_t flags = ARC_FLAG_WAIT;
1287                 int i;
1288                 blkptr_t *cbp;
1289                 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
1290                 arc_buf_t *buf;
1291                 uint64_t fill = 0;
1292 
1293                 err = arc_read(NULL, spa, bp, arc_getbuf_func, &buf,
1294                     ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
1295                 if (err)
1296                         return (err);
1297                 ASSERT(buf->b_data);
1298 
1299                 /* recursively visit blocks below this */
1300                 cbp = buf->b_data;
1301                 for (i = 0; i < epb; i++, cbp++) {
1302                         zbookmark_phys_t czb;
1303 
1304                         SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
1305                             zb->zb_level - 1,
1306                             zb->zb_blkid * epb + i);
1307                         err = visit_indirect(spa, dnp, cbp, &czb);
1308                         if (err)
1309                                 break;
1310                         fill += BP_GET_FILL(cbp);
1311                 }
1312                 if (!err)
1313                         ASSERT3U(fill, ==, BP_GET_FILL(bp));
1314                 arc_buf_destroy(buf, &buf);
1315         }
1316 
1317         return (err);
1318 }
1319 
1320 /*ARGSUSED*/
1321 static void
1322 dump_indirect(dnode_t *dn)
1323 {
1324         dnode_phys_t *dnp = dn->dn_phys;
1325         int j;
1326         zbookmark_phys_t czb;
1327 
1328         (void) printf("Indirect blocks:\n");
1329 
1330         SET_BOOKMARK(&czb, dmu_objset_id(dn->dn_objset),
1331             dn->dn_object, dnp->dn_nlevels - 1, 0);
1332         for (j = 0; j < dnp->dn_nblkptr; j++) {
1333                 czb.zb_blkid = j;
1334                 (void) visit_indirect(dmu_objset_spa(dn->dn_objset), dnp,
1335                     &dnp->dn_blkptr[j], &czb);
1336         }
1337 
1338         (void) printf("\n");
1339 }
1340 
1341 /*ARGSUSED*/
1342 static void
1343 dump_dsl_dir(objset_t *os, uint64_t object, void *data, size_t size)
1344 {
1345         dsl_dir_phys_t *dd = data;
1346         time_t crtime;
1347         char nice[32];
1348 
1349         /* make sure nicenum has enough space */
1350         CTASSERT(sizeof (nice) >= NN_NUMBUF_SZ);
1351 
1352         if (dd == NULL)
1353                 return;
1354 
1355         ASSERT3U(size, >=, sizeof (dsl_dir_phys_t));
1356 
1357         crtime = dd->dd_creation_time;
1358         (void) printf("\t\tcreation_time = %s", ctime(&crtime));
1359         (void) printf("\t\thead_dataset_obj = %llu\n",
1360             (u_longlong_t)dd->dd_head_dataset_obj);
1361         (void) printf("\t\tparent_dir_obj = %llu\n",
1362             (u_longlong_t)dd->dd_parent_obj);
1363         (void) printf("\t\torigin_obj = %llu\n",
1364             (u_longlong_t)dd->dd_origin_obj);
1365         (void) printf("\t\tchild_dir_zapobj = %llu\n",
1366             (u_longlong_t)dd->dd_child_dir_zapobj);
1367         zdb_nicenum(dd->dd_used_bytes, nice, sizeof (nice));
1368         (void) printf("\t\tused_bytes = %s\n", nice);
1369         zdb_nicenum(dd->dd_compressed_bytes, nice, sizeof (nice));
1370         (void) printf("\t\tcompressed_bytes = %s\n", nice);
1371         zdb_nicenum(dd->dd_uncompressed_bytes, nice, sizeof (nice));
1372         (void) printf("\t\tuncompressed_bytes = %s\n", nice);
1373         zdb_nicenum(dd->dd_quota, nice, sizeof (nice));
1374         (void) printf("\t\tquota = %s\n", nice);
1375         zdb_nicenum(dd->dd_reserved, nice, sizeof (nice));
1376         (void) printf("\t\treserved = %s\n", nice);
1377         (void) printf("\t\tprops_zapobj = %llu\n",
1378             (u_longlong_t)dd->dd_props_zapobj);
1379         (void) printf("\t\tdeleg_zapobj = %llu\n",
1380             (u_longlong_t)dd->dd_deleg_zapobj);
1381         (void) printf("\t\tflags = %llx\n",
1382             (u_longlong_t)dd->dd_flags);
1383 
1384 #define DO(which) \
1385         zdb_nicenum(dd->dd_used_breakdown[DD_USED_ ## which], nice, \
1386             sizeof (nice)); \
1387         (void) printf("\t\tused_breakdown[" #which "] = %s\n", nice)
1388         DO(HEAD);
1389         DO(SNAP);
1390         DO(CHILD);
1391         DO(CHILD_RSRV);
1392         DO(REFRSRV);
1393 #undef DO
1394 }
1395 
1396 /*ARGSUSED*/
1397 static void
1398 dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
1399 {
1400         dsl_dataset_phys_t *ds = data;
1401         time_t crtime;
1402         char used[32], compressed[32], uncompressed[32], unique[32];
1403         char blkbuf[BP_SPRINTF_LEN];
1404 
1405         /* make sure nicenum has enough space */
1406         CTASSERT(sizeof (used) >= NN_NUMBUF_SZ);
1407         CTASSERT(sizeof (compressed) >= NN_NUMBUF_SZ);
1408         CTASSERT(sizeof (uncompressed) >= NN_NUMBUF_SZ);
1409         CTASSERT(sizeof (unique) >= NN_NUMBUF_SZ);
1410 
1411         if (ds == NULL)
1412                 return;
1413 
1414         ASSERT(size == sizeof (*ds));
1415         crtime = ds->ds_creation_time;
1416         zdb_nicenum(ds->ds_referenced_bytes, used, sizeof (used));
1417         zdb_nicenum(ds->ds_compressed_bytes, compressed, sizeof (compressed));
1418         zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed,
1419             sizeof (uncompressed));
1420         zdb_nicenum(ds->ds_unique_bytes, unique, sizeof (unique));
1421         snprintf_blkptr(blkbuf, sizeof (blkbuf), &ds->ds_bp);
1422 
1423         (void) printf("\t\tdir_obj = %llu\n",
1424             (u_longlong_t)ds->ds_dir_obj);
1425         (void) printf("\t\tprev_snap_obj = %llu\n",
1426             (u_longlong_t)ds->ds_prev_snap_obj);
1427         (void) printf("\t\tprev_snap_txg = %llu\n",
1428             (u_longlong_t)ds->ds_prev_snap_txg);
1429         (void) printf("\t\tnext_snap_obj = %llu\n",
1430             (u_longlong_t)ds->ds_next_snap_obj);
1431         (void) printf("\t\tsnapnames_zapobj = %llu\n",
1432             (u_longlong_t)ds->ds_snapnames_zapobj);
1433         (void) printf("\t\tnum_children = %llu\n",
1434             (u_longlong_t)ds->ds_num_children);
1435         (void) printf("\t\tuserrefs_obj = %llu\n",
1436             (u_longlong_t)ds->ds_userrefs_obj);
1437         (void) printf("\t\tcreation_time = %s", ctime(&crtime));
1438         (void) printf("\t\tcreation_txg = %llu\n",
1439             (u_longlong_t)ds->ds_creation_txg);
1440         (void) printf("\t\tdeadlist_obj = %llu\n",
1441             (u_longlong_t)ds->ds_deadlist_obj);
1442         (void) printf("\t\tused_bytes = %s\n", used);
1443         (void) printf("\t\tcompressed_bytes = %s\n", compressed);
1444         (void) printf("\t\tuncompressed_bytes = %s\n", uncompressed);
1445         (void) printf("\t\tunique = %s\n", unique);
1446         (void) printf("\t\tfsid_guid = %llu\n",
1447             (u_longlong_t)ds->ds_fsid_guid);
1448         (void) printf("\t\tguid = %llu\n",
1449             (u_longlong_t)ds->ds_guid);
1450         (void) printf("\t\tflags = %llx\n",
1451             (u_longlong_t)ds->ds_flags);
1452         (void) printf("\t\tnext_clones_obj = %llu\n",
1453             (u_longlong_t)ds->ds_next_clones_obj);
1454         (void) printf("\t\tprops_obj = %llu\n",
1455             (u_longlong_t)ds->ds_props_obj);
1456         (void) printf("\t\tbp = %s\n", blkbuf);
1457 }
1458 
1459 /* ARGSUSED */
1460 static int
1461 dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1462 {
1463         char blkbuf[BP_SPRINTF_LEN];
1464 
1465         if (bp->blk_birth != 0) {
1466                 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
1467                 (void) printf("\t%s\n", blkbuf);
1468         }
1469         return (0);
1470 }
1471 
1472 static void
1473 dump_bptree(objset_t *os, uint64_t obj, const char *name)
1474 {
1475         char bytes[32];
1476         bptree_phys_t *bt;
1477         dmu_buf_t *db;
1478 
1479         /* make sure nicenum has enough space */
1480         CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
1481 
1482         if (dump_opt['d'] < 3)
1483                 return;
1484 
1485         VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
1486         bt = db->db_data;
1487         zdb_nicenum(bt->bt_bytes, bytes, sizeof (bytes));
1488         (void) printf("\n    %s: %llu datasets, %s\n",
1489             name, (unsigned long long)(bt->bt_end - bt->bt_begin), bytes);
1490         dmu_buf_rele(db, FTAG);
1491 
1492         if (dump_opt['d'] < 5)
1493                 return;
1494 
1495         (void) printf("\n");
1496 
1497         (void) bptree_iterate(os, obj, B_FALSE, dump_bptree_cb, NULL, NULL);
1498 }
1499 
1500 /* ARGSUSED */
1501 static int
1502 dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
1503 {
1504         char blkbuf[BP_SPRINTF_LEN];
1505 
1506         ASSERT(bp->blk_birth != 0);
1507         snprintf_blkptr_compact(blkbuf, sizeof (blkbuf), bp);
1508         (void) printf("\t%s\n", blkbuf);
1509         return (0);
1510 }
1511 
1512 static void
1513 dump_full_bpobj(bpobj_t *bpo, const char *name, int indent)
1514 {
1515         char bytes[32];
1516         char comp[32];
1517         char uncomp[32];
1518 
1519         /* make sure nicenum has enough space */
1520         CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
1521         CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
1522         CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
1523 
1524         if (dump_opt['d'] < 3)
1525                 return;
1526 
1527         zdb_nicenum(bpo->bpo_phys->bpo_bytes, bytes, sizeof (bytes));
1528         if (bpo->bpo_havesubobj && bpo->bpo_phys->bpo_subobjs != 0) {
1529                 zdb_nicenum(bpo->bpo_phys->bpo_comp, comp, sizeof (comp));
1530                 zdb_nicenum(bpo->bpo_phys->bpo_uncomp, uncomp, sizeof (uncomp));
1531                 (void) printf("    %*s: object %llu, %llu local blkptrs, "
1532                     "%llu subobjs in object %llu, %s (%s/%s comp)\n",
1533                     indent * 8, name,
1534                     (u_longlong_t)bpo->bpo_object,
1535                     (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1536                     (u_longlong_t)bpo->bpo_phys->bpo_num_subobjs,
1537                     (u_longlong_t)bpo->bpo_phys->bpo_subobjs,
1538                     bytes, comp, uncomp);
1539 
1540                 for (uint64_t i = 0; i < bpo->bpo_phys->bpo_num_subobjs; i++) {
1541                         uint64_t subobj;
1542                         bpobj_t subbpo;
1543                         int error;
1544                         VERIFY0(dmu_read(bpo->bpo_os,
1545                             bpo->bpo_phys->bpo_subobjs,
1546                             i * sizeof (subobj), sizeof (subobj), &subobj, 0));
1547                         error = bpobj_open(&subbpo, bpo->bpo_os, subobj);
1548                         if (error != 0) {
1549                                 (void) printf("ERROR %u while trying to open "
1550                                     "subobj id %llu\n",
1551                                     error, (u_longlong_t)subobj);
1552                                 continue;
1553                         }
1554                         dump_full_bpobj(&subbpo, "subobj", indent + 1);
1555                         bpobj_close(&subbpo);
1556                 }
1557         } else {
1558                 (void) printf("    %*s: object %llu, %llu blkptrs, %s\n",
1559                     indent * 8, name,
1560                     (u_longlong_t)bpo->bpo_object,
1561                     (u_longlong_t)bpo->bpo_phys->bpo_num_blkptrs,
1562                     bytes);
1563         }
1564 
1565         if (dump_opt['d'] < 5)
1566                 return;
1567 
1568 
1569         if (indent == 0) {
1570                 (void) bpobj_iterate_nofree(bpo, dump_bpobj_cb, NULL, NULL);
1571                 (void) printf("\n");
1572         }
1573 }
1574 
1575 static void
1576 dump_deadlist(dsl_deadlist_t *dl)
1577 {
1578         dsl_deadlist_entry_t *dle;
1579         uint64_t unused;
1580         char bytes[32];
1581         char comp[32];
1582         char uncomp[32];
1583 
1584         /* make sure nicenum has enough space */
1585         CTASSERT(sizeof (bytes) >= NN_NUMBUF_SZ);
1586         CTASSERT(sizeof (comp) >= NN_NUMBUF_SZ);
1587         CTASSERT(sizeof (uncomp) >= NN_NUMBUF_SZ);
1588 
1589         if (dump_opt['d'] < 3)
1590                 return;
1591 
1592         if (dl->dl_oldfmt) {
1593                 dump_full_bpobj(&dl->dl_bpobj, "old-format deadlist", 0);
1594                 return;
1595         }
1596 
1597         zdb_nicenum(dl->dl_phys->dl_used, bytes, sizeof (bytes));
1598         zdb_nicenum(dl->dl_phys->dl_comp, comp, sizeof (comp));
1599         zdb_nicenum(dl->dl_phys->dl_uncomp, uncomp, sizeof (uncomp));
1600         (void) printf("\n    Deadlist: %s (%s/%s comp)\n",
1601             bytes, comp, uncomp);
1602 
1603         if (dump_opt['d'] < 4)
1604                 return;
1605 
1606         (void) printf("\n");
1607 
1608         /* force the tree to be loaded */
1609         dsl_deadlist_space_range(dl, 0, UINT64_MAX, &unused, &unused, &unused);
1610 
1611         for (dle = avl_first(&dl->dl_tree); dle;
1612             dle = AVL_NEXT(&dl->dl_tree, dle)) {
1613                 if (dump_opt['d'] >= 5) {
1614                         char buf[128];
1615                         (void) snprintf(buf, sizeof (buf),
1616                             "mintxg %llu -> obj %llu",
1617                             (longlong_t)dle->dle_mintxg,
1618                             (longlong_t)dle->dle_bpobj.bpo_object);
1619 
1620                         dump_full_bpobj(&dle->dle_bpobj, buf, 0);
1621                 } else {
1622                         (void) printf("mintxg %llu -> obj %llu\n",
1623                             (longlong_t)dle->dle_mintxg,
1624                             (longlong_t)dle->dle_bpobj.bpo_object);
1625 
1626                 }
1627         }
1628 }
1629 
/*
 * File-scope state shared by the FUID and SA (system attribute) dumping code.
 */
/* FUID index and domain trees; created and loaded lazily by dump_uidgid(). */
static avl_tree_t idx_tree;
static avl_tree_t domain_tree;
/* B_TRUE once the two trees above have been created and loaded. */
static boolean_t fuid_table_loaded;
/* The objset currently held through open_objset(); NULL when none is open. */
static objset_t *sa_os = NULL;
/* Attribute table handed to sa_setup() by open_objset(); NULL after close. */
static sa_attr_type_t *sa_attr_table = NULL;
1635 
1636 static int
1637 open_objset(const char *path, dmu_objset_type_t type, void *tag, objset_t **osp)
1638 {
1639         int err;
1640         uint64_t sa_attrs = 0;
1641         uint64_t version = 0;
1642 
1643         VERIFY3P(sa_os, ==, NULL);
1644         err = dmu_objset_own(path, type, B_TRUE, tag, osp);
1645         if (err != 0) {
1646                 (void) fprintf(stderr, "failed to own dataset '%s': %s\n", path,
1647                     strerror(err));
1648                 return (err);
1649         }
1650 
1651         if (dmu_objset_type(*osp) == DMU_OST_ZFS) {
1652                 (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1653                     8, 1, &version);
1654                 if (version >= ZPL_VERSION_SA) {
1655                         (void) zap_lookup(*osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS,
1656                             8, 1, &sa_attrs);
1657                 }
1658                 err = sa_setup(*osp, sa_attrs, zfs_attr_table, ZPL_END,
1659                     &sa_attr_table);
1660                 if (err != 0) {
1661                         (void) fprintf(stderr, "sa_setup failed: %s\n",
1662                             strerror(err));
1663                         dmu_objset_disown(*osp, tag);
1664                         *osp = NULL;
1665                 }
1666         }
1667         sa_os = *osp;
1668 
1669         return (0);
1670 }
1671 
1672 static void
1673 close_objset(objset_t *os, void *tag)
1674 {
1675         VERIFY3P(os, ==, sa_os);
1676         if (os->os_sa != NULL)
1677                 sa_tear_down(os);
1678         dmu_objset_disown(os, tag);
1679         sa_attr_table = NULL;
1680         sa_os = NULL;
1681 }
1682 
1683 static void
1684 fuid_table_destroy()
1685 {
1686         if (fuid_table_loaded) {
1687                 zfs_fuid_table_destroy(&idx_tree, &domain_tree);
1688                 fuid_table_loaded = B_FALSE;
1689         }
1690 }
1691 
1692 /*
1693  * print uid or gid information.
1694  * For normal POSIX id just the id is printed in decimal format.
1695  * For CIFS files with FUID the fuid is printed in hex followed by
1696  * the domain-rid string.
1697  */
1698 static void
1699 print_idstr(uint64_t id, const char *id_type)
1700 {
1701         if (FUID_INDEX(id)) {
1702                 char *domain;
1703 
1704                 domain = zfs_fuid_idx_domain(&idx_tree, FUID_INDEX(id));
1705                 (void) printf("\t%s     %llx [%s-%d]\n", id_type,
1706                     (u_longlong_t)id, domain, (int)FUID_RID(id));
1707         } else {
1708                 (void) printf("\t%s     %llu\n", id_type, (u_longlong_t)id);
1709         }
1710 
1711 }
1712 
1713 static void
1714 dump_uidgid(objset_t *os, uint64_t uid, uint64_t gid)
1715 {
1716         uint32_t uid_idx, gid_idx;
1717 
1718         uid_idx = FUID_INDEX(uid);
1719         gid_idx = FUID_INDEX(gid);
1720 
1721         /* Load domain table, if not already loaded */
1722         if (!fuid_table_loaded && (uid_idx || gid_idx)) {
1723                 uint64_t fuid_obj;
1724 
1725                 /* first find the fuid object.  It lives in the master node */
1726                 VERIFY(zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES,
1727                     8, 1, &fuid_obj) == 0);
1728                 zfs_fuid_avl_tree_create(&idx_tree, &domain_tree);
1729                 (void) zfs_fuid_table_load(os, fuid_obj,
1730                     &idx_tree, &domain_tree);
1731                 fuid_table_loaded = B_TRUE;
1732         }
1733 
1734         print_idstr(uid, "uid");
1735         print_idstr(gid, "gid");
1736 }
1737 
1738 /*ARGSUSED*/
1739 static void
1740 dump_znode(objset_t *os, uint64_t object, void *data, size_t size)
1741 {
1742         char path[MAXPATHLEN * 2];      /* allow for xattr and failure prefix */
1743         sa_handle_t *hdl;
1744         uint64_t xattr, rdev, gen;
1745         uint64_t uid, gid, mode, fsize, parent, links;
1746         uint64_t pflags;
1747         uint64_t acctm[2], modtm[2], chgtm[2], crtm[2];
1748         time_t z_crtime, z_atime, z_mtime, z_ctime;
1749         sa_bulk_attr_t bulk[12];
1750         int idx = 0;
1751         int error;
1752 
1753         VERIFY3P(os, ==, sa_os);
1754         if (sa_handle_get(os, object, NULL, SA_HDL_PRIVATE, &hdl)) {
1755                 (void) printf("Failed to get handle for SA znode\n");
1756                 return;
1757         }
1758 
1759         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_UID], NULL, &uid, 8);
1760         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GID], NULL, &gid, 8);
1761         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_LINKS], NULL,
1762             &links, 8);
1763         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_GEN], NULL, &gen, 8);
1764         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MODE], NULL,
1765             &mode, 8);
1766         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_PARENT],
1767             NULL, &parent, 8);
1768         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_SIZE], NULL,
1769             &fsize, 8);
1770         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_ATIME], NULL,
1771             acctm, 16);
1772         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_MTIME], NULL,
1773             modtm, 16);
1774         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CRTIME], NULL,
1775             crtm, 16);
1776         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_CTIME], NULL,
1777             chgtm, 16);
1778         SA_ADD_BULK_ATTR(bulk, idx, sa_attr_table[ZPL_FLAGS], NULL,
1779             &pflags, 8);
1780 
1781         if (sa_bulk_lookup(hdl, bulk, idx)) {
1782                 (void) sa_handle_destroy(hdl);
1783                 return;
1784         }
1785 
1786         z_crtime = (time_t)crtm[0];
1787         z_atime = (time_t)acctm[0];
1788         z_mtime = (time_t)modtm[0];
1789         z_ctime = (time_t)chgtm[0];
1790 
1791         if (dump_opt['d'] > 4) {
1792                 error = zfs_obj_to_path(os, object, path, sizeof (path));
1793                 if (error != 0) {
1794                         (void) snprintf(path, sizeof (path),
1795                             "\?\?\?<object#%llu>", (u_longlong_t)object);
1796                 }
1797                 (void) printf("\tpath   %s\n", path);
1798         }
1799         dump_uidgid(os, uid, gid);
1800         (void) printf("\tatime  %s", ctime(&z_atime));
1801         (void) printf("\tmtime  %s", ctime(&z_mtime));
1802         (void) printf("\tctime  %s", ctime(&z_ctime));
1803         (void) printf("\tcrtime %s", ctime(&z_crtime));
1804         (void) printf("\tgen    %llu\n", (u_longlong_t)gen);
1805         (void) printf("\tmode   %llo\n", (u_longlong_t)mode);
1806         (void) printf("\tsize   %llu\n", (u_longlong_t)fsize);
1807         (void) printf("\tparent %llu\n", (u_longlong_t)parent);
1808         (void) printf("\tlinks  %llu\n", (u_longlong_t)links);
1809         (void) printf("\tpflags %llx\n", (u_longlong_t)pflags);
1810         if (sa_lookup(hdl, sa_attr_table[ZPL_XATTR], &xattr,
1811             sizeof (uint64_t)) == 0)
1812                 (void) printf("\txattr  %llu\n", (u_longlong_t)xattr);
1813         if (sa_lookup(hdl, sa_attr_table[ZPL_RDEV], &rdev,
1814             sizeof (uint64_t)) == 0)
1815                 (void) printf("\trdev   0x%016llx\n", (u_longlong_t)rdev);
1816         sa_handle_destroy(hdl);
1817 }
1818 
/*
 * Object viewer for the ACL object types in object_viewer[].
 * Intentionally a no-op: nothing is printed and all arguments are unused.
 */
/*ARGSUSED*/
static void
dump_acl(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1824 
/*
 * Object viewer for the "DMU objset" object type in object_viewer[].
 * Intentionally a no-op: nothing is printed and all arguments are unused.
 */
/*ARGSUSED*/
static void
dump_dmu_objset(objset_t *os, uint64_t object, void *data, size_t size)
{
}
1830 
/*
 * Dispatch table of per-object-type dump routines.  dump_object() indexes it
 * with ZDB_OT_TYPE(type) for both the bonus type and the object type, and
 * calls the selected viewer as viewer(os, object, data, size).
 *
 * The table is positional: each slot's comment names the object type it
 * serves, and the final dump_unknown slot must stay last.
 * NOTE(review): slots presumably follow the dmu_object_type_t enum order,
 * with ZDB_OT_TYPE() mapping unrecognized types to the last slot -- confirm
 * against those definitions before adding or reordering entries.
 */
static object_viewer_t *object_viewer[DMU_OT_NUMTYPES + 1] = {
        dump_none,              /* unallocated                  */
        dump_zap,               /* object directory             */
        dump_uint64,            /* object array                 */
        dump_none,              /* packed nvlist                */
        dump_packed_nvlist,     /* packed nvlist size           */
        dump_none,              /* bpobj                        */
        dump_bpobj,             /* bpobj header                 */
        dump_none,              /* SPA space map header         */
        dump_none,              /* SPA space map                */
        dump_none,              /* ZIL intent log               */
        dump_dnode,             /* DMU dnode                    */
        dump_dmu_objset,        /* DMU objset                   */
        dump_dsl_dir,           /* DSL directory                */
        dump_zap,               /* DSL directory child map      */
        dump_zap,               /* DSL dataset snap map         */
        dump_zap,               /* DSL props                    */
        dump_dsl_dataset,       /* DSL dataset                  */
        dump_znode,             /* ZFS znode                    */
        dump_acl,               /* ZFS V0 ACL                   */
        dump_uint8,             /* ZFS plain file               */
        dump_zpldir,            /* ZFS directory                */
        dump_zap,               /* ZFS master node              */
        dump_zap,               /* ZFS delete queue             */
        dump_uint8,             /* zvol object                  */
        dump_zap,               /* zvol prop                    */
        dump_uint8,             /* other uint8[]                */
        dump_uint64,            /* other uint64[]               */
        dump_zap,               /* other ZAP                    */
        dump_zap,               /* persistent error log         */
        dump_uint8,             /* SPA history                  */
        dump_history_offsets,   /* SPA history offsets          */
        dump_zap,               /* Pool properties              */
        dump_zap,               /* DSL permissions              */
        dump_acl,               /* ZFS ACL                      */
        dump_uint8,             /* ZFS SYSACL                   */
        dump_none,              /* FUID nvlist                  */
        dump_packed_nvlist,     /* FUID nvlist size             */
        dump_zap,               /* DSL dataset next clones      */
        dump_zap,               /* DSL scrub queue              */
        dump_zap,               /* ZFS user/group used          */
        dump_zap,               /* ZFS user/group quota         */
        dump_zap,               /* snapshot refcount tags       */
        dump_ddt_zap,           /* DDT ZAP object               */
        dump_zap,               /* DDT statistics               */
        dump_znode,             /* SA object                    */
        dump_zap,               /* SA Master Node               */
        dump_sa_attrs,          /* SA attribute registration    */
        dump_sa_layouts,        /* SA attribute layouts         */
        dump_zap,               /* DSL scrub translations       */
        dump_none,              /* fake dedup BP                */
        dump_zap,               /* deadlist                     */
        dump_none,              /* deadlist hdr                 */
        dump_zap,               /* dsl clones                   */
        dump_bpobj_subobjs,     /* bpobj subobjs                */
        dump_unknown,           /* Unknown type, must be last   */
};
1888 
1889 static void
1890 dump_object(objset_t *os, uint64_t object, int verbosity, int *print_header)
1891 {
1892         dmu_buf_t *db = NULL;
1893         dmu_object_info_t doi;
1894         dnode_t *dn;
1895         void *bonus = NULL;
1896         size_t bsize = 0;
1897         char iblk[32], dblk[32], lsize[32], asize[32], fill[32];
1898         char bonus_size[32];
1899         char aux[50];
1900         int error;
1901 
1902         /* make sure nicenum has enough space */
1903         CTASSERT(sizeof (iblk) >= NN_NUMBUF_SZ);
1904         CTASSERT(sizeof (dblk) >= NN_NUMBUF_SZ);
1905         CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
1906         CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
1907         CTASSERT(sizeof (bonus_size) >= NN_NUMBUF_SZ);
1908 
1909         if (*print_header) {
1910                 (void) printf("\n%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1911                     "Object", "lvl", "iblk", "dblk", "dsize", "lsize",
1912                     "%full", "type");
1913                 *print_header = 0;
1914         }
1915 
1916         if (object == 0) {
1917                 dn = DMU_META_DNODE(os);
1918         } else {
1919                 error = dmu_bonus_hold(os, object, FTAG, &db);
1920                 if (error)
1921                         fatal("dmu_bonus_hold(%llu) failed, errno %u",
1922                             object, error);
1923                 bonus = db->db_data;
1924                 bsize = db->db_size;
1925                 dn = DB_DNODE((dmu_buf_impl_t *)db);
1926         }
1927         dmu_object_info_from_dnode(dn, &doi);
1928 
1929         zdb_nicenum(doi.doi_metadata_block_size, iblk, sizeof (iblk));
1930         zdb_nicenum(doi.doi_data_block_size, dblk, sizeof (dblk));
1931         zdb_nicenum(doi.doi_max_offset, lsize, sizeof (lsize));
1932         zdb_nicenum(doi.doi_physical_blocks_512 << 9, asize, sizeof (asize));
1933         zdb_nicenum(doi.doi_bonus_size, bonus_size, sizeof (bonus_size));
1934         (void) sprintf(fill, "%6.2f", 100.0 * doi.doi_fill_count *
1935             doi.doi_data_block_size / (object == 0 ? DNODES_PER_BLOCK : 1) /
1936             doi.doi_max_offset);
1937 
1938         aux[0] = '\0';
1939 
1940         if (doi.doi_checksum != ZIO_CHECKSUM_INHERIT || verbosity >= 6) {
1941                 (void) snprintf(aux + strlen(aux), sizeof (aux), " (K=%s)",
1942                     ZDB_CHECKSUM_NAME(doi.doi_checksum));
1943         }
1944 
1945         if (doi.doi_compress != ZIO_COMPRESS_INHERIT || verbosity >= 6) {
1946                 (void) snprintf(aux + strlen(aux), sizeof (aux), " (Z=%s)",
1947                     ZDB_COMPRESS_NAME(doi.doi_compress));
1948         }
1949 
1950         (void) printf("%10lld  %3u  %5s  %5s  %5s  %5s  %6s  %s%s\n",
1951             (u_longlong_t)object, doi.doi_indirection, iblk, dblk,
1952             asize, lsize, fill, ZDB_OT_NAME(doi.doi_type), aux);
1953 
1954         if (doi.doi_bonus_type != DMU_OT_NONE && verbosity > 3) {
1955                 (void) printf("%10s  %3s  %5s  %5s  %5s  %5s  %6s  %s\n",
1956                     "", "", "", "", "", bonus_size, "bonus",
1957                     ZDB_OT_NAME(doi.doi_bonus_type));
1958         }
1959 
1960         if (verbosity >= 4) {
1961                 (void) printf("\tdnode flags: %s%s%s\n",
1962                     (dn->dn_phys->dn_flags & DNODE_FLAG_USED_BYTES) ?
1963                     "USED_BYTES " : "",
1964                     (dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED) ?
1965                     "USERUSED_ACCOUNTED " : "",
1966                     (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ?
1967                     "SPILL_BLKPTR" : "");
1968                 (void) printf("\tdnode maxblkid: %llu\n",
1969                     (longlong_t)dn->dn_phys->dn_maxblkid);
1970 
1971                 object_viewer[ZDB_OT_TYPE(doi.doi_bonus_type)](os, object,
1972                     bonus, bsize);
1973                 object_viewer[ZDB_OT_TYPE(doi.doi_type)](os, object, NULL, 0);
1974                 *print_header = 1;
1975         }
1976 
1977         if (verbosity >= 5)
1978                 dump_indirect(dn);
1979 
1980         if (verbosity >= 5) {
1981                 /*
1982                  * Report the list of segments that comprise the object.
1983                  */
1984                 uint64_t start = 0;
1985                 uint64_t end;
1986                 uint64_t blkfill = 1;
1987                 int minlvl = 1;
1988 
1989                 if (dn->dn_type == DMU_OT_DNODE) {
1990                         minlvl = 0;
1991                         blkfill = DNODES_PER_BLOCK;
1992                 }
1993 
1994                 for (;;) {
1995                         char segsize[32];
1996                         /* make sure nicenum has enough space */
1997                         CTASSERT(sizeof (segsize) >= NN_NUMBUF_SZ);
1998                         error = dnode_next_offset(dn,
1999                             0, &start, minlvl, blkfill, 0);
2000                         if (error)
2001                                 break;
2002                         end = start;
2003                         error = dnode_next_offset(dn,
2004                             DNODE_FIND_HOLE, &end, minlvl, blkfill, 0);
2005                         zdb_nicenum(end - start, segsize, sizeof (segsize));
2006                         (void) printf("\t\tsegment [%016llx, %016llx)"
2007                             " size %5s\n", (u_longlong_t)start,
2008                             (u_longlong_t)end, segsize);
2009                         if (error)
2010                                 break;
2011                         start = end;
2012                 }
2013         }
2014 
2015         if (db != NULL)
2016                 dmu_buf_rele(db, FTAG);
2017 }
2018 
/*
 * Printable names for objset types, indexed by the dds_type value that
 * dump_dir() obtains from dmu_objset_fast_stat().  Values outside the table
 * are reported by dump_dir() as "UNKNOWN".
 */
static const char *objset_types[DMU_OST_NUMTYPES] = {
        "NONE", "META", "ZPL", "ZVOL", "OTHER", "ANY" };
2021 
2022 static void
2023 dump_dir(objset_t *os)
2024 {
2025         dmu_objset_stats_t dds;
2026         uint64_t object, object_count;
2027         uint64_t refdbytes, usedobjs, scratch;
2028         char numbuf[32];
2029         char blkbuf[BP_SPRINTF_LEN + 20];
2030         char osname[ZFS_MAX_DATASET_NAME_LEN];
2031         const char *type = "UNKNOWN";
2032         int verbosity = dump_opt['d'];
2033         int print_header = 1;
2034         unsigned i;
2035         int error;
2036 
2037         /* make sure nicenum has enough space */
2038         CTASSERT(sizeof (numbuf) >= NN_NUMBUF_SZ);
2039 
2040         dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
2041         dmu_objset_fast_stat(os, &dds);
2042         dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
2043 
2044         if (dds.dds_type < DMU_OST_NUMTYPES)
2045                 type = objset_types[dds.dds_type];
2046 
2047         if (dds.dds_type == DMU_OST_META) {
2048                 dds.dds_creation_txg = TXG_INITIAL;
2049                 usedobjs = BP_GET_FILL(os->os_rootbp);
2050                 refdbytes = dsl_dir_phys(os->os_spa->spa_dsl_pool->dp_mos_dir)->
2051                     dd_used_bytes;
2052         } else {
2053                 dmu_objset_space(os, &refdbytes, &scratch, &usedobjs, &scratch);
2054         }
2055 
2056         ASSERT3U(usedobjs, ==, BP_GET_FILL(os->os_rootbp));
2057 
2058         zdb_nicenum(refdbytes, numbuf, sizeof (numbuf));
2059 
2060         if (verbosity >= 4) {
2061                 (void) snprintf(blkbuf, sizeof (blkbuf), ", rootbp ");
2062                 (void) snprintf_blkptr(blkbuf + strlen(blkbuf),
2063                     sizeof (blkbuf) - strlen(blkbuf), os->os_rootbp);
2064         } else {
2065                 blkbuf[0] = '\0';
2066         }
2067 
2068         dmu_objset_name(os, osname);
2069 
2070         (void) printf("Dataset %s [%s], ID %llu, cr_txg %llu, "
2071             "%s, %llu objects%s\n",
2072             osname, type, (u_longlong_t)dmu_objset_id(os),
2073             (u_longlong_t)dds.dds_creation_txg,
2074             numbuf, (u_longlong_t)usedobjs, blkbuf);
2075 
2076         if (zopt_objects != 0) {
2077                 for (i = 0; i < zopt_objects; i++)
2078                         dump_object(os, zopt_object[i], verbosity,
2079                             &print_header);
2080                 (void) printf("\n");
2081                 return;
2082         }
2083 
2084         if (dump_opt['i'] != 0 || verbosity >= 2)
2085                 dump_intent_log(dmu_objset_zil(os));
2086 
2087         if (dmu_objset_ds(os) != NULL)
2088                 dump_deadlist(&dmu_objset_ds(os)->ds_deadlist);
2089 
2090         if (verbosity < 2)
2091                 return;
2092 
2093         if (BP_IS_HOLE(os->os_rootbp))
2094                 return;
2095 
2096         dump_object(os, 0, verbosity, &print_header);
2097         object_count = 0;
2098         if (DMU_USERUSED_DNODE(os) != NULL &&
2099             DMU_USERUSED_DNODE(os)->dn_type != 0) {
2100                 dump_object(os, DMU_USERUSED_OBJECT, verbosity, &print_header);
2101                 dump_object(os, DMU_GROUPUSED_OBJECT, verbosity, &print_header);
2102         }
2103 
2104         object = 0;
2105         while ((error = dmu_object_next(os, &object, B_FALSE, 0)) == 0) {
2106                 dump_object(os, object, verbosity, &print_header);
2107                 object_count++;
2108         }
2109 
2110         ASSERT3U(object_count, ==, usedobjs);
2111 
2112         (void) printf("\n");
2113 
2114         if (error != ESRCH) {
2115                 (void) fprintf(stderr, "dmu_object_next() = %d\n", error);
2116                 abort();
2117         }
2118 }
2119 
2120 static void
2121 dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
2122 {
2123         time_t timestamp = ub->ub_timestamp;
2124 
2125         (void) printf("%s", header ? header : "");
2126         (void) printf("\tmagic = %016llx\n", (u_longlong_t)ub->ub_magic);
2127         (void) printf("\tversion = %llu\n", (u_longlong_t)ub->ub_version);
2128         (void) printf("\ttxg = %llu\n", (u_longlong_t)ub->ub_txg);
2129         (void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
2130         (void) printf("\ttimestamp = %llu UTC = %s",
2131             (u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
2132         if (dump_opt['u'] >= 3) {
2133                 char blkbuf[BP_SPRINTF_LEN];
2134                 snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
2135                 (void) printf("\trootbp = %s\n", blkbuf);
2136         }
2137         (void) printf("%s", footer ? footer : "");
2138 }
2139 
2140 static void
2141 dump_config(spa_t *spa)
2142 {
2143         dmu_buf_t *db;
2144         size_t nvsize = 0;
2145         int error = 0;
2146 
2147 
2148         error = dmu_bonus_hold(spa->spa_meta_objset,
2149             spa->spa_config_object, FTAG, &db);
2150 
2151         if (error == 0) {
2152                 nvsize = *(uint64_t *)db->db_data;
2153                 dmu_buf_rele(db, FTAG);
2154 
2155                 (void) printf("\nMOS Configuration:\n");
2156                 dump_packed_nvlist(spa->spa_meta_objset,
2157                     spa->spa_config_object, (void *)&nvsize, 1);
2158         } else {
2159                 (void) fprintf(stderr, "dmu_bonus_hold(%llu) failed, errno %d",
2160                     (u_longlong_t)spa->spa_config_object, error);
2161         }
2162 }
2163 
2164 static void
2165 dump_cachefile(const char *cachefile)
2166 {
2167         int fd;
2168         struct stat64 statbuf;
2169         char *buf;
2170         nvlist_t *config;
2171 
2172         if ((fd = open64(cachefile, O_RDONLY)) < 0) {
2173                 (void) printf("cannot open '%s': %s\n", cachefile,
2174                     strerror(errno));
2175                 exit(1);
2176         }
2177 
2178         if (fstat64(fd, &statbuf) != 0) {
2179                 (void) printf("failed to stat '%s': %s\n", cachefile,
2180                     strerror(errno));
2181                 exit(1);
2182         }
2183 
2184         if ((buf = malloc(statbuf.st_size)) == NULL) {
2185                 (void) fprintf(stderr, "failed to allocate %llu bytes\n",
2186                     (u_longlong_t)statbuf.st_size);
2187                 exit(1);
2188         }
2189 
2190         if (read(fd, buf, statbuf.st_size) != statbuf.st_size) {
2191                 (void) fprintf(stderr, "failed to read %llu bytes\n",
2192                     (u_longlong_t)statbuf.st_size);
2193                 exit(1);
2194         }
2195 
2196         (void) close(fd);
2197 
2198         if (nvlist_unpack(buf, statbuf.st_size, &config, 0) != 0) {
2199                 (void) fprintf(stderr, "failed to unpack nvlist\n");
2200                 exit(1);
2201         }
2202 
2203         free(buf);
2204 
2205         dump_nvlist(config, 0);
2206 
2207         nvlist_free(config);
2208 }
2209 
2210 #define ZDB_MAX_UB_HEADER_SIZE 32
2211 
2212 static void
2213 dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
2214 {
2215         vdev_t vd;
2216         vdev_t *vdp = &vd;
2217         char header[ZDB_MAX_UB_HEADER_SIZE];
2218 
2219         vd.vdev_ashift = ashift;
2220         vdp->vdev_top = vdp;
2221 
2222         for (int i = 0; i < VDEV_UBERBLOCK_COUNT(vdp); i++) {
2223                 uint64_t uoff = VDEV_UBERBLOCK_OFFSET(vdp, i);
2224                 uberblock_t *ub = (void *)((char *)lbl + uoff);
2225 
2226                 if (uberblock_verify(ub))
2227                         continue;
2228                 (void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
2229                     "Uberblock[%d]\n", i);
2230                 dump_uberblock(ub, header, "");
2231         }
2232 }
2233 
2234 static char curpath[PATH_MAX];
2235 
2236 /*
2237  * Iterate through the path components, recursively passing
2238  * current one's obj and remaining path until we find the obj
2239  * for the last one.
2240  */
2241 static int
2242 dump_path_impl(objset_t *os, uint64_t obj, char *name)
2243 {
2244         int err;
2245         int header = 1;
2246         uint64_t child_obj;
2247         char *s;
2248         dmu_buf_t *db;
2249         dmu_object_info_t doi;
2250 
2251         if ((s = strchr(name, '/')) != NULL)
2252                 *s = '\0';
2253         err = zap_lookup(os, obj, name, 8, 1, &child_obj);
2254 
2255         (void) strlcat(curpath, name, sizeof (curpath));
2256 
2257         if (err != 0) {
2258                 (void) fprintf(stderr, "failed to lookup %s: %s\n",
2259                     curpath, strerror(err));
2260                 return (err);
2261         }
2262 
2263         child_obj = ZFS_DIRENT_OBJ(child_obj);
2264         err = sa_buf_hold(os, child_obj, FTAG, &db);
2265         if (err != 0) {
2266                 (void) fprintf(stderr,
2267                     "failed to get SA dbuf for obj %llu: %s\n",
2268                     (u_longlong_t)child_obj, strerror(err));
2269                 return (EINVAL);
2270         }
2271         dmu_object_info_from_db(db, &doi);
2272         sa_buf_rele(db, FTAG);
2273 
2274         if (doi.doi_bonus_type != DMU_OT_SA &&
2275             doi.doi_bonus_type != DMU_OT_ZNODE) {
2276                 (void) fprintf(stderr, "invalid bonus type %d for obj %llu\n",
2277                     doi.doi_bonus_type, (u_longlong_t)child_obj);
2278                 return (EINVAL);
2279         }
2280 
2281         if (dump_opt['v'] > 6) {
2282                 (void) printf("obj=%llu %s type=%d bonustype=%d\n",
2283                     (u_longlong_t)child_obj, curpath, doi.doi_type,
2284                     doi.doi_bonus_type);
2285         }
2286 
2287         (void) strlcat(curpath, "/", sizeof (curpath));
2288 
2289         switch (doi.doi_type) {
2290         case DMU_OT_DIRECTORY_CONTENTS:
2291                 if (s != NULL && *(s + 1) != '\0')
2292                         return (dump_path_impl(os, child_obj, s + 1));
2293                 /*FALLTHROUGH*/
2294         case DMU_OT_PLAIN_FILE_CONTENTS:
2295                 dump_object(os, child_obj, dump_opt['v'], &header);
2296                 return (0);
2297         default:
2298                 (void) fprintf(stderr, "object %llu has non-file/directory "
2299                     "type %d\n", (u_longlong_t)obj, doi.doi_type);
2300                 break;
2301         }
2302 
2303         return (EINVAL);
2304 }
2305 
2306 /*
2307  * Dump the blocks for the object specified by path inside the dataset.
2308  */
2309 static int
2310 dump_path(char *ds, char *path)
2311 {
2312         int err;
2313         objset_t *os;
2314         uint64_t root_obj;
2315 
2316         err = open_objset(ds, DMU_OST_ZFS, FTAG, &os);
2317         if (err != 0)
2318                 return (err);
2319 
2320         err = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &root_obj);
2321         if (err != 0) {
2322                 (void) fprintf(stderr, "can't lookup root znode: %s\n",
2323                     strerror(err));
2324                 dmu_objset_disown(os, FTAG);
2325                 return (EINVAL);
2326         }
2327 
2328         (void) snprintf(curpath, sizeof (curpath), "dataset=%s path=/", ds);
2329 
2330         err = dump_path_impl(os, root_obj, path);
2331 
2332         close_objset(os, FTAG);
2333         return (err);
2334 }
2335 
2336 static int
2337 dump_label(const char *dev)
2338 {
2339         int fd;
2340         vdev_label_t label;
2341         char path[MAXPATHLEN];
2342         char *buf = label.vl_vdev_phys.vp_nvlist;
2343         size_t buflen = sizeof (label.vl_vdev_phys.vp_nvlist);
2344         struct stat64 statbuf;
2345         uint64_t psize, ashift;
2346         boolean_t label_found = B_FALSE;
2347 
2348         (void) strlcpy(path, dev, sizeof (path));
2349         if (dev[0] == '/') {
2350                 if (strncmp(dev, ZFS_DISK_ROOTD,
2351                     strlen(ZFS_DISK_ROOTD)) == 0) {
2352                         (void) snprintf(path, sizeof (path), "%s%s",
2353                             ZFS_RDISK_ROOTD, dev + strlen(ZFS_DISK_ROOTD));
2354                 }
2355         } else if (stat64(path, &statbuf) != 0) {
2356                 char *s;
2357 
2358                 (void) snprintf(path, sizeof (path), "%s%s", ZFS_RDISK_ROOTD,
2359                     dev);
2360                 if (((s = strrchr(dev, 's')) == NULL &&
2361                     (s = strchr(dev, 'p')) == NULL) ||
2362                     !isdigit(*(s + 1)))
2363                         (void) strlcat(path, "s0", sizeof (path));
2364         }
2365 
2366         if ((fd = open64(path, O_RDONLY)) < 0) {
2367                 (void) fprintf(stderr, "cannot open '%s': %s\n", path,
2368                     strerror(errno));
2369                 exit(1);
2370         }
2371 
2372         if (fstat64(fd, &statbuf) != 0) {
2373                 (void) fprintf(stderr, "failed to stat '%s': %s\n", path,
2374                     strerror(errno));
2375                 (void) close(fd);
2376                 exit(1);
2377         }
2378 
2379         if (S_ISBLK(statbuf.st_mode)) {
2380                 (void) fprintf(stderr,
2381                     "cannot use '%s': character device required\n", path);
2382                 (void) close(fd);
2383                 exit(1);
2384         }
2385 
2386         psize = statbuf.st_size;
2387         psize = P2ALIGN(psize, (uint64_t)sizeof (vdev_label_t));
2388 
2389         for (int l = 0; l < VDEV_LABELS; l++) {
2390                 nvlist_t *config = NULL;
2391 
2392                 if (!dump_opt['q']) {
2393                         (void) printf("------------------------------------\n");
2394                         (void) printf("LABEL %d\n", l);
2395                         (void) printf("------------------------------------\n");
2396                 }
2397 
2398                 if (pread64(fd, &label, sizeof (label),
2399                     vdev_label_offset(psize, l, 0)) != sizeof (label)) {
2400                         if (!dump_opt['q'])
2401                                 (void) printf("failed to read label %d\n", l);
2402                         continue;
2403                 }
2404 
2405                 if (nvlist_unpack(buf, buflen, &config, 0) != 0) {
2406                         if (!dump_opt['q'])
2407                                 (void) printf("failed to unpack label %d\n", l);
2408                         ashift = SPA_MINBLOCKSHIFT;
2409                 } else {
2410                         nvlist_t *vdev_tree = NULL;
2411 
2412                         if (!dump_opt['q'])
2413                                 dump_nvlist(config, 4);
2414                         if ((nvlist_lookup_nvlist(config,
2415                             ZPOOL_CONFIG_VDEV_TREE, &vdev_tree) != 0) ||
2416                             (nvlist_lookup_uint64(vdev_tree,
2417                             ZPOOL_CONFIG_ASHIFT, &ashift) != 0))
2418                                 ashift = SPA_MINBLOCKSHIFT;
2419                         nvlist_free(config);
2420                         label_found = B_TRUE;
2421                 }
2422                 if (dump_opt['u'])
2423                         dump_label_uberblocks(&label, ashift);
2424         }
2425 
2426         (void) close(fd);
2427 
2428         return (label_found ? 0 : 2);
2429 }
2430 
2431 static uint64_t dataset_feature_count[SPA_FEATURES];
2432 
2433 /*ARGSUSED*/
2434 static int
2435 dump_one_dir(const char *dsname, void *arg)
2436 {
2437         int error;
2438         objset_t *os;
2439 
2440         error = open_objset(dsname, DMU_OST_ANY, FTAG, &os);
2441         if (error != 0)
2442                 return (0);
2443 
2444         for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
2445                 if (!dmu_objset_ds(os)->ds_feature_inuse[f])
2446                         continue;
2447                 ASSERT(spa_feature_table[f].fi_flags &
2448                     ZFEATURE_FLAG_PER_DATASET);
2449                 dataset_feature_count[f]++;
2450         }
2451 
2452         dump_dir(os);
2453         close_objset(os, FTAG);
2454         fuid_table_destroy();
2455         return (0);
2456 }
2457 
/*
 * Block statistics.
 *
 * One zdb_blkstats_t accumulates the totals for a single (level, type)
 * cell of zdb_cb_t's zcb_type table; zdb_count_block() does the updating.
 *
 * The psize histogram has one bucket per SPA_MINBLOCKSIZE multiple up to
 * SPA_OLD_MAXBLOCKSIZE, plus a final bucket that collects anything larger.
 */
#define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2)
typedef struct zdb_blkstats {
        uint64_t zb_asize;              /* sum of BP_GET_ASIZE() */
        uint64_t zb_lsize;              /* sum of BP_GET_LSIZE() */
        uint64_t zb_psize;              /* sum of BP_GET_PSIZE() */
        uint64_t zb_count;              /* number of block pointers counted */
        uint64_t zb_gangs;              /* sum of BP_COUNT_GANG() */
        uint64_t zb_ditto_samevdev;     /* bps with 2+ DVAs on the same vdev */
        /* block counts indexed by psize >> SPA_MINBLOCKSHIFT (clamped) */
        uint64_t zb_psize_histogram[PSIZE_HISTO_SIZE];
} zdb_blkstats_t;
2471 
/*
 * Extended object types to report deferred frees and dedup auto-ditto blocks.
 *
 * These pseudo-types are numbered directly after the real DMU object types
 * so that they can share the type index of the statistics table.
 * ZDB_OT_TOTAL is the column that aggregates every type.
 */
#define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0)
#define ZDB_OT_DITTO    (DMU_OT_NUMTYPES + 1)
#define ZDB_OT_OTHER    (DMU_OT_NUMTYPES + 2)
#define ZDB_OT_TOTAL    (DMU_OT_NUMTYPES + 3)

/*
 * Display names for the extended types, indexed by (type - DMU_OT_NUMTYPES);
 * the order must match the ZDB_OT_* definitions above.
 */
static const char *zdb_ot_extname[] = {
        "deferred free",
        "dedup ditto",
        "other",
        "Total",
};
2486 
/* Row of zcb_type[][] that aggregates the statistics of every level. */
#define ZB_TOTAL        DN_MAX_LEVELS

/*
 * State shared by the block-traversal callbacks while block statistics
 * are being gathered.
 */
typedef struct zdb_cb {
        /* per-(level, type) statistics; see ZB_TOTAL and ZDB_OT_TOTAL */
        zdb_blkstats_t  zcb_type[ZB_TOTAL + 1][ZDB_OT_TOTAL + 1];
        /* asize * (refcnt - 1), summed over non-ditto DDT phys entries */
        uint64_t        zcb_dedup_asize;
        /* number of non-ditto DDT phys entries that were summed */
        uint64_t        zcb_dedup_blocks;
        /* embedded block pointers seen, by embedded type */
        uint64_t        zcb_embedded_blocks[NUM_BP_EMBEDDED_TYPES];
        /* embedded block pointers seen, by embedded type and BPE psize */
        uint64_t        zcb_embedded_histogram[NUM_BP_EMBEDDED_TYPES]
            [BPE_PAYLOAD_SIZE];
        /* gethrtime() value taken when the traversal started */
        uint64_t        zcb_start;
        /* gethrtime() value of the most recent progress message */
        hrtime_t        zcb_lastprint;
        /* normal-class allocated bytes; basis of the progress estimate */
        uint64_t        zcb_totalasize;
        /* number of failed reads, indexed by error number */
        uint64_t        zcb_errors[256];
        /* reset to 0 by zdb_blkptr_cb() for every block it visits */
        int             zcb_readfails;
        /* non-zero once a read or the traversal has reported an error */
        int             zcb_haderrors;
        /* pool being examined */
        spa_t           *zcb_spa;
} zdb_cb_t;
2504 
2505 static void
2506 zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
2507     dmu_object_type_t type)
2508 {
2509         uint64_t refcnt = 0;
2510 
2511         ASSERT(type < ZDB_OT_TOTAL);
2512 
2513         if (zilog && zil_bp_tree_add(zilog, bp) != 0)
2514                 return;
2515 
2516         for (int i = 0; i < 4; i++) {
2517                 int l = (i < 2) ? BP_GET_LEVEL(bp) : ZB_TOTAL;
2518                 int t = (i & 1) ? type : ZDB_OT_TOTAL;
2519                 int equal;
2520                 zdb_blkstats_t *zb = &zcb->zcb_type[l][t];
2521 
2522                 zb->zb_asize += BP_GET_ASIZE(bp);
2523                 zb->zb_lsize += BP_GET_LSIZE(bp);
2524                 zb->zb_psize += BP_GET_PSIZE(bp);
2525                 zb->zb_count++;
2526 
2527                 /*
2528                  * The histogram is only big enough to record blocks up to
2529                  * SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last,
2530                  * "other", bucket.
2531                  */
2532                 unsigned idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT;
2533                 idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1);
2534                 zb->zb_psize_histogram[idx]++;
2535 
2536                 zb->zb_gangs += BP_COUNT_GANG(bp);
2537 
2538                 switch (BP_GET_NDVAS(bp)) {
2539                 case 2:
2540                         if (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2541                             DVA_GET_VDEV(&bp->blk_dva[1]))
2542                                 zb->zb_ditto_samevdev++;
2543                         break;
2544                 case 3:
2545                         equal = (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2546                             DVA_GET_VDEV(&bp->blk_dva[1])) +
2547                             (DVA_GET_VDEV(&bp->blk_dva[0]) ==
2548                             DVA_GET_VDEV(&bp->blk_dva[2])) +
2549                             (DVA_GET_VDEV(&bp->blk_dva[1]) ==
2550                             DVA_GET_VDEV(&bp->blk_dva[2]));
2551                         if (equal != 0)
2552                                 zb->zb_ditto_samevdev++;
2553                         break;
2554                 }
2555 
2556         }
2557 
2558         if (BP_IS_EMBEDDED(bp)) {
2559                 zcb->zcb_embedded_blocks[BPE_GET_ETYPE(bp)]++;
2560                 zcb->zcb_embedded_histogram[BPE_GET_ETYPE(bp)]
2561                     [BPE_GET_PSIZE(bp)]++;
2562                 return;
2563         }
2564 
2565         if (dump_opt['L'])
2566                 return;
2567 
2568         if (BP_GET_DEDUP(bp)) {
2569                 ddt_t *ddt;
2570                 ddt_entry_t *dde;
2571 
2572                 ddt = ddt_select(zcb->zcb_spa, bp);
2573                 ddt_enter(ddt);
2574                 dde = ddt_lookup(ddt, bp, B_FALSE);
2575 
2576                 if (dde == NULL) {
2577                         refcnt = 0;
2578                 } else {
2579                         ddt_phys_t *ddp = ddt_phys_select(dde, bp);
2580                         ddt_phys_decref(ddp);
2581                         refcnt = ddp->ddp_refcnt;
2582                         if (ddt_phys_total_refcnt(dde) == 0)
2583                                 ddt_remove(ddt, dde);
2584                 }
2585                 ddt_exit(ddt);
2586         }
2587 
2588         VERIFY3U(zio_wait(zio_claim(NULL, zcb->zcb_spa,
2589             refcnt ? 0 : spa_first_txg(zcb->zcb_spa),
2590             bp, NULL, NULL, ZIO_FLAG_CANFAIL)), ==, 0);
2591 }
2592 
2593 static void
2594 zdb_blkptr_done(zio_t *zio)
2595 {
2596         spa_t *spa = zio->io_spa;
2597         blkptr_t *bp = zio->io_bp;
2598         int ioerr = zio->io_error;
2599         zdb_cb_t *zcb = zio->io_private;
2600         zbookmark_phys_t *zb = &zio->io_bookmark;
2601 
2602         abd_free(zio->io_abd);
2603 
2604         mutex_enter(&spa->spa_scrub_lock);
2605         spa->spa_scrub_inflight--;
2606         cv_broadcast(&spa->spa_scrub_io_cv);
2607 
2608         if (ioerr && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
2609                 char blkbuf[BP_SPRINTF_LEN];
2610 
2611                 zcb->zcb_haderrors = 1;
2612                 zcb->zcb_errors[ioerr]++;
2613 
2614                 if (dump_opt['b'] >= 2)
2615                         snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2616                 else
2617                         blkbuf[0] = '\0';
2618 
2619                 (void) printf("zdb_blkptr_cb: "
2620                     "Got error %d reading "
2621                     "<%llu, %llu, %lld, %llx> %s -- skipping\n",
2622                     ioerr,
2623                     (u_longlong_t)zb->zb_objset,
2624                     (u_longlong_t)zb->zb_object,
2625                     (u_longlong_t)zb->zb_level,
2626                     (u_longlong_t)zb->zb_blkid,
2627                     blkbuf);
2628         }
2629         mutex_exit(&spa->spa_scrub_lock);
2630 }
2631 
2632 static int
2633 zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
2634     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
2635 {
2636         zdb_cb_t *zcb = arg;
2637         dmu_object_type_t type;
2638         boolean_t is_metadata;
2639 
2640         if (bp == NULL)
2641                 return (0);
2642 
2643         if (dump_opt['b'] >= 5 && bp->blk_birth > 0) {
2644                 char blkbuf[BP_SPRINTF_LEN];
2645                 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2646                 (void) printf("objset %llu object %llu "
2647                     "level %lld offset 0x%llx %s\n",
2648                     (u_longlong_t)zb->zb_objset,
2649                     (u_longlong_t)zb->zb_object,
2650                     (longlong_t)zb->zb_level,
2651                     (u_longlong_t)blkid2offset(dnp, bp, zb),
2652                     blkbuf);
2653         }
2654 
2655         if (BP_IS_HOLE(bp))
2656                 return (0);
2657 
2658         type = BP_GET_TYPE(bp);
2659 
2660         zdb_count_block(zcb, zilog, bp,
2661             (type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
2662 
2663         is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
2664 
2665         if (!BP_IS_EMBEDDED(bp) &&
2666             (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata))) {
2667                 size_t size = BP_GET_PSIZE(bp);
2668                 abd_t *abd = abd_alloc(size, B_FALSE);
2669                 int flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_SCRUB | ZIO_FLAG_RAW;
2670 
2671                 /* If it's an intent log block, failure is expected. */
2672                 if (zb->zb_level == ZB_ZIL_LEVEL)
2673                         flags |= ZIO_FLAG_SPECULATIVE;
2674 
2675                 mutex_enter(&spa->spa_scrub_lock);
2676                 while (spa->spa_scrub_inflight > max_inflight)
2677                         cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
2678                 spa->spa_scrub_inflight++;
2679                 mutex_exit(&spa->spa_scrub_lock);
2680 
2681                 zio_nowait(zio_read(NULL, spa, bp, abd, size,
2682                     zdb_blkptr_done, zcb, ZIO_PRIORITY_ASYNC_READ, flags, zb));
2683         }
2684 
2685         zcb->zcb_readfails = 0;
2686 
2687         /* only call gethrtime() every 100 blocks */
2688         static int iters;
2689         if (++iters > 100)
2690                 iters = 0;
2691         else
2692                 return (0);
2693 
2694         if (dump_opt['b'] < 5 && gethrtime() > zcb->zcb_lastprint + NANOSEC) {
2695                 uint64_t now = gethrtime();
2696                 char buf[10];
2697                 uint64_t bytes = zcb->zcb_type[ZB_TOTAL][ZDB_OT_TOTAL].zb_asize;
2698                 int kb_per_sec =
2699                     1 + bytes / (1 + ((now - zcb->zcb_start) / 1000 / 1000));
2700                 int sec_remaining =
2701                     (zcb->zcb_totalasize - bytes) / 1024 / kb_per_sec;
2702 
2703                 /* make sure nicenum has enough space */
2704                 CTASSERT(sizeof (buf) >= NN_NUMBUF_SZ);
2705 
2706                 zfs_nicenum(bytes, buf, sizeof (buf));
2707                 (void) fprintf(stderr,
2708                     "\r%5s completed (%4dMB/s) "
2709                     "estimated time remaining: %uhr %02umin %02usec        ",
2710                     buf, kb_per_sec / 1024,
2711                     sec_remaining / 60 / 60,
2712                     sec_remaining / 60 % 60,
2713                     sec_remaining % 60);
2714 
2715                 zcb->zcb_lastprint = now;
2716         }
2717 
2718         return (0);
2719 }
2720 
2721 static void
2722 zdb_leak(void *arg, uint64_t start, uint64_t size)
2723 {
2724         vdev_t *vd = arg;
2725 
2726         (void) printf("leaked space: vdev %llu, offset 0x%llx, size %llu\n",
2727             (u_longlong_t)vd->vdev_id, (u_longlong_t)start, (u_longlong_t)size);
2728 }
2729 
/*
 * Metaslab ops vector with no allocator.  zdb_leak_init() installs it on
 * the normal and log classes so that the allocator does not try to use
 * the metaslab trees while leak detection has repurposed them.
 */
static metaslab_ops_t zdb_metaslab_ops = {
        NULL    /* alloc */
};
2733 
2734 static void
2735 zdb_ddt_leak_init(spa_t *spa, zdb_cb_t *zcb)
2736 {
2737         ddt_bookmark_t ddb;
2738         ddt_entry_t dde;
2739         int error;
2740 
2741         bzero(&ddb, sizeof (ddb));
2742         while ((error = ddt_walk(spa, &ddb, &dde)) == 0) {
2743                 blkptr_t blk;
2744                 ddt_phys_t *ddp = dde.dde_phys;
2745 
2746                 if (ddb.ddb_class == DDT_CLASS_UNIQUE)
2747                         return;
2748 
2749                 ASSERT(ddt_phys_total_refcnt(&dde) > 1);
2750 
2751                 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
2752                         if (ddp->ddp_phys_birth == 0)
2753                                 continue;
2754                         ddt_bp_create(ddb.ddb_checksum,
2755                             &dde.dde_key, ddp, &blk);
2756                         if (p == DDT_PHYS_DITTO) {
2757                                 zdb_count_block(zcb, NULL, &blk, ZDB_OT_DITTO);
2758                         } else {
2759                                 zcb->zcb_dedup_asize +=
2760                                     BP_GET_ASIZE(&blk) * (ddp->ddp_refcnt - 1);
2761                                 zcb->zcb_dedup_blocks++;
2762                         }
2763                 }
2764                 if (!dump_opt['L']) {
2765                         ddt_t *ddt = spa->spa_ddt[ddb.ddb_checksum];
2766                         ddt_enter(ddt);
2767                         VERIFY(ddt_lookup(ddt, &blk, B_TRUE) != NULL);
2768                         ddt_exit(ddt);
2769                 }
2770         }
2771 
2772         ASSERT(error == ENOENT);
2773 }
2774 
2775 static void
2776 zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
2777 {
2778         zcb->zcb_spa = spa;
2779 
2780         if (!dump_opt['L']) {
2781                 vdev_t *rvd = spa->spa_root_vdev;
2782 
2783                 /*
2784                  * We are going to be changing the meaning of the metaslab's
2785                  * ms_tree.  Ensure that the allocator doesn't try to
2786                  * use the tree.
2787                  */
2788                 spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
2789                 spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
2790 
2791                 for (uint64_t c = 0; c < rvd->vdev_children; c++) {
2792                         vdev_t *vd = rvd->vdev_child[c];
2793                         metaslab_group_t *mg = vd->vdev_mg;
2794                         for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
2795                                 metaslab_t *msp = vd->vdev_ms[m];
2796                                 ASSERT3P(msp->ms_group, ==, mg);
2797                                 mutex_enter(&msp->ms_lock);
2798                                 metaslab_unload(msp);
2799 
2800                                 /*
2801                                  * For leak detection, we overload the metaslab
2802                                  * ms_tree to contain allocated segments
2803                                  * instead of free segments. As a result,
2804                                  * we can't use the normal metaslab_load/unload
2805                                  * interfaces.
2806                                  */
2807                                 if (msp->ms_sm != NULL) {
2808                                         (void) fprintf(stderr,
2809                                             "\rloading space map for "
2810                                             "vdev %llu of %llu, "
2811                                             "metaslab %llu of %llu ...",
2812                                             (longlong_t)c,
2813                                             (longlong_t)rvd->vdev_children,
2814                                             (longlong_t)m,
2815                                             (longlong_t)vd->vdev_ms_count);
2816 
2817                                         /*
2818                                          * We don't want to spend the CPU
2819                                          * manipulating the size-ordered
2820                                          * tree, so clear the range_tree
2821                                          * ops.
2822                                          */
2823                                         msp->ms_tree->rt_ops = NULL;
2824                                         VERIFY0(space_map_load(msp->ms_sm,
2825                                             msp->ms_tree, SM_ALLOC));
2826 
2827                                         if (!msp->ms_loaded) {
2828                                                 msp->ms_loaded = B_TRUE;
2829                                         }
2830                                 }
2831                                 mutex_exit(&msp->ms_lock);
2832                         }
2833                 }
2834                 (void) fprintf(stderr, "\n");
2835         }
2836 
2837         spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
2838 
2839         zdb_ddt_leak_init(spa, zcb);
2840 
2841         spa_config_exit(spa, SCL_CONFIG, FTAG);
2842 }
2843 
2844 static void
2845 zdb_leak_fini(spa_t *spa)
2846 {
2847         if (!dump_opt['L']) {
2848                 vdev_t *rvd = spa->spa_root_vdev;
2849                 for (unsigned c = 0; c < rvd->vdev_children; c++) {
2850                         vdev_t *vd = rvd->vdev_child[c];
2851                         metaslab_group_t *mg = vd->vdev_mg;
2852                         for (unsigned m = 0; m < vd->vdev_ms_count; m++) {
2853                                 metaslab_t *msp = vd->vdev_ms[m];
2854                                 ASSERT3P(mg, ==, msp->ms_group);
2855                                 mutex_enter(&msp->ms_lock);
2856 
2857                                 /*
2858                                  * The ms_tree has been overloaded to
2859                                  * contain allocated segments. Now that we
2860                                  * finished traversing all blocks, any
2861                                  * block that remains in the ms_tree
2862                                  * represents an allocated block that we
2863                                  * did not claim during the traversal.
2864                                  * Claimed blocks would have been removed
2865                                  * from the ms_tree.
2866                                  */
2867                                 range_tree_vacate(msp->ms_tree, zdb_leak, vd);
2868 
2869                                 if (msp->ms_loaded) {
2870                                         msp->ms_loaded = B_FALSE;
2871                                 }
2872 
2873                                 mutex_exit(&msp->ms_lock);
2874                         }
2875                 }
2876         }
2877 }
2878 
2879 /* ARGSUSED */
2880 static int
2881 count_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
2882 {
2883         zdb_cb_t *zcb = arg;
2884 
2885         if (dump_opt['b'] >= 5) {
2886                 char blkbuf[BP_SPRINTF_LEN];
2887                 snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
2888                 (void) printf("[%s] %s\n",
2889                     "deferred free", blkbuf);
2890         }
2891         zdb_count_block(zcb, NULL, bp, ZDB_OT_DEFERRED);
2892         return (0);
2893 }
2894 
2895 static int
2896 dump_block_stats(spa_t *spa)
2897 {
2898         zdb_cb_t zcb;
2899         zdb_blkstats_t *zb, *tzb;
2900         uint64_t norm_alloc, norm_space, total_alloc, total_found;
2901         int flags = TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA | TRAVERSE_HARD;
2902         boolean_t leaks = B_FALSE;
2903 
2904         bzero(&zcb, sizeof (zcb));
2905         (void) printf("\nTraversing all blocks %s%s%s%s%s...\n\n",
2906             (dump_opt['c'] || !dump_opt['L']) ? "to verify " : "",
2907             (dump_opt['c'] == 1) ? "metadata " : "",
2908             dump_opt['c'] ? "checksums " : "",
2909             (dump_opt['c'] && !dump_opt['L']) ? "and verify " : "",
2910             !dump_opt['L'] ? "nothing leaked " : "");
2911 
2912         /*
2913          * Load all space maps as SM_ALLOC maps, then traverse the pool
2914          * claiming each block we discover.  If the pool is perfectly
2915          * consistent, the space maps will be empty when we're done.
2916          * Anything left over is a leak; any block we can't claim (because
2917          * it's not part of any space map) is a double allocation,
2918          * reference to a freed block, or an unclaimed log block.
2919          */
2920         zdb_leak_init(spa, &zcb);
2921 
2922         /*
2923          * If there's a deferred-free bplist, process that first.
2924          */
2925         (void) bpobj_iterate_nofree(&spa->spa_deferred_bpobj,
2926             count_block_cb, &zcb, NULL);
2927         if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
2928                 (void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
2929                     count_block_cb, &zcb, NULL);
2930         }
2931         if (spa_feature_is_active(spa, SPA_FEATURE_ASYNC_DESTROY)) {
2932                 VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
2933                     spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
2934                     &zcb, NULL));
2935         }
2936 
2937         if (dump_opt['c'] > 1)
2938                 flags |= TRAVERSE_PREFETCH_DATA;
2939 
2940         zcb.zcb_totalasize = metaslab_class_get_alloc(spa_normal_class(spa));
2941         zcb.zcb_start = zcb.zcb_lastprint = gethrtime();
2942         zcb.zcb_haderrors |= traverse_pool(spa, 0, flags, zdb_blkptr_cb, &zcb);
2943 
2944         /*
2945          * If we've traversed the data blocks then we need to wait for those
2946          * I/Os to complete. We leverage "The Godfather" zio to wait on
2947          * all async I/Os to complete.
2948          */
2949         if (dump_opt['c']) {
2950                 for (int i = 0; i < max_ncpus; i++) {
2951                         (void) zio_wait(spa->spa_async_zio_root[i]);
2952                         spa->spa_async_zio_root[i] = zio_root(spa, NULL, NULL,
2953                             ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
2954                             ZIO_FLAG_GODFATHER);
2955                 }
2956         }
2957 
2958         if (zcb.zcb_haderrors) {
2959                 (void) printf("\nError counts:\n\n");
2960                 (void) printf("\t%5s  %s\n", "errno", "count");
2961                 for (int e = 0; e < 256; e++) {
2962                         if (zcb.zcb_errors[e] != 0) {
2963                                 (void) printf("\t%5d  %llu\n",
2964                                     e, (u_longlong_t)zcb.zcb_errors[e]);
2965                         }
2966                 }
2967         }
2968 
2969         /*
2970          * Report any leaked segments.
2971          */
2972         zdb_leak_fini(spa);
2973 
2974         tzb = &zcb.zcb_type[ZB_TOTAL][ZDB_OT_TOTAL];
2975 
2976         norm_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
2977         norm_space = metaslab_class_get_space(spa_normal_class(spa));
2978 
2979         total_alloc = norm_alloc + metaslab_class_get_alloc(spa_log_class(spa));
2980         total_found = tzb->zb_asize - zcb.zcb_dedup_asize;
2981 
2982         if (total_found == total_alloc) {
2983                 if (!dump_opt['L'])
2984                         (void) printf("\n\tNo leaks (block sum matches space"
2985                             " maps exactly)\n");
2986         } else {
2987                 (void) printf("block traversal size %llu != alloc %llu "
2988                     "(%s %lld)\n",
2989                     (u_longlong_t)total_found,
2990                     (u_longlong_t)total_alloc,
2991                     (dump_opt['L']) ? "unreachable" : "leaked",
2992                     (longlong_t)(total_alloc - total_found));
2993                 leaks = B_TRUE;
2994         }
2995 
2996         if (tzb->zb_count == 0)
2997                 return (2);
2998 
2999         (void) printf("\n");
3000         (void) printf("\tbp count:      %10llu\n",
3001             (u_longlong_t)tzb->zb_count);
3002         (void) printf("\tganged count:  %10llu\n",
3003             (longlong_t)tzb->zb_gangs);
3004         (void) printf("\tbp logical:    %10llu      avg: %6llu\n",
3005             (u_longlong_t)tzb->zb_lsize,
3006             (u_longlong_t)(tzb->zb_lsize / tzb->zb_count));
3007         (void) printf("\tbp physical:   %10llu      avg:"
3008             " %6llu     compression: %6.2f\n",
3009             (u_longlong_t)tzb->zb_psize,
3010             (u_longlong_t)(tzb->zb_psize / tzb->zb_count),
3011             (double)tzb->zb_lsize / tzb->zb_psize);
3012         (void) printf("\tbp allocated:  %10llu      avg:"
3013             " %6llu     compression: %6.2f\n",
3014             (u_longlong_t)tzb->zb_asize,
3015             (u_longlong_t)(tzb->zb_asize / tzb->zb_count),
3016             (double)tzb->zb_lsize / tzb->zb_asize);
3017         (void) printf("\tbp deduped:    %10llu    ref>1:"
3018             " %6llu   deduplication: %6.2f\n",
3019             (u_longlong_t)zcb.zcb_dedup_asize,
3020             (u_longlong_t)zcb.zcb_dedup_blocks,
3021             (double)zcb.zcb_dedup_asize / tzb->zb_asize + 1.0);
3022         (void) printf("\tSPA allocated: %10llu     used: %5.2f%%\n",
3023             (u_longlong_t)norm_alloc, 100.0 * norm_alloc / norm_space);
3024 
3025         for (bp_embedded_type_t i = 0; i < NUM_BP_EMBEDDED_TYPES; i++) {
3026                 if (zcb.zcb_embedded_blocks[i] == 0)
3027                         continue;
3028                 (void) printf("\n");
3029                 (void) printf("\tadditional, non-pointer bps of type %u: "
3030                     "%10llu\n",
3031                     i, (u_longlong_t)zcb.zcb_embedded_blocks[i]);
3032 
3033                 if (dump_opt['b'] >= 3) {
3034                         (void) printf("\t number of (compressed) bytes:  "
3035                             "number of bps\n");
3036                         dump_histogram(zcb.zcb_embedded_histogram[i],
3037                             sizeof (zcb.zcb_embedded_histogram[i]) /
3038                             sizeof (zcb.zcb_embedded_histogram[i][0]), 0);
3039                 }
3040         }
3041 
3042         if (tzb->zb_ditto_samevdev != 0) {
3043                 (void) printf("\tDittoed blocks on same vdev: %llu\n",
3044                     (longlong_t)tzb->zb_ditto_samevdev);
3045         }
3046 
3047         if (dump_opt['b'] >= 2) {
3048                 int l, t, level;
3049                 (void) printf("\nBlocks\tLSIZE\tPSIZE\tASIZE"
3050                     "\t  avg\t comp\t%%Total\tType\n");
3051 
3052                 for (t = 0; t <= ZDB_OT_TOTAL; t++) {
3053                         char csize[32], lsize[32], psize[32], asize[32];
3054                         char avg[32], gang[32];
3055                         const char *typename;
3056 
3057                         /* make sure nicenum has enough space */
3058                         CTASSERT(sizeof (csize) >= NN_NUMBUF_SZ);
3059                         CTASSERT(sizeof (lsize) >= NN_NUMBUF_SZ);
3060                         CTASSERT(sizeof (psize) >= NN_NUMBUF_SZ);
3061                         CTASSERT(sizeof (asize) >= NN_NUMBUF_SZ);
3062                         CTASSERT(sizeof (avg) >= NN_NUMBUF_SZ);
3063                         CTASSERT(sizeof (gang) >= NN_NUMBUF_SZ);
3064 
3065                         if (t < DMU_OT_NUMTYPES)
3066                                 typename = dmu_ot[t].ot_name;
3067                         else
3068                                 typename = zdb_ot_extname[t - DMU_OT_NUMTYPES];
3069 
3070                         if (zcb.zcb_type[ZB_TOTAL][t].zb_asize == 0) {
3071                                 (void) printf("%6s\t%5s\t%5s\t%5s"
3072                                     "\t%5s\t%5s\t%6s\t%s\n",
3073                                     "-",
3074                                     "-",
3075                                     "-",
3076                                     "-",
3077                                     "-",
3078                                     "-",
3079                                     "-",
3080                                     typename);
3081                                 continue;
3082                         }
3083 
3084                         for (l = ZB_TOTAL - 1; l >= -1; l--) {
3085                                 level = (l == -1 ? ZB_TOTAL : l);
3086                                 zb = &zcb.zcb_type[level][t];
3087 
3088                                 if (zb->zb_asize == 0)
3089                                         continue;
3090 
3091                                 if (dump_opt['b'] < 3 && level != ZB_TOTAL)
3092                                         continue;
3093 
3094                                 if (level == 0 && zb->zb_asize ==
3095                                     zcb.zcb_type[ZB_TOTAL][t].zb_asize)
3096                                         continue;
3097 
3098                                 zdb_nicenum(zb->zb_count, csize,
3099                                     sizeof (csize));
3100                                 zdb_nicenum(zb->zb_lsize, lsize,
3101                                     sizeof (lsize));
3102                                 zdb_nicenum(zb->zb_psize, psize,
3103                                     sizeof (psize));
3104                                 zdb_nicenum(zb->zb_asize, asize,
3105                                     sizeof (asize));
3106                                 zdb_nicenum(zb->zb_asize / zb->zb_count, avg,
3107                                     sizeof (avg));
3108                                 zdb_nicenum(zb->zb_gangs, gang, sizeof (gang));
3109 
3110                                 (void) printf("%6s\t%5s\t%5s\t%5s\t%5s"
3111                                     "\t%5.2f\t%6.2f\t",
3112                                     csize, lsize, psize, asize, avg,
3113                                     (double)zb->zb_lsize / zb->zb_psize,
3114                                     100.0 * zb->zb_asize / tzb->zb_asize);
3115 
3116                                 if (level == ZB_TOTAL)
3117                                         (void) printf("%s\n", typename);
3118                                 else
3119                                         (void) printf("    L%d %s\n",
3120                                             level, typename);
3121 
3122                                 if (dump_opt['b'] >= 3 && zb->zb_gangs > 0) {
3123                                         (void) printf("\t number of ganged "
3124                                             "blocks: %s\n", gang);
3125                                 }
3126 
3127                                 if (dump_opt['b'] >= 4) {
3128                                         (void) printf("psize "
3129                                             "(in 512-byte sectors): "
3130                                             "number of blocks\n");
3131                                         dump_histogram(zb->zb_psize_histogram,
3132                                             PSIZE_HISTO_SIZE, 0);
3133                                 }
3134                         }
3135                 }
3136         }
3137 
3138         (void) printf("\n");
3139 
3140         if (leaks)
3141                 return (2);
3142 
3143         if (zcb.zcb_haderrors)
3144                 return (3);
3145 
3146         return (0);
3147 }
3148 
/*
 * One node of the AVL tree built by zdb_ddt_add_cb() for the simulated
 * dedup table.  Every block pointer that produces the same key is
 * accumulated into the same node.
 */
typedef struct zdb_ddt_entry {
        ddt_key_t       zdde_key;               /* filled by ddt_key_fill() */
        uint64_t        zdde_ref_blocks;        /* block pointers seen */
        uint64_t        zdde_ref_lsize;         /* sum of BP_GET_LSIZE() */
        uint64_t        zdde_ref_psize;         /* sum of BP_GET_PSIZE() */
        uint64_t        zdde_ref_dsize;         /* sum of bp_get_dsize_sync() */
        avl_node_t      zdde_node;              /* AVL tree linkage */
} zdb_ddt_entry_t;
3157 
3158 /* ARGSUSED */
3159 static int
3160 zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
3161     const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
3162 {
3163         avl_tree_t *t = arg;
3164         avl_index_t where;
3165         zdb_ddt_entry_t *zdde, zdde_search;
3166 
3167         if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
3168                 return (0);
3169 
3170         if (dump_opt['S'] > 1 && zb->zb_level == ZB_ROOT_LEVEL) {
3171                 (void) printf("traversing objset %llu, %llu objects, "
3172                     "%lu blocks so far\n",
3173                     (u_longlong_t)zb->zb_objset,
3174                     (u_longlong_t)BP_GET_FILL(bp),
3175                     avl_numnodes(t));
3176         }
3177 
3178         if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
3179             BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
3180                 return (0);
3181 
3182         ddt_key_fill(&zdde_search.zdde_key, bp);
3183 
3184         zdde = avl_find(t, &zdde_search, &where);
3185 
3186         if (zdde == NULL) {
3187                 zdde = umem_zalloc(sizeof (*zdde), UMEM_NOFAIL);
3188                 zdde->zdde_key = zdde_search.zdde_key;
3189                 avl_insert(t, zdde, where);
3190         }
3191 
3192         zdde->zdde_ref_blocks += 1;
3193         zdde->zdde_ref_lsize += BP_GET_LSIZE(bp);
3194         zdde->zdde_ref_psize += BP_GET_PSIZE(bp);
3195         zdde->zdde_ref_dsize += bp_get_dsize_sync(spa, bp);
3196 
3197         return (0);
3198 }
3199 
3200 static void
3201 dump_simulated_ddt(spa_t *spa)
3202 {
3203         avl_tree_t t;
3204         void *cookie = NULL;
3205         zdb_ddt_entry_t *zdde;
3206         ddt_histogram_t ddh_total;
3207         ddt_stat_t dds_total;
3208 
3209         bzero(&ddh_total, sizeof (ddh_total));
3210         bzero(&dds_total, sizeof (dds_total));
3211         avl_create(&t, ddt_entry_compare,
3212             sizeof (zdb_ddt_entry_t), offsetof(zdb_ddt_entry_t, zdde_node));
3213 
3214         spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3215 
3216         (void) traverse_pool(spa, 0, TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
3217             zdb_ddt_add_cb, &t);
3218 
3219         spa_config_exit(spa, SCL_CONFIG, FTAG);
3220 
3221         while ((zdde = avl_destroy_nodes(&t, &cookie)) != NULL) {
3222                 ddt_stat_t dds;
3223                 uint64_t refcnt = zdde->zdde_ref_blocks;
3224                 ASSERT(refcnt != 0);
3225 
3226                 dds.dds_blocks = zdde->zdde_ref_blocks / refcnt;
3227                 dds.dds_lsize = zdde->zdde_ref_lsize / refcnt;
3228                 dds.dds_psize = zdde->zdde_ref_psize / refcnt;
3229                 dds.dds_dsize = zdde->zdde_ref_dsize / refcnt;
3230 
3231                 dds.dds_ref_blocks = zdde->zdde_ref_blocks;
3232                 dds.dds_ref_lsize = zdde->zdde_ref_lsize;
3233                 dds.dds_ref_psize = zdde->zdde_ref_psize;
3234                 dds.dds_ref_dsize = zdde->zdde_ref_dsize;
3235 
3236                 ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
3237                     &dds, 0);
3238 
3239                 umem_free(zdde, sizeof (*zdde));
3240         }
3241 
3242         avl_destroy(&t);
3243 
3244         ddt_histogram_stat(&dds_total, &ddh_total);
3245 
3246         (void) printf("Simulated DDT histogram:\n");
3247 
3248         zpool_dump_ddt(&dds_total, &ddh_total);
3249 
3250         dump_dedup_ratio(&dds_total);
3251 }
3252 
3253 static void
3254 dump_zpool(spa_t *spa)
3255 {
3256         dsl_pool_t *dp = spa_get_dsl(spa);
3257         int rc = 0;
3258 
3259         if (dump_opt['S']) {
3260                 dump_simulated_ddt(spa);
3261                 return;
3262         }
3263 
3264         if (!dump_opt['e'] && dump_opt['C'] > 1) {
3265                 (void) printf("\nCached configuration:\n");
3266                 dump_nvlist(spa->spa_config, 8);
3267         }
3268 
3269         if (dump_opt['C'])
3270                 dump_config(spa);
3271 
3272         if (dump_opt['u'])
3273                 dump_uberblock(&spa->spa_uberblock, "\nUberblock:\n", "\n");
3274 
3275         if (dump_opt['D'])
3276                 dump_all_ddts(spa);
3277 
3278         if (dump_opt['d'] > 2 || dump_opt['m'])
3279                 dump_metaslabs(spa);
3280         if (dump_opt['M'])
3281                 dump_metaslab_groups(spa);
3282 
3283         if (dump_opt['d'] || dump_opt['i']) {
3284                 dump_dir(dp->dp_meta_objset);
3285                 if (dump_opt['d'] >= 3) {
3286                         dump_full_bpobj(&spa->spa_deferred_bpobj,
3287                             "Deferred frees", 0);
3288                         if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
3289                                 dump_full_bpobj(
3290                                     &spa->spa_dsl_pool->dp_free_bpobj,
3291                                     "Pool snapshot frees", 0);
3292                         }
3293 
3294                         if (spa_feature_is_active(spa,
3295                             SPA_FEATURE_ASYNC_DESTROY)) {
3296                                 dump_bptree(spa->spa_meta_objset,
3297                                     spa->spa_dsl_pool->dp_bptree_obj,
3298                                     "Pool dataset frees");
3299                         }
3300                         dump_dtl(spa->spa_root_vdev, 0);
3301                 }
3302                 (void) dmu_objset_find(spa_name(spa), dump_one_dir,
3303                     NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
3304 
3305                 for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
3306                         uint64_t refcount;
3307 
3308                         if (!(spa_feature_table[f].fi_flags &
3309                             ZFEATURE_FLAG_PER_DATASET) ||
3310                             !spa_feature_is_enabled(spa, f)) {
3311                                 ASSERT0(dataset_feature_count[f]);
3312                                 continue;
3313                         }
3314                         (void) feature_get_refcount(spa,
3315                             &spa_feature_table[f], &refcount);
3316                         if (dataset_feature_count[f] != refcount) {
3317                                 (void) printf("%s feature refcount mismatch: "
3318                                     "%lld datasets != %lld refcount\n",
3319                                     spa_feature_table[f].fi_uname,
3320                                     (longlong_t)dataset_feature_count[f],
3321                                     (longlong_t)refcount);
3322                                 rc = 2;
3323                         } else {
3324                                 (void) printf("Verified %s feature refcount "
3325                                     "of %llu is correct\n",
3326                                     spa_feature_table[f].fi_uname,
3327                                     (longlong_t)refcount);
3328                         }
3329                 }
3330         }
3331         if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
3332                 rc = dump_block_stats(spa);
3333 
3334         if (rc == 0)
3335                 rc = verify_spacemap_refcounts(spa);
3336 
3337         if (dump_opt['s'])
3338                 show_pool_stats(spa);
3339 
3340         if (dump_opt['h'])
3341                 dump_history(spa);
3342 
3343         if (rc != 0) {
3344                 dump_debug_buffer();
3345                 exit(rc);
3346         }
3347 }
3348 
/*
 * Option bits for zdb_read_block().  Each character of the block
 * specifier's flag string is translated into one of these bits through
 * flagbits[]; a character whose entry is zero is reported as invalid.
 *
 *      ZDB_FLAG_CHECKSUM       parsed as a flag that takes an argument
 *      ZDB_FLAG_DECOMPRESS     try to decompress the data before dumping
 *      ZDB_FLAG_BSWAP          byteswap 64-bit words before dumping
 *      ZDB_FLAG_GBH            mark the DVA as gang; dump a gang header
 *      ZDB_FLAG_INDIRECT       dump the data as an array of block pointers
 *      ZDB_FLAG_RAW            write the data unformatted to stdout
 *      ZDB_FLAG_PRINT_BLKPTR   decode one block pointer at a given offset
 *
 * NOTE(review): no consumer of ZDB_FLAG_PHYS is visible next to
 * zdb_read_block(); confirm whether it is still used.
 */
#define ZDB_FLAG_CHECKSUM       0x0001
#define ZDB_FLAG_DECOMPRESS     0x0002
#define ZDB_FLAG_BSWAP          0x0004
#define ZDB_FLAG_GBH            0x0008
#define ZDB_FLAG_INDIRECT       0x0010
#define ZDB_FLAG_PHYS           0x0020
#define ZDB_FLAG_RAW            0x0040
#define ZDB_FLAG_PRINT_BLKPTR   0x0080

/* Indexed by flag character (as an unsigned char); 0 means "no such flag". */
static int flagbits[256];
3359 
3360 static void
3361 zdb_print_blkptr(blkptr_t *bp, int flags)
3362 {
3363         char blkbuf[BP_SPRINTF_LEN];
3364 
3365         if (flags & ZDB_FLAG_BSWAP)
3366                 byteswap_uint64_array((void *)bp, sizeof (blkptr_t));
3367 
3368         snprintf_blkptr(blkbuf, sizeof (blkbuf), bp);
3369         (void) printf("%s\n", blkbuf);
3370 }
3371 
3372 static void
3373 zdb_dump_indirect(blkptr_t *bp, int nbps, int flags)
3374 {
3375         int i;
3376 
3377         for (i = 0; i < nbps; i++)
3378                 zdb_print_blkptr(&bp[i], flags);
3379 }
3380 
3381 static void
3382 zdb_dump_gbh(void *buf, int flags)
3383 {
3384         zdb_dump_indirect((blkptr_t *)buf, SPA_GBH_NBLKPTRS, flags);
3385 }
3386 
3387 static void
3388 zdb_dump_block_raw(void *buf, uint64_t size, int flags)
3389 {
3390         if (flags & ZDB_FLAG_BSWAP)
3391                 byteswap_uint64_array(buf, size);
3392         (void) write(1, buf, size);
3393 }
3394 
3395 static void
3396 zdb_dump_block(char *label, void *buf, uint64_t size, int flags)
3397 {
3398         uint64_t *d = (uint64_t *)buf;
3399         unsigned nwords = size / sizeof (uint64_t);
3400         int do_bswap = !!(flags & ZDB_FLAG_BSWAP);
3401         unsigned i, j;
3402         const char *hdr;
3403         char *c;
3404 
3405 
3406         if (do_bswap)
3407                 hdr = " 7 6 5 4 3 2 1 0   f e d c b a 9 8";
3408         else
3409                 hdr = " 0 1 2 3 4 5 6 7   8 9 a b c d e f";
3410 
3411         (void) printf("\n%s\n%6s   %s  0123456789abcdef\n", label, "", hdr);
3412 
3413         for (i = 0; i < nwords; i += 2) {
3414                 (void) printf("%06llx:  %016llx  %016llx  ",
3415                     (u_longlong_t)(i * sizeof (uint64_t)),
3416                     (u_longlong_t)(do_bswap ? BSWAP_64(d[i]) : d[i]),
3417                     (u_longlong_t)(do_bswap ? BSWAP_64(d[i + 1]) : d[i + 1]));
3418 
3419                 c = (char *)&d[i];
3420                 for (j = 0; j < 2 * sizeof (uint64_t); j++)
3421                         (void) printf("%c", isprint(c[j]) ? c[j] : '.');
3422                 (void) printf("\n");
3423         }
3424 }
3425 
3426 /*
3427  * There are two acceptable formats:
3428  *      leaf_name         - For example: c1t0d0 or /tmp/ztest.0a
3429  *      child[.child]*    - For example: 0.1.1
3430  *
3431  * The second form can be used to specify arbitrary vdevs anywhere
3432  * in the heirarchy.  For example, in a pool with a mirror of
3433  * RAID-Zs, you can specify either RAID-Z vdev with 0.0 or 0.1 .
3434  */
3435 static vdev_t *
3436 zdb_vdev_lookup(vdev_t *vdev, const char *path)
3437 {
3438         char *s, *p, *q;
3439         unsigned i;
3440 
3441         if (vdev == NULL)
3442                 return (NULL);
3443 
3444         /* First, assume the x.x.x.x format */
3445         i = strtoul(path, &s, 10);
3446         if (s == path || (s && *s != '.' && *s != '\0'))
3447                 goto name;
3448         if (i >= vdev->vdev_children)
3449                 return (NULL);
3450 
3451         vdev = vdev->vdev_child[i];
3452         if (*s == '\0')
3453                 return (vdev);
3454         return (zdb_vdev_lookup(vdev, s+1));
3455 
3456 name:
3457         for (i = 0; i < vdev->vdev_children; i++) {
3458                 vdev_t *vc = vdev->vdev_child[i];
3459 
3460                 if (vc->vdev_path == NULL) {
3461                         vc = zdb_vdev_lookup(vc, path);
3462                         if (vc == NULL)
3463                                 continue;
3464                         else
3465                                 return (vc);
3466                 }
3467 
3468                 p = strrchr(vc->vdev_path, '/');
3469                 p = p ? p + 1 : vc->vdev_path;
3470                 q = &vc->vdev_path[strlen(vc->vdev_path) - 2];
3471 
3472                 if (strcmp(vc->vdev_path, path) == 0)
3473                         return (vc);
3474                 if (strcmp(p, path) == 0)
3475                         return (vc);
3476                 if (strcmp(q, "s0") == 0 && strncmp(p, path, q - p) == 0)
3477                         return (vc);
3478         }
3479 
3480         return (NULL);
3481 }
3482 
/*
 * abd_iterate_func() callback that fills 'len' bytes at 'buf' using
 * random_get_pseudo_bytes() and passes its status back.  The third
 * argument is ignored.
 */
/* ARGSUSED */
static int
random_get_pseudo_bytes_cb(void *buf, size_t len, void *unused)
{
        int status;

        status = random_get_pseudo_bytes(buf, len);
        return (status);
}
3489 
3490 /*
3491  * Read a block from a pool and print it out.  The syntax of the
3492  * block descriptor is:
3493  *
3494  *      pool:vdev_specifier:offset:size[:flags]
3495  *
3496  *      pool           - The name of the pool you wish to read from
3497  *      vdev_specifier - Which vdev (see comment for zdb_vdev_lookup)
3498  *      offset         - offset, in hex, in bytes
3499  *      size           - Amount of data to read, in hex, in bytes
3500  *      flags          - A string of characters specifying options
3501  *               b: Decode a blkptr at given offset within block
3502  *              *c: Calculate and display checksums
3503  *               d: Decompress data before dumping
3504  *               e: Byteswap data before dumping
3505  *               g: Display data as a gang block header
3506  *               i: Display as an indirect block
3507  *               p: Do I/O to physical offset
3508  *               r: Dump raw data to stdout
3509  *
3510  *              * = not yet implemented
3511  */
3512 static void
3513 zdb_read_block(char *thing, spa_t *spa)
3514 {
3515         blkptr_t blk, *bp = &blk;
3516         dva_t *dva = bp->blk_dva;
3517         int flags = 0;
3518         uint64_t offset = 0, size = 0, psize = 0, lsize = 0, blkptr_offset = 0;
3519         zio_t *zio;
3520         vdev_t *vd;
3521         abd_t *pabd;
3522         void *lbuf, *buf;
3523         const char *s, *vdev;
3524         char *p, *dup, *flagstr;
3525         int i, error;
3526 
3527         dup = strdup(thing);
3528         s = strtok(dup, ":");
3529         vdev = s ? s : "";
3530         s = strtok(NULL, ":");
3531         offset = strtoull(s ? s : "", NULL, 16);
3532         s = strtok(NULL, ":");
3533         size = strtoull(s ? s : "", NULL, 16);
3534         s = strtok(NULL, ":");
3535         if (s)
3536                 flagstr = strdup(s);
3537         else
3538                 flagstr = strdup("");
3539 
3540         s = NULL;
3541         if (size == 0)
3542                 s = "size must not be zero";
3543         if (!IS_P2ALIGNED(size, DEV_BSIZE))
3544                 s = "size must be a multiple of sector size";
3545         if (!IS_P2ALIGNED(offset, DEV_BSIZE))
3546                 s = "offset must be a multiple of sector size";
3547         if (s) {
3548                 (void) printf("Invalid block specifier: %s  - %s\n", thing, s);
3549                 free(dup);
3550                 return;
3551         }
3552 
3553         for (s = strtok(flagstr, ":"); s; s = strtok(NULL, ":")) {
3554                 for (i = 0; flagstr[i]; i++) {
3555                         int bit = flagbits[(uchar_t)flagstr[i]];
3556 
3557                         if (bit == 0) {
3558                                 (void) printf("***Invalid flag: %c\n",
3559                                     flagstr[i]);
3560                                 continue;
3561                         }
3562                         flags |= bit;
3563 
3564                         /* If it's not something with an argument, keep going */
3565                         if ((bit & (ZDB_FLAG_CHECKSUM |
3566                             ZDB_FLAG_PRINT_BLKPTR)) == 0)
3567                                 continue;
3568 
3569                         p = &flagstr[i + 1];
3570                         if (bit == ZDB_FLAG_PRINT_BLKPTR)
3571                                 blkptr_offset = strtoull(p, &p, 16);
3572                         if (*p != ':' && *p != '\0') {
3573                                 (void) printf("***Invalid flag arg: '%s'\n", s);
3574                                 free(dup);
3575                                 return;
3576                         }
3577                 }
3578         }
3579         free(flagstr);
3580 
3581         vd = zdb_vdev_lookup(spa->spa_root_vdev, vdev);
3582         if (vd == NULL) {
3583                 (void) printf("***Invalid vdev: %s\n", vdev);
3584                 free(dup);
3585                 return;
3586         } else {
3587                 if (vd->vdev_path)
3588                         (void) fprintf(stderr, "Found vdev: %s\n",
3589                             vd->vdev_path);
3590                 else
3591                         (void) fprintf(stderr, "Found vdev type: %s\n",
3592                             vd->vdev_ops->vdev_op_type);
3593         }
3594 
3595         psize = size;
3596         lsize = size;
3597 
3598         pabd = abd_alloc_linear(SPA_MAXBLOCKSIZE, B_FALSE);
3599         lbuf = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3600 
3601         BP_ZERO(bp);
3602 
3603         DVA_SET_VDEV(&dva[0], vd->vdev_id);
3604         DVA_SET_OFFSET(&dva[0], offset);
3605         DVA_SET_GANG(&dva[0], !!(flags & ZDB_FLAG_GBH));
3606         DVA_SET_ASIZE(&dva[0], vdev_psize_to_asize(vd, psize));
3607 
3608         BP_SET_BIRTH(bp, TXG_INITIAL, TXG_INITIAL);
3609 
3610         BP_SET_LSIZE(bp, lsize);
3611         BP_SET_PSIZE(bp, psize);
3612         BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF);
3613         BP_SET_CHECKSUM(bp, ZIO_CHECKSUM_OFF);
3614         BP_SET_TYPE(bp, DMU_OT_NONE);
3615         BP_SET_LEVEL(bp, 0);
3616         BP_SET_DEDUP(bp, 0);
3617         BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
3618 
3619         spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
3620         zio = zio_root(spa, NULL, NULL, 0);
3621 
3622         if (vd == vd->vdev_top) {
3623                 /*
3624                  * Treat this as a normal block read.
3625                  */
3626                 zio_nowait(zio_read(zio, spa, bp, pabd, psize, NULL, NULL,
3627                     ZIO_PRIORITY_SYNC_READ,
3628                     ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL));
3629         } else {
3630                 /*
3631                  * Treat this as a vdev child I/O.
3632                  */
3633                 zio_nowait(zio_vdev_child_io(zio, bp, vd, offset, pabd,
3634                     psize, ZIO_TYPE_READ, ZIO_PRIORITY_SYNC_READ,
3635                     ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE |
3636                     ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY |
3637                     ZIO_FLAG_CANFAIL | ZIO_FLAG_RAW, NULL, NULL));
3638         }
3639 
3640         error = zio_wait(zio);
3641         spa_config_exit(spa, SCL_STATE, FTAG);
3642 
3643         if (error) {
3644                 (void) printf("Read of %s failed, error: %d\n", thing, error);
3645                 goto out;
3646         }
3647 
3648         if (flags & ZDB_FLAG_DECOMPRESS) {
3649                 /*
3650                  * We don't know how the data was compressed, so just try
3651                  * every decompress function at every inflated blocksize.
3652                  */
3653                 enum zio_compress c;
3654                 void *pbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3655                 void *lbuf2 = umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
3656 
3657                 abd_copy_to_buf(pbuf2, pabd, psize);
3658 
3659                 VERIFY0(abd_iterate_func(pabd, psize, SPA_MAXBLOCKSIZE - psize,
3660                     random_get_pseudo_bytes_cb, NULL));
3661 
3662                 VERIFY0(random_get_pseudo_bytes((uint8_t *)pbuf2 + psize,
3663                     SPA_MAXBLOCKSIZE - psize));
3664 
3665                 for (lsize = SPA_MAXBLOCKSIZE; lsize > psize;
3666                     lsize -= SPA_MINBLOCKSIZE) {
3667                         for (c = 0; c < ZIO_COMPRESS_FUNCTIONS; c++) {
3668                                 if (zio_decompress_data(c, pabd,
3669                                     lbuf, psize, lsize) == 0 &&
3670                                     zio_decompress_data_buf(c, pbuf2,
3671                                     lbuf2, psize, lsize) == 0 &&
3672                                     bcmp(lbuf, lbuf2, lsize) == 0)
3673                                         break;
3674                         }
3675                         if (c != ZIO_COMPRESS_FUNCTIONS)
3676                                 break;
3677                         lsize -= SPA_MINBLOCKSIZE;
3678                 }
3679 
3680                 umem_free(pbuf2, SPA_MAXBLOCKSIZE);
3681                 umem_free(lbuf2, SPA_MAXBLOCKSIZE);
3682 
3683                 if (lsize <= psize) {
3684                         (void) printf("Decompress of %s failed\n", thing);
3685                         goto out;
3686                 }
3687                 buf = lbuf;
3688                 size = lsize;
3689         } else {
3690                 buf = abd_to_buf(pabd);
3691                 size = psize;
3692         }
3693 
3694         if (flags & ZDB_FLAG_PRINT_BLKPTR)
3695                 zdb_print_blkptr((blkptr_t *)(void *)
3696                     ((uintptr_t)buf + (uintptr_t)blkptr_offset), flags);
3697         else if (flags & ZDB_FLAG_RAW)
3698                 zdb_dump_block_raw(buf, size, flags);
3699         else if (flags & ZDB_FLAG_INDIRECT)
3700                 zdb_dump_indirect((blkptr_t *)buf, size / sizeof (blkptr_t),
3701                     flags);
3702         else if (flags & ZDB_FLAG_GBH)
3703                 zdb_dump_gbh(buf, flags);
3704         else
3705                 zdb_dump_block(thing, buf, size, flags);
3706 
3707 out:
3708         abd_free(pabd);
3709         umem_free(lbuf, SPA_MAXBLOCKSIZE);
3710         free(dup);
3711 }
3712 
3713 static void
3714 zdb_embedded_block(char *thing)
3715 {
3716         blkptr_t bp;
3717         unsigned long long *words = (void *)&bp;
3718         char buf[SPA_MAXBLOCKSIZE];
3719         int err;
3720 
3721         bzero(&bp, sizeof (bp));
3722         err = sscanf(thing, "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx:"
3723             "%llx:%llx:%llx:%llx:%llx:%llx:%llx:%llx",
3724             words + 0, words + 1, words + 2, words + 3,
3725             words + 4, words + 5, words + 6, words + 7,
3726             words + 8, words + 9, words + 10, words + 11,
3727             words + 12, words + 13, words + 14, words + 15);
3728         if (err != 16) {
3729                 (void) printf("invalid input format\n");
3730                 exit(1);
3731         }
3732         ASSERT3U(BPE_GET_LSIZE(&bp), <=, SPA_MAXBLOCKSIZE);
3733         err = decode_embedded_bp(&bp, buf, BPE_GET_LSIZE(&bp));
3734         if (err != 0) {
3735                 (void) printf("decode failed: %u\n", err);
3736                 exit(1);
3737         }
3738         zdb_dump_block_raw(buf, BPE_GET_LSIZE(&bp), 0);
3739 }
3740 
3741 static boolean_t
3742 pool_match(nvlist_t *cfg, char *tgt)
3743 {
3744         uint64_t v, guid = strtoull(tgt, NULL, 0);
3745         char *s;
3746 
3747         if (guid != 0) {
3748                 if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
3749                         return (v == guid);
3750         } else {
3751                 if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
3752                         return (strcmp(s, tgt) == 0);
3753         }
3754         return (B_FALSE);
3755 }
3756 
3757 static char *
3758 find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
3759 {
3760         nvlist_t *pools;
3761         nvlist_t *match = NULL;
3762         char *name = NULL;
3763         char *sepp = NULL;
3764         char sep = '\0';
3765         int count = 0;
3766         importargs_t args;
3767 
3768         bzero(&args, sizeof (args));
3769         args.paths = dirc;
3770         args.path = dirv;
3771         args.can_be_active = B_TRUE;
3772 
3773         if ((sepp = strpbrk(*target, "/@")) != NULL) {
3774                 sep = *sepp;
3775                 *sepp = '\0';
3776         }
3777 
3778         pools = zpool_search_import(g_zfs, &args);
3779 
3780         if (pools != NULL) {
3781                 nvpair_t *elem = NULL;
3782                 while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
3783                         verify(nvpair_value_nvlist(elem, configp) == 0);
3784                         if (pool_match(*configp, *target)) {
3785                                 count++;
3786                                 if (match != NULL) {
3787                                         /* print previously found config */
3788                                         if (name != NULL) {
3789                                                 (void) printf("%s\n", name);
3790                                                 dump_nvlist(match, 8);
3791                                                 name = NULL;
3792                                         }
3793                                         (void) printf("%s\n",
3794                                             nvpair_name(elem));
3795                                         dump_nvlist(*configp, 8);
3796                                 } else {
3797                                         match = *configp;
3798                                         name = nvpair_name(elem);
3799                                 }
3800                         }
3801                 }
3802         }
3803         if (count > 1)
3804                 (void) fatal("\tMatched %d pools - use pool GUID "
3805                     "instead of pool name or \n"
3806                     "\tpool name part of a dataset name to select pool", count);
3807 
3808         if (sepp)
3809                 *sepp = sep;
3810         /*
3811          * If pool GUID was specified for pool id, replace it with pool name
3812          */
3813         if (name && (strstr(*target, name) != *target)) {
3814                 int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
3815 
3816                 *target = umem_alloc(sz, UMEM_NOFAIL);
3817                 (void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
3818         }
3819 
3820         *configp = name ? match : NULL;
3821 
3822         return (name);
3823 }
3824 
3825 int
3826 main(int argc, char **argv)
3827 {
3828         int c;
3829         struct rlimit rl = { 1024, 1024 };
3830         spa_t *spa = NULL;
3831         objset_t *os = NULL;
3832         int dump_all = 1;
3833         int verbose = 0;
3834         int error = 0;
3835         char **searchdirs = NULL;
3836         int nsearch = 0;
3837         char *target;
3838         nvlist_t *policy = NULL;
3839         uint64_t max_txg = UINT64_MAX;
3840         int flags = ZFS_IMPORT_MISSING_LOG;
3841         int rewind = ZPOOL_NEVER_REWIND;
3842         char *spa_config_path_env;
3843         boolean_t target_is_spa = B_TRUE;
3844 
3845         (void) setrlimit(RLIMIT_NOFILE, &rl);
3846         (void) enable_extended_FILE_stdio(-1, -1);
3847 
3848         dprintf_setup(&argc, argv);
3849 
3850         /*
3851          * If there is an environment variable SPA_CONFIG_PATH it overrides
3852          * default spa_config_path setting. If -U flag is specified it will
3853          * override this environment variable settings once again.
3854          */
3855         spa_config_path_env = getenv("SPA_CONFIG_PATH");
3856         if (spa_config_path_env != NULL)
3857                 spa_config_path = spa_config_path_env;
3858 
3859         while ((c = getopt(argc, argv,
3860             "AbcCdDeEFGhiI:lLmMo:Op:PqRsSt:uU:vVx:X")) != -1) {
3861                 switch (c) {
3862                 case 'b':
3863                 case 'c':
3864                 case 'C':
3865                 case 'd':
3866                 case 'D':
3867                 case 'E':
3868                 case 'G':
3869                 case 'h':
3870                 case 'i':
3871                 case 'l':
3872                 case 'm':
3873                 case 'M':
3874                 case 'O':
3875                 case 'R':
3876                 case 's':
3877                 case 'S':
3878                 case 'u':
3879                         dump_opt[c]++;
3880                         dump_all = 0;
3881                         break;
3882                 case 'A':
3883                 case 'e':
3884                 case 'F':
3885                 case 'L':
3886                 case 'P':
3887                 case 'q':
3888                 case 'X':
3889                         dump_opt[c]++;
3890                         break;
3891                 /* NB: Sort single match options below. */
3892                 case 'I':
3893                         max_inflight = strtoull(optarg, NULL, 0);
3894                         if (max_inflight == 0) {
3895                                 (void) fprintf(stderr, "maximum number "
3896                                     "of inflight I/Os must be greater "
3897                                     "than 0\n");
3898                                 usage();
3899                         }
3900                         break;
3901                 case 'o':
3902                         error = set_global_var(optarg);
3903                         if (error != 0)
3904                                 usage();
3905                         break;
3906                 case 'p':
3907                         if (searchdirs == NULL) {
3908                                 searchdirs = umem_alloc(sizeof (char *),
3909                                     UMEM_NOFAIL);
3910                         } else {
3911                                 char **tmp = umem_alloc((nsearch + 1) *
3912                                     sizeof (char *), UMEM_NOFAIL);
3913                                 bcopy(searchdirs, tmp, nsearch *
3914                                     sizeof (char *));
3915                                 umem_free(searchdirs,
3916                                     nsearch * sizeof (char *));
3917                                 searchdirs = tmp;
3918                         }
3919                         searchdirs[nsearch++] = optarg;
3920                         break;
3921                 case 't':
3922                         max_txg = strtoull(optarg, NULL, 0);
3923                         if (max_txg < TXG_INITIAL) {
3924                                 (void) fprintf(stderr, "incorrect txg "
3925                                     "specified: %s\n", optarg);
3926                                 usage();
3927                         }
3928                         break;
3929                 case 'U':
3930                         spa_config_path = optarg;
3931                         if (spa_config_path[0] != '/') {
3932                                 (void) fprintf(stderr,
3933                                     "cachefile must be an absolute path "
3934                                     "(i.e. start with a slash)\n");
3935                                 usage();
3936                         }
3937                         break;
3938                 case 'v':
3939                         verbose++;
3940                         break;
3941                 case 'V':
3942                         flags = ZFS_IMPORT_VERBATIM;
3943                         break;
3944                 case 'x':
3945                         vn_dumpdir = optarg;
3946                         break;
3947                 default:
3948                         usage();
3949                         break;
3950                 }
3951         }
3952 
3953         if (!dump_opt['e'] && searchdirs != NULL) {
3954                 (void) fprintf(stderr, "-p option requires use of -e\n");
3955                 usage();
3956         }
3957 
3958         /*
3959          * ZDB does not typically re-read blocks; therefore limit the ARC
3960          * to 256 MB, which can be used entirely for metadata.
3961          */
3962         zfs_arc_max = zfs_arc_meta_limit = 256 * 1024 * 1024;
3963 
3964         /*
3965          * "zdb -c" uses checksum-verifying scrub i/os which are async reads.
3966          * "zdb -b" uses traversal prefetch which uses async reads.
3967          * For good performance, let several of them be active at once.
3968          */
3969         zfs_vdev_async_read_max_active = 10;
3970 
3971         /*
3972          * Disable reference tracking for better performance.
3973          */
3974         reference_tracking_enable = B_FALSE;
3975 
3976         kernel_init(FREAD);
3977         g_zfs = libzfs_init();
3978         ASSERT(g_zfs != NULL);
3979 
3980         if (dump_all)
3981                 verbose = MAX(verbose, 1);
3982 
3983         for (c = 0; c < 256; c++) {
3984                 if (dump_all && strchr("AeEFlLOPRSX", c) == NULL)
3985                         dump_opt[c] = 1;
3986                 if (dump_opt[c])
3987                         dump_opt[c] += verbose;
3988         }
3989 
3990         aok = (dump_opt['A'] == 1) || (dump_opt['A'] > 2);
3991         zfs_recover = (dump_opt['A'] > 1);
3992 
3993         argc -= optind;
3994         argv += optind;
3995 
3996         if (argc < 2 && dump_opt['R'])
3997                 usage();
3998 
3999         if (dump_opt['E']) {
4000                 if (argc != 1)
4001                         usage();
4002                 zdb_embedded_block(argv[0]);
4003                 return (0);
4004         }
4005 
4006         if (argc < 1) {
4007                 if (!dump_opt['e'] && dump_opt['C']) {
4008                         dump_cachefile(spa_config_path);
4009                         return (0);
4010                 }
4011                 usage();
4012         }
4013 
4014         if (dump_opt['l'])
4015                 return (dump_label(argv[0]));
4016 
4017         if (dump_opt['O']) {
4018                 if (argc != 2)
4019                         usage();
4020                 dump_opt['v'] = verbose + 3;
4021                 return (dump_path(argv[0], argv[1]));
4022         }
4023 
4024         if (dump_opt['X'] || dump_opt['F'])
4025                 rewind = ZPOOL_DO_REWIND |
4026                     (dump_opt['X'] ? ZPOOL_EXTREME_REWIND : 0);
4027 
4028         if (nvlist_alloc(&policy, NV_UNIQUE_NAME_TYPE, 0) != 0 ||
4029             nvlist_add_uint64(policy, ZPOOL_REWIND_REQUEST_TXG, max_txg) != 0 ||
4030             nvlist_add_uint32(policy, ZPOOL_REWIND_REQUEST, rewind) != 0)
4031                 fatal("internal error: %s", strerror(ENOMEM));
4032 
4033         error = 0;
4034         target = argv[0];
4035 
4036         if (dump_opt['e']) {
4037                 nvlist_t *cfg = NULL;
4038                 char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
4039 
4040                 error = ENOENT;
4041                 if (name) {
4042                         if (dump_opt['C'] > 1) {
4043                                 (void) printf("\nConfiguration for import:\n");
4044                                 dump_nvlist(cfg, 8);
4045                         }
4046                         if (nvlist_add_nvlist(cfg,
4047                             ZPOOL_REWIND_POLICY, policy) != 0) {
4048                                 fatal("can't open '%s': %s",
4049                                     target, strerror(ENOMEM));
4050                         }
4051                         error = spa_import(name, cfg, NULL, flags);
4052                 }
4053         }
4054 
4055         if (strpbrk(target, "/@") != NULL) {
4056                 size_t targetlen;
4057 
4058                 target_is_spa = B_FALSE;
4059                 /*
4060                  * Remove any trailing slash.  Later code would get confused
4061                  * by it, but we want to allow it so that "pool/" can
4062                  * indicate that we want to dump the topmost filesystem,
4063                  * rather than the whole pool.
4064                  */
4065                 targetlen = strlen(target);
4066                 if (targetlen != 0 && target[targetlen - 1] == '/')
4067                         target[targetlen - 1] = '\0';
4068         }
4069 
4070         if (error == 0) {
4071                 if (target_is_spa || dump_opt['R']) {
4072                         error = spa_open_rewind(target, &spa, FTAG, policy,
4073                             NULL);
4074                         if (error) {
4075                                 /*
4076                                  * If we're missing the log device then
4077                                  * try opening the pool after clearing the
4078                                  * log state.
4079                                  */
4080                                 mutex_enter(&spa_namespace_lock);
4081                                 if ((spa = spa_lookup(target)) != NULL &&
4082                                     spa->spa_log_state == SPA_LOG_MISSING) {
4083                                         spa->spa_log_state = SPA_LOG_CLEAR;
4084                                         error = 0;
4085                                 }
4086                                 mutex_exit(&spa_namespace_lock);
4087 
4088                                 if (!error) {
4089                                         error = spa_open_rewind(target, &spa,
4090                                             FTAG, policy, NULL);
4091                                 }
4092                         }
4093                 } else {
4094                         error = open_objset(target, DMU_OST_ANY, FTAG, &os);
4095                 }
4096         }
4097         nvlist_free(policy);
4098 
4099         if (error)
4100                 fatal("can't open '%s': %s", target, strerror(error));
4101 
4102         argv++;
4103         argc--;
4104         if (!dump_opt['R']) {
4105                 if (argc > 0) {
4106                         zopt_objects = argc;
4107                         zopt_object = calloc(zopt_objects, sizeof (uint64_t));
4108                         for (unsigned i = 0; i < zopt_objects; i++) {
4109                                 errno = 0;
4110                                 zopt_object[i] = strtoull(argv[i], NULL, 0);
4111                                 if (zopt_object[i] == 0 && errno != 0)
4112                                         fatal("bad number %s: %s",
4113                                             argv[i], strerror(errno));
4114                         }
4115                 }
4116                 if (os != NULL) {
4117                         dump_dir(os);
4118                 } else if (zopt_objects > 0 && !dump_opt['m']) {
4119                         dump_dir(spa->spa_meta_objset);
4120                 } else {
4121                         dump_zpool(spa);
4122                 }
4123         } else {
4124                 flagbits['b'] = ZDB_FLAG_PRINT_BLKPTR;
4125                 flagbits['c'] = ZDB_FLAG_CHECKSUM;
4126                 flagbits['d'] = ZDB_FLAG_DECOMPRESS;
4127                 flagbits['e'] = ZDB_FLAG_BSWAP;
4128                 flagbits['g'] = ZDB_FLAG_GBH;
4129                 flagbits['i'] = ZDB_FLAG_INDIRECT;
4130                 flagbits['p'] = ZDB_FLAG_PHYS;
4131                 flagbits['r'] = ZDB_FLAG_RAW;
4132 
4133                 for (int i = 0; i < argc; i++)
4134                         zdb_read_block(argv[i], spa);
4135         }
4136 
4137         if (os != NULL)
4138                 close_objset(os, FTAG);
4139         else
4140                 spa_close(spa, FTAG);
4141 
4142         fuid_table_destroy();
4143 
4144         dump_debug_buffer();
4145 
4146         libzfs_fini(g_zfs);
4147         kernel_fini();
4148 
4149         return (0);
4150 }