8115 parallel zfs mount


   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  24  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  25  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  26  * Copyright (c) 2014 Integros [integros.com]
  27  * Copyright 2017 Joyent, Inc.
  28  */
  29 
  30 /*
  31  * The objective of this program is to provide a DMU/ZAP/SPA stress test
  32  * that runs entirely in userland, is easy to use, and easy to extend.
  33  *
  34  * The overall design of the ztest program is as follows:
  35  *
  36  * (1) For each major functional area (e.g. adding vdevs to a pool,
  37  *     creating and destroying datasets, reading and writing objects, etc)
  38  *     we have a simple routine to test that functionality.  These
  39  *     individual routines do not have to do anything "stressful".
  40  *
  41  * (2) We turn these simple functionality tests into a stress test by
  42  *     running them all in parallel, with as many threads as desired,
  43  *     and spread across as many datasets, objects, and vdevs as desired.
  44  *
  45  * (3) While all this is happening, we inject faults into the pool to
  46  *     verify that self-healing data really works.
  47  *


 228 } ztest_block_tag_t;
 229 
 230 typedef struct bufwad {
 231         uint64_t        bw_index;
 232         uint64_t        bw_txg;
 233         uint64_t        bw_data;
 234 } bufwad_t;
 235 
 236 /*
 237  * XXX -- fix zfs range locks to be generic so we can use them here.
 238  */
 239 typedef enum {
 240         RL_READER,
 241         RL_WRITER,
 242         RL_APPEND
 243 } rl_type_t;
 244 
 245 typedef struct rll {
 246         void            *rll_writer;
 247         int             rll_readers;
 248         mutex_t         rll_lock;
 249         cond_t          rll_cv;
 250 } rll_t;
 251 
 252 typedef struct rl {
 253         uint64_t        rl_object;
 254         uint64_t        rl_offset;
 255         uint64_t        rl_size;
 256         rll_t           *rl_lock;
 257 } rl_t;
 258 
 259 #define ZTEST_RANGE_LOCKS       64
 260 #define ZTEST_OBJECT_LOCKS      64
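Note: both lock counts are powers of two, so a lock can be picked with a cheap mask instead of a modulo; ztest_object_lock() later in this file does exactly that:

    rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];

Distinct objects can hash to the same slot, but for a stress test that false sharing is harmless; it only adds a little extra serialization.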
 261 
 262 /*
 263  * Object descriptor.  Used as a template for object lookup/create/remove.
 264  */
 265 typedef struct ztest_od {
 266         uint64_t        od_dir;
 267         uint64_t        od_object;
 268         dmu_object_type_t od_type;
 269         dmu_object_type_t od_crtype;
 270         uint64_t        od_blocksize;
 271         uint64_t        od_crblocksize;
 272         uint64_t        od_gen;
 273         uint64_t        od_crgen;
 274         char            od_name[ZFS_MAX_DATASET_NAME_LEN];
 275 } ztest_od_t;
 276 
 277 /*
 278  * Per-dataset state.
 279  */
 280 typedef struct ztest_ds {
 281         ztest_shared_ds_t *zd_shared;
 282         objset_t        *zd_os;
 283         rwlock_t        zd_zilog_lock;
 284         zilog_t         *zd_zilog;
 285         ztest_od_t      *zd_od;         /* debugging aid */
 286         char            zd_name[ZFS_MAX_DATASET_NAME_LEN];
 287         mutex_t         zd_dirobj_lock;
 288         rll_t           zd_object_lock[ZTEST_OBJECT_LOCKS];
 289         rll_t           zd_range_lock[ZTEST_RANGE_LOCKS];
 290 } ztest_ds_t;
 291 
 292 /*
 293  * Per-iteration state.
 294  */
 295 typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id);
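Every functional-area test from the overview at the top of the file has this signature. A minimal sketch of the shape such a routine takes (the name is hypothetical; real ones appear throughout the file):

    void
    ztest_example_feature(ztest_ds_t *zd, uint64_t id)
    {
            /* exercise one feature; the harness supplies the parallelism */
    }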
 296 
 297 typedef struct ztest_info {
 298         ztest_func_t    *zi_func;       /* test function */
 299         uint64_t        zi_iters;       /* iterations per execution */
 300         uint64_t        *zi_interval;   /* execute every <interval> seconds */
 301 } ztest_info_t;
 302 
 303 typedef struct ztest_shared_callstate {
 304         uint64_t        zc_count;       /* per-pass count */
 305         uint64_t        zc_time;        /* per-pass time */
 306         uint64_t        zc_next;        /* next time to call this function */
 307 } ztest_shared_callstate_t;


 374         { ztest_reguid,                         1,      &zopt_rarely        },
 375         { ztest_spa_rename,                     1,      &zopt_rarely        },
 376         { ztest_scrub,                          1,      &zopt_rarely        },
 377         { ztest_spa_upgrade,                    1,      &zopt_rarely        },
 378         { ztest_dsl_dataset_promote_busy,       1,      &zopt_rarely        },
 379         { ztest_vdev_attach_detach,             1,      &zopt_sometimes     },
 380         { ztest_vdev_LUN_growth,                1,      &zopt_rarely        },
 381         { ztest_vdev_add_remove,                1,
 382             &ztest_opts.zo_vdevtime                         },
 383         { ztest_vdev_aux_add_remove,            1,
 384             &ztest_opts.zo_vdevtime                         },
 385 };
 386 
 387 #define ZTEST_FUNCS     (sizeof (ztest_info) / sizeof (ztest_info_t))
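Each ztest_info_t entry pairs a test function with a per-execution iteration count and a pointer to its call interval, so wiring in a new test is a single line in the ztest_info[] table above. A hypothetical entry (the function name is illustrative only):

    { ztest_my_new_test,                    1,      &zopt_sometimes     },

Because zi_interval is a pointer, intervals such as ztest_opts.zo_vdevtime can be computed at startup rather than fixed at compile time.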
 388 
 389 /*
 390  * The following struct is used to hold a list of uncalled commit callbacks.
 391  * The callbacks are ordered by txg number.
 392  */
 393 typedef struct ztest_cb_list {
 394         mutex_t zcl_callbacks_lock;
 395         list_t  zcl_callbacks;
 396 } ztest_cb_list_t;
 397 
 398 /*
 399  * Stuff we need to share writably between parent and child.
 400  */
 401 typedef struct ztest_shared {
 402         boolean_t       zs_do_init;
 403         hrtime_t        zs_proc_start;
 404         hrtime_t        zs_proc_stop;
 405         hrtime_t        zs_thread_start;
 406         hrtime_t        zs_thread_stop;
 407         hrtime_t        zs_thread_kill;
 408         uint64_t        zs_enospc_count;
 409         uint64_t        zs_vdev_next_leaf;
 410         uint64_t        zs_vdev_aux;
 411         uint64_t        zs_alloc;
 412         uint64_t        zs_space;
 413         uint64_t        zs_splits;
 414         uint64_t        zs_mirrors;
 415         uint64_t        zs_metaslab_sz;
 416         uint64_t        zs_metaslab_df_alloc_threshold;
 417         uint64_t        zs_guid;
 418 } ztest_shared_t;
 419 
 420 #define ID_PARALLEL     -1ULL
 421 
 422 static char ztest_dev_template[] = "%s/%s.%llua";
 423 static char ztest_aux_template[] = "%s/%s.%s.%llu";
 424 ztest_shared_t *ztest_shared;
 425 
 426 static spa_t *ztest_spa = NULL;
 427 static ztest_ds_t *ztest_ds;
 428 
 429 static mutex_t ztest_vdev_lock;
 430 
 431 /*
 432  * The ztest_name_lock protects the pool and dataset namespace used by
 433  * the individual tests. To modify the namespace, consumers must grab
 434  * this lock as writer. Grabbing the lock as reader will ensure that the
 435  * namespace does not change while the lock is held.
 436  */
 437 static rwlock_t ztest_name_lock;
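Concretely, tests that only need the namespace to stay put take the lock as reader, while tests that create, destroy, or rename pools and datasets take it as writer; both patterns appear in functions later in this file:

    (void) rw_rdlock(&ztest_name_lock);         /* namespace stays stable */
    ...
    (void) rw_unlock(&ztest_name_lock);

    VERIFY(rw_wrlock(&ztest_name_lock) == 0);   /* about to change it */
    ...
    VERIFY(rw_unlock(&ztest_name_lock) == 0);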
 438 
 439 static boolean_t ztest_dump_core = B_TRUE;
 440 static boolean_t ztest_exiting;
 441 
 442 /* Global commit callback list */
 443 static ztest_cb_list_t zcl;
 444 
 445 enum ztest_object {
 446         ZTEST_META_DNODE = 0,
 447         ZTEST_DIROBJ,
 448         ZTEST_OBJECTS
 449 };
 450 
 451 static void usage(boolean_t) __NORETURN;
 452 
 453 /*
 454  * These libumem hooks provide a reasonable set of defaults for the allocator's
 455  * debugging facilities.
 456  */
 457 const char *


1073         VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0);
1074 
1075         error = spa_prop_set(spa, props);
1076 
1077         nvlist_free(props);
1078 
1079         if (error == ENOSPC) {
1080                 ztest_record_enospc(FTAG);
1081                 return (error);
1082         }
1083         ASSERT0(error);
1084 
1085         return (error);
1086 }
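Note the error handling above: ENOSPC is the one tolerated failure, since a busy test pool can legitimately run out of space; it is recorded via ztest_record_enospc() and returned, while anything else trips ASSERT0(error).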
1087 
1088 static void
1089 ztest_rll_init(rll_t *rll)
1090 {
1091         rll->rll_writer = NULL;
1092         rll->rll_readers = 0;
1093         VERIFY(_mutex_init(&rll->rll_lock, USYNC_THREAD, NULL) == 0);
1094         VERIFY(cond_init(&rll->rll_cv, USYNC_THREAD, NULL) == 0);
1095 }
1096 
1097 static void
1098 ztest_rll_destroy(rll_t *rll)
1099 {
1100         ASSERT(rll->rll_writer == NULL);
1101         ASSERT(rll->rll_readers == 0);
1102         VERIFY(_mutex_destroy(&rll->rll_lock) == 0);
1103         VERIFY(cond_destroy(&rll->rll_cv) == 0);
1104 }
1105 
1106 static void
1107 ztest_rll_lock(rll_t *rll, rl_type_t type)
1108 {
1109         VERIFY(mutex_lock(&rll->rll_lock) == 0);
1110 
1111         if (type == RL_READER) {
1112                 while (rll->rll_writer != NULL)
1113                         (void) cond_wait(&rll->rll_cv, &rll->rll_lock);
1114                 rll->rll_readers++;
1115         } else {
1116                 while (rll->rll_writer != NULL || rll->rll_readers)
1117                         (void) cond_wait(&rll->rll_cv, &rll->rll_lock);
1118                 rll->rll_writer = curthread;
1119         }
1120 
1121         VERIFY(mutex_unlock(&rll->rll_lock) == 0);
1122 }
1123 
1124 static void
1125 ztest_rll_unlock(rll_t *rll)
1126 {
1127         VERIFY(mutex_lock(&rll->rll_lock) == 0);
1128 
1129         if (rll->rll_writer) {
1130                 ASSERT(rll->rll_readers == 0);
1131                 rll->rll_writer = NULL;
1132         } else {
1133                 ASSERT(rll->rll_readers != 0);
1134                 ASSERT(rll->rll_writer == NULL);
1135                 rll->rll_readers--;
1136         }
1137 
1138         if (rll->rll_writer == NULL && rll->rll_readers == 0)
1139                 VERIFY(cond_broadcast(&rll->rll_cv) == 0);
1140 
1141         VERIFY(mutex_unlock(&rll->rll_lock) == 0);
1142 }
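Together, ztest_rll_lock() and ztest_rll_unlock() form a small condition-variable reader/writer lock: readers wait only while a writer is active, a writer waits until there are no readers and no writer, and the last holder out broadcasts. A usage sketch:

    ztest_rll_lock(rll, RL_READER);     /* shared; may overlap other readers */
    ztest_rll_unlock(rll);

    ztest_rll_lock(rll, RL_WRITER);     /* exclusive */
    ztest_rll_unlock(rll);

Readers are favored here (they wait only for an active writer, not for queued ones), which is acceptable for a stress test but would starve writers in a fairness-sensitive setting.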
1143 
1144 static void
1145 ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type)
1146 {
1147         rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
1148 
1149         ztest_rll_lock(rll, type);
1150 }
1151 
1152 static void
1153 ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
1154 {
1155         rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
1156 
1157         ztest_rll_unlock(rll);
1158 }
1159 
1160 static rl_t *
1161 ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,


1180 ztest_range_unlock(rl_t *rl)
1181 {
1182         rll_t *rll = rl->rl_lock;
1183 
1184         ztest_rll_unlock(rll);
1185 
1186         umem_free(rl, sizeof (*rl));
1187 }
1188 
1189 static void
1190 ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os)
1191 {
1192         zd->zd_os = os;
1193         zd->zd_zilog = dmu_objset_zil(os);
1194         zd->zd_shared = szd;
1195         dmu_objset_name(os, zd->zd_name);
1196 
1197         if (zd->zd_shared != NULL)
1198                 zd->zd_shared->zd_seq = 0;
1199 
1200         VERIFY(rwlock_init(&zd->zd_zilog_lock, USYNC_THREAD, NULL) == 0);
1201         VERIFY(_mutex_init(&zd->zd_dirobj_lock, USYNC_THREAD, NULL) == 0);
1202 
1203         for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
1204                 ztest_rll_init(&zd->zd_object_lock[l]);
1205 
1206         for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
1207                 ztest_rll_init(&zd->zd_range_lock[l]);
1208 }
1209 
1210 static void
1211 ztest_zd_fini(ztest_ds_t *zd)
1212 {
1213         VERIFY(_mutex_destroy(&zd->zd_dirobj_lock) == 0);
1214 
1215         for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
1216                 ztest_rll_destroy(&zd->zd_object_lock[l]);
1217 
1218         for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
1219                 ztest_rll_destroy(&zd->zd_range_lock[l]);
1220 }
1221 
1222 #define TXG_MIGHTWAIT   (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT)
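One call in ten uses TXG_NOWAIT, so ztest_tx_assign() below exercises the non-blocking assignment path (where dmu_tx_assign() can fail and the caller must back off) as well as the ordinary blocking path.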
1223 
1224 static uint64_t
1225 ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
1226 {
1227         uint64_t txg;
1228         int error;
1229 
1230         /*
1231          * Attempt to assign tx to some transaction group.
1232          */
1233         error = dmu_tx_assign(tx, txg_how);


1948         return (lr);
1949 }
1950 
1951 void
1952 ztest_lr_free(void *lr, size_t lrsize, char *name)
1953 {
1954         size_t namesize = name ? strlen(name) + 1 : 0;
1955 
1956         umem_free(lr, lrsize + namesize);
1957 }
1958 
1959 /*
1960  * Lookup a bunch of objects.  Returns the number of objects not found.
1961  */
1962 static int
1963 ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count)
1964 {
1965         int missing = 0;
1966         int error;
1967 
1968         ASSERT(_mutex_held(&zd->zd_dirobj_lock));
1969 
1970         for (int i = 0; i < count; i++, od++) {
1971                 od->od_object = 0;
1972                 error = zap_lookup(zd->zd_os, od->od_dir, od->od_name,
1973                     sizeof (uint64_t), 1, &od->od_object);
1974                 if (error) {
1975                         ASSERT(error == ENOENT);
1976                         ASSERT(od->od_object == 0);
1977                         missing++;
1978                 } else {
1979                         dmu_buf_t *db;
1980                         ztest_block_tag_t *bbt;
1981                         dmu_object_info_t doi;
1982 
1983                         ASSERT(od->od_object != 0);
1984                         ASSERT(missing == 0);   /* there should be no gaps */
1985 
1986                         ztest_object_lock(zd, od->od_object, RL_READER);
1987                         VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os,
1988                             od->od_object, FTAG, &db));
1989                         dmu_object_info_from_db(db, &doi);
1990                         bbt = ztest_bt_bonus(db);
1991                         ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
1992                         od->od_type = doi.doi_type;
1993                         od->od_blocksize = doi.doi_data_block_size;
1994                         od->od_gen = bbt->bt_gen;
1995                         dmu_buf_rele(db, FTAG);
1996                         ztest_object_unlock(zd, od->od_object);
1997                 }
1998         }
1999 
2000         return (missing);
2001 }
2002 
2003 static int
2004 ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count)
2005 {
2006         int missing = 0;
2007 
2008         ASSERT(_mutex_held(&zd->zd_dirobj_lock));
2009 
2010         for (int i = 0; i < count; i++, od++) {
2011                 if (missing) {
2012                         od->od_object = 0;
2013                         missing++;
2014                         continue;
2015                 }
2016 
2017                 lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
2018 
2019                 lr->lr_doid = od->od_dir;
2020                 lr->lr_foid = 0;     /* 0 to allocate, > 0 to claim */
2021                 lr->lrz_type = od->od_crtype;
2022                 lr->lrz_blocksize = od->od_crblocksize;
2023                 lr->lrz_ibshift = ztest_random_ibshift();
2024                 lr->lrz_bonustype = DMU_OT_UINT64_OTHER;
2025                 lr->lrz_bonuslen = dmu_bonus_max();
2026                 lr->lr_gen = od->od_crgen;
2027                 lr->lr_crtime[0] = time(NULL);
2028 


2033                 } else {
2034                         od->od_object = lr->lr_foid;
2035                         od->od_type = od->od_crtype;
2036                         od->od_blocksize = od->od_crblocksize;
2037                         od->od_gen = od->od_crgen;
2038                         ASSERT(od->od_object != 0);
2039                 }
2040 
2041                 ztest_lr_free(lr, sizeof (*lr), od->od_name);
2042         }
2043 
2044         return (missing);
2045 }
2046 
2047 static int
2048 ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count)
2049 {
2050         int missing = 0;
2051         int error;
2052 
2053         ASSERT(_mutex_held(&zd->zd_dirobj_lock));
2054 
2055         od += count - 1;
2056 
2057         for (int i = count - 1; i >= 0; i--, od--) {
2058                 if (missing) {
2059                         missing++;
2060                         continue;
2061                 }
2062 
2063                 /*
2064                  * No object was found.
2065                  */
2066                 if (od->od_object == 0)
2067                         continue;
2068 
2069                 lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
2070 
2071                 lr->lr_doid = od->od_dir;
2072 
2073                 if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) {


2179 ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
2180 {
2181         int err;
2182         ztest_block_tag_t wbt;
2183         dmu_object_info_t doi;
2184         enum ztest_io_type io_type;
2185         uint64_t blocksize;
2186         void *data;
2187 
2188         VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0);
2189         blocksize = doi.doi_data_block_size;
2190         data = umem_alloc(blocksize, UMEM_NOFAIL);
2191 
2192         /*
2193          * Pick an i/o type at random, biased toward writing block tags.
2194          */
2195         io_type = ztest_random(ZTEST_IO_TYPES);
2196         if (ztest_random(2) == 0)
2197                 io_type = ZTEST_IO_WRITE_TAG;
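                /*
                 * With ZTEST_IO_TYPES choices equally likely and a 1-in-2
                 * override, WRITE_TAG ends up chosen with probability
                 * 1/2 + 1/(2 * ZTEST_IO_TYPES); the other types share the
                 * rest.
                 */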
2198 
2199         (void) rw_rdlock(&zd->zd_zilog_lock);
2200 
2201         switch (io_type) {
2202 
2203         case ZTEST_IO_WRITE_TAG:
2204                 ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0);
2205                 (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt);
2206                 break;
2207 
2208         case ZTEST_IO_WRITE_PATTERN:
2209                 (void) memset(data, 'a' + (object + offset) % 5, blocksize);
2210                 if (ztest_random(2) == 0) {
2211                         /*
2212                          * Induce fletcher2 collisions to ensure that
2213                          * zio_ddt_collision() detects and resolves them
2214                          * when using fletcher2-verify for deduplication.
2215                          */
2216                         ((uint64_t *)data)[0] ^= 1ULL << 63;
2217                         ((uint64_t *)data)[4] ^= 1ULL << 63;
2218                 }
2219                 (void) ztest_write(zd, object, offset, blocksize, data);
2220                 break;
2221 
2222         case ZTEST_IO_WRITE_ZEROES:
2223                 bzero(data, blocksize);
2224                 (void) ztest_write(zd, object, offset, blocksize, data);
2225                 break;
2226 
2227         case ZTEST_IO_TRUNCATE:
2228                 (void) ztest_truncate(zd, object, offset, blocksize);
2229                 break;
2230 
2231         case ZTEST_IO_SETATTR:
2232                 (void) ztest_setattr(zd, object);
2233                 break;
2234 
2235         case ZTEST_IO_REWRITE:
2236                 (void) rw_rdlock(&ztest_name_lock);
2237                 err = ztest_dsl_prop_set_uint64(zd->zd_name,
2238                     ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa),
2239                     B_FALSE);
2240                 VERIFY(err == 0 || err == ENOSPC);
2241                 err = ztest_dsl_prop_set_uint64(zd->zd_name,
2242                     ZFS_PROP_COMPRESSION,
2243                     ztest_random_dsl_prop(ZFS_PROP_COMPRESSION),
2244                     B_FALSE);
2245                 VERIFY(err == 0 || err == ENOSPC);
2246                 (void) rw_unlock(&ztest_name_lock);
2247 
2248                 VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data,
2249                     DMU_READ_NO_PREFETCH));
2250 
2251                 (void) ztest_write(zd, object, offset, blocksize, data);
2252                 break;
2253         }
2254 
2255         (void) rw_unlock(&zd->zd_zilog_lock);
2256 
2257         umem_free(data, blocksize);
2258 }
2259 
2260 /*
2261  * Initialize an object description template.
2262  */
2263 static void
2264 ztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index,
2265     dmu_object_type_t type, uint64_t blocksize, uint64_t gen)
2266 {
2267         od->od_dir = ZTEST_DIROBJ;
2268         od->od_object = 0;
2269 
2270         od->od_crtype = type;
2271         od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize();
2272         od->od_crgen = gen;
2273 
2274         od->od_type = DMU_OT_NONE;
2275         od->od_blocksize = 0;
2276         od->od_gen = 0;
2277 
2278         (void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]",
2279             tag, (int64_t)id, index);
2280 }
2281 
2282 /*
2283  * Lookup or create the objects for a test using the od template.
2284  * If the objects do not all exist, or if 'remove' is specified,
2285  * remove any existing objects and create new ones.  Otherwise,
2286  * use the existing objects.
2287  */
2288 static int
2289 ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove)
2290 {
2291         int count = size / sizeof (*od);
2292         int rv = 0;
2293 
2294         VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0);
2295         if ((ztest_lookup(zd, od, count) != 0 || remove) &&
2296             (ztest_remove(zd, od, count) != 0 ||
2297             ztest_create(zd, od, count) != 0))
2298                 rv = -1;
2299         zd->zd_od = od;
2300         VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0);
2301 
2302         return (rv);
2303 }
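A typical caller fills an array of templates with ztest_od_init() and hands the whole batch to ztest_object_init(); ztest_dmu_object_alloc_free() later in this file does essentially this:

    ztest_od_t od[4];

    for (int b = 0; b < 4; b++)
            ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0);

    if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0)
            return;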
2304 
2305 /* ARGSUSED */
2306 void
2307 ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
2308 {
2309         zilog_t *zilog = zd->zd_zilog;
2310 
2311         (void) rw_rdlock(&zd->zd_zilog_lock);
2312 
2313         zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
2314 
2315         /*
2316          * Remember the committed values in zd, which is in parent/child
2317          * shared memory.  If we die, the next iteration of ztest_run()
2318          * will verify that the log really does contain this record.
2319          */
2320         mutex_enter(&zilog->zl_lock);
2321         ASSERT(zd->zd_shared != NULL);
2322         ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq);
2323         zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq;
2324         mutex_exit(&zilog->zl_lock);
2325 
2326         (void) rw_unlock(&zd->zd_zilog_lock);
2327 }
2328 
2329 /*
2330  * This function is designed to simulate the operations that occur during a
2331  * mount/unmount operation.  We hold the dataset across these operations in an
2332  * attempt to expose any implicit assumptions about ZIL management.
2333  */
2334 /* ARGSUSED */
2335 void
2336 ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
2337 {
2338         objset_t *os = zd->zd_os;
2339 
2340         /*
2341          * We grab the zd_dirobj_lock to ensure that no other thread is
2342          * updating the zil (i.e. adding in-memory log records) and the
2343          * zd_zilog_lock to block any I/O.
2344          */
2345         VERIFY0(mutex_lock(&zd->zd_dirobj_lock));
2346         (void) rw_wrlock(&zd->zd_zilog_lock);
2347 
2348         /* zfsvfs_teardown() */
2349         zil_close(zd->zd_zilog);
2350 
2351         /* zfsvfs_setup() */
2352         VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog);
2353         zil_replay(os, zd, ztest_replay_vector);
2354 
2355         (void) rw_unlock(&zd->zd_zilog_lock);
2356         VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0);
2357 }
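The writer acquisition of zd_zilog_lock here pairs with the rw_rdlock() calls in ztest_io() and ztest_zil_commit(): taking it as writer quiesces every ZIL user for the duration of the simulated unmount/mount, while zd_dirobj_lock keeps new log records from being added meanwhile.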
2358 
2359 /*
2360  * Verify that we can't destroy an active pool, create an existing pool,
2361  * or create a pool with a bad vdev spec.
2362  */
2363 /* ARGSUSED */
2364 void
2365 ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
2366 {
2367         ztest_shared_opts_t *zo = &ztest_opts;
2368         spa_t *spa;
2369         nvlist_t *nvroot;
2370 
2371         /*
2372          * Attempt to create using a bad file.
2373          */
2374         nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
2375         VERIFY3U(ENOENT, ==,
2376             spa_create("ztest_bad_file", nvroot, NULL, NULL));
2377         nvlist_free(nvroot);
2378 
2379         /*
2380          * Attempt to create using a bad mirror.
2381          */
2382         nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
2383         VERIFY3U(ENOENT, ==,
2384             spa_create("ztest_bad_mirror", nvroot, NULL, NULL));
2385         nvlist_free(nvroot);
2386 
2387         /*
2388          * Attempt to create an existing pool.  It shouldn't matter
2389          * what's in the nvroot; we should fail with EEXIST.
2390          */
2391         (void) rw_rdlock(&ztest_name_lock);
2392         nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
2393         VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
2394         nvlist_free(nvroot);
2395         VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
2396         VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
2397         spa_close(spa, FTAG);
2398 
2399         (void) rw_unlock(&ztest_name_lock);
2400 }
2401 
2402 /* ARGSUSED */
2403 void
2404 ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
2405 {
2406         spa_t *spa;
2407         uint64_t initial_version = SPA_VERSION_INITIAL;
2408         uint64_t version, newversion;
2409         nvlist_t *nvroot, *props;
2410         char *name;
2411 
2412         VERIFY0(mutex_lock(&ztest_vdev_lock));
2413         name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
2414 
2415         /*
2416          * Clean up from previous runs.
2417          */
2418         (void) spa_destroy(name);
2419 
2420         nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
2421             0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1);
2422 
2423         /*
 2424          * If we're configuring a RAIDZ device then make sure that the
 2425          * initial version is capable of supporting that feature.
2426          */
2427         switch (ztest_opts.zo_raidz_parity) {
2428         case 0:
2429         case 1:
2430                 initial_version = SPA_VERSION_INITIAL;
2431                 break;
2432         case 2:


2451         VERIFY0(spa_create(name, nvroot, props, NULL));
2452         fnvlist_free(nvroot);
2453         fnvlist_free(props);
2454 
2455         VERIFY0(spa_open(name, &spa, FTAG));
2456         VERIFY3U(spa_version(spa), ==, version);
2457         newversion = ztest_random_spa_version(version + 1);
2458 
2459         if (ztest_opts.zo_verbose >= 4) {
2460                 (void) printf("upgrading spa version from %llu to %llu\n",
2461                     (u_longlong_t)version, (u_longlong_t)newversion);
2462         }
2463 
2464         spa_upgrade(spa, newversion);
2465         VERIFY3U(spa_version(spa), >, version);
2466         VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config,
2467             zpool_prop_to_name(ZPOOL_PROP_VERSION)));
2468         spa_close(spa, FTAG);
2469 
2470         strfree(name);
2471         VERIFY0(mutex_unlock(&ztest_vdev_lock));
2472 }
2473 
2474 static vdev_t *
2475 vdev_lookup_by_path(vdev_t *vd, const char *path)
2476 {
2477         vdev_t *mvd;
2478 
2479         if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0)
2480                 return (vd);
2481 
2482         for (int c = 0; c < vd->vdev_children; c++)
2483                 if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) !=
2484                     NULL)
2485                         return (mvd);
2486 
2487         return (NULL);
2488 }
2489 
2490 /*
2491  * Find the first available hole which can be used as a top-level.


2504                 if (cvd->vdev_ishole)
2505                         break;
2506         }
2507         return (c);
2508 }
2509 
2510 /*
2511  * Verify that vdev_add() works as expected.
2512  */
2513 /* ARGSUSED */
2514 void
2515 ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
2516 {
2517         ztest_shared_t *zs = ztest_shared;
2518         spa_t *spa = ztest_spa;
2519         uint64_t leaves;
2520         uint64_t guid;
2521         nvlist_t *nvroot;
2522         int error;
2523 
2524         VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
2525         leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
2526 
2527         spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
2528 
2529         ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves;
2530 
2531         /*
2532          * If we have slogs then remove them 1/4 of the time.
2533          */
2534         if (spa_has_slogs(spa) && ztest_random(4) == 0) {
2535                 /*
2536                  * Grab the guid from the head of the log class rotor.
2537                  */
2538                 guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid;
2539 
2540                 spa_config_exit(spa, SCL_VDEV, FTAG);
2541 
2542                 /*
 2543                  * We have to grab the ztest_name_lock as writer to
2544                  * prevent a race between removing a slog (dmu_objset_find)
2545                  * and destroying a dataset. Removing the slog will
2546                  * grab a reference on the dataset which may cause
2547                  * dmu_objset_destroy() to fail with EBUSY thus
2548                  * leaving the dataset in an inconsistent state.
2549                  */
2550                 VERIFY(rw_wrlock(&ztest_name_lock) == 0);
2551                 error = spa_vdev_remove(spa, guid, B_FALSE);
2552                 VERIFY(rw_unlock(&ztest_name_lock) == 0);
2553 
2554                 if (error && error != EEXIST)
2555                         fatal(0, "spa_vdev_remove() = %d", error);
2556         } else {
2557                 spa_config_exit(spa, SCL_VDEV, FTAG);
2558 
2559                 /*
2560                  * Make 1/4 of the devices be log devices.
2561                  */
2562                 nvroot = make_vdev_root(NULL, NULL, NULL,
2563                     ztest_opts.zo_vdev_size, 0,
2564                     ztest_random(4) == 0, ztest_opts.zo_raidz,
2565                     zs->zs_mirrors, 1);
2566 
2567                 error = spa_vdev_add(spa, nvroot);
2568                 nvlist_free(nvroot);
2569 
2570                 if (error == ENOSPC)
2571                         ztest_record_enospc("spa_vdev_add");
2572                 else if (error != 0)
2573                         fatal(0, "spa_vdev_add() = %d", error);
2574         }
2575 
2576         VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
2577 }
2578 
2579 /*
2580  * Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
2581  */
2582 /* ARGSUSED */
2583 void
2584 ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
2585 {
2586         ztest_shared_t *zs = ztest_shared;
2587         spa_t *spa = ztest_spa;
2588         vdev_t *rvd = spa->spa_root_vdev;
2589         spa_aux_vdev_t *sav;
2590         char *aux;
2591         uint64_t guid = 0;
2592         int error;
2593 
2594         if (ztest_random(2) == 0) {
2595                 sav = &spa->spa_spares;
2596                 aux = ZPOOL_CONFIG_SPARES;
2597         } else {
2598                 sav = &spa->spa_l2cache;
2599                 aux = ZPOOL_CONFIG_L2CACHE;
2600         }
2601 
2602         VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
2603 
2604         spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
2605 
2606         if (sav->sav_count != 0 && ztest_random(4) == 0) {
2607                 /*
2608                  * Pick a random device to remove.
2609                  */
2610                 guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid;
2611         } else {
2612                 /*
2613                  * Find an unused device we can add.
2614                  */
2615                 zs->zs_vdev_aux = 0;
2616                 for (;;) {
2617                         char path[MAXPATHLEN];
2618                         int c;
2619                         (void) snprintf(path, sizeof (path), ztest_aux_template,
2620                             ztest_opts.zo_dir, ztest_opts.zo_pool, aux,
2621                             zs->zs_vdev_aux);
2622                         for (c = 0; c < sav->sav_count; c++)


2639                 nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL,
2640                     (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1);
2641                 error = spa_vdev_add(spa, nvroot);
2642                 if (error != 0)
2643                         fatal(0, "spa_vdev_add(%p) = %d", nvroot, error);
2644                 nvlist_free(nvroot);
2645         } else {
2646                 /*
2647                  * Remove an existing device.  Sometimes, dirty its
2648                  * vdev state first to make sure we handle removal
2649                  * of devices that have pending state changes.
2650                  */
2651                 if (ztest_random(2) == 0)
2652                         (void) vdev_online(spa, guid, 0, NULL);
2653 
2654                 error = spa_vdev_remove(spa, guid, B_FALSE);
2655                 if (error != 0 && error != EBUSY)
2656                         fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
2657         }
2658 
2659         VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
2660 }
2661 
2662 /*
2663  * split a pool if it has mirror tlvdevs
2664  */
2665 /* ARGSUSED */
2666 void
2667 ztest_split_pool(ztest_ds_t *zd, uint64_t id)
2668 {
2669         ztest_shared_t *zs = ztest_shared;
2670         spa_t *spa = ztest_spa;
2671         vdev_t *rvd = spa->spa_root_vdev;
2672         nvlist_t *tree, **child, *config, *split, **schild;
2673         uint_t c, children, schildren = 0, lastlogid = 0;
2674         int error = 0;
2675 
2676         VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
2677 
 2678         /* ensure we have a usable config; mirrors of raidz aren't supported */
2679         if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) {
2680                 VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
2681                 return;
2682         }
2683 
2684         /* clean up the old pool, if any */
2685         (void) spa_destroy("splitp");
2686 
2687         spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
2688 
2689         /* generate a config from the existing config */
2690         mutex_enter(&spa->spa_props_lock);
2691         VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE,
2692             &tree) == 0);
2693         mutex_exit(&spa->spa_props_lock);
2694 
2695         VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
2696             &children) == 0);
2697 
2698         schild = malloc(rvd->vdev_children * sizeof (nvlist_t *));
2699         for (c = 0; c < children; c++) {
2700                 vdev_t *tvd = rvd->vdev_child[c];


2719                 VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0);
2720         }
2721 
2722         /* OK, create a config that can be used to split */
2723         VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0);
2724         VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE,
2725             VDEV_TYPE_ROOT) == 0);
2726         VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild,
2727             lastlogid != 0 ? lastlogid : schildren) == 0);
2728 
2729         VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0);
2730         VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0);
2731 
2732         for (c = 0; c < schildren; c++)
2733                 nvlist_free(schild[c]);
2734         free(schild);
2735         nvlist_free(split);
2736 
2737         spa_config_exit(spa, SCL_VDEV, FTAG);
2738 
2739         (void) rw_wrlock(&ztest_name_lock);
2740         error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE);
2741         (void) rw_unlock(&ztest_name_lock);
2742 
2743         nvlist_free(config);
2744 
2745         if (error == 0) {
2746                 (void) printf("successful split - results:\n");
2747                 mutex_enter(&spa_namespace_lock);
2748                 show_pool_stats(spa);
2749                 show_pool_stats(spa_lookup("splitp"));
2750                 mutex_exit(&spa_namespace_lock);
2751                 ++zs->zs_splits;
2752                 --zs->zs_mirrors;
2753         }
2754         VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
2755 
2756 }
2757 
2758 /*
2759  * Verify that we can attach and detach devices.
2760  */
2761 /* ARGSUSED */
2762 void
2763 ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
2764 {
2765         ztest_shared_t *zs = ztest_shared;
2766         spa_t *spa = ztest_spa;
2767         spa_aux_vdev_t *sav = &spa->spa_spares;
2768         vdev_t *rvd = spa->spa_root_vdev;
2769         vdev_t *oldvd, *newvd, *pvd;
2770         nvlist_t *root;
2771         uint64_t leaves;
2772         uint64_t leaf, top;
2773         uint64_t ashift = ztest_get_ashift();
2774         uint64_t oldguid, pguid;
2775         uint64_t oldsize, newsize;
2776         char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
2777         int replacing;
2778         int oldvd_has_siblings = B_FALSE;
2779         int newvd_is_spare = B_FALSE;
2780         int oldvd_is_log;
2781         int error, expected_error;
2782 
2783         VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
2784         leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
2785 
2786         spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
2787 
2788         /*
2789          * Decide whether to do an attach or a replace.
2790          */
2791         replacing = ztest_random(2);
2792 
2793         /*
2794          * Pick a random top-level vdev.
2795          */
2796         top = ztest_random_vdev_top(spa, B_TRUE);
2797 
2798         /*
2799          * Pick a random leaf within it.
2800          */
2801         leaf = ztest_random(leaves);
2802 
2803         /*


2824                 ASSERT(oldvd->vdev_children >= 2);
2825                 oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)];
2826         }
2827 
2828         oldguid = oldvd->vdev_guid;
2829         oldsize = vdev_get_min_asize(oldvd);
2830         oldvd_is_log = oldvd->vdev_top->vdev_islog;
2831         (void) strcpy(oldpath, oldvd->vdev_path);
2832         pvd = oldvd->vdev_parent;
2833         pguid = pvd->vdev_guid;
2834 
2835         /*
2836          * If oldvd has siblings, then half of the time, detach it.
2837          */
2838         if (oldvd_has_siblings && ztest_random(2) == 0) {
2839                 spa_config_exit(spa, SCL_VDEV, FTAG);
2840                 error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE);
2841                 if (error != 0 && error != ENODEV && error != EBUSY &&
2842                     error != ENOTSUP)
2843                         fatal(0, "detach (%s) returned %d", oldpath, error);
2844                 VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
2845                 return;
2846         }
2847 
2848         /*
2849          * For the new vdev, choose with equal probability between the two
2850          * standard paths (ending in either 'a' or 'b') or a random hot spare.
2851          */
2852         if (sav->sav_count != 0 && ztest_random(3) == 0) {
2853                 newvd = sav->sav_vdevs[ztest_random(sav->sav_count)];
2854                 newvd_is_spare = B_TRUE;
2855                 (void) strcpy(newpath, newvd->vdev_path);
2856         } else {
2857                 (void) snprintf(newpath, sizeof (newpath), ztest_dev_template,
2858                     ztest_opts.zo_dir, ztest_opts.zo_pool,
2859                     top * leaves + leaf);
2860                 if (ztest_random(2) == 0)
2861                         newpath[strlen(newpath) - 1] = 'b';
2862                 newvd = vdev_lookup_by_path(rvd, newpath);
2863         }
2864 


2918          * fail with ENODEV, or fail with EOVERFLOW.
2919          */
2920         if (expected_error == ENOTSUP &&
2921             (error == 0 || error == ENODEV || error == EOVERFLOW))
2922                 expected_error = error;
2923 
2924         /*
2925          * If someone grew the LUN, the replacement may be too small.
2926          */
2927         if (error == EOVERFLOW || error == EBUSY)
2928                 expected_error = error;
2929 
2930         /* XXX workaround 6690467 */
2931         if (error != expected_error && expected_error != EBUSY) {
2932                 fatal(0, "attach (%s %llu, %s %llu, %d) "
2933                     "returned %d, expected %d",
2934                     oldpath, oldsize, newpath,
2935                     newsize, replacing, error, expected_error);
2936         }
2937 
2938         VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
2939 }
2940 
2941 /*
2942  * Callback function which expands the physical size of the vdev.
2943  */
2944 vdev_t *
2945 grow_vdev(vdev_t *vd, void *arg)
2946 {
2947         spa_t *spa = vd->vdev_spa;
2948         size_t *newsize = arg;
2949         size_t fsize;
2950         int fd;
2951 
2952         ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
2953         ASSERT(vd->vdev_ops->vdev_op_leaf);
2954 
2955         if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
2956                 return (vd);
2957 
2958         fsize = lseek(fd, 0, SEEK_END);


3046                         return (cvd);
3047         }
3048         return (NULL);
3049 }
3050 
3051 /*
3052  * Verify that dynamic LUN growth works as expected.
3053  */
3054 /* ARGSUSED */
3055 void
3056 ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
3057 {
3058         spa_t *spa = ztest_spa;
3059         vdev_t *vd, *tvd;
3060         metaslab_class_t *mc;
3061         metaslab_group_t *mg;
3062         size_t psize, newsize;
3063         uint64_t top;
3064         uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count;
3065 
3066         VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
3067         spa_config_enter(spa, SCL_STATE, spa, RW_READER);
3068 
3069         top = ztest_random_vdev_top(spa, B_TRUE);
3070 
3071         tvd = spa->spa_root_vdev->vdev_child[top];
3072         mg = tvd->vdev_mg;
3073         mc = mg->mg_class;
3074         old_ms_count = tvd->vdev_ms_count;
3075         old_class_space = metaslab_class_get_space(mc);
3076 
3077         /*
3078          * Determine the size of the first leaf vdev associated with
3079          * our top-level device.
3080          */
3081         vd = vdev_walk_tree(tvd, NULL, NULL);
3082         ASSERT3P(vd, !=, NULL);
3083         ASSERT(vd->vdev_ops->vdev_op_leaf);
3084 
3085         psize = vd->vdev_psize;
3086 
3087         /*
3088          * We only try to expand the vdev if it's healthy, less than 4x its
3089          * original size, and it has a valid psize.
3090          */
3091         if (tvd->vdev_state != VDEV_STATE_HEALTHY ||
3092             psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) {
3093                 spa_config_exit(spa, SCL_STATE, spa);
3094                 VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
3095                 return;
3096         }
3097         ASSERT(psize > 0);
3098         newsize = psize + psize / 8;
3099         ASSERT3U(newsize, >, psize);
3100 
3101         if (ztest_opts.zo_verbose >= 6) {
3102                 (void) printf("Expanding LUN %s from %lu to %lu\n",
3103                     vd->vdev_path, (ulong_t)psize, (ulong_t)newsize);
3104         }
3105 
3106         /*
 3107          * Growing the vdev is a two-step process:
3108          *      1). expand the physical size (i.e. relabel)
3109          *      2). online the vdev to create the new metaslabs
3110          */
3111         if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL ||
3112             vdev_walk_tree(tvd, online_vdev, NULL) != NULL ||
3113             tvd->vdev_state != VDEV_STATE_HEALTHY) {
3114                 if (ztest_opts.zo_verbose >= 5) {
3115                         (void) printf("Could not expand LUN because "
3116                             "the vdev configuration changed.\n");
3117                 }
3118                 spa_config_exit(spa, SCL_STATE, spa);
3119                 VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
3120                 return;
3121         }
3122 
3123         spa_config_exit(spa, SCL_STATE, spa);
3124 
3125         /*
3126          * Expanding the LUN will update the config asynchronously,
3127          * thus we must wait for the async thread to complete any
3128          * pending tasks before proceeding.
3129          */
3130         for (;;) {
3131                 boolean_t done;
3132                 mutex_enter(&spa->spa_async_lock);
3133                 done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks);
3134                 mutex_exit(&spa->spa_async_lock);
3135                 if (done)
3136                         break;
3137                 txg_wait_synced(spa_get_dsl(spa), 0);
3138                 (void) poll(NULL, 0, 100);
3139         }
3140 
3141         spa_config_enter(spa, SCL_STATE, spa, RW_READER);
3142 
3143         tvd = spa->spa_root_vdev->vdev_child[top];
3144         new_ms_count = tvd->vdev_ms_count;
3145         new_class_space = metaslab_class_get_space(mc);
3146 
3147         if (tvd->vdev_mg != mg || mg->mg_class != mc) {
3148                 if (ztest_opts.zo_verbose >= 5) {
3149                         (void) printf("Could not verify LUN expansion due to "
3150                             "intervening vdev offline or remove.\n");
3151                 }
3152                 spa_config_exit(spa, SCL_STATE, spa);
3153                 VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
3154                 return;
3155         }
3156 
3157         /*
3158          * Make sure we were able to grow the vdev.
3159          */
3160         if (new_ms_count <= old_ms_count)
3161                 fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n",
 3162                     new_ms_count, old_ms_count);
3163 
3164         /*
3165          * Make sure we were able to grow the pool.
3166          */
3167         if (new_class_space <= old_class_space)
3168                 fatal(0, "LUN expansion failed: class_space %llu <= %llu\n",
 3169                     new_class_space, old_class_space);
3170 
3171         if (ztest_opts.zo_verbose >= 5) {
3172                 char oldnumbuf[NN_NUMBUF_SZ], newnumbuf[NN_NUMBUF_SZ];
3173 
3174                 nicenum(old_class_space, oldnumbuf, sizeof (oldnumbuf));
3175                 nicenum(new_class_space, newnumbuf, sizeof (newnumbuf));
3176                 (void) printf("%s grew from %s to %s\n",
3177                     spa->spa_name, oldnumbuf, newnumbuf);
3178         }
3179 
3180         spa_config_exit(spa, SCL_STATE, spa);
3181         VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
3182 }
3183 
3184 /*
3185  * Verify that dmu_objset_{create,destroy,open,close} work as expected.
3186  */
3187 /* ARGSUSED */
3188 static void
3189 ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3190 {
3191         /*
3192          * Create the objects common to all ztest datasets.
3193          */
3194         VERIFY(zap_create_claim(os, ZTEST_DIROBJ,
3195             DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
3196 }
3197 
3198 static int
3199 ztest_dataset_create(char *dsname)
3200 {
3201         uint64_t zilset = ztest_random(100);


3275         (void) snprintf(snapname, sizeof (snapname), "%s@%llu", osname,
3276             (u_longlong_t)id);
3277 
3278         error = dsl_destroy_snapshot(snapname, B_FALSE);
3279         if (error != 0 && error != ENOENT)
3280                 fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error);
3281         return (B_TRUE);
3282 }
3283 
3284 /* ARGSUSED */
3285 void
3286 ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
3287 {
3288         ztest_ds_t zdtmp;
3289         int iters;
3290         int error;
3291         objset_t *os, *os2;
3292         char name[ZFS_MAX_DATASET_NAME_LEN];
3293         zilog_t *zilog;
3294 
3295         (void) rw_rdlock(&ztest_name_lock);
3296 
3297         (void) snprintf(name, sizeof (name), "%s/temp_%llu",
3298             ztest_opts.zo_pool, (u_longlong_t)id);
3299 
3300         /*
3301          * If this dataset exists from a previous run, process its replay log
3302          * half of the time.  If we don't replay it, then dmu_objset_destroy()
3303          * (invoked from ztest_objset_destroy_cb()) should just throw it away.
3304          */
3305         if (ztest_random(2) == 0 &&
3306             dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) {
3307                 ztest_zd_init(&zdtmp, NULL, os);
3308                 zil_replay(os, &zdtmp, ztest_replay_vector);
3309                 ztest_zd_fini(&zdtmp);
3310                 dmu_objset_disown(os, FTAG);
3311         }
3312 
3313         /*
3314          * There may be an old instance of the dataset we're about to
3315          * create lying around from a previous run.  If so, destroy it
3316          * and all of its snapshots.
3317          */
3318         (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL,
3319             DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
3320 
3321         /*
3322          * Verify that the destroyed dataset is no longer in the namespace.
3323          */
3324         VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE,
3325             FTAG, &os));
3326 
3327         /*
3328          * Verify that we can create a new dataset.
3329          */
3330         error = ztest_dataset_create(name);
3331         if (error) {
3332                 if (error == ENOSPC) {
3333                         ztest_record_enospc(FTAG);
3334                         (void) rw_unlock(&ztest_name_lock);
3335                         return;
3336                 }
3337                 fatal(0, "dmu_objset_create(%s) = %d", name, error);
3338         }
3339 
3340         VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
3341 
3342         ztest_zd_init(&zdtmp, NULL, os);
3343 
3344         /*
3345          * Open the intent log for it.
3346          */
3347         zilog = zil_open(os, ztest_get_data);
3348 
3349         /*
3350          * Put some objects in there, do a little I/O to them,
3351          * and randomly take a couple of snapshots along the way.
3352          */
3353         iters = ztest_random(5);
3354         for (int i = 0; i < iters; i++) {


3362          */
3363         VERIFY3U(EEXIST, ==,
3364             dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL));
3365 
3366         /*
3367          * Verify that we can hold an objset that is also owned.
3368          */
3369         VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2));
3370         dmu_objset_rele(os2, FTAG);
3371 
3372         /*
3373          * Verify that we cannot own an objset that is already owned.
3374          */
3375         VERIFY3U(EBUSY, ==,
3376             dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2));
3377 
3378         zil_close(zilog);
3379         dmu_objset_disown(os, FTAG);
3380         ztest_zd_fini(&zdtmp);
3381 
3382         (void) rw_unlock(&ztest_name_lock);
3383 }
3384 
3385 /*
3386  * Verify that dmu_snapshot_{create,destroy,open,close} work as expected.
3387  */
3388 void
3389 ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id)
3390 {
3391         (void) rw_rdlock(&ztest_name_lock);
3392         (void) ztest_snapshot_destroy(zd->zd_name, id);
3393         (void) ztest_snapshot_create(zd->zd_name, id);
3394         (void) rw_unlock(&ztest_name_lock);
3395 }
3396 
3397 /*
3398  * Cleanup non-standard snapshots and clones.
3399  */
3400 void
3401 ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
3402 {
3403         char snap1name[ZFS_MAX_DATASET_NAME_LEN];
3404         char clone1name[ZFS_MAX_DATASET_NAME_LEN];
3405         char snap2name[ZFS_MAX_DATASET_NAME_LEN];
3406         char clone2name[ZFS_MAX_DATASET_NAME_LEN];
3407         char snap3name[ZFS_MAX_DATASET_NAME_LEN];
3408         int error;
3409 
3410         (void) snprintf(snap1name, sizeof (snap1name),
3411             "%s@s1_%llu", osname, id);
3412         (void) snprintf(clone1name, sizeof (clone1name),
3413             "%s/c1_%llu", osname, id);
3414         (void) snprintf(snap2name, sizeof (snap2name),


3433         error = dsl_destroy_snapshot(snap1name, B_FALSE);
3434         if (error && error != ENOENT)
3435                 fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error);
3436 }
3437 
3438 /*
3439  * Verify dsl_dataset_promote handles EBUSY
3440  */
3441 void
3442 ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
3443 {
3444         objset_t *os;
3445         char snap1name[ZFS_MAX_DATASET_NAME_LEN];
3446         char clone1name[ZFS_MAX_DATASET_NAME_LEN];
3447         char snap2name[ZFS_MAX_DATASET_NAME_LEN];
3448         char clone2name[ZFS_MAX_DATASET_NAME_LEN];
3449         char snap3name[ZFS_MAX_DATASET_NAME_LEN];
3450         char *osname = zd->zd_name;
3451         int error;
3452 
3453         (void) rw_rdlock(&ztest_name_lock);
3454 
3455         ztest_dsl_dataset_cleanup(osname, id);
3456 
3457         (void) snprintf(snap1name, sizeof (snap1name),
3458             "%s@s1_%llu", osname, id);
3459         (void) snprintf(clone1name, sizeof (clone1name),
3460             "%s/c1_%llu", osname, id);
3461         (void) snprintf(snap2name, sizeof (snap2name),
3462             "%s@s2_%llu", clone1name, id);
3463         (void) snprintf(clone2name, sizeof (clone2name),
3464             "%s/c2_%llu", osname, id);
3465         (void) snprintf(snap3name, sizeof (snap3name),
3466             "%s@s3_%llu", clone1name, id);
3467 
3468         error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1);
3469         if (error && error != EEXIST) {
3470                 if (error == ENOSPC) {
3471                         ztest_record_enospc(FTAG);
3472                         goto out;
3473                 }


3510                 fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
3511         }
3512 
3513         error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os);
3514         if (error)
3515                 fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
3516         error = dsl_dataset_promote(clone2name, NULL);
3517         if (error == ENOSPC) {
3518                 dmu_objset_disown(os, FTAG);
3519                 ztest_record_enospc(FTAG);
3520                 goto out;
3521         }
3522         if (error != EBUSY)
3523                 fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
3524                     error);
3525         dmu_objset_disown(os, FTAG);
3526 
3527 out:
3528         ztest_dsl_dataset_cleanup(osname, id);
3529 
3530         (void) rw_unlock(&ztest_name_lock);
3531 }
3532 
3533 /*
3534  * Verify that dmu_object_{alloc,free} work as expected.
3535  */
3536 void
3537 ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id)
3538 {
3539         ztest_od_t od[4];
3540         int batchsize = sizeof (od) / sizeof (od[0]);
3541 
3542         for (int b = 0; b < batchsize; b++)
3543                 ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0);
3544 
3545         /*
3546          * Destroy the previous batch of objects, create a new batch,
3547          * and do some I/O on the new objects.
3548          */
3549         if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0)
3550                 return;


4444 
4445         if (error == ECANCELED) {
4446                 ASSERT0(data->zcd_txg);
4447                 ASSERT(!data->zcd_added);
4448 
4449                 /*
4450                  * The private callback data should be destroyed here, but
4451                  * since we are going to check the zcd_called field after
4452                  * dmu_tx_abort(), we will destroy it there.
4453                  */
4454                 return;
4455         }
4456 
4457         /* Was this callback added to the global callback list? */
4458         if (!data->zcd_added)
4459                 goto out;
4460 
4461         ASSERT3U(data->zcd_txg, !=, 0);
4462 
4463         /* Remove our callback from the list */
4464         (void) mutex_lock(&zcl.zcl_callbacks_lock);
4465         list_remove(&zcl.zcl_callbacks, data);
4466         (void) mutex_unlock(&zcl.zcl_callbacks_lock);
4467 
4468 out:
4469         umem_free(data, sizeof (ztest_cb_data_t));
4470 }
4471 
4472 /* Allocate and initialize callback data structure */
4473 static ztest_cb_data_t *
4474 ztest_create_cb_data(objset_t *os, uint64_t txg)
4475 {
4476         ztest_cb_data_t *cb_data;
4477 
4478         cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL);
4479 
4480         cb_data->zcd_txg = txg;
4481         cb_data->zcd_spa = dmu_objset_spa(os);
4482 
4483         return (cb_data);
4484 }
4485 
4486 /*


4548                 }
4549 
4550                 return;
4551         }
4552 
4553         cb_data[2] = ztest_create_cb_data(os, txg);
4554         dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]);
4555 
4556         /*
4557          * Read existing data to make sure there isn't a future leak.
4558          */
4559         VERIFY(0 == dmu_read(os, od[0].od_object, 0, sizeof (uint64_t),
4560             &old_txg, DMU_READ_PREFETCH));
4561 
4562         if (old_txg > txg)
4563                 fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64,
4564                     old_txg, txg);
4565 
4566         dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx);
4567 
4568         (void) mutex_lock(&zcl.zcl_callbacks_lock);
4569 
4570         /*
4571          * Since commit callbacks don't have any ordering requirement and since
4572          * it is theoretically possible for a commit callback to be called
4573          * after an arbitrary amount of time has elapsed since its txg has been
4574          * synced, it is difficult to reliably determine whether a commit
4575          * callback hasn't been called due to high load or due to a flawed
4576          * implementation.
4577          *
4578          * In practice, we will assume that if after a certain number of txgs a
4579          * commit callback hasn't been called, then most likely there's an
4580          * implementation bug.
4581          */
4582         tmp_cb = list_head(&zcl.zcl_callbacks);
4583         if (tmp_cb != NULL &&
4584             (txg - ZTEST_COMMIT_CALLBACK_THRESH) > tmp_cb->zcd_txg) {
4585                 fatal(0, "Commit callback threshold exceeded, oldest txg: %"
4586                     PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg);
4587         }
4588 


4595          * (from other objsets) may have sneaked in.
4596          */
4597         tmp_cb = list_tail(&zcl.zcl_callbacks);
4598         while (tmp_cb != NULL && tmp_cb->zcd_txg > txg)
4599                 tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb);
4600 
4601         /* Add the 3 callbacks to the list */
4602         for (i = 0; i < 3; i++) {
4603                 if (tmp_cb == NULL)
4604                         list_insert_head(&zcl.zcl_callbacks, cb_data[i]);
4605                 else
4606                         list_insert_after(&zcl.zcl_callbacks, tmp_cb,
4607                             cb_data[i]);
4608 
4609                 cb_data[i]->zcd_added = B_TRUE;
4610                 VERIFY(!cb_data[i]->zcd_called);
4611 
4612                 tmp_cb = cb_data[i];
4613         }
4614 
4615         (void) mutex_unlock(&zcl.zcl_callbacks_lock);
4616 
4617         dmu_tx_commit(tx);
4618 }
4619 
4620 /* ARGSUSED */
4621 void
4622 ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id)
4623 {
4624         zfs_prop_t proplist[] = {
4625                 ZFS_PROP_CHECKSUM,
4626                 ZFS_PROP_COMPRESSION,
4627                 ZFS_PROP_COPIES,
4628                 ZFS_PROP_DEDUP
4629         };
4630 
4631         (void) rw_rdlock(&ztest_name_lock);
4632 
4633         for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++)
4634                 (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p],
4635                     ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2));
4636 
4637         (void) rw_unlock(&ztest_name_lock);
4638 }
4639 
4640 /* ARGSUSED */
4641 void
4642 ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
4643 {
4644         nvlist_t *props = NULL;
4645 
4646         (void) rw_rdlock(&ztest_name_lock);
4647 
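        /*
         * ztest_random(n) returns a value in [0, n), so this picks a
         * dedupditto value in [ZIO_DEDUPDITTO_MIN, 2 * ZIO_DEDUPDITTO_MIN).
         */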
4648         (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO,
4649             ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN));
4650 
4651         VERIFY0(spa_prop_get(ztest_spa, &props));
4652 
4653         if (ztest_opts.zo_verbose >= 6)
4654                 dump_nvlist(props, 4);
4655 
4656         nvlist_free(props);
4657 
4658         (void) rw_unlock(&ztest_name_lock);
4659 }
4660 
4661 static int
4662 user_release_one(const char *snapname, const char *holdname)
4663 {
4664         nvlist_t *snaps, *holds;
4665         int error;
4666 
4667         snaps = fnvlist_alloc();
4668         holds = fnvlist_alloc();
4669         fnvlist_add_boolean(holds, holdname);
4670         fnvlist_add_nvlist(snaps, snapname, holds);
4671         fnvlist_free(holds);
4672         error = dsl_dataset_user_release(snaps, NULL);
4673         fnvlist_free(snaps);
4674         return (error);
4675 }
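
/*
 * For reference, a sketch of the inverse helper (a hypothetical wrapper;
 * the elided body of ztest_dmu_snapshot_hold() below takes its hold this
 * way).  Unlike release, which nests an nvlist of hold names per
 * snapshot, dsl_dataset_user_hold() takes a flat snapname -> holdname
 * mapping.
 */
static int
user_hold_one(const char *snapname, const char *holdname)
{
	nvlist_t *holds;
	int error;

	holds = fnvlist_alloc();
	fnvlist_add_string(holds, snapname, holdname);
	error = dsl_dataset_user_hold(holds, 0, NULL);
	fnvlist_free(holds);
	return (error);
}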
4676 
4677 /*
4678  * Test snapshot hold/release and deferred destroy.
4679  */
4680 void
4681 ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
4682 {
4683         int error;
4684         objset_t *os = zd->zd_os;
4685         objset_t *origin;
4686         char snapname[100];
4687         char fullname[100];
4688         char clonename[100];
4689         char tag[100];
4690         char osname[ZFS_MAX_DATASET_NAME_LEN];
4691         nvlist_t *holds;
4692 
4693         (void) rw_rdlock(&ztest_name_lock);
4694 
4695         dmu_objset_name(os, osname);
4696 
4697         (void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id);
4698         (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname);
4699         (void) snprintf(clonename, sizeof (clonename),
4700             "%s/ch1_%llu", osname, id);
4701         (void) snprintf(tag, sizeof (tag), "tag_%llu", id);
4702 
4703         /*
4704          * Clean up from any previous run.
4705          */
4706         error = dsl_destroy_head(clonename);
4707         if (error != ENOENT)
4708                 ASSERT0(error);
4709         error = user_release_one(fullname, tag);
4710         if (error != ESRCH && error != ENOENT)
4711                 ASSERT0(error);
4712         error = dsl_destroy_snapshot(fullname, B_FALSE);
4713         if (error != ENOENT)


4778 
4779         error = dsl_destroy_snapshot(fullname, B_FALSE);
4780         if (error != EBUSY) {
4781                 fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d",
4782                     fullname, error);
4783         }
4784 
4785         error = dsl_destroy_snapshot(fullname, B_TRUE);
4786         if (error) {
4787                 fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
4788                     fullname, error);
4789         }
4790 
4791         error = user_release_one(fullname, tag);
4792         if (error)
4793                 fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error);
4794 
4795         VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT);
4796 
4797 out:
4798         (void) rw_unlock(&ztest_name_lock);
4799 }
4800 
4801 /*
4802  * Inject random faults into the on-disk data.
4803  */
4804 /* ARGSUSED */
4805 void
4806 ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
4807 {
4808         ztest_shared_t *zs = ztest_shared;
4809         spa_t *spa = ztest_spa;
4810         int fd;
4811         uint64_t offset;
4812         uint64_t leaves;
4813         uint64_t bad = 0x1990c0ffeedecade;
4814         uint64_t top, leaf;
4815         char path0[MAXPATHLEN];
4816         char pathrand[MAXPATHLEN];
4817         size_t fsize;
4818         int bshift = SPA_MAXBLOCKSHIFT + 2;
4819         int iters = 1000;
4820         int maxfaults;
4821         int mirror_save;
4822         vdev_t *vd0 = NULL;
4823         uint64_t guid0 = 0;
4824         boolean_t islog = B_FALSE;
4825 
4826         VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
4827         maxfaults = MAXFAULTS();
4828         leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
4829         mirror_save = zs->zs_mirrors;
4830         VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
4831 
4832         ASSERT(leaves >= 1);
4833 
4834         /*
4835          * Grab the name lock as reader. There are some operations
4836          * which don't like to have their vdevs changed while
4837          * they are in progress (e.g. spa_change_guid). Those
4838          * operations will have grabbed the name lock as writer.
4839          */
4840         (void) rw_rdlock(&ztest_name_lock);
4841 
4842         /*
4843          * We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
4844          */
4845         spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
4846 
4847         if (ztest_random(2) == 0) {
4848                 /*
4849                  * Inject errors on a normal data device or slog device.
4850                  */
4851                 top = ztest_random_vdev_top(spa, B_TRUE);
4852                 leaf = ztest_random(leaves) + zs->zs_splits;
4853 
4854                 /*
4855                  * Generate paths to the first leaf in this top-level vdev,
4856                  * and to the random leaf we selected.  We'll induce transient
4857                  * write failures and random online/offline activity on leaf 0,
4858                  * and we'll write random garbage to the randomly chosen leaf.
4859                  */
4860                 (void) snprintf(path0, sizeof (path0), ztest_dev_template,


4889                         vdev_file_t *vf = vd0->vdev_tsd;
4890 
4891                         if (vf != NULL && ztest_random(3) == 0) {
4892                                 (void) close(vf->vf_vnode->v_fd);
4893                                 vf->vf_vnode->v_fd = -1;
4894                         } else if (ztest_random(2) == 0) {
4895                                 vd0->vdev_cant_read = B_TRUE;
4896                         } else {
4897                                 vd0->vdev_cant_write = B_TRUE;
4898                         }
4899                         guid0 = vd0->vdev_guid;
4900                 }
4901         } else {
4902                 /*
4903                  * Inject errors on an l2cache device.
4904                  */
4905                 spa_aux_vdev_t *sav = &spa->spa_l2cache;
4906 
4907                 if (sav->sav_count == 0) {
4908                         spa_config_exit(spa, SCL_STATE, FTAG);
4909                         (void) rw_unlock(&ztest_name_lock);
4910                         return;
4911                 }
4912                 vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
4913                 guid0 = vd0->vdev_guid;
4914                 (void) strcpy(path0, vd0->vdev_path);
4915                 (void) strcpy(pathrand, vd0->vdev_path);
4916 
4917                 leaf = 0;
4918                 leaves = 1;
4919                 maxfaults = INT_MAX;    /* no limit on cache devices */
4920         }
4921 
4922         spa_config_exit(spa, SCL_STATE, FTAG);
4923         (void) rw_unlock(&ztest_name_lock);
4924 
4925         /*
4926          * If we can tolerate two or more faults, or we're dealing
4927          * with a slog, randomly online/offline vd0.
4928          */
4929         if ((maxfaults >= 2 || islog) && guid0 != 0) {
4930                 if (ztest_random(10) < 6) {
4931                         int flags = (ztest_random(2) == 0 ?
4932                             ZFS_OFFLINE_TEMPORARY : 0);
4933 
4934                         /*
4935                          * We have to grab the zs_name_lock as writer to
4936                          * prevent a race between offlining a slog and
4937                          * destroying a dataset. Offlining the slog will
4938          * grab a reference on the dataset, which may cause
4939          * dmu_objset_destroy() to fail with EBUSY, thus
4940                          * leaving the dataset in an inconsistent state.
4941                          */
4942                         if (islog)
4943                                 (void) rw_wrlock(&ztest_name_lock);
4944 
4945                         VERIFY(vdev_offline(spa, guid0, flags) != EBUSY);
4946 
4947                         if (islog)
4948                                 (void) rw_unlock(&ztest_name_lock);
4949                 } else {
4950                         /*
4951                          * Ideally we would like to be able to randomly
4952                          * call vdev_[on|off]line without holding locks
4953          * to force unpredictable failures, but the side
4954                          * effects of vdev_[on|off]line prevent us from
4955                          * doing so. We grab the ztest_vdev_lock here to
4956                          * prevent a race between injection testing and
4957                          * aux_vdev removal.
4958                          */
4959                         VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
4960                         (void) vdev_online(spa, guid0, 0, NULL);
4961                         VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
4962                 }
4963         }
4964 
4965         if (maxfaults == 0)
4966                 return;
4967 
4968         /*
4969          * We have at least single-fault tolerance, so inject data corruption.
4970          */
4971         fd = open(pathrand, O_RDWR);
4972 
4973         if (fd == -1)   /* we hit a gap in the device namespace */
4974                 return;
4975 
4976         fsize = lseek(fd, 0, SEEK_END);
4977 
4978         while (--iters != 0) {
4979                 /*
4980                  * The offset must be chosen carefully to ensure that
4981                  * we do not inject a given logical block with errors


5013                  * because we also damage (parts of) the other side of
5014                  * the mirror/raidz.
5015                  *
5016                  * Additionally, we will always have both an even and an
5017                  * odd label, so that we can handle crashes in the
5018                  * middle of vdev_config_sync().
5019                  */
5020                 if ((leaf & 1) == 0 && offset < VDEV_LABEL_START_SIZE)
5021                         continue;
5022 
5023                 /*
5024                  * The two end labels are stored at the "end" of the disk, but
5025                  * the end of the disk (vdev_psize) is aligned to
5026                  * sizeof (vdev_label_t).
5027                  */
5028                 uint64_t psize = P2ALIGN(fsize, sizeof (vdev_label_t));
5029                 if ((leaf & 1) == 1 &&
5030                     offset + sizeof (bad) > psize - VDEV_LABEL_END_SIZE)
5031                         continue;
5032 
5033                 VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
5034                 if (mirror_save != zs->zs_mirrors) {
5035                         VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
5036                         (void) close(fd);
5037                         return;
5038                 }
5039 
5040                 if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad))
5041                         fatal(1, "can't inject bad word at 0x%llx in %s",
5042                             offset, pathrand);
5043 
5044                 VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
5045 
5046                 if (ztest_opts.zo_verbose >= 7)
5047                         (void) printf("injected bad word into %s,"
5048                             " offset 0x%llx\n", pathrand, (u_longlong_t)offset);
5049         }
5050 
5051         (void) close(fd);
5052 }
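
/*
 * A sketch of the label-avoidance rule enforced by the loop above
 * (hypothetical helper; ztest inlines these checks): never damage the
 * front labels of even-numbered leaves or the end labels of
 * odd-numbered leaves, so an intact copy of every label survives.
 */
static boolean_t
offset_would_hit_label(uint64_t leaf, uint64_t offset, size_t fsize,
    size_t badlen)
{
	uint64_t psize = P2ALIGN(fsize, sizeof (vdev_label_t));

	if ((leaf & 1) == 0 && offset < VDEV_LABEL_START_SIZE)
		return (B_TRUE);	/* keep even leaves' front labels */
	if ((leaf & 1) == 1 &&
	    offset + badlen > psize - VDEV_LABEL_END_SIZE)
		return (B_TRUE);	/* keep odd leaves' end labels */
	return (B_FALSE);
}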
5053 
5054 /*
5055  * Verify that DDT repair works as expected.
5056  */
5057 void
5058 ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
5059 {
5060         ztest_shared_t *zs = ztest_shared;
5061         spa_t *spa = ztest_spa;
5062         objset_t *os = zd->zd_os;
5063         ztest_od_t od[1];
5064         uint64_t object, blocksize, txg, pattern, psize;
5065         enum zio_checksum checksum = spa_dedup_checksum(spa);
5066         dmu_buf_t *db;
5067         dmu_tx_t *tx;
5068         abd_t *abd;
5069         blkptr_t blk;
5070         int copies = 2 * ZIO_DEDUPDITTO_MIN;
5071 
5072         blocksize = ztest_random_blocksize();
5073         blocksize = MIN(blocksize, 2048);       /* because we write so many */
5074 
5075         ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
5076 
5077         if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
5078                 return;
5079 
5080         /*
5081          * Take the name lock as writer to prevent anyone else from changing
5082          * the pool and dataset properties we need to maintain during this test.
5083          */
5084         (void) rw_wrlock(&ztest_name_lock);
5085 
5086         if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum,
5087             B_FALSE) != 0 ||
5088             ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1,
5089             B_FALSE) != 0) {
5090                 (void) rw_unlock(&ztest_name_lock);
5091                 return;
5092         }
5093 
5094         dmu_objset_stats_t dds;
5095         dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
5096         dmu_objset_fast_stat(os, &dds);
5097         dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
5098 
5099         object = od[0].od_object;
5100         blocksize = od[0].od_blocksize;
5101         pattern = zs->zs_guid ^ dds.dds_guid;
5102 
5103         ASSERT(object != 0);
5104 
5105         tx = dmu_tx_create(os);
5106         dmu_tx_hold_write(tx, object, 0, copies * blocksize);
5107         txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
5108         if (txg == 0) {
5109                 (void) rw_unlock(&ztest_name_lock);
5110                 return;
5111         }
5112 
5113         /*
5114          * Write all the copies of our block.
5115          */
5116         for (int i = 0; i < copies; i++) {
5117                 uint64_t offset = i * blocksize;
5118                 int error = dmu_buf_hold(os, object, offset, FTAG, &db,
5119                     DMU_READ_NO_PREFETCH);
5120                 if (error != 0) {
5121                         fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %d",
5122                             os, (u_longlong_t)object, (u_longlong_t)offset, error);
5123                 }
5124                 ASSERT(db->db_offset == offset);
5125                 ASSERT(db->db_size == blocksize);
5126                 ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) ||
5127                     ztest_pattern_match(db->db_data, db->db_size, 0ULL));
5128                 dmu_buf_will_fill(db, tx);
5129                 ztest_pattern_set(db->db_data, db->db_size, pattern);


5137          * Find out what block we got.
5138          */
5139         VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db,
5140             DMU_READ_NO_PREFETCH));
5141         blk = *((dmu_buf_impl_t *)db)->db_blkptr;
5142         dmu_buf_rele(db, FTAG);
5143 
5144         /*
5145          * Damage the block.  Dedup-ditto will save us when we read it later.
5146          */
5147         psize = BP_GET_PSIZE(&blk);
5148         abd = abd_alloc_linear(psize, B_TRUE);
5149         ztest_pattern_set(abd_to_buf(abd), psize, ~pattern);
5150 
5151         (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk,
5152             abd, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
5153             ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL));
5154 
5155         abd_free(abd);
5156 
5157         (void) rw_unlock(&ztest_name_lock);
5158 }
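
/*
 * ztest_pattern_set() and ztest_pattern_match() are defined earlier in
 * the file (elided here).  A sketch consistent with their use above:
 * fill a buffer with a repeating 64-bit word, and test whether a
 * buffer contains only that word.
 */
static void
pattern_set_sketch(void *buf, uint64_t size, uint64_t value)
{
	uint64_t *ip = buf;
	uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);

	while (ip < ip_end)
		*ip++ = value;
}

static boolean_t
pattern_match_sketch(void *buf, uint64_t size, uint64_t value)
{
	uint64_t *ip = buf;
	uint64_t *ip_end = (uint64_t *)((uintptr_t)buf + (uintptr_t)size);

	while (ip < ip_end)
		if (*ip++ != value)
			return (B_FALSE);
	return (B_TRUE);
}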
5159 
5160 /*
5161  * Scrub the pool.
5162  */
5163 /* ARGSUSED */
5164 void
5165 ztest_scrub(ztest_ds_t *zd, uint64_t id)
5166 {
5167         spa_t *spa = ztest_spa;
5168 
5169         (void) spa_scan(spa, POOL_SCAN_SCRUB);
5170         (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */
5171         (void) spa_scan(spa, POOL_SCAN_SCRUB);
5172 }
5173 
5174 /*
5175  * Change the guid for the pool.
5176  */
5177 /* ARGSUSED */
5178 void
5179 ztest_reguid(ztest_ds_t *zd, uint64_t id)
5180 {
5181         spa_t *spa = ztest_spa;
5182         uint64_t orig, load;
5183         int error;
5184 
5185         orig = spa_guid(spa);
5186         load = spa_load_guid(spa);
5187 
5188         (void) rw_wrlock(&ztest_name_lock);
5189         error = spa_change_guid(spa);
5190         (void) rw_unlock(&ztest_name_lock);
5191 
5192         if (error != 0)
5193                 return;
5194 
5195         if (ztest_opts.zo_verbose >= 4) {
5196                 (void) printf("Changed guid old %llu -> %llu\n",
5197                     (u_longlong_t)orig, (u_longlong_t)spa_guid(spa));
5198         }
5199 
5200         VERIFY3U(orig, !=, spa_guid(spa));
5201         VERIFY3U(load, ==, spa_load_guid(spa));
5202 }
5203 
5204 /*
5205  * Rename the pool to a different name and then rename it back.
5206  */
5207 /* ARGSUSED */
5208 void
5209 ztest_spa_rename(ztest_ds_t *zd, uint64_t id)
5210 {
5211         char *oldname, *newname;
5212         spa_t *spa;
5213 
5214         (void) rw_wrlock(&ztest_name_lock);
5215 
5216         oldname = ztest_opts.zo_pool;
5217         newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL);
5218         (void) strcpy(newname, oldname);
5219         (void) strcat(newname, "_tmp");
5220 
5221         /*
5222          * Do the rename
5223          */
5224         VERIFY3U(0, ==, spa_rename(oldname, newname));
5225 
5226         /*
5227          * Try to open it under the old name, which shouldn't exist
5228          */
5229         VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG));
5230 
5231         /*
5232          * Open it under the new name and make sure it's still the same spa_t.
5233          */
5234         VERIFY3U(0, ==, spa_open(newname, &spa, FTAG));
5235 
5236         ASSERT(spa == ztest_spa);
5237         spa_close(spa, FTAG);
5238 
5239         /*
5240          * Rename it back to the original
5241          */
5242         VERIFY3U(0, ==, spa_rename(newname, oldname));
5243 
5244         /*
5245          * Make sure it can still be opened
5246          */
5247         VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG));
5248 
5249         ASSERT(spa == ztest_spa);
5250         spa_close(spa, FTAG);
5251 
5252         umem_free(newname, strlen(newname) + 1);
5253 
5254         (void) rw_unlock(&ztest_name_lock);
5255 }
5256 
5257 /*
5258  * Verify pool integrity by running zdb.
5259  */
5260 static void
5261 ztest_run_zdb(char *pool)
5262 {
5263         int status;
5264         char zdb[MAXPATHLEN + MAXNAMELEN + 20];
5265         char zbuf[1024];
5266         char *bin;
5267         char *ztest;
5268         char *isa;
5269         int isalen;
5270         FILE *fp;
5271 
5272         (void) realpath(getexecname(), zdb);
5273 
5274         /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */


5589          * That's because zap_count() returns the open-context value,
5590          * while dmu_objset_space() returns the rootbp fill count.
5591          */
5592         VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs));
5593         dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch);
5594         ASSERT3U(dirobjs + 1, ==, usedobjs);
5595 }
5596 
5597 static int
5598 ztest_dataset_open(int d)
5599 {
5600         ztest_ds_t *zd = &ztest_ds[d];
5601         uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq;
5602         objset_t *os;
5603         zilog_t *zilog;
5604         char name[ZFS_MAX_DATASET_NAME_LEN];
5605         int error;
5606 
5607         ztest_dataset_name(name, ztest_opts.zo_pool, d);
5608 
5609         (void) rw_rdlock(&ztest_name_lock);
5610 
5611         error = ztest_dataset_create(name);
5612         if (error == ENOSPC) {
5613                 (void) rw_unlock(&ztest_name_lock);
5614                 ztest_record_enospc(FTAG);
5615                 return (error);
5616         }
5617         ASSERT(error == 0 || error == EEXIST);
5618 
5619         VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os));
5620         (void) rw_unlock(&ztest_name_lock);
5621 
5622         ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
5623 
5624         zilog = zd->zd_zilog;
5625 
5626         if (zilog->zl_header->zh_claim_lr_seq != 0 &&
5627             zilog->zl_header->zh_claim_lr_seq < committed_seq)
5628                 fatal(0, "missing log records: claimed %llu < committed %llu",
5629                     zilog->zl_header->zh_claim_lr_seq, committed_seq);
5630 
5631         ztest_dataset_dirobj_verify(zd);
5632 
5633         zil_replay(os, zd, ztest_replay_vector);
5634 
5635         ztest_dataset_dirobj_verify(zd);
5636 
5637         if (ztest_opts.zo_verbose >= 6)
5638                 (void) printf("%s replay %llu blocks, %llu records, seq %llu\n",
5639                     zd->zd_name,
5640                     (u_longlong_t)zilog->zl_parse_blk_count,


5651         return (0);
5652 }
5653 
5654 static void
5655 ztest_dataset_close(int d)
5656 {
5657         ztest_ds_t *zd = &ztest_ds[d];
5658 
5659         zil_close(zd->zd_zilog);
5660         dmu_objset_disown(zd->zd_os, zd);
5661 
5662         ztest_zd_fini(zd);
5663 }
5664 
5665 /*
5666  * Kick off threads to run tests on all datasets in parallel.
5667  */
5668 static void
5669 ztest_run(ztest_shared_t *zs)
5670 {
5671         thread_t *tid;
5672         spa_t *spa;
5673         objset_t *os;
5674         thread_t resume_tid;
5675         int error;
5676 
5677         ztest_exiting = B_FALSE;
5678 
5679         /*
5680          * Initialize parent/child shared state.
5681          */
5682         VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0);
5683         VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0);
5684 
5685         zs->zs_thread_start = gethrtime();
5686         zs->zs_thread_stop =
5687             zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC;
5688         zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop);
5689         zs->zs_thread_kill = zs->zs_thread_stop;
5690         if (ztest_random(100) < ztest_opts.zo_killrate) {
5691                 zs->zs_thread_kill -=
5692                     ztest_random(ztest_opts.zo_passtime * NANOSEC);
5693         }
5694 
5695         (void) _mutex_init(&zcl.zcl_callbacks_lock, USYNC_THREAD, NULL);
5696 
5697         list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t),
5698             offsetof(ztest_cb_data_t, zcd_node));
5699 
5700         /*
5701          * Open our pool.
5702          */
5703         kernel_init(FREAD | FWRITE);
5704         VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
5705         spa->spa_debug = B_TRUE;
5706         metaslab_preload_limit = ztest_random(20) + 1;
5707         ztest_spa = spa;
5708 
5709         dmu_objset_stats_t dds;
5710         VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
5711             DMU_OST_ANY, B_TRUE, FTAG, &os));
5712         dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
5713         dmu_objset_fast_stat(os, &dds);
5714         dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
5715         zs->zs_guid = dds.dds_guid;
5716         dmu_objset_disown(os, FTAG);
5717 
5718         spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
5719 
5720         /*
5721          * We don't expect the pool to suspend unless maxfaults == 0,
5722          * in which case ztest_fault_inject() temporarily takes away
5723          * the only valid replica.
5724          */
5725         if (MAXFAULTS() == 0)
5726                 spa->spa_failmode = ZIO_FAILURE_MODE_WAIT;
5727         else
5728                 spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;
5729 
5730         /*
5731          * Create a thread to periodically resume suspended I/O.
5732          */
5733         VERIFY(thr_create(0, 0, ztest_resume_thread, spa, THR_BOUND,
5734             &resume_tid) == 0);
5735 
5736         /*
5737          * Create a deadman thread to abort() if we hang.
5738          */
5739         VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND,
5740             NULL) == 0);
5741 
5742         /*
5743          * Verify that we can safely inquire about any object,
5744          * whether it's allocated or not.  To make it interesting,
5745          * we probe a 5-wide window around each power of two.
5746          * This hits all edge cases, including zero and the max.
5747          */
5748         for (int t = 0; t < 64; t++) {
5749                 for (int d = -5; d <= 5; d++) {
5750                         error = dmu_object_info(spa->spa_meta_objset,
5751                             (1ULL << t) + d, NULL);
5752                         ASSERT(error == 0 || error == ENOENT ||
5753                             error == EINVAL);
5754                 }
5755         }
5756 
5757         /*
5758          * If we got any ENOSPC errors on the previous run, destroy something.
5759          */
5760         if (zs->zs_enospc_count != 0) {
5761                 int d = ztest_random(ztest_opts.zo_datasets);
5762                 ztest_dataset_destroy(d);
5763         }
5764         zs->zs_enospc_count = 0;
5765 
5766         tid = umem_zalloc(ztest_opts.zo_threads * sizeof (thread_t),
5767             UMEM_NOFAIL);
5768 
5769         if (ztest_opts.zo_verbose >= 4)
5770                 (void) printf("starting main threads...\n");
5771 
5772         /*
5773          * Kick off all the tests that run in parallel.
5774          */
5775         for (int t = 0; t < ztest_opts.zo_threads; t++) {
5776                 if (t < ztest_opts.zo_datasets &&
5777                     ztest_dataset_open(t) != 0)
5778                         return;
5779                 VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t,
5780                     THR_BOUND, &tid[t]) == 0);
5781         }
5782 
5783         /*
5784          * Wait for all of the tests to complete.  We go in reverse order
5785          * so we don't close datasets while threads are still using them.
5786          */
5787         for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) {
5788                 VERIFY(thr_join(tid[t], NULL, NULL) == 0);
5789                 if (t < ztest_opts.zo_datasets)
5790                         ztest_dataset_close(t);
5791         }
5792 
5793         txg_wait_synced(spa_get_dsl(spa), 0);
5794 
5795         zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
5796         zs->zs_space = metaslab_class_get_space(spa_normal_class(spa));
5797         zfs_dbgmsg_print(FTAG);
5798 
5799         umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t));
5800 
5801         /* Kill the resume thread */
5802         ztest_exiting = B_TRUE;
5803         VERIFY(thr_join(resume_tid, NULL, NULL) == 0);
5804         ztest_resume(spa);
5805 
5806         /*
5807          * Right before closing the pool, kick off a bunch of async I/O;
5808          * spa_close() should wait for it to complete.
5809          */
5810         for (uint64_t object = 1; object < 50; object++) {
5811                 dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20,
5812                     ZIO_PRIORITY_SYNC_READ);
5813         }
5814 
5815         spa_close(spa, FTAG);
5816 
5817         /*
5818          * Verify that we can loop over all pools.
5819          */
5820         mutex_enter(&spa_namespace_lock);
5821         for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa))
5822                 if (ztest_opts.zo_verbose > 3)
5823                         (void) printf("spa_next: found %s\n", spa_name(spa));
5824         mutex_exit(&spa_namespace_lock);
5825 
5826         /*
5827          * Verify that we can export the pool and reimport it under a
5828          * different name.
5829          */
5830         if (ztest_random(2) == 0) {
5831                 char name[ZFS_MAX_DATASET_NAME_LEN];
5832                 (void) snprintf(name, sizeof (name), "%s_import",
5833                     ztest_opts.zo_pool);
5834                 ztest_spa_import_export(ztest_opts.zo_pool, name);
5835                 ztest_spa_import_export(name, ztest_opts.zo_pool);
5836         }
5837 
5838         kernel_fini();
5839 
5840         list_destroy(&zcl.zcl_callbacks);
5841 
5842         (void) _mutex_destroy(&zcl.zcl_callbacks_lock);
5843 
5844         (void) rwlock_destroy(&ztest_name_lock);
5845         (void) _mutex_destroy(&ztest_vdev_lock);
5846 }
5847 
5848 static void
5849 ztest_freeze(void)
5850 {
5851         ztest_ds_t *zd = &ztest_ds[0];
5852         spa_t *spa;
5853         int numloops = 0;
5854 
5855         if (ztest_opts.zo_verbose >= 3)
5856                 (void) printf("testing spa_freeze()...\n");
5857 
5858         kernel_init(FREAD | FWRITE);
5859         VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
5860         VERIFY3U(0, ==, ztest_dataset_open(0));
5861         spa->spa_debug = B_TRUE;
5862         ztest_spa = spa;
5863 
5864         /*
5865          * Force the first log block to be transactionally allocated.


5969         nvlist_t *props;
5970 
5971         VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
5972         if (ztest_random(2) == 0)
5973                 return (props);
5974         VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0);
5975 
5976         return (props);
5977 }
5978 
5979 /*
5980  * Create a storage pool with the given name and initial vdev size.
5981  * Then test spa_freeze() functionality.
5982  */
5983 static void
5984 ztest_init(ztest_shared_t *zs)
5985 {
5986         spa_t *spa;
5987         nvlist_t *nvroot, *props;
5988 
5989         VERIFY(_mutex_init(&ztest_vdev_lock, USYNC_THREAD, NULL) == 0);
5990         VERIFY(rwlock_init(&ztest_name_lock, USYNC_THREAD, NULL) == 0);
5991 
5992         kernel_init(FREAD | FWRITE);
5993 
5994         /*
5995          * Create the storage pool.
5996          */
5997         (void) spa_destroy(ztest_opts.zo_pool);
5998         ztest_shared->zs_vdev_next_leaf = 0;
5999         zs->zs_splits = 0;
6000         zs->zs_mirrors = ztest_opts.zo_mirrors;
6001         nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
6002             0, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
6003         props = make_random_props();
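        /*
         * Enable every known feature at pool creation.  For feature@
         * properties, 0 ("enabled") appears to be the only value
         * spa_create() accepts.
         */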
6004         for (int i = 0; i < SPA_FEATURES; i++) {
6005                 char buf[1024];
6006                 (void) snprintf(buf, sizeof (buf), "feature@%s",
6007                     spa_feature_table[i].fi_uname);
6008                 VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
6009         }
6010         VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL));
6011         nvlist_free(nvroot);
6012         nvlist_free(props);
6013 
6014         VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
6015         zs->zs_metaslab_sz =
6016             1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
6017 
6018         spa_close(spa, FTAG);
6019 
6020         kernel_fini();
6021 
6022         ztest_run_zdb(ztest_opts.zo_pool);
6023 
6024         ztest_freeze();
6025 
6026         ztest_run_zdb(ztest_opts.zo_pool);
6027 
6028         (void) rwlock_destroy(&ztest_name_lock);
6029         (void) _mutex_destroy(&ztest_vdev_lock);
6030 }
6031 
6032 static void
6033 setup_data_fd(void)
6034 {
6035         static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX";
6036 
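        /*
         * mkstemp() returns an open fd; unlinking the name right away
         * makes the file anonymous, so the kernel reclaims it once the
         * last descriptor (including any inherited by child processes)
         * is closed.
         */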
6037         ztest_fd_data = mkstemp(ztest_name_data);
6038         ASSERT3S(ztest_fd_data, >=, 0);
6039         (void) unlink(ztest_name_data);
6040 }
6041 
6042 
6043 static int
6044 shared_data_size(ztest_shared_hdr_t *hdr)
6045 {
6046         int size;
6047 
6048         size = hdr->zh_hdr_size;
6049         size += hdr->zh_opts_size;




   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  24  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  25  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  26  * Copyright (c) 2014 Integros [integros.com]
  27  * Copyright 2017 Joyent, Inc.
  28  * Copyright 2017 RackTop Systems.
  29  */
  30 
  31 /*
  32  * The objective of this program is to provide a DMU/ZAP/SPA stress test
  33  * that runs entirely in userland, is easy to use, and easy to extend.
  34  *
  35  * The overall design of the ztest program is as follows:
  36  *
  37  * (1) For each major functional area (e.g. adding vdevs to a pool,
  38  *     creating and destroying datasets, reading and writing objects, etc)
  39  *     we have a simple routine to test that functionality.  These
  40  *     individual routines do not have to do anything "stressful".
  41  *
  42  * (2) We turn these simple functionality tests into a stress test by
  43  *     running them all in parallel, with as many threads as desired,
  44  *     and spread across as many datasets, objects, and vdevs as desired.
  45  *
  46  * (3) While all this is happening, we inject faults into the pool to
  47  *     verify that self-healing data really works.
  48  *


 229 } ztest_block_tag_t;
 230 
 231 typedef struct bufwad {
 232         uint64_t        bw_index;
 233         uint64_t        bw_txg;
 234         uint64_t        bw_data;
 235 } bufwad_t;
 236 
 237 /*
 238  * XXX -- fix zfs range locks to be generic so we can use them here.
 239  */
 240 typedef enum {
 241         RL_READER,
 242         RL_WRITER,
 243         RL_APPEND
 244 } rl_type_t;
 245 
 246 typedef struct rll {
 247         void            *rll_writer;
 248         int             rll_readers;
 249         kmutex_t        rll_lock;
 250         kcondvar_t      rll_cv;
 251 } rll_t;
 252 
 253 typedef struct rl {
 254         uint64_t        rl_object;
 255         uint64_t        rl_offset;
 256         uint64_t        rl_size;
 257         rll_t           *rl_lock;
 258 } rl_t;
 259 
 260 #define ZTEST_RANGE_LOCKS       64
 261 #define ZTEST_OBJECT_LOCKS      64
 262 
 263 /*
 264  * Object descriptor.  Used as a template for object lookup/create/remove.
 265  */
 266 typedef struct ztest_od {
 267         uint64_t        od_dir;
 268         uint64_t        od_object;
 269         dmu_object_type_t od_type;
 270         dmu_object_type_t od_crtype;
 271         uint64_t        od_blocksize;
 272         uint64_t        od_crblocksize;
 273         uint64_t        od_gen;
 274         uint64_t        od_crgen;
 275         char            od_name[ZFS_MAX_DATASET_NAME_LEN];
 276 } ztest_od_t;
 277 
 278 /*
 279  * Per-dataset state.
 280  */
 281 typedef struct ztest_ds {
 282         ztest_shared_ds_t *zd_shared;
 283         objset_t        *zd_os;
 284         krwlock_t       zd_zilog_lock;
 285         zilog_t         *zd_zilog;
 286         ztest_od_t      *zd_od;         /* debugging aid */
 287         char            zd_name[ZFS_MAX_DATASET_NAME_LEN];
 288         kmutex_t        zd_dirobj_lock;
 289         rll_t           zd_object_lock[ZTEST_OBJECT_LOCKS];
 290         rll_t           zd_range_lock[ZTEST_RANGE_LOCKS];
 291 } ztest_ds_t;
 292 
 293 /*
 294  * Per-iteration state.
 295  */
 296 typedef void ztest_func_t(ztest_ds_t *zd, uint64_t id);
 297 
 298 typedef struct ztest_info {
 299         ztest_func_t    *zi_func;       /* test function */
 300         uint64_t        zi_iters;       /* iterations per execution */
 301         uint64_t        *zi_interval;   /* execute every <interval> seconds */
 302 } ztest_info_t;
 303 
 304 typedef struct ztest_shared_callstate {
 305         uint64_t        zc_count;       /* per-pass count */
 306         uint64_t        zc_time;        /* per-pass time */
 307         uint64_t        zc_next;        /* next time to call this function */
 308 } ztest_shared_callstate_t;


 375         { ztest_reguid,                         1,      &zopt_rarely        },
 376         { ztest_spa_rename,                     1,      &zopt_rarely        },
 377         { ztest_scrub,                          1,      &zopt_rarely        },
 378         { ztest_spa_upgrade,                    1,      &zopt_rarely        },
 379         { ztest_dsl_dataset_promote_busy,       1,      &zopt_rarely        },
 380         { ztest_vdev_attach_detach,             1,      &zopt_sometimes     },
 381         { ztest_vdev_LUN_growth,                1,      &zopt_rarely        },
 382         { ztest_vdev_add_remove,                1,
 383             &ztest_opts.zo_vdevtime                         },
 384         { ztest_vdev_aux_add_remove,            1,
 385             &ztest_opts.zo_vdevtime                         },
 386 };
 387 
 388 #define ZTEST_FUNCS     (sizeof (ztest_info) / sizeof (ztest_info_t))
 389 
 390 /*
 391  * The following struct is used to hold a list of uncalled commit callbacks.
 392  * The callbacks are ordered by txg number.
 393  */
 394 typedef struct ztest_cb_list {
 395         kmutex_t zcl_callbacks_lock;
 396         list_t  zcl_callbacks;
 397 } ztest_cb_list_t;
 398 
 399 /*
 400  * Stuff we need to share writably between parent and child.
 401  */
 402 typedef struct ztest_shared {
 403         boolean_t       zs_do_init;
 404         hrtime_t        zs_proc_start;
 405         hrtime_t        zs_proc_stop;
 406         hrtime_t        zs_thread_start;
 407         hrtime_t        zs_thread_stop;
 408         hrtime_t        zs_thread_kill;
 409         uint64_t        zs_enospc_count;
 410         uint64_t        zs_vdev_next_leaf;
 411         uint64_t        zs_vdev_aux;
 412         uint64_t        zs_alloc;
 413         uint64_t        zs_space;
 414         uint64_t        zs_splits;
 415         uint64_t        zs_mirrors;
 416         uint64_t        zs_metaslab_sz;
 417         uint64_t        zs_metaslab_df_alloc_threshold;
 418         uint64_t        zs_guid;
 419 } ztest_shared_t;
 420 
 421 #define ID_PARALLEL     -1ULL
 422 
 423 static char ztest_dev_template[] = "%s/%s.%llua";
 424 static char ztest_aux_template[] = "%s/%s.%s.%llu";
 425 ztest_shared_t *ztest_shared;
 426 
 427 static spa_t *ztest_spa = NULL;
 428 static ztest_ds_t *ztest_ds;
 429 
 430 static kmutex_t ztest_vdev_lock;
 431 
 432 /*
 433  * The ztest_name_lock protects the pool and dataset namespace used by
 434  * the individual tests. To modify the namespace, consumers must grab
 435  * this lock as writer. Grabbing the lock as reader will ensure that the
 436  * namespace does not change while the lock is held.
 437  */
 438 static krwlock_t ztest_name_lock;
 439 
 440 static boolean_t ztest_dump_core = B_TRUE;
 441 static boolean_t ztest_exiting;
 442 
 443 /* Global commit callback list */
 444 static ztest_cb_list_t zcl;
 445 
 446 enum ztest_object {
 447         ZTEST_META_DNODE = 0,
 448         ZTEST_DIROBJ,
 449         ZTEST_OBJECTS
 450 };
 451 
 452 static void usage(boolean_t) __NORETURN;
 453 
 454 /*
 455  * These libumem hooks provide a reasonable set of defaults for the allocator's
 456  * debugging facilities.
 457  */
 458 const char *


1074         VERIFY(nvlist_add_uint64(props, zpool_prop_to_name(prop), value) == 0);
1075 
1076         error = spa_prop_set(spa, props);
1077 
1078         nvlist_free(props);
1079 
1080         if (error == ENOSPC) {
1081                 ztest_record_enospc(FTAG);
1082                 return (error);
1083         }
1084         ASSERT0(error);
1085 
1086         return (error);
1087 }
1088 
1089 static void
1090 ztest_rll_init(rll_t *rll)
1091 {
1092         rll->rll_writer = NULL;
1093         rll->rll_readers = 0;
1094         mutex_init(&rll->rll_lock, NULL, USYNC_THREAD, NULL);
1095         cv_init(&rll->rll_cv, NULL, USYNC_THREAD, NULL);
1096 }
1097 
1098 static void
1099 ztest_rll_destroy(rll_t *rll)
1100 {
1101         ASSERT(rll->rll_writer == NULL);
1102         ASSERT(rll->rll_readers == 0);
1103         mutex_destroy(&rll->rll_lock);
1104         cv_destroy(&rll->rll_cv);
1105 }
1106 
1107 static void
1108 ztest_rll_lock(rll_t *rll, rl_type_t type)
1109 {
1110         mutex_enter(&rll->rll_lock);
1111 
1112         if (type == RL_READER) {
1113                 while (rll->rll_writer != NULL)
1114                         cv_wait(&rll->rll_cv, &rll->rll_lock);
1115                 rll->rll_readers++;
1116         } else {
1117                 while (rll->rll_writer != NULL || rll->rll_readers)
1118                         cv_wait(&rll->rll_cv, &rll->rll_lock);
1119                 rll->rll_writer = curthread;
1120         }
1121 
1122         mutex_exit(&rll->rll_lock);
1123 }
1124 
1125 static void
1126 ztest_rll_unlock(rll_t *rll)
1127 {
1128         mutex_enter(&rll->rll_lock);
1129 
1130         if (rll->rll_writer) {
1131                 ASSERT(rll->rll_readers == 0);
1132                 rll->rll_writer = NULL;
1133         } else {
1134                 ASSERT(rll->rll_readers != 0);
1135                 ASSERT(rll->rll_writer == NULL);
1136                 rll->rll_readers--;
1137         }
1138 
1139         if (rll->rll_writer == NULL && rll->rll_readers == 0)
1140                 cv_broadcast(&rll->rll_cv);
1141 
1142         mutex_exit(&rll->rll_lock);
1143 }
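
/*
 * Usage sketch (hypothetical function, not in ztest): the rll_t above
 * is a small reader/writer lock built from a kmutex/kcondvar pair.
 */
static void
example_rll_usage(rll_t *rll)
{
	ztest_rll_lock(rll, RL_READER);		/* shared: many readers */
	/* ... read state protected by rll ... */
	ztest_rll_unlock(rll);

	ztest_rll_lock(rll, RL_WRITER);		/* exclusive access */
	/* ... modify state protected by rll ... */
	ztest_rll_unlock(rll);
}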
1144 
1145 static void
1146 ztest_object_lock(ztest_ds_t *zd, uint64_t object, rl_type_t type)
1147 {
1148         rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
1149 
1150         ztest_rll_lock(rll, type);
1151 }
1152 
1153 static void
1154 ztest_object_unlock(ztest_ds_t *zd, uint64_t object)
1155 {
1156         rll_t *rll = &zd->zd_object_lock[object & (ZTEST_OBJECT_LOCKS - 1)];
1157 
1158         ztest_rll_unlock(rll);
1159 }
1160 
1161 static rl_t *
1162 ztest_range_lock(ztest_ds_t *zd, uint64_t object, uint64_t offset,


1181 ztest_range_unlock(rl_t *rl)
1182 {
1183         rll_t *rll = rl->rl_lock;
1184 
1185         ztest_rll_unlock(rll);
1186 
1187         umem_free(rl, sizeof (*rl));
1188 }
1189 
1190 static void
1191 ztest_zd_init(ztest_ds_t *zd, ztest_shared_ds_t *szd, objset_t *os)
1192 {
1193         zd->zd_os = os;
1194         zd->zd_zilog = dmu_objset_zil(os);
1195         zd->zd_shared = szd;
1196         dmu_objset_name(os, zd->zd_name);
1197 
1198         if (zd->zd_shared != NULL)
1199                 zd->zd_shared->zd_seq = 0;
1200 
1201         rw_init(&zd->zd_zilog_lock, NULL, USYNC_THREAD, NULL);
1202         mutex_init(&zd->zd_dirobj_lock, NULL, USYNC_THREAD, NULL);
1203 
1204         for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
1205                 ztest_rll_init(&zd->zd_object_lock[l]);
1206 
1207         for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
1208                 ztest_rll_init(&zd->zd_range_lock[l]);
1209 }
1210 
1211 static void
1212 ztest_zd_fini(ztest_ds_t *zd)
1213 {
1214         mutex_destroy(&zd->zd_dirobj_lock);
1215 
1216         for (int l = 0; l < ZTEST_OBJECT_LOCKS; l++)
1217                 ztest_rll_destroy(&zd->zd_object_lock[l]);
1218 
1219         for (int l = 0; l < ZTEST_RANGE_LOCKS; l++)
1220                 ztest_rll_destroy(&zd->zd_range_lock[l]);
1221 }
1222 
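/*
 * One time in ten, use TXG_NOWAIT so that the non-blocking
 * dmu_tx_assign() path (and its retry handling in ztest_tx_assign()
 * below) gets exercised; otherwise block with TXG_WAIT.
 */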
1223 #define TXG_MIGHTWAIT   (ztest_random(10) == 0 ? TXG_NOWAIT : TXG_WAIT)
1224 
1225 static uint64_t
1226 ztest_tx_assign(dmu_tx_t *tx, uint64_t txg_how, const char *tag)
1227 {
1228         uint64_t txg;
1229         int error;
1230 
1231         /*
1232          * Attempt to assign tx to some transaction group.
1233          */
1234         error = dmu_tx_assign(tx, txg_how);


1949         return (lr);
1950 }
1951 
1952 void
1953 ztest_lr_free(void *lr, size_t lrsize, char *name)
1954 {
1955         size_t namesize = name ? strlen(name) + 1 : 0;
1956 
1957         umem_free(lr, lrsize + namesize);
1958 }
1959 
1960 /*
1961  * Lookup a bunch of objects.  Returns the number of objects not found.
1962  */
1963 static int
1964 ztest_lookup(ztest_ds_t *zd, ztest_od_t *od, int count)
1965 {
1966         int missing = 0;
1967         int error;
1968 
1969         ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));
1970 
1971         for (int i = 0; i < count; i++, od++) {
1972                 od->od_object = 0;
1973                 error = zap_lookup(zd->zd_os, od->od_dir, od->od_name,
1974                     sizeof (uint64_t), 1, &od->od_object);
1975                 if (error) {
1976                         ASSERT(error == ENOENT);
1977                         ASSERT(od->od_object == 0);
1978                         missing++;
1979                 } else {
1980                         dmu_buf_t *db;
1981                         ztest_block_tag_t *bbt;
1982                         dmu_object_info_t doi;
1983 
1984                         ASSERT(od->od_object != 0);
1985                         ASSERT(missing == 0);   /* there should be no gaps */
1986 
1987                         ztest_object_lock(zd, od->od_object, RL_READER);
1988                         VERIFY3U(0, ==, dmu_bonus_hold(zd->zd_os,
1989                             od->od_object, FTAG, &db));
1990                         dmu_object_info_from_db(db, &doi);
1991                         bbt = ztest_bt_bonus(db);
1992                         ASSERT3U(bbt->bt_magic, ==, BT_MAGIC);
1993                         od->od_type = doi.doi_type;
1994                         od->od_blocksize = doi.doi_data_block_size;
1995                         od->od_gen = bbt->bt_gen;
1996                         dmu_buf_rele(db, FTAG);
1997                         ztest_object_unlock(zd, od->od_object);
1998                 }
1999         }
2000 
2001         return (missing);
2002 }
2003 
2004 static int
2005 ztest_create(ztest_ds_t *zd, ztest_od_t *od, int count)
2006 {
2007         int missing = 0;
2008 
2009         ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));
2010 
2011         for (int i = 0; i < count; i++, od++) {
2012                 if (missing) {
2013                         od->od_object = 0;
2014                         missing++;
2015                         continue;
2016                 }
2017 
2018                 lr_create_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
2019 
2020                 lr->lr_doid = od->od_dir;
2021                 lr->lr_foid = 0;     /* 0 to allocate, > 0 to claim */
2022                 lr->lrz_type = od->od_crtype;
2023                 lr->lrz_blocksize = od->od_crblocksize;
2024                 lr->lrz_ibshift = ztest_random_ibshift();
2025                 lr->lrz_bonustype = DMU_OT_UINT64_OTHER;
2026                 lr->lrz_bonuslen = dmu_bonus_max();
2027                 lr->lr_gen = od->od_crgen;
2028                 lr->lr_crtime[0] = time(NULL);
2029 


2034                 } else {
2035                         od->od_object = lr->lr_foid;
2036                         od->od_type = od->od_crtype;
2037                         od->od_blocksize = od->od_crblocksize;
2038                         od->od_gen = od->od_crgen;
2039                         ASSERT(od->od_object != 0);
2040                 }
2041 
2042                 ztest_lr_free(lr, sizeof (*lr), od->od_name);
2043         }
2044 
2045         return (missing);
2046 }
2047 
2048 static int
2049 ztest_remove(ztest_ds_t *zd, ztest_od_t *od, int count)
2050 {
2051         int missing = 0;
2052         int error;
2053 
2054         ASSERT(MUTEX_HELD(&zd->zd_dirobj_lock));
2055 
2056         od += count - 1;
2057 
2058         for (int i = count - 1; i >= 0; i--, od--) {
2059                 if (missing) {
2060                         missing++;
2061                         continue;
2062                 }
2063 
2064                 /*
2065                  * The object was never found, so there is nothing to remove.
2066                  */
2067                 if (od->od_object == 0)
2068                         continue;
2069 
2070                 lr_remove_t *lr = ztest_lr_alloc(sizeof (*lr), od->od_name);
2071 
2072                 lr->lr_doid = od->od_dir;
2073 
2074                 if ((error = ztest_replay_remove(zd, lr, B_FALSE)) != 0) {


2180 ztest_io(ztest_ds_t *zd, uint64_t object, uint64_t offset)
2181 {
2182         int err;
2183         ztest_block_tag_t wbt;
2184         dmu_object_info_t doi;
2185         enum ztest_io_type io_type;
2186         uint64_t blocksize;
2187         void *data;
2188 
2189         VERIFY(dmu_object_info(zd->zd_os, object, &doi) == 0);
2190         blocksize = doi.doi_data_block_size;
2191         data = umem_alloc(blocksize, UMEM_NOFAIL);
2192 
2193         /*
2194          * Pick an i/o type at random, biased toward writing block tags.
2195          */
2196         io_type = ztest_random(ZTEST_IO_TYPES);
2197         if (ztest_random(2) == 0)
2198                 io_type = ZTEST_IO_WRITE_TAG;
2199 
2200         rw_enter(&zd->zd_zilog_lock, RW_READER);
2201 
2202         switch (io_type) {
2203 
2204         case ZTEST_IO_WRITE_TAG:
2205                 ztest_bt_generate(&wbt, zd->zd_os, object, offset, 0, 0, 0);
2206                 (void) ztest_write(zd, object, offset, sizeof (wbt), &wbt);
2207                 break;
2208 
2209         case ZTEST_IO_WRITE_PATTERN:
2210                 (void) memset(data, 'a' + (object + offset) % 5, blocksize);
2211                 if (ztest_random(2) == 0) {
2212                         /*
2213                          * Induce fletcher2 collisions to ensure that
2214                          * zio_ddt_collision() detects and resolves them
2215                          * when using fletcher2-verify for deduplication.
2216                          */
2217                         ((uint64_t *)data)[0] ^= 1ULL << 63;
2218                         ((uint64_t *)data)[4] ^= 1ULL << 63;
2219                 }
2220                 (void) ztest_write(zd, object, offset, blocksize, data);
2221                 break;
2222 
2223         case ZTEST_IO_WRITE_ZEROES:
2224                 bzero(data, blocksize);
2225                 (void) ztest_write(zd, object, offset, blocksize, data);
2226                 break;
2227 
2228         case ZTEST_IO_TRUNCATE:
2229                 (void) ztest_truncate(zd, object, offset, blocksize);
2230                 break;
2231 
2232         case ZTEST_IO_SETATTR:
2233                 (void) ztest_setattr(zd, object);
2234                 break;
2235 
2236         case ZTEST_IO_REWRITE:
2237                 rw_enter(&ztest_name_lock, RW_READER);
2238                 err = ztest_dsl_prop_set_uint64(zd->zd_name,
2239                     ZFS_PROP_CHECKSUM, spa_dedup_checksum(ztest_spa),
2240                     B_FALSE);
2241                 VERIFY(err == 0 || err == ENOSPC);
2242                 err = ztest_dsl_prop_set_uint64(zd->zd_name,
2243                     ZFS_PROP_COMPRESSION,
2244                     ztest_random_dsl_prop(ZFS_PROP_COMPRESSION),
2245                     B_FALSE);
2246                 VERIFY(err == 0 || err == ENOSPC);
2247                 rw_exit(&ztest_name_lock);
2248 
2249                 VERIFY0(dmu_read(zd->zd_os, object, offset, blocksize, data,
2250                     DMU_READ_NO_PREFETCH));
2251 
2252                 (void) ztest_write(zd, object, offset, blocksize, data);
2253                 break;
2254         }
2255 
2256         rw_exit(&zd->zd_zilog_lock);
2257 
2258         umem_free(data, blocksize);
2259 }
2260 
2261 /*
2262  * Initialize an object description template.
2263  */
2264 static void
2265 ztest_od_init(ztest_od_t *od, uint64_t id, char *tag, uint64_t index,
2266     dmu_object_type_t type, uint64_t blocksize, uint64_t gen)
2267 {
2268         od->od_dir = ZTEST_DIROBJ;
2269         od->od_object = 0;
2270 
2271         od->od_crtype = type;
2272         od->od_crblocksize = blocksize ? blocksize : ztest_random_blocksize();
2273         od->od_crgen = gen;
2274 
2275         od->od_type = DMU_OT_NONE;
2276         od->od_blocksize = 0;
2277         od->od_gen = 0;
2278 
2279         (void) snprintf(od->od_name, sizeof (od->od_name), "%s(%lld)[%llu]",
2280             tag, (int64_t)id, index);
2281 }
2282 
2283 /*
2284  * Lookup or create the objects for a test using the od template.
2285  * If the objects do not all exist, or if 'remove' is specified,
2286  * remove any existing objects and create new ones.  Otherwise,
2287  * use the existing objects.
2288  */
2289 static int
2290 ztest_object_init(ztest_ds_t *zd, ztest_od_t *od, size_t size, boolean_t remove)
2291 {
2292         int count = size / sizeof (*od);
2293         int rv = 0;
2294 
2295         mutex_enter(&zd->zd_dirobj_lock);
2296         if ((ztest_lookup(zd, od, count) != 0 || remove) &&
2297             (ztest_remove(zd, od, count) != 0 ||
2298             ztest_create(zd, od, count) != 0))
2299                 rv = -1;
2300         zd->zd_od = od;
2301         mutex_exit(&zd->zd_dirobj_lock);
2302 
2303         return (rv);
2304 }
2305 
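/*
 * Illustrative usage (a sketch mirroring the tests below): build a
 * template with ztest_od_init() and materialize it with
 * ztest_object_init():
 *
 *	ztest_od_t od[1];
 *	ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, 0, 0);
 *	if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
 *		return;
 *
 * The resulting object is named something like "<caller>(<id>)[0]".
 */
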
2306 /* ARGSUSED */
2307 void
2308 ztest_zil_commit(ztest_ds_t *zd, uint64_t id)
2309 {
2310         zilog_t *zilog = zd->zd_zilog;
2311 
2312         rw_enter(&zd->zd_zilog_lock, RW_READER);
2313 
2314         zil_commit(zilog, ztest_random(ZTEST_OBJECTS));
2315 
2316         /*
2317          * Remember the committed values in zd, which is in parent/child
2318          * shared memory.  If we die, the next iteration of ztest_run()
2319          * will verify that the log really does contain this record.
2320          */
2321         mutex_enter(&zilog->zl_lock);
2322         ASSERT(zd->zd_shared != NULL);
2323         ASSERT3U(zd->zd_shared->zd_seq, <=, zilog->zl_commit_lr_seq);
2324         zd->zd_shared->zd_seq = zilog->zl_commit_lr_seq;
2325         mutex_exit(&zilog->zl_lock);
2326 
2327         rw_exit(&zd->zd_zilog_lock);
2328 }
2329 
2330 /*
2331  * This function is designed to simulate the operations that occur during a
2332  * mount/unmount operation.  We hold the dataset across these operations in an
2333  * attempt to expose any implicit assumptions about ZIL management.
2334  */
2335 /* ARGSUSED */
2336 void
2337 ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
2338 {
2339         objset_t *os = zd->zd_os;
2340 
2341         /*
2342          * We grab the zd_dirobj_lock to ensure that no other thread is
2343          * updating the zil (i.e. adding in-memory log records) and the
2344          * zd_zilog_lock to block any I/O.
2345          */
2346         mutex_enter(&zd->zd_dirobj_lock);
2347         rw_enter(&zd->zd_zilog_lock, RW_WRITER);
2348 
2349         /* zfsvfs_teardown() */
2350         zil_close(zd->zd_zilog);
2351 
2352         /* zfsvfs_setup() */
2353         VERIFY(zil_open(os, ztest_get_data) == zd->zd_zilog);
2354         zil_replay(os, zd, ztest_replay_vector);
2355 
2356         rw_exit(&zd->zd_zilog_lock);
2357         mutex_exit(&zd->zd_dirobj_lock);
2358 }
2359 
2360 /*
2361  * Verify that we can't destroy an active pool, create an existing pool,
2362  * or create a pool with a bad vdev spec.
2363  */
2364 /* ARGSUSED */
2365 void
2366 ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
2367 {
2368         ztest_shared_opts_t *zo = &ztest_opts;
2369         spa_t *spa;
2370         nvlist_t *nvroot;
2371 
2372         /*
2373          * Attempt to create using a bad file.
2374          */
2375         nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
2376         VERIFY3U(ENOENT, ==,
2377             spa_create("ztest_bad_file", nvroot, NULL, NULL));
2378         nvlist_free(nvroot);
2379 
2380         /*
2381          * Attempt to create using a bad mirror.
2382          */
2383         nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 2, 1);
2384         VERIFY3U(ENOENT, ==,
2385             spa_create("ztest_bad_mirror", nvroot, NULL, NULL));
2386         nvlist_free(nvroot);
2387 
2388         /*
2389          * Attempt to create an existing pool.  It shouldn't matter
2390          * what's in the nvroot; we should fail with EEXIST.
2391          */
2392         rw_enter(&ztest_name_lock, RW_READER);
2393         nvroot = make_vdev_root("/dev/bogus", NULL, NULL, 0, 0, 0, 0, 0, 1);
2394         VERIFY3U(EEXIST, ==, spa_create(zo->zo_pool, nvroot, NULL, NULL));
2395         nvlist_free(nvroot);
2396         VERIFY3U(0, ==, spa_open(zo->zo_pool, &spa, FTAG));
2397         VERIFY3U(EBUSY, ==, spa_destroy(zo->zo_pool));
2398         spa_close(spa, FTAG);
2399 
2400         rw_exit(&ztest_name_lock);
2401 }
2402 
2403 /* ARGSUSED */
2404 void
2405 ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
2406 {
2407         spa_t *spa;
2408         uint64_t initial_version = SPA_VERSION_INITIAL;
2409         uint64_t version, newversion;
2410         nvlist_t *nvroot, *props;
2411         char *name;
2412 
2413         mutex_enter(&ztest_vdev_lock);
2414         name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
2415 
2416         /*
2417          * Clean up from previous runs.
2418          */
2419         (void) spa_destroy(name);
2420 
2421         nvroot = make_vdev_root(NULL, NULL, name, ztest_opts.zo_vdev_size, 0,
2422             0, ztest_opts.zo_raidz, ztest_opts.zo_mirrors, 1);
2423 
2424         /*
2425          * If we're configuring a RAIDZ device then make sure that the
2426          * initial version is capable of supporting that feature.
2427          */
2428         switch (ztest_opts.zo_raidz_parity) {
2429         case 0:
2430         case 1:
2431                 initial_version = SPA_VERSION_INITIAL;
2432                 break;
2433         case 2:


2452         VERIFY0(spa_create(name, nvroot, props, NULL));
2453         fnvlist_free(nvroot);
2454         fnvlist_free(props);
2455 
2456         VERIFY0(spa_open(name, &spa, FTAG));
2457         VERIFY3U(spa_version(spa), ==, version);
2458         newversion = ztest_random_spa_version(version + 1);
2459 
2460         if (ztest_opts.zo_verbose >= 4) {
2461                 (void) printf("upgrading spa version from %llu to %llu\n",
2462                     (u_longlong_t)version, (u_longlong_t)newversion);
2463         }
2464 
2465         spa_upgrade(spa, newversion);
2466         VERIFY3U(spa_version(spa), >, version);
2467         VERIFY3U(spa_version(spa), ==, fnvlist_lookup_uint64(spa->spa_config,
2468             zpool_prop_to_name(ZPOOL_PROP_VERSION)));
2469         spa_close(spa, FTAG);
2470 
2471         strfree(name);
2472         mutex_exit(&ztest_vdev_lock);
2473 }
2474 
2475 static vdev_t *
2476 vdev_lookup_by_path(vdev_t *vd, const char *path)
2477 {
2478         vdev_t *mvd;
2479 
2480         if (vd->vdev_path != NULL && strcmp(path, vd->vdev_path) == 0)
2481                 return (vd);
2482 
2483         for (int c = 0; c < vd->vdev_children; c++)
2484                 if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], path)) !=
2485                     NULL)
2486                         return (mvd);
2487 
2488         return (NULL);
2489 }
2490 
2491 /*
2492  * Find the first available hole which can be used as a top-level.


2505                 if (cvd->vdev_ishole)
2506                         break;
2507         }
2508         return (c);
2509 }
2510 
2511 /*
2512  * Verify that vdev_add() works as expected.
2513  */
2514 /* ARGSUSED */
2515 void
2516 ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
2517 {
2518         ztest_shared_t *zs = ztest_shared;
2519         spa_t *spa = ztest_spa;
2520         uint64_t leaves;
2521         uint64_t guid;
2522         nvlist_t *nvroot;
2523         int error;
2524 
2525         mutex_enter(&ztest_vdev_lock);
2526         leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
2527 
2528         spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
2529 
2530         ztest_shared->zs_vdev_next_leaf = find_vdev_hole(spa) * leaves;
2531 
2532         /*
2533          * If we have slogs then remove them 1/4 of the time.
2534          */
2535         if (spa_has_slogs(spa) && ztest_random(4) == 0) {
2536                 /*
2537                  * Grab the guid from the head of the log class rotor.
2538                  */
2539                 guid = spa_log_class(spa)->mc_rotor->mg_vd->vdev_guid;
2540 
2541                 spa_config_exit(spa, SCL_VDEV, FTAG);
2542 
2543                 /*
2544                  * We have to grab the ztest_name_lock as writer to
2545                  * prevent a race between removing a slog (dmu_objset_find)
2546                  * and destroying a dataset. Removing the slog will
2547                  * grab a reference on the dataset which may cause
2548                  * dmu_objset_destroy() to fail with EBUSY thus
2549                  * leaving the dataset in an inconsistent state.
2550                  */
2551                 rw_enter(&ztest_name_lock, RW_WRITER);
2552                 error = spa_vdev_remove(spa, guid, B_FALSE);
2553                 rw_exit(&ztest_name_lock);
2554 
2555                 if (error && error != EEXIST)
2556                         fatal(0, "spa_vdev_remove() = %d", error);
2557         } else {
2558                 spa_config_exit(spa, SCL_VDEV, FTAG);
2559 
2560                 /*
2561                  * Make 1/4 of the devices be log devices.
2562                  */
2563                 nvroot = make_vdev_root(NULL, NULL, NULL,
2564                     ztest_opts.zo_vdev_size, 0,
2565                     ztest_random(4) == 0, ztest_opts.zo_raidz,
2566                     zs->zs_mirrors, 1);
2567 
2568                 error = spa_vdev_add(spa, nvroot);
2569                 nvlist_free(nvroot);
2570 
2571                 if (error == ENOSPC)
2572                         ztest_record_enospc("spa_vdev_add");
2573                 else if (error != 0)
2574                         fatal(0, "spa_vdev_add() = %d", error);
2575         }
2576 
2577         mutex_exit(&ztest_vdev_lock);
2578 }
2579 
2580 /*
2581  * Verify that adding/removing aux devices (l2arc, hot spare) works as expected.
2582  */
2583 /* ARGSUSED */
2584 void
2585 ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
2586 {
2587         ztest_shared_t *zs = ztest_shared;
2588         spa_t *spa = ztest_spa;
2589         vdev_t *rvd = spa->spa_root_vdev;
2590         spa_aux_vdev_t *sav;
2591         char *aux;
2592         uint64_t guid = 0;
2593         int error;
2594 
2595         if (ztest_random(2) == 0) {
2596                 sav = &spa->spa_spares;
2597                 aux = ZPOOL_CONFIG_SPARES;
2598         } else {
2599                 sav = &spa->spa_l2cache;
2600                 aux = ZPOOL_CONFIG_L2CACHE;
2601         }
2602 
2603         mutex_enter(&ztest_vdev_lock);
2604 
2605         spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
2606 
2607         if (sav->sav_count != 0 && ztest_random(4) == 0) {
2608                 /*
2609                  * Pick a random device to remove.
2610                  */
2611                 guid = sav->sav_vdevs[ztest_random(sav->sav_count)]->vdev_guid;
2612         } else {
2613                 /*
2614                  * Find an unused device we can add.
2615                  */
2616                 zs->zs_vdev_aux = 0;
2617                 for (;;) {
2618                         char path[MAXPATHLEN];
2619                         int c;
2620                         (void) snprintf(path, sizeof (path), ztest_aux_template,
2621                             ztest_opts.zo_dir, ztest_opts.zo_pool, aux,
2622                             zs->zs_vdev_aux);
2623                         for (c = 0; c < sav->sav_count; c++)


2640                 nvlist_t *nvroot = make_vdev_root(NULL, aux, NULL,
2641                     (ztest_opts.zo_vdev_size * 5) / 4, 0, 0, 0, 0, 1);
2642                 error = spa_vdev_add(spa, nvroot);
2643                 if (error != 0)
2644                         fatal(0, "spa_vdev_add(%p) = %d", nvroot, error);
2645                 nvlist_free(nvroot);
2646         } else {
2647                 /*
2648                  * Remove an existing device.  Sometimes, dirty its
2649                  * vdev state first to make sure we handle removal
2650                  * of devices that have pending state changes.
2651                  */
2652                 if (ztest_random(2) == 0)
2653                         (void) vdev_online(spa, guid, 0, NULL);
2654 
2655                 error = spa_vdev_remove(spa, guid, B_FALSE);
2656                 if (error != 0 && error != EBUSY)
2657                         fatal(0, "spa_vdev_remove(%llu) = %d", guid, error);
2658         }
2659 
2660         mutex_exit(&ztest_vdev_lock);
2661 }
2662 
2663 /*
2664  * Split a pool if it has mirror top-level vdevs.
2665  */
2666 /* ARGSUSED */
2667 void
2668 ztest_split_pool(ztest_ds_t *zd, uint64_t id)
2669 {
2670         ztest_shared_t *zs = ztest_shared;
2671         spa_t *spa = ztest_spa;
2672         vdev_t *rvd = spa->spa_root_vdev;
2673         nvlist_t *tree, **child, *config, *split, **schild;
2674         uint_t c, children, schildren = 0, lastlogid = 0;
2675         int error = 0;
2676 
2677         mutex_enter(&ztest_vdev_lock);
2678 
2679         /* ensure we have a usable config; mirrors of raidz aren't supported */
2680         if (zs->zs_mirrors < 3 || ztest_opts.zo_raidz > 1) {
2681                 mutex_exit(&ztest_vdev_lock);
2682                 return;
2683         }
2684 
2685         /* clean up the old pool, if any */
2686         (void) spa_destroy("splitp");
2687 
2688         spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
2689 
2690         /* generate a config from the existing config */
2691         mutex_enter(&spa->spa_props_lock);
2692         VERIFY(nvlist_lookup_nvlist(spa->spa_config, ZPOOL_CONFIG_VDEV_TREE,
2693             &tree) == 0);
2694         mutex_exit(&spa->spa_props_lock);
2695 
2696         VERIFY(nvlist_lookup_nvlist_array(tree, ZPOOL_CONFIG_CHILDREN, &child,
2697             &children) == 0);
2698 
2699         schild = malloc(rvd->vdev_children * sizeof (nvlist_t *));
2700         for (c = 0; c < children; c++) {
2701                 vdev_t *tvd = rvd->vdev_child[c];


2720                 VERIFY(nvlist_dup(mchild[0], &schild[schildren++], 0) == 0);
2721         }
2722 
2723         /* OK, create a config that can be used to split */
2724         VERIFY(nvlist_alloc(&split, NV_UNIQUE_NAME, 0) == 0);
2725         VERIFY(nvlist_add_string(split, ZPOOL_CONFIG_TYPE,
2726             VDEV_TYPE_ROOT) == 0);
2727         VERIFY(nvlist_add_nvlist_array(split, ZPOOL_CONFIG_CHILDREN, schild,
2728             lastlogid != 0 ? lastlogid : schildren) == 0);
2729 
2730         VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, 0) == 0);
2731         VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, split) == 0);
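        /*
         * Sketch of the resulting nvlist: config holds a vdev_tree of type
         * "root" whose children are one leaf dup'ed from each mirror tlvdev,
         * i.e. the half that spa_vdev_split_mirror() will split off below.
         */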
2732 
2733         for (c = 0; c < schildren; c++)
2734                 nvlist_free(schild[c]);
2735         free(schild);
2736         nvlist_free(split);
2737 
2738         spa_config_exit(spa, SCL_VDEV, FTAG);
2739 
2740         rw_enter(&ztest_name_lock, RW_WRITER);
2741         error = spa_vdev_split_mirror(spa, "splitp", config, NULL, B_FALSE);
2742         rw_exit(&ztest_name_lock);
2743 
2744         nvlist_free(config);
2745 
2746         if (error == 0) {
2747                 (void) printf("successful split - results:\n");
2748                 mutex_enter(&spa_namespace_lock);
2749                 show_pool_stats(spa);
2750                 show_pool_stats(spa_lookup("splitp"));
2751                 mutex_exit(&spa_namespace_lock);
2752                 ++zs->zs_splits;
2753                 --zs->zs_mirrors;
2754         }
2755         mutex_exit(&ztest_vdev_lock);
2756 
2757 }
2758 
2759 /*
2760  * Verify that we can attach and detach devices.
2761  */
2762 /* ARGSUSED */
2763 void
2764 ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
2765 {
2766         ztest_shared_t *zs = ztest_shared;
2767         spa_t *spa = ztest_spa;
2768         spa_aux_vdev_t *sav = &spa->spa_spares;
2769         vdev_t *rvd = spa->spa_root_vdev;
2770         vdev_t *oldvd, *newvd, *pvd;
2771         nvlist_t *root;
2772         uint64_t leaves;
2773         uint64_t leaf, top;
2774         uint64_t ashift = ztest_get_ashift();
2775         uint64_t oldguid, pguid;
2776         uint64_t oldsize, newsize;
2777         char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
2778         int replacing;
2779         int oldvd_has_siblings = B_FALSE;
2780         int newvd_is_spare = B_FALSE;
2781         int oldvd_is_log;
2782         int error, expected_error;
2783 
2784         mutex_enter(&ztest_vdev_lock);
2785         leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
2786 
2787         spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
2788 
2789         /*
2790          * Decide whether to do an attach or a replace.
2791          */
2792         replacing = ztest_random(2);
2793 
2794         /*
2795          * Pick a random top-level vdev.
2796          */
2797         top = ztest_random_vdev_top(spa, B_TRUE);
2798 
2799         /*
2800          * Pick a random leaf within it.
2801          */
2802         leaf = ztest_random(leaves);
2803 
2804         /*


2825                 ASSERT(oldvd->vdev_children >= 2);
2826                 oldvd = oldvd->vdev_child[ztest_random(oldvd->vdev_children)];
2827         }
2828 
2829         oldguid = oldvd->vdev_guid;
2830         oldsize = vdev_get_min_asize(oldvd);
2831         oldvd_is_log = oldvd->vdev_top->vdev_islog;
2832         (void) strcpy(oldpath, oldvd->vdev_path);
2833         pvd = oldvd->vdev_parent;
2834         pguid = pvd->vdev_guid;
2835 
2836         /*
2837          * If oldvd has siblings, then half of the time, detach it.
2838          */
2839         if (oldvd_has_siblings && ztest_random(2) == 0) {
2840                 spa_config_exit(spa, SCL_VDEV, FTAG);
2841                 error = spa_vdev_detach(spa, oldguid, pguid, B_FALSE);
2842                 if (error != 0 && error != ENODEV && error != EBUSY &&
2843                     error != ENOTSUP)
2844                         fatal(0, "detach (%s) returned %d", oldpath, error);
2845                 mutex_exit(&ztest_vdev_lock);
2846                 return;
2847         }
2848 
2849         /*
2850          * For the new vdev, choose with equal probability between the two
2851          * standard paths (ending in either 'a' or 'b') or a random hot spare.
2852          */
2853         if (sav->sav_count != 0 && ztest_random(3) == 0) {
2854                 newvd = sav->sav_vdevs[ztest_random(sav->sav_count)];
2855                 newvd_is_spare = B_TRUE;
2856                 (void) strcpy(newpath, newvd->vdev_path);
2857         } else {
2858                 (void) snprintf(newpath, sizeof (newpath), ztest_dev_template,
2859                     ztest_opts.zo_dir, ztest_opts.zo_pool,
2860                     top * leaves + leaf);
2861                 if (ztest_random(2) == 0)
2862                         newpath[strlen(newpath) - 1] = 'b';
2863                 newvd = vdev_lookup_by_path(rvd, newpath);
2864         }
2865 


2919          * fail with ENODEV, or fail with EOVERFLOW.
2920          */
2921         if (expected_error == ENOTSUP &&
2922             (error == 0 || error == ENODEV || error == EOVERFLOW))
2923                 expected_error = error;
2924 
2925         /*
2926          * If someone grew the LUN, the replacement may be too small.
2927          */
2928         if (error == EOVERFLOW || error == EBUSY)
2929                 expected_error = error;
2930 
2931         /* XXX workaround 6690467 */
2932         if (error != expected_error && expected_error != EBUSY) {
2933                 fatal(0, "attach (%s %llu, %s %llu, %d) "
2934                     "returned %d, expected %d",
2935                     oldpath, oldsize, newpath,
2936                     newsize, replacing, error, expected_error);
2937         }
2938 
2939         mutex_exit(&ztest_vdev_lock);
2940 }
2941 
2942 /*
2943  * Callback function which expands the physical size of the vdev.
2944  */
2945 vdev_t *
2946 grow_vdev(vdev_t *vd, void *arg)
2947 {
2948         spa_t *spa = vd->vdev_spa;
2949         size_t *newsize = arg;
2950         size_t fsize;
2951         int fd;
2952 
2953         ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
2954         ASSERT(vd->vdev_ops->vdev_op_leaf);
2955 
2956         if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
2957                 return (vd);
2958 
2959         fsize = lseek(fd, 0, SEEK_END);


3047                         return (cvd);
3048         }
3049         return (NULL);
3050 }
3051 
3052 /*
3053  * Verify that dynamic LUN growth works as expected.
3054  */
3055 /* ARGSUSED */
3056 void
3057 ztest_vdev_LUN_growth(ztest_ds_t *zd, uint64_t id)
3058 {
3059         spa_t *spa = ztest_spa;
3060         vdev_t *vd, *tvd;
3061         metaslab_class_t *mc;
3062         metaslab_group_t *mg;
3063         size_t psize, newsize;
3064         uint64_t top;
3065         uint64_t old_class_space, new_class_space, old_ms_count, new_ms_count;
3066 
3067         mutex_enter(&ztest_vdev_lock);
3068         spa_config_enter(spa, SCL_STATE, spa, RW_READER);
3069 
3070         top = ztest_random_vdev_top(spa, B_TRUE);
3071 
3072         tvd = spa->spa_root_vdev->vdev_child[top];
3073         mg = tvd->vdev_mg;
3074         mc = mg->mg_class;
3075         old_ms_count = tvd->vdev_ms_count;
3076         old_class_space = metaslab_class_get_space(mc);
3077 
3078         /*
3079          * Determine the size of the first leaf vdev associated with
3080          * our top-level device.
3081          */
3082         vd = vdev_walk_tree(tvd, NULL, NULL);
3083         ASSERT3P(vd, !=, NULL);
3084         ASSERT(vd->vdev_ops->vdev_op_leaf);
3085 
3086         psize = vd->vdev_psize;
3087 
3088         /*
3089          * We only try to expand the vdev if it's healthy, less than 4x its
3090          * original size, and it has a valid psize.
3091          */
3092         if (tvd->vdev_state != VDEV_STATE_HEALTHY ||
3093             psize == 0 || psize >= 4 * ztest_opts.zo_vdev_size) {
3094                 spa_config_exit(spa, SCL_STATE, spa);
3095                 mutex_exit(&ztest_vdev_lock);
3096                 return;
3097         }
3098         ASSERT(psize > 0);
3099         newsize = psize + psize / 8;
3100         ASSERT3U(newsize, >, psize);
3101 
3102         if (ztest_opts.zo_verbose >= 6) {
3103                 (void) printf("Expanding LUN %s from %lu to %lu\n",
3104                     vd->vdev_path, (ulong_t)psize, (ulong_t)newsize);
3105         }
3106 
3107         /*
3108          * Growing the vdev is a two step process:
3109          *      1). expand the physical size (i.e. relabel)
3110          *      2). online the vdev to create the new metaslabs
3111          */
3112         if (vdev_walk_tree(tvd, grow_vdev, &newsize) != NULL ||
3113             vdev_walk_tree(tvd, online_vdev, NULL) != NULL ||
3114             tvd->vdev_state != VDEV_STATE_HEALTHY) {
3115                 if (ztest_opts.zo_verbose >= 5) {
3116                         (void) printf("Could not expand LUN because "
3117                             "the vdev configuration changed.\n");
3118                 }
3119                 spa_config_exit(spa, SCL_STATE, spa);
3120                 mutex_exit(&ztest_vdev_lock);
3121                 return;
3122         }
3123 
3124         spa_config_exit(spa, SCL_STATE, spa);
3125 
3126         /*
3127          * Expanding the LUN will update the config asynchronously,
3128          * thus we must wait for the async thread to complete any
3129          * pending tasks before proceeding.
3130          */
3131         for (;;) {
3132                 boolean_t done;
3133                 mutex_enter(&spa->spa_async_lock);
3134                 done = (spa->spa_async_thread == NULL && !spa->spa_async_tasks);
3135                 mutex_exit(&spa->spa_async_lock);
3136                 if (done)
3137                         break;
3138                 txg_wait_synced(spa_get_dsl(spa), 0);
3139                 (void) poll(NULL, 0, 100);
3140         }
3141 
3142         spa_config_enter(spa, SCL_STATE, spa, RW_READER);
3143 
3144         tvd = spa->spa_root_vdev->vdev_child[top];
3145         new_ms_count = tvd->vdev_ms_count;
3146         new_class_space = metaslab_class_get_space(mc);
3147 
3148         if (tvd->vdev_mg != mg || mg->mg_class != mc) {
3149                 if (ztest_opts.zo_verbose >= 5) {
3150                         (void) printf("Could not verify LUN expansion due to "
3151                             "intervening vdev offline or remove.\n");
3152                 }
3153                 spa_config_exit(spa, SCL_STATE, spa);
3154                 mutex_exit(&ztest_vdev_lock);
3155                 return;
3156         }
3157 
3158         /*
3159          * Make sure we were able to grow the vdev.
3160          */
3161         if (new_ms_count <= old_ms_count)
3162                 fatal(0, "LUN expansion failed: ms_count %llu <= %llu\n",
3163                     (u_longlong_t)new_ms_count, (u_longlong_t)old_ms_count);
3164 
3165         /*
3166          * Make sure we were able to grow the pool.
3167          */
3168         if (new_class_space <= old_class_space)
3169                 fatal(0, "LUN expansion failed: class_space %llu <= %llu\n",
3170                     (u_longlong_t)new_class_space,
3171                     (u_longlong_t)old_class_space);
3171 
3172         if (ztest_opts.zo_verbose >= 5) {
3173                 char oldnumbuf[NN_NUMBUF_SZ], newnumbuf[NN_NUMBUF_SZ];
3174 
3175                 nicenum(old_class_space, oldnumbuf, sizeof (oldnumbuf));
3176                 nicenum(new_class_space, newnumbuf, sizeof (newnumbuf));
3177                 (void) printf("%s grew from %s to %s\n",
3178                     spa->spa_name, oldnumbuf, newnumbuf);
3179         }
3180 
3181         spa_config_exit(spa, SCL_STATE, spa);
3182         mutex_exit(&ztest_vdev_lock);
3183 }
3184 
3185 /*
3186  * Verify that dmu_objset_{create,destroy,open,close} work as expected.
3187  */
3188 /* ARGSUSED */
3189 static void
3190 ztest_objset_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3191 {
3192         /*
3193          * Create the objects common to all ztest datasets.
3194          */
3195         VERIFY(zap_create_claim(os, ZTEST_DIROBJ,
3196             DMU_OT_ZAP_OTHER, DMU_OT_NONE, 0, tx) == 0);
3197 }
3198 
3199 static int
3200 ztest_dataset_create(char *dsname)
3201 {
3202         uint64_t zilset = ztest_random(100);


3276         (void) snprintf(snapname, sizeof (snapname), "%s@%llu", osname,
3277             (u_longlong_t)id);
3278 
3279         error = dsl_destroy_snapshot(snapname, B_FALSE);
3280         if (error != 0 && error != ENOENT)
3281                 fatal(0, "ztest_snapshot_destroy(%s) = %d", snapname, error);
3282         return (B_TRUE);
3283 }
3284 
3285 /* ARGSUSED */
3286 void
3287 ztest_dmu_objset_create_destroy(ztest_ds_t *zd, uint64_t id)
3288 {
3289         ztest_ds_t zdtmp;
3290         int iters;
3291         int error;
3292         objset_t *os, *os2;
3293         char name[ZFS_MAX_DATASET_NAME_LEN];
3294         zilog_t *zilog;
3295 
3296         rw_enter(&ztest_name_lock, RW_READER);
3297 
3298         (void) snprintf(name, sizeof (name), "%s/temp_%llu",
3299             ztest_opts.zo_pool, (u_longlong_t)id);
3300 
3301         /*
3302          * If this dataset exists from a previous run, process its replay log
3303          * half of the time.  If we don't replay it, then dmu_objset_destroy()
3304          * (invoked from ztest_objset_destroy_cb()) should just throw it away.
3305          */
3306         if (ztest_random(2) == 0 &&
3307             dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os) == 0) {
3308                 ztest_zd_init(&zdtmp, NULL, os);
3309                 zil_replay(os, &zdtmp, ztest_replay_vector);
3310                 ztest_zd_fini(&zdtmp);
3311                 dmu_objset_disown(os, FTAG);
3312         }
3313 
3314         /*
3315          * There may be an old instance of the dataset we're about to
3316          * create lying around from a previous run.  If so, destroy it
3317          * and all of its snapshots.
3318          */
3319         (void) dmu_objset_find(name, ztest_objset_destroy_cb, NULL,
3320             DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
3321 
3322         /*
3323          * Verify that the destroyed dataset is no longer in the namespace.
3324          */
3325         VERIFY3U(ENOENT, ==, dmu_objset_own(name, DMU_OST_OTHER, B_TRUE,
3326             FTAG, &os));
3327 
3328         /*
3329          * Verify that we can create a new dataset.
3330          */
3331         error = ztest_dataset_create(name);
3332         if (error) {
3333                 if (error == ENOSPC) {
3334                         ztest_record_enospc(FTAG);
3335                         rw_exit(&ztest_name_lock);
3336                         return;
3337                 }
3338                 fatal(0, "dmu_objset_create(%s) = %d", name, error);
3339         }
3340 
3341         VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os));
3342 
3343         ztest_zd_init(&zdtmp, NULL, os);
3344 
3345         /*
3346          * Open the intent log for it.
3347          */
3348         zilog = zil_open(os, ztest_get_data);
3349 
3350         /*
3351          * Put some objects in there, do a little I/O to them,
3352          * and randomly take a couple of snapshots along the way.
3353          */
3354         iters = ztest_random(5);
3355         for (int i = 0; i < iters; i++) {


3363          */
3364         VERIFY3U(EEXIST, ==,
3365             dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL));
3366 
3367         /*
3368          * Verify that we can hold an objset that is also owned.
3369          */
3370         VERIFY3U(0, ==, dmu_objset_hold(name, FTAG, &os2));
3371         dmu_objset_rele(os2, FTAG);
3372 
3373         /*
3374          * Verify that we cannot own an objset that is already owned.
3375          */
3376         VERIFY3U(EBUSY, ==,
3377             dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, FTAG, &os2));
3378 
3379         zil_close(zilog);
3380         dmu_objset_disown(os, FTAG);
3381         ztest_zd_fini(&zdtmp);
3382 
3383         rw_exit(&ztest_name_lock);
3384 }
3385 
3386 /*
3387  * Verify that dmu_snapshot_{create,destroy,open,close} work as expected.
3388  */
3389 void
3390 ztest_dmu_snapshot_create_destroy(ztest_ds_t *zd, uint64_t id)
3391 {
3392         rw_enter(&ztest_name_lock, RW_READER);
3393         (void) ztest_snapshot_destroy(zd->zd_name, id);
3394         (void) ztest_snapshot_create(zd->zd_name, id);
3395         rw_exit(&ztest_name_lock);
3396 }
3397 
3398 /*
3399  * Cleanup non-standard snapshots and clones.
3400  */
3401 void
3402 ztest_dsl_dataset_cleanup(char *osname, uint64_t id)
3403 {
3404         char snap1name[ZFS_MAX_DATASET_NAME_LEN];
3405         char clone1name[ZFS_MAX_DATASET_NAME_LEN];
3406         char snap2name[ZFS_MAX_DATASET_NAME_LEN];
3407         char clone2name[ZFS_MAX_DATASET_NAME_LEN];
3408         char snap3name[ZFS_MAX_DATASET_NAME_LEN];
3409         int error;
3410 
3411         (void) snprintf(snap1name, sizeof (snap1name),
3412             "%s@s1_%llu", osname, id);
3413         (void) snprintf(clone1name, sizeof (clone1name),
3414             "%s/c1_%llu", osname, id);
3415         (void) snprintf(snap2name, sizeof (snap2name),


3434         error = dsl_destroy_snapshot(snap1name, B_FALSE);
3435         if (error && error != ENOENT)
3436                 fatal(0, "dsl_destroy_snapshot(%s) = %d", snap1name, error);
3437 }
3438 
3439 /*
3440  * Verify dsl_dataset_promote handles EBUSY
3441  */
3442 void
3443 ztest_dsl_dataset_promote_busy(ztest_ds_t *zd, uint64_t id)
3444 {
3445         objset_t *os;
3446         char snap1name[ZFS_MAX_DATASET_NAME_LEN];
3447         char clone1name[ZFS_MAX_DATASET_NAME_LEN];
3448         char snap2name[ZFS_MAX_DATASET_NAME_LEN];
3449         char clone2name[ZFS_MAX_DATASET_NAME_LEN];
3450         char snap3name[ZFS_MAX_DATASET_NAME_LEN];
3451         char *osname = zd->zd_name;
3452         int error;
3453 
3454         rw_enter(&ztest_name_lock, RW_READER);
3455 
3456         ztest_dsl_dataset_cleanup(osname, id);
3457 
3458         (void) snprintf(snap1name, sizeof (snap1name),
3459             "%s@s1_%llu", osname, id);
3460         (void) snprintf(clone1name, sizeof (clone1name),
3461             "%s/c1_%llu", osname, id);
3462         (void) snprintf(snap2name, sizeof (snap2name),
3463             "%s@s2_%llu", clone1name, id);
3464         (void) snprintf(clone2name, sizeof (clone2name),
3465             "%s/c2_%llu", osname, id);
3466         (void) snprintf(snap3name, sizeof (snap3name),
3467             "%s@s3_%llu", clone1name, id);
3468 
3469         error = dmu_objset_snapshot_one(osname, strchr(snap1name, '@') + 1);
3470         if (error && error != EEXIST) {
3471                 if (error == ENOSPC) {
3472                         ztest_record_enospc(FTAG);
3473                         goto out;
3474                 }


3511                 fatal(0, "dmu_objset_create(%s) = %d", clone2name, error);
3512         }
3513 
3514         error = dmu_objset_own(snap2name, DMU_OST_ANY, B_TRUE, FTAG, &os);
3515         if (error)
3516                 fatal(0, "dmu_objset_own(%s) = %d", snap2name, error);
3517         error = dsl_dataset_promote(clone2name, NULL);
3518         if (error == ENOSPC) {
3519                 dmu_objset_disown(os, FTAG);
3520                 ztest_record_enospc(FTAG);
3521                 goto out;
3522         }
3523         if (error != EBUSY)
3524                 fatal(0, "dsl_dataset_promote(%s), %d, not EBUSY", clone2name,
3525                     error);
3526         dmu_objset_disown(os, FTAG);
3527 
3528 out:
3529         ztest_dsl_dataset_cleanup(osname, id);
3530 
3531         rw_exit(&ztest_name_lock);
3532 }
3533 
3534 /*
3535  * Verify that dmu_object_{alloc,free} work as expected.
3536  */
3537 void
3538 ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id)
3539 {
3540         ztest_od_t od[4];
3541         int batchsize = sizeof (od) / sizeof (od[0]);
3542 
3543         for (int b = 0; b < batchsize; b++)
3544                 ztest_od_init(&od[b], id, FTAG, b, DMU_OT_UINT64_OTHER, 0, 0);
3545 
3546         /*
3547          * Destroy the previous batch of objects, create a new batch,
3548          * and do some I/O on the new objects.
3549          */
3550         if (ztest_object_init(zd, od, sizeof (od), B_TRUE) != 0)
3551                 return;


4445 
4446         if (error == ECANCELED) {
4447                 ASSERT0(data->zcd_txg);
4448                 ASSERT(!data->zcd_added);
4449 
4450                 /*
4451                  * The private callback data should be destroyed here, but
4452                  * since we are going to check the zcd_called field after
4453                  * dmu_tx_abort(), we will destroy it there.
4454                  */
4455                 return;
4456         }
4457 
4458         /* Was this callback added to the global callback list? */
4459         if (!data->zcd_added)
4460                 goto out;
4461 
4462         ASSERT3U(data->zcd_txg, !=, 0);
4463 
4464         /* Remove our callback from the list */
4465         mutex_enter(&zcl.zcl_callbacks_lock);
4466         list_remove(&zcl.zcl_callbacks, data);
4467         mutex_exit(&zcl.zcl_callbacks_lock);
4468 
4469 out:
4470         umem_free(data, sizeof (ztest_cb_data_t));
4471 }
4472 
4473 /* Allocate and initialize callback data structure */
4474 static ztest_cb_data_t *
4475 ztest_create_cb_data(objset_t *os, uint64_t txg)
4476 {
4477         ztest_cb_data_t *cb_data;
4478 
4479         cb_data = umem_zalloc(sizeof (ztest_cb_data_t), UMEM_NOFAIL);
4480 
4481         cb_data->zcd_txg = txg;
4482         cb_data->zcd_spa = dmu_objset_spa(os);
4483 
4484         return (cb_data);
4485 }
4486 
4487 /*


4549                 }
4550 
4551                 return;
4552         }
4553 
4554         cb_data[2] = ztest_create_cb_data(os, txg);
4555         dmu_tx_callback_register(tx, ztest_commit_callback, cb_data[2]);
4556 
4557         /*
4558          * Read existing data to make sure there isn't a future leak.
4559          */
4560         VERIFY(0 == dmu_read(os, od[0].od_object, 0, sizeof (uint64_t),
4561             &old_txg, DMU_READ_PREFETCH));
4562 
4563         if (old_txg > txg)
4564                 fatal(0, "future leak: got %" PRIu64 ", open txg is %" PRIu64,
4565                     old_txg, txg);
4566 
4567         dmu_write(os, od[0].od_object, 0, sizeof (uint64_t), &txg, tx);
4568 
4569         mutex_enter(&zcl.zcl_callbacks_lock);
4570 
4571         /*
4572          * Since commit callbacks don't have any ordering requirement and since
4573          * it is theoretically possible for a commit callback to be called
4574          * after an arbitrary amount of time has elapsed since its txg has been
4575          * synced, it is difficult to reliably determine whether a commit
4576          * callback hasn't been called due to high load or due to a flawed
4577          * implementation.
4578          *
4579          * In practice, we will assume that if after a certain number of txgs a
4580          * commit callback hasn't been called, then most likely there's an
4581          * implementation bug.
4582          */
4583         tmp_cb = list_head(&zcl.zcl_callbacks);
4584         if (tmp_cb != NULL &&
4585             (txg - ZTEST_COMMIT_CALLBACK_THRESH) > tmp_cb->zcd_txg) {
4586                 fatal(0, "Commit callback threshold exceeded, oldest txg: %"
4587                     PRIu64 ", open txg: %" PRIu64 "\n", tmp_cb->zcd_txg, txg);
4588         }
4589 


4596          * (from other objsets) may have sneaked in.
4597          */
4598         tmp_cb = list_tail(&zcl.zcl_callbacks);
4599         while (tmp_cb != NULL && tmp_cb->zcd_txg > txg)
4600                 tmp_cb = list_prev(&zcl.zcl_callbacks, tmp_cb);
4601 
4602         /* Add the 3 callbacks to the list */
4603         for (i = 0; i < 3; i++) {
4604                 if (tmp_cb == NULL)
4605                         list_insert_head(&zcl.zcl_callbacks, cb_data[i]);
4606                 else
4607                         list_insert_after(&zcl.zcl_callbacks, tmp_cb,
4608                             cb_data[i]);
4609 
4610                 cb_data[i]->zcd_added = B_TRUE;
4611                 VERIFY(!cb_data[i]->zcd_called);
4612 
4613                 tmp_cb = cb_data[i];
4614         }
4615 
4616         mutex_exit(&zcl.zcl_callbacks_lock);
4617 
4618         dmu_tx_commit(tx);
4619 }
4620 
4621 /* ARGSUSED */
4622 void
4623 ztest_dsl_prop_get_set(ztest_ds_t *zd, uint64_t id)
4624 {
4625         zfs_prop_t proplist[] = {
4626                 ZFS_PROP_CHECKSUM,
4627                 ZFS_PROP_COMPRESSION,
4628                 ZFS_PROP_COPIES,
4629                 ZFS_PROP_DEDUP
4630         };
4631 
4632         rw_enter(&ztest_name_lock, RW_READER);
4633 
4634         for (int p = 0; p < sizeof (proplist) / sizeof (proplist[0]); p++)
4635                 (void) ztest_dsl_prop_set_uint64(zd->zd_name, proplist[p],
4636                     ztest_random_dsl_prop(proplist[p]), (int)ztest_random(2));
4637 
4638         rw_exit(&ztest_name_lock);
4639 }
4640 
4641 /* ARGSUSED */
4642 void
4643 ztest_spa_prop_get_set(ztest_ds_t *zd, uint64_t id)
4644 {
4645         nvlist_t *props = NULL;
4646 
4647         rw_enter(&ztest_name_lock, RW_READER);
4648 
4649         (void) ztest_spa_prop_set_uint64(ZPOOL_PROP_DEDUPDITTO,
4650             ZIO_DEDUPDITTO_MIN + ztest_random(ZIO_DEDUPDITTO_MIN));
4651 
4652         VERIFY0(spa_prop_get(ztest_spa, &props));
4653 
4654         if (ztest_opts.zo_verbose >= 6)
4655                 dump_nvlist(props, 4);
4656 
4657         nvlist_free(props);
4658 
4659         rw_exit(&ztest_name_lock);
4660 }
4661 
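/*
 * Release a single user hold.  dsl_dataset_user_release() takes an nvlist
 * mapping each snapshot name to an nvlist of hold tags; here that is simply
 * { snapname -> { holdname } }.
 */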
4662 static int
4663 user_release_one(const char *snapname, const char *holdname)
4664 {
4665         nvlist_t *snaps, *holds;
4666         int error;
4667 
4668         snaps = fnvlist_alloc();
4669         holds = fnvlist_alloc();
4670         fnvlist_add_boolean(holds, holdname);
4671         fnvlist_add_nvlist(snaps, snapname, holds);
4672         fnvlist_free(holds);
4673         error = dsl_dataset_user_release(snaps, NULL);
4674         fnvlist_free(snaps);
4675         return (error);
4676 }
4677 
4678 /*
4679  * Test snapshot hold/release and deferred destroy.
4680  */
4681 void
4682 ztest_dmu_snapshot_hold(ztest_ds_t *zd, uint64_t id)
4683 {
4684         int error;
4685         objset_t *os = zd->zd_os;
4686         objset_t *origin;
4687         char snapname[100];
4688         char fullname[100];
4689         char clonename[100];
4690         char tag[100];
4691         char osname[ZFS_MAX_DATASET_NAME_LEN];
4692         nvlist_t *holds;
4693 
4694         rw_enter(&ztest_name_lock, RW_READER);
4695 
4696         dmu_objset_name(os, osname);
4697 
4698         (void) snprintf(snapname, sizeof (snapname), "sh1_%llu", id);
4699         (void) snprintf(fullname, sizeof (fullname), "%s@%s", osname, snapname);
4700         (void) snprintf(clonename, sizeof (clonename),
4701             "%s/ch1_%llu", osname, id);
4702         (void) snprintf(tag, sizeof (tag), "tag_%llu", id);
4703 
4704         /*
4705          * Clean up from any previous run.
4706          */
4707         error = dsl_destroy_head(clonename);
4708         if (error != ENOENT)
4709                 ASSERT0(error);
4710         error = user_release_one(fullname, tag);
4711         if (error != ESRCH && error != ENOENT)
4712                 ASSERT0(error);
4713         error = dsl_destroy_snapshot(fullname, B_FALSE);
4714         if (error != ENOENT)


4779 
4780         error = dsl_destroy_snapshot(fullname, B_FALSE);
4781         if (error != EBUSY) {
4782                 fatal(0, "dsl_destroy_snapshot(%s, B_FALSE) = %d",
4783                     fullname, error);
4784         }
4785 
4786         error = dsl_destroy_snapshot(fullname, B_TRUE);
4787         if (error) {
4788                 fatal(0, "dsl_destroy_snapshot(%s, B_TRUE) = %d",
4789                     fullname, error);
4790         }
4791 
4792         error = user_release_one(fullname, tag);
4793         if (error)
4794                 fatal(0, "user_release_one(%s, %s) = %d", fullname, tag, error);
4795 
4796         VERIFY3U(dmu_objset_hold(fullname, FTAG, &origin), ==, ENOENT);
4797 
4798 out:
4799         rw_exit(&ztest_name_lock);
4800 }
4801 
4802 /*
4803  * Inject random faults into the on-disk data.
4804  */
4805 /* ARGSUSED */
4806 void
4807 ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
4808 {
4809         ztest_shared_t *zs = ztest_shared;
4810         spa_t *spa = ztest_spa;
4811         int fd;
4812         uint64_t offset;
4813         uint64_t leaves;
4814         uint64_t bad = 0x1990c0ffeedecade;
4815         uint64_t top, leaf;
4816         char path0[MAXPATHLEN];
4817         char pathrand[MAXPATHLEN];
4818         size_t fsize;
4819         int bshift = SPA_MAXBLOCKSHIFT + 2;
4820         int iters = 1000;
4821         int maxfaults;
4822         int mirror_save;
4823         vdev_t *vd0 = NULL;
4824         uint64_t guid0 = 0;
4825         boolean_t islog = B_FALSE;
4826 
4827         mutex_enter(&ztest_vdev_lock);
4828         maxfaults = MAXFAULTS();
4829         leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
4830         mirror_save = zs->zs_mirrors;
4831         mutex_exit(&ztest_vdev_lock);
4832 
4833         ASSERT(leaves >= 1);
4834 
4835         /*
4836          * Grab the name lock as reader. There are some operations
4837          * which don't like to have their vdevs changed while
4838          * they are in progress (i.e. spa_change_guid). Those
4839          * operations will have grabbed the name lock as writer.
4840          */
4841         rw_enter(&ztest_name_lock, RW_READER);
4842 
4843         /*
4844          * We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
4845          */
4846         spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
4847 
4848         if (ztest_random(2) == 0) {
4849                 /*
4850                  * Inject errors on a normal data device or slog device.
4851                  */
4852                 top = ztest_random_vdev_top(spa, B_TRUE);
4853                 leaf = ztest_random(leaves) + zs->zs_splits;
4854 
4855                 /*
4856                  * Generate paths to the first leaf in this top-level vdev,
4857                  * and to the random leaf we selected.  We'll induce transient
4858                  * write failures and random online/offline activity on leaf 0,
4859                  * and we'll write random garbage to the randomly chosen leaf.
4860                  */
4861                 (void) snprintf(path0, sizeof (path0), ztest_dev_template,


4890                         vdev_file_t *vf = vd0->vdev_tsd;
4891 
4892                         if (vf != NULL && ztest_random(3) == 0) {
4893                                 (void) close(vf->vf_vnode->v_fd);
4894                                 vf->vf_vnode->v_fd = -1;
4895                         } else if (ztest_random(2) == 0) {
4896                                 vd0->vdev_cant_read = B_TRUE;
4897                         } else {
4898                                 vd0->vdev_cant_write = B_TRUE;
4899                         }
4900                         guid0 = vd0->vdev_guid;
4901                 }
4902         } else {
4903                 /*
4904                  * Inject errors on an l2cache device.
4905                  */
4906                 spa_aux_vdev_t *sav = &spa->spa_l2cache;
4907 
4908                 if (sav->sav_count == 0) {
4909                         spa_config_exit(spa, SCL_STATE, FTAG);
4910                         rw_exit(&ztest_name_lock);
4911                         return;
4912                 }
4913                 vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
4914                 guid0 = vd0->vdev_guid;
4915                 (void) strcpy(path0, vd0->vdev_path);
4916                 (void) strcpy(pathrand, vd0->vdev_path);
4917 
4918                 leaf = 0;
4919                 leaves = 1;
4920                 maxfaults = INT_MAX;    /* no limit on cache devices */
4921         }
4922 
4923         spa_config_exit(spa, SCL_STATE, FTAG);
4924         rw_exit(&ztest_name_lock);
4925 
4926         /*
4927          * If we can tolerate two or more faults, or we're dealing
4928          * with a slog, randomly online/offline vd0.
4929          */
4930         if ((maxfaults >= 2 || islog) && guid0 != 0) {
4931                 if (ztest_random(10) < 6) {
4932                         int flags = (ztest_random(2) == 0 ?
4933                             ZFS_OFFLINE_TEMPORARY : 0);
4934 
4935                         /*
4936                          * We have to grab the ztest_name_lock as writer to
4937                          * prevent a race between offlining a slog and
4938                          * destroying a dataset. Offlining the slog will
4939                          * grab a reference on the dataset which may cause
4940                          * dmu_objset_destroy() to fail with EBUSY thus
4941                          * leaving the dataset in an inconsistent state.
4942                          */
4943                         if (islog)
4944                                 rw_enter(&ztest_name_lock, RW_WRITER);
4945 
4946                         VERIFY(vdev_offline(spa, guid0, flags) != EBUSY);
4947 
4948                         if (islog)
4949                                 rw_exit(&ztest_name_lock);
4950                 } else {
4951                         /*
4952                          * Ideally we would like to be able to randomly
4953                          * call vdev_[on|off]line without holding locks
4954                          * to force unpredictable failures but the side
4955                          * effects of vdev_[on|off]line prevent us from
4956                          * doing so. We grab the ztest_vdev_lock here to
4957                          * prevent a race between injection testing and
4958                          * aux_vdev removal.
4959                          */
4960                         mutex_enter(&ztest_vdev_lock);
4961                         (void) vdev_online(spa, guid0, 0, NULL);
4962                         mutex_exit(&ztest_vdev_lock);
4963                 }
4964         }
4965 
4966         if (maxfaults == 0)
4967                 return;
4968 
4969         /*
4970          * We have at least single-fault tolerance, so inject data corruption.
4971          */
4972         fd = open(pathrand, O_RDWR);
4973 
4974         if (fd == -1)   /* we hit a gap in the device namespace */
4975                 return;
4976 
4977         fsize = lseek(fd, 0, SEEK_END);
4978 
4979         while (--iters != 0) {
4980                 /*
4981                  * The offset must be chosen carefully to ensure that
4982                  * we do not inject a given logical block with errors


5014                  * because we also damage (parts of) the other side of
5015                  * the mirror/raidz.
5016                  *
5017                  * Additionally, we will always have both an even and an
5018                  * odd label, so that we can handle crashes in the
5019                  * middle of vdev_config_sync().
5020                  */
5021                 if ((leaf & 1) == 0 && offset < VDEV_LABEL_START_SIZE)
5022                         continue;
5023 
5024                 /*
5025                  * The two end labels are stored at the "end" of the disk, but
5026                  * the end of the disk (vdev_psize) is aligned to
5027                  * sizeof (vdev_label_t).
5028                  */
5029                 uint64_t psize = P2ALIGN(fsize, sizeof (vdev_label_t));
5030                 if ((leaf & 1) == 1 &&
5031                     offset + sizeof (bad) > psize - VDEV_LABEL_END_SIZE)
5032                         continue;
5033 
5034                 mutex_enter(&ztest_vdev_lock);
5035                 if (mirror_save != zs->zs_mirrors) {
5036                         mutex_exit(&ztest_vdev_lock);
5037                         (void) close(fd);
5038                         return;
5039                 }
5040 
5041                 if (pwrite(fd, &bad, sizeof (bad), offset) != sizeof (bad))
5042                         fatal(1, "can't inject bad word at 0x%llx in %s",
5043                             offset, pathrand);
5044 
5045                 mutex_exit(&ztest_vdev_lock);
5046 
5047                 if (ztest_opts.zo_verbose >= 7)
5048                         (void) printf("injected bad word into %s,"
5049                             " offset 0x%llx\n", pathrand, (u_longlong_t)offset);
5050         }
5051 
5052         (void) close(fd);
5053 }
5054 
5055 /*
5056  * Verify that DDT repair works as expected.
5057  */
5058 void
5059 ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
5060 {
5061         ztest_shared_t *zs = ztest_shared;
5062         spa_t *spa = ztest_spa;
5063         objset_t *os = zd->zd_os;
5064         ztest_od_t od[1];
5065         uint64_t object, blocksize, txg, pattern, psize;
5066         enum zio_checksum checksum = spa_dedup_checksum(spa);
5067         dmu_buf_t *db;
5068         dmu_tx_t *tx;
5069         abd_t *abd;
5070         blkptr_t blk;
5071         int copies = 2 * ZIO_DEDUPDITTO_MIN;
5072 
5073         blocksize = ztest_random_blocksize();
5074         blocksize = MIN(blocksize, 2048);       /* because we write so many */
5075 
5076         ztest_od_init(&od[0], id, FTAG, 0, DMU_OT_UINT64_OTHER, blocksize, 0);
5077 
5078         if (ztest_object_init(zd, od, sizeof (od), B_FALSE) != 0)
5079                 return;
5080 
5081         /*
5082          * Take the name lock as writer to prevent anyone else from changing
5083          * the pool and dataset properties we need to maintain during this test.
5084          */
5085         rw_enter(&ztest_name_lock, RW_WRITER);
5086 
5087         if (ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_DEDUP, checksum,
5088             B_FALSE) != 0 ||
5089             ztest_dsl_prop_set_uint64(zd->zd_name, ZFS_PROP_COPIES, 1,
5090             B_FALSE) != 0) {
5091                 rw_exit(&ztest_name_lock);
5092                 return;
5093         }
5094 
5095         dmu_objset_stats_t dds;
5096         dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
5097         dmu_objset_fast_stat(os, &dds);
5098         dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
5099 
5100         object = od[0].od_object;
5101         blocksize = od[0].od_blocksize;
5102         pattern = zs->zs_guid ^ dds.dds_guid;
5103 
5104         ASSERT(object != 0);
5105 
5106         tx = dmu_tx_create(os);
5107         dmu_tx_hold_write(tx, object, 0, copies * blocksize);
5108         txg = ztest_tx_assign(tx, TXG_WAIT, FTAG);
5109         if (txg == 0) {
5110                 rw_exit(&ztest_name_lock);
5111                 return;
5112         }
5113 
5114         /*
5115          * Write all the copies of our block.
5116          */
5117         for (int i = 0; i < copies; i++) {
5118                 uint64_t offset = i * blocksize;
5119                 int error = dmu_buf_hold(os, object, offset, FTAG, &db,
5120                     DMU_READ_NO_PREFETCH);
5121                 if (error != 0) {
5122                         fatal(B_FALSE, "dmu_buf_hold(%p, %llu, %llu) = %u",
5123                             os, (u_longlong_t)object, (u_longlong_t)offset,
5124                             error);
5124                 }
5125                 ASSERT(db->db_offset == offset);
5126                 ASSERT(db->db_size == blocksize);
5127                 ASSERT(ztest_pattern_match(db->db_data, db->db_size, pattern) ||
5128                     ztest_pattern_match(db->db_data, db->db_size, 0ULL));
5129                 dmu_buf_will_fill(db, tx);
5130                 ztest_pattern_set(db->db_data, db->db_size, pattern);


5138          * Find out what block we got.
5139          */
5140         VERIFY0(dmu_buf_hold(os, object, 0, FTAG, &db,
5141             DMU_READ_NO_PREFETCH));
5142         blk = *((dmu_buf_impl_t *)db)->db_blkptr;
5143         dmu_buf_rele(db, FTAG);
5144 
5145         /*
5146          * Damage the block.  Dedup-ditto will save us when we read it later.
5147          */
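        /*
         * (A note on why the later read should heal: the loop above wrote
         * 2 * ZIO_DEDUPDITTO_MIN identical copies, so the DDT refcount
         * should exceed the dedupditto threshold and a ditto copy of the
         * block should exist to repair from.)
         */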
5148         psize = BP_GET_PSIZE(&blk);
5149         abd = abd_alloc_linear(psize, B_TRUE);
5150         ztest_pattern_set(abd_to_buf(abd), psize, ~pattern);
5151 
5152         (void) zio_wait(zio_rewrite(NULL, spa, 0, &blk,
5153             abd, psize, NULL, NULL, ZIO_PRIORITY_SYNC_WRITE,
5154             ZIO_FLAG_CANFAIL | ZIO_FLAG_INDUCE_DAMAGE, NULL));
5155 
5156         abd_free(abd);
5157 
5158         rw_exit(&ztest_name_lock);
5159 }
5160 
5161 /*
5162  * Scrub the pool.
5163  */
5164 /* ARGSUSED */
5165 void
5166 ztest_scrub(ztest_ds_t *zd, uint64_t id)
5167 {
5168         spa_t *spa = ztest_spa;
5169 
5170         (void) spa_scan(spa, POOL_SCAN_SCRUB);
5171         (void) poll(NULL, 0, 100); /* wait a moment, then force a restart */
5172         (void) spa_scan(spa, POOL_SCAN_SCRUB);
5173 }
5174 
5175 /*
5176  * Change the guid for the pool.
5177  */
5178 /* ARGSUSED */
5179 void
5180 ztest_reguid(ztest_ds_t *zd, uint64_t id)
5181 {
5182         spa_t *spa = ztest_spa;
5183         uint64_t orig, load;
5184         int error;
5185 
5186         orig = spa_guid(spa);
5187         load = spa_load_guid(spa);
5188 
5189         rw_enter(&ztest_name_lock, RW_WRITER);
5190         error = spa_change_guid(spa);
5191         rw_exit(&ztest_name_lock);
5192 
5193         if (error != 0)
5194                 return;
5195 
5196         if (ztest_opts.zo_verbose >= 4) {
5197                 (void) printf("Changed guid old %llu -> %llu\n",
5198                     (u_longlong_t)orig, (u_longlong_t)spa_guid(spa));
5199         }
5200 
5201         VERIFY3U(orig, !=, spa_guid(spa));
5202         VERIFY3U(load, ==, spa_load_guid(spa));
5203 }
5204 
5205 /*
5206  * Rename the pool to a different name and then rename it back.
5207  */
5208 /* ARGSUSED */
5209 void
5210 ztest_spa_rename(ztest_ds_t *zd, uint64_t id)
5211 {
5212         char *oldname, *newname;
5213         spa_t *spa;
5214 
5215         rw_enter(&ztest_name_lock, RW_WRITER);
5216 
5217         oldname = ztest_opts.zo_pool;
5218         newname = umem_alloc(strlen(oldname) + 5, UMEM_NOFAIL);
5219         (void) strcpy(newname, oldname);
5220         (void) strcat(newname, "_tmp");
5221 
5222         /*
5223          * Do the rename.
5224          */
5225         VERIFY3U(0, ==, spa_rename(oldname, newname));
5226 
5227         /*
5228          * Try to open it under the old name, which shouldn't exist.
5229          */
5230         VERIFY3U(ENOENT, ==, spa_open(oldname, &spa, FTAG));
5231 
5232         /*
5233          * Open it under the new name and make sure it's still the same spa_t.
5234          */
5235         VERIFY3U(0, ==, spa_open(newname, &spa, FTAG));
5236 
5237         ASSERT(spa == ztest_spa);
5238         spa_close(spa, FTAG);
5239 
5240         /*
5241          * Rename it back to the original.
5242          */
5243         VERIFY3U(0, ==, spa_rename(newname, oldname));
5244 
5245         /*
5246          * Make sure it can still be opened.
5247          */
5248         VERIFY3U(0, ==, spa_open(oldname, &spa, FTAG));
5249 
5250         ASSERT(spa == ztest_spa);
5251         spa_close(spa, FTAG);
5252 
5253         umem_free(newname, strlen(newname) + 1);
5254 
5255         rw_exit(&ztest_name_lock);
5256 }
5257 
5258 /*
5259  * Verify pool integrity by running zdb.
5260  */
5261 static void
5262 ztest_run_zdb(char *pool)
5263 {
5264         int status;
5265         char zdb[MAXPATHLEN + MAXNAMELEN + 20];
5266         char zbuf[1024];
5267         char *bin;
5268         char *ztest;
5269         char *isa;
5270         int isalen;
5271         FILE *fp;
5272 
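        /*
         * getexecname() yields the path of the running ztest binary;
         * canonicalize it so the zdb path can be derived from it.
         */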
5273         (void) realpath(getexecname(), zdb);
5274 
5275         /* zdb lives in /usr/sbin, while ztest lives in /usr/bin */


5590          * That's because zap_count() returns the open-context value,
5591          * while dmu_objset_space() returns the rootbp fill count.
5592          */
5593         VERIFY3U(0, ==, zap_count(zd->zd_os, ZTEST_DIROBJ, &dirobjs));
5594         dmu_objset_space(zd->zd_os, &scratch, &scratch, &usedobjs, &scratch);
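        /*
         * The "+ 1" is ZTEST_DIROBJ itself, which has no directory
         * entry of its own.
         */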
5595         ASSERT3U(dirobjs + 1, ==, usedobjs);
5596 }
5597 
5598 static int
5599 ztest_dataset_open(int d)
5600 {
5601         ztest_ds_t *zd = &ztest_ds[d];
5602         uint64_t committed_seq = ZTEST_GET_SHARED_DS(d)->zd_seq;
5603         objset_t *os;
5604         zilog_t *zilog;
5605         char name[ZFS_MAX_DATASET_NAME_LEN];
5606         int error;
5607 
5608         ztest_dataset_name(name, ztest_opts.zo_pool, d);
5609 
5610         rw_enter(&ztest_name_lock, RW_READER);
5611 
5612         error = ztest_dataset_create(name);
5613         if (error == ENOSPC) {
5614                 rw_exit(&ztest_name_lock);
5615                 ztest_record_enospc(FTAG);
5616                 return (error);
5617         }
5618         ASSERT(error == 0 || error == EEXIST);
5619 
5620         VERIFY0(dmu_objset_own(name, DMU_OST_OTHER, B_FALSE, zd, &os));
5621         rw_exit(&ztest_name_lock);
5622 
5623         ztest_zd_init(zd, ZTEST_GET_SHARED_DS(d), os);
5624 
5625         zilog = zd->zd_zilog;
5626 
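        /*
         * zh_claim_lr_seq records the highest log-record sequence claimed
         * at pool import; if it trails the sequence this dataset is known
         * to have committed, replayable log records have been lost.
         */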
5627         if (zilog->zl_header->zh_claim_lr_seq != 0 &&
5628             zilog->zl_header->zh_claim_lr_seq < committed_seq)
5629                 fatal(0, "missing log records: claimed %llu < committed %llu",
5630                     zilog->zl_header->zh_claim_lr_seq, committed_seq);
5631 
5632         ztest_dataset_dirobj_verify(zd);
5633 
5634         zil_replay(os, zd, ztest_replay_vector);
5635 
5636         ztest_dataset_dirobj_verify(zd);
5637 
5638         if (ztest_opts.zo_verbose >= 6)
5639                 (void) printf("%s replay %llu blocks, %llu records, seq %llu\n",
5640                     zd->zd_name,
5641                     (u_longlong_t)zilog->zl_parse_blk_count,


5652         return (0);
5653 }
5654 
5655 static void
5656 ztest_dataset_close(int d)
5657 {
5658         ztest_ds_t *zd = &ztest_ds[d];
5659 
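        /* Tear down in dependency order: the ZIL first, then its objset. */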
5660         zil_close(zd->zd_zilog);
5661         dmu_objset_disown(zd->zd_os, zd);
5662 
5663         ztest_zd_fini(zd);
5664 }
5665 
5666 /*
5667  * Kick off threads to run tests on all datasets in parallel.
5668  */
5669 static void
5670 ztest_run(ztest_shared_t *zs)
5671 {
5672         pthread_t *tid;
5673         spa_t *spa;
5674         objset_t *os;
5675         pthread_t resume_tid, deadman_tid;
5676         int error;
5677 
5678         ztest_exiting = B_FALSE;
5679 
5680         /*
5681          * Initialize parent/child shared state.
5682          */
5683         mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL);
5684         rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL);
5685 
5686         zs->zs_thread_start = gethrtime();
5687         zs->zs_thread_stop =
5688             zs->zs_thread_start + ztest_opts.zo_passtime * NANOSEC;
5689         zs->zs_thread_stop = MIN(zs->zs_thread_stop, zs->zs_proc_stop);
5690         zs->zs_thread_kill = zs->zs_thread_stop;
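        /*
         * With probability zo_killrate percent, pull the kill time back
         * to a random point within the pass; the process later SIGKILLs
         * itself mid-workload so the next pass exercises crash recovery.
         */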
5691         if (ztest_random(100) < ztest_opts.zo_killrate) {
5692                 zs->zs_thread_kill -=
5693                     ztest_random(ztest_opts.zo_passtime * NANOSEC);
5694         }
5695 
5696         mutex_init(&zcl.zcl_callbacks_lock, NULL, USYNC_THREAD, NULL);
5697 
5698         list_create(&zcl.zcl_callbacks, sizeof (ztest_cb_data_t),
5699             offsetof(ztest_cb_data_t, zcd_node));
5700 
5701         /*
5702          * Open our pool.
5703          */
5704         kernel_init(FREAD | FWRITE);
5705         VERIFY0(spa_open(ztest_opts.zo_pool, &spa, FTAG));
5706         spa->spa_debug = B_TRUE;
5707         metaslab_preload_limit = ztest_random(20) + 1;
5708         ztest_spa = spa;
5709 
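        /*
         * Stash the guid of the pool's root dataset in shared state;
         * dmu_objset_fast_stat() requires the pool config lock, hence
         * the dsl_pool_config_enter/exit pair.
         */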
5710         dmu_objset_stats_t dds;
5711         VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
5712             DMU_OST_ANY, B_TRUE, FTAG, &os));
5713         dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
5714         dmu_objset_fast_stat(os, &dds);
5715         dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
5716         zs->zs_guid = dds.dds_guid;
5717         dmu_objset_disown(os, FTAG);
5718 
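        /*
         * Lower the dedup-ditto threshold so that heavily referenced
         * dedup blocks get extra DVA copies; the ditto-verify test
         * relies on those copies to heal the damage it injects.
         */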
5719         spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
5720 
5721         /*
5722          * We don't expect the pool to suspend unless maxfaults == 0,
5723          * in which case ztest_fault_inject() temporarily takes away
5724          * the only valid replica.
5725          */
5726         if (MAXFAULTS() == 0)
5727                 spa->spa_failmode = ZIO_FAILURE_MODE_WAIT;
5728         else
5729                 spa->spa_failmode = ZIO_FAILURE_MODE_PANIC;
5730 
5731         /*
5732          * Create a thread to periodically resume suspended I/O.
5733          */
5734         VERIFY(pthread_create(&resume_tid, NULL, ztest_resume_thread,
5735             spa) == 0);
5736 
5737         /*
5738          * Create a deadman thread to abort() if we hang.
5739          */
5740         VERIFY(pthread_create(&deadman_tid, NULL, ztest_deadman_thread,
5741             zs) == 0);
5742 
5743         /*
5744          * Verify that we can safely inquire about any object,
5745          * whether it's allocated or not.  To make it interesting,
5746          * we probe five objects on either side of each power of two.
5747          * This hits all edge cases, including zero and the max.
5748          */
5749         for (int t = 0; t < 64; t++) {
5750                 for (int d = -5; d <= 5; d++) {
5751                         error = dmu_object_info(spa->spa_meta_objset,
5752                             (1ULL << t) + d, NULL);
5753                         ASSERT(error == 0 || error == ENOENT ||
5754                             error == EINVAL);
5755                 }
5756         }
5757 
5758         /*
5759          * If we got any ENOSPC errors on the previous run, destroy something.
5760          */
5761         if (zs->zs_enospc_count != 0) {
5762                 int d = ztest_random(ztest_opts.zo_datasets);
5763                 ztest_dataset_destroy(d);
5764         }
5765         zs->zs_enospc_count = 0;
5766 
5767         tid = umem_zalloc(ztest_opts.zo_threads * sizeof (pthread_t),
5768             UMEM_NOFAIL);
5769 
5770         if (ztest_opts.zo_verbose >= 4)
5771                 (void) printf("starting main threads...\n");
5772 
5773         /*
5774          * Kick off all the tests that run in parallel.
5775          */
5776         for (int t = 0; t < ztest_opts.zo_threads; t++) {
5777                 if (t < ztest_opts.zo_datasets &&
5778                     ztest_dataset_open(t) != 0)
5779                         return;
5780                 VERIFY(pthread_create(&tid[t], NULL, ztest_thread,
5781                     (void *)(uintptr_t)t) == 0);
5782         }
5783 
5784         /*
5785          * Wait for all of the tests to complete.  We go in reverse order
5786          * so we don't close datasets while threads are still using them.
5787          */
5788         for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) {
5789                 VERIFY(pthread_join(tid[t], NULL) == 0);
5790                 if (t < ztest_opts.zo_datasets)
5791                         ztest_dataset_close(t);
5792         }
5793 
5794         txg_wait_synced(spa_get_dsl(spa), 0);
5795 
5796         zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
5797         zs->zs_space = metaslab_class_get_space(spa_normal_class(spa));
5798         zfs_dbgmsg_print(FTAG);
5799 
5800         umem_free(tid, ztest_opts.zo_threads * sizeof (pthread_t));
5801 
5802         /* Kill the resume thread */
5803         ztest_exiting = B_TRUE;
5804         VERIFY(pthread_join(resume_tid, NULL) == 0);
5805         ztest_resume(spa);
5806 
5807         /*
5808          * Right before closing the pool, kick off a bunch of async I/O;
5809          * spa_close() should wait for it to complete.
5810          */
5811         for (uint64_t object = 1; object < 50; object++) {
5812                 dmu_prefetch(spa->spa_meta_objset, object, 0, 0, 1ULL << 20,
5813                     ZIO_PRIORITY_SYNC_READ);
5814         }
5815 
5816         spa_close(spa, FTAG);
5817 
5818         /*
5819          * Verify that we can loop over all pools.
5820          */
5821         mutex_enter(&spa_namespace_lock);
5822         for (spa = spa_next(NULL); spa != NULL; spa = spa_next(spa))
5823                 if (ztest_opts.zo_verbose > 3)
5824                         (void) printf("spa_next: found %s\n", spa_name(spa));
5825         mutex_exit(&spa_namespace_lock);
5826 
5827         /*
5828          * Verify that we can export the pool and reimport it under a
5829          * different name.
5830          */
5831         if (ztest_random(2) == 0) {
5832                 char name[ZFS_MAX_DATASET_NAME_LEN];
5833                 (void) snprintf(name, sizeof (name), "%s_import",
5834                     ztest_opts.zo_pool);
5835                 ztest_spa_import_export(ztest_opts.zo_pool, name);
5836                 ztest_spa_import_export(name, ztest_opts.zo_pool);
5837         }
5838 
5839         kernel_fini();
5840 
5841         list_destroy(&zcl.zcl_callbacks);
5842 
5843         mutex_destroy(&zcl.zcl_callbacks_lock);
5844 
5845         rw_destroy(&ztest_name_lock);
5846         mutex_destroy(&ztest_vdev_lock);
5847 }
5848 
5849 static void
5850 ztest_freeze(void)
5851 {
5852         ztest_ds_t *zd = &ztest_ds[0];
5853         spa_t *spa;
5854         int numloops = 0;
5855 
5856         if (ztest_opts.zo_verbose >= 3)
5857                 (void) printf("testing spa_freeze()...\n");
5858 
5859         kernel_init(FREAD | FWRITE);
5860         VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
5861         VERIFY3U(0, ==, ztest_dataset_open(0));
5862         spa->spa_debug = B_TRUE;
5863         ztest_spa = spa;
5864 
5865         /*
5866          * Force the first log block to be transactionally allocated.


5970         nvlist_t *props;
5971 
5972         VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
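        /*
         * Half the time hand back an empty nvlist, so pools are created
         * with all-default properties; otherwise enable autoreplace.
         */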
5973         if (ztest_random(2) == 0)
5974                 return (props);
5975         VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0);
5976 
5977         return (props);
5978 }
5979 
5980 /*
5981  * Create a storage pool with the given name and initial vdev size.
5982  * Then test spa_freeze() functionality.
5983  */
5984 static void
5985 ztest_init(ztest_shared_t *zs)
5986 {
5987         spa_t *spa;
5988         nvlist_t *nvroot, *props;
5989 
5990         mutex_init(&ztest_vdev_lock, NULL, USYNC_THREAD, NULL);
5991         rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL);
5992 
5993         kernel_init(FREAD | FWRITE);
5994 
5995         /*
5996          * Create the storage pool.
5997          */
5998         (void) spa_destroy(ztest_opts.zo_pool);
5999         ztest_shared->zs_vdev_next_leaf = 0;
6000         zs->zs_splits = 0;
6001         zs->zs_mirrors = ztest_opts.zo_mirrors;
6002         nvroot = make_vdev_root(NULL, NULL, NULL, ztest_opts.zo_vdev_size, 0,
6003             0, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
6004         props = make_random_props();
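        /*
         * Ask for every known feature at creation time; the only value
         * accepted for a feature@ property here is 0, meaning "enabled".
         */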
6005         for (int i = 0; i < SPA_FEATURES; i++) {
6006                 char buf[1024];
6007                 (void) snprintf(buf, sizeof (buf), "feature@%s",
6008                     spa_feature_table[i].fi_uname);
6009                 VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
6010         }
6011         VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props, NULL));
6012         nvlist_free(nvroot);
6013         nvlist_free(props);
6014 
6015         VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
6016         zs->zs_metaslab_sz =
6017             1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
6018 
6019         spa_close(spa, FTAG);
6020 
6021         kernel_fini();
6022 
6023         ztest_run_zdb(ztest_opts.zo_pool);
6024 
6025         ztest_freeze();
6026 
6027         ztest_run_zdb(ztest_opts.zo_pool);
6028 
6029         rw_destroy(&ztest_name_lock);
6030         mutex_destroy(&ztest_vdev_lock);
6031 }
6032 
6033 static void
6034 setup_data_fd(void)
6035 {
6036         static char ztest_name_data[] = "/tmp/ztest.data.XXXXXX";
6037 
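        /*
         * mkstemp() plus an immediate unlink() is the anonymous-file
         * idiom: the open descriptor keeps the file alive, and it is
         * reclaimed automatically once the last reference goes away.
         */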
6038         ztest_fd_data = mkstemp(ztest_name_data);
6039         ASSERT3S(ztest_fd_data, >=, 0);
6040         (void) unlink(ztest_name_data);
6041 }
6042 
6043 
6044 static int
6045 shared_data_size(ztest_shared_hdr_t *hdr)
6046 {
6047         int size;
6048 
6049         size = hdr->zh_hdr_size;
6050         size += hdr->zh_opts_size;