Print this page
3949 ztest fault injection should avoid resilvering devices
3950 ztest: deadman fires when we're doing a scan
3951 ztest hang when running dedup test
3952 ztest: ztest_reguid test and ztest_fault_inject don't play nice together
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
@@ -182,10 +182,11 @@
.zo_metaslab_gang_bang = 32 << 10
};
extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;
+extern uint64_t zfs_deadman_synctime;
static ztest_shared_opts_t *ztest_shared_opts;
static ztest_shared_opts_t ztest_opts;
typedef struct ztest_shared_ds {
@@ -361,11 +362,11 @@
{ ztest_dmu_snapshot_create_destroy, 1, &zopt_sometimes },
{ ztest_spa_create_destroy, 1, &zopt_sometimes },
{ ztest_fault_inject, 1, &zopt_sometimes },
{ ztest_ddt_repair, 1, &zopt_sometimes },
{ ztest_dmu_snapshot_hold, 1, &zopt_sometimes },
- { ztest_reguid, 1, &zopt_sometimes },
+ { ztest_reguid, 1, &zopt_rarely },
{ ztest_spa_rename, 1, &zopt_rarely },
{ ztest_scrub, 1, &zopt_rarely },
{ ztest_spa_upgrade, 1, &zopt_rarely },
{ ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
{ ztest_vdev_attach_detach, 1, &zopt_sometimes },
@@ -4752,10 +4753,18 @@
VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
ASSERT(leaves >= 1);
/*
+ * Grab the name lock as reader. There are some operations
+ * which don't like to have their vdevs changed while
+ * they are in progress (e.g. spa_change_guid). Those
+ * operations will have grabbed the name lock as writer.
+ */
+ (void) rw_rdlock(&ztest_name_lock);
+
+ /*
* We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
*/
spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
if (ztest_random(2) == 0) {
@@ -4780,12 +4789,19 @@
vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
if (vd0 != NULL && vd0->vdev_top->vdev_islog)
islog = B_TRUE;
- if (vd0 != NULL && maxfaults != 1) {
/*
+ * If the top-level vdev needs to be resilvered
+ * then we only allow faults on the device that is
+ * resilvering.
+ */
+ if (vd0 != NULL && maxfaults != 1 &&
+ (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
+ vd0->vdev_resilvering)) {
+ /*
* Make vd0 explicitly claim to be unreadable,
* or unwriteable, or reach behind its back
* and close the underlying fd. We can do this if
* maxfaults == 0 because we'll fail and reexecute,
* and we can do it if maxfaults >= 2 because we'll
@@ -4811,10 +4827,11 @@
*/
spa_aux_vdev_t *sav = &spa->spa_l2cache;
if (sav->sav_count == 0) {
spa_config_exit(spa, SCL_STATE, FTAG);
+ (void) rw_unlock(&ztest_name_lock);
return;
}
vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
guid0 = vd0->vdev_guid;
(void) strcpy(path0, vd0->vdev_path);
@@ -4824,10 +4841,11 @@
leaves = 1;
maxfaults = INT_MAX; /* no limit on cache devices */
}
spa_config_exit(spa, SCL_STATE, FTAG);
+ (void) rw_unlock(&ztest_name_lock);
/*
* If we can tolerate two or more faults, or we're dealing
* with a slog, randomly online/offline vd0.
*/
@@ -5288,20 +5306,37 @@
static void *
ztest_deadman_thread(void *arg)
{
ztest_shared_t *zs = arg;
- int grace = 300;
- hrtime_t delta;
+ spa_t *spa = ztest_spa;
+ hrtime_t delta, total = 0;
- delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace;
+ for (;;) {
+ delta = (zs->zs_thread_stop - zs->zs_thread_start) /
+ NANOSEC + zfs_deadman_synctime;
(void) poll(NULL, 0, (int)(1000 * delta));
- fatal(0, "failed to complete within %d seconds of deadline", grace);
-
+ /*
+ * If the pool is suspended then fail immediately. Otherwise,
+ * check to see if the pool is making any progress. If
+ * vdev_deadman() discovers that there haven't been any recent
+ * I/Os then it will end up aborting the tests.
+ */
+ if (spa_suspended(spa)) {
+ fatal(0, "aborting test after %llu seconds because "
+ "pool has transitioned to a suspended state.",
+ zfs_deadman_synctime);
return (NULL);
+ }
+ vdev_deadman(spa->spa_root_vdev);
+
+ total += zfs_deadman_synctime;
+ (void) printf("ztest has been running for %lld seconds\n",
+ total);
+ }
}
static void
ztest_execute(int test, ztest_info_t *zi, uint64_t id)
{
@@ -6022,10 +6057,11 @@
char *fd_data_str = getenv("ZTEST_FD_DATA");
(void) setvbuf(stdout, NULL, _IOLBF, 0);
dprintf_setup(&argc, argv);
+ zfs_deadman_synctime = 300;
ztest_fd_rand = open("/dev/urandom", O_RDONLY);
ASSERT3S(ztest_fd_rand, >=, 0);
if (!fd_data_str) {