3949 ztest fault injection should avoid resilvering devices
3950 ztest: deadman fires when we're doing a scan
3951 ztest hang when running dedup test
3952 ztest: ztest_reguid test and ztest_fault_inject don't play nice together
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>

*** 182,191 ****
--- 182,192 ----
      .zo_metaslab_gang_bang = 32 << 10
  };

  extern uint64_t metaslab_gang_bang;
  extern uint64_t metaslab_df_alloc_threshold;
+ extern uint64_t zfs_deadman_synctime;

  static ztest_shared_opts_t *ztest_shared_opts;
  static ztest_shared_opts_t ztest_opts;

  typedef struct ztest_shared_ds {
*** 361,371 ****
      { ztest_dmu_snapshot_create_destroy,  1,  &zopt_sometimes },
      { ztest_spa_create_destroy,           1,  &zopt_sometimes },
      { ztest_fault_inject,                 1,  &zopt_sometimes },
      { ztest_ddt_repair,                   1,  &zopt_sometimes },
      { ztest_dmu_snapshot_hold,            1,  &zopt_sometimes },
!     { ztest_reguid,                       1,  &zopt_sometimes },
      { ztest_spa_rename,                   1,  &zopt_rarely },
      { ztest_scrub,                        1,  &zopt_rarely },
      { ztest_spa_upgrade,                  1,  &zopt_rarely },
      { ztest_dsl_dataset_promote_busy,     1,  &zopt_rarely },
      { ztest_vdev_attach_detach,           1,  &zopt_sometimes },
--- 362,372 ----
      { ztest_dmu_snapshot_create_destroy,  1,  &zopt_sometimes },
      { ztest_spa_create_destroy,           1,  &zopt_sometimes },
      { ztest_fault_inject,                 1,  &zopt_sometimes },
      { ztest_ddt_repair,                   1,  &zopt_sometimes },
      { ztest_dmu_snapshot_hold,            1,  &zopt_sometimes },
!     { ztest_reguid,                       1,  &zopt_rarely },
      { ztest_spa_rename,                   1,  &zopt_rarely },
      { ztest_scrub,                        1,  &zopt_rarely },
      { ztest_spa_upgrade,                  1,  &zopt_rarely },
      { ztest_dsl_dataset_promote_busy,     1,  &zopt_rarely },
      { ztest_vdev_attach_detach,           1,  &zopt_sometimes },
*** 4752,4761 ****
--- 4753,4770 ----
      VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);

      ASSERT(leaves >= 1);

      /*
+      * Grab the name lock as reader. There are some operations
+      * which don't like to have their vdevs changed while
+      * they are in progress (i.e. spa_change_guid). Those
+      * operations will have grabbed the name lock as writer.
+      */
+     (void) rw_rdlock(&ztest_name_lock);
+
+     /*
       * We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
       */
      spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

      if (ztest_random(2) == 0) {
*** 4780,4791 ****

          vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
          if (vd0 != NULL && vd0->vdev_top->vdev_islog)
              islog = B_TRUE;

-         if (vd0 != NULL && maxfaults != 1) {
              /*
               * Make vd0 explicitly claim to be unreadable,
               * or unwriteable, or reach behind its back
               * and close the underlying fd. We can do this if
               * maxfaults == 0 because we'll fail and reexecute,
               * and we can do it if maxfaults >= 2 because we'll
--- 4789,4807 ----

          vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
          if (vd0 != NULL && vd0->vdev_top->vdev_islog)
              islog = B_TRUE;

          /*
+          * If the top-level vdev needs to be resilvered
+          * then we only allow faults on the device that is
+          * resilvering.
+          */
+         if (vd0 != NULL && maxfaults != 1 &&
+             (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
+             vd0->vdev_resilvering)) {
+             /*
               * Make vd0 explicitly claim to be unreadable,
               * or unwriteable, or reach behind its back
               * and close the underlying fd. We can do this if
               * maxfaults == 0 because we'll fail and reexecute,
               * and we can do it if maxfaults >= 2 because we'll
*** 4811,4820 ****
--- 4827,4837 ----
           */
          spa_aux_vdev_t *sav = &spa->spa_l2cache;

          if (sav->sav_count == 0) {
              spa_config_exit(spa, SCL_STATE, FTAG);
+             (void) rw_unlock(&ztest_name_lock);
              return;
          }
          vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
          guid0 = vd0->vdev_guid;
          (void) strcpy(path0, vd0->vdev_path);
*** 4824,4833 ****
--- 4841,4851 ----
          leaves = 1;
          maxfaults = INT_MAX;    /* no limit on cache devices */
      }

      spa_config_exit(spa, SCL_STATE, FTAG);
+     (void) rw_unlock(&ztest_name_lock);

      /*
       * If we can tolerate two or more faults, or we're dealing
       * with a slog, randomly online/offline vd0.
       */
*** 5288,5307 ****

  static void *
  ztest_deadman_thread(void *arg)
  {
      ztest_shared_t *zs = arg;
!     int grace = 300;
!     hrtime_t delta;

!     delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace;

      (void) poll(NULL, 0, (int)(1000 * delta));

!     fatal(0, "failed to complete within %d seconds of deadline", grace);

!     return (NULL);
  }

  static void
  ztest_execute(int test, ztest_info_t *zi, uint64_t id)
  {
--- 5306,5342 ----

  static void *
  ztest_deadman_thread(void *arg)
  {
      ztest_shared_t *zs = arg;
!     spa_t *spa = ztest_spa;
!     hrtime_t delta, total = 0;

!     for (;;) {
!         delta = (zs->zs_thread_stop - zs->zs_thread_start) /
!             NANOSEC + zfs_deadman_synctime;

          (void) poll(NULL, 0, (int)(1000 * delta));

!         /*
!          * If the pool is suspended then fail immediately. Otherwise,
!          * check to see if the pool is making any progress. If
!          * vdev_deadman() discovers that there hasn't been any recent
!          * I/Os then it will end up aborting the tests.
!          */
!         if (spa_suspended(spa)) {
!             fatal(0, "aborting test after %llu seconds because "
!                 "pool has transitioned to a suspended state.",
!                 zfs_deadman_synctime);
              return (NULL);
+         }
+         vdev_deadman(spa->spa_root_vdev);
+
+         total += zfs_deadman_synctime;
+         (void) printf("ztest has been running for %lld seconds\n",
+             total);
+     }
  }

  static void
  ztest_execute(int test, ztest_info_t *zi, uint64_t id)
  {
*** 6022,6031 ****
--- 6057,6067 ----
      char *fd_data_str = getenv("ZTEST_FD_DATA");

      (void) setvbuf(stdout, NULL, _IOLBF, 0);

      dprintf_setup(&argc, argv);
+     zfs_deadman_synctime = 300;

      ztest_fd_rand = open("/dev/urandom", O_RDONLY);
      ASSERT3S(ztest_fd_rand, >=, 0);

      if (!fd_data_str) {