3949 ztest fault injection should avoid resilvering devices
3950 ztest: deadman fires when we're doing a scan
3951 ztest hang when running dedup test
3952 ztest: ztest_reguid test and ztest_fault_inject don't play nice together
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>

*** 182,191 ****
--- 182,192 ----
      .zo_metaslab_gang_bang = 32 << 10
  };

  extern uint64_t metaslab_gang_bang;
  extern uint64_t metaslab_df_alloc_threshold;
+ extern uint64_t zfs_deadman_synctime;

  static ztest_shared_opts_t *ztest_shared_opts;
  static ztest_shared_opts_t ztest_opts;

  typedef struct ztest_shared_ds {
*** 361,371 ****
      { ztest_dmu_snapshot_create_destroy,  1,  &zopt_sometimes },
      { ztest_spa_create_destroy,           1,  &zopt_sometimes },
      { ztest_fault_inject,                 1,  &zopt_sometimes },
      { ztest_ddt_repair,                   1,  &zopt_sometimes },
      { ztest_dmu_snapshot_hold,            1,  &zopt_sometimes },
!     { ztest_reguid,                       1,  &zopt_sometimes },
      { ztest_spa_rename,                   1,  &zopt_rarely },
      { ztest_scrub,                        1,  &zopt_rarely },
      { ztest_spa_upgrade,                  1,  &zopt_rarely },
      { ztest_dsl_dataset_promote_busy,     1,  &zopt_rarely },
      { ztest_vdev_attach_detach,           1,  &zopt_sometimes },
--- 362,372 ----
      { ztest_dmu_snapshot_create_destroy,  1,  &zopt_sometimes },
      { ztest_spa_create_destroy,           1,  &zopt_sometimes },
      { ztest_fault_inject,                 1,  &zopt_sometimes },
      { ztest_ddt_repair,                   1,  &zopt_sometimes },
      { ztest_dmu_snapshot_hold,            1,  &zopt_sometimes },
!     { ztest_reguid,                       1,  &zopt_rarely },
      { ztest_spa_rename,                   1,  &zopt_rarely },
      { ztest_scrub,                        1,  &zopt_rarely },
      { ztest_spa_upgrade,                  1,  &zopt_rarely },
      { ztest_dsl_dataset_promote_busy,     1,  &zopt_rarely },
      { ztest_vdev_attach_detach,           1,  &zopt_sometimes },
*** 4752,4761 ****
--- 4753,4770 ----
      VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);

      ASSERT(leaves >= 1);

      /*
+      * Grab the name lock as reader. There are some operations
+      * which don't like to have their vdevs changed while
+      * they are in progress (i.e. spa_change_guid). Those
+      * operations will have grabbed the name lock as writer.
+      */
+     (void) rw_rdlock(&ztest_name_lock);
+
+     /*
       * We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
       */
      spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

      if (ztest_random(2) == 0) {
*** 4780,4791 ****

          vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
          if (vd0 != NULL && vd0->vdev_top->vdev_islog)
              islog = B_TRUE;

-         if (vd0 != NULL && maxfaults != 1) {
              /*
               * Make vd0 explicitly claim to be unreadable,
               * or unwriteable, or reach behind its back
               * and close the underlying fd. We can do this if
               * maxfaults == 0 because we'll fail and reexecute,
               * and we can do it if maxfaults >= 2 because we'll
--- 4789,4807 ----

          vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
          if (vd0 != NULL && vd0->vdev_top->vdev_islog)
              islog = B_TRUE;

          /*
+          * If the top-level vdev needs to be resilvered
+          * then we only allow faults on the device that is
+          * resilvering.
+          */
+         if (vd0 != NULL && maxfaults != 1 &&
+             (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
+             vd0->vdev_resilvering)) {
+             /*
               * Make vd0 explicitly claim to be unreadable,
               * or unwriteable, or reach behind its back
               * and close the underlying fd. We can do this if
               * maxfaults == 0 because we'll fail and reexecute,
               * and we can do it if maxfaults >= 2 because we'll
*** 4811,4820 ****
--- 4827,4837 ----
           */
          spa_aux_vdev_t *sav = &spa->spa_l2cache;

          if (sav->sav_count == 0) {
              spa_config_exit(spa, SCL_STATE, FTAG);
+             (void) rw_unlock(&ztest_name_lock);
              return;
          }
          vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
          guid0 = vd0->vdev_guid;
          (void) strcpy(path0, vd0->vdev_path);
*** 4824,4833 ****
--- 4841,4851 ----
          leaves = 1;
          maxfaults = INT_MAX;    /* no limit on cache devices */
      }

      spa_config_exit(spa, SCL_STATE, FTAG);
+     (void) rw_unlock(&ztest_name_lock);

      /*
       * If we can tolerate two or more faults, or we're dealing
       * with a slog, randomly online/offline vd0.
       */
*** 5288,5307 ****

  static void *
  ztest_deadman_thread(void *arg)
  {
      ztest_shared_t *zs = arg;
!     int grace = 300;
!     hrtime_t delta;

!     delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace;

      (void) poll(NULL, 0, (int)(1000 * delta));

!     fatal(0, "failed to complete within %d seconds of deadline", grace);

!     return (NULL);
  }

  static void
  ztest_execute(int test, ztest_info_t *zi, uint64_t id)
  {
--- 5306,5342 ----

  static void *
  ztest_deadman_thread(void *arg)
  {
      ztest_shared_t *zs = arg;
!     spa_t *spa = ztest_spa;
!     hrtime_t delta, total = 0;

!     for (;;) {
!         delta = (zs->zs_thread_stop - zs->zs_thread_start) /
!             NANOSEC + zfs_deadman_synctime;

          (void) poll(NULL, 0, (int)(1000 * delta));

!         /*
!          * If the pool is suspended then fail immediately. Otherwise,
!          * check to see if the pool is making any progress. If
!          * vdev_deadman() discovers that there hasn't been any recent
!          * I/Os then it will end up aborting the tests.
!          */
!         if (spa_suspended(spa)) {
!             fatal(0, "aborting test after %llu seconds because "
!                 "pool has transitioned to a suspended state.",
!                 zfs_deadman_synctime);
              return (NULL);
+         }
+         vdev_deadman(spa->spa_root_vdev);
+
+         total += zfs_deadman_synctime;
+         (void) printf("ztest has been running for %lld seconds\n",
+             total);
+     }
  }

  static void
  ztest_execute(int test, ztest_info_t *zi, uint64_t id)
  {
*** 6022,6031 ****
--- 6057,6067 ----
      char *fd_data_str = getenv("ZTEST_FD_DATA");

      (void) setvbuf(stdout, NULL, _IOLBF, 0);

      dprintf_setup(&argc, argv);
+     zfs_deadman_synctime = 300;

      ztest_fd_rand = open("/dev/urandom", O_RDONLY);
      ASSERT3S(ztest_fd_rand, >=, 0);

      if (!fd_data_str) {