3949 ztest fault injection should avoid resilvering devices
3950 ztest: deadman fires when we're doing a scan
3951 ztest hang when running dedup test
3952 ztest: ztest_reguid test and ztest_fault_inject don't play nice together
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>

          --- old/usr/src/cmd/ztest/ztest.c
          +++ new/usr/src/cmd/ztest/ztest.c
... 176 lines elided ...
 177  177          .zo_killrate = 70,              /* 70% kill rate */
 178  178          .zo_verbose = 0,
 179  179          .zo_init = 1,
 180  180          .zo_time = 300,                 /* 5 minutes */
 181  181          .zo_maxloops = 50,              /* max loops during spa_freeze() */
 182  182          .zo_metaslab_gang_bang = 32 << 10
 183  183  };
 184  184  
 185  185  extern uint64_t metaslab_gang_bang;
 186  186  extern uint64_t metaslab_df_alloc_threshold;
      187 +extern uint64_t zfs_deadman_synctime;
 187  188  
 188  189  static ztest_shared_opts_t *ztest_shared_opts;
 189  190  static ztest_shared_opts_t ztest_opts;
 190  191  
 191  192  typedef struct ztest_shared_ds {
 192  193          uint64_t        zd_seq;
 193  194  } ztest_shared_ds_t;
 194  195  
 195  196  static ztest_shared_ds_t *ztest_shared_ds;
 196  197  #define ZTEST_GET_SHARED_DS(d) (&ztest_shared_ds[d])
... 159 lines elided ...
 356  357          { ztest_spa_prop_get_set,               1,      &zopt_sometimes },
 357  358  #if 0
 358  359          { ztest_dmu_prealloc,                   1,      &zopt_sometimes },
 359  360  #endif
 360  361          { ztest_fzap,                           1,      &zopt_sometimes },
 361  362          { ztest_dmu_snapshot_create_destroy,    1,      &zopt_sometimes },
 362  363          { ztest_spa_create_destroy,             1,      &zopt_sometimes },
 363  364          { ztest_fault_inject,                   1,      &zopt_sometimes },
 364  365          { ztest_ddt_repair,                     1,      &zopt_sometimes },
 365  366          { ztest_dmu_snapshot_hold,              1,      &zopt_sometimes },
 366      -        { ztest_reguid,                         1,      &zopt_sometimes },
      367 +        { ztest_reguid,                         1,      &zopt_rarely    },
 367  368          { ztest_spa_rename,                     1,      &zopt_rarely    },
 368  369          { ztest_scrub,                          1,      &zopt_rarely    },
 369  370          { ztest_spa_upgrade,                    1,      &zopt_rarely    },
 370  371          { ztest_dsl_dataset_promote_busy,       1,      &zopt_rarely    },
 371  372          { ztest_vdev_attach_detach,             1,      &zopt_sometimes },
 372  373          { ztest_vdev_LUN_growth,                1,      &zopt_rarely    },
 373  374          { ztest_vdev_add_remove,                1,
 374  375              &ztest_opts.zo_vdevtime                             },
 375  376          { ztest_vdev_aux_add_remove,            1,
 376  377              &ztest_opts.zo_vdevtime                             },
... 4370 lines elided ...
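Each row of the table above pairs a test function with an iteration count and a pointer to a shared interval; moving ztest_reguid from zopt_sometimes to zopt_rarely means the pool GUID is rewritten far less often underneath concurrently running tests such as ztest_fault_inject (bug 3952). As a rough guide, the related declarations elsewhere in ztest.c look approximately like the sketch below; the field names and interval values are recalled from context and may differ slightly.

	/* Fragment, paraphrased from elsewhere in ztest.c; details may differ. */
	typedef struct ztest_info {
		ztest_func_t	*zi_func;	/* test function to call */
		uint64_t	zi_iters;	/* iterations per execution */
		uint64_t	*zi_interval;	/* nanoseconds between executions */
	} ztest_info_t;

	/* Interval buckets pointed to by the table; reguid now uses the 60s one. */
	uint64_t zopt_always	= 0ULL * NANOSEC;	/* every pass */
	uint64_t zopt_sometimes	= 10ULL * NANOSEC;	/* about every 10 seconds */
	uint64_t zopt_rarely	= 60ULL * NANOSEC;	/* about every 60 seconds */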
4747 4748  
4748 4749          VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
4749 4750          maxfaults = MAXFAULTS();
4750 4751          leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
4751 4752          mirror_save = zs->zs_mirrors;
4752 4753          VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
4753 4754  
4754 4755          ASSERT(leaves >= 1);
4755 4756  
4756 4757          /*
     4758 +         * Grab the name lock as reader. There are some operations
     4759 +         * which don't like to have their vdevs changed while
     4760 +         * they are in progress (i.e. spa_change_guid). Those
     4761 +         * operations will have grabbed the name lock as writer.
     4762 +         */
     4763 +        (void) rw_rdlock(&ztest_name_lock);
     4764 +
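The reader side above only works if every GUID-changing operation takes ztest_name_lock as writer. Below is a minimal sketch of that writer side, assuming ztest_reguid() wraps spa_change_guid() roughly as follows; the real function sits outside this hunk and may differ in detail.

	/* ARGSUSED */
	void
	ztest_reguid(ztest_ds_t *zd, uint64_t id)
	{
		spa_t *spa = ztest_spa;
		uint64_t orig = spa_guid(spa);
		uint64_t load = spa_load_guid(spa);
		int error;

		(void) rw_wrlock(&ztest_name_lock);	/* exclude fault injection */
		error = spa_change_guid(spa);
		(void) rw_unlock(&ztest_name_lock);

		if (error != 0)
			return;

		/* The pool GUID must change; the load GUID must not. */
		VERIFY3U(orig, !=, spa_guid(spa));
		VERIFY3U(load, ==, spa_load_guid(spa));
	}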
     4765 +        /*
4757 4766           * We need SCL_STATE here because we're going to look at vd0->vdev_tsd.
4758 4767           */
4759 4768          spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
4760 4769  
4761 4770          if (ztest_random(2) == 0) {
4762 4771                  /*
4763 4772                   * Inject errors on a normal data device or slog device.
4764 4773                   */
4765 4774                  top = ztest_random_vdev_top(spa, B_TRUE);
4766 4775                  leaf = ztest_random(leaves) + zs->zs_splits;
... 8 lines elided ...
4775 4784                      ztest_opts.zo_dir, ztest_opts.zo_pool,
4776 4785                      top * leaves + zs->zs_splits);
4777 4786                  (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template,
4778 4787                      ztest_opts.zo_dir, ztest_opts.zo_pool,
4779 4788                      top * leaves + leaf);
4780 4789  
4781 4790                  vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
4782 4791                  if (vd0 != NULL && vd0->vdev_top->vdev_islog)
4783 4792                          islog = B_TRUE;
4784 4793  
4785      -                if (vd0 != NULL && maxfaults != 1) {
     4794 +                /*
     4795 +                 * If the top-level vdev needs to be resilvered
     4796 +                 * then we only allow faults on the device that is
     4797 +                 * resilvering.
     4798 +                 */
     4799 +                if (vd0 != NULL && maxfaults != 1 &&
     4800 +                    (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
     4801 +                    vd0->vdev_resilvering)) {
4786 4802                          /*
4787 4803                           * Make vd0 explicitly claim to be unreadable,
4788 4804                           * or unwriteable, or reach behind its back
4789 4805                           * and close the underlying fd.  We can do this if
4790 4806                           * maxfaults == 0 because we'll fail and reexecute,
4791 4807                           * and we can do it if maxfaults >= 2 because we'll
4792 4808                           * have enough redundancy.  If maxfaults == 1, the
4793 4809                           * combination of this with injection of random data
4794 4810                           * corruption below exceeds the pool's fault tolerance.
4795 4811                           */
... 10 lines elided ...
4806 4822                          guid0 = vd0->vdev_guid;
4807 4823                  }
4808 4824          } else {
4809 4825                  /*
4810 4826                   * Inject errors on an l2cache device.
4811 4827                   */
4812 4828                  spa_aux_vdev_t *sav = &spa->spa_l2cache;
4813 4829  
4814 4830                  if (sav->sav_count == 0) {
4815 4831                          spa_config_exit(spa, SCL_STATE, FTAG);
     4832 +                        (void) rw_unlock(&ztest_name_lock);
4816 4833                          return;
4817 4834                  }
4818 4835                  vd0 = sav->sav_vdevs[ztest_random(sav->sav_count)];
4819 4836                  guid0 = vd0->vdev_guid;
4820 4837                  (void) strcpy(path0, vd0->vdev_path);
4821 4838                  (void) strcpy(pathrand, vd0->vdev_path);
4822 4839  
4823 4840                  leaf = 0;
4824 4841                  leaves = 1;
4825 4842                  maxfaults = INT_MAX;    /* no limit on cache devices */
4826 4843          }
4827 4844  
4828 4845          spa_config_exit(spa, SCL_STATE, FTAG);
     4846 +        (void) rw_unlock(&ztest_name_lock);
4829 4847  
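Taken together, the two gates in the block above (the maxfaults rule explained in the long comment and the new resilver check for bug 3949) reduce to a small predicate. The sketch below pulls it out as a helper purely for illustration; ztest keeps the check inline.

	static boolean_t
	ztest_fault_ok(vdev_t *vd0, int maxfaults)
	{
		/*
		 * With maxfaults == 1, this fault plus the random data
		 * corruption injected later would exceed the pool's
		 * fault tolerance.
		 */
		if (vd0 == NULL || maxfaults == 1)
			return (B_FALSE);

		/*
		 * While the top-level vdev needs resilvering, only the
		 * leaf that is actually resilvering may be damaged
		 * (bug 3949).
		 */
		if (vdev_resilver_needed(vd0->vdev_top, NULL, NULL) &&
		    !vd0->vdev_resilvering)
			return (B_FALSE);

		return (B_TRUE);
	}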
4830 4848          /*
4831 4849           * If we can tolerate two or more faults, or we're dealing
4832 4850           * with a slog, randomly online/offline vd0.
4833 4851           */
4834 4852          if ((maxfaults >= 2 || islog) && guid0 != 0) {
4835 4853                  if (ztest_random(10) < 6) {
4836 4854                          int flags = (ztest_random(2) == 0 ?
4837 4855                              ZFS_OFFLINE_TEMPORARY : 0);
4838 4856  
... 444 lines elided ...
5283 5301                          ztest_resume(spa);
5284 5302                  (void) poll(NULL, 0, 100);
5285 5303          }
5286 5304          return (NULL);
5287 5305  }
5288 5306  
5289 5307  static void *
5290 5308  ztest_deadman_thread(void *arg)
5291 5309  {
5292 5310          ztest_shared_t *zs = arg;
5293      -        int grace = 300;
5294      -        hrtime_t delta;
     5311 +        spa_t *spa = ztest_spa;
     5312 +        hrtime_t delta, total = 0;
5295 5313  
5296      -        delta = (zs->zs_thread_stop - zs->zs_thread_start) / NANOSEC + grace;
     5314 +        for (;;) {
     5315 +                delta = (zs->zs_thread_stop - zs->zs_thread_start) /
     5316 +                    NANOSEC + zfs_deadman_synctime;
5297 5317  
5298      -        (void) poll(NULL, 0, (int)(1000 * delta));
     5318 +                (void) poll(NULL, 0, (int)(1000 * delta));
5299 5319  
5300      -        fatal(0, "failed to complete within %d seconds of deadline", grace);
     5320 +                /*
     5321 +                 * If the pool is suspended then fail immediately. Otherwise,
     5322 +                 * check to see if the pool is making any progress. If
     5323 +                 * vdev_deadman() discovers that there hasn't been any recent
     5324 +                 * I/O, it will end up aborting the tests.
     5325 +                 */
     5326 +                if (spa_suspended(spa)) {
     5327 +                        fatal(0, "aborting test after %llu seconds because "
     5328 +                            "pool has transitioned to a suspended state.",
     5329 +                            zfs_deadman_synctime);
     5330 +                        return (NULL);
     5331 +                }
     5332 +                vdev_deadman(spa->spa_root_vdev);
5301 5333  
5302      -        return (NULL);
     5334 +                total += zfs_deadman_synctime;
     5335 +                (void) printf("ztest has been running for %lld seconds\n",
     5336 +                    total);
     5337 +        }
5303 5338  }
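For context, this watchdog is started from ztest_run(); a sketch of the spawn call is below, assuming the Solaris threads API used elsewhere in ztest (the exact call is outside this hunk and may differ). With the defaults visible in this file (zo_time = 300 and zfs_deadman_synctime = 300 set in main()), the first check fires roughly 600 seconds after the workload threads start, and the loop then re-arms for the same interval on every pass.

	thread_t tid;

	/* Bound watchdog thread; zs is the ztest_shared_t for this run. */
	VERIFY(thr_create(0, 0, ztest_deadman_thread, zs, THR_BOUND, &tid) == 0);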
5304 5339  
5305 5340  static void
5306 5341  ztest_execute(int test, ztest_info_t *zi, uint64_t id)
5307 5342  {
5308 5343          ztest_ds_t *zd = &ztest_ds[id % ztest_opts.zo_datasets];
5309 5344          ztest_shared_callstate_t *zc = ZTEST_GET_SHARED_CALLSTATE(test);
5310 5345          hrtime_t functime = gethrtime();
5311 5346  
5312 5347          for (int i = 0; i < zi->zi_iters; i++)
... 704 lines elided ...
6017 6052          char timebuf[100];
6018 6053          char numbuf[6];
6019 6054          spa_t *spa;
6020 6055          char *cmd;
6021 6056          boolean_t hasalt;
6022 6057          char *fd_data_str = getenv("ZTEST_FD_DATA");
6023 6058  
6024 6059          (void) setvbuf(stdout, NULL, _IOLBF, 0);
6025 6060  
6026 6061          dprintf_setup(&argc, argv);
     6062 +        zfs_deadman_synctime = 300;
6027 6063  
6028 6064          ztest_fd_rand = open("/dev/urandom", O_RDONLY);
6029 6065          ASSERT3S(ztest_fd_rand, >=, 0);
6030 6066  
6031 6067          if (!fd_data_str) {
6032 6068                  process_options(argc, argv);
6033 6069  
6034 6070                  setup_data_fd();
6035 6071                  setup_hdr();
6036 6072                  setup_data();
... 186 lines elided ...