3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2013 Steven Hartland. All rights reserved.
26 */
27
28 /*
29 * The objective of this program is to provide a DMU/ZAP/SPA stress test
30 * that runs entirely in userland, is easy to use, and easy to extend.
31 *
32 * The overall design of the ztest program is as follows:
33 *
34 * (1) For each major functional area (e.g. adding vdevs to a pool,
35 * creating and destroying datasets, reading and writing objects, etc)
36 * we have a simple routine to test that functionality. These
37 * individual routines do not have to do anything "stressful".
38 *
39 * (2) We turn these simple functionality tests into a stress test by
40 * running them all in parallel, with as many threads as desired,
41 * and spread across as many datasets, objects, and vdevs as desired.
42 *
43 * (3) While all this is happening, we inject faults into the pool to
750 if (0 != access(zo->zo_alt_ztest, X_OK)) {
751 ztest_dump_core = B_FALSE;
752 fatal(B_TRUE, "invalid alternate ztest: %s",
753 zo->zo_alt_ztest);
754 } else if (0 != access(zo->zo_alt_libpath, X_OK)) {
755 ztest_dump_core = B_FALSE;
756 fatal(B_TRUE, "invalid alternate lib directory %s",
757 zo->zo_alt_libpath);
758 }
759
760 umem_free(cmd, MAXPATHLEN);
761 umem_free(realaltdir, MAXPATHLEN);
762 }
763 }
764
765 static void
766 ztest_kill(ztest_shared_t *zs)
767 {
768 zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa));
769 zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa));
770 (void) kill(getpid(), SIGKILL);
771 }
772
773 static uint64_t
774 ztest_random(uint64_t range)
775 {
776 uint64_t r;
777
778 ASSERT3S(ztest_fd_rand, >=, 0);
779
780 if (range == 0)
781 return (0);
782
783 if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
784 fatal(1, "short read from /dev/urandom");
785
786 return (r % range);
787 }
788
789 /* ARGSUSED */
2714
2715 }
2716
2717 /*
2718 * Verify that we can attach and detach devices.
2719 */
2720 /* ARGSUSED */
void
ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
{
	ztest_shared_t *zs = ztest_shared;
	spa_t *spa = ztest_spa;
	spa_aux_vdev_t *sav = &spa->spa_spares;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *pvd;
	nvlist_t *root;
	uint64_t leaves;
	uint64_t leaf, top;
	uint64_t ashift = ztest_get_ashift();
	uint64_t oldguid, pguid;
	/*
	 * NOTE(review): size_t would truncate vdev sizes > 4GB on an
	 * ILP32 build; a later revision of this function declares these
	 * as uint64_t instead — confirm before relying on oldsize/newsize.
	 */
	size_t oldsize, newsize;
	char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
	int replacing;
	int oldvd_has_siblings = B_FALSE;
	int newvd_is_spare = B_FALSE;
	int oldvd_is_log;
	int error, expected_error;

	/* Serialize against other vdev-mutating tests. */
	VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
	leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;

	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);

	/*
	 * Decide whether to do an attach or a replace.
	 */
	replacing = ztest_random(2);

	/*
	 * Pick a random top-level vdev.
	 */

	/*
	 * NOTE(review): the body of this function (the actual attach /
	 * replace attempt) is not present in this excerpt; `error' and
	 * `expected_error' are set in the elided portion.
	 */

	/*
	 * If our parent was the replacing vdev, but the replace completed,
	 * then instead of failing with ENOTSUP we may either succeed,
	 * fail with ENODEV, or fail with EOVERFLOW.
	 */
	if (expected_error == ENOTSUP &&
	    (error == 0 || error == ENODEV || error == EOVERFLOW))
		expected_error = error;

	/*
	 * If someone grew the LUN, the replacement may be too small.
	 */
	if (error == EOVERFLOW || error == EBUSY)
		expected_error = error;

	/* XXX workaround 6690467 */
	if (error != expected_error && expected_error != EBUSY) {
		/*
		 * NOTE(review): %llu paired with (longlong_t), a signed
		 * type — harmless for realistic sizes but technically a
		 * signed/unsigned format mismatch; verify against fatal()'s
		 * format expectations.
		 */
		fatal(0, "attach (%s %llu, %s %llu, %d) "
		    "returned %d, expected %d",
		    oldpath, (longlong_t)oldsize, newpath,
		    (longlong_t)newsize, replacing, error, expected_error);
	}

	VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
}
2899
2900 /*
2901 * Callback function which expands the physical size of the vdev.
2902 */
2903 vdev_t *
2904 grow_vdev(vdev_t *vd, void *arg)
2905 {
2906 spa_t *spa = vd->vdev_spa;
2907 size_t *newsize = arg;
2908 size_t fsize;
2909 int fd;
2910
2911 ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
2912 ASSERT(vd->vdev_ops->vdev_op_leaf);
2913
2914 if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
4784 * and we'll write random garbage to the randomly chosen leaf.
4785 */
4786 (void) snprintf(path0, sizeof (path0), ztest_dev_template,
4787 ztest_opts.zo_dir, ztest_opts.zo_pool,
4788 top * leaves + zs->zs_splits);
4789 (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template,
4790 ztest_opts.zo_dir, ztest_opts.zo_pool,
4791 top * leaves + leaf);
4792
4793 vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
4794 if (vd0 != NULL && vd0->vdev_top->vdev_islog)
4795 islog = B_TRUE;
4796
4797 /*
4798 * If the top-level vdev needs to be resilvered
4799 * then we only allow faults on the device that is
4800 * resilvering.
4801 */
4802 if (vd0 != NULL && maxfaults != 1 &&
4803 (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
4804 vd0->vdev_resilvering)) {
4805 /*
4806 * Make vd0 explicitly claim to be unreadable,
4807 * or unwriteable, or reach behind its back
4808 * and close the underlying fd. We can do this if
4809 * maxfaults == 0 because we'll fail and reexecute,
4810 * and we can do it if maxfaults >= 2 because we'll
4811 * have enough redundancy. If maxfaults == 1, the
4812 * combination of this with injection of random data
4813 * corruption below exceeds the pool's fault tolerance.
4814 */
4815 vdev_file_t *vf = vd0->vdev_tsd;
4816
4817 if (vf != NULL && ztest_random(3) == 0) {
4818 (void) close(vf->vf_vnode->v_fd);
4819 vf->vf_vnode->v_fd = -1;
4820 } else if (ztest_random(2) == 0) {
4821 vd0->vdev_cant_read = B_TRUE;
4822 } else {
4823 vd0->vdev_cant_write = B_TRUE;
4824 }
5634 ztest_dataset_open(t) != 0)
5635 return;
5636 VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t,
5637 THR_BOUND, &tid[t]) == 0);
5638 }
5639
5640 /*
5641 * Wait for all of the tests to complete. We go in reverse order
5642 * so we don't close datasets while threads are still using them.
5643 */
5644 for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) {
5645 VERIFY(thr_join(tid[t], NULL, NULL) == 0);
5646 if (t < ztest_opts.zo_datasets)
5647 ztest_dataset_close(t);
5648 }
5649
5650 txg_wait_synced(spa_get_dsl(spa), 0);
5651
5652 zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
5653 zs->zs_space = metaslab_class_get_space(spa_normal_class(spa));
5654
5655 umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t));
5656
5657 /* Kill the resume thread */
5658 ztest_exiting = B_TRUE;
5659 VERIFY(thr_join(resume_tid, NULL, NULL) == 0);
5660 ztest_resume(spa);
5661
5662 /*
5663 * Right before closing the pool, kick off a bunch of async I/O;
5664 * spa_close() should wait for it to complete.
5665 */
5666 for (uint64_t object = 1; object < 50; object++)
5667 dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20);
5668
5669 spa_close(spa, FTAG);
5670
5671 /*
5672 * Verify that we can loop over all pools.
5673 */
|
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2013 Steven Hartland. All rights reserved.
26 */
27
28 /*
29 * The objective of this program is to provide a DMU/ZAP/SPA stress test
30 * that runs entirely in userland, is easy to use, and easy to extend.
31 *
32 * The overall design of the ztest program is as follows:
33 *
34 * (1) For each major functional area (e.g. adding vdevs to a pool,
35 * creating and destroying datasets, reading and writing objects, etc)
36 * we have a simple routine to test that functionality. These
37 * individual routines do not have to do anything "stressful".
38 *
39 * (2) We turn these simple functionality tests into a stress test by
40 * running them all in parallel, with as many threads as desired,
41 * and spread across as many datasets, objects, and vdevs as desired.
42 *
43 * (3) While all this is happening, we inject faults into the pool to
750 if (0 != access(zo->zo_alt_ztest, X_OK)) {
751 ztest_dump_core = B_FALSE;
752 fatal(B_TRUE, "invalid alternate ztest: %s",
753 zo->zo_alt_ztest);
754 } else if (0 != access(zo->zo_alt_libpath, X_OK)) {
755 ztest_dump_core = B_FALSE;
756 fatal(B_TRUE, "invalid alternate lib directory %s",
757 zo->zo_alt_libpath);
758 }
759
760 umem_free(cmd, MAXPATHLEN);
761 umem_free(realaltdir, MAXPATHLEN);
762 }
763 }
764
static void
ztest_kill(ztest_shared_t *zs)
{
	/*
	 * Simulate a hard crash: record the final allocation statistics
	 * in the shared state, flush the cached pool config, then
	 * SIGKILL ourselves so that no destructors, atexit handlers, or
	 * normal pool shutdown code runs.
	 */
	zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(ztest_spa));
	zs->zs_space = metaslab_class_get_space(spa_normal_class(ztest_spa));

	/*
	 * Before we kill off ztest, make sure that the config is updated.
	 * See comment above spa_config_sync().
	 */
	mutex_enter(&spa_namespace_lock);
	spa_config_sync(ztest_spa, B_FALSE, B_FALSE);
	mutex_exit(&spa_namespace_lock);

	/* Dump the in-memory debug messages before we disappear. */
	zfs_dbgmsg_print(FTAG);
	(void) kill(getpid(), SIGKILL);
}
782
783 static uint64_t
784 ztest_random(uint64_t range)
785 {
786 uint64_t r;
787
788 ASSERT3S(ztest_fd_rand, >=, 0);
789
790 if (range == 0)
791 return (0);
792
793 if (read(ztest_fd_rand, &r, sizeof (r)) != sizeof (r))
794 fatal(1, "short read from /dev/urandom");
795
796 return (r % range);
797 }
798
799 /* ARGSUSED */
2724
2725 }
2726
2727 /*
2728 * Verify that we can attach and detach devices.
2729 */
2730 /* ARGSUSED */
void
ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
{
	ztest_shared_t *zs = ztest_shared;
	spa_t *spa = ztest_spa;
	spa_aux_vdev_t *sav = &spa->spa_spares;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *pvd;
	nvlist_t *root;
	uint64_t leaves;
	uint64_t leaf, top;
	uint64_t ashift = ztest_get_ashift();
	uint64_t oldguid, pguid;
	uint64_t oldsize, newsize;
	char oldpath[MAXPATHLEN], newpath[MAXPATHLEN];
	int replacing;
	int oldvd_has_siblings = B_FALSE;
	int newvd_is_spare = B_FALSE;
	int oldvd_is_log;
	int error, expected_error;

	/* Serialize against other vdev-mutating tests. */
	VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
	leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;

	spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);

	/*
	 * Decide whether to do an attach or a replace.
	 */
	replacing = ztest_random(2);

	/*
	 * Pick a random top-level vdev.
	 */

	/*
	 * NOTE(review): the body of this function (the actual attach /
	 * replace attempt) is not present in this excerpt; `error' and
	 * `expected_error' are set in the elided portion.
	 */

	/*
	 * If our parent was the replacing vdev, but the replace completed,
	 * then instead of failing with ENOTSUP we may either succeed,
	 * fail with ENODEV, or fail with EOVERFLOW.
	 */
	if (expected_error == ENOTSUP &&
	    (error == 0 || error == ENODEV || error == EOVERFLOW))
		expected_error = error;

	/*
	 * If someone grew the LUN, the replacement may be too small.
	 */
	if (error == EOVERFLOW || error == EBUSY)
		expected_error = error;

	/* XXX workaround 6690467 */
	if (error != expected_error && expected_error != EBUSY) {
		/*
		 * NOTE(review): %llu with bare uint64_t arguments is a
		 * format-width mismatch on LP64 targets where uint64_t is
		 * unsigned long; consider PRIu64 or an explicit cast —
		 * confirm against fatal()'s format handling.
		 */
		fatal(0, "attach (%s %llu, %s %llu, %d) "
		    "returned %d, expected %d",
		    oldpath, oldsize, newpath,
		    newsize, replacing, error, expected_error);
	}

	VERIFY(mutex_unlock(&ztest_vdev_lock) == 0);
}
2909
2910 /*
2911 * Callback function which expands the physical size of the vdev.
2912 */
2913 vdev_t *
2914 grow_vdev(vdev_t *vd, void *arg)
2915 {
2916 spa_t *spa = vd->vdev_spa;
2917 size_t *newsize = arg;
2918 size_t fsize;
2919 int fd;
2920
2921 ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
2922 ASSERT(vd->vdev_ops->vdev_op_leaf);
2923
2924 if ((fd = open(vd->vdev_path, O_RDWR)) == -1)
4794 * and we'll write random garbage to the randomly chosen leaf.
4795 */
4796 (void) snprintf(path0, sizeof (path0), ztest_dev_template,
4797 ztest_opts.zo_dir, ztest_opts.zo_pool,
4798 top * leaves + zs->zs_splits);
4799 (void) snprintf(pathrand, sizeof (pathrand), ztest_dev_template,
4800 ztest_opts.zo_dir, ztest_opts.zo_pool,
4801 top * leaves + leaf);
4802
4803 vd0 = vdev_lookup_by_path(spa->spa_root_vdev, path0);
4804 if (vd0 != NULL && vd0->vdev_top->vdev_islog)
4805 islog = B_TRUE;
4806
4807 /*
4808 * If the top-level vdev needs to be resilvered
4809 * then we only allow faults on the device that is
4810 * resilvering.
4811 */
4812 if (vd0 != NULL && maxfaults != 1 &&
4813 (!vdev_resilver_needed(vd0->vdev_top, NULL, NULL) ||
4814 vd0->vdev_resilver_txg != 0)) {
4815 /*
4816 * Make vd0 explicitly claim to be unreadable,
4817 * or unwriteable, or reach behind its back
4818 * and close the underlying fd. We can do this if
4819 * maxfaults == 0 because we'll fail and reexecute,
4820 * and we can do it if maxfaults >= 2 because we'll
4821 * have enough redundancy. If maxfaults == 1, the
4822 * combination of this with injection of random data
4823 * corruption below exceeds the pool's fault tolerance.
4824 */
4825 vdev_file_t *vf = vd0->vdev_tsd;
4826
4827 if (vf != NULL && ztest_random(3) == 0) {
4828 (void) close(vf->vf_vnode->v_fd);
4829 vf->vf_vnode->v_fd = -1;
4830 } else if (ztest_random(2) == 0) {
4831 vd0->vdev_cant_read = B_TRUE;
4832 } else {
4833 vd0->vdev_cant_write = B_TRUE;
4834 }
5644 ztest_dataset_open(t) != 0)
5645 return;
5646 VERIFY(thr_create(0, 0, ztest_thread, (void *)(uintptr_t)t,
5647 THR_BOUND, &tid[t]) == 0);
5648 }
5649
5650 /*
5651 * Wait for all of the tests to complete. We go in reverse order
5652 * so we don't close datasets while threads are still using them.
5653 */
5654 for (int t = ztest_opts.zo_threads - 1; t >= 0; t--) {
5655 VERIFY(thr_join(tid[t], NULL, NULL) == 0);
5656 if (t < ztest_opts.zo_datasets)
5657 ztest_dataset_close(t);
5658 }
5659
5660 txg_wait_synced(spa_get_dsl(spa), 0);
5661
5662 zs->zs_alloc = metaslab_class_get_alloc(spa_normal_class(spa));
5663 zs->zs_space = metaslab_class_get_space(spa_normal_class(spa));
5664 zfs_dbgmsg_print(FTAG);
5665
5666 umem_free(tid, ztest_opts.zo_threads * sizeof (thread_t));
5667
5668 /* Kill the resume thread */
5669 ztest_exiting = B_TRUE;
5670 VERIFY(thr_join(resume_tid, NULL, NULL) == 0);
5671 ztest_resume(spa);
5672
5673 /*
5674 * Right before closing the pool, kick off a bunch of async I/O;
5675 * spa_close() should wait for it to complete.
5676 */
5677 for (uint64_t object = 1; object < 50; object++)
5678 dmu_prefetch(spa->spa_meta_objset, object, 0, 1ULL << 20);
5679
5680 spa_close(spa, FTAG);
5681
5682 /*
5683 * Verify that we can loop over all pools.
5684 */
|