70 #ifdef _KERNEL
71 #include <sys/bootprops.h>
72 #include <sys/callb.h>
73 #include <sys/cpupart.h>
74 #include <sys/pool.h>
75 #include <sys/sysdc.h>
76 #include <sys/zone.h>
77 #endif /* _KERNEL */
78
79 #include "zfs_prop.h"
80 #include "zfs_comutil.h"
81
82 /*
83 * The interval, in seconds, at which failed configuration cache file writes
84 * should be retried.
85 */
86 static int zfs_ccw_retry_interval = 300;
87
88 typedef enum zti_modes {
89 ZTI_MODE_FIXED, /* value is # of threads (min 1) */
90 ZTI_MODE_ONLINE_PERCENT, /* value is % of online CPUs */
91 ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */
92 ZTI_MODE_NULL, /* don't create a taskq */
93 ZTI_NMODES
94 } zti_modes_t;
95
96 #define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) }
97 #define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 }
98 #define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 }
99 #define ZTI_NULL { ZTI_MODE_NULL, 0, 0 }
100
101 #define ZTI_N(n) ZTI_P(n, 1)
102 #define ZTI_ONE ZTI_N(1)
103
104 typedef struct zio_taskq_info {
105 zti_modes_t zti_mode;
106 uint_t zti_value;
107 uint_t zti_count;
108 } zio_taskq_info_t;
109
110 static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
111 "issue", "issue_high", "intr", "intr_high"
112 };
113
114 /*
115 * This table defines the taskq settings for each ZFS I/O type. When
116 * initializing a pool, we use this table to create an appropriately sized
117 * taskq. Some operations are low volume and therefore have a small, static
129 * need to be handled with minimum delay.
130 */
131 const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
132 /* ISSUE ISSUE_HIGH INTR INTR_HIGH */
133 { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
134 { ZTI_N(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, /* READ */
135 { ZTI_BATCH, ZTI_N(5), ZTI_N(8), ZTI_N(5) }, /* WRITE */
136 { ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
137 { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */
138 { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */
139 };
140
141 static void spa_sync_version(void *arg, dmu_tx_t *tx);
142 static void spa_sync_props(void *arg, dmu_tx_t *tx);
143 static boolean_t spa_has_active_shared_spare(spa_t *spa);
144 static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
145 spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
146 char **ereport);
147 static void spa_vdev_resilver_done(spa_t *spa);
148
149 uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */
150 id_t zio_taskq_psrset_bind = PS_NONE;
151 boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
152 uint_t zio_taskq_basedc = 80; /* base duty cycle */
153
154 boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */
155 extern int zfs_sync_pass_deferred_free;
156
157 /*
158 * This (illegal) pool name is used when temporarily importing a spa_t in order
159 * to get the vdev stats associated with the imported devices.
160 */
161 #define TRYIMPORT_NAME "$import"
162
163 /*
164 * ==========================================================================
165 * SPA properties routines
166 * ==========================================================================
167 */
168
169 /*
825 const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
826 enum zti_modes mode = ztip->zti_mode;
827 uint_t value = ztip->zti_value;
828 uint_t count = ztip->zti_count;
829 spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
830 char name[32];
831 uint_t flags = 0;
832 boolean_t batch = B_FALSE;
833
834 if (mode == ZTI_MODE_NULL) {
835 tqs->stqs_count = 0;
836 tqs->stqs_taskq = NULL;
837 return;
838 }
839
840 ASSERT3U(count, >, 0);
841
842 tqs->stqs_count = count;
843 tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP);
844
845 for (uint_t i = 0; i < count; i++) {
846 taskq_t *tq;
847
848 switch (mode) {
849 case ZTI_MODE_FIXED:
850 ASSERT3U(value, >=, 1);
851 value = MAX(value, 1);
852 break;
853
854 case ZTI_MODE_BATCH:
855 batch = B_TRUE;
856 flags |= TASKQ_THREADS_CPU_PCT;
857 value = zio_taskq_batch_pct;
858 break;
859
860 case ZTI_MODE_ONLINE_PERCENT:
861 flags |= TASKQ_THREADS_CPU_PCT;
862 break;
863
864 default:
865 panic("unrecognized mode for %s_%s taskq (%u:%u) in "
866 "spa_activate()",
867 zio_type_name[t], zio_taskq_types[q], mode, value);
868 break;
869 }
870
871 if (count > 1) {
872 (void) snprintf(name, sizeof (name), "%s_%s_%u",
873 zio_type_name[t], zio_taskq_types[q], i);
874 } else {
875 (void) snprintf(name, sizeof (name), "%s_%s",
876 zio_type_name[t], zio_taskq_types[q]);
877 }
878
879 if (zio_taskq_sysdc && spa->spa_proc != &p0) {
880 if (batch)
881 flags |= TASKQ_DC_BATCH;
882
883 tq = taskq_create_sysdc(name, value, 50, INT_MAX,
884 spa->spa_proc, zio_taskq_basedc, flags);
885 } else {
886 tq = taskq_create_proc(name, value, maxclsyspri, 50,
887 INT_MAX, spa->spa_proc, flags);
888 }
889
890 tqs->stqs_taskq[i] = tq;
891 }
892 }
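/*
 * Illustrative sketch (not part of the original source): when stqs_count > 1,
 * callers spread dispatches across the taskqs created above so that a single
 * taskq lock does not become a point of contention. A dispatcher along these
 * lines is assumed; the function name below is hypothetical.
 */
#if 0
static void
spa_taskq_dispatch_sketch(spa_t *spa, zio_type_t t, zio_taskq_type_t q,
    task_func_t *func, void *arg, uint_t flags)
{
	spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
	taskq_t *tq;

	ASSERT3U(tqs->stqs_count, !=, 0);

	if (tqs->stqs_count == 1) {
		tq = tqs->stqs_taskq[0];
	} else {
		/* Pick a taskq pseudo-randomly to spread the load. */
		tq = tqs->stqs_taskq[gethrtime() % tqs->stqs_count];
	}

	(void) taskq_dispatch(tq, func, arg, flags);
}
#endif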
893
894 static void
895 spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
896 {
897 spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
898
899 if (tqs->stqs_taskq == NULL) {
900 ASSERT0(tqs->stqs_count);
901 return;
902 }
903
904 for (uint_t i = 0; i < tqs->stqs_count; i++) {
905 ASSERT3P(tqs->stqs_taskq[i], !=, NULL);
906 taskq_destroy(tqs->stqs_taskq[i]);
5720 */
5721
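/*
 * bplist_iterate()/bpobj_iterate() callback: defer the free by appending
 * the block pointer to the bpobj passed in as arg.
 */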
5722 static int
5723 bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
5724 {
5725 bpobj_t *bpo = arg;
5726 bpobj_enqueue(bpo, bp, tx);
5727 return (0);
5728 }
5729
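/*
 * bplist_iterate()/bpobj_iterate() callback: free the block right away in
 * the syncing txg, as a child of the root zio passed in as arg.
 */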
5730 static int
5731 spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
5732 {
5733 zio_t *zio = arg;
5734
5735 zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp,
5736 zio->io_flags));
5737 return (0);
5738 }
5739
5740 static void
5741 spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
5742 {
5743 char *packed = NULL;
5744 size_t bufsize;
5745 size_t nvsize = 0;
5746 dmu_buf_t *db;
5747
5748 VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);
5749
5750 /*
5751 * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
5752 * information. This avoids the dbuf_will_dirty() path and
5753 * saves us a pre-read to get data we don't actually care about.
5754 */
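/*
 * For example, if the packed nvlist is 6000 bytes and the block size is
 * 8K, P2ROUNDUP(6000, 8192) == 8192; the 2192 trailing bytes are zeroed
 * below so that a full, deterministic block is written.
 */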
5755 bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
5756 packed = kmem_alloc(bufsize, KM_SLEEP);
5757
5758 VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
5759 KM_SLEEP) == 0);
5760 bzero(packed + nvsize, bufsize - nvsize);
6046 /* Keeping the freedir open increases spa_minref */
6047 spa->spa_minref += 3;
6048 }
6049
6050 if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES &&
6051 spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
6052 spa_feature_create_zap_objects(spa, tx);
6053 }
6054 rrw_exit(&dp->dp_config_rwlock, FTAG);
6055 }
6056
6057 /*
6058 * Sync the specified transaction group. New blocks may be dirtied as
6059 * part of the process, so we iterate until it converges.
6060 */
6061 void
6062 spa_sync(spa_t *spa, uint64_t txg)
6063 {
6064 dsl_pool_t *dp = spa->spa_dsl_pool;
6065 objset_t *mos = spa->spa_meta_objset;
6066 bpobj_t *defer_bpo = &spa->spa_deferred_bpobj;
6067 bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
6068 vdev_t *rvd = spa->spa_root_vdev;
6069 vdev_t *vd;
6070 dmu_tx_t *tx;
6071 int error;
6072
6073 VERIFY(spa_writeable(spa));
6074
6075 /*
6076 * Lock out configuration changes.
6077 */
6078 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
6079
6080 spa->spa_syncing_txg = txg;
6081 spa->spa_sync_pass = 0;
6082
6083 /*
6084 * If there are any pending vdev state changes, convert them
6085 * into config changes that go out with this transaction group.
6086 */
6126 if (i == rvd->vdev_children) {
6127 spa->spa_deflate = TRUE;
6128 VERIFY(0 == zap_add(spa->spa_meta_objset,
6129 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
6130 sizeof (uint64_t), 1, &spa->spa_deflate, tx));
6131 }
6132 }
6133
6134 /*
6135 * If anything has changed in this txg, or if someone is waiting
6136 * for this txg to sync (e.g., spa_vdev_remove()), push the
6137 * deferred frees from the previous txg. If not, leave them
6138 * alone so that we don't generate work on an otherwise idle
6139 * system.
6140 */
6141 if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
6142 !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
6143 !txg_list_empty(&dp->dp_sync_tasks, txg) ||
6144 ((dsl_scan_active(dp->dp_scan) ||
6145 txg_sync_waiting(dp)) && !spa_shutting_down(spa))) {
6146 zio_t *zio = zio_root(spa, NULL, NULL, 0);
6147 VERIFY3U(bpobj_iterate(defer_bpo,
6148 spa_free_sync_cb, zio, tx), ==, 0);
6149 VERIFY0(zio_wait(zio));
6150 }
6151
6152 /*
6153 * Iterate to convergence.
6154 */
6155 do {
6156 int pass = ++spa->spa_sync_pass;
6157
6158 spa_sync_config_object(spa, tx);
6159 spa_sync_aux_dev(spa, &spa->spa_spares, tx,
6160 ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
6161 spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
6162 ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
6163 spa_errlog_sync(spa, txg);
6164 dsl_pool_sync(dp, txg);
6165
6166 if (pass < zfs_sync_pass_deferred_free) {
6167 zio_t *zio = zio_root(spa, NULL, NULL, 0);
6168 bplist_iterate(free_bpl, spa_free_sync_cb,
6169 zio, tx);
6170 VERIFY(zio_wait(zio) == 0);
6171 } else {
6172 bplist_iterate(free_bpl, bpobj_enqueue_cb,
6173 defer_bpo, tx);
6174 }
6175
6176 ddt_sync(spa, txg);
6177 dsl_scan_sync(dp, tx);
6178
6179 while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) != NULL)
6180 vdev_sync(vd, txg);
6181
6182 if (pass == 1)
6183 spa_sync_upgrades(spa, tx);
6184
6185 } while (dmu_objset_is_dirty(mos, txg));
6186
6187 /*
6188 * Rewrite the vdev configuration (which includes the uberblock)
6189 * to commit the transaction group.
6190 *
6191 * If there are no dirty vdevs, we sync the uberblock to a few
6192 * random top-level vdevs that are known to be visible in the
6193 * config cache (see spa_vdev_add() for a complete description).
70 #ifdef _KERNEL
71 #include <sys/bootprops.h>
72 #include <sys/callb.h>
73 #include <sys/cpupart.h>
74 #include <sys/pool.h>
75 #include <sys/sysdc.h>
76 #include <sys/zone.h>
77 #endif /* _KERNEL */
78
79 #include "zfs_prop.h"
80 #include "zfs_comutil.h"
81
82 /*
83 * The interval, in seconds, at which failed configuration cache file writes
84 * should be retried.
85 */
86 static int zfs_ccw_retry_interval = 300;
87
88 typedef enum zti_modes {
89 ZTI_MODE_FIXED, /* value is # of threads (min 1) */
90 ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */
91 ZTI_MODE_NULL, /* don't create a taskq */
92 ZTI_NMODES
93 } zti_modes_t;
94
95 #define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) }
96 #define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 }
97 #define ZTI_NULL { ZTI_MODE_NULL, 0, 0 }
98
99 #define ZTI_N(n) ZTI_P(n, 1)
100 #define ZTI_ONE ZTI_N(1)
101
102 typedef struct zio_taskq_info {
103 zti_modes_t zti_mode;
104 uint_t zti_value;
105 uint_t zti_count;
106 } zio_taskq_info_t;
107
108 static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
109 "issue", "issue_high", "intr", "intr_high"
110 };
111
112 /*
113 * This table defines the taskq settings for each ZFS I/O type. When
114 * initializing a pool, we use this table to create an appropriately sized
115 * taskq. Some operations are low volume and therefore have a small, static
127 * need to be handled with minimum delay.
128 */
129 const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
130 /* ISSUE ISSUE_HIGH INTR INTR_HIGH */
131 { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* NULL */
132 { ZTI_N(8), ZTI_NULL, ZTI_BATCH, ZTI_NULL }, /* READ */
133 { ZTI_BATCH, ZTI_N(5), ZTI_N(8), ZTI_N(5) }, /* WRITE */
134 { ZTI_P(12, 8), ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* FREE */
135 { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* CLAIM */
136 { ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */
137 };
138
139 static void spa_sync_version(void *arg, dmu_tx_t *tx);
140 static void spa_sync_props(void *arg, dmu_tx_t *tx);
141 static boolean_t spa_has_active_shared_spare(spa_t *spa);
142 static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
143 spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
144 char **ereport);
145 static void spa_vdev_resilver_done(spa_t *spa);
146
147 uint_t zio_taskq_batch_pct = 75; /* 1 thread per 75% of online CPUs */
148 id_t zio_taskq_psrset_bind = PS_NONE;
149 boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
150 uint_t zio_taskq_basedc = 80; /* base duty cycle */
151
152 boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */
153 extern int zfs_sync_pass_deferred_free;
154
155 /*
156 * This (illegal) pool name is used when temporarily importing a spa_t in order
157 * to get the vdev stats associated with the imported devices.
158 */
159 #define TRYIMPORT_NAME "$import"
160
161 /*
162 * ==========================================================================
163 * SPA properties routines
164 * ==========================================================================
165 */
166
167 /*
823 const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
824 enum zti_modes mode = ztip->zti_mode;
825 uint_t value = ztip->zti_value;
826 uint_t count = ztip->zti_count;
827 spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
828 char name[32];
829 uint_t flags = 0;
830 boolean_t batch = B_FALSE;
831
832 if (mode == ZTI_MODE_NULL) {
833 tqs->stqs_count = 0;
834 tqs->stqs_taskq = NULL;
835 return;
836 }
837
838 ASSERT3U(count, >, 0);
839
840 tqs->stqs_count = count;
841 tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP);
842
843 switch (mode) {
844 case ZTI_MODE_FIXED:
845 ASSERT3U(value, >=, 1);
846 value = MAX(value, 1);
847 break;
848
849 case ZTI_MODE_BATCH:
850 batch = B_TRUE;
851 flags |= TASKQ_THREADS_CPU_PCT;
852 value = zio_taskq_batch_pct;
853 break;
854
855 default:
856 panic("unrecognized mode for %s_%s taskq (%u:%u) in "
857 "spa_activate()",
858 zio_type_name[t], zio_taskq_types[q], mode, value);
859 break;
860 }
861
862 for (uint_t i = 0; i < count; i++) {
863 taskq_t *tq;
864
865 if (count > 1) {
866 (void) snprintf(name, sizeof (name), "%s_%s_%u",
867 zio_type_name[t], zio_taskq_types[q], i);
868 } else {
869 (void) snprintf(name, sizeof (name), "%s_%s",
870 zio_type_name[t], zio_taskq_types[q]);
871 }
872
873 if (zio_taskq_sysdc && spa->spa_proc != &p0) {
874 if (batch)
875 flags |= TASKQ_DC_BATCH;
876
877 tq = taskq_create_sysdc(name, value, 50, INT_MAX,
878 spa->spa_proc, zio_taskq_basedc, flags);
879 } else {
880 pri_t pri = maxclsyspri;
881 /*
882 * The write issue taskq can be extremely CPU
883 * intensive. Run it at slightly lower priority
884 * than the other taskqs.
885 */
886 if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE)
887 pri--;
888
889 tq = taskq_create_proc(name, value, pri, 50,
890 INT_MAX, spa->spa_proc, flags);
891 }
892
893 tqs->stqs_taskq[i] = tq;
894 }
895 }
896
897 static void
898 spa_taskqs_fini(spa_t *spa, zio_type_t t, zio_taskq_type_t q)
899 {
900 spa_taskqs_t *tqs = &spa->spa_zio_taskq[t][q];
901
902 if (tqs->stqs_taskq == NULL) {
903 ASSERT0(tqs->stqs_count);
904 return;
905 }
906
907 for (uint_t i = 0; i < tqs->stqs_count; i++) {
908 ASSERT3P(tqs->stqs_taskq[i], !=, NULL);
909 taskq_destroy(tqs->stqs_taskq[i]);
5723 */
5724
5725 static int
5726 bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
5727 {
5728 bpobj_t *bpo = arg;
5729 bpobj_enqueue(bpo, bp, tx);
5730 return (0);
5731 }
5732
5733 static int
5734 spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
5735 {
5736 zio_t *zio = arg;
5737
5738 zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp,
5739 zio->io_flags));
5740 return (0);
5741 }
5742
5743 /*
5744 * Note: this simple function is not inlined to make it easier to dtrace the
5745 * amount of time spent syncing frees.
5746 */
5747 static void
5748 spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx)
5749 {
5750 zio_t *zio = zio_root(spa, NULL, NULL, 0);
5751 bplist_iterate(bpl, spa_free_sync_cb, zio, tx);
5752 VERIFY(zio_wait(zio) == 0);
5753 }
5754
5755 /*
5756 * Note: this simple function is not inlined to make it easier to dtrace the
5757 * amount of time spent syncing deferred frees.
5758 */
5759 static void
5760 spa_sync_deferred_frees(spa_t *spa, dmu_tx_t *tx)
5761 {
5762 zio_t *zio = zio_root(spa, NULL, NULL, 0);
5763 VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj,
5764 spa_free_sync_cb, zio, tx), ==, 0);
5765 VERIFY0(zio_wait(zio));
5766 }
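/*
 * For example (an assumed invocation, not from the source), the time spent
 * in spa_sync_frees() can be measured with DTrace roughly as follows:
 *
 *	dtrace -n '
 *	    fbt::spa_sync_frees:entry { self->ts = timestamp; }
 *	    fbt::spa_sync_frees:return /self->ts/ {
 *		@["spa_sync_frees ns"] = quantize(timestamp - self->ts);
 *		self->ts = 0;
 *	    }'
 */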
5767
5768
5769 static void
5770 spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
5771 {
5772 char *packed = NULL;
5773 size_t bufsize;
5774 size_t nvsize = 0;
5775 dmu_buf_t *db;
5776
5777 VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);
5778
5779 /*
5780 * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
5781 * information. This avoids the dbuf_will_dirty() path and
5782 * saves us a pre-read to get data we don't actually care about.
5783 */
5784 bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
5785 packed = kmem_alloc(bufsize, KM_SLEEP);
5786
5787 VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
5788 KM_SLEEP) == 0);
5789 bzero(packed + nvsize, bufsize - nvsize);
6075 /* Keeping the freedir open increases spa_minref */
6076 spa->spa_minref += 3;
6077 }
6078
6079 if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES &&
6080 spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
6081 spa_feature_create_zap_objects(spa, tx);
6082 }
6083 rrw_exit(&dp->dp_config_rwlock, FTAG);
6084 }
6085
6086 /*
6087 * Sync the specified transaction group. New blocks may be dirtied as
6088 * part of the process, so we iterate until it converges.
6089 */
6090 void
6091 spa_sync(spa_t *spa, uint64_t txg)
6092 {
6093 dsl_pool_t *dp = spa->spa_dsl_pool;
6094 objset_t *mos = spa->spa_meta_objset;
6095 bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
6096 vdev_t *rvd = spa->spa_root_vdev;
6097 vdev_t *vd;
6098 dmu_tx_t *tx;
6099 int error;
6100
6101 VERIFY(spa_writeable(spa));
6102
6103 /*
6104 * Lock out configuration changes.
6105 */
6106 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
6107
6108 spa->spa_syncing_txg = txg;
6109 spa->spa_sync_pass = 0;
6110
6111 /*
6112 * If there are any pending vdev state changes, convert them
6113 * into config changes that go out with this transaction group.
6114 */
6154 if (i == rvd->vdev_children) {
6155 spa->spa_deflate = TRUE;
6156 VERIFY(0 == zap_add(spa->spa_meta_objset,
6157 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
6158 sizeof (uint64_t), 1, &spa->spa_deflate, tx));
6159 }
6160 }
6161
6162 /*
6163 * If anything has changed in this txg, or if someone is waiting
6164 * for this txg to sync (e.g., spa_vdev_remove()), push the
6165 * deferred frees from the previous txg. If not, leave them
6166 * alone so that we don't generate work on an otherwise idle
6167 * system.
6168 */
6169 if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
6170 !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
6171 !txg_list_empty(&dp->dp_sync_tasks, txg) ||
6172 ((dsl_scan_active(dp->dp_scan) ||
6173 txg_sync_waiting(dp)) && !spa_shutting_down(spa))) {
6174 spa_sync_deferred_frees(spa, tx);
6175 }
6176
6177 /*
6178 * Iterate to convergence.
6179 */
6180 do {
6181 int pass = ++spa->spa_sync_pass;
6182
6183 spa_sync_config_object(spa, tx);
6184 spa_sync_aux_dev(spa, &spa->spa_spares, tx,
6185 ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
6186 spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
6187 ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
6188 spa_errlog_sync(spa, txg);
6189 dsl_pool_sync(dp, txg);
6190
6191 if (pass < zfs_sync_pass_deferred_free) {
6192 spa_sync_frees(spa, free_bpl, tx);
6193 } else {
6194 bplist_iterate(free_bpl, bpobj_enqueue_cb,
6195 &spa->spa_deferred_bpobj, tx);
6196 }
6197
6198 ddt_sync(spa, txg);
6199 dsl_scan_sync(dp, tx);
6200
6201 while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, txg)) != NULL)
6202 vdev_sync(vd, txg);
6203
6204 if (pass == 1)
6205 spa_sync_upgrades(spa, tx);
6206
6207 } while (dmu_objset_is_dirty(mos, txg));
6208
6209 /*
6210 * Rewrite the vdev configuration (which includes the uberblock)
6211 * to commit the transaction group.
6212 *
6213 * If there are no dirty vdevs, we sync the uberblock to a few
6214 * random top-level vdevs that are known to be visible in the
6215 * config cache (see spa_vdev_add() for a complete description).