/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_synctask.h>
#include <sys/metaslab.h>

#define	DST_AVG_BLKSHIFT	14

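/*
 * Default checkfunc used when a caller passes checkfunc == NULL to
 * dsl_sync_task_create(); it approves the task unconditionally.
 */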
/* ARGSUSED */
static int
dsl_null_checkfunc(void *arg1, void *arg2, dmu_tx_t *tx)
{
	return (0);
}

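/*
 * Allocate an empty sync task group associated with pool "dp".  Tasks are
 * added with dsl_sync_task_create() and dispatched with either
 * dsl_sync_task_group_wait() or dsl_sync_task_group_nowait().
 */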
dsl_sync_task_group_t *
dsl_sync_task_group_create(dsl_pool_t *dp)
{
	dsl_sync_task_group_t *dstg;

	dstg = kmem_zalloc(sizeof (dsl_sync_task_group_t), KM_SLEEP);
	list_create(&dstg->dstg_tasks, sizeof (dsl_sync_task_t),
	    offsetof(dsl_sync_task_t, dst_node));
	dstg->dstg_pool = dp;

	return (dstg);
}

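/*
 * Append a task to the group.  "blocks_modified" is the caller's estimate
 * of how many blocks the syncfunc will dirty; it is converted to bytes
 * (assuming an average block size of 2^DST_AVG_BLKSHIFT) and accumulated
 * in dstg_space for the space check in dsl_sync_task_group_sync().
 */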
void
dsl_sync_task_create(dsl_sync_task_group_t *dstg,
    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
    void *arg1, void *arg2, int blocks_modified)
{
	dsl_sync_task_t *dst;

	if (checkfunc == NULL)
		checkfunc = dsl_null_checkfunc;
	dst = kmem_zalloc(sizeof (dsl_sync_task_t), KM_SLEEP);
	dst->dst_checkfunc = checkfunc;
	dst->dst_syncfunc = syncfunc;
	dst->dst_arg1 = arg1;
	dst->dst_arg2 = arg2;
	list_insert_tail(&dstg->dstg_tasks, dst);

	dstg->dstg_space += blocks_modified << DST_AVG_BLKSHIFT;
}

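/*
 * Dispatch the group from open context and wait for it to complete.
 * This assigns a transaction, runs each checkfunc as a preliminary error
 * check (under the config lock as reader), hands the group to syncing
 * context via dp_sync_tasks, and waits for that txg to sync.  If syncing
 * context reports EAGAIN, we wait a few more txgs and retry the whole
 * group.  Returns 0 on success or an error reported by a checkfunc.
 */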
int
dsl_sync_task_group_wait(dsl_sync_task_group_t *dstg)
{
	dmu_tx_t *tx;
	uint64_t txg;
	dsl_sync_task_t *dst;

top:
	tx = dmu_tx_create_dd(dstg->dstg_pool->dp_mos_dir);
	VERIFY(0 == dmu_tx_assign(tx, TXG_WAIT));

	txg = dmu_tx_get_txg(tx);

	/* Do a preliminary error check. */
	dstg->dstg_err = 0;
#ifdef ZFS_DEBUG
	/*
	 * Only check half the time; otherwise the sync-context
	 * check will almost never fail.
	 */
	if (spa_get_random(2) == 0)
		goto skip;
#endif
	rw_enter(&dstg->dstg_pool->dp_config_rwlock, RW_READER);
	for (dst = list_head(&dstg->dstg_tasks); dst;
	    dst = list_next(&dstg->dstg_tasks, dst)) {
		dst->dst_err =
		    dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
		if (dst->dst_err)
			dstg->dstg_err = dst->dst_err;
	}
	rw_exit(&dstg->dstg_pool->dp_config_rwlock);

	if (dstg->dstg_err) {
		dmu_tx_commit(tx);
		return (dstg->dstg_err);
	}
skip:

	/*
	 * We don't generally have many sync tasks, so pay the price of
	 * add_tail to get the tasks executed in the right order.
	 */
	VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
	    dstg, txg));

	dmu_tx_commit(tx);

	txg_wait_synced(dstg->dstg_pool, txg);

	if (dstg->dstg_err == EAGAIN) {
		txg_wait_synced(dstg->dstg_pool, txg + TXG_DEFER_SIZE);
		goto top;
	}

	return (dstg->dstg_err);
}

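/*
 * Dispatch the group without waiting for it to complete.  The caller
 * supplies an already-assigned transaction; the group is queued on
 * dp_sync_tasks for that txg and marked as a no-waiter, so
 * dsl_sync_task_group_sync() will destroy it when it is done.
 */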
void
dsl_sync_task_group_nowait(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
{
	uint64_t txg;

	dstg->dstg_nowaiter = B_TRUE;
	txg = dmu_tx_get_txg(tx);
	/*
	 * We don't generally have many sync tasks, so pay the price of
	 * add_tail to get the tasks executed in the right order.
	 */
	VERIFY(0 == txg_list_add_tail(&dstg->dstg_pool->dp_sync_tasks,
	    dstg, txg));
}

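/*
 * Free every task in the group, then the group itself.
 */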
void
dsl_sync_task_group_destroy(dsl_sync_task_group_t *dstg)
{
	dsl_sync_task_t *dst;

	while ((dst = list_head(&dstg->dstg_tasks)) != NULL) {
		list_remove(&dstg->dstg_tasks, dst);
		kmem_free(dst, sizeof (dsl_sync_task_t));
	}
	kmem_free(dstg, sizeof (dsl_sync_task_group_t));
}

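/*
 * Called in syncing context for each group queued on dp_sync_tasks.
 * Verify that the pool has room for the estimated dirty space (tripled,
 * since MOS data is triple-dittoed), re-run every checkfunc while holding
 * the config lock as writer, and if they all succeed run the syncfuncs in
 * order.  No-waiter groups are destroyed here; waited-on groups are
 * destroyed by the waiter after it reads dstg_err.
 */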
void
dsl_sync_task_group_sync(dsl_sync_task_group_t *dstg, dmu_tx_t *tx)
{
	dsl_sync_task_t *dst;
	dsl_pool_t *dp = dstg->dstg_pool;
	uint64_t quota, used;

	ASSERT3U(dstg->dstg_err, ==, 0);

	/*
	 * Check for sufficient space.  We just check against what's
	 * on-disk; we don't want any in-flight accounting to get in our
	 * way, because open context may have already used up various
	 * in-core limits (arc_tempreserve, dsl_pool_tempreserve).
	 */
	quota = dsl_pool_adjustedsize(dp, B_FALSE) -
	    metaslab_class_get_deferred(spa_normal_class(dp->dp_spa));
	used = dp->dp_root_dir->dd_phys->dd_used_bytes;
	/* MOS space is triple-dittoed, so we multiply by 3. */
	if (dstg->dstg_space > 0 && used + dstg->dstg_space * 3 > quota) {
		dstg->dstg_err = ENOSPC;
		return;
	}

	/*
	 * Check for errors by calling checkfuncs.
	 */
	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
	for (dst = list_head(&dstg->dstg_tasks); dst;
	    dst = list_next(&dstg->dstg_tasks, dst)) {
		dst->dst_err =
		    dst->dst_checkfunc(dst->dst_arg1, dst->dst_arg2, tx);
		if (dst->dst_err)
			dstg->dstg_err = dst->dst_err;
	}

	if (dstg->dstg_err == 0) {
		/*
		 * Execute sync tasks.
		 */
		for (dst = list_head(&dstg->dstg_tasks); dst;
		    dst = list_next(&dstg->dstg_tasks, dst)) {
			dst->dst_syncfunc(dst->dst_arg1, dst->dst_arg2, tx);
		}
	}
	rw_exit(&dp->dp_config_rwlock);

	if (dstg->dstg_nowaiter)
		dsl_sync_task_group_destroy(dstg);
}

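/*
 * Convenience wrapper: create a group containing a single task, dispatch
 * it with dsl_sync_task_group_wait(), destroy the group, and return the
 * task's error (0 on success).
 */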
int
dsl_sync_task_do(dsl_pool_t *dp,
    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
    void *arg1, void *arg2, int blocks_modified)
{
	dsl_sync_task_group_t *dstg;
	int err;

	ASSERT(spa_writeable(dp->dp_spa));

	dstg = dsl_sync_task_group_create(dp);
	dsl_sync_task_create(dstg, checkfunc, syncfunc,
	    arg1, arg2, blocks_modified);
	err = dsl_sync_task_group_wait(dstg);
	dsl_sync_task_group_destroy(dstg);
	return (err);
}

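/*
 * Single-task, fire-and-forget variant for callers that already hold an
 * assigned transaction.  The group frees itself once it has been synced;
 * on a read-only pool the request is silently dropped.
 */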
void
dsl_sync_task_do_nowait(dsl_pool_t *dp,
    dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc,
    void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx)
{
	dsl_sync_task_group_t *dstg;

	if (!spa_writeable(dp->dp_spa))
		return;

	dstg = dsl_sync_task_group_create(dp);
	dsl_sync_task_create(dstg, checkfunc, syncfunc,
	    arg1, arg2, blocks_modified);
	dsl_sync_task_group_nowait(dstg, tx);
}
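
/*
 * Usage sketch (illustrative only, not compiled as part of this file).
 * A caller in open context pairs a checkfunc, which validates
 * preconditions without modifying anything, with a syncfunc, which
 * applies the change in syncing context if every checkfunc passed.
 * The names my_feature_arg_t, my_feature_check, my_feature_sync, ds,
 * and mfa below are hypothetical and only demonstrate the calling
 * pattern:
 *
 *	typedef struct my_feature_arg {
 *		uint64_t mfa_value;
 *	} my_feature_arg_t;
 *
 *	static int
 *	my_feature_check(void *arg1, void *arg2, dmu_tx_t *tx)
 *	{
 *		my_feature_arg_t *mfa = arg2;
 *
 *		return (mfa->mfa_value == 0 ? EINVAL : 0);
 *	}
 *
 *	static void
 *	my_feature_sync(void *arg1, void *arg2, dmu_tx_t *tx)
 *	{
 *		dsl_dataset_t *ds = arg1;
 *
 *		(apply the change to on-disk state under tx here)
 *	}
 *
 *	error = dsl_sync_task_do(ds->ds_dir->dd_pool,
 *	    my_feature_check, my_feature_sync, ds, &mfa, 1);
 */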