Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/zfs/dsl_userhold.c
+++ new/usr/src/uts/common/fs/zfs/dsl_userhold.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 24 */
25 25
26 26 #include <sys/zfs_context.h>
27 27 #include <sys/dsl_userhold.h>
28 28 #include <sys/dsl_dataset.h>
↓ open down ↓ |
28 lines elided |
↑ open up ↑ |
29 29 #include <sys/dsl_destroy.h>
30 30 #include <sys/dsl_synctask.h>
31 31 #include <sys/dmu_tx.h>
32 32 #include <sys/zfs_onexit.h>
33 33 #include <sys/dsl_pool.h>
34 34 #include <sys/dsl_dir.h>
35 35 #include <sys/zfs_ioctl.h>
36 36 #include <sys/zap.h>
37 37
38 38 typedef struct dsl_dataset_user_hold_arg {
39 + spa_t *dduha_spa;
39 40 nvlist_t *dduha_holds;
41 + nvlist_t *dduha_chkholds;
42 + nvlist_t *dduha_tmpholds;
40 43 nvlist_t *dduha_errlist;
41 44 minor_t dduha_minor;
42 45 } dsl_dataset_user_hold_arg_t;
43 46
44 47 /*
45 48 * If you add new checks here, you may need to add additional checks to the
46 49 * "temporary" case in snapshot_check() in dmu_objset.c.
47 50 */
48 51 int
49 52 dsl_dataset_user_hold_check_one(dsl_dataset_t *ds, const char *htag,
50 53 boolean_t temphold, dmu_tx_t *tx)
51 54 {
52 55 dsl_pool_t *dp = dmu_tx_pool(tx);
53 56 objset_t *mos = dp->dp_meta_objset;
54 57 int error = 0;
55 58
59 + ASSERT(RRW_READ_HELD(&dp->dp_config_rwlock));
60 +
56 61 if (strlen(htag) > MAXNAMELEN)
57 62 return (E2BIG);
58 63 /* Tempholds have a more restricted length */
59 64 if (temphold && strlen(htag) + MAX_TAG_PREFIX_LEN >= MAXNAMELEN)
60 65 return (E2BIG);
61 66
62 67 /* tags must be unique (if ds already exists) */
63 - if (ds != NULL) {
64 - mutex_enter(&ds->ds_lock);
65 - if (ds->ds_phys->ds_userrefs_obj != 0) {
66 - uint64_t value;
67 - error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
68 - htag, 8, 1, &value);
69 - if (error == 0)
70 - error = SET_ERROR(EEXIST);
71 - else if (error == ENOENT)
72 - error = 0;
73 - }
74 - mutex_exit(&ds->ds_lock);
68 + if (ds != NULL && ds->ds_phys->ds_userrefs_obj != 0) {
69 + uint64_t value;
70 +
71 + error = zap_lookup(mos, ds->ds_phys->ds_userrefs_obj,
72 + htag, 8, 1, &value);
73 + if (error == 0)
74 + error = SET_ERROR(EEXIST);
75 + else if (error == ENOENT)
76 + error = 0;
75 77 }
76 78
77 79 return (error);
78 80 }
79 81
80 82 static int
81 83 dsl_dataset_user_hold_check(void *arg, dmu_tx_t *tx)
82 84 {
83 85 dsl_dataset_user_hold_arg_t *dduha = arg;
84 86 dsl_pool_t *dp = dmu_tx_pool(tx);
85 87 nvpair_t *pair;
86 - int rv = 0;
87 88
88 89 if (spa_version(dp->dp_spa) < SPA_VERSION_USERREFS)
89 90 return (SET_ERROR(ENOTSUP));
90 91
92 + if (!dmu_tx_is_syncing(tx))
93 + return (0);
94 +
91 95 for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
92 96 pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
93 - int error = 0;
94 97 dsl_dataset_t *ds;
95 - char *htag;
98 + int error = 0;
99 + char *htag, *name;
96 100
97 101 /* must be a snapshot */
98 - if (strchr(nvpair_name(pair), '@') == NULL)
102 + name = nvpair_name(pair);
103 + if (strchr(name, '@') == NULL)
99 104 error = SET_ERROR(EINVAL);
100 105
101 106 if (error == 0)
102 107 error = nvpair_value_string(pair, &htag);
103 - if (error == 0) {
104 - error = dsl_dataset_hold(dp,
105 - nvpair_name(pair), FTAG, &ds);
106 - }
108 +
109 + if (error == 0)
110 + error = dsl_dataset_hold(dp, name, FTAG, &ds);
111 +
107 112 if (error == 0) {
108 113 error = dsl_dataset_user_hold_check_one(ds, htag,
109 114 dduha->dduha_minor != 0, tx);
110 115 dsl_dataset_rele(ds, FTAG);
111 116 }
112 117
113 - if (error != 0) {
114 - rv = error;
115 - fnvlist_add_int32(dduha->dduha_errlist,
116 - nvpair_name(pair), error);
118 + if (error == 0) {
119 + fnvlist_add_string(dduha->dduha_chkholds, name, htag);
120 + } else {
121 + /*
122 + * We register ENOENT errors so they can be correctly
123 + * reported if needed, such as when all holds fail.
124 + */
125 + fnvlist_add_int32(dduha->dduha_errlist, name, error);
126 + if (error != ENOENT)
127 + return (error);
117 128 }
118 129 }
119 - return (rv);
130 +
131 + /* Return ENOENT if no holds would be created. */
132 + if (nvlist_next_nvpair(dduha->dduha_chkholds, NULL) == NULL)
133 + return (ENOENT);
134 +
135 + return (0);
120 136 }
121 137
122 -void
123 -dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
124 - minor_t minor, uint64_t now, dmu_tx_t *tx)
138 +
139 +static void
140 +dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
141 + const char *htag, minor_t minor, uint64_t now, dmu_tx_t *tx)
125 142 {
126 143 dsl_pool_t *dp = ds->ds_dir->dd_pool;
127 144 objset_t *mos = dp->dp_meta_objset;
128 145 uint64_t zapobj;
129 146
130 - mutex_enter(&ds->ds_lock);
147 + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
148 +
131 149 if (ds->ds_phys->ds_userrefs_obj == 0) {
132 150 /*
133 151 * This is the first user hold for this dataset. Create
134 152 * the userrefs zap object.
135 153 */
136 154 dmu_buf_will_dirty(ds->ds_dbuf, tx);
137 155 zapobj = ds->ds_phys->ds_userrefs_obj =
138 156 zap_create(mos, DMU_OT_USERREFS, DMU_OT_NONE, 0, tx);
139 157 } else {
140 158 zapobj = ds->ds_phys->ds_userrefs_obj;
141 159 }
142 160 ds->ds_userrefs++;
143 - mutex_exit(&ds->ds_lock);
144 161
145 162 VERIFY0(zap_add(mos, zapobj, htag, 8, 1, &now, tx));
146 163
147 164 if (minor != 0) {
165 + char name[MAXNAMELEN];
166 + nvlist_t *tags;
167 +
148 168 VERIFY0(dsl_pool_user_hold(dp, ds->ds_object,
149 169 htag, now, tx));
150 - dsl_register_onexit_hold_cleanup(ds, htag, minor);
170 + (void) snprintf(name, sizeof(name), "%llx",
171 + (u_longlong_t)ds->ds_object);
172 +
173 + if (nvlist_lookup_nvlist(tmpholds, name, &tags) != 0) {
174 + tags = fnvlist_alloc();
175 + fnvlist_add_boolean(tags, htag);
176 + fnvlist_add_nvlist(tmpholds, name, tags);
177 + fnvlist_free(tags);
178 + } else {
179 + fnvlist_add_boolean(tags, htag);
180 + }
151 181 }
152 182
153 183 spa_history_log_internal_ds(ds, "hold", tx,
154 184 "tag=%s temp=%d refs=%llu",
155 185 htag, minor != 0, ds->ds_userrefs);
156 186 }
157 187
188 +typedef struct zfs_hold_cleanup_arg {
189 + char zhca_spaname[MAXNAMELEN];
190 + uint64_t zhca_spa_load_guid;
191 + nvlist_t *zhca_holds;
192 +} zfs_hold_cleanup_arg_t;
193 +
194 +static void
195 +dsl_dataset_user_release_onexit(void *arg)
196 +{
197 + zfs_hold_cleanup_arg_t *ca = (zfs_hold_cleanup_arg_t *)arg;
198 + spa_t *spa;
199 + int error;
200 +
201 + error = spa_open(ca->zhca_spaname, &spa, FTAG);
202 + if (error != 0) {
203 + zfs_dbgmsg("couldn't release holds on pool=%s "
204 + "because pool is no longer loaded",
205 + ca->zhca_spaname);
206 + return;
207 + }
208 + if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
209 + zfs_dbgmsg("couldn't release holds on pool=%s "
210 + "because pool is no longer loaded (guid doesn't match)",
211 + ca->zhca_spaname);
212 + spa_close(spa, FTAG);
213 + return;
214 + }
215 +
216 + (void) dsl_dataset_user_release_tmp(spa_get_dsl(spa), ca->zhca_holds);
217 + fnvlist_free(ca->zhca_holds);
218 + kmem_free(ca, sizeof(zfs_hold_cleanup_arg_t));
219 + spa_close(spa, FTAG);
220 +}
221 +
222 +static void
223 +dsl_register_onexit_hold_cleanup(spa_t *spa, nvlist_t *holds, minor_t minor)
224 +{
225 + zfs_hold_cleanup_arg_t *ca;
226 +
227 + if (minor == 0 || nvlist_next_nvpair(holds, NULL) == NULL) {
228 + fnvlist_free(holds);
229 + return;
230 + }
231 +
232 + ASSERT(spa != NULL);
233 + ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
234 +
235 + (void) strlcpy(ca->zhca_spaname, spa_name(spa),
236 + sizeof (ca->zhca_spaname));
237 + ca->zhca_spa_load_guid = spa_load_guid(spa);
238 + ca->zhca_holds = holds;
239 + VERIFY0(zfs_onexit_add_cb(minor,
240 + dsl_dataset_user_release_onexit, ca, NULL));
241 +}
242 +
243 +void
244 +dsl_dataset_user_hold_sync_one(dsl_dataset_t *ds, const char *htag,
245 + minor_t minor, uint64_t now, dmu_tx_t *tx)
246 +{
247 + nvlist_t *tmpholds;
248 +
249 + tmpholds = fnvlist_alloc();
250 +
251 + dsl_dataset_user_hold_sync_one_impl(tmpholds, ds, htag, minor, now, tx);
252 + dsl_register_onexit_hold_cleanup(dsl_dataset_get_spa(ds), tmpholds,
253 + minor);
254 +}
255 +
158 256 static void
159 257 dsl_dataset_user_hold_sync(void *arg, dmu_tx_t *tx)
160 258 {
161 259 dsl_dataset_user_hold_arg_t *dduha = arg;
162 260 dsl_pool_t *dp = dmu_tx_pool(tx);
163 261 nvpair_t *pair;
164 262 uint64_t now = gethrestime_sec();
165 263
166 - for (pair = nvlist_next_nvpair(dduha->dduha_holds, NULL); pair != NULL;
167 - pair = nvlist_next_nvpair(dduha->dduha_holds, pair)) {
264 + for (pair = nvlist_next_nvpair(dduha->dduha_chkholds, NULL);
265 + pair != NULL;
266 + pair = nvlist_next_nvpair(dduha->dduha_chkholds, pair)) {
168 267 dsl_dataset_t *ds;
268 +
169 269 VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
170 - dsl_dataset_user_hold_sync_one(ds, fnvpair_value_string(pair),
171 - dduha->dduha_minor, now, tx);
270 + dsl_dataset_user_hold_sync_one_impl(dduha->dduha_tmpholds, ds,
271 + fnvpair_value_string(pair), dduha->dduha_minor, now, tx);
172 272 dsl_dataset_rele(ds, FTAG);
173 273 }
274 + dduha->dduha_spa = dp->dp_spa;
174 275 }
175 276
176 277 /*
278 + * The full semantics of this function are described in the comment above
279 + * lzc_hold().
280 + *
281 + * To summarize:
177 282 * holds is nvl of snapname -> holdname
178 283 * errlist will be filled in with snapname -> error
179 - * if cleanup_minor is not 0, the holds will be temporary, cleaned up
180 - * when the process exits.
181 284 *
182 - * if any fails, all will fail.
285 + * The snapshots must all be in the same pool.
286 + *
287 + * Holds for snapshots that don't exist will be skipped.
288 + *
289 + * If none of the snapshots for requested holds exist then ENOENT will be
290 + * returned.
291 + *
292 + * If cleanup_minor is not 0, the holds will be temporary, which will be cleaned
293 + * up when the process exits.
294 + *
295 + * On success all the holds, for snapshots that existed, will be created and 0
296 + * will be returned.
297 + *
298 + * On failure no holds will be created, the errlist will be filled in,
299 + * and an errno will be returned.
300 + *
301 + * In all cases the errlist will contain entries for holds where the snapshot
302 + * didn't exist.
183 303 */
184 304 int
185 305 dsl_dataset_user_hold(nvlist_t *holds, minor_t cleanup_minor, nvlist_t *errlist)
186 306 {
187 307 dsl_dataset_user_hold_arg_t dduha;
188 308 nvpair_t *pair;
309 + int ret;
189 310
190 311 pair = nvlist_next_nvpair(holds, NULL);
191 312 if (pair == NULL)
192 313 return (0);
193 314
315 + dduha.dduha_spa = NULL;
194 316 dduha.dduha_holds = holds;
317 + dduha.dduha_chkholds = fnvlist_alloc();
318 + dduha.dduha_tmpholds = fnvlist_alloc();
195 319 dduha.dduha_errlist = errlist;
196 320 dduha.dduha_minor = cleanup_minor;
197 321
198 - return (dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
199 - dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds)));
322 + ret = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_hold_check,
323 + dsl_dataset_user_hold_sync, &dduha, fnvlist_num_pairs(holds));
324 +
325 + /* dsl_register_onexit_hold_cleanup() always frees the passed holds. */
326 + dsl_register_onexit_hold_cleanup(dduha.dduha_spa, dduha.dduha_tmpholds,
327 + cleanup_minor);
328 + fnvlist_free(dduha.dduha_chkholds);
329 +
330 + return (ret);
200 331 }
201 332
333 +typedef int (dsl_holdfunc_t)(dsl_pool_t *dp, const char *name, void *tag,
334 + dsl_dataset_t **dsp);
335 +
202 336 typedef struct dsl_dataset_user_release_arg {
337 + dsl_holdfunc_t *ddura_holdfunc;
203 338 nvlist_t *ddura_holds;
204 339 nvlist_t *ddura_todelete;
205 340 nvlist_t *ddura_errlist;
341 + nvlist_t *ddura_chkholds;
206 342 } dsl_dataset_user_release_arg_t;
207 343
344 +/* Place a dataset hold on the snapshot identified by passed dsobj string */
345 +static int
346 +dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag,
347 + dsl_dataset_t **dsp)
348 +{
349 + return dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp);
350 +}
351 +
208 352 static int
209 -dsl_dataset_user_release_check_one(dsl_dataset_t *ds,
210 - nvlist_t *holds, boolean_t *todelete)
353 +dsl_dataset_user_release_check_one(dsl_dataset_user_release_arg_t *ddura,
354 + dsl_dataset_t *ds, nvlist_t *holds, const char *name)
211 355 {
212 356 uint64_t zapobj;
213 357 nvpair_t *pair;
358 + nvlist_t *holds_found;
214 359 objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
215 - int error;
216 - int numholds = 0;
217 -
218 - *todelete = B_FALSE;
360 + int ret, numholds;
219 361
220 362 if (!dsl_dataset_is_snapshot(ds))
221 363 return (SET_ERROR(EINVAL));
222 364
223 365 zapobj = ds->ds_phys->ds_userrefs_obj;
224 366 if (zapobj == 0)
225 367 return (SET_ERROR(ESRCH));
226 368
369 + ret = 0;
370 + numholds = 0;
371 + holds_found = fnvlist_alloc();
372 +
227 373 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
228 374 pair = nvlist_next_nvpair(holds, pair)) {
229 - /* Make sure the hold exists */
230 375 uint64_t tmp;
231 - error = zap_lookup(mos, zapobj, nvpair_name(pair), 8, 1, &tmp);
376 + int error;
377 + const char *name;
378 +
379 + name = nvpair_name(pair);
380 + error = zap_lookup(mos, zapobj, name, 8, 1, &tmp);
381 +
382 + /* Non-existent holds aren't always an error. */
232 383 if (error == ENOENT)
233 - error = SET_ERROR(ESRCH);
234 - if (error != 0)
384 + continue;
385 +
386 + if (error != 0) {
387 + fnvlist_free(holds_found);
235 388 return (error);
389 + }
390 +
391 + fnvlist_add_boolean(holds_found, name);
236 392 numholds++;
237 393 }
238 394
239 395 if (DS_IS_DEFER_DESTROY(ds) && ds->ds_phys->ds_num_children == 1 &&
240 396 ds->ds_userrefs == numholds) {
241 397 /* we need to destroy the snapshot as well */
242 -
243 - if (dsl_dataset_long_held(ds))
398 + if (dsl_dataset_long_held(ds)) {
399 + fnvlist_free(holds_found);
244 400 return (SET_ERROR(EBUSY));
245 - *todelete = B_TRUE;
401 + }
402 + fnvlist_add_boolean(ddura->ddura_todelete, name);
246 403 }
247 - return (0);
404 +
405 + if (numholds == 0)
406 + ret = ENOENT;
407 + else
408 + fnvlist_add_nvlist(ddura->ddura_chkholds, name, holds_found);
409 + fnvlist_free(holds_found);
410 +
411 + return (ret);
248 412 }
249 413
250 414 static int
251 415 dsl_dataset_user_release_check(void *arg, dmu_tx_t *tx)
252 416 {
253 - dsl_dataset_user_release_arg_t *ddura = arg;
254 - dsl_pool_t *dp = dmu_tx_pool(tx);
417 + dsl_dataset_user_release_arg_t *ddura;
418 + dsl_holdfunc_t *holdfunc;
419 + dsl_pool_t *dp;
255 420 nvpair_t *pair;
256 - int rv = 0;
257 421
258 422 if (!dmu_tx_is_syncing(tx))
259 423 return (0);
260 424
425 + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
426 +
427 + dp = dmu_tx_pool(tx);
428 + ddura = (dsl_dataset_user_release_arg_t *)arg;
429 + holdfunc = ddura->ddura_holdfunc;
430 +
261 431 for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
262 432 pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
263 - const char *name = nvpair_name(pair);
433 + const char *name;
264 434 int error;
265 435 dsl_dataset_t *ds;
266 436 nvlist_t *holds;
267 437
438 + name = nvpair_name(pair);
268 439 error = nvpair_value_nvlist(pair, &holds);
269 440 if (error != 0)
270 - return (SET_ERROR(EINVAL));
271 -
272 - error = dsl_dataset_hold(dp, name, FTAG, &ds);
441 + error = (SET_ERROR(EINVAL));
442 + if (error == 0)
443 + error = holdfunc(dp, name, FTAG, &ds);
273 444 if (error == 0) {
274 - boolean_t deleteme;
275 - error = dsl_dataset_user_release_check_one(ds,
276 - holds, &deleteme);
277 - if (error == 0 && deleteme) {
278 - fnvlist_add_boolean(ddura->ddura_todelete,
279 - name);
280 - }
445 + error = dsl_dataset_user_release_check_one(ddura, ds,
446 + holds, name);
281 447 dsl_dataset_rele(ds, FTAG);
282 448 }
283 449 if (error != 0) {
284 450 if (ddura->ddura_errlist != NULL) {
285 - fnvlist_add_int32(ddura->ddura_errlist,
286 - name, error);
451 + fnvlist_add_int32(ddura->ddura_errlist, name,
452 + error);
287 453 }
288 - rv = error;
454 + /* Non-existent holds aren't always an error. */
455 + if (error != ENOENT)
456 + return (error);
289 457 }
290 458 }
291 - return (rv);
459 +
460 + /*
461 + * Return ENOENT if none of the holds existed avoiding the overhead
462 + * of a sync.
463 + */
464 + if (nvlist_next_nvpair(ddura->ddura_chkholds, NULL) == NULL)
465 + return (ENOENT);
466 +
467 + return (0);
292 468 }
293 469
294 470 static void
295 -dsl_dataset_user_release_sync_one(dsl_dataset_t *ds, nvlist_t *holds,
296 - dmu_tx_t *tx)
471 +dsl_dataset_user_release_sync_one(dsl_dataset_user_release_arg_t *ddura,
472 + dsl_dataset_t *ds, nvlist_t *holds, dmu_tx_t *tx)
297 473 {
298 474 dsl_pool_t *dp = ds->ds_dir->dd_pool;
299 475 objset_t *mos = dp->dp_meta_objset;
300 - uint64_t zapobj;
301 - int error;
302 476 nvpair_t *pair;
303 477
304 478 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
305 479 pair = nvlist_next_nvpair(holds, pair)) {
306 - ds->ds_userrefs--;
307 - error = dsl_pool_user_release(dp, ds->ds_object,
308 - nvpair_name(pair), tx);
480 + uint64_t zapobj;
481 + int error;
482 + const char *name;
483 +
484 + name = nvpair_name(pair);
485 +
486 + /* Remove temporary hold if one exists. */
487 + error = dsl_pool_user_release(dp, ds->ds_object, name, tx);
309 488 VERIFY(error == 0 || error == ENOENT);
489 +
490 + /* Remove user hold if one exists. */
310 491 zapobj = ds->ds_phys->ds_userrefs_obj;
311 - VERIFY0(zap_remove(mos, zapobj, nvpair_name(pair), tx));
492 + error = zap_remove(mos, zapobj, name, tx);
493 + if (error == ENOENT)
494 + continue;
495 + VERIFY0(error);
496 +
497 + /* Only if we removed a hold do we decrement ds_userrefs. */
498 + ds->ds_userrefs--;
312 499
313 500 spa_history_log_internal_ds(ds, "release", tx,
314 501 "tag=%s refs=%lld", nvpair_name(pair),
315 502 (longlong_t)ds->ds_userrefs);
316 503 }
317 504 }
318 505
319 506 static void
320 507 dsl_dataset_user_release_sync(void *arg, dmu_tx_t *tx)
321 508 {
322 509 dsl_dataset_user_release_arg_t *ddura = arg;
510 + dsl_holdfunc_t *holdfunc = ddura->ddura_holdfunc;
323 511 dsl_pool_t *dp = dmu_tx_pool(tx);
324 512 nvpair_t *pair;
325 513
326 - for (pair = nvlist_next_nvpair(ddura->ddura_holds, NULL); pair != NULL;
327 - pair = nvlist_next_nvpair(ddura->ddura_holds, pair)) {
514 + ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
515 +
516 + for (pair = nvlist_next_nvpair(ddura->ddura_chkholds, NULL);
517 + pair != NULL; pair = nvlist_next_nvpair(ddura->ddura_chkholds,
518 + pair)) {
328 519 dsl_dataset_t *ds;
520 + const char *name;
329 521
330 - VERIFY0(dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds));
331 - dsl_dataset_user_release_sync_one(ds,
522 + name = nvpair_name(pair);
523 + VERIFY0(holdfunc(dp, name, FTAG, &ds));
524 +
525 + dsl_dataset_user_release_sync_one(ddura, ds,
332 526 fnvpair_value_nvlist(pair), tx);
333 - if (nvlist_exists(ddura->ddura_todelete,
334 - nvpair_name(pair))) {
527 + if (nvlist_exists(ddura->ddura_todelete, name)) {
335 528 ASSERT(ds->ds_userrefs == 0 &&
336 529 ds->ds_phys->ds_num_children == 1 &&
337 530 DS_IS_DEFER_DESTROY(ds));
338 531 dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
339 532 }
340 533 dsl_dataset_rele(ds, FTAG);
341 534 }
342 535 }
343 536
344 537 /*
538 + * The full semantics of this function are described in the comment above
539 + * lzc_release().
540 + *
541 + * To summarize:
542 + * Releases holds specified in the nvl holds.
543 + *
345 544 * holds is nvl of snapname -> { holdname, ... }
346 545 * errlist will be filled in with snapname -> error
546 + *
547 + * If tmpdp is not NULL the names for holds should be the dsobj's of snapshots,
548 + * otherwise they should be the names of snapshots.
549 + *
550 + * As a release may cause snapshots to be destroyed this tries to ensure they
551 + * aren't mounted.
552 + *
553 + * The release of non-existent holds is skipped.
347 554 *
348 - * if any fails, all will fail.
555 + * At least one hold must have been released for this function to succeed
556 + * and return 0.
349 557 */
350 -int
351 -dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
558 +static int
559 +dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
560 + dsl_pool_t *tmpdp)
352 561 {
353 562 dsl_dataset_user_release_arg_t ddura;
354 563 nvpair_t *pair;
564 + char *pool;
355 565 int error;
356 566
357 567 pair = nvlist_next_nvpair(holds, NULL);
358 568 if (pair == NULL)
359 569 return (0);
360 570
571 +#ifdef _KERNEL
572 + /*
573 + * The release may cause snapshots to be destroyed; make sure they
574 + * are not mounted.
575 + */
576 + if (tmpdp != NULL) {
577 + /* Temporary holds are specified by dsobj string. */
578 + ddura.ddura_holdfunc = dsl_dataset_hold_obj_string;
579 + pool = spa_name(tmpdp->dp_spa);
580 +
581 + dsl_pool_config_enter(tmpdp, FTAG);
582 + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
583 + pair = nvlist_next_nvpair(holds, pair)) {
584 + dsl_dataset_t *ds;
585 +
586 + error = dsl_dataset_hold_obj_string(tmpdp,
587 + nvpair_name(pair), FTAG, &ds);
588 + if (error == 0) {
589 + char name[MAXNAMELEN];
590 + dsl_dataset_name(ds, name);
591 + dsl_dataset_rele(ds, FTAG);
592 + zfs_unmount_snap(name);
593 + }
594 + }
595 + dsl_pool_config_exit(tmpdp, FTAG);
596 + } else {
597 + /* Non-temporary holds are specified by name. */
598 + ddura.ddura_holdfunc = dsl_dataset_hold;
599 + pool = nvpair_name(pair);
600 +
601 + for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
602 + pair = nvlist_next_nvpair(holds, pair)) {
603 + zfs_unmount_snap(nvpair_name(pair));
604 + }
605 + }
606 +#endif
607 +
361 608 ddura.ddura_holds = holds;
362 609 ddura.ddura_errlist = errlist;
363 610 ddura.ddura_todelete = fnvlist_alloc();
611 + ddura.ddura_chkholds = fnvlist_alloc();
364 612
365 - error = dsl_sync_task(nvpair_name(pair), dsl_dataset_user_release_check,
366 - dsl_dataset_user_release_sync, &ddura, fnvlist_num_pairs(holds));
613 + error = dsl_sync_task(pool, dsl_dataset_user_release_check,
614 + dsl_dataset_user_release_sync, &ddura,
615 + fnvlist_num_pairs(holds));
367 616 fnvlist_free(ddura.ddura_todelete);
368 - return (error);
369 -}
370 -
371 -typedef struct dsl_dataset_user_release_tmp_arg {
372 - uint64_t ddurta_dsobj;
373 - nvlist_t *ddurta_holds;
374 - boolean_t ddurta_deleteme;
375 -} dsl_dataset_user_release_tmp_arg_t;
376 -
377 -static int
378 -dsl_dataset_user_release_tmp_check(void *arg, dmu_tx_t *tx)
379 -{
380 - dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
381 - dsl_pool_t *dp = dmu_tx_pool(tx);
382 - dsl_dataset_t *ds;
383 - int error;
384 -
385 - if (!dmu_tx_is_syncing(tx))
386 - return (0);
617 + fnvlist_free(ddura.ddura_chkholds);
387 618
388 - error = dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds);
389 - if (error)
390 - return (error);
391 -
392 - error = dsl_dataset_user_release_check_one(ds,
393 - ddurta->ddurta_holds, &ddurta->ddurta_deleteme);
394 - dsl_dataset_rele(ds, FTAG);
395 619 return (error);
396 620 }
397 621
398 -static void
399 -dsl_dataset_user_release_tmp_sync(void *arg, dmu_tx_t *tx)
400 -{
401 - dsl_dataset_user_release_tmp_arg_t *ddurta = arg;
402 - dsl_pool_t *dp = dmu_tx_pool(tx);
403 - dsl_dataset_t *ds;
404 -
405 - VERIFY0(dsl_dataset_hold_obj(dp, ddurta->ddurta_dsobj, FTAG, &ds));
406 - dsl_dataset_user_release_sync_one(ds, ddurta->ddurta_holds, tx);
407 - if (ddurta->ddurta_deleteme) {
408 - ASSERT(ds->ds_userrefs == 0 &&
409 - ds->ds_phys->ds_num_children == 1 &&
410 - DS_IS_DEFER_DESTROY(ds));
411 - dsl_destroy_snapshot_sync_impl(ds, B_FALSE, tx);
412 - }
413 - dsl_dataset_rele(ds, FTAG);
414 -}
415 -
416 622 /*
417 - * Called at spa_load time to release a stale temporary user hold.
418 - * Also called by the onexit code.
623 + * holds is nvl of snapname -> { holdname, ... }
624 + * errlist will be filled in with snapname -> error
419 625 */
420 -void
421 -dsl_dataset_user_release_tmp(dsl_pool_t *dp, uint64_t dsobj, const char *htag)
422 -{
423 - dsl_dataset_user_release_tmp_arg_t ddurta;
424 - dsl_dataset_t *ds;
425 - int error;
426 -
427 -#ifdef _KERNEL
428 - /* Make sure it is not mounted. */
429 - dsl_pool_config_enter(dp, FTAG);
430 - error = dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds);
431 - if (error == 0) {
432 - char name[MAXNAMELEN];
433 - dsl_dataset_name(ds, name);
434 - dsl_dataset_rele(ds, FTAG);
435 - dsl_pool_config_exit(dp, FTAG);
436 - zfs_unmount_snap(name);
437 - } else {
438 - dsl_pool_config_exit(dp, FTAG);
439 - }
440 -#endif
441 -
442 - ddurta.ddurta_dsobj = dsobj;
443 - ddurta.ddurta_holds = fnvlist_alloc();
444 - fnvlist_add_boolean(ddurta.ddurta_holds, htag);
445 -
446 - (void) dsl_sync_task(spa_name(dp->dp_spa),
447 - dsl_dataset_user_release_tmp_check,
448 - dsl_dataset_user_release_tmp_sync, &ddurta, 1);
449 - fnvlist_free(ddurta.ddurta_holds);
450 -}
451 -
452 -typedef struct zfs_hold_cleanup_arg {
453 - char zhca_spaname[MAXNAMELEN];
454 - uint64_t zhca_spa_load_guid;
455 - uint64_t zhca_dsobj;
456 - char zhca_htag[MAXNAMELEN];
457 -} zfs_hold_cleanup_arg_t;
458 -
459 -static void
460 -dsl_dataset_user_release_onexit(void *arg)
626 +int
627 +dsl_dataset_user_release(nvlist_t *holds, nvlist_t *errlist)
461 628 {
462 - zfs_hold_cleanup_arg_t *ca = arg;
463 - spa_t *spa;
464 - int error;
465 -
466 - error = spa_open(ca->zhca_spaname, &spa, FTAG);
467 - if (error != 0) {
468 - zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
469 - "because pool is no longer loaded",
470 - ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
471 - return;
472 - }
473 - if (spa_load_guid(spa) != ca->zhca_spa_load_guid) {
474 - zfs_dbgmsg("couldn't release hold on pool=%s ds=%llu tag=%s "
475 - "because pool is no longer loaded (guid doesn't match)",
476 - ca->zhca_spaname, ca->zhca_dsobj, ca->zhca_htag);
477 - spa_close(spa, FTAG);
478 - return;
479 - }
480 -
481 - dsl_dataset_user_release_tmp(spa_get_dsl(spa),
482 - ca->zhca_dsobj, ca->zhca_htag);
483 - kmem_free(ca, sizeof (zfs_hold_cleanup_arg_t));
484 - spa_close(spa, FTAG);
629 + return dsl_dataset_user_release_impl(holds, errlist, NULL);
485 630 }
486 631
632 +/*
633 + * holds is nvl of snapdsobj -> { holdname, ... }
634 + */
487 635 void
488 -dsl_register_onexit_hold_cleanup(dsl_dataset_t *ds, const char *htag,
489 - minor_t minor)
636 +dsl_dataset_user_release_tmp(struct dsl_pool *dp, nvlist_t *holds)
490 637 {
491 - zfs_hold_cleanup_arg_t *ca = kmem_alloc(sizeof (*ca), KM_SLEEP);
492 - spa_t *spa = dsl_dataset_get_spa(ds);
493 - (void) strlcpy(ca->zhca_spaname, spa_name(spa),
494 - sizeof (ca->zhca_spaname));
495 - ca->zhca_spa_load_guid = spa_load_guid(spa);
496 - ca->zhca_dsobj = ds->ds_object;
497 - (void) strlcpy(ca->zhca_htag, htag, sizeof (ca->zhca_htag));
498 - VERIFY0(zfs_onexit_add_cb(minor,
499 - dsl_dataset_user_release_onexit, ca, NULL));
638 + ASSERT(dp != NULL);
639 + (void) dsl_dataset_user_release_impl(holds, NULL, dp);
500 640 }
501 641
502 642 int
503 643 dsl_dataset_get_holds(const char *dsname, nvlist_t *nvl)
504 644 {
505 645 dsl_pool_t *dp;
506 646 dsl_dataset_t *ds;
507 647 int err;
508 648
509 649 err = dsl_pool_hold(dsname, FTAG, &dp);
510 650 if (err != 0)
511 651 return (err);
512 652 err = dsl_dataset_hold(dp, dsname, FTAG, &ds);
513 653 if (err != 0) {
514 654 dsl_pool_rele(dp, FTAG);
515 655 return (err);
516 656 }
517 657
518 658 if (ds->ds_phys->ds_userrefs_obj != 0) {
519 659 zap_attribute_t *za;
520 660 zap_cursor_t zc;
521 661
522 662 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
523 663 for (zap_cursor_init(&zc, ds->ds_dir->dd_pool->dp_meta_objset,
524 664 ds->ds_phys->ds_userrefs_obj);
525 665 zap_cursor_retrieve(&zc, za) == 0;
526 666 zap_cursor_advance(&zc)) {
527 667 fnvlist_add_uint64(nvl, za->za_name,
528 668 za->za_first_integer);
529 669 }
530 670 zap_cursor_fini(&zc);
531 671 kmem_free(za, sizeof (zap_attribute_t));
532 672 }
533 673 dsl_dataset_rele(ds, FTAG);
534 674 dsl_pool_rele(dp, FTAG);
535 675 return (0);
536 676 }
↓ open down ↓ |
27 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX