--- old/usr/src/uts/common/fs/zfs/dmu_send.c
+++ new/usr/src/uts/common/fs/zfs/dmu_send.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 26 */
27 27
28 28 #include <sys/dmu.h>
29 29 #include <sys/dmu_impl.h>
30 30 #include <sys/dmu_tx.h>
31 31 #include <sys/dbuf.h>
32 32 #include <sys/dnode.h>
33 33 #include <sys/zfs_context.h>
34 34 #include <sys/dmu_objset.h>
35 35 #include <sys/dmu_traverse.h>
36 36 #include <sys/dsl_dataset.h>
37 37 #include <sys/dsl_dir.h>
38 38 #include <sys/dsl_prop.h>
39 39 #include <sys/dsl_pool.h>
40 40 #include <sys/dsl_synctask.h>
41 41 #include <sys/zfs_ioctl.h>
42 42 #include <sys/zap.h>
43 43 #include <sys/zio_checksum.h>
44 44 #include <sys/zfs_znode.h>
45 45 #include <zfs_fletcher.h>
46 46 #include <sys/avl.h>
47 47 #include <sys/ddt.h>
48 48 #include <sys/zfs_onexit.h>
49 49
50 50 /* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
51 51 int zfs_send_corrupt_data = B_FALSE;
52 52
53 53 static char *dmu_recv_tag = "dmu_recv_tag";
54 54
55 55 static int
56 56 dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
57 57 {
58 58 dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
59 59 ssize_t resid; /* have to get resid to get detailed errno */
60 60 ASSERT3U(len % 8, ==, 0);
61 61
62 62 fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
63 63 dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
64 64 (caddr_t)buf, len,
65 65 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
66 66
67 67 mutex_enter(&ds->ds_sendstream_lock);
68 68 *dsp->dsa_off += len;
69 69 mutex_exit(&ds->ds_sendstream_lock);
70 70
71 71 return (dsp->dsa_err);
72 72 }
73 73
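dump_bytes() folds every outgoing buffer into a running Fletcher-4 checksum (dsp->dsa_zc) before writing it, so the stream's trailing DRR_END record can carry a checksum of everything sent. Below is a minimal standalone sketch of that incremental recurrence; the struct and function names are illustrative stand-ins, not the illumos fletcher_4 implementation.

#include <stdint.h>
#include <stdio.h>

typedef struct { uint64_t a, b, c, d; } fletcher4_t;

static void
fletcher_4_incremental(const void *buf, size_t size, fletcher4_t *zcp)
{
	const uint32_t *ip = buf;
	const uint32_t *ipend = ip + (size / sizeof (uint32_t));

	for (; ip < ipend; ip++) {
		zcp->a += *ip;
		zcp->b += zcp->a;
		zcp->c += zcp->b;
		zcp->d += zcp->c;
	}
}

int
main(void)
{
	uint32_t words[4] = { 1, 2, 3, 4 };
	fletcher4_t zc = { 0, 0, 0, 0 };

	/* two incremental calls give the same sums as one call would */
	fletcher_4_incremental(words, 2 * sizeof (uint32_t), &zc);
	fletcher_4_incremental(words + 2, 2 * sizeof (uint32_t), &zc);
	printf("%llu %llu %llu %llu\n",
	    (unsigned long long)zc.a, (unsigned long long)zc.b,
	    (unsigned long long)zc.c, (unsigned long long)zc.d);
	return (0);
}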
74 74 static int
75 75 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
76 76 uint64_t length)
77 77 {
78 78 struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
79 79
80 + if (length != -1ULL && offset + length < offset)
81 + length = -1ULL;
82 +
80 83 /*
81 84 * If there is a pending op, but it's not PENDING_FREE, push it out,
82 85 * since free block aggregation can only be done for blocks of the
83 86 * same type (i.e., DRR_FREE records can only be aggregated with
84 87 * other DRR_FREE records. DRR_FREEOBJECTS records can only be
 85 88 	 * aggregated with other DRR_FREEOBJECTS records.)
86 89 */
87 90 if (dsp->dsa_pending_op != PENDING_NONE &&
88 91 dsp->dsa_pending_op != PENDING_FREE) {
89 92 if (dump_bytes(dsp, dsp->dsa_drr,
90 93 sizeof (dmu_replay_record_t)) != 0)
91 94 return (EINTR);
92 95 dsp->dsa_pending_op = PENDING_NONE;
93 96 }
94 97
95 98 if (dsp->dsa_pending_op == PENDING_FREE) {
96 99 /*
97 100 * There should never be a PENDING_FREE if length is -1
98 101 * (because dump_dnode is the only place where this
99 102 * function is called with a -1, and only after flushing
100 103 * any pending record).
101 104 */
102 105 ASSERT(length != -1ULL);
103 106 /*
104 107 * Check to see whether this free block can be aggregated
 105 108 	 * with the pending one.
106 109 */
107 110 if (drrf->drr_object == object && drrf->drr_offset +
108 111 drrf->drr_length == offset) {
109 112 drrf->drr_length += length;
110 113 return (0);
111 114 } else {
112 115 /* not a continuation. Push out pending record */
113 116 if (dump_bytes(dsp, dsp->dsa_drr,
114 117 sizeof (dmu_replay_record_t)) != 0)
115 118 return (EINTR);
116 119 dsp->dsa_pending_op = PENDING_NONE;
117 120 }
118 121 }
119 122 /* create a FREE record and make it pending */
120 123 bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
121 124 dsp->dsa_drr->drr_type = DRR_FREE;
122 125 drrf->drr_object = object;
123 126 drrf->drr_offset = offset;
124 127 drrf->drr_length = length;
125 128 drrf->drr_toguid = dsp->dsa_toguid;
126 129 if (length == -1ULL) {
127 130 if (dump_bytes(dsp, dsp->dsa_drr,
128 131 sizeof (dmu_replay_record_t)) != 0)
129 132 return (EINTR);
130 133 } else {
131 134 dsp->dsa_pending_op = PENDING_FREE;
132 135 }
133 136
134 137 return (0);
135 138 }
136 139
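The two lines this patch adds at the top of dump_free() guard against an (offset, length) pair whose end wraps past UINT64_MAX: such a request is clamped to the "free to end of object" sentinel, -1ULL, rather than feeding wrapped arithmetic into the aggregation logic below. A minimal userland sketch of the guard follows; clamp_free_length() is a hypothetical name, not illumos code.

#include <stdint.h>
#include <stdio.h>

/* same test as the patch: a wrapped end offset means "free everything" */
static uint64_t
clamp_free_length(uint64_t offset, uint64_t length)
{
	if (length != UINT64_MAX && offset + length < offset)
		length = UINT64_MAX;
	return (length);
}

int
main(void)
{
	/* the end of this range wraps past UINT64_MAX, so it is clamped */
	printf("%llx\n", (unsigned long long)
	    clamp_free_length(0xfffffffffffff000ULL, 0x2000ULL));
	/* a sane range passes through unchanged */
	printf("%llx\n", (unsigned long long)
	    clamp_free_length(0x1000ULL, 0x2000ULL));
	return (0);
}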
137 140 static int
138 141 dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
139 142 uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
140 143 {
141 144 struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
142 145
143 146
144 147 /*
145 148 * If there is any kind of pending aggregation (currently either
146 149 * a grouping of free objects or free blocks), push it out to
147 150 * the stream, since aggregation can't be done across operations
148 151 * of different types.
149 152 */
150 153 if (dsp->dsa_pending_op != PENDING_NONE) {
151 154 if (dump_bytes(dsp, dsp->dsa_drr,
152 155 sizeof (dmu_replay_record_t)) != 0)
153 156 return (EINTR);
154 157 dsp->dsa_pending_op = PENDING_NONE;
155 158 }
156 159 /* write a DATA record */
157 160 bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
158 161 dsp->dsa_drr->drr_type = DRR_WRITE;
159 162 drrw->drr_object = object;
160 163 drrw->drr_type = type;
161 164 drrw->drr_offset = offset;
162 165 drrw->drr_length = blksz;
163 166 drrw->drr_toguid = dsp->dsa_toguid;
164 167 drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
165 168 if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
166 169 drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
167 170 DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
168 171 DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
169 172 DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
170 173 drrw->drr_key.ddk_cksum = bp->blk_cksum;
171 174
172 175 if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
173 176 return (EINTR);
174 177 if (dump_bytes(dsp, data, blksz) != 0)
175 178 return (EINTR);
176 179 return (0);
177 180 }
178 181
179 182 static int
180 183 dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
181 184 {
182 185 struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
183 186
184 187 if (dsp->dsa_pending_op != PENDING_NONE) {
185 188 if (dump_bytes(dsp, dsp->dsa_drr,
186 189 sizeof (dmu_replay_record_t)) != 0)
187 190 return (EINTR);
188 191 dsp->dsa_pending_op = PENDING_NONE;
189 192 }
190 193
191 194 /* write a SPILL record */
192 195 bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
193 196 dsp->dsa_drr->drr_type = DRR_SPILL;
194 197 drrs->drr_object = object;
195 198 drrs->drr_length = blksz;
196 199 drrs->drr_toguid = dsp->dsa_toguid;
197 200
198 201 if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
199 202 return (EINTR);
200 203 if (dump_bytes(dsp, data, blksz))
201 204 return (EINTR);
202 205 return (0);
203 206 }
204 207
205 208 static int
206 209 dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
207 210 {
208 211 struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
209 212
210 213 /*
211 214 * If there is a pending op, but it's not PENDING_FREEOBJECTS,
212 215 * push it out, since free block aggregation can only be done for
213 216 * blocks of the same type (i.e., DRR_FREE records can only be
214 217 * aggregated with other DRR_FREE records. DRR_FREEOBJECTS records
 215 218 	 * can only be aggregated with other DRR_FREEOBJECTS records.)
216 219 */
217 220 if (dsp->dsa_pending_op != PENDING_NONE &&
218 221 dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
219 222 if (dump_bytes(dsp, dsp->dsa_drr,
220 223 sizeof (dmu_replay_record_t)) != 0)
221 224 return (EINTR);
222 225 dsp->dsa_pending_op = PENDING_NONE;
223 226 }
224 227 if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
225 228 /*
226 229 * See whether this free object array can be aggregated
 227 230 	 * with the pending one.
228 231 */
229 232 if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) {
230 233 drrfo->drr_numobjs += numobjs;
231 234 return (0);
232 235 } else {
233 236 /* can't be aggregated. Push out pending record */
234 237 if (dump_bytes(dsp, dsp->dsa_drr,
235 238 sizeof (dmu_replay_record_t)) != 0)
236 239 return (EINTR);
237 240 dsp->dsa_pending_op = PENDING_NONE;
238 241 }
239 242 }
240 243
241 244 /* write a FREEOBJECTS record */
242 245 bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
243 246 dsp->dsa_drr->drr_type = DRR_FREEOBJECTS;
244 247 drrfo->drr_firstobj = firstobj;
245 248 drrfo->drr_numobjs = numobjs;
246 249 drrfo->drr_toguid = dsp->dsa_toguid;
247 250
248 251 dsp->dsa_pending_op = PENDING_FREEOBJECTS;
249 252
250 253 return (0);
251 254 }
252 255
253 256 static int
254 257 dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
255 258 {
256 259 struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
257 260
258 261 if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
259 262 return (dump_freeobjects(dsp, object, 1));
260 263
261 264 if (dsp->dsa_pending_op != PENDING_NONE) {
262 265 if (dump_bytes(dsp, dsp->dsa_drr,
263 266 sizeof (dmu_replay_record_t)) != 0)
264 267 return (EINTR);
265 268 dsp->dsa_pending_op = PENDING_NONE;
266 269 }
267 270
268 271 /* write an OBJECT record */
269 272 bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
270 273 dsp->dsa_drr->drr_type = DRR_OBJECT;
271 274 drro->drr_object = object;
272 275 drro->drr_type = dnp->dn_type;
273 276 drro->drr_bonustype = dnp->dn_bonustype;
274 277 drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
275 278 drro->drr_bonuslen = dnp->dn_bonuslen;
276 279 drro->drr_checksumtype = dnp->dn_checksum;
277 280 drro->drr_compress = dnp->dn_compress;
278 281 drro->drr_toguid = dsp->dsa_toguid;
279 282
280 283 if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
281 284 return (EINTR);
282 285
283 286 if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
284 287 return (EINTR);
285 288
286 289 /* free anything past the end of the file */
287 290 if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
288 291 (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL))
289 292 return (EINTR);
290 293 if (dsp->dsa_err)
291 294 return (EINTR);
292 295 return (0);
293 296 }
294 297
295 298 #define BP_SPAN(dnp, level) \
296 299 (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
297 300 (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))
298 301
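BP_SPAN() yields the number of bytes of object data covered by one block pointer at a given indirection level: the data block size, multiplied once per level by the blkptrs-per-indirect-block fan-out. The standalone sketch below uses assumed but typical values: 512-byte sectors, 128-byte blkptr_t, a 128K data block, and 16K indirect blocks holding 128 blkptrs each.

#include <stdint.h>
#include <stdio.h>

#define	SPA_MINBLOCKSHIFT	9	/* 512-byte sectors */
#define	SPA_BLKPTRSHIFT		7	/* blkptr_t is 128 bytes */

static uint64_t
bp_span(uint64_t datablkszsec, int indblkshift, int level)
{
	return (datablkszsec <<
	    (SPA_MINBLOCKSHIFT + level * (indblkshift - SPA_BLKPTRSHIFT)));
}

int
main(void)
{
	/* 128K data blocks (256 sectors), 16K indirect blocks */
	for (int level = 0; level <= 2; level++)
		printf("level %d: one blkptr spans %llu bytes\n", level,
		    (unsigned long long)bp_span(256, 14, level));
	return (0);
}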
299 302 /* ARGSUSED */
300 303 static int
301 304 backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
302 305 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
303 306 {
304 307 dmu_sendarg_t *dsp = arg;
305 308 dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
306 309 int err = 0;
307 310
308 311 if (issig(JUSTLOOKING) && issig(FORREAL))
309 312 return (EINTR);
310 313
311 314 if (zb->zb_object != DMU_META_DNODE_OBJECT &&
312 315 DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
313 316 return (0);
314 317 } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) {
315 318 uint64_t span = BP_SPAN(dnp, zb->zb_level);
316 319 uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
317 320 err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
318 321 } else if (bp == NULL) {
319 322 uint64_t span = BP_SPAN(dnp, zb->zb_level);
320 323 err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
321 324 } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
322 325 return (0);
323 326 } else if (type == DMU_OT_DNODE) {
324 327 dnode_phys_t *blk;
325 328 int i;
326 329 int blksz = BP_GET_LSIZE(bp);
327 330 uint32_t aflags = ARC_WAIT;
328 331 arc_buf_t *abuf;
329 332
330 333 if (dsl_read(NULL, spa, bp, pbuf,
331 334 arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
332 335 ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
333 336 return (EIO);
334 337
335 338 blk = abuf->b_data;
336 339 for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
337 340 uint64_t dnobj = (zb->zb_blkid <<
338 341 (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
339 342 err = dump_dnode(dsp, dnobj, blk+i);
340 343 if (err)
341 344 break;
342 345 }
343 346 (void) arc_buf_remove_ref(abuf, &abuf);
344 347 } else if (type == DMU_OT_SA) {
345 348 uint32_t aflags = ARC_WAIT;
346 349 arc_buf_t *abuf;
347 350 int blksz = BP_GET_LSIZE(bp);
348 351
349 352 if (arc_read_nolock(NULL, spa, bp,
350 353 arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
351 354 ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
352 355 return (EIO);
353 356
354 357 err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
355 358 (void) arc_buf_remove_ref(abuf, &abuf);
356 359 } else { /* it's a level-0 block of a regular object */
357 360 uint32_t aflags = ARC_WAIT;
358 361 arc_buf_t *abuf;
359 362 int blksz = BP_GET_LSIZE(bp);
360 363
361 364 if (dsl_read(NULL, spa, bp, pbuf,
362 365 arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
363 366 ZIO_FLAG_CANFAIL, &aflags, zb) != 0) {
364 367 if (zfs_send_corrupt_data) {
365 368 /* Send a block filled with 0x"zfs badd bloc" */
366 369 abuf = arc_buf_alloc(spa, blksz, &abuf,
367 370 ARC_BUFC_DATA);
368 371 uint64_t *ptr;
369 372 for (ptr = abuf->b_data;
370 373 (char *)ptr < (char *)abuf->b_data + blksz;
371 374 ptr++)
372 375 *ptr = 0x2f5baddb10c;
373 376 } else {
374 377 return (EIO);
375 378 }
376 379 }
377 380
378 381 err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
379 382 blksz, bp, abuf->b_data);
380 383 (void) arc_buf_remove_ref(abuf, &abuf);
381 384 }
382 385
383 386 ASSERT(err == 0 || err == EINTR);
384 387 return (err);
385 388 }
386 389
387 390 int
388 391 dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
389 392 int outfd, vnode_t *vp, offset_t *off)
390 393 {
391 394 dsl_dataset_t *ds = tosnap->os_dsl_dataset;
392 395 dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
393 396 dmu_replay_record_t *drr;
394 397 dmu_sendarg_t *dsp;
395 398 int err;
396 399 uint64_t fromtxg = 0;
397 400
398 401 /* tosnap must be a snapshot */
399 402 if (ds->ds_phys->ds_next_snap_obj == 0)
400 403 return (EINVAL);
401 404
402 405 /* fromsnap must be an earlier snapshot from the same fs as tosnap */
403 406 if (fromds && (ds->ds_dir != fromds->ds_dir ||
404 407 fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg))
405 408 return (EXDEV);
406 409
407 410 if (fromorigin) {
408 411 dsl_pool_t *dp = ds->ds_dir->dd_pool;
409 412
410 413 if (fromsnap)
411 414 return (EINVAL);
412 415
413 416 if (dsl_dir_is_clone(ds->ds_dir)) {
414 417 rw_enter(&dp->dp_config_rwlock, RW_READER);
415 418 err = dsl_dataset_hold_obj(dp,
416 419 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds);
417 420 rw_exit(&dp->dp_config_rwlock);
418 421 if (err)
419 422 return (err);
420 423 } else {
421 424 fromorigin = B_FALSE;
422 425 }
423 426 }
424 427
425 428
426 429 drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
427 430 drr->drr_type = DRR_BEGIN;
428 431 drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
429 432 DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo,
430 433 DMU_SUBSTREAM);
431 434
432 435 #ifdef _KERNEL
433 436 if (dmu_objset_type(tosnap) == DMU_OST_ZFS) {
434 437 uint64_t version;
435 438 if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) {
436 439 kmem_free(drr, sizeof (dmu_replay_record_t));
437 440 return (EINVAL);
438 441 }
439 442 if (version == ZPL_VERSION_SA) {
440 443 DMU_SET_FEATUREFLAGS(
441 444 drr->drr_u.drr_begin.drr_versioninfo,
442 445 DMU_BACKUP_FEATURE_SA_SPILL);
443 446 }
444 447 }
445 448 #endif
446 449
447 450 drr->drr_u.drr_begin.drr_creation_time =
448 451 ds->ds_phys->ds_creation_time;
449 452 drr->drr_u.drr_begin.drr_type = tosnap->os_phys->os_type;
450 453 if (fromorigin)
451 454 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
452 455 drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;
453 456 if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
454 457 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
455 458
456 459 if (fromds)
457 460 drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid;
458 461 dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
459 462
460 463 if (fromds)
461 464 fromtxg = fromds->ds_phys->ds_creation_txg;
462 465 if (fromorigin)
463 466 dsl_dataset_rele(fromds, FTAG);
464 467
465 468 dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
466 469
467 470 dsp->dsa_drr = drr;
468 471 dsp->dsa_vp = vp;
469 472 dsp->dsa_outfd = outfd;
470 473 dsp->dsa_proc = curproc;
471 474 dsp->dsa_os = tosnap;
472 475 dsp->dsa_off = off;
473 476 dsp->dsa_toguid = ds->ds_phys->ds_guid;
474 477 ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
475 478 dsp->dsa_pending_op = PENDING_NONE;
476 479
477 480 mutex_enter(&ds->ds_sendstream_lock);
478 481 list_insert_head(&ds->ds_sendstreams, dsp);
479 482 mutex_exit(&ds->ds_sendstream_lock);
480 483
481 484 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
482 485 err = dsp->dsa_err;
483 486 goto out;
484 487 }
485 488
486 489 err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
487 490 backup_cb, dsp);
488 491
489 492 if (dsp->dsa_pending_op != PENDING_NONE)
490 493 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
491 494 err = EINTR;
492 495
493 496 if (err) {
494 497 if (err == EINTR && dsp->dsa_err)
495 498 err = dsp->dsa_err;
496 499 goto out;
497 500 }
498 501
499 502 bzero(drr, sizeof (dmu_replay_record_t));
500 503 drr->drr_type = DRR_END;
501 504 drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
502 505 drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
503 506
504 507 if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
505 508 err = dsp->dsa_err;
506 509 goto out;
507 510 }
508 511
509 512 out:
510 513 mutex_enter(&ds->ds_sendstream_lock);
511 514 list_remove(&ds->ds_sendstreams, dsp);
512 515 mutex_exit(&ds->ds_sendstream_lock);
513 516
514 517 kmem_free(drr, sizeof (dmu_replay_record_t));
515 518 kmem_free(dsp, sizeof (dmu_sendarg_t));
516 519
517 520 return (err);
518 521 }
519 522
520 523 int
521 524 dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
522 525 uint64_t *sizep)
523 526 {
524 527 dsl_dataset_t *ds = tosnap->os_dsl_dataset;
525 528 dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
526 529 dsl_pool_t *dp = ds->ds_dir->dd_pool;
527 530 int err;
528 531 uint64_t size;
529 532
530 533 /* tosnap must be a snapshot */
531 534 if (ds->ds_phys->ds_next_snap_obj == 0)
532 535 return (EINVAL);
533 536
534 537 /* fromsnap must be an earlier snapshot from the same fs as tosnap */
535 538 if (fromds && (ds->ds_dir != fromds->ds_dir ||
536 539 fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg))
537 540 return (EXDEV);
538 541
539 542 if (fromorigin) {
540 543 if (fromsnap)
541 544 return (EINVAL);
542 545
543 546 if (dsl_dir_is_clone(ds->ds_dir)) {
544 547 rw_enter(&dp->dp_config_rwlock, RW_READER);
545 548 err = dsl_dataset_hold_obj(dp,
546 549 ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds);
547 550 rw_exit(&dp->dp_config_rwlock);
548 551 if (err)
549 552 return (err);
550 553 } else {
551 554 fromorigin = B_FALSE;
552 555 }
553 556 }
554 557
555 558 /* Get uncompressed size estimate of changed data. */
556 559 if (fromds == NULL) {
557 560 size = ds->ds_phys->ds_uncompressed_bytes;
558 561 } else {
559 562 uint64_t used, comp;
560 563 err = dsl_dataset_space_written(fromds, ds,
561 564 &used, &comp, &size);
562 565 if (fromorigin)
563 566 dsl_dataset_rele(fromds, FTAG);
564 567 if (err)
565 568 return (err);
566 569 }
567 570
568 571 /*
569 572 * Assume that space (both on-disk and in-stream) is dominated by
570 573 * data. We will adjust for indirect blocks and the copies property,
571 574 * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
572 575 */
573 576
574 577 /*
575 578 * Subtract out approximate space used by indirect blocks.
576 579 * Assume most space is used by data blocks (non-indirect, non-dnode).
577 580 * Assume all blocks are recordsize. Assume ditto blocks and
 578 581 	 * internal fragmentation cancel out compression.
579 582 *
580 583 * Therefore, space used by indirect blocks is sizeof(blkptr_t) per
581 584 * block, which we observe in practice.
582 585 */
583 586 uint64_t recordsize;
584 587 rw_enter(&dp->dp_config_rwlock, RW_READER);
585 588 err = dsl_prop_get_ds(ds, "recordsize",
586 589 sizeof (recordsize), 1, &recordsize, NULL);
587 590 rw_exit(&dp->dp_config_rwlock);
588 591 if (err)
589 592 return (err);
590 593 size -= size / recordsize * sizeof (blkptr_t);
591 594
592 595 /* Add in the space for the record associated with each block. */
593 596 size += size / recordsize * sizeof (dmu_replay_record_t);
594 597
595 598 *sizep = size;
596 599
597 600 return (0);
598 601 }
599 602
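To make the adjustment above concrete, here is a worked example (a sketch, not illumos code) of the two corrections dmu_send_estimate() applies: subtract one blkptr_t (128 bytes) per recordsize block of changed data, then add one dmu_replay_record_t per block. The 312-byte replay-record size is assumed here for illustration.

#include <stdint.h>
#include <stdio.h>

#define	BLKPTR_SIZE	128	/* sizeof (blkptr_t) */
#define	DRR_SIZE	312	/* sizeof (dmu_replay_record_t), assumed */

int
main(void)
{
	uint64_t size = 10ULL << 30;		/* 10 GiB of changed data */
	uint64_t recordsize = 128 << 10;	/* 128 KiB records */
	uint64_t nblocks = size / recordsize;

	size -= nblocks * BLKPTR_SIZE;	/* on-disk indirect-block overhead */
	size += nblocks * DRR_SIZE;	/* per-block DRR_WRITE headers */
	printf("estimated stream size: %llu bytes\n",
	    (unsigned long long)size);
	return (0);
}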
600 603 struct recvbeginsyncarg {
601 604 const char *tofs;
602 605 const char *tosnap;
603 606 dsl_dataset_t *origin;
604 607 uint64_t fromguid;
605 608 dmu_objset_type_t type;
606 609 void *tag;
607 610 boolean_t force;
608 611 uint64_t dsflags;
609 612 char clonelastname[MAXNAMELEN];
610 613 dsl_dataset_t *ds; /* the ds to recv into; returned from the syncfunc */
611 614 cred_t *cr;
612 615 };
613 616
614 617 /* ARGSUSED */
615 618 static int
616 619 recv_new_check(void *arg1, void *arg2, dmu_tx_t *tx)
617 620 {
618 621 dsl_dir_t *dd = arg1;
619 622 struct recvbeginsyncarg *rbsa = arg2;
620 623 objset_t *mos = dd->dd_pool->dp_meta_objset;
621 624 uint64_t val;
622 625 int err;
623 626
624 627 err = zap_lookup(mos, dd->dd_phys->dd_child_dir_zapobj,
625 628 strrchr(rbsa->tofs, '/') + 1, sizeof (uint64_t), 1, &val);
626 629
627 630 if (err != ENOENT)
628 631 return (err ? err : EEXIST);
629 632
630 633 if (rbsa->origin) {
631 634 /* make sure it's a snap in the same pool */
632 635 if (rbsa->origin->ds_dir->dd_pool != dd->dd_pool)
633 636 return (EXDEV);
634 637 if (!dsl_dataset_is_snapshot(rbsa->origin))
635 638 return (EINVAL);
636 639 if (rbsa->origin->ds_phys->ds_guid != rbsa->fromguid)
637 640 return (ENODEV);
638 641 }
639 642
640 643 return (0);
641 644 }
642 645
643 646 static void
644 647 recv_new_sync(void *arg1, void *arg2, dmu_tx_t *tx)
645 648 {
646 649 dsl_dir_t *dd = arg1;
647 650 struct recvbeginsyncarg *rbsa = arg2;
648 651 uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
649 652 uint64_t dsobj;
650 653
651 654 /* Create and open new dataset. */
652 655 dsobj = dsl_dataset_create_sync(dd, strrchr(rbsa->tofs, '/') + 1,
653 656 rbsa->origin, flags, rbsa->cr, tx);
654 657 VERIFY(0 == dsl_dataset_own_obj(dd->dd_pool, dsobj,
655 658 B_TRUE, dmu_recv_tag, &rbsa->ds));
656 659
657 660 if (rbsa->origin == NULL) {
658 661 (void) dmu_objset_create_impl(dd->dd_pool->dp_spa,
659 662 rbsa->ds, &rbsa->ds->ds_phys->ds_bp, rbsa->type, tx);
660 663 }
661 664
662 665 spa_history_log_internal(LOG_DS_REPLAY_FULL_SYNC,
663 666 dd->dd_pool->dp_spa, tx, "dataset = %lld", dsobj);
664 667 }
665 668
666 669 /* ARGSUSED */
667 670 static int
668 671 recv_existing_check(void *arg1, void *arg2, dmu_tx_t *tx)
669 672 {
670 673 dsl_dataset_t *ds = arg1;
671 674 struct recvbeginsyncarg *rbsa = arg2;
672 675 int err;
673 676 uint64_t val;
674 677
675 678 /* must not have any changes since most recent snapshot */
676 679 if (!rbsa->force && dsl_dataset_modified_since_lastsnap(ds))
677 680 return (ETXTBSY);
678 681
679 682 /* new snapshot name must not exist */
680 683 err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
681 684 ds->ds_phys->ds_snapnames_zapobj, rbsa->tosnap, 8, 1, &val);
682 685 if (err == 0)
683 686 return (EEXIST);
684 687 if (err != ENOENT)
685 688 return (err);
686 689
687 690 if (rbsa->fromguid) {
688 691 /* if incremental, most recent snapshot must match fromguid */
689 692 if (ds->ds_prev == NULL)
690 693 return (ENODEV);
691 694
692 695 /*
693 696 * most recent snapshot must match fromguid, or there are no
694 697 * changes since the fromguid one
695 698 */
696 699 if (ds->ds_prev->ds_phys->ds_guid != rbsa->fromguid) {
697 700 uint64_t birth = ds->ds_prev->ds_phys->ds_bp.blk_birth;
698 701 uint64_t obj = ds->ds_prev->ds_phys->ds_prev_snap_obj;
699 702 while (obj != 0) {
700 703 dsl_dataset_t *snap;
701 704 err = dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
702 705 obj, FTAG, &snap);
703 706 if (err)
704 707 return (ENODEV);
705 708 if (snap->ds_phys->ds_creation_txg < birth) {
706 709 dsl_dataset_rele(snap, FTAG);
707 710 return (ENODEV);
708 711 }
709 712 if (snap->ds_phys->ds_guid == rbsa->fromguid) {
710 713 dsl_dataset_rele(snap, FTAG);
711 714 break; /* it's ok */
712 715 }
713 716 obj = snap->ds_phys->ds_prev_snap_obj;
714 717 dsl_dataset_rele(snap, FTAG);
715 718 }
716 719 if (obj == 0)
717 720 return (ENODEV);
718 721 }
719 722 } else {
720 723 /* if full, most recent snapshot must be $ORIGIN */
721 724 if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL)
722 725 return (ENODEV);
723 726 }
724 727
725 728 /* temporary clone name must not exist */
726 729 err = zap_lookup(ds->ds_dir->dd_pool->dp_meta_objset,
727 730 ds->ds_dir->dd_phys->dd_child_dir_zapobj,
728 731 rbsa->clonelastname, 8, 1, &val);
729 732 if (err == 0)
730 733 return (EEXIST);
731 734 if (err != ENOENT)
732 735 return (err);
733 736
734 737 return (0);
735 738 }
736 739
737 740 /* ARGSUSED */
738 741 static void
739 742 recv_existing_sync(void *arg1, void *arg2, dmu_tx_t *tx)
740 743 {
741 744 dsl_dataset_t *ohds = arg1;
742 745 struct recvbeginsyncarg *rbsa = arg2;
743 746 dsl_pool_t *dp = ohds->ds_dir->dd_pool;
744 747 dsl_dataset_t *cds;
745 748 uint64_t flags = DS_FLAG_INCONSISTENT | rbsa->dsflags;
746 749 uint64_t dsobj;
747 750
748 751 /* create and open the temporary clone */
749 752 dsobj = dsl_dataset_create_sync(ohds->ds_dir, rbsa->clonelastname,
750 753 ohds->ds_prev, flags, rbsa->cr, tx);
751 754 VERIFY(0 == dsl_dataset_own_obj(dp, dsobj, B_TRUE, dmu_recv_tag, &cds));
752 755
753 756 /*
754 757 * If we actually created a non-clone, we need to create the
755 758 * objset in our new dataset.
756 759 */
757 760 if (BP_IS_HOLE(dsl_dataset_get_blkptr(cds))) {
758 761 (void) dmu_objset_create_impl(dp->dp_spa,
759 762 cds, dsl_dataset_get_blkptr(cds), rbsa->type, tx);
760 763 }
761 764
762 765 rbsa->ds = cds;
763 766
764 767 spa_history_log_internal(LOG_DS_REPLAY_INC_SYNC,
765 768 dp->dp_spa, tx, "dataset = %lld", dsobj);
766 769 }
767 770
768 771 static boolean_t
769 772 dmu_recv_verify_features(dsl_dataset_t *ds, struct drr_begin *drrb)
770 773 {
771 774 int featureflags;
772 775
773 776 featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
774 777
775 778 /* Verify pool version supports SA if SA_SPILL feature set */
776 779 return ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
777 780 (spa_version(dsl_dataset_get_spa(ds)) < SPA_VERSION_SA));
778 781 }
779 782
780 783 /*
781 784 * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
782 785 * succeeds; otherwise we will leak the holds on the datasets.
783 786 */
784 787 int
785 788 dmu_recv_begin(char *tofs, char *tosnap, char *top_ds, struct drr_begin *drrb,
786 789 boolean_t force, objset_t *origin, dmu_recv_cookie_t *drc)
787 790 {
788 791 int err = 0;
789 792 boolean_t byteswap;
790 793 struct recvbeginsyncarg rbsa = { 0 };
791 794 uint64_t versioninfo;
792 795 int flags;
793 796 dsl_dataset_t *ds;
794 797
795 798 if (drrb->drr_magic == DMU_BACKUP_MAGIC)
796 799 byteswap = FALSE;
797 800 else if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
798 801 byteswap = TRUE;
799 802 else
800 803 return (EINVAL);
801 804
802 805 rbsa.tofs = tofs;
803 806 rbsa.tosnap = tosnap;
804 807 rbsa.origin = origin ? origin->os_dsl_dataset : NULL;
805 808 rbsa.fromguid = drrb->drr_fromguid;
806 809 rbsa.type = drrb->drr_type;
807 810 rbsa.tag = FTAG;
808 811 rbsa.dsflags = 0;
809 812 rbsa.cr = CRED();
810 813 versioninfo = drrb->drr_versioninfo;
811 814 flags = drrb->drr_flags;
812 815
813 816 if (byteswap) {
814 817 rbsa.type = BSWAP_32(rbsa.type);
815 818 rbsa.fromguid = BSWAP_64(rbsa.fromguid);
816 819 versioninfo = BSWAP_64(versioninfo);
817 820 flags = BSWAP_32(flags);
818 821 }
819 822
820 823 if (DMU_GET_STREAM_HDRTYPE(versioninfo) == DMU_COMPOUNDSTREAM ||
821 824 rbsa.type >= DMU_OST_NUMTYPES ||
822 825 ((flags & DRR_FLAG_CLONE) && origin == NULL))
823 826 return (EINVAL);
824 827
825 828 if (flags & DRR_FLAG_CI_DATA)
826 829 rbsa.dsflags = DS_FLAG_CI_DATASET;
827 830
828 831 bzero(drc, sizeof (dmu_recv_cookie_t));
829 832 drc->drc_drrb = drrb;
830 833 drc->drc_tosnap = tosnap;
831 834 drc->drc_top_ds = top_ds;
832 835 drc->drc_force = force;
833 836
834 837 /*
835 838 * Process the begin in syncing context.
836 839 */
837 840
838 841 /* open the dataset we are logically receiving into */
839 842 err = dsl_dataset_hold(tofs, dmu_recv_tag, &ds);
840 843 if (err == 0) {
841 844 if (dmu_recv_verify_features(ds, drrb)) {
842 845 dsl_dataset_rele(ds, dmu_recv_tag);
843 846 return (ENOTSUP);
844 847 }
845 848 /* target fs already exists; recv into temp clone */
846 849
847 850 /* Can't recv a clone into an existing fs */
848 851 if (flags & DRR_FLAG_CLONE) {
849 852 dsl_dataset_rele(ds, dmu_recv_tag);
850 853 return (EINVAL);
851 854 }
852 855
853 856 /* must not have an incremental recv already in progress */
854 857 if (!mutex_tryenter(&ds->ds_recvlock)) {
855 858 dsl_dataset_rele(ds, dmu_recv_tag);
856 859 return (EBUSY);
857 860 }
858 861
 859 862 		/* tmp clone name is: tofs/%tosnap */
860 863 (void) snprintf(rbsa.clonelastname, sizeof (rbsa.clonelastname),
861 864 "%%%s", tosnap);
862 865 rbsa.force = force;
863 866 err = dsl_sync_task_do(ds->ds_dir->dd_pool,
864 867 recv_existing_check, recv_existing_sync, ds, &rbsa, 5);
865 868 if (err) {
866 869 mutex_exit(&ds->ds_recvlock);
867 870 dsl_dataset_rele(ds, dmu_recv_tag);
868 871 return (err);
869 872 }
870 873 drc->drc_logical_ds = ds;
871 874 drc->drc_real_ds = rbsa.ds;
872 875 } else if (err == ENOENT) {
873 876 /* target fs does not exist; must be a full backup or clone */
874 877 char *cp;
875 878
876 879 /*
877 880 * If it's a non-clone incremental, we are missing the
878 881 * target fs, so fail the recv.
879 882 */
880 883 if (rbsa.fromguid && !(flags & DRR_FLAG_CLONE))
881 884 return (ENOENT);
882 885
883 886 /* Open the parent of tofs */
884 887 cp = strrchr(tofs, '/');
885 888 *cp = '\0';
886 889 err = dsl_dataset_hold(tofs, FTAG, &ds);
887 890 *cp = '/';
888 891 if (err)
889 892 return (err);
890 893
891 894 if (dmu_recv_verify_features(ds, drrb)) {
892 895 dsl_dataset_rele(ds, FTAG);
893 896 return (ENOTSUP);
894 897 }
895 898
896 899 err = dsl_sync_task_do(ds->ds_dir->dd_pool,
897 900 recv_new_check, recv_new_sync, ds->ds_dir, &rbsa, 5);
898 901 dsl_dataset_rele(ds, FTAG);
899 902 if (err)
900 903 return (err);
901 904 drc->drc_logical_ds = drc->drc_real_ds = rbsa.ds;
902 905 drc->drc_newfs = B_TRUE;
903 906 }
904 907
905 908 return (err);
906 909 }
907 910
908 911 struct restorearg {
909 912 int err;
910 913 int byteswap;
911 914 vnode_t *vp;
912 915 char *buf;
913 916 uint64_t voff;
914 917 int bufsize; /* amount of memory allocated for buf */
915 918 zio_cksum_t cksum;
916 919 avl_tree_t *guid_to_ds_map;
917 920 };
918 921
919 922 typedef struct guid_map_entry {
920 923 uint64_t guid;
921 924 dsl_dataset_t *gme_ds;
922 925 avl_node_t avlnode;
923 926 } guid_map_entry_t;
924 927
925 928 static int
926 929 guid_compare(const void *arg1, const void *arg2)
927 930 {
928 931 const guid_map_entry_t *gmep1 = arg1;
929 932 const guid_map_entry_t *gmep2 = arg2;
930 933
931 934 if (gmep1->guid < gmep2->guid)
932 935 return (-1);
933 936 else if (gmep1->guid > gmep2->guid)
934 937 return (1);
935 938 return (0);
936 939 }
937 940
938 941 static void
939 942 free_guid_map_onexit(void *arg)
940 943 {
941 944 avl_tree_t *ca = arg;
942 945 void *cookie = NULL;
943 946 guid_map_entry_t *gmep;
944 947
945 948 while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
946 949 dsl_dataset_rele(gmep->gme_ds, ca);
947 950 kmem_free(gmep, sizeof (guid_map_entry_t));
948 951 }
949 952 avl_destroy(ca);
950 953 kmem_free(ca, sizeof (avl_tree_t));
951 954 }
952 955
953 956 static void *
954 957 restore_read(struct restorearg *ra, int len)
955 958 {
956 959 void *rv;
957 960 int done = 0;
958 961
959 962 /* some things will require 8-byte alignment, so everything must */
960 963 ASSERT3U(len % 8, ==, 0);
961 964
962 965 while (done < len) {
963 966 ssize_t resid;
964 967
965 968 ra->err = vn_rdwr(UIO_READ, ra->vp,
966 969 (caddr_t)ra->buf + done, len - done,
967 970 ra->voff, UIO_SYSSPACE, FAPPEND,
968 971 RLIM64_INFINITY, CRED(), &resid);
969 972
970 973 if (resid == len - done)
971 974 ra->err = EINVAL;
972 975 ra->voff += len - done - resid;
973 976 done = len - resid;
974 977 if (ra->err)
975 978 return (NULL);
976 979 }
977 980
978 981 ASSERT3U(done, ==, len);
979 982 rv = ra->buf;
980 983 if (ra->byteswap)
981 984 fletcher_4_incremental_byteswap(rv, len, &ra->cksum);
982 985 else
983 986 fletcher_4_incremental_native(rv, len, &ra->cksum);
984 987 return (rv);
985 988 }
986 989
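restore_read() loops because vn_rdwr() may return a short count: it keeps reading until the full record is in, and treats a read that makes no progress (resid == len - done) as a truncated stream. A userland analog using read(2) is sketched below; read_full() is a hypothetical name.

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

/* read exactly len bytes from fd, or report why we could not */
static int
read_full(int fd, void *buf, size_t len)
{
	size_t done = 0;

	while (done < len) {
		ssize_t n = read(fd, (char *)buf + done, len - done);

		if (n < 0)
			return (errno);		/* I/O error */
		if (n == 0)
			return (EINVAL);	/* truncated stream */
		done += n;
	}
	return (0);
}

int
main(void)
{
	char hdr[8];
	int err = read_full(STDIN_FILENO, hdr, sizeof (hdr));

	printf("read_full: %s\n", err == 0 ? "got a full record" :
	    "stream too short");
	return (err);
}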
987 990 static void
988 991 backup_byteswap(dmu_replay_record_t *drr)
989 992 {
990 993 #define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
991 994 #define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
992 995 drr->drr_type = BSWAP_32(drr->drr_type);
993 996 drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
994 997 switch (drr->drr_type) {
995 998 case DRR_BEGIN:
996 999 DO64(drr_begin.drr_magic);
997 1000 DO64(drr_begin.drr_versioninfo);
998 1001 DO64(drr_begin.drr_creation_time);
999 1002 DO32(drr_begin.drr_type);
1000 1003 DO32(drr_begin.drr_flags);
1001 1004 DO64(drr_begin.drr_toguid);
1002 1005 DO64(drr_begin.drr_fromguid);
1003 1006 break;
1004 1007 case DRR_OBJECT:
1005 1008 DO64(drr_object.drr_object);
1006 1009 /* DO64(drr_object.drr_allocation_txg); */
1007 1010 DO32(drr_object.drr_type);
1008 1011 DO32(drr_object.drr_bonustype);
1009 1012 DO32(drr_object.drr_blksz);
1010 1013 DO32(drr_object.drr_bonuslen);
1011 1014 DO64(drr_object.drr_toguid);
1012 1015 break;
1013 1016 case DRR_FREEOBJECTS:
1014 1017 DO64(drr_freeobjects.drr_firstobj);
1015 1018 DO64(drr_freeobjects.drr_numobjs);
1016 1019 DO64(drr_freeobjects.drr_toguid);
1017 1020 break;
1018 1021 case DRR_WRITE:
1019 1022 DO64(drr_write.drr_object);
1020 1023 DO32(drr_write.drr_type);
1021 1024 DO64(drr_write.drr_offset);
1022 1025 DO64(drr_write.drr_length);
1023 1026 DO64(drr_write.drr_toguid);
1024 1027 DO64(drr_write.drr_key.ddk_cksum.zc_word[0]);
1025 1028 DO64(drr_write.drr_key.ddk_cksum.zc_word[1]);
1026 1029 DO64(drr_write.drr_key.ddk_cksum.zc_word[2]);
1027 1030 DO64(drr_write.drr_key.ddk_cksum.zc_word[3]);
1028 1031 DO64(drr_write.drr_key.ddk_prop);
1029 1032 break;
1030 1033 case DRR_WRITE_BYREF:
1031 1034 DO64(drr_write_byref.drr_object);
1032 1035 DO64(drr_write_byref.drr_offset);
1033 1036 DO64(drr_write_byref.drr_length);
1034 1037 DO64(drr_write_byref.drr_toguid);
1035 1038 DO64(drr_write_byref.drr_refguid);
1036 1039 DO64(drr_write_byref.drr_refobject);
1037 1040 DO64(drr_write_byref.drr_refoffset);
1038 1041 DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]);
1039 1042 DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]);
1040 1043 DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]);
1041 1044 DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]);
1042 1045 DO64(drr_write_byref.drr_key.ddk_prop);
1043 1046 break;
1044 1047 case DRR_FREE:
1045 1048 DO64(drr_free.drr_object);
1046 1049 DO64(drr_free.drr_offset);
1047 1050 DO64(drr_free.drr_length);
1048 1051 DO64(drr_free.drr_toguid);
1049 1052 break;
1050 1053 case DRR_SPILL:
1051 1054 DO64(drr_spill.drr_object);
1052 1055 DO64(drr_spill.drr_length);
1053 1056 DO64(drr_spill.drr_toguid);
1054 1057 break;
1055 1058 case DRR_END:
1056 1059 DO64(drr_end.drr_checksum.zc_word[0]);
1057 1060 DO64(drr_end.drr_checksum.zc_word[1]);
1058 1061 DO64(drr_end.drr_checksum.zc_word[2]);
1059 1062 DO64(drr_end.drr_checksum.zc_word[3]);
1060 1063 DO64(drr_end.drr_toguid);
1061 1064 break;
1062 1065 }
1063 1066 #undef DO64
1064 1067 #undef DO32
1065 1068 }
1066 1069
1067 1070 static int
1068 1071 restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
1069 1072 {
1070 1073 int err;
1071 1074 dmu_tx_t *tx;
1072 1075 void *data = NULL;
1073 1076
1074 1077 if (drro->drr_type == DMU_OT_NONE ||
1075 1078 !DMU_OT_IS_VALID(drro->drr_type) ||
1076 1079 !DMU_OT_IS_VALID(drro->drr_bonustype) ||
1077 1080 drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS ||
1078 1081 drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
1079 1082 P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
1080 1083 drro->drr_blksz < SPA_MINBLOCKSIZE ||
1081 1084 drro->drr_blksz > SPA_MAXBLOCKSIZE ||
1082 1085 drro->drr_bonuslen > DN_MAX_BONUSLEN) {
1083 1086 return (EINVAL);
1084 1087 }
1085 1088
1086 1089 err = dmu_object_info(os, drro->drr_object, NULL);
1087 1090
1088 1091 if (err != 0 && err != ENOENT)
1089 1092 return (EINVAL);
1090 1093
1091 1094 if (drro->drr_bonuslen) {
1092 1095 data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
1093 1096 if (ra->err)
1094 1097 return (ra->err);
1095 1098 }
1096 1099
1097 1100 if (err == ENOENT) {
1098 1101 /* currently free, want to be allocated */
1099 1102 tx = dmu_tx_create(os);
1100 1103 dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
1101 1104 err = dmu_tx_assign(tx, TXG_WAIT);
1102 1105 if (err) {
1103 1106 dmu_tx_abort(tx);
1104 1107 return (err);
1105 1108 }
1106 1109 err = dmu_object_claim(os, drro->drr_object,
1107 1110 drro->drr_type, drro->drr_blksz,
1108 1111 drro->drr_bonustype, drro->drr_bonuslen, tx);
1109 1112 dmu_tx_commit(tx);
1110 1113 } else {
1111 1114 /* currently allocated, want to be allocated */
1112 1115 err = dmu_object_reclaim(os, drro->drr_object,
1113 1116 drro->drr_type, drro->drr_blksz,
1114 1117 drro->drr_bonustype, drro->drr_bonuslen);
1115 1118 }
1116 1119 if (err) {
1117 1120 return (EINVAL);
1118 1121 }
1119 1122
1120 1123 tx = dmu_tx_create(os);
1121 1124 dmu_tx_hold_bonus(tx, drro->drr_object);
1122 1125 err = dmu_tx_assign(tx, TXG_WAIT);
1123 1126 if (err) {
1124 1127 dmu_tx_abort(tx);
1125 1128 return (err);
1126 1129 }
1127 1130
1128 1131 dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype,
1129 1132 tx);
1130 1133 dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);
1131 1134
1132 1135 if (data != NULL) {
1133 1136 dmu_buf_t *db;
1134 1137
1135 1138 VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
1136 1139 dmu_buf_will_dirty(db, tx);
1137 1140
1138 1141 ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
1139 1142 bcopy(data, db->db_data, drro->drr_bonuslen);
1140 1143 if (ra->byteswap) {
1141 1144 dmu_object_byteswap_t byteswap =
1142 1145 DMU_OT_BYTESWAP(drro->drr_bonustype);
1143 1146 dmu_ot_byteswap[byteswap].ob_func(db->db_data,
1144 1147 drro->drr_bonuslen);
1145 1148 }
1146 1149 dmu_buf_rele(db, FTAG);
1147 1150 }
1148 1151 dmu_tx_commit(tx);
1149 1152 return (0);
1150 1153 }
1151 1154
1152 1155 /* ARGSUSED */
1153 1156 static int
1154 1157 restore_freeobjects(struct restorearg *ra, objset_t *os,
1155 1158 struct drr_freeobjects *drrfo)
1156 1159 {
1157 1160 uint64_t obj;
1158 1161
1159 1162 if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
1160 1163 return (EINVAL);
1161 1164
1162 1165 for (obj = drrfo->drr_firstobj;
1163 1166 obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
1164 1167 (void) dmu_object_next(os, &obj, FALSE, 0)) {
1165 1168 int err;
1166 1169
1167 1170 if (dmu_object_info(os, obj, NULL) != 0)
1168 1171 continue;
1169 1172
1170 1173 err = dmu_free_object(os, obj);
1171 1174 if (err)
1172 1175 return (err);
1173 1176 }
1174 1177 return (0);
1175 1178 }
1176 1179
1177 1180 static int
1178 1181 restore_write(struct restorearg *ra, objset_t *os,
1179 1182 struct drr_write *drrw)
1180 1183 {
1181 1184 dmu_tx_t *tx;
1182 1185 void *data;
1183 1186 int err;
1184 1187
1185 1188 if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
1186 1189 !DMU_OT_IS_VALID(drrw->drr_type))
1187 1190 return (EINVAL);
1188 1191
1189 1192 data = restore_read(ra, drrw->drr_length);
1190 1193 if (data == NULL)
1191 1194 return (ra->err);
1192 1195
1193 1196 if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
1194 1197 return (EINVAL);
1195 1198
1196 1199 tx = dmu_tx_create(os);
1197 1200
1198 1201 dmu_tx_hold_write(tx, drrw->drr_object,
1199 1202 drrw->drr_offset, drrw->drr_length);
1200 1203 err = dmu_tx_assign(tx, TXG_WAIT);
1201 1204 if (err) {
1202 1205 dmu_tx_abort(tx);
1203 1206 return (err);
1204 1207 }
1205 1208 if (ra->byteswap) {
1206 1209 dmu_object_byteswap_t byteswap =
1207 1210 DMU_OT_BYTESWAP(drrw->drr_type);
1208 1211 dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
1209 1212 }
1210 1213 dmu_write(os, drrw->drr_object,
1211 1214 drrw->drr_offset, drrw->drr_length, data, tx);
1212 1215 dmu_tx_commit(tx);
1213 1216 return (0);
1214 1217 }
1215 1218
1216 1219 /*
1217 1220 * Handle a DRR_WRITE_BYREF record. This record is used in dedup'ed
1218 1221 * streams to refer to a copy of the data that is already on the
1219 1222 * system because it came in earlier in the stream. This function
1220 1223 * finds the earlier copy of the data, and uses that copy instead of
1221 1224 * data from the stream to fulfill this write.
1222 1225 */
1223 1226 static int
1224 1227 restore_write_byref(struct restorearg *ra, objset_t *os,
1225 1228 struct drr_write_byref *drrwbr)
1226 1229 {
1227 1230 dmu_tx_t *tx;
1228 1231 int err;
1229 1232 guid_map_entry_t gmesrch;
1230 1233 guid_map_entry_t *gmep;
1231 1234 avl_index_t where;
1232 1235 objset_t *ref_os = NULL;
1233 1236 dmu_buf_t *dbp;
1234 1237
1235 1238 if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset)
1236 1239 return (EINVAL);
1237 1240
1238 1241 /*
1239 1242 * If the GUID of the referenced dataset is different from the
1240 1243 * GUID of the target dataset, find the referenced dataset.
1241 1244 */
1242 1245 if (drrwbr->drr_toguid != drrwbr->drr_refguid) {
1243 1246 gmesrch.guid = drrwbr->drr_refguid;
1244 1247 if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch,
1245 1248 &where)) == NULL) {
1246 1249 return (EINVAL);
1247 1250 }
1248 1251 if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
1249 1252 return (EINVAL);
1250 1253 } else {
1251 1254 ref_os = os;
1252 1255 }
1253 1256
1254 1257 if (err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
1255 1258 drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH))
1256 1259 return (err);
1257 1260
1258 1261 tx = dmu_tx_create(os);
1259 1262
1260 1263 dmu_tx_hold_write(tx, drrwbr->drr_object,
1261 1264 drrwbr->drr_offset, drrwbr->drr_length);
1262 1265 err = dmu_tx_assign(tx, TXG_WAIT);
1263 1266 if (err) {
1264 1267 dmu_tx_abort(tx);
1265 1268 return (err);
1266 1269 }
1267 1270 dmu_write(os, drrwbr->drr_object,
1268 1271 drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
1269 1272 dmu_buf_rele(dbp, FTAG);
1270 1273 dmu_tx_commit(tx);
1271 1274 return (0);
1272 1275 }
1273 1276
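The lookup in restore_write_byref() resolves drr_refguid through the guid_to_ds_map AVL tree that add_ds_to_guidmap() populates as received snapshots complete. The standalone sketch below shows the same idea, substituting bsearch() over a sorted array for the kernel AVL tree and a dataset-name string for the dsl_dataset_t hold; both substitutions are assumptions for illustration only.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct guid_map_entry {
	uint64_t guid;
	const char *dataset;	/* stands in for a dsl_dataset_t hold */
} guid_map_entry_t;

static int
guid_compare(const void *arg1, const void *arg2)
{
	const guid_map_entry_t *g1 = arg1;
	const guid_map_entry_t *g2 = arg2;

	if (g1->guid < g2->guid)
		return (-1);
	return (g1->guid > g2->guid);
}

int
main(void)
{
	/* entries added as snapshots complete, kept sorted by guid */
	guid_map_entry_t map[] = {
		{ 0x1111, "pool/fs@snap1" },
		{ 0x2222, "pool/fs@snap2" },
	};
	guid_map_entry_t key = { 0x2222, NULL };
	guid_map_entry_t *gmep = bsearch(&key, map, 2, sizeof (key),
	    guid_compare);

	/* a WRITE_BYREF naming an unknown refguid would fail with EINVAL */
	printf("refguid %llx -> %s\n", (unsigned long long)key.guid,
	    gmep != NULL ? gmep->dataset : "EINVAL");
	return (0);
}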
1274 1277 static int
1275 1278 restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
1276 1279 {
1277 1280 dmu_tx_t *tx;
1278 1281 void *data;
1279 1282 dmu_buf_t *db, *db_spill;
1280 1283 int err;
1281 1284
1282 1285 if (drrs->drr_length < SPA_MINBLOCKSIZE ||
1283 1286 drrs->drr_length > SPA_MAXBLOCKSIZE)
1284 1287 return (EINVAL);
1285 1288
1286 1289 data = restore_read(ra, drrs->drr_length);
1287 1290 if (data == NULL)
1288 1291 return (ra->err);
1289 1292
1290 1293 if (dmu_object_info(os, drrs->drr_object, NULL) != 0)
1291 1294 return (EINVAL);
1292 1295
1293 1296 VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db));
1294 1297 if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) {
1295 1298 dmu_buf_rele(db, FTAG);
1296 1299 return (err);
1297 1300 }
1298 1301
1299 1302 tx = dmu_tx_create(os);
1300 1303
1301 1304 dmu_tx_hold_spill(tx, db->db_object);
1302 1305
1303 1306 err = dmu_tx_assign(tx, TXG_WAIT);
1304 1307 if (err) {
1305 1308 dmu_buf_rele(db, FTAG);
1306 1309 dmu_buf_rele(db_spill, FTAG);
1307 1310 dmu_tx_abort(tx);
1308 1311 return (err);
1309 1312 }
1310 1313 dmu_buf_will_dirty(db_spill, tx);
1311 1314
1312 1315 if (db_spill->db_size < drrs->drr_length)
1313 1316 VERIFY(0 == dbuf_spill_set_blksz(db_spill,
1314 1317 drrs->drr_length, tx));
1315 1318 bcopy(data, db_spill->db_data, drrs->drr_length);
1316 1319
1317 1320 dmu_buf_rele(db, FTAG);
1318 1321 dmu_buf_rele(db_spill, FTAG);
1319 1322
1320 1323 dmu_tx_commit(tx);
1321 1324 return (0);
1322 1325 }
1323 1326
1324 1327 /* ARGSUSED */
1325 1328 static int
1326 1329 restore_free(struct restorearg *ra, objset_t *os,
1327 1330 struct drr_free *drrf)
1328 1331 {
1329 1332 int err;
1330 1333
1331 1334 if (drrf->drr_length != -1ULL &&
1332 1335 drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
1333 1336 return (EINVAL);
1334 1337
1335 1338 if (dmu_object_info(os, drrf->drr_object, NULL) != 0)
1336 1339 return (EINVAL);
1337 1340
1338 1341 err = dmu_free_long_range(os, drrf->drr_object,
1339 1342 drrf->drr_offset, drrf->drr_length);
1340 1343 return (err);
1341 1344 }
1342 1345
1343 1346 /*
1344 1347 * NB: callers *must* call dmu_recv_end() if this succeeds.
1345 1348 */
1346 1349 int
1347 1350 dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
1348 1351 int cleanup_fd, uint64_t *action_handlep)
1349 1352 {
1350 1353 struct restorearg ra = { 0 };
1351 1354 dmu_replay_record_t *drr;
1352 1355 objset_t *os;
1353 1356 zio_cksum_t pcksum;
1354 1357 int featureflags;
1355 1358
1356 1359 if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
1357 1360 ra.byteswap = TRUE;
1358 1361
1359 1362 {
1360 1363 /* compute checksum of drr_begin record */
1361 1364 dmu_replay_record_t *drr;
1362 1365 drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
1363 1366
1364 1367 drr->drr_type = DRR_BEGIN;
1365 1368 drr->drr_u.drr_begin = *drc->drc_drrb;
1366 1369 if (ra.byteswap) {
1367 1370 fletcher_4_incremental_byteswap(drr,
1368 1371 sizeof (dmu_replay_record_t), &ra.cksum);
1369 1372 } else {
1370 1373 fletcher_4_incremental_native(drr,
1371 1374 sizeof (dmu_replay_record_t), &ra.cksum);
1372 1375 }
1373 1376 kmem_free(drr, sizeof (dmu_replay_record_t));
1374 1377 }
1375 1378
1376 1379 if (ra.byteswap) {
1377 1380 struct drr_begin *drrb = drc->drc_drrb;
1378 1381 drrb->drr_magic = BSWAP_64(drrb->drr_magic);
1379 1382 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
1380 1383 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
1381 1384 drrb->drr_type = BSWAP_32(drrb->drr_type);
1382 1385 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
1383 1386 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
1384 1387 }
1385 1388
1386 1389 ra.vp = vp;
1387 1390 ra.voff = *voffp;
1388 1391 ra.bufsize = 1<<20;
1389 1392 ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
1390 1393
1391 1394 /* these were verified in dmu_recv_begin */
1392 1395 ASSERT(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo) ==
1393 1396 DMU_SUBSTREAM);
1394 1397 ASSERT(drc->drc_drrb->drr_type < DMU_OST_NUMTYPES);
1395 1398
1396 1399 /*
1397 1400 * Open the objset we are modifying.
1398 1401 */
1399 1402 VERIFY(dmu_objset_from_ds(drc->drc_real_ds, &os) == 0);
1400 1403
1401 1404 ASSERT(drc->drc_real_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT);
1402 1405
1403 1406 featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo);
1404 1407
1405 1408 /* if this stream is dedup'ed, set up the avl tree for guid mapping */
1406 1409 if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
1407 1410 minor_t minor;
1408 1411
1409 1412 if (cleanup_fd == -1) {
1410 1413 ra.err = EBADF;
1411 1414 goto out;
1412 1415 }
1413 1416 ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor);
1414 1417 if (ra.err) {
1415 1418 cleanup_fd = -1;
1416 1419 goto out;
1417 1420 }
1418 1421
1419 1422 if (*action_handlep == 0) {
1420 1423 ra.guid_to_ds_map =
1421 1424 kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
1422 1425 avl_create(ra.guid_to_ds_map, guid_compare,
1423 1426 sizeof (guid_map_entry_t),
1424 1427 offsetof(guid_map_entry_t, avlnode));
1425 1428 ra.err = zfs_onexit_add_cb(minor,
1426 1429 free_guid_map_onexit, ra.guid_to_ds_map,
1427 1430 action_handlep);
1428 1431 if (ra.err)
1429 1432 goto out;
1430 1433 } else {
1431 1434 ra.err = zfs_onexit_cb_data(minor, *action_handlep,
1432 1435 (void **)&ra.guid_to_ds_map);
1433 1436 if (ra.err)
1434 1437 goto out;
1435 1438 }
1436 1439
1437 1440 drc->drc_guid_to_ds_map = ra.guid_to_ds_map;
1438 1441 }
1439 1442
1440 1443 /*
1441 1444 * Read records and process them.
1442 1445 */
1443 1446 pcksum = ra.cksum;
1444 1447 while (ra.err == 0 &&
1445 1448 NULL != (drr = restore_read(&ra, sizeof (*drr)))) {
1446 1449 if (issig(JUSTLOOKING) && issig(FORREAL)) {
1447 1450 ra.err = EINTR;
1448 1451 goto out;
1449 1452 }
1450 1453
1451 1454 if (ra.byteswap)
1452 1455 backup_byteswap(drr);
1453 1456
1454 1457 switch (drr->drr_type) {
1455 1458 case DRR_OBJECT:
1456 1459 {
1457 1460 /*
1458 1461 * We need to make a copy of the record header,
1459 1462 * because restore_{object,write} may need to
1460 1463 * restore_read(), which will invalidate drr.
1461 1464 */
1462 1465 struct drr_object drro = drr->drr_u.drr_object;
1463 1466 ra.err = restore_object(&ra, os, &drro);
1464 1467 break;
1465 1468 }
1466 1469 case DRR_FREEOBJECTS:
1467 1470 {
1468 1471 struct drr_freeobjects drrfo =
1469 1472 drr->drr_u.drr_freeobjects;
1470 1473 ra.err = restore_freeobjects(&ra, os, &drrfo);
1471 1474 break;
1472 1475 }
1473 1476 case DRR_WRITE:
1474 1477 {
1475 1478 struct drr_write drrw = drr->drr_u.drr_write;
1476 1479 ra.err = restore_write(&ra, os, &drrw);
1477 1480 break;
1478 1481 }
1479 1482 case DRR_WRITE_BYREF:
1480 1483 {
1481 1484 struct drr_write_byref drrwbr =
1482 1485 drr->drr_u.drr_write_byref;
1483 1486 ra.err = restore_write_byref(&ra, os, &drrwbr);
1484 1487 break;
1485 1488 }
1486 1489 case DRR_FREE:
1487 1490 {
1488 1491 struct drr_free drrf = drr->drr_u.drr_free;
1489 1492 ra.err = restore_free(&ra, os, &drrf);
1490 1493 break;
1491 1494 }
1492 1495 case DRR_END:
1493 1496 {
1494 1497 struct drr_end drre = drr->drr_u.drr_end;
1495 1498 /*
1496 1499 * We compare against the *previous* checksum
1497 1500 * value, because the stored checksum is of
1498 1501 * everything before the DRR_END record.
1499 1502 */
1500 1503 if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum))
1501 1504 ra.err = ECKSUM;
1502 1505 goto out;
1503 1506 }
1504 1507 case DRR_SPILL:
1505 1508 {
1506 1509 struct drr_spill drrs = drr->drr_u.drr_spill;
1507 1510 ra.err = restore_spill(&ra, os, &drrs);
1508 1511 break;
1509 1512 }
1510 1513 default:
1511 1514 ra.err = EINVAL;
1512 1515 goto out;
1513 1516 }
1514 1517 pcksum = ra.cksum;
1515 1518 }
1516 1519 ASSERT(ra.err != 0);
1517 1520
1518 1521 out:
1519 1522 if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
1520 1523 zfs_onexit_fd_rele(cleanup_fd);
1521 1524
1522 1525 if (ra.err != 0) {
1523 1526 /*
1524 1527 * destroy what we created, so we don't leave it in the
1525 1528 * inconsistent restoring state.
1526 1529 */
1527 1530 txg_wait_synced(drc->drc_real_ds->ds_dir->dd_pool, 0);
1528 1531
1529 1532 (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
1530 1533 B_FALSE);
1531 1534 if (drc->drc_real_ds != drc->drc_logical_ds) {
1532 1535 mutex_exit(&drc->drc_logical_ds->ds_recvlock);
1533 1536 dsl_dataset_rele(drc->drc_logical_ds, dmu_recv_tag);
1534 1537 }
1535 1538 }
1536 1539
1537 1540 kmem_free(ra.buf, ra.bufsize);
1538 1541 *voffp = ra.voff;
1539 1542 return (ra.err);
1540 1543 }
1541 1544
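The DRR_END handling above compares against pcksum, the running checksum saved before the END record itself was folded in, because the sender stored a checksum covering everything up to but not including DRR_END. A toy sketch of that dance follows; the byte-sum checksum and record_t layout are stand-ins, not the real Fletcher-4 or dmu_replay_record_t.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

typedef struct record {
	int is_end;
	uint64_t stored_cksum;	/* valid only when is_end is set */
	char payload[8];
} record_t;

static uint64_t
cksum_update(uint64_t c, const void *buf, size_t len)
{
	const unsigned char *p = buf;

	while (len--)
		c += *p++;
	return (c);
}

int
main(void)
{
	record_t recs[3];
	uint64_t sendck = 0;

	memset(recs, 0, sizeof (recs));
	memcpy(recs[0].payload, "objects", 8);
	memcpy(recs[1].payload, "writes!", 8);

	/* sender: END carries the checksum of everything before it */
	sendck = cksum_update(sendck, &recs[0], sizeof (record_t));
	sendck = cksum_update(sendck, &recs[1], sizeof (record_t));
	recs[2].is_end = 1;
	recs[2].stored_cksum = sendck;

	/* receiver: keep the previous checksum, as dmu_recv_stream does */
	uint64_t cksum = 0, pcksum = 0;
	for (int i = 0; i < 3; i++) {
		pcksum = cksum;		/* checksum before this record */
		cksum = cksum_update(cksum, &recs[i], sizeof (record_t));
		if (recs[i].is_end)
			printf("%s\n", recs[i].stored_cksum == pcksum ?
			    "stream OK" : "ECKSUM");
	}
	return (0);
}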
1542 1545 struct recvendsyncarg {
1543 1546 char *tosnap;
1544 1547 uint64_t creation_time;
1545 1548 uint64_t toguid;
1546 1549 };
1547 1550
1548 1551 static int
1549 1552 recv_end_check(void *arg1, void *arg2, dmu_tx_t *tx)
1550 1553 {
1551 1554 dsl_dataset_t *ds = arg1;
1552 1555 struct recvendsyncarg *resa = arg2;
1553 1556
1554 1557 return (dsl_dataset_snapshot_check(ds, resa->tosnap, tx));
1555 1558 }
1556 1559
1557 1560 static void
1558 1561 recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1559 1562 {
1560 1563 dsl_dataset_t *ds = arg1;
1561 1564 struct recvendsyncarg *resa = arg2;
1562 1565
1563 1566 dsl_dataset_snapshot_sync(ds, resa->tosnap, tx);
1564 1567
1565 1568 /* set snapshot's creation time and guid */
1566 1569 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1567 1570 ds->ds_prev->ds_phys->ds_creation_time = resa->creation_time;
1568 1571 ds->ds_prev->ds_phys->ds_guid = resa->toguid;
1569 1572 ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
1570 1573
1571 1574 dmu_buf_will_dirty(ds->ds_dbuf, tx);
1572 1575 ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
1573 1576 }
1574 1577
1575 1578 static int
1576 1579 add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds)
1577 1580 {
1578 1581 dsl_pool_t *dp = ds->ds_dir->dd_pool;
1579 1582 uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj;
1580 1583 dsl_dataset_t *snapds;
1581 1584 guid_map_entry_t *gmep;
1582 1585 int err;
1583 1586
1584 1587 ASSERT(guid_map != NULL);
1585 1588
1586 1589 rw_enter(&dp->dp_config_rwlock, RW_READER);
1587 1590 err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds);
1588 1591 if (err == 0) {
1589 1592 gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP);
1590 1593 gmep->guid = snapds->ds_phys->ds_guid;
1591 1594 gmep->gme_ds = snapds;
1592 1595 avl_add(guid_map, gmep);
1593 1596 }
1594 1597
1595 1598 rw_exit(&dp->dp_config_rwlock);
1596 1599 return (err);
1597 1600 }
1598 1601
1599 1602 static int
1600 1603 dmu_recv_existing_end(dmu_recv_cookie_t *drc)
1601 1604 {
1602 1605 struct recvendsyncarg resa;
1603 1606 dsl_dataset_t *ds = drc->drc_logical_ds;
1604 1607 int err, myerr;
1605 1608
1606 1609 /*
1607 1610 * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
1608 1611 * expects it to have a ds_user_ptr (and zil), but clone_swap()
1609 1612 * can close it.
1610 1613 */
1611 1614 txg_wait_synced(ds->ds_dir->dd_pool, 0);
1612 1615
1613 1616 if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) {
1614 1617 err = dsl_dataset_clone_swap(drc->drc_real_ds, ds,
1615 1618 drc->drc_force);
1616 1619 if (err)
1617 1620 goto out;
1618 1621 } else {
1619 1622 mutex_exit(&ds->ds_recvlock);
1620 1623 dsl_dataset_rele(ds, dmu_recv_tag);
1621 1624 (void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag,
1622 1625 B_FALSE);
1623 1626 return (EBUSY);
1624 1627 }
1625 1628
1626 1629 resa.creation_time = drc->drc_drrb->drr_creation_time;
1627 1630 resa.toguid = drc->drc_drrb->drr_toguid;
1628 1631 resa.tosnap = drc->drc_tosnap;
1629 1632
1630 1633 err = dsl_sync_task_do(ds->ds_dir->dd_pool,
1631 1634 recv_end_check, recv_end_sync, ds, &resa, 3);
1632 1635 if (err) {
1633 1636 /* swap back */
1634 1637 (void) dsl_dataset_clone_swap(drc->drc_real_ds, ds, B_TRUE);
1635 1638 }
1636 1639
1637 1640 out:
1638 1641 mutex_exit(&ds->ds_recvlock);
1639 1642 if (err == 0 && drc->drc_guid_to_ds_map != NULL)
1640 1643 (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
1641 1644 dsl_dataset_disown(ds, dmu_recv_tag);
1642 1645 myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE);
1643 1646 ASSERT3U(myerr, ==, 0);
1644 1647 return (err);
1645 1648 }
1646 1649
1647 1650 static int
1648 1651 dmu_recv_new_end(dmu_recv_cookie_t *drc)
1649 1652 {
1650 1653 struct recvendsyncarg resa;
1651 1654 dsl_dataset_t *ds = drc->drc_logical_ds;
1652 1655 int err;
1653 1656
1654 1657 /*
1655 1658 * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
1656 1659 * expects it to have a ds_user_ptr (and zil), but clone_swap()
1657 1660 * can close it.
1658 1661 */
1659 1662 txg_wait_synced(ds->ds_dir->dd_pool, 0);
1660 1663
1661 1664 resa.creation_time = drc->drc_drrb->drr_creation_time;
1662 1665 resa.toguid = drc->drc_drrb->drr_toguid;
1663 1666 resa.tosnap = drc->drc_tosnap;
1664 1667
1665 1668 err = dsl_sync_task_do(ds->ds_dir->dd_pool,
1666 1669 recv_end_check, recv_end_sync, ds, &resa, 3);
1667 1670 if (err) {
1668 1671 /* clean up the fs we just recv'd into */
1669 1672 (void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE);
1670 1673 } else {
1671 1674 if (drc->drc_guid_to_ds_map != NULL)
1672 1675 (void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
1673 1676 /* release the hold from dmu_recv_begin */
1674 1677 dsl_dataset_disown(ds, dmu_recv_tag);
1675 1678 }
1676 1679 return (err);
1677 1680 }
1678 1681
1679 1682 int
1680 1683 dmu_recv_end(dmu_recv_cookie_t *drc)
1681 1684 {
1682 1685 if (drc->drc_logical_ds != drc->drc_real_ds)
1683 1686 return (dmu_recv_existing_end(drc));
1684 1687 else
1685 1688 return (dmu_recv_new_end(drc));
1686 1689 }