Print this page
3909 Fix hang when sending dedup stream
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libzfs/common/libzfs_sendrecv.c
+++ new/usr/src/lib/libzfs/common/libzfs_sendrecv.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright (c) 2012 by Delphix. All rights reserved.
25 25 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
26 26 * Copyright (c) 2013 Steven Hartland. All rights reserved.
27 27 */
28 28
29 29 #include <assert.h>
30 30 #include <ctype.h>
31 31 #include <errno.h>
32 32 #include <libintl.h>
33 33 #include <stdio.h>
34 34 #include <stdlib.h>
35 35 #include <strings.h>
36 36 #include <unistd.h>
37 37 #include <stddef.h>
38 38 #include <fcntl.h>
39 39 #include <sys/mount.h>
40 40 #include <pthread.h>
41 41 #include <umem.h>
42 42 #include <time.h>
43 43
44 44 #include <libzfs.h>
45 45
46 46 #include "zfs_namecheck.h"
47 47 #include "zfs_prop.h"
48 48 #include "zfs_fletcher.h"
49 49 #include "libzfs_impl.h"
50 50 #include <sha2.h>
51 51 #include <sys/zio_checksum.h>
52 52 #include <sys/ddt.h>
53 53
54 54 /* in libzfs_dataset.c */
55 55 extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
56 56
57 57 static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *,
58 58 int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);
59 59
60 60 static const zio_cksum_t zero_cksum = { 0 };
61 61
62 62 typedef struct dedup_arg {
63 63 int inputfd;
64 64 int outputfd;
65 65 libzfs_handle_t *dedup_hdl;
66 66 } dedup_arg_t;
67 67
68 68 typedef struct progress_arg {
69 69 zfs_handle_t *pa_zhp;
70 70 int pa_fd;
71 71 boolean_t pa_parsable;
72 72 } progress_arg_t;
73 73
/*
 * Location of a data block within the send stream, as recorded by cksummer()
 * from a DRR_WRITE record.
 */
typedef struct dataref dataref_t;

struct dataref {
	uint64_t ref_guid;	/* drr_toguid of the write record */
	uint64_t ref_object;	/* drr_object of the write record */
	uint64_t ref_offset;	/* drr_offset of the write record */
};
79 79
80 80 typedef struct dedup_entry {
81 81 struct dedup_entry *dde_next;
82 82 zio_cksum_t dde_chksum;
83 83 uint64_t dde_prop;
84 84 dataref_t dde_ref;
85 85 } dedup_entry_t;
86 86
87 87 #define MAX_DDT_PHYSMEM_PERCENT 20
88 88 #define SMALLEST_POSSIBLE_MAX_DDT_MB 128
89 89
90 90 typedef struct dedup_table {
91 91 dedup_entry_t **dedup_hash_array;
92 92 umem_cache_t *ddecache;
93 93 uint64_t max_ddt_size; /* max dedup table size in bytes */
94 94 uint64_t cur_ddt_size; /* current dedup table size in bytes */
95 95 uint64_t ddt_count;
96 96 int numhashbits;
97 97 boolean_t ddt_full;
98 98 } dedup_table_t;
99 99
/*
 * Return the 1-based position of the most significant set bit of n, i.e.
 * the number of right shifts needed to reduce n to zero.  Returns 0 for 0.
 */
static int
high_order_bit(uint64_t n)
{
	int bits = 0;

	while (n != 0) {
		n >>= 1;
		bits++;
	}
	return (bits);
}
109 109
/*
 * Read one item of len bytes from stream into buf.  The result is fread()'s
 * item count: 1 when the whole item was read, 0 otherwise.
 */
static size_t
ssread(void *buf, size_t len, FILE *stream)
{
	return (fread(buf, len, 1, stream));
}
120 120
121 121 static void
122 122 ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
123 123 zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
124 124 {
125 125 dedup_entry_t *dde;
126 126
127 127 if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
128 128 if (ddt->ddt_full == B_FALSE) {
129 129 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
130 130 "Dedup table full. Deduplication will continue "
131 131 "with existing table entries"));
132 132 ddt->ddt_full = B_TRUE;
133 133 }
134 134 return;
135 135 }
136 136
137 137 if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
138 138 != NULL) {
139 139 assert(*ddepp == NULL);
140 140 dde->dde_next = NULL;
141 141 dde->dde_chksum = *cs;
142 142 dde->dde_prop = prop;
143 143 dde->dde_ref = *dr;
144 144 *ddepp = dde;
145 145 ddt->cur_ddt_size += sizeof (dedup_entry_t);
146 146 ddt->ddt_count++;
147 147 }
148 148 }
149 149
150 150 /*
151 151 * Using the specified dedup table, do a lookup for an entry with
152 152 * the checksum cs. If found, return the block's reference info
153 153 * in *dr. Otherwise, insert a new entry in the dedup table, using
154 154 * the reference information specified by *dr.
155 155 *
156 156 * return value: true - entry was found
157 157 * false - entry was not found
158 158 */
159 159 static boolean_t
160 160 ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
161 161 uint64_t prop, dataref_t *dr)
162 162 {
163 163 uint32_t hashcode;
164 164 dedup_entry_t **ddepp;
165 165
166 166 hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
167 167
168 168 for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
169 169 ddepp = &((*ddepp)->dde_next)) {
170 170 if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
171 171 (*ddepp)->dde_prop == prop) {
172 172 *dr = (*ddepp)->dde_ref;
173 173 return (B_TRUE);
174 174 }
175 175 }
176 176 ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
177 177 return (B_FALSE);
178 178 }
179 179
180 180 static int
181 181 cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
182 182 {
183 183 fletcher_4_incremental_native(buf, len, zc);
184 184 return (write(outfd, buf, len));
185 185 }
186 186
187 187 /*
188 188 * This function is started in a separate thread when the dedup option
189 189 * has been requested. The main send thread determines the list of
190 190 * snapshots to be included in the send stream and makes the ioctl calls
191 191 * for each one. But instead of having the ioctl send the output to the
192 192 * the output fd specified by the caller of zfs_send()), the
193 193 * ioctl is told to direct the output to a pipe, which is read by the
194 194 * alternate thread running THIS function. This function does the
195 195 * dedup'ing by:
196 196 * 1. building a dedup table (the DDT)
197 197 * 2. doing checksums on each data block and inserting a record in the DDT
198 198 * 3. looking for matching checksums, and
199 199 * 4. sending a DRR_WRITE_BYREF record instead of a write record whenever
200 200 * a duplicate block is found.
201 201 * The output of this function then goes to the output fd requested
202 202 * by the caller of zfs_send().
203 203 */
204 204 static void *
205 205 cksummer(void *arg)
206 206 {
207 207 dedup_arg_t *dda = arg;
208 208 char *buf = malloc(1<<20);
209 209 dmu_replay_record_t thedrr;
210 210 dmu_replay_record_t *drr = &thedrr;
211 211 struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
212 212 struct drr_end *drre = &thedrr.drr_u.drr_end;
213 213 struct drr_object *drro = &thedrr.drr_u.drr_object;
214 214 struct drr_write *drrw = &thedrr.drr_u.drr_write;
215 215 struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
216 216 FILE *ofp;
217 217 int outfd;
218 218 dmu_replay_record_t wbr_drr = {0};
219 219 struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
220 220 dedup_table_t ddt;
221 221 zio_cksum_t stream_cksum;
222 222 uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
223 223 uint64_t numbuckets;
224 224
225 225 ddt.max_ddt_size =
226 226 MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
227 227 SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
228 228
229 229 numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
230 230
231 231 /*
232 232 * numbuckets must be a power of 2. Increase number to
233 233 * a power of 2 if necessary.
234 234 */
235 235 if (!ISP2(numbuckets))
236 236 numbuckets = 1 << high_order_bit(numbuckets);
237 237
238 238 ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
239 239 ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
240 240 NULL, NULL, NULL, NULL, NULL, 0);
241 241 ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
242 242 ddt.numhashbits = high_order_bit(numbuckets) - 1;
243 243 ddt.ddt_full = B_FALSE;
244 244
245 245 /* Initialize the write-by-reference block. */
246 246 wbr_drr.drr_type = DRR_WRITE_BYREF;
247 247 wbr_drr.drr_payloadlen = 0;
248 248
249 249 outfd = dda->outputfd;
250 250 ofp = fdopen(dda->inputfd, "r");
251 251 while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
252 252
253 253 switch (drr->drr_type) {
254 254 case DRR_BEGIN:
255 255 {
256 256 int fflags;
257 257 ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
258 258
259 259 /* set the DEDUP feature flag for this stream */
260 260 fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
261 261 fflags |= (DMU_BACKUP_FEATURE_DEDUP |
262 262 DMU_BACKUP_FEATURE_DEDUPPROPS);
263 263 DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
264 264
265 265 if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
266 266 &stream_cksum, outfd) == -1)
267 267 goto out;
268 268 if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
269 269 DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
270 270 int sz = drr->drr_payloadlen;
271 271
272 272 if (sz > 1<<20) {
273 273 free(buf);
274 274 buf = malloc(sz);
275 275 }
276 276 (void) ssread(buf, sz, ofp);
277 277 if (ferror(stdin))
278 278 perror("fread");
279 279 if (cksum_and_write(buf, sz, &stream_cksum,
280 280 outfd) == -1)
281 281 goto out;
282 282 }
283 283 break;
284 284 }
285 285
286 286 case DRR_END:
287 287 {
288 288 /* use the recalculated checksum */
289 289 ZIO_SET_CHECKSUM(&drre->drr_checksum,
290 290 stream_cksum.zc_word[0], stream_cksum.zc_word[1],
291 291 stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
292 292 if ((write(outfd, drr,
293 293 sizeof (dmu_replay_record_t))) == -1)
294 294 goto out;
295 295 break;
296 296 }
297 297
298 298 case DRR_OBJECT:
299 299 {
300 300 if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
301 301 &stream_cksum, outfd) == -1)
302 302 goto out;
303 303 if (drro->drr_bonuslen > 0) {
304 304 (void) ssread(buf,
305 305 P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
306 306 ofp);
307 307 if (cksum_and_write(buf,
308 308 P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
309 309 &stream_cksum, outfd) == -1)
310 310 goto out;
311 311 }
312 312 break;
313 313 }
314 314
315 315 case DRR_SPILL:
316 316 {
317 317 if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
318 318 &stream_cksum, outfd) == -1)
319 319 goto out;
320 320 (void) ssread(buf, drrs->drr_length, ofp);
321 321 if (cksum_and_write(buf, drrs->drr_length,
322 322 &stream_cksum, outfd) == -1)
323 323 goto out;
324 324 break;
325 325 }
326 326
327 327 case DRR_FREEOBJECTS:
328 328 {
329 329 if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
330 330 &stream_cksum, outfd) == -1)
331 331 goto out;
332 332 break;
333 333 }
334 334
335 335 case DRR_WRITE:
336 336 {
337 337 dataref_t dataref;
338 338
339 339 (void) ssread(buf, drrw->drr_length, ofp);
340 340
341 341 /*
342 342 * Use the existing checksum if it's dedup-capable,
343 343 * else calculate a SHA256 checksum for it.
344 344 */
345 345
346 346 if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
347 347 zero_cksum) ||
348 348 !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
349 349 SHA256_CTX ctx;
350 350 zio_cksum_t tmpsha256;
351 351
352 352 SHA256Init(&ctx);
353 353 SHA256Update(&ctx, buf, drrw->drr_length);
354 354 SHA256Final(&tmpsha256, &ctx);
355 355 drrw->drr_key.ddk_cksum.zc_word[0] =
356 356 BE_64(tmpsha256.zc_word[0]);
357 357 drrw->drr_key.ddk_cksum.zc_word[1] =
358 358 BE_64(tmpsha256.zc_word[1]);
359 359 drrw->drr_key.ddk_cksum.zc_word[2] =
360 360 BE_64(tmpsha256.zc_word[2]);
361 361 drrw->drr_key.ddk_cksum.zc_word[3] =
362 362 BE_64(tmpsha256.zc_word[3]);
363 363 drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
364 364 drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
365 365 }
366 366
367 367 dataref.ref_guid = drrw->drr_toguid;
368 368 dataref.ref_object = drrw->drr_object;
369 369 dataref.ref_offset = drrw->drr_offset;
370 370
371 371 if (ddt_update(dda->dedup_hdl, &ddt,
372 372 &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
373 373 &dataref)) {
374 374 /* block already present in stream */
375 375 wbr_drrr->drr_object = drrw->drr_object;
376 376 wbr_drrr->drr_offset = drrw->drr_offset;
377 377 wbr_drrr->drr_length = drrw->drr_length;
378 378 wbr_drrr->drr_toguid = drrw->drr_toguid;
379 379 wbr_drrr->drr_refguid = dataref.ref_guid;
380 380 wbr_drrr->drr_refobject =
381 381 dataref.ref_object;
382 382 wbr_drrr->drr_refoffset =
383 383 dataref.ref_offset;
384 384
385 385 wbr_drrr->drr_checksumtype =
386 386 drrw->drr_checksumtype;
387 387 wbr_drrr->drr_checksumflags =
388 388 drrw->drr_checksumtype;
389 389 wbr_drrr->drr_key.ddk_cksum =
390 390 drrw->drr_key.ddk_cksum;
391 391 wbr_drrr->drr_key.ddk_prop =
392 392 drrw->drr_key.ddk_prop;
393 393
394 394 if (cksum_and_write(&wbr_drr,
395 395 sizeof (dmu_replay_record_t), &stream_cksum,
396 396 outfd) == -1)
397 397 goto out;
398 398 } else {
399 399 /* block not previously seen */
400 400 if (cksum_and_write(drr,
401 401 sizeof (dmu_replay_record_t), &stream_cksum,
402 402 outfd) == -1)
403 403 goto out;
404 404 if (cksum_and_write(buf,
405 405 drrw->drr_length,
406 406 &stream_cksum, outfd) == -1)
407 407 goto out;
408 408 }
409 409 break;
410 410 }
411 411
412 412 case DRR_FREE:
413 413 {
414 414 if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
415 415 &stream_cksum, outfd) == -1)
416 416 goto out;
417 417 break;
418 418 }
419 419
420 420 default:
421 421 (void) printf("INVALID record type 0x%x\n",
422 422 drr->drr_type);
423 423 /* should never happen, so assert */
424 424 assert(B_FALSE);
425 425 }
426 426 }
427 427 out:
428 428 umem_cache_destroy(ddt.ddecache);
429 429 free(ddt.dedup_hash_array);
430 430 free(buf);
431 431 (void) fclose(ofp);
432 432
433 433 return (NULL);
434 434 }
435 435
436 436 /*
437 437 * Routines for dealing with the AVL tree of fs-nvlists
438 438 */
439 439 typedef struct fsavl_node {
440 440 avl_node_t fn_node;
441 441 nvlist_t *fn_nvfs;
442 442 char *fn_snapname;
443 443 uint64_t fn_guid;
444 444 } fsavl_node_t;
445 445
446 446 static int
447 447 fsavl_compare(const void *arg1, const void *arg2)
448 448 {
449 449 const fsavl_node_t *fn1 = arg1;
450 450 const fsavl_node_t *fn2 = arg2;
451 451
452 452 if (fn1->fn_guid > fn2->fn_guid)
453 453 return (+1);
454 454 else if (fn1->fn_guid < fn2->fn_guid)
455 455 return (-1);
456 456 else
457 457 return (0);
458 458 }
459 459
460 460 /*
461 461 * Given the GUID of a snapshot, find its containing filesystem and
462 462 * (optionally) name.
463 463 */
464 464 static nvlist_t *
465 465 fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
466 466 {
467 467 fsavl_node_t fn_find;
468 468 fsavl_node_t *fn;
469 469
470 470 fn_find.fn_guid = snapguid;
471 471
472 472 fn = avl_find(avl, &fn_find, NULL);
473 473 if (fn) {
474 474 if (snapname)
475 475 *snapname = fn->fn_snapname;
476 476 return (fn->fn_nvfs);
477 477 }
478 478 return (NULL);
479 479 }
480 480
481 481 static void
482 482 fsavl_destroy(avl_tree_t *avl)
483 483 {
484 484 fsavl_node_t *fn;
485 485 void *cookie;
486 486
487 487 if (avl == NULL)
488 488 return;
489 489
490 490 cookie = NULL;
491 491 while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
492 492 free(fn);
493 493 avl_destroy(avl);
494 494 free(avl);
495 495 }
496 496
497 497 /*
498 498 * Given an nvlist, produce an avl tree of snapshots, ordered by guid
499 499 */
500 500 static avl_tree_t *
501 501 fsavl_create(nvlist_t *fss)
502 502 {
503 503 avl_tree_t *fsavl;
504 504 nvpair_t *fselem = NULL;
505 505
506 506 if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
507 507 return (NULL);
508 508
509 509 avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
510 510 offsetof(fsavl_node_t, fn_node));
511 511
512 512 while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
513 513 nvlist_t *nvfs, *snaps;
514 514 nvpair_t *snapelem = NULL;
515 515
516 516 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
517 517 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
518 518
519 519 while ((snapelem =
520 520 nvlist_next_nvpair(snaps, snapelem)) != NULL) {
521 521 fsavl_node_t *fn;
522 522 uint64_t guid;
523 523
524 524 VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
525 525 if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
526 526 fsavl_destroy(fsavl);
527 527 return (NULL);
528 528 }
529 529 fn->fn_nvfs = nvfs;
530 530 fn->fn_snapname = nvpair_name(snapelem);
531 531 fn->fn_guid = guid;
532 532
533 533 /*
534 534 * Note: if there are multiple snaps with the
535 535 * same GUID, we ignore all but one.
536 536 */
537 537 if (avl_find(fsavl, fn, NULL) == NULL)
538 538 avl_add(fsavl, fn);
539 539 else
540 540 free(fn);
541 541 }
542 542 }
543 543
544 544 return (fsavl);
545 545 }
546 546
547 547 /*
548 548 * Routines for dealing with the giant nvlist of fs-nvlists, etc.
549 549 */
550 550 typedef struct send_data {
551 551 uint64_t parent_fromsnap_guid;
552 552 nvlist_t *parent_snaps;
553 553 nvlist_t *fss;
554 554 nvlist_t *snapprops;
555 555 const char *fromsnap;
556 556 const char *tosnap;
557 557 boolean_t recursive;
558 558
559 559 /*
560 560 * The header nvlist is of the following format:
561 561 * {
562 562 * "tosnap" -> string
563 563 * "fromsnap" -> string (if incremental)
564 564 * "fss" -> {
565 565 * id -> {
566 566 *
567 567 * "name" -> string (full name; for debugging)
568 568 * "parentfromsnap" -> number (guid of fromsnap in parent)
569 569 *
570 570 * "props" -> { name -> value (only if set here) }
571 571 * "snaps" -> { name (lastname) -> number (guid) }
572 572 * "snapprops" -> { name (lastname) -> { name -> value } }
573 573 *
574 574 * "origin" -> number (guid) (if clone)
575 575 * "sent" -> boolean (not on-disk)
576 576 * }
577 577 * }
578 578 * }
579 579 *
580 580 */
581 581 } send_data_t;
582 582
583 583 static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
584 584
585 585 static int
586 586 send_iterate_snap(zfs_handle_t *zhp, void *arg)
587 587 {
588 588 send_data_t *sd = arg;
589 589 uint64_t guid = zhp->zfs_dmustats.dds_guid;
590 590 char *snapname;
591 591 nvlist_t *nv;
592 592
593 593 snapname = strrchr(zhp->zfs_name, '@')+1;
594 594
595 595 VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
596 596 /*
597 597 * NB: if there is no fromsnap here (it's a newly created fs in
598 598 * an incremental replication), we will substitute the tosnap.
599 599 */
600 600 if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
601 601 (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
602 602 strcmp(snapname, sd->tosnap) == 0)) {
603 603 sd->parent_fromsnap_guid = guid;
604 604 }
605 605
606 606 VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
607 607 send_iterate_prop(zhp, nv);
608 608 VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
609 609 nvlist_free(nv);
610 610
611 611 zfs_close(zhp);
612 612 return (0);
613 613 }
614 614
615 615 static void
616 616 send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
617 617 {
618 618 nvpair_t *elem = NULL;
619 619
620 620 while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
621 621 char *propname = nvpair_name(elem);
622 622 zfs_prop_t prop = zfs_name_to_prop(propname);
623 623 nvlist_t *propnv;
624 624
625 625 if (!zfs_prop_user(propname)) {
626 626 /*
627 627 * Realistically, this should never happen. However,
628 628 * we want the ability to add DSL properties without
629 629 * needing to make incompatible version changes. We
630 630 * need to ignore unknown properties to allow older
631 631 * software to still send datasets containing these
632 632 * properties, with the unknown properties elided.
633 633 */
634 634 if (prop == ZPROP_INVAL)
635 635 continue;
636 636
637 637 if (zfs_prop_readonly(prop))
638 638 continue;
639 639 }
640 640
641 641 verify(nvpair_value_nvlist(elem, &propnv) == 0);
642 642 if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
643 643 prop == ZFS_PROP_REFQUOTA ||
644 644 prop == ZFS_PROP_REFRESERVATION) {
645 645 char *source;
646 646 uint64_t value;
647 647 verify(nvlist_lookup_uint64(propnv,
648 648 ZPROP_VALUE, &value) == 0);
649 649 if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
650 650 continue;
651 651 /*
652 652 * May have no source before SPA_VERSION_RECVD_PROPS,
653 653 * but is still modifiable.
654 654 */
655 655 if (nvlist_lookup_string(propnv,
656 656 ZPROP_SOURCE, &source) == 0) {
657 657 if ((strcmp(source, zhp->zfs_name) != 0) &&
658 658 (strcmp(source,
659 659 ZPROP_SOURCE_VAL_RECVD) != 0))
660 660 continue;
661 661 }
662 662 } else {
663 663 char *source;
664 664 if (nvlist_lookup_string(propnv,
665 665 ZPROP_SOURCE, &source) != 0)
666 666 continue;
667 667 if ((strcmp(source, zhp->zfs_name) != 0) &&
668 668 (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
669 669 continue;
670 670 }
671 671
672 672 if (zfs_prop_user(propname) ||
673 673 zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
674 674 char *value;
675 675 verify(nvlist_lookup_string(propnv,
676 676 ZPROP_VALUE, &value) == 0);
677 677 VERIFY(0 == nvlist_add_string(nv, propname, value));
678 678 } else {
679 679 uint64_t value;
680 680 verify(nvlist_lookup_uint64(propnv,
681 681 ZPROP_VALUE, &value) == 0);
682 682 VERIFY(0 == nvlist_add_uint64(nv, propname, value));
683 683 }
684 684 }
685 685 }
686 686
687 687 /*
688 688 * recursively generate nvlists describing datasets. See comment
689 689 * for the data structure send_data_t above for description of contents
690 690 * of the nvlist.
691 691 */
692 692 static int
693 693 send_iterate_fs(zfs_handle_t *zhp, void *arg)
694 694 {
695 695 send_data_t *sd = arg;
696 696 nvlist_t *nvfs, *nv;
697 697 int rv = 0;
698 698 uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
699 699 uint64_t guid = zhp->zfs_dmustats.dds_guid;
700 700 char guidstring[64];
701 701
702 702 VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
703 703 VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
704 704 VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
705 705 sd->parent_fromsnap_guid));
706 706
707 707 if (zhp->zfs_dmustats.dds_origin[0]) {
708 708 zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
709 709 zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
710 710 if (origin == NULL)
711 711 return (-1);
712 712 VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
713 713 origin->zfs_dmustats.dds_guid));
714 714 }
715 715
716 716 /* iterate over props */
717 717 VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
718 718 send_iterate_prop(zhp, nv);
719 719 VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
720 720 nvlist_free(nv);
721 721
722 722 /* iterate over snaps, and set sd->parent_fromsnap_guid */
723 723 sd->parent_fromsnap_guid = 0;
724 724 VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
725 725 VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
726 726 (void) zfs_iter_snapshots_sorted(zhp, send_iterate_snap, sd);
727 727 VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
728 728 VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
729 729 nvlist_free(sd->parent_snaps);
730 730 nvlist_free(sd->snapprops);
731 731
732 732 /* add this fs to nvlist */
733 733 (void) snprintf(guidstring, sizeof (guidstring),
734 734 "0x%llx", (longlong_t)guid);
735 735 VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
736 736 nvlist_free(nvfs);
737 737
738 738 /* iterate over children */
739 739 if (sd->recursive)
740 740 rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
741 741
742 742 sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
743 743
744 744 zfs_close(zhp);
745 745 return (rv);
746 746 }
747 747
748 748 static int
749 749 gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
750 750 const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp)
751 751 {
752 752 zfs_handle_t *zhp;
753 753 send_data_t sd = { 0 };
754 754 int error;
755 755
756 756 zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
757 757 if (zhp == NULL)
758 758 return (EZFS_BADTYPE);
759 759
760 760 VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
761 761 sd.fromsnap = fromsnap;
762 762 sd.tosnap = tosnap;
763 763 sd.recursive = recursive;
764 764
765 765 if ((error = send_iterate_fs(zhp, &sd)) != 0) {
766 766 nvlist_free(sd.fss);
767 767 if (avlp != NULL)
768 768 *avlp = NULL;
769 769 *nvlp = NULL;
770 770 return (error);
771 771 }
772 772
773 773 if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
774 774 nvlist_free(sd.fss);
775 775 *nvlp = NULL;
776 776 return (EZFS_NOMEM);
777 777 }
778 778
779 779 *nvlp = sd.fss;
780 780 return (0);
781 781 }
782 782
783 783 /*
784 784 * Routines specific to "zfs send"
785 785 */
786 786 typedef struct send_dump_data {
787 787 /* these are all just the short snapname (the part after the @) */
788 788 const char *fromsnap;
789 789 const char *tosnap;
790 790 char prevsnap[ZFS_MAXNAMELEN];
791 791 uint64_t prevsnap_obj;
792 792 boolean_t seenfrom, seento, replicate, doall, fromorigin;
793 793 boolean_t verbose, dryrun, parsable, progress;
794 794 int outfd;
795 795 boolean_t err;
796 796 nvlist_t *fss;
797 797 nvlist_t *snapholds;
798 798 avl_tree_t *fsavl;
799 799 snapfilter_cb_t *filter_cb;
800 800 void *filter_cb_arg;
801 801 nvlist_t *debugnv;
802 802 char holdtag[ZFS_MAXNAMELEN];
803 803 int cleanup_fd;
804 804 uint64_t size;
805 805 } send_dump_data_t;
806 806
807 807 static int
808 808 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
809 809 boolean_t fromorigin, uint64_t *sizep)
810 810 {
811 811 zfs_cmd_t zc = { 0 };
812 812 libzfs_handle_t *hdl = zhp->zfs_hdl;
813 813
814 814 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
815 815 assert(fromsnap_obj == 0 || !fromorigin);
816 816
817 817 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
818 818 zc.zc_obj = fromorigin;
819 819 zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
820 820 zc.zc_fromobj = fromsnap_obj;
821 821 zc.zc_guid = 1; /* estimate flag */
822 822
823 823 if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
824 824 char errbuf[1024];
825 825 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
826 826 "warning: cannot estimate space for '%s'"), zhp->zfs_name);
827 827
828 828 switch (errno) {
829 829 case EXDEV:
830 830 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
831 831 "not an earlier snapshot from the same fs"));
832 832 return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
833 833
834 834 case ENOENT:
835 835 if (zfs_dataset_exists(hdl, zc.zc_name,
836 836 ZFS_TYPE_SNAPSHOT)) {
837 837 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
838 838 "incremental source (@%s) does not exist"),
839 839 zc.zc_value);
840 840 }
841 841 return (zfs_error(hdl, EZFS_NOENT, errbuf));
842 842
843 843 case EDQUOT:
844 844 case EFBIG:
845 845 case EIO:
846 846 case ENOLINK:
847 847 case ENOSPC:
848 848 case ENOSTR:
849 849 case ENXIO:
850 850 case EPIPE:
851 851 case ERANGE:
852 852 case EFAULT:
853 853 case EROFS:
854 854 zfs_error_aux(hdl, strerror(errno));
855 855 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
856 856
857 857 default:
858 858 return (zfs_standard_error(hdl, errno, errbuf));
859 859 }
860 860 }
861 861
862 862 *sizep = zc.zc_objset_type;
863 863
864 864 return (0);
865 865 }
866 866
867 867 /*
868 868 * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
869 869 * NULL) to the file descriptor specified by outfd.
870 870 */
871 871 static int
872 872 dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
873 873 boolean_t fromorigin, int outfd, nvlist_t *debugnv)
874 874 {
875 875 zfs_cmd_t zc = { 0 };
876 876 libzfs_handle_t *hdl = zhp->zfs_hdl;
877 877 nvlist_t *thisdbg;
878 878
879 879 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
880 880 assert(fromsnap_obj == 0 || !fromorigin);
881 881
882 882 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
883 883 zc.zc_cookie = outfd;
884 884 zc.zc_obj = fromorigin;
885 885 zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
886 886 zc.zc_fromobj = fromsnap_obj;
887 887
888 888 VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
889 889 if (fromsnap && fromsnap[0] != '\0') {
890 890 VERIFY(0 == nvlist_add_string(thisdbg,
891 891 "fromsnap", fromsnap));
892 892 }
893 893
894 894 if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
895 895 char errbuf[1024];
896 896 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
897 897 "warning: cannot send '%s'"), zhp->zfs_name);
898 898
899 899 VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
900 900 if (debugnv) {
901 901 VERIFY(0 == nvlist_add_nvlist(debugnv,
902 902 zhp->zfs_name, thisdbg));
903 903 }
904 904 nvlist_free(thisdbg);
905 905
906 906 switch (errno) {
907 907 case EXDEV:
908 908 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
909 909 "not an earlier snapshot from the same fs"));
910 910 return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
911 911
912 912 case ENOENT:
913 913 if (zfs_dataset_exists(hdl, zc.zc_name,
914 914 ZFS_TYPE_SNAPSHOT)) {
915 915 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
916 916 "incremental source (@%s) does not exist"),
917 917 zc.zc_value);
918 918 }
919 919 return (zfs_error(hdl, EZFS_NOENT, errbuf));
920 920
921 921 case EDQUOT:
922 922 case EFBIG:
923 923 case EIO:
924 924 case ENOLINK:
925 925 case ENOSPC:
926 926 case ENOSTR:
927 927 case ENXIO:
928 928 case EPIPE:
929 929 case ERANGE:
930 930 case EFAULT:
931 931 case EROFS:
932 932 zfs_error_aux(hdl, strerror(errno));
933 933 return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
934 934
935 935 default:
936 936 return (zfs_standard_error(hdl, errno, errbuf));
937 937 }
938 938 }
939 939
940 940 if (debugnv)
941 941 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
942 942 nvlist_free(thisdbg);
943 943
944 944 return (0);
945 945 }
946 946
947 947 static void
948 948 gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
949 949 {
950 950 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
951 951
952 952 /*
953 953 * zfs_send() only sets snapholds for sends that need them,
954 954 * e.g. replication and doall.
955 955 */
956 956 if (sdd->snapholds == NULL)
957 957 return;
958 958
959 959 fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
960 960 }
961 961
962 962 static void *
963 963 send_progress_thread(void *arg)
964 964 {
965 965 progress_arg_t *pa = arg;
966 966
967 967 zfs_cmd_t zc = { 0 };
968 968 zfs_handle_t *zhp = pa->pa_zhp;
969 969 libzfs_handle_t *hdl = zhp->zfs_hdl;
970 970 unsigned long long bytes;
971 971 char buf[16];
972 972
973 973 time_t t;
974 974 struct tm *tm;
975 975
976 976 assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
977 977 (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
978 978
979 979 if (!pa->pa_parsable)
980 980 (void) fprintf(stderr, "TIME SENT SNAPSHOT\n");
981 981
982 982 /*
983 983 * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
984 984 */
985 985 for (;;) {
986 986 (void) sleep(1);
987 987
988 988 zc.zc_cookie = pa->pa_fd;
989 989 if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
990 990 return ((void *)-1);
991 991
992 992 (void) time(&t);
993 993 tm = localtime(&t);
994 994 bytes = zc.zc_cookie;
995 995
996 996 if (pa->pa_parsable) {
997 997 (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
998 998 tm->tm_hour, tm->tm_min, tm->tm_sec,
999 999 bytes, zhp->zfs_name);
1000 1000 } else {
1001 1001 zfs_nicenum(bytes, buf, sizeof (buf));
1002 1002 (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n",
1003 1003 tm->tm_hour, tm->tm_min, tm->tm_sec,
1004 1004 buf, zhp->zfs_name);
1005 1005 }
1006 1006 }
1007 1007 }
1008 1008
1009 1009 static int
1010 1010 dump_snapshot(zfs_handle_t *zhp, void *arg)
1011 1011 {
1012 1012 send_dump_data_t *sdd = arg;
1013 1013 progress_arg_t pa = { 0 };
1014 1014 pthread_t tid;
1015 1015 char *thissnap;
1016 1016 int err;
1017 1017 boolean_t isfromsnap, istosnap, fromorigin;
1018 1018 boolean_t exclude = B_FALSE;
1019 1019
1020 1020 err = 0;
1021 1021 thissnap = strchr(zhp->zfs_name, '@') + 1;
1022 1022 isfromsnap = (sdd->fromsnap != NULL &&
1023 1023 strcmp(sdd->fromsnap, thissnap) == 0);
1024 1024
1025 1025 if (!sdd->seenfrom && isfromsnap) {
1026 1026 gather_holds(zhp, sdd);
1027 1027 sdd->seenfrom = B_TRUE;
1028 1028 (void) strcpy(sdd->prevsnap, thissnap);
1029 1029 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1030 1030 zfs_close(zhp);
1031 1031 return (0);
1032 1032 }
1033 1033
1034 1034 if (sdd->seento || !sdd->seenfrom) {
1035 1035 zfs_close(zhp);
1036 1036 return (0);
1037 1037 }
1038 1038
1039 1039 istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1040 1040 if (istosnap)
1041 1041 sdd->seento = B_TRUE;
1042 1042
1043 1043 if (!sdd->doall && !isfromsnap && !istosnap) {
1044 1044 if (sdd->replicate) {
1045 1045 char *snapname;
1046 1046 nvlist_t *snapprops;
1047 1047 /*
1048 1048 * Filter out all intermediate snapshots except origin
1049 1049 * snapshots needed to replicate clones.
1050 1050 */
1051 1051 nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1052 1052 zhp->zfs_dmustats.dds_guid, &snapname);
1053 1053
1054 1054 VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1055 1055 "snapprops", &snapprops));
1056 1056 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1057 1057 thissnap, &snapprops));
1058 1058 exclude = !nvlist_exists(snapprops, "is_clone_origin");
1059 1059 } else {
1060 1060 exclude = B_TRUE;
1061 1061 }
1062 1062 }
1063 1063
1064 1064 /*
1065 1065 * If a filter function exists, call it to determine whether
1066 1066 * this snapshot will be sent.
1067 1067 */
1068 1068 if (exclude || (sdd->filter_cb != NULL &&
1069 1069 sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1070 1070 /*
1071 1071 * This snapshot is filtered out. Don't send it, and don't
1072 1072 * set prevsnap_obj, so it will be as if this snapshot didn't
1073 1073 * exist, and the next accepted snapshot will be sent as
1074 1074 * an incremental from the last accepted one, or as the
1075 1075 * first (and full) snapshot in the case of a replication,
1076 1076 * non-incremental send.
1077 1077 */
1078 1078 zfs_close(zhp);
1079 1079 return (0);
1080 1080 }
1081 1081
1082 1082 gather_holds(zhp, sdd);
1083 1083 fromorigin = sdd->prevsnap[0] == '\0' &&
1084 1084 (sdd->fromorigin || sdd->replicate);
1085 1085
1086 1086 if (sdd->verbose) {
1087 1087 uint64_t size;
1088 1088 err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1089 1089 fromorigin, &size);
1090 1090
1091 1091 if (sdd->parsable) {
1092 1092 if (sdd->prevsnap[0] != '\0') {
1093 1093 (void) fprintf(stderr, "incremental\t%s\t%s",
1094 1094 sdd->prevsnap, zhp->zfs_name);
1095 1095 } else {
1096 1096 (void) fprintf(stderr, "full\t%s",
1097 1097 zhp->zfs_name);
1098 1098 }
1099 1099 } else {
1100 1100 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1101 1101 "send from @%s to %s"),
1102 1102 sdd->prevsnap, zhp->zfs_name);
1103 1103 }
1104 1104 if (err == 0) {
1105 1105 if (sdd->parsable) {
1106 1106 (void) fprintf(stderr, "\t%llu\n",
1107 1107 (longlong_t)size);
1108 1108 } else {
1109 1109 char buf[16];
1110 1110 zfs_nicenum(size, buf, sizeof (buf));
1111 1111 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1112 1112 " estimated size is %s\n"), buf);
1113 1113 }
1114 1114 sdd->size += size;
1115 1115 } else {
1116 1116 (void) fprintf(stderr, "\n");
1117 1117 }
1118 1118 }
1119 1119
1120 1120 if (!sdd->dryrun) {
1121 1121 /*
1122 1122 * If progress reporting is requested, spawn a new thread to
1123 1123 * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1124 1124 */
1125 1125 if (sdd->progress) {
1126 1126 pa.pa_zhp = zhp;
1127 1127 pa.pa_fd = sdd->outfd;
1128 1128 pa.pa_parsable = sdd->parsable;
1129 1129
1130 1130 if (err = pthread_create(&tid, NULL,
1131 1131 send_progress_thread, &pa)) {
1132 1132 zfs_close(zhp);
1133 1133 return (err);
1134 1134 }
1135 1135 }
1136 1136
1137 1137 err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1138 1138 fromorigin, sdd->outfd, sdd->debugnv);
1139 1139
1140 1140 if (sdd->progress) {
1141 1141 (void) pthread_cancel(tid);
1142 1142 (void) pthread_join(tid, NULL);
1143 1143 }
1144 1144 }
1145 1145
1146 1146 (void) strcpy(sdd->prevsnap, thissnap);
1147 1147 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1148 1148 zfs_close(zhp);
1149 1149 return (err);
1150 1150 }
1151 1151
1152 1152 static int
1153 1153 dump_filesystem(zfs_handle_t *zhp, void *arg)
1154 1154 {
1155 1155 int rv = 0;
1156 1156 send_dump_data_t *sdd = arg;
1157 1157 boolean_t missingfrom = B_FALSE;
1158 1158 zfs_cmd_t zc = { 0 };
1159 1159
1160 1160 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1161 1161 zhp->zfs_name, sdd->tosnap);
1162 1162 if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1163 1163 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1164 1164 "WARNING: could not send %s@%s: does not exist\n"),
1165 1165 zhp->zfs_name, sdd->tosnap);
1166 1166 sdd->err = B_TRUE;
1167 1167 return (0);
1168 1168 }
1169 1169
1170 1170 if (sdd->replicate && sdd->fromsnap) {
1171 1171 /*
1172 1172 * If this fs does not have fromsnap, and we're doing
1173 1173 * recursive, we need to send a full stream from the
1174 1174 * beginning (or an incremental from the origin if this
1175 1175 * is a clone). If we're doing non-recursive, then let
1176 1176 * them get the error.
1177 1177 */
1178 1178 (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1179 1179 zhp->zfs_name, sdd->fromsnap);
1180 1180 if (ioctl(zhp->zfs_hdl->libzfs_fd,
1181 1181 ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1182 1182 missingfrom = B_TRUE;
1183 1183 }
1184 1184 }
1185 1185
1186 1186 sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1187 1187 sdd->prevsnap_obj = 0;
1188 1188 if (sdd->fromsnap == NULL || missingfrom)
1189 1189 sdd->seenfrom = B_TRUE;
1190 1190
1191 1191 rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1192 1192 if (!sdd->seenfrom) {
1193 1193 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1194 1194 "WARNING: could not send %s@%s:\n"
1195 1195 "incremental source (%s@%s) does not exist\n"),
1196 1196 zhp->zfs_name, sdd->tosnap,
1197 1197 zhp->zfs_name, sdd->fromsnap);
1198 1198 sdd->err = B_TRUE;
1199 1199 } else if (!sdd->seento) {
1200 1200 if (sdd->fromsnap) {
1201 1201 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1202 1202 "WARNING: could not send %s@%s:\n"
1203 1203 "incremental source (%s@%s) "
1204 1204 "is not earlier than it\n"),
1205 1205 zhp->zfs_name, sdd->tosnap,
1206 1206 zhp->zfs_name, sdd->fromsnap);
1207 1207 } else {
1208 1208 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1209 1209 "WARNING: "
1210 1210 "could not send %s@%s: does not exist\n"),
1211 1211 zhp->zfs_name, sdd->tosnap);
1212 1212 }
1213 1213 sdd->err = B_TRUE;
1214 1214 }
1215 1215
1216 1216 return (rv);
1217 1217 }
1218 1218
1219 1219 static int
1220 1220 dump_filesystems(zfs_handle_t *rzhp, void *arg)
1221 1221 {
1222 1222 send_dump_data_t *sdd = arg;
1223 1223 nvpair_t *fspair;
1224 1224 boolean_t needagain, progress;
1225 1225
1226 1226 if (!sdd->replicate)
1227 1227 return (dump_filesystem(rzhp, sdd));
1228 1228
1229 1229 /* Mark the clone origin snapshots. */
1230 1230 for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1231 1231 fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1232 1232 nvlist_t *nvfs;
1233 1233 uint64_t origin_guid = 0;
1234 1234
1235 1235 VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1236 1236 (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1237 1237 if (origin_guid != 0) {
1238 1238 char *snapname;
1239 1239 nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1240 1240 origin_guid, &snapname);
1241 1241 if (origin_nv != NULL) {
1242 1242 nvlist_t *snapprops;
1243 1243 VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1244 1244 "snapprops", &snapprops));
1245 1245 VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1246 1246 snapname, &snapprops));
1247 1247 VERIFY(0 == nvlist_add_boolean(
1248 1248 snapprops, "is_clone_origin"));
1249 1249 }
1250 1250 }
1251 1251 }
1252 1252 again:
1253 1253 needagain = progress = B_FALSE;
1254 1254 for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1255 1255 fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1256 1256 nvlist_t *fslist, *parent_nv;
1257 1257 char *fsname;
1258 1258 zfs_handle_t *zhp;
1259 1259 int err;
1260 1260 uint64_t origin_guid = 0;
1261 1261 uint64_t parent_guid = 0;
1262 1262
1263 1263 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1264 1264 if (nvlist_lookup_boolean(fslist, "sent") == 0)
1265 1265 continue;
1266 1266
1267 1267 VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1268 1268 (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1269 1269 (void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1270 1270 &parent_guid);
1271 1271
1272 1272 if (parent_guid != 0) {
1273 1273 parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1274 1274 if (!nvlist_exists(parent_nv, "sent")) {
1275 1275 /* parent has not been sent; skip this one */
1276 1276 needagain = B_TRUE;
1277 1277 continue;
1278 1278 }
1279 1279 }
1280 1280
1281 1281 if (origin_guid != 0) {
1282 1282 nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1283 1283 origin_guid, NULL);
1284 1284 if (origin_nv != NULL &&
1285 1285 !nvlist_exists(origin_nv, "sent")) {
1286 1286 /*
1287 1287 * origin has not been sent yet;
1288 1288 * skip this clone.
1289 1289 */
1290 1290 needagain = B_TRUE;
1291 1291 continue;
1292 1292 }
1293 1293 }
1294 1294
1295 1295 zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1296 1296 if (zhp == NULL)
1297 1297 return (-1);
1298 1298 err = dump_filesystem(zhp, sdd);
1299 1299 VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1300 1300 progress = B_TRUE;
1301 1301 zfs_close(zhp);
1302 1302 if (err)
1303 1303 return (err);
1304 1304 }
1305 1305 if (needagain) {
1306 1306 assert(progress);
1307 1307 goto again;
1308 1308 }
1309 1309
1310 1310 /* clean out the sent flags in case we reuse this fss */
1311 1311 for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1312 1312 fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1313 1313 nvlist_t *fslist;
1314 1314
1315 1315 VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1316 1316 (void) nvlist_remove_all(fslist, "sent");
1317 1317 }
1318 1318
1319 1319 return (0);
1320 1320 }
1321 1321
1322 1322 /*
1323 1323 * Generate a send stream for the dataset identified by the argument zhp.
1324 1324 *
1325 1325 * The content of the send stream is the snapshot identified by
1326 1326 * 'tosnap'. Incremental streams are requested in two ways:
1327 1327 * - from the snapshot identified by "fromsnap" (if non-null) or
1328 1328 * - from the origin of the dataset identified by zhp, which must
1329 1329 * be a clone. In this case, "fromsnap" is null and "fromorigin"
1330 1330 * is TRUE.
1331 1331 *
1332 1332 * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1333 1333 * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1334 1334 * if "replicate" is set. If "doall" is set, dump all the intermediate
1335 1335 * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1336 1336 * case too. If "props" is set, send properties.
1337 1337 */
1338 1338 int
1339 1339 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1340 1340 sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1341 1341 void *cb_arg, nvlist_t **debugnvp)
1342 1342 {
1343 1343 char errbuf[1024];
1344 1344 send_dump_data_t sdd = { 0 };
1345 1345 int err = 0;
1346 1346 nvlist_t *fss = NULL;
1347 1347 avl_tree_t *fsavl = NULL;
1348 1348 static uint64_t holdseq;
1349 1349 int spa_version;
1350 1350 pthread_t tid = 0;
1351 1351 int pipefd[2];
1352 1352 dedup_arg_t dda = { 0 };
1353 1353 int featureflags = 0;
1354 1354
1355 1355 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1356 1356 "cannot send '%s'"), zhp->zfs_name);
1357 1357
1358 1358 if (fromsnap && fromsnap[0] == '\0') {
1359 1359 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1360 1360 "zero-length incremental source"));
1361 1361 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1362 1362 }
1363 1363
1364 1364 if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1365 1365 uint64_t version;
1366 1366 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1367 1367 if (version >= ZPL_VERSION_SA) {
1368 1368 featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1369 1369 }
1370 1370 }
1371 1371
1372 1372 if (flags->dedup && !flags->dryrun) {
1373 1373 featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1374 1374 DMU_BACKUP_FEATURE_DEDUPPROPS);
1375 1375 if (err = pipe(pipefd)) {
1376 1376 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1377 1377 return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1378 1378 errbuf));
1379 1379 }
1380 1380 dda.outputfd = outfd;
1381 1381 dda.inputfd = pipefd[1];
1382 1382 dda.dedup_hdl = zhp->zfs_hdl;
1383 1383 if (err = pthread_create(&tid, NULL, cksummer, &dda)) {
1384 1384 (void) close(pipefd[0]);
1385 1385 (void) close(pipefd[1]);
1386 1386 zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1387 1387 return (zfs_error(zhp->zfs_hdl,
1388 1388 EZFS_THREADCREATEFAILED, errbuf));
1389 1389 }
1390 1390 }
1391 1391
1392 1392 if (flags->replicate || flags->doall || flags->props) {
1393 1393 dmu_replay_record_t drr = { 0 };
1394 1394 char *packbuf = NULL;
1395 1395 size_t buflen = 0;
1396 1396 zio_cksum_t zc = { 0 };
1397 1397
1398 1398 if (flags->replicate || flags->props) {
1399 1399 nvlist_t *hdrnv;
1400 1400
1401 1401 VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1402 1402 if (fromsnap) {
1403 1403 VERIFY(0 == nvlist_add_string(hdrnv,
1404 1404 "fromsnap", fromsnap));
1405 1405 }
1406 1406 VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1407 1407 if (!flags->replicate) {
1408 1408 VERIFY(0 == nvlist_add_boolean(hdrnv,
1409 1409 "not_recursive"));
1410 1410 }
1411 1411
1412 1412 err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1413 1413 fromsnap, tosnap, flags->replicate, &fss, &fsavl);
1414 1414 if (err)
1415 1415 goto err_out;
1416 1416 VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1417 1417 err = nvlist_pack(hdrnv, &packbuf, &buflen,
1418 1418 NV_ENCODE_XDR, 0);
1419 1419 if (debugnvp)
1420 1420 *debugnvp = hdrnv;
1421 1421 else
1422 1422 nvlist_free(hdrnv);
1423 1423 if (err)
1424 1424 goto stderr_out;
1425 1425 }
1426 1426
1427 1427 if (!flags->dryrun) {
1428 1428 /* write first begin record */
1429 1429 drr.drr_type = DRR_BEGIN;
1430 1430 drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1431 1431 DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1432 1432 drr_versioninfo, DMU_COMPOUNDSTREAM);
1433 1433 DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1434 1434 drr_versioninfo, featureflags);
1435 1435 (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1436 1436 sizeof (drr.drr_u.drr_begin.drr_toname),
1437 1437 "%s@%s", zhp->zfs_name, tosnap);
1438 1438 drr.drr_payloadlen = buflen;
1439 1439 err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1440 1440
1441 1441 /* write header nvlist */
1442 1442 if (err != -1 && packbuf != NULL) {
1443 1443 err = cksum_and_write(packbuf, buflen, &zc,
1444 1444 outfd);
1445 1445 }
1446 1446 free(packbuf);
1447 1447 if (err == -1) {
1448 1448 err = errno;
1449 1449 goto stderr_out;
1450 1450 }
1451 1451
1452 1452 /* write end record */
1453 1453 bzero(&drr, sizeof (drr));
1454 1454 drr.drr_type = DRR_END;
1455 1455 drr.drr_u.drr_end.drr_checksum = zc;
1456 1456 err = write(outfd, &drr, sizeof (drr));
1457 1457 if (err == -1) {
1458 1458 err = errno;
1459 1459 goto stderr_out;
1460 1460 }
1461 1461
1462 1462 err = 0;
1463 1463 }
1464 1464 }
1465 1465
1466 1466 /* dump each stream */
1467 1467 sdd.fromsnap = fromsnap;
1468 1468 sdd.tosnap = tosnap;
1469 1469 if (tid != 0)
1470 1470 sdd.outfd = pipefd[0];
1471 1471 else
1472 1472 sdd.outfd = outfd;
1473 1473 sdd.replicate = flags->replicate;
1474 1474 sdd.doall = flags->doall;
1475 1475 sdd.fromorigin = flags->fromorigin;
1476 1476 sdd.fss = fss;
1477 1477 sdd.fsavl = fsavl;
1478 1478 sdd.verbose = flags->verbose;
1479 1479 sdd.parsable = flags->parsable;
1480 1480 sdd.progress = flags->progress;
1481 1481 sdd.dryrun = flags->dryrun;
1482 1482 sdd.filter_cb = filter_func;
1483 1483 sdd.filter_cb_arg = cb_arg;
1484 1484 if (debugnvp)
1485 1485 sdd.debugnv = *debugnvp;
1486 1486
1487 1487 /*
1488 1488 * Some flags require that we place user holds on the datasets that are
1489 1489 * being sent so they don't get destroyed during the send. We can skip
1490 1490 * this step if the pool is imported read-only since the datasets cannot
1491 1491 * be destroyed.
1492 1492 */
1493 1493 if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1494 1494 ZPOOL_PROP_READONLY, NULL) &&
1495 1495 zfs_spa_version(zhp, &spa_version) == 0 &&
1496 1496 spa_version >= SPA_VERSION_USERREFS &&
1497 1497 (flags->doall || flags->replicate)) {
1498 1498 ++holdseq;
1499 1499 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1500 1500 ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1501 1501 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1502 1502 if (sdd.cleanup_fd < 0) {
1503 1503 err = errno;
1504 1504 goto stderr_out;
1505 1505 }
1506 1506 sdd.snapholds = fnvlist_alloc();
1507 1507 } else {
1508 1508 sdd.cleanup_fd = -1;
1509 1509 sdd.snapholds = NULL;
1510 1510 }
1511 1511 if (flags->verbose || sdd.snapholds != NULL) {
1512 1512 /*
1513 1513 * Do a verbose no-op dry run to get all the verbose output
1514 1514 * or to gather snapshot hold's before generating any data,
1515 1515 * then do a non-verbose real run to generate the streams.
1516 1516 */
1517 1517 sdd.dryrun = B_TRUE;
1518 1518 err = dump_filesystems(zhp, &sdd);
1519 1519
1520 1520 if (err != 0)
1521 1521 goto stderr_out;
1522 1522
1523 1523 if (flags->verbose) {
1524 1524 if (flags->parsable) {
1525 1525 (void) fprintf(stderr, "size\t%llu\n",
1526 1526 (longlong_t)sdd.size);
1527 1527 } else {
1528 1528 char buf[16];
1529 1529 zfs_nicenum(sdd.size, buf, sizeof (buf));
1530 1530 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1531 1531 "total estimated size is %s\n"), buf);
1532 1532 }
1533 1533 }
1534 1534
1535 1535 /* Ensure no snaps found is treated as an error. */
1536 1536 if (!sdd.seento) {
1537 1537 err = ENOENT;
1538 1538 goto err_out;
1539 1539 }
1540 1540
1541 1541 /* Skip the second run if dryrun was requested. */
1542 1542 if (flags->dryrun)
1543 1543 goto err_out;
1544 1544
1545 1545 if (sdd.snapholds != NULL) {
1546 1546 err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
1547 1547 if (err != 0)
1548 1548 goto stderr_out;
1549 1549
1550 1550 fnvlist_free(sdd.snapholds);
1551 1551 sdd.snapholds = NULL;
1552 1552 }
1553 1553
1554 1554 sdd.dryrun = B_FALSE;
1555 1555 sdd.verbose = B_FALSE;
1556 1556 }
1557 1557
1558 1558 err = dump_filesystems(zhp, &sdd);
↓ open down ↓ |
1558 lines elided |
↑ open up ↑ |
1559 1559 fsavl_destroy(fsavl);
1560 1560 nvlist_free(fss);
1561 1561
1562 1562 /* Ensure no snaps found is treated as an error. */
1563 1563 if (err == 0 && !sdd.seento)
1564 1564 err = ENOENT;
1565 1565
1566 1566 if (tid != 0) {
1567 1567 if (err != 0)
1568 1568 (void) pthread_cancel(tid);
1569 - (void) pthread_join(tid, NULL);
1570 1569 (void) close(pipefd[0]);
1570 + (void) pthread_join(tid, NULL);
1571 1571 }
1572 1572
1573 1573 if (sdd.cleanup_fd != -1) {
1574 1574 VERIFY(0 == close(sdd.cleanup_fd));
1575 1575 sdd.cleanup_fd = -1;
1576 1576 }
1577 1577
1578 1578 if (!flags->dryrun && (flags->replicate || flags->doall ||
1579 1579 flags->props)) {
1580 1580 /*
1581 1581 * write final end record. NB: want to do this even if
1582 1582 * there was some error, because it might not be totally
1583 1583 * failed.
1584 1584 */
1585 1585 dmu_replay_record_t drr = { 0 };
1586 1586 drr.drr_type = DRR_END;
1587 1587 if (write(outfd, &drr, sizeof (drr)) == -1) {
1588 1588 return (zfs_standard_error(zhp->zfs_hdl,
1589 1589 errno, errbuf));
1590 1590 }
1591 1591 }
1592 1592
1593 1593 return (err || sdd.err);
1594 1594
1595 1595 stderr_out:
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
1596 1596 err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1597 1597 err_out:
1598 1598 fsavl_destroy(fsavl);
1599 1599 nvlist_free(fss);
1600 1600 fnvlist_free(sdd.snapholds);
1601 1601
1602 1602 if (sdd.cleanup_fd != -1)
1603 1603 VERIFY(0 == close(sdd.cleanup_fd));
1604 1604 if (tid != 0) {
1605 1605 (void) pthread_cancel(tid);
1606 - (void) pthread_join(tid, NULL);
1607 1606 (void) close(pipefd[0]);
1607 + (void) pthread_join(tid, NULL);
1608 1608 }
1609 1609 return (err);
1610 1610 }
1611 1611
1612 1612 /*
1613 1613 * Routines specific to "zfs recv"
1614 1614 */
1615 1615
1616 1616 static int
1617 1617 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1618 1618 boolean_t byteswap, zio_cksum_t *zc)
1619 1619 {
1620 1620 char *cp = buf;
1621 1621 int rv;
1622 1622 int len = ilen;
1623 1623
1624 1624 do {
1625 1625 rv = read(fd, cp, len);
1626 1626 cp += rv;
1627 1627 len -= rv;
1628 1628 } while (rv > 0);
1629 1629
1630 1630 if (rv < 0 || len != 0) {
1631 1631 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1632 1632 "failed to read from stream"));
1633 1633 return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
1634 1634 "cannot receive")));
1635 1635 }
1636 1636
1637 1637 if (zc) {
1638 1638 if (byteswap)
1639 1639 fletcher_4_incremental_byteswap(buf, ilen, zc);
1640 1640 else
1641 1641 fletcher_4_incremental_native(buf, ilen, zc);
1642 1642 }
1643 1643 return (0);
1644 1644 }
1645 1645
1646 1646 static int
1647 1647 recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
1648 1648 boolean_t byteswap, zio_cksum_t *zc)
1649 1649 {
1650 1650 char *buf;
1651 1651 int err;
1652 1652
1653 1653 buf = zfs_alloc(hdl, len);
1654 1654 if (buf == NULL)
1655 1655 return (ENOMEM);
1656 1656
1657 1657 err = recv_read(hdl, fd, buf, len, byteswap, zc);
1658 1658 if (err != 0) {
1659 1659 free(buf);
1660 1660 return (err);
1661 1661 }
1662 1662
1663 1663 err = nvlist_unpack(buf, len, nvp, 0);
1664 1664 free(buf);
1665 1665 if (err != 0) {
1666 1666 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
1667 1667 "stream (malformed nvlist)"));
1668 1668 return (EINVAL);
1669 1669 }
1670 1670 return (0);
1671 1671 }
1672 1672
1673 1673 static int
1674 1674 recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
1675 1675 int baselen, char *newname, recvflags_t *flags)
1676 1676 {
1677 1677 static int seq;
1678 1678 zfs_cmd_t zc = { 0 };
1679 1679 int err;
1680 1680 prop_changelist_t *clp;
1681 1681 zfs_handle_t *zhp;
1682 1682
1683 1683 zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1684 1684 if (zhp == NULL)
1685 1685 return (-1);
1686 1686 clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1687 1687 flags->force ? MS_FORCE : 0);
1688 1688 zfs_close(zhp);
1689 1689 if (clp == NULL)
1690 1690 return (-1);
1691 1691 err = changelist_prefix(clp);
1692 1692 if (err)
1693 1693 return (err);
1694 1694
1695 1695 zc.zc_objset_type = DMU_OST_ZFS;
1696 1696 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1697 1697
1698 1698 if (tryname) {
1699 1699 (void) strcpy(newname, tryname);
1700 1700
1701 1701 (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
1702 1702
1703 1703 if (flags->verbose) {
1704 1704 (void) printf("attempting rename %s to %s\n",
1705 1705 zc.zc_name, zc.zc_value);
1706 1706 }
1707 1707 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1708 1708 if (err == 0)
1709 1709 changelist_rename(clp, name, tryname);
1710 1710 } else {
1711 1711 err = ENOENT;
1712 1712 }
1713 1713
1714 1714 if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
1715 1715 seq++;
1716 1716
1717 1717 (void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u",
1718 1718 baselen, name, getpid(), seq);
1719 1719 (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
1720 1720
1721 1721 if (flags->verbose) {
1722 1722 (void) printf("failed - trying rename %s to %s\n",
1723 1723 zc.zc_name, zc.zc_value);
1724 1724 }
1725 1725 err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1726 1726 if (err == 0)
1727 1727 changelist_rename(clp, name, newname);
1728 1728 if (err && flags->verbose) {
1729 1729 (void) printf("failed (%u) - "
1730 1730 "will try again on next pass\n", errno);
1731 1731 }
1732 1732 err = EAGAIN;
1733 1733 } else if (flags->verbose) {
1734 1734 if (err == 0)
1735 1735 (void) printf("success\n");
1736 1736 else
1737 1737 (void) printf("failed (%u)\n", errno);
1738 1738 }
1739 1739
1740 1740 (void) changelist_postfix(clp);
1741 1741 changelist_free(clp);
1742 1742
1743 1743 return (err);
1744 1744 }
1745 1745
1746 1746 static int
1747 1747 recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
1748 1748 char *newname, recvflags_t *flags)
1749 1749 {
1750 1750 zfs_cmd_t zc = { 0 };
1751 1751 int err = 0;
1752 1752 prop_changelist_t *clp;
1753 1753 zfs_handle_t *zhp;
1754 1754 boolean_t defer = B_FALSE;
1755 1755 int spa_version;
1756 1756
1757 1757 zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1758 1758 if (zhp == NULL)
1759 1759 return (-1);
1760 1760 clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1761 1761 flags->force ? MS_FORCE : 0);
1762 1762 if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
1763 1763 zfs_spa_version(zhp, &spa_version) == 0 &&
1764 1764 spa_version >= SPA_VERSION_USERREFS)
1765 1765 defer = B_TRUE;
1766 1766 zfs_close(zhp);
1767 1767 if (clp == NULL)
1768 1768 return (-1);
1769 1769 err = changelist_prefix(clp);
1770 1770 if (err)
1771 1771 return (err);
1772 1772
1773 1773 zc.zc_objset_type = DMU_OST_ZFS;
1774 1774 zc.zc_defer_destroy = defer;
1775 1775 (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1776 1776
1777 1777 if (flags->verbose)
1778 1778 (void) printf("attempting destroy %s\n", zc.zc_name);
1779 1779 err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
1780 1780 if (err == 0) {
1781 1781 if (flags->verbose)
1782 1782 (void) printf("success\n");
1783 1783 changelist_remove(clp, zc.zc_name);
1784 1784 }
1785 1785
1786 1786 (void) changelist_postfix(clp);
1787 1787 changelist_free(clp);
1788 1788
1789 1789 /*
1790 1790 * Deferred destroy might destroy the snapshot or only mark it to be
1791 1791 * destroyed later, and it returns success in either case.
1792 1792 */
1793 1793 if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
1794 1794 ZFS_TYPE_SNAPSHOT))) {
1795 1795 err = recv_rename(hdl, name, NULL, baselen, newname, flags);
1796 1796 }
1797 1797
1798 1798 return (err);
1799 1799 }
1800 1800
/*
 * Search state passed to guid_to_name_cb().
 */
typedef struct guid_to_name_data {
	uint64_t guid;	/* guid being searched for */
	char *name;	/* receives the name of the matching dataset */
	char *skip;	/* dataset name to pass over, or NULL */
} guid_to_name_data_t;
1806 1806
1807 1807 static int
1808 1808 guid_to_name_cb(zfs_handle_t *zhp, void *arg)
1809 1809 {
1810 1810 guid_to_name_data_t *gtnd = arg;
1811 1811 int err;
1812 1812
1813 1813 if (gtnd->skip != NULL &&
1814 1814 strcmp(zhp->zfs_name, gtnd->skip) == 0) {
1815 1815 return (0);
1816 1816 }
1817 1817
1818 1818 if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
1819 1819 (void) strcpy(gtnd->name, zhp->zfs_name);
1820 1820 zfs_close(zhp);
1821 1821 return (EEXIST);
1822 1822 }
1823 1823
1824 1824 err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
1825 1825 zfs_close(zhp);
1826 1826 return (err);
1827 1827 }
1828 1828
1829 1829 /*
1830 1830 * Attempt to find the local dataset associated with this guid. In the case of
1831 1831 * multiple matches, we attempt to find the "best" match by searching
1832 1832 * progressively larger portions of the hierarchy. This allows one to send a
1833 1833 * tree of datasets individually and guarantee that we will find the source
1834 1834 * guid within that hierarchy, even if there are multiple matches elsewhere.
1835 1835 */
1836 1836 static int
1837 1837 guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
1838 1838 char *name)
1839 1839 {
1840 1840 /* exhaustive search all local snapshots */
1841 1841 char pname[ZFS_MAXNAMELEN];
1842 1842 guid_to_name_data_t gtnd;
1843 1843 int err = 0;
1844 1844 zfs_handle_t *zhp;
1845 1845 char *cp;
1846 1846
1847 1847 gtnd.guid = guid;
1848 1848 gtnd.name = name;
1849 1849 gtnd.skip = NULL;
1850 1850
1851 1851 (void) strlcpy(pname, parent, sizeof (pname));
1852 1852
1853 1853 /*
1854 1854 * Search progressively larger portions of the hierarchy. This will
1855 1855 * select the "most local" version of the origin snapshot in the case
1856 1856 * that there are multiple matching snapshots in the system.
1857 1857 */
1858 1858 while ((cp = strrchr(pname, '/')) != NULL) {
1859 1859
1860 1860 /* Chop off the last component and open the parent */
1861 1861 *cp = '\0';
1862 1862 zhp = make_dataset_handle(hdl, pname);
1863 1863
1864 1864 if (zhp == NULL)
1865 1865 continue;
1866 1866
1867 1867 err = zfs_iter_children(zhp, guid_to_name_cb, >nd);
1868 1868 zfs_close(zhp);
1869 1869 if (err == EEXIST)
1870 1870 return (0);
1871 1871
1872 1872 /*
1873 1873 * Remember the dataset that we already searched, so we
1874 1874 * skip it next time through.
1875 1875 */
1876 1876 gtnd.skip = pname;
1877 1877 }
1878 1878
1879 1879 return (ENOENT);
1880 1880 }
1881 1881
1882 1882 /*
1883 1883 * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
1884 1884 * guid1 is after guid2.
1885 1885 */
1886 1886 static int
1887 1887 created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
1888 1888 uint64_t guid1, uint64_t guid2)
1889 1889 {
1890 1890 nvlist_t *nvfs;
1891 1891 char *fsname, *snapname;
1892 1892 char buf[ZFS_MAXNAMELEN];
1893 1893 int rv;
1894 1894 zfs_handle_t *guid1hdl, *guid2hdl;
1895 1895 uint64_t create1, create2;
1896 1896
1897 1897 if (guid2 == 0)
1898 1898 return (0);
1899 1899 if (guid1 == 0)
1900 1900 return (1);
1901 1901
1902 1902 nvfs = fsavl_find(avl, guid1, &snapname);
1903 1903 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1904 1904 (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1905 1905 guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1906 1906 if (guid1hdl == NULL)
1907 1907 return (-1);
1908 1908
1909 1909 nvfs = fsavl_find(avl, guid2, &snapname);
1910 1910 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1911 1911 (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1912 1912 guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1913 1913 if (guid2hdl == NULL) {
1914 1914 zfs_close(guid1hdl);
1915 1915 return (-1);
1916 1916 }
1917 1917
1918 1918 create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
1919 1919 create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
1920 1920
1921 1921 if (create1 < create2)
1922 1922 rv = -1;
1923 1923 else if (create1 > create2)
1924 1924 rv = +1;
1925 1925 else
1926 1926 rv = 0;
1927 1927
1928 1928 zfs_close(guid1hdl);
1929 1929 zfs_close(guid2hdl);
1930 1930
1931 1931 return (rv);
1932 1932 }
1933 1933
1934 1934 static int
1935 1935 recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
1936 1936 recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
1937 1937 nvlist_t *renamed)
1938 1938 {
1939 1939 nvlist_t *local_nv, *deleted = NULL;
1940 1940 avl_tree_t *local_avl;
1941 1941 nvpair_t *fselem, *nextfselem;
1942 1942 char *fromsnap;
1943 1943 char newname[ZFS_MAXNAMELEN], guidname[32];
1944 1944 int error;
1945 1945 boolean_t needagain, progress, recursive;
1946 1946 char *s1, *s2;
1947 1947
1948 1948 VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
1949 1949
1950 1950 recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
1951 1951 ENOENT);
1952 1952
1953 1953 if (flags->dryrun)
1954 1954 return (0);
1955 1955
1956 1956 again:
1957 1957 needagain = progress = B_FALSE;
1958 1958
1959 1959 if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
1960 1960 recursive, &local_nv, &local_avl)) != 0)
1961 1961 return (error);
1962 1962
1963 1963 deleted = fnvlist_alloc();
1964 1964
1965 1965 /*
1966 1966 * Process deletes and renames
1967 1967 */
1968 1968 for (fselem = nvlist_next_nvpair(local_nv, NULL);
1969 1969 fselem; fselem = nextfselem) {
1970 1970 nvlist_t *nvfs, *snaps;
1971 1971 nvlist_t *stream_nvfs = NULL;
1972 1972 nvpair_t *snapelem, *nextsnapelem;
1973 1973 uint64_t fromguid = 0;
1974 1974 uint64_t originguid = 0;
1975 1975 uint64_t stream_originguid = 0;
1976 1976 uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
1977 1977 char *fsname, *stream_fsname;
1978 1978
1979 1979 nextfselem = nvlist_next_nvpair(local_nv, fselem);
1980 1980
1981 1981 VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
1982 1982 VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
1983 1983 VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1984 1984 VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
1985 1985 &parent_fromsnap_guid));
1986 1986 (void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
1987 1987
1988 1988 /*
1989 1989 * First find the stream's fs, so we can check for
1990 1990 * a different origin (due to "zfs promote")
1991 1991 */
1992 1992 for (snapelem = nvlist_next_nvpair(snaps, NULL);
1993 1993 snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
1994 1994 uint64_t thisguid;
1995 1995
1996 1996 VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1997 1997 stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
1998 1998
1999 1999 if (stream_nvfs != NULL)
2000 2000 break;
2001 2001 }
2002 2002
2003 2003 /* check for promote */
2004 2004 (void) nvlist_lookup_uint64(stream_nvfs, "origin",
2005 2005 &stream_originguid);
2006 2006 if (stream_nvfs && originguid != stream_originguid) {
2007 2007 switch (created_before(hdl, local_avl,
2008 2008 stream_originguid, originguid)) {
2009 2009 case 1: {
2010 2010 /* promote it! */
2011 2011 zfs_cmd_t zc = { 0 };
2012 2012 nvlist_t *origin_nvfs;
2013 2013 char *origin_fsname;
2014 2014
2015 2015 if (flags->verbose)
2016 2016 (void) printf("promoting %s\n", fsname);
2017 2017
2018 2018 origin_nvfs = fsavl_find(local_avl, originguid,
2019 2019 NULL);
2020 2020 VERIFY(0 == nvlist_lookup_string(origin_nvfs,
2021 2021 "name", &origin_fsname));
2022 2022 (void) strlcpy(zc.zc_value, origin_fsname,
2023 2023 sizeof (zc.zc_value));
2024 2024 (void) strlcpy(zc.zc_name, fsname,
2025 2025 sizeof (zc.zc_name));
2026 2026 error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2027 2027 if (error == 0)
2028 2028 progress = B_TRUE;
2029 2029 break;
2030 2030 }
2031 2031 default:
2032 2032 break;
2033 2033 case -1:
2034 2034 fsavl_destroy(local_avl);
2035 2035 nvlist_free(local_nv);
2036 2036 nvlist_free(deleted);
2037 2037 return (-1);
2038 2038 }
2039 2039 /*
2040 2040 * We had/have the wrong origin, therefore our
2041 2041 * list of snapshots is wrong. Need to handle
2042 2042 * them on the next pass.
2043 2043 */
2044 2044 needagain = B_TRUE;
2045 2045 continue;
2046 2046 }
2047 2047
2048 2048 for (snapelem = nvlist_next_nvpair(snaps, NULL);
2049 2049 snapelem; snapelem = nextsnapelem) {
2050 2050 uint64_t thisguid;
2051 2051 char *stream_snapname;
2052 2052 nvlist_t *found, *props;
2053 2053
2054 2054 nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
2055 2055
2056 2056 VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2057 2057 found = fsavl_find(stream_avl, thisguid,
2058 2058 &stream_snapname);
2059 2059
2060 2060 /* check for delete */
2061 2061 if (found == NULL) {
2062 2062 char name[ZFS_MAXNAMELEN];
2063 2063
2064 2064 if (!flags->force)
2065 2065 continue;
2066 2066
2067 2067 (void) snprintf(name, sizeof (name), "%s@%s",
2068 2068 fsname, nvpair_name(snapelem));
2069 2069
2070 2070 error = recv_destroy(hdl, name,
2071 2071 strlen(fsname)+1, newname, flags);
2072 2072 if (error)
2073 2073 needagain = B_TRUE;
2074 2074 else
2075 2075 progress = B_TRUE;
2076 2076 (void) sprintf(guidname, "%llu",
2077 2077 (u_longlong_t)thisguid);
2078 2078 fnvlist_add_boolean(deleted, guidname);
2079 2079 continue;
2080 2080 }
2081 2081
2082 2082 stream_nvfs = found;
2083 2083
2084 2084 if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
2085 2085 &props) && 0 == nvlist_lookup_nvlist(props,
2086 2086 stream_snapname, &props)) {
2087 2087 zfs_cmd_t zc = { 0 };
2088 2088
2089 2089 zc.zc_cookie = B_TRUE; /* received */
2090 2090 (void) snprintf(zc.zc_name, sizeof (zc.zc_name),
2091 2091 "%s@%s", fsname, nvpair_name(snapelem));
2092 2092 if (zcmd_write_src_nvlist(hdl, &zc,
2093 2093 props) == 0) {
2094 2094 (void) zfs_ioctl(hdl,
2095 2095 ZFS_IOC_SET_PROP, &zc);
2096 2096 zcmd_free_nvlists(&zc);
2097 2097 }
2098 2098 }
2099 2099
2100 2100 /* check for different snapname */
2101 2101 if (strcmp(nvpair_name(snapelem),
2102 2102 stream_snapname) != 0) {
2103 2103 char name[ZFS_MAXNAMELEN];
2104 2104 char tryname[ZFS_MAXNAMELEN];
2105 2105
2106 2106 (void) snprintf(name, sizeof (name), "%s@%s",
2107 2107 fsname, nvpair_name(snapelem));
2108 2108 (void) snprintf(tryname, sizeof (name), "%s@%s",
2109 2109 fsname, stream_snapname);
2110 2110
2111 2111 error = recv_rename(hdl, name, tryname,
2112 2112 strlen(fsname)+1, newname, flags);
2113 2113 if (error)
2114 2114 needagain = B_TRUE;
2115 2115 else
2116 2116 progress = B_TRUE;
2117 2117 }
2118 2118
2119 2119 if (strcmp(stream_snapname, fromsnap) == 0)
2120 2120 fromguid = thisguid;
2121 2121 }
2122 2122
2123 2123 /* check for delete */
2124 2124 if (stream_nvfs == NULL) {
2125 2125 if (!flags->force)
2126 2126 continue;
2127 2127
2128 2128 error = recv_destroy(hdl, fsname, strlen(tofs)+1,
2129 2129 newname, flags);
2130 2130 if (error)
2131 2131 needagain = B_TRUE;
2132 2132 else
2133 2133 progress = B_TRUE;
2134 2134 (void) sprintf(guidname, "%llu",
2135 2135 (u_longlong_t)parent_fromsnap_guid);
2136 2136 fnvlist_add_boolean(deleted, guidname);
2137 2137 continue;
2138 2138 }
2139 2139
2140 2140 if (fromguid == 0) {
2141 2141 if (flags->verbose) {
2142 2142 (void) printf("local fs %s does not have "
2143 2143 "fromsnap (%s in stream); must have "
2144 2144 "been deleted locally; ignoring\n",
2145 2145 fsname, fromsnap);
2146 2146 }
2147 2147 continue;
2148 2148 }
2149 2149
2150 2150 VERIFY(0 == nvlist_lookup_string(stream_nvfs,
2151 2151 "name", &stream_fsname));
2152 2152 VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
2153 2153 "parentfromsnap", &stream_parent_fromsnap_guid));
2154 2154
2155 2155 s1 = strrchr(fsname, '/');
2156 2156 s2 = strrchr(stream_fsname, '/');
2157 2157
2158 2158 /*
2159 2159 * Check if we're going to rename based on parent guid change
2160 2160 * and the current parent guid was also deleted. If it was then
2161 2161 * the rename will fail so avoid this and force an early retry
2162 2162 * to determine the new parent_fromsnap_guid.
2163 2163 */
2164 2164 if (stream_parent_fromsnap_guid != 0 &&
2165 2165 parent_fromsnap_guid != 0 &&
2166 2166 stream_parent_fromsnap_guid != parent_fromsnap_guid) {
2167 2167 (void) sprintf(guidname, "%llu",
2168 2168 (u_longlong_t)parent_fromsnap_guid);
2169 2169 if (nvlist_exists(deleted, guidname)) {
2170 2170 progress = B_TRUE;
2171 2171 needagain = B_TRUE;
2172 2172 goto doagain;
2173 2173 }
2174 2174 }
2175 2175
2176 2176 /*
2177 2177 * Check for rename. If the exact receive path is specified, it
2178 2178 * does not count as a rename, but we still need to check the
2179 2179 * datasets beneath it.
2180 2180 */
2181 2181 if ((stream_parent_fromsnap_guid != 0 &&
2182 2182 parent_fromsnap_guid != 0 &&
2183 2183 stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
2184 2184 ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
2185 2185 (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
2186 2186 nvlist_t *parent;
2187 2187 char tryname[ZFS_MAXNAMELEN];
2188 2188
2189 2189 parent = fsavl_find(local_avl,
2190 2190 stream_parent_fromsnap_guid, NULL);
2191 2191 /*
2192 2192 * NB: parent might not be found if we used the
2193 2193 * tosnap for stream_parent_fromsnap_guid,
2194 2194 * because the parent is a newly-created fs;
2195 2195 * we'll be able to rename it after we recv the
2196 2196 * new fs.
2197 2197 */
2198 2198 if (parent != NULL) {
2199 2199 char *pname;
2200 2200
2201 2201 VERIFY(0 == nvlist_lookup_string(parent, "name",
2202 2202 &pname));
2203 2203 (void) snprintf(tryname, sizeof (tryname),
2204 2204 "%s%s", pname, strrchr(stream_fsname, '/'));
2205 2205 } else {
2206 2206 tryname[0] = '\0';
2207 2207 if (flags->verbose) {
2208 2208 (void) printf("local fs %s new parent "
2209 2209 "not found\n", fsname);
2210 2210 }
2211 2211 }
2212 2212
2213 2213 newname[0] = '\0';
2214 2214
2215 2215 error = recv_rename(hdl, fsname, tryname,
2216 2216 strlen(tofs)+1, newname, flags);
2217 2217
2218 2218 if (renamed != NULL && newname[0] != '\0') {
2219 2219 VERIFY(0 == nvlist_add_boolean(renamed,
2220 2220 newname));
2221 2221 }
2222 2222
2223 2223 if (error)
2224 2224 needagain = B_TRUE;
2225 2225 else
2226 2226 progress = B_TRUE;
2227 2227 }
2228 2228 }
2229 2229
2230 2230 doagain:
2231 2231 fsavl_destroy(local_avl);
2232 2232 nvlist_free(local_nv);
2233 2233 nvlist_free(deleted);
2234 2234
2235 2235 if (needagain && progress) {
2236 2236 /* do another pass to fix up temporary names */
2237 2237 if (flags->verbose)
2238 2238 (void) printf("another pass:\n");
2239 2239 goto again;
2240 2240 }
2241 2241
2242 2242 return (needagain);
2243 2243 }
2244 2244
2245 2245 static int
2246 2246 zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2247 2247 recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2248 2248 char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2249 2249 {
2250 2250 nvlist_t *stream_nv = NULL;
2251 2251 avl_tree_t *stream_avl = NULL;
2252 2252 char *fromsnap = NULL;
2253 2253 char *cp;
2254 2254 char tofs[ZFS_MAXNAMELEN];
2255 2255 char sendfs[ZFS_MAXNAMELEN];
2256 2256 char errbuf[1024];
2257 2257 dmu_replay_record_t drre;
2258 2258 int error;
2259 2259 boolean_t anyerr = B_FALSE;
2260 2260 boolean_t softerr = B_FALSE;
2261 2261 boolean_t recursive;
2262 2262
2263 2263 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2264 2264 "cannot receive"));
2265 2265
2266 2266 assert(drr->drr_type == DRR_BEGIN);
2267 2267 assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2268 2268 assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2269 2269 DMU_COMPOUNDSTREAM);
2270 2270
2271 2271 /*
2272 2272 * Read in the nvlist from the stream.
2273 2273 */
2274 2274 if (drr->drr_payloadlen != 0) {
2275 2275 error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2276 2276 &stream_nv, flags->byteswap, zc);
2277 2277 if (error) {
2278 2278 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2279 2279 goto out;
2280 2280 }
2281 2281 }
2282 2282
2283 2283 recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2284 2284 ENOENT);
2285 2285
2286 2286 if (recursive && strchr(destname, '@')) {
2287 2287 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2288 2288 "cannot specify snapshot name for multi-snapshot stream"));
2289 2289 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2290 2290 goto out;
2291 2291 }
2292 2292
2293 2293 /*
2294 2294 * Read in the end record and verify checksum.
2295 2295 */
2296 2296 if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2297 2297 flags->byteswap, NULL)))
2298 2298 goto out;
2299 2299 if (flags->byteswap) {
2300 2300 drre.drr_type = BSWAP_32(drre.drr_type);
2301 2301 drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2302 2302 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2303 2303 drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2304 2304 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2305 2305 drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2306 2306 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2307 2307 drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2308 2308 BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2309 2309 }
2310 2310 if (drre.drr_type != DRR_END) {
2311 2311 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2312 2312 goto out;
2313 2313 }
2314 2314 if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2315 2315 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2316 2316 "incorrect header checksum"));
2317 2317 error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2318 2318 goto out;
2319 2319 }
2320 2320
2321 2321 (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2322 2322
2323 2323 if (drr->drr_payloadlen != 0) {
2324 2324 nvlist_t *stream_fss;
2325 2325
2326 2326 VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2327 2327 &stream_fss));
2328 2328 if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2329 2329 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2330 2330 "couldn't allocate avl tree"));
2331 2331 error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2332 2332 goto out;
2333 2333 }
2334 2334
2335 2335 if (fromsnap != NULL) {
2336 2336 nvlist_t *renamed = NULL;
2337 2337 nvpair_t *pair = NULL;
2338 2338
2339 2339 (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
2340 2340 if (flags->isprefix) {
2341 2341 struct drr_begin *drrb = &drr->drr_u.drr_begin;
2342 2342 int i;
2343 2343
2344 2344 if (flags->istail) {
2345 2345 cp = strrchr(drrb->drr_toname, '/');
2346 2346 if (cp == NULL) {
2347 2347 (void) strlcat(tofs, "/",
2348 2348 ZFS_MAXNAMELEN);
2349 2349 i = 0;
2350 2350 } else {
2351 2351 i = (cp - drrb->drr_toname);
2352 2352 }
2353 2353 } else {
2354 2354 i = strcspn(drrb->drr_toname, "/@");
2355 2355 }
2356 2356 /* zfs_receive_one() will create_parents() */
2357 2357 (void) strlcat(tofs, &drrb->drr_toname[i],
2358 2358 ZFS_MAXNAMELEN);
2359 2359 *strchr(tofs, '@') = '\0';
2360 2360 }
2361 2361
2362 2362 if (recursive && !flags->dryrun && !flags->nomount) {
2363 2363 VERIFY(0 == nvlist_alloc(&renamed,
2364 2364 NV_UNIQUE_NAME, 0));
2365 2365 }
2366 2366
2367 2367 softerr = recv_incremental_replication(hdl, tofs, flags,
2368 2368 stream_nv, stream_avl, renamed);
2369 2369
2370 2370 /* Unmount renamed filesystems before receiving. */
2371 2371 while ((pair = nvlist_next_nvpair(renamed,
2372 2372 pair)) != NULL) {
2373 2373 zfs_handle_t *zhp;
2374 2374 prop_changelist_t *clp = NULL;
2375 2375
2376 2376 zhp = zfs_open(hdl, nvpair_name(pair),
2377 2377 ZFS_TYPE_FILESYSTEM);
2378 2378 if (zhp != NULL) {
2379 2379 clp = changelist_gather(zhp,
2380 2380 ZFS_PROP_MOUNTPOINT, 0, 0);
2381 2381 zfs_close(zhp);
2382 2382 if (clp != NULL) {
2383 2383 softerr |=
2384 2384 changelist_prefix(clp);
2385 2385 changelist_free(clp);
2386 2386 }
2387 2387 }
2388 2388 }
2389 2389
2390 2390 nvlist_free(renamed);
2391 2391 }
2392 2392 }
2393 2393
2394 2394 /*
2395 2395 * Get the fs specified by the first path in the stream (the top level
2396 2396 * specified by 'zfs send') and pass it to each invocation of
2397 2397 * zfs_receive_one().
2398 2398 */
2399 2399 (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2400 2400 ZFS_MAXNAMELEN);
2401 2401 if ((cp = strchr(sendfs, '@')) != NULL)
2402 2402 *cp = '\0';
2403 2403
2404 2404 /* Finally, receive each contained stream */
2405 2405 do {
2406 2406 /*
2407 2407 * we should figure out if it has a recoverable
2408 2408 * error, in which case do a recv_skip() and drive on.
2409 2409 * Note, if we fail due to already having this guid,
2410 2410 * zfs_receive_one() will take care of it (ie,
2411 2411 * recv_skip() and return 0).
2412 2412 */
2413 2413 error = zfs_receive_impl(hdl, destname, flags, fd,
2414 2414 sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2415 2415 action_handlep);
2416 2416 if (error == ENODATA) {
2417 2417 error = 0;
2418 2418 break;
2419 2419 }
2420 2420 anyerr |= error;
2421 2421 } while (error == 0);
2422 2422
2423 2423 if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
2424 2424 /*
2425 2425 * Now that we have the fs's they sent us, try the
2426 2426 * renames again.
2427 2427 */
2428 2428 softerr = recv_incremental_replication(hdl, tofs, flags,
2429 2429 stream_nv, stream_avl, NULL);
2430 2430 }
2431 2431
2432 2432 out:
2433 2433 fsavl_destroy(stream_avl);
2434 2434 if (stream_nv)
2435 2435 nvlist_free(stream_nv);
2436 2436 if (softerr)
2437 2437 error = -2;
2438 2438 if (anyerr)
2439 2439 error = -1;
2440 2440 return (error);
2441 2441 }
2442 2442
2443 2443 static void
2444 2444 trunc_prop_errs(int truncated)
2445 2445 {
2446 2446 ASSERT(truncated != 0);
2447 2447
2448 2448 if (truncated == 1)
2449 2449 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2450 2450 "1 more property could not be set\n"));
2451 2451 else
2452 2452 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2453 2453 "%d more properties could not be set\n"), truncated);
2454 2454 }
2455 2455
2456 2456 static int
2457 2457 recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
2458 2458 {
2459 2459 dmu_replay_record_t *drr;
2460 2460 void *buf = malloc(1<<20);
2461 2461 char errbuf[1024];
2462 2462
2463 2463 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2464 2464 "cannot receive:"));
2465 2465
2466 2466 /* XXX would be great to use lseek if possible... */
2467 2467 drr = buf;
2468 2468
2469 2469 while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
2470 2470 byteswap, NULL) == 0) {
2471 2471 if (byteswap)
2472 2472 drr->drr_type = BSWAP_32(drr->drr_type);
2473 2473
2474 2474 switch (drr->drr_type) {
2475 2475 case DRR_BEGIN:
2476 2476 /* NB: not to be used on v2 stream packages */
2477 2477 if (drr->drr_payloadlen != 0) {
2478 2478 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2479 2479 "invalid substream header"));
2480 2480 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2481 2481 }
2482 2482 break;
2483 2483
2484 2484 case DRR_END:
2485 2485 free(buf);
2486 2486 return (0);
2487 2487
2488 2488 case DRR_OBJECT:
2489 2489 if (byteswap) {
2490 2490 drr->drr_u.drr_object.drr_bonuslen =
2491 2491 BSWAP_32(drr->drr_u.drr_object.
2492 2492 drr_bonuslen);
2493 2493 }
2494 2494 (void) recv_read(hdl, fd, buf,
2495 2495 P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
2496 2496 B_FALSE, NULL);
2497 2497 break;
2498 2498
2499 2499 case DRR_WRITE:
2500 2500 if (byteswap) {
2501 2501 drr->drr_u.drr_write.drr_length =
2502 2502 BSWAP_64(drr->drr_u.drr_write.drr_length);
2503 2503 }
2504 2504 (void) recv_read(hdl, fd, buf,
2505 2505 drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
2506 2506 break;
2507 2507 case DRR_SPILL:
2508 2508 if (byteswap) {
2509 2509 drr->drr_u.drr_write.drr_length =
2510 2510 BSWAP_64(drr->drr_u.drr_spill.drr_length);
2511 2511 }
2512 2512 (void) recv_read(hdl, fd, buf,
2513 2513 drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
2514 2514 break;
2515 2515 case DRR_WRITE_BYREF:
2516 2516 case DRR_FREEOBJECTS:
2517 2517 case DRR_FREE:
2518 2518 break;
2519 2519
2520 2520 default:
2521 2521 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2522 2522 "invalid record type"));
2523 2523 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2524 2524 }
2525 2525 }
2526 2526
2527 2527 free(buf);
2528 2528 return (-1);
2529 2529 }
2530 2530
2531 2531 /*
2532 2532 * Restores a backup of tosnap from the file descriptor specified by infd.
2533 2533 */
2534 2534 static int
2535 2535 zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
2536 2536 recvflags_t *flags, dmu_replay_record_t *drr,
2537 2537 dmu_replay_record_t *drr_noswap, const char *sendfs,
2538 2538 nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
2539 2539 uint64_t *action_handlep)
2540 2540 {
2541 2541 zfs_cmd_t zc = { 0 };
2542 2542 time_t begin_time;
2543 2543 int ioctl_err, ioctl_errno, err;
2544 2544 char *cp;
2545 2545 struct drr_begin *drrb = &drr->drr_u.drr_begin;
2546 2546 char errbuf[1024];
2547 2547 char prop_errbuf[1024];
2548 2548 const char *chopprefix;
2549 2549 boolean_t newfs = B_FALSE;
2550 2550 boolean_t stream_wantsnewfs;
2551 2551 uint64_t parent_snapguid = 0;
2552 2552 prop_changelist_t *clp = NULL;
2553 2553 nvlist_t *snapprops_nvlist = NULL;
2554 2554 zprop_errflags_t prop_errflags;
2555 2555 boolean_t recursive;
2556 2556
2557 2557 begin_time = time(NULL);
2558 2558
2559 2559 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2560 2560 "cannot receive"));
2561 2561
2562 2562 recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2563 2563 ENOENT);
2564 2564
2565 2565 if (stream_avl != NULL) {
2566 2566 char *snapname;
2567 2567 nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
2568 2568 &snapname);
2569 2569 nvlist_t *props;
2570 2570 int ret;
2571 2571
2572 2572 (void) nvlist_lookup_uint64(fs, "parentfromsnap",
2573 2573 &parent_snapguid);
2574 2574 err = nvlist_lookup_nvlist(fs, "props", &props);
2575 2575 if (err)
2576 2576 VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
2577 2577
2578 2578 if (flags->canmountoff) {
2579 2579 VERIFY(0 == nvlist_add_uint64(props,
2580 2580 zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
2581 2581 }
2582 2582 ret = zcmd_write_src_nvlist(hdl, &zc, props);
2583 2583 if (err)
2584 2584 nvlist_free(props);
2585 2585
2586 2586 if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
2587 2587 VERIFY(0 == nvlist_lookup_nvlist(props,
2588 2588 snapname, &snapprops_nvlist));
2589 2589 }
2590 2590
2591 2591 if (ret != 0)
2592 2592 return (-1);
2593 2593 }
2594 2594
2595 2595 cp = NULL;
2596 2596
2597 2597 /*
2598 2598 * Determine how much of the snapshot name stored in the stream
2599 2599 * we are going to tack on to the name they specified on the
2600 2600 * command line, and how much we are going to chop off.
2601 2601 *
2602 2602 * If they specified a snapshot, chop the entire name stored in
2603 2603 * the stream.
2604 2604 */
2605 2605 if (flags->istail) {
2606 2606 /*
2607 2607 * A filesystem was specified with -e. We want to tack on only
2608 2608 * the tail of the sent snapshot path.
2609 2609 */
2610 2610 if (strchr(tosnap, '@')) {
2611 2611 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2612 2612 "argument - snapshot not allowed with -e"));
2613 2613 return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2614 2614 }
2615 2615
2616 2616 chopprefix = strrchr(sendfs, '/');
2617 2617
2618 2618 if (chopprefix == NULL) {
2619 2619 /*
2620 2620 * The tail is the poolname, so we need to
2621 2621 * prepend a path separator.
2622 2622 */
2623 2623 int len = strlen(drrb->drr_toname);
2624 2624 cp = malloc(len + 2);
2625 2625 cp[0] = '/';
2626 2626 (void) strcpy(&cp[1], drrb->drr_toname);
2627 2627 chopprefix = cp;
2628 2628 } else {
2629 2629 chopprefix = drrb->drr_toname + (chopprefix - sendfs);
2630 2630 }
2631 2631 } else if (flags->isprefix) {
2632 2632 /*
2633 2633 * A filesystem was specified with -d. We want to tack on
2634 2634 * everything but the first element of the sent snapshot path
2635 2635 * (all but the pool name).
2636 2636 */
2637 2637 if (strchr(tosnap, '@')) {
2638 2638 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2639 2639 "argument - snapshot not allowed with -d"));
2640 2640 return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2641 2641 }
2642 2642
2643 2643 chopprefix = strchr(drrb->drr_toname, '/');
2644 2644 if (chopprefix == NULL)
2645 2645 chopprefix = strchr(drrb->drr_toname, '@');
2646 2646 } else if (strchr(tosnap, '@') == NULL) {
2647 2647 /*
2648 2648 * If a filesystem was specified without -d or -e, we want to
2649 2649 * tack on everything after the fs specified by 'zfs send'.
2650 2650 */
2651 2651 chopprefix = drrb->drr_toname + strlen(sendfs);
2652 2652 } else {
2653 2653 /* A snapshot was specified as an exact path (no -d or -e). */
2654 2654 if (recursive) {
2655 2655 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2656 2656 "cannot specify snapshot name for multi-snapshot "
2657 2657 "stream"));
2658 2658 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2659 2659 }
2660 2660 chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
2661 2661 }
2662 2662
2663 2663 ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
2664 2664 ASSERT(chopprefix > drrb->drr_toname);
2665 2665 ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
2666 2666 ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
2667 2667 chopprefix[0] == '\0');
2668 2668
2669 2669 /*
2670 2670 * Determine name of destination snapshot, store in zc_value.
2671 2671 */
2672 2672 (void) strcpy(zc.zc_value, tosnap);
2673 2673 (void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
2674 2674 free(cp);
2675 2675 if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
2676 2676 zcmd_free_nvlists(&zc);
2677 2677 return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2678 2678 }
2679 2679
2680 2680 /*
2681 2681 * Determine the name of the origin snapshot, store in zc_string.
2682 2682 */
2683 2683 if (drrb->drr_flags & DRR_FLAG_CLONE) {
2684 2684 if (guid_to_name(hdl, zc.zc_value,
2685 2685 drrb->drr_fromguid, zc.zc_string) != 0) {
2686 2686 zcmd_free_nvlists(&zc);
2687 2687 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2688 2688 "local origin for clone %s does not exist"),
2689 2689 zc.zc_value);
2690 2690 return (zfs_error(hdl, EZFS_NOENT, errbuf));
2691 2691 }
2692 2692 if (flags->verbose)
2693 2693 (void) printf("found clone origin %s\n", zc.zc_string);
2694 2694 }
2695 2695
2696 2696 stream_wantsnewfs = (drrb->drr_fromguid == NULL ||
2697 2697 (drrb->drr_flags & DRR_FLAG_CLONE));
2698 2698
2699 2699 if (stream_wantsnewfs) {
2700 2700 /*
2701 2701 * if the parent fs does not exist, look for it based on
2702 2702 * the parent snap GUID
2703 2703 */
2704 2704 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2705 2705 "cannot receive new filesystem stream"));
2706 2706
2707 2707 (void) strcpy(zc.zc_name, zc.zc_value);
2708 2708 cp = strrchr(zc.zc_name, '/');
2709 2709 if (cp)
2710 2710 *cp = '\0';
2711 2711 if (cp &&
2712 2712 !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2713 2713 char suffix[ZFS_MAXNAMELEN];
2714 2714 (void) strcpy(suffix, strrchr(zc.zc_value, '/'));
2715 2715 if (guid_to_name(hdl, zc.zc_name, parent_snapguid,
2716 2716 zc.zc_value) == 0) {
2717 2717 *strchr(zc.zc_value, '@') = '\0';
2718 2718 (void) strcat(zc.zc_value, suffix);
2719 2719 }
2720 2720 }
2721 2721 } else {
2722 2722 /*
2723 2723 * if the fs does not exist, look for it based on the
2724 2724 * fromsnap GUID
2725 2725 */
2726 2726 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2727 2727 "cannot receive incremental stream"));
2728 2728
2729 2729 (void) strcpy(zc.zc_name, zc.zc_value);
2730 2730 *strchr(zc.zc_name, '@') = '\0';
2731 2731
2732 2732 /*
2733 2733 * If the exact receive path was specified and this is the
2734 2734 * topmost path in the stream, then if the fs does not exist we
2735 2735 * should look no further.
2736 2736 */
2737 2737 if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
2738 2738 strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
2739 2739 !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2740 2740 char snap[ZFS_MAXNAMELEN];
2741 2741 (void) strcpy(snap, strchr(zc.zc_value, '@'));
2742 2742 if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid,
2743 2743 zc.zc_value) == 0) {
2744 2744 *strchr(zc.zc_value, '@') = '\0';
2745 2745 (void) strcat(zc.zc_value, snap);
2746 2746 }
2747 2747 }
2748 2748 }
2749 2749
2750 2750 (void) strcpy(zc.zc_name, zc.zc_value);
2751 2751 *strchr(zc.zc_name, '@') = '\0';
2752 2752
2753 2753 if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2754 2754 zfs_handle_t *zhp;
2755 2755
2756 2756 /*
2757 2757 * Destination fs exists. Therefore this should either
2758 2758 * be an incremental, or the stream specifies a new fs
2759 2759 * (full stream or clone) and they want us to blow it
2760 2760 * away (and have therefore specified -F and removed any
2761 2761 * snapshots).
2762 2762 */
2763 2763 if (stream_wantsnewfs) {
2764 2764 if (!flags->force) {
2765 2765 zcmd_free_nvlists(&zc);
2766 2766 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2767 2767 "destination '%s' exists\n"
2768 2768 "must specify -F to overwrite it"),
2769 2769 zc.zc_name);
2770 2770 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2771 2771 }
2772 2772 if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
2773 2773 &zc) == 0) {
2774 2774 zcmd_free_nvlists(&zc);
2775 2775 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2776 2776 "destination has snapshots (eg. %s)\n"
2777 2777 "must destroy them to overwrite it"),
2778 2778 zc.zc_name);
2779 2779 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2780 2780 }
2781 2781 }
2782 2782
2783 2783 if ((zhp = zfs_open(hdl, zc.zc_name,
2784 2784 ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
2785 2785 zcmd_free_nvlists(&zc);
2786 2786 return (-1);
2787 2787 }
2788 2788
2789 2789 if (stream_wantsnewfs &&
2790 2790 zhp->zfs_dmustats.dds_origin[0]) {
2791 2791 zcmd_free_nvlists(&zc);
2792 2792 zfs_close(zhp);
2793 2793 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2794 2794 "destination '%s' is a clone\n"
2795 2795 "must destroy it to overwrite it"),
2796 2796 zc.zc_name);
2797 2797 return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2798 2798 }
2799 2799
2800 2800 if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
2801 2801 stream_wantsnewfs) {
2802 2802 /* We can't do online recv in this case */
2803 2803 clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
2804 2804 if (clp == NULL) {
2805 2805 zfs_close(zhp);
2806 2806 zcmd_free_nvlists(&zc);
2807 2807 return (-1);
2808 2808 }
2809 2809 if (changelist_prefix(clp) != 0) {
2810 2810 changelist_free(clp);
2811 2811 zfs_close(zhp);
2812 2812 zcmd_free_nvlists(&zc);
2813 2813 return (-1);
2814 2814 }
2815 2815 }
2816 2816 zfs_close(zhp);
2817 2817 } else {
2818 2818 /*
2819 2819 * Destination filesystem does not exist. Therefore we better
2820 2820 * be creating a new filesystem (either from a full backup, or
2821 2821 * a clone). It would therefore be invalid if the user
2822 2822 * specified only the pool name (i.e. if the destination name
2823 2823 * contained no slash character).
2824 2824 */
2825 2825 if (!stream_wantsnewfs ||
2826 2826 (cp = strrchr(zc.zc_name, '/')) == NULL) {
2827 2827 zcmd_free_nvlists(&zc);
2828 2828 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2829 2829 "destination '%s' does not exist"), zc.zc_name);
2830 2830 return (zfs_error(hdl, EZFS_NOENT, errbuf));
2831 2831 }
2832 2832
2833 2833 /*
2834 2834 * Trim off the final dataset component so we perform the
2835 2835 * recvbackup ioctl to the filesystems's parent.
2836 2836 */
2837 2837 *cp = '\0';
2838 2838
2839 2839 if (flags->isprefix && !flags->istail && !flags->dryrun &&
2840 2840 create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
2841 2841 zcmd_free_nvlists(&zc);
2842 2842 return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
2843 2843 }
2844 2844
2845 2845 newfs = B_TRUE;
2846 2846 }
2847 2847
2848 2848 zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
2849 2849 zc.zc_cookie = infd;
2850 2850 zc.zc_guid = flags->force;
2851 2851 if (flags->verbose) {
2852 2852 (void) printf("%s %s stream of %s into %s\n",
2853 2853 flags->dryrun ? "would receive" : "receiving",
2854 2854 drrb->drr_fromguid ? "incremental" : "full",
2855 2855 drrb->drr_toname, zc.zc_value);
2856 2856 (void) fflush(stdout);
2857 2857 }
2858 2858
2859 2859 if (flags->dryrun) {
2860 2860 zcmd_free_nvlists(&zc);
2861 2861 return (recv_skip(hdl, infd, flags->byteswap));
2862 2862 }
2863 2863
2864 2864 zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
2865 2865 zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
2866 2866 zc.zc_cleanup_fd = cleanup_fd;
2867 2867 zc.zc_action_handle = *action_handlep;
2868 2868
2869 2869 err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
2870 2870 ioctl_errno = errno;
2871 2871 prop_errflags = (zprop_errflags_t)zc.zc_obj;
2872 2872
2873 2873 if (err == 0) {
2874 2874 nvlist_t *prop_errors;
2875 2875 VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
2876 2876 zc.zc_nvlist_dst_size, &prop_errors, 0));
2877 2877
2878 2878 nvpair_t *prop_err = NULL;
2879 2879
2880 2880 while ((prop_err = nvlist_next_nvpair(prop_errors,
2881 2881 prop_err)) != NULL) {
2882 2882 char tbuf[1024];
2883 2883 zfs_prop_t prop;
2884 2884 int intval;
2885 2885
2886 2886 prop = zfs_name_to_prop(nvpair_name(prop_err));
2887 2887 (void) nvpair_value_int32(prop_err, &intval);
2888 2888 if (strcmp(nvpair_name(prop_err),
2889 2889 ZPROP_N_MORE_ERRORS) == 0) {
2890 2890 trunc_prop_errs(intval);
2891 2891 break;
2892 2892 } else {
2893 2893 (void) snprintf(tbuf, sizeof (tbuf),
2894 2894 dgettext(TEXT_DOMAIN,
2895 2895 "cannot receive %s property on %s"),
2896 2896 nvpair_name(prop_err), zc.zc_name);
2897 2897 zfs_setprop_error(hdl, prop, intval, tbuf);
2898 2898 }
2899 2899 }
2900 2900 nvlist_free(prop_errors);
2901 2901 }
2902 2902
2903 2903 zc.zc_nvlist_dst = 0;
2904 2904 zc.zc_nvlist_dst_size = 0;
2905 2905 zcmd_free_nvlists(&zc);
2906 2906
2907 2907 if (err == 0 && snapprops_nvlist) {
2908 2908 zfs_cmd_t zc2 = { 0 };
2909 2909
2910 2910 (void) strcpy(zc2.zc_name, zc.zc_value);
2911 2911 zc2.zc_cookie = B_TRUE; /* received */
2912 2912 if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
2913 2913 (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
2914 2914 zcmd_free_nvlists(&zc2);
2915 2915 }
2916 2916 }
2917 2917
2918 2918 if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
2919 2919 /*
2920 2920 * It may be that this snapshot already exists,
2921 2921 * in which case we want to consume & ignore it
2922 2922 * rather than failing.
2923 2923 */
2924 2924 avl_tree_t *local_avl;
2925 2925 nvlist_t *local_nv, *fs;
2926 2926 cp = strchr(zc.zc_value, '@');
2927 2927
2928 2928 /*
2929 2929 * XXX Do this faster by just iterating over snaps in
2930 2930 * this fs. Also if zc_value does not exist, we will
2931 2931 * get a strange "does not exist" error message.
2932 2932 */
2933 2933 *cp = '\0';
2934 2934 if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
2935 2935 &local_nv, &local_avl) == 0) {
2936 2936 *cp = '@';
2937 2937 fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
2938 2938 fsavl_destroy(local_avl);
2939 2939 nvlist_free(local_nv);
2940 2940
2941 2941 if (fs != NULL) {
2942 2942 if (flags->verbose) {
2943 2943 (void) printf("snap %s already exists; "
2944 2944 "ignoring\n", zc.zc_value);
2945 2945 }
2946 2946 err = ioctl_err = recv_skip(hdl, infd,
2947 2947 flags->byteswap);
2948 2948 }
2949 2949 }
2950 2950 *cp = '@';
2951 2951 }
2952 2952
2953 2953 if (ioctl_err != 0) {
2954 2954 switch (ioctl_errno) {
2955 2955 case ENODEV:
2956 2956 cp = strchr(zc.zc_value, '@');
2957 2957 *cp = '\0';
2958 2958 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2959 2959 "most recent snapshot of %s does not\n"
2960 2960 "match incremental source"), zc.zc_value);
2961 2961 (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2962 2962 *cp = '@';
2963 2963 break;
2964 2964 case ETXTBSY:
2965 2965 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2966 2966 "destination %s has been modified\n"
2967 2967 "since most recent snapshot"), zc.zc_name);
2968 2968 (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2969 2969 break;
2970 2970 case EEXIST:
2971 2971 cp = strchr(zc.zc_value, '@');
2972 2972 if (newfs) {
2973 2973 /* it's the containing fs that exists */
2974 2974 *cp = '\0';
2975 2975 }
2976 2976 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2977 2977 "destination already exists"));
2978 2978 (void) zfs_error_fmt(hdl, EZFS_EXISTS,
2979 2979 dgettext(TEXT_DOMAIN, "cannot restore to %s"),
2980 2980 zc.zc_value);
2981 2981 *cp = '@';
2982 2982 break;
2983 2983 case EINVAL:
2984 2984 (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2985 2985 break;
2986 2986 case ECKSUM:
2987 2987 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2988 2988 "invalid stream (checksum mismatch)"));
2989 2989 (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2990 2990 break;
2991 2991 case ENOTSUP:
2992 2992 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2993 2993 "pool must be upgraded to receive this stream."));
2994 2994 (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
2995 2995 break;
2996 2996 case EDQUOT:
2997 2997 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2998 2998 "destination %s space quota exceeded"), zc.zc_name);
2999 2999 (void) zfs_error(hdl, EZFS_NOSPC, errbuf);
3000 3000 break;
3001 3001 default:
3002 3002 (void) zfs_standard_error(hdl, ioctl_errno, errbuf);
3003 3003 }
3004 3004 }
3005 3005
3006 3006 /*
3007 3007 * Mount the target filesystem (if created). Also mount any
3008 3008 * children of the target filesystem if we did a replication
3009 3009 * receive (indicated by stream_avl being non-NULL).
3010 3010 */
3011 3011 cp = strchr(zc.zc_value, '@');
3012 3012 if (cp && (ioctl_err == 0 || !newfs)) {
3013 3013 zfs_handle_t *h;
3014 3014
3015 3015 *cp = '\0';
3016 3016 h = zfs_open(hdl, zc.zc_value,
3017 3017 ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
3018 3018 if (h != NULL) {
3019 3019 if (h->zfs_type == ZFS_TYPE_VOLUME) {
3020 3020 *cp = '@';
3021 3021 } else if (newfs || stream_avl) {
3022 3022 /*
3023 3023 * Track the first/top of hierarchy fs,
3024 3024 * for mounting and sharing later.
3025 3025 */
3026 3026 if (top_zfs && *top_zfs == NULL)
3027 3027 *top_zfs = zfs_strdup(hdl, zc.zc_value);
3028 3028 }
3029 3029 zfs_close(h);
3030 3030 }
3031 3031 *cp = '@';
3032 3032 }
3033 3033
3034 3034 if (clp) {
3035 3035 err |= changelist_postfix(clp);
3036 3036 changelist_free(clp);
3037 3037 }
3038 3038
3039 3039 if (prop_errflags & ZPROP_ERR_NOCLEAR) {
3040 3040 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3041 3041 "failed to clear unreceived properties on %s"),
3042 3042 zc.zc_name);
3043 3043 (void) fprintf(stderr, "\n");
3044 3044 }
3045 3045 if (prop_errflags & ZPROP_ERR_NORESTORE) {
3046 3046 (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3047 3047 "failed to restore original properties on %s"),
3048 3048 zc.zc_name);
3049 3049 (void) fprintf(stderr, "\n");
3050 3050 }
3051 3051
3052 3052 if (err || ioctl_err)
3053 3053 return (-1);
3054 3054
3055 3055 *action_handlep = zc.zc_action_handle;
3056 3056
3057 3057 if (flags->verbose) {
3058 3058 char buf1[64];
3059 3059 char buf2[64];
3060 3060 uint64_t bytes = zc.zc_cookie;
3061 3061 time_t delta = time(NULL) - begin_time;
3062 3062 if (delta == 0)
3063 3063 delta = 1;
3064 3064 zfs_nicenum(bytes, buf1, sizeof (buf1));
3065 3065 zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
3066 3066
3067 3067 (void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
3068 3068 buf1, delta, buf2);
3069 3069 }
3070 3070
3071 3071 return (0);
3072 3072 }
3073 3073
3074 3074 static int
3075 3075 zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
3076 3076 int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
3077 3077 char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
3078 3078 {
3079 3079 int err;
3080 3080 dmu_replay_record_t drr, drr_noswap;
3081 3081 struct drr_begin *drrb = &drr.drr_u.drr_begin;
3082 3082 char errbuf[1024];
3083 3083 zio_cksum_t zcksum = { 0 };
3084 3084 uint64_t featureflags;
3085 3085 int hdrtype;
3086 3086
3087 3087 (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3088 3088 "cannot receive"));
3089 3089
3090 3090 if (flags->isprefix &&
3091 3091 !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
3092 3092 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
3093 3093 "(%s) does not exist"), tosnap);
3094 3094 return (zfs_error(hdl, EZFS_NOENT, errbuf));
3095 3095 }
3096 3096
3097 3097 /* read in the BEGIN record */
3098 3098 if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
3099 3099 &zcksum)))
3100 3100 return (err);
3101 3101
3102 3102 if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
3103 3103 /* It's the double end record at the end of a package */
3104 3104 return (ENODATA);
3105 3105 }
3106 3106
3107 3107 /* the kernel needs the non-byteswapped begin record */
3108 3108 drr_noswap = drr;
3109 3109
3110 3110 flags->byteswap = B_FALSE;
3111 3111 if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
3112 3112 /*
3113 3113 * We computed the checksum in the wrong byteorder in
3114 3114 * recv_read() above; do it again correctly.
3115 3115 */
3116 3116 bzero(&zcksum, sizeof (zio_cksum_t));
3117 3117 fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
3118 3118 flags->byteswap = B_TRUE;
3119 3119
3120 3120 drr.drr_type = BSWAP_32(drr.drr_type);
3121 3121 drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
3122 3122 drrb->drr_magic = BSWAP_64(drrb->drr_magic);
3123 3123 drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
3124 3124 drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
3125 3125 drrb->drr_type = BSWAP_32(drrb->drr_type);
3126 3126 drrb->drr_flags = BSWAP_32(drrb->drr_flags);
3127 3127 drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
3128 3128 drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
3129 3129 }
3130 3130
3131 3131 if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
3132 3132 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3133 3133 "stream (bad magic number)"));
3134 3134 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3135 3135 }
3136 3136
3137 3137 featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
3138 3138 hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
3139 3139
3140 3140 if (!DMU_STREAM_SUPPORTED(featureflags) ||
3141 3141 (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
3142 3142 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3143 3143 "stream has unsupported feature, feature flags = %lx"),
3144 3144 featureflags);
3145 3145 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3146 3146 }
3147 3147
3148 3148 if (strchr(drrb->drr_toname, '@') == NULL) {
3149 3149 zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3150 3150 "stream (bad snapshot name)"));
3151 3151 return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3152 3152 }
3153 3153
3154 3154 if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
3155 3155 char nonpackage_sendfs[ZFS_MAXNAMELEN];
3156 3156 if (sendfs == NULL) {
3157 3157 /*
3158 3158 * We were not called from zfs_receive_package(). Get
3159 3159 * the fs specified by 'zfs send'.
3160 3160 */
3161 3161 char *cp;
3162 3162 (void) strlcpy(nonpackage_sendfs,
3163 3163 drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN);
3164 3164 if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
3165 3165 *cp = '\0';
3166 3166 sendfs = nonpackage_sendfs;
3167 3167 }
3168 3168 return (zfs_receive_one(hdl, infd, tosnap, flags,
3169 3169 &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
3170 3170 top_zfs, cleanup_fd, action_handlep));
3171 3171 } else {
3172 3172 assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
3173 3173 DMU_COMPOUNDSTREAM);
3174 3174 return (zfs_receive_package(hdl, infd, tosnap, flags,
3175 3175 &drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
3176 3176 }
3177 3177 }
3178 3178
3179 3179 /*
3180 3180 * Restores a backup of tosnap from the file descriptor specified by infd.
3181 3181 * Return 0 on total success, -2 if some things couldn't be
3182 3182 * destroyed/renamed/promoted, -1 if some things couldn't be received.
3183 3183 * (-1 will override -2).
3184 3184 */
3185 3185 int
3186 3186 zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
3187 3187 int infd, avl_tree_t *stream_avl)
3188 3188 {
3189 3189 char *top_zfs = NULL;
3190 3190 int err;
3191 3191 int cleanup_fd;
3192 3192 uint64_t action_handle = 0;
3193 3193
3194 3194 cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
3195 3195 VERIFY(cleanup_fd >= 0);
3196 3196
3197 3197 err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
3198 3198 stream_avl, &top_zfs, cleanup_fd, &action_handle);
3199 3199
3200 3200 VERIFY(0 == close(cleanup_fd));
3201 3201
3202 3202 if (err == 0 && !flags->nomount && top_zfs) {
3203 3203 zfs_handle_t *zhp;
3204 3204 prop_changelist_t *clp;
3205 3205
3206 3206 zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3207 3207 if (zhp != NULL) {
3208 3208 clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3209 3209 CL_GATHER_MOUNT_ALWAYS, 0);
3210 3210 zfs_close(zhp);
3211 3211 if (clp != NULL) {
3212 3212 /* mount and share received datasets */
3213 3213 err = changelist_postfix(clp);
3214 3214 changelist_free(clp);
3215 3215 }
3216 3216 }
3217 3217 if (zhp == NULL || clp == NULL || err)
3218 3218 err = -1;
3219 3219 }
3220 3220 if (top_zfs)
3221 3221 free(top_zfs);
3222 3222
3223 3223 return (err);
3224 3224 }
↓ open down ↓ |
1607 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX