Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>

*** 21,30 **** --- 21,31 ---- /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012 by Delphix. All rights reserved. * Copyright (c) 2012, Joyent, Inc. All rights reserved. + * Copyright (c) 2013 Steven Hartland. All rights reserved. */ #include <assert.h> #include <ctype.h> #include <errno.h>
*** 791,800 **** --- 792,802 ---- boolean_t seenfrom, seento, replicate, doall, fromorigin; boolean_t verbose, dryrun, parsable, progress; int outfd; boolean_t err; nvlist_t *fss; + nvlist_t *snapholds; avl_tree_t *fsavl; snapfilter_cb_t *filter_cb; void *filter_cb_arg; nvlist_t *debugnv; char holdtag[ZFS_MAXNAMELEN];
*** 940,984 **** nvlist_free(thisdbg); return (0); } ! static int ! hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd) { - zfs_handle_t *pzhp; - int error = 0; - char *thissnap; - assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); - if (sdd->dryrun) - return (0); - /* ! * zfs_send() only opens a cleanup_fd for sends that need it, * e.g. replication and doall. */ ! if (sdd->cleanup_fd == -1) ! return (0); ! ! thissnap = strchr(zhp->zfs_name, '@') + 1; ! *(thissnap - 1) = '\0'; ! pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET); ! *(thissnap - 1) = '@'; ! ! /* ! * It's OK if the parent no longer exists. The send code will ! * handle that error. ! */ ! if (pzhp) { ! error = zfs_hold(pzhp, thissnap, sdd->holdtag, ! B_FALSE, B_TRUE, sdd->cleanup_fd); ! zfs_close(pzhp); ! } ! return (error); } static void * send_progress_thread(void *arg) { --- 942,964 ---- nvlist_free(thisdbg); return (0); } ! static void ! gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd) { assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT); /* ! * zfs_send() only sets snapholds for sends that need them, * e.g. replication and doall. */ ! if (sdd->snapholds == NULL) ! return; ! fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag); } static void * send_progress_thread(void *arg) {
*** 1030,1061 **** dump_snapshot(zfs_handle_t *zhp, void *arg) { send_dump_data_t *sdd = arg; progress_arg_t pa = { 0 }; pthread_t tid; - char *thissnap; int err; boolean_t isfromsnap, istosnap, fromorigin; boolean_t exclude = B_FALSE; thissnap = strchr(zhp->zfs_name, '@') + 1; isfromsnap = (sdd->fromsnap != NULL && strcmp(sdd->fromsnap, thissnap) == 0); if (!sdd->seenfrom && isfromsnap) { ! err = hold_for_send(zhp, sdd); ! if (err == 0) { sdd->seenfrom = B_TRUE; (void) strcpy(sdd->prevsnap, thissnap); ! sdd->prevsnap_obj = zfs_prop_get_int(zhp, ! ZFS_PROP_OBJSETID); ! } else if (err == ENOENT) { ! err = 0; ! } zfs_close(zhp); ! return (err); } if (sdd->seento || !sdd->seenfrom) { zfs_close(zhp); return (0); --- 1010,1036 ---- dump_snapshot(zfs_handle_t *zhp, void *arg) { send_dump_data_t *sdd = arg; progress_arg_t pa = { 0 }; pthread_t tid; char *thissnap; int err; boolean_t isfromsnap, istosnap, fromorigin; boolean_t exclude = B_FALSE; + err = 0; thissnap = strchr(zhp->zfs_name, '@') + 1; isfromsnap = (sdd->fromsnap != NULL && strcmp(sdd->fromsnap, thissnap) == 0); if (!sdd->seenfrom && isfromsnap) { ! gather_holds(zhp, sdd); sdd->seenfrom = B_TRUE; (void) strcpy(sdd->prevsnap, thissnap); ! sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID); zfs_close(zhp); ! return (0); } if (sdd->seento || !sdd->seenfrom) { zfs_close(zhp); return (0);
*** 1102,1119 **** */ zfs_close(zhp); return (0); } ! err = hold_for_send(zhp, sdd); ! if (err) { ! if (err == ENOENT) ! err = 0; ! zfs_close(zhp); ! return (err); ! } ! fromorigin = sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate); if (sdd->verbose) { uint64_t size; --- 1077,1087 ---- */ zfs_close(zhp); return (0); } ! gather_holds(zhp, sdd); fromorigin = sdd->prevsnap[0] == '\0' && (sdd->fromorigin || sdd->replicate); if (sdd->verbose) { uint64_t size;
*** 1377,1387 **** int err = 0; nvlist_t *fss = NULL; avl_tree_t *fsavl = NULL; static uint64_t holdseq; int spa_version; ! pthread_t tid; int pipefd[2]; dedup_arg_t dda = { 0 }; int featureflags = 0; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN, --- 1345,1355 ---- int err = 0; nvlist_t *fss = NULL; avl_tree_t *fsavl = NULL; static uint64_t holdseq; int spa_version; ! pthread_t tid = 0; int pipefd[2]; dedup_arg_t dda = { 0 }; int featureflags = 0; (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
*** 1450,1465 **** NV_ENCODE_XDR, 0); if (debugnvp) *debugnvp = hdrnv; else nvlist_free(hdrnv); ! if (err) { ! fsavl_destroy(fsavl); ! nvlist_free(fss); goto stderr_out; } - } if (!flags->dryrun) { /* write first begin record */ drr.drr_type = DRR_BEGIN; drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC; --- 1418,1430 ---- NV_ENCODE_XDR, 0); if (debugnvp) *debugnvp = hdrnv; else nvlist_free(hdrnv); ! if (err) goto stderr_out; } if (!flags->dryrun) { /* write first begin record */ drr.drr_type = DRR_BEGIN; drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
*** 1478,1501 **** err = cksum_and_write(packbuf, buflen, &zc, outfd); } free(packbuf); if (err == -1) { - fsavl_destroy(fsavl); - nvlist_free(fss); err = errno; goto stderr_out; } /* write end record */ bzero(&drr, sizeof (drr)); drr.drr_type = DRR_END; drr.drr_u.drr_end.drr_checksum = zc; err = write(outfd, &drr, sizeof (drr)); if (err == -1) { - fsavl_destroy(fsavl); - nvlist_free(fss); err = errno; goto stderr_out; } err = 0; --- 1443,1462 ----
*** 1503,1513 **** } /* dump each stream */ sdd.fromsnap = fromsnap; sdd.tosnap = tosnap; ! if (flags->dedup) sdd.outfd = pipefd[0]; else sdd.outfd = outfd; sdd.replicate = flags->replicate; sdd.doall = flags->doall; --- 1464,1474 ---- } /* dump each stream */ sdd.fromsnap = fromsnap; sdd.tosnap = tosnap; ! if (tid != 0) sdd.outfd = pipefd[0]; else sdd.outfd = outfd; sdd.replicate = flags->replicate; sdd.doall = flags->doall;
*** 1540,1579 **** sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); if (sdd.cleanup_fd < 0) { err = errno; goto stderr_out; } } else { sdd.cleanup_fd = -1; } ! if (flags->verbose) { /* * Do a verbose no-op dry run to get all the verbose output ! * before generating any data. Then do a non-verbose real ! * run to generate the streams. */ sdd.dryrun = B_TRUE; err = dump_filesystems(zhp, &sdd); ! sdd.dryrun = flags->dryrun; ! sdd.verbose = B_FALSE; if (flags->parsable) { (void) fprintf(stderr, "size\t%llu\n", (longlong_t)sdd.size); } else { char buf[16]; zfs_nicenum(sdd.size, buf, sizeof (buf)); (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "total estimated size is %s\n"), buf); } } err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); ! if (flags->dedup) { ! (void) close(pipefd[0]); (void) pthread_join(tid, NULL); } if (sdd.cleanup_fd != -1) { VERIFY(0 == close(sdd.cleanup_fd)); sdd.cleanup_fd = -1; --- 1501,1575 ---- sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL); if (sdd.cleanup_fd < 0) { err = errno; goto stderr_out; } + sdd.snapholds = fnvlist_alloc(); } else { sdd.cleanup_fd = -1; + sdd.snapholds = NULL; } ! if (flags->verbose || sdd.snapholds != NULL) { /* * Do a verbose no-op dry run to get all the verbose output ! * or to gather snapshot hold's before generating any data, ! * then do a non-verbose real run to generate the streams. */ sdd.dryrun = B_TRUE; err = dump_filesystems(zhp, &sdd); ! ! if (err != 0) ! goto stderr_out; ! ! if (flags->verbose) { if (flags->parsable) { (void) fprintf(stderr, "size\t%llu\n", (longlong_t)sdd.size); } else { char buf[16]; zfs_nicenum(sdd.size, buf, sizeof (buf)); (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "total estimated size is %s\n"), buf); } } + + /* Ensure no snaps found is treated as an error. */ + if (!sdd.seento) { + err = ENOENT; + goto err_out; + } + + /* Skip the second run if dryrun was requested. */ + if (flags->dryrun) + goto err_out; + + if (sdd.snapholds != NULL) { + err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds); + if (err != 0) + goto stderr_out; + + fnvlist_free(sdd.snapholds); + sdd.snapholds = NULL; + } + + sdd.dryrun = B_FALSE; + sdd.verbose = B_FALSE; + } + err = dump_filesystems(zhp, &sdd); fsavl_destroy(fsavl); nvlist_free(fss); ! /* Ensure no snaps found is treated as an error. */ ! if (err == 0 && !sdd.seento) ! err = ENOENT; ! ! if (tid != 0) { ! if (err != 0) ! (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); + (void) close(pipefd[0]); } if (sdd.cleanup_fd != -1) { VERIFY(0 == close(sdd.cleanup_fd)); sdd.cleanup_fd = -1;
*** 1597,1609 **** return (err || sdd.err); stderr_out: err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); err_out: if (sdd.cleanup_fd != -1) VERIFY(0 == close(sdd.cleanup_fd)); ! if (flags->dedup) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); (void) close(pipefd[0]); } return (err); --- 1593,1609 ---- return (err || sdd.err); stderr_out: err = zfs_standard_error(zhp->zfs_hdl, err, errbuf); err_out: + fsavl_destroy(fsavl); + nvlist_free(fss); + fnvlist_free(sdd.snapholds); + if (sdd.cleanup_fd != -1) VERIFY(0 == close(sdd.cleanup_fd)); ! if (tid != 0) { (void) pthread_cancel(tid); (void) pthread_join(tid, NULL); (void) close(pipefd[0]); } return (err);