Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>
*** 21,30 ****
--- 21,31 ----
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
#include <assert.h>
#include <ctype.h>
#include <errno.h>
*** 791,800 ****
--- 792,802 ----
boolean_t seenfrom, seento, replicate, doall, fromorigin;
boolean_t verbose, dryrun, parsable, progress;
int outfd;
boolean_t err;
nvlist_t *fss;
+ nvlist_t *snapholds;
avl_tree_t *fsavl;
snapfilter_cb_t *filter_cb;
void *filter_cb_arg;
nvlist_t *debugnv;
char holdtag[ZFS_MAXNAMELEN];
*** 940,984 ****
nvlist_free(thisdbg);
return (0);
}
! static int
! hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
{
- zfs_handle_t *pzhp;
- int error = 0;
- char *thissnap;
-
assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
- if (sdd->dryrun)
- return (0);
-
/*
! * zfs_send() only opens a cleanup_fd for sends that need it,
* e.g. replication and doall.
*/
! if (sdd->cleanup_fd == -1)
! return (0);
!
! thissnap = strchr(zhp->zfs_name, '@') + 1;
! *(thissnap - 1) = '\0';
! pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
! *(thissnap - 1) = '@';
!
! /*
! * It's OK if the parent no longer exists. The send code will
! * handle that error.
! */
! if (pzhp) {
! error = zfs_hold(pzhp, thissnap, sdd->holdtag,
! B_FALSE, B_TRUE, sdd->cleanup_fd);
! zfs_close(pzhp);
! }
! return (error);
}
static void *
send_progress_thread(void *arg)
{
--- 942,964 ----
nvlist_free(thisdbg);
return (0);
}
! static void
! gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
{
assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
/*
! * zfs_send() only sets snapholds for sends that need them,
* e.g. replication and doall.
*/
! if (sdd->snapholds == NULL)
! return;
! fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
}
static void *
send_progress_thread(void *arg)
{
*** 1030,1061 ****
dump_snapshot(zfs_handle_t *zhp, void *arg)
{
send_dump_data_t *sdd = arg;
progress_arg_t pa = { 0 };
pthread_t tid;
-
char *thissnap;
int err;
boolean_t isfromsnap, istosnap, fromorigin;
boolean_t exclude = B_FALSE;
thissnap = strchr(zhp->zfs_name, '@') + 1;
isfromsnap = (sdd->fromsnap != NULL &&
strcmp(sdd->fromsnap, thissnap) == 0);
if (!sdd->seenfrom && isfromsnap) {
! err = hold_for_send(zhp, sdd);
! if (err == 0) {
sdd->seenfrom = B_TRUE;
(void) strcpy(sdd->prevsnap, thissnap);
! sdd->prevsnap_obj = zfs_prop_get_int(zhp,
! ZFS_PROP_OBJSETID);
! } else if (err == ENOENT) {
! err = 0;
! }
zfs_close(zhp);
! return (err);
}
if (sdd->seento || !sdd->seenfrom) {
zfs_close(zhp);
return (0);
--- 1010,1036 ----
dump_snapshot(zfs_handle_t *zhp, void *arg)
{
send_dump_data_t *sdd = arg;
progress_arg_t pa = { 0 };
pthread_t tid;
char *thissnap;
int err;
boolean_t isfromsnap, istosnap, fromorigin;
boolean_t exclude = B_FALSE;
+ err = 0;
thissnap = strchr(zhp->zfs_name, '@') + 1;
isfromsnap = (sdd->fromsnap != NULL &&
strcmp(sdd->fromsnap, thissnap) == 0);
if (!sdd->seenfrom && isfromsnap) {
! gather_holds(zhp, sdd);
sdd->seenfrom = B_TRUE;
(void) strcpy(sdd->prevsnap, thissnap);
! sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
zfs_close(zhp);
! return (0);
}
if (sdd->seento || !sdd->seenfrom) {
zfs_close(zhp);
return (0);
*** 1102,1119 ****
*/
zfs_close(zhp);
return (0);
}
! err = hold_for_send(zhp, sdd);
! if (err) {
! if (err == ENOENT)
! err = 0;
! zfs_close(zhp);
! return (err);
! }
!
fromorigin = sdd->prevsnap[0] == '\0' &&
(sdd->fromorigin || sdd->replicate);
if (sdd->verbose) {
uint64_t size;
--- 1077,1087 ----
*/
zfs_close(zhp);
return (0);
}
! gather_holds(zhp, sdd);
fromorigin = sdd->prevsnap[0] == '\0' &&
(sdd->fromorigin || sdd->replicate);
if (sdd->verbose) {
uint64_t size;
*** 1377,1387 ****
int err = 0;
nvlist_t *fss = NULL;
avl_tree_t *fsavl = NULL;
static uint64_t holdseq;
int spa_version;
! pthread_t tid;
int pipefd[2];
dedup_arg_t dda = { 0 };
int featureflags = 0;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
--- 1345,1355 ----
int err = 0;
nvlist_t *fss = NULL;
avl_tree_t *fsavl = NULL;
static uint64_t holdseq;
int spa_version;
! pthread_t tid = 0;
int pipefd[2];
dedup_arg_t dda = { 0 };
int featureflags = 0;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
*** 1450,1465 ****
NV_ENCODE_XDR, 0);
if (debugnvp)
*debugnvp = hdrnv;
else
nvlist_free(hdrnv);
! if (err) {
! fsavl_destroy(fsavl);
! nvlist_free(fss);
goto stderr_out;
}
- }
if (!flags->dryrun) {
/* write first begin record */
drr.drr_type = DRR_BEGIN;
drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
--- 1418,1430 ----
NV_ENCODE_XDR, 0);
if (debugnvp)
*debugnvp = hdrnv;
else
nvlist_free(hdrnv);
! if (err)
goto stderr_out;
}
if (!flags->dryrun) {
/* write first begin record */
drr.drr_type = DRR_BEGIN;
drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
*** 1478,1501 ****
err = cksum_and_write(packbuf, buflen, &zc,
outfd);
}
free(packbuf);
if (err == -1) {
- fsavl_destroy(fsavl);
- nvlist_free(fss);
err = errno;
goto stderr_out;
}
/* write end record */
bzero(&drr, sizeof (drr));
drr.drr_type = DRR_END;
drr.drr_u.drr_end.drr_checksum = zc;
err = write(outfd, &drr, sizeof (drr));
if (err == -1) {
- fsavl_destroy(fsavl);
- nvlist_free(fss);
err = errno;
goto stderr_out;
}
err = 0;
--- 1443,1462 ----
*** 1503,1513 ****
}
/* dump each stream */
sdd.fromsnap = fromsnap;
sdd.tosnap = tosnap;
! if (flags->dedup)
sdd.outfd = pipefd[0];
else
sdd.outfd = outfd;
sdd.replicate = flags->replicate;
sdd.doall = flags->doall;
--- 1464,1474 ----
}
/* dump each stream */
sdd.fromsnap = fromsnap;
sdd.tosnap = tosnap;
! if (tid != 0)
sdd.outfd = pipefd[0];
else
sdd.outfd = outfd;
sdd.replicate = flags->replicate;
sdd.doall = flags->doall;
*** 1540,1579 ****
sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
if (sdd.cleanup_fd < 0) {
err = errno;
goto stderr_out;
}
} else {
sdd.cleanup_fd = -1;
}
! if (flags->verbose) {
/*
* Do a verbose no-op dry run to get all the verbose output
! * before generating any data. Then do a non-verbose real
! * run to generate the streams.
*/
sdd.dryrun = B_TRUE;
err = dump_filesystems(zhp, &sdd);
! sdd.dryrun = flags->dryrun;
! sdd.verbose = B_FALSE;
if (flags->parsable) {
(void) fprintf(stderr, "size\t%llu\n",
(longlong_t)sdd.size);
} else {
char buf[16];
zfs_nicenum(sdd.size, buf, sizeof (buf));
(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
"total estimated size is %s\n"), buf);
}
}
err = dump_filesystems(zhp, &sdd);
fsavl_destroy(fsavl);
nvlist_free(fss);
! if (flags->dedup) {
! (void) close(pipefd[0]);
(void) pthread_join(tid, NULL);
}
if (sdd.cleanup_fd != -1) {
VERIFY(0 == close(sdd.cleanup_fd));
sdd.cleanup_fd = -1;
--- 1501,1575 ----
sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
if (sdd.cleanup_fd < 0) {
err = errno;
goto stderr_out;
}
+ sdd.snapholds = fnvlist_alloc();
} else {
sdd.cleanup_fd = -1;
+ sdd.snapholds = NULL;
}
! if (flags->verbose || sdd.snapholds != NULL) {
/*
* Do a verbose no-op dry run to get all the verbose output
! * or to gather snapshot holds before generating any data,
! * then do a non-verbose real run to generate the streams.
*/
sdd.dryrun = B_TRUE;
err = dump_filesystems(zhp, &sdd);
!
! if (err != 0)
! goto stderr_out;
!
! if (flags->verbose) {
if (flags->parsable) {
(void) fprintf(stderr, "size\t%llu\n",
(longlong_t)sdd.size);
} else {
char buf[16];
zfs_nicenum(sdd.size, buf, sizeof (buf));
(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
"total estimated size is %s\n"), buf);
}
}
+
+ /* Ensure that finding no snapshots is treated as an error. */
+ if (!sdd.seento) {
+ err = ENOENT;
+ goto err_out;
+ }
+
+ /* Skip the second run if dryrun was requested. */
+ if (flags->dryrun)
+ goto err_out;
+
+ if (sdd.snapholds != NULL) {
+ err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
+ if (err != 0)
+ goto stderr_out;
+
+ fnvlist_free(sdd.snapholds);
+ sdd.snapholds = NULL;
+ }
+
+ sdd.dryrun = B_FALSE;
+ sdd.verbose = B_FALSE;
+ }
+
err = dump_filesystems(zhp, &sdd);
fsavl_destroy(fsavl);
nvlist_free(fss);
! /* Ensure that finding no snapshots is treated as an error. */
! if (err == 0 && !sdd.seento)
! err = ENOENT;
!
! if (tid != 0) {
! if (err != 0)
! (void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
+ (void) close(pipefd[0]);
}
if (sdd.cleanup_fd != -1) {
VERIFY(0 == close(sdd.cleanup_fd));
sdd.cleanup_fd = -1;
*** 1597,1609 ****
return (err || sdd.err);
stderr_out:
err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
err_out:
if (sdd.cleanup_fd != -1)
VERIFY(0 == close(sdd.cleanup_fd));
! if (flags->dedup) {
(void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
(void) close(pipefd[0]);
}
return (err);
--- 1593,1609 ----
return (err || sdd.err);
stderr_out:
err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
err_out:
+ fsavl_destroy(fsavl);
+ nvlist_free(fss);
+ fnvlist_free(sdd.snapholds);
+
if (sdd.cleanup_fd != -1)
VERIFY(0 == close(sdd.cleanup_fd));
! if (tid != 0) {
(void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
(void) close(pipefd[0]);
}
return (err);