Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>
@@ -791,10 +791,11 @@
boolean_t seenfrom, seento, replicate, doall, fromorigin;
boolean_t verbose, dryrun, parsable, progress;
int outfd;
boolean_t err;
nvlist_t *fss;
+ nvlist_t *snapholds;
avl_tree_t *fsavl;
snapfilter_cb_t *filter_cb;
void *filter_cb_arg;
nvlist_t *debugnv;
char holdtag[ZFS_MAXNAMELEN];
@@ -940,45 +941,23 @@
nvlist_free(thisdbg);
return (0);
}
-static int
-hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
+static void
+gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
{
- zfs_handle_t *pzhp;
- int error = 0;
- char *thissnap;
-
assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
- if (sdd->dryrun)
- return (0);
-
/*
- * zfs_send() only opens a cleanup_fd for sends that need it,
+ * zfs_send() only sets snapholds for sends that need them,
* e.g. replication and doall.
*/
- if (sdd->cleanup_fd == -1)
- return (0);
-
- thissnap = strchr(zhp->zfs_name, '@') + 1;
- *(thissnap - 1) = '\0';
- pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
- *(thissnap - 1) = '@';
-
- /*
- * It's OK if the parent no longer exists. The send code will
- * handle that error.
- */
- if (pzhp) {
- error = zfs_hold(pzhp, thissnap, sdd->holdtag,
- B_FALSE, B_TRUE, sdd->cleanup_fd);
- zfs_close(pzhp);
- }
+ if (sdd->snapholds == NULL)
+ return;
- return (error);
+ fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
}
static void *
send_progress_thread(void *arg)
{
@@ -1030,32 +1009,27 @@
dump_snapshot(zfs_handle_t *zhp, void *arg)
{
send_dump_data_t *sdd = arg;
progress_arg_t pa = { 0 };
pthread_t tid;
-
char *thissnap;
int err;
boolean_t isfromsnap, istosnap, fromorigin;
boolean_t exclude = B_FALSE;
+ err = 0;
thissnap = strchr(zhp->zfs_name, '@') + 1;
isfromsnap = (sdd->fromsnap != NULL &&
strcmp(sdd->fromsnap, thissnap) == 0);
if (!sdd->seenfrom && isfromsnap) {
- err = hold_for_send(zhp, sdd);
- if (err == 0) {
+ gather_holds(zhp, sdd);
sdd->seenfrom = B_TRUE;
(void) strcpy(sdd->prevsnap, thissnap);
- sdd->prevsnap_obj = zfs_prop_get_int(zhp,
- ZFS_PROP_OBJSETID);
- } else if (err == ENOENT) {
- err = 0;
- }
+ sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
zfs_close(zhp);
- return (err);
+ return (0);
}
if (sdd->seento || !sdd->seenfrom) {
zfs_close(zhp);
return (0);
@@ -1102,18 +1076,11 @@
*/
zfs_close(zhp);
return (0);
}
- err = hold_for_send(zhp, sdd);
- if (err) {
- if (err == ENOENT)
- err = 0;
- zfs_close(zhp);
- return (err);
- }
-
+ gather_holds(zhp, sdd);
fromorigin = sdd->prevsnap[0] == '\0' &&
(sdd->fromorigin || sdd->replicate);
if (sdd->verbose) {
uint64_t size;
@@ -1377,11 +1344,11 @@
int err = 0;
nvlist_t *fss = NULL;
avl_tree_t *fsavl = NULL;
static uint64_t holdseq;
int spa_version;
- pthread_t tid;
+ pthread_t tid = 0;
int pipefd[2];
dedup_arg_t dda = { 0 };
int featureflags = 0;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@@ -1450,16 +1417,13 @@
NV_ENCODE_XDR, 0);
if (debugnvp)
*debugnvp = hdrnv;
else
nvlist_free(hdrnv);
- if (err) {
- fsavl_destroy(fsavl);
- nvlist_free(fss);
+ if (err)
goto stderr_out;
}
- }
if (!flags->dryrun) {
/* write first begin record */
drr.drr_type = DRR_BEGIN;
drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
@@ -1478,24 +1442,20 @@
err = cksum_and_write(packbuf, buflen, &zc,
outfd);
}
free(packbuf);
if (err == -1) {
- fsavl_destroy(fsavl);
- nvlist_free(fss);
err = errno;
goto stderr_out;
}
/* write end record */
bzero(&drr, sizeof (drr));
drr.drr_type = DRR_END;
drr.drr_u.drr_end.drr_checksum = zc;
err = write(outfd, &drr, sizeof (drr));
if (err == -1) {
- fsavl_destroy(fsavl);
- nvlist_free(fss);
err = errno;
goto stderr_out;
}
err = 0;
@@ -1503,11 +1463,11 @@
}
/* dump each stream */
sdd.fromsnap = fromsnap;
sdd.tosnap = tosnap;
- if (flags->dedup)
+ if (tid != 0)
sdd.outfd = pipefd[0];
else
sdd.outfd = outfd;
sdd.replicate = flags->replicate;
sdd.doall = flags->doall;
@@ -1540,40 +1500,74 @@
sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
if (sdd.cleanup_fd < 0) {
err = errno;
goto stderr_out;
}
+ sdd.snapholds = fnvlist_alloc();
} else {
sdd.cleanup_fd = -1;
+ sdd.snapholds = NULL;
}
- if (flags->verbose) {
+ if (flags->verbose || sdd.snapholds != NULL) {
/*
* Do a verbose no-op dry run to get all the verbose output
- * before generating any data. Then do a non-verbose real
- * run to generate the streams.
+ * or to gather snapshot hold's before generating any data,
+ * then do a non-verbose real run to generate the streams.
*/
sdd.dryrun = B_TRUE;
err = dump_filesystems(zhp, &sdd);
- sdd.dryrun = flags->dryrun;
- sdd.verbose = B_FALSE;
+
+ if (err != 0)
+ goto stderr_out;
+
+ if (flags->verbose) {
if (flags->parsable) {
(void) fprintf(stderr, "size\t%llu\n",
(longlong_t)sdd.size);
} else {
char buf[16];
zfs_nicenum(sdd.size, buf, sizeof (buf));
(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
"total estimated size is %s\n"), buf);
}
}
+
+ /* Ensure no snaps found is treated as an error. */
+ if (!sdd.seento) {
+ err = ENOENT;
+ goto err_out;
+ }
+
+ /* Skip the second run if dryrun was requested. */
+ if (flags->dryrun)
+ goto err_out;
+
+ if (sdd.snapholds != NULL) {
+ err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
+ if (err != 0)
+ goto stderr_out;
+ fnvlist_free(sdd.snapholds);
+ sdd.snapholds = NULL;
+ }
+
+ sdd.dryrun = B_FALSE;
+ sdd.verbose = B_FALSE;
+ }
+
err = dump_filesystems(zhp, &sdd);
fsavl_destroy(fsavl);
nvlist_free(fss);
- if (flags->dedup) {
- (void) close(pipefd[0]);
+ /* Ensure no snaps found is treated as an error. */
+ if (err == 0 && !sdd.seento)
+ err = ENOENT;
+
+ if (tid != 0) {
+ if (err != 0)
+ (void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
+ (void) close(pipefd[0]);
}
if (sdd.cleanup_fd != -1) {
VERIFY(0 == close(sdd.cleanup_fd));
sdd.cleanup_fd = -1;
@@ -1597,13 +1591,17 @@
return (err || sdd.err);
stderr_out:
err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
err_out:
+ fsavl_destroy(fsavl);
+ nvlist_free(fss);
+ fnvlist_free(sdd.snapholds);
+
if (sdd.cleanup_fd != -1)
VERIFY(0 == close(sdd.cleanup_fd));
- if (flags->dedup) {
+ if (tid != 0) {
(void) pthread_cancel(tid);
(void) pthread_join(tid, NULL);
(void) close(pipefd[0]);
}
return (err);