Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>
        
*** 21,30 ****
--- 21,31 ----
  
  /*
   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
   * Copyright (c) 2012 by Delphix. All rights reserved.
   * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+  * Copyright (c) 2013 Steven Hartland. All rights reserved.
   */
  
  #include <assert.h>
  #include <ctype.h>
  #include <errno.h>
*** 791,800 ****
--- 792,802 ----
          boolean_t seenfrom, seento, replicate, doall, fromorigin;
          boolean_t verbose, dryrun, parsable, progress;
          int outfd;
          boolean_t err;
          nvlist_t *fss;
+         nvlist_t *snapholds;
          avl_tree_t *fsavl;
          snapfilter_cb_t *filter_cb;
          void *filter_cb_arg;
          nvlist_t *debugnv;
          char holdtag[ZFS_MAXNAMELEN];
*** 940,984 ****
          nvlist_free(thisdbg);
  
          return (0);
  }
  
! static int
! hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
  {
-         zfs_handle_t *pzhp;
-         int error = 0;
-         char *thissnap;
- 
          assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
  
-         if (sdd->dryrun)
-                 return (0);
- 
          /*
!          * zfs_send() only opens a cleanup_fd for sends that need it,
           * e.g. replication and doall.
           */
!         if (sdd->cleanup_fd == -1)
!                 return (0);
! 
!         thissnap = strchr(zhp->zfs_name, '@') + 1;
!         *(thissnap - 1) = '\0';
!         pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
!         *(thissnap - 1) = '@';
! 
!         /*
!          * It's OK if the parent no longer exists.  The send code will
!          * handle that error.
!          */
!         if (pzhp) {
!                 error = zfs_hold(pzhp, thissnap, sdd->holdtag,
!                     B_FALSE, B_TRUE, sdd->cleanup_fd);
!                 zfs_close(pzhp);
!         }
  
!         return (error);
  }
  
  static void *
  send_progress_thread(void *arg)
  {
--- 942,964 ----
          nvlist_free(thisdbg);
  
          return (0);
  }
  
! static void
! gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
  {
          assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
  
          /*
!          * zfs_send() only sets snapholds for sends that need them,
           * e.g. replication and doall.
           */
!         if (sdd->snapholds == NULL)
!                 return;
  
!         fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
  }
  
  static void *
  send_progress_thread(void *arg)
  {
*** 1030,1061 ****
  dump_snapshot(zfs_handle_t *zhp, void *arg)
  {
          send_dump_data_t *sdd = arg;
          progress_arg_t pa = { 0 };
          pthread_t tid;
- 
          char *thissnap;
          int err;
          boolean_t isfromsnap, istosnap, fromorigin;
          boolean_t exclude = B_FALSE;
  
          thissnap = strchr(zhp->zfs_name, '@') + 1;
          isfromsnap = (sdd->fromsnap != NULL &&
              strcmp(sdd->fromsnap, thissnap) == 0);
  
          if (!sdd->seenfrom && isfromsnap) {
!                 err = hold_for_send(zhp, sdd);
!                 if (err == 0) {
                          sdd->seenfrom = B_TRUE;
                          (void) strcpy(sdd->prevsnap, thissnap);
!                         sdd->prevsnap_obj = zfs_prop_get_int(zhp,
!                             ZFS_PROP_OBJSETID);
!                 } else if (err == ENOENT) {
!                         err = 0;
!                 }
                  zfs_close(zhp);
!                 return (err);
          }
  
          if (sdd->seento || !sdd->seenfrom) {
                  zfs_close(zhp);
                  return (0);
--- 1010,1036 ----
  dump_snapshot(zfs_handle_t *zhp, void *arg)
  {
          send_dump_data_t *sdd = arg;
          progress_arg_t pa = { 0 };
          pthread_t tid;
          char *thissnap;
          int err;
          boolean_t isfromsnap, istosnap, fromorigin;
          boolean_t exclude = B_FALSE;
  
+         err = 0;
          thissnap = strchr(zhp->zfs_name, '@') + 1;
          isfromsnap = (sdd->fromsnap != NULL &&
              strcmp(sdd->fromsnap, thissnap) == 0);
  
          if (!sdd->seenfrom && isfromsnap) {
!                 gather_holds(zhp, sdd);
                  sdd->seenfrom = B_TRUE;
                  (void) strcpy(sdd->prevsnap, thissnap);
!                 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
                  zfs_close(zhp);
!                 return (0);
          }
  
          if (sdd->seento || !sdd->seenfrom) {
                  zfs_close(zhp);
                  return (0);
*** 1102,1119 ****
                   */
                  zfs_close(zhp);
                  return (0);
          }
  
!         err = hold_for_send(zhp, sdd);
!         if (err) {
!                 if (err == ENOENT)
!                         err = 0;
!                 zfs_close(zhp);
!                 return (err);
!         }
! 
          fromorigin = sdd->prevsnap[0] == '\0' &&
              (sdd->fromorigin || sdd->replicate);
  
          if (sdd->verbose) {
                  uint64_t size;
--- 1077,1087 ----
                   */
                  zfs_close(zhp);
                  return (0);
          }
  
!         gather_holds(zhp, sdd);
          fromorigin = sdd->prevsnap[0] == '\0' &&
              (sdd->fromorigin || sdd->replicate);
  
          if (sdd->verbose) {
                  uint64_t size;
*** 1377,1387 ****
          int err = 0;
          nvlist_t *fss = NULL;
          avl_tree_t *fsavl = NULL;
          static uint64_t holdseq;
          int spa_version;
!         pthread_t tid;
          int pipefd[2];
          dedup_arg_t dda = { 0 };
          int featureflags = 0;
  
          (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
--- 1345,1355 ----
          int err = 0;
          nvlist_t *fss = NULL;
          avl_tree_t *fsavl = NULL;
          static uint64_t holdseq;
          int spa_version;
!         pthread_t tid = 0;
          int pipefd[2];
          dedup_arg_t dda = { 0 };
          int featureflags = 0;
  
          (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
*** 1450,1465 ****
                              NV_ENCODE_XDR, 0);
                          if (debugnvp)
                                  *debugnvp = hdrnv;
                          else
                                  nvlist_free(hdrnv);
!                         if (err) {
!                                 fsavl_destroy(fsavl);
!                                 nvlist_free(fss);
                                  goto stderr_out;
                          }
-                 }
  
                  if (!flags->dryrun) {
                          /* write first begin record */
                          drr.drr_type = DRR_BEGIN;
                          drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
--- 1418,1430 ----
                              NV_ENCODE_XDR, 0);
                          if (debugnvp)
                                  *debugnvp = hdrnv;
                          else
                                  nvlist_free(hdrnv);
!                         if (err)
                                  goto stderr_out;
                  }
  
                  if (!flags->dryrun) {
                          /* write first begin record */
                          drr.drr_type = DRR_BEGIN;
                          drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
*** 1478,1501 ****
                                  err = cksum_and_write(packbuf, buflen, &zc,
                                      outfd);
                          }
                          free(packbuf);
                          if (err == -1) {
-                                 fsavl_destroy(fsavl);
-                                 nvlist_free(fss);
                                  err = errno;
                                  goto stderr_out;
                          }
  
                          /* write end record */
                          bzero(&drr, sizeof (drr));
                          drr.drr_type = DRR_END;
                          drr.drr_u.drr_end.drr_checksum = zc;
                          err = write(outfd, &drr, sizeof (drr));
                          if (err == -1) {
-                                 fsavl_destroy(fsavl);
-                                 nvlist_free(fss);
                                  err = errno;
                                  goto stderr_out;
                          }
  
                          err = 0;
--- 1443,1462 ----
*** 1503,1513 ****
          }
  
          /* dump each stream */
          sdd.fromsnap = fromsnap;
          sdd.tosnap = tosnap;
!         if (flags->dedup)
                  sdd.outfd = pipefd[0];
          else
                  sdd.outfd = outfd;
          sdd.replicate = flags->replicate;
          sdd.doall = flags->doall;
--- 1464,1474 ----
          }
  
          /* dump each stream */
          sdd.fromsnap = fromsnap;
          sdd.tosnap = tosnap;
!         if (tid != 0)
                  sdd.outfd = pipefd[0];
          else
                  sdd.outfd = outfd;
          sdd.replicate = flags->replicate;
          sdd.doall = flags->doall;
*** 1540,1579 ****
                  sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
                  if (sdd.cleanup_fd < 0) {
                          err = errno;
                          goto stderr_out;
                  }
          } else {
                  sdd.cleanup_fd = -1;
          }
!         if (flags->verbose) {
                  /*
                   * Do a verbose no-op dry run to get all the verbose output
!                  * before generating any data.  Then do a non-verbose real
!                  * run to generate the streams.
                   */
                  sdd.dryrun = B_TRUE;
                  err = dump_filesystems(zhp, &sdd);
!                 sdd.dryrun = flags->dryrun;
!                 sdd.verbose = B_FALSE;
                  if (flags->parsable) {
                          (void) fprintf(stderr, "size\t%llu\n",
                              (longlong_t)sdd.size);
                  } else {
                          char buf[16];
                          zfs_nicenum(sdd.size, buf, sizeof (buf));
                          (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
                              "total estimated size is %s\n"), buf);
                  }
          }
          err = dump_filesystems(zhp, &sdd);
          fsavl_destroy(fsavl);
          nvlist_free(fss);
  
!         if (flags->dedup) {
!                 (void) close(pipefd[0]);
                  (void) pthread_join(tid, NULL);
          }
  
          if (sdd.cleanup_fd != -1) {
                  VERIFY(0 == close(sdd.cleanup_fd));
                  sdd.cleanup_fd = -1;
--- 1501,1575 ----
                  sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
                  if (sdd.cleanup_fd < 0) {
                          err = errno;
                          goto stderr_out;
                  }
+                 sdd.snapholds = fnvlist_alloc();
          } else {
                  sdd.cleanup_fd = -1;
+                 sdd.snapholds = NULL;
          }
!         if (flags->verbose || sdd.snapholds != NULL) {
                  /*
                   * Do a verbose no-op dry run to get all the verbose output
!                  * or to gather snapshot holds before generating any data,
!                  * then do a non-verbose real run to generate the streams.
                   */
                  sdd.dryrun = B_TRUE;
                  err = dump_filesystems(zhp, &sdd);
! 
!                 if (err != 0)
!                         goto stderr_out;
! 
!                 if (flags->verbose) {
                          if (flags->parsable) {
                                  (void) fprintf(stderr, "size\t%llu\n",
                                      (longlong_t)sdd.size);
                          } else {
                                  char buf[16];
                                  zfs_nicenum(sdd.size, buf, sizeof (buf));
                                  (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
                                      "total estimated size is %s\n"), buf);
                          }
                  }
+ 
+                 /* Ensure no snaps found is treated as an error. */
+                 if (!sdd.seento) {
+                         err = ENOENT;
+                         goto err_out;
+                 }
+ 
+                 /* Skip the second run if dryrun was requested. */
+                 if (flags->dryrun)
+                         goto err_out;
+ 
+                 if (sdd.snapholds != NULL) {
+                         err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
+                         if (err != 0)
+                                 goto stderr_out;
+ 
+                         fnvlist_free(sdd.snapholds);
+                         sdd.snapholds = NULL;
+                 }
+ 
+                 sdd.dryrun = B_FALSE;
+                 sdd.verbose = B_FALSE;
+         }
+ 
          err = dump_filesystems(zhp, &sdd);
          fsavl_destroy(fsavl);
          nvlist_free(fss);
  
!         /* Ensure no snaps found is treated as an error. */
!         if (err == 0 && !sdd.seento)
!                 err = ENOENT;
! 
!         if (tid != 0) {
!                 if (err != 0)
!                         (void) pthread_cancel(tid);
                  (void) pthread_join(tid, NULL);
+                 (void) close(pipefd[0]);
          }
  
          if (sdd.cleanup_fd != -1) {
                  VERIFY(0 == close(sdd.cleanup_fd));
                  sdd.cleanup_fd = -1;
*** 1597,1609 ****
          return (err || sdd.err);
  
  stderr_out:
          err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
  err_out:
          if (sdd.cleanup_fd != -1)
                  VERIFY(0 == close(sdd.cleanup_fd));
!         if (flags->dedup) {
                  (void) pthread_cancel(tid);
                  (void) pthread_join(tid, NULL);
                  (void) close(pipefd[0]);
          }
          return (err);
--- 1593,1609 ----
          return (err || sdd.err);
  
  stderr_out:
          err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
  err_out:
+         fsavl_destroy(fsavl);
+         nvlist_free(fss);
+         fnvlist_free(sdd.snapholds);
+ 
          if (sdd.cleanup_fd != -1)
                  VERIFY(0 == close(sdd.cleanup_fd));
!         if (tid != 0) {
                  (void) pthread_cancel(tid);
                  (void) pthread_join(tid, NULL);
                  (void) close(pipefd[0]);
          }
          return (err);