Print this page
Optimize creation and removal of temporary "user holds" placed on
snapshots by a zfs send, by ensuring all the required holds and
releases are done in a single dsl_sync_task.
Creation now collates the required holds during a dry run and
then uses a single lzc_hold call via zfs_hold_apply instead of
processing each snapshot in turn.
Deferred (on exit) cleanup by the kernel is also now done in a
dsl_sync_task by reusing dsl_dataset_user_release.
In a test with 11 volumes in a tree, each with 8 snapshots, on a
single-HDD zpool, this reduces the time required to perform a full
send from 20 seconds to under 0.8 seconds.
For reference, eliminating the hold entirely reduces this to 0.15
seconds.
While I'm here:-
* Remove some unused structures
* Fix nvlist_t leak in zfs_release_one

Split Close
Expand all
Collapse all
          --- old/usr/src/lib/libzfs/common/libzfs_sendrecv.c
          +++ new/usr/src/lib/libzfs/common/libzfs_sendrecv.c
↓ open down ↓ 785 lines elided ↑ open up ↑
 786  786          /* these are all just the short snapname (the part after the @) */
 787  787          const char *fromsnap;
 788  788          const char *tosnap;
 789  789          char prevsnap[ZFS_MAXNAMELEN];
 790  790          uint64_t prevsnap_obj;
 791  791          boolean_t seenfrom, seento, replicate, doall, fromorigin;
 792  792          boolean_t verbose, dryrun, parsable, progress;
 793  793          int outfd;
 794  794          boolean_t err;
 795  795          nvlist_t *fss;
      796 +        nvlist_t *snapholds;
 796  797          avl_tree_t *fsavl;
 797  798          snapfilter_cb_t *filter_cb;
 798  799          void *filter_cb_arg;
 799  800          nvlist_t *debugnv;
 800  801          char holdtag[ZFS_MAXNAMELEN];
 801  802          int cleanup_fd;
 802  803          uint64_t size;
 803  804  } send_dump_data_t;
 804  805  
 805  806  static int
↓ open down ↓ 138 lines elided ↑ open up ↑
 944  945  
 945  946  static int
 946  947  hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
 947  948  {
 948  949          zfs_handle_t *pzhp;
 949  950          int error = 0;
 950  951          char *thissnap;
 951  952  
 952  953          assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 953  954  
 954      -        if (sdd->dryrun)
 955      -                return (0);
 956      -
 957  955          /*
 958      -         * zfs_send() only opens a cleanup_fd for sends that need it,
 959      -         * e.g. replication and doall.
      956 +         * We process if snapholds is not NULL even if on a dry run as
      957 +         * this is used to pre-calculate the required holds so they can
      958 +         * be processed in one kernel request
 960  959           */
 961      -        if (sdd->cleanup_fd == -1)
      960 +        if (sdd->snapholds == NULL)
 962  961                  return (0);
 963  962  
 964  963          thissnap = strchr(zhp->zfs_name, '@') + 1;
 965  964          *(thissnap - 1) = '\0';
 966  965          pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
 967  966          *(thissnap - 1) = '@';
 968  967  
 969  968          /*
 970  969           * It's OK if the parent no longer exists.  The send code will
 971  970           * handle that error.
 972  971           */
 973  972          if (pzhp) {
 974      -                error = zfs_hold(pzhp, thissnap, sdd->holdtag,
 975      -                    B_FALSE, B_TRUE, sdd->cleanup_fd);
      973 +                error = zfs_hold_add(pzhp, thissnap, sdd->holdtag, B_TRUE,
      974 +                    sdd->snapholds);
 976  975                  zfs_close(pzhp);
 977  976          }
 978  977  
 979  978          return (error);
 980  979  }
 981  980  
 982  981  static void *
 983  982  send_progress_thread(void *arg)
 984  983  {
 985  984          progress_arg_t *pa = arg;
↓ open down ↓ 549 lines elided ↑ open up ↑
1535 1534              spa_version >= SPA_VERSION_USERREFS &&
1536 1535              (flags->doall || flags->replicate)) {
1537 1536                  ++holdseq;
1538 1537                  (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1539 1538                      ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1540 1539                  sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1541 1540                  if (sdd.cleanup_fd < 0) {
1542 1541                          err = errno;
1543 1542                          goto stderr_out;
1544 1543                  }
     1544 +                sdd.snapholds = fnvlist_alloc();
1545 1545          } else {
1546 1546                  sdd.cleanup_fd = -1;
     1547 +                sdd.snapholds = NULL;
1547 1548          }
1548 1549          if (flags->verbose) {
1549 1550                  /*
1550 1551                   * Do a verbose no-op dry run to get all the verbose output
1551 1552                   * before generating any data.  Then do a non-verbose real
1552 1553                   * run to generate the streams.
1553 1554                   */
1554 1555                  sdd.dryrun = B_TRUE;
1555 1556                  err = dump_filesystems(zhp, &sdd);
1556 1557                  sdd.dryrun = flags->dryrun;
↓ open down ↓ 1 lines elided ↑ open up ↑
1558 1559                  if (flags->parsable) {
1559 1560                          (void) fprintf(stderr, "size\t%llu\n",
1560 1561                              (longlong_t)sdd.size);
1561 1562                  } else {
1562 1563                          char buf[16];
1563 1564                          zfs_nicenum(sdd.size, buf, sizeof (buf));
1564 1565                          (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1565 1566                              "total estimated size is %s\n"), buf);
1566 1567                  }
1567 1568          }
     1569 +
     1570 +        if (sdd.snapholds != NULL) {
     1571 +                /* Holds are required */
     1572 +                if (!flags->verbose) {
     1573 +                        /*
     1574 +                         * A verbose dry run wasn't done so do a non-verbose
     1575 +                         * dry run to collate snapshot holds.
     1576 +                         */
     1577 +                        sdd.dryrun = B_TRUE;
     1578 +                        err = dump_filesystems(zhp, &sdd);
     1579 +                        sdd.dryrun = flags->dryrun;
     1580 +                }
     1581 +
     1582 +                if (err != 0) {
     1583 +                        fnvlist_free(sdd.snapholds);
     1584 +                        goto stderr_out;
     1585 +                }
     1586 +
     1587 +                err = zfs_hold_apply(zhp, B_TRUE, sdd.cleanup_fd, sdd.snapholds);
     1588 +                fnvlist_free(sdd.snapholds);
     1589 +                if (err != 0)
     1590 +                        goto stderr_out;
     1591 +        }
     1592 +        
1568 1593          err = dump_filesystems(zhp, &sdd);
1569 1594          fsavl_destroy(fsavl);
1570 1595          nvlist_free(fss);
1571 1596  
1572 1597          if (flags->dedup) {
1573 1598                  (void) close(pipefd[0]);
1574 1599                  (void) pthread_join(tid, NULL);
1575 1600          }
1576 1601  
1577 1602          if (sdd.cleanup_fd != -1) {
↓ open down ↓ 1618 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX