Print this page
Optimize creation and removal of temporary "user holds" placed on
snapshots by a zfs send, by ensuring all the required holds and
releases are done in a single dsl_sync_task.
Creation now collates the required holds during a dry run and
then uses a single lzc_hold call via zfs_hold_apply instead of
processing each snapshot in turn.
Deferred (on exit) cleanup by the kernel is also now done in
dsl_sync_task by reusing dsl_dataset_user_release.
On a test with 11 volumes in a tree each with 8 snapshots on a
single HDD zpool this reduces the time required to perform a full
send from 20 seconds to under 0.8 seconds.
For reference, eliminating the hold entirely reduces this to 0.15
seconds.
While I'm here:-
* Remove some unused structures
* Fix nvlist_t leak in zfs_release_one


 776         }
 777 
 778         *nvlp = sd.fss;
 779         return (0);
 780 }
 781 
 782 /*
 783  * Routines specific to "zfs send"
 784  */
 785 typedef struct send_dump_data {
             /*
              * State used by the "zfs send" routines.  hold_for_send() takes a
              * pointer to it; the sdd handed to dump_filesystems() appears to
              * be one as well.
              */
 786         /* these are all just the short snapname (the part after the @) */
 787         const char *fromsnap;
 788         const char *tosnap;
 789         char prevsnap[ZFS_MAXNAMELEN];
 790         uint64_t prevsnap_obj;
 791         boolean_t seenfrom, seento, replicate, doall, fromorigin;
             /*
              * dryrun is forced to B_TRUE for the verbose estimate pass and
              * then restored from flags->dryrun; verbose is cleared after
              * that pass.  hold_for_send() places no hold while dryrun is set.
              */
 792         boolean_t verbose, dryrun, parsable, progress;
 793         int outfd;
 794         boolean_t err;
 795         nvlist_t *fss;

 796         avl_tree_t *fsavl;
 797         snapfilter_cb_t *filter_cb;
 798         void *filter_cb_arg;
 799         nvlist_t *debugnv;
             /*
              * Tag used for the user holds; the send setup code formats it as
              * ".send-<pid>-<holdseq>".
              */
 800         char holdtag[ZFS_MAXNAMELEN];
             /*
              * Descriptor opened on ZFS_DEV only when holds are required, and
              * -1 otherwise.  It is passed to zfs_hold() and closed once the
              * filesystems have been dumped.
              */
 801         int cleanup_fd;
             /* Printed as the total estimated size after the verbose dry run. */
 802         uint64_t size;
 803 } send_dump_data_t;
 804 
 805 static int
 806 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
 807     boolean_t fromorigin, uint64_t *sizep)
 808 {
 809         zfs_cmd_t zc = { 0 };
 810         libzfs_handle_t *hdl = zhp->zfs_hdl;
 811 
 812         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 813         assert(fromsnap_obj == 0 || !fromorigin);
 814 
 815         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));


 934                         return (zfs_standard_error(hdl, errno, errbuf));
 935                 }
 936         }
 937 
 938         if (debugnv)
 939                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
 940         nvlist_free(thisdbg);
 941 
 942         return (0);
 943 }
 944 
 945 static int
 946 hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
 947 {
             /*
              * Place a user hold tagged sdd->holdtag on the snapshot zhp, so
              * that it is not destroyed while it is being sent.  Returns 0
              * without holding anything on a dry run, when no cleanup_fd was
              * opened, or when the parent dataset cannot be opened; otherwise
              * returns the result of zfs_hold().
              */
 948         zfs_handle_t *pzhp;
 949         int error = 0;
 950         char *thissnap;
 951 
 952         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 953 
 954         if (sdd->dryrun)
 955                 return (0);
 956 
 957         /*
 958          * zfs_send() only opens a cleanup_fd for sends that need it,
 959          * e.g. replication and doall.

 960          */
 961         if (sdd->cleanup_fd == -1)
 962                 return (0);
 963 
             /*
              * Split the snapshot name at the '@' in place: thissnap points at
              * the short snapshot name, and zfs_name is briefly truncated to
              * the parent dataset name for zfs_open() before the '@' is put
              * back.
              * NOTE(review): the strchr() result is used unchecked; this
              * relies on a snapshot name always containing '@'.
              */
 964         thissnap = strchr(zhp->zfs_name, '@') + 1;
 965         *(thissnap - 1) = '\0';
 966         pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
 967         *(thissnap - 1) = '@';
 968 
 969         /*
 970          * It's OK if the parent no longer exists.  The send code will
 971          * handle that error.
 972          */
 973         if (pzhp) {
                     /*
                      * NOTE(review): B_FALSE/B_TRUE are presumably zfs_hold()'s
                      * "recursive" and "enoent_ok" arguments -- confirm
                      * against its prototype.
                      */
 974                 error = zfs_hold(pzhp, thissnap, sdd->holdtag,
 975                     B_FALSE, B_TRUE, sdd->cleanup_fd);
 976                 zfs_close(pzhp);
 977         }
 978 
 979         return (error);
 980 }
 981 
 982 static void *
 983 send_progress_thread(void *arg)
 984 {
 985         progress_arg_t *pa = arg;
 986 
 987         zfs_cmd_t zc = { 0 };
 988         zfs_handle_t *zhp = pa->pa_zhp;
 989         libzfs_handle_t *hdl = zhp->zfs_hdl;
 990         unsigned long long bytes;
 991         char buf[16];
 992 
 993         time_t t;
 994         struct tm *tm;
 995 


1525 
1526         /*
1527          * Some flags require that we place user holds on the datasets that are
1528          * being sent so they don't get destroyed during the send. We can skip
1529          * this step if the pool is imported read-only since the datasets cannot
1530          * be destroyed.
1531          */
1532         if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1533             ZPOOL_PROP_READONLY, NULL) &&
1534             zfs_spa_version(zhp, &spa_version) == 0 &&
1535             spa_version >= SPA_VERSION_USERREFS &&
1536             (flags->doall || flags->replicate)) {
1537                 ++holdseq;
1538                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1539                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1540                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1541                 if (sdd.cleanup_fd < 0) {
1542                         err = errno;
1543                         goto stderr_out;
1544                 }

1545         } else {
1546                 sdd.cleanup_fd = -1;

1547         }
1548         if (flags->verbose) {
1549                 /*
1550                  * Do a verbose no-op dry run to get all the verbose output
1551                  * before generating any data.  Then do a non-verbose real
1552                  * run to generate the streams.
1553                  */
1554                 sdd.dryrun = B_TRUE;
1555                 err = dump_filesystems(zhp, &sdd);
1556                 sdd.dryrun = flags->dryrun;
1557                 sdd.verbose = B_FALSE;
1558                 if (flags->parsable) {
1559                         (void) fprintf(stderr, "size\t%llu\n",
1560                             (longlong_t)sdd.size);
1561                 } else {
1562                         char buf[16];
1563                         zfs_nicenum(sdd.size, buf, sizeof (buf));
1564                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1565                             "total estimated size is %s\n"), buf);
1566                 }
1567         }
























1568         err = dump_filesystems(zhp, &sdd);
1569         fsavl_destroy(fsavl);
1570         nvlist_free(fss);
1571 
1572         if (flags->dedup) {
1573                 (void) close(pipefd[0]);
1574                 (void) pthread_join(tid, NULL);
1575         }
1576 
1577         if (sdd.cleanup_fd != -1) {
1578                 VERIFY(0 == close(sdd.cleanup_fd));
1579                 sdd.cleanup_fd = -1;
1580         }
1581 
1582         if (!flags->dryrun && (flags->replicate || flags->doall ||
1583             flags->props)) {
1584                 /*
1585                  * write final end record.  NB: want to do this even if
1586                  * there was some error, because it might not be totally
1587                  * failed.




 776         }
 777 
 778         *nvlp = sd.fss;
 779         return (0);
 780 }
 781 
 782 /*
 783  * Routines specific to "zfs send"
 784  */
 785 typedef struct send_dump_data {
             /*
              * State used by the "zfs send" routines.  hold_for_send() takes a
              * pointer to it; the sdd handed to dump_filesystems() appears to
              * be one as well.
              */
 786         /* these are all just the short snapname (the part after the @) */
 787         const char *fromsnap;
 788         const char *tosnap;
 789         char prevsnap[ZFS_MAXNAMELEN];
 790         uint64_t prevsnap_obj;
 791         boolean_t seenfrom, seento, replicate, doall, fromorigin;
             /*
              * dryrun is forced to B_TRUE for the verbose estimate pass and
              * for the hold-collecting pass, then restored from
              * flags->dryrun; verbose is cleared after the verbose pass.
              */
 792         boolean_t verbose, dryrun, parsable, progress;
 793         int outfd;
 794         boolean_t err;
 795         nvlist_t *fss;
             /*
              * Holds collected by hold_for_send() during a dry run.  Allocated
              * with fnvlist_alloc() when holds are required and NULL when
              * they are not; the send setup code passes it to
              * zfs_hold_apply() and then frees it.
              */
 796         nvlist_t *snapholds;
 797         avl_tree_t *fsavl;
 798         snapfilter_cb_t *filter_cb;
 799         void *filter_cb_arg;
 800         nvlist_t *debugnv;
             /*
              * Tag used for the user holds; the send setup code formats it as
              * ".send-<pid>-<holdseq>".
              */
 801         char holdtag[ZFS_MAXNAMELEN];
             /*
              * Descriptor opened on ZFS_DEV only when holds are required, and
              * -1 otherwise.  It is passed to zfs_hold_apply() and closed once
              * the filesystems have been dumped.
              */
 802         int cleanup_fd;
             /* Printed as the total estimated size after the verbose dry run. */
 803         uint64_t size;
 804 } send_dump_data_t;
 805 
 806 static int
 807 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
 808     boolean_t fromorigin, uint64_t *sizep)
 809 {
 810         zfs_cmd_t zc = { 0 };
 811         libzfs_handle_t *hdl = zhp->zfs_hdl;
 812 
 813         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 814         assert(fromsnap_obj == 0 || !fromorigin);
 815 
 816         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));


 935                         return (zfs_standard_error(hdl, errno, errbuf));
 936                 }
 937         }
 938 
 939         if (debugnv)
 940                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
 941         nvlist_free(thisdbg);
 942 
 943         return (0);
 944 }
 945 
 946 static int
 947 hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
 948 {
             /*
              * Record the user hold needed for snapshot zhp, tagged
              * sdd->holdtag, in sdd->snapholds via zfs_hold_add().  Returns 0
              * without recording anything when sdd->snapholds is NULL or when
              * the parent dataset cannot be opened; otherwise returns the
              * result of zfs_hold_add().
              */
 949         zfs_handle_t *pzhp;
 950         int error = 0;
 951         char *thissnap;
 952 
 953         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 954 



 955         /*
 956          * We process if snapholds is not NULL even if on a dry run as
 957          * this is used to pre-calculate the required holds so they can
 958          * be processed in one kernel request
 959          */
 960         if (sdd->snapholds == NULL)
 961                 return (0);
 962 
             /*
              * Split the snapshot name at the '@' in place: thissnap points at
              * the short snapshot name, and zfs_name is briefly truncated to
              * the parent dataset name for zfs_open() before the '@' is put
              * back.
              * NOTE(review): the strchr() result is used unchecked; this
              * relies on a snapshot name always containing '@'.
              */
 963         thissnap = strchr(zhp->zfs_name, '@') + 1;
 964         *(thissnap - 1) = '\0';
 965         pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
 966         *(thissnap - 1) = '@';
 967 
 968         /*
 969          * It's OK if the parent no longer exists.  The send code will
 970          * handle that error.
 971          */
 972         if (pzhp) {
                     /*
                      * NOTE(review): the B_TRUE argument is presumably
                      * zfs_hold_add()'s "enoent_ok" flag -- confirm against
                      * its prototype.
                      */
 973                 error = zfs_hold_add(pzhp, thissnap, sdd->holdtag, B_TRUE,
 974                     sdd->snapholds);
 975                 zfs_close(pzhp);
 976         }
 977 
 978         return (error);
 979 }
 980 
 981 static void *
 982 send_progress_thread(void *arg)
 983 {
 984         progress_arg_t *pa = arg;
 985 
 986         zfs_cmd_t zc = { 0 };
 987         zfs_handle_t *zhp = pa->pa_zhp;
 988         libzfs_handle_t *hdl = zhp->zfs_hdl;
 989         unsigned long long bytes;
 990         char buf[16];
 991 
 992         time_t t;
 993         struct tm *tm;
 994 


1524 
1525         /*
1526          * Some flags require that we place user holds on the datasets that are
1527          * being sent so they don't get destroyed during the send. We can skip
1528          * this step if the pool is imported read-only since the datasets cannot
1529          * be destroyed.
1530          */
1531         if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1532             ZPOOL_PROP_READONLY, NULL) &&
1533             zfs_spa_version(zhp, &spa_version) == 0 &&
1534             spa_version >= SPA_VERSION_USERREFS &&
1535             (flags->doall || flags->replicate)) {
1536                 ++holdseq;
1537                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1538                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1539                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1540                 if (sdd.cleanup_fd < 0) {
1541                         err = errno;
1542                         goto stderr_out;
1543                 }
1544                 sdd.snapholds = fnvlist_alloc();
1545         } else {
1546                 sdd.cleanup_fd = -1;
1547                 sdd.snapholds = NULL;
1548         }
1549         if (flags->verbose) {
1550                 /*
1551                  * Do a verbose no-op dry run to get all the verbose output
1552                  * before generating any data.  Then do a non-verbose real
1553                  * run to generate the streams.
1554                  */
1555                 sdd.dryrun = B_TRUE;
1556                 err = dump_filesystems(zhp, &sdd);
1557                 sdd.dryrun = flags->dryrun;
1558                 sdd.verbose = B_FALSE;
1559                 if (flags->parsable) {
1560                         (void) fprintf(stderr, "size\t%llu\n",
1561                             (longlong_t)sdd.size);
1562                 } else {
1563                         char buf[16];
1564                         zfs_nicenum(sdd.size, buf, sizeof (buf));
1565                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1566                             "total estimated size is %s\n"), buf);
1567                 }
1568         }
1569 
1570         if (sdd.snapholds != NULL) {
1571                 /* Holds are required */
1572                 if (!flags->verbose) {
1573                         /*
1574                          * A verbose dry run wasn't done so do a non-verbose
1575                          * dry run to collate snapshot holds.
1576                          */
1577                         sdd.dryrun = B_TRUE;
1578                         err = dump_filesystems(zhp, &sdd);
1579                         sdd.dryrun = flags->dryrun;
1580                 }
1581 
1582                 if (err != 0) {
1583                         fnvlist_free(sdd.snapholds);
1584                         goto stderr_out;
1585                 }
1586 
1587                 err = zfs_hold_apply(zhp, B_TRUE, sdd.cleanup_fd, sdd.snapholds);
1588                 fnvlist_free(sdd.snapholds);
1589                 if (err != 0)
1590                         goto stderr_out;
1591         }
1592         
1593         err = dump_filesystems(zhp, &sdd);
1594         fsavl_destroy(fsavl);
1595         nvlist_free(fss);
1596 
1597         if (flags->dedup) {
1598                 (void) close(pipefd[0]);
1599                 (void) pthread_join(tid, NULL);
1600         }
1601 
1602         if (sdd.cleanup_fd != -1) {
1603                 VERIFY(0 == close(sdd.cleanup_fd));
1604                 sdd.cleanup_fd = -1;
1605         }
1606 
1607         if (!flags->dryrun && (flags->replicate || flags->doall ||
1608             flags->props)) {
1609                 /*
1610                  * write final end record.  NB: want to do this even if
1611                  * there was some error, because it might not be totally
1612                  * failed.