Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>


   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.

  26  */
  27 
  28 #include <assert.h>
  29 #include <ctype.h>
  30 #include <errno.h>
  31 #include <libintl.h>
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <strings.h>
  35 #include <unistd.h>
  36 #include <stddef.h>
  37 #include <fcntl.h>
  38 #include <sys/mount.h>
  39 #include <pthread.h>
  40 #include <umem.h>
  41 #include <time.h>
  42 
  43 #include <libzfs.h>
  44 
  45 #include "zfs_namecheck.h"


 776         }
 777 
 778         *nvlp = sd.fss;
 779         return (0);
 780 }
 781 
 782 /*
 783  * Routines specific to "zfs send"
 784  */
 785 typedef struct send_dump_data {
 786         /* these are all just the short snapname (the part after the @) */
 787         const char *fromsnap;
 788         const char *tosnap;
 789         char prevsnap[ZFS_MAXNAMELEN];
 790         uint64_t prevsnap_obj;
 791         boolean_t seenfrom, seento, replicate, doall, fromorigin;
 792         boolean_t verbose, dryrun, parsable, progress;
 793         int outfd;
 794         boolean_t err;
 795         nvlist_t *fss;

 796         avl_tree_t *fsavl;
 797         snapfilter_cb_t *filter_cb;
 798         void *filter_cb_arg;
 799         nvlist_t *debugnv;
 800         char holdtag[ZFS_MAXNAMELEN];
 801         int cleanup_fd;
 802         uint64_t size;
 803 } send_dump_data_t;
 804 
 805 static int
 806 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
 807     boolean_t fromorigin, uint64_t *sizep)
 808 {
 809         zfs_cmd_t zc = { 0 };
 810         libzfs_handle_t *hdl = zhp->zfs_hdl;
 811 
 812         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 813         assert(fromsnap_obj == 0 || !fromorigin);
 814 
 815         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));


 925                 case ENXIO:
 926                 case EPIPE:
 927                 case ERANGE:
 928                 case EFAULT:
 929                 case EROFS:
 930                         zfs_error_aux(hdl, strerror(errno));
 931                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 932 
 933                 default:
 934                         return (zfs_standard_error(hdl, errno, errbuf));
 935                 }
 936         }
 937 
 938         if (debugnv)
 939                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
 940         nvlist_free(thisdbg);
 941 
 942         return (0);
 943 }
 944 
 945 static int
 946 hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
 947 {
 948         zfs_handle_t *pzhp;
 949         int error = 0;
 950         char *thissnap;
 951 
 952         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 953 
 954         if (sdd->dryrun)
 955                 return (0);
 956 
 957         /*
 958          * zfs_send() only opens a cleanup_fd for sends that need it,
 959          * e.g. replication and doall.
 960          */
 961         if (sdd->cleanup_fd == -1)
 962                 return (0);
 963 
 964         thissnap = strchr(zhp->zfs_name, '@') + 1;
 965         *(thissnap - 1) = '\0';
 966         pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
 967         *(thissnap - 1) = '@';
 968 
 969         /*
 970          * It's OK if the parent no longer exists.  The send code will
 971          * handle that error.
 972          */
 973         if (pzhp) {
 974                 error = zfs_hold(pzhp, thissnap, sdd->holdtag,
 975                     B_FALSE, B_TRUE, sdd->cleanup_fd);
 976                 zfs_close(pzhp);
 977         }
 978 
 979         return (error);
 980 }
 981 
 982 static void *
 983 send_progress_thread(void *arg)
 984 {
 985         progress_arg_t *pa = arg;
 986 
 987         zfs_cmd_t zc = { 0 };
 988         zfs_handle_t *zhp = pa->pa_zhp;
 989         libzfs_handle_t *hdl = zhp->zfs_hdl;
 990         unsigned long long bytes;
 991         char buf[16];
 992 
 993         time_t t;
 994         struct tm *tm;
 995 
 996         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 997         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 998 
 999         if (!pa->pa_parsable)


1015 
1016                 if (pa->pa_parsable) {
1017                         (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1018                             tm->tm_hour, tm->tm_min, tm->tm_sec,
1019                             bytes, zhp->zfs_name);
1020                 } else {
1021                         zfs_nicenum(bytes, buf, sizeof (buf));
1022                         (void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1023                             tm->tm_hour, tm->tm_min, tm->tm_sec,
1024                             buf, zhp->zfs_name);
1025                 }
1026         }
1027 }
1028 
1029 static int
1030 dump_snapshot(zfs_handle_t *zhp, void *arg)
1031 {
1032         send_dump_data_t *sdd = arg;
1033         progress_arg_t pa = { 0 };
1034         pthread_t tid;
1035 
1036         char *thissnap;
1037         int err;
1038         boolean_t isfromsnap, istosnap, fromorigin;
1039         boolean_t exclude = B_FALSE;
1040 

1041         thissnap = strchr(zhp->zfs_name, '@') + 1;
1042         isfromsnap = (sdd->fromsnap != NULL &&
1043             strcmp(sdd->fromsnap, thissnap) == 0);
1044 
1045         if (!sdd->seenfrom && isfromsnap) {
1046                 err = hold_for_send(zhp, sdd);
1047                 if (err == 0) {
1048                         sdd->seenfrom = B_TRUE;
1049                         (void) strcpy(sdd->prevsnap, thissnap);
1050                         sdd->prevsnap_obj = zfs_prop_get_int(zhp,
1051                             ZFS_PROP_OBJSETID);
1052                 } else if (err == ENOENT) {
1053                         err = 0;
1054                 }
1055                 zfs_close(zhp);
1056                 return (err);
1057         }
1058 
1059         if (sdd->seento || !sdd->seenfrom) {
1060                 zfs_close(zhp);
1061                 return (0);
1062         }
1063 
1064         istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1065         if (istosnap)
1066                 sdd->seento = B_TRUE;
1067 
1068         if (!sdd->doall && !isfromsnap && !istosnap) {
1069                 if (sdd->replicate) {
1070                         char *snapname;
1071                         nvlist_t *snapprops;
1072                         /*
1073                          * Filter out all intermediate snapshots except origin
1074                          * snapshots needed to replicate clones.
1075                          */
1076                         nvlist_t *nvfs = fsavl_find(sdd->fsavl,


1087         }
1088 
1089         /*
1090          * If a filter function exists, call it to determine whether
1091          * this snapshot will be sent.
1092          */
1093         if (exclude || (sdd->filter_cb != NULL &&
1094             sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1095                 /*
1096                  * This snapshot is filtered out.  Don't send it, and don't
1097                  * set prevsnap_obj, so it will be as if this snapshot didn't
1098                  * exist, and the next accepted snapshot will be sent as
1099                  * an incremental from the last accepted one, or as the
1100                  * first (and full) snapshot in the case of a replication,
1101                  * non-incremental send.
1102                  */
1103                 zfs_close(zhp);
1104                 return (0);
1105         }
1106 
1107         err = hold_for_send(zhp, sdd);
1108         if (err) {
1109                 if (err == ENOENT)
1110                         err = 0;
1111                 zfs_close(zhp);
1112                 return (err);
1113         }
1114 
1115         fromorigin = sdd->prevsnap[0] == '\0' &&
1116             (sdd->fromorigin || sdd->replicate);
1117 
1118         if (sdd->verbose) {
1119                 uint64_t size;
1120                 err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1121                     fromorigin, &size);
1122 
1123                 if (sdd->parsable) {
1124                         if (sdd->prevsnap[0] != '\0') {
1125                                 (void) fprintf(stderr, "incremental\t%s\t%s",
1126                                     sdd->prevsnap, zhp->zfs_name);
1127                         } else {
1128                                 (void) fprintf(stderr, "full\t%s",
1129                                     zhp->zfs_name);
1130                         }
1131                 } else {
1132                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1133                             "send from @%s to %s"),
1134                             sdd->prevsnap, zhp->zfs_name);


1362  *       is TRUE.
1363  *
1364  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1365  * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1366  * if "replicate" is set.  If "doall" is set, dump all the intermediate
1367  * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1368  * case too. If "props" is set, send properties.
1369  */
1370 int
1371 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1372     sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1373     void *cb_arg, nvlist_t **debugnvp)
1374 {
1375         char errbuf[1024];
1376         send_dump_data_t sdd = { 0 };
1377         int err = 0;
1378         nvlist_t *fss = NULL;
1379         avl_tree_t *fsavl = NULL;
1380         static uint64_t holdseq;
1381         int spa_version;
1382         pthread_t tid;
1383         int pipefd[2];
1384         dedup_arg_t dda = { 0 };
1385         int featureflags = 0;
1386 
1387         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1388             "cannot send '%s'"), zhp->zfs_name);
1389 
1390         if (fromsnap && fromsnap[0] == '\0') {
1391                 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1392                     "zero-length incremental source"));
1393                 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1394         }
1395 
1396         if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1397                 uint64_t version;
1398                 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1399                 if (version >= ZPL_VERSION_SA) {
1400                         featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1401                 }
1402         }


1435                                 VERIFY(0 == nvlist_add_string(hdrnv,
1436                                     "fromsnap", fromsnap));
1437                         }
1438                         VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1439                         if (!flags->replicate) {
1440                                 VERIFY(0 == nvlist_add_boolean(hdrnv,
1441                                     "not_recursive"));
1442                         }
1443 
1444                         err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1445                             fromsnap, tosnap, flags->replicate, &fss, &fsavl);
1446                         if (err)
1447                                 goto err_out;
1448                         VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1449                         err = nvlist_pack(hdrnv, &packbuf, &buflen,
1450                             NV_ENCODE_XDR, 0);
1451                         if (debugnvp)
1452                                 *debugnvp = hdrnv;
1453                         else
1454                                 nvlist_free(hdrnv);
1455                         if (err) {
1456                                 fsavl_destroy(fsavl);
1457                                 nvlist_free(fss);
1458                                 goto stderr_out;
1459                         }
1460                 }
1461 
1462                 if (!flags->dryrun) {
1463                         /* write first begin record */
1464                         drr.drr_type = DRR_BEGIN;
1465                         drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1466                         DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1467                             drr_versioninfo, DMU_COMPOUNDSTREAM);
1468                         DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1469                             drr_versioninfo, featureflags);
1470                         (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1471                             sizeof (drr.drr_u.drr_begin.drr_toname),
1472                             "%s@%s", zhp->zfs_name, tosnap);
1473                         drr.drr_payloadlen = buflen;
1474                         err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1475 
1476                         /* write header nvlist */
1477                         if (err != -1 && packbuf != NULL) {
1478                                 err = cksum_and_write(packbuf, buflen, &zc,
1479                                     outfd);
1480                         }
1481                         free(packbuf);
1482                         if (err == -1) {
1483                                 fsavl_destroy(fsavl);
1484                                 nvlist_free(fss);
1485                                 err = errno;
1486                                 goto stderr_out;
1487                         }
1488 
1489                         /* write end record */
1490                         bzero(&drr, sizeof (drr));
1491                         drr.drr_type = DRR_END;
1492                         drr.drr_u.drr_end.drr_checksum = zc;
1493                         err = write(outfd, &drr, sizeof (drr));
1494                         if (err == -1) {
1495                                 fsavl_destroy(fsavl);
1496                                 nvlist_free(fss);
1497                                 err = errno;
1498                                 goto stderr_out;
1499                         }
1500 
1501                         err = 0;
1502                 }
1503         }
1504 
1505         /* dump each stream */
1506         sdd.fromsnap = fromsnap;
1507         sdd.tosnap = tosnap;
1508         if (flags->dedup)
1509                 sdd.outfd = pipefd[0];
1510         else
1511                 sdd.outfd = outfd;
1512         sdd.replicate = flags->replicate;
1513         sdd.doall = flags->doall;
1514         sdd.fromorigin = flags->fromorigin;
1515         sdd.fss = fss;
1516         sdd.fsavl = fsavl;
1517         sdd.verbose = flags->verbose;
1518         sdd.parsable = flags->parsable;
1519         sdd.progress = flags->progress;
1520         sdd.dryrun = flags->dryrun;
1521         sdd.filter_cb = filter_func;
1522         sdd.filter_cb_arg = cb_arg;
1523         if (debugnvp)
1524                 sdd.debugnv = *debugnvp;
1525 
1526         /*
1527          * Some flags require that we place user holds on the datasets that are
1528          * being sent so they don't get destroyed during the send. We can skip
1529          * this step if the pool is imported read-only since the datasets cannot
1530          * be destroyed.
1531          */
1532         if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1533             ZPOOL_PROP_READONLY, NULL) &&
1534             zfs_spa_version(zhp, &spa_version) == 0 &&
1535             spa_version >= SPA_VERSION_USERREFS &&
1536             (flags->doall || flags->replicate)) {
1537                 ++holdseq;
1538                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1539                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1540                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1541                 if (sdd.cleanup_fd < 0) {
1542                         err = errno;
1543                         goto stderr_out;
1544                 }

1545         } else {
1546                 sdd.cleanup_fd = -1;

1547         }
1548         if (flags->verbose) {
1549                 /*
1550                  * Do a verbose no-op dry run to get all the verbose output
1551                  * before generating any data.  Then do a non-verbose real
1552                  * run to generate the streams.
1553                  */
1554                 sdd.dryrun = B_TRUE;
1555                 err = dump_filesystems(zhp, &sdd);
1556                 sdd.dryrun = flags->dryrun;
1557                 sdd.verbose = B_FALSE;



1558                 if (flags->parsable) {
1559                         (void) fprintf(stderr, "size\t%llu\n",
1560                             (longlong_t)sdd.size);
1561                 } else {
1562                         char buf[16];
1563                         zfs_nicenum(sdd.size, buf, sizeof (buf));
1564                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1565                             "total estimated size is %s\n"), buf);
1566                 }
1567         }
























1568         err = dump_filesystems(zhp, &sdd);
1569         fsavl_destroy(fsavl);
1570         nvlist_free(fss);
1571 
1572         if (flags->dedup) {
1573                 (void) close(pipefd[0]);





1574                 (void) pthread_join(tid, NULL);

1575         }
1576 
1577         if (sdd.cleanup_fd != -1) {
1578                 VERIFY(0 == close(sdd.cleanup_fd));
1579                 sdd.cleanup_fd = -1;
1580         }
1581 
1582         if (!flags->dryrun && (flags->replicate || flags->doall ||
1583             flags->props)) {
1584                 /*
1585                  * write final end record.  NB: want to do this even if
1586                  * there was some error, because it might not be totally
1587                  * failed.
1588                  */
1589                 dmu_replay_record_t drr = { 0 };
1590                 drr.drr_type = DRR_END;
1591                 if (write(outfd, &drr, sizeof (drr)) == -1) {
1592                         return (zfs_standard_error(zhp->zfs_hdl,
1593                             errno, errbuf));
1594                 }
1595         }
1596 
1597         return (err || sdd.err);
1598 
1599 stderr_out:
1600         err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1601 err_out:




1602         if (sdd.cleanup_fd != -1)
1603                 VERIFY(0 == close(sdd.cleanup_fd));
1604         if (flags->dedup) {
1605                 (void) pthread_cancel(tid);
1606                 (void) pthread_join(tid, NULL);
1607                 (void) close(pipefd[0]);
1608         }
1609         return (err);
1610 }
1611 
1612 /*
1613  * Routines specific to "zfs recv"
1614  */
1615 
1616 static int
1617 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1618     boolean_t byteswap, zio_cksum_t *zc)
1619 {
1620         char *cp = buf;
1621         int rv;
1622         int len = ilen;
1623 
1624         do {




   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  26  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  27  */
  28 
  29 #include <assert.h>
  30 #include <ctype.h>
  31 #include <errno.h>
  32 #include <libintl.h>
  33 #include <stdio.h>
  34 #include <stdlib.h>
  35 #include <strings.h>
  36 #include <unistd.h>
  37 #include <stddef.h>
  38 #include <fcntl.h>
  39 #include <sys/mount.h>
  40 #include <pthread.h>
  41 #include <umem.h>
  42 #include <time.h>
  43 
  44 #include <libzfs.h>
  45 
  46 #include "zfs_namecheck.h"


 777         }
 778 
 779         *nvlp = sd.fss;
 780         return (0);
 781 }
 782 
 783 /*
 784  * Routines specific to "zfs send"
 785  */
 786 typedef struct send_dump_data {
 787         /* these are all just the short snapname (the part after the @) */
 788         const char *fromsnap;
 789         const char *tosnap;
 790         char prevsnap[ZFS_MAXNAMELEN];
 791         uint64_t prevsnap_obj;
 792         boolean_t seenfrom, seento, replicate, doall, fromorigin;
 793         boolean_t verbose, dryrun, parsable, progress;
 794         int outfd;
 795         boolean_t err;
 796         nvlist_t *fss;
 797         nvlist_t *snapholds;
 798         avl_tree_t *fsavl;
 799         snapfilter_cb_t *filter_cb;
 800         void *filter_cb_arg;
 801         nvlist_t *debugnv;
 802         char holdtag[ZFS_MAXNAMELEN];
 803         int cleanup_fd;
 804         uint64_t size;
 805 } send_dump_data_t;
 806 
 807 static int
 808 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
 809     boolean_t fromorigin, uint64_t *sizep)
 810 {
 811         zfs_cmd_t zc = { 0 };
 812         libzfs_handle_t *hdl = zhp->zfs_hdl;
 813 
 814         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 815         assert(fromsnap_obj == 0 || !fromorigin);
 816 
 817         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));


 927                 case ENXIO:
 928                 case EPIPE:
 929                 case ERANGE:
 930                 case EFAULT:
 931                 case EROFS:
 932                         zfs_error_aux(hdl, strerror(errno));
 933                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 934 
 935                 default:
 936                         return (zfs_standard_error(hdl, errno, errbuf));
 937                 }
 938         }
 939 
 940         if (debugnv)
 941                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
 942         nvlist_free(thisdbg);
 943 
 944         return (0);
 945 }
 946 
 947 static void
 948 gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
 949 {




 950         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 951 



 952         /*
 953          * zfs_send() only sets snapholds for sends that need them,
 954          * e.g. replication and doall.
 955          */
 956         if (sdd->snapholds == NULL)
 957                 return;















 958 
 959         fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
 960 }
 961 
 962 static void *
 963 send_progress_thread(void *arg)
 964 {
 965         progress_arg_t *pa = arg;
 966 
 967         zfs_cmd_t zc = { 0 };
 968         zfs_handle_t *zhp = pa->pa_zhp;
 969         libzfs_handle_t *hdl = zhp->zfs_hdl;
 970         unsigned long long bytes;
 971         char buf[16];
 972 
 973         time_t t;
 974         struct tm *tm;
 975 
 976         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 977         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 978 
 979         if (!pa->pa_parsable)


 995 
 996                 if (pa->pa_parsable) {
 997                         (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
 998                             tm->tm_hour, tm->tm_min, tm->tm_sec,
 999                             bytes, zhp->zfs_name);
1000                 } else {
1001                         zfs_nicenum(bytes, buf, sizeof (buf));
1002                         (void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1003                             tm->tm_hour, tm->tm_min, tm->tm_sec,
1004                             buf, zhp->zfs_name);
1005                 }
1006         }
1007 }
1008 
1009 static int
1010 dump_snapshot(zfs_handle_t *zhp, void *arg)
1011 {
1012         send_dump_data_t *sdd = arg;
1013         progress_arg_t pa = { 0 };
1014         pthread_t tid;

1015         char *thissnap;
1016         int err;
1017         boolean_t isfromsnap, istosnap, fromorigin;
1018         boolean_t exclude = B_FALSE;
1019 
1020         err = 0;
1021         thissnap = strchr(zhp->zfs_name, '@') + 1;
1022         isfromsnap = (sdd->fromsnap != NULL &&
1023             strcmp(sdd->fromsnap, thissnap) == 0);
1024 
1025         if (!sdd->seenfrom && isfromsnap) {
1026                 gather_holds(zhp, sdd);

1027                 sdd->seenfrom = B_TRUE;
1028                 (void) strcpy(sdd->prevsnap, thissnap);
1029                 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);




1030                 zfs_close(zhp);
1031                 return (0);
1032         }
1033 
1034         if (sdd->seento || !sdd->seenfrom) {
1035                 zfs_close(zhp);
1036                 return (0);
1037         }
1038 
1039         istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1040         if (istosnap)
1041                 sdd->seento = B_TRUE;
1042 
1043         if (!sdd->doall && !isfromsnap && !istosnap) {
1044                 if (sdd->replicate) {
1045                         char *snapname;
1046                         nvlist_t *snapprops;
1047                         /*
1048                          * Filter out all intermediate snapshots except origin
1049                          * snapshots needed to replicate clones.
1050                          */
1051                         nvlist_t *nvfs = fsavl_find(sdd->fsavl,


1062         }
1063 
1064         /*
1065          * If a filter function exists, call it to determine whether
1066          * this snapshot will be sent.
1067          */
1068         if (exclude || (sdd->filter_cb != NULL &&
1069             sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1070                 /*
1071                  * This snapshot is filtered out.  Don't send it, and don't
1072                  * set prevsnap_obj, so it will be as if this snapshot didn't
1073                  * exist, and the next accepted snapshot will be sent as
1074                  * an incremental from the last accepted one, or as the
1075                  * first (and full) snapshot in the case of a replication,
1076                  * non-incremental send.
1077                  */
1078                 zfs_close(zhp);
1079                 return (0);
1080         }
1081 
1082         gather_holds(zhp, sdd);







1083         fromorigin = sdd->prevsnap[0] == '\0' &&
1084             (sdd->fromorigin || sdd->replicate);
1085 
1086         if (sdd->verbose) {
1087                 uint64_t size;
1088                 err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1089                     fromorigin, &size);
1090 
1091                 if (sdd->parsable) {
1092                         if (sdd->prevsnap[0] != '\0') {
1093                                 (void) fprintf(stderr, "incremental\t%s\t%s",
1094                                     sdd->prevsnap, zhp->zfs_name);
1095                         } else {
1096                                 (void) fprintf(stderr, "full\t%s",
1097                                     zhp->zfs_name);
1098                         }
1099                 } else {
1100                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1101                             "send from @%s to %s"),
1102                             sdd->prevsnap, zhp->zfs_name);


1330  *       is TRUE.
1331  *
1332  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1333  * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1334  * if "replicate" is set.  If "doall" is set, dump all the intermediate
1335  * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1336  * case too. If "props" is set, send properties.
1337  */
1338 int
1339 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1340     sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1341     void *cb_arg, nvlist_t **debugnvp)
1342 {
1343         char errbuf[1024];
1344         send_dump_data_t sdd = { 0 };
1345         int err = 0;
1346         nvlist_t *fss = NULL;
1347         avl_tree_t *fsavl = NULL;
1348         static uint64_t holdseq;
1349         int spa_version;
1350         pthread_t tid = 0;
1351         int pipefd[2];
1352         dedup_arg_t dda = { 0 };
1353         int featureflags = 0;
1354 
1355         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1356             "cannot send '%s'"), zhp->zfs_name);
1357 
1358         if (fromsnap && fromsnap[0] == '\0') {
1359                 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1360                     "zero-length incremental source"));
1361                 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1362         }
1363 
1364         if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1365                 uint64_t version;
1366                 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1367                 if (version >= ZPL_VERSION_SA) {
1368                         featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1369                 }
1370         }


1403                                 VERIFY(0 == nvlist_add_string(hdrnv,
1404                                     "fromsnap", fromsnap));
1405                         }
1406                         VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1407                         if (!flags->replicate) {
1408                                 VERIFY(0 == nvlist_add_boolean(hdrnv,
1409                                     "not_recursive"));
1410                         }
1411 
1412                         err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1413                             fromsnap, tosnap, flags->replicate, &fss, &fsavl);
1414                         if (err)
1415                                 goto err_out;
1416                         VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1417                         err = nvlist_pack(hdrnv, &packbuf, &buflen,
1418                             NV_ENCODE_XDR, 0);
1419                         if (debugnvp)
1420                                 *debugnvp = hdrnv;
1421                         else
1422                                 nvlist_free(hdrnv);
1423                         if (err)


1424                                 goto stderr_out;
1425                 }

1426 
1427                 if (!flags->dryrun) {
1428                         /* write first begin record */
1429                         drr.drr_type = DRR_BEGIN;
1430                         drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1431                         DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1432                             drr_versioninfo, DMU_COMPOUNDSTREAM);
1433                         DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1434                             drr_versioninfo, featureflags);
1435                         (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1436                             sizeof (drr.drr_u.drr_begin.drr_toname),
1437                             "%s@%s", zhp->zfs_name, tosnap);
1438                         drr.drr_payloadlen = buflen;
1439                         err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1440 
1441                         /* write header nvlist */
1442                         if (err != -1 && packbuf != NULL) {
1443                                 err = cksum_and_write(packbuf, buflen, &zc,
1444                                     outfd);
1445                         }
1446                         free(packbuf);
1447                         if (err == -1) {


1448                                 err = errno;
1449                                 goto stderr_out;
1450                         }
1451 
1452                         /* write end record */
1453                         bzero(&drr, sizeof (drr));
1454                         drr.drr_type = DRR_END;
1455                         drr.drr_u.drr_end.drr_checksum = zc;
1456                         err = write(outfd, &drr, sizeof (drr));
1457                         if (err == -1) {


1458                                 err = errno;
1459                                 goto stderr_out;
1460                         }
1461 
1462                         err = 0;
1463                 }
1464         }
1465 
1466         /* dump each stream */
1467         sdd.fromsnap = fromsnap;
1468         sdd.tosnap = tosnap;
1469         if (tid != 0)
1470                 sdd.outfd = pipefd[0];
1471         else
1472                 sdd.outfd = outfd;
1473         sdd.replicate = flags->replicate;
1474         sdd.doall = flags->doall;
1475         sdd.fromorigin = flags->fromorigin;
1476         sdd.fss = fss;
1477         sdd.fsavl = fsavl;
1478         sdd.verbose = flags->verbose;
1479         sdd.parsable = flags->parsable;
1480         sdd.progress = flags->progress;
1481         sdd.dryrun = flags->dryrun;
1482         sdd.filter_cb = filter_func;
1483         sdd.filter_cb_arg = cb_arg;
1484         if (debugnvp)
1485                 sdd.debugnv = *debugnvp;
1486 
1487         /*
1488          * Some flags require that we place user holds on the datasets that are
1489          * being sent so they don't get destroyed during the send. We can skip
1490          * this step if the pool is imported read-only since the datasets cannot
1491          * be destroyed.
1492          */
1493         if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1494             ZPOOL_PROP_READONLY, NULL) &&
1495             zfs_spa_version(zhp, &spa_version) == 0 &&
1496             spa_version >= SPA_VERSION_USERREFS &&
1497             (flags->doall || flags->replicate)) {
1498                 ++holdseq;
1499                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1500                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1501                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1502                 if (sdd.cleanup_fd < 0) {
1503                         err = errno;
1504                         goto stderr_out;
1505                 }
1506                 sdd.snapholds = fnvlist_alloc();
1507         } else {
1508                 sdd.cleanup_fd = -1;
1509                 sdd.snapholds = NULL;
1510         }
1511         if (flags->verbose || sdd.snapholds != NULL) {
1512                 /*
1513                  * Do a verbose no-op dry run to get all the verbose output
1514                  * or to gather snapshot holds before generating any data,
1515                  * then do a non-verbose real run to generate the streams.
1516                  */
1517                 sdd.dryrun = B_TRUE;
1518                 err = dump_filesystems(zhp, &sdd);
1519 
1520                 if (err != 0)
1521                         goto stderr_out;
1522 
1523                 if (flags->verbose) {
1524                         if (flags->parsable) {
1525                                 (void) fprintf(stderr, "size\t%llu\n",
1526                                     (longlong_t)sdd.size);
1527                         } else {
1528                                 char buf[16];
1529                                 zfs_nicenum(sdd.size, buf, sizeof (buf));
1530                                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1531                                     "total estimated size is %s\n"), buf);
1532                         }
1533                 }
1534 
1535                 /* Ensure no snaps found is treated as an error. */
1536                 if (!sdd.seento) {
1537                         err = ENOENT;
1538                         goto err_out;
1539                 }
1540 
1541                 /* Skip the second run if dryrun was requested. */
1542                 if (flags->dryrun)
1543                         goto err_out;
1544 
1545                 if (sdd.snapholds != NULL) {
1546                         err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
1547                         if (err != 0)
1548                                 goto stderr_out;
1549 
1550                         fnvlist_free(sdd.snapholds);
1551                         sdd.snapholds = NULL;
1552                 }
1553 
1554                 sdd.dryrun = B_FALSE;
1555                 sdd.verbose = B_FALSE;
1556         }
1557 
1558         err = dump_filesystems(zhp, &sdd);
1559         fsavl_destroy(fsavl);
1560         nvlist_free(fss);
1561 
1562         /* Ensure no snaps found is treated as an error. */
1563         if (err == 0 && !sdd.seento)
1564                 err = ENOENT;
1565 
1566         if (tid != 0) {
1567                 if (err != 0)
1568                         (void) pthread_cancel(tid);
1569                 (void) pthread_join(tid, NULL);
1570                 (void) close(pipefd[0]);
1571         }
1572 
1573         if (sdd.cleanup_fd != -1) {
1574                 VERIFY(0 == close(sdd.cleanup_fd));
1575                 sdd.cleanup_fd = -1;
1576         }
1577 
1578         if (!flags->dryrun && (flags->replicate || flags->doall ||
1579             flags->props)) {
1580                 /*
1581                  * write final end record.  NB: want to do this even if
1582                  * there was some error, because it might not be totally
1583                  * failed.
1584                  */
1585                 dmu_replay_record_t drr = { 0 };
1586                 drr.drr_type = DRR_END;
1587                 if (write(outfd, &drr, sizeof (drr)) == -1) {
1588                         return (zfs_standard_error(zhp->zfs_hdl,
1589                             errno, errbuf));
1590                 }
1591         }
1592 
1593         return (err || sdd.err);
1594 
1595 stderr_out:
1596         err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1597 err_out:
1598         fsavl_destroy(fsavl);
1599         nvlist_free(fss);
1600         fnvlist_free(sdd.snapholds);
1601 
1602         if (sdd.cleanup_fd != -1)
1603                 VERIFY(0 == close(sdd.cleanup_fd));
1604         if (tid != 0) {
1605                 (void) pthread_cancel(tid);
1606                 (void) pthread_join(tid, NULL);
1607                 (void) close(pipefd[0]);
1608         }
1609         return (err);
1610 }
1611 
1612 /*
1613  * Routines specific to "zfs recv"
1614  */
1615 
1616 static int
1617 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1618     boolean_t byteswap, zio_cksum_t *zc)
1619 {
1620         char *cp = buf;
1621         int rv;
1622         int len = ilen;
1623 
1624         do {