Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>


 776         }
 777 
 778         *nvlp = sd.fss;
 779         return (0);
 780 }
 781 
 782 /*
 783  * Routines specific to "zfs send"
 784  */
 /*
  * Per-send state.  zfs_send() fills this in and dump_snapshot() receives
  * it through its opaque callback argument.
  */
 785 typedef struct send_dump_data {
 786         /* these are all just the short snapname (the part after the @) */
 787         const char *fromsnap;
 788         const char *tosnap;
 789         char prevsnap[ZFS_MAXNAMELEN];
             /*
              * dump_snapshot() stores the ZFS_PROP_OBJSETID of the
              * from-snapshot here when it records that name in prevsnap.
              */
 790         uint64_t prevsnap_obj;
             /*
              * seenfrom and seento are set by dump_snapshot() when it reaches
              * the from / to snapshot.  The other flags on these two lines are
              * copied from the caller's sendflags_t by zfs_send(), which also
              * toggles dryrun and verbose around its verbose dry run.
              */
 791         boolean_t seenfrom, seento, replicate, doall, fromorigin;
 792         boolean_t verbose, dryrun, parsable, progress;
             /*
              * Set by zfs_send() to pipefd[0] when flags->dedup is set,
              * otherwise to the caller's outfd.
              */
 793         int outfd;
             /* OR-ed with the local error to form zfs_send()'s return value. */
 794         boolean_t err;
             /*
              * fss and fsavl are the outputs of gather_nvlist(); zfs_send()
              * frees them with nvlist_free() and fsavl_destroy().
              */
 795         nvlist_t *fss;

 796         avl_tree_t *fsavl;
             /*
              * Optional filter: when non-NULL, dump_snapshot() calls it with
              * filter_cb_arg and skips the snapshot if it returns B_FALSE.
              */
 797         snapfilter_cb_t *filter_cb;
 798         void *filter_cb_arg;
             /* Set from *debugnvp when the caller supplied a debugnvp. */
 799         nvlist_t *debugnv;
             /*
              * Hold tag of the form ".send-<pid>-<seq>" built by zfs_send()
              * and passed to zfs_hold() by hold_for_send().
              */
 800         char holdtag[ZFS_MAXNAMELEN];
             /*
              * Descriptor opened on ZFS_DEV by zfs_send() when holds are going
              * to be placed; -1 otherwise.
              */
 801         int cleanup_fd;
             /*
              * Printed by zfs_send() as the total estimated size after the
              * verbose dry run.
              */
 802         uint64_t size;
 803 } send_dump_data_t;
 804 
 805 static int
 806 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
 807     boolean_t fromorigin, uint64_t *sizep)
 808 {
 809         zfs_cmd_t zc = { 0 };
 810         libzfs_handle_t *hdl = zhp->zfs_hdl;
 811 
 812         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 813         assert(fromsnap_obj == 0 || !fromorigin);
 814 
 815         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));


 925                 case ENXIO:
 926                 case EPIPE:
 927                 case ERANGE:
 928                 case EFAULT:
 929                 case EROFS:
 930                         zfs_error_aux(hdl, strerror(errno));
 931                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 932 
 933                 default:
 934                         return (zfs_standard_error(hdl, errno, errbuf));
 935                 }
 936         }
 937 
 938         if (debugnv)
 939                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
 940         nvlist_free(thisdbg);
 941 
 942         return (0);
 943 }
 944 
 /*
  * Place a hold, tagged with sdd->holdtag, on the snapshot zhp by calling
  * zfs_hold() on its parent dataset.
  *
  * Returns 0 without doing anything for a dry run or when sdd->cleanup_fd
  * is -1.  Also returns 0 if the parent dataset cannot be opened.  Otherwise
  * returns whatever zfs_hold() returned.
  */
 945 static int
 946 hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
 947 {
 948         zfs_handle_t *pzhp;
 949         int error = 0;
 950         char *thissnap;
 951 
 952         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 953 
 954         if (sdd->dryrun)
 955                 return (0);
 956 
 957         /*
 958          * zfs_send() only opens a cleanup_fd for sends that need it,
 959          * e.g. replication and doall.
 960          */
 961         if (sdd->cleanup_fd == -1)
 962                 return (0);
 963 
             /*
              * Temporarily cut zfs_name at the '@' so it names the parent
              * dataset for zfs_open(), then put the '@' back.  thissnap is
              * left pointing at the short snapshot name.
              */
 964         thissnap = strchr(zhp->zfs_name, '@') + 1;
 965         *(thissnap - 1) = '\0';
 966         pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
 967         *(thissnap - 1) = '@';
 968 
 969         /*
 970          * It's OK if the parent no longer exists.  The send code will
 971          * handle that error.
 972          */
 973         if (pzhp) {
 974                 error = zfs_hold(pzhp, thissnap, sdd->holdtag,
 975                     B_FALSE, B_TRUE, sdd->cleanup_fd);
 976                 zfs_close(pzhp);
 977         }
 978 
 979         return (error);
 980 }
 981 
 982 static void *
 983 send_progress_thread(void *arg)
 984 {
 985         progress_arg_t *pa = arg;
 986 
 987         zfs_cmd_t zc = { 0 };
 988         zfs_handle_t *zhp = pa->pa_zhp;
 989         libzfs_handle_t *hdl = zhp->zfs_hdl;
 990         unsigned long long bytes;
 991         char buf[16];
 992 
 993         time_t t;
 994         struct tm *tm;
 995 
 996         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 997         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 998 
 999         if (!pa->pa_parsable)


1015 
1016                 if (pa->pa_parsable) {
1017                         (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1018                             tm->tm_hour, tm->tm_min, tm->tm_sec,
1019                             bytes, zhp->zfs_name);
1020                 } else {
1021                         zfs_nicenum(bytes, buf, sizeof (buf));
1022                         (void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1023                             tm->tm_hour, tm->tm_min, tm->tm_sec,
1024                             buf, zhp->zfs_name);
1025                 }
1026         }
1027 }
1028 
1029 static int
1030 dump_snapshot(zfs_handle_t *zhp, void *arg)
1031 {
1032         send_dump_data_t *sdd = arg;
1033         progress_arg_t pa = { 0 };
1034         pthread_t tid;
1035 
1036         char *thissnap;
1037         int err;
1038         boolean_t isfromsnap, istosnap, fromorigin;
1039         boolean_t exclude = B_FALSE;
1040 

1041         thissnap = strchr(zhp->zfs_name, '@') + 1;
1042         isfromsnap = (sdd->fromsnap != NULL &&
1043             strcmp(sdd->fromsnap, thissnap) == 0);
1044 
1045         if (!sdd->seenfrom && isfromsnap) {
1046                 err = hold_for_send(zhp, sdd);
1047                 if (err == 0) {
1048                         sdd->seenfrom = B_TRUE;
1049                         (void) strcpy(sdd->prevsnap, thissnap);
1050                         sdd->prevsnap_obj = zfs_prop_get_int(zhp,
1051                             ZFS_PROP_OBJSETID);
1052                 } else if (err == ENOENT) {
1053                         err = 0;
1054                 }
1055                 zfs_close(zhp);
1056                 return (err);
1057         }
1058 
1059         if (sdd->seento || !sdd->seenfrom) {
1060                 zfs_close(zhp);
1061                 return (0);
1062         }
1063 
1064         istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1065         if (istosnap)
1066                 sdd->seento = B_TRUE;
1067 
1068         if (!sdd->doall && !isfromsnap && !istosnap) {
1069                 if (sdd->replicate) {
1070                         char *snapname;
1071                         nvlist_t *snapprops;
1072                         /*
1073                          * Filter out all intermediate snapshots except origin
1074                          * snapshots needed to replicate clones.
1075                          */
1076                         nvlist_t *nvfs = fsavl_find(sdd->fsavl,


1087         }
1088 
1089         /*
1090          * If a filter function exists, call it to determine whether
1091          * this snapshot will be sent.
1092          */
1093         if (exclude || (sdd->filter_cb != NULL &&
1094             sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1095                 /*
1096                  * This snapshot is filtered out.  Don't send it, and don't
1097                  * set prevsnap_obj, so it will be as if this snapshot didn't
1098                  * exist, and the next accepted snapshot will be sent as
1099                  * an incremental from the last accepted one, or as the
1100                  * first (and full) snapshot in the case of a replication,
1101                  * non-incremental send.
1102                  */
1103                 zfs_close(zhp);
1104                 return (0);
1105         }
1106 
1107         err = hold_for_send(zhp, sdd);
1108         if (err) {
1109                 if (err == ENOENT)
1110                         err = 0;
1111                 zfs_close(zhp);
1112                 return (err);
1113         }
1114 
1115         fromorigin = sdd->prevsnap[0] == '\0' &&
1116             (sdd->fromorigin || sdd->replicate);
1117 
1118         if (sdd->verbose) {
1119                 uint64_t size;
1120                 err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1121                     fromorigin, &size);
1122 
1123                 if (sdd->parsable) {
1124                         if (sdd->prevsnap[0] != '\0') {
1125                                 (void) fprintf(stderr, "incremental\t%s\t%s",
1126                                     sdd->prevsnap, zhp->zfs_name);
1127                         } else {
1128                                 (void) fprintf(stderr, "full\t%s",
1129                                     zhp->zfs_name);
1130                         }
1131                 } else {
1132                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1133                             "send from @%s to %s"),
1134                             sdd->prevsnap, zhp->zfs_name);


1362  *       is TRUE.
1363  *
1364  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1365  * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1366  * if "replicate" is set.  If "doall" is set, dump all the intermediate
1367  * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1368  * case too. If "props" is set, send properties.
1369  */
1370 int
1371 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1372     sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1373     void *cb_arg, nvlist_t **debugnvp)
1374 {
1375         char errbuf[1024];
1376         send_dump_data_t sdd = { 0 };
1377         int err = 0;
1378         nvlist_t *fss = NULL;
1379         avl_tree_t *fsavl = NULL;
1380         static uint64_t holdseq;
1381         int spa_version;
1382         pthread_t tid;
1383         int pipefd[2];
1384         dedup_arg_t dda = { 0 };
1385         int featureflags = 0;
1386 
1387         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1388             "cannot send '%s'"), zhp->zfs_name);
1389 
1390         if (fromsnap && fromsnap[0] == '\0') {
1391                 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1392                     "zero-length incremental source"));
1393                 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1394         }
1395 
1396         if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1397                 uint64_t version;
1398                 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1399                 if (version >= ZPL_VERSION_SA) {
1400                         featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1401                 }
1402         }


1435                                 VERIFY(0 == nvlist_add_string(hdrnv,
1436                                     "fromsnap", fromsnap));
1437                         }
1438                         VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1439                         if (!flags->replicate) {
1440                                 VERIFY(0 == nvlist_add_boolean(hdrnv,
1441                                     "not_recursive"));
1442                         }
1443 
1444                         err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1445                             fromsnap, tosnap, flags->replicate, &fss, &fsavl);
1446                         if (err)
1447                                 goto err_out;
1448                         VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1449                         err = nvlist_pack(hdrnv, &packbuf, &buflen,
1450                             NV_ENCODE_XDR, 0);
1451                         if (debugnvp)
1452                                 *debugnvp = hdrnv;
1453                         else
1454                                 nvlist_free(hdrnv);
1455                         if (err) {
1456                                 fsavl_destroy(fsavl);
1457                                 nvlist_free(fss);
1458                                 goto stderr_out;
1459                         }
1460                 }
1461 
1462                 if (!flags->dryrun) {
1463                         /* write first begin record */
1464                         drr.drr_type = DRR_BEGIN;
1465                         drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1466                         DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1467                             drr_versioninfo, DMU_COMPOUNDSTREAM);
1468                         DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1469                             drr_versioninfo, featureflags);
1470                         (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1471                             sizeof (drr.drr_u.drr_begin.drr_toname),
1472                             "%s@%s", zhp->zfs_name, tosnap);
1473                         drr.drr_payloadlen = buflen;
1474                         err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1475 
1476                         /* write header nvlist */
1477                         if (err != -1 && packbuf != NULL) {
1478                                 err = cksum_and_write(packbuf, buflen, &zc,
1479                                     outfd);
1480                         }
1481                         free(packbuf);
1482                         if (err == -1) {
1483                                 fsavl_destroy(fsavl);
1484                                 nvlist_free(fss);
1485                                 err = errno;
1486                                 goto stderr_out;
1487                         }
1488 
1489                         /* write end record */
1490                         bzero(&drr, sizeof (drr));
1491                         drr.drr_type = DRR_END;
1492                         drr.drr_u.drr_end.drr_checksum = zc;
1493                         err = write(outfd, &drr, sizeof (drr));
1494                         if (err == -1) {
1495                                 fsavl_destroy(fsavl);
1496                                 nvlist_free(fss);
1497                                 err = errno;
1498                                 goto stderr_out;
1499                         }
1500 
1501                         err = 0;
1502                 }
1503         }
1504 
1505         /* dump each stream */
1506         sdd.fromsnap = fromsnap;
1507         sdd.tosnap = tosnap;
1508         if (flags->dedup)
1509                 sdd.outfd = pipefd[0];
1510         else
1511                 sdd.outfd = outfd;
1512         sdd.replicate = flags->replicate;
1513         sdd.doall = flags->doall;
1514         sdd.fromorigin = flags->fromorigin;
1515         sdd.fss = fss;
1516         sdd.fsavl = fsavl;
1517         sdd.verbose = flags->verbose;
1518         sdd.parsable = flags->parsable;
1519         sdd.progress = flags->progress;
1520         sdd.dryrun = flags->dryrun;
1521         sdd.filter_cb = filter_func;
1522         sdd.filter_cb_arg = cb_arg;
1523         if (debugnvp)
1524                 sdd.debugnv = *debugnvp;
1525 
1526         /*
1527          * Some flags require that we place user holds on the datasets that are
1528          * being sent so they don't get destroyed during the send. We can skip
1529          * this step if the pool is imported read-only since the datasets cannot
1530          * be destroyed.
1531          */
1532         if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1533             ZPOOL_PROP_READONLY, NULL) &&
1534             zfs_spa_version(zhp, &spa_version) == 0 &&
1535             spa_version >= SPA_VERSION_USERREFS &&
1536             (flags->doall || flags->replicate)) {
1537                 ++holdseq;
1538                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1539                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1540                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1541                 if (sdd.cleanup_fd < 0) {
1542                         err = errno;
1543                         goto stderr_out;
1544                 }

1545         } else {
1546                 sdd.cleanup_fd = -1;

1547         }
1548         if (flags->verbose) {
1549                 /*
1550                  * Do a verbose no-op dry run to get all the verbose output
1551                  * before generating any data.  Then do a non-verbose real
1552                  * run to generate the streams.
1553                  */
1554                 sdd.dryrun = B_TRUE;
1555                 err = dump_filesystems(zhp, &sdd);
1556                 sdd.dryrun = flags->dryrun;
1557                 sdd.verbose = B_FALSE;



1558                 if (flags->parsable) {
1559                         (void) fprintf(stderr, "size\t%llu\n",
1560                             (longlong_t)sdd.size);
1561                 } else {
1562                         char buf[16];
1563                         zfs_nicenum(sdd.size, buf, sizeof (buf));
1564                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1565                             "total estimated size is %s\n"), buf);
1566                 }
1567         }























1568         err = dump_filesystems(zhp, &sdd);
1569         fsavl_destroy(fsavl);
1570         nvlist_free(fss);
1571 
1572         if (flags->dedup) {
1573                 (void) close(pipefd[0]);





1574                 (void) pthread_join(tid, NULL);

1575         }
1576 
1577         if (sdd.cleanup_fd != -1) {
1578                 VERIFY(0 == close(sdd.cleanup_fd));
1579                 sdd.cleanup_fd = -1;
1580         }
1581 
1582         if (!flags->dryrun && (flags->replicate || flags->doall ||
1583             flags->props)) {
1584                 /*
1585                  * write final end record.  NB: want to do this even if
1586                  * there was some error, because it might not be totally
1587                  * failed.
1588                  */
1589                 dmu_replay_record_t drr = { 0 };
1590                 drr.drr_type = DRR_END;
1591                 if (write(outfd, &drr, sizeof (drr)) == -1) {
1592                         return (zfs_standard_error(zhp->zfs_hdl,
1593                             errno, errbuf));
1594                 }
1595         }
1596 
1597         return (err || sdd.err);
1598 
1599 stderr_out:
1600         err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1601 err_out:




1602         if (sdd.cleanup_fd != -1)
1603                 VERIFY(0 == close(sdd.cleanup_fd));
1604         if (flags->dedup) {
1605                 (void) pthread_cancel(tid);
1606                 (void) pthread_join(tid, NULL);
1607                 (void) close(pipefd[0]);
1608         }
1609         return (err);
1610 }
1611 
1612 /*
1613  * Routines specific to "zfs recv"
1614  */
1615 
1616 static int
1617 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1618     boolean_t byteswap, zio_cksum_t *zc)
1619 {
1620         char *cp = buf;
1621         int rv;
1622         int len = ilen;
1623 
1624         do {




 776         }
 777 
 778         *nvlp = sd.fss;
 779         return (0);
 780 }
 781 
 782 /*
 783  * Routines specific to "zfs send"
 784  */
 /*
  * Per-send state.  zfs_send() fills this in and dump_snapshot() receives
  * it through its opaque callback argument.
  */
 785 typedef struct send_dump_data {
 786         /* these are all just the short snapname (the part after the @) */
 787         const char *fromsnap;
 788         const char *tosnap;
 789         char prevsnap[ZFS_MAXNAMELEN];
             /*
              * dump_snapshot() stores the ZFS_PROP_OBJSETID of the
              * from-snapshot here when it records that name in prevsnap.
              */
 790         uint64_t prevsnap_obj;
             /*
              * seenfrom and seento are set by dump_snapshot() when it reaches
              * the from / to snapshot.  The other flags on these two lines are
              * copied from the caller's sendflags_t by zfs_send(), which also
              * forces dryrun on for its initial dry-run pass.
              */
 791         boolean_t seenfrom, seento, replicate, doall, fromorigin;
 792         boolean_t verbose, dryrun, parsable, progress;
             /*
              * Set by zfs_send() to pipefd[0] when tid is non-zero, otherwise
              * to the caller's outfd.
              */
 793         int outfd;
 794         boolean_t err;
             /* fss and fsavl are the outputs of gather_nvlist(). */
 795         nvlist_t *fss;
             /*
              * Allocated by zfs_send() when holds are going to be placed, NULL
              * otherwise.  gather_holds() adds one string entry per snapshot,
              * keyed by the full snapshot name with holdtag as the value.
              */
 796         nvlist_t *snapholds;
 797         avl_tree_t *fsavl;
             /*
              * Optional filter: when non-NULL, dump_snapshot() calls it with
              * filter_cb_arg and skips the snapshot if it returns B_FALSE.
              */
 798         snapfilter_cb_t *filter_cb;
 799         void *filter_cb_arg;
             /* Set from *debugnvp when the caller supplied a debugnvp. */
 800         nvlist_t *debugnv;
             /*
              * Hold tag of the form ".send-<pid>-<seq>" built by zfs_send()
              * and recorded against each snapshot by gather_holds().
              */
 801         char holdtag[ZFS_MAXNAMELEN];
             /*
              * Descriptor opened on ZFS_DEV by zfs_send() when holds are going
              * to be placed; -1 otherwise.
              */
 802         int cleanup_fd;
             /*
              * Printed by zfs_send() as the total estimated size after the
              * verbose dry run.
              */
 803         uint64_t size;
 804 } send_dump_data_t;
 805 
 806 static int
 807 estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
 808     boolean_t fromorigin, uint64_t *sizep)
 809 {
 810         zfs_cmd_t zc = { 0 };
 811         libzfs_handle_t *hdl = zhp->zfs_hdl;
 812 
 813         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 814         assert(fromsnap_obj == 0 || !fromorigin);
 815 
 816         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));


 926                 case ENXIO:
 927                 case EPIPE:
 928                 case ERANGE:
 929                 case EFAULT:
 930                 case EROFS:
 931                         zfs_error_aux(hdl, strerror(errno));
 932                         return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 933 
 934                 default:
 935                         return (zfs_standard_error(hdl, errno, errbuf));
 936                 }
 937         }
 938 
 939         if (debugnv)
 940                 VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
 941         nvlist_free(thisdbg);
 942 
 943         return (0);
 944 }
 945 
 /*
  * Record that the snapshot zhp needs a hold by adding an entry to
  * sdd->snapholds, keyed by the full snapshot name with sdd->holdtag as
  * the value.  Does nothing when sdd->snapholds is NULL.  No hold is
  * placed by this function itself; it only adds to the list.
  */
 946 static void
 947 gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
 948 {




 949         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 950 



 951         /*
 952          * zfs_send() only sets snapholds for sends that need them,
 953          * e.g. replication and doall.
 954          */
 955         if (sdd->snapholds == NULL)
 956                 return;















 957 
             /* Full "dataset@snap" name -> hold tag. */
 958         fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
 959 }
 960 
 961 static void *
 962 send_progress_thread(void *arg)
 963 {
 964         progress_arg_t *pa = arg;
 965 
 966         zfs_cmd_t zc = { 0 };
 967         zfs_handle_t *zhp = pa->pa_zhp;
 968         libzfs_handle_t *hdl = zhp->zfs_hdl;
 969         unsigned long long bytes;
 970         char buf[16];
 971 
 972         time_t t;
 973         struct tm *tm;
 974 
 975         assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 976         (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 977 
 978         if (!pa->pa_parsable)


 994 
 995                 if (pa->pa_parsable) {
 996                         (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
 997                             tm->tm_hour, tm->tm_min, tm->tm_sec,
 998                             bytes, zhp->zfs_name);
 999                 } else {
1000                         zfs_nicenum(bytes, buf, sizeof (buf));
1001                         (void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1002                             tm->tm_hour, tm->tm_min, tm->tm_sec,
1003                             buf, zhp->zfs_name);
1004                 }
1005         }
1006 }
1007 
1008 static int
1009 dump_snapshot(zfs_handle_t *zhp, void *arg)
1010 {
1011         send_dump_data_t *sdd = arg;
1012         progress_arg_t pa = { 0 };
1013         pthread_t tid;

1014         char *thissnap;
1015         int err;
1016         boolean_t isfromsnap, istosnap, fromorigin;
1017         boolean_t exclude = B_FALSE;
1018 
1019         err = 0;
1020         thissnap = strchr(zhp->zfs_name, '@') + 1;
1021         isfromsnap = (sdd->fromsnap != NULL &&
1022             strcmp(sdd->fromsnap, thissnap) == 0);
1023 
1024         if (!sdd->seenfrom && isfromsnap) {
1025                 gather_holds(zhp, sdd);

1026                 sdd->seenfrom = B_TRUE;
1027                 (void) strcpy(sdd->prevsnap, thissnap);
1028                 sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);




1029                 zfs_close(zhp);
1030                 return (0);
1031         }
1032 
1033         if (sdd->seento || !sdd->seenfrom) {
1034                 zfs_close(zhp);
1035                 return (0);
1036         }
1037 
1038         istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1039         if (istosnap)
1040                 sdd->seento = B_TRUE;
1041 
1042         if (!sdd->doall && !isfromsnap && !istosnap) {
1043                 if (sdd->replicate) {
1044                         char *snapname;
1045                         nvlist_t *snapprops;
1046                         /*
1047                          * Filter out all intermediate snapshots except origin
1048                          * snapshots needed to replicate clones.
1049                          */
1050                         nvlist_t *nvfs = fsavl_find(sdd->fsavl,


1061         }
1062 
1063         /*
1064          * If a filter function exists, call it to determine whether
1065          * this snapshot will be sent.
1066          */
1067         if (exclude || (sdd->filter_cb != NULL &&
1068             sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1069                 /*
1070                  * This snapshot is filtered out.  Don't send it, and don't
1071                  * set prevsnap_obj, so it will be as if this snapshot didn't
1072                  * exist, and the next accepted snapshot will be sent as
1073                  * an incremental from the last accepted one, or as the
1074                  * first (and full) snapshot in the case of a replication,
1075                  * non-incremental send.
1076                  */
1077                 zfs_close(zhp);
1078                 return (0);
1079         }
1080 
1081         gather_holds(zhp, sdd);







1082         fromorigin = sdd->prevsnap[0] == '\0' &&
1083             (sdd->fromorigin || sdd->replicate);
1084 
1085         if (sdd->verbose) {
1086                 uint64_t size;
1087                 err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1088                     fromorigin, &size);
1089 
1090                 if (sdd->parsable) {
1091                         if (sdd->prevsnap[0] != '\0') {
1092                                 (void) fprintf(stderr, "incremental\t%s\t%s",
1093                                     sdd->prevsnap, zhp->zfs_name);
1094                         } else {
1095                                 (void) fprintf(stderr, "full\t%s",
1096                                     zhp->zfs_name);
1097                         }
1098                 } else {
1099                         (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1100                             "send from @%s to %s"),
1101                             sdd->prevsnap, zhp->zfs_name);


1329  *       is TRUE.
1330  *
1331  * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1332  * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1333  * if "replicate" is set.  If "doall" is set, dump all the intermediate
1334  * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1335  * case too. If "props" is set, send properties.
1336  */
1337 int
1338 zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1339     sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1340     void *cb_arg, nvlist_t **debugnvp)
1341 {
1342         char errbuf[1024];
1343         send_dump_data_t sdd = { 0 };
1344         int err = 0;
1345         nvlist_t *fss = NULL;
1346         avl_tree_t *fsavl = NULL;
1347         static uint64_t holdseq;
1348         int spa_version;
1349         pthread_t tid = 0;
1350         int pipefd[2];
1351         dedup_arg_t dda = { 0 };
1352         int featureflags = 0;
1353 
1354         (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1355             "cannot send '%s'"), zhp->zfs_name);
1356 
1357         if (fromsnap && fromsnap[0] == '\0') {
1358                 zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1359                     "zero-length incremental source"));
1360                 return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1361         }
1362 
1363         if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1364                 uint64_t version;
1365                 version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1366                 if (version >= ZPL_VERSION_SA) {
1367                         featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1368                 }
1369         }


1402                                 VERIFY(0 == nvlist_add_string(hdrnv,
1403                                     "fromsnap", fromsnap));
1404                         }
1405                         VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1406                         if (!flags->replicate) {
1407                                 VERIFY(0 == nvlist_add_boolean(hdrnv,
1408                                     "not_recursive"));
1409                         }
1410 
1411                         err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1412                             fromsnap, tosnap, flags->replicate, &fss, &fsavl);
1413                         if (err)
1414                                 goto err_out;
1415                         VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1416                         err = nvlist_pack(hdrnv, &packbuf, &buflen,
1417                             NV_ENCODE_XDR, 0);
1418                         if (debugnvp)
1419                                 *debugnvp = hdrnv;
1420                         else
1421                                 nvlist_free(hdrnv);
1422                         if (err)


1423                                 goto stderr_out;
1424                 }

1425 
1426                 if (!flags->dryrun) {
1427                         /* write first begin record */
1428                         drr.drr_type = DRR_BEGIN;
1429                         drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1430                         DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1431                             drr_versioninfo, DMU_COMPOUNDSTREAM);
1432                         DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1433                             drr_versioninfo, featureflags);
1434                         (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1435                             sizeof (drr.drr_u.drr_begin.drr_toname),
1436                             "%s@%s", zhp->zfs_name, tosnap);
1437                         drr.drr_payloadlen = buflen;
1438                         err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1439 
1440                         /* write header nvlist */
1441                         if (err != -1 && packbuf != NULL) {
1442                                 err = cksum_and_write(packbuf, buflen, &zc,
1443                                     outfd);
1444                         }
1445                         free(packbuf);
1446                         if (err == -1) {


1447                                 err = errno;
1448                                 goto stderr_out;
1449                         }
1450 
1451                         /* write end record */
1452                         bzero(&drr, sizeof (drr));
1453                         drr.drr_type = DRR_END;
1454                         drr.drr_u.drr_end.drr_checksum = zc;
1455                         err = write(outfd, &drr, sizeof (drr));
1456                         if (err == -1) {


1457                                 err = errno;
1458                                 goto stderr_out;
1459                         }
1460 
1461                         err = 0;
1462                 }
1463         }
1464 
1465         /* dump each stream */
1466         sdd.fromsnap = fromsnap;
1467         sdd.tosnap = tosnap;
1468         if (tid != 0)
1469                 sdd.outfd = pipefd[0];
1470         else
1471                 sdd.outfd = outfd;
1472         sdd.replicate = flags->replicate;
1473         sdd.doall = flags->doall;
1474         sdd.fromorigin = flags->fromorigin;
1475         sdd.fss = fss;
1476         sdd.fsavl = fsavl;
1477         sdd.verbose = flags->verbose;
1478         sdd.parsable = flags->parsable;
1479         sdd.progress = flags->progress;
1480         sdd.dryrun = flags->dryrun;
1481         sdd.filter_cb = filter_func;
1482         sdd.filter_cb_arg = cb_arg;
1483         if (debugnvp)
1484                 sdd.debugnv = *debugnvp;
1485 
1486         /*
1487          * Some flags require that we place user holds on the datasets that are
1488          * being sent so they don't get destroyed during the send. We can skip
1489          * this step if the pool is imported read-only since the datasets cannot
1490          * be destroyed.
1491          */
1492         if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1493             ZPOOL_PROP_READONLY, NULL) &&
1494             zfs_spa_version(zhp, &spa_version) == 0 &&
1495             spa_version >= SPA_VERSION_USERREFS &&
1496             (flags->doall || flags->replicate)) {
1497                 ++holdseq;
1498                 (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1499                     ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1500                 sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1501                 if (sdd.cleanup_fd < 0) {
1502                         err = errno;
1503                         goto stderr_out;
1504                 }
1505                 sdd.snapholds = fnvlist_alloc();
1506         } else {
1507                 sdd.cleanup_fd = -1;
1508                 sdd.snapholds = NULL;
1509         }
1510         if (flags->verbose || sdd.snapholds != NULL) {
1511                 /*
1512                  * Do a verbose no-op dry run to get all the verbose output
1513                  * or to gather snapshot holds before generating any data,
1514                  * then do a non-verbose real run to generate the streams.
1515                  */
1516                 sdd.dryrun = B_TRUE;
1517                 err = dump_filesystems(zhp, &sdd);
1518         
1519                 if (err != 0)
1520                         goto stderr_out;
1521 
1522                 if (flags->verbose) {
1523                         if (flags->parsable) {
1524                                 (void) fprintf(stderr, "size\t%llu\n",
1525                                     (longlong_t)sdd.size);
1526                         } else {
1527                                 char buf[16];
1528                                 zfs_nicenum(sdd.size, buf, sizeof (buf));
1529                                 (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1530                                     "total estimated size is %s\n"), buf);
1531                         }
1532                 }
1533 
1534                 /* Ensure no snaps found is treated as an error. */
1535                 if (!sdd.seento) {
1536                         err = ENOENT;
1537                         goto err_out;
1538                 }
1539 
1540                 /* Skip the second run if dryrun was requested. */
1541                 if (flags->dryrun)
1542                         goto err_out;
1543         
1544                 if (sdd.snapholds != NULL) {
1545                         err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
1546                         if (err != 0)
1547                                 goto stderr_out;
1548                         fnvlist_free(sdd.snapholds);
1549                         sdd.snapholds = NULL;
1550                 }
1551 
1552                 sdd.dryrun = B_FALSE;
1553                 sdd.verbose = B_FALSE;
1554         }
1555         
1556         err = dump_filesystems(zhp, &sdd);
1557         fsavl_destroy(fsavl);
1558         nvlist_free(fss);
1559 
1560         /* Ensure no snaps found is treated as an error. */
1561         if (err == 0 && !sdd.seento)
1562                 err = ENOENT;
1563 
1564         if (tid != 0) {
1565                 if (err != 0)
1566                         (void) pthread_cancel(tid);
1567                 (void) pthread_join(tid, NULL);
1568                 (void) close(pipefd[0]);
1569         }
1570 
1571         if (sdd.cleanup_fd != -1) {
1572                 VERIFY(0 == close(sdd.cleanup_fd));
1573                 sdd.cleanup_fd = -1;
1574         }
1575 
1576         if (!flags->dryrun && (flags->replicate || flags->doall ||
1577             flags->props)) {
1578                 /*
1579                  * write final end record.  NB: want to do this even if
1580                  * there was some error, because it might not be totally
1581                  * failed.
1582                  */
1583                 dmu_replay_record_t drr = { 0 };
1584                 drr.drr_type = DRR_END;
1585                 if (write(outfd, &drr, sizeof (drr)) == -1) {
1586                         return (zfs_standard_error(zhp->zfs_hdl,
1587                             errno, errbuf));
1588                 }
1589         }
1590 
1591         return (err || sdd.err);
1592 
1593 stderr_out:
1594         err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1595 err_out:
1596         fsavl_destroy(fsavl);
1597         nvlist_free(fss);
1598         fnvlist_free(sdd.snapholds);
1599 
1600         if (sdd.cleanup_fd != -1)
1601                 VERIFY(0 == close(sdd.cleanup_fd));
1602         if (tid != 0) {
1603                 (void) pthread_cancel(tid);
1604                 (void) pthread_join(tid, NULL);
1605                 (void) close(pipefd[0]);
1606         }
1607         return (err);
1608 }
1609 
1610 /*
1611  * Routines specific to "zfs recv"
1612  */
1613 
1614 static int
1615 recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1616     boolean_t byteswap, zio_cksum_t *zc)
1617 {
1618         char *cp = buf;
1619         int rv;
1620         int len = ilen;
1621 
1622         do {