Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>

Split Close
Expand all
Collapse all
          --- old/usr/src/lib/libzfs/common/libzfs_sendrecv.c
          +++ new/usr/src/lib/libzfs/common/libzfs_sendrecv.c
↓ open down ↓ 785 lines elided ↑ open up ↑
 786  786          /* these are all just the short snapname (the part after the @) */
 787  787          const char *fromsnap;
 788  788          const char *tosnap;
 789  789          char prevsnap[ZFS_MAXNAMELEN];
 790  790          uint64_t prevsnap_obj;
 791  791          boolean_t seenfrom, seento, replicate, doall, fromorigin;
 792  792          boolean_t verbose, dryrun, parsable, progress;
 793  793          int outfd;
 794  794          boolean_t err;
 795  795          nvlist_t *fss;
      796 +        nvlist_t *snapholds;
 796  797          avl_tree_t *fsavl;
 797  798          snapfilter_cb_t *filter_cb;
 798  799          void *filter_cb_arg;
 799  800          nvlist_t *debugnv;
 800  801          char holdtag[ZFS_MAXNAMELEN];
 801  802          int cleanup_fd;
 802  803          uint64_t size;
 803  804  } send_dump_data_t;
 804  805  
 805  806  static int
↓ open down ↓ 129 lines elided ↑ open up ↑
 935  936                  }
 936  937          }
 937  938  
 938  939          if (debugnv)
 939  940                  VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
 940  941          nvlist_free(thisdbg);
 941  942  
 942  943          return (0);
 943  944  }
 944  945  
 945      -static int
 946      -hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
      946 +static void
      947 +gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
 947  948  {
 948      -        zfs_handle_t *pzhp;
 949      -        int error = 0;
 950      -        char *thissnap;
 951      -
 952  949          assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 953  950  
 954      -        if (sdd->dryrun)
 955      -                return (0);
 956      -
 957  951          /*
 958      -         * zfs_send() only opens a cleanup_fd for sends that need it,
      952 +         * zfs_send() only sets snapholds for sends that need them,
 959  953           * e.g. replication and doall.
 960  954           */
 961      -        if (sdd->cleanup_fd == -1)
 962      -                return (0);
 963      -
 964      -        thissnap = strchr(zhp->zfs_name, '@') + 1;
 965      -        *(thissnap - 1) = '\0';
 966      -        pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
 967      -        *(thissnap - 1) = '@';
 968      -
 969      -        /*
 970      -         * It's OK if the parent no longer exists.  The send code will
 971      -         * handle that error.
 972      -         */
 973      -        if (pzhp) {
 974      -                error = zfs_hold(pzhp, thissnap, sdd->holdtag,
 975      -                    B_FALSE, B_TRUE, sdd->cleanup_fd);
 976      -                zfs_close(pzhp);
 977      -        }
      955 +        if (sdd->snapholds == NULL)
      956 +                return;
 978  957  
 979      -        return (error);
      958 +        fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
 980  959  }
 981  960  
 982  961  static void *
 983  962  send_progress_thread(void *arg)
 984  963  {
 985  964          progress_arg_t *pa = arg;
 986  965  
 987  966          zfs_cmd_t zc = { 0 };
 988  967          zfs_handle_t *zhp = pa->pa_zhp;
 989  968          libzfs_handle_t *hdl = zhp->zfs_hdl;
↓ open down ↓ 46 lines elided ↑ open up ↑
1036 1015          char *thissnap;
1037 1016          int err;
1038 1017          boolean_t isfromsnap, istosnap, fromorigin;
1039 1018          boolean_t exclude = B_FALSE;
1040 1019  
1041 1020          thissnap = strchr(zhp->zfs_name, '@') + 1;
1042 1021          isfromsnap = (sdd->fromsnap != NULL &&
1043 1022              strcmp(sdd->fromsnap, thissnap) == 0);
1044 1023  
1045 1024          if (!sdd->seenfrom && isfromsnap) {
1046      -                err = hold_for_send(zhp, sdd);
1047      -                if (err == 0) {
1048      -                        sdd->seenfrom = B_TRUE;
1049      -                        (void) strcpy(sdd->prevsnap, thissnap);
1050      -                        sdd->prevsnap_obj = zfs_prop_get_int(zhp,
1051      -                            ZFS_PROP_OBJSETID);
1052      -                } else if (err == ENOENT) {
1053      -                        err = 0;
1054      -                }
     1025 +                gather_holds(zhp, sdd);
     1026 +                sdd->seenfrom = B_TRUE;
     1027 +                (void) strcpy(sdd->prevsnap, thissnap);
     1028 +                sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1055 1029                  zfs_close(zhp);
1056      -                return (err);
     1030 +                return (0);
1057 1031          }
1058 1032  
1059 1033          if (sdd->seento || !sdd->seenfrom) {
1060 1034                  zfs_close(zhp);
1061 1035                  return (0);
1062 1036          }
1063 1037  
1064 1038          istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1065 1039          if (istosnap)
1066 1040                  sdd->seento = B_TRUE;
↓ open down ↓ 30 lines elided ↑ open up ↑
1097 1071                   * set prevsnap_obj, so it will be as if this snapshot didn't
1098 1072                   * exist, and the next accepted snapshot will be sent as
1099 1073                   * an incremental from the last accepted one, or as the
1100 1074                   * first (and full) snapshot in the case of a replication,
1101 1075                   * non-incremental send.
1102 1076                   */
1103 1077                  zfs_close(zhp);
1104 1078                  return (0);
1105 1079          }
1106 1080  
1107      -        err = hold_for_send(zhp, sdd);
1108      -        if (err) {
1109      -                if (err == ENOENT)
1110      -                        err = 0;
1111      -                zfs_close(zhp);
1112      -                return (err);
1113      -        }
1114      -
     1081 +        gather_holds(zhp, sdd);
1115 1082          fromorigin = sdd->prevsnap[0] == '\0' &&
1116 1083              (sdd->fromorigin || sdd->replicate);
1117 1084  
1118 1085          if (sdd->verbose) {
1119 1086                  uint64_t size;
1120 1087                  err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1121 1088                      fromorigin, &size);
1122 1089  
1123 1090                  if (sdd->parsable) {
1124 1091                          if (sdd->prevsnap[0] != '\0') {
↓ open down ↓ 410 lines elided ↑ open up ↑
1535 1502              spa_version >= SPA_VERSION_USERREFS &&
1536 1503              (flags->doall || flags->replicate)) {
1537 1504                  ++holdseq;
1538 1505                  (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1539 1506                      ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1540 1507                  sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1541 1508                  if (sdd.cleanup_fd < 0) {
1542 1509                          err = errno;
1543 1510                          goto stderr_out;
1544 1511                  }
     1512 +                sdd.snapholds = fnvlist_alloc();
1545 1513          } else {
1546 1514                  sdd.cleanup_fd = -1;
     1515 +                sdd.snapholds = NULL;
1547 1516          }
1548 1517          if (flags->verbose) {
1549 1518                  /*
1550 1519                   * Do a verbose no-op dry run to get all the verbose output
1551 1520                   * before generating any data.  Then do a non-verbose real
1552 1521                   * run to generate the streams.
1553 1522                   */
1554 1523                  sdd.dryrun = B_TRUE;
1555 1524                  err = dump_filesystems(zhp, &sdd);
1556 1525                  sdd.dryrun = flags->dryrun;
↓ open down ↓ 1 lines elided ↑ open up ↑
1558 1527                  if (flags->parsable) {
1559 1528                          (void) fprintf(stderr, "size\t%llu\n",
1560 1529                              (longlong_t)sdd.size);
1561 1530                  } else {
1562 1531                          char buf[16];
1563 1532                          zfs_nicenum(sdd.size, buf, sizeof (buf));
1564 1533                          (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1565 1534                              "total estimated size is %s\n"), buf);
1566 1535                  }
1567 1536          }
     1537 +
     1538 +        if (sdd.snapholds != NULL) {
     1539 +                /* Holds are required. */
     1540 +                if (!flags->verbose) {
     1541 +                        /*
     1542 +                         * A verbose dry run wasn't done so do a non-verbose
     1543 +                         * dry run to gather snapshot holds.
     1544 +                         */
     1545 +                        sdd.dryrun = B_TRUE;
     1546 +                        err = dump_filesystems(zhp, &sdd);
     1547 +                        sdd.dryrun = flags->dryrun;
     1548 +                }
     1549 +
     1550 +                if (err != 0) {
     1551 +                        fnvlist_free(sdd.snapholds);
     1552 +                        goto stderr_out;
     1553 +                }
     1554 +
     1555 +                err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
     1556 +                fnvlist_free(sdd.snapholds);
     1557 +                if (err != 0)
     1558 +                        goto stderr_out;
     1559 +        }
     1560 +        
1568 1561          err = dump_filesystems(zhp, &sdd);
1569 1562          fsavl_destroy(fsavl);
1570 1563          nvlist_free(fss);
1571 1564  
1572 1565          if (flags->dedup) {
1573 1566                  (void) close(pipefd[0]);
1574 1567                  (void) pthread_join(tid, NULL);
1575 1568          }
1576 1569  
1577 1570          if (sdd.cleanup_fd != -1) {
↓ open down ↓ 1618 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX