Print this page
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>

Split Close
Expand all
Collapse all
          --- old/usr/src/lib/libzfs/common/libzfs_sendrecv.c
          +++ new/usr/src/lib/libzfs/common/libzfs_sendrecv.c
↓ open down ↓ 785 lines elided ↑ open up ↑
 786  786          /* these are all just the short snapname (the part after the @) */
 787  787          const char *fromsnap;
 788  788          const char *tosnap;
 789  789          char prevsnap[ZFS_MAXNAMELEN];
 790  790          uint64_t prevsnap_obj;
 791  791          boolean_t seenfrom, seento, replicate, doall, fromorigin;
 792  792          boolean_t verbose, dryrun, parsable, progress;
 793  793          int outfd;
 794  794          boolean_t err;
 795  795          nvlist_t *fss;
      796 +        nvlist_t *snapholds;
 796  797          avl_tree_t *fsavl;
 797  798          snapfilter_cb_t *filter_cb;
 798  799          void *filter_cb_arg;
 799  800          nvlist_t *debugnv;
 800  801          char holdtag[ZFS_MAXNAMELEN];
 801  802          int cleanup_fd;
 802  803          uint64_t size;
 803  804  } send_dump_data_t;
 804  805  
 805  806  static int
↓ open down ↓ 129 lines elided ↑ open up ↑
 935  936                  }
 936  937          }
 937  938  
 938  939          if (debugnv)
 939  940                  VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
 940  941          nvlist_free(thisdbg);
 941  942  
 942  943          return (0);
 943  944  }
 944  945  
 945      -static int
 946      -hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
      946 +static void
      947 +gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
 947  948  {
 948      -        zfs_handle_t *pzhp;
 949      -        int error = 0;
 950      -        char *thissnap;
 951      -
 952  949          assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 953  950  
 954      -        if (sdd->dryrun)
 955      -                return (0);
 956      -
 957  951          /*
 958      -         * zfs_send() only opens a cleanup_fd for sends that need it,
      952 +         * zfs_send() only sets snapholds for sends that need them,
 959  953           * e.g. replication and doall.
 960  954           */
 961      -        if (sdd->cleanup_fd == -1)
 962      -                return (0);
 963      -
 964      -        thissnap = strchr(zhp->zfs_name, '@') + 1;
 965      -        *(thissnap - 1) = '\0';
 966      -        pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
 967      -        *(thissnap - 1) = '@';
 968      -
 969      -        /*
 970      -         * It's OK if the parent no longer exists.  The send code will
 971      -         * handle that error.
 972      -         */
 973      -        if (pzhp) {
 974      -                error = zfs_hold(pzhp, thissnap, sdd->holdtag,
 975      -                    B_FALSE, B_TRUE, sdd->cleanup_fd);
 976      -                zfs_close(pzhp);
 977      -        }
      955 +        if (sdd->snapholds == NULL)
      956 +                return;
 978  957  
 979      -        return (error);
      958 +        fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
 980  959  }
 981  960  
 982  961  static void *
 983  962  send_progress_thread(void *arg)
 984  963  {
 985  964          progress_arg_t *pa = arg;
 986  965  
 987  966          zfs_cmd_t zc = { 0 };
 988  967          zfs_handle_t *zhp = pa->pa_zhp;
 989  968          libzfs_handle_t *hdl = zhp->zfs_hdl;
↓ open down ↓ 35 lines elided ↑ open up ↑
1025 1004                  }
1026 1005          }
1027 1006  }
1028 1007  
1029 1008  static int
1030 1009  dump_snapshot(zfs_handle_t *zhp, void *arg)
1031 1010  {
1032 1011          send_dump_data_t *sdd = arg;
1033 1012          progress_arg_t pa = { 0 };
1034 1013          pthread_t tid;
1035      -
1036 1014          char *thissnap;
1037 1015          int err;
1038 1016          boolean_t isfromsnap, istosnap, fromorigin;
1039 1017          boolean_t exclude = B_FALSE;
1040 1018  
     1019 +        err = 0;
1041 1020          thissnap = strchr(zhp->zfs_name, '@') + 1;
1042 1021          isfromsnap = (sdd->fromsnap != NULL &&
1043 1022              strcmp(sdd->fromsnap, thissnap) == 0);
1044 1023  
1045 1024          if (!sdd->seenfrom && isfromsnap) {
1046      -                err = hold_for_send(zhp, sdd);
1047      -                if (err == 0) {
1048      -                        sdd->seenfrom = B_TRUE;
1049      -                        (void) strcpy(sdd->prevsnap, thissnap);
1050      -                        sdd->prevsnap_obj = zfs_prop_get_int(zhp,
1051      -                            ZFS_PROP_OBJSETID);
1052      -                } else if (err == ENOENT) {
1053      -                        err = 0;
1054      -                }
     1025 +                gather_holds(zhp, sdd);
     1026 +                sdd->seenfrom = B_TRUE;
     1027 +                (void) strcpy(sdd->prevsnap, thissnap);
     1028 +                sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1055 1029                  zfs_close(zhp);
1056      -                return (err);
     1030 +                return (0);
1057 1031          }
1058 1032  
1059 1033          if (sdd->seento || !sdd->seenfrom) {
1060 1034                  zfs_close(zhp);
1061 1035                  return (0);
1062 1036          }
1063 1037  
1064 1038          istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1065 1039          if (istosnap)
1066 1040                  sdd->seento = B_TRUE;
↓ open down ↓ 30 lines elided ↑ open up ↑
1097 1071                   * set prevsnap_obj, so it will be as if this snapshot didn't
1098 1072                   * exist, and the next accepted snapshot will be sent as
1099 1073                   * an incremental from the last accepted one, or as the
1100 1074                   * first (and full) snapshot in the case of a replication,
1101 1075                   * non-incremental send.
1102 1076                   */
1103 1077                  zfs_close(zhp);
1104 1078                  return (0);
1105 1079          }
1106 1080  
1107      -        err = hold_for_send(zhp, sdd);
1108      -        if (err) {
1109      -                if (err == ENOENT)
1110      -                        err = 0;
1111      -                zfs_close(zhp);
1112      -                return (err);
1113      -        }
1114      -
     1081 +        gather_holds(zhp, sdd);
1115 1082          fromorigin = sdd->prevsnap[0] == '\0' &&
1116 1083              (sdd->fromorigin || sdd->replicate);
1117 1084  
1118 1085          if (sdd->verbose) {
1119 1086                  uint64_t size;
1120 1087                  err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1121 1088                      fromorigin, &size);
1122 1089  
1123 1090                  if (sdd->parsable) {
1124 1091                          if (sdd->prevsnap[0] != '\0') {
↓ open down ↓ 247 lines elided ↑ open up ↑
1372 1339      sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1373 1340      void *cb_arg, nvlist_t **debugnvp)
1374 1341  {
1375 1342          char errbuf[1024];
1376 1343          send_dump_data_t sdd = { 0 };
1377 1344          int err = 0;
1378 1345          nvlist_t *fss = NULL;
1379 1346          avl_tree_t *fsavl = NULL;
1380 1347          static uint64_t holdseq;
1381 1348          int spa_version;
1382      -        pthread_t tid;
     1349 +        pthread_t tid = 0;
1383 1350          int pipefd[2];
1384 1351          dedup_arg_t dda = { 0 };
1385 1352          int featureflags = 0;
1386 1353  
1387 1354          (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1388 1355              "cannot send '%s'"), zhp->zfs_name);
1389 1356  
1390 1357          if (fromsnap && fromsnap[0] == '\0') {
1391 1358                  zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1392 1359                      "zero-length incremental source"));
↓ open down ↓ 52 lines elided ↑ open up ↑
1445 1412                              fromsnap, tosnap, flags->replicate, &fss, &fsavl);
1446 1413                          if (err)
1447 1414                                  goto err_out;
1448 1415                          VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1449 1416                          err = nvlist_pack(hdrnv, &packbuf, &buflen,
1450 1417                              NV_ENCODE_XDR, 0);
1451 1418                          if (debugnvp)
1452 1419                                  *debugnvp = hdrnv;
1453 1420                          else
1454 1421                                  nvlist_free(hdrnv);
1455      -                        if (err) {
1456      -                                fsavl_destroy(fsavl);
1457      -                                nvlist_free(fss);
     1422 +                        if (err)
1458 1423                                  goto stderr_out;
1459      -                        }
1460 1424                  }
1461 1425  
1462 1426                  if (!flags->dryrun) {
1463 1427                          /* write first begin record */
1464 1428                          drr.drr_type = DRR_BEGIN;
1465 1429                          drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1466 1430                          DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1467 1431                              drr_versioninfo, DMU_COMPOUNDSTREAM);
1468 1432                          DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1469 1433                              drr_versioninfo, featureflags);
↓ open down ↓ 3 lines elided ↑ open up ↑
1473 1437                          drr.drr_payloadlen = buflen;
1474 1438                          err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1475 1439  
1476 1440                          /* write header nvlist */
1477 1441                          if (err != -1 && packbuf != NULL) {
1478 1442                                  err = cksum_and_write(packbuf, buflen, &zc,
1479 1443                                      outfd);
1480 1444                          }
1481 1445                          free(packbuf);
1482 1446                          if (err == -1) {
1483      -                                fsavl_destroy(fsavl);
1484      -                                nvlist_free(fss);
1485 1447                                  err = errno;
1486 1448                                  goto stderr_out;
1487 1449                          }
1488 1450  
1489 1451                          /* write end record */
1490 1452                          bzero(&drr, sizeof (drr));
1491 1453                          drr.drr_type = DRR_END;
1492 1454                          drr.drr_u.drr_end.drr_checksum = zc;
1493 1455                          err = write(outfd, &drr, sizeof (drr));
1494 1456                          if (err == -1) {
1495      -                                fsavl_destroy(fsavl);
1496      -                                nvlist_free(fss);
1497 1457                                  err = errno;
1498 1458                                  goto stderr_out;
1499 1459                          }
1500 1460  
1501 1461                          err = 0;
1502 1462                  }
1503 1463          }
1504 1464  
1505 1465          /* dump each stream */
1506 1466          sdd.fromsnap = fromsnap;
1507 1467          sdd.tosnap = tosnap;
1508      -        if (flags->dedup)
     1468 +        if (tid != 0)
1509 1469                  sdd.outfd = pipefd[0];
1510 1470          else
1511 1471                  sdd.outfd = outfd;
1512 1472          sdd.replicate = flags->replicate;
1513 1473          sdd.doall = flags->doall;
1514 1474          sdd.fromorigin = flags->fromorigin;
1515 1475          sdd.fss = fss;
1516 1476          sdd.fsavl = fsavl;
1517 1477          sdd.verbose = flags->verbose;
1518 1478          sdd.parsable = flags->parsable;
↓ open down ↓ 16 lines elided ↑ open up ↑
1535 1495              spa_version >= SPA_VERSION_USERREFS &&
1536 1496              (flags->doall || flags->replicate)) {
1537 1497                  ++holdseq;
1538 1498                  (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1539 1499                      ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1540 1500                  sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1541 1501                  if (sdd.cleanup_fd < 0) {
1542 1502                          err = errno;
1543 1503                          goto stderr_out;
1544 1504                  }
     1505 +                sdd.snapholds = fnvlist_alloc();
1545 1506          } else {
1546 1507                  sdd.cleanup_fd = -1;
     1508 +                sdd.snapholds = NULL;
1547 1509          }
1548      -        if (flags->verbose) {
     1510 +        if (flags->verbose || sdd.snapholds != NULL) {
1549 1511                  /*
1550 1512                   * Do a verbose no-op dry run to get all the verbose output
1551      -                 * before generating any data.  Then do a non-verbose real
1552      -                 * run to generate the streams.
     1513 +                 * or to gather snapshot holds before generating any data,
     1514 +                 * then do a non-verbose real run to generate the streams.
1553 1515                   */
1554 1516                  sdd.dryrun = B_TRUE;
1555 1517                  err = dump_filesystems(zhp, &sdd);
1556      -                sdd.dryrun = flags->dryrun;
1557      -                sdd.verbose = B_FALSE;
1558      -                if (flags->parsable) {
1559      -                        (void) fprintf(stderr, "size\t%llu\n",
1560      -                            (longlong_t)sdd.size);
1561      -                } else {
1562      -                        char buf[16];
1563      -                        zfs_nicenum(sdd.size, buf, sizeof (buf));
1564      -                        (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1565      -                            "total estimated size is %s\n"), buf);
     1518 +        
     1519 +                if (err != 0)
     1520 +                        goto stderr_out;
     1521 +
     1522 +                if (flags->verbose) {
     1523 +                        if (flags->parsable) {
     1524 +                                (void) fprintf(stderr, "size\t%llu\n",
     1525 +                                    (longlong_t)sdd.size);
     1526 +                        } else {
     1527 +                                char buf[16];
     1528 +                                zfs_nicenum(sdd.size, buf, sizeof (buf));
     1529 +                                (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
     1530 +                                    "total estimated size is %s\n"), buf);
     1531 +                        }
     1532 +                }
     1533 +
     1534 +                /* Ensure no snaps found is treated as an error. */
     1535 +                if (!sdd.seento) {
     1536 +                        err = ENOENT;
     1537 +                        goto err_out;
1566 1538                  }
     1539 +
     1540 +                /* Skip the second run if dryrun was requested. */
     1541 +                if (flags->dryrun)
     1542 +                        goto err_out;
     1543 +        
     1544 +                if (sdd.snapholds != NULL) {
     1545 +                        err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
     1546 +                        if (err != 0)
     1547 +                                goto stderr_out;
     1548 +                        fnvlist_free(sdd.snapholds);
     1549 +                        sdd.snapholds = NULL;
     1550 +                }
     1551 +
     1552 +                sdd.dryrun = B_FALSE;
     1553 +                sdd.verbose = B_FALSE;
1567 1554          }
     1555 +        
1568 1556          err = dump_filesystems(zhp, &sdd);
1569 1557          fsavl_destroy(fsavl);
1570 1558          nvlist_free(fss);
1571 1559  
1572      -        if (flags->dedup) {
1573      -                (void) close(pipefd[0]);
     1560 +        /* Ensure no snaps found is treated as an error. */
     1561 +        if (err == 0 && !sdd.seento)
     1562 +                err = ENOENT;
     1563 +
     1564 +        if (tid != 0) {
     1565 +                if (err != 0)
     1566 +                        (void) pthread_cancel(tid);
1574 1567                  (void) pthread_join(tid, NULL);
1575      -        }
     1568 +                (void) close(pipefd[0]);
     1569 +        }
1576 1570  
1577 1571          if (sdd.cleanup_fd != -1) {
1578 1572                  VERIFY(0 == close(sdd.cleanup_fd));
1579 1573                  sdd.cleanup_fd = -1;
1580 1574          }
1581 1575  
1582 1576          if (!flags->dryrun && (flags->replicate || flags->doall ||
1583 1577              flags->props)) {
1584 1578                  /*
1585 1579                   * write final end record.  NB: want to do this even if
↓ open down ↓ 6 lines elided ↑ open up ↑
1592 1586                          return (zfs_standard_error(zhp->zfs_hdl,
1593 1587                              errno, errbuf));
1594 1588                  }
1595 1589          }
1596 1590  
1597 1591          return (err || sdd.err);
1598 1592  
1599 1593  stderr_out:
1600 1594          err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1601 1595  err_out:
     1596 +        fsavl_destroy(fsavl);
     1597 +        nvlist_free(fss);
     1598 +        fnvlist_free(sdd.snapholds);
     1599 +
1602 1600          if (sdd.cleanup_fd != -1)
1603 1601                  VERIFY(0 == close(sdd.cleanup_fd));
1604      -        if (flags->dedup) {
     1602 +        if (tid != 0) {
1605 1603                  (void) pthread_cancel(tid);
1606 1604                  (void) pthread_join(tid, NULL);
1607 1605                  (void) close(pipefd[0]);
1608 1606          }
1609 1607          return (err);
1610 1608  }
1611 1609  
1612 1610  /*
1613 1611   * Routines specific to "zfs recv"
1614 1612   */
↓ open down ↓ 1581 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX