Print this page
3956 ::vdev -r should work with pipelines
3957 ztest should update the cachefile before killing itself
3958 multiple scans can lead to partial resilvering
3959 ddt entries are not always resilvered
3960 dsl_scan can skip over dedup-ed blocks if physical birth != logical birth
3961 freed gang blocks are not resilvered and can cause pool to suspend
3962 ztest should print out zfs debug buffer before exiting
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/vdev.c
          +++ new/usr/src/uts/common/fs/zfs/vdev.c
↓ open down ↓ 513 lines elided ↑ open up ↑
 514  514                          uint64_t spare = 0;
 515  515  
 516  516                          if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
 517  517                              &spare) == 0 && spare)
 518  518                                  spa_spare_add(vd);
 519  519                  }
 520  520  
 521  521                  (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE,
 522  522                      &vd->vdev_offline);
 523  523  
 524      -                (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVERING,
 525      -                    &vd->vdev_resilvering);
      524 +                (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG,
      525 +                    &vd->vdev_resilver_txg);
 526  526  
 527  527                  /*
 528  528                   * When importing a pool, we want to ignore the persistent fault
 529  529                   * state, as the diagnosis made on another system may not be
 530  530                   * valid in the current context.  Local vdevs will
 531  531                   * remain in the faulted state.
 532  532                   */
 533  533                  if (spa_load_state(spa) == SPA_LOAD_OPEN) {
 534  534                          (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED,
 535  535                              &vd->vdev_faulted);
↓ open down ↓ 1120 lines elided ↑ open up ↑
1656 1656          boolean_t empty;
1657 1657  
1658 1658          mutex_enter(sm->sm_lock);
1659 1659          empty = (sm->sm_space == 0);
1660 1660          mutex_exit(sm->sm_lock);
1661 1661  
1662 1662          return (empty);
1663 1663  }
1664 1664  
1665 1665  /*
     1666 + * Returns the txg just before the lowest txg in DTL_MISSING (ss_start - 1).
     1667 + */
     1668 +static uint64_t
     1669 +vdev_dtl_min(vdev_t *vd)
     1670 +{
     1671 +        space_seg_t *ss;
     1672 +
     1673 +        ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock));
     1674 +        ASSERT3U(vd->vdev_dtl[DTL_MISSING].sm_space, !=, 0);
     1675 +        ASSERT0(vd->vdev_children);
     1676 +
     1677 +        ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root);
     1678 +        return (ss->ss_start - 1);
     1679 +}
     1680 +
     1681 +/*
     1682 + * Returns the highest txg in the DTL.
     1683 + */
     1684 +static uint64_t
     1685 +vdev_dtl_max(vdev_t *vd)
     1686 +{
     1687 +        space_seg_t *ss;
     1688 +
     1689 +        ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock));
     1690 +        ASSERT3U(vd->vdev_dtl[DTL_MISSING].sm_space, !=, 0);
     1691 +        ASSERT0(vd->vdev_children);
     1692 +
     1693 +        ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root);
     1694 +        return (ss->ss_end);
     1695 +}
     1696 +
     1697 +/*
     1698 + * Determine if a resilvering vdev should remove any DTL entries from
     1699 + * its range. If the vdev was resilvering for the entire duration of the
     1700 + * scan then it should excise that range from its DTLs. Otherwise, this
     1701 + * vdev is considered partially resilvered and should leave its DTL
     1702 + * entries intact. The comment in vdev_dtl_reassess() describes how we
     1703 + * excise the DTLs.
     1704 + */
     1705 +static boolean_t
     1706 +vdev_dtl_should_excise(vdev_t *vd)
     1707 +{
     1708 +        spa_t *spa = vd->vdev_spa;
     1709 +        dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
     1710 +
     1711 +        ASSERT0(scn->scn_phys.scn_errors);
     1712 +        ASSERT0(vd->vdev_children);
     1713 +
     1714 +        if (vd->vdev_resilver_txg == 0 ||
     1715 +            vd->vdev_dtl[DTL_MISSING].sm_space == 0)
     1716 +                return (B_TRUE);
     1717 +
     1718 +        /*
     1719 +         * When a resilver is initiated the scan will assign the scn_max_txg
     1720 +         * value to the highest txg value that exists in all DTLs. If this
     1721 +         * device's max DTL is not part of this scan (i.e. it is not in
     1722 +         * the range (scn_min_txg, scn_max_txg]) then it is not eligible
     1723 +         * for excision.
     1724 +         */
     1725 +        if (vdev_dtl_max(vd) <= scn->scn_phys.scn_max_txg) {
     1726 +                ASSERT3U(scn->scn_phys.scn_min_txg, <=, vdev_dtl_min(vd));
     1727 +                ASSERT3U(scn->scn_phys.scn_min_txg, <, vd->vdev_resilver_txg);
     1728 +                ASSERT3U(vd->vdev_resilver_txg, <=, scn->scn_phys.scn_max_txg);
     1729 +                return (B_TRUE);
     1730 +        }
     1731 +        return (B_FALSE);
     1732 +}
     1733 +
     1734 +/*
1666 1735   * Reassess DTLs after a config change or scrub completion.
1667 1736   */
1668 1737  void
1669 1738  vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
1670 1739  {
1671 1740          spa_t *spa = vd->vdev_spa;
1672 1741          avl_tree_t reftree;
1673 1742          int minref;
1674 1743  
1675 1744          ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
↓ open down ↓ 2 lines elided ↑ open up ↑
1678 1747                  vdev_dtl_reassess(vd->vdev_child[c], txg,
1679 1748                      scrub_txg, scrub_done);
1680 1749  
1681 1750          if (vd == spa->spa_root_vdev || vd->vdev_ishole || vd->vdev_aux)
1682 1751                  return;
1683 1752  
1684 1753          if (vd->vdev_ops->vdev_op_leaf) {
1685 1754                  dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
1686 1755  
1687 1756                  mutex_enter(&vd->vdev_dtl_lock);
     1757 +
     1758 +                /*
     1759 +                 * If we've completed a scan cleanly then determine
     1760 +                 * if this vdev should remove any DTLs. We only want to
     1761 +                 * excise regions on vdevs that were available during
     1762 +                 * the entire duration of this scan.
     1763 +                 */
1688 1764                  if (scrub_txg != 0 &&
1689 1765                      (spa->spa_scrub_started ||
1690      -                    (scn && scn->scn_phys.scn_errors == 0))) {
     1766 +                    (scn != NULL && scn->scn_phys.scn_errors == 0)) &&
     1767 +                    vdev_dtl_should_excise(vd)) {
1691 1768                          /*
1692 1769                           * We completed a scrub up to scrub_txg.  If we
1693 1770                           * did it without rebooting, then the scrub dtl
1694 1771                           * will be valid, so excise the old region and
1695 1772                           * fold in the scrub dtl.  Otherwise, leave the
1696 1773                           * dtl as-is if there was an error.
1697 1774                           *
1698 1775                           * There's a little trick here: to excise the beginning
1699 1776                           * of the DTL_MISSING map, we put it into a reference
1700 1777                           * tree and then add a segment with refcnt -1 that
↓ open down ↓ 18 lines elided ↑ open up ↑
1719 1796                  space_map_walk(&vd->vdev_dtl[DTL_MISSING],
1720 1797                      space_map_add, &vd->vdev_dtl[DTL_PARTIAL]);
1721 1798                  if (scrub_done)
1722 1799                          space_map_vacate(&vd->vdev_dtl[DTL_SCRUB], NULL, NULL);
1723 1800                  space_map_vacate(&vd->vdev_dtl[DTL_OUTAGE], NULL, NULL);
1724 1801                  if (!vdev_readable(vd))
1725 1802                          space_map_add(&vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL);
1726 1803                  else
1727 1804                          space_map_walk(&vd->vdev_dtl[DTL_MISSING],
1728 1805                              space_map_add, &vd->vdev_dtl[DTL_OUTAGE]);
     1806 +
     1807 +                /*
     1808 +                 * If the vdev was resilvering and no longer has any
     1809 +                 * DTLs then reset its vdev_resilver_txg to zero.
     1810 +                 */
     1811 +                if (vd->vdev_resilver_txg != 0 &&
     1812 +                    vd->vdev_dtl[DTL_MISSING].sm_space == 0 &&
     1813 +                    vd->vdev_dtl[DTL_OUTAGE].sm_space == 0)
     1814 +                        vd->vdev_resilver_txg = 0;
     1815 +
1729 1816                  mutex_exit(&vd->vdev_dtl_lock);
1730 1817  
1731 1818                  if (txg != 0)
1732 1819                          vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg);
1733 1820                  return;
1734 1821          }
1735 1822  
1736 1823          mutex_enter(&vd->vdev_dtl_lock);
1737 1824          for (int t = 0; t < DTL_TYPES; t++) {
1738 1825                  /* account for child's outage in parent's missing map */
↓ open down ↓ 156 lines elided ↑ open up ↑
1895 1982  vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp)
1896 1983  {
1897 1984          boolean_t needed = B_FALSE;
1898 1985          uint64_t thismin = UINT64_MAX;
1899 1986          uint64_t thismax = 0;
1900 1987  
1901 1988          if (vd->vdev_children == 0) {
1902 1989                  mutex_enter(&vd->vdev_dtl_lock);
1903 1990                  if (vd->vdev_dtl[DTL_MISSING].sm_space != 0 &&
1904 1991                      vdev_writeable(vd)) {
1905      -                        space_seg_t *ss;
1906 1992  
1907      -                        ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root);
1908      -                        thismin = ss->ss_start - 1;
1909      -                        ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root);
1910      -                        thismax = ss->ss_end;
     1993 +                        thismin = vdev_dtl_min(vd);
     1994 +                        thismax = vdev_dtl_max(vd);
1911 1995                          needed = B_TRUE;
1912 1996                  }
1913 1997                  mutex_exit(&vd->vdev_dtl_lock);
1914 1998          } else {
1915 1999                  for (int c = 0; c < vd->vdev_children; c++) {
1916 2000                          vdev_t *cvd = vd->vdev_child[c];
1917 2001                          uint64_t cmin, cmax;
1918 2002  
1919 2003                          if (vdev_resilver_needed(cvd, &cmin, &cmax)) {
1920 2004                                  thismin = MIN(thismin, cmin);
↓ open down ↓ 1279 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX