504 (alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_L2CACHE ||
505 alloctype == VDEV_ALLOC_ROOTPOOL)) {
506 if (alloctype == VDEV_ALLOC_LOAD) {
507 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL,
508 &vd->vdev_dtl_smo.smo_object);
509 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_UNSPARE,
510 &vd->vdev_unspare);
511 }
512
513 if (alloctype == VDEV_ALLOC_ROOTPOOL) {
514 uint64_t spare = 0;
515
516 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
517 &spare) == 0 && spare)
518 spa_spare_add(vd);
519 }
520
521 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE,
522 &vd->vdev_offline);
523
524 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVERING,
525 &vd->vdev_resilvering);
526
527 /*
528 * When importing a pool, we want to ignore the persistent fault
529 * state, as the diagnosis made on another system may not be
530 * valid in the current context. Local vdevs will
531 * remain in the faulted state.
532 */
533 if (spa_load_state(spa) == SPA_LOAD_OPEN) {
534 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED,
535 &vd->vdev_faulted);
536 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DEGRADED,
537 &vd->vdev_degraded);
538 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED,
539 &vd->vdev_removed);
540
541 if (vd->vdev_faulted || vd->vdev_degraded) {
542 char *aux;
543
544 vd->vdev_label_aux =
545 VDEV_AUX_ERR_EXCEEDED;
1646 dirty = space_map_contains(sm, txg, size);
1647 mutex_exit(sm->sm_lock);
1648
1649 return (dirty);
1650 }
1651
1652 boolean_t
1653 vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t t)
1654 {
1655 space_map_t *sm = &vd->vdev_dtl[t];
1656 boolean_t empty;
1657
1658 mutex_enter(sm->sm_lock);
1659 empty = (sm->sm_space == 0);
1660 mutex_exit(sm->sm_lock);
1661
1662 return (empty);
1663 }
1664
1665 /*
1666 * Reassess DTLs after a config change or scrub completion.
1667 */
1668 void
1669 vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
1670 {
1671 spa_t *spa = vd->vdev_spa;
1672 avl_tree_t reftree;
1673 int minref;
1674
1675 ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
1676
1677 for (int c = 0; c < vd->vdev_children; c++)
1678 vdev_dtl_reassess(vd->vdev_child[c], txg,
1679 scrub_txg, scrub_done);
1680
1681 if (vd == spa->spa_root_vdev || vd->vdev_ishole || vd->vdev_aux)
1682 return;
1683
1684 if (vd->vdev_ops->vdev_op_leaf) {
1685 dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
1686
1687 mutex_enter(&vd->vdev_dtl_lock);
1688 if (scrub_txg != 0 &&
1689 (spa->spa_scrub_started ||
1690 (scn && scn->scn_phys.scn_errors == 0))) {
1691 /*
1692 * We completed a scrub up to scrub_txg. If we
1693 * did it without rebooting, then the scrub dtl
1694 * will be valid, so excise the old region and
1695 * fold in the scrub dtl. Otherwise, leave the
1696 * dtl as-is if there was an error.
1697 *
1698 * There's a little trick here: to excise the beginning
1699 * of the DTL_MISSING map, we put it into a reference
1700 * tree and then add a segment with refcnt -1 that
1701 * covers the range [0, scrub_txg). This means
1702 * that each txg in that range has refcnt -1 or 0.
1703 * We then add DTL_SCRUB with a refcnt of 2, so that
1704 * entries in the range [0, scrub_txg) will have a
1705 * positive refcnt -- either 1 or 2. We then convert
1706 * the reference tree into the new DTL_MISSING map.
1707 */
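/*
 * Hypothetical worked example of the refcount arithmetic above (the
 * numbers are illustrative, not taken from the surrounding code):
 * suppose scrub_txg = 100, DTL_MISSING covers txgs 20-30 and 150-160,
 * and DTL_SCRUB covers txgs 25-27.  After the three additions below,
 * the per-txg refcounts are:
 *
 *	txgs 20-24, 28-30:   1 - 1     =  0	(excised)
 *	txgs 25-27:          1 - 1 + 2 =  2	(kept)
 *	txgs 150-160:        1         =  1	(kept, beyond scrub_txg)
 *	other txgs < 100:      - 1     = -1	(never missing)
 *
 * Generating the new map with minref 1 therefore keeps 25-27 and
 * 150-160: everything before scrub_txg is excised except the txgs
 * recorded in DTL_SCRUB, which are folded back in.
 */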
1708 space_map_ref_create(&reftree);
1709 space_map_ref_add_map(&reftree,
1710 &vd->vdev_dtl[DTL_MISSING], 1);
1711 space_map_ref_add_seg(&reftree, 0, scrub_txg, -1);
1712 space_map_ref_add_map(&reftree,
1713 &vd->vdev_dtl[DTL_SCRUB], 2);
1714 space_map_ref_generate_map(&reftree,
1715 &vd->vdev_dtl[DTL_MISSING], 1);
1716 space_map_ref_destroy(&reftree);
1717 }
1718 space_map_vacate(&vd->vdev_dtl[DTL_PARTIAL], NULL, NULL);
1719 space_map_walk(&vd->vdev_dtl[DTL_MISSING],
1720 space_map_add, &vd->vdev_dtl[DTL_PARTIAL]);
1721 if (scrub_done)
1722 space_map_vacate(&vd->vdev_dtl[DTL_SCRUB], NULL, NULL);
1723 space_map_vacate(&vd->vdev_dtl[DTL_OUTAGE], NULL, NULL);
1724 if (!vdev_readable(vd))
1725 space_map_add(&vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL);
1726 else
1727 space_map_walk(&vd->vdev_dtl[DTL_MISSING],
1728 space_map_add, &vd->vdev_dtl[DTL_OUTAGE]);
1729 mutex_exit(&vd->vdev_dtl_lock);
1730
1731 if (txg != 0)
1732 vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg);
1733 return;
1734 }
1735
1736 mutex_enter(&vd->vdev_dtl_lock);
1737 for (int t = 0; t < DTL_TYPES; t++) {
1738 /* account for child's outage in parent's missing map */
1739 int s = (t == DTL_MISSING) ? DTL_OUTAGE : t;
1740 if (t == DTL_SCRUB)
1741 continue; /* leaf vdevs only */
1742 if (t == DTL_PARTIAL)
1743 minref = 1; /* i.e. non-zero */
1744 else if (vd->vdev_nparity != 0)
1745 minref = vd->vdev_nparity + 1; /* RAID-Z */
1746 else
1747 minref = vd->vdev_children; /* any kind of mirror */
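/*
 * Hedged illustration of minref (hypothetical configurations): the rest
 * of this loop (truncated in this excerpt) adds each child's map to the
 * reference tree with refcnt 1 and then generates the parent's map with
 * this minref, so a txg enters the parent's DTL only when at least
 * minref children report it.  For a 3-way mirror, minref = 3: all
 * children must be missing the txg.  For raidz2 (nparity = 2),
 * minref = 3: one more child than parity can reconstruct.  For
 * DTL_PARTIAL, minref = 1: a single affected child is enough.
 */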
1748 space_map_ref_create(&reftree);
1885 if (!required && zio_injection_enabled)
1886 required = !!zio_handle_device_injection(vd, NULL, ECHILD);
1887
1888 return (required);
1889 }
1890
1891 /*
1892 * Determine if resilver is needed, and if so the txg range.
1893 */
1894 boolean_t
1895 vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp)
1896 {
1897 boolean_t needed = B_FALSE;
1898 uint64_t thismin = UINT64_MAX;
1899 uint64_t thismax = 0;
1900
1901 if (vd->vdev_children == 0) {
1902 mutex_enter(&vd->vdev_dtl_lock);
1903 if (vd->vdev_dtl[DTL_MISSING].sm_space != 0 &&
1904 vdev_writeable(vd)) {
1905 space_seg_t *ss;
1906
1907 ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root);
1908 thismin = ss->ss_start - 1;
1909 ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root);
1910 thismax = ss->ss_end;
1911 needed = B_TRUE;
1912 }
1913 mutex_exit(&vd->vdev_dtl_lock);
1914 } else {
1915 for (int c = 0; c < vd->vdev_children; c++) {
1916 vdev_t *cvd = vd->vdev_child[c];
1917 uint64_t cmin, cmax;
1918
1919 if (vdev_resilver_needed(cvd, &cmin, &cmax)) {
1920 thismin = MIN(thismin, cmin);
1921 thismax = MAX(thismax, cmax);
1922 needed = B_TRUE;
1923 }
1924 }
1925 }
1926
1927 if (needed && minp) {
1928 *minp = thismin;
1929 *maxp = thismax;
1930 }
|
504 (alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_L2CACHE ||
505 alloctype == VDEV_ALLOC_ROOTPOOL)) {
506 if (alloctype == VDEV_ALLOC_LOAD) {
507 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DTL,
508 &vd->vdev_dtl_smo.smo_object);
509 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_UNSPARE,
510 &vd->vdev_unspare);
511 }
512
513 if (alloctype == VDEV_ALLOC_ROOTPOOL) {
514 uint64_t spare = 0;
515
516 if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_SPARE,
517 &spare) == 0 && spare)
518 spa_spare_add(vd);
519 }
520
521 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE,
522 &vd->vdev_offline);
523
524 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG,
525 &vd->vdev_resilver_txg);
526
527 /*
528 * When importing a pool, we want to ignore the persistent fault
529 * state, as the diagnosis made on another system may not be
530 * valid in the current context. Local vdevs will
531 * remain in the faulted state.
532 */
533 if (spa_load_state(spa) == SPA_LOAD_OPEN) {
534 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED,
535 &vd->vdev_faulted);
536 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DEGRADED,
537 &vd->vdev_degraded);
538 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVED,
539 &vd->vdev_removed);
540
541 if (vd->vdev_faulted || vd->vdev_degraded) {
542 char *aux;
543
544 vd->vdev_label_aux =
545 VDEV_AUX_ERR_EXCEEDED;
1646 dirty = space_map_contains(sm, txg, size);
1647 mutex_exit(sm->sm_lock);
1648
1649 return (dirty);
1650 }
1651
1652 boolean_t
1653 vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t t)
1654 {
1655 space_map_t *sm = &vd->vdev_dtl[t];
1656 boolean_t empty;
1657
1658 mutex_enter(sm->sm_lock);
1659 empty = (sm->sm_space == 0);
1660 mutex_exit(sm->sm_lock);
1661
1662 return (empty);
1663 }
1664
1665 /*
1666 * Returns the lowest txg in the DTL range.
1667 */
1668 static uint64_t
1669 vdev_dtl_min(vdev_t *vd)
1670 {
1671 space_seg_t *ss;
1672
1673 ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock));
1674 ASSERT3U(vd->vdev_dtl[DTL_MISSING].sm_space, !=, 0);
1675 ASSERT0(vd->vdev_children);
1676
1677 ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root);
1678 return (ss->ss_start - 1);
1679 }
1680
1681 /*
1682 * Returns the highest txg in the DTL range.
1683 */
1684 static uint64_t
1685 vdev_dtl_max(vdev_t *vd)
1686 {
1687 space_seg_t *ss;
1688
1689 ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock));
1690 ASSERT3U(vd->vdev_dtl[DTL_MISSING].sm_space, !=, 0);
1691 ASSERT0(vd->vdev_children);
1692
1693 ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root);
1694 return (ss->ss_end);
1695 }
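
/*
 * Worked example for the two helpers above (hypothetical segment): if
 * DTL_MISSING holds the single segment [100, 201) -- i.e. txgs 100
 * through 200 were dirtied one at a time and coalesced -- then
 * vdev_dtl_min() returns 99 (ss_start - 1) and vdev_dtl_max() returns
 * 201 (ss_end), bracketing the missing txgs on either side.
 */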
1696
1697 /*
1698 * Determine if a resilvering vdev should remove any DTL entries from
1699 * its range. If the vdev was resilvering for the entire duration of the
1700 * scan then it should excise that range from its DTLs. Otherwise, this
1701 * vdev is considered partially resilvered and should leave its DTL
1702 * entries intact. The comment in vdev_dtl_reassess() describes how we
1703 * excise the DTLs.
1704 */
1705 static boolean_t
1706 vdev_dtl_should_excise(vdev_t *vd)
1707 {
1708 spa_t *spa = vd->vdev_spa;
1709 dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
1710
1711 ASSERT0(scn->scn_phys.scn_errors);
1712 ASSERT0(vd->vdev_children);
1713
1714 if (vd->vdev_resilver_txg == 0 ||
1715 vd->vdev_dtl[DTL_MISSING].sm_space == 0)
1716 return (B_TRUE);
1717
1718 /*
1719 * When a resilver is initiated the scan will assign the scn_max_txg
1720 * value to the highest txg value that exists in all DTLs. If this
1721 * device's max DTL is not part of this scan (i.e. it is not in
1722 * the range (scn_min_txg, scn_max_txg]) then it is not eligible
1723 * for excision.
1724 */
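/*
 * Hypothetical scenario (illustrative numbers): a resilver runs with
 * scn_min_txg = 50 and scn_max_txg = 300.  If this vdev's DTL_MISSING
 * spans txgs 80-250, vdev_dtl_max() returns 251, which is within the
 * scan range, so the check below permits excision.  If the vdev had
 * instead accumulated DTL entries up to txg 400 after the scan range
 * was fixed, vdev_dtl_max() would exceed scn_max_txg and the DTLs are
 * left intact for a future resilver.
 */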
1725 if (vdev_dtl_max(vd) <= scn->scn_phys.scn_max_txg) {
1726 ASSERT3U(scn->scn_phys.scn_min_txg, <=, vdev_dtl_min(vd));
1727 ASSERT3U(scn->scn_phys.scn_min_txg, <, vd->vdev_resilver_txg);
1728 ASSERT3U(vd->vdev_resilver_txg, <=, scn->scn_phys.scn_max_txg);
1729 return (B_TRUE);
1730 }
1731 return (B_FALSE);
1732 }
1733
1734 /*
1735 * Reassess DTLs after a config change or scrub completion.
1736 */
1737 void
1738 vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
1739 {
1740 spa_t *spa = vd->vdev_spa;
1741 avl_tree_t reftree;
1742 int minref;
1743
1744 ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
1745
1746 for (int c = 0; c < vd->vdev_children; c++)
1747 vdev_dtl_reassess(vd->vdev_child[c], txg,
1748 scrub_txg, scrub_done);
1749
1750 if (vd == spa->spa_root_vdev || vd->vdev_ishole || vd->vdev_aux)
1751 return;
1752
1753 if (vd->vdev_ops->vdev_op_leaf) {
1754 dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
1755
1756 mutex_enter(&vd->vdev_dtl_lock);
1757
1758 /*
1759 * If we've completed a scan cleanly then determine
1760 * if this vdev should remove any DTLs. We only want to
1761 * excise regions on vdevs that were available during
1762 * the entire duration of this scan.
1763 */
1764 if (scrub_txg != 0 &&
1765 (spa->spa_scrub_started ||
1766 (scn != NULL && scn->scn_phys.scn_errors == 0)) &&
1767 vdev_dtl_should_excise(vd)) {
1768 /*
1769 * We completed a scrub up to scrub_txg. If we
1770 * did it without rebooting, then the scrub dtl
1771 * will be valid, so excise the old region and
1772 * fold in the scrub dtl. Otherwise, leave the
1773 * dtl as-is if there was an error.
1774 *
1775 * There's a little trick here: to excise the beginning
1776 * of the DTL_MISSING map, we put it into a reference
1777 * tree and then add a segment with refcnt -1 that
1778 * covers the range [0, scrub_txg). This means
1779 * that each txg in that range has refcnt -1 or 0.
1780 * We then add DTL_SCRUB with a refcnt of 2, so that
1781 * entries in the range [0, scrub_txg) will have a
1782 * positive refcnt -- either 1 or 2. We then convert
1783 * the reference tree into the new DTL_MISSING map.
1784 */
1785 space_map_ref_create(&reftree);
1786 space_map_ref_add_map(&reftree,
1787 &vd->vdev_dtl[DTL_MISSING], 1);
1788 space_map_ref_add_seg(&reftree, 0, scrub_txg, -1);
1789 space_map_ref_add_map(&reftree,
1790 &vd->vdev_dtl[DTL_SCRUB], 2);
1791 space_map_ref_generate_map(&reftree,
1792 &vd->vdev_dtl[DTL_MISSING], 1);
1793 space_map_ref_destroy(&reftree);
1794 }
1795 space_map_vacate(&vd->vdev_dtl[DTL_PARTIAL], NULL, NULL);
1796 space_map_walk(&vd->vdev_dtl[DTL_MISSING],
1797 space_map_add, &vd->vdev_dtl[DTL_PARTIAL]);
1798 if (scrub_done)
1799 space_map_vacate(&vd->vdev_dtl[DTL_SCRUB], NULL, NULL);
1800 space_map_vacate(&vd->vdev_dtl[DTL_OUTAGE], NULL, NULL);
1801 if (!vdev_readable(vd))
1802 space_map_add(&vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL);
1803 else
1804 space_map_walk(&vd->vdev_dtl[DTL_MISSING],
1805 space_map_add, &vd->vdev_dtl[DTL_OUTAGE]);
1806
1807 /*
1808 * If the vdev was resilvering and no longer has any
1809 * DTLs then reset its resilvering flag.
1810 */
1811 if (vd->vdev_resilver_txg != 0 &&
1812 vd->vdev_dtl[DTL_MISSING].sm_space == 0 &&
1813 vd->vdev_dtl[DTL_OUTAGE].sm_space == 0)
1814 vd->vdev_resilver_txg = 0;
1815
1816 mutex_exit(&vd->vdev_dtl_lock);
1817
1818 if (txg != 0)
1819 vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg);
1820 return;
1821 }
1822
1823 mutex_enter(&vd->vdev_dtl_lock);
1824 for (int t = 0; t < DTL_TYPES; t++) {
1825 /* account for child's outage in parent's missing map */
1826 int s = (t == DTL_MISSING) ? DTL_OUTAGE : t;
1827 if (t == DTL_SCRUB)
1828 continue; /* leaf vdevs only */
1829 if (t == DTL_PARTIAL)
1830 minref = 1; /* i.e. non-zero */
1831 else if (vd->vdev_nparity != 0)
1832 minref = vd->vdev_nparity + 1; /* RAID-Z */
1833 else
1834 minref = vd->vdev_children; /* any kind of mirror */
1835 space_map_ref_create(&reftree);
1972 if (!required && zio_injection_enabled)
1973 required = !!zio_handle_device_injection(vd, NULL, ECHILD);
1974
1975 return (required);
1976 }
1977
1978 /*
1979 * Determine if resilver is needed, and if so the txg range.
1980 */
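/*
 * Usage sketch (hedged; actual call sites may differ slightly): a
 * caller that only needs a yes/no answer can pass NULL for both range
 * pointers, e.g.
 *
 *	if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
 *		spa_async_request(spa, SPA_ASYNC_RESILVER);
 *
 * while a caller scheduling the scan itself passes &min and &max to
 * obtain the txg range the resilver must cover.
 */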
1981 boolean_t
1982 vdev_resilver_needed(vdev_t *vd, uint64_t *minp, uint64_t *maxp)
1983 {
1984 boolean_t needed = B_FALSE;
1985 uint64_t thismin = UINT64_MAX;
1986 uint64_t thismax = 0;
1987
1988 if (vd->vdev_children == 0) {
1989 mutex_enter(&vd->vdev_dtl_lock);
1990 if (vd->vdev_dtl[DTL_MISSING].sm_space != 0 &&
1991 vdev_writeable(vd)) {
1992
1993 thismin = vdev_dtl_min(vd);
1994 thismax = vdev_dtl_max(vd);
1995 needed = B_TRUE;
1996 }
1997 mutex_exit(&vd->vdev_dtl_lock);
1998 } else {
1999 for (int c = 0; c < vd->vdev_children; c++) {
2000 vdev_t *cvd = vd->vdev_child[c];
2001 uint64_t cmin, cmax;
2002
2003 if (vdev_resilver_needed(cvd, &cmin, &cmax)) {
2004 thismin = MIN(thismin, cmin);
2005 thismax = MAX(thismax, cmax);
2006 needed = B_TRUE;
2007 }
2008 }
2009 }
2010
2011 if (needed && minp) {
2012 *minp = thismin;
2013 *maxp = thismax;
2014 }