Print this page
3956 ::vdev -r should work with pipelines
3957 ztest should update the cachefile before killing itself
3958 multiple scans can lead to partial resilvering
3959 ddt entries are not always resilvered
3960 dsl_scan can skip over dedup-ed blocks if physical birth != logical birth
3961 freed gang blocks are not resilvered and can cause pool to suspend
3962 ztest should print out zfs debug buffer before exiting
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>

*** 519,530 **** } (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &vd->vdev_offline); ! (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVERING, ! &vd->vdev_resilvering); /* * When importing a pool, we want to ignore the persistent fault * state, as the diagnosis made on another system may not be * valid in the current context. Local vdevs will --- 519,530 ---- } (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE, &vd->vdev_offline); ! (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG, ! &vd->vdev_resilver_txg); /* * When importing a pool, we want to ignore the persistent fault * state, as the diagnosis made on another system may not be * valid in the current context. Local vdevs will
*** 1661,1670 **** --- 1661,1739 ---- return (empty); } /* + * Returns the lowest txg in the DTL range. + */ + static uint64_t + vdev_dtl_min(vdev_t *vd) + { + space_seg_t *ss; + + ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock)); + ASSERT3U(vd->vdev_dtl[DTL_MISSING].sm_space, !=, 0); + ASSERT0(vd->vdev_children); + + ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root); + return (ss->ss_start - 1); + } + + /* + * Returns the highest txg in the DTL. + */ + static uint64_t + vdev_dtl_max(vdev_t *vd) + { + space_seg_t *ss; + + ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock)); + ASSERT3U(vd->vdev_dtl[DTL_MISSING].sm_space, !=, 0); + ASSERT0(vd->vdev_children); + + ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root); + return (ss->ss_end); + } + + /* + * Determine if a resilvering vdev should remove any DTL entries from + * its range. If the vdev was resilvering for the entire duration of the + * scan then it should excise that range from its DTLs. Otherwise, this + * vdev is considered partially resilvered and should leave its DTL + * entries intact. The comment in vdev_dtl_reassess() describes how we + * excise the DTLs. + */ + static boolean_t + vdev_dtl_should_excise(vdev_t *vd) + { + spa_t *spa = vd->vdev_spa; + dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; + + ASSERT0(scn->scn_phys.scn_errors); + ASSERT0(vd->vdev_children); + + if (vd->vdev_resilver_txg == 0 || + vd->vdev_dtl[DTL_MISSING].sm_space == 0) + return (B_TRUE); + + /* + * When a resilver is initiated the scan will assign the scn_max_txg + * value to the highest txg value that exists in all DTLs. If this + * device's max DTL is not part of this scan (i.e. it is not in + * the range (scn_min_txg, scn_max_txg] then it is not eligible + * for excision. + */ + if (vdev_dtl_max(vd) <= scn->scn_phys.scn_max_txg) { + ASSERT3U(scn->scn_phys.scn_min_txg, <=, vdev_dtl_min(vd)); + ASSERT3U(scn->scn_phys.scn_min_txg, <, vd->vdev_resilver_txg); + ASSERT3U(vd->vdev_resilver_txg, <=, scn->scn_phys.scn_max_txg); + return (B_TRUE); + } + return (B_FALSE); + } + + /* * Reassess DTLs after a config change or scrub completion. */ void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done) {
*** 1683,1695 **** if (vd->vdev_ops->vdev_op_leaf) { dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; mutex_enter(&vd->vdev_dtl_lock); if (scrub_txg != 0 && (spa->spa_scrub_started || ! (scn && scn->scn_phys.scn_errors == 0))) { /* * We completed a scrub up to scrub_txg. If we * did it without rebooting, then the scrub dtl * will be valid, so excise the old region and * fold in the scrub dtl. Otherwise, leave the --- 1752,1772 ---- if (vd->vdev_ops->vdev_op_leaf) { dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan; mutex_enter(&vd->vdev_dtl_lock); + + /* + * If we've completed a scan cleanly then determine + * if this vdev should remove any DTLs. We only want to + * excise regions on vdevs that were available during + * the entire duration of this scan. + */ if (scrub_txg != 0 && (spa->spa_scrub_started || ! (scn != NULL && scn->scn_phys.scn_errors == 0)) && ! vdev_dtl_should_excise(vd)) { /* * We completed a scrub up to scrub_txg. If we * did it without rebooting, then the scrub dtl * will be valid, so excise the old region and * fold in the scrub dtl. Otherwise, leave the
*** 1724,1733 **** --- 1801,1820 ---- if (!vdev_readable(vd)) space_map_add(&vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL); else space_map_walk(&vd->vdev_dtl[DTL_MISSING], space_map_add, &vd->vdev_dtl[DTL_OUTAGE]); + + /* + * If the vdev was resilvering and no longer has any + * DTLs then reset its resilvering flag. + */ + if (vd->vdev_resilver_txg != 0 && + vd->vdev_dtl[DTL_MISSING].sm_space == 0 && + vd->vdev_dtl[DTL_OUTAGE].sm_space == 0) + vd->vdev_resilver_txg = 0; + mutex_exit(&vd->vdev_dtl_lock); if (txg != 0) vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg); return;
*** 1900,1915 **** if (vd->vdev_children == 0) { mutex_enter(&vd->vdev_dtl_lock); if (vd->vdev_dtl[DTL_MISSING].sm_space != 0 && vdev_writeable(vd)) { - space_seg_t *ss; ! ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root); ! thismin = ss->ss_start - 1; ! ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root); ! thismax = ss->ss_end; needed = B_TRUE; } mutex_exit(&vd->vdev_dtl_lock); } else { for (int c = 0; c < vd->vdev_children; c++) { --- 1987,1999 ---- if (vd->vdev_children == 0) { mutex_enter(&vd->vdev_dtl_lock); if (vd->vdev_dtl[DTL_MISSING].sm_space != 0 && vdev_writeable(vd)) { ! thismin = vdev_dtl_min(vd); ! thismax = vdev_dtl_max(vd); needed = B_TRUE; } mutex_exit(&vd->vdev_dtl_lock); } else { for (int c = 0; c < vd->vdev_children; c++) {