Print this page
3956 ::vdev -r should work with pipelines
3957 ztest should update the cachefile before killing itself
3958 multiple scans can lead to partial resilvering
3959 ddt entries are not always resilvered
3960 dsl_scan can skip over dedup-ed blocks if physical birth != logical birth
3961 freed gang blocks are not resilvered and can cause pool to suspend
3962 ztest should print out zfs debug buffer before exiting
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>

@@ -519,12 +519,12 @@
                 }
 
                 (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_OFFLINE,
                     &vd->vdev_offline);
 
-                (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVERING,
-                    &vd->vdev_resilvering);
+                (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG,
+                    &vd->vdev_resilver_txg);
 
                 /*
                  * When importing a pool, we want to ignore the persistent fault
                  * state, as the diagnosis made on another system may not be
                  * valid in the current context.  Local vdevs will

@@ -1661,10 +1661,79 @@
 
         return (empty);
 }
 
 /*
+ * Returns the txg just below the first DTL_MISSING segment (ss_start - 1).
+ */
+static uint64_t
+vdev_dtl_min(vdev_t *vd)
+{
+        space_seg_t *ss;
+
+        ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock));
+        ASSERT3U(vd->vdev_dtl[DTL_MISSING].sm_space, !=, 0);
+        ASSERT0(vd->vdev_children);
+
+        ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root);
+        return (ss->ss_start - 1);
+}
+
+/*
+ * Returns ss_end of the last DTL_MISSING segment, i.e. the DTL's upper bound.
+ */
+static uint64_t
+vdev_dtl_max(vdev_t *vd)
+{
+        space_seg_t *ss;
+
+        ASSERT(MUTEX_HELD(&vd->vdev_dtl_lock));
+        ASSERT3U(vd->vdev_dtl[DTL_MISSING].sm_space, !=, 0);
+        ASSERT0(vd->vdev_children);
+
+        ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root);
+        return (ss->ss_end);
+}
+
+/*
+ * Determine if a resilvering vdev should remove any DTL entries from
+ * its range. If the vdev was resilvering for the entire duration of the
+ * scan then it should excise that range from its DTLs. Otherwise, this
+ * vdev is considered partially resilvered and should leave its DTL
+ * entries intact. The comment in vdev_dtl_reassess() describes how we
+ * excise the DTLs.
+ */
+static boolean_t
+vdev_dtl_should_excise(vdev_t *vd)
+{
+        spa_t *spa = vd->vdev_spa;
+        dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
+
+        ASSERT0(scn->scn_phys.scn_errors);
+        ASSERT0(vd->vdev_children);
+
+        if (vd->vdev_resilver_txg == 0 ||
+            vd->vdev_dtl[DTL_MISSING].sm_space == 0)
+                return (B_TRUE);
+
+        /*
+         * When a resilver is initiated the scan will assign the scn_max_txg
+         * value to the highest txg value that exists in all DTLs. If this
+         * device's max DTL is not part of this scan (i.e. it is not in
+         * the range (scn_min_txg, scn_max_txg]) then it is not eligible
+         * for excision.
+         */
+        if (vdev_dtl_max(vd) <= scn->scn_phys.scn_max_txg) {
+                ASSERT3U(scn->scn_phys.scn_min_txg, <=, vdev_dtl_min(vd));
+                ASSERT3U(scn->scn_phys.scn_min_txg, <, vd->vdev_resilver_txg);
+                ASSERT3U(vd->vdev_resilver_txg, <=, scn->scn_phys.scn_max_txg);
+                return (B_TRUE);
+        }
+        return (B_FALSE);
+}
+
+/*
  * Reassess DTLs after a config change or scrub completion.
  */
 void
 vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg, int scrub_done)
 {

@@ -1683,13 +1752,21 @@
 
         if (vd->vdev_ops->vdev_op_leaf) {
                 dsl_scan_t *scn = spa->spa_dsl_pool->dp_scan;
 
                 mutex_enter(&vd->vdev_dtl_lock);
+
+                /*
+                 * If we've completed a scan cleanly then determine
+                 * if this vdev should remove any DTLs. We only want to
+                 * excise regions on vdevs that were available during
+                 * the entire duration of this scan.
+                 */
                 if (scrub_txg != 0 &&
                     (spa->spa_scrub_started ||
-                    (scn && scn->scn_phys.scn_errors == 0))) {
+                    (scn != NULL && scn->scn_phys.scn_errors == 0)) &&
+                    vdev_dtl_should_excise(vd)) {
                         /*
                          * We completed a scrub up to scrub_txg.  If we
                          * did it without rebooting, then the scrub dtl
                          * will be valid, so excise the old region and
                          * fold in the scrub dtl.  Otherwise, leave the

@@ -1724,10 +1801,20 @@
                 if (!vdev_readable(vd))
                         space_map_add(&vd->vdev_dtl[DTL_OUTAGE], 0, -1ULL);
                 else
                         space_map_walk(&vd->vdev_dtl[DTL_MISSING],
                             space_map_add, &vd->vdev_dtl[DTL_OUTAGE]);
+
+                /*
+                 * If the vdev was resilvering and no longer has any
+                 * DTLs then reset its resilvering flag.
+                 */
+                if (vd->vdev_resilver_txg != 0 &&
+                    vd->vdev_dtl[DTL_MISSING].sm_space == 0 &&
+                    vd->vdev_dtl[DTL_OUTAGE].sm_space == 0)
+                        vd->vdev_resilver_txg = 0;
+
                 mutex_exit(&vd->vdev_dtl_lock);
 
                 if (txg != 0)
                         vdev_dirty(vd->vdev_top, VDD_DTL, vd, txg);
                 return;

@@ -1900,16 +1987,13 @@
 
         if (vd->vdev_children == 0) {
                 mutex_enter(&vd->vdev_dtl_lock);
                 if (vd->vdev_dtl[DTL_MISSING].sm_space != 0 &&
                     vdev_writeable(vd)) {
-                        space_seg_t *ss;
 
-                        ss = avl_first(&vd->vdev_dtl[DTL_MISSING].sm_root);
-                        thismin = ss->ss_start - 1;
-                        ss = avl_last(&vd->vdev_dtl[DTL_MISSING].sm_root);
-                        thismax = ss->ss_end;
+                        thismin = vdev_dtl_min(vd);
+                        thismax = vdev_dtl_max(vd);
                         needed = B_TRUE;
                 }
                 mutex_exit(&vd->vdev_dtl_lock);
         } else {
                 for (int c = 0; c < vd->vdev_children; c++) {