Print this page
3956 ::vdev -r should work with pipelines
3957 ztest should update the cachefile before killing itself
3958 multiple scans can lead to partial resilvering
3959 ddt entries are not always resilvered
3960 dsl_scan can skip over dedup-ed blocks if physical birth != logical birth
3961 freed gang blocks are not resilvered and can cause pool to suspend
3962 ztest should print out zfs debug buffer before exiting
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>


 177 dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
 178 {
 179         dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
 180         pool_scan_func_t *funcp = arg;
 181         dmu_object_type_t ot = 0;
 182         dsl_pool_t *dp = scn->scn_dp;
 183         spa_t *spa = dp->dp_spa;
 184 
 185         ASSERT(scn->scn_phys.scn_state != DSS_SCANNING);
 186         ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS);
 187         bzero(&scn->scn_phys, sizeof (scn->scn_phys));
 188         scn->scn_phys.scn_func = *funcp;
 189         scn->scn_phys.scn_state = DSS_SCANNING;
 190         scn->scn_phys.scn_min_txg = 0;
 191         scn->scn_phys.scn_max_txg = tx->tx_txg;
 192         scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */
 193         scn->scn_phys.scn_start_time = gethrestime_sec();
 194         scn->scn_phys.scn_errors = 0;
 195         scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc;
 196         scn->scn_restart_txg = 0;

 197         spa_scan_stat_init(spa);
 198 
 199         if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
 200                 scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;
 201 
 202                 /* rewrite all disk labels */
 203                 vdev_config_dirty(spa->spa_root_vdev);
 204 
 205                 if (vdev_resilver_needed(spa->spa_root_vdev,
 206                     &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
 207                         spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START);
 208                 } else {
 209                         spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START);
 210                 }
 211 
 212                 spa->spa_scrub_started = B_TRUE;
 213                 /*
 214                  * If this is an incremental scrub, limit the DDT scrub phase
 215                  * to just the auto-ditto class (for correctness); the rest
 216                  * of the scrub should go faster using top-down pruning.


 752                 return;
 753 
 754         /*
 755          * If dsl_scan_ddt() has already visited this block, it will have
 756          * already done any translations or scrubbing, so don't call the
 757          * callback again.
 758          */
 759         if (ddt_class_contains(dp->dp_spa,
 760             scn->scn_phys.scn_ddt_class_max, bp)) {
 761                 ASSERT(buf == NULL);
 762                 return;
 763         }
 764 
 765         /*
 766          * If this block is from the future (after cur_max_txg), then we
 767          * are doing this on behalf of a deleted snapshot, and we will
 768          * revisit the future block on the next pass of this dataset.
 769          * Don't scan it now unless we need to because something
 770          * under it was modified.
 771          */
 772         if (bp->blk_birth <= scn->scn_phys.scn_cur_max_txg) {
 773                 scan_funcs[scn->scn_phys.scn_func](dp, bp, zb);
 774         }
 775         if (buf)
 776                 (void) arc_buf_remove_ref(buf, &buf);
 777 }
 778 
 779 static void
 780 dsl_scan_visit_rootbp(dsl_scan_t *scn, dsl_dataset_t *ds, blkptr_t *bp,
 781     dmu_tx_t *tx)
 782 {
             /*
              * Visit the root block pointer 'bp' on behalf of scan 'scn'.
              * A root-level bookmark is built for it and the block is handed
              * to dsl_scan_visitbp() together with 'ds', 'scn' and 'tx'.
              * 'ds' may be NULL; see the bookmark setup below.
              */
 783         zbookmark_t zb;
 784 
             /*
              * The bookmark's objset is the dataset's object number, or
              * DMU_META_OBJSET when no dataset was supplied.  The remaining
              * fields are the ZB_ROOT_* constants.
              */
 785         SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
 786             ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
             /*
              * NOTE(review): the two NULL arguments are presumably "no parent
              * dnode" and "no parent buffer" -- confirm against the
              * dsl_scan_visitbp() prototype.  The objset type passed here is
              * DMU_OST_NONE.
              */
 787         dsl_scan_visitbp(bp, &zb, NULL, NULL,
 788             ds, scn, DMU_OST_NONE, tx);
 789 
             /*
              * NOTE(review): the "%s" / "" pair looks like it exists only to
              * give dprintf_ds() a variadic argument -- confirm against the
              * macro definition.
              */
 790         dprintf_ds(ds, "finished scan%s", "");
 791 }
 792 


1197         ASSERT(error == 0 || error == ENOENT);
1198         ASSERT(error != ENOENT ||
1199             ddb->ddb_class > scn->scn_phys.scn_ddt_class_max);
1200 }
1201 
1202 /* ARGSUSED */
1203 void
1204 dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
1205     ddt_entry_t *dde, dmu_tx_t *tx)
1206 {
             /*
              * Run the active scan function over the dedup-table entry 'dde'.
              * Each of the DDT_PHYS_TYPES ddt_phys_t slots in dde->dde_phys is
              * considered in turn; for every slot that passes the birth-txg
              * filter below, a block pointer is rebuilt from the entry's key
              * and that slot and handed to scan_funcs[scn_func].
              *
              * Does nothing unless the scan state is DSS_SCANNING.  'tx' is
              * not used in this function (hence ARGSUSED).
              */
1207         const ddt_key_t *ddk = &dde->dde_key;
1208         ddt_phys_t *ddp = dde->dde_phys;
1209         blkptr_t bp;
             /* The scan callback is always given an all-zero bookmark. */
1210         zbookmark_t zb = { 0 };
1211 
1212         if (scn->scn_phys.scn_state != DSS_SCANNING)
1213                 return;
1214 
             /* 'ddp' is advanced in step with 'p', one slot per iteration. */
1215         for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
                     /*
                      * Skip slots with a zero physical birth txg (presumably
                      * unused slots -- confirm) and slots born after
                      * scn_cur_max_txg.
                      */
1216                 if (ddp->ddp_phys_birth == 0 ||
1217                     ddp->ddp_phys_birth > scn->scn_phys.scn_cur_max_txg)
1218                         continue;
1219                 ddt_bp_create(checksum, ddk, ddp, &bp);
1220 
                     /* Count the block toward this txg's visited total. */
1221                 scn->scn_visited_this_txg++;
1222                 scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp, &zb);
1223         }
1224 }
1225 
1226 static void
1227 dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
1228 {
1229         dsl_pool_t *dp = scn->scn_dp;
1230         zap_cursor_t zc;
1231         zap_attribute_t za;
1232 
1233         if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
1234             scn->scn_phys.scn_ddt_class_max) {
1235                 scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
1236                 scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
1237                 dsl_scan_ddt(scn, tx);


1440                         zfs_dbgmsg("freed %llu blocks in %llums from "
1441                             "free_bpobj/bptree txg %llu",
1442                             (longlong_t)scn->scn_visited_this_txg,
1443                             (longlong_t)
1444                             NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
1445                             (longlong_t)tx->tx_txg);
1446                         scn->scn_visited_this_txg = 0;
1447                         /*
1448                          * Re-sync the ddt so that we can further modify
1449                          * it when doing bprewrite.
1450                          */
1451                         ddt_sync(spa, tx->tx_txg);
1452                 }
1453                 if (err == ERESTART)
1454                         return;
1455         }
1456 
1457         if (scn->scn_phys.scn_state != DSS_SCANNING)
1458                 return;
1459 










1460         if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
1461             scn->scn_phys.scn_ddt_class_max) {
1462                 zfs_dbgmsg("doing scan sync txg %llu; "
1463                     "ddt bm=%llu/%llu/%llu/%llx",
1464                     (longlong_t)tx->tx_txg,
1465                     (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
1466                     (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
1467                     (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
1468                     (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
1469                 ASSERT(scn->scn_phys.scn_bookmark.zb_objset == 0);
1470                 ASSERT(scn->scn_phys.scn_bookmark.zb_object == 0);
1471                 ASSERT(scn->scn_phys.scn_bookmark.zb_level == 0);
1472                 ASSERT(scn->scn_phys.scn_bookmark.zb_blkid == 0);
1473         } else {
1474                 zfs_dbgmsg("doing scan sync txg %llu; bm=%llu/%llu/%llu/%llu",
1475                     (longlong_t)tx->tx_txg,
1476                     (longlong_t)scn->scn_phys.scn_bookmark.zb_objset,
1477                     (longlong_t)scn->scn_phys.scn_bookmark.zb_object,
1478                     (longlong_t)scn->scn_phys.scn_bookmark.zb_level,
1479                     (longlong_t)scn->scn_phys.scn_bookmark.zb_blkid);
1480         }
1481 
1482         scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
1483             NULL, ZIO_FLAG_CANFAIL);
1484         dsl_pool_config_enter(dp, FTAG);
1485         dsl_scan_visit(scn, tx);
1486         dsl_pool_config_exit(dp, FTAG);
1487         (void) zio_wait(scn->scn_zio_root);
1488         scn->scn_zio_root = NULL;
1489 
1490         zfs_dbgmsg("visited %llu blocks in %llums",
1491             (longlong_t)scn->scn_visited_this_txg,
1492             (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time));
1493 
1494         if (!scn->scn_pausing) {
1495                 /* finished with scan. */
1496                 zfs_dbgmsg("finished scan txg %llu", (longlong_t)tx->tx_txg);
1497                 dsl_scan_done(scn, B_TRUE, tx);
1498         }
1499 
1500         if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
1501                 mutex_enter(&spa->spa_scrub_lock);
1502                 while (spa->spa_scrub_inflight > 0) {
1503                         cv_wait(&spa->spa_scrub_io_cv,
1504                             &spa->spa_scrub_lock);
1505                 }
1506                 mutex_exit(&spa->spa_scrub_lock);
1507         }
1508 
1509         dsl_scan_sync_state(scn, tx);
1510 }
1511 
1512 /*
1513  * This will start a new scan, or restart an existing one.
1514  */
1515 void
1516 dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg)
1517 {




 177 dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
 178 {
 179         dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
 180         pool_scan_func_t *funcp = arg;
 181         dmu_object_type_t ot = 0;
 182         dsl_pool_t *dp = scn->scn_dp;
 183         spa_t *spa = dp->dp_spa;
 184 
 185         ASSERT(scn->scn_phys.scn_state != DSS_SCANNING);
 186         ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS);
 187         bzero(&scn->scn_phys, sizeof (scn->scn_phys));
 188         scn->scn_phys.scn_func = *funcp;
 189         scn->scn_phys.scn_state = DSS_SCANNING;
 190         scn->scn_phys.scn_min_txg = 0;
 191         scn->scn_phys.scn_max_txg = tx->tx_txg;
 192         scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */
 193         scn->scn_phys.scn_start_time = gethrestime_sec();
 194         scn->scn_phys.scn_errors = 0;
 195         scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc;
 196         scn->scn_restart_txg = 0;
 197         scn->scn_done_txg = 0;
 198         spa_scan_stat_init(spa);
 199 
 200         if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
 201                 scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;
 202 
 203                 /* rewrite all disk labels */
 204                 vdev_config_dirty(spa->spa_root_vdev);
 205 
 206                 if (vdev_resilver_needed(spa->spa_root_vdev,
 207                     &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
 208                         spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START);
 209                 } else {
 210                         spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START);
 211                 }
 212 
 213                 spa->spa_scrub_started = B_TRUE;
 214                 /*
 215                  * If this is an incremental scrub, limit the DDT scrub phase
 216                  * to just the auto-ditto class (for correctness); the rest
 217                  * of the scrub should go faster using top-down pruning.


 753                 return;
 754 
 755         /*
 756          * If dsl_scan_ddt() has already visited this block, it will have
 757          * already done any translations or scrubbing, so don't call the
 758          * callback again.
 759          */
 760         if (ddt_class_contains(dp->dp_spa,
 761             scn->scn_phys.scn_ddt_class_max, bp)) {
 762                 ASSERT(buf == NULL);
 763                 return;
 764         }
 765 
 766         /*
 767          * If this block is from the future (after cur_max_txg), then we
 768          * are doing this on behalf of a deleted snapshot, and we will
 769          * revisit the future block on the next pass of this dataset.
 770          * Don't scan it now unless we need to because something
 771          * under it was modified.
 772          */
 773         if (BP_PHYSICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_max_txg) {
 774                 scan_funcs[scn->scn_phys.scn_func](dp, bp, zb);
 775         }
 776         if (buf)
 777                 (void) arc_buf_remove_ref(buf, &buf);
 778 }
 779 
 780 static void
 781 dsl_scan_visit_rootbp(dsl_scan_t *scn, dsl_dataset_t *ds, blkptr_t *bp,
 782     dmu_tx_t *tx)
 783 {
             /*
              * Visit the root block pointer 'bp' on behalf of scan 'scn'.
              * A root-level bookmark is built for it and the block is handed
              * to dsl_scan_visitbp() together with 'ds', 'scn' and 'tx'.
              * 'ds' may be NULL; see the bookmark setup below.
              */
 784         zbookmark_t zb;
 785 
             /*
              * The bookmark's objset is the dataset's object number, or
              * DMU_META_OBJSET when no dataset was supplied.  The remaining
              * fields are the ZB_ROOT_* constants.
              */
 786         SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
 787             ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
             /*
              * NOTE(review): the two NULL arguments are presumably "no parent
              * dnode" and "no parent buffer" -- confirm against the
              * dsl_scan_visitbp() prototype.  The objset type passed here is
              * DMU_OST_NONE.
              */
 788         dsl_scan_visitbp(bp, &zb, NULL, NULL,
 789             ds, scn, DMU_OST_NONE, tx);
 790 
             /*
              * NOTE(review): the "%s" / "" pair looks like it exists only to
              * give dprintf_ds() a variadic argument -- confirm against the
              * macro definition.
              */
 791         dprintf_ds(ds, "finished scan%s", "");
 792 }
 793 


1198         ASSERT(error == 0 || error == ENOENT);
1199         ASSERT(error != ENOENT ||
1200             ddb->ddb_class > scn->scn_phys.scn_ddt_class_max);
1201 }
1202 
1203 /* ARGSUSED */
1204 void
1205 dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
1206     ddt_entry_t *dde, dmu_tx_t *tx)
1207 {
             /*
              * Run the active scan function over the dedup-table entry 'dde'.
              * Each of the DDT_PHYS_TYPES ddt_phys_t slots in dde->dde_phys is
              * considered in turn; for every slot that passes the birth-txg
              * filter below, a block pointer is rebuilt from the entry's key
              * and that slot and handed to scan_funcs[scn_func].
              *
              * Does nothing unless the scan state is DSS_SCANNING.  'tx' is
              * not used in this function (hence ARGSUSED).
              */
1208         const ddt_key_t *ddk = &dde->dde_key;
1209         ddt_phys_t *ddp = dde->dde_phys;
1210         blkptr_t bp;
             /* The scan callback is always given an all-zero bookmark. */
1211         zbookmark_t zb = { 0 };
1212 
1213         if (scn->scn_phys.scn_state != DSS_SCANNING)
1214                 return;
1215 
             /* 'ddp' is advanced in step with 'p', one slot per iteration. */
1216         for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
                     /*
                      * Skip slots with a zero physical birth txg (presumably
                      * unused slots -- confirm) and slots born after the
                      * scan's scn_max_txg.  The per-pass scn_cur_max_txg is
                      * not consulted here.
                      */
1217                 if (ddp->ddp_phys_birth == 0 ||
1218                     ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg)
1219                         continue;
1220                 ddt_bp_create(checksum, ddk, ddp, &bp);
1221 
                     /* Count the block toward this txg's visited total. */
1222                 scn->scn_visited_this_txg++;
1223                 scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp, &zb);
1224         }
1225 }
1226 
1227 static void
1228 dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
1229 {
1230         dsl_pool_t *dp = scn->scn_dp;
1231         zap_cursor_t zc;
1232         zap_attribute_t za;
1233 
1234         if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
1235             scn->scn_phys.scn_ddt_class_max) {
1236                 scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
1237                 scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
1238                 dsl_scan_ddt(scn, tx);


1441                         zfs_dbgmsg("freed %llu blocks in %llums from "
1442                             "free_bpobj/bptree txg %llu",
1443                             (longlong_t)scn->scn_visited_this_txg,
1444                             (longlong_t)
1445                             NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
1446                             (longlong_t)tx->tx_txg);
1447                         scn->scn_visited_this_txg = 0;
1448                         /*
1449                          * Re-sync the ddt so that we can further modify
1450                          * it when doing bprewrite.
1451                          */
1452                         ddt_sync(spa, tx->tx_txg);
1453                 }
1454                 if (err == ERESTART)
1455                         return;
1456         }
1457 
1458         if (scn->scn_phys.scn_state != DSS_SCANNING)
1459                 return;
1460 
1461         if (scn->scn_done_txg == tx->tx_txg) {
1462                 ASSERT(!scn->scn_pausing);
1463                 /* finished with scan. */
1464                 zfs_dbgmsg("txg %llu scan complete", tx->tx_txg);
1465                 dsl_scan_done(scn, B_TRUE, tx);
1466                 ASSERT3U(spa->spa_scrub_inflight, ==, 0);
1467                 dsl_scan_sync_state(scn, tx);
1468                 return;
1469         }
1470 
1471         if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
1472             scn->scn_phys.scn_ddt_class_max) {
1473                 zfs_dbgmsg("doing scan sync txg %llu; "
1474                     "ddt bm=%llu/%llu/%llu/%llx",
1475                     (longlong_t)tx->tx_txg,
1476                     (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
1477                     (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
1478                     (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
1479                     (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
1480                 ASSERT(scn->scn_phys.scn_bookmark.zb_objset == 0);
1481                 ASSERT(scn->scn_phys.scn_bookmark.zb_object == 0);
1482                 ASSERT(scn->scn_phys.scn_bookmark.zb_level == 0);
1483                 ASSERT(scn->scn_phys.scn_bookmark.zb_blkid == 0);
1484         } else {
1485                 zfs_dbgmsg("doing scan sync txg %llu; bm=%llu/%llu/%llu/%llu",
1486                     (longlong_t)tx->tx_txg,
1487                     (longlong_t)scn->scn_phys.scn_bookmark.zb_objset,
1488                     (longlong_t)scn->scn_phys.scn_bookmark.zb_object,
1489                     (longlong_t)scn->scn_phys.scn_bookmark.zb_level,
1490                     (longlong_t)scn->scn_phys.scn_bookmark.zb_blkid);
1491         }
1492 
1493         scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
1494             NULL, ZIO_FLAG_CANFAIL);
1495         dsl_pool_config_enter(dp, FTAG);
1496         dsl_scan_visit(scn, tx);
1497         dsl_pool_config_exit(dp, FTAG);
1498         (void) zio_wait(scn->scn_zio_root);
1499         scn->scn_zio_root = NULL;
1500 
1501         zfs_dbgmsg("visited %llu blocks in %llums",
1502             (longlong_t)scn->scn_visited_this_txg,
1503             (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time));
1504 
1505         if (!scn->scn_pausing) {
1506                 scn->scn_done_txg = tx->tx_txg + 1;
1507                 zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu",
1508                     tx->tx_txg, scn->scn_done_txg);
1509         }
1510 
1511         if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
1512                 mutex_enter(&spa->spa_scrub_lock);
1513                 while (spa->spa_scrub_inflight > 0) {
1514                         cv_wait(&spa->spa_scrub_io_cv,
1515                             &spa->spa_scrub_lock);
1516                 }
1517                 mutex_exit(&spa->spa_scrub_lock);
1518         }
1519 
1520         dsl_scan_sync_state(scn, tx);
1521 }
1522 
1523 /*
1524  * This will start a new scan, or restart an existing one.
1525  */
1526 void
1527 dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg)
1528 {