177 dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
178 {
179 dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
180 pool_scan_func_t *funcp = arg;
181 dmu_object_type_t ot = 0;
182 dsl_pool_t *dp = scn->scn_dp;
183 spa_t *spa = dp->dp_spa;
184
185 ASSERT(scn->scn_phys.scn_state != DSS_SCANNING);
186 ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS);
187 bzero(&scn->scn_phys, sizeof (scn->scn_phys));
188 scn->scn_phys.scn_func = *funcp;
189 scn->scn_phys.scn_state = DSS_SCANNING;
190 scn->scn_phys.scn_min_txg = 0;
191 scn->scn_phys.scn_max_txg = tx->tx_txg;
192 scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */
193 scn->scn_phys.scn_start_time = gethrestime_sec();
194 scn->scn_phys.scn_errors = 0;
195 scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc;
196 scn->scn_restart_txg = 0;
197 spa_scan_stat_init(spa);
198
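/*
 * Scrubs and resilvers additionally cap the DDT phase at
 * zfs_scrub_ddt_class_max, dirty the vdev configuration so every
 * label is rewritten, and post the matching start sysevent: when
 * vdev_resilver_needed() reports that a resilver is required, it
 * also supplies the txg range to scan.
 */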
199 if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
200 scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;
201
202 /* rewrite all disk labels */
203 vdev_config_dirty(spa->spa_root_vdev);
204
205 if (vdev_resilver_needed(spa->spa_root_vdev,
206 &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
207 spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START);
208 } else {
209 spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START);
210 }
211
212 spa->spa_scrub_started = B_TRUE;
213 /*
214 * If this is an incremental scrub, limit the DDT scrub phase
215 * to just the auto-ditto class (for correctness); the rest
216 * of the scrub should go faster using top-down pruning.
752 return;
753
754 /*
755 * If dsl_scan_ddt() has already visited this block, it will have
756 * already done any translations or scrubbing, so don't call the
757 * callback again.
758 */
759 if (ddt_class_contains(dp->dp_spa,
760 scn->scn_phys.scn_ddt_class_max, bp)) {
761 ASSERT(buf == NULL);
762 return;
763 }
764
765 /*
766 * If this block is from the future (after cur_max_txg), then we
767 * are doing this on behalf of a deleted snapshot, and we will
768 * revisit the future block on the next pass of this dataset.
769 * Don't scan it now unless we need to because something
770 * under it was modified.
771 */
772 if (bp->blk_birth <= scn->scn_phys.scn_cur_max_txg) {
773 scan_funcs[scn->scn_phys.scn_func](dp, bp, zb);
774 }
775 if (buf)
776 (void) arc_buf_remove_ref(buf, &buf);
777 }
778
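/*
 * Visit a dataset's root block pointer (or the MOS root when ds is
 * NULL): construct the corresponding root-level bookmark and pass the
 * bp to dsl_scan_visitbp().
 */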
779 static void
780 dsl_scan_visit_rootbp(dsl_scan_t *scn, dsl_dataset_t *ds, blkptr_t *bp,
781 dmu_tx_t *tx)
782 {
783 zbookmark_t zb;
784
785 SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
786 ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
787 dsl_scan_visitbp(bp, &zb, NULL, NULL,
788 ds, scn, DMU_OST_NONE, tx);
789
790 dprintf_ds(ds, "finished scan%s", "");
791 }
792
1197 ASSERT(error == 0 || error == ENOENT);
1198 ASSERT(error != ENOENT ||
1199 ddb->ddb_class > scn->scn_phys.scn_ddt_class_max);
1200 }
1201
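/*
 * Scan one DDT entry: every allocated physical copy whose birth txg
 * falls within the scan window is rebuilt into a blkptr and handed to
 * the active scan function with a zeroed bookmark, since DDT-driven
 * visits carry no dataset bookmark.
 */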
1202 /* ARGSUSED */
1203 void
1204 dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
1205 ddt_entry_t *dde, dmu_tx_t *tx)
1206 {
1207 const ddt_key_t *ddk = &dde->dde_key;
1208 ddt_phys_t *ddp = dde->dde_phys;
1209 blkptr_t bp;
1210 zbookmark_t zb = { 0 };
1211
1212 if (scn->scn_phys.scn_state != DSS_SCANNING)
1213 return;
1214
1215 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
1216 if (ddp->ddp_phys_birth == 0 ||
1217 ddp->ddp_phys_birth > scn->scn_phys.scn_cur_max_txg)
1218 continue;
1219 ddt_bp_create(checksum, ddk, ddp, &bp);
1220
1221 scn->scn_visited_this_txg++;
1222 scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp, &zb);
1223 }
1224 }
1225
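/*
 * Perform one sync's worth of traversal.  While the saved DDT
 * bookmark is still within scn_ddt_class_max, the DDT phase runs
 * first over the full [scn_min_txg, scn_max_txg] window via
 * dsl_scan_ddt().
 */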
1226 static void
1227 dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
1228 {
1229 dsl_pool_t *dp = scn->scn_dp;
1230 zap_cursor_t zc;
1231 zap_attribute_t za;
1232
1233 if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
1234 scn->scn_phys.scn_ddt_class_max) {
1235 scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
1236 scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
1237 dsl_scan_ddt(scn, tx);
1440 zfs_dbgmsg("freed %llu blocks in %llums from "
1441 "free_bpobj/bptree txg %llu",
1442 (longlong_t)scn->scn_visited_this_txg,
1443 (longlong_t)
1444 NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
1445 (longlong_t)tx->tx_txg);
1446 scn->scn_visited_this_txg = 0;
1447 /*
1448 * Re-sync the ddt so that we can further modify
1449 * it when doing bprewrite.
1450 */
1451 ddt_sync(spa, tx->tx_txg);
1452 }
1453 if (err == ERESTART)
1454 return;
1455 }
1456
1457 if (scn->scn_phys.scn_state != DSS_SCANNING)
1458 return;
1459
1460 if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
1461 scn->scn_phys.scn_ddt_class_max) {
1462 zfs_dbgmsg("doing scan sync txg %llu; "
1463 "ddt bm=%llu/%llu/%llu/%llx",
1464 (longlong_t)tx->tx_txg,
1465 (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
1466 (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
1467 (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
1468 (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
1469 ASSERT(scn->scn_phys.scn_bookmark.zb_objset == 0);
1470 ASSERT(scn->scn_phys.scn_bookmark.zb_object == 0);
1471 ASSERT(scn->scn_phys.scn_bookmark.zb_level == 0);
1472 ASSERT(scn->scn_phys.scn_bookmark.zb_blkid == 0);
1473 } else {
1474 zfs_dbgmsg("doing scan sync txg %llu; bm=%llu/%llu/%llu/%llu",
1475 (longlong_t)tx->tx_txg,
1476 (longlong_t)scn->scn_phys.scn_bookmark.zb_objset,
1477 (longlong_t)scn->scn_phys.scn_bookmark.zb_object,
1478 (longlong_t)scn->scn_phys.scn_bookmark.zb_level,
1479 (longlong_t)scn->scn_phys.scn_bookmark.zb_blkid);
1480 }
1481
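/*
 * Run this sync's traversal under a root zio so every scan I/O issued
 * beneath it can be waited on as a group, with the pool config lock
 * held across the visit.
 */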
1482 scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
1483 NULL, ZIO_FLAG_CANFAIL);
1484 dsl_pool_config_enter(dp, FTAG);
1485 dsl_scan_visit(scn, tx);
1486 dsl_pool_config_exit(dp, FTAG);
1487 (void) zio_wait(scn->scn_zio_root);
1488 scn->scn_zio_root = NULL;
1489
1490 zfs_dbgmsg("visited %llu blocks in %llums",
1491 (longlong_t)scn->scn_visited_this_txg,
1492 (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time));
1493
1494 if (!scn->scn_pausing) {
1495 /* finished with scan. */
1496 zfs_dbgmsg("finished scan txg %llu", (longlong_t)tx->tx_txg);
1497 dsl_scan_done(scn, B_TRUE, tx);
1498 }
1499
1500 if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
1501 mutex_enter(&spa->spa_scrub_lock);
1502 while (spa->spa_scrub_inflight > 0) {
1503 cv_wait(&spa->spa_scrub_io_cv,
1504 &spa->spa_scrub_lock);
1505 }
1506 mutex_exit(&spa->spa_scrub_lock);
1507 }
1508
1509 dsl_scan_sync_state(scn, tx);
1510 }
1511
1512 /*
1513 * This will start a new scan, or restart an existing one.
1514 */
1515 void
1516 dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg)
1517 {
|
177 dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
178 {
179 dsl_scan_t *scn = dmu_tx_pool(tx)->dp_scan;
180 pool_scan_func_t *funcp = arg;
181 dmu_object_type_t ot = 0;
182 dsl_pool_t *dp = scn->scn_dp;
183 spa_t *spa = dp->dp_spa;
184
185 ASSERT(scn->scn_phys.scn_state != DSS_SCANNING);
186 ASSERT(*funcp > POOL_SCAN_NONE && *funcp < POOL_SCAN_FUNCS);
187 bzero(&scn->scn_phys, sizeof (scn->scn_phys));
188 scn->scn_phys.scn_func = *funcp;
189 scn->scn_phys.scn_state = DSS_SCANNING;
190 scn->scn_phys.scn_min_txg = 0;
191 scn->scn_phys.scn_max_txg = tx->tx_txg;
192 scn->scn_phys.scn_ddt_class_max = DDT_CLASSES - 1; /* the entire DDT */
193 scn->scn_phys.scn_start_time = gethrestime_sec();
194 scn->scn_phys.scn_errors = 0;
195 scn->scn_phys.scn_to_examine = spa->spa_root_vdev->vdev_stat.vs_alloc;
196 scn->scn_restart_txg = 0;
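/* txg in which a completed traversal will be torn down; see dsl_scan_sync() */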
197 scn->scn_done_txg = 0;
198 spa_scan_stat_init(spa);
199
200 if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
201 scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;
202
203 /* rewrite all disk labels */
204 vdev_config_dirty(spa->spa_root_vdev);
205
206 if (vdev_resilver_needed(spa->spa_root_vdev,
207 &scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
208 spa_event_notify(spa, NULL, ESC_ZFS_RESILVER_START);
209 } else {
210 spa_event_notify(spa, NULL, ESC_ZFS_SCRUB_START);
211 }
212
213 spa->spa_scrub_started = B_TRUE;
214 /*
215 * If this is an incremental scrub, limit the DDT scrub phase
216 * to just the auto-ditto class (for correctness); the rest
217 * of the scrub should go faster using top-down pruning.
753 return;
754
755 /*
756 * If dsl_scan_ddt() has already visited this block, it will have
757 * already done any translations or scrubbing, so don't call the
758 * callback again.
759 */
760 if (ddt_class_contains(dp->dp_spa,
761 scn->scn_phys.scn_ddt_class_max, bp)) {
762 ASSERT(buf == NULL);
763 return;
764 }
765
766 /*
767 * If this block is from the future (after cur_max_txg), then we
768 * are doing this on behalf of a deleted snapshot, and we will
769 * revisit the future block on the next pass of this dataset.
770 * Don't scan it now unless we need to because something
771 * under it was modified.
772 */
773 if (BP_PHYSICAL_BIRTH(bp) <= scn->scn_phys.scn_cur_max_txg) {
774 scan_funcs[scn->scn_phys.scn_func](dp, bp, zb);
775 }
776 if (buf)
777 (void) arc_buf_remove_ref(buf, &buf);
778 }
779
780 static void
781 dsl_scan_visit_rootbp(dsl_scan_t *scn, dsl_dataset_t *ds, blkptr_t *bp,
782 dmu_tx_t *tx)
783 {
784 zbookmark_t zb;
785
786 SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
787 ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
788 dsl_scan_visitbp(bp, &zb, NULL, NULL,
789 ds, scn, DMU_OST_NONE, tx);
790
791 dprintf_ds(ds, "finished scan%s", "");
792 }
793
1198 ASSERT(error == 0 || error == ENOENT);
1199 ASSERT(error != ENOENT ||
1200 ddb->ddb_class > scn->scn_phys.scn_ddt_class_max);
1201 }
1202
1203 /* ARGSUSED */
1204 void
1205 dsl_scan_ddt_entry(dsl_scan_t *scn, enum zio_checksum checksum,
1206 ddt_entry_t *dde, dmu_tx_t *tx)
1207 {
1208 const ddt_key_t *ddk = &dde->dde_key;
1209 ddt_phys_t *ddp = dde->dde_phys;
1210 blkptr_t bp;
1211 zbookmark_t zb = { 0 };
1212
1213 if (scn->scn_phys.scn_state != DSS_SCANNING)
1214 return;
1215
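/*
 * Walk each physical slot of the entry; skip slots that are
 * unallocated or born after scn_max_txg, rebuild a blkptr for the
 * rest with ddt_bp_create(), and feed it to the active scan function.
 */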
1216 for (int p = 0; p < DDT_PHYS_TYPES; p++, ddp++) {
1217 if (ddp->ddp_phys_birth == 0 ||
1218 ddp->ddp_phys_birth > scn->scn_phys.scn_max_txg)
1219 continue;
1220 ddt_bp_create(checksum, ddk, ddp, &bp);
1221
1222 scn->scn_visited_this_txg++;
1223 scan_funcs[scn->scn_phys.scn_func](scn->scn_dp, &bp, &zb);
1224 }
1225 }
1226
1227 static void
1228 dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
1229 {
1230 dsl_pool_t *dp = scn->scn_dp;
1231 zap_cursor_t zc;
1232 zap_attribute_t za;
1233
1234 if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
1235 scn->scn_phys.scn_ddt_class_max) {
1236 scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
1237 scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
1238 dsl_scan_ddt(scn, tx);
1441 zfs_dbgmsg("freed %llu blocks in %llums from "
1442 "free_bpobj/bptree txg %llu",
1443 (longlong_t)scn->scn_visited_this_txg,
1444 (longlong_t)
1445 NSEC2MSEC(gethrtime() - scn->scn_sync_start_time),
1446 (longlong_t)tx->tx_txg);
1447 scn->scn_visited_this_txg = 0;
1448 /*
1449 * Re-sync the ddt so that we can further modify
1450 * it when doing bprewrite.
1451 */
1452 ddt_sync(spa, tx->tx_txg);
1453 }
1454 if (err == ERESTART)
1455 return;
1456 }
1457
1458 if (scn->scn_phys.scn_state != DSS_SCANNING)
1459 return;
1460
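/*
 * If an earlier sync recorded that traversal finished (scn_done_txg
 * equals this txg), complete the scan now: mark it done, verify no
 * scrub I/O remains in flight, write out the final scan state, and
 * return.
 */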
1461 if (scn->scn_done_txg == tx->tx_txg) {
1462 ASSERT(!scn->scn_pausing);
1463 /* finished with scan. */
1464 zfs_dbgmsg("txg %llu scan complete", tx->tx_txg);
1465 dsl_scan_done(scn, B_TRUE, tx);
1466 ASSERT3U(spa->spa_scrub_inflight, ==, 0);
1467 dsl_scan_sync_state(scn, tx);
1468 return;
1469 }
1470
1471 if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
1472 scn->scn_phys.scn_ddt_class_max) {
1473 zfs_dbgmsg("doing scan sync txg %llu; "
1474 "ddt bm=%llu/%llu/%llu/%llx",
1475 (longlong_t)tx->tx_txg,
1476 (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
1477 (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
1478 (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
1479 (longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
1480 ASSERT(scn->scn_phys.scn_bookmark.zb_objset == 0);
1481 ASSERT(scn->scn_phys.scn_bookmark.zb_object == 0);
1482 ASSERT(scn->scn_phys.scn_bookmark.zb_level == 0);
1483 ASSERT(scn->scn_phys.scn_bookmark.zb_blkid == 0);
1484 } else {
1485 zfs_dbgmsg("doing scan sync txg %llu; bm=%llu/%llu/%llu/%llu",
1486 (longlong_t)tx->tx_txg,
1487 (longlong_t)scn->scn_phys.scn_bookmark.zb_objset,
1488 (longlong_t)scn->scn_phys.scn_bookmark.zb_object,
1489 (longlong_t)scn->scn_phys.scn_bookmark.zb_level,
1490 (longlong_t)scn->scn_phys.scn_bookmark.zb_blkid);
1491 }
1492
1493 scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
1494 NULL, ZIO_FLAG_CANFAIL);
1495 dsl_pool_config_enter(dp, FTAG);
1496 dsl_scan_visit(scn, tx);
1497 dsl_pool_config_exit(dp, FTAG);
1498 (void) zio_wait(scn->scn_zio_root);
1499 scn->scn_zio_root = NULL;
1500
1501 zfs_dbgmsg("visited %llu blocks in %llums",
1502 (longlong_t)scn->scn_visited_this_txg,
1503 (longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time));
1504
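/*
 * Traversal finished without pausing.  Rather than completing the
 * scan immediately, record the next txg in scn_done_txg so it is
 * finalized at the start of the following sync, presumably to let
 * this txg's remaining scan work commit first.
 */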
1505 if (!scn->scn_pausing) {
1506 scn->scn_done_txg = tx->tx_txg + 1;
1507 zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu",
1508 tx->tx_txg, scn->scn_done_txg);
1509 }
1510
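/*
 * For scrubs and resilvers, drain any in-flight scrub I/O before the
 * updated scan state is synced out below.
 */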
1511 if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
1512 mutex_enter(&spa->spa_scrub_lock);
1513 while (spa->spa_scrub_inflight > 0) {
1514 cv_wait(&spa->spa_scrub_io_cv,
1515 &spa->spa_scrub_lock);
1516 }
1517 mutex_exit(&spa->spa_scrub_lock);
1518 }
1519
1520 dsl_scan_sync_state(scn, tx);
1521 }
1522
1523 /*
1524 * This will start a new scan, or restart an existing one.
1525 */
1526 void
1527 dsl_resilver_restart(dsl_pool_t *dp, uint64_t txg)
1528 {
|