4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>

Old version (usr/src/uts/common/fs/zfs/dsl_scan.c before this change):

1610         spa->spa_scrub_inflight--;
1611         cv_broadcast(&spa->spa_scrub_io_cv);
1612 
1613         if (zio->io_error && (zio->io_error != ECKSUM ||
1614             !(zio->io_flags & ZIO_FLAG_SPECULATIVE))) {
1615                 spa->spa_dsl_pool->dp_scan->scn_phys.scn_errors++;
1616         }
1617         mutex_exit(&spa->spa_scrub_lock);
1618 }
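
The decrement-and-broadcast above is one half of a bounded in-flight throttle: dsl_scan_scrub_cb (below) blocks on spa_scrub_io_cv while spa_scrub_inflight is at its ceiling, and each completing scrub I/O frees a slot and wakes the issuer. A minimal sketch of the pattern, with illustrative names rather than the real ZFS identifiers:

    /* Minimal sketch; the names here are illustrative, not the ZFS ones. */
    typedef struct throttle {
            kmutex_t        t_lock;
            kcondvar_t      t_cv;
            uint64_t        t_inflight;     /* I/Os currently issued */
            uint64_t        t_max;          /* ceiling */
    } throttle_t;

    static void
    throttle_enter(throttle_t *t)           /* before issuing an I/O */
    {
            mutex_enter(&t->t_lock);
            while (t->t_inflight >= t->t_max)
                    cv_wait(&t->t_cv, &t->t_lock);  /* wait for a free slot */
            t->t_inflight++;
            mutex_exit(&t->t_lock);
    }

    static void
    throttle_exit(throttle_t *t)            /* from the I/O done callback */
    {
            mutex_enter(&t->t_lock);
            t->t_inflight--;
            cv_broadcast(&t->t_cv);         /* wake blocked issuers */
            mutex_exit(&t->t_lock);
    }
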
1619 
1620 static int
1621 dsl_scan_scrub_cb(dsl_pool_t *dp,
1622     const blkptr_t *bp, const zbookmark_t *zb)
1623 {
1624         dsl_scan_t *scn = dp->dp_scan;
1625         size_t size = BP_GET_PSIZE(bp);
1626         spa_t *spa = dp->dp_spa;
1627         uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp);
1628         boolean_t needs_io;
1629         int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;
1630         int zio_priority;
1631         int scan_delay = 0;
1632 
1633         if (phys_birth <= scn->scn_phys.scn_min_txg ||
1634             phys_birth >= scn->scn_phys.scn_max_txg)
1635                 return (0);
1636 
1637         count_block(dp->dp_blkstats, bp);
1638 
1639         ASSERT(DSL_SCAN_IS_SCRUB_RESILVER(scn));
1640         if (scn->scn_phys.scn_func == POOL_SCAN_SCRUB) {
1641                 zio_flags |= ZIO_FLAG_SCRUB;
1642                 zio_priority = ZIO_PRIORITY_SCRUB;
1643                 needs_io = B_TRUE;
1644                 scan_delay = zfs_scrub_delay;
1645         } else {
1646                 ASSERT3U(scn->scn_phys.scn_func, ==, POOL_SCAN_RESILVER);
1647                 zio_flags |= ZIO_FLAG_RESILVER;
1648                 zio_priority = ZIO_PRIORITY_RESILVER;
1649                 needs_io = B_FALSE;
1650                 scan_delay = zfs_resilver_delay;
1651         }
1652 
1653         /* If it's an intent log block, failure is expected. */
1654         if (zb->zb_level == ZB_ZIL_LEVEL)
1655                 zio_flags |= ZIO_FLAG_SPECULATIVE;
1656 
1657         for (int d = 0; d < BP_GET_NDVAS(bp); d++) {
1658                 vdev_t *vd = vdev_lookup_top(spa,
1659                     DVA_GET_VDEV(&bp->blk_dva[d]));
1660 
1661                 /*
1662                  * Keep track of how much data we've examined so that
1663                  * zpool(1M) status can make useful progress reports.
1664                  */
1665                 scn->scn_phys.scn_examined += DVA_GET_ASIZE(&bp->blk_dva[d]);
1666                 spa->spa_scan_pass_exam += DVA_GET_ASIZE(&bp->blk_dva[d]);
1667 
1668                 /* if it's a resilver, this may not be in the target range */
        [lines 1669-1685 elided]
1686 
1687         if (needs_io && !zfs_no_scrub_io) {
1688                 vdev_t *rvd = spa->spa_root_vdev;
1689                 uint64_t maxinflight = rvd->vdev_children * zfs_top_maxinflight;
1690                 void *data = zio_data_buf_alloc(size);
1691 
1692                 mutex_enter(&spa->spa_scrub_lock);
1693                 while (spa->spa_scrub_inflight >= maxinflight)
1694                         cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
1695                 spa->spa_scrub_inflight++;
1696                 mutex_exit(&spa->spa_scrub_lock);
1697 
1698                 /*
1699                  * If we're seeing recent (zfs_scan_idle) "important" I/Os
1700                  * then throttle our workload to limit the impact of a scan.
1701                  */
1702                 if (ddi_get_lbolt64() - spa->spa_last_io <= zfs_scan_idle)
1703                         delay(scan_delay);
1704 
1705                 zio_nowait(zio_read(NULL, spa, bp, data, size,
1706                     dsl_scan_scrub_done, NULL, zio_priority,
1707                     zio_flags, zb));
1708         }
1709 
1710         /* do not relocate this block */
1711         return (0);
1712 }
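
Two knobs bound the scan's impact in the code above. The in-flight ceiling scales with pool width: maxinflight = vdev_children * zfs_top_maxinflight. The second knob is time-based: if any non-scan ("important") I/O has been seen within the last zfs_scan_idle ticks, each scan read is preceded by a delay() of scan_delay ticks. A worked sketch; the tunable defaults used in the comments (32, 50, 4, 2) are assumptions about the stock values, not taken from the diff above:

    uint64_t maxinflight = rvd->vdev_children * zfs_top_maxinflight;
            /* e.g. 10 top-level vdevs * 32 = 320 scan reads in flight */

    if (ddi_get_lbolt64() - spa->spa_last_io <= zfs_scan_idle)
            delay(scan_delay);
            /*
             * Non-scan I/O seen within the last ~50 ticks: stall this
             * issuing thread ~4 ticks per scrub read (~2 per resilver
             * read) so user I/O keeps the disks.
             */
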
1713 
1714 int
1715 dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
1716 {
1717         spa_t *spa = dp->dp_spa;
1718 
1719         /*
1720          * Purge all vdev caches and probe all devices.  We do this here
1721          * rather than in sync context because this requires a writer lock
1722          * on the spa_config lock, which we can't do from sync context.  The
1723          * spa_scrub_reopen flag indicates that vdev_open() should not
1724          * attempt to start another scrub.
1725          */
1726         spa_vdev_state_enter(spa, SCL_NONE);

New version (same file after this change; blank, unnumbered lines mark code the change removed):

1610         spa->spa_scrub_inflight--;
1611         cv_broadcast(&spa->spa_scrub_io_cv);
1612 
1613         if (zio->io_error && (zio->io_error != ECKSUM ||
1614             !(zio->io_flags & ZIO_FLAG_SPECULATIVE))) {
1615                 spa->spa_dsl_pool->dp_scan->scn_phys.scn_errors++;
1616         }
1617         mutex_exit(&spa->spa_scrub_lock);
1618 }
1619 
1620 static int
1621 dsl_scan_scrub_cb(dsl_pool_t *dp,
1622     const blkptr_t *bp, const zbookmark_t *zb)
1623 {
1624         dsl_scan_t *scn = dp->dp_scan;
1625         size_t size = BP_GET_PSIZE(bp);
1626         spa_t *spa = dp->dp_spa;
1627         uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp);
1628         boolean_t needs_io;
1629         int zio_flags = ZIO_FLAG_SCAN_THREAD | ZIO_FLAG_RAW | ZIO_FLAG_CANFAIL;

1630         int scan_delay = 0;
1631 
1632         if (phys_birth <= scn->scn_phys.scn_min_txg ||
1633             phys_birth >= scn->scn_phys.scn_max_txg)
1634                 return (0);
1635 
1636         count_block(dp->dp_blkstats, bp);
1637 
1638         ASSERT(DSL_SCAN_IS_SCRUB_RESILVER(scn));
1639         if (scn->scn_phys.scn_func == POOL_SCAN_SCRUB) {
1640                 zio_flags |= ZIO_FLAG_SCRUB;

1641                 needs_io = B_TRUE;
1642                 scan_delay = zfs_scrub_delay;
1643         } else {
1644                 ASSERT3U(scn->scn_phys.scn_func, ==, POOL_SCAN_RESILVER);
1645                 zio_flags |= ZIO_FLAG_RESILVER;

1646                 needs_io = B_FALSE;
1647                 scan_delay = zfs_resilver_delay;
1648         }
1649 
1650         /* If it's an intent log block, failure is expected. */
1651         if (zb->zb_level == ZB_ZIL_LEVEL)
1652                 zio_flags |= ZIO_FLAG_SPECULATIVE;
1653 
1654         for (int d = 0; d < BP_GET_NDVAS(bp); d++) {
1655                 vdev_t *vd = vdev_lookup_top(spa,
1656                     DVA_GET_VDEV(&bp->blk_dva[d]));
1657 
1658                 /*
1659                  * Keep track of how much data we've examined so that
1660                  * zpool(1M) status can make useful progress reports.
1661                  */
1662                 scn->scn_phys.scn_examined += DVA_GET_ASIZE(&bp->blk_dva[d]);
1663                 spa->spa_scan_pass_exam += DVA_GET_ASIZE(&bp->blk_dva[d]);
1664 
1665                 /* if it's a resilver, this may not be in the target range */
        [lines 1666-1682 elided]
1683 
1684         if (needs_io && !zfs_no_scrub_io) {
1685                 vdev_t *rvd = spa->spa_root_vdev;
1686                 uint64_t maxinflight = rvd->vdev_children * zfs_top_maxinflight;
1687                 void *data = zio_data_buf_alloc(size);
1688 
1689                 mutex_enter(&spa->spa_scrub_lock);
1690                 while (spa->spa_scrub_inflight >= maxinflight)
1691                         cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
1692                 spa->spa_scrub_inflight++;
1693                 mutex_exit(&spa->spa_scrub_lock);
1694 
1695                 /*
1696                  * If we're seeing recent (zfs_scan_idle) "important" I/Os
1697                  * then throttle our workload to limit the impact of a scan.
1698                  */
1699                 if (ddi_get_lbolt64() - spa->spa_last_io <= zfs_scan_idle)
1700                         delay(scan_delay);
1701 
1702                 zio_nowait(zio_read(NULL, spa, bp, data, size,
1703                     dsl_scan_scrub_done, NULL, ZIO_PRIORITY_SCRUB,
1704                     zio_flags, zb));
1705         }
1706 
1707         /* do not relocate this block */
1708         return (0);
1709 }
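
The delta between the two versions is confined to priorities: the zio_priority local is gone, ZIO_PRIORITY_RESILVER is no longer assigned, and both scan types now issue their reads at ZIO_PRIORITY_SCRUB, leaving prioritization to the new I/O scheduler. ZIO_FLAG_SCRUB and ZIO_FLAG_RESILVER still record which kind of scan owns the I/O. In outline:

    /* old: priority picked per scan type */
    zio_priority = (scn->scn_phys.scn_func == POOL_SCAN_SCRUB) ?
        ZIO_PRIORITY_SCRUB : ZIO_PRIORITY_RESILVER;
    zio_nowait(zio_read(NULL, spa, bp, data, size,
        dsl_scan_scrub_done, NULL, zio_priority, zio_flags, zb));

    /* new: every queued scan read shares ZIO_PRIORITY_SCRUB */
    zio_nowait(zio_read(NULL, spa, bp, data, size,
        dsl_scan_scrub_done, NULL, ZIO_PRIORITY_SCRUB, zio_flags, zb));
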
1710 
1711 int
1712 dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
1713 {
1714         spa_t *spa = dp->dp_spa;
1715 
1716         /*
1717          * Purge all vdev caches and probe all devices.  We do this here
1718          * rather than in sync context because this requires a writer lock
1719          * on the spa_config lock, which we can't do from sync context.  The
1720          * spa_scrub_reopen flag indicates that vdev_open() should not
1721          * attempt to start another scrub.
1722          */
1723         spa_vdev_state_enter(spa, SCL_NONE);
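
The listing breaks off here, but the comment spells out the pattern: take the vdev state lock from open context, set spa_scrub_reopen so vdev_open() knows the reopen is not a request to start another scrub, reopen the vdev tree, then drop the lock. A sketch of that sequence, assuming the conventional illumos calls (vdev_reopen() on the root vdev, spa_vdev_state_exit()); the actual continuation is not shown above:

    spa->spa_scrub_reopen = B_TRUE;         /* suppress scrub-on-open */
    vdev_reopen(spa->spa_root_vdev);        /* purge caches, probe devices */
    spa->spa_scrub_reopen = B_FALSE;
    (void) spa_vdev_state_exit(spa, NULL, 0);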