Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
*** 248,276 ****
* otherwise-fatal errors, typically caused by on-disk corruption. When
* set, calls to zfs_panic_recover() will turn into warning messages.
*/
int zfs_recover = 0;
! extern int zfs_txg_synctime_ms;
/*
! * Expiration time in units of zfs_txg_synctime_ms. This value has two
! * meanings. First it is used to determine when the spa_deadman logic
! * should fire. By default the spa_deadman will fire if spa_sync has
! * not completed in 1000 * zfs_txg_synctime_ms (i.e. 1000 seconds).
! * Secondly, the value determines if an I/O is considered "hung".
! * Any I/O that has not completed in zfs_deadman_synctime is considered
! * "hung" resulting in a system panic.
*/
! uint64_t zfs_deadman_synctime = 1000ULL;
/*
* Override the zfs deadman behavior via /etc/system. By default the
* deadman is enabled except on VMware and sparc deployments.
*/
int zfs_deadman_enabled = -1;
/*
* ==========================================================================
* SPA config locking
* ==========================================================================
--- 248,289 ----
* otherwise-fatal errors, typically caused by on-disk corruption. When
* set, calls to zfs_panic_recover() will turn into warning messages.
*/
int zfs_recover = 0;
! /*
! * Expiration time in milliseconds. This value has two meanings. First it is
! * used to determine when the spa_deadman() logic should fire. By default the
! * spa_deadman() will fire if spa_sync() has not completed in 1000 seconds.
! * Secondly, the value determines if an I/O is considered "hung". Any I/O that
! * has not completed in zfs_deadman_synctime_ms is considered "hung" resulting
! * in a system panic.
! */
! uint64_t zfs_deadman_synctime_ms = 1000000ULL;
/*
! * Check time in milliseconds. This defines the frequency at which we check
! * for hung I/O.
*/
! uint64_t zfs_deadman_checktime_ms = 5000ULL;
/*
* Override the zfs deadman behavior via /etc/system. By default the
* deadman is enabled except on VMware and sparc deployments.
*/
int zfs_deadman_enabled = -1;
+ /*
+ * The worst case is single-sector max-parity RAID-Z blocks, in which
+ * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1)
+ * times the size; so just assume that. Add to this the fact that
+ * we can have up to 3 DVAs per bp, and one more factor of 2 because
+ * the block may be dittoed with up to 3 DVAs by ddt_sync(). Altogether,
+ * the worst case is:
+ * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2 == 24
+ */
+ int spa_asize_inflation = 24;
/*
* ==========================================================================
* SPA config locking
* ==========================================================================
*** 497,516 ****
hdlr.cyh_func = spa_deadman;
hdlr.cyh_arg = spa;
hdlr.cyh_level = CY_LOW_LEVEL;
! spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime *
! zfs_txg_synctime_ms);
/*
* This determines how often we need to check for hung I/Os after
* the cyclic has already fired. Since checking for hung I/Os is
* an expensive operation we don't want to check too frequently.
! * Instead wait for 5 synctimes before checking again.
*/
! when.cyt_interval = MSEC2NSEC(5 * zfs_txg_synctime_ms);
when.cyt_when = CY_INFINITY;
mutex_enter(&cpu_lock);
spa->spa_deadman_cycid = cyclic_add(&hdlr, &when);
mutex_exit(&cpu_lock);
--- 510,528 ----
hdlr.cyh_func = spa_deadman;
hdlr.cyh_arg = spa;
hdlr.cyh_level = CY_LOW_LEVEL;
! spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms);
/*
* This determines how often we need to check for hung I/Os after
* the cyclic has already fired. Since checking for hung I/Os is
* an expensive operation we don't want to check too frequently.
! * Instead wait for 5 seconds before checking again.
*/
! when.cyt_interval = MSEC2NSEC(zfs_deadman_checktime_ms);
when.cyt_when = CY_INFINITY;
mutex_enter(&cpu_lock);
spa->spa_deadman_cycid = cyclic_add(&hdlr, &when);
mutex_exit(&cpu_lock);
*** 1497,1514 ****
/* ARGSUSED */
uint64_t
spa_get_asize(spa_t *spa, uint64_t lsize)
{
! /*
! * The worst case is single-sector max-parity RAID-Z blocks, in which
! * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1)
! * times the size; so just assume that. Add to this the fact that
! * we can have up to 3 DVAs per bp, and one more factor of 2 because
! * the block may be dittoed with up to 3 DVAs by ddt_sync().
! */
! return (lsize * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2);
}
uint64_t
spa_get_dspace(spa_t *spa)
{
--- 1509,1519 ----
/* ARGSUSED */
uint64_t
spa_get_asize(spa_t *spa, uint64_t lsize)
{
! return (lsize * spa_asize_inflation);
}
uint64_t
spa_get_dspace(spa_t *spa)
{