4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>

*** 248,276 ****
   * otherwise-fatal errors, typically caused by on-disk corruption. When
   * set, calls to zfs_panic_recover() will turn into warning messages.
   */
  int zfs_recover = 0;
! extern int zfs_txg_synctime_ms;
  /*
!  * Expiration time in units of zfs_txg_synctime_ms. This value has two
!  * meanings. First it is used to determine when the spa_deadman logic
!  * should fire. By default the spa_deadman will fire if spa_sync has
!  * not completed in 1000 * zfs_txg_synctime_ms (i.e. 1000 seconds).
!  * Secondly, the value determines if an I/O is considered "hung".
!  * Any I/O that has not completed in zfs_deadman_synctime is considered
!  * "hung" resulting in a system panic.
   */
! uint64_t zfs_deadman_synctime = 1000ULL;
  /*
   * Override the zfs deadman behavior via /etc/system. By default the
   * deadman is enabled except on VMware and sparc deployments.
   */
  int zfs_deadman_enabled = -1;
  /*
   * ==========================================================================
   * SPA config locking
   * ==========================================================================
--- 248,289 ----
   * otherwise-fatal errors, typically caused by on-disk corruption. When
   * set, calls to zfs_panic_recover() will turn into warning messages.
   */
  int zfs_recover = 0;
! /*
!  * Expiration time in milliseconds. This value has two meanings. First it is
!  * used to determine when the spa_deadman() logic should fire. By default the
!  * spa_deadman() will fire if spa_sync() has not completed in 1000 seconds.
!  * Secondly, the value determines if an I/O is considered "hung". Any I/O that
!  * has not completed in zfs_deadman_synctime_ms is considered "hung" resulting
!  * in a system panic.
!  */
! uint64_t zfs_deadman_synctime_ms = 1000000ULL;
  /*
!  * Check time in milliseconds. This defines the frequency at which we check
!  * for hung I/O.
   */
! uint64_t zfs_deadman_checktime_ms = 5000ULL;
  /*
   * Override the zfs deadman behavior via /etc/system. By default the
   * deadman is enabled except on VMware and sparc deployments.
   */
  int zfs_deadman_enabled = -1;
+ /*
+  * The worst case is single-sector max-parity RAID-Z blocks, in which
+  * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1)
+  * times the size; so just assume that. Add to this the fact that
+  * we can have up to 3 DVAs per bp, and one more factor of 2 because
+  * the block may be dittoed with up to 3 DVAs by ddt_sync(). All together,
+  * the worst case is:
+  *     (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2 == 24
+  */
+ int spa_asize_inflation = 24;
  /*
   * ==========================================================================
   * SPA config locking
   * ==========================================================================
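
A note on the new defaults (not part of the diff): the rename does not change the default
expiry. The old comment puts it at 1000 * zfs_txg_synctime_ms, i.e. 1000 seconds with the
then-default zfs_txg_synctime_ms of 1000 ms, and the new zfs_deadman_synctime_ms default of
1000000 ms is the same 1000 seconds expressed directly in milliseconds. A minimal standalone
sketch of that arithmetic, assuming only the defaults quoted in the hunk above:

/* Hedged sketch, not illumos source: old and new deadman expiry defaults agree. */
#include <assert.h>
#include <stdio.h>
#include <stdint.h>

int
main(void)
{
        uint64_t zfs_txg_synctime_ms = 1000ULL;         /* old default implied by the comment */
        uint64_t zfs_deadman_synctime = 1000ULL;        /* old tunable: units of zfs_txg_synctime_ms */
        uint64_t zfs_deadman_synctime_ms = 1000000ULL;  /* new tunable: plain milliseconds */

        uint64_t old_ms = zfs_deadman_synctime * zfs_txg_synctime_ms;  /* 1,000,000 ms */
        uint64_t new_ms = zfs_deadman_synctime_ms;                     /* 1,000,000 ms */

        assert(old_ms == new_ms);
        printf("deadman expiry: %llu seconds either way\n",
            (unsigned long long)(new_ms / 1000));
        return (0);
}
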
*** 497,516 ****
          hdlr.cyh_func = spa_deadman;
          hdlr.cyh_arg = spa;
          hdlr.cyh_level = CY_LOW_LEVEL;
!         spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime *
!             zfs_txg_synctime_ms);
          /*
           * This determines how often we need to check for hung I/Os after
           * the cyclic has already fired. Since checking for hung I/Os is
           * an expensive operation we don't want to check too frequently.
!          * Instead wait for 5 synctimes before checking again.
           */
!         when.cyt_interval = MSEC2NSEC(5 * zfs_txg_synctime_ms);
          when.cyt_when = CY_INFINITY;
          mutex_enter(&cpu_lock);
          spa->spa_deadman_cycid = cyclic_add(&hdlr, &when);
          mutex_exit(&cpu_lock);
--- 510,528 ----
          hdlr.cyh_func = spa_deadman;
          hdlr.cyh_arg = spa;
          hdlr.cyh_level = CY_LOW_LEVEL;
!         spa->spa_deadman_synctime = MSEC2NSEC(zfs_deadman_synctime_ms);
          /*
           * This determines how often we need to check for hung I/Os after
           * the cyclic has already fired. Since checking for hung I/Os is
           * an expensive operation we don't want to check too frequently.
!          * Instead wait for 5 seconds before checking again.
           */
!         when.cyt_interval = MSEC2NSEC(zfs_deadman_checktime_ms);
          when.cyt_when = CY_INFINITY;
          mutex_enter(&cpu_lock);
          spa->spa_deadman_cycid = cyclic_add(&hdlr, &when);
          mutex_exit(&cpu_lock);
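
Likewise for the check interval (again a reviewer's note, not part of the diff): with the old
zfs_txg_synctime_ms default of 1000 ms, 5 * zfs_txg_synctime_ms and the new
zfs_deadman_checktime_ms of 5000 ms both yield the same five-second cyclic interval. A small
sketch, with MSEC2NSEC stubbed locally on the assumption that it simply scales milliseconds
to nanoseconds like the kernel macro:

/* Hedged sketch, not illumos source: old vs. new hung-I/O check interval. */
#include <assert.h>
#include <stdio.h>
#include <stdint.h>

/* Local stand-in for the kernel's MSEC2NSEC(): milliseconds to nanoseconds. */
#define MSEC2NSEC(m)    ((uint64_t)(m) * 1000000ULL)

int
main(void)
{
        uint64_t zfs_txg_synctime_ms = 1000ULL;         /* old default */
        uint64_t zfs_deadman_checktime_ms = 5000ULL;    /* new tunable */

        uint64_t old_interval = MSEC2NSEC(5 * zfs_txg_synctime_ms);    /* 5e9 ns */
        uint64_t new_interval = MSEC2NSEC(zfs_deadman_checktime_ms);   /* 5e9 ns */

        assert(old_interval == new_interval);
        printf("cyt_interval = %llu ns (5 seconds)\n",
            (unsigned long long)new_interval);
        return (0);
}
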
*** 1497,1514 ****
  /* ARGSUSED */
  uint64_t
  spa_get_asize(spa_t *spa, uint64_t lsize)
  {
!         /*
!          * The worst case is single-sector max-parity RAID-Z blocks, in which
!          * case the space requirement is exactly (VDEV_RAIDZ_MAXPARITY + 1)
!          * times the size; so just assume that. Add to this the fact that
!          * we can have up to 3 DVAs per bp, and one more factor of 2 because
!          * the block may be dittoed with up to 3 DVAs by ddt_sync().
!          */
!         return (lsize * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2);
  }
  uint64_t
  spa_get_dspace(spa_t *spa)
  {
--- 1509,1519 ----
  /* ARGSUSED */
  uint64_t
  spa_get_asize(spa_t *spa, uint64_t lsize)
  {
!         return (lsize * spa_asize_inflation);
  }
  uint64_t
  spa_get_dspace(spa_t *spa)
  {
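
A quick sanity check on the factored-out constant (not part of the diff): with the stock
values VDEV_RAIDZ_MAXPARITY == 3 and SPA_DVAS_PER_BP == 3, the worst-case expression quoted
in the comment gives (3 + 1) * 3 * 2 == 24, so the new spa_asize_inflation default reproduces
the old return value of spa_get_asize() exactly. A minimal sketch, with the two macros
assumed to carry those usual values:

/* Hedged sketch, not illumos source: the old expression and the new constant agree. */
#include <assert.h>
#include <stdio.h>
#include <stdint.h>

#define VDEV_RAIDZ_MAXPARITY    3       /* raidz1..raidz3 */
#define SPA_DVAS_PER_BP         3       /* up to three DVAs per block pointer */

int
main(void)
{
        int spa_asize_inflation = 24;   /* new default from the hunk above */
        uint64_t lsize = 512;           /* e.g. a single-sector logical block */

        uint64_t old_asize = lsize * (VDEV_RAIDZ_MAXPARITY + 1) * SPA_DVAS_PER_BP * 2;
        uint64_t new_asize = lsize * spa_asize_inflation;

        assert(old_asize == new_asize);         /* 512 * 24 == 12288 */
        printf("worst-case asize for %llu bytes: %llu bytes\n",
            (unsigned long long)lsize, (unsigned long long)new_asize);
        return (0);
}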