Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
*** 85,102 ****
*/
static int zfs_ccw_retry_interval = 300;
typedef enum zti_modes {
ZTI_MODE_FIXED, /* value is # of threads (min 1) */
- ZTI_MODE_ONLINE_PERCENT, /* value is % of online CPUs */
ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */
ZTI_MODE_NULL, /* don't create a taskq */
ZTI_NMODES
} zti_modes_t;
#define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) }
- #define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 }
#define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 }
#define ZTI_NULL { ZTI_MODE_NULL, 0, 0 }
#define ZTI_N(n) ZTI_P(n, 1)
#define ZTI_ONE ZTI_N(1)
--- 85,100 ----
*** 144,154 ****
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
char **ereport);
static void spa_vdev_resilver_done(spa_t *spa);
! uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */
id_t zio_taskq_psrset_bind = PS_NONE;
boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
uint_t zio_taskq_basedc = 80; /* base duty cycle */
boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */
--- 142,152 ----
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
char **ereport);
static void spa_vdev_resilver_done(spa_t *spa);
! uint_t zio_taskq_batch_pct = 75; /* batch taskq threads: % of cpus in pset */
id_t zio_taskq_psrset_bind = PS_NONE;
boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
uint_t zio_taskq_basedc = 80; /* base duty cycle */
boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */
*** 840,852 ****
ASSERT3U(count, >, 0);
tqs->stqs_count = count;
tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP);
- for (uint_t i = 0; i < count; i++) {
- taskq_t *tq;
-
switch (mode) {
case ZTI_MODE_FIXED:
ASSERT3U(value, >=, 1);
value = MAX(value, 1);
break;
--- 838,847 ----
*** 855,875 ****
batch = B_TRUE;
flags |= TASKQ_THREADS_CPU_PCT;
value = zio_taskq_batch_pct;
break;
- case ZTI_MODE_ONLINE_PERCENT:
- flags |= TASKQ_THREADS_CPU_PCT;
- break;
-
default:
panic("unrecognized mode for %s_%s taskq (%u:%u) in "
"spa_activate()",
zio_type_name[t], zio_taskq_types[q], mode, value);
break;
}
if (count > 1) {
(void) snprintf(name, sizeof (name), "%s_%s_%u",
zio_type_name[t], zio_taskq_types[q], i);
} else {
(void) snprintf(name, sizeof (name), "%s_%s",
--- 850,869 ----
batch = B_TRUE;
flags |= TASKQ_THREADS_CPU_PCT;
value = zio_taskq_batch_pct;
break;
default:
panic("unrecognized mode for %s_%s taskq (%u:%u) in "
"spa_activate()",
zio_type_name[t], zio_taskq_types[q], mode, value);
break;
}
+ for (uint_t i = 0; i < count; i++) {
+ taskq_t *tq;
+
if (count > 1) {
(void) snprintf(name, sizeof (name), "%s_%s_%u",
zio_type_name[t], zio_taskq_types[q], i);
} else {
(void) snprintf(name, sizeof (name), "%s_%s",
*** 881,891 ****
flags |= TASKQ_DC_BATCH;
tq = taskq_create_sysdc(name, value, 50, INT_MAX,
spa->spa_proc, zio_taskq_basedc, flags);
} else {
! tq = taskq_create_proc(name, value, maxclsyspri, 50,
INT_MAX, spa->spa_proc, flags);
}
tqs->stqs_taskq[i] = tq;
}
--- 875,894 ----
flags |= TASKQ_DC_BATCH;
tq = taskq_create_sysdc(name, value, 50, INT_MAX,
spa->spa_proc, zio_taskq_basedc, flags);
} else {
! pri_t pri = maxclsyspri;
! /*
! * The write issue taskq can be extremely CPU
! * intensive. Run it at slightly lower priority
! * than the other taskqs.
! */
! if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE)
! pri--;
!
! tq = taskq_create_proc(name, value, pri, 50,
INT_MAX, spa->spa_proc, flags);
}
tqs->stqs_taskq[i] = tq;
}
*** 5735,5745 ****
--- 5738,5774 ----
zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp,
zio->io_flags));
return (0);
}
+ /*
+ * Run spa_free_sync_cb over bpl with one root zio as parent, then wait on it.
+ * Deliberately not inlined, so dtrace can time the syncing of frees.
+ */
static void
+ spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx)
+ {
+ zio_t *zio = zio_root(spa, NULL, NULL, 0); /* parent passed to the callback */
+ bplist_iterate(bpl, spa_free_sync_cb, zio, tx);
+ VERIFY(zio_wait(zio) == 0); /* the wait must return 0 */
+ }
+
+ /*
+ * Run spa_free_sync_cb over spa->spa_deferred_bpobj with one root zio as
+ * parent, then wait on it. Not inlined, so dtrace can time deferred frees.
+ */
+ static void
+ spa_sync_deferred_frees(spa_t *spa, dmu_tx_t *tx)
+ {
+ zio_t *zio = zio_root(spa, NULL, NULL, 0); /* parent passed to the callback */
+ VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj,
+ spa_free_sync_cb, zio, tx), ==, 0); /* the iteration must return 0 */
+ VERIFY0(zio_wait(zio)); /* the wait must return 0 */
+ }
+
+
+ static void
spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
{
char *packed = NULL;
size_t bufsize;
size_t nvsize = 0;
*** 6061,6071 ****
void
spa_sync(spa_t *spa, uint64_t txg)
{
dsl_pool_t *dp = spa->spa_dsl_pool;
objset_t *mos = spa->spa_meta_objset;
- bpobj_t *defer_bpo = &spa->spa_deferred_bpobj;
bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *vd;
dmu_tx_t *tx;
int error;
--- 6090,6099 ----
*** 6141,6154 ****
if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
!txg_list_empty(&dp->dp_dirty_dirs, txg) ||
!txg_list_empty(&dp->dp_sync_tasks, txg) ||
((dsl_scan_active(dp->dp_scan) ||
txg_sync_waiting(dp)) && !spa_shutting_down(spa))) {
! zio_t *zio = zio_root(spa, NULL, NULL, 0);
! VERIFY3U(bpobj_iterate(defer_bpo,
! spa_free_sync_cb, zio, tx), ==, 0);
! VERIFY0(zio_wait(zio));
}
/*
* Iterate to convergence.
*/
--- 6169,6179 ----
if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
!txg_list_empty(&dp->dp_dirty_dirs, txg) ||
!txg_list_empty(&dp->dp_sync_tasks, txg) ||
((dsl_scan_active(dp->dp_scan) ||
txg_sync_waiting(dp)) && !spa_shutting_down(spa))) {
! spa_sync_deferred_frees(spa, tx);
}
/*
* Iterate to convergence.
*/
*** 6162,6178 ****
ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
spa_errlog_sync(spa, txg);
dsl_pool_sync(dp, txg);
if (pass < zfs_sync_pass_deferred_free) {
! zio_t *zio = zio_root(spa, NULL, NULL, 0);
! bplist_iterate(free_bpl, spa_free_sync_cb,
! zio, tx);
! VERIFY(zio_wait(zio) == 0);
} else {
bplist_iterate(free_bpl, bpobj_enqueue_cb,
! defer_bpo, tx);
}
ddt_sync(spa, txg);
dsl_scan_sync(dp, tx);
--- 6187,6200 ----
ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
spa_errlog_sync(spa, txg);
dsl_pool_sync(dp, txg);
if (pass < zfs_sync_pass_deferred_free) {
! spa_sync_frees(spa, free_bpl, tx);
} else {
bplist_iterate(free_bpl, bpobj_enqueue_cb,
! &spa->spa_deferred_bpobj, tx);
}
ddt_sync(spa, txg);
dsl_scan_sync(dp, tx);