Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>

*** 85,102 **** */ static int zfs_ccw_retry_interval = 300; typedef enum zti_modes { ZTI_MODE_FIXED, /* value is # of threads (min 1) */ - ZTI_MODE_ONLINE_PERCENT, /* value is % of online CPUs */ ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */ ZTI_MODE_NULL, /* don't create a taskq */ ZTI_NMODES } zti_modes_t; #define ZTI_P(n, q) { ZTI_MODE_FIXED, (n), (q) } - #define ZTI_PCT(n) { ZTI_MODE_ONLINE_PERCENT, (n), 1 } #define ZTI_BATCH { ZTI_MODE_BATCH, 0, 1 } #define ZTI_NULL { ZTI_MODE_NULL, 0, 0 } #define ZTI_N(n) ZTI_P(n, 1) #define ZTI_ONE ZTI_N(1) --- 85,100 ----
*** 144,154 **** static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, char **ereport); static void spa_vdev_resilver_done(spa_t *spa); ! uint_t zio_taskq_batch_pct = 100; /* 1 thread per cpu in pset */ id_t zio_taskq_psrset_bind = PS_NONE; boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ uint_t zio_taskq_basedc = 80; /* base duty cycle */ boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ --- 142,152 ---- static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config, spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig, char **ereport); static void spa_vdev_resilver_done(spa_t *spa); ! uint_t zio_taskq_batch_pct = 75; /* 1 thread per cpu in pset */ id_t zio_taskq_psrset_bind = PS_NONE; boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */ uint_t zio_taskq_basedc = 80; /* base duty cycle */ boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */
*** 840,852 **** ASSERT3U(count, >, 0); tqs->stqs_count = count; tqs->stqs_taskq = kmem_alloc(count * sizeof (taskq_t *), KM_SLEEP); - for (uint_t i = 0; i < count; i++) { - taskq_t *tq; - switch (mode) { case ZTI_MODE_FIXED: ASSERT3U(value, >=, 1); value = MAX(value, 1); break; --- 838,847 ----
*** 855,875 **** batch = B_TRUE; flags |= TASKQ_THREADS_CPU_PCT; value = zio_taskq_batch_pct; break; - case ZTI_MODE_ONLINE_PERCENT: - flags |= TASKQ_THREADS_CPU_PCT; - break; - default: panic("unrecognized mode for %s_%s taskq (%u:%u) in " "spa_activate()", zio_type_name[t], zio_taskq_types[q], mode, value); break; } if (count > 1) { (void) snprintf(name, sizeof (name), "%s_%s_%u", zio_type_name[t], zio_taskq_types[q], i); } else { (void) snprintf(name, sizeof (name), "%s_%s", --- 850,869 ---- batch = B_TRUE; flags |= TASKQ_THREADS_CPU_PCT; value = zio_taskq_batch_pct; break; default: panic("unrecognized mode for %s_%s taskq (%u:%u) in " "spa_activate()", zio_type_name[t], zio_taskq_types[q], mode, value); break; } + for (uint_t i = 0; i < count; i++) { + taskq_t *tq; + if (count > 1) { (void) snprintf(name, sizeof (name), "%s_%s_%u", zio_type_name[t], zio_taskq_types[q], i); } else { (void) snprintf(name, sizeof (name), "%s_%s",
*** 881,891 **** flags |= TASKQ_DC_BATCH; tq = taskq_create_sysdc(name, value, 50, INT_MAX, spa->spa_proc, zio_taskq_basedc, flags); } else { ! tq = taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX, spa->spa_proc, flags); } tqs->stqs_taskq[i] = tq; } --- 875,894 ---- flags |= TASKQ_DC_BATCH; tq = taskq_create_sysdc(name, value, 50, INT_MAX, spa->spa_proc, zio_taskq_basedc, flags); } else { ! pri_t pri = maxclsyspri; ! /* ! * The write issue taskq can be extremely CPU ! * intensive. Run it at slightly lower priority ! * than the other taskqs. ! */ ! if (t == ZIO_TYPE_WRITE && q == ZIO_TASKQ_ISSUE) ! pri--; ! ! tq = taskq_create_proc(name, value, pri, 50, INT_MAX, spa->spa_proc, flags); } tqs->stqs_taskq[i] = tq; }
*** 5735,5745 **** --- 5738,5774 ---- zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp, zio->io_flags)); return (0); } + /* + * Note: this simple function is not inlined to make it easier to dtrace the + * amount of time spent syncing frees. + */ static void + spa_sync_frees(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx) + { + zio_t *zio = zio_root(spa, NULL, NULL, 0); + bplist_iterate(bpl, spa_free_sync_cb, zio, tx); + VERIFY(zio_wait(zio) == 0); + } + + /* + * Note: this simple function is not inlined to make it easier to dtrace the + * amount of time spent syncing deferred frees. + */ + static void + spa_sync_deferred_frees(spa_t *spa, dmu_tx_t *tx) + { + zio_t *zio = zio_root(spa, NULL, NULL, 0); + VERIFY3U(bpobj_iterate(&spa->spa_deferred_bpobj, + spa_free_sync_cb, zio, tx), ==, 0); + VERIFY0(zio_wait(zio)); + } + + + static void spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx) { char *packed = NULL; size_t bufsize; size_t nvsize = 0;
*** 6061,6071 **** void spa_sync(spa_t *spa, uint64_t txg) { dsl_pool_t *dp = spa->spa_dsl_pool; objset_t *mos = spa->spa_meta_objset; - bpobj_t *defer_bpo = &spa->spa_deferred_bpobj; bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK]; vdev_t *rvd = spa->spa_root_vdev; vdev_t *vd; dmu_tx_t *tx; int error; --- 6090,6099 ----
*** 6141,6154 **** if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || !txg_list_empty(&dp->dp_dirty_dirs, txg) || !txg_list_empty(&dp->dp_sync_tasks, txg) || ((dsl_scan_active(dp->dp_scan) || txg_sync_waiting(dp)) && !spa_shutting_down(spa))) { ! zio_t *zio = zio_root(spa, NULL, NULL, 0); ! VERIFY3U(bpobj_iterate(defer_bpo, ! spa_free_sync_cb, zio, tx), ==, 0); ! VERIFY0(zio_wait(zio)); } /* * Iterate to convergence. */ --- 6169,6179 ---- if (!txg_list_empty(&dp->dp_dirty_datasets, txg) || !txg_list_empty(&dp->dp_dirty_dirs, txg) || !txg_list_empty(&dp->dp_sync_tasks, txg) || ((dsl_scan_active(dp->dp_scan) || txg_sync_waiting(dp)) && !spa_shutting_down(spa))) { ! spa_sync_deferred_frees(spa, tx); } /* * Iterate to convergence. */
*** 6162,6178 **** ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); spa_errlog_sync(spa, txg); dsl_pool_sync(dp, txg); if (pass < zfs_sync_pass_deferred_free) { ! zio_t *zio = zio_root(spa, NULL, NULL, 0); ! bplist_iterate(free_bpl, spa_free_sync_cb, ! zio, tx); ! VERIFY(zio_wait(zio) == 0); } else { bplist_iterate(free_bpl, bpobj_enqueue_cb, ! defer_bpo, tx); } ddt_sync(spa, txg); dsl_scan_sync(dp, tx); --- 6187,6200 ---- ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE); spa_errlog_sync(spa, txg); dsl_pool_sync(dp, txg); if (pass < zfs_sync_pass_deferred_free) { ! spa_sync_frees(spa, free_bpl, tx); } else { bplist_iterate(free_bpl, bpobj_enqueue_cb, ! &spa->spa_deferred_bpobj, tx); } ddt_sync(spa, txg); dsl_scan_sync(dp, tx);