Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
@@ -43,19 +43,19 @@
* transaction group states: open, quiescing, or syncing. At any given time,
* there may be an active txg associated with each state; each active txg may
* either be processing, or blocked waiting to enter the next state. There may
* be up to three active txgs, and there is always a txg in the open state
* (though it may be blocked waiting to enter the quiescing state). In broad
- * strokes, transactions — operations that change in-memory structures — are
+ * strokes, transactions -- operations that change in-memory structures -- are
* accepted into the txg in the open state, and are completed while the txg is
* in the open or quiescing states. The accumulated changes are written to
* disk in the syncing state.
*
* Open
*
* When a new txg becomes active, it first enters the open state. New
- * transactions — updates to in-memory structures — are assigned to the
+ * transactions -- updates to in-memory structures -- are assigned to the
* currently open txg. There is always a txg in the open state so that ZFS can
* accept new changes (though the txg may refuse new changes if it has hit
* some limit). ZFS advances the open txg to the next state for a variety of
* reasons such as it hitting a time or size threshold, or the execution of an
* administrative action that must be completed in the syncing state.
@@ -362,10 +362,11 @@
for (c = 0; c < max_ncpus; c++)
mutex_enter(&tx->tx_cpu[c].tc_open_lock);
ASSERT(txg == tx->tx_open_txg);
tx->tx_open_txg++;
+ tx->tx_open_time = gethrtime();
DTRACE_PROBE2(txg__quiescing, dsl_pool_t *, dp, uint64_t, txg);
DTRACE_PROBE2(txg__opened, dsl_pool_t *, dp, uint64_t, tx->tx_open_txg);
/*
@@ -452,11 +453,12 @@
txg_thread_enter(tx, &cpr);
start = delta = 0;
for (;;) {
- uint64_t timer, timeout = zfs_txg_timeout * hz;
+ uint64_t timeout = zfs_txg_timeout * hz;
+ uint64_t timer;
uint64_t txg;
/*
* We sync when we're scanning, there's someone waiting
* on us, or the quiesce thread has handed off a txg to
@@ -464,11 +466,12 @@
*/
timer = (delta >= timeout ? 0 : timeout - delta);
while (!dsl_scan_active(dp->dp_scan) &&
!tx->tx_exiting && timer > 0 &&
tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
- tx->tx_quiesced_txg == 0) {
+ tx->tx_quiesced_txg == 0 &&
+ dp->dp_dirty_total < zfs_dirty_data_sync) {
dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer);
delta = ddi_get_lbolt() - start;
timer = (delta > timeout ? 0 : timeout - delta);
@@ -639,10 +642,32 @@
while (tx->tx_open_txg < txg) {
cv_broadcast(&tx->tx_quiesce_more_cv);
cv_wait(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock);
}
mutex_exit(&tx->tx_sync_lock);
+}
+
+/*
+ * If there isn't a txg syncing or in the pipeline, push another txg through
+ * the pipeline by queiscing the open txg.
+ */
+void
+txg_kick(dsl_pool_t *dp)
+{
+ tx_state_t *tx = &dp->dp_tx;
+
+ ASSERT(!dsl_pool_config_held(dp));
+
+ mutex_enter(&tx->tx_sync_lock);
+ if (tx->tx_syncing_txg == 0 &&
+ tx->tx_quiesce_txg_waiting <= tx->tx_open_txg &&
+ tx->tx_sync_txg_waiting <= tx->tx_synced_txg &&
+ tx->tx_quiesced_txg <= tx->tx_synced_txg) {
+ tx->tx_quiesce_txg_waiting = tx->tx_open_txg + 1;
+ cv_broadcast(&tx->tx_quiesce_more_cv);
+ }
+ mutex_exit(&tx->tx_sync_lock);
}
boolean_t
txg_stalled(dsl_pool_t *dp)
{