Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>

@@ -43,19 +43,19 @@
  * transaction group states: open, quiescing, or syncing. At any given time,
  * there may be an active txg associated with each state; each active txg may
  * either be processing, or blocked waiting to enter the next state. There may
  * be up to three active txgs, and there is always a txg in the open state
  * (though it may be blocked waiting to enter the quiescing state). In broad
- * strokes, transactions — operations that change in-memory structures — are
+ * strokes, transactions -- operations that change in-memory structures -- are
  * accepted into the txg in the open state, and are completed while the txg is
  * in the open or quiescing states. The accumulated changes are written to
  * disk in the syncing state.
  *
  * Open
  *
  * When a new txg becomes active, it first enters the open state. New
- * transactions — updates to in-memory structures — are assigned to the
+ * transactions -- updates to in-memory structures -- are assigned to the
  * currently open txg. There is always a txg in the open state so that ZFS can
  * accept new changes (though the txg may refuse new changes if it has hit
  * some limit). ZFS advances the open txg to the next state for a variety of
  * reasons such as it hitting a time or size threshold, or the execution of an
  * administrative action that must be completed in the syncing state.

@@ -362,10 +362,11 @@
         for (c = 0; c < max_ncpus; c++)
                 mutex_enter(&tx->tx_cpu[c].tc_open_lock);
 
         ASSERT(txg == tx->tx_open_txg);
         tx->tx_open_txg++;
+        tx->tx_open_time = gethrtime();
 
         DTRACE_PROBE2(txg__quiescing, dsl_pool_t *, dp, uint64_t, txg);
         DTRACE_PROBE2(txg__opened, dsl_pool_t *, dp, uint64_t, tx->tx_open_txg);
 
         /*

@@ -452,11 +453,12 @@
 
         txg_thread_enter(tx, &cpr);
 
         start = delta = 0;
         for (;;) {
-                uint64_t timer, timeout = zfs_txg_timeout * hz;
+                uint64_t timeout = zfs_txg_timeout * hz;
+                uint64_t timer;
                 uint64_t txg;
 
                 /*
                  * We sync when we're scanning, there's someone waiting
                  * on us, or the quiesce thread has handed off a txg to

@@ -464,11 +466,12 @@
                  */
                 timer = (delta >= timeout ? 0 : timeout - delta);
                 while (!dsl_scan_active(dp->dp_scan) &&
                     !tx->tx_exiting && timer > 0 &&
                     tx->tx_synced_txg >= tx->tx_sync_txg_waiting &&
-                    tx->tx_quiesced_txg == 0) {
+                    tx->tx_quiesced_txg == 0 &&
+                    dp->dp_dirty_total < zfs_dirty_data_sync) {
                         dprintf("waiting; tx_synced=%llu waiting=%llu dp=%p\n",
                             tx->tx_synced_txg, tx->tx_sync_txg_waiting, dp);
                         txg_thread_wait(tx, &cpr, &tx->tx_sync_more_cv, timer);
                         delta = ddi_get_lbolt() - start;
                         timer = (delta > timeout ? 0 : timeout - delta);

@@ -639,10 +642,32 @@
         while (tx->tx_open_txg < txg) {
                 cv_broadcast(&tx->tx_quiesce_more_cv);
                 cv_wait(&tx->tx_quiesce_done_cv, &tx->tx_sync_lock);
         }
         mutex_exit(&tx->tx_sync_lock);
+}
+
+/*
+ * If there isn't a txg syncing or in the pipeline, push another txg through
+ * the pipeline by quiescing the open txg.
+ *
+ * All checks and updates are made while holding tx_sync_lock. The kick is
+ * issued only when every one of the following holds:
+ *   - tx_syncing_txg is 0;
+ *   - tx_quiesce_txg_waiting does not exceed tx_open_txg;
+ *   - tx_sync_txg_waiting does not exceed tx_synced_txg;
+ *   - tx_quiesced_txg does not exceed tx_synced_txg.
+ * In that case tx_quiesce_txg_waiting is set to one past the open txg and
+ * tx_quiesce_more_cv is broadcast (presumably waking the quiesce thread).
+ * Otherwise the call has no effect beyond taking and dropping the lock.
+ *
+ * The caller must not hold the pool config, as asserted below via
+ * dsl_pool_config_held().
+ */
+void
+txg_kick(dsl_pool_t *dp)
+{
+        tx_state_t *tx = &dp->dp_tx;
+
+        ASSERT(!dsl_pool_config_held(dp));
+
+        mutex_enter(&tx->tx_sync_lock);
+        if (tx->tx_syncing_txg == 0 &&
+            tx->tx_quiesce_txg_waiting <= tx->tx_open_txg &&
+            tx->tx_sync_txg_waiting <= tx->tx_synced_txg &&
+            tx->tx_quiesced_txg <= tx->tx_synced_txg) {
+                tx->tx_quiesce_txg_waiting = tx->tx_open_txg + 1;
+                cv_broadcast(&tx->tx_quiesce_more_cv);
+        }
+        mutex_exit(&tx->tx_sync_lock);
 }
 
 boolean_t
 txg_stalled(dsl_pool_t *dp)
 {