Print this page
4347 ZPL can use dmu_tx_assign(TXG_WAIT)
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>

@@ -109,15 +109,22 @@
  *      If you must call VN_RELE() within a tx then use VN_RELE_ASYNC().
  *
  *  (3) All range locks must be grabbed before calling dmu_tx_assign(),
  *      as they can span dmu_tx_assign() calls.
  *
- *  (4) Always pass TXG_NOWAIT as the second argument to dmu_tx_assign().
- *      This is critical because we don't want to block while holding locks.
+ *  (4) If ZPL locks are held, pass TXG_NOWAIT as the second argument to
+ *      dmu_tx_assign().  This is critical because we don't want to block
+ *      while holding locks.
+ *
+ *      If no ZPL locks are held (aside from ZFS_ENTER()), use TXG_WAIT.  This
+ *      reduces lock contention and CPU usage when we must wait (note that if
+ *      throughput is constrained by the storage, nearly every transaction
+ *      must wait).
+ *
  *      Note, in particular, that if a lock is sometimes acquired before
- *      the tx assigns, and sometimes after (e.g. z_lock), then failing to
- *      use a non-blocking assign can deadlock the system.  The scenario:
+ *      the tx assigns, and sometimes after (e.g. z_lock), then failing
+ *      to use a non-blocking assign can deadlock the system.  The scenario:
  *
  *      Thread A has grabbed a lock before calling dmu_tx_assign().
  *      Thread B is in an already-assigned tx, and blocks for this lock.
  *      Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
  *      forever, because the previous txg can't quiesce until B's tx commits.

@@ -726,11 +733,10 @@
          * and allows us to do more fine-grained space accounting.
          */
         while (n > 0) {
                 abuf = NULL;
                 woff = uio->uio_loffset;
-again:
                 if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
                     zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
                         if (abuf != NULL)
                                 dmu_return_arcbuf(abuf);
                         error = SET_ERROR(EDQUOT);

@@ -778,18 +784,13 @@
                  */
                 tx = dmu_tx_create(zfsvfs->z_os);
                 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
                 dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
                 zfs_sa_upgrade_txholds(tx, zp);
-                error = dmu_tx_assign(tx, TXG_NOWAIT);
+                error = dmu_tx_assign(tx, TXG_WAIT);
                 if (error) {
-                        if (error == ERESTART) {
-                                dmu_tx_wait(tx);
                                 dmu_tx_abort(tx);
-                                goto again;
-                        }
-                        dmu_tx_abort(tx);
                         if (abuf != NULL)
                                 dmu_return_arcbuf(abuf);
                         break;
                 }
 

@@ -3041,16 +3042,13 @@
         if (fuid_dirtied)
                 zfs_fuid_txhold(zfsvfs, tx);
 
         zfs_sa_upgrade_txholds(tx, zp);
 
-        err = dmu_tx_assign(tx, TXG_NOWAIT);
-        if (err) {
-                if (err == ERESTART)
-                        dmu_tx_wait(tx);
+        err = dmu_tx_assign(tx, TXG_WAIT);
+        if (err)
                 goto out;
-        }
 
         count = 0;
         /*
          * Set each attribute requested.
          * We group settings according to the locks they need to acquire.

@@ -4138,24 +4136,18 @@
         if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
             zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
                 err = SET_ERROR(EDQUOT);
                 goto out;
         }
-top:
         tx = dmu_tx_create(zfsvfs->z_os);
         dmu_tx_hold_write(tx, zp->z_id, off, len);
 
         dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
         zfs_sa_upgrade_txholds(tx, zp);
-        err = dmu_tx_assign(tx, TXG_NOWAIT);
+        err = dmu_tx_assign(tx, TXG_WAIT);
         if (err != 0) {
-                if (err == ERESTART) {
-                        dmu_tx_wait(tx);
                         dmu_tx_abort(tx);
-                        goto top;
-                }
-                dmu_tx_abort(tx);
                 goto out;
         }
 
         if (zp->z_blksz <= PAGESIZE) {
                 caddr_t va = zfs_map_page(pp, S_READ);