Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
@@ -121,11 +121,15 @@
* Thread B is in an already-assigned tx, and blocks for this lock.
* Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
* forever, because the previous txg can't quiesce until B's tx commits.
*
* If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
- * then drop all locks, call dmu_tx_wait(), and try again.
+ * then drop all locks, call dmu_tx_wait(), and try again. On subsequent
+ * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
+ * to indicate that this operation has already called dmu_tx_wait().
+ * This will ensure that we don't retry forever, waiting a short bit
+ * each time.
*
* (5) If the operation succeeded, generate the intent log entry for it
* before dropping locks. This ensures that the ordering of events
* in the intent log matches the order in which they actually occurred.
* During ZIL replay the zfs_log_* functions will update the sequence
@@ -143,16 +147,17 @@
* top:
* zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD())
* rw_enter(...); // grab any other locks you need
* tx = dmu_tx_create(...); // get DMU tx
* dmu_tx_hold_*(); // hold each object you might modify
- * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign
+ * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
* if (error) {
* rw_exit(...); // drop locks
* zfs_dirent_unlock(dl); // unlock directory entry
* VN_RELE(...); // release held vnodes
* if (error == ERESTART) {
+ * waited = B_TRUE;
* dmu_tx_wait(tx);
* dmu_tx_abort(tx);
* goto top;
* }
* dmu_tx_abort(tx); // abort DMU tx
@@ -1313,10 +1318,11 @@
uid_t uid;
gid_t gid = crgetgid(cr);
zfs_acl_ids_t acl_ids;
boolean_t fuid_dirtied;
boolean_t have_acl = B_FALSE;
+ boolean_t waited = B_FALSE;
/*
* If we have an ephemeral id, ACL, or XVATTR then
* make sure file system is at proper version
*/
@@ -1433,14 +1439,15 @@
if (!zfsvfs->z_use_sa &&
acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, acl_ids.z_aclp->z_acl_bytes);
}
- error = dmu_tx_assign(tx, TXG_NOWAIT);
+ error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
zfs_acl_ids_free(&acl_ids);
@@ -1568,10 +1575,11 @@
uint64_t txtype;
pathname_t *realnmp = NULL;
pathname_t realnm;
int error;
int zflg = ZEXISTS;
+ boolean_t waited = B_FALSE;
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
zilog = zfsvfs->z_log;
@@ -1656,17 +1664,18 @@
mutex_exit(&zp->z_lock);
/* charge as an update -- would be nice not to charge at all */
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
- error = dmu_tx_assign(tx, TXG_NOWAIT);
+ error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
VN_RELE(vp);
if (xzp)
VN_RELE(ZTOV(xzp));
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
if (realnmp)
@@ -1796,10 +1805,11 @@
ksid_t *ksid;
uid_t uid;
gid_t gid = crgetgid(cr);
zfs_acl_ids_t acl_ids;
boolean_t fuid_dirtied;
+ boolean_t waited = B_FALSE;
ASSERT(vap->va_type == VDIR);
/*
* If we have an ephemeral id, ACL, or XVATTR then
@@ -1892,14 +1902,15 @@
}
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
ZFS_SA_BASE_ATTR_SIZE);
- error = dmu_tx_assign(tx, TXG_NOWAIT);
+ error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
zfs_acl_ids_free(&acl_ids);
@@ -1971,10 +1982,11 @@
zilog_t *zilog;
zfs_dirlock_t *dl;
dmu_tx_t *tx;
int error;
int zflg = ZEXISTS;
+ boolean_t waited = B_FALSE;
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
zilog = zfsvfs->z_log;
@@ -2026,17 +2038,18 @@
dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
zfs_sa_upgrade_txholds(tx, zp);
zfs_sa_upgrade_txholds(tx, dzp);
- error = dmu_tx_assign(tx, TXG_NOWAIT);
+ error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
rw_exit(&zp->z_parent_lock);
rw_exit(&zp->z_name_lock);
zfs_dirent_unlock(dl);
VN_RELE(vp);
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
dmu_tx_abort(tx);
@@ -3360,10 +3373,11 @@
dmu_tx_t *tx;
zfs_zlock_t *zl;
int cmp, serr, terr;
int error = 0;
int zflg = 0;
+ boolean_t waited = B_FALSE;
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(sdzp);
zilog = zfsvfs->z_log;
@@ -3597,11 +3611,11 @@
zfs_sa_upgrade_txholds(tx, tzp);
}
zfs_sa_upgrade_txholds(tx, szp);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
- error = dmu_tx_assign(tx, TXG_NOWAIT);
+ error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
if (zl != NULL)
zfs_rename_unlock(&zl);
zfs_dirent_unlock(sdl);
zfs_dirent_unlock(tdl);
@@ -3611,10 +3625,11 @@
VN_RELE(ZTOV(szp));
if (tzp)
VN_RELE(ZTOV(tzp));
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
dmu_tx_abort(tx);
@@ -3716,10 +3731,11 @@
int error;
int zflg = ZNEW;
zfs_acl_ids_t acl_ids;
boolean_t fuid_dirtied;
uint64_t txtype = TX_SYMLINK;
+ boolean_t waited = B_FALSE;
ASSERT(vap->va_type == VLNK);
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
@@ -3778,14 +3794,15 @@
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
acl_ids.z_aclp->z_acl_bytes);
}
if (fuid_dirtied)
zfs_fuid_txhold(zfsvfs, tx);
- error = dmu_tx_assign(tx, TXG_NOWAIT);
+ error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
zfs_acl_ids_free(&acl_ids);
@@ -3908,10 +3925,11 @@
vnode_t *realvp;
int error;
int zf = ZNEW;
uint64_t parent;
uid_t owner;
+ boolean_t waited = B_FALSE;
ASSERT(tdvp->v_type == VDIR);
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
@@ -3997,14 +4015,15 @@
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
zfs_sa_upgrade_txholds(tx, szp);
zfs_sa_upgrade_txholds(tx, dzp);
- error = dmu_tx_assign(tx, TXG_NOWAIT);
+ error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
dmu_tx_abort(tx);