Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
*** 121,131 ****
* Thread B is in an already-assigned tx, and blocks for this lock.
* Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
* forever, because the previous txg can't quiesce until B's tx commits.
*
* If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
! * then drop all locks, call dmu_tx_wait(), and try again.
*
* (5) If the operation succeeded, generate the intent log entry for it
* before dropping locks. This ensures that the ordering of events
* in the intent log matches the order in which they actually occurred.
* During ZIL replay the zfs_log_* functions will update the sequence
--- 121,135 ----
* Thread B is in an already-assigned tx, and blocks for this lock.
* Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
* forever, because the previous txg can't quiesce until B's tx commits.
*
* If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
! * then drop all locks, call dmu_tx_wait(), and try again. On subsequent
! * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
! * to indicate that this operation has already called dmu_tx_wait().
! * This will ensure that we don't retry forever, waiting a short bit
! * each time.
*
* (5) If the operation succeeded, generate the intent log entry for it
* before dropping locks. This ensures that the ordering of events
* in the intent log matches the order in which they actually occurred.
* During ZIL replay the zfs_log_* functions will update the sequence
*** 143,158 ****
* top:
* zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD())
* rw_enter(...); // grab any other locks you need
* tx = dmu_tx_create(...); // get DMU tx
* dmu_tx_hold_*(); // hold each object you might modify
! * error = dmu_tx_assign(tx, TXG_NOWAIT); // try to assign
* if (error) {
* rw_exit(...); // drop locks
* zfs_dirent_unlock(dl); // unlock directory entry
* VN_RELE(...); // release held vnodes
* if (error == ERESTART) {
* dmu_tx_wait(tx);
* dmu_tx_abort(tx);
* goto top;
* }
* dmu_tx_abort(tx); // abort DMU tx
--- 147,163 ----
* top:
* zfs_dirent_lock(&dl, ...) // lock directory entry (may VN_HOLD())
* rw_enter(...); // grab any other locks you need
* tx = dmu_tx_create(...); // get DMU tx
* dmu_tx_hold_*(); // hold each object you might modify
! * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
* if (error) {
* rw_exit(...); // drop locks
* zfs_dirent_unlock(dl); // unlock directory entry
* VN_RELE(...); // release held vnodes
* if (error == ERESTART) {
+ * waited = B_TRUE;
* dmu_tx_wait(tx);
* dmu_tx_abort(tx);
* goto top;
* }
* dmu_tx_abort(tx); // abort DMU tx
*** 1313,1322 ****
--- 1318,1328 ----
uid_t uid;
gid_t gid = crgetgid(cr);
zfs_acl_ids_t acl_ids;
boolean_t fuid_dirtied;
boolean_t have_acl = B_FALSE;
+ boolean_t waited = B_FALSE;
/*
* If we have an ephemeral id, ACL, or XVATTR then
* make sure file system is at proper version
*/
*** 1433,1446 ****
if (!zfsvfs->z_use_sa &&
acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, acl_ids.z_aclp->z_acl_bytes);
}
! error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
zfs_acl_ids_free(&acl_ids);
--- 1439,1453 ----
if (!zfsvfs->z_use_sa &&
acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, acl_ids.z_aclp->z_acl_bytes);
}
! error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
zfs_acl_ids_free(&acl_ids);
*** 1568,1577 ****
--- 1575,1585 ----
uint64_t txtype;
pathname_t *realnmp = NULL;
pathname_t realnm;
int error;
int zflg = ZEXISTS;
+ boolean_t waited = B_FALSE;
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
zilog = zfsvfs->z_log;
*** 1656,1672 ****
mutex_exit(&zp->z_lock);
/* charge as an update -- would be nice not to charge at all */
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
! error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
VN_RELE(vp);
if (xzp)
VN_RELE(ZTOV(xzp));
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
if (realnmp)
--- 1664,1681 ----
mutex_exit(&zp->z_lock);
/* charge as an update -- would be nice not to charge at all */
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
! error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
VN_RELE(vp);
if (xzp)
VN_RELE(ZTOV(xzp));
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
if (realnmp)
*** 1796,1805 ****
--- 1805,1815 ----
ksid_t *ksid;
uid_t uid;
gid_t gid = crgetgid(cr);
zfs_acl_ids_t acl_ids;
boolean_t fuid_dirtied;
+ boolean_t waited = B_FALSE;
ASSERT(vap->va_type == VDIR);
/*
* If we have an ephemeral id, ACL, or XVATTR then
*** 1892,1905 ****
}
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
ZFS_SA_BASE_ATTR_SIZE);
! error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
zfs_acl_ids_free(&acl_ids);
--- 1902,1916 ----
}
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
ZFS_SA_BASE_ATTR_SIZE);
! error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
zfs_acl_ids_free(&acl_ids);
*** 1971,1980 ****
--- 1982,1992 ----
zilog_t *zilog;
zfs_dirlock_t *dl;
dmu_tx_t *tx;
int error;
int zflg = ZEXISTS;
+ boolean_t waited = B_FALSE;
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
zilog = zfsvfs->z_log;
*** 2026,2042 ****
dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
zfs_sa_upgrade_txholds(tx, zp);
zfs_sa_upgrade_txholds(tx, dzp);
! error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
rw_exit(&zp->z_parent_lock);
rw_exit(&zp->z_name_lock);
zfs_dirent_unlock(dl);
VN_RELE(vp);
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
dmu_tx_abort(tx);
--- 2038,2055 ----
dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
zfs_sa_upgrade_txholds(tx, zp);
zfs_sa_upgrade_txholds(tx, dzp);
! error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
rw_exit(&zp->z_parent_lock);
rw_exit(&zp->z_name_lock);
zfs_dirent_unlock(dl);
VN_RELE(vp);
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
dmu_tx_abort(tx);
*** 3360,3369 ****
--- 3373,3383 ----
dmu_tx_t *tx;
zfs_zlock_t *zl;
int cmp, serr, terr;
int error = 0;
int zflg = 0;
+ boolean_t waited = B_FALSE;
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(sdzp);
zilog = zfsvfs->z_log;
*** 3597,3607 ****
zfs_sa_upgrade_txholds(tx, tzp);
}
zfs_sa_upgrade_txholds(tx, szp);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
! error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
if (zl != NULL)
zfs_rename_unlock(&zl);
zfs_dirent_unlock(sdl);
zfs_dirent_unlock(tdl);
--- 3611,3621 ----
zfs_sa_upgrade_txholds(tx, tzp);
}
zfs_sa_upgrade_txholds(tx, szp);
dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
! error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
if (zl != NULL)
zfs_rename_unlock(&zl);
zfs_dirent_unlock(sdl);
zfs_dirent_unlock(tdl);
*** 3611,3620 ****
--- 3625,3635 ----
VN_RELE(ZTOV(szp));
if (tzp)
VN_RELE(ZTOV(tzp));
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
dmu_tx_abort(tx);
*** 3716,3725 ****
--- 3731,3741 ----
int error;
int zflg = ZNEW;
zfs_acl_ids_t acl_ids;
boolean_t fuid_dirtied;
uint64_t txtype = TX_SYMLINK;
+ boolean_t waited = B_FALSE;
ASSERT(vap->va_type == VLNK);
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
*** 3778,3791 ****
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
acl_ids.z_aclp->z_acl_bytes);
}
if (fuid_dirtied)
zfs_fuid_txhold(zfsvfs, tx);
! error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
zfs_acl_ids_free(&acl_ids);
--- 3794,3808 ----
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
acl_ids.z_aclp->z_acl_bytes);
}
if (fuid_dirtied)
zfs_fuid_txhold(zfsvfs, tx);
! error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
zfs_acl_ids_free(&acl_ids);
*** 3908,3917 ****
--- 3925,3935 ----
vnode_t *realvp;
int error;
int zf = ZNEW;
uint64_t parent;
uid_t owner;
+ boolean_t waited = B_FALSE;
ASSERT(tdvp->v_type == VDIR);
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
*** 3997,4010 ****
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
zfs_sa_upgrade_txholds(tx, szp);
zfs_sa_upgrade_txholds(tx, dzp);
! error = dmu_tx_assign(tx, TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
dmu_tx_abort(tx);
--- 4015,4029 ----
tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
zfs_sa_upgrade_txholds(tx, szp);
zfs_sa_upgrade_txholds(tx, dzp);
! error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
if (error == ERESTART) {
+ waited = B_TRUE;
dmu_tx_wait(tx);
dmu_tx_abort(tx);
goto top;
}
dmu_tx_abort(tx);