Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
*** 369,385 ****
*/
static int
dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
{
- dsl_pool_t *dp = NULL;
dmu_buf_t **dbp;
uint64_t blkid, nblks, i;
uint32_t dbuf_flags;
int err;
zio_t *zio;
- hrtime_t start;
ASSERT(length <= DMU_MAX_ACCESS);
dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT;
if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
--- 369,383 ----
*** 403,415 ****
}
nblks = 1;
}
dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
- if (dn->dn_objset->os_dsl_dataset)
- dp = dn->dn_objset->os_dsl_dataset->ds_dir->dd_pool;
- start = gethrtime();
zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
blkid = dbuf_whichblock(dn, offset);
for (i = 0; i < nblks; i++) {
dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag);
if (db == NULL) {
--- 401,410 ----
*** 426,438 ****
}
rw_exit(&dn->dn_struct_rwlock);
/* wait for async i/o */
err = zio_wait(zio);
- /* track read overhead when we are in sync context */
- if (dp && dsl_pool_sync_context(dp))
- dp->dp_read_overhead += gethrtime() - start;
if (err) {
dmu_buf_rele_array(dbp, nblks, tag);
return (err);
}
--- 421,430 ----
*** 510,525 ****
}
kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs);
}
void
dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
{
dnode_t *dn;
uint64_t blkid;
! int nblks, i, err;
if (zfs_prefetch_disable)
return;
if (len == 0) { /* they're interested in the bonus buffer */
--- 502,527 ----
}
kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs);
}
+ /*
+ * Issue prefetch i/os for the given blocks.
+ *
+ * Note: The assumption is that we *know* these blocks will be needed
+ * almost immediately. Therefore, the prefetch i/os will be issued at
+ * ZIO_PRIORITY_SYNC_READ
+ *
+ * Note: indirect blocks and other metadata will be read synchronously,
+ * causing this function to block if they are not already cached.
+ */
void
dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
{
dnode_t *dn;
uint64_t blkid;
! int nblks, err;
if (zfs_prefetch_disable)
return;
if (len == 0) { /* they're interested in the bonus buffer */
*** 528,538 ****
if (object == 0 || object >= DN_MAX_OBJECT)
return;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t));
! dbuf_prefetch(dn, blkid);
rw_exit(&dn->dn_struct_rwlock);
return;
}
/*
--- 530,540 ----
if (object == 0 || object >= DN_MAX_OBJECT)
return;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t));
! dbuf_prefetch(dn, blkid, ZIO_PRIORITY_SYNC_READ);
rw_exit(&dn->dn_struct_rwlock);
return;
}
/*
*** 545,564 ****
return;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (dn->dn_datablkshift) {
int blkshift = dn->dn_datablkshift;
! nblks = (P2ROUNDUP(offset+len, 1<<blkshift) -
! P2ALIGN(offset, 1<<blkshift)) >> blkshift;
} else {
nblks = (offset < dn->dn_datablksz);
}
if (nblks != 0) {
blkid = dbuf_whichblock(dn, offset);
! for (i = 0; i < nblks; i++)
! dbuf_prefetch(dn, blkid+i);
}
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
--- 547,566 ----
return;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (dn->dn_datablkshift) {
int blkshift = dn->dn_datablkshift;
! nblks = (P2ROUNDUP(offset + len, 1 << blkshift) -
! P2ALIGN(offset, 1 << blkshift)) >> blkshift;
} else {
nblks = (offset < dn->dn_datablksz);
}
if (nblks != 0) {
blkid = dbuf_whichblock(dn, offset);
! for (int i = 0; i < nblks; i++)
! dbuf_prefetch(dn, blkid + i, ZIO_PRIORITY_SYNC_READ);
}
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
*** 1354,1364 ****
dsa->dsa_zgd = zgd;
dsa->dsa_tx = tx;
zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp,
! dmu_sync_late_arrival_ready, dmu_sync_late_arrival_done, dsa,
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb));
return (0);
}
--- 1356,1366 ----
dsa->dsa_zgd = zgd;
dsa->dsa_tx = tx;
zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp,
! dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done, dsa,
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb));
return (0);
}
*** 1494,1505 ****
dsa->dsa_zgd = zgd;
dsa->dsa_tx = NULL;
zio_nowait(arc_write(pio, os->os_spa, txg,
bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
! DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready, dmu_sync_done,
! dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
return (0);
}
int
--- 1496,1508 ----
dsa->dsa_zgd = zgd;
dsa->dsa_tx = NULL;
zio_nowait(arc_write(pio, os->os_spa, txg,
bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
! DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready,
! NULL, dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE,
! ZIO_FLAG_CANFAIL, &zb));
return (0);
}
int