Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>

*** 369,385 **** */ static int dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags) { - dsl_pool_t *dp = NULL; dmu_buf_t **dbp; uint64_t blkid, nblks, i; uint32_t dbuf_flags; int err; zio_t *zio; - hrtime_t start; ASSERT(length <= DMU_MAX_ACCESS); dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT; if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz) --- 369,383 ----
*** 403,415 **** } nblks = 1; } dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP); - if (dn->dn_objset->os_dsl_dataset) - dp = dn->dn_objset->os_dsl_dataset->ds_dir->dd_pool; - start = gethrtime(); zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL); blkid = dbuf_whichblock(dn, offset); for (i = 0; i < nblks; i++) { dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag); if (db == NULL) { --- 401,410 ----
*** 426,438 **** } rw_exit(&dn->dn_struct_rwlock); /* wait for async i/o */ err = zio_wait(zio); - /* track read overhead when we are in sync context */ - if (dp && dsl_pool_sync_context(dp)) - dp->dp_read_overhead += gethrtime() - start; if (err) { dmu_buf_rele_array(dbp, nblks, tag); return (err); } --- 421,430 ----
*** 510,525 **** } kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); } void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) { dnode_t *dn; uint64_t blkid; ! int nblks, i, err; if (zfs_prefetch_disable) return; if (len == 0) { /* they're interested in the bonus buffer */ --- 502,527 ---- } kmem_free(dbp, sizeof (dmu_buf_t *) * numbufs); } + /* + * Issue prefetch i/os for the given blocks. + * + * Note: The assumption is that we *know* these blocks will be needed + * almost immediately. Therefore, the prefetch i/os will be issued at + * ZIO_PRIORITY_SYNC_READ + * + * Note: indirect blocks and other metadata will be read synchronously, + * causing this function to block if they are not already cached. + */ void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len) { dnode_t *dn; uint64_t blkid; ! int nblks, err; if (zfs_prefetch_disable) return; if (len == 0) { /* they're interested in the bonus buffer */
*** 528,538 **** if (object == 0 || object >= DN_MAX_OBJECT) return; rw_enter(&dn->dn_struct_rwlock, RW_READER); blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); ! dbuf_prefetch(dn, blkid); rw_exit(&dn->dn_struct_rwlock); return; } /* --- 530,540 ---- if (object == 0 || object >= DN_MAX_OBJECT) return; rw_enter(&dn->dn_struct_rwlock, RW_READER); blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t)); ! dbuf_prefetch(dn, blkid, ZIO_PRIORITY_SYNC_READ); rw_exit(&dn->dn_struct_rwlock); return; } /*
*** 545,564 **** return; rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_datablkshift) { int blkshift = dn->dn_datablkshift; ! nblks = (P2ROUNDUP(offset+len, 1<<blkshift) - ! P2ALIGN(offset, 1<<blkshift)) >> blkshift; } else { nblks = (offset < dn->dn_datablksz); } if (nblks != 0) { blkid = dbuf_whichblock(dn, offset); ! for (i = 0; i < nblks; i++) ! dbuf_prefetch(dn, blkid+i); } rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG); --- 547,566 ---- return; rw_enter(&dn->dn_struct_rwlock, RW_READER); if (dn->dn_datablkshift) { int blkshift = dn->dn_datablkshift; ! nblks = (P2ROUNDUP(offset + len, 1 << blkshift) - ! P2ALIGN(offset, 1 << blkshift)) >> blkshift; } else { nblks = (offset < dn->dn_datablksz); } if (nblks != 0) { blkid = dbuf_whichblock(dn, offset); ! for (int i = 0; i < nblks; i++) ! dbuf_prefetch(dn, blkid + i, ZIO_PRIORITY_SYNC_READ); } rw_exit(&dn->dn_struct_rwlock); dnode_rele(dn, FTAG);
*** 1354,1364 **** dsa->dsa_zgd = zgd; dsa->dsa_tx = tx; zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp, zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp, ! dmu_sync_late_arrival_ready, dmu_sync_late_arrival_done, dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb)); return (0); } --- 1356,1366 ---- dsa->dsa_zgd = zgd; dsa->dsa_tx = tx; zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp, zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp, ! dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done, dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb)); return (0); }
*** 1494,1505 **** dsa->dsa_zgd = zgd; dsa->dsa_tx = NULL; zio_nowait(arc_write(pio, os->os_spa, txg, bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), ! DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready, dmu_sync_done, ! dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb)); return (0); } int --- 1496,1508 ---- dsa->dsa_zgd = zgd; dsa->dsa_tx = NULL; zio_nowait(arc_write(pio, os->os_spa, txg, bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db), ! DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready, ! NULL, dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE, ! ZIO_FLAG_CANFAIL, &zb)); return (0); } int