Print this page
3752 want more verifiable dbuf user eviction
Submitted by: Justin Gibbs <justing@spectralogic.com>
Submitted by: Will Andrews <willa@spectralogic.com>
@@ -48,14 +48,12 @@
#include <sys/zap_impl.h>
#include <sys/zap_leaf.h>
int fzap_default_block_shift = 14; /* 16k blocksize */
-static void zap_leaf_pageout(dmu_buf_t *db, void *vl);
static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
-
void
fzap_byteswap(void *vbuf, size_t size)
{
uint64_t block_type;
@@ -78,17 +76,16 @@
zap_phys_t *zp;
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
zap->zap_ismicro = FALSE;
- (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap,
- &zap->zap_f.zap_phys, zap_evict);
+ zap->db_evict.evict_func = zap_evict;
mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
zap->zap_f.zap_block_shift = highbit(zap->zap_dbuf->db_size) - 1;
- zp = zap->zap_f.zap_phys;
+ zp = zap->zap_f_phys;
/*
* explicitly zero it since it might be coming from an
* initialized microzap
*/
bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size);
@@ -115,11 +112,10 @@
1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH));
dmu_buf_will_dirty(db, tx);
l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
l->l_dbuf = db;
- l->l_phys = db->db_data;
zap_leaf_init(l, zp->zap_normflags != 0);
kmem_free(l, sizeof (zap_leaf_t));
dmu_buf_rele(db, FTAG);
@@ -322,26 +318,26 @@
* The pointer table should never use more hash bits than we
* have (otherwise we'd be using useless zero bits to index it).
* If we are within 2 bits of running out, stop growing, since
* this is already an aberrant condition.
*/
- if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2)
+ if (zap->zap_f_phys->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2)
return (SET_ERROR(ENOSPC));
- if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
+ if (zap->zap_f_phys->zap_ptrtbl.zt_numblks == 0) {
/*
* We are outgrowing the "embedded" ptrtbl (the one
* stored in the header block). Give it its own entire
* block, which will double the size of the ptrtbl.
*/
uint64_t newblk;
dmu_buf_t *db_new;
int err;
- ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==,
+ ASSERT3U(zap->zap_f_phys->zap_ptrtbl.zt_shift, ==,
ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
- ASSERT0(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk);
+ ASSERT0(zap->zap_f_phys->zap_ptrtbl.zt_blk);
newblk = zap_allocate_blocks(zap, 1);
err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new,
DMU_READ_NO_PREFETCH);
@@ -350,45 +346,54 @@
dmu_buf_will_dirty(db_new, tx);
zap_ptrtbl_transfer(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
dmu_buf_rele(db_new, FTAG);
- zap->zap_f.zap_phys->zap_ptrtbl.zt_blk = newblk;
- zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks = 1;
- zap->zap_f.zap_phys->zap_ptrtbl.zt_shift++;
+ zap->zap_f_phys->zap_ptrtbl.zt_blk = newblk;
+ zap->zap_f_phys->zap_ptrtbl.zt_numblks = 1;
+ zap->zap_f_phys->zap_ptrtbl.zt_shift++;
- ASSERT3U(1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==,
- zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks <<
+ ASSERT3U(1ULL << zap->zap_f_phys->zap_ptrtbl.zt_shift, ==,
+ zap->zap_f_phys->zap_ptrtbl.zt_numblks <<
(FZAP_BLOCK_SHIFT(zap)-3));
return (0);
} else {
- return (zap_table_grow(zap, &zap->zap_f.zap_phys->zap_ptrtbl,
+ return (zap_table_grow(zap, &zap->zap_f_phys->zap_ptrtbl,
zap_ptrtbl_transfer, tx));
}
}
/*
 * Add delta to the zap_num_entries counter of the fat zap's zap_phys_t.
 *
 * zap->zap_dbuf is marked dirty in tx before the counter is touched, and
 * the update itself is made while holding zap_f.zap_num_entries_mtx.
 * When delta is not positive, the ASSERT requires the current count to
 * be at least -delta.
 */
static void
zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx)
{
dmu_buf_will_dirty(zap->zap_dbuf, tx);
mutex_enter(&zap->zap_f.zap_num_entries_mtx);
- ASSERT(delta > 0 || zap->zap_f.zap_phys->zap_num_entries >= -delta);
- zap->zap_f.zap_phys->zap_num_entries += delta;
+ ASSERT(delta > 0 || zap->zap_f_phys->zap_num_entries >= -delta);
+ zap->zap_f_phys->zap_num_entries += delta;
mutex_exit(&zap->zap_f.zap_num_entries_mtx);
}
/*
 * Hand out nblocks block numbers from the zap by advancing zap_freeblk.
 *
 * Returns the value zap_freeblk held before the call, i.e. the first of
 * the nblocks numbers just handed out.  The caller must hold
 * zap->zap_rwlock as writer (asserted).  This function does not dirty
 * any buffer itself.
 */
static uint64_t
zap_allocate_blocks(zap_t *zap, int nblocks)
{
uint64_t newblk;
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
- newblk = zap->zap_f.zap_phys->zap_freeblk;
- zap->zap_f.zap_phys->zap_freeblk += nblocks;
+ newblk = zap->zap_f_phys->zap_freeblk;
+ zap->zap_f_phys->zap_freeblk += nblocks;
return (newblk);
}
/*
 * Callback passed to dmu_buf_init_user() for leaf dbufs (see
 * zap_create_leaf() and zap_open_leaf()); zap_open_leaf() also calls it
 * directly to discard a leaf that lost the dmu_buf_set_user() race.
 *
 * Treats dbu as the zap_leaf_t itself, destroys its l_rwlock and frees
 * the structure.
 *
 * NOTE(review): the cast assumes db_evict is the first member of
 * zap_leaf_t, since callers pass &l->db_evict -- confirm in zap_leaf.h.
 */
+static void
+zap_leaf_pageout(dmu_buf_user_t *dbu)
+{
+ zap_leaf_t *l = (zap_leaf_t *)dbu;
+
+ rw_destroy(&l->l_rwlock);
+ kmem_free(l, sizeof (zap_leaf_t));
+}
+
static zap_leaf_t *
zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
{
void *winner;
zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP);
@@ -397,32 +402,32 @@
rw_init(&l->l_rwlock, 0, 0, 0);
rw_enter(&l->l_rwlock, RW_WRITER);
l->l_blkid = zap_allocate_blocks(zap, 1);
l->l_dbuf = NULL;
- l->l_phys = NULL;
VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
DMU_READ_NO_PREFETCH));
- winner = dmu_buf_set_user(l->l_dbuf, l, &l->l_phys, zap_leaf_pageout);
+ dmu_buf_init_user(&l->db_evict, zap_leaf_pageout);
+ winner = (zap_leaf_t *)dmu_buf_set_user(l->l_dbuf, &l->db_evict);
ASSERT(winner == NULL);
dmu_buf_will_dirty(l->l_dbuf, tx);
zap_leaf_init(l, zap->zap_normflags != 0);
- zap->zap_f.zap_phys->zap_num_leafs++;
+ zap->zap_f_phys->zap_num_leafs++;
return (l);
}
/*
 * Store the fat zap's current zap_num_entries value in *count.
 *
 * Asserts that the zap is not a microzap.  The counter is read while
 * holding zap_f.zap_num_entries_mtx.  Always returns 0.
 */
int
fzap_count(zap_t *zap, uint64_t *count)
{
ASSERT(!zap->zap_ismicro);
mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */
- *count = zap->zap_f.zap_phys->zap_num_entries;
+ *count = zap->zap_f_phys->zap_num_entries;
mutex_exit(&zap->zap_f.zap_num_entries_mtx);
return (0);
}
/*
@@ -434,20 +439,10 @@
{
rw_exit(&l->l_rwlock);
dmu_buf_rele(l->l_dbuf, NULL);
}
-_NOTE(ARGSUSED(0))
-static void
-zap_leaf_pageout(dmu_buf_t *db, void *vl)
-{
- zap_leaf_t *l = vl;
-
- rw_destroy(&l->l_rwlock);
- kmem_free(l, sizeof (zap_leaf_t));
-}
-
static zap_leaf_t *
zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
{
zap_leaf_t *l, *winner;
@@ -457,18 +452,18 @@
rw_init(&l->l_rwlock, 0, 0, 0);
rw_enter(&l->l_rwlock, RW_WRITER);
l->l_blkid = blkid;
l->l_bs = highbit(db->db_size)-1;
l->l_dbuf = db;
- l->l_phys = NULL;
- winner = dmu_buf_set_user(db, l, &l->l_phys, zap_leaf_pageout);
+ dmu_buf_init_user(&l->db_evict, zap_leaf_pageout);
+ winner = (zap_leaf_t *)dmu_buf_set_user(db, &l->db_evict);
rw_exit(&l->l_rwlock);
if (winner != NULL) {
/* someone else set it first */
- zap_leaf_pageout(NULL, l);
+ zap_leaf_pageout(&l->db_evict);
l = winner;
}
/*
* lhr_pad was previously used for the next leaf in the leaf
@@ -513,11 +508,11 @@
ASSERT3U(db->db_object, ==, zap->zap_object);
ASSERT3U(db->db_offset, ==, blkid << bs);
ASSERT3U(db->db_size, ==, 1 << bs);
ASSERT(blkid != 0);
- l = dmu_buf_get_user(db);
+ l = (zap_leaf_t *)dmu_buf_get_user(db);
if (l == NULL)
l = zap_open_leaf(blkid, db);
rw_enter(&l->l_rwlock, lt);
@@ -540,32 +535,32 @@
/*
 * Look up pointer-table entry idx and store its value in *valp.
 *
 * When zt_numblks is 0 the entry is read from the embedded table via
 * ZAP_EMBEDDED_PTRTBL_ENT() (idx is asserted to be below 1 << zt_shift)
 * and 0 is returned.  Otherwise the lookup is delegated to
 * zap_table_load() and its return value is passed back.
 *
 * The caller must hold zap->zap_rwlock (asserted via RW_LOCK_HELD).
 */
static int
zap_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t *valp)
{
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
- if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
+ if (zap->zap_f_phys->zap_ptrtbl.zt_numblks == 0) {
ASSERT3U(idx, <,
- (1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift));
+ (1ULL << zap->zap_f_phys->zap_ptrtbl.zt_shift));
*valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx);
return (0);
} else {
- return (zap_table_load(zap, &zap->zap_f.zap_phys->zap_ptrtbl,
+ return (zap_table_load(zap, &zap->zap_f_phys->zap_ptrtbl,
idx, valp));
}
}
/*
 * Set pointer-table entry idx to blk.
 *
 * When zt_blk is 0 the entry is written directly through
 * ZAP_EMBEDDED_PTRTBL_ENT() and 0 is returned.  Otherwise the store is
 * delegated to zap_table_store() with tx and its return value is passed
 * back.
 *
 * tx must be non-NULL and the caller must hold zap->zap_rwlock as
 * writer (both asserted).
 *
 * NOTE(review): the embedded-table test here uses zt_blk, whereas
 * zap_idx_to_blk() uses zt_numblks -- presumably equivalent; confirm.
 */
static int
zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx)
{
ASSERT(tx != NULL);
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
- if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0) {
+ if (zap->zap_f_phys->zap_ptrtbl.zt_blk == 0) {
ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) = blk;
return (0);
} else {
- return (zap_table_store(zap, &zap->zap_f.zap_phys->zap_ptrtbl,
+ return (zap_table_store(zap, &zap->zap_f_phys->zap_ptrtbl,
idx, blk, tx));
}
}
static int
@@ -573,13 +568,13 @@
{
uint64_t idx, blk;
int err;
ASSERT(zap->zap_dbuf == NULL ||
- zap->zap_f.zap_phys == zap->zap_dbuf->db_data);
- ASSERT3U(zap->zap_f.zap_phys->zap_magic, ==, ZAP_MAGIC);
- idx = ZAP_HASH_IDX(h, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
+ zap->zap_f_phys == zap->zap_dbuf->db_data);
+ ASSERT3U(zap->zap_f_phys->zap_magic, ==, ZAP_MAGIC);
+ idx = ZAP_HASH_IDX(h, zap->zap_f_phys->zap_ptrtbl.zt_shift);
err = zap_idx_to_blk(zap, idx, &blk);
if (err != 0)
return (err);
err = zap_get_leaf_byblk(zap, blk, tx, lt, lp);
@@ -596,18 +591,18 @@
zap_leaf_t *nl;
int prefix_diff, i, err;
uint64_t sibling;
int old_prefix_len = l->l_phys->l_hdr.lh_prefix_len;
- ASSERT3U(old_prefix_len, <=, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
+ ASSERT3U(old_prefix_len, <=, zap->zap_f_phys->zap_ptrtbl.zt_shift);
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
l->l_phys->l_hdr.lh_prefix);
if (zap_tryupgradedir(zap, tx) == 0 ||
- old_prefix_len == zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) {
+ old_prefix_len == zap->zap_f_phys->zap_ptrtbl.zt_shift) {
/* We failed to upgrade, or need to grow the pointer table */
objset_t *os = zap->zap_objset;
uint64_t object = zap->zap_object;
zap_put_leaf(l);
@@ -618,11 +613,11 @@
if (err)
return (err);
ASSERT(!zap->zap_ismicro);
while (old_prefix_len ==
- zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) {
+ zap->zap_f_phys->zap_ptrtbl.zt_shift) {
err = zap_grow_ptrtbl(zap, tx);
if (err)
return (err);
}
@@ -635,15 +630,15 @@
*lp = l;
return (0);
}
}
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
- ASSERT3U(old_prefix_len, <, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
+ ASSERT3U(old_prefix_len, <, zap->zap_f_phys->zap_ptrtbl.zt_shift);
ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
l->l_phys->l_hdr.lh_prefix);
- prefix_diff = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift -
+ prefix_diff = zap->zap_f_phys->zap_ptrtbl.zt_shift -
(old_prefix_len + 1);
sibling = (ZAP_HASH_IDX(hash, old_prefix_len + 1) | 1) << prefix_diff;
/* check for i/o errors before doing zap_leaf_split */
for (i = 0; i < (1ULL<<prefix_diff); i++) {
@@ -677,17 +672,17 @@
static void
zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
{
zap_t *zap = zn->zn_zap;
- int shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
+ int shift = zap->zap_f_phys->zap_ptrtbl.zt_shift;
int leaffull = (l->l_phys->l_hdr.lh_prefix_len == shift &&
l->l_phys->l_hdr.lh_nfree < ZAP_LEAF_LOW_WATER);
zap_put_leaf(l);
- if (leaffull || zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk) {
+ if (leaffull || zap->zap_f_phys->zap_ptrtbl.zt_nextblk) {
int err;
/*
* We are in the middle of growing the pointer table, or
* this leaf will soon make us grow it.
@@ -703,11 +698,11 @@
if (err)
return;
}
/* could have finished growing while our locks were down */
- if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift == shift)
+ if (zap->zap_f_phys->zap_ptrtbl.zt_shift == shift)
(void) zap_grow_ptrtbl(zap, tx);
}
}
static int
@@ -934,11 +929,11 @@
uint64_t idx, blk;
zap_t *zap = zn->zn_zap;
int bs;
idx = ZAP_HASH_IDX(zn->zn_hash,
- zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
+ zap->zap_f_phys->zap_ptrtbl.zt_shift);
if (zap_idx_to_blk(zap, idx, &blk) != 0)
return;
bs = FZAP_BLOCK_SHIFT(zap);
dmu_prefetch(zap->zap_objset, zap->zap_object, blk << bs, 1 << bs);
}
@@ -1274,46 +1269,46 @@
zs->zs_blocksize = 1ULL << bs;
/*
* Set zap_phys_t fields
*/
- zs->zs_num_leafs = zap->zap_f.zap_phys->zap_num_leafs;
- zs->zs_num_entries = zap->zap_f.zap_phys->zap_num_entries;
- zs->zs_num_blocks = zap->zap_f.zap_phys->zap_freeblk;
- zs->zs_block_type = zap->zap_f.zap_phys->zap_block_type;
- zs->zs_magic = zap->zap_f.zap_phys->zap_magic;
- zs->zs_salt = zap->zap_f.zap_phys->zap_salt;
+ zs->zs_num_leafs = zap->zap_f_phys->zap_num_leafs;
+ zs->zs_num_entries = zap->zap_f_phys->zap_num_entries;
+ zs->zs_num_blocks = zap->zap_f_phys->zap_freeblk;
+ zs->zs_block_type = zap->zap_f_phys->zap_block_type;
+ zs->zs_magic = zap->zap_f_phys->zap_magic;
+ zs->zs_salt = zap->zap_f_phys->zap_salt;
/*
* Set zap_ptrtbl fields
*/
- zs->zs_ptrtbl_len = 1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
- zs->zs_ptrtbl_nextblk = zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk;
+ zs->zs_ptrtbl_len = 1ULL << zap->zap_f_phys->zap_ptrtbl.zt_shift;
+ zs->zs_ptrtbl_nextblk = zap->zap_f_phys->zap_ptrtbl.zt_nextblk;
zs->zs_ptrtbl_blks_copied =
- zap->zap_f.zap_phys->zap_ptrtbl.zt_blks_copied;
- zs->zs_ptrtbl_zt_blk = zap->zap_f.zap_phys->zap_ptrtbl.zt_blk;
- zs->zs_ptrtbl_zt_numblks = zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks;
- zs->zs_ptrtbl_zt_shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
+ zap->zap_f_phys->zap_ptrtbl.zt_blks_copied;
+ zs->zs_ptrtbl_zt_blk = zap->zap_f_phys->zap_ptrtbl.zt_blk;
+ zs->zs_ptrtbl_zt_numblks = zap->zap_f_phys->zap_ptrtbl.zt_numblks;
+ zs->zs_ptrtbl_zt_shift = zap->zap_f_phys->zap_ptrtbl.zt_shift;
- if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
+ if (zap->zap_f_phys->zap_ptrtbl.zt_numblks == 0) {
/* the ptrtbl is entirely in the header block. */
zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs);
} else {
int b;
dmu_prefetch(zap->zap_objset, zap->zap_object,
- zap->zap_f.zap_phys->zap_ptrtbl.zt_blk << bs,
- zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << bs);
+ zap->zap_f_phys->zap_ptrtbl.zt_blk << bs,
+ zap->zap_f_phys->zap_ptrtbl.zt_numblks << bs);
- for (b = 0; b < zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks;
+ for (b = 0; b < zap->zap_f_phys->zap_ptrtbl.zt_numblks;
b++) {
dmu_buf_t *db;
int err;
err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
- (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk + b) << bs,
+ (zap->zap_f_phys->zap_ptrtbl.zt_blk + b) << bs,
FTAG, &db, DMU_READ_NO_PREFETCH);
if (err == 0) {
zap_stats_ptrtbl(zap, db->db_data,
1<<(bs-3), zs);
dmu_buf_rele(db, FTAG);
@@ -1346,11 +1341,11 @@
* external pointer table.
* - If this already has an external pointer table this operation
* could extend the table.
*/
if (add) {
- if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0)
+ if (zap->zap_f_phys->zap_ptrtbl.zt_blk == 0)
*towrite += zap->zap_dbuf->db_size;
else
*towrite += (zap->zap_dbuf->db_size * 3);
}