zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
    void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n),
    dmu_tx_t *tx)
{
	uint64_t b, newblk;
	dmu_buf_t *db_old, *db_new;
	int err;
	int bs = FZAP_BLOCK_SHIFT(zap);
	int hepb = 1<<(bs-4);
	/* hepb = half the number of entries in a block */
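	/*
	 * Each ptrtbl entry is an 8-byte (1<<3) uint64_t, so a block of
	 * 1<<bs bytes holds 1<<(bs-3) entries and hepb is 1<<(bs-4).
	 */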

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
	ASSERT(tbl->zt_blk != 0);
	ASSERT(tbl->zt_numblks > 0);

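	/*
	 * On the first call of a grow, allocate the doubled table and
	 * prefetch the old one; any later call finds zt_nextblk already
	 * set and resumes copying where zt_blks_copied left off.
	 */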
	if (tbl->zt_nextblk != 0) {
		newblk = tbl->zt_nextblk;
	} else {
		newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
		tbl->zt_nextblk = newblk;
		ASSERT0(tbl->zt_blks_copied);
		dmu_prefetch(zap->zap_objset, zap->zap_object,
		    tbl->zt_blk << bs, tbl->zt_numblks << bs);
	}

	/*
	 * Copy the ptrtbl from the old to new location.
	 */

	b = tbl->zt_blks_copied;
	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH);
	if (err)
		return (err);

	/* first half of entries in old[b] go to new[2*b+0] */
	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
	dmu_buf_will_dirty(db_new, tx);
	transfer_func(db_old->db_data, db_new->db_data, hepb);
	dmu_buf_rele(db_new, FTAG);
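	/*
	 * (The code elided here presumably copies the second half of
	 * old[b] to new[2*b+1] in the same way.  Note that transfer_func
	 * is given hepb source entries yet fills a whole new block, so
	 * each old entry must expand into two adjacent new entries;
	 * that expansion is what doubles the table.)
	 */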

/* ... */

	/*
	 * The pointer table should never use more hash bits than we
	 * have (otherwise we'd be using useless zero bits to index it).
	 * If we are within 2 bits of running out, stop growing, since
	 * this is already an aberrant condition.
	 */
	if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2)
		return (ENOSPC);

	if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
		/*
		 * We are outgrowing the "embedded" ptrtbl (the one
		 * stored in the header block). Give it its own entire
		 * block, which will double the size of the ptrtbl.
		 */
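		/*
		 * The embedded table holds 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap)
		 * entries; a full block holds twice that (one 8-byte entry
		 * per 1<<3 bytes of block), hence the zt_shift++ below.
		 */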
		uint64_t newblk;
		dmu_buf_t *db_new;
		int err;

		ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==,
		    ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
		ASSERT0(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk);

		newblk = zap_allocate_blocks(zap, 1);
		err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
		    newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new,
		    DMU_READ_NO_PREFETCH);
		if (err)
			return (err);
		dmu_buf_will_dirty(db_new, tx);
		zap_ptrtbl_transfer(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
		    db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
		dmu_buf_rele(db_new, FTAG);

		zap->zap_f.zap_phys->zap_ptrtbl.zt_blk = newblk;
		zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks = 1;
		zap->zap_f.zap_phys->zap_ptrtbl.zt_shift++;

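		/*
		 * Invariant: the table is exactly as big as its shift says,
		 * i.e. 1 << zt_shift 8-byte entries fit precisely in
		 * zt_numblks blocks of 1 << FZAP_BLOCK_SHIFT(zap) bytes.
		 */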
		ASSERT3U(1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==,
		    zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks <<
		    (FZAP_BLOCK_SHIFT(zap)-3));

/* ... */

	rw_enter(&l->l_rwlock, RW_WRITER);
	l->l_blkid = blkid;
	l->l_bs = highbit(db->db_size)-1;
	l->l_dbuf = db;
	l->l_phys = NULL;

	winner = dmu_buf_set_user(db, l, &l->l_phys, zap_leaf_pageout);

	rw_exit(&l->l_rwlock);
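	/*
	 * dmu_buf_set_user() only installs our zap_leaf_t if the dbuf has
	 * no user yet; if another thread raced us and installed its leaf
	 * first, the existing user is returned and ours is discarded.
	 */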
	if (winner != NULL) {
		/* someone else set it first */
		zap_leaf_pageout(NULL, l);
		l = winner;
	}

	/*
	 * lhr_pad was previously used for the next leaf in the leaf
	 * chain.  There should be no chained leaves (as we have removed
	 * support for them).
	 */
	ASSERT0(l->l_phys->l_hdr.lh_pad1);

	/*
	 * There should be more hash entries than there can be
	 * chunks to put in the hash table
	 */
	ASSERT3U(ZAP_LEAF_HASH_NUMENTRIES(l), >, ZAP_LEAF_NUMCHUNKS(l) / 3);

	/* The chunks should begin at the end of the hash table */
	ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==,
	    &l->l_phys->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]);

	/* The chunks should end at the end of the block */
	ASSERT3U((uintptr_t)&ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)) -
	    (uintptr_t)l->l_phys, ==, l->l_dbuf->db_size);
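	/*
	 * Together these asserts pin down the leaf block layout that the
	 * chunk macros assume: header, then hash table, then chunks
	 * running to the end of the block.
	 */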

	return (l);
}

static int
zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
/* ... */

	ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
	    l->l_phys->l_hdr.lh_prefix);

	prefix_diff = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift -
	    (old_prefix_len + 1);
	sibling = (ZAP_HASH_IDX(hash, old_prefix_len + 1) | 1) << prefix_diff;

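	/*
	 * A leaf with an n-bit prefix is referenced by 1 << (zt_shift - n)
	 * consecutive ptrtbl entries, so after the split each half owns
	 * 1 << prefix_diff of them; "sibling" is the first entry of the
	 * new-sibling half, the one whose (old_prefix_len + 1)-bit prefix
	 * has its low bit set.  Illustrative numbers only: with
	 * zt_shift = 10, old_prefix_len = 3 and a hash starting 1010...,
	 * sibling = (0xa | 1) << 6 = 0x2c0, covering 1 << 6 = 64 entries.
	 */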
	/* check for i/o errors before doing zap_leaf_split */
	for (i = 0; i < (1ULL << prefix_diff); i++) {
		uint64_t blk;
		err = zap_idx_to_blk(zap, sibling+i, &blk);
		if (err)
			return (err);
		ASSERT3U(blk, ==, l->l_blkid);
	}

	nl = zap_create_leaf(zap, tx);
	zap_leaf_split(l, nl, zap->zap_normflags != 0);

	/* set sibling pointers */
	for (i = 0; i < (1ULL << prefix_diff); i++) {
		err = zap_set_idx_to_blk(zap, sibling+i, nl->l_blkid, tx);
		ASSERT0(err); /* we checked for i/o errors above */
	}

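	/*
	 * zap_leaf_split() has incremented lh_prefix_len, so bit
	 * (64 - lh_prefix_len) of the hash is the newly significant
	 * prefix bit: if it is 1 the entry belongs to the new sibling
	 * (the "| 1" half above), otherwise it stays in l.
	 */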
	if (hash & (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len))) {
		/* we want the sibling */
		zap_put_leaf(l);
		*lp = nl;
	} else {
		zap_put_leaf(nl);
		*lp = l;
	}

	return (0);
}

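/*
 * A leaf whose prefix length has caught up with zt_shift is referenced by
 * a single ptrtbl entry and thus cannot be split again until the pointer
 * table itself grows; this is where that growth gets triggered.
 */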
static void
zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
{
	zap_t *zap = zn->zn_zap;
	int shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
	int leaffull = (l->l_phys->l_hdr.lh_prefix_len == shift &&