28 #include <sys/dmu.h>
29 #include <sys/zfs_context.h>
30 #include <sys/zap.h>
31 #include <sys/refcount.h>
32 #include <sys/zap_impl.h>
33 #include <sys/zap_leaf.h>
34 #include <sys/avl.h>
35 #include <sys/arc.h>
36
37 #ifdef _KERNEL
38 #include <sys/sunddi.h>
39 #endif
40
41 static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags);
42
43 uint64_t
44 zap_getflags(zap_t *zap)
45 {
46 if (zap->zap_ismicro)
47 return (0);
48 return (zap->zap_u.zap_fat.zap_phys->zap_flags);
49 }
50
51 int
52 zap_hashbits(zap_t *zap)
53 {
54 if (zap_getflags(zap) & ZAP_FLAG_HASH64)
55 return (48);
56 else
57 return (28);
58 }
59
60 uint32_t
61 zap_maxcd(zap_t *zap)
62 {
63 if (zap_getflags(zap) & ZAP_FLAG_HASH64)
64 return ((1<<16)-1);
65 else
66 return (-1U);
67 }
68
367
368 zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
369 rw_init(&zap->zap_rwlock, 0, 0, 0);
370 rw_enter(&zap->zap_rwlock, RW_WRITER);
371 zap->zap_objset = os;
372 zap->zap_object = obj;
373 zap->zap_dbuf = db;
374
375 if (*(uint64_t *)db->db_data != ZBT_MICRO) {
376 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
377 zap->zap_f.zap_block_shift = highbit(db->db_size) - 1;
378 } else {
379 zap->zap_ismicro = TRUE;
380 }
381
382 /*
383 * Make sure that zap_ismicro is set before we let others see
384 * it, because zap_lockdir() checks zap_ismicro without the lock
385 * held.
386 */
387 winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict);
388
389 if (winner != NULL) {
390 rw_exit(&zap->zap_rwlock);
391 rw_destroy(&zap->zap_rwlock);
392 if (!zap->zap_ismicro)
393 mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
394 kmem_free(zap, sizeof (zap_t));
395 return (winner);
396 }
397
398 if (zap->zap_ismicro) {
399 zap->zap_salt = zap->zap_m.zap_phys->mz_salt;
400 zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags;
401 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
402 avl_create(&zap->zap_m.zap_avl, mze_compare,
403 sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
404
405 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
406 mzap_ent_phys_t *mze =
407 &zap->zap_m.zap_phys->mz_chunk[i];
408 if (mze->mze_name[0]) {
409 zap_name_t *zn;
410
411 zap->zap_m.zap_num_entries++;
412 zn = zap_name_alloc(zap, mze->mze_name,
413 MT_EXACT);
414 mze_insert(zap, i, zn->zn_hash);
415 zap_name_free(zn);
416 }
417 }
418 } else {
419 zap->zap_salt = zap->zap_f.zap_phys->zap_salt;
420 zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags;
421
422 ASSERT3U(sizeof (struct zap_leaf_header), ==,
423 2*ZAP_LEAF_CHUNKSIZE);
424
425 /*
426 * The embedded pointer table should not overlap the
427 * other members.
428 */
429 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
430 &zap->zap_f.zap_phys->zap_salt);
431
432 /*
433 * The embedded pointer table should end at the end of
434 * the block
435 */
436 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
437 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
438 (uintptr_t)zap->zap_f.zap_phys, ==,
439 zap->zap_dbuf->db_size);
440 }
441 rw_exit(&zap->zap_rwlock);
442 return (zap);
443 }
444
445 int
446 zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
447 krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
448 {
449 zap_t *zap;
450 dmu_buf_t *db;
451 krw_t lt;
452 int err;
453
454 *zapp = NULL;
455
456 err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
457 if (err)
458 return (err);
459
460 #ifdef ZFS_DEBUG
461 {
462 dmu_object_info_t doi;
463 dmu_object_info_from_db(db, &doi);
464 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
465 }
466 #endif
467
468 zap = dmu_buf_get_user(db);
469 if (zap == NULL)
470 zap = mzap_open(os, obj, db);
471
472 /*
473 * We're checking zap_ismicro without the lock held, in order to
474 * tell what type of lock we want. Once we have some sort of
475 * lock, see if it really is the right type. In practice this
476 * can only be different if it was upgraded from micro to fat,
477 * and micro wanted WRITER but fat only needs READER.
478 */
479 lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
480 rw_enter(&zap->zap_rwlock, lt);
481 if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
482 /* it was upgraded, now we only need reader */
483 ASSERT(lt == RW_WRITER);
484 ASSERT(RW_READER ==
485 (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
486 rw_downgrade(&zap->zap_rwlock);
487 lt = RW_READER;
488 }
659
660 VERIFY(dmu_object_set_blocksize(os, obj,
661 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0);
662
663 mzap_create_impl(os, obj, normflags, flags, tx);
664 return (obj);
665 }
666
667 int
668 zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
669 {
670 /*
671 * dmu_object_free will free the object number and free the
672 * data. Freeing the data will cause our pageout function to be
673 * called, which will destroy our data (zap_leaf_t's and zap_t).
674 */
675
676 return (dmu_object_free(os, zapobj, tx));
677 }
678
679 _NOTE(ARGSUSED(0))
680 void
681 zap_evict(dmu_buf_t *db, void *vzap)
682 {
683 zap_t *zap = vzap;
684
685 rw_destroy(&zap->zap_rwlock);
686
687 if (zap->zap_ismicro)
688 mze_destroy(zap);
689 else
690 mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
691
692 kmem_free(zap, sizeof (zap_t));
693 }
694
695 int
696 zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
697 {
698 zap_t *zap;
699 int err;
700
701 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
702 if (err)
703 return (err);
921 return (SET_ERROR(ENOTSUP));
922 }
923 err = fzap_length(zn, integer_size, num_integers);
924 zap_name_free(zn);
925 zap_unlockdir(zap);
926 return (err);
927 }
928
929 static void
930 mzap_addent(zap_name_t *zn, uint64_t value)
931 {
932 int i;
933 zap_t *zap = zn->zn_zap;
934 int start = zap->zap_m.zap_alloc_next;
935 uint32_t cd;
936
937 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
938
939 #ifdef ZFS_DEBUG
940 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
941 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
942 ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0);
943 }
944 #endif
945
946 cd = mze_find_unused_cd(zap, zn->zn_hash);
947 /* given the limited size of the microzap, this can't happen */
948 ASSERT(cd < zap_maxcd(zap));
949
950 again:
951 for (i = start; i < zap->zap_m.zap_num_chunks; i++) {
952 mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
953 if (mze->mze_name[0] == 0) {
954 mze->mze_value = value;
955 mze->mze_cd = cd;
956 (void) strcpy(mze->mze_name, zn->zn_key_orig);
957 zap->zap_m.zap_num_entries++;
958 zap->zap_m.zap_alloc_next = i+1;
959 if (zap->zap_m.zap_alloc_next ==
960 zap->zap_m.zap_num_chunks)
961 zap->zap_m.zap_alloc_next = 0;
962 mze_insert(zap, i, zn->zn_hash);
963 return;
964 }
965 }
966 if (start != 0) {
967 start = 0;
968 goto again;
969 }
970 ASSERT(!"out of entries!");
971 }
972
1133 int err;
1134 mzap_ent_t *mze;
1135 zap_name_t *zn;
1136
1137 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
1138 if (err)
1139 return (err);
1140 zn = zap_name_alloc(zap, name, mt);
1141 if (zn == NULL) {
1142 zap_unlockdir(zap);
1143 return (SET_ERROR(ENOTSUP));
1144 }
1145 if (!zap->zap_ismicro) {
1146 err = fzap_remove(zn, tx);
1147 } else {
1148 mze = mze_find(zn);
1149 if (mze == NULL) {
1150 err = SET_ERROR(ENOENT);
1151 } else {
1152 zap->zap_m.zap_num_entries--;
1153 bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid],
1154 sizeof (mzap_ent_phys_t));
1155 mze_remove(zap, mze);
1156 }
1157 }
1158 zap_name_free(zn);
1159 zap_unlockdir(zap);
1160 return (err);
1161 }
1162
1163 int
1164 zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1165 int key_numints, dmu_tx_t *tx)
1166 {
1167 zap_t *zap;
1168 int err;
1169 zap_name_t *zn;
1170
1171 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
1172 if (err)
1173 return (err);
|
28 #include <sys/dmu.h>
29 #include <sys/zfs_context.h>
30 #include <sys/zap.h>
31 #include <sys/refcount.h>
32 #include <sys/zap_impl.h>
33 #include <sys/zap_leaf.h>
34 #include <sys/avl.h>
35 #include <sys/arc.h>
36
37 #ifdef _KERNEL
38 #include <sys/sunddi.h>
39 #endif
40
41 static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags);
42
43 uint64_t
44 zap_getflags(zap_t *zap)
45 {
46 if (zap->zap_ismicro)
47 return (0);
48 return (zap->zap_f_phys->zap_flags);
49 }
50
51 int
52 zap_hashbits(zap_t *zap)
53 {
54 if (zap_getflags(zap) & ZAP_FLAG_HASH64)
55 return (48);
56 else
57 return (28);
58 }
59
60 uint32_t
61 zap_maxcd(zap_t *zap)
62 {
63 if (zap_getflags(zap) & ZAP_FLAG_HASH64)
64 return ((1<<16)-1);
65 else
66 return (-1U);
67 }
68
367
368 zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
369 rw_init(&zap->zap_rwlock, 0, 0, 0);
370 rw_enter(&zap->zap_rwlock, RW_WRITER);
371 zap->zap_objset = os;
372 zap->zap_object = obj;
373 zap->zap_dbuf = db;
374
375 if (*(uint64_t *)db->db_data != ZBT_MICRO) {
376 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
377 zap->zap_f.zap_block_shift = highbit(db->db_size) - 1;
378 } else {
379 zap->zap_ismicro = TRUE;
380 }
381
382 /*
383 * Make sure that zap_ismicro is set before we let others see
384 * it, because zap_lockdir() checks zap_ismicro without the lock
385 * held.
386 */
387 dmu_buf_init_user(&zap->db_evict, zap_evict);
388 winner = (zap_t *)dmu_buf_set_user(db, &zap->db_evict);
389
390 if (winner != NULL) {
391 rw_exit(&zap->zap_rwlock);
392 rw_destroy(&zap->zap_rwlock);
393 if (!zap->zap_ismicro)
394 mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
395 kmem_free(zap, sizeof (zap_t));
396 return (winner);
397 }
398
399 if (zap->zap_ismicro) {
400 zap->zap_salt = zap->zap_m_phys->mz_salt;
401 zap->zap_normflags = zap->zap_m_phys->mz_normflags;
402 zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
403 avl_create(&zap->zap_m.zap_avl, mze_compare,
404 sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
405
406 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
407 mzap_ent_phys_t *mze =
408 &zap->zap_m_phys->mz_chunk[i];
409 if (mze->mze_name[0]) {
410 zap_name_t *zn;
411
412 zap->zap_m.zap_num_entries++;
413 zn = zap_name_alloc(zap, mze->mze_name,
414 MT_EXACT);
415 mze_insert(zap, i, zn->zn_hash);
416 zap_name_free(zn);
417 }
418 }
419 } else {
420 zap->zap_salt = zap->zap_f_phys->zap_salt;
421 zap->zap_normflags = zap->zap_f_phys->zap_normflags;
422
423 ASSERT3U(sizeof (struct zap_leaf_header), ==,
424 2*ZAP_LEAF_CHUNKSIZE);
425
426 /*
427 * The embedded pointer table should not overlap the
428 * other members.
429 */
430 ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
431 &zap->zap_f_phys->zap_salt);
432
433 /*
434 * The embedded pointer table should end at the end of
435 * the block
436 */
437 ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
438 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
439 (uintptr_t)zap->zap_f_phys, ==,
440 zap->zap_dbuf->db_size);
441 }
442 rw_exit(&zap->zap_rwlock);
443 return (zap);
444 }
445
446 int
447 zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
448 krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
449 {
450 zap_t *zap;
451 dmu_buf_t *db;
452 krw_t lt;
453 int err;
454
455 *zapp = NULL;
456
457 err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
458 if (err)
459 return (err);
460
461 #ifdef ZFS_DEBUG
462 {
463 dmu_object_info_t doi;
464 dmu_object_info_from_db(db, &doi);
465 ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
466 }
467 #endif
468
469 zap = (zap_t *)dmu_buf_get_user(db);
470 if (zap == NULL)
471 zap = mzap_open(os, obj, db);
472
473 /*
474 * We're checking zap_ismicro without the lock held, in order to
475 * tell what type of lock we want. Once we have some sort of
476 * lock, see if it really is the right type. In practice this
477 * can only be different if it was upgraded from micro to fat,
478 * and micro wanted WRITER but fat only needs READER.
479 */
480 lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
481 rw_enter(&zap->zap_rwlock, lt);
482 if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
483 /* it was upgraded, now we only need reader */
484 ASSERT(lt == RW_WRITER);
485 ASSERT(RW_READER ==
486 (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
487 rw_downgrade(&zap->zap_rwlock);
488 lt = RW_READER;
489 }
660
661 VERIFY(dmu_object_set_blocksize(os, obj,
662 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0);
663
664 mzap_create_impl(os, obj, normflags, flags, tx);
665 return (obj);
666 }
667
668 int
669 zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
670 {
671 /*
672 * dmu_object_free will free the object number and free the
673 * data. Freeing the data will cause our pageout function to be
674 * called, which will destroy our data (zap_leaf_t's and zap_t).
675 */
676
677 return (dmu_object_free(os, zapobj, tx));
678 }
679
680 void
681 zap_evict(dmu_buf_user_t *dbu)
682 {
683 zap_t *zap = (zap_t *)dbu;
684
685 rw_destroy(&zap->zap_rwlock);
686
687 if (zap->zap_ismicro)
688 mze_destroy(zap);
689 else
690 mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
691
692 kmem_free(zap, sizeof (zap_t));
693 }
694
695 int
696 zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
697 {
698 zap_t *zap;
699 int err;
700
701 err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
702 if (err)
703 return (err);
921 return (SET_ERROR(ENOTSUP));
922 }
923 err = fzap_length(zn, integer_size, num_integers);
924 zap_name_free(zn);
925 zap_unlockdir(zap);
926 return (err);
927 }
928
929 static void
930 mzap_addent(zap_name_t *zn, uint64_t value)
931 {
932 int i;
933 zap_t *zap = zn->zn_zap;
934 int start = zap->zap_m.zap_alloc_next;
935 uint32_t cd;
936
937 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
938
939 #ifdef ZFS_DEBUG
940 for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
941 mzap_ent_phys_t *mze = &zap->zap_m_phys->mz_chunk[i];
942 ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0);
943 }
944 #endif
945
946 cd = mze_find_unused_cd(zap, zn->zn_hash);
947 /* given the limited size of the microzap, this can't happen */
948 ASSERT(cd < zap_maxcd(zap));
949
950 again:
951 for (i = start; i < zap->zap_m.zap_num_chunks; i++) {
952 mzap_ent_phys_t *mze = &zap->zap_m_phys->mz_chunk[i];
953 if (mze->mze_name[0] == 0) {
954 mze->mze_value = value;
955 mze->mze_cd = cd;
956 (void) strcpy(mze->mze_name, zn->zn_key_orig);
957 zap->zap_m.zap_num_entries++;
958 zap->zap_m.zap_alloc_next = i+1;
959 if (zap->zap_m.zap_alloc_next ==
960 zap->zap_m.zap_num_chunks)
961 zap->zap_m.zap_alloc_next = 0;
962 mze_insert(zap, i, zn->zn_hash);
963 return;
964 }
965 }
966 if (start != 0) {
967 start = 0;
968 goto again;
969 }
970 ASSERT(!"out of entries!");
971 }
972
1133 int err;
1134 mzap_ent_t *mze;
1135 zap_name_t *zn;
1136
1137 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
1138 if (err)
1139 return (err);
1140 zn = zap_name_alloc(zap, name, mt);
1141 if (zn == NULL) {
1142 zap_unlockdir(zap);
1143 return (SET_ERROR(ENOTSUP));
1144 }
1145 if (!zap->zap_ismicro) {
1146 err = fzap_remove(zn, tx);
1147 } else {
1148 mze = mze_find(zn);
1149 if (mze == NULL) {
1150 err = SET_ERROR(ENOENT);
1151 } else {
1152 zap->zap_m.zap_num_entries--;
1153 bzero(&zap->zap_m_phys->mz_chunk[mze->mze_chunkid],
1154 sizeof (mzap_ent_phys_t));
1155 mze_remove(zap, mze);
1156 }
1157 }
1158 zap_name_free(zn);
1159 zap_unlockdir(zap);
1160 return (err);
1161 }
1162
1163 int
1164 zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1165 int key_numints, dmu_tx_t *tx)
1166 {
1167 zap_t *zap;
1168 int err;
1169 zap_name_t *zn;
1170
1171 err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
1172 if (err)
1173 return (err);
|