Print this page
3752 want more verifiable dbuf user eviction
Submitted by:   Justin Gibbs <justing@spectralogic.com>
Submitted by:   Will Andrews <willa@spectralogic.com>


  /* Size that corresponds to MZAP_MAX_BLKSHIFT (defined outside this listing). */
  45 #define MZAP_MAX_BLKSZ          (1 << MZAP_MAX_BLKSHIFT)
  46 
  /*
   * All-ones unsigned value.  NOTE(review): presumably a sentinel meaning
   * "no cd has been chosen yet" -- confirm at the call sites, which are
   * not visible here.
   */
  47 #define ZAP_NEED_CD             (-1U)
  48 
  /*
   * One element of mzap_phys_t's mz_chunk[] array: a 64-bit value, a 32-bit
   * cd, a 16-bit pad and a fixed-size name buffer.
   */
  49 typedef struct mzap_ent_phys {
  50         uint64_t mze_value;     /* 64-bit value stored for this entry */
  51         uint32_t mze_cd;        /* mirrored in mzap_ent_t's mze_cd */
  52         uint16_t mze_pad;       /* in case we want to chain them someday */
  53         char mze_name[MZAP_NAME_LEN];   /* MZAP_NAME_LEN is defined elsewhere */
  54 } mzap_ent_phys_t;
  55 
  /*
   * Header of a micro zap block.  Eight uint64_t words (three named fields
   * plus five words of padding) precede the entry array.
   */
  56 typedef struct mzap_phys {
  57         uint64_t mz_block_type; /* ZBT_MICRO */
  58         uint64_t mz_salt;
  59         uint64_t mz_normflags;
  60         uint64_t mz_pad[5];
  61         mzap_ent_phys_t mz_chunk[1];    /* indexed by chunk id, see MZE_PHYS() */
  62         /* actually variable size depending on block size */
  63 } mzap_phys_t;
  64 





  /*
   * Record that refers to one mzap_ent_phys_t by chunk index.
   * NOTE(review): mze_node presumably links the record into zap_m.zap_avl --
   * confirm in the code that fills the tree.
   */
  65 typedef struct mzap_ent {
  66         avl_node_t mze_node;
  67         int mze_chunkid;        /* index into mz_chunk[], used by MZE_PHYS() */
  68         uint64_t mze_hash;
  69         uint32_t mze_cd; /* copy from mze_phys->mze_cd */
  70 } mzap_ent_t;
  71 
  /*
   * Address of the mzap_ent_phys_t that `mze` refers to: element
   * (mze)->mze_chunkid of the micro zap's mz_chunk[] array.  The index is
   * used as-is; no bounds check is performed.
   */
  72 #define MZE_PHYS(zap, mze) \
  73         (&(zap)->zap_m.zap_phys->mz_chunk[(mze)->mze_chunkid])
  74 
  75 /*
  76  * The (fat) zap is stored in one object. It is an array of
  77  * 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
  78  *
  79  * ptrtbl fits in first block:
  80  *      [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
  81  *
  82  * ptrtbl too big for first block:
  83  *      [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
  84  *
  85  */
  86 
  /* Forward declarations; only the struct tags are introduced here. */
  87 struct dmu_buf;
  88 struct zap_leaf;
  89 
  /*
   * Block type tags: bit 63 set plus a small code (0, 1 or 3).  ZBT_HEADER
   * and ZBT_MICRO are the values noted on zap_phys_t's zap_block_type and
   * mzap_phys_t's mz_block_type respectively.  No tag with code 2 is
   * defined in this listing.
   */
  90 #define ZBT_LEAF                ((1ULL << 63) + 0)
  91 #define ZBT_HEADER              ((1ULL << 63) + 1)
  92 #define ZBT_MICRO               ((1ULL << 63) + 3)
  93 /* any other values are ptrtbl blocks */
  94 
  95 /*
  96  * the embedded pointer table takes up half a block:
  97  * block size / entry size (2^3) / 2
      *
      * Expressed as a shift: the block shift minus 3 (8-byte entries) minus 1
      * (half a block), i.e. log2 of the number of entries in the embedded table.
      * FZAP_BLOCK_SHIFT() is defined outside this listing.
  98  */
  99 #define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1)
 100 
 101 /*
 102  * The embedded pointer table starts half-way through the block.  Since
 103  * the pointer table itself is half the block, it starts at (64-bit)
 104  * word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
      *
      * The macro views the fat zap header pointer as an array of uint64_t and
      * indexes it at that word offset plus idx.  The expansion is an array
      * element, so it can be read or assigned.  idx is not range-checked.
 105  */
 106 #define ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) \
 107         ((uint64_t *)(zap)->zap_f.zap_phys) \
 108         [(idx) + (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap))]
 109 
 110 /*
 111  * TAKE NOTE:
 112  * If zap_phys_t is modified, zap_byteswap() must be modified.
      *
      * Header of a fat zap (block type ZBT_HEADER).  Every member is a
      * uint64_t: thirteen words in all, counting the five in zap_ptrtbl.
 113  */
 114 typedef struct zap_phys {
 115         uint64_t zap_block_type;        /* ZBT_HEADER */
 116         uint64_t zap_magic;             /* ZAP_MAGIC */
 117 
 118         struct zap_table_phys {
 119                 uint64_t zt_blk;        /* starting block number */
 120                 uint64_t zt_numblks;    /* number of blocks */
 121                 uint64_t zt_shift;      /* bits to index it */
 122                 uint64_t zt_nextblk;    /* next (larger) copy start block */
 123                 uint64_t zt_blks_copied; /* number of source blocks copied */
 124         } zap_ptrtbl;
 125 
 126         uint64_t zap_freeblk;           /* the next free block */
 127         uint64_t zap_num_leafs;         /* number of leafs */
 128         uint64_t zap_num_entries;       /* number of entries */
 129         uint64_t zap_salt;              /* salt to stir into hash function */
 130         uint64_t zap_normflags;         /* flags for u8_textprep_str() */
 131         uint64_t zap_flags;             /* zap_flags_t */
 132         /*
 133          * This structure is followed by padding, and then the embedded
 134          * pointer table.  The embedded pointer table takes up the second
 135          * half of the block.  It is accessed using the
 136          * ZAP_EMBEDDED_PTRTBL_ENT() macro.
 137          */
 138 } zap_phys_t;
 139 
 140 typedef struct zap_table_phys zap_table_phys_t;        /* typedef for the nested table struct */
 141 





 /*
  * Handle for one zap object.  The zap_u union carries either the fat or
  * the micro state; the arms are reached through the zap_f and zap_m
  * macros.  NOTE(review): zap_ismicro presumably tells which arm is live --
  * confirm in the code that sets it.
  */
 142 typedef struct zap {

 143         objset_t *zap_objset;
 144         uint64_t zap_object;
 145         struct dmu_buf *zap_dbuf;




 146         krwlock_t zap_rwlock;
 147         boolean_t zap_ismicro;
 148         int zap_normflags;
 149         uint64_t zap_salt;
 150         union {
 151                 struct {
 152                         zap_phys_t *zap_phys;   /* fat zap header */
 153 
 154                         /*
 155                          * zap_num_entries_mtx protects
 156                          * zap_num_entries
 157                          */
 158                         kmutex_t zap_num_entries_mtx;
 159                         int zap_block_shift;
 160                 } zap_fat;
 161                 struct {
 162                         mzap_phys_t *zap_phys;  /* micro zap header */
 163                         int16_t zap_num_entries;
 164                         int16_t zap_num_chunks;
 165                         int16_t zap_alloc_next;
 166                         avl_tree_t zap_avl;
 167                 } zap_micro;
 168         } zap_u;
 169 } zap_t;
 170 







 /*
  * A key being looked up or modified, with a pointer to its zap_t.  Both an
  * "orig" and a "norm" view of the key are kept, each with its own integer
  * count.  NOTE(review): zn_key_norm presumably points into zn_normbuf when
  * the key is normalized -- confirm in zap_name_alloc().
  */
 171 typedef struct zap_name {
 172         zap_t *zn_zap;
 173         int zn_key_intlen;
 174         const void *zn_key_orig;
 175         int zn_key_orig_numints;
 176         const void *zn_key_norm;
 177         int zn_key_norm_numints;
 178         uint64_t zn_hash;
 179         matchtype_t zn_matchtype;
 180         char zn_normbuf[ZAP_MAXNAMELEN];       /* ZAP_MAXNAMELEN is defined elsewhere */
 181 } zap_name_t;
 182 
 /* Shorthand for the fat and micro arms of zap_t's zap_u union. */
 183 #define zap_f   zap_u.zap_fat
 184 #define zap_m   zap_u.zap_micro
 185 
 /*
  * Declarations only; the definitions are not part of this listing, so the
  * notes below are limited to what the signatures show.
  */
 186 boolean_t zap_match(zap_name_t *zn, const char *matchname);
 /* NOTE(review): zapp is a zap_t **, presumably an out parameter -- confirm. */
 187 int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
 188     krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
 189 void zap_unlockdir(zap_t *zap);
 /* Second argument is an untyped pointer. */
 190 void zap_evict(dmu_buf_t *db, void *vmzap);
 /* zap_name_alloc() returns a zap_name_t *; zap_name_free() takes one. */
 191 zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
 192 void zap_name_free(zap_name_t *zn);
 193 int zap_hashbits(zap_t *zap);
 194 uint32_t zap_maxcd(zap_t *zap);
 195 uint64_t zap_getflags(zap_t *zap);
 196 
 /*
  * Top n bits of hash.  n == 0 is special-cased to 0, which also avoids a
  * shift by 64.  The width 64 is hard-coded, so hash is expected to be a
  * 64-bit unsigned value.  n appears twice in the expansion, so it should
  * be free of side effects.
  */
 197 #define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))
 198 
 /*
  * fzap_* declarations; the definitions are not part of this listing.
  * fzap_byteswap() works on a raw buffer, fzap_count() takes the zap_t
  * directly, and the rest take a zap_name_t (which carries its zap_t in
  * zn_zap).
  */
 199 void fzap_byteswap(void *buf, size_t size);
 200 int fzap_count(zap_t *zap, uint64_t *count);
 201 int fzap_lookup(zap_name_t *zn,
 202     uint64_t integer_size, uint64_t num_integers, void *buf,
 203     char *realname, int rn_len, boolean_t *normalization_conflictp);
 204 void fzap_prefetch(zap_name_t *zn);
 205 int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
 206     uint64_t *tooverwrite);
 207 int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
 208     const void *val, dmu_tx_t *tx);
 /*
  * NOTE(review): integer_size is an int here but a uint64_t in
  * fzap_lookup() and fzap_add().
  */
 209 int fzap_update(zap_name_t *zn,
 210     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);


  /* Size that corresponds to MZAP_MAX_BLKSHIFT (defined outside this listing). */
  45 #define MZAP_MAX_BLKSZ          (1 << MZAP_MAX_BLKSHIFT)
  46 
  /*
   * All-ones unsigned value.  NOTE(review): presumably a sentinel meaning
   * "no cd has been chosen yet" -- confirm at the call sites, which are
   * not visible here.
   */
  47 #define ZAP_NEED_CD             (-1U)
  48 
  /*
   * One element of mzap_phys_t's mz_chunk[] array: a 64-bit value, a 32-bit
   * cd, a 16-bit pad and a fixed-size name buffer.
   */
  49 typedef struct mzap_ent_phys {
  50         uint64_t mze_value;     /* 64-bit value stored for this entry */
  51         uint32_t mze_cd;        /* mirrored in mzap_ent_t's mze_cd */
  52         uint16_t mze_pad;       /* in case we want to chain them someday */
  53         char mze_name[MZAP_NAME_LEN];   /* MZAP_NAME_LEN is defined elsewhere */
  54 } mzap_ent_phys_t;
  55 
  /*
   * Header of a micro zap block.  Eight uint64_t words (three named fields
   * plus five words of padding) precede the entry array.
   */
  56 typedef struct mzap_phys {
  57         uint64_t mz_block_type; /* ZBT_MICRO */
  58         uint64_t mz_salt;
  59         uint64_t mz_normflags;
  60         uint64_t mz_pad[5];
  61         mzap_ent_phys_t mz_chunk[1];    /* indexed by chunk id, see MZE_PHYS() */
  62         /* actually variable size depending on block size */
  63 } mzap_phys_t;
  64 
  /*
   * Typed view of a dmu_buf_t for micro zaps.  The pad array is as large as
   * everything that precedes db_data in dmu_buf_t, so mzdb_data is meant to
   * sit where db_data does while being typed as mzap_phys_t *.  It is reached
   * through zap_t's zap_db_u union (see the zap_m_phys macro).
   * NOTE(review): this relies on no padding being inserted between the two
   * members and on db_data being a pointer -- confirm against dmu_buf_t.
   */
  65 typedef struct mzap_dbuf {
  66         uint8_t mzdb_pad[offsetof(dmu_buf_t, db_data)];
  67         mzap_phys_t *mzdb_data;
  68 } mzap_dbuf_t;
  69 
  /*
   * Record that refers to one mzap_ent_phys_t by chunk index.
   * NOTE(review): mze_node presumably links the record into zap_m.zap_avl --
   * confirm in the code that fills the tree.
   */
  70 typedef struct mzap_ent {
  71         avl_node_t mze_node;
  72         int mze_chunkid;        /* index into mz_chunk[], used by MZE_PHYS() */
  73         uint64_t mze_hash;
  74         uint32_t mze_cd; /* copy from mze_phys->mze_cd */
  75 } mzap_ent_t;
  76 
  /*
   * Address of the mzap_ent_phys_t that `mze` refers to: element
   * (mze)->mze_chunkid of the mz_chunk[] array reached through the
   * zap_m_phys macro.  The index is used as-is; no bounds check is performed.
   */
  77 #define MZE_PHYS(zap, mze) \
  78         (&(zap)->zap_m_phys->mz_chunk[(mze)->mze_chunkid])
  79 
  80 /*
  81  * The (fat) zap is stored in one object. It is an array of
  82  * 1<<FZAP_BLOCK_SHIFT byte blocks. The layout looks like one of:
  83  *
  84  * ptrtbl fits in first block:
  85  *      [zap_phys_t zap_ptrtbl_shift < 6] [zap_leaf_t] ...
  86  *
  87  * ptrtbl too big for first block:
  88  *      [zap_phys_t zap_ptrtbl_shift >= 6] [zap_leaf_t] [ptrtbl] ...
  89  *
  90  */
  91 
  /* Forward declarations; only the struct tags are introduced here. */
  92 struct dmu_buf;
  93 struct zap_leaf;
  94 
  /*
   * Block type tags: bit 63 set plus a small code (0, 1 or 3).  ZBT_HEADER
   * and ZBT_MICRO are the values noted on zap_phys_t's zap_block_type and
   * mzap_phys_t's mz_block_type respectively.  No tag with code 2 is
   * defined in this listing.
   */
  95 #define ZBT_LEAF                ((1ULL << 63) + 0)
  96 #define ZBT_HEADER              ((1ULL << 63) + 1)
  97 #define ZBT_MICRO               ((1ULL << 63) + 3)
  98 /* any other values are ptrtbl blocks */
  99 
 100 /*
 101  * the embedded pointer table takes up half a block:
 102  * block size / entry size (2^3) / 2
      *
      * Expressed as a shift: the block shift minus 3 (8-byte entries) minus 1
      * (half a block), i.e. log2 of the number of entries in the embedded table.
      * FZAP_BLOCK_SHIFT() is defined outside this listing.
 103  */
 104 #define ZAP_EMBEDDED_PTRTBL_SHIFT(zap) (FZAP_BLOCK_SHIFT(zap) - 3 - 1)
 105 
 106 /*
 107  * The embedded pointer table starts half-way through the block.  Since
 108  * the pointer table itself is half the block, it starts at (64-bit)
 109  * word number (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)).
      *
      * The macro views the fat zap header pointer (zap_f_phys) as an array of
      * uint64_t and indexes it at that word offset plus idx.  The expansion is
      * an array element, so it can be read or assigned.  idx is not
      * range-checked.
 110  */
 111 #define ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) \
 112         ((uint64_t *)(zap)->zap_f_phys) \
 113         [(idx) + (1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap))]
 114 
 115 /*
 116  * TAKE NOTE:
 117  * If zap_phys_t is modified, zap_byteswap() must be modified.
      *
      * Header of a fat zap (block type ZBT_HEADER).  Every member is a
      * uint64_t: thirteen words in all, counting the five in zap_ptrtbl.
 118  */
 119 typedef struct zap_phys {
 120         uint64_t zap_block_type;        /* ZBT_HEADER */
 121         uint64_t zap_magic;             /* ZAP_MAGIC */
 122 
 123         struct zap_table_phys {
 124                 uint64_t zt_blk;        /* starting block number */
 125                 uint64_t zt_numblks;    /* number of blocks */
 126                 uint64_t zt_shift;      /* bits to index it */
 127                 uint64_t zt_nextblk;    /* next (larger) copy start block */
 128                 uint64_t zt_blks_copied; /* number of source blocks copied */
 129         } zap_ptrtbl;
 130 
 131         uint64_t zap_freeblk;           /* the next free block */
 132         uint64_t zap_num_leafs;         /* number of leafs */
 133         uint64_t zap_num_entries;       /* number of entries */
 134         uint64_t zap_salt;              /* salt to stir into hash function */
 135         uint64_t zap_normflags;         /* flags for u8_textprep_str() */
 136         uint64_t zap_flags;             /* zap_flags_t */
 137         /*
 138          * This structure is followed by padding, and then the embedded
 139          * pointer table.  The embedded pointer table takes up the second
 140          * half of the block.  It is accessed using the
 141          * ZAP_EMBEDDED_PTRTBL_ENT() macro.
 142          */
 143 } zap_phys_t;
 144 
 145 typedef struct zap_table_phys zap_table_phys_t;        /* typedef for the nested table struct */
 146 
 /*
  * Typed view of a dmu_buf_t for fat zaps.  The pad array is as large as
  * everything that precedes db_data in dmu_buf_t, so fzdb_data is meant to
  * sit where db_data does while being typed as zap_phys_t *.  It is reached
  * through zap_t's zap_db_u union (see the zap_f_phys macro).
  * NOTE(review): this relies on no padding being inserted between the two
  * members and on db_data being a pointer -- confirm against dmu_buf_t.
  */
 147 typedef struct fzap_dbuf {
 148         uint8_t fzdb_pad[offsetof(dmu_buf_t, db_data)];
 149         zap_phys_t *fzdb_data;
 150 } fzap_dbuf_t;
 151 
 /*
  * Handle for one zap object.  zap_db_u holds one dbuf pointer under three
  * types (generic, micro view, fat view).  zap_u holds the state specific
  * to the fat or the micro form.  Neither arm of zap_u stores a phys
  * pointer; the headers are reached through zap_f_phys and zap_m_phys.
  * NOTE(review): zap_ismicro presumably tells which form is live -- confirm
  * in the code that sets it.
  */
 152 typedef struct zap {
 /*
  * First member.  NOTE(review): zap_evict() takes a dmu_buf_user_t *, so
  * this placement presumably lets it get back to the zap_t -- confirm in
  * the definition.
  */
 153         dmu_buf_user_t db_evict;
 154         objset_t *zap_objset;
 155         uint64_t zap_object;
 156         union {
 157                 dmu_buf_t *zap_dmu_db;  /* generic view, aliased as zap_dbuf */
 158                 mzap_dbuf_t *mzap_db;   /* micro view, used by zap_m_phys */
 159                 fzap_dbuf_t *fzap_db;   /* fat view, used by zap_f_phys */
 160         } zap_db_u;
 161         krwlock_t zap_rwlock;
 162         boolean_t zap_ismicro;
 163         int zap_normflags;
 164         uint64_t zap_salt;
 165         union {
 166                 struct {
 167                         /* protects zap_num_entries */





 168                         kmutex_t zap_num_entries_mtx;
 169                         int zap_block_shift;
 170                 } zap_fat;
 171                 struct {

 172                         int16_t zap_num_entries;
 173                         int16_t zap_num_chunks;
 174                         int16_t zap_alloc_next;
 175                         avl_tree_t zap_avl;
 176                 } zap_micro;
 177         } zap_u;
 178 } zap_t;
 179 
 180 /* See sys/dmu.h:dmu_buf_user_t for why we have these. */
 /*
  * zap_dbuf names the generic dmu_buf_t pointer in zap_db_u.  zap_f and
  * zap_m name the fat and micro arms of zap_u.  zap_f_phys and zap_m_phys
  * dereference the typed dbuf views to reach the zap_phys_t and mzap_phys_t
  * headers.
  */
 181 #define zap_dbuf        zap_db_u.zap_dmu_db
 182 #define zap_f           zap_u.zap_fat
 183 #define zap_m           zap_u.zap_micro
 184 #define zap_f_phys      zap_db_u.fzap_db->fzdb_data
 185 #define zap_m_phys      zap_db_u.mzap_db->mzdb_data
 186 
 /*
  * A key being looked up or modified, with a pointer to its zap_t.  Both an
  * "orig" and a "norm" view of the key are kept, each with its own integer
  * count.  NOTE(review): zn_key_norm presumably points into zn_normbuf when
  * the key is normalized -- confirm in zap_name_alloc().
  */
 187 typedef struct zap_name {
 188         zap_t *zn_zap;
 189         int zn_key_intlen;
 190         const void *zn_key_orig;
 191         int zn_key_orig_numints;
 192         const void *zn_key_norm;
 193         int zn_key_norm_numints;
 194         uint64_t zn_hash;
 195         matchtype_t zn_matchtype;
 196         char zn_normbuf[ZAP_MAXNAMELEN];       /* ZAP_MAXNAMELEN is defined elsewhere */
 197 } zap_name_t;
 198 



 /*
  * Declarations only; the definitions are not part of this listing, so the
  * notes below are limited to what the signatures show.
  */
 199 boolean_t zap_match(zap_name_t *zn, const char *matchname);
 /* NOTE(review): zapp is a zap_t **, presumably an out parameter -- confirm. */
 200 int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
 201     krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
 202 void zap_unlockdir(zap_t *zap);
 /* Takes a dmu_buf_user_t *; zap_t has one as its first member (db_evict). */
 203 void zap_evict(dmu_buf_user_t *dbu);
 /* zap_name_alloc() returns a zap_name_t *; zap_name_free() takes one. */
 204 zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
 205 void zap_name_free(zap_name_t *zn);
 206 int zap_hashbits(zap_t *zap);
 207 uint32_t zap_maxcd(zap_t *zap);
 208 uint64_t zap_getflags(zap_t *zap);
 209 
 /*
  * Top n bits of hash.  n == 0 is special-cased to 0, which also avoids a
  * shift by 64.  The width 64 is hard-coded, so hash is expected to be a
  * 64-bit unsigned value.  n appears twice in the expansion, so it should
  * be free of side effects.
  */
 210 #define ZAP_HASH_IDX(hash, n) (((n) == 0) ? 0 : ((hash) >> (64 - (n))))
 211 
 /*
  * fzap_* declarations; the definitions are not part of this listing.
  * fzap_byteswap() works on a raw buffer, fzap_count() takes the zap_t
  * directly, and the rest take a zap_name_t (which carries its zap_t in
  * zn_zap).
  */
 212 void fzap_byteswap(void *buf, size_t size);
 213 int fzap_count(zap_t *zap, uint64_t *count);
 214 int fzap_lookup(zap_name_t *zn,
 215     uint64_t integer_size, uint64_t num_integers, void *buf,
 216     char *realname, int rn_len, boolean_t *normalization_conflictp);
 217 void fzap_prefetch(zap_name_t *zn);
 218 int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
 219     uint64_t *tooverwrite);
 220 int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
 221     const void *val, dmu_tx_t *tx);
 /*
  * NOTE(review): integer_size is an int here but a uint64_t in
  * fzap_lookup() and fzap_add().
  */
 222 int fzap_update(zap_name_t *zn,
 223     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);