Print this page
3752 want more verifiable dbuf user eviction
Submitted by: Justin Gibbs <justing@spectralogic.com>
Submitted by: Will Andrews <willa@spectralogic.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/zfs/zap.c
+++ new/usr/src/uts/common/fs/zfs/zap.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * This file contains the top half of the zfs directory structure
28 28 * implementation. The bottom half is in zap_leaf.c.
29 29 *
30 30 * The zdir is an extendable hash data structure. There is a table of
31 31 * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are
32 32 * each a constant size and hold a variable number of directory entries.
33 33 * The buckets (aka "leaf nodes") are implemented in zap_leaf.c.
34 34 *
35 35 * The pointer table holds a power of 2 number of pointers.
36 36 * (1<<zap_t->zd_data->zd_phys->zd_prefix_len). The bucket pointed to
37 37 * by the pointer at index i in the table holds entries whose hash value
 38 38 * has a zd_prefix_len-bit prefix.
39 39 */
40 40
41 41 #include <sys/spa.h>
42 42 #include <sys/dmu.h>
↓ open down ↓ |
42 lines elided |
↑ open up ↑ |
43 43 #include <sys/zfs_context.h>
44 44 #include <sys/zfs_znode.h>
45 45 #include <sys/fs/zfs.h>
46 46 #include <sys/zap.h>
47 47 #include <sys/refcount.h>
48 48 #include <sys/zap_impl.h>
49 49 #include <sys/zap_leaf.h>
50 50
51 51 int fzap_default_block_shift = 14; /* 16k blocksize */
52 52
53 -static void zap_leaf_pageout(dmu_buf_t *db, void *vl);
54 53 static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
55 54
56 -
57 55 void
58 56 fzap_byteswap(void *vbuf, size_t size)
59 57 {
60 58 uint64_t block_type;
61 59
62 60 block_type = *(uint64_t *)vbuf;
63 61
64 62 if (block_type == ZBT_LEAF || block_type == BSWAP_64(ZBT_LEAF))
65 63 zap_leaf_byteswap(vbuf, size);
66 64 else {
67 65 /* it's a ptrtbl block */
68 66 byteswap_uint64_array(vbuf, size);
69 67 }
70 68 }
71 69
72 70 void
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
73 71 fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
74 72 {
75 73 dmu_buf_t *db;
76 74 zap_leaf_t *l;
77 75 int i;
78 76 zap_phys_t *zp;
79 77
80 78 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
81 79 zap->zap_ismicro = FALSE;
82 80
83 - (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap,
84 - &zap->zap_f.zap_phys, zap_evict);
81 + zap->db_evict.evict_func = zap_evict;
85 82
86 83 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
87 84 zap->zap_f.zap_block_shift = highbit(zap->zap_dbuf->db_size) - 1;
88 85
89 - zp = zap->zap_f.zap_phys;
86 + zp = zap->zap_f_phys;
90 87 /*
91 88 * explicitly zero it since it might be coming from an
92 89 * initialized microzap
93 90 */
94 91 bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size);
95 92 zp->zap_block_type = ZBT_HEADER;
96 93 zp->zap_magic = ZAP_MAGIC;
97 94
98 95 zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap);
99 96
100 97 zp->zap_freeblk = 2; /* block 1 will be the first leaf */
101 98 zp->zap_num_leafs = 1;
102 99 zp->zap_num_entries = 0;
103 100 zp->zap_salt = zap->zap_salt;
104 101 zp->zap_normflags = zap->zap_normflags;
105 102 zp->zap_flags = flags;
106 103
107 104 /* block 1 will be the first leaf */
108 105 for (i = 0; i < (1<<zp->zap_ptrtbl.zt_shift); i++)
109 106 ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1;
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
110 107
111 108 /*
112 109 * set up block 1 - the first leaf
113 110 */
114 111 VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
115 112 1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH));
116 113 dmu_buf_will_dirty(db, tx);
117 114
118 115 l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
119 116 l->l_dbuf = db;
120 - l->l_phys = db->db_data;
121 117
122 118 zap_leaf_init(l, zp->zap_normflags != 0);
123 119
124 120 kmem_free(l, sizeof (zap_leaf_t));
125 121 dmu_buf_rele(db, FTAG);
126 122 }
127 123
128 124 static int
129 125 zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx)
130 126 {
131 127 if (RW_WRITE_HELD(&zap->zap_rwlock))
132 128 return (1);
133 129 if (rw_tryupgrade(&zap->zap_rwlock)) {
134 130 dmu_buf_will_dirty(zap->zap_dbuf, tx);
135 131 return (1);
136 132 }
137 133 return (0);
138 134 }
139 135
140 136 /*
141 137 * Generic routines for dealing with the pointer & cookie tables.
142 138 */
143 139
144 140 static int
145 141 zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
146 142 void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n),
147 143 dmu_tx_t *tx)
148 144 {
149 145 uint64_t b, newblk;
150 146 dmu_buf_t *db_old, *db_new;
151 147 int err;
152 148 int bs = FZAP_BLOCK_SHIFT(zap);
153 149 int hepb = 1<<(bs-4);
154 150 /* hepb = half the number of entries in a block */
155 151
156 152 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
157 153 ASSERT(tbl->zt_blk != 0);
158 154 ASSERT(tbl->zt_numblks > 0);
159 155
160 156 if (tbl->zt_nextblk != 0) {
161 157 newblk = tbl->zt_nextblk;
162 158 } else {
163 159 newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
164 160 tbl->zt_nextblk = newblk;
165 161 ASSERT0(tbl->zt_blks_copied);
166 162 dmu_prefetch(zap->zap_objset, zap->zap_object,
167 163 tbl->zt_blk << bs, tbl->zt_numblks << bs);
168 164 }
169 165
170 166 /*
171 167 * Copy the ptrtbl from the old to new location.
172 168 */
173 169
174 170 b = tbl->zt_blks_copied;
175 171 err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
176 172 (tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH);
177 173 if (err)
178 174 return (err);
179 175
180 176 /* first half of entries in old[b] go to new[2*b+0] */
181 177 VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
182 178 (newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
183 179 dmu_buf_will_dirty(db_new, tx);
184 180 transfer_func(db_old->db_data, db_new->db_data, hepb);
185 181 dmu_buf_rele(db_new, FTAG);
186 182
187 183 /* second half of entries in old[b] go to new[2*b+1] */
188 184 VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
189 185 (newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
190 186 dmu_buf_will_dirty(db_new, tx);
191 187 transfer_func((uint64_t *)db_old->db_data + hepb,
192 188 db_new->db_data, hepb);
193 189 dmu_buf_rele(db_new, FTAG);
194 190
195 191 dmu_buf_rele(db_old, FTAG);
196 192
197 193 tbl->zt_blks_copied++;
198 194
199 195 dprintf("copied block %llu of %llu\n",
200 196 tbl->zt_blks_copied, tbl->zt_numblks);
201 197
202 198 if (tbl->zt_blks_copied == tbl->zt_numblks) {
203 199 (void) dmu_free_range(zap->zap_objset, zap->zap_object,
204 200 tbl->zt_blk << bs, tbl->zt_numblks << bs, tx);
205 201
206 202 tbl->zt_blk = newblk;
207 203 tbl->zt_numblks *= 2;
208 204 tbl->zt_shift++;
209 205 tbl->zt_nextblk = 0;
210 206 tbl->zt_blks_copied = 0;
211 207
212 208 dprintf("finished; numblocks now %llu (%lluk entries)\n",
213 209 tbl->zt_numblks, 1<<(tbl->zt_shift-10));
214 210 }
215 211
216 212 return (0);
217 213 }
218 214
219 215 static int
220 216 zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val,
221 217 dmu_tx_t *tx)
222 218 {
223 219 int err;
224 220 uint64_t blk, off;
225 221 int bs = FZAP_BLOCK_SHIFT(zap);
226 222 dmu_buf_t *db;
227 223
228 224 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
229 225 ASSERT(tbl->zt_blk != 0);
230 226
231 227 dprintf("storing %llx at index %llx\n", val, idx);
232 228
233 229 blk = idx >> (bs-3);
234 230 off = idx & ((1<<(bs-3))-1);
235 231
236 232 err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
237 233 (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
238 234 if (err)
239 235 return (err);
240 236 dmu_buf_will_dirty(db, tx);
241 237
242 238 if (tbl->zt_nextblk != 0) {
243 239 uint64_t idx2 = idx * 2;
244 240 uint64_t blk2 = idx2 >> (bs-3);
245 241 uint64_t off2 = idx2 & ((1<<(bs-3))-1);
246 242 dmu_buf_t *db2;
247 243
248 244 err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
249 245 (tbl->zt_nextblk + blk2) << bs, FTAG, &db2,
250 246 DMU_READ_NO_PREFETCH);
251 247 if (err) {
252 248 dmu_buf_rele(db, FTAG);
253 249 return (err);
254 250 }
255 251 dmu_buf_will_dirty(db2, tx);
256 252 ((uint64_t *)db2->db_data)[off2] = val;
257 253 ((uint64_t *)db2->db_data)[off2+1] = val;
258 254 dmu_buf_rele(db2, FTAG);
259 255 }
260 256
261 257 ((uint64_t *)db->db_data)[off] = val;
262 258 dmu_buf_rele(db, FTAG);
263 259
264 260 return (0);
265 261 }
266 262
267 263 static int
268 264 zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
269 265 {
270 266 uint64_t blk, off;
271 267 int err;
272 268 dmu_buf_t *db;
273 269 int bs = FZAP_BLOCK_SHIFT(zap);
274 270
275 271 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
276 272
277 273 blk = idx >> (bs-3);
278 274 off = idx & ((1<<(bs-3))-1);
279 275
280 276 err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
281 277 (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
282 278 if (err)
283 279 return (err);
284 280 *valp = ((uint64_t *)db->db_data)[off];
285 281 dmu_buf_rele(db, FTAG);
286 282
287 283 if (tbl->zt_nextblk != 0) {
288 284 /*
289 285 * read the nextblk for the sake of i/o error checking,
290 286 * so that zap_table_load() will catch errors for
291 287 * zap_table_store.
292 288 */
293 289 blk = (idx*2) >> (bs-3);
294 290
295 291 err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
296 292 (tbl->zt_nextblk + blk) << bs, FTAG, &db,
297 293 DMU_READ_NO_PREFETCH);
298 294 dmu_buf_rele(db, FTAG);
299 295 }
300 296 return (err);
301 297 }
302 298
303 299 /*
304 300 * Routines for growing the ptrtbl.
305 301 */
306 302
307 303 static void
308 304 zap_ptrtbl_transfer(const uint64_t *src, uint64_t *dst, int n)
309 305 {
310 306 int i;
311 307 for (i = 0; i < n; i++) {
312 308 uint64_t lb = src[i];
313 309 dst[2*i+0] = lb;
314 310 dst[2*i+1] = lb;
315 311 }
316 312 }
↓ open down ↓ |
186 lines elided |
↑ open up ↑ |
317 313
318 314 static int
319 315 zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx)
320 316 {
321 317 /*
322 318 * The pointer table should never use more hash bits than we
323 319 * have (otherwise we'd be using useless zero bits to index it).
324 320 * If we are within 2 bits of running out, stop growing, since
325 321 * this is already an aberrant condition.
326 322 */
327 - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2)
323 + if (zap->zap_f_phys->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2)
328 324 return (SET_ERROR(ENOSPC));
329 325
330 - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
326 + if (zap->zap_f_phys->zap_ptrtbl.zt_numblks == 0) {
331 327 /*
332 328 * We are outgrowing the "embedded" ptrtbl (the one
333 329 * stored in the header block). Give it its own entire
334 330 * block, which will double the size of the ptrtbl.
335 331 */
336 332 uint64_t newblk;
337 333 dmu_buf_t *db_new;
338 334 int err;
339 335
340 - ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==,
336 + ASSERT3U(zap->zap_f_phys->zap_ptrtbl.zt_shift, ==,
341 337 ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
342 - ASSERT0(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk);
338 + ASSERT0(zap->zap_f_phys->zap_ptrtbl.zt_blk);
343 339
344 340 newblk = zap_allocate_blocks(zap, 1);
345 341 err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
346 342 newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new,
347 343 DMU_READ_NO_PREFETCH);
348 344 if (err)
349 345 return (err);
350 346 dmu_buf_will_dirty(db_new, tx);
351 347 zap_ptrtbl_transfer(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
352 348 db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
353 349 dmu_buf_rele(db_new, FTAG);
354 350
355 - zap->zap_f.zap_phys->zap_ptrtbl.zt_blk = newblk;
356 - zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks = 1;
357 - zap->zap_f.zap_phys->zap_ptrtbl.zt_shift++;
351 + zap->zap_f_phys->zap_ptrtbl.zt_blk = newblk;
352 + zap->zap_f_phys->zap_ptrtbl.zt_numblks = 1;
353 + zap->zap_f_phys->zap_ptrtbl.zt_shift++;
358 354
359 - ASSERT3U(1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==,
360 - zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks <<
355 + ASSERT3U(1ULL << zap->zap_f_phys->zap_ptrtbl.zt_shift, ==,
356 + zap->zap_f_phys->zap_ptrtbl.zt_numblks <<
361 357 (FZAP_BLOCK_SHIFT(zap)-3));
362 358
363 359 return (0);
364 360 } else {
365 - return (zap_table_grow(zap, &zap->zap_f.zap_phys->zap_ptrtbl,
361 + return (zap_table_grow(zap, &zap->zap_f_phys->zap_ptrtbl,
366 362 zap_ptrtbl_transfer, tx));
367 363 }
368 364 }
369 365
370 366 static void
371 367 zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx)
372 368 {
373 369 dmu_buf_will_dirty(zap->zap_dbuf, tx);
374 370 mutex_enter(&zap->zap_f.zap_num_entries_mtx);
375 - ASSERT(delta > 0 || zap->zap_f.zap_phys->zap_num_entries >= -delta);
376 - zap->zap_f.zap_phys->zap_num_entries += delta;
371 + ASSERT(delta > 0 || zap->zap_f_phys->zap_num_entries >= -delta);
372 + zap->zap_f_phys->zap_num_entries += delta;
377 373 mutex_exit(&zap->zap_f.zap_num_entries_mtx);
378 374 }
379 375
380 376 static uint64_t
381 377 zap_allocate_blocks(zap_t *zap, int nblocks)
382 378 {
383 379 uint64_t newblk;
384 380 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
385 - newblk = zap->zap_f.zap_phys->zap_freeblk;
386 - zap->zap_f.zap_phys->zap_freeblk += nblocks;
381 + newblk = zap->zap_f_phys->zap_freeblk;
382 + zap->zap_f_phys->zap_freeblk += nblocks;
387 383 return (newblk);
388 384 }
389 385
386 +static void
387 +zap_leaf_pageout(dmu_buf_user_t *dbu)
388 +{
389 + zap_leaf_t *l = (zap_leaf_t *)dbu;
390 +
391 + rw_destroy(&l->l_rwlock);
392 + kmem_free(l, sizeof (zap_leaf_t));
393 +}
394 +
390 395 static zap_leaf_t *
391 396 zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
392 397 {
393 398 void *winner;
394 399 zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP);
395 400
396 401 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
397 402
398 403 rw_init(&l->l_rwlock, 0, 0, 0);
399 404 rw_enter(&l->l_rwlock, RW_WRITER);
400 405 l->l_blkid = zap_allocate_blocks(zap, 1);
401 406 l->l_dbuf = NULL;
402 - l->l_phys = NULL;
403 407
404 408 VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
405 409 l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
406 410 DMU_READ_NO_PREFETCH));
407 - winner = dmu_buf_set_user(l->l_dbuf, l, &l->l_phys, zap_leaf_pageout);
411 + dmu_buf_init_user(&l->db_evict, zap_leaf_pageout);
412 + winner = (zap_leaf_t *)dmu_buf_set_user(l->l_dbuf, &l->db_evict);
408 413 ASSERT(winner == NULL);
409 414 dmu_buf_will_dirty(l->l_dbuf, tx);
410 415
411 416 zap_leaf_init(l, zap->zap_normflags != 0);
412 417
413 - zap->zap_f.zap_phys->zap_num_leafs++;
418 + zap->zap_f_phys->zap_num_leafs++;
414 419
415 420 return (l);
416 421 }
417 422
418 423 int
419 424 fzap_count(zap_t *zap, uint64_t *count)
420 425 {
421 426 ASSERT(!zap->zap_ismicro);
422 427 mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */
423 - *count = zap->zap_f.zap_phys->zap_num_entries;
428 + *count = zap->zap_f_phys->zap_num_entries;
424 429 mutex_exit(&zap->zap_f.zap_num_entries_mtx);
425 430 return (0);
426 431 }
427 432
428 433 /*
429 434 * Routines for obtaining zap_leaf_t's
430 435 */
431 436
432 437 void
433 438 zap_put_leaf(zap_leaf_t *l)
434 439 {
435 440 rw_exit(&l->l_rwlock);
436 441 dmu_buf_rele(l->l_dbuf, NULL);
437 442 }
438 443
439 -_NOTE(ARGSUSED(0))
440 -static void
441 -zap_leaf_pageout(dmu_buf_t *db, void *vl)
442 -{
443 - zap_leaf_t *l = vl;
444 -
445 - rw_destroy(&l->l_rwlock);
446 - kmem_free(l, sizeof (zap_leaf_t));
447 -}
448 -
449 444 static zap_leaf_t *
450 445 zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
451 446 {
452 447 zap_leaf_t *l, *winner;
453 448
454 449 ASSERT(blkid != 0);
455 450
456 451 l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP);
457 452 rw_init(&l->l_rwlock, 0, 0, 0);
458 453 rw_enter(&l->l_rwlock, RW_WRITER);
459 454 l->l_blkid = blkid;
460 455 l->l_bs = highbit(db->db_size)-1;
461 456 l->l_dbuf = db;
462 - l->l_phys = NULL;
463 457
464 - winner = dmu_buf_set_user(db, l, &l->l_phys, zap_leaf_pageout);
458 + dmu_buf_init_user(&l->db_evict, zap_leaf_pageout);
459 + winner = (zap_leaf_t *)dmu_buf_set_user(db, &l->db_evict);
465 460
466 461 rw_exit(&l->l_rwlock);
467 462 if (winner != NULL) {
468 463 /* someone else set it first */
469 - zap_leaf_pageout(NULL, l);
464 + zap_leaf_pageout(&l->db_evict);
470 465 l = winner;
471 466 }
472 467
473 468 /*
474 469 * lhr_pad was previously used for the next leaf in the leaf
 475 470 * chain. There should be no chained leaves (as we have removed
476 471 * support for them).
477 472 */
478 473 ASSERT0(l->l_phys->l_hdr.lh_pad1);
479 474
480 475 /*
481 476 * There should be more hash entries than there can be
482 477 * chunks to put in the hash table
483 478 */
484 479 ASSERT3U(ZAP_LEAF_HASH_NUMENTRIES(l), >, ZAP_LEAF_NUMCHUNKS(l) / 3);
485 480
486 481 /* The chunks should begin at the end of the hash table */
487 482 ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==,
488 483 &l->l_phys->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]);
489 484
490 485 /* The chunks should end at the end of the block */
491 486 ASSERT3U((uintptr_t)&ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)) -
492 487 (uintptr_t)l->l_phys, ==, l->l_dbuf->db_size);
493 488
494 489 return (l);
495 490 }
496 491
497 492 static int
498 493 zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
499 494 zap_leaf_t **lp)
500 495 {
501 496 dmu_buf_t *db;
502 497 zap_leaf_t *l;
503 498 int bs = FZAP_BLOCK_SHIFT(zap);
504 499 int err;
505 500
506 501 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
507 502
↓ open down ↓ |
28 lines elided |
↑ open up ↑ |
508 503 err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
509 504 blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH);
510 505 if (err)
511 506 return (err);
512 507
513 508 ASSERT3U(db->db_object, ==, zap->zap_object);
514 509 ASSERT3U(db->db_offset, ==, blkid << bs);
515 510 ASSERT3U(db->db_size, ==, 1 << bs);
516 511 ASSERT(blkid != 0);
517 512
518 - l = dmu_buf_get_user(db);
513 + l = (zap_leaf_t *)dmu_buf_get_user(db);
519 514
520 515 if (l == NULL)
521 516 l = zap_open_leaf(blkid, db);
522 517
523 518 rw_enter(&l->l_rwlock, lt);
524 519 /*
525 520 * Must lock before dirtying, otherwise l->l_phys could change,
526 521 * causing ASSERT below to fail.
527 522 */
528 523 if (lt == RW_WRITER)
529 524 dmu_buf_will_dirty(db, tx);
530 525 ASSERT3U(l->l_blkid, ==, blkid);
531 526 ASSERT3P(l->l_dbuf, ==, db);
532 527 ASSERT3P(l->l_phys, ==, l->l_dbuf->db_data);
533 528 ASSERT3U(l->l_phys->l_hdr.lh_block_type, ==, ZBT_LEAF);
534 529 ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
535 530
536 531 *lp = l;
537 532 return (0);
538 533 }
539 534
540 535 static int
541 536 zap_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t *valp)
542 537 {
543 538 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
544 539
545 - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
540 + if (zap->zap_f_phys->zap_ptrtbl.zt_numblks == 0) {
546 541 ASSERT3U(idx, <,
547 - (1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift));
542 + (1ULL << zap->zap_f_phys->zap_ptrtbl.zt_shift));
548 543 *valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx);
549 544 return (0);
550 545 } else {
551 - return (zap_table_load(zap, &zap->zap_f.zap_phys->zap_ptrtbl,
546 + return (zap_table_load(zap, &zap->zap_f_phys->zap_ptrtbl,
552 547 idx, valp));
553 548 }
554 549 }
555 550
556 551 static int
557 552 zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx)
558 553 {
559 554 ASSERT(tx != NULL);
560 555 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
561 556
562 - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0) {
557 + if (zap->zap_f_phys->zap_ptrtbl.zt_blk == 0) {
563 558 ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) = blk;
564 559 return (0);
565 560 } else {
566 - return (zap_table_store(zap, &zap->zap_f.zap_phys->zap_ptrtbl,
561 + return (zap_table_store(zap, &zap->zap_f_phys->zap_ptrtbl,
567 562 idx, blk, tx));
568 563 }
569 564 }
570 565
571 566 static int
572 567 zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp)
573 568 {
574 569 uint64_t idx, blk;
575 570 int err;
576 571
577 572 ASSERT(zap->zap_dbuf == NULL ||
578 - zap->zap_f.zap_phys == zap->zap_dbuf->db_data);
579 - ASSERT3U(zap->zap_f.zap_phys->zap_magic, ==, ZAP_MAGIC);
580 - idx = ZAP_HASH_IDX(h, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
573 + zap->zap_f_phys == zap->zap_dbuf->db_data);
574 + ASSERT3U(zap->zap_f_phys->zap_magic, ==, ZAP_MAGIC);
575 + idx = ZAP_HASH_IDX(h, zap->zap_f_phys->zap_ptrtbl.zt_shift);
581 576 err = zap_idx_to_blk(zap, idx, &blk);
582 577 if (err != 0)
583 578 return (err);
584 579 err = zap_get_leaf_byblk(zap, blk, tx, lt, lp);
585 580
586 581 ASSERT(err || ZAP_HASH_IDX(h, (*lp)->l_phys->l_hdr.lh_prefix_len) ==
587 582 (*lp)->l_phys->l_hdr.lh_prefix);
588 583 return (err);
589 584 }
590 585
591 586 static int
592 587 zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp)
593 588 {
594 589 zap_t *zap = zn->zn_zap;
595 590 uint64_t hash = zn->zn_hash;
596 591 zap_leaf_t *nl;
597 592 int prefix_diff, i, err;
598 593 uint64_t sibling;
599 594 int old_prefix_len = l->l_phys->l_hdr.lh_prefix_len;
600 595
601 - ASSERT3U(old_prefix_len, <=, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
596 + ASSERT3U(old_prefix_len, <=, zap->zap_f_phys->zap_ptrtbl.zt_shift);
602 597 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
603 598
604 599 ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
605 600 l->l_phys->l_hdr.lh_prefix);
606 601
607 602 if (zap_tryupgradedir(zap, tx) == 0 ||
608 - old_prefix_len == zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) {
603 + old_prefix_len == zap->zap_f_phys->zap_ptrtbl.zt_shift) {
609 604 /* We failed to upgrade, or need to grow the pointer table */
610 605 objset_t *os = zap->zap_objset;
611 606 uint64_t object = zap->zap_object;
612 607
613 608 zap_put_leaf(l);
614 609 zap_unlockdir(zap);
615 610 err = zap_lockdir(os, object, tx, RW_WRITER,
616 611 FALSE, FALSE, &zn->zn_zap);
617 612 zap = zn->zn_zap;
618 613 if (err)
619 614 return (err);
620 615 ASSERT(!zap->zap_ismicro);
621 616
622 617 while (old_prefix_len ==
623 - zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) {
618 + zap->zap_f_phys->zap_ptrtbl.zt_shift) {
624 619 err = zap_grow_ptrtbl(zap, tx);
625 620 if (err)
626 621 return (err);
627 622 }
628 623
629 624 err = zap_deref_leaf(zap, hash, tx, RW_WRITER, &l);
630 625 if (err)
631 626 return (err);
632 627
633 628 if (l->l_phys->l_hdr.lh_prefix_len != old_prefix_len) {
634 629 /* it split while our locks were down */
635 630 *lp = l;
636 631 return (0);
637 632 }
638 633 }
639 634 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
640 - ASSERT3U(old_prefix_len, <, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
635 + ASSERT3U(old_prefix_len, <, zap->zap_f_phys->zap_ptrtbl.zt_shift);
641 636 ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
642 637 l->l_phys->l_hdr.lh_prefix);
643 638
644 - prefix_diff = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift -
639 + prefix_diff = zap->zap_f_phys->zap_ptrtbl.zt_shift -
645 640 (old_prefix_len + 1);
646 641 sibling = (ZAP_HASH_IDX(hash, old_prefix_len + 1) | 1) << prefix_diff;
647 642
648 643 /* check for i/o errors before doing zap_leaf_split */
649 644 for (i = 0; i < (1ULL<<prefix_diff); i++) {
650 645 uint64_t blk;
651 646 err = zap_idx_to_blk(zap, sibling+i, &blk);
652 647 if (err)
653 648 return (err);
654 649 ASSERT3U(blk, ==, l->l_blkid);
655 650 }
656 651
657 652 nl = zap_create_leaf(zap, tx);
658 653 zap_leaf_split(l, nl, zap->zap_normflags != 0);
659 654
660 655 /* set sibling pointers */
661 656 for (i = 0; i < (1ULL << prefix_diff); i++) {
662 657 err = zap_set_idx_to_blk(zap, sibling+i, nl->l_blkid, tx);
663 658 ASSERT0(err); /* we checked for i/o errors above */
664 659 }
665 660
666 661 if (hash & (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len))) {
667 662 /* we want the sibling */
668 663 zap_put_leaf(l);
669 664 *lp = nl;
670 665 } else {
671 666 zap_put_leaf(nl);
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
672 667 *lp = l;
673 668 }
674 669
675 670 return (0);
676 671 }
677 672
678 673 static void
679 674 zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
680 675 {
681 676 zap_t *zap = zn->zn_zap;
682 - int shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
677 + int shift = zap->zap_f_phys->zap_ptrtbl.zt_shift;
683 678 int leaffull = (l->l_phys->l_hdr.lh_prefix_len == shift &&
684 679 l->l_phys->l_hdr.lh_nfree < ZAP_LEAF_LOW_WATER);
685 680
686 681 zap_put_leaf(l);
687 682
688 - if (leaffull || zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk) {
683 + if (leaffull || zap->zap_f_phys->zap_ptrtbl.zt_nextblk) {
689 684 int err;
690 685
691 686 /*
692 687 * We are in the middle of growing the pointer table, or
693 688 * this leaf will soon make us grow it.
694 689 */
695 690 if (zap_tryupgradedir(zap, tx) == 0) {
696 691 objset_t *os = zap->zap_objset;
697 692 uint64_t zapobj = zap->zap_object;
698 693
699 694 zap_unlockdir(zap);
700 695 err = zap_lockdir(os, zapobj, tx,
701 696 RW_WRITER, FALSE, FALSE, &zn->zn_zap);
702 697 zap = zn->zn_zap;
703 698 if (err)
704 699 return;
705 700 }
706 701
707 702 /* could have finished growing while our locks were down */
708 - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift == shift)
703 + if (zap->zap_f_phys->zap_ptrtbl.zt_shift == shift)
709 704 (void) zap_grow_ptrtbl(zap, tx);
710 705 }
711 706 }
712 707
713 708 static int
714 709 fzap_checkname(zap_name_t *zn)
715 710 {
716 711 if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN)
717 712 return (SET_ERROR(ENAMETOOLONG));
718 713 return (0);
719 714 }
720 715
721 716 static int
722 717 fzap_checksize(uint64_t integer_size, uint64_t num_integers)
723 718 {
724 719 /* Only integer sizes supported by C */
725 720 switch (integer_size) {
726 721 case 1:
727 722 case 2:
728 723 case 4:
729 724 case 8:
730 725 break;
731 726 default:
732 727 return (SET_ERROR(EINVAL));
733 728 }
734 729
735 730 if (integer_size * num_integers > ZAP_MAXVALUELEN)
736 731 return (E2BIG);
737 732
738 733 return (0);
739 734 }
740 735
741 736 static int
742 737 fzap_check(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers)
743 738 {
744 739 int err;
745 740
746 741 if ((err = fzap_checkname(zn)) != 0)
747 742 return (err);
748 743 return (fzap_checksize(integer_size, num_integers));
749 744 }
750 745
751 746 /*
752 747 * Routines for manipulating attributes.
753 748 */
754 749 int
755 750 fzap_lookup(zap_name_t *zn,
756 751 uint64_t integer_size, uint64_t num_integers, void *buf,
757 752 char *realname, int rn_len, boolean_t *ncp)
758 753 {
759 754 zap_leaf_t *l;
760 755 int err;
761 756 zap_entry_handle_t zeh;
762 757
763 758 if ((err = fzap_checkname(zn)) != 0)
764 759 return (err);
765 760
766 761 err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &l);
767 762 if (err != 0)
768 763 return (err);
769 764 err = zap_leaf_lookup(l, zn, &zeh);
770 765 if (err == 0) {
771 766 if ((err = fzap_checksize(integer_size, num_integers)) != 0) {
772 767 zap_put_leaf(l);
773 768 return (err);
774 769 }
775 770
776 771 err = zap_entry_read(&zeh, integer_size, num_integers, buf);
777 772 (void) zap_entry_read_name(zn->zn_zap, &zeh, rn_len, realname);
778 773 if (ncp) {
779 774 *ncp = zap_entry_normalization_conflict(&zeh,
780 775 zn, NULL, zn->zn_zap);
781 776 }
782 777 }
783 778
784 779 zap_put_leaf(l);
785 780 return (err);
786 781 }
787 782
788 783 int
789 784 fzap_add_cd(zap_name_t *zn,
790 785 uint64_t integer_size, uint64_t num_integers,
791 786 const void *val, uint32_t cd, dmu_tx_t *tx)
792 787 {
793 788 zap_leaf_t *l;
794 789 int err;
795 790 zap_entry_handle_t zeh;
796 791 zap_t *zap = zn->zn_zap;
797 792
798 793 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
799 794 ASSERT(!zap->zap_ismicro);
800 795 ASSERT(fzap_check(zn, integer_size, num_integers) == 0);
801 796
802 797 err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
803 798 if (err != 0)
804 799 return (err);
805 800 retry:
806 801 err = zap_leaf_lookup(l, zn, &zeh);
807 802 if (err == 0) {
808 803 err = SET_ERROR(EEXIST);
809 804 goto out;
810 805 }
811 806 if (err != ENOENT)
812 807 goto out;
813 808
814 809 err = zap_entry_create(l, zn, cd,
815 810 integer_size, num_integers, val, &zeh);
816 811
817 812 if (err == 0) {
818 813 zap_increment_num_entries(zap, 1, tx);
819 814 } else if (err == EAGAIN) {
820 815 err = zap_expand_leaf(zn, l, tx, &l);
821 816 zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
822 817 if (err == 0)
823 818 goto retry;
824 819 }
825 820
826 821 out:
827 822 if (zap != NULL)
828 823 zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx);
829 824 return (err);
830 825 }
831 826
832 827 int
833 828 fzap_add(zap_name_t *zn,
834 829 uint64_t integer_size, uint64_t num_integers,
835 830 const void *val, dmu_tx_t *tx)
836 831 {
837 832 int err = fzap_check(zn, integer_size, num_integers);
838 833 if (err != 0)
839 834 return (err);
840 835
841 836 return (fzap_add_cd(zn, integer_size, num_integers,
842 837 val, ZAP_NEED_CD, tx));
843 838 }
844 839
845 840 int
846 841 fzap_update(zap_name_t *zn,
847 842 int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
848 843 {
849 844 zap_leaf_t *l;
850 845 int err, create;
851 846 zap_entry_handle_t zeh;
852 847 zap_t *zap = zn->zn_zap;
853 848
854 849 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
855 850 err = fzap_check(zn, integer_size, num_integers);
856 851 if (err != 0)
857 852 return (err);
858 853
859 854 err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
860 855 if (err != 0)
861 856 return (err);
862 857 retry:
863 858 err = zap_leaf_lookup(l, zn, &zeh);
864 859 create = (err == ENOENT);
865 860 ASSERT(err == 0 || err == ENOENT);
866 861
867 862 if (create) {
868 863 err = zap_entry_create(l, zn, ZAP_NEED_CD,
869 864 integer_size, num_integers, val, &zeh);
870 865 if (err == 0)
871 866 zap_increment_num_entries(zap, 1, tx);
872 867 } else {
873 868 err = zap_entry_update(&zeh, integer_size, num_integers, val);
874 869 }
875 870
876 871 if (err == EAGAIN) {
877 872 err = zap_expand_leaf(zn, l, tx, &l);
878 873 zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
879 874 if (err == 0)
880 875 goto retry;
881 876 }
882 877
883 878 if (zap != NULL)
884 879 zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx);
885 880 return (err);
886 881 }
887 882
888 883 int
889 884 fzap_length(zap_name_t *zn,
890 885 uint64_t *integer_size, uint64_t *num_integers)
891 886 {
892 887 zap_leaf_t *l;
893 888 int err;
894 889 zap_entry_handle_t zeh;
895 890
896 891 err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &l);
897 892 if (err != 0)
898 893 return (err);
899 894 err = zap_leaf_lookup(l, zn, &zeh);
900 895 if (err != 0)
901 896 goto out;
902 897
903 898 if (integer_size)
904 899 *integer_size = zeh.zeh_integer_size;
905 900 if (num_integers)
906 901 *num_integers = zeh.zeh_num_integers;
907 902 out:
908 903 zap_put_leaf(l);
909 904 return (err);
910 905 }
911 906
912 907 int
913 908 fzap_remove(zap_name_t *zn, dmu_tx_t *tx)
914 909 {
915 910 zap_leaf_t *l;
916 911 int err;
917 912 zap_entry_handle_t zeh;
918 913
919 914 err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, tx, RW_WRITER, &l);
920 915 if (err != 0)
921 916 return (err);
922 917 err = zap_leaf_lookup(l, zn, &zeh);
923 918 if (err == 0) {
924 919 zap_entry_remove(&zeh);
925 920 zap_increment_num_entries(zn->zn_zap, -1, tx);
926 921 }
927 922 zap_put_leaf(l);
928 923 return (err);
↓ open down ↓ |
210 lines elided |
↑ open up ↑ |
929 924 }
930 925
931 926 void
932 927 fzap_prefetch(zap_name_t *zn)
933 928 {
934 929 uint64_t idx, blk;
935 930 zap_t *zap = zn->zn_zap;
936 931 int bs;
937 932
938 933 idx = ZAP_HASH_IDX(zn->zn_hash,
939 - zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
934 + zap->zap_f_phys->zap_ptrtbl.zt_shift);
940 935 if (zap_idx_to_blk(zap, idx, &blk) != 0)
941 936 return;
942 937 bs = FZAP_BLOCK_SHIFT(zap);
943 938 dmu_prefetch(zap->zap_objset, zap->zap_object, blk << bs, 1 << bs);
944 939 }
945 940
946 941 /*
947 942 * Helper functions for consumers.
948 943 */
949 944
950 945 uint64_t
951 946 zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
952 947 const char *name, dmu_tx_t *tx)
953 948 {
954 949 uint64_t new_obj;
955 950
956 951 VERIFY((new_obj = zap_create(os, ot, DMU_OT_NONE, 0, tx)) > 0);
957 952 VERIFY(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &new_obj,
958 953 tx) == 0);
959 954
960 955 return (new_obj);
961 956 }
962 957
963 958 int
964 959 zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask,
965 960 char *name)
966 961 {
967 962 zap_cursor_t zc;
968 963 zap_attribute_t *za;
969 964 int err;
970 965
971 966 if (mask == 0)
972 967 mask = -1ULL;
973 968
974 969 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
975 970 for (zap_cursor_init(&zc, os, zapobj);
976 971 (err = zap_cursor_retrieve(&zc, za)) == 0;
977 972 zap_cursor_advance(&zc)) {
978 973 if ((za->za_first_integer & mask) == (value & mask)) {
979 974 (void) strcpy(name, za->za_name);
980 975 break;
981 976 }
982 977 }
983 978 zap_cursor_fini(&zc);
984 979 kmem_free(za, sizeof (zap_attribute_t));
985 980 return (err);
986 981 }
987 982
988 983 int
989 984 zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx)
990 985 {
991 986 zap_cursor_t zc;
992 987 zap_attribute_t za;
993 988 int err;
994 989
995 990 for (zap_cursor_init(&zc, os, fromobj);
996 991 zap_cursor_retrieve(&zc, &za) == 0;
997 992 (void) zap_cursor_advance(&zc)) {
998 993 if (za.za_integer_length != 8 || za.za_num_integers != 1)
999 994 return (SET_ERROR(EINVAL));
1000 995 err = zap_add(os, intoobj, za.za_name,
1001 996 8, 1, &za.za_first_integer, tx);
1002 997 if (err)
1003 998 return (err);
1004 999 }
1005 1000 zap_cursor_fini(&zc);
1006 1001 return (0);
1007 1002 }
1008 1003
1009 1004 int
1010 1005 zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj,
1011 1006 uint64_t value, dmu_tx_t *tx)
1012 1007 {
1013 1008 zap_cursor_t zc;
1014 1009 zap_attribute_t za;
1015 1010 int err;
1016 1011
1017 1012 for (zap_cursor_init(&zc, os, fromobj);
1018 1013 zap_cursor_retrieve(&zc, &za) == 0;
1019 1014 (void) zap_cursor_advance(&zc)) {
1020 1015 if (za.za_integer_length != 8 || za.za_num_integers != 1)
1021 1016 return (SET_ERROR(EINVAL));
1022 1017 err = zap_add(os, intoobj, za.za_name,
1023 1018 8, 1, &value, tx);
1024 1019 if (err)
1025 1020 return (err);
1026 1021 }
1027 1022 zap_cursor_fini(&zc);
1028 1023 return (0);
1029 1024 }
1030 1025
1031 1026 int
1032 1027 zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,
1033 1028 dmu_tx_t *tx)
1034 1029 {
1035 1030 zap_cursor_t zc;
1036 1031 zap_attribute_t za;
1037 1032 int err;
1038 1033
1039 1034 for (zap_cursor_init(&zc, os, fromobj);
1040 1035 zap_cursor_retrieve(&zc, &za) == 0;
1041 1036 (void) zap_cursor_advance(&zc)) {
1042 1037 uint64_t delta = 0;
1043 1038
1044 1039 if (za.za_integer_length != 8 || za.za_num_integers != 1)
1045 1040 return (SET_ERROR(EINVAL));
1046 1041
1047 1042 err = zap_lookup(os, intoobj, za.za_name, 8, 1, &delta);
1048 1043 if (err != 0 && err != ENOENT)
1049 1044 return (err);
1050 1045 delta += za.za_first_integer;
1051 1046 err = zap_update(os, intoobj, za.za_name, 8, 1, &delta, tx);
1052 1047 if (err)
1053 1048 return (err);
1054 1049 }
1055 1050 zap_cursor_fini(&zc);
1056 1051 return (0);
1057 1052 }
1058 1053
1059 1054 int
1060 1055 zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx)
1061 1056 {
1062 1057 char name[20];
1063 1058
1064 1059 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)value);
1065 1060 return (zap_add(os, obj, name, 8, 1, &value, tx));
1066 1061 }
1067 1062
1068 1063 int
1069 1064 zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx)
1070 1065 {
1071 1066 char name[20];
1072 1067
1073 1068 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)value);
1074 1069 return (zap_remove(os, obj, name, tx));
1075 1070 }
1076 1071
1077 1072 int
1078 1073 zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value)
1079 1074 {
1080 1075 char name[20];
1081 1076
1082 1077 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)value);
1083 1078 return (zap_lookup(os, obj, name, 8, 1, &value));
1084 1079 }
1085 1080
1086 1081 int
1087 1082 zap_add_int_key(objset_t *os, uint64_t obj,
1088 1083 uint64_t key, uint64_t value, dmu_tx_t *tx)
1089 1084 {
1090 1085 char name[20];
1091 1086
1092 1087 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
1093 1088 return (zap_add(os, obj, name, 8, 1, &value, tx));
1094 1089 }
1095 1090
1096 1091 int
1097 1092 zap_update_int_key(objset_t *os, uint64_t obj,
1098 1093 uint64_t key, uint64_t value, dmu_tx_t *tx)
1099 1094 {
1100 1095 char name[20];
1101 1096
1102 1097 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
1103 1098 return (zap_update(os, obj, name, 8, 1, &value, tx));
1104 1099 }
1105 1100
1106 1101 int
1107 1102 zap_lookup_int_key(objset_t *os, uint64_t obj, uint64_t key, uint64_t *valuep)
1108 1103 {
1109 1104 char name[20];
1110 1105
1111 1106 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
1112 1107 return (zap_lookup(os, obj, name, 8, 1, valuep));
1113 1108 }
1114 1109
1115 1110 int
1116 1111 zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
1117 1112 dmu_tx_t *tx)
1118 1113 {
1119 1114 uint64_t value = 0;
1120 1115 int err;
1121 1116
1122 1117 if (delta == 0)
1123 1118 return (0);
1124 1119
1125 1120 err = zap_lookup(os, obj, name, 8, 1, &value);
1126 1121 if (err != 0 && err != ENOENT)
1127 1122 return (err);
1128 1123 value += delta;
1129 1124 if (value == 0)
1130 1125 err = zap_remove(os, obj, name, tx);
1131 1126 else
1132 1127 err = zap_update(os, obj, name, 8, 1, &value, tx);
1133 1128 return (err);
1134 1129 }
1135 1130
1136 1131 int
1137 1132 zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
1138 1133 dmu_tx_t *tx)
1139 1134 {
1140 1135 char name[20];
1141 1136
1142 1137 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
1143 1138 return (zap_increment(os, obj, name, delta, tx));
1144 1139 }
1145 1140
1146 1141 /*
1147 1142 * Routines for iterating over the attributes.
1148 1143 */
1149 1144
/*
 * Retrieve the next entry at or after (zc_hash, zc_cd) into *za and
 * advance the cursor to it.  Returns ENOENT when iteration is
 * exhausted.  The cursor's leaf hold is kept across calls; its reader
 * lock is taken for the duration of each call and dropped on return.
 */
int
fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
{
	int err = ENOENT;
	zap_entry_handle_t zeh;
	zap_leaf_t *l;

	/* retrieve the next entry at or after zc_hash/zc_cd */
	/* if no entry, return ENOENT */

	/*
	 * Drop a cached leaf whose prefix no longer covers zc_hash
	 * (presumably stale after a leaf split — re-derive below).
	 * The lock is taken only so zap_put_leaf() can release it.
	 */
	if (zc->zc_leaf &&
	    (ZAP_HASH_IDX(zc->zc_hash,
	    zc->zc_leaf->l_phys->l_hdr.lh_prefix_len) !=
	    zc->zc_leaf->l_phys->l_hdr.lh_prefix)) {
		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
		zap_put_leaf(zc->zc_leaf);
		zc->zc_leaf = NULL;
	}

again:
	if (zc->zc_leaf == NULL) {
		/* zap_deref_leaf() returns the leaf with l_rwlock held. */
		err = zap_deref_leaf(zap, zc->zc_hash, NULL, RW_READER,
		    &zc->zc_leaf);
		if (err != 0)
			return (err);
	} else {
		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
	}
	l = zc->zc_leaf;

	err = zap_leaf_lookup_closest(l, zc->zc_hash, zc->zc_cd, &zeh);

	if (err == ENOENT) {
		/*
		 * Nothing left in this leaf: bump zc_hash past every
		 * hash value the leaf's prefix can cover, then move to
		 * the next leaf — unless the hash space wrapped (or the
		 * leaf covers the whole space), which ends iteration.
		 */
		uint64_t nocare =
		    (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len)) - 1;
		zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1;
		zc->zc_cd = 0;
		if (l->l_phys->l_hdr.lh_prefix_len == 0 || zc->zc_hash == 0) {
			zc->zc_hash = -1ULL;
		} else {
			zap_put_leaf(zc->zc_leaf);
			zc->zc_leaf = NULL;
			goto again;
		}
	}

	if (err == 0) {
		/* Found an entry: advance cursor and fill in *za. */
		zc->zc_hash = zeh.zeh_hash;
		zc->zc_cd = zeh.zeh_cd;
		za->za_integer_length = zeh.zeh_integer_size;
		za->za_num_integers = zeh.zeh_num_integers;
		if (zeh.zeh_num_integers == 0) {
			za->za_first_integer = 0;
		} else {
			/* EOVERFLOW (entry wider than 8 bytes) is OK here. */
			err = zap_entry_read(&zeh, 8, 1, &za->za_first_integer);
			ASSERT(err == 0 || err == EOVERFLOW);
		}
		err = zap_entry_read_name(zap, &zeh,
		    sizeof (za->za_name), za->za_name);
		ASSERT(err == 0);

		za->za_normalization_conflict =
		    zap_entry_normalization_conflict(&zeh,
		    NULL, za->za_name, zap);
	}
	rw_exit(&zc->zc_leaf->l_rwlock);
	return (err);
}
1218 1213
1219 1214 static void
1220 1215 zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs)
1221 1216 {
1222 1217 int i, err;
1223 1218 uint64_t lastblk = 0;
1224 1219
1225 1220 /*
1226 1221 * NB: if a leaf has more pointers than an entire ptrtbl block
1227 1222 * can hold, then it'll be accounted for more than once, since
1228 1223 * we won't have lastblk.
1229 1224 */
1230 1225 for (i = 0; i < len; i++) {
1231 1226 zap_leaf_t *l;
1232 1227
1233 1228 if (tbl[i] == lastblk)
1234 1229 continue;
1235 1230 lastblk = tbl[i];
1236 1231
1237 1232 err = zap_get_leaf_byblk(zap, tbl[i], NULL, RW_READER, &l);
1238 1233 if (err == 0) {
1239 1234 zap_leaf_stats(zap, l, zs);
1240 1235 zap_put_leaf(l);
1241 1236 }
1242 1237 }
1243 1238 }
1244 1239
1245 1240 int
1246 1241 fzap_cursor_move_to_key(zap_cursor_t *zc, zap_name_t *zn)
1247 1242 {
1248 1243 int err;
1249 1244 zap_leaf_t *l;
1250 1245 zap_entry_handle_t zeh;
1251 1246
1252 1247 if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN)
1253 1248 return (SET_ERROR(ENAMETOOLONG));
1254 1249
1255 1250 err = zap_deref_leaf(zc->zc_zap, zn->zn_hash, NULL, RW_READER, &l);
1256 1251 if (err != 0)
1257 1252 return (err);
1258 1253
1259 1254 err = zap_leaf_lookup(l, zn, &zeh);
1260 1255 if (err != 0)
1261 1256 return (err);
1262 1257
1263 1258 zc->zc_leaf = l;
1264 1259 zc->zc_hash = zeh.zeh_hash;
1265 1260 zc->zc_cd = zeh.zeh_cd;
1266 1261
1267 1262 return (err);
1268 1263 }
↓ open down ↓ |
319 lines elided |
↑ open up ↑ |
1269 1264
1270 1265 void
1271 1266 fzap_get_stats(zap_t *zap, zap_stats_t *zs)
1272 1267 {
1273 1268 int bs = FZAP_BLOCK_SHIFT(zap);
1274 1269 zs->zs_blocksize = 1ULL << bs;
1275 1270
1276 1271 /*
1277 1272 * Set zap_phys_t fields
1278 1273 */
1279 - zs->zs_num_leafs = zap->zap_f.zap_phys->zap_num_leafs;
1280 - zs->zs_num_entries = zap->zap_f.zap_phys->zap_num_entries;
1281 - zs->zs_num_blocks = zap->zap_f.zap_phys->zap_freeblk;
1282 - zs->zs_block_type = zap->zap_f.zap_phys->zap_block_type;
1283 - zs->zs_magic = zap->zap_f.zap_phys->zap_magic;
1284 - zs->zs_salt = zap->zap_f.zap_phys->zap_salt;
1274 + zs->zs_num_leafs = zap->zap_f_phys->zap_num_leafs;
1275 + zs->zs_num_entries = zap->zap_f_phys->zap_num_entries;
1276 + zs->zs_num_blocks = zap->zap_f_phys->zap_freeblk;
1277 + zs->zs_block_type = zap->zap_f_phys->zap_block_type;
1278 + zs->zs_magic = zap->zap_f_phys->zap_magic;
1279 + zs->zs_salt = zap->zap_f_phys->zap_salt;
1285 1280
1286 1281 /*
1287 1282 * Set zap_ptrtbl fields
1288 1283 */
1289 - zs->zs_ptrtbl_len = 1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
1290 - zs->zs_ptrtbl_nextblk = zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk;
1284 + zs->zs_ptrtbl_len = 1ULL << zap->zap_f_phys->zap_ptrtbl.zt_shift;
1285 + zs->zs_ptrtbl_nextblk = zap->zap_f_phys->zap_ptrtbl.zt_nextblk;
1291 1286 zs->zs_ptrtbl_blks_copied =
1292 - zap->zap_f.zap_phys->zap_ptrtbl.zt_blks_copied;
1293 - zs->zs_ptrtbl_zt_blk = zap->zap_f.zap_phys->zap_ptrtbl.zt_blk;
1294 - zs->zs_ptrtbl_zt_numblks = zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks;
1295 - zs->zs_ptrtbl_zt_shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
1287 + zap->zap_f_phys->zap_ptrtbl.zt_blks_copied;
1288 + zs->zs_ptrtbl_zt_blk = zap->zap_f_phys->zap_ptrtbl.zt_blk;
1289 + zs->zs_ptrtbl_zt_numblks = zap->zap_f_phys->zap_ptrtbl.zt_numblks;
1290 + zs->zs_ptrtbl_zt_shift = zap->zap_f_phys->zap_ptrtbl.zt_shift;
1296 1291
1297 - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
1292 + if (zap->zap_f_phys->zap_ptrtbl.zt_numblks == 0) {
1298 1293 /* the ptrtbl is entirely in the header block. */
1299 1294 zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
1300 1295 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs);
1301 1296 } else {
1302 1297 int b;
1303 1298
1304 1299 dmu_prefetch(zap->zap_objset, zap->zap_object,
1305 - zap->zap_f.zap_phys->zap_ptrtbl.zt_blk << bs,
1306 - zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << bs);
1300 + zap->zap_f_phys->zap_ptrtbl.zt_blk << bs,
1301 + zap->zap_f_phys->zap_ptrtbl.zt_numblks << bs);
1307 1302
1308 - for (b = 0; b < zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks;
1303 + for (b = 0; b < zap->zap_f_phys->zap_ptrtbl.zt_numblks;
1309 1304 b++) {
1310 1305 dmu_buf_t *db;
1311 1306 int err;
1312 1307
1313 1308 err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
1314 - (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk + b) << bs,
1309 + (zap->zap_f_phys->zap_ptrtbl.zt_blk + b) << bs,
1315 1310 FTAG, &db, DMU_READ_NO_PREFETCH);
1316 1311 if (err == 0) {
1317 1312 zap_stats_ptrtbl(zap, db->db_data,
1318 1313 1<<(bs-3), zs);
1319 1314 dmu_buf_rele(db, FTAG);
1320 1315 }
1321 1316 }
1322 1317 }
1323 1318 }
1324 1319
1325 1320 int
1326 1321 fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
1327 1322 uint64_t *tooverwrite)
1328 1323 {
1329 1324 zap_t *zap = zn->zn_zap;
1330 1325 zap_leaf_t *l;
1331 1326 int err;
1332 1327
1333 1328 /*
1334 1329 * Account for the header block of the fatzap.
1335 1330 */
1336 1331 if (!add && dmu_buf_freeable(zap->zap_dbuf)) {
1337 1332 *tooverwrite += zap->zap_dbuf->db_size;
1338 1333 } else {
1339 1334 *towrite += zap->zap_dbuf->db_size;
1340 1335 }
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
1341 1336
1342 1337 /*
1343 1338 * Account for the pointer table blocks.
1344 1339 * If we are adding we need to account for the following cases :
1345 1340 * - If the pointer table is embedded, this operation could force an
1346 1341 * external pointer table.
1347 1342 * - If this already has an external pointer table this operation
1348 1343 * could extend the table.
1349 1344 */
1350 1345 if (add) {
1351 - if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0)
1346 + if (zap->zap_f_phys->zap_ptrtbl.zt_blk == 0)
1352 1347 *towrite += zap->zap_dbuf->db_size;
1353 1348 else
1354 1349 *towrite += (zap->zap_dbuf->db_size * 3);
1355 1350 }
1356 1351
1357 1352 /*
1358 1353 * Now, check if the block containing leaf is freeable
1359 1354 * and account accordingly.
1360 1355 */
1361 1356 err = zap_deref_leaf(zap, zn->zn_hash, NULL, RW_READER, &l);
1362 1357 if (err != 0) {
1363 1358 return (err);
1364 1359 }
1365 1360
1366 1361 if (!add && dmu_buf_freeable(l->l_dbuf)) {
1367 1362 *tooverwrite += l->l_dbuf->db_size;
1368 1363 } else {
1369 1364 /*
1370 1365 * If this an add operation, the leaf block could split.
1371 1366 * Hence, we need to account for an additional leaf block.
1372 1367 */
1373 1368 *towrite += (add ? 2 : 1) * l->l_dbuf->db_size;
1374 1369 }
1375 1370
1376 1371 zap_put_leaf(l);
1377 1372 return (0);
1378 1373 }
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX