Print this page
3006 VERIFY[S,U,P] and ASSERT[S,U,P] frequently check if first argument is zero
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/zfs/zap.c
+++ new/usr/src/uts/common/fs/zfs/zap.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * This file contains the top half of the zfs directory structure
28 28 * implementation. The bottom half is in zap_leaf.c.
29 29 *
30 30 * The zdir is an extendable hash data structure. There is a table of
31 31 * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are
32 32 * each a constant size and hold a variable number of directory entries.
33 33 * The buckets (aka "leaf nodes") are implemented in zap_leaf.c.
34 34 *
35 35 * The pointer table holds a power of 2 number of pointers.
36 36 * (1<<zap_t->zd_data->zd_phys->zd_prefix_len). The bucket pointed to
37 37 * by the pointer at index i in the table holds entries whose hash value
38 38 * has a zd_prefix_len - bit prefix
39 39 */
40 40
41 41 #include <sys/spa.h>
42 42 #include <sys/dmu.h>
43 43 #include <sys/zfs_context.h>
44 44 #include <sys/zfs_znode.h>
45 45 #include <sys/fs/zfs.h>
46 46 #include <sys/zap.h>
47 47 #include <sys/refcount.h>
48 48 #include <sys/zap_impl.h>
49 49 #include <sys/zap_leaf.h>
50 50
51 51 int fzap_default_block_shift = 14; /* 16k blocksize */
52 52
53 53 static void zap_leaf_pageout(dmu_buf_t *db, void *vl);
54 54 static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
55 55
56 56
57 57 void
58 58 fzap_byteswap(void *vbuf, size_t size)
59 59 {
60 60 uint64_t block_type;
61 61
62 62 block_type = *(uint64_t *)vbuf;
63 63
64 64 if (block_type == ZBT_LEAF || block_type == BSWAP_64(ZBT_LEAF))
65 65 zap_leaf_byteswap(vbuf, size);
66 66 else {
67 67 /* it's a ptrtbl block */
68 68 byteswap_uint64_array(vbuf, size);
69 69 }
70 70 }
71 71
/*
 * Convert a microzap object into a fat zap: install the fat-zap dbuf
 * user, zero and re-initialize the header block (zap_phys_t), point
 * every embedded ptrtbl entry at block 1, and write out block 1 as the
 * first (empty) leaf.  Caller must hold the zap rwlock as writer.
 */
void
fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
{
	dmu_buf_t *db;
	zap_leaf_t *l;
	int i;
	zap_phys_t *zp;

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
	zap->zap_ismicro = FALSE;

	/* swap in the fat-zap eviction callback for this object's dbuf */
	(void) dmu_buf_update_user(zap->zap_dbuf, zap, zap,
	    &zap->zap_f.zap_phys, zap_evict);

	mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
	zap->zap_f.zap_block_shift = highbit(zap->zap_dbuf->db_size) - 1;

	zp = zap->zap_f.zap_phys;
	/*
	 * explicitly zero it since it might be coming from an
	 * initialized microzap
	 */
	bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size);
	zp->zap_block_type = ZBT_HEADER;
	zp->zap_magic = ZAP_MAGIC;

	zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap);

	zp->zap_freeblk = 2;		/* block 1 will be the first leaf */
	zp->zap_num_leafs = 1;
	zp->zap_num_entries = 0;
	zp->zap_salt = zap->zap_salt;
	zp->zap_normflags = zap->zap_normflags;
	zp->zap_flags = flags;

	/* block 1 will be the first leaf */
	for (i = 0; i < (1<<zp->zap_ptrtbl.zt_shift); i++)
		ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1;

	/*
	 * set up block 1 - the first leaf
	 */
	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH));
	dmu_buf_will_dirty(db, tx);

	/* use a throwaway zap_leaf_t just to drive zap_leaf_init() */
	l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP);
	l->l_dbuf = db;
	l->l_phys = db->db_data;

	zap_leaf_init(l, zp->zap_normflags != 0);

	kmem_free(l, sizeof (zap_leaf_t));
	dmu_buf_rele(db, FTAG);
}
127 127
128 128 static int
129 129 zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx)
130 130 {
131 131 if (RW_WRITE_HELD(&zap->zap_rwlock))
132 132 return (1);
133 133 if (rw_tryupgrade(&zap->zap_rwlock)) {
134 134 dmu_buf_will_dirty(zap->zap_dbuf, tx);
135 135 return (1);
136 136 }
137 137 return (0);
138 138 }
139 139
140 140 /*
141 141 * Generic routines for dealing with the pointer & cookie tables.
142 142 */
143 143
/*
 * Grow a table to twice its current size, one source block per call.
 * The copy is incremental: each invocation migrates block
 * zt_blks_copied of the old table into the two corresponding blocks at
 * the new location (via transfer_func), and only once every block has
 * been migrated are the old blocks freed and the table header
 * (zt_blk/zt_numblks/zt_shift) switched over.  Returns 0 or an errno
 * from dmu_buf_hold().
 */
static int
zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
    void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n),
    dmu_tx_t *tx)
{
	uint64_t b, newblk;
	dmu_buf_t *db_old, *db_new;
	int err;
	int bs = FZAP_BLOCK_SHIFT(zap);
	int hepb = 1<<(bs-4);
	/* hepb = half the number of entries in a block */

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
	ASSERT(tbl->zt_blk != 0);
	ASSERT(tbl->zt_numblks > 0);

	if (tbl->zt_nextblk != 0) {
		/* a grow is already in progress; keep copying into it */
		newblk = tbl->zt_nextblk;
	} else {
		/* start a new grow: allocate the doubled table */
		newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
		tbl->zt_nextblk = newblk;
		ASSERT0(tbl->zt_blks_copied);
		dmu_prefetch(zap->zap_objset, zap->zap_object,
		    tbl->zt_blk << bs, tbl->zt_numblks << bs);
	}

	/*
	 * Copy the ptrtbl from the old to new location.
	 */

	b = tbl->zt_blks_copied;
	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (tbl->zt_blk + b) << bs, FTAG, &db_old, DMU_READ_NO_PREFETCH);
	if (err)
		return (err);

	/* first half of entries in old[b] go to new[2*b+0] */
	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (newblk + 2*b+0) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
	dmu_buf_will_dirty(db_new, tx);
	transfer_func(db_old->db_data, db_new->db_data, hepb);
	dmu_buf_rele(db_new, FTAG);

	/* second half of entries in old[b] go to new[2*b+1] */
	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (newblk + 2*b+1) << bs, FTAG, &db_new, DMU_READ_NO_PREFETCH));
	dmu_buf_will_dirty(db_new, tx);
	transfer_func((uint64_t *)db_old->db_data + hepb,
	    db_new->db_data, hepb);
	dmu_buf_rele(db_new, FTAG);

	dmu_buf_rele(db_old, FTAG);

	tbl->zt_blks_copied++;

	dprintf("copied block %llu of %llu\n",
	    tbl->zt_blks_copied, tbl->zt_numblks);

	if (tbl->zt_blks_copied == tbl->zt_numblks) {
		/* migration complete: free the old table and switch over */
		(void) dmu_free_range(zap->zap_objset, zap->zap_object,
		    tbl->zt_blk << bs, tbl->zt_numblks << bs, tx);

		tbl->zt_blk = newblk;
		tbl->zt_numblks *= 2;
		tbl->zt_shift++;
		tbl->zt_nextblk = 0;
		tbl->zt_blks_copied = 0;

		dprintf("finished; numblocks now %llu (%lluk entries)\n",
		    tbl->zt_numblks, 1<<(tbl->zt_shift-10));
	}

	return (0);
}
218 218
/*
 * Store val at index idx of the given table.  If a grow of the table
 * is in progress (zt_nextblk != 0), also mirror the store into the
 * doubled copy at indices 2*idx and 2*idx+1 so the in-flight new table
 * stays consistent.  Returns 0 or an errno from dmu_buf_hold().
 */
static int
zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val,
    dmu_tx_t *tx)
{
	int err;
	uint64_t blk, off;
	int bs = FZAP_BLOCK_SHIFT(zap);
	dmu_buf_t *db;

	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
	ASSERT(tbl->zt_blk != 0);

	dprintf("storing %llx at index %llx\n", val, idx);

	/* 2^(bs-3) uint64 entries fit in one block */
	blk = idx >> (bs-3);
	off = idx & ((1<<(bs-3))-1);

	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
	if (err)
		return (err);
	dmu_buf_will_dirty(db, tx);

	if (tbl->zt_nextblk != 0) {
		/* keep the in-progress doubled copy up to date */
		uint64_t idx2 = idx * 2;
		uint64_t blk2 = idx2 >> (bs-3);
		uint64_t off2 = idx2 & ((1<<(bs-3))-1);
		dmu_buf_t *db2;

		err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
		    (tbl->zt_nextblk + blk2) << bs, FTAG, &db2,
		    DMU_READ_NO_PREFETCH);
		if (err) {
			dmu_buf_rele(db, FTAG);
			return (err);
		}
		dmu_buf_will_dirty(db2, tx);
		/* both doubled slots share the same source entry */
		((uint64_t *)db2->db_data)[off2] = val;
		((uint64_t *)db2->db_data)[off2+1] = val;
		dmu_buf_rele(db2, FTAG);
	}

	((uint64_t *)db->db_data)[off] = val;
	dmu_buf_rele(db, FTAG);

	return (0);
}
266 266
267 267 static int
268 268 zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
269 269 {
270 270 uint64_t blk, off;
271 271 int err;
272 272 dmu_buf_t *db;
273 273 int bs = FZAP_BLOCK_SHIFT(zap);
274 274
275 275 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
276 276
277 277 blk = idx >> (bs-3);
278 278 off = idx & ((1<<(bs-3))-1);
279 279
280 280 err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
281 281 (tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
282 282 if (err)
283 283 return (err);
284 284 *valp = ((uint64_t *)db->db_data)[off];
285 285 dmu_buf_rele(db, FTAG);
286 286
287 287 if (tbl->zt_nextblk != 0) {
288 288 /*
289 289 * read the nextblk for the sake of i/o error checking,
290 290 * so that zap_table_load() will catch errors for
291 291 * zap_table_store.
292 292 */
293 293 blk = (idx*2) >> (bs-3);
294 294
295 295 err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
296 296 (tbl->zt_nextblk + blk) << bs, FTAG, &db,
297 297 DMU_READ_NO_PREFETCH);
298 298 dmu_buf_rele(db, FTAG);
299 299 }
300 300 return (err);
301 301 }
302 302
303 303 /*
304 304 * Routines for growing the ptrtbl.
305 305 */
306 306
307 307 static void
308 308 zap_ptrtbl_transfer(const uint64_t *src, uint64_t *dst, int n)
309 309 {
310 310 int i;
311 311 for (i = 0; i < n; i++) {
312 312 uint64_t lb = src[i];
313 313 dst[2*i+0] = lb;
314 314 dst[2*i+1] = lb;
315 315 }
316 316 }
317 317
/*
 * Double the size of the pointer table.  The first grow moves the
 * "embedded" ptrtbl out of the header block into its own block; later
 * grows are delegated to zap_table_grow().  Returns ENOSPC once the
 * table is within 2 bits of the full hash width, or an errno from the
 * DMU.
 */
static int
zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx)
{
	/*
	 * The pointer table should never use more hash bits than we
	 * have (otherwise we'd be using useless zero bits to index it).
	 * If we are within 2 bits of running out, stop growing, since
	 * this is already an aberrant condition.
	 */
	if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2)
		return (ENOSPC);

	if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
		/*
		 * We are outgrowing the "embedded" ptrtbl (the one
		 * stored in the header block).  Give it its own entire
		 * block, which will double the size of the ptrtbl.
		 */
		uint64_t newblk;
		dmu_buf_t *db_new;
		int err;

		ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==,
		    ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
		ASSERT0(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk);

		newblk = zap_allocate_blocks(zap, 1);
		err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
		    newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new,
		    DMU_READ_NO_PREFETCH);
		if (err)
			return (err);
		dmu_buf_will_dirty(db_new, tx);
		/* each embedded entry expands to two external entries */
		zap_ptrtbl_transfer(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
		    db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
		dmu_buf_rele(db_new, FTAG);

		zap->zap_f.zap_phys->zap_ptrtbl.zt_blk = newblk;
		zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks = 1;
		zap->zap_f.zap_phys->zap_ptrtbl.zt_shift++;

		/* the table should now exactly fill its blocks */
		ASSERT3U(1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==,
		    zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks <<
		    (FZAP_BLOCK_SHIFT(zap)-3));

		return (0);
	} else {
		return (zap_table_grow(zap, &zap->zap_f.zap_phys->zap_ptrtbl,
		    zap_ptrtbl_transfer, tx));
	}
}
369 369
/*
 * Adjust the fat zap's persistent entry count by delta (which may be
 * negative).  The count must never go below zero.
 */
static void
zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx)
{
	dmu_buf_will_dirty(zap->zap_dbuf, tx);
	mutex_enter(&zap->zap_f.zap_num_entries_mtx);
	/* never decrement below zero */
	ASSERT(delta > 0 || zap->zap_f.zap_phys->zap_num_entries >= -delta);
	zap->zap_f.zap_phys->zap_num_entries += delta;
	mutex_exit(&zap->zap_f.zap_num_entries_mtx);
}
379 379
380 380 static uint64_t
381 381 zap_allocate_blocks(zap_t *zap, int nblocks)
382 382 {
383 383 uint64_t newblk;
384 384 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
385 385 newblk = zap->zap_f.zap_phys->zap_freeblk;
386 386 zap->zap_f.zap_phys->zap_freeblk += nblocks;
387 387 return (newblk);
388 388 }
389 389
/*
 * Allocate and initialize a new, empty leaf block, holding its dbuf
 * and installing the zap_leaf_t as the dbuf user.  The leaf is
 * returned write-locked; release with zap_put_leaf().
 */
static zap_leaf_t *
zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
{
	void *winner;
	zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP);

	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));

	rw_init(&l->l_rwlock, 0, 0, 0);
	rw_enter(&l->l_rwlock, RW_WRITER);
	l->l_blkid = zap_allocate_blocks(zap, 1);
	l->l_dbuf = NULL;
	l->l_phys = NULL;

	VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
	    DMU_READ_NO_PREFETCH));
	/* freshly allocated block: nobody can have set the user yet */
	winner = dmu_buf_set_user(l->l_dbuf, l, &l->l_phys, zap_leaf_pageout);
	ASSERT(winner == NULL);
	dmu_buf_will_dirty(l->l_dbuf, tx);

	zap_leaf_init(l, zap->zap_normflags != 0);

	zap->zap_f.zap_phys->zap_num_leafs++;

	return (l);
}
417 417
/*
 * Return (in *count) the number of entries in this fat zap.
 * Always succeeds (returns 0).
 */
int
fzap_count(zap_t *zap, uint64_t *count)
{
	ASSERT(!zap->zap_ismicro);
	mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */
	*count = zap->zap_f.zap_phys->zap_num_entries;
	mutex_exit(&zap->zap_f.zap_num_entries_mtx);
	return (0);
}
427 427
428 428 /*
429 429 * Routines for obtaining zap_leaf_t's
430 430 */
431 431
/*
 * Drop the leaf lock and the dbuf hold taken by zap_get_leaf_byblk()
 * or zap_create_leaf().
 */
void
zap_put_leaf(zap_leaf_t *l)
{
	rw_exit(&l->l_rwlock);
	dmu_buf_rele(l->l_dbuf, NULL);
}
438 438
/*
 * Dbuf-eviction callback: tear down the in-core zap_leaf_t when its
 * dbuf is evicted.  db is unused and may be NULL (zap_open_leaf()
 * calls this directly for a race loser).
 */
_NOTE(ARGSUSED(0))
static void
zap_leaf_pageout(dmu_buf_t *db, void *vl)
{
	zap_leaf_t *l = vl;

	rw_destroy(&l->l_rwlock);
	kmem_free(l, sizeof (zap_leaf_t));
}
448 448
/*
 * Construct the in-core zap_leaf_t for an existing leaf block and race
 * to install it as the dbuf user; if another thread won the race, free
 * ours and return the winner's.  Called with db held; returns the leaf
 * unlocked.
 */
static zap_leaf_t *
zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
{
	zap_leaf_t *l, *winner;

	ASSERT(blkid != 0);

	l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP);
	rw_init(&l->l_rwlock, 0, 0, 0);
	rw_enter(&l->l_rwlock, RW_WRITER);
	l->l_blkid = blkid;
	l->l_bs = highbit(db->db_size)-1;
	l->l_dbuf = db;
	l->l_phys = NULL;

	/* returns the existing user (non-NULL) if we lost the race */
	winner = dmu_buf_set_user(db, l, &l->l_phys, zap_leaf_pageout);

	rw_exit(&l->l_rwlock);
	if (winner != NULL) {
		/* someone else set it first */
		zap_leaf_pageout(NULL, l);
		l = winner;
	}

	/*
	 * lhr_pad was previously used for the next leaf in the leaf
	 * chain.  There should be no chained leafs (as we have removed
	 * support for them).
	 */
	ASSERT0(l->l_phys->l_hdr.lh_pad1);

	/*
	 * There should be more hash entries than there can be
	 * chunks to put in the hash table
	 */
	ASSERT3U(ZAP_LEAF_HASH_NUMENTRIES(l), >, ZAP_LEAF_NUMCHUNKS(l) / 3);

	/* The chunks should begin at the end of the hash table */
	ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==,
	    &l->l_phys->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]);

	/* The chunks should end at the end of the block */
	ASSERT3U((uintptr_t)&ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)) -
	    (uintptr_t)l->l_phys, ==, l->l_dbuf->db_size);

	return (l);
}
496 496
/*
 * Look up (or instantiate) the leaf at block blkid, lock it as lt
 * (dirtying it first for writers), and return it in *lp.  Returns 0
 * or an errno from dmu_buf_hold().
 */
static int
zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
    zap_leaf_t **lp)
{
	dmu_buf_t *db;
	zap_leaf_t *l;
	int bs = FZAP_BLOCK_SHIFT(zap);
	int err;

	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));

	err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
	    blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH);
	if (err)
		return (err);

	ASSERT3U(db->db_object, ==, zap->zap_object);
	ASSERT3U(db->db_offset, ==, blkid << bs);
	ASSERT3U(db->db_size, ==, 1 << bs);
	ASSERT(blkid != 0);

	/* reuse the in-core leaf attached to the dbuf, if any */
	l = dmu_buf_get_user(db);

	if (l == NULL)
		l = zap_open_leaf(blkid, db);

	rw_enter(&l->l_rwlock, lt);
	/*
	 * Must lock before dirtying, otherwise l->l_phys could change,
	 * causing ASSERT below to fail.
	 */
	if (lt == RW_WRITER)
		dmu_buf_will_dirty(db, tx);
	ASSERT3U(l->l_blkid, ==, blkid);
	ASSERT3P(l->l_dbuf, ==, db);
	ASSERT3P(l->l_phys, ==, l->l_dbuf->db_data);
	ASSERT3U(l->l_phys->l_hdr.lh_block_type, ==, ZBT_LEAF);
	ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);

	*lp = l;
	return (0);
}
539 539
540 540 static int
541 541 zap_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t *valp)
542 542 {
543 543 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
544 544
545 545 if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
546 546 ASSERT3U(idx, <,
547 547 (1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift));
548 548 *valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx);
549 549 return (0);
550 550 } else {
551 551 return (zap_table_load(zap, &zap->zap_f.zap_phys->zap_ptrtbl,
552 552 idx, valp));
553 553 }
554 554 }
555 555
556 556 static int
557 557 zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx)
558 558 {
559 559 ASSERT(tx != NULL);
560 560 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
561 561
562 562 if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0) {
563 563 ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) = blk;
564 564 return (0);
565 565 } else {
566 566 return (zap_table_store(zap, &zap->zap_f.zap_phys->zap_ptrtbl,
567 567 idx, blk, tx));
568 568 }
569 569 }
570 570
/*
 * Find and lock (as lt) the leaf that covers hash value h, returning
 * it in *lp.  Returns 0 or an errno.
 */
static int
zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp)
{
	uint64_t idx, blk;
	int err;

	ASSERT(zap->zap_dbuf == NULL ||
	    zap->zap_f.zap_phys == zap->zap_dbuf->db_data);
	ASSERT3U(zap->zap_f.zap_phys->zap_magic, ==, ZAP_MAGIC);
	idx = ZAP_HASH_IDX(h, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
	err = zap_idx_to_blk(zap, idx, &blk);
	if (err != 0)
		return (err);
	err = zap_get_leaf_byblk(zap, blk, tx, lt, lp);

	/* the leaf's prefix must cover the hash we looked up */
	ASSERT(err || ZAP_HASH_IDX(h, (*lp)->l_phys->l_hdr.lh_prefix_len) ==
	    (*lp)->l_phys->l_hdr.lh_prefix);
	return (err);
}
590 590
/*
 * Split the full leaf l in two, growing the pointer table first if the
 * leaf's prefix already uses every ptrtbl bit.  On success *lp is the
 * write-locked leaf that now covers zn's hash; the other leaf is
 * released.  May drop and reacquire the zap lock, so zn->zn_zap can
 * change across this call (and is NULL if zap_lockdir() failed).
 */
static int
zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp)
{
	zap_t *zap = zn->zn_zap;
	uint64_t hash = zn->zn_hash;
	zap_leaf_t *nl;
	int prefix_diff, i, err;
	uint64_t sibling;
	int old_prefix_len = l->l_phys->l_hdr.lh_prefix_len;

	ASSERT3U(old_prefix_len, <=, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));

	ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
	    l->l_phys->l_hdr.lh_prefix);

	if (zap_tryupgradedir(zap, tx) == 0 ||
	    old_prefix_len == zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) {
		/* We failed to upgrade, or need to grow the pointer table */
		objset_t *os = zap->zap_objset;
		uint64_t object = zap->zap_object;

		/* drop everything and relock the directory as writer */
		zap_put_leaf(l);
		zap_unlockdir(zap);
		err = zap_lockdir(os, object, tx, RW_WRITER,
		    FALSE, FALSE, &zn->zn_zap);
		zap = zn->zn_zap;
		if (err)
			return (err);
		ASSERT(!zap->zap_ismicro);

		/* grow the ptrtbl until the leaf's prefix can gain a bit */
		while (old_prefix_len ==
		    zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) {
			err = zap_grow_ptrtbl(zap, tx);
			if (err)
				return (err);
		}

		err = zap_deref_leaf(zap, hash, tx, RW_WRITER, &l);
		if (err)
			return (err);

		if (l->l_phys->l_hdr.lh_prefix_len != old_prefix_len) {
			/* it split while our locks were down */
			*lp = l;
			return (0);
		}
	}
	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
	ASSERT3U(old_prefix_len, <, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
	ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==,
	    l->l_phys->l_hdr.lh_prefix);

	/* 1<<prefix_diff ptrtbl entries will point at the new sibling */
	prefix_diff = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift -
	    (old_prefix_len + 1);
	sibling = (ZAP_HASH_IDX(hash, old_prefix_len + 1) | 1) << prefix_diff;

	/* check for i/o errors before doing zap_leaf_split */
	for (i = 0; i < (1ULL<<prefix_diff); i++) {
		uint64_t blk;
		err = zap_idx_to_blk(zap, sibling+i, &blk);
		if (err)
			return (err);
		ASSERT3U(blk, ==, l->l_blkid);
	}

	nl = zap_create_leaf(zap, tx);
	zap_leaf_split(l, nl, zap->zap_normflags != 0);

	/* set sibling pointers */
	for (i = 0; i < (1ULL << prefix_diff); i++) {
		err = zap_set_idx_to_blk(zap, sibling+i, nl->l_blkid, tx);
		ASSERT0(err); /* we checked for i/o errors above */
	}

	if (hash & (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len))) {
		/* we want the sibling */
		zap_put_leaf(l);
		*lp = nl;
	} else {
		zap_put_leaf(nl);
		*lp = l;
	}

	return (0);
}
677 677
/*
 * Release leaf l; if it is nearly full (or a ptrtbl grow is already in
 * progress), opportunistically grow the pointer table so an upcoming
 * split won't have to.  Best effort: lock-upgrade failures fall back
 * to a full relock, and relock failures simply skip the grow.
 */
static void
zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
{
	zap_t *zap = zn->zn_zap;
	int shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
	int leaffull = (l->l_phys->l_hdr.lh_prefix_len == shift &&
	    l->l_phys->l_hdr.lh_nfree < ZAP_LEAF_LOW_WATER);

	zap_put_leaf(l);

	if (leaffull || zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk) {
		int err;

		/*
		 * We are in the middle of growing the pointer table, or
		 * this leaf will soon make us grow it.
		 */
		if (zap_tryupgradedir(zap, tx) == 0) {
			objset_t *os = zap->zap_objset;
			uint64_t zapobj = zap->zap_object;

			zap_unlockdir(zap);
			err = zap_lockdir(os, zapobj, tx,
			    RW_WRITER, FALSE, FALSE, &zn->zn_zap);
			zap = zn->zn_zap;
			if (err)
				return;
		}

		/* could have finished growing while our locks were down */
		if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift == shift)
			(void) zap_grow_ptrtbl(zap, tx);
	}
}
712 712
713 713 static int
714 714 fzap_checkname(zap_name_t *zn)
715 715 {
716 716 if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN)
717 717 return (ENAMETOOLONG);
718 718 return (0);
719 719 }
720 720
721 721 static int
722 722 fzap_checksize(uint64_t integer_size, uint64_t num_integers)
723 723 {
724 724 /* Only integer sizes supported by C */
725 725 switch (integer_size) {
726 726 case 1:
727 727 case 2:
728 728 case 4:
729 729 case 8:
730 730 break;
731 731 default:
732 732 return (EINVAL);
733 733 }
734 734
735 735 if (integer_size * num_integers > ZAP_MAXVALUELEN)
736 736 return (E2BIG);
737 737
738 738 return (0);
739 739 }
740 740
741 741 static int
742 742 fzap_check(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers)
743 743 {
744 744 int err;
745 745
746 746 if ((err = fzap_checkname(zn)) != 0)
747 747 return (err);
748 748 return (fzap_checksize(integer_size, num_integers));
749 749 }
750 750
751 751 /*
752 752 * Routines for manipulating attributes.
753 753 */
/*
 * Look up zn in the fat zap and copy up to num_integers values of
 * integer_size bytes into buf.  Also returns the on-disk name in
 * realname/rn_len and, if ncp is non-NULL, a normalization-conflict
 * flag.  Returns 0, ENOENT, or another errno.
 */
int
fzap_lookup(zap_name_t *zn,
    uint64_t integer_size, uint64_t num_integers, void *buf,
    char *realname, int rn_len, boolean_t *ncp)
{
	zap_leaf_t *l;
	int err;
	zap_entry_handle_t zeh;

	if ((err = fzap_checkname(zn)) != 0)
		return (err);

	err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &l);
	if (err != 0)
		return (err);
	err = zap_leaf_lookup(l, zn, &zeh);
	if (err == 0) {
		/* only validate sizes once we know the entry exists */
		if ((err = fzap_checksize(integer_size, num_integers)) != 0) {
			zap_put_leaf(l);
			return (err);
		}

		err = zap_entry_read(&zeh, integer_size, num_integers, buf);
		(void) zap_entry_read_name(zn->zn_zap, &zeh, rn_len, realname);
		if (ncp) {
			*ncp = zap_entry_normalization_conflict(&zeh,
			    zn, NULL, zn->zn_zap);
		}
	}

	zap_put_leaf(l);
	return (err);
}
787 787
/*
 * Add an entry with collision differentiator cd.  Returns EEXIST if
 * the name is already present.  When the leaf is full
 * (zap_entry_create() returns EAGAIN) the leaf is split via
 * zap_expand_leaf() and the insert retried.  May drop and reacquire
 * the zap lock, so zn->zn_zap can change across this call.
 */
int
fzap_add_cd(zap_name_t *zn,
    uint64_t integer_size, uint64_t num_integers,
    const void *val, uint32_t cd, dmu_tx_t *tx)
{
	zap_leaf_t *l;
	int err;
	zap_entry_handle_t zeh;
	zap_t *zap = zn->zn_zap;

	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
	ASSERT(!zap->zap_ismicro);
	ASSERT(fzap_check(zn, integer_size, num_integers) == 0);

	err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
	if (err != 0)
		return (err);
retry:
	err = zap_leaf_lookup(l, zn, &zeh);
	if (err == 0) {
		err = EEXIST;
		goto out;
	}
	if (err != ENOENT)
		goto out;

	err = zap_entry_create(l, zn, cd,
	    integer_size, num_integers, val, &zeh);

	if (err == 0) {
		zap_increment_num_entries(zap, 1, tx);
	} else if (err == EAGAIN) {
		/* leaf is full: split it and try again */
		err = zap_expand_leaf(zn, l, tx, &l);
		zap = zn->zn_zap;	/* zap_expand_leaf() may change zap */
		if (err == 0)
			goto retry;
	}

out:
	/*
	 * NOTE(review): when zap_expand_leaf() fails, zap (zn->zn_zap)
	 * may be NULL and l is not released here — presumably the leaf
	 * was already put on the failing path inside zap_expand_leaf();
	 * confirm every error path there drops the leaf.
	 */
	if (zap != NULL)
		zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx);
	return (err);
}
831 831
832 832 int
833 833 fzap_add(zap_name_t *zn,
834 834 uint64_t integer_size, uint64_t num_integers,
835 835 const void *val, dmu_tx_t *tx)
836 836 {
837 837 int err = fzap_check(zn, integer_size, num_integers);
838 838 if (err != 0)
839 839 return (err);
840 840
841 841 return (fzap_add_cd(zn, integer_size, num_integers,
842 842 val, ZAP_NEED_CD, tx));
843 843 }
844 844
/*
 * Create or overwrite the entry matching zn.  Like fzap_add_cd(), a
 * full leaf (EAGAIN) is split via zap_expand_leaf() and the operation
 * retried; the zap lock may be dropped and reacquired, so zn->zn_zap
 * can change across this call.
 */
int
fzap_update(zap_name_t *zn,
    int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
{
	zap_leaf_t *l;
	int err, create;
	zap_entry_handle_t zeh;
	zap_t *zap = zn->zn_zap;

	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
	err = fzap_check(zn, integer_size, num_integers);
	if (err != 0)
		return (err);

	err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
	if (err != 0)
		return (err);
retry:
	err = zap_leaf_lookup(l, zn, &zeh);
	create = (err == ENOENT);
	ASSERT(err == 0 || err == ENOENT);

	if (create) {
		err = zap_entry_create(l, zn, ZAP_NEED_CD,
		    integer_size, num_integers, val, &zeh);
		if (err == 0)
			zap_increment_num_entries(zap, 1, tx);
	} else {
		err = zap_entry_update(&zeh, integer_size, num_integers, val);
	}

	if (err == EAGAIN) {
		/* leaf is full: split it and try again */
		err = zap_expand_leaf(zn, l, tx, &l);
		zap = zn->zn_zap;	/* zap_expand_leaf() may change zap */
		if (err == 0)
			goto retry;
	}

	if (zap != NULL)
		zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx);
	return (err);
}
887 887
888 888 int
889 889 fzap_length(zap_name_t *zn,
890 890 uint64_t *integer_size, uint64_t *num_integers)
891 891 {
892 892 zap_leaf_t *l;
893 893 int err;
894 894 zap_entry_handle_t zeh;
895 895
896 896 err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &l);
897 897 if (err != 0)
898 898 return (err);
899 899 err = zap_leaf_lookup(l, zn, &zeh);
900 900 if (err != 0)
901 901 goto out;
902 902
903 903 if (integer_size)
904 904 *integer_size = zeh.zeh_integer_size;
905 905 if (num_integers)
906 906 *num_integers = zeh.zeh_num_integers;
907 907 out:
908 908 zap_put_leaf(l);
909 909 return (err);
910 910 }
911 911
/*
 * Remove the entry matching zn, if present, and decrement the entry
 * count.  Returns 0 on success or ENOENT if there is no such entry.
 */
int
fzap_remove(zap_name_t *zn, dmu_tx_t *tx)
{
	zap_leaf_t *l;
	int err;
	zap_entry_handle_t zeh;

	err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, tx, RW_WRITER, &l);
	if (err != 0)
		return (err);
	err = zap_leaf_lookup(l, zn, &zeh);
	if (err == 0) {
		zap_entry_remove(&zeh);
		zap_increment_num_entries(zn->zn_zap, -1, tx);
	}
	zap_put_leaf(l);
	return (err);
}
930 930
/*
 * Start an async read of the leaf block that would hold zn, to warm
 * the cache ahead of a lookup.  Best effort; errors are ignored.
 */
void
fzap_prefetch(zap_name_t *zn)
{
	uint64_t idx, blk;
	zap_t *zap = zn->zn_zap;
	int bs;

	idx = ZAP_HASH_IDX(zn->zn_hash,
	    zap->zap_f.zap_phys->zap_ptrtbl.zt_shift);
	if (zap_idx_to_blk(zap, idx, &blk) != 0)
		return;
	bs = FZAP_BLOCK_SHIFT(zap);
	dmu_prefetch(zap->zap_objset, zap->zap_object, blk << bs, 1 << bs);
}
945 945
946 946 /*
947 947 * Helper functions for consumers.
948 948 */
949 949
950 950 uint64_t
951 951 zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
952 952 const char *name, dmu_tx_t *tx)
953 953 {
954 954 uint64_t new_obj;
955 955
956 956 VERIFY((new_obj = zap_create(os, ot, DMU_OT_NONE, 0, tx)) > 0);
957 957 VERIFY(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &new_obj,
958 958 tx) == 0);
959 959
960 960 return (new_obj);
961 961 }
962 962
/*
 * Linear-scan the zap for the first entry whose first integer value
 * matches (value & mask), copying its name into the caller-supplied
 * buffer.  A mask of 0 means compare all bits.  Returns 0 on a match,
 * or the final zap_cursor_retrieve() errno (ENOENT when exhausted).
 */
int
zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask,
    char *name)
{
	zap_cursor_t zc;
	zap_attribute_t *za;
	int err;

	if (mask == 0)
		mask = -1ULL;

	za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
	for (zap_cursor_init(&zc, os, zapobj);
	    (err = zap_cursor_retrieve(&zc, za)) == 0;
	    zap_cursor_advance(&zc)) {
		if ((za->za_first_integer & mask) == (value & mask)) {
			/*
			 * NOTE(review): strcpy assumes the caller's name
			 * buffer is large enough for any za_name —
			 * confirm callers' buffer sizes.
			 */
			(void) strcpy(name, za->za_name);
			break;
		}
	}
	zap_cursor_fini(&zc);
	kmem_free(za, sizeof (zap_attribute_t));
	return (err);
}
987 987
988 988 int
989 989 zap_join(objset_t *os, uint64_t fromobj, uint64_t intoobj, dmu_tx_t *tx)
990 990 {
991 991 zap_cursor_t zc;
992 992 zap_attribute_t za;
993 993 int err;
994 994
995 995 for (zap_cursor_init(&zc, os, fromobj);
996 996 zap_cursor_retrieve(&zc, &za) == 0;
997 997 (void) zap_cursor_advance(&zc)) {
998 998 if (za.za_integer_length != 8 || za.za_num_integers != 1)
999 999 return (EINVAL);
1000 1000 err = zap_add(os, intoobj, za.za_name,
1001 1001 8, 1, &za.za_first_integer, tx);
1002 1002 if (err)
1003 1003 return (err);
1004 1004 }
1005 1005 zap_cursor_fini(&zc);
1006 1006 return (0);
1007 1007 }
1008 1008
1009 1009 int
1010 1010 zap_join_key(objset_t *os, uint64_t fromobj, uint64_t intoobj,
1011 1011 uint64_t value, dmu_tx_t *tx)
1012 1012 {
1013 1013 zap_cursor_t zc;
1014 1014 zap_attribute_t za;
1015 1015 int err;
1016 1016
1017 1017 for (zap_cursor_init(&zc, os, fromobj);
1018 1018 zap_cursor_retrieve(&zc, &za) == 0;
1019 1019 (void) zap_cursor_advance(&zc)) {
1020 1020 if (za.za_integer_length != 8 || za.za_num_integers != 1)
1021 1021 return (EINVAL);
1022 1022 err = zap_add(os, intoobj, za.za_name,
1023 1023 8, 1, &value, tx);
1024 1024 if (err)
1025 1025 return (err);
1026 1026 }
1027 1027 zap_cursor_fini(&zc);
1028 1028 return (0);
1029 1029 }
1030 1030
1031 1031 int
1032 1032 zap_join_increment(objset_t *os, uint64_t fromobj, uint64_t intoobj,
1033 1033 dmu_tx_t *tx)
1034 1034 {
1035 1035 zap_cursor_t zc;
1036 1036 zap_attribute_t za;
1037 1037 int err;
1038 1038
1039 1039 for (zap_cursor_init(&zc, os, fromobj);
1040 1040 zap_cursor_retrieve(&zc, &za) == 0;
1041 1041 (void) zap_cursor_advance(&zc)) {
1042 1042 uint64_t delta = 0;
1043 1043
1044 1044 if (za.za_integer_length != 8 || za.za_num_integers != 1)
1045 1045 return (EINVAL);
1046 1046
1047 1047 err = zap_lookup(os, intoobj, za.za_name, 8, 1, &delta);
1048 1048 if (err != 0 && err != ENOENT)
1049 1049 return (err);
1050 1050 delta += za.za_first_integer;
1051 1051 err = zap_update(os, intoobj, za.za_name, 8, 1, &delta, tx);
1052 1052 if (err)
1053 1053 return (err);
1054 1054 }
1055 1055 zap_cursor_fini(&zc);
1056 1056 return (0);
1057 1057 }
1058 1058
1059 1059 int
1060 1060 zap_add_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx)
1061 1061 {
1062 1062 char name[20];
1063 1063
1064 1064 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)value);
1065 1065 return (zap_add(os, obj, name, 8, 1, &value, tx));
1066 1066 }
1067 1067
1068 1068 int
1069 1069 zap_remove_int(objset_t *os, uint64_t obj, uint64_t value, dmu_tx_t *tx)
1070 1070 {
1071 1071 char name[20];
1072 1072
1073 1073 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)value);
1074 1074 return (zap_remove(os, obj, name, tx));
1075 1075 }
1076 1076
1077 1077 int
1078 1078 zap_lookup_int(objset_t *os, uint64_t obj, uint64_t value)
1079 1079 {
1080 1080 char name[20];
1081 1081
1082 1082 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)value);
1083 1083 return (zap_lookup(os, obj, name, 8, 1, &value));
1084 1084 }
1085 1085
1086 1086 int
1087 1087 zap_add_int_key(objset_t *os, uint64_t obj,
1088 1088 uint64_t key, uint64_t value, dmu_tx_t *tx)
1089 1089 {
1090 1090 char name[20];
1091 1091
1092 1092 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
1093 1093 return (zap_add(os, obj, name, 8, 1, &value, tx));
1094 1094 }
1095 1095
1096 1096 int
1097 1097 zap_lookup_int_key(objset_t *os, uint64_t obj, uint64_t key, uint64_t *valuep)
1098 1098 {
1099 1099 char name[20];
1100 1100
1101 1101 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
1102 1102 return (zap_lookup(os, obj, name, 8, 1, valuep));
1103 1103 }
1104 1104
1105 1105 int
1106 1106 zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
1107 1107 dmu_tx_t *tx)
1108 1108 {
1109 1109 uint64_t value = 0;
1110 1110 int err;
1111 1111
1112 1112 if (delta == 0)
1113 1113 return (0);
1114 1114
1115 1115 err = zap_lookup(os, obj, name, 8, 1, &value);
1116 1116 if (err != 0 && err != ENOENT)
1117 1117 return (err);
1118 1118 value += delta;
1119 1119 if (value == 0)
1120 1120 err = zap_remove(os, obj, name, tx);
1121 1121 else
1122 1122 err = zap_update(os, obj, name, 8, 1, &value, tx);
1123 1123 return (err);
1124 1124 }
1125 1125
1126 1126 int
1127 1127 zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
1128 1128 dmu_tx_t *tx)
1129 1129 {
1130 1130 char name[20];
1131 1131
1132 1132 (void) snprintf(name, sizeof (name), "%llx", (longlong_t)key);
1133 1133 return (zap_increment(os, obj, name, delta, tx));
1134 1134 }
1135 1135
1136 1136 /*
1137 1137 * Routines for iterating over the attributes.
1138 1138 */
1139 1139
/*
 * Retrieve the next entry at or after the cursor's zc_hash/zc_cd
 * position into *za, advancing the cursor state.  Returns 0 on
 * success or ENOENT when the fatzap has no further entries.
 */
int
fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
{
	int err = ENOENT;
	zap_entry_handle_t zeh;
	zap_leaf_t *l;

	/* retrieve the next entry at or after zc_hash/zc_cd */
	/* if no entry, return ENOENT */

	/*
	 * If we hold a cached leaf but the cursor's hash no longer falls
	 * within its prefix (e.g. the zap was modified since the last
	 * call), drop it so we re-derive the correct leaf below.  The
	 * lock is taken only so zap_put_leaf() can release it.
	 */
	if (zc->zc_leaf &&
	    (ZAP_HASH_IDX(zc->zc_hash,
	    zc->zc_leaf->l_phys->l_hdr.lh_prefix_len) !=
	    zc->zc_leaf->l_phys->l_hdr.lh_prefix)) {
		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
		zap_put_leaf(zc->zc_leaf);
		zc->zc_leaf = NULL;
	}

again:
	/*
	 * Hold and read-lock the leaf covering zc_hash; a cached leaf is
	 * already held, so it only needs to be re-locked.
	 */
	if (zc->zc_leaf == NULL) {
		err = zap_deref_leaf(zap, zc->zc_hash, NULL, RW_READER,
		    &zc->zc_leaf);
		if (err != 0)
			return (err);
	} else {
		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
	}
	l = zc->zc_leaf;

	err = zap_leaf_lookup_closest(l, zc->zc_hash, zc->zc_cd, &zeh);

	if (err == ENOENT) {
		/*
		 * This leaf is exhausted.  Advance zc_hash just past the
		 * range this leaf covers ("nocare" masks the bits the
		 * leaf's prefix doesn't determine) and try the next leaf,
		 * unless this leaf covered the whole hash space
		 * (prefix_len == 0) or the hash wrapped to 0 — then the
		 * iteration is complete and the cursor is pinned at -1.
		 */
		uint64_t nocare =
		    (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len)) - 1;
		zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1;
		zc->zc_cd = 0;
		if (l->l_phys->l_hdr.lh_prefix_len == 0 || zc->zc_hash == 0) {
			zc->zc_hash = -1ULL;
		} else {
			zap_put_leaf(zc->zc_leaf);
			zc->zc_leaf = NULL;
			goto again;
		}
	}

	if (err == 0) {
		/* Found an entry: record its position and fill in *za. */
		zc->zc_hash = zeh.zeh_hash;
		zc->zc_cd = zeh.zeh_cd;
		za->za_integer_length = zeh.zeh_integer_size;
		za->za_num_integers = zeh.zeh_num_integers;
		if (zeh.zeh_num_integers == 0) {
			za->za_first_integer = 0;
		} else {
			/* EOVERFLOW just means the value is wider than 8. */
			err = zap_entry_read(&zeh, 8, 1, &za->za_first_integer);
			ASSERT(err == 0 || err == EOVERFLOW);
		}
		err = zap_entry_read_name(zap, &zeh,
		    sizeof (za->za_name), za->za_name);
		ASSERT(err == 0);

		za->za_normalization_conflict =
		    zap_entry_normalization_conflict(&zeh,
		    NULL, za->za_name, zap);
	}
	/* Drop the lock but keep the hold: the leaf stays cached in zc. */
	rw_exit(&zc->zc_leaf->l_rwlock);
	return (err);
}
1208 1208
1209 1209 static void
1210 1210 zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs)
1211 1211 {
1212 1212 int i, err;
1213 1213 uint64_t lastblk = 0;
1214 1214
1215 1215 /*
1216 1216 * NB: if a leaf has more pointers than an entire ptrtbl block
1217 1217 * can hold, then it'll be accounted for more than once, since
1218 1218 * we won't have lastblk.
1219 1219 */
1220 1220 for (i = 0; i < len; i++) {
1221 1221 zap_leaf_t *l;
1222 1222
1223 1223 if (tbl[i] == lastblk)
1224 1224 continue;
1225 1225 lastblk = tbl[i];
1226 1226
1227 1227 err = zap_get_leaf_byblk(zap, tbl[i], NULL, RW_READER, &l);
1228 1228 if (err == 0) {
1229 1229 zap_leaf_stats(zap, l, zs);
1230 1230 zap_put_leaf(l);
1231 1231 }
1232 1232 }
1233 1233 }
1234 1234
1235 1235 int
1236 1236 fzap_cursor_move_to_key(zap_cursor_t *zc, zap_name_t *zn)
1237 1237 {
1238 1238 int err;
1239 1239 zap_leaf_t *l;
1240 1240 zap_entry_handle_t zeh;
1241 1241
1242 1242 if (zn->zn_key_orig_numints * zn->zn_key_intlen > ZAP_MAXNAMELEN)
1243 1243 return (ENAMETOOLONG);
1244 1244
1245 1245 err = zap_deref_leaf(zc->zc_zap, zn->zn_hash, NULL, RW_READER, &l);
1246 1246 if (err != 0)
1247 1247 return (err);
1248 1248
1249 1249 err = zap_leaf_lookup(l, zn, &zeh);
1250 1250 if (err != 0)
1251 1251 return (err);
1252 1252
1253 1253 zc->zc_leaf = l;
1254 1254 zc->zc_hash = zeh.zeh_hash;
1255 1255 zc->zc_cd = zeh.zeh_cd;
1256 1256
1257 1257 return (err);
1258 1258 }
1259 1259
/*
 * Fill in *zs with statistics about this fatzap: a snapshot of the
 * zap_phys_t header fields, the pointer-table geometry, and per-leaf
 * stats gathered by walking the entire pointer table.
 */
void
fzap_get_stats(zap_t *zap, zap_stats_t *zs)
{
	int bs = FZAP_BLOCK_SHIFT(zap);
	zs->zs_blocksize = 1ULL << bs;

	/*
	 * Set zap_phys_t fields
	 */
	zs->zs_num_leafs = zap->zap_f.zap_phys->zap_num_leafs;
	zs->zs_num_entries = zap->zap_f.zap_phys->zap_num_entries;
	zs->zs_num_blocks = zap->zap_f.zap_phys->zap_freeblk;
	zs->zs_block_type = zap->zap_f.zap_phys->zap_block_type;
	zs->zs_magic = zap->zap_f.zap_phys->zap_magic;
	zs->zs_salt = zap->zap_f.zap_phys->zap_salt;

	/*
	 * Set zap_ptrtbl fields
	 */
	zs->zs_ptrtbl_len = 1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;
	zs->zs_ptrtbl_nextblk = zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk;
	zs->zs_ptrtbl_blks_copied =
	    zap->zap_f.zap_phys->zap_ptrtbl.zt_blks_copied;
	zs->zs_ptrtbl_zt_blk = zap->zap_f.zap_phys->zap_ptrtbl.zt_blk;
	zs->zs_ptrtbl_zt_numblks = zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks;
	zs->zs_ptrtbl_zt_shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift;

	if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) {
		/* the ptrtbl is entirely in the header block. */
		zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
		    1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs);
	} else {
		int b;

		/* Prefetch the whole external ptrtbl before walking it. */
		dmu_prefetch(zap->zap_objset, zap->zap_object,
		    zap->zap_f.zap_phys->zap_ptrtbl.zt_blk << bs,
		    zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << bs);

		/* Walk the ptrtbl one block at a time; skip blocks we
		 * fail to hold rather than aborting the stats pass. */
		for (b = 0; b < zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks;
		    b++) {
			dmu_buf_t *db;
			int err;

			err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
			    (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk + b) << bs,
			    FTAG, &db, DMU_READ_NO_PREFETCH);
			if (err == 0) {
				/* 1<<(bs-3): 8-byte pointers per block. */
				zap_stats_ptrtbl(zap, db->db_data,
				    1<<(bs-3), zs);
				dmu_buf_rele(db, FTAG);
			}
		}
	}
}
1314 1314
1315 1315 int
1316 1316 fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
1317 1317 uint64_t *tooverwrite)
1318 1318 {
1319 1319 zap_t *zap = zn->zn_zap;
1320 1320 zap_leaf_t *l;
1321 1321 int err;
1322 1322
1323 1323 /*
1324 1324 * Account for the header block of the fatzap.
1325 1325 */
1326 1326 if (!add && dmu_buf_freeable(zap->zap_dbuf)) {
1327 1327 *tooverwrite += zap->zap_dbuf->db_size;
1328 1328 } else {
1329 1329 *towrite += zap->zap_dbuf->db_size;
1330 1330 }
1331 1331
1332 1332 /*
1333 1333 * Account for the pointer table blocks.
1334 1334 * If we are adding we need to account for the following cases :
1335 1335 * - If the pointer table is embedded, this operation could force an
1336 1336 * external pointer table.
1337 1337 * - If this already has an external pointer table this operation
1338 1338 * could extend the table.
1339 1339 */
1340 1340 if (add) {
1341 1341 if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0)
1342 1342 *towrite += zap->zap_dbuf->db_size;
1343 1343 else
1344 1344 *towrite += (zap->zap_dbuf->db_size * 3);
1345 1345 }
1346 1346
1347 1347 /*
1348 1348 * Now, check if the block containing leaf is freeable
1349 1349 * and account accordingly.
1350 1350 */
1351 1351 err = zap_deref_leaf(zap, zn->zn_hash, NULL, RW_READER, &l);
1352 1352 if (err != 0) {
1353 1353 return (err);
1354 1354 }
1355 1355
1356 1356 if (!add && dmu_buf_freeable(l->l_dbuf)) {
1357 1357 *tooverwrite += l->l_dbuf->db_size;
1358 1358 } else {
1359 1359 /*
1360 1360 * If this an add operation, the leaf block could split.
1361 1361 * Hence, we need to account for an additional leaf block.
1362 1362 */
1363 1363 *towrite += (add ? 2 : 1) * l->l_dbuf->db_size;
1364 1364 }
1365 1365
1366 1366 zap_put_leaf(l);
1367 1367 return (0);
1368 1368 }
↓ open down ↓ |
695 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX