Print this page
3741 zfs needs better comments
Submitted by: Will Andrews <willa@spectralogic.com>
Submitted by: Justin Gibbs <justing@spectralogic.com>
Submitted by: Alan Somers <alans@spectralogic.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/zfs/dmu_zfetch.c
+++ new/usr/src/uts/common/fs/zfs/dmu_zfetch.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 #include <sys/zfs_context.h>
27 27 #include <sys/dnode.h>
28 28 #include <sys/dmu_objset.h>
29 29 #include <sys/dmu_zfetch.h>
30 30 #include <sys/dmu.h>
31 31 #include <sys/dbuf.h>
32 32 #include <sys/kstat.h>
33 33
/*
 * I'm against tune-ables, but these should probably exist as tweakable globals
 * until we can get this working the way we want it to.
 */

/* Set to non-zero to disable file-level prefetch entirely (tunable) */
int zfs_prefetch_disable = 0;

/* max # of streams per zfetch */
uint32_t zfetch_max_streams = 8;
/* min time before stream reclaim */
uint32_t zfetch_min_sec_reap = 2;
/* max number of blocks to fetch at a time */
uint32_t zfetch_block_cap = 256;
/* number of bytes in an array_read at which we stop prefetching (1Mb) */
uint64_t zfetch_array_rd_sz = 1024 * 1024;
49 49
/* forward decls for static routines */
static boolean_t dmu_zfetch_colinear(zfetch_t *, zstream_t *);
static void dmu_zfetch_dofetch(zfetch_t *, zstream_t *);
static uint64_t dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t);
static uint64_t dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t);
static boolean_t dmu_zfetch_find(zfetch_t *, zstream_t *, int);
static int dmu_zfetch_stream_insert(zfetch_t *, zstream_t *);
static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *);
static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
static int dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
60 60
/*
 * Prefetcher statistics, exported read-only through the "zfetchstats"
 * kstat installed by zfetch_init() below.
 */
typedef struct zfetch_stats {
	kstat_named_t zfetchstat_hits;
	kstat_named_t zfetchstat_misses;
	kstat_named_t zfetchstat_colinear_hits;
	kstat_named_t zfetchstat_colinear_misses;
	kstat_named_t zfetchstat_stride_hits;
	kstat_named_t zfetchstat_stride_misses;
	kstat_named_t zfetchstat_reclaim_successes;
	kstat_named_t zfetchstat_reclaim_failures;
	kstat_named_t zfetchstat_stream_resets;
	kstat_named_t zfetchstat_stream_noresets;
	kstat_named_t zfetchstat_bogus_streams;
} zfetch_stats_t;

static zfetch_stats_t zfetch_stats = {
	{ "hits", KSTAT_DATA_UINT64 },
	{ "misses", KSTAT_DATA_UINT64 },
	{ "colinear_hits", KSTAT_DATA_UINT64 },
	{ "colinear_misses", KSTAT_DATA_UINT64 },
	{ "stride_hits", KSTAT_DATA_UINT64 },
	{ "stride_misses", KSTAT_DATA_UINT64 },
	{ "reclaim_successes", KSTAT_DATA_UINT64 },
	{ "reclaim_failures", KSTAT_DATA_UINT64 },
	{ "streams_resets", KSTAT_DATA_UINT64 },
	{ "streams_noresets", KSTAT_DATA_UINT64 },
	{ "bogus_streams", KSTAT_DATA_UINT64 },
};

/* Atomically add 'val' to the named statistic; safe without the zf lock. */
#define ZFETCHSTAT_INCR(stat, val) \
	atomic_add_64(&zfetch_stats.stat.value.ui64, (val));

#define ZFETCHSTAT_BUMP(stat) ZFETCHSTAT_INCR(stat, 1);

/* Handle for the installed "zfetchstats" kstat; NULL if creation failed. */
kstat_t *zfetch_ksp;
95 95
96 96 /*
↓ open down ↓ |
31 lines elided |
↑ open up ↑ |
97 97 * Given a zfetch structure and a zstream structure, determine whether the
98 98 * blocks to be read are part of a co-linear pair of existing prefetch
99 99 * streams. If a set is found, coalesce the streams, removing one, and
100 100 * configure the prefetch so it looks for a strided access pattern.
101 101 *
102 102 * In other words: if we find two sequential access streams that are
103 103 * the same length and distance N appart, and this read is N from the
104 104 * last stream, then we are probably in a strided access pattern. So
105 105 * combine the two sequential streams into a single strided stream.
106 106 *
107 - * If no co-linear streams are found, return NULL.
107 + * Returns whether co-linear streams were found.
108 108 */
109 -static int
109 +static boolean_t
110 110 dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh)
111 111 {
112 112 zstream_t *z_walk;
113 113 zstream_t *z_comp;
114 114
115 115 if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
116 116 return (0);
117 117
118 118 if (zh == NULL) {
119 119 rw_exit(&zf->zf_rwlock);
120 120 return (0);
121 121 }
122 122
123 123 for (z_walk = list_head(&zf->zf_stream); z_walk;
124 124 z_walk = list_next(&zf->zf_stream, z_walk)) {
125 125 for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp;
126 126 z_comp = list_next(&zf->zf_stream, z_comp)) {
127 127 int64_t diff;
128 128
129 129 if (z_walk->zst_len != z_walk->zst_stride ||
130 130 z_comp->zst_len != z_comp->zst_stride) {
131 131 continue;
132 132 }
133 133
134 134 diff = z_comp->zst_offset - z_walk->zst_offset;
135 135 if (z_comp->zst_offset + diff == zh->zst_offset) {
136 136 z_walk->zst_offset = zh->zst_offset;
137 137 z_walk->zst_direction = diff < 0 ? -1 : 1;
138 138 z_walk->zst_stride =
139 139 diff * z_walk->zst_direction;
140 140 z_walk->zst_ph_offset =
141 141 zh->zst_offset + z_walk->zst_stride;
142 142 dmu_zfetch_stream_remove(zf, z_comp);
143 143 mutex_destroy(&z_comp->zst_lock);
144 144 kmem_free(z_comp, sizeof (zstream_t));
145 145
146 146 dmu_zfetch_dofetch(zf, z_walk);
147 147
148 148 rw_exit(&zf->zf_rwlock);
149 149 return (1);
150 150 }
151 151
152 152 diff = z_walk->zst_offset - z_comp->zst_offset;
153 153 if (z_walk->zst_offset + diff == zh->zst_offset) {
154 154 z_walk->zst_offset = zh->zst_offset;
155 155 z_walk->zst_direction = diff < 0 ? -1 : 1;
156 156 z_walk->zst_stride =
157 157 diff * z_walk->zst_direction;
158 158 z_walk->zst_ph_offset =
159 159 zh->zst_offset + z_walk->zst_stride;
160 160 dmu_zfetch_stream_remove(zf, z_comp);
161 161 mutex_destroy(&z_comp->zst_lock);
162 162 kmem_free(z_comp, sizeof (zstream_t));
163 163
164 164 dmu_zfetch_dofetch(zf, z_walk);
165 165
166 166 rw_exit(&zf->zf_rwlock);
167 167 return (1);
168 168 }
169 169 }
170 170 }
171 171
172 172 rw_exit(&zf->zf_rwlock);
173 173 return (0);
174 174 }
175 175
/*
 * Given a zstream_t, determine the bounds of the prefetch.  Then call the
 * routine that actually prefetches the individual blocks.
 */
static void
dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
{
	uint64_t prefetch_tail;
	uint64_t prefetch_limit;
	uint64_t prefetch_ofst;
	uint64_t prefetch_len;
	uint64_t blocks_fetched;

	/* A sequential stream keeps stride >= len; normalize it here. */
	zs->zst_stride = MAX((int64_t)zs->zst_stride, zs->zst_len);
	/* Double the fetch window each call, capped at zfetch_block_cap. */
	zs->zst_cap = MIN(zfetch_block_cap, 2 * zs->zst_cap);

	/* Resume from the previous prefetch high-water mark, if any. */
	prefetch_tail = MAX((int64_t)zs->zst_ph_offset,
	    (int64_t)(zs->zst_offset + zs->zst_stride));
	/*
	 * XXX: use a faster division method?
	 */
	prefetch_limit = zs->zst_offset + zs->zst_len +
	    (zs->zst_cap * zs->zst_stride) / zs->zst_len;

	while (prefetch_tail < prefetch_limit) {
		prefetch_ofst = zs->zst_offset + zs->zst_direction *
		    (prefetch_tail - zs->zst_offset);

		prefetch_len = zs->zst_len;

		/*
		 * Don't prefetch beyond the end of the file, if working
		 * backwards.  (prefetch_ofst > prefetch_tail detects that
		 * the unsigned offset computation wrapped below zero.)
		 */
		if ((zs->zst_direction == ZFETCH_BACKWARD) &&
		    (prefetch_ofst > prefetch_tail)) {
			prefetch_len += prefetch_ofst;
			prefetch_ofst = 0;
		}

		/* don't prefetch more than we're supposed to */
		if (prefetch_len > zs->zst_len)
			break;

		blocks_fetched = dmu_zfetch_fetch(zf->zf_dnode,
		    prefetch_ofst, zs->zst_len);

		prefetch_tail += zs->zst_stride;
		/* stop if we've run out of stuff to prefetch */
		if (blocks_fetched < zs->zst_len)
			break;
	}
	/* Record the new high-water mark and access time. */
	zs->zst_ph_offset = prefetch_tail;
	zs->zst_last = ddi_get_lbolt();
}
231 231
232 232 void
233 233 zfetch_init(void)
234 234 {
235 235
236 236 zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
237 237 KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
238 238 KSTAT_FLAG_VIRTUAL);
239 239
240 240 if (zfetch_ksp != NULL) {
241 241 zfetch_ksp->ks_data = &zfetch_stats;
242 242 kstat_install(zfetch_ksp);
243 243 }
244 244 }
245 245
246 246 void
247 247 zfetch_fini(void)
248 248 {
249 249 if (zfetch_ksp != NULL) {
250 250 kstat_delete(zfetch_ksp);
251 251 zfetch_ksp = NULL;
252 252 }
253 253 }
254 254
255 255 /*
256 256 * This takes a pointer to a zfetch structure and a dnode. It performs the
257 257 * necessary setup for the zfetch structure, grokking data from the
258 258 * associated dnode.
259 259 */
260 260 void
261 261 dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
262 262 {
263 263 if (zf == NULL) {
264 264 return;
265 265 }
266 266
267 267 zf->zf_dnode = dno;
268 268 zf->zf_stream_cnt = 0;
269 269 zf->zf_alloc_fail = 0;
270 270
271 271 list_create(&zf->zf_stream, sizeof (zstream_t),
272 272 offsetof(zstream_t, zst_node));
273 273
274 274 rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
275 275 }
276 276
277 277 /*
278 278 * This function computes the actual size, in blocks, that can be prefetched,
279 279 * and fetches it.
280 280 */
281 281 static uint64_t
282 282 dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks)
283 283 {
284 284 uint64_t fetchsz;
285 285 uint64_t i;
286 286
287 287 fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks);
288 288
289 289 for (i = 0; i < fetchsz; i++) {
290 290 dbuf_prefetch(dn, blkid + i);
291 291 }
292 292
293 293 return (fetchsz);
294 294 }
295 295
296 296 /*
297 297 * this function returns the number of blocks that would be prefetched, based
298 298 * upon the supplied dnode, blockid, and nblks. This is used so that we can
299 299 * update streams in place, and then prefetch with their old value after the
300 300 * fact. This way, we can delay the prefetch, but subsequent accesses to the
301 301 * stream won't result in the same data being prefetched multiple times.
302 302 */
303 303 static uint64_t
304 304 dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks)
305 305 {
306 306 uint64_t fetchsz;
307 307
308 308 if (blkid > dn->dn_maxblkid) {
309 309 return (0);
310 310 }
311 311
312 312 /* compute fetch size */
313 313 if (blkid + nblks + 1 > dn->dn_maxblkid) {
314 314 fetchsz = (dn->dn_maxblkid - blkid) + 1;
315 315 ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid);
316 316 } else {
317 317 fetchsz = nblks;
318 318 }
↓ open down ↓ |
199 lines elided |
↑ open up ↑ |
319 319
320 320
321 321 return (fetchsz);
322 322 }
323 323
324 324 /*
325 325 * given a zfetch and a zstream structure, see if there is an associated zstream
326 326 * for this block read. If so, it starts a prefetch for the stream it
327 327 * located and returns true, otherwise it returns false
328 328 */
329 -static int
329 +static boolean_t
330 330 dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
331 331 {
332 332 zstream_t *zs;
333 333 int64_t diff;
334 334 int reset = !prefetched;
335 335 int rc = 0;
336 336
337 337 if (zh == NULL)
338 338 return (0);
339 339
340 340 /*
341 341 * XXX: This locking strategy is a bit coarse; however, it's impact has
342 342 * yet to be tested. If this turns out to be an issue, it can be
343 343 * modified in a number of different ways.
344 344 */
345 345
346 346 rw_enter(&zf->zf_rwlock, RW_READER);
347 347 top:
348 348
349 349 for (zs = list_head(&zf->zf_stream); zs;
350 350 zs = list_next(&zf->zf_stream, zs)) {
351 351
352 352 /*
353 353 * XXX - should this be an assert?
354 354 */
355 355 if (zs->zst_len == 0) {
356 356 /* bogus stream */
357 357 ZFETCHSTAT_BUMP(zfetchstat_bogus_streams);
358 358 continue;
359 359 }
360 360
361 361 /*
362 362 * We hit this case when we are in a strided prefetch stream:
363 363 * we will read "len" blocks before "striding".
364 364 */
365 365 if (zh->zst_offset >= zs->zst_offset &&
366 366 zh->zst_offset < zs->zst_offset + zs->zst_len) {
367 367 if (prefetched) {
368 368 /* already fetched */
369 369 ZFETCHSTAT_BUMP(zfetchstat_stride_hits);
370 370 rc = 1;
371 371 goto out;
372 372 } else {
373 373 ZFETCHSTAT_BUMP(zfetchstat_stride_misses);
374 374 }
375 375 }
376 376
377 377 /*
378 378 * This is the forward sequential read case: we increment
379 379 * len by one each time we hit here, so we will enter this
380 380 * case on every read.
381 381 */
382 382 if (zh->zst_offset == zs->zst_offset + zs->zst_len) {
383 383
384 384 reset = !prefetched && zs->zst_len > 1;
385 385
386 386 mutex_enter(&zs->zst_lock);
387 387
388 388 if (zh->zst_offset != zs->zst_offset + zs->zst_len) {
389 389 mutex_exit(&zs->zst_lock);
390 390 goto top;
391 391 }
392 392 zs->zst_len += zh->zst_len;
393 393 diff = zs->zst_len - zfetch_block_cap;
394 394 if (diff > 0) {
395 395 zs->zst_offset += diff;
396 396 zs->zst_len = zs->zst_len > diff ?
397 397 zs->zst_len - diff : 0;
398 398 }
399 399 zs->zst_direction = ZFETCH_FORWARD;
400 400
401 401 break;
402 402
403 403 /*
404 404 * Same as above, but reading backwards through the file.
405 405 */
406 406 } else if (zh->zst_offset == zs->zst_offset - zh->zst_len) {
407 407 /* backwards sequential access */
408 408
409 409 reset = !prefetched && zs->zst_len > 1;
410 410
411 411 mutex_enter(&zs->zst_lock);
412 412
413 413 if (zh->zst_offset != zs->zst_offset - zh->zst_len) {
414 414 mutex_exit(&zs->zst_lock);
415 415 goto top;
416 416 }
417 417
418 418 zs->zst_offset = zs->zst_offset > zh->zst_len ?
419 419 zs->zst_offset - zh->zst_len : 0;
420 420 zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ?
421 421 zs->zst_ph_offset - zh->zst_len : 0;
422 422 zs->zst_len += zh->zst_len;
423 423
424 424 diff = zs->zst_len - zfetch_block_cap;
425 425 if (diff > 0) {
426 426 zs->zst_ph_offset = zs->zst_ph_offset > diff ?
427 427 zs->zst_ph_offset - diff : 0;
428 428 zs->zst_len = zs->zst_len > diff ?
429 429 zs->zst_len - diff : zs->zst_len;
430 430 }
431 431 zs->zst_direction = ZFETCH_BACKWARD;
432 432
433 433 break;
434 434
435 435 } else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride <
436 436 zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
437 437 /* strided forward access */
438 438
439 439 mutex_enter(&zs->zst_lock);
440 440
441 441 if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >=
442 442 zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
443 443 mutex_exit(&zs->zst_lock);
444 444 goto top;
445 445 }
446 446
447 447 zs->zst_offset += zs->zst_stride;
448 448 zs->zst_direction = ZFETCH_FORWARD;
449 449
450 450 break;
451 451
452 452 } else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride <
453 453 zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
454 454 /* strided reverse access */
455 455
456 456 mutex_enter(&zs->zst_lock);
457 457
458 458 if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >=
459 459 zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
460 460 mutex_exit(&zs->zst_lock);
461 461 goto top;
462 462 }
463 463
464 464 zs->zst_offset = zs->zst_offset > zs->zst_stride ?
465 465 zs->zst_offset - zs->zst_stride : 0;
466 466 zs->zst_ph_offset = (zs->zst_ph_offset >
467 467 (2 * zs->zst_stride)) ?
468 468 (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0;
469 469 zs->zst_direction = ZFETCH_BACKWARD;
470 470
471 471 break;
472 472 }
473 473 }
474 474
475 475 if (zs) {
476 476 if (reset) {
477 477 zstream_t *remove = zs;
478 478
479 479 ZFETCHSTAT_BUMP(zfetchstat_stream_resets);
480 480 rc = 0;
481 481 mutex_exit(&zs->zst_lock);
482 482 rw_exit(&zf->zf_rwlock);
483 483 rw_enter(&zf->zf_rwlock, RW_WRITER);
484 484 /*
485 485 * Relocate the stream, in case someone removes
486 486 * it while we were acquiring the WRITER lock.
487 487 */
488 488 for (zs = list_head(&zf->zf_stream); zs;
489 489 zs = list_next(&zf->zf_stream, zs)) {
490 490 if (zs == remove) {
491 491 dmu_zfetch_stream_remove(zf, zs);
492 492 mutex_destroy(&zs->zst_lock);
493 493 kmem_free(zs, sizeof (zstream_t));
494 494 break;
495 495 }
496 496 }
497 497 } else {
498 498 ZFETCHSTAT_BUMP(zfetchstat_stream_noresets);
499 499 rc = 1;
500 500 dmu_zfetch_dofetch(zf, zs);
501 501 mutex_exit(&zs->zst_lock);
502 502 }
503 503 }
504 504 out:
505 505 rw_exit(&zf->zf_rwlock);
506 506 return (rc);
507 507 }
508 508
509 509 /*
510 510 * Clean-up state associated with a zfetch structure. This frees allocated
511 511 * structure members, empties the zf_stream tree, and generally makes things
512 512 * nice. This doesn't free the zfetch_t itself, that's left to the caller.
513 513 */
514 514 void
515 515 dmu_zfetch_rele(zfetch_t *zf)
516 516 {
517 517 zstream_t *zs;
518 518 zstream_t *zs_next;
519 519
520 520 ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
521 521
522 522 for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
523 523 zs_next = list_next(&zf->zf_stream, zs);
524 524
525 525 list_remove(&zf->zf_stream, zs);
526 526 mutex_destroy(&zs->zst_lock);
527 527 kmem_free(zs, sizeof (zstream_t));
528 528 }
529 529 list_destroy(&zf->zf_stream);
530 530 rw_destroy(&zf->zf_rwlock);
531 531
532 532 zf->zf_dnode = NULL;
533 533 }
534 534
535 535 /*
536 536 * Given a zfetch and zstream structure, insert the zstream structure into the
537 537 * AVL tree contained within the zfetch structure. Peform the appropriate
538 538 * book-keeping. It is possible that another thread has inserted a stream which
539 539 * matches one that we are about to insert, so we must be sure to check for this
540 540 * case. If one is found, return failure, and let the caller cleanup the
541 541 * duplicates.
542 542 */
543 543 static int
544 544 dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs)
545 545 {
546 546 zstream_t *zs_walk;
547 547 zstream_t *zs_next;
548 548
549 549 ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
550 550
551 551 for (zs_walk = list_head(&zf->zf_stream); zs_walk; zs_walk = zs_next) {
552 552 zs_next = list_next(&zf->zf_stream, zs_walk);
553 553
554 554 if (dmu_zfetch_streams_equal(zs_walk, zs)) {
555 555 return (0);
556 556 }
557 557 }
558 558
559 559 list_insert_head(&zf->zf_stream, zs);
560 560 zf->zf_stream_cnt++;
561 561 return (1);
562 562 }
563 563
564 564
565 565 /*
566 566 * Walk the list of zstreams in the given zfetch, find an old one (by time), and
567 567 * reclaim it for use by the caller.
568 568 */
569 569 static zstream_t *
570 570 dmu_zfetch_stream_reclaim(zfetch_t *zf)
571 571 {
572 572 zstream_t *zs;
573 573
574 574 if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
575 575 return (0);
576 576
577 577 for (zs = list_head(&zf->zf_stream); zs;
578 578 zs = list_next(&zf->zf_stream, zs)) {
579 579
580 580 if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap)
581 581 break;
582 582 }
583 583
584 584 if (zs) {
585 585 dmu_zfetch_stream_remove(zf, zs);
586 586 mutex_destroy(&zs->zst_lock);
587 587 bzero(zs, sizeof (zstream_t));
588 588 } else {
589 589 zf->zf_alloc_fail++;
590 590 }
591 591 rw_exit(&zf->zf_rwlock);
592 592
593 593 return (zs);
594 594 }
595 595
/*
 * Given a zfetch and zstream structure, remove the zstream structure from its
 * container in the zfetch structure.  Perform the appropriate book-keeping.
 *
 * Caller must hold zf_rwlock as writer; the stream itself is not freed here.
 */
static void
dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
{
	ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));

	list_remove(&zf->zf_stream, zs);
	zf->zf_stream_cnt--;
}
608 608
609 609 static int
610 610 dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2)
611 611 {
612 612 if (zs1->zst_offset != zs2->zst_offset)
613 613 return (0);
614 614
615 615 if (zs1->zst_len != zs2->zst_len)
616 616 return (0);
617 617
618 618 if (zs1->zst_stride != zs2->zst_stride)
619 619 return (0);
620 620
621 621 if (zs1->zst_ph_offset != zs2->zst_ph_offset)
622 622 return (0);
623 623
624 624 if (zs1->zst_cap != zs2->zst_cap)
625 625 return (0);
626 626
627 627 if (zs1->zst_direction != zs2->zst_direction)
628 628 return (0);
629 629
630 630 return (1);
631 631 }
↓ open down ↓ |
292 lines elided |
↑ open up ↑ |
632 632
/*
 * This is the prefetch entry point.  It calls all of the other dmu_zfetch
 * routines to create, delete, find, or operate upon prefetch streams.
 *
 * 'offset' and 'size' describe the read in bytes; 'prefetched' is non-zero
 * when the caller believes the data was already prefetched.
 */
void
dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
{
	zstream_t zst;		/* stack-local probe describing this read */
	zstream_t *newstream;
	boolean_t fetched;
	int inserted;
	unsigned int blkshft;
	uint64_t blksz;

	if (zfs_prefetch_disable)
		return;

	/* files that aren't ln2 blocksz are only one block -- nothing to do */
	if (!zf->zf_dnode->dn_datablkshift)
		return;

	/* convert offset and size, into blockid and nblocks */
	blkshft = zf->zf_dnode->dn_datablkshift;
	blksz = (1 << blkshft);

	bzero(&zst, sizeof (zstream_t));
	zst.zst_offset = offset >> blkshft;
	/* round the byte range out to whole blocks before counting them */
	zst.zst_len = (P2ROUNDUP(offset + size, blksz) -
	    P2ALIGN(offset, blksz)) >> blkshft;

	/* First try to match an existing stream, then a co-linear pair. */
	fetched = dmu_zfetch_find(zf, &zst, prefetched);
	if (fetched) {
		ZFETCHSTAT_BUMP(zfetchstat_hits);
	} else {
		ZFETCHSTAT_BUMP(zfetchstat_misses);
		fetched = dmu_zfetch_colinear(zf, &zst);
		if (fetched) {
			ZFETCHSTAT_BUMP(zfetchstat_colinear_hits);
		} else {
			ZFETCHSTAT_BUMP(zfetchstat_colinear_misses);
		}
	}

	if (!fetched) {
		/* No match: recycle an idle stream for this access. */
		newstream = dmu_zfetch_stream_reclaim(zf);

		/*
		 * we still couldn't find a stream, drop the lock, and allocate
		 * one if possible.  Otherwise, give up and go home.
		 */
		if (newstream) {
			ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes);
		} else {
			uint64_t	maxblocks;
			uint32_t	max_streams;
			uint32_t	cur_streams;

			ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures);
			cur_streams = zf->zf_stream_cnt;
			maxblocks = zf->zf_dnode->dn_maxblkid;

			/* Cap streams by both the tunable and the file size. */
			max_streams = MIN(zfetch_max_streams,
			    (maxblocks / zfetch_block_cap));
			if (max_streams == 0) {
				max_streams++;
			}

			if (cur_streams >= max_streams) {
				return;
			}
			newstream = kmem_zalloc(sizeof (zstream_t), KM_SLEEP);
		}

		/* Initialize the new stream as forward-sequential. */
		newstream->zst_offset = zst.zst_offset;
		newstream->zst_len = zst.zst_len;
		newstream->zst_stride = zst.zst_len;
		newstream->zst_ph_offset = zst.zst_len + zst.zst_offset;
		newstream->zst_cap = zst.zst_len;
		newstream->zst_direction = ZFETCH_FORWARD;
		newstream->zst_last = ddi_get_lbolt();

		mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL);

		rw_enter(&zf->zf_rwlock, RW_WRITER);
		inserted = dmu_zfetch_stream_insert(zf, newstream);
		rw_exit(&zf->zf_rwlock);

		/* A racing thread inserted a duplicate; discard ours. */
		if (!inserted) {
			mutex_destroy(&newstream->zst_lock);
			kmem_free(newstream, sizeof (zstream_t));
		}
	}
}
↓ open down ↓ |
46 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX