1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2013 by Delphix. All rights reserved.
  28  */
  29 
  30 #include <sys/zfs_context.h>
  31 #include <sys/dnode.h>
  32 #include <sys/dmu_objset.h>
  33 #include <sys/dmu_zfetch.h>
  34 #include <sys/dmu.h>
  35 #include <sys/dbuf.h>
  36 #include <sys/kstat.h>
  37 
  38 /*
  39  * I'm against tune-ables, but these should probably exist as tweakable globals
  40  * until we can get this working the way we want it to.
  41  */
  42 
  43 int zfs_prefetch_disable = 0;
  44 
  45 /* max # of streams per zfetch */
  46 uint32_t        zfetch_max_streams = 8;
  47 /* min time before stream reclaim */
  48 uint32_t        zfetch_min_sec_reap = 2;
  49 /* max number of blocks to fetch at a time */
  50 uint32_t        zfetch_block_cap = 256;
  51 /* number of bytes in a array_read at which we stop prefetching (1Mb) */
  52 uint64_t        zfetch_array_rd_sz = 1024 * 1024;
  53 
  54 /* forward decls for static routines */
  55 static boolean_t        dmu_zfetch_colinear(zfetch_t *, zstream_t *);
  56 static void             dmu_zfetch_dofetch(zfetch_t *, zstream_t *);
  57 static uint64_t         dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t);
  58 static uint64_t         dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t);
  59 static boolean_t        dmu_zfetch_find(zfetch_t *, zstream_t *, int);
  60 static int              dmu_zfetch_stream_insert(zfetch_t *, zstream_t *);
  61 static zstream_t        *dmu_zfetch_stream_reclaim(zfetch_t *);
  62 static void             dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
  63 static int              dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
  64 
  65 typedef struct zfetch_stats {
  66         kstat_named_t zfetchstat_hits;
  67         kstat_named_t zfetchstat_misses;
  68         kstat_named_t zfetchstat_colinear_hits;
  69         kstat_named_t zfetchstat_colinear_misses;
  70         kstat_named_t zfetchstat_stride_hits;
  71         kstat_named_t zfetchstat_stride_misses;
  72         kstat_named_t zfetchstat_reclaim_successes;
  73         kstat_named_t zfetchstat_reclaim_failures;
  74         kstat_named_t zfetchstat_stream_resets;
  75         kstat_named_t zfetchstat_stream_noresets;
  76         kstat_named_t zfetchstat_bogus_streams;
  77 } zfetch_stats_t;
  78 
  79 static zfetch_stats_t zfetch_stats = {
  80         { "hits",                       KSTAT_DATA_UINT64 },
  81         { "misses",                     KSTAT_DATA_UINT64 },
  82         { "colinear_hits",              KSTAT_DATA_UINT64 },
  83         { "colinear_misses",            KSTAT_DATA_UINT64 },
  84         { "stride_hits",                KSTAT_DATA_UINT64 },
  85         { "stride_misses",              KSTAT_DATA_UINT64 },
  86         { "reclaim_successes",          KSTAT_DATA_UINT64 },
  87         { "reclaim_failures",           KSTAT_DATA_UINT64 },
  88         { "streams_resets",             KSTAT_DATA_UINT64 },
  89         { "streams_noresets",           KSTAT_DATA_UINT64 },
  90         { "bogus_streams",              KSTAT_DATA_UINT64 },
  91 };
  92 
  93 #define ZFETCHSTAT_INCR(stat, val) \
  94         atomic_add_64(&zfetch_stats.stat.value.ui64, (val));
  95 
  96 #define ZFETCHSTAT_BUMP(stat)           ZFETCHSTAT_INCR(stat, 1);
  97 
  98 kstat_t         *zfetch_ksp;
  99 
 100 /*
 101  * Given a zfetch structure and a zstream structure, determine whether the
 102  * blocks to be read are part of a co-linear pair of existing prefetch
 103  * streams.  If a set is found, coalesce the streams, removing one, and
 104  * configure the prefetch so it looks for a strided access pattern.
 105  *
 106  * In other words: if we find two sequential access streams that are
 107  * the same length and distance N appart, and this read is N from the
 108  * last stream, then we are probably in a strided access pattern.  So
 109  * combine the two sequential streams into a single strided stream.
 110  *
 111  * Returns whether co-linear streams were found.
 112  */
 113 static boolean_t
 114 dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh)
 115 {
 116         zstream_t       *z_walk;
 117         zstream_t       *z_comp;
 118 
 119         if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
 120                 return (0);
 121 
 122         if (zh == NULL) {
 123                 rw_exit(&zf->zf_rwlock);
 124                 return (0);
 125         }
 126 
 127         for (z_walk = list_head(&zf->zf_stream); z_walk;
 128             z_walk = list_next(&zf->zf_stream, z_walk)) {
 129                 for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp;
 130                     z_comp = list_next(&zf->zf_stream, z_comp)) {
 131                         int64_t         diff;
 132 
 133                         if (z_walk->zst_len != z_walk->zst_stride ||
 134                             z_comp->zst_len != z_comp->zst_stride) {
 135                                 continue;
 136                         }
 137 
 138                         diff = z_comp->zst_offset - z_walk->zst_offset;
 139                         if (z_comp->zst_offset + diff == zh->zst_offset) {
 140                                 z_walk->zst_offset = zh->zst_offset;
 141                                 z_walk->zst_direction = diff < 0 ? -1 : 1;
 142                                 z_walk->zst_stride =
 143                                     diff * z_walk->zst_direction;
 144                                 z_walk->zst_ph_offset =
 145                                     zh->zst_offset + z_walk->zst_stride;
 146                                 dmu_zfetch_stream_remove(zf, z_comp);
 147                                 mutex_destroy(&z_comp->zst_lock);
 148                                 kmem_free(z_comp, sizeof (zstream_t));
 149 
 150                                 dmu_zfetch_dofetch(zf, z_walk);
 151 
 152                                 rw_exit(&zf->zf_rwlock);
 153                                 return (1);
 154                         }
 155 
 156                         diff = z_walk->zst_offset - z_comp->zst_offset;
 157                         if (z_walk->zst_offset + diff == zh->zst_offset) {
 158                                 z_walk->zst_offset = zh->zst_offset;
 159                                 z_walk->zst_direction = diff < 0 ? -1 : 1;
 160                                 z_walk->zst_stride =
 161                                     diff * z_walk->zst_direction;
 162                                 z_walk->zst_ph_offset =
 163                                     zh->zst_offset + z_walk->zst_stride;
 164                                 dmu_zfetch_stream_remove(zf, z_comp);
 165                                 mutex_destroy(&z_comp->zst_lock);
 166                                 kmem_free(z_comp, sizeof (zstream_t));
 167 
 168                                 dmu_zfetch_dofetch(zf, z_walk);
 169 
 170                                 rw_exit(&zf->zf_rwlock);
 171                                 return (1);
 172                         }
 173                 }
 174         }
 175 
 176         rw_exit(&zf->zf_rwlock);
 177         return (0);
 178 }
 179 
 180 /*
 181  * Given a zstream_t, determine the bounds of the prefetch.  Then call the
 182  * routine that actually prefetches the individual blocks.
 183  */
 184 static void
 185 dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
 186 {
 187         uint64_t        prefetch_tail;
 188         uint64_t        prefetch_limit;
 189         uint64_t        prefetch_ofst;
 190         uint64_t        prefetch_len;
 191         uint64_t        blocks_fetched;
 192 
 193         zs->zst_stride = MAX((int64_t)zs->zst_stride, zs->zst_len);
 194         zs->zst_cap = MIN(zfetch_block_cap, 2 * zs->zst_cap);
 195 
 196         prefetch_tail = MAX((int64_t)zs->zst_ph_offset,
 197             (int64_t)(zs->zst_offset + zs->zst_stride));
 198         /*
 199          * XXX: use a faster division method?
 200          */
 201         prefetch_limit = zs->zst_offset + zs->zst_len +
 202             (zs->zst_cap * zs->zst_stride) / zs->zst_len;
 203 
 204         while (prefetch_tail < prefetch_limit) {
 205                 prefetch_ofst = zs->zst_offset + zs->zst_direction *
 206                     (prefetch_tail - zs->zst_offset);
 207 
 208                 prefetch_len = zs->zst_len;
 209 
 210                 /*
 211                  * Don't prefetch beyond the end of the file, if working
 212                  * backwards.
 213                  */
 214                 if ((zs->zst_direction == ZFETCH_BACKWARD) &&
 215                     (prefetch_ofst > prefetch_tail)) {
 216                         prefetch_len += prefetch_ofst;
 217                         prefetch_ofst = 0;
 218                 }
 219 
 220                 /* don't prefetch more than we're supposed to */
 221                 if (prefetch_len > zs->zst_len)
 222                         break;
 223 
 224                 blocks_fetched = dmu_zfetch_fetch(zf->zf_dnode,
 225                     prefetch_ofst, zs->zst_len);
 226 
 227                 prefetch_tail += zs->zst_stride;
 228                 /* stop if we've run out of stuff to prefetch */
 229                 if (blocks_fetched < zs->zst_len)
 230                         break;
 231         }
 232         zs->zst_ph_offset = prefetch_tail;
 233         zs->zst_last = ddi_get_lbolt();
 234 }
 235 
 236 void
 237 zfetch_init(void)
 238 {
 239 
 240         zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
 241             KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
 242             KSTAT_FLAG_VIRTUAL);
 243 
 244         if (zfetch_ksp != NULL) {
 245                 zfetch_ksp->ks_data = &zfetch_stats;
 246                 kstat_install(zfetch_ksp);
 247         }
 248 }
 249 
 250 void
 251 zfetch_fini(void)
 252 {
 253         if (zfetch_ksp != NULL) {
 254                 kstat_delete(zfetch_ksp);
 255                 zfetch_ksp = NULL;
 256         }
 257 }
 258 
 259 /*
 260  * This takes a pointer to a zfetch structure and a dnode.  It performs the
 261  * necessary setup for the zfetch structure, grokking data from the
 262  * associated dnode.
 263  */
 264 void
 265 dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
 266 {
 267         if (zf == NULL) {
 268                 return;
 269         }
 270 
 271         zf->zf_dnode = dno;
 272         zf->zf_stream_cnt = 0;
 273         zf->zf_alloc_fail = 0;
 274 
 275         list_create(&zf->zf_stream, sizeof (zstream_t),
 276             offsetof(zstream_t, zst_node));
 277 
 278         rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
 279 }
 280 
 281 /*
 282  * This function computes the actual size, in blocks, that can be prefetched,
 283  * and fetches it.
 284  */
 285 static uint64_t
 286 dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks)
 287 {
 288         uint64_t        fetchsz;
 289         uint64_t        i;
 290 
 291         fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks);
 292 
 293         for (i = 0; i < fetchsz; i++) {
 294                 dbuf_prefetch(dn, blkid + i, ZIO_PRIORITY_ASYNC_READ);
 295         }
 296 
 297         return (fetchsz);
 298 }
 299 
 300 /*
 301  * this function returns the number of blocks that would be prefetched, based
 302  * upon the supplied dnode, blockid, and nblks.  This is used so that we can
 303  * update streams in place, and then prefetch with their old value after the
 304  * fact.  This way, we can delay the prefetch, but subsequent accesses to the
 305  * stream won't result in the same data being prefetched multiple times.
 306  */
 307 static uint64_t
 308 dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks)
 309 {
 310         uint64_t        fetchsz;
 311 
 312         if (blkid > dn->dn_maxblkid) {
 313                 return (0);
 314         }
 315 
 316         /* compute fetch size */
 317         if (blkid + nblks + 1 > dn->dn_maxblkid) {
 318                 fetchsz = (dn->dn_maxblkid - blkid) + 1;
 319                 ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid);
 320         } else {
 321                 fetchsz = nblks;
 322         }
 323 
 324 
 325         return (fetchsz);
 326 }
 327 
 328 /*
 329  * given a zfetch and a zstream structure, see if there is an associated zstream
 330  * for this block read.  If so, it starts a prefetch for the stream it
 331  * located and returns true, otherwise it returns false
 332  */
 333 static boolean_t
 334 dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
 335 {
 336         zstream_t       *zs;
 337         int64_t         diff;
 338         int             reset = !prefetched;
 339         int             rc = 0;
 340 
 341         if (zh == NULL)
 342                 return (0);
 343 
 344         /*
 345          * XXX: This locking strategy is a bit coarse; however, it's impact has
 346          * yet to be tested.  If this turns out to be an issue, it can be
 347          * modified in a number of different ways.
 348          */
 349 
 350         rw_enter(&zf->zf_rwlock, RW_READER);
 351 top:
 352 
 353         for (zs = list_head(&zf->zf_stream); zs;
 354             zs = list_next(&zf->zf_stream, zs)) {
 355 
 356                 /*
 357                  * XXX - should this be an assert?
 358                  */
 359                 if (zs->zst_len == 0) {
 360                         /* bogus stream */
 361                         ZFETCHSTAT_BUMP(zfetchstat_bogus_streams);
 362                         continue;
 363                 }
 364 
 365                 /*
 366                  * We hit this case when we are in a strided prefetch stream:
 367                  * we will read "len" blocks before "striding".
 368                  */
 369                 if (zh->zst_offset >= zs->zst_offset &&
 370                     zh->zst_offset < zs->zst_offset + zs->zst_len) {
 371                         if (prefetched) {
 372                                 /* already fetched */
 373                                 ZFETCHSTAT_BUMP(zfetchstat_stride_hits);
 374                                 rc = 1;
 375                                 goto out;
 376                         } else {
 377                                 ZFETCHSTAT_BUMP(zfetchstat_stride_misses);
 378                         }
 379                 }
 380 
 381                 /*
 382                  * This is the forward sequential read case: we increment
 383                  * len by one each time we hit here, so we will enter this
 384                  * case on every read.
 385                  */
 386                 if (zh->zst_offset == zs->zst_offset + zs->zst_len) {
 387 
 388                         reset = !prefetched && zs->zst_len > 1;
 389 
 390                         mutex_enter(&zs->zst_lock);
 391 
 392                         if (zh->zst_offset != zs->zst_offset + zs->zst_len) {
 393                                 mutex_exit(&zs->zst_lock);
 394                                 goto top;
 395                         }
 396                         zs->zst_len += zh->zst_len;
 397                         diff = zs->zst_len - zfetch_block_cap;
 398                         if (diff > 0) {
 399                                 zs->zst_offset += diff;
 400                                 zs->zst_len = zs->zst_len > diff ?
 401                                     zs->zst_len - diff : 0;
 402                         }
 403                         zs->zst_direction = ZFETCH_FORWARD;
 404 
 405                         break;
 406 
 407                 /*
 408                  * Same as above, but reading backwards through the file.
 409                  */
 410                 } else if (zh->zst_offset == zs->zst_offset - zh->zst_len) {
 411                         /* backwards sequential access */
 412 
 413                         reset = !prefetched && zs->zst_len > 1;
 414 
 415                         mutex_enter(&zs->zst_lock);
 416 
 417                         if (zh->zst_offset != zs->zst_offset - zh->zst_len) {
 418                                 mutex_exit(&zs->zst_lock);
 419                                 goto top;
 420                         }
 421 
 422                         zs->zst_offset = zs->zst_offset > zh->zst_len ?
 423                             zs->zst_offset - zh->zst_len : 0;
 424                         zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ?
 425                             zs->zst_ph_offset - zh->zst_len : 0;
 426                         zs->zst_len += zh->zst_len;
 427 
 428                         diff = zs->zst_len - zfetch_block_cap;
 429                         if (diff > 0) {
 430                                 zs->zst_ph_offset = zs->zst_ph_offset > diff ?
 431                                     zs->zst_ph_offset - diff : 0;
 432                                 zs->zst_len = zs->zst_len > diff ?
 433                                     zs->zst_len - diff : zs->zst_len;
 434                         }
 435                         zs->zst_direction = ZFETCH_BACKWARD;
 436 
 437                         break;
 438 
 439                 } else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride <
 440                     zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
 441                         /* strided forward access */
 442 
 443                         mutex_enter(&zs->zst_lock);
 444 
 445                         if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >=
 446                             zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
 447                                 mutex_exit(&zs->zst_lock);
 448                                 goto top;
 449                         }
 450 
 451                         zs->zst_offset += zs->zst_stride;
 452                         zs->zst_direction = ZFETCH_FORWARD;
 453 
 454                         break;
 455 
 456                 } else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride <
 457                     zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
 458                         /* strided reverse access */
 459 
 460                         mutex_enter(&zs->zst_lock);
 461 
 462                         if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >=
 463                             zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
 464                                 mutex_exit(&zs->zst_lock);
 465                                 goto top;
 466                         }
 467 
 468                         zs->zst_offset = zs->zst_offset > zs->zst_stride ?
 469                             zs->zst_offset - zs->zst_stride : 0;
 470                         zs->zst_ph_offset = (zs->zst_ph_offset >
 471                             (2 * zs->zst_stride)) ?
 472                             (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0;
 473                         zs->zst_direction = ZFETCH_BACKWARD;
 474 
 475                         break;
 476                 }
 477         }
 478 
 479         if (zs) {
 480                 if (reset) {
 481                         zstream_t *remove = zs;
 482 
 483                         ZFETCHSTAT_BUMP(zfetchstat_stream_resets);
 484                         rc = 0;
 485                         mutex_exit(&zs->zst_lock);
 486                         rw_exit(&zf->zf_rwlock);
 487                         rw_enter(&zf->zf_rwlock, RW_WRITER);
 488                         /*
 489                          * Relocate the stream, in case someone removes
 490                          * it while we were acquiring the WRITER lock.
 491                          */
 492                         for (zs = list_head(&zf->zf_stream); zs;
 493                             zs = list_next(&zf->zf_stream, zs)) {
 494                                 if (zs == remove) {
 495                                         dmu_zfetch_stream_remove(zf, zs);
 496                                         mutex_destroy(&zs->zst_lock);
 497                                         kmem_free(zs, sizeof (zstream_t));
 498                                         break;
 499                                 }
 500                         }
 501                 } else {
 502                         ZFETCHSTAT_BUMP(zfetchstat_stream_noresets);
 503                         rc = 1;
 504                         dmu_zfetch_dofetch(zf, zs);
 505                         mutex_exit(&zs->zst_lock);
 506                 }
 507         }
 508 out:
 509         rw_exit(&zf->zf_rwlock);
 510         return (rc);
 511 }
 512 
 513 /*
 514  * Clean-up state associated with a zfetch structure.  This frees allocated
 515  * structure members, empties the zf_stream tree, and generally makes things
 516  * nice.  This doesn't free the zfetch_t itself, that's left to the caller.
 517  */
 518 void
 519 dmu_zfetch_rele(zfetch_t *zf)
 520 {
 521         zstream_t       *zs;
 522         zstream_t       *zs_next;
 523 
 524         ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
 525 
 526         for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
 527                 zs_next = list_next(&zf->zf_stream, zs);
 528 
 529                 list_remove(&zf->zf_stream, zs);
 530                 mutex_destroy(&zs->zst_lock);
 531                 kmem_free(zs, sizeof (zstream_t));
 532         }
 533         list_destroy(&zf->zf_stream);
 534         rw_destroy(&zf->zf_rwlock);
 535 
 536         zf->zf_dnode = NULL;
 537 }
 538 
 539 /*
 540  * Given a zfetch and zstream structure, insert the zstream structure into the
 541  * AVL tree contained within the zfetch structure.  Peform the appropriate
 542  * book-keeping.  It is possible that another thread has inserted a stream which
 543  * matches one that we are about to insert, so we must be sure to check for this
 544  * case.  If one is found, return failure, and let the caller cleanup the
 545  * duplicates.
 546  */
 547 static int
 548 dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs)
 549 {
 550         zstream_t       *zs_walk;
 551         zstream_t       *zs_next;
 552 
 553         ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
 554 
 555         for (zs_walk = list_head(&zf->zf_stream); zs_walk; zs_walk = zs_next) {
 556                 zs_next = list_next(&zf->zf_stream, zs_walk);
 557 
 558                 if (dmu_zfetch_streams_equal(zs_walk, zs)) {
 559                         return (0);
 560                 }
 561         }
 562 
 563         list_insert_head(&zf->zf_stream, zs);
 564         zf->zf_stream_cnt++;
 565         return (1);
 566 }
 567 
 568 
 569 /*
 570  * Walk the list of zstreams in the given zfetch, find an old one (by time), and
 571  * reclaim it for use by the caller.
 572  */
 573 static zstream_t *
 574 dmu_zfetch_stream_reclaim(zfetch_t *zf)
 575 {
 576         zstream_t       *zs;
 577 
 578         if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
 579                 return (0);
 580 
 581         for (zs = list_head(&zf->zf_stream); zs;
 582             zs = list_next(&zf->zf_stream, zs)) {
 583 
 584                 if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap)
 585                         break;
 586         }
 587 
 588         if (zs) {
 589                 dmu_zfetch_stream_remove(zf, zs);
 590                 mutex_destroy(&zs->zst_lock);
 591                 bzero(zs, sizeof (zstream_t));
 592         } else {
 593                 zf->zf_alloc_fail++;
 594         }
 595         rw_exit(&zf->zf_rwlock);
 596 
 597         return (zs);
 598 }
 599 
 600 /*
 601  * Given a zfetch and zstream structure, remove the zstream structure from its
 602  * container in the zfetch structure.  Perform the appropriate book-keeping.
 603  */
 604 static void
 605 dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
 606 {
 607         ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
 608 
 609         list_remove(&zf->zf_stream, zs);
 610         zf->zf_stream_cnt--;
 611 }
 612 
 613 static int
 614 dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2)
 615 {
 616         if (zs1->zst_offset != zs2->zst_offset)
 617                 return (0);
 618 
 619         if (zs1->zst_len != zs2->zst_len)
 620                 return (0);
 621 
 622         if (zs1->zst_stride != zs2->zst_stride)
 623                 return (0);
 624 
 625         if (zs1->zst_ph_offset != zs2->zst_ph_offset)
 626                 return (0);
 627 
 628         if (zs1->zst_cap != zs2->zst_cap)
 629                 return (0);
 630 
 631         if (zs1->zst_direction != zs2->zst_direction)
 632                 return (0);
 633 
 634         return (1);
 635 }
 636 
 637 /*
 638  * This is the prefetch entry point.  It calls all of the other dmu_zfetch
 639  * routines to create, delete, find, or operate upon prefetch streams.
 640  */
 641 void
 642 dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
 643 {
 644         zstream_t       zst;
 645         zstream_t       *newstream;
 646         boolean_t       fetched;
 647         int             inserted;
 648         unsigned int    blkshft;
 649         uint64_t        blksz;
 650 
 651         if (zfs_prefetch_disable)
 652                 return;
 653 
 654         /* files that aren't ln2 blocksz are only one block -- nothing to do */
 655         if (!zf->zf_dnode->dn_datablkshift)
 656                 return;
 657 
 658         /* convert offset and size, into blockid and nblocks */
 659         blkshft = zf->zf_dnode->dn_datablkshift;
 660         blksz = (1 << blkshft);
 661 
 662         bzero(&zst, sizeof (zstream_t));
 663         zst.zst_offset = offset >> blkshft;
 664         zst.zst_len = (P2ROUNDUP(offset + size, blksz) -
 665             P2ALIGN(offset, blksz)) >> blkshft;
 666 
 667         fetched = dmu_zfetch_find(zf, &zst, prefetched);
 668         if (fetched) {
 669                 ZFETCHSTAT_BUMP(zfetchstat_hits);
 670         } else {
 671                 ZFETCHSTAT_BUMP(zfetchstat_misses);
 672                 fetched = dmu_zfetch_colinear(zf, &zst);
 673                 if (fetched) {
 674                         ZFETCHSTAT_BUMP(zfetchstat_colinear_hits);
 675                 } else {
 676                         ZFETCHSTAT_BUMP(zfetchstat_colinear_misses);
 677                 }
 678         }
 679 
 680         if (!fetched) {
 681                 newstream = dmu_zfetch_stream_reclaim(zf);
 682 
 683                 /*
 684                  * we still couldn't find a stream, drop the lock, and allocate
 685                  * one if possible.  Otherwise, give up and go home.
 686                  */
 687                 if (newstream) {
 688                         ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes);
 689                 } else {
 690                         uint64_t        maxblocks;
 691                         uint32_t        max_streams;
 692                         uint32_t        cur_streams;
 693 
 694                         ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures);
 695                         cur_streams = zf->zf_stream_cnt;
 696                         maxblocks = zf->zf_dnode->dn_maxblkid;
 697 
 698                         max_streams = MIN(zfetch_max_streams,
 699                             (maxblocks / zfetch_block_cap));
 700                         if (max_streams == 0) {
 701                                 max_streams++;
 702                         }
 703 
 704                         if (cur_streams >= max_streams) {
 705                                 return;
 706                         }
 707                         newstream = kmem_zalloc(sizeof (zstream_t), KM_SLEEP);
 708                 }
 709 
 710                 newstream->zst_offset = zst.zst_offset;
 711                 newstream->zst_len = zst.zst_len;
 712                 newstream->zst_stride = zst.zst_len;
 713                 newstream->zst_ph_offset = zst.zst_len + zst.zst_offset;
 714                 newstream->zst_cap = zst.zst_len;
 715                 newstream->zst_direction = ZFETCH_FORWARD;
 716                 newstream->zst_last = ddi_get_lbolt();
 717 
 718                 mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL);
 719 
 720                 rw_enter(&zf->zf_rwlock, RW_WRITER);
 721                 inserted = dmu_zfetch_stream_insert(zf, newstream);
 722                 rw_exit(&zf->zf_rwlock);
 723 
 724                 if (!inserted) {
 725                         mutex_destroy(&newstream->zst_lock);
 726                         kmem_free(newstream, sizeof (zstream_t));
 727                 }
 728         }
 729 }