1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/zfs_context.h>
  27 #include <sys/dnode.h>
  28 #include <sys/dmu_objset.h>
  29 #include <sys/dmu_zfetch.h>
  30 #include <sys/dmu.h>
  31 #include <sys/dbuf.h>
  32 #include <sys/kstat.h>
  33 
/*
 * I'm against tune-ables, but these should probably exist as tweakable globals
 * until we can get this working the way we want it to.
 */

/* when non-zero, dmu_zfetch() returns immediately without prefetching */
int zfs_prefetch_disable = 0;

/* max # of streams per zfetch */
uint32_t        zfetch_max_streams = 8;
/* min time, in seconds, since a stream's last prefetch before it is reclaimed */
uint32_t        zfetch_min_sec_reap = 2;
/* max number of blocks to fetch at a time */
uint32_t        zfetch_block_cap = 256;
/*
 * number of bytes in an array_read at which we stop prefetching (1MB);
 * not referenced within this file
 */
uint64_t        zfetch_array_rd_sz = 1024 * 1024;
  49 
/*
 * Forward declarations for the static routines in this file; they reference
 * one another, so they are all declared before the first definition.
 */
static int              dmu_zfetch_colinear(zfetch_t *, zstream_t *);
static void             dmu_zfetch_dofetch(zfetch_t *, zstream_t *);
static uint64_t         dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t);
static uint64_t         dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t);
static int              dmu_zfetch_find(zfetch_t *, zstream_t *, int);
static int              dmu_zfetch_stream_insert(zfetch_t *, zstream_t *);
static zstream_t        *dmu_zfetch_stream_reclaim(zfetch_t *);
static void             dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
static int              dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
  60 
/*
 * Named kstat counters for the prefetch code.  Each member is bumped through
 * the ZFETCHSTAT_BUMP() macro at the points noted below.
 */
typedef struct zfetch_stats {
        /* dmu_zfetch(): dmu_zfetch_find() returned non-zero */
        kstat_named_t zfetchstat_hits;
        /* dmu_zfetch(): dmu_zfetch_find() returned zero */
        kstat_named_t zfetchstat_misses;
        /* dmu_zfetch(): after a miss, dmu_zfetch_colinear() returned non-zero */
        kstat_named_t zfetchstat_colinear_hits;
        /* dmu_zfetch(): after a miss, dmu_zfetch_colinear() returned zero */
        kstat_named_t zfetchstat_colinear_misses;
        /* find: read fell inside a stream's window and was already prefetched */
        kstat_named_t zfetchstat_stride_hits;
        /* find: read fell inside a stream's window but was not prefetched */
        kstat_named_t zfetchstat_stride_misses;
        /* dmu_zfetch(): dmu_zfetch_stream_reclaim() handed back a stream */
        kstat_named_t zfetchstat_reclaim_successes;
        /* dmu_zfetch(): dmu_zfetch_stream_reclaim() returned NULL */
        kstat_named_t zfetchstat_reclaim_failures;
        /* find: a matching stream was torn down instead of being used */
        kstat_named_t zfetchstat_stream_resets;
        /* find: a matching stream was kept and a prefetch was issued on it */
        kstat_named_t zfetchstat_stream_noresets;
        /* find: a stream with zst_len == 0 was skipped */
        kstat_named_t zfetchstat_bogus_streams;
} zfetch_stats_t;
  74 
/*
 * The single instance of the counters, installed as the kstat's data by
 * zfetch_init().  The initializers are positional, so their order must match
 * the member order of zfetch_stats_t.  Note that the exported names
 * "streams_resets" and "streams_noresets" are plural while the corresponding
 * members are zfetchstat_stream_resets / zfetchstat_stream_noresets.
 */
static zfetch_stats_t zfetch_stats = {
        { "hits",                       KSTAT_DATA_UINT64 },
        { "misses",                     KSTAT_DATA_UINT64 },
        { "colinear_hits",              KSTAT_DATA_UINT64 },
        { "colinear_misses",            KSTAT_DATA_UINT64 },
        { "stride_hits",                KSTAT_DATA_UINT64 },
        { "stride_misses",              KSTAT_DATA_UINT64 },
        { "reclaim_successes",          KSTAT_DATA_UINT64 },
        { "reclaim_failures",           KSTAT_DATA_UINT64 },
        { "streams_resets",             KSTAT_DATA_UINT64 },
        { "streams_noresets",           KSTAT_DATA_UINT64 },
        { "bogus_streams",              KSTAT_DATA_UINT64 },
};
  88 
  89 #define ZFETCHSTAT_INCR(stat, val) \
  90         atomic_add_64(&zfetch_stats.stat.value.ui64, (val));
  91 
  92 #define ZFETCHSTAT_BUMP(stat)           ZFETCHSTAT_INCR(stat, 1);
  93 
/*
 * Handle for the "zfetchstats" kstat: assigned by zfetch_init() (NULL if
 * creation failed) and deleted and reset to NULL by zfetch_fini().
 */
kstat_t         *zfetch_ksp;
  95 
  96 /*
  97  * Given a zfetch structure and a zstream structure, determine whether the
  98  * blocks to be read are part of a co-linear pair of existing prefetch
  99  * streams.  If a set is found, coalesce the streams, removing one, and
 100  * configure the prefetch so it looks for a strided access pattern.
 101  *
 102  * In other words: if we find two sequential access streams that are
 103  * the same length and distance N appart, and this read is N from the
 104  * last stream, then we are probably in a strided access pattern.  So
 105  * combine the two sequential streams into a single strided stream.
 106  *
 107  * If no co-linear streams are found, return NULL.
 108  */
 109 static int
 110 dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh)
 111 {
 112         zstream_t       *z_walk;
 113         zstream_t       *z_comp;
 114 
 115         if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
 116                 return (0);
 117 
 118         if (zh == NULL) {
 119                 rw_exit(&zf->zf_rwlock);
 120                 return (0);
 121         }
 122 
 123         for (z_walk = list_head(&zf->zf_stream); z_walk;
 124             z_walk = list_next(&zf->zf_stream, z_walk)) {
 125                 for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp;
 126                     z_comp = list_next(&zf->zf_stream, z_comp)) {
 127                         int64_t         diff;
 128 
 129                         if (z_walk->zst_len != z_walk->zst_stride ||
 130                             z_comp->zst_len != z_comp->zst_stride) {
 131                                 continue;
 132                         }
 133 
 134                         diff = z_comp->zst_offset - z_walk->zst_offset;
 135                         if (z_comp->zst_offset + diff == zh->zst_offset) {
 136                                 z_walk->zst_offset = zh->zst_offset;
 137                                 z_walk->zst_direction = diff < 0 ? -1 : 1;
 138                                 z_walk->zst_stride =
 139                                     diff * z_walk->zst_direction;
 140                                 z_walk->zst_ph_offset =
 141                                     zh->zst_offset + z_walk->zst_stride;
 142                                 dmu_zfetch_stream_remove(zf, z_comp);
 143                                 mutex_destroy(&z_comp->zst_lock);
 144                                 kmem_free(z_comp, sizeof (zstream_t));
 145 
 146                                 dmu_zfetch_dofetch(zf, z_walk);
 147 
 148                                 rw_exit(&zf->zf_rwlock);
 149                                 return (1);
 150                         }
 151 
 152                         diff = z_walk->zst_offset - z_comp->zst_offset;
 153                         if (z_walk->zst_offset + diff == zh->zst_offset) {
 154                                 z_walk->zst_offset = zh->zst_offset;
 155                                 z_walk->zst_direction = diff < 0 ? -1 : 1;
 156                                 z_walk->zst_stride =
 157                                     diff * z_walk->zst_direction;
 158                                 z_walk->zst_ph_offset =
 159                                     zh->zst_offset + z_walk->zst_stride;
 160                                 dmu_zfetch_stream_remove(zf, z_comp);
 161                                 mutex_destroy(&z_comp->zst_lock);
 162                                 kmem_free(z_comp, sizeof (zstream_t));
 163 
 164                                 dmu_zfetch_dofetch(zf, z_walk);
 165 
 166                                 rw_exit(&zf->zf_rwlock);
 167                                 return (1);
 168                         }
 169                 }
 170         }
 171 
 172         rw_exit(&zf->zf_rwlock);
 173         return (0);
 174 }
 175 
 176 /*
 177  * Given a zstream_t, determine the bounds of the prefetch.  Then call the
 178  * routine that actually prefetches the individual blocks.
 179  */
 180 static void
 181 dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
 182 {
 183         uint64_t        prefetch_tail;
 184         uint64_t        prefetch_limit;
 185         uint64_t        prefetch_ofst;
 186         uint64_t        prefetch_len;
 187         uint64_t        blocks_fetched;
 188 
 189         zs->zst_stride = MAX((int64_t)zs->zst_stride, zs->zst_len);
 190         zs->zst_cap = MIN(zfetch_block_cap, 2 * zs->zst_cap);
 191 
 192         prefetch_tail = MAX((int64_t)zs->zst_ph_offset,
 193             (int64_t)(zs->zst_offset + zs->zst_stride));
 194         /*
 195          * XXX: use a faster division method?
 196          */
 197         prefetch_limit = zs->zst_offset + zs->zst_len +
 198             (zs->zst_cap * zs->zst_stride) / zs->zst_len;
 199 
 200         while (prefetch_tail < prefetch_limit) {
 201                 prefetch_ofst = zs->zst_offset + zs->zst_direction *
 202                     (prefetch_tail - zs->zst_offset);
 203 
 204                 prefetch_len = zs->zst_len;
 205 
 206                 /*
 207                  * Don't prefetch beyond the end of the file, if working
 208                  * backwards.
 209                  */
 210                 if ((zs->zst_direction == ZFETCH_BACKWARD) &&
 211                     (prefetch_ofst > prefetch_tail)) {
 212                         prefetch_len += prefetch_ofst;
 213                         prefetch_ofst = 0;
 214                 }
 215 
 216                 /* don't prefetch more than we're supposed to */
 217                 if (prefetch_len > zs->zst_len)
 218                         break;
 219 
 220                 blocks_fetched = dmu_zfetch_fetch(zf->zf_dnode,
 221                     prefetch_ofst, zs->zst_len);
 222 
 223                 prefetch_tail += zs->zst_stride;
 224                 /* stop if we've run out of stuff to prefetch */
 225                 if (blocks_fetched < zs->zst_len)
 226                         break;
 227         }
 228         zs->zst_ph_offset = prefetch_tail;
 229         zs->zst_last = ddi_get_lbolt();
 230 }
 231 
 232 void
 233 zfetch_init(void)
 234 {
 235 
 236         zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
 237             KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
 238             KSTAT_FLAG_VIRTUAL);
 239 
 240         if (zfetch_ksp != NULL) {
 241                 zfetch_ksp->ks_data = &zfetch_stats;
 242                 kstat_install(zfetch_ksp);
 243         }
 244 }
 245 
 246 void
 247 zfetch_fini(void)
 248 {
 249         if (zfetch_ksp != NULL) {
 250                 kstat_delete(zfetch_ksp);
 251                 zfetch_ksp = NULL;
 252         }
 253 }
 254 
 255 /*
 256  * This takes a pointer to a zfetch structure and a dnode.  It performs the
 257  * necessary setup for the zfetch structure, grokking data from the
 258  * associated dnode.
 259  */
 260 void
 261 dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
 262 {
 263         if (zf == NULL) {
 264                 return;
 265         }
 266 
 267         zf->zf_dnode = dno;
 268         zf->zf_stream_cnt = 0;
 269         zf->zf_alloc_fail = 0;
 270 
 271         list_create(&zf->zf_stream, sizeof (zstream_t),
 272             offsetof(zstream_t, zst_node));
 273 
 274         rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
 275 }
 276 
 277 /*
 278  * This function computes the actual size, in blocks, that can be prefetched,
 279  * and fetches it.
 280  */
 281 static uint64_t
 282 dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks)
 283 {
 284         uint64_t        fetchsz;
 285         uint64_t        i;
 286 
 287         fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks);
 288 
 289         for (i = 0; i < fetchsz; i++) {
 290                 dbuf_prefetch(dn, blkid + i);
 291         }
 292 
 293         return (fetchsz);
 294 }
 295 
 296 /*
 297  * this function returns the number of blocks that would be prefetched, based
 298  * upon the supplied dnode, blockid, and nblks.  This is used so that we can
 299  * update streams in place, and then prefetch with their old value after the
 300  * fact.  This way, we can delay the prefetch, but subsequent accesses to the
 301  * stream won't result in the same data being prefetched multiple times.
 302  */
 303 static uint64_t
 304 dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks)
 305 {
 306         uint64_t        fetchsz;
 307 
 308         if (blkid > dn->dn_maxblkid) {
 309                 return (0);
 310         }
 311 
 312         /* compute fetch size */
 313         if (blkid + nblks + 1 > dn->dn_maxblkid) {
 314                 fetchsz = (dn->dn_maxblkid - blkid) + 1;
 315                 ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid);
 316         } else {
 317                 fetchsz = nblks;
 318         }
 319 
 320 
 321         return (fetchsz);
 322 }
 323 
/*
 * Given a zfetch and a zstream structure, see if there is an associated zstream
 * for this block read.  If so, it starts a prefetch for the stream it
 * located and returns true, otherwise it returns false.
 *
 *   zf          prefetch state whose stream list is searched
 *   zh          describes the read (zst_offset / zst_len, in blocks); a NULL
 *               zh yields 0 without taking any lock
 *   prefetched  non-zero if the caller considers the read already prefetched
 *
 * Returns 1 when the read fell inside an existing stream's window and was
 * already prefetched, or when a matching stream was updated and a prefetch
 * was issued on it.  Returns 0 when nothing matched, or when the matching
 * stream was "reset" (removed and freed) instead.
 *
 * Locking: zf_rwlock is held as reader for the search.  A candidate stream's
 * zst_lock is taken before the stream is modified and the match condition is
 * re-evaluated under that lock; if it no longer holds, the search restarts
 * from the head of the list.  Every `break' out of the loop leaves with
 * zs->zst_lock held.
 */
static int
dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
{
        zstream_t       *zs;
        int64_t         diff;
        /* only the sequential cases below recompute this */
        int             reset = !prefetched;
        int             rc = 0;

        if (zh == NULL)
                return (0);

        /*
         * XXX: This locking strategy is a bit coarse; however, its impact has
         * yet to be tested.  If this turns out to be an issue, it can be
         * modified in a number of different ways.
         */

        rw_enter(&zf->zf_rwlock, RW_READER);
top:

        for (zs = list_head(&zf->zf_stream); zs;
            zs = list_next(&zf->zf_stream, zs)) {

                /*
                 * XXX - should this be an assert?
                 */
                if (zs->zst_len == 0) {
                        /* bogus stream */
                        ZFETCHSTAT_BUMP(zfetchstat_bogus_streams);
                        continue;
                }

                /*
                 * We hit this case when we are in a strided prefetch stream:
                 * we will read "len" blocks before "striding".
                 */
                if (zh->zst_offset >= zs->zst_offset &&
                    zh->zst_offset < zs->zst_offset + zs->zst_len) {
                        if (prefetched) {
                                /* already fetched */
                                ZFETCHSTAT_BUMP(zfetchstat_stride_hits);
                                rc = 1;
                                goto out;
                        } else {
                                /* counted, then the cases below still run */
                                ZFETCHSTAT_BUMP(zfetchstat_stride_misses);
                        }
                }

                /*
                 * This is the forward sequential read case: we increment
                 * len by one each time we hit here, so we will enter this
                 * case on every read.
                 */
                if (zh->zst_offset == zs->zst_offset + zs->zst_len) {

                        /*
                         * A read that was not prefetched on a stream that
                         * has already grown past one block causes the stream
                         * to be reset below.
                         */
                        reset = !prefetched && zs->zst_len > 1;

                        mutex_enter(&zs->zst_lock);

                        /* re-check under the lock; restart if it changed */
                        if (zh->zst_offset != zs->zst_offset + zs->zst_len) {
                                mutex_exit(&zs->zst_lock);
                                goto top;
                        }
                        zs->zst_len += zh->zst_len;
                        /*
                         * Keep the stream no longer than zfetch_block_cap by
                         * sliding its start forward by the excess.
                         */
                        diff = zs->zst_len - zfetch_block_cap;
                        if (diff > 0) {
                                zs->zst_offset += diff;
                                zs->zst_len = zs->zst_len > diff ?
                                    zs->zst_len - diff : 0;
                        }
                        zs->zst_direction = ZFETCH_FORWARD;

                        break;

                /*
                 * Same as above, but reading backwards through the file.
                 */
                } else if (zh->zst_offset == zs->zst_offset - zh->zst_len) {
                        /* backwards sequential access */

                        reset = !prefetched && zs->zst_len > 1;

                        mutex_enter(&zs->zst_lock);

                        /* re-check under the lock; restart if it changed */
                        if (zh->zst_offset != zs->zst_offset - zh->zst_len) {
                                mutex_exit(&zs->zst_lock);
                                goto top;
                        }

                        /* move the start (and prefetch mark) back, floor 0 */
                        zs->zst_offset = zs->zst_offset > zh->zst_len ?
                            zs->zst_offset - zh->zst_len : 0;
                        zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ?
                            zs->zst_ph_offset - zh->zst_len : 0;
                        zs->zst_len += zh->zst_len;

                        /* trim any length in excess of zfetch_block_cap */
                        diff = zs->zst_len - zfetch_block_cap;
                        if (diff > 0) {
                                zs->zst_ph_offset = zs->zst_ph_offset > diff ?
                                    zs->zst_ph_offset - diff : 0;
                                zs->zst_len = zs->zst_len > diff ?
                                    zs->zst_len - diff : zs->zst_len;
                        }
                        zs->zst_direction = ZFETCH_BACKWARD;

                        break;

                } else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride <
                    zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
                        /*
                         * strided forward access: the read lands within
                         * zst_len blocks of (zst_offset + zst_stride) on a
                         * stream whose stride differs from its length.
                         * NOTE(review): presumably relies on unsigned
                         * wrap-around to reject reads below that point --
                         * confirm against the zstream_t field types.
                         */

                        mutex_enter(&zs->zst_lock);

                        /* re-check under the lock; restart if it changed */
                        if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >=
                            zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
                                mutex_exit(&zs->zst_lock);
                                goto top;
                        }

                        zs->zst_offset += zs->zst_stride;
                        zs->zst_direction = ZFETCH_FORWARD;

                        break;

                } else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride <
                    zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
                        /* strided reverse access */

                        mutex_enter(&zs->zst_lock);

                        /* re-check under the lock; restart if it changed */
                        if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >=
                            zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
                                mutex_exit(&zs->zst_lock);
                                goto top;
                        }

                        /* step back one stride, floor 0 */
                        zs->zst_offset = zs->zst_offset > zs->zst_stride ?
                            zs->zst_offset - zs->zst_stride : 0;
                        zs->zst_ph_offset = (zs->zst_ph_offset >
                            (2 * zs->zst_stride)) ?
                            (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0;
                        zs->zst_direction = ZFETCH_BACKWARD;

                        break;
                }
        }

        /* zs is non-NULL only if a case above matched; zst_lock is held */
        if (zs) {
                if (reset) {
                        zstream_t *remove = zs;

                        ZFETCHSTAT_BUMP(zfetchstat_stream_resets);
                        rc = 0;
                        /*
                         * Removal needs the writer lock, so both locks are
                         * dropped and zf_rwlock is re-taken as writer.
                         */
                        mutex_exit(&zs->zst_lock);
                        rw_exit(&zf->zf_rwlock);
                        rw_enter(&zf->zf_rwlock, RW_WRITER);
                        /*
                         * Relocate the stream, in case someone removes
                         * it while we were acquiring the WRITER lock.
                         */
                        for (zs = list_head(&zf->zf_stream); zs;
                            zs = list_next(&zf->zf_stream, zs)) {
                                if (zs == remove) {
                                        dmu_zfetch_stream_remove(zf, zs);
                                        mutex_destroy(&zs->zst_lock);
                                        kmem_free(zs, sizeof (zstream_t));
                                        break;
                                }
                        }
                } else {
                        ZFETCHSTAT_BUMP(zfetchstat_stream_noresets);
                        rc = 1;
                        /* prefetch while still holding the stream's lock */
                        dmu_zfetch_dofetch(zf, zs);
                        mutex_exit(&zs->zst_lock);
                }
        }
out:
        /* releases whichever mode (reader or writer) is currently held */
        rw_exit(&zf->zf_rwlock);
        return (rc);
}
 508 
 509 /*
 510  * Clean-up state associated with a zfetch structure.  This frees allocated
 511  * structure members, empties the zf_stream tree, and generally makes things
 512  * nice.  This doesn't free the zfetch_t itself, that's left to the caller.
 513  */
 514 void
 515 dmu_zfetch_rele(zfetch_t *zf)
 516 {
 517         zstream_t       *zs;
 518         zstream_t       *zs_next;
 519 
 520         ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
 521 
 522         for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
 523                 zs_next = list_next(&zf->zf_stream, zs);
 524 
 525                 list_remove(&zf->zf_stream, zs);
 526                 mutex_destroy(&zs->zst_lock);
 527                 kmem_free(zs, sizeof (zstream_t));
 528         }
 529         list_destroy(&zf->zf_stream);
 530         rw_destroy(&zf->zf_rwlock);
 531 
 532         zf->zf_dnode = NULL;
 533 }
 534 
 535 /*
 536  * Given a zfetch and zstream structure, insert the zstream structure into the
 537  * AVL tree contained within the zfetch structure.  Peform the appropriate
 538  * book-keeping.  It is possible that another thread has inserted a stream which
 539  * matches one that we are about to insert, so we must be sure to check for this
 540  * case.  If one is found, return failure, and let the caller cleanup the
 541  * duplicates.
 542  */
 543 static int
 544 dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs)
 545 {
 546         zstream_t       *zs_walk;
 547         zstream_t       *zs_next;
 548 
 549         ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
 550 
 551         for (zs_walk = list_head(&zf->zf_stream); zs_walk; zs_walk = zs_next) {
 552                 zs_next = list_next(&zf->zf_stream, zs_walk);
 553 
 554                 if (dmu_zfetch_streams_equal(zs_walk, zs)) {
 555                         return (0);
 556                 }
 557         }
 558 
 559         list_insert_head(&zf->zf_stream, zs);
 560         zf->zf_stream_cnt++;
 561         return (1);
 562 }
 563 
 564 
 565 /*
 566  * Walk the list of zstreams in the given zfetch, find an old one (by time), and
 567  * reclaim it for use by the caller.
 568  */
 569 static zstream_t *
 570 dmu_zfetch_stream_reclaim(zfetch_t *zf)
 571 {
 572         zstream_t       *zs;
 573 
 574         if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
 575                 return (0);
 576 
 577         for (zs = list_head(&zf->zf_stream); zs;
 578             zs = list_next(&zf->zf_stream, zs)) {
 579 
 580                 if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap)
 581                         break;
 582         }
 583 
 584         if (zs) {
 585                 dmu_zfetch_stream_remove(zf, zs);
 586                 mutex_destroy(&zs->zst_lock);
 587                 bzero(zs, sizeof (zstream_t));
 588         } else {
 589                 zf->zf_alloc_fail++;
 590         }
 591         rw_exit(&zf->zf_rwlock);
 592 
 593         return (zs);
 594 }
 595 
 596 /*
 597  * Given a zfetch and zstream structure, remove the zstream structure from its
 598  * container in the zfetch structure.  Perform the appropriate book-keeping.
 599  */
 600 static void
 601 dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
 602 {
 603         ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
 604 
 605         list_remove(&zf->zf_stream, zs);
 606         zf->zf_stream_cnt--;
 607 }
 608 
 609 static int
 610 dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2)
 611 {
 612         if (zs1->zst_offset != zs2->zst_offset)
 613                 return (0);
 614 
 615         if (zs1->zst_len != zs2->zst_len)
 616                 return (0);
 617 
 618         if (zs1->zst_stride != zs2->zst_stride)
 619                 return (0);
 620 
 621         if (zs1->zst_ph_offset != zs2->zst_ph_offset)
 622                 return (0);
 623 
 624         if (zs1->zst_cap != zs2->zst_cap)
 625                 return (0);
 626 
 627         if (zs1->zst_direction != zs2->zst_direction)
 628                 return (0);
 629 
 630         return (1);
 631 }
 632 
 633 /*
 634  * This is the prefetch entry point.  It calls all of the other dmu_zfetch
 635  * routines to create, delete, find, or operate upon prefetch streams.
 636  */
 637 void
 638 dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
 639 {
 640         zstream_t       zst;
 641         zstream_t       *newstream;
 642         int             fetched;
 643         int             inserted;
 644         unsigned int    blkshft;
 645         uint64_t        blksz;
 646 
 647         if (zfs_prefetch_disable)
 648                 return;
 649 
 650         /* files that aren't ln2 blocksz are only one block -- nothing to do */
 651         if (!zf->zf_dnode->dn_datablkshift)
 652                 return;
 653 
 654         /* convert offset and size, into blockid and nblocks */
 655         blkshft = zf->zf_dnode->dn_datablkshift;
 656         blksz = (1 << blkshft);
 657 
 658         bzero(&zst, sizeof (zstream_t));
 659         zst.zst_offset = offset >> blkshft;
 660         zst.zst_len = (P2ROUNDUP(offset + size, blksz) -
 661             P2ALIGN(offset, blksz)) >> blkshft;
 662 
 663         fetched = dmu_zfetch_find(zf, &zst, prefetched);
 664         if (fetched) {
 665                 ZFETCHSTAT_BUMP(zfetchstat_hits);
 666         } else {
 667                 ZFETCHSTAT_BUMP(zfetchstat_misses);
 668                 if (fetched = dmu_zfetch_colinear(zf, &zst)) {
 669                         ZFETCHSTAT_BUMP(zfetchstat_colinear_hits);
 670                 } else {
 671                         ZFETCHSTAT_BUMP(zfetchstat_colinear_misses);
 672                 }
 673         }
 674 
 675         if (!fetched) {
 676                 newstream = dmu_zfetch_stream_reclaim(zf);
 677 
 678                 /*
 679                  * we still couldn't find a stream, drop the lock, and allocate
 680                  * one if possible.  Otherwise, give up and go home.
 681                  */
 682                 if (newstream) {
 683                         ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes);
 684                 } else {
 685                         uint64_t        maxblocks;
 686                         uint32_t        max_streams;
 687                         uint32_t        cur_streams;
 688 
 689                         ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures);
 690                         cur_streams = zf->zf_stream_cnt;
 691                         maxblocks = zf->zf_dnode->dn_maxblkid;
 692 
 693                         max_streams = MIN(zfetch_max_streams,
 694                             (maxblocks / zfetch_block_cap));
 695                         if (max_streams == 0) {
 696                                 max_streams++;
 697                         }
 698 
 699                         if (cur_streams >= max_streams) {
 700                                 return;
 701                         }
 702                         newstream = kmem_zalloc(sizeof (zstream_t), KM_SLEEP);
 703                 }
 704 
 705                 newstream->zst_offset = zst.zst_offset;
 706                 newstream->zst_len = zst.zst_len;
 707                 newstream->zst_stride = zst.zst_len;
 708                 newstream->zst_ph_offset = zst.zst_len + zst.zst_offset;
 709                 newstream->zst_cap = zst.zst_len;
 710                 newstream->zst_direction = ZFETCH_FORWARD;
 711                 newstream->zst_last = ddi_get_lbolt();
 712 
 713                 mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL);
 714 
 715                 rw_enter(&zf->zf_rwlock, RW_WRITER);
 716                 inserted = dmu_zfetch_stream_insert(zf, newstream);
 717                 rw_exit(&zf->zf_rwlock);
 718 
 719                 if (!inserted) {
 720                         mutex_destroy(&newstream->zst_lock);
 721                         kmem_free(newstream, sizeof (zstream_t));
 722                 }
 723         }
 724 }