Print this page
3741 zfs needs better comments
Submitted by:   Will Andrews <willa@spectralogic.com>
Submitted by:   Justin Gibbs <justing@spectralogic.com>
Submitted by:   Alan Somers <alans@spectralogic.com>
Reviewed by:    Matthew Ahrens <mahrens@delphix.com>


  31 #include <sys/dbuf.h>
  32 #include <sys/kstat.h>
  33 
  34 /*
  35  * I'm against tune-ables, but these should probably exist as tweakable globals
  36  * until we can get this working the way we want it to.
  37  */
  38 
  39 int zfs_prefetch_disable = 0;
  40 
  41 /* max # of streams per zfetch */
  42 uint32_t        zfetch_max_streams = 8;
  43 /* min time before stream reclaim */
  44 uint32_t        zfetch_min_sec_reap = 2;
  45 /* max number of blocks to fetch at a time */
  46 uint32_t        zfetch_block_cap = 256;
  47 /* number of bytes in an array_read at which we stop prefetching (1MB) */
  48 uint64_t        zfetch_array_rd_sz = 1024 * 1024;
  49 
  50 /* forward decls for static routines */
  51 static int              dmu_zfetch_colinear(zfetch_t *, zstream_t *);
  52 static void             dmu_zfetch_dofetch(zfetch_t *, zstream_t *);
  53 static uint64_t         dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t);
  54 static uint64_t         dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t);
  55 static int              dmu_zfetch_find(zfetch_t *, zstream_t *, int);
  56 static int              dmu_zfetch_stream_insert(zfetch_t *, zstream_t *);
  57 static zstream_t        *dmu_zfetch_stream_reclaim(zfetch_t *);
  58 static void             dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
  59 static int              dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
  60 
  61 typedef struct zfetch_stats {
  62         kstat_named_t zfetchstat_hits; /* dmu_zfetch_find() found a stream */
  63         kstat_named_t zfetchstat_misses; /* dmu_zfetch_find() found none */
  64         kstat_named_t zfetchstat_colinear_hits; /* after a miss, dmu_zfetch_colinear() succeeded */
  65         kstat_named_t zfetchstat_colinear_misses; /* after a miss, dmu_zfetch_colinear() failed */
  66         kstat_named_t zfetchstat_stride_hits;
  67         kstat_named_t zfetchstat_stride_misses;
  68         kstat_named_t zfetchstat_reclaim_successes; /* dmu_zfetch_stream_reclaim() returned a stream */
  69         kstat_named_t zfetchstat_reclaim_failures;
  70         kstat_named_t zfetchstat_stream_resets;
  71         kstat_named_t zfetchstat_stream_noresets;
  72         kstat_named_t zfetchstat_bogus_streams;
  73 } zfetch_stats_t;
  74 
  75 static zfetch_stats_t zfetch_stats = {


  87 };
  88 
  89 #define ZFETCHSTAT_INCR(stat, val) \
  90         atomic_add_64(&zfetch_stats.stat.value.ui64, (val)); /* add val to the named zfetch_stats counter; NOTE(review): expansion already ends in ';' */
  91 
  92 #define ZFETCHSTAT_BUMP(stat)           ZFETCHSTAT_INCR(stat, 1); /* add 1 to the named counter; NOTE(review): also ends in ';' */
  93 
  94 kstat_t         *zfetch_ksp;
  95 
  96 /*
  97  * Given a zfetch structure and a zstream structure, determine whether the
  98  * blocks to be read are part of a co-linear pair of existing prefetch
  99  * streams.  If a set is found, coalesce the streams, removing one, and
 100  * configure the prefetch so it looks for a strided access pattern.
 101  *
 102  * In other words: if we find two sequential access streams that are
 103  * the same length and distance N appart, and this read is N from the
 104  * last stream, then we are probably in a strided access pattern.  So
 105  * combine the two sequential streams into a single strided stream.
 106  *
 107  * If no co-linear streams are found, return 0.
 108  */
 109 static int
 110 dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh)
 111 {
 112         zstream_t       *z_walk;
 113         zstream_t       *z_comp;
 114 
 115         if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
 116                 return (0);
 117 
 118         if (zh == NULL) {
 119                 rw_exit(&zf->zf_rwlock);
 120                 return (0);
 121         }
 122 
 123         for (z_walk = list_head(&zf->zf_stream); z_walk;
 124             z_walk = list_next(&zf->zf_stream, z_walk)) {
 125                 for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp;
 126                     z_comp = list_next(&zf->zf_stream, z_comp)) {
 127                         int64_t         diff;
 128 
 129                         if (z_walk->zst_len != z_walk->zst_stride ||


 309                 return (0);
 310         }
 311 
 312         /* compute fetch size */
 313         if (blkid + nblks + 1 > dn->dn_maxblkid) {
 314                 fetchsz = (dn->dn_maxblkid - blkid) + 1;
 315                 ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid);
 316         } else {
 317                 fetchsz = nblks;
 318         }
 319 
 320 
 321         return (fetchsz);
 322 }
 323 
 324 /*
 325  * Given a zfetch and a zstream structure, see if there is an associated
 326  * zstream for this block read.  If so, start a prefetch for the stream that
 327  * was located and return true; otherwise, return false.
 328  */
 329 static int
 330 dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
 331 {
 332         zstream_t       *zs;
 333         int64_t         diff;
 334         int             reset = !prefetched;
 335         int             rc = 0;
 336 
 337         if (zh == NULL)
 338                 return (0);
 339 
 340         /*
 341          * XXX: This locking strategy is a bit coarse; however, its impact has
 342          * yet to be tested.  If this turns out to be an issue, it can be
 343          * modified in a number of different ways.
 344          */
 345 
 346         rw_enter(&zf->zf_rwlock, RW_READER);
 347 top:
 348 
 349         for (zs = list_head(&zf->zf_stream); zs;


 622                 return (0);
 623 
 624         if (zs1->zst_cap != zs2->zst_cap)
 625                 return (0);
 626 
 627         if (zs1->zst_direction != zs2->zst_direction)
 628                 return (0);
 629 
 630         return (1);
 631 }
 632 
 633 /*
 634  * This is the prefetch entry point.  It calls all of the other dmu_zfetch
 635  * routines to create, delete, find, or operate upon prefetch streams.
 636  */
 637 void
 638 dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
 639 {
 640         zstream_t       zst;
 641         zstream_t       *newstream;
 642         int             fetched;
 643         int             inserted;
 644         unsigned int    blkshft;
 645         uint64_t        blksz;
 646 
 647         if (zfs_prefetch_disable)
 648                 return;
 649 
 650         /* files whose blocksz isn't a power of 2 are only one block -- nothing to do */
 651         if (!zf->zf_dnode->dn_datablkshift)
 652                 return;
 653 
 654         /* convert offset and size into blockid and nblocks */
 655         blkshft = zf->zf_dnode->dn_datablkshift;
 656         blksz = (1 << blkshft);
 657 
 658         bzero(&zst, sizeof (zstream_t));
 659         zst.zst_offset = offset >> blkshft;
 660         zst.zst_len = (P2ROUNDUP(offset + size, blksz) -
 661             P2ALIGN(offset, blksz)) >> blkshft;
 662 
 663         fetched = dmu_zfetch_find(zf, &zst, prefetched);
 664         if (fetched) {
 665                 ZFETCHSTAT_BUMP(zfetchstat_hits);
 666         } else {
 667                 ZFETCHSTAT_BUMP(zfetchstat_misses);
 668                 if (fetched = dmu_zfetch_colinear(zf, &zst)) {

 669                         ZFETCHSTAT_BUMP(zfetchstat_colinear_hits);
 670                 } else {
 671                         ZFETCHSTAT_BUMP(zfetchstat_colinear_misses);
 672                 }
 673         }
 674 
 675         if (!fetched) {
 676                 newstream = dmu_zfetch_stream_reclaim(zf);
 677 
 678                 /*
 679                  * we still couldn't find a stream, drop the lock, and allocate
 680                  * one if possible.  Otherwise, give up and go home.
 681                  */
 682                 if (newstream) {
 683                         ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes);
 684                 } else {
 685                         uint64_t        maxblocks;
 686                         uint32_t        max_streams;
 687                         uint32_t        cur_streams;
 688 




  31 #include <sys/dbuf.h>
  32 #include <sys/kstat.h>
  33 
  34 /*
  35  * I'm against tune-ables, but these should probably exist as tweakable globals
  36  * until we can get this working the way we want it to.
  37  */
  38 
  39 int zfs_prefetch_disable = 0;
  40 
  41 /* max # of streams per zfetch */
  42 uint32_t        zfetch_max_streams = 8;
  43 /* min time before stream reclaim */
  44 uint32_t        zfetch_min_sec_reap = 2;
  45 /* max number of blocks to fetch at a time */
  46 uint32_t        zfetch_block_cap = 256;
  47 /* number of bytes in an array_read at which we stop prefetching (1MB) */
  48 uint64_t        zfetch_array_rd_sz = 1024 * 1024;
  49 
  50 /* forward decls for static routines */
  51 static boolean_t        dmu_zfetch_colinear(zfetch_t *, zstream_t *);
  52 static void             dmu_zfetch_dofetch(zfetch_t *, zstream_t *);
  53 static uint64_t         dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t);
  54 static uint64_t         dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t);
  55 static boolean_t        dmu_zfetch_find(zfetch_t *, zstream_t *, int);
  56 static int              dmu_zfetch_stream_insert(zfetch_t *, zstream_t *);
  57 static zstream_t        *dmu_zfetch_stream_reclaim(zfetch_t *);
  58 static void             dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
  59 static int              dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
  60 
  61 typedef struct zfetch_stats {
  62         kstat_named_t zfetchstat_hits; /* dmu_zfetch_find() found a stream */
  63         kstat_named_t zfetchstat_misses; /* dmu_zfetch_find() found none */
  64         kstat_named_t zfetchstat_colinear_hits; /* after a miss, dmu_zfetch_colinear() succeeded */
  65         kstat_named_t zfetchstat_colinear_misses; /* after a miss, dmu_zfetch_colinear() failed */
  66         kstat_named_t zfetchstat_stride_hits;
  67         kstat_named_t zfetchstat_stride_misses;
  68         kstat_named_t zfetchstat_reclaim_successes; /* dmu_zfetch_stream_reclaim() returned a stream */
  69         kstat_named_t zfetchstat_reclaim_failures;
  70         kstat_named_t zfetchstat_stream_resets;
  71         kstat_named_t zfetchstat_stream_noresets;
  72         kstat_named_t zfetchstat_bogus_streams;
  73 } zfetch_stats_t;
  74 
  75 static zfetch_stats_t zfetch_stats = {


  87 };
  88 
  89 #define ZFETCHSTAT_INCR(stat, val) \
  90         atomic_add_64(&zfetch_stats.stat.value.ui64, (val)); /* add val to the named zfetch_stats counter; NOTE(review): expansion already ends in ';' */
  91 
  92 #define ZFETCHSTAT_BUMP(stat)           ZFETCHSTAT_INCR(stat, 1); /* add 1 to the named counter; NOTE(review): also ends in ';' */
  93 
  94 kstat_t         *zfetch_ksp;
  95 
  96 /*
  97  * Given a zfetch structure and a zstream structure, determine whether the
  98  * blocks to be read are part of a co-linear pair of existing prefetch
  99  * streams.  If a set is found, coalesce the streams, removing one, and
 100  * configure the prefetch so it looks for a strided access pattern.
 101  *
 102  * In other words: if we find two sequential access streams that are
 103  * the same length and distance N apart, and this read is N from the
 104  * last stream, then we are probably in a strided access pattern.  So
 105  * combine the two sequential streams into a single strided stream.
 106  *
 107  * Returns whether co-linear streams were found.
 108  */
 109 static boolean_t
 110 dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh)
 111 {
 112         zstream_t       *z_walk;
 113         zstream_t       *z_comp;
 114 
 115         if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
 116                 return (0);
 117 
 118         if (zh == NULL) {
 119                 rw_exit(&zf->zf_rwlock);
 120                 return (0);
 121         }
 122 
 123         for (z_walk = list_head(&zf->zf_stream); z_walk;
 124             z_walk = list_next(&zf->zf_stream, z_walk)) {
 125                 for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp;
 126                     z_comp = list_next(&zf->zf_stream, z_comp)) {
 127                         int64_t         diff;
 128 
 129                         if (z_walk->zst_len != z_walk->zst_stride ||


 309                 return (0);
 310         }
 311 
 312         /* compute fetch size */
 313         if (blkid + nblks + 1 > dn->dn_maxblkid) {
 314                 fetchsz = (dn->dn_maxblkid - blkid) + 1;
 315                 ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid);
 316         } else {
 317                 fetchsz = nblks;
 318         }
 319 
 320 
 321         return (fetchsz);
 322 }
 323 
 324 /*
 325  * Given a zfetch and a zstream structure, see if there is an associated
 326  * zstream for this block read.  If so, start a prefetch for the stream that
 327  * was located and return true; otherwise, return false.
 328  */
 329 static boolean_t
 330 dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
 331 {
 332         zstream_t       *zs;
 333         int64_t         diff;
 334         int             reset = !prefetched;
 335         int             rc = 0;
 336 
 337         if (zh == NULL)
 338                 return (0);
 339 
 340         /*
 341          * XXX: This locking strategy is a bit coarse; however, its impact has
 342          * yet to be tested.  If this turns out to be an issue, it can be
 343          * modified in a number of different ways.
 344          */
 345 
 346         rw_enter(&zf->zf_rwlock, RW_READER);
 347 top:
 348 
 349         for (zs = list_head(&zf->zf_stream); zs;


 622                 return (0);
 623 
 624         if (zs1->zst_cap != zs2->zst_cap)
 625                 return (0);
 626 
 627         if (zs1->zst_direction != zs2->zst_direction)
 628                 return (0);
 629 
 630         return (1);
 631 }
 632 
 633 /*
 634  * This is the prefetch entry point.  It calls all of the other dmu_zfetch
 635  * routines to create, delete, find, or operate upon prefetch streams.
 636  */
 637 void
 638 dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
 639 {
 640         zstream_t       zst;
 641         zstream_t       *newstream;
 642         boolean_t       fetched;
 643         int             inserted;
 644         unsigned int    blkshft;
 645         uint64_t        blksz;
 646 
 647         if (zfs_prefetch_disable)
 648                 return;
 649 
 650         /* files whose blocksz isn't a power of 2 are only one block -- nothing to do */
 651         if (!zf->zf_dnode->dn_datablkshift)
 652                 return;
 653 
 654         /* convert offset and size into blockid and nblocks */
 655         blkshft = zf->zf_dnode->dn_datablkshift;
 656         blksz = (1 << blkshft);
 657 
 658         bzero(&zst, sizeof (zstream_t));
 659         zst.zst_offset = offset >> blkshft;
 660         zst.zst_len = (P2ROUNDUP(offset + size, blksz) -
 661             P2ALIGN(offset, blksz)) >> blkshft;
 662 
 663         fetched = dmu_zfetch_find(zf, &zst, prefetched);
 664         if (fetched) {
 665                 ZFETCHSTAT_BUMP(zfetchstat_hits);
 666         } else {
 667                 ZFETCHSTAT_BUMP(zfetchstat_misses);
 668                 fetched = dmu_zfetch_colinear(zf, &zst);
 669                 if (fetched) {
 670                         ZFETCHSTAT_BUMP(zfetchstat_colinear_hits);
 671                 } else {
 672                         ZFETCHSTAT_BUMP(zfetchstat_colinear_misses);
 673                 }
 674         }
 675 
 676         if (!fetched) {
 677                 newstream = dmu_zfetch_stream_reclaim(zf);
 678 
 679                 /*
 680                  * we still couldn't find a stream, drop the lock, and allocate
 681                  * one if possible.  Otherwise, give up and go home.
 682                  */
 683                 if (newstream) {
 684                         ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes);
 685                 } else {
 686                         uint64_t        maxblocks;
 687                         uint32_t        max_streams;
 688                         uint32_t        cur_streams;
 689