55 * Pages are evicted when the cache is full and there is a cache
56 * miss. Our model has a variable sized cache. It grows with
57 * high use, but also tries to react to memory pressure from the
58 * operating system: decreasing its size when system memory is
59 * tight.
60 *
61 * 3. The Megiddo and Modha model assumes a fixed page size. All
62 * elements of the cache are therefore exactly the same size. So
63 * when adjusting the cache size following a cache miss, it's simply
64 * a matter of choosing a single page to evict. In our model, we
65 * have variable sized cache blocks (ranging from 512 bytes to
66 * 128K bytes). We therefore choose a set of blocks to evict to make
67 * space for a cache miss that approximates as closely as possible
68 * the space used by the new block.
69 *
70 * See also: "ARC: A Self-Tuning, Low Overhead Replacement Cache"
71 * by N. Megiddo & D. Modha, FAST 2003
72 */
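
/*
 * As an illustration of point 3 above (a sketch only; the helper names and
 * fields are hypothetical, not the actual eviction code): evicting a set of
 * variable-sized blocks to approximate the space needed by a new block
 * amounts to a loop of the form
 *
 *	uint64_t
 *	evict_approx(arc_state_t *state, uint64_t bytes_needed)
 *	{
 *		uint64_t freed = 0;
 *
 *		while (freed < bytes_needed) {
 *			arc_buf_hdr_t *ab = pick_eviction_candidate(state);
 *			if (ab == NULL)
 *				break;
 *			freed += ab->b_size;
 *			evict_buffer(ab);
 *		}
 *		return (freed);
 *	}
 *
 * The loop stops as soon as the space freed meets or exceeds the request,
 * so the total evicted approximates the size of the incoming block.
 */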
73
74 /*
75 * External users typically access ARC buffers via a hash table
76 * lookup, using the DVA, spa_t pointer value and the birth TXG
77 * number as the key. The hash value is derived by buf_hash(),
78 * which produces a 64-bit hash value. This value is then masked
79 * with ht_mask to obtain the final index into the hash table:
80 *
81 *                      ,---------------- & ht_mask ----------------,
82 *  64-bit hash value   |             (hash table index)            |
83 * |XXXXXXXXXXXXXXXXXXXX|XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX|
84 *
85 * Sizing of the hash table is done at boot from the amount of
86 * physical memory. We start with a base value of 2^12 hash
87 * buckets and then evaluate whether this number, multiplied by
88 * 2^zfs_arc_ht_base_masklen (the minimum mask length), is
89 * greater than or equal to the amount of physical memory. If not,
90 * we double the number of hash buckets and repeat. Using the
91 * default settings these values translate to ~1 MB of hash tables
92 * for each 1 GB of physical memory.
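 *
 * As a worked (illustrative) example with the default
 * zfs_arc_ht_base_masklen of 13: on a system with 16 GB (2^34 bytes) of
 * physical memory, doubling stops at 2^21 buckets, since
 * 2^(21 + 13) >= 2^34. The table then occupies 2^21 * 8 bytes = 16 MB,
 * i.e. about 1 MB per GB.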
93 *
94 * The locking model:
95 *
96 * A new reference to a cache buffer can be obtained in two
97 * ways: 1) via a hash table lookup using the DVA as a key,
98 * or 2) via one of the ARC lists. The arc_read() interface
99 * uses method 1, while the internal arc algorithms for
100 * adjusting the cache use method 2. We therefore provide two
101 * types of locks: 1) the hash table lock array, and 2) the
102 * arc list locks.
103 *
104 * Buffers do not have their own mutexes, rather they rely on the
105 * hash table mutexes for the bulk of their protection (i.e. most
106 * fields in the arc_buf_hdr_t are protected by these mutexes). The
107 * specific mutex is selected by taking its hash value and masking
108 * it by ht_lock_mask, which then produces an index into the mutex
109 * table. The size of the lock table is derived from the amount of
110 * physical memory, which is simply divided by
111 * 2^zfs_arc_ht_lock_shift, giving the number of locks, with a
112 * minimum of MIN_BUF_LOCKS.
113 *
114 * buf_hash_find() returns the appropriate mutex (held) when it
115 * locates the requested buffer in the hash table. It returns
116 * NULL for the mutex if the buffer was not in the table.
117 *
118 * buf_hash_remove() expects the appropriate hash mutex to be
119 * already held before it is invoked.
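 *
 * A typical lookup therefore follows this pattern (a sketch only, assuming
 * buf_hash_find() takes the same key arguments as buf_hash() below; "guid"
 * and "bp" stand for the spa identifier and block pointer the caller
 * already has, and error handling is omitted):
 *
 *	kmutex_t *hash_lock;
 *	arc_buf_hdr_t *hdr;
 *
 *	hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp),
 *	    &hash_lock);
 *	if (hdr != NULL) {
 *		... hdr fields may be inspected under the hash lock ...
 *		mutex_exit(hash_lock);
 *	}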
120 *
121 * Each arc state also has a mutex which is used to protect the
122 * buffer list associated with the state. When attempting to
123 * obtain a hash table lock while holding an arc list lock you
124 * must use: mutex_tryenter() to avoid deadlock. Also note that
125 * the active state mutex must be held before the ghost state mutex.
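 *
 * For example, an eviction-style walk might look like this (a sketch; the
 * list, lock and variable names here are illustrative, not the actual
 * implementation):
 *
 *	mutex_enter(list_lock);
 *	for (ab = list_tail(list); ab != NULL; ab = list_prev(list, ab)) {
 *		hash_lock = HDR_LOCK(ab);
 *		if (!mutex_tryenter(hash_lock))
 *			continue;
 *		... work on the buffer while holding both locks ...
 *		mutex_exit(hash_lock);
 *	}
 *	mutex_exit(list_lock);
 *
 * Skipping a busy hash lock, rather than blocking on it, is what avoids
 * deadlocking against the opposite lock order taken on the hash-table
 * lookup path.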
126 *
127 * Arc buffers may have an associated eviction callback function.
128 * This function will be invoked prior to removing the buffer (e.g.
129 * in arc_do_user_evicts()). Note however that the data associated
130 * with the buffer may be evicted prior to the callback. The callback
131 * must be made with *no locks held* (to prevent deadlock). Additionally,
132 * the users of callbacks must ensure that their private data is
210
211 static int arc_dead;
212
213 /*
214 * The arc has filled available memory and has now warmed up.
215 */
216 static boolean_t arc_warm;
217
218 /*
219 * These tunables are for performance analysis.
220 */
221 uint64_t zfs_arc_max;
222 uint64_t zfs_arc_min;
223 uint64_t zfs_arc_meta_limit = 0;
224 int zfs_arc_grow_retry = 0;
225 int zfs_arc_shrink_shift = 0;
226 int zfs_arc_p_min_shift = 0;
227 int zfs_disable_dup_eviction = 0;
228
229 /*
230 * Used to calculate the size of ARC hash tables and number of hash locks.
231 * See big theory block comment at the start of this file.
232 */
233 uint64_t zfs_arc_ht_base_masklen = 13;
234 /*
235 * We want to allocate one hash lock for every 4GB of memory with a minimum
236 * of MIN_BUF_LOCKS.
237 */
238 uint64_t zfs_arc_ht_lock_shift = 32;
239 #define MIN_BUF_LOCKS 256
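
/*
 * Illustrative arithmetic: with 16 GB of memory, (16 GB >> 32) is 4, so
 * the MIN_BUF_LOCKS floor of 256 applies; the one-lock-per-4GB scaling
 * only exceeds the floor on systems with more than 1 TB (256 * 4 GB) of
 * memory.
 */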
240
241 /*
242 * Note that buffers can be in one of 6 states:
243 * ARC_anon - anonymous (discussed below)
244 * ARC_mru - recently used, currently cached
245 * ARC_mru_ghost - recently used, no longer in cache
246 * ARC_mfu - frequently used, currently cached
247 * ARC_mfu_ghost - frequently used, no longer in cache
248 * ARC_l2c_only - exists in L2ARC but not other states
249 * When there are no active references to the buffer, they are
250 * linked onto a list in one of these arc states. These are
251 * the only buffers that can be evicted or deleted. Within each
252 * state there are multiple lists, one for meta-data and one for
253 * non-meta-data. Meta-data (indirect blocks, blocks of dnodes,
254 * etc.) is tracked separately so that it can be managed more
255 * explicitly: favored over data, limited explicitly.
256 *
257 * Anonymous buffers are buffers that are not associated with
258 * a DVA. These are buffers that hold dirty block copies
259 * before they are written to stable storage. By definition,
260 * they are "ref'd" and are considered part of arc_mru
261 * that cannot be freed. Generally, they will acquire a DVA
608 /*
609 * Other sizes
610 */
611
612 #define HDR_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
613 #define L2HDR_SIZE ((int64_t)sizeof (l2arc_buf_hdr_t))
614
615 /*
616 * Hash table routines
617 */
618
619 #define HT_LOCK_PAD 64
620
621 struct ht_lock {
622 kmutex_t ht_lock;
623 #ifdef _KERNEL
624 unsigned char pad[(HT_LOCK_PAD - sizeof (kmutex_t))];
625 #endif
626 };
627
628 typedef struct buf_hash_table {
629 uint64_t ht_mask;
630 arc_buf_hdr_t **ht_table;
631 struct ht_lock *ht_locks;
632 uint64_t ht_num_locks, ht_lock_mask;
633 } buf_hash_table_t;
634
635 static buf_hash_table_t buf_hash_table;
636
637 #define BUF_HASH_INDEX(spa, dva, birth) \
638 (buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
639 #define BUF_HASH_LOCK_NTRY(idx) \
640 (buf_hash_table.ht_locks[idx & buf_hash_table.ht_lock_mask])
641 #define BUF_HASH_LOCK(idx) (&(BUF_HASH_LOCK_NTRY(idx).ht_lock))
642 #define HDR_LOCK(hdr) \
643 (BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))
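
/*
 * Typical use of the macros above (sketch): map a header to its hash lock
 * and hold that lock while examining or updating the header's hashed
 * identity.
 *
 *	kmutex_t *hash_lock = HDR_LOCK(hdr);
 *
 *	mutex_enter(hash_lock);
 *	... hdr is now stable with respect to the hash table ...
 *	mutex_exit(hash_lock);
 */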
644
645 uint64_t zfs_crc64_table[256];
646
647 /*
648 * Level 2 ARC
649 */
650
651 #define L2ARC_WRITE_SIZE (8 * 1024 * 1024) /* initial write max */
652 #define L2ARC_HEADROOM 2 /* num of writes */
653 /*
654 * If we discover during ARC scan any buffers to be compressed, we boost
655 * our headroom for the next scanning cycle by this percentage multiple.
656 */
657 #define L2ARC_HEADROOM_BOOST 200
658 #define L2ARC_FEED_SECS 1 /* caching interval secs */
659 #define L2ARC_FEED_MIN_MS 200 /* min caching interval ms */
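
/*
 * With the defaults above (an illustrative reading of these tunables, not
 * a statement about code elsewhere in this file): each feed cycle may write
 * up to 8 MB to the L2ARC, the scan headroom is L2ARC_HEADROOM writes'
 * worth of buffers (16 MB), and when compressible buffers are seen that
 * headroom is scaled by L2ARC_HEADROOM_BOOST / 100 for the next cycle,
 * i.e. doubled at the default of 200.
 */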
660
728 /* protected by l2arc_free_on_write_mtx */
729 void *l2df_data;
730 size_t l2df_size;
731 void (*l2df_func)(void *, size_t);
732 list_node_t l2df_list_node;
733 } l2arc_data_free_t;
734
735 static kmutex_t l2arc_feed_thr_lock;
736 static kcondvar_t l2arc_feed_thr_cv;
737 static uint8_t l2arc_thread_exit;
738
739 static void l2arc_read_done(zio_t *zio);
740 static void l2arc_hdr_stat_add(void);
741 static void l2arc_hdr_stat_remove(void);
742
743 static boolean_t l2arc_compress_buf(l2arc_buf_hdr_t *l2hdr);
744 static void l2arc_decompress_zio(zio_t *zio, arc_buf_hdr_t *hdr,
745 enum zio_compress c);
746 static void l2arc_release_cdata_buf(arc_buf_hdr_t *ab);
747
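/*
 * Hash the {spa, DVA, birth TXG} identity of a block into a 64-bit value:
 * the DVA bytes are run through the ZFS CRC-64 table and the spa and birth
 * values are folded in at the end. Callers mask the result (see
 * BUF_HASH_INDEX() and BUF_HASH_LOCK()) to pick a bucket and a lock.
 */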
748 static inline uint64_t
749 buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth)
750 {
751 uint8_t *vdva = (uint8_t *)dva;
752 uint64_t crc = -1ULL;
753 int i;
754
755 ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
756
757 for (i = 0; i < sizeof (dva_t); i++)
758 crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ vdva[i]) & 0xFF];
759
760 crc ^= (spa>>8) ^ birth;
761
762 return (crc);
763 }
764
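/*
 * A header whose DVA and birth TXG are all zero carries no on-disk
 * identity (e.g. an anonymous buffer) and is not hashed.
 */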
765 #define BUF_EMPTY(buf) \
766 ((buf)->b_dva.dva_word[0] == 0 && \
767 (buf)->b_dva.dva_word[1] == 0 && \
768 (buf)->b_birth == 0)
865 ARCSTAT_BUMPDOWN(arcstat_hash_elements);
866
867 if (buf_hash_table.ht_table[idx] &&
868 buf_hash_table.ht_table[idx]->b_hash_next == NULL)
869 ARCSTAT_BUMPDOWN(arcstat_hash_chains);
870 }
871
872 /*
873 * Global data structures and functions for the buf kmem cache.
874 */
875 static kmem_cache_t *hdr_cache;
876 static kmem_cache_t *buf_cache;
877
878 static void
879 buf_fini(void)
880 {
881 int i;
882
883 kmem_free(buf_hash_table.ht_table,
884 (buf_hash_table.ht_mask + 1) * sizeof (void *));
885
886 for (i = 0; i < buf_hash_table.ht_num_locks; i++)
887 mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
888 kmem_free(buf_hash_table.ht_locks, sizeof (struct ht_lock) *
889 buf_hash_table.ht_num_locks);
890 kmem_cache_destroy(hdr_cache);
891 kmem_cache_destroy(buf_cache);
892 }
893
894 /*
895 * Constructor callback - called when the cache is empty
896 * and a new buf is requested.
897 */
898 /* ARGSUSED */
899 static int
900 hdr_cons(void *vbuf, void *unused, int kmflag)
901 {
902 arc_buf_hdr_t *buf = vbuf;
903
904 bzero(buf, sizeof (arc_buf_hdr_t));
905 refcount_create(&buf->b_refcnt);
906 cv_init(&buf->b_cv, NULL, CV_DEFAULT, NULL);
907 mutex_init(&buf->b_freeze_lock, NULL, MUTEX_DEFAULT, NULL);
908 arc_space_consume(sizeof (arc_buf_hdr_t), ARC_SPACE_HDRS);
909
953 /*
954 * Reclaim callback -- invoked when memory is low.
955 */
956 /* ARGSUSED */
957 static void
958 hdr_recl(void *unused)
959 {
960 dprintf("hdr_recl called\n");
961 /*
962 * umem calls the reclaim func when we destroy the buf cache,
963 * which is after we do arc_fini().
964 */
965 if (!arc_dead)
966 cv_signal(&arc_reclaim_thr_cv);
967 }
968
969 static void
970 buf_init(void)
971 {
972 uint64_t *ct;
973 uint64_t ht_masklen = 12;
974 int i, j;
975
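	/*
	 * Size the hash table so that the bucket count, multiplied by
	 * 2^zfs_arc_ht_base_masklen, covers all of physical memory (see
	 * the big theory comment at the top of this file).
	 */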
976 while ((1ULL << (ht_masklen + zfs_arc_ht_base_masklen)) <
977 physmem * PAGESIZE)
978 ht_masklen++;
979 buf_hash_table.ht_mask = (1ULL << ht_masklen) - 1;
980 buf_hash_table.ht_table =
981 kmem_zalloc((1ULL << ht_masklen) * sizeof (void *), KM_SLEEP);
982
983 buf_hash_table.ht_num_locks = MAX((physmem * PAGESIZE) >>
984 zfs_arc_ht_lock_shift, MIN_BUF_LOCKS);
985 buf_hash_table.ht_lock_mask = buf_hash_table.ht_num_locks - 1;
986 buf_hash_table.ht_locks = kmem_zalloc(sizeof (struct ht_lock) *
987 buf_hash_table.ht_num_locks, KM_SLEEP);
988 for (i = 0; i < buf_hash_table.ht_num_locks; i++) {
989 mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
990 NULL, MUTEX_DEFAULT, NULL);
991 }
992
993 hdr_cache = kmem_cache_create("arc_buf_hdr_t", sizeof (arc_buf_hdr_t),
994 0, hdr_cons, hdr_dest, hdr_recl, NULL, NULL, 0);
995 buf_cache = kmem_cache_create("arc_buf_t", sizeof (arc_buf_t),
996 0, buf_cons, buf_dest, NULL, NULL, NULL, 0);
997
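	/*
	 * Populate the ZFS CRC-64 table (polynomial ZFS_CRC64_POLY);
	 * buf_hash() asserts that it has been initialized.
	 */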
998 for (i = 0; i < 256; i++)
999 for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
1000 *ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
1001 }
1002
1003 #define ARC_MINTIME (hz>>4) /* 62 ms */
1004
1005 static void
1006 arc_cksum_verify(arc_buf_t *buf)
1007 {
1008 zio_cksum_t zc;
1009
1010 if (!(zfs_flags & ZFS_DEBUG_MODIFY))
1011 return;
1012
1013 mutex_enter(&buf->b_hdr->b_freeze_lock);
1014 if (buf->b_hdr->b_freeze_cksum == NULL ||
1015 (buf->b_hdr->b_flags & ARC_IO_ERROR)) {
1016 mutex_exit(&buf->b_hdr->b_freeze_lock);
1017 return;
1018 }
1019 fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
1020 if (!ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc))