727
728 /* updated atomically */
729 clock_t b_arc_access;
730
731 /* self protecting */
732 refcount_t b_refcnt;
733
734 arc_callback_t *b_acb;
735 /* temporary buffer holder for in-flight compressed data */
736 void *b_tmp_cdata;
737 } l1arc_buf_hdr_t;
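The "self protecting" note means b_refcnt carries its own synchronization, so holds can be taken and dropped without the hash lock. A minimal usage sketch with the illumos refcount API (the wrapper function and tag parameter are illustrative, not from the source):

/* Illustrative only: taking and dropping a hold on a header. */
static void
arc_hdr_hold_example(arc_buf_hdr_t *hdr, void *tag)
{
	(void) refcount_add(&hdr->b_l1hdr.b_refcnt, tag);
	/* ... safe to use the header's L1 state while held ... */
	(void) refcount_remove(&hdr->b_l1hdr.b_refcnt, tag);
}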
738
739 typedef struct l2arc_dev l2arc_dev_t;
740
741 typedef struct l2arc_buf_hdr {
742 /* protected by arc_buf_hdr mutex */
743 l2arc_dev_t *b_dev; /* L2ARC device */
744 uint64_t b_daddr; /* disk address, offset byte */
745 /* actual allocated buffer size, depending on the compression applied */
746 int32_t b_asize;
747
748 list_node_t b_l2node;
749 } l2arc_buf_hdr_t;
750
751 struct arc_buf_hdr {
752 /* protected by hash lock */
753 dva_t b_dva;
754 uint64_t b_birth;
755 /*
756 * Even though this checksum is only set/verified when a buffer is in
757 * the L1 cache, it needs to be in the set of common fields because it
758 * must be preserved from the time before a buffer is written out to
759 * L2ARC until after it is read back in.
760 */
761 zio_cksum_t *b_freeze_cksum;
762
763 arc_buf_hdr_t *b_hash_next;
764 arc_flags_t b_flags;
765
766 /* immutable */
786 #define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_FLAG_PREFETCH)
787 #define HDR_FREED_IN_READ(hdr) ((hdr)->b_flags & ARC_FLAG_FREED_IN_READ)
788 #define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_FLAG_BUF_AVAILABLE)
789
790 #define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_FLAG_L2CACHE)
791 #define HDR_L2COMPRESS(hdr) ((hdr)->b_flags & ARC_FLAG_L2COMPRESS)
792 #define HDR_L2_READING(hdr) \
793 (((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) && \
794 ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR))
795 #define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITING)
796 #define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_FLAG_L2_EVICTED)
797 #define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD)
798
799 #define HDR_ISTYPE_METADATA(hdr) \
800 ((hdr)->b_flags & ARC_FLAG_BUFC_METADATA)
801 #define HDR_ISTYPE_DATA(hdr) (!HDR_ISTYPE_METADATA(hdr))
802
803 #define HDR_HAS_L1HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L1HDR)
804 #define HDR_HAS_L2HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)
805
806 /* For storing compression mode in b_flags */
807 #define HDR_COMPRESS_OFFSET 24
808 #define HDR_COMPRESS_NBITS 7
809
810 #define HDR_GET_COMPRESS(hdr) ((enum zio_compress)BF32_GET(hdr->b_flags, \
811 HDR_COMPRESS_OFFSET, HDR_COMPRESS_NBITS))
812 #define HDR_SET_COMPRESS(hdr, cmp) BF32_SET(hdr->b_flags, \
813 HDR_COMPRESS_OFFSET, HDR_COMPRESS_NBITS, (cmp))
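A minimal sketch of the shift-and-mask arithmetic that BF32_GET/BF32_SET perform for this 7-bit field at bit offset 24; the helper names are illustrative stand-ins, not the actual illumos macro definitions:

/* Illustrative equivalents of the BF32 bitfield macros above. */
static inline uint32_t
hdr_compress_get_example(uint32_t flags)
{
	return ((flags >> HDR_COMPRESS_OFFSET) &
	    ((1U << HDR_COMPRESS_NBITS) - 1));
}

static inline uint32_t
hdr_compress_set_example(uint32_t flags, uint32_t cmp)
{
	uint32_t mask = ((1U << HDR_COMPRESS_NBITS) - 1) <<
	    HDR_COMPRESS_OFFSET;

	return ((flags & ~mask) | ((cmp << HDR_COMPRESS_OFFSET) & mask));
}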
814
815 /*
816 * Other sizes
817 */
818
819 #define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
820 #define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
821
822 /*
823 * Hash table routines
824 */
825
826 #define HT_LOCK_PAD 64
827
828 struct ht_lock {
829 kmutex_t ht_lock;
830 #ifdef _KERNEL
831 unsigned char pad[(HT_LOCK_PAD - sizeof (kmutex_t))];
832 #endif
833 };
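The pad rounds each ht_lock out to HT_LOCK_PAD (64) bytes so neighboring hash-table locks sit on separate cache lines and don't false-share. A sketch of a compile-time guard one could add, assuming the illumos CTASSERT macro from <sys/debug.h>:

/* Illustrative: each lock should occupy exactly one cache line. */
#ifdef _KERNEL
CTASSERT(sizeof (struct ht_lock) == HT_LOCK_PAD);
#endif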
834
2025 return;
2026
2027 /*
2028 * The header isn't being written to the l2arc device, thus it
2029 * shouldn't have a b_tmp_cdata to free.
2030 */
2031 if (!HDR_L2_WRITING(hdr)) {
2032 ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
2033 return;
2034 }
2035
2036 /*
2037 * The header does not have compression enabled. This can be due
2038 * to the buffer not being compressible, or because we're
2039 * freeing the buffer before the second phase of
2040 * l2arc_write_buffers() has started (which does the compression
2041 * step). In either case, b_tmp_cdata does not point to a
2042 * separately compressed buffer, so there's nothing to free (it
2043 * points to the same buffer as the arc_buf_t's b_data field).
2044 */
2045 if (HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF) {
2046 hdr->b_l1hdr.b_tmp_cdata = NULL;
2047 return;
2048 }
2049
2050 /*
2051 * There's nothing to free since the buffer was all zeros and
2052 * compressed to a zero length buffer.
2053 */
2054 if (HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_EMPTY) {
2055 ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
2056 return;
2057 }
2058
2059 ASSERT(L2ARC_IS_VALID_COMPRESS(HDR_GET_COMPRESS(hdr)));
2060
2061 arc_buf_free_on_write(hdr->b_l1hdr.b_tmp_cdata,
2062 hdr->b_size, zio_data_buf_free);
2063
2064 ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
2065 hdr->b_l1hdr.b_tmp_cdata = NULL;
2066 }
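The early returns above encode the legal states of b_tmp_cdata. A hypothetical checker (not in the source) that restates those invariants in one place:

/*
 * Hypothetical summary of the invariants enforced above:
 * not L2-writing or ZIO_COMPRESS_EMPTY implies no cdata; otherwise
 * b_tmp_cdata is either an alias of b_data (COMPRESS_OFF) or a
 * separately allocated compressed buffer.
 */
static boolean_t
hdr_tmp_cdata_state_valid_example(arc_buf_hdr_t *hdr)
{
	if (!HDR_L2_WRITING(hdr) ||
	    HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_EMPTY)
		return (hdr->b_l1hdr.b_tmp_cdata == NULL);
	return (hdr->b_l1hdr.b_tmp_cdata != NULL);
}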
2067
2068 /*
2069 * Free up buf->b_data and, if 'remove' is set, pull the
2070 * arc_buf_t off of the arc_buf_hdr_t's list and free it.
2071 */
2072 static void
2073 arc_buf_destroy(arc_buf_t *buf, boolean_t remove)
2074 {
2075 arc_buf_t **bufp;
2076
2077 /* free up data associated with the buf */
2078 if (buf->b_data != NULL) {
2079 arc_state_t *state = buf->b_hdr->b_l1hdr.b_state;
4152 ASSERT0(hdr->b_l1hdr.b_datacnt);
4153 hdr->b_l1hdr.b_datacnt = 1;
4154 arc_get_data_buf(buf);
4155 arc_access(hdr, hash_lock);
4156 }
4157
4158 ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state));
4159
4160 acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
4161 acb->acb_done = done;
4162 acb->acb_private = private;
4163
4164 ASSERT(hdr->b_l1hdr.b_acb == NULL);
4165 hdr->b_l1hdr.b_acb = acb;
4166 hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS;
4167
4168 if (HDR_HAS_L2HDR(hdr) &&
4169 (vd = hdr->b_l2hdr.b_dev->l2ad_vdev) != NULL) {
4170 devw = hdr->b_l2hdr.b_dev->l2ad_writing;
4171 addr = hdr->b_l2hdr.b_daddr;
4172 b_compress = HDR_GET_COMPRESS(hdr);
4173 b_asize = hdr->b_l2hdr.b_asize;
4174 /*
4175 * Lock out device removal.
4176 */
4177 if (vdev_is_dead(vd) ||
4178 !spa_config_tryenter(spa, SCL_L2ARC, vd, RW_READER))
4179 vd = NULL;
4180 }
4181
4182 if (hash_lock != NULL)
4183 mutex_exit(hash_lock);
4184
4185 /*
4186 * At this point, we have a level 1 cache miss. Try again in
4187 * L2ARC if possible.
4188 */
4189 ASSERT3U(hdr->b_size, ==, size);
4190 DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp,
4191 uint64_t, size, zbookmark_phys_t *, zb);
4192 ARCSTAT_BUMP(arcstat_misses);
5613 ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE);
5614
5615 spa_config_exit(zio->io_spa, SCL_L2ARC, zio->io_vd);
5616
5617 cb = zio->io_private;
5618 ASSERT(cb != NULL);
5619 buf = cb->l2rcb_buf;
5620 ASSERT(buf != NULL);
5621
5622 hash_lock = HDR_LOCK(buf->b_hdr);
5623 mutex_enter(hash_lock);
5624 hdr = buf->b_hdr;
5625 ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
5626
5627 /*
5628 * If the buffer was compressed, decompress it first.
5629 */
5630 if (cb->l2rcb_compress != ZIO_COMPRESS_OFF)
5631 l2arc_decompress_zio(zio, hdr, cb->l2rcb_compress);
5632 ASSERT(zio->io_data != NULL);
5633
5634 /*
5635 * Check this survived the L2ARC journey.
5636 */
5637 equal = arc_cksum_equal(buf);
5638 if (equal && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) {
5639 mutex_exit(hash_lock);
5640 zio->io_private = buf;
5641 zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */
5642 zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */
5643 arc_read_done(zio);
5644 } else {
5645 mutex_exit(hash_lock);
5646 /*
5647 * Buffer didn't survive caching. Increment stats and
5648 * reissue to the original storage device.
5649 */
5650 if (zio->io_error != 0) {
5651 ARCSTAT_BUMP(arcstat_l2_io_error);
5652 } else {
5653 zio->io_error = SET_ERROR(EIO);
5654 }
5655 if (!equal)
5656 ARCSTAT_BUMP(arcstat_l2_cksum_bad);
5657
5658 /*
5659 * If there's no waiter, issue an async i/o to the primary
5660 * storage now. If there *is* a waiter, the caller must
5661 * issue the i/o in a context where it's OK to block.
5662 */
5663 if (zio->io_waiter == NULL) {
5664 zio_t *pio = zio_unique_parent(zio);
5665
5666 ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL);
5667
5668 zio_nowait(zio_read(pio, cb->l2rcb_spa, &cb->l2rcb_bp,
5669 buf->b_data, zio->io_size, arc_read_done, buf,
5670 zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb));
5671 }
5672 }
5673
5674 kmem_free(cb, sizeof (l2arc_read_callback_t));
5675 }
5676
5677 /*
5678 * This is the list priority from which the L2ARC will search for pages to
5679 * cache. This is used within loops (0..3) to cycle through lists in the
5680 * desired order. This order can have a significant effect on cache
5681 * performance.
5682 *
5683 * Currently the metadata lists are hit first, MFU then MRU, followed by
5684 * the data lists. This function returns the locked sublist; the
5685 * caller is responsible for unlocking it.
5686 */
5687 static multilist_sublist_t *
5688 l2arc_sublist_lock(int list_num)
5689 {
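	/*
	 * Body elided in this excerpt. A hedged sketch of the 0..3
	 * priority order the comment above describes (metadata before
	 * data, MFU before MRU); the actual list selection in the
	 * source may differ:
	 *
	 *	case 0: arc_mfu, ARC_BUFC_METADATA
	 *	case 1: arc_mru, ARC_BUFC_METADATA
	 *	case 2: arc_mfu, ARC_BUFC_DATA
	 *	case 3: arc_mru, ARC_BUFC_DATA
	 */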
5947 sizeof (l2arc_write_callback_t), KM_SLEEP);
5948 cb->l2wcb_dev = dev;
5949 cb->l2wcb_head = head;
5950 pio = zio_root(spa, l2arc_write_done, cb,
5951 ZIO_FLAG_CANFAIL);
5952 }
5953
5954 /*
5955 * Create and add a new L2ARC header.
5956 */
5957 hdr->b_l2hdr.b_dev = dev;
5958 hdr->b_flags |= ARC_FLAG_L2_WRITING;
5959 /*
5960 * Temporarily stash the data buffer in b_tmp_cdata.
5961 * The subsequent write step will pick it up from
5962 * there. This is because we can't access b_l1hdr.b_buf
5963 * without holding the hash_lock, which we in turn
5964 * can't access without holding the ARC list locks
5965 * (which we want to avoid during compression/writing).
5966 */
5967 HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
5968 hdr->b_l2hdr.b_asize = hdr->b_size;
5969 hdr->b_l1hdr.b_tmp_cdata = hdr->b_l1hdr.b_buf->b_data;
5970
5971 /*
5972 * Explicitly set the b_daddr field to a known
5973 * value which means "invalid address". This
5974 * enables us to differentiate which stage of
5975 * l2arc_write_buffers() the particular header
5976 * is in (e.g. this loop, or the one below).
5977 * ARC_FLAG_L2_WRITING is not enough to make
5978 * this distinction, and we need to know in
5979 * order to do proper l2arc vdev accounting in
5980 * arc_release() and arc_hdr_destroy().
5981 *
5982 * Note, we can't use a new flag to distinguish
5983 * the two stages because we don't hold the
5984 * header's hash_lock below, in the second stage
5985 * of this function. Thus, we can't simply
5986 * change the b_flags field to denote that the
5987 * IO has been sent. We can change the b_daddr
6137 * device. To indicate this situation, b_tmp_cdata is NULL'ed, b_asize is
6138 * set to zero and b_compress is set to ZIO_COMPRESS_EMPTY.
6139 * *) Compression succeeded and b_tmp_cdata was replaced with a temporary
6140 * data buffer which holds the compressed data to be written, and b_asize
6141 * tells us how much data there is. b_compress is set to the appropriate
6142 * compression algorithm. Once writing is done, invoke
6143 * l2arc_release_cdata_buf on this l2hdr to free this temporary buffer.
6144 *
6145 * Returns B_TRUE if compression succeeded, or B_FALSE if it didn't (the
6146 * buffer was incompressible).
6147 */
6148 static boolean_t
6149 l2arc_compress_buf(arc_buf_hdr_t *hdr)
6150 {
6151 void *cdata;
6152 size_t csize, len, rounded;
6153 ASSERT(HDR_HAS_L2HDR(hdr));
6154 l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
6155
6156 ASSERT(HDR_HAS_L1HDR(hdr));
6157 ASSERT(HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF);
6158 ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL);
6159
6160 len = l2hdr->b_asize;
6161 cdata = zio_data_buf_alloc(len);
6162 ASSERT3P(cdata, !=, NULL);
6163 csize = zio_compress_data(ZIO_COMPRESS_LZ4, hdr->b_l1hdr.b_tmp_cdata,
6164 cdata, l2hdr->b_asize);
6165
6166 rounded = P2ROUNDUP(csize, (size_t)SPA_MINBLOCKSIZE);
6167 if (rounded > csize) {
6168 bzero((char *)cdata + csize, rounded - csize);
6169 csize = rounded;
6170 }
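	/*
	 * Worked example: with SPA_MINBLOCKSIZE of 512 bytes, a csize
	 * of 3000 rounds up to 3072, and the 72 pad bytes are zeroed
	 * so the full block written to the device is deterministic.
	 */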
6171
6172 if (csize == 0) {
6173 /* zero block, indicate that there's nothing to write */
6174 zio_data_buf_free(cdata, len);
6175 HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_EMPTY);
6176 l2hdr->b_asize = 0;
6177 hdr->b_l1hdr.b_tmp_cdata = NULL;
6178 ARCSTAT_BUMP(arcstat_l2_compress_zeros);
6179 return (B_TRUE);
6180 } else if (csize > 0 && csize < len) {
6181 /*
6182 * Compression succeeded; we'll keep the cdata around for
6183 * writing and release it afterwards.
6184 */
6185 HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_LZ4);
6186 l2hdr->b_asize = csize;
6187 hdr->b_l1hdr.b_tmp_cdata = cdata;
6188 ARCSTAT_BUMP(arcstat_l2_compress_successes);
6189 return (B_TRUE);
6190 } else {
6191 /*
6192 * Compression failed; release the compressed buffer.
6193 * l2hdr will be left unmodified.
6194 */
6195 zio_data_buf_free(cdata, len);
6196 ARCSTAT_BUMP(arcstat_l2_compress_failures);
6197 return (B_FALSE);
6198 }
6199 }
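For orientation, a simplified, illustrative sketch of how the write path is expected to drive this helper; the real l2arc_write_buffers() interleaves these steps with list, lock, and zio management:

/* Illustrative call sequence only -- not the actual write loop. */
static void
l2arc_write_one_example(arc_buf_hdr_t *hdr, boolean_t try_compress)
{
	if (try_compress)
		(void) l2arc_compress_buf(hdr);

	if (hdr->b_l2hdr.b_asize != 0) {
		/* issue the zio write from b_tmp_cdata for b_asize bytes */
	}

	/* once the write completes, drop any temporary cdata */
	l2arc_release_cdata_buf(hdr);
}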
6200
6201 /*
6202 * Decompresses a zio read back from an l2arc device. On success, the
6203 * underlying zio's io_data buffer is overwritten by the uncompressed
6204 * version. On decompression error (corrupt compressed stream), the
6205 * zio->io_error value is set to signal an I/O error.
6252 bcopy(zio->io_data, cdata, csize);
6253 if (zio_decompress_data(c, cdata, zio->io_data, csize,
6254 hdr->b_size) != 0)
6255 zio->io_error = EIO;
6256 zio_data_buf_free(cdata, csize);
6257 }
6258
6259 /* Restore the expected uncompressed IO size. */
6260 zio->io_orig_size = zio->io_size = hdr->b_size;
6261 }
6262
6263 /*
6264 * Releases the temporary b_tmp_cdata buffer in an l2arc header structure.
6265 * This buffer serves as a temporary holder of compressed data while
6266 * the buffer entry is being written to an l2arc device. Once that is
6267 * done, we can dispose of it.
6268 */
6269 static void
6270 l2arc_release_cdata_buf(arc_buf_hdr_t *hdr)
6271 {
6272 enum zio_compress comp = HDR_GET_COMPRESS(hdr);
6273
6274 ASSERT(HDR_HAS_L1HDR(hdr));
6275 ASSERT(comp == ZIO_COMPRESS_OFF || L2ARC_IS_VALID_COMPRESS(comp));
6276
6277 if (comp == ZIO_COMPRESS_OFF) {
6278 /*
6279 * In this case, b_tmp_cdata points to the same buffer
6280 * as the arc_buf_t's b_data field. We don't want to
6281 * free it, since the arc_buf_t will handle that.
6282 */
6283 hdr->b_l1hdr.b_tmp_cdata = NULL;
6284 } else if (comp == ZIO_COMPRESS_EMPTY) {
6285 /*
6286 * In this case, b_tmp_cdata was compressed to an empty
6287 * buffer, thus there's nothing to free and b_tmp_cdata
6288 * should have been set to NULL in l2arc_write_buffers().
6289 */
6290 ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
6291 } else {
6292 /*
/* ==================================================================== */
/* Revised version of the same excerpts follows: the compression mode  */
/* moves out of b_flags (HDR_GET_COMPRESS/HDR_SET_COMPRESS) and into   */
/* an explicit b_compress field in l2arc_buf_hdr_t.                    */
/* ==================================================================== */
727
728 /* updated atomically */
729 clock_t b_arc_access;
730
731 /* self protecting */
732 refcount_t b_refcnt;
733
734 arc_callback_t *b_acb;
735 /* temporary buffer holder for in-flight compressed data */
736 void *b_tmp_cdata;
737 } l1arc_buf_hdr_t;
738
739 typedef struct l2arc_dev l2arc_dev_t;
740
741 typedef struct l2arc_buf_hdr {
742 /* protected by arc_buf_hdr mutex */
743 l2arc_dev_t *b_dev; /* L2ARC device */
744 uint64_t b_daddr; /* disk address, offset byte */
744 /* actual allocated buffer size, depending on the compression applied (b_compress) */
746 int32_t b_asize;
747 uint8_t b_compress;
748
749 list_node_t b_l2node;
750 } l2arc_buf_hdr_t;
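With this revision the compression mode is an ordinary field instead of bits packed into b_flags, so it is read under the same lock that protects the rest of the l2hdr. A minimal accessor sketch, assuming the arc_buf_hdr mutex named in the struct comment is held:

/* Illustrative accessor; caller holds the protecting mutex. */
static enum zio_compress
hdr_l2_compress_example(arc_buf_hdr_t *hdr)
{
	ASSERT(HDR_HAS_L2HDR(hdr));
	return (hdr->b_l2hdr.b_compress);
}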
751
752 struct arc_buf_hdr {
753 /* protected by hash lock */
754 dva_t b_dva;
755 uint64_t b_birth;
756 /*
757 * Even though this checksum is only set/verified when a buffer is in
758 * the L1 cache, it needs to be in the set of common fields because it
759 * must be preserved from the time before a buffer is written out to
760 * L2ARC until after it is read back in.
761 */
762 zio_cksum_t *b_freeze_cksum;
763
764 arc_buf_hdr_t *b_hash_next;
765 arc_flags_t b_flags;
766
767 /* immutable */
787 #define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_FLAG_PREFETCH)
788 #define HDR_FREED_IN_READ(hdr) ((hdr)->b_flags & ARC_FLAG_FREED_IN_READ)
789 #define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_FLAG_BUF_AVAILABLE)
790
791 #define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_FLAG_L2CACHE)
792 #define HDR_L2COMPRESS(hdr) ((hdr)->b_flags & ARC_FLAG_L2COMPRESS)
793 #define HDR_L2_READING(hdr) \
794 (((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) && \
795 ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR))
796 #define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITING)
797 #define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_FLAG_L2_EVICTED)
798 #define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD)
799
800 #define HDR_ISTYPE_METADATA(hdr) \
801 ((hdr)->b_flags & ARC_FLAG_BUFC_METADATA)
802 #define HDR_ISTYPE_DATA(hdr) (!HDR_ISTYPE_METADATA(hdr))
803
804 #define HDR_HAS_L1HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L1HDR)
805 #define HDR_HAS_L2HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)
806
807 /*
808 * Other sizes
809 */
810
811 #define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
812 #define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
813
814 /*
815 * Hash table routines
816 */
817
818 #define HT_LOCK_PAD 64
819
820 struct ht_lock {
821 kmutex_t ht_lock;
822 #ifdef _KERNEL
823 unsigned char pad[(HT_LOCK_PAD - sizeof (kmutex_t))];
824 #endif
825 };
826
2017 return;
2018
2019 /*
2020 * The header isn't being written to the l2arc device, thus it
2021 * shouldn't have a b_tmp_cdata to free.
2022 */
2023 if (!HDR_L2_WRITING(hdr)) {
2024 ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
2025 return;
2026 }
2027
2028 /*
2029 * The header does not have compression enabled. This can be due
2030 * to the buffer not being compressible, or because we're
2031 * freeing the buffer before the second phase of
2032 * l2arc_write_buffers() has started (which does the compression
2033 * step). In either case, b_tmp_cdata does not point to a
2034 * separately compressed buffer, so there's nothing to free (it
2035 * points to the same buffer as the arc_buf_t's b_data field).
2036 */
2037 if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_OFF) {
2038 hdr->b_l1hdr.b_tmp_cdata = NULL;
2039 return;
2040 }
2041
2042 /*
2043 * There's nothing to free since the buffer was all zeros and
2044 * compressed to a zero length buffer.
2045 */
2046 if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_EMPTY) {
2047 ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
2048 return;
2049 }
2050
2051 ASSERT(L2ARC_IS_VALID_COMPRESS(hdr->b_l2hdr.b_compress));
2052
2053 arc_buf_free_on_write(hdr->b_l1hdr.b_tmp_cdata,
2054 hdr->b_size, zio_data_buf_free);
2055
2056 ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
2057 hdr->b_l1hdr.b_tmp_cdata = NULL;
2058 }
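/*
 * Note (assumption for illustration): since this revision reads
 * hdr->b_l2hdr.b_compress directly, the elided top of the function
 * above is presumed to bail out early when !HDR_HAS_L2HDR(hdr),
 * mirroring the ASSERT(HDR_HAS_L2HDR(hdr)) added in
 * l2arc_release_cdata_buf().
 */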
2059
2060 /*
2061 * Free up buf->b_data and, if 'remove' is set, pull the
2062 * arc_buf_t off of the arc_buf_hdr_t's list and free it.
2063 */
2064 static void
2065 arc_buf_destroy(arc_buf_t *buf, boolean_t remove)
2066 {
2067 arc_buf_t **bufp;
2068
2069 /* free up data associated with the buf */
2070 if (buf->b_data != NULL) {
2071 arc_state_t *state = buf->b_hdr->b_l1hdr.b_state;
4144 ASSERT0(hdr->b_l1hdr.b_datacnt);
4145 hdr->b_l1hdr.b_datacnt = 1;
4146 arc_get_data_buf(buf);
4147 arc_access(hdr, hash_lock);
4148 }
4149
4150 ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state));
4151
4152 acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
4153 acb->acb_done = done;
4154 acb->acb_private = private;
4155
4156 ASSERT(hdr->b_l1hdr.b_acb == NULL);
4157 hdr->b_l1hdr.b_acb = acb;
4158 hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS;
4159
4160 if (HDR_HAS_L2HDR(hdr) &&
4161 (vd = hdr->b_l2hdr.b_dev->l2ad_vdev) != NULL) {
4162 devw = hdr->b_l2hdr.b_dev->l2ad_writing;
4163 addr = hdr->b_l2hdr.b_daddr;
4164 b_compress = hdr->b_l2hdr.b_compress;
4165 b_asize = hdr->b_l2hdr.b_asize;
4166 /*
4167 * Lock out device removal.
4168 */
4169 if (vdev_is_dead(vd) ||
4170 !spa_config_tryenter(spa, SCL_L2ARC, vd, RW_READER))
4171 vd = NULL;
4172 }
4173
4174 if (hash_lock != NULL)
4175 mutex_exit(hash_lock);
4176
4177 /*
4178 * At this point, we have a level 1 cache miss. Try again in
4179 * L2ARC if possible.
4180 */
4181 ASSERT3U(hdr->b_size, ==, size);
4182 DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp,
4183 uint64_t, size, zbookmark_phys_t *, zb);
4184 ARCSTAT_BUMP(arcstat_misses);
5605 ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE);
5606
5607 spa_config_exit(zio->io_spa, SCL_L2ARC, zio->io_vd);
5608
5609 cb = zio->io_private;
5610 ASSERT(cb != NULL);
5611 buf = cb->l2rcb_buf;
5612 ASSERT(buf != NULL);
5613
5614 hash_lock = HDR_LOCK(buf->b_hdr);
5615 mutex_enter(hash_lock);
5616 hdr = buf->b_hdr;
5617 ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
5618
5619 /*
5620 * If the buffer was compressed, decompress it first.
5621 */
5622 if (cb->l2rcb_compress != ZIO_COMPRESS_OFF)
5623 l2arc_decompress_zio(zio, hdr, cb->l2rcb_compress);
5624 ASSERT(zio->io_data != NULL);
5625 ASSERT3U(zio->io_size, ==, hdr->b_size);
5626 ASSERT3U(BP_GET_LSIZE(&cb->l2rcb_bp), ==, hdr->b_size);
5627
5628 /*
5629 * Check this survived the L2ARC journey.
5630 */
5631 equal = arc_cksum_equal(buf);
5632 if (equal && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) {
5633 mutex_exit(hash_lock);
5634 zio->io_private = buf;
5635 zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */
5636 zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */
5637 arc_read_done(zio);
5638 } else {
5639 mutex_exit(hash_lock);
5640 /*
5641 * Buffer didn't survive caching. Increment stats and
5642 * reissue to the original storage device.
5643 */
5644 if (zio->io_error != 0) {
5645 ARCSTAT_BUMP(arcstat_l2_io_error);
5646 } else {
5647 zio->io_error = SET_ERROR(EIO);
5648 }
5649 if (!equal)
5650 ARCSTAT_BUMP(arcstat_l2_cksum_bad);
5651
5652 /*
5653 * If there's no waiter, issue an async i/o to the primary
5654 * storage now. If there *is* a waiter, the caller must
5655 * issue the i/o in a context where it's OK to block.
5656 */
5657 if (zio->io_waiter == NULL) {
5658 zio_t *pio = zio_unique_parent(zio);
5659
5660 ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL);
5661
5662 zio_nowait(zio_read(pio, cb->l2rcb_spa, &cb->l2rcb_bp,
5663 buf->b_data, hdr->b_size, arc_read_done, buf,
5664 zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb));
5665 }
5666 }
5667
5668 kmem_free(cb, sizeof (l2arc_read_callback_t));
5669 }
5670
5671 /*
5672 * This is the list priority from which the L2ARC will search for pages to
5673 * cache. This is used within loops (0..3) to cycle through lists in the
5674 * desired order. This order can have a significant effect on cache
5675 * performance.
5676 *
5677 * Currently the metadata lists are hit first, MFU then MRU, followed by
5678 * the data lists. This function returns the locked sublist; the
5679 * caller is responsible for unlocking it.
5680 */
5681 static multilist_sublist_t *
5682 l2arc_sublist_lock(int list_num)
5683 {
5941 sizeof (l2arc_write_callback_t), KM_SLEEP);
5942 cb->l2wcb_dev = dev;
5943 cb->l2wcb_head = head;
5944 pio = zio_root(spa, l2arc_write_done, cb,
5945 ZIO_FLAG_CANFAIL);
5946 }
5947
5948 /*
5949 * Create and add a new L2ARC header.
5950 */
5951 hdr->b_l2hdr.b_dev = dev;
5952 hdr->b_flags |= ARC_FLAG_L2_WRITING;
5953 /*
5954 * Temporarily stash the data buffer in b_tmp_cdata.
5955 * The subsequent write step will pick it up from
5956 * there. This is because we can't access b_l1hdr.b_buf
5957 * without holding the hash_lock, which we in turn
5958 * can't access without holding the ARC list locks
5959 * (which we want to avoid during compression/writing).
5960 */
5961 hdr->b_l2hdr.b_compress = ZIO_COMPRESS_OFF;
5962 hdr->b_l2hdr.b_asize = hdr->b_size;
5963 hdr->b_l1hdr.b_tmp_cdata = hdr->b_l1hdr.b_buf->b_data;
5964
5965 /*
5966 * Explicitly set the b_daddr field to a known
5967 * value which means "invalid address". This
5968 * enables us to differentiate which stage of
5969 * l2arc_write_buffers() the particular header
5970 * is in (e.g. this loop, or the one below).
5971 * ARC_FLAG_L2_WRITING is not enough to make
5972 * this distinction, and we need to know in
5973 * order to do proper l2arc vdev accounting in
5974 * arc_release() and arc_hdr_destroy().
5975 *
5976 * Note, we can't use a new flag to distinguish
5977 * the two stages because we don't hold the
5978 * header's hash_lock below, in the second stage
5979 * of this function. Thus, we can't simply
5980 * change the b_flags field to denote that the
5981 * IO has been sent. We can change the b_daddr
6131 * device. To indicate this situation, b_tmp_cdata is NULL'ed, b_asize is
6132 * set to zero and b_compress is set to ZIO_COMPRESS_EMPTY.
6133 * *) Compression succeeded and b_tmp_cdata was replaced with a temporary
6134 * data buffer which holds the compressed data to be written, and b_asize
6135 * tells us how much data there is. b_compress is set to the appropriate
6136 * compression algorithm. Once writing is done, invoke
6137 * l2arc_release_cdata_buf on this l2hdr to free this temporary buffer.
6138 *
6139 * Returns B_TRUE if compression succeeded, or B_FALSE if it didn't (the
6140 * buffer was incompressible).
6141 */
6142 static boolean_t
6143 l2arc_compress_buf(arc_buf_hdr_t *hdr)
6144 {
6145 void *cdata;
6146 size_t csize, len, rounded;
6147 ASSERT(HDR_HAS_L2HDR(hdr));
6148 l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
6149
6150 ASSERT(HDR_HAS_L1HDR(hdr));
6151 ASSERT(l2hdr->b_compress == ZIO_COMPRESS_OFF);
6152 ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL);
6153
6154 len = l2hdr->b_asize;
6155 cdata = zio_data_buf_alloc(len);
6156 ASSERT3P(cdata, !=, NULL);
6157 csize = zio_compress_data(ZIO_COMPRESS_LZ4, hdr->b_l1hdr.b_tmp_cdata,
6158 cdata, l2hdr->b_asize);
6159
6160 rounded = P2ROUNDUP(csize, (size_t)SPA_MINBLOCKSIZE);
6161 if (rounded > csize) {
6162 bzero((char *)cdata + csize, rounded - csize);
6163 csize = rounded;
6164 }
6165
6166 if (csize == 0) {
6167 /* zero block, indicate that there's nothing to write */
6168 zio_data_buf_free(cdata, len);
6169 l2hdr->b_compress = ZIO_COMPRESS_EMPTY;
6170 l2hdr->b_asize = 0;
6171 hdr->b_l1hdr.b_tmp_cdata = NULL;
6172 ARCSTAT_BUMP(arcstat_l2_compress_zeros);
6173 return (B_TRUE);
6174 } else if (csize > 0 && csize < len) {
6175 /*
6176 * Compression succeeded; we'll keep the cdata around for
6177 * writing and release it afterwards.
6178 */
6179 l2hdr->b_compress = ZIO_COMPRESS_LZ4;
6180 l2hdr->b_asize = csize;
6181 hdr->b_l1hdr.b_tmp_cdata = cdata;
6182 ARCSTAT_BUMP(arcstat_l2_compress_successes);
6183 return (B_TRUE);
6184 } else {
6185 /*
6186 * Compression failed; release the compressed buffer.
6187 * l2hdr will be left unmodified.
6188 */
6189 zio_data_buf_free(cdata, len);
6190 ARCSTAT_BUMP(arcstat_l2_compress_failures);
6191 return (B_FALSE);
6192 }
6193 }
6194
6195 /*
6196 * Decompresses a zio read back from an l2arc device. On success, the
6197 * underlying zio's io_data buffer is overwritten by the uncompressed
6198 * version. On decompression error (corrupt compressed stream), the
6199 * zio->io_error value is set to signal an I/O error.
6246 bcopy(zio->io_data, cdata, csize);
6247 if (zio_decompress_data(c, cdata, zio->io_data, csize,
6248 hdr->b_size) != 0)
6249 zio->io_error = EIO;
6250 zio_data_buf_free(cdata, csize);
6251 }
6252
6253 /* Restore the expected uncompressed IO size. */
6254 zio->io_orig_size = zio->io_size = hdr->b_size;
6255 }
6256
6257 /*
6258 * Releases the temporary b_tmp_cdata buffer in an l2arc header structure.
6259 * This buffer serves as a temporary holder of compressed data while
6260 * the buffer entry is being written to an l2arc device. Once that is
6261 * done, we can dispose of it.
6262 */
6263 static void
6264 l2arc_release_cdata_buf(arc_buf_hdr_t *hdr)
6265 {
6266 ASSERT(HDR_HAS_L2HDR(hdr));
6267 enum zio_compress comp = hdr->b_l2hdr.b_compress;
6268
6269 ASSERT(HDR_HAS_L1HDR(hdr));
6270 ASSERT(comp == ZIO_COMPRESS_OFF || L2ARC_IS_VALID_COMPRESS(comp));
6271
6272 if (comp == ZIO_COMPRESS_OFF) {
6273 /*
6274 * In this case, b_tmp_cdata points to the same buffer
6275 * as the arc_buf_t's b_data field. We don't want to
6276 * free it, since the arc_buf_t will handle that.
6277 */
6278 hdr->b_l1hdr.b_tmp_cdata = NULL;
6279 } else if (comp == ZIO_COMPRESS_EMPTY) {
6280 /*
6281 * In this case, b_tmp_cdata was compressed to an empty
6282 * buffer, thus there's nothing to free and b_tmp_cdata
6283 * should have been set to NULL in l2arc_write_buffers().
6284 */
6285 ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
6286 } else {
6287 /*
|