Print this page
6214 zpools going south

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/arc.c
          +++ new/usr/src/uts/common/fs/zfs/arc.c
↓ open down ↓ 736 lines elided ↑ open up ↑
 737  737  } l1arc_buf_hdr_t;
 738  738  
 739  739  typedef struct l2arc_dev l2arc_dev_t;
 740  740  
 741  741  typedef struct l2arc_buf_hdr {
 742  742          /* protected by arc_buf_hdr mutex */
 743  743          l2arc_dev_t             *b_dev;         /* L2ARC device */
 744  744          uint64_t                b_daddr;        /* disk address, offset byte */
 745  745          /* real alloc'd buffer size depending on b_compress applied */
 746  746          int32_t                 b_asize;
      747 +        uint8_t                 b_compress;
 747  748  
 748  749          list_node_t             b_l2node;
 749  750  } l2arc_buf_hdr_t;
 750  751  
 751  752  struct arc_buf_hdr {
 752  753          /* protected by hash lock */
 753  754          dva_t                   b_dva;
 754  755          uint64_t                b_birth;
 755  756          /*
 756  757           * Even though this checksum is only set/verified when a buffer is in
↓ open down ↓ 39 lines elided ↑ open up ↑
 796  797  #define HDR_L2_EVICTED(hdr)     ((hdr)->b_flags & ARC_FLAG_L2_EVICTED)
 797  798  #define HDR_L2_WRITE_HEAD(hdr)  ((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD)
 798  799  
 799  800  #define HDR_ISTYPE_METADATA(hdr)        \
 800  801              ((hdr)->b_flags & ARC_FLAG_BUFC_METADATA)
 801  802  #define HDR_ISTYPE_DATA(hdr)    (!HDR_ISTYPE_METADATA(hdr))
 802  803  
 803  804  #define HDR_HAS_L1HDR(hdr)      ((hdr)->b_flags & ARC_FLAG_HAS_L1HDR)
 804  805  #define HDR_HAS_L2HDR(hdr)      ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)
 805  806  
 806      -/* For storing compression mode in b_flags */
 807      -#define HDR_COMPRESS_OFFSET     24
 808      -#define HDR_COMPRESS_NBITS      7
 809      -
 810      -#define HDR_GET_COMPRESS(hdr)   ((enum zio_compress)BF32_GET(hdr->b_flags, \
 811      -            HDR_COMPRESS_OFFSET, HDR_COMPRESS_NBITS))
 812      -#define HDR_SET_COMPRESS(hdr, cmp) BF32_SET(hdr->b_flags, \
 813      -            HDR_COMPRESS_OFFSET, HDR_COMPRESS_NBITS, (cmp))
 814      -
 815  807  /*
 816  808   * Other sizes
 817  809   */
 818  810  
 819  811  #define HDR_FULL_SIZE ((int64_t)sizeof (arc_buf_hdr_t))
 820  812  #define HDR_L2ONLY_SIZE ((int64_t)offsetof(arc_buf_hdr_t, b_l1hdr))
 821  813  
 822  814  /*
 823  815   * Hash table routines
 824  816   */
↓ open down ↓ 1210 lines elided ↑ open up ↑
2035 2027  
2036 2028          /*
2037 2029           * The header does not have compression enabled. This can be due
2038 2030           * to the buffer not being compressible, or because we're
2039 2031           * freeing the buffer before the second phase of
2040 2032           * l2arc_write_buffer() has started (which does the compression
2041 2033           * step). In either case, b_tmp_cdata does not point to a
2042 2034           * separately compressed buffer, so there's nothing to free (it
2043 2035           * points to the same buffer as the arc_buf_t's b_data field).
2044 2036           */
2045      -        if (HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF) {
     2037 +        if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_OFF) {
2046 2038                  hdr->b_l1hdr.b_tmp_cdata = NULL;
2047 2039                  return;
2048 2040          }
2049 2041  
2050 2042          /*
2051 2043           * There's nothing to free since the buffer was all zeros and
2052 2044           * compressed to a zero length buffer.
2053 2045           */
2054      -        if (HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_EMPTY) {
     2046 +        if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_EMPTY) {
2055 2047                  ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
2056 2048                  return;
2057 2049          }
2058 2050  
2059      -        ASSERT(L2ARC_IS_VALID_COMPRESS(HDR_GET_COMPRESS(hdr)));
     2051 +        ASSERT(L2ARC_IS_VALID_COMPRESS(hdr->b_l2hdr.b_compress));
2060 2052  
2061 2053          arc_buf_free_on_write(hdr->b_l1hdr.b_tmp_cdata,
2062 2054              hdr->b_size, zio_data_buf_free);
2063 2055  
2064 2056          ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
2065 2057          hdr->b_l1hdr.b_tmp_cdata = NULL;
2066 2058  }
2067 2059  
2068 2060  /*
2069 2061   * Free up buf->b_data and if 'remove' is set, then pull the
↓ open down ↓ 2092 lines elided ↑ open up ↑
4162 4154                  acb->acb_private = private;
4163 4155  
4164 4156                  ASSERT(hdr->b_l1hdr.b_acb == NULL);
4165 4157                  hdr->b_l1hdr.b_acb = acb;
4166 4158                  hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS;
4167 4159  
4168 4160                  if (HDR_HAS_L2HDR(hdr) &&
4169 4161                      (vd = hdr->b_l2hdr.b_dev->l2ad_vdev) != NULL) {
4170 4162                          devw = hdr->b_l2hdr.b_dev->l2ad_writing;
4171 4163                          addr = hdr->b_l2hdr.b_daddr;
4172      -                        b_compress = HDR_GET_COMPRESS(hdr);
     4164 +                        b_compress = hdr->b_l2hdr.b_compress;
4173 4165                          b_asize = hdr->b_l2hdr.b_asize;
4174 4166                          /*
4175 4167                           * Lock out device removal.
4176 4168                           */
4177 4169                          if (vdev_is_dead(vd) ||
4178 4170                              !spa_config_tryenter(spa, SCL_L2ARC, vd, RW_READER))
4179 4171                                  vd = NULL;
4180 4172                  }
4181 4173  
4182 4174                  if (hash_lock != NULL)
↓ open down ↓ 1440 lines elided ↑ open up ↑
5623 5615          mutex_enter(hash_lock);
5624 5616          hdr = buf->b_hdr;
5625 5617          ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
5626 5618  
5627 5619          /*
5628 5620           * If the buffer was compressed, decompress it first.
5629 5621           */
5630 5622          if (cb->l2rcb_compress != ZIO_COMPRESS_OFF)
5631 5623                  l2arc_decompress_zio(zio, hdr, cb->l2rcb_compress);
5632 5624          ASSERT(zio->io_data != NULL);
     5625 +        ASSERT3U(zio->io_size, ==, hdr->b_size);
     5626 +        ASSERT3U(BP_GET_LSIZE(&cb->l2rcb_bp), ==, hdr->b_size);
5633 5627  
5634 5628          /*
5635 5629           * Check this survived the L2ARC journey.
5636 5630           */
5637 5631          equal = arc_cksum_equal(buf);
5638 5632          if (equal && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) {
5639 5633                  mutex_exit(hash_lock);
5640 5634                  zio->io_private = buf;
5641 5635                  zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */
5642 5636                  zio->io_bp = &zio->io_bp_copy;  /* XXX fix in L2ARC 2.0 */
↓ open down ↓ 16 lines elided ↑ open up ↑
5659 5653                   * If there's no waiter, issue an async i/o to the primary
5660 5654                   * storage now.  If there *is* a waiter, the caller must
5661 5655                   * issue the i/o in a context where it's OK to block.
5662 5656                   */
5663 5657                  if (zio->io_waiter == NULL) {
5664 5658                          zio_t *pio = zio_unique_parent(zio);
5665 5659  
5666 5660                          ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL);
5667 5661  
5668 5662                          zio_nowait(zio_read(pio, cb->l2rcb_spa, &cb->l2rcb_bp,
5669      -                            buf->b_data, zio->io_size, arc_read_done, buf,
     5663 +                            buf->b_data, hdr->b_size, arc_read_done, buf,
5670 5664                              zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb));
5671 5665                  }
5672 5666          }
5673 5667  
5674 5668          kmem_free(cb, sizeof (l2arc_read_callback_t));
5675 5669  }
5676 5670  
5677 5671  /*
5678 5672   * This is the list priority from which the L2ARC will search for pages to
5679 5673   * cache.  This is used within loops (0..3) to cycle through lists in the
↓ open down ↓ 277 lines elided ↑ open up ↑
5957 5951                          hdr->b_l2hdr.b_dev = dev;
5958 5952                          hdr->b_flags |= ARC_FLAG_L2_WRITING;
5959 5953                          /*
5960 5954                           * Temporarily stash the data buffer in b_tmp_cdata.
5961 5955                           * The subsequent write step will pick it up from
5962 5956                           * there. This is because can't access b_l1hdr.b_buf
5963 5957                           * there. This is because we can't access b_l1hdr.b_buf
5964 5958                           * can't access without holding the ARC list locks
5965 5959                           * (which we want to avoid during compression/writing).
5966 5960                           */
5967      -                        HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
     5961 +                        hdr->b_l2hdr.b_compress = ZIO_COMPRESS_OFF;
5968 5962                          hdr->b_l2hdr.b_asize = hdr->b_size;
5969 5963                          hdr->b_l1hdr.b_tmp_cdata = hdr->b_l1hdr.b_buf->b_data;
5970 5964  
5971 5965                          /*
5972 5966                           * Explicitly set the b_daddr field to a known
5973 5967                           * value which means "invalid address". This
5974 5968                           * enables us to differentiate which stage of
5975 5969                           * l2arc_write_buffers() the particular header
5976 5970                           * is in (e.g. this loop, or the one below).
5977 5971                           * ARC_FLAG_L2_WRITING is not enough to make
↓ open down ↓ 169 lines elided ↑ open up ↑
6147 6141   */
6148 6142  static boolean_t
6149 6143  l2arc_compress_buf(arc_buf_hdr_t *hdr)
6150 6144  {
6151 6145          void *cdata;
6152 6146          size_t csize, len, rounded;
6153 6147          ASSERT(HDR_HAS_L2HDR(hdr));
6154 6148          l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
6155 6149  
6156 6150          ASSERT(HDR_HAS_L1HDR(hdr));
6157      -        ASSERT(HDR_GET_COMPRESS(hdr) == ZIO_COMPRESS_OFF);
     6151 +        ASSERT(l2hdr->b_compress == ZIO_COMPRESS_OFF);
6158 6152          ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL);
6159 6153  
6160 6154          len = l2hdr->b_asize;
6161 6155          cdata = zio_data_buf_alloc(len);
6162 6156          ASSERT3P(cdata, !=, NULL);
6163 6157          csize = zio_compress_data(ZIO_COMPRESS_LZ4, hdr->b_l1hdr.b_tmp_cdata,
6164 6158              cdata, l2hdr->b_asize);
6165 6159  
6166 6160          rounded = P2ROUNDUP(csize, (size_t)SPA_MINBLOCKSIZE);
6167 6161          if (rounded > csize) {
6168 6162                  bzero((char *)cdata + csize, rounded - csize);
6169 6163                  csize = rounded;
6170 6164          }
6171 6165  
6172 6166          if (csize == 0) {
6173 6167                  /* zero block, indicate that there's nothing to write */
6174 6168                  zio_data_buf_free(cdata, len);
6175      -                HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_EMPTY);
     6169 +                l2hdr->b_compress = ZIO_COMPRESS_EMPTY;
6176 6170                  l2hdr->b_asize = 0;
6177 6171                  hdr->b_l1hdr.b_tmp_cdata = NULL;
6178 6172                  ARCSTAT_BUMP(arcstat_l2_compress_zeros);
6179 6173                  return (B_TRUE);
6180 6174          } else if (csize > 0 && csize < len) {
6181 6175                  /*
6182 6176                   * Compression succeeded, we'll keep the cdata around for
6183 6177                   * writing and release it afterwards.
6184 6178                   */
6185      -                HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_LZ4);
     6179 +                l2hdr->b_compress = ZIO_COMPRESS_LZ4;
6186 6180                  l2hdr->b_asize = csize;
6187 6181                  hdr->b_l1hdr.b_tmp_cdata = cdata;
6188 6182                  ARCSTAT_BUMP(arcstat_l2_compress_successes);
6189 6183                  return (B_TRUE);
6190 6184          } else {
6191 6185                  /*
6192 6186                   * Compression failed, release the compressed buffer.
6193 6187                   * l2hdr will be left unmodified.
6194 6188                   */
6195 6189                  zio_data_buf_free(cdata, len);
↓ open down ↓ 66 lines elided ↑ open up ↑
6262 6256  
6263 6257  /*
6264 6258   * Releases the temporary b_tmp_cdata buffer in an l2arc header structure.
6265 6259   * This buffer serves as a temporary holder of compressed data while
6266 6260   * the buffer entry is being written to an l2arc device. Once that is
6267 6261   * done, we can dispose of it.
6268 6262   */
6269 6263  static void
6270 6264  l2arc_release_cdata_buf(arc_buf_hdr_t *hdr)
6271 6265  {
6272      -        enum zio_compress comp = HDR_GET_COMPRESS(hdr);
     6266 +        ASSERT(HDR_HAS_L2HDR(hdr));
     6267 +        enum zio_compress comp = hdr->b_l2hdr.b_compress;
6273 6268  
6274 6269          ASSERT(HDR_HAS_L1HDR(hdr));
6275 6270          ASSERT(comp == ZIO_COMPRESS_OFF || L2ARC_IS_VALID_COMPRESS(comp));
6276 6271  
6277 6272          if (comp == ZIO_COMPRESS_OFF) {
6278 6273                  /*
6279 6274                   * In this case, b_tmp_cdata points to the same buffer
6280 6275                   * as the arc_buf_t's b_data field. We don't want to
6281 6276                   * free it, since the arc_buf_t will handle that.
6282 6277                   */
↓ open down ↓ 281 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX