Print this page
5222 l2arc compression buffers "leak"
Author:         Andriy Gapon <avg@FreeBSD.org>
Reviewed by:    Saso Kiselkov <skiselkov.ml@gmail.com>
Reviewed by:    Xin Li <delphij@FreeBSD.org>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/arc.c
          +++ new/usr/src/uts/common/fs/zfs/arc.c
↓ open down ↓ 300 lines elided ↑ open up ↑
 301  301          kstat_named_t arcstat_l2_rw_clash;
 302  302          kstat_named_t arcstat_l2_read_bytes;
 303  303          kstat_named_t arcstat_l2_write_bytes;
 304  304          kstat_named_t arcstat_l2_writes_sent;
 305  305          kstat_named_t arcstat_l2_writes_done;
 306  306          kstat_named_t arcstat_l2_writes_error;
 307  307          kstat_named_t arcstat_l2_writes_hdr_miss;
 308  308          kstat_named_t arcstat_l2_evict_lock_retry;
 309  309          kstat_named_t arcstat_l2_evict_reading;
 310  310          kstat_named_t arcstat_l2_free_on_write;
      311 +        kstat_named_t arcstat_l2_cdata_free_on_write;
 311  312          kstat_named_t arcstat_l2_abort_lowmem;
 312  313          kstat_named_t arcstat_l2_cksum_bad;
 313  314          kstat_named_t arcstat_l2_io_error;
 314  315          kstat_named_t arcstat_l2_size;
 315  316          kstat_named_t arcstat_l2_asize;
 316  317          kstat_named_t arcstat_l2_hdr_size;
 317  318          kstat_named_t arcstat_l2_compress_successes;
 318  319          kstat_named_t arcstat_l2_compress_zeros;
 319  320          kstat_named_t arcstat_l2_compress_failures;
 320  321          kstat_named_t arcstat_memory_throttle_count;
↓ open down ↓ 46 lines elided ↑ open up ↑
 367  368          { "l2_rw_clash",                KSTAT_DATA_UINT64 },
 368  369          { "l2_read_bytes",              KSTAT_DATA_UINT64 },
 369  370          { "l2_write_bytes",             KSTAT_DATA_UINT64 },
 370  371          { "l2_writes_sent",             KSTAT_DATA_UINT64 },
 371  372          { "l2_writes_done",             KSTAT_DATA_UINT64 },
 372  373          { "l2_writes_error",            KSTAT_DATA_UINT64 },
 373  374          { "l2_writes_hdr_miss",         KSTAT_DATA_UINT64 },
 374  375          { "l2_evict_lock_retry",        KSTAT_DATA_UINT64 },
 375  376          { "l2_evict_reading",           KSTAT_DATA_UINT64 },
 376  377          { "l2_free_on_write",           KSTAT_DATA_UINT64 },
      378 +        { "l2_cdata_free_on_write",     KSTAT_DATA_UINT64 },
 377  379          { "l2_abort_lowmem",            KSTAT_DATA_UINT64 },
 378  380          { "l2_cksum_bad",               KSTAT_DATA_UINT64 },
 379  381          { "l2_io_error",                KSTAT_DATA_UINT64 },
 380  382          { "l2_size",                    KSTAT_DATA_UINT64 },
 381  383          { "l2_asize",                   KSTAT_DATA_UINT64 },
 382  384          { "l2_hdr_size",                KSTAT_DATA_UINT64 },
 383  385          { "l2_compress_successes",      KSTAT_DATA_UINT64 },
 384  386          { "l2_compress_zeros",          KSTAT_DATA_UINT64 },
 385  387          { "l2_compress_failures",       KSTAT_DATA_UINT64 },
 386  388          { "memory_throttle_count",      KSTAT_DATA_UINT64 },
↓ open down ↓ 1079 lines elided ↑ open up ↑
1466 1468          add_reference(hdr, hash_lock, tag);
1467 1469          DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
1468 1470          arc_access(hdr, hash_lock);
1469 1471          mutex_exit(hash_lock);
1470 1472          ARCSTAT_BUMP(arcstat_hits);
1471 1473          ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH),
1472 1474              demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
1473 1475              data, metadata, hits);
1474 1476  }
1475 1477  
     1478 +static void
     1479 +arc_buf_free_on_write(void *data, size_t size,
     1480 +    void (*free_func)(void *, size_t))
     1481 +{
     1482 +        l2arc_data_free_t *df;
     1483 +
     1484 +        df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
     1485 +        df->l2df_data = data;
     1486 +        df->l2df_size = size;
     1487 +        df->l2df_func = free_func;
     1488 +        mutex_enter(&l2arc_free_on_write_mtx);
     1489 +        list_insert_head(l2arc_free_on_write, df);
     1490 +        mutex_exit(&l2arc_free_on_write_mtx);
     1491 +}
     1492 +
1476 1493  /*
1477 1494   * Free the arc data buffer.  If it is an l2arc write in progress,
1478 1495   * the buffer is placed on l2arc_free_on_write to be freed later.
1479 1496   */
1480 1497  static void
1481 1498  arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
1482 1499  {
1483 1500          arc_buf_hdr_t *hdr = buf->b_hdr;
1484 1501  
1485 1502          if (HDR_L2_WRITING(hdr)) {
1486      -                l2arc_data_free_t *df;
1487      -                df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
1488      -                df->l2df_data = buf->b_data;
1489      -                df->l2df_size = hdr->b_size;
1490      -                df->l2df_func = free_func;
1491      -                mutex_enter(&l2arc_free_on_write_mtx);
1492      -                list_insert_head(l2arc_free_on_write, df);
1493      -                mutex_exit(&l2arc_free_on_write_mtx);
     1503 +                arc_buf_free_on_write(buf->b_data, hdr->b_size, free_func);
1494 1504                  ARCSTAT_BUMP(arcstat_l2_free_on_write);
1495 1505          } else {
1496 1506                  free_func(buf->b_data, hdr->b_size);
1497 1507          }
1498 1508  }
1499 1509  
1500 1510  /*
1501 1511   * Free up buf->b_data and if 'remove' is set, then pull the
1502 1512   * arc_buf_t off of the arc_buf_hdr_t's list and free it.
1503 1513   */
1504 1514  static void
     1515 +arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr)
     1516 +{
     1517 +        l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
     1518 +
     1519 +        ASSERT(MUTEX_HELD(&l2arc_buflist_mtx));
     1520 +
     1521 +        if (l2hdr->b_tmp_cdata == NULL)
     1522 +                return;
     1523 +
     1524 +        ASSERT(HDR_L2_WRITING(hdr));
     1525 +        arc_buf_free_on_write(l2hdr->b_tmp_cdata, hdr->b_size,
     1526 +            zio_data_buf_free);
     1527 +        ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
     1528 +        l2hdr->b_tmp_cdata = NULL;
     1529 +}
     1530 +
     1531 +static void
1505 1532  arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
1506 1533  {
1507 1534          arc_buf_t **bufp;
1508 1535  
1509 1536          /* free up data associated with the buf */
1510 1537          if (buf->b_data) {
1511 1538                  arc_state_t *state = buf->b_hdr->b_state;
1512 1539                  uint64_t size = buf->b_hdr->b_size;
1513 1540                  arc_buf_contents_t type = buf->b_hdr->b_type;
1514 1541  
↓ open down ↓ 74 lines elided ↑ open up ↑
1589 1616                   * The hdr may be removed from l2ad_buflist before we
1590 1617                   * grab l2arc_buflist_mtx, so b_l2hdr is rechecked.
1591 1618                   */
1592 1619                  if (!buflist_held) {
1593 1620                          mutex_enter(&l2arc_buflist_mtx);
1594 1621                          l2hdr = hdr->b_l2hdr;
1595 1622                  }
1596 1623  
1597 1624                  if (l2hdr != NULL) {
1598 1625                          list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
     1626 +                        arc_buf_l2_cdata_free(hdr);
1599 1627                          ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
1600 1628                          ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
1601 1629                          vdev_space_update(l2hdr->b_dev->l2ad_vdev,
1602 1630                              -l2hdr->b_asize, 0, 0);
1603 1631                          kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
1604 1632                          if (hdr->b_state == arc_l2c_only)
1605 1633                                  l2arc_hdr_stat_remove();
1606 1634                          hdr->b_l2hdr = NULL;
1607 1635                  }
1608 1636  
↓ open down ↓ 1735 lines elided ↑ open up ↑
3344 3372          } else {
3345 3373                  hash_lock = HDR_LOCK(hdr);
3346 3374                  mutex_enter(hash_lock);
3347 3375                  hdr = buf->b_hdr;
3348 3376                  ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
3349 3377          }
3350 3378  
3351 3379          l2hdr = hdr->b_l2hdr;
3352 3380          if (l2hdr) {
3353 3381                  mutex_enter(&l2arc_buflist_mtx);
     3382 +                arc_buf_l2_cdata_free(hdr);
3354 3383                  hdr->b_l2hdr = NULL;
3355 3384                  list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
3356 3385          }
3357 3386          buf_size = hdr->b_size;
3358 3387  
3359 3388          /*
3360 3389           * Do we have more than one buf?
3361 3390           */
3362 3391          if (hdr->b_datacnt > 1) {
3363 3392                  arc_buf_hdr_t *nhdr;
↓ open down ↓ 1160 lines elided ↑ open up ↑
4524 4553                          }
4525 4554  
4526 4555                          /*
4527 4556                           * Tell ARC this no longer exists in L2ARC.
4528 4557                           */
4529 4558                          if (ab->b_l2hdr != NULL) {
4530 4559                                  abl2 = ab->b_l2hdr;
4531 4560                                  ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
4532 4561                                  bytes_evicted += abl2->b_asize;
4533 4562                                  ab->b_l2hdr = NULL;
     4563 +                                /*
     4564 +                                 * We are destroying l2hdr, so ensure that
     4565 +                                 * its compressed buffer, if any, is not leaked.
     4566 +                                 */
     4567 +                                ASSERT(abl2->b_tmp_cdata == NULL);
4534 4568                                  kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
4535 4569                                  ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
4536 4570                          }
4537 4571                          list_remove(buflist, ab);
4538 4572  
4539 4573                          /*
4540 4574                           * This may have been leftover after a
4541 4575                           * failed write.
4542 4576                           */
4543 4577                          ab->b_flags &= ~ARC_L2_WRITING;
↓ open down ↓ 208 lines elided ↑ open up ↑
4752 4786                          }
4753 4787                  }
4754 4788  
4755 4789                  /*
4756 4790                   * Pick up the buffer data we had previously stashed away
4757 4791                   * (and now potentially also compressed).
4758 4792                   */
4759 4793                  buf_data = l2hdr->b_tmp_cdata;
4760 4794                  buf_sz = l2hdr->b_asize;
4761 4795  
     4796 +                /*
     4797 +                 * If the data has not been compressed, then clear b_tmp_cdata
     4798 +                 * to make sure that it points only to a temporary compression
     4799 +                 * buffer.
     4800 +                 */
     4801 +                if (!L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress))
     4802 +                        l2hdr->b_tmp_cdata = NULL;
     4803 +
4762 4804                  /* Compression may have squashed the buffer to zero length. */
4763 4805                  if (buf_sz != 0) {
4764 4806                          uint64_t buf_p_sz;
4765 4807  
4766 4808                          wzio = zio_write_phys(pio, dev->l2ad_vdev,
4767 4809                              dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF,
4768 4810                              NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE,
4769 4811                              ZIO_FLAG_CANFAIL, B_FALSE);
4770 4812  
4771 4813                          DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev,
↓ open down ↓ 170 lines elided ↑ open up ↑
4942 4984   * Releases the temporary b_tmp_cdata buffer in an l2arc header structure.
4943 4985   * This buffer serves as a temporary holder of compressed data while
4944 4986   * the buffer entry is being written to an l2arc device. Once that is
4945 4987   * done, we can dispose of it.
4946 4988   */
4947 4989  static void
4948 4990  l2arc_release_cdata_buf(arc_buf_hdr_t *ab)
4949 4991  {
4950 4992          l2arc_buf_hdr_t *l2hdr = ab->b_l2hdr;
4951 4993  
4952      -        if (l2hdr->b_compress == ZIO_COMPRESS_LZ4) {
     4994 +        ASSERT(L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress));
     4995 +        if (l2hdr->b_compress != ZIO_COMPRESS_EMPTY) {
4953 4996                  /*
4954 4997                   * If the data was compressed, then we've allocated a
4955 4998                   * temporary buffer for it, so now we need to release it.
4956 4999                   */
4957 5000                  ASSERT(l2hdr->b_tmp_cdata != NULL);
4958 5001                  zio_data_buf_free(l2hdr->b_tmp_cdata, ab->b_size);
     5002 +                l2hdr->b_tmp_cdata = NULL;
     5003 +        } else {
     5004 +                ASSERT(l2hdr->b_tmp_cdata == NULL);
4959 5005          }
4960      -        l2hdr->b_tmp_cdata = NULL;
4961 5006  }
4962 5007  
4963 5008  /*
4964 5009   * This thread feeds the L2ARC at regular intervals.  This is the beating
4965 5010   * heart of the L2ARC.
4966 5011   */
4967 5012  static void
4968 5013  l2arc_feed_thread(void)
4969 5014  {
4970 5015          callb_cpr_t cpr;
↓ open down ↓ 253 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX