5222 l2arc compression buffers "leak"
Author:         Andriy Gapon <avg@FreeBSD.org>
Reviewed by:    Saso Kiselkov <skiselkov.ml@gmail.com>
Reviewed by:    Xin Li <delphij@FreeBSD.org>
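
This review shows the affected regions of arc.c twice: first as they were before the change, then the same regions after it. In outline, the change adds an l2_cdata_free_on_write kstat, factors the deferred-free logic of arc_buf_data_free() out into a new arc_buf_free_on_write() helper, and introduces arc_buf_l2_cdata_free(), which queues an L2ARC header's temporary compressed buffer (b_tmp_cdata) onto the same free-on-write list. arc_hdr_destroy() and arc_release() now call that helper, so the compressed copy is reclaimed even when a header is torn down or released while its L2ARC write is still in flight; l2arc_write_buffers() clears b_tmp_cdata when no valid compressed copy was produced; l2arc_evict() asserts that a header being destroyed carries no leftover b_tmp_cdata; and l2arc_release_cdata_buf() handles ZIO_COMPRESS_EMPTY explicitly.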


 291         kstat_named_t arcstat_c;
 292         kstat_named_t arcstat_c_min;
 293         kstat_named_t arcstat_c_max;
 294         kstat_named_t arcstat_size;
 295         kstat_named_t arcstat_hdr_size;
 296         kstat_named_t arcstat_data_size;
 297         kstat_named_t arcstat_other_size;
 298         kstat_named_t arcstat_l2_hits;
 299         kstat_named_t arcstat_l2_misses;
 300         kstat_named_t arcstat_l2_feeds;
 301         kstat_named_t arcstat_l2_rw_clash;
 302         kstat_named_t arcstat_l2_read_bytes;
 303         kstat_named_t arcstat_l2_write_bytes;
 304         kstat_named_t arcstat_l2_writes_sent;
 305         kstat_named_t arcstat_l2_writes_done;
 306         kstat_named_t arcstat_l2_writes_error;
 307         kstat_named_t arcstat_l2_writes_hdr_miss;
 308         kstat_named_t arcstat_l2_evict_lock_retry;
 309         kstat_named_t arcstat_l2_evict_reading;
 310         kstat_named_t arcstat_l2_free_on_write;

 311         kstat_named_t arcstat_l2_abort_lowmem;
 312         kstat_named_t arcstat_l2_cksum_bad;
 313         kstat_named_t arcstat_l2_io_error;
 314         kstat_named_t arcstat_l2_size;
 315         kstat_named_t arcstat_l2_asize;
 316         kstat_named_t arcstat_l2_hdr_size;
 317         kstat_named_t arcstat_l2_compress_successes;
 318         kstat_named_t arcstat_l2_compress_zeros;
 319         kstat_named_t arcstat_l2_compress_failures;
 320         kstat_named_t arcstat_memory_throttle_count;
 321         kstat_named_t arcstat_duplicate_buffers;
 322         kstat_named_t arcstat_duplicate_buffers_size;
 323         kstat_named_t arcstat_duplicate_reads;
 324         kstat_named_t arcstat_meta_used;
 325         kstat_named_t arcstat_meta_limit;
 326         kstat_named_t arcstat_meta_max;
 327 } arc_stats_t;
 328 
 329 static arc_stats_t arc_stats = {
 330         { "hits",                       KSTAT_DATA_UINT64 },


 357         { "c",                          KSTAT_DATA_UINT64 },
 358         { "c_min",                      KSTAT_DATA_UINT64 },
 359         { "c_max",                      KSTAT_DATA_UINT64 },
 360         { "size",                       KSTAT_DATA_UINT64 },
 361         { "hdr_size",                   KSTAT_DATA_UINT64 },
 362         { "data_size",                  KSTAT_DATA_UINT64 },
 363         { "other_size",                 KSTAT_DATA_UINT64 },
 364         { "l2_hits",                    KSTAT_DATA_UINT64 },
 365         { "l2_misses",                  KSTAT_DATA_UINT64 },
 366         { "l2_feeds",                   KSTAT_DATA_UINT64 },
 367         { "l2_rw_clash",                KSTAT_DATA_UINT64 },
 368         { "l2_read_bytes",              KSTAT_DATA_UINT64 },
 369         { "l2_write_bytes",             KSTAT_DATA_UINT64 },
 370         { "l2_writes_sent",             KSTAT_DATA_UINT64 },
 371         { "l2_writes_done",             KSTAT_DATA_UINT64 },
 372         { "l2_writes_error",            KSTAT_DATA_UINT64 },
 373         { "l2_writes_hdr_miss",         KSTAT_DATA_UINT64 },
 374         { "l2_evict_lock_retry",        KSTAT_DATA_UINT64 },
 375         { "l2_evict_reading",           KSTAT_DATA_UINT64 },
 376         { "l2_free_on_write",           KSTAT_DATA_UINT64 },

 377         { "l2_abort_lowmem",            KSTAT_DATA_UINT64 },
 378         { "l2_cksum_bad",               KSTAT_DATA_UINT64 },
 379         { "l2_io_error",                KSTAT_DATA_UINT64 },
 380         { "l2_size",                    KSTAT_DATA_UINT64 },
 381         { "l2_asize",                   KSTAT_DATA_UINT64 },
 382         { "l2_hdr_size",                KSTAT_DATA_UINT64 },
 383         { "l2_compress_successes",      KSTAT_DATA_UINT64 },
 384         { "l2_compress_zeros",          KSTAT_DATA_UINT64 },
 385         { "l2_compress_failures",       KSTAT_DATA_UINT64 },
 386         { "memory_throttle_count",      KSTAT_DATA_UINT64 },
 387         { "duplicate_buffers",          KSTAT_DATA_UINT64 },
 388         { "duplicate_buffers_size",     KSTAT_DATA_UINT64 },
 389         { "duplicate_reads",            KSTAT_DATA_UINT64 },
 390         { "arc_meta_used",              KSTAT_DATA_UINT64 },
 391         { "arc_meta_limit",             KSTAT_DATA_UINT64 },
 392         { "arc_meta_max",               KSTAT_DATA_UINT64 }
 393 };
 394 
 395 #define ARCSTAT(stat)   (arc_stats.stat.value.ui64)
 396 


1456                 mutex_exit(&buf->b_evict_lock);
1457                 return;
1458         }
1459         hash_lock = HDR_LOCK(buf->b_hdr);
1460         mutex_enter(hash_lock);
1461         hdr = buf->b_hdr;
1462         ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
1463         mutex_exit(&buf->b_evict_lock);
1464 
1465         ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
1466         add_reference(hdr, hash_lock, tag);
1467         DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
1468         arc_access(hdr, hash_lock);
1469         mutex_exit(hash_lock);
1470         ARCSTAT_BUMP(arcstat_hits);
1471         ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH),
1472             demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
1473             data, metadata, hits);
1474 }
1475
1476 /*
1477  * Free the arc data buffer.  If it is an l2arc write in progress,
1478  * the buffer is placed on l2arc_free_on_write to be freed later.
1479  */
1480 static void
1481 arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
1482 {
1483         arc_buf_hdr_t *hdr = buf->b_hdr;
1484 
1485         if (HDR_L2_WRITING(hdr)) {
1486                 l2arc_data_free_t *df;
1487                 df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
1488                 df->l2df_data = buf->b_data;
1489                 df->l2df_size = hdr->b_size;
1490                 df->l2df_func = free_func;
1491                 mutex_enter(&l2arc_free_on_write_mtx);
1492                 list_insert_head(l2arc_free_on_write, df);
1493                 mutex_exit(&l2arc_free_on_write_mtx);
1494                 ARCSTAT_BUMP(arcstat_l2_free_on_write);
1495         } else {
1496                 free_func(buf->b_data, hdr->b_size);
1497         }
1498 }
1499 
1500 /*
1501  * Free up buf->b_data and if 'remove' is set, then pull the
1502  * arc_buf_t off of the the arc_buf_hdr_t's list and free it.
1503  */
1504 static void
1505 arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
1506 {
1507         arc_buf_t **bufp;
1508 
1509         /* free up data associated with the buf */
1510         if (buf->b_data) {
1511                 arc_state_t *state = buf->b_hdr->b_state;
1512                 uint64_t size = buf->b_hdr->b_size;
1513                 arc_buf_contents_t type = buf->b_hdr->b_type;
1514 
1515                 arc_cksum_verify(buf);
1516                 arc_buf_unwatch(buf);
1517 
1518                 if (!recycle) {
1519                         if (type == ARC_BUFC_METADATA) {
1520                                 arc_buf_data_free(buf, zio_buf_free);
1521                                 arc_space_return(size, ARC_SPACE_DATA);
1522                         } else {
1523                                 ASSERT(type == ARC_BUFC_DATA);
1524                                 arc_buf_data_free(buf, zio_data_buf_free);


1579 
1580         if (l2hdr != NULL) {
1581                 boolean_t buflist_held = MUTEX_HELD(&l2arc_buflist_mtx);
1582                 /*
1583                  * To prevent arc_free() and l2arc_evict() from
1584                  * attempting to free the same buffer at the same time,
1585                  * a FREE_IN_PROGRESS flag is given to arc_free() to
1586                  * give it priority.  l2arc_evict() can't destroy this
1587                  * header while we are waiting on l2arc_buflist_mtx.
1588                  *
1589                  * The hdr may be removed from l2ad_buflist before we
1590                  * grab l2arc_buflist_mtx, so b_l2hdr is rechecked.
1591                  */
1592                 if (!buflist_held) {
1593                         mutex_enter(&l2arc_buflist_mtx);
1594                         l2hdr = hdr->b_l2hdr;
1595                 }
1596 
1597                 if (l2hdr != NULL) {
1598                         list_remove(l2hdr->b_dev->l2ad_buflist, hdr);

1599                         ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
1600                         ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
1601                         vdev_space_update(l2hdr->b_dev->l2ad_vdev,
1602                             -l2hdr->b_asize, 0, 0);
1603                         kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
1604                         if (hdr->b_state == arc_l2c_only)
1605                                 l2arc_hdr_stat_remove();
1606                         hdr->b_l2hdr = NULL;
1607                 }
1608 
1609                 if (!buflist_held)
1610                         mutex_exit(&l2arc_buflist_mtx);
1611         }
1612 
1613         if (!BUF_EMPTY(hdr)) {
1614                 ASSERT(!HDR_IN_HASH_TABLE(hdr));
1615                 buf_discard_identity(hdr);
1616         }
1617         while (hdr->b_buf) {
1618                 arc_buf_t *buf = hdr->b_buf;


3334 
3335         mutex_enter(&buf->b_evict_lock);
3336         hdr = buf->b_hdr;
3337 
3338         /* this buffer is not on any list */
3339         ASSERT(refcount_count(&hdr->b_refcnt) > 0);
3340 
3341         if (hdr->b_state == arc_anon) {
3342                 /* this buffer is already released */
3343                 ASSERT(buf->b_efunc == NULL);
3344         } else {
3345                 hash_lock = HDR_LOCK(hdr);
3346                 mutex_enter(hash_lock);
3347                 hdr = buf->b_hdr;
3348                 ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
3349         }
3350 
3351         l2hdr = hdr->b_l2hdr;
3352         if (l2hdr) {
3353                 mutex_enter(&l2arc_buflist_mtx);

3354                 hdr->b_l2hdr = NULL;
3355                 list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
3356         }
3357         buf_size = hdr->b_size;
3358 
3359         /*
3360          * Do we have more than one buf?
3361          */
3362         if (hdr->b_datacnt > 1) {
3363                 arc_buf_hdr_t *nhdr;
3364                 arc_buf_t **bufp;
3365                 uint64_t blksz = hdr->b_size;
3366                 uint64_t spa = hdr->b_spa;
3367                 arc_buf_contents_t type = hdr->b_type;
3368                 uint32_t flags = hdr->b_flags;
3369 
3370                 ASSERT(hdr->b_buf != buf || buf->b_next != NULL);
3371                 /*
3372                  * Pull the data off of this hdr and attach it to
3373                  * a new anonymous hdr.


4514                         arc_hdr_destroy(ab);
4515                 } else {
4516                         /*
4517                          * Invalidate issued or about to be issued
4518                          * reads, since we may be about to write
4519                          * over this location.
4520                          */
4521                         if (HDR_L2_READING(ab)) {
4522                                 ARCSTAT_BUMP(arcstat_l2_evict_reading);
4523                                 ab->b_flags |= ARC_L2_EVICTED;
4524                         }
4525 
4526                         /*
4527                          * Tell ARC this no longer exists in L2ARC.
4528                          */
4529                         if (ab->b_l2hdr != NULL) {
4530                                 abl2 = ab->b_l2hdr;
4531                                 ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
4532                                 bytes_evicted += abl2->b_asize;
4533                                 ab->b_l2hdr = NULL;
4534                                 kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
4535                                 ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
4536                         }
4537                         list_remove(buflist, ab);
4538 
4539                         /*
4540                          * This may have been leftover after a
4541                          * failed write.
4542                          */
4543                         ab->b_flags &= ~ARC_L2_WRITING;
4544                 }
4545                 mutex_exit(hash_lock);
4546         }
4547         mutex_exit(&l2arc_buflist_mtx);
4548 
4549         vdev_space_update(dev->l2ad_vdev, -bytes_evicted, 0, 0);
4550         dev->l2ad_evict = taddr;
4551 }
4552 
4553 /*


4742                 l2hdr->b_daddr = dev->l2ad_hand;
4743 
4744                 if ((ab->b_flags & ARC_L2COMPRESS) &&
4745                     l2hdr->b_asize >= buf_compress_minsz) {
4746                         if (l2arc_compress_buf(l2hdr)) {
4747                                 /*
4748                                  * If compression succeeded, enable headroom
4749                                  * boost on the next scan cycle.
4750                                  */
4751                                 *headroom_boost = B_TRUE;
4752                         }
4753                 }
4754 
4755                 /*
4756                  * Pick up the buffer data we had previously stashed away
4757                  * (and now potentially also compressed).
4758                  */
4759                 buf_data = l2hdr->b_tmp_cdata;
4760                 buf_sz = l2hdr->b_asize;
4761 
4762                 /* Compression may have squashed the buffer to zero length. */
4763                 if (buf_sz != 0) {
4764                         uint64_t buf_p_sz;
4765 
4766                         wzio = zio_write_phys(pio, dev->l2ad_vdev,
4767                             dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF,
4768                             NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE,
4769                             ZIO_FLAG_CANFAIL, B_FALSE);
4770 
4771                         DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev,
4772                             zio_t *, wzio);
4773                         (void) zio_nowait(wzio);
4774 
4775                         write_asize += buf_sz;
4776                         /*
4777                          * Keep the clock hand suitably device-aligned.
4778                          */
4779                         buf_p_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
4780                         write_psize += buf_p_sz;
4781                         dev->l2ad_hand += buf_p_sz;


4932                     hdr->b_size) != 0)
4933                         zio->io_error = EIO;
4934                 zio_data_buf_free(cdata, csize);
4935         }
4936 
4937         /* Restore the expected uncompressed IO size. */
4938         zio->io_orig_size = zio->io_size = hdr->b_size;
4939 }
4940 
4941 /*
4942  * Releases the temporary b_tmp_cdata buffer in an l2arc header structure.
4943  * This buffer serves as a temporary holder of compressed data while
4944  * the buffer entry is being written to an l2arc device. Once that is
4945  * done, we can dispose of it.
4946  */
4947 static void
4948 l2arc_release_cdata_buf(arc_buf_hdr_t *ab)
4949 {
4950         l2arc_buf_hdr_t *l2hdr = ab->b_l2hdr;
4951 
4952         if (l2hdr->b_compress == ZIO_COMPRESS_LZ4) {

4953                 /*
4954                  * If the data was compressed, then we've allocated a
4955                  * temporary buffer for it, so now we need to release it.
4956                  */
4957                 ASSERT(l2hdr->b_tmp_cdata != NULL);
4958                 zio_data_buf_free(l2hdr->b_tmp_cdata, ab->b_size);
4959         }
4960         l2hdr->b_tmp_cdata = NULL;
4961 }
4962 
4963 /*
4964  * This thread feeds the L2ARC at regular intervals.  This is the beating
4965  * heart of the L2ARC.
4966  */
4967 static void
4968 l2arc_feed_thread(void)
4969 {
4970         callb_cpr_t cpr;
4971         l2arc_dev_t *dev;
4972         spa_t *spa;
4973         uint64_t size, wrote;
4974         clock_t begin, next = ddi_get_lbolt();
4975         boolean_t headroom_boost = B_FALSE;
4976 
4977         CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG);
4978 
4979         mutex_enter(&l2arc_feed_thr_lock);
4980 




 291         kstat_named_t arcstat_c;
 292         kstat_named_t arcstat_c_min;
 293         kstat_named_t arcstat_c_max;
 294         kstat_named_t arcstat_size;
 295         kstat_named_t arcstat_hdr_size;
 296         kstat_named_t arcstat_data_size;
 297         kstat_named_t arcstat_other_size;
 298         kstat_named_t arcstat_l2_hits;
 299         kstat_named_t arcstat_l2_misses;
 300         kstat_named_t arcstat_l2_feeds;
 301         kstat_named_t arcstat_l2_rw_clash;
 302         kstat_named_t arcstat_l2_read_bytes;
 303         kstat_named_t arcstat_l2_write_bytes;
 304         kstat_named_t arcstat_l2_writes_sent;
 305         kstat_named_t arcstat_l2_writes_done;
 306         kstat_named_t arcstat_l2_writes_error;
 307         kstat_named_t arcstat_l2_writes_hdr_miss;
 308         kstat_named_t arcstat_l2_evict_lock_retry;
 309         kstat_named_t arcstat_l2_evict_reading;
 310         kstat_named_t arcstat_l2_free_on_write;
 311         kstat_named_t arcstat_l2_cdata_free_on_write;
 312         kstat_named_t arcstat_l2_abort_lowmem;
 313         kstat_named_t arcstat_l2_cksum_bad;
 314         kstat_named_t arcstat_l2_io_error;
 315         kstat_named_t arcstat_l2_size;
 316         kstat_named_t arcstat_l2_asize;
 317         kstat_named_t arcstat_l2_hdr_size;
 318         kstat_named_t arcstat_l2_compress_successes;
 319         kstat_named_t arcstat_l2_compress_zeros;
 320         kstat_named_t arcstat_l2_compress_failures;
 321         kstat_named_t arcstat_memory_throttle_count;
 322         kstat_named_t arcstat_duplicate_buffers;
 323         kstat_named_t arcstat_duplicate_buffers_size;
 324         kstat_named_t arcstat_duplicate_reads;
 325         kstat_named_t arcstat_meta_used;
 326         kstat_named_t arcstat_meta_limit;
 327         kstat_named_t arcstat_meta_max;
 328 } arc_stats_t;
 329 
 330 static arc_stats_t arc_stats = {
 331         { "hits",                       KSTAT_DATA_UINT64 },


 358         { "c",                          KSTAT_DATA_UINT64 },
 359         { "c_min",                      KSTAT_DATA_UINT64 },
 360         { "c_max",                      KSTAT_DATA_UINT64 },
 361         { "size",                       KSTAT_DATA_UINT64 },
 362         { "hdr_size",                   KSTAT_DATA_UINT64 },
 363         { "data_size",                  KSTAT_DATA_UINT64 },
 364         { "other_size",                 KSTAT_DATA_UINT64 },
 365         { "l2_hits",                    KSTAT_DATA_UINT64 },
 366         { "l2_misses",                  KSTAT_DATA_UINT64 },
 367         { "l2_feeds",                   KSTAT_DATA_UINT64 },
 368         { "l2_rw_clash",                KSTAT_DATA_UINT64 },
 369         { "l2_read_bytes",              KSTAT_DATA_UINT64 },
 370         { "l2_write_bytes",             KSTAT_DATA_UINT64 },
 371         { "l2_writes_sent",             KSTAT_DATA_UINT64 },
 372         { "l2_writes_done",             KSTAT_DATA_UINT64 },
 373         { "l2_writes_error",            KSTAT_DATA_UINT64 },
 374         { "l2_writes_hdr_miss",         KSTAT_DATA_UINT64 },
 375         { "l2_evict_lock_retry",        KSTAT_DATA_UINT64 },
 376         { "l2_evict_reading",           KSTAT_DATA_UINT64 },
 377         { "l2_free_on_write",           KSTAT_DATA_UINT64 },
 378         { "l2_cdata_free_on_write",     KSTAT_DATA_UINT64 },
 379         { "l2_abort_lowmem",            KSTAT_DATA_UINT64 },
 380         { "l2_cksum_bad",               KSTAT_DATA_UINT64 },
 381         { "l2_io_error",                KSTAT_DATA_UINT64 },
 382         { "l2_size",                    KSTAT_DATA_UINT64 },
 383         { "l2_asize",                   KSTAT_DATA_UINT64 },
 384         { "l2_hdr_size",                KSTAT_DATA_UINT64 },
 385         { "l2_compress_successes",      KSTAT_DATA_UINT64 },
 386         { "l2_compress_zeros",          KSTAT_DATA_UINT64 },
 387         { "l2_compress_failures",       KSTAT_DATA_UINT64 },
 388         { "memory_throttle_count",      KSTAT_DATA_UINT64 },
 389         { "duplicate_buffers",          KSTAT_DATA_UINT64 },
 390         { "duplicate_buffers_size",     KSTAT_DATA_UINT64 },
 391         { "duplicate_reads",            KSTAT_DATA_UINT64 },
 392         { "arc_meta_used",              KSTAT_DATA_UINT64 },
 393         { "arc_meta_limit",             KSTAT_DATA_UINT64 },
 394         { "arc_meta_max",               KSTAT_DATA_UINT64 }
 395 };
 396 
 397 #define ARCSTAT(stat)   (arc_stats.stat.value.ui64)
 398 
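
The only functional change in the stats hunk above is the new l2_cdata_free_on_write counter, which is bumped from arc_buf_l2_cdata_free() further down whenever a temporary compressed buffer has to be queued for free-on-write. On illumos it should be observable with something like the following (shown for illustration; it assumes the usual zfs:0:arcstats kstat naming, which is not part of these excerpts):

    kstat -p zfs:0:arcstats:l2_cdata_free_on_write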


1458                 mutex_exit(&buf->b_evict_lock);
1459                 return;
1460         }
1461         hash_lock = HDR_LOCK(buf->b_hdr);
1462         mutex_enter(hash_lock);
1463         hdr = buf->b_hdr;
1464         ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
1465         mutex_exit(&buf->b_evict_lock);
1466 
1467         ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu);
1468         add_reference(hdr, hash_lock, tag);
1469         DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
1470         arc_access(hdr, hash_lock);
1471         mutex_exit(hash_lock);
1472         ARCSTAT_BUMP(arcstat_hits);
1473         ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH),
1474             demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
1475             data, metadata, hits);
1476 }
1477 
1478 static void
1479 arc_buf_free_on_write(void *data, size_t size,
1480     void (*free_func)(void *, size_t))
1481 {
1482         l2arc_data_free_t *df;
1483 
1484         df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP);
1485         df->l2df_data = data;
1486         df->l2df_size = size;
1487         df->l2df_func = free_func;
1488         mutex_enter(&l2arc_free_on_write_mtx);
1489         list_insert_head(l2arc_free_on_write, df);
1490         mutex_exit(&l2arc_free_on_write_mtx);
1491 }
1492 
1493 /*
1494  * Free the arc data buffer.  If it is an l2arc write in progress,
1495  * the buffer is placed on l2arc_free_on_write to be freed later.
1496  */
1497 static void
1498 arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
1499 {
1500         arc_buf_hdr_t *hdr = buf->b_hdr;
1501 
1502         if (HDR_L2_WRITING(hdr)) {
1503                 arc_buf_free_on_write(buf->b_data, hdr->b_size, free_func);
1504                 ARCSTAT_BUMP(arcstat_l2_free_on_write);
1505         } else {
1506                 free_func(buf->b_data, hdr->b_size);
1507         }
1508 }
1509 
1510 /*
1511  * Free up buf->b_data and if 'remove' is set, then pull the
1512  * arc_buf_t off of the the arc_buf_hdr_t's list and free it.
1513  */
1514 static void
1515 arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr)
1516 {
1517         l2arc_buf_hdr_t *l2hdr = hdr->b_l2hdr;
1518 
1519         ASSERT(MUTEX_HELD(&l2arc_buflist_mtx));
1520 
1521         if (l2hdr->b_tmp_cdata == NULL)
1522                 return;
1523 
1524         ASSERT(HDR_L2_WRITING(hdr));
1525         arc_buf_free_on_write(l2hdr->b_tmp_cdata, hdr->b_size,
1526             zio_data_buf_free);
1527         ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
1528         l2hdr->b_tmp_cdata = NULL;
1529 }
1530 
1531 static void
1532 arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove)
1533 {
1534         arc_buf_t **bufp;
1535 
1536         /* free up data associated with the buf */
1537         if (buf->b_data) {
1538                 arc_state_t *state = buf->b_hdr->b_state;
1539                 uint64_t size = buf->b_hdr->b_size;
1540                 arc_buf_contents_t type = buf->b_hdr->b_type;
1541 
1542                 arc_cksum_verify(buf);
1543                 arc_buf_unwatch(buf);
1544 
1545                 if (!recycle) {
1546                         if (type == ARC_BUFC_METADATA) {
1547                                 arc_buf_data_free(buf, zio_buf_free);
1548                                 arc_space_return(size, ARC_SPACE_DATA);
1549                         } else {
1550                                 ASSERT(type == ARC_BUFC_DATA);
1551                                 arc_buf_data_free(buf, zio_data_buf_free);
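
In the new code above, the deferred-free bookkeeping that arc_buf_data_free() used to open-code is factored into arc_buf_free_on_write(), and a second caller, arc_buf_l2_cdata_free(), uses it to queue a header's temporary compressed buffer (b_tmp_cdata) while an in-flight L2ARC write still references it. (Note that the new helper was inserted between the pre-existing block comment and arc_buf_destroy(), so that comment now appears to describe arc_buf_l2_cdata_free().) The sketch below is a simplified user-space model of this free-on-write pattern, not the kernel code; every name and type in it is illustrative only.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* One queued free: the buffer, its size, and the function to free it with. */
typedef struct deferred_free {
        struct deferred_free    *df_next;
        void                    *df_data;
        size_t                  df_size;
        void                    (*df_func)(void *, size_t);
} deferred_free_t;

static pthread_mutex_t  free_on_write_mtx = PTHREAD_MUTEX_INITIALIZER;
static deferred_free_t  *free_on_write_list;

/* Analogue of arc_buf_free_on_write(): queue the buffer instead of freeing it. */
static void
buf_free_on_write(void *data, size_t size, void (*free_func)(void *, size_t))
{
        /* KM_SLEEP analogue: assume the allocation succeeds. */
        deferred_free_t *df = malloc(sizeof (*df));

        df->df_data = data;
        df->df_size = size;
        df->df_func = free_func;
        pthread_mutex_lock(&free_on_write_mtx);
        df->df_next = free_on_write_list;
        free_on_write_list = df;
        pthread_mutex_unlock(&free_on_write_mtx);
}

/* Analogue of the write-completion path: drain the deferred frees. */
static void
drain_free_on_write(void)
{
        pthread_mutex_lock(&free_on_write_mtx);
        while (free_on_write_list != NULL) {
                deferred_free_t *df = free_on_write_list;

                free_on_write_list = df->df_next;
                df->df_func(df->df_data, df->df_size);
                free(df);
        }
        pthread_mutex_unlock(&free_on_write_mtx);
}

static void
plain_free(void *data, size_t size)
{
        (void) size;
        free(data);
}

int
main(void)
{
        int write_in_progress = 1;      /* stands in for HDR_L2_WRITING() */
        void *data = malloc(4096);

        if (write_in_progress)
                buf_free_on_write(data, 4096, plain_free);
        else
                plain_free(data, 4096);

        /* Later, when the simulated write completes. */
        drain_free_on_write();
        (void) printf("deferred frees drained\n");
        return (0);
}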


1606 
1607         if (l2hdr != NULL) {
1608                 boolean_t buflist_held = MUTEX_HELD(&l2arc_buflist_mtx);
1609                 /*
1610                  * To prevent arc_free() and l2arc_evict() from
1611                  * attempting to free the same buffer at the same time,
1612                  * a FREE_IN_PROGRESS flag is given to arc_free() to
1613                  * give it priority.  l2arc_evict() can't destroy this
1614                  * header while we are waiting on l2arc_buflist_mtx.
1615                  *
1616                  * The hdr may be removed from l2ad_buflist before we
1617                  * grab l2arc_buflist_mtx, so b_l2hdr is rechecked.
1618                  */
1619                 if (!buflist_held) {
1620                         mutex_enter(&l2arc_buflist_mtx);
1621                         l2hdr = hdr->b_l2hdr;
1622                 }
1623 
1624                 if (l2hdr != NULL) {
1625                         list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
1626                         arc_buf_l2_cdata_free(hdr);
1627                         ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
1628                         ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
1629                         vdev_space_update(l2hdr->b_dev->l2ad_vdev,
1630                             -l2hdr->b_asize, 0, 0);
1631                         kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
1632                         if (hdr->b_state == arc_l2c_only)
1633                                 l2arc_hdr_stat_remove();
1634                         hdr->b_l2hdr = NULL;
1635                 }
1636 
1637                 if (!buflist_held)
1638                         mutex_exit(&l2arc_buflist_mtx);
1639         }
1640 
1641         if (!BUF_EMPTY(hdr)) {
1642                 ASSERT(!HDR_IN_HASH_TABLE(hdr));
1643                 buf_discard_identity(hdr);
1644         }
1645         while (hdr->b_buf) {
1646                 arc_buf_t *buf = hdr->b_buf;
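
In arc_hdr_destroy() the header is being torn down with l2arc_buflist_mtx held, so the new arc_buf_l2_cdata_free(hdr) call right after list_remove() ensures that any temporary compressed copy still hanging off the l2hdr is queued for free-on-write instead of being lost when the l2hdr is freed a few lines later.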


3362 
3363         mutex_enter(&buf->b_evict_lock);
3364         hdr = buf->b_hdr;
3365 
3366         /* this buffer is not on any list */
3367         ASSERT(refcount_count(&hdr->b_refcnt) > 0);
3368 
3369         if (hdr->b_state == arc_anon) {
3370                 /* this buffer is already released */
3371                 ASSERT(buf->b_efunc == NULL);
3372         } else {
3373                 hash_lock = HDR_LOCK(hdr);
3374                 mutex_enter(hash_lock);
3375                 hdr = buf->b_hdr;
3376                 ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
3377         }
3378 
3379         l2hdr = hdr->b_l2hdr;
3380         if (l2hdr) {
3381                 mutex_enter(&l2arc_buflist_mtx);
3382                 arc_buf_l2_cdata_free(hdr);
3383                 hdr->b_l2hdr = NULL;
3384                 list_remove(l2hdr->b_dev->l2ad_buflist, hdr);
3385         }
3386         buf_size = hdr->b_size;
3387 
3388         /*
3389          * Do we have more than one buf?
3390          */
3391         if (hdr->b_datacnt > 1) {
3392                 arc_buf_hdr_t *nhdr;
3393                 arc_buf_t **bufp;
3394                 uint64_t blksz = hdr->b_size;
3395                 uint64_t spa = hdr->b_spa;
3396                 arc_buf_contents_t type = hdr->b_type;
3397                 uint32_t flags = hdr->b_flags;
3398 
3399                 ASSERT(hdr->b_buf != buf || buf->b_next != NULL);
3400                 /*
3401                  * Pull the data off of this hdr and attach it to
3402                  * a new anonymous hdr.
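
arc_release() gets the same treatment: while l2arc_buflist_mtx is held, and before b_l2hdr is cleared, arc_buf_l2_cdata_free(hdr) releases (or queues for free-on-write) the temporary compressed buffer; once the header is detached from its l2hdr, nothing else could reach that copy.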


4543                         arc_hdr_destroy(ab);
4544                 } else {
4545                         /*
4546                          * Invalidate issued or about to be issued
4547                          * reads, since we may be about to write
4548                          * over this location.
4549                          */
4550                         if (HDR_L2_READING(ab)) {
4551                                 ARCSTAT_BUMP(arcstat_l2_evict_reading);
4552                                 ab->b_flags |= ARC_L2_EVICTED;
4553                         }
4554 
4555                         /*
4556                          * Tell ARC this no longer exists in L2ARC.
4557                          */
4558                         if (ab->b_l2hdr != NULL) {
4559                                 abl2 = ab->b_l2hdr;
4560                                 ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
4561                                 bytes_evicted += abl2->b_asize;
4562                                 ab->b_l2hdr = NULL;
4563                                 /*
4564                                  * We are destroying l2hdr, so ensure that
4565                                  * its compressed buffer, if any, is not leaked.
4566                                  */
4567                                 ASSERT(abl2->b_tmp_cdata == NULL);
4568                                 kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
4569                                 ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
4570                         }
4571                         list_remove(buflist, ab);
4572 
4573                         /*
4574                          * This may have been leftover after a
4575                          * failed write.
4576                          */
4577                         ab->b_flags &= ~ARC_L2_WRITING;
4578                 }
4579                 mutex_exit(hash_lock);
4580         }
4581         mutex_exit(&l2arc_buflist_mtx);
4582 
4583         vdev_space_update(dev->l2ad_vdev, -bytes_evicted, 0, 0);
4584         dev->l2ad_evict = taddr;
4585 }
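
l2arc_evict() itself still does not free b_tmp_cdata; the new comment and ASSERT(abl2->b_tmp_cdata == NULL) document the invariant that, by the time an l2hdr is destroyed here, any temporary compressed copy has already been released elsewhere (presumably by l2arc_release_cdata_buf() on write completion, or by arc_buf_l2_cdata_free() on the destroy/release paths).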
4586 
4587 /*


4776                 l2hdr->b_daddr = dev->l2ad_hand;
4777 
4778                 if ((ab->b_flags & ARC_L2COMPRESS) &&
4779                     l2hdr->b_asize >= buf_compress_minsz) {
4780                         if (l2arc_compress_buf(l2hdr)) {
4781                                 /*
4782                                  * If compression succeeded, enable headroom
4783                                  * boost on the next scan cycle.
4784                                  */
4785                                 *headroom_boost = B_TRUE;
4786                         }
4787                 }
4788 
4789                 /*
4790                  * Pick up the buffer data we had previously stashed away
4791                  * (and now potentially also compressed).
4792                  */
4793                 buf_data = l2hdr->b_tmp_cdata;
4794                 buf_sz = l2hdr->b_asize;
4795 
4796                 /*
4797                  * If the data has not been compressed, then clear b_tmp_cdata
4798                  * to make sure that it points only to a temporary compression
4799                  * buffer.
4800                  */
4801                 if (!L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress))
4802                         l2hdr->b_tmp_cdata = NULL;
4803 
4804                 /* Compression may have squashed the buffer to zero length. */
4805                 if (buf_sz != 0) {
4806                         uint64_t buf_p_sz;
4807 
4808                         wzio = zio_write_phys(pio, dev->l2ad_vdev,
4809                             dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF,
4810                             NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE,
4811                             ZIO_FLAG_CANFAIL, B_FALSE);
4812 
4813                         DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev,
4814                             zio_t *, wzio);
4815                         (void) zio_nowait(wzio);
4816 
4817                         write_asize += buf_sz;
4818                         /*
4819                          * Keep the clock hand suitably device-aligned.
4820                          */
4821                         buf_p_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
4822                         write_psize += buf_p_sz;
4823                         dev->l2ad_hand += buf_p_sz;
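
In l2arc_write_buffers(), after the (possibly compressed) data pointer and size have been copied into locals for the write, the new code clears b_tmp_cdata whenever b_compress is not a valid compression type, so the field only ever points at a genuinely allocated temporary buffer. The comment block below states the assumed resulting invariant; it is illustrative only, since the L2ARC_IS_VALID_COMPRESS() definition is not part of these excerpts.

/*
 * Assumed invariant once this hunk has run:
 *
 *   b_compress == ZIO_COMPRESS_OFF    -> b_tmp_cdata == NULL
 *                                        (no temporary copy was allocated;
 *                                        cleared here)
 *   b_compress == ZIO_COMPRESS_EMPTY  -> b_tmp_cdata == NULL
 *                                        (zero-filled buffer, nothing to
 *                                        write or free)
 *   compressed (e.g. LZ4)             -> b_tmp_cdata points at the
 *                                        compressed copy, to be freed by
 *                                        l2arc_release_cdata_buf() or
 *                                        arc_buf_l2_cdata_free()
 */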


4974                     hdr->b_size) != 0)
4975                         zio->io_error = EIO;
4976                 zio_data_buf_free(cdata, csize);
4977         }
4978 
4979         /* Restore the expected uncompressed IO size. */
4980         zio->io_orig_size = zio->io_size = hdr->b_size;
4981 }
4982 
4983 /*
4984  * Releases the temporary b_tmp_cdata buffer in an l2arc header structure.
4985  * This buffer serves as a temporary holder of compressed data while
4986  * the buffer entry is being written to an l2arc device. Once that is
4987  * done, we can dispose of it.
4988  */
4989 static void
4990 l2arc_release_cdata_buf(arc_buf_hdr_t *ab)
4991 {
4992         l2arc_buf_hdr_t *l2hdr = ab->b_l2hdr;
4993 
4994         ASSERT(L2ARC_IS_VALID_COMPRESS(l2hdr->b_compress));
4995         if (l2hdr->b_compress != ZIO_COMPRESS_EMPTY) {
4996                 /*
4997                  * If the data was compressed, then we've allocated a
4998                  * temporary buffer for it, so now we need to release it.
4999                  */
5000                 ASSERT(l2hdr->b_tmp_cdata != NULL);
5001                 zio_data_buf_free(l2hdr->b_tmp_cdata, ab->b_size);

5002                 l2hdr->b_tmp_cdata = NULL;
5003         } else {
5004                 ASSERT(l2hdr->b_tmp_cdata == NULL);
5005         }
5006 }
5007 
5008 /*
5009  * This thread feeds the L2ARC at regular intervals.  This is the beating
5010  * heart of the L2ARC.
5011  */
5012 static void
5013 l2arc_feed_thread(void)
5014 {
5015         callb_cpr_t cpr;
5016         l2arc_dev_t *dev;
5017         spa_t *spa;
5018         uint64_t size, wrote;
5019         clock_t begin, next = ddi_get_lbolt();
5020         boolean_t headroom_boost = B_FALSE;
5021 
5022         CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG);
5023 
5024         mutex_enter(&l2arc_feed_thr_lock);
5025
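
Finally, l2arc_release_cdata_buf() is tightened: instead of freeing only in the ZIO_COMPRESS_LZ4 case and then unconditionally clearing b_tmp_cdata, it now asserts that b_compress holds a valid compression type, frees and clears the temporary buffer for anything other than ZIO_COMPRESS_EMPTY, and asserts that the EMPTY case carries no such buffer. Together with the hunks above, this is what keeps the compressed copies from leaking when headers are destroyed, released, or evicted while an L2ARC write is in flight.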