Print this page
3995 Memory leak of compressed buffers in l2arc_write_done
3997 ZFS L2ARC default behavior should allow reading while writing


 277         kstat_named_t arcstat_hash_collisions;
 278         kstat_named_t arcstat_hash_chains;
 279         kstat_named_t arcstat_hash_chain_max;
 280         kstat_named_t arcstat_p;
 281         kstat_named_t arcstat_c;
 282         kstat_named_t arcstat_c_min;
 283         kstat_named_t arcstat_c_max;
 284         kstat_named_t arcstat_size;
 285         kstat_named_t arcstat_hdr_size;
 286         kstat_named_t arcstat_data_size;
 287         kstat_named_t arcstat_other_size;
 288         kstat_named_t arcstat_l2_hits;
 289         kstat_named_t arcstat_l2_misses;
 290         kstat_named_t arcstat_l2_feeds;
 291         kstat_named_t arcstat_l2_rw_clash;
 292         kstat_named_t arcstat_l2_read_bytes;
 293         kstat_named_t arcstat_l2_write_bytes;
 294         kstat_named_t arcstat_l2_writes_sent;
 295         kstat_named_t arcstat_l2_writes_done;
 296         kstat_named_t arcstat_l2_writes_error;
 297         kstat_named_t arcstat_l2_writes_hdr_miss;
 298         kstat_named_t arcstat_l2_evict_lock_retry;
 299         kstat_named_t arcstat_l2_evict_reading;
 300         kstat_named_t arcstat_l2_free_on_write;
 301         kstat_named_t arcstat_l2_abort_lowmem;
 302         kstat_named_t arcstat_l2_cksum_bad;
 303         kstat_named_t arcstat_l2_io_error;
 304         kstat_named_t arcstat_l2_size;
 305         kstat_named_t arcstat_l2_asize;
 306         kstat_named_t arcstat_l2_hdr_size;
 307         kstat_named_t arcstat_l2_compress_successes;
 308         kstat_named_t arcstat_l2_compress_zeros;
 309         kstat_named_t arcstat_l2_compress_failures;
 310         kstat_named_t arcstat_memory_throttle_count;
 311         kstat_named_t arcstat_duplicate_buffers;
 312         kstat_named_t arcstat_duplicate_buffers_size;
 313         kstat_named_t arcstat_duplicate_reads;
 314         kstat_named_t arcstat_meta_used;
 315         kstat_named_t arcstat_meta_limit;
 316         kstat_named_t arcstat_meta_max;
 317 } arc_stats_t;


 343         { "hash_collisions",            KSTAT_DATA_UINT64 },
 344         { "hash_chains",                KSTAT_DATA_UINT64 },
 345         { "hash_chain_max",             KSTAT_DATA_UINT64 },
 346         { "p",                          KSTAT_DATA_UINT64 },
 347         { "c",                          KSTAT_DATA_UINT64 },
 348         { "c_min",                      KSTAT_DATA_UINT64 },
 349         { "c_max",                      KSTAT_DATA_UINT64 },
 350         { "size",                       KSTAT_DATA_UINT64 },
 351         { "hdr_size",                   KSTAT_DATA_UINT64 },
 352         { "data_size",                  KSTAT_DATA_UINT64 },
 353         { "other_size",                 KSTAT_DATA_UINT64 },
 354         { "l2_hits",                    KSTAT_DATA_UINT64 },
 355         { "l2_misses",                  KSTAT_DATA_UINT64 },
 356         { "l2_feeds",                   KSTAT_DATA_UINT64 },
 357         { "l2_rw_clash",                KSTAT_DATA_UINT64 },
 358         { "l2_read_bytes",              KSTAT_DATA_UINT64 },
 359         { "l2_write_bytes",             KSTAT_DATA_UINT64 },
 360         { "l2_writes_sent",             KSTAT_DATA_UINT64 },
 361         { "l2_writes_done",             KSTAT_DATA_UINT64 },
 362         { "l2_writes_error",            KSTAT_DATA_UINT64 },
 363         { "l2_writes_hdr_miss",         KSTAT_DATA_UINT64 },
 364         { "l2_evict_lock_retry",        KSTAT_DATA_UINT64 },
 365         { "l2_evict_reading",           KSTAT_DATA_UINT64 },
 366         { "l2_free_on_write",           KSTAT_DATA_UINT64 },
 367         { "l2_abort_lowmem",            KSTAT_DATA_UINT64 },
 368         { "l2_cksum_bad",               KSTAT_DATA_UINT64 },
 369         { "l2_io_error",                KSTAT_DATA_UINT64 },
 370         { "l2_size",                    KSTAT_DATA_UINT64 },
 371         { "l2_asize",                   KSTAT_DATA_UINT64 },
 372         { "l2_hdr_size",                KSTAT_DATA_UINT64 },
 373         { "l2_compress_successes",      KSTAT_DATA_UINT64 },
 374         { "l2_compress_zeros",          KSTAT_DATA_UINT64 },
 375         { "l2_compress_failures",       KSTAT_DATA_UINT64 },
 376         { "memory_throttle_count",      KSTAT_DATA_UINT64 },
 377         { "duplicate_buffers",          KSTAT_DATA_UINT64 },
 378         { "duplicate_buffers_size",     KSTAT_DATA_UINT64 },
 379         { "duplicate_reads",            KSTAT_DATA_UINT64 },
 380         { "arc_meta_used",              KSTAT_DATA_UINT64 },
 381         { "arc_meta_limit",             KSTAT_DATA_UINT64 },
 382         { "arc_meta_max",               KSTAT_DATA_UINT64 }
 383 };


 605 /*
 606  * If we discover during ARC scan any buffers to be compressed, we boost
 607  * our headroom for the next scanning cycle by this percentage multiple.
 608  */
 609 #define L2ARC_HEADROOM_BOOST    200
 610 #define L2ARC_FEED_SECS         1               /* caching interval secs */
 611 #define L2ARC_FEED_MIN_MS       200             /* min caching interval ms */
 612 
 613 #define l2arc_writes_sent       ARCSTAT(arcstat_l2_writes_sent)
 614 #define l2arc_writes_done       ARCSTAT(arcstat_l2_writes_done)
 615 
 616 /* L2ARC Performance Tunables */
 617 uint64_t l2arc_write_max = L2ARC_WRITE_SIZE;    /* default max write size */
 618 uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE;  /* extra write during warmup */
 619 uint64_t l2arc_headroom = L2ARC_HEADROOM;       /* number of dev writes */
 620 uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;   /* percent multiple */
 621 uint64_t l2arc_feed_secs = L2ARC_FEED_SECS;     /* interval seconds */
 622 uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval milliseconds */
 623 boolean_t l2arc_noprefetch = B_TRUE;            /* don't cache prefetch bufs */
 624 boolean_t l2arc_feed_again = B_TRUE;            /* turbo warmup */
     /*
      * NOTE(review): bug 3997 (this change) flips the default below to
      * B_FALSE so the L2ARC may be read from while a write is in flight.
      */
 625 boolean_t l2arc_norw = B_TRUE;                  /* no reads during writes */
 626 
 627 /*
 628  * L2ARC Internals
 629  */
 630 typedef struct l2arc_dev {
 631         vdev_t                  *l2ad_vdev;     /* vdev */
 632         spa_t                   *l2ad_spa;      /* spa */
 633         uint64_t                l2ad_hand;      /* next write location */
 634         uint64_t                l2ad_start;     /* first addr on device */
 635         uint64_t                l2ad_end;       /* last addr on device */
 636         uint64_t                l2ad_evict;     /* last addr eviction reached */
 637         boolean_t               l2ad_first;     /* first sweep through */
     /*
      * l2ad_writing is raised around zio_wait() in l2arc_write_buffers();
      * presumably consulted by the read path when l2arc_norw is set --
      * confirm against the full arc.c.
      */
 638         boolean_t               l2ad_writing;   /* currently writing */
 639         list_t                  *l2ad_buflist;  /* buffer list */
 640         list_node_t             l2ad_node;      /* device list node */
 641 } l2arc_dev_t;
 642 
 643 static list_t L2ARC_dev_list;                   /* device list */
 644 static list_t *l2arc_dev_list;                  /* device list pointer */
 645 static kmutex_t l2arc_dev_mtx;                  /* device list mutex */


4131                 ASSERT(df->l2df_func != NULL);
4132                 df->l2df_func(df->l2df_data, df->l2df_size);
4133                 list_remove(buflist, df);
4134                 kmem_free(df, sizeof (l2arc_data_free_t));
4135         }
4136 
4137         mutex_exit(&l2arc_free_on_write_mtx);
4138 }
4139 
4140 /*
4141  * A write to a cache device has completed.  Update all headers to allow
4142  * reads from these buffers to begin.
4143  */
4144 static void
4145 l2arc_write_done(zio_t *zio)
4146 {
4147         l2arc_write_callback_t *cb;
4148         l2arc_dev_t *dev;
4149         list_t *buflist;
4150         arc_buf_hdr_t *head, *ab, *ab_prev;
4151         l2arc_buf_hdr_t *abl2;
4152         kmutex_t *hash_lock;
4153 
4154         cb = zio->io_private;
4155         ASSERT(cb != NULL);
4156         dev = cb->l2wcb_dev;
4157         ASSERT(dev != NULL);
4158         head = cb->l2wcb_head;
4159         ASSERT(head != NULL);
4160         buflist = dev->l2ad_buflist;
4161         ASSERT(buflist != NULL);
4162         DTRACE_PROBE2(l2arc__iodone, zio_t *, zio,
4163             l2arc_write_callback_t *, cb);
4164 
4165         if (zio->io_error != 0)
4166                 ARCSTAT_BUMP(arcstat_l2_writes_error);
4167 
4168         mutex_enter(&l2arc_buflist_mtx);
4169 
4170         /*
4171          * All writes completed, or an error was hit.
4172          */

4173         for (ab = list_prev(buflist, head); ab; ab = ab_prev) {
4174                 ab_prev = list_prev(buflist, ab);
4175 
4176                 hash_lock = HDR_LOCK(ab);
4177                 if (!mutex_tryenter(hash_lock)) {
4178                         /*
4179                          * This buffer misses out.  It may be in a stage
4180                          * of eviction.  Its ARC_L2_WRITING flag will be
4181                          * left set, denying reads to this buffer.
                              *
                              * NOTE(review): skipping the header here also
                              * skips the l2arc_release_cdata_buf() call below,
                              * so a compressed buffer's temporary b_tmp_cdata
                              * is never freed -- the leak described by bug
                              * 3995 in this page's header.
4182                          */
4183                         ARCSTAT_BUMP(arcstat_l2_writes_hdr_miss);
4184                         continue;
4185                 }
4186 
4187                 abl2 = ab->b_l2hdr;
4188 
4189                 /*
4190                  * Release the temporary compressed buffer as soon as possible.
4191                  */
4192                 if (abl2->b_compress != ZIO_COMPRESS_OFF)
4193                         l2arc_release_cdata_buf(ab);
4194 
4195                 if (zio->io_error != 0) {
4196                         /*
4197                          * Error - drop L2ARC entry.
4198                          */
4199                         list_remove(buflist, ab);
4200                         ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
4201                         ab->b_l2hdr = NULL;
4202                         kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
4203                         ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
4204                 }
4205 
4206                 /*
4207                  * Allow ARC to begin reads to this L2ARC entry.
4208                  */
4209                 ab->b_flags &= ~ARC_L2_WRITING;
4210 
4211                 mutex_exit(hash_lock);
4212         }
4213 
4214         atomic_inc_64(&l2arc_writes_done);
4215         list_remove(buflist, head);
4216         kmem_cache_free(hdr_cache, head);
4217         mutex_exit(&l2arc_buflist_mtx);
4218 
4219         l2arc_do_free_on_write();
4220 
4221         kmem_free(cb, sizeof (l2arc_write_callback_t));
4222 }


4333                 list = &arc_mru->arcs_list[ARC_BUFC_DATA];
4334                 *lock = &arc_mru->arcs_mtx;
4335                 break;
4336         }
4337 
4338         ASSERT(!(MUTEX_HELD(*lock)));
4339         mutex_enter(*lock);
4340         return (list);
4341 }
4342 
4343 /*
4344  * Evict buffers from the device write hand to the distance specified in
4345  * bytes.  This distance may span populated buffers, it may span nothing.
4346  * This is clearing a region on the L2ARC device ready for writing.
4347  * If the 'all' boolean is set, every buffer is evicted.
4348  */
4349 static void
4350 l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
4351 {
4352         list_t *buflist;
4353         l2arc_buf_hdr_t *abl2;
4354         arc_buf_hdr_t *ab, *ab_prev;
4355         kmutex_t *hash_lock;
4356         uint64_t taddr;
4357 
4358         buflist = dev->l2ad_buflist;
4359 
4360         if (buflist == NULL)
4361                 return;
4362 
4363         if (!all && dev->l2ad_first) {
4364                 /*
4365                  * This is the first sweep through the device.  There is
4366                  * nothing to evict.
4367                  */
4368                 return;
4369         }
4370 
4371         if (dev->l2ad_hand >= (dev->l2ad_end - (2 * distance))) {
4372                 /*
4373                  * When nearing the end of the device, evict to the end


4433                          * arc_hdr_destroy() will call list_remove()
4434                          * and decrement arcstat_l2_size.
4435                          */
4436                         arc_change_state(arc_anon, ab, hash_lock);
4437                         arc_hdr_destroy(ab);
4438                 } else {
4439                         /*
4440                          * Invalidate issued or about to be issued
4441                          * reads, since we may be about to write
4442                          * over this location.
4443                          */
4444                         if (HDR_L2_READING(ab)) {
4445                                 ARCSTAT_BUMP(arcstat_l2_evict_reading);
4446                                 ab->b_flags |= ARC_L2_EVICTED;
4447                         }
4448 
4449                         /*
4450                          * Tell ARC this no longer exists in L2ARC.
4451                          */
4452                         if (ab->b_l2hdr != NULL) {
4453                                 abl2 = ab->b_l2hdr;
4454                                 ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
4455                                 ab->b_l2hdr = NULL;
4456                                 kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
4457                                 ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
4458                         }
4459                         list_remove(buflist, ab);
4460 
4461                         /*
4462                          * This may have been leftover after a
4463                          * failed write.
4464                          */
4465                         ab->b_flags &= ~ARC_L2_WRITING;
4466                 }
4467                 mutex_exit(hash_lock);
4468         }
4469         mutex_exit(&l2arc_buflist_mtx);
4470 
4471         vdev_space_update(dev->l2ad_vdev, -(taddr - dev->l2ad_evict), 0, 0);
4472         dev->l2ad_evict = taddr;
4473 }
4474 
4475 /*
4476  * Find and write ARC buffers to the L2ARC device.


4708 
4709         ASSERT3U(write_asize, <=, target_sz);
4710         ARCSTAT_BUMP(arcstat_l2_writes_sent);
4711         ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize);
4712         ARCSTAT_INCR(arcstat_l2_size, write_sz);
4713         ARCSTAT_INCR(arcstat_l2_asize, write_asize);
4714         vdev_space_update(dev->l2ad_vdev, write_psize, 0, 0);
4715 
4716         /*
4717          * Bump device hand to the device start if it is approaching the end.
4718          * l2arc_evict() will already have evicted ahead for this case.
4719          */
4720         if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) {
4721                 vdev_space_update(dev->l2ad_vdev,
4722                     dev->l2ad_end - dev->l2ad_hand, 0, 0);
4723                 dev->l2ad_hand = dev->l2ad_start;
4724                 dev->l2ad_evict = dev->l2ad_start;
4725                 dev->l2ad_first = B_FALSE;
4726         }
4727 

4728         dev->l2ad_writing = B_TRUE;
4729         (void) zio_wait(pio);
4730         dev->l2ad_writing = B_FALSE;
4731 
4732         return (write_asize);
4733 }
4734 
4735 /*
4736  * Compresses an L2ARC buffer.
4737  * The data to be compressed must be prefilled in l2hdr->b_tmp_cdata and its
4738  * size in l2hdr->b_asize. This routine tries to compress the data and
4739  * depending on the compression result there are three possible outcomes:
4740  * *) The buffer was incompressible. The original l2hdr contents were left
4741  *    untouched and are ready for writing to an L2 device.
4742  * *) The buffer was all-zeros, so there is no need to write it to an L2
4743  *    device. To indicate this situation b_tmp_cdata is NULL'ed, b_asize is
4744  *    set to zero and b_compress is set to ZIO_COMPRESS_EMPTY.
4745  * *) Compression succeeded and b_tmp_cdata was replaced with a temporary
4746  *    data buffer which holds the compressed data to be written, and b_asize
4747  *    tells us how much data there is. b_compress is set to the appropriate
4748  *    compression algorithm. Once writing is done, invoke
4749  *    l2arc_release_cdata_buf on this l2hdr to free this temporary buffer.
4750  *




 277         kstat_named_t arcstat_hash_collisions;
 278         kstat_named_t arcstat_hash_chains;
 279         kstat_named_t arcstat_hash_chain_max;
 280         kstat_named_t arcstat_p;
 281         kstat_named_t arcstat_c;
 282         kstat_named_t arcstat_c_min;
 283         kstat_named_t arcstat_c_max;
 284         kstat_named_t arcstat_size;
 285         kstat_named_t arcstat_hdr_size;
 286         kstat_named_t arcstat_data_size;
 287         kstat_named_t arcstat_other_size;
 288         kstat_named_t arcstat_l2_hits;
 289         kstat_named_t arcstat_l2_misses;
 290         kstat_named_t arcstat_l2_feeds;
 291         kstat_named_t arcstat_l2_rw_clash;
 292         kstat_named_t arcstat_l2_read_bytes;
 293         kstat_named_t arcstat_l2_write_bytes;
 294         kstat_named_t arcstat_l2_writes_sent;
 295         kstat_named_t arcstat_l2_writes_done;
 296         kstat_named_t arcstat_l2_writes_error;

 297         kstat_named_t arcstat_l2_evict_lock_retry;
 298         kstat_named_t arcstat_l2_evict_reading;
 299         kstat_named_t arcstat_l2_free_on_write;
 300         kstat_named_t arcstat_l2_abort_lowmem;
 301         kstat_named_t arcstat_l2_cksum_bad;
 302         kstat_named_t arcstat_l2_io_error;
 303         kstat_named_t arcstat_l2_size;
 304         kstat_named_t arcstat_l2_asize;
 305         kstat_named_t arcstat_l2_hdr_size;
 306         kstat_named_t arcstat_l2_compress_successes;
 307         kstat_named_t arcstat_l2_compress_zeros;
 308         kstat_named_t arcstat_l2_compress_failures;
 309         kstat_named_t arcstat_memory_throttle_count;
 310         kstat_named_t arcstat_duplicate_buffers;
 311         kstat_named_t arcstat_duplicate_buffers_size;
 312         kstat_named_t arcstat_duplicate_reads;
 313         kstat_named_t arcstat_meta_used;
 314         kstat_named_t arcstat_meta_limit;
 315         kstat_named_t arcstat_meta_max;
 316 } arc_stats_t;


 342         { "hash_collisions",            KSTAT_DATA_UINT64 },
 343         { "hash_chains",                KSTAT_DATA_UINT64 },
 344         { "hash_chain_max",             KSTAT_DATA_UINT64 },
 345         { "p",                          KSTAT_DATA_UINT64 },
 346         { "c",                          KSTAT_DATA_UINT64 },
 347         { "c_min",                      KSTAT_DATA_UINT64 },
 348         { "c_max",                      KSTAT_DATA_UINT64 },
 349         { "size",                       KSTAT_DATA_UINT64 },
 350         { "hdr_size",                   KSTAT_DATA_UINT64 },
 351         { "data_size",                  KSTAT_DATA_UINT64 },
 352         { "other_size",                 KSTAT_DATA_UINT64 },
 353         { "l2_hits",                    KSTAT_DATA_UINT64 },
 354         { "l2_misses",                  KSTAT_DATA_UINT64 },
 355         { "l2_feeds",                   KSTAT_DATA_UINT64 },
 356         { "l2_rw_clash",                KSTAT_DATA_UINT64 },
 357         { "l2_read_bytes",              KSTAT_DATA_UINT64 },
 358         { "l2_write_bytes",             KSTAT_DATA_UINT64 },
 359         { "l2_writes_sent",             KSTAT_DATA_UINT64 },
 360         { "l2_writes_done",             KSTAT_DATA_UINT64 },
 361         { "l2_writes_error",            KSTAT_DATA_UINT64 },

 362         { "l2_evict_lock_retry",        KSTAT_DATA_UINT64 },
 363         { "l2_evict_reading",           KSTAT_DATA_UINT64 },
 364         { "l2_free_on_write",           KSTAT_DATA_UINT64 },
 365         { "l2_abort_lowmem",            KSTAT_DATA_UINT64 },
 366         { "l2_cksum_bad",               KSTAT_DATA_UINT64 },
 367         { "l2_io_error",                KSTAT_DATA_UINT64 },
 368         { "l2_size",                    KSTAT_DATA_UINT64 },
 369         { "l2_asize",                   KSTAT_DATA_UINT64 },
 370         { "l2_hdr_size",                KSTAT_DATA_UINT64 },
 371         { "l2_compress_successes",      KSTAT_DATA_UINT64 },
 372         { "l2_compress_zeros",          KSTAT_DATA_UINT64 },
 373         { "l2_compress_failures",       KSTAT_DATA_UINT64 },
 374         { "memory_throttle_count",      KSTAT_DATA_UINT64 },
 375         { "duplicate_buffers",          KSTAT_DATA_UINT64 },
 376         { "duplicate_buffers_size",     KSTAT_DATA_UINT64 },
 377         { "duplicate_reads",            KSTAT_DATA_UINT64 },
 378         { "arc_meta_used",              KSTAT_DATA_UINT64 },
 379         { "arc_meta_limit",             KSTAT_DATA_UINT64 },
 380         { "arc_meta_max",               KSTAT_DATA_UINT64 }
 381 };


 603 /*
 604  * If we discover during ARC scan any buffers to be compressed, we boost
 605  * our headroom for the next scanning cycle by this percentage multiple.
 606  */
 607 #define L2ARC_HEADROOM_BOOST    200
 608 #define L2ARC_FEED_SECS         1               /* caching interval secs */
 609 #define L2ARC_FEED_MIN_MS       200             /* min caching interval ms */
 610 
 611 #define l2arc_writes_sent       ARCSTAT(arcstat_l2_writes_sent)
 612 #define l2arc_writes_done       ARCSTAT(arcstat_l2_writes_done)
 613 
 614 /* L2ARC Performance Tunables */
 615 uint64_t l2arc_write_max = L2ARC_WRITE_SIZE;    /* default max write size */
 616 uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE;  /* extra write during warmup */
 617 uint64_t l2arc_headroom = L2ARC_HEADROOM;       /* number of dev writes */
 618 uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;   /* percent multiple */
 619 uint64_t l2arc_feed_secs = L2ARC_FEED_SECS;     /* interval seconds */
 620 uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval milliseconds */
 621 boolean_t l2arc_noprefetch = B_TRUE;            /* don't cache prefetch bufs */
 622 boolean_t l2arc_feed_again = B_TRUE;            /* turbo warmup */
     /*
      * Default changed to B_FALSE by bug 3997 (this change): the L2ARC may
      * now be read while a write to the same device is in flight.
      */
 623 boolean_t l2arc_norw = B_FALSE;                 /* no reads during writes */
 624 
 625 /*
 626  * L2ARC Internals
 627  */
 628 typedef struct l2arc_dev {
 629         vdev_t                  *l2ad_vdev;     /* vdev */
 630         spa_t                   *l2ad_spa;      /* spa */
 631         uint64_t                l2ad_hand;      /* next write location */
 632         uint64_t                l2ad_start;     /* first addr on device */
 633         uint64_t                l2ad_end;       /* last addr on device */
 634         uint64_t                l2ad_evict;     /* last addr eviction reached */
 635         boolean_t               l2ad_first;     /* first sweep through */
     /*
      * Raised in l2arc_write_buffers() before the write zio is issued and
      * lowered in the l2arc_write_done() callback (see lines 4171/4720 of
      * this revision).
      */
 636         boolean_t               l2ad_writing;   /* currently writing */
 637         list_t                  *l2ad_buflist;  /* buffer list */
 638         list_node_t             l2ad_node;      /* device list node */
 639 } l2arc_dev_t;
 640 
 641 static list_t L2ARC_dev_list;                   /* device list */
 642 static list_t *l2arc_dev_list;                  /* device list pointer */
 643 static kmutex_t l2arc_dev_mtx;                  /* device list mutex */


4129                 ASSERT(df->l2df_func != NULL);
4130                 df->l2df_func(df->l2df_data, df->l2df_size);
4131                 list_remove(buflist, df);
4132                 kmem_free(df, sizeof (l2arc_data_free_t));
4133         }
4134 
4135         mutex_exit(&l2arc_free_on_write_mtx);
4136 }
4137 
4138 /*
4139  * A write to a cache device has completed.  Update all headers to allow
4140  * reads from these buffers to begin.
4141  */
4142 static void
4143 l2arc_write_done(zio_t *zio)
4144 {
4145         l2arc_write_callback_t *cb;
4146         l2arc_dev_t *dev;
4147         list_t *buflist;
4148         arc_buf_hdr_t *head, *ab, *ab_prev;
4149         l2arc_buf_hdr_t *l2hdr;
4150         kmutex_t *hash_lock;
4151 
4152         cb = zio->io_private;
4153         ASSERT(cb != NULL);
4154         dev = cb->l2wcb_dev;
4155         ASSERT(dev != NULL);
4156         head = cb->l2wcb_head;
4157         ASSERT(head != NULL);
4158         buflist = dev->l2ad_buflist;
4159         ASSERT(buflist != NULL);
4160         DTRACE_PROBE2(l2arc__iodone, zio_t *, zio,
4161             l2arc_write_callback_t *, cb);
4162 
4163         if (zio->io_error != 0)
4164                 ARCSTAT_BUMP(arcstat_l2_writes_error);
4165 
4166         mutex_enter(&l2arc_buflist_mtx);
4167 
4168         /*
4169          * All writes completed, or an error was hit.
4170          */
     /*
      * Lower the in-flight flag here, in the zio done callback, rather than
      * after zio_wait() in l2arc_write_buffers() (bug 3997); the caller now
      * asserts it is already B_FALSE after the wait.
      */
4171         dev->l2ad_writing = B_FALSE;
4172         for (ab = list_prev(buflist, head); ab; ab = ab_prev) {
4173                 ab_prev = list_prev(buflist, ab);
4174 
4175                 hash_lock = HDR_LOCK(ab);
     /*
      * Blocking mutex_enter() replaces the former mutex_tryenter()-and-skip:
      * every header on the list is now processed, so the temporary
      * compressed buffer below is always released (fixes the bug 3995 leak)
      * and ARC_L2_WRITING is always cleared.
      */
4176                 mutex_enter(hash_lock);








4177 
4178                 l2hdr = ab->b_l2hdr;
4179 
4180                 /*
4181                  * Release the temporary compressed buffer as soon as possible.
4182                  */
4183                 if (l2hdr->b_compress != ZIO_COMPRESS_OFF)
4184                         l2arc_release_cdata_buf(ab);
4185 
4186                 if (zio->io_error != 0) {
4187                         /*
4188                          * Error - drop L2ARC entry.
4189                          */
4190                         list_remove(buflist, ab);
4191                         ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
4192                         ab->b_l2hdr = NULL;
4193                         kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
4194                         ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
4195                 }
4196 
4197                 /*
4198                  * Allow ARC to begin reads to this L2ARC entry.
4199                  */
4200                 ab->b_flags &= ~ARC_L2_WRITING;
4201 
4202                 mutex_exit(hash_lock);
4203         }
4204 
4205         atomic_inc_64(&l2arc_writes_done);
4206         list_remove(buflist, head);
4207         kmem_cache_free(hdr_cache, head);
4208         mutex_exit(&l2arc_buflist_mtx);
4209 
4210         l2arc_do_free_on_write();
4211 
4212         kmem_free(cb, sizeof (l2arc_write_callback_t));
4213 }


4324                 list = &arc_mru->arcs_list[ARC_BUFC_DATA];
4325                 *lock = &arc_mru->arcs_mtx;
4326                 break;
4327         }
4328 
4329         ASSERT(!(MUTEX_HELD(*lock)));
4330         mutex_enter(*lock);
4331         return (list);
4332 }
4333 
4334 /*
4335  * Evict buffers from the device write hand to the distance specified in
4336  * bytes.  This distance may span populated buffers, it may span nothing.
4337  * This is clearing a region on the L2ARC device ready for writing.
4338  * If the 'all' boolean is set, every buffer is evicted.
4339  */
4340 static void
4341 l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
4342 {
4343         list_t *buflist;
4344         l2arc_buf_hdr_t *l2hdr;
4345         arc_buf_hdr_t *ab, *ab_prev;
4346         kmutex_t *hash_lock;
4347         uint64_t taddr;
4348 
4349         buflist = dev->l2ad_buflist;
4350 
4351         if (buflist == NULL)
4352                 return;
4353 
4354         if (!all && dev->l2ad_first) {
4355                 /*
4356                  * This is the first sweep through the device.  There is
4357                  * nothing to evict.
4358                  */
4359                 return;
4360         }
4361 
4362         if (dev->l2ad_hand >= (dev->l2ad_end - (2 * distance))) {
4363                 /*
4364                  * When nearing the end of the device, evict to the end


4424                          * arc_hdr_destroy() will call list_remove()
4425                          * and decrement arcstat_l2_size.
4426                          */
4427                         arc_change_state(arc_anon, ab, hash_lock);
4428                         arc_hdr_destroy(ab);
4429                 } else {
4430                         /*
4431                          * Invalidate issued or about to be issued
4432                          * reads, since we may be about to write
4433                          * over this location.
4434                          */
4435                         if (HDR_L2_READING(ab)) {
4436                                 ARCSTAT_BUMP(arcstat_l2_evict_reading);
4437                                 ab->b_flags |= ARC_L2_EVICTED;
4438                         }
4439 
4440                         /*
4441                          * Tell ARC this no longer exists in L2ARC.
4442                          */
4443                         if (ab->b_l2hdr != NULL) {
4444                                 l2hdr = ab->b_l2hdr;
4445                                 ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
4446                                 ab->b_l2hdr = NULL;
4447                                 kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
4448                                 ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
4449                         }
4450                         list_remove(buflist, ab);
4451 
4452                         /*
4453                          * This may have been leftover after a
4454                          * failed write.
4455                          */
4456                         ab->b_flags &= ~ARC_L2_WRITING;
4457                 }
4458                 mutex_exit(hash_lock);
4459         }
4460         mutex_exit(&l2arc_buflist_mtx);
4461 
4462         vdev_space_update(dev->l2ad_vdev, -(taddr - dev->l2ad_evict), 0, 0);
4463         dev->l2ad_evict = taddr;
4464 }
4465 
4466 /*
4467  * Find and write ARC buffers to the L2ARC device.


4699 
4700         ASSERT3U(write_asize, <=, target_sz);
4701         ARCSTAT_BUMP(arcstat_l2_writes_sent);
4702         ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize);
4703         ARCSTAT_INCR(arcstat_l2_size, write_sz);
4704         ARCSTAT_INCR(arcstat_l2_asize, write_asize);
4705         vdev_space_update(dev->l2ad_vdev, write_psize, 0, 0);
4706 
4707         /*
4708          * Bump device hand to the device start if it is approaching the end.
4709          * l2arc_evict() will already have evicted ahead for this case.
4710          */
4711         if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) {
4712                 vdev_space_update(dev->l2ad_vdev,
4713                     dev->l2ad_end - dev->l2ad_hand, 0, 0);
4714                 dev->l2ad_hand = dev->l2ad_start;
4715                 dev->l2ad_evict = dev->l2ad_start;
4716                 dev->l2ad_first = B_FALSE;
4717         }
4718 
4719         /* dev->l2ad_writing will be lowered in the zio done callback */
4720         dev->l2ad_writing = B_TRUE;
4721         (void) zio_wait(pio);
4722         ASSERT(dev->l2ad_writing == B_FALSE);
4723 
4724         return (write_asize);
4725 }
4726 
4727 /*
4728  * Compresses an L2ARC buffer.
4729  * The data to be compressed must be prefilled in l2hdr->b_tmp_cdata and its
4730  * size in l2hdr->b_asize. This routine tries to compress the data and
4731  * depending on the compression result there are three possible outcomes:
4732  * *) The buffer was incompressible. The original l2hdr contents were left
4733  *    untouched and are ready for writing to an L2 device.
4734  * *) The buffer was all-zeros, so there is no need to write it to an L2
4735  *    device. To indicate this situation b_tmp_cdata is NULL'ed, b_asize is
4736  *    set to zero and b_compress is set to ZIO_COMPRESS_EMPTY.
4737  * *) Compression succeeded and b_tmp_cdata was replaced with a temporary
4738  *    data buffer which holds the compressed data to be written, and b_asize
4739  *    tells us how much data there is. b_compress is set to the appropriate
4740  *    compression algorithm. Once writing is done, invoke
4741  *    l2arc_release_cdata_buf on this l2hdr to free this temporary buffer.
4742  *