Print this page
3995 Memory leak of compressed buffers in l2arc_write_done
3997 ZFS L2ARC default behavior should allow reading while writing

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/arc.c
          +++ new/usr/src/uts/common/fs/zfs/arc.c
↓ open down ↓ 286 lines elided ↑ open up ↑
 287  287          kstat_named_t arcstat_other_size;
 288  288          kstat_named_t arcstat_l2_hits;
 289  289          kstat_named_t arcstat_l2_misses;
 290  290          kstat_named_t arcstat_l2_feeds;
 291  291          kstat_named_t arcstat_l2_rw_clash;
 292  292          kstat_named_t arcstat_l2_read_bytes;
 293  293          kstat_named_t arcstat_l2_write_bytes;
 294  294          kstat_named_t arcstat_l2_writes_sent;
 295  295          kstat_named_t arcstat_l2_writes_done;
 296  296          kstat_named_t arcstat_l2_writes_error;
 297      -        kstat_named_t arcstat_l2_writes_hdr_miss;
 298  297          kstat_named_t arcstat_l2_evict_lock_retry;
 299  298          kstat_named_t arcstat_l2_evict_reading;
 300  299          kstat_named_t arcstat_l2_free_on_write;
 301  300          kstat_named_t arcstat_l2_abort_lowmem;
 302  301          kstat_named_t arcstat_l2_cksum_bad;
 303  302          kstat_named_t arcstat_l2_io_error;
 304  303          kstat_named_t arcstat_l2_size;
 305  304          kstat_named_t arcstat_l2_asize;
 306  305          kstat_named_t arcstat_l2_hdr_size;
 307  306          kstat_named_t arcstat_l2_compress_successes;
↓ open down ↓ 45 lines elided ↑ open up ↑
 353  352          { "other_size",                 KSTAT_DATA_UINT64 },
 354  353          { "l2_hits",                    KSTAT_DATA_UINT64 },
 355  354          { "l2_misses",                  KSTAT_DATA_UINT64 },
 356  355          { "l2_feeds",                   KSTAT_DATA_UINT64 },
 357  356          { "l2_rw_clash",                KSTAT_DATA_UINT64 },
 358  357          { "l2_read_bytes",              KSTAT_DATA_UINT64 },
 359  358          { "l2_write_bytes",             KSTAT_DATA_UINT64 },
 360  359          { "l2_writes_sent",             KSTAT_DATA_UINT64 },
 361  360          { "l2_writes_done",             KSTAT_DATA_UINT64 },
 362  361          { "l2_writes_error",            KSTAT_DATA_UINT64 },
 363      -        { "l2_writes_hdr_miss",         KSTAT_DATA_UINT64 },
 364  362          { "l2_evict_lock_retry",        KSTAT_DATA_UINT64 },
 365  363          { "l2_evict_reading",           KSTAT_DATA_UINT64 },
 366  364          { "l2_free_on_write",           KSTAT_DATA_UINT64 },
 367  365          { "l2_abort_lowmem",            KSTAT_DATA_UINT64 },
 368  366          { "l2_cksum_bad",               KSTAT_DATA_UINT64 },
 369  367          { "l2_io_error",                KSTAT_DATA_UINT64 },
 370  368          { "l2_size",                    KSTAT_DATA_UINT64 },
 371  369          { "l2_asize",                   KSTAT_DATA_UINT64 },
 372  370          { "l2_hdr_size",                KSTAT_DATA_UINT64 },
 373  371          { "l2_compress_successes",      KSTAT_DATA_UINT64 },
↓ open down ↓ 241 lines elided ↑ open up ↑
 615  613  
 616  614  /* L2ARC Performance Tunables */
 617  615  uint64_t l2arc_write_max = L2ARC_WRITE_SIZE;    /* default max write size */
 618  616  uint64_t l2arc_write_boost = L2ARC_WRITE_SIZE;  /* extra write during warmup */
 619  617  uint64_t l2arc_headroom = L2ARC_HEADROOM;       /* number of dev writes */
 620  618  uint64_t l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
 621  619  uint64_t l2arc_feed_secs = L2ARC_FEED_SECS;     /* interval seconds */
 622  620  uint64_t l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval milliseconds */
 623  621  boolean_t l2arc_noprefetch = B_TRUE;            /* don't cache prefetch bufs */
 624  622  boolean_t l2arc_feed_again = B_TRUE;            /* turbo warmup */
 625      -boolean_t l2arc_norw = B_TRUE;                  /* no reads during writes */
      623 +boolean_t l2arc_norw = B_FALSE;                 /* no reads during writes */
 626  624  
 627  625  /*
 628  626   * L2ARC Internals
 629  627   */
 630  628  typedef struct l2arc_dev {
 631  629          vdev_t                  *l2ad_vdev;     /* vdev */
 632  630          spa_t                   *l2ad_spa;      /* spa */
 633  631          uint64_t                l2ad_hand;      /* next write location */
 634  632          uint64_t                l2ad_start;     /* first addr on device */
 635  633          uint64_t                l2ad_end;       /* last addr on device */
↓ open down ↓ 3505 lines elided ↑ open up ↑
4141 4139   * A write to a cache device has completed.  Update all headers to allow
4142 4140   * reads from these buffers to begin.
4143 4141   */
4144 4142  static void
4145 4143  l2arc_write_done(zio_t *zio)
4146 4144  {
4147 4145          l2arc_write_callback_t *cb;
4148 4146          l2arc_dev_t *dev;
4149 4147          list_t *buflist;
4150 4148          arc_buf_hdr_t *head, *ab, *ab_prev;
4151      -        l2arc_buf_hdr_t *abl2;
     4149 +        l2arc_buf_hdr_t *l2hdr;
4152 4150          kmutex_t *hash_lock;
4153 4151  
4154 4152          cb = zio->io_private;
4155 4153          ASSERT(cb != NULL);
4156 4154          dev = cb->l2wcb_dev;
4157 4155          ASSERT(dev != NULL);
4158 4156          head = cb->l2wcb_head;
4159 4157          ASSERT(head != NULL);
4160 4158          buflist = dev->l2ad_buflist;
4161 4159          ASSERT(buflist != NULL);
↓ open down ↓ 1 lines elided ↑ open up ↑
4163 4161              l2arc_write_callback_t *, cb);
4164 4162  
4165 4163          if (zio->io_error != 0)
4166 4164                  ARCSTAT_BUMP(arcstat_l2_writes_error);
4167 4165  
4168 4166          mutex_enter(&l2arc_buflist_mtx);
4169 4167  
4170 4168          /*
4171 4169           * All writes completed, or an error was hit.
4172 4170           */
     4171 +        dev->l2ad_writing = B_FALSE;
4173 4172          for (ab = list_prev(buflist, head); ab; ab = ab_prev) {
4174 4173                  ab_prev = list_prev(buflist, ab);
4175 4174  
4176 4175                  hash_lock = HDR_LOCK(ab);
4177      -                if (!mutex_tryenter(hash_lock)) {
4178      -                        /*
4179      -                         * This buffer misses out.  It may be in a stage
4180      -                         * of eviction.  Its ARC_L2_WRITING flag will be
4181      -                         * left set, denying reads to this buffer.
4182      -                         */
4183      -                        ARCSTAT_BUMP(arcstat_l2_writes_hdr_miss);
4184      -                        continue;
4185      -                }
     4176 +                mutex_enter(hash_lock);
4186 4177  
4187      -                abl2 = ab->b_l2hdr;
     4178 +                l2hdr = ab->b_l2hdr;
4188 4179  
4189 4180                  /*
4190 4181                   * Release the temporary compressed buffer as soon as possible.
4191 4182                   */
4192      -                if (abl2->b_compress != ZIO_COMPRESS_OFF)
     4183 +                if (l2hdr->b_compress != ZIO_COMPRESS_OFF)
4193 4184                          l2arc_release_cdata_buf(ab);
4194 4185  
4195 4186                  if (zio->io_error != 0) {
4196 4187                          /*
4197 4188                           * Error - drop L2ARC entry.
4198 4189                           */
4199 4190                          list_remove(buflist, ab);
4200      -                        ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
     4191 +                        ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
4201 4192                          ab->b_l2hdr = NULL;
4202      -                        kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
     4193 +                        kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
4203 4194                          ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
4204 4195                  }
4205 4196  
4206 4197                  /*
4207 4198                   * Allow ARC to begin reads to this L2ARC entry.
4208 4199                   */
4209 4200                  ab->b_flags &= ~ARC_L2_WRITING;
4210 4201  
4211 4202                  mutex_exit(hash_lock);
4212 4203          }
↓ open down ↓ 130 lines elided ↑ open up ↑
4343 4334  /*
4344 4335   * Evict buffers from the device write hand to the distance specified in
4345 4336   * bytes.  This distance may span populated buffers, it may span nothing.
4346 4337   * This is clearing a region on the L2ARC device ready for writing.
4347 4338   * If the 'all' boolean is set, every buffer is evicted.
4348 4339   */
4349 4340  static void
4350 4341  l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
4351 4342  {
4352 4343          list_t *buflist;
4353      -        l2arc_buf_hdr_t *abl2;
     4344 +        l2arc_buf_hdr_t *l2hdr;
4354 4345          arc_buf_hdr_t *ab, *ab_prev;
4355 4346          kmutex_t *hash_lock;
4356 4347          uint64_t taddr;
4357 4348  
4358 4349          buflist = dev->l2ad_buflist;
4359 4350  
4360 4351          if (buflist == NULL)
4361 4352                  return;
4362 4353  
4363 4354          if (!all && dev->l2ad_first) {
↓ open down ↓ 79 lines elided ↑ open up ↑
4443 4434                           */
4444 4435                          if (HDR_L2_READING(ab)) {
4445 4436                                  ARCSTAT_BUMP(arcstat_l2_evict_reading);
4446 4437                                  ab->b_flags |= ARC_L2_EVICTED;
4447 4438                          }
4448 4439  
4449 4440                          /*
4450 4441                           * Tell ARC this no longer exists in L2ARC.
4451 4442                           */
4452 4443                          if (ab->b_l2hdr != NULL) {
4453      -                                abl2 = ab->b_l2hdr;
4454      -                                ARCSTAT_INCR(arcstat_l2_asize, -abl2->b_asize);
     4444 +                                l2hdr = ab->b_l2hdr;
     4445 +                                ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
4455 4446                                  ab->b_l2hdr = NULL;
4456      -                                kmem_free(abl2, sizeof (l2arc_buf_hdr_t));
     4447 +                                kmem_free(l2hdr, sizeof (l2arc_buf_hdr_t));
4457 4448                                  ARCSTAT_INCR(arcstat_l2_size, -ab->b_size);
4458 4449                          }
4459 4450                          list_remove(buflist, ab);
4460 4451  
4461 4452                          /*
4462 4453                           * This may have been leftover after a
4463 4454                           * failed write.
4464 4455                           */
4465 4456                          ab->b_flags &= ~ARC_L2_WRITING;
4466 4457                  }
↓ open down ↓ 251 lines elided ↑ open up ↑
4718 4709           * l2arc_evict() will already have evicted ahead for this case.
4719 4710           */
4720 4711          if (dev->l2ad_hand >= (dev->l2ad_end - target_sz)) {
4721 4712                  vdev_space_update(dev->l2ad_vdev,
4722 4713                      dev->l2ad_end - dev->l2ad_hand, 0, 0);
4723 4714                  dev->l2ad_hand = dev->l2ad_start;
4724 4715                  dev->l2ad_evict = dev->l2ad_start;
4725 4716                  dev->l2ad_first = B_FALSE;
4726 4717          }
4727 4718  
     4719 +        /* l2arc_write_done() clears dev->l2ad_writing when the zio completes */
4728 4720          dev->l2ad_writing = B_TRUE;
4729 4721          (void) zio_wait(pio);
4730      -        dev->l2ad_writing = B_FALSE;
     4722 +        ASSERT(dev->l2ad_writing == B_FALSE);
4731 4723  
4732 4724          return (write_asize);
4733 4725  }
4734 4726  
4735 4727  /*
4736 4728   * Compresses an L2ARC buffer.
4737 4729   * The data to be compressed must be prefilled in l2hdr->b_tmp_cdata and its
4738 4730   * size in l2hdr->b_asize. This routine tries to compress the data and
4739 4731   * depending on the compression result there are three possible outcomes:
4740 4732   * *) The buffer was incompressible. The original l2hdr contents were left
↓ open down ↓ 401 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX