illumos-gate Sdiff usr/src/uts/common/fs/zfs/arc.c

Print this page

arc_get_data_buf should be more aggressive in eviction when memory is unavailable

2502  * entries.
2503  *
2504  * 3. Insert for MFU (c - p) > sizeof(arc_mfu) ->
2505  * c minus p represents the MFU space in the cache, since p is the size of the
2506  * cache that is dedicated to the MRU.  In this situation there's still space on
2507  * the MFU side, so the MRU side needs to be victimized.
2508  *
2509  * 4. Insert for MFU (c - p) < sizeof(arc_mfu) ->
2510  * MFU's resident set is consuming more space than it has been allotted.  In
2511  * this situation, we must victimize our own cache, the MFU, for this insertion.
2512  */
2513 static void
2514 arc_get_data_buf(arc_buf_t *buf)
2515 {
2516         arc_state_t             *state = buf->b_hdr->b_state;
2517         uint64_t                size = buf->b_hdr->b_size;
2518         arc_buf_contents_t      type = buf->b_hdr->b_type;
2519 
2520         arc_adapt(size, state);
2521 

2522         /*
2523          * We have not yet reached cache maximum size,
2524          * just allocate a new buffer.
2525          */
2526         if (!arc_evict_needed(type)) {
2527                 if (type == ARC_BUFC_METADATA) {
2528                         buf->b_data = zio_buf_alloc(size);

2529                         arc_space_consume(size, ARC_SPACE_DATA);


2530                 } else {
2531                         ASSERT(type == ARC_BUFC_DATA);
2532                         buf->b_data = zio_data_buf_alloc(size);

2533                         ARCSTAT_INCR(arcstat_data_size, size);
2534                         atomic_add_64(&arc_size, size);
2535                 }
2536                 goto out;
2537         }






2538 
2539         /*
2540          * If we are prefetching from the mfu ghost list, this buffer
2541          * will end up on the mru list; so steal space from there.
2542          */
2543         if (state == arc_mfu_ghost)
2544                 state = buf->b_hdr->b_flags & ARC_PREFETCH ? arc_mru : arc_mfu;
2545         else if (state == arc_mru_ghost)
2546                 state = arc_mru;
2547 
2548         if (state == arc_mru || state == arc_anon) {
2549                 uint64_t mru_used = arc_anon->arcs_size + arc_mru->arcs_size;
2550                 state = (arc_mfu->arcs_lsize[type] >= size &&
2551                     arc_p > mru_used) ? arc_mfu : arc_mru;
2552         } else {
2553                 /* MFU cases */
2554                 uint64_t mfu_space = arc_c - arc_p;
2555                 state =  (arc_mru->arcs_lsize[type] >= size &&
2556                     mfu_space > arc_mfu->arcs_size) ? arc_mru : arc_mfu;
2557         }
2558         if ((buf->b_data = arc_evict(state, NULL, size, TRUE, type)) == NULL) {
2559                 if (type == ARC_BUFC_METADATA) {
2560                         buf->b_data = zio_buf_alloc(size);
2561                         arc_space_consume(size, ARC_SPACE_DATA);
2562                 } else {
2563                         ASSERT(type == ARC_BUFC_DATA);
2564                         buf->b_data = zio_data_buf_alloc(size);
2565                         ARCSTAT_INCR(arcstat_data_size, size);
2566                         atomic_add_64(&arc_size, size);
2567                 }
2568                 ARCSTAT_BUMP(arcstat_recycle_miss);

2569         }
2570         ASSERT(buf->b_data != NULL);
2571 out:
2572         /*
2573          * Update the state size.  Note that ghost states have a
2574          * "ghost size" and so don't need to be updated.
2575          */
2576         if (!GHOST_STATE(buf->b_hdr->b_state)) {
2577                 arc_buf_hdr_t *hdr = buf->b_hdr;
2578 
2579                 atomic_add_64(&hdr->b_state->arcs_size, size);
2580                 if (list_link_active(&hdr->b_arc_node)) {
2581                         ASSERT(refcount_is_zero(&hdr->b_refcnt));
2582                         atomic_add_64(&hdr->b_state->arcs_lsize[type], size);
2583                 }
2584                 /*
2585                  * If we are growing the cache, and we are adding anonymous
2586                  * data, and we have outgrown arc_p, update arc_p
2587                  */
2588                 if (arc_size < arc_c && hdr->b_state == arc_anon &&

2502  * entries.
2503  *
2504  * 3. Insert for MFU (c - p) > sizeof(arc_mfu) ->
2505  * c minus p represents the MFU space in the cache, since p is the size of the
2506  * cache that is dedicated to the MRU.  In this situation there's still space on
2507  * the MFU side, so the MRU side needs to be victimized.
2508  *
2509  * 4. Insert for MFU (c - p) < sizeof(arc_mfu) ->
2510  * MFU's resident set is consuming more space than it has been allotted.  In
2511  * this situation, we must victimize our own cache, the MFU, for this insertion.
2512  */
2513 static void
2514 arc_get_data_buf(arc_buf_t *buf)
2515 {
2516         arc_state_t             *state = buf->b_hdr->b_state;
2517         uint64_t                size = buf->b_hdr->b_size;
2518         arc_buf_contents_t      type = buf->b_hdr->b_type;
2519 
2520         arc_adapt(size, state);
2521 
2522 top:
2523         /*
2524          * We have not yet reached cache maximum size, so
2525          * just try to allocate a new buffer.
2526          */
2527         if (!arc_evict_needed(type)) {
2528                 if (type == ARC_BUFC_METADATA) {
2529                         buf->b_data = zio_buf_alloc_canfail(size);
2530                         if (buf->b_data != NULL) {
2531                                 arc_space_consume(size, ARC_SPACE_DATA);
2532                                 goto out;
2533                         }
2534                 } else {
2535                         ASSERT(type == ARC_BUFC_DATA);
2536                         buf->b_data = zio_data_buf_alloc_canfail(size);
2537                         if (buf->b_data != NULL) {
2538                                 ARCSTAT_INCR(arcstat_data_size, size);
2539                                 atomic_add_64(&arc_size, size);

2540                                 goto out;
2541                         }
2542                 }
2543                 /*
2544                  * Memory allocation failed, probably due to excessive
2545                  * fragmentation; we need to evict regardless.
2546                  */
2547         }
2548 
2549         /*
2550          * If we are prefetching from the mfu ghost list, this buffer
2551          * will end up on the mru list; so steal space from there.
2552          */
2553         if (state == arc_mfu_ghost)
2554                 state = buf->b_hdr->b_flags & ARC_PREFETCH ? arc_mru : arc_mfu;
2555         else if (state == arc_mru_ghost)
2556                 state = arc_mru;
2557 
2558         if (state == arc_mru || state == arc_anon) {
2559                 uint64_t mru_used = arc_anon->arcs_size + arc_mru->arcs_size;
2560                 state = (arc_mfu->arcs_lsize[type] >= size &&
2561                     arc_p > mru_used) ? arc_mfu : arc_mru;
2562         } else {
2563                 /* MFU cases */
2564                 uint64_t mfu_space = arc_c - arc_p;
2565                 state =  (arc_mru->arcs_lsize[type] >= size &&
2566                     mfu_space > arc_mfu->arcs_size) ? arc_mru : arc_mfu;
2567         }
2568         if ((buf->b_data = arc_evict(state, NULL, size, TRUE, type)) == NULL) {









2569                 ARCSTAT_BUMP(arcstat_recycle_miss);
2570                 goto top;
2571         }
2572         ASSERT(buf->b_data != NULL);
2573 out:
2574         /*
2575          * Update the state size.  Note that ghost states have a
2576          * "ghost size" and so don't need to be updated.
2577          */
2578         if (!GHOST_STATE(buf->b_hdr->b_state)) {
2579                 arc_buf_hdr_t *hdr = buf->b_hdr;
2580 
2581                 atomic_add_64(&hdr->b_state->arcs_size, size);
2582                 if (list_link_active(&hdr->b_arc_node)) {
2583                         ASSERT(refcount_is_zero(&hdr->b_refcnt));
2584                         atomic_add_64(&hdr->b_state->arcs_lsize[type], size);
2585                 }
2586                 /*
2587                  * If we are growing the cache, and we are adding anonymous
2588                  * data, and we have outgrown arc_p, update arc_p
2589                  */
2590                 if (arc_size < arc_c && hdr->b_state == arc_anon &&