3741 zfs needs better comments
Submitted by:   Will Andrews <willa@spectralogic.com>
Submitted by:   Justin Gibbs <justing@spectralogic.com>
Submitted by:   Alan Somers <alans@spectralogic.com>
Reviewed by:    Matthew Ahrens <mahrens@delphix.com>


 237 static arc_state_t ARC_mfu_ghost;
 238 static arc_state_t ARC_l2c_only;
 239 
 240 typedef struct arc_stats {
 241         kstat_named_t arcstat_hits;
 242         kstat_named_t arcstat_misses;
 243         kstat_named_t arcstat_demand_data_hits;
 244         kstat_named_t arcstat_demand_data_misses;
 245         kstat_named_t arcstat_demand_metadata_hits;
 246         kstat_named_t arcstat_demand_metadata_misses;
 247         kstat_named_t arcstat_prefetch_data_hits;
 248         kstat_named_t arcstat_prefetch_data_misses;
 249         kstat_named_t arcstat_prefetch_metadata_hits;
 250         kstat_named_t arcstat_prefetch_metadata_misses;
 251         kstat_named_t arcstat_mru_hits;
 252         kstat_named_t arcstat_mru_ghost_hits;
 253         kstat_named_t arcstat_mfu_hits;
 254         kstat_named_t arcstat_mfu_ghost_hits;
 255         kstat_named_t arcstat_deleted;
 256         kstat_named_t arcstat_recycle_miss;
 257         kstat_named_t arcstat_mutex_miss;
 258         kstat_named_t arcstat_evict_skip;
 259         kstat_named_t arcstat_evict_l2_cached;
 260         kstat_named_t arcstat_evict_l2_eligible;
 261         kstat_named_t arcstat_evict_l2_ineligible;
 262         kstat_named_t arcstat_hash_elements;
 263         kstat_named_t arcstat_hash_elements_max;
 264         kstat_named_t arcstat_hash_collisions;
 265         kstat_named_t arcstat_hash_chains;
 266         kstat_named_t arcstat_hash_chain_max;
 267         kstat_named_t arcstat_p;
 268         kstat_named_t arcstat_c;
 269         kstat_named_t arcstat_c_min;
 270         kstat_named_t arcstat_c_max;
 271         kstat_named_t arcstat_size;
 272         kstat_named_t arcstat_hdr_size;
 273         kstat_named_t arcstat_data_size;
 274         kstat_named_t arcstat_other_size;
 275         kstat_named_t arcstat_l2_hits;
 276         kstat_named_t arcstat_l2_misses;
 277         kstat_named_t arcstat_l2_feeds;


2936                 acb->acb_private = private;
2937 
2938                 ASSERT(hdr->b_acb == NULL);
2939                 hdr->b_acb = acb;
2940                 hdr->b_flags |= ARC_IO_IN_PROGRESS;
2941 
2942                 if (HDR_L2CACHE(hdr) && hdr->b_l2hdr != NULL &&
2943                     (vd = hdr->b_l2hdr->b_dev->l2ad_vdev) != NULL) {
2944                         devw = hdr->b_l2hdr->b_dev->l2ad_writing;
2945                         addr = hdr->b_l2hdr->b_daddr;
2946                         /*
2947                          * Lock out device removal.
2948                          */
2949                         if (vdev_is_dead(vd) ||
2950                             !spa_config_tryenter(spa, SCL_L2ARC, vd, RW_READER))
2951                                 vd = NULL;
2952                 }
2953 
2954                 mutex_exit(hash_lock);
2955 
2956                 ASSERT3U(hdr->b_size, ==, size);
2957                 DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp,
2958                     uint64_t, size, zbookmark_t *, zb);
2959                 ARCSTAT_BUMP(arcstat_misses);
2960                 ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH),
2961                     demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
2962                     data, metadata, misses);
2963 
2964                 if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) {
2965                         /*
2966                          * Read from the L2ARC if the following are true:
2967                          * 1. The L2ARC vdev was previously cached.
2968                          * 2. This buffer still has L2ARC metadata.
2969                          * 3. This buffer isn't currently writing to the L2ARC.
2970                          * 4. The L2ARC entry wasn't evicted, which may
2971                          *    also have invalidated the vdev.
2972                          * 5. This isn't prefetch and l2arc_noprefetch is set.
2973                          */
2974                         if (hdr->b_l2hdr != NULL &&
2975                             !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&


3126                 ASSERT(HDR_IN_HASH_TABLE(hdr));
3127                 hdr->b_flags |= ARC_IN_HASH_TABLE;
3128                 hdr->b_flags &= ~ARC_BUF_AVAILABLE;
3129 
3130                 mutex_exit(&evicted_state->arcs_mtx);
3131                 mutex_exit(&old_state->arcs_mtx);
3132         }
3133         mutex_exit(hash_lock);
3134         mutex_exit(&buf->b_evict_lock);
3135 
3136         VERIFY(buf->b_efunc(buf) == 0);
3137         buf->b_efunc = NULL;
3138         buf->b_private = NULL;
3139         buf->b_hdr = NULL;
3140         buf->b_next = NULL;
3141         kmem_cache_free(buf_cache, buf);
3142         return (1);
3143 }
3144 
3145 /*
3146  * Release this buffer from the cache.  This must be done
3147  * after a read and prior to modifying the buffer contents.
3148  * If the buffer has more than one reference, we must make
3149  * a new hdr for the buffer.
3150  */
3151 void
3152 arc_release(arc_buf_t *buf, void *tag)
3153 {
3154         arc_buf_hdr_t *hdr;
3155         kmutex_t *hash_lock = NULL;
3156         l2arc_buf_hdr_t *l2hdr;
3157         uint64_t buf_size;
3158 
3159         /*
3160          * It would be nice to assert that if it's DMU metadata (level >
3161          * 0 || it's the dnode file), then it must be syncing context.
3162          * But we don't know that information at this level.
3163          */
3164 
3165         mutex_enter(&buf->b_evict_lock);
3166         hdr = buf->b_hdr;
3167 




 237 static arc_state_t ARC_mfu_ghost;
 238 static arc_state_t ARC_l2c_only;
 239 
 240 typedef struct arc_stats {
 241         kstat_named_t arcstat_hits;
 242         kstat_named_t arcstat_misses;
 243         kstat_named_t arcstat_demand_data_hits;
 244         kstat_named_t arcstat_demand_data_misses;
 245         kstat_named_t arcstat_demand_metadata_hits;
 246         kstat_named_t arcstat_demand_metadata_misses;
 247         kstat_named_t arcstat_prefetch_data_hits;
 248         kstat_named_t arcstat_prefetch_data_misses;
 249         kstat_named_t arcstat_prefetch_metadata_hits;
 250         kstat_named_t arcstat_prefetch_metadata_misses;
 251         kstat_named_t arcstat_mru_hits;
 252         kstat_named_t arcstat_mru_ghost_hits;
 253         kstat_named_t arcstat_mfu_hits;
 254         kstat_named_t arcstat_mfu_ghost_hits;
 255         kstat_named_t arcstat_deleted;
 256         kstat_named_t arcstat_recycle_miss;
 257         /*
 258          * Number of buffers that could not be evicted because the hash lock
 259          * was held by another thread.  The lock may not necessarily be held
 260          * by something using the same buffer, since hash locks are shared
 261          * by multiple buffers.
 262          */
 263         kstat_named_t arcstat_mutex_miss;
 264         /*
 265          * Number of buffers skipped because they have I/O in progress, are
 266          * indirect prefetch buffers that have not lived long enough, or are
 267          * not from the spa we're trying to evict from.
 268          */
 269         kstat_named_t arcstat_evict_skip;
 270         kstat_named_t arcstat_evict_l2_cached;
 271         kstat_named_t arcstat_evict_l2_eligible;
 272         kstat_named_t arcstat_evict_l2_ineligible;
 273         kstat_named_t arcstat_hash_elements;
 274         kstat_named_t arcstat_hash_elements_max;
 275         kstat_named_t arcstat_hash_collisions;
 276         kstat_named_t arcstat_hash_chains;
 277         kstat_named_t arcstat_hash_chain_max;
 278         kstat_named_t arcstat_p;
 279         kstat_named_t arcstat_c;
 280         kstat_named_t arcstat_c_min;
 281         kstat_named_t arcstat_c_max;
 282         kstat_named_t arcstat_size;
 283         kstat_named_t arcstat_hdr_size;
 284         kstat_named_t arcstat_data_size;
 285         kstat_named_t arcstat_other_size;
 286         kstat_named_t arcstat_l2_hits;
 287         kstat_named_t arcstat_l2_misses;
 288         kstat_named_t arcstat_l2_feeds;
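
The new arcstat_mutex_miss comment above hinges on the fact that ARC hash locks are striped: many headers hash to the same mutex, so an eviction thread can lose a lock race to a thread working on a completely unrelated buffer. A minimal sketch of that scheme follows; the array size and the names NUM_HASH_LOCKS, hash_lock_for(), and evict_trylock() are illustrative only, not the actual hash-table/lock definitions in arc.c.

#include <sys/types.h>
#include <sys/mutex.h>          /* illumos kernel mutex API (assumed context) */

#define NUM_HASH_LOCKS  256     /* power of two; illustrative only */

/* Each lock would be mutex_init()ed with MUTEX_DEFAULT at setup. */
static kmutex_t hash_locks[NUM_HASH_LOCKS];

/* Many distinct hash values share the same lock ("lock striping"). */
static kmutex_t *
hash_lock_for(uint64_t hashval)
{
        return (&hash_locks[hashval & (NUM_HASH_LOCKS - 1)]);
}

/*
 * Eviction-style use: if the stripe is already held -- possibly by a
 * thread touching a different buffer that merely hashes to the same
 * stripe -- give up and count a mutex miss instead of blocking.
 */
static boolean_t
evict_trylock(uint64_t hashval, uint64_t *mutex_miss)
{
        kmutex_t *lock = hash_lock_for(hashval);

        if (!mutex_tryenter(lock)) {
                (*mutex_miss)++;
                return (B_FALSE);
        }
        /* ... evict the buffer ... */
        mutex_exit(lock);
        return (B_TRUE);
}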


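The demand/prefetch and data/metadata hit and miss counters declared above are all bumped from one call site; in the miss path shown in the next hunk this happens through ARCSTAT_CONDSTAT. The real macro in arc.c is built with token pasting; the sketch below shows the idea, and the ARCSTAT_BUMP definition used here is an assumption for the sketch rather than the exact one in arc.c.

#include <sys/atomic.h>

/* Assumed per-stat bump for this sketch. */
#define ARCSTAT_BUMP(stat)      atomic_inc_64(&arc_stats.stat.value.ui64)

/*
 * Pick one of four counters from two conditions, e.g.
 * (demand vs. prefetch) x (data vs. metadata).
 */
#define ARCSTAT_CONDSTAT(cond1, stat1, notstat1, cond2, stat2, notstat2, stat) \
        if (cond1) {                                                    \
                if (cond2)                                              \
                        ARCSTAT_BUMP(arcstat_##stat1##_##stat2##_##stat); \
                else                                                    \
                        ARCSTAT_BUMP(arcstat_##stat1##_##notstat2##_##stat); \
        } else {                                                        \
                if (cond2)                                              \
                        ARCSTAT_BUMP(arcstat_##notstat1##_##stat2##_##stat); \
                else                                                    \
                        ARCSTAT_BUMP(arcstat_##notstat1##_##notstat2##_##stat); \
        }

So the ARCSTAT_CONDSTAT(..., demand, prefetch, ..., data, metadata, misses) call in the hunk below turns a miss on a non-prefetch metadata read into a bump of arcstat_demand_metadata_misses.
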
2947                 acb->acb_private = private;
2948 
2949                 ASSERT(hdr->b_acb == NULL);
2950                 hdr->b_acb = acb;
2951                 hdr->b_flags |= ARC_IO_IN_PROGRESS;
2952 
2953                 if (HDR_L2CACHE(hdr) && hdr->b_l2hdr != NULL &&
2954                     (vd = hdr->b_l2hdr->b_dev->l2ad_vdev) != NULL) {
2955                         devw = hdr->b_l2hdr->b_dev->l2ad_writing;
2956                         addr = hdr->b_l2hdr->b_daddr;
2957                         /*
2958                          * Lock out device removal.
2959                          */
2960                         if (vdev_is_dead(vd) ||
2961                             !spa_config_tryenter(spa, SCL_L2ARC, vd, RW_READER))
2962                                 vd = NULL;
2963                 }
2964 
2965                 mutex_exit(hash_lock);
2966 
2967                 /*
2968                  * At this point, we have a level 1 cache miss.  Try again in
2969                  * L2ARC if possible.
2970                  */
2971                 ASSERT3U(hdr->b_size, ==, size);
2972                 DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp,
2973                     uint64_t, size, zbookmark_t *, zb);
2974                 ARCSTAT_BUMP(arcstat_misses);
2975                 ARCSTAT_CONDSTAT(!(hdr->b_flags & ARC_PREFETCH),
2976                     demand, prefetch, hdr->b_type != ARC_BUFC_METADATA,
2977                     data, metadata, misses);
2978 
2979                 if (vd != NULL && l2arc_ndev != 0 && !(l2arc_norw && devw)) {
2980                         /*
2981                          * Read from the L2ARC if the following are true:
2982                          * 1. The L2ARC vdev was previously cached.
2983                          * 2. This buffer still has L2ARC metadata.
2984                          * 3. This buffer isn't currently writing to the L2ARC.
2985                          * 4. The L2ARC entry wasn't evicted, which may
2986                          *    also have invalidated the vdev.
2987                          * 5. This isn't prefetch and l2arc_noprefetch is set.
2988                          */
2989                         if (hdr->b_l2hdr != NULL &&
2990                             !HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
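
The hunk is cut off mid-condition, but the five-point comment maps onto a fairly simple predicate. Restated as a standalone helper purely for illustration (the function, its parameters, and HDR_PREFETCH() -- standing for a check of the header's ARC_PREFETCH flag -- are assumptions; arc_read() performs these checks inline):

/* Hypothetical restatement of the L2ARC read-eligibility test. */
static boolean_t
l2arc_read_eligible(arc_buf_hdr_t *hdr, vdev_t *vd, int ndev,
    boolean_t noprefetch, boolean_t norw, boolean_t devw)
{
        return (vd != NULL && ndev != 0 &&      /* 1. vdev cached and present */
            !(norw && devw) &&                  /* device isn't busy writing */
            hdr->b_l2hdr != NULL &&             /* 2. L2ARC metadata intact */
            !HDR_L2_WRITING(hdr) &&             /* 3. not being written out */
            !HDR_L2_EVICTED(hdr) &&             /* 4. entry not evicted */
            !(noprefetch && HDR_PREFETCH(hdr))); /* 5. prefetch policy allows it */
}

Condition 5 as worded in the comment is easy to misread; the sense that matches the surrounding logic is "this isn't a prefetch, or l2arc_noprefetch isn't set", which is what the last clause above encodes.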


3141                 ASSERT(HDR_IN_HASH_TABLE(hdr));
3142                 hdr->b_flags |= ARC_IN_HASH_TABLE;
3143                 hdr->b_flags &= ~ARC_BUF_AVAILABLE;
3144 
3145                 mutex_exit(&evicted_state->arcs_mtx);
3146                 mutex_exit(&old_state->arcs_mtx);
3147         }
3148         mutex_exit(hash_lock);
3149         mutex_exit(&buf->b_evict_lock);
3150 
3151         VERIFY(buf->b_efunc(buf) == 0);
3152         buf->b_efunc = NULL;
3153         buf->b_private = NULL;
3154         buf->b_hdr = NULL;
3155         buf->b_next = NULL;
3156         kmem_cache_free(buf_cache, buf);
3157         return (1);
3158 }
3159 
3160 /*
3161  * Release this buffer from the cache, making it an anonymous buffer.  This
3162  * must be done after a read and prior to modifying the buffer contents.
3163  * If the buffer has more than one reference, we must make
3164  * a new hdr for the buffer.
3165  */
3166 void
3167 arc_release(arc_buf_t *buf, void *tag)
3168 {
3169         arc_buf_hdr_t *hdr;
3170         kmutex_t *hash_lock = NULL;
3171         l2arc_buf_hdr_t *l2hdr;
3172         uint64_t buf_size;
3173 
3174         /*
3175          * It would be nice to assert that if it's DMU metadata (level >
3176          * 0 || it's the dnode file), then it must be syncing context.
3177          * But we don't know that information at this level.
3178          */
3179 
3180         mutex_enter(&buf->b_evict_lock);
3181         hdr = buf->b_hdr;
3182
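
For the arc_release() comment that opens this hunk: the "anonymous buffer" rule turns on the header's reference count. The toy, user-space model below captures just that rule; none of these types or names exist in arc.c, and the real function additionally handles the hash table, L2ARC header, and size accounting under the locks taken above.

#include <stdlib.h>

struct toy_hdr {
        int     refcount;       /* how many bufs share this header */
        int     anonymous;      /* detached from the cache lists */
};

struct toy_buf {
        struct toy_hdr *hdr;
};

static void
toy_release(struct toy_buf *buf)
{
        struct toy_hdr *hdr = buf->hdr;

        if (hdr->refcount > 1) {
                /*
                 * Other holders remain: split this buf onto a brand-new
                 * anonymous header so their view of the data is untouched.
                 */
                struct toy_hdr *nhdr = calloc(1, sizeof (*nhdr));
                if (nhdr == NULL)
                        abort();
                nhdr->refcount = 1;
                nhdr->anonymous = 1;
                hdr->refcount--;
                buf->hdr = nhdr;
        } else {
                /* Sole holder: the existing header simply becomes anonymous. */
                hdr->anonymous = 1;
        }
}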