Print this page
3752 want more verifiable dbuf user eviction
Submitted by:   Justin Gibbs <justing@spectralogic.com>
Submitted by:   Will Andrews <willa@spectralogic.com>


  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  26  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  27  */
  28 
  29 /* Portions Copyright 2010 Robert Milkowski */
  30 
  31 #ifndef _SYS_DMU_H
  32 #define _SYS_DMU_H
  33 
  34 /*
  35  * This file describes the interface that the DMU provides for its
  36  * consumers.
  37  *
  38  * The DMU also interacts with the SPA.  That interface is described in
  39  * dmu_spa.h.
  40  */
  41 

  42 #include <sys/inttypes.h>
  43 #include <sys/types.h>
  44 #include <sys/param.h>
  45 #include <sys/cred.h>
  46 #include <sys/time.h>
  47 #include <sys/fs/zfs.h>
  48 
  49 #ifdef  __cplusplus
  50 extern "C" {
  51 #endif
  52 
  53 struct uio;
  54 struct xuio;
  55 struct page;
  56 struct vnode;
  57 struct spa;
  58 struct zilog;
  59 struct zio;
  60 struct blkptr;
  61 struct zap_cursor;


 264 int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
 265     void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
 266 int dmu_objset_clone(const char *name, const char *origin);
 267 int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
 268     struct nvlist *errlist);
 269 int dmu_objset_snapshot_one(const char *fsname, const char *snapname);
 270 int dmu_objset_snapshot_tmp(const char *, const char *, int);
 271 int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
 272     int flags);
 273 void dmu_objset_byteswap(void *buf, size_t size);
 274 int dsl_dataset_rename_snapshot(const char *fsname,
 275     const char *oldsnapname, const char *newsnapname, boolean_t recursive);
 276 
 277 typedef struct dmu_buf {
             /*
              * Describes one DMU buffer: the object it belongs to, where it
              * sits within that object, its size, and a pointer to its data.
              * Note that db_data (the pointer) can change across calls such
              * as dmu_buf_will_dirty(); see dmu_buf_set_user().
              */
 278         uint64_t db_object;             /* object that this buffer is part of */
 279         uint64_t db_offset;             /* byte offset in this object */
 280         uint64_t db_size;               /* size of buffer in bytes */
 281         void *db_data;                  /* data in buffer */
 282 } dmu_buf_t;
 283 
 284 typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
 285 
 286 /*
 287  * The names of zap entries in the DIRECTORY_OBJECT of the MOS.
 288  */
 289 #define DMU_POOL_DIRECTORY_OBJECT       1
 290 #define DMU_POOL_CONFIG                 "config"
 291 #define DMU_POOL_FEATURES_FOR_WRITE     "features_for_write"
 292 #define DMU_POOL_FEATURES_FOR_READ      "features_for_read"
 293 #define DMU_POOL_FEATURE_DESCRIPTIONS   "feature_descriptions"
 294 #define DMU_POOL_ROOT_DATASET           "root_dataset"
 295 #define DMU_POOL_SYNC_BPOBJ             "sync_bplist"
 296 #define DMU_POOL_ERRLOG_SCRUB           "errlog_scrub"
 297 #define DMU_POOL_ERRLOG_LAST            "errlog_last"
 298 #define DMU_POOL_SPARES                 "spares"
 299 #define DMU_POOL_DEFLATE                "deflate"
 300 #define DMU_POOL_HISTORY                "history"
 301 #define DMU_POOL_PROPS                  "pool_props"
 302 #define DMU_POOL_L2CACHE                "l2cache"
 303 #define DMU_POOL_TMP_USERREFS           "tmp_userrefs"
 304 #define DMU_POOL_DDT                    "DDT-%s-%s-%s"
 305 #define DMU_POOL_DDT_STATS              "DDT-statistics"


 443 int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
 444     void *tag, dmu_buf_t **, int flags);
 445 void dmu_buf_add_ref(dmu_buf_t *db, void* tag);
 446 void dmu_buf_rele(dmu_buf_t *db, void *tag);
 447 uint64_t dmu_buf_refcount(dmu_buf_t *db);
 448 
 449 /*
 450  * dmu_buf_hold_array holds the DMU buffers which contain all bytes in a
 451  * range of an object.  A pointer to an array of dmu_buf_t*'s is
 452  * returned (in *dbpp).
 453  *
 454  * dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and
 455  * frees the array.  The hold on the array of buffers MUST be released
 456  * with dmu_buf_rele_array.  You can NOT release the hold on each buffer
 457  * individually with dmu_buf_rele.
 458  */
 459 int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
 460     uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
 461 void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
 462 




 463 /*
 464  * Returns NULL on success, or the existing user ptr if it's already
 465  * been set.
 466  *
 467  * user_ptr is for use by the user and can be obtained via dmu_buf_get_user().
 468  *
 469  * user_data_ptr_ptr should be NULL, or a pointer to a pointer which
 470  * will be set to db->db_data when you are allowed to access it.  Note
 471  * that db->db_data (the pointer) can change when you do dmu_buf_read(),
 472  * dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill().
 473  * *user_data_ptr_ptr will be set to the new value when it changes.
 474  *
 475  * If non-NULL, pageout func will be called when this buffer is being
 476  * excised from the cache, so that you can clean up the data structure
 477  * pointed to by user_ptr.
 478  *
 479  * dmu_evict_user() will call the pageout func for all buffers in an
 480  * objset with a given pageout func.

















 481  */
 482 void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr,
 483     dmu_buf_evict_func_t *pageout_func);
 484 /*
 485  * set_user_ie is the same as set_user, but request immediate eviction
 486  * when hold count goes to zero.
 487  */
 488 void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr,
 489     void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func);
 490 void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr,
 491     void *user_ptr, void *user_data_ptr_ptr,
 492     dmu_buf_evict_func_t *pageout_func);
 493 void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func);













 494 
 495 /*
 496  * Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set.
 497  */
 498 void *dmu_buf_get_user(dmu_buf_t *db);





























 499 
 500 /*
 501  * Returns the blkptr associated with this dbuf, or NULL if not set.
 502  */
 503 struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
 504 
 505 /*
 506  * Indicate that you are going to modify the buffer's data (db_data).
 507  *
 508  * The transaction (tx) must be assigned to a txg (ie. you've called
 509  * dmu_tx_assign()).  The buffer's object must be held in the tx
 510  * (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
 511  */
 512 void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
 513 
 514 /*
 515  * Tells if the given dbuf is freeable.
 516  */
 517 boolean_t dmu_buf_freeable(dmu_buf_t *);
 518 




  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  26  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  27  */
  28 
  29 /* Portions Copyright 2010 Robert Milkowski */
  30 
  31 #ifndef _SYS_DMU_H
  32 #define _SYS_DMU_H
  33 
  34 /*
  35  * This file describes the interface that the DMU provides for its
  36  * consumers.
  37  *
  38  * The DMU also interacts with the SPA.  That interface is described in
  39  * dmu_spa.h.
  40  */
  41 
  42 #include <sys/zfs_context.h>
  43 #include <sys/inttypes.h>
  44 #include <sys/types.h>
  45 #include <sys/param.h>
  46 #include <sys/cred.h>
  47 #include <sys/time.h>
  48 #include <sys/fs/zfs.h>
  49 
  50 #ifdef  __cplusplus
  51 extern "C" {
  52 #endif
  53 
  54 struct uio;
  55 struct xuio;
  56 struct page;
  57 struct vnode;
  58 struct spa;
  59 struct zilog;
  60 struct zio;
  61 struct blkptr;
  62 struct zap_cursor;


 265 int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
 266     void (*func)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx), void *arg);
 267 int dmu_objset_clone(const char *name, const char *origin);
 268 int dsl_destroy_snapshots_nvl(struct nvlist *snaps, boolean_t defer,
 269     struct nvlist *errlist);
 270 int dmu_objset_snapshot_one(const char *fsname, const char *snapname);
 271 int dmu_objset_snapshot_tmp(const char *, const char *, int);
 272 int dmu_objset_find(char *name, int func(const char *, void *), void *arg,
 273     int flags);
 274 void dmu_objset_byteswap(void *buf, size_t size);
 275 int dsl_dataset_rename_snapshot(const char *fsname,
 276     const char *oldsnapname, const char *newsnapname, boolean_t recursive);
 277 
 278 typedef struct dmu_buf {
             /*
              * Describes one DMU buffer: the object it belongs to, where it
              * sits within that object, its size, and a pointer to its data.
              */
 279         uint64_t db_object;             /* object that this buffer is part of */
 280         uint64_t db_offset;             /* byte offset in this object */
 281         uint64_t db_size;               /* size of buffer in bytes */
 282         void *db_data;                  /* data in buffer */
 283 } dmu_buf_t;
 284 


 285 /*
 286  * The names of zap entries in the DIRECTORY_OBJECT of the MOS.
 287  */
 288 #define DMU_POOL_DIRECTORY_OBJECT       1
 289 #define DMU_POOL_CONFIG                 "config"
 290 #define DMU_POOL_FEATURES_FOR_WRITE     "features_for_write"
 291 #define DMU_POOL_FEATURES_FOR_READ      "features_for_read"
 292 #define DMU_POOL_FEATURE_DESCRIPTIONS   "feature_descriptions"
 293 #define DMU_POOL_ROOT_DATASET           "root_dataset"
 294 #define DMU_POOL_SYNC_BPOBJ             "sync_bplist"
 295 #define DMU_POOL_ERRLOG_SCRUB           "errlog_scrub"
 296 #define DMU_POOL_ERRLOG_LAST            "errlog_last"
 297 #define DMU_POOL_SPARES                 "spares"
 298 #define DMU_POOL_DEFLATE                "deflate"
 299 #define DMU_POOL_HISTORY                "history"
 300 #define DMU_POOL_PROPS                  "pool_props"
 301 #define DMU_POOL_L2CACHE                "l2cache"
 302 #define DMU_POOL_TMP_USERREFS           "tmp_userrefs"
 303 #define DMU_POOL_DDT                    "DDT-%s-%s-%s"
 304 #define DMU_POOL_DDT_STATS              "DDT-statistics"


 442 int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
 443     void *tag, dmu_buf_t **, int flags);
 444 void dmu_buf_add_ref(dmu_buf_t *db, void* tag);
 445 void dmu_buf_rele(dmu_buf_t *db, void *tag);
 446 uint64_t dmu_buf_refcount(dmu_buf_t *db);
 447 
 448 /*
 449  * dmu_buf_hold_array holds the DMU buffers which contain all bytes in a
 450  * range of an object.  A pointer to an array of dmu_buf_t*'s is
 451  * returned (in *dbpp).
 452  *
 453  * dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and
 454  * frees the array.  The hold on the array of buffers MUST be released
 455  * with dmu_buf_rele_array.  You can NOT release the hold on each buffer
 456  * individually with dmu_buf_rele.
 457  */
 458 int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
 459     uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
 460 void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
 461 
 462 struct dmu_buf_user;
 463 
 464 typedef void dmu_buf_evict_func_t(struct dmu_buf_user *);
 465 
 466 /*
 467  * The DMU buffer user object is used to allow private data to be
 468  * associated with a dbuf for the duration of its lifetime.  This private
 469  * data must include a dmu_buf_user_t as its first object, which is passed
 470  * into the DMU user data API and can be attached to a dbuf.  Clients can
 471  * regain access to their private data structure with a cast.
 472  *
 473  * DMU buffer users can be notified via a callback when their associated
 474  * dbuf has been evicted.  This is typically used to free the user's
 475  * private data.  The eviction callback is executed without the dbuf
 476  * mutex held or any other type of mechanism to guarantee that the
 477  * dbuf is still available.  For this reason, users must assume the dbuf
 478  * has already been freed and not reference the dbuf from the callback
 479  * context.
 480  *
 481  * Users requesting "immediate eviction" are notified as soon as the dbuf
 482  * is only referenced by dirty records (dirties == holds).  Otherwise the
 483  * eviction callback occurs after the last reference to the dbuf is dropped.
 484  *
 485  * Eviction Callback Processing
 486  * ============================
 487  * In any context where a dbuf reference drop may trigger an eviction, an       
 488  * eviction queue object must be provided.  This queue must then be
 489  * processed while not holding any dbuf locks.  In this way, the user can       
 490  * perform any work needed in their eviction function without fear of
 491  * lock order reversals.
 492  *
 493  * Implementation Note
 494  * ============================
 495  * Some users will occasionally want to map a structure directly onto the
 496  * backing dbuf.  Using a union with a name alias macro to access these
 497  * overlays reduces the ugliness of code that accesses them.  Initial work on
 498  * user objects involved using a macro that took the user object as an
 499  * argument to access the fields, which resulted in hundreds of lines of
 500  * needless diffs and wasn't any easier to read.
 501  */
 502 typedef struct dmu_buf_user {
 503         /*
 504          * This instance's link in the eviction queue.  Set when the buffer
 505          * has been evicted and the callback needs to be called.

 506          */
 507         list_node_t evict_queue_link;
 508         /* Eviction callback; invoked with a pointer to this instance. */
 509         dmu_buf_evict_func_t *evict_func;
 510 } dmu_buf_user_t;
 511 
 512 /*
 513  * Initialize the given dmu_buf_user_t instance with the eviction function
 514  * evict_func, to be called when the user is evicted.
 515  *
 516  * NOTE: This function should only be called once on a given object.  To
 517  *       help enforce this, dbu should already be zeroed on entry.
 518  */
 519 static inline void
 520 dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func)
 521 {
             /*
              * Catch repeated initialization: the callback slot must still
              * be NULL and the user must not be linked on an eviction list.
              */
 522         ASSERT(dbu->evict_func == NULL);
 523         ASSERT(!list_link_active(&dbu->evict_queue_link));
 524         dbu->evict_func = evict_func;
 525 }
 526 
 527 static inline void
 528 dmu_buf_create_user_evict_list(list_t *evict_list_p)
 529 {
             /*
              * Set up evict_list_p as a list of dmu_buf_user_t objects that
              * are linked through their evict_queue_link member.
              */
 530         list_create(evict_list_p, sizeof(dmu_buf_user_t),
 531             offsetof(dmu_buf_user_t, evict_queue_link));
 532 }
 533 
 534 static inline void
 535 dmu_buf_process_user_evicts(list_t *evict_list_p)
 536 {
             /*
              * Walk evict_list_p from the head, unlinking each queued user
              * and then invoking its eviction callback on it.
              */
 537         dmu_buf_user_t *dbu, *next;
 538 
 539         for (dbu = (dmu_buf_user_t *)list_head(evict_list_p); dbu != NULL;
 540             dbu = next) {
                     /*
                      * Fetch the successor before unlinking dbu, so that
                      * dbu is not touched again once its callback has run.
                      */
 541                 next = (dmu_buf_user_t *)list_next(evict_list_p, dbu);
 542                 list_remove(evict_list_p, dbu);
 543                 dbu->evict_func(dbu);
 544         }
 545 }
 546 
 547 static inline void
 548 dmu_buf_destroy_user_evict_list(list_t *evict_list_p)
 549 {
             /*
              * Run the eviction callbacks of any users still queued on
              * evict_list_p, then tear the list itself down.
              */
 550         dmu_buf_process_user_evicts(evict_list_p);
 551         list_destroy(evict_list_p);
 552 }
 553 
 554 dmu_buf_user_t *dmu_buf_set_user(dmu_buf_t *db, dmu_buf_user_t *user);
 555 dmu_buf_user_t *dmu_buf_set_user_ie(dmu_buf_t *db, dmu_buf_user_t *user);
 556 dmu_buf_user_t *dmu_buf_replace_user(dmu_buf_t *db,
 557     dmu_buf_user_t *old_user, dmu_buf_user_t *new_user);
 558 dmu_buf_user_t *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);
 559 dmu_buf_user_t *dmu_buf_get_user(dmu_buf_t *db);
 560 
 561 /*
 562  * Returns the blkptr associated with this dbuf, or NULL if not set.
 563  */
 564 struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
 565 
 566 /*
 567  * Indicate that you are going to modify the buffer's data (db_data).
 568  *
 569  * The transaction (tx) must be assigned to a txg (ie. you've called
 570  * dmu_tx_assign()).  The buffer's object must be held in the tx
 571  * (ie. you've called dmu_tx_hold_object(tx, db->db_object)).
 572  */
 573 void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
 574 
 575 /*
 576  * Tells if the given dbuf is freeable.
 577  */
 578 boolean_t dmu_buf_freeable(dmu_buf_t *);
 579