Print this page
3752 want more verifiable dbuf user eviction
Submitted by:   Justin Gibbs <justing@spectralogic.com>
Submitted by:   Will Andrews <willa@spectralogic.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/sys/dmu.h
          +++ new/usr/src/uts/common/fs/zfs/sys/dmu.h
↓ open down ↓ 31 lines elided ↑ open up ↑
  32   32  #define _SYS_DMU_H
  33   33  
  34   34  /*
  35   35   * This file describes the interface that the DMU provides for its
  36   36   * consumers.
  37   37   *
  38   38   * The DMU also interacts with the SPA.  That interface is described in
  39   39   * dmu_spa.h.
  40   40   */
  41   41  
       42 +#include <sys/zfs_context.h>
  42   43  #include <sys/inttypes.h>
  43   44  #include <sys/types.h>
  44   45  #include <sys/param.h>
  45   46  #include <sys/cred.h>
  46   47  #include <sys/time.h>
  47   48  #include <sys/fs/zfs.h>
  48   49  
  49   50  #ifdef  __cplusplus
  50   51  extern "C" {
  51   52  #endif
↓ open down ↓ 222 lines elided ↑ open up ↑
 274  275  int dsl_dataset_rename_snapshot(const char *fsname,
 275  276      const char *oldsnapname, const char *newsnapname, boolean_t recursive);
 276  277  
 277  278  typedef struct dmu_buf {
 278  279          uint64_t db_object;             /* object this buffer belongs to */
 279  280          uint64_t db_offset;             /* byte offset within that object */
 280  281          uint64_t db_size;               /* size of the buffer, in bytes */
 281  282          void *db_data;                  /* pointer to the buffer's data */
 282  283  } dmu_buf_t;
 283  284  
 284      -typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
 285      -
 286  285  /*
 287  286   * The names of zap entries in the DIRECTORY_OBJECT of the MOS.
 288  287   */
 289  288  #define DMU_POOL_DIRECTORY_OBJECT       1
 290  289  #define DMU_POOL_CONFIG                 "config"
 291  290  #define DMU_POOL_FEATURES_FOR_WRITE     "features_for_write"
 292  291  #define DMU_POOL_FEATURES_FOR_READ      "features_for_read"
 293  292  #define DMU_POOL_FEATURE_DESCRIPTIONS   "feature_descriptions"
 294  293  #define DMU_POOL_ROOT_DATASET           "root_dataset"
 295  294  #define DMU_POOL_SYNC_BPOBJ             "sync_bplist"
↓ open down ↓ 157 lines elided ↑ open up ↑
 453  452   *
 454  453   * dmu_buf_rele_array releases the hold on an array of dmu_buf_t*'s, and
 455  454   * frees the array.  The hold on the array of buffers MUST be released
 456  455   * with dmu_buf_rele_array.  You can NOT release the hold on each buffer
 457  456   * individually with dmu_buf_rele.
 458  457   */
 459  458  int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
 460  459      uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
 461  460  void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
 462  461  
 463      -/*
 464      - * Returns NULL on success, or the existing user ptr if it's already
 465      - * been set.
 466      - *
 467      - * user_ptr is for use by the user and can be obtained via dmu_buf_get_user().
 468      - *
 469      - * user_data_ptr_ptr should be NULL, or a pointer to a pointer which
 470      - * will be set to db->db_data when you are allowed to access it.  Note
 471      - * that db->db_data (the pointer) can change when you do dmu_buf_read(),
 472      - * dmu_buf_tryupgrade(), dmu_buf_will_dirty(), or dmu_buf_will_fill().
 473      - * *user_data_ptr_ptr will be set to the new value when it changes.
 474      - *
 475      - * If non-NULL, pageout func will be called when this buffer is being
 476      - * excised from the cache, so that you can clean up the data structure
 477      - * pointed to by user_ptr.
 478      - *
 479      - * dmu_evict_user() will call the pageout func for all buffers in a
 480      - * objset with a given pageout func.
 481      - */
 482      -void *dmu_buf_set_user(dmu_buf_t *db, void *user_ptr, void *user_data_ptr_ptr,
 483      -    dmu_buf_evict_func_t *pageout_func);
 484      -/*
 485      - * set_user_ie is the same as set_user, but request immediate eviction
 486      - * when hold count goes to zero.
 487      - */
 488      -void *dmu_buf_set_user_ie(dmu_buf_t *db, void *user_ptr,
 489      -    void *user_data_ptr_ptr, dmu_buf_evict_func_t *pageout_func);
 490      -void *dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr,
 491      -    void *user_ptr, void *user_data_ptr_ptr,
 492      -    dmu_buf_evict_func_t *pageout_func);
 493      -void dmu_evict_user(objset_t *os, dmu_buf_evict_func_t *func);
      462 +struct dmu_buf_user;
      463 +
      464 +typedef void dmu_buf_evict_func_t(struct dmu_buf_user *);
 494  465  
 495  466  /*
 496      - * Returns the user_ptr set with dmu_buf_set_user(), or NULL if not set.
      467 + * The DMU buffer user object is used to allow private data to be
      468 + * associated with a dbuf for the duration of its lifetime.  This private
      469 + * data must include a dmu_buf_user_t as its first object, which is passed
      470 + * into the DMU user data API and can be attached to a dbuf.  Clients can
      471 + * regain access to their private data structure with a cast.
      472 + *
      473 + * DMU buffer users can be notified via a callback when their associated
      474 + * dbuf has been evicted.  This is typically used to free the user's
      475 + * private data.  The eviction callback is executed without the dbuf
      476 + * mutex held or any other type of mechanism to guarantee that the
      477 + * dbuf is still available.  For this reason, users must assume the dbuf
      478 + * has already been freed and not reference the dbuf from the callback
      479 + * context.
      480 + *
       481 + * Users requesting "immediate eviction" are notified as soon as the dbuf
       482 + * is only referenced by dirty records (dirties == holds).  Otherwise the
       483 + * eviction callback occurs after the last reference to the dbuf is dropped.
      484 + *
      485 + * Eviction Callback Processing
      486 + * ============================
      487 + * In any context where a dbuf reference drop may trigger an eviction, an       
      488 + * eviction queue object must be provided.  This queue must then be
      489 + * processed while not holding any dbuf locks.  In this way, the user can       
      490 + * perform any work needed in their eviction function without fear of
      491 + * lock order reversals.
      492 + *
      493 + * Implementation Note
      494 + * ============================
      495 + * Some users will occasionally want to map a structure directly onto the
       496 + * backing dbuf.  Using a union with a name alias macro to access these
      497 + * overlays reduces the ugliness of code that accesses them.  Initial work on
      498 + * user objects involved using a macro that took the user object as an
      499 + * argument to access the fields, which resulted in hundreds of lines of
      500 + * needless diffs and wasn't any easier to read.
 497  501   */
 498      -void *dmu_buf_get_user(dmu_buf_t *db);
       502 +typedef struct dmu_buf_user {
       503 +        /*
       504 +         * This instance's link in the eviction queue.  Set when the buffer
       505 +         * has been evicted and the callback needs to be called.
       506 +         */
       507 +        list_node_t evict_queue_link;
       508 +        /* Eviction callback; it is invoked with this dmu_buf_user_t. */
       509 +        dmu_buf_evict_func_t *evict_func;
       510 +} dmu_buf_user_t;
      511 +
       512 +/*
       513 + * Record evict_func as the callback to run when the user dbu is evicted.
       514 + *
       515 + * NOTE: Call this only once per object.  dbu must be zeroed on entry; the
       516 + *       ASSERTs verify that no callback is set yet and that dbu is not
       517 + *       linked on an eviction queue.
       518 + */
       519 +static inline void
       520 +dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func)
       521 +{
       522 +        ASSERT(dbu->evict_func == NULL);
       523 +        ASSERT(!list_link_active(&dbu->evict_queue_link));
       524 +        dbu->evict_func = evict_func;
       525 +}
      526 +
      527 +static inline void
      528 +dmu_buf_create_user_evict_list(list_t *evict_list_p)
      529 +{
      530 +        list_create(evict_list_p, sizeof(dmu_buf_user_t),
      531 +            offsetof(dmu_buf_user_t, evict_queue_link));
      532 +}
      533 +
      534 +static inline void
      535 +dmu_buf_process_user_evicts(list_t *evict_list_p)
      536 +{
      537 +        dmu_buf_user_t *dbu, *next;
      538 +
      539 +        for (dbu = (dmu_buf_user_t *)list_head(evict_list_p); dbu != NULL;
      540 +            dbu = next) {
      541 +                next = (dmu_buf_user_t *)list_next(evict_list_p, dbu);
      542 +                list_remove(evict_list_p, dbu);
      543 +                dbu->evict_func(dbu);
      544 +        }
      545 +}
      546 +
       547 +static inline void
       548 +dmu_buf_destroy_user_evict_list(list_t *evict_list_p)
       549 +{
       550 +        dmu_buf_process_user_evicts(evict_list_p); /* run pending callbacks */
       551 +        list_destroy(evict_list_p);                /* then destroy the list */
       552 +}
      553 +
      554 +dmu_buf_user_t *dmu_buf_set_user(dmu_buf_t *db, dmu_buf_user_t *user);
      555 +dmu_buf_user_t *dmu_buf_set_user_ie(dmu_buf_t *db, dmu_buf_user_t *user);
      556 +dmu_buf_user_t *dmu_buf_replace_user(dmu_buf_t *db,
      557 +    dmu_buf_user_t *old_user, dmu_buf_user_t *new_user);
      558 +dmu_buf_user_t *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);
      559 +dmu_buf_user_t *dmu_buf_get_user(dmu_buf_t *db);
 499  560  
 500  561  /*
 501  562   * Returns the blkptr associated with this dbuf, or NULL if not set.
 502  563   */
 503  564  struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
 504  565  
 505  566  /*
 506  567   * Indicate that you are going to modify the buffer's data (db_data).
 507  568   *
 508  569   * The transaction (tx) must be assigned to a txg (ie. you've called
↓ open down ↓ 296 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX