Print this page
3752 want more verifiable dbuf user eviction
Submitted by:   Justin Gibbs <justing@spectralogic.com>
Submitted by:   Will Andrews <willa@spectralogic.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/dbuf.c
          +++ new/usr/src/uts/common/fs/zfs/dbuf.c
↓ open down ↓ 196 lines elided ↑ open up ↑
 197  197          }
 198  198          *dbp = db->db_hash_next;
 199  199          db->db_hash_next = NULL;
 200  200          mutex_exit(DBUF_HASH_MUTEX(h, idx));
 201  201          atomic_add_64(&dbuf_hash_count, -1);
 202  202  }
 203  203  
 204  204  static arc_evict_func_t dbuf_do_evict;
 205  205  
 206  206  static void
 207      -dbuf_evict_user(dmu_buf_impl_t *db)
      207 +dbuf_verify_user(dmu_buf_impl_t *db, boolean_t evicting)
      208 +{
      209 +#ifdef ZFS_DEBUG
      210 +
      211 +        if (db->db_level != 0)
      212 +                ASSERT(db->db_user == NULL);
      213 +
      214 +        if (db->db_user == NULL)
      215 +                return;
      216 +
      217 +        /* Clients must resolve a dbuf before attaching user data. */
      218 +        ASSERT(db->db.db_data != NULL && db->db_state == DB_CACHED);
      219 +        /*
      220 +         * We can't check the hold count here, because they are modified
      221 +         * independently of the dbuf mutex.  But it would be nice to ensure
      222 +         * that the user has the appropriate number.
      223 +         */
      224 +#endif
      225 +}
      226 +
      227 +/*
      228 + * Evict the dbuf's user, either immediately, or use a provided queue.
      229 + *
      230 + * Call dmu_buf_process_user_evicts or dmu_buf_destroy_user_evict_list
      231 + * on the list when finished generating it.
      232 + *
      233 + * NOTE: evict_list_p must always be provided; it is asserted non-NULL.
      234 + * NOTE: See dmu_buf_user_t about how this process works.
      235 + */
      236 +static void
      237 +dbuf_evict_user(dmu_buf_impl_t *db, list_t *evict_list_p)
 208  238  {
 209  239          ASSERT(MUTEX_HELD(&db->db_mtx));
      240 +        ASSERT(evict_list_p != NULL);
      241 +        dbuf_verify_user(db, /*evicting*/B_TRUE);
 210  242  
 211      -        if (db->db_level != 0 || db->db_evict_func == NULL)
      243 +        if (db->db_user == NULL)
 212  244                  return;
 213  245  
 214      -        if (db->db_user_data_ptr_ptr)
 215      -                *db->db_user_data_ptr_ptr = db->db.db_data;
 216      -        db->db_evict_func(&db->db, db->db_user_ptr);
 217      -        db->db_user_ptr = NULL;
 218      -        db->db_user_data_ptr_ptr = NULL;
 219      -        db->db_evict_func = NULL;
      246 +        ASSERT(!list_link_active(&db->db_user->evict_queue_link));
      247 +        list_insert_head(evict_list_p, db->db_user);
      248 +        db->db_user = NULL;
      249 +}
      250 +
      251 +/*
      252 + * Replace the current user of the dbuf.  Requires that the caller knows who
      253 + * the old user is.  Returns the old user, which may not necessarily be
      254 + * the same old_user provided by the caller.
      255 + */
      256 +dmu_buf_user_t *
      257 +dmu_buf_replace_user(dmu_buf_t *db_fake, dmu_buf_user_t *old_user,
      258 +    dmu_buf_user_t *new_user)
      259 +{
      260 +        dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
      261 +
      262 +        mutex_enter(&db->db_mtx);
      263 +        dbuf_verify_user(db, /*evicting*/B_FALSE);
      264 +        if (db->db_user == old_user)
      265 +                db->db_user = new_user;
      266 +        else
      267 +                old_user = db->db_user;
      268 +        dbuf_verify_user(db, /*evicting*/B_FALSE);
      269 +        mutex_exit(&db->db_mtx);
      270 +
      271 +        return (old_user);
      272 +}
      273 +
      274 +/*
      275 + * Set the user eviction data for the DMU buffer.  Returns NULL on success,
      276 + * or the existing user if another user currently owns the buffer.
      277 + */
      278 +dmu_buf_user_t *
      279 +dmu_buf_set_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
      280 +{
      281 +        return (dmu_buf_replace_user(db_fake, NULL, user));
      282 +}
      283 +
      284 +dmu_buf_user_t *
      285 +dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
      286 +{
      287 +        dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
      288 +
      289 +        db->db_immediate_evict = TRUE;
      290 +        return (dmu_buf_set_user(db_fake, user));
      291 +}
      292 +
      293 +/*
      294 + * Remove the user eviction data for the DMU buffer.
      295 + */
      296 +dmu_buf_user_t *
      297 +dmu_buf_remove_user(dmu_buf_t *db_fake, dmu_buf_user_t *user)
      298 +{
      299 +        return (dmu_buf_replace_user(db_fake, user, NULL));
      300 +}
      301 +
      302 +/*
      303 + * Returns the db_user set with dmu_buf_update_user(), or NULL if not set.
      304 + */
      305 +dmu_buf_user_t *
      306 +dmu_buf_get_user(dmu_buf_t *db_fake)
      307 +{
      308 +        dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
      309 +
      310 +        dbuf_verify_user(db, /*evicting*/B_FALSE);
      311 +        return (db->db_user);
      312 +}
      313 +
      314 +static void
      315 +dbuf_clear_data(dmu_buf_impl_t *db, list_t *evict_list_p)
      316 +{
      317 +        ASSERT(MUTEX_HELD(&db->db_mtx));
      318 +        ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf));
      319 +        dbuf_evict_user(db, evict_list_p);
      320 +        db->db_buf = NULL;
      321 +        db->db.db_data = NULL;
      322 +        if (db->db_state != DB_NOFILL)
      323 +                db->db_state = DB_UNCACHED;
      324 +}
      325 +
      326 +static void
      327 +dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
      328 +{
      329 +        ASSERT(MUTEX_HELD(&db->db_mtx));
      330 +        ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf));
      331 +        ASSERT(buf != NULL);
      332 +
      333 +        db->db_buf = buf;
      334 +        ASSERT(buf->b_data != NULL);
      335 +        db->db.db_data = buf->b_data;
      336 +        if (!arc_released(buf))
      337 +                arc_set_callback(buf, dbuf_do_evict, db);
 220  338  }
 221  339  
 222  340  boolean_t
 223  341  dbuf_is_metadata(dmu_buf_impl_t *db)
 224  342  {
 225  343          if (db->db_level > 0) {
 226  344                  return (B_TRUE);
 227  345          } else {
 228  346                  boolean_t is_metadata;
 229  347  
 230  348                  DB_DNODE_ENTER(db);
 231  349                  is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type);
 232  350                  DB_DNODE_EXIT(db);
 233  351  
 234  352                  return (is_metadata);
 235  353          }
 236  354  }
 237  355  
 238  356  void
 239      -dbuf_evict(dmu_buf_impl_t *db)
      357 +dbuf_evict(dmu_buf_impl_t *db, list_t *evict_list_p)
 240  358  {
 241  359          ASSERT(MUTEX_HELD(&db->db_mtx));
 242  360          ASSERT(db->db_buf == NULL);
 243  361          ASSERT(db->db_data_pending == NULL);
 244  362  
 245      -        dbuf_clear(db);
      363 +        dbuf_clear(db, evict_list_p);
 246  364          dbuf_destroy(db);
 247  365  }
 248  366  
 249  367  void
 250  368  dbuf_init(void)
 251  369  {
 252  370          uint64_t hsize = 1ULL << 16;
 253  371          dbuf_hash_table_t *h = &dbuf_hash_table;
 254  372          int i;
 255  373  
↓ open down ↓ 142 lines elided ↑ open up ↑
 398  516  
 399  517                          for (i = 0; i < db->db.db_size >> 3; i++) {
 400  518                                  ASSERT(buf[i] == 0);
 401  519                          }
 402  520                  }
 403  521          }
 404  522          DB_DNODE_EXIT(db);
 405  523  }
 406  524  #endif
 407  525  
 408      -static void
 409      -dbuf_update_data(dmu_buf_impl_t *db)
 410      -{
 411      -        ASSERT(MUTEX_HELD(&db->db_mtx));
 412      -        if (db->db_level == 0 && db->db_user_data_ptr_ptr) {
 413      -                ASSERT(!refcount_is_zero(&db->db_holds));
 414      -                *db->db_user_data_ptr_ptr = db->db.db_data;
 415      -        }
 416      -}
 417      -
 418      -static void
 419      -dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
 420      -{
 421      -        ASSERT(MUTEX_HELD(&db->db_mtx));
 422      -        ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf));
 423      -        db->db_buf = buf;
 424      -        if (buf != NULL) {
 425      -                ASSERT(buf->b_data != NULL);
 426      -                db->db.db_data = buf->b_data;
 427      -                if (!arc_released(buf))
 428      -                        arc_set_callback(buf, dbuf_do_evict, db);
 429      -                dbuf_update_data(db);
 430      -        } else {
 431      -                dbuf_evict_user(db);
 432      -                db->db.db_data = NULL;
 433      -                if (db->db_state != DB_NOFILL)
 434      -                        db->db_state = DB_UNCACHED;
 435      -        }
 436      -}
 437      -
 438  526  /*
 439  527   * Loan out an arc_buf for read.  Return the loaned arc_buf.
 440  528   */
 441  529  arc_buf_t *
 442  530  dbuf_loan_arcbuf(dmu_buf_impl_t *db)
 443  531  {
 444  532          arc_buf_t *abuf;
      533 +        list_t evict_list;
      534 +
      535 +        dmu_buf_create_user_evict_list(&evict_list);
 445  536  
 446  537          mutex_enter(&db->db_mtx);
 447  538          if (arc_released(db->db_buf) || refcount_count(&db->db_holds) > 1) {
 448  539                  int blksz = db->db.db_size;
 449  540                  spa_t *spa;
 450  541  
 451  542                  mutex_exit(&db->db_mtx);
 452  543                  DB_GET_SPA(&spa, db);
 453  544                  abuf = arc_loan_buf(spa, blksz);
 454  545                  bcopy(db->db.db_data, abuf->b_data, blksz);
 455  546          } else {
 456  547                  abuf = db->db_buf;
 457  548                  arc_loan_inuse_buf(abuf, db);
 458      -                dbuf_set_data(db, NULL);
      549 +                dbuf_clear_data(db, &evict_list);
 459  550                  mutex_exit(&db->db_mtx);
 460  551          }
      552 +        dmu_buf_destroy_user_evict_list(&evict_list);
 461  553          return (abuf);
 462  554  }
 463  555  
 464  556  uint64_t
 465  557  dbuf_whichblock(dnode_t *dn, uint64_t offset)
 466  558  {
 467  559          if (dn->dn_datablkshift) {
 468  560                  return (offset >> dn->dn_datablkshift);
 469  561          } else {
 470  562                  ASSERT3U(offset, <, dn->dn_datablksz);
↓ open down ↓ 56 lines elided ↑ open up ↑
 527  619                  int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
 528  620  
 529  621                  ASSERT3U(bonuslen, <=, db->db.db_size);
 530  622                  db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
 531  623                  arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
 532  624                  if (bonuslen < DN_MAX_BONUSLEN)
 533  625                          bzero(db->db.db_data, DN_MAX_BONUSLEN);
 534  626                  if (bonuslen)
 535  627                          bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
 536  628                  DB_DNODE_EXIT(db);
 537      -                dbuf_update_data(db);
 538  629                  db->db_state = DB_CACHED;
 539  630                  mutex_exit(&db->db_mtx);
 540  631                  return;
 541  632          }
 542  633  
 543  634          /*
 544  635           * Recheck BP_IS_HOLE() after dnode_block_freed() in case dnode_sync()
 545  636           * processes the delete record and clears the bp while we are waiting
 546  637           * for the dn_mtx (resulting in a "no" from block_freed).
 547  638           */
↓ open down ↓ 112 lines elided ↑ open up ↑
 660  751                  mutex_exit(&db->db_mtx);
 661  752          }
 662  753  
 663  754          ASSERT(err || havepzio || db->db_state == DB_CACHED);
 664  755          return (err);
 665  756  }
 666  757  
 667  758  static void
 668  759  dbuf_noread(dmu_buf_impl_t *db)
 669  760  {
      761 +        list_t evict_list;
      762 +
 670  763          ASSERT(!refcount_is_zero(&db->db_holds));
 671  764          ASSERT(db->db_blkid != DMU_BONUS_BLKID);
      765 +        dmu_buf_create_user_evict_list(&evict_list);
      766 +
 672  767          mutex_enter(&db->db_mtx);
 673  768          while (db->db_state == DB_READ || db->db_state == DB_FILL)
 674  769                  cv_wait(&db->db_changed, &db->db_mtx);
 675  770          if (db->db_state == DB_UNCACHED) {
 676  771                  arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
 677  772                  spa_t *spa;
 678  773  
 679  774                  ASSERT(db->db_buf == NULL);
 680  775                  ASSERT(db->db.db_data == NULL);
 681  776                  DB_GET_SPA(&spa, db);
 682  777                  dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
 683  778                  db->db_state = DB_FILL;
 684  779          } else if (db->db_state == DB_NOFILL) {
 685      -                dbuf_set_data(db, NULL);
      780 +                dbuf_clear_data(db, &evict_list);
 686  781          } else {
 687  782                  ASSERT3U(db->db_state, ==, DB_CACHED);
 688  783          }
 689  784          mutex_exit(&db->db_mtx);
      785 +        dmu_buf_destroy_user_evict_list(&evict_list);
 690  786  }
 691  787  
 692  788  /*
 693  789   * This is our just-in-time copy function.  It makes a copy of
 694  790   * buffers, that have been modified in a previous transaction
 695  791   * group, before we modify them in the current active group.
 696  792   *
 697  793   * This function is used in two places: when we are dirtying a
 698  794   * buffer for the first time in a txg, and when we are freeing
 699  795   * a range in a dnode that includes this buffer.
 700  796   *
 701  797   * Note that when we are called from dbuf_free_range() we do
 702  798   * not put a hold on the buffer, we just traverse the active
 703  799   * dbuf list for the dnode.
 704  800   */
 705  801  static void
 706      -dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
      802 +dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg, list_t *evict_list_p)
 707  803  {
 708  804          dbuf_dirty_record_t *dr = db->db_last_dirty;
 709  805  
 710  806          ASSERT(MUTEX_HELD(&db->db_mtx));
 711  807          ASSERT(db->db.db_data != NULL);
 712  808          ASSERT(db->db_level == 0);
 713  809          ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT);
 714  810  
 715  811          if (dr == NULL ||
 716  812              (dr->dt.dl.dr_data !=
↓ open down ↓ 15 lines elided ↑ open up ↑
 732  828                  bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN);
 733  829          } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
 734  830                  int size = db->db.db_size;
 735  831                  arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
 736  832                  spa_t *spa;
 737  833  
 738  834                  DB_GET_SPA(&spa, db);
 739  835                  dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
 740  836                  bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
 741  837          } else {
 742      -                dbuf_set_data(db, NULL);
      838 +                dbuf_clear_data(db, evict_list_p);
 743  839          }
 744  840  }
 745  841  
 746  842  void
 747  843  dbuf_unoverride(dbuf_dirty_record_t *dr)
 748  844  {
 749  845          dmu_buf_impl_t *db = dr->dr_dbuf;
 750  846          blkptr_t *bp = &dr->dt.dl.dr_overridden_by;
 751  847          uint64_t txg = dr->dr_txg;
 752  848  
↓ open down ↓ 36 lines elided ↑ open up ↑
 789  885   * they stay in memory.
 790  886   */
 791  887  void
 792  888  dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
 793  889  {
 794  890          dmu_buf_impl_t *db, *db_next;
 795  891          uint64_t txg = tx->tx_txg;
 796  892          int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 797  893          uint64_t first_l1 = start >> epbs;
 798  894          uint64_t last_l1 = end >> epbs;
      895 +        list_t evict_list;
      896 +
      897 +        dmu_buf_create_user_evict_list(&evict_list);
 799  898  
 800  899          if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID)) {
 801  900                  end = dn->dn_maxblkid;
 802  901                  last_l1 = end >> epbs;
 803  902          }
 804  903          dprintf_dnode(dn, "start=%llu end=%llu\n", start, end);
 805  904          mutex_enter(&dn->dn_dbufs_mtx);
 806  905          for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
 807  906                  db_next = list_next(&dn->dn_dbufs, db);
 808  907                  ASSERT(db->db_blkid != DMU_BONUS_BLKID);
↓ open down ↓ 33 lines elided ↑ open up ↑
 842  941                          continue;
 843  942                  }
 844  943                  if (db->db_state == DB_READ || db->db_state == DB_FILL) {
 845  944                          /* will be handled in dbuf_read_done or dbuf_rele */
 846  945                          db->db_freed_in_flight = TRUE;
 847  946                          mutex_exit(&db->db_mtx);
 848  947                          continue;
 849  948                  }
 850  949                  if (refcount_count(&db->db_holds) == 0) {
 851  950                          ASSERT(db->db_buf);
 852      -                        dbuf_clear(db);
      951 +                        dbuf_clear(db, &evict_list);
 853  952                          continue;
 854  953                  }
 855  954                  /* The dbuf is referenced */
 856  955  
 857  956                  if (db->db_last_dirty != NULL) {
 858  957                          dbuf_dirty_record_t *dr = db->db_last_dirty;
 859  958  
 860  959                          if (dr->dr_txg == txg) {
 861  960                                  /*
 862  961                                   * This buffer is "in-use", re-adjust the file
↓ open down ↓ 4 lines elided ↑ open up ↑
 867  966                                      db->db_blkid > dn->dn_maxblkid)
 868  967                                          dn->dn_maxblkid = db->db_blkid;
 869  968                                  dbuf_unoverride(dr);
 870  969                          } else {
 871  970                                  /*
 872  971                                   * This dbuf is not dirty in the open context.
 873  972                                   * Either uncache it (if it's not referenced in
 874  973                                   * the open context) or reset its contents to
 875  974                                   * empty.
 876  975                                   */
 877      -                                dbuf_fix_old_data(db, txg);
      976 +                                dbuf_fix_old_data(db, txg, &evict_list);
 878  977                          }
 879  978                  }
 880  979                  /* clear the contents if it's cached */
 881  980                  if (db->db_state == DB_CACHED) {
 882  981                          ASSERT(db->db.db_data != NULL);
 883  982                          arc_release(db->db_buf, db);
 884  983                          bzero(db->db.db_data, db->db.db_size);
 885  984                          arc_buf_freeze(db->db_buf);
 886  985                  }
 887  986  
 888  987                  mutex_exit(&db->db_mtx);
      988 +                dmu_buf_process_user_evicts(&evict_list);
 889  989          }
 890  990          mutex_exit(&dn->dn_dbufs_mtx);
      991 +        dmu_buf_destroy_user_evict_list(&evict_list);
 891  992  }
 892  993  
 893  994  static int
 894  995  dbuf_block_freeable(dmu_buf_impl_t *db)
 895  996  {
 896  997          dsl_dataset_t *ds = db->db_objset->os_dsl_dataset;
 897  998          uint64_t birth_txg = 0;
 898  999  
 899 1000          /*
 900 1001           * We don't need any locking to protect db_blkptr:
↓ open down ↓ 88 lines elided ↑ open up ↑
 989 1090  
 990 1091  dbuf_dirty_record_t *
 991 1092  dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
 992 1093  {
 993 1094          dnode_t *dn;
 994 1095          objset_t *os;
 995 1096          dbuf_dirty_record_t **drp, *dr;
 996 1097          int drop_struct_lock = FALSE;
 997 1098          boolean_t do_free_accounting = B_FALSE;
 998 1099          int txgoff = tx->tx_txg & TXG_MASK;
     1100 +        list_t evict_list;
     1101 +
     1102 +        dmu_buf_create_user_evict_list(&evict_list);
 999 1103  
1000 1104          ASSERT(tx->tx_txg != 0);
1001 1105          ASSERT(!refcount_is_zero(&db->db_holds));
1002 1106          DMU_TX_DIRTY_BUF(tx, db);
1003 1107  
1004 1108          DB_DNODE_ENTER(db);
1005 1109          dn = DB_DNODE(db);
1006 1110          /*
1007 1111           * Shouldn't dirty a regular buffer in syncing context.  Private
1008 1112           * objects may be dirtied in syncing context, but only if they
↓ open down ↓ 54 lines elided ↑ open up ↑
1063 1167                          /*
1064 1168                           * If this buffer has already been written out,
1065 1169                           * we now need to reset its state.
1066 1170                           */
1067 1171                          dbuf_unoverride(dr);
1068 1172                          if (db->db.db_object != DMU_META_DNODE_OBJECT &&
1069 1173                              db->db_state != DB_NOFILL)
1070 1174                                  arc_buf_thaw(db->db_buf);
1071 1175                  }
1072 1176                  mutex_exit(&db->db_mtx);
     1177 +                dmu_buf_destroy_user_evict_list(&evict_list);
1073 1178                  return (dr);
1074 1179          }
1075 1180  
1076 1181          /*
1077 1182           * Only valid if not already dirty.
1078 1183           */
1079 1184          ASSERT(dn->dn_object == 0 ||
1080 1185              dn->dn_dirtyctx == DN_UNDIRTIED || dn->dn_dirtyctx ==
1081 1186              (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
1082 1187  
↓ open down ↓ 34 lines elided ↑ open up ↑
1117 1222           * If this buffer is dirty in an old transaction group we need
1118 1223           * to make a copy of it so that the changes we make in this
1119 1224           * transaction group won't leak out when we sync the older txg.
1120 1225           */
1121 1226          dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP);
1122 1227          if (db->db_level == 0) {
1123 1228                  void *data_old = db->db_buf;
1124 1229  
1125 1230                  if (db->db_state != DB_NOFILL) {
1126 1231                          if (db->db_blkid == DMU_BONUS_BLKID) {
1127      -                                dbuf_fix_old_data(db, tx->tx_txg);
     1232 +                                dbuf_fix_old_data(db, tx->tx_txg, &evict_list);
1128 1233                                  data_old = db->db.db_data;
1129 1234                          } else if (db->db.db_object != DMU_META_DNODE_OBJECT) {
1130 1235                                  /*
1131 1236                                   * Release the data buffer from the cache so
1132 1237                                   * that we can modify it without impacting
1133 1238                                   * possible other users of this cached data
1134 1239                                   * block.  Note that indirect blocks and
1135 1240                                   * private objects are not released until the
1136 1241                                   * syncing state (since they are only modified
1137 1242                                   * then).
1138 1243                                   */
1139 1244                                  arc_release(db->db_buf, db);
1140      -                                dbuf_fix_old_data(db, tx->tx_txg);
     1245 +                                dbuf_fix_old_data(db, tx->tx_txg, &evict_list);
1141 1246                                  data_old = db->db_buf;
1142 1247                          }
1143 1248                          ASSERT(data_old != NULL);
1144 1249                  }
1145 1250                  dr->dt.dl.dr_data = data_old;
1146 1251          } else {
1147 1252                  mutex_init(&dr->dt.di.dr_mtx, NULL, MUTEX_DEFAULT, NULL);
1148 1253                  list_create(&dr->dt.di.dr_children,
1149 1254                      sizeof (dbuf_dirty_record_t),
1150 1255                      offsetof(dbuf_dirty_record_t, dr_dirty_node));
↓ open down ↓ 17 lines elided ↑ open up ↑
1168 1273          }
1169 1274  
1170 1275          /*
1171 1276           * This buffer is now part of this txg
1172 1277           */
1173 1278          dbuf_add_ref(db, (void *)(uintptr_t)tx->tx_txg);
1174 1279          db->db_dirtycnt += 1;
1175 1280          ASSERT3U(db->db_dirtycnt, <=, 3);
1176 1281  
1177 1282          mutex_exit(&db->db_mtx);
     1283 +        dmu_buf_destroy_user_evict_list(&evict_list);
1178 1284  
1179 1285          if (db->db_blkid == DMU_BONUS_BLKID ||
1180 1286              db->db_blkid == DMU_SPILL_BLKID) {
1181 1287                  mutex_enter(&dn->dn_mtx);
1182 1288                  ASSERT(!list_link_active(&dr->dr_dirty_node));
1183 1289                  list_insert_tail(&dn->dn_dirty_records[txgoff], dr);
1184 1290                  mutex_exit(&dn->dn_mtx);
1185 1291                  dnode_setdirty(dn, tx);
1186 1292                  DB_DNODE_EXIT(db);
1187 1293                  return (dr);
↓ open down ↓ 74 lines elided ↑ open up ↑
1262 1368  
1263 1369  /*
1264 1370   * Return TRUE if this evicted the dbuf.
1265 1371   */
1266 1372  static boolean_t
1267 1373  dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
1268 1374  {
1269 1375          dnode_t *dn;
1270 1376          uint64_t txg = tx->tx_txg;
1271 1377          dbuf_dirty_record_t *dr, **drp;
     1378 +        list_t evict_list;
1272 1379  
1273 1380          ASSERT(txg != 0);
1274 1381          ASSERT(db->db_blkid != DMU_BONUS_BLKID);
1275 1382          ASSERT0(db->db_level);
1276 1383          ASSERT(MUTEX_HELD(&db->db_mtx));
1277 1384  
1278 1385          /*
1279 1386           * If this buffer is not dirty, we're done.
1280 1387           */
1281 1388          for (drp = &db->db_last_dirty; (dr = *drp) != NULL; drp = &dr->dr_next)
1282 1389                  if (dr->dr_txg <= txg)
1283 1390                          break;
1284 1391          if (dr == NULL || dr->dr_txg < txg)
1285 1392                  return (B_FALSE);
1286 1393          ASSERT(dr->dr_txg == txg);
1287 1394          ASSERT(dr->dr_dbuf == db);
1288 1395  
     1396 +        dmu_buf_create_user_evict_list(&evict_list);
     1397 +
1289 1398          DB_DNODE_ENTER(db);
1290 1399          dn = DB_DNODE(db);
1291 1400  
1292 1401          /*
1293 1402           * Note:  This code will probably work even if there are concurrent
1294 1403           * holders, but it is untested in that scenario, as the ZPL and
1295 1404           * ztest have additional locking (the range locks) that prevents
1296 1405           * that type of concurrent access.
1297 1406           */
1298 1407          ASSERT3U(refcount_count(&db->db_holds), ==, db->db_dirtycnt);
↓ open down ↓ 35 lines elided ↑ open up ↑
1334 1443          }
1335 1444          kmem_free(dr, sizeof (dbuf_dirty_record_t));
1336 1445  
1337 1446          ASSERT(db->db_dirtycnt > 0);
1338 1447          db->db_dirtycnt -= 1;
1339 1448  
1340 1449          if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) {
1341 1450                  arc_buf_t *buf = db->db_buf;
1342 1451  
1343 1452                  ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
1344      -                dbuf_set_data(db, NULL);
     1453 +                dbuf_clear_data(db, &evict_list);
1345 1454                  VERIFY(arc_buf_remove_ref(buf, db));
1346      -                dbuf_evict(db);
     1455 +                dbuf_evict(db, &evict_list);
     1456 +                dmu_buf_destroy_user_evict_list(&evict_list);
1347 1457                  return (B_TRUE);
1348 1458          }
1349 1459  
     1460 +        dmu_buf_destroy_user_evict_list(&evict_list);
1350 1461          return (B_FALSE);
1351 1462  }
1352 1463  
1353 1464  #pragma weak dmu_buf_will_dirty = dbuf_will_dirty
1354 1465  void
1355 1466  dbuf_will_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
1356 1467  {
1357 1468          int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
1358 1469  
1359 1470          ASSERT(tx->tx_txg != 0);
↓ open down ↓ 126 lines elided ↑ open up ↑
1486 1597   * entry in the dn_dbufs list.  We have to wait until dbuf_destroy()
1487 1598   * in this case.  For callers from the DMU we will usually see:
1488 1599   *      dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy()
1489 1600   * For the arc callback, we will usually see:
1490 1601   *      dbuf_do_evict()->dbuf_clear();dbuf_destroy()
1491 1602   * Sometimes, though, we will get a mix of these two:
1492 1603   *      DMU: dbuf_clear()->arc_buf_evict()
1493 1604   *      ARC: dbuf_do_evict()->dbuf_destroy()
1494 1605   */
1495 1606  void
1496      -dbuf_clear(dmu_buf_impl_t *db)
     1607 +dbuf_clear(dmu_buf_impl_t *db, list_t *evict_list_p)
1497 1608  {
1498 1609          dnode_t *dn;
1499 1610          dmu_buf_impl_t *parent = db->db_parent;
1500 1611          dmu_buf_impl_t *dndb;
1501 1612          int dbuf_gone = FALSE;
1502 1613  
1503 1614          ASSERT(MUTEX_HELD(&db->db_mtx));
1504 1615          ASSERT(refcount_is_zero(&db->db_holds));
1505 1616  
1506      -        dbuf_evict_user(db);
     1617 +        dbuf_evict_user(db, evict_list_p);
1507 1618  
1508 1619          if (db->db_state == DB_CACHED) {
1509 1620                  ASSERT(db->db.db_data != NULL);
1510 1621                  if (db->db_blkid == DMU_BONUS_BLKID) {
1511 1622                          zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN);
1512 1623                          arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
1513 1624                  }
1514 1625                  db->db.db_data = NULL;
1515 1626                  db->db_state = DB_UNCACHED;
1516 1627          }
↓ open down ↓ 122 lines elided ↑ open up ↑
1639 1750          db->db_objset = os;
1640 1751          db->db.db_object = dn->dn_object;
1641 1752          db->db_level = level;
1642 1753          db->db_blkid = blkid;
1643 1754          db->db_last_dirty = NULL;
1644 1755          db->db_dirtycnt = 0;
1645 1756          db->db_dnode_handle = dn->dn_handle;
1646 1757          db->db_parent = parent;
1647 1758          db->db_blkptr = blkptr;
1648 1759  
1649      -        db->db_user_ptr = NULL;
1650      -        db->db_user_data_ptr_ptr = NULL;
1651      -        db->db_evict_func = NULL;
     1760 +        db->db_user = NULL;
1652 1761          db->db_immediate_evict = 0;
1653 1762          db->db_freed_in_flight = 0;
1654 1763  
1655 1764          if (blkid == DMU_BONUS_BLKID) {
1656 1765                  ASSERT3P(parent, ==, dn->dn_dbuf);
1657 1766                  db->db.db_size = DN_MAX_BONUSLEN -
1658 1767                      (dn->dn_nblkptr-1) * sizeof (blkptr_t);
1659 1768                  ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
1660 1769                  db->db.db_offset = DMU_BONUS_BLKID;
1661 1770                  db->db_state = DB_UNCACHED;
↓ open down ↓ 42 lines elided ↑ open up ↑
1704 1813          dprintf_dbuf(db, "db=%p\n", db);
1705 1814  
1706 1815          return (db);
1707 1816  }
1708 1817  
1709 1818  static int
1710 1819  dbuf_do_evict(void *private)
1711 1820  {
                   /*
                    * Eviction callback (declared above with type arc_evict_func_t).
                    * 'private' is the arc_buf_t being evicted; the owning dbuf is
                    * taken from its b_private field.  Always returns 0.
                    *
                    * A local user-evict list is created on entry and handed to
                    * dbuf_evict(); it is passed to
                    * dmu_buf_destroy_user_evict_list() on the single exit path,
                    * as the dbuf_evict_user() contract asks of whoever generates
                    * such a list.
                    */
1712 1821          arc_buf_t *buf = private;
1713 1822          dmu_buf_impl_t *db = buf->b_private;
     1823 +        list_t evict_list;
     1824 +
     1825 +        dmu_buf_create_user_evict_list(&evict_list);
1714 1826  
                   /* The caller may already own db_mtx; only take it if not held. */
1715 1827          if (!MUTEX_HELD(&db->db_mtx))
1716 1828                  mutex_enter(&db->db_mtx);
1717 1829  
1718 1830          ASSERT(refcount_is_zero(&db->db_holds));
1719 1831  
1720 1832          if (db->db_state != DB_EVICTING) {
                           /*
                            * Not yet being torn down: the dbuf must be cached.
                            * Detach the ARC buffer pointer and evict the dbuf.
                            * NOTE(review): db_mtx is not released in this branch,
                            * so dbuf_evict() presumably drops it -- confirm.
                            */
1721 1833                  ASSERT(db->db_state == DB_CACHED);
1722 1834                  DBUF_VERIFY(db);
1723 1835                  db->db_buf = NULL;
1724      -                dbuf_evict(db);
     1836 +                dbuf_evict(db, &evict_list);
1725 1837          } else {
                           /* Already DB_EVICTING: drop the lock and destroy. */
1726 1838                  mutex_exit(&db->db_mtx);
1727 1839                  dbuf_destroy(db);
1728 1840          }
     1841 +        dmu_buf_destroy_user_evict_list(&evict_list);
1729 1842          return (0);
1730 1843  }
1731 1844  
1732 1845  static void
1733 1846  dbuf_destroy(dmu_buf_impl_t *db)
1734 1847  {
1735 1848          ASSERT(refcount_is_zero(&db->db_holds));
1736 1849  
1737 1850          if (db->db_blkid != DMU_BONUS_BLKID) {
1738 1851                  /*
↓ open down ↓ 80 lines elided ↑ open up ↑
1819 1932  
1820 1933  /*
1821 1934   * Returns with db_holds incremented, and db_mtx not held.
1822 1935   * Note: dn_struct_rwlock must be held.
1823 1936   */
1824 1937  int
1825 1938  dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
1826 1939      void *tag, dmu_buf_impl_t **dbp)
1827 1940  {
                   /*
                    * Looks up (dbuf_find) or creates (dbuf_create) the dbuf for
                    * (dn, level, blkid), adds a hold for 'tag', and stores the
                    * result in *dbp.  Returns 0 on success, or the lookup error
                    * when it is nonzero and not ENOENT.  *dbp is set to NULL up
                    * front, so it stays NULL on the error return.
                    *
                    * evict_list collects user evictions generated by dbuf_clear()
                    * and is handed to dmu_buf_destroy_user_evict_list() after
                    * db_mtx has been dropped.
                    */
1828 1941          dmu_buf_impl_t *db, *parent = NULL;
     1942 +        list_t evict_list;
1829 1943  
1830 1944          ASSERT(blkid != DMU_BONUS_BLKID);
1831 1945          ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
1832 1946          ASSERT3U(dn->dn_nlevels, >, level);
1833 1947  
     1948 +        dmu_buf_create_user_evict_list(&evict_list);
     1949 +
1834 1950          *dbp = NULL;
1835 1951  top:
1836 1952          /* dbuf_find() returns with db_mtx held */
1837 1953          db = dbuf_find(dn, level, blkid);
1838 1954  
1839 1955          if (db == NULL) {
1840 1956                  blkptr_t *bp = NULL;
1841 1957                  int err;
1842 1958  
1843 1959                  ASSERT3P(parent, ==, NULL);
↓ open down ↓ 8 lines elided ↑ open up ↑
1852 1968                          }
1853 1969                  }
                           /*
                            * NOTE(review): this return skips
                            * dmu_buf_destroy_user_evict_list(&evict_list).  The
                            * "goto top" retry below runs after
                            * dbuf_clear(db, &evict_list), so the list may already
                            * have been handed to dbuf_clear() by the time we get
                            * here; anything it queued would then never be
                            * processed.  Consider destroying the list before
                            * returning -- confirm against dbuf_clear().
                            */
1854 1970                  if (err && err != ENOENT)
1855 1971                          return (err);
1856 1972                  db = dbuf_create(dn, level, blkid, parent, bp);
1857 1973          }
1858 1974  
1859 1975          if (db->db_buf && refcount_is_zero(&db->db_holds)) {
                           /*
                            * First hold on a dbuf that has an ARC buffer: take an
                            * ARC reference.  If the buffer has no data (b_data is
                            * NULL) the dbuf is cleared, the parent hold is
                            * released, and the lookup restarts from 'top'.
                            */
1860 1976                  arc_buf_add_ref(db->db_buf, db);
1861 1977                  if (db->db_buf->b_data == NULL) {
1862      -                        dbuf_clear(db);
     1978 +                        dbuf_clear(db, &evict_list);
1863 1979                          if (parent) {
1864 1980                                  dbuf_rele(parent, NULL);
1865 1981                                  parent = NULL;
1866 1982                          }
1867 1983                          goto top;
1868 1984                  }
1869 1985                  ASSERT3P(db->db.db_data, ==, db->db_buf->b_data);
1870 1986          }
1871 1987  
1872 1988          ASSERT(db->db_buf == NULL || arc_referenced(db->db_buf));
↓ open down ↓ 13 lines elided ↑ open up ↑
1886 2002  
1887 2003                          dbuf_set_data(db,
1888 2004                              arc_buf_alloc(dn->dn_objset->os_spa,
1889 2005                              db->db.db_size, db, type));
1890 2006                          bcopy(dr->dt.dl.dr_data->b_data, db->db.db_data,
1891 2007                              db->db.db_size);
1892 2008                  }
1893 2009          }
1894 2010  
1895 2011          (void) refcount_add(&db->db_holds, tag);
1896      -        dbuf_update_data(db);
1897 2012          DBUF_VERIFY(db);
1898 2013          mutex_exit(&db->db_mtx);
1899 2014  
                   /* The evict list is finished only after db_mtx is released. */
     2015 +        dmu_buf_destroy_user_evict_list(&evict_list);
     2016 +
1900 2017          /* NOTE: we can't rele the parent until after we drop the db_mtx */
1901 2018          if (parent)
1902 2019                  dbuf_rele(parent, NULL);
1903 2020  
1904 2021          ASSERT3P(DB_DNODE(db), ==, dn);
1905 2022          ASSERT3U(db->db_blkid, ==, blkid);
1906 2023          ASSERT3U(db->db_level, ==, level);
1907 2024          *dbp = db;
1908 2025  
1909 2026          return (0);
↓ open down ↓ 79 lines elided ↑ open up ↑
1989 2106  }
1990 2107  
1991 2108  /*
1992 2109   * dbuf_rele() for an already-locked dbuf.  This is necessary to allow
1993 2110   * db_dirtycnt and db_holds to be updated atomically.
1994 2111   */
1995 2112  void
1996 2113  dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
1997 2114  {
                   /*
                    * Drops the hold identified by 'tag' on a dbuf whose db_mtx the
                    * caller already holds (asserted below).  What happens next
                    * depends on the remaining hold count; see the branches.
                    *
                    * evict_list is a local queue passed to dbuf_evict_user(),
                    * dbuf_evict(), dbuf_clear_data() and dbuf_clear(); it is
                    * handed to dmu_buf_destroy_user_evict_list() at the end of
                    * the function.  No early return is visible in the lines
                    * shown here.
                    */
1998 2115          int64_t holds;
     2116 +        list_t evict_list;
1999 2117  
2000 2118          ASSERT(MUTEX_HELD(&db->db_mtx));
2001 2119          DBUF_VERIFY(db);
2002 2120  
     2121 +        dmu_buf_create_user_evict_list(&evict_list);
     2122 +
2003 2123          /*
2004 2124           * Remove the reference to the dbuf before removing its hold on the
2005 2125           * dnode so we can guarantee in dnode_move() that a referenced bonus
2006 2126           * buffer has a corresponding dnode hold.
2007 2127           */
2008 2128          holds = refcount_remove(&db->db_holds, tag);
2009 2129          ASSERT(holds >= 0);
2010 2130  
2011 2131          /*
2012 2132           * We can't freeze indirects if there is a possibility that they
2013 2133           * may be modified in the current syncing context.
2014 2134           */
2015 2135          if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0))
2016 2136                  arc_buf_freeze(db->db_buf);
2017 2137  
                   /*
                    * For a level-0 dbuf flagged db_immediate_evict, evict the
                    * user as soon as the remaining holds equal db_dirtycnt.
                    */
2018 2138          if (holds == db->db_dirtycnt &&
2019 2139              db->db_level == 0 && db->db_immediate_evict)
2020      -                dbuf_evict_user(db);
     2140 +                dbuf_evict_user(db, &evict_list);
2021 2141  
2022 2142          if (holds == 0) {
2023 2143                  if (db->db_blkid == DMU_BONUS_BLKID) {
2024 2144                          mutex_exit(&db->db_mtx);
2025 2145  
2026 2146                          /*
2027 2147                           * If the dnode moves here, we cannot cross this barrier
2028 2148                           * until the move completes.
2029 2149                           */
2030 2150                          DB_DNODE_ENTER(db);
↓ open down ↓ 5 lines elided ↑ open up ↑
2036 2156                           * the dnode_rele().
2037 2157                           */
2038 2158                          dnode_rele(DB_DNODE(db), db);
2039 2159                  } else if (db->db_buf == NULL) {
2040 2160                          /*
2041 2161                           * This is a special case: we never associated this
2042 2162                           * dbuf with any data allocated from the ARC.
2043 2163                           */
2044 2164                          ASSERT(db->db_state == DB_UNCACHED ||
2045 2165                              db->db_state == DB_NOFILL);
2046      -                        dbuf_evict(db);
     2166 +                        dbuf_evict(db, &evict_list);
2047 2167                  } else if (arc_released(db->db_buf)) {
2048 2168                          arc_buf_t *buf = db->db_buf;
2049 2169                          /*
2050 2170                           * This dbuf has anonymous data associated with it.
2051 2171                           */
                                   /*
                                    * 'buf' is saved first because the dbuf's data
                                    * is cleared before the ARC reference is
                                    * dropped through the saved pointer.
                                    */
2052      -                        dbuf_set_data(db, NULL);
     2172 +                        dbuf_clear_data(db, &evict_list);
2053 2173                          VERIFY(arc_buf_remove_ref(buf, db));
2054      -                        dbuf_evict(db);
     2174 +                        dbuf_evict(db, &evict_list);
2055 2175                  } else {
2056 2176                          VERIFY(!arc_buf_remove_ref(db->db_buf, db));
2057 2177  
2058 2178                          /*
2059 2179                           * A dbuf will be eligible for eviction if either the
2060 2180                           * 'primarycache' property is set or a duplicate
2061 2181                           * copy of this buffer is already cached in the arc.
2062 2182                           *
2063 2183                           * In the case of the 'primarycache' a buffer
2064 2184                           * is considered for eviction if it matches the
2065 2185                           * criteria set in the property.
2066 2186                           *
2067 2187                           * To decide if our buffer is considered a
2068 2188                           * duplicate, we must call into the arc to determine
2069 2189                           * if multiple buffers are referencing the same
2070 2190                           * block on-disk. If so, then we simply evict
2071 2191                           * ourselves.
2072 2192                           */
2073 2193                          if (!DBUF_IS_CACHEABLE(db) ||
2074 2194                              arc_buf_eviction_needed(db->db_buf))
2075      -                                dbuf_clear(db);
     2195 +                                dbuf_clear(db, &evict_list);
2076 2196                          else
2077 2197                                  mutex_exit(&db->db_mtx);
2078 2198                  }
2079 2199          } else {
2080 2200                  mutex_exit(&db->db_mtx);
2081 2201          }
                   /*
                    * NOTE(review): the branches that call dbuf_evict() or
                    * dbuf_clear() do not release db_mtx themselves, so those
                    * helpers presumably drop it before we get here -- confirm.
                    */
     2202 +        dmu_buf_destroy_user_evict_list(&evict_list);
2082 2203  }
2083 2204  
2084 2205  #pragma weak dmu_buf_refcount = dbuf_refcount
2085 2206  uint64_t
2086 2207  dbuf_refcount(dmu_buf_impl_t *db)
2087 2208  {
                   /*
                    * Returns the current value of the dbuf's db_holds refcount.
                    * No lock is taken here.  The #pragma weak above also exports
                    * this function under the name dmu_buf_refcount.
                    */
2088 2209          return (refcount_count(&db->db_holds));
2089 2210  }
2090 2211  
2091      -void *
2092      -dmu_buf_set_user(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr,
2093      -    dmu_buf_evict_func_t *evict_func)
2094      -{
2095      -        return (dmu_buf_update_user(db_fake, NULL, user_ptr,
2096      -            user_data_ptr_ptr, evict_func));
2097      -}
2098      -
2099      -void *
2100      -dmu_buf_set_user_ie(dmu_buf_t *db_fake, void *user_ptr, void *user_data_ptr_ptr,
2101      -    dmu_buf_evict_func_t *evict_func)
2102      -{
2103      -        dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
2104      -
2105      -        db->db_immediate_evict = TRUE;
2106      -        return (dmu_buf_update_user(db_fake, NULL, user_ptr,
2107      -            user_data_ptr_ptr, evict_func));
2108      -}
2109      -
2110      -void *
2111      -dmu_buf_update_user(dmu_buf_t *db_fake, void *old_user_ptr, void *user_ptr,
2112      -    void *user_data_ptr_ptr, dmu_buf_evict_func_t *evict_func)
2113      -{
2114      -        dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
2115      -        ASSERT(db->db_level == 0);
2116      -
2117      -        ASSERT((user_ptr == NULL) == (evict_func == NULL));
2118      -
2119      -        mutex_enter(&db->db_mtx);
2120      -
2121      -        if (db->db_user_ptr == old_user_ptr) {
2122      -                db->db_user_ptr = user_ptr;
2123      -                db->db_user_data_ptr_ptr = user_data_ptr_ptr;
2124      -                db->db_evict_func = evict_func;
2125      -
2126      -                dbuf_update_data(db);
2127      -        } else {
2128      -                old_user_ptr = db->db_user_ptr;
2129      -        }
2130      -
2131      -        mutex_exit(&db->db_mtx);
2132      -        return (old_user_ptr);
2133      -}
2134      -
2135      -void *
2136      -dmu_buf_get_user(dmu_buf_t *db_fake)
2137      -{
2138      -        dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
2139      -        ASSERT(!refcount_is_zero(&db->db_holds));
2140      -
2141      -        return (db->db_user_ptr);
2142      -}
2143      -
2144 2212  boolean_t
2145 2213  dmu_buf_freeable(dmu_buf_t *dbuf)
2146 2214  {
2147 2215          boolean_t res = B_FALSE;
2148 2216          dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
2149 2217  
2150 2218          if (db->db_blkptr)
2151 2219                  res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset,
2152 2220                      db->db_blkptr, db->db_blkptr->blk_birth);
2153 2221  
↓ open down ↓ 560 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX