Print this page
3752 want more verifiable dbuf user eviction
Submitted by:   Justin Gibbs <justing@spectralogic.com>
Submitted by:   Will Andrews <willa@spectralogic.com>


 441         ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
 442 
 443         mutex_enter(&os->os_lock);
 444         POINTER_INVALIDATE(&dn->dn_objset);
 445         list_remove(&os->os_dnodes, dn);
 446         mutex_exit(&os->os_lock);
 447 
 448         /* the dnode can no longer move, so we can release the handle */
 449         zrl_remove(&dn->dn_handle->dnh_zrlock);
 450 
 451         dn->dn_allocated_txg = 0;
 452         dn->dn_free_txg = 0;
 453         dn->dn_assigned_txg = 0;
 454 
 455         dn->dn_dirtyctx = 0;
 456         if (dn->dn_dirtyctx_firstset != NULL) {
 457                 kmem_free(dn->dn_dirtyctx_firstset, 1);
 458                 dn->dn_dirtyctx_firstset = NULL;
 459         }
 460         if (dn->dn_bonus != NULL) {



 461                 mutex_enter(&dn->dn_bonus->db_mtx);
 462                 dbuf_evict(dn->dn_bonus);

 463                 dn->dn_bonus = NULL;
 464         }
 465         dn->dn_zio = NULL;
 466 
 467         dn->dn_have_spill = B_FALSE;
 468         dn->dn_oldused = 0;
 469         dn->dn_oldflags = 0;
 470         dn->dn_olduid = 0;
 471         dn->dn_oldgid = 0;
 472         dn->dn_newuid = 0;
 473         dn->dn_newgid = 0;
 474         dn->dn_id_flags = 0;
 475 
 476         dmu_zfetch_rele(&dn->dn_zfetch);
 477         kmem_cache_free(dnode_cache, dn);
 478         arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
 479 }
 480 
 481 void
 482 dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,


 940         while (refcount_count(&dn->dn_holds) > 0)
 941                 delay(1);
 942         zrl_add(&dnh->dnh_zrlock);
 943         dnode_destroy(dn); /* implicit zrl_remove() */
 944         zrl_destroy(&dnh->dnh_zrlock);
 945         dnh->dnh_dnode = NULL;
 946 }
 947 
 948 dnode_t *
 949 dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
 950     dnode_handle_t *dnh)
 951 {
             /*
              * Create a dnode for `object' from the caller-supplied physical
              * dnode `dnp' and attach it to the caller-supplied handle `dnh'.
              * NULL is passed in the dnode_create() argument slot where
              * dnode_hold_impl() passes its dbuf, so no dbuf is handed to
              * dnode_create() on this path.  Returns the new dnode.
              */
 952         dnode_t *dn = dnode_create(os, dnp, NULL, object, dnh);
 953         dnh->dnh_dnode = dn;
             /*
              * The handle's lock is initialized only after the dnode has
              * been created and stored in the handle.
              * NOTE(review): presumably nothing else can reach dnh at this
              * point -- confirm against the callers.
              */
 954         zrl_init(&dnh->dnh_zrlock);
 955         DNODE_VERIFY(dn);
 956         return (dn);
 957 }
 958 
 959 static void
 960 dnode_buf_pageout(dmu_buf_t *db, void *arg)
 961 {
             /*
              * User-eviction callback that dnode_hold_impl() registers through
              * dmu_buf_set_user().  `arg' is the dnode_children_t attached to
              * the buffer.  Every handle in it is torn down (destroying any
              * dnode still attached), then the array itself is freed.
              */
 962         dnode_children_t *children_dnodes = arg;
 963         int i;
             /* Number of (1 << DNODE_SHIFT)-byte slots in the buffer. */
 964         int epb = db->db_size >> DNODE_SHIFT;
 965
 966         ASSERT(epb == children_dnodes->dnc_count);
 967
 968         for (i = 0; i < epb; i++) {
 969                 dnode_handle_t *dnh = &children_dnodes->dnc_children[i];
 970                 dnode_t *dn;
 971
 972                 /*
 973                  * The dnode handle lock guards against the dnode moving to
 974                  * another valid address, so there is no need here to guard
 975                  * against changes to or from NULL.
 976                  */
 977                 if (dnh->dnh_dnode == NULL) {
                             /* No dnode attached: only the lock is torn down. */
 978                         zrl_destroy(&dnh->dnh_zrlock);
 979                         continue;
 980                 }
 981
                     /*
                      * Take a reference on the handle lock before touching the
                      * dnode; dnode_destroy() below is what gives it back.
                      */
 982                 zrl_add(&dnh->dnh_zrlock);
 983                 dn = dnh->dnh_dnode;
 984                 /*
 985                  * If there are holds on this dnode, then there should
 986                  * be holds on the dnode's containing dbuf as well; thus
 987                  * it wouldn't be eligible for eviction and this function
 988                  * would not have been called.
 989                  */
 990                 ASSERT(refcount_is_zero(&dn->dn_holds));
 991                 ASSERT(refcount_is_zero(&dn->dn_tx_holds));
 992
 993                 dnode_destroy(dn); /* implicit zrl_remove() */
 994                 zrl_destroy(&dnh->dnh_zrlock);
 995                 dnh->dnh_dnode = NULL;
 996         }
             /*
              * The size mirrors the kmem_alloc() in dnode_hold_impl(): one
              * dnode_children_t plus (epb - 1) additional handles.
              */
 997         kmem_free(children_dnodes, sizeof (dnode_children_t) +
 998             (epb - 1) * sizeof (dnode_handle_t));
 999 }
1000 
1001 /*
1002  * errors:
1003  * EINVAL - invalid object number.
1004  * EIO - i/o error.
1005  * succeeds even for free dnodes.
1006  */
1007 int
1008 dnode_hold_impl(objset_t *os, uint64_t object, int flag,
1009     void *tag, dnode_t **dnp)
1010 {
1011         int epb, idx, err;
1012         int drop_struct_lock = FALSE;
1013         int type;
1014         uint64_t blk;
1015         dnode_t *mdn, *dn;
1016         dmu_buf_impl_t *db;
1017         dnode_children_t *children_dnodes;
1018         dnode_handle_t *dnh;


1058 
1059         blk = dbuf_whichblock(mdn, object * sizeof (dnode_phys_t));
1060 
1061         db = dbuf_hold(mdn, blk, FTAG);
1062         if (drop_struct_lock)
1063                 rw_exit(&mdn->dn_struct_rwlock);
1064         if (db == NULL)
1065                 return (SET_ERROR(EIO));
1066         err = dbuf_read(db, NULL, DB_RF_CANFAIL);
1067         if (err) {
1068                 dbuf_rele(db, FTAG);
1069                 return (err);
1070         }
1071 
1072         ASSERT3U(db->db.db_size, >=, 1<<DNODE_SHIFT);
1073         epb = db->db.db_size >> DNODE_SHIFT;
1074 
1075         idx = object & (epb-1);
1076 
1077         ASSERT(DB_DNODE(db)->dn_type == DMU_OT_DNODE);
1078         children_dnodes = dmu_buf_get_user(&db->db);
1079         if (children_dnodes == NULL) {
1080                 int i;
1081                 dnode_children_t *winner;
1082                 children_dnodes = kmem_alloc(sizeof (dnode_children_t) +
1083                     (epb - 1) * sizeof (dnode_handle_t), KM_SLEEP);
1084                 children_dnodes->dnc_count = epb;
1085                 dnh = &children_dnodes->dnc_children[0];
1086                 for (i = 0; i < epb; i++) {
1087                         zrl_init(&dnh[i].dnh_zrlock);
1088                         dnh[i].dnh_dnode = NULL;
1089                 }
1090                 if (winner = dmu_buf_set_user(&db->db, children_dnodes, NULL,
1091                     dnode_buf_pageout)) {



1092                         kmem_free(children_dnodes, sizeof (dnode_children_t) +
1093                             (epb - 1) * sizeof (dnode_handle_t));
1094                         children_dnodes = winner;
1095                 }
1096         }
1097         ASSERT(children_dnodes->dnc_count == epb);
1098 
1099         dnh = &children_dnodes->dnc_children[idx];
1100         zrl_add(&dnh->dnh_zrlock);
1101         if ((dn = dnh->dnh_dnode) == NULL) {
1102                 dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;
1103                 dnode_t *winner;
1104 
1105                 dn = dnode_create(os, phys, db, object, dnh);
1106                 winner = atomic_cas_ptr(&dnh->dnh_dnode, NULL, dn);
1107                 if (winner != NULL) {
1108                         zrl_add(&dnh->dnh_zrlock);
1109                         dnode_destroy(dn); /* implicit zrl_remove() */
1110                         dn = winner;
1111                 }




 441         ASSERT((dn->dn_id_flags & DN_ID_NEW_EXIST) == 0);
 442 
 443         mutex_enter(&os->os_lock);
 444         POINTER_INVALIDATE(&dn->dn_objset);
 445         list_remove(&os->os_dnodes, dn);
 446         mutex_exit(&os->os_lock);
 447 
 448         /* the dnode can no longer move, so we can release the handle */
 449         zrl_remove(&dn->dn_handle->dnh_zrlock);
 450 
 451         dn->dn_allocated_txg = 0;
 452         dn->dn_free_txg = 0;
 453         dn->dn_assigned_txg = 0;
 454 
 455         dn->dn_dirtyctx = 0;
 456         if (dn->dn_dirtyctx_firstset != NULL) {
 457                 kmem_free(dn->dn_dirtyctx_firstset, 1);
 458                 dn->dn_dirtyctx_firstset = NULL;
 459         }
 460         if (dn->dn_bonus != NULL) {
 461                 list_t evict_list;
 462 
 463                 dmu_buf_create_user_evict_list(&evict_list);
 464                 mutex_enter(&dn->dn_bonus->db_mtx);
 465                 dbuf_evict(dn->dn_bonus, &evict_list);
 466                 dmu_buf_destroy_user_evict_list(&evict_list);
 467                 dn->dn_bonus = NULL;
 468         }
 469         dn->dn_zio = NULL;
 470 
 471         dn->dn_have_spill = B_FALSE;
 472         dn->dn_oldused = 0;
 473         dn->dn_oldflags = 0;
 474         dn->dn_olduid = 0;
 475         dn->dn_oldgid = 0;
 476         dn->dn_newuid = 0;
 477         dn->dn_newgid = 0;
 478         dn->dn_id_flags = 0;
 479 
 480         dmu_zfetch_rele(&dn->dn_zfetch);
 481         kmem_cache_free(dnode_cache, dn);
 482         arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
 483 }
 484 
 485 void
 486 dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,


 944         while (refcount_count(&dn->dn_holds) > 0)
 945                 delay(1);
 946         zrl_add(&dnh->dnh_zrlock);
 947         dnode_destroy(dn); /* implicit zrl_remove() */
 948         zrl_destroy(&dnh->dnh_zrlock);
 949         dnh->dnh_dnode = NULL;
 950 }
 951 
 952 dnode_t *
 953 dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
 954     dnode_handle_t *dnh)
 955 {
             /*
              * Create a dnode for `object' from the caller-supplied physical
              * dnode `dnp' and attach it to the caller-supplied handle `dnh'.
              * NULL is passed in the dnode_create() argument slot where
              * dnode_hold_impl() passes its dbuf, so no dbuf is handed to
              * dnode_create() on this path.  Returns the new dnode.
              */
 956         dnode_t *dn = dnode_create(os, dnp, NULL, object, dnh);
 957         dnh->dnh_dnode = dn;
             /*
              * The handle's lock is initialized only after the dnode has
              * been created and stored in the handle.
              * NOTE(review): presumably nothing else can reach dnh at this
              * point -- confirm against the callers.
              */
 958         zrl_init(&dnh->dnh_zrlock);
 959         DNODE_VERIFY(dn);
 960         return (dn);
 961 }
 962 
 963 static void
 964 dnode_buf_pageout(dmu_buf_user_t *dbu)
 965 {
             /*
              * User-eviction callback that dnode_hold_impl() installs with
              * dmu_buf_init_user() on children_dnodes->db_evict.  Every handle
              * in the dnode_children_t is torn down (destroying any dnode
              * still attached), then the array itself is freed.
              *
              * NOTE(review): the cast below assumes db_evict is the first
              * member of dnode_children_t, so that `dbu' and the containing
              * structure share an address -- confirm against the struct
              * definition.
              */
 966         dnode_children_t *children_dnodes = (dnode_children_t *)dbu;
 967         int i;



 968
 969         for (i = 0; i < children_dnodes->dnc_count; i++) {
 970                 dnode_handle_t *dnh = &children_dnodes->dnc_children[i];
 971                 dnode_t *dn;
 972
 973                 /*
 974                  * The dnode handle lock guards against the dnode moving to
 975                  * another valid address, so there is no need here to guard
 976                  * against changes to or from NULL.
 977                  */
 978                 if (dnh->dnh_dnode == NULL) {
                             /* No dnode attached: only the lock is torn down. */
 979                         zrl_destroy(&dnh->dnh_zrlock);
 980                         continue;
 981                 }
 982
                     /*
                      * Take a reference on the handle lock before touching the
                      * dnode; dnode_destroy() below is what gives it back.
                      */
 983                 zrl_add(&dnh->dnh_zrlock);
 984                 dn = dnh->dnh_dnode;
 985                 /*
 986                  * If there are holds on this dnode, then there should
 987                  * be holds on the dnode's containing dbuf as well; thus
 988                  * it wouldn't be eligible for eviction and this function
 989                  * would not have been called.
 990                  */
 991                 ASSERT(refcount_is_zero(&dn->dn_holds));
 992                 ASSERT(refcount_is_zero(&dn->dn_tx_holds));
 993
 994                 dnode_destroy(dn); /* implicit zrl_remove() */
 995                 zrl_destroy(&dnh->dnh_zrlock);
 996                 dnh->dnh_dnode = NULL;
 997         }
             /*
              * dnc_count is read while evaluating the size argument, i.e.
              * before kmem_free() releases the structure.  The size mirrors
              * the kmem_alloc() in dnode_hold_impl(): one dnode_children_t
              * plus (dnc_count - 1) additional handles.
              */
 998         kmem_free(children_dnodes, sizeof (dnode_children_t) +
 999             (children_dnodes->dnc_count - 1) * sizeof (dnode_handle_t));
1000 }
1001 
1002 /*
1003  * errors:
1004  * EINVAL - invalid object number.
1005  * EIO - i/o error.
1006  * succeeds even for free dnodes.
1007  */
1008 int
1009 dnode_hold_impl(objset_t *os, uint64_t object, int flag,
1010     void *tag, dnode_t **dnp)
1011 {
1012         int epb, idx, err;
1013         int drop_struct_lock = FALSE;
1014         int type;
1015         uint64_t blk;
1016         dnode_t *mdn, *dn;
1017         dmu_buf_impl_t *db;
1018         dnode_children_t *children_dnodes;
1019         dnode_handle_t *dnh;


1059 
1060         blk = dbuf_whichblock(mdn, object * sizeof (dnode_phys_t));
1061 
1062         db = dbuf_hold(mdn, blk, FTAG);
1063         if (drop_struct_lock)
1064                 rw_exit(&mdn->dn_struct_rwlock);
1065         if (db == NULL)
1066                 return (SET_ERROR(EIO));
1067         err = dbuf_read(db, NULL, DB_RF_CANFAIL);
1068         if (err) {
1069                 dbuf_rele(db, FTAG);
1070                 return (err);
1071         }
1072 
1073         ASSERT3U(db->db.db_size, >=, 1<<DNODE_SHIFT);
1074         epb = db->db.db_size >> DNODE_SHIFT;
1075 
1076         idx = object & (epb-1);
1077 
1078         ASSERT(DB_DNODE(db)->dn_type == DMU_OT_DNODE);
1079         children_dnodes = (dnode_children_t *)dmu_buf_get_user(&db->db);
1080         if (children_dnodes == NULL) {
1081                 int i;
1082                 dnode_children_t *winner;
1083                 children_dnodes = kmem_alloc(sizeof (dnode_children_t) +
1084                     (epb - 1) * sizeof (dnode_handle_t), KM_SLEEP);
1085                 children_dnodes->dnc_count = epb;
1086                 dnh = &children_dnodes->dnc_children[0];
1087                 for (i = 0; i < epb; i++) {
1088                         zrl_init(&dnh[i].dnh_zrlock);
1089                         dnh[i].dnh_dnode = NULL;
1090                 }
1091                 dmu_buf_init_user(&children_dnodes->db_evict,
1092                     dnode_buf_pageout);
1093                 winner = (dnode_children_t *)
1094                     dmu_buf_set_user(&db->db, &children_dnodes->db_evict);
1095                 if (winner) {
1096                         kmem_free(children_dnodes, sizeof (dnode_children_t) +
1097                             (epb - 1) * sizeof (dnode_handle_t));
1098                         children_dnodes = winner;
1099                 }
1100         }
1101         ASSERT(children_dnodes->dnc_count == epb);
1102 
1103         dnh = &children_dnodes->dnc_children[idx];
1104         zrl_add(&dnh->dnh_zrlock);
1105         if ((dn = dnh->dnh_dnode) == NULL) {
1106                 dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;
1107                 dnode_t *winner;
1108 
1109                 dn = dnode_create(os, phys, db, object, dnh);
1110                 winner = atomic_cas_ptr(&dnh->dnh_dnode, NULL, dn);
1111                 if (winner != NULL) {
1112                         zrl_add(&dnh->dnh_zrlock);
1113                         dnode_destroy(dn); /* implicit zrl_remove() */
1114                         dn = winner;
1115                 }