Print this page
4047 panic from dbuf_free_range() from dmu_free_object() while doing zfs receive
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>


  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2013 by Delphix. All rights reserved.
  25  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  26  */
  27 
  28 #include <sys/zfs_context.h>
  29 #include <sys/dmu.h>
  30 #include <sys/dmu_send.h>
  31 #include <sys/dmu_impl.h>
  32 #include <sys/dbuf.h>
  33 #include <sys/dmu_objset.h>
  34 #include <sys/dsl_dataset.h>
  35 #include <sys/dsl_dir.h>
  36 #include <sys/dmu_tx.h>
  37 #include <sys/spa.h>
  38 #include <sys/zio.h>
  39 #include <sys/dmu_zfetch.h>
  40 #include <sys/sa.h>
  41 #include <sys/sa_impl.h>
  42 






  43 static void dbuf_destroy(dmu_buf_impl_t *db);
  44 static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
  45 static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
  46 
  47 /*
  48  * Global data structures and functions for the dbuf cache.
  49  */
  50 static kmem_cache_t *dbuf_cache;
  51 
  52 /* ARGSUSED */
  53 static int
  54 dbuf_cons(void *vdb, void *unused, int kmflag)
  55 {
  56         dmu_buf_impl_t *db = vdb;
  57         bzero(db, sizeof (dmu_buf_impl_t));
  58 
  59         mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL);
  60         cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
  61         refcount_create(&db->db_holds);
  62         return (0);


 802  * they stay in memory.
 803  *
 804  * This is a no-op if the dataset is in the middle of an incremental
 805  * receive; see comment below for details.
 806  */
 807 void
 808 dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
 809 {
 810         dmu_buf_impl_t *db, *db_next;
 811         uint64_t txg = tx->tx_txg;
 812         int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 813         uint64_t first_l1 = start >> epbs;
 814         uint64_t last_l1 = end >> epbs;
 815 
 816         if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID)) {
 817                 end = dn->dn_maxblkid;
 818                 last_l1 = end >> epbs;
 819         }
 820         dprintf_dnode(dn, "start=%llu end=%llu\n", start, end);
 821 
 822         if (dmu_objset_is_receiving(dn->dn_objset)) {





 823                 /*
 824                  * When processing a free record from a zfs receive,
 825                  * there should have been no previous modifications to the
 826                  * data in this range.  Therefore there should be no dbufs
 827                  * in the range.  Searching dn_dbufs for these non-existent
 828                  * dbufs can be very expensive, so simply ignore this.
 829                  */
 830                 VERIFY3P(dbuf_find(dn, 0, start), ==, NULL);
 831                 VERIFY3P(dbuf_find(dn, 0, end), ==, NULL);
 832                 return;
 833         }
 834 
 835         mutex_enter(&dn->dn_dbufs_mtx);
 836         for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
 837                 db_next = list_next(&dn->dn_dbufs, db);
 838                 ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 839 
 840                 if (db->db_level == 1 &&
 841                     db->db_blkid >= first_l1 && db->db_blkid <= last_l1) {
 842                         mutex_enter(&db->db_mtx);
 843                         if (db->db_last_dirty &&
 844                             db->db_last_dirty->dr_txg < txg) {
 845                                 dbuf_add_ref(db, FTAG);
 846                                 mutex_exit(&db->db_mtx);
 847                                 dbuf_will_dirty(db, tx);
 848                                 dbuf_rele(db, FTAG);
 849                         } else {
 850                                 mutex_exit(&db->db_mtx);
 851                         }
 852                 }
 853 
 854                 if (db->db_level != 0)
 855                         continue;


1703                 db->db.db_size = blocksize;
1704                 db->db.db_offset = db->db_blkid * blocksize;
1705         }
1706 
1707         /*
1708          * Hold the dn_dbufs_mtx while we get the new dbuf
1709          * in the hash table *and* added to the dbufs list.
1710          * This prevents a possible deadlock with someone
1711          * trying to look up this dbuf before it's added to the
1712          * dn_dbufs list.
1713          */
1714         mutex_enter(&dn->dn_dbufs_mtx);
1715         db->db_state = DB_EVICTING;
1716         if ((odb = dbuf_hash_insert(db)) != NULL) {
1717                 /* someone else inserted it first */
1718                 kmem_cache_free(dbuf_cache, db);
1719                 mutex_exit(&dn->dn_dbufs_mtx);
1720                 return (odb);
1721         }
1722         list_insert_head(&dn->dn_dbufs, db);



1723         db->db_state = DB_UNCACHED;
1724         mutex_exit(&dn->dn_dbufs_mtx);
1725         arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
1726 
1727         if (parent && parent != dn->dn_dbuf)
1728                 dbuf_add_ref(parent, db);
1729 
1730         ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
1731             refcount_count(&dn->dn_holds) > 0);
1732         (void) refcount_add(&dn->dn_holds, db);
1733         (void) atomic_inc_32_nv(&dn->dn_dbufs_count);
1734 
1735         dprintf_dbuf(db, "db=%p\n", db);
1736 
1737         return (db);
1738 }
1739 
1740 static int
1741 dbuf_do_evict(void *private)
1742 {




  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 2013 by Delphix. All rights reserved.
  25  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  26  */
  27 
  28 #include <sys/zfs_context.h>
  29 #include <sys/dmu.h>
  30 #include <sys/dmu_send.h>
  31 #include <sys/dmu_impl.h>
  32 #include <sys/dbuf.h>
  33 #include <sys/dmu_objset.h>
  34 #include <sys/dsl_dataset.h>
  35 #include <sys/dsl_dir.h>
  36 #include <sys/dmu_tx.h>
  37 #include <sys/spa.h>
  38 #include <sys/zio.h>
  39 #include <sys/dmu_zfetch.h>
  40 #include <sys/sa.h>
  41 #include <sys/sa_impl.h>
  42 
  43 /*
  44  * Number of times that dbuf_free_range() took the slow path while doing
  45  * a zfs receive.  A nonzero value indicates a potential performance problem.
  46  */
  47 uint64_t zfs_free_range_recv_miss;
  48 
  49 static void dbuf_destroy(dmu_buf_impl_t *db);
  50 static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
  51 static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
  52 
  53 /*
  54  * Global data structures and functions for the dbuf cache.
  55  */
  56 static kmem_cache_t *dbuf_cache;
  57 
  58 /* ARGSUSED */
  59 static int
  60 dbuf_cons(void *vdb, void *unused, int kmflag)
  61 {
  62         dmu_buf_impl_t *db = vdb;
  63         bzero(db, sizeof (dmu_buf_impl_t));
  64 
  65         mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL);
  66         cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
  67         refcount_create(&db->db_holds);
  68         return (0);


 808  * they stay in memory.
 809  *
 810  * The search is skipped when no dbufs can exist in the range.  During a
 811  * receive the slow path is merely counted; see comments below for details.
 812  */
 813 void
 814 dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
 815 {
 816         dmu_buf_impl_t *db, *db_next;
 817         uint64_t txg = tx->tx_txg;
 818         int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
 819         uint64_t first_l1 = start >> epbs;
 820         uint64_t last_l1 = end >> epbs;
 821 
 822         if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID)) {
 823                 end = dn->dn_maxblkid;
 824                 last_l1 = end >> epbs;
 825         }
 826         dprintf_dnode(dn, "start=%llu end=%llu\n", start, end);
 827 
 828         mutex_enter(&dn->dn_dbufs_mtx);
 829         if (start >= dn->dn_unlisted_l0_blkid * dn->dn_datablksz) {
 830                 /* There can't be any dbufs in this range; no need to search. */
 831                 mutex_exit(&dn->dn_dbufs_mtx);
 832                 return;
 833         } else if (dmu_objset_is_receiving(dn->dn_objset)) {
 834                 /*
 835                  * If we are receiving, we expect there to be no dbufs in
 836                  * the range to be freed, because receive modifies each
 837                  * block at most once, and in offset order.  If this is
 838                  * not the case, it can lead to performance problems,
 839                  * so note that we unexpectedly took the slow path.
 840                  */
 841                 atomic_inc_64(&zfs_free_range_recv_miss);


 842         }
 843 

 844         for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
 845                 db_next = list_next(&dn->dn_dbufs, db);
 846                 ASSERT(db->db_blkid != DMU_BONUS_BLKID);
 847 
 848                 if (db->db_level == 1 &&
 849                     db->db_blkid >= first_l1 && db->db_blkid <= last_l1) {
 850                         mutex_enter(&db->db_mtx);
 851                         if (db->db_last_dirty &&
 852                             db->db_last_dirty->dr_txg < txg) {
 853                                 dbuf_add_ref(db, FTAG);
 854                                 mutex_exit(&db->db_mtx);
 855                                 dbuf_will_dirty(db, tx);
 856                                 dbuf_rele(db, FTAG);
 857                         } else {
 858                                 mutex_exit(&db->db_mtx);
 859                         }
 860                 }
 861 
 862                 if (db->db_level != 0)
 863                         continue;


1711                 db->db.db_size = blocksize;
1712                 db->db.db_offset = db->db_blkid * blocksize;
1713         }
1714 
1715         /*
1716          * Hold the dn_dbufs_mtx while we get the new dbuf
1717          * in the hash table *and* added to the dbufs list.
1718          * This prevents a possible deadlock with someone
1719          * trying to look up this dbuf before it's added to the
1720          * dn_dbufs list.
1721          */
1722         mutex_enter(&dn->dn_dbufs_mtx);
1723         db->db_state = DB_EVICTING;
1724         if ((odb = dbuf_hash_insert(db)) != NULL) {
1725                 /* someone else inserted it first */
1726                 kmem_cache_free(dbuf_cache, db);
1727                 mutex_exit(&dn->dn_dbufs_mtx);
1728                 return (odb);
1729         }
1730         list_insert_head(&dn->dn_dbufs, db);
1731         if (db->db_level == 0 && db->db_blkid >=
1732             dn->dn_unlisted_l0_blkid)
1733                 dn->dn_unlisted_l0_blkid = db->db_blkid + 1;
1734         db->db_state = DB_UNCACHED;
1735         mutex_exit(&dn->dn_dbufs_mtx);
1736         arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
1737 
1738         if (parent && parent != dn->dn_dbuf)
1739                 dbuf_add_ref(parent, db);
1740 
1741         ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
1742             refcount_count(&dn->dn_holds) > 0);
1743         (void) refcount_add(&dn->dn_holds, db);
1744         (void) atomic_inc_32_nv(&dn->dn_dbufs_count);
1745 
1746         dprintf_dbuf(db, "db=%p\n", db);
1747 
1748         return (db);
1749 }
1750 
1751 static int
1752 dbuf_do_evict(void *private)
1753 {