 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2013 by Delphix. All rights reserved.
 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/dmu.h>
#include <sys/dmu_send.h>
#include <sys/dmu_impl.h>
#include <sys/dbuf.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dmu_tx.h>
#include <sys/spa.h>
#include <sys/zio.h>
#include <sys/dmu_zfetch.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>

/*
 * Number of times that dbuf_free_range() took the slow path while doing
 * a zfs receive. A nonzero value indicates a potential performance problem.
 */
uint64_t zfs_free_range_recv_miss;

static void dbuf_destroy(dmu_buf_impl_t *db);
static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);

/*
 * Global data structures and functions for the dbuf cache.
 */
static kmem_cache_t *dbuf_cache;

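/*
 * kmem_cache constructor for a dbuf: zero it and initialize its lock,
 * condition variable, and hold refcount.
 */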
/* ARGSUSED */
static int
dbuf_cons(void *vdb, void *unused, int kmflag)
{
	dmu_buf_impl_t *db = vdb;
	bzero(db, sizeof (dmu_buf_impl_t));

	mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
	refcount_create(&db->db_holds);
	return (0);
}

/* ... */

/*
 * ...
 * they stay in memory.
 *
 * This is a no-op if the dataset is in the middle of an incremental
 * receive; see comment below for details.
 */
void
dbuf_free_range(dnode_t *dn, uint64_t start, uint64_t end, dmu_tx_t *tx)
{
	dmu_buf_impl_t *db, *db_next;
	uint64_t txg = tx->tx_txg;
	int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
	uint64_t first_l1 = start >> epbs;
	uint64_t last_l1 = end >> epbs;

	if (end > dn->dn_maxblkid && (end != DMU_SPILL_BLKID)) {
		end = dn->dn_maxblkid;
		last_l1 = end >> epbs;
	}
	dprintf_dnode(dn, "start=%llu end=%llu\n", start, end);

	mutex_enter(&dn->dn_dbufs_mtx);
	if (start >= dn->dn_unlisted_l0_blkid * dn->dn_datablksz) {
		/* There can't be any dbufs in this range; no need to search. */
		mutex_exit(&dn->dn_dbufs_mtx);
		return;
	} else if (dmu_objset_is_receiving(dn->dn_objset)) {
		/*
		 * If we are receiving, we expect there to be no dbufs in
		 * the range to be freed, because receive modifies each
		 * block at most once, and in offset order. If this is
		 * not the case, it can lead to performance problems,
		 * so note that we unexpectedly took the slow path.
		 */
		atomic_inc_64(&zfs_free_range_recv_miss);
	}

	for (db = list_head(&dn->dn_dbufs); db; db = db_next) {
		db_next = list_next(&dn->dn_dbufs, db);
		ASSERT(db->db_blkid != DMU_BONUS_BLKID);

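		/*
		 * Re-dirty any level-1 indirect block in the range whose
		 * most recent dirty record is from an earlier txg, so
		 * that it stays in memory until this txg's free is
		 * processed.
		 */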
		if (db->db_level == 1 &&
		    db->db_blkid >= first_l1 && db->db_blkid <= last_l1) {
			mutex_enter(&db->db_mtx);
			if (db->db_last_dirty &&
			    db->db_last_dirty->dr_txg < txg) {
				dbuf_add_ref(db, FTAG);
				mutex_exit(&db->db_mtx);
				dbuf_will_dirty(db, tx);
				dbuf_rele(db, FTAG);
			} else {
				mutex_exit(&db->db_mtx);
			}
		}

		if (db->db_level != 0)
			continue;

	/* ... */

		db->db.db_size = blocksize;
		db->db.db_offset = db->db_blkid * blocksize;
	}

	/*
	 * Hold the dn_dbufs_mtx while we get the new dbuf inserted into
	 * the hash table *and* added to the dbufs list.  This prevents a
	 * possible deadlock with someone trying to look up this dbuf
	 * before it's added to the dn_dbufs list.
	 */
	mutex_enter(&dn->dn_dbufs_mtx);
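	/*
	 * Insert in the DB_EVICTING state so that concurrent lookups
	 * (e.g., dbuf_find()) ignore this dbuf until it is fully
	 * constructed.
	 */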
	db->db_state = DB_EVICTING;
	if ((odb = dbuf_hash_insert(db)) != NULL) {
		/* someone else inserted it first */
		kmem_cache_free(dbuf_cache, db);
		mutex_exit(&dn->dn_dbufs_mtx);
		return (odb);
	}
	list_insert_head(&dn->dn_dbufs, db);
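	/*
	 * dn_unlisted_l0_blkid is an exclusive upper bound on the
	 * level-0 blkids that have ever been on dn_dbufs;
	 * dbuf_free_range() uses it to skip scanning the list when
	 * the range to free lies entirely beyond it.
	 */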
	if (db->db_level == 0 && db->db_blkid >=
	    dn->dn_unlisted_l0_blkid)
		dn->dn_unlisted_l0_blkid = db->db_blkid + 1;
	db->db_state = DB_UNCACHED;
	mutex_exit(&dn->dn_dbufs_mtx);
	arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);

	if (parent && parent != dn->dn_dbuf)
		dbuf_add_ref(parent, db);

	ASSERT(dn->dn_object == DMU_META_DNODE_OBJECT ||
	    refcount_count(&dn->dn_holds) > 0);
	(void) refcount_add(&dn->dn_holds, db);
	(void) atomic_inc_32_nv(&dn->dn_dbufs_count);

	dprintf_dbuf(db, "db=%p\n", db);

	return (db);
}

static int
dbuf_do_evict(void *private)
{