4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 */
25
26 #include <sys/dmu.h>
27 #include <sys/dmu_impl.h>
28 #include <sys/dmu_tx.h>
29 #include <sys/dbuf.h>
30 #include <sys/dnode.h>
31 #include <sys/zfs_context.h>
32 #include <sys/dmu_objset.h>
33 #include <sys/dmu_traverse.h>
34 #include <sys/dsl_dataset.h>
35 #include <sys/dsl_dir.h>
36 #include <sys/dsl_pool.h>
37 #include <sys/dsl_synctask.h>
38 #include <sys/dsl_prop.h>
39 #include <sys/dmu_zfetch.h>
40 #include <sys/zfs_ioctl.h>
41 #include <sys/zap.h>
42 #include <sys/zio_checksum.h>
43 #include <sys/zio_compress.h>
44 #include <sys/sa.h>
45 #ifdef _KERNEL
46 #include <sys/vmsystm.h>
47 #include <sys/zfs_znode.h>
48 #endif
49
50 /*
51 * Enable/disable nopwrite feature.
52 */
53 int zfs_nopwrite_enabled = 1;	/* defaults to 1 */
54
55 const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
56 { DMU_BSWAP_UINT8, TRUE, "unallocated" },
57 { DMU_BSWAP_ZAP, TRUE, "object directory" },
58 { DMU_BSWAP_UINT64, TRUE, "object array" },
59 { DMU_BSWAP_UINT8, TRUE, "packed nvlist" },
60 { DMU_BSWAP_UINT64, TRUE, "packed nvlist size" },
61 { DMU_BSWAP_UINT64, TRUE, "bpobj" },
62 { DMU_BSWAP_UINT64, TRUE, "bpobj header" },
63 { DMU_BSWAP_UINT64, TRUE, "SPA space map header" },
64 { DMU_BSWAP_UINT64, TRUE, "SPA space map" },
1400 dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
1401 {
1402 blkptr_t *bp = zgd->zgd_bp;
1403 dmu_buf_impl_t *db = (dmu_buf_impl_t *)zgd->zgd_db;
1404 objset_t *os = db->db_objset;
1405 dsl_dataset_t *ds = os->os_dsl_dataset;
1406 dbuf_dirty_record_t *dr;
1407 dmu_sync_arg_t *dsa;
1408 zbookmark_t zb;
1409 zio_prop_t zp;
1410 dnode_t *dn;
1411
1412 ASSERT(pio != NULL);
1413 ASSERT(txg != 0);
1414
1415 SET_BOOKMARK(&zb, ds->ds_object,
1416 db->db.db_object, db->db_level, db->db_blkid);
1417
1418 DB_DNODE_ENTER(db);
1419 dn = DB_DNODE(db);
1420 dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC, &zp);
1421 DB_DNODE_EXIT(db);
1422
1423 /*
1424 * If we're frozen (running ziltest), we always need to generate a bp.
1425 */
1426 if (txg > spa_freeze_txg(os->os_spa))
1427 return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb));
1428
1429 /*
1430 * Grabbing db_mtx now provides a barrier between dbuf_sync_leaf()
1431 * and us. If we determine that this txg is not yet syncing,
1432 * but it begins to sync a moment later, that's OK because the
1433 * sync thread will block in dbuf_sync_leaf() until we drop db_mtx.
1434 */
1435 mutex_enter(&db->db_mtx);
1436
1437 if (txg <= spa_last_synced_txg(os->os_spa)) {
1438 /*
1439 * This txg has already synced. There's nothing to do.
1440 */
1538 dn->dn_checksum = checksum;
1539 dnode_setdirty(dn, tx);
1540 dnode_rele(dn, FTAG);
1541 }
1542
1543 void
1544 dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
1545 dmu_tx_t *tx)
1546 {
1547 dnode_t *dn;
1548
1549 /* XXX assumes dnode_hold will not get an i/o error */
1550 (void) dnode_hold(os, object, FTAG, &dn);
1551 ASSERT(compress < ZIO_COMPRESS_FUNCTIONS);
1552 dn->dn_compress = compress;
1553 dnode_setdirty(dn, tx);
1554 dnode_rele(dn, FTAG);
1555 }
1556
/*
 * Metadata compression tunable.  dmu_write_policy() consults it for
 * metadata blocks: when nonzero it selects ZIO_COMPRESS_EMPTY, otherwise
 * ZIO_COMPRESS_LZJB.
 */
1557 int zfs_mdcomp_disable = 0;
1558
1559 void
1560 dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
1561 {
1562 dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET;
1563 boolean_t ismd = (level > 0 || DMU_OT_IS_METADATA(type) ||
1564 (wp & WP_SPILL));
1565 enum zio_checksum checksum = os->os_checksum;
1566 enum zio_compress compress = os->os_compress;
1567 enum zio_checksum dedup_checksum = os->os_dedup_checksum;
1568 boolean_t dedup = B_FALSE;
1569 boolean_t nopwrite = B_FALSE;
1570 boolean_t dedup_verify = os->os_dedup_verify;
1571 int copies = os->os_copies;
1572
1573 /*
1574 * We maintain different write policies for each of the following
1575 * types of data:
1576 * 1. metadata
1577 * 2. preallocated blocks (i.e. level-0 blocks of a dump device)
1578 * 3. all other level 0 blocks
1579 */
1580 if (ismd) {
1581 /*
1582 * XXX -- we should design a compression algorithm
1583 * that specializes in arrays of bps.
1584 */
1585 compress = zfs_mdcomp_disable ? ZIO_COMPRESS_EMPTY :
1586 ZIO_COMPRESS_LZJB;
1587
1588 /*
1589 * Metadata always gets checksummed. If the data
1590 * checksum is multi-bit correctable, and it's not a
1591 * ZBT-style checksum, then it's suitable for metadata
1592 * as well. Otherwise, the metadata checksum defaults
1593 * to fletcher4.
1594 */
1595 if (zio_checksum_table[checksum].ci_correctable < 1 ||
1596 zio_checksum_table[checksum].ci_eck)
1597 checksum = ZIO_CHECKSUM_FLETCHER_4;
1598 } else if (wp & WP_NOFILL) {
1599 ASSERT(level == 0);
1600
1601 /*
1602 * If we're writing preallocated blocks, we aren't actually
1603 * writing them so don't set any policy properties. These
1604 * blocks are currently only used by an external subsystem
1605 * outside of zfs (i.e. dump) and not written by the zio
1606 * pipeline.
|
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2013 by Delphix. All rights reserved.
24 * Copyright (c) 2013 Martin Matuska. All rights reserved.
25 */
26
27 #include <sys/dmu.h>
28 #include <sys/dmu_impl.h>
29 #include <sys/dmu_tx.h>
30 #include <sys/dbuf.h>
31 #include <sys/dnode.h>
32 #include <sys/zfs_context.h>
33 #include <sys/dmu_objset.h>
34 #include <sys/dmu_traverse.h>
35 #include <sys/dsl_dataset.h>
36 #include <sys/dsl_dir.h>
37 #include <sys/dsl_pool.h>
38 #include <sys/dsl_synctask.h>
39 #include <sys/dsl_prop.h>
40 #include <sys/dmu_zfetch.h>
41 #include <sys/zfs_ioctl.h>
42 #include <sys/zap.h>
43 #include <sys/zio_checksum.h>
44 #include <sys/zio_compress.h>
45 #include <sys/sa.h>
46 #include <sys/zfeature.h>
47 #ifdef _KERNEL
48 #include <sys/vmsystm.h>
49 #include <sys/zfs_znode.h>
50 #endif
51
52 /*
53 * Enable/disable nopwrite feature.
54 */
55 int zfs_nopwrite_enabled = 1;	/* defaults to 1 */
56
57 const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
58 { DMU_BSWAP_UINT8, TRUE, "unallocated" },
59 { DMU_BSWAP_ZAP, TRUE, "object directory" },
60 { DMU_BSWAP_UINT64, TRUE, "object array" },
61 { DMU_BSWAP_UINT8, TRUE, "packed nvlist" },
62 { DMU_BSWAP_UINT64, TRUE, "packed nvlist size" },
63 { DMU_BSWAP_UINT64, TRUE, "bpobj" },
64 { DMU_BSWAP_UINT64, TRUE, "bpobj header" },
65 { DMU_BSWAP_UINT64, TRUE, "SPA space map header" },
66 { DMU_BSWAP_UINT64, TRUE, "SPA space map" },
1402 dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
1403 {
1404 blkptr_t *bp = zgd->zgd_bp;
1405 dmu_buf_impl_t *db = (dmu_buf_impl_t *)zgd->zgd_db;
1406 objset_t *os = db->db_objset;
1407 dsl_dataset_t *ds = os->os_dsl_dataset;
1408 dbuf_dirty_record_t *dr;
1409 dmu_sync_arg_t *dsa;
1410 zbookmark_t zb;
1411 zio_prop_t zp;
1412 dnode_t *dn;
1413
1414 ASSERT(pio != NULL);
1415 ASSERT(txg != 0);
1416
1417 SET_BOOKMARK(&zb, ds->ds_object,
1418 db->db.db_object, db->db_level, db->db_blkid);
1419
1420 DB_DNODE_ENTER(db);
1421 dn = DB_DNODE(db);
1422 dmu_write_policy(os, dn, db->db_level, WP_DMU_SYNC, &zp, txg);
1423 DB_DNODE_EXIT(db);
1424
1425 /*
1426 * If we're frozen (running ziltest), we always need to generate a bp.
1427 */
1428 if (txg > spa_freeze_txg(os->os_spa))
1429 return (dmu_sync_late_arrival(pio, os, done, zgd, &zp, &zb));
1430
1431 /*
1432 * Grabbing db_mtx now provides a barrier between dbuf_sync_leaf()
1433 * and us. If we determine that this txg is not yet syncing,
1434 * but it begins to sync a moment later, that's OK because the
1435 * sync thread will block in dbuf_sync_leaf() until we drop db_mtx.
1436 */
1437 mutex_enter(&db->db_mtx);
1438
1439 if (txg <= spa_last_synced_txg(os->os_spa)) {
1440 /*
1441 * This txg has already synced. There's nothing to do.
1442 */
1540 dn->dn_checksum = checksum;
1541 dnode_setdirty(dn, tx);
1542 dnode_rele(dn, FTAG);
1543 }
1544
1545 void
1546 dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
1547 dmu_tx_t *tx)
1548 {
1549 dnode_t *dn;
1550
1551 /* XXX assumes dnode_hold will not get an i/o error */
1552 (void) dnode_hold(os, object, FTAG, &dn);
1553 ASSERT(compress < ZIO_COMPRESS_FUNCTIONS);
1554 dn->dn_compress = compress;
1555 dnode_setdirty(dn, tx);
1556 dnode_rele(dn, FTAG);
1557 }
1558
/*
 * Metadata compression tunables, consulted by dmu_write_policy() for
 * metadata blocks:
 *
 * zfs_mdcomp_disable - when nonzero, ZIO_COMPRESS_EMPTY is selected and
 *	zfs_mdcomp_lz4 is not examined.
 * zfs_mdcomp_lz4 - when nonzero and the objset has a spa, ZIO_COMPRESS_LZ4
 *	is selected if the pool's SPA_FEATURE_LZ4_COMPRESS feature is active,
 *	or if it is enabled (in which case spa_feature_incr() is called on it
 *	first).  In every other case ZIO_COMPRESS_LZJB is used.
 */
1559 int zfs_mdcomp_disable = 0;
1560 int zfs_mdcomp_lz4 = 0;
1561
1562 void
1563 dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp,
1564 uint64_t txg)
1565 {
1566 dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET;
1567 boolean_t ismd = (level > 0 || DMU_OT_IS_METADATA(type) ||
1568 (wp & WP_SPILL));
1569 enum zio_checksum checksum = os->os_checksum;
1570 enum zio_compress compress = os->os_compress;
1571 enum zio_checksum dedup_checksum = os->os_dedup_checksum;
1572 boolean_t dedup = B_FALSE;
1573 boolean_t nopwrite = B_FALSE;
1574 boolean_t dedup_verify = os->os_dedup_verify;
1575 int copies = os->os_copies;
1576
1577 /*
1578 * We maintain different write policies for each of the following
1579 * types of data:
1580 * 1. metadata
1581 * 2. preallocated blocks (i.e. level-0 blocks of a dump device)
1582 * 3. all other level 0 blocks
1583 */
1584 if (ismd) {
1585 /*
1586 * XXX -- we should design a compression algorithm
1587 * that specializes in arrays of bps.
1588 */
1589 if (zfs_mdcomp_disable)
1590 compress = ZIO_COMPRESS_EMPTY;
1591 else if (zfs_mdcomp_lz4 && os->os_spa != NULL) {
1592 zfeature_info_t *feat = &spa_feature_table
1593 [SPA_FEATURE_LZ4_COMPRESS];
1594
1595 if (spa_feature_is_active(os->os_spa, feat))
1596 compress = ZIO_COMPRESS_LZ4;
1597 else if (spa_feature_is_enabled(os->os_spa, feat)) {
1598 dmu_tx_t *tx;
1599
1600 tx = dmu_tx_create_assigned(
1601 spa_get_dsl(os->os_spa), txg);
1602 spa_feature_incr(os->os_spa, feat, tx);
1603 dmu_tx_commit(tx);
1604 compress = ZIO_COMPRESS_LZ4;
1605 } else
1606 compress = ZIO_COMPRESS_LZJB;
1607 } else
1608 compress = ZIO_COMPRESS_LZJB;
1609
1610 /*
1611 * Metadata always gets checksummed. If the data
1612 * checksum is multi-bit correctable, and it's not a
1613 * ZBT-style checksum, then it's suitable for metadata
1614 * as well. Otherwise, the metadata checksum defaults
1615 * to fletcher4.
1616 */
1617 if (zio_checksum_table[checksum].ci_correctable < 1 ||
1618 zio_checksum_table[checksum].ci_eck)
1619 checksum = ZIO_CHECKSUM_FLETCHER_4;
1620 } else if (wp & WP_NOFILL) {
1621 ASSERT(level == 0);
1622
1623 /*
1624 * If we're writing preallocated blocks, we aren't actually
1625 * writing them so don't set any policy properties. These
1626 * blocks are currently only used by an external subsystem
1627 * outside of zfs (i.e. dump) and not written by the zio
1628 * pipeline.
|