Print this page
Possibility to physically reserve space without writing leaf blocks

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/dbuf.c
          +++ new/usr/src/uts/common/fs/zfs/dbuf.c
↓ open down ↓ 1013 lines elided ↑ open up ↑
1014 1014  
1015 1015          ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
1016 1016          ASSERT(arc_released(os->os_phys_buf) ||
1017 1017              list_link_active(&os->os_dsl_dataset->ds_synced_link));
1018 1018          ASSERT(db->db_parent == NULL || arc_released(db->db_parent->db_buf));
1019 1019  
1020 1020          (void) arc_release(db->db_buf, db);
1021 1021  }
1022 1022  
1023 1023  dbuf_dirty_record_t *
1024      -dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
     1024 +dbuf_zero_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)  /* dirty db in tx with dbuf_dirty()'s zero_write argument set */
     1025 +{
     1026 +        ASSERT(db->db_objset != NULL);  /* debug check: db must have an objset attached */
     1027 +
     1028 +        return (dbuf_dirty(db, tx, B_TRUE));  /* B_TRUE is zero_write; returns whatever dirty record dbuf_dirty() returns */
     1029 +}
     1030 +
     1031 +dbuf_dirty_record_t *
     1032 +dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx, boolean_t zero_write)
1025 1033  {
1026 1034          dnode_t *dn;
1027 1035          objset_t *os;
1028 1036          dbuf_dirty_record_t **drp, *dr;
1029 1037          int drop_struct_lock = FALSE;
1030 1038          boolean_t do_free_accounting = B_FALSE;
1031 1039          int txgoff = tx->tx_txg & TXG_MASK;
1032 1040  
1033 1041          ASSERT(tx->tx_txg != 0);
1034 1042          ASSERT(!refcount_is_zero(&db->db_holds));
↓ open down ↓ 110 lines elided ↑ open up ↑
1145 1153                  dnode_willuse_space(dn, db->db.db_size, tx);
1146 1154                  do_free_accounting = dbuf_block_freeable(db);
1147 1155          }
1148 1156  
1149 1157          /*
1150 1158           * If this buffer is dirty in an old transaction group we need
1151 1159           * to make a copy of it so that the changes we make in this
1152 1160           * transaction group won't leak out when we sync the older txg.
1153 1161           */
1154 1162          dr = kmem_zalloc(sizeof (dbuf_dirty_record_t), KM_SLEEP);
     1163 +        dr->dr_zero_write = zero_write;
1155 1164          if (db->db_level == 0) {
1156 1165                  void *data_old = db->db_buf;
1157 1166  
1158 1167                  if (db->db_state != DB_NOFILL) {
1159 1168                          if (db->db_blkid == DMU_BONUS_BLKID) {
1160 1169                                  dbuf_fix_old_data(db, tx->tx_txg);
1161 1170                                  data_old = db->db.db_data;
1162 1171                          } else if (db->db.db_object != DMU_META_DNODE_OBJECT) {
1163 1172                                  /*
1164 1173                                   * Release the data buffer from the cache so
↓ open down ↓ 93 lines elided ↑ open up ↑
1258 1267                          int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
1259 1268  
1260 1269                          parent = dbuf_hold_level(dn, db->db_level+1,
1261 1270                              db->db_blkid >> epbs, FTAG);
1262 1271                          ASSERT(parent != NULL);
1263 1272                          parent_held = TRUE;
1264 1273                  }
1265 1274                  if (drop_struct_lock)
1266 1275                          rw_exit(&dn->dn_struct_rwlock);
1267 1276                  ASSERT3U(db->db_level+1, ==, parent->db_level);
1268      -                di = dbuf_dirty(parent, tx);
     1277 +                di = dbuf_dirty(parent, tx, B_FALSE);
1269 1278                  if (parent_held)
1270 1279                          dbuf_rele(parent, FTAG);
1271 1280  
1272 1281                  mutex_enter(&db->db_mtx);
1273 1282                  /*
1274 1283                   * Since we've dropped the mutex, it's possible that
1275 1284                   * dbuf_undirty() might have changed this out from under us.
1276 1285                   */
1277 1286                  if (db->db_last_dirty == dr ||
1278 1287                      dn->dn_object == DMU_META_DNODE_OBJECT) {
↓ open down ↓ 122 lines elided ↑ open up ↑
1401 1410          int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
1402 1411  
1403 1412          ASSERT(tx->tx_txg != 0);
1404 1413          ASSERT(!refcount_is_zero(&db->db_holds));
1405 1414  
1406 1415          DB_DNODE_ENTER(db);
1407 1416          if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock))
1408 1417                  rf |= DB_RF_HAVESTRUCT;
1409 1418          DB_DNODE_EXIT(db);
1410 1419          (void) dbuf_read(db, NULL, rf);
1411      -        (void) dbuf_dirty(db, tx);
     1420 +        (void) dbuf_dirty(db, tx, B_FALSE);
1412 1421  }
1413 1422  
1414 1423  void
1415 1424  dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
1416 1425  {
1417 1426          dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
1418 1427  
1419 1428          db->db_state = DB_NOFILL;
1420 1429  
1421 1430          dmu_buf_will_fill(db_fake, tx);
↓ open down ↓ 6 lines elided ↑ open up ↑
1428 1437  
1429 1438          ASSERT(db->db_blkid != DMU_BONUS_BLKID);
1430 1439          ASSERT(tx->tx_txg != 0);
1431 1440          ASSERT(db->db_level == 0);
1432 1441          ASSERT(!refcount_is_zero(&db->db_holds));
1433 1442  
1434 1443          ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT ||
1435 1444              dmu_tx_private_ok(tx));
1436 1445  
1437 1446          dbuf_noread(db);
1438      -        (void) dbuf_dirty(db, tx);
     1447 +        (void) dbuf_dirty(db, tx, B_FALSE);
     1448 +}
     1449 +
     1450 +
     1451 +void
     1452 +dmu_buf_will_zero_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)  /* calls dbuf_noread() on the dbuf, then dirties it via dbuf_zero_dirty() */
     1453 +{
     1454 +        dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;  /* view the public dmu_buf_t handle as its dmu_buf_impl_t */
     1455 +
     1456 +        ASSERT(db->db_blkid != DMU_BONUS_BLKID);  /* not valid for the bonus buffer */
     1457 +        ASSERT(tx->tx_txg != 0);  /* tx must carry a nonzero txg */
     1458 +        ASSERT(db->db_level == 0);  /* level-0 dbufs only */
     1459 +        ASSERT(!refcount_is_zero(&db->db_holds));  /* hold count on db must be nonzero */
     1460 +
     1461 +        ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT ||  /* the meta-dnode object is accepted only when ... */
     1462 +            dmu_tx_private_ok(tx));  /* ... dmu_tx_private_ok(tx) holds */
     1463 +
     1464 +        dbuf_noread(db);  /* NOTE(review): presumably prepares db without reading its old contents -- confirm */
     1465 +        (void) dbuf_zero_dirty(db, tx);  /* returned dirty record is deliberately discarded */
1439 1466  }
1440 1467  
1441 1468  #pragma weak dmu_buf_fill_done = dbuf_fill_done
1442 1469  /* ARGSUSED */
1443 1470  void
1444 1471  dbuf_fill_done(dmu_buf_impl_t *db, dmu_tx_t *tx)
1445 1472  {
1446 1473          mutex_enter(&db->db_mtx);
1447 1474          DBUF_VERIFY(db);
1448 1475  
↓ open down ↓ 64 lines elided ↑ open up ↑
1513 1540          mutex_enter(&db->db_mtx);
1514 1541  
1515 1542          while (db->db_state == DB_READ || db->db_state == DB_FILL)
1516 1543                  cv_wait(&db->db_changed, &db->db_mtx);
1517 1544  
1518 1545          ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED);
1519 1546  
1520 1547          if (db->db_state == DB_CACHED &&
1521 1548              refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) {
1522 1549                  mutex_exit(&db->db_mtx);
1523      -                (void) dbuf_dirty(db, tx);
     1550 +                (void) dbuf_dirty(db, tx, B_FALSE);
1524 1551                  bcopy(buf->b_data, db->db.db_data, db->db.db_size);
1525 1552                  VERIFY(arc_buf_remove_ref(buf, db));
1526 1553                  xuio_stat_wbuf_copied();
1527 1554                  return;
1528 1555          }
1529 1556  
1530 1557          xuio_stat_wbuf_nocopy();
1531 1558          if (db->db_state == DB_CACHED) {
1532 1559                  dbuf_dirty_record_t *dr = db->db_last_dirty;
1533 1560  
↓ open down ↓ 10 lines elided ↑ open up ↑
1544 1571                  } else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) {
1545 1572                          arc_release(db->db_buf, db);
1546 1573                          VERIFY(arc_buf_remove_ref(db->db_buf, db));
1547 1574                  }
1548 1575                  db->db_buf = NULL;
1549 1576          }
1550 1577          ASSERT(db->db_buf == NULL);
1551 1578          dbuf_set_data(db, buf);
1552 1579          db->db_state = DB_FILL;
1553 1580          mutex_exit(&db->db_mtx);
1554      -        (void) dbuf_dirty(db, tx);
     1581 +        (void) dbuf_dirty(db, tx, B_FALSE);
1555 1582          dmu_buf_fill_done(&db->db, tx);
1556 1583  }
1557 1584  
1558 1585  /*
1559 1586   * "Clear" the contents of this dbuf.  This will mark the dbuf
1560 1587   * EVICTING and clear *most* of its references.  Unfortunately,
1561 1588   * when we are not holding the dn_dbufs_mtx, we can't clear the
1562 1589   * entry in the dn_dbufs list.  We have to wait until dbuf_destroy()
1563 1590   * in this case.  For callers from the DMU we will usually see:
1564 1591   *      dbuf_clear()->arc_clear_callback()->dbuf_do_evict()->dbuf_destroy()
↓ open down ↓ 1248 lines elided ↑ open up ↑
2813 2840              os->os_dsl_dataset->ds_object : DMU_META_OBJSET,
2814 2841              db->db.db_object, db->db_level, db->db_blkid);
2815 2842  
2816 2843          if (db->db_blkid == DMU_SPILL_BLKID)
2817 2844                  wp_flag = WP_SPILL;
2818 2845          wp_flag |= (db->db_state == DB_NOFILL) ? WP_NOFILL : 0;
2819 2846  
2820 2847          dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
2821 2848          DB_DNODE_EXIT(db);
2822 2849  
     2850 +        if (dr->dr_zero_write) {
     2851 +                zp.zp_zero_write = B_TRUE;
     2852 +
     2853 +                if (!spa_feature_is_active(os->os_spa, SPA_FEATURE_SPACE_RESERVATION))
     2854 +                {
     2855 +                        spa_feature_incr(os->os_spa,
     2856 +                            SPA_FEATURE_SPACE_RESERVATION, tx);
     2857 +                }
     2858 +        }
     2859 +
2823 2860          if (db->db_level == 0 &&
2824 2861              dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
2825 2862                  /*
2826 2863                   * The BP for this block has been provided by open context
2827 2864                   * (by dmu_sync() or dmu_buf_write_embedded()).
2828 2865                   */
2829 2866                  void *contents = (data != NULL) ? data->b_data : NULL;
2830 2867  
2831 2868                  dr->dr_zio = zio_write(zio, os->os_spa, txg,
2832 2869                      db->db_blkptr, contents, db->db.db_size, &zp,
↓ open down ↓ 24 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX