3741 zfs needs better comments
Submitted by:   Will Andrews <willa@spectralogic.com>
Submitted by:   Justin Gibbs <justing@spectralogic.com>
Submitted by:   Alan Somers <alans@spectralogic.com>
Reviewed by:    Matthew Ahrens <mahrens@delphix.com>

          --- old/usr/src/uts/common/fs/zfs/dbuf.c
          +++ new/usr/src/uts/common/fs/zfs/dbuf.c
(630 lines elided)
 631  631                          dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
 632  632                              db->db.db_size, flags & DB_RF_CACHED);
 633  633  
 634  634                  if ((flags & DB_RF_HAVESTRUCT) == 0)
 635  635                          rw_exit(&dn->dn_struct_rwlock);
 636  636                  DB_DNODE_EXIT(db);
 637  637  
 638  638                  if (!havepzio)
 639  639                          err = zio_wait(zio);
 640  640          } else {
      641 +                /*
      642 +                 * Another reader came in while the dbuf was in flight
      643 +                 * between UNCACHED and CACHED.  Either a writer will finish
      644 +                 * writing the buffer (sending the dbuf to CACHED) or the
      645 +                 * first reader's request will reach the read_done callback
      646 +                 * and send the dbuf to CACHED.  Otherwise, a failure
      647 +                 * occurred and the dbuf went to UNCACHED.
      648 +                 */
 641  649                  mutex_exit(&db->db_mtx);
 642  650                  if (prefetch)
 643  651                          dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
 644  652                              db->db.db_size, TRUE);
 645  653                  if ((flags & DB_RF_HAVESTRUCT) == 0)
 646  654                          rw_exit(&dn->dn_struct_rwlock);
 647  655                  DB_DNODE_EXIT(db);
 648  656  
      657 +                /* Skip the wait below if the caller passed DB_RF_NEVERWAIT. */
 649  658                  mutex_enter(&db->db_mtx);
 650  659                  if ((flags & DB_RF_NEVERWAIT) == 0) {
 651  660                          while (db->db_state == DB_READ ||
 652  661                              db->db_state == DB_FILL) {
 653  662                                  ASSERT(db->db_state == DB_READ ||
 654  663                                      (flags & DB_RF_HAVESTRUCT) == 0);
 655  664                                  cv_wait(&db->db_changed, &db->db_mtx);
 656  665                          }
 657  666                          if (db->db_state == DB_UNCACHED)
 658  667                                  err = SET_ERROR(EIO);
(595 lines elided)
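Aside on the hunk above: the new comment and the wait loop in dbuf_read() describe a small state machine (DB_UNCACHED, DB_READ, DB_FILL, DB_CACHED) in which a second reader blocks on db->db_changed, under db->db_mtx, until the in-flight read settles. The following standalone userland sketch is not ZFS code; it is a minimal POSIX-threads analogue of that wait pattern, assuming a simplified state set, and every name in it (buf_analogue_t, read_done, wait_for_cached) is hypothetical.

    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    typedef enum { ST_UNCACHED, ST_READ, ST_FILL, ST_CACHED } buf_state_t;

    typedef struct {
            pthread_mutex_t mtx;            /* analogue of db->db_mtx */
            pthread_cond_t  changed;        /* analogue of db->db_changed */
            buf_state_t     state;          /* analogue of db->db_state */
    } buf_analogue_t;

    /* I/O completion path: move the buffer to CACHED and wake all waiters. */
    static void *
    read_done(void *arg)
    {
            buf_analogue_t *b = arg;

            sleep(1);                       /* pretend the read takes a while */
            pthread_mutex_lock(&b->mtx);
            b->state = ST_CACHED;
            pthread_cond_broadcast(&b->changed);
            pthread_mutex_unlock(&b->mtx);
            return (NULL);
    }

    /* Second reader: wait out READ/FILL, then inspect the final state. */
    static int
    wait_for_cached(buf_analogue_t *b)
    {
            int err = 0;

            pthread_mutex_lock(&b->mtx);
            while (b->state == ST_READ || b->state == ST_FILL)
                    pthread_cond_wait(&b->changed, &b->mtx);
            if (b->state == ST_UNCACHED)
                    err = EIO;              /* the in-flight read failed */
            pthread_mutex_unlock(&b->mtx);
            return (err);
    }

    int
    main(void)
    {
            buf_analogue_t b;
            pthread_t io;

            pthread_mutex_init(&b.mtx, NULL);
            pthread_cond_init(&b.changed, NULL);
            b.state = ST_READ;              /* a read is already in flight */

            pthread_create(&io, NULL, read_done, &b);
            printf("wait_for_cached returned %d\n", wait_for_cached(&b));
            pthread_join(io, NULL);
            return (0);
    }

As in dbuf_read(), the waiter re-checks the state after waking: CACHED means the data is usable, while UNCACHED means the read failed and is reported as EIO.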
1254 1263                  if (drop_struct_lock)
1255 1264                          rw_exit(&dn->dn_struct_rwlock);
1256 1265          }
1257 1266  
1258 1267          dnode_setdirty(dn, tx);
1259 1268          DB_DNODE_EXIT(db);
1260 1269          return (dr);
1261 1270  }
1262 1271  
1263 1272  /*
1264      - * Return TRUE if this evicted the dbuf.
     1273 + * Undirty a buffer in the transaction group referenced by the given
     1274 + * transaction.  Return whether this evicted the dbuf.
1265 1275   */
1266 1276  static boolean_t
1267 1277  dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
1268 1278  {
1269 1279          dnode_t *dn;
1270 1280          uint64_t txg = tx->tx_txg;
1271 1281          dbuf_dirty_record_t *dr, **drp;
1272 1282  
1273 1283          ASSERT(txg != 0);
1274 1284          ASSERT(db->db_blkid != DMU_BONUS_BLKID);
(940 lines elided)
2215 2225  
2216 2226          ASSERT(dmu_tx_is_syncing(tx));
2217 2227  
2218 2228          dprintf_dbuf_bp(db, db->db_blkptr, "blkptr=%p", db->db_blkptr);
2219 2229  
2220 2230          mutex_enter(&db->db_mtx);
2221 2231  
2222 2232          ASSERT(db->db_level > 0);
2223 2233          DBUF_VERIFY(db);
2224 2234  
     2235 +        /* Read the block if it hasn't been read yet. */
2225 2236          if (db->db_buf == NULL) {
2226 2237                  mutex_exit(&db->db_mtx);
2227 2238                  (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);
2228 2239                  mutex_enter(&db->db_mtx);
2229 2240          }
2230 2241          ASSERT3U(db->db_state, ==, DB_CACHED);
2231 2242          ASSERT(db->db_buf != NULL);
2232 2243  
2233 2244          DB_DNODE_ENTER(db);
2234 2245          dn = DB_DNODE(db);
     2246 +        /* Indirect block size must match what the dnode thinks it is. */
2235 2247          ASSERT3U(db->db.db_size, ==, 1<<dn->dn_phys->dn_indblkshift);
2236 2248          dbuf_check_blkptr(dn, db);
2237 2249          DB_DNODE_EXIT(db);
2238 2250  
     2251 +        /* Provide the pending dirty record to child dbufs */
2239 2252          db->db_data_pending = dr;
2240 2253  
2241 2254          mutex_exit(&db->db_mtx);
2242 2255          dbuf_write(dr, db->db_buf, tx);
2243 2256  
2244 2257          zio = dr->dr_zio;
2245 2258          mutex_enter(&dr->dt.di.dr_mtx);
2246 2259          dbuf_sync_list(&dr->dt.di.dr_children, tx);
2247 2260          ASSERT(list_head(&dr->dt.di.dr_children) == NULL);
2248 2261          mutex_exit(&dr->dt.di.dr_mtx);
(366 lines elided)
2615 2628          if (!BP_EQUAL(zio->io_bp, obp)) {
2616 2629                  if (!BP_IS_HOLE(obp))
2617 2630                          dsl_free(spa_get_dsl(zio->io_spa), zio->io_txg, obp);
2618 2631                  arc_release(dr->dt.dl.dr_data, db);
2619 2632          }
2620 2633          mutex_exit(&db->db_mtx);
2621 2634  
2622 2635          dbuf_write_done(zio, NULL, db);
2623 2636  }
2624 2637  
     2638 +/* Issue I/O to commit a dirty buffer to disk. */
2625 2639  static void
2626 2640  dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
2627 2641  {
2628 2642          dmu_buf_impl_t *db = dr->dr_dbuf;
2629 2643          dnode_t *dn;
2630 2644          objset_t *os;
2631 2645          dmu_buf_impl_t *parent = db->db_parent;
2632 2646          uint64_t txg = tx->tx_txg;
2633 2647          zbookmark_t zb;
2634 2648          zio_prop_t zp;
(14 lines elided)
2649 2663                           */
2650 2664                          if (BP_IS_HOLE(db->db_blkptr)) {
2651 2665                                  arc_buf_thaw(data);
2652 2666                          } else {
2653 2667                                  dbuf_release_bp(db);
2654 2668                          }
2655 2669                  }
2656 2670          }
2657 2671  
2658 2672          if (parent != dn->dn_dbuf) {
     2673 +                /* Our parent is an indirect block. */
     2674 +                /* We have a dirty parent that has been scheduled for write. */
2659 2675                  ASSERT(parent && parent->db_data_pending);
     2676 +                /* Our parent's buffer is one level closer to the dnode. */
2660 2677                  ASSERT(db->db_level == parent->db_level-1);
     2678 +                /*
     2679 +                 * We're about to modify our parent's db_data by modifying
     2680 +                 * our block pointer, so the parent must be released.
     2681 +                 */
2661 2682                  ASSERT(arc_released(parent->db_buf));
2662 2683                  zio = parent->db_data_pending->dr_zio;
2663 2684          } else {
     2685 +                /* Our parent is the dnode itself. */
2664 2686                  ASSERT((db->db_level == dn->dn_phys->dn_nlevels-1 &&
2665 2687                      db->db_blkid != DMU_SPILL_BLKID) ||
2666 2688                      (db->db_blkid == DMU_SPILL_BLKID && db->db_level == 0));
2667 2689                  if (db->db_blkid != DMU_SPILL_BLKID)
2668 2690                          ASSERT3P(db->db_blkptr, ==,
2669 2691                              &dn->dn_phys->dn_blkptr[db->db_blkid]);
2670 2692                  zio = dn->dn_zio;
2671 2693          }
2672 2694  
2673 2695          ASSERT(db->db_level == 0 || data == db->db_buf);
(40 lines elided)