3006 VERIFY[S,U,P] and ASSERT[S,U,P] frequently check if first argument is zero
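
The change below replaces open-coded zero comparisons such as
ASSERT3U(err, ==, 0) with the new ASSERT0()/VERIFY0() convenience
macros introduced by this fix. As a rough sketch of what those macros
look like (the authoritative definitions live in sys/debug.h and
sys/zfs_context.h and may differ in detail):

	/*
	 * Hedged sketch, not the verbatim definition: ASSERT0(x)
	 * asserts that an expression evaluates to zero in DEBUG
	 * builds; VERIFY0(x) is the variant that stays active in
	 * non-DEBUG builds as well.
	 */
	#ifdef DEBUG
	#define	ASSERT0(x)	ASSERT3U((x), ==, 0)
	#else
	#define	ASSERT0(x)	((void)0)
	#endif
	#define	VERIFY0(x)	VERIFY3U((x), ==, 0)

	/* Typical conversion, as applied at new lines 277, 297, 353, 474: */
	ASSERT3U(err, ==, 0);	/* before */
	ASSERT0(err);		/* after */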
          --- old/usr/src/uts/common/fs/zfs/dnode_sync.c
          +++ new/usr/src/uts/common/fs/zfs/dnode_sync.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright (c) 2012 by Delphix. All rights reserved.
  25   25   */
  26   26  
  27   27  #include <sys/zfs_context.h>
  28   28  #include <sys/dbuf.h>
  29   29  #include <sys/dnode.h>
  30   30  #include <sys/dmu.h>
  31   31  #include <sys/dmu_tx.h>
  32   32  #include <sys/dmu_objset.h>
  33   33  #include <sys/dsl_dataset.h>
  34   34  #include <sys/spa.h>
  35   35  
  36   36  static void
  37   37  dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
  38   38  {
  39   39          dmu_buf_impl_t *db;
  40   40          int txgoff = tx->tx_txg & TXG_MASK;
  41   41          int nblkptr = dn->dn_phys->dn_nblkptr;
  42   42          int old_toplvl = dn->dn_phys->dn_nlevels - 1;
  43   43          int new_level = dn->dn_next_nlevels[txgoff];
  44   44          int i;
  45   45  
  46   46          rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
  47   47  
  48   48          /* this dnode can't be paged out because it's dirty */
  49   49          ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
  50   50          ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
  51   51          ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0);
  52   52  
  53   53          db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG);
  54   54          ASSERT(db != NULL);
  55   55  
  56   56          dn->dn_phys->dn_nlevels = new_level;
  57   57          dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
  58   58              dn->dn_object, dn->dn_phys->dn_nlevels);
  59   59  
  60   60          /* check for existing blkptrs in the dnode */
  61   61          for (i = 0; i < nblkptr; i++)
  62   62                  if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i]))
  63   63                          break;
  64   64          if (i != nblkptr) {
  65   65                  /* transfer dnode's block pointers to new indirect block */
  66   66                  (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
  67   67                  ASSERT(db->db.db_data);
  68   68                  ASSERT(arc_released(db->db_buf));
  69   69                  ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
  70   70                  bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
  71   71                      sizeof (blkptr_t) * nblkptr);
  72   72                  arc_buf_freeze(db->db_buf);
  73   73          }
  74   74  
  75   75          /* set dbuf's parent pointers to new indirect buf */
  76   76          for (i = 0; i < nblkptr; i++) {
  77   77                  dmu_buf_impl_t *child = dbuf_find(dn, old_toplvl, i);
  78   78  
  79   79                  if (child == NULL)
  80   80                          continue;
  81   81  #ifdef  DEBUG
  82   82                  DB_DNODE_ENTER(child);
  83   83                  ASSERT3P(DB_DNODE(child), ==, dn);
  84   84                  DB_DNODE_EXIT(child);
  85   85  #endif  /* DEBUG */
  86   86                  if (child->db_parent && child->db_parent != dn->dn_dbuf) {
  87   87                          ASSERT(child->db_parent->db_level == db->db_level);
  88   88                          ASSERT(child->db_blkptr !=
  89   89                              &dn->dn_phys->dn_blkptr[child->db_blkid]);
  90   90                          mutex_exit(&child->db_mtx);
  91   91                          continue;
  92   92                  }
  93   93                  ASSERT(child->db_parent == NULL ||
  94   94                      child->db_parent == dn->dn_dbuf);
  95   95  
  96   96                  child->db_parent = db;
  97   97                  dbuf_add_ref(db, child);
  98   98                  if (db->db.db_data)
  99   99                          child->db_blkptr = (blkptr_t *)db->db.db_data + i;
 100  100                  else
 101  101                          child->db_blkptr = NULL;
 102  102                  dprintf_dbuf_bp(child, child->db_blkptr,
 103  103                      "changed db_blkptr to new indirect %s", "");
 104  104  
 105  105                  mutex_exit(&child->db_mtx);
 106  106          }
 107  107  
 108  108          bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr);
 109  109  
 110  110          dbuf_rele(db, FTAG);
 111  111  
 112  112          rw_exit(&dn->dn_struct_rwlock);
 113  113  }
 114  114  
 115  115  static int
 116  116  free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
 117  117  {
 118  118          dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
 119  119          uint64_t bytesfreed = 0;
 120  120          int i, blocks_freed = 0;
 121  121  
 122  122          dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num);
 123  123  
 124  124          for (i = 0; i < num; i++, bp++) {
 125  125                  if (BP_IS_HOLE(bp))
 126  126                          continue;
 127  127  
 128  128                  bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE);
 129  129                  ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));
 130  130                  bzero(bp, sizeof (blkptr_t));
 131  131                  blocks_freed += 1;
 132  132          }
 133  133          dnode_diduse_space(dn, -bytesfreed);
 134  134          return (blocks_freed);
 135  135  }
 136  136  
 137  137  #ifdef ZFS_DEBUG
 138  138  static void
 139  139  free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
 140  140  {
 141  141          int off, num;
 142  142          int i, err, epbs;
 143  143          uint64_t txg = tx->tx_txg;
 144  144          dnode_t *dn;
 145  145  
 146  146          DB_DNODE_ENTER(db);
 147  147          dn = DB_DNODE(db);
 148  148          epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
 149  149          off = start - (db->db_blkid * 1<<epbs);
 150  150          num = end - start + 1;
 151  151  
 152  152          ASSERT3U(off, >=, 0);
 153  153          ASSERT3U(num, >=, 0);
 154  154          ASSERT3U(db->db_level, >, 0);
 155  155          ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
 156  156          ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT);
 157  157          ASSERT(db->db_blkptr != NULL);
 158  158  
 159  159          for (i = off; i < off+num; i++) {
 160  160                  uint64_t *buf;
 161  161                  dmu_buf_impl_t *child;
 162  162                  dbuf_dirty_record_t *dr;
 163  163                  int j;
 164  164  
 165  165                  ASSERT(db->db_level == 1);
 166  166  
 167  167                  rw_enter(&dn->dn_struct_rwlock, RW_READER);
 168  168                  err = dbuf_hold_impl(dn, db->db_level-1,
 169  169                      (db->db_blkid << epbs) + i, TRUE, FTAG, &child);
 170  170                  rw_exit(&dn->dn_struct_rwlock);
 171  171                  if (err == ENOENT)
 172  172                          continue;
 173  173                  ASSERT(err == 0);
 174  174                  ASSERT(child->db_level == 0);
 175  175                  dr = child->db_last_dirty;
 176  176                  while (dr && dr->dr_txg > txg)
 177  177                          dr = dr->dr_next;
 178  178                  ASSERT(dr == NULL || dr->dr_txg == txg);
 179  179  
 180  180                  /* data_old better be zeroed */
 181  181                  if (dr) {
 182  182                          buf = dr->dt.dl.dr_data->b_data;
 183  183                          for (j = 0; j < child->db.db_size >> 3; j++) {
 184  184                                  if (buf[j] != 0) {
 185  185                                          panic("freed data not zero: "
 186  186                                              "child=%p i=%d off=%d num=%d\n",
 187  187                                              (void *)child, i, off, num);
 188  188                                  }
 189  189                          }
 190  190                  }
 191  191  
 192  192                  /*
 193  193                   * db_data better be zeroed unless it's dirty in a
 194  194                   * future txg.
 195  195                   */
 196  196                  mutex_enter(&child->db_mtx);
 197  197                  buf = child->db.db_data;
 198  198                  if (buf != NULL && child->db_state != DB_FILL &&
 199  199                      child->db_last_dirty == NULL) {
 200  200                          for (j = 0; j < child->db.db_size >> 3; j++) {
 201  201                                  if (buf[j] != 0) {
 202  202                                          panic("freed data not zero: "
 203  203                                              "child=%p i=%d off=%d num=%d\n",
 204  204                                              (void *)child, i, off, num);
 205  205                                  }
 206  206                          }
 207  207                  }
 208  208                  mutex_exit(&child->db_mtx);
 209  209  
 210  210                  dbuf_rele(child, FTAG);
 211  211          }
 212  212          DB_DNODE_EXIT(db);
 213  213  }
 214  214  #endif
 215  215  
 216  216  #define ALL -1
 217  217  
 218  218  static int
 219  219  free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc,
 220  220      dmu_tx_t *tx)
 221  221  {
 222  222          dnode_t *dn;
 223  223          blkptr_t *bp;
 224  224          dmu_buf_impl_t *subdb;
 225  225          uint64_t start, end, dbstart, dbend, i;
 226  226          int epbs, shift, err;
 227  227          int all = TRUE;
 228  228          int blocks_freed = 0;
 229  229  
 230  230          /*
 231  231           * There is a small possibility that this block will not be cached:
 232  232           *   1 - if level > 1 and there are no children with level <= 1
 233  233           *   2 - if we didn't get a dirty hold (because this block had just
 234  234           *       finished being written -- and so had no holds), and then this
 235  235           *       block got evicted before we got here.
 236  236           */
 237  237          if (db->db_state != DB_CACHED)
 238  238                  (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);
 239  239  
 240  240          dbuf_release_bp(db);
 241  241          bp = (blkptr_t *)db->db.db_data;
 242  242  
 243  243          DB_DNODE_ENTER(db);
 244  244          dn = DB_DNODE(db);
 245  245          epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
 246  246          shift = (db->db_level - 1) * epbs;
 247  247          dbstart = db->db_blkid << epbs;
 248  248          start = blkid >> shift;
 249  249          if (dbstart < start) {
 250  250                  bp += start - dbstart;
 251  251                  all = FALSE;
 252  252          } else {
 253  253                  start = dbstart;
 254  254          }
 255  255          dbend = ((db->db_blkid + 1) << epbs) - 1;
 256  256          end = (blkid + nblks - 1) >> shift;
 257  257          if (dbend <= end)
 258  258                  end = dbend;
 259  259          else if (all)
 260  260                  all = trunc;
 261  261          ASSERT3U(start, <=, end);
 262  262  
 263  263          if (db->db_level == 1) {
 264  264                  FREE_VERIFY(db, start, end, tx);
 265  265                  blocks_freed = free_blocks(dn, bp, end-start+1, tx);
 266  266                  arc_buf_freeze(db->db_buf);
 267  267                  ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
 268  268                  DB_DNODE_EXIT(db);
 269  269                  return (all ? ALL : blocks_freed);
 270  270          }
 271  271  
 272  272          for (i = start; i <= end; i++, bp++) {
 273  273                  if (BP_IS_HOLE(bp))
 274  274                          continue;
 275  275                  rw_enter(&dn->dn_struct_rwlock, RW_READER);
 276  276                  err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb);
 277      -                ASSERT3U(err, ==, 0);
      277 +                ASSERT0(err);
 278  278                  rw_exit(&dn->dn_struct_rwlock);
 279  279  
 280  280                  if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) {
 281  281                          ASSERT3P(subdb->db_blkptr, ==, bp);
 282  282                          blocks_freed += free_blocks(dn, bp, 1, tx);
 283  283                  } else {
 284  284                          all = FALSE;
 285  285                  }
 286  286                  dbuf_rele(subdb, FTAG);
 287  287          }
 288  288          DB_DNODE_EXIT(db);
 289  289          arc_buf_freeze(db->db_buf);
 290  290  #ifdef ZFS_DEBUG
 291  291          bp -= (end-start)+1;
 292  292          for (i = start; i <= end; i++, bp++) {
 293  293                  if (i == start && blkid != 0)
 294  294                          continue;
 295  295                  else if (i == end && !trunc)
 296  296                          continue;
 297      -                ASSERT3U(bp->blk_birth, ==, 0);
      297 +                ASSERT0(bp->blk_birth);
 298  298          }
 299  299  #endif
 300  300          ASSERT(all || blocks_freed == 0 || db->db_last_dirty);
 301  301          return (all ? ALL : blocks_freed);
 302  302  }
 303  303  
 304  304  /*
 305  305   * free_range: Traverse the indicated range of the provided file
 306  306   * and "free" all the blocks contained there.
 307  307   */
 308  308  static void
 309  309  dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
 310  310  {
 311  311          blkptr_t *bp = dn->dn_phys->dn_blkptr;
 312  312          dmu_buf_impl_t *db;
 313  313          int trunc, start, end, shift, i, err;
 314  314          int dnlevel = dn->dn_phys->dn_nlevels;
 315  315  
 316  316          if (blkid > dn->dn_phys->dn_maxblkid)
 317  317                  return;
 318  318  
 319  319          ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX);
 320  320          trunc = blkid + nblks > dn->dn_phys->dn_maxblkid;
 321  321          if (trunc)
 322  322                  nblks = dn->dn_phys->dn_maxblkid - blkid + 1;
 323  323  
 324  324          /* There are no indirect blocks in the object */
 325  325          if (dnlevel == 1) {
 326  326                  if (blkid >= dn->dn_phys->dn_nblkptr) {
 327  327                          /* this range was never made persistent */
 328  328                          return;
 329  329                  }
 330  330                  ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr);
 331  331                  (void) free_blocks(dn, bp + blkid, nblks, tx);
 332  332                  if (trunc) {
 333  333                          uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
 334  334                              (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
 335  335                          dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
 336  336                          ASSERT(off < dn->dn_phys->dn_maxblkid ||
 337  337                              dn->dn_phys->dn_maxblkid == 0 ||
 338  338                              dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
 339  339                  }
 340  340                  return;
 341  341          }
 342  342  
 343  343          shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT);
 344  344          start = blkid >> shift;
 345  345          ASSERT(start < dn->dn_phys->dn_nblkptr);
 346  346          end = (blkid + nblks - 1) >> shift;
 347  347          bp += start;
 348  348          for (i = start; i <= end; i++, bp++) {
 349  349                  if (BP_IS_HOLE(bp))
 350  350                          continue;
 351  351                  rw_enter(&dn->dn_struct_rwlock, RW_READER);
 352  352                  err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db);
 353      -                ASSERT3U(err, ==, 0);
      353 +                ASSERT0(err);
 354  354                  rw_exit(&dn->dn_struct_rwlock);
 355  355  
 356  356                  if (free_children(db, blkid, nblks, trunc, tx) == ALL) {
 357  357                          ASSERT3P(db->db_blkptr, ==, bp);
 358  358                          (void) free_blocks(dn, bp, 1, tx);
 359  359                  }
 360  360                  dbuf_rele(db, FTAG);
 361  361          }
 362  362          if (trunc) {
 363  363                  uint64_t off = (dn->dn_phys->dn_maxblkid + 1) *
 364  364                      (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT);
 365  365                  dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0);
 366  366                  ASSERT(off < dn->dn_phys->dn_maxblkid ||
 367  367                      dn->dn_phys->dn_maxblkid == 0 ||
 368  368                      dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0);
 369  369          }
 370  370  }
 371  371  
 372  372  /*
  373  373   * Try to kick all the dnode's dbufs out of the cache...
 374  374   */
 375  375  void
 376  376  dnode_evict_dbufs(dnode_t *dn)
 377  377  {
 378  378          int progress;
 379  379          int pass = 0;
 380  380  
 381  381          do {
 382  382                  dmu_buf_impl_t *db, marker;
 383  383                  int evicting = FALSE;
 384  384  
 385  385                  progress = FALSE;
 386  386                  mutex_enter(&dn->dn_dbufs_mtx);
 387  387                  list_insert_tail(&dn->dn_dbufs, &marker);
 388  388                  db = list_head(&dn->dn_dbufs);
  389  389                  for (; db != &marker; db = list_head(&dn->dn_dbufs)) {
 390  390                          list_remove(&dn->dn_dbufs, db);
 391  391                          list_insert_tail(&dn->dn_dbufs, db);
 392  392  #ifdef  DEBUG
 393  393                          DB_DNODE_ENTER(db);
 394  394                          ASSERT3P(DB_DNODE(db), ==, dn);
 395  395                          DB_DNODE_EXIT(db);
 396  396  #endif  /* DEBUG */
 397  397  
 398  398                          mutex_enter(&db->db_mtx);
 399  399                          if (db->db_state == DB_EVICTING) {
 400  400                                  progress = TRUE;
 401  401                                  evicting = TRUE;
 402  402                                  mutex_exit(&db->db_mtx);
 403  403                          } else if (refcount_is_zero(&db->db_holds)) {
 404  404                                  progress = TRUE;
 405  405                                  dbuf_clear(db); /* exits db_mtx for us */
 406  406                          } else {
 407  407                                  mutex_exit(&db->db_mtx);
 408  408                          }
 409  409  
 410  410                  }
 411  411                  list_remove(&dn->dn_dbufs, &marker);
 412  412                  /*
 413  413                   * NB: we need to drop dn_dbufs_mtx between passes so
 414  414                   * that any DB_EVICTING dbufs can make progress.
 415  415                   * Ideally, we would have some cv we could wait on, but
 416  416                   * since we don't, just wait a bit to give the other
 417  417                   * thread a chance to run.
 418  418                   */
 419  419                  mutex_exit(&dn->dn_dbufs_mtx);
 420  420                  if (evicting)
 421  421                          delay(1);
 422  422                  pass++;
 423  423                  ASSERT(pass < 100); /* sanity check */
 424  424          } while (progress);
 425  425  
 426  426          rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
 427  427          if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
 428  428                  mutex_enter(&dn->dn_bonus->db_mtx);
 429  429                  dbuf_evict(dn->dn_bonus);
 430  430                  dn->dn_bonus = NULL;
 431  431          }
 432  432          rw_exit(&dn->dn_struct_rwlock);
 433  433  }
 434  434  
 435  435  static void
 436  436  dnode_undirty_dbufs(list_t *list)
 437  437  {
 438  438          dbuf_dirty_record_t *dr;
 439  439  
 440  440          while (dr = list_head(list)) {
 441  441                  dmu_buf_impl_t *db = dr->dr_dbuf;
 442  442                  uint64_t txg = dr->dr_txg;
 443  443  
 444  444                  if (db->db_level != 0)
 445  445                          dnode_undirty_dbufs(&dr->dt.di.dr_children);
 446  446  
 447  447                  mutex_enter(&db->db_mtx);
 448  448                  /* XXX - use dbuf_undirty()? */
 449  449                  list_remove(list, dr);
 450  450                  ASSERT(db->db_last_dirty == dr);
 451  451                  db->db_last_dirty = NULL;
 452  452                  db->db_dirtycnt -= 1;
 453  453                  if (db->db_level == 0) {
 454  454                          ASSERT(db->db_blkid == DMU_BONUS_BLKID ||
 455  455                              dr->dt.dl.dr_data == db->db_buf);
 456  456                          dbuf_unoverride(dr);
 457  457                  }
 458  458                  kmem_free(dr, sizeof (dbuf_dirty_record_t));
 459  459                  dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg);
 460  460          }
 461  461  }
 462  462  
 463  463  static void
 464  464  dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
 465  465  {
 466  466          int txgoff = tx->tx_txg & TXG_MASK;
 467  467  
 468  468          ASSERT(dmu_tx_is_syncing(tx));
 469  469  
 470  470          /*
 471  471           * Our contents should have been freed in dnode_sync() by the
 472  472           * free range record inserted by the caller of dnode_free().
 473  473           */
 474      -        ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0);
      474 +        ASSERT0(DN_USED_BYTES(dn->dn_phys));
 475  475          ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr));
 476  476  
 477  477          dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
 478  478          dnode_evict_dbufs(dn);
 479  479          ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL);
 480  480  
 481  481          /*
 482  482           * XXX - It would be nice to assert this, but we may still
 483  483           * have residual holds from async evictions from the arc...
 484  484           *
 485  485           * zfs_obj_to_path() also depends on this being
 486  486           * commented out.
 487  487           *
 488  488           * ASSERT3U(refcount_count(&dn->dn_holds), ==, 1);
 489  489           */
 490  490  
 491  491          /* Undirty next bits */
 492  492          dn->dn_next_nlevels[txgoff] = 0;
 493  493          dn->dn_next_indblkshift[txgoff] = 0;
 494  494          dn->dn_next_blksz[txgoff] = 0;
 495  495  
 496  496          /* ASSERT(blkptrs are zero); */
 497  497          ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
 498  498          ASSERT(dn->dn_type != DMU_OT_NONE);
 499  499  
 500  500          ASSERT(dn->dn_free_txg > 0);
 501  501          if (dn->dn_allocated_txg != dn->dn_free_txg)
 502  502                  dbuf_will_dirty(dn->dn_dbuf, tx);
 503  503          bzero(dn->dn_phys, sizeof (dnode_phys_t));
 504  504  
 505  505          mutex_enter(&dn->dn_mtx);
 506  506          dn->dn_type = DMU_OT_NONE;
 507  507          dn->dn_maxblkid = 0;
 508  508          dn->dn_allocated_txg = 0;
 509  509          dn->dn_free_txg = 0;
 510  510          dn->dn_have_spill = B_FALSE;
 511  511          mutex_exit(&dn->dn_mtx);
 512  512  
 513  513          ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
 514  514  
 515  515          dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
 516  516          /*
 517  517           * Now that we've released our hold, the dnode may
  518  518           * be evicted, so we mustn't access it.
 519  519           */
 520  520  }
 521  521  
 522  522  /*
 523  523   * Write out the dnode's dirty buffers.
 524  524   */
 525  525  void
 526  526  dnode_sync(dnode_t *dn, dmu_tx_t *tx)
 527  527  {
 528  528          free_range_t *rp;
 529  529          dnode_phys_t *dnp = dn->dn_phys;
 530  530          int txgoff = tx->tx_txg & TXG_MASK;
 531  531          list_t *list = &dn->dn_dirty_records[txgoff];
 532  532          static const dnode_phys_t zerodn = { 0 };
 533  533          boolean_t kill_spill = B_FALSE;
 534  534  
 535  535          ASSERT(dmu_tx_is_syncing(tx));
 536  536          ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
 537  537          ASSERT(dnp->dn_type != DMU_OT_NONE ||
 538  538              bcmp(dnp, &zerodn, DNODE_SIZE) == 0);
 539  539          DNODE_VERIFY(dn);
 540  540  
 541  541          ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));
 542  542  
 543  543          if (dmu_objset_userused_enabled(dn->dn_objset) &&
 544  544              !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
 545  545                  mutex_enter(&dn->dn_mtx);
 546  546                  dn->dn_oldused = DN_USED_BYTES(dn->dn_phys);
 547  547                  dn->dn_oldflags = dn->dn_phys->dn_flags;
 548  548                  dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED;
 549  549                  mutex_exit(&dn->dn_mtx);
 550  550                  dmu_objset_userquota_get_ids(dn, B_FALSE, tx);
 551  551          } else {
 552  552                  /* Once we account for it, we should always account for it. */
 553  553                  ASSERT(!(dn->dn_phys->dn_flags &
 554  554                      DNODE_FLAG_USERUSED_ACCOUNTED));
 555  555          }
 556  556  
 557  557          mutex_enter(&dn->dn_mtx);
 558  558          if (dn->dn_allocated_txg == tx->tx_txg) {
 559  559                  /* The dnode is newly allocated or reallocated */
 560  560                  if (dnp->dn_type == DMU_OT_NONE) {
 561  561                          /* this is a first alloc, not a realloc */
 562  562                          dnp->dn_nlevels = 1;
 563  563                          dnp->dn_nblkptr = dn->dn_nblkptr;
 564  564                  }
 565  565  
 566  566                  dnp->dn_type = dn->dn_type;
 567  567                  dnp->dn_bonustype = dn->dn_bonustype;
 568  568                  dnp->dn_bonuslen = dn->dn_bonuslen;
 569  569          }
 570  570  
 571  571          ASSERT(dnp->dn_nlevels > 1 ||
 572  572              BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
 573  573              BP_GET_LSIZE(&dnp->dn_blkptr[0]) ==
 574  574              dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
 575  575  
 576  576          if (dn->dn_next_blksz[txgoff]) {
 577  577                  ASSERT(P2PHASE(dn->dn_next_blksz[txgoff],
 578  578                      SPA_MINBLOCKSIZE) == 0);
 579  579                  ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
 580  580                      dn->dn_maxblkid == 0 || list_head(list) != NULL ||
 581  581                      avl_last(&dn->dn_ranges[txgoff]) ||
 582  582                      dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT ==
 583  583                      dnp->dn_datablkszsec);
 584  584                  dnp->dn_datablkszsec =
 585  585                      dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT;
 586  586                  dn->dn_next_blksz[txgoff] = 0;
 587  587          }
 588  588  
 589  589          if (dn->dn_next_bonuslen[txgoff]) {
 590  590                  if (dn->dn_next_bonuslen[txgoff] == DN_ZERO_BONUSLEN)
 591  591                          dnp->dn_bonuslen = 0;
 592  592                  else
 593  593                          dnp->dn_bonuslen = dn->dn_next_bonuslen[txgoff];
 594  594                  ASSERT(dnp->dn_bonuslen <= DN_MAX_BONUSLEN);
 595  595                  dn->dn_next_bonuslen[txgoff] = 0;
 596  596          }
 597  597  
 598  598          if (dn->dn_next_bonustype[txgoff]) {
 599  599                  ASSERT(DMU_OT_IS_VALID(dn->dn_next_bonustype[txgoff]));
 600  600                  dnp->dn_bonustype = dn->dn_next_bonustype[txgoff];
 601  601                  dn->dn_next_bonustype[txgoff] = 0;
 602  602          }
 603  603  
 604  604          /*
 605  605           * We will either remove a spill block when a file is being removed
 606  606           * or we have been asked to remove it.
 607  607           */
 608  608          if (dn->dn_rm_spillblk[txgoff] ||
 609  609              ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) &&
 610  610              dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) {
 611  611                  if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR))
 612  612                          kill_spill = B_TRUE;
 613  613                  dn->dn_rm_spillblk[txgoff] = 0;
 614  614          }
 615  615  
 616  616          if (dn->dn_next_indblkshift[txgoff]) {
 617  617                  ASSERT(dnp->dn_nlevels == 1);
 618  618                  dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff];
 619  619                  dn->dn_next_indblkshift[txgoff] = 0;
 620  620          }
 621  621  
 622  622          /*
 623  623           * Just take the live (open-context) values for checksum and compress.
 624  624           * Strictly speaking it's a future leak, but nothing bad happens if we
 625  625           * start using the new checksum or compress algorithm a little early.
 626  626           */
 627  627          dnp->dn_checksum = dn->dn_checksum;
 628  628          dnp->dn_compress = dn->dn_compress;
 629  629  
 630  630          mutex_exit(&dn->dn_mtx);
 631  631  
 632  632          if (kill_spill) {
 633  633                  (void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
 634  634                  mutex_enter(&dn->dn_mtx);
 635  635                  dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
 636  636                  mutex_exit(&dn->dn_mtx);
 637  637          }
 638  638  
 639  639          /* process all the "freed" ranges in the file */
 640  640          while (rp = avl_last(&dn->dn_ranges[txgoff])) {
 641  641                  dnode_sync_free_range(dn, rp->fr_blkid, rp->fr_nblks, tx);
 642  642                  /* grab the mutex so we don't race with dnode_block_freed() */
 643  643                  mutex_enter(&dn->dn_mtx);
 644  644                  avl_remove(&dn->dn_ranges[txgoff], rp);
 645  645                  mutex_exit(&dn->dn_mtx);
 646  646                  kmem_free(rp, sizeof (free_range_t));
 647  647          }
 648  648  
 649  649          if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) {
 650  650                  dnode_sync_free(dn, tx);
 651  651                  return;
 652  652          }
 653  653  
 654  654          if (dn->dn_next_nblkptr[txgoff]) {
 655  655                  /* this should only happen on a realloc */
 656  656                  ASSERT(dn->dn_allocated_txg == tx->tx_txg);
 657  657                  if (dn->dn_next_nblkptr[txgoff] > dnp->dn_nblkptr) {
 658  658                          /* zero the new blkptrs we are gaining */
 659  659                          bzero(dnp->dn_blkptr + dnp->dn_nblkptr,
 660  660                              sizeof (blkptr_t) *
 661  661                              (dn->dn_next_nblkptr[txgoff] - dnp->dn_nblkptr));
 662  662  #ifdef ZFS_DEBUG
 663  663                  } else {
 664  664                          int i;
 665  665                          ASSERT(dn->dn_next_nblkptr[txgoff] < dnp->dn_nblkptr);
 666  666                          /* the blkptrs we are losing better be unallocated */
 667  667                          for (i = dn->dn_next_nblkptr[txgoff];
 668  668                              i < dnp->dn_nblkptr; i++)
 669  669                                  ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[i]));
 670  670  #endif
 671  671                  }
 672  672                  mutex_enter(&dn->dn_mtx);
 673  673                  dnp->dn_nblkptr = dn->dn_next_nblkptr[txgoff];
 674  674                  dn->dn_next_nblkptr[txgoff] = 0;
 675  675                  mutex_exit(&dn->dn_mtx);
 676  676          }
 677  677  
 678  678          if (dn->dn_next_nlevels[txgoff]) {
 679  679                  dnode_increase_indirection(dn, tx);
 680  680                  dn->dn_next_nlevels[txgoff] = 0;
 681  681          }
 682  682  
 683  683          dbuf_sync_list(list, tx);
 684  684  
 685  685          if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) {
 686  686                  ASSERT3P(list_head(list), ==, NULL);
 687  687                  dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
 688  688          }
 689  689  
 690  690          /*
 691  691           * Although we have dropped our reference to the dnode, it
  692  692           * can't be evicted until it's written, and we haven't yet
 693  693           * initiated the IO for the dnode's dbuf.
 694  694           */
 695  695  }