Print this page
3752 want more verifiable dbuf user eviction
Submitted by:   Justin Gibbs <justing@spectralogic.com>
Submitted by:   Will Andrews <willa@spectralogic.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/zap_micro.c
          +++ new/usr/src/uts/common/fs/zfs/zap_micro.c
↓ open down ↓ 37 lines elided ↑ open up ↑
  38   38  #include <sys/sunddi.h>
  39   39  #endif
  40   40  
  41   41  static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags);
  42   42  
  43   43  uint64_t
  44   44  zap_getflags(zap_t *zap)
  45   45  {
  46   46          if (zap->zap_ismicro)
  47   47                  return (0);
  48      -        return (zap->zap_u.zap_fat.zap_phys->zap_flags);
       48 +        return (zap->zap_f_phys->zap_flags);
  49   49  }
  50   50  
  51   51  int
  52   52  zap_hashbits(zap_t *zap)
  53   53  {
  54   54          if (zap_getflags(zap) & ZAP_FLAG_HASH64)
  55   55                  return (48);
  56   56          else
  57   57                  return (28);
  58   58  }
↓ open down ↓ 318 lines elided ↑ open up ↑
 377  377                  zap->zap_f.zap_block_shift = highbit(db->db_size) - 1;
 378  378          } else {
 379  379                  zap->zap_ismicro = TRUE;
 380  380          }
 381  381  
 382  382          /*
 383  383           * Make sure that zap_ismicro is set before we let others see
 384  384           * it, because zap_lockdir() checks zap_ismicro without the lock
 385  385           * held.
 386  386           */
 387      -        winner = dmu_buf_set_user(db, zap, &zap->zap_m.zap_phys, zap_evict);
      387 +        dmu_buf_init_user(&zap->db_evict, zap_evict);
      388 +        winner = (zap_t *)dmu_buf_set_user(db, &zap->db_evict);
 388  389  
 389  390          if (winner != NULL) {
 390  391                  rw_exit(&zap->zap_rwlock);
 391  392                  rw_destroy(&zap->zap_rwlock);
 392  393                  if (!zap->zap_ismicro)
 393  394                          mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
 394  395                  kmem_free(zap, sizeof (zap_t));
 395  396                  return (winner);
 396  397          }
 397  398  
 398  399          if (zap->zap_ismicro) {
 399      -                zap->zap_salt = zap->zap_m.zap_phys->mz_salt;
 400      -                zap->zap_normflags = zap->zap_m.zap_phys->mz_normflags;
      400 +                zap->zap_salt = zap->zap_m_phys->mz_salt;
      401 +                zap->zap_normflags = zap->zap_m_phys->mz_normflags;
 401  402                  zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
 402  403                  avl_create(&zap->zap_m.zap_avl, mze_compare,
 403  404                      sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
 404  405  
 405  406                  for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
 406  407                          mzap_ent_phys_t *mze =
 407      -                            &zap->zap_m.zap_phys->mz_chunk[i];
      408 +                            &zap->zap_m_phys->mz_chunk[i];
 408  409                          if (mze->mze_name[0]) {
 409  410                                  zap_name_t *zn;
 410  411  
 411  412                                  zap->zap_m.zap_num_entries++;
 412  413                                  zn = zap_name_alloc(zap, mze->mze_name,
 413  414                                      MT_EXACT);
 414  415                                  mze_insert(zap, i, zn->zn_hash);
 415  416                                  zap_name_free(zn);
 416  417                          }
 417  418                  }
 418  419          } else {
 419      -                zap->zap_salt = zap->zap_f.zap_phys->zap_salt;
 420      -                zap->zap_normflags = zap->zap_f.zap_phys->zap_normflags;
      420 +                zap->zap_salt = zap->zap_f_phys->zap_salt;
      421 +                zap->zap_normflags = zap->zap_f_phys->zap_normflags;
 421  422  
 422  423                  ASSERT3U(sizeof (struct zap_leaf_header), ==,
 423  424                      2*ZAP_LEAF_CHUNKSIZE);
 424  425  
 425  426                  /*
 426  427                   * The embedded pointer table should not overlap the
 427  428                   * other members.
 428  429                   */
 429  430                  ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
 430      -                    &zap->zap_f.zap_phys->zap_salt);
      431 +                    &zap->zap_f_phys->zap_salt);
 431  432  
 432  433                  /*
 433  434                   * The embedded pointer table should end at the end of
 434  435                   * the block
 435  436                   */
 436  437                  ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
 437  438                      1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
 438      -                    (uintptr_t)zap->zap_f.zap_phys, ==,
      439 +                    (uintptr_t)zap->zap_f_phys, ==,
 439  440                      zap->zap_dbuf->db_size);
 440  441          }
 441  442          rw_exit(&zap->zap_rwlock);
 442  443          return (zap);
 443  444  }
 444  445  
 445  446  int
 446  447  zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
 447  448      krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
 448  449  {
↓ open down ↓ 9 lines elided ↑ open up ↑
 458  459                  return (err);
 459  460  
 460  461  #ifdef ZFS_DEBUG
 461  462          {
 462  463                  dmu_object_info_t doi;
 463  464                  dmu_object_info_from_db(db, &doi);
 464  465                  ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
 465  466          }
 466  467  #endif
 467  468  
 468      -        zap = dmu_buf_get_user(db);
      469 +        zap = (zap_t *)dmu_buf_get_user(db);
 469  470          if (zap == NULL)
 470  471                  zap = mzap_open(os, obj, db);
 471  472  
 472  473          /*
 473  474           * We're checking zap_ismicro without the lock held, in order to
 474  475           * tell what type of lock we want.  Once we have some sort of
 475  476           * lock, see if it really is the right type.  In practice this
 476  477           * can only be different if it was upgraded from micro to fat,
 477  478           * and micro wanted WRITER but fat only needs READER.
 478  479           */
↓ open down ↓ 190 lines elided ↑ open up ↑
 669  670  {
 670  671          /*
 671  672           * dmu_object_free will free the object number and free the
 672  673           * data.  Freeing the data will cause our pageout function to be
 673  674           * called, which will destroy our data (zap_leaf_t's and zap_t).
 674  675           */
 675  676  
 676  677          return (dmu_object_free(os, zapobj, tx));
 677  678  }
 678  679  
 679      -_NOTE(ARGSUSED(0))
 680  680  void
 681      -zap_evict(dmu_buf_t *db, void *vzap)
      681 +zap_evict(dmu_buf_user_t *dbu)
 682  682  {
 683      -        zap_t *zap = vzap;
      683 +        zap_t *zap = (zap_t *)dbu;
 684  684  
 685  685          rw_destroy(&zap->zap_rwlock);
 686  686  
 687  687          if (zap->zap_ismicro)
 688  688                  mze_destroy(zap);
 689  689          else
 690  690                  mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
 691  691  
 692  692          kmem_free(zap, sizeof (zap_t));
 693  693  }
↓ open down ↓ 237 lines elided ↑ open up ↑
 931  931  {
 932  932          int i;
 933  933          zap_t *zap = zn->zn_zap;
 934  934          int start = zap->zap_m.zap_alloc_next;
 935  935          uint32_t cd;
 936  936  
 937  937          ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
 938  938  
 939  939  #ifdef ZFS_DEBUG
 940  940          for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
 941      -                mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
      941 +                mzap_ent_phys_t *mze = &zap->zap_m_phys->mz_chunk[i];
 942  942                  ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0);
 943  943          }
 944  944  #endif
 945  945  
 946  946          cd = mze_find_unused_cd(zap, zn->zn_hash);
 947  947          /* given the limited size of the microzap, this can't happen */
 948  948          ASSERT(cd < zap_maxcd(zap));
 949  949  
 950  950  again:
 951  951          for (i = start; i < zap->zap_m.zap_num_chunks; i++) {
 952      -                mzap_ent_phys_t *mze = &zap->zap_m.zap_phys->mz_chunk[i];
      952 +                mzap_ent_phys_t *mze = &zap->zap_m_phys->mz_chunk[i];
 953  953                  if (mze->mze_name[0] == 0) {
 954  954                          mze->mze_value = value;
 955  955                          mze->mze_cd = cd;
 956  956                          (void) strcpy(mze->mze_name, zn->zn_key_orig);
 957  957                          zap->zap_m.zap_num_entries++;
 958  958                          zap->zap_m.zap_alloc_next = i+1;
 959  959                          if (zap->zap_m.zap_alloc_next ==
 960  960                              zap->zap_m.zap_num_chunks)
 961  961                                  zap->zap_m.zap_alloc_next = 0;
 962  962                          mze_insert(zap, i, zn->zn_hash);
↓ open down ↓ 180 lines elided ↑ open up ↑
1143 1143                  return (SET_ERROR(ENOTSUP));
1144 1144          }
1145 1145          if (!zap->zap_ismicro) {
1146 1146                  err = fzap_remove(zn, tx);
1147 1147          } else {
1148 1148                  mze = mze_find(zn);
1149 1149                  if (mze == NULL) {
1150 1150                          err = SET_ERROR(ENOENT);
1151 1151                  } else {
1152 1152                          zap->zap_m.zap_num_entries--;
1153      -                        bzero(&zap->zap_m.zap_phys->mz_chunk[mze->mze_chunkid],
     1153 +                        bzero(&zap->zap_m_phys->mz_chunk[mze->mze_chunkid],
1154 1154                              sizeof (mzap_ent_phys_t));
1155 1155                          mze_remove(zap, mze);
1156 1156                  }
1157 1157          }
1158 1158          zap_name_free(zn);
1159 1159          zap_unlockdir(zap);
1160 1160          return (err);
1161 1161  }
1162 1162  
1163 1163  int
↓ open down ↓ 292 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX