Print this page
4185 New hash algorithm support

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/zio_checksum.c
          +++ new/usr/src/uts/common/fs/zfs/zio_checksum.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2013 by Delphix. All rights reserved.
  24   24   * Copyright (c) 2013, Joyent, Inc. All rights reserved.
       25 + * Copyright 2013 Saso Kiselkov. All rights reserved.
  25   26   */
  26   27  
  27   28  #include <sys/zfs_context.h>
  28   29  #include <sys/spa.h>
       30 +#include <sys/spa_impl.h>
  29   31  #include <sys/zio.h>
  30   32  #include <sys/zio_checksum.h>
  31   33  #include <sys/zil.h>
  32   34  #include <zfs_fletcher.h>
  33   35  
  34   36  /*
  35   37   * Checksum vectors.
  36   38   *
  37   39   * In the SPA, everything is checksummed.  We support checksum vectors
  38   40   * for three distinct reasons:
↓ open down ↓ 13 lines elided ↑ open up ↑
  52   54   *      we want the ability to take advantage of that hardware.
  53   55   *
  54   56   * Of course, we don't want a checksum upgrade to invalidate existing
  55   57   * data, so we store the checksum *function* in eight bits of the bp.
  56   58   * This gives us room for up to 256 different checksum functions.
  57   59   *
  58   60   * When writing a block, we always checksum it with the latest-and-greatest
  59   61   * checksum function of the appropriate strength.  When reading a block,
  60   62   * we compare the expected checksum against the actual checksum, which we
  61   63   * compute via the checksum function specified by BP_GET_CHECKSUM(bp).
       64 + *
       65 + * SALTED CHECKSUMS
       66 + *
       67 + * To enable the use of non-cryptographically secure hash algorithms in
       68 + * dedup, we introduce the notion of salted checksums (MACs, really). A salted
       69 + * checksum is fed both a random 256-bit value (the salt) and the data to be
       70 + * checksummed. This salt is kept secret (stored on the pool, but never shown
       71 + * to the user), so even if an attacker knows of collision weaknesses in the
       72 + * hash algorithm, they cannot mount a known-plaintext attack on
       73 + * the DDT, since the actual hash value cannot be known ahead of time. How
       74 + * the salt is used is algorithm-specific (some might simply prefix it to the
       75 + * data block, others might need to utilize a full-blown HMAC). On disk the
       76 + * salt is stored in a ZAP object in the MOS (DMU_POOL_CHECKSUM_SALT).
       77 + *
       78 + * CONTEXT TEMPLATES
       79 + *
       80 + * Some hashing algorithms need to perform a substantial amount of
       81 + * initialization work (e.g. salted checksums above may need to pre-hash the
       82 + * salt) before being able to process data. Performing this redundant work
       83 + * for each block would be very wasteful, so we instead allow a checksum
       84 + * algorithm to do the work once (the first time it's used) and then keep
       85 + * this pre-initialized context as a template inside the spa_t
       86 + * (spa_cksum_tmpls). If the zio_checksum_info_t contains non-NULL
       87 + * ci_tmpl_init and ci_tmpl_free callbacks, they are used to construct and
       88 + * destroy the pre-initialized checksum context. The pre-initialized
       89 + * context is then reused during each checksum invocation and passed to the
       90 + * checksum function.
  62   91   */
  63   92  
  64   93  /*ARGSUSED*/
  65   94  static void
  66      -zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
       95 +zio_checksum_off(const void *buf, uint64_t size, const zio_cksum_salt_t *salt,
       96 +    const void *ctx_template, zio_cksum_t *zcp)
  67   97  {
  68   98          ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);	/* all-zero result; every input argument is ignored */
  69   99  }
  70  100  
  71  101  zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {	/* indexed by enum zio_checksum; ci_func[] is selected by the byteswap flag (0 or 1) in zio_checksum_error() */
  72      -        {{NULL,                 NULL},                  0, 0, 0, "inherit"},
  73      -        {{NULL,                 NULL},                  0, 0, 0, "on"},
  74      -        {{zio_checksum_off,     zio_checksum_off},      0, 0, 0, "off"},
  75      -        {{zio_checksum_SHA256,  zio_checksum_SHA256},   1, 1, 0, "label"},
  76      -        {{zio_checksum_SHA256,  zio_checksum_SHA256},   1, 1, 0, "gang_header"},
  77      -        {{fletcher_2_native,    fletcher_2_byteswap},   0, 1, 0, "zilog"},
  78      -        {{fletcher_2_native,    fletcher_2_byteswap},   0, 0, 0, "fletcher2"},
  79      -        {{fletcher_4_native,    fletcher_4_byteswap},   1, 0, 0, "fletcher4"},
  80      -        {{zio_checksum_SHA256,  zio_checksum_SHA256},   1, 0, 1, "sha256"},
  81      -        {{fletcher_4_native,    fletcher_4_byteswap},   0, 1, 0, "zilog2"},
  82      -        {{zio_checksum_off,     zio_checksum_off},      0, 0, 0, "noparity"},
      102 +        {{NULL, NULL}, NULL, NULL, 0, 0, 0, 0, "inherit"},
      103 +        {{NULL, NULL}, NULL, NULL, 0, 0, 0, 0, "on"},
      104 +        {{zio_checksum_off,             zio_checksum_off},
      105 +            NULL, NULL, 0, 0, 0, 0, "off"},
      106 +        {{zio_checksum_SHA256,          zio_checksum_SHA256},
      107 +            NULL, NULL, 1, 1, 0, 0, "label"},
      108 +        {{zio_checksum_SHA256,          zio_checksum_SHA256},
      109 +            NULL, NULL, 1, 1, 0, 0, "gang_header"},
      110 +        {{fletcher_2_native,            fletcher_2_byteswap},
      111 +            NULL, NULL, 0, 1, 0, 0, "zilog"},
      112 +        {{fletcher_2_native,            fletcher_2_byteswap},
      113 +            NULL, NULL, 0, 0, 0, 0, "fletcher2"},
      114 +        {{fletcher_4_native,            fletcher_4_byteswap},
      115 +            NULL, NULL, 1, 0, 0, 0, "fletcher4"},
      116 +        {{zio_checksum_SHA256,          zio_checksum_SHA256},
      117 +            NULL, NULL, 1, 0, 1, 0, "sha256"},
      118 +        {{fletcher_4_native,            fletcher_4_byteswap},
      119 +            NULL, NULL, 0, 1, 0, 0, "zilog2"},
      120 +        {{zio_checksum_off,             zio_checksum_off},
      121 +            NULL, NULL, 0, 0, 0, 0, "noparity"},
      122 +        {{zio_checksum_SHA512_native, zio_checksum_SHA512_byteswap},
      123 +            NULL, NULL, 1, 0, 1, 0, "sha512"},
      124 +        {{zio_checksum_skein_native,    zio_checksum_skein_byteswap},
      125 +            zio_checksum_skein_tmpl_init, zio_checksum_skein_tmpl_free,
      126 +            1, 0, 1, 1, "skein"},
      127 +        {{zio_checksum_edonr_native,    zio_checksum_edonr_byteswap},
      128 +            zio_checksum_edonr_tmpl_init, zio_checksum_edonr_tmpl_free,
      129 +            1, 0, 1, 1, "edonr"}
  83  130  };
  84  131  
  85  132  enum zio_checksum
  86  133  zio_checksum_select(enum zio_checksum child, enum zio_checksum parent)
  87  134  {
  88  135          ASSERT(child < ZIO_CHECKSUM_FUNCTIONS);
  89  136          ASSERT(parent < ZIO_CHECKSUM_FUNCTIONS);
  90  137          ASSERT(parent != ZIO_CHECKSUM_INHERIT && parent != ZIO_CHECKSUM_ON);
  91  138  
  92  139          if (child == ZIO_CHECKSUM_INHERIT)
↓ open down ↓ 48 lines elided ↑ open up ↑
 141  188   * The vdev is implicit, and the txg is unknowable at pool open time --
 142  189   * hence the logic in vdev_uberblock_load() to find the most recent copy.
 143  190   */
 144  191  static void
 145  192  zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset)
 146  193  {
 147  194          ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0);	/* offset goes in the first value; the other three are zero */
 148  195  }
 149  196  
 150  197  /*
      198 + * Calls the template init function of a checksum which supports context
      199 + * templates and installs the template into the spa_t.
          + *
          + * `checksum' indexes both zio_checksum_table[] and spa->spa_cksum_tmpls[].
          + * The algorithm must provide both ci_tmpl_init and ci_tmpl_free; this is
          + * enforced with VERIFY. The template is built from spa->spa_cksum_salt.
          + *
          + * The callers in this file test the slot without holding
          + * spa_cksum_tmpls_lock, so the slot is tested again once the lock is
          + * held and the init callback runs only if the slot is still NULL.
          + * A NULL return from ci_tmpl_init trips the second VERIFY.
      200 + */
      201 +static void
      202 +zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa)
      203 +{
      204 +        zio_checksum_info_t *ci = &zio_checksum_table[checksum];
      205 +
      206 +        VERIFY(ci->ci_tmpl_init != NULL && ci->ci_tmpl_free != NULL);
      207 +        mutex_enter(&spa->spa_cksum_tmpls_lock);
      208 +        if (spa->spa_cksum_tmpls[checksum] == NULL) {
      209 +                spa->spa_cksum_tmpls[checksum] =
      210 +                    ci->ci_tmpl_init(&spa->spa_cksum_salt);
      211 +                VERIFY(spa->spa_cksum_tmpls[checksum] != NULL);
      212 +        }
      213 +        mutex_exit(&spa->spa_cksum_tmpls_lock);
      214 +}
      215 +
      216 +/*
 151  217   * Generate the checksum.
          + *
          + * Computes the checksum of `size' bytes at `data' with algorithm
          + * `checksum', always using the ci_func[0] variant, and passing the
          + * pool's salt and the algorithm's template slot from the spa_t.
          + *
          + * If the algorithm has a ci_tmpl_init callback and its slot in
          + * spa->spa_cksum_tmpls[] is still NULL, zio_checksum_template_init()
          + * is called first.
          + *
          + * For algorithms with ci_eck set, the result is stored in a zio_eck_t
          + * inside the buffer: the zc_eck member of the zil_chain_t for
          + * ZIO_CHECKSUM_ZILOG2 (where `size' is also replaced by zc_nused rounded
          + * up to ZIL_MIN_BLKSZ), otherwise the last sizeof (zio_eck_t) bytes of
          + * the buffer. Before the checksum function runs, zec_cksum is set to the
          + * gang or label verifier for those two types (for any other type
          + * bp->blk_cksum is instead loaded from the existing zec_cksum), and
          + * zec_magic is set to ZEC_MAGIC. The computed checksum then overwrites
          + * zec_cksum.
          + *
          + * For all other algorithms the result is written to bp->blk_cksum.
 152  218   */
 153  219  void
 154  220  zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
 155  221          void *data, uint64_t size)
 156  222  {
 157  223          blkptr_t *bp = zio->io_bp;
 158  224          uint64_t offset = zio->io_offset;
 159  225          zio_checksum_info_t *ci = &zio_checksum_table[checksum];
 160  226          zio_cksum_t cksum;
      227 +        spa_t *spa = zio->io_spa;
 161  228  
 162  229          ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS);
 163  230          ASSERT(ci->ci_func[0] != NULL);
 164  231  
      232 +        if (ci->ci_tmpl_init != NULL && spa->spa_cksum_tmpls[checksum] == NULL)
      233 +                zio_checksum_template_init(checksum, spa);
      234 +
 165  235          if (ci->ci_eck) {
 166  236                  zio_eck_t *eck;
 167  237  
 168  238                  if (checksum == ZIO_CHECKSUM_ZILOG2) {
 169  239                          zil_chain_t *zilc = data;
 170  240  
 171  241                          size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ,
 172  242                              uint64_t);
 173  243                          eck = &zilc->zc_eck;
 174  244                  } else {
 175  245                          eck = (zio_eck_t *)((char *)data + size) - 1;
 176  246                  }
 177  247                  if (checksum == ZIO_CHECKSUM_GANG_HEADER)
 178  248                          zio_checksum_gang_verifier(&eck->zec_cksum, bp);
 179  249                  else if (checksum == ZIO_CHECKSUM_LABEL)
 180  250                          zio_checksum_label_verifier(&eck->zec_cksum, offset);
 181  251                  else
 182  252                          bp->blk_cksum = eck->zec_cksum;
 183  253                  eck->zec_magic = ZEC_MAGIC;
 184      -                ci->ci_func[0](data, size, &cksum);
      254 +                ci->ci_func[0](data, size, &spa->spa_cksum_salt,
      255 +                    spa->spa_cksum_tmpls[checksum], &cksum);
 185  256                  eck->zec_cksum = cksum;
 186  257          } else {
 187      -                ci->ci_func[0](data, size, &bp->blk_cksum);
      258 +                ci->ci_func[0](data, size, &spa->spa_cksum_salt,
      259 +                    spa->spa_cksum_tmpls[checksum], &bp->blk_cksum);
 188  260          }
 189  261  }
 190  262  
 191  263  int
 192  264  zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
 193  265  {
 194  266          blkptr_t *bp = zio->io_bp;
 195  267          uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum :
 196  268              (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp)));
 197  269          int byteswap;
 198  270          int error;
 199  271          uint64_t size = (bp == NULL ? zio->io_size :
 200  272              (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp)));
 201  273          uint64_t offset = zio->io_offset;
 202  274          void *data = zio->io_data;
 203  275          zio_checksum_info_t *ci = &zio_checksum_table[checksum];
 204  276          zio_cksum_t actual_cksum, expected_cksum, verifier;
      277 +        spa_t *spa = zio->io_spa;
 205  278  
 206  279          if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
 207  280                  return (SET_ERROR(EINVAL));
 208  281  
      282 +        if (ci->ci_tmpl_init != NULL && spa->spa_cksum_tmpls[checksum] == NULL)
      283 +                zio_checksum_template_init(checksum, spa);
      284 +
 209  285          if (ci->ci_eck) {
 210  286                  zio_eck_t *eck;
 211  287  
 212  288                  if (checksum == ZIO_CHECKSUM_ZILOG2) {
 213  289                          zil_chain_t *zilc = data;
 214  290                          uint64_t nused;
 215  291  
 216  292                          eck = &zilc->zc_eck;
 217  293                          if (eck->zec_magic == ZEC_MAGIC)
 218  294                                  nused = zilc->zc_nused;
↓ open down ↓ 17 lines elided ↑ open up ↑
 236  312                  else
 237  313                          verifier = bp->blk_cksum;
 238  314  
 239  315                  byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));
 240  316  
 241  317                  if (byteswap)
 242  318                          byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
 243  319  
 244  320                  expected_cksum = eck->zec_cksum;
 245  321                  eck->zec_cksum = verifier;
 246      -                ci->ci_func[byteswap](data, size, &actual_cksum);
      322 +                ci->ci_func[byteswap](data, size, &spa->spa_cksum_salt,
      323 +                    spa->spa_cksum_tmpls[checksum], &actual_cksum);
 247  324                  eck->zec_cksum = expected_cksum;
 248  325  
 249  326                  if (byteswap)
 250  327                          byteswap_uint64_array(&expected_cksum,
 251  328                              sizeof (zio_cksum_t));
 252  329          } else {
 253  330                  ASSERT(!BP_IS_GANG(bp));
 254  331                  byteswap = BP_SHOULD_BYTESWAP(bp);
 255  332                  expected_cksum = bp->blk_cksum;
 256      -                ci->ci_func[byteswap](data, size, &actual_cksum);
      333 +                ci->ci_func[byteswap](data, size, &spa->spa_cksum_salt,
      334 +                    spa->spa_cksum_tmpls[checksum], &actual_cksum);
 257  335          }
 258  336  
 259  337          info->zbc_expected = expected_cksum;
 260  338          info->zbc_actual = actual_cksum;
 261  339          info->zbc_checksum_name = ci->ci_name;
 262  340          info->zbc_byteswapped = byteswap;
 263  341          info->zbc_injected = 0;
 264  342          info->zbc_has_cksum = 1;
 265  343  
 266  344          if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
 267  345                  return (SET_ERROR(ECKSUM));
 268  346  
 269  347          if (zio_injection_enabled && !zio->io_error &&
 270  348              (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) {
 271  349  
 272  350                  info->zbc_injected = 1;
 273  351                  return (error);
 274  352          }
 275  353  
 276  354          return (0);
      355 +}
      356 +
      357 +/*
      358 + * Called by a spa_t that's about to be deallocated. This steps through
      359 + * all of the checksum context templates and deallocates any that were
      360 + * initialized using the algorithm-specific template init function.
          + *
          + * Every non-NULL entry of spa->spa_cksum_tmpls[] is handed to the
          + * ci_tmpl_free callback of the matching zio_checksum_table[] entry
          + * (VERIFY'd to be non-NULL), and the entry is then reset to NULL.
          + *
          + * NOTE(review): spa_cksum_tmpls_lock is not taken here; presumably no
          + * checksum I/O can still be in flight on the spa at this point --
          + * confirm against the caller.
      361 + */
      362 +void
      363 +zio_checksum_templates_free(spa_t *spa)
      364 +{
      365 +        for (int checksum = 0; checksum < ZIO_CHECKSUM_FUNCTIONS; checksum++) {
      366 +                if (spa->spa_cksum_tmpls[checksum] != NULL) {
      367 +                        zio_checksum_info_t *ci = &zio_checksum_table[checksum];
      368 +
      369 +                        VERIFY(ci->ci_tmpl_free != NULL);
      370 +                        ci->ci_tmpl_free(spa->spa_cksum_tmpls[checksum]);
      371 +                        spa->spa_cksum_tmpls[checksum] = NULL;
      372 +                }
      373 +        }
 277  374  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX