Print this page
3742 zfs comments need cleaner, more consistent style
Submitted by:   Will Andrews <willa@spectralogic.com>
Submitted by:   Alan Somers <alans@spectralogic.com>
Reviewed by:    Matthew Ahrens <mahrens@delphix.com>
Reviewed by:    George Wilson <george.wilson@delphix.com>
Reviewed by:    Eric Schrock <eric.schrock@delphix.com>


  43  * Fault-Tolerance in RAID-like Systems" by James S. Plank on which the
  44  * former is also based. The latter is designed to provide higher performance
  45  * for writes.
  46  *
  47  * Note that the Plank paper claimed to support arbitrary N+M, but was then
  48  * amended six years later identifying a critical flaw that invalidates its
  49  * claims. Nevertheless, the technique can be adapted to work for up to
  50  * triple parity. For additional parity, the amendment "Note: Correction to
  51  * the 1997 Tutorial on Reed-Solomon Coding" by James S. Plank and Ying Ding
  52  * is viable, but the additional complexity means that write performance will
  53  * suffer.
  54  *
  55  * All of the methods above operate on a Galois field, defined over the
  56  * integers mod 2^N. In our case we choose N=8 for GF(8) so that all elements
  57  * can be expressed with a single byte. Briefly, the operations on the
  58  * field are defined as follows:
  59  *
  60  *   o addition (+) is represented by a bitwise XOR
  61  *   o subtraction (-) is therefore identical to addition: A + B = A - B
  62  *   o multiplication of A by 2 is defined by the following bitwise expression:

  63  *      (A * 2)_7 = A_6
  64  *      (A * 2)_6 = A_5
  65  *      (A * 2)_5 = A_4
  66  *      (A * 2)_4 = A_3 + A_7
  67  *      (A * 2)_3 = A_2 + A_7
  68  *      (A * 2)_2 = A_1 + A_7
  69  *      (A * 2)_1 = A_0
  70  *      (A * 2)_0 = A_7
  71  *
  72  * In C, multiplying by 2 is therefore ((a << 1) ^ ((a & 0x80) ? 0x1d : 0)).
  73  * As an aside, this multiplication is derived from the error correcting
  74  * primitive polynomial x^8 + x^4 + x^3 + x^2 + 1.
  75  *
  76  * Observe that any number in the field (except for 0) can be expressed as a
  77  * power of 2 -- a generator for the field. We store a table of the powers of
  78  * 2 and logs base 2 for quick look ups, and exploit the fact that A * B can
  79  * be rewritten as 2^(log_2(A) + log_2(B)) (where '+' is normal addition rather
  80  * than field addition). The inverse of a field element A (A^-1) is therefore
  81  * A ^ (255 - 1) = A^254.
  82  *


 141  */
 142 #define VDEV_RAIDZ_64MUL_2(x, mask) \
 143 { \
 144         (mask) = (x) & 0x8080808080808080ULL; \
 145         (mask) = ((mask) << 1) - ((mask) >> 7); \
 146         (x) = (((x) << 1) & 0xfefefefefefefefeULL) ^ \
 147             ((mask) & 0x1d1d1d1d1d1d1d1d); \
 148 }
 149 
 150 #define VDEV_RAIDZ_64MUL_4(x, mask) \
 151 { \
 152         VDEV_RAIDZ_64MUL_2((x), mask); \
 153         VDEV_RAIDZ_64MUL_2((x), mask); \
 154 }
 155 
 156 /*
 157  * Force reconstruction to use the general purpose method.
 158  */
 159 int vdev_raidz_default_to_general;
 160 
 161 /*
 162  * These two tables represent powers and logs of 2 in the Galois field defined
 163  * above. These values were computed by repeatedly multiplying by 2 as above.
 164  */
 165 static const uint8_t vdev_raidz_pow2[256] = {
 166         0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
 167         0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
 168         0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
 169         0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
 170         0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
 171         0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
 172         0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
 173         0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
 174         0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
 175         0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
 176         0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
 177         0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
 178         0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
 179         0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
 180         0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
 181         0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
 182         0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
 183         0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
 184         0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
 185         0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
 186         0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
 187         0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
 188         0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
 189         0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
 190         0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
 191         0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
 192         0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
 193         0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
 194         0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
 195         0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
 196         0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
 197         0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
 198 };

 199 static const uint8_t vdev_raidz_log2[256] = {
 200         0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
 201         0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
 202         0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
 203         0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
 204         0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
 205         0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
 206         0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
 207         0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
 208         0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
 209         0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
 210         0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
 211         0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
 212         0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
 213         0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
 214         0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
 215         0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
 216         0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
 217         0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
 218         0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,




  43  * Fault-Tolerance in RAID-like Systems" by James S. Plank on which the
  44  * former is also based. The latter is designed to provide higher performance
  45  * for writes.
  46  *
  47  * Note that the Plank paper claimed to support arbitrary N+M, but was then
  48  * amended six years later identifying a critical flaw that invalidates its
  49  * claims. Nevertheless, the technique can be adapted to work for up to
  50  * triple parity. For additional parity, the amendment "Note: Correction to
  51  * the 1997 Tutorial on Reed-Solomon Coding" by James S. Plank and Ying Ding
  52  * is viable, but the additional complexity means that write performance will
  53  * suffer.
  54  *
  55  * All of the methods above operate on a Galois field, defined over the
  56  * integers mod 2^N. In our case we choose N=8 for GF(8) so that all elements
  57  * can be expressed with a single byte. Briefly, the operations on the
  58  * field are defined as follows:
  59  *
  60  *   o addition (+) is represented by a bitwise XOR
  61  *   o subtraction (-) is therefore identical to addition: A + B = A - B
  62  *   o multiplication of A by 2 is defined by the following bitwise expression:
  63  *
  64  *      (A * 2)_7 = A_6
  65  *      (A * 2)_6 = A_5
  66  *      (A * 2)_5 = A_4
  67  *      (A * 2)_4 = A_3 + A_7
  68  *      (A * 2)_3 = A_2 + A_7
  69  *      (A * 2)_2 = A_1 + A_7
  70  *      (A * 2)_1 = A_0
  71  *      (A * 2)_0 = A_7
  72  *
  73  * In C, multiplying by 2 is therefore ((a << 1) ^ ((a & 0x80) ? 0x1d : 0)).
  74  * As an aside, this multiplication is derived from the error correcting
  75  * primitive polynomial x^8 + x^4 + x^3 + x^2 + 1.
  76  *
  77  * Observe that any number in the field (except for 0) can be expressed as a
  78  * power of 2 -- a generator for the field. We store a table of the powers of
  79  * 2 and logs base 2 for quick look ups, and exploit the fact that A * B can
  80  * be rewritten as 2^(log_2(A) + log_2(B)) (where '+' is normal addition rather
  81  * than field addition). The inverse of a field element A (A^-1) is therefore
  82  * A ^ (255 - 1) = A^254.
  83  *


 142  */
 143 #define VDEV_RAIDZ_64MUL_2(x, mask) \
 144 { \
 145         (mask) = (x) & 0x8080808080808080ULL; \
 146         (mask) = ((mask) << 1) - ((mask) >> 7); \
 147         (x) = (((x) << 1) & 0xfefefefefefefefeULL) ^ \
 148             ((mask) & 0x1d1d1d1d1d1d1d1d); \
 149 }
 150 
 151 #define VDEV_RAIDZ_64MUL_4(x, mask) \
 152 { \
 153         VDEV_RAIDZ_64MUL_2((x), mask); \
 154         VDEV_RAIDZ_64MUL_2((x), mask); \
 155 }
 156 
 157 /*
 158  * Force reconstruction to use the general purpose method.
 159  */
 160 int vdev_raidz_default_to_general;
 161 
 162 /* Powers of 2 in the Galois field defined above. */



 163 static const uint8_t vdev_raidz_pow2[256] = {
 164         0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
 165         0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
 166         0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
 167         0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
 168         0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
 169         0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
 170         0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
 171         0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
 172         0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
 173         0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
 174         0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
 175         0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
 176         0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
 177         0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
 178         0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
 179         0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
 180         0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
 181         0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
 182         0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
 183         0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
 184         0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
 185         0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
 186         0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
 187         0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
 188         0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
 189         0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
 190         0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
 191         0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
 192         0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
 193         0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
 194         0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
 195         0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
 196 };
 197 /* Logs of 2 in the Galois field defined above. */
 198 static const uint8_t vdev_raidz_log2[256] = {
 199         0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
 200         0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
 201         0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
 202         0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
 203         0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
 204         0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
 205         0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
 206         0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
 207         0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
 208         0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
 209         0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
 210         0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
 211         0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
 212         0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
 213         0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
 214         0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
 215         0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
 216         0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
 217         0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,