1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 /* 27 * Copyright 2019 Joyent, Inc. 28 */ 29 30 #include <sys/types.h> 31 #include <sys/inttypes.h> 32 #include <sys/systm.h> 33 #include <sys/stream.h> 34 #include <sys/strsun.h> 35 #include <sys/strsubr.h> 36 #include <sys/debug.h> 37 #include <sys/ddi.h> 38 #include <sys/vtrace.h> 39 #include <inet/sctp_crc32.h> 40 #include <inet/ip.h> 41 42 #include <sys/multidata.h> 43 #include <sys/multidata_impl.h> 44 45 extern unsigned int ip_ocsum(ushort_t *address, int halfword_count, 46 unsigned int sum); 47 48 /* 49 * Checksum routine for Internet Protocol family headers. 50 * This routine is very heavily used in the network 51 * code and should be modified for each CPU to be as fast as possible. 52 */ 53 54 /* 55 * Even/Odd checks. Usually it is performed on pointers but may be 56 * used on integers as well. uintptr_t is long enough to hold both 57 * integer and pointer. 58 */ 59 #define IS_ODD(p) (((uintptr_t)(p) & 0x1) != 0) 60 #define IS_EVEN(p) (((uintptr_t)(p) & 0x1) == 0) 61 62 /* 63 */ 64 #define HAS_UIOSUM(mp) ((mp)->b_datap->db_struioflag & STRUIO_IP) 65 66 #ifdef _LITTLE_ENDIAN 67 #define FRAG(ptr) (*(ptr)) 68 #else 69 #define FRAG(ptr) (*(ptr) << 8) 70 #endif 71 72 /* 73 * Give the compiler a hint to help optimize the code layout 74 */ 75 #define UNLIKELY(exp) __builtin_expect((exp), 0) 76 77 #define FOLD(val) (((val) & 0xFFFF) + ((val) >> 16)) 78 79 /* 80 * Note: this does not ones-complement the result since it is used 81 * when computing partial checksums. It assumes mp->b_rptr + offset is 82 * 16 bit aligned and a valid offset in mp. 83 */ 84 unsigned int 85 ip_cksum(mblk_t *mp, int offset, uint_t initial_sum) 86 { 87 const uint_t sum_mask[2] = { 0, UINT_MAX }; 88 uint64_t sum = initial_sum; 89 uint64_t total_len = 0; 90 uchar_t *w; 91 size_t mlen = MBLKL(mp); 92 uint_t msum, mask; 93 94 ASSERT3S(offset, >=, 0); 95 96 VERIFY(!HAS_UIOSUM(mp)); 97 while (UNLIKELY(offset > mlen)) { 98 ASSERT3P(mp->b_cont, !=, NULL); 99 mp = mp->b_cont; 100 VERIFY(!HAS_UIOSUM(mp)); 101 offset -= mlen; 102 mlen = MBLKL(mp); 103 } 104 105 /* 106 * Make sure we start with a folded sum. Since the initial sum 107 * is 32 bits, folding twice will always produce a sum <= 0xFFFF 108 */ 109 sum = FOLD(sum); 110 sum = FOLD(sum); 111 ASSERT3U(sum, <=, 0xFFFF); 112 113 while (mp != NULL) { 114 w = mp->b_rptr + offset; 115 mlen = mp->b_wptr - w; 116 offset = 0; 117 118 ASSERT3P(w, <=, mp->b_wptr); 119 VERIFY(!HAS_UIOSUM(mp)); 120 121 if (UNLIKELY(mlen == 0)) { 122 mp = mp->b_cont; 123 continue; 124 } 125 126 /* 127 * ip_ocsum() currently requires a 16-bit aligned address. 128 * For unaligned buffers, we first sum the odd byte (and 129 * fold if necessary) before calling ip_ocsum(). ip_ocsum() 130 * also takes its length in units of 16-bit words. If 131 * we have an odd length, we must also manually add it after 132 * computing the main sum (and again fold if necessary). 133 * 134 * Since ip_ocsum() _should_ be a private per-platform 135 * optimized ip cksum implementation (with ip_cksum() being 136 * the less-private wrapper around it), a nice future 137 * optimization could be to modify ip_ocsum() for each 138 * platform to take a 64-bit sum. This would allow us to 139 * only have to fold exactly once before we return -- 140 * the amount of data we'd need to checksum to overflow 64 141 * bits far exceeds the possible size of any mblk chain we 142 * could ever have. 143 */ 144 if (UNLIKELY(IS_ODD(w))) { 145 sum += FRAG(w); 146 w++; 147 148 --mlen; 149 total_len++; 150 151 if (UNLIKELY(mlen == 0)) { 152 mp = mp->b_cont; 153 continue; 154 } 155 } 156 157 /* 158 * ip_ocsum() takes the length as the number of half words 159 * (i.e. uint16_ts). It returns a result that is already 160 * folded (<= 0xFFFF). 161 */ 162 msum = ip_ocsum((ushort_t *)w, mlen / 2, 0); 163 ASSERT3U(msum, <=, 0xFFFF); 164 165 /* 166 * We mask the last byte based on the length of data. 167 * If the length is odd, we AND with UINT_MAX otherwise 168 * we AND with 0 (resulting in 0) and add the result to 169 * the mblk_t sum. This effectively gives us: 170 * 171 * if (IS_ODD(mlen)) 172 * msum += FRAG(w + mlen - 1); 173 * else 174 * msum += 0; 175 * 176 * Without incurring a branch. 177 */ 178 mask = sum_mask[IS_ODD(mlen)]; 179 msum += FRAG(w + mlen - 1) & mask; 180 181 /* 182 * If the data we are checksumming has been split 183 * between two mblk_ts along a non-16 bit boundary, that is 184 * we have something like: 185 * mblk_t 1: aa bb cc 186 * mblk_t 2: dd ee ff 187 * the result must be the same as if we checksummed a 188 * single mblk_t with 'aa bb cc dd ee ff'. As can be seen 189 * from the example, this situation causes the grouping of 190 * the data in the second mblk_t to be offset by a byte. 191 * The fix is to byteswap the mblk_t sum before adding it 192 * to the final sum. Again, we AND the mblk_t sum with a mask 193 * so that either the non-swapped or byteswapped sum is zeroed 194 * out and the other one is preserved (depending on the 195 * total bytes checksummed so far) and added to the sum. 196 * 197 * Effectively, 198 * 199 * if (IS_ODD(total_len)) 200 * sum += BSWAP_32(msum); 201 * else 202 * sum += msum; 203 */ 204 mask = sum_mask[IS_ODD(total_len)]; 205 sum += BSWAP_32(msum) & mask; 206 sum += msum & ~mask; 207 208 total_len += mlen; 209 mp = mp->b_cont; 210 } 211 212 /* 213 * To avoid unnecessary folding, we store the cumulative sum in 214 * a uint64_t. This means we can always checksum up to 2^56 bytes 215 * (2^(64-8)) without danger of overflowing. Since 2^56 is well past 216 * the petabyte range, and is far beyond the amount of data that 217 * could every be stored in a single mblk_t chain (for the forseeable 218 * future), this serves more as a sanity check than anything else. 219 */ 220 VERIFY3U(total_len, <=, (uint64_t)1 << 56); 221 222 /* 223 * For a 64-bit sum, we have to fold at most 4 times to 224 * produce a sum <= 0xFFFF. 225 */ 226 sum = FOLD(sum); 227 sum = FOLD(sum); 228 sum = FOLD(sum); 229 sum = FOLD(sum); 230 231 TRACE_3(TR_FAC_IP, TR_IP_CKSUM_END, 232 "ip_cksum_end:(%S) type %d (%X)", "ip_cksum", 1, sum); 233 return ((unsigned int)sum); 234 } 235 236 uint32_t 237 sctp_cksum(mblk_t *mp, int offset) 238 { 239 uint32_t crc32; 240 uchar_t *p = NULL; 241 242 crc32 = 0xFFFFFFFF; 243 p = mp->b_rptr + offset; 244 crc32 = sctp_crc32(crc32, p, mp->b_wptr - p); 245 for (mp = mp->b_cont; mp != NULL; mp = mp->b_cont) { 246 crc32 = sctp_crc32(crc32, mp->b_rptr, MBLKL(mp)); 247 } 248 249 /* Complement the result */ 250 crc32 = ~crc32; 251 252 return (crc32); 253 } 254 255 /* 256 * Routine to compute Internet checksum (16-bit 1's complement) of a given 257 * Multidata packet descriptor. As in the non-Multidata routine, this doesn't 258 * 1's complement the result, such that it may be used to compute partial 259 * checksums. Since it works on buffer spans rather than mblks, this routine 260 * does not handle existing partial checksum value as in the STRUIO_IP special 261 * mblk case (supporting this is rather trivial, but is perhaps of no use at 262 * the moment unless synchronous streams and delayed checksum calculation are 263 * revived.) 264 * 265 * Note also here that the given Multidata packet descriptor must refer to 266 * a header buffer, i.e. it must have a header fragment. In addition, the 267 * offset must lie within the boundary of the header fragment. For the 268 * outbound tcp (MDT) case, this will not be an issue because the stack 269 * ensures that such conditions are met, and that there is no need whatsoever 270 * to compute partial checksums on an arbitrary offset that is not part of 271 * the header fragment. We may need to revisit this routine to handle all 272 * cases of the inbound (MDR) case, especially when we need to perform partial 273 * checksum calculation due to padded bytes (non-zeroes) in the frame. 274 */ 275 uint_t 276 ip_md_cksum(pdesc_t *pd, int offset, uint_t sum) 277 { 278 pdescinfo_t *pdi = &pd->pd_pdi; 279 uchar_t *reg_start, *reg_end; 280 ssize_t mlen, i; 281 ushort_t *w; 282 boolean_t byteleft = B_FALSE; 283 284 ASSERT((pdi->flags & PDESC_HAS_REF) != 0); 285 ASSERT(pdi->hdr_rptr != NULL && pdi->hdr_wptr != NULL); 286 ASSERT(offset <= PDESC_HDRL(pdi)); 287 288 for (i = 0; i < pdi->pld_cnt + 1; i++) { 289 if (i == 0) { 290 reg_start = pdi->hdr_rptr; 291 reg_end = pdi->hdr_wptr; 292 } else { 293 reg_start = pdi->pld_ary[i - 1].pld_rptr; 294 reg_end = pdi->pld_ary[i - 1].pld_wptr; 295 offset = 0; 296 } 297 298 w = (ushort_t *)(reg_start + offset); 299 mlen = reg_end - (uchar_t *)w; 300 301 if (mlen > 0 && byteleft) { 302 /* 303 * There is a byte left from the last 304 * segment; add it into the checksum. 305 * Don't have to worry about a carry- 306 * out here because we make sure that 307 * high part of (32 bit) sum is small 308 * below. 309 */ 310 #ifdef _LITTLE_ENDIAN 311 sum += *(uchar_t *)w << 8; 312 #else 313 sum += *(uchar_t *)w; 314 #endif 315 w = (ushort_t *)((char *)w + 1); 316 mlen--; 317 byteleft = B_FALSE; 318 } 319 320 if (mlen == 0) 321 continue; 322 323 if (IS_EVEN(w)) { 324 sum = ip_ocsum(w, mlen >> 1, sum); 325 w += mlen >> 1; 326 /* 327 * If we had an odd number of bytes, 328 * then the last byte goes in the high 329 * part of the sum, and we take the 330 * first byte to the low part of the sum 331 * the next time around the loop. 332 */ 333 if (IS_ODD(mlen)) { 334 #ifdef _LITTLE_ENDIAN 335 sum += *(uchar_t *)w; 336 #else 337 sum += *(uchar_t *)w << 8; 338 #endif 339 byteleft = B_TRUE; 340 } 341 } else { 342 ushort_t swsum; 343 #ifdef _LITTLE_ENDIAN 344 sum += *(uchar_t *)w; 345 #else 346 sum += *(uchar_t *)w << 8; 347 #endif 348 mlen--; 349 w = (ushort_t *)(1 + (uintptr_t)w); 350 351 /* Do a separate checksum and copy operation */ 352 swsum = ip_ocsum(w, mlen >> 1, 0); 353 sum += ((swsum << 8) & 0xffff) | (swsum >> 8); 354 w += mlen >> 1; 355 /* 356 * If we had an even number of bytes, 357 * then the last byte goes in the low 358 * part of the sum. Otherwise we had an 359 * odd number of bytes and we take the first 360 * byte to the low part of the sum the 361 * next time around the loop. 362 */ 363 if (IS_ODD(mlen)) { 364 #ifdef _LITTLE_ENDIAN 365 sum += *(uchar_t *)w << 8; 366 #else 367 sum += *(uchar_t *)w; 368 #endif 369 } else { 370 byteleft = B_TRUE; 371 } 372 } 373 } 374 375 /* 376 * Add together high and low parts of sum and carry to get cksum. 377 * Have to be careful to not drop the last carry here. 378 */ 379 sum = (sum & 0xffff) + (sum >> 16); 380 sum = (sum & 0xffff) + (sum >> 16); 381 382 return (sum); 383 } 384 385 /* Return the IP checksum for the IP header at "iph". */ 386 uint16_t 387 ip_csum_hdr(ipha_t *ipha) 388 { 389 uint16_t *uph; 390 uint32_t sum; 391 int opt_len; 392 393 opt_len = (ipha->ipha_version_and_hdr_length & 0xF) - 394 IP_SIMPLE_HDR_LENGTH_IN_WORDS; 395 uph = (uint16_t *)ipha; 396 sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] + 397 uph[5] + uph[6] + uph[7] + uph[8] + uph[9]; 398 if (opt_len > 0) { 399 do { 400 sum += uph[10]; 401 sum += uph[11]; 402 uph += 2; 403 } while (--opt_len); 404 } 405 sum = (sum & 0xFFFF) + (sum >> 16); 406 sum = ~(sum + (sum >> 16)) & 0xFFFF; 407 if (sum == 0xffff) 408 sum = 0; 409 return ((uint16_t)sum); 410 }