1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /* Copyright (c) 1990 Mentat Inc. */
  26 /*
  27  * Copyright 2019 Joyent, Inc.
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/inttypes.h>
  32 #include <sys/systm.h>
  33 #include <sys/stream.h>
  34 #include <sys/strsun.h>
  35 #include <sys/strsubr.h>
  36 #include <sys/debug.h>
  37 #include <sys/ddi.h>
  38 #include <sys/vtrace.h>
  39 #include <inet/sctp_crc32.h>
  40 #include <inet/ip.h>
  41 
  42 #include <sys/multidata.h>
  43 #include <sys/multidata_impl.h>
  44 
/*
 * Low-level checksum primitive, defined outside this file.
 *
 * As used by the callers in this file:
 *   address        - start of the data; callers only ever pass a 16-bit
 *                    aligned pointer and handle a stray leading byte
 *                    themselves.
 *   halfword_count - length expressed in 16-bit words, not bytes; callers
 *                    add any trailing odd byte themselves.
 *   sum            - value the caller wants the result combined with
 *                    (callers pass either 0 or their running sum and use
 *                    the return value as the new sum).
 *
 * ip_cksum() asserts that the returned value is already folded
 * (<= 0xFFFF).
 */
extern unsigned int     ip_ocsum(ushort_t *address, int halfword_count,
    unsigned int sum);
  47 
  48 /*
  49  * Checksum routine for Internet Protocol family headers.
  50  * This routine is very heavily used in the network
  51  * code and should be modified for each CPU to be as fast as possible.
  52  */
  53 
/*
 * Even/Odd checks. These are usually applied to pointers but may be
 * used on integers as well. uintptr_t is long enough to hold both
 * integer and pointer.
 */
#define IS_ODD(p)       (((uintptr_t)(p) & 0x1) != 0)
#define IS_EVEN(p)      (((uintptr_t)(p) & 0x1) == 0)

/*
 * Non-zero when the STRUIO_IP bit is set in db_struioflag of the data
 * block that mp points at.
 */
#define HAS_UIOSUM(mp) ((mp)->b_datap->db_struioflag & STRUIO_IP)

/*
 * FRAG(ptr) yields the single byte at ptr positioned as the first byte of
 * a 16-bit word loaded in native byte order: the low 8 bits when
 * _LITTLE_ENDIAN is defined, shifted up by 8 bits otherwise.  ptr is
 * expected to be a byte pointer.
 */
#ifdef _LITTLE_ENDIAN
#define FRAG(ptr) (*(ptr))
#else
#define FRAG(ptr) (*(ptr) << 8)
#endif

/*
 * Give the compiler a hint to help optimize the code layout
 */
#define UNLIKELY(exp) __builtin_expect((exp), 0)

/*
 * One folding step: add the low 16 bits of val to everything above them.
 * A single step can still leave a value larger than 0xFFFF, so callers
 * apply it repeatedly.  Note that val is evaluated twice.
 */
#define FOLD(val) (((val) & 0xFFFF) + ((val) >> 16))
  78 
  79 /*
  80  * Note: this does not ones-complement the result since it is used
  81  * when computing partial checksums.  It assumes mp->b_rptr + offset is
  82  * 16 bit aligned and a valid offset in mp.
  83  */
  84 unsigned int
  85 ip_cksum(mblk_t *mp, int offset, uint_t initial_sum)
  86 {
  87         const uint_t sum_mask[2] = { 0, UINT_MAX };
  88         uint64_t sum = initial_sum;
  89         uint64_t total_len = 0;
  90         uchar_t *w;
  91         size_t mlen = MBLKL(mp);
  92         uint_t msum, mask;
  93 
  94         ASSERT3S(offset, >=, 0);
  95 
  96         VERIFY(!HAS_UIOSUM(mp));
  97         while (UNLIKELY(offset > mlen)) {
  98                 ASSERT3P(mp->b_cont, !=, NULL);
  99                 mp = mp->b_cont;
 100                 VERIFY(!HAS_UIOSUM(mp));
 101                 offset -= mlen;
 102                 mlen = MBLKL(mp);
 103         }
 104 
 105         /*
 106          * Make sure we start with a folded sum.  Since the initial sum
 107          * is 32 bits, folding twice will always produce a sum <= 0xFFFF
 108          */
 109         sum = FOLD(sum);
 110         sum = FOLD(sum);
 111         ASSERT3U(sum, <=, 0xFFFF);
 112 
 113         while (mp != NULL) {
 114                 w = mp->b_rptr + offset;
 115                 mlen = mp->b_wptr - w;
 116                 offset = 0;
 117 
 118                 ASSERT3P(w, <=, mp->b_wptr);
 119                 VERIFY(!HAS_UIOSUM(mp));
 120 
 121                 if (UNLIKELY(mlen == 0)) {
 122                         mp = mp->b_cont;
 123                         continue;
 124                 }
 125 
 126                 /*
 127                  * ip_ocsum() currently requires a 16-bit aligned address.
 128                  * For unaligned buffers, we first sum the odd byte (and
 129                  * fold if necessary) before calling ip_ocsum(). ip_ocsum()
 130                  * also takes its length in units of 16-bit words.  If
 131                  * we have an odd length, we must also manually add it after
 132                  * computing the main sum (and again fold if necessary).
 133                  *
 134                  * Since ip_ocsum() _should_ be a private per-platform
 135                  * optimized ip cksum implementation (with ip_cksum() being
 136                  * the less-private wrapper around it), a nice future
 137                  * optimization could be to modify ip_ocsum() for each
 138                  * platform to take a 64-bit sum.  This would allow us to
 139                  * only have to fold exactly once before we return --
 140                  * the amount of data we'd need to checksum to overflow 64
 141                  * bits far exceeds the possible size of any mblk chain we
 142                  * could ever have.
 143                  */
 144                 if (UNLIKELY(IS_ODD(w))) {
 145                         sum += FRAG(w);
 146                         w++;
 147 
 148                         --mlen;
 149                         total_len++;
 150 
 151                         if (UNLIKELY(mlen == 0)) {
 152                                 mp = mp->b_cont;
 153                                 continue;
 154                         }
 155                 }
 156 
 157                 /*
 158                  * ip_ocsum() takes the length as the number of half words
 159                  * (i.e. uint16_ts). It returns a result that is already
 160                  * folded (<= 0xFFFF).
 161                  */
 162                 msum = ip_ocsum((ushort_t *)w, mlen / 2, 0);
 163                 ASSERT3U(msum, <=, 0xFFFF);
 164 
 165                 /*
 166                  * We mask the last byte based on the length of data.
 167                  * If the length is odd, we AND with UINT_MAX otherwise
 168                  * we AND with 0 (resulting in 0) and add the result to
 169                  * the mblk_t sum. This effectively gives us:
 170                  *
 171                  * if (IS_ODD(mlen))
 172                  *      msum += FRAG(w + mlen - 1);
 173                  * else
 174                  *      msum += 0;
 175                  *
 176                  * Without incurring a branch.
 177                  */
 178                 mask = sum_mask[IS_ODD(mlen)];
 179                 msum += FRAG(w + mlen - 1) & mask;
 180 
 181                 /*
 182                  * If the data we are checksumming has been split
 183                  * between two mblk_ts along a non-16 bit boundary, that is
 184                  * we have something like:
 185                  *      mblk_t 1: aa bb cc
 186                  *      mblk_t 2: dd ee ff
 187                  * the result must be the same as if we checksummed a
 188                  * single mblk_t with 'aa bb cc dd ee ff'. As can be seen
 189                  * from the example, this situation causes the grouping of
 190                  * the data in the second mblk_t to be offset by a byte.
 191                  * The fix is to byteswap the mblk_t sum before adding it
 192                  * to the final sum. Again, we AND the mblk_t sum with a mask
 193                  * so that either the non-swapped or byteswapped sum is zeroed
 194                  * out and the other one is preserved (depending on the
 195                  * total bytes checksummed so far) and added to the sum.
 196                  *
 197                  * Effectively,
 198                  *
 199                  * if (IS_ODD(total_len))
 200                  *      sum += BSWAP_32(msum);
 201                  * else
 202                  *      sum += msum;
 203                  */
 204                 mask = sum_mask[IS_ODD(total_len)];
 205                 sum += BSWAP_32(msum) & mask;
 206                 sum += msum & ~mask;
 207 
 208                 total_len += mlen;
 209                 mp = mp->b_cont;
 210         }
 211 
 212         /*
 213          * To avoid unnecessary folding, we store the cumulative sum in
 214          * a uint64_t. This means we can always checksum up to 2^56 bytes
 215          * (2^(64-8)) without danger of overflowing.  Since 2^56 is well past
 216          * the petabyte range, and is far beyond the amount of data that
 217          * could every be stored in a single mblk_t chain (for the forseeable
 218          * future), this serves more as a sanity check than anything else.
 219          */
 220         VERIFY3U(total_len, <=, (uint64_t)1 << 56);
 221 
 222         /*
 223          * For a 64-bit sum, we have to fold at most 4 times to
 224          * produce a sum <= 0xFFFF.
 225          */
 226         sum = FOLD(sum);
 227         sum = FOLD(sum);
 228         sum = FOLD(sum);
 229         sum = FOLD(sum);
 230 
 231         TRACE_3(TR_FAC_IP, TR_IP_CKSUM_END,
 232             "ip_cksum_end:(%S) type %d (%X)", "ip_cksum", 1, sum);
 233         return ((unsigned int)sum);
 234 }
 235 
 236 uint32_t
 237 sctp_cksum(mblk_t *mp, int offset)
 238 {
 239         uint32_t crc32;
 240         uchar_t *p = NULL;
 241 
 242         crc32 = 0xFFFFFFFF;
 243         p = mp->b_rptr + offset;
 244         crc32 = sctp_crc32(crc32, p, mp->b_wptr - p);
 245         for (mp = mp->b_cont; mp != NULL; mp = mp->b_cont) {
 246                 crc32 = sctp_crc32(crc32, mp->b_rptr, MBLKL(mp));
 247         }
 248 
 249         /* Complement the result */
 250         crc32 = ~crc32;
 251 
 252         return (crc32);
 253 }
 254 
 255 /*
 256  * Routine to compute Internet checksum (16-bit 1's complement) of a given
 257  * Multidata packet descriptor.  As in the non-Multidata routine, this doesn't
 258  * 1's complement the result, such that it may be used to compute partial
 259  * checksums.  Since it works on buffer spans rather than mblks, this routine
 260  * does not handle existing partial checksum value as in the STRUIO_IP special
 261  * mblk case (supporting this is rather trivial, but is perhaps of no use at
 262  * the moment unless synchronous streams and delayed checksum calculation are
 263  * revived.)
 264  *
 265  * Note also here that the given Multidata packet descriptor must refer to
 266  * a header buffer, i.e. it must have a header fragment.  In addition, the
 267  * offset must lie within the boundary of the header fragment.  For the
 268  * outbound tcp (MDT) case, this will not be an issue because the stack
 269  * ensures that such conditions are met, and that there is no need whatsoever
 270  * to compute partial checksums on an arbitrary offset that is not part of
 271  * the header fragment.  We may need to revisit this routine to handle all
 272  * cases of the inbound (MDR) case, especially when we need to perform partial
 273  * checksum calculation due to padded bytes (non-zeroes) in the frame.
 274  */
 275 uint_t
 276 ip_md_cksum(pdesc_t *pd, int offset, uint_t sum)
 277 {
 278         pdescinfo_t     *pdi = &pd->pd_pdi;
 279         uchar_t         *reg_start, *reg_end;
 280         ssize_t         mlen, i;
 281         ushort_t        *w;
 282         boolean_t       byteleft = B_FALSE;
 283 
 284         ASSERT((pdi->flags & PDESC_HAS_REF) != 0);
 285         ASSERT(pdi->hdr_rptr != NULL && pdi->hdr_wptr != NULL);
 286         ASSERT(offset <= PDESC_HDRL(pdi));
 287 
 288         for (i = 0; i < pdi->pld_cnt + 1; i++) {
 289                 if (i == 0) {
 290                         reg_start = pdi->hdr_rptr;
 291                         reg_end = pdi->hdr_wptr;
 292                 } else {
 293                         reg_start = pdi->pld_ary[i - 1].pld_rptr;
 294                         reg_end = pdi->pld_ary[i - 1].pld_wptr;
 295                         offset = 0;
 296                 }
 297 
 298                 w = (ushort_t *)(reg_start + offset);
 299                 mlen = reg_end - (uchar_t *)w;
 300 
 301                 if (mlen > 0 && byteleft) {
 302                         /*
 303                          * There is a byte left from the last
 304                          * segment; add it into the checksum.
 305                          * Don't have to worry about a carry-
 306                          * out here because we make sure that
 307                          * high part of (32 bit) sum is small
 308                          * below.
 309                          */
 310 #ifdef _LITTLE_ENDIAN
 311                         sum += *(uchar_t *)w << 8;
 312 #else
 313                         sum += *(uchar_t *)w;
 314 #endif
 315                         w = (ushort_t *)((char *)w + 1);
 316                         mlen--;
 317                         byteleft = B_FALSE;
 318                 }
 319 
 320                 if (mlen == 0)
 321                         continue;
 322 
 323                 if (IS_EVEN(w)) {
 324                         sum = ip_ocsum(w, mlen >> 1, sum);
 325                         w += mlen >> 1;
 326                         /*
 327                          * If we had an odd number of bytes,
 328                          * then the last byte goes in the high
 329                          * part of the sum, and we take the
 330                          * first byte to the low part of the sum
 331                          * the next time around the loop.
 332                          */
 333                         if (IS_ODD(mlen)) {
 334 #ifdef _LITTLE_ENDIAN
 335                                 sum += *(uchar_t *)w;
 336 #else
 337                                 sum += *(uchar_t *)w << 8;
 338 #endif
 339                                 byteleft = B_TRUE;
 340                         }
 341                 } else {
 342                         ushort_t swsum;
 343 #ifdef _LITTLE_ENDIAN
 344                         sum += *(uchar_t *)w;
 345 #else
 346                         sum += *(uchar_t *)w << 8;
 347 #endif
 348                         mlen--;
 349                         w = (ushort_t *)(1 + (uintptr_t)w);
 350 
 351                         /* Do a separate checksum and copy operation */
 352                         swsum = ip_ocsum(w, mlen >> 1, 0);
 353                         sum += ((swsum << 8) & 0xffff) | (swsum >> 8);
 354                         w += mlen >> 1;
 355                         /*
 356                          * If we had an even number of bytes,
 357                          * then the last byte goes in the low
 358                          * part of the sum.  Otherwise we had an
 359                          * odd number of bytes and we take the first
 360                          * byte to the low part of the sum the
 361                          * next time around the loop.
 362                          */
 363                         if (IS_ODD(mlen)) {
 364 #ifdef _LITTLE_ENDIAN
 365                                 sum += *(uchar_t *)w << 8;
 366 #else
 367                                 sum += *(uchar_t *)w;
 368 #endif
 369                         } else {
 370                                 byteleft = B_TRUE;
 371                         }
 372                 }
 373         }
 374 
 375         /*
 376          * Add together high and low parts of sum and carry to get cksum.
 377          * Have to be careful to not drop the last carry here.
 378          */
 379         sum = (sum & 0xffff) + (sum >> 16);
 380         sum = (sum & 0xffff) + (sum >> 16);
 381 
 382         return (sum);
 383 }
 384 
 385 /* Return the IP checksum for the IP header at "iph". */
 386 uint16_t
 387 ip_csum_hdr(ipha_t *ipha)
 388 {
 389         uint16_t        *uph;
 390         uint32_t        sum;
 391         int             opt_len;
 392 
 393         opt_len = (ipha->ipha_version_and_hdr_length & 0xF) -
 394             IP_SIMPLE_HDR_LENGTH_IN_WORDS;
 395         uph = (uint16_t *)ipha;
 396         sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] +
 397             uph[5] + uph[6] + uph[7] + uph[8] + uph[9];
 398         if (opt_len > 0) {
 399                 do {
 400                         sum += uph[10];
 401                         sum += uph[11];
 402                         uph += 2;
 403                 } while (--opt_len);
 404         }
 405         sum = (sum & 0xFFFF) + (sum >> 16);
 406         sum = ~(sum + (sum >> 16)) & 0xFFFF;
 407         if (sum == 0xffff)
 408                 sum = 0;
 409         return ((uint16_t)sum);
 410 }