1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25 /* Copyright (c) 1990 Mentat Inc. */
26 /*
27 * Copyright 2019 Joyent, Inc.
28 */
29
30 #include <sys/types.h>
31 #include <sys/inttypes.h>
32 #include <sys/systm.h>
33 #include <sys/stream.h>
34 #include <sys/strsun.h>
35 #include <sys/strsubr.h>
36 #include <sys/debug.h>
37 #include <sys/ddi.h>
38 #include <sys/vtrace.h>
39 #include <inet/sctp_crc32.h>
40 #include <inet/ip.h>
41
42 #include <sys/multidata.h>
43 #include <sys/multidata_impl.h>
44
45 extern unsigned int ip_ocsum(ushort_t *address, int halfword_count,
46 unsigned int sum);
47
48 /*
49 * Checksum routine for Internet Protocol family headers.
50 * This routine is very heavily used in the network
51 * code and should be modified for each CPU to be as fast as possible.
52 */
53
/*
 * Even/Odd checks. Usually it is performed on pointers but may be
 * used on integers as well. uintptr_t is long enough to hold both
 * integer and pointer.
 */
#define IS_ODD(p) (((uintptr_t)(p) & 0x1) != 0)
#define IS_EVEN(p) (((uintptr_t)(p) & 0x1) == 0)

/*
 * Non-zero when the STRUIO_IP bit is set in db_struioflag of the data
 * block attached to mp.
 */
#define HAS_UIOSUM(mp) ((mp)->b_datap->db_struioflag & STRUIO_IP)

/*
 * FRAG() yields the value contributed by a single byte that is the first
 * byte (in memory order) of a 16-bit word: the low-order byte on
 * little-endian machines, the high-order byte on big-endian machines.
 */
#ifdef _LITTLE_ENDIAN
#define FRAG(ptr) (*(ptr))
#else
#define FRAG(ptr) (*(ptr) << 8)
#endif

/*
 * Give the compiler a hint to help optimize the code layout
 */
#define UNLIKELY(exp) __builtin_expect((exp), 0)

/*
 * One folding step: add everything above the low 16 bits of val back into
 * the low 16 bits. The result of a single step may itself exceed 0xFFFF,
 * so callers repeat the step as many times as the width of val requires.
 */
#define FOLD(val) (((val) & 0xFFFF) + ((val) >> 16))
78
79 /*
80 * Note: this does not ones-complement the result since it is used
81 * when computing partial checksums. It assumes mp->b_rptr + offset is
82 * 16 bit aligned and a valid offset in mp.
83 */
84 unsigned int
85 ip_cksum(mblk_t *mp, int offset, uint_t initial_sum)
86 {
87 const uint_t sum_mask[2] = { 0, UINT_MAX };
88 uint64_t sum = initial_sum;
89 uint64_t total_len = 0;
90 uchar_t *w;
91 size_t mlen = MBLKL(mp);
92 uint_t msum, mask;
93
94 ASSERT3S(offset, >=, 0);
95
96 VERIFY(!HAS_UIOSUM(mp));
97 while (UNLIKELY(offset > mlen)) {
98 ASSERT3P(mp->b_cont, !=, NULL);
99 mp = mp->b_cont;
100 VERIFY(!HAS_UIOSUM(mp));
101 offset -= mlen;
102 mlen = MBLKL(mp);
103 }
104
105 /*
106 * Make sure we start with a folded sum. Since the initial sum
107 * is 32 bits, folding twice will always produce a sum <= 0xFFFF
108 */
109 sum = FOLD(sum);
110 sum = FOLD(sum);
111 ASSERT3U(sum, <=, 0xFFFF);
112
113 while (mp != NULL) {
114 w = mp->b_rptr + offset;
115 mlen = mp->b_wptr - w;
116 offset = 0;
117
118 ASSERT3P(w, <=, mp->b_wptr);
119 VERIFY(!HAS_UIOSUM(mp));
120
121 if (UNLIKELY(mlen == 0)) {
122 mp = mp->b_cont;
123 continue;
124 }
125
126 /*
127 * ip_ocsum() currently requires a 16-bit aligned address.
128 * For unaligned buffers, we first sum the odd byte (and
129 * fold if necessary) before calling ip_ocsum(). ip_ocsum()
130 * also takes its length in units of 16-bit words. If
131 * we have an odd length, we must also manually add it after
132 * computing the main sum (and again fold if necessary).
133 *
134 * Since ip_ocsum() _should_ be a private per-platform
135 * optimized ip cksum implementation (with ip_cksum() being
136 * the less-private wrapper around it), a nice future
137 * optimization could be to modify ip_ocsum() for each
138 * platform to take a 64-bit sum. This would allow us to
139 * only have to fold exactly once before we return --
140 * the amount of data we'd need to checksum to overflow 64
141 * bits far exceeds the possible size of any mblk chain we
142 * could ever have.
143 */
144 if (UNLIKELY(IS_ODD(w))) {
145 sum += FRAG(w);
146 w++;
147
148 --mlen;
149 total_len++;
150
151 if (UNLIKELY(mlen == 0)) {
152 mp = mp->b_cont;
153 continue;
154 }
155 }
156
157 /*
158 * ip_ocsum() takes the length as the number of half words
159 * (i.e. uint16_ts). It returns a result that is already
160 * folded (<= 0xFFFF).
161 */
162 msum = ip_ocsum((ushort_t *)w, mlen / 2, 0);
163 ASSERT3U(msum, <=, 0xFFFF);
164
165 /*
166 * We mask the last byte based on the length of data.
167 * If the length is odd, we AND with UINT_MAX otherwise
168 * we AND with 0 (resulting in 0) and add the result to
169 * the mblk_t sum. This effectively gives us:
170 *
171 * if (IS_ODD(mlen))
172 * msum += FRAG(w + mlen - 1);
173 * else
174 * msum += 0;
175 *
176 * Without incurring a branch.
177 */
178 mask = sum_mask[IS_ODD(mlen)];
179 msum += FRAG(w + mlen - 1) & mask;
180
181 /*
182 * If the data we are checksumming has been split
183 * between two mblk_ts along a non-16 bit boundary, that is
184 * we have something like:
185 * mblk_t 1: aa bb cc
186 * mblk_t 2: dd ee ff
187 * the result must be the same as if we checksummed a
188 * single mblk_t with 'aa bb cc dd ee ff'. As can be seen
189 * from the example, this situation causes the grouping of
190 * the data in the second mblk_t to be offset by a byte.
191 * The fix is to byteswap the mblk_t sum before adding it
192 * to the final sum. Again, we AND the mblk_t sum with a mask
193 * so that either the non-swapped or byteswapped sum is zeroed
194 * out and the other one is preserved (depending on the
195 * total bytes checksummed so far) and added to the sum.
196 *
197 * Effectively,
198 *
199 * if (IS_ODD(total_len))
200 * sum += BSWAP_32(msum);
201 * else
202 * sum += msum;
203 */
204 mask = sum_mask[IS_ODD(total_len)];
205 sum += BSWAP_32(msum) & mask;
206 sum += msum & ~mask;
207
208 total_len += mlen;
209 mp = mp->b_cont;
210 }
211
212 /*
213 * To avoid unnecessary folding, we store the cumulative sum in
214 * a uint64_t. This means we can always checksum up to 2^56 bytes
215 * (2^(64-8)) without danger of overflowing. Since 2^56 is well past
216 * the petabyte range, and is far beyond the amount of data that
217 * could every be stored in a single mblk_t chain (for the forseeable
218 * future), this serves more as a sanity check than anything else.
219 */
220 VERIFY3U(total_len, <=, (uint64_t)1 << 56);
221
222 /*
223 * For a 64-bit sum, we have to fold at most 4 times to
224 * produce a sum <= 0xFFFF.
225 */
226 sum = FOLD(sum);
227 sum = FOLD(sum);
228 sum = FOLD(sum);
229 sum = FOLD(sum);
230
231 TRACE_3(TR_FAC_IP, TR_IP_CKSUM_END,
232 "ip_cksum_end:(%S) type %d (%X)", "ip_cksum", 1, sum);
233 return ((unsigned int)sum);
234 }
235
236 uint32_t
237 sctp_cksum(mblk_t *mp, int offset)
238 {
239 uint32_t crc32;
240 uchar_t *p = NULL;
241
242 crc32 = 0xFFFFFFFF;
243 p = mp->b_rptr + offset;
244 crc32 = sctp_crc32(crc32, p, mp->b_wptr - p);
245 for (mp = mp->b_cont; mp != NULL; mp = mp->b_cont) {
246 crc32 = sctp_crc32(crc32, mp->b_rptr, MBLKL(mp));
247 }
248
249 /* Complement the result */
250 crc32 = ~crc32;
251
252 return (crc32);
253 }
254
255 /*
256 * Routine to compute Internet checksum (16-bit 1's complement) of a given
257 * Multidata packet descriptor. As in the non-Multidata routine, this doesn't
258 * 1's complement the result, such that it may be used to compute partial
259 * checksums. Since it works on buffer spans rather than mblks, this routine
260 * does not handle existing partial checksum value as in the STRUIO_IP special
261 * mblk case (supporting this is rather trivial, but is perhaps of no use at
262 * the moment unless synchronous streams and delayed checksum calculation are
263 * revived.)
264 *
265 * Note also here that the given Multidata packet descriptor must refer to
266 * a header buffer, i.e. it must have a header fragment. In addition, the
267 * offset must lie within the boundary of the header fragment. For the
268 * outbound tcp (MDT) case, this will not be an issue because the stack
269 * ensures that such conditions are met, and that there is no need whatsoever
270 * to compute partial checksums on an arbitrary offset that is not part of
271 * the header fragment. We may need to revisit this routine to handle all
272 * cases of the inbound (MDR) case, especially when we need to perform partial
273 * checksum calculation due to padded bytes (non-zeroes) in the frame.
274 */
275 uint_t
276 ip_md_cksum(pdesc_t *pd, int offset, uint_t sum)
277 {
278 pdescinfo_t *pdi = &pd->pd_pdi;
279 uchar_t *reg_start, *reg_end;
280 ssize_t mlen, i;
281 ushort_t *w;
282 boolean_t byteleft = B_FALSE;
283
284 ASSERT((pdi->flags & PDESC_HAS_REF) != 0);
285 ASSERT(pdi->hdr_rptr != NULL && pdi->hdr_wptr != NULL);
286 ASSERT(offset <= PDESC_HDRL(pdi));
287
288 for (i = 0; i < pdi->pld_cnt + 1; i++) {
289 if (i == 0) {
290 reg_start = pdi->hdr_rptr;
291 reg_end = pdi->hdr_wptr;
292 } else {
293 reg_start = pdi->pld_ary[i - 1].pld_rptr;
294 reg_end = pdi->pld_ary[i - 1].pld_wptr;
295 offset = 0;
296 }
297
298 w = (ushort_t *)(reg_start + offset);
299 mlen = reg_end - (uchar_t *)w;
300
301 if (mlen > 0 && byteleft) {
302 /*
303 * There is a byte left from the last
304 * segment; add it into the checksum.
305 * Don't have to worry about a carry-
306 * out here because we make sure that
307 * high part of (32 bit) sum is small
308 * below.
309 */
310 #ifdef _LITTLE_ENDIAN
311 sum += *(uchar_t *)w << 8;
312 #else
313 sum += *(uchar_t *)w;
314 #endif
315 w = (ushort_t *)((char *)w + 1);
316 mlen--;
317 byteleft = B_FALSE;
318 }
319
320 if (mlen == 0)
321 continue;
322
323 if (IS_EVEN(w)) {
324 sum = ip_ocsum(w, mlen >> 1, sum);
325 w += mlen >> 1;
326 /*
327 * If we had an odd number of bytes,
328 * then the last byte goes in the high
329 * part of the sum, and we take the
330 * first byte to the low part of the sum
331 * the next time around the loop.
332 */
333 if (IS_ODD(mlen)) {
334 #ifdef _LITTLE_ENDIAN
335 sum += *(uchar_t *)w;
336 #else
337 sum += *(uchar_t *)w << 8;
338 #endif
339 byteleft = B_TRUE;
340 }
341 } else {
342 ushort_t swsum;
343 #ifdef _LITTLE_ENDIAN
344 sum += *(uchar_t *)w;
345 #else
346 sum += *(uchar_t *)w << 8;
347 #endif
348 mlen--;
349 w = (ushort_t *)(1 + (uintptr_t)w);
350
351 /* Do a separate checksum and copy operation */
352 swsum = ip_ocsum(w, mlen >> 1, 0);
353 sum += ((swsum << 8) & 0xffff) | (swsum >> 8);
354 w += mlen >> 1;
355 /*
356 * If we had an even number of bytes,
357 * then the last byte goes in the low
358 * part of the sum. Otherwise we had an
359 * odd number of bytes and we take the first
360 * byte to the low part of the sum the
361 * next time around the loop.
362 */
363 if (IS_ODD(mlen)) {
364 #ifdef _LITTLE_ENDIAN
365 sum += *(uchar_t *)w << 8;
366 #else
367 sum += *(uchar_t *)w;
368 #endif
369 } else {
370 byteleft = B_TRUE;
371 }
372 }
373 }
374
375 /*
376 * Add together high and low parts of sum and carry to get cksum.
377 * Have to be careful to not drop the last carry here.
378 */
379 sum = (sum & 0xffff) + (sum >> 16);
380 sum = (sum & 0xffff) + (sum >> 16);
381
382 return (sum);
383 }
384
385 /* Return the IP checksum for the IP header at "iph". */
386 uint16_t
387 ip_csum_hdr(ipha_t *ipha)
388 {
389 uint16_t *uph;
390 uint32_t sum;
391 int opt_len;
392
393 opt_len = (ipha->ipha_version_and_hdr_length & 0xF) -
394 IP_SIMPLE_HDR_LENGTH_IN_WORDS;
395 uph = (uint16_t *)ipha;
396 sum = uph[0] + uph[1] + uph[2] + uph[3] + uph[4] +
397 uph[5] + uph[6] + uph[7] + uph[8] + uph[9];
398 if (opt_len > 0) {
399 do {
400 sum += uph[10];
401 sum += uph[11];
402 uph += 2;
403 } while (--opt_len);
404 }
405 sum = (sum & 0xFFFF) + (sum >> 16);
406 sum = ~(sum + (sum >> 16)) & 0xFFFF;
407 if (sum == 0xffff)
408 sum = 0;
409 return ((uint16_t)sum);
410 }