Print this page
3903 DTrace SCTP Provider
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/sctp/sctp_output.c
+++ new/usr/src/uts/common/inet/sctp/sctp_output.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 #include <sys/types.h>
27 27 #include <sys/systm.h>
28 28 #include <sys/stream.h>
29 29 #include <sys/cmn_err.h>
30 30 #define _SUN_TPI_VERSION 2
31 31 #include <sys/tihdr.h>
32 32 #include <sys/socket.h>
33 33 #include <sys/stropts.h>
34 34 #include <sys/strsun.h>
35 35 #include <sys/strsubr.h>
36 36 #include <sys/socketvar.h>
37 37 #include <inet/common.h>
38 38 #include <inet/mi.h>
39 39 #include <inet/ip.h>
40 40 #include <inet/ip_ire.h>
41 41 #include <inet/ip6.h>
42 42 #include <inet/sctp_ip.h>
43 43 #include <inet/ipclassifier.h>
44 44
45 45 /*
46 46 * PR-SCTP comments.
47 47 *
48 48 * A message can expire before it gets to the transmit list (i.e. it is still
49 49 * in the unsent list - unchunked), after it gets to the transmit list, but
50 50 * before transmission has actually started, or after transmission has begun.
51 51 * Accordingly, we check for the status of a message in sctp_chunkify() when
52 52 * the message is being transferred from the unsent list to the transmit list;
53 53 * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
54 54 * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
55 55 * When we nuke a message in sctp_chunkify(), all we need to do is take it
56 56 * out of the unsent list and update sctp_unsent; when a message is deemed
57 57 * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
58 58 * list, update sctp_unsent IFF transmission for the message has not yet begun
59 59 * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
60 60 * message has started, then we cannot just take it out of the list, we need
61 61 * to send Forward TSN chunk to the peer so that the peer can clear its
62 62 * fragment list for this message. However, we cannot just send the Forward
63 63 * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
64 64 * messages preceding this abandoned message. So, we send a Forward TSN
65 65 * IFF all messages prior to this abandoned message have been SACKed, if not
66 66 * we defer sending the Forward TSN to sctp_cumack(), which will check for
67 67 * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
68 68 * sctp_rexmit() when we check for retransmissions, we need to determine if
69 69 * the advanced peer ack point can be moved ahead, and if so, send a Forward
70 70 * TSN to the peer instead of retransmitting the chunk. Note that when
71 71 * we send a Forward TSN for a message, there may be yet unsent chunks for
72 72 * this message; we need to mark all such chunks as abandoned, so that
73 73 * sctp_cumack() can take the message out of the transmit list, additionally
74 74 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
75 75 * decremented when a message/chunk is deemed abandoned), sockfs needs to
76 76 * be notified so that it can adjust its idea of the queued message.
77 77 */
78 78
79 79 #include "sctp_impl.h"
80 80
81 81 static struct kmem_cache *sctp_kmem_ftsn_set_cache;
82 82 static mblk_t *sctp_chunkify(sctp_t *, int, int, int);
83 83
84 84 #ifdef DEBUG
85 85 static boolean_t sctp_verify_chain(mblk_t *, mblk_t *);
86 86 #endif
87 87
88 88 /*
89 89 * Called to allocate a header mblk when sending data to SCTP.
90 90 * Data will follow in b_cont of this mblk.
91 91 */
92 92 mblk_t *
93 93 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
94 94 int flags)
95 95 {
96 96 mblk_t *mp;
97 97 struct T_unitdata_req *tudr;
98 98 size_t size;
99 99 int error;
100 100
101 101 size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
102 102 size = MAX(size, sizeof (sctp_msg_hdr_t));
103 103 if (flags & SCTP_CAN_BLOCK) {
104 104 mp = allocb_wait(size, BPRI_MED, 0, &error);
105 105 } else {
106 106 mp = allocb(size, BPRI_MED);
107 107 }
108 108 if (mp) {
109 109 tudr = (struct T_unitdata_req *)mp->b_rptr;
110 110 tudr->PRIM_type = T_UNITDATA_REQ;
111 111 tudr->DEST_length = nlen;
112 112 tudr->DEST_offset = sizeof (*tudr);
113 113 tudr->OPT_length = clen;
114 114 tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
115 115 _TPI_ALIGN_TOPT(nlen));
116 116 if (nlen > 0)
117 117 bcopy(name, tudr + 1, nlen);
118 118 if (clen > 0)
119 119 bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
120 120 mp->b_wptr += (tudr ->OPT_offset + clen);
121 121 mp->b_datap->db_type = M_PROTO;
122 122 }
123 123 return (mp);
124 124 }
125 125
/*
 * Queue one user message for transmission on an association.
 *
 * mp must be an M_PROTO mblk carrying a T_UNITDATA_REQ (both are ASSERTed
 * below); the user data, if any, hangs off mp->b_cont.  The flags argument
 * is not used by this function.
 *
 * Processing order:
 *  1. If DEST_length > 0 the destination sockaddr (AF_INET or AF_INET6) is
 *     length-checked and looked up with sctp_lookup_faddr().
 *  2. If OPT_length > 0 the control data is scanned for the first
 *     IPPROTO_SCTP/SCTP_SNDRCV cmsg, whose sctp_sndrcvinfo overrides the
 *     per-association defaults for stream id, flags, ppid, context and
 *     time to live.
 *  3. With MSG_ABORT set, chained data is pulled up into a single mblk,
 *     sctp_user_abort() is called, the M_PROTO is freed and 0 is returned.
 *  4. With no data mblk at all, 0 is returned and nothing is queued.
 *  5. Otherwise the M_PROTO buffer is rewritten in place as an
 *     sctp_msg_hdr_t, appended to sctp_xmit_unsent, and sctp_output() is
 *     called if the association is in SCTPS_ESTABLISHED.
 *
 * Returns 0 on success, otherwise EINVAL (short/unknown address, short
 * SCTP_SNDRCV cmsg, bad stream id, zero-length data), EAFNOSUPPORT,
 * ENOMEM (msgpullup() failure) or EPIPE (shutting down / closing).
 *
 * NOTE(review): none of the non-zero return paths free mproto here;
 * presumably the caller does - confirm.  On the two late EINVAL paths the
 * T_UNITDATA_REQ contents have already been overwritten by the
 * sctp_msg_hdr_t.
 */
126 126 /*ARGSUSED2*/
127 127 int
128 128 sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
129 129 {
130 130 sctp_faddr_t *fp = NULL;
131 131 struct T_unitdata_req *tudr;
132 132 int error = 0;
133 133 mblk_t *mproto = mp;
134 134 in6_addr_t *addr;
135 135 in6_addr_t tmpaddr;
136 136 uint16_t sid = sctp->sctp_def_stream;
137 137 uint32_t ppid = sctp->sctp_def_ppid;
138 138 uint32_t context = sctp->sctp_def_context;
139 139 uint16_t msg_flags = sctp->sctp_def_flags;
140 140 sctp_msg_hdr_t *sctp_msg_hdr;
141 141 uint32_t msg_len = 0;
142 142 uint32_t timetolive = sctp->sctp_def_timetolive;
143 143 conn_t *connp = sctp->sctp_connp;
144 144
145 145 ASSERT(DB_TYPE(mproto) == M_PROTO);
146 146
/* From here on mp refers to the user data; mproto keeps the header. */
147 147 mp = mp->b_cont;
148 148 ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);
149 149
150 150 tudr = (struct T_unitdata_req *)mproto->b_rptr;
151 151 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
152 152
153 153 /* Get destination address, if specified */
154 154 if (tudr->DEST_length > 0) {
155 155 sin_t *sin;
156 156 sin6_t *sin6;
157 157
158 158 sin = (struct sockaddr_in *)
159 159 (mproto->b_rptr + tudr->DEST_offset);
160 160 switch (sin->sin_family) {
161 161 case AF_INET:
162 162 if (tudr->DEST_length < sizeof (*sin)) {
163 163 return (EINVAL);
164 164 }
/* Peer addresses are looked up as IPv6; map the IPv4 address first. */
165 165 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
166 166 addr = &tmpaddr;
167 167 break;
168 168 case AF_INET6:
169 169 if (tudr->DEST_length < sizeof (*sin6)) {
170 170 return (EINVAL);
171 171 }
172 172 sin6 = (struct sockaddr_in6 *)
173 173 (mproto->b_rptr + tudr->DEST_offset);
174 174 addr = &sin6->sin6_addr;
175 175 break;
176 176 default:
177 177 return (EAFNOSUPPORT);
178 178 }
179 179 fp = sctp_lookup_faddr(sctp, addr);
180 180 if (fp == NULL) {
181 181 return (EINVAL);
182 182 }
183 183 }
184 184 /* Ancillary Data? */
185 185 if (tudr->OPT_length > 0) {
186 186 struct cmsghdr *cmsg;
187 187 char *cend;
188 188 struct sctp_sndrcvinfo *sndrcv;
189 189
190 190 cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
191 191 cend = ((char *)cmsg + tudr->OPT_length);
192 192 ASSERT(cend <= (char *)mproto->b_wptr);
193 193
/*
 * Walk the cmsg headers until one would run past cend, has a zero
 * length, or the first SCTP_SNDRCV entry has been consumed.
 */
194 194 for (;;) {
195 195 if ((char *)(cmsg + 1) > cend ||
196 196 ((char *)cmsg + cmsg->cmsg_len) > cend) {
197 197 break;
198 198 }
199 199 if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
200 200 (cmsg->cmsg_type == SCTP_SNDRCV)) {
201 201 if (cmsg->cmsg_len <
202 202 (sizeof (*sndrcv) + sizeof (*cmsg))) {
203 203 return (EINVAL);
204 204 }
205 205 sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
206 206 sid = sndrcv->sinfo_stream;
207 207 msg_flags = sndrcv->sinfo_flags;
208 208 ppid = sndrcv->sinfo_ppid;
209 209 context = sndrcv->sinfo_context;
210 210 timetolive = sndrcv->sinfo_timetolive;
211 211 break;
212 212 }
213 213 if (cmsg->cmsg_len > 0)
214 214 cmsg = CMSG_NEXT(cmsg);
215 215 else
216 216 break;
217 217 }
218 218 }
219 219 if (msg_flags & MSG_ABORT) {
/* sctp_user_abort() is handed a single mblk: flatten chained data. */
220 220 if (mp && mp->b_cont) {
221 221 mblk_t *pump = msgpullup(mp, -1);
222 222 if (!pump) {
223 223 return (ENOMEM);
224 224 }
225 225 freemsg(mp);
226 226 mp = pump;
227 227 mproto->b_cont = mp;
228 228 }
229 229 RUN_SCTP(sctp);
230 230 sctp_user_abort(sctp, mp);
231 231 freemsg(mproto);
232 232 goto done2;
233 233 }
/* No data to queue: error is still 0 here, so this returns success. */
234 234 if (mp == NULL)
235 235 goto done;
236 236
237 237 RUN_SCTP(sctp);
238 238
239 239 /* Reject any new data requests if we are shutting down */
240 240 if (sctp->sctp_state > SCTPS_ESTABLISHED ||
241 241 (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
242 242 error = EPIPE;
243 243 goto unlock_done;
244 244 }
245 245
246 246 /* Re-use the mproto to store relevant info. */
247 247 ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));
248 248
249 249 mproto->b_rptr = mproto->b_datap->db_base;
250 250 mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);
251 251
252 252 sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
253 253 bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
254 254 sctp_msg_hdr->smh_context = context;
255 255 sctp_msg_hdr->smh_sid = sid;
256 256 sctp_msg_hdr->smh_ppid = ppid;
257 257 sctp_msg_hdr->smh_flags = msg_flags;
258 258 sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
259 259 sctp_msg_hdr->smh_tob = ddi_get_lbolt64();
/* Total payload length over the whole b_cont chain; mp ends up NULL. */
260 260 for (; mp != NULL; mp = mp->b_cont)
261 261 msg_len += MBLKL(mp);
262 262 sctp_msg_hdr->smh_msglen = msg_len;
263 263
264 264 /* User requested specific destination */
265 265 SCTP_SET_CHUNK_DEST(mproto, fp);
266 266
267 267 if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
268 268 sid >= sctp->sctp_num_ostr) {
269 269 /* Send sendfail event */
270 270 sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
271 271 B_FALSE);
272 272 error = EINVAL;
273 273 goto unlock_done;
274 274 }
275 275
276 276 /* no data */
277 277 if (msg_len == 0) {
278 278 sctp_sendfail_event(sctp, dupmsg(mproto),
279 279 SCTP_ERR_NO_USR_DATA, B_FALSE);
280 280 error = EINVAL;
281 281 goto unlock_done;
282 282 }
283 283
284 284 /* Add it to the unsent list */
285 285 if (sctp->sctp_xmit_unsent == NULL) {
286 286 sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
287 287 } else {
288 288 sctp->sctp_xmit_unsent_tail->b_next = mproto;
289 289 sctp->sctp_xmit_unsent_tail = mproto;
290 290 }
291 291 sctp->sctp_unsent += msg_len;
292 292 BUMP_LOCAL(sctp->sctp_msgcount);
293 293 /*
294 294 * Notify sockfs if the tx queue is full.
295 295 */
296 296 if (SCTP_TXQ_LEN(sctp) >= connp->conn_sndbuf) {
297 297 sctp->sctp_txq_full = 1;
298 298 sctp->sctp_ulp_txq_full(sctp->sctp_ulpd, B_TRUE);
299 299 }
300 300 if (sctp->sctp_state == SCTPS_ESTABLISHED)
301 301 sctp_output(sctp, UINT_MAX);
/* Success exits (queued message or completed abort): always return 0. */
302 302 done2:
303 303 WAKE_SCTP(sctp);
304 304 return (0);
/* Failure after RUN_SCTP(): undo it with WAKE_SCTP() before returning. */
305 305 unlock_done:
306 306 WAKE_SCTP(sctp);
/* Exit for paths that never called RUN_SCTP(). */
307 307 done:
308 308 return (error);
309 309 }
310 310
311 311 /*
312 312 * While there are messages on sctp_xmit_unsent, detach each one. For each:
313 313 * allocate space for the chunk header, fill in the data chunk, and fill in
314 314 * the chunk header. Then append it to sctp_xmit_tail.
315 315 * Return after appending as many bytes as required (bytes_to_send).
316 316 * We also return if we've appended one or more chunks, and find a subsequent
317 317 * unsent message is too big to fit in the segment.
318 318 */
/*
 * Parameter and return notes for sctp_chunkify():
 *
 *   mss            segment size used for the first chunk built by this call
 *   firstseg_len   bytes already consumed in the current packet; the first
 *                  chunk is sized from (mss - firstseg_len)
 *   bytes_to_send  payload budget; it is reduced by the payload of every
 *                  chunk built, and no further message is started once it
 *                  drops to zero or below
 *
 * Returns NULL if firstseg_len is non-zero and the next unsent message is
 * larger than the space remaining in the packet.  In every other case,
 * including dupb()/allocb() failure, sctp->sctp_xmit_tail is returned.  On
 * allocation failure the not-yet-chunked remainder is put back at the front
 * of the message's b_cont; if chunks had already been built they are hung
 * off its b_next and the message is flagged with SCTP_MSG_SET_CHUNKED() so
 * that a later call can resume.
 *
 * Label map: nextmsg (start a message), nextchunk (carve one chunk),
 * next (reset the chunk size to a full sf_pmss and retry; note this label
 * lives inside an if body), try_next (decide whether to continue with the
 * following unsent message).
 */
319 319 mblk_t *
320 320 sctp_chunkify(sctp_t *sctp, int mss, int firstseg_len, int bytes_to_send)
321 321 {
322 322 mblk_t *mp;
323 323 mblk_t *chunk_mp;
324 324 mblk_t *chunk_head;
325 325 mblk_t *chunk_hdr;
326 326 mblk_t *chunk_tail = NULL;
327 327 int count;
328 328 int chunksize;
329 329 sctp_data_hdr_t *sdc;
330 330 mblk_t *mdblk = sctp->sctp_xmit_unsent;
331 331 sctp_faddr_t *fp;
332 332 sctp_faddr_t *fp1;
333 333 size_t xtralen;
334 334 sctp_msg_hdr_t *msg_hdr;
335 335 sctp_stack_t *sctps = sctp->sctp_sctps;
336 336 sctp_msg_hdr_t *next_msg_hdr;
337 337 size_t nextlen;
338 338 int remaining_len = mss - firstseg_len;
339 339
340 340 ASSERT(remaining_len >= 0);
341 341
/* Use the destination recorded on the message, else the current peer. */
342 342 fp = SCTP_CHUNK_DEST(mdblk);
343 343 if (fp == NULL)
344 344 fp = sctp->sctp_current;
/*
 * xtralen is the size of a separately allocated chunk header mblk: the
 * v4 or v6 header length, sctps_wroff_xtra and the data chunk header.
 */
345 345 if (fp->sf_isv4)
346 346 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra +
347 347 sizeof (*sdc);
348 348 else
349 349 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra +
350 350 sizeof (*sdc);
/* count tracks payload room left in the chunk; chunksize is its start. */
351 351 count = chunksize = remaining_len - sizeof (*sdc);
352 352 nextmsg:
/* On both ways of reaching this label mdblk == sctp->sctp_xmit_unsent. */
353 353 next_msg_hdr = (sctp_msg_hdr_t *)sctp->sctp_xmit_unsent->b_rptr;
354 354 nextlen = next_msg_hdr->smh_msglen;
355 355 /*
356 356 * Will the entire next message fit in the current packet ?
357 357 * if not, leave it on the unsent list.
358 358 */
359 359 if ((firstseg_len != 0) && (nextlen > remaining_len))
360 360 return (NULL);
361 361
362 362 chunk_mp = mdblk->b_cont;
363 363
364 364 /*
365 365 * If this partially chunked, we ignore the next one for now and
366 366 * use the one already present. For the unchunked bits, we use the
367 367 * length of the last chunk.
368 368 */
369 369 if (SCTP_IS_MSG_CHUNKED(mdblk)) {
370 370 int chunk_len;
371 371
/*
 * Layout left by an earlier failed call: b_cont is the unchunked
 * remainder and its b_next heads the chunks already built.  Detach the
 * remainder and make the built chunks the message's b_cont again.
 */
372 372 ASSERT(chunk_mp->b_next != NULL);
373 373 mdblk->b_cont = chunk_mp->b_next;
374 374 chunk_mp->b_next = NULL;
375 375 SCTP_MSG_CLEAR_CHUNKED(mdblk);
376 376 mp = mdblk->b_cont;
377 377 while (mp->b_next != NULL)
378 378 mp = mp->b_next;
379 379 chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
380 380 if (fp->sf_pmss - chunk_len > sizeof (*sdc))
381 381 count = chunksize = fp->sf_pmss - chunk_len;
382 382 else
383 383 count = chunksize = fp->sf_pmss;
384 384 count = chunksize = count - sizeof (*sdc);
385 385 } else {
386 386 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
387 387 if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
/* Drop the message from the unsent list before any chunk is built. */
388 388 sctp->sctp_xmit_unsent = mdblk->b_next;
389 389 if (sctp->sctp_xmit_unsent == NULL)
390 390 sctp->sctp_xmit_unsent_tail = NULL;
391 391 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
392 392 sctp->sctp_unsent -= msg_hdr->smh_msglen;
393 393 mdblk->b_next = NULL;
394 394 BUMP_LOCAL(sctp->sctp_prsctpdrop);
395 395 /*
396 396 * Update ULP the amount of queued data, which is
397 397 * sent-unack'ed + unsent.
398 398 */
399 399 if (!SCTP_IS_DETACHED(sctp))
400 400 SCTP_TXQ_UPDATE(sctp);
401 401 sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
402 402 goto try_next;
403 403 }
/* b_cont will now collect finished chunks; chunk_mp holds the data. */
404 404 mdblk->b_cont = NULL;
405 405 }
406 406 msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
407 407 nextchunk:
408 408 chunk_head = chunk_mp;
409 409 chunk_tail = NULL;
410 410
411 411 /* Skip as many mblk's as we need */
412 412 while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
413 413 count -= MBLKL(chunk_mp);
414 414 chunk_tail = chunk_mp;
415 415 chunk_mp = chunk_mp->b_cont;
416 416 }
417 417 /* Split the chain, if needed */
418 418 if (chunk_mp != NULL) {
419 419 if (count > 0) {
/* Room remains: share the dblk and carve count bytes off its front. */
420 420 mblk_t *split_mp = dupb(chunk_mp);
421 421
422 422 if (split_mp == NULL) {
/* Put the untouched remainder back so a later call can resume. */
423 423 if (mdblk->b_cont == NULL) {
424 424 mdblk->b_cont = chunk_head;
425 425 } else {
426 426 SCTP_MSG_SET_CHUNKED(mdblk);
427 427 ASSERT(chunk_head->b_next == NULL);
428 428 chunk_head->b_next = mdblk->b_cont;
429 429 mdblk->b_cont = chunk_head;
430 430 }
431 431 return (sctp->sctp_xmit_tail);
432 432 }
433 433 if (chunk_tail != NULL) {
434 434 chunk_tail->b_cont = split_mp;
435 435 chunk_tail = chunk_tail->b_cont;
436 436 } else {
437 437 chunk_head = chunk_tail = split_mp;
438 438 }
439 439 chunk_tail->b_wptr = chunk_tail->b_rptr + count;
440 440 chunk_mp->b_rptr = chunk_tail->b_wptr;
441 441 count = 0;
442 442 } else if (chunk_tail == NULL) {
/* No room and nothing gathered: retry with a full-sized chunk. */
443 443 goto next;
444 444 } else {
/* The chunk ends exactly on an mblk boundary: just cut the chain. */
445 445 chunk_tail->b_cont = NULL;
446 446 }
447 447 }
448 448 /* Alloc chunk hdr, if needed */
449 449 if (DB_REF(chunk_head) > 1 ||
450 450 ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
451 451 MBLKHEAD(chunk_head) < sizeof (*sdc)) {
452 452 if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
/* Re-join the carved piece with the remainder and put it all back. */
453 453 if (mdblk->b_cont == NULL) {
454 454 if (chunk_mp != NULL)
455 455 linkb(chunk_head, chunk_mp);
456 456 mdblk->b_cont = chunk_head;
457 457 } else {
458 458 SCTP_MSG_SET_CHUNKED(mdblk);
459 459 if (chunk_mp != NULL)
460 460 linkb(chunk_head, chunk_mp);
461 461 ASSERT(chunk_head->b_next == NULL);
462 462 chunk_head->b_next = mdblk->b_cont;
463 463 mdblk->b_cont = chunk_head;
464 464 }
465 465 return (sctp->sctp_xmit_tail);
466 466 }
/* Place the chunk header at the very end of the new buffer. */
467 467 chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
468 468 chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
469 469 chunk_hdr->b_cont = chunk_head;
470 470 } else {
/* Unshared, aligned, enough headroom: prepend the header in place. */
471 471 chunk_hdr = chunk_head;
472 472 chunk_hdr->b_rptr -= sizeof (*sdc);
473 473 }
474 474 ASSERT(chunk_hdr->b_datap->db_ref == 1);
475 475 sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
476 476 sdc->sdh_id = CHUNK_DATA;
477 477 sdc->sdh_flags = 0;
/* chunksize - count is the payload actually placed in this chunk. */
478 478 sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
479 479 ASSERT(sdc->sdh_len);
480 480 sdc->sdh_sid = htons(msg_hdr->smh_sid);
481 481 /*
482 482 * We defer assigning the SSN just before sending the chunk, else
483 483 * if we drop the chunk in sctp_get_msg_to_send(), we would need
484 484 * to send a Forward TSN to let the peer know. Some more comments
485 485 * about this in sctp_impl.h for SCTP_CHUNK_SENT.
486 486 */
487 487 sdc->sdh_payload_id = msg_hdr->smh_ppid;
488 488
/* First chunk of the message gets the B bit; others go on the b_next tail. */
489 489 if (mdblk->b_cont == NULL) {
490 490 mdblk->b_cont = chunk_hdr;
491 491 SCTP_DATA_SET_BBIT(sdc);
492 492 } else {
493 493 mp = mdblk->b_cont;
494 494 while (mp->b_next != NULL)
495 495 mp = mp->b_next;
496 496 mp->b_next = chunk_hdr;
497 497 }
498 498
499 499 bytes_to_send -= (chunksize - count);
500 500 if (chunk_mp != NULL) {
501 501 next:
502 502 count = chunksize = fp->sf_pmss - sizeof (*sdc);
503 503 goto nextchunk;
504 504 }
/* Message fully chunked: mark the last chunk and move it to the xmit list. */
505 505 SCTP_DATA_SET_EBIT(sdc);
506 506 sctp->sctp_xmit_unsent = mdblk->b_next;
507 507 if (mdblk->b_next == NULL) {
508 508 sctp->sctp_xmit_unsent_tail = NULL;
509 509 }
510 510 mdblk->b_next = NULL;
511 511
512 512 if (sctp->sctp_xmit_tail == NULL) {
513 513 sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
514 514 } else {
/* Append after the real end of the list; sctp_xmit_tail is not advanced. */
515 515 mp = sctp->sctp_xmit_tail;
516 516 while (mp->b_next != NULL)
517 517 mp = mp->b_next;
518 518 mp->b_next = mdblk;
519 519 mdblk->b_prev = mp;
520 520 }
521 521 try_next:
522 522 if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
523 523 mdblk = sctp->sctp_xmit_unsent;
524 524 fp1 = SCTP_CHUNK_DEST(mdblk);
525 525 if (fp1 == NULL)
526 526 fp1 = sctp->sctp_current;
527 527 if (fp == fp1) {
/*
 * Same destination: either keep using the room left over from the
 * previous chunk (minus a chunk header, rounded down to a multiple of
 * 4) or start over with a full sf_pmss-sized chunk.
 */
528 528 size_t len = MBLKL(mdblk->b_cont);
529 529 if ((count > 0) &&
530 530 ((len > fp->sf_pmss - sizeof (*sdc)) ||
531 531 (len <= count))) {
532 532 count -= sizeof (*sdc);
533 533 count = chunksize = count - (count & 0x3);
534 534 } else {
535 535 count = chunksize = fp->sf_pmss -
536 536 sizeof (*sdc);
537 537 }
538 538 } else {
/* Destination changed: recompute header room and chunk size for it. */
539 539 if (fp1->sf_isv4)
540 540 xtralen = sctp->sctp_hdr_len;
541 541 else
542 542 xtralen = sctp->sctp_hdr6_len;
543 543 xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc);
544 544 count = chunksize = fp1->sf_pmss - sizeof (*sdc);
545 545 fp = fp1;
546 546 }
547 547 goto nextmsg;
548 548 }
549 549 return (sctp->sctp_xmit_tail);
550 550 }
551 551
552 552 void
553 553 sctp_free_msg(mblk_t *ump)
554 554 {
555 555 mblk_t *mp, *nmp;
556 556
557 557 for (mp = ump->b_cont; mp; mp = nmp) {
558 558 nmp = mp->b_next;
559 559 mp->b_next = mp->b_prev = NULL;
560 560 freemsg(mp);
561 561 }
562 562 ASSERT(!ump->b_prev);
563 563 ump->b_next = NULL;
564 564 freeb(ump);
565 565 }
566 566
567 567 mblk_t *
568 568 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
569 569 int *error)
570 570 {
571 571 int hdrlen;
572 572 uchar_t *hdr;
573 573 int isv4 = fp->sf_isv4;
574 574 sctp_stack_t *sctps = sctp->sctp_sctps;
575 575
576 576 if (error != NULL)
577 577 *error = 0;
578 578
579 579 if (isv4) {
580 580 hdrlen = sctp->sctp_hdr_len;
581 581 hdr = sctp->sctp_iphc;
582 582 } else {
583 583 hdrlen = sctp->sctp_hdr6_len;
584 584 hdr = sctp->sctp_iphc6;
585 585 }
586 586 /*
587 587 * A reject|blackhole could mean that the address is 'down'. Similarly,
588 588 * it is possible that the address went down, we tried to send an
589 589 * heartbeat and ended up setting fp->sf_saddr as unspec because we
590 590 * didn't have any usable source address. In either case
591 591 * sctp_get_dest() will try find an IRE, if available, and set
592 592 * the source address, if needed. If we still don't have any
593 593 * usable source address, fp->sf_state will be SCTP_FADDRS_UNREACH and
594 594 * we return EHOSTUNREACH.
595 595 */
596 596 ASSERT(fp->sf_ixa->ixa_ire != NULL);
597 597 if ((fp->sf_ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
598 598 SCTP_IS_ADDR_UNSPEC(fp->sf_isv4, fp->sf_saddr)) {
599 599 sctp_get_dest(sctp, fp);
600 600 if (fp->sf_state == SCTP_FADDRS_UNREACH) {
601 601 if (error != NULL)
602 602 *error = EHOSTUNREACH;
603 603 return (NULL);
604 604 }
605 605 }
606 606 /* Copy in IP header. */
607 607 if ((mp->b_rptr - mp->b_datap->db_base) <
608 608 (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2) {
609 609 mblk_t *nmp;
610 610
611 611 /*
612 612 * This can happen if IP headers are adjusted after
613 613 * data was moved into chunks, or during retransmission,
614 614 * or things like snoop is running.
615 615 */
616 616 nmp = allocb(sctps->sctps_wroff_xtra + hdrlen + sacklen,
617 617 BPRI_MED);
618 618 if (nmp == NULL) {
619 619 if (error != NULL)
620 620 *error = ENOMEM;
621 621 return (NULL);
622 622 }
623 623 nmp->b_rptr += sctps->sctps_wroff_xtra;
624 624 nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
625 625 nmp->b_cont = mp;
626 626 mp = nmp;
627 627 } else {
628 628 mp->b_rptr -= (hdrlen + sacklen);
629 629 }
630 630 bcopy(hdr, mp->b_rptr, hdrlen);
631 631 if (sacklen) {
632 632 sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
633 633 }
634 634 if (fp != sctp->sctp_current) {
635 635 /* change addresses in header */
636 636 if (isv4) {
637 637 ipha_t *iph = (ipha_t *)mp->b_rptr;
638 638
639 639 IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, iph->ipha_dst);
640 640 if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->sf_saddr)) {
641 641 IN6_V4MAPPED_TO_IPADDR(&fp->sf_saddr,
642 642 iph->ipha_src);
643 643 } else if (sctp->sctp_bound_to_all) {
644 644 iph->ipha_src = INADDR_ANY;
645 645 }
646 646 } else {
647 647 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
648 648
649 649 ip6h->ip6_dst = fp->sf_faddr;
650 650 if (!IN6_IS_ADDR_UNSPECIFIED(&fp->sf_saddr)) {
651 651 ip6h->ip6_src = fp->sf_saddr;
652 652 } else if (sctp->sctp_bound_to_all) {
653 653 ip6h->ip6_src = ipv6_all_zeros;
654 654 }
655 655 }
656 656 }
657 657 return (mp);
658 658 }
659 659
660 660 /*
661 661 * SCTP requires every chunk to be padded so that the total length
662 662 * is a multiple of SCTP_ALIGN. This function returns a mblk with
663 663 * the specified pad length.
664 664 */
665 665 static mblk_t *
666 666 sctp_get_padding(sctp_t *sctp, int pad)
667 667 {
668 668 mblk_t *fill;
669 669
670 670 ASSERT(pad < SCTP_ALIGN);
671 671 ASSERT(sctp->sctp_pad_mp != NULL);
672 672 if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) {
673 673 fill->b_wptr += pad;
674 674 return (fill);
675 675 }
676 676
677 677 /*
678 678 * The memory saving path of reusing the sctp_pad_mp
679 679 * fails may be because it has been dupb() too
680 680 * many times (DBLK_REFMAX). Use the memory consuming
681 681 * path of allocating the pad mblk.
682 682 */
683 683 if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
684 684 /* Zero it out. SCTP_ALIGN is sizeof (int32_t) */
685 685 *(int32_t *)fill->b_rptr = 0;
686 686 fill->b_wptr += pad;
687 687 }
688 688 return (fill);
689 689 }
690 690
691 691 static mblk_t *
692 692 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
693 693 {
694 694 mblk_t *meta;
695 695 mblk_t *start_mp = NULL;
696 696 mblk_t *end_mp = NULL;
697 697 mblk_t *mp, *nmp;
698 698 mblk_t *fill;
699 699 sctp_data_hdr_t *sdh;
700 700 int msglen;
701 701 int extra;
702 702 sctp_msg_hdr_t *msg_hdr;
703 703 sctp_faddr_t *old_fp = NULL;
704 704 sctp_faddr_t *chunk_fp;
705 705 sctp_stack_t *sctps = sctp->sctp_sctps;
706 706
707 707 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
708 708 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
709 709 if (SCTP_IS_MSG_ABANDONED(meta) ||
710 710 SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
711 711 continue;
712 712 }
713 713 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
714 714 if (SCTP_CHUNK_WANT_REXMIT(mp)) {
715 715 /*
716 716 * Use the same peer address to do fast
717 717 * retransmission. If the original peer
718 718 * address is dead, switch to the current
719 719 * one. Record the old one so that we
720 720 * will pick the chunks sent to the old
721 721 * one for fast retransmission.
722 722 */
723 723 chunk_fp = SCTP_CHUNK_DEST(mp);
724 724 if (*fp == NULL) {
725 725 *fp = chunk_fp;
726 726 if ((*fp)->sf_state !=
727 727 SCTP_FADDRS_ALIVE) {
728 728 old_fp = *fp;
729 729 *fp = sctp->sctp_current;
730 730 }
731 731 } else if (old_fp == NULL && *fp != chunk_fp) {
732 732 continue;
733 733 } else if (old_fp != NULL &&
734 734 old_fp != chunk_fp) {
735 735 continue;
736 736 }
737 737
738 738 sdh = (sctp_data_hdr_t *)mp->b_rptr;
739 739 msglen = ntohs(sdh->sdh_len);
740 740 if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
741 741 extra = SCTP_ALIGN - extra;
742 742 }
743 743
744 744 /*
745 745 * We still return at least the first message
746 746 * even if that message cannot fit in as
747 747 * PMTU may have changed.
748 748 */
749 749 if (*total + msglen + extra >
750 750 (*fp)->sf_pmss && start_mp != NULL) {
751 751 return (start_mp);
752 752 }
753 753 if ((nmp = dupmsg(mp)) == NULL)
754 754 return (start_mp);
755 755 if (extra > 0) {
756 756 fill = sctp_get_padding(sctp, extra);
757 757 if (fill != NULL) {
758 758 linkb(nmp, fill);
759 759 } else {
760 760 return (start_mp);
761 761 }
762 762 }
763 763 SCTPS_BUMP_MIB(sctps, sctpOutFastRetrans);
764 764 BUMP_LOCAL(sctp->sctp_rxtchunks);
765 765 SCTP_CHUNK_CLEAR_REXMIT(mp);
766 766 if (start_mp == NULL) {
767 767 start_mp = nmp;
768 768 } else {
769 769 linkb(end_mp, nmp);
770 770 }
771 771 end_mp = nmp;
772 772 *total += msglen + extra;
773 773 dprint(2, ("sctp_find_fast_rexmit_mblks: "
774 774 "tsn %x\n", sdh->sdh_tsn));
775 775 }
776 776 }
777 777 }
778 778 /* Clear the flag as there is no more message to be fast rexmitted. */
779 779 sctp->sctp_chk_fast_rexmit = B_FALSE;
780 780 return (start_mp);
781 781 }
782 782
/*
 * Debug-only sanity check that a b_next chain is not broken: returns
 * B_TRUE if 'tail' can be reached from 'head' by following b_next, or if
 * either pointer is NULL; B_FALSE otherwise.
 */
#ifdef DEBUG
static boolean_t
sctp_verify_chain(mblk_t *head, mblk_t *tail)
{
	mblk_t	*cur;

	/* Nothing to verify without both ends. */
	if (head == NULL || tail == NULL)
		return (B_TRUE);

	for (cur = head; cur != NULL; cur = cur->b_next) {
		if (cur == tail)
			return (B_TRUE);
	}
	return (B_FALSE);
}
#endif
800 800
801 801 /*
802 802 * Gets the next unsent chunk to transmit. Messages that are abandoned are
803 803 * skipped. A message can be abandoned if it has a non-zero timetolive and
804 804 * transmission has not yet started or if it is a partially reliable
805 805 * message and its time is up (assuming we are PR-SCTP aware).
806 806 * We only return a chunk if it will fit entirely in the current packet.
807 807 * 'cansend' is used to determine if need to try and chunkify messages from
808 808 * the unsent list, if any, and also as an input to sctp_chunkify() if so.
809 809 *
810 810 * firstseg_len indicates the space already used, cansend represents remaining
811 811 * space in the window, ((sf_pmss - firstseg_len) can therefore reasonably
812 812 * be used to compute the cansend arg).
813 813 */
814 814 mblk_t *
815 815 sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int *error,
816 816 int32_t firstseg_len, uint32_t cansend, sctp_faddr_t *fp)
817 817 {
818 818 mblk_t *mp1;
819 819 sctp_msg_hdr_t *msg_hdr;
820 820 mblk_t *tmp_meta;
821 821 sctp_faddr_t *fp1;
822 822
823 823 ASSERT(error != NULL && mp != NULL);
824 824 *error = 0;
825 825
826 826 ASSERT(sctp->sctp_current != NULL);
827 827
828 828 chunkified:
829 829 while (meta != NULL) {
830 830 tmp_meta = meta->b_next;
831 831 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
832 832 mp1 = meta->b_cont;
833 833 if (SCTP_IS_MSG_ABANDONED(meta))
834 834 goto next_msg;
835 835 if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
836 836 while (mp1 != NULL) {
837 837 if (SCTP_CHUNK_CANSEND(mp1)) {
838 838 *mp = mp1;
839 839 #ifdef DEBUG
840 840 ASSERT(sctp_verify_chain(
841 841 sctp->sctp_xmit_head, meta));
842 842 #endif
843 843 return (meta);
844 844 }
845 845 mp1 = mp1->b_next;
846 846 }
847 847 goto next_msg;
848 848 }
849 849 /*
850 850 * If we come here and the first chunk is sent, then we
851 851 * we are PR-SCTP aware, in which case if the cumulative
852 852 * TSN has moved upto or beyond the first chunk (which
853 853 * means all the previous messages have been cumulative
854 854 * SACK'd), then we send a Forward TSN with the last
855 855 * chunk that was sent in this message. If we can't send
856 856 * a Forward TSN because previous non-abandoned messages
857 857 * have not been acked then we will defer the Forward TSN
858 858 * to sctp_rexmit() or sctp_cumack().
859 859 */
860 860 if (SCTP_CHUNK_ISSENT(mp1)) {
861 861 *error = sctp_check_abandoned_msg(sctp, meta);
862 862 if (*error != 0) {
863 863 #ifdef DEBUG
864 864 ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
865 865 sctp->sctp_xmit_tail));
866 866 #endif
867 867 return (NULL);
868 868 }
869 869 goto next_msg;
870 870 }
871 871 BUMP_LOCAL(sctp->sctp_prsctpdrop);
872 872 ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
873 873 if (meta->b_prev == NULL) {
874 874 ASSERT(sctp->sctp_xmit_head == meta);
875 875 sctp->sctp_xmit_head = tmp_meta;
876 876 if (sctp->sctp_xmit_tail == meta)
877 877 sctp->sctp_xmit_tail = tmp_meta;
878 878 meta->b_next = NULL;
879 879 if (tmp_meta != NULL)
880 880 tmp_meta->b_prev = NULL;
881 881 } else if (meta->b_next == NULL) {
882 882 if (sctp->sctp_xmit_tail == meta)
883 883 sctp->sctp_xmit_tail = meta->b_prev;
884 884 meta->b_prev->b_next = NULL;
885 885 meta->b_prev = NULL;
886 886 } else {
887 887 meta->b_prev->b_next = tmp_meta;
888 888 tmp_meta->b_prev = meta->b_prev;
889 889 if (sctp->sctp_xmit_tail == meta)
890 890 sctp->sctp_xmit_tail = tmp_meta;
891 891 meta->b_prev = NULL;
892 892 meta->b_next = NULL;
893 893 }
894 894 sctp->sctp_unsent -= msg_hdr->smh_msglen;
895 895 /*
896 896 * Update ULP the amount of queued data, which is
897 897 * sent-unack'ed + unsent.
898 898 */
899 899 if (!SCTP_IS_DETACHED(sctp))
900 900 SCTP_TXQ_UPDATE(sctp);
901 901 sctp_sendfail_event(sctp, meta, 0, B_TRUE);
902 902 next_msg:
903 903 meta = tmp_meta;
904 904 }
905 905 /* chunkify, if needed */
906 906 if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
907 907 ASSERT(sctp->sctp_unsent > 0);
908 908 if (fp == NULL) {
909 909 fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
910 910 if (fp == NULL || fp->sf_state != SCTP_FADDRS_ALIVE)
911 911 fp = sctp->sctp_current;
912 912 } else {
913 913 /*
914 914 * If user specified destination, try to honor that.
915 915 */
916 916 fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
917 917 if (fp1 != NULL && fp1->sf_state == SCTP_FADDRS_ALIVE &&
918 918 fp1 != fp) {
919 919 goto chunk_done;
920 920 }
921 921 }
922 922 meta = sctp_chunkify(sctp, fp->sf_pmss, firstseg_len, cansend);
923 923 if (meta == NULL)
924 924 goto chunk_done;
925 925 /*
926 926 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
927 927 * new chunk(s) to the tail, so we need to skip the
928 928 * sctp_xmit_tail, which would have already been processed.
929 929 * This could happen when there are unacked chunks, but
930 930 * nothing new to send.
931 931 * When sctp_chunkify() is called when the transmit queue
932 932 * is empty then we need to start from sctp_xmit_tail.
933 933 */
934 934 if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
935 935 #ifdef DEBUG
936 936 mp1 = sctp->sctp_xmit_tail->b_cont;
937 937 while (mp1 != NULL) {
938 938 ASSERT(!SCTP_CHUNK_CANSEND(mp1));
939 939 mp1 = mp1->b_next;
940 940 }
941 941 #endif
942 942 if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
943 943 goto chunk_done;
944 944 }
945 945 goto chunkified;
946 946 }
947 947 chunk_done:
948 948 #ifdef DEBUG
949 949 ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
950 950 #endif
951 951 return (NULL);
952 952 }
953 953
954 954 void
955 955 sctp_fast_rexmit(sctp_t *sctp)
956 956 {
957 957 mblk_t *mp, *head;
958 958 int pktlen = 0;
959 959 sctp_faddr_t *fp = NULL;
960 960 sctp_stack_t *sctps = sctp->sctp_sctps;
961 961
962 962 ASSERT(sctp->sctp_xmit_head != NULL);
963 963 mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
964 964 if (mp == NULL) {
965 965 SCTP_KSTAT(sctps, sctp_fr_not_found);
966 966 return;
967 967 }
968 968 if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
969 969 freemsg(mp);
↓ open down ↓ |
969 lines elided |
↑ open up ↑ |
970 970 SCTP_KSTAT(sctps, sctp_fr_add_hdr);
971 971 return;
972 972 }
973 973 if ((pktlen > fp->sf_pmss) && fp->sf_isv4) {
974 974 ipha_t *iph = (ipha_t *)head->b_rptr;
975 975
976 976 iph->ipha_fragment_offset_and_flags = 0;
977 977 }
978 978
979 979 sctp_set_iplen(sctp, head, fp->sf_ixa);
980 +
981 + DTRACE_SCTP5(send, mblk_t *, NULL, ip_xmit_attr_t *, fp->sf_ixa,
982 + void_ip_t *, mp->b_rptr, sctp_t *, sctp, sctp_hdr_t *,
983 + &mp->b_rptr[fp->sf_ixa->ixa_ip_hdr_length]);
984 +
980 985 (void) conn_ip_output(head, fp->sf_ixa);
981 986 BUMP_LOCAL(sctp->sctp_opkts);
982 987 sctp->sctp_active = fp->sf_lastactive = ddi_get_lbolt64();
983 988 }
984 989
/*
 * Transmit queued data chunks to the peer, building at most num_pkt packets.
 *
 * Each iteration of the main loop picks the next sendable chunk (asking
 * sctp_get_msg_to_send() for another message when the current one has no
 * sendable chunk left), duplicates it, prepends the protocol header with
 * sctp_add_proto_hdr() - piggybacking a SACK when one is pending and fits -
 * pads it, bundles further chunks while they fit, and passes the packet to
 * conn_ip_output().  The amount sent is bounded by cansend (the smaller of
 * sctp_frwnd and sctp_unsent) and by the destination's sf_cwnd - sf_suna.
 *
 * Whenever transmission has to stop early, control reaches unsent_data,
 * which restarts the destination's timer if it is not already running and
 * frees a partially built packet.
 */
985 990 void
986 991 sctp_output(sctp_t *sctp, uint_t num_pkt)
987 992 {
988 993 mblk_t *mp = NULL;
989 994 mblk_t *nmp;
990 995 mblk_t *head;
991 996 mblk_t *meta = sctp->sctp_xmit_tail;
992 997 mblk_t *fill = NULL;
993 998 uint16_t chunklen;
994 999 uint32_t cansend;
995 1000 int32_t seglen;
996 1001 int32_t xtralen;
997 1002 int32_t sacklen;
998 1003 int32_t pad = 0;
999 1004 int32_t pathmax;
1000 1005 int extra;
1001 1006 int64_t now = LBOLT_FASTPATH64;
1002 1007 sctp_faddr_t *fp;
1003 1008 sctp_faddr_t *lfp;
1004 1009 sctp_data_hdr_t *sdc;
1005 1010 int error;
1006 1011 boolean_t notsent = B_TRUE;
1007 1012 sctp_stack_t *sctps = sctp->sctp_sctps;
1008 1013 uint32_t tsn;
1009 1014
/*
 * Work out how much room a piggybacked SACK would need.  lfp is only
 * assigned (and only read later) when sacklen is non-zero.
 */
1010 1015 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1011 1016 sacklen = 0;
1012 1017 } else {
1013 1018 /* send a SACK chunk */
1014 1019 sacklen = sizeof (sctp_chunk_hdr_t) +
1015 1020 sizeof (sctp_sack_chunk_t) +
1016 1021 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1017 1022 lfp = sctp->sctp_lastdata;
1018 1023 ASSERT(lfp != NULL);
1019 1024 if (lfp->sf_state != SCTP_FADDRS_ALIVE)
1020 1025 lfp = sctp->sctp_current;
1021 1026 }
1022 1027
/* Limit what may be sent by sctp_frwnd and by the amount of unsent data. */
1023 1028 cansend = sctp->sctp_frwnd;
1024 1029 if (sctp->sctp_unsent < cansend)
1025 1030 cansend = sctp->sctp_unsent;
1026 1031
1027 1032 /*
1028 1033 * Start persist timer if unable to send or when
1029 1034 * trying to send into a zero window. This timer
1030 1035 * ensures the blocked send attempt is retried.
1031 1036 */
1032 1037 if ((cansend < sctp->sctp_current->sf_pmss / 2) &&
1033 1038 (sctp->sctp_unacked != 0) &&
1034 1039 (sctp->sctp_unacked < sctp->sctp_current->sf_pmss) &&
1035 1040 !sctp->sctp_ndelay ||
1036 1041 (cansend == 0 && sctp->sctp_unacked == 0 &&
1037 1042 sctp->sctp_unsent != 0)) {
1038 1043 head = NULL;
1039 1044 fp = sctp->sctp_current;
1040 1045 goto unsent_data;
1041 1046 }
/* Resume with the chunks of the current tail message, if there is one. */
1042 1047 if (meta != NULL)
1043 1048 mp = meta->b_cont;
/* One packet is built and sent per iteration. */
1044 1049 while (cansend > 0 && num_pkt-- != 0) {
1045 1050 pad = 0;
1046 1051
1047 1052 /*
1048 1053 * Find first segment eligible for transmit.
1049 1054 */
1050 1055 while (mp != NULL) {
1051 1056 if (SCTP_CHUNK_CANSEND(mp))
1052 1057 break;
1053 1058 mp = mp->b_next;
1054 1059 }
1055 1060 if (mp == NULL) {
1056 1061 meta = sctp_get_msg_to_send(sctp, &mp,
1057 1062 meta == NULL ? NULL : meta->b_next, &error, sacklen,
1058 1063 cansend, NULL);
1059 1064 if (error != 0 || meta == NULL) {
1060 1065 head = NULL;
1061 1066 fp = sctp->sctp_current;
1062 1067 goto unsent_data;
1063 1068 }
1064 1069 sctp->sctp_xmit_tail = meta;
1065 1070 }
1066 1071
/*
 * seglen is the whole chunk including its data header; chunklen is the
 * payload only, which is what counts against cansend.
 */
1067 1072 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1068 1073 seglen = ntohs(sdc->sdh_len);
1069 1074 xtralen = sizeof (*sdc);
1070 1075 chunklen = seglen - xtralen;
1071 1076
1072 1077 /*
1073 1078 * Check rwnd.
1074 1079 */
1075 1080 if (chunklen > cansend) {
1076 1081 head = NULL;
1077 1082 fp = SCTP_CHUNK_DEST(meta);
1078 1083 if (fp == NULL || fp->sf_state != SCTP_FADDRS_ALIVE)
1079 1084 fp = sctp->sctp_current;
1080 1085 goto unsent_data;
1081 1086 }
/* extra is the number of pad bytes needed to reach SCTP_ALIGN. */
1082 1087 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1083 1088 extra = SCTP_ALIGN - extra;
1084 1089
1085 1090 /*
1086 1091 * Pick destination address, and check cwnd.
1087 1092 */
1088 1093 if (sacklen > 0 && (seglen + extra <= lfp->sf_cwnd -
1089 1094 lfp->sf_suna) &&
1090 1095 (seglen + sacklen + extra <= lfp->sf_pmss)) {
1091 1096 /*
1092 1097 * Only include SACK chunk if it can be bundled
1093 1098 * with a data chunk, and sent to sctp_lastdata.
1094 1099 */
1095 1100 pathmax = lfp->sf_cwnd - lfp->sf_suna;
1096 1101
1097 1102 fp = lfp;
1098 1103 if ((nmp = dupmsg(mp)) == NULL) {
1099 1104 head = NULL;
1100 1105 goto unsent_data;
1101 1106 }
1102 1107 SCTP_CHUNK_CLEAR_FLAGS(nmp);
1103 1108 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
1104 1109 &error);
1105 1110 if (head == NULL) {
1106 1111 /*
1107 1112 * If none of the source addresses are
1108 1113 * available (i.e error == EHOSTUNREACH),
1109 1114 * pretend we have sent the data. We will
1110 1115 * eventually time out trying to retransmit
1111 1116 * the data if the interface never comes up.
1112 1117 * If we have already sent some stuff (i.e.,
1113 1118 * notsent is B_FALSE) then we are fine, else
1114 1119 * just mark this packet as sent.
1115 1120 */
1116 1121 if (notsent && error == EHOSTUNREACH) {
1117 1122 SCTP_CHUNK_SENT(sctp, mp, sdc,
1118 1123 fp, chunklen, meta);
1119 1124 }
1120 1125 freemsg(nmp);
1121 1126 SCTP_KSTAT(sctps, sctp_output_failed);
1122 1127 goto unsent_data;
1123 1128 }
/* The SACK now travels in this packet; do not add it again. */
1124 1129 seglen += sacklen;
1125 1130 xtralen += sacklen;
1126 1131 sacklen = 0;
1127 1132 } else {
1128 1133 fp = SCTP_CHUNK_DEST(meta);
1129 1134 if (fp == NULL || fp->sf_state != SCTP_FADDRS_ALIVE)
1130 1135 fp = sctp->sctp_current;
1131 1136 /*
1132 1137 * If we haven't sent data to this destination for
1133 1138 * a while, do slow start again.
1134 1139 */
1135 1140 if (now - fp->sf_lastactive > fp->sf_rto) {
1136 1141 SET_CWND(fp, fp->sf_pmss,
1137 1142 sctps->sctps_slow_start_after_idle);
1138 1143 }
1139 1144
1140 1145 pathmax = fp->sf_cwnd - fp->sf_suna;
1141 1146 if (seglen + extra > pathmax) {
1142 1147 head = NULL;
1143 1148 goto unsent_data;
1144 1149 }
1145 1150 if ((nmp = dupmsg(mp)) == NULL) {
1146 1151 head = NULL;
1147 1152 goto unsent_data;
1148 1153 }
1149 1154 SCTP_CHUNK_CLEAR_FLAGS(nmp);
1150 1155 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
1151 1156 if (head == NULL) {
1152 1157 /*
1153 1158 * If none of the source addresses are
1154 1159 * available (i.e error == EHOSTUNREACH),
1155 1160 * pretend we have sent the data. We will
1156 1161 * eventually time out trying to retransmit
1157 1162 * the data if the interface never comes up.
1158 1163 * If we have already sent some stuff (i.e.,
1159 1164 * notsent is B_FALSE) then we are fine, else
1160 1165 * just mark this packet as sent.
1161 1166 */
1162 1167 if (notsent && error == EHOSTUNREACH) {
1163 1168 SCTP_CHUNK_SENT(sctp, mp, sdc,
1164 1169 fp, chunklen, meta);
1165 1170 }
1166 1171 freemsg(nmp);
1167 1172 SCTP_KSTAT(sctps, sctp_output_failed);
1168 1173 goto unsent_data;
1169 1174 }
1170 1175 }
1171 1176 fp->sf_lastactive = now;
/* A single packet is capped at the destination's sf_pmss. */
1172 1177 if (pathmax > fp->sf_pmss)
1173 1178 pathmax = fp->sf_pmss;
1174 1179 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1175 1180 mp = mp->b_next;
1176 1181
1177 1182 /*
1178 1183 * Use this chunk to measure RTT?
1179 1184 * Must not be a retransmission of an earlier chunk,
1180 1185 * ensure the tsn is current.
1181 1186 */
1182 1187 tsn = ntohl(sdc->sdh_tsn);
1183 1188 if (sctp->sctp_out_time == 0 && tsn == (sctp->sctp_ltsn - 1)) {
1184 1189 sctp->sctp_out_time = now;
1185 1190 sctp->sctp_rtt_tsn = tsn;
1186 1191 }
/*
 * Append the padding for the first chunk.  If no padding block can be
 * obtained the packet built so far is dropped at unsent_data.
 */
1187 1192 if (extra > 0) {
1188 1193 fill = sctp_get_padding(sctp, extra);
1189 1194 if (fill != NULL) {
1190 1195 linkb(head, fill);
1191 1196 pad = extra;
1192 1197 seglen += extra;
1193 1198 } else {
1194 1199 goto unsent_data;
1195 1200 }
1196 1201 }
1197 1202 /*
1198 1203 * Bundle chunks. We linkb() the chunks together to send
1199 1204 * downstream in a single packet.
1200 1205 * Partial chunks MUST NOT be bundled with full chunks, so we
1201 1206 * rely on sctp_get_msg_to_send() to only return messages that
1202 1207 * will fit entirely in the current packet.
1203 1208 */
1204 1209 while (seglen < pathmax) {
1205 1210 int32_t new_len;
1206 1211 int32_t new_xtralen;
1207 1212
1208 1213 while (mp != NULL) {
1209 1214 if (SCTP_CHUNK_CANSEND(mp))
1210 1215 break;
1211 1216 mp = mp->b_next;
1212 1217 }
1213 1218 if (mp == NULL) {
1214 1219 meta = sctp_get_msg_to_send(sctp, &mp,
1215 1220 meta->b_next, &error, seglen,
1216 1221 (seglen - xtralen) >= cansend ? 0 :
1217 1222 cansend - seglen, fp);
1218 1223 if (error != 0)
1219 1224 break;
1220 1225 /* If no more eligible chunks, cease bundling */
1221 1226 if (meta == NULL)
1222 1227 break;
1223 1228 sctp->sctp_xmit_tail = meta;
1224 1229 }
1225 1230 ASSERT(mp != NULL);
/* Do not bundle an unsent chunk that is bound to a different destination. */
1226 1231 if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
1227 1232 fp != SCTP_CHUNK_DEST(meta)) {
1228 1233 break;
1229 1234 }
1230 1235 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1231 1236 chunklen = ntohs(sdc->sdh_len);
1232 1237 if ((extra = chunklen & (SCTP_ALIGN - 1)) != 0)
1233 1238 extra = SCTP_ALIGN - extra;
1234 1239
1235 1240 new_len = seglen + chunklen;
1236 1241 new_xtralen = xtralen + sizeof (*sdc);
1237 1242 chunklen -= sizeof (*sdc);
1238 1243
/* Stop bundling once the payload exceeds cansend or the packet pathmax. */
1239 1244 if (new_len - new_xtralen > cansend ||
1240 1245 new_len + extra > pathmax) {
1241 1246 break;
1242 1247 }
1243 1248 if ((nmp = dupmsg(mp)) == NULL)
1244 1249 break;
1245 1250 if (extra > 0) {
1246 1251 fill = sctp_get_padding(sctp, extra);
1247 1252 if (fill != NULL) {
1248 1253 pad += extra;
1249 1254 new_len += extra;
1250 1255 linkb(nmp, fill);
1251 1256 } else {
1252 1257 freemsg(nmp);
1253 1258 break;
1254 1259 }
1255 1260 }
1256 1261 seglen = new_len;
1257 1262 xtralen = new_xtralen;
1258 1263 SCTP_CHUNK_CLEAR_FLAGS(nmp);
1259 1264 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1260 1265 linkb(head, nmp);
1261 1266 mp = mp->b_next;
1262 1267 }
1263 1268 if ((seglen > fp->sf_pmss) && fp->sf_isv4) {
1264 1269 ipha_t *iph = (ipha_t *)head->b_rptr;
1265 1270
1266 1271 /*
1267 1272 * Path MTU is different from what we thought it would
1268 1273 * be when we created chunks, or IP headers have grown.
1269 1274 * Need to clear the DF bit.
1270 1275 */
1271 1276 iph->ipha_fragment_offset_and_flags = 0;
↓ open down ↓ |
282 lines elided |
↑ open up ↑ |
1272 1277 }
1273 1278 /* xmit segment */
/* Only payload bytes (no headers, SACK or padding) are charged to cansend. */
1274 1279 ASSERT(cansend >= seglen - pad - xtralen);
1275 1280 cansend -= (seglen - pad - xtralen);
1276 1281 dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
1277 1282 "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
1278 1283 seglen - xtralen, ntohl(sdc->sdh_tsn),
1279 1284 ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd,
1280 1285 cansend, sctp->sctp_lastack_rxd));
1281 1286 sctp_set_iplen(sctp, head, fp->sf_ixa);
1287 +
/*
 * DTrace send probe: the IP header is at head->b_rptr and the SCTP header
 * follows ixa_ip_hdr_length bytes later.
 */
1288 + DTRACE_SCTP5(send, mblk_t *, NULL, ip_xmit_attr_t *, fp->sf_ixa,
1289 + void_ip_t *, head->b_rptr, sctp_t *, sctp, sctp_hdr_t *,
1290 + &head->b_rptr[fp->sf_ixa->ixa_ip_hdr_length]);
1291 +
1282 1292 (void) conn_ip_output(head, fp->sf_ixa);
1283 1293 BUMP_LOCAL(sctp->sctp_opkts);
1284 1294 /* arm rto timer (if not set) */
1285 1295 if (!fp->sf_timer_running)
1286 1296 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
1287 1297 notsent = B_FALSE;
1288 1298 }
1289 1299 sctp->sctp_active = now;
1290 1300 return;
1291 1301 unsent_data:
1292 1302 /* arm persist timer (if rto timer not set) */
1293 1303 if (!fp->sf_timer_running)
1294 1304 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
/* Drop a packet that was built but could not be completed. */
1295 1305 if (head != NULL)
1296 1306 freemsg(head);
1297 1307 }
1298 1308
1299 1309 /*
1300 1310 * The following two functions initialize and destroy the cache
1301 1311 * associated with the sets used for PR-SCTP.
1302 1312 */
1303 1313 void
1304 1314 sctp_ftsn_sets_init(void)
1305 1315 {
1306 1316 sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
1307 1317 sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
1308 1318 NULL, 0);
1309 1319 }
1310 1320
1311 1321 void
1312 1322 sctp_ftsn_sets_fini(void)
1313 1323 {
/* Destroy the cache that sctp_ftsn_sets_init() created. */
1314 1324 kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
1315 1325 }
1316 1326
1317 1327
1318 1328 /* Free PR-SCTP sets */
1319 1329 void
1320 1330 sctp_free_ftsn_set(sctp_ftsn_set_t *s)
1321 1331 {
1322 1332 sctp_ftsn_set_t *p;
1323 1333
1324 1334 while (s != NULL) {
1325 1335 p = s->next;
1326 1336 s->next = NULL;
1327 1337 kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
1328 1338 s = p;
1329 1339 }
1330 1340 }
1331 1341
1332 1342 /*
1333 1343 * Given a message meta block, meta, this routine creates or modifies
1334 1344 * the set that will be used to generate a Forward TSN chunk. If the
1335 1345 * entry for stream id, sid, for this message already exists, the
1336 1346 * sequence number, ssn, is updated if it is greater than the existing
1337 1347 * one. If an entry for this sid does not exist, one is created if
1338 1348 * the size does not exceed fp->sf_pmss. We return false in case
1339 1349 * or an error.
1340 1350 */
1341 1351 boolean_t
1342 1352 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
1343 1353 uint_t *nsets, uint32_t *slen)
1344 1354 {
1345 1355 sctp_ftsn_set_t *p;
1346 1356 sctp_msg_hdr_t *msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1347 1357 uint16_t sid = htons(msg_hdr->smh_sid);
1348 1358 /* msg_hdr->smh_ssn is already in NBO */
1349 1359 uint16_t ssn = msg_hdr->smh_ssn;
1350 1360
1351 1361 ASSERT(s != NULL && nsets != NULL);
1352 1362 ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
1353 1363
1354 1364 if (*s == NULL) {
1355 1365 ASSERT((*slen + sizeof (uint32_t)) <= fp->sf_pmss);
1356 1366 *s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
1357 1367 if (*s == NULL)
1358 1368 return (B_FALSE);
1359 1369 (*s)->ftsn_entries.ftsn_sid = sid;
1360 1370 (*s)->ftsn_entries.ftsn_ssn = ssn;
1361 1371 (*s)->next = NULL;
1362 1372 *nsets = 1;
1363 1373 *slen += sizeof (uint32_t);
1364 1374 return (B_TRUE);
1365 1375 }
1366 1376 for (p = *s; p->next != NULL; p = p->next) {
1367 1377 if (p->ftsn_entries.ftsn_sid == sid) {
1368 1378 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1369 1379 p->ftsn_entries.ftsn_ssn = ssn;
1370 1380 return (B_TRUE);
1371 1381 }
1372 1382 }
1373 1383 /* the last one */
1374 1384 if (p->ftsn_entries.ftsn_sid == sid) {
1375 1385 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1376 1386 p->ftsn_entries.ftsn_ssn = ssn;
1377 1387 } else {
1378 1388 if ((*slen + sizeof (uint32_t)) > fp->sf_pmss)
1379 1389 return (B_FALSE);
1380 1390 p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache,
1381 1391 KM_NOSLEEP);
1382 1392 if (p->next == NULL)
1383 1393 return (B_FALSE);
1384 1394 p = p->next;
1385 1395 p->ftsn_entries.ftsn_sid = sid;
1386 1396 p->ftsn_entries.ftsn_ssn = ssn;
1387 1397 p->next = NULL;
1388 1398 (*nsets)++;
1389 1399 *slen += sizeof (uint32_t);
1390 1400 }
1391 1401 return (B_TRUE);
1392 1402 }
1393 1403
1394 1404 /*
1395 1405 * Given a set of stream id - sequence number pairs, this routing creates
1396 1406 * a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
1397 1407 * for the chunk is obtained from sctp->sctp_adv_pap. The caller
1398 1408 * will add the IP/SCTP header.
1399 1409 */
1400 1410 mblk_t *
1401 1411 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
1402 1412 uint_t nsets, uint32_t seglen)
1403 1413 {
1404 1414 mblk_t *ftsn_mp;
1405 1415 sctp_chunk_hdr_t *ch_hdr;
1406 1416 uint32_t *advtsn;
1407 1417 uint16_t schlen;
1408 1418 size_t xtralen;
1409 1419 ftsn_entry_t *ftsn_entry;
1410 1420 sctp_stack_t *sctps = sctp->sctp_sctps;
1411 1421
1412 1422 seglen += sizeof (sctp_chunk_hdr_t);
1413 1423 if (fp->sf_isv4)
1414 1424 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra;
1415 1425 else
1416 1426 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra;
1417 1427 ftsn_mp = allocb(xtralen + seglen, BPRI_MED);
1418 1428 if (ftsn_mp == NULL)
1419 1429 return (NULL);
1420 1430 ftsn_mp->b_rptr += xtralen;
1421 1431 ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen;
1422 1432
1423 1433 ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr;
1424 1434 ch_hdr->sch_id = CHUNK_FORWARD_TSN;
1425 1435 ch_hdr->sch_flags = 0;
1426 1436 /*
1427 1437 * The cast here should not be an issue since seglen is
1428 1438 * the length of the Forward TSN chunk.
1429 1439 */
1430 1440 schlen = (uint16_t)seglen;
1431 1441 U16_TO_ABE16(schlen, &(ch_hdr->sch_len));
1432 1442
1433 1443 advtsn = (uint32_t *)(ch_hdr + 1);
1434 1444 U32_TO_ABE32(sctp->sctp_adv_pap, advtsn);
1435 1445 ftsn_entry = (ftsn_entry_t *)(advtsn + 1);
1436 1446 while (nsets > 0) {
1437 1447 ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr);
1438 1448 ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
1439 1449 ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
1440 1450 ftsn_entry++;
1441 1451 sets = sets->next;
1442 1452 nsets--;
1443 1453 }
1444 1454 return (ftsn_mp);
1445 1455 }
1446 1456
1447 1457 /*
1448 1458 * Given a starting message, the routine steps through all the
1449 1459 * messages whose TSN is less than or equal to sctp->sctp_adv_pap and
1450 1460 * creates ftsn sets. The ftsn sets are then used to create a Forward TSN
1451 1461 * chunk. All the messages, that have chunks that are included in the
1452 1462 * ftsn sets, are flagged abandoned. If a message is partially sent
1453 1463 * and is deemed abandoned, all remaining unsent chunks are marked
1454 1464 * abandoned and are deducted from sctp_unsent.
 *
 * meta/mp are the starting message and its first chunk to examine.  fp is
 * the destination; it is replaced by sctp_lastdata when a SACK is
 * piggybacked.  On success *nmp is the finished packet (protocol header
 * included) and *seglen its chunk length including any SACK.  *nmp is
 * not assigned when the walk makes no progress, and is NULL when the
 * chunk or its header could not be allocated.
1455 1465 */
1456 1466 void
1457 1467 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
1458 1468 sctp_faddr_t *fp, uint32_t *seglen)
1459 1469 {
1460 1470 mblk_t *mp1 = mp;
1461 1471 mblk_t *mp_head = mp;
1462 1472 mblk_t *meta_head = meta;
1463 1473 mblk_t *head;
1464 1474 sctp_ftsn_set_t *sets = NULL;
1465 1475 uint_t nsets = 0;
1466 1476 uint16_t clen;
1467 1477 sctp_data_hdr_t *sdc;
1468 1478 uint32_t sacklen;
1469 1479 uint32_t adv_pap = sctp->sctp_adv_pap;
1470 1480 uint32_t unsent = 0;
1471 1481 boolean_t ubit;
1472 1482 sctp_stack_t *sctps = sctp->sctp_sctps;
1473 1483
/* The chunk always carries the 32-bit cumulative TSN. */
1474 1484 *seglen = sizeof (uint32_t);
1475 1485
/*
 * First pass: collect the sets.  No message or chunk flags are changed
 * here, so nothing has to be undone if building the chunk fails.
 */
1476 1486 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1477 1487 while (meta != NULL &&
1478 1488 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1479 1489 /*
1480 1490 * Skip adding FTSN sets for un-ordered messages as they do
1481 1491 * not have SSNs.
1482 1492 */
1483 1493 ubit = SCTP_DATA_GET_UBIT(sdc);
1484 1494 if (!ubit &&
1485 1495 !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
/*
 * The set could not be added.  Pull sctp_adv_pap back to the last TSN
 * walked so far and build the chunk from what has been collected; meta
 * is cleared so the meta == meta_head check below does not return early.
 */
1486 1496 meta = NULL;
1487 1497 sctp->sctp_adv_pap = adv_pap;
1488 1498 goto ftsn_done;
1489 1499 }
/* adv_pap tracks the TSN of the last sent chunk seen so far. */
1490 1500 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1491 1501 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1492 1502 adv_pap = ntohl(sdc->sdh_tsn);
1493 1503 mp1 = mp1->b_next;
1494 1504 }
1495 1505 meta = meta->b_next;
1496 1506 if (meta != NULL) {
1497 1507 mp1 = meta->b_cont;
1498 1508 if (!SCTP_CHUNK_ISSENT(mp1))
1499 1509 break;
1500 1510 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1501 1511 }
1502 1512 }
1503 1513 ftsn_done:
1504 1514 /*
1505 1515 * Can't compare with sets == NULL, since we don't add any
1506 1516 * sets for un-ordered messages.
1507 1517 */
1508 1518 if (meta == meta_head)
1509 1519 return;
1510 1520 *nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
1511 1521 sctp_free_ftsn_set(sets);
1512 1522 if (*nmp == NULL)
1513 1523 return;
/* Piggyback a SACK if one is pending and it fits in sctp_lastdata's pmss. */
1514 1524 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1515 1525 sacklen = 0;
1516 1526 } else {
1517 1527 sacklen = sizeof (sctp_chunk_hdr_t) +
1518 1528 sizeof (sctp_sack_chunk_t) +
1519 1529 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1520 1530 if (*seglen + sacklen > sctp->sctp_lastdata->sf_pmss) {
1521 1531 /* piggybacked SACK doesn't fit */
1522 1532 sacklen = 0;
1523 1533 } else {
1524 1534 fp = sctp->sctp_lastdata;
1525 1535 }
1526 1536 }
1527 1537 head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
1528 1538 if (head == NULL) {
1529 1539 freemsg(*nmp);
1530 1540 *nmp = NULL;
1531 1541 SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
1532 1542 return;
1533 1543 }
1534 1544 *seglen += sacklen;
1535 1545 *nmp = head;
1536 1546
1537 1547 /*
1538 1548 * XXXNeed to optimise this, the reason it is done here is so
1539 1549 * that we don't have to undo in case of failure.
1540 1550 */
/*
 * Second pass: walk the same messages again from the start, flag them
 * abandoned and total up the unsent bytes that are given up.
 */
1541 1551 mp1 = mp_head;
1542 1552 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1543 1553 while (meta_head != NULL &&
1544 1554 SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1545 1555 if (!SCTP_IS_MSG_ABANDONED(meta_head))
1546 1556 SCTP_MSG_SET_ABANDONED(meta_head);
/*
 * NOTE(review): SCTP_CHUNK_SENT is applied to sent-but-unacked chunks
 * with the (possibly changed) fp - confirm its accounting effect against
 * the macro definition.
 */
1547 1557 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1548 1558 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1549 1559 if (!SCTP_CHUNK_ISACKED(mp1)) {
1550 1560 clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
1551 1561 SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
1552 1562 meta_head);
1553 1563 }
1554 1564 mp1 = mp1->b_next;
1555 1565 }
/* Whatever remains of this message was never sent: abandon it. */
1556 1566 while (mp1 != NULL) {
1557 1567 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1558 1568 if (!SCTP_CHUNK_ABANDONED(mp1)) {
1559 1569 ASSERT(!SCTP_CHUNK_ISSENT(mp1));
1560 1570 unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
1561 1571 SCTP_ABANDON_CHUNK(mp1);
1562 1572 }
1563 1573 mp1 = mp1->b_next;
1564 1574 }
1565 1575 meta_head = meta_head->b_next;
1566 1576 if (meta_head != NULL) {
1567 1577 mp1 = meta_head->b_cont;
1568 1578 if (!SCTP_CHUNK_ISSENT(mp1))
1569 1579 break;
1570 1580 sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1571 1581 }
1572 1582 }
1573 1583 if (unsent > 0) {
1574 1584 ASSERT(sctp->sctp_unsent >= unsent);
1575 1585 sctp->sctp_unsent -= unsent;
1576 1586 /*
1577 1587 * Update ULP the amount of queued data, which is
1578 1588 * sent-unack'ed + unsent.
1579 1589 */
1580 1590 if (!SCTP_IS_DETACHED(sctp))
1581 1591 SCTP_TXQ_UPDATE(sctp);
1582 1592 }
1583 1593 }
1584 1594
1585 1595 /*
1586 1596 * This function steps through messages starting at meta and checks if
1587 1597 * the message is abandoned. It stops when it hits an unsent chunk or
1588 1598 * a message that has all its chunk acked. This is the only place
1589 1599 * where the sctp_adv_pap is moved forward to indicated abandoned
1590 1600 * messages.
1591 1601 */
1592 1602 void
1593 1603 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
1594 1604 {
1595 1605 uint32_t tsn = sctp->sctp_adv_pap;
1596 1606 sctp_data_hdr_t *sdc;
1597 1607 sctp_msg_hdr_t *msg_hdr;
1598 1608
1599 1609 ASSERT(mp != NULL);
1600 1610 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1601 1611 ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
1602 1612 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1603 1613 if (!SCTP_IS_MSG_ABANDONED(meta) &&
1604 1614 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1605 1615 return;
1606 1616 }
1607 1617 while (meta != NULL) {
1608 1618 while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
1609 1619 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1610 1620 tsn = ntohl(sdc->sdh_tsn);
1611 1621 mp = mp->b_next;
1612 1622 }
1613 1623 if (mp != NULL)
1614 1624 break;
1615 1625 /*
1616 1626 * We continue checking for successive messages only if there
1617 1627 * is a chunk marked for retransmission. Else, we might
1618 1628 * end up sending FTSN prematurely for chunks that have been
1619 1629 * sent, but not yet acked.
1620 1630 */
1621 1631 if ((meta = meta->b_next) != NULL) {
1622 1632 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1623 1633 if (!SCTP_IS_MSG_ABANDONED(meta) &&
1624 1634 !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1625 1635 break;
1626 1636 }
1627 1637 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1628 1638 if (!SCTP_CHUNK_ISSENT(mp)) {
1629 1639 sctp->sctp_adv_pap = tsn;
1630 1640 return;
1631 1641 }
1632 1642 if (SCTP_CHUNK_WANT_REXMIT(mp))
1633 1643 break;
1634 1644 }
1635 1645 if (mp == NULL)
1636 1646 break;
1637 1647 }
1638 1648 }
1639 1649 sctp->sctp_adv_pap = tsn;
1640 1650 }
1641 1651
1642 1652
/*
 * Determine if we should bundle a data chunk with the chunk being
 * retransmitted.  A chunk that is abandoned is never bundled.  Otherwise
 * we bundle if
 *
 * - the chunk has been sent to the same destination (fp) and is not
 *   yet acked,
 *
 * OR
 *
 * - the chunk's REXMIT/SENT flag bits are anything other than SENT
 *   alone, e.g. the chunk is unsent (new data).
 */
#define	SCTP_CHUNK_RX_CANBUNDLE(mp, fp)					\
	(!SCTP_CHUNK_ABANDONED((mp)) &&					\
	((SCTP_CHUNK_ISSENT((mp)) &&					\
	(SCTP_CHUNK_DEST((mp)) == (fp) && !SCTP_CHUNK_ISACKED((mp)))) || \
	(((mp)->b_flag &						\
	(SCTP_CHUNK_FLAG_REXMIT | SCTP_CHUNK_FLAG_SENT)) !=		\
	SCTP_CHUNK_FLAG_SENT)))
1659 1669
1660 1670 /*
1661 1671 * Retransmit first segment which hasn't been acked with cumtsn or send
1662 1672 * a Forward TSN chunk, if appropriate.
1663 1673 */
1664 1674 void
1665 1675 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
1666 1676 {
1667 1677 mblk_t *mp;
1668 1678 mblk_t *nmp = NULL;
1669 1679 mblk_t *head;
1670 1680 mblk_t *meta = sctp->sctp_xmit_head;
1671 1681 mblk_t *fill;
1672 1682 uint32_t seglen = 0;
1673 1683 uint32_t sacklen;
1674 1684 uint16_t chunklen;
1675 1685 int extra;
1676 1686 sctp_data_hdr_t *sdc;
1677 1687 sctp_faddr_t *fp;
1678 1688 uint32_t adv_pap = sctp->sctp_adv_pap;
1679 1689 boolean_t do_ftsn = B_FALSE;
1680 1690 boolean_t ftsn_check = B_TRUE;
1681 1691 uint32_t first_ua_tsn;
1682 1692 sctp_msg_hdr_t *mhdr;
1683 1693 sctp_stack_t *sctps = sctp->sctp_sctps;
1684 1694 int error;
1685 1695
1686 1696 while (meta != NULL) {
1687 1697 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1688 1698 uint32_t tsn;
1689 1699
1690 1700 if (!SCTP_CHUNK_ISSENT(mp))
1691 1701 goto window_probe;
1692 1702 /*
1693 1703 * We break in the following cases -
1694 1704 *
1695 1705 * if the advanced peer ack point includes the next
1696 1706 * chunk to be retransmitted - possibly the Forward
1697 1707 * TSN was lost.
1698 1708 *
1699 1709 * if we are PRSCTP aware and the next chunk to be
1700 1710 * retransmitted is now abandoned
1701 1711 *
1702 1712 * if the next chunk to be retransmitted is for
1703 1713 * the dest on which the timer went off. (this
1704 1714 * message is not abandoned).
1705 1715 *
1706 1716 * We check for Forward TSN only for the first
1707 1717 * eligible chunk to be retransmitted. The reason
1708 1718 * being if the first eligible chunk is skipped (say
1709 1719 * it was sent to a destination other than oldfp)
1710 1720 * then we cannot advance the cum TSN via Forward
1711 1721 * TSN chunk.
1712 1722 *
1713 1723 * Also, ftsn_check is B_TRUE only for the first
1714 1724 * eligible chunk, it will be B_FALSE for all
1715 1725 * subsequent candidate messages for retransmission.
1716 1726 */
1717 1727 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1718 1728 tsn = ntohl(sdc->sdh_tsn);
1719 1729 if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
1720 1730 if (sctp->sctp_prsctp_aware && ftsn_check) {
1721 1731 if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
1722 1732 ASSERT(sctp->sctp_prsctp_aware);
1723 1733 do_ftsn = B_TRUE;
1724 1734 goto out;
1725 1735 } else {
1726 1736 sctp_check_adv_ack_pt(sctp,
1727 1737 meta, mp);
1728 1738 if (SEQ_GT(sctp->sctp_adv_pap,
1729 1739 adv_pap)) {
1730 1740 do_ftsn = B_TRUE;
1731 1741 goto out;
1732 1742 }
1733 1743 }
1734 1744 ftsn_check = B_FALSE;
1735 1745 }
1736 1746 if (SCTP_CHUNK_DEST(mp) == oldfp)
1737 1747 goto out;
1738 1748 }
1739 1749 }
1740 1750 meta = meta->b_next;
1741 1751 if (meta != NULL && sctp->sctp_prsctp_aware) {
1742 1752 mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1743 1753
1744 1754 while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) ||
1745 1755 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) {
1746 1756 meta = meta->b_next;
1747 1757 }
1748 1758 }
1749 1759 }
1750 1760 window_probe:
1751 1761 /*
1752 1762 * Retransmit fired for a destination which didn't have
1753 1763 * any unacked data pending.
1754 1764 */
1755 1765 if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) {
1756 1766 /*
1757 1767 * Send a window probe. Inflate frwnd to allow
1758 1768 * sending one segment.
1759 1769 */
1760 1770 if (sctp->sctp_frwnd < (oldfp->sf_pmss - sizeof (*sdc)))
1761 1771 sctp->sctp_frwnd = oldfp->sf_pmss - sizeof (*sdc);
1762 1772
1763 1773 /* next TSN to send */
1764 1774 sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
1765 1775
1766 1776 /*
1767 1777 * The above sctp_frwnd adjustment is coarse. The "changed"
1768 1778 * sctp_frwnd may allow us to send more than 1 packet. So
1769 1779 * tell sctp_output() to send only 1 packet.
1770 1780 */
1771 1781 sctp_output(sctp, 1);
1772 1782
1773 1783 /* Last sent TSN */
1774 1784 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
1775 1785 ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn);
1776 1786 sctp->sctp_zero_win_probe = B_TRUE;
1777 1787 SCTPS_BUMP_MIB(sctps, sctpOutWinProbe);
1778 1788 }
1779 1789 return;
1780 1790 out:
1781 1791 /*
1782 1792 * After a time out, assume that everything has left the network. So
1783 1793 * we can clear rxt_unacked for the original peer address.
1784 1794 */
1785 1795 oldfp->sf_rxt_unacked = 0;
1786 1796
1787 1797 /*
1788 1798 * If we were probing for zero window, don't adjust retransmission
1789 1799 * variables, but the timer is still backed off.
1790 1800 */
1791 1801 if (sctp->sctp_zero_win_probe) {
1792 1802 mblk_t *pkt;
1793 1803 uint_t pkt_len;
1794 1804
1795 1805 /*
↓ open down ↓ |
504 lines elided |
↑ open up ↑ |
1796 1806 * Get the Zero Win Probe for retransmission, sctp_rxt_nxttsn
1797 1807 * and sctp_rxt_maxtsn will specify the ZWP packet.
1798 1808 */
1799 1809 fp = oldfp;
1800 1810 if (oldfp->sf_state != SCTP_FADDRS_ALIVE)
1801 1811 fp = sctp_rotate_faddr(sctp, oldfp);
1802 1812 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
1803 1813 if (pkt != NULL) {
1804 1814 ASSERT(pkt_len <= fp->sf_pmss);
1805 1815 sctp_set_iplen(sctp, pkt, fp->sf_ixa);
1816 +
1817 + DTRACE_SCTP5(send, mblk_t *, NULL,
1818 + ip_xmit_attr_t *, fp->sf_ixa,
1819 + void_ip_t *, mp->b_rptr, sctp_t *, sctp, sctp_hdr_t *,
1820 + &mp->b_rptr[fp->sf_ixa->ixa_ip_hdr_length]);
1821 +
1806 1822 (void) conn_ip_output(pkt, fp->sf_ixa);
1807 1823 BUMP_LOCAL(sctp->sctp_opkts);
1808 1824 } else {
1809 1825 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
1810 1826 }
1811 1827
1812 1828 /*
1813 1829 * The strikes will be cleared by sctp_faddr_alive() when the
1814 1830 * other side sends us an ack.
1815 1831 */
1816 1832 oldfp->sf_strikes++;
1817 1833 sctp->sctp_strikes++;
1818 1834
1819 1835 SCTP_CALC_RXT(sctp, oldfp, sctp->sctp_rto_max);
1820 1836 if (oldfp != fp && oldfp->sf_suna != 0)
1821 1837 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->sf_rto);
1822 1838 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
1823 1839 SCTPS_BUMP_MIB(sctps, sctpOutWinProbe);
1824 1840 return;
1825 1841 }
1826 1842
1827 1843 /*
1828 1844 * Enter slowstart for this destination
1829 1845 */
1830 1846 oldfp->sf_ssthresh = oldfp->sf_cwnd / 2;
1831 1847 if (oldfp->sf_ssthresh < 2 * oldfp->sf_pmss)
1832 1848 oldfp->sf_ssthresh = 2 * oldfp->sf_pmss;
1833 1849 oldfp->sf_cwnd = oldfp->sf_pmss;
1834 1850 oldfp->sf_pba = 0;
1835 1851 fp = sctp_rotate_faddr(sctp, oldfp);
1836 1852 ASSERT(fp != NULL);
1837 1853 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1838 1854
1839 1855 first_ua_tsn = ntohl(sdc->sdh_tsn);
1840 1856 if (do_ftsn) {
1841 1857 sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
1842 1858 if (nmp == NULL) {
1843 1859 sctp->sctp_adv_pap = adv_pap;
1844 1860 goto restart_timer;
1845 1861 }
1846 1862 head = nmp;
1847 1863 /*
1848 1864 * Move to the next unabandoned chunk. XXXCheck if meta will
1849 1865 * always be marked abandoned.
1850 1866 */
1851 1867 while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta))
1852 1868 meta = meta->b_next;
1853 1869 if (meta != NULL)
1854 1870 mp = mp->b_cont;
1855 1871 else
1856 1872 mp = NULL;
1857 1873 goto try_bundle;
1858 1874 }
1859 1875 seglen = ntohs(sdc->sdh_len);
1860 1876 chunklen = seglen - sizeof (*sdc);
1861 1877 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1862 1878 extra = SCTP_ALIGN - extra;
1863 1879
1864 1880 /* Find out if we need to piggyback SACK. */
1865 1881 if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1866 1882 sacklen = 0;
1867 1883 } else {
1868 1884 sacklen = sizeof (sctp_chunk_hdr_t) +
1869 1885 sizeof (sctp_sack_chunk_t) +
1870 1886 (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1871 1887 if (seglen + sacklen > sctp->sctp_lastdata->sf_pmss) {
1872 1888 /* piggybacked SACK doesn't fit */
1873 1889 sacklen = 0;
1874 1890 } else {
1875 1891 /*
1876 1892 * OK, we have room to send SACK back. But we
1877 1893 * should send it back to the last fp where we
1878 1894 * receive data from, unless sctp_lastdata equals
1879 1895 * oldfp, then we should probably not send it
1880 1896 * back to that fp. Also we should check that
1881 1897 * the fp is alive.
1882 1898 */
1883 1899 if (sctp->sctp_lastdata != oldfp &&
1884 1900 sctp->sctp_lastdata->sf_state ==
1885 1901 SCTP_FADDRS_ALIVE) {
1886 1902 fp = sctp->sctp_lastdata;
1887 1903 }
1888 1904 }
1889 1905 }
1890 1906
1891 1907 /*
1892 1908 * Cancel RTT measurement if the retransmitted TSN is before the
1893 1909 * TSN used for timing.
1894 1910 */
1895 1911 if (sctp->sctp_out_time != 0 &&
1896 1912 SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) {
1897 1913 sctp->sctp_out_time = 0;
1898 1914 }
1899 1915 /* Clear the counter as the RTT calculation may be off. */
1900 1916 fp->sf_rtt_updates = 0;
1901 1917 oldfp->sf_rtt_updates = 0;
1902 1918
1903 1919 /*
1904 1920 * After a timeout, we should change the current faddr so that
1905 1921 * new chunks will be sent to the alternate address.
1906 1922 */
1907 1923 sctp_set_faddr_current(sctp, fp);
1908 1924
1909 1925 nmp = dupmsg(mp);
1910 1926 if (nmp == NULL)
1911 1927 goto restart_timer;
1912 1928 if (extra > 0) {
1913 1929 fill = sctp_get_padding(sctp, extra);
1914 1930 if (fill != NULL) {
1915 1931 linkb(nmp, fill);
1916 1932 seglen += extra;
1917 1933 } else {
1918 1934 freemsg(nmp);
1919 1935 goto restart_timer;
1920 1936 }
1921 1937 }
1922 1938 SCTP_CHUNK_CLEAR_FLAGS(nmp);
1923 1939 head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
1924 1940 if (head == NULL) {
1925 1941 freemsg(nmp);
1926 1942 SCTP_KSTAT(sctps, sctp_rexmit_failed);
1927 1943 goto restart_timer;
1928 1944 }
1929 1945 seglen += sacklen;
1930 1946
1931 1947 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1932 1948
1933 1949 mp = mp->b_next;
1934 1950
1935 1951 try_bundle:
1936 1952 /* We can at least and at most send 1 packet at timeout. */
1937 1953 while (seglen < fp->sf_pmss) {
1938 1954 int32_t new_len;
1939 1955
1940 1956 /* Go through the list to find more chunks to be bundled. */
1941 1957 while (mp != NULL) {
1942 1958 /* Check if the chunk can be bundled. */
1943 1959 if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp))
1944 1960 break;
1945 1961 mp = mp->b_next;
1946 1962 }
1947 1963 /* Go to the next message. */
1948 1964 if (mp == NULL) {
1949 1965 for (meta = meta->b_next; meta != NULL;
1950 1966 meta = meta->b_next) {
1951 1967 mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1952 1968
1953 1969 if (SCTP_IS_MSG_ABANDONED(meta) ||
1954 1970 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr,
1955 1971 sctp)) {
1956 1972 continue;
1957 1973 }
1958 1974
1959 1975 mp = meta->b_cont;
1960 1976 goto try_bundle;
1961 1977 }
1962 1978 /*
1963 1979 * Check if there is a new message which potentially
1964 1980 * could be bundled with this retransmission.
1965 1981 */
1966 1982 meta = sctp_get_msg_to_send(sctp, &mp, NULL, &error,
1967 1983 seglen, fp->sf_pmss - seglen, NULL);
1968 1984 if (error != 0 || meta == NULL) {
1969 1985 /* No more chunk to be bundled. */
1970 1986 break;
1971 1987 } else {
1972 1988 goto try_bundle;
1973 1989 }
1974 1990 }
1975 1991
1976 1992 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1977 1993 new_len = ntohs(sdc->sdh_len);
1978 1994 chunklen = new_len - sizeof (*sdc);
1979 1995
1980 1996 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
1981 1997 extra = SCTP_ALIGN - extra;
1982 1998 if ((new_len = seglen + new_len + extra) > fp->sf_pmss)
1983 1999 break;
1984 2000 if ((nmp = dupmsg(mp)) == NULL)
1985 2001 break;
1986 2002
1987 2003 if (extra > 0) {
1988 2004 fill = sctp_get_padding(sctp, extra);
1989 2005 if (fill != NULL) {
1990 2006 linkb(nmp, fill);
1991 2007 } else {
1992 2008 freemsg(nmp);
1993 2009 break;
1994 2010 }
1995 2011 }
1996 2012 linkb(head, nmp);
1997 2013
1998 2014 SCTP_CHUNK_CLEAR_FLAGS(nmp);
1999 2015 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
2000 2016
2001 2017 seglen = new_len;
2002 2018 mp = mp->b_next;
2003 2019 }
2004 2020 done_bundle:
2005 2021 if ((seglen > fp->sf_pmss) && fp->sf_isv4) {
2006 2022 ipha_t *iph = (ipha_t *)head->b_rptr;
2007 2023
2008 2024 /*
2009 2025 * Path MTU is different from path we thought it would
2010 2026 * be when we created chunks, or IP headers have grown.
2011 2027 * Need to clear the DF bit.
2012 2028 */
2013 2029 iph->ipha_fragment_offset_and_flags = 0;
2014 2030 }
2015 2031 fp->sf_rxt_unacked += seglen;
↓ open down ↓ |
200 lines elided |
↑ open up ↑ |
2016 2032
2017 2033 dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
2018 2034 "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
2019 2035 seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn),
2020 2036 (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
2021 2037
2022 2038 sctp->sctp_rexmitting = B_TRUE;
2023 2039 sctp->sctp_rxt_nxttsn = first_ua_tsn;
2024 2040 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
2025 2041 sctp_set_iplen(sctp, head, fp->sf_ixa);
2042 +
2043 + DTRACE_SCTP5(send, mblk_t *, NULL, ip_xmit_attr_t *, fp->sf_ixa,
2044 + void_ip_t *, mp->b_rptr, sctp_t *, sctp, sctp_hdr_t *,
2045 + &mp->b_rptr[fp->sf_ixa->ixa_ip_hdr_length]);
2046 +
2026 2047 (void) conn_ip_output(head, fp->sf_ixa);
2027 2048 BUMP_LOCAL(sctp->sctp_opkts);
2028 2049
2029 2050 /*
2030 2051 * Restart the oldfp timer with exponential backoff and
2031 2052 * the new fp timer for the retransmitted chunks.
2032 2053 */
2033 2054 restart_timer:
2034 2055 oldfp->sf_strikes++;
2035 2056 sctp->sctp_strikes++;
2036 2057 SCTP_CALC_RXT(sctp, oldfp, sctp->sctp_rto_max);
2037 2058 /*
2038 2059 * If there is still some data in the oldfp, restart the
2039 2060 * retransmission timer. If there is no data, the heartbeat will
2040 2061 * continue to run so it will do its job in checking the reachability
2041 2062 * of the oldfp.
2042 2063 */
2043 2064 if (oldfp != fp && oldfp->sf_suna != 0)
2044 2065 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->sf_rto);
2045 2066
2046 2067 /*
2047 2068 * Should we restart the timer of the new fp? If there is
2048 2069 * outstanding data to the new fp, the timer should be
2049 2070 * running already. So restarting it means that the timer
2050 2071 * will fire later for those outstanding data. But if
2051 2072 * we don't restart it, the timer will fire too early for the
2052 2073 * just retransmitted chunks to the new fp. The reason is that we
2053 2074 * don't keep a timestamp on when a chunk is retransmitted.
2054 2075 * So when the timer fires, it will just search for the
2055 2076 * chunk with the earliest TSN sent to new fp. This probably
2056 2077 * is the chunk we just retransmitted. So for now, let's
2057 2078 * be conservative and restart the timer of the new fp.
2058 2079 */
2059 2080 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2060 2081
2061 2082 sctp->sctp_active = ddi_get_lbolt64();
2062 2083 }
2063 2084
2064 2085 /*
2065 2086 * This function is called by sctp_ss_rexmit() to create a packet
2066 2087 * to be retransmitted to the given fp. The given meta and mp
2067 2088 * parameters are respectively the sctp_msg_hdr_t and the mblk of the
2068 2089 * first chunk to be retransmitted. This is also called when we want
2069 2090 * to retransmit a zero window probe from sctp_rexmit() or when we
2070 2091 * want to retransmit the zero window probe after the window has
2071 2092 * opened from sctp_got_sack().
 *
 * meta and mp are in/out: on success they are advanced past every chunk
 * that was placed in the packet, and either may be left NULL when the
 * scan ran off the end of the list. On failure they are not modified.
 *
 * *packet_len is written only on success and receives the summed length
 * of the bundled chunks plus their alignment padding; the same amount is
 * added to fp->sf_rxt_unacked.
 *
 * Returns the packet as built by sctp_add_proto_hdr(), or NULL if the
 * first chunk could not be duplicated, padded, or given a header.
2072 2093 */
2073 2094 mblk_t *
2074 2095 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp,
2075 2096 uint_t *packet_len)
2076 2097 {
2077 2098 uint32_t seglen = 0;
2078 2099 uint16_t chunklen;
2079 2100 int extra;
2080 2101 mblk_t *nmp;
2081 2102 mblk_t *head;
2082 2103 mblk_t *fill;
2083 2104 sctp_data_hdr_t *sdc;
2084 2105 sctp_msg_hdr_t *mhdr;
2085 2106
 /*
  * The first chunk is always sent. Work out its length and how much
  * padding is needed to round it up to a multiple of SCTP_ALIGN.
  */
2086 2107 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2087 2108 seglen = ntohs(sdc->sdh_len);
2088 2109 chunklen = seglen - sizeof (*sdc);
2089 2110 if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
2090 2111 extra = SCTP_ALIGN - extra;
2091 2112
 /*
  * The packet is built from a dupmsg() copy; the original chunk (*mp)
  * is still referenced and walked below.
  */
2092 2113 nmp = dupmsg(*mp);
2093 2114 if (nmp == NULL)
2094 2115 return (NULL);
2095 2116 if (extra > 0) {
2096 2117 fill = sctp_get_padding(sctp, extra);
2097 2118 if (fill != NULL) {
2098 2119 linkb(nmp, fill);
2099 2120 seglen += extra;
2100 2121 } else {
2101 2122 freemsg(nmp);
2102 2123 return (NULL);
2103 2124 }
2104 2125 }
2105 2126 SCTP_CHUNK_CLEAR_FLAGS(nmp);
 /* No piggybacked SACK is requested here (sacklen argument is 0). */
2106 2127 head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
2107 2128 if (head == NULL) {
2108 2129 freemsg(nmp);
2109 2130 return (NULL);
2110 2131 }
2111 2132 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2112 2133 /*
2113 2134 * Don't update the TSN if we are doing a Zero Win Probe.
2114 2135 */
2115 2136 if (!sctp->sctp_zero_win_probe)
2116 2137 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2117 2138 *mp = (*mp)->b_next;
2118 2139
 /*
  * Bundle further chunks while the packet is below fp->sf_pmss. The
  * label is re-entered via goto once the scan has moved on to the first
  * chunk of the next message that is not abandoned.
  */
2119 2140 try_bundle:
2120 2141 while (seglen < fp->sf_pmss) {
2121 2142 int32_t new_len;
2122 2143
2123 2144 /*
2124 2145 * Go through the list to find more chunks to be bundled.
2125 2146 * We should only retransmit sent but unack'ed chunks. Since
2126 2147 * they were sent before, the peer's receive window should
2127 2148 * be able to receive them.
2128 2149 */
2129 2150 while (*mp != NULL) {
2130 2151 /* Check if the chunk can be bundled. */
2131 2152 if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp))
2132 2153 break;
2133 2154 *mp = (*mp)->b_next;
2134 2155 }
2135 2156 /* Go to the next message. */
2136 2157 if (*mp == NULL) {
2137 2158 for (*meta = (*meta)->b_next; *meta != NULL;
2138 2159 *meta = (*meta)->b_next) {
2139 2160 mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr;
2140 2161
2141 2162 if (SCTP_IS_MSG_ABANDONED(*meta) ||
2142 2163 SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr,
2143 2164 sctp)) {
2144 2165 continue;
2145 2166 }
2146 2167
2147 2168 *mp = (*meta)->b_cont;
2148 2169 goto try_bundle;
2149 2170 }
2150 2171 /* No more chunk to be bundled. */
2151 2172 break;
2152 2173 }
2153 2174
2154 2175 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2155 2176 /* Don't bundle chunks beyond sctp_rxt_maxtsn. */
2156 2177 if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn))
2157 2178 break;
2158 2179 new_len = ntohs(sdc->sdh_len);
2159 2180 chunklen = new_len - sizeof (*sdc);
2160 2181
 /*
  * new_len becomes the prospective packet length with this chunk
  * and its padding added; stop if that would exceed sf_pmss.
  */
2161 2182 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
2162 2183 extra = SCTP_ALIGN - extra;
2163 2184 if ((new_len = seglen + new_len + extra) > fp->sf_pmss)
2164 2185 break;
 /*
  * Failing to copy or pad an extra chunk is not an error: the
  * packet built so far is still returned.
  */
2165 2186 if ((nmp = dupmsg(*mp)) == NULL)
2166 2187 break;
2167 2188
2168 2189 if (extra > 0) {
2169 2190 fill = sctp_get_padding(sctp, extra);
2170 2191 if (fill != NULL) {
2171 2192 linkb(nmp, fill);
2172 2193 } else {
2173 2194 freemsg(nmp);
2174 2195 break;
2175 2196 }
2176 2197 }
2177 2198 linkb(head, nmp);
2178 2199
2179 2200 SCTP_CHUNK_CLEAR_FLAGS(nmp);
2180 2201 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2181 2202 /*
2182 2203 * Don't update the TSN if we are doing a Zero Win Probe.
2183 2204 */
2184 2205 if (!sctp->sctp_zero_win_probe)
2185 2206 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2186 2207
2187 2208 seglen = new_len;
2188 2209 *mp = (*mp)->b_next;
2189 2210 }
 /*
  * Report the chunk bytes (padding included) placed in the packet and
  * charge them to fp's retransmitted-but-unacked count.
  */
2190 2211 *packet_len = seglen;
2191 2212 fp->sf_rxt_unacked += seglen;
2192 2213 return (head);
2193 2214 }
2194 2215
2195 2216 /*
2196 2217 * sctp_ss_rexmit() is called when we get a SACK after a timeout which
2197 2218 * advances the cum_tsn but the cum_tsn is still less than what we have sent
2198 2219 * (sctp_rxt_maxtsn) at the time of the timeout. This SACK is a "partial"
2199 2220 * SACK. We retransmit unacked chunks without having to wait for another
2200 2221 * timeout. The rationale is that the SACK should not be "partial" if all the
2201 2222 * lost chunks have been retransmitted. Since the SACK is "partial,"
2202 2223 * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
2203 2224 * be missing. It is better for us to retransmit them now instead
2204 2225 * of waiting for a timeout.
 *
 * sctp is the association to retransmit on; packets are sent to
 * sctp->sctp_current. Nothing is returned. If a packet cannot be built
 * by sctp_rexmit_packet(), the sctp_ss_rexmit_failed kstat is bumped and
 * the function returns without sending.
2205 2226 */
2206 2227 void
2207 2228 sctp_ss_rexmit(sctp_t *sctp)
2208 2229 {
2209 2230 mblk_t *meta;
2210 2231 mblk_t *mp;
2211 2232 mblk_t *pkt;
2212 2233 sctp_faddr_t *fp;
2213 2234 uint_t pkt_len;
2214 2235 uint32_t tot_wnd;
2215 2236 sctp_data_hdr_t *sdc;
2216 2237 int burst;
2217 2238 sctp_stack_t *sctps = sctp->sctp_sctps;
2218 2239
2219 2240 ASSERT(!sctp->sctp_zero_win_probe);
2220 2241
2221 2242 /*
2222 2243 * If the last cum ack is smaller than what we have just
2223 2244 * retransmitted, simply return.
2224 2245 */
2225 2246 if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
2226 2247 sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
2227 2248 else
2228 2249 return;
2229 2250 ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));
2230 2251
2231 2252 /*
2232 2253 * After a timer fires, sctp_current should be set to the new
2233 2254 * fp where the retransmitted chunks are sent.
2234 2255 */
2235 2256 fp = sctp->sctp_current;
2236 2257
2237 2258 /*
2238 2259 * Since we are retransmitting, we only need to use cwnd to determine
2239 2260 * how much we can send as we were allowed (by peer's receive window)
2240 2261 * to send those retransmitted chunks previously when they are first
2241 2262 * sent. If we record how much we have retransmitted but
2242 2263 * unacknowledged using rxt_unacked, then the amount we can now send
2243 2264 * is equal to cwnd minus rxt_unacked.
2244 2265 *
2245 2266 * The field rxt_unacked is incremented when we retransmit a packet
2246 2267 * and decremented when we got a SACK acknowledging something. And
2247 2268 * it is reset when the retransmission timer fires as we assume that
2248 2269 * all packets have left the network after a timeout. If this
2249 2270 * assumption is not true, it means that after a timeout, we can
2250 2271 * get a SACK acknowledging more than rxt_unacked (its value only
2251 2272 * contains what is retransmitted when the timer fires). So
2252 2273 * rxt_unacked will become very big (it is an unsigned int so going
2253 2274 * negative means that the value is huge). This is the reason we
2254 2275 * always send at least 1 MSS bytes.
2255 2276 *
2256 2277 * The reason why we do not have an accurate count is that we
2257 2278 * only know how many packets are outstanding (using the TSN numbers).
2258 2279 * But we do not know how many bytes those packets contain. To
2259 2280 * have an accurate count, we need to walk through the send list.
2260 2281 * As it is not really important to have an accurate count during
2261 2282 * retransmission, we skip this walk to save some time. This should
2262 2283 * not make the retransmission too aggressive to cause congestion.
2263 2284 */
2264 2285 if (fp->sf_cwnd <= fp->sf_rxt_unacked)
2265 2286 tot_wnd = fp->sf_pmss;
2266 2287 else
2267 2288 tot_wnd = fp->sf_cwnd - fp->sf_rxt_unacked;
2268 2289
2269 2290 /* Find the first unack'ed chunk */
2270 2291 for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
2271 2292 sctp_msg_hdr_t *mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
2272 2293
2273 2294 if (SCTP_IS_MSG_ABANDONED(meta) ||
2274 2295 SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
2275 2296 continue;
2276 2297 }
2277 2298
2278 2299 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
2279 2300 /* Again, this may not be possible */
2280 2301 if (!SCTP_CHUNK_ISSENT(mp))
2281 2302 return;
2282 2303 sdc = (sctp_data_hdr_t *)mp->b_rptr;
2283 2304 if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
2284 2305 goto found_msg;
2285 2306 }
2286 2307 }
2287 2308
2288 2309 /* Everything is abandoned... */
2289 2310 return;
2290 2311
2291 2312 found_msg:
2292 2313 if (!fp->sf_timer_running)
2293 2314 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2294 2315 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
2295 2316 if (pkt == NULL) {
2296 2317 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2297 2318 return;
2298 2319 }
2299 2320 if ((pkt_len > fp->sf_pmss) && fp->sf_isv4) {
↓ open down ↓ |
264 lines elided |
↑ open up ↑ |
2300 2321 ipha_t *iph = (ipha_t *)pkt->b_rptr;
2301 2322
2302 2323 /*
2303 2324 * Path MTU is different from path we thought it would
2304 2325 * be when we created chunks, or IP headers have grown.
2305 2326 * Need to clear the DF bit.
2306 2327 */
2307 2328 iph->ipha_fragment_offset_and_flags = 0;
2308 2329 }
2309 2330 sctp_set_iplen(sctp, pkt, fp->sf_ixa);
2331 +
 /*
  * Trace the packet that was just built: pkt starts with the IP header.
  * mp must not be used here. sctp_rexmit_packet() has advanced it past
  * the bundled chunks, so it may be NULL, and when it is not NULL it
  * points at a data chunk on the transmit list rather than at a header.
  */
2332 + DTRACE_SCTP5(send, mblk_t *, NULL, ip_xmit_attr_t *, fp->sf_ixa,
2333 + void_ip_t *, pkt->b_rptr, sctp_t *, sctp, sctp_hdr_t *,
2334 + &pkt->b_rptr[fp->sf_ixa->ixa_ip_hdr_length]);
2335 +
2310 2336 (void) conn_ip_output(pkt, fp->sf_ixa);
2311 2337 BUMP_LOCAL(sctp->sctp_opkts);
2312 2338
2313 2339 /* Check and see if there is more chunk to be retransmitted. */
2314 2340 if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sf_pmss ||
2315 2341 meta == NULL)
2316 2342 return;
2317 2343 if (mp == NULL)
2318 2344 meta = meta->b_next;
2319 2345 if (meta == NULL)
2320 2346 return;
2321 2347
 /*
  * NOTE(review): every goto found_msg falls back into this for
  * statement, so its init clause runs again and burst restarts at
  * sctps_maxburst - 1. Confirm that this is how the burst limit is
  * meant to be applied.
  */
2322 2348 /* Retransmit another packet if the window allows. */
2323 2349 for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1;
2324 2350 meta != NULL && burst > 0; meta = meta->b_next, burst--) {
2325 2351 if (mp == NULL)
2326 2352 mp = meta->b_cont;
2327 2353 for (; mp != NULL; mp = mp->b_next) {
2328 2354 /* Again, this may not be possible */
2329 2355 if (!SCTP_CHUNK_ISSENT(mp))
2330 2356 return;
2331 2357 if (!SCTP_CHUNK_ISACKED(mp))
2332 2358 goto found_msg;
2333 2359 }
2334 2360 }
2335 2361 }
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX