1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/systm.h>
  28 #include <sys/stream.h>
  29 #include <sys/cmn_err.h>
  30 #define _SUN_TPI_VERSION 2
  31 #include <sys/tihdr.h>
  32 #include <sys/socket.h>
  33 #include <sys/stropts.h>
  34 #include <sys/strsun.h>
  35 #include <sys/strsubr.h>
  36 #include <sys/socketvar.h>
  37 #include <inet/common.h>
  38 #include <inet/mi.h>
  39 #include <inet/ip.h>
  40 #include <inet/ip_ire.h>
  41 #include <inet/ip6.h>
  42 #include <inet/sctp_ip.h>
  43 #include <inet/ipclassifier.h>
  44 
  45 /*
  46  * PR-SCTP comments.
  47  *
  48  * A message can expire before it gets to the transmit list (i.e. it is still
  49  * in the unsent list - unchunked), after it gets to the transmit list, but
  50  * before transmission has actually started, or after transmission has begun.
  51  * Accordingly, we check for the status of a message in sctp_chunkify() when
  52  * the message is being transferred from the unsent list to the transmit list;
  53  * in sctp_get_msg_to_send(), when we get the next chunk from the transmit
  54  * list and in sctp_rexmit() when we get the next chunk to be (re)transmitted.
  55  * When we nuke a message in sctp_chunkify(), all we need to do is take it
  56  * out of the unsent list and update sctp_unsent; when a message is deemed
  57  * timed-out in sctp_get_msg_to_send() we can just take it out of the transmit
  58  * list, update sctp_unsent IFF transmission for the message has not yet begun
  59  * (i.e. !SCTP_CHUNK_ISSENT(meta->b_cont)). However, if transmission for the
  60  * message has started, then we cannot just take it out of the list, we need
  61  * to send Forward TSN chunk to the peer so that the peer can clear its
  62  * fragment list for this message. However, we cannot just send the Forward
  63  * TSN in sctp_get_msg_to_send() because there might be unacked chunks for
 * messages preceding this abandoned message. So, we send a Forward TSN
 * IFF all messages prior to this abandoned message have been SACKed, if not
  66  * we defer sending the Forward TSN to sctp_cumack(), which will check for
  67  * this condition and send the Forward TSN via sctp_check_abandoned_msg(). In
  68  * sctp_rexmit() when we check for retransmissions, we need to determine if
  69  * the advanced peer ack point can be moved ahead, and if so, send a Forward
  70  * TSN to the peer instead of retransmitting the chunk. Note that when
  71  * we send a Forward TSN for a message, there may be yet unsent chunks for
  72  * this message; we need to mark all such chunks as abandoned, so that
  73  * sctp_cumack() can take the message out of the transmit list, additionally
 * sctp_unsent needs to be adjusted. Whenever sctp_unsent is updated (i.e.
  75  * decremented when a message/chunk is deemed abandoned), sockfs needs to
  76  * be notified so that it can adjust its idea of the queued message.
  77  */
  78 
  79 #include "sctp_impl.h"
  80 
  81 static struct kmem_cache        *sctp_kmem_ftsn_set_cache;
  82 static mblk_t                   *sctp_chunkify(sctp_t *, int, int, int);
  83 
  84 #ifdef  DEBUG
  85 static boolean_t        sctp_verify_chain(mblk_t *, mblk_t *);
  86 #endif
  87 
  88 /*
  89  * Called to allocate a header mblk when sending data to SCTP.
  90  * Data will follow in b_cont of this mblk.
  91  */
  92 mblk_t *
  93 sctp_alloc_hdr(const char *name, int nlen, const char *control, int clen,
  94     int flags)
  95 {
  96         mblk_t *mp;
  97         struct T_unitdata_req *tudr;
  98         size_t size;
  99         int error;
 100 
 101         size = sizeof (*tudr) + _TPI_ALIGN_TOPT(nlen) + clen;
 102         size = MAX(size, sizeof (sctp_msg_hdr_t));
 103         if (flags & SCTP_CAN_BLOCK) {
 104                 mp = allocb_wait(size, BPRI_MED, 0, &error);
 105         } else {
 106                 mp = allocb(size, BPRI_MED);
 107         }
 108         if (mp) {
 109                 tudr = (struct T_unitdata_req *)mp->b_rptr;
 110                 tudr->PRIM_type = T_UNITDATA_REQ;
 111                 tudr->DEST_length = nlen;
 112                 tudr->DEST_offset = sizeof (*tudr);
 113                 tudr->OPT_length = clen;
 114                 tudr->OPT_offset = (t_scalar_t)(sizeof (*tudr) +
 115                     _TPI_ALIGN_TOPT(nlen));
 116                 if (nlen > 0)
 117                         bcopy(name, tudr + 1, nlen);
 118                 if (clen > 0)
 119                         bcopy(control, (char *)tudr + tudr->OPT_offset, clen);
 120                 mp->b_wptr += (tudr ->OPT_offset + clen);
 121                 mp->b_datap->db_type = M_PROTO;
 122         }
 123         return (mp);
 124 }
 125 
/*
 * Accept one user message for transmission on an association.
 *
 * mp is an M_PROTO mblk carrying a T_unitdata_req (both ASSERTed), with the
 * payload, if any, chained as M_DATA in b_cont.  The request may name a
 * destination address (AF_INET or AF_INET6), which must resolve through
 * sctp_lookup_faddr(), and may carry an SCTP_SNDRCV cmsg whose stream id,
 * flags, ppid, context and time-to-live replace the association defaults.
 *
 * If the resulting flags contain MSG_ABORT, the payload is passed to
 * sctp_user_abort() and the whole message is freed.  Otherwise the M_PROTO
 * block is overwritten with a sctp_msg_hdr_t, the message is appended to
 * sctp_xmit_unsent, and sctp_output() is called when the association is in
 * SCTPS_ESTABLISHED.
 *
 * Returns 0 on success, or EINVAL, EAFNOSUPPORT, ENOMEM or EPIPE.  Nothing
 * in this function frees the message on the error returns.  The flags
 * argument is not used.
 */
/*ARGSUSED2*/
int
sctp_sendmsg(sctp_t *sctp, mblk_t *mp, int flags)
{
        sctp_faddr_t    *fp = NULL;
        struct T_unitdata_req   *tudr;
        int             error = 0;
        mblk_t          *mproto = mp;
        in6_addr_t      *addr;
        in6_addr_t      tmpaddr;
        /* Per-message attributes start out as the association defaults. */
        uint16_t        sid = sctp->sctp_def_stream;
        uint32_t        ppid = sctp->sctp_def_ppid;
        uint32_t        context = sctp->sctp_def_context;
        uint16_t        msg_flags = sctp->sctp_def_flags;
        sctp_msg_hdr_t  *sctp_msg_hdr;
        uint32_t        msg_len = 0;
        uint32_t        timetolive = sctp->sctp_def_timetolive;
        conn_t          *connp = sctp->sctp_connp;

        ASSERT(DB_TYPE(mproto) == M_PROTO);

        /* From here on mp is the payload chain; mproto keeps the header. */
        mp = mp->b_cont;
        ASSERT(mp == NULL || DB_TYPE(mp) == M_DATA);

        tudr = (struct T_unitdata_req *)mproto->b_rptr;
        ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);

        /* Get destination address, if specified */
        if (tudr->DEST_length > 0) {
                sin_t *sin;
                sin6_t *sin6;

                sin = (struct sockaddr_in *)
                    (mproto->b_rptr + tudr->DEST_offset);
                switch (sin->sin_family) {
                case AF_INET:
                        if (tudr->DEST_length < sizeof (*sin)) {
                                return (EINVAL);
                        }
                        /* Peer addresses are looked up as in6_addr_t. */
                        IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &tmpaddr);
                        addr = &tmpaddr;
                        break;
                case AF_INET6:
                        if (tudr->DEST_length < sizeof (*sin6)) {
                                return (EINVAL);
                        }
                        sin6 = (struct sockaddr_in6 *)
                            (mproto->b_rptr + tudr->DEST_offset);
                        addr = &sin6->sin6_addr;
                        break;
                default:
                        return (EAFNOSUPPORT);
                }
                /* The address must be one of this association's peers. */
                fp = sctp_lookup_faddr(sctp, addr);
                if (fp == NULL) {
                        return (EINVAL);
                }
        }
        /* Ancillary Data? */
        if (tudr->OPT_length > 0) {
                struct cmsghdr          *cmsg;
                char                    *cend;
                struct sctp_sndrcvinfo  *sndrcv;

                cmsg = (struct cmsghdr *)(mproto->b_rptr + tudr->OPT_offset);
                cend = ((char *)cmsg + tudr->OPT_length);
                ASSERT(cend <= (char *)mproto->b_wptr);

                for (;;) {
                        /*
                         * Stop once a cmsg header, or the length it claims,
                         * would run past the end of the option buffer.
                         */
                        if ((char *)(cmsg + 1) > cend ||
                            ((char *)cmsg + cmsg->cmsg_len) > cend) {
                                break;
                        }
                        if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
                            (cmsg->cmsg_type == SCTP_SNDRCV)) {
                                if (cmsg->cmsg_len <
                                    (sizeof (*sndrcv) + sizeof (*cmsg))) {
                                        return (EINVAL);
                                }
                                sndrcv = (struct sctp_sndrcvinfo *)(cmsg + 1);
                                sid = sndrcv->sinfo_stream;
                                msg_flags = sndrcv->sinfo_flags;
                                ppid = sndrcv->sinfo_ppid;
                                context = sndrcv->sinfo_context;
                                timetolive = sndrcv->sinfo_timetolive;
                                /* Only the first SCTP_SNDRCV is honored. */
                                break;
                        }
                        /* A zero-length cmsg would never advance; give up. */
                        if (cmsg->cmsg_len > 0)
                                cmsg = CMSG_NEXT(cmsg);
                        else
                                break;
                }
        }
        if (msg_flags & MSG_ABORT) {
                /*
                 * Collapse a multi-mblk payload into a single mblk before
                 * handing it to sctp_user_abort().
                 */
                if (mp && mp->b_cont) {
                        mblk_t *pump = msgpullup(mp, -1);
                        if (!pump) {
                                return (ENOMEM);
                        }
                        freemsg(mp);
                        mp = pump;
                        mproto->b_cont = mp;
                }
                RUN_SCTP(sctp);
                sctp_user_abort(sctp, mp);
                /* mp is still linked at mproto->b_cont, so this frees both. */
                freemsg(mproto);
                goto done2;
        }
        /*
         * No payload: return 0 (error is still 0 here).
         * NOTE(review): mproto is neither queued nor freed on this path;
         * confirm the caller expects to keep ownership in this case.
         */
        if (mp == NULL)
                goto done;

        /*
         * RUN_SCTP()/WAKE_SCTP() bracket every access to association state
         * below; each exit past this point goes through WAKE_SCTP().
         */
        RUN_SCTP(sctp);

        /* Reject any new data requests if we are shutting down */
        if (sctp->sctp_state > SCTPS_ESTABLISHED ||
            (sctp->sctp_connp->conn_state_flags & CONN_CLOSING)) {
                error = EPIPE;
                goto unlock_done;
        }

        /* Re-use the mproto to store relevant info. */
        ASSERT(MBLKSIZE(mproto) >= sizeof (*sctp_msg_hdr));

        /*
         * The T_unitdata_req is no longer needed: rewind to the start of
         * the data buffer and overlay a sctp_msg_hdr_t on it.
         */
        mproto->b_rptr = mproto->b_datap->db_base;
        mproto->b_wptr = mproto->b_rptr + sizeof (*sctp_msg_hdr);

        sctp_msg_hdr = (sctp_msg_hdr_t *)mproto->b_rptr;
        bzero(sctp_msg_hdr, sizeof (*sctp_msg_hdr));
        sctp_msg_hdr->smh_context = context;
        sctp_msg_hdr->smh_sid = sid;
        sctp_msg_hdr->smh_ppid = ppid;
        sctp_msg_hdr->smh_flags = msg_flags;
        /* Time-to-live is kept in ticks next to the lbolt at queueing time. */
        sctp_msg_hdr->smh_ttl = MSEC_TO_TICK(timetolive);
        sctp_msg_hdr->smh_tob = ddi_get_lbolt64();
        /* Total the payload bytes; mp is NULL once this loop finishes. */
        for (; mp != NULL; mp = mp->b_cont)
                msg_len += MBLKL(mp);
        sctp_msg_hdr->smh_msglen = msg_len;

        /* User requested specific destination */
        SCTP_SET_CHUNK_DEST(mproto, fp);

        /*
         * The stream id is checked against sctp_num_ostr only once the
         * association has reached SCTPS_COOKIE_ECHOED or later.
         */
        if (sctp->sctp_state >= SCTPS_COOKIE_ECHOED &&
            sid >= sctp->sctp_num_ostr) {
                /* Send sendfail event */
                sctp_sendfail_event(sctp, dupmsg(mproto), SCTP_ERR_BAD_SID,
                    B_FALSE);
                error = EINVAL;
                goto unlock_done;
        }

        /* no data */
        if (msg_len == 0) {
                sctp_sendfail_event(sctp, dupmsg(mproto),
                    SCTP_ERR_NO_USR_DATA, B_FALSE);
                error = EINVAL;
                goto unlock_done;
        }

        /* Add it to the unsent list */
        if (sctp->sctp_xmit_unsent == NULL) {
                sctp->sctp_xmit_unsent = sctp->sctp_xmit_unsent_tail = mproto;
        } else {
                sctp->sctp_xmit_unsent_tail->b_next = mproto;
                sctp->sctp_xmit_unsent_tail = mproto;
        }
        sctp->sctp_unsent += msg_len;
        BUMP_LOCAL(sctp->sctp_msgcount);
        /*
         * Notify sockfs if the tx queue is full.
         */
        if (SCTP_TXQ_LEN(sctp) >= connp->conn_sndbuf) {
                sctp->sctp_txq_full = 1;
                sctp->sctp_ulp_txq_full(sctp->sctp_ulpd, B_TRUE);
        }
        /* In any other state the message simply stays on the unsent list. */
        if (sctp->sctp_state == SCTPS_ESTABLISHED)
                sctp_output(sctp, UINT_MAX);
done2:
        /* Success exit for paths that entered RUN_SCTP(). */
        WAKE_SCTP(sctp);
        return (0);
unlock_done:
        /* Failure exit for paths that entered RUN_SCTP(). */
        WAKE_SCTP(sctp);
done:
        return (error);
}
 310 
/*
 * While there are messages on sctp_xmit_unsent, detach each one. For each:
 * allocate space for the chunk header, fill in the data chunk, and fill in
 * the chunk header. Then append it to sctp_xmit_tail.
 * Return after appending as many bytes as required (bytes_to_send).
 * We also return if we've appended one or more chunks, and find a subsequent
 * unsent message is too big to fit in the segment.
 *
 * Arguments:
 *   mss           - size of the packet currently being filled.
 *   firstseg_len  - bytes already used in that packet; the first chunk is
 *                   sized to the remaining mss - firstseg_len bytes.
 *   bytes_to_send - payload byte budget; chunking of further messages stops
 *                   once it is exhausted.
 *
 * Data layout: a message is a header mblk (sctp_msg_hdr_t) whose b_cont
 * points at the first chunk; the chunks of one message are chained through
 * b_next, and the mblks of one chunk through b_cont.
 *
 * Returns NULL if firstseg_len is non-zero and the next unsent message is
 * longer than the remaining room; otherwise returns sctp->sctp_xmit_tail.
 * If dupb() or allocb() fails part-way through a message, the message is
 * left on the unsent list (flagged as chunked when some chunks were already
 * built) so that a later call can pick up where this one stopped.
 */
mblk_t *
sctp_chunkify(sctp_t *sctp, int mss, int firstseg_len, int bytes_to_send)
{
        mblk_t                  *mp;
        mblk_t                  *chunk_mp;
        mblk_t                  *chunk_head;
        mblk_t                  *chunk_hdr;
        mblk_t                  *chunk_tail = NULL;
        int                     count;
        int                     chunksize;
        sctp_data_hdr_t         *sdc;
        mblk_t                  *mdblk = sctp->sctp_xmit_unsent;
        sctp_faddr_t            *fp;
        sctp_faddr_t            *fp1;
        size_t                  xtralen;
        sctp_msg_hdr_t          *msg_hdr;
        sctp_stack_t            *sctps = sctp->sctp_sctps;
        sctp_msg_hdr_t          *next_msg_hdr;
        size_t                  nextlen;
        int                     remaining_len = mss - firstseg_len;

        ASSERT(remaining_len >= 0);

        /* Destination: the one stored with the message, else the current. */
        fp = SCTP_CHUNK_DEST(mdblk);
        if (fp == NULL)
                fp = sctp->sctp_current;
        /*
         * xtralen is the size of a separately allocated chunk-header mblk:
         * the IP/SCTP header length for this address family, plus
         * sctps_wroff_xtra, plus the data chunk header itself.
         */
        if (fp->sf_isv4)
                xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra +
                    sizeof (*sdc);
        else
                xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra +
                    sizeof (*sdc);
        /*
         * chunksize is the payload capacity of the chunk being built and
         * count the part of it still unfilled; chunksize - count is thus
         * the payload actually placed in the chunk.
         */
        count = chunksize = remaining_len - sizeof (*sdc);
nextmsg:
        /* mdblk is always the head of the unsent list at this point. */
        next_msg_hdr = (sctp_msg_hdr_t *)sctp->sctp_xmit_unsent->b_rptr;
        nextlen = next_msg_hdr->smh_msglen;
        /*
         * Will the entire next message fit in the current packet ?
         * if not, leave it on the unsent list.
         */
        if ((firstseg_len != 0) && (nextlen > remaining_len))
                return (NULL);

        chunk_mp = mdblk->b_cont;

        /*
         * If this message is partially chunked, we ignore the next one for
         * now and use the one already present. For the unchunked bits, we
         * use the length of the last chunk.
         */
        if (SCTP_IS_MSG_CHUNKED(mdblk)) {
                int     chunk_len;

                /*
                 * An earlier call stopped mid-message: the first element of
                 * b_cont is the still unchunked data and its b_next leads to
                 * the chunks already built.  Separate the two again.
                 */
                ASSERT(chunk_mp->b_next != NULL);
                mdblk->b_cont = chunk_mp->b_next;
                chunk_mp->b_next = NULL;
                SCTP_MSG_CLEAR_CHUNKED(mdblk);
                /* Size the next chunk from the last chunk already built. */
                mp = mdblk->b_cont;
                while (mp->b_next != NULL)
                        mp = mp->b_next;
                chunk_len = ntohs(((sctp_data_hdr_t *)mp->b_rptr)->sdh_len);
                if (fp->sf_pmss - chunk_len > sizeof (*sdc))
                        count = chunksize = fp->sf_pmss - chunk_len;
                else
                        count = chunksize = fp->sf_pmss;
                count = chunksize = count - sizeof (*sdc);
        } else {
                msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
                if (SCTP_MSG_TO_BE_ABANDONED(mdblk, msg_hdr, sctp)) {
                        /*
                         * The message is to be abandoned before any of it
                         * was chunked: unlink it from the unsent list and
                         * take its bytes out of sctp_unsent.
                         */
                        sctp->sctp_xmit_unsent = mdblk->b_next;
                        if (sctp->sctp_xmit_unsent == NULL)
                                sctp->sctp_xmit_unsent_tail = NULL;
                        ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
                        sctp->sctp_unsent -= msg_hdr->smh_msglen;
                        mdblk->b_next = NULL;
                        BUMP_LOCAL(sctp->sctp_prsctpdrop);
                        /*
                         * Update ULP the amount of queued data, which is
                         * sent-unack'ed + unsent.
                         */
                        if (!SCTP_IS_DETACHED(sctp))
                                SCTP_TXQ_UPDATE(sctp);
                        sctp_sendfail_event(sctp, mdblk, 0, B_FALSE);
                        goto try_next;
                }
                /*
                 * Nothing chunked yet: chunk_mp holds the payload chain and
                 * b_cont is cleared so it can collect the chunks built below.
                 */
                mdblk->b_cont = NULL;
        }
        msg_hdr = (sctp_msg_hdr_t *)mdblk->b_rptr;
nextchunk:
        chunk_head = chunk_mp;
        chunk_tail = NULL;

        /* Skip as many mblk's as we need */
        while (chunk_mp != NULL && ((count - MBLKL(chunk_mp)) >= 0)) {
                count -= MBLKL(chunk_mp);
                chunk_tail = chunk_mp;
                chunk_mp = chunk_mp->b_cont;
        }
        /* Split the chain, if needed */
        if (chunk_mp != NULL) {
                if (count > 0) {
                        /*
                         * There is room left but chunk_mp does not fit as a
                         * whole: duplicate it so this chunk can take its
                         * first count bytes and the rest stays in chunk_mp.
                         */
                        mblk_t  *split_mp = dupb(chunk_mp);

                        if (split_mp == NULL) {
                                /*
                                 * Nothing has been cut yet, so chunk_head
                                 * still leads to all the unchunked data.
                                 * Hang it back on the message, ahead of any
                                 * chunks already built, and stop.
                                 */
                                if (mdblk->b_cont == NULL) {
                                        mdblk->b_cont = chunk_head;
                                } else  {
                                        SCTP_MSG_SET_CHUNKED(mdblk);
                                        ASSERT(chunk_head->b_next == NULL);
                                        chunk_head->b_next = mdblk->b_cont;
                                        mdblk->b_cont = chunk_head;
                                }
                                return (sctp->sctp_xmit_tail);
                        }
                        if (chunk_tail != NULL) {
                                chunk_tail->b_cont = split_mp;
                                chunk_tail = chunk_tail->b_cont;
                        } else {
                                chunk_head = chunk_tail = split_mp;
                        }
                        /* Duplicate ends after count bytes; rest follows. */
                        chunk_tail->b_wptr = chunk_tail->b_rptr + count;
                        chunk_mp->b_rptr = chunk_tail->b_wptr;
                        count = 0;
                } else if (chunk_tail == NULL) {
                        /* No room and nothing taken: retry with a new size. */
                        goto next;
                } else {
                        /* Exact fit on an mblk boundary: just cut the chain. */
                        chunk_tail->b_cont = NULL;
                }
        }
        /* Alloc chunk hdr, if needed */
        /*
         * The data header is written in front of the payload in place unless
         * the first data block is shared (DB_REF > 1), its read pointer is
         * not SCTP_ALIGN aligned, or it lacks the headroom for the header.
         */
        if (DB_REF(chunk_head) > 1 ||
            ((intptr_t)chunk_head->b_rptr) & (SCTP_ALIGN - 1) ||
            MBLKHEAD(chunk_head) < sizeof (*sdc)) {
                if ((chunk_hdr = allocb(xtralen, BPRI_MED)) == NULL) {
                        /*
                         * Rejoin the data cut off for this chunk with the
                         * remainder, put it back on the message (ahead of
                         * any chunks already built) and stop.
                         */
                        if (mdblk->b_cont == NULL) {
                                if (chunk_mp != NULL)
                                        linkb(chunk_head, chunk_mp);
                                mdblk->b_cont = chunk_head;
                        } else {
                                SCTP_MSG_SET_CHUNKED(mdblk);
                                if (chunk_mp != NULL)
                                        linkb(chunk_head, chunk_mp);
                                ASSERT(chunk_head->b_next == NULL);
                                chunk_head->b_next = mdblk->b_cont;
                                mdblk->b_cont = chunk_head;
                        }
                        return (sctp->sctp_xmit_tail);
                }
                /* Header sits at the very end of the new block. */
                chunk_hdr->b_rptr += xtralen - sizeof (*sdc);
                chunk_hdr->b_wptr = chunk_hdr->b_rptr + sizeof (*sdc);
                chunk_hdr->b_cont = chunk_head;
        } else {
                chunk_hdr = chunk_head;
                chunk_hdr->b_rptr -= sizeof (*sdc);
        }
        ASSERT(chunk_hdr->b_datap->db_ref == 1);
        sdc = (sctp_data_hdr_t *)chunk_hdr->b_rptr;
        sdc->sdh_id = CHUNK_DATA;
        sdc->sdh_flags = 0;
        /* Chunk length: header plus the payload actually placed in it. */
        sdc->sdh_len = htons(sizeof (*sdc) + chunksize - count);
        ASSERT(sdc->sdh_len);
        sdc->sdh_sid = htons(msg_hdr->smh_sid);
        /*
         * We defer assigning the SSN just before sending the chunk, else
         * if we drop the chunk in sctp_get_msg_to_send(), we would need
         * to send a Forward TSN to let the peer know. Some more comments
         * about this in sctp_impl.h for SCTP_CHUNK_SENT.
         */
        sdc->sdh_payload_id = msg_hdr->smh_ppid;

        /* First chunk of the message gets the B bit; others are appended. */
        if (mdblk->b_cont == NULL) {
                mdblk->b_cont = chunk_hdr;
                SCTP_DATA_SET_BBIT(sdc);
        } else {
                mp = mdblk->b_cont;
                while (mp->b_next != NULL)
                        mp = mp->b_next;
                mp->b_next = chunk_hdr;
        }

        bytes_to_send -= (chunksize - count);
        if (chunk_mp != NULL) {
                /* More data in this message: later chunks use the full pmss. */
next:
                count = chunksize = fp->sf_pmss - sizeof (*sdc);
                goto nextchunk;
        }
        /* Message is fully chunked: mark the last chunk and dequeue it. */
        SCTP_DATA_SET_EBIT(sdc);
        sctp->sctp_xmit_unsent = mdblk->b_next;
        if (mdblk->b_next == NULL) {
                sctp->sctp_xmit_unsent_tail = NULL;
        }
        mdblk->b_next = NULL;

        /*
         * Append the message to the transmit list.  sctp_xmit_tail is only
         * set here when it is NULL; otherwise the list is walked from it to
         * the last message and the new one is linked after that.
         */
        if (sctp->sctp_xmit_tail == NULL) {
                sctp->sctp_xmit_head = sctp->sctp_xmit_tail = mdblk;
        } else {
                mp = sctp->sctp_xmit_tail;
                while (mp->b_next != NULL)
                        mp = mp->b_next;
                mp->b_next = mdblk;
                mdblk->b_prev = mp;
        }
try_next:
        /* Keep going while there is budget left and more unsent messages. */
        if (bytes_to_send > 0 && sctp->sctp_xmit_unsent != NULL) {
                mdblk = sctp->sctp_xmit_unsent;
                fp1 = SCTP_CHUNK_DEST(mdblk);
                if (fp1 == NULL)
                        fp1 = sctp->sctp_current;
                if (fp == fp1) {
                        /* len covers only the first mblk of the next message. */
                        size_t len = MBLKL(mdblk->b_cont);
                        /*
                         * Same destination with room left over from the last
                         * chunk: use that room (less a data header, rounded
                         * down to a multiple of 4) if the first mblk either
                         * fits in it or exceeds a full-sized chunk anyway.
                         * Otherwise start with a full-sized chunk.
                         */
                        if ((count > 0) &&
                            ((len > fp->sf_pmss - sizeof (*sdc)) ||
                            (len <= count))) {
                                count -= sizeof (*sdc);
                                count = chunksize = count - (count & 0x3);
                        } else {
                                count = chunksize = fp->sf_pmss -
                                    sizeof (*sdc);
                        }
                } else {
                        /* New destination: redo the header and chunk sizes. */
                        if (fp1->sf_isv4)
                                xtralen = sctp->sctp_hdr_len;
                        else
                                xtralen = sctp->sctp_hdr6_len;
                        xtralen += sctps->sctps_wroff_xtra + sizeof (*sdc);
                        count = chunksize = fp1->sf_pmss - sizeof (*sdc);
                        fp = fp1;
                }
                goto nextmsg;
        }
        return (sctp->sctp_xmit_tail);
}
 551 
 552 void
 553 sctp_free_msg(mblk_t *ump)
 554 {
 555         mblk_t *mp, *nmp;
 556 
 557         for (mp = ump->b_cont; mp; mp = nmp) {
 558                 nmp = mp->b_next;
 559                 mp->b_next = mp->b_prev = NULL;
 560                 freemsg(mp);
 561         }
 562         ASSERT(!ump->b_prev);
 563         ump->b_next = NULL;
 564         freeb(ump);
 565 }
 566 
 567 mblk_t *
 568 sctp_add_proto_hdr(sctp_t *sctp, sctp_faddr_t *fp, mblk_t *mp, int sacklen,
 569     int *error)
 570 {
 571         int hdrlen;
 572         uchar_t *hdr;
 573         int isv4 = fp->sf_isv4;
 574         sctp_stack_t    *sctps = sctp->sctp_sctps;
 575 
 576         if (error != NULL)
 577                 *error = 0;
 578 
 579         if (isv4) {
 580                 hdrlen = sctp->sctp_hdr_len;
 581                 hdr = sctp->sctp_iphc;
 582         } else {
 583                 hdrlen = sctp->sctp_hdr6_len;
 584                 hdr = sctp->sctp_iphc6;
 585         }
 586         /*
 587          * A reject|blackhole could mean that the address is 'down'. Similarly,
 588          * it is possible that the address went down, we tried to send an
 589          * heartbeat and ended up setting fp->sf_saddr as unspec because we
 590          * didn't have any usable source address.  In either case
 591          * sctp_get_dest() will try find an IRE, if available, and set
 592          * the source address, if needed.  If we still don't have any
 593          * usable source address, fp->sf_state will be SCTP_FADDRS_UNREACH and
 594          * we return EHOSTUNREACH.
 595          */
 596         ASSERT(fp->sf_ixa->ixa_ire != NULL);
 597         if ((fp->sf_ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
 598             SCTP_IS_ADDR_UNSPEC(fp->sf_isv4, fp->sf_saddr)) {
 599                 sctp_get_dest(sctp, fp);
 600                 if (fp->sf_state == SCTP_FADDRS_UNREACH) {
 601                         if (error != NULL)
 602                                 *error = EHOSTUNREACH;
 603                         return (NULL);
 604                 }
 605         }
 606         /* Copy in IP header. */
 607         if ((mp->b_rptr - mp->b_datap->db_base) <
 608             (sctps->sctps_wroff_xtra + hdrlen + sacklen) || DB_REF(mp) > 2) {
 609                 mblk_t *nmp;
 610 
 611                 /*
 612                  * This can happen if IP headers are adjusted after
 613                  * data was moved into chunks, or during retransmission,
 614                  * or things like snoop is running.
 615                  */
 616                 nmp = allocb(sctps->sctps_wroff_xtra + hdrlen + sacklen,
 617                     BPRI_MED);
 618                 if (nmp == NULL) {
 619                         if (error !=  NULL)
 620                                 *error = ENOMEM;
 621                         return (NULL);
 622                 }
 623                 nmp->b_rptr += sctps->sctps_wroff_xtra;
 624                 nmp->b_wptr = nmp->b_rptr + hdrlen + sacklen;
 625                 nmp->b_cont = mp;
 626                 mp = nmp;
 627         } else {
 628                 mp->b_rptr -= (hdrlen + sacklen);
 629         }
 630         bcopy(hdr, mp->b_rptr, hdrlen);
 631         if (sacklen) {
 632                 sctp_fill_sack(sctp, mp->b_rptr + hdrlen, sacklen);
 633         }
 634         if (fp != sctp->sctp_current) {
 635                 /* change addresses in header */
 636                 if (isv4) {
 637                         ipha_t *iph = (ipha_t *)mp->b_rptr;
 638 
 639                         IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, iph->ipha_dst);
 640                         if (!IN6_IS_ADDR_V4MAPPED_ANY(&fp->sf_saddr)) {
 641                                 IN6_V4MAPPED_TO_IPADDR(&fp->sf_saddr,
 642                                     iph->ipha_src);
 643                         } else if (sctp->sctp_bound_to_all) {
 644                                 iph->ipha_src = INADDR_ANY;
 645                         }
 646                 } else {
 647                         ip6_t *ip6h = (ip6_t *)mp->b_rptr;
 648 
 649                         ip6h->ip6_dst = fp->sf_faddr;
 650                         if (!IN6_IS_ADDR_UNSPECIFIED(&fp->sf_saddr)) {
 651                                 ip6h->ip6_src = fp->sf_saddr;
 652                         } else if (sctp->sctp_bound_to_all) {
 653                                 ip6h->ip6_src = ipv6_all_zeros;
 654                         }
 655                 }
 656         }
 657         return (mp);
 658 }
 659 
 660 /*
 661  * SCTP requires every chunk to be padded so that the total length
 662  * is a multiple of SCTP_ALIGN.  This function returns a mblk with
 663  * the specified pad length.
 664  */
 665 static mblk_t *
 666 sctp_get_padding(sctp_t *sctp, int pad)
 667 {
 668         mblk_t *fill;
 669 
 670         ASSERT(pad < SCTP_ALIGN);
 671         ASSERT(sctp->sctp_pad_mp != NULL);
 672         if ((fill = dupb(sctp->sctp_pad_mp)) != NULL) {
 673                 fill->b_wptr += pad;
 674                 return (fill);
 675         }
 676 
 677         /*
 678          * The memory saving path of reusing the sctp_pad_mp
 679          * fails may be because it has been dupb() too
 680          * many times (DBLK_REFMAX).  Use the memory consuming
 681          * path of allocating the pad mblk.
 682          */
 683         if ((fill = allocb(SCTP_ALIGN, BPRI_MED)) != NULL) {
 684                 /* Zero it out.  SCTP_ALIGN is sizeof (int32_t) */
 685                 *(int32_t *)fill->b_rptr = 0;
 686                 fill->b_wptr += pad;
 687         }
 688         return (fill);
 689 }
 690 
 691 static mblk_t *
 692 sctp_find_fast_rexmit_mblks(sctp_t *sctp, int *total, sctp_faddr_t **fp)
 693 {
 694         mblk_t          *meta;
 695         mblk_t          *start_mp = NULL;
 696         mblk_t          *end_mp = NULL;
 697         mblk_t          *mp, *nmp;
 698         mblk_t          *fill;
 699         sctp_data_hdr_t *sdh;
 700         int             msglen;
 701         int             extra;
 702         sctp_msg_hdr_t  *msg_hdr;
 703         sctp_faddr_t    *old_fp = NULL;
 704         sctp_faddr_t    *chunk_fp;
 705         sctp_stack_t    *sctps = sctp->sctp_sctps;
 706 
 707         for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
 708                 msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
 709                 if (SCTP_IS_MSG_ABANDONED(meta) ||
 710                     SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
 711                         continue;
 712                 }
 713                 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
 714                         if (SCTP_CHUNK_WANT_REXMIT(mp)) {
 715                                 /*
 716                                  * Use the same peer address to do fast
 717                                  * retransmission.  If the original peer
 718                                  * address is dead, switch to the current
 719                                  * one.  Record the old one so that we
 720                                  * will pick the chunks sent to the old
 721                                  * one for fast retransmission.
 722                                  */
 723                                 chunk_fp = SCTP_CHUNK_DEST(mp);
 724                                 if (*fp == NULL) {
 725                                         *fp = chunk_fp;
 726                                         if ((*fp)->sf_state !=
 727                                             SCTP_FADDRS_ALIVE) {
 728                                                 old_fp = *fp;
 729                                                 *fp = sctp->sctp_current;
 730                                         }
 731                                 } else if (old_fp == NULL && *fp != chunk_fp) {
 732                                         continue;
 733                                 } else if (old_fp != NULL &&
 734                                     old_fp != chunk_fp) {
 735                                         continue;
 736                                 }
 737 
 738                                 sdh = (sctp_data_hdr_t *)mp->b_rptr;
 739                                 msglen = ntohs(sdh->sdh_len);
 740                                 if ((extra = msglen & (SCTP_ALIGN - 1)) != 0) {
 741                                         extra = SCTP_ALIGN - extra;
 742                                 }
 743 
 744                                 /*
 745                                  * We still return at least the first message
 746                                  * even if that message cannot fit in as
 747                                  * PMTU may have changed.
 748                                  */
 749                                 if (*total + msglen + extra >
 750                                     (*fp)->sf_pmss && start_mp != NULL) {
 751                                         return (start_mp);
 752                                 }
 753                                 if ((nmp = dupmsg(mp)) == NULL)
 754                                         return (start_mp);
 755                                 if (extra > 0) {
 756                                         fill = sctp_get_padding(sctp, extra);
 757                                         if (fill != NULL) {
 758                                                 linkb(nmp, fill);
 759                                         } else {
 760                                                 return (start_mp);
 761                                         }
 762                                 }
 763                                 SCTPS_BUMP_MIB(sctps, sctpOutFastRetrans);
 764                                 BUMP_LOCAL(sctp->sctp_rxtchunks);
 765                                 SCTP_CHUNK_CLEAR_REXMIT(mp);
 766                                 if (start_mp == NULL) {
 767                                         start_mp = nmp;
 768                                 } else {
 769                                         linkb(end_mp, nmp);
 770                                 }
 771                                 end_mp = nmp;
 772                                 *total += msglen + extra;
 773                                 dprint(2, ("sctp_find_fast_rexmit_mblks: "
 774                                     "tsn %x\n", sdh->sdh_tsn));
 775                         }
 776                 }
 777         }
 778         /* Clear the flag as there is no more message to be fast rexmitted. */
 779         sctp->sctp_chk_fast_rexmit = B_FALSE;
 780         return (start_mp);
 781 }
 782 
 783 /* A debug function just to make sure that a mblk chain is not broken */
 784 #ifdef  DEBUG
 785 static boolean_t
 786 sctp_verify_chain(mblk_t *head, mblk_t *tail)
 787 {
 788         mblk_t  *mp = head;
 789 
 790         if (head == NULL || tail == NULL)
 791                 return (B_TRUE);
 792         while (mp != NULL) {
 793                 if (mp == tail)
 794                         return (B_TRUE);
 795                 mp = mp->b_next;
 796         }
 797         return (B_FALSE);
 798 }
 799 #endif
 800 
/*
 * Gets the next unsent chunk to transmit. Messages that are abandoned are
 * skipped. A message can be abandoned if it has a non-zero timetolive and
 * transmission has not yet started or if it is a partially reliable
 * message and its time is up (assuming we are PR-SCTP aware).
 * We only return a chunk if it will fit entirely in the current packet.
 * 'cansend' is used to determine if we need to try and chunkify messages from
 * the unsent list, if any, and also as an input to sctp_chunkify() if so.
 *
 * firstseg_len indicates the space already used, cansend represents remaining
 * space in the window, ((sf_pmss - firstseg_len) can therefore reasonably
 * be used to compute the cansend arg).
 *
 * Arguments:
 *   sctp   - the association; sctp_current must be non-NULL.
 *   mp     - out; set to the chunk to send when a message is returned.
 *   meta   - message (in the transmit list) at which to start the search;
 *            may be NULL.
 *   error  - out; reset to 0 on entry and set to the return value of
 *            sctp_check_abandoned_msg() if that fails.
 *   fp     - preferred destination, or NULL.  When NULL, the destination of
 *            the first unsent message (or sctp_current if that is unset or
 *            not alive) is used for chunkifying.
 *
 * Returns the message (meta) that contains *mp, or NULL if there is nothing
 * to send or an error occurred (check *error).
 */
mblk_t *
sctp_get_msg_to_send(sctp_t *sctp, mblk_t **mp, mblk_t *meta, int  *error,
    int32_t firstseg_len, uint32_t cansend, sctp_faddr_t *fp)
{
        mblk_t          *mp1;
        sctp_msg_hdr_t  *msg_hdr;
        mblk_t          *tmp_meta;
        sctp_faddr_t    *fp1;

        ASSERT(error != NULL && mp != NULL);
        *error = 0;

        ASSERT(sctp->sctp_current != NULL);

chunkified:
        while (meta != NULL) {
                /* Remember the successor; meta may be unlinked below. */
                tmp_meta = meta->b_next;
                msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
                mp1 = meta->b_cont;
                if (SCTP_IS_MSG_ABANDONED(meta))
                        goto next_msg;
                if (!SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
                        /* Live message: return its first sendable chunk. */
                        while (mp1 != NULL) {
                                if (SCTP_CHUNK_CANSEND(mp1)) {
                                        *mp = mp1;
#ifdef  DEBUG
                                        ASSERT(sctp_verify_chain(
                                            sctp->sctp_xmit_head, meta));
#endif
                                        return (meta);
                                }
                                mp1 = mp1->b_next;
                        }
                        goto next_msg;
                }
                /*
                 * If we come here and the first chunk is sent, then we
                 * are PR-SCTP aware, in which case if the cumulative
                 * TSN has moved up to or beyond the first chunk (which
                 * means all the previous messages have been cumulative
                 * SACK'd), then we send a Forward TSN with the last
                 * chunk that was sent in this message. If we can't send
                 * a Forward TSN because previous non-abandoned messages
                 * have not been acked then we will defer the Forward TSN
                 * to sctp_rexmit() or sctp_cumack().
                 */
                if (SCTP_CHUNK_ISSENT(mp1)) {
                        *error = sctp_check_abandoned_msg(sctp, meta);
                        if (*error != 0) {
#ifdef  DEBUG
                                ASSERT(sctp_verify_chain(sctp->sctp_xmit_head,
                                    sctp->sctp_xmit_tail));
#endif
                                return (NULL);
                        }
                        goto next_msg;
                }
                /*
                 * Nothing of this message has been sent yet: unlink it from
                 * the transmit list (head, tail and middle cases), fixing up
                 * sctp_xmit_head/sctp_xmit_tail as needed.
                 */
                BUMP_LOCAL(sctp->sctp_prsctpdrop);
                ASSERT(sctp->sctp_unsent >= msg_hdr->smh_msglen);
                if (meta->b_prev == NULL) {
                        ASSERT(sctp->sctp_xmit_head == meta);
                        sctp->sctp_xmit_head = tmp_meta;
                        if (sctp->sctp_xmit_tail == meta)
                                sctp->sctp_xmit_tail = tmp_meta;
                        meta->b_next = NULL;
                        if (tmp_meta != NULL)
                                tmp_meta->b_prev = NULL;
                } else if (meta->b_next == NULL) {
                        if (sctp->sctp_xmit_tail == meta)
                                sctp->sctp_xmit_tail = meta->b_prev;
                        meta->b_prev->b_next = NULL;
                        meta->b_prev = NULL;
                } else {
                        meta->b_prev->b_next = tmp_meta;
                        tmp_meta->b_prev = meta->b_prev;
                        if (sctp->sctp_xmit_tail == meta)
                                sctp->sctp_xmit_tail = tmp_meta;
                        meta->b_prev = NULL;
                        meta->b_next = NULL;
                }
                sctp->sctp_unsent -= msg_hdr->smh_msglen;
                /*
                 * Update ULP the amount of queued data, which is
                 * sent-unack'ed + unsent.
                 */
                if (!SCTP_IS_DETACHED(sctp))
                        SCTP_TXQ_UPDATE(sctp);
                /* Hand the unlinked message to the send-failure path. */
                sctp_sendfail_event(sctp, meta, 0, B_TRUE);
next_msg:
                meta = tmp_meta;
        }
        /* chunkify, if needed */
        if (cansend > 0 && sctp->sctp_xmit_unsent != NULL) {
                ASSERT(sctp->sctp_unsent > 0);
                if (fp == NULL) {
                        fp = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
                        if (fp == NULL || fp->sf_state != SCTP_FADDRS_ALIVE)
                                fp = sctp->sctp_current;
                } else {
                        /*
                         * If user specified destination, try to honor that.
                         */
                        fp1 = SCTP_CHUNK_DEST(sctp->sctp_xmit_unsent);
                        if (fp1 != NULL && fp1->sf_state == SCTP_FADDRS_ALIVE &&
                            fp1 != fp) {
                                goto chunk_done;
                        }
                }
                meta = sctp_chunkify(sctp, fp->sf_pmss, firstseg_len, cansend);
                if (meta == NULL)
                        goto chunk_done;
                /*
                 * sctp_chunkify() won't advance sctp_xmit_tail if it adds
                 * new chunk(s) to the tail, so we need to skip the
                 * sctp_xmit_tail, which would have already been processed.
                 * This could happen when there are unacked chunks, but
                 * nothing new to send.
                 * When sctp_chunkify() is called when the transmit queue
                 * is empty then we need to start from sctp_xmit_tail.
                 */
                if (SCTP_CHUNK_ISSENT(sctp->sctp_xmit_tail->b_cont)) {
#ifdef  DEBUG
                        mp1 = sctp->sctp_xmit_tail->b_cont;
                        while (mp1 != NULL) {
                                ASSERT(!SCTP_CHUNK_CANSEND(mp1));
                                mp1 = mp1->b_next;
                        }
#endif
                        if ((meta = sctp->sctp_xmit_tail->b_next) == NULL)
                                goto chunk_done;
                }
                /* Rescan, starting at the newly chunkified message(s). */
                goto chunkified;
        }
chunk_done:
#ifdef  DEBUG
        ASSERT(sctp_verify_chain(sctp->sctp_xmit_head, sctp->sctp_xmit_tail));
#endif
        return (NULL);
}
 953 
 954 void
 955 sctp_fast_rexmit(sctp_t *sctp)
 956 {
 957         mblk_t          *mp, *head;
 958         int             pktlen = 0;
 959         sctp_faddr_t    *fp = NULL;
 960         sctp_stack_t    *sctps = sctp->sctp_sctps;
 961 
 962         ASSERT(sctp->sctp_xmit_head != NULL);
 963         mp = sctp_find_fast_rexmit_mblks(sctp, &pktlen, &fp);
 964         if (mp == NULL) {
 965                 SCTP_KSTAT(sctps, sctp_fr_not_found);
 966                 return;
 967         }
 968         if ((head = sctp_add_proto_hdr(sctp, fp, mp, 0, NULL)) == NULL) {
 969                 freemsg(mp);
 970                 SCTP_KSTAT(sctps, sctp_fr_add_hdr);
 971                 return;
 972         }
 973         if ((pktlen > fp->sf_pmss) && fp->sf_isv4) {
 974                 ipha_t *iph = (ipha_t *)head->b_rptr;
 975 
 976                 iph->ipha_fragment_offset_and_flags = 0;
 977         }
 978 
 979         sctp_set_iplen(sctp, head, fp->sf_ixa);
 980 
 981         DTRACE_SCTP5(send, mblk_t *, NULL, ip_xmit_attr_t *, fp->sf_ixa,
 982             void_ip_t *, mp->b_rptr, sctp_t *, sctp, sctp_hdr_t *,
 983             &mp->b_rptr[fp->sf_ixa->ixa_ip_hdr_length]);
 984 
 985         (void) conn_ip_output(head, fp->sf_ixa);
 986         BUMP_LOCAL(sctp->sctp_opkts);
 987         sctp->sctp_active = fp->sf_lastactive = ddi_get_lbolt64();
 988 }
 989 
/*
 * Transmit queued data for this association, sending at most num_pkt packets.
 *
 * The amount that may be sent (cansend) starts as the smaller of sctp_frwnd
 * and sctp_unsent.  For each packet the first sendable chunk is located
 * (starting from sctp_xmit_tail, asking sctp_get_msg_to_send() for more when
 * the current message is exhausted), a destination is chosen and checked
 * against its congestion window, and further chunks are bundled while they
 * fit.  If a SACK is pending, room for it is reserved in the first packet
 * that can carry it to sctp_lastdata.
 *
 * Whenever sending has to stop early, control goes to unsent_data, which
 * makes sure a timer is running on the chosen destination and frees any
 * partially built packet.
 */
void
sctp_output(sctp_t *sctp, uint_t num_pkt)
{
        mblk_t                  *mp = NULL;
        mblk_t                  *nmp;
        mblk_t                  *head;
        mblk_t                  *meta = sctp->sctp_xmit_tail;
        mblk_t                  *fill = NULL;
        uint16_t                chunklen;
        uint32_t                cansend;
        int32_t                 seglen;
        int32_t                 xtralen;
        int32_t                 sacklen;
        int32_t                 pad = 0;
        int32_t                 pathmax;
        int                     extra;
        int64_t                 now = LBOLT_FASTPATH64;
        sctp_faddr_t            *fp;
        sctp_faddr_t            *lfp;
        sctp_data_hdr_t         *sdc;
        int                     error;
        boolean_t               notsent = B_TRUE;
        sctp_stack_t            *sctps = sctp->sctp_sctps;
        uint32_t                tsn;

        /*
         * Work out whether a SACK needs to go out.  lfp is only set (and
         * only used) when sacklen is non-zero.
         */
        if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
                sacklen = 0;
        } else {
                /* send a SACK chunk */
                sacklen = sizeof (sctp_chunk_hdr_t) +
                    sizeof (sctp_sack_chunk_t) +
                    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
                lfp = sctp->sctp_lastdata;
                ASSERT(lfp != NULL);
                if (lfp->sf_state != SCTP_FADDRS_ALIVE)
                        lfp = sctp->sctp_current;
        }

        /* Never try to send more than the peer's window or what is queued. */
        cansend = sctp->sctp_frwnd;
        if (sctp->sctp_unsent < cansend)
                cansend = sctp->sctp_unsent;

        /*
         * Start persist timer if unable to send or when
         * trying to send into a zero window. This timer
         * ensures the blocked send attempt is retried.
         */
        if ((cansend < sctp->sctp_current->sf_pmss / 2) &&
            (sctp->sctp_unacked != 0) &&
            (sctp->sctp_unacked < sctp->sctp_current->sf_pmss) &&
            !sctp->sctp_ndelay ||
            (cansend == 0 && sctp->sctp_unacked == 0 &&
            sctp->sctp_unsent != 0)) {
                head = NULL;
                fp = sctp->sctp_current;
                goto unsent_data;
        }
        if (meta != NULL)
                mp = meta->b_cont;
        while (cansend > 0 && num_pkt-- != 0) {
                pad = 0;

                /*
                 * Find first segment eligible for transmit.
                 */
                while (mp != NULL) {
                        if (SCTP_CHUNK_CANSEND(mp))
                                break;
                        mp = mp->b_next;
                }
                if (mp == NULL) {
                        meta = sctp_get_msg_to_send(sctp, &mp,
                            meta == NULL ? NULL : meta->b_next, &error, sacklen,
                            cansend, NULL);
                        if (error != 0 || meta == NULL) {
                                head = NULL;
                                fp = sctp->sctp_current;
                                goto unsent_data;
                        }
                        sctp->sctp_xmit_tail =  meta;
                }

                /*
                 * seglen is the chunk length including its data header,
                 * xtralen the header overhead, chunklen the payload only.
                 */
                sdc = (sctp_data_hdr_t *)mp->b_rptr;
                seglen = ntohs(sdc->sdh_len);
                xtralen = sizeof (*sdc);
                chunklen = seglen - xtralen;

                /*
                 * Check rwnd.
                 */
                if (chunklen > cansend) {
                        head = NULL;
                        fp = SCTP_CHUNK_DEST(meta);
                        if (fp == NULL || fp->sf_state != SCTP_FADDRS_ALIVE)
                                fp = sctp->sctp_current;
                        goto unsent_data;
                }
                /* Number of pad bytes needed to reach SCTP_ALIGN. */
                if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
                        extra = SCTP_ALIGN - extra;

                /*
                 * Pick destination address, and check cwnd.
                 */
                if (sacklen > 0 && (seglen + extra <= lfp->sf_cwnd -
                    lfp->sf_suna) &&
                    (seglen + sacklen + extra <= lfp->sf_pmss)) {
                        /*
                         * Only include SACK chunk if it can be bundled
                         * with a data chunk, and sent to sctp_lastdata.
                         */
                        pathmax = lfp->sf_cwnd - lfp->sf_suna;

                        fp = lfp;
                        if ((nmp = dupmsg(mp)) == NULL) {
                                head = NULL;
                                goto unsent_data;
                        }
                        SCTP_CHUNK_CLEAR_FLAGS(nmp);
                        head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen,
                            &error);
                        if (head == NULL) {
                                /*
                                 * If none of the source addresses are
                                 * available (i.e error == EHOSTUNREACH),
                                 * pretend we have sent the data. We will
                                 * eventually time out trying to retransmit
                                 * the data if the interface never comes up.
                                 * If we have already sent some stuff (i.e.,
                                 * notsent is B_FALSE) then we are fine, else
                                 * just mark this packet as sent.
                                 */
                                if (notsent && error == EHOSTUNREACH) {
                                        SCTP_CHUNK_SENT(sctp, mp, sdc,
                                            fp, chunklen, meta);
                                }
                                freemsg(nmp);
                                SCTP_KSTAT(sctps, sctp_output_failed);
                                goto unsent_data;
                        }
                        /* The SACK is now accounted for in this packet. */
                        seglen += sacklen;
                        xtralen += sacklen;
                        sacklen = 0;
                } else {
                        fp = SCTP_CHUNK_DEST(meta);
                        if (fp == NULL || fp->sf_state != SCTP_FADDRS_ALIVE)
                                fp = sctp->sctp_current;
                        /*
                         * If we haven't sent data to this destination for
                         * a while, do slow start again.
                         */
                        if (now - fp->sf_lastactive > fp->sf_rto) {
                                SET_CWND(fp, fp->sf_pmss,
                                    sctps->sctps_slow_start_after_idle);
                        }

                        pathmax = fp->sf_cwnd - fp->sf_suna;
                        if (seglen + extra > pathmax) {
                                head = NULL;
                                goto unsent_data;
                        }
                        if ((nmp = dupmsg(mp)) == NULL) {
                                head = NULL;
                                goto unsent_data;
                        }
                        SCTP_CHUNK_CLEAR_FLAGS(nmp);
                        head = sctp_add_proto_hdr(sctp, fp, nmp, 0, &error);
                        if (head == NULL) {
                                /*
                                 * If none of the source addresses are
                                 * available (i.e error == EHOSTUNREACH),
                                 * pretend we have sent the data. We will
                                 * eventually time out trying to retransmit
                                 * the data if the interface never comes up.
                                 * If we have already sent some stuff (i.e.,
                                 * notsent is B_FALSE) then we are fine, else
                                 * just mark this packet as sent.
                                 */
                                if (notsent && error == EHOSTUNREACH) {
                                        SCTP_CHUNK_SENT(sctp, mp, sdc,
                                            fp, chunklen, meta);
                                }
                                freemsg(nmp);
                                SCTP_KSTAT(sctps, sctp_output_failed);
                                goto unsent_data;
                        }
                }
                fp->sf_lastactive = now;
                /* A single packet can never exceed the path MSS. */
                if (pathmax > fp->sf_pmss)
                        pathmax = fp->sf_pmss;
                SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
                mp = mp->b_next;

                /*
                 * Use this chunk to measure RTT?
                 * Must not be a retransmission of an earlier chunk,
                 * ensure the tsn is current.
                 */
                tsn = ntohl(sdc->sdh_tsn);
                if (sctp->sctp_out_time == 0 && tsn == (sctp->sctp_ltsn - 1)) {
                        sctp->sctp_out_time = now;
                        sctp->sctp_rtt_tsn = tsn;
                }
                if (extra > 0) {
                        fill = sctp_get_padding(sctp, extra);
                        if (fill != NULL) {
                                linkb(head, fill);
                                pad = extra;
                                seglen += extra;
                        } else {
                                /* head is freed at unsent_data. */
                                goto unsent_data;
                        }
                }
                /*
                 * Bundle chunks. We linkb() the chunks together to send
                 * downstream in a single packet.
                 * Partial chunks MUST NOT be bundled with full chunks, so we
                 * rely on sctp_get_msg_to_send() to only return messages that
                 * will fit entirely in the current packet.
                 */
                while (seglen < pathmax) {
                        int32_t         new_len;
                        int32_t         new_xtralen;

                        while (mp != NULL) {
                                if (SCTP_CHUNK_CANSEND(mp))
                                        break;
                                mp = mp->b_next;
                        }
                        if (mp == NULL) {
                                meta = sctp_get_msg_to_send(sctp, &mp,
                                    meta->b_next, &error, seglen,
                                    (seglen - xtralen) >= cansend ? 0 :
                                    cansend - seglen, fp);
                                if (error != 0)
                                        break;
                                /* If no more eligible chunks, cease bundling */
                                if (meta == NULL)
                                        break;
                                sctp->sctp_xmit_tail =  meta;
                        }
                        ASSERT(mp != NULL);
                        /*
                         * Do not bundle a not-yet-sent chunk whose message
                         * has a destination other than this packet's.
                         */
                        if (!SCTP_CHUNK_ISSENT(mp) && SCTP_CHUNK_DEST(meta) &&
                            fp != SCTP_CHUNK_DEST(meta)) {
                                break;
                        }
                        sdc = (sctp_data_hdr_t *)mp->b_rptr;
                        chunklen = ntohs(sdc->sdh_len);
                        if ((extra = chunklen  & (SCTP_ALIGN - 1)) != 0)
                                extra = SCTP_ALIGN - extra;

                        new_len = seglen + chunklen;
                        new_xtralen = xtralen + sizeof (*sdc);
                        chunklen -= sizeof (*sdc);

                        /* Stop if rwnd or the per-packet limit is exceeded. */
                        if (new_len - new_xtralen > cansend ||
                            new_len + extra > pathmax) {
                                break;
                        }
                        if ((nmp = dupmsg(mp)) == NULL)
                                break;
                        if (extra > 0) {
                                fill = sctp_get_padding(sctp, extra);
                                if (fill != NULL) {
                                        pad += extra;
                                        new_len += extra;
                                        linkb(nmp, fill);
                                } else {
                                        freemsg(nmp);
                                        break;
                                }
                        }
                        seglen = new_len;
                        xtralen = new_xtralen;
                        SCTP_CHUNK_CLEAR_FLAGS(nmp);
                        SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
                        linkb(head, nmp);
                        mp = mp->b_next;
                }
                if ((seglen > fp->sf_pmss) && fp->sf_isv4) {
                        ipha_t *iph = (ipha_t *)head->b_rptr;

                        /*
                         * Path MTU is different from what we thought it would
                         * be when we created chunks, or IP headers have grown.
                         * Need to clear the DF bit.
                         */
                        iph->ipha_fragment_offset_and_flags = 0;
                }
                /*
                 * xmit segment.  Only payload bytes count against the
                 * window, so padding and header/SACK overhead are excluded.
                 */
                ASSERT(cansend >= seglen - pad - xtralen);
                cansend -= (seglen - pad - xtralen);
                dprint(2, ("sctp_output: Sending packet %d bytes, tsn %x "
                    "ssn %d to %p (rwnd %d, cansend %d, lastack_rxd %x)\n",
                    seglen - xtralen, ntohl(sdc->sdh_tsn),
                    ntohs(sdc->sdh_ssn), (void *)fp, sctp->sctp_frwnd,
                    cansend, sctp->sctp_lastack_rxd));
                sctp_set_iplen(sctp, head, fp->sf_ixa);

                DTRACE_SCTP5(send, mblk_t *, NULL, ip_xmit_attr_t *, fp->sf_ixa,
                    void_ip_t *, head->b_rptr, sctp_t *, sctp, sctp_hdr_t *,
                    &head->b_rptr[fp->sf_ixa->ixa_ip_hdr_length]);

                (void) conn_ip_output(head, fp->sf_ixa);
                BUMP_LOCAL(sctp->sctp_opkts);
                /* arm rto timer (if not set) */
                if (!fp->sf_timer_running)
                        SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
                notsent = B_FALSE;
        }
        sctp->sctp_active = now;
        return;
unsent_data:
        /* arm persist timer (if rto timer not set) */
        if (!fp->sf_timer_running)
                SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
        /* Release a packet that was built but could not be sent. */
        if (head != NULL)
                freemsg(head);
}
1308 
/*
 * The following two functions initialize and destroy the cache
 * associated with the sets used for PR-SCTP.
 *
 * sctp_ftsn_sets_init() creates the kmem cache, named
 * "sctp_ftsn_set_cache", from which sctp_ftsn_set_t objects are allocated,
 * and stores it in sctp_kmem_ftsn_set_cache.
 */
void
sctp_ftsn_sets_init(void)
{
        sctp_kmem_ftsn_set_cache = kmem_cache_create("sctp_ftsn_set_cache",
            sizeof (sctp_ftsn_set_t), 0, NULL, NULL, NULL, NULL,
            NULL, 0);
}
1320 
/*
 * Destroy the kmem cache (sctp_kmem_ftsn_set_cache) used for
 * sctp_ftsn_set_t objects.
 */
void
sctp_ftsn_sets_fini(void)
{
        kmem_cache_destroy(sctp_kmem_ftsn_set_cache);
}
1326 
1327 
1328 /* Free PR-SCTP sets */
1329 void
1330 sctp_free_ftsn_set(sctp_ftsn_set_t *s)
1331 {
1332         sctp_ftsn_set_t *p;
1333 
1334         while (s != NULL) {
1335                 p = s->next;
1336                 s->next = NULL;
1337                 kmem_cache_free(sctp_kmem_ftsn_set_cache, s);
1338                 s = p;
1339         }
1340 }
1341 
1342 /*
1343  * Given a message meta block, meta, this routine creates or modifies
1344  * the set that will be used to generate a Forward TSN chunk. If the
1345  * entry for stream id, sid, for this message already exists, the
1346  * sequence number, ssn, is updated if it is greater than the existing
1347  * one. If an entry for this sid does not exist, one is created if
1348  * the size does not exceed fp->sf_pmss. We return false in case
1349  * or an error.
1350  */
1351 boolean_t
1352 sctp_add_ftsn_set(sctp_ftsn_set_t **s, sctp_faddr_t *fp, mblk_t *meta,
1353     uint_t *nsets, uint32_t *slen)
1354 {
1355         sctp_ftsn_set_t         *p;
1356         sctp_msg_hdr_t          *msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1357         uint16_t                sid = htons(msg_hdr->smh_sid);
1358         /* msg_hdr->smh_ssn is already in NBO */
1359         uint16_t                ssn = msg_hdr->smh_ssn;
1360 
1361         ASSERT(s != NULL && nsets != NULL);
1362         ASSERT((*nsets == 0 && *s == NULL) || (*nsets > 0 && *s != NULL));
1363 
1364         if (*s == NULL) {
1365                 ASSERT((*slen + sizeof (uint32_t)) <= fp->sf_pmss);
1366                 *s = kmem_cache_alloc(sctp_kmem_ftsn_set_cache, KM_NOSLEEP);
1367                 if (*s == NULL)
1368                         return (B_FALSE);
1369                 (*s)->ftsn_entries.ftsn_sid = sid;
1370                 (*s)->ftsn_entries.ftsn_ssn = ssn;
1371                 (*s)->next = NULL;
1372                 *nsets = 1;
1373                 *slen += sizeof (uint32_t);
1374                 return (B_TRUE);
1375         }
1376         for (p = *s; p->next != NULL; p = p->next) {
1377                 if (p->ftsn_entries.ftsn_sid == sid) {
1378                         if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1379                                 p->ftsn_entries.ftsn_ssn = ssn;
1380                         return (B_TRUE);
1381                 }
1382         }
1383         /* the last one */
1384         if (p->ftsn_entries.ftsn_sid == sid) {
1385                 if (SSN_GT(ssn, p->ftsn_entries.ftsn_ssn))
1386                         p->ftsn_entries.ftsn_ssn = ssn;
1387         } else {
1388                 if ((*slen + sizeof (uint32_t)) > fp->sf_pmss)
1389                         return (B_FALSE);
1390                 p->next = kmem_cache_alloc(sctp_kmem_ftsn_set_cache,
1391                     KM_NOSLEEP);
1392                 if (p->next == NULL)
1393                         return (B_FALSE);
1394                 p = p->next;
1395                 p->ftsn_entries.ftsn_sid = sid;
1396                 p->ftsn_entries.ftsn_ssn = ssn;
1397                 p->next = NULL;
1398                 (*nsets)++;
1399                 *slen += sizeof (uint32_t);
1400         }
1401         return (B_TRUE);
1402 }
1403 
1404 /*
1405  * Given a set of stream id - sequence number pairs, this routing creates
1406  * a Forward TSN chunk. The cumulative TSN (advanced peer ack point)
1407  * for the chunk is obtained from sctp->sctp_adv_pap. The caller
1408  * will add the IP/SCTP header.
1409  */
1410 mblk_t *
1411 sctp_make_ftsn_chunk(sctp_t *sctp, sctp_faddr_t *fp, sctp_ftsn_set_t *sets,
1412     uint_t nsets, uint32_t seglen)
1413 {
1414         mblk_t                  *ftsn_mp;
1415         sctp_chunk_hdr_t        *ch_hdr;
1416         uint32_t                *advtsn;
1417         uint16_t                schlen;
1418         size_t                  xtralen;
1419         ftsn_entry_t            *ftsn_entry;
1420         sctp_stack_t    *sctps = sctp->sctp_sctps;
1421 
1422         seglen += sizeof (sctp_chunk_hdr_t);
1423         if (fp->sf_isv4)
1424                 xtralen = sctp->sctp_hdr_len + sctps->sctps_wroff_xtra;
1425         else
1426                 xtralen = sctp->sctp_hdr6_len + sctps->sctps_wroff_xtra;
1427         ftsn_mp = allocb(xtralen + seglen, BPRI_MED);
1428         if (ftsn_mp == NULL)
1429                 return (NULL);
1430         ftsn_mp->b_rptr += xtralen;
1431         ftsn_mp->b_wptr = ftsn_mp->b_rptr + seglen;
1432 
1433         ch_hdr = (sctp_chunk_hdr_t *)ftsn_mp->b_rptr;
1434         ch_hdr->sch_id = CHUNK_FORWARD_TSN;
1435         ch_hdr->sch_flags = 0;
1436         /*
1437          * The cast here should not be an issue since seglen is
1438          * the length of the Forward TSN chunk.
1439          */
1440         schlen = (uint16_t)seglen;
1441         U16_TO_ABE16(schlen, &(ch_hdr->sch_len));
1442 
1443         advtsn = (uint32_t *)(ch_hdr + 1);
1444         U32_TO_ABE32(sctp->sctp_adv_pap, advtsn);
1445         ftsn_entry = (ftsn_entry_t *)(advtsn + 1);
1446         while (nsets > 0) {
1447                 ASSERT((uchar_t *)&ftsn_entry[1] <= ftsn_mp->b_wptr);
1448                 ftsn_entry->ftsn_sid = sets->ftsn_entries.ftsn_sid;
1449                 ftsn_entry->ftsn_ssn = sets->ftsn_entries.ftsn_ssn;
1450                 ftsn_entry++;
1451                 sets = sets->next;
1452                 nsets--;
1453         }
1454         return (ftsn_mp);
1455 }
1456 
1457 /*
1458  * Given a starting message, the routine steps through all the
1459  * messages whose TSN is less than sctp->sctp_adv_pap and creates
1460  * ftsn sets. The ftsn sets is then used to create an Forward TSN
1461  * chunk. All the messages, that have chunks that are included in the
1462  * ftsn sets, are flagged abandonded. If a message is partially sent
1463  * and is deemed abandoned, all remaining unsent chunks are marked
1464  * abandoned and are deducted from sctp_unsent.
1465  */
1466 void
1467 sctp_make_ftsns(sctp_t *sctp, mblk_t *meta, mblk_t *mp, mblk_t **nmp,
1468     sctp_faddr_t *fp, uint32_t *seglen)
1469 {
1470         mblk_t          *mp1 = mp;
1471         mblk_t          *mp_head = mp;
1472         mblk_t          *meta_head = meta;
1473         mblk_t          *head;
1474         sctp_ftsn_set_t *sets = NULL;
1475         uint_t          nsets = 0;
1476         uint16_t        clen;
1477         sctp_data_hdr_t *sdc;
1478         uint32_t        sacklen;
1479         uint32_t        adv_pap = sctp->sctp_adv_pap;
1480         uint32_t        unsent = 0;
1481         boolean_t       ubit;
1482         sctp_stack_t    *sctps = sctp->sctp_sctps;
1483 
1484         *seglen = sizeof (uint32_t);
1485 
1486         sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1487         while (meta != NULL &&
1488             SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1489                 /*
1490                  * Skip adding FTSN sets for un-ordered messages as they do
1491                  * not have SSNs.
1492                  */
1493                 ubit = SCTP_DATA_GET_UBIT(sdc);
1494                 if (!ubit &&
1495                     !sctp_add_ftsn_set(&sets, fp, meta, &nsets, seglen)) {
1496                         meta = NULL;
1497                         sctp->sctp_adv_pap = adv_pap;
1498                         goto ftsn_done;
1499                 }
1500                 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1501                         sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1502                         adv_pap = ntohl(sdc->sdh_tsn);
1503                         mp1 = mp1->b_next;
1504                 }
1505                 meta = meta->b_next;
1506                 if (meta != NULL) {
1507                         mp1 = meta->b_cont;
1508                         if (!SCTP_CHUNK_ISSENT(mp1))
1509                                 break;
1510                         sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1511                 }
1512         }
1513 ftsn_done:
1514         /*
1515          * Can't compare with sets == NULL, since we don't add any
1516          * sets for un-ordered messages.
1517          */
1518         if (meta == meta_head)
1519                 return;
1520         *nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, *seglen);
1521         sctp_free_ftsn_set(sets);
1522         if (*nmp == NULL)
1523                 return;
1524         if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1525                 sacklen = 0;
1526         } else {
1527                 sacklen = sizeof (sctp_chunk_hdr_t) +
1528                     sizeof (sctp_sack_chunk_t) +
1529                     (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1530                 if (*seglen + sacklen > sctp->sctp_lastdata->sf_pmss) {
1531                         /* piggybacked SACK doesn't fit */
1532                         sacklen = 0;
1533                 } else {
1534                         fp = sctp->sctp_lastdata;
1535                 }
1536         }
1537         head = sctp_add_proto_hdr(sctp, fp, *nmp, sacklen, NULL);
1538         if (head == NULL) {
1539                 freemsg(*nmp);
1540                 *nmp = NULL;
1541                 SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
1542                 return;
1543         }
1544         *seglen += sacklen;
1545         *nmp = head;
1546 
1547         /*
1548          * XXXNeed to optimise this, the reason it is done here is so
1549          * that we don't have to undo in case of failure.
1550          */
1551         mp1 = mp_head;
1552         sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1553         while (meta_head != NULL &&
1554             SEQ_GEQ(sctp->sctp_adv_pap, ntohl(sdc->sdh_tsn))) {
1555                 if (!SCTP_IS_MSG_ABANDONED(meta_head))
1556                         SCTP_MSG_SET_ABANDONED(meta_head);
1557                 while (mp1 != NULL && SCTP_CHUNK_ISSENT(mp1)) {
1558                         sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1559                         if (!SCTP_CHUNK_ISACKED(mp1)) {
1560                                 clen = ntohs(sdc->sdh_len) - sizeof (*sdc);
1561                                 SCTP_CHUNK_SENT(sctp, mp1, sdc, fp, clen,
1562                                     meta_head);
1563                         }
1564                         mp1 = mp1->b_next;
1565                 }
1566                 while (mp1 != NULL) {
1567                         sdc = (sctp_data_hdr_t *)mp1->b_rptr;
1568                         if (!SCTP_CHUNK_ABANDONED(mp1)) {
1569                                 ASSERT(!SCTP_CHUNK_ISSENT(mp1));
1570                                 unsent += ntohs(sdc->sdh_len) - sizeof (*sdc);
1571                                 SCTP_ABANDON_CHUNK(mp1);
1572                         }
1573                         mp1 = mp1->b_next;
1574                 }
1575                 meta_head = meta_head->b_next;
1576                 if (meta_head != NULL) {
1577                         mp1 = meta_head->b_cont;
1578                         if (!SCTP_CHUNK_ISSENT(mp1))
1579                                 break;
1580                         sdc  = (sctp_data_hdr_t *)mp1->b_rptr;
1581                 }
1582         }
1583         if (unsent > 0) {
1584                 ASSERT(sctp->sctp_unsent >= unsent);
1585                 sctp->sctp_unsent -= unsent;
1586                 /*
1587                  * Update ULP the amount of queued data, which is
1588                  * sent-unack'ed + unsent.
1589                  */
1590                 if (!SCTP_IS_DETACHED(sctp))
1591                         SCTP_TXQ_UPDATE(sctp);
1592         }
1593 }
1594 
1595 /*
1596  * This function steps through messages starting at meta and checks if
1597  * the message is abandoned. It stops when it hits an unsent chunk or
1598  * a message that has all its chunk acked. This is the only place
1599  * where the sctp_adv_pap is moved forward to indicated abandoned
1600  * messages.
1601  */
1602 void
1603 sctp_check_adv_ack_pt(sctp_t *sctp, mblk_t *meta, mblk_t *mp)
1604 {
1605         uint32_t        tsn = sctp->sctp_adv_pap;
1606         sctp_data_hdr_t *sdc;
1607         sctp_msg_hdr_t  *msg_hdr;
1608 
1609         ASSERT(mp != NULL);
1610         sdc = (sctp_data_hdr_t *)mp->b_rptr;
1611         ASSERT(SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_lastack_rxd));
1612         msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1613         if (!SCTP_IS_MSG_ABANDONED(meta) &&
1614             !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1615                 return;
1616         }
1617         while (meta != NULL) {
1618                 while (mp != NULL && SCTP_CHUNK_ISSENT(mp)) {
1619                         sdc = (sctp_data_hdr_t *)mp->b_rptr;
1620                         tsn = ntohl(sdc->sdh_tsn);
1621                         mp = mp->b_next;
1622                 }
1623                 if (mp != NULL)
1624                         break;
1625                 /*
1626                  * We continue checking for successive messages only if there
1627                  * is a chunk marked for retransmission. Else, we might
1628                  * end up sending FTSN prematurely for chunks that have been
1629                  * sent, but not yet acked.
1630                  */
1631                 if ((meta = meta->b_next) != NULL) {
1632                         msg_hdr = (sctp_msg_hdr_t *)meta->b_rptr;
1633                         if (!SCTP_IS_MSG_ABANDONED(meta) &&
1634                             !SCTP_MSG_TO_BE_ABANDONED(meta, msg_hdr, sctp)) {
1635                                 break;
1636                         }
1637                         for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1638                                 if (!SCTP_CHUNK_ISSENT(mp)) {
1639                                         sctp->sctp_adv_pap = tsn;
1640                                         return;
1641                                 }
1642                                 if (SCTP_CHUNK_WANT_REXMIT(mp))
1643                                         break;
1644                         }
1645                         if (mp == NULL)
1646                                 break;
1647                 }
1648         }
1649         sctp->sctp_adv_pap = tsn;
1650 }
1651 
1652 
/*
 * Determine if we should bundle a data chunk with the chunk being
 * retransmitted.  An abandoned chunk is never bundled.  Otherwise we
 * bundle if
 *
 * - the chunk has been sent to the destination fp and is still unack'ed.
 *
 * OR
 *
 * - the chunk is not in the "sent and not marked for retransmission"
 *   state, i.e. it is either unsent (new data) or flagged for
 *   retransmission.
 */
#define SCTP_CHUNK_RX_CANBUNDLE(mp, fp)                                 \
        (!SCTP_CHUNK_ABANDONED((mp)) &&                                 \
        ((SCTP_CHUNK_ISSENT((mp)) &&                                    \
        SCTP_CHUNK_DEST((mp)) == (fp) &&                                \
        !SCTP_CHUNK_ISACKED((mp))) ||                                   \
        (((mp)->b_flag &                                                \
        (SCTP_CHUNK_FLAG_SENT | SCTP_CHUNK_FLAG_REXMIT)) !=             \
        SCTP_CHUNK_FLAG_SENT)))
1669 
1670 /*
1671  * Retransmit first segment which hasn't been acked with cumtsn or send
1672  * a Forward TSN chunk, if appropriate.
1673  */
1674 void
1675 sctp_rexmit(sctp_t *sctp, sctp_faddr_t *oldfp)
1676 {
1677         mblk_t          *mp;
1678         mblk_t          *nmp = NULL;
1679         mblk_t          *head;
1680         mblk_t          *meta = sctp->sctp_xmit_head;
1681         mblk_t          *fill;
1682         uint32_t        seglen = 0;
1683         uint32_t        sacklen;
1684         uint16_t        chunklen;
1685         int             extra;
1686         sctp_data_hdr_t *sdc;
1687         sctp_faddr_t    *fp;
1688         uint32_t        adv_pap = sctp->sctp_adv_pap;
1689         boolean_t       do_ftsn = B_FALSE;
1690         boolean_t       ftsn_check = B_TRUE;
1691         uint32_t        first_ua_tsn;
1692         sctp_msg_hdr_t  *mhdr;
1693         sctp_stack_t    *sctps = sctp->sctp_sctps;
1694         int             error;
1695 
1696         while (meta != NULL) {
1697                 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
1698                         uint32_t        tsn;
1699 
1700                         if (!SCTP_CHUNK_ISSENT(mp))
1701                                 goto window_probe;
1702                         /*
1703                          * We break in the following cases -
1704                          *
1705                          *      if the advanced peer ack point includes the next
1706                          *      chunk to be retransmited - possibly the Forward
1707                          *      TSN was lost.
1708                          *
1709                          *      if we are PRSCTP aware and the next chunk to be
1710                          *      retransmitted is now abandoned
1711                          *
1712                          *      if the next chunk to be retransmitted is for
1713                          *      the dest on which the timer went off. (this
1714                          *      message is not abandoned).
1715                          *
1716                          * We check for Forward TSN only for the first
1717                          * eligible chunk to be retransmitted. The reason
1718                          * being if the first eligible chunk is skipped (say
1719                          * it was sent to a destination other than oldfp)
1720                          * then we cannot advance the cum TSN via Forward
1721                          * TSN chunk.
1722                          *
1723                          * Also, ftsn_check is B_TRUE only for the first
1724                          * eligible chunk, it  will be B_FALSE for all
1725                          * subsequent candidate messages for retransmission.
1726                          */
1727                         sdc = (sctp_data_hdr_t *)mp->b_rptr;
1728                         tsn = ntohl(sdc->sdh_tsn);
1729                         if (SEQ_GT(tsn, sctp->sctp_lastack_rxd)) {
1730                                 if (sctp->sctp_prsctp_aware && ftsn_check) {
1731                                         if (SEQ_GEQ(sctp->sctp_adv_pap, tsn)) {
1732                                                 ASSERT(sctp->sctp_prsctp_aware);
1733                                                 do_ftsn = B_TRUE;
1734                                                 goto out;
1735                                         } else {
1736                                                 sctp_check_adv_ack_pt(sctp,
1737                                                     meta, mp);
1738                                                 if (SEQ_GT(sctp->sctp_adv_pap,
1739                                                     adv_pap)) {
1740                                                         do_ftsn = B_TRUE;
1741                                                         goto out;
1742                                                 }
1743                                         }
1744                                         ftsn_check = B_FALSE;
1745                                 }
1746                                 if (SCTP_CHUNK_DEST(mp) == oldfp)
1747                                         goto out;
1748                         }
1749                 }
1750                 meta = meta->b_next;
1751                 if (meta != NULL && sctp->sctp_prsctp_aware) {
1752                         mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1753 
1754                         while (meta != NULL && (SCTP_IS_MSG_ABANDONED(meta) ||
1755                             SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp))) {
1756                                 meta = meta->b_next;
1757                         }
1758                 }
1759         }
1760 window_probe:
1761         /*
1762          * Retransmit fired for a destination which didn't have
1763          * any unacked data pending.
1764          */
1765         if (sctp->sctp_unacked == 0 && sctp->sctp_unsent != 0) {
1766                 /*
1767                  * Send a window probe. Inflate frwnd to allow
1768                  * sending one segment.
1769                  */
1770                 if (sctp->sctp_frwnd < (oldfp->sf_pmss - sizeof (*sdc)))
1771                         sctp->sctp_frwnd = oldfp->sf_pmss - sizeof (*sdc);
1772 
1773                 /* next TSN to send */
1774                 sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
1775 
1776                 /*
1777                  * The above sctp_frwnd adjustment is coarse.  The "changed"
1778                  * sctp_frwnd may allow us to send more than 1 packet.  So
1779                  * tell sctp_output() to send only 1 packet.
1780                  */
1781                 sctp_output(sctp, 1);
1782 
1783                 /* Last sent TSN */
1784                 sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
1785                 ASSERT(sctp->sctp_rxt_maxtsn >= sctp->sctp_rxt_nxttsn);
1786                 sctp->sctp_zero_win_probe = B_TRUE;
1787                 SCTPS_BUMP_MIB(sctps, sctpOutWinProbe);
1788         }
1789         return;
1790 out:
1791         /*
1792          * After a time out, assume that everything has left the network.  So
1793          * we can clear rxt_unacked for the original peer address.
1794          */
1795         oldfp->sf_rxt_unacked = 0;
1796 
1797         /*
1798          * If we were probing for zero window, don't adjust retransmission
1799          * variables, but the timer is still backed off.
1800          */
1801         if (sctp->sctp_zero_win_probe) {
1802                 mblk_t  *pkt;
1803                 uint_t  pkt_len;
1804 
1805                 /*
1806                  * Get the Zero Win Probe for retrasmission, sctp_rxt_nxttsn
1807                  * and sctp_rxt_maxtsn will specify the ZWP packet.
1808                  */
1809                 fp = oldfp;
1810                 if (oldfp->sf_state != SCTP_FADDRS_ALIVE)
1811                         fp = sctp_rotate_faddr(sctp, oldfp);
1812                 pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
1813                 if (pkt != NULL) {
1814                         ASSERT(pkt_len <= fp->sf_pmss);
1815                         sctp_set_iplen(sctp, pkt, fp->sf_ixa);
1816 
1817                         DTRACE_SCTP5(send, mblk_t *, NULL,
1818                             ip_xmit_attr_t *, fp->sf_ixa,
1819                             void_ip_t *, mp->b_rptr, sctp_t *, sctp, sctp_hdr_t *,
1820                             &mp->b_rptr[fp->sf_ixa->ixa_ip_hdr_length]);
1821 
1822                         (void) conn_ip_output(pkt, fp->sf_ixa);
1823                         BUMP_LOCAL(sctp->sctp_opkts);
1824                 } else {
1825                         SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
1826                 }
1827 
1828                 /*
1829                  * The strikes will be clear by sctp_faddr_alive() when the
1830                  * other side sends us an ack.
1831                  */
1832                 oldfp->sf_strikes++;
1833                 sctp->sctp_strikes++;
1834 
1835                 SCTP_CALC_RXT(sctp, oldfp, sctp->sctp_rto_max);
1836                 if (oldfp != fp && oldfp->sf_suna != 0)
1837                         SCTP_FADDR_TIMER_RESTART(sctp, oldfp, fp->sf_rto);
1838                 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
1839                 SCTPS_BUMP_MIB(sctps, sctpOutWinProbe);
1840                 return;
1841         }
1842 
1843         /*
1844          * Enter slowstart for this destination
1845          */
1846         oldfp->sf_ssthresh = oldfp->sf_cwnd / 2;
1847         if (oldfp->sf_ssthresh < 2 * oldfp->sf_pmss)
1848                 oldfp->sf_ssthresh = 2 * oldfp->sf_pmss;
1849         oldfp->sf_cwnd = oldfp->sf_pmss;
1850         oldfp->sf_pba = 0;
1851         fp = sctp_rotate_faddr(sctp, oldfp);
1852         ASSERT(fp != NULL);
1853         sdc = (sctp_data_hdr_t *)mp->b_rptr;
1854 
1855         first_ua_tsn = ntohl(sdc->sdh_tsn);
1856         if (do_ftsn) {
1857                 sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
1858                 if (nmp == NULL) {
1859                         sctp->sctp_adv_pap = adv_pap;
1860                         goto restart_timer;
1861                 }
1862                 head = nmp;
1863                 /*
1864                  * Move to the next unabandoned chunk. XXXCheck if meta will
1865                  * always be marked abandoned.
1866                  */
1867                 while (meta != NULL && SCTP_IS_MSG_ABANDONED(meta))
1868                         meta = meta->b_next;
1869                 if (meta != NULL)
1870                         mp = mp->b_cont;
1871                 else
1872                         mp = NULL;
1873                 goto try_bundle;
1874         }
1875         seglen = ntohs(sdc->sdh_len);
1876         chunklen = seglen - sizeof (*sdc);
1877         if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
1878                 extra = SCTP_ALIGN - extra;
1879 
1880         /* Find out if we need to piggyback SACK. */
1881         if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1882                 sacklen = 0;
1883         } else {
1884                 sacklen = sizeof (sctp_chunk_hdr_t) +
1885                     sizeof (sctp_sack_chunk_t) +
1886                     (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1887                 if (seglen + sacklen > sctp->sctp_lastdata->sf_pmss) {
1888                         /* piggybacked SACK doesn't fit */
1889                         sacklen = 0;
1890                 } else {
1891                         /*
1892                          * OK, we have room to send SACK back.  But we
1893                          * should send it back to the last fp where we
1894                          * receive data from, unless sctp_lastdata equals
1895                          * oldfp, then we should probably not send it
1896                          * back to that fp.  Also we should check that
1897                          * the fp is alive.
1898                          */
1899                         if (sctp->sctp_lastdata != oldfp &&
1900                             sctp->sctp_lastdata->sf_state ==
1901                             SCTP_FADDRS_ALIVE) {
1902                                 fp = sctp->sctp_lastdata;
1903                         }
1904                 }
1905         }
1906 
1907         /*
1908          * Cancel RTT measurement if the retransmitted TSN is before the
1909          * TSN used for timimg.
1910          */
1911         if (sctp->sctp_out_time != 0 &&
1912             SEQ_GEQ(sctp->sctp_rtt_tsn, sdc->sdh_tsn)) {
1913                 sctp->sctp_out_time = 0;
1914         }
1915         /* Clear the counter as the RTT calculation may be off. */
1916         fp->sf_rtt_updates = 0;
1917         oldfp->sf_rtt_updates = 0;
1918 
1919         /*
1920          * After a timeout, we should change the current faddr so that
1921          * new chunks will be sent to the alternate address.
1922          */
1923         sctp_set_faddr_current(sctp, fp);
1924 
1925         nmp = dupmsg(mp);
1926         if (nmp == NULL)
1927                 goto restart_timer;
1928         if (extra > 0) {
1929                 fill = sctp_get_padding(sctp, extra);
1930                 if (fill != NULL) {
1931                         linkb(nmp, fill);
1932                         seglen += extra;
1933                 } else {
1934                         freemsg(nmp);
1935                         goto restart_timer;
1936                 }
1937         }
1938         SCTP_CHUNK_CLEAR_FLAGS(nmp);
1939         head = sctp_add_proto_hdr(sctp, fp, nmp, sacklen, NULL);
1940         if (head == NULL) {
1941                 freemsg(nmp);
1942                 SCTP_KSTAT(sctps, sctp_rexmit_failed);
1943                 goto restart_timer;
1944         }
1945         seglen += sacklen;
1946 
1947         SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
1948 
1949         mp = mp->b_next;
1950 
1951 try_bundle:
1952         /* We can at least and at most send 1 packet at timeout. */
1953         while (seglen < fp->sf_pmss) {
1954                 int32_t new_len;
1955 
1956                 /* Go through the list to find more chunks to be bundled. */
1957                 while (mp != NULL) {
1958                         /* Check if the chunk can be bundled. */
1959                         if (SCTP_CHUNK_RX_CANBUNDLE(mp, oldfp))
1960                                 break;
1961                         mp = mp->b_next;
1962                 }
1963                 /* Go to the next message. */
1964                 if (mp == NULL) {
1965                         for (meta = meta->b_next; meta != NULL;
1966                             meta = meta->b_next) {
1967                                 mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
1968 
1969                                 if (SCTP_IS_MSG_ABANDONED(meta) ||
1970                                     SCTP_MSG_TO_BE_ABANDONED(meta, mhdr,
1971                                     sctp)) {
1972                                         continue;
1973                                 }
1974 
1975                                 mp = meta->b_cont;
1976                                 goto try_bundle;
1977                         }
1978                         /*
1979                          * Check if there is a new message which potentially
1980                          * could be bundled with this retransmission.
1981                          */
1982                         meta = sctp_get_msg_to_send(sctp, &mp, NULL, &error,
1983                             seglen, fp->sf_pmss - seglen, NULL);
1984                         if (error != 0 || meta == NULL) {
1985                                 /* No more chunk to be bundled. */
1986                                 break;
1987                         } else {
1988                                 goto try_bundle;
1989                         }
1990                 }
1991 
1992                 sdc = (sctp_data_hdr_t *)mp->b_rptr;
1993                 new_len = ntohs(sdc->sdh_len);
1994                 chunklen = new_len - sizeof (*sdc);
1995 
1996                 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
1997                         extra = SCTP_ALIGN - extra;
1998                 if ((new_len = seglen + new_len + extra) > fp->sf_pmss)
1999                         break;
2000                 if ((nmp = dupmsg(mp)) == NULL)
2001                         break;
2002 
2003                 if (extra > 0) {
2004                         fill = sctp_get_padding(sctp, extra);
2005                         if (fill != NULL) {
2006                                 linkb(nmp, fill);
2007                         } else {
2008                                 freemsg(nmp);
2009                                 break;
2010                         }
2011                 }
2012                 linkb(head, nmp);
2013 
2014                 SCTP_CHUNK_CLEAR_FLAGS(nmp);
2015                 SCTP_CHUNK_SENT(sctp, mp, sdc, fp, chunklen, meta);
2016 
2017                 seglen = new_len;
2018                 mp = mp->b_next;
2019         }
2020 done_bundle:
2021         if ((seglen > fp->sf_pmss) && fp->sf_isv4) {
2022                 ipha_t *iph = (ipha_t *)head->b_rptr;
2023 
2024                 /*
2025                  * Path MTU is different from path we thought it would
2026                  * be when we created chunks, or IP headers have grown.
2027                  * Need to clear the DF bit.
2028                  */
2029                 iph->ipha_fragment_offset_and_flags = 0;
2030         }
2031         fp->sf_rxt_unacked += seglen;
2032 
2033         dprint(2, ("sctp_rexmit: Sending packet %d bytes, tsn %x "
2034             "ssn %d to %p (rwnd %d, lastack_rxd %x)\n",
2035             seglen, ntohl(sdc->sdh_tsn), ntohs(sdc->sdh_ssn),
2036             (void *)fp, sctp->sctp_frwnd, sctp->sctp_lastack_rxd));
2037 
2038         sctp->sctp_rexmitting = B_TRUE;
2039         sctp->sctp_rxt_nxttsn = first_ua_tsn;
2040         sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn - 1;
2041         sctp_set_iplen(sctp, head, fp->sf_ixa);
2042 
2043         DTRACE_SCTP5(send, mblk_t *, NULL, ip_xmit_attr_t *, fp->sf_ixa,
2044             void_ip_t *, mp->b_rptr, sctp_t *, sctp, sctp_hdr_t *,
2045             &mp->b_rptr[fp->sf_ixa->ixa_ip_hdr_length]);
2046 
2047         (void) conn_ip_output(head, fp->sf_ixa);
2048         BUMP_LOCAL(sctp->sctp_opkts);
2049 
2050         /*
2051          * Restart the oldfp timer with exponential backoff and
2052          * the new fp timer for the retransmitted chunks.
2053          */
2054 restart_timer:
2055         oldfp->sf_strikes++;
2056         sctp->sctp_strikes++;
2057         SCTP_CALC_RXT(sctp, oldfp, sctp->sctp_rto_max);
2058         /*
2059          * If there is still some data in the oldfp, restart the
2060          * retransmission timer.  If there is no data, the heartbeat will
2061          * continue to run so it will do its job in checking the reachability
2062          * of the oldfp.
2063          */
2064         if (oldfp != fp && oldfp->sf_suna != 0)
2065                 SCTP_FADDR_TIMER_RESTART(sctp, oldfp, oldfp->sf_rto);
2066 
2067         /*
2068          * Should we restart the timer of the new fp?  If there is
2069          * outstanding data to the new fp, the timer should be
2070          * running already.  So restarting it means that the timer
2071          * will fire later for those outstanding data.  But if
2072          * we don't restart it, the timer will fire too early for the
2073          * just retransmitted chunks to the new fp.  The reason is that we
2074          * don't keep a timestamp on when a chunk is retransmitted.
2075          * So when the timer fires, it will just search for the
2076          * chunk with the earliest TSN sent to new fp.  This probably
2077          * is the chunk we just retransmitted.  So for now, let's
2078          * be conservative and restart the timer of the new fp.
2079          */
2080         SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2081 
2082         sctp->sctp_active = ddi_get_lbolt64();
2083 }
2084 
2085 /*
2086  * This function is called by sctp_ss_rexmit() to create a packet
2087  * to be retransmitted to the given fp.  The given meta and mp
2088  * parameters are respectively the sctp_msg_hdr_t and the mblk of the
2089  * first chunk to be retransmitted.  This is also called when we want
2090  * to retransmit a zero window probe from sctp_rexmit() or when we
2091  * want to retransmit the zero window probe after the window has
2092  * opened from sctp_got_sack().
2093  */
2094 mblk_t *
2095 sctp_rexmit_packet(sctp_t *sctp, mblk_t **meta, mblk_t **mp, sctp_faddr_t *fp,
2096     uint_t *packet_len)
2097 {
2098         uint32_t        seglen = 0;
2099         uint16_t        chunklen;
2100         int             extra;
2101         mblk_t          *nmp;
2102         mblk_t          *head;
2103         mblk_t          *fill;
2104         sctp_data_hdr_t *sdc;
2105         sctp_msg_hdr_t  *mhdr;
2106 
2107         sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2108         seglen = ntohs(sdc->sdh_len);
2109         chunklen = seglen - sizeof (*sdc);
2110         if ((extra = seglen & (SCTP_ALIGN - 1)) != 0)
2111                 extra = SCTP_ALIGN - extra;
2112 
2113         nmp = dupmsg(*mp);
2114         if (nmp == NULL)
2115                 return (NULL);
2116         if (extra > 0) {
2117                 fill = sctp_get_padding(sctp, extra);
2118                 if (fill != NULL) {
2119                         linkb(nmp, fill);
2120                         seglen += extra;
2121                 } else {
2122                         freemsg(nmp);
2123                         return (NULL);
2124                 }
2125         }
2126         SCTP_CHUNK_CLEAR_FLAGS(nmp);
2127         head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
2128         if (head == NULL) {
2129                 freemsg(nmp);
2130                 return (NULL);
2131         }
2132         SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2133         /*
2134          * Don't update the TSN if we are doing a Zero Win Probe.
2135          */
2136         if (!sctp->sctp_zero_win_probe)
2137                 sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2138         *mp = (*mp)->b_next;
2139 
2140 try_bundle:
2141         while (seglen < fp->sf_pmss) {
2142                 int32_t new_len;
2143 
2144                 /*
2145                  * Go through the list to find more chunks to be bundled.
2146                  * We should only retransmit sent by unack'ed chunks.  Since
2147                  * they were sent before, the peer's receive window should
2148                  * be able to receive them.
2149                  */
2150                 while (*mp != NULL) {
2151                         /* Check if the chunk can be bundled. */
2152                         if (SCTP_CHUNK_ISSENT(*mp) && !SCTP_CHUNK_ISACKED(*mp))
2153                                 break;
2154                         *mp = (*mp)->b_next;
2155                 }
2156                 /* Go to the next message. */
2157                 if (*mp == NULL) {
2158                         for (*meta = (*meta)->b_next; *meta != NULL;
2159                             *meta = (*meta)->b_next) {
2160                                 mhdr = (sctp_msg_hdr_t *)(*meta)->b_rptr;
2161 
2162                                 if (SCTP_IS_MSG_ABANDONED(*meta) ||
2163                                     SCTP_MSG_TO_BE_ABANDONED(*meta, mhdr,
2164                                     sctp)) {
2165                                         continue;
2166                                 }
2167 
2168                                 *mp = (*meta)->b_cont;
2169                                 goto try_bundle;
2170                         }
2171                         /* No more chunk to be bundled. */
2172                         break;
2173                 }
2174 
2175                 sdc = (sctp_data_hdr_t *)(*mp)->b_rptr;
2176                 /* Don't bundle chunks beyond sctp_rxt_maxtsn. */
2177                 if (SEQ_GT(ntohl(sdc->sdh_tsn), sctp->sctp_rxt_maxtsn))
2178                         break;
2179                 new_len = ntohs(sdc->sdh_len);
2180                 chunklen = new_len - sizeof (*sdc);
2181 
2182                 if ((extra = new_len & (SCTP_ALIGN - 1)) != 0)
2183                         extra = SCTP_ALIGN - extra;
2184                 if ((new_len = seglen + new_len + extra) > fp->sf_pmss)
2185                         break;
2186                 if ((nmp = dupmsg(*mp)) == NULL)
2187                         break;
2188 
2189                 if (extra > 0) {
2190                         fill = sctp_get_padding(sctp, extra);
2191                         if (fill != NULL) {
2192                                 linkb(nmp, fill);
2193                         } else {
2194                                 freemsg(nmp);
2195                                 break;
2196                         }
2197                 }
2198                 linkb(head, nmp);
2199 
2200                 SCTP_CHUNK_CLEAR_FLAGS(nmp);
2201                 SCTP_CHUNK_SENT(sctp, *mp, sdc, fp, chunklen, *meta);
2202                 /*
2203                  * Don't update the TSN if we are doing a Zero Win Probe.
2204                  */
2205                 if (!sctp->sctp_zero_win_probe)
2206                         sctp->sctp_rxt_nxttsn = ntohl(sdc->sdh_tsn);
2207 
2208                 seglen = new_len;
2209                 *mp = (*mp)->b_next;
2210         }
2211         *packet_len = seglen;
2212         fp->sf_rxt_unacked += seglen;
2213         return (head);
2214 }
2215 
2216 /*
2217  * sctp_ss_rexmit() is called when we get a SACK after a timeout which
2218  * advances the cum_tsn but the cum_tsn is still less than what we have sent
2219  * (sctp_rxt_maxtsn) at the time of the timeout.  This SACK is a "partial"
2220  * SACK.  We retransmit unacked chunks without having to wait for another
2221  * timeout.  The rationale is that the SACK should not be "partial" if all the
2222  * lost chunks have been retransmitted.  Since the SACK is "partial,"
2223  * the chunks between the cum_tsn and the sctp_rxt_maxtsn should still
2224  * be missing.  It is better for us to retransmit them now instead
2225  * of waiting for a timeout.
2226  */
2227 void
2228 sctp_ss_rexmit(sctp_t *sctp)
2229 {
2230         mblk_t          *meta;
2231         mblk_t          *mp;
2232         mblk_t          *pkt;
2233         sctp_faddr_t    *fp;
2234         uint_t          pkt_len;
2235         uint32_t        tot_wnd;
2236         sctp_data_hdr_t *sdc;
2237         int             burst;
2238         sctp_stack_t    *sctps = sctp->sctp_sctps;
2239 
2240         ASSERT(!sctp->sctp_zero_win_probe);
2241 
2242         /*
2243          * If the last cum ack is smaller than what we have just
2244          * retransmitted, simply return.
2245          */
2246         if (SEQ_GEQ(sctp->sctp_lastack_rxd, sctp->sctp_rxt_nxttsn))
2247                 sctp->sctp_rxt_nxttsn = sctp->sctp_lastack_rxd + 1;
2248         else
2249                 return;
2250         ASSERT(SEQ_LEQ(sctp->sctp_rxt_nxttsn, sctp->sctp_rxt_maxtsn));
2251 
2252         /*
2253          * After a timer fires, sctp_current should be set to the new
2254          * fp where the retransmitted chunks are sent.
2255          */
2256         fp = sctp->sctp_current;
2257 
2258         /*
2259          * Since we are retransmitting, we only need to use cwnd to determine
2260          * how much we can send as we were allowed (by peer's receive window)
2261          * to send those retransmitted chunks previously when they are first
2262          * sent.  If we record how much we have retransmitted but
2263          * unacknowledged using rxt_unacked, then the amount we can now send
2264          * is equal to cwnd minus rxt_unacked.
2265          *
2266          * The field rxt_unacked is incremented when we retransmit a packet
2267          * and decremented when we got a SACK acknowledging something.  And
2268          * it is reset when the retransmission timer fires as we assume that
2269          * all packets have left the network after a timeout.  If this
2270          * assumption is not true, it means that after a timeout, we can
2271          * get a SACK acknowledging more than rxt_unacked (its value only
2272          * contains what is retransmitted when the timer fires).  So
2273          * rxt_unacked will become very big (it is an unsiged int so going
2274          * negative means that the value is huge).  This is the reason we
2275          * always send at least 1 MSS bytes.
2276          *
2277          * The reason why we do not have an accurate count is that we
2278          * only know how many packets are outstanding (using the TSN numbers).
2279          * But we do not know how many bytes those packets contain.  To
2280          * have an accurate count, we need to walk through the send list.
2281          * As it is not really important to have an accurate count during
2282          * retransmission, we skip this walk to save some time.  This should
2283          * not make the retransmission too aggressive to cause congestion.
2284          */
2285         if (fp->sf_cwnd <= fp->sf_rxt_unacked)
2286                 tot_wnd = fp->sf_pmss;
2287         else
2288                 tot_wnd = fp->sf_cwnd - fp->sf_rxt_unacked;
2289 
2290         /* Find the first unack'ed chunk */
2291         for (meta = sctp->sctp_xmit_head; meta != NULL; meta = meta->b_next) {
2292                 sctp_msg_hdr_t  *mhdr = (sctp_msg_hdr_t *)meta->b_rptr;
2293 
2294                 if (SCTP_IS_MSG_ABANDONED(meta) ||
2295                     SCTP_MSG_TO_BE_ABANDONED(meta, mhdr, sctp)) {
2296                         continue;
2297                 }
2298 
2299                 for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
2300                         /* Again, this may not be possible */
2301                         if (!SCTP_CHUNK_ISSENT(mp))
2302                                 return;
2303                         sdc = (sctp_data_hdr_t *)mp->b_rptr;
2304                         if (ntohl(sdc->sdh_tsn) == sctp->sctp_rxt_nxttsn)
2305                                 goto found_msg;
2306                 }
2307         }
2308 
2309         /* Everything is abandoned... */
2310         return;
2311 
2312 found_msg:
2313         if (!fp->sf_timer_running)
2314                 SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
2315         pkt = sctp_rexmit_packet(sctp, &meta, &mp, fp, &pkt_len);
2316         if (pkt == NULL) {
2317                 SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2318                 return;
2319         }
2320         if ((pkt_len > fp->sf_pmss) && fp->sf_isv4) {
2321                 ipha_t  *iph = (ipha_t *)pkt->b_rptr;
2322 
2323                 /*
2324                  * Path MTU is different from path we thought it would
2325                  * be when we created chunks, or IP headers have grown.
2326                  *  Need to clear the DF bit.
2327                  */
2328                 iph->ipha_fragment_offset_and_flags = 0;
2329         }
2330         sctp_set_iplen(sctp, pkt, fp->sf_ixa);
2331 
2332         DTRACE_SCTP5(send, mblk_t *, NULL, ip_xmit_attr_t *, fp->sf_ixa,
2333             void_ip_t *, mp->b_rptr, sctp_t *, sctp, sctp_hdr_t *,
2334             &mp->b_rptr[fp->sf_ixa->ixa_ip_hdr_length]);
2335 
2336         (void) conn_ip_output(pkt, fp->sf_ixa);
2337         BUMP_LOCAL(sctp->sctp_opkts);
2338 
2339         /* Check and see if there is more chunk to be retransmitted. */
2340         if (tot_wnd <= pkt_len || tot_wnd - pkt_len < fp->sf_pmss ||
2341             meta == NULL)
2342                 return;
2343         if (mp == NULL)
2344                 meta = meta->b_next;
2345         if (meta == NULL)
2346                 return;
2347 
2348         /* Retransmit another packet if the window allows. */
2349         for (tot_wnd -= pkt_len, burst = sctps->sctps_maxburst - 1;
2350             meta != NULL && burst > 0; meta = meta->b_next, burst--) {
2351                 if (mp == NULL)
2352                         mp = meta->b_cont;
2353                 for (; mp != NULL; mp = mp->b_next) {
2354                         /* Again, this may not be possible */
2355                         if (!SCTP_CHUNK_ISSENT(mp))
2356                                 return;
2357                         if (!SCTP_CHUNK_ISACKED(mp))
2358                                 goto found_msg;
2359                 }
2360         }
2361 }