Print this page
11553 Want pluggable TCP congestion control algorithms
Portions contributed by: Cody Peter Mello <cody.mello@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Robert Mustacchi <robert.mustacchi@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/inet/tcp/tcp_output.c
          +++ new/usr/src/uts/common/inet/tcp/tcp_output.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24      - * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
       24 + * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
  25   25   * Copyright 2019 Joyent, Inc.
  26   26   */
  27   27  
  28   28  /* This file contains all TCP output processing functions. */
  29   29  
  30   30  #include <sys/types.h>
  31   31  #include <sys/stream.h>
  32   32  #include <sys/strsun.h>
  33   33  #include <sys/strsubr.h>
  34   34  #include <sys/stropts.h>
↓ open down ↓ 39 lines elided ↑ open up ↑
  74   74  static void     tcp_wput_proto(void *, mblk_t *, void *, ip_recv_attr_t *);
  75   75  
  76   76  /*
  77   77   * This controls how tiny a write must be before we try to copy it
  78   78   * into the mblk on the tail of the transmit queue.  Not much
  79   79   * speedup is observed for values larger than sixteen.  Zero will
  80   80   * disable the optimisation.
  81   81   */
  82   82  static int tcp_tx_pull_len = 16;
  83   83  
       84 +static void
       85 +cc_after_idle(tcp_t *tcp)       /* run the CC algorithm's optional after_idle hook and trace the cwnd change */
       86 +{
       87 +        uint32_t old_cwnd = tcp->tcp_cwnd;      /* cwnd before the hook runs; reported by the probe below */
       88 +        /* The hook is optional: when the algorithm leaves after_idle NULL, this function does not touch tcp_cwnd. */
       89 +        if (CC_ALGO(tcp)->after_idle != NULL)
       90 +                CC_ALGO(tcp)->after_idle(&tcp->tcp_ccv);
       91 +        /* Fires whether or not a hook ran; args are the tcp_t, the saved cwnd and the current cwnd. */
       92 +        DTRACE_PROBE3(cwnd__cc__after__idle, tcp_t *, tcp, uint32_t, old_cwnd,
       93 +            uint32_t, tcp->tcp_cwnd);
       94 +}
       95 +
  84   96  int
  85   97  tcp_wput(queue_t *q, mblk_t *mp)
  86   98  {
  87   99          conn_t  *connp = Q_TO_CONN(q);
  88  100          tcp_t   *tcp;
  89  101          void (*output_proc)();
  90  102          t_scalar_t type;
  91  103          uchar_t *rptr;
  92  104          struct iocblk   *iocp;
  93  105          size_t size;
↓ open down ↓ 118 lines elided ↑ open up ↑
 212  224          uint32_t        snxt;
 213  225          int             tail_unsent;
 214  226          int             tcpstate;
 215  227          int             usable = 0;
 216  228          mblk_t          *xmit_tail;
 217  229          int32_t         mss;
 218  230          int32_t         num_sack_blk = 0;
 219  231          int32_t         total_hdr_len;
 220  232          int32_t         tcp_hdr_len;
 221  233          int             rc;
 222      -        tcp_stack_t     *tcps = tcp->tcp_tcps;
 223  234          conn_t          *connp = tcp->tcp_connp;
 224  235          clock_t         now = LBOLT_FASTPATH;
 225  236  
 226  237          tcpstate = tcp->tcp_state;
 227  238          if (mp == NULL) {
 228  239                  /*
 229  240                   * tcp_wput_data() with NULL mp should only be called when
 230  241                   * there is unsent data.
 231  242                   */
 232  243                  ASSERT(tcp->tcp_unsent > 0);
↓ open down ↓ 134 lines elided ↑ open up ↑
 367  378                  total_hdr_len = connp->conn_ht_iphc_len + opt_len;
 368  379                  tcp_hdr_len = connp->conn_ht_ulp_len + opt_len;
 369  380          } else {
 370  381                  mss = tcp->tcp_mss;
 371  382                  total_hdr_len = connp->conn_ht_iphc_len;
 372  383                  tcp_hdr_len = connp->conn_ht_ulp_len;
 373  384          }
 374  385  
 375  386          if ((tcp->tcp_suna == snxt) && !tcp->tcp_localnet &&
 376  387              (TICK_TO_MSEC(now - tcp->tcp_last_recv_time) >= tcp->tcp_rto)) {
 377      -                TCP_SET_INIT_CWND(tcp, mss, tcps->tcps_slow_start_after_idle);
      388 +                cc_after_idle(tcp);
 378  389          }
 379  390          if (tcpstate == TCPS_SYN_RCVD) {
 380  391                  /*
 381  392                   * The three-way connection establishment handshake is not
 382  393                   * complete yet. We want to queue the data for transmission
 383  394                   * after entering ESTABLISHED state (RFC793). A jump to
 384  395                   * "done" label effectively leaves data on the queue.
 385  396                   */
 386  397                  goto done;
 387  398          } else {
↓ open down ↓ 800 lines elided ↑ open up ↑
1188 1199           * ACK is expected. If so, then the congestion window size is no longer
1189 1200           * meaningfully tied to current network conditions.
1190 1201           *
1191 1202           * We reinitialize tcp_cwnd, and slow start again to get back the
1192 1203           * connection's "self-clock" as described in Van Jacobson's 1988 paper
1193 1204           * "Congestion avoidance and control".
1194 1205           */
1195 1206          now = LBOLT_FASTPATH;
1196 1207          if ((tcp->tcp_suna == snxt) && !tcp->tcp_localnet &&
1197 1208              (TICK_TO_MSEC(now - tcp->tcp_last_recv_time) >= tcp->tcp_rto)) {
1198      -                TCP_SET_INIT_CWND(tcp, mss, tcps->tcps_slow_start_after_idle);
     1209 +                cc_after_idle(tcp);
1199 1210          }
1200 1211  
1201 1212          usable = tcp->tcp_swnd;         /* tcp window size */
1202 1213          if (usable > tcp->tcp_cwnd)
1203 1214                  usable = tcp->tcp_cwnd; /* congestion window smaller */
1204 1215          usable -= snxt;         /* subtract stuff already sent */
1205 1216          suna = tcp->tcp_suna;
1206 1217          usable += suna;
1207 1218          /* usable can be < 0 if the congestion window is smaller */
1208 1219          if (len > usable) {
↓ open down ↓ 2521 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX