Print this page
3660 tcp_slow_start_* tunables should allow increasing the initial congestion window
Reviewed by: Dan McDonald <danmcd@nexenta.com>
Reviewed by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed by: Brendan Gregg <brendan.gregg@joyent.com>


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, Joyent Inc. All rights reserved.

  24  */
  25 
  26 #ifndef _INET_TCP_IMPL_H
  27 #define _INET_TCP_IMPL_H
  28 
  29 /*
  30  * TCP implementation private declarations.  These interfaces are
  31  * used to build the IP module and are not meant to be accessed
  32  * by any modules except IP itself.  They are undocumented and are
  33  * subject to change without notice.
  34  */
  35 
  36 #ifdef  __cplusplus
  37 extern "C" {
  38 #endif
  39 
  40 #ifdef _KERNEL
  41 
  42 #include <sys/cpuvar.h>
  43 #include <sys/clock_impl.h>       /* For LBOLT_FASTPATH{,64} */


 182  * prescribed by Steve Bellovin.  This involves adding time, the 125000 per
 183  * connection, and a one-way hash (MD5) of the connection ID <sport, dport,
 184  * src, dst>, a "truly" random (per RFC 1750) number, and a console-entered
 185  * password.
 186  */
 187 #define ISS_INCR        250000
 188 #define ISS_NSEC_SHT    12
 189 
 190 /* Timestamp comparisons: test the sign of (a)-(b) cast to int32_t. */
 191 #define TSTMP_GEQ(a, b) ((int32_t)((a)-(b)) >= 0)
 192 #define TSTMP_LT(a, b)  ((int32_t)((a)-(b)) < 0)
 193 
 194 /*
 195  * Initialize cwnd according to RFC 3390.  def_max_init_cwnd is
 196  * either tcp_slow_start_initial or tcp_slow_start_after_idle
 197  * depending on the caller.  If the upper layer has not used the
 198  * TCP_INIT_CWND option to change the initial cwnd, tcp_init_cwnd
 199  * should be 0 and we use the formula in RFC 3390 to set tcp_cwnd.
 200  * If the upper layer has set tcp_init_cwnd, just use it to
 201  * calculate the tcp_cwnd.


















 202  */




 203 #define TCP_SET_INIT_CWND(tcp, mss, def_max_init_cwnd)                  \
 204 {                                                                       \
 205         if ((tcp)->tcp_init_cwnd == 0) { /* RFC 3390 formula */         \
 206                 (tcp)->tcp_cwnd = MIN((def_max_init_cwnd) * (mss),      \
 207                     MIN(4 * (mss), MAX(2 * (mss), 4380 / (mss) * (mss)))); \

 208         } else { /* TCP_INIT_CWND was set by the upper layer */         \




 209                 (tcp)->tcp_cwnd = (tcp)->tcp_init_cwnd * (mss);         \
 210         }                                                               \
 211         (tcp)->tcp_cwnd_cnt = 0;                                        \
 212 }
 213 
 214 /*
 215  * Set ECN capable transport (ECT) code point in IP header.
 216  *
 217  * Note that there are 2 ECT code points '01' and '10', which are called
 218  * ECT(1) and ECT(0) respectively.  Here we follow the original ECT code
 219  * point ECT(0) for TCP as described in RFC 2481.
 220  */
 221 #define TCP_SET_ECT(tcp, iph) \
 222         if ((tcp)->tcp_connp->conn_ipversion == IPV4_VERSION) { \
 223                 /* We need to clear the code point first. */ \
 224                 ((ipha_t *)(iph))->ipha_type_of_service &= 0xFC; \
 225                 ((ipha_t *)(iph))->ipha_type_of_service |= IPH_ECN_ECT0; \
 226         } else { \
 227                 ((ip6_t *)(iph))->ip6_vcf &= htonl(0xFFCFFFFF); \
 228                 ((ip6_t *)(iph))->ip6_vcf |= htonl(IPH_ECN_ECT0 << 20); \




   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, Joyent Inc. All rights reserved.
  24  * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
  25  */
  26 
  27 #ifndef _INET_TCP_IMPL_H
  28 #define _INET_TCP_IMPL_H
  29 
  30 /*
  31  * TCP implementation private declarations.  These interfaces are
  32  * used to build the IP module and are not meant to be accessed
  33  * by any modules except IP itself.  They are undocumented and are
  34  * subject to change without notice.
  35  */
  36 
  37 #ifdef  __cplusplus
  38 extern "C" {
  39 #endif
  40 
  41 #ifdef _KERNEL
  42 
  43 #include <sys/cpuvar.h>
  44 #include <sys/clock_impl.h>       /* For LBOLT_FASTPATH{,64} */


 183  * prescribed by Steve Bellovin.  This involves adding time, the 125000 per
 184  * connection, and a one-way hash (MD5) of the connection ID <sport, dport,
 185  * src, dst>, a "truly" random (per RFC 1750) number, and a console-entered
 186  * password.
 187  */
 188 #define ISS_INCR        250000
 189 #define ISS_NSEC_SHT    12
 190 
 191 /* Timestamp comparisons: test the sign of (a)-(b) cast to int32_t. */
 192 #define TSTMP_GEQ(a, b) ((int32_t)((a)-(b)) >= 0)
 193 #define TSTMP_LT(a, b)  ((int32_t)((a)-(b)) < 0)
 194 
 195 /*
 196  * Initialize cwnd according to RFC 3390.  def_max_init_cwnd is
 197  * either tcp_slow_start_initial or tcp_slow_start_after_idle
 198  * depending on the caller.  If the upper layer has not used the
 199  * TCP_INIT_CWND option to change the initial cwnd, tcp_init_cwnd
 200  * should be 0 and we use the formula in RFC 3390 to set tcp_cwnd.
 201  * If the upper layer has set tcp_init_cwnd, just use it to
 202  * calculate the tcp_cwnd.
 203  *
 204  * "An Argument for Increasing TCP's Initial Congestion Window"
 205  * ACM SIGCOMM Computer Communications Review, vol. 40 (2010), pp. 27-33
 206  *  -- Nandita Dukkipati, Tiziana Refice, Yuchung Cheng,
 207  *     Hsiao-keng Jerry Chu, Tom Herbert, Amit Agarwal,
 208  *     Arvind Jain, Natalia Sutin
 209  *
 210  *   "Based on the results from our experiments, we believe the
 211  *    initial congestion window should be at least ten segments
 212  *    and the same be investigated for standardization by the IETF."
 213  *
 214  * As such, the def_max_init_cwnd argument with which this macro is
 215  * invoked is either the tcps_slow_start_initial or
 216  * tcps_slow_start_after_idle which both default to 0 and will respect
 217  * RFC 3390 exactly.  If the tunables are explicitly set by the operator,
 218  * then the initial congestion window should be set as the operator
 219  * demands, within reason. We shall arbitrarily define reason as a
 220  * maximum of 16 (same as used by the TCP_INIT_CWND setsockopt).
 221  */
 222 
 223 /* Cap, in segments, on a tunable-supplied initial cwnd (start/restart). */
 224 #define TCP_MAX_INIT_CWND       16
 225 
 226 #define TCP_SET_INIT_CWND(tcp, mss, def_max_init_cwnd)                  \
 227 {                                                                       \
 228         if ((tcp)->tcp_init_cwnd == 0) {                                \
 229                 if ((def_max_init_cwnd) == 0) { /* RFC 3390 default */  \
 230                         (tcp)->tcp_cwnd = MIN(4 * (mss),                \
 231                             MAX(2 * (mss), 4380 / (mss) * (mss)));      \
 232                 } else { /* operator tunable, capped */                 \
 233                         (tcp)->tcp_cwnd = MIN(TCP_MAX_INIT_CWND * (mss),\
 234                             (def_max_init_cwnd) * (mss));               \
 235                 }                                                       \
 236         } else { /* TCP_INIT_CWND was set by the upper layer */         \
 237                 (tcp)->tcp_cwnd = (tcp)->tcp_init_cwnd * (mss);         \
 238         }                                                               \
 239         (tcp)->tcp_cwnd_cnt = 0;                                        \
 240 }
 241 
 242 /*
 243  * Set ECN capable transport (ECT) code point in IP header.
 244  *
 245  * Note that there are 2 ECT code points '01' and '10', which are called
 246  * ECT(1) and ECT(0) respectively.  Here we follow the original ECT code
 247  * point ECT(0) for TCP as described in RFC 2481.
 248  */
 249 #define TCP_SET_ECT(tcp, iph) \
 250         if ((tcp)->tcp_connp->conn_ipversion == IPV4_VERSION) { \
 251                 /* We need to clear the code point first. */ \
 252                 ((ipha_t *)(iph))->ipha_type_of_service &= 0xFC; \
 253                 ((ipha_t *)(iph))->ipha_type_of_service |= IPH_ECN_ECT0; \
 254         } else { \
 255                 ((ip6_t *)(iph))->ip6_vcf &= htonl(0xFFCFFFFF); \
 256                 ((ip6_t *)(iph))->ip6_vcf |= htonl(IPH_ECN_ECT0 << 20); \