1 /*
   2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
   3  *      The Regents of the University of California.
   4  * Copyright (c) 2007-2008,2010
   5  *      Swinburne University of Technology, Melbourne, Australia.
   6  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
   7  * Copyright (c) 2010 The FreeBSD Foundation
   8  * All rights reserved.
   9  * Copyright (c) 2017 by Delphix. All rights reserved.
  10  *
  11  * This software was developed at the Centre for Advanced Internet
  12  * Architectures, Swinburne University of Technology, by Lawrence Stewart, James
  13  * Healy and David Hayes, made possible in part by a grant from the Cisco
  14  * University Research Program Fund at Community Foundation Silicon Valley.
  15  *
  16  * Portions of this software were developed at the Centre for Advanced
  17  * Internet Architectures, Swinburne University of Technology, Melbourne,
  18  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
  19  *
  20  * Redistribution and use in source and binary forms, with or without
  21  * modification, are permitted provided that the following conditions
  22  * are met:
  23  * 1. Redistributions of source code must retain the above copyright
  24  *    notice, this list of conditions and the following disclaimer.
  25  * 2. Redistributions in binary form must reproduce the above copyright
  26  *    notice, this list of conditions and the following disclaimer in the
  27  *    documentation and/or other materials provided with the distribution.
  28  *
  29  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  32  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  39  * SUCH DAMAGE.
  40  */
  41 
  42 /*
  43  * This software was first released in 2007 by James Healy and Lawrence Stewart
  44  * whilst working on the NewTCP research project at Swinburne University of
  45  * Technology's Centre for Advanced Internet Architectures, Melbourne,
  46  * Australia, which was made possible in part by a grant from the Cisco
  47  * University Research Program Fund at Community Foundation Silicon Valley.
  48  * More details are available at:
  49  *   http://caia.swin.edu.au/urp/newtcp/
  50  */
  51 
  52 #include <sys/errno.h>
  53 #include <inet/tcp.h>
  54 #include <inet/tcp_impl.h>
  55 #include <inet/cc.h>
  56 #include <inet/cc/cc_module.h>
  57 
/*
 * Callbacks implementing the NewReno congestion control algorithm.
 * They are wired into the framework via newreno_cc_algo below.
 */
static void	newreno_ack_received(struct cc_var *ccv, uint16_t type);
static void	newreno_after_idle(struct cc_var *ccv);
static void	newreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void	newreno_post_recovery(struct cc_var *ccv);
  62 
/* Loadable-module description; this module exposes no dedicated ops vector. */
static struct modlmisc cc_newreno_modlmisc = {
	&mod_miscops,
	"New Reno Congestion Control"
};
  67 
/* Module linkage used by _init(), _info() and _fini(). */
static struct modlinkage cc_newreno_modlinkage = {
	MODREV_1,
	&cc_newreno_modlmisc,
	NULL
};
  73 
/*
 * Algorithm descriptor registered with the congestion control framework.
 * Callbacks not named here are zero-initialized (left NULL) by the
 * designated initializers.
 */
struct cc_algo newreno_cc_algo = {
	.name = "newreno",
	.ack_received = newreno_ack_received,
	.after_idle = newreno_after_idle,
	.cong_signal = newreno_cong_signal,
	.post_recovery = newreno_post_recovery,
};
  81 
  82 int
  83 _init(void)
  84 {
  85         int err;
  86 
  87         if ((err = cc_register_algo(&newreno_cc_algo)) == 0) {
  88                 if ((err = mod_install(&cc_newreno_modlinkage)) != 0)
  89                         (void) cc_deregister_algo(&newreno_cc_algo);
  90         }
  91         return (err);
  92 }
  93 
/*
 * Module unload entry point.  Unloading is not supported yet, so always
 * refuse with EBUSY.
 */
int
_fini(void)
{
	/* XXX Not unloadable for now */
	return (EBUSY);
}
 100 
/* Module information entry point; delegates to mod_info(9F). */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&cc_newreno_modlinkage, modinfop));
}
 106 
/*
 * Open the congestion window in response to an in-order ACK.  Growth only
 * happens for CC_ACK signals, outside of recovery, and while the window is
 * actually limiting transmission (CCF_CWND_LIMITED); the method used is
 * described in the comment below.
 */
static void
newreno_ack_received(struct cc_var *ccv, uint16_t type)
{
	if (type == CC_ACK && !IN_RECOVERY(ccv->flags) &&
	    (ccv->flags & CCF_CWND_LIMITED)) {
		uint_t cw = CCV(ccv, tcp_cwnd);
		uint_t incr = CCV(ccv, tcp_mss);

		/*
		 * Regular in-order ACK, open the congestion window.
		 * Method depends on which congestion control state we're
		 * in (slow start or cong avoid) and if ABC (RFC 3465) is
		 * enabled.
		 *
		 * slow start: cwnd <= ssthresh
		 * cong avoid: cwnd > ssthresh
		 *
		 * slow start and ABC (RFC 3465):
		 *   Grow cwnd exponentially by the amount of data
		 *   ACKed capping the max increment per ACK to
		 *   (abc_l_var * maxseg) bytes.
		 *
		 * slow start without ABC (RFC 5681):
		 *   Grow cwnd exponentially by maxseg per ACK.
		 *
		 * cong avoid and ABC (RFC 3465):
		 *   Grow cwnd linearly by maxseg per RTT for each
		 *   cwnd worth of ACKed data.
		 *
		 * cong avoid without ABC (RFC 5681):
		 *   Grow cwnd linearly by approximately maxseg per RTT using
		 *   maxseg^2 / cwnd per ACK as the increment.
		 *   If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
		 *   avoid capping cwnd.
		 */
		if (cw > CCV(ccv, tcp_cwnd_ssthresh)) {
			/* Congestion avoidance. */
			if (CC_ABC(ccv)) {
				/* Grow by one maxseg per cwnd worth of data. */
				if (ccv->flags & CCF_ABC_SENTAWND)
					ccv->flags &= ~CCF_ABC_SENTAWND;
				else
					incr = 0;
			} else
				incr = max((incr * incr / cw), 1);
		} else if (CC_ABC(ccv)) {
			/*
			 * Slow start with ABC enabled.  (Must not use
			 * abc_l_var > 1 if slow starting after an RTO.)
			 */
			if (ccv->flags & CCF_RTO) {
				incr = min(ccv->bytes_this_ack,
				    CCV(ccv, tcp_mss));
			} else {
				incr = min(ccv->bytes_this_ack,
				    CC_ABC_L_VAR(ccv) * CCV(ccv, tcp_mss));
			}

		}
		/* ABC is on by default, so incr equals 0 frequently. */
		if (incr > 0)
			CCV(ccv, tcp_cwnd) = min(cw + incr,
			    TCP_MAXWIN << CCV(ccv, tcp_snd_ws));
	}
}
 171 
/*
 * Clamp cwnd down to the restart window after the connection has been
 * idle; see the explanation below and RFC 5681 section 4.1.
 */
static void
newreno_after_idle(struct cc_var *ccv)
{
	int rw;

	/*
	 * If we've been idle for more than one retransmit timeout the old
	 * congestion window is no longer current and we have to reduce it to
	 * the restart window before we can transmit again.
	 *
	 * The restart window is the initial window or the last CWND, whichever
	 * is smaller.
	 *
	 * This is done to prevent us from flooding the path with a full CWND at
	 * wirespeed, overloading router and switch buffers along the way.
	 *
	 * See RFC5681 Section 4.1. "Restarting Idle Connections".
	 */
	if (CCV(ccv, tcp_init_cwnd) != 0) {
		/*
		 * The TCP_INIT_CWND socket option was used to override the
		 * default.
		 */
		rw = CCV(ccv, tcp_init_cwnd) * CCV(ccv, tcp_mss);
	} else if (CCSV(ccv, tcps_slow_start_initial) != 0) {
		/* The _slow_start_initial tunable was explicitly set. */
		rw = min(TCP_MAX_INIT_CWND, CCSV(ccv, tcps_slow_start_initial))
		    * CCV(ccv, tcp_mss);
	} else {
		/* Do RFC 3390 */
		rw = min(4 * CCV(ccv, tcp_mss),
		    max(2 * CCV(ccv, tcp_mss), 4380));
	}

	CCV(ccv, tcp_cwnd) = min(rw, CCV(ccv, tcp_cwnd));
}
 208 
/*
 * Perform any necessary tasks before we enter congestion recovery.
 * Reacts to the congestion signal in 'type': fast retransmit
 * (CC_NDUPACK), ECN congestion mark (CC_ECN) or retransmit timeout
 * (CC_RTO).
 */
static void
newreno_cong_signal(struct cc_var *ccv, uint32_t type)
{
	uint32_t cwin, ssthresh_on_loss;
	uint32_t mss;

	cwin = CCV(ccv, tcp_cwnd);
	mss = CCV(ccv, tcp_mss);
	/*
	 * New ssthresh is half the outstanding data (snxt - suna),
	 * rounded down to a whole number of segments with a floor of two
	 * segments; cf. RFC 5681 section 3.1, equation (4).
	 */
	ssthresh_on_loss =
	    max((CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna)) / 2 / mss, 2)
	    * mss;

	/* Catch algos which mistakenly leak private signal types. */
	ASSERT((type & CC_SIGPRIVMASK) == 0);

	/* Halve cwnd too, with the same rounding and two-segment floor. */
	cwin = max(cwin / 2 / mss, 2) * mss;

	switch (type) {
	case CC_NDUPACK:
		/* Only reduce once per recovery episode. */
		if (!IN_FASTRECOVERY(ccv->flags)) {
			if (!IN_CONGRECOVERY(ccv->flags)) {
				CCV(ccv, tcp_cwnd_ssthresh) = ssthresh_on_loss;
				CCV(ccv, tcp_cwnd) = cwin;
			}
			ENTER_RECOVERY(ccv->flags);
		}
		break;
	case CC_ECN:
		/* Skip the reduction if already in congestion recovery. */
		if (!IN_CONGRECOVERY(ccv->flags)) {
			CCV(ccv, tcp_cwnd_ssthresh) = ssthresh_on_loss;
			CCV(ccv, tcp_cwnd) = cwin;
			ENTER_CONGRECOVERY(ccv->flags);
		}
		break;
	case CC_RTO:
		/* Timeout: restart slow start from a single segment. */
		CCV(ccv, tcp_cwnd_ssthresh) = ssthresh_on_loss;
		CCV(ccv, tcp_cwnd) = mss;
		break;
	}
}
 252 
 253 /*
 254  * Perform any necessary tasks before we exit congestion recovery.
 255  */
 256 static void
 257 newreno_post_recovery(struct cc_var *ccv)
 258 {
 259         if (IN_FASTRECOVERY(ccv->flags)) {
 260                 /*
 261                  * Fast recovery will conclude after returning from this
 262                  * function.
 263                  */
 264                 if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
 265                         CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
 266                 }
 267         }
 268 }