1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2017 by Delphix. All rights reserved.
  24  */
  25 
  26 /*
  27  * The TCP congestion control algorithm extracted from the pre-framework
  28  * implementation of TCP congestion control.
  29  */
  30 
  31 #include <sys/errno.h>
  32 #include <inet/tcp.h>
  33 #include <inet/tcp_impl.h>
  34 #include <inet/cc.h>
  35 #include <inet/cc/cc_module.h>
  36 
  37 static void     sunreno_ack_received(struct cc_var *ccv, uint16_t type);
  38 static void     sunreno_after_idle(struct cc_var *ccv);
  39 static void     sunreno_cong_signal(struct cc_var *ccv, uint32_t type);
  40 static void     sunreno_post_recovery(struct cc_var *ccv);
  41 
  42 #define CC_SUNRENO_ALGO_NAME "sunreno"
  43 
  44 static struct modlmisc cc_sunreno_modlmisc = {
  45         &mod_miscops,
  46         "SUNReno Congestion Control"
  47 };
  48 
  49 static struct modlinkage cc_sunreno_modlinkage = {
  50         MODREV_1,
  51         &cc_sunreno_modlmisc,
  52         NULL
  53 };
  54 
  55 struct cc_algo sunreno_cc_algo = {
  56         .name = CC_SUNRENO_ALGO_NAME,
  57         .ack_received = sunreno_ack_received,
  58         .after_idle = sunreno_after_idle,
  59         .cong_signal = sunreno_cong_signal,
  60         .post_recovery = sunreno_post_recovery,
  61 };
  62 
  63 int
  64 _init(void)
  65 {
  66         int err;
  67 
  68         if ((err = cc_register_algo(&sunreno_cc_algo)) == 0) {
  69                 if ((err = mod_install(&cc_sunreno_modlinkage)) != 0)
  70                         (void) cc_deregister_algo(&sunreno_cc_algo);
  71         }
  72         return (err);
  73 }
  74 
  75 int
  76 _fini(void)
  77 {
  78         return (EBUSY);
  79 }
  80 
  81 int
  82 _info(struct modinfo *modinfop)
  83 {
  84         return (mod_info(&cc_sunreno_modlinkage, modinfop));
  85 }
  86 
  87 static void
  88 sunreno_ack_received(struct cc_var *ccv, uint16_t type)
  89 {
  90         uint32_t add;
  91         uint32_t cwnd;
  92         int mss;
  93 
  94         if (type == CC_ACK && !IN_RECOVERY(ccv->flags)) {
  95                 mss = CCV(ccv, tcp_mss);
  96                 cwnd = CCV(ccv, tcp_cwnd);
  97                 add = mss;
  98 
  99                 if (cwnd >= CCV(ccv, tcp_cwnd_ssthresh)) {
 100                         /*
 101                          * This is to prevent an increase of less than 1 MSS of
 102                          * tcp_cwnd.  With partial increase, tcp_wput_data()
 103                          * may send out tinygrams in order to preserve mblk
 104                          * boundaries.
 105                          *
 106                          * By initializing tcp_cwnd_cnt to new tcp_cwnd and
 107                          * decrementing it by 1 MSS for every ACKs, tcp_cwnd is
 108                          * increased by 1 MSS for every RTTs.
 109                          */
 110                         if (CCV(ccv, tcp_cwnd_cnt) <= 0) {
 111                                 CCV(ccv, tcp_cwnd_cnt) = cwnd + add;
 112                         } else {
 113                                 CCV(ccv, tcp_cwnd_cnt) -= add;
 114                                 add = 0;
 115                         }
 116                 }
 117                 CCV(ccv, tcp_cwnd) = MIN(cwnd + add, CCV(ccv, tcp_cwnd_max));
 118         }
 119 }
 120 
 121 static void
 122 sunreno_after_idle(struct cc_var *ccv)
 123 {
 124         int32_t num_sack_blk = 0;
 125         int mss;
 126 
 127         if (CCV(ccv, tcp_snd_sack_ok) && CCV(ccv, tcp_num_sack_blk) > 0) {
 128                 int32_t opt_len;
 129 
 130                 num_sack_blk = MIN(CCV(ccv, tcp_max_sack_blk),
 131                     CCV(ccv, tcp_num_sack_blk));
 132                 opt_len = num_sack_blk * sizeof (sack_blk_t) + TCPOPT_NOP_LEN *
 133                     2 + TCPOPT_HEADER_LEN;
 134                 mss = CCV(ccv, tcp_mss) - opt_len;
 135         } else {
 136                 mss = CCV(ccv, tcp_mss);
 137         }
 138 
 139         TCP_SET_INIT_CWND(CCV_PROTO(ccv), mss,
 140             CCSV(ccv, tcps_slow_start_after_idle));
 141 }
 142 
 143 /*
 144  * Perform any necessary tasks before we enter congestion recovery.
 145  */
 146 static void
 147 sunreno_cong_signal(struct cc_var *ccv, uint32_t type)
 148 {
 149         int npkt;
 150         int mss;
 151 
 152         /* Catch algos which mistakenly leak private signal types. */
 153         ASSERT((type & CC_SIGPRIVMASK) == 0);
 154 
 155         mss = CCV(ccv, tcp_mss);
 156         npkt = ((CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna)) >> 1) / mss;
 157 
 158         switch (type) {
 159         case CC_NDUPACK:
 160                 if (!IN_FASTRECOVERY(ccv->flags)) {
 161                         if (!IN_CONGRECOVERY(ccv->flags)) {
 162                                 CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) *
 163                                     mss;
 164                                 CCV(ccv, tcp_cwnd) = (npkt +
 165                                     CCV(ccv, tcp_dupack_cnt)) * mss;
 166                         }
 167                         ENTER_RECOVERY(ccv->flags);
 168                 }
 169                 break;
 170         case CC_ECN:
 171                 if (!IN_CONGRECOVERY(ccv->flags) && !CCV(ccv, tcp_cwr)) {
 172                         CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
 173                         CCV(ccv, tcp_cwnd) = npkt * mss;
 174                         if (CCV(ccv, tcp_cwnd) == 0) {
 175                                 /*
 176                                  * This makes sure that when the ACK comes
 177                                  * back, we will increase tcp_cwnd by 1 MSS.
 178                                  */
 179                                 CCV(ccv, tcp_cwnd_cnt) = 0;
 180                         }
 181                         ENTER_CONGRECOVERY(ccv->flags);
 182                 }
 183                 break;
 184         case CC_RTO:
 185                 /*
 186                  * After retransmission, we need to do slow start.  Set the
 187                  * ssthresh to one half of current effective window and cwnd to
 188                  * one MSS.  Also reset tcp_cwnd_cnt.
 189                  *
 190                  * Note that if tcp_ssthresh is reduced because of ECN, do not
 191                  * reduce it again unless it is already one window of data away
 192                  * (tcp_cwr should then be cleared) or this is a timeout for a
 193                  * retransmitted segment.
 194                  */
 195                 if (!CCV(ccv, tcp_cwr) || CCV(ccv, tcp_rexmit)) {
 196                         if (CCV(ccv, tcp_timer_backoff) != 0)
 197                                 npkt = CCV(ccv, tcp_cwnd_ssthresh) / 2 / mss;
 198                         CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
 199                 }
 200                 CCV(ccv, tcp_cwnd) = mss;
 201                 CCV(ccv, tcp_cwnd_cnt) = 0;
 202                 break;
 203         }
 204 }
 205 
 206 /*
 207  * Perform any necessary tasks before we exit congestion recovery.
 208  */
 209 static void
 210 sunreno_post_recovery(struct cc_var *ccv)
 211 {
 212         /*
 213          * Restore the congestion window back to ssthresh as per RFC 5681
 214          * section 3.2.
 215          */
 216         if (IN_FASTRECOVERY(ccv->flags)) {
 217                 if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
 218                         CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
 219                 }
 220         }
 221         CCV(ccv, tcp_cwnd_cnt) = 0;
 222 }