/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */

/*
 * The TCP congestion control algorithm extracted from the pre-framework
 * implementation of TCP congestion control.
 */

#include <sys/errno.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/cc.h>
#include <inet/cc/cc_module.h>

static void	sunreno_ack_received(struct cc_var *ccv, uint16_t type);
static void	sunreno_after_idle(struct cc_var *ccv);
static void	sunreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void	sunreno_post_recovery(struct cc_var *ccv);

#define	CC_SUNRENO_ALGO_NAME	"sunreno"

static struct modlmisc cc_sunreno_modlmisc = {
	&mod_miscops,
	"SUNReno Congestion Control"
};

static struct modlinkage cc_sunreno_modlinkage = {
	MODREV_1,
	&cc_sunreno_modlmisc,
	NULL
};

struct cc_algo sunreno_cc_algo = {
	.name = CC_SUNRENO_ALGO_NAME,
	.ack_received = sunreno_ack_received,
	.after_idle = sunreno_after_idle,
	.cong_signal = sunreno_cong_signal,
	.post_recovery = sunreno_post_recovery,
};

int
_init(void)
{
	int err;

	if ((err = cc_register_algo(&sunreno_cc_algo)) == 0) {
		if ((err = mod_install(&cc_sunreno_modlinkage)) != 0)
			(void) cc_deregister_algo(&sunreno_cc_algo);
	}
	return (err);
}

int
_fini(void)
{
	/* This module cannot be unloaded. */
	return (EBUSY);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&cc_sunreno_modlinkage, modinfop));
}

/*
 * Grow the congestion window for an ACK received outside of recovery:
 * by 1 MSS per ACK while below ssthresh (slow start), and by roughly
 * 1 MSS per RTT (tracked via tcp_cwnd_cnt) once in congestion avoidance.
 */
static void
sunreno_ack_received(struct cc_var *ccv, uint16_t type)
{
	uint32_t add;
	uint32_t cwnd;
	int mss;

	if (type == CC_ACK && !IN_RECOVERY(ccv->flags)) {
		mss = CCV(ccv, tcp_mss);
		cwnd = CCV(ccv, tcp_cwnd);
		add = mss;

		if (cwnd >= CCV(ccv, tcp_cwnd_ssthresh)) {
			/*
			 * This is to prevent an increase of less than 1 MSS of
			 * tcp_cwnd.  With partial increase, tcp_wput_data()
			 * may send out tinygrams in order to preserve mblk
			 * boundaries.
			 *
			 * By initializing tcp_cwnd_cnt to the new tcp_cwnd and
			 * decrementing it by 1 MSS for every ACK, tcp_cwnd is
			 * increased by 1 MSS for every RTT.
			 */
			if (CCV(ccv, tcp_cwnd_cnt) <= 0) {
				CCV(ccv, tcp_cwnd_cnt) = cwnd + add;
			} else {
				CCV(ccv, tcp_cwnd_cnt) -= add;
				add = 0;
			}
		}
		CCV(ccv, tcp_cwnd) = MIN(cwnd + add, CCV(ccv, tcp_cwnd_max));
	}
}

/*
 * Reset the congestion window to its initial size after the connection
 * has been idle, accounting for any SACK option overhead in the MSS.
 */
static void
sunreno_after_idle(struct cc_var *ccv)
{
	int32_t num_sack_blk = 0;
	int mss;

	if (CCV(ccv, tcp_snd_sack_ok) && CCV(ccv, tcp_num_sack_blk) > 0) {
		int32_t opt_len;

		num_sack_blk = MIN(CCV(ccv, tcp_max_sack_blk),
		    CCV(ccv, tcp_num_sack_blk));
		opt_len = num_sack_blk * sizeof (sack_blk_t) + TCPOPT_NOP_LEN *
		    2 + TCPOPT_HEADER_LEN;
		mss = CCV(ccv, tcp_mss) - opt_len;
	} else {
		mss = CCV(ccv, tcp_mss);
	}

	TCP_SET_INIT_CWND(CCV_PROTO(ccv), mss,
	    CCSV(ccv, tcps_slow_start_after_idle));
}

/*
 * Perform any necessary tasks before we enter congestion recovery.
 */
static void
sunreno_cong_signal(struct cc_var *ccv, uint32_t type)
{
	int npkt;
	int mss;

	/* Catch algos which mistakenly leak private signal types. */
	ASSERT((type & CC_SIGPRIVMASK) == 0);

	mss = CCV(ccv, tcp_mss);
	npkt = ((CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna)) >> 1) / mss;

	switch (type) {
	case CC_NDUPACK:
		if (!IN_FASTRECOVERY(ccv->flags)) {
			if (!IN_CONGRECOVERY(ccv->flags)) {
				CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) *
				    mss;
				CCV(ccv, tcp_cwnd) = (npkt +
				    CCV(ccv, tcp_dupack_cnt)) * mss;
			}
			ENTER_RECOVERY(ccv->flags);
		}
		break;
	case CC_ECN:
		if (!IN_CONGRECOVERY(ccv->flags) && !CCV(ccv, tcp_cwr)) {
			CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
			CCV(ccv, tcp_cwnd) = npkt * mss;
			if (CCV(ccv, tcp_cwnd) == 0) {
				/*
				 * This makes sure that when the ACK comes
				 * back, we will increase tcp_cwnd by 1 MSS.
				 */
				CCV(ccv, tcp_cwnd_cnt) = 0;
			}
			ENTER_CONGRECOVERY(ccv->flags);
		}
		break;
	case CC_RTO:
		/*
		 * After retransmission, we need to do slow start.  Set the
		 * ssthresh to one half of current effective window and cwnd to
		 * one MSS.  Also reset tcp_cwnd_cnt.
		 *
		 * Note that if tcp_ssthresh is reduced because of ECN, do not
		 * reduce it again unless it is already one window of data away
		 * (tcp_cwr should then be cleared) or this is a timeout for a
		 * retransmitted segment.
		 */
		if (!CCV(ccv, tcp_cwr) || CCV(ccv, tcp_rexmit)) {
			if (CCV(ccv, tcp_timer_backoff) != 0)
				npkt = CCV(ccv, tcp_cwnd_ssthresh) / 2 / mss;
			CCV(ccv, tcp_cwnd_ssthresh) = MAX(npkt, 2) * mss;
		}
		CCV(ccv, tcp_cwnd) = mss;
		CCV(ccv, tcp_cwnd_cnt) = 0;
		break;
	}
}

/*
 * Perform any necessary tasks before we exit congestion recovery.
 */
static void
sunreno_post_recovery(struct cc_var *ccv)
{
	/*
	 * Restore the congestion window back to ssthresh as per RFC 5681
	 * section 3.2.
	 */
	if (IN_FASTRECOVERY(ccv->flags)) {
		if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
			CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
		}
	}
	CCV(ccv, tcp_cwnd_cnt) = 0;
}