1 /*
2 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995
3 * The Regents of the University of California.
4 * Copyright (c) 2007-2008,2010
5 * Swinburne University of Technology, Melbourne, Australia.
6 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
7 * Copyright (c) 2010 The FreeBSD Foundation
8 * All rights reserved.
9 * Copyright (c) 2017 by Delphix. All rights reserved.
10 *
11 * This software was developed at the Centre for Advanced Internet
12 * Architectures, Swinburne University of Technology, by Lawrence Stewart, James
13 * Healy and David Hayes, made possible in part by a grant from the Cisco
14 * University Research Program Fund at Community Foundation Silicon Valley.
15 *
16 * Portions of this software were developed at the Centre for Advanced
17 * Internet Architectures, Swinburne University of Technology, Melbourne,
18 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
22 * are met:
23 * 1. Redistributions of source code must retain the above copyright
24 * notice, this list of conditions and the following disclaimer.
25 * 2. Redistributions in binary form must reproduce the above copyright
26 * notice, this list of conditions and the following disclaimer in the
27 * documentation and/or other materials provided with the distribution.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 */
41
42 /*
43 * This software was first released in 2007 by James Healy and Lawrence Stewart
44 * whilst working on the NewTCP research project at Swinburne University of
45 * Technology's Centre for Advanced Internet Architectures, Melbourne,
46 * Australia, which was made possible in part by a grant from the Cisco
47 * University Research Program Fund at Community Foundation Silicon Valley.
48 * More details are available at:
49 * http://caia.swin.edu.au/urp/newtcp/
50 */
51
52 #include <sys/errno.h>
53 #include <inet/tcp.h>
54 #include <inet/tcp_impl.h>
55 #include <inet/cc.h>
56 #include <inet/cc/cc_module.h>
57
/*
 * Forward declarations of the NewReno hook functions.  They are defined
 * further down in this file and are referenced by the newreno_cc_algo
 * initializer.
 */
static void newreno_ack_received(struct cc_var *ccv, uint16_t type);
static void newreno_after_idle(struct cc_var *ccv);
static void newreno_cong_signal(struct cc_var *ccv, uint32_t type);
static void newreno_post_recovery(struct cc_var *ccv);
62
/*
 * Module description entry: pairs &mod_miscops with the descriptive name
 * string for this module.  It is referenced by cc_newreno_modlinkage.
 */
static struct modlmisc cc_newreno_modlmisc = {
	&mod_miscops,
	"New Reno Congestion Control"
};
67
/*
 * Linkage structure for this module.  It is passed to mod_install() from
 * _init() and to mod_info() from _info().  The initializer supplies
 * MODREV_1, a pointer to cc_newreno_modlmisc, and a terminating NULL.
 */
static struct modlinkage cc_newreno_modlinkage = {
	MODREV_1,
	&cc_newreno_modlmisc,
	NULL
};
73
/*
 * Algorithm descriptor for NewReno.  _init() hands it to
 * cc_register_algo() under the name "newreno".  It wires up the four hook
 * functions implemented in this file; any struct cc_algo members not named
 * here are left zero-initialized, as C does for omitted initializers.
 */
struct cc_algo newreno_cc_algo = {
	.name = "newreno",
	.ack_received = newreno_ack_received,
	.after_idle = newreno_after_idle,
	.cong_signal = newreno_cong_signal,
	.post_recovery = newreno_post_recovery,
};
81
82 int
83 _init(void)
84 {
85 int err;
86
87 if ((err = cc_register_algo(&newreno_cc_algo)) == 0) {
88 if ((err = mod_install(&cc_newreno_modlinkage)) != 0)
89 (void) cc_deregister_algo(&newreno_cc_algo);
90 }
91 return (err);
92 }
93
/*
 * Module unload entry point.  Unconditionally returns EBUSY; per the note
 * below, the module is not unloadable for now.
 */
int
_fini(void)
{
	/* XXX Not unloadable for now */
	return (EBUSY);
}
100
101 int
102 _info(struct modinfo *modinfop)
103 {
104 return (mod_info(&cc_newreno_modlinkage, modinfop));
105 }
106
107 static void
108 newreno_ack_received(struct cc_var *ccv, uint16_t type)
109 {
110 if (type == CC_ACK && !IN_RECOVERY(ccv->flags) &&
111 (ccv->flags & CCF_CWND_LIMITED)) {
112 uint_t cw = CCV(ccv, tcp_cwnd);
113 uint_t incr = CCV(ccv, tcp_mss);
114
115 /*
116 * Regular in-order ACK, open the congestion window.
117 * Method depends on which congestion control state we're
118 * in (slow start or cong avoid) and if ABC (RFC 3465) is
119 * enabled.
120 *
121 * slow start: cwnd <= ssthresh
122 * cong avoid: cwnd > ssthresh
123 *
124 * slow start and ABC (RFC 3465):
125 * Grow cwnd exponentially by the amount of data
126 * ACKed capping the max increment per ACK to
127 * (abc_l_var * maxseg) bytes.
128 *
129 * slow start without ABC (RFC 5681):
130 * Grow cwnd exponentially by maxseg per ACK.
131 *
132 * cong avoid and ABC (RFC 3465):
133 * Grow cwnd linearly by maxseg per RTT for each
134 * cwnd worth of ACKed data.
135 *
136 * cong avoid without ABC (RFC 5681):
137 * Grow cwnd linearly by approximately maxseg per RTT using
138 * maxseg^2 / cwnd per ACK as the increment.
139 * If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
140 * avoid capping cwnd.
141 */
142 if (cw > CCV(ccv, tcp_cwnd_ssthresh)) {
143 if (CC_ABC(ccv)) {
144 if (ccv->flags & CCF_ABC_SENTAWND)
145 ccv->flags &= ~CCF_ABC_SENTAWND;
146 else
147 incr = 0;
148 } else
149 incr = max((incr * incr / cw), 1);
150 } else if (CC_ABC(ccv)) {
151 /*
152 * In slow-start with ABC enabled and no RTO in sight?
153 * (Must not use abc_l_var > 1 if slow starting after
154 * an RTO.
155 */
156 if (ccv->flags & CCF_RTO) {
157 incr = min(ccv->bytes_this_ack,
158 CCV(ccv, tcp_mss));
159 } else {
160 incr = min(ccv->bytes_this_ack,
161 CC_ABC_L_VAR(ccv) * CCV(ccv, tcp_mss));
162 }
163
164 }
165 /* ABC is on by default, so incr equals 0 frequently. */
166 if (incr > 0)
167 CCV(ccv, tcp_cwnd) = min(cw + incr,
168 TCP_MAXWIN << CCV(ccv, tcp_snd_ws));
169 }
170 }
171
172 static void
173 newreno_after_idle(struct cc_var *ccv)
174 {
175 int rw;
176
177 /*
178 * If we've been idle for more than one retransmit timeout the old
179 * congestion window is no longer current and we have to reduce it to
180 * the restart window before we can transmit again.
181 *
182 * The restart window is the initial window or the last CWND, whichever
183 * is smaller.
184 *
185 * This is done to prevent us from flooding the path with a full CWND at
186 * wirespeed, overloading router and switch buffers along the way.
187 *
188 * See RFC5681 Section 4.1. "Restarting Idle Connections".
189 */
190 if (CCV(ccv, tcp_init_cwnd) != 0) {
191 /*
192 * The TCP_INIT_CWND socket option was used to override the
193 * default.
194 */
195 rw = CCV(ccv, tcp_init_cwnd) * CCV(ccv, tcp_mss);
196 } else if (CCSV(ccv, tcps_slow_start_initial) != 0) {
197 /* The _slow_start_initial tunable was explicitly set. */
198 rw = min(TCP_MAX_INIT_CWND, CCSV(ccv, tcps_slow_start_initial))
199 * CCV(ccv, tcp_mss);
200 } else {
201 /* Do RFC 3390 */
202 rw = min(4 * CCV(ccv, tcp_mss),
203 max(2 * CCV(ccv, tcp_mss), 4380));
204 }
205
206 CCV(ccv, tcp_cwnd) = min(rw, CCV(ccv, tcp_cwnd));
207 }
208
209 /*
210 * Perform any necessary tasks before we enter congestion recovery.
211 */
212 static void
213 newreno_cong_signal(struct cc_var *ccv, uint32_t type)
214 {
215 uint32_t cwin, ssthresh_on_loss;
216 uint32_t mss;
217
218 cwin = CCV(ccv, tcp_cwnd);
219 mss = CCV(ccv, tcp_mss);
220 ssthresh_on_loss =
221 max((CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna)) / 2 / mss, 2)
222 * mss;
223
224 /* Catch algos which mistakenly leak private signal types. */
225 ASSERT((type & CC_SIGPRIVMASK) == 0);
226
227 cwin = max(cwin / 2 / mss, 2) * mss;
228
229 switch (type) {
230 case CC_NDUPACK:
231 if (!IN_FASTRECOVERY(ccv->flags)) {
232 if (!IN_CONGRECOVERY(ccv->flags)) {
233 CCV(ccv, tcp_cwnd_ssthresh) = ssthresh_on_loss;
234 CCV(ccv, tcp_cwnd) = cwin;
235 }
236 ENTER_RECOVERY(ccv->flags);
237 }
238 break;
239 case CC_ECN:
240 if (!IN_CONGRECOVERY(ccv->flags)) {
241 CCV(ccv, tcp_cwnd_ssthresh) = ssthresh_on_loss;
242 CCV(ccv, tcp_cwnd) = cwin;
243 ENTER_CONGRECOVERY(ccv->flags);
244 }
245 break;
246 case CC_RTO:
247 CCV(ccv, tcp_cwnd_ssthresh) = ssthresh_on_loss;
248 CCV(ccv, tcp_cwnd) = mss;
249 break;
250 }
251 }
252
253 /*
254 * Perform any necessary tasks before we exit congestion recovery.
255 */
256 static void
257 newreno_post_recovery(struct cc_var *ccv)
258 {
259 if (IN_FASTRECOVERY(ccv->flags)) {
260 /*
261 * Fast recovery will conclude after returning from this
262 * function.
263 */
264 if (CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh)) {
265 CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
266 }
267 }
268 }