Print this page
3660 tcp_slow_start_* tunables should allow increasing the initial congestion window
Reviewed by: Dan McDonald <danmcd@nexenta.com>
Reviewed by: Sebastien Roy <sebastien.roy@delphix.com>
Reviewed by: Brendan Gregg <brendan.gregg@joyent.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/tcp/tcp_tunables.c
+++ new/usr/src/uts/common/inet/tcp/tcp_tunables.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011, Joyent Inc. All rights reserved.
24 24 */
25 25 /* Copyright (c) 1990 Mentat Inc. */
26 26
27 27 #include <inet/ip.h>
28 28 #include <inet/tcp_impl.h>
29 29 #include <sys/multidata.h>
30 30 #include <sys/sunddi.h>
31 31
32 32 /* Largest TCP payload: max size IP datagram (64k - 1) less the IP and TCP headers */
33 33 #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
34 34 #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))
35 35
36 36 /* Max of the above (presumably ipha_t is smaller than ip6_t - TODO confirm) */
37 37 #define TCP_MSS_MAX TCP_MSS_MAX_IPV4
38 38 /* Buffer sizes used as min/default values in tcp_propinfo_tbl below */
39 39 #define TCP_XMIT_LOWATER 4096 /* min of send_maxbuf; min and default of _xmit_lowat */
40 40 #define TCP_XMIT_HIWATER 49152 /* default of send_maxbuf */
41 41 #define TCP_RECV_LOWATER 2048 /* min of recv_maxbuf */
42 42 #define TCP_RECV_HIWATER 128000 /* default of recv_maxbuf */
43 43
44 44 /*
45 45 * Set the RFC 1948 pass phrase
 *
 * Property "set" callback for the "_1948_phrase" entry of tcp_propinfo_tbl.
 * cbarg is the tcp_stack_t the phrase applies to and pr_val is the new
 * phrase, which is treated as a NUL-terminated string (its length is taken
 * with strlen()). cr, pinfo and ifname are not used.
 *
 * Returns ENOTSUP when MOD_PROP_DEFAULT is set in flags (there is no default
 * phrase to reset to), otherwise 0 after handing the phrase to
 * tcp_iss_key_init().
46 46 */
47 47 /* ARGSUSED */
48 48 static int
49 49 tcp_set_1948phrase(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
50 50 const char *ifname, const void* pr_val, uint_t flags)
51 51 {
52 52 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
53 53
54 54 if (flags & MOD_PROP_DEFAULT)
55 55 return (ENOTSUP);
56 56
57 57 /*
58 58 * Basically, value contains a new pass phrase. Pass it along!
 * The length passed is strlen(pr_val), i.e. without the terminating NUL.
59 59 */
60 60 tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val), tcps);
61 61 return (0);
62 62 }
63 63
64 64 /*
65 65 * returns the current list of listener limit configuration.
 *
 * Property "get" callback for "_listener_limit_conf". cbarg is the
 * tcp_stack_t whose tcps_listener_conf list is reported; val/psize describe
 * the caller's output buffer. pinfo and ifname are not used.
 *
 * The buffer is zeroed first. If any of MOD_PROP_DEFAULT, MOD_PROP_PERM or
 * MOD_PROP_POSSIBLE is set in flags, the function returns 0 with the buffer
 * left empty. Otherwise each list entry is written as "port:ratio", with
 * entries separated by commas, while tcps_listener_conf_lock is held.
 *
 * Returns 0 on success, or ENOBUFS once the accumulated snprintf() results
 * reach psize (the output is then incomplete).
66 66 */
67 67 /* ARGSUSED */
68 68 static int
69 69 tcp_listener_conf_get(void *cbarg, mod_prop_info_t *pinfo, const char *ifname,
70 70 void *val, uint_t psize, uint_t flags)
71 71 {
72 72 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
73 73 tcp_listener_t *tl;
74 74 char *pval = val; /* write cursor into the caller's buffer */
75 75 size_t nbytes = 0, tbytes = 0; /* last / running total of snprintf results */
76 76 uint_t size; /* space remaining after the cursor */
77 77 int err = 0;
78 78
79 79 bzero(pval, psize);
80 80 size = psize;
81 81
82 82 if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE))
83 83 return (0);
84 84
85 85 mutex_enter(&tcps->tcps_listener_conf_lock);
86 86 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
87 87 tl = list_next(&tcps->tcps_listener_conf, tl)) {
 /* size still equals psize only before the first entry: no comma */
88 88 if (psize == size)
89 89 nbytes = snprintf(pval, size, "%d:%d", tl->tl_port,
90 90 tl->tl_ratio);
91 91 else
92 92 nbytes = snprintf(pval, size, ",%d:%d", tl->tl_port,
93 93 tl->tl_ratio);
 /*
  * NOTE(review): assumes snprintf() returns the length the output
  * would have had without truncation - TODO confirm. If so, size can
  * wrap and pval can move past the buffer here, but tbytes is then
  * >= psize and the loop breaks before either is used again.
  */
94 94 size -= nbytes;
95 95 pval += nbytes;
96 96 tbytes += nbytes;
97 97 if (tbytes >= psize) {
98 98 /* Buffer overflow, stop copying information */
99 99 err = ENOBUFS;
100 100 break;
101 101 }
102 102 }
103 103
104 104 mutex_exit(&tcps->tcps_listener_conf_lock);
105 105 return (err);
106 106 }
107 107
108 108 /*
109 109 * add a new listener limit configuration.
 *
 * Property "set" callback for "_listener_limit_conf_add". cbarg is the
 * tcp_stack_t to modify and pval is a string of the form "port:ratio", both
 * parsed as base-10 numbers. cr, pinfo and ifname are not used.
 *
 * Returns:
 *   ENOTSUP - MOD_PROP_DEFAULT is set in flags.
 *   EINVAL  - the port does not parse, is not in 1..USHRT_MAX or is not
 *             followed by ':', or the ratio does not parse or is <= 0.
 *   ENOMEM  - a new list entry could not be allocated.
 *   0       - an existing entry for the port had its ratio updated, or a
 *             new entry was appended to tcps_listener_conf.
110 110 */
111 111 /* ARGSUSED */
112 112 static int
113 113 tcp_listener_conf_add(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
114 114 const char *ifname, const void* pval, uint_t flags)
115 115 {
116 116 tcp_listener_t *new_tl;
117 117 tcp_listener_t *tl;
118 118 long lport;
119 119 long ratio;
120 120 char *colon; /* end pointer from parsing the port; must point at ':' */
121 121 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
122 122
123 123 if (flags & MOD_PROP_DEFAULT)
124 124 return (ENOTSUP);
125 125
126 126 if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 ||
127 127 lport > USHRT_MAX || *colon != ':') {
128 128 return (EINVAL);
129 129 }
 /*
  * The end pointer is not requested for the ratio, so this function does
  * not itself check for trailing characters after it, and no upper bound
  * is applied to the ratio here.
  */
130 130 if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0)
131 131 return (EINVAL);
132 132
133 133 mutex_enter(&tcps->tcps_listener_conf_lock);
134 134 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
135 135 tl = list_next(&tcps->tcps_listener_conf, tl)) {
136 136 /* There is an existing entry, so update its ratio value. */
137 137 if (tl->tl_port == lport) {
138 138 tl->tl_ratio = ratio;
139 139 mutex_exit(&tcps->tcps_listener_conf_lock);
140 140 return (0);
141 141 }
142 142 }
143 143
 /* No entry for this port: allocate one with KM_NOSLEEP while holding the lock. */
144 144 if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) ==
145 145 NULL) {
146 146 mutex_exit(&tcps->tcps_listener_conf_lock);
147 147 return (ENOMEM);
148 148 }
149 149
150 150 new_tl->tl_port = lport;
151 151 new_tl->tl_ratio = ratio;
152 152 list_insert_tail(&tcps->tcps_listener_conf, new_tl);
153 153 mutex_exit(&tcps->tcps_listener_conf_lock);
154 154 return (0);
155 155 }
156 156
157 157 /*
158 158 * remove a listener limit configuration.
 *
 * Property "set" callback for "_listener_limit_conf_del". cbarg is the
 * tcp_stack_t to modify and pval is the port number as a base-10 string.
 * cr, pinfo and ifname are not used.
 *
 * Returns:
 *   ENOTSUP - MOD_PROP_DEFAULT is set in flags.
 *   EINVAL  - the port does not parse or is not in 1..USHRT_MAX.
 *   ESRCH   - no entry for that port exists in tcps_listener_conf.
 *   0       - the matching entry was removed from the list and freed.
159 159 */
160 160 /* ARGSUSED */
161 161 static int
162 162 tcp_listener_conf_del(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
163 163 const char *ifname, const void* pval, uint_t flags)
164 164 {
165 165 tcp_listener_t *tl;
166 166 long lport;
167 167 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
168 168
169 169 if (flags & MOD_PROP_DEFAULT)
170 170 return (ENOTSUP);
171 171
172 172 if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 ||
173 173 lport > USHRT_MAX) {
174 174 return (EINVAL);
175 175 }
176 176 mutex_enter(&tcps->tcps_listener_conf_lock);
177 177 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
178 178 tl = list_next(&tcps->tcps_listener_conf, tl)) {
179 179 if (tl->tl_port == lport) {
 /* Unlink under the lock; the entry is freed after the lock is dropped. */
180 180 list_remove(&tcps->tcps_listener_conf, tl);
181 181 mutex_exit(&tcps->tcps_listener_conf_lock);
182 182 kmem_free(tl, sizeof (tcp_listener_t));
183 183 return (0);
184 184 }
185 185 }
186 186 mutex_exit(&tcps->tcps_listener_conf_lock);
187 187 return (ESRCH);
188 188 }
189 189
190 190 /*
191 191 * All of these are alterable, within the min/max values given, at run time.
192 192 *
193 193 * Note: Tunables whose names do not start with "_" are Committed and are
194 194 * therefore public. See PSARC 2010/080.
195 195 */
196 196 mod_prop_info_t tcp_propinfo_tbl[] = {
197 197 /* tunable - 0 */
198 198 { "_time_wait_interval", MOD_PROTO_TCP,
199 199 mod_set_uint32, mod_get_uint32,
200 200 {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} },
201 201
202 202 { "_conn_req_max_q", MOD_PROTO_TCP,
203 203 mod_set_uint32, mod_get_uint32,
204 204 {1, UINT32_MAX, 128}, {128} },
205 205
206 206 { "_conn_req_max_q0", MOD_PROTO_TCP,
207 207 mod_set_uint32, mod_get_uint32,
208 208 {0, UINT32_MAX, 1024}, {1024} },
209 209
210 210 { "_conn_req_min", MOD_PROTO_TCP,
211 211 mod_set_uint32, mod_get_uint32,
212 212 {1, 1024, 1}, {1} },
213 213
214 214 { "_conn_grace_period", MOD_PROTO_TCP,
215 215 mod_set_uint32, mod_get_uint32,
216 216 {0*MS, 20*SECONDS, 0*MS}, {0*MS} },
217 217
218 218 { "_cwnd_max", MOD_PROTO_TCP,
219 219 mod_set_uint32, mod_get_uint32,
220 220 {128, (1<<30), 1024*1024}, {1024*1024} },
221 221
222 222 { "_debug", MOD_PROTO_TCP,
223 223 mod_set_uint32, mod_get_uint32,
224 224 {0, 10, 0}, {0} },
225 225
226 226 { "smallest_nonpriv_port", MOD_PROTO_TCP,
227 227 mod_set_uint32, mod_get_uint32,
228 228 {1024, (32*1024), 1024}, {1024} },
229 229
230 230 { "_ip_abort_cinterval", MOD_PROTO_TCP,
231 231 mod_set_uint32, mod_get_uint32,
232 232 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },
233 233
234 234 { "_ip_abort_linterval", MOD_PROTO_TCP,
235 235 mod_set_uint32, mod_get_uint32,
236 236 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },
237 237
238 238 /* tunable - 10 */
239 239 { "_ip_abort_interval", MOD_PROTO_TCP,
240 240 mod_set_uint32, mod_get_uint32,
241 241 {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} },
242 242
243 243 { "_ip_notify_cinterval", MOD_PROTO_TCP,
244 244 mod_set_uint32, mod_get_uint32,
245 245 {1*SECONDS, UINT32_MAX, 10*SECONDS},
246 246 {10*SECONDS} },
247 247
248 248 { "_ip_notify_interval", MOD_PROTO_TCP,
249 249 mod_set_uint32, mod_get_uint32,
250 250 {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} },
251 251
252 252 { "_ipv4_ttl", MOD_PROTO_TCP,
253 253 mod_set_uint32, mod_get_uint32,
254 254 {1, 255, 64}, {64} },
255 255
256 256 { "_keepalive_interval", MOD_PROTO_TCP,
257 257 mod_set_uint32, mod_get_uint32,
258 258 {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} },
259 259
260 260 { "_maxpsz_multiplier", MOD_PROTO_TCP,
261 261 mod_set_uint32, mod_get_uint32,
262 262 {0, 100, 10}, {10} },
263 263
264 264 { "_mss_def_ipv4", MOD_PROTO_TCP,
265 265 mod_set_uint32, mod_get_uint32,
266 266 {1, TCP_MSS_MAX_IPV4, 536}, {536} },
267 267
268 268 { "_mss_max_ipv4", MOD_PROTO_TCP,
269 269 mod_set_uint32, mod_get_uint32,
270 270 {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4},
271 271 {TCP_MSS_MAX_IPV4} },
272 272
273 273 { "_mss_min", MOD_PROTO_TCP,
274 274 mod_set_uint32, mod_get_uint32,
275 275 {1, TCP_MSS_MAX, 108}, {108} },
276 276
277 277 { "_naglim_def", MOD_PROTO_TCP,
278 278 mod_set_uint32, mod_get_uint32,
279 279 {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} },
280 280
281 281 /* tunable - 20 */
282 282 { "_rexmit_interval_initial", MOD_PROTO_TCP,
283 283 mod_set_uint32, mod_get_uint32,
284 284 {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} },
285 285
286 286 { "_rexmit_interval_max", MOD_PROTO_TCP,
287 287 mod_set_uint32, mod_get_uint32,
288 288 {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} },
289 289
290 290 { "_rexmit_interval_min", MOD_PROTO_TCP,
291 291 mod_set_uint32, mod_get_uint32,
292 292 {1*MS, 2*HOURS, 400*MS}, {400*MS} },
293 293
294 294 { "_deferred_ack_interval", MOD_PROTO_TCP,
295 295 mod_set_uint32, mod_get_uint32,
296 296 {1*MS, 1*MINUTES, 100*MS}, {100*MS} },
297 297
298 298 { "_snd_lowat_fraction", MOD_PROTO_TCP,
299 299 mod_set_uint32, mod_get_uint32,
300 300 {0, 16, 0}, {0} },
301 301
302 302 { "_dupack_fast_retransmit", MOD_PROTO_TCP,
303 303 mod_set_uint32, mod_get_uint32,
304 304 {1, 10000, 3}, {3} },
305 305
306 306 { "_ignore_path_mtu", MOD_PROTO_TCP,
307 307 mod_set_boolean, mod_get_boolean,
308 308 {B_FALSE}, {B_FALSE} },
309 309
310 310 { "smallest_anon_port", MOD_PROTO_TCP,
311 311 mod_set_uint32, mod_get_uint32,
312 312 {1024, ULP_MAX_PORT, 32*1024}, {32*1024} },
313 313
314 314 { "largest_anon_port", MOD_PROTO_TCP,
315 315 mod_set_uint32, mod_get_uint32,
316 316 {1024, ULP_MAX_PORT, ULP_MAX_PORT},
317 317 {ULP_MAX_PORT} },
318 318
319 319 { "send_maxbuf", MOD_PROTO_TCP,
320 320 mod_set_uint32, mod_get_uint32,
321 321 {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_HIWATER},
322 322 {TCP_XMIT_HIWATER} },
323 323
324 324 /* tunable - 30 */
325 325 { "_xmit_lowat", MOD_PROTO_TCP,
326 326 mod_set_uint32, mod_get_uint32,
327 327 {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_LOWATER},
328 328 {TCP_XMIT_LOWATER} },
329 329
330 330 { "recv_maxbuf", MOD_PROTO_TCP,
331 331 mod_set_uint32, mod_get_uint32,
332 332 {TCP_RECV_LOWATER, (1<<30), TCP_RECV_HIWATER},
333 333 {TCP_RECV_HIWATER} },
334 334
335 335 { "_recv_hiwat_minmss", MOD_PROTO_TCP,
336 336 mod_set_uint32, mod_get_uint32,
337 337 {1, 65536, 4}, {4} },
338 338
339 339 { "_fin_wait_2_flush_interval", MOD_PROTO_TCP,
340 340 mod_set_uint32, mod_get_uint32,
341 341 {1*SECONDS, 2*HOURS, 60*SECONDS},
342 342 {60*SECONDS} },
343 343
344 344 { "_max_buf", MOD_PROTO_TCP,
345 345 mod_set_uint32, mod_get_uint32,
346 346 {8192, (1<<30), 1024*1024}, {1024*1024} },
347 347
348 348 /*
349 349 * Question: What default value should I set for tcp_strong_iss?
350 350 */
351 351 { "_strong_iss", MOD_PROTO_TCP,
352 352 mod_set_uint32, mod_get_uint32,
353 353 {0, 2, 1}, {1} },
354 354
355 355 { "_rtt_updates", MOD_PROTO_TCP,
356 356 mod_set_uint32, mod_get_uint32,
357 357 {0, 65536, 20}, {20} },
358 358
359 359 { "_wscale_always", MOD_PROTO_TCP,
360 360 mod_set_boolean, mod_get_boolean,
361 361 {B_TRUE}, {B_TRUE} },
362 362
363 363 { "_tstamp_always", MOD_PROTO_TCP,
364 364 mod_set_boolean, mod_get_boolean,
365 365 {B_FALSE}, {B_FALSE} },
366 366
367 367 { "_tstamp_if_wscale", MOD_PROTO_TCP,
368 368 mod_set_boolean, mod_get_boolean,
369 369 {B_TRUE}, {B_TRUE} },
370 370
371 371 /* tunable - 40 */
↓ open down ↓ |
371 lines elided |
↑ open up ↑ |
372 372 { "_rexmit_interval_extra", MOD_PROTO_TCP,
373 373 mod_set_uint32, mod_get_uint32,
374 374 {0*MS, 2*HOURS, 0*MS}, {0*MS} },
375 375
376 376 { "_deferred_acks_max", MOD_PROTO_TCP,
377 377 mod_set_uint32, mod_get_uint32,
378 378 {0, 16, 2}, {2} },
379 379
380 380 { "_slow_start_after_idle", MOD_PROTO_TCP,
381 381 mod_set_uint32, mod_get_uint32,
382 - {1, 16384, 4}, {4} },
382 + {0, 16384, 0}, {0} },
383 383
384 384 { "_slow_start_initial", MOD_PROTO_TCP,
385 385 mod_set_uint32, mod_get_uint32,
386 - {1, 4, 4}, {4} },
386 + {0, 16, 0}, {0} },
387 387
388 388 { "sack", MOD_PROTO_TCP,
389 389 mod_set_uint32, mod_get_uint32,
390 390 {0, 2, 2}, {2} },
391 391
392 392 { "_ipv6_hoplimit", MOD_PROTO_TCP,
393 393 mod_set_uint32, mod_get_uint32,
394 394 {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
395 395 {IPV6_DEFAULT_HOPS} },
396 396
397 397 { "_mss_def_ipv6", MOD_PROTO_TCP,
398 398 mod_set_uint32, mod_get_uint32,
399 399 {1, TCP_MSS_MAX_IPV6, 1220}, {1220} },
400 400
401 401 { "_mss_max_ipv6", MOD_PROTO_TCP,
402 402 mod_set_uint32, mod_get_uint32,
403 403 {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6},
404 404 {TCP_MSS_MAX_IPV6} },
405 405
406 406 { "_rev_src_routes", MOD_PROTO_TCP,
407 407 mod_set_boolean, mod_get_boolean,
408 408 {B_FALSE}, {B_FALSE} },
409 409
410 410 { "_local_dack_interval", MOD_PROTO_TCP,
411 411 mod_set_uint32, mod_get_uint32,
412 412 {10*MS, 500*MS, 50*MS}, {50*MS} },
413 413
414 414 /* tunable - 50 */
415 415 { "_local_dacks_max", MOD_PROTO_TCP,
416 416 mod_set_uint32, mod_get_uint32,
417 417 {0, 16, 8}, {8} },
418 418
419 419 { "ecn", MOD_PROTO_TCP,
420 420 mod_set_uint32, mod_get_uint32,
421 421 {0, 2, 1}, {1} },
422 422
423 423 { "_rst_sent_rate_enabled", MOD_PROTO_TCP,
424 424 mod_set_boolean, mod_get_boolean,
425 425 {B_TRUE}, {B_TRUE} },
426 426
427 427 { "_rst_sent_rate", MOD_PROTO_TCP,
428 428 mod_set_uint32, mod_get_uint32,
429 429 {0, UINT32_MAX, 40}, {40} },
430 430
431 431 { "_push_timer_interval", MOD_PROTO_TCP,
432 432 mod_set_uint32, mod_get_uint32,
433 433 {0, 100*MS, 50*MS}, {50*MS} },
434 434
435 435 { "_use_smss_as_mss_opt", MOD_PROTO_TCP,
436 436 mod_set_boolean, mod_get_boolean,
437 437 {B_FALSE}, {B_FALSE} },
438 438
439 439 { "_keepalive_abort_interval", MOD_PROTO_TCP,
440 440 mod_set_uint32, mod_get_uint32,
441 441 {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} },
442 442
443 443 /*
444 444 * tcp_wroff_xtra is the extra space reserved in front of the TCP/IP header
445 445 * for the link-layer header. It has to be a multiple of 8.
446 446 */
447 447 { "_wroff_xtra", MOD_PROTO_TCP,
448 448 mod_set_aligned, mod_get_uint32,
449 449 {0, 256, 32}, {32} },
450 450
451 451 { "_dev_flow_ctl", MOD_PROTO_TCP,
452 452 mod_set_boolean, mod_get_boolean,
453 453 {B_FALSE}, {B_FALSE} },
454 454
455 455 { "_reass_timeout", MOD_PROTO_TCP,
456 456 mod_set_uint32, mod_get_uint32,
457 457 {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} },
458 458
459 459 /* tunable - 60 */
460 460 { "extra_priv_ports", MOD_PROTO_TCP,
461 461 mod_set_extra_privports, mod_get_extra_privports,
462 462 {1, ULP_MAX_PORT, 0}, {0} },
463 463
464 464 { "_1948_phrase", MOD_PROTO_TCP,
465 465 tcp_set_1948phrase, NULL, {0}, {0} },
466 466
467 467 { "_listener_limit_conf", MOD_PROTO_TCP,
468 468 NULL, tcp_listener_conf_get, {0}, {0} },
469 469
470 470 { "_listener_limit_conf_add", MOD_PROTO_TCP,
471 471 tcp_listener_conf_add, NULL, {0}, {0} },
472 472
473 473 { "_listener_limit_conf_del", MOD_PROTO_TCP,
474 474 tcp_listener_conf_del, NULL, {0}, {0} },
475 475
476 476 { "_iss_incr", MOD_PROTO_TCP,
477 477 mod_set_uint32, mod_get_uint32,
478 478 {1, ISS_INCR, ISS_INCR},
479 479 {ISS_INCR} },
480 480
481 481 { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
482 482
483 483 { NULL, 0, NULL, NULL, {0}, {0} }
484 484 };
485 485
/*
 * Size of tcp_propinfo_tbl as computed by A_CNT(). Presumably an element
 * count that includes the terminating NULL entry - TODO confirm against the
 * A_CNT definition.
 */
486 486 int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);
↓ open down ↓ |
90 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX