1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011, Joyent Inc. All rights reserved.
24 */
25 /* Copyright (c) 1990 Mentat Inc. */
26
27 #include <inet/ip.h>
28 #include <inet/tcp_impl.h>
29 #include <sys/multidata.h>
30 #include <sys/sunddi.h>
31
/*
 * Largest MSS values.  The maximum size of an IP datagram is 64k - 1, and
 * the fixed IP header (IPv4 or IPv6) plus the TCP header come out of that.
 */
#define	TCP_MSS_MAX_IPV4	\
	(IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
#define	TCP_MSS_MAX_IPV6	\
	(IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))

/* The larger of the two limits above. */
#define	TCP_MSS_MAX		TCP_MSS_MAX_IPV4

/*
 * Transmit and receive water marks; these feed the ranges and defaults of
 * the buffer size tunables (send_maxbuf, _xmit_lowat, recv_maxbuf) in
 * tcp_propinfo_tbl.
 */
#define	TCP_XMIT_LOWATER	4096
#define	TCP_XMIT_HIWATER	49152
#define	TCP_RECV_LOWATER	2048
#define	TCP_RECV_HIWATER	128000
43
44 /*
45 * Set the RFC 1948 pass phrase
46 */
47 /* ARGSUSED */
48 static int
49 tcp_set_1948phrase(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
50 const char *ifname, const void* pr_val, uint_t flags)
51 {
52 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
53
54 if (flags & MOD_PROP_DEFAULT)
55 return (ENOTSUP);
56
57 /*
58 * Basically, value contains a new pass phrase. Pass it along!
59 */
60 tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val), tcps);
61 return (0);
62 }
63
64 /*
65 * returns the current list of listener limit configuration.
66 */
67 /* ARGSUSED */
68 static int
69 tcp_listener_conf_get(void *cbarg, mod_prop_info_t *pinfo, const char *ifname,
70 void *val, uint_t psize, uint_t flags)
71 {
72 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
73 tcp_listener_t *tl;
74 char *pval = val;
75 size_t nbytes = 0, tbytes = 0;
76 uint_t size;
77 int err = 0;
78
79 bzero(pval, psize);
80 size = psize;
81
82 if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE))
83 return (0);
84
85 mutex_enter(&tcps->tcps_listener_conf_lock);
86 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
87 tl = list_next(&tcps->tcps_listener_conf, tl)) {
88 if (psize == size)
89 nbytes = snprintf(pval, size, "%d:%d", tl->tl_port,
90 tl->tl_ratio);
91 else
92 nbytes = snprintf(pval, size, ",%d:%d", tl->tl_port,
93 tl->tl_ratio);
94 size -= nbytes;
95 pval += nbytes;
96 tbytes += nbytes;
97 if (tbytes >= psize) {
98 /* Buffer overflow, stop copying information */
99 err = ENOBUFS;
100 break;
101 }
102 }
103
104 mutex_exit(&tcps->tcps_listener_conf_lock);
105 return (err);
106 }
107
108 /*
109 * add a new listener limit configuration.
110 */
111 /* ARGSUSED */
112 static int
113 tcp_listener_conf_add(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
114 const char *ifname, const void* pval, uint_t flags)
115 {
116 tcp_listener_t *new_tl;
117 tcp_listener_t *tl;
118 long lport;
119 long ratio;
120 char *colon;
121 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
122
123 if (flags & MOD_PROP_DEFAULT)
124 return (ENOTSUP);
125
126 if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 ||
127 lport > USHRT_MAX || *colon != ':') {
128 return (EINVAL);
129 }
130 if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0)
131 return (EINVAL);
132
133 mutex_enter(&tcps->tcps_listener_conf_lock);
134 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
135 tl = list_next(&tcps->tcps_listener_conf, tl)) {
136 /* There is an existing entry, so update its ratio value. */
137 if (tl->tl_port == lport) {
138 tl->tl_ratio = ratio;
139 mutex_exit(&tcps->tcps_listener_conf_lock);
140 return (0);
141 }
142 }
143
144 if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) ==
145 NULL) {
146 mutex_exit(&tcps->tcps_listener_conf_lock);
147 return (ENOMEM);
148 }
149
150 new_tl->tl_port = lport;
151 new_tl->tl_ratio = ratio;
152 list_insert_tail(&tcps->tcps_listener_conf, new_tl);
153 mutex_exit(&tcps->tcps_listener_conf_lock);
154 return (0);
155 }
156
157 /*
158 * remove a listener limit configuration.
159 */
160 /* ARGSUSED */
161 static int
162 tcp_listener_conf_del(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
163 const char *ifname, const void* pval, uint_t flags)
164 {
165 tcp_listener_t *tl;
166 long lport;
167 tcp_stack_t *tcps = (tcp_stack_t *)cbarg;
168
169 if (flags & MOD_PROP_DEFAULT)
170 return (ENOTSUP);
171
172 if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 ||
173 lport > USHRT_MAX) {
174 return (EINVAL);
175 }
176 mutex_enter(&tcps->tcps_listener_conf_lock);
177 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
178 tl = list_next(&tcps->tcps_listener_conf, tl)) {
179 if (tl->tl_port == lport) {
180 list_remove(&tcps->tcps_listener_conf, tl);
181 mutex_exit(&tcps->tcps_listener_conf_lock);
182 kmem_free(tl, sizeof (tcp_listener_t));
183 return (0);
184 }
185 }
186 mutex_exit(&tcps->tcps_listener_conf_lock);
187 return (ESRCH);
188 }
189
190 /*
191 * All of these are alterable, within the min/max values given, at run time.
192 *
193 * Note: All those tunables which do not start with "_" are Committed and
194 * therefore are public. See PSARC 2010/080.
195 */
196 mod_prop_info_t tcp_propinfo_tbl[] = {
197 /* tunable - 0 */
198 { "_time_wait_interval", MOD_PROTO_TCP,
199 mod_set_uint32, mod_get_uint32,
200 {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} },
201
202 { "_conn_req_max_q", MOD_PROTO_TCP,
203 mod_set_uint32, mod_get_uint32,
204 {1, UINT32_MAX, 128}, {128} },
205
206 { "_conn_req_max_q0", MOD_PROTO_TCP,
207 mod_set_uint32, mod_get_uint32,
208 {0, UINT32_MAX, 1024}, {1024} },
209
210 { "_conn_req_min", MOD_PROTO_TCP,
211 mod_set_uint32, mod_get_uint32,
212 {1, 1024, 1}, {1} },
213
214 { "_conn_grace_period", MOD_PROTO_TCP,
215 mod_set_uint32, mod_get_uint32,
216 {0*MS, 20*SECONDS, 0*MS}, {0*MS} },
217
218 { "_cwnd_max", MOD_PROTO_TCP,
219 mod_set_uint32, mod_get_uint32,
220 {128, (1<<30), 1024*1024}, {1024*1024} },
221
222 { "_debug", MOD_PROTO_TCP,
223 mod_set_uint32, mod_get_uint32,
224 {0, 10, 0}, {0} },
225
226 { "smallest_nonpriv_port", MOD_PROTO_TCP,
227 mod_set_uint32, mod_get_uint32,
228 {1024, (32*1024), 1024}, {1024} },
229
230 { "_ip_abort_cinterval", MOD_PROTO_TCP,
231 mod_set_uint32, mod_get_uint32,
232 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },
233
234 { "_ip_abort_linterval", MOD_PROTO_TCP,
235 mod_set_uint32, mod_get_uint32,
236 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },
237
238 /* tunable - 10 */
239 { "_ip_abort_interval", MOD_PROTO_TCP,
240 mod_set_uint32, mod_get_uint32,
241 {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} },
242
243 { "_ip_notify_cinterval", MOD_PROTO_TCP,
244 mod_set_uint32, mod_get_uint32,
245 {1*SECONDS, UINT32_MAX, 10*SECONDS},
246 {10*SECONDS} },
247
248 { "_ip_notify_interval", MOD_PROTO_TCP,
249 mod_set_uint32, mod_get_uint32,
250 {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} },
251
252 { "_ipv4_ttl", MOD_PROTO_TCP,
253 mod_set_uint32, mod_get_uint32,
254 {1, 255, 64}, {64} },
255
256 { "_keepalive_interval", MOD_PROTO_TCP,
257 mod_set_uint32, mod_get_uint32,
258 {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} },
259
260 { "_maxpsz_multiplier", MOD_PROTO_TCP,
261 mod_set_uint32, mod_get_uint32,
262 {0, 100, 10}, {10} },
263
264 { "_mss_def_ipv4", MOD_PROTO_TCP,
265 mod_set_uint32, mod_get_uint32,
266 {1, TCP_MSS_MAX_IPV4, 536}, {536} },
267
268 { "_mss_max_ipv4", MOD_PROTO_TCP,
269 mod_set_uint32, mod_get_uint32,
270 {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4},
271 {TCP_MSS_MAX_IPV4} },
272
273 { "_mss_min", MOD_PROTO_TCP,
274 mod_set_uint32, mod_get_uint32,
275 {1, TCP_MSS_MAX, 108}, {108} },
276
277 { "_naglim_def", MOD_PROTO_TCP,
278 mod_set_uint32, mod_get_uint32,
279 {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} },
280
281 /* tunable - 20 */
282 { "_rexmit_interval_initial", MOD_PROTO_TCP,
283 mod_set_uint32, mod_get_uint32,
284 {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} },
285
286 { "_rexmit_interval_max", MOD_PROTO_TCP,
287 mod_set_uint32, mod_get_uint32,
288 {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} },
289
290 { "_rexmit_interval_min", MOD_PROTO_TCP,
291 mod_set_uint32, mod_get_uint32,
292 {1*MS, 2*HOURS, 400*MS}, {400*MS} },
293
294 { "_deferred_ack_interval", MOD_PROTO_TCP,
295 mod_set_uint32, mod_get_uint32,
296 {1*MS, 1*MINUTES, 100*MS}, {100*MS} },
297
298 { "_snd_lowat_fraction", MOD_PROTO_TCP,
299 mod_set_uint32, mod_get_uint32,
300 {0, 16, 0}, {0} },
301
302 { "_dupack_fast_retransmit", MOD_PROTO_TCP,
303 mod_set_uint32, mod_get_uint32,
304 {1, 10000, 3}, {3} },
305
306 { "_ignore_path_mtu", MOD_PROTO_TCP,
307 mod_set_boolean, mod_get_boolean,
308 {B_FALSE}, {B_FALSE} },
309
310 { "smallest_anon_port", MOD_PROTO_TCP,
311 mod_set_uint32, mod_get_uint32,
312 {1024, ULP_MAX_PORT, 32*1024}, {32*1024} },
313
314 { "largest_anon_port", MOD_PROTO_TCP,
315 mod_set_uint32, mod_get_uint32,
316 {1024, ULP_MAX_PORT, ULP_MAX_PORT},
317 {ULP_MAX_PORT} },
318
319 { "send_maxbuf", MOD_PROTO_TCP,
320 mod_set_uint32, mod_get_uint32,
321 {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_HIWATER},
322 {TCP_XMIT_HIWATER} },
323
324 /* tunable - 30 */
325 { "_xmit_lowat", MOD_PROTO_TCP,
326 mod_set_uint32, mod_get_uint32,
327 {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_LOWATER},
328 {TCP_XMIT_LOWATER} },
329
330 { "recv_maxbuf", MOD_PROTO_TCP,
331 mod_set_uint32, mod_get_uint32,
332 {TCP_RECV_LOWATER, (1<<30), TCP_RECV_HIWATER},
333 {TCP_RECV_HIWATER} },
334
335 { "_recv_hiwat_minmss", MOD_PROTO_TCP,
336 mod_set_uint32, mod_get_uint32,
337 {1, 65536, 4}, {4} },
338
339 { "_fin_wait_2_flush_interval", MOD_PROTO_TCP,
340 mod_set_uint32, mod_get_uint32,
341 {1*SECONDS, 2*HOURS, 60*SECONDS},
342 {60*SECONDS} },
343
344 { "_max_buf", MOD_PROTO_TCP,
345 mod_set_uint32, mod_get_uint32,
346 {8192, (1<<30), 1024*1024}, {1024*1024} },
347
348 /*
349 * Question: What default value should I set for tcp_strong_iss?
350 */
351 { "_strong_iss", MOD_PROTO_TCP,
352 mod_set_uint32, mod_get_uint32,
353 {0, 2, 1}, {1} },
354
355 { "_rtt_updates", MOD_PROTO_TCP,
356 mod_set_uint32, mod_get_uint32,
357 {0, 65536, 20}, {20} },
358
359 { "_wscale_always", MOD_PROTO_TCP,
360 mod_set_boolean, mod_get_boolean,
361 {B_TRUE}, {B_TRUE} },
362
363 { "_tstamp_always", MOD_PROTO_TCP,
364 mod_set_boolean, mod_get_boolean,
365 {B_FALSE}, {B_FALSE} },
366
367 { "_tstamp_if_wscale", MOD_PROTO_TCP,
368 mod_set_boolean, mod_get_boolean,
369 {B_TRUE}, {B_TRUE} },
370
371 /* tunable - 40 */
372 { "_rexmit_interval_extra", MOD_PROTO_TCP,
373 mod_set_uint32, mod_get_uint32,
374 {0*MS, 2*HOURS, 0*MS}, {0*MS} },
375
376 { "_deferred_acks_max", MOD_PROTO_TCP,
377 mod_set_uint32, mod_get_uint32,
378 {0, 16, 2}, {2} },
379
380 { "_slow_start_after_idle", MOD_PROTO_TCP,
381 mod_set_uint32, mod_get_uint32,
382 {1, 16384, 4}, {4} },
383
384 { "_slow_start_initial", MOD_PROTO_TCP,
385 mod_set_uint32, mod_get_uint32,
386 {1, 4, 4}, {4} },
387
388 { "sack", MOD_PROTO_TCP,
389 mod_set_uint32, mod_get_uint32,
390 {0, 2, 2}, {2} },
391
392 { "_ipv6_hoplimit", MOD_PROTO_TCP,
393 mod_set_uint32, mod_get_uint32,
394 {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
395 {IPV6_DEFAULT_HOPS} },
396
397 { "_mss_def_ipv6", MOD_PROTO_TCP,
398 mod_set_uint32, mod_get_uint32,
399 {1, TCP_MSS_MAX_IPV6, 1220}, {1220} },
400
401 { "_mss_max_ipv6", MOD_PROTO_TCP,
402 mod_set_uint32, mod_get_uint32,
403 {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6},
404 {TCP_MSS_MAX_IPV6} },
405
406 { "_rev_src_routes", MOD_PROTO_TCP,
407 mod_set_boolean, mod_get_boolean,
408 {B_FALSE}, {B_FALSE} },
409
410 { "_local_dack_interval", MOD_PROTO_TCP,
411 mod_set_uint32, mod_get_uint32,
412 {10*MS, 500*MS, 50*MS}, {50*MS} },
413
414 /* tunable - 50 */
415 { "_local_dacks_max", MOD_PROTO_TCP,
416 mod_set_uint32, mod_get_uint32,
417 {0, 16, 8}, {8} },
418
419 { "ecn", MOD_PROTO_TCP,
420 mod_set_uint32, mod_get_uint32,
421 {0, 2, 1}, {1} },
422
423 { "_rst_sent_rate_enabled", MOD_PROTO_TCP,
424 mod_set_boolean, mod_get_boolean,
425 {B_TRUE}, {B_TRUE} },
426
427 { "_rst_sent_rate", MOD_PROTO_TCP,
428 mod_set_uint32, mod_get_uint32,
429 {0, UINT32_MAX, 40}, {40} },
430
431 { "_push_timer_interval", MOD_PROTO_TCP,
432 mod_set_uint32, mod_get_uint32,
433 {0, 100*MS, 50*MS}, {50*MS} },
434
435 { "_use_smss_as_mss_opt", MOD_PROTO_TCP,
436 mod_set_boolean, mod_get_boolean,
437 {B_FALSE}, {B_FALSE} },
438
439 { "_keepalive_abort_interval", MOD_PROTO_TCP,
440 mod_set_uint32, mod_get_uint32,
441 {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} },
442
443 /*
444 * tcp_wroff_xtra is the extra space in front of TCP/IP header for link
445 * layer header. It has to be a multiple of 8.
446 */
447 { "_wroff_xtra", MOD_PROTO_TCP,
448 mod_set_aligned, mod_get_uint32,
449 {0, 256, 32}, {32} },
450
451 { "_dev_flow_ctl", MOD_PROTO_TCP,
452 mod_set_boolean, mod_get_boolean,
453 {B_FALSE}, {B_FALSE} },
454
455 { "_reass_timeout", MOD_PROTO_TCP,
456 mod_set_uint32, mod_get_uint32,
457 {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} },
458
459 /* tunable - 60 */
460 { "extra_priv_ports", MOD_PROTO_TCP,
461 mod_set_extra_privports, mod_get_extra_privports,
462 {1, ULP_MAX_PORT, 0}, {0} },
463
464 { "_1948_phrase", MOD_PROTO_TCP,
465 tcp_set_1948phrase, NULL, {0}, {0} },
466
467 { "_listener_limit_conf", MOD_PROTO_TCP,
468 NULL, tcp_listener_conf_get, {0}, {0} },
469
470 { "_listener_limit_conf_add", MOD_PROTO_TCP,
471 tcp_listener_conf_add, NULL, {0}, {0} },
472
473 { "_listener_limit_conf_del", MOD_PROTO_TCP,
474 tcp_listener_conf_del, NULL, {0}, {0} },
475
476 { "_iss_incr", MOD_PROTO_TCP,
477 mod_set_uint32, mod_get_uint32,
478 {1, ISS_INCR, ISS_INCR},
479 {ISS_INCR} },
480
481 { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
482
483 { NULL, 0, NULL, NULL, {0}, {0} }
484 };
485
486 int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);