1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011, Joyent Inc. All rights reserved. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #include <inet/ip.h> 28 #include <inet/tcp_impl.h> 29 #include <sys/multidata.h> 30 #include <sys/sunddi.h> 31 32 /* Max size IP datagram is 64k - 1 */ 33 #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t))) 34 #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t))) 35 36 /* Max of the above */ 37 #define TCP_MSS_MAX TCP_MSS_MAX_IPV4 38 39 #define TCP_XMIT_LOWATER 4096 40 #define TCP_XMIT_HIWATER 49152 41 #define TCP_RECV_LOWATER 2048 42 #define TCP_RECV_HIWATER 128000 43 44 /* 45 * Set the RFC 1948 pass phrase 46 */ 47 /* ARGSUSED */ 48 static int 49 tcp_set_1948phrase(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo, 50 const char *ifname, const void* pr_val, uint_t flags) 51 { 52 tcp_stack_t *tcps = (tcp_stack_t *)cbarg; 53 54 if (flags & MOD_PROP_DEFAULT) 55 return (ENOTSUP); 56 57 /* 58 * Basically, value contains a new pass phrase. Pass it along! 
59 */ 60 tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val), tcps); 61 return (0); 62 } 63 64 /* 65 * returns the current list of listener limit configuration. 66 */ 67 /* ARGSUSED */ 68 static int 69 tcp_listener_conf_get(void *cbarg, mod_prop_info_t *pinfo, const char *ifname, 70 void *val, uint_t psize, uint_t flags) 71 { 72 tcp_stack_t *tcps = (tcp_stack_t *)cbarg; 73 tcp_listener_t *tl; 74 char *pval = val; 75 size_t nbytes = 0, tbytes = 0; 76 uint_t size; 77 int err = 0; 78 79 bzero(pval, psize); 80 size = psize; 81 82 if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE)) 83 return (0); 84 85 mutex_enter(&tcps->tcps_listener_conf_lock); 86 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL; 87 tl = list_next(&tcps->tcps_listener_conf, tl)) { 88 if (psize == size) 89 nbytes = snprintf(pval, size, "%d:%d", tl->tl_port, 90 tl->tl_ratio); 91 else 92 nbytes = snprintf(pval, size, ",%d:%d", tl->tl_port, 93 tl->tl_ratio); 94 size -= nbytes; 95 pval += nbytes; 96 tbytes += nbytes; 97 if (tbytes >= psize) { 98 /* Buffer overflow, stop copying information */ 99 err = ENOBUFS; 100 break; 101 } 102 } 103 104 mutex_exit(&tcps->tcps_listener_conf_lock); 105 return (err); 106 } 107 108 /* 109 * add a new listener limit configuration. 
110 */ 111 /* ARGSUSED */ 112 static int 113 tcp_listener_conf_add(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo, 114 const char *ifname, const void* pval, uint_t flags) 115 { 116 tcp_listener_t *new_tl; 117 tcp_listener_t *tl; 118 long lport; 119 long ratio; 120 char *colon; 121 tcp_stack_t *tcps = (tcp_stack_t *)cbarg; 122 123 if (flags & MOD_PROP_DEFAULT) 124 return (ENOTSUP); 125 126 if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 || 127 lport > USHRT_MAX || *colon != ':') { 128 return (EINVAL); 129 } 130 if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0) 131 return (EINVAL); 132 133 mutex_enter(&tcps->tcps_listener_conf_lock); 134 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL; 135 tl = list_next(&tcps->tcps_listener_conf, tl)) { 136 /* There is an existing entry, so update its ratio value. */ 137 if (tl->tl_port == lport) { 138 tl->tl_ratio = ratio; 139 mutex_exit(&tcps->tcps_listener_conf_lock); 140 return (0); 141 } 142 } 143 144 if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) == 145 NULL) { 146 mutex_exit(&tcps->tcps_listener_conf_lock); 147 return (ENOMEM); 148 } 149 150 new_tl->tl_port = lport; 151 new_tl->tl_ratio = ratio; 152 list_insert_tail(&tcps->tcps_listener_conf, new_tl); 153 mutex_exit(&tcps->tcps_listener_conf_lock); 154 return (0); 155 } 156 157 /* 158 * remove a listener limit configuration. 
159 */ 160 /* ARGSUSED */ 161 static int 162 tcp_listener_conf_del(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo, 163 const char *ifname, const void* pval, uint_t flags) 164 { 165 tcp_listener_t *tl; 166 long lport; 167 tcp_stack_t *tcps = (tcp_stack_t *)cbarg; 168 169 if (flags & MOD_PROP_DEFAULT) 170 return (ENOTSUP); 171 172 if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 || 173 lport > USHRT_MAX) { 174 return (EINVAL); 175 } 176 mutex_enter(&tcps->tcps_listener_conf_lock); 177 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL; 178 tl = list_next(&tcps->tcps_listener_conf, tl)) { 179 if (tl->tl_port == lport) { 180 list_remove(&tcps->tcps_listener_conf, tl); 181 mutex_exit(&tcps->tcps_listener_conf_lock); 182 kmem_free(tl, sizeof (tcp_listener_t)); 183 return (0); 184 } 185 } 186 mutex_exit(&tcps->tcps_listener_conf_lock); 187 return (ESRCH); 188 } 189 190 /* 191 * All of these are alterable, within the min/max values given, at run time. 192 * 193 * Note: All those tunables which do not start with "_" are Committed and 194 * therefore are public. See PSARC 2010/080. 
 *
 * NOTE(review): each entry appears to be laid out as name, protocol, set
 * callback, get callback, {min, max, current} (or a lone boolean value),
 * {default}; confirm against the mod_prop_info_t declaration.  The
 * "tunable - N" markers below give the array index of the entry that follows
 * them, so entries must not be reordered or inserted without checking users
 * of those indices.
 */
mod_prop_info_t tcp_propinfo_tbl[] = {
	/* tunable - 0 */
	{ "_time_wait_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} },

	{ "_conn_req_max_q", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, UINT32_MAX, 128}, {128} },

	{ "_conn_req_max_q0", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, UINT32_MAX, 1024}, {1024} },

	{ "_conn_req_min", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 1024, 1}, {1} },

	{ "_conn_grace_period", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0*MS, 20*SECONDS, 0*MS}, {0*MS} },

	{ "_cwnd_max", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {128, (1<<30), 1024*1024}, {1024*1024} },

	{ "_debug", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 10, 0}, {0} },

	{ "smallest_nonpriv_port", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1024, (32*1024), 1024}, {1024} },

	{ "_ip_abort_cinterval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },

	{ "_ip_abort_linterval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },

	/* tunable - 10 */
	{ "_ip_abort_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} },

	{ "_ip_notify_cinterval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, UINT32_MAX, 10*SECONDS},
	    {10*SECONDS} },

	{ "_ip_notify_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} },

	{ "_ipv4_ttl", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 255, 64}, {64} },

	{ "_keepalive_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} },

	{ "_maxpsz_multiplier", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 100, 10}, {10} },

	{ "_mss_def_ipv4", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX_IPV4, 536}, {536} },

	{ "_mss_max_ipv4", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4},
	    {TCP_MSS_MAX_IPV4} },

	{ "_mss_min", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX, 108}, {108} },

	{ "_naglim_def", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} },

	/* tunable - 20 */
	{ "_rexmit_interval_initial", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} },

	{ "_rexmit_interval_max", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} },

	{ "_rexmit_interval_min", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*MS, 2*HOURS, 400*MS}, {400*MS} },

	{ "_deferred_ack_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*MS, 1*MINUTES, 100*MS}, {100*MS} },

	{ "_snd_lowat_fraction", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 16, 0}, {0} },

	{ "_dupack_fast_retransmit", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 10000, 3}, {3} },

	{ "_ignore_path_mtu", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "smallest_anon_port", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1024, ULP_MAX_PORT, 32*1024}, {32*1024} },

	{ "largest_anon_port", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1024, ULP_MAX_PORT, ULP_MAX_PORT},
	    {ULP_MAX_PORT} },

	{ "send_maxbuf", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_HIWATER},
	    {TCP_XMIT_HIWATER} },

	/* tunable - 30 */
	{ "_xmit_lowat", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_LOWATER},
	    {TCP_XMIT_LOWATER} },

	{ "recv_maxbuf", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {TCP_RECV_LOWATER, (1<<30), TCP_RECV_HIWATER},
	    {TCP_RECV_HIWATER} },

	{ "_recv_hiwat_minmss", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 65536, 4}, {4} },

	{ "_fin_wait_2_flush_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1*SECONDS, 2*HOURS, 60*SECONDS},
	    {60*SECONDS} },

	{ "_max_buf", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {8192, (1<<30), 1024*1024}, {1024*1024} },

	/*
	 * Question:  What default value should I set for tcp_strong_iss?
	 */
	{ "_strong_iss", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 2, 1}, {1} },

	{ "_rtt_updates", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 65536, 20}, {20} },

	{ "_wscale_always", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_TRUE}, {B_TRUE} },

	{ "_tstamp_always", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "_tstamp_if_wscale", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_TRUE}, {B_TRUE} },

	/* tunable - 40 */
	{ "_rexmit_interval_extra", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0*MS, 2*HOURS, 0*MS}, {0*MS} },

	{ "_deferred_acks_max", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 16, 2}, {2} },

	{ "_slow_start_after_idle", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 16384, 4}, {4} },

	{ "_slow_start_initial", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, 4, 4}, {4} },

	{ "sack", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 2, 2}, {2} },

	{ "_ipv6_hoplimit", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
	    {IPV6_DEFAULT_HOPS} },

	{ "_mss_def_ipv6", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX_IPV6, 1220}, {1220} },

	{ "_mss_max_ipv6", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6},
	    {TCP_MSS_MAX_IPV6} },

	{ "_rev_src_routes", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "_local_dack_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {10*MS, 500*MS, 50*MS}, {50*MS} },

	/* tunable - 50 */
	{ "_local_dacks_max", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 16, 8}, {8} },

	{ "ecn", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 2, 1}, {1} },

	{ "_rst_sent_rate_enabled", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_TRUE}, {B_TRUE} },

	{ "_rst_sent_rate", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, UINT32_MAX, 40}, {40} },

	{ "_push_timer_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, 100*MS, 50*MS}, {50*MS} },

	{ "_use_smss_as_mss_opt", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "_keepalive_abort_interval", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} },

	/*
	 * tcp_wroff_xtra is the extra space in front of TCP/IP header for link
	 * layer header.  It has to be a multiple of 8.
	 */
	{ "_wroff_xtra", MOD_PROTO_TCP,
	    mod_set_aligned, mod_get_uint32,
	    {0, 256, 32}, {32} },

	{ "_dev_flow_ctl", MOD_PROTO_TCP,
	    mod_set_boolean, mod_get_boolean,
	    {B_FALSE}, {B_FALSE} },

	{ "_reass_timeout", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} },

	/* tunable - 60 */
	{ "extra_priv_ports", MOD_PROTO_TCP,
	    mod_set_extra_privports, mod_get_extra_privports,
	    {1, ULP_MAX_PORT, 0}, {0} },

	/*
	 * The next four entries use the callbacks defined in this file.  Each
	 * supplies only one of the two callbacks: the pass phrase and the
	 * add/del entries have no get callback, and the listener limit list
	 * has no set callback.
	 */
	{ "_1948_phrase", MOD_PROTO_TCP,
	    tcp_set_1948phrase, NULL, {0}, {0} },

	{ "_listener_limit_conf", MOD_PROTO_TCP,
	    NULL, tcp_listener_conf_get, {0}, {0} },

	{ "_listener_limit_conf_add", MOD_PROTO_TCP,
	    tcp_listener_conf_add, NULL, {0}, {0} },

	{ "_listener_limit_conf_del", MOD_PROTO_TCP,
	    tcp_listener_conf_del, NULL, {0}, {0} },

	{ "_iss_incr", MOD_PROTO_TCP,
	    mod_set_uint32, mod_get_uint32,
	    {1, ISS_INCR, ISS_INCR},
	    {ISS_INCR} },

	/* Get-only entry; presumably reports every property - confirm. */
	{ "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },

	/* All-NULL entry, presumably the end-of-table marker - confirm. */
	{ NULL, 0, NULL, NULL, {0}, {0} }
};

/*
 * Entry count of tcp_propinfo_tbl as computed by A_CNT(); assuming A_CNT is
 * the usual array element count, this includes the final all-NULL entry.
 */
int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);