1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011, Joyent Inc. All rights reserved. 24 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2013 by Delphix. All rights reserved. 26 */ 27 /* Copyright (c) 1990 Mentat Inc. */ 28 29 #include <inet/ip.h> 30 #include <inet/tcp_impl.h> 31 #include <sys/multidata.h> 32 #include <sys/sunddi.h> 33 34 /* Max size IP datagram is 64k - 1 */ 35 #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t))) 36 #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t))) 37 38 /* Max of the above */ 39 #define TCP_MSS_MAX TCP_MSS_MAX_IPV4 40 41 /* 42 * Set the RFC 1948 pass phrase 43 */ 44 /* ARGSUSED */ 45 static int 46 tcp_set_1948phrase(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo, 47 const char *ifname, const void* pr_val, uint_t flags) 48 { 49 if (flags & MOD_PROP_DEFAULT) 50 return (ENOTSUP); 51 52 /* 53 * Basically, value contains a new pass phrase. Pass it along! 54 */ 55 tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val), 56 stack->netstack_tcp); 57 return (0); 58 } 59 60 /* 61 * returns the current list of listener limit configuration. 62 */ 63 /* ARGSUSED */ 64 static int 65 tcp_listener_conf_get(netstack_t *stack, mod_prop_info_t *pinfo, 66 const char *ifname, void *val, uint_t psize, uint_t flags) 67 { 68 tcp_stack_t *tcps = stack->netstack_tcp; 69 tcp_listener_t *tl; 70 char *pval = val; 71 size_t nbytes = 0, tbytes = 0; 72 uint_t size; 73 int err = 0; 74 75 bzero(pval, psize); 76 size = psize; 77 78 if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE)) 79 return (0); 80 81 mutex_enter(&tcps->tcps_listener_conf_lock); 82 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL; 83 tl = list_next(&tcps->tcps_listener_conf, tl)) { 84 if (psize == size) 85 nbytes = snprintf(pval, size, "%d:%d", tl->tl_port, 86 tl->tl_ratio); 87 else 88 nbytes = snprintf(pval, size, ",%d:%d", tl->tl_port, 89 tl->tl_ratio); 90 size -= nbytes; 91 pval += nbytes; 92 tbytes += nbytes; 93 if (tbytes >= psize) { 94 /* Buffer overflow, stop copying information */ 95 err = ENOBUFS; 96 break; 97 } 98 } 99 100 mutex_exit(&tcps->tcps_listener_conf_lock); 101 return (err); 102 } 103 104 /* 105 * add a new listener limit configuration. 106 */ 107 /* ARGSUSED */ 108 static int 109 tcp_listener_conf_add(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo, 110 const char *ifname, const void* pval, uint_t flags) 111 { 112 tcp_listener_t *new_tl; 113 tcp_listener_t *tl; 114 long lport; 115 long ratio; 116 char *colon; 117 tcp_stack_t *tcps = stack->netstack_tcp; 118 119 if (flags & MOD_PROP_DEFAULT) 120 return (ENOTSUP); 121 122 if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 || 123 lport > USHRT_MAX || *colon != ':') { 124 return (EINVAL); 125 } 126 if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0) 127 return (EINVAL); 128 129 mutex_enter(&tcps->tcps_listener_conf_lock); 130 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL; 131 tl = list_next(&tcps->tcps_listener_conf, tl)) { 132 /* There is an existing entry, so update its ratio value. */ 133 if (tl->tl_port == lport) { 134 tl->tl_ratio = ratio; 135 mutex_exit(&tcps->tcps_listener_conf_lock); 136 return (0); 137 } 138 } 139 140 if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) == 141 NULL) { 142 mutex_exit(&tcps->tcps_listener_conf_lock); 143 return (ENOMEM); 144 } 145 146 new_tl->tl_port = lport; 147 new_tl->tl_ratio = ratio; 148 list_insert_tail(&tcps->tcps_listener_conf, new_tl); 149 mutex_exit(&tcps->tcps_listener_conf_lock); 150 return (0); 151 } 152 153 /* 154 * remove a listener limit configuration. 155 */ 156 /* ARGSUSED */ 157 static int 158 tcp_listener_conf_del(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo, 159 const char *ifname, const void* pval, uint_t flags) 160 { 161 tcp_listener_t *tl; 162 long lport; 163 tcp_stack_t *tcps = stack->netstack_tcp; 164 165 if (flags & MOD_PROP_DEFAULT) 166 return (ENOTSUP); 167 168 if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 || 169 lport > USHRT_MAX) { 170 return (EINVAL); 171 } 172 mutex_enter(&tcps->tcps_listener_conf_lock); 173 for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL; 174 tl = list_next(&tcps->tcps_listener_conf, tl)) { 175 if (tl->tl_port == lport) { 176 list_remove(&tcps->tcps_listener_conf, tl); 177 mutex_exit(&tcps->tcps_listener_conf_lock); 178 kmem_free(tl, sizeof (tcp_listener_t)); 179 return (0); 180 } 181 } 182 mutex_exit(&tcps->tcps_listener_conf_lock); 183 return (ESRCH); 184 } 185 186 static int 187 tcp_set_buf_prop(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo, 188 const char *ifname, const void *pval, uint_t flags) 189 { 190 return (mod_set_buf_prop(stack->netstack_tcp->tcps_propinfo_tbl, stack, 191 cr, pinfo, ifname, pval, flags)); 192 } 193 194 static int 195 tcp_get_buf_prop(netstack_t *stack, mod_prop_info_t *pinfo, const char *ifname, 196 void *val, uint_t psize, uint_t flags) 197 { 198 return (mod_get_buf_prop(stack->netstack_tcp->tcps_propinfo_tbl, stack, 199 pinfo, ifname, val, psize, flags)); 200 } 201 202 /* 203 * Special checkers for smallest/largest anonymous port so they don't 204 * ever happen to be (largest < smallest). 205 */ 206 /* ARGSUSED */ 207 static int 208 tcp_smallest_anon_set(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo, 209 const char *ifname, const void *pval, uint_t flags) 210 { 211 unsigned long new_value; 212 tcp_stack_t *tcps = stack->netstack_tcp; 213 int err; 214 215 if ((err = mod_uint32_value(pval, pinfo, flags, &new_value)) != 0) 216 return (err); 217 /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */ 218 if ((uint32_t)new_value > tcps->tcps_largest_anon_port) 219 return (ERANGE); 220 pinfo->prop_cur_uval = (uint32_t)new_value; 221 return (0); 222 } 223 224 /* ARGSUSED */ 225 static int 226 tcp_largest_anon_set(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo, 227 const char *ifname, const void *pval, uint_t flags) 228 { 229 unsigned long new_value; 230 tcp_stack_t *tcps = stack->netstack_tcp; 231 int err; 232 233 if ((err = mod_uint32_value(pval, pinfo, flags, &new_value)) != 0) 234 return (err); 235 /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */ 236 if ((uint32_t)new_value < tcps->tcps_smallest_anon_port) 237 return (ERANGE); 238 pinfo->prop_cur_uval = (uint32_t)new_value; 239 return (0); 240 } 241 242 /* 243 * All of these are alterable, within the min/max values given, at run time. 244 * 245 * Note: All those tunables which do not start with "_" are Committed and 246 * therefore are public. See PSARC 2010/080. 247 */ 248 mod_prop_info_t tcp_propinfo_tbl[] = { 249 /* tunable - 0 */ 250 { "_time_wait_interval", MOD_PROTO_TCP, 251 mod_set_uint32, mod_get_uint32, 252 {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} }, 253 254 { "_conn_req_max_q", MOD_PROTO_TCP, 255 mod_set_uint32, mod_get_uint32, 256 {1, UINT32_MAX, 128}, {128} }, 257 258 { "_conn_req_max_q0", MOD_PROTO_TCP, 259 mod_set_uint32, mod_get_uint32, 260 {0, UINT32_MAX, 1024}, {1024} }, 261 262 { "_conn_req_min", MOD_PROTO_TCP, 263 mod_set_uint32, mod_get_uint32, 264 {1, 1024, 1}, {1} }, 265 266 { "_conn_grace_period", MOD_PROTO_TCP, 267 mod_set_uint32, mod_get_uint32, 268 {0*MS, 20*SECONDS, 0*MS}, {0*MS} }, 269 270 { "_cwnd_max", MOD_PROTO_TCP, 271 mod_set_uint32, mod_get_uint32, 272 {128, ULP_MAX_BUF, 1024*1024}, {1024*1024} }, 273 274 { "_debug", MOD_PROTO_TCP, 275 mod_set_uint32, mod_get_uint32, 276 {0, 10, 0}, {0} }, 277 278 { "smallest_nonpriv_port", MOD_PROTO_TCP, 279 mod_set_uint32, mod_get_uint32, 280 {1024, (32*1024), 1024}, {1024} }, 281 282 { "_ip_abort_cinterval", MOD_PROTO_TCP, 283 mod_set_uint32, mod_get_uint32, 284 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} }, 285 286 { "_ip_abort_linterval", MOD_PROTO_TCP, 287 mod_set_uint32, mod_get_uint32, 288 {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} }, 289 290 /* tunable - 10 */ 291 { "_ip_abort_interval", MOD_PROTO_TCP, 292 mod_set_uint32, mod_get_uint32, 293 {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} }, 294 295 { "_ip_notify_cinterval", MOD_PROTO_TCP, 296 mod_set_uint32, mod_get_uint32, 297 {1*SECONDS, UINT32_MAX, 10*SECONDS}, 298 {10*SECONDS} }, 299 300 { "_ip_notify_interval", MOD_PROTO_TCP, 301 mod_set_uint32, mod_get_uint32, 302 {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} }, 303 304 { "_ipv4_ttl", MOD_PROTO_TCP, 305 mod_set_uint32, mod_get_uint32, 306 {1, 255, 64}, {64} }, 307 308 { "_keepalive_interval", MOD_PROTO_TCP, 309 mod_set_uint32, mod_get_uint32, 310 {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} }, 311 312 { "_maxpsz_multiplier", MOD_PROTO_TCP, 313 mod_set_uint32, mod_get_uint32, 314 {0, 100, 10}, {10} }, 315 316 { "_mss_def_ipv4", MOD_PROTO_TCP, 317 mod_set_uint32, mod_get_uint32, 318 {1, TCP_MSS_MAX_IPV4, 536}, {536} }, 319 320 { "_mss_max_ipv4", MOD_PROTO_TCP, 321 mod_set_uint32, mod_get_uint32, 322 {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4}, 323 {TCP_MSS_MAX_IPV4} }, 324 325 { "_mss_min", MOD_PROTO_TCP, 326 mod_set_uint32, mod_get_uint32, 327 {1, TCP_MSS_MAX, 108}, {108} }, 328 329 { "_naglim_def", MOD_PROTO_TCP, 330 mod_set_uint32, mod_get_uint32, 331 {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} }, 332 333 /* tunable - 20 */ 334 { "_rexmit_interval_initial", MOD_PROTO_TCP, 335 mod_set_uint32, mod_get_uint32, 336 {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} }, 337 338 { "_rexmit_interval_max", MOD_PROTO_TCP, 339 mod_set_uint32, mod_get_uint32, 340 {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} }, 341 342 { "_rexmit_interval_min", MOD_PROTO_TCP, 343 mod_set_uint32, mod_get_uint32, 344 {1*MS, 2*HOURS, 400*MS}, {400*MS} }, 345 346 { "_deferred_ack_interval", MOD_PROTO_TCP, 347 mod_set_uint32, mod_get_uint32, 348 {1*MS, 1*MINUTES, 100*MS}, {100*MS} }, 349 350 { "_snd_lowat_fraction", MOD_PROTO_TCP, 351 mod_set_uint32, mod_get_uint32, 352 {0, 16, 10}, {10} }, 353 354 { "_dupack_fast_retransmit", MOD_PROTO_TCP, 355 mod_set_uint32, mod_get_uint32, 356 {1, 10000, 3}, {3} }, 357 358 { "_ignore_path_mtu", MOD_PROTO_TCP, 359 mod_set_boolean, mod_get_boolean, 360 {B_FALSE}, {B_FALSE} }, 361 362 { "smallest_anon_port", MOD_PROTO_TCP, 363 tcp_smallest_anon_set, mod_get_uint32, 364 {1024, ULP_MAX_PORT, 32*1024}, {32*1024} }, 365 366 { "largest_anon_port", MOD_PROTO_TCP, 367 tcp_largest_anon_set, mod_get_uint32, 368 {1024, ULP_MAX_PORT, ULP_MAX_PORT}, 369 {ULP_MAX_PORT} }, 370 371 { "send_buf", MOD_PROTO_TCP, 372 tcp_set_buf_prop, tcp_get_buf_prop, 373 {TCP_XMIT_LOWATER, ULP_MAX_BUF, TCP_XMIT_HIWATER}, 374 {TCP_XMIT_HIWATER} }, 375 376 /* tunable - 30 */ 377 { "_xmit_lowat", MOD_PROTO_TCP, 378 mod_set_uint32, mod_get_uint32, 379 {TCP_XMIT_LOWATER, ULP_MAX_BUF, TCP_XMIT_LOWATER}, 380 {TCP_XMIT_LOWATER} }, 381 382 { "recv_buf", MOD_PROTO_TCP, 383 tcp_set_buf_prop, tcp_get_buf_prop, 384 {TCP_RECV_LOWATER, ULP_MAX_BUF, TCP_RECV_HIWATER}, 385 {TCP_RECV_HIWATER} }, 386 387 { "_recv_hiwat_minmss", MOD_PROTO_TCP, 388 mod_set_uint32, mod_get_uint32, 389 {1, 65536, 4}, {4} }, 390 391 { "_fin_wait_2_flush_interval", MOD_PROTO_TCP, 392 mod_set_uint32, mod_get_uint32, 393 {1*SECONDS, 2*HOURS, 60*SECONDS}, 394 {60*SECONDS} }, 395 396 { "max_buf", MOD_PROTO_TCP, 397 mod_set_uint32, mod_get_uint32, 398 {8192, ULP_MAX_BUF, 1024*1024}, {1024*1024} }, 399 400 /* 401 * Question: What default value should I set for tcp_strong_iss? 402 */ 403 { "_strong_iss", MOD_PROTO_TCP, 404 mod_set_uint32, mod_get_uint32, 405 {0, 2, 1}, {1} }, 406 407 { "_rtt_updates", MOD_PROTO_TCP, 408 mod_set_uint32, mod_get_uint32, 409 {0, 65536, 20}, {20} }, 410 411 { "_wscale_always", MOD_PROTO_TCP, 412 mod_set_boolean, mod_get_boolean, 413 {B_TRUE}, {B_TRUE} }, 414 415 { "_tstamp_always", MOD_PROTO_TCP, 416 mod_set_boolean, mod_get_boolean, 417 {B_FALSE}, {B_FALSE} }, 418 419 { "_tstamp_if_wscale", MOD_PROTO_TCP, 420 mod_set_boolean, mod_get_boolean, 421 {B_TRUE}, {B_TRUE} }, 422 423 /* tunable - 40 */ 424 { "_rexmit_interval_extra", MOD_PROTO_TCP, 425 mod_set_uint32, mod_get_uint32, 426 {0*MS, 2*HOURS, 0*MS}, {0*MS} }, 427 428 { "_deferred_acks_max", MOD_PROTO_TCP, 429 mod_set_uint32, mod_get_uint32, 430 {0, 16, 2}, {2} }, 431 432 { "_slow_start_after_idle", MOD_PROTO_TCP, 433 mod_set_uint32, mod_get_uint32, 434 {0, 16384, 0}, {0} }, 435 436 { "_slow_start_initial", MOD_PROTO_TCP, 437 mod_set_uint32, mod_get_uint32, 438 {0, 16, 0}, {0} }, 439 440 { "sack", MOD_PROTO_TCP, 441 mod_set_uint32, mod_get_uint32, 442 {0, 2, 2}, {2} }, 443 444 { "_ipv6_hoplimit", MOD_PROTO_TCP, 445 mod_set_uint32, mod_get_uint32, 446 {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS}, 447 {IPV6_DEFAULT_HOPS} }, 448 449 { "_mss_def_ipv6", MOD_PROTO_TCP, 450 mod_set_uint32, mod_get_uint32, 451 {1, TCP_MSS_MAX_IPV6, 1220}, {1220} }, 452 453 { "_mss_max_ipv6", MOD_PROTO_TCP, 454 mod_set_uint32, mod_get_uint32, 455 {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6}, 456 {TCP_MSS_MAX_IPV6} }, 457 458 { "_rev_src_routes", MOD_PROTO_TCP, 459 mod_set_boolean, mod_get_boolean, 460 {B_FALSE}, {B_FALSE} }, 461 462 { "_local_dack_interval", MOD_PROTO_TCP, 463 mod_set_uint32, mod_get_uint32, 464 {10*MS, 500*MS, 50*MS}, {50*MS} }, 465 466 /* tunable - 50 */ 467 { "_local_dacks_max", MOD_PROTO_TCP, 468 mod_set_uint32, mod_get_uint32, 469 {0, 16, 8}, {8} }, 470 471 { "ecn", MOD_PROTO_TCP, 472 mod_set_uint32, mod_get_uint32, 473 {0, 2, 1}, {1} }, 474 475 { "_rst_sent_rate_enabled", MOD_PROTO_TCP, 476 mod_set_boolean, mod_get_boolean, 477 {B_TRUE}, {B_TRUE} }, 478 479 { "_rst_sent_rate", MOD_PROTO_TCP, 480 mod_set_uint32, mod_get_uint32, 481 {0, UINT32_MAX, 40}, {40} }, 482 483 { "_push_timer_interval", MOD_PROTO_TCP, 484 mod_set_uint32, mod_get_uint32, 485 {0, 100*MS, 50*MS}, {50*MS} }, 486 487 { "_use_smss_as_mss_opt", MOD_PROTO_TCP, 488 mod_set_boolean, mod_get_boolean, 489 {B_FALSE}, {B_FALSE} }, 490 491 { "_keepalive_abort_interval", MOD_PROTO_TCP, 492 mod_set_uint32, mod_get_uint32, 493 {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} }, 494 495 /* 496 * tcp_wroff_xtra is the extra space in front of TCP/IP header for link 497 * layer header. It has to be a multiple of 8. 498 */ 499 { "_wroff_xtra", MOD_PROTO_TCP, 500 mod_set_aligned, mod_get_uint32, 501 {0, 256, 32}, {32} }, 502 503 { "_dev_flow_ctl", MOD_PROTO_TCP, 504 mod_set_boolean, mod_get_boolean, 505 {B_FALSE}, {B_FALSE} }, 506 507 { "_reass_timeout", MOD_PROTO_TCP, 508 mod_set_uint32, mod_get_uint32, 509 {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} }, 510 511 /* tunable - 60 */ 512 { "extra_priv_ports", MOD_PROTO_TCP, 513 mod_set_extra_privports, mod_get_extra_privports, 514 {1, ULP_MAX_PORT, 0}, {0} }, 515 516 { "_1948_phrase", MOD_PROTO_TCP, 517 tcp_set_1948phrase, NULL, {0}, {0} }, 518 519 { "_listener_limit_conf", MOD_PROTO_TCP, 520 NULL, tcp_listener_conf_get, {0}, {0} }, 521 522 { "_listener_limit_conf_add", MOD_PROTO_TCP, 523 tcp_listener_conf_add, NULL, {0}, {0} }, 524 525 { "_listener_limit_conf_del", MOD_PROTO_TCP, 526 tcp_listener_conf_del, NULL, {0}, {0} }, 527 528 { "_iss_incr", MOD_PROTO_TCP, 529 mod_set_uint32, mod_get_uint32, 530 {1, ISS_INCR, ISS_INCR}, 531 {ISS_INCR} }, 532 533 { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} }, 534 535 { NULL, 0, NULL, NULL, {0}, {0} } 536 }; 537 538 int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);