1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2011, Joyent Inc. All rights reserved.
  24  */
  25 /* Copyright (c) 1990 Mentat Inc. */
  26 
  27 #include <inet/ip.h>
  28 #include <inet/tcp_impl.h>
  29 #include <sys/multidata.h>
  30 #include <sys/sunddi.h>
  31 
/*
 * Upper bounds on the TCP MSS: the largest IP datagram (IP_MAXPACKET) less
 * the fixed IP header (ipha_t for IPv4, ip6_t for IPv6) and the TCP header
 * (tcpha_t).  They serve as the maximum values of the _mss_* entries in
 * tcp_propinfo_tbl below.
 */
/* Max size IP datagram is 64k - 1 */
#define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
#define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))

/*
 * Max of the above.
 * NOTE(review): this relies on sizeof (ipha_t) <= sizeof (ip6_t), so that the
 * IPv4 value is the larger of the two -- confirm against the header structs.
 */
#define TCP_MSS_MAX             TCP_MSS_MAX_IPV4

/*
 * Buffer-size constants used by tcp_propinfo_tbl below: the *_LOWATER values
 * are the minimums of send_maxbuf, _xmit_lowat and recv_maxbuf, and the
 * *_HIWATER values are the initial settings of send_maxbuf and recv_maxbuf.
 */
#define TCP_XMIT_LOWATER        4096
#define TCP_XMIT_HIWATER        49152
#define TCP_RECV_LOWATER        2048
#define TCP_RECV_HIWATER        128000
  43 
  44 /*
  45  * Set the RFC 1948 pass phrase
  46  */
  47 /* ARGSUSED */
  48 static int
  49 tcp_set_1948phrase(void *cbarg,  cred_t *cr, mod_prop_info_t *pinfo,
  50     const char *ifname, const void* pr_val, uint_t flags)
  51 {
  52         tcp_stack_t     *tcps = (tcp_stack_t *)cbarg;
  53 
  54         if (flags & MOD_PROP_DEFAULT)
  55                 return (ENOTSUP);
  56 
  57         /*
  58          * Basically, value contains a new pass phrase.  Pass it along!
  59          */
  60         tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val), tcps);
  61         return (0);
  62 }
  63 
  64 /*
  65  * returns the current list of listener limit configuration.
  66  */
  67 /* ARGSUSED */
  68 static int
  69 tcp_listener_conf_get(void *cbarg, mod_prop_info_t *pinfo, const char *ifname,
  70     void *val, uint_t psize, uint_t flags)
  71 {
  72         tcp_stack_t     *tcps = (tcp_stack_t *)cbarg;
  73         tcp_listener_t  *tl;
  74         char            *pval = val;
  75         size_t          nbytes = 0, tbytes = 0;
  76         uint_t          size;
  77         int             err = 0;
  78 
  79         bzero(pval, psize);
  80         size = psize;
  81 
  82         if (flags & (MOD_PROP_DEFAULT|MOD_PROP_PERM|MOD_PROP_POSSIBLE))
  83                 return (0);
  84 
  85         mutex_enter(&tcps->tcps_listener_conf_lock);
  86         for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
  87             tl = list_next(&tcps->tcps_listener_conf, tl)) {
  88                 if (psize == size)
  89                         nbytes = snprintf(pval, size, "%d:%d",  tl->tl_port,
  90                             tl->tl_ratio);
  91                 else
  92                         nbytes = snprintf(pval, size, ",%d:%d",  tl->tl_port,
  93                             tl->tl_ratio);
  94                 size -= nbytes;
  95                 pval += nbytes;
  96                 tbytes += nbytes;
  97                 if (tbytes >= psize) {
  98                         /* Buffer overflow, stop copying information */
  99                         err = ENOBUFS;
 100                         break;
 101                 }
 102         }
 103 
 104         mutex_exit(&tcps->tcps_listener_conf_lock);
 105         return (err);
 106 }
 107 
 108 /*
 109  * add a new listener limit configuration.
 110  */
 111 /* ARGSUSED */
 112 static int
 113 tcp_listener_conf_add(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
 114     const char *ifname, const void* pval, uint_t flags)
 115 {
 116         tcp_listener_t  *new_tl;
 117         tcp_listener_t  *tl;
 118         long            lport;
 119         long            ratio;
 120         char            *colon;
 121         tcp_stack_t     *tcps = (tcp_stack_t *)cbarg;
 122 
 123         if (flags & MOD_PROP_DEFAULT)
 124                 return (ENOTSUP);
 125 
 126         if (ddi_strtol(pval, &colon, 10, &lport) != 0 || lport <= 0 ||
 127             lport > USHRT_MAX || *colon != ':') {
 128                 return (EINVAL);
 129         }
 130         if (ddi_strtol(colon + 1, NULL, 10, &ratio) != 0 || ratio <= 0)
 131                 return (EINVAL);
 132 
 133         mutex_enter(&tcps->tcps_listener_conf_lock);
 134         for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
 135             tl = list_next(&tcps->tcps_listener_conf, tl)) {
 136                 /* There is an existing entry, so update its ratio value. */
 137                 if (tl->tl_port == lport) {
 138                         tl->tl_ratio = ratio;
 139                         mutex_exit(&tcps->tcps_listener_conf_lock);
 140                         return (0);
 141                 }
 142         }
 143 
 144         if ((new_tl = kmem_alloc(sizeof (tcp_listener_t), KM_NOSLEEP)) ==
 145             NULL) {
 146                 mutex_exit(&tcps->tcps_listener_conf_lock);
 147                 return (ENOMEM);
 148         }
 149 
 150         new_tl->tl_port = lport;
 151         new_tl->tl_ratio = ratio;
 152         list_insert_tail(&tcps->tcps_listener_conf, new_tl);
 153         mutex_exit(&tcps->tcps_listener_conf_lock);
 154         return (0);
 155 }
 156 
 157 /*
 158  * remove a listener limit configuration.
 159  */
 160 /* ARGSUSED */
 161 static int
 162 tcp_listener_conf_del(void *cbarg, cred_t *cr, mod_prop_info_t *pinfo,
 163     const char *ifname, const void* pval, uint_t flags)
 164 {
 165         tcp_listener_t  *tl;
 166         long            lport;
 167         tcp_stack_t     *tcps = (tcp_stack_t *)cbarg;
 168 
 169         if (flags & MOD_PROP_DEFAULT)
 170                 return (ENOTSUP);
 171 
 172         if (ddi_strtol(pval, NULL, 10, &lport) != 0 || lport <= 0 ||
 173             lport > USHRT_MAX) {
 174                 return (EINVAL);
 175         }
 176         mutex_enter(&tcps->tcps_listener_conf_lock);
 177         for (tl = list_head(&tcps->tcps_listener_conf); tl != NULL;
 178             tl = list_next(&tcps->tcps_listener_conf, tl)) {
 179                 if (tl->tl_port == lport) {
 180                         list_remove(&tcps->tcps_listener_conf, tl);
 181                         mutex_exit(&tcps->tcps_listener_conf_lock);
 182                         kmem_free(tl, sizeof (tcp_listener_t));
 183                         return (0);
 184                 }
 185         }
 186         mutex_exit(&tcps->tcps_listener_conf_lock);
 187         return (ESRCH);
 188 }
 189 
/*
 * TCP tunable property table.
 *
 * Each entry gives the property name, the MOD_PROTO_TCP tag, the set and get
 * callbacks (in that order; NULL means that operation is not provided), and
 * two brace-enclosed value groups.  For the uint32 properties the first
 * group reads as {min, max, current} and the second as {default}.
 * NOTE(review): the meaning of the value groups is inferred from the entries
 * themselves -- confirm against the mod_prop_info_t definition.
 *
 * All of these are alterable, within the min/max values given, at run time.
 *
 * The "tunable - N" markers give the array index of the entry that follows
 * them.  NOTE(review): presumably other code refers to entries by index, so
 * keep the order and the markers in step when adding entries -- confirm.
 *
 * Note: All those tunables which do not start with "_" are Committed and
 * therefore are public. See PSARC 2010/080.
 */
mod_prop_info_t tcp_propinfo_tbl[] = {
        /* tunable - 0 */
        { "_time_wait_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*SECONDS, 10*MINUTES, 1*MINUTES}, {1*MINUTES} },

        { "_conn_req_max_q", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, UINT32_MAX, 128}, {128} },

        { "_conn_req_max_q0", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, UINT32_MAX, 1024}, {1024} },

        { "_conn_req_min", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, 1024, 1}, {1} },

        { "_conn_grace_period", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0*MS, 20*SECONDS, 0*MS}, {0*MS} },

        { "_cwnd_max", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {128, (1<<30), 1024*1024}, {1024*1024} },

        { "_debug", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 10, 0}, {0} },

        { "smallest_nonpriv_port", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1024, (32*1024), 1024}, {1024} },

        { "_ip_abort_cinterval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },

        { "_ip_abort_linterval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*SECONDS, UINT32_MAX, 3*MINUTES}, {3*MINUTES} },

        /* tunable - 10 */
        { "_ip_abort_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {500*MS, UINT32_MAX, 5*MINUTES}, {5*MINUTES} },

        { "_ip_notify_cinterval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*SECONDS, UINT32_MAX, 10*SECONDS},
            {10*SECONDS} },

        { "_ip_notify_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {500*MS, UINT32_MAX, 10*SECONDS}, {10*SECONDS} },

        { "_ipv4_ttl", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, 255, 64}, {64} },

        { "_keepalive_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {10*SECONDS, 10*DAYS, 2*HOURS}, {2*HOURS} },

        { "_maxpsz_multiplier", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 100, 10}, {10} },

        { "_mss_def_ipv4", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, TCP_MSS_MAX_IPV4, 536}, {536} },

        { "_mss_max_ipv4", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, TCP_MSS_MAX_IPV4, TCP_MSS_MAX_IPV4},
            {TCP_MSS_MAX_IPV4} },

        { "_mss_min", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, TCP_MSS_MAX, 108}, {108} },

        { "_naglim_def", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, (64*1024)-1, (4*1024)-1}, {(4*1024)-1} },

        /* tunable - 20 */
        { "_rexmit_interval_initial", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*MS, 20*SECONDS, 1*SECONDS}, {1*SECONDS} },

        { "_rexmit_interval_max", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*MS, 2*HOURS, 60*SECONDS}, {60*SECONDS} },

        { "_rexmit_interval_min", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*MS, 2*HOURS, 400*MS}, {400*MS} },

        { "_deferred_ack_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*MS, 1*MINUTES, 100*MS}, {100*MS} },

        { "_snd_lowat_fraction", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 16, 0}, {0} },

        { "_dupack_fast_retransmit", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, 10000, 3}, {3} },

        { "_ignore_path_mtu", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_FALSE}, {B_FALSE} },

        { "smallest_anon_port", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1024, ULP_MAX_PORT, 32*1024}, {32*1024} },

        { "largest_anon_port", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1024, ULP_MAX_PORT, ULP_MAX_PORT},
            {ULP_MAX_PORT} },

        { "send_maxbuf", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_HIWATER},
            {TCP_XMIT_HIWATER} },

        /* tunable - 30 */
        { "_xmit_lowat", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {TCP_XMIT_LOWATER, (1<<30), TCP_XMIT_LOWATER},
            {TCP_XMIT_LOWATER} },

        { "recv_maxbuf", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {TCP_RECV_LOWATER, (1<<30), TCP_RECV_HIWATER},
            {TCP_RECV_HIWATER} },

        { "_recv_hiwat_minmss", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, 65536, 4}, {4} },

        { "_fin_wait_2_flush_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1*SECONDS, 2*HOURS, 60*SECONDS},
            {60*SECONDS} },

        { "_max_buf", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {8192, (1<<30), 1024*1024}, {1024*1024} },

        /*
         * Question:  What default value should I set for tcp_strong_iss?
         */
        { "_strong_iss", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 2, 1}, {1} },

        { "_rtt_updates", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 65536, 20}, {20} },

        { "_wscale_always", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_TRUE}, {B_TRUE} },

        { "_tstamp_always", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_FALSE}, {B_FALSE} },

        { "_tstamp_if_wscale", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_TRUE}, {B_TRUE} },

        /* tunable - 40 */
        { "_rexmit_interval_extra", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0*MS, 2*HOURS, 0*MS}, {0*MS} },

        { "_deferred_acks_max", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 16, 2}, {2} },

        { "_slow_start_after_idle", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 16384, 0}, {0} },

        { "_slow_start_initial", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 16, 0}, {0} },

        { "sack", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 2, 2}, {2} },

        { "_ipv6_hoplimit", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS},
            {IPV6_DEFAULT_HOPS} },

        { "_mss_def_ipv6", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, TCP_MSS_MAX_IPV6, 1220}, {1220} },

        { "_mss_max_ipv6", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, TCP_MSS_MAX_IPV6, TCP_MSS_MAX_IPV6},
            {TCP_MSS_MAX_IPV6} },

        { "_rev_src_routes", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_FALSE}, {B_FALSE} },

        { "_local_dack_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {10*MS, 500*MS, 50*MS}, {50*MS} },

        /* tunable - 50 */
        { "_local_dacks_max", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 16, 8}, {8} },

        { "ecn", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 2, 1}, {1} },

        { "_rst_sent_rate_enabled", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_TRUE}, {B_TRUE} },

        { "_rst_sent_rate", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, UINT32_MAX, 40}, {40} },

        { "_push_timer_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, 100*MS, 50*MS}, {50*MS} },

        { "_use_smss_as_mss_opt", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_FALSE}, {B_FALSE} },

        { "_keepalive_abort_interval", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, UINT32_MAX, 8*MINUTES}, {8*MINUTES} },

        /*
         * tcp_wroff_xtra is the extra space in front of TCP/IP header for link
         * layer header.  It has to be a multiple of 8.
         */
        { "_wroff_xtra", MOD_PROTO_TCP,
            mod_set_aligned, mod_get_uint32,
            {0, 256, 32}, {32} },

        { "_dev_flow_ctl", MOD_PROTO_TCP,
            mod_set_boolean, mod_get_boolean,
            {B_FALSE}, {B_FALSE} },

        { "_reass_timeout", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {0, UINT32_MAX, 100*SECONDS}, {100*SECONDS} },

        /* tunable - 60 */
        { "extra_priv_ports", MOD_PROTO_TCP,
            mod_set_extra_privports, mod_get_extra_privports,
            {1, ULP_MAX_PORT, 0}, {0} },

        /* Set-only: no get callback is provided for the pass phrase. */
        { "_1948_phrase", MOD_PROTO_TCP,
            tcp_set_1948phrase, NULL, {0}, {0} },

        /*
         * Listener limit configuration: the base name is get-only, and
         * changes are made through the set-only _add and _del properties.
         */
        { "_listener_limit_conf", MOD_PROTO_TCP,
            NULL, tcp_listener_conf_get, {0}, {0} },

        { "_listener_limit_conf_add", MOD_PROTO_TCP,
            tcp_listener_conf_add, NULL, {0}, {0} },

        { "_listener_limit_conf_del", MOD_PROTO_TCP,
            tcp_listener_conf_del, NULL, {0}, {0} },

        { "_iss_incr", MOD_PROTO_TCP,
            mod_set_uint32, mod_get_uint32,
            {1, ISS_INCR, ISS_INCR},
            {ISS_INCR} },

        /* Get-only pseudo-property "?", served by mod_get_allprop. */
        { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },

        /* All-NULL entry closing the table. */
        { NULL, 0, NULL, NULL, {0}, {0} }
};
 485 
/*
 * Size of tcp_propinfo_tbl, taken with A_CNT.
 * NOTE(review): if A_CNT is the usual array-element-count macro, this value
 * includes the table's trailing all-NULL entry -- confirm before using it as
 * a loop bound.
 */
int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);