Print this page
11553 Want pluggable TCP congestion control algorithms
Portions contributed by: Cody Peter Mello <cody.mello@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Robert Mustacchi <robert.mustacchi@joyent.com>


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  25  * Copyright (c) 2013 by Delphix. All rights reserved.
  26  */
  27 /* Copyright (c) 1990 Mentat Inc. */
  28 
  29 #include <inet/ip.h>
  30 #include <inet/tcp_impl.h>

  31 #include <sys/multidata.h>
  32 #include <sys/sunddi.h>
  33 
  34 /* Max size IP datagram is 64k - 1 */
  35 #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))  /* IP_MAXPACKET less the IPv4 and TCP header structs */
  36 #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))  /* IP_MAXPACKET less the IPv6 and TCP header structs */
  37 
  38 /* Max of the above */
  39 #define TCP_MSS_MAX             TCP_MSS_MAX_IPV4  /* NOTE(review): assumes sizeof (ipha_t) <= sizeof (ip6_t) -- confirm */
  40 






  41 /*
  42  * Set the RFC 1948 pass phrase
  43  */
  44 /* ARGSUSED */
  45 static int
  46 tcp_set_1948phrase(netstack_t *stack,  cred_t *cr, mod_prop_info_t *pinfo,
  47     const char *ifname, const void* pr_val, uint_t flags)
  48 {
  49         if (flags & MOD_PROP_DEFAULT)
  50                 return (ENOTSUP);
  51 
  52         /*
  53          * Basically, value contains a new pass phrase.  Pass it along!
  54          */
  55         tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val),
  56             stack->netstack_tcp);
  57         return (0);
  58 }
  59 
  60 /*


 222 }
 223 
 224 /* ARGSUSED */
 225 static int
 226 tcp_largest_anon_set(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
 227     const char *ifname, const void *pval, uint_t flags)
 228 {
 229         unsigned long new_value;
 230         tcp_stack_t *tcps = stack->netstack_tcp;
 231         int err;
 232 
 233         if ((err = mod_uint32_value(pval, pinfo, flags, &new_value)) != 0)
 234                 return (err);
 235         /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
 236         if ((uint32_t)new_value < tcps->tcps_smallest_anon_port)
 237                 return (ERANGE);
 238         pinfo->prop_cur_uval = (uint32_t)new_value;
 239         return (0);
 240 }
 241 



























































 242 /*
 243  * All of these are alterable, within the min/max values given, at run time.
 244  *
 245  * Note: All those tunables which do not start with "_" are Committed and
 246  * therefore are public. See PSARC 2010/080.
 247  */
 248 mod_prop_info_t tcp_propinfo_tbl[] = {
 249         /* tunable - 0 */
 250         { "_time_wait_interval", MOD_PROTO_TCP,
 251             mod_set_uint32, mod_get_uint32,
 252             {1*SECONDS, TCP_TIME_WAIT_MAX, 1*MINUTES}, {1*MINUTES} },
 253 
 254         { "_conn_req_max_q", MOD_PROTO_TCP,
 255             mod_set_uint32, mod_get_uint32,
 256             {1, UINT32_MAX, 128}, {128} },
 257 
 258         { "_conn_req_max_q0", MOD_PROTO_TCP,
 259             mod_set_uint32, mod_get_uint32,
 260             {0, UINT32_MAX, 1024}, {1024} },
 261 


 510             mod_set_extra_privports, mod_get_extra_privports,
 511             {1, ULP_MAX_PORT, 0}, {0} },
 512 
 513         { "_1948_phrase", MOD_PROTO_TCP,
 514             tcp_set_1948phrase, NULL, {0}, {0} },
 515 
 516         { "_listener_limit_conf", MOD_PROTO_TCP,
 517             NULL, tcp_listener_conf_get, {0}, {0} },
 518 
 519         { "_listener_limit_conf_add", MOD_PROTO_TCP,
 520             tcp_listener_conf_add, NULL, {0}, {0} },
 521 
 522         { "_listener_limit_conf_del", MOD_PROTO_TCP,
 523             tcp_listener_conf_del, NULL, {0}, {0} },
 524 
 525         { "_iss_incr", MOD_PROTO_TCP,
 526             mod_set_uint32, mod_get_uint32,
 527             {1, ISS_INCR, ISS_INCR},
 528             {ISS_INCR} },
 529 











 530         { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
 531 
 532         { NULL, 0, NULL, NULL, {0}, {0} }
 533 };
 534 
 /* Length of tcp_propinfo_tbl; A_CNT is presumably an element-count macro, which would include the NULL terminator entry -- confirm. */
 535 int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2016 Joyent, Inc.
  24  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  25  * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  26  */
  27 /* Copyright (c) 1990 Mentat Inc. */
  28 
  29 #include <inet/ip.h>
  30 #include <inet/tcp_impl.h>
  31 #include <inet/cc.h>
  32 #include <sys/multidata.h>
  33 #include <sys/sunddi.h>
  34 
  35 /* Max size IP datagram is 64k - 1 */
  36 #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))  /* IP_MAXPACKET less the IPv4 and TCP header structs */
  37 #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))  /* IP_MAXPACKET less the IPv6 and TCP header structs */
  38 
  39 /* Max of the above */
  40 #define TCP_MSS_MAX             TCP_MSS_MAX_IPV4  /* NOTE(review): assumes sizeof (ipha_t) <= sizeof (ip6_t) -- confirm */
  41 
  42 typedef struct {  /* accumulator passed as the "data" argument of tcp_copy_ccname() */
  43         char *ccn_buf;  /* destination buffer the names are formatted into */
  44         uint_t ccn_bufsize;  /* size limit applied to writes into ccn_buf */
  45         uint_t ccn_bytes;  /* running sum of snprintf() results; reaches or passes ccn_bufsize once output no longer fits */
  46 } tcp_copy_ccname_t;
  47 
  48 /*
  49  * Set the RFC 1948 pass phrase
  50  */
  51 /* ARGSUSED */
  52 static int
  53 tcp_set_1948phrase(netstack_t *stack,  cred_t *cr, mod_prop_info_t *pinfo,
  54     const char *ifname, const void* pr_val, uint_t flags)
  55 {
  56         if (flags & MOD_PROP_DEFAULT)
  57                 return (ENOTSUP);
  58 
  59         /*
  60          * Basically, value contains a new pass phrase.  Pass it along!
  61          */
  62         tcp_iss_key_init((uint8_t *)pr_val, strlen(pr_val),
  63             stack->netstack_tcp);
  64         return (0);
  65 }
  66 
  67 /*


 229 }
 230 
 231 /* ARGSUSED */
 232 static int
 233 tcp_largest_anon_set(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
 234     const char *ifname, const void *pval, uint_t flags)
 235 {
 236         unsigned long new_value;
 237         tcp_stack_t *tcps = stack->netstack_tcp;
 238         int err;
 239 
 240         if ((err = mod_uint32_value(pval, pinfo, flags, &new_value)) != 0)
 241                 return (err);
 242         /* mod_uint32_value() + pinfo guarantees we're in TCP port range. */
 243         if ((uint32_t)new_value < tcps->tcps_smallest_anon_port)
 244                 return (ERANGE);
 245         pinfo->prop_cur_uval = (uint32_t)new_value;
 246         return (0);
 247 }
 248 
 249 /* ARGSUSED */
 250 static int
 251 tcp_set_cc_algorithm(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
 252     const char *ifname, const void *pval, uint_t flags)
 253 {
 254         tcp_stack_t *tcps = stack->netstack_tcp;
 255         char *name = (flags & MOD_PROP_DEFAULT) ?
 256             CC_DEFAULT_ALGO_NAME : (char *)pval;
 257         struct cc_algo *algo = cc_load_algo(name);
 258 
 259         if (algo == NULL) {
 260                 return (EINVAL);
 261         }
 262 
 263         tcps->tcps_default_cc_algo = algo;
 264 
 265         return (0);
 266 }
 267 
 268 static int
 269 tcp_copy_ccname(void *data, struct cc_algo *algo)
 270 {
 271         tcp_copy_ccname_t *cd = data;
 272         char *sep = cd->ccn_bytes > 0 ? "," : "";
 273         size_t avail = 0;
 274 
 275         if (cd->ccn_bytes < cd->ccn_bufsize) {
 276                 avail = cd->ccn_bufsize - cd->ccn_bytes;
 277         }
 278 
 279         cd->ccn_bytes += snprintf(cd->ccn_buf + cd->ccn_bytes, avail,
 280             "%s%s", sep, algo->name);
 281 
 282         return (cd->ccn_bytes >= cd->ccn_bufsize ? ENOBUFS : 0);
 283 }
 284 
 285 /* ARGSUSED */
 286 static int
 287 tcp_get_cc_algorithm(netstack_t *stack, mod_prop_info_t *pinfo,
 288     const char *ifname, void *pval, uint_t psize, uint_t flags)
 289 {
 290         size_t nbytes;
 291 
 292         if (flags & MOD_PROP_POSSIBLE) {
 293                 tcp_copy_ccname_t cd = { pval, psize, 0 };
 294                 return (cc_walk_algos(tcp_copy_ccname, &cd));
 295         } else if (flags & MOD_PROP_PERM) {
 296                 nbytes = snprintf(pval, psize, "%u", MOD_PROP_PERM_RW);
 297         } else if (flags & MOD_PROP_DEFAULT) {
 298                 nbytes = snprintf(pval, psize, "%s", CC_DEFAULT_ALGO_NAME);
 299         } else {
 300                 nbytes = snprintf(pval, psize, "%s",
 301                     stack->netstack_tcp->tcps_default_cc_algo->name);
 302         }
 303         if (nbytes >= psize)
 304                 return (ENOBUFS);
 305         return (0);
 306 }
 307 
 308 /*
 309  * All of these are alterable, within the min/max values given, at run time.
 310  *
 311  * Note: All those tunables which do not start with "_" are Committed and
 312  * therefore are public. See PSARC 2010/080.
 313  */
 314 mod_prop_info_t tcp_propinfo_tbl[] = {
 315         /* tunable - 0 */
 316         { "_time_wait_interval", MOD_PROTO_TCP,
 317             mod_set_uint32, mod_get_uint32,
 318             {1*SECONDS, TCP_TIME_WAIT_MAX, 1*MINUTES}, {1*MINUTES} },
 319 
 320         { "_conn_req_max_q", MOD_PROTO_TCP,
 321             mod_set_uint32, mod_get_uint32,
 322             {1, UINT32_MAX, 128}, {128} },
 323 
 324         { "_conn_req_max_q0", MOD_PROTO_TCP,
 325             mod_set_uint32, mod_get_uint32,
 326             {0, UINT32_MAX, 1024}, {1024} },
 327 


 576             mod_set_extra_privports, mod_get_extra_privports,
 577             {1, ULP_MAX_PORT, 0}, {0} },
 578 
 579         { "_1948_phrase", MOD_PROTO_TCP,
 580             tcp_set_1948phrase, NULL, {0}, {0} },
 581 
 582         { "_listener_limit_conf", MOD_PROTO_TCP,
 583             NULL, tcp_listener_conf_get, {0}, {0} },
 584 
 585         { "_listener_limit_conf_add", MOD_PROTO_TCP,
 586             tcp_listener_conf_add, NULL, {0}, {0} },
 587 
 588         { "_listener_limit_conf_del", MOD_PROTO_TCP,
 589             tcp_listener_conf_del, NULL, {0}, {0} },
 590 
 591         { "_iss_incr", MOD_PROTO_TCP,
 592             mod_set_uint32, mod_get_uint32,
 593             {1, ISS_INCR, ISS_INCR},
 594             {ISS_INCR} },
 595 
 596         { "congestion_control", MOD_PROTO_TCP,
 597             tcp_set_cc_algorithm, tcp_get_cc_algorithm, {0}, {0} },
 598 
 599         /* RFC 3465 - TCP Congestion Control with Appropriate Byte Counting */
 600         { "_abc", MOD_PROTO_TCP,
 601             mod_set_boolean, mod_get_boolean, {B_TRUE}, {B_TRUE} },
 602 
 603         /* "L" value from RFC 3465 */
 604         { "_abc_l_var", MOD_PROTO_TCP,
 605             mod_set_uint32, mod_get_uint32, {1, UINT32_MAX, 2}, {2} },
 606 
 607         { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
 608 
 609         { NULL, 0, NULL, NULL, {0}, {0} }
 610 };
 611 
 /* Length of tcp_propinfo_tbl; A_CNT is presumably an element-count macro, which would include the NULL terminator entry -- confirm. */
 612 int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);