Print this page
11553 Want pluggable TCP congestion control algorithms
Portions contributed by: Cody Peter Mello <cody.mello@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Robert Mustacchi <robert.mustacchi@joyent.com>

*** 20,35 **** */ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2016 Joyent, Inc. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. ! * Copyright (c) 2013 by Delphix. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. */ #include <inet/ip.h> #include <inet/tcp_impl.h> #include <sys/multidata.h> #include <sys/sunddi.h> /* Max size IP datagram is 64k - 1 */ #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t))) --- 20,36 ---- */ /* * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2016 Joyent, Inc. * Copyright 2013 Nexenta Systems, Inc. All rights reserved. ! * Copyright (c) 2012, 2017 by Delphix. All rights reserved. */ /* Copyright (c) 1990 Mentat Inc. */ #include <inet/ip.h> #include <inet/tcp_impl.h> + #include <inet/cc.h> #include <sys/multidata.h> #include <sys/sunddi.h> /* Max size IP datagram is 64k - 1 */ #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
*** 36,45 **** --- 37,52 ---- #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t))) /* Max of the above */ #define TCP_MSS_MAX TCP_MSS_MAX_IPV4 + /* Accumulator handed through cc_walk_algos() to tcp_copy_ccname() while the list of congestion control algorithm names is formatted into a caller-supplied buffer. */ typedef struct { + char *ccn_buf; /* destination buffer (the property getter's pval) */ + uint_t ccn_bufsize; /* size of ccn_buf (the property getter's psize) */ + uint_t ccn_bytes; /* running sum of snprintf() return values; starts at 0 */ + } tcp_copy_ccname_t; + /* * Set the RFC 1948 pass phrase */ /* ARGSUSED */ static int
*** 237,246 **** --- 244,312 ---- return (ERANGE); pinfo->prop_cur_uval = (uint32_t)new_value; return (0); } + /* Property setter used by the "congestion_control" table entry: resolves an algorithm name with cc_load_algo() and records the result as this stack's tcps_default_cc_algo. The name is CC_DEFAULT_ALGO_NAME when MOD_PROP_DEFAULT is set in flags, otherwise the string passed in pval. Returns EINVAL if cc_load_algo() returns NULL, 0 on success. */ /* ARGSUSED */ + static int + tcp_set_cc_algorithm(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo, + const char *ifname, const void *pval, uint_t flags) + { + tcp_stack_t *tcps = stack->netstack_tcp; + char *name = (flags & MOD_PROP_DEFAULT) ? + CC_DEFAULT_ALGO_NAME : (char *)pval; + struct cc_algo *algo = cc_load_algo(name); + + if (algo == NULL) { + return (EINVAL); + } + + tcps->tcps_default_cc_algo = algo; + + return (0); + } + + /* Callback passed to cc_walk_algos(): appends algo->name to the buffer described by data (a tcp_copy_ccname_t), preceded by a comma when an earlier name has already been accounted for. Returns ENOBUFS once the accumulated length reaches the buffer size, 0 otherwise. NOTE(review): presumably a non-zero return stops the walk; confirm against cc_walk_algos(). */ static int + tcp_copy_ccname(void *data, struct cc_algo *algo) + { + tcp_copy_ccname_t *cd = data; + char *sep = cd->ccn_bytes > 0 ? "," : ""; + /* space left in ccn_buf; remains 0 when ccn_bytes has reached ccn_bufsize */ size_t avail = 0; + + if (cd->ccn_bytes < cd->ccn_bufsize) { + avail = cd->ccn_bufsize - cd->ccn_bytes; + } + + /* snprintf() reports the untruncated length, so ccn_bytes can end up at or beyond ccn_bufsize; that is what the ENOBUFS check below detects */ cd->ccn_bytes += snprintf(cd->ccn_buf + cd->ccn_bytes, avail, + "%s%s", sep, algo->name); + + return (cd->ccn_bytes >= cd->ccn_bufsize ? ENOBUFS : 0); + } + + /* Property getter used by the "congestion_control" table entry: formats the view selected by flags into pval, which holds psize bytes. MOD_PROP_POSSIBLE lets cc_walk_algos() drive tcp_copy_ccname() to build a comma-separated name list and returns the walk's result directly. MOD_PROP_PERM prints MOD_PROP_PERM_RW, MOD_PROP_DEFAULT prints CC_DEFAULT_ALGO_NAME, and any other flags print the name of the stack's current tcps_default_cc_algo; those three paths return ENOBUFS when snprintf() reports that the output did not fit, else 0. */ /* ARGSUSED */ + static int + tcp_get_cc_algorithm(netstack_t *stack, mod_prop_info_t *pinfo, + const char *ifname, void *pval, uint_t psize, uint_t flags) + { + size_t nbytes; + + if (flags & MOD_PROP_POSSIBLE) { + tcp_copy_ccname_t cd = { pval, psize, 0 }; + return (cc_walk_algos(tcp_copy_ccname, &cd)); + } else if (flags & MOD_PROP_PERM) { + nbytes = snprintf(pval, psize, "%u", MOD_PROP_PERM_RW); + } else if (flags & MOD_PROP_DEFAULT) { + nbytes = snprintf(pval, psize, "%s", CC_DEFAULT_ALGO_NAME); + } else { + nbytes = snprintf(pval, psize, "%s", + stack->netstack_tcp->tcps_default_cc_algo->name); + } + /* a return value >= psize means the formatted string was truncated */ if (nbytes >= psize) + return (ENOBUFS); + return (0); + } + /* * All of these are alterable, within the min/max values given, at run time. * * Note: All those tunables which do not start with "_" are Committed and * therefore are public. See PSARC 2010/080.
*** 525,534 **** --- 591,611 ---- { "_iss_incr", MOD_PROTO_TCP, mod_set_uint32, mod_get_uint32, {1, ISS_INCR, ISS_INCR}, {ISS_INCR} }, + { "congestion_control", MOD_PROTO_TCP, + tcp_set_cc_algorithm, tcp_get_cc_algorithm, {0}, {0} }, + + /* RFC 3465 - TCP Congestion Control with Appropriate Byte Counting */ + { "_abc", MOD_PROTO_TCP, + mod_set_boolean, mod_get_boolean, {B_TRUE}, {B_TRUE} }, + + /* "L" value from RFC 3465 */ + { "_abc_l_var", MOD_PROTO_TCP, + mod_set_uint32, mod_get_uint32, {1, UINT32_MAX, 2}, {2} }, + { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} }, { NULL, 0, NULL, NULL, {0}, {0} } };