Print this page
11553 Want pluggable TCP congestion control algorithms
Portions contributed by: Cody Peter Mello <cody.mello@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Robert Mustacchi <robert.mustacchi@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/inet/tcp/tcp_tunables.c
          +++ new/usr/src/uts/common/inet/tcp/tcp_tunables.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright 2016 Joyent, Inc.
  24   24   * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  25      - * Copyright (c) 2013 by Delphix. All rights reserved.
       25 + * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  26   26   */
  27   27  /* Copyright (c) 1990 Mentat Inc. */
  28   28  
  29   29  #include <inet/ip.h>
  30   30  #include <inet/tcp_impl.h>
       31 +#include <inet/cc.h>
  31   32  #include <sys/multidata.h>
  32   33  #include <sys/sunddi.h>
  33   34  
  34   35  /* Max size IP datagram is 64k - 1 */
  35   36  #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))
  36   37  #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))
  37   38  
  38   39  /* Max of the above */
  39   40  #define TCP_MSS_MAX             TCP_MSS_MAX_IPV4
  40   41  
       42 +typedef struct {
       43 +        char *ccn_buf;
       44 +        uint_t ccn_bufsize;
       45 +        uint_t ccn_bytes;
       46 +} tcp_copy_ccname_t;
       47 +
  41   48  /*
  42   49   * Set the RFC 1948 pass phrase
  43   50   */
  44   51  /* ARGSUSED */
  45   52  static int
  46   53  tcp_set_1948phrase(netstack_t *stack,  cred_t *cr, mod_prop_info_t *pinfo,
  47   54      const char *ifname, const void* pr_val, uint_t flags)
  48   55  {
  49   56          if (flags & MOD_PROP_DEFAULT)
  50   57                  return (ENOTSUP);
↓ open down ↓ 181 lines elided ↑ open up ↑
 232  239  
 233  240          if ((err = mod_uint32_value(pval, pinfo, flags, &new_value)) != 0)
 234  241                  return (err);
  235  242          /* mod_uint32_value() + pinfo guarantee we're in the TCP port range. */
 236  243          if ((uint32_t)new_value < tcps->tcps_smallest_anon_port)
 237  244                  return (ERANGE);
 238  245          pinfo->prop_cur_uval = (uint32_t)new_value;
 239  246          return (0);
 240  247  }
 241  248  
      249 +/* ARGSUSED */
      250 +static int
      251 +tcp_set_cc_algorithm(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
      252 +    const char *ifname, const void *pval, uint_t flags)
      253 +{
      254 +        tcp_stack_t *tcps = stack->netstack_tcp;
      255 +        char *name = (flags & MOD_PROP_DEFAULT) ?
      256 +            CC_DEFAULT_ALGO_NAME : (char *)pval;
      257 +        struct cc_algo *algo = cc_load_algo(name);
      258 +
      259 +        if (algo == NULL) {
      260 +                return (EINVAL);
      261 +        }
      262 +
      263 +        tcps->tcps_default_cc_algo = algo;
      264 +
      265 +        return (0);
      266 +}
      267 +
      268 +static int
      269 +tcp_copy_ccname(void *data, struct cc_algo *algo)
      270 +{
      271 +        tcp_copy_ccname_t *cd = data;
      272 +        char *sep = cd->ccn_bytes > 0 ? "," : "";
      273 +        size_t avail = 0;
      274 +
      275 +        if (cd->ccn_bytes < cd->ccn_bufsize) {
      276 +                avail = cd->ccn_bufsize - cd->ccn_bytes;
      277 +        }
      278 +
      279 +        cd->ccn_bytes += snprintf(cd->ccn_buf + cd->ccn_bytes, avail,
      280 +            "%s%s", sep, algo->name);
      281 +
      282 +        return (cd->ccn_bytes >= cd->ccn_bufsize ? ENOBUFS : 0);
      283 +}
      284 +
      285 +/* ARGSUSED */
      286 +static int
      287 +tcp_get_cc_algorithm(netstack_t *stack, mod_prop_info_t *pinfo,
      288 +    const char *ifname, void *pval, uint_t psize, uint_t flags)
      289 +{
      290 +        size_t nbytes;
      291 +
      292 +        if (flags & MOD_PROP_POSSIBLE) {
      293 +                tcp_copy_ccname_t cd = { pval, psize, 0 };
      294 +                return (cc_walk_algos(tcp_copy_ccname, &cd));
      295 +        } else if (flags & MOD_PROP_PERM) {
      296 +                nbytes = snprintf(pval, psize, "%u", MOD_PROP_PERM_RW);
      297 +        } else if (flags & MOD_PROP_DEFAULT) {
      298 +                nbytes = snprintf(pval, psize, "%s", CC_DEFAULT_ALGO_NAME);
      299 +        } else {
      300 +                nbytes = snprintf(pval, psize, "%s",
      301 +                    stack->netstack_tcp->tcps_default_cc_algo->name);
      302 +        }
      303 +        if (nbytes >= psize)
      304 +                return (ENOBUFS);
      305 +        return (0);
      306 +}
      307 +
 242  308  /*
 243  309   * All of these are alterable, within the min/max values given, at run time.
 244  310   *
 245  311   * Note: All those tunables which do not start with "_" are Committed and
 246  312   * therefore are public. See PSARC 2010/080.
 247  313   */
 248  314  mod_prop_info_t tcp_propinfo_tbl[] = {
 249  315          /* tunable - 0 */
 250  316          { "_time_wait_interval", MOD_PROTO_TCP,
 251  317              mod_set_uint32, mod_get_uint32,
↓ open down ↓ 268 lines elided ↑ open up ↑
 520  586              tcp_listener_conf_add, NULL, {0}, {0} },
 521  587  
 522  588          { "_listener_limit_conf_del", MOD_PROTO_TCP,
 523  589              tcp_listener_conf_del, NULL, {0}, {0} },
 524  590  
 525  591          { "_iss_incr", MOD_PROTO_TCP,
 526  592              mod_set_uint32, mod_get_uint32,
 527  593              {1, ISS_INCR, ISS_INCR},
 528  594              {ISS_INCR} },
 529  595  
      596 +        { "congestion_control", MOD_PROTO_TCP,
      597 +            tcp_set_cc_algorithm, tcp_get_cc_algorithm, {0}, {0} },
      598 +
      599 +        /* RFC 3465 - TCP Congestion Control with Appropriate Byte Counting */
      600 +        { "_abc", MOD_PROTO_TCP,
      601 +            mod_set_boolean, mod_get_boolean, {B_TRUE}, {B_TRUE} },
      602 +
      603 +        /* "L" value from RFC 3465 */
      604 +        { "_abc_l_var", MOD_PROTO_TCP,
      605 +            mod_set_uint32, mod_get_uint32, {1, UINT32_MAX, 2}, {2} },
      606 +
 530  607          { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
 531  608  
 532  609          { NULL, 0, NULL, NULL, {0}, {0} }
 533  610  };
 534  611  
 535  612  int tcp_propinfo_count = A_CNT(tcp_propinfo_tbl);
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX