Print this page
11553 Want pluggable TCP congestion control algorithms
Portions contributed by: Cody Peter Mello <cody.mello@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Robert Mustacchi <robert.mustacchi@joyent.com>

@@ -20,16 +20,17 @@
  */
 /*
  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2016 Joyent, Inc.
  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  */
 /* Copyright (c) 1990 Mentat Inc. */
 
 #include <inet/ip.h>
 #include <inet/tcp_impl.h>
+#include <inet/cc.h>
 #include <sys/multidata.h>
 #include <sys/sunddi.h>
 
 /* Max size IP datagram is 64k - 1 */
 #define TCP_MSS_MAX_IPV4 (IP_MAXPACKET - (sizeof (ipha_t) + sizeof (tcpha_t)))

@@ -36,10 +37,16 @@
 #define TCP_MSS_MAX_IPV6 (IP_MAXPACKET - (sizeof (ip6_t) + sizeof (tcpha_t)))
 
 /* Max of the above */
 #define TCP_MSS_MAX             TCP_MSS_MAX_IPV4
 
+typedef struct {        /* walk state shared with tcp_copy_ccname() */
+        char *ccn_buf;          /* output buffer for the name list */
+        uint_t ccn_bufsize;     /* size of ccn_buf, in bytes */
+        uint_t ccn_bytes;       /* running total of snprintf() return values */
+} tcp_copy_ccname_t;
+
 /*
  * Set the RFC 1948 pass phrase
  */
 /* ARGSUSED */
 static int

@@ -237,10 +244,69 @@
                 return (ERANGE);
         pinfo->prop_cur_uval = (uint32_t)new_value;
         return (0);
 }
 
+/*
+ * Property setter for "congestion_control": selects, by name, the congestion
+ * control algorithm stored in this stack's tcps_default_cc_algo.
+ *
+ * When MOD_PROP_DEFAULT is set in flags the name is CC_DEFAULT_ALGO_NAME and
+ * pval is ignored; otherwise pval is taken as the algorithm name.  The name
+ * is passed to cc_load_algo(); if that returns NULL the current setting is
+ * left untouched and EINVAL is returned.  Returns 0 on success.
+ */
+/* ARGSUSED */
+static int
+tcp_set_cc_algorithm(netstack_t *stack, cred_t *cr, mod_prop_info_t *pinfo,
+    const char *ifname, const void *pval, uint_t flags)
+{
+        tcp_stack_t *tcps = stack->netstack_tcp;
+        struct cc_algo *algo;
+        char *name;
+
+        if (flags & MOD_PROP_DEFAULT)
+                name = CC_DEFAULT_ALGO_NAME;
+        else
+                name = (char *)pval;
+
+        algo = cc_load_algo(name);
+        if (algo != NULL) {
+                tcps->tcps_default_cc_algo = algo;
+                return (0);
+        }
+
+        return (EINVAL);
+}
+
+/*
+ * Callback passed to cc_walk_algos() by tcp_get_cc_algorithm().  Appends
+ * algo->name to the comma-separated list being built in the
+ * tcp_copy_ccname_t that data points to.
+ *
+ * ccn_bytes is advanced by snprintf()'s return value, i.e. by the length
+ * the formatted text needs rather than by what actually fit, so it may grow
+ * past ccn_bufsize.  Once no room is left, snprintf() is called with a size
+ * of zero.
+ *
+ * Returns ENOBUFS when ccn_bytes has reached or passed ccn_bufsize, and 0
+ * otherwise.
+ * NOTE(review): presumably a nonzero return ends the walk -- confirm
+ * against cc_walk_algos().
+ */
+static int
+tcp_copy_ccname(void *data, struct cc_algo *algo)
+{
+        tcp_copy_ccname_t *state = data;
+        size_t room;
+        char *delim;
+
+        /* Every name after the first is preceded by a comma. */
+        if (state->ccn_bytes > 0)
+                delim = ",";
+        else
+                delim = "";
+
+        room = (state->ccn_bytes < state->ccn_bufsize) ?
+            state->ccn_bufsize - state->ccn_bytes : 0;
+
+        state->ccn_bytes += snprintf(state->ccn_buf + state->ccn_bytes, room,
+            "%s%s", delim, algo->name);
+
+        if (state->ccn_bytes >= state->ccn_bufsize)
+                return (ENOBUFS);
+        return (0);
+}
+
+/*
+ * Property getter for "congestion_control".  Formats a string into pval
+ * (psize bytes) according to flags:
+ *
+ *   MOD_PROP_POSSIBLE  comma-separated names of every algorithm visited by
+ *                      cc_walk_algos(); the walk's return value is returned
+ *                      as-is.
+ *   MOD_PROP_PERM      MOD_PROP_PERM_RW, printed as an unsigned decimal.
+ *   MOD_PROP_DEFAULT   CC_DEFAULT_ALGO_NAME.
+ *   (none of these)    name of this stack's tcps_default_cc_algo.
+ *
+ * For the last three cases, returns ENOBUFS when the text plus its
+ * terminating NUL does not fit in psize bytes, and 0 otherwise.
+ */
+/* ARGSUSED */
+static int
+tcp_get_cc_algorithm(netstack_t *stack, mod_prop_info_t *pinfo,
+    const char *ifname, void *pval, uint_t psize, uint_t flags)
+{
+        size_t nbytes;
+
+        if (flags & MOD_PROP_POSSIBLE) {
+                tcp_copy_ccname_t cd = { pval, psize, 0 };
+
+                /*
+                 * tcp_copy_ccname() only writes when the walk visits an
+                 * algorithm.  Seed the buffer with an empty string so that
+                 * a walk over an empty list still hands back a terminated
+                 * string instead of whatever the caller's buffer held.
+                 */
+                if (psize > 0)
+                        ((char *)pval)[0] = '\0';
+
+                return (cc_walk_algos(tcp_copy_ccname, &cd));
+        } else if (flags & MOD_PROP_PERM) {
+                nbytes = snprintf(pval, psize, "%u", MOD_PROP_PERM_RW);
+        } else if (flags & MOD_PROP_DEFAULT) {
+                nbytes = snprintf(pval, psize, "%s", CC_DEFAULT_ALGO_NAME);
+        } else {
+                nbytes = snprintf(pval, psize, "%s",
+                    stack->netstack_tcp->tcps_default_cc_algo->name);
+        }
+
+        /* snprintf() reports the length needed, excluding the NUL. */
+        if (nbytes >= psize)
+                return (ENOBUFS);
+        return (0);
+}
+
 /*
  * All of these are alterable, within the min/max values given, at run time.
  *
  * Note: All those tunables which do not start with "_" are Committed and
  * therefore are public. See PSARC 2010/080.

@@ -525,10 +591,21 @@
         { "_iss_incr", MOD_PROTO_TCP,
             mod_set_uint32, mod_get_uint32,
             {1, ISS_INCR, ISS_INCR},
             {ISS_INCR} },
 
+        { "congestion_control", MOD_PROTO_TCP,
+            tcp_set_cc_algorithm, tcp_get_cc_algorithm, {0}, {0} },
+
+        /* RFC 3465 - TCP Congestion Control with Appropriate Byte Counting */
+        { "_abc", MOD_PROTO_TCP,
+            mod_set_boolean, mod_get_boolean, {B_TRUE}, {B_TRUE} },
+
+        /* "L" value from RFC 3465 */
+        { "_abc_l_var", MOD_PROTO_TCP,
+            mod_set_uint32, mod_get_uint32, {1, UINT32_MAX, 2}, {2} },
+
         { "?", MOD_PROTO_TCP, NULL, mod_get_allprop, {0}, {0} },
 
         { NULL, 0, NULL, NULL, {0}, {0} }
 };