Print this page
11554 Want TCP_CONGESTION socket option
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/inet/tcp/tcp_opt_data.c
          +++ new/usr/src/uts/common/inet/tcp/tcp_opt_data.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
  24      - * Copyright 2016 Joyent, Inc.
       24 + * Copyright 2019 Joyent, Inc.
  25   25   * Copyright (c) 2016 by Delphix. All rights reserved.
  26   26   */
  27   27  
  28   28  #include <sys/types.h>
  29   29  #include <sys/stream.h>
  30   30  #define _SUN_TPI_VERSION 2
  31   31  #include <sys/tihdr.h>
  32   32  #include <sys/socket.h>
  33   33  #include <sys/xti_xtiopt.h>
  34   34  #include <sys/xti_inet.h>
  35   35  #include <sys/policy.h>
  36   36  
       37 +#include <inet/cc.h>
  37   38  #include <inet/common.h>
  38   39  #include <netinet/ip6.h>
  39   40  #include <inet/ip.h>
  40   41  
  41   42  #include <netinet/in.h>
  42   43  #include <netinet/tcp.h>
  43   44  #include <inet/optcom.h>
  44   45  #include <inet/proto_set.h>
  45   46  #include <inet/tcp_impl.h>
  46   47  
↓ open down ↓ 87 lines elided ↑ open up ↑
 134  135  { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 135  136  
 136  137  { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
 137  138  
 138  139  { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
 139  140  
 140  141  { TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
 141  142  
 142  143  { TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 143  144  
      145 +{ TCP_CONGESTION, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
      146 +        OP_VARLEN, CC_ALGO_NAME_MAX, 0 },
      147 +
 144  148  { IP_OPTIONS,   IPPROTO_IP, OA_RW, OA_RW, OP_NP,
 145  149          (OP_VARLEN|OP_NODEFAULT),
 146  150          IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
 147  151  { T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
 148  152          (OP_VARLEN|OP_NODEFAULT),
 149  153          IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
 150  154  
 151  155  { IP_TOS,       IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 152  156  { T_IP_TOS,     IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
 153  157  { IP_TTL,       IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
↓ open down ↓ 272 lines elided ↑ open up ↑
 426  430                  /*
 427  431                   * TCP_KEEPINTVL expects value in seconds, but
 428  432                   * tcp_ka_rinterval is in milliseconds.
 429  433                   */
 430  434                  case TCP_KEEPINTVL:
 431  435                          *i1 = tcp->tcp_ka_rinterval / 1000;
 432  436                          return (sizeof (int));
 433  437                  case TCP_KEEPALIVE_ABORT_THRESHOLD:
 434  438                          *i1 = tcp->tcp_ka_abort_thres;
 435  439                          return (sizeof (int));
      440 +                case TCP_CONGESTION: {
      441 +                        size_t len = strlcpy((char *)ptr, CC_ALGO(tcp)->name,
      442 +                            CC_ALGO_NAME_MAX);
      443 +                        if (len >= CC_ALGO_NAME_MAX)
      444 +                                return (-1);
      445 +                        return (len + 1);
      446 +                }
 436  447                  case TCP_CORK:
 437  448                          *i1 = tcp->tcp_cork;
 438  449                          return (sizeof (int));
 439  450                  case TCP_RTO_INITIAL:
 440  451                          *i1 = tcp->tcp_rto_initial;
 441  452                          return (sizeof (uint32_t));
 442  453                  case TCP_RTO_MIN:
 443  454                          *i1 = tcp->tcp_rto_min;
 444  455                          return (sizeof (uint32_t));
 445  456                  case TCP_RTO_MAX:
↓ open down ↓ 401 lines elided ↑ open up ↑
 847  858                                      *i1 >
 848  859                                      tcps->tcps_keepalive_abort_interval_high) {
 849  860                                          *outlenp = 0;
 850  861                                          return (EINVAL);
 851  862                                  }
 852  863                                  tcp->tcp_ka_abort_thres = *i1;
 853  864                                  tcp->tcp_ka_cnt = 0;
 854  865                                  tcp->tcp_ka_rinterval = 0;
 855  866                          }
 856  867                          break;
      868 +                case TCP_CONGESTION: {
      869 +                        struct cc_algo *algo;
      870 +
      871 +                        if (checkonly) {
      872 +                                break;
      873 +                        }
      874 +
      875 +                        /*
      876 +                         * Make sure the string is NUL-terminated. Some
      877 +                         * consumers pass only the number of characters
      878 +                         * in the string, and don't include the NUL
      879 +                         * terminator, so we set it for them.
      880 +                         */
      881 +                        if (inlen < CC_ALGO_NAME_MAX) {
      882 +                                invalp[inlen] = '\0';
      883 +                        }
      884 +                        invalp[CC_ALGO_NAME_MAX - 1] = '\0';
      885 +
      886 +                        if ((algo = cc_load_algo((char *)invalp)) == NULL) {
      887 +                                return (ENOENT);
      888 +                        }
      889 +
      890 +                        if (CC_ALGO(tcp)->cb_destroy != NULL) {
      891 +                                CC_ALGO(tcp)->cb_destroy(&tcp->tcp_ccv);
      892 +                        }
      893 +
      894 +                        CC_DATA(tcp) = NULL;
      895 +                        CC_ALGO(tcp) = algo;
      896 +
      897 +                        if (CC_ALGO(tcp)->cb_init != NULL) {
      898 +                                VERIFY0(CC_ALGO(tcp)->cb_init(&tcp->tcp_ccv));
      899 +                        }
      900 +
      901 +                        break;
      902 +                }
 857  903                  case TCP_CORK:
 858  904                          if (!checkonly) {
 859  905                                  /*
 860  906                                   * if tcp->tcp_cork was set and is now
 861  907                                   * being unset, we have to make sure that
 862  908                                   * the remaining data gets sent out. Also
 863  909                                   * unset tcp->tcp_cork so that tcp_wput_data()
 864  910                                   * can send data even if it is less than mss
 865  911                                   */
 866  912                                  if (tcp->tcp_cork && onoff == 0 &&
↓ open down ↓ 241 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX