1 /*
   2  * Copyright (c) 2007-2008
   3  *      Swinburne University of Technology, Melbourne, Australia.
   4  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
   5  * Copyright (c) 2010 The FreeBSD Foundation
   6  * All rights reserved.
   7  * Copyright (c) 2017 by Delphix. All rights reserved.
   8  *
   9  * This software was developed at the Centre for Advanced Internet
  10  * Architectures, Swinburne University of Technology, by Lawrence Stewart and
  11  * James Healy, made possible in part by a grant from the Cisco University
  12  * Research Program Fund at Community Foundation Silicon Valley.
  13  *
  14  * Portions of this software were developed at the Centre for Advanced
  15  * Internet Architectures, Swinburne University of Technology, Melbourne,
  16  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
  17  *
  18  * Redistribution and use in source and binary forms, with or without
  19  * modification, are permitted provided that the following conditions
  20  * are met:
  21  * 1. Redistributions of source code must retain the above copyright
  22  *    notice, this list of conditions and the following disclaimer.
  23  * 2. Redistributions in binary form must reproduce the above copyright
  24  *    notice, this list of conditions and the following disclaimer in the
  25  *    documentation and/or other materials provided with the distribution.
  26  *
  27  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  30  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  37  * SUCH DAMAGE.
  38  *
  39  * $FreeBSD$
  40  */
  41 
  42 /*
  43  * This software was first released in 2007 by James Healy and Lawrence Stewart
  44  * whilst working on the NewTCP research project at Swinburne University of
  45  * Technology's Centre for Advanced Internet Architectures, Melbourne,
  46  * Australia, which was made possible in part by a grant from the Cisco
  47  * University Research Program Fund at Community Foundation Silicon Valley.
  48  * More details are available at:
  49  *   http://caia.swin.edu.au/urp/newtcp/
  50  */
  51 
  52 #ifndef _NETINET_CC_H_
  53 #define _NETINET_CC_H_
  54 
  55 #ifdef  __cplusplus
  56 extern "C" {
  57 #endif
  58 
  59 #include <netinet/tcp.h>
  60 #include <sys/queue.h>
  61 #include <sys/rwlock.h>
  62 
  63 #define CC_ALGO_NAME_MAX        16      /* max congestion control name length */
  64 
  65 #define CC_DEFAULT_ALGO_NAME    "sunreno"
  66 
  67 struct tcp_s;
  68 struct sctp_s;
  69 
  70 /* CC housekeeping functions. */
  71 extern struct cc_algo *cc_load_algo(const char *name);
  72 extern int      cc_register_algo(struct cc_algo *add_cc);
  73 extern int      cc_deregister_algo(struct cc_algo *remove_cc);
  74 
  75 /*
  76  * Wrapper around transport structs that contain same-named congestion
  77  * control variables. Allows algos to be shared amongst multiple CC aware
  78  * transports.
  79  *
  80  * In theory, this code (from FreeBSD) can be used to support pluggable
  81  * congestion control for sctp as well as tcp.  However, the support for sctp
  82  * in FreeBSD is incomplete, and in practice "type" is ignored.  cc_module.h
  83  * provides a CCV macro which implementations can use to get a variable out of
  84  * the protocol-appropriate structure.
  85  *
  86  * If FreeBSD eventually does extend support for pluggable congestion control
  87  * to sctp, we'll need to make sure we're setting "type" appropriately or use
  88  * a definition of CCV that ignores it.
  89  */
  90 struct cc_var {
  91         void            *cc_data; /* Per-connection private algorithm data. */
  92         int             bytes_this_ack; /* # bytes acked by the current ACK. */
  93         int             t_bytes_acked; /* # bytes acked during current RTT */
  94         tcp_seq         curack; /* Most recent ACK. */
  95         uint32_t        flags; /* Flags for cc_var (see below) */
  96         int             type; /* Indicates which ptr is valid in ccvc. */
  97         union ccv_container {
  98                 struct tcp_s    *tcp;
  99                 struct sctp_s   *sctp;
 100         } ccvc;
 101         uint16_t        nsegs; /* # segments coalesced into current chain. */
 102 };
 103 
 104 /*
 105  * cc_var flags.
 106  *
 107  * CCF_ABC_SENTAWND is set when a full congestion window of data has been ACKed
 108  *   according to the Appropriate Byte Counting spec, defined in RFC 3465.
 109  */
 110 #define CCF_ABC_SENTAWND        0x0001  /* ABC counted cwnd worth of bytes? */
 111 #define CCF_CWND_LIMITED        0x0002  /* Are we currently cwnd limited? */
 112 #define CCF_FASTRECOVERY        0x0004  /* in NewReno Fast Recovery */
 113 #define CCF_WASFRECOVERY        0x0008  /* was in NewReno Fast Recovery */
 114 #define CCF_CONGRECOVERY        0x0010  /* congestion recovery mode */
 115 #define CCF_WASCRECOVERY        0x0020  /* was in congestion recovery */
 116 /*
 117  * In slow-start due to a retransmission timeout. This flag is enabled for the
 118  * duration of the slow-start phase.
 119  */
 120 #define CCF_RTO                 0x0040  /* in slow-start due to timeout */
 121 
 122 #define IN_FASTRECOVERY(flags)          (flags & CCF_FASTRECOVERY)
 123 #define ENTER_FASTRECOVERY(flags)       flags |= CCF_FASTRECOVERY
 124 #define EXIT_FASTRECOVERY(flags)        flags &= ~CCF_FASTRECOVERY
 125 
 126 #define IN_CONGRECOVERY(flags)          (flags & CCF_CONGRECOVERY)
 127 #define ENTER_CONGRECOVERY(flags)       flags |= CCF_CONGRECOVERY
 128 #define EXIT_CONGRECOVERY(flags)        flags &= ~CCF_CONGRECOVERY
 129 
 130 #define IN_RECOVERY(flags) (flags & (CCF_CONGRECOVERY | CCF_FASTRECOVERY))
 131 #define ENTER_RECOVERY(flags) flags |= (CCF_CONGRECOVERY | CCF_FASTRECOVERY)
 132 #define EXIT_RECOVERY(flags) flags &= ~(CCF_CONGRECOVERY | CCF_FASTRECOVERY)
 133 
 134 /*
 135  * ACK types passed to the ack_received() hook.
 136  *
 137  * CC_ACK is passed when an ACK acknowledges previously unACKed data.
 138  * CC_DUPACK is passed when a duplicate ACK is received.  The conditions under
 139  *   which an ACK is considered a duplicate ACK are defined in RFC 5681.
 140  */
 141 #define CC_ACK          0x0001  /* Regular in sequence ACK. */
 142 #define CC_DUPACK       0x0002  /* Duplicate ACK. */
 143 #define CC_PARTIALACK   0x0004  /* Not yet. */
 144 #define CC_SACK         0x0008  /* Not yet. */
 145 
 146 /*
 147  * Congestion signal types passed to the cong_signal() hook. The highest order 8
 148  * bits (0x01000000 - 0x80000000) are reserved for CC algos to declare their own
 149  * congestion signal types.
 150  *
 151  * The congestion signals defined here cover the following situations:
 152  * CC_ECN: A packet with an Explicit Congestion Notification was received
 153  *   See RFC 3168.
 154  * CC_RTO: A round-trip timeout occured.
 155  * CC_RTO_ERR: An ACK was received for a sequence number after we fired an RTO
 156  *   for that sequence number
 157  * CC_NDUPACK: Trigger fast retransmit based on the assumption that receiving
 158  *   N duplicate ACKs indicates packet loss rather than reordering.  Fast
 159  *   retransmit is followed by fast recovery.  Fast retransmit and recovery
 160  *   were originally described in RFC 2581 and were updated by RFC3782
 161  *   (NewReno).  In both RFC2581 and RFC3782, N is 3.
 162  */
 163 #define CC_ECN          0x00000001      /* ECN marked packet received. */
 164 #define CC_RTO          0x00000002      /* RTO fired. */
 165 #define CC_RTO_ERR      0x00000004      /* RTO fired in error. */
 166 #define CC_NDUPACK      0x00000008      /* Threshold of dupack's reached. */
 167 
 168 #define CC_SIGPRIVMASK  0xFF000000      /* Mask to check if sig is private. */
 169 
 170 /*
 171  * Structure to hold data and function pointers that together represent a
 172  * congestion control algorithm.
 173  */
 174 struct cc_algo {
 175         char    name[CC_ALGO_NAME_MAX];
 176 
 177         /* Init CC state for a new control block. */
 178         int     (*cb_init)(struct cc_var *ccv);
 179 
 180         /* Cleanup CC state for a terminating control block. */
 181         void    (*cb_destroy)(struct cc_var *ccv);
 182 
 183         /* Init variables for a newly established connection. */
 184         void    (*conn_init)(struct cc_var *ccv);
 185 
 186         /* Called on receipt of an ack. */
 187         void    (*ack_received)(struct cc_var *ccv, uint16_t type);
 188 
 189         /* Called on detection of a congestion signal. */
 190         void    (*cong_signal)(struct cc_var *ccv, uint32_t type);
 191 
 192         /* Called after exiting congestion recovery. */
 193         void    (*post_recovery)(struct cc_var *ccv);
 194 
 195         /* Called when data transfer resumes after an idle period. */
 196         void    (*after_idle)(struct cc_var *ccv);
 197 
 198         STAILQ_ENTRY(cc_algo) entries;
 199 };
 200 
 201 typedef int cc_walk_func_t(void *, struct cc_algo *);
 202 extern int      cc_walk_algos(cc_walk_func_t *, void *);
 203 
 204 /* Macro to obtain the CC algo's struct ptr. */
 205 #define CC_ALGO(tp)     ((tp)->tcp_cc_algo)
 206 
 207 /* Macro to obtain the CC algo's data ptr. */
 208 #define CC_DATA(tp)     ((tp)->tcp_ccv.cc_data)
 209 
 210 #ifdef  __cplusplus
 211 }
 212 #endif
 213 
 214 #endif /* _NETINET_CC_H_ */