Print this page
11546 Track TCP round-trip time in nanoseconds
Portions contributed by: Cody Peter Mello <cody.mello@joyent.com>
Portions contributed by: Brandon Baker <bbaker@delphix.com>
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
*** 18,30 ****
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
! * Copyright 2016 Joyent, Inc.
* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
! * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
*/
#ifndef _INET_TCP_IMPL_H
#define _INET_TCP_IMPL_H
--- 18,30 ----
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
! * Copyright 2019 Joyent, Inc.
* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
! * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
*/
#ifndef _INET_TCP_IMPL_H
#define _INET_TCP_IMPL_H
*** 298,318 ****
((ip6_t *)(iph))->ip6_vcf &= htonl(0xFFCFFFFF); \
((ip6_t *)(iph))->ip6_vcf |= htonl(IPH_ECN_ECT0 << 20); \
}
/*
- * Set tcp_rto with boundary checking.
- */
- #define TCP_SET_RTO(tcp, rto) \
- if ((rto) < (tcp)->tcp_rto_min) \
- (tcp)->tcp_rto = (tcp)->tcp_rto_min; \
- else if ((rto) > (tcp)->tcp_rto_max) \
- (tcp)->tcp_rto = (tcp)->tcp_rto_max; \
- else \
- (tcp)->tcp_rto = (rto);
-
- /*
* TCP options struct returned from tcp_parse_options.
*/
typedef struct tcp_opt_s {
uint32_t tcp_opt_mss;
uint32_t tcp_opt_wscale;
--- 298,307 ----
*** 572,581 ****
--- 561,625 ----
#define tcps_wroff_xtra tcps_propinfo_tbl[57].prop_cur_uval
#define tcps_dev_flow_ctl tcps_propinfo_tbl[58].prop_cur_bval
#define tcps_reass_timeout tcps_propinfo_tbl[59].prop_cur_uval
#define tcps_iss_incr tcps_propinfo_tbl[65].prop_cur_uval
+
+ /*
+ * As defined in RFC 6298, the RTO is the average estimates (SRTT) plus a
+ * multiple of the deviation estimates (K * RTTVAR):
+ *
+ * RTO = SRTT + max(G, K * RTTVAR)
+ *
+ * K is defined in the RFC as 4, and G is the clock granularity. We constrain
+ * the minimum mean deviation to TCP_SD_MIN when processing new RTTs, so this
+ * becomes:
+ *
+ * RTO = SRTT + 4 * RTTVAR
+ *
+ * In practice, however, we make several additions to it. As we use a finer
+ * grained clock than BSD and update RTO for every ACK, we add in another 1/4 of
+ * RTT to the deviation of RTO to accommodate burstiness of 1/4 of window size:
+ *
+ * RTO = SRTT + (SRTT / 4) + 4 * RTTVAR
+ *
+ * Since tcp_rtt_sa is 8 times the SRTT, and tcp_rtt_sd is 4 times the RTTVAR,
+ * this becomes:
+ *
+ * RTO = (tcp_rtt_sa / 8) + ((tcp_rtt_sa / 8) / 4) + tcp_rtt_sd
+ * RTO = (tcp_rtt_sa / 2^3) + (tcp_rtt_sa / 2^5) + tcp_rtt_sd
+ * RTO = (tcp_rtt_sa >> 3) + (tcp_rtt_sa >> 5) + tcp_rtt_sd
+ *
+ * The "tcp_rexmit_interval_extra" and "tcp_conn_grace_period" tunables are
+ * used to help account for extreme environments where the algorithm fails to
+ * work; by default they should be 0. (The latter tunable is only used for
+ * calculating the intial RTO, and so is optionally passed in as "extra".) We
+ * add them here:
+ *
+ * RTO = (tcp_rtt_sa >> 3) + (tcp_rtt_sa >> 5) + tcp_rtt_sd +
+ * tcps_rexmit_interval_extra + tcps_conn_grace_period
+ *
+ * We then pin the RTO within our configured boundaries (sections 2.4 and 2.5
+ * of RFC 6298).
+ */
+ static __GNU_INLINE clock_t
+ tcp_calculate_rto(tcp_t *tcp, tcp_stack_t *tcps, uint32_t extra)
+ {
+ clock_t rto;
+
+ rto = NSEC2MSEC((tcp->tcp_rtt_sa >> 3) + (tcp->tcp_rtt_sa >> 5) +
+ tcp->tcp_rtt_sd) + tcps->tcps_rexmit_interval_extra + extra;
+
+ if (rto < tcp->tcp_rto_min) {
+ rto = tcp->tcp_rto_min;
+ } else if (rto > tcp->tcp_rto_max) {
+ rto = tcp->tcp_rto_max;
+ }
+
+ return (rto);
+ }
+
extern struct qinit tcp_rinitv4, tcp_rinitv6;
extern boolean_t do_tcp_fusion;
/*
* Object to represent database of options to search passed to