Print this page
11546 Track TCP round-trip time in nanoseconds
Portions contributed by: Cody Peter Mello <cody.mello@joyent.com>
Portions contributed by: Brandon Baker <bbaker@delphix.com>
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
*** 19,29 ****
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
! * Copyright (c) 2014 by Delphix. All rights reserved.
*/
/* This file contains all TCP output processing functions. */
#include <sys/types.h>
--- 19,30 ----
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
! * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
! * Copyright 2019 Joyent, Inc.
*/
/* This file contains all TCP output processing functions. */
#include <sys/types.h>
*** 56,71 ****
static void tcp_wput_cmdblk(queue_t *, mblk_t *);
static void tcp_wput_flush(tcp_t *, mblk_t *);
static void tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp);
static int tcp_xmit_end(tcp_t *);
static int tcp_send(tcp_t *, const int, const int, const int,
! const int, int *, uint_t *, int *, mblk_t **, mblk_t *);
static void tcp_xmit_early_reset(char *, mblk_t *, uint32_t, uint32_t,
int, ip_recv_attr_t *, ip_stack_t *, conn_t *);
static boolean_t tcp_send_rst_chk(tcp_stack_t *);
static void tcp_process_shrunk_swnd(tcp_t *, uint32_t);
! static void tcp_fill_header(tcp_t *, uchar_t *, clock_t, int);
/*
* Functions called directly via squeue having a prototype of edesc_t.
*/
static void tcp_wput_nondata(void *, mblk_t *, void *, ip_recv_attr_t *);
--- 57,72 ----
static void tcp_wput_cmdblk(queue_t *, mblk_t *);
static void tcp_wput_flush(tcp_t *, mblk_t *);
static void tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp);
static int tcp_xmit_end(tcp_t *);
static int tcp_send(tcp_t *, const int, const int, const int,
! const int, int *, uint32_t *, int *, mblk_t **, mblk_t *);
static void tcp_xmit_early_reset(char *, mblk_t *, uint32_t, uint32_t,
int, ip_recv_attr_t *, ip_stack_t *, conn_t *);
static boolean_t tcp_send_rst_chk(tcp_stack_t *);
static void tcp_process_shrunk_swnd(tcp_t *, uint32_t);
! static void tcp_fill_header(tcp_t *, uchar_t *, int);
/*
* Functions called directly via squeue having a prototype of edesc_t.
*/
static void tcp_wput_nondata(void *, mblk_t *, void *, ip_recv_attr_t *);
*** 452,462 ****
/* Bypass all other unnecessary processing. */
goto done;
}
}
! local_time = (mblk_t *)now;
/*
* "Our" Nagle Algorithm. This is not the same as in the old
* BSD. This is more in line with the true intent of Nagle.
*
--- 453,463 ----
/* Bypass all other unnecessary processing. */
goto done;
}
}
! local_time = (mblk_t *)(intptr_t)gethrtime();
/*
* "Our" Nagle Algorithm. This is not the same as in the old
* BSD. This is more in line with the true intent of Nagle.
*
*** 1181,1196 ****
/* start sending from tcp_snxt */
snxt = tcp->tcp_snxt;
/*
! * Check to see if this connection has been idled for some
! * time and no ACK is expected. If it is, we need to slow
! * start again to get back the connection's "self-clock" as
! * described in VJ's paper.
*
! * Reinitialize tcp_cwnd after idle.
*/
now = LBOLT_FASTPATH;
if ((tcp->tcp_suna == snxt) && !tcp->tcp_localnet &&
(TICK_TO_MSEC(now - tcp->tcp_last_recv_time) >= tcp->tcp_rto)) {
TCP_SET_INIT_CWND(tcp, mss, tcps->tcps_slow_start_after_idle);
--- 1182,1198 ----
/* start sending from tcp_snxt */
snxt = tcp->tcp_snxt;
/*
! * Check to see if this connection has been idle for some time and no
! * ACK is expected. If so, then the congestion window size is no longer
! * meaningfully tied to current network conditions.
*
! * We reinitialize tcp_cwnd, and slow start again to get back the
! * connection's "self-clock" as described in Van Jacobson's 1988 paper
! * "Congestion avoidance and control".
*/
now = LBOLT_FASTPATH;
if ((tcp->tcp_suna == snxt) && !tcp->tcp_localnet &&
(TICK_TO_MSEC(now - tcp->tcp_last_recv_time) >= tcp->tcp_rto)) {
TCP_SET_INIT_CWND(tcp, mss, tcps->tcps_slow_start_after_idle);
*** 1254,1264 ****
tcp->tcp_snxt = snxt + len;
tcp->tcp_rack = tcp->tcp_rnxt;
if ((mp1 = dupb(mp)) == 0)
goto no_memory;
! mp->b_prev = (mblk_t *)(uintptr_t)now;
mp->b_next = (mblk_t *)(uintptr_t)snxt;
/* adjust tcp header information */
tcpha = tcp->tcp_tcpha;
tcpha->tha_flags = (TH_ACK|TH_PUSH);
--- 1256,1266 ----
tcp->tcp_snxt = snxt + len;
tcp->tcp_rack = tcp->tcp_rnxt;
if ((mp1 = dupb(mp)) == 0)
goto no_memory;
! mp->b_prev = (mblk_t *)(intptr_t)gethrtime();
mp->b_next = (mblk_t *)(uintptr_t)snxt;
/* adjust tcp header information */
tcpha = tcp->tcp_tcpha;
tcpha->tha_flags = (TH_ACK|TH_PUSH);
*** 1309,1324 ****
}
mp1->b_rptr = rptr;
/* Fill in the timestamp option. */
if (tcp->tcp_snd_ts_ok) {
! uint32_t llbolt = (uint32_t)LBOLT_FASTPATH;
!
! U32_TO_BE32(llbolt,
! (char *)tcpha + TCP_MIN_HEADER_LENGTH+4);
U32_TO_BE32(tcp->tcp_ts_recent,
! (char *)tcpha + TCP_MIN_HEADER_LENGTH+8);
} else {
ASSERT(connp->conn_ht_ulp_len == TCP_MIN_HEADER_LENGTH);
}
/* copy header into outgoing packet */
--- 1311,1324 ----
}
mp1->b_rptr = rptr;
/* Fill in the timestamp option. */
if (tcp->tcp_snd_ts_ok) {
! U32_TO_BE32(now,
! (char *)tcpha + TCP_MIN_HEADER_LENGTH + 4);
U32_TO_BE32(tcp->tcp_ts_recent,
! (char *)tcpha + TCP_MIN_HEADER_LENGTH + 8);
} else {
ASSERT(connp->conn_ht_ulp_len == TCP_MIN_HEADER_LENGTH);
}
/* copy header into outgoing packet */
*** 1769,1779 ****
* small and we'd rather wait until later before sending again.
*/
static int
tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len,
const int tcp_hdr_len, const int num_sack_blk, int *usable,
! uint_t *snxt, int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time)
{
int num_lso_seg = 1;
uint_t lso_usable;
boolean_t do_lso_send = B_FALSE;
tcp_stack_t *tcps = tcp->tcp_tcps;
--- 1769,1779 ----
* small and we'd rather wait until later before sending again.
*/
static int
tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len,
const int tcp_hdr_len, const int num_sack_blk, int *usable,
! uint32_t *snxt, int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time)
{
int num_lso_seg = 1;
uint_t lso_usable;
boolean_t do_lso_send = B_FALSE;
tcp_stack_t *tcps = tcp->tcp_tcps;
*** 2064,2074 ****
/*
* Fill in the header using the template header, and add
* options such as time-stamp, ECN and/or SACK, as needed.
*/
! tcp_fill_header(tcp, rptr, (clock_t)local_time, num_sack_blk);
mp->b_rptr = rptr;
if (*tail_unsent) {
int spill = *tail_unsent;
--- 2064,2074 ----
/*
* Fill in the header using the template header, and add
* options such as time-stamp, ECN and/or SACK, as needed.
*/
! tcp_fill_header(tcp, rptr, num_sack_blk);
mp->b_rptr = rptr;
if (*tail_unsent) {
int spill = *tail_unsent;
*** 2282,2293 ****
/*
* We do not have a good algorithm to update ssthresh at this time.
* So don't do any update.
*/
bzero(&uinfo, sizeof (uinfo));
! uinfo.iulp_rtt = tcp->tcp_rtt_sa;
! uinfo.iulp_rtt_sd = tcp->tcp_rtt_sd;
/*
* Note that uinfo is kept for conn_faddr in the DCE. Could update even
* if source routed but we don't.
*/
--- 2282,2293 ----
/*
* We do not have a good algorithm to update ssthresh at this time.
* So don't do any update.
*/
bzero(&uinfo, sizeof (uinfo));
! uinfo.iulp_rtt = NSEC2MSEC(tcp->tcp_rtt_sa);
! uinfo.iulp_rtt_sd = NSEC2MSEC(tcp->tcp_rtt_sd);
/*
* Note that uinfo is kept for conn_faddr in the DCE. Could update even
* if source routed but we don't.
*/
*** 3387,3397 ****
tcp_send_data(tcp, xmit_mp);
/*
* Update the send timestamp to avoid false retransmission.
*/
! snxt_mp->b_prev = (mblk_t *)ddi_get_lbolt();
TCPS_BUMP_MIB(tcps, tcpRetransSegs);
TCPS_UPDATE_MIB(tcps, tcpRetransBytes, seg_len);
TCPS_BUMP_MIB(tcps, tcpOutSackRetransSegs);
/*
--- 3387,3397 ----
tcp_send_data(tcp, xmit_mp);
/*
* Update the send timestamp to avoid false retransmission.
*/
! snxt_mp->b_prev = (mblk_t *)(intptr_t)gethrtime();
TCPS_BUMP_MIB(tcps, tcpRetransSegs);
TCPS_UPDATE_MIB(tcps, tcpRetransBytes, seg_len);
TCPS_BUMP_MIB(tcps, tcpOutSackRetransSegs);
/*
*** 3459,3469 ****
win -= cnt;
/*
* Update the send timestamp to avoid false
* retransmission.
*/
! old_snxt_mp->b_prev = (mblk_t *)ddi_get_lbolt();
TCPS_BUMP_MIB(tcps, tcpRetransSegs);
TCPS_UPDATE_MIB(tcps, tcpRetransBytes, cnt);
tcp->tcp_rexmit_nxt = snxt;
}
--- 3459,3469 ----
win -= cnt;
/*
* Update the send timestamp to avoid false
* retransmission.
*/
! old_snxt_mp->b_prev = (mblk_t *)(intptr_t)gethrtime();
TCPS_BUMP_MIB(tcps, tcpRetransSegs);
TCPS_UPDATE_MIB(tcps, tcpRetransBytes, cnt);
tcp->tcp_rexmit_nxt = snxt;
}
*** 3619,3629 ****
* tcp_fill_header is called by tcp_send() to fill the outgoing TCP header
* with the template header, as well as other options such as time-stamp,
* ECN and/or SACK.
*/
static void
! tcp_fill_header(tcp_t *tcp, uchar_t *rptr, clock_t now, int num_sack_blk)
{
tcpha_t *tcp_tmpl, *tcpha;
uint32_t *dst, *src;
int hdrlen;
conn_t *connp = tcp->tcp_connp;
--- 3619,3629 ----
* tcp_fill_header is called by tcp_send() to fill the outgoing TCP header
* with the template header, as well as other options such as time-stamp,
* ECN and/or SACK.
*/
static void
! tcp_fill_header(tcp_t *tcp, uchar_t *rptr, int num_sack_blk)
{
tcpha_t *tcp_tmpl, *tcpha;
uint32_t *dst, *src;
int hdrlen;
conn_t *connp = tcp->tcp_connp;
*** 3641,3651 ****
src = (uint32_t *)connp->conn_ht_iphc;
hdrlen = connp->conn_ht_iphc_len;
/* Fill time-stamp option if needed */
if (tcp->tcp_snd_ts_ok) {
! U32_TO_BE32((uint32_t)now,
(char *)tcp_tmpl + TCP_MIN_HEADER_LENGTH + 4);
U32_TO_BE32(tcp->tcp_ts_recent,
(char *)tcp_tmpl + TCP_MIN_HEADER_LENGTH + 8);
} else {
ASSERT(connp->conn_ht_ulp_len == TCP_MIN_HEADER_LENGTH);
--- 3641,3651 ----
src = (uint32_t *)connp->conn_ht_iphc;
hdrlen = connp->conn_ht_iphc_len;
/* Fill time-stamp option if needed */
if (tcp->tcp_snd_ts_ok) {
! U32_TO_BE32(LBOLT_FASTPATH,
(char *)tcp_tmpl + TCP_MIN_HEADER_LENGTH + 4);
U32_TO_BE32(tcp->tcp_ts_recent,
(char *)tcp_tmpl + TCP_MIN_HEADER_LENGTH + 8);
} else {
ASSERT(connp->conn_ht_ulp_len == TCP_MIN_HEADER_LENGTH);