Print this page
11546 Track TCP round-trip time in nanoseconds
Portions contributed by: Cody Peter Mello <cody.mello@joyent.com>
Portions contributed by: Brandon Baker <bbaker@delphix.com>
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>

*** 19,29 **** * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ! * Copyright (c) 2014 by Delphix. All rights reserved. */ /* This file contains all TCP output processing functions. */ #include <sys/types.h> --- 19,30 ---- * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. ! * Copyright (c) 2014, 2016 by Delphix. All rights reserved. ! * Copyright 2019 Joyent, Inc. */ /* This file contains all TCP output processing functions. */ #include <sys/types.h>
*** 56,71 **** static void tcp_wput_cmdblk(queue_t *, mblk_t *); static void tcp_wput_flush(tcp_t *, mblk_t *); static void tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp); static int tcp_xmit_end(tcp_t *); static int tcp_send(tcp_t *, const int, const int, const int, ! const int, int *, uint_t *, int *, mblk_t **, mblk_t *); static void tcp_xmit_early_reset(char *, mblk_t *, uint32_t, uint32_t, int, ip_recv_attr_t *, ip_stack_t *, conn_t *); static boolean_t tcp_send_rst_chk(tcp_stack_t *); static void tcp_process_shrunk_swnd(tcp_t *, uint32_t); ! static void tcp_fill_header(tcp_t *, uchar_t *, clock_t, int); /* * Functions called directly via squeue having a prototype of edesc_t. */ static void tcp_wput_nondata(void *, mblk_t *, void *, ip_recv_attr_t *); --- 57,72 ---- static void tcp_wput_cmdblk(queue_t *, mblk_t *); static void tcp_wput_flush(tcp_t *, mblk_t *); static void tcp_wput_iocdata(tcp_t *tcp, mblk_t *mp); static int tcp_xmit_end(tcp_t *); static int tcp_send(tcp_t *, const int, const int, const int, ! const int, int *, uint32_t *, int *, mblk_t **, mblk_t *); static void tcp_xmit_early_reset(char *, mblk_t *, uint32_t, uint32_t, int, ip_recv_attr_t *, ip_stack_t *, conn_t *); static boolean_t tcp_send_rst_chk(tcp_stack_t *); static void tcp_process_shrunk_swnd(tcp_t *, uint32_t); ! static void tcp_fill_header(tcp_t *, uchar_t *, int); /* * Functions called directly via squeue having a prototype of edesc_t. */ static void tcp_wput_nondata(void *, mblk_t *, void *, ip_recv_attr_t *);
*** 452,462 **** /* Bypass all other unnecessary processing. */ goto done; } } ! local_time = (mblk_t *)now; /* * "Our" Nagle Algorithm. This is not the same as in the old * BSD. This is more in line with the true intent of Nagle. * --- 453,463 ---- /* Bypass all other unnecessary processing. */ goto done; } } ! local_time = (mblk_t *)(intptr_t)gethrtime(); /* * "Our" Nagle Algorithm. This is not the same as in the old * BSD. This is more in line with the true intent of Nagle. *
*** 1181,1196 **** /* start sending from tcp_snxt */ snxt = tcp->tcp_snxt; /* ! * Check to see if this connection has been idled for some ! * time and no ACK is expected. If it is, we need to slow ! * start again to get back the connection's "self-clock" as ! * described in VJ's paper. * ! * Reinitialize tcp_cwnd after idle. */ now = LBOLT_FASTPATH; if ((tcp->tcp_suna == snxt) && !tcp->tcp_localnet && (TICK_TO_MSEC(now - tcp->tcp_last_recv_time) >= tcp->tcp_rto)) { TCP_SET_INIT_CWND(tcp, mss, tcps->tcps_slow_start_after_idle); --- 1182,1198 ---- /* start sending from tcp_snxt */ snxt = tcp->tcp_snxt; /* ! * Check to see if this connection has been idle for some time and no ! * ACK is expected. If so, then the congestion window size is no longer ! * meaningfully tied to current network conditions. * ! * We reinitialize tcp_cwnd, and slow start again to get back the ! * connection's "self-clock" as described in Van Jacobson's 1988 paper ! * "Congestion avoidance and control". */ now = LBOLT_FASTPATH; if ((tcp->tcp_suna == snxt) && !tcp->tcp_localnet && (TICK_TO_MSEC(now - tcp->tcp_last_recv_time) >= tcp->tcp_rto)) { TCP_SET_INIT_CWND(tcp, mss, tcps->tcps_slow_start_after_idle);
*** 1254,1264 **** tcp->tcp_snxt = snxt + len; tcp->tcp_rack = tcp->tcp_rnxt; if ((mp1 = dupb(mp)) == 0) goto no_memory; ! mp->b_prev = (mblk_t *)(uintptr_t)now; mp->b_next = (mblk_t *)(uintptr_t)snxt; /* adjust tcp header information */ tcpha = tcp->tcp_tcpha; tcpha->tha_flags = (TH_ACK|TH_PUSH); --- 1256,1266 ---- tcp->tcp_snxt = snxt + len; tcp->tcp_rack = tcp->tcp_rnxt; if ((mp1 = dupb(mp)) == 0) goto no_memory; ! mp->b_prev = (mblk_t *)(intptr_t)gethrtime(); mp->b_next = (mblk_t *)(uintptr_t)snxt; /* adjust tcp header information */ tcpha = tcp->tcp_tcpha; tcpha->tha_flags = (TH_ACK|TH_PUSH);
*** 1309,1324 **** } mp1->b_rptr = rptr; /* Fill in the timestamp option. */ if (tcp->tcp_snd_ts_ok) { ! uint32_t llbolt = (uint32_t)LBOLT_FASTPATH; ! ! U32_TO_BE32(llbolt, ! (char *)tcpha + TCP_MIN_HEADER_LENGTH+4); U32_TO_BE32(tcp->tcp_ts_recent, ! (char *)tcpha + TCP_MIN_HEADER_LENGTH+8); } else { ASSERT(connp->conn_ht_ulp_len == TCP_MIN_HEADER_LENGTH); } /* copy header into outgoing packet */ --- 1311,1324 ---- } mp1->b_rptr = rptr; /* Fill in the timestamp option. */ if (tcp->tcp_snd_ts_ok) { ! U32_TO_BE32(now, ! (char *)tcpha + TCP_MIN_HEADER_LENGTH + 4); U32_TO_BE32(tcp->tcp_ts_recent, ! (char *)tcpha + TCP_MIN_HEADER_LENGTH + 8); } else { ASSERT(connp->conn_ht_ulp_len == TCP_MIN_HEADER_LENGTH); } /* copy header into outgoing packet */
*** 1769,1779 **** * small and we'd rather wait until later before sending again. */ static int tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len, const int tcp_hdr_len, const int num_sack_blk, int *usable, ! uint_t *snxt, int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time) { int num_lso_seg = 1; uint_t lso_usable; boolean_t do_lso_send = B_FALSE; tcp_stack_t *tcps = tcp->tcp_tcps; --- 1769,1779 ---- * small and we'd rather wait until later before sending again. */ static int tcp_send(tcp_t *tcp, const int mss, const int total_hdr_len, const int tcp_hdr_len, const int num_sack_blk, int *usable, ! uint32_t *snxt, int *tail_unsent, mblk_t **xmit_tail, mblk_t *local_time) { int num_lso_seg = 1; uint_t lso_usable; boolean_t do_lso_send = B_FALSE; tcp_stack_t *tcps = tcp->tcp_tcps;
*** 2064,2074 **** /* * Fill in the header using the template header, and add * options such as time-stamp, ECN and/or SACK, as needed. */ ! tcp_fill_header(tcp, rptr, (clock_t)local_time, num_sack_blk); mp->b_rptr = rptr; if (*tail_unsent) { int spill = *tail_unsent; --- 2064,2074 ---- /* * Fill in the header using the template header, and add * options such as time-stamp, ECN and/or SACK, as needed. */ ! tcp_fill_header(tcp, rptr, num_sack_blk); mp->b_rptr = rptr; if (*tail_unsent) { int spill = *tail_unsent;
*** 2282,2293 **** /* * We do not have a good algorithm to update ssthresh at this time. * So don't do any update. */ bzero(&uinfo, sizeof (uinfo)); ! uinfo.iulp_rtt = tcp->tcp_rtt_sa; ! uinfo.iulp_rtt_sd = tcp->tcp_rtt_sd; /* * Note that uinfo is kept for conn_faddr in the DCE. Could update even * if source routed but we don't. */ --- 2282,2293 ---- /* * We do not have a good algorithm to update ssthresh at this time. * So don't do any update. */ bzero(&uinfo, sizeof (uinfo)); ! uinfo.iulp_rtt = NSEC2MSEC(tcp->tcp_rtt_sa); ! uinfo.iulp_rtt_sd = NSEC2MSEC(tcp->tcp_rtt_sd); /* * Note that uinfo is kept for conn_faddr in the DCE. Could update even * if source routed but we don't. */
*** 3387,3397 **** tcp_send_data(tcp, xmit_mp); /* * Update the send timestamp to avoid false retransmission. */ ! snxt_mp->b_prev = (mblk_t *)ddi_get_lbolt(); TCPS_BUMP_MIB(tcps, tcpRetransSegs); TCPS_UPDATE_MIB(tcps, tcpRetransBytes, seg_len); TCPS_BUMP_MIB(tcps, tcpOutSackRetransSegs); /* --- 3387,3397 ---- tcp_send_data(tcp, xmit_mp); /* * Update the send timestamp to avoid false retransmission. */ ! snxt_mp->b_prev = (mblk_t *)(intptr_t)gethrtime(); TCPS_BUMP_MIB(tcps, tcpRetransSegs); TCPS_UPDATE_MIB(tcps, tcpRetransBytes, seg_len); TCPS_BUMP_MIB(tcps, tcpOutSackRetransSegs); /*
*** 3459,3469 **** win -= cnt; /* * Update the send timestamp to avoid false * retransmission. */ ! old_snxt_mp->b_prev = (mblk_t *)ddi_get_lbolt(); TCPS_BUMP_MIB(tcps, tcpRetransSegs); TCPS_UPDATE_MIB(tcps, tcpRetransBytes, cnt); tcp->tcp_rexmit_nxt = snxt; } --- 3459,3469 ---- win -= cnt; /* * Update the send timestamp to avoid false * retransmission. */ ! old_snxt_mp->b_prev = (mblk_t *)(intptr_t)gethrtime(); TCPS_BUMP_MIB(tcps, tcpRetransSegs); TCPS_UPDATE_MIB(tcps, tcpRetransBytes, cnt); tcp->tcp_rexmit_nxt = snxt; }
*** 3619,3629 **** * tcp_fill_header is called by tcp_send() to fill the outgoing TCP header * with the template header, as well as other options such as time-stamp, * ECN and/or SACK. */ static void ! tcp_fill_header(tcp_t *tcp, uchar_t *rptr, clock_t now, int num_sack_blk) { tcpha_t *tcp_tmpl, *tcpha; uint32_t *dst, *src; int hdrlen; conn_t *connp = tcp->tcp_connp; --- 3619,3629 ---- * tcp_fill_header is called by tcp_send() to fill the outgoing TCP header * with the template header, as well as other options such as time-stamp, * ECN and/or SACK. */ static void ! tcp_fill_header(tcp_t *tcp, uchar_t *rptr, int num_sack_blk) { tcpha_t *tcp_tmpl, *tcpha; uint32_t *dst, *src; int hdrlen; conn_t *connp = tcp->tcp_connp;
*** 3641,3651 **** src = (uint32_t *)connp->conn_ht_iphc; hdrlen = connp->conn_ht_iphc_len; /* Fill time-stamp option if needed */ if (tcp->tcp_snd_ts_ok) { ! U32_TO_BE32((uint32_t)now, (char *)tcp_tmpl + TCP_MIN_HEADER_LENGTH + 4); U32_TO_BE32(tcp->tcp_ts_recent, (char *)tcp_tmpl + TCP_MIN_HEADER_LENGTH + 8); } else { ASSERT(connp->conn_ht_ulp_len == TCP_MIN_HEADER_LENGTH); --- 3641,3651 ---- src = (uint32_t *)connp->conn_ht_iphc; hdrlen = connp->conn_ht_iphc_len; /* Fill time-stamp option if needed */ if (tcp->tcp_snd_ts_ok) { ! U32_TO_BE32(LBOLT_FASTPATH, (char *)tcp_tmpl + TCP_MIN_HEADER_LENGTH + 4); U32_TO_BE32(tcp->tcp_ts_recent, (char *)tcp_tmpl + TCP_MIN_HEADER_LENGTH + 8); } else { ASSERT(connp->conn_ht_ulp_len == TCP_MIN_HEADER_LENGTH);