6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2011 Joyent, Inc. All rights reserved.
26 * Copyright (c) 2014 by Delphix. All rights reserved.
27 */
28
29 #include <sys/types.h>
30 #include <sys/strlog.h>
31 #include <sys/strsun.h>
32 #include <sys/squeue_impl.h>
33 #include <sys/squeue.h>
34 #include <sys/callo.h>
35 #include <sys/strsubr.h>
36
37 #include <inet/common.h>
38 #include <inet/ip.h>
39 #include <inet/ip_ire.h>
40 #include <inet/ip_rts.h>
41 #include <inet/tcp.h>
42 #include <inet/tcp_impl.h>
43
44 /*
45 * Implementation of TCP Timers.
46 * =============================
739 /*
740 * If the end point has not been closed, TCP can retransmit
741 * forever. But if the end point is closed, the normal
742 * timeout applies.
743 */
744 if (second_threshold == 0) {
745 second_threshold = tcps->tcps_ip_abort_linterval;
746 dont_timeout = B_TRUE;
747 }
748 /* FALLTHRU */
749 case TCPS_FIN_WAIT_1:
750 case TCPS_CLOSING:
751 case TCPS_LAST_ACK:
752 /* If we have data to rexmit */
753 if (tcp->tcp_suna != tcp->tcp_snxt) {
754 clock_t time_to_wait;
755
756 TCPS_BUMP_MIB(tcps, tcpTimRetrans);
757 if (!tcp->tcp_xmit_head)
758 break;
759 time_to_wait = ddi_get_lbolt() -
760 (clock_t)tcp->tcp_xmit_head->b_prev;
761 time_to_wait = tcp->tcp_rto -
762 TICK_TO_MSEC(time_to_wait);
763 /*
764 * If the timer fires too early, 1 clock tick earlier,
765 * restart the timer.
766 */
767 if (time_to_wait > msec_per_tick) {
768 TCP_STAT(tcps, tcp_timer_fire_early);
769 TCP_TIMER_RESTART(tcp, time_to_wait);
770 return;
771 }
772 /*
773 * When we probe zero windows, we force the swnd open.
774 * If our peer acks with a closed window swnd will be
775 * set to zero by tcp_rput(). As long as we are
776 * receiving acks tcp_rput will
777 * reset 'tcp_ms_we_have_waited' so as not to trip the
778 * first and second interval actions. NOTE: the timer
779 * interval is allowed to continue its exponential
780 * backoff.
781 */
782 if (tcp->tcp_swnd == 0 || tcp->tcp_zero_win_probe) {
995 */
996 tcp->tcp_ms_we_have_waited = second_threshold;
997 }
998 } else if (ms > first_threshold) {
999 /*
1000 * Should not hold the zero-copy messages for too long.
1001 */
1002 if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
1003 tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
1004 tcp->tcp_xmit_head, B_TRUE);
1005
1006 /*
1007 * We have been retransmitting for too long... The RTT
1008 * we calculated is probably incorrect. Reinitialize it.
1009 * Need to compensate for 0 tcp_rtt_sa. Reset
1010 * tcp_rtt_update so that we won't accidentally cache a
1011 * bad value. But only do this if this is not a zero
1012 * window probe.
1013 */
1014 if (tcp->tcp_rtt_sa != 0 && tcp->tcp_zero_win_probe == 0) {
1015 tcp->tcp_rtt_sd += (tcp->tcp_rtt_sa >> 3) +
1016 (tcp->tcp_rtt_sa >> 5);
1017 tcp->tcp_rtt_sa = 0;
1018 tcp_ip_notify(tcp);
1019 tcp->tcp_rtt_update = 0;
1020 }
1021 }
1022
1023 timer_rexmit:
1024 tcp->tcp_timer_backoff++;
1025 if ((ms = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
1026 tcps->tcps_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5)) <
1027 tcp->tcp_rto_min) {
1028 /*
1029 * This means the original RTO is tcp_rexmit_interval_min.
1030 * So we will use tcp_rexmit_interval_min as the RTO value
1031 * and do the backoff.
1032 */
1033 ms = tcp->tcp_rto_min << tcp->tcp_timer_backoff;
1034 } else {
1035 ms <<= tcp->tcp_timer_backoff;
1036 }
1037 if (ms > tcp->tcp_rto_max) {
1038 ms = tcp->tcp_rto_max;
1039 /*
1040 * ms is at max, decrement tcp_timer_backoff to avoid
1041 * overflow.
1042 */
1043 tcp->tcp_timer_backoff--;
1044 }
1045 tcp->tcp_ms_we_have_waited += ms;
1046 if (tcp->tcp_zero_win_probe == 0) {
1047 tcp->tcp_rto = ms;
1048 }
1049 TCP_TIMER_RESTART(tcp, ms);
1050 /*
1051 * This is after a timeout and tcp_rto is backed off. Set
1052 * tcp_set_timer to 1 so that next time RTO is updated, we will
1053 * restart the timer with a correct value.
1054 */
1055 tcp->tcp_set_timer = 1;
1056 mss = tcp->tcp_snxt - tcp->tcp_suna;
1057 if (mss > tcp->tcp_mss)
1058 mss = tcp->tcp_mss;
1059 if (mss > tcp->tcp_swnd && tcp->tcp_swnd != 0)
1060 mss = tcp->tcp_swnd;
1061
1062 if ((mp = tcp->tcp_xmit_head) != NULL)
1063 mp->b_prev = (mblk_t *)ddi_get_lbolt();
1064 mp = tcp_xmit_mp(tcp, mp, mss, NULL, NULL, tcp->tcp_suna, B_TRUE, &mss,
1065 B_TRUE);
1066
1067 /*
1068 * When slow start after retransmission begins, start with
1069 * this seq no. tcp_rexmit_max marks the end of special slow
1070 * start phase.
1071 */
1072 tcp->tcp_rexmit_nxt = tcp->tcp_suna;
1073 if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
1074 (tcp->tcp_unsent == 0)) {
1075 tcp->tcp_rexmit_max = tcp->tcp_fss;
1076 } else {
1077 tcp->tcp_rexmit_max = tcp->tcp_snxt;
1078 }
1079 tcp->tcp_rexmit = B_TRUE;
1080 tcp->tcp_dupack_cnt = 0;
1081
1082 /*
1083 * Remove all rexmit SACK blk to start from fresh.
|
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
25 * Copyright 2011 Joyent, Inc. All rights reserved.
26 * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
27 */
28
29 #include <sys/types.h>
30 #include <sys/strlog.h>
31 #include <sys/strsun.h>
32 #include <sys/squeue_impl.h>
33 #include <sys/squeue.h>
34 #include <sys/callo.h>
35 #include <sys/strsubr.h>
36
37 #include <inet/common.h>
38 #include <inet/ip.h>
39 #include <inet/ip_ire.h>
40 #include <inet/ip_rts.h>
41 #include <inet/tcp.h>
42 #include <inet/tcp_impl.h>
43
44 /*
45 * Implementation of TCP Timers.
46 * =============================
739 /*
740 * If the end point has not been closed, TCP can retransmit
741 * forever. But if the end point is closed, the normal
742 * timeout applies.
743 */
744 if (second_threshold == 0) {
745 second_threshold = tcps->tcps_ip_abort_linterval;
746 dont_timeout = B_TRUE;
747 }
748 /* FALLTHRU */
749 case TCPS_FIN_WAIT_1:
750 case TCPS_CLOSING:
751 case TCPS_LAST_ACK:
752 /* If we have data to rexmit */
753 if (tcp->tcp_suna != tcp->tcp_snxt) {
754 clock_t time_to_wait;
755
756 TCPS_BUMP_MIB(tcps, tcpTimRetrans);
757 if (!tcp->tcp_xmit_head)
758 break;
759 time_to_wait = NSEC2MSEC(gethrtime() -
760 (hrtime_t)(intptr_t)tcp->tcp_xmit_head->b_prev);
761 time_to_wait = tcp->tcp_rto - time_to_wait;
762 /*
763 * If the timer fires too early, 1 clock tick earlier,
764 * restart the timer.
765 */
766 if (time_to_wait > msec_per_tick) {
767 TCP_STAT(tcps, tcp_timer_fire_early);
768 TCP_TIMER_RESTART(tcp, time_to_wait);
769 return;
770 }
771 /*
772 * When we probe zero windows, we force the swnd open.
773 * If our peer acks with a closed window swnd will be
774 * set to zero by tcp_rput(). As long as we are
775 * receiving acks tcp_rput will
776 * reset 'tcp_ms_we_have_waited' so as not to trip the
777 * first and second interval actions. NOTE: the timer
778 * interval is allowed to continue its exponential
779 * backoff.
780 */
781 if (tcp->tcp_swnd == 0 || tcp->tcp_zero_win_probe) {
994 */
995 tcp->tcp_ms_we_have_waited = second_threshold;
996 }
997 } else if (ms > first_threshold) {
998 /*
999 * Should not hold the zero-copy messages for too long.
1000 */
1001 if (tcp->tcp_snd_zcopy_aware && !tcp->tcp_xmit_zc_clean)
1002 tcp->tcp_xmit_head = tcp_zcopy_backoff(tcp,
1003 tcp->tcp_xmit_head, B_TRUE);
1004
1005 /*
1006 * We have been retransmitting for too long... The RTT
1007 * we calculated is probably incorrect. Reinitialize it.
1008 * Need to compensate for 0 tcp_rtt_sa. Reset
1009 * tcp_rtt_update so that we won't accidentally cache a
1010 * bad value. But only do this if this is not a zero
1011 * window probe.
1012 */
1013 if (tcp->tcp_rtt_sa != 0 && tcp->tcp_zero_win_probe == 0) {
1014 tcp->tcp_rtt_sd += tcp->tcp_rtt_sa >> 3 +
1015 tcp->tcp_rtt_sa >> 5;
1016 tcp->tcp_rtt_sa = 0;
1017 tcp_ip_notify(tcp);
1018 tcp->tcp_rtt_update = 0;
1019 }
1020 }
1021
1022 timer_rexmit:
1023 tcp->tcp_timer_backoff++;
1024 /*
1025 * Calculate the backed off retransmission timeout. If the shift brings
1026 * us back over the max, then we repin the value, and decrement the
1027 * backoff to avoid overflow.
1028 */
1029 ms = tcp_calculate_rto(tcp, tcps, 0) << tcp->tcp_timer_backoff;
1030 if (ms > tcp->tcp_rto_max) {
1031 ms = tcp->tcp_rto_max;
1032 tcp->tcp_timer_backoff--;
1033 }
1034 tcp->tcp_ms_we_have_waited += ms;
1035 if (tcp->tcp_zero_win_probe == 0) {
1036 tcp->tcp_rto = ms;
1037 }
1038 TCP_TIMER_RESTART(tcp, ms);
1039 /*
1040 * This is after a timeout and tcp_rto is backed off. Set
1041 * tcp_set_timer to 1 so that next time RTO is updated, we will
1042 * restart the timer with a correct value.
1043 */
1044 tcp->tcp_set_timer = 1;
1045 mss = tcp->tcp_snxt - tcp->tcp_suna;
1046 if (mss > tcp->tcp_mss)
1047 mss = tcp->tcp_mss;
1048 if (mss > tcp->tcp_swnd && tcp->tcp_swnd != 0)
1049 mss = tcp->tcp_swnd;
1050
1051 if ((mp = tcp->tcp_xmit_head) != NULL) {
1052 mp->b_prev = (mblk_t *)(intptr_t)gethrtime();
1053 }
1054 mp = tcp_xmit_mp(tcp, mp, mss, NULL, NULL, tcp->tcp_suna, B_TRUE, &mss,
1055 B_TRUE);
1056
1057 /*
1058 * When slow start after retransmission begins, start with
1059 * this seq no. tcp_rexmit_max marks the end of special slow
1060 * start phase.
1061 */
1062 tcp->tcp_rexmit_nxt = tcp->tcp_suna;
1063 if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
1064 (tcp->tcp_unsent == 0)) {
1065 tcp->tcp_rexmit_max = tcp->tcp_fss;
1066 } else {
1067 tcp->tcp_rexmit_max = tcp->tcp_snxt;
1068 }
1069 tcp->tcp_rexmit = B_TRUE;
1070 tcp->tcp_dupack_cnt = 0;
1071
1072 /*
1073 * Remove all rexmit SACK blk to start from fresh.
|