Print this page
11546 Track TCP round-trip time in nanoseconds
Portions contributed by: Cody Peter Mello <cody.mello@joyent.com>
Portions contributed by: Brandon Baker <bbaker@delphix.com>
Reviewed by: Jason King <jason.king@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/inet/tcp/tcp_timers.c
          +++ new/usr/src/uts/common/inet/tcp/tcp_timers.c
↓ open down ↓ 15 lines elided ↑ open up ↑
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
  25   25   * Copyright 2011 Joyent, Inc.  All rights reserved.
  26      - * Copyright (c) 2014 by Delphix. All rights reserved.
       26 + * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
  27   27   */
  28   28  
  29   29  #include <sys/types.h>
  30   30  #include <sys/strlog.h>
  31   31  #include <sys/strsun.h>
  32   32  #include <sys/squeue_impl.h>
  33   33  #include <sys/squeue.h>
  34   34  #include <sys/callo.h>
  35   35  #include <sys/strsubr.h>
  36   36  
↓ open down ↓ 707 lines elided ↑ open up ↑
 744  744                  if (second_threshold == 0) {
 745  745                          second_threshold = tcps->tcps_ip_abort_linterval;
 746  746                          dont_timeout = B_TRUE;
 747  747                  }
 748  748                  /* FALLTHRU */
 749  749          case TCPS_FIN_WAIT_1:
 750  750          case TCPS_CLOSING:
 751  751          case TCPS_LAST_ACK:
 752  752                  /* If we have data to rexmit */
 753  753                  if (tcp->tcp_suna != tcp->tcp_snxt) {
 754      -                        clock_t time_to_wait;
      754 +                        clock_t time_to_wait;
 755  755  
 756  756                          TCPS_BUMP_MIB(tcps, tcpTimRetrans);
 757  757                          if (!tcp->tcp_xmit_head)
 758  758                                  break;
 759      -                        time_to_wait = ddi_get_lbolt() -
 760      -                            (clock_t)tcp->tcp_xmit_head->b_prev;
 761      -                        time_to_wait = tcp->tcp_rto -
 762      -                            TICK_TO_MSEC(time_to_wait);
      759 +                        time_to_wait = NSEC2MSEC(gethrtime() -
      760 +                            (hrtime_t)(intptr_t)tcp->tcp_xmit_head->b_prev);
      761 +                        time_to_wait = tcp->tcp_rto - time_to_wait;
 763  762                          /*
 764  763                           * If the timer fires too early, 1 clock tick earlier,
 765  764                           * restart the timer.
 766  765                           */
 767  766                          if (time_to_wait > msec_per_tick) {
 768  767                                  TCP_STAT(tcps, tcp_timer_fire_early);
 769  768                                  TCP_TIMER_RESTART(tcp, time_to_wait);
 770  769                                  return;
 771  770                          }
 772  771                          /*
↓ open down ↓ 232 lines elided ↑ open up ↑
1005 1004  
1006 1005                  /*
1007 1006                   * We have been retransmitting for too long...  The RTT
1008 1007                   * we calculated is probably incorrect.  Reinitialize it.
1009 1008                   * Need to compensate for 0 tcp_rtt_sa.  Reset
1010 1009                   * tcp_rtt_update so that we won't accidentally cache a
1011 1010                   * bad value.  But only do this if this is not a zero
1012 1011                   * window probe.
1013 1012                   */
1014 1013                  if (tcp->tcp_rtt_sa != 0 && tcp->tcp_zero_win_probe == 0) {
1015      -                        tcp->tcp_rtt_sd += (tcp->tcp_rtt_sa >> 3) +
1016      -                            (tcp->tcp_rtt_sa >> 5);
     1014 +                        tcp->tcp_rtt_sd += (tcp->tcp_rtt_sa >> 3) +
     1015 +                            (tcp->tcp_rtt_sa >> 5);
1017 1016                          tcp->tcp_rtt_sa = 0;
1018 1017                          tcp_ip_notify(tcp);
1019 1018                          tcp->tcp_rtt_update = 0;
1020 1019                  }
1021 1020          }
1022 1021  
1023 1022  timer_rexmit:
1024 1023          tcp->tcp_timer_backoff++;
1025      -        if ((ms = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
1026      -            tcps->tcps_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5)) <
1027      -            tcp->tcp_rto_min) {
1028      -                /*
1029      -                 * This means the original RTO is tcp_rexmit_interval_min.
1030      -                 * So we will use tcp_rexmit_interval_min as the RTO value
1031      -                 * and do the backoff.
1032      -                 */
1033      -                ms = tcp->tcp_rto_min << tcp->tcp_timer_backoff;
1034      -        } else {
1035      -                ms <<= tcp->tcp_timer_backoff;
1036      -        }
     1024 +        /*
     1025 +         * Calculate the backed off retransmission timeout. If the shift brings
     1026 +         * us back over the max, then we repin the value, and decrement the
     1027 +         * backoff to avoid overflow.
     1028 +         */
     1029 +        ms = tcp_calculate_rto(tcp, tcps, 0) << tcp->tcp_timer_backoff;
1037 1030          if (ms > tcp->tcp_rto_max) {
1038 1031                  ms = tcp->tcp_rto_max;
1039      -                /*
1040      -                 * ms is at max, decrement tcp_timer_backoff to avoid
1041      -                 * overflow.
1042      -                 */
1043 1032                  tcp->tcp_timer_backoff--;
1044 1033          }
1045 1034          tcp->tcp_ms_we_have_waited += ms;
1046 1035          if (tcp->tcp_zero_win_probe == 0) {
1047 1036                  tcp->tcp_rto = ms;
1048 1037          }
1049 1038          TCP_TIMER_RESTART(tcp, ms);
1050 1039          /*
1051 1040           * This is after a timeout and tcp_rto is backed off.  Set
1052 1041           * tcp_set_timer to 1 so that next time RTO is updated, we will
1053 1042           * restart the timer with a correct value.
1054 1043           */
1055 1044          tcp->tcp_set_timer = 1;
1056 1045          mss = tcp->tcp_snxt - tcp->tcp_suna;
1057 1046          if (mss > tcp->tcp_mss)
1058 1047                  mss = tcp->tcp_mss;
1059 1048          if (mss > tcp->tcp_swnd && tcp->tcp_swnd != 0)
1060 1049                  mss = tcp->tcp_swnd;
1061 1050  
1062      -        if ((mp = tcp->tcp_xmit_head) != NULL)
1063      -                mp->b_prev = (mblk_t *)ddi_get_lbolt();
     1051 +        if ((mp = tcp->tcp_xmit_head) != NULL) {
     1052 +                mp->b_prev = (mblk_t *)(intptr_t)gethrtime();
     1053 +        }
1064 1054          mp = tcp_xmit_mp(tcp, mp, mss, NULL, NULL, tcp->tcp_suna, B_TRUE, &mss,
1065 1055              B_TRUE);
1066 1056  
1067 1057          /*
1068 1058           * When slow start after retransmission begins, start with
1069 1059           * this seq no.  tcp_rexmit_max marks the end of special slow
1070 1060           * start phase.
1071 1061           */
1072 1062          tcp->tcp_rexmit_nxt = tcp->tcp_suna;
1073 1063          if ((tcp->tcp_valid_bits & TCP_FSS_VALID) &&
↓ open down ↓ 37 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX