3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2016 Joyent, Inc.
24 * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
25 * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
26 */
27
28 #ifndef _INET_TCP_IMPL_H
29 #define _INET_TCP_IMPL_H
30
31 /*
32 * TCP implementation private declarations. These interfaces are
33 * used to build the IP module and are not meant to be accessed
34 * by any modules except IP itself. They are undocumented and are
35 * subject to change without notice.
36 */
37
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41
42 #ifdef _KERNEL
43
44 #include <sys/cpuvar.h>
45 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */
283 }
284
/*
 * Set the ECN capable transport (ECT) code point in the IP header.
 *
 * Note that there are 2 ECT code points '01' and '10', which are called
 * ECT(1) and ECT(0) respectively.  Here we follow the original ECT code
 * point ECT(0) for TCP as described in RFC 2481.
 *
 * tcp is the connection; its tcp_connp->conn_ipversion selects how iph is
 * interpreted: as an ipha_t * when it equals IPV4_VERSION, as an ip6_t *
 * otherwise.  For IPv4 the low two bits of ipha_type_of_service are
 * rewritten.  For IPv6 bits 20-21 of the host-order value of ip6_vcf are
 * rewritten, which is why both the mask and the code point go through
 * htonl().  In both cases the old code point is cleared before ECT(0) is
 * set.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used as the body of an unbraced if/else.
 * Callers supply the terminating semicolon.  iph is evaluated twice, so it
 * must not have side effects.
 */
#define	TCP_SET_ECT(tcp, iph)						\
	do {								\
		if ((tcp)->tcp_connp->conn_ipversion == IPV4_VERSION) {	\
			/* We need to clear the code point first. */	\
			((ipha_t *)(iph))->ipha_type_of_service &= 0xFC; \
			((ipha_t *)(iph))->ipha_type_of_service |=	\
			    IPH_ECN_ECT0;				\
		} else {						\
			((ip6_t *)(iph))->ip6_vcf &= htonl(0xFFCFFFFF);	\
			((ip6_t *)(iph))->ip6_vcf |=			\
			    htonl(IPH_ECN_ECT0 << 20);			\
		}							\
	} while (0)
301
/*
 * Set tcp_rto with boundary checking.
 *
 * The value stored in (tcp)->tcp_rto is rto clamped to the range
 * [(tcp)->tcp_rto_min, (tcp)->tcp_rto_max].  The lower bound is tested
 * first, so it takes precedence if rto is below it.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement: it can be used as the body of an unbraced if/else
 * without breaking the else or capturing it.  Callers supply the
 * terminating semicolon.  Both tcp and rto are evaluated more than once, so
 * neither may have side effects.
 */
#define	TCP_SET_RTO(tcp, rto)						\
	do {								\
		if ((rto) < (tcp)->tcp_rto_min)				\
			(tcp)->tcp_rto = (tcp)->tcp_rto_min;		\
		else if ((rto) > (tcp)->tcp_rto_max)			\
			(tcp)->tcp_rto = (tcp)->tcp_rto_max;		\
		else							\
			(tcp)->tcp_rto = (rto);				\
	} while (0)
312
313 /*
314 * TCP options struct returned from tcp_parse_options.
315 */
316 typedef struct tcp_opt_s {
317 uint32_t tcp_opt_mss;
318 uint32_t tcp_opt_wscale;
319 uint32_t tcp_opt_ts_val;
320 uint32_t tcp_opt_ts_ecr;
321 tcp_t *tcp;
322 } tcp_opt_t;
323
/*
 * Flags returned from tcp_parse_options.  Each value is a distinct power of
 * two, written here as a hexadecimal bit mask.
 */
#define	TCP_OPT_MSS_PRESENT	0x01
#define	TCP_OPT_WSCALE_PRESENT	0x02
#define	TCP_OPT_TSTAMP_PRESENT	0x04
#define	TCP_OPT_SACK_OK_PRESENT	0x08
#define	TCP_OPT_SACK_PRESENT	0x10
332
333 /*
557 #define tcps_mss_max_ipv6 tcps_propinfo_tbl[47].prop_cur_uval
558 #define tcps_rev_src_routes tcps_propinfo_tbl[48].prop_cur_bval
559 #define tcps_local_dack_interval tcps_propinfo_tbl[49].prop_cur_uval
560 #define tcps_local_dacks_max tcps_propinfo_tbl[50].prop_cur_uval
561 #define tcps_ecn_permitted tcps_propinfo_tbl[51].prop_cur_uval
562 #define tcps_rst_sent_rate_enabled tcps_propinfo_tbl[52].prop_cur_bval
563 #define tcps_rst_sent_rate tcps_propinfo_tbl[53].prop_cur_uval
564 #define tcps_push_timer_interval tcps_propinfo_tbl[54].prop_cur_uval
565 #define tcps_use_smss_as_mss_opt tcps_propinfo_tbl[55].prop_cur_bval
566 #define tcps_keepalive_abort_interval_high \
567 tcps_propinfo_tbl[56].prop_max_uval
568 #define tcps_keepalive_abort_interval \
569 tcps_propinfo_tbl[56].prop_cur_uval
570 #define tcps_keepalive_abort_interval_low \
571 tcps_propinfo_tbl[56].prop_min_uval
572 #define tcps_wroff_xtra tcps_propinfo_tbl[57].prop_cur_uval
573 #define tcps_dev_flow_ctl tcps_propinfo_tbl[58].prop_cur_bval
574 #define tcps_reass_timeout tcps_propinfo_tbl[59].prop_cur_uval
575 #define tcps_iss_incr tcps_propinfo_tbl[65].prop_cur_uval
576
577 extern struct qinit tcp_rinitv4, tcp_rinitv6;
578 extern boolean_t do_tcp_fusion;
579
580 /*
581 * Object to represent database of options to search passed to
582 * {sock,tpi}optcom_req() interface routine to take care of option
583 * management and associated methods.
584 */
585 extern optdb_obj_t tcp_opt_obj;
586 extern uint_t tcp_max_optsize;
587
588 extern int tcp_squeue_flag;
589
590 extern uint_t tcp_free_list_max_cnt;
591
592 /*
593 * Functions in tcp.c.
594 */
595 extern void tcp_acceptor_hash_insert(t_uscalar_t, tcp_t *);
596 extern tcp_t *tcp_acceptor_hash_lookup(t_uscalar_t, tcp_stack_t *);
|
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2019 Joyent, Inc.
24 * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
25 * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
26 */
27
28 #ifndef _INET_TCP_IMPL_H
29 #define _INET_TCP_IMPL_H
30
31 /*
32 * TCP implementation private declarations. These interfaces are
33 * used to build the IP module and are not meant to be accessed
34 * by any modules except IP itself. They are undocumented and are
35 * subject to change without notice.
36 */
37
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41
42 #ifdef _KERNEL
43
44 #include <sys/cpuvar.h>
45 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */
283 }
284
/*
 * Set the ECN capable transport (ECT) code point in the IP header.
 *
 * Note that there are 2 ECT code points '01' and '10', which are called
 * ECT(1) and ECT(0) respectively.  Here we follow the original ECT code
 * point ECT(0) for TCP as described in RFC 2481.
 *
 * tcp is the connection; its tcp_connp->conn_ipversion selects how iph is
 * interpreted: as an ipha_t * when it equals IPV4_VERSION, as an ip6_t *
 * otherwise.  For IPv4 the low two bits of ipha_type_of_service are
 * rewritten.  For IPv6 bits 20-21 of the host-order value of ip6_vcf are
 * rewritten, which is why both the mask and the code point go through
 * htonl().  In both cases the old code point is cleared before ECT(0) is
 * set.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used as the body of an unbraced if/else.
 * Callers supply the terminating semicolon.  iph is evaluated twice, so it
 * must not have side effects.
 */
#define	TCP_SET_ECT(tcp, iph)						\
	do {								\
		if ((tcp)->tcp_connp->conn_ipversion == IPV4_VERSION) {	\
			/* We need to clear the code point first. */	\
			((ipha_t *)(iph))->ipha_type_of_service &= 0xFC; \
			((ipha_t *)(iph))->ipha_type_of_service |=	\
			    IPH_ECN_ECT0;				\
		} else {						\
			((ip6_t *)(iph))->ip6_vcf &= htonl(0xFFCFFFFF);	\
			((ip6_t *)(iph))->ip6_vcf |=			\
			    htonl(IPH_ECN_ECT0 << 20);			\
		}							\
	} while (0)
301
302 /*
303 * TCP options struct returned from tcp_parse_options.
304 */
305 typedef struct tcp_opt_s {
306 uint32_t tcp_opt_mss;
307 uint32_t tcp_opt_wscale;
308 uint32_t tcp_opt_ts_val;
309 uint32_t tcp_opt_ts_ecr;
310 tcp_t *tcp;
311 } tcp_opt_t;
312
/*
 * Flags returned from tcp_parse_options.  Each value is a distinct power of
 * two, written here as a hexadecimal bit mask.
 */
#define	TCP_OPT_MSS_PRESENT	0x01
#define	TCP_OPT_WSCALE_PRESENT	0x02
#define	TCP_OPT_TSTAMP_PRESENT	0x04
#define	TCP_OPT_SACK_OK_PRESENT	0x08
#define	TCP_OPT_SACK_PRESENT	0x10
321
322 /*
546 #define tcps_mss_max_ipv6 tcps_propinfo_tbl[47].prop_cur_uval
547 #define tcps_rev_src_routes tcps_propinfo_tbl[48].prop_cur_bval
548 #define tcps_local_dack_interval tcps_propinfo_tbl[49].prop_cur_uval
549 #define tcps_local_dacks_max tcps_propinfo_tbl[50].prop_cur_uval
550 #define tcps_ecn_permitted tcps_propinfo_tbl[51].prop_cur_uval
551 #define tcps_rst_sent_rate_enabled tcps_propinfo_tbl[52].prop_cur_bval
552 #define tcps_rst_sent_rate tcps_propinfo_tbl[53].prop_cur_uval
553 #define tcps_push_timer_interval tcps_propinfo_tbl[54].prop_cur_uval
554 #define tcps_use_smss_as_mss_opt tcps_propinfo_tbl[55].prop_cur_bval
555 #define tcps_keepalive_abort_interval_high \
556 tcps_propinfo_tbl[56].prop_max_uval
557 #define tcps_keepalive_abort_interval \
558 tcps_propinfo_tbl[56].prop_cur_uval
559 #define tcps_keepalive_abort_interval_low \
560 tcps_propinfo_tbl[56].prop_min_uval
561 #define tcps_wroff_xtra tcps_propinfo_tbl[57].prop_cur_uval
562 #define tcps_dev_flow_ctl tcps_propinfo_tbl[58].prop_cur_bval
563 #define tcps_reass_timeout tcps_propinfo_tbl[59].prop_cur_uval
564 #define tcps_iss_incr tcps_propinfo_tbl[65].prop_cur_uval
565
566
567 /*
568 * As defined in RFC 6298, the RTO is the average estimates (SRTT) plus a
569 * multiple of the deviation estimates (K * RTTVAR):
570 *
571 * RTO = SRTT + max(G, K * RTTVAR)
572 *
573 * K is defined in the RFC as 4, and G is the clock granularity. We constrain
574 * the minimum mean deviation to TCP_SD_MIN when processing new RTTs, so this
575 * becomes:
576 *
577 * RTO = SRTT + 4 * RTTVAR
578 *
579 * In practice, however, we make several additions to it. As we use a finer
580 * grained clock than BSD and update RTO for every ACK, we add in another 1/4 of
581 * RTT to the deviation of RTO to accommodate burstiness of 1/4 of window size:
582 *
583 * RTO = SRTT + (SRTT / 4) + 4 * RTTVAR
584 *
585 * Since tcp_rtt_sa is 8 times the SRTT, and tcp_rtt_sd is 4 times the RTTVAR,
586 * this becomes:
587 *
588 * RTO = (tcp_rtt_sa / 8) + ((tcp_rtt_sa / 8) / 4) + tcp_rtt_sd
589 * RTO = (tcp_rtt_sa / 2^3) + (tcp_rtt_sa / 2^5) + tcp_rtt_sd
590 * RTO = (tcp_rtt_sa >> 3) + (tcp_rtt_sa >> 5) + tcp_rtt_sd
591 *
592 * The "tcp_rexmit_interval_extra" and "tcp_conn_grace_period" tunables are
593 * used to help account for extreme environments where the algorithm fails to
594 * work; by default they should be 0. (The latter tunable is only used for
595 * calculating the intial RTO, and so is optionally passed in as "extra".) We
596 * add them here:
597 *
598 * RTO = (tcp_rtt_sa >> 3) + (tcp_rtt_sa >> 5) + tcp_rtt_sd +
599 * tcps_rexmit_interval_extra + tcps_conn_grace_period
600 *
601 * We then pin the RTO within our configured boundaries (sections 2.4 and 2.5
602 * of RFC 6298).
603 */
604 static __GNU_INLINE clock_t
605 tcp_calculate_rto(tcp_t *tcp, tcp_stack_t *tcps, uint32_t extra)
606 {
607 clock_t rto;
608
609 rto = NSEC2MSEC((tcp->tcp_rtt_sa >> 3) + (tcp->tcp_rtt_sa >> 5) +
610 tcp->tcp_rtt_sd) + tcps->tcps_rexmit_interval_extra + extra;
611
612 if (rto < tcp->tcp_rto_min) {
613 rto = tcp->tcp_rto_min;
614 } else if (rto > tcp->tcp_rto_max) {
615 rto = tcp->tcp_rto_max;
616 }
617
618 return (rto);
619 }
620
621 extern struct qinit tcp_rinitv4, tcp_rinitv6;
622 extern boolean_t do_tcp_fusion;
623
624 /*
625 * Object to represent database of options to search passed to
626 * {sock,tpi}optcom_req() interface routine to take care of option
627 * management and associated methods.
628 */
629 extern optdb_obj_t tcp_opt_obj;
630 extern uint_t tcp_max_optsize;
631
632 extern int tcp_squeue_flag;
633
634 extern uint_t tcp_free_list_max_cnt;
635
636 /*
637 * Functions in tcp.c.
638 */
639 extern void tcp_acceptor_hash_insert(t_uscalar_t, tcp_t *);
640 extern tcp_t *tcp_acceptor_hash_lookup(t_uscalar_t, tcp_stack_t *);
|