6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, Joyent Inc. All rights reserved.
25 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright (c) 2013,2014 by Delphix. All rights reserved.
27 * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
28 */
29 /* Copyright (c) 1990 Mentat Inc. */
30
31 #include <sys/types.h>
32 #include <sys/stream.h>
33 #include <sys/strsun.h>
34 #include <sys/strsubr.h>
35 #include <sys/stropts.h>
36 #include <sys/strlog.h>
37 #define _SUN_TPI_VERSION 2
38 #include <sys/tihdr.h>
39 #include <sys/timod.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/suntpi.h>
43 #include <sys/xti_inet.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/sdt.h>
249 ((uint_t)(accid) & (TCP_ACCEPTOR_FANOUT_SIZE - 1))
250 #endif /* _ILP32 */
251
252 /*
253 * Minimum number of connections which can be created per listener. Used
254 * when the listener connection count is in effect.
255 */
256 static uint32_t tcp_min_conn_listener = 2;
257
258 uint32_t tcp_early_abort = 30;
259
260 /* TCP Timer control structure */
261 typedef struct tcpt_s {
262 pfv_t tcpt_pfv; /* Callback routine to invoke */
263 tcp_t *tcpt_tcp; /* tcp_t passed to the callback as its argument */
264 } tcpt_t;
265
266 /*
267 * Functions called directly via squeue having a prototype of edesc_t.
268 */
269 void tcp_input_listener(void *arg, mblk_t *mp, void *arg2,
270 ip_recv_attr_t *ira);
271 void tcp_input_data(void *arg, mblk_t *mp, void *arg2,
272 ip_recv_attr_t *ira);
273 static void tcp_linger_interrupted(void *arg, mblk_t *mp, void *arg2,
274 ip_recv_attr_t *dummy);
275
276
277 /* Prototype for TCP functions */
278 static void tcp_random_init(void);
279 int tcp_random(void);
280 static int tcp_connect_ipv4(tcp_t *tcp, ipaddr_t *dstaddrp,
281 in_port_t dstport, uint_t srcid);
282 static int tcp_connect_ipv6(tcp_t *tcp, in6_addr_t *dstaddrp,
283 in_port_t dstport, uint32_t flowinfo,
284 uint_t srcid, uint32_t scope_id);
285 static void tcp_iss_init(tcp_t *tcp);
286 static void tcp_reinit(tcp_t *tcp);
287 static void tcp_reinit_values(tcp_t *tcp);
288
289 static int tcp_wsrv(queue_t *q);
290 static void tcp_update_lso(tcp_t *tcp, ip_xmit_attr_t *ixa);
623 */
624 flags |= IPDF_UNIQUE_DCE;
625
626 if (!tcps->tcps_ignore_path_mtu)
627 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
628
629 /* Use conn_lock to satisfy ASSERT; tcp is already serialized */
630 mutex_enter(&connp->conn_lock);
631 error = conn_connect(connp, &uinfo, flags);
632 mutex_exit(&connp->conn_lock);
633 if (error != 0)
634 return (error);
635
636 error = tcp_build_hdrs(tcp);
637 if (error != 0)
638 return (error);
639
640 tcp->tcp_localnet = uinfo.iulp_localnet;
641
642 if (uinfo.iulp_rtt != 0) {
643 clock_t rto;
644
645 tcp->tcp_rtt_sa = uinfo.iulp_rtt;
646 tcp->tcp_rtt_sd = uinfo.iulp_rtt_sd;
647 rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
648 tcps->tcps_rexmit_interval_extra +
649 (tcp->tcp_rtt_sa >> 5);
650
651 TCP_SET_RTO(tcp, rto);
652 }
653 if (uinfo.iulp_ssthresh != 0)
654 tcp->tcp_cwnd_ssthresh = uinfo.iulp_ssthresh;
655 else
656 tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN;
657 if (uinfo.iulp_spipe > 0) {
658 connp->conn_sndbuf = MIN(uinfo.iulp_spipe,
659 tcps->tcps_max_buf);
660 if (tcps->tcps_snd_lowat_fraction != 0) {
661 connp->conn_sndlowat = connp->conn_sndbuf /
662 tcps->tcps_snd_lowat_fraction;
663 }
664 (void) tcp_maxpsz_set(tcp, B_TRUE);
665 }
666 /*
667 * Note that up till now, acceptor always inherits receive
2668 * window from the listener. But if there are metrics
2669 * associated with a host, we should use those instead of
670 * inheriting it from listener. Thus we need to pass this
671 * info back to the caller.
2317 #endif
2318
2319 PRESERVE(tcp->tcp_connid);
2320
2321 ASSERT(tcp->tcp_listen_cnt == NULL);
2322 ASSERT(tcp->tcp_reass_tid == 0);
2323
2324 #undef DONTCARE
2325 #undef PRESERVE
2326 }
2327
2328 /*
2329 * Initialize the various fields in tcp_t. If parent (the listener) is non
2330 * NULL, certain values will be inherited from it.
2331 */
2332 void
2333 tcp_init_values(tcp_t *tcp, tcp_t *parent)
2334 {
2335 tcp_stack_t *tcps = tcp->tcp_tcps;
2336 conn_t *connp = tcp->tcp_connp;
2337 clock_t rto;
2338
2339 ASSERT((connp->conn_family == AF_INET &&
2340 connp->conn_ipversion == IPV4_VERSION) ||
2341 (connp->conn_family == AF_INET6 &&
2342 (connp->conn_ipversion == IPV4_VERSION ||
2343 connp->conn_ipversion == IPV6_VERSION)));
2344
2345 if (parent == NULL) {
2346 tcp->tcp_naglim = tcps->tcps_naglim_def;
2347
2348 tcp->tcp_rto_initial = tcps->tcps_rexmit_interval_initial;
2349 tcp->tcp_rto_min = tcps->tcps_rexmit_interval_min;
2350 tcp->tcp_rto_max = tcps->tcps_rexmit_interval_max;
2351
2352 tcp->tcp_first_ctimer_threshold =
2353 tcps->tcps_ip_notify_cinterval;
2354 tcp->tcp_second_ctimer_threshold =
2355 tcps->tcps_ip_abort_cinterval;
2356 tcp->tcp_first_timer_threshold = tcps->tcps_ip_notify_interval;
2357 tcp->tcp_second_timer_threshold = tcps->tcps_ip_abort_interval;
2386 parent->tcp_second_timer_threshold;
2387
2388 tcp->tcp_fin_wait_2_flush_interval =
2389 parent->tcp_fin_wait_2_flush_interval;
2390
2391 tcp->tcp_ka_interval = parent->tcp_ka_interval;
2392 tcp->tcp_ka_abort_thres = parent->tcp_ka_abort_thres;
2393 tcp->tcp_ka_cnt = parent->tcp_ka_cnt;
2394 tcp->tcp_ka_rinterval = parent->tcp_ka_rinterval;
2395
2396 tcp->tcp_init_cwnd = parent->tcp_init_cwnd;
2397 }
2398
2399 /*
2400 * Initialize tcp_rtt_sa and tcp_rtt_sd so that the calculated RTO
2401 * will be close to tcp_rexmit_interval_initial. By doing this, we
2402 * allow the algorithm to adjust slowly to large fluctuations of RTT
2403 * during first few transmissions of a connection as seen in slow
2404 * links.
2405 */
2406 tcp->tcp_rtt_sa = tcp->tcp_rto_initial << 2;
2407 tcp->tcp_rtt_sd = tcp->tcp_rto_initial >> 1;
2408 rto = (tcp->tcp_rtt_sa >> 3) + tcp->tcp_rtt_sd +
2409 tcps->tcps_rexmit_interval_extra + (tcp->tcp_rtt_sa >> 5) +
2410 tcps->tcps_conn_grace_period;
2411 TCP_SET_RTO(tcp, rto);
2412
2413 tcp->tcp_timer_backoff = 0;
2414 tcp->tcp_ms_we_have_waited = 0;
2415 tcp->tcp_last_recv_time = ddi_get_lbolt();
2416 tcp->tcp_cwnd_max = tcps->tcps_cwnd_max_;
2417 tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN;
2418
2419 tcp->tcp_maxpsz_multiplier = tcps->tcps_maxpsz_multiplier;
2420
2421 /* NOTE: ISS is now set in tcp_set_destination(). */
2422
2423 /* Reset fusion-related fields */
2424 tcp->tcp_fused = B_FALSE;
2425 tcp->tcp_unfusable = B_FALSE;
2426 tcp->tcp_fused_sigurg = B_FALSE;
2427 tcp->tcp_loopback_peer = NULL;
2428
2429 /* We rebuild the header template on the next connect/conn_request */
2430
2431 connp->conn_mlp_type = mlptSingle;
|
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2011, Joyent Inc. All rights reserved.
25 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
26 * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
27 * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved.
28 */
29 /* Copyright (c) 1990 Mentat Inc. */
30
31 #include <sys/types.h>
32 #include <sys/stream.h>
33 #include <sys/strsun.h>
34 #include <sys/strsubr.h>
35 #include <sys/stropts.h>
36 #include <sys/strlog.h>
37 #define _SUN_TPI_VERSION 2
38 #include <sys/tihdr.h>
39 #include <sys/timod.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/suntpi.h>
43 #include <sys/xti_inet.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/sdt.h>
249 ((uint_t)(accid) & (TCP_ACCEPTOR_FANOUT_SIZE - 1))
250 #endif /* _ILP32 */
251
252 /*
253 * Minimum number of connections which can be created per listener. Used
254 * when the listener connection count is in effect.
255 */
256 static uint32_t tcp_min_conn_listener = 2;
257
258 uint32_t tcp_early_abort = 30;
259
260 /* TCP Timer control structure */
261 typedef struct tcpt_s {
262 pfv_t tcpt_pfv; /* Callback routine to invoke */
263 tcp_t *tcpt_tcp; /* tcp_t passed to the callback as its argument */
264 } tcpt_t;
265
266 /*
267 * Functions called directly via squeue having a prototype of edesc_t.
268 */
269 void tcp_input_data(void *arg, mblk_t *mp, void *arg2,
270 ip_recv_attr_t *ira);
271 static void tcp_linger_interrupted(void *arg, mblk_t *mp, void *arg2,
272 ip_recv_attr_t *dummy);
273
274
275 /* Prototype for TCP functions */
276 static void tcp_random_init(void);
277 int tcp_random(void);
278 static int tcp_connect_ipv4(tcp_t *tcp, ipaddr_t *dstaddrp,
279 in_port_t dstport, uint_t srcid);
280 static int tcp_connect_ipv6(tcp_t *tcp, in6_addr_t *dstaddrp,
281 in_port_t dstport, uint32_t flowinfo,
282 uint_t srcid, uint32_t scope_id);
283 static void tcp_iss_init(tcp_t *tcp);
284 static void tcp_reinit(tcp_t *tcp);
285 static void tcp_reinit_values(tcp_t *tcp);
286
287 static int tcp_wsrv(queue_t *q);
288 static void tcp_update_lso(tcp_t *tcp, ip_xmit_attr_t *ixa);
621 */
622 flags |= IPDF_UNIQUE_DCE;
623
624 if (!tcps->tcps_ignore_path_mtu)
625 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
626
627 /* Use conn_lock to satisfy ASSERT; tcp is already serialized */
628 mutex_enter(&connp->conn_lock);
629 error = conn_connect(connp, &uinfo, flags);
630 mutex_exit(&connp->conn_lock);
631 if (error != 0)
632 return (error);
633
634 error = tcp_build_hdrs(tcp);
635 if (error != 0)
636 return (error);
637
638 tcp->tcp_localnet = uinfo.iulp_localnet;
639
640 if (uinfo.iulp_rtt != 0) {
641 tcp->tcp_rtt_sa = MSEC2NSEC(uinfo.iulp_rtt);
642 tcp->tcp_rtt_sd = MSEC2NSEC(uinfo.iulp_rtt_sd);
643 tcp->tcp_rto = tcp_calculate_rto(tcp, tcps, 0);
644 }
645 if (uinfo.iulp_ssthresh != 0)
646 tcp->tcp_cwnd_ssthresh = uinfo.iulp_ssthresh;
647 else
648 tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN;
649 if (uinfo.iulp_spipe > 0) {
650 connp->conn_sndbuf = MIN(uinfo.iulp_spipe,
651 tcps->tcps_max_buf);
652 if (tcps->tcps_snd_lowat_fraction != 0) {
653 connp->conn_sndlowat = connp->conn_sndbuf /
654 tcps->tcps_snd_lowat_fraction;
655 }
656 (void) tcp_maxpsz_set(tcp, B_TRUE);
657 }
658 /*
659 * Note that up till now, acceptor always inherits receive
660 * window from the listener. But if there are metrics
661 * associated with a host, we should use those instead of
662 * inheriting it from listener. Thus we need to pass this
663 * info back to the caller.
2309 #endif
2310
2311 PRESERVE(tcp->tcp_connid);
2312
2313 ASSERT(tcp->tcp_listen_cnt == NULL);
2314 ASSERT(tcp->tcp_reass_tid == 0);
2315
2316 #undef DONTCARE
2317 #undef PRESERVE
2318 }
2319
2320 /*
2321 * Initialize the various fields in tcp_t. If parent (the listener) is non
2322 * NULL, certain values will be inherited from it.
2323 */
2324 void
2325 tcp_init_values(tcp_t *tcp, tcp_t *parent)
2326 {
2327 tcp_stack_t *tcps = tcp->tcp_tcps;
2328 conn_t *connp = tcp->tcp_connp;
2329
2330 ASSERT((connp->conn_family == AF_INET &&
2331 connp->conn_ipversion == IPV4_VERSION) ||
2332 (connp->conn_family == AF_INET6 &&
2333 (connp->conn_ipversion == IPV4_VERSION ||
2334 connp->conn_ipversion == IPV6_VERSION)));
2335
2336 if (parent == NULL) {
2337 tcp->tcp_naglim = tcps->tcps_naglim_def;
2338
2339 tcp->tcp_rto_initial = tcps->tcps_rexmit_interval_initial;
2340 tcp->tcp_rto_min = tcps->tcps_rexmit_interval_min;
2341 tcp->tcp_rto_max = tcps->tcps_rexmit_interval_max;
2342
2343 tcp->tcp_first_ctimer_threshold =
2344 tcps->tcps_ip_notify_cinterval;
2345 tcp->tcp_second_ctimer_threshold =
2346 tcps->tcps_ip_abort_cinterval;
2347 tcp->tcp_first_timer_threshold = tcps->tcps_ip_notify_interval;
2348 tcp->tcp_second_timer_threshold = tcps->tcps_ip_abort_interval;
2377 parent->tcp_second_timer_threshold;
2378
2379 tcp->tcp_fin_wait_2_flush_interval =
2380 parent->tcp_fin_wait_2_flush_interval;
2381
2382 tcp->tcp_ka_interval = parent->tcp_ka_interval;
2383 tcp->tcp_ka_abort_thres = parent->tcp_ka_abort_thres;
2384 tcp->tcp_ka_cnt = parent->tcp_ka_cnt;
2385 tcp->tcp_ka_rinterval = parent->tcp_ka_rinterval;
2386
2387 tcp->tcp_init_cwnd = parent->tcp_init_cwnd;
2388 }
2389
2390 /*
2391 * Initialize tcp_rtt_sa and tcp_rtt_sd so that the calculated RTO
2392 * will be close to tcp_rexmit_interval_initial. By doing this, we
2393 * allow the algorithm to adjust slowly to large fluctuations of RTT
2394 * during first few transmissions of a connection as seen in slow
2395 * links.
2396 */
2397 tcp->tcp_rtt_sa = MSEC2NSEC(tcp->tcp_rto_initial) << 2;
2398 tcp->tcp_rtt_sd = MSEC2NSEC(tcp->tcp_rto_initial) >> 1;
2399 tcp->tcp_rto = tcp_calculate_rto(tcp, tcps,
2400 tcps->tcps_conn_grace_period);
2401
2402 tcp->tcp_timer_backoff = 0;
2403 tcp->tcp_ms_we_have_waited = 0;
2404 tcp->tcp_last_recv_time = ddi_get_lbolt();
2405 tcp->tcp_cwnd_max = tcps->tcps_cwnd_max_;
2406 tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN;
2407
2408 tcp->tcp_maxpsz_multiplier = tcps->tcps_maxpsz_multiplier;
2409
2410 /* NOTE: ISS is now set in tcp_set_destination(). */
2411
2412 /* Reset fusion-related fields */
2413 tcp->tcp_fused = B_FALSE;
2414 tcp->tcp_unfusable = B_FALSE;
2415 tcp->tcp_fused_sigurg = B_FALSE;
2416 tcp->tcp_loopback_peer = NULL;
2417
2418 /* We rebuild the header template on the next connect/conn_request */
2419
2420 connp->conn_mlp_type = mlptSingle;
|