Print this page
11554 Want TCP_CONGESTION socket option
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/tcp/tcp_opt_data.c
+++ new/usr/src/uts/common/inet/tcp/tcp_opt_data.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
24 - * Copyright 2016 Joyent, Inc.
24 + * Copyright 2019 Joyent, Inc.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 26 */
27 27
28 28 #include <sys/types.h>
29 29 #include <sys/stream.h>
30 30 #define _SUN_TPI_VERSION 2
31 31 #include <sys/tihdr.h>
32 32 #include <sys/socket.h>
33 33 #include <sys/xti_xtiopt.h>
34 34 #include <sys/xti_inet.h>
35 35 #include <sys/policy.h>
36 36
37 +#include <inet/cc.h>
37 38 #include <inet/common.h>
38 39 #include <netinet/ip6.h>
39 40 #include <inet/ip.h>
40 41
41 42 #include <netinet/in.h>
42 43 #include <netinet/tcp.h>
43 44 #include <inet/optcom.h>
44 45 #include <inet/proto_set.h>
45 46 #include <inet/tcp_impl.h>
46 47
47 48 static int tcp_opt_default(queue_t *, int, int, uchar_t *);
48 49
49 50 /*
50 51 * Table of all known options handled on a TCP protocol stack.
51 52 *
52 53 * Note: This table contains options processed by both TCP and IP levels
53 54 * and is the superset of options that can be performed on a TCP over IP
54 55 * stack.
55 56 */
56 57 opdes_t tcp_opt_arr[] = {
57 58
58 59 { SO_LINGER, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
59 60 sizeof (struct linger), 0 },
60 61
61 62 { SO_DEBUG, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
62 63 { SO_KEEPALIVE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
63 64 { SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
64 65 { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
65 66 },
66 67 { SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
67 68 { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
68 69 { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
69 70 { SO_TYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
70 71 { SO_SNDBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
71 72 { SO_RCVBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
72 73 { SO_SNDTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
73 74 sizeof (struct timeval), 0 },
74 75 { SO_RCVTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
75 76 sizeof (struct timeval), 0 },
76 77 { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
77 78 },
78 79 { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
79 80 { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
80 81 0 },
81 82 { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
82 83 0 },
83 84 { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
84 85 0 },
85 86 { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
86 87 0 },
87 88 { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
88 89
89 90 { SO_DOMAIN, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
90 91
91 92 { SO_PROTOTYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
92 93
93 94 { TCP_NODELAY, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
94 95 },
95 96 { TCP_MAXSEG, IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
96 97 536 },
97 98
98 99 { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
99 100 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
100 101
101 102 { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
102 103 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
103 104
104 105 { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
105 106 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
106 107
107 108 { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
108 109 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
109 110
110 111 { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
111 112 0 },
112 113
113 114 { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
114 115 sizeof (int), 0 },
115 116
116 117 { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
117 118 },
118 119
119 120 { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
120 121 sizeof (int), 0 },
121 122
122 123 { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
123 124 sizeof (int), 0 },
124 125
125 126 { TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
126 127
127 128 { TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
128 129
129 130 { TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
130 131
131 132 { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
132 133 sizeof (int), 0 },
133 134
↓ open down ↓ |
87 lines elided |
↑ open up ↑ |
134 135 { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
135 136
136 137 { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
137 138
138 139 { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
139 140
140 141 { TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
141 142
142 143 { TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
143 144
145 +{ TCP_CONGESTION, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
146 + OP_VARLEN, CC_ALGO_NAME_MAX, 0 },
147 +
144 148 { IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
145 149 (OP_VARLEN|OP_NODEFAULT),
146 150 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
147 151 { T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
148 152 (OP_VARLEN|OP_NODEFAULT),
149 153 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
150 154
151 155 { IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
152 156 { T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
153 157 { IP_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
154 158 sizeof (int), -1 /* not initialized */ },
155 159
156 160 { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
157 161 sizeof (ipsec_req_t), -1 /* not initialized */ },
158 162
159 163 { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
160 164 sizeof (int), 0 /* no ifindex */ },
161 165
162 166 { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
163 167 sizeof (int), 0 },
164 168
165 169 { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
166 170 sizeof (int), -1 /* not initialized */ },
167 171
168 172 { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
169 173 sizeof (int), 0 /* no ifindex */ },
170 174
171 175 { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
172 176
173 177 { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
174 178 sizeof (in_addr_t), -1 /* not initialized */ },
175 179
176 180 { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
177 181 sizeof (int), 0 },
178 182
179 183 { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
180 184 (OP_NODEFAULT|OP_VARLEN),
181 185 sizeof (struct in6_pktinfo), -1 /* not initialized */ },
182 186 { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
183 187 OP_NODEFAULT,
184 188 sizeof (sin6_t), -1 /* not initialized */ },
185 189 { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
186 190 (OP_VARLEN|OP_NODEFAULT), 255*8,
187 191 -1 /* not initialized */ },
188 192 { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
189 193 (OP_VARLEN|OP_NODEFAULT), 255*8,
190 194 -1 /* not initialized */ },
191 195 { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
192 196 (OP_VARLEN|OP_NODEFAULT), 255*8,
193 197 -1 /* not initialized */ },
194 198 { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
195 199 (OP_VARLEN|OP_NODEFAULT), 255*8,
196 200 -1 /* not initialized */ },
197 201 { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
198 202 OP_NODEFAULT,
199 203 sizeof (int), -1 /* not initialized */ },
200 204 { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
201 205 OP_NODEFAULT,
202 206 sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
203 207 { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
204 208 sizeof (int), 0 },
205 209 { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
206 210 sizeof (int), 0 },
207 211 { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
208 212 sizeof (int), 0 },
209 213
210 214 /* Enable receipt of ancillary data */
211 215 { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
212 216 sizeof (int), 0 },
213 217 { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
214 218 sizeof (int), 0 },
215 219 { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
216 220 sizeof (int), 0 },
217 221 { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
218 222 sizeof (int), 0 },
219 223 { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
220 224 sizeof (int), 0 },
221 225 { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
222 226 sizeof (int), 0 },
223 227 { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
224 228 sizeof (int), 0 },
225 229 { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
226 230 sizeof (int), 0 },
227 231
228 232 { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
229 233 sizeof (ipsec_req_t), -1 /* not initialized */ },
230 234 { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
231 235 sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
232 236 };
233 237
234 238 /*
235 239 * Table of all supported levels
236 240 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
237 241 * any supported options so we need this info separately.
238 242 *
239 243 * This is needed only for topmost tpi providers and is used only by
240 244 * XTI interfaces.
241 245 */
242 246 optlevel_t tcp_valid_levels_arr[] = {
243 247 XTI_GENERIC,
244 248 SOL_SOCKET,
245 249 IPPROTO_TCP,
246 250 IPPROTO_IP,
247 251 IPPROTO_IPV6
248 252 };
249 253
250 254
251 255 #define TCP_OPT_ARR_CNT A_CNT(tcp_opt_arr)
252 256 #define TCP_VALID_LEVELS_CNT A_CNT(tcp_valid_levels_arr)
253 257
254 258 uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
255 259
256 260 /*
257 261 * Initialize option database object for TCP
258 262 *
259 263 * This object represents database of options to search passed to
260 264 * {sock,tpi}optcom_req() interface routine to take care of option
261 265 * management and associated methods.
262 266 */
263 267
264 268 optdb_obj_t tcp_opt_obj = {
265 269 tcp_opt_default, /* TCP default value function pointer */
266 270 tcp_tpi_opt_get, /* TCP get function pointer */
267 271 tcp_tpi_opt_set, /* TCP set function pointer */
268 272 TCP_OPT_ARR_CNT, /* TCP option database count of entries */
269 273 tcp_opt_arr, /* TCP option database */
270 274 TCP_VALID_LEVELS_CNT, /* TCP valid level count of entries */
271 275 tcp_valid_levels_arr /* TCP valid level array */
272 276 };
273 277
274 278 static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
275 279
276 280 /*
277 281 * Some TCP options can be "set" by requesting them in the option
278 282 * buffer. This is needed for XTI feature test though we do not
279 283 * allow it in general. We interpret that this mechanism is more
280 284 * applicable to OSI protocols and need not be allowed in general.
281 285 * This routine filters out options for which it is not allowed (most)
282 286 * and lets through those (few) for which it is. [ The XTI interface
283 287 * test suite specifics will imply that any XTI_GENERIC level XTI_* if
284 288 * ever implemented will have to be allowed here ].
285 289 */
286 290 static boolean_t
287 291 tcp_allow_connopt_set(int level, int name)
288 292 {
289 293
290 294 switch (level) {
291 295 case IPPROTO_TCP:
292 296 switch (name) {
293 297 case TCP_NODELAY:
294 298 return (B_TRUE);
295 299 default:
296 300 return (B_FALSE);
297 301 }
298 302 /*NOTREACHED*/
299 303 default:
300 304 return (B_FALSE);
301 305 }
302 306 /*NOTREACHED*/
303 307 }
304 308
305 309 /*
306 310 * This routine gets default values of certain options whose default
307 311 * values are maintained by protocol specific code
308 312 */
309 313 /* ARGSUSED */
310 314 static int
311 315 tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
312 316 {
313 317 int32_t *i1 = (int32_t *)ptr;
314 318 tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps;
315 319
316 320 switch (level) {
317 321 case IPPROTO_TCP:
318 322 switch (name) {
319 323 case TCP_NOTIFY_THRESHOLD:
320 324 *i1 = tcps->tcps_ip_notify_interval;
321 325 break;
322 326 case TCP_ABORT_THRESHOLD:
323 327 *i1 = tcps->tcps_ip_abort_interval;
324 328 break;
325 329 case TCP_CONN_NOTIFY_THRESHOLD:
326 330 *i1 = tcps->tcps_ip_notify_cinterval;
327 331 break;
328 332 case TCP_CONN_ABORT_THRESHOLD:
329 333 *i1 = tcps->tcps_ip_abort_cinterval;
330 334 break;
331 335 default:
332 336 return (-1);
333 337 }
334 338 break;
335 339 case IPPROTO_IP:
336 340 switch (name) {
337 341 case IP_TTL:
338 342 *i1 = tcps->tcps_ipv4_ttl;
339 343 break;
340 344 default:
341 345 return (-1);
342 346 }
343 347 break;
344 348 case IPPROTO_IPV6:
345 349 switch (name) {
346 350 case IPV6_UNICAST_HOPS:
347 351 *i1 = tcps->tcps_ipv6_hoplimit;
348 352 break;
349 353 default:
350 354 return (-1);
351 355 }
352 356 break;
353 357 default:
354 358 return (-1);
355 359 }
356 360 return (sizeof (int));
357 361 }
358 362
359 363 /*
360 364 * TCP routine to get the values of options.
361 365 */
362 366 int
363 367 tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
364 368 {
365 369 int *i1 = (int *)ptr;
366 370 tcp_t *tcp = connp->conn_tcp;
367 371 conn_opt_arg_t coas;
368 372 int retval;
369 373
370 374 coas.coa_connp = connp;
371 375 coas.coa_ixa = connp->conn_ixa;
372 376 coas.coa_ipp = &connp->conn_xmit_ipp;
373 377 coas.coa_ancillary = B_FALSE;
374 378 coas.coa_changed = 0;
375 379
376 380 switch (level) {
377 381 case SOL_SOCKET:
378 382 switch (name) {
379 383 case SO_SND_COPYAVOID:
380 384 *i1 = tcp->tcp_snd_zcopy_on ?
381 385 SO_SND_COPYAVOID : 0;
382 386 return (sizeof (int));
383 387 case SO_ACCEPTCONN:
384 388 *i1 = (tcp->tcp_state == TCPS_LISTEN);
385 389 return (sizeof (int));
386 390 }
387 391 break;
388 392 case IPPROTO_TCP:
389 393 switch (name) {
390 394 case TCP_NODELAY:
391 395 *i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
392 396 return (sizeof (int));
393 397 case TCP_MAXSEG:
394 398 *i1 = tcp->tcp_mss;
395 399 return (sizeof (int));
396 400 case TCP_NOTIFY_THRESHOLD:
397 401 *i1 = (int)tcp->tcp_first_timer_threshold;
398 402 return (sizeof (int));
399 403 case TCP_ABORT_THRESHOLD:
400 404 *i1 = tcp->tcp_second_timer_threshold;
401 405 return (sizeof (int));
402 406 case TCP_CONN_NOTIFY_THRESHOLD:
403 407 *i1 = tcp->tcp_first_ctimer_threshold;
404 408 return (sizeof (int));
405 409 case TCP_CONN_ABORT_THRESHOLD:
406 410 *i1 = tcp->tcp_second_ctimer_threshold;
407 411 return (sizeof (int));
408 412 case TCP_INIT_CWND:
409 413 *i1 = tcp->tcp_init_cwnd;
410 414 return (sizeof (int));
411 415 case TCP_KEEPALIVE_THRESHOLD:
412 416 *i1 = tcp->tcp_ka_interval;
413 417 return (sizeof (int));
414 418
415 419 /*
416 420 * TCP_KEEPIDLE expects value in seconds, but
417 421 * tcp_ka_interval is in milliseconds.
418 422 */
419 423 case TCP_KEEPIDLE:
420 424 *i1 = tcp->tcp_ka_interval / 1000;
421 425 return (sizeof (int));
422 426 case TCP_KEEPCNT:
423 427 *i1 = tcp->tcp_ka_cnt;
424 428 return (sizeof (int));
425 429
↓ open down ↓ |
272 lines elided |
↑ open up ↑ |
426 430 /*
427 431 * TCP_KEEPINTVL expects value in seconds, but
428 432 * tcp_ka_rinterval is in milliseconds.
429 433 */
430 434 case TCP_KEEPINTVL:
431 435 *i1 = tcp->tcp_ka_rinterval / 1000;
432 436 return (sizeof (int));
433 437 case TCP_KEEPALIVE_ABORT_THRESHOLD:
434 438 *i1 = tcp->tcp_ka_abort_thres;
435 439 return (sizeof (int));
440 + case TCP_CONGESTION: {
441 + size_t len = strlcpy((char *)ptr, CC_ALGO(tcp)->name,
442 + CC_ALGO_NAME_MAX);
443 + if (len >= CC_ALGO_NAME_MAX)
444 + return (-1);
445 + return (len + 1);
446 + }
436 447 case TCP_CORK:
437 448 *i1 = tcp->tcp_cork;
438 449 return (sizeof (int));
439 450 case TCP_RTO_INITIAL:
440 451 *i1 = tcp->tcp_rto_initial;
441 452 return (sizeof (uint32_t));
442 453 case TCP_RTO_MIN:
443 454 *i1 = tcp->tcp_rto_min;
444 455 return (sizeof (uint32_t));
445 456 case TCP_RTO_MAX:
446 457 *i1 = tcp->tcp_rto_max;
447 458 return (sizeof (uint32_t));
448 459 case TCP_LINGER2:
449 460 *i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
450 461 return (sizeof (int));
451 462 }
452 463 break;
453 464 case IPPROTO_IP:
454 465 if (connp->conn_family != AF_INET)
455 466 return (-1);
456 467 switch (name) {
457 468 case IP_OPTIONS:
458 469 case T_IP_OPTIONS:
459 470 /* Caller ensures enough space */
460 471 return (ip_opt_get_user(connp, ptr));
461 472 default:
462 473 break;
463 474 }
464 475 break;
465 476
466 477 case IPPROTO_IPV6:
467 478 /*
468 479 * IPPROTO_IPV6 options are only supported for sockets
469 480 * that are using IPv6 on the wire.
470 481 */
471 482 if (connp->conn_ipversion != IPV6_VERSION) {
472 483 return (-1);
473 484 }
474 485 switch (name) {
475 486 case IPV6_PATHMTU:
476 487 if (tcp->tcp_state < TCPS_ESTABLISHED)
477 488 return (-1);
478 489 break;
479 490 }
480 491 break;
481 492 }
482 493 mutex_enter(&connp->conn_lock);
483 494 retval = conn_opt_get(&coas, level, name, ptr);
484 495 mutex_exit(&connp->conn_lock);
485 496 return (retval);
486 497 }
487 498
488 499 /*
489 500 * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
490 501 * Parameters are assumed to be verified by the caller.
491 502 */
492 503 /* ARGSUSED */
493 504 int
494 505 tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
495 506 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
496 507 void *thisdg_attrs, cred_t *cr)
497 508 {
498 509 tcp_t *tcp = connp->conn_tcp;
499 510 int *i1 = (int *)invalp;
500 511 boolean_t onoff = (*i1 == 0) ? 0 : 1;
501 512 boolean_t checkonly;
502 513 int reterr;
503 514 tcp_stack_t *tcps = tcp->tcp_tcps;
504 515 conn_opt_arg_t coas;
505 516 uint32_t val = *((uint32_t *)invalp);
506 517
507 518 coas.coa_connp = connp;
508 519 coas.coa_ixa = connp->conn_ixa;
509 520 coas.coa_ipp = &connp->conn_xmit_ipp;
510 521 coas.coa_ancillary = B_FALSE;
511 522 coas.coa_changed = 0;
512 523
513 524 switch (optset_context) {
514 525 case SETFN_OPTCOM_CHECKONLY:
515 526 checkonly = B_TRUE;
516 527 /*
517 528 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
518 529 * inlen != 0 implies value supplied and
519 530 * we have to "pretend" to set it.
520 531 * inlen == 0 implies that there is no
521 532 * value part in T_CHECK request and just validation
522 533 * done elsewhere should be enough, we just return here.
523 534 */
524 535 if (inlen == 0) {
525 536 *outlenp = 0;
526 537 return (0);
527 538 }
528 539 break;
529 540 case SETFN_OPTCOM_NEGOTIATE:
530 541 checkonly = B_FALSE;
531 542 break;
532 543 case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
533 544 case SETFN_CONN_NEGOTIATE:
534 545 checkonly = B_FALSE;
535 546 /*
536 547 * Negotiating local and "association-related" options
537 548 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
538 549 * primitives is allowed by XTI, but we choose
539 550 * to not implement this style negotiation for Internet
540 551 * protocols (We interpret it is a must for OSI world but
541 552 * optional for Internet protocols) for all options.
542 553 * [ Will do only for the few options that enable test
543 554 * suites that our XTI implementation of this feature
544 555 * works for transports that do allow it ]
545 556 */
546 557 if (!tcp_allow_connopt_set(level, name)) {
547 558 *outlenp = 0;
548 559 return (EINVAL);
549 560 }
550 561 break;
551 562 default:
552 563 /*
553 564 * We should never get here
554 565 */
555 566 *outlenp = 0;
556 567 return (EINVAL);
557 568 }
558 569
559 570 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
560 571 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
561 572
562 573 /*
563 574 * For TCP, we should have no ancillary data sent down
564 575 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
565 576 * has to be zero.
566 577 */
567 578 ASSERT(thisdg_attrs == NULL);
568 579
569 580 /*
570 581 * For fixed length options, no sanity check
571 582 * of passed in length is done. It is assumed *_optcom_req()
572 583 * routines do the right thing.
573 584 */
574 585 switch (level) {
575 586 case SOL_SOCKET:
576 587 switch (name) {
577 588 case SO_KEEPALIVE:
578 589 if (checkonly) {
579 590 /* check only case */
580 591 break;
581 592 }
582 593
583 594 if (!onoff) {
584 595 if (connp->conn_keepalive) {
585 596 if (tcp->tcp_ka_tid != 0) {
586 597 (void) TCP_TIMER_CANCEL(tcp,
587 598 tcp->tcp_ka_tid);
588 599 tcp->tcp_ka_tid = 0;
589 600 }
590 601 connp->conn_keepalive = 0;
591 602 }
592 603 break;
593 604 }
594 605 if (!connp->conn_keepalive) {
595 606 /* Crank up the keepalive timer */
596 607 tcp->tcp_ka_last_intrvl = 0;
597 608 tcp->tcp_ka_tid = TCP_TIMER(tcp,
598 609 tcp_keepalive_timer, tcp->tcp_ka_interval);
599 610 connp->conn_keepalive = 1;
600 611 }
601 612 break;
602 613 case SO_SNDBUF: {
603 614 if (*i1 > tcps->tcps_max_buf) {
604 615 *outlenp = 0;
605 616 return (ENOBUFS);
606 617 }
607 618 if (checkonly)
608 619 break;
609 620
610 621 connp->conn_sndbuf = *i1;
611 622 if (tcps->tcps_snd_lowat_fraction != 0) {
612 623 connp->conn_sndlowat = connp->conn_sndbuf /
613 624 tcps->tcps_snd_lowat_fraction;
614 625 }
615 626 (void) tcp_maxpsz_set(tcp, B_TRUE);
616 627 /*
617 628 * If we are flow-controlled, recheck the condition.
618 629 * There are apps that increase SO_SNDBUF size when
619 630 * flow-controlled (EWOULDBLOCK), and expect the flow
620 631 * control condition to be lifted right away.
621 632 */
622 633 mutex_enter(&tcp->tcp_non_sq_lock);
623 634 if (tcp->tcp_flow_stopped &&
624 635 TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
625 636 tcp_clrqfull(tcp);
626 637 }
627 638 mutex_exit(&tcp->tcp_non_sq_lock);
628 639 *outlenp = inlen;
629 640 return (0);
630 641 }
631 642 case SO_RCVBUF:
632 643 if (*i1 > tcps->tcps_max_buf) {
633 644 *outlenp = 0;
634 645 return (ENOBUFS);
635 646 }
636 647 /* Silently ignore zero */
637 648 if (!checkonly && *i1 != 0) {
638 649 *i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
639 650 (void) tcp_rwnd_set(tcp, *i1);
640 651 }
641 652 /*
642 653 * XXX should we return the rwnd here
643 654 * and tcp_opt_get ?
644 655 */
645 656 *outlenp = inlen;
646 657 return (0);
647 658 case SO_SND_COPYAVOID:
648 659 if (!checkonly) {
649 660 if (tcp->tcp_loopback ||
650 661 (onoff != 1) || !tcp_zcopy_check(tcp)) {
651 662 *outlenp = 0;
652 663 return (EOPNOTSUPP);
653 664 }
654 665 tcp->tcp_snd_zcopy_aware = 1;
655 666 }
656 667 *outlenp = inlen;
657 668 return (0);
658 669 }
659 670 break;
660 671 case IPPROTO_TCP:
661 672 switch (name) {
662 673 case TCP_NODELAY:
663 674 if (!checkonly)
664 675 tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
665 676 break;
666 677 case TCP_NOTIFY_THRESHOLD:
667 678 if (!checkonly)
668 679 tcp->tcp_first_timer_threshold = *i1;
669 680 break;
670 681 case TCP_ABORT_THRESHOLD:
671 682 if (!checkonly)
672 683 tcp->tcp_second_timer_threshold = *i1;
673 684 break;
674 685 case TCP_CONN_NOTIFY_THRESHOLD:
675 686 if (!checkonly)
676 687 tcp->tcp_first_ctimer_threshold = *i1;
677 688 break;
678 689 case TCP_CONN_ABORT_THRESHOLD:
679 690 if (!checkonly)
680 691 tcp->tcp_second_ctimer_threshold = *i1;
681 692 break;
682 693 case TCP_RECVDSTADDR:
683 694 if (tcp->tcp_state > TCPS_LISTEN) {
684 695 *outlenp = 0;
685 696 return (EOPNOTSUPP);
686 697 }
687 698 /* Setting done in conn_opt_set */
688 699 break;
689 700 case TCP_INIT_CWND:
690 701 if (checkonly)
691 702 break;
692 703
693 704 /*
694 705 * Only allow socket with network configuration
695 706 * privilege to set the initial cwnd to be larger
696 707 * than allowed by RFC 3390.
697 708 */
698 709 if (val > MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
699 710 if ((reterr = secpolicy_ip_config(cr, B_TRUE))
700 711 != 0) {
701 712 *outlenp = 0;
702 713 return (reterr);
703 714 }
704 715 if (val > tcp_max_init_cwnd) {
705 716 *outlenp = 0;
706 717 return (EINVAL);
707 718 }
708 719 }
709 720
710 721 tcp->tcp_init_cwnd = val;
711 722
712 723 /*
713 724 * If the socket is connected, AND no outbound data
714 725 * has been sent, reset the actual cwnd values.
715 726 */
716 727 if (tcp->tcp_state == TCPS_ESTABLISHED &&
717 728 tcp->tcp_iss == tcp->tcp_snxt - 1) {
718 729 tcp->tcp_cwnd =
719 730 MIN(tcp->tcp_rwnd, val * tcp->tcp_mss);
720 731 }
721 732 break;
722 733
723 734 /*
724 735 * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
725 736 * is in milliseconds. TCP_KEEPIDLE is introduced for
726 737 * compatibility with other Unix flavors.
727 738 * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
728 739 * converting the input to milliseconds.
729 740 */
730 741 case TCP_KEEPIDLE:
731 742 *i1 *= 1000;
732 743 /* FALLTHRU */
733 744
734 745 case TCP_KEEPALIVE_THRESHOLD:
735 746 if (checkonly)
736 747 break;
737 748
738 749 if (*i1 < tcps->tcps_keepalive_interval_low ||
739 750 *i1 > tcps->tcps_keepalive_interval_high) {
740 751 *outlenp = 0;
741 752 return (EINVAL);
742 753 }
743 754 if (*i1 != tcp->tcp_ka_interval) {
744 755 tcp->tcp_ka_interval = *i1;
745 756 /*
746 757 * Check if we need to restart the
747 758 * keepalive timer.
748 759 */
749 760 if (tcp->tcp_ka_tid != 0) {
750 761 ASSERT(connp->conn_keepalive);
751 762 (void) TCP_TIMER_CANCEL(tcp,
752 763 tcp->tcp_ka_tid);
753 764 tcp->tcp_ka_last_intrvl = 0;
754 765 tcp->tcp_ka_tid = TCP_TIMER(tcp,
755 766 tcp_keepalive_timer,
756 767 tcp->tcp_ka_interval);
757 768 }
758 769 }
759 770 break;
760 771
761 772 /*
762 773 * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
763 774 * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
764 775 * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
765 776 * tcp_ka_cnt.
766 777 */
767 778 case TCP_KEEPCNT:
768 779 if (checkonly)
769 780 break;
770 781
771 782 if (*i1 == 0) {
772 783 return (EINVAL);
773 784 } else if (tcp->tcp_ka_rinterval == 0) {
774 785 /*
775 786 * When TCP_KEEPCNT is specified without first
776 787 * specifying a TCP_KEEPINTVL, we infer an
777 788 * interval based on a tunable specific to our
778 789 * stack: the tcp_keepalive_abort_interval.
779 790 * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
780 791 * the unlikely event that that has been set.)
781 792 * Given the abort interval's default value of
782 793 * 480 seconds, low TCP_KEEPCNT values can
783 794 * result in intervals that exceed the default
784 795 * maximum RTO of 60 seconds. Rather than
785 796 * fail in these cases, we (implicitly) clamp
786 797 * the interval at the maximum RTO; if the
787 798 * TCP_KEEPCNT is shortly followed by a
788 799 * TCP_KEEPINTVL (as we expect), the abort
789 800 * threshold will be recalculated correctly --
790 801 * and if a TCP_KEEPINTVL is not forthcoming,
791 802 * keep-alive will at least operate reasonably
792 803 * given the underconfigured state.
793 804 */
794 805 uint32_t interval;
795 806
796 807 interval = tcp->tcp_ka_abort_thres / *i1;
797 808
798 809 if (interval < tcp->tcp_rto_min)
799 810 interval = tcp->tcp_rto_min;
800 811
801 812 if (interval > tcp->tcp_rto_max)
802 813 interval = tcp->tcp_rto_max;
803 814
804 815 tcp->tcp_ka_rinterval = interval;
805 816 } else {
806 817 if ((*i1 * tcp->tcp_ka_rinterval) <
807 818 tcps->tcps_keepalive_abort_interval_low ||
808 819 (*i1 * tcp->tcp_ka_rinterval) >
809 820 tcps->tcps_keepalive_abort_interval_high)
810 821 return (EINVAL);
811 822 tcp->tcp_ka_abort_thres =
812 823 (*i1 * tcp->tcp_ka_rinterval);
813 824 }
814 825 tcp->tcp_ka_cnt = *i1;
815 826 break;
816 827 case TCP_KEEPINTVL:
817 828 /*
818 829 * TCP_KEEPINTVL is specified in seconds, but
819 830 * tcp_ka_rinterval is in milliseconds.
820 831 */
821 832
822 833 if (checkonly)
823 834 break;
824 835
825 836 if ((*i1 * 1000) < tcp->tcp_rto_min ||
826 837 (*i1 * 1000) > tcp->tcp_rto_max)
827 838 return (EINVAL);
828 839
829 840 if (tcp->tcp_ka_cnt == 0) {
830 841 tcp->tcp_ka_cnt =
831 842 tcp->tcp_ka_abort_thres / (*i1 * 1000);
832 843 } else {
833 844 if ((*i1 * tcp->tcp_ka_cnt * 1000) <
834 845 tcps->tcps_keepalive_abort_interval_low ||
835 846 (*i1 * tcp->tcp_ka_cnt * 1000) >
836 847 tcps->tcps_keepalive_abort_interval_high)
837 848 return (EINVAL);
838 849 tcp->tcp_ka_abort_thres =
839 850 (*i1 * tcp->tcp_ka_cnt * 1000);
840 851 }
841 852 tcp->tcp_ka_rinterval = *i1 * 1000;
842 853 break;
843 854 case TCP_KEEPALIVE_ABORT_THRESHOLD:
844 855 if (!checkonly) {
845 856 if (*i1 <
846 857 tcps->tcps_keepalive_abort_interval_low ||
↓ open down ↓ |
401 lines elided |
↑ open up ↑ |
847 858 *i1 >
848 859 tcps->tcps_keepalive_abort_interval_high) {
849 860 *outlenp = 0;
850 861 return (EINVAL);
851 862 }
852 863 tcp->tcp_ka_abort_thres = *i1;
853 864 tcp->tcp_ka_cnt = 0;
854 865 tcp->tcp_ka_rinterval = 0;
855 866 }
856 867 break;
868 + case TCP_CONGESTION: {
869 + struct cc_algo *algo;
870 +
871 + if (checkonly) {
872 + break;
873 + }
874 +
875 + /*
876 + * Make sure the string is NUL-terminated. Some
877 + * consumers pass only the number of characters
878 + * in the string, and don't include the NUL
879 + * terminator, so we set it for them.
880 + */
881 + if (inlen < CC_ALGO_NAME_MAX) {
882 + invalp[inlen] = '\0';
883 + }
884 + invalp[CC_ALGO_NAME_MAX - 1] = '\0';
885 +
886 + if ((algo = cc_load_algo((char *)invalp)) == NULL) {
887 + return (ENOENT);
888 + }
889 +
890 + if (CC_ALGO(tcp)->cb_destroy != NULL) {
891 + CC_ALGO(tcp)->cb_destroy(&tcp->tcp_ccv);
892 + }
893 +
894 + CC_DATA(tcp) = NULL;
895 + CC_ALGO(tcp) = algo;
896 +
897 + if (CC_ALGO(tcp)->cb_init != NULL) {
898 + VERIFY0(CC_ALGO(tcp)->cb_init(&tcp->tcp_ccv));
899 + }
900 +
901 + break;
902 + }
857 903 case TCP_CORK:
858 904 if (!checkonly) {
859 905 /*
860 906 * if tcp->tcp_cork was set and is now
861 907 * being unset, we have to make sure that
862 908 * the remaining data gets sent out. Also
863 909 * unset tcp->tcp_cork so that tcp_wput_data()
864 910 * can send data even if it is less than mss
865 911 */
866 912 if (tcp->tcp_cork && onoff == 0 &&
867 913 tcp->tcp_unsent > 0) {
868 914 tcp->tcp_cork = B_FALSE;
869 915 tcp_wput_data(tcp, NULL, B_FALSE);
870 916 }
871 917 tcp->tcp_cork = onoff;
872 918 }
873 919 break;
874 920 case TCP_RTO_INITIAL:
875 921 if (checkonly || val == 0)
876 922 break;
877 923
878 924 /*
879 925 * Sanity checks
880 926 *
881 927 * The initial RTO should be bounded by the minimum
882 928 * and maximum RTO. And it should also be smaller
883 929 * than the connect attempt abort timeout. Otherwise,
884 930 * the connection won't be aborted in a period
885 931 * reasonably close to that timeout.
886 932 */
887 933 if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
888 934 val > tcp->tcp_second_ctimer_threshold ||
889 935 val < tcps->tcps_rexmit_interval_initial_low ||
890 936 val > tcps->tcps_rexmit_interval_initial_high) {
891 937 *outlenp = 0;
892 938 return (EINVAL);
893 939 }
894 940 tcp->tcp_rto_initial = val;
895 941
896 942 /*
897 943 * If TCP has not sent anything, need to re-calculate
898 944 * tcp_rto. Otherwise, this option change does not
899 945 * really affect anything.
900 946 */
901 947 if (tcp->tcp_state >= TCPS_SYN_SENT)
902 948 break;
903 949
904 950 tcp->tcp_rtt_sa = MSEC2NSEC(tcp->tcp_rto_initial) << 2;
905 951 tcp->tcp_rtt_sd = MSEC2NSEC(tcp->tcp_rto_initial) >> 1;
906 952 tcp->tcp_rto = tcp_calculate_rto(tcp, tcps,
907 953 tcps->tcps_conn_grace_period);
908 954 break;
909 955 case TCP_RTO_MIN:
910 956 if (checkonly || val == 0)
911 957 break;
912 958
913 959 if (val < tcps->tcps_rexmit_interval_min_low ||
914 960 val > tcps->tcps_rexmit_interval_min_high ||
915 961 val > tcp->tcp_rto_max) {
916 962 *outlenp = 0;
917 963 return (EINVAL);
918 964 }
919 965 tcp->tcp_rto_min = val;
920 966 if (tcp->tcp_rto < val)
921 967 tcp->tcp_rto = val;
922 968 break;
923 969 case TCP_RTO_MAX:
924 970 if (checkonly || val == 0)
925 971 break;
926 972
927 973 /*
928 974 * Sanity checks
929 975 *
930 976 * The maximum RTO should not be larger than the
931 977 * connection abort timeout. Otherwise, the
932 978 * connection won't be aborted in a period reasonably
933 979 * close to that timeout.
934 980 */
935 981 if (val < tcps->tcps_rexmit_interval_max_low ||
936 982 val > tcps->tcps_rexmit_interval_max_high ||
937 983 val < tcp->tcp_rto_min ||
938 984 val > tcp->tcp_second_timer_threshold) {
939 985 *outlenp = 0;
940 986 return (EINVAL);
941 987 }
942 988 tcp->tcp_rto_max = val;
943 989 if (tcp->tcp_rto > val)
944 990 tcp->tcp_rto = val;
945 991 break;
946 992 case TCP_LINGER2:
947 993 if (checkonly || *i1 == 0)
948 994 break;
949 995
950 996 /*
951 997 * Note that the option value's unit is second. And
952 998 * the value should be bigger than the private
953 999 * parameter tcp_fin_wait_2_flush_interval's lower
954 1000 * bound and smaller than the current value of that
955 1001 * parameter. It should be smaller than the current
956 1002 * value to avoid an app setting TCP_LINGER2 to a big
957 1003 * value, causing resources to be held up too long in
958 1004 * FIN-WAIT-2 state.
959 1005 */
960 1006 if (*i1 < 0 ||
961 1007 tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
962 1008 *i1 ||
963 1009 tcps->tcps_fin_wait_2_flush_interval/SECONDS <
964 1010 *i1) {
965 1011 *outlenp = 0;
966 1012 return (EINVAL);
967 1013 }
968 1014 tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
969 1015 break;
970 1016 default:
971 1017 break;
972 1018 }
973 1019 break;
974 1020 case IPPROTO_IP:
975 1021 if (connp->conn_family != AF_INET) {
976 1022 *outlenp = 0;
977 1023 return (EINVAL);
978 1024 }
979 1025 switch (name) {
980 1026 case IP_SEC_OPT:
981 1027 /*
982 1028 * We should not allow policy setting after
983 1029 * we start listening for connections.
984 1030 */
985 1031 if (tcp->tcp_state == TCPS_LISTEN) {
986 1032 return (EINVAL);
987 1033 }
988 1034 break;
989 1035 }
990 1036 break;
991 1037 case IPPROTO_IPV6:
992 1038 /*
993 1039 * IPPROTO_IPV6 options are only supported for sockets
994 1040 * that are using IPv6 on the wire.
995 1041 */
996 1042 if (connp->conn_ipversion != IPV6_VERSION) {
997 1043 *outlenp = 0;
998 1044 return (EINVAL);
999 1045 }
1000 1046
1001 1047 switch (name) {
1002 1048 case IPV6_RECVPKTINFO:
1003 1049 if (!checkonly) {
1004 1050 /* Force it to be sent up with the next msg */
1005 1051 tcp->tcp_recvifindex = 0;
1006 1052 }
1007 1053 break;
1008 1054 case IPV6_RECVTCLASS:
1009 1055 if (!checkonly) {
1010 1056 /* Force it to be sent up with the next msg */
1011 1057 tcp->tcp_recvtclass = 0xffffffffU;
1012 1058 }
1013 1059 break;
1014 1060 case IPV6_RECVHOPLIMIT:
1015 1061 if (!checkonly) {
1016 1062 /* Force it to be sent up with the next msg */
1017 1063 tcp->tcp_recvhops = 0xffffffffU;
1018 1064 }
1019 1065 break;
1020 1066 case IPV6_PKTINFO:
1021 1067 /* This is an extra check for TCP */
1022 1068 if (inlen == sizeof (struct in6_pktinfo)) {
1023 1069 struct in6_pktinfo *pkti;
1024 1070
1025 1071 pkti = (struct in6_pktinfo *)invalp;
1026 1072 /*
1027 1073 * RFC 3542 states that ipi6_addr must be
1028 1074 * the unspecified address when setting the
1029 1075 * IPV6_PKTINFO sticky socket option on a
1030 1076 * TCP socket.
1031 1077 */
1032 1078 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
1033 1079 return (EINVAL);
1034 1080 }
1035 1081 break;
1036 1082 case IPV6_SEC_OPT:
1037 1083 /*
1038 1084 * We should not allow policy setting after
1039 1085 * we start listening for connections.
1040 1086 */
1041 1087 if (tcp->tcp_state == TCPS_LISTEN) {
1042 1088 return (EINVAL);
1043 1089 }
1044 1090 break;
1045 1091 }
1046 1092 break;
1047 1093 }
1048 1094 reterr = conn_opt_set(&coas, level, name, inlen, invalp,
1049 1095 checkonly, cr);
1050 1096 if (reterr != 0) {
1051 1097 *outlenp = 0;
1052 1098 return (reterr);
1053 1099 }
1054 1100
1055 1101 /*
1056 1102 * Common case of OK return with outval same as inval
1057 1103 */
1058 1104 if (invalp != outvalp) {
1059 1105 /* don't trust bcopy for identical src/dst */
1060 1106 (void) bcopy(invalp, outvalp, inlen);
1061 1107 }
1062 1108 *outlenp = inlen;
1063 1109
1064 1110 if (coas.coa_changed & COA_HEADER_CHANGED) {
1065 1111 /* If we are connected we rebuild the headers */
1066 1112 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1067 1113 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1068 1114 reterr = tcp_build_hdrs(tcp);
1069 1115 if (reterr != 0)
1070 1116 return (reterr);
1071 1117 }
1072 1118 }
1073 1119 if (coas.coa_changed & COA_ROUTE_CHANGED) {
1074 1120 in6_addr_t nexthop;
1075 1121
1076 1122 /*
1077 1123 * If we are connected we re-cache the information.
1078 1124 * We ignore errors to preserve BSD behavior.
1079 1125 * Note that we don't redo IPsec policy lookup here
1080 1126 * since the final destination (or source) didn't change.
1081 1127 */
1082 1128 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
1083 1129 &connp->conn_faddr_v6, &nexthop);
1084 1130
1085 1131 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1086 1132 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1087 1133 (void) ip_attr_connect(connp, connp->conn_ixa,
1088 1134 &connp->conn_laddr_v6, &connp->conn_faddr_v6,
1089 1135 &nexthop, connp->conn_fport, NULL, NULL,
1090 1136 IPDF_VERIFY_DST);
1091 1137 }
1092 1138 }
1093 1139 if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1094 1140 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1095 1141 }
1096 1142 if (coas.coa_changed & COA_WROFF_CHANGED) {
1097 1143 connp->conn_wroff = connp->conn_ht_iphc_allocated +
1098 1144 tcps->tcps_wroff_xtra;
1099 1145 (void) proto_set_tx_wroff(connp->conn_rq, connp,
1100 1146 connp->conn_wroff);
1101 1147 }
1102 1148 if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
1103 1149 if (IPCL_IS_NONSTR(connp))
1104 1150 proto_set_rx_oob_opt(connp, onoff);
1105 1151 }
1106 1152 return (0);
1107 1153 }
↓ open down ↓ |
241 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX