Print this page
13175 Add support for IP_RECVTOS
13182 CMSG_ macros should have man pages
Change-ID: I784aa36cfd3c17e3cccbf1fd329fa7e69b663ef9
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/tcp/tcp_opt_data.c
+++ new/usr/src/uts/common/inet/tcp/tcp_opt_data.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
24 24 * Copyright 2019 Joyent, Inc.
25 25 * Copyright (c) 2016 by Delphix. All rights reserved.
26 + * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
26 27 */
27 28
28 29 #include <sys/types.h>
29 30 #include <sys/stream.h>
30 31 #define _SUN_TPI_VERSION 2
31 32 #include <sys/tihdr.h>
32 33 #include <sys/socket.h>
33 34 #include <sys/xti_xtiopt.h>
34 35 #include <sys/xti_inet.h>
35 36 #include <sys/policy.h>
36 37
37 38 #include <inet/cc.h>
38 39 #include <inet/common.h>
39 40 #include <netinet/ip6.h>
40 41 #include <inet/ip.h>
41 42
42 43 #include <netinet/in.h>
43 44 #include <netinet/tcp.h>
44 45 #include <inet/optcom.h>
45 46 #include <inet/proto_set.h>
46 47 #include <inet/tcp_impl.h>
47 48
48 49 static int tcp_opt_default(queue_t *, int, int, uchar_t *);
49 50
50 51 /*
51 52 * Table of all known options handled on a TCP protocol stack.
52 53 *
53 54 * Note: This table contains options processed by both TCP and IP levels
54 55 * and is the superset of options that can be performed on a TCP over IP
55 56 * stack.
 *
 * NOTE(review): each initializer supplies eight values; by position they
 * appear to be option name, option level, two access masks (OA_R/OA_RW),
 * a privilege class (OP_NP, OP_CONFIG, OP_PRIVPORT, OP_RAW), property
 * flags (OP_DEF_FN, OP_VARLEN, OP_NODEFAULT), the value size in bytes and
 * a default value -- confirm against the opdes_t definition.
 *
 * The entries flagged OP_DEF_FN (the four TCP threshold options, IP_TTL
 * and IPV6_UNICAST_HOPS) are exactly the options that tcp_opt_default()
 * stores a value for.
56 57 */
57 58 opdes_t tcp_opt_arr[] = {
58 59
59 60 { SO_LINGER, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
60 61 sizeof (struct linger), 0 },
61 62
62 63 { SO_DEBUG, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
63 64 { SO_KEEPALIVE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
64 65 { SO_DONTROUTE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
65 66 { SO_USELOOPBACK, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
66 67 },
67 68 { SO_BROADCAST, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
68 69 { SO_REUSEADDR, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
69 70 { SO_OOBINLINE, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
70 71 { SO_TYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
71 72 { SO_SNDBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
72 73 { SO_RCVBUF, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
73 74 { SO_SNDTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
74 75 sizeof (struct timeval), 0 },
75 76 { SO_RCVTIMEO, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0,
76 77 sizeof (struct timeval), 0 },
77 78 { SO_DGRAM_ERRIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
78 79 },
79 80 { SO_SND_COPYAVOID, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
80 81 { SO_ANON_MLP, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
81 82 0 },
82 83 { SO_MAC_EXEMPT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
83 84 0 },
84 85 { SO_MAC_IMPLICIT, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
85 86 0 },
86 87 { SO_ALLZONES, SOL_SOCKET, OA_R, OA_RW, OP_CONFIG, 0, sizeof (int),
87 88 0 },
88 89 { SO_EXCLBIND, SOL_SOCKET, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
89 90
90 91 { SO_DOMAIN, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
91 92
92 93 { SO_PROTOTYPE, SOL_SOCKET, OA_R, OA_R, OP_NP, 0, sizeof (int), 0 },
93 94
94 95 { TCP_NODELAY, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
95 96 },
96 97 { TCP_MAXSEG, IPPROTO_TCP, OA_R, OA_R, OP_NP, 0, sizeof (uint_t),
97 98 536 },
98 99
99 100 { TCP_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
100 101 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
101 102
102 103 { TCP_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
103 104 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
104 105
105 106 { TCP_CONN_NOTIFY_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
106 107 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
107 108
108 109 { TCP_CONN_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
109 110 OP_DEF_FN, sizeof (int), -1 /* not initialized */ },
110 111
111 112 { TCP_RECVDSTADDR, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int),
112 113 0 },
113 114
114 115 { TCP_ANONPRIVBIND, IPPROTO_TCP, OA_R, OA_RW, OP_PRIVPORT, 0,
115 116 sizeof (int), 0 },
116 117
117 118 { TCP_EXCLBIND, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0
118 119 },
119 120
120 121 { TCP_INIT_CWND, IPPROTO_TCP, OA_RW, OA_RW, OP_CONFIG, 0,
121 122 sizeof (int), 0 },
122 123
123 124 { TCP_KEEPALIVE_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
124 125 sizeof (int), 0 },
125 126
126 127 { TCP_KEEPIDLE, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
127 128
128 129 { TCP_KEEPCNT, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
129 130
130 131 { TCP_KEEPINTVL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
131 132
132 133 { TCP_KEEPALIVE_ABORT_THRESHOLD, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0,
133 134 sizeof (int), 0 },
134 135
135 136 { TCP_CORK, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
136 137
137 138 { TCP_RTO_INITIAL, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
138 139
139 140 { TCP_RTO_MIN, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
140 141
141 142 { TCP_RTO_MAX, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (uint32_t), 0 },
142 143
143 144 { TCP_LINGER2, IPPROTO_TCP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
144 145
145 146 { TCP_CONGESTION, IPPROTO_TCP, OA_RW, OA_RW, OP_NP,
146 147 OP_VARLEN, CC_ALGO_NAME_MAX, 0 },
147 148
148 149 { IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
↓ open down ↓ |
113 lines elided |
↑ open up ↑ |
149 150 (OP_VARLEN|OP_NODEFAULT),
150 151 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
151 152 { T_IP_OPTIONS, IPPROTO_IP, OA_RW, OA_RW, OP_NP,
152 153 (OP_VARLEN|OP_NODEFAULT),
153 154 IP_MAX_OPT_LENGTH + IP_ADDR_LEN, -1 /* not initialized */ },
154 155
155 156 { IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
156 157 { T_IP_TOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
157 158 { IP_TTL, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
158 159 sizeof (int), -1 /* not initialized */ },
/*
 * IP_RECVTOS is the entry added for 13175 (IP_RECVTOS support).
 * NOTE(review): presumably it enables delivery of the received TOS value
 * as ancillary data, like the IPV6_RECV* entries further down -- confirm.
 */
160 +{ IP_RECVTOS, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
159 161
160 162 { IP_SEC_OPT, IPPROTO_IP, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
161 163 sizeof (ipsec_req_t), -1 /* not initialized */ },
162 164
163 165 { IP_BOUND_IF, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0,
164 166 sizeof (int), 0 /* no ifindex */ },
165 167
166 168 { IP_UNSPEC_SRC, IPPROTO_IP, OA_R, OA_RW, OP_RAW, 0,
167 169 sizeof (int), 0 },
168 170
169 171 { IPV6_UNICAST_HOPS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_DEF_FN,
170 172 sizeof (int), -1 /* not initialized */ },
171 173
172 174 { IPV6_BOUND_IF, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
173 175 sizeof (int), 0 /* no ifindex */ },
174 176
175 177 { IP_DONTFRAG, IPPROTO_IP, OA_RW, OA_RW, OP_NP, 0, sizeof (int), 0 },
176 178
177 179 { IP_NEXTHOP, IPPROTO_IP, OA_R, OA_RW, OP_CONFIG, 0,
178 180 sizeof (in_addr_t), -1 /* not initialized */ },
179 181
180 182 { IPV6_UNSPEC_SRC, IPPROTO_IPV6, OA_R, OA_RW, OP_RAW, 0,
181 183 sizeof (int), 0 },
182 184
183 185 { IPV6_PKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
184 186 (OP_NODEFAULT|OP_VARLEN),
185 187 sizeof (struct in6_pktinfo), -1 /* not initialized */ },
186 188 { IPV6_NEXTHOP, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
187 189 OP_NODEFAULT,
188 190 sizeof (sin6_t), -1 /* not initialized */ },
189 191 { IPV6_HOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
190 192 (OP_VARLEN|OP_NODEFAULT), 255*8,
191 193 -1 /* not initialized */ },
192 194 { IPV6_DSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
193 195 (OP_VARLEN|OP_NODEFAULT), 255*8,
194 196 -1 /* not initialized */ },
195 197 { IPV6_RTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
196 198 (OP_VARLEN|OP_NODEFAULT), 255*8,
197 199 -1 /* not initialized */ },
198 200 { IPV6_RTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
199 201 (OP_VARLEN|OP_NODEFAULT), 255*8,
200 202 -1 /* not initialized */ },
201 203 { IPV6_TCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
202 204 OP_NODEFAULT,
203 205 sizeof (int), -1 /* not initialized */ },
204 206 { IPV6_PATHMTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP,
205 207 OP_NODEFAULT,
206 208 sizeof (struct ip6_mtuinfo), -1 /* not initialized */ },
207 209 { IPV6_DONTFRAG, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
208 210 sizeof (int), 0 },
209 211 { IPV6_USE_MIN_MTU, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
210 212 sizeof (int), 0 },
211 213 { IPV6_V6ONLY, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
212 214 sizeof (int), 0 },
213 215
214 216 /* Enable receipt of ancillary data */
215 217 { IPV6_RECVPKTINFO, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
216 218 sizeof (int), 0 },
217 219 { IPV6_RECVHOPLIMIT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
218 220 sizeof (int), 0 },
219 221 { IPV6_RECVHOPOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
220 222 sizeof (int), 0 },
221 223 { _OLD_IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
222 224 sizeof (int), 0 },
223 225 { IPV6_RECVDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
224 226 sizeof (int), 0 },
225 227 { IPV6_RECVRTHDR, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
226 228 sizeof (int), 0 },
227 229 { IPV6_RECVRTHDRDSTOPTS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
228 230 sizeof (int), 0 },
229 231 { IPV6_RECVTCLASS, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
230 232 sizeof (int), 0 },
231 233
232 234 { IPV6_SEC_OPT, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, OP_NODEFAULT,
233 235 sizeof (ipsec_req_t), -1 /* not initialized */ },
234 236 { IPV6_SRC_PREFERENCES, IPPROTO_IPV6, OA_RW, OA_RW, OP_NP, 0,
235 237 sizeof (uint32_t), IPV6_PREFER_SRC_DEFAULT },
236 238 };
237 239
238 240 /*
239 241 * Table of all supported levels
240 242 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
241 243 * any supported options so we need this info separately.
242 244 *
243 245 * This is needed only for topmost tpi providers and is used only by
244 246 * XTI interfaces.
 *
 * The entry count is exposed as TCP_VALID_LEVELS_CNT, and the array itself
 * is referenced from tcp_opt_obj.
245 247 */
246 248 optlevel_t tcp_valid_levels_arr[] = {
247 249 XTI_GENERIC,
248 250 SOL_SOCKET,
249 251 IPPROTO_TCP,
250 252 IPPROTO_IP,
251 253 IPPROTO_IPV6
252 254 };
253 255
254 256
/*
 * Entry counts (computed with A_CNT) for tcp_opt_arr and
 * tcp_valid_levels_arr; both counts are stored in tcp_opt_obj.
 */
255 257 #define TCP_OPT_ARR_CNT A_CNT(tcp_opt_arr)
256 258 #define TCP_VALID_LEVELS_CNT A_CNT(tcp_valid_levels_arr)
257 259
/*
 * NOTE(review): no assignment to tcp_max_optsize appears in the portion of
 * the file reviewed here -- confirm where the driver-load code sets it.
 */
258 260 uint_t tcp_max_optsize; /* initialized when TCP driver is loaded */
259 261
260 262 /*
261 263 * Initialize option database object for TCP
262 264 *
263 265 * This object represents database of options to search passed to
264 266 * {sock,tpi}optcom_req() interface routine to take care of option
265 267 * management and associated methods.
 *
 * Members, in order: default-value function, get function, set function,
 * option table entry count, option table, valid-level entry count and
 * valid-level array.  tcp_opt_default() is defined later in this file;
 * tcp_tpi_opt_get() and tcp_tpi_opt_set() are not defined in the portion
 * of the file reviewed here.
266 268 */
267 269
268 270 optdb_obj_t tcp_opt_obj = {
269 271 tcp_opt_default, /* TCP default value function pointer */
270 272 tcp_tpi_opt_get, /* TCP get function pointer */
271 273 tcp_tpi_opt_set, /* TCP set function pointer */
272 274 TCP_OPT_ARR_CNT, /* TCP option database count of entries */
273 275 tcp_opt_arr, /* TCP option database */
274 276 TCP_VALID_LEVELS_CNT, /* TCP valid level count of entries */
275 277 tcp_valid_levels_arr /* TCP valid level array */
276 278 };
277 279
/*
 * Upper bound that tcp_opt_set() applies to a TCP_INIT_CWND request which
 * exceeds the RFC 3390-derived limit: such a request fails with EINVAL if
 * it is also larger than this value.  Starts out as TCP_MAX_INIT_CWND.
 */
278 280 static int tcp_max_init_cwnd = TCP_MAX_INIT_CWND;
279 281
280 282 /*
281 283 * Some TCP options can be "set" by requesting them in the option
282 284 * buffer. This is needed for XTI feature test though we do not
283 285 * allow it in general. We interpret that this mechanism is more
284 286 * applicable to OSI protocols and need not be allowed in general.
285 287 * This routine filters out options for which it is not allowed (most)
286 288 * and lets through those (few) for which it is. [ The XTI interface
287 289 * test suite specifics will imply that any XTI_GENERIC level XTI_* if
288 290 * ever implemented will have to be allowed here ].
 *
 * Returns B_TRUE only for level IPPROTO_TCP with name TCP_NODELAY; every
 * other level/name combination returns B_FALSE.  tcp_opt_set() consults
 * this routine for the SETFN_UD_NEGOTIATE and SETFN_CONN_NEGOTIATE
 * contexts and fails with EINVAL when the answer is B_FALSE.
289 291 */
290 292 static boolean_t
291 293 tcp_allow_connopt_set(int level, int name)
292 294 {
293 295
294 296 switch (level) {
295 297 case IPPROTO_TCP:
296 298 switch (name) {
297 299 case TCP_NODELAY:
298 300 return (B_TRUE);
299 301 default:
300 302 return (B_FALSE);
301 303 }
302 304 /*NOTREACHED*/
303 305 default:
304 306 return (B_FALSE);
305 307 }
306 308 /*NOTREACHED*/
307 309 }
308 310
309 311 /*
310 312 * This routine gets default values of certain options whose default
311 313 * values are maintained by protocol specific code
 *
 * q     - queue; Q_TO_TCP(q)->tcp_tcps is the tcp_stack_t the values are
 *         read from.
 * level - option level (IPPROTO_TCP, IPPROTO_IP or IPPROTO_IPV6).
 * name  - option name within that level.
 * ptr   - output buffer; the default is stored through it as an int32_t.
 *
 * Returns sizeof (int) when a default was stored, or -1 when the
 * level/name pair is not one of the options handled below.
312 314 */
313 315 /* ARGSUSED */
314 316 static int
315 317 tcp_opt_default(queue_t *q, int level, int name, uchar_t *ptr)
316 318 {
317 319 int32_t *i1 = (int32_t *)ptr;
318 320 tcp_stack_t *tcps = Q_TO_TCP(q)->tcp_tcps;
319 321
320 322 switch (level) {
321 323 case IPPROTO_TCP:
322 324 switch (name) {
323 325 case TCP_NOTIFY_THRESHOLD:
324 326 *i1 = tcps->tcps_ip_notify_interval;
325 327 break;
326 328 case TCP_ABORT_THRESHOLD:
327 329 *i1 = tcps->tcps_ip_abort_interval;
328 330 break;
329 331 case TCP_CONN_NOTIFY_THRESHOLD:
330 332 *i1 = tcps->tcps_ip_notify_cinterval;
331 333 break;
332 334 case TCP_CONN_ABORT_THRESHOLD:
333 335 *i1 = tcps->tcps_ip_abort_cinterval;
334 336 break;
335 337 default:
336 338 return (-1);
337 339 }
338 340 break;
339 341 case IPPROTO_IP:
340 342 switch (name) {
341 343 case IP_TTL:
342 344 *i1 = tcps->tcps_ipv4_ttl;
343 345 break;
344 346 default:
345 347 return (-1);
346 348 }
347 349 break;
348 350 case IPPROTO_IPV6:
349 351 switch (name) {
350 352 case IPV6_UNICAST_HOPS:
351 353 *i1 = tcps->tcps_ipv6_hoplimit;
352 354 break;
353 355 default:
354 356 return (-1);
355 357 }
356 358 break;
357 359 default:
358 360 return (-1);
359 361 }
/* Reached only when one of the cases above stored a value through i1. */
360 362 return (sizeof (int));
361 363 }
362 364
363 365 /*
364 366 * TCP routine to get the values of options.
 *
 * connp - connection being queried; connp->conn_tcp is the TCP state read
 *         for the TCP-specific options.
 * level - option level.
 * name  - option name within that level.
 * ptr   - output buffer the option value is stored through.
 *
 * Options answered directly here return the number of bytes stored
 * (sizeof (int), sizeof (uint32_t), or the name length plus its NUL for
 * TCP_CONGESTION).  Anything not answered here is passed to conn_opt_get()
 * with conn_lock held, and that call's return value is returned unchanged.
 *
 * -1 is returned directly when an IPPROTO_IP option is requested on a
 * socket whose conn_family is not AF_INET, when an IPPROTO_IPV6 option is
 * requested on a connection whose conn_ipversion is not IPV6_VERSION, when
 * IPV6_PATHMTU is requested while tcp_state is below TCPS_ESTABLISHED, or
 * when the congestion-control algorithm name does not fit in
 * CC_ALGO_NAME_MAX bytes.
365 367 */
366 368 int
367 369 tcp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
368 370 {
369 371 int *i1 = (int *)ptr;
370 372 tcp_t *tcp = connp->conn_tcp;
371 373 conn_opt_arg_t coas;
372 374 int retval;
373 375
/* Argument block for the conn_opt_get() fallback at the bottom. */
374 376 coas.coa_connp = connp;
375 377 coas.coa_ixa = connp->conn_ixa;
376 378 coas.coa_ipp = &connp->conn_xmit_ipp;
377 379 coas.coa_ancillary = B_FALSE;
378 380 coas.coa_changed = 0;
379 381
380 382 switch (level) {
381 383 case SOL_SOCKET:
382 384 switch (name) {
383 385 case SO_SND_COPYAVOID:
384 386 *i1 = tcp->tcp_snd_zcopy_on ?
385 387 SO_SND_COPYAVOID : 0;
386 388 return (sizeof (int));
387 389 case SO_ACCEPTCONN:
388 390 *i1 = (tcp->tcp_state == TCPS_LISTEN);
389 391 return (sizeof (int));
390 392 }
/* Other SOL_SOCKET options fall through to conn_opt_get() below. */
391 393 break;
392 394 case IPPROTO_TCP:
393 395 switch (name) {
394 396 case TCP_NODELAY:
/* Reports the option constant itself (not 1) when tcp_naglim is 1. */
395 397 *i1 = (tcp->tcp_naglim == 1) ? TCP_NODELAY : 0;
396 398 return (sizeof (int));
397 399 case TCP_MAXSEG:
398 400 *i1 = tcp->tcp_mss;
399 401 return (sizeof (int));
400 402 case TCP_NOTIFY_THRESHOLD:
401 403 *i1 = (int)tcp->tcp_first_timer_threshold;
402 404 return (sizeof (int));
403 405 case TCP_ABORT_THRESHOLD:
404 406 *i1 = tcp->tcp_second_timer_threshold;
405 407 return (sizeof (int));
406 408 case TCP_CONN_NOTIFY_THRESHOLD:
407 409 *i1 = tcp->tcp_first_ctimer_threshold;
408 410 return (sizeof (int));
409 411 case TCP_CONN_ABORT_THRESHOLD:
410 412 *i1 = tcp->tcp_second_ctimer_threshold;
411 413 return (sizeof (int));
412 414 case TCP_INIT_CWND:
413 415 *i1 = tcp->tcp_init_cwnd;
414 416 return (sizeof (int));
415 417 case TCP_KEEPALIVE_THRESHOLD:
416 418 *i1 = tcp->tcp_ka_interval;
417 419 return (sizeof (int));
418 420
419 421 /*
420 422 * TCP_KEEPIDLE expects value in seconds, but
421 423 * tcp_ka_interval is in milliseconds.
422 424 */
423 425 case TCP_KEEPIDLE:
424 426 *i1 = tcp->tcp_ka_interval / 1000;
425 427 return (sizeof (int));
426 428 case TCP_KEEPCNT:
427 429 *i1 = tcp->tcp_ka_cnt;
428 430 return (sizeof (int));
429 431
430 432 /*
431 433 * TCP_KEEPINTVL expects value in seconds, but
432 434 * tcp_ka_rinterval is in milliseconds.
433 435 */
434 436 case TCP_KEEPINTVL:
435 437 *i1 = tcp->tcp_ka_rinterval / 1000;
436 438 return (sizeof (int));
437 439 case TCP_KEEPALIVE_ABORT_THRESHOLD:
438 440 *i1 = tcp->tcp_ka_abort_thres;
439 441 return (sizeof (int));
440 442 case TCP_CONGESTION: {
/*
 * strlcpy() returns the length of the source string, so a result of
 * CC_ALGO_NAME_MAX or more means the name was truncated in ptr.
 */
441 443 size_t len = strlcpy((char *)ptr, CC_ALGO(tcp)->name,
442 444 CC_ALGO_NAME_MAX);
443 445 if (len >= CC_ALGO_NAME_MAX)
444 446 return (-1);
/* Length reported to the caller includes the terminating NUL. */
445 447 return (len + 1);
446 448 }
447 449 case TCP_CORK:
448 450 *i1 = tcp->tcp_cork;
449 451 return (sizeof (int));
450 452 case TCP_RTO_INITIAL:
451 453 *i1 = tcp->tcp_rto_initial;
452 454 return (sizeof (uint32_t));
453 455 case TCP_RTO_MIN:
454 456 *i1 = tcp->tcp_rto_min;
455 457 return (sizeof (uint32_t));
456 458 case TCP_RTO_MAX:
457 459 *i1 = tcp->tcp_rto_max;
458 460 return (sizeof (uint32_t));
459 461 case TCP_LINGER2:
/* NOTE(review): presumably converts the stored interval to seconds -- confirm SECONDS. */
460 462 *i1 = tcp->tcp_fin_wait_2_flush_interval / SECONDS;
461 463 return (sizeof (int));
462 464 }
463 465 break;
464 466 case IPPROTO_IP:
/* IPPROTO_IP options are refused unless the socket family is AF_INET. */
465 467 if (connp->conn_family != AF_INET)
466 468 return (-1);
467 469 switch (name) {
468 470 case IP_OPTIONS:
469 471 case T_IP_OPTIONS:
470 472 /* Caller ensures enough space */
471 473 return (ip_opt_get_user(connp, ptr));
472 474 default:
/* Every other IPPROTO_IP option is left to conn_opt_get() below. */
473 475 break;
474 476 }
475 477 break;
476 478
477 479 case IPPROTO_IPV6:
478 480 /*
479 481 * IPPROTO_IPV6 options are only supported for sockets
480 482 * that are using IPv6 on the wire.
481 483 */
482 484 if (connp->conn_ipversion != IPV6_VERSION) {
483 485 return (-1);
484 486 }
485 487 switch (name) {
486 488 case IPV6_PATHMTU:
/* Refused until the connection has reached TCPS_ESTABLISHED. */
487 489 if (tcp->tcp_state < TCPS_ESTABLISHED)
488 490 return (-1);
489 491 break;
490 492 }
491 493 break;
492 494 }
/* Not answered above: hand the request to conn_opt_get() under conn_lock. */
493 495 mutex_enter(&connp->conn_lock);
494 496 retval = conn_opt_get(&coas, level, name, ptr);
495 497 mutex_exit(&connp->conn_lock);
496 498 return (retval);
497 499 }
498 500
499 501 /*
500 502 * We declare as 'int' rather than 'void' to satisfy pfi_t arg requirements.
501 503 * Parameters are assumed to be verified by the caller.
502 504 */
503 505 /* ARGSUSED */
504 506 int
505 507 tcp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
506 508 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
507 509 void *thisdg_attrs, cred_t *cr)
508 510 {
509 511 tcp_t *tcp = connp->conn_tcp;
510 512 int *i1 = (int *)invalp;
511 513 boolean_t onoff = (*i1 == 0) ? 0 : 1;
512 514 boolean_t checkonly;
513 515 int reterr;
514 516 tcp_stack_t *tcps = tcp->tcp_tcps;
515 517 conn_opt_arg_t coas;
516 518 uint32_t val = *((uint32_t *)invalp);
517 519
518 520 coas.coa_connp = connp;
519 521 coas.coa_ixa = connp->conn_ixa;
↓ open down ↓ |
351 lines elided |
↑ open up ↑ |
520 522 coas.coa_ipp = &connp->conn_xmit_ipp;
521 523 coas.coa_ancillary = B_FALSE;
522 524 coas.coa_changed = 0;
523 525
524 526 switch (optset_context) {
525 527 case SETFN_OPTCOM_CHECKONLY:
526 528 checkonly = B_TRUE;
527 529 /*
528 530 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
529 531 * inlen != 0 implies value supplied and
530 - * we have to "pretend" to set it.
532 + * we have to "pretend" to set it.
531 533 * inlen == 0 implies that there is no
532 - * value part in T_CHECK request and just validation
534 + * value part in T_CHECK request and just validation
533 535 * done elsewhere should be enough, we just return here.
534 536 */
535 537 if (inlen == 0) {
536 538 *outlenp = 0;
537 539 return (0);
538 540 }
539 541 break;
540 542 case SETFN_OPTCOM_NEGOTIATE:
541 543 checkonly = B_FALSE;
542 544 break;
543 545 case SETFN_UD_NEGOTIATE: /* error on conn-oriented transports ? */
544 546 case SETFN_CONN_NEGOTIATE:
545 547 checkonly = B_FALSE;
546 548 /*
547 549 * Negotiating local and "association-related" options
548 550 * from other (T_CONN_REQ, T_CONN_RES,T_UNITDATA_REQ)
549 551 * primitives is allowed by XTI, but we choose
550 552 * to not implement this style negotiation for Internet
551 553 * protocols (We interpret it is a must for OSI world but
552 554 * optional for Internet protocols) for all options.
553 555 * [ Will do only for the few options that enable test
554 556 * suites that our XTI implementation of this feature
555 557 * works for transports that do allow it ]
556 558 */
557 559 if (!tcp_allow_connopt_set(level, name)) {
558 560 *outlenp = 0;
559 561 return (EINVAL);
560 562 }
561 563 break;
562 564 default:
563 565 /*
564 566 * We should never get here
565 567 */
566 568 *outlenp = 0;
567 569 return (EINVAL);
568 570 }
569 571
570 572 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
571 573 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
572 574
573 575 /*
574 576 * For TCP, we should have no ancillary data sent down
575 577 * (sendmsg isn't supported for SOCK_STREAM), so thisdg_attrs
576 578 * has to be zero.
577 579 */
578 580 ASSERT(thisdg_attrs == NULL);
579 581
580 582 /*
581 583 * For fixed length options, no sanity check
582 584 * of passed in length is done. It is assumed *_optcom_req()
583 585 * routines do the right thing.
584 586 */
585 587 switch (level) {
586 588 case SOL_SOCKET:
587 589 switch (name) {
588 590 case SO_KEEPALIVE:
589 591 if (checkonly) {
590 592 /* check only case */
591 593 break;
592 594 }
593 595
594 596 if (!onoff) {
595 597 if (connp->conn_keepalive) {
596 598 if (tcp->tcp_ka_tid != 0) {
597 599 (void) TCP_TIMER_CANCEL(tcp,
598 600 tcp->tcp_ka_tid);
599 601 tcp->tcp_ka_tid = 0;
600 602 }
601 603 connp->conn_keepalive = 0;
602 604 }
603 605 break;
604 606 }
605 607 if (!connp->conn_keepalive) {
606 608 /* Crank up the keepalive timer */
607 609 tcp->tcp_ka_last_intrvl = 0;
608 610 tcp->tcp_ka_tid = TCP_TIMER(tcp,
609 611 tcp_keepalive_timer, tcp->tcp_ka_interval);
610 612 connp->conn_keepalive = 1;
611 613 }
612 614 break;
613 615 case SO_SNDBUF: {
614 616 if (*i1 > tcps->tcps_max_buf) {
615 617 *outlenp = 0;
616 618 return (ENOBUFS);
617 619 }
618 620 if (checkonly)
619 621 break;
620 622
621 623 connp->conn_sndbuf = *i1;
622 624 if (tcps->tcps_snd_lowat_fraction != 0) {
623 625 connp->conn_sndlowat = connp->conn_sndbuf /
624 626 tcps->tcps_snd_lowat_fraction;
625 627 }
626 628 (void) tcp_maxpsz_set(tcp, B_TRUE);
627 629 /*
628 630 * If we are flow-controlled, recheck the condition.
629 631 * There are apps that increase SO_SNDBUF size when
630 632 * flow-controlled (EWOULDBLOCK), and expect the flow
631 633 * control condition to be lifted right away.
632 634 */
633 635 mutex_enter(&tcp->tcp_non_sq_lock);
634 636 if (tcp->tcp_flow_stopped &&
635 637 TCP_UNSENT_BYTES(tcp) < connp->conn_sndbuf) {
636 638 tcp_clrqfull(tcp);
637 639 }
638 640 mutex_exit(&tcp->tcp_non_sq_lock);
639 641 *outlenp = inlen;
640 642 return (0);
641 643 }
642 644 case SO_RCVBUF:
643 645 if (*i1 > tcps->tcps_max_buf) {
644 646 *outlenp = 0;
645 647 return (ENOBUFS);
646 648 }
647 649 /* Silently ignore zero */
648 650 if (!checkonly && *i1 != 0) {
649 651 *i1 = MSS_ROUNDUP(*i1, tcp->tcp_mss);
650 652 (void) tcp_rwnd_set(tcp, *i1);
651 653 }
652 654 /*
653 655 * XXX should we return the rwnd here
654 656 * and tcp_opt_get ?
655 657 */
656 658 *outlenp = inlen;
657 659 return (0);
658 660 case SO_SND_COPYAVOID:
659 661 if (!checkonly) {
660 662 if (tcp->tcp_loopback ||
661 663 (onoff != 1) || !tcp_zcopy_check(tcp)) {
662 664 *outlenp = 0;
663 665 return (EOPNOTSUPP);
664 666 }
665 667 tcp->tcp_snd_zcopy_aware = 1;
666 668 }
667 669 *outlenp = inlen;
668 670 return (0);
669 671 }
670 672 break;
671 673 case IPPROTO_TCP:
672 674 switch (name) {
673 675 case TCP_NODELAY:
674 676 if (!checkonly)
675 677 tcp->tcp_naglim = *i1 ? 1 : tcp->tcp_mss;
676 678 break;
677 679 case TCP_NOTIFY_THRESHOLD:
678 680 if (!checkonly)
679 681 tcp->tcp_first_timer_threshold = *i1;
680 682 break;
681 683 case TCP_ABORT_THRESHOLD:
682 684 if (!checkonly)
683 685 tcp->tcp_second_timer_threshold = *i1;
684 686 break;
685 687 case TCP_CONN_NOTIFY_THRESHOLD:
686 688 if (!checkonly)
687 689 tcp->tcp_first_ctimer_threshold = *i1;
688 690 break;
689 691 case TCP_CONN_ABORT_THRESHOLD:
690 692 if (!checkonly)
691 693 tcp->tcp_second_ctimer_threshold = *i1;
692 694 break;
693 695 case TCP_RECVDSTADDR:
694 696 if (tcp->tcp_state > TCPS_LISTEN) {
695 697 *outlenp = 0;
696 698 return (EOPNOTSUPP);
697 699 }
698 700 /* Setting done in conn_opt_set */
699 701 break;
700 702 case TCP_INIT_CWND:
701 703 if (checkonly)
702 704 break;
703 705
704 706 /*
705 707 * Only allow socket with network configuration
706 708 * privilege to set the initial cwnd to be larger
707 709 * than allowed by RFC 3390.
708 710 */
709 711 if (val > MIN(4, MAX(2, 4380 / tcp->tcp_mss))) {
710 712 if ((reterr = secpolicy_ip_config(cr, B_TRUE))
711 713 != 0) {
712 714 *outlenp = 0;
713 715 return (reterr);
714 716 }
715 717 if (val > tcp_max_init_cwnd) {
716 718 *outlenp = 0;
717 719 return (EINVAL);
718 720 }
719 721 }
720 722
721 723 tcp->tcp_init_cwnd = val;
722 724
723 725 /*
724 726 * If the socket is connected, AND no outbound data
725 727 * has been sent, reset the actual cwnd values.
726 728 */
727 729 if (tcp->tcp_state == TCPS_ESTABLISHED &&
728 730 tcp->tcp_iss == tcp->tcp_snxt - 1) {
729 731 tcp->tcp_cwnd =
730 732 MIN(tcp->tcp_rwnd, val * tcp->tcp_mss);
731 733 }
732 734 break;
733 735
734 736 /*
735 737 * TCP_KEEPIDLE is in seconds but TCP_KEEPALIVE_THRESHOLD
736 738 * is in milliseconds. TCP_KEEPIDLE is introduced for
737 739 * compatibility with other Unix flavors.
738 740 * We can fall through TCP_KEEPALIVE_THRESHOLD logic after
739 741 * converting the input to milliseconds.
740 742 */
741 743 case TCP_KEEPIDLE:
742 744 *i1 *= 1000;
743 745 /* FALLTHRU */
744 746
745 747 case TCP_KEEPALIVE_THRESHOLD:
746 748 if (checkonly)
747 749 break;
748 750
749 751 if (*i1 < tcps->tcps_keepalive_interval_low ||
750 752 *i1 > tcps->tcps_keepalive_interval_high) {
751 753 *outlenp = 0;
752 754 return (EINVAL);
753 755 }
754 756 if (*i1 != tcp->tcp_ka_interval) {
755 757 tcp->tcp_ka_interval = *i1;
756 758 /*
757 759 * Check if we need to restart the
758 760 * keepalive timer.
759 761 */
760 762 if (tcp->tcp_ka_tid != 0) {
761 763 ASSERT(connp->conn_keepalive);
762 764 (void) TCP_TIMER_CANCEL(tcp,
763 765 tcp->tcp_ka_tid);
764 766 tcp->tcp_ka_last_intrvl = 0;
765 767 tcp->tcp_ka_tid = TCP_TIMER(tcp,
766 768 tcp_keepalive_timer,
767 769 tcp->tcp_ka_interval);
768 770 }
769 771 }
770 772 break;
771 773
772 774 /*
773 775 * tcp_ka_abort_thres = tcp_ka_rinterval * tcp_ka_cnt.
774 776 * So setting TCP_KEEPCNT or TCP_KEEPINTVL can affect all the
775 777 * three members - tcp_ka_abort_thres, tcp_ka_rinterval and
776 778 * tcp_ka_cnt.
777 779 */
778 780 case TCP_KEEPCNT:
779 781 if (checkonly)
780 782 break;
781 783
782 784 if (*i1 == 0) {
783 785 return (EINVAL);
784 786 } else if (tcp->tcp_ka_rinterval == 0) {
785 787 /*
786 788 * When TCP_KEEPCNT is specified without first
787 789 * specifying a TCP_KEEPINTVL, we infer an
788 790 * interval based on a tunable specific to our
789 791 * stack: the tcp_keepalive_abort_interval.
790 792 * (Or the TCP_KEEPALIVE_ABORT_THRESHOLD, in
791 793 * the unlikely event that that has been set.)
792 794 * Given the abort interval's default value of
793 795 * 480 seconds, low TCP_KEEPCNT values can
794 796 * result in intervals that exceed the default
795 797 * maximum RTO of 60 seconds. Rather than
796 798 * fail in these cases, we (implicitly) clamp
797 799 * the interval at the maximum RTO; if the
798 800 * TCP_KEEPCNT is shortly followed by a
799 801 * TCP_KEEPINTVL (as we expect), the abort
800 802 * threshold will be recalculated correctly --
801 803 * and if a TCP_KEEPINTVL is not forthcoming,
802 804 * keep-alive will at least operate reasonably
803 805 * given the underconfigured state.
804 806 */
805 807 uint32_t interval;
806 808
807 809 interval = tcp->tcp_ka_abort_thres / *i1;
808 810
809 811 if (interval < tcp->tcp_rto_min)
810 812 interval = tcp->tcp_rto_min;
811 813
812 814 if (interval > tcp->tcp_rto_max)
813 815 interval = tcp->tcp_rto_max;
814 816
815 817 tcp->tcp_ka_rinterval = interval;
816 818 } else {
817 819 if ((*i1 * tcp->tcp_ka_rinterval) <
818 820 tcps->tcps_keepalive_abort_interval_low ||
819 821 (*i1 * tcp->tcp_ka_rinterval) >
820 822 tcps->tcps_keepalive_abort_interval_high)
821 823 return (EINVAL);
822 824 tcp->tcp_ka_abort_thres =
823 825 (*i1 * tcp->tcp_ka_rinterval);
824 826 }
825 827 tcp->tcp_ka_cnt = *i1;
826 828 break;
827 829 case TCP_KEEPINTVL:
828 830 /*
829 831 * TCP_KEEPINTVL is specified in seconds, but
830 832 * tcp_ka_rinterval is in milliseconds.
831 833 */
832 834
833 835 if (checkonly)
834 836 break;
835 837
836 838 if ((*i1 * 1000) < tcp->tcp_rto_min ||
837 839 (*i1 * 1000) > tcp->tcp_rto_max)
838 840 return (EINVAL);
839 841
840 842 if (tcp->tcp_ka_cnt == 0) {
841 843 tcp->tcp_ka_cnt =
842 844 tcp->tcp_ka_abort_thres / (*i1 * 1000);
843 845 } else {
844 846 if ((*i1 * tcp->tcp_ka_cnt * 1000) <
845 847 tcps->tcps_keepalive_abort_interval_low ||
846 848 (*i1 * tcp->tcp_ka_cnt * 1000) >
847 849 tcps->tcps_keepalive_abort_interval_high)
848 850 return (EINVAL);
849 851 tcp->tcp_ka_abort_thres =
850 852 (*i1 * tcp->tcp_ka_cnt * 1000);
851 853 }
852 854 tcp->tcp_ka_rinterval = *i1 * 1000;
853 855 break;
854 856 case TCP_KEEPALIVE_ABORT_THRESHOLD:
855 857 if (!checkonly) {
856 858 if (*i1 <
857 859 tcps->tcps_keepalive_abort_interval_low ||
858 860 *i1 >
859 861 tcps->tcps_keepalive_abort_interval_high) {
860 862 *outlenp = 0;
861 863 return (EINVAL);
862 864 }
863 865 tcp->tcp_ka_abort_thres = *i1;
864 866 tcp->tcp_ka_cnt = 0;
865 867 tcp->tcp_ka_rinterval = 0;
866 868 }
867 869 break;
868 870 case TCP_CONGESTION: {
869 871 struct cc_algo *algo;
870 872
871 873 if (checkonly) {
872 874 break;
873 875 }
874 876
875 877 /*
876 878 * Make sure the string is NUL-terminated. Some
877 879 * consumers pass only the number of characters
878 880 * in the string, and don't include the NUL
879 881 * terminator, so we set it for them.
880 882 */
881 883 if (inlen < CC_ALGO_NAME_MAX) {
882 884 invalp[inlen] = '\0';
883 885 }
884 886 invalp[CC_ALGO_NAME_MAX - 1] = '\0';
885 887
886 888 if ((algo = cc_load_algo((char *)invalp)) == NULL) {
887 889 return (ENOENT);
888 890 }
889 891
890 892 if (CC_ALGO(tcp)->cb_destroy != NULL) {
891 893 CC_ALGO(tcp)->cb_destroy(&tcp->tcp_ccv);
892 894 }
893 895
894 896 CC_DATA(tcp) = NULL;
895 897 CC_ALGO(tcp) = algo;
896 898
897 899 if (CC_ALGO(tcp)->cb_init != NULL) {
898 900 VERIFY0(CC_ALGO(tcp)->cb_init(&tcp->tcp_ccv));
899 901 }
900 902
901 903 break;
902 904 }
903 905 case TCP_CORK:
904 906 if (!checkonly) {
905 907 /*
906 908 * if tcp->tcp_cork was set and is now
907 909 * being unset, we have to make sure that
908 910 * the remaining data gets sent out. Also
909 911 * unset tcp->tcp_cork so that tcp_wput_data()
910 912 * can send data even if it is less than mss
911 913 */
912 914 if (tcp->tcp_cork && onoff == 0 &&
913 915 tcp->tcp_unsent > 0) {
914 916 tcp->tcp_cork = B_FALSE;
915 917 tcp_wput_data(tcp, NULL, B_FALSE);
916 918 }
917 919 tcp->tcp_cork = onoff;
918 920 }
919 921 break;
920 922 case TCP_RTO_INITIAL:
921 923 if (checkonly || val == 0)
922 924 break;
923 925
924 926 /*
925 927 * Sanity checks
926 928 *
927 929 * The initial RTO should be bounded by the minimum
928 930 * and maximum RTO. And it should also be smaller
929 931 * than the connect attempt abort timeout. Otherwise,
930 932 * the connection won't be aborted in a period
931 933 * reasonably close to that timeout.
932 934 */
933 935 if (val < tcp->tcp_rto_min || val > tcp->tcp_rto_max ||
934 936 val > tcp->tcp_second_ctimer_threshold ||
935 937 val < tcps->tcps_rexmit_interval_initial_low ||
936 938 val > tcps->tcps_rexmit_interval_initial_high) {
937 939 *outlenp = 0;
938 940 return (EINVAL);
939 941 }
940 942 tcp->tcp_rto_initial = val;
941 943
942 944 /*
943 945 * If TCP has not sent anything, need to re-calculate
944 946 * tcp_rto. Otherwise, this option change does not
945 947 * really affect anything.
946 948 */
947 949 if (tcp->tcp_state >= TCPS_SYN_SENT)
948 950 break;
949 951
950 952 tcp->tcp_rtt_sa = MSEC2NSEC(tcp->tcp_rto_initial) << 2;
951 953 tcp->tcp_rtt_sd = MSEC2NSEC(tcp->tcp_rto_initial) >> 1;
952 954 tcp->tcp_rto = tcp_calculate_rto(tcp, tcps,
953 955 tcps->tcps_conn_grace_period);
954 956 break;
955 957 case TCP_RTO_MIN:
956 958 if (checkonly || val == 0)
957 959 break;
958 960
959 961 if (val < tcps->tcps_rexmit_interval_min_low ||
960 962 val > tcps->tcps_rexmit_interval_min_high ||
961 963 val > tcp->tcp_rto_max) {
962 964 *outlenp = 0;
963 965 return (EINVAL);
964 966 }
965 967 tcp->tcp_rto_min = val;
966 968 if (tcp->tcp_rto < val)
967 969 tcp->tcp_rto = val;
968 970 break;
969 971 case TCP_RTO_MAX:
970 972 if (checkonly || val == 0)
971 973 break;
972 974
973 975 /*
974 976 * Sanity checks
975 977 *
976 978 * The maximum RTO should not be larger than the
977 979 * connection abort timeout. Otherwise, the
978 980 * connection won't be aborted in a period reasonably
979 981 * close to that timeout.
980 982 */
981 983 if (val < tcps->tcps_rexmit_interval_max_low ||
982 984 val > tcps->tcps_rexmit_interval_max_high ||
983 985 val < tcp->tcp_rto_min ||
984 986 val > tcp->tcp_second_timer_threshold) {
985 987 *outlenp = 0;
986 988 return (EINVAL);
987 989 }
988 990 tcp->tcp_rto_max = val;
989 991 if (tcp->tcp_rto > val)
990 992 tcp->tcp_rto = val;
991 993 break;
992 994 case TCP_LINGER2:
993 995 if (checkonly || *i1 == 0)
994 996 break;
995 997
996 998 /*
997 999 * Note that the option value's unit is second. And
998 1000 * the value should be bigger than the private
999 1001 * parameter tcp_fin_wait_2_flush_interval's lower
1000 1002 * bound and smaller than the current value of that
1001 1003 * parameter. It should be smaller than the current
1002 1004 * value to avoid an app setting TCP_LINGER2 to a big
1003 1005 * value, causing resource to be held up too long in
1004 1006 * FIN-WAIT-2 state.
1005 1007 */
1006 1008 if (*i1 < 0 ||
1007 1009 tcps->tcps_fin_wait_2_flush_interval_low/SECONDS >
1008 1010 *i1 ||
1009 1011 tcps->tcps_fin_wait_2_flush_interval/SECONDS <
1010 1012 *i1) {
1011 1013 *outlenp = 0;
1012 1014 return (EINVAL);
1013 1015 }
1014 1016 tcp->tcp_fin_wait_2_flush_interval = *i1 * SECONDS;
1015 1017 break;
1016 1018 default:
1017 1019 break;
1018 1020 }
1019 1021 break;
1020 1022 case IPPROTO_IP:
1021 1023 if (connp->conn_family != AF_INET) {
1022 1024 *outlenp = 0;
1023 1025 return (EINVAL);
1024 1026 }
↓ open down ↓ |
482 lines elided |
↑ open up ↑ |
1025 1027 switch (name) {
1026 1028 case IP_SEC_OPT:
1027 1029 /*
1028 1030 * We should not allow policy setting after
1029 1031 * we start listening for connections.
1030 1032 */
1031 1033 if (tcp->tcp_state == TCPS_LISTEN) {
1032 1034 return (EINVAL);
1033 1035 }
1034 1036 break;
1037 + case IP_RECVTOS:
1038 + if (!checkonly) {
1039 + /*
1040 + * Force it to be sent up with the next msg
1041 + * by setting it to a value which cannot
1042 + * appear in a packet (TOS is only 8-bits)
1043 + */
1044 + tcp->tcp_recvtos = 0xffffffffU;
1045 + }
1046 + break;
1035 1047 }
1036 1048 break;
1037 1049 case IPPROTO_IPV6:
1038 1050 /*
1039 1051 * IPPROTO_IPV6 options are only supported for sockets
1040 1052 * that are using IPv6 on the wire.
1041 1053 */
1042 1054 if (connp->conn_ipversion != IPV6_VERSION) {
1043 1055 *outlenp = 0;
1044 1056 return (EINVAL);
1045 1057 }
1046 1058
1047 1059 switch (name) {
1048 1060 case IPV6_RECVPKTINFO:
1049 1061 if (!checkonly) {
1050 1062 /* Force it to be sent up with the next msg */
1051 1063 tcp->tcp_recvifindex = 0;
1052 1064 }
1053 1065 break;
1054 1066 case IPV6_RECVTCLASS:
1055 1067 if (!checkonly) {
1056 1068 /* Force it to be sent up with the next msg */
1057 1069 tcp->tcp_recvtclass = 0xffffffffU;
1058 1070 }
1059 1071 break;
1060 1072 case IPV6_RECVHOPLIMIT:
1061 1073 if (!checkonly) {
1062 1074 /* Force it to be sent up with the next msg */
1063 1075 tcp->tcp_recvhops = 0xffffffffU;
1064 1076 }
1065 1077 break;
1066 1078 case IPV6_PKTINFO:
1067 1079 /* This is an extra check for TCP */
1068 1080 if (inlen == sizeof (struct in6_pktinfo)) {
1069 1081 struct in6_pktinfo *pkti;
1070 1082
1071 1083 pkti = (struct in6_pktinfo *)invalp;
1072 1084 /*
1073 1085 * RFC 3542 states that ipi6_addr must be
1074 1086 * the unspecified address when setting the
1075 1087 * IPV6_PKTINFO sticky socket option on a
1076 1088 * TCP socket.
1077 1089 */
1078 1090 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr))
1079 1091 return (EINVAL);
1080 1092 }
1081 1093 break;
1082 1094 case IPV6_SEC_OPT:
1083 1095 /*
1084 1096 * We should not allow policy setting after
1085 1097 * we start listening for connections.
1086 1098 */
1087 1099 if (tcp->tcp_state == TCPS_LISTEN) {
1088 1100 return (EINVAL);
1089 1101 }
1090 1102 break;
1091 1103 }
1092 1104 break;
1093 1105 }
1094 1106 reterr = conn_opt_set(&coas, level, name, inlen, invalp,
1095 1107 checkonly, cr);
1096 1108 if (reterr != 0) {
1097 1109 *outlenp = 0;
1098 1110 return (reterr);
1099 1111 }
1100 1112
1101 1113 /*
1102 1114 * Common case of OK return with outval same as inval
1103 1115 */
1104 1116 if (invalp != outvalp) {
1105 1117 /* don't trust bcopy for identical src/dst */
1106 1118 (void) bcopy(invalp, outvalp, inlen);
1107 1119 }
1108 1120 *outlenp = inlen;
1109 1121
1110 1122 if (coas.coa_changed & COA_HEADER_CHANGED) {
1111 1123 /* If we are connected we rebuilt the headers */
1112 1124 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1113 1125 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1114 1126 reterr = tcp_build_hdrs(tcp);
1115 1127 if (reterr != 0)
1116 1128 return (reterr);
1117 1129 }
1118 1130 }
1119 1131 if (coas.coa_changed & COA_ROUTE_CHANGED) {
1120 1132 in6_addr_t nexthop;
1121 1133
1122 1134 /*
1123 1135 * If we are connected we re-cache the information.
1124 1136 * We ignore errors to preserve BSD behavior.
1125 1137 * Note that we don't redo IPsec policy lookup here
1126 1138 * since the final destination (or source) didn't change.
1127 1139 */
1128 1140 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
1129 1141 &connp->conn_faddr_v6, &nexthop);
1130 1142
1131 1143 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1132 1144 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1133 1145 (void) ip_attr_connect(connp, connp->conn_ixa,
1134 1146 &connp->conn_laddr_v6, &connp->conn_faddr_v6,
1135 1147 &nexthop, connp->conn_fport, NULL, NULL,
1136 1148 IPDF_VERIFY_DST);
1137 1149 }
1138 1150 }
1139 1151 if ((coas.coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
1140 1152 connp->conn_wq->q_hiwat = connp->conn_sndbuf;
1141 1153 }
1142 1154 if (coas.coa_changed & COA_WROFF_CHANGED) {
1143 1155 connp->conn_wroff = connp->conn_ht_iphc_allocated +
1144 1156 tcps->tcps_wroff_xtra;
1145 1157 (void) proto_set_tx_wroff(connp->conn_rq, connp,
1146 1158 connp->conn_wroff);
1147 1159 }
1148 1160 if (coas.coa_changed & COA_OOBINLINE_CHANGED) {
1149 1161 if (IPCL_IS_NONSTR(connp))
1150 1162 proto_set_rx_oob_opt(connp, onoff);
1151 1163 }
1152 1164 return (0);
1153 1165 }
↓ open down ↓ |
109 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX