Print this page
%B
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/ip/ipclassifier.c
+++ new/usr/src/uts/common/inet/ip/ipclassifier.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * IP PACKET CLASSIFIER
27 27 *
28 28 * The IP packet classifier provides mapping between IP packets and persistent
29 29 * connection state for connection-oriented protocols. It also provides
30 30 * interface for managing connection states.
31 31 *
32 32 * The connection state is kept in conn_t data structure and contains, among
33 33 * other things:
34 34 *
35 35 * o local/remote address and ports
36 36 * o Transport protocol
37 37 * o squeue for the connection (for TCP only)
38 38 * o reference counter
39 39 * o Connection state
40 40 * o hash table linkage
41 41 * o interface/ire information
42 42 * o credentials
43 43 * o ipsec policy
44 44 * o send and receive functions.
45 45 * o mutex lock.
46 46 *
47 47 * Connections use a reference counting scheme. They are freed when the
48 48 * reference counter drops to zero. A reference is incremented when connection
49 49 * is placed in a list or table, when incoming packet for the connection arrives
50 50 * and when connection is processed via squeue (squeue processing may be
51 51 * asynchronous and the reference protects the connection from being destroyed
52 52 * before its processing is finished).
53 53 *
54 54 * conn_recv is used to pass up packets to the ULP.
55 55 * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
56 56 * a listener, and changes to tcp_input_listener as the listener has picked a
57 57 * good squeue. For other cases it is set to tcp_input_data.
58 58 *
59 59 * conn_recvicmp is used to pass up ICMP errors to the ULP.
60 60 *
61 61 * Classifier uses several hash tables:
62 62 *
63 63 * ipcl_conn_fanout: contains all TCP connections in CONNECTED state
64 64 * ipcl_bind_fanout: contains all connections in BOUND state
65 65 * ipcl_proto_fanout: IPv4 protocol fanout
66 66 * ipcl_proto_fanout_v6: IPv6 protocol fanout
67 67 * ipcl_udp_fanout: contains all UDP connections
68 68 * ipcl_iptun_fanout: contains all IP tunnel connections
69 69 * ipcl_globalhash_fanout: contains all connections
70 70 *
71 71 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
72 72 * which need to view all existing connections.
73 73 *
74 74 * All tables are protected by per-bucket locks. When both per-bucket lock and
75 75 * connection lock need to be held, the per-bucket lock should be acquired
76 76 * first, followed by the connection lock.
77 77 *
78 78 * All functions doing search in one of these tables increment a reference
79 79 * counter on the connection found (if any). This reference should be dropped
80 80 * when the caller has finished processing the connection.
81 81 *
82 82 *
83 83 * INTERFACES:
84 84 * ===========
85 85 *
86 86 * Connection Lookup:
87 87 * ------------------
88 88 *
89 89 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
90 90 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
91 91 *
92 92 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
93 93 * it can't find any associated connection. If the connection is found, its
94 94 * reference counter is incremented.
95 95 *
96 96 * mp: mblock, containing packet header. The full header should fit
97 97 * into a single mblock. It should also contain at least full IP
98 98 * and TCP or UDP header.
99 99 *
100 100 * protocol: Either IPPROTO_TCP or IPPROTO_UDP.
101 101 *
102 102 * hdr_len: The size of IP header. It is used to find TCP or UDP header in
103 103 * the packet.
104 104 *
105 105 * ira->ira_zoneid: The zone in which the returned connection must be; the
106 106 * zoneid corresponding to the ire_zoneid on the IRE located for
107 107 * the packet's destination address.
108 108 *
109 109 * ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
110 110 * IRAF_TX_SHARED_ADDR flags
111 111 *
112 112 * For TCP connections, the lookup order is as follows:
113 113 * 5-tuple {src, dst, protocol, local port, remote port}
114 114 * lookup in ipcl_conn_fanout table.
115 115 * 3-tuple {dst, remote port, protocol} lookup in
116 116 * ipcl_bind_fanout table.
117 117 *
118 118 * For UDP connections, a 5-tuple {src, dst, protocol, local port,
119 119 * remote port} lookup is done on ipcl_udp_fanout. Note that,
120 120 * these interfaces do not handle cases where a packet belongs
121 121 * to multiple UDP clients, which is handled in IP itself.
122 122 *
123 123 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
124 124 * determine which actual zone gets the segment. This is used only in a
125 125 * labeled environment. The matching rules are:
126 126 *
127 127 * - If it's not a multilevel port, then the label on the packet selects
128 128 * the zone. Unlabeled packets are delivered to the global zone.
129 129 *
130 130 * - If it's a multilevel port, then only the zone registered to receive
131 131 * packets on that port matches.
132 132 *
133 133 * Also, in a labeled environment, packet labels need to be checked. For fully
134 134 * bound TCP connections, we can assume that the packet label was checked
135 135 * during connection establishment, and doesn't need to be checked on each
136 136 * packet. For others, though, we need to check for strict equality or, for
137 137 * multilevel ports, membership in the range or set. This part currently does
138 138 * a tnrh lookup on each packet, but could be optimized to use cached results
139 139 * if that were necessary. (SCTP doesn't come through here, but if it did,
140 140 * we would apply the same rules as TCP.)
141 141 *
142 142 * An implication of the above is that fully-bound TCP sockets must always use
143 143 * distinct 4-tuples; they can't be discriminated by label alone.
144 144 *
145 145 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
146 146 * as there's no connection set-up handshake and no shared state.
147 147 *
148 148 * Labels on looped-back packets within a single zone do not need to be
149 149 * checked, as all processes in the same zone have the same label.
150 150 *
151 151 * Finally, for unlabeled packets received by a labeled system, special rules
152 152 * apply. We consider only the MLP if there is one. Otherwise, we prefer a
153 153 * socket in the zone whose label matches the default label of the sender, if
154 154 * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the
155 155 * receiver's label must dominate the sender's default label.
156 156 *
157 157 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
158 158 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
159 159 * ip_stack);
160 160 *
161 161 * Lookup routine to find an exact match for {src, dst, local port,
162 162 * remote port} for TCP connections in ipcl_conn_fanout. The address and
163 163 * ports are read from the IP and TCP header respectively.
164 164 *
165 165 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol,
166 166 * zoneid, ip_stack);
167 167 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
168 168 * zoneid, ip_stack);
169 169 *
170 170 * Lookup routine to find a listener with the tuple {lport, laddr,
171 171 * protocol} in the ipcl_bind_fanout table. For IPv6, an additional
172 172 * parameter interface index is also compared.
173 173 *
174 174 * void ipcl_walk(func, arg, ip_stack)
175 175 *
176 176 * Apply 'func' to every connection available. The 'func' is called as
177 177 * (*func)(connp, arg). The walk is non-atomic so connections may be
178 178 * created and destroyed during the walk. The CONN_CONDEMNED and
179 179 * CONN_INCIPIENT flags ensure that connections which are newly created
180 180 * or being destroyed are not selected by the walker.
181 181 *
182 182 * Table Updates
183 183 * -------------
184 184 *
185 185 * int ipcl_conn_insert(connp);
186 186 * int ipcl_conn_insert_v4(connp);
187 187 * int ipcl_conn_insert_v6(connp);
188 188 *
189 189 * Insert 'connp' in the ipcl_conn_fanout.
190 190 * Arguments :
191 191 * connp conn_t to be inserted
192 192 *
193 193 * Return value :
194 194 * 0 if connp was inserted
195 195 * EADDRINUSE if the connection with the same tuple
196 196 * already exists.
197 197 *
198 198 * int ipcl_bind_insert(connp);
199 199 * int ipcl_bind_insert_v4(connp);
200 200 * int ipcl_bind_insert_v6(connp);
201 201 *
202 202 * Insert 'connp' in ipcl_bind_fanout.
203 203 * Arguments :
204 204 * connp conn_t to be inserted
205 205 *
206 206 *
207 207 * void ipcl_hash_remove(connp);
208 208 *
209 209 * Removes the 'connp' from the connection fanout table.
210 210 *
211 211 * Connection Creation/Destruction
212 212 * -------------------------------
213 213 *
214 214 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
215 215 *
↓ open down ↓ |
215 lines elided |
↑ open up ↑ |
216 216 * Creates a new conn based on the type flag, inserts it into
217 217 * globalhash table.
218 218 *
219 219 * type: This flag determines the type of conn_t which needs to be
220 220 * created i.e., which kmem_cache it comes from.
221 221 * IPCL_TCPCONN indicates a TCP connection
222 222 * IPCL_SCTPCONN indicates a SCTP connection
223 223 * IPCL_UDPCONN indicates a UDP conn_t.
224 224 * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t.
225 225 * IPCL_RTSCONN indicates a RTS conn_t.
226 + * IPCL_DCCPCONN indicates a DCCP conn_t.
226 227 * IPCL_IPCCONN indicates all other connections.
227 228 *
228 229 * void ipcl_conn_destroy(connp)
229 230 *
230 231 * Destroys the connection state, removes it from the global
231 232 * connection hash table and frees its memory.
232 233 */
233 234
234 235 #include <sys/types.h>
235 236 #include <sys/stream.h>
236 237 #include <sys/stropts.h>
237 238 #include <sys/sysmacros.h>
238 239 #include <sys/strsubr.h>
239 240 #include <sys/strsun.h>
240 241 #define _SUN_TPI_VERSION 2
241 242 #include <sys/ddi.h>
242 243 #include <sys/cmn_err.h>
243 244 #include <sys/debug.h>
244 245
245 246 #include <sys/systm.h>
246 247 #include <sys/param.h>
247 248 #include <sys/kmem.h>
248 249 #include <sys/isa_defs.h>
249 250 #include <inet/common.h>
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
250 251 #include <netinet/ip6.h>
251 252 #include <netinet/icmp6.h>
252 253
253 254 #include <inet/ip.h>
254 255 #include <inet/ip_if.h>
255 256 #include <inet/ip_ire.h>
256 257 #include <inet/ip6.h>
257 258 #include <inet/ip_ndp.h>
258 259 #include <inet/ip_impl.h>
259 260 #include <inet/udp_impl.h>
261 +#include <inet/dccp/dccp_impl.h>
260 262 #include <inet/sctp_ip.h>
261 263 #include <inet/sctp/sctp_impl.h>
262 264 #include <inet/rawip_impl.h>
263 265 #include <inet/rts_impl.h>
264 266 #include <inet/iptun/iptun_impl.h>
265 267
266 268 #include <sys/cpuvar.h>
267 269
268 270 #include <inet/ipclassifier.h>
269 271 #include <inet/tcp.h>
270 272 #include <inet/ipsec_impl.h>
271 273
272 274 #include <sys/tsol/tnet.h>
↓ open down ↓ |
3 lines elided |
↑ open up ↑ |
273 275 #include <sys/sockio.h>
274 276
275 277 /* Old value for compatibility. Settable in /etc/system */
276 278 uint_t tcp_conn_hash_size = 0;
277 279
278 280 /* New value. Zero means choose automatically. Settable in /etc/system */
279 281 uint_t ipcl_conn_hash_size = 0;
280 282 uint_t ipcl_conn_hash_memfactor = 8192;
281 283 uint_t ipcl_conn_hash_maxsize = 82500;
282 284
283 -/* bind/udp fanout table size */
285 +/* bind/dccp/udp fanout table size */
284 286 uint_t ipcl_bind_fanout_size = 512;
287 +uint_t ipcl_dccp_fanout_size = 512;
285 288 uint_t ipcl_udp_fanout_size = 16384;
286 289
287 290 /* Raw socket fanout size. Must be a power of 2. */
288 291 uint_t ipcl_raw_fanout_size = 256;
289 292
290 293 /*
291 294 * The IPCL_IPTUN_HASH() function works best with a prime table size. We
292 295 * expect that most large deployments would have hundreds of tunnels, and
293 296 * thousands in the extreme case.
294 297 */
295 298 uint_t ipcl_iptun_fanout_size = 6143;
296 299
297 300 /*
298 301 * Power of 2^N Primes useful for hashing for N of 0-28,
299 302 * these primes are the nearest prime <= 2^N - 2^(N-2).
300 303 */
301 304
302 305 #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \
303 306 6143, 12281, 24571, 49139, 98299, 196597, 393209, \
304 307 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \
305 308 50331599, 100663291, 201326557, 0}
306 309
307 310 /*
308 311 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
309 312 * are aligned on cache lines.
310 313 */
311 314 typedef union itc_s {
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
312 315 conn_t itc_conn;
313 316 char itcu_filler[CACHE_ALIGN(conn_s)];
314 317 } itc_t;
315 318
316 319 struct kmem_cache *tcp_conn_cache;
317 320 struct kmem_cache *ip_conn_cache;
318 321 extern struct kmem_cache *sctp_conn_cache;
319 322 struct kmem_cache *udp_conn_cache;
320 323 struct kmem_cache *rawip_conn_cache;
321 324 struct kmem_cache *rts_conn_cache;
325 +struct kmem_cache *dccp_conn_cache;
322 326
323 327 extern void tcp_timermp_free(tcp_t *);
324 328 extern mblk_t *tcp_timermp_alloc(int);
325 329
326 330 static int ip_conn_constructor(void *, void *, int);
327 331 static void ip_conn_destructor(void *, void *);
328 332
329 333 static int tcp_conn_constructor(void *, void *, int);
330 334 static void tcp_conn_destructor(void *, void *);
331 335
332 336 static int udp_conn_constructor(void *, void *, int);
333 337 static void udp_conn_destructor(void *, void *);
334 338
335 339 static int rawip_conn_constructor(void *, void *, int);
336 340 static void rawip_conn_destructor(void *, void *);
337 341
338 342 static int rts_conn_constructor(void *, void *, int);
339 343 static void rts_conn_destructor(void *, void *);
340 344
345 +static int dccp_conn_constructor(void *, void *, int);
346 +static void dccp_conn_destructor(void *, void *);
347 +
341 348 /*
342 349 * Global (for all stack instances) init routine
343 350 */
344 351 void
345 352 ipcl_g_init(void)
346 353 {
347 354 ip_conn_cache = kmem_cache_create("ip_conn_cache",
348 355 sizeof (conn_t), CACHE_ALIGN_SIZE,
349 356 ip_conn_constructor, ip_conn_destructor,
350 357 NULL, NULL, NULL, 0);
351 358
352 359 tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
353 360 sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
354 361 tcp_conn_constructor, tcp_conn_destructor,
355 362 tcp_conn_reclaim, NULL, NULL, 0);
356 363
357 364 udp_conn_cache = kmem_cache_create("udp_conn_cache",
358 365 sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
359 366 udp_conn_constructor, udp_conn_destructor,
360 367 NULL, NULL, NULL, 0);
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
361 368
362 369 rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
363 370 sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
364 371 rawip_conn_constructor, rawip_conn_destructor,
365 372 NULL, NULL, NULL, 0);
366 373
367 374 rts_conn_cache = kmem_cache_create("rts_conn_cache",
368 375 sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
369 376 rts_conn_constructor, rts_conn_destructor,
370 377 NULL, NULL, NULL, 0);
378 +
379 + /* XXX:DCCP no reclaim callback is registered yet (cf. tcp_conn_reclaim for tcp_conn_cache) */
380 + dccp_conn_cache = kmem_cache_create("dccp_conn_cache",
381 + sizeof (itc_t) + sizeof (dccp_t), CACHE_ALIGN_SIZE,
382 + dccp_conn_constructor, dccp_conn_destructor,
383 + NULL, NULL, NULL, 0);
371 384 }
372 385
373 386 /*
374 387 * ipclassifier initialization routine, sets up hash tables.
375 388 */
376 389 void
377 390 ipcl_init(ip_stack_t *ipst)
378 391 {
379 392 int i;
380 393 int sizes[] = P2Ps();
381 394
382 395 /*
383 396 * Calculate size of conn fanout table from /etc/system settings
384 397 */
385 398 if (ipcl_conn_hash_size != 0) {
386 399 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
387 400 } else if (tcp_conn_hash_size != 0) {
388 401 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
389 402 } else {
390 403 extern pgcnt_t freemem;
391 404
392 405 ipst->ips_ipcl_conn_fanout_size =
393 406 (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
394 407
395 408 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
396 409 ipst->ips_ipcl_conn_fanout_size =
397 410 ipcl_conn_hash_maxsize;
398 411 }
399 412 }
400 413
401 414 for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
402 415 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
403 416 break;
404 417 }
405 418 }
406 419 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
407 420 /* Out of range, use the 2^16 value */
408 421 ipst->ips_ipcl_conn_fanout_size = sizes[16];
409 422 }
410 423
411 424 /* Take values from /etc/system */
412 425 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
426 + ipst->ips_ipcl_dccp_fanout_size = ipcl_dccp_fanout_size;
413 427 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
414 428 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
415 429 ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
416 430
417 431 ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
418 432
419 433 ipst->ips_ipcl_conn_fanout = kmem_zalloc(
420 434 ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
421 435
422 436 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
423 437 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
424 438 MUTEX_DEFAULT, NULL);
425 439 }
426 440
427 441 ipst->ips_ipcl_bind_fanout = kmem_zalloc(
428 442 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
429 443
430 444 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
431 445 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
432 446 MUTEX_DEFAULT, NULL);
433 447 }
434 448
435 449 ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
436 450 sizeof (connf_t), KM_SLEEP);
437 451 for (i = 0; i < IPPROTO_MAX; i++) {
438 452 mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
439 453 MUTEX_DEFAULT, NULL);
440 454 }
441 455
442 456 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
443 457 sizeof (connf_t), KM_SLEEP);
444 458 for (i = 0; i < IPPROTO_MAX; i++) {
445 459 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
446 460 MUTEX_DEFAULT, NULL);
447 461 }
448 462
449 463 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
450 464 mutex_init(&ipst->ips_rts_clients->connf_lock,
451 465 NULL, MUTEX_DEFAULT, NULL);
452 466
453 467 ipst->ips_ipcl_udp_fanout = kmem_zalloc(
454 468 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
455 469 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
456 470 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
457 471 MUTEX_DEFAULT, NULL);
458 472 }
459 473
460 474 ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
461 475 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
462 476 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
463 477 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
464 478 MUTEX_DEFAULT, NULL);
465 479 }
466 480
467 481 ipst->ips_ipcl_raw_fanout = kmem_zalloc(
468 482 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
469 483 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
↓ open down ↓ |
47 lines elided |
↑ open up ↑ |
470 484 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
471 485 MUTEX_DEFAULT, NULL);
472 486 }
473 487
474 488 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
475 489 sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
476 490 for (i = 0; i < CONN_G_HASH_SIZE; i++) {
477 491 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
478 492 NULL, MUTEX_DEFAULT, NULL);
479 493 }
494 +
495 + ipst->ips_ipcl_dccp_fanout = kmem_zalloc(
496 + ipst->ips_ipcl_dccp_fanout_size * sizeof (connf_t), KM_SLEEP);
497 + for (i = 0; i < ipst->ips_ipcl_dccp_fanout_size; i++) {
498 + mutex_init(&ipst->ips_ipcl_dccp_fanout[i].connf_lock, NULL,
499 + MUTEX_DEFAULT, NULL);
500 + }
480 501 }
481 502
482 503 void
483 504 ipcl_g_destroy(void)
484 505 {
485 506 kmem_cache_destroy(ip_conn_cache);
486 507 kmem_cache_destroy(tcp_conn_cache);
487 508 kmem_cache_destroy(udp_conn_cache);
488 509 kmem_cache_destroy(rawip_conn_cache);
489 510 kmem_cache_destroy(rts_conn_cache);
511 + kmem_cache_destroy(dccp_conn_cache);
490 512 }
491 513
492 514 /*
493 515 * All user-level and kernel use of the stack must be gone
494 516 * by now.
495 517 */
496 518 void
497 519 ipcl_destroy(ip_stack_t *ipst)
498 520 {
499 521 int i;
500 522
501 523 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
502 524 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
503 525 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
504 526 }
505 527 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
506 528 sizeof (connf_t));
507 529 ipst->ips_ipcl_conn_fanout = NULL;
508 530
509 531 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
510 532 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
511 533 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
512 534 }
513 535 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
514 536 sizeof (connf_t));
515 537 ipst->ips_ipcl_bind_fanout = NULL;
516 538
517 539 for (i = 0; i < IPPROTO_MAX; i++) {
518 540 ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
519 541 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
520 542 }
521 543 kmem_free(ipst->ips_ipcl_proto_fanout_v4,
522 544 IPPROTO_MAX * sizeof (connf_t));
523 545 ipst->ips_ipcl_proto_fanout_v4 = NULL;
524 546
525 547 for (i = 0; i < IPPROTO_MAX; i++) {
526 548 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
527 549 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
528 550 }
529 551 kmem_free(ipst->ips_ipcl_proto_fanout_v6,
530 552 IPPROTO_MAX * sizeof (connf_t));
531 553 ipst->ips_ipcl_proto_fanout_v6 = NULL;
532 554
533 555 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
534 556 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
535 557 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
536 558 }
537 559 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
538 560 sizeof (connf_t));
539 561 ipst->ips_ipcl_udp_fanout = NULL;
540 562
541 563 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
542 564 ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
543 565 mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
544 566 }
545 567 kmem_free(ipst->ips_ipcl_iptun_fanout,
546 568 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
547 569 ipst->ips_ipcl_iptun_fanout = NULL;
548 570
549 571 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
550 572 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
551 573 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
552 574 }
553 575 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
554 576 sizeof (connf_t));
↓ open down ↓ |
55 lines elided |
↑ open up ↑ |
555 577 ipst->ips_ipcl_raw_fanout = NULL;
556 578
557 579 for (i = 0; i < CONN_G_HASH_SIZE; i++) {
558 580 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
559 581 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
560 582 }
561 583 kmem_free(ipst->ips_ipcl_globalhash_fanout,
562 584 sizeof (connf_t) * CONN_G_HASH_SIZE);
563 585 ipst->ips_ipcl_globalhash_fanout = NULL;
564 586
587 + for (i = 0; i < ipst->ips_ipcl_dccp_fanout_size; i++) {
588 + ASSERT(ipst->ips_ipcl_dccp_fanout[i].connf_head == NULL);
589 + mutex_destroy(&ipst->ips_ipcl_dccp_fanout[i].connf_lock);
590 + }
591 + kmem_free(ipst->ips_ipcl_dccp_fanout, ipst->ips_ipcl_dccp_fanout_size *
592 + sizeof (connf_t));
593 + ipst->ips_ipcl_dccp_fanout = NULL;
594 +
565 595 ASSERT(ipst->ips_rts_clients->connf_head == NULL);
566 596 mutex_destroy(&ipst->ips_rts_clients->connf_lock);
567 597 kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
568 598 ipst->ips_rts_clients = NULL;
569 599 }
570 600
571 601 /*
572 602 * conn creation routine. initializes the conn, sets the reference
573 603 * and inserts it in the global hash table.
574 604 */
575 605 conn_t *
576 606 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
577 607 {
578 608 conn_t *connp;
579 609 struct kmem_cache *conn_cache;
580 610
581 611 switch (type) {
582 612 case IPCL_SCTPCONN:
583 613 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
584 614 return (NULL);
585 615 sctp_conn_init(connp);
586 616 netstack_hold(ns);
587 617 connp->conn_netstack = ns;
588 618 connp->conn_ixa->ixa_ipst = ns->netstack_ip;
589 619 connp->conn_ixa->ixa_conn_id = (long)connp;
590 620 ipcl_globalhash_insert(connp);
591 621 return (connp);
592 622
593 623 case IPCL_TCPCONN:
594 624 conn_cache = tcp_conn_cache;
595 625 break;
596 626
597 627 case IPCL_UDPCONN:
598 628 conn_cache = udp_conn_cache;
599 629 break;
600 630
601 631 case IPCL_RAWIPCONN:
602 632 conn_cache = rawip_conn_cache;
↓ open down ↓ |
28 lines elided |
↑ open up ↑ |
603 633 break;
604 634
605 635 case IPCL_RTSCONN:
606 636 conn_cache = rts_conn_cache;
607 637 break;
608 638
609 639 case IPCL_IPCCONN:
610 640 conn_cache = ip_conn_cache;
611 641 break;
612 642
643 + case IPCL_DCCPCONN:
644 + conn_cache = dccp_conn_cache;
645 + break;
646 +
613 647 default:
614 648 connp = NULL;
615 649 ASSERT(0);
616 650 }
617 651
618 652 if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
619 653 return (NULL);
620 654
621 655 connp->conn_ref = 1;
622 656 netstack_hold(ns);
623 657 connp->conn_netstack = ns;
624 658 connp->conn_ixa->ixa_ipst = ns->netstack_ip;
625 659 connp->conn_ixa->ixa_conn_id = (long)connp;
626 660 ipcl_globalhash_insert(connp);
627 661 return (connp);
628 662 }
629 663
630 664 void
631 665 ipcl_conn_destroy(conn_t *connp)
632 666 {
633 667 mblk_t *mp;
634 668 netstack_t *ns = connp->conn_netstack;
635 669
636 670 ASSERT(!MUTEX_HELD(&connp->conn_lock));
637 671 ASSERT(connp->conn_ref == 0);
638 672 ASSERT(connp->conn_ioctlref == 0);
639 673
640 674 DTRACE_PROBE1(conn__destroy, conn_t *, connp);
641 675
642 676 if (connp->conn_cred != NULL) {
643 677 crfree(connp->conn_cred);
644 678 connp->conn_cred = NULL;
645 679 /* ixa_cred done in ipcl_conn_cleanup below */
646 680 }
647 681
648 682 if (connp->conn_ht_iphc != NULL) {
649 683 kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
650 684 connp->conn_ht_iphc = NULL;
651 685 connp->conn_ht_iphc_allocated = 0;
652 686 connp->conn_ht_iphc_len = 0;
653 687 connp->conn_ht_ulp = NULL;
654 688 connp->conn_ht_ulp_len = 0;
655 689 }
656 690 ip_pkt_free(&connp->conn_xmit_ipp);
657 691
658 692 ipcl_globalhash_remove(connp);
659 693
660 694 if (connp->conn_latch != NULL) {
661 695 IPLATCH_REFRELE(connp->conn_latch);
662 696 connp->conn_latch = NULL;
663 697 }
664 698 if (connp->conn_latch_in_policy != NULL) {
665 699 IPPOL_REFRELE(connp->conn_latch_in_policy);
666 700 connp->conn_latch_in_policy = NULL;
667 701 }
668 702 if (connp->conn_latch_in_action != NULL) {
669 703 IPACT_REFRELE(connp->conn_latch_in_action);
670 704 connp->conn_latch_in_action = NULL;
671 705 }
672 706 if (connp->conn_policy != NULL) {
673 707 IPPH_REFRELE(connp->conn_policy, ns);
674 708 connp->conn_policy = NULL;
675 709 }
676 710
677 711 if (connp->conn_ipsec_opt_mp != NULL) {
678 712 freemsg(connp->conn_ipsec_opt_mp);
679 713 connp->conn_ipsec_opt_mp = NULL;
680 714 }
681 715
682 716 if (connp->conn_flags & IPCL_TCPCONN) {
683 717 tcp_t *tcp = connp->conn_tcp;
684 718
685 719 tcp_free(tcp);
686 720 mp = tcp->tcp_timercache;
687 721
688 722 tcp->tcp_tcps = NULL;
689 723
690 724 /*
691 725 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
692 726 * the mblk.
693 727 */
694 728 if (tcp->tcp_rsrv_mp != NULL) {
695 729 freeb(tcp->tcp_rsrv_mp);
696 730 tcp->tcp_rsrv_mp = NULL;
697 731 mutex_destroy(&tcp->tcp_rsrv_mp_lock);
698 732 }
699 733
700 734 ipcl_conn_cleanup(connp);
701 735 connp->conn_flags = IPCL_TCPCONN;
702 736 if (ns != NULL) {
703 737 ASSERT(tcp->tcp_tcps == NULL);
704 738 connp->conn_netstack = NULL;
705 739 connp->conn_ixa->ixa_ipst = NULL;
706 740 netstack_rele(ns);
707 741 }
708 742
709 743 bzero(tcp, sizeof (tcp_t));
710 744
711 745 tcp->tcp_timercache = mp;
712 746 tcp->tcp_connp = connp;
↓ open down ↓ |
90 lines elided |
↑ open up ↑ |
713 747 kmem_cache_free(tcp_conn_cache, connp);
714 748 return;
715 749 }
716 750
717 751 if (connp->conn_flags & IPCL_SCTPCONN) {
718 752 ASSERT(ns != NULL);
719 753 sctp_free(connp);
720 754 return;
721 755 }
722 756
757 + if (connp->conn_flags & IPCL_DCCPCONN) {
758 + dccp_t *dccp = connp->conn_dccp;
759 +
760 + cmn_err(CE_NOTE, "ipclassifier: conn_flags DCCP cache_free");
761 +
762 + /* XXX:DCCP */
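763 + /* Crash bug here (udp_conn_cache vs. dccp_conn_cache): with the code below disabled, a DCCP conn falls through to the generic path, which never frees to dccp_conn_cache. */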
764 +/*
765 + ipcl_conn_cleanup(connp);
766 + connp->conn_flags = IPCL_DCCPCONN;
767 + bzero(dccp, sizeof (dccp_t));
768 + dccp->dccp_connp = connp;
769 + kmem_cache_free(dccp_conn_cache, connp);
770 + return;
771 +*/
772 + }
773 +
723 774 ipcl_conn_cleanup(connp);
724 775 if (ns != NULL) {
725 776 connp->conn_netstack = NULL;
726 777 connp->conn_ixa->ixa_ipst = NULL;
727 778 netstack_rele(ns);
728 779 }
729 780
730 781 /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
731 782 if (connp->conn_flags & IPCL_UDPCONN) {
732 783 connp->conn_flags = IPCL_UDPCONN;
733 784 kmem_cache_free(udp_conn_cache, connp);
734 785 } else if (connp->conn_flags & IPCL_RAWIPCONN) {
735 786 connp->conn_flags = IPCL_RAWIPCONN;
736 787 connp->conn_proto = IPPROTO_ICMP;
737 788 connp->conn_ixa->ixa_protocol = connp->conn_proto;
738 789 kmem_cache_free(rawip_conn_cache, connp);
739 790 } else if (connp->conn_flags & IPCL_RTSCONN) {
740 791 connp->conn_flags = IPCL_RTSCONN;
741 792 kmem_cache_free(rts_conn_cache, connp);
742 793 } else {
743 794 connp->conn_flags = IPCL_IPCCONN;
744 795 ASSERT(connp->conn_flags & IPCL_IPCCONN);
745 796 ASSERT(connp->conn_priv == NULL);
746 797 kmem_cache_free(ip_conn_cache, connp);
747 798 }
748 799 }
749 800
750 801 /*
751 802 * Running in cluster mode - deregister listener information
752 803 */
753 804 static void
754 805 ipcl_conn_unlisten(conn_t *connp)
755 806 {
756 807 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
757 808 ASSERT(connp->conn_lport != 0);
758 809
759 810 if (cl_inet_unlisten != NULL) {
760 811 sa_family_t addr_family;
761 812 uint8_t *laddrp;
762 813
763 814 if (connp->conn_ipversion == IPV6_VERSION) {
764 815 addr_family = AF_INET6;
765 816 laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
766 817 } else {
767 818 addr_family = AF_INET;
768 819 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
769 820 }
770 821 (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
771 822 IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
772 823 }
773 824 connp->conn_flags &= ~IPCL_CL_LISTENER;
774 825 }
775 826
/*
 * Unlink connp from whichever fanout bucket it is currently hashed in.  A
 * conn that is not hashed (conn_fanout == NULL) is left untouched.  The
 * caller must not hold conn_lock; the bucket's connf_lock is taken here.
 *
 * IPCL_REMOVED is OR-ed into conn_flags and the bit naming the table is
 * deliberately left set, so that when debugging one can still see which
 * hash table the conn used to be in.  A cluster listener is deregistered,
 * and the reference that the hash list held is dropped, before the bucket
 * lock is released.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*rm_connfp = (connp)->conn_fanout;			\
									\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (rm_connfp != NULL) {					\
		mutex_enter(&rm_connfp->connf_lock);			\
		/* Splice the neighbours (or the head) around connp. */	\
		if ((connp)->conn_next != NULL) {			\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		}							\
		if ((connp)->conn_prev == NULL) {			\
			rm_connfp->connf_head = (connp)->conn_next;	\
		} else {						\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		}							\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) {	\
			ipcl_conn_unlisten((connp));			\
		}							\
		CONN_DEC_REF((connp));					\
		mutex_exit(&rm_connfp->connf_lock);			\
	}								\
}
804 855
805 856 void
806 857 ipcl_hash_remove(conn_t *connp)
807 858 {
808 859 uint8_t protocol = connp->conn_proto;
809 860
810 861 IPCL_HASH_REMOVE(connp);
811 862 if (protocol == IPPROTO_RSVP)
812 863 ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
813 864 }
814 865
815 866 /*
816 867 * The whole purpose of this function is allow removal of
817 868 * a conn_t from the connected hash for timewait reclaim.
818 869 * This is essentially a TW reclaim fastpath where timewait
819 870 * collector checks under fanout lock (so no one else can
820 871 * get access to the conn_t) that refcnt is 2 i.e. one for
821 872 * TCP and one for the classifier hash list. If ref count
822 873 * is indeed 2, we can just remove the conn under lock and
823 874 * avoid cleaning up the conn under squeue. This gives us
824 875 * improved performance.
825 876 */
826 877 void
827 878 ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp)
828 879 {
829 880 ASSERT(MUTEX_HELD(&connfp->connf_lock));
830 881 ASSERT(MUTEX_HELD(&connp->conn_lock));
831 882 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
832 883
833 884 if ((connp)->conn_next != NULL) {
834 885 (connp)->conn_next->conn_prev = (connp)->conn_prev;
835 886 }
836 887 if ((connp)->conn_prev != NULL) {
837 888 (connp)->conn_prev->conn_next = (connp)->conn_next;
838 889 } else {
839 890 connfp->connf_head = (connp)->conn_next;
840 891 }
841 892 (connp)->conn_fanout = NULL;
842 893 (connp)->conn_next = NULL;
843 894 (connp)->conn_prev = NULL;
844 895 (connp)->conn_flags |= IPCL_REMOVED;
845 896 ASSERT((connp)->conn_ref == 2);
846 897 (connp)->conn_ref--;
847 898 }
848 899
/*
 * Push connp onto the front of bucket connfp and mark it IPCL_CONNECTED
 * (clearing any stale IPCL_REMOVED).  The macro takes no lock itself; the
 * callers in this file hold connfp->connf_lock around it.  The conn must
 * not currently be on any hash list.  A reference is taken on behalf of the
 * hash list.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		/* Link in front of the current head. */		\
		(connp)->conn_next = (connfp)->connf_head;		\
		(connfp)->connf_head->conn_prev = (connp);		\
	}								\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags &= ~IPCL_REMOVED;				\
	(connp)->conn_flags |= IPCL_CONNECTED;				\
	CONN_INC_REF((connp));						\
}
863 914
/*
 * Unlocked entry point for a connected insert: first unhash connp from
 * wherever it currently is (IPCL_HASH_REMOVE is a no-op for an unhashed
 * conn), then insert it at the head of connfp under connfp's lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED((connfp), (connp));		\
	mutex_exit(&(connfp)->connf_lock);				\
}
870 921
/*
 * Insert a conn bound to a specific local address into bucket connfp and
 * mark it IPCL_BOUND.  The conn is first unhashed from its current bucket.
 * It is then linked in immediately before the first entry for which
 * _IPCL_V4_MATCH_ANY() is true of its local address, or at the tail when
 * there is no such entry.  A reference is taken for the hash list.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t	*bnd_prev = NULL;					\
	conn_t	*bnd_next;						\
									\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	/* Walk to the insertion point, remembering the predecessor. */	\
	for (bnd_next = (connfp)->connf_head;				\
	    bnd_next != NULL &&						\
	    !_IPCL_V4_MATCH_ANY(bnd_next->conn_laddr_v6);		\
	    bnd_next = bnd_next->conn_next) {				\
		bnd_prev = bnd_next;					\
	}								\
	if (bnd_prev == NULL) {						\
		(connfp)->connf_head = (connp);				\
	} else {							\
		bnd_prev->conn_next = (connp);				\
		(connp)->conn_prev = bnd_prev;				\
	}								\
	if (bnd_next != NULL) {						\
		bnd_next->conn_prev = (connp);				\
		(connp)->conn_next = bnd_next;				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags &= ~IPCL_REMOVED;				\
	(connp)->conn_flags |= IPCL_BOUND;				\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
897 948
/*
 * Insert a conn with a wildcard local address into bucket connfp and mark
 * it IPCL_BOUND.  The conn is first unhashed from its current bucket.
 *
 * Normally the conn is appended at the tail.  The exception: if connp's
 * local address is IPv4-mapped, it is linked in front of the first entry
 * in the same zone whose local address is the unspecified IPv6 address.
 * A reference is taken for the hash list.
 *
 * Note that whether connp is v4-mapped is sampled before IPCL_HASH_REMOVE
 * runs.  Every use of the macro argument is parenthesized so that an
 * expression argument expands safely.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	/* 'list' always points at the link that will name connp. */	\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			/*						\
			 * NOTE(review): on a consistent list prev	\
			 * already equals next->conn_prev here, so	\
			 * this assignment looks redundant; kept	\
			 * as-is.					\
			 */						\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
927 978
928 979 void
929 980 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
930 981 {
931 982 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
932 983 }
933 984
934 985 /*
935 986 * Because the classifier is used to classify inbound packets, the destination
936 987 * address is meant to be our local tunnel address (tunnel source), and the
937 988 * source the remote tunnel address (tunnel destination).
938 989 *
939 990 * Note that conn_proto can't be used for fanout since the upper protocol
940 991 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
941 992 */
942 993 conn_t *
943 994 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
944 995 {
945 996 connf_t *connfp;
946 997 conn_t *connp;
947 998
948 999 /* first look for IPv4 tunnel links */
949 1000 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
950 1001 mutex_enter(&connfp->connf_lock);
951 1002 for (connp = connfp->connf_head; connp != NULL;
952 1003 connp = connp->conn_next) {
953 1004 if (IPCL_IPTUN_MATCH(connp, *dst, *src))
954 1005 break;
955 1006 }
956 1007 if (connp != NULL)
957 1008 goto done;
958 1009
959 1010 mutex_exit(&connfp->connf_lock);
960 1011
961 1012 /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
962 1013 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
963 1014 INADDR_ANY)];
964 1015 mutex_enter(&connfp->connf_lock);
965 1016 for (connp = connfp->connf_head; connp != NULL;
966 1017 connp = connp->conn_next) {
967 1018 if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
968 1019 break;
969 1020 }
970 1021 done:
971 1022 if (connp != NULL)
972 1023 CONN_INC_REF(connp);
973 1024 mutex_exit(&connfp->connf_lock);
974 1025 return (connp);
975 1026 }
976 1027
977 1028 conn_t *
978 1029 ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
979 1030 {
980 1031 connf_t *connfp;
981 1032 conn_t *connp;
982 1033
983 1034 /* Look for an IPv6 tunnel link */
984 1035 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
985 1036 mutex_enter(&connfp->connf_lock);
986 1037 for (connp = connfp->connf_head; connp != NULL;
987 1038 connp = connp->conn_next) {
988 1039 if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
989 1040 CONN_INC_REF(connp);
990 1041 break;
991 1042 }
992 1043 }
993 1044 mutex_exit(&connfp->connf_lock);
994 1045 return (connp);
995 1046 }
996 1047
997 1048 /*
998 1049 * This function is used only for inserting SCTP raw socket now.
999 1050 * This may change later.
1000 1051 *
1001 1052 * Note that only one raw socket can be bound to a port. The param
1002 1053 * lport is in network byte order.
1003 1054 */
1004 1055 static int
1005 1056 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
1006 1057 {
1007 1058 connf_t *connfp;
1008 1059 conn_t *oconnp;
1009 1060 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1010 1061
1011 1062 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1012 1063
1013 1064 /* Check for existing raw socket already bound to the port. */
1014 1065 mutex_enter(&connfp->connf_lock);
1015 1066 for (oconnp = connfp->connf_head; oconnp != NULL;
1016 1067 oconnp = oconnp->conn_next) {
1017 1068 if (oconnp->conn_lport == lport &&
1018 1069 oconnp->conn_zoneid == connp->conn_zoneid &&
1019 1070 oconnp->conn_family == connp->conn_family &&
1020 1071 ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1021 1072 IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
1022 1073 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
1023 1074 IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
1024 1075 IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
1025 1076 &connp->conn_laddr_v6))) {
1026 1077 break;
1027 1078 }
1028 1079 }
1029 1080 mutex_exit(&connfp->connf_lock);
1030 1081 if (oconnp != NULL)
1031 1082 return (EADDRNOTAVAIL);
1032 1083
1033 1084 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
1034 1085 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1035 1086 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1036 1087 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
1037 1088 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1038 1089 } else {
1039 1090 IPCL_HASH_INSERT_BOUND(connfp, connp);
1040 1091 }
1041 1092 } else {
1042 1093 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1043 1094 }
1044 1095 return (0);
1045 1096 }
1046 1097
1047 1098 static int
1048 1099 ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
1049 1100 {
1050 1101 connf_t *connfp;
1051 1102 conn_t *tconnp;
1052 1103 ipaddr_t laddr = connp->conn_laddr_v4;
1053 1104 ipaddr_t faddr = connp->conn_faddr_v4;
1054 1105
1055 1106 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
1056 1107 mutex_enter(&connfp->connf_lock);
1057 1108 for (tconnp = connfp->connf_head; tconnp != NULL;
1058 1109 tconnp = tconnp->conn_next) {
1059 1110 if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
1060 1111 /* A tunnel is already bound to these addresses. */
1061 1112 mutex_exit(&connfp->connf_lock);
1062 1113 return (EADDRINUSE);
1063 1114 }
1064 1115 }
1065 1116 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1066 1117 mutex_exit(&connfp->connf_lock);
1067 1118 return (0);
1068 1119 }
1069 1120
1070 1121 static int
1071 1122 ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
1072 1123 {
1073 1124 connf_t *connfp;
1074 1125 conn_t *tconnp;
1075 1126 in6_addr_t *laddr = &connp->conn_laddr_v6;
1076 1127 in6_addr_t *faddr = &connp->conn_faddr_v6;
1077 1128
1078 1129 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
1079 1130 mutex_enter(&connfp->connf_lock);
1080 1131 for (tconnp = connfp->connf_head; tconnp != NULL;
1081 1132 tconnp = tconnp->conn_next) {
1082 1133 if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
1083 1134 /* A tunnel is already bound to these addresses. */
1084 1135 mutex_exit(&connfp->connf_lock);
1085 1136 return (EADDRINUSE);
1086 1137 }
1087 1138 }
1088 1139 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1089 1140 mutex_exit(&connfp->connf_lock);
1090 1141 return (0);
1091 1142 }
1092 1143
1093 1144 /*
1094 1145 * Check for a MAC exemption conflict on a labeled system. Note that for
1095 1146 * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
1096 1147 * transport layer. This check is for binding all other protocols.
1097 1148 *
1098 1149 * Returns true if there's a conflict.
1099 1150 */
1100 1151 static boolean_t
1101 1152 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
1102 1153 {
1103 1154 connf_t *connfp;
1104 1155 conn_t *tconn;
1105 1156
1106 1157 connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
1107 1158 mutex_enter(&connfp->connf_lock);
1108 1159 for (tconn = connfp->connf_head; tconn != NULL;
1109 1160 tconn = tconn->conn_next) {
1110 1161 /* We don't allow v4 fallback for v6 raw socket */
1111 1162 if (connp->conn_family != tconn->conn_family)
1112 1163 continue;
1113 1164 /* If neither is exempt, then there's no conflict */
1114 1165 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1115 1166 (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1116 1167 continue;
1117 1168 /* We are only concerned about sockets for a different zone */
1118 1169 if (connp->conn_zoneid == tconn->conn_zoneid)
1119 1170 continue;
1120 1171 /* If both are bound to different specific addrs, ok */
1121 1172 if (connp->conn_laddr_v4 != INADDR_ANY &&
1122 1173 tconn->conn_laddr_v4 != INADDR_ANY &&
1123 1174 connp->conn_laddr_v4 != tconn->conn_laddr_v4)
1124 1175 continue;
1125 1176 /* These two conflict; fail */
1126 1177 break;
1127 1178 }
1128 1179 mutex_exit(&connfp->connf_lock);
1129 1180 return (tconn != NULL);
1130 1181 }
1131 1182
1132 1183 static boolean_t
1133 1184 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
1134 1185 {
1135 1186 connf_t *connfp;
1136 1187 conn_t *tconn;
1137 1188
1138 1189 connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
1139 1190 mutex_enter(&connfp->connf_lock);
1140 1191 for (tconn = connfp->connf_head; tconn != NULL;
1141 1192 tconn = tconn->conn_next) {
1142 1193 /* We don't allow v4 fallback for v6 raw socket */
1143 1194 if (connp->conn_family != tconn->conn_family)
1144 1195 continue;
1145 1196 /* If neither is exempt, then there's no conflict */
1146 1197 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1147 1198 (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1148 1199 continue;
1149 1200 /* We are only concerned about sockets for a different zone */
1150 1201 if (connp->conn_zoneid == tconn->conn_zoneid)
1151 1202 continue;
1152 1203 /* If both are bound to different addrs, ok */
1153 1204 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1154 1205 !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1155 1206 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1156 1207 &tconn->conn_laddr_v6))
1157 1208 continue;
1158 1209 /* These two conflict; fail */
1159 1210 break;
1160 1211 }
1161 1212 mutex_exit(&connfp->connf_lock);
1162 1213 return (tconn != NULL);
1163 1214 }
1164 1215
1165 1216 /*
1166 1217 * (v4, v6) bind hash insertion routines
1167 1218 * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
1168 1219 */
1169 1220
1170 1221 int
1171 1222 ipcl_bind_insert(conn_t *connp)
1172 1223 {
1173 1224 if (connp->conn_ipversion == IPV6_VERSION)
1174 1225 return (ipcl_bind_insert_v6(connp));
1175 1226 else
1176 1227 return (ipcl_bind_insert_v4(connp));
1177 1228 }
1178 1229
1179 1230 int
1180 1231 ipcl_bind_insert_v4(conn_t *connp)
1181 1232 {
1182 1233 connf_t *connfp;
1183 1234 int ret = 0;
1184 1235 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1185 1236 uint16_t lport = connp->conn_lport;
1186 1237 uint8_t protocol = connp->conn_proto;
1187 1238
1188 1239 if (IPCL_IS_IPTUN(connp))
1189 1240 return (ipcl_iptun_hash_insert(connp, ipst));
1190 1241
1191 1242 switch (protocol) {
1192 1243 default:
1193 1244 if (is_system_labeled() &&
1194 1245 check_exempt_conflict_v4(connp, ipst))
1195 1246 return (EADDRINUSE);
1196 1247 /* FALLTHROUGH */
1197 1248 case IPPROTO_UDP:
1198 1249 if (protocol == IPPROTO_UDP) {
1199 1250 connfp = &ipst->ips_ipcl_udp_fanout[
1200 1251 IPCL_UDP_HASH(lport, ipst)];
1201 1252 } else {
1202 1253 connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1203 1254 }
1204 1255
1205 1256 if (connp->conn_faddr_v4 != INADDR_ANY) {
1206 1257 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1207 1258 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1208 1259 IPCL_HASH_INSERT_BOUND(connfp, connp);
1209 1260 } else {
1210 1261 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1211 1262 }
1212 1263 if (protocol == IPPROTO_RSVP)
1213 1264 ill_set_inputfn_all(ipst);
1214 1265 break;
1215 1266
1216 1267 case IPPROTO_TCP:
1217 1268 /* Insert it in the Bind Hash */
1218 1269 ASSERT(connp->conn_zoneid != ALL_ZONES);
1219 1270 connfp = &ipst->ips_ipcl_bind_fanout[
1220 1271 IPCL_BIND_HASH(lport, ipst)];
1221 1272 if (connp->conn_laddr_v4 != INADDR_ANY) {
1222 1273 IPCL_HASH_INSERT_BOUND(connfp, connp);
1223 1274 } else {
1224 1275 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1225 1276 }
1226 1277 if (cl_inet_listen != NULL) {
1227 1278 ASSERT(connp->conn_ipversion == IPV4_VERSION);
1228 1279 connp->conn_flags |= IPCL_CL_LISTENER;
↓ open down ↓ |
496 lines elided |
↑ open up ↑ |
1229 1280 (*cl_inet_listen)(
1230 1281 connp->conn_netstack->netstack_stackid,
1231 1282 IPPROTO_TCP, AF_INET,
1232 1283 (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1233 1284 }
1234 1285 break;
1235 1286
1236 1287 case IPPROTO_SCTP:
1237 1288 ret = ipcl_sctp_hash_insert(connp, lport);
1238 1289 break;
1290 +
1291 + case IPPROTO_DCCP:
1292 + cmn_err(CE_NOTE, "ipcl_bind_insert_v4");
1293 + ASSERT(connp->conn_zoneid != ALL_ZONES);
1294 + connfp = &ipst->ips_ipcl_dccp_fanout[
1295 + IPCL_DCCP_HASH(lport, ipst)];
1296 + if (connp->conn_laddr_v4 != INADDR_ANY) {
1297 + IPCL_HASH_INSERT_BOUND(connfp, connp);
1298 + } else {
1299 + IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1300 + }
1301 + /* XXX:DCCP */
1302 + break;
1239 1303 }
1240 1304
1305 +
1241 1306 return (ret);
1242 1307 }
1243 1308
1244 1309 int
1245 1310 ipcl_bind_insert_v6(conn_t *connp)
1246 1311 {
1247 1312 connf_t *connfp;
1248 1313 int ret = 0;
1249 1314 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1250 1315 uint16_t lport = connp->conn_lport;
1251 1316 uint8_t protocol = connp->conn_proto;
1252 1317
1253 1318 if (IPCL_IS_IPTUN(connp)) {
1254 1319 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1255 1320 }
1256 1321
1257 1322 switch (protocol) {
1258 1323 default:
1259 1324 if (is_system_labeled() &&
1260 1325 check_exempt_conflict_v6(connp, ipst))
1261 1326 return (EADDRINUSE);
1262 1327 /* FALLTHROUGH */
1263 1328 case IPPROTO_UDP:
1264 1329 if (protocol == IPPROTO_UDP) {
1265 1330 connfp = &ipst->ips_ipcl_udp_fanout[
1266 1331 IPCL_UDP_HASH(lport, ipst)];
1267 1332 } else {
1268 1333 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1269 1334 }
1270 1335
1271 1336 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1272 1337 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1273 1338 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1274 1339 IPCL_HASH_INSERT_BOUND(connfp, connp);
1275 1340 } else {
1276 1341 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1277 1342 }
1278 1343 break;
1279 1344
1280 1345 case IPPROTO_TCP:
1281 1346 /* Insert it in the Bind Hash */
1282 1347 ASSERT(connp->conn_zoneid != ALL_ZONES);
1283 1348 connfp = &ipst->ips_ipcl_bind_fanout[
1284 1349 IPCL_BIND_HASH(lport, ipst)];
1285 1350 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1286 1351 IPCL_HASH_INSERT_BOUND(connfp, connp);
1287 1352 } else {
1288 1353 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1289 1354 }
1290 1355 if (cl_inet_listen != NULL) {
1291 1356 sa_family_t addr_family;
1292 1357 uint8_t *laddrp;
1293 1358
1294 1359 if (connp->conn_ipversion == IPV6_VERSION) {
1295 1360 addr_family = AF_INET6;
1296 1361 laddrp =
1297 1362 (uint8_t *)&connp->conn_bound_addr_v6;
1298 1363 } else {
1299 1364 addr_family = AF_INET;
1300 1365 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
1301 1366 }
↓ open down ↓ |
51 lines elided |
↑ open up ↑ |
1302 1367 connp->conn_flags |= IPCL_CL_LISTENER;
1303 1368 (*cl_inet_listen)(
1304 1369 connp->conn_netstack->netstack_stackid,
1305 1370 IPPROTO_TCP, addr_family, laddrp, lport, NULL);
1306 1371 }
1307 1372 break;
1308 1373
1309 1374 case IPPROTO_SCTP:
1310 1375 ret = ipcl_sctp_hash_insert(connp, lport);
1311 1376 break;
1377 +
1378 + case IPPROTO_DCCP:
1379 + /* XXX:DCCP */
1380 + break;
1312 1381 }
1313 1382
1314 1383 return (ret);
1315 1384 }
1316 1385
1317 1386 /*
1318 1387 * ipcl_conn_hash insertion routines.
1319 1388 * The caller has already set conn_proto and the addresses/ports in the conn_t.
1320 1389 */
1321 1390
1322 1391 int
1323 1392 ipcl_conn_insert(conn_t *connp)
1324 1393 {
1325 1394 if (connp->conn_ipversion == IPV6_VERSION)
1326 1395 return (ipcl_conn_insert_v6(connp));
1327 1396 else
1328 1397 return (ipcl_conn_insert_v4(connp));
1329 1398 }
1330 1399
1331 1400 int
1332 1401 ipcl_conn_insert_v4(conn_t *connp)
1333 1402 {
1334 1403 connf_t *connfp;
1335 1404 conn_t *tconnp;
1336 1405 int ret = 0;
1337 1406 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1338 1407 uint16_t lport = connp->conn_lport;
1339 1408 uint8_t protocol = connp->conn_proto;
1340 1409
1341 1410 if (IPCL_IS_IPTUN(connp))
1342 1411 return (ipcl_iptun_hash_insert(connp, ipst));
1343 1412
1344 1413 switch (protocol) {
1345 1414 case IPPROTO_TCP:
1346 1415 /*
1347 1416 * For TCP, we check whether the connection tuple already
1348 1417 * exists before allowing the connection to proceed. We
1349 1418 * also allow indexing on the zoneid. This is to allow
1350 1419 * multiple shared stack zones to have the same tcp
1351 1420 * connection tuple. In practice this only happens for
1352 1421 * INADDR_LOOPBACK as it's the only local address which
1353 1422 * doesn't have to be unique.
1354 1423 */
1355 1424 connfp = &ipst->ips_ipcl_conn_fanout[
1356 1425 IPCL_CONN_HASH(connp->conn_faddr_v4,
1357 1426 connp->conn_ports, ipst)];
1358 1427 mutex_enter(&connfp->connf_lock);
1359 1428 for (tconnp = connfp->connf_head; tconnp != NULL;
1360 1429 tconnp = tconnp->conn_next) {
1361 1430 if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1362 1431 connp->conn_faddr_v4, connp->conn_laddr_v4,
1363 1432 connp->conn_ports) &&
1364 1433 IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1365 1434 /* Already have a conn. bail out */
1366 1435 mutex_exit(&connfp->connf_lock);
1367 1436 return (EADDRINUSE);
1368 1437 }
1369 1438 }
1370 1439 if (connp->conn_fanout != NULL) {
1371 1440 /*
1372 1441 * Probably a XTI/TLI application trying to do a
1373 1442 * rebind. Let it happen.
1374 1443 */
1375 1444 mutex_exit(&connfp->connf_lock);
1376 1445 IPCL_HASH_REMOVE(connp);
1377 1446 mutex_enter(&connfp->connf_lock);
1378 1447 }
1379 1448
1380 1449 ASSERT(connp->conn_recv != NULL);
1381 1450 ASSERT(connp->conn_recvicmp != NULL);
1382 1451
1383 1452 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1384 1453 mutex_exit(&connfp->connf_lock);
1385 1454 break;
↓ open down ↓ |
64 lines elided |
↑ open up ↑ |
1386 1455
1387 1456 case IPPROTO_SCTP:
1388 1457 /*
1389 1458 * The raw socket may have already been bound, remove it
1390 1459 * from the hash first.
1391 1460 */
1392 1461 IPCL_HASH_REMOVE(connp);
1393 1462 ret = ipcl_sctp_hash_insert(connp, lport);
1394 1463 break;
1395 1464
1465 + case IPPROTO_DCCP:
1466 + cmn_err(CE_NOTE, "insert v4");
1467 +
1468 + connfp = &ipst->ips_ipcl_conn_fanout[
1469 + IPCL_CONN_HASH(connp->conn_faddr_v4,
1470 + connp->conn_ports, ipst)];
1471 + mutex_enter(&connfp->connf_lock);
1472 + IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1473 + mutex_exit(&connfp->connf_lock);
1474 + /* XXX:DCCP */
1475 + break;
1476 +
1396 1477 default:
1397 1478 /*
1398 1479 * Check for conflicts among MAC exempt bindings. For
1399 1480 * transports with port numbers, this is done by the upper
1400 1481 * level per-transport binding logic. For all others, it's
1401 1482 * done here.
1402 1483 */
1403 1484 if (is_system_labeled() &&
1404 1485 check_exempt_conflict_v4(connp, ipst))
1405 1486 return (EADDRINUSE);
1406 1487 /* FALLTHROUGH */
1407 1488
1408 1489 case IPPROTO_UDP:
1409 1490 if (protocol == IPPROTO_UDP) {
1410 1491 connfp = &ipst->ips_ipcl_udp_fanout[
1411 1492 IPCL_UDP_HASH(lport, ipst)];
1412 1493 } else {
1413 1494 connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1414 1495 }
1415 1496
1416 1497 if (connp->conn_faddr_v4 != INADDR_ANY) {
1417 1498 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1418 1499 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1419 1500 IPCL_HASH_INSERT_BOUND(connfp, connp);
1420 1501 } else {
1421 1502 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1422 1503 }
1423 1504 break;
1424 1505 }
1425 1506
1426 1507 return (ret);
1427 1508 }
1428 1509
1429 1510 int
1430 1511 ipcl_conn_insert_v6(conn_t *connp)
1431 1512 {
1432 1513 connf_t *connfp;
1433 1514 conn_t *tconnp;
1434 1515 int ret = 0;
1435 1516 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1436 1517 uint16_t lport = connp->conn_lport;
1437 1518 uint8_t protocol = connp->conn_proto;
1438 1519 uint_t ifindex = connp->conn_bound_if;
1439 1520
1440 1521 if (IPCL_IS_IPTUN(connp))
1441 1522 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1442 1523
1443 1524 switch (protocol) {
1444 1525 case IPPROTO_TCP:
1445 1526
1446 1527 /*
1447 1528 * For tcp, we check whether the connection tuple already
1448 1529 * exists before allowing the connection to proceed. We
1449 1530 * also allow indexing on the zoneid. This is to allow
1450 1531 * multiple shared stack zones to have the same tcp
1451 1532 * connection tuple. In practice this only happens for
1452 1533 * ipv6_loopback as it's the only local address which
1453 1534 * doesn't have to be unique.
1454 1535 */
1455 1536 connfp = &ipst->ips_ipcl_conn_fanout[
1456 1537 IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
1457 1538 ipst)];
1458 1539 mutex_enter(&connfp->connf_lock);
1459 1540 for (tconnp = connfp->connf_head; tconnp != NULL;
1460 1541 tconnp = tconnp->conn_next) {
1461 1542 /* NOTE: need to match zoneid. Bug in onnv-gate */
1462 1543 if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1463 1544 connp->conn_faddr_v6, connp->conn_laddr_v6,
1464 1545 connp->conn_ports) &&
1465 1546 (tconnp->conn_bound_if == 0 ||
1466 1547 tconnp->conn_bound_if == ifindex) &&
1467 1548 IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1468 1549 /* Already have a conn. bail out */
1469 1550 mutex_exit(&connfp->connf_lock);
1470 1551 return (EADDRINUSE);
1471 1552 }
1472 1553 }
1473 1554 if (connp->conn_fanout != NULL) {
1474 1555 /*
1475 1556 * Probably a XTI/TLI application trying to do a
1476 1557 * rebind. Let it happen.
1477 1558 */
1478 1559 mutex_exit(&connfp->connf_lock);
1479 1560 IPCL_HASH_REMOVE(connp);
1480 1561 mutex_enter(&connfp->connf_lock);
↓ open down ↓ |
75 lines elided |
↑ open up ↑ |
1481 1562 }
1482 1563 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1483 1564 mutex_exit(&connfp->connf_lock);
1484 1565 break;
1485 1566
1486 1567 case IPPROTO_SCTP:
1487 1568 IPCL_HASH_REMOVE(connp);
1488 1569 ret = ipcl_sctp_hash_insert(connp, lport);
1489 1570 break;
1490 1571
1572 + case IPPROTO_DCCP:
1573 + /* XXX:DCCP */
1574 + break;
1575 +
1491 1576 default:
1492 1577 if (is_system_labeled() &&
1493 1578 check_exempt_conflict_v6(connp, ipst))
1494 1579 return (EADDRINUSE);
1495 1580 /* FALLTHROUGH */
1496 1581 case IPPROTO_UDP:
1497 1582 if (protocol == IPPROTO_UDP) {
1498 1583 connfp = &ipst->ips_ipcl_udp_fanout[
1499 1584 IPCL_UDP_HASH(lport, ipst)];
1500 1585 } else {
1501 1586 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1502 1587 }
1503 1588
1504 1589 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1505 1590 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1506 1591 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1507 1592 IPCL_HASH_INSERT_BOUND(connfp, connp);
1508 1593 } else {
1509 1594 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1510 1595 }
1511 1596 break;
1512 1597 }
1513 1598
1514 1599 return (ret);
1515 1600 }
1516 1601
1517 1602 /*
1518 1603 * v4 packet classifying function. looks up the fanout table to
1519 1604 * find the conn, the packet belongs to. returns the conn with
1520 1605 * the reference held, null otherwise.
1521 1606 *
1522 1607 * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1523 1608 * Lookup" comment block are applied. Labels are also checked as described
1524 1609 * above. If the packet is from the inside (looped back), and is from the same
1525 1610 * zone, then label checks are omitted.
1526 1611 */
1527 1612 conn_t *
1528 1613 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1529 1614 ip_recv_attr_t *ira, ip_stack_t *ipst)
1530 1615 {
1531 1616 ipha_t *ipha;
1532 1617 connf_t *connfp, *bind_connfp;
1533 1618 uint16_t lport;
1534 1619 uint16_t fport;
1535 1620 uint32_t ports;
1536 1621 conn_t *connp;
1537 1622 uint16_t *up;
1538 1623 zoneid_t zoneid = ira->ira_zoneid;
1539 1624
1540 1625 ipha = (ipha_t *)mp->b_rptr;
1541 1626 up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
1542 1627
1543 1628 switch (protocol) {
1544 1629 case IPPROTO_TCP:
1545 1630 ports = *(uint32_t *)up;
1546 1631 connfp =
1547 1632 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1548 1633 ports, ipst)];
1549 1634 mutex_enter(&connfp->connf_lock);
1550 1635 for (connp = connfp->connf_head; connp != NULL;
1551 1636 connp = connp->conn_next) {
1552 1637 if (IPCL_CONN_MATCH(connp, protocol,
1553 1638 ipha->ipha_src, ipha->ipha_dst, ports) &&
1554 1639 (connp->conn_zoneid == zoneid ||
1555 1640 connp->conn_allzones ||
1556 1641 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1557 1642 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1558 1643 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1559 1644 break;
1560 1645 }
1561 1646
1562 1647 if (connp != NULL) {
1563 1648 /*
1564 1649 * We have a fully-bound TCP connection.
1565 1650 *
1566 1651 * For labeled systems, there's no need to check the
1567 1652 * label here. It's known to be good as we checked
1568 1653 * before allowing the connection to become bound.
1569 1654 */
1570 1655 CONN_INC_REF(connp);
1571 1656 mutex_exit(&connfp->connf_lock);
1572 1657 return (connp);
1573 1658 }
1574 1659
1575 1660 mutex_exit(&connfp->connf_lock);
1576 1661 lport = up[1];
1577 1662 bind_connfp =
1578 1663 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1579 1664 mutex_enter(&bind_connfp->connf_lock);
1580 1665 for (connp = bind_connfp->connf_head; connp != NULL;
1581 1666 connp = connp->conn_next) {
1582 1667 if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1583 1668 lport) &&
1584 1669 (connp->conn_zoneid == zoneid ||
1585 1670 connp->conn_allzones ||
1586 1671 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1587 1672 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1588 1673 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1589 1674 break;
1590 1675 }
1591 1676
1592 1677 /*
1593 1678 * If the matching connection is SLP on a private address, then
1594 1679 * the label on the packet must match the local zone's label.
1595 1680 * Otherwise, it must be in the label range defined by tnrh.
1596 1681 * This is ensured by tsol_receive_local.
1597 1682 *
1598 1683 * Note that we don't check tsol_receive_local for
1599 1684 * the connected case.
1600 1685 */
1601 1686 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1602 1687 !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1603 1688 ira, connp)) {
1604 1689 DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
1605 1690 char *, "connp(1) could not receive mp(2)",
1606 1691 conn_t *, connp, mblk_t *, mp);
1607 1692 connp = NULL;
1608 1693 }
1609 1694
1610 1695 if (connp != NULL) {
1611 1696 /* Have a listener at least */
1612 1697 CONN_INC_REF(connp);
1613 1698 mutex_exit(&bind_connfp->connf_lock);
1614 1699 return (connp);
1615 1700 }
1616 1701
1617 1702 mutex_exit(&bind_connfp->connf_lock);
1618 1703 break;
1619 1704
1620 1705 case IPPROTO_UDP:
1621 1706 lport = up[1];
1622 1707 fport = up[0];
1623 1708 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1624 1709 mutex_enter(&connfp->connf_lock);
1625 1710 for (connp = connfp->connf_head; connp != NULL;
1626 1711 connp = connp->conn_next) {
1627 1712 if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1628 1713 fport, ipha->ipha_src) &&
1629 1714 (connp->conn_zoneid == zoneid ||
1630 1715 connp->conn_allzones ||
1631 1716 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1632 1717 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
1633 1718 break;
1634 1719 }
1635 1720
1636 1721 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1637 1722 !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1638 1723 ira, connp)) {
1639 1724 DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1640 1725 char *, "connp(1) could not receive mp(2)",
1641 1726 conn_t *, connp, mblk_t *, mp);
1642 1727 connp = NULL;
1643 1728 }
1644 1729
1645 1730 if (connp != NULL) {
1646 1731 CONN_INC_REF(connp);
1647 1732 mutex_exit(&connfp->connf_lock);
↓ open down ↓ |
147 lines elided |
↑ open up ↑ |
1648 1733 return (connp);
1649 1734 }
1650 1735
1651 1736 /*
1652 1737 * We shouldn't come here for multicast/broadcast packets
1653 1738 */
1654 1739 mutex_exit(&connfp->connf_lock);
1655 1740
1656 1741 break;
1657 1742
1743 + case IPPROTO_DCCP:
1744 + fport = up[0];
1745 + lport = up[1];
1746 + connfp = &ipst->ips_ipcl_dccp_fanout[IPCL_DCCP_HASH(
1747 + lport, ipst)];
1748 + mutex_enter(&connfp->connf_lock);
1749 + for (connp = connfp->connf_head; connp != NULL;
1750 + connp = connp->conn_next) {
1751 + cmn_err(CE_NOTE, "connfp found");
1752 + /* XXX:DCCP */
1753 + if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1754 + fport, ipha->ipha_src)) {
1755 + break;
1756 + }
1757 + }
1758 +
1759 + if (connp != NULL) {
1760 + CONN_INC_REF(connp);
1761 + mutex_exit(&connfp->connf_lock);
1762 + return (connp);
1763 + }
1764 +
1765 + mutex_exit(&connfp->connf_lock);
1766 + break;
1767 +
1658 1768 case IPPROTO_ENCAP:
1659 1769 case IPPROTO_IPV6:
1660 1770 return (ipcl_iptun_classify_v4(&ipha->ipha_src,
1661 1771 &ipha->ipha_dst, ipst));
1662 1772 }
1663 1773
1664 1774 return (NULL);
1665 1775 }
1666 1776
1667 1777 conn_t *
1668 1778 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1669 1779 ip_recv_attr_t *ira, ip_stack_t *ipst)
1670 1780 {
1671 1781 ip6_t *ip6h;
1672 1782 connf_t *connfp, *bind_connfp;
1673 1783 uint16_t lport;
1674 1784 uint16_t fport;
1675 1785 tcpha_t *tcpha;
1676 1786 uint32_t ports;
1677 1787 conn_t *connp;
1678 1788 uint16_t *up;
1679 1789 zoneid_t zoneid = ira->ira_zoneid;
1680 1790
1681 1791 ip6h = (ip6_t *)mp->b_rptr;
1682 1792
1683 1793 switch (protocol) {
1684 1794 case IPPROTO_TCP:
1685 1795 tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
1686 1796 up = &tcpha->tha_lport;
1687 1797 ports = *(uint32_t *)up;
1688 1798
1689 1799 connfp =
1690 1800 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1691 1801 ports, ipst)];
1692 1802 mutex_enter(&connfp->connf_lock);
1693 1803 for (connp = connfp->connf_head; connp != NULL;
1694 1804 connp = connp->conn_next) {
1695 1805 if (IPCL_CONN_MATCH_V6(connp, protocol,
1696 1806 ip6h->ip6_src, ip6h->ip6_dst, ports) &&
1697 1807 (connp->conn_zoneid == zoneid ||
1698 1808 connp->conn_allzones ||
1699 1809 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1700 1810 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1701 1811 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1702 1812 break;
1703 1813 }
1704 1814
1705 1815 if (connp != NULL) {
1706 1816 /*
1707 1817 * We have a fully-bound TCP connection.
1708 1818 *
1709 1819 * For labeled systems, there's no need to check the
1710 1820 * label here. It's known to be good as we checked
1711 1821 * before allowing the connection to become bound.
1712 1822 */
1713 1823 CONN_INC_REF(connp);
1714 1824 mutex_exit(&connfp->connf_lock);
1715 1825 return (connp);
1716 1826 }
1717 1827
1718 1828 mutex_exit(&connfp->connf_lock);
1719 1829
1720 1830 lport = up[1];
1721 1831 bind_connfp =
1722 1832 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1723 1833 mutex_enter(&bind_connfp->connf_lock);
1724 1834 for (connp = bind_connfp->connf_head; connp != NULL;
1725 1835 connp = connp->conn_next) {
1726 1836 if (IPCL_BIND_MATCH_V6(connp, protocol,
1727 1837 ip6h->ip6_dst, lport) &&
1728 1838 (connp->conn_zoneid == zoneid ||
1729 1839 connp->conn_allzones ||
1730 1840 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1731 1841 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1732 1842 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1733 1843 break;
1734 1844 }
1735 1845
1736 1846 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1737 1847 !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1738 1848 ira, connp)) {
1739 1849 DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
1740 1850 char *, "connp(1) could not receive mp(2)",
1741 1851 conn_t *, connp, mblk_t *, mp);
1742 1852 connp = NULL;
1743 1853 }
1744 1854
1745 1855 if (connp != NULL) {
1746 1856 /* Have a listner at least */
1747 1857 CONN_INC_REF(connp);
1748 1858 mutex_exit(&bind_connfp->connf_lock);
1749 1859 return (connp);
1750 1860 }
1751 1861
1752 1862 mutex_exit(&bind_connfp->connf_lock);
1753 1863 break;
1754 1864
1755 1865 case IPPROTO_UDP:
1756 1866 up = (uint16_t *)&mp->b_rptr[hdr_len];
1757 1867 lport = up[1];
1758 1868 fport = up[0];
1759 1869 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1760 1870 mutex_enter(&connfp->connf_lock);
1761 1871 for (connp = connfp->connf_head; connp != NULL;
1762 1872 connp = connp->conn_next) {
1763 1873 if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
1764 1874 fport, ip6h->ip6_src) &&
1765 1875 (connp->conn_zoneid == zoneid ||
1766 1876 connp->conn_allzones ||
1767 1877 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1768 1878 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1769 1879 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1770 1880 break;
1771 1881 }
1772 1882
1773 1883 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1774 1884 !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1775 1885 ira, connp)) {
1776 1886 DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
1777 1887 char *, "connp(1) could not receive mp(2)",
1778 1888 conn_t *, connp, mblk_t *, mp);
1779 1889 connp = NULL;
1780 1890 }
1781 1891
1782 1892 if (connp != NULL) {
1783 1893 CONN_INC_REF(connp);
1784 1894 mutex_exit(&connfp->connf_lock);
1785 1895 return (connp);
1786 1896 }
1787 1897
1788 1898 /*
1789 1899 * We shouldn't come here for multicast/broadcast packets
1790 1900 */
1791 1901 mutex_exit(&connfp->connf_lock);
1792 1902 break;
1793 1903 case IPPROTO_ENCAP:
1794 1904 case IPPROTO_IPV6:
1795 1905 return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
1796 1906 &ip6h->ip6_dst, ipst));
1797 1907 }
1798 1908
1799 1909 return (NULL);
1800 1910 }
1801 1911
1802 1912 /*
1803 1913 * wrapper around ipcl_classify_(v4,v6) routines.
1804 1914 */
1805 1915 conn_t *
1806 1916 ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
1807 1917 {
1808 1918 if (ira->ira_flags & IRAF_IS_IPV4) {
1809 1919 return (ipcl_classify_v4(mp, ira->ira_protocol,
1810 1920 ira->ira_ip_hdr_length, ira, ipst));
1811 1921 } else {
1812 1922 return (ipcl_classify_v6(mp, ira->ira_protocol,
1813 1923 ira->ira_ip_hdr_length, ira, ipst));
1814 1924 }
1815 1925 }
1816 1926
1817 1927 /*
1818 1928 * Only used to classify SCTP RAW sockets
1819 1929 */
1820 1930 conn_t *
1821 1931 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
1822 1932 ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
1823 1933 {
1824 1934 connf_t *connfp;
1825 1935 conn_t *connp;
1826 1936 in_port_t lport;
1827 1937 int ipversion;
1828 1938 const void *dst;
1829 1939 zoneid_t zoneid = ira->ira_zoneid;
1830 1940
1831 1941 lport = ((uint16_t *)&ports)[1];
1832 1942 if (ira->ira_flags & IRAF_IS_IPV4) {
1833 1943 dst = (const void *)&ipha->ipha_dst;
1834 1944 ipversion = IPV4_VERSION;
1835 1945 } else {
1836 1946 dst = (const void *)&ip6h->ip6_dst;
1837 1947 ipversion = IPV6_VERSION;
1838 1948 }
1839 1949
1840 1950 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1841 1951 mutex_enter(&connfp->connf_lock);
1842 1952 for (connp = connfp->connf_head; connp != NULL;
1843 1953 connp = connp->conn_next) {
1844 1954 /* We don't allow v4 fallback for v6 raw socket. */
1845 1955 if (ipversion != connp->conn_ipversion)
1846 1956 continue;
1847 1957 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1848 1958 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1849 1959 if (ipversion == IPV4_VERSION) {
1850 1960 if (!IPCL_CONN_MATCH(connp, protocol,
1851 1961 ipha->ipha_src, ipha->ipha_dst, ports))
1852 1962 continue;
1853 1963 } else {
1854 1964 if (!IPCL_CONN_MATCH_V6(connp, protocol,
1855 1965 ip6h->ip6_src, ip6h->ip6_dst, ports))
1856 1966 continue;
1857 1967 }
1858 1968 } else {
1859 1969 if (ipversion == IPV4_VERSION) {
1860 1970 if (!IPCL_BIND_MATCH(connp, protocol,
1861 1971 ipha->ipha_dst, lport))
1862 1972 continue;
1863 1973 } else {
1864 1974 if (!IPCL_BIND_MATCH_V6(connp, protocol,
1865 1975 ip6h->ip6_dst, lport))
1866 1976 continue;
1867 1977 }
1868 1978 }
1869 1979
1870 1980 if (connp->conn_zoneid == zoneid ||
1871 1981 connp->conn_allzones ||
1872 1982 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1873 1983 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1874 1984 (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
1875 1985 break;
1876 1986 }
1877 1987
1878 1988 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1879 1989 !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
1880 1990 DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
1881 1991 char *, "connp(1) could not receive mp(2)",
1882 1992 conn_t *, connp, mblk_t *, mp);
1883 1993 connp = NULL;
1884 1994 }
1885 1995
1886 1996 if (connp != NULL)
1887 1997 goto found;
1888 1998 mutex_exit(&connfp->connf_lock);
1889 1999
1890 2000 /* Try to look for a wildcard SCTP RAW socket match. */
1891 2001 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
1892 2002 mutex_enter(&connfp->connf_lock);
1893 2003 for (connp = connfp->connf_head; connp != NULL;
1894 2004 connp = connp->conn_next) {
1895 2005 /* We don't allow v4 fallback for v6 raw socket. */
1896 2006 if (ipversion != connp->conn_ipversion)
1897 2007 continue;
1898 2008 if (!IPCL_ZONE_MATCH(connp, zoneid))
1899 2009 continue;
1900 2010
1901 2011 if (ipversion == IPV4_VERSION) {
1902 2012 if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
1903 2013 break;
1904 2014 } else {
1905 2015 if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
1906 2016 break;
1907 2017 }
1908 2018 }
1909 2019 }
1910 2020
1911 2021 if (connp != NULL)
1912 2022 goto found;
1913 2023
1914 2024 mutex_exit(&connfp->connf_lock);
1915 2025 return (NULL);
1916 2026
1917 2027 found:
1918 2028 ASSERT(connp != NULL);
1919 2029 CONN_INC_REF(connp);
1920 2030 mutex_exit(&connfp->connf_lock);
1921 2031 return (connp);
1922 2032 }
1923 2033
1924 2034 /* ARGSUSED */
1925 2035 static int
1926 2036 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
1927 2037 {
1928 2038 itc_t *itc = (itc_t *)buf;
1929 2039 conn_t *connp = &itc->itc_conn;
1930 2040 tcp_t *tcp = (tcp_t *)&itc[1];
1931 2041
1932 2042 bzero(connp, sizeof (conn_t));
1933 2043 bzero(tcp, sizeof (tcp_t));
1934 2044
1935 2045 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1936 2046 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1937 2047 cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
1938 2048 tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
1939 2049 if (tcp->tcp_timercache == NULL)
1940 2050 return (ENOMEM);
1941 2051 connp->conn_tcp = tcp;
1942 2052 connp->conn_flags = IPCL_TCPCONN;
1943 2053 connp->conn_proto = IPPROTO_TCP;
1944 2054 tcp->tcp_connp = connp;
1945 2055 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1946 2056
1947 2057 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1948 2058 if (connp->conn_ixa == NULL) {
1949 2059 tcp_timermp_free(tcp);
1950 2060 return (ENOMEM);
1951 2061 }
1952 2062 connp->conn_ixa->ixa_refcnt = 1;
1953 2063 connp->conn_ixa->ixa_protocol = connp->conn_proto;
1954 2064 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
1955 2065 return (0);
1956 2066 }
1957 2067
1958 2068 /* ARGSUSED */
1959 2069 static void
1960 2070 tcp_conn_destructor(void *buf, void *cdrarg)
1961 2071 {
1962 2072 itc_t *itc = (itc_t *)buf;
1963 2073 conn_t *connp = &itc->itc_conn;
1964 2074 tcp_t *tcp = (tcp_t *)&itc[1];
1965 2075
1966 2076 ASSERT(connp->conn_flags & IPCL_TCPCONN);
1967 2077 ASSERT(tcp->tcp_connp == connp);
1968 2078 ASSERT(connp->conn_tcp == tcp);
1969 2079 tcp_timermp_free(tcp);
1970 2080 mutex_destroy(&connp->conn_lock);
1971 2081 cv_destroy(&connp->conn_cv);
1972 2082 cv_destroy(&connp->conn_sq_cv);
1973 2083 rw_destroy(&connp->conn_ilg_lock);
1974 2084
1975 2085 /* Can be NULL if constructor failed */
1976 2086 if (connp->conn_ixa != NULL) {
1977 2087 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
1978 2088 ASSERT(connp->conn_ixa->ixa_ire == NULL);
1979 2089 ASSERT(connp->conn_ixa->ixa_nce == NULL);
1980 2090 ixa_refrele(connp->conn_ixa);
1981 2091 }
1982 2092 }
1983 2093
1984 2094 /* ARGSUSED */
1985 2095 static int
1986 2096 ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
1987 2097 {
1988 2098 itc_t *itc = (itc_t *)buf;
1989 2099 conn_t *connp = &itc->itc_conn;
1990 2100
1991 2101 bzero(connp, sizeof (conn_t));
1992 2102 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1993 2103 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1994 2104 connp->conn_flags = IPCL_IPCCONN;
1995 2105 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1996 2106
1997 2107 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1998 2108 if (connp->conn_ixa == NULL)
1999 2109 return (ENOMEM);
2000 2110 connp->conn_ixa->ixa_refcnt = 1;
2001 2111 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2002 2112 return (0);
2003 2113 }
2004 2114
2005 2115 /* ARGSUSED */
2006 2116 static void
2007 2117 ip_conn_destructor(void *buf, void *cdrarg)
2008 2118 {
2009 2119 itc_t *itc = (itc_t *)buf;
2010 2120 conn_t *connp = &itc->itc_conn;
2011 2121
2012 2122 ASSERT(connp->conn_flags & IPCL_IPCCONN);
2013 2123 ASSERT(connp->conn_priv == NULL);
2014 2124 mutex_destroy(&connp->conn_lock);
2015 2125 cv_destroy(&connp->conn_cv);
2016 2126 rw_destroy(&connp->conn_ilg_lock);
2017 2127
2018 2128 /* Can be NULL if constructor failed */
2019 2129 if (connp->conn_ixa != NULL) {
2020 2130 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2021 2131 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2022 2132 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2023 2133 ixa_refrele(connp->conn_ixa);
2024 2134 }
2025 2135 }
2026 2136
2027 2137 /* ARGSUSED */
2028 2138 static int
2029 2139 udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2030 2140 {
2031 2141 itc_t *itc = (itc_t *)buf;
2032 2142 conn_t *connp = &itc->itc_conn;
2033 2143 udp_t *udp = (udp_t *)&itc[1];
2034 2144
2035 2145 bzero(connp, sizeof (conn_t));
2036 2146 bzero(udp, sizeof (udp_t));
2037 2147
2038 2148 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2039 2149 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2040 2150 connp->conn_udp = udp;
2041 2151 connp->conn_flags = IPCL_UDPCONN;
2042 2152 connp->conn_proto = IPPROTO_UDP;
2043 2153 udp->udp_connp = connp;
2044 2154 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2045 2155 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2046 2156 if (connp->conn_ixa == NULL)
2047 2157 return (ENOMEM);
2048 2158 connp->conn_ixa->ixa_refcnt = 1;
2049 2159 connp->conn_ixa->ixa_protocol = connp->conn_proto;
2050 2160 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2051 2161 return (0);
2052 2162 }
2053 2163
2054 2164 /* ARGSUSED */
2055 2165 static void
2056 2166 udp_conn_destructor(void *buf, void *cdrarg)
2057 2167 {
2058 2168 itc_t *itc = (itc_t *)buf;
2059 2169 conn_t *connp = &itc->itc_conn;
2060 2170 udp_t *udp = (udp_t *)&itc[1];
2061 2171
2062 2172 ASSERT(connp->conn_flags & IPCL_UDPCONN);
2063 2173 ASSERT(udp->udp_connp == connp);
2064 2174 ASSERT(connp->conn_udp == udp);
2065 2175 mutex_destroy(&connp->conn_lock);
2066 2176 cv_destroy(&connp->conn_cv);
2067 2177 rw_destroy(&connp->conn_ilg_lock);
2068 2178
2069 2179 /* Can be NULL if constructor failed */
2070 2180 if (connp->conn_ixa != NULL) {
2071 2181 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2072 2182 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2073 2183 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2074 2184 ixa_refrele(connp->conn_ixa);
2075 2185 }
2076 2186 }
2077 2187
2078 2188 /* ARGSUSED */
2079 2189 static int
2080 2190 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2081 2191 {
2082 2192 itc_t *itc = (itc_t *)buf;
2083 2193 conn_t *connp = &itc->itc_conn;
2084 2194 icmp_t *icmp = (icmp_t *)&itc[1];
2085 2195
2086 2196 bzero(connp, sizeof (conn_t));
2087 2197 bzero(icmp, sizeof (icmp_t));
2088 2198
2089 2199 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2090 2200 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2091 2201 connp->conn_icmp = icmp;
2092 2202 connp->conn_flags = IPCL_RAWIPCONN;
2093 2203 connp->conn_proto = IPPROTO_ICMP;
2094 2204 icmp->icmp_connp = connp;
2095 2205 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2096 2206 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2097 2207 if (connp->conn_ixa == NULL)
2098 2208 return (ENOMEM);
2099 2209 connp->conn_ixa->ixa_refcnt = 1;
2100 2210 connp->conn_ixa->ixa_protocol = connp->conn_proto;
2101 2211 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2102 2212 return (0);
2103 2213 }
2104 2214
2105 2215 /* ARGSUSED */
2106 2216 static void
2107 2217 rawip_conn_destructor(void *buf, void *cdrarg)
2108 2218 {
2109 2219 itc_t *itc = (itc_t *)buf;
2110 2220 conn_t *connp = &itc->itc_conn;
2111 2221 icmp_t *icmp = (icmp_t *)&itc[1];
2112 2222
2113 2223 ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2114 2224 ASSERT(icmp->icmp_connp == connp);
2115 2225 ASSERT(connp->conn_icmp == icmp);
2116 2226 mutex_destroy(&connp->conn_lock);
2117 2227 cv_destroy(&connp->conn_cv);
2118 2228 rw_destroy(&connp->conn_ilg_lock);
2119 2229
2120 2230 /* Can be NULL if constructor failed */
2121 2231 if (connp->conn_ixa != NULL) {
2122 2232 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2123 2233 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2124 2234 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2125 2235 ixa_refrele(connp->conn_ixa);
2126 2236 }
2127 2237 }
2128 2238
2129 2239 /* ARGSUSED */
2130 2240 static int
2131 2241 rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2132 2242 {
2133 2243 itc_t *itc = (itc_t *)buf;
2134 2244 conn_t *connp = &itc->itc_conn;
2135 2245 rts_t *rts = (rts_t *)&itc[1];
2136 2246
2137 2247 bzero(connp, sizeof (conn_t));
2138 2248 bzero(rts, sizeof (rts_t));
2139 2249
2140 2250 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2141 2251 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2142 2252 connp->conn_rts = rts;
2143 2253 connp->conn_flags = IPCL_RTSCONN;
2144 2254 rts->rts_connp = connp;
2145 2255 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2146 2256 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2147 2257 if (connp->conn_ixa == NULL)
2148 2258 return (ENOMEM);
2149 2259 connp->conn_ixa->ixa_refcnt = 1;
2150 2260 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2151 2261 return (0);
2152 2262 }
2153 2263
2154 2264 /* ARGSUSED */
2155 2265 static void
2156 2266 rts_conn_destructor(void *buf, void *cdrarg)
2157 2267 {
2158 2268 itc_t *itc = (itc_t *)buf;
2159 2269 conn_t *connp = &itc->itc_conn;
2160 2270 rts_t *rts = (rts_t *)&itc[1];
2161 2271
2162 2272 ASSERT(connp->conn_flags & IPCL_RTSCONN);
2163 2273 ASSERT(rts->rts_connp == connp);
2164 2274 ASSERT(connp->conn_rts == rts);
2165 2275 mutex_destroy(&connp->conn_lock);
2166 2276 cv_destroy(&connp->conn_cv);
2167 2277 rw_destroy(&connp->conn_ilg_lock);
↓ open down ↓ |
500 lines elided |
↑ open up ↑ |
2168 2278
2169 2279 /* Can be NULL if constructor failed */
2170 2280 if (connp->conn_ixa != NULL) {
2171 2281 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2172 2282 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2173 2283 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2174 2284 ixa_refrele(connp->conn_ixa);
2175 2285 }
2176 2286 }
2177 2287
2288 +/* ARGSUSED */
2289 +static int
2290 +dccp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2291 +{
2292 + itc_t *itc = (itc_t *)buf;
2293 + conn_t *connp = &itc->itc_conn;
2294 + dccp_t *dccp = (dccp_t *)&itc[1];
2295 +
2296 + bzero(connp, sizeof (conn_t));
2297 + bzero(dccp, sizeof (dccp_t));
2298 +
2299 + mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2300 + cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2301 + rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2302 +
2303 + connp->conn_dccp = dccp;
2304 + connp->conn_flags = IPCL_DCCPCONN;
2305 + connp->conn_proto = IPPROTO_DCCP;
2306 + dccp->dccp_connp = connp;
2307 + connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2308 + if (connp->conn_ixa == NULL)
2309 + return (NULL);
2310 + connp->conn_ixa->ixa_refcnt = 1;
2311 + connp->conn_ixa->ixa_protocol = connp->conn_proto;
2312 + connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2313 +
2314 + return (0);
2315 +}
2316 +
2317 +/* ARGSUSED */
2318 +static void
2319 +dccp_conn_destructor(void *buf, void *cdrarg)
2320 +{
2321 + itc_t *itc = (itc_t *)buf;
2322 + conn_t *connp = &itc->itc_conn;
2323 + dccp_t *dccp = (dccp_t *)&itc[1];
2324 +
2325 + ASSERT(connp->conn_flags & IPCL_DCCPCONN);
2326 + ASSERT(dccp->dccp_connp == connp);
2327 + ASSERT(connp->conn_dccp == dccp);
2328 +
2329 + mutex_destroy(&connp->conn_lock);
2330 + cv_destroy(&connp->conn_cv);
2331 + rw_destroy(&connp->conn_ilg_lock);
2332 +
2333 + if (connp->conn_ixa != NULL) {
2334 + ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2335 + ASSERT(connp->conn_ixa->ixa_ire == NULL);
2336 + ASSERT(connp->conn_ixa->ixa_nce == NULL);
2337 +
2338 + ixa_refrele(connp->conn_ixa);
2339 + }
2340 +}
2341 +
2178 2342 /*
2179 2343 * Called as part of ipcl_conn_destroy to assert and clear any pointers
2180 2344 * in the conn_t.
2181 2345 *
2182 2346 * Below we list all the pointers in the conn_t as a documentation aid.
2183 2347 * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2184 2348 * If you add any pointers to the conn_t please add an ASSERT here
2185 2349 * and #ifdef it out if it can't be actually asserted to be NULL.
2186 2350 * In any case, we bzero most of the conn_t at the end of the function.
2187 2351 */
2188 2352 void
2189 2353 ipcl_conn_cleanup(conn_t *connp)
2190 2354 {
2191 2355 ip_xmit_attr_t *ixa;
2192 2356
2193 2357 ASSERT(connp->conn_latch == NULL);
2194 2358 ASSERT(connp->conn_latch_in_policy == NULL);
2195 2359 ASSERT(connp->conn_latch_in_action == NULL);
2196 2360 #ifdef notdef
2197 2361 ASSERT(connp->conn_rq == NULL);
2198 2362 ASSERT(connp->conn_wq == NULL);
2199 2363 #endif
2200 2364 ASSERT(connp->conn_cred == NULL);
2201 2365 ASSERT(connp->conn_g_fanout == NULL);
2202 2366 ASSERT(connp->conn_g_next == NULL);
2203 2367 ASSERT(connp->conn_g_prev == NULL);
2204 2368 ASSERT(connp->conn_policy == NULL);
2205 2369 ASSERT(connp->conn_fanout == NULL);
2206 2370 ASSERT(connp->conn_next == NULL);
2207 2371 ASSERT(connp->conn_prev == NULL);
2208 2372 ASSERT(connp->conn_oper_pending_ill == NULL);
2209 2373 ASSERT(connp->conn_ilg == NULL);
2210 2374 ASSERT(connp->conn_drain_next == NULL);
2211 2375 ASSERT(connp->conn_drain_prev == NULL);
2212 2376 #ifdef notdef
2213 2377 /* conn_idl is not cleared when removed from idl list */
2214 2378 ASSERT(connp->conn_idl == NULL);
2215 2379 #endif
2216 2380 ASSERT(connp->conn_ipsec_opt_mp == NULL);
2217 2381 #ifdef notdef
2218 2382 /* conn_netstack is cleared by the caller; needed by ixa_cleanup */
2219 2383 ASSERT(connp->conn_netstack == NULL);
2220 2384 #endif
2221 2385
2222 2386 ASSERT(connp->conn_helper_info == NULL);
2223 2387 ASSERT(connp->conn_ixa != NULL);
2224 2388 ixa = connp->conn_ixa;
2225 2389 ASSERT(ixa->ixa_refcnt == 1);
2226 2390 /* Need to preserve ixa_protocol */
2227 2391 ixa_cleanup(ixa);
2228 2392 ixa->ixa_flags = 0;
2229 2393
2230 2394 /* Clear out the conn_t fields that are not preserved */
2231 2395 bzero(&connp->conn_start_clr,
2232 2396 sizeof (conn_t) -
2233 2397 ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
2234 2398 }
2235 2399
2236 2400 /*
2237 2401 * All conns are inserted in a global multi-list for the benefit of
2238 2402 * walkers. The walk is guaranteed to walk all open conns at the time
2239 2403 * of the start of the walk exactly once. This property is needed to
2240 2404 * achieve some cleanups during unplumb of interfaces. This is achieved
2241 2405 * as follows.
2242 2406 *
2243 2407 * ipcl_conn_create and ipcl_conn_destroy are the only functions that
2244 2408 * call the insert and delete functions below at creation and deletion
2245 2409 * time respectively. The conn never moves or changes its position in this
2246 2410 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
2247 2411 * won't increase due to walkers, once the conn deletion has started. Note
2248 2412 * that we can't remove the conn from the global list and then wait for
2249 2413 * the refcnt to drop to zero, since walkers would then see a truncated
2250 2414 * list. CONN_INCIPIENT ensures that walkers don't start looking at
2251 2415 * conns until ip_open is ready to make them globally visible.
2252 2416 * The global round robin multi-list locks are held only to get the
2253 2417 * next member/insertion/deletion and contention should be negligible
2254 2418 * if the multi-list is much greater than the number of cpus.
2255 2419 */
2256 2420 void
2257 2421 ipcl_globalhash_insert(conn_t *connp)
2258 2422 {
2259 2423 int index;
2260 2424 struct connf_s *connfp;
2261 2425 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
2262 2426
2263 2427 /*
2264 2428 * No need for atomic here. Approximate even distribution
2265 2429 * in the global lists is sufficient.
2266 2430 */
2267 2431 ipst->ips_conn_g_index++;
2268 2432 index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
2269 2433
2270 2434 connp->conn_g_prev = NULL;
2271 2435 /*
2272 2436 * Mark as INCIPIENT, so that walkers will ignore this
2273 2437 * for now, till ip_open is ready to make it visible globally.
2274 2438 */
2275 2439 connp->conn_state_flags |= CONN_INCIPIENT;
2276 2440
2277 2441 connfp = &ipst->ips_ipcl_globalhash_fanout[index];
2278 2442 /* Insert at the head of the list */
2279 2443 mutex_enter(&connfp->connf_lock);
2280 2444 connp->conn_g_next = connfp->connf_head;
2281 2445 if (connp->conn_g_next != NULL)
2282 2446 connp->conn_g_next->conn_g_prev = connp;
2283 2447 connfp->connf_head = connp;
2284 2448
2285 2449 /* The fanout bucket this conn points to */
2286 2450 connp->conn_g_fanout = connfp;
2287 2451
2288 2452 mutex_exit(&connfp->connf_lock);
2289 2453 }
2290 2454
2291 2455 void
2292 2456 ipcl_globalhash_remove(conn_t *connp)
2293 2457 {
2294 2458 struct connf_s *connfp;
2295 2459
2296 2460 /*
2297 2461 * We were never inserted in the global multi list.
2298 2462 * IPCL_NONE variety is never inserted in the global multilist
2299 2463 * since it is presumed to not need any cleanup and is transient.
2300 2464 */
2301 2465 if (connp->conn_g_fanout == NULL)
2302 2466 return;
2303 2467
2304 2468 connfp = connp->conn_g_fanout;
2305 2469 mutex_enter(&connfp->connf_lock);
2306 2470 if (connp->conn_g_prev != NULL)
2307 2471 connp->conn_g_prev->conn_g_next = connp->conn_g_next;
2308 2472 else
2309 2473 connfp->connf_head = connp->conn_g_next;
2310 2474 if (connp->conn_g_next != NULL)
2311 2475 connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2312 2476 mutex_exit(&connfp->connf_lock);
2313 2477
2314 2478 /* Better to stumble on a null pointer than to corrupt memory */
2315 2479 connp->conn_g_next = NULL;
2316 2480 connp->conn_g_prev = NULL;
2317 2481 connp->conn_g_fanout = NULL;
2318 2482 }
2319 2483
2320 2484 /*
2321 2485 * Walk the list of all conn_t's in the system, calling the function provided
2322 2486 * With the specified argument for each.
2323 2487 * Applies to both IPv4 and IPv6.
2324 2488 *
2325 2489 * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2326 2490 * conn_oper_pending_ill). To guard against stale pointers
2327 2491 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
2328 2492 * unplumbed or removed. New conn_t's that are created while we are walking
2329 2493 * may be missed by this walk, because they are not necessarily inserted
2330 2494 * at the tail of the list. They are new conn_t's and thus don't have any
2331 2495 * stale pointers. The CONN_CLOSING flag ensures that no new reference
2332 2496 * is created to the struct that is going away.
2333 2497 */
2334 2498 void
2335 2499 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
2336 2500 {
2337 2501 int i;
2338 2502 conn_t *connp;
2339 2503 conn_t *prev_connp;
2340 2504
2341 2505 for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2342 2506 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2343 2507 prev_connp = NULL;
2344 2508 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
2345 2509 while (connp != NULL) {
2346 2510 mutex_enter(&connp->conn_lock);
2347 2511 if (connp->conn_state_flags &
2348 2512 (CONN_CONDEMNED | CONN_INCIPIENT)) {
2349 2513 mutex_exit(&connp->conn_lock);
2350 2514 connp = connp->conn_g_next;
2351 2515 continue;
2352 2516 }
2353 2517 CONN_INC_REF_LOCKED(connp);
2354 2518 mutex_exit(&connp->conn_lock);
2355 2519 mutex_exit(
2356 2520 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2357 2521 (*func)(connp, arg);
2358 2522 if (prev_connp != NULL)
2359 2523 CONN_DEC_REF(prev_connp);
2360 2524 mutex_enter(
2361 2525 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2362 2526 prev_connp = connp;
2363 2527 connp = connp->conn_g_next;
2364 2528 }
2365 2529 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2366 2530 if (prev_connp != NULL)
2367 2531 CONN_DEC_REF(prev_connp);
2368 2532 }
2369 2533 }
2370 2534
2371 2535 /*
2372 2536 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2373 2537 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
2374 2538 * held; caller must call CONN_DEC_REF. Only checks for connected entries
2375 2539 * (peer tcp in ESTABLISHED state).
2376 2540 */
2377 2541 conn_t *
2378 2542 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2379 2543 ip_stack_t *ipst)
2380 2544 {
2381 2545 uint32_t ports;
2382 2546 uint16_t *pports = (uint16_t *)&ports;
2383 2547 connf_t *connfp;
2384 2548 conn_t *tconnp;
2385 2549 boolean_t zone_chk;
2386 2550
2387 2551 /*
2388 2552 * If either the source of destination address is loopback, then
2389 2553 * both endpoints must be in the same Zone. Otherwise, both of
2390 2554 * the addresses are system-wide unique (tcp is in ESTABLISHED
2391 2555 * state) and the endpoints may reside in different Zones.
2392 2556 */
2393 2557 zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
2394 2558 ipha->ipha_dst == htonl(INADDR_LOOPBACK));
2395 2559
2396 2560 pports[0] = tcpha->tha_fport;
2397 2561 pports[1] = tcpha->tha_lport;
2398 2562
2399 2563 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2400 2564 ports, ipst)];
2401 2565
2402 2566 mutex_enter(&connfp->connf_lock);
2403 2567 for (tconnp = connfp->connf_head; tconnp != NULL;
2404 2568 tconnp = tconnp->conn_next) {
2405 2569
2406 2570 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2407 2571 ipha->ipha_dst, ipha->ipha_src, ports) &&
2408 2572 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2409 2573 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2410 2574
2411 2575 ASSERT(tconnp != connp);
2412 2576 CONN_INC_REF(tconnp);
2413 2577 mutex_exit(&connfp->connf_lock);
2414 2578 return (tconnp);
2415 2579 }
2416 2580 }
2417 2581 mutex_exit(&connfp->connf_lock);
2418 2582 return (NULL);
2419 2583 }
2420 2584
2421 2585 /*
2422 2586 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2423 2587 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
2424 2588 * held; caller must call CONN_DEC_REF. Only checks for connected entries
2425 2589 * (peer tcp in ESTABLISHED state).
2426 2590 */
2427 2591 conn_t *
2428 2592 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2429 2593 ip_stack_t *ipst)
2430 2594 {
2431 2595 uint32_t ports;
2432 2596 uint16_t *pports = (uint16_t *)&ports;
2433 2597 connf_t *connfp;
2434 2598 conn_t *tconnp;
2435 2599 boolean_t zone_chk;
2436 2600
2437 2601 /*
2438 2602 * If either the source of destination address is loopback, then
2439 2603 * both endpoints must be in the same Zone. Otherwise, both of
2440 2604 * the addresses are system-wide unique (tcp is in ESTABLISHED
2441 2605 * state) and the endpoints may reside in different Zones. We
2442 2606 * don't do Zone check for link local address(es) because the
2443 2607 * current Zone implementation treats each link local address as
2444 2608 * being unique per system node, i.e. they belong to global Zone.
2445 2609 */
2446 2610 zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
2447 2611 IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
2448 2612
2449 2613 pports[0] = tcpha->tha_fport;
2450 2614 pports[1] = tcpha->tha_lport;
2451 2615
2452 2616 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2453 2617 ports, ipst)];
2454 2618
2455 2619 mutex_enter(&connfp->connf_lock);
2456 2620 for (tconnp = connfp->connf_head; tconnp != NULL;
2457 2621 tconnp = tconnp->conn_next) {
2458 2622
2459 2623 /* We skip conn_bound_if check here as this is loopback tcp */
2460 2624 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2461 2625 ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2462 2626 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2463 2627 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2464 2628
2465 2629 ASSERT(tconnp != connp);
2466 2630 CONN_INC_REF(tconnp);
2467 2631 mutex_exit(&connfp->connf_lock);
2468 2632 return (tconnp);
2469 2633 }
2470 2634 }
2471 2635 mutex_exit(&connfp->connf_lock);
2472 2636 return (NULL);
2473 2637 }
2474 2638
2475 2639 /*
2476 2640 * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2477 2641 * Returns with conn reference held. Caller must call CONN_DEC_REF.
2478 2642 * Only checks for connected entries i.e. no INADDR_ANY checks.
2479 2643 */
2480 2644 conn_t *
2481 2645 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2482 2646 ip_stack_t *ipst)
2483 2647 {
2484 2648 uint32_t ports;
2485 2649 uint16_t *pports;
2486 2650 connf_t *connfp;
2487 2651 conn_t *tconnp;
2488 2652
2489 2653 pports = (uint16_t *)&ports;
2490 2654 pports[0] = tcpha->tha_fport;
2491 2655 pports[1] = tcpha->tha_lport;
2492 2656
2493 2657 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2494 2658 ports, ipst)];
2495 2659
2496 2660 mutex_enter(&connfp->connf_lock);
2497 2661 for (tconnp = connfp->connf_head; tconnp != NULL;
2498 2662 tconnp = tconnp->conn_next) {
2499 2663
2500 2664 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2501 2665 ipha->ipha_dst, ipha->ipha_src, ports) &&
2502 2666 tconnp->conn_tcp->tcp_state >= min_state) {
2503 2667
2504 2668 CONN_INC_REF(tconnp);
2505 2669 mutex_exit(&connfp->connf_lock);
2506 2670 return (tconnp);
2507 2671 }
2508 2672 }
2509 2673 mutex_exit(&connfp->connf_lock);
2510 2674 return (NULL);
2511 2675 }
2512 2676
2513 2677 /*
2514 2678 * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2515 2679 * Returns with conn reference held. Caller must call CONN_DEC_REF.
2516 2680 * Only checks for connected entries i.e. no INADDR_ANY checks.
2517 2681 * Match on ifindex in addition to addresses.
2518 2682 */
2519 2683 conn_t *
2520 2684 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2521 2685 uint_t ifindex, ip_stack_t *ipst)
2522 2686 {
2523 2687 tcp_t *tcp;
2524 2688 uint32_t ports;
2525 2689 uint16_t *pports;
2526 2690 connf_t *connfp;
2527 2691 conn_t *tconnp;
2528 2692
2529 2693 pports = (uint16_t *)&ports;
2530 2694 pports[0] = tcpha->tha_fport;
2531 2695 pports[1] = tcpha->tha_lport;
2532 2696
2533 2697 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2534 2698 ports, ipst)];
2535 2699
2536 2700 mutex_enter(&connfp->connf_lock);
2537 2701 for (tconnp = connfp->connf_head; tconnp != NULL;
2538 2702 tconnp = tconnp->conn_next) {
2539 2703
2540 2704 tcp = tconnp->conn_tcp;
2541 2705 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2542 2706 ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2543 2707 tcp->tcp_state >= min_state &&
2544 2708 (tconnp->conn_bound_if == 0 ||
2545 2709 tconnp->conn_bound_if == ifindex)) {
2546 2710
2547 2711 CONN_INC_REF(tconnp);
2548 2712 mutex_exit(&connfp->connf_lock);
2549 2713 return (tconnp);
2550 2714 }
2551 2715 }
2552 2716 mutex_exit(&connfp->connf_lock);
2553 2717 return (NULL);
2554 2718 }
2555 2719
2556 2720 /*
2557 2721 * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2558 2722 * a listener when changing state.
2559 2723 */
2560 2724 conn_t *
2561 2725 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2562 2726 ip_stack_t *ipst)
2563 2727 {
2564 2728 connf_t *bind_connfp;
2565 2729 conn_t *connp;
2566 2730 tcp_t *tcp;
2567 2731
2568 2732 /*
2569 2733 * Avoid false matches for packets sent to an IP destination of
2570 2734 * all zeros.
2571 2735 */
2572 2736 if (laddr == 0)
2573 2737 return (NULL);
2574 2738
2575 2739 ASSERT(zoneid != ALL_ZONES);
2576 2740
2577 2741 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2578 2742 mutex_enter(&bind_connfp->connf_lock);
2579 2743 for (connp = bind_connfp->connf_head; connp != NULL;
2580 2744 connp = connp->conn_next) {
2581 2745 tcp = connp->conn_tcp;
2582 2746 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
2583 2747 IPCL_ZONE_MATCH(connp, zoneid) &&
2584 2748 (tcp->tcp_listener == NULL)) {
2585 2749 CONN_INC_REF(connp);
2586 2750 mutex_exit(&bind_connfp->connf_lock);
2587 2751 return (connp);
2588 2752 }
2589 2753 }
2590 2754 mutex_exit(&bind_connfp->connf_lock);
2591 2755 return (NULL);
2592 2756 }
2593 2757
2594 2758 /*
2595 2759 * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2596 2760 * a listener when changing state.
2597 2761 */
2598 2762 conn_t *
2599 2763 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2600 2764 zoneid_t zoneid, ip_stack_t *ipst)
2601 2765 {
2602 2766 connf_t *bind_connfp;
2603 2767 conn_t *connp = NULL;
2604 2768 tcp_t *tcp;
2605 2769
2606 2770 /*
2607 2771 * Avoid false matches for packets sent to an IP destination of
2608 2772 * all zeros.
2609 2773 */
2610 2774 if (IN6_IS_ADDR_UNSPECIFIED(laddr))
2611 2775 return (NULL);
2612 2776
2613 2777 ASSERT(zoneid != ALL_ZONES);
2614 2778
2615 2779 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2616 2780 mutex_enter(&bind_connfp->connf_lock);
2617 2781 for (connp = bind_connfp->connf_head; connp != NULL;
2618 2782 connp = connp->conn_next) {
2619 2783 tcp = connp->conn_tcp;
2620 2784 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
2621 2785 IPCL_ZONE_MATCH(connp, zoneid) &&
2622 2786 (connp->conn_bound_if == 0 ||
2623 2787 connp->conn_bound_if == ifindex) &&
2624 2788 tcp->tcp_listener == NULL) {
2625 2789 CONN_INC_REF(connp);
2626 2790 mutex_exit(&bind_connfp->connf_lock);
2627 2791 return (connp);
2628 2792 }
2629 2793 }
2630 2794 mutex_exit(&bind_connfp->connf_lock);
2631 2795 return (NULL);
2632 2796 }
2633 2797
2634 2798 /*
2635 2799 * ipcl_get_next_conn
2636 2800 * get the next entry in the conn global list
2637 2801 * and put a reference on the next_conn.
2638 2802 * decrement the reference on the current conn.
2639 2803 *
2640 2804 * This is an iterator based walker function that also provides for
2641 2805 * some selection by the caller. It walks through the conn_hash bucket
2642 2806 * searching for the next valid connp in the list, and selects connections
2643 2807 * that are neither closed nor condemned. It also REFHOLDS the conn
2644 2808 * thus ensuring that the conn exists when the caller uses the conn.
2645 2809 */
2646 2810 conn_t *
2647 2811 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2648 2812 {
2649 2813 conn_t *next_connp;
2650 2814
2651 2815 if (connfp == NULL)
2652 2816 return (NULL);
2653 2817
2654 2818 mutex_enter(&connfp->connf_lock);
2655 2819
2656 2820 next_connp = (connp == NULL) ?
2657 2821 connfp->connf_head : connp->conn_g_next;
2658 2822
2659 2823 while (next_connp != NULL) {
2660 2824 mutex_enter(&next_connp->conn_lock);
2661 2825 if (!(next_connp->conn_flags & conn_flags) ||
2662 2826 (next_connp->conn_state_flags &
2663 2827 (CONN_CONDEMNED | CONN_INCIPIENT))) {
2664 2828 /*
2665 2829 * This conn has been condemned or
2666 2830 * is closing, or the flags don't match
2667 2831 */
2668 2832 mutex_exit(&next_connp->conn_lock);
2669 2833 next_connp = next_connp->conn_g_next;
2670 2834 continue;
2671 2835 }
2672 2836 CONN_INC_REF_LOCKED(next_connp);
2673 2837 mutex_exit(&next_connp->conn_lock);
2674 2838 break;
2675 2839 }
2676 2840
2677 2841 mutex_exit(&connfp->connf_lock);
2678 2842
2679 2843 if (connp != NULL)
2680 2844 CONN_DEC_REF(connp);
2681 2845
2682 2846 return (next_connp);
2683 2847 }
2684 2848
2685 2849 #ifdef CONN_DEBUG
2686 2850 /*
2687 2851 * Trace of the last NBUF refhold/refrele
2688 2852 */
2689 2853 int
2690 2854 conn_trace_ref(conn_t *connp)
2691 2855 {
2692 2856 int last;
2693 2857 conn_trace_t *ctb;
2694 2858
2695 2859 ASSERT(MUTEX_HELD(&connp->conn_lock));
2696 2860 last = connp->conn_trace_last;
2697 2861 last++;
2698 2862 if (last == CONN_TRACE_MAX)
2699 2863 last = 0;
2700 2864
2701 2865 ctb = &connp->conn_trace_buf[last];
2702 2866 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2703 2867 connp->conn_trace_last = last;
2704 2868 return (1);
2705 2869 }
2706 2870
2707 2871 int
2708 2872 conn_untrace_ref(conn_t *connp)
2709 2873 {
2710 2874 int last;
2711 2875 conn_trace_t *ctb;
2712 2876
2713 2877 ASSERT(MUTEX_HELD(&connp->conn_lock));
2714 2878 last = connp->conn_trace_last;
2715 2879 last++;
2716 2880 if (last == CONN_TRACE_MAX)
2717 2881 last = 0;
2718 2882
2719 2883 ctb = &connp->conn_trace_buf[last];
2720 2884 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2721 2885 connp->conn_trace_last = last;
2722 2886 return (1);
2723 2887 }
2724 2888 #endif
↓ open down ↓ |
537 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX