dccp: conn_t
--- old/usr/src/uts/common/inet/ip/ipclassifier.c
+++ new/usr/src/uts/common/inet/ip/ipclassifier.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * IP PACKET CLASSIFIER
27 27 *
28 28 * The IP packet classifier provides mapping between IP packets and persistent
29 29 * connection state for connection-oriented protocols. It also provides
30 30 * interface for managing connection states.
31 31 *
32 32 * The connection state is kept in conn_t data structure and contains, among
33 33 * other things:
34 34 *
35 35 * o local/remote address and ports
36 36 * o Transport protocol
37 37 * o squeue for the connection (for TCP only)
38 38 * o reference counter
39 39 * o Connection state
40 40 * o hash table linkage
41 41 * o interface/ire information
42 42 * o credentials
43 43 * o ipsec policy
44 44 * o send and receive functions.
45 45 * o mutex lock.
46 46 *
47 47 * Connections use a reference counting scheme. They are freed when the
48 48 * reference counter drops to zero. A reference is incremented when a connection
49 49 * is placed in a list or table, when an incoming packet for the connection arrives,
50 50 * and when the connection is processed via squeue (squeue processing may be
51 51 * asynchronous and the reference protects the connection from being destroyed
52 52 * before its processing is finished).
53 53 *
54 54 * conn_recv is used to pass up packets to the ULP.
55 55 * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
56 56 * a listener, and changes to tcp_input_listener as the listener has picked a
57 57 * good squeue. For other cases it is set to tcp_input_data.
58 58 *
59 59 * conn_recvicmp is used to pass up ICMP errors to the ULP.
60 60 *
61 61 * Classifier uses several hash tables:
62 62 *
63 63 * ipcl_conn_fanout: contains all TCP connections in CONNECTED state
64 64 * ipcl_bind_fanout: contains all connections in BOUND state
65 65 * ipcl_proto_fanout: IPv4 protocol fanout
66 66 * ipcl_proto_fanout_v6: IPv6 protocol fanout
67 67 * ipcl_udp_fanout: contains all UDP connections
68 68 * ipcl_iptun_fanout: contains all IP tunnel connections
69 69 * ipcl_globalhash_fanout: contains all connections
70 + * ipcl_dccp_conn_fanout: contains all DCCP connections in CONNECTED state
71 + * ipcl_dccp_bind_fanout: contains all DCCP connections in BOUND state
70 72 *
71 73 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
72 74 * which need to view all existing connections.
73 75 *
74 76 * All tables are protected by per-bucket locks. When both per-bucket lock and
75 77 * connection lock need to be held, the per-bucket lock should be acquired
76 78 * first, followed by the connection lock.
77 79 *
78 80 * All functions doing search in one of these tables increment a reference
79 81 * counter on the connection found (if any). This reference should be dropped
80 82 * when the caller has finished processing the connection.
81 83 *
82 84 *
83 85 * INTERFACES:
84 86 * ===========
85 87 *
86 88 * Connection Lookup:
87 89 * ------------------
88 90 *
89 91 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
90 92 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
91 93 *
92 94 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
93 95 * it can't find any associated connection. If the connection is found, its
94 96 * reference counter is incremented.
95 97 *
96 98 * mp: mblock, containing packet header. The full header should fit
97 99 * into a single mblock. It should also contain at least full IP
98 100 * and TCP or UDP header.
99 101 *
100 102 * protocol: Either IPPROTO_TCP or IPPROTO_UDP.
101 103 *
102 104 * hdr_len: The size of IP header. It is used to find TCP or UDP header in
103 105 * the packet.
104 106 *
105 107 * ira->ira_zoneid: The zone in which the returned connection must be; the
106 108 * zoneid corresponding to the ire_zoneid on the IRE located for
107 109 * the packet's destination address.
108 110 *
109 111 * ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
110 112 * IRAF_TX_SHARED_ADDR flags
111 113 *
112 114 * For TCP connections, the lookup order is as follows:
113 115 * 5-tuple {src, dst, protocol, local port, remote port}
114 116 * lookup in ipcl_conn_fanout table.
115 117 * 3-tuple {dst, remote port, protocol} lookup in
116 118 * ipcl_bind_fanout table.
117 119 *
118 120 * For UDP connections, a 5-tuple {src, dst, protocol, local port,
119 121 * remote port} lookup is done on ipcl_udp_fanout. Note that
120 122 * these interfaces do not handle cases where a packet belongs
121 123 * to multiple UDP clients, which is handled in IP itself.
122 124 *
123 125 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
124 126 * determine which actual zone gets the segment. This is used only in a
125 127 * labeled environment. The matching rules are:
126 128 *
127 129 * - If it's not a multilevel port, then the label on the packet selects
128 130 * the zone. Unlabeled packets are delivered to the global zone.
129 131 *
130 132 * - If it's a multilevel port, then only the zone registered to receive
131 133 * packets on that port matches.
132 134 *
133 135 * Also, in a labeled environment, packet labels need to be checked. For fully
134 136 * bound TCP connections, we can assume that the packet label was checked
135 137 * during connection establishment, and doesn't need to be checked on each
136 138 * packet. For others, though, we need to check for strict equality or, for
137 139 * multilevel ports, membership in the range or set. This part currently does
138 140 * a tnrh lookup on each packet, but could be optimized to use cached results
139 141 * if that were necessary. (SCTP doesn't come through here, but if it did,
140 142 * we would apply the same rules as TCP.)
141 143 *
142 144 * An implication of the above is that fully-bound TCP sockets must always use
143 145 * distinct 4-tuples; they can't be discriminated by label alone.
144 146 *
145 147 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
146 148 * as there's no connection set-up handshake and no shared state.
147 149 *
148 150 * Labels on looped-back packets within a single zone do not need to be
149 151 * checked, as all processes in the same zone have the same label.
150 152 *
151 153 * Finally, for unlabeled packets received by a labeled system, special rules
152 154 * apply. We consider only the MLP if there is one. Otherwise, we prefer a
153 155 * socket in the zone whose label matches the default label of the sender, if
154 156 * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the
155 157 * receiver's label must dominate the sender's default label.
156 158 *
157 159 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
158 160 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
159 161 * ip_stack);
160 162 *
161 163 * Lookup routine to find an exact match for {src, dst, local port,
162 164 * remote port} for TCP connections in ipcl_conn_fanout. The address and
163 165 * ports are read from the IP and TCP header respectively.
164 166 *
165 167 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol,
166 168 * zoneid, ip_stack);
167 169 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
168 170 * zoneid, ip_stack);
169 171 *
170 172 * Lookup routine to find a listener with the tuple {lport, laddr,
171 173 * protocol} in the ipcl_bind_fanout table. For IPv6, an additional
172 174 * parameter interface index is also compared.
173 175 *
174 176 * void ipcl_walk(func, arg, ip_stack)
175 177 *
176 178 * Apply 'func' to every connection available. The 'func' is called as
177 179 * (*func)(connp, arg). The walk is non-atomic so connections may be
178 180 * created and destroyed during the walk. The CONN_CONDEMNED and
179 181 * CONN_INCIPIENT flags ensure that connections which are newly created
180 182 * or being destroyed are not selected by the walker.
181 183 *
182 184 * Table Updates
183 185 * -------------
184 186 *
185 187 * int ipcl_conn_insert(connp);
186 188 * int ipcl_conn_insert_v4(connp);
187 189 * int ipcl_conn_insert_v6(connp);
188 190 *
189 191 * Insert 'connp' in the ipcl_conn_fanout.
190 192 * Arguments :
191 193 * connp conn_t to be inserted
192 194 *
193 195 * Return value :
194 196 * 0 if connp was inserted
195 197 * EADDRINUSE if the connection with the same tuple
196 198 * already exists.
197 199 *
198 200 * int ipcl_bind_insert(connp);
199 201 * int ipcl_bind_insert_v4(connp);
200 202 * int ipcl_bind_insert_v6(connp);
201 203 *
202 204 * Insert 'connp' in ipcl_bind_fanout.
203 205 * Arguments :
204 206 * connp conn_t to be inserted
205 207 *
206 208 *
207 209 * void ipcl_hash_remove(connp);
208 210 *
209 211 * Removes the 'connp' from the connection fanout table.
210 212 *
211 213 * Connection Creation/Destruction
212 214 * -------------------------------
213 215 *
214 216 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
215 217 *
216 218 * Creates a new conn based on the type flag, inserts it into
217 219 * globalhash table.
218 220 *
219 221 * type: This flag determines the type of conn_t which needs to be
220 222 * created i.e., which kmem_cache it comes from.
221 223 * IPCL_TCPCONN indicates a TCP connection
222 224 * IPCL_SCTPCONN indicates a SCTP connection
223 225 * IPCL_UDPCONN indicates a UDP conn_t.
224 226 * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t.
225 227 * IPCL_RTSCONN indicates a RTS conn_t.
228 + * IPCL_DCCPCONN indicates a DCCP conn_t.
226 229 * IPCL_IPCCONN indicates all other connections.
227 230 *
228 231 * void ipcl_conn_destroy(connp)
229 232 *
230 233 * Destroys the connection state, removes it from the global
231 234 * connection hash table and frees its memory.
232 235 */
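
As a quick illustration of the lookup contract described in the block comment above (an editorial sketch, not part of this change; the mp, hdr_len, ira and ipst variables are assumed to have been prepared by the IP input path), an inbound-path caller classifies the packet and must drop the reference the classifier took:

	conn_t	*connp;

	connp = ipcl_classify_v4(mp, IPPROTO_TCP, hdr_len, ira, ipst);
	if (connp == NULL) {
		/* No matching conn; the no-listener case is handled elsewhere. */
		return;
	}
	/* connp was returned with an extra reference held by the classifier. */
	/* ... hand mp to the ULP via connp->conn_recv ... */
	CONN_DEC_REF(connp);	/* drop the classifier's reference */
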
233 236
234 237 #include <sys/types.h>
235 238 #include <sys/stream.h>
236 239 #include <sys/stropts.h>
237 240 #include <sys/sysmacros.h>
238 241 #include <sys/strsubr.h>
239 242 #include <sys/strsun.h>
240 243 #define _SUN_TPI_VERSION 2
241 244 #include <sys/ddi.h>
242 245 #include <sys/cmn_err.h>
243 246 #include <sys/debug.h>
244 247
245 248 #include <sys/systm.h>
246 249 #include <sys/param.h>
247 250 #include <sys/kmem.h>
248 251 #include <sys/isa_defs.h>
249 252 #include <inet/common.h>
250 253 #include <netinet/ip6.h>
251 254 #include <netinet/icmp6.h>
252 255
253 256 #include <inet/ip.h>
254 257 #include <inet/ip_if.h>
255 258 #include <inet/ip_ire.h>
256 259 #include <inet/ip6.h>
257 260 #include <inet/ip_ndp.h>
258 261 #include <inet/ip_impl.h>
259 262 #include <inet/udp_impl.h>
263 +#include <inet/dccp_impl.h>
260 264 #include <inet/sctp_ip.h>
261 265 #include <inet/sctp/sctp_impl.h>
262 266 #include <inet/rawip_impl.h>
263 267 #include <inet/rts_impl.h>
264 268 #include <inet/iptun/iptun_impl.h>
265 269
266 270 #include <sys/cpuvar.h>
267 271
268 272 #include <inet/ipclassifier.h>
269 273 #include <inet/tcp.h>
270 274 #include <inet/ipsec_impl.h>
271 275
272 276 #include <sys/tsol/tnet.h>
273 277 #include <sys/sockio.h>
274 278
275 279 /* Old value for compatibility. Setable in /etc/system */
276 280 uint_t tcp_conn_hash_size = 0;
277 281
278 282 /* New value. Zero means choose automatically. Setable in /etc/system */
279 283 uint_t ipcl_conn_hash_size = 0;
280 284 uint_t ipcl_conn_hash_memfactor = 8192;
281 285 uint_t ipcl_conn_hash_maxsize = 82500;
282 286
283 287 /* bind/udp fanout table size */
284 288 uint_t ipcl_bind_fanout_size = 512;
285 289 uint_t ipcl_udp_fanout_size = 16384;
286 290
291 +/* Fanout table sizes for dccp */
292 +uint_t ipcl_dccp_conn_fanout_size = 512;
293 +uint_t ipcl_dccp_bind_fanout_size = 512;
294 +
287 295 /* Raw socket fanout size. Must be a power of 2. */
288 296 uint_t ipcl_raw_fanout_size = 256;
289 297
290 298 /*
291 299 * The IPCL_IPTUN_HASH() function works best with a prime table size. We
292 300 * expect that most large deployments would have hundreds of tunnels, and
293 301 * thousands in the extreme case.
294 302 */
295 303 uint_t ipcl_iptun_fanout_size = 6143;
296 304
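
Since the fanout sizes above are taken from /etc/system (see the "Take values from /etc/system" step in ipcl_init() below), a tuning entry would look roughly like the following. This is a hedged example: the ip module prefix is assumed because ipclassifier.c is built into the ip module, and the values shown are arbitrary. Settings take effect at the next boot, when ipcl_init() runs for a stack.

	set ip:ipcl_conn_hash_size = 32768
	set ip:ipcl_dccp_bind_fanout_size = 1024
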
297 305 /*
298 306 * Power of 2^N Primes useful for hashing for N of 0-28,
299 307 * these primes are the nearest prime <= 2^N - 2^(N-2).
300 308 */
301 309
302 310 #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \
303 311 6143, 12281, 24571, 49139, 98299, 196597, 393209, \
304 312 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \
305 313 50331599, 100663291, 201326557, 0}
306 314
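
As a worked check of the formula above: for N = 13, 2^13 - 2^11 = 6144, and the nearest prime less than or equal to 6144 is 6143, which is the table entry used above for ipcl_iptun_fanout_size.
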
307 315 /*
308 316 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
309 317 * are aligned on cache lines.
310 318 */
311 319 typedef union itc_s {
312 320 conn_t itc_conn;
313 321 char itcu_filler[CACHE_ALIGN(conn_s)];
314 322 } itc_t;
315 323
316 324 struct kmem_cache *tcp_conn_cache;
317 325 struct kmem_cache *ip_conn_cache;
318 326 extern struct kmem_cache *sctp_conn_cache;
319 327 struct kmem_cache *udp_conn_cache;
320 328 struct kmem_cache *rawip_conn_cache;
321 329 struct kmem_cache *rts_conn_cache;
330 +struct kmem_cache *dccp_conn_cache;
322 331
323 332 extern void tcp_timermp_free(tcp_t *);
324 333 extern mblk_t *tcp_timermp_alloc(int);
325 334
326 335 static int ip_conn_constructor(void *, void *, int);
327 336 static void ip_conn_destructor(void *, void *);
328 337
329 338 static int tcp_conn_constructor(void *, void *, int);
330 339 static void tcp_conn_destructor(void *, void *);
331 340
332 341 static int udp_conn_constructor(void *, void *, int);
333 342 static void udp_conn_destructor(void *, void *);
334 343
335 344 static int rawip_conn_constructor(void *, void *, int);
336 345 static void rawip_conn_destructor(void *, void *);
337 346
338 347 static int rts_conn_constructor(void *, void *, int);
339 348 static void rts_conn_destructor(void *, void *);
340 349
350 +static int dccp_conn_constructor(void *, void *, int);
351 +static void dccp_conn_destructor(void *, void *);
352 +
341 353 /*
342 354 * Global (for all stack instances) init routine
343 355 */
344 356 void
345 357 ipcl_g_init(void)
346 358 {
347 359 ip_conn_cache = kmem_cache_create("ip_conn_cache",
348 360 sizeof (conn_t), CACHE_ALIGN_SIZE,
349 361 ip_conn_constructor, ip_conn_destructor,
350 362 NULL, NULL, NULL, 0);
351 363
352 364 tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
353 365 sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
354 366 tcp_conn_constructor, tcp_conn_destructor,
355 367 tcp_conn_reclaim, NULL, NULL, 0);
356 368
357 369 udp_conn_cache = kmem_cache_create("udp_conn_cache",
358 370 sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
359 371 udp_conn_constructor, udp_conn_destructor,
360 372 NULL, NULL, NULL, 0);
361 373
362 374 rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
363 375 sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
364 376 rawip_conn_constructor, rawip_conn_destructor,
365 377 NULL, NULL, NULL, 0);
366 378
367 379 rts_conn_cache = kmem_cache_create("rts_conn_cache",
368 380 sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
369 381 rts_conn_constructor, rts_conn_destructor,
370 382 NULL, NULL, NULL, 0);
383 +
384 + /* XXX:DCCP reclaim */
385 + dccp_conn_cache = kmem_cache_create("dccp_conn_cache",
386 + sizeof (itc_t) + sizeof (dccp_t), CACHE_ALIGN_SIZE,
387 + dccp_conn_constructor, dccp_conn_destructor,
388 + NULL, NULL, NULL, 0);
371 389 }
372 390
373 391 /*
374 392 * ipclassifier initialization routine, sets up hash tables.
375 393 */
376 394 void
377 395 ipcl_init(ip_stack_t *ipst)
378 396 {
379 397 int i;
380 398 int sizes[] = P2Ps();
381 399
382 400 /*
383 401 * Calculate size of conn fanout table from /etc/system settings
384 402 */
385 403 if (ipcl_conn_hash_size != 0) {
386 404 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
387 405 } else if (tcp_conn_hash_size != 0) {
388 406 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
389 407 } else {
390 408 extern pgcnt_t freemem;
391 409
392 410 ipst->ips_ipcl_conn_fanout_size =
393 411 (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
394 412
395 413 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
396 414 ipst->ips_ipcl_conn_fanout_size =
397 415 ipcl_conn_hash_maxsize;
398 416 }
399 417 }
400 418
401 419 for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
402 420 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
403 421 break;
404 422 }
405 423 }
406 424 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
407 425 /* Out of range, use the 2^16 value */
408 426 ipst->ips_ipcl_conn_fanout_size = sizes[16];
409 427 }
410 428
411 429 /* Take values from /etc/system */
412 430 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
431 + ipst->ips_ipcl_dccp_conn_fanout_size = ipcl_dccp_conn_fanout_size;
432 + ipst->ips_ipcl_dccp_bind_fanout_size = ipcl_dccp_bind_fanout_size;
413 433 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
414 434 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
415 435 ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
416 436
417 437 ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
418 438
419 439 ipst->ips_ipcl_conn_fanout = kmem_zalloc(
420 440 ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
421 441
422 442 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
423 443 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
424 444 MUTEX_DEFAULT, NULL);
425 445 }
426 446
427 447 ipst->ips_ipcl_bind_fanout = kmem_zalloc(
428 448 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
429 449
430 450 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
431 451 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
432 452 MUTEX_DEFAULT, NULL);
433 453 }
434 454
435 455 ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
436 456 sizeof (connf_t), KM_SLEEP);
437 457 for (i = 0; i < IPPROTO_MAX; i++) {
438 458 mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
439 459 MUTEX_DEFAULT, NULL);
440 460 }
441 461
442 462 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
443 463 sizeof (connf_t), KM_SLEEP);
444 464 for (i = 0; i < IPPROTO_MAX; i++) {
445 465 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
446 466 MUTEX_DEFAULT, NULL);
447 467 }
448 468
449 469 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
450 470 mutex_init(&ipst->ips_rts_clients->connf_lock,
451 471 NULL, MUTEX_DEFAULT, NULL);
452 472
453 473 ipst->ips_ipcl_udp_fanout = kmem_zalloc(
454 474 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
455 475 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
456 476 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
457 477 MUTEX_DEFAULT, NULL);
458 478 }
459 479
460 480 ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
461 481 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
462 482 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
463 483 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
464 484 MUTEX_DEFAULT, NULL);
465 485 }
466 486
467 487 ipst->ips_ipcl_raw_fanout = kmem_zalloc(
468 488 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
469 489 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
470 490 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
471 491 MUTEX_DEFAULT, NULL);
472 492 }
473 493
474 494 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
475 495 sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
476 496 for (i = 0; i < CONN_G_HASH_SIZE; i++) {
477 497 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
478 498 NULL, MUTEX_DEFAULT, NULL);
479 499 }
500 +
501 + ipst->ips_ipcl_dccp_conn_fanout = kmem_zalloc(
502 + ipst->ips_ipcl_dccp_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
503 + for (i = 0; i < ipst->ips_ipcl_dccp_conn_fanout_size; i++) {
504 + mutex_init(&ipst->ips_ipcl_dccp_conn_fanout[i].connf_lock, NULL,
505 + MUTEX_DEFAULT, NULL);
506 + }
507 +
508 + ipst->ips_ipcl_dccp_bind_fanout = kmem_zalloc(
509 + ipst->ips_ipcl_dccp_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
510 + for (i = 0; i < ipst->ips_ipcl_dccp_bind_fanout_size; i++) {
511 + mutex_init(&ipst->ips_ipcl_dccp_bind_fanout[i].connf_lock, NULL,
512 + MUTEX_DEFAULT, NULL);
513 + }
480 514 }
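
To make the sizing logic at the top of ipcl_init() concrete (a worked example, not output from a real system): with 2 GB of free memory (freemem * PAGESIZE) and the default ipcl_conn_hash_memfactor of 8192, the computed size is 2147483648 / 8192 = 262144; that exceeds ipcl_conn_hash_maxsize (82500), so it is capped at 82500 and then rounded up to the next prime in the P2Ps table, giving a conn fanout of 98299 buckets.
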
481 515
482 516 void
483 517 ipcl_g_destroy(void)
484 518 {
485 519 kmem_cache_destroy(ip_conn_cache);
486 520 kmem_cache_destroy(tcp_conn_cache);
487 521 kmem_cache_destroy(udp_conn_cache);
488 522 kmem_cache_destroy(rawip_conn_cache);
489 523 kmem_cache_destroy(rts_conn_cache);
524 + kmem_cache_destroy(dccp_conn_cache);
490 525 }
491 526
492 527 /*
493 528 * All user-level and kernel use of the stack must be gone
494 529 * by now.
495 530 */
496 531 void
497 532 ipcl_destroy(ip_stack_t *ipst)
498 533 {
499 534 int i;
500 535
501 536 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
502 537 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
503 538 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
504 539 }
505 540 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
506 541 sizeof (connf_t));
507 542 ipst->ips_ipcl_conn_fanout = NULL;
508 543
509 544 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
510 545 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
511 546 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
512 547 }
513 548 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
514 549 sizeof (connf_t));
515 550 ipst->ips_ipcl_bind_fanout = NULL;
516 551
517 552 for (i = 0; i < IPPROTO_MAX; i++) {
518 553 ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
519 554 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
520 555 }
521 556 kmem_free(ipst->ips_ipcl_proto_fanout_v4,
522 557 IPPROTO_MAX * sizeof (connf_t));
523 558 ipst->ips_ipcl_proto_fanout_v4 = NULL;
524 559
525 560 for (i = 0; i < IPPROTO_MAX; i++) {
526 561 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
527 562 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
528 563 }
529 564 kmem_free(ipst->ips_ipcl_proto_fanout_v6,
530 565 IPPROTO_MAX * sizeof (connf_t));
531 566 ipst->ips_ipcl_proto_fanout_v6 = NULL;
532 567
533 568 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
534 569 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
535 570 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
536 571 }
537 572 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
538 573 sizeof (connf_t));
539 574 ipst->ips_ipcl_udp_fanout = NULL;
540 575
541 576 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
542 577 ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
543 578 mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
544 579 }
545 580 kmem_free(ipst->ips_ipcl_iptun_fanout,
546 581 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
547 582 ipst->ips_ipcl_iptun_fanout = NULL;
548 583
549 584 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
550 585 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
551 586 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
552 587 }
553 588 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
554 589 sizeof (connf_t));
555 590 ipst->ips_ipcl_raw_fanout = NULL;
556 591
557 592 for (i = 0; i < CONN_G_HASH_SIZE; i++) {
558 593 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
559 594 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
560 595 }
561 596 kmem_free(ipst->ips_ipcl_globalhash_fanout,
562 597 sizeof (connf_t) * CONN_G_HASH_SIZE);
563 598 ipst->ips_ipcl_globalhash_fanout = NULL;
564 599
600 + for (i = 0; i < ipst->ips_ipcl_dccp_conn_fanout_size; i++) {
601 + ASSERT(ipst->ips_ipcl_dccp_conn_fanout[i].connf_head == NULL);
602 + mutex_destroy(&ipst->ips_ipcl_dccp_conn_fanout[i].connf_lock);
603 + }
604 + kmem_free(ipst->ips_ipcl_dccp_conn_fanout,
605 + ipst->ips_ipcl_dccp_conn_fanout_size * sizeof (connf_t));
606 + ipst->ips_ipcl_dccp_conn_fanout = NULL;
607 +
608 + for (i = 0; i < ipst->ips_ipcl_dccp_bind_fanout_size; i++) {
609 + ASSERT(ipst->ips_ipcl_dccp_bind_fanout[i].connf_head == NULL);
610 + mutex_destroy(&ipst->ips_ipcl_dccp_bind_fanout[i].connf_lock);
611 + }
612 + kmem_free(ipst->ips_ipcl_dccp_bind_fanout,
613 + ipst->ips_ipcl_dccp_bind_fanout_size * sizeof (connf_t));
614 + ipst->ips_ipcl_dccp_bind_fanout = NULL;
615 +
565 616 ASSERT(ipst->ips_rts_clients->connf_head == NULL);
566 617 mutex_destroy(&ipst->ips_rts_clients->connf_lock);
567 618 kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
568 619 ipst->ips_rts_clients = NULL;
569 620 }
570 621
571 622 /*
572 623 * conn creation routine. Initializes the conn, sets the reference
573 624 * and inserts it in the global hash table.
574 625 */
575 626 conn_t *
576 627 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
577 628 {
578 629 conn_t *connp;
579 630 struct kmem_cache *conn_cache;
580 631
581 632 switch (type) {
582 633 case IPCL_SCTPCONN:
583 634 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
584 635 return (NULL);
585 636 sctp_conn_init(connp);
586 637 netstack_hold(ns);
587 638 connp->conn_netstack = ns;
588 639 connp->conn_ixa->ixa_ipst = ns->netstack_ip;
589 640 connp->conn_ixa->ixa_conn_id = (long)connp;
590 641 ipcl_globalhash_insert(connp);
591 642 return (connp);
592 643
593 644 case IPCL_TCPCONN:
594 645 conn_cache = tcp_conn_cache;
595 646 break;
596 647
597 648 case IPCL_UDPCONN:
598 649 conn_cache = udp_conn_cache;
599 650 break;
600 651
601 652 case IPCL_RAWIPCONN:
602 653 conn_cache = rawip_conn_cache;
603 654 break;
604 655
605 656 case IPCL_RTSCONN:
606 657 conn_cache = rts_conn_cache;
607 658 break;
608 659
609 660 case IPCL_IPCCONN:
610 661 conn_cache = ip_conn_cache;
611 662 break;
612 663
664 + case IPCL_DCCPCONN:
665 + conn_cache = dccp_conn_cache;
666 + break;
667 +
613 668 default:
614 669 connp = NULL;
615 670 ASSERT(0);
616 671 }
617 672
618 673 if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
619 674 return (NULL);
620 675
621 676 connp->conn_ref = 1;
622 677 netstack_hold(ns);
623 678 connp->conn_netstack = ns;
624 679 connp->conn_ixa->ixa_ipst = ns->netstack_ip;
625 680 connp->conn_ixa->ixa_conn_id = (long)connp;
626 681 ipcl_globalhash_insert(connp);
627 682 return (connp);
628 683 }
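
As a hedged sketch of how the DCCP code might obtain a conn_t from the routine above (illustrative only; the ns variable and the ENOMEM handling are assumptions, not taken from the dccp module in this change):

	conn_t	*connp;

	connp = ipcl_conn_create(IPCL_DCCPCONN, KM_NOSLEEP, ns);
	if (connp == NULL)
		return (ENOMEM);	/* cache allocation failed */
	/* connp starts with conn_ref == 1 and is already on the global hash. */
	/* ... DCCP-specific setup of connp->conn_dccp ... */
	CONN_DEC_REF(connp);	/* the last reference ends up in ipcl_conn_destroy() */
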
629 684
630 685 void
631 686 ipcl_conn_destroy(conn_t *connp)
632 687 {
633 688 mblk_t *mp;
634 689 netstack_t *ns = connp->conn_netstack;
635 690
636 691 ASSERT(!MUTEX_HELD(&connp->conn_lock));
637 692 ASSERT(connp->conn_ref == 0);
638 693 ASSERT(connp->conn_ioctlref == 0);
639 694
640 695 DTRACE_PROBE1(conn__destroy, conn_t *, connp);
641 696
642 697 if (connp->conn_cred != NULL) {
643 698 crfree(connp->conn_cred);
644 699 connp->conn_cred = NULL;
645 700 /* ixa_cred done in ipcl_conn_cleanup below */
646 701 }
647 702
648 703 if (connp->conn_ht_iphc != NULL) {
649 704 kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
650 705 connp->conn_ht_iphc = NULL;
651 706 connp->conn_ht_iphc_allocated = 0;
652 707 connp->conn_ht_iphc_len = 0;
653 708 connp->conn_ht_ulp = NULL;
654 709 connp->conn_ht_ulp_len = 0;
655 710 }
656 711 ip_pkt_free(&connp->conn_xmit_ipp);
657 712
658 713 ipcl_globalhash_remove(connp);
659 714
660 715 if (connp->conn_latch != NULL) {
661 716 IPLATCH_REFRELE(connp->conn_latch);
662 717 connp->conn_latch = NULL;
663 718 }
664 719 if (connp->conn_latch_in_policy != NULL) {
665 720 IPPOL_REFRELE(connp->conn_latch_in_policy);
666 721 connp->conn_latch_in_policy = NULL;
667 722 }
668 723 if (connp->conn_latch_in_action != NULL) {
669 724 IPACT_REFRELE(connp->conn_latch_in_action);
670 725 connp->conn_latch_in_action = NULL;
671 726 }
672 727 if (connp->conn_policy != NULL) {
673 728 IPPH_REFRELE(connp->conn_policy, ns);
674 729 connp->conn_policy = NULL;
675 730 }
676 731
677 732 if (connp->conn_ipsec_opt_mp != NULL) {
678 733 freemsg(connp->conn_ipsec_opt_mp);
679 734 connp->conn_ipsec_opt_mp = NULL;
680 735 }
681 736
682 737 if (connp->conn_flags & IPCL_TCPCONN) {
683 738 tcp_t *tcp = connp->conn_tcp;
684 739
685 740 tcp_free(tcp);
686 741 mp = tcp->tcp_timercache;
687 742
688 743 tcp->tcp_tcps = NULL;
689 744
690 745 /*
691 746 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
692 747 * the mblk.
693 748 */
694 749 if (tcp->tcp_rsrv_mp != NULL) {
695 750 freeb(tcp->tcp_rsrv_mp);
696 751 tcp->tcp_rsrv_mp = NULL;
697 752 mutex_destroy(&tcp->tcp_rsrv_mp_lock);
698 753 }
699 754
700 755 ipcl_conn_cleanup(connp);
701 756 connp->conn_flags = IPCL_TCPCONN;
702 757 if (ns != NULL) {
703 758 ASSERT(tcp->tcp_tcps == NULL);
704 759 connp->conn_netstack = NULL;
705 760 connp->conn_ixa->ixa_ipst = NULL;
706 761 netstack_rele(ns);
707 762 }
708 763
709 764 bzero(tcp, sizeof (tcp_t));
710 765
711 766 tcp->tcp_timercache = mp;
712 767 tcp->tcp_connp = connp;
713 768 kmem_cache_free(tcp_conn_cache, connp);
714 769 return;
715 770 }
716 771
717 772 if (connp->conn_flags & IPCL_SCTPCONN) {
718 773 ASSERT(ns != NULL);
719 774 sctp_free(connp);
720 775 return;
721 776 }
722 777
778 + if (connp->conn_flags & IPCL_DCCPCONN) {
779 + dccp_t *dccp = connp->conn_dccp;
780 +
781 + cmn_err(CE_NOTE, "ipclassifier: conn_flags DCCP cache_free");
782 +
783 + dccp_free(dccp);
784 + mp = dccp->dccp_timercache;
785 +
786 + dccp->dccp_dccps = NULL;
787 +
788 + ipcl_conn_cleanup(connp);
789 + connp->conn_flags = IPCL_DCCPCONN;
790 + if (ns != NULL) {
791 + ASSERT(dccp->dccp_dccps == NULL);
792 + connp->conn_netstack = NULL;
793 + connp->conn_ixa->ixa_ipst = NULL;
794 + netstack_rele(ns);
795 + }
796 +
797 + bzero(dccp, sizeof (dccp_t));
798 +
799 + dccp->dccp_timercache = mp;
800 + dccp->dccp_connp = connp;
801 + kmem_cache_free(dccp_conn_cache, connp);
802 + return;
803 + }
804 +
723 805 ipcl_conn_cleanup(connp);
724 806 if (ns != NULL) {
725 807 connp->conn_netstack = NULL;
726 808 connp->conn_ixa->ixa_ipst = NULL;
727 809 netstack_rele(ns);
728 810 }
729 811
730 812 /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
731 813 if (connp->conn_flags & IPCL_UDPCONN) {
732 814 connp->conn_flags = IPCL_UDPCONN;
733 815 kmem_cache_free(udp_conn_cache, connp);
734 816 } else if (connp->conn_flags & IPCL_RAWIPCONN) {
735 817 connp->conn_flags = IPCL_RAWIPCONN;
736 818 connp->conn_proto = IPPROTO_ICMP;
737 819 connp->conn_ixa->ixa_protocol = connp->conn_proto;
738 820 kmem_cache_free(rawip_conn_cache, connp);
739 821 } else if (connp->conn_flags & IPCL_RTSCONN) {
740 822 connp->conn_flags = IPCL_RTSCONN;
741 823 kmem_cache_free(rts_conn_cache, connp);
742 824 } else {
743 825 connp->conn_flags = IPCL_IPCCONN;
744 826 ASSERT(connp->conn_flags & IPCL_IPCCONN);
745 827 ASSERT(connp->conn_priv == NULL);
746 828 kmem_cache_free(ip_conn_cache, connp);
747 829 }
748 830 }
749 831
750 832 /*
751 833 * Running in cluster mode - deregister listener information
752 834 */
753 835 static void
754 836 ipcl_conn_unlisten(conn_t *connp)
755 837 {
756 838 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
757 839 ASSERT(connp->conn_lport != 0);
758 840
759 841 if (cl_inet_unlisten != NULL) {
760 842 sa_family_t addr_family;
761 843 uint8_t *laddrp;
762 844
763 845 if (connp->conn_ipversion == IPV6_VERSION) {
764 846 addr_family = AF_INET6;
765 847 laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
766 848 } else {
767 849 addr_family = AF_INET;
768 850 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
769 851 }
770 852 (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
771 853 IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
772 854 }
773 855 connp->conn_flags &= ~IPCL_CL_LISTENER;
774 856 }
775 857
776 858 /*
777 859 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
778 860 * which table the conn belonged to). So for debugging we can see which hash
779 861 * table this connection was in.
780 862 */
781 863 #define IPCL_HASH_REMOVE(connp) { \
782 864 connf_t *connfp = (connp)->conn_fanout; \
783 865 ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \
784 866 if (connfp != NULL) { \
785 867 mutex_enter(&connfp->connf_lock); \
786 868 if ((connp)->conn_next != NULL) \
787 869 (connp)->conn_next->conn_prev = \
788 870 (connp)->conn_prev; \
789 871 if ((connp)->conn_prev != NULL) \
790 872 (connp)->conn_prev->conn_next = \
791 873 (connp)->conn_next; \
792 874 else \
793 875 connfp->connf_head = (connp)->conn_next; \
794 876 (connp)->conn_fanout = NULL; \
795 877 (connp)->conn_next = NULL; \
796 878 (connp)->conn_prev = NULL; \
797 879 (connp)->conn_flags |= IPCL_REMOVED; \
798 880 if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \
799 881 ipcl_conn_unlisten((connp)); \
800 882 CONN_DEC_REF((connp)); \
801 883 mutex_exit(&connfp->connf_lock); \
802 884 } \
803 885 }
804 886
805 887 void
806 888 ipcl_hash_remove(conn_t *connp)
807 889 {
808 890 uint8_t protocol = connp->conn_proto;
809 891
810 892 IPCL_HASH_REMOVE(connp);
811 893 if (protocol == IPPROTO_RSVP)
812 894 ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
813 895 }
814 896
815 897 /*
816 898 * The whole purpose of this function is allow removal of
817 899 * a conn_t from the connected hash for timewait reclaim.
818 900 * This is essentially a TW reclaim fastpath where timewait
819 901 * collector checks under fanout lock (so no one else can
820 902 * get access to the conn_t) that refcnt is 2 i.e. one for
821 903 * TCP and one for the classifier hash list. If ref count
822 904 * is indeed 2, we can just remove the conn under lock and
823 905 * avoid cleaning up the conn under squeue. This gives us
824 906 * improved performance.
825 907 */
826 908 void
827 909 ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp)
828 910 {
829 911 ASSERT(MUTEX_HELD(&connfp->connf_lock));
830 912 ASSERT(MUTEX_HELD(&connp->conn_lock));
831 913 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
832 914
833 915 if ((connp)->conn_next != NULL) {
834 916 (connp)->conn_next->conn_prev = (connp)->conn_prev;
835 917 }
836 918 if ((connp)->conn_prev != NULL) {
837 919 (connp)->conn_prev->conn_next = (connp)->conn_next;
838 920 } else {
839 921 connfp->connf_head = (connp)->conn_next;
840 922 }
841 923 (connp)->conn_fanout = NULL;
842 924 (connp)->conn_next = NULL;
843 925 (connp)->conn_prev = NULL;
844 926 (connp)->conn_flags |= IPCL_REMOVED;
845 927 ASSERT((connp)->conn_ref == 2);
846 928 (connp)->conn_ref--;
847 929 }
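
The fastpath above would be driven by a time-wait collector roughly as follows (an editorial sketch of the locking and refcnt == 2 check that the function asserts; the real collector lives in the TCP code, and the connfp/connp variables are assumed):

	mutex_enter(&connfp->connf_lock);
	mutex_enter(&connp->conn_lock);
	if (connp->conn_ref == 2) {
		/* Only the TCP instance and the hash list hold references. */
		ipcl_hash_remove_locked(connp, connfp);
	}
	mutex_exit(&connp->conn_lock);
	mutex_exit(&connfp->connf_lock);
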
848 930
849 931 #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \
850 932 ASSERT((connp)->conn_fanout == NULL); \
851 933 ASSERT((connp)->conn_next == NULL); \
852 934 ASSERT((connp)->conn_prev == NULL); \
853 935 if ((connfp)->connf_head != NULL) { \
854 936 (connfp)->connf_head->conn_prev = (connp); \
855 937 (connp)->conn_next = (connfp)->connf_head; \
856 938 } \
857 939 (connp)->conn_fanout = (connfp); \
858 940 (connfp)->connf_head = (connp); \
859 941 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
860 942 IPCL_CONNECTED; \
861 943 CONN_INC_REF(connp); \
862 944 }
863 945
864 946 #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \
865 947 IPCL_HASH_REMOVE((connp)); \
866 948 mutex_enter(&(connfp)->connf_lock); \
867 949 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \
868 950 mutex_exit(&(connfp)->connf_lock); \
869 951 }
870 952
871 953 #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \
872 954 conn_t *pconnp = NULL, *nconnp; \
873 955 IPCL_HASH_REMOVE((connp)); \
874 956 mutex_enter(&(connfp)->connf_lock); \
875 957 nconnp = (connfp)->connf_head; \
876 958 while (nconnp != NULL && \
877 959 !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) { \
878 960 pconnp = nconnp; \
879 961 nconnp = nconnp->conn_next; \
880 962 } \
881 963 if (pconnp != NULL) { \
882 964 pconnp->conn_next = (connp); \
883 965 (connp)->conn_prev = pconnp; \
884 966 } else { \
885 967 (connfp)->connf_head = (connp); \
886 968 } \
887 969 if (nconnp != NULL) { \
888 970 (connp)->conn_next = nconnp; \
889 971 nconnp->conn_prev = (connp); \
890 972 } \
891 973 (connp)->conn_fanout = (connfp); \
892 974 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
893 975 IPCL_BOUND; \
894 976 CONN_INC_REF(connp); \
895 977 mutex_exit(&(connfp)->connf_lock); \
896 978 }
897 979
898 980 #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \
899 981 conn_t **list, *prev, *next; \
900 982 boolean_t isv4mapped = \
901 983 IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6); \
902 984 IPCL_HASH_REMOVE((connp)); \
903 985 mutex_enter(&(connfp)->connf_lock); \
904 986 list = &(connfp)->connf_head; \
905 987 prev = NULL; \
906 988 while ((next = *list) != NULL) { \
907 989 if (isv4mapped && \
908 990 IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) && \
909 991 connp->conn_zoneid == next->conn_zoneid) { \
910 992 (connp)->conn_next = next; \
911 993 if (prev != NULL) \
912 994 prev = next->conn_prev; \
913 995 next->conn_prev = (connp); \
914 996 break; \
915 997 } \
916 998 list = &next->conn_next; \
917 999 prev = next; \
918 1000 } \
919 1001 (connp)->conn_prev = prev; \
920 1002 *list = (connp); \
921 1003 (connp)->conn_fanout = (connfp); \
922 1004 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \
923 1005 IPCL_BOUND; \
924 1006 CONN_INC_REF((connp)); \
925 1007 mutex_exit(&(connfp)->connf_lock); \
926 1008 }
927 1009
928 1010 void
929 1011 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
930 1012 {
931 1013 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
932 1014 }
933 1015
934 1016 /*
935 1017 * Because the classifier is used to classify inbound packets, the destination
936 1018 * address is meant to be our local tunnel address (tunnel source), and the
937 1019 * source the remote tunnel address (tunnel destination).
938 1020 *
939 1021 * Note that conn_proto can't be used for fanout since the upper protocol
940 1022 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
941 1023 */
942 1024 conn_t *
943 1025 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
944 1026 {
945 1027 connf_t *connfp;
946 1028 conn_t *connp;
947 1029
948 1030 /* first look for IPv4 tunnel links */
949 1031 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
950 1032 mutex_enter(&connfp->connf_lock);
951 1033 for (connp = connfp->connf_head; connp != NULL;
952 1034 connp = connp->conn_next) {
953 1035 if (IPCL_IPTUN_MATCH(connp, *dst, *src))
954 1036 break;
955 1037 }
956 1038 if (connp != NULL)
957 1039 goto done;
958 1040
959 1041 mutex_exit(&connfp->connf_lock);
960 1042
961 1043 /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
962 1044 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
963 1045 INADDR_ANY)];
964 1046 mutex_enter(&connfp->connf_lock);
965 1047 for (connp = connfp->connf_head; connp != NULL;
966 1048 connp = connp->conn_next) {
967 1049 if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
968 1050 break;
969 1051 }
970 1052 done:
971 1053 if (connp != NULL)
972 1054 CONN_INC_REF(connp);
973 1055 mutex_exit(&connfp->connf_lock);
974 1056 return (connp);
975 1057 }
976 1058
977 1059 conn_t *
978 1060 ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
979 1061 {
980 1062 connf_t *connfp;
981 1063 conn_t *connp;
982 1064
983 1065 /* Look for an IPv6 tunnel link */
984 1066 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
985 1067 mutex_enter(&connfp->connf_lock);
986 1068 for (connp = connfp->connf_head; connp != NULL;
987 1069 connp = connp->conn_next) {
988 1070 if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
989 1071 CONN_INC_REF(connp);
990 1072 break;
991 1073 }
992 1074 }
993 1075 mutex_exit(&connfp->connf_lock);
994 1076 return (connp);
995 1077 }
996 1078
997 1079 /*
998 1080 * This function is used only for inserting SCTP raw socket now.
999 1081 * This may change later.
1000 1082 *
1001 1083 * Note that only one raw socket can be bound to a port. The param
1002 1084 * lport is in network byte order.
1003 1085 */
1004 1086 static int
1005 1087 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
1006 1088 {
1007 1089 connf_t *connfp;
1008 1090 conn_t *oconnp;
1009 1091 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1010 1092
1011 1093 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1012 1094
1013 1095 /* Check for existing raw socket already bound to the port. */
1014 1096 mutex_enter(&connfp->connf_lock);
1015 1097 for (oconnp = connfp->connf_head; oconnp != NULL;
1016 1098 oconnp = oconnp->conn_next) {
1017 1099 if (oconnp->conn_lport == lport &&
1018 1100 oconnp->conn_zoneid == connp->conn_zoneid &&
1019 1101 oconnp->conn_family == connp->conn_family &&
1020 1102 ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1021 1103 IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
1022 1104 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
1023 1105 IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
1024 1106 IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
1025 1107 &connp->conn_laddr_v6))) {
1026 1108 break;
1027 1109 }
1028 1110 }
1029 1111 mutex_exit(&connfp->connf_lock);
1030 1112 if (oconnp != NULL)
1031 1113 return (EADDRNOTAVAIL);
1032 1114
1033 1115 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
1034 1116 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1035 1117 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1036 1118 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
1037 1119 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1038 1120 } else {
1039 1121 IPCL_HASH_INSERT_BOUND(connfp, connp);
1040 1122 }
1041 1123 } else {
1042 1124 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1043 1125 }
1044 1126 return (0);
1045 1127 }
1046 1128
1047 1129 static int
1048 1130 ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
1049 1131 {
1050 1132 connf_t *connfp;
1051 1133 conn_t *tconnp;
1052 1134 ipaddr_t laddr = connp->conn_laddr_v4;
1053 1135 ipaddr_t faddr = connp->conn_faddr_v4;
1054 1136
1055 1137 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
1056 1138 mutex_enter(&connfp->connf_lock);
1057 1139 for (tconnp = connfp->connf_head; tconnp != NULL;
1058 1140 tconnp = tconnp->conn_next) {
1059 1141 if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
1060 1142 /* A tunnel is already bound to these addresses. */
1061 1143 mutex_exit(&connfp->connf_lock);
1062 1144 return (EADDRINUSE);
1063 1145 }
1064 1146 }
1065 1147 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1066 1148 mutex_exit(&connfp->connf_lock);
1067 1149 return (0);
1068 1150 }
1069 1151
1070 1152 static int
1071 1153 ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
1072 1154 {
1073 1155 connf_t *connfp;
1074 1156 conn_t *tconnp;
1075 1157 in6_addr_t *laddr = &connp->conn_laddr_v6;
1076 1158 in6_addr_t *faddr = &connp->conn_faddr_v6;
1077 1159
1078 1160 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
1079 1161 mutex_enter(&connfp->connf_lock);
1080 1162 for (tconnp = connfp->connf_head; tconnp != NULL;
1081 1163 tconnp = tconnp->conn_next) {
1082 1164 if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
1083 1165 /* A tunnel is already bound to these addresses. */
1084 1166 mutex_exit(&connfp->connf_lock);
1085 1167 return (EADDRINUSE);
1086 1168 }
1087 1169 }
1088 1170 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1089 1171 mutex_exit(&connfp->connf_lock);
1090 1172 return (0);
1091 1173 }
1092 1174
1093 1175 /*
1094 1176 * Check for a MAC exemption conflict on a labeled system. Note that for
1095 1177 * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
1096 1178 * transport layer. This check is for binding all other protocols.
1097 1179 *
1098 1180 * Returns true if there's a conflict.
1099 1181 */
1100 1182 static boolean_t
1101 1183 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
1102 1184 {
1103 1185 connf_t *connfp;
1104 1186 conn_t *tconn;
1105 1187
1106 1188 connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
1107 1189 mutex_enter(&connfp->connf_lock);
1108 1190 for (tconn = connfp->connf_head; tconn != NULL;
1109 1191 tconn = tconn->conn_next) {
1110 1192 /* We don't allow v4 fallback for v6 raw socket */
1111 1193 if (connp->conn_family != tconn->conn_family)
1112 1194 continue;
1113 1195 /* If neither is exempt, then there's no conflict */
1114 1196 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1115 1197 (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1116 1198 continue;
1117 1199 /* We are only concerned about sockets for a different zone */
1118 1200 if (connp->conn_zoneid == tconn->conn_zoneid)
1119 1201 continue;
1120 1202 /* If both are bound to different specific addrs, ok */
1121 1203 if (connp->conn_laddr_v4 != INADDR_ANY &&
1122 1204 tconn->conn_laddr_v4 != INADDR_ANY &&
1123 1205 connp->conn_laddr_v4 != tconn->conn_laddr_v4)
1124 1206 continue;
1125 1207 /* These two conflict; fail */
1126 1208 break;
1127 1209 }
1128 1210 mutex_exit(&connfp->connf_lock);
1129 1211 return (tconn != NULL);
1130 1212 }
1131 1213
1132 1214 static boolean_t
1133 1215 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
1134 1216 {
1135 1217 connf_t *connfp;
1136 1218 conn_t *tconn;
1137 1219
1138 1220 connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
1139 1221 mutex_enter(&connfp->connf_lock);
1140 1222 for (tconn = connfp->connf_head; tconn != NULL;
1141 1223 tconn = tconn->conn_next) {
1142 1224 /* We don't allow v4 fallback for v6 raw socket */
1143 1225 if (connp->conn_family != tconn->conn_family)
1144 1226 continue;
1145 1227 /* If neither is exempt, then there's no conflict */
1146 1228 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1147 1229 (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1148 1230 continue;
1149 1231 /* We are only concerned about sockets for a different zone */
1150 1232 if (connp->conn_zoneid == tconn->conn_zoneid)
1151 1233 continue;
1152 1234 /* If both are bound to different addrs, ok */
1153 1235 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1154 1236 !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1155 1237 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1156 1238 &tconn->conn_laddr_v6))
1157 1239 continue;
1158 1240 /* These two conflict; fail */
1159 1241 break;
1160 1242 }
1161 1243 mutex_exit(&connfp->connf_lock);
1162 1244 return (tconn != NULL);
1163 1245 }
1164 1246
1165 1247 /*
1166 1248 * (v4, v6) bind hash insertion routines
1167 1249 * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
1168 1250 */
1169 1251
1170 1252 int
1171 1253 ipcl_bind_insert(conn_t *connp)
1172 1254 {
1173 1255 if (connp->conn_ipversion == IPV6_VERSION)
1174 1256 return (ipcl_bind_insert_v6(connp));
1175 1257 else
1176 1258 return (ipcl_bind_insert_v4(connp));
1177 1259 }
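
Per the caller contract stated above (conn_proto, conn_laddr_v6 and conn_lport are set before the call), a DCCP bind would reach the new IPPROTO_DCCP case roughly like this. This is an illustrative sketch with simplified error handling; the error variable is an assumption:

	/* conn_laddr_v6 and conn_lport were filled in by the bind code */
	connp->conn_proto = IPPROTO_DCCP;
	error = ipcl_bind_insert(connp);
	if (error != 0)
		return (error);	/* the bind could not be inserted */
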
1178 1260
1179 1261 int
1180 1262 ipcl_bind_insert_v4(conn_t *connp)
1181 1263 {
1182 1264 connf_t *connfp;
1183 1265 int ret = 0;
1184 1266 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1185 1267 uint16_t lport = connp->conn_lport;
1186 1268 uint8_t protocol = connp->conn_proto;
1187 1269
1188 1270 if (IPCL_IS_IPTUN(connp))
1189 1271 return (ipcl_iptun_hash_insert(connp, ipst));
1190 1272
1191 1273 switch (protocol) {
1192 1274 default:
1193 1275 if (is_system_labeled() &&
1194 1276 check_exempt_conflict_v4(connp, ipst))
1195 1277 return (EADDRINUSE);
1196 1278 /* FALLTHROUGH */
1197 1279 case IPPROTO_UDP:
1198 1280 if (protocol == IPPROTO_UDP) {
1199 1281 connfp = &ipst->ips_ipcl_udp_fanout[
1200 1282 IPCL_UDP_HASH(lport, ipst)];
1201 1283 } else {
1202 1284 connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1203 1285 }
1204 1286
1205 1287 if (connp->conn_faddr_v4 != INADDR_ANY) {
1206 1288 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1207 1289 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1208 1290 IPCL_HASH_INSERT_BOUND(connfp, connp);
1209 1291 } else {
1210 1292 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1211 1293 }
1212 1294 if (protocol == IPPROTO_RSVP)
1213 1295 ill_set_inputfn_all(ipst);
1214 1296 break;
1215 1297
1216 1298 case IPPROTO_TCP:
1217 1299 /* Insert it in the Bind Hash */
1218 1300 ASSERT(connp->conn_zoneid != ALL_ZONES);
1219 1301 connfp = &ipst->ips_ipcl_bind_fanout[
1220 1302 IPCL_BIND_HASH(lport, ipst)];
1221 1303 if (connp->conn_laddr_v4 != INADDR_ANY) {
1222 1304 IPCL_HASH_INSERT_BOUND(connfp, connp);
1223 1305 } else {
1224 1306 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1225 1307 }
1226 1308 if (cl_inet_listen != NULL) {
1227 1309 ASSERT(connp->conn_ipversion == IPV4_VERSION);
1228 1310 connp->conn_flags |= IPCL_CL_LISTENER;
1229 1311 (*cl_inet_listen)(
1230 1312 connp->conn_netstack->netstack_stackid,
1231 1313 IPPROTO_TCP, AF_INET,
1232 1314 (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1233 1315 }
1234 1316 break;
1235 1317
1236 1318 case IPPROTO_SCTP:
1237 1319 ret = ipcl_sctp_hash_insert(connp, lport);
1238 1320 break;
1321 +
1322 + case IPPROTO_DCCP:
1323 + cmn_err(CE_NOTE, "ipclassifier.c: ipcl_bind_insert_v4");
1324 + ASSERT(connp->conn_zoneid != ALL_ZONES);
1325 + connfp = &ipst->ips_ipcl_dccp_bind_fanout[
1326 + IPCL_DCCP_BIND_HASH(lport, ipst)];
1327 + if (connp->conn_laddr_v4 != INADDR_ANY) {
1328 + IPCL_HASH_INSERT_BOUND(connfp, connp);
1329 + } else {
1330 + IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1331 + }
1332 + break;
1239 1333 }
1240 1334
1335 +
1241 1336 return (ret);
1242 1337 }
1243 1338
1244 1339 int
1245 1340 ipcl_bind_insert_v6(conn_t *connp)
1246 1341 {
1247 1342 connf_t *connfp;
1248 1343 int ret = 0;
1249 1344 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1250 1345 uint16_t lport = connp->conn_lport;
1251 1346 uint8_t protocol = connp->conn_proto;
1252 1347
1253 1348 if (IPCL_IS_IPTUN(connp)) {
1254 1349 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1255 1350 }
1256 1351
1257 1352 switch (protocol) {
1258 1353 default:
1259 1354 if (is_system_labeled() &&
1260 1355 check_exempt_conflict_v6(connp, ipst))
1261 1356 return (EADDRINUSE);
1262 1357 /* FALLTHROUGH */
1263 1358 case IPPROTO_UDP:
1264 1359 if (protocol == IPPROTO_UDP) {
1265 1360 connfp = &ipst->ips_ipcl_udp_fanout[
1266 1361 IPCL_UDP_HASH(lport, ipst)];
1267 1362 } else {
1268 1363 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1269 1364 }
1270 1365
1271 1366 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1272 1367 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1273 1368 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1274 1369 IPCL_HASH_INSERT_BOUND(connfp, connp);
1275 1370 } else {
1276 1371 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1277 1372 }
1278 1373 break;
1279 1374
1280 1375 case IPPROTO_TCP:
1281 1376 /* Insert it in the Bind Hash */
1282 1377 ASSERT(connp->conn_zoneid != ALL_ZONES);
1283 1378 connfp = &ipst->ips_ipcl_bind_fanout[
1284 1379 IPCL_BIND_HASH(lport, ipst)];
1285 1380 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1286 1381 IPCL_HASH_INSERT_BOUND(connfp, connp);
1287 1382 } else {
1288 1383 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1289 1384 }
1290 1385 if (cl_inet_listen != NULL) {
1291 1386 sa_family_t addr_family;
1292 1387 uint8_t *laddrp;
1293 1388
1294 1389 if (connp->conn_ipversion == IPV6_VERSION) {
1295 1390 addr_family = AF_INET6;
1296 1391 laddrp =
1297 1392 (uint8_t *)&connp->conn_bound_addr_v6;
1298 1393 } else {
1299 1394 addr_family = AF_INET;
1300 1395 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
1301 1396 }
1302 1397 connp->conn_flags |= IPCL_CL_LISTENER;
1303 1398 (*cl_inet_listen)(
1304 1399 connp->conn_netstack->netstack_stackid,
1305 1400 IPPROTO_TCP, addr_family, laddrp, lport, NULL);
1306 1401 }
1307 1402 break;
1308 1403
1309 1404 case IPPROTO_SCTP:
1310 1405 ret = ipcl_sctp_hash_insert(connp, lport);
1311 1406 break;
1407 +
1408 + case IPPROTO_DCCP:
1409 + cmn_err(CE_NOTE, "ipclassifier.c: ipcl_bind_insert_v6");
1410 + ASSERT(connp->conn_zoneid != ALL_ZONES);
1411 + connfp = &ipst->ips_ipcl_dccp_bind_fanout[
1412 + IPCL_DCCP_BIND_HASH(lport, ipst)];
1413 + if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1414 + IPCL_HASH_INSERT_BOUND(connfp, connp);
1415 + } else {
1416 + IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1417 + }
1418 + break;
1312 1419 }
1313 1420
1314 1421 return (ret);
1315 1422 }
1316 1423
1317 1424 /*
1318 1425 * ipcl_conn_hash insertion routines.
1319 1426 * The caller has already set conn_proto and the addresses/ports in the conn_t.
1320 1427 */
1321 1428
1322 1429 int
1323 1430 ipcl_conn_insert(conn_t *connp)
1324 1431 {
1325 1432 if (connp->conn_ipversion == IPV6_VERSION)
1326 1433 return (ipcl_conn_insert_v6(connp));
1327 1434 else
1328 1435 return (ipcl_conn_insert_v4(connp));
1329 1436 }
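
Correspondingly, once a DCCP connection's 5-tuple is known, the conn would be moved into the connected fanout with ipcl_conn_insert(); a hedged sketch (the address/port assignments are assumed to have been done by the DCCP connect path, and the error variable is an assumption):

	connp->conn_proto = IPPROTO_DCCP;
	/* conn_faddr_v4, conn_laddr_v4 and conn_ports set by the connect path */
	error = ipcl_conn_insert(connp);
	if (error == EADDRINUSE) {
		/* another conn already owns this 5-tuple in this zone */
		return (error);
	}
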
1330 1437
1331 1438 int
1332 1439 ipcl_conn_insert_v4(conn_t *connp)
1333 1440 {
1334 1441 connf_t *connfp;
1335 1442 conn_t *tconnp;
1336 1443 int ret = 0;
1337 1444 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1338 1445 uint16_t lport = connp->conn_lport;
1339 1446 uint8_t protocol = connp->conn_proto;
1340 1447
1341 1448 if (IPCL_IS_IPTUN(connp))
1342 1449 return (ipcl_iptun_hash_insert(connp, ipst));
1343 1450
1344 1451 switch (protocol) {
1345 1452 case IPPROTO_TCP:
1346 1453 /*
1347 1454 * For TCP, we check whether the connection tuple already
1348 1455 * exists before allowing the connection to proceed. We
1349 1456 * also allow indexing on the zoneid. This is to allow
1350 1457 * multiple shared stack zones to have the same tcp
1351 1458 * connection tuple. In practice this only happens for
1352 1459 * INADDR_LOOPBACK as it's the only local address which
1353 1460 * doesn't have to be unique.
1354 1461 */
1355 1462 connfp = &ipst->ips_ipcl_conn_fanout[
1356 1463 IPCL_CONN_HASH(connp->conn_faddr_v4,
1357 1464 connp->conn_ports, ipst)];
1358 1465 mutex_enter(&connfp->connf_lock);
1359 1466 for (tconnp = connfp->connf_head; tconnp != NULL;
1360 1467 tconnp = tconnp->conn_next) {
1361 1468 if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1362 1469 connp->conn_faddr_v4, connp->conn_laddr_v4,
1363 1470 connp->conn_ports) &&
1364 1471 IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1365 1472 /* Already have a conn. bail out */
1366 1473 mutex_exit(&connfp->connf_lock);
1367 1474 return (EADDRINUSE);
1368 1475 }
1369 1476 }
1370 1477 if (connp->conn_fanout != NULL) {
1371 1478 /*
1372 1479 * Probably a XTI/TLI application trying to do a
1373 1480 * rebind. Let it happen.
1374 1481 */
1375 1482 mutex_exit(&connfp->connf_lock);
1376 1483 IPCL_HASH_REMOVE(connp);
1377 1484 mutex_enter(&connfp->connf_lock);
1378 1485 }
1379 1486
1380 1487 ASSERT(connp->conn_recv != NULL);
1381 1488 ASSERT(connp->conn_recvicmp != NULL);
1382 1489
1383 1490 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1384 1491 mutex_exit(&connfp->connf_lock);
1385 1492 break;
1386 1493
1387 1494 case IPPROTO_SCTP:
1388 1495 /*
1389 1496 * The raw socket may have already been bound, remove it
1390 1497 * from the hash first.
1391 1498 */
1392 1499 IPCL_HASH_REMOVE(connp);
1393 1500 ret = ipcl_sctp_hash_insert(connp, lport);
1394 1501 break;
1395 1502
1503 + case IPPROTO_DCCP:
1504 + cmn_err(CE_NOTE, "ipclassifier.c: ipcl_conn_insert_v4");
1505 + connfp = &ipst->ips_ipcl_dccp_conn_fanout[IPCL_DCCP_CONN_HASH(
1506 + connp->conn_faddr_v4, connp->conn_ports, ipst)];
1507 + mutex_enter(&connfp->connf_lock);
1508 + for (tconnp = connfp->connf_head; tconnp != NULL;
1509 + tconnp = tconnp->conn_next) {
1510 + if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1511 + connp->conn_faddr_v4, connp->conn_laddr_v4,
1512 + connp->conn_ports) &&
1513 + IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1514 + /* Already have a conn. bail out */
1515 + mutex_exit(&connfp->connf_lock);
1516 + return (EADDRINUSE);
1517 + }
1518 + }
1519 +
1520 + /* XXX:DCCP XTI/TLI application? */
1521 +
1522 + ASSERT(connp->conn_recv != NULL);
1523 + ASSERT(connp->conn_recvicmp != NULL);
1524 +
1525 + IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1526 + mutex_exit(&connfp->connf_lock);
1527 + break;
1528 +
1396 1529 default:
1397 1530 /*
1398 1531 * Check for conflicts among MAC exempt bindings. For
1399 1532 * transports with port numbers, this is done by the upper
1400 1533 * level per-transport binding logic. For all others, it's
1401 1534 * done here.
1402 1535 */
1403 1536 if (is_system_labeled() &&
1404 1537 check_exempt_conflict_v4(connp, ipst))
1405 1538 return (EADDRINUSE);
1406 1539 /* FALLTHROUGH */
1407 1540
1408 1541 case IPPROTO_UDP:
1409 1542 if (protocol == IPPROTO_UDP) {
1410 1543 connfp = &ipst->ips_ipcl_udp_fanout[
1411 1544 IPCL_UDP_HASH(lport, ipst)];
1412 1545 } else {
1413 1546 connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1414 1547 }
1415 1548
1416 1549 if (connp->conn_faddr_v4 != INADDR_ANY) {
1417 1550 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1418 1551 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1419 1552 IPCL_HASH_INSERT_BOUND(connfp, connp);
1420 1553 } else {
1421 1554 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1422 1555 }
1423 1556 break;
1424 1557 }
1425 1558
1426 1559 return (ret);
1427 1560 }
1428 1561
1429 1562 int
1430 1563 ipcl_conn_insert_v6(conn_t *connp)
1431 1564 {
1432 1565 connf_t *connfp;
1433 1566 conn_t *tconnp;
1434 1567 int ret = 0;
1435 1568 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1436 1569 uint16_t lport = connp->conn_lport;
1437 1570 uint8_t protocol = connp->conn_proto;
1438 1571 uint_t ifindex = connp->conn_bound_if;
1439 1572
1440 1573 if (IPCL_IS_IPTUN(connp))
1441 1574 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1442 1575
1443 1576 switch (protocol) {
1444 1577 case IPPROTO_TCP:
1445 1578
1446 1579 /*
1447 1580 * For tcp, we check whether the connection tuple already
1448 1581 * exists before allowing the connection to proceed. We
1449 1582 * also allow indexing on the zoneid. This is to allow
1450 1583 * multiple shared stack zones to have the same tcp
1451 1584 * connection tuple. In practice this only happens for
1452 1585 * ipv6_loopback as it's the only local address which
1453 1586 * doesn't have to be unique.
1454 1587 */
1455 1588 connfp = &ipst->ips_ipcl_conn_fanout[
1456 1589 IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
1457 1590 ipst)];
1458 1591 mutex_enter(&connfp->connf_lock);
1459 1592 for (tconnp = connfp->connf_head; tconnp != NULL;
1460 1593 tconnp = tconnp->conn_next) {
1461 1594 /* NOTE: need to match zoneid. Bug in onnv-gate */
1462 1595 if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1463 1596 connp->conn_faddr_v6, connp->conn_laddr_v6,
1464 1597 connp->conn_ports) &&
1465 1598 (tconnp->conn_bound_if == 0 ||
1466 1599 tconnp->conn_bound_if == ifindex) &&
1467 1600 IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1468 1601 /* Already have a conn. bail out */
1469 1602 mutex_exit(&connfp->connf_lock);
1470 1603 return (EADDRINUSE);
1471 1604 }
1472 1605 }
1473 1606 if (connp->conn_fanout != NULL) {
1474 1607 /*
1475 1608 * Probably a XTI/TLI application trying to do a
1476 1609 * rebind. Let it happen.
1477 1610 */
1478 1611 mutex_exit(&connfp->connf_lock);
1479 1612 IPCL_HASH_REMOVE(connp);
1480 1613 mutex_enter(&connfp->connf_lock);
1481 1614 }
1482 1615 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1483 1616 mutex_exit(&connfp->connf_lock);
1484 1617 break;
1485 1618
1486 1619 case IPPROTO_SCTP:
1487 1620 IPCL_HASH_REMOVE(connp);
1488 1621 ret = ipcl_sctp_hash_insert(connp, lport);
1489 1622 break;
1490 1623
1624 + case IPPROTO_DCCP:
1625 + cmn_err(CE_NOTE, "ipclassifier.c: ipcl_conn_insert_v6");
1626 + connfp = &ipst->ips_ipcl_dccp_conn_fanout[
1627 + IPCL_DCCP_CONN_HASH_V6(connp->conn_faddr_v6,
1628 + connp->conn_ports, ipst)];
1629 + mutex_enter(&connfp->connf_lock);
1630 + for (tconnp = connfp->connf_head; tconnp != NULL;
1631 + tconnp = tconnp->conn_next) {
1632 + /* NOTE: need to match zoneid. Bug in onnv-gate */
1633 + if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1634 + connp->conn_faddr_v6, connp->conn_laddr_v6,
1635 + connp->conn_ports) &&
1636 + (tconnp->conn_bound_if == 0 ||
1637 + tconnp->conn_bound_if == ifindex) &&
1638 + IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1639 + /* Already have a conn. bail out */
1640 + mutex_exit(&connfp->connf_lock);
1641 + return (EADDRINUSE);
1642 + }
1643 + }
1644 +
1645 + /* XXX:DCCP XTI/TLI? */
1646 + IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1647 + mutex_exit(&connfp->connf_lock);
1648 + break;
1649 +
1491 1650 default:
1492 1651 if (is_system_labeled() &&
1493 1652 check_exempt_conflict_v6(connp, ipst))
1494 1653 return (EADDRINUSE);
1495 1654 /* FALLTHROUGH */
1496 1655 case IPPROTO_UDP:
1497 1656 if (protocol == IPPROTO_UDP) {
1498 1657 connfp = &ipst->ips_ipcl_udp_fanout[
1499 1658 IPCL_UDP_HASH(lport, ipst)];
1500 1659 } else {
1501 1660 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1502 1661 }
1503 1662
1504 1663 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1505 1664 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1506 1665 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1507 1666 IPCL_HASH_INSERT_BOUND(connfp, connp);
1508 1667 } else {
1509 1668 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1510 1669 }
1511 1670 break;
1512 1671 }
1513 1672
1514 1673 return (ret);
1515 1674 }
1516 1675
1517 1676 /*
1518 1677 * v4 packet classifying function. looks up the fanout table to
1519 1678 * find the conn, the packet belongs to. returns the conn with
1520 1679 * the reference held, null otherwise.
1521 1680 *
1522 1681 * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1523 1682 * Lookup" comment block are applied. Labels are also checked as described
1524 1683 * above. If the packet is from the inside (looped back), and is from the same
1525 1684 * zone, then label checks are omitted.
1526 1685 */
1527 1686 conn_t *
1528 1687 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1529 1688 ip_recv_attr_t *ira, ip_stack_t *ipst)
1530 1689 {
1531 1690 ipha_t *ipha;
1532 1691 connf_t *connfp, *bind_connfp;
1533 1692 uint16_t lport;
1534 1693 uint16_t fport;
1535 1694 uint32_t ports;
1536 1695 conn_t *connp;
1537 1696 uint16_t *up;
1538 1697 zoneid_t zoneid = ira->ira_zoneid;
1539 1698
1540 1699 ipha = (ipha_t *)mp->b_rptr;
1541 1700 up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
1542 1701
1543 1702 switch (protocol) {
1544 1703 case IPPROTO_TCP:
1545 1704 ports = *(uint32_t *)up;
1546 1705 connfp =
1547 1706 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1548 1707 ports, ipst)];
1549 1708 mutex_enter(&connfp->connf_lock);
1550 1709 for (connp = connfp->connf_head; connp != NULL;
1551 1710 connp = connp->conn_next) {
1552 1711 if (IPCL_CONN_MATCH(connp, protocol,
1553 1712 ipha->ipha_src, ipha->ipha_dst, ports) &&
1554 1713 (connp->conn_zoneid == zoneid ||
1555 1714 connp->conn_allzones ||
1556 1715 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1557 1716 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1558 1717 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1559 1718 break;
1560 1719 }
1561 1720
1562 1721 if (connp != NULL) {
1563 1722 /*
1564 1723 * We have a fully-bound TCP connection.
1565 1724 *
1566 1725 * For labeled systems, there's no need to check the
1567 1726 * label here. It's known to be good as we checked
1568 1727 * before allowing the connection to become bound.
1569 1728 */
1570 1729 CONN_INC_REF(connp);
1571 1730 mutex_exit(&connfp->connf_lock);
1572 1731 return (connp);
1573 1732 }
1574 1733
1575 1734 mutex_exit(&connfp->connf_lock);
1576 1735 lport = up[1];
1577 1736 bind_connfp =
1578 1737 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1579 1738 mutex_enter(&bind_connfp->connf_lock);
1580 1739 for (connp = bind_connfp->connf_head; connp != NULL;
1581 1740 connp = connp->conn_next) {
1582 1741 if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1583 1742 lport) &&
1584 1743 (connp->conn_zoneid == zoneid ||
1585 1744 connp->conn_allzones ||
1586 1745 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1587 1746 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1588 1747 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1589 1748 break;
1590 1749 }
1591 1750
1592 1751 /*
1593 1752 * If the matching connection is SLP on a private address, then
1594 1753 * the label on the packet must match the local zone's label.
1595 1754 * Otherwise, it must be in the label range defined by tnrh.
1596 1755 * This is ensured by tsol_receive_local.
1597 1756 *
1598 1757 * Note that we don't check tsol_receive_local for
1599 1758 * the connected case.
1600 1759 */
1601 1760 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1602 1761 !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1603 1762 ira, connp)) {
1604 1763 DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
1605 1764 char *, "connp(1) could not receive mp(2)",
1606 1765 conn_t *, connp, mblk_t *, mp);
1607 1766 connp = NULL;
1608 1767 }
1609 1768
1610 1769 if (connp != NULL) {
1611 1770 /* Have a listener at least */
1612 1771 CONN_INC_REF(connp);
1613 1772 mutex_exit(&bind_connfp->connf_lock);
1614 1773 return (connp);
1615 1774 }
1616 1775
1617 1776 mutex_exit(&bind_connfp->connf_lock);
1618 1777 break;
1619 1778
1620 1779 case IPPROTO_UDP:
1621 1780 lport = up[1];
1622 1781 fport = up[0];
1623 1782 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1624 1783 mutex_enter(&connfp->connf_lock);
1625 1784 for (connp = connfp->connf_head; connp != NULL;
1626 1785 connp = connp->conn_next) {
1627 1786 if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1628 1787 fport, ipha->ipha_src) &&
1629 1788 (connp->conn_zoneid == zoneid ||
1630 1789 connp->conn_allzones ||
1631 1790 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1632 1791 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
1633 1792 break;
1634 1793 }
1635 1794
1636 1795 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1637 1796 !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1638 1797 ira, connp)) {
1639 1798 DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1640 1799 char *, "connp(1) could not receive mp(2)",
1641 1800 conn_t *, connp, mblk_t *, mp);
1642 1801 connp = NULL;
1643 1802 }
1644 1803
1645 1804 if (connp != NULL) {
1646 1805 CONN_INC_REF(connp);
1647 1806 mutex_exit(&connfp->connf_lock);
1648 1807 return (connp);
1649 1808 }
1650 1809
1651 1810 /*
1652 1811 * We shouldn't come here for multicast/broadcast packets
1653 1812 */
1654 1813 mutex_exit(&connfp->connf_lock);
1655 1814
1656 1815 break;
1657 1816
1817 + case IPPROTO_DCCP:
1818 + ports = *(uint32_t *)up;
1819 +
1820 + /*
1821 + * Search for fully-bound connection.
1822 + */
1823 + connfp = &ipst->ips_ipcl_dccp_conn_fanout[IPCL_DCCP_CONN_HASH(
1824 + ipha->ipha_src, ports, ipst)];
1825 + mutex_enter(&connfp->connf_lock);
1826 + for (connp = connfp->connf_head; connp != NULL;
1827 + connp = connp->conn_next) {
1828 + /* XXX:DCCP */
1829 + if (IPCL_CONN_MATCH(connp, protocol,
1830 + ipha->ipha_src, ipha->ipha_dst, ports)) {
1831 + /* XXX */
1832 + cmn_err(CE_NOTE, "ipclassifier.c: fully bound connection found");
1833 + break;
1834 + }
1835 + }
1836 +
1837 + if (connp != NULL) {
1838 + /*
1839 + * We have a fully-bound DCCP connection.
1840 + */
1841 + CONN_INC_REF(connp);
1842 + mutex_exit(&connfp->connf_lock);
1843 + return (connp);
1844 + }
1845 +
1846 + mutex_exit(&connfp->connf_lock);
1847 + lport = up[1];
1848 +
1849 + /*
1850 + * Fully-bound connection was not found, search for listener.
1851 + */
1852 + bind_connfp = &ipst->ips_ipcl_dccp_bind_fanout[
1853 + IPCL_DCCP_BIND_HASH(lport, ipst)];
1854 + mutex_enter(&bind_connfp->connf_lock);
1855 + for (connp = bind_connfp->connf_head; connp != NULL;
1856 + connp = connp->conn_next) {
1857 + if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1858 + lport) &&
1859 + (connp->conn_zoneid == zoneid ||
1860 + connp->conn_allzones ||
1861 + ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1862 + (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1863 + (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1864 + break;
1865 + }
1866 +
1867 + if (connp != NULL) {
1868 + cmn_err(CE_NOTE, "ipclassifier.c: half-bound bind listener");
1869 + /* Have a listener at least */
1870 + CONN_INC_REF(connp);
1871 + mutex_exit(&bind_connfp->connf_lock);
1872 + return (connp);
1873 + }
1874 +
1875 + mutex_exit(&bind_connfp->connf_lock);
1876 + break;
1877 +
1658 1878 case IPPROTO_ENCAP:
1659 1879 case IPPROTO_IPV6:
1660 1880 return (ipcl_iptun_classify_v4(&ipha->ipha_src,
1661 1881 &ipha->ipha_dst, ipst));
1662 1882 }
1663 1883
1664 1884 return (NULL);
1665 1885 }
1666 1886
1667 1887 conn_t *
1668 1888 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1669 1889 ip_recv_attr_t *ira, ip_stack_t *ipst)
1670 1890 {
1671 1891 ip6_t *ip6h;
1672 1892 connf_t *connfp, *bind_connfp;
1673 1893 uint16_t lport;
1674 1894 uint16_t fport;
1675 1895 tcpha_t *tcpha;
1676 1896 uint32_t ports;
1677 1897 conn_t *connp;
1678 1898 uint16_t *up;
1679 1899 zoneid_t zoneid = ira->ira_zoneid;
1680 1900
1681 1901 ip6h = (ip6_t *)mp->b_rptr;
1682 1902
1683 1903 switch (protocol) {
1684 1904 case IPPROTO_TCP:
1685 1905 tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
1686 1906 up = &tcpha->tha_lport;
1687 1907 ports = *(uint32_t *)up;
1688 1908
1689 1909 connfp =
1690 1910 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1691 1911 ports, ipst)];
1692 1912 mutex_enter(&connfp->connf_lock);
1693 1913 for (connp = connfp->connf_head; connp != NULL;
1694 1914 connp = connp->conn_next) {
1695 1915 if (IPCL_CONN_MATCH_V6(connp, protocol,
1696 1916 ip6h->ip6_src, ip6h->ip6_dst, ports) &&
1697 1917 (connp->conn_zoneid == zoneid ||
1698 1918 connp->conn_allzones ||
1699 1919 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1700 1920 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1701 1921 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1702 1922 break;
1703 1923 }
1704 1924
1705 1925 if (connp != NULL) {
1706 1926 /*
1707 1927 * We have a fully-bound TCP connection.
1708 1928 *
1709 1929 * For labeled systems, there's no need to check the
1710 1930 * label here. It's known to be good as we checked
1711 1931 * before allowing the connection to become bound.
1712 1932 */
1713 1933 CONN_INC_REF(connp);
1714 1934 mutex_exit(&connfp->connf_lock);
1715 1935 return (connp);
1716 1936 }
1717 1937
1718 1938 mutex_exit(&connfp->connf_lock);
1719 1939
1720 1940 lport = up[1];
1721 1941 bind_connfp =
1722 1942 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1723 1943 mutex_enter(&bind_connfp->connf_lock);
1724 1944 for (connp = bind_connfp->connf_head; connp != NULL;
1725 1945 connp = connp->conn_next) {
1726 1946 if (IPCL_BIND_MATCH_V6(connp, protocol,
1727 1947 ip6h->ip6_dst, lport) &&
1728 1948 (connp->conn_zoneid == zoneid ||
1729 1949 connp->conn_allzones ||
1730 1950 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1731 1951 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1732 1952 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1733 1953 break;
1734 1954 }
1735 1955
1736 1956 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1737 1957 !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1738 1958 ira, connp)) {
1739 1959 DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
1740 1960 char *, "connp(1) could not receive mp(2)",
1741 1961 conn_t *, connp, mblk_t *, mp);
1742 1962 connp = NULL;
1743 1963 }
1744 1964
1745 1965 if (connp != NULL) {
1746 1966 			/* Have a listener at least */
1747 1967 CONN_INC_REF(connp);
1748 1968 mutex_exit(&bind_connfp->connf_lock);
1749 1969 return (connp);
1750 1970 }
1751 1971
1752 1972 mutex_exit(&bind_connfp->connf_lock);
1753 1973 break;
1754 1974
1755 1975 case IPPROTO_UDP:
1756 1976 up = (uint16_t *)&mp->b_rptr[hdr_len];
1757 1977 lport = up[1];
1758 1978 fport = up[0];
1759 1979 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1760 1980 mutex_enter(&connfp->connf_lock);
1761 1981 for (connp = connfp->connf_head; connp != NULL;
1762 1982 connp = connp->conn_next) {
1763 1983 if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
1764 1984 fport, ip6h->ip6_src) &&
1765 1985 (connp->conn_zoneid == zoneid ||
1766 1986 connp->conn_allzones ||
1767 1987 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1768 1988 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1769 1989 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1770 1990 break;
1771 1991 }
1772 1992
1773 1993 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1774 1994 !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1775 1995 ira, connp)) {
1776 1996 DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
1777 1997 char *, "connp(1) could not receive mp(2)",
1778 1998 conn_t *, connp, mblk_t *, mp);
1779 1999 connp = NULL;
1780 2000 }
1781 2001
1782 2002 if (connp != NULL) {
1783 2003 CONN_INC_REF(connp);
1784 2004 mutex_exit(&connfp->connf_lock);
1785 2005 return (connp);
1786 2006 }
1787 2007
1788 2008 /*
1789 2009 * We shouldn't come here for multicast/broadcast packets
1790 2010 */
1791 2011 mutex_exit(&connfp->connf_lock);
1792 2012 break;
1793 2013 case IPPROTO_ENCAP:
1794 2014 case IPPROTO_IPV6:
1795 2015 return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
1796 2016 &ip6h->ip6_dst, ipst));
1797 2017 }
1798 2018
1799 2019 return (NULL);
1800 2020 }
1801 2021
1802 2022 /*
1803 2023 * wrapper around ipcl_classify_(v4,v6) routines.
1804 2024 */
1805 2025 conn_t *
1806 2026 ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
1807 2027 {
1808 2028 if (ira->ira_flags & IRAF_IS_IPV4) {
1809 2029 return (ipcl_classify_v4(mp, ira->ira_protocol,
1810 2030 ira->ira_ip_hdr_length, ira, ipst));
1811 2031 } else {
1812 2032 return (ipcl_classify_v6(mp, ira->ira_protocol,
1813 2033 ira->ira_ip_hdr_length, ira, ipst));
1814 2034 }
1815 2035 }
1816 2036
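ipcl_classify() returns either NULL or a conn_t with a reference already taken, so every caller owes a matching CONN_DEC_REF. A minimal sketch of that contract follows; ip_input_demux() is a hypothetical name, and the real input path also deals with squeue entry for TCP and with generating resets/ICMP errors on a miss.

static void
ip_input_demux(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
{
	conn_t	*connp;

	connp = ipcl_classify(mp, ira, ipst);
	if (connp == NULL) {
		/* No matching conn_t found by the classifier. */
		freemsg(mp);
		return;
	}

	/* The classifier took a reference; drop it after the hand-off. */
	(connp->conn_recv)(connp, mp, NULL, ira);
	CONN_DEC_REF(connp);
}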
1817 2037 /*
1818 2038 * Only used to classify SCTP RAW sockets
1819 2039 */
1820 2040 conn_t *
1821 2041 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
1822 2042 ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
1823 2043 {
1824 2044 connf_t *connfp;
1825 2045 conn_t *connp;
1826 2046 in_port_t lport;
1827 2047 int ipversion;
1828 2048 const void *dst;
1829 2049 zoneid_t zoneid = ira->ira_zoneid;
1830 2050
1831 2051 lport = ((uint16_t *)&ports)[1];
1832 2052 if (ira->ira_flags & IRAF_IS_IPV4) {
1833 2053 dst = (const void *)&ipha->ipha_dst;
1834 2054 ipversion = IPV4_VERSION;
1835 2055 } else {
1836 2056 dst = (const void *)&ip6h->ip6_dst;
1837 2057 ipversion = IPV6_VERSION;
1838 2058 }
1839 2059
1840 2060 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1841 2061 mutex_enter(&connfp->connf_lock);
1842 2062 for (connp = connfp->connf_head; connp != NULL;
1843 2063 connp = connp->conn_next) {
1844 2064 /* We don't allow v4 fallback for v6 raw socket. */
1845 2065 if (ipversion != connp->conn_ipversion)
1846 2066 continue;
1847 2067 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1848 2068 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1849 2069 if (ipversion == IPV4_VERSION) {
1850 2070 if (!IPCL_CONN_MATCH(connp, protocol,
1851 2071 ipha->ipha_src, ipha->ipha_dst, ports))
1852 2072 continue;
1853 2073 } else {
1854 2074 if (!IPCL_CONN_MATCH_V6(connp, protocol,
1855 2075 ip6h->ip6_src, ip6h->ip6_dst, ports))
1856 2076 continue;
1857 2077 }
1858 2078 } else {
1859 2079 if (ipversion == IPV4_VERSION) {
1860 2080 if (!IPCL_BIND_MATCH(connp, protocol,
1861 2081 ipha->ipha_dst, lport))
1862 2082 continue;
1863 2083 } else {
1864 2084 if (!IPCL_BIND_MATCH_V6(connp, protocol,
1865 2085 ip6h->ip6_dst, lport))
1866 2086 continue;
1867 2087 }
1868 2088 }
1869 2089
1870 2090 if (connp->conn_zoneid == zoneid ||
1871 2091 connp->conn_allzones ||
1872 2092 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1873 2093 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1874 2094 (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
1875 2095 break;
1876 2096 }
1877 2097
1878 2098 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1879 2099 !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
1880 2100 DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
1881 2101 char *, "connp(1) could not receive mp(2)",
1882 2102 conn_t *, connp, mblk_t *, mp);
1883 2103 connp = NULL;
1884 2104 }
1885 2105
1886 2106 if (connp != NULL)
1887 2107 goto found;
1888 2108 mutex_exit(&connfp->connf_lock);
1889 2109
1890 2110 /* Try to look for a wildcard SCTP RAW socket match. */
1891 2111 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
1892 2112 mutex_enter(&connfp->connf_lock);
1893 2113 for (connp = connfp->connf_head; connp != NULL;
1894 2114 connp = connp->conn_next) {
1895 2115 /* We don't allow v4 fallback for v6 raw socket. */
1896 2116 if (ipversion != connp->conn_ipversion)
1897 2117 continue;
1898 2118 if (!IPCL_ZONE_MATCH(connp, zoneid))
1899 2119 continue;
1900 2120
1901 2121 if (ipversion == IPV4_VERSION) {
1902 2122 if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
1903 2123 break;
1904 2124 } else {
1905 2125 if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
1906 2126 break;
1907 2127 }
1908 2128 }
1909 2129 }
1910 2130
1911 2131 if (connp != NULL)
1912 2132 goto found;
1913 2133
1914 2134 mutex_exit(&connfp->connf_lock);
1915 2135 return (NULL);
1916 2136
1917 2137 found:
1918 2138 ASSERT(connp != NULL);
1919 2139 CONN_INC_REF(connp);
1920 2140 mutex_exit(&connfp->connf_lock);
1921 2141 return (connp);
1922 2142 }
1923 2143
1924 2144 /* ARGSUSED */
1925 2145 static int
1926 2146 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
1927 2147 {
1928 2148 itc_t *itc = (itc_t *)buf;
1929 2149 conn_t *connp = &itc->itc_conn;
1930 2150 tcp_t *tcp = (tcp_t *)&itc[1];
1931 2151
1932 2152 bzero(connp, sizeof (conn_t));
1933 2153 bzero(tcp, sizeof (tcp_t));
1934 2154
1935 2155 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1936 2156 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1937 2157 cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
1938 2158 tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
1939 2159 if (tcp->tcp_timercache == NULL)
1940 2160 return (ENOMEM);
1941 2161 connp->conn_tcp = tcp;
1942 2162 connp->conn_flags = IPCL_TCPCONN;
1943 2163 connp->conn_proto = IPPROTO_TCP;
1944 2164 tcp->tcp_connp = connp;
1945 2165 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1946 2166
1947 2167 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1948 2168 if (connp->conn_ixa == NULL) {
1949 2169 tcp_timermp_free(tcp);
1950 2170 return (ENOMEM);
1951 2171 }
1952 2172 connp->conn_ixa->ixa_refcnt = 1;
1953 2173 connp->conn_ixa->ixa_protocol = connp->conn_proto;
1954 2174 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
1955 2175 return (0);
1956 2176 }
1957 2177
1958 2178 /* ARGSUSED */
1959 2179 static void
1960 2180 tcp_conn_destructor(void *buf, void *cdrarg)
1961 2181 {
1962 2182 itc_t *itc = (itc_t *)buf;
1963 2183 conn_t *connp = &itc->itc_conn;
1964 2184 tcp_t *tcp = (tcp_t *)&itc[1];
1965 2185
1966 2186 ASSERT(connp->conn_flags & IPCL_TCPCONN);
1967 2187 ASSERT(tcp->tcp_connp == connp);
1968 2188 ASSERT(connp->conn_tcp == tcp);
1969 2189 tcp_timermp_free(tcp);
1970 2190 mutex_destroy(&connp->conn_lock);
1971 2191 cv_destroy(&connp->conn_cv);
1972 2192 cv_destroy(&connp->conn_sq_cv);
1973 2193 rw_destroy(&connp->conn_ilg_lock);
1974 2194
1975 2195 /* Can be NULL if constructor failed */
1976 2196 if (connp->conn_ixa != NULL) {
1977 2197 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
1978 2198 ASSERT(connp->conn_ixa->ixa_ire == NULL);
1979 2199 ASSERT(connp->conn_ixa->ixa_nce == NULL);
1980 2200 ixa_refrele(connp->conn_ixa);
1981 2201 }
1982 2202 }
1983 2203
1984 2204 /* ARGSUSED */
1985 2205 static int
1986 2206 ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
1987 2207 {
1988 2208 itc_t *itc = (itc_t *)buf;
1989 2209 conn_t *connp = &itc->itc_conn;
1990 2210
1991 2211 bzero(connp, sizeof (conn_t));
1992 2212 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1993 2213 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1994 2214 connp->conn_flags = IPCL_IPCCONN;
1995 2215 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1996 2216
1997 2217 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1998 2218 if (connp->conn_ixa == NULL)
1999 2219 return (ENOMEM);
2000 2220 connp->conn_ixa->ixa_refcnt = 1;
2001 2221 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2002 2222 return (0);
2003 2223 }
2004 2224
2005 2225 /* ARGSUSED */
2006 2226 static void
2007 2227 ip_conn_destructor(void *buf, void *cdrarg)
2008 2228 {
2009 2229 itc_t *itc = (itc_t *)buf;
2010 2230 conn_t *connp = &itc->itc_conn;
2011 2231
2012 2232 ASSERT(connp->conn_flags & IPCL_IPCCONN);
2013 2233 ASSERT(connp->conn_priv == NULL);
2014 2234 mutex_destroy(&connp->conn_lock);
2015 2235 cv_destroy(&connp->conn_cv);
2016 2236 rw_destroy(&connp->conn_ilg_lock);
2017 2237
2018 2238 /* Can be NULL if constructor failed */
2019 2239 if (connp->conn_ixa != NULL) {
2020 2240 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2021 2241 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2022 2242 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2023 2243 ixa_refrele(connp->conn_ixa);
2024 2244 }
2025 2245 }
2026 2246
2027 2247 /* ARGSUSED */
2028 2248 static int
2029 2249 udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2030 2250 {
2031 2251 itc_t *itc = (itc_t *)buf;
2032 2252 conn_t *connp = &itc->itc_conn;
2033 2253 udp_t *udp = (udp_t *)&itc[1];
2034 2254
2035 2255 bzero(connp, sizeof (conn_t));
2036 2256 bzero(udp, sizeof (udp_t));
2037 2257
2038 2258 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2039 2259 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2040 2260 connp->conn_udp = udp;
2041 2261 connp->conn_flags = IPCL_UDPCONN;
2042 2262 connp->conn_proto = IPPROTO_UDP;
2043 2263 udp->udp_connp = connp;
2044 2264 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2045 2265 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2046 2266 if (connp->conn_ixa == NULL)
2047 2267 return (ENOMEM);
2048 2268 connp->conn_ixa->ixa_refcnt = 1;
2049 2269 connp->conn_ixa->ixa_protocol = connp->conn_proto;
2050 2270 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2051 2271 return (0);
2052 2272 }
2053 2273
2054 2274 /* ARGSUSED */
2055 2275 static void
2056 2276 udp_conn_destructor(void *buf, void *cdrarg)
2057 2277 {
2058 2278 itc_t *itc = (itc_t *)buf;
2059 2279 conn_t *connp = &itc->itc_conn;
2060 2280 udp_t *udp = (udp_t *)&itc[1];
2061 2281
2062 2282 ASSERT(connp->conn_flags & IPCL_UDPCONN);
2063 2283 ASSERT(udp->udp_connp == connp);
2064 2284 ASSERT(connp->conn_udp == udp);
2065 2285 mutex_destroy(&connp->conn_lock);
2066 2286 cv_destroy(&connp->conn_cv);
2067 2287 rw_destroy(&connp->conn_ilg_lock);
2068 2288
2069 2289 /* Can be NULL if constructor failed */
2070 2290 if (connp->conn_ixa != NULL) {
2071 2291 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2072 2292 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2073 2293 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2074 2294 ixa_refrele(connp->conn_ixa);
2075 2295 }
2076 2296 }
2077 2297
2078 2298 /* ARGSUSED */
2079 2299 static int
2080 2300 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2081 2301 {
2082 2302 itc_t *itc = (itc_t *)buf;
2083 2303 conn_t *connp = &itc->itc_conn;
2084 2304 icmp_t *icmp = (icmp_t *)&itc[1];
2085 2305
2086 2306 bzero(connp, sizeof (conn_t));
2087 2307 bzero(icmp, sizeof (icmp_t));
2088 2308
2089 2309 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2090 2310 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2091 2311 connp->conn_icmp = icmp;
2092 2312 connp->conn_flags = IPCL_RAWIPCONN;
2093 2313 connp->conn_proto = IPPROTO_ICMP;
2094 2314 icmp->icmp_connp = connp;
2095 2315 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2096 2316 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2097 2317 if (connp->conn_ixa == NULL)
2098 2318 return (ENOMEM);
2099 2319 connp->conn_ixa->ixa_refcnt = 1;
2100 2320 connp->conn_ixa->ixa_protocol = connp->conn_proto;
2101 2321 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2102 2322 return (0);
2103 2323 }
2104 2324
2105 2325 /* ARGSUSED */
2106 2326 static void
2107 2327 rawip_conn_destructor(void *buf, void *cdrarg)
2108 2328 {
2109 2329 itc_t *itc = (itc_t *)buf;
2110 2330 conn_t *connp = &itc->itc_conn;
2111 2331 icmp_t *icmp = (icmp_t *)&itc[1];
2112 2332
2113 2333 ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2114 2334 ASSERT(icmp->icmp_connp == connp);
2115 2335 ASSERT(connp->conn_icmp == icmp);
2116 2336 mutex_destroy(&connp->conn_lock);
2117 2337 cv_destroy(&connp->conn_cv);
2118 2338 rw_destroy(&connp->conn_ilg_lock);
2119 2339
2120 2340 /* Can be NULL if constructor failed */
2121 2341 if (connp->conn_ixa != NULL) {
2122 2342 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2123 2343 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2124 2344 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2125 2345 ixa_refrele(connp->conn_ixa);
2126 2346 }
2127 2347 }
2128 2348
2129 2349 /* ARGSUSED */
2130 2350 static int
2131 2351 rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2132 2352 {
2133 2353 itc_t *itc = (itc_t *)buf;
2134 2354 conn_t *connp = &itc->itc_conn;
2135 2355 rts_t *rts = (rts_t *)&itc[1];
2136 2356
2137 2357 bzero(connp, sizeof (conn_t));
2138 2358 bzero(rts, sizeof (rts_t));
2139 2359
2140 2360 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2141 2361 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2142 2362 connp->conn_rts = rts;
2143 2363 connp->conn_flags = IPCL_RTSCONN;
2144 2364 rts->rts_connp = connp;
2145 2365 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2146 2366 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2147 2367 if (connp->conn_ixa == NULL)
2148 2368 return (ENOMEM);
2149 2369 connp->conn_ixa->ixa_refcnt = 1;
2150 2370 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2151 2371 return (0);
2152 2372 }
2153 2373
2154 2374 /* ARGSUSED */
2155 2375 static void
2156 2376 rts_conn_destructor(void *buf, void *cdrarg)
2157 2377 {
2158 2378 itc_t *itc = (itc_t *)buf;
2159 2379 conn_t *connp = &itc->itc_conn;
2160 2380 rts_t *rts = (rts_t *)&itc[1];
2161 2381
2162 2382 ASSERT(connp->conn_flags & IPCL_RTSCONN);
2163 2383 ASSERT(rts->rts_connp == connp);
2164 2384 ASSERT(connp->conn_rts == rts);
2165 2385 mutex_destroy(&connp->conn_lock);
2166 2386 cv_destroy(&connp->conn_cv);
2167 2387 rw_destroy(&connp->conn_ilg_lock);
2168 2388
2169 2389 /* Can be NULL if constructor failed */
2170 2390 if (connp->conn_ixa != NULL) {
2171 2391 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2172 2392 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2173 2393 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2174 2394 ixa_refrele(connp->conn_ixa);
2175 2395 }
2176 2396 }
2177 2397
2398 +/* ARGSUSED */
2399 +static int
2400 +dccp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2401 +{
2402 + itc_t *itc = (itc_t *)buf;
2403 + conn_t *connp = &itc->itc_conn;
2404 + dccp_t *dccp = (dccp_t *)&itc[1];
2405 +
2406 + bzero(connp, sizeof (conn_t));
2407 + bzero(dccp, sizeof (dccp_t));
2408 +
2409 + mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2410 + cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2411 + rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2412 +
2413 + dccp->dccp_timercache = dccp_timermp_alloc(kmflags);
2414 + if (dccp->dccp_timercache == NULL) {
2415 + return (ENOMEM);
2416 + }
2417 +
2418 + connp->conn_dccp = dccp;
2419 + connp->conn_flags = IPCL_DCCPCONN;
2420 + connp->conn_proto = IPPROTO_DCCP;
2421 + dccp->dccp_connp = connp;
2422 +
2423 + connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2424 +	if (connp->conn_ixa == NULL) {
2425 +		dccp_timermp_free(dccp);
2426 +		return (ENOMEM);
2427 +	}
2428 + connp->conn_ixa->ixa_refcnt = 1;
2429 + connp->conn_ixa->ixa_protocol = connp->conn_proto;
2430 + connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2431 +
2432 + return (0);
2433 +}
2434 +
2435 +/* ARGSUSED */
2436 +static void
2437 +dccp_conn_destructor(void *buf, void *cdrarg)
2438 +{
2439 + itc_t *itc = (itc_t *)buf;
2440 + conn_t *connp = &itc->itc_conn;
2441 + dccp_t *dccp = (dccp_t *)&itc[1];
2442 +
2443 + ASSERT(connp->conn_flags & IPCL_DCCPCONN);
2444 + ASSERT(dccp->dccp_connp == connp);
2445 + ASSERT(connp->conn_dccp == dccp);
2446 +
2447 + dccp_timermp_free(dccp);
2448 +
2449 + mutex_destroy(&connp->conn_lock);
2450 + cv_destroy(&connp->conn_cv);
2451 + rw_destroy(&connp->conn_ilg_lock);
2452 +
2453 + if (connp->conn_ixa != NULL) {
2454 + ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2455 + ASSERT(connp->conn_ixa->ixa_ire == NULL);
2456 + ASSERT(connp->conn_ixa->ixa_nce == NULL);
2457 +
2458 + ixa_refrele(connp->conn_ixa);
2459 + }
2460 +}
2461 +
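A constructor/destructor pair such as the one above is normally registered with a kmem cache in ipcl_init(); that change is not part of this hunk, so the following is only a sketch, assuming a dccp_conn_cache variable and the itc_t-plus-dccp_t buffer layout implied by &itc[1] above.

	/* Sketch only: cache name and NULL reclaim callback are assumptions. */
	dccp_conn_cache = kmem_cache_create("dccp_conn_cache",
	    sizeof (itc_t) + sizeof (dccp_t), CACHE_ALIGN_SIZE,
	    dccp_conn_constructor, dccp_conn_destructor,
	    NULL, NULL, NULL, 0);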
2178 2462 /*
2179 2463 * Called as part of ipcl_conn_destroy to assert and clear any pointers
2180 2464 * in the conn_t.
2181 2465 *
2182 2466 * Below we list all the pointers in the conn_t as a documentation aid.
2183 2467 * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2184 2468 * If you add any pointers to the conn_t please add an ASSERT here
2185 2469 * and #ifdef it out if it can't be actually asserted to be NULL.
2186 2470 * In any case, we bzero most of the conn_t at the end of the function.
2187 2471 */
2188 2472 void
2189 2473 ipcl_conn_cleanup(conn_t *connp)
2190 2474 {
2191 2475 ip_xmit_attr_t *ixa;
2192 2476
2193 2477 ASSERT(connp->conn_latch == NULL);
2194 2478 ASSERT(connp->conn_latch_in_policy == NULL);
2195 2479 ASSERT(connp->conn_latch_in_action == NULL);
2196 2480 #ifdef notdef
2197 2481 ASSERT(connp->conn_rq == NULL);
2198 2482 ASSERT(connp->conn_wq == NULL);
2199 2483 #endif
2200 2484 ASSERT(connp->conn_cred == NULL);
2201 2485 ASSERT(connp->conn_g_fanout == NULL);
2202 2486 ASSERT(connp->conn_g_next == NULL);
2203 2487 ASSERT(connp->conn_g_prev == NULL);
2204 2488 ASSERT(connp->conn_policy == NULL);
2205 2489 ASSERT(connp->conn_fanout == NULL);
2206 2490 ASSERT(connp->conn_next == NULL);
2207 2491 ASSERT(connp->conn_prev == NULL);
2208 2492 ASSERT(connp->conn_oper_pending_ill == NULL);
2209 2493 ASSERT(connp->conn_ilg == NULL);
2210 2494 ASSERT(connp->conn_drain_next == NULL);
2211 2495 ASSERT(connp->conn_drain_prev == NULL);
2212 2496 #ifdef notdef
2213 2497 /* conn_idl is not cleared when removed from idl list */
2214 2498 ASSERT(connp->conn_idl == NULL);
2215 2499 #endif
2216 2500 ASSERT(connp->conn_ipsec_opt_mp == NULL);
2217 2501 #ifdef notdef
2218 2502 /* conn_netstack is cleared by the caller; needed by ixa_cleanup */
2219 2503 ASSERT(connp->conn_netstack == NULL);
2220 2504 #endif
2221 2505
2222 2506 ASSERT(connp->conn_helper_info == NULL);
2223 2507 ASSERT(connp->conn_ixa != NULL);
2224 2508 ixa = connp->conn_ixa;
2225 2509 ASSERT(ixa->ixa_refcnt == 1);
2226 2510 /* Need to preserve ixa_protocol */
2227 2511 ixa_cleanup(ixa);
2228 2512 ixa->ixa_flags = 0;
2229 2513
2230 2514 /* Clear out the conn_t fields that are not preserved */
2231 2515 bzero(&connp->conn_start_clr,
2232 2516 sizeof (conn_t) -
2233 2517 ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
2234 2518 }
2235 2519
2236 2520 /*
2237 2521 * All conns are inserted in a global multi-list for the benefit of
2238 2522 * walkers. The walk is guaranteed to walk all open conns at the time
2239 2523 * of the start of the walk exactly once. This property is needed to
2240 2524 * achieve some cleanups during unplumb of interfaces. This is achieved
2241 2525 * as follows.
2242 2526 *
2243 2527 * ipcl_conn_create and ipcl_conn_destroy are the only functions that
2244 2528 * call the insert and delete functions below at creation and deletion
2245 2529 * time respectively. The conn never moves or changes its position in this
2246 2530 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
2247 2531 * won't increase due to walkers, once the conn deletion has started. Note
2248 2532 * that we can't remove the conn from the global list and then wait for
2249 2533 * the refcnt to drop to zero, since walkers would then see a truncated
2250 2534 * list. CONN_INCIPIENT ensures that walkers don't start looking at
2251 2535 * conns until ip_open is ready to make them globally visible.
2252 2536 * The global round robin multi-list locks are held only to get the
2253 2537 * next member/insertion/deletion and contention should be negligible
2254 2538 * if the multi-list is much greater than the number of cpus.
2255 2539 */
2256 2540 void
2257 2541 ipcl_globalhash_insert(conn_t *connp)
2258 2542 {
2259 2543 int index;
2260 2544 struct connf_s *connfp;
2261 2545 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
2262 2546
2263 2547 /*
2264 2548 * No need for atomic here. Approximate even distribution
2265 2549 * in the global lists is sufficient.
2266 2550 */
2267 2551 ipst->ips_conn_g_index++;
2268 2552 index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
2269 2553
2270 2554 connp->conn_g_prev = NULL;
2271 2555 /*
2272 2556 * Mark as INCIPIENT, so that walkers will ignore this
2273 2557 * for now, till ip_open is ready to make it visible globally.
2274 2558 */
2275 2559 connp->conn_state_flags |= CONN_INCIPIENT;
2276 2560
2277 2561 connfp = &ipst->ips_ipcl_globalhash_fanout[index];
2278 2562 /* Insert at the head of the list */
2279 2563 mutex_enter(&connfp->connf_lock);
2280 2564 connp->conn_g_next = connfp->connf_head;
2281 2565 if (connp->conn_g_next != NULL)
2282 2566 connp->conn_g_next->conn_g_prev = connp;
2283 2567 connfp->connf_head = connp;
2284 2568
2285 2569 /* The fanout bucket this conn points to */
2286 2570 connp->conn_g_fanout = connfp;
2287 2571
2288 2572 mutex_exit(&connfp->connf_lock);
2289 2573 }
2290 2574
2291 2575 void
2292 2576 ipcl_globalhash_remove(conn_t *connp)
2293 2577 {
2294 2578 struct connf_s *connfp;
2295 2579
2296 2580 /*
2297 2581 * We were never inserted in the global multi list.
2298 2582 * IPCL_NONE variety is never inserted in the global multilist
2299 2583 * since it is presumed to not need any cleanup and is transient.
2300 2584 */
2301 2585 if (connp->conn_g_fanout == NULL)
2302 2586 return;
2303 2587
2304 2588 connfp = connp->conn_g_fanout;
2305 2589 mutex_enter(&connfp->connf_lock);
2306 2590 if (connp->conn_g_prev != NULL)
2307 2591 connp->conn_g_prev->conn_g_next = connp->conn_g_next;
2308 2592 else
2309 2593 connfp->connf_head = connp->conn_g_next;
2310 2594 if (connp->conn_g_next != NULL)
2311 2595 connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2312 2596 mutex_exit(&connfp->connf_lock);
2313 2597
2314 2598 /* Better to stumble on a null pointer than to corrupt memory */
2315 2599 connp->conn_g_next = NULL;
2316 2600 connp->conn_g_prev = NULL;
2317 2601 connp->conn_g_fanout = NULL;
2318 2602 }
2319 2603
2320 2604 /*
2321 2605 * Walk the list of all conn_t's in the system, calling the function provided
2322 2606  * with the specified argument for each.
2323 2607 * Applies to both IPv4 and IPv6.
2324 2608 *
2325 2609 * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2326 2610 * conn_oper_pending_ill). To guard against stale pointers
2327 2611 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
2328 2612 * unplumbed or removed. New conn_t's that are created while we are walking
2329 2613 * may be missed by this walk, because they are not necessarily inserted
2330 2614 * at the tail of the list. They are new conn_t's and thus don't have any
2331 2615 * stale pointers. The CONN_CLOSING flag ensures that no new reference
2332 2616 * is created to the struct that is going away.
2333 2617 */
2334 2618 void
2335 2619 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
2336 2620 {
2337 2621 int i;
2338 2622 conn_t *connp;
2339 2623 conn_t *prev_connp;
2340 2624
2341 2625 for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2342 2626 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2343 2627 prev_connp = NULL;
2344 2628 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
2345 2629 while (connp != NULL) {
2346 2630 mutex_enter(&connp->conn_lock);
2347 2631 if (connp->conn_state_flags &
2348 2632 (CONN_CONDEMNED | CONN_INCIPIENT)) {
2349 2633 mutex_exit(&connp->conn_lock);
2350 2634 connp = connp->conn_g_next;
2351 2635 continue;
2352 2636 }
2353 2637 CONN_INC_REF_LOCKED(connp);
2354 2638 mutex_exit(&connp->conn_lock);
2355 2639 mutex_exit(
2356 2640 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2357 2641 (*func)(connp, arg);
2358 2642 if (prev_connp != NULL)
2359 2643 CONN_DEC_REF(prev_connp);
2360 2644 mutex_enter(
2361 2645 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2362 2646 prev_connp = connp;
2363 2647 connp = connp->conn_g_next;
2364 2648 }
2365 2649 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2366 2650 if (prev_connp != NULL)
2367 2651 CONN_DEC_REF(prev_connp);
2368 2652 }
2369 2653 }
2370 2654
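ipcl_walk() holds a reference on each conn_t for the duration of the callback, so a callback only needs a reference of its own if it keeps the pointer beyond the call. A minimal sketch of a callback follows; conn_count_dccp() is a hypothetical name.

static void
conn_count_dccp(conn_t *connp, void *arg)
{
	uint_t	*countp = arg;

	/* The walker already holds a reference on connp for this call. */
	if (connp->conn_proto == IPPROTO_DCCP)
		(*countp)++;
}

/* Typical use: uint_t n = 0; ipcl_walk(conn_count_dccp, &n, ipst); */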
2371 2655 /*
2372 2656 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2373 2657 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
2374 2658 * held; caller must call CONN_DEC_REF. Only checks for connected entries
2375 2659 * (peer tcp in ESTABLISHED state).
2376 2660 */
2377 2661 conn_t *
2378 2662 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2379 2663 ip_stack_t *ipst)
2380 2664 {
2381 2665 uint32_t ports;
2382 2666 uint16_t *pports = (uint16_t *)&ports;
2383 2667 connf_t *connfp;
2384 2668 conn_t *tconnp;
2385 2669 boolean_t zone_chk;
2386 2670
2387 2671 /*
2388 2672  * If either the source or destination address is loopback, then
2389 2673 * both endpoints must be in the same Zone. Otherwise, both of
2390 2674 * the addresses are system-wide unique (tcp is in ESTABLISHED
2391 2675 * state) and the endpoints may reside in different Zones.
2392 2676 */
2393 2677 zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
2394 2678 ipha->ipha_dst == htonl(INADDR_LOOPBACK));
2395 2679
2396 2680 pports[0] = tcpha->tha_fport;
2397 2681 pports[1] = tcpha->tha_lport;
2398 2682
2399 2683 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2400 2684 ports, ipst)];
2401 2685
2402 2686 mutex_enter(&connfp->connf_lock);
2403 2687 for (tconnp = connfp->connf_head; tconnp != NULL;
2404 2688 tconnp = tconnp->conn_next) {
2405 2689
2406 2690 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2407 2691 ipha->ipha_dst, ipha->ipha_src, ports) &&
2408 2692 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2409 2693 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2410 2694
2411 2695 ASSERT(tconnp != connp);
2412 2696 CONN_INC_REF(tconnp);
2413 2697 mutex_exit(&connfp->connf_lock);
2414 2698 return (tconnp);
2415 2699 }
2416 2700 }
2417 2701 mutex_exit(&connfp->connf_lock);
2418 2702 return (NULL);
2419 2703 }
2420 2704
2421 2705 /*
2422 2706 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2423 2707 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
2424 2708 * held; caller must call CONN_DEC_REF. Only checks for connected entries
2425 2709 * (peer tcp in ESTABLISHED state).
2426 2710 */
2427 2711 conn_t *
2428 2712 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2429 2713 ip_stack_t *ipst)
2430 2714 {
2431 2715 uint32_t ports;
2432 2716 uint16_t *pports = (uint16_t *)&ports;
2433 2717 connf_t *connfp;
2434 2718 conn_t *tconnp;
2435 2719 boolean_t zone_chk;
2436 2720
2437 2721 /*
2438 2722  * If either the source or destination address is loopback, then
2439 2723 * both endpoints must be in the same Zone. Otherwise, both of
2440 2724 * the addresses are system-wide unique (tcp is in ESTABLISHED
2441 2725 * state) and the endpoints may reside in different Zones. We
2442 2726 * don't do Zone check for link local address(es) because the
2443 2727 * current Zone implementation treats each link local address as
2444 2728 * being unique per system node, i.e. they belong to global Zone.
2445 2729 */
2446 2730 zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
2447 2731 IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
2448 2732
2449 2733 pports[0] = tcpha->tha_fport;
2450 2734 pports[1] = tcpha->tha_lport;
2451 2735
2452 2736 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2453 2737 ports, ipst)];
2454 2738
2455 2739 mutex_enter(&connfp->connf_lock);
2456 2740 for (tconnp = connfp->connf_head; tconnp != NULL;
2457 2741 tconnp = tconnp->conn_next) {
2458 2742
2459 2743 /* We skip conn_bound_if check here as this is loopback tcp */
2460 2744 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2461 2745 ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2462 2746 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2463 2747 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2464 2748
2465 2749 ASSERT(tconnp != connp);
2466 2750 CONN_INC_REF(tconnp);
2467 2751 mutex_exit(&connfp->connf_lock);
2468 2752 return (tconnp);
2469 2753 }
2470 2754 }
2471 2755 mutex_exit(&connfp->connf_lock);
2472 2756 return (NULL);
2473 2757 }
2474 2758
2475 2759 /*
2476 2760 * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2477 2761 * Returns with conn reference held. Caller must call CONN_DEC_REF.
2478 2762 * Only checks for connected entries i.e. no INADDR_ANY checks.
2479 2763 */
2480 2764 conn_t *
2481 2765 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2482 2766 ip_stack_t *ipst)
2483 2767 {
2484 2768 uint32_t ports;
2485 2769 uint16_t *pports;
2486 2770 connf_t *connfp;
2487 2771 conn_t *tconnp;
2488 2772
2489 2773 pports = (uint16_t *)&ports;
2490 2774 pports[0] = tcpha->tha_fport;
2491 2775 pports[1] = tcpha->tha_lport;
2492 2776
2493 2777 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2494 2778 ports, ipst)];
2495 2779
2496 2780 mutex_enter(&connfp->connf_lock);
2497 2781 for (tconnp = connfp->connf_head; tconnp != NULL;
2498 2782 tconnp = tconnp->conn_next) {
2499 2783
2500 2784 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2501 2785 ipha->ipha_dst, ipha->ipha_src, ports) &&
2502 2786 tconnp->conn_tcp->tcp_state >= min_state) {
2503 2787
2504 2788 CONN_INC_REF(tconnp);
2505 2789 mutex_exit(&connfp->connf_lock);
2506 2790 return (tconnp);
2507 2791 }
2508 2792 }
2509 2793 mutex_exit(&connfp->connf_lock);
2510 2794 return (NULL);
2511 2795 }
2512 2796
2513 2797 /*
2514 2798 * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2515 2799 * Returns with conn reference held. Caller must call CONN_DEC_REF.
2516 2800 * Only checks for connected entries i.e. no INADDR_ANY checks.
2517 2801 * Match on ifindex in addition to addresses.
2518 2802 */
2519 2803 conn_t *
2520 2804 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2521 2805 uint_t ifindex, ip_stack_t *ipst)
2522 2806 {
2523 2807 tcp_t *tcp;
2524 2808 uint32_t ports;
2525 2809 uint16_t *pports;
2526 2810 connf_t *connfp;
2527 2811 conn_t *tconnp;
2528 2812
2529 2813 pports = (uint16_t *)&ports;
2530 2814 pports[0] = tcpha->tha_fport;
2531 2815 pports[1] = tcpha->tha_lport;
2532 2816
2533 2817 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2534 2818 ports, ipst)];
2535 2819
2536 2820 mutex_enter(&connfp->connf_lock);
2537 2821 for (tconnp = connfp->connf_head; tconnp != NULL;
2538 2822 tconnp = tconnp->conn_next) {
2539 2823
2540 2824 tcp = tconnp->conn_tcp;
2541 2825 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2542 2826 ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2543 2827 tcp->tcp_state >= min_state &&
2544 2828 (tconnp->conn_bound_if == 0 ||
2545 2829 tconnp->conn_bound_if == ifindex)) {
2546 2830
2547 2831 CONN_INC_REF(tconnp);
2548 2832 mutex_exit(&connfp->connf_lock);
2549 2833 return (tconnp);
2550 2834 }
2551 2835 }
2552 2836 mutex_exit(&connfp->connf_lock);
2553 2837 return (NULL);
2554 2838 }
2555 2839
2556 2840 /*
2841 + * Same as ipcl_tcp_lookup_reversed_ipv4, but for DCCP connections.
2842 + */
2843 +conn_t *
2844 +ipcl_dccp_lookup_reversed_ipv4(ipha_t *ipha, dccpha_t *dccpha, int min_state,
2845 + ip_stack_t *ipst)
2846 +{
2847 + conn_t *tconnp;
2848 + connf_t *connfp;
2849 + uint16_t *pports;
2850 + uint32_t ports;
2851 +
2852 + pports = (uint16_t *)&ports;
2853 + pports[0] = dccpha->dha_fport;
2854 + pports[1] = dccpha->dha_lport;
2855 +
2856 + connfp = &ipst->ips_ipcl_dccp_conn_fanout[IPCL_DCCP_CONN_HASH(
2857 + ipha->ipha_dst, ports, ipst)];
2858 +
2859 + mutex_enter(&connfp->connf_lock);
2860 + for (tconnp = connfp->connf_head; tconnp != NULL;
2861 + tconnp = tconnp->conn_next) {
2862 + if (IPCL_CONN_MATCH(tconnp, IPPROTO_DCCP,
2863 + ipha->ipha_dst, ipha->ipha_src, ports) &&
2864 + tconnp->conn_dccp->dccp_state >= min_state) {
2865 + CONN_INC_REF(tconnp);
2866 + mutex_exit(&connfp->connf_lock);
2867 + return (tconnp);
2868 + }
2869 + }
2870 + mutex_exit(&connfp->connf_lock);
2871 +
2872 + return (NULL);
2873 +}
2874 +
2875 +/*
2876 + * Same as ipcl_tcp_lookup_reversed_ipv6, but for DCCP connections.
2877 + */
2878 +conn_t *
2879 +ipcl_dccp_lookup_reversed_ipv6(ip6_t *ip6h, dccpha_t *dccpha, int min_state,
2880 + uint_t ifindex, ip_stack_t *ipst)
2881 +{
2882 + conn_t *tconnp;
2883 +	connf_t	*connfp;
2884 +	uint32_t ports;
2885 +	uint16_t *pports;
2886 +
2887 +	pports = (uint16_t *)&ports;
2888 +	pports[0] = dccpha->dha_fport;
2889 +	pports[1] = dccpha->dha_lport;
2890 +
2891 +	connfp = &ipst->ips_ipcl_dccp_conn_fanout[IPCL_DCCP_CONN_HASH_V6(
2892 +	    ip6h->ip6_dst, ports, ipst)];
2893 +
2894 +	mutex_enter(&connfp->connf_lock);
2895 +	for (tconnp = connfp->connf_head; tconnp != NULL;
2896 +	    tconnp = tconnp->conn_next) {
2897 +		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_DCCP,
2898 +		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2899 +		    tconnp->conn_dccp->dccp_state >= min_state &&
2900 +		    (tconnp->conn_bound_if == 0 ||
2901 +		    tconnp->conn_bound_if == ifindex)) {
2902 +			CONN_INC_REF(tconnp);
2903 +			mutex_exit(&connfp->connf_lock);
2904 +			return (tconnp);
2905 +		}
2906 +	}
2907 +	mutex_exit(&connfp->connf_lock);
2908 +
2909 +	return (NULL);
2914 +}
2915 +
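Like their TCP counterparts, these reversed lookups are intended for "bounced" datagrams, i.e. ICMP errors quoting the header of a packet this host sent. The sketch below shows how a DCCP ICMP handler might use the v4 variant; dccp_find_bounced_conn() and the DCCPS_REQUEST minimum state are assumed names, not part of this patch.

static conn_t *
dccp_find_bounced_conn(ipha_t *inner_ipha, dccpha_t *inner_dccpha,
    ip_stack_t *ipst)
{
	conn_t	*connp;

	/* Look up the reversed 4-tuple quoted inside the ICMP error. */
	connp = ipcl_dccp_lookup_reversed_ipv4(inner_ipha, inner_dccpha,
	    DCCPS_REQUEST, ipst);

	/* On success a reference is held; the caller must CONN_DEC_REF. */
	return (connp);
}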
2916 +/*
2557 2917 * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2558 2918 * a listener when changing state.
2559 2919 */
2560 2920 conn_t *
2561 2921 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2562 2922 ip_stack_t *ipst)
2563 2923 {
2564 2924 connf_t *bind_connfp;
2565 2925 conn_t *connp;
2566 2926 tcp_t *tcp;
2567 2927
2568 2928 /*
2569 2929 * Avoid false matches for packets sent to an IP destination of
2570 2930 * all zeros.
2571 2931 */
2572 2932 if (laddr == 0)
2573 2933 return (NULL);
2574 2934
2575 2935 ASSERT(zoneid != ALL_ZONES);
2576 2936
2577 2937 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2578 2938 mutex_enter(&bind_connfp->connf_lock);
2579 2939 for (connp = bind_connfp->connf_head; connp != NULL;
2580 2940 connp = connp->conn_next) {
2581 2941 tcp = connp->conn_tcp;
2582 2942 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
2583 2943 IPCL_ZONE_MATCH(connp, zoneid) &&
2584 2944 (tcp->tcp_listener == NULL)) {
2585 2945 CONN_INC_REF(connp);
2586 2946 mutex_exit(&bind_connfp->connf_lock);
2587 2947 return (connp);
2588 2948 }
2589 2949 }
2590 2950 mutex_exit(&bind_connfp->connf_lock);
2591 2951 return (NULL);
2592 2952 }
2593 2953
2594 2954 /*
2595 2955 * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2596 2956 * a listener when changing state.
2597 2957 */
2598 2958 conn_t *
2599 2959 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2600 2960 zoneid_t zoneid, ip_stack_t *ipst)
2601 2961 {
2602 2962 connf_t *bind_connfp;
2603 2963 conn_t *connp = NULL;
2604 2964 tcp_t *tcp;
2605 2965
2606 2966 /*
2607 2967 * Avoid false matches for packets sent to an IP destination of
2608 2968 * all zeros.
2609 2969 */
2610 2970 if (IN6_IS_ADDR_UNSPECIFIED(laddr))
2611 2971 return (NULL);
2612 2972
2613 2973 ASSERT(zoneid != ALL_ZONES);
2614 2974
2615 2975 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2616 2976 mutex_enter(&bind_connfp->connf_lock);
2617 2977 for (connp = bind_connfp->connf_head; connp != NULL;
2618 2978 connp = connp->conn_next) {
2619 2979 tcp = connp->conn_tcp;
2620 2980 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
2621 2981 IPCL_ZONE_MATCH(connp, zoneid) &&
2622 2982 (connp->conn_bound_if == 0 ||
2623 2983 connp->conn_bound_if == ifindex) &&
2624 2984 tcp->tcp_listener == NULL) {
2625 2985 CONN_INC_REF(connp);
2626 2986 mutex_exit(&bind_connfp->connf_lock);
2627 2987 return (connp);
2628 2988 }
2629 2989 }
2630 2990 mutex_exit(&bind_connfp->connf_lock);
2631 2991 return (NULL);
2632 2992 }
2633 2993
2634 2994 /*
2635 2995 * ipcl_get_next_conn
2636 2996 * get the next entry in the conn global list
2637 2997 * and put a reference on the next_conn.
2638 2998 * decrement the reference on the current conn.
2639 2999 *
2640 3000 * This is an iterator based walker function that also provides for
2641 3001 * some selection by the caller. It walks through the conn_hash bucket
2642 3002 * searching for the next valid connp in the list, and selects connections
2643 3003 * that are neither closed nor condemned. It also REFHOLDS the conn
2644 3004 * thus ensuring that the conn exists when the caller uses the conn.
2645 3005 */
2646 3006 conn_t *
2647 3007 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2648 3008 {
2649 3009 conn_t *next_connp;
2650 3010
2651 3011 if (connfp == NULL)
2652 3012 return (NULL);
2653 3013
2654 3014 mutex_enter(&connfp->connf_lock);
2655 3015
2656 3016 next_connp = (connp == NULL) ?
2657 3017 connfp->connf_head : connp->conn_g_next;
2658 3018
2659 3019 while (next_connp != NULL) {
2660 3020 mutex_enter(&next_connp->conn_lock);
2661 3021 if (!(next_connp->conn_flags & conn_flags) ||
2662 3022 (next_connp->conn_state_flags &
2663 3023 (CONN_CONDEMNED | CONN_INCIPIENT))) {
2664 3024 /*
2665 3025 * This conn has been condemned or
2666 3026 * is closing, or the flags don't match
2667 3027 */
2668 3028 mutex_exit(&next_connp->conn_lock);
2669 3029 next_connp = next_connp->conn_g_next;
2670 3030 continue;
2671 3031 }
2672 3032 CONN_INC_REF_LOCKED(next_connp);
2673 3033 mutex_exit(&next_connp->conn_lock);
2674 3034 break;
2675 3035 }
2676 3036
2677 3037 mutex_exit(&connfp->connf_lock);
2678 3038
2679 3039 if (connp != NULL)
2680 3040 CONN_DEC_REF(connp);
2681 3041
2682 3042 return (next_connp);
2683 3043 }
2684 3044
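A sketch of how the iterator above is typically driven: ipcl_get_next_conn() drops the reference on the previous conn_t itself, so only a caller that stops early is still holding one. some_stop_condition() is a placeholder.

	conn_t	*connp = NULL;

	while ((connp = ipcl_get_next_conn(connfp, connp,
	    IPCL_TCPCONN | IPCL_UDPCONN)) != NULL) {
		/* A reference is held on connp for this iteration. */
		if (some_stop_condition(connp)) {
			CONN_DEC_REF(connp);	/* stopping early */
			break;
		}
	}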
2685 3045 #ifdef CONN_DEBUG
2686 3046 /*
2687 3047 * Trace of the last NBUF refhold/refrele
2688 3048 */
2689 3049 int
2690 3050 conn_trace_ref(conn_t *connp)
2691 3051 {
2692 3052 int last;
2693 3053 conn_trace_t *ctb;
2694 3054
2695 3055 ASSERT(MUTEX_HELD(&connp->conn_lock));
2696 3056 last = connp->conn_trace_last;
2697 3057 last++;
2698 3058 if (last == CONN_TRACE_MAX)
2699 3059 last = 0;
2700 3060
2701 3061 ctb = &connp->conn_trace_buf[last];
2702 3062 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2703 3063 connp->conn_trace_last = last;
2704 3064 return (1);
2705 3065 }
2706 3066
2707 3067 int
2708 3068 conn_untrace_ref(conn_t *connp)
2709 3069 {
2710 3070 int last;
2711 3071 conn_trace_t *ctb;
2712 3072
2713 3073 ASSERT(MUTEX_HELD(&connp->conn_lock));
2714 3074 last = connp->conn_trace_last;
2715 3075 last++;
2716 3076 if (last == CONN_TRACE_MAX)
2717 3077 last = 0;
2718 3078
2719 3079 ctb = &connp->conn_trace_buf[last];
2720 3080 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2721 3081 connp->conn_trace_last = last;
2722 3082 return (1);
2723 3083 }
2724 3084 #endif