1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 /*
26 * IP PACKET CLASSIFIER
27 *
28 * The IP packet classifier provides mapping between IP packets and persistent
29 * connection state for connection-oriented protocols. It also provides
30 * interface for managing connection states.
31 *
32 * The connection state is kept in conn_t data structure and contains, among
33 * other things:
34 *
35 * o local/remote address and ports
36 * o Transport protocol
37 * o squeue for the connection (for TCP only)
38 * o reference counter
39 * o Connection state
40 * o hash table linkage
41 * o interface/ire information
42 * o credentials
43 * o ipsec policy
44 * o send and receive functions.
45 * o mutex lock.
46 *
47 * Connections use a reference counting scheme. They are freed when the
48 * reference counter drops to zero. A reference is incremented when connection
49 * is placed in a list or table, when incoming packet for the connection arrives
50 * and when connection is processed via squeue (squeue processing may be
51 * asynchronous and the reference protects the connection from being destroyed
52 * before its processing is finished).
53 *
54 * conn_recv is used to pass up packets to the ULP.
55 * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
56 * a listener, and changes to tcp_input_listener as the listener has picked a
57 * good squeue. For other cases it is set to tcp_input_data.
58 *
59 * conn_recvicmp is used to pass up ICMP errors to the ULP.
60 *
61 * Classifier uses several hash tables:
62 *
63 * ipcl_conn_fanout: contains all TCP connections in CONNECTED state
64 * ipcl_bind_fanout: contains all connections in BOUND state
65 * ipcl_proto_fanout: IPv4 protocol fanout
66 * ipcl_proto_fanout_v6: IPv6 protocol fanout
67 * ipcl_udp_fanout: contains all UDP connections
68 * ipcl_iptun_fanout: contains all IP tunnel connections
69 * ipcl_globalhash_fanout: contains all connections
70 *
71 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
72 * which need to view all existing connections.
73 *
74 * All tables are protected by per-bucket locks. When both per-bucket lock and
75 * connection lock need to be held, the per-bucket lock should be acquired
76 * first, followed by the connection lock.
77 *
78 * All functions doing search in one of these tables increment a reference
79 * counter on the connection found (if any). This reference should be dropped
80 * when the caller has finished processing the connection.
81 *
82 *
83 * INTERFACES:
84 * ===========
85 *
86 * Connection Lookup:
87 * ------------------
88 *
89 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
90 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
91 *
92 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
93 * it can't find any associated connection. If the connection is found, its
94 * reference counter is incremented.
95 *
96 * mp: mblock, containing packet header. The full header should fit
97 * into a single mblock. It should also contain at least full IP
98 * and TCP or UDP header.
99 *
100 * protocol: Either IPPROTO_TCP or IPPROTO_UDP.
101 *
102 * hdr_len: The size of IP header. It is used to find TCP or UDP header in
103 * the packet.
104 *
105 * ira->ira_zoneid: The zone in which the returned connection must be; the
106 * zoneid corresponding to the ire_zoneid on the IRE located for
107 * the packet's destination address.
108 *
109 * ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
110 * IRAF_TX_SHARED_ADDR flags
111 *
112 * For TCP connections, the lookup order is as follows:
113 * 5-tuple {src, dst, protocol, local port, remote port}
114 * lookup in ipcl_conn_fanout table.
115 * 3-tuple {dst, remote port, protocol} lookup in
116 * ipcl_bind_fanout table.
117 *
118 * For UDP connections, a 5-tuple {src, dst, protocol, local port,
119 * remote port} lookup is done on ipcl_udp_fanout. Note that,
 *			these interfaces do not handle cases where a packet belongs
121 * to multiple UDP clients, which is handled in IP itself.
122 *
123 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
124 * determine which actual zone gets the segment. This is used only in a
125 * labeled environment. The matching rules are:
126 *
127 * - If it's not a multilevel port, then the label on the packet selects
128 * the zone. Unlabeled packets are delivered to the global zone.
129 *
130 * - If it's a multilevel port, then only the zone registered to receive
131 * packets on that port matches.
132 *
133 * Also, in a labeled environment, packet labels need to be checked. For fully
134 * bound TCP connections, we can assume that the packet label was checked
135 * during connection establishment, and doesn't need to be checked on each
136 * packet. For others, though, we need to check for strict equality or, for
137 * multilevel ports, membership in the range or set. This part currently does
138 * a tnrh lookup on each packet, but could be optimized to use cached results
139 * if that were necessary. (SCTP doesn't come through here, but if it did,
140 * we would apply the same rules as TCP.)
141 *
142 * An implication of the above is that fully-bound TCP sockets must always use
143 * distinct 4-tuples; they can't be discriminated by label alone.
144 *
145 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
146 * as there's no connection set-up handshake and no shared state.
147 *
148 * Labels on looped-back packets within a single zone do not need to be
149 * checked, as all processes in the same zone have the same label.
150 *
151 * Finally, for unlabeled packets received by a labeled system, special rules
152 * apply. We consider only the MLP if there is one. Otherwise, we prefer a
153 * socket in the zone whose label matches the default label of the sender, if
154 * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the
155 * receiver's label must dominate the sender's default label.
156 *
157 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
158 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
159 * ip_stack);
160 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
163 * ports are read from the IP and TCP header respectively.
164 *
165 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol,
166 * zoneid, ip_stack);
167 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
168 * zoneid, ip_stack);
169 *
170 * Lookup routine to find a listener with the tuple {lport, laddr,
171 * protocol} in the ipcl_bind_fanout table. For IPv6, an additional
172 * parameter interface index is also compared.
173 *
174 * void ipcl_walk(func, arg, ip_stack)
175 *
176 * Apply 'func' to every connection available. The 'func' is called as
177 * (*func)(connp, arg). The walk is non-atomic so connections may be
178 * created and destroyed during the walk. The CONN_CONDEMNED and
179 * CONN_INCIPIENT flags ensure that connections which are newly created
180 * or being destroyed are not selected by the walker.
181 *
182 * Table Updates
183 * -------------
184 *
185 * int ipcl_conn_insert(connp);
186 * int ipcl_conn_insert_v4(connp);
187 * int ipcl_conn_insert_v6(connp);
188 *
189 * Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
191 * connp conn_t to be inserted
192 *
193 * Return value :
194 * 0 if connp was inserted
195 * EADDRINUSE if the connection with the same tuple
196 * already exists.
197 *
198 * int ipcl_bind_insert(connp);
199 * int ipcl_bind_insert_v4(connp);
200 * int ipcl_bind_insert_v6(connp);
201 *
202 * Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
204 * connp conn_t to be inserted
205 *
206 *
207 * void ipcl_hash_remove(connp);
208 *
209 * Removes the 'connp' from the connection fanout table.
210 *
211 * Connection Creation/Destruction
212 * -------------------------------
213 *
214 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
215 *
216 * Creates a new conn based on the type flag, inserts it into
217 * globalhash table.
218 *
219 * type: This flag determines the type of conn_t which needs to be
220 * created i.e., which kmem_cache it comes from.
221 * IPCL_TCPCONN indicates a TCP connection
222 * IPCL_SCTPCONN indicates a SCTP connection
223 * IPCL_UDPCONN indicates a UDP conn_t.
224 * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t.
225 * IPCL_RTSCONN indicates a RTS conn_t.
226 * IPCL_DCCPCONN indicates a DCCP conn_t.
227 * IPCL_IPCCONN indicates all other connections.
228 *
229 * void ipcl_conn_destroy(connp)
230 *
231 * Destroys the connection state, removes it from the global
232 * connection hash table and frees its memory.
233 */
234
235 #include <sys/types.h>
236 #include <sys/stream.h>
237 #include <sys/stropts.h>
238 #include <sys/sysmacros.h>
239 #include <sys/strsubr.h>
240 #include <sys/strsun.h>
241 #define _SUN_TPI_VERSION 2
242 #include <sys/ddi.h>
243 #include <sys/cmn_err.h>
244 #include <sys/debug.h>
245
246 #include <sys/systm.h>
247 #include <sys/param.h>
248 #include <sys/kmem.h>
249 #include <sys/isa_defs.h>
250 #include <inet/common.h>
251 #include <netinet/ip6.h>
252 #include <netinet/icmp6.h>
253
254 #include <inet/ip.h>
255 #include <inet/ip_if.h>
256 #include <inet/ip_ire.h>
257 #include <inet/ip6.h>
258 #include <inet/ip_ndp.h>
259 #include <inet/ip_impl.h>
260 #include <inet/udp_impl.h>
261 #include <inet/dccp/dccp_impl.h>
262 #include <inet/sctp_ip.h>
263 #include <inet/sctp/sctp_impl.h>
264 #include <inet/rawip_impl.h>
265 #include <inet/rts_impl.h>
266 #include <inet/iptun/iptun_impl.h>
267
268 #include <sys/cpuvar.h>
269
270 #include <inet/ipclassifier.h>
271 #include <inet/tcp.h>
272 #include <inet/ipsec_impl.h>
273
274 #include <sys/tsol/tnet.h>
275 #include <sys/sockio.h>
276
/* Old value for compatibility. Settable in /etc/system */
uint_t tcp_conn_hash_size = 0;

/*
 * New value. Zero means choose automatically. Settable in /etc/system.
 * When automatic sizing is used, the fanout is scaled as
 * (freemem * PAGESIZE) / ipcl_conn_hash_memfactor, capped at
 * ipcl_conn_hash_maxsize (see ipcl_init()).
 */
uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/* bind/dccp/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_dccp_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size. Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 * expect that most large deployments would have hundreds of tunnels, and
 * thousands in the extreme case.
 */
uint_t ipcl_iptun_fanout_size = 6143;
299
300 /*
301 * Power of 2^N Primes useful for hashing for N of 0-28,
302 * these primes are the nearest prime <= 2^N - 2^(N-2).
303 */
304
305 #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \
306 6143, 12281, 24571, 49139, 98299, 196597, 393209, \
307 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \
308 50331599, 100663291, 201326557, 0}
309
310 /*
311 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
312 * are aligned on cache lines.
313 */
314 typedef union itc_s {
315 conn_t itc_conn;
316 char itcu_filler[CACHE_ALIGN(conn_s)];
317 } itc_t;
318
/*
 * Per-protocol conn_t kmem caches, created in ipcl_g_init() and destroyed
 * in ipcl_g_destroy().  sctp_conn_cache is extern: it is defined (and not
 * destroyed) by the SCTP module, not here.
 */
struct kmem_cache *tcp_conn_cache;
struct kmem_cache *ip_conn_cache;
extern struct kmem_cache *sctp_conn_cache;
struct kmem_cache *udp_conn_cache;
struct kmem_cache *rawip_conn_cache;
struct kmem_cache *rts_conn_cache;
struct kmem_cache *dccp_conn_cache;

/* TCP timer mblk alloc/free helpers, defined in the TCP module. */
extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

/* kmem cache constructor/destructor callbacks, one pair per conn type. */
static	int	ip_conn_constructor(void *, void *, int);
static	void	ip_conn_destructor(void *, void *);

static	int	tcp_conn_constructor(void *, void *, int);
static	void	tcp_conn_destructor(void *, void *);

static	int	udp_conn_constructor(void *, void *, int);
static	void	udp_conn_destructor(void *, void *);

static	int	rawip_conn_constructor(void *, void *, int);
static	void	rawip_conn_destructor(void *, void *);

static	int	rts_conn_constructor(void *, void *, int);
static	void	rts_conn_destructor(void *, void *);

static	int	dccp_conn_constructor(void *, void *, int);
static	void	dccp_conn_destructor(void *, void *);
347
348 /*
349 * Global (for all stack instances) init routine
350 */
351 void
352 ipcl_g_init(void)
353 {
354 ip_conn_cache = kmem_cache_create("ip_conn_cache",
355 sizeof (conn_t), CACHE_ALIGN_SIZE,
356 ip_conn_constructor, ip_conn_destructor,
357 NULL, NULL, NULL, 0);
358
359 tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
360 sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
361 tcp_conn_constructor, tcp_conn_destructor,
362 tcp_conn_reclaim, NULL, NULL, 0);
363
364 udp_conn_cache = kmem_cache_create("udp_conn_cache",
365 sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
366 udp_conn_constructor, udp_conn_destructor,
367 NULL, NULL, NULL, 0);
368
369 rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
370 sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
371 rawip_conn_constructor, rawip_conn_destructor,
372 NULL, NULL, NULL, 0);
373
374 rts_conn_cache = kmem_cache_create("rts_conn_cache",
375 sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
376 rts_conn_constructor, rts_conn_destructor,
377 NULL, NULL, NULL, 0);
378
379 /* XXX:DCCP reclaim */
380 dccp_conn_cache = kmem_cache_create("dccp_conn_cache",
381 sizeof (itc_t) + sizeof (dccp_t), CACHE_ALIGN_SIZE,
382 dccp_conn_constructor, dccp_conn_destructor,
383 NULL, NULL, NULL, 0);
384 }
385
386 /*
387 * ipclassifier intialization routine, sets up hash tables.
388 */
389 void
390 ipcl_init(ip_stack_t *ipst)
391 {
392 int i;
393 int sizes[] = P2Ps();
394
395 /*
396 * Calculate size of conn fanout table from /etc/system settings
397 */
398 if (ipcl_conn_hash_size != 0) {
399 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
400 } else if (tcp_conn_hash_size != 0) {
401 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
402 } else {
403 extern pgcnt_t freemem;
404
405 ipst->ips_ipcl_conn_fanout_size =
406 (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
407
408 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
409 ipst->ips_ipcl_conn_fanout_size =
410 ipcl_conn_hash_maxsize;
411 }
412 }
413
414 for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
415 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
416 break;
417 }
418 }
419 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
420 /* Out of range, use the 2^16 value */
421 ipst->ips_ipcl_conn_fanout_size = sizes[16];
422 }
423
424 /* Take values from /etc/system */
425 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
426 ipst->ips_ipcl_dccp_fanout_size = ipcl_dccp_fanout_size;
427 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
428 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
429 ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
430
431 ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
432
433 ipst->ips_ipcl_conn_fanout = kmem_zalloc(
434 ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
435
436 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
437 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
438 MUTEX_DEFAULT, NULL);
439 }
440
441 ipst->ips_ipcl_bind_fanout = kmem_zalloc(
442 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
443
444 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
445 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
446 MUTEX_DEFAULT, NULL);
447 }
448
449 ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
450 sizeof (connf_t), KM_SLEEP);
451 for (i = 0; i < IPPROTO_MAX; i++) {
452 mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
453 MUTEX_DEFAULT, NULL);
454 }
455
456 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
457 sizeof (connf_t), KM_SLEEP);
458 for (i = 0; i < IPPROTO_MAX; i++) {
459 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
460 MUTEX_DEFAULT, NULL);
461 }
462
463 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
464 mutex_init(&ipst->ips_rts_clients->connf_lock,
465 NULL, MUTEX_DEFAULT, NULL);
466
467 ipst->ips_ipcl_udp_fanout = kmem_zalloc(
468 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
469 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
470 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
471 MUTEX_DEFAULT, NULL);
472 }
473
474 ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
475 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
476 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
477 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
478 MUTEX_DEFAULT, NULL);
479 }
480
481 ipst->ips_ipcl_raw_fanout = kmem_zalloc(
482 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
483 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
484 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
485 MUTEX_DEFAULT, NULL);
486 }
487
488 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
489 sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
490 for (i = 0; i < CONN_G_HASH_SIZE; i++) {
491 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
492 NULL, MUTEX_DEFAULT, NULL);
493 }
494
495 ipst->ips_ipcl_dccp_fanout = kmem_zalloc(
496 ipst->ips_ipcl_dccp_fanout_size * sizeof (connf_t), KM_SLEEP);
497 for (i = 0; i < ipst->ips_ipcl_dccp_fanout_size; i++) {
498 mutex_init(&ipst->ips_ipcl_dccp_fanout[i].connf_lock, NULL,
499 MUTEX_DEFAULT, NULL);
500 }
501 }
502
503 void
504 ipcl_g_destroy(void)
505 {
506 kmem_cache_destroy(ip_conn_cache);
507 kmem_cache_destroy(tcp_conn_cache);
508 kmem_cache_destroy(udp_conn_cache);
509 kmem_cache_destroy(rawip_conn_cache);
510 kmem_cache_destroy(rts_conn_cache);
511 kmem_cache_destroy(dccp_conn_cache);
512 }
513
514 /*
515 * All user-level and kernel use of the stack must be gone
516 * by now.
517 */
518 void
519 ipcl_destroy(ip_stack_t *ipst)
520 {
521 int i;
522
523 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
524 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
525 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
526 }
527 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
528 sizeof (connf_t));
529 ipst->ips_ipcl_conn_fanout = NULL;
530
531 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
532 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
533 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
534 }
535 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
536 sizeof (connf_t));
537 ipst->ips_ipcl_bind_fanout = NULL;
538
539 for (i = 0; i < IPPROTO_MAX; i++) {
540 ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
541 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
542 }
543 kmem_free(ipst->ips_ipcl_proto_fanout_v4,
544 IPPROTO_MAX * sizeof (connf_t));
545 ipst->ips_ipcl_proto_fanout_v4 = NULL;
546
547 for (i = 0; i < IPPROTO_MAX; i++) {
548 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
549 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
550 }
551 kmem_free(ipst->ips_ipcl_proto_fanout_v6,
552 IPPROTO_MAX * sizeof (connf_t));
553 ipst->ips_ipcl_proto_fanout_v6 = NULL;
554
555 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
556 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
557 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
558 }
559 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
560 sizeof (connf_t));
561 ipst->ips_ipcl_udp_fanout = NULL;
562
563 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
564 ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
565 mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
566 }
567 kmem_free(ipst->ips_ipcl_iptun_fanout,
568 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
569 ipst->ips_ipcl_iptun_fanout = NULL;
570
571 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
572 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
573 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
574 }
575 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
576 sizeof (connf_t));
577 ipst->ips_ipcl_raw_fanout = NULL;
578
579 for (i = 0; i < CONN_G_HASH_SIZE; i++) {
580 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
581 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
582 }
583 kmem_free(ipst->ips_ipcl_globalhash_fanout,
584 sizeof (connf_t) * CONN_G_HASH_SIZE);
585 ipst->ips_ipcl_globalhash_fanout = NULL;
586
587 for (i = 0; i < ipst->ips_ipcl_dccp_fanout_size; i++) {
588 ASSERT(ipst->ips_ipcl_dccp_fanout[i].connf_head == NULL);
589 mutex_destroy(&ipst->ips_ipcl_dccp_fanout[i].connf_lock);
590 }
591 kmem_free(ipst->ips_ipcl_dccp_fanout, ipst->ips_ipcl_dccp_fanout_size *
592 sizeof (connf_t));
593 ipst->ips_ipcl_dccp_fanout = NULL;
594
595 ASSERT(ipst->ips_rts_clients->connf_head == NULL);
596 mutex_destroy(&ipst->ips_rts_clients->connf_lock);
597 kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
598 ipst->ips_rts_clients = NULL;
599 }
600
601 /*
602 * conn creation routine. initialize the conn, sets the reference
603 * and inserts it in the global hash table.
604 */
605 conn_t *
606 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
607 {
608 conn_t *connp;
609 struct kmem_cache *conn_cache;
610
611 switch (type) {
612 case IPCL_SCTPCONN:
613 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
614 return (NULL);
615 sctp_conn_init(connp);
616 netstack_hold(ns);
617 connp->conn_netstack = ns;
618 connp->conn_ixa->ixa_ipst = ns->netstack_ip;
619 connp->conn_ixa->ixa_conn_id = (long)connp;
620 ipcl_globalhash_insert(connp);
621 return (connp);
622
623 case IPCL_TCPCONN:
624 conn_cache = tcp_conn_cache;
625 break;
626
627 case IPCL_UDPCONN:
628 conn_cache = udp_conn_cache;
629 break;
630
631 case IPCL_RAWIPCONN:
632 conn_cache = rawip_conn_cache;
633 break;
634
635 case IPCL_RTSCONN:
636 conn_cache = rts_conn_cache;
637 break;
638
639 case IPCL_IPCCONN:
640 conn_cache = ip_conn_cache;
641 break;
642
643 case IPCL_DCCPCONN:
644 conn_cache = dccp_conn_cache;
645 break;
646
647 default:
648 connp = NULL;
649 ASSERT(0);
650 }
651
652 if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
653 return (NULL);
654
655 connp->conn_ref = 1;
656 netstack_hold(ns);
657 connp->conn_netstack = ns;
658 connp->conn_ixa->ixa_ipst = ns->netstack_ip;
659 connp->conn_ixa->ixa_conn_id = (long)connp;
660 ipcl_globalhash_insert(connp);
661 return (connp);
662 }
663
664 void
665 ipcl_conn_destroy(conn_t *connp)
666 {
667 mblk_t *mp;
668 netstack_t *ns = connp->conn_netstack;
669
670 ASSERT(!MUTEX_HELD(&connp->conn_lock));
671 ASSERT(connp->conn_ref == 0);
672 ASSERT(connp->conn_ioctlref == 0);
673
674 DTRACE_PROBE1(conn__destroy, conn_t *, connp);
675
676 if (connp->conn_cred != NULL) {
677 crfree(connp->conn_cred);
678 connp->conn_cred = NULL;
679 /* ixa_cred done in ipcl_conn_cleanup below */
680 }
681
682 if (connp->conn_ht_iphc != NULL) {
683 kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
684 connp->conn_ht_iphc = NULL;
685 connp->conn_ht_iphc_allocated = 0;
686 connp->conn_ht_iphc_len = 0;
687 connp->conn_ht_ulp = NULL;
688 connp->conn_ht_ulp_len = 0;
689 }
690 ip_pkt_free(&connp->conn_xmit_ipp);
691
692 ipcl_globalhash_remove(connp);
693
694 if (connp->conn_latch != NULL) {
695 IPLATCH_REFRELE(connp->conn_latch);
696 connp->conn_latch = NULL;
697 }
698 if (connp->conn_latch_in_policy != NULL) {
699 IPPOL_REFRELE(connp->conn_latch_in_policy);
700 connp->conn_latch_in_policy = NULL;
701 }
702 if (connp->conn_latch_in_action != NULL) {
703 IPACT_REFRELE(connp->conn_latch_in_action);
704 connp->conn_latch_in_action = NULL;
705 }
706 if (connp->conn_policy != NULL) {
707 IPPH_REFRELE(connp->conn_policy, ns);
708 connp->conn_policy = NULL;
709 }
710
711 if (connp->conn_ipsec_opt_mp != NULL) {
712 freemsg(connp->conn_ipsec_opt_mp);
713 connp->conn_ipsec_opt_mp = NULL;
714 }
715
716 if (connp->conn_flags & IPCL_TCPCONN) {
717 tcp_t *tcp = connp->conn_tcp;
718
719 tcp_free(tcp);
720 mp = tcp->tcp_timercache;
721
722 tcp->tcp_tcps = NULL;
723
724 /*
725 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
726 * the mblk.
727 */
728 if (tcp->tcp_rsrv_mp != NULL) {
729 freeb(tcp->tcp_rsrv_mp);
730 tcp->tcp_rsrv_mp = NULL;
731 mutex_destroy(&tcp->tcp_rsrv_mp_lock);
732 }
733
734 ipcl_conn_cleanup(connp);
735 connp->conn_flags = IPCL_TCPCONN;
736 if (ns != NULL) {
737 ASSERT(tcp->tcp_tcps == NULL);
738 connp->conn_netstack = NULL;
739 connp->conn_ixa->ixa_ipst = NULL;
740 netstack_rele(ns);
741 }
742
743 bzero(tcp, sizeof (tcp_t));
744
745 tcp->tcp_timercache = mp;
746 tcp->tcp_connp = connp;
747 kmem_cache_free(tcp_conn_cache, connp);
748 return;
749 }
750
751 if (connp->conn_flags & IPCL_SCTPCONN) {
752 ASSERT(ns != NULL);
753 sctp_free(connp);
754 return;
755 }
756
757 if (connp->conn_flags & IPCL_DCCPCONN) {
758 dccp_t *dccp = connp->conn_dccp;
759
760 cmn_err(CE_NOTE, "ipclassifier: conn_flags DCCP cache_free");
761
762 /* XXX:DCCP */
763 /* Crash bug here: udp_conn_cache and dccp_conn_cache */
764 /*
765 ipcl_conn_cleanup(connp);
766 connp->conn_flags = IPCL_DCCPCONN;
767 bzero(dccp, sizeof (dccp_t));
768 dccp->dccp_connp = connp;
769 kmem_cache_free(dccp_conn_cache, connp);
770 return;
771 */
772 }
773
774 ipcl_conn_cleanup(connp);
775 if (ns != NULL) {
776 connp->conn_netstack = NULL;
777 connp->conn_ixa->ixa_ipst = NULL;
778 netstack_rele(ns);
779 }
780
781 /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
782 if (connp->conn_flags & IPCL_UDPCONN) {
783 connp->conn_flags = IPCL_UDPCONN;
784 kmem_cache_free(udp_conn_cache, connp);
785 } else if (connp->conn_flags & IPCL_RAWIPCONN) {
786 connp->conn_flags = IPCL_RAWIPCONN;
787 connp->conn_proto = IPPROTO_ICMP;
788 connp->conn_ixa->ixa_protocol = connp->conn_proto;
789 kmem_cache_free(rawip_conn_cache, connp);
790 } else if (connp->conn_flags & IPCL_RTSCONN) {
791 connp->conn_flags = IPCL_RTSCONN;
792 kmem_cache_free(rts_conn_cache, connp);
793 } else {
794 connp->conn_flags = IPCL_IPCCONN;
795 ASSERT(connp->conn_flags & IPCL_IPCCONN);
796 ASSERT(connp->conn_priv == NULL);
797 kmem_cache_free(ip_conn_cache, connp);
798 }
799 }
800
801 /*
802 * Running in cluster mode - deregister listener information
803 */
804 static void
805 ipcl_conn_unlisten(conn_t *connp)
806 {
807 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
808 ASSERT(connp->conn_lport != 0);
809
810 if (cl_inet_unlisten != NULL) {
811 sa_family_t addr_family;
812 uint8_t *laddrp;
813
814 if (connp->conn_ipversion == IPV6_VERSION) {
815 addr_family = AF_INET6;
816 laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
817 } else {
818 addr_family = AF_INET;
819 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
820 }
821 (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
822 IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
823 }
824 connp->conn_flags &= ~IPCL_CL_LISTENER;
825 }
826
827 /*
828 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
829 * which table the conn belonged to). So for debugging we can see which hash
830 * table this connection was in.
831 */
832 #define IPCL_HASH_REMOVE(connp) { \
833 connf_t *connfp = (connp)->conn_fanout; \
834 ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \
835 if (connfp != NULL) { \
836 mutex_enter(&connfp->connf_lock); \
837 if ((connp)->conn_next != NULL) \
838 (connp)->conn_next->conn_prev = \
839 (connp)->conn_prev; \
840 if ((connp)->conn_prev != NULL) \
841 (connp)->conn_prev->conn_next = \
842 (connp)->conn_next; \
843 else \
844 connfp->connf_head = (connp)->conn_next; \
845 (connp)->conn_fanout = NULL; \
846 (connp)->conn_next = NULL; \
847 (connp)->conn_prev = NULL; \
848 (connp)->conn_flags |= IPCL_REMOVED; \
849 if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \
850 ipcl_conn_unlisten((connp)); \
851 CONN_DEC_REF((connp)); \
852 mutex_exit(&connfp->connf_lock); \
853 } \
854 }
855
856 void
857 ipcl_hash_remove(conn_t *connp)
858 {
859 uint8_t protocol = connp->conn_proto;
860
861 IPCL_HASH_REMOVE(connp);
862 if (protocol == IPPROTO_RSVP)
863 ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
864 }
865
866 /*
867 * The whole purpose of this function is allow removal of
868 * a conn_t from the connected hash for timewait reclaim.
869 * This is essentially a TW reclaim fastpath where timewait
870 * collector checks under fanout lock (so no one else can
871 * get access to the conn_t) that refcnt is 2 i.e. one for
872 * TCP and one for the classifier hash list. If ref count
873 * is indeed 2, we can just remove the conn under lock and
874 * avoid cleaning up the conn under squeue. This gives us
875 * improved performance.
876 */
877 void
878 ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp)
879 {
880 ASSERT(MUTEX_HELD(&connfp->connf_lock));
881 ASSERT(MUTEX_HELD(&connp->conn_lock));
882 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
883
884 if ((connp)->conn_next != NULL) {
885 (connp)->conn_next->conn_prev = (connp)->conn_prev;
886 }
887 if ((connp)->conn_prev != NULL) {
888 (connp)->conn_prev->conn_next = (connp)->conn_next;
889 } else {
890 connfp->connf_head = (connp)->conn_next;
891 }
892 (connp)->conn_fanout = NULL;
893 (connp)->conn_next = NULL;
894 (connp)->conn_prev = NULL;
895 (connp)->conn_flags |= IPCL_REMOVED;
896 ASSERT((connp)->conn_ref == 2);
897 (connp)->conn_ref--;
898 }
899
/*
 * Link 'connp' at the head of connected-hash bucket 'connfp'; the caller
 * must hold connf_lock.  Marks the conn IPCL_CONNECTED (clearing
 * IPCL_REMOVED) and takes a reference on behalf of the hash table.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		(connfp)->connf_head->conn_prev = (connp);		\
		(connp)->conn_next = (connfp)->connf_head;		\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_CONNECTED;						\
	CONN_INC_REF(connp);						\
}

/*
 * As above, but first removes 'connp' from any bucket it is already on
 * and acquires/releases the destination bucket's lock itself.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
	mutex_exit(&(connfp)->connf_lock);				\
}
921
/*
 * Insert 'connp' into bound-hash bucket 'connfp'.  The conn is placed
 * after all entries with a specific local address and before the first
 * entry whose local address matches _IPCL_V4_MATCH_ANY (a wildcard bind),
 * so more specific binds are found first on lookup.  Removes the conn
 * from any current bucket, marks it IPCL_BOUND and takes a hash-table
 * reference.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	nconnp = (connfp)->connf_head;					\
	while (nconnp != NULL &&					\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) {		\
		pconnp = nconnp;					\
		nconnp = nconnp->conn_next;				\
	}								\
	if (pconnp != NULL) {						\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	} else {							\
		(connfp)->connf_head = (connp);				\
	}								\
	if (nconnp != NULL) {						\
		(connp)->conn_next = nconnp;				\
		nconnp->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}
948
/*
 * Insert a wildcard-bound conn into bucket (connfp).  Normally the conn is
 * appended at the tail, but an IPv4-mapped wildcard is inserted just before
 * the first IPv6 unspecified-address wildcard of the same zone, so that an
 * AF_INET ANY bind is found ahead of an AF_INET6 ANY bind for IPv4 traffic.
 * Marks the conn IPCL_BOUND and takes a hash-table reference.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&	\
		    connp->conn_zoneid == next->conn_zoneid) {		\
			(connp)->conn_next = next;			\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
978
/*
 * Function wrapper around IPCL_HASH_INSERT_WILDCARD so that callers in
 * other files can insert a wildcard-bound conn without access to the macro.
 */
void
ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
{
	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
}
984
985 /*
986 * Because the classifier is used to classify inbound packets, the destination
987 * address is meant to be our local tunnel address (tunnel source), and the
988 * source the remote tunnel address (tunnel destination).
989 *
990 * Note that conn_proto can't be used for fanout since the upper protocol
991 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
992 */
993 conn_t *
994 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
995 {
996 connf_t *connfp;
997 conn_t *connp;
998
999 /* first look for IPv4 tunnel links */
1000 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
1001 mutex_enter(&connfp->connf_lock);
1002 for (connp = connfp->connf_head; connp != NULL;
1003 connp = connp->conn_next) {
1004 if (IPCL_IPTUN_MATCH(connp, *dst, *src))
1005 break;
1006 }
1007 if (connp != NULL)
1008 goto done;
1009
1010 mutex_exit(&connfp->connf_lock);
1011
1012 /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
1013 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
1014 INADDR_ANY)];
1015 mutex_enter(&connfp->connf_lock);
1016 for (connp = connfp->connf_head; connp != NULL;
1017 connp = connp->conn_next) {
1018 if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
1019 break;
1020 }
1021 done:
1022 if (connp != NULL)
1023 CONN_INC_REF(connp);
1024 mutex_exit(&connfp->connf_lock);
1025 return (connp);
1026 }
1027
1028 conn_t *
1029 ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
1030 {
1031 connf_t *connfp;
1032 conn_t *connp;
1033
1034 /* Look for an IPv6 tunnel link */
1035 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
1036 mutex_enter(&connfp->connf_lock);
1037 for (connp = connfp->connf_head; connp != NULL;
1038 connp = connp->conn_next) {
1039 if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
1040 CONN_INC_REF(connp);
1041 break;
1042 }
1043 }
1044 mutex_exit(&connfp->connf_lock);
1045 return (connp);
1046 }
1047
/*
 * This function is used only for inserting SCTP raw socket now.
 * This may change later.
 *
 * Note that only one raw socket can be bound to a port. The param
 * lport is in network byte order.
 *
 * Returns 0 on success, EADDRNOTAVAIL if another raw socket already
 * occupies the port (in the same zone/family with an overlapping local
 * address).
 */
static int
ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
{
	connf_t	*connfp;
	conn_t	*oconnp;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];

	/*
	 * Check for existing raw socket already bound to the port.  Two
	 * sockets conflict when either side is a wildcard (unspecified or
	 * v4-mapped ANY) or their specific local addresses are equal.
	 */
	mutex_enter(&connfp->connf_lock);
	for (oconnp = connfp->connf_head; oconnp != NULL;
	    oconnp = oconnp->conn_next) {
		if (oconnp->conn_lport == lport &&
		    oconnp->conn_zoneid == connp->conn_zoneid &&
		    oconnp->conn_family == connp->conn_family &&
		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
		    &connp->conn_laddr_v6))) {
			break;
		}
	}
	mutex_exit(&connfp->connf_lock);
	if (oconnp != NULL)
		return (EADDRNOTAVAIL);

	/*
	 * Pick the insertion flavor from how much of the address tuple is
	 * specified: connected (has remote), bound (has local only), or
	 * wildcard (neither).
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		} else {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		}
	} else {
		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
	}
	return (0);
}
1097
1098 static int
1099 ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
1100 {
1101 connf_t *connfp;
1102 conn_t *tconnp;
1103 ipaddr_t laddr = connp->conn_laddr_v4;
1104 ipaddr_t faddr = connp->conn_faddr_v4;
1105
1106 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
1107 mutex_enter(&connfp->connf_lock);
1108 for (tconnp = connfp->connf_head; tconnp != NULL;
1109 tconnp = tconnp->conn_next) {
1110 if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
1111 /* A tunnel is already bound to these addresses. */
1112 mutex_exit(&connfp->connf_lock);
1113 return (EADDRINUSE);
1114 }
1115 }
1116 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1117 mutex_exit(&connfp->connf_lock);
1118 return (0);
1119 }
1120
1121 static int
1122 ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
1123 {
1124 connf_t *connfp;
1125 conn_t *tconnp;
1126 in6_addr_t *laddr = &connp->conn_laddr_v6;
1127 in6_addr_t *faddr = &connp->conn_faddr_v6;
1128
1129 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
1130 mutex_enter(&connfp->connf_lock);
1131 for (tconnp = connfp->connf_head; tconnp != NULL;
1132 tconnp = tconnp->conn_next) {
1133 if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
1134 /* A tunnel is already bound to these addresses. */
1135 mutex_exit(&connfp->connf_lock);
1136 return (EADDRINUSE);
1137 }
1138 }
1139 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1140 mutex_exit(&connfp->connf_lock);
1141 return (0);
1142 }
1143
1144 /*
1145 * Check for a MAC exemption conflict on a labeled system. Note that for
1146 * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
1147 * transport layer. This check is for binding all other protocols.
1148 *
1149 * Returns true if there's a conflict.
1150 */
1151 static boolean_t
1152 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
1153 {
1154 connf_t *connfp;
1155 conn_t *tconn;
1156
1157 connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
1158 mutex_enter(&connfp->connf_lock);
1159 for (tconn = connfp->connf_head; tconn != NULL;
1160 tconn = tconn->conn_next) {
1161 /* We don't allow v4 fallback for v6 raw socket */
1162 if (connp->conn_family != tconn->conn_family)
1163 continue;
1164 /* If neither is exempt, then there's no conflict */
1165 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1166 (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1167 continue;
1168 /* We are only concerned about sockets for a different zone */
1169 if (connp->conn_zoneid == tconn->conn_zoneid)
1170 continue;
1171 /* If both are bound to different specific addrs, ok */
1172 if (connp->conn_laddr_v4 != INADDR_ANY &&
1173 tconn->conn_laddr_v4 != INADDR_ANY &&
1174 connp->conn_laddr_v4 != tconn->conn_laddr_v4)
1175 continue;
1176 /* These two conflict; fail */
1177 break;
1178 }
1179 mutex_exit(&connfp->connf_lock);
1180 return (tconn != NULL);
1181 }
1182
1183 static boolean_t
1184 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
1185 {
1186 connf_t *connfp;
1187 conn_t *tconn;
1188
1189 connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
1190 mutex_enter(&connfp->connf_lock);
1191 for (tconn = connfp->connf_head; tconn != NULL;
1192 tconn = tconn->conn_next) {
1193 /* We don't allow v4 fallback for v6 raw socket */
1194 if (connp->conn_family != tconn->conn_family)
1195 continue;
1196 /* If neither is exempt, then there's no conflict */
1197 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1198 (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1199 continue;
1200 /* We are only concerned about sockets for a different zone */
1201 if (connp->conn_zoneid == tconn->conn_zoneid)
1202 continue;
1203 /* If both are bound to different addrs, ok */
1204 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1205 !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1206 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1207 &tconn->conn_laddr_v6))
1208 continue;
1209 /* These two conflict; fail */
1210 break;
1211 }
1212 mutex_exit(&connfp->connf_lock);
1213 return (tconn != NULL);
1214 }
1215
1216 /*
1217 * (v4, v6) bind hash insertion routines
1218 * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
1219 */
1220
1221 int
1222 ipcl_bind_insert(conn_t *connp)
1223 {
1224 if (connp->conn_ipversion == IPV6_VERSION)
1225 return (ipcl_bind_insert_v6(connp));
1226 else
1227 return (ipcl_bind_insert_v4(connp));
1228 }
1229
1230 int
1231 ipcl_bind_insert_v4(conn_t *connp)
1232 {
1233 connf_t *connfp;
1234 int ret = 0;
1235 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1236 uint16_t lport = connp->conn_lport;
1237 uint8_t protocol = connp->conn_proto;
1238
1239 if (IPCL_IS_IPTUN(connp))
1240 return (ipcl_iptun_hash_insert(connp, ipst));
1241
1242 switch (protocol) {
1243 default:
1244 if (is_system_labeled() &&
1245 check_exempt_conflict_v4(connp, ipst))
1246 return (EADDRINUSE);
1247 /* FALLTHROUGH */
1248 case IPPROTO_UDP:
1249 if (protocol == IPPROTO_UDP) {
1250 connfp = &ipst->ips_ipcl_udp_fanout[
1251 IPCL_UDP_HASH(lport, ipst)];
1252 } else {
1253 connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1254 }
1255
1256 if (connp->conn_faddr_v4 != INADDR_ANY) {
1257 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1258 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1259 IPCL_HASH_INSERT_BOUND(connfp, connp);
1260 } else {
1261 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1262 }
1263 if (protocol == IPPROTO_RSVP)
1264 ill_set_inputfn_all(ipst);
1265 break;
1266
1267 case IPPROTO_TCP:
1268 /* Insert it in the Bind Hash */
1269 ASSERT(connp->conn_zoneid != ALL_ZONES);
1270 connfp = &ipst->ips_ipcl_bind_fanout[
1271 IPCL_BIND_HASH(lport, ipst)];
1272 if (connp->conn_laddr_v4 != INADDR_ANY) {
1273 IPCL_HASH_INSERT_BOUND(connfp, connp);
1274 } else {
1275 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1276 }
1277 if (cl_inet_listen != NULL) {
1278 ASSERT(connp->conn_ipversion == IPV4_VERSION);
1279 connp->conn_flags |= IPCL_CL_LISTENER;
1280 (*cl_inet_listen)(
1281 connp->conn_netstack->netstack_stackid,
1282 IPPROTO_TCP, AF_INET,
1283 (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1284 }
1285 break;
1286
1287 case IPPROTO_SCTP:
1288 ret = ipcl_sctp_hash_insert(connp, lport);
1289 break;
1290
1291 case IPPROTO_DCCP:
1292 cmn_err(CE_NOTE, "ipcl_bind_insert_v4");
1293 ASSERT(connp->conn_zoneid != ALL_ZONES);
1294 connfp = &ipst->ips_ipcl_dccp_fanout[
1295 IPCL_DCCP_HASH(lport, ipst)];
1296 if (connp->conn_laddr_v4 != INADDR_ANY) {
1297 IPCL_HASH_INSERT_BOUND(connfp, connp);
1298 } else {
1299 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1300 }
1301 /* XXX:DCCP */
1302 break;
1303 }
1304
1305
1306 return (ret);
1307 }
1308
/*
 * IPv6 counterpart of ipcl_bind_insert_v4: insert a conn into the fanout
 * matching its protocol after a local bind.  Returns 0 or an errno.
 */
int
ipcl_bind_insert_v6(conn_t *connp)
{
	connf_t	*connfp;
	int	ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	uint16_t	lport = connp->conn_lport;
	uint8_t		protocol = connp->conn_proto;

	if (IPCL_IS_IPTUN(connp)) {
		return (ipcl_iptun_hash_insert_v6(connp, ipst));
	}

	switch (protocol) {
	default:
		/* MAC-exempt conflict check for port-less protocols. */
		if (is_system_labeled() &&
		    check_exempt_conflict_v6(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */
	case IPPROTO_UDP:
		if (protocol == IPPROTO_UDP) {
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(lport, ipst)];
		} else {
			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
		}

		/* Connected > bound > wildcard, by address specificity. */
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;

	case IPPROTO_TCP:
		/* Insert it in the Bind Hash */
		ASSERT(connp->conn_zoneid != ALL_ZONES);
		connfp = &ipst->ips_ipcl_bind_fanout[
		    IPCL_BIND_HASH(lport, ipst)];
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		/* Notify the cluster framework of the new listener. */
		if (cl_inet_listen != NULL) {
			sa_family_t	addr_family;
			uint8_t		*laddrp;

			if (connp->conn_ipversion == IPV6_VERSION) {
				addr_family = AF_INET6;
				laddrp =
				    (uint8_t *)&connp->conn_bound_addr_v6;
			} else {
				addr_family = AF_INET;
				laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
			}
			connp->conn_flags |= IPCL_CL_LISTENER;
			(*cl_inet_listen)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_TCP, addr_family, laddrp, lport, NULL);
		}
		break;

	case IPPROTO_SCTP:
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	case IPPROTO_DCCP:
		/* XXX:DCCP */
		break;
	}

	return (ret);
}
1385
1386 /*
1387 * ipcl_conn_hash insertion routines.
1388 * The caller has already set conn_proto and the addresses/ports in the conn_t.
1389 */
1390
1391 int
1392 ipcl_conn_insert(conn_t *connp)
1393 {
1394 if (connp->conn_ipversion == IPV6_VERSION)
1395 return (ipcl_conn_insert_v6(connp));
1396 else
1397 return (ipcl_conn_insert_v4(connp));
1398 }
1399
1400 int
1401 ipcl_conn_insert_v4(conn_t *connp)
1402 {
1403 connf_t *connfp;
1404 conn_t *tconnp;
1405 int ret = 0;
1406 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1407 uint16_t lport = connp->conn_lport;
1408 uint8_t protocol = connp->conn_proto;
1409
1410 if (IPCL_IS_IPTUN(connp))
1411 return (ipcl_iptun_hash_insert(connp, ipst));
1412
1413 switch (protocol) {
1414 case IPPROTO_TCP:
1415 /*
1416 * For TCP, we check whether the connection tuple already
1417 * exists before allowing the connection to proceed. We
1418 * also allow indexing on the zoneid. This is to allow
1419 * multiple shared stack zones to have the same tcp
1420 * connection tuple. In practice this only happens for
1421 * INADDR_LOOPBACK as it's the only local address which
1422 * doesn't have to be unique.
1423 */
1424 connfp = &ipst->ips_ipcl_conn_fanout[
1425 IPCL_CONN_HASH(connp->conn_faddr_v4,
1426 connp->conn_ports, ipst)];
1427 mutex_enter(&connfp->connf_lock);
1428 for (tconnp = connfp->connf_head; tconnp != NULL;
1429 tconnp = tconnp->conn_next) {
1430 if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1431 connp->conn_faddr_v4, connp->conn_laddr_v4,
1432 connp->conn_ports) &&
1433 IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1434 /* Already have a conn. bail out */
1435 mutex_exit(&connfp->connf_lock);
1436 return (EADDRINUSE);
1437 }
1438 }
1439 if (connp->conn_fanout != NULL) {
1440 /*
1441 * Probably a XTI/TLI application trying to do a
1442 * rebind. Let it happen.
1443 */
1444 mutex_exit(&connfp->connf_lock);
1445 IPCL_HASH_REMOVE(connp);
1446 mutex_enter(&connfp->connf_lock);
1447 }
1448
1449 ASSERT(connp->conn_recv != NULL);
1450 ASSERT(connp->conn_recvicmp != NULL);
1451
1452 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1453 mutex_exit(&connfp->connf_lock);
1454 break;
1455
1456 case IPPROTO_SCTP:
1457 /*
1458 * The raw socket may have already been bound, remove it
1459 * from the hash first.
1460 */
1461 IPCL_HASH_REMOVE(connp);
1462 ret = ipcl_sctp_hash_insert(connp, lport);
1463 break;
1464
1465 case IPPROTO_DCCP:
1466 cmn_err(CE_NOTE, "insert v4");
1467
1468 connfp = &ipst->ips_ipcl_conn_fanout[
1469 IPCL_CONN_HASH(connp->conn_faddr_v4,
1470 connp->conn_ports, ipst)];
1471 mutex_enter(&connfp->connf_lock);
1472 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1473 mutex_exit(&connfp->connf_lock);
1474 /* XXX:DCCP */
1475 break;
1476
1477 default:
1478 /*
1479 * Check for conflicts among MAC exempt bindings. For
1480 * transports with port numbers, this is done by the upper
1481 * level per-transport binding logic. For all others, it's
1482 * done here.
1483 */
1484 if (is_system_labeled() &&
1485 check_exempt_conflict_v4(connp, ipst))
1486 return (EADDRINUSE);
1487 /* FALLTHROUGH */
1488
1489 case IPPROTO_UDP:
1490 if (protocol == IPPROTO_UDP) {
1491 connfp = &ipst->ips_ipcl_udp_fanout[
1492 IPCL_UDP_HASH(lport, ipst)];
1493 } else {
1494 connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1495 }
1496
1497 if (connp->conn_faddr_v4 != INADDR_ANY) {
1498 IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1499 } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1500 IPCL_HASH_INSERT_BOUND(connfp, connp);
1501 } else {
1502 IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1503 }
1504 break;
1505 }
1506
1507 return (ret);
1508 }
1509
/*
 * IPv6 counterpart of ipcl_conn_insert_v4.  In addition to the tuple and
 * zone checks, the bound interface index (conn_bound_if) participates in
 * the uniqueness test.  Returns 0 or an errno (e.g. EADDRINUSE).
 */
int
ipcl_conn_insert_v6(conn_t *connp)
{
	connf_t		*connfp;
	conn_t		*tconnp;
	int		ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	uint16_t	lport = connp->conn_lport;
	uint8_t		protocol = connp->conn_proto;
	uint_t		ifindex = connp->conn_bound_if;

	if (IPCL_IS_IPTUN(connp))
		return (ipcl_iptun_hash_insert_v6(connp, ipst));

	switch (protocol) {
	case IPPROTO_TCP:

		/*
		 * For tcp, we check whether the connection tuple already
		 * exists before allowing the connection to proceed. We
		 * also allow indexing on the zoneid. This is to allow
		 * multiple shared stack zones to have the same tcp
		 * connection tuple. In practice this only happens for
		 * ipv6_loopback as it's the only local address which
		 * doesn't have to be unique.
		 */
		connfp = &ipst->ips_ipcl_conn_fanout[
		    IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
		    ipst)];
		mutex_enter(&connfp->connf_lock);
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			/* NOTE: need to match zoneid. Bug in onnv-gate */
			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
			    connp->conn_faddr_v6, connp->conn_laddr_v6,
			    connp->conn_ports) &&
			    (tconnp->conn_bound_if == 0 ||
			    tconnp->conn_bound_if == ifindex) &&
			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}
		if (connp->conn_fanout != NULL) {
			/*
			 * Probably a XTI/TLI application trying to do a
			 * rebind. Let it happen.
			 */
			mutex_exit(&connfp->connf_lock);
			IPCL_HASH_REMOVE(connp);
			mutex_enter(&connfp->connf_lock);
		}
		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	case IPPROTO_SCTP:
		/* The raw socket may already be bound; unhash it first. */
		IPCL_HASH_REMOVE(connp);
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	case IPPROTO_DCCP:
		/* XXX:DCCP */
		break;

	default:
		/* MAC-exempt conflict check for port-less protocols. */
		if (is_system_labeled() &&
		    check_exempt_conflict_v6(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */
	case IPPROTO_UDP:
		if (protocol == IPPROTO_UDP) {
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(lport, ipst)];
		} else {
			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
		}

		/* Connected > bound > wildcard, by address specificity. */
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}
1601
1602 /*
1603 * v4 packet classifying function. looks up the fanout table to
1604 * find the conn, the packet belongs to. returns the conn with
1605 * the reference held, null otherwise.
1606 *
1607 * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1608 * Lookup" comment block are applied. Labels are also checked as described
1609 * above. If the packet is from the inside (looped back), and is from the same
1610 * zone, then label checks are omitted.
1611 */
1612 conn_t *
1613 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1614 ip_recv_attr_t *ira, ip_stack_t *ipst)
1615 {
1616 ipha_t *ipha;
1617 connf_t *connfp, *bind_connfp;
1618 uint16_t lport;
1619 uint16_t fport;
1620 uint32_t ports;
1621 conn_t *connp;
1622 uint16_t *up;
1623 zoneid_t zoneid = ira->ira_zoneid;
1624
1625 ipha = (ipha_t *)mp->b_rptr;
1626 up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
1627
1628 switch (protocol) {
1629 case IPPROTO_TCP:
1630 ports = *(uint32_t *)up;
1631 connfp =
1632 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1633 ports, ipst)];
1634 mutex_enter(&connfp->connf_lock);
1635 for (connp = connfp->connf_head; connp != NULL;
1636 connp = connp->conn_next) {
1637 if (IPCL_CONN_MATCH(connp, protocol,
1638 ipha->ipha_src, ipha->ipha_dst, ports) &&
1639 (connp->conn_zoneid == zoneid ||
1640 connp->conn_allzones ||
1641 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1642 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1643 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1644 break;
1645 }
1646
1647 if (connp != NULL) {
1648 /*
1649 * We have a fully-bound TCP connection.
1650 *
1651 * For labeled systems, there's no need to check the
1652 * label here. It's known to be good as we checked
1653 * before allowing the connection to become bound.
1654 */
1655 CONN_INC_REF(connp);
1656 mutex_exit(&connfp->connf_lock);
1657 return (connp);
1658 }
1659
1660 mutex_exit(&connfp->connf_lock);
1661 lport = up[1];
1662 bind_connfp =
1663 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1664 mutex_enter(&bind_connfp->connf_lock);
1665 for (connp = bind_connfp->connf_head; connp != NULL;
1666 connp = connp->conn_next) {
1667 if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1668 lport) &&
1669 (connp->conn_zoneid == zoneid ||
1670 connp->conn_allzones ||
1671 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1672 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1673 (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1674 break;
1675 }
1676
1677 /*
1678 * If the matching connection is SLP on a private address, then
1679 * the label on the packet must match the local zone's label.
1680 * Otherwise, it must be in the label range defined by tnrh.
1681 * This is ensured by tsol_receive_local.
1682 *
1683 * Note that we don't check tsol_receive_local for
1684 * the connected case.
1685 */
1686 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1687 !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1688 ira, connp)) {
1689 DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
1690 char *, "connp(1) could not receive mp(2)",
1691 conn_t *, connp, mblk_t *, mp);
1692 connp = NULL;
1693 }
1694
1695 if (connp != NULL) {
1696 /* Have a listener at least */
1697 CONN_INC_REF(connp);
1698 mutex_exit(&bind_connfp->connf_lock);
1699 return (connp);
1700 }
1701
1702 mutex_exit(&bind_connfp->connf_lock);
1703 break;
1704
1705 case IPPROTO_UDP:
1706 lport = up[1];
1707 fport = up[0];
1708 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1709 mutex_enter(&connfp->connf_lock);
1710 for (connp = connfp->connf_head; connp != NULL;
1711 connp = connp->conn_next) {
1712 if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1713 fport, ipha->ipha_src) &&
1714 (connp->conn_zoneid == zoneid ||
1715 connp->conn_allzones ||
1716 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1717 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
1718 break;
1719 }
1720
1721 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1722 !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1723 ira, connp)) {
1724 DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1725 char *, "connp(1) could not receive mp(2)",
1726 conn_t *, connp, mblk_t *, mp);
1727 connp = NULL;
1728 }
1729
1730 if (connp != NULL) {
1731 CONN_INC_REF(connp);
1732 mutex_exit(&connfp->connf_lock);
1733 return (connp);
1734 }
1735
1736 /*
1737 * We shouldn't come here for multicast/broadcast packets
1738 */
1739 mutex_exit(&connfp->connf_lock);
1740
1741 break;
1742
1743 case IPPROTO_DCCP:
1744 fport = up[0];
1745 lport = up[1];
1746 connfp = &ipst->ips_ipcl_dccp_fanout[IPCL_DCCP_HASH(
1747 lport, ipst)];
1748 mutex_enter(&connfp->connf_lock);
1749 for (connp = connfp->connf_head; connp != NULL;
1750 connp = connp->conn_next) {
1751 cmn_err(CE_NOTE, "connfp found");
1752 /* XXX:DCCP */
1753 if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1754 fport, ipha->ipha_src)) {
1755 break;
1756 }
1757 }
1758
1759 if (connp != NULL) {
1760 CONN_INC_REF(connp);
1761 mutex_exit(&connfp->connf_lock);
1762 return (connp);
1763 }
1764
1765 mutex_exit(&connfp->connf_lock);
1766 break;
1767
1768 case IPPROTO_ENCAP:
1769 case IPPROTO_IPV6:
1770 return (ipcl_iptun_classify_v4(&ipha->ipha_src,
1771 &ipha->ipha_dst, ipst));
1772 }
1773
1774 return (NULL);
1775 }
1776
/*
 * v6 packet classifying function: look up the conn an inbound IPv6 packet
 * belongs to.  Mirrors ipcl_classify_v4: TCP tries the connected fanout
 * first, then the bind (listener) hash; UDP uses the udp fanout; tunnel
 * protocols are delegated to ipcl_iptun_classify_v6.  Returns the conn
 * with a reference held, or NULL.
 */
conn_t *
ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
    ip_recv_attr_t *ira, ip_stack_t *ipst)
{
	ip6_t	*ip6h;
	connf_t	*connfp, *bind_connfp;
	uint16_t lport;
	uint16_t fport;
	tcpha_t	*tcpha;
	uint32_t ports;
	conn_t	*connp;
	uint16_t *up;
	zoneid_t zoneid = ira->ira_zoneid;

	ip6h = (ip6_t *)mp->b_rptr;

	switch (protocol) {
	case IPPROTO_TCP:
		tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
		up = &tcpha->tha_lport;
		ports = *(uint32_t *)up;

		connfp =
		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
		    ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_CONN_MATCH_V6(connp, protocol,
			    ip6h->ip6_src, ip6h->ip6_dst, ports) &&
			    (connp->conn_zoneid == zoneid ||
			    connp->conn_allzones ||
			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
				break;
		}

		if (connp != NULL) {
			/*
			 * We have a fully-bound TCP connection.
			 *
			 * For labeled systems, there's no need to check the
			 * label here. It's known to be good as we checked
			 * before allowing the connection to become bound.
			 */
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&connfp->connf_lock);

		/* No connected match; try the listener (bind) hash. */
		lport = up[1];
		bind_connfp =
		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
		mutex_enter(&bind_connfp->connf_lock);
		for (connp = bind_connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_BIND_MATCH_V6(connp, protocol,
			    ip6h->ip6_dst, lport) &&
			    (connp->conn_zoneid == zoneid ||
			    connp->conn_allzones ||
			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
				break;
		}

		/* On labeled systems the packet's label must permit receipt. */
		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
		    ira, connp)) {
			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
			    char *, "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			/* Have a listener at least */
			CONN_INC_REF(connp);
			mutex_exit(&bind_connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&bind_connfp->connf_lock);
		break;

	case IPPROTO_UDP:
		up = (uint16_t *)&mp->b_rptr[hdr_len];
		lport = up[1];
		fport = up[0];
		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
			    fport, ip6h->ip6_src) &&
			    (connp->conn_zoneid == zoneid ||
			    connp->conn_allzones ||
			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
				break;
		}

		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
		    ira, connp)) {
			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
			    char *, "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		/*
		 * We shouldn't come here for multicast/broadcast packets
		 */
		mutex_exit(&connfp->connf_lock);
		break;
	case IPPROTO_ENCAP:
	case IPPROTO_IPV6:
		return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
		    &ip6h->ip6_dst, ipst));
	}

	return (NULL);
}
1911
1912 /*
1913 * wrapper around ipcl_classify_(v4,v6) routines.
1914 */
1915 conn_t *
1916 ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
1917 {
1918 if (ira->ira_flags & IRAF_IS_IPV4) {
1919 return (ipcl_classify_v4(mp, ira->ira_protocol,
1920 ira->ira_ip_hdr_length, ira, ipst));
1921 } else {
1922 return (ipcl_classify_v6(mp, ira->ira_protocol,
1923 ira->ira_ip_hdr_length, ira, ipst));
1924 }
1925 }
1926
/*
 * Only used to classify SCTP RAW sockets.
 *
 * Two-phase lookup: first the bucket for the packet's local port, matching
 * either the full connection tuple (if the raw socket has a remote address)
 * or the bound local address; then, failing that, the port-0 bucket for a
 * wildcard raw socket of the right protocol/address.  Returns the conn with
 * a reference held, or NULL.
 */
conn_t *
ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
    ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
{
	connf_t		*connfp;
	conn_t		*connp;
	in_port_t	lport;
	int		ipversion;
	const void	*dst;
	zoneid_t	zoneid = ira->ira_zoneid;

	/* ports packs (fport, lport); the local port is the second half. */
	lport = ((uint16_t *)&ports)[1];
	if (ira->ira_flags & IRAF_IS_IPV4) {
		dst = (const void *)&ipha->ipha_dst;
		ipversion = IPV4_VERSION;
	} else {
		dst = (const void *)&ip6h->ip6_dst;
		ipversion = IPV6_VERSION;
	}

	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		/* We don't allow v4 fallback for v6 raw socket. */
		if (ipversion != connp->conn_ipversion)
			continue;
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
			/* Connected raw socket: match the full tuple. */
			if (ipversion == IPV4_VERSION) {
				if (!IPCL_CONN_MATCH(connp, protocol,
				    ipha->ipha_src, ipha->ipha_dst, ports))
					continue;
			} else {
				if (!IPCL_CONN_MATCH_V6(connp, protocol,
				    ip6h->ip6_src, ip6h->ip6_dst, ports))
					continue;
			}
		} else {
			/* Bound-only raw socket: match local addr + port. */
			if (ipversion == IPV4_VERSION) {
				if (!IPCL_BIND_MATCH(connp, protocol,
				    ipha->ipha_dst, lport))
					continue;
			} else {
				if (!IPCL_BIND_MATCH_V6(connp, protocol,
				    ip6h->ip6_dst, lport))
					continue;
			}
		}

		/* Zone / MAC-exempt visibility check. */
		if (connp->conn_zoneid == zoneid ||
		    connp->conn_allzones ||
		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
		    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
		    (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
			break;
	}

	/* On labeled systems the packet's label must permit receipt. */
	if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
	    !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
		    char *, "connp(1) could not receive mp(2)",
		    conn_t *, connp, mblk_t *, mp);
		connp = NULL;
	}

	if (connp != NULL)
		goto found;
	mutex_exit(&connfp->connf_lock);

	/* Try to look for a wildcard SCTP RAW socket match. */
	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		/* We don't allow v4 fallback for v6 raw socket. */
		if (ipversion != connp->conn_ipversion)
			continue;
		if (!IPCL_ZONE_MATCH(connp, zoneid))
			continue;

		if (ipversion == IPV4_VERSION) {
			if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
				break;
		} else {
			if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
				break;
			}
		}
	}

	if (connp != NULL)
		goto found;

	mutex_exit(&connfp->connf_lock);
	return (NULL);

found:
	ASSERT(connp != NULL);
	CONN_INC_REF(connp);
	mutex_exit(&connfp->connf_lock);
	return (connp);
}
2033
2034 /* ARGSUSED */
2035 static int
2036 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2037 {
2038 itc_t *itc = (itc_t *)buf;
2039 conn_t *connp = &itc->itc_conn;
2040 tcp_t *tcp = (tcp_t *)&itc[1];
2041
2042 bzero(connp, sizeof (conn_t));
2043 bzero(tcp, sizeof (tcp_t));
2044
2045 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2046 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2047 cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
2048 tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
2049 if (tcp->tcp_timercache == NULL)
2050 return (ENOMEM);
2051 connp->conn_tcp = tcp;
2052 connp->conn_flags = IPCL_TCPCONN;
2053 connp->conn_proto = IPPROTO_TCP;
2054 tcp->tcp_connp = connp;
2055 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2056
2057 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2058 if (connp->conn_ixa == NULL) {
2059 tcp_timermp_free(tcp);
2060 return (ENOMEM);
2061 }
2062 connp->conn_ixa->ixa_refcnt = 1;
2063 connp->conn_ixa->ixa_protocol = connp->conn_proto;
2064 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2065 return (0);
2066 }
2067
2068 /* ARGSUSED */
2069 static void
2070 tcp_conn_destructor(void *buf, void *cdrarg)
2071 {
2072 itc_t *itc = (itc_t *)buf;
2073 conn_t *connp = &itc->itc_conn;
2074 tcp_t *tcp = (tcp_t *)&itc[1];
2075
2076 ASSERT(connp->conn_flags & IPCL_TCPCONN);
2077 ASSERT(tcp->tcp_connp == connp);
2078 ASSERT(connp->conn_tcp == tcp);
2079 tcp_timermp_free(tcp);
2080 mutex_destroy(&connp->conn_lock);
2081 cv_destroy(&connp->conn_cv);
2082 cv_destroy(&connp->conn_sq_cv);
2083 rw_destroy(&connp->conn_ilg_lock);
2084
2085 /* Can be NULL if constructor failed */
2086 if (connp->conn_ixa != NULL) {
2087 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2088 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2089 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2090 ixa_refrele(connp->conn_ixa);
2091 }
2092 }
2093
2094 /* ARGSUSED */
2095 static int
2096 ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2097 {
2098 itc_t *itc = (itc_t *)buf;
2099 conn_t *connp = &itc->itc_conn;
2100
2101 bzero(connp, sizeof (conn_t));
2102 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2103 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2104 connp->conn_flags = IPCL_IPCCONN;
2105 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2106
2107 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2108 if (connp->conn_ixa == NULL)
2109 return (ENOMEM);
2110 connp->conn_ixa->ixa_refcnt = 1;
2111 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2112 return (0);
2113 }
2114
2115 /* ARGSUSED */
2116 static void
2117 ip_conn_destructor(void *buf, void *cdrarg)
2118 {
2119 itc_t *itc = (itc_t *)buf;
2120 conn_t *connp = &itc->itc_conn;
2121
2122 ASSERT(connp->conn_flags & IPCL_IPCCONN);
2123 ASSERT(connp->conn_priv == NULL);
2124 mutex_destroy(&connp->conn_lock);
2125 cv_destroy(&connp->conn_cv);
2126 rw_destroy(&connp->conn_ilg_lock);
2127
2128 /* Can be NULL if constructor failed */
2129 if (connp->conn_ixa != NULL) {
2130 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2131 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2132 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2133 ixa_refrele(connp->conn_ixa);
2134 }
2135 }
2136
2137 /* ARGSUSED */
2138 static int
2139 udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2140 {
2141 itc_t *itc = (itc_t *)buf;
2142 conn_t *connp = &itc->itc_conn;
2143 udp_t *udp = (udp_t *)&itc[1];
2144
2145 bzero(connp, sizeof (conn_t));
2146 bzero(udp, sizeof (udp_t));
2147
2148 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2149 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2150 connp->conn_udp = udp;
2151 connp->conn_flags = IPCL_UDPCONN;
2152 connp->conn_proto = IPPROTO_UDP;
2153 udp->udp_connp = connp;
2154 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2155 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2156 if (connp->conn_ixa == NULL)
2157 return (ENOMEM);
2158 connp->conn_ixa->ixa_refcnt = 1;
2159 connp->conn_ixa->ixa_protocol = connp->conn_proto;
2160 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2161 return (0);
2162 }
2163
2164 /* ARGSUSED */
2165 static void
2166 udp_conn_destructor(void *buf, void *cdrarg)
2167 {
2168 itc_t *itc = (itc_t *)buf;
2169 conn_t *connp = &itc->itc_conn;
2170 udp_t *udp = (udp_t *)&itc[1];
2171
2172 ASSERT(connp->conn_flags & IPCL_UDPCONN);
2173 ASSERT(udp->udp_connp == connp);
2174 ASSERT(connp->conn_udp == udp);
2175 mutex_destroy(&connp->conn_lock);
2176 cv_destroy(&connp->conn_cv);
2177 rw_destroy(&connp->conn_ilg_lock);
2178
2179 /* Can be NULL if constructor failed */
2180 if (connp->conn_ixa != NULL) {
2181 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2182 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2183 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2184 ixa_refrele(connp->conn_ixa);
2185 }
2186 }
2187
2188 /* ARGSUSED */
2189 static int
2190 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2191 {
2192 itc_t *itc = (itc_t *)buf;
2193 conn_t *connp = &itc->itc_conn;
2194 icmp_t *icmp = (icmp_t *)&itc[1];
2195
2196 bzero(connp, sizeof (conn_t));
2197 bzero(icmp, sizeof (icmp_t));
2198
2199 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2200 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2201 connp->conn_icmp = icmp;
2202 connp->conn_flags = IPCL_RAWIPCONN;
2203 connp->conn_proto = IPPROTO_ICMP;
2204 icmp->icmp_connp = connp;
2205 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2206 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2207 if (connp->conn_ixa == NULL)
2208 return (ENOMEM);
2209 connp->conn_ixa->ixa_refcnt = 1;
2210 connp->conn_ixa->ixa_protocol = connp->conn_proto;
2211 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2212 return (0);
2213 }
2214
2215 /* ARGSUSED */
2216 static void
2217 rawip_conn_destructor(void *buf, void *cdrarg)
2218 {
2219 itc_t *itc = (itc_t *)buf;
2220 conn_t *connp = &itc->itc_conn;
2221 icmp_t *icmp = (icmp_t *)&itc[1];
2222
2223 ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2224 ASSERT(icmp->icmp_connp == connp);
2225 ASSERT(connp->conn_icmp == icmp);
2226 mutex_destroy(&connp->conn_lock);
2227 cv_destroy(&connp->conn_cv);
2228 rw_destroy(&connp->conn_ilg_lock);
2229
2230 /* Can be NULL if constructor failed */
2231 if (connp->conn_ixa != NULL) {
2232 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2233 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2234 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2235 ixa_refrele(connp->conn_ixa);
2236 }
2237 }
2238
2239 /* ARGSUSED */
2240 static int
2241 rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2242 {
2243 itc_t *itc = (itc_t *)buf;
2244 conn_t *connp = &itc->itc_conn;
2245 rts_t *rts = (rts_t *)&itc[1];
2246
2247 bzero(connp, sizeof (conn_t));
2248 bzero(rts, sizeof (rts_t));
2249
2250 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2251 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2252 connp->conn_rts = rts;
2253 connp->conn_flags = IPCL_RTSCONN;
2254 rts->rts_connp = connp;
2255 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2256 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2257 if (connp->conn_ixa == NULL)
2258 return (ENOMEM);
2259 connp->conn_ixa->ixa_refcnt = 1;
2260 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2261 return (0);
2262 }
2263
2264 /* ARGSUSED */
2265 static void
2266 rts_conn_destructor(void *buf, void *cdrarg)
2267 {
2268 itc_t *itc = (itc_t *)buf;
2269 conn_t *connp = &itc->itc_conn;
2270 rts_t *rts = (rts_t *)&itc[1];
2271
2272 ASSERT(connp->conn_flags & IPCL_RTSCONN);
2273 ASSERT(rts->rts_connp == connp);
2274 ASSERT(connp->conn_rts == rts);
2275 mutex_destroy(&connp->conn_lock);
2276 cv_destroy(&connp->conn_cv);
2277 rw_destroy(&connp->conn_ilg_lock);
2278
2279 /* Can be NULL if constructor failed */
2280 if (connp->conn_ixa != NULL) {
2281 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2282 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2283 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2284 ixa_refrele(connp->conn_ixa);
2285 }
2286 }
2287
2288 /* ARGSUSED */
2289 static int
2290 dccp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2291 {
2292 itc_t *itc = (itc_t *)buf;
2293 conn_t *connp = &itc->itc_conn;
2294 dccp_t *dccp = (dccp_t *)&itc[1];
2295
2296 bzero(connp, sizeof (conn_t));
2297 bzero(dccp, sizeof (dccp_t));
2298
2299 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2300 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2301 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2302
2303 connp->conn_dccp = dccp;
2304 connp->conn_flags = IPCL_DCCPCONN;
2305 connp->conn_proto = IPPROTO_DCCP;
2306 dccp->dccp_connp = connp;
2307 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2308 if (connp->conn_ixa == NULL)
2309 return (NULL);
2310 connp->conn_ixa->ixa_refcnt = 1;
2311 connp->conn_ixa->ixa_protocol = connp->conn_proto;
2312 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2313
2314 return (0);
2315 }
2316
2317 /* ARGSUSED */
2318 static void
2319 dccp_conn_destructor(void *buf, void *cdrarg)
2320 {
2321 itc_t *itc = (itc_t *)buf;
2322 conn_t *connp = &itc->itc_conn;
2323 dccp_t *dccp = (dccp_t *)&itc[1];
2324
2325 ASSERT(connp->conn_flags & IPCL_DCCPCONN);
2326 ASSERT(dccp->dccp_connp == connp);
2327 ASSERT(connp->conn_dccp == dccp);
2328
2329 mutex_destroy(&connp->conn_lock);
2330 cv_destroy(&connp->conn_cv);
2331 rw_destroy(&connp->conn_ilg_lock);
2332
2333 if (connp->conn_ixa != NULL) {
2334 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2335 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2336 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2337
2338 ixa_refrele(connp->conn_ixa);
2339 }
2340 }
2341
/*
 * Called as part of ipcl_conn_destroy to assert and clear any pointers
 * in the conn_t.
 *
 * Below we list all the pointers in the conn_t as a documentation aid.
 * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
 * If you add any pointers to the conn_t please add an ASSERT here
 * and #ifdef it out if it can't be actually asserted to be NULL.
 * In any case, we bzero most of the conn_t at the end of the function.
 */
void
ipcl_conn_cleanup(conn_t *connp)
{
	ip_xmit_attr_t *ixa;

	/* All hash/list linkage and attached state must already be gone. */
	ASSERT(connp->conn_latch == NULL);
	ASSERT(connp->conn_latch_in_policy == NULL);
	ASSERT(connp->conn_latch_in_action == NULL);
#ifdef notdef
	ASSERT(connp->conn_rq == NULL);
	ASSERT(connp->conn_wq == NULL);
#endif
	ASSERT(connp->conn_cred == NULL);
	ASSERT(connp->conn_g_fanout == NULL);
	ASSERT(connp->conn_g_next == NULL);
	ASSERT(connp->conn_g_prev == NULL);
	ASSERT(connp->conn_policy == NULL);
	ASSERT(connp->conn_fanout == NULL);
	ASSERT(connp->conn_next == NULL);
	ASSERT(connp->conn_prev == NULL);
	ASSERT(connp->conn_oper_pending_ill == NULL);
	ASSERT(connp->conn_ilg == NULL);
	ASSERT(connp->conn_drain_next == NULL);
	ASSERT(connp->conn_drain_prev == NULL);
#ifdef notdef
	/* conn_idl is not cleared when removed from idl list */
	ASSERT(connp->conn_idl == NULL);
#endif
	ASSERT(connp->conn_ipsec_opt_mp == NULL);
#ifdef notdef
	/* conn_netstack is cleared by the caller; needed by ixa_cleanup */
	ASSERT(connp->conn_netstack == NULL);
#endif

	ASSERT(connp->conn_helper_info == NULL);
	/* The transmit attributes survive for reuse; reset, don't free. */
	ASSERT(connp->conn_ixa != NULL);
	ixa = connp->conn_ixa;
	ASSERT(ixa->ixa_refcnt == 1);
	/* Need to preserve ixa_protocol */
	ixa_cleanup(ixa);
	ixa->ixa_flags = 0;

	/* Clear out the conn_t fields that are not preserved */
	bzero(&connp->conn_start_clr,
	    sizeof (conn_t) -
	    ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
}
2399
2400 /*
2401 * All conns are inserted in a global multi-list for the benefit of
2402 * walkers. The walk is guaranteed to walk all open conns at the time
2403 * of the start of the walk exactly once. This property is needed to
2404 * achieve some cleanups during unplumb of interfaces. This is achieved
2405 * as follows.
2406 *
2407 * ipcl_conn_create and ipcl_conn_destroy are the only functions that
2408 * call the insert and delete functions below at creation and deletion
2409 * time respectively. The conn never moves or changes its position in this
2410 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
2411 * won't increase due to walkers, once the conn deletion has started. Note
2412 * that we can't remove the conn from the global list and then wait for
2413 * the refcnt to drop to zero, since walkers would then see a truncated
2414 * list. CONN_INCIPIENT ensures that walkers don't start looking at
2415 * conns until ip_open is ready to make them globally visible.
2416 * The global round robin multi-list locks are held only to get the
2417 * next member/insertion/deletion and contention should be negligible
2418 * if the multi-list is much greater than the number of cpus.
2419 */
2420 void
2421 ipcl_globalhash_insert(conn_t *connp)
2422 {
2423 int index;
2424 struct connf_s *connfp;
2425 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
2426
2427 /*
2428 * No need for atomic here. Approximate even distribution
2429 * in the global lists is sufficient.
2430 */
2431 ipst->ips_conn_g_index++;
2432 index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
2433
2434 connp->conn_g_prev = NULL;
2435 /*
2436 * Mark as INCIPIENT, so that walkers will ignore this
2437 * for now, till ip_open is ready to make it visible globally.
2438 */
2439 connp->conn_state_flags |= CONN_INCIPIENT;
2440
2441 connfp = &ipst->ips_ipcl_globalhash_fanout[index];
2442 /* Insert at the head of the list */
2443 mutex_enter(&connfp->connf_lock);
2444 connp->conn_g_next = connfp->connf_head;
2445 if (connp->conn_g_next != NULL)
2446 connp->conn_g_next->conn_g_prev = connp;
2447 connfp->connf_head = connp;
2448
2449 /* The fanout bucket this conn points to */
2450 connp->conn_g_fanout = connfp;
2451
2452 mutex_exit(&connfp->connf_lock);
2453 }
2454
2455 void
2456 ipcl_globalhash_remove(conn_t *connp)
2457 {
2458 struct connf_s *connfp;
2459
2460 /*
2461 * We were never inserted in the global multi list.
2462 * IPCL_NONE variety is never inserted in the global multilist
2463 * since it is presumed to not need any cleanup and is transient.
2464 */
2465 if (connp->conn_g_fanout == NULL)
2466 return;
2467
2468 connfp = connp->conn_g_fanout;
2469 mutex_enter(&connfp->connf_lock);
2470 if (connp->conn_g_prev != NULL)
2471 connp->conn_g_prev->conn_g_next = connp->conn_g_next;
2472 else
2473 connfp->connf_head = connp->conn_g_next;
2474 if (connp->conn_g_next != NULL)
2475 connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2476 mutex_exit(&connfp->connf_lock);
2477
2478 /* Better to stumble on a null pointer than to corrupt memory */
2479 connp->conn_g_next = NULL;
2480 connp->conn_g_prev = NULL;
2481 connp->conn_g_fanout = NULL;
2482 }
2483
/*
 * Walk the list of all conn_t's in the system, calling the function provided
 * with the specified argument for each.
 * Applies to both IPv4 and IPv6.
 *
 * CONNs may hold pointers to ills (conn_dhcpinit_ill and
 * conn_oper_pending_ill). To guard against stale pointers
 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
 * unplumbed or removed. New conn_t's that are created while we are walking
 * may be missed by this walk, because they are not necessarily inserted
 * at the tail of the list. They are new conn_t's and thus don't have any
 * stale pointers. The CONN_CLOSING flag ensures that no new reference
 * is created to the struct that is going away.
 */
void
ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
{
	int i;
	conn_t *connp;
	conn_t *prev_connp;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
		prev_connp = NULL;
		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
		while (connp != NULL) {
			mutex_enter(&connp->conn_lock);
			/* Skip conns going away or not yet globally visible. */
			if (connp->conn_state_flags &
			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
				mutex_exit(&connp->conn_lock);
				connp = connp->conn_g_next;
				continue;
			}
			/*
			 * Take a reference before dropping the bucket lock so
			 * the conn (and its conn_g_next linkage) stays valid
			 * while the callback runs without the lock held.
			 */
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&connp->conn_lock);
			mutex_exit(
			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
			(*func)(connp, arg);
			/*
			 * The previous conn's reference can be dropped now;
			 * it was held only to keep our walk position in the
			 * list stable across the unlocked callback.
			 */
			if (prev_connp != NULL)
				CONN_DEC_REF(prev_connp);
			mutex_enter(
			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
			prev_connp = connp;
			connp = connp->conn_g_next;
		}
		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
		/* Release the last conn processed in this bucket. */
		if (prev_connp != NULL)
			CONN_DEC_REF(prev_connp);
	}
}
2534
2535 /*
2536 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2537 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
2538 * held; caller must call CONN_DEC_REF. Only checks for connected entries
2539 * (peer tcp in ESTABLISHED state).
2540 */
2541 conn_t *
2542 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2543 ip_stack_t *ipst)
2544 {
2545 uint32_t ports;
2546 uint16_t *pports = (uint16_t *)&ports;
2547 connf_t *connfp;
2548 conn_t *tconnp;
2549 boolean_t zone_chk;
2550
2551 /*
2552 * If either the source of destination address is loopback, then
2553 * both endpoints must be in the same Zone. Otherwise, both of
2554 * the addresses are system-wide unique (tcp is in ESTABLISHED
2555 * state) and the endpoints may reside in different Zones.
2556 */
2557 zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
2558 ipha->ipha_dst == htonl(INADDR_LOOPBACK));
2559
2560 pports[0] = tcpha->tha_fport;
2561 pports[1] = tcpha->tha_lport;
2562
2563 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2564 ports, ipst)];
2565
2566 mutex_enter(&connfp->connf_lock);
2567 for (tconnp = connfp->connf_head; tconnp != NULL;
2568 tconnp = tconnp->conn_next) {
2569
2570 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2571 ipha->ipha_dst, ipha->ipha_src, ports) &&
2572 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2573 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2574
2575 ASSERT(tconnp != connp);
2576 CONN_INC_REF(tconnp);
2577 mutex_exit(&connfp->connf_lock);
2578 return (tconnp);
2579 }
2580 }
2581 mutex_exit(&connfp->connf_lock);
2582 return (NULL);
2583 }
2584
2585 /*
2586 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2587 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
2588 * held; caller must call CONN_DEC_REF. Only checks for connected entries
2589 * (peer tcp in ESTABLISHED state).
2590 */
2591 conn_t *
2592 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2593 ip_stack_t *ipst)
2594 {
2595 uint32_t ports;
2596 uint16_t *pports = (uint16_t *)&ports;
2597 connf_t *connfp;
2598 conn_t *tconnp;
2599 boolean_t zone_chk;
2600
2601 /*
2602 * If either the source of destination address is loopback, then
2603 * both endpoints must be in the same Zone. Otherwise, both of
2604 * the addresses are system-wide unique (tcp is in ESTABLISHED
2605 * state) and the endpoints may reside in different Zones. We
2606 * don't do Zone check for link local address(es) because the
2607 * current Zone implementation treats each link local address as
2608 * being unique per system node, i.e. they belong to global Zone.
2609 */
2610 zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
2611 IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
2612
2613 pports[0] = tcpha->tha_fport;
2614 pports[1] = tcpha->tha_lport;
2615
2616 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2617 ports, ipst)];
2618
2619 mutex_enter(&connfp->connf_lock);
2620 for (tconnp = connfp->connf_head; tconnp != NULL;
2621 tconnp = tconnp->conn_next) {
2622
2623 /* We skip conn_bound_if check here as this is loopback tcp */
2624 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2625 ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2626 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2627 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2628
2629 ASSERT(tconnp != connp);
2630 CONN_INC_REF(tconnp);
2631 mutex_exit(&connfp->connf_lock);
2632 return (tconnp);
2633 }
2634 }
2635 mutex_exit(&connfp->connf_lock);
2636 return (NULL);
2637 }
2638
2639 /*
2640 * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2641 * Returns with conn reference held. Caller must call CONN_DEC_REF.
2642 * Only checks for connected entries i.e. no INADDR_ANY checks.
2643 */
2644 conn_t *
2645 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2646 ip_stack_t *ipst)
2647 {
2648 uint32_t ports;
2649 uint16_t *pports;
2650 connf_t *connfp;
2651 conn_t *tconnp;
2652
2653 pports = (uint16_t *)&ports;
2654 pports[0] = tcpha->tha_fport;
2655 pports[1] = tcpha->tha_lport;
2656
2657 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2658 ports, ipst)];
2659
2660 mutex_enter(&connfp->connf_lock);
2661 for (tconnp = connfp->connf_head; tconnp != NULL;
2662 tconnp = tconnp->conn_next) {
2663
2664 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2665 ipha->ipha_dst, ipha->ipha_src, ports) &&
2666 tconnp->conn_tcp->tcp_state >= min_state) {
2667
2668 CONN_INC_REF(tconnp);
2669 mutex_exit(&connfp->connf_lock);
2670 return (tconnp);
2671 }
2672 }
2673 mutex_exit(&connfp->connf_lock);
2674 return (NULL);
2675 }
2676
2677 /*
2678 * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2679 * Returns with conn reference held. Caller must call CONN_DEC_REF.
2680 * Only checks for connected entries i.e. no INADDR_ANY checks.
2681 * Match on ifindex in addition to addresses.
2682 */
2683 conn_t *
2684 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2685 uint_t ifindex, ip_stack_t *ipst)
2686 {
2687 tcp_t *tcp;
2688 uint32_t ports;
2689 uint16_t *pports;
2690 connf_t *connfp;
2691 conn_t *tconnp;
2692
2693 pports = (uint16_t *)&ports;
2694 pports[0] = tcpha->tha_fport;
2695 pports[1] = tcpha->tha_lport;
2696
2697 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2698 ports, ipst)];
2699
2700 mutex_enter(&connfp->connf_lock);
2701 for (tconnp = connfp->connf_head; tconnp != NULL;
2702 tconnp = tconnp->conn_next) {
2703
2704 tcp = tconnp->conn_tcp;
2705 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2706 ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2707 tcp->tcp_state >= min_state &&
2708 (tconnp->conn_bound_if == 0 ||
2709 tconnp->conn_bound_if == ifindex)) {
2710
2711 CONN_INC_REF(tconnp);
2712 mutex_exit(&connfp->connf_lock);
2713 return (tconnp);
2714 }
2715 }
2716 mutex_exit(&connfp->connf_lock);
2717 return (NULL);
2718 }
2719
2720 /*
2721 * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2722 * a listener when changing state.
2723 */
2724 conn_t *
2725 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2726 ip_stack_t *ipst)
2727 {
2728 connf_t *bind_connfp;
2729 conn_t *connp;
2730 tcp_t *tcp;
2731
2732 /*
2733 * Avoid false matches for packets sent to an IP destination of
2734 * all zeros.
2735 */
2736 if (laddr == 0)
2737 return (NULL);
2738
2739 ASSERT(zoneid != ALL_ZONES);
2740
2741 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2742 mutex_enter(&bind_connfp->connf_lock);
2743 for (connp = bind_connfp->connf_head; connp != NULL;
2744 connp = connp->conn_next) {
2745 tcp = connp->conn_tcp;
2746 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
2747 IPCL_ZONE_MATCH(connp, zoneid) &&
2748 (tcp->tcp_listener == NULL)) {
2749 CONN_INC_REF(connp);
2750 mutex_exit(&bind_connfp->connf_lock);
2751 return (connp);
2752 }
2753 }
2754 mutex_exit(&bind_connfp->connf_lock);
2755 return (NULL);
2756 }
2757
2758 /*
2759 * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2760 * a listener when changing state.
2761 */
2762 conn_t *
2763 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2764 zoneid_t zoneid, ip_stack_t *ipst)
2765 {
2766 connf_t *bind_connfp;
2767 conn_t *connp = NULL;
2768 tcp_t *tcp;
2769
2770 /*
2771 * Avoid false matches for packets sent to an IP destination of
2772 * all zeros.
2773 */
2774 if (IN6_IS_ADDR_UNSPECIFIED(laddr))
2775 return (NULL);
2776
2777 ASSERT(zoneid != ALL_ZONES);
2778
2779 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2780 mutex_enter(&bind_connfp->connf_lock);
2781 for (connp = bind_connfp->connf_head; connp != NULL;
2782 connp = connp->conn_next) {
2783 tcp = connp->conn_tcp;
2784 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
2785 IPCL_ZONE_MATCH(connp, zoneid) &&
2786 (connp->conn_bound_if == 0 ||
2787 connp->conn_bound_if == ifindex) &&
2788 tcp->tcp_listener == NULL) {
2789 CONN_INC_REF(connp);
2790 mutex_exit(&bind_connfp->connf_lock);
2791 return (connp);
2792 }
2793 }
2794 mutex_exit(&bind_connfp->connf_lock);
2795 return (NULL);
2796 }
2797
2798 /*
2799 * ipcl_get_next_conn
2800 * get the next entry in the conn global list
2801 * and put a reference on the next_conn.
2802 * decrement the reference on the current conn.
2803 *
2804 * This is an iterator based walker function that also provides for
2805 * some selection by the caller. It walks through the conn_hash bucket
2806 * searching for the next valid connp in the list, and selects connections
2807 * that are neither closed nor condemned. It also REFHOLDS the conn
2808 * thus ensuring that the conn exists when the caller uses the conn.
2809 */
2810 conn_t *
2811 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2812 {
2813 conn_t *next_connp;
2814
2815 if (connfp == NULL)
2816 return (NULL);
2817
2818 mutex_enter(&connfp->connf_lock);
2819
2820 next_connp = (connp == NULL) ?
2821 connfp->connf_head : connp->conn_g_next;
2822
2823 while (next_connp != NULL) {
2824 mutex_enter(&next_connp->conn_lock);
2825 if (!(next_connp->conn_flags & conn_flags) ||
2826 (next_connp->conn_state_flags &
2827 (CONN_CONDEMNED | CONN_INCIPIENT))) {
2828 /*
2829 * This conn has been condemned or
2830 * is closing, or the flags don't match
2831 */
2832 mutex_exit(&next_connp->conn_lock);
2833 next_connp = next_connp->conn_g_next;
2834 continue;
2835 }
2836 CONN_INC_REF_LOCKED(next_connp);
2837 mutex_exit(&next_connp->conn_lock);
2838 break;
2839 }
2840
2841 mutex_exit(&connfp->connf_lock);
2842
2843 if (connp != NULL)
2844 CONN_DEC_REF(connp);
2845
2846 return (next_connp);
2847 }
2848
2849 #ifdef CONN_DEBUG
2850 /*
2851 * Trace of the last NBUF refhold/refrele
2852 */
2853 int
2854 conn_trace_ref(conn_t *connp)
2855 {
2856 int last;
2857 conn_trace_t *ctb;
2858
2859 ASSERT(MUTEX_HELD(&connp->conn_lock));
2860 last = connp->conn_trace_last;
2861 last++;
2862 if (last == CONN_TRACE_MAX)
2863 last = 0;
2864
2865 ctb = &connp->conn_trace_buf[last];
2866 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2867 connp->conn_trace_last = last;
2868 return (1);
2869 }
2870
2871 int
2872 conn_untrace_ref(conn_t *connp)
2873 {
2874 int last;
2875 conn_trace_t *ctb;
2876
2877 ASSERT(MUTEX_HELD(&connp->conn_lock));
2878 last = connp->conn_trace_last;
2879 last++;
2880 if (last == CONN_TRACE_MAX)
2881 last = 0;
2882
2883 ctb = &connp->conn_trace_buf[last];
2884 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2885 connp->conn_trace_last = last;
2886 return (1);
2887 }
2888 #endif