1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * IP PACKET CLASSIFIER 27 * 28 * The IP packet classifier provides mapping between IP packets and persistent 29 * connection state for connection-oriented protocols. It also provides 30 * interface for managing connection states. 31 * 32 * The connection state is kept in conn_t data structure and contains, among 33 * other things: 34 * 35 * o local/remote address and ports 36 * o Transport protocol 37 * o squeue for the connection (for TCP only) 38 * o reference counter 39 * o Connection state 40 * o hash table linkage 41 * o interface/ire information 42 * o credentials 43 * o ipsec policy 44 * o send and receive functions. 45 * o mutex lock. 46 * 47 * Connections use a reference counting scheme. They are freed when the 48 * reference counter drops to zero. 
A reference is incremented when connection 49 * is placed in a list or table, when incoming packet for the connection arrives 50 * and when connection is processed via squeue (squeue processing may be 51 * asynchronous and the reference protects the connection from being destroyed 52 * before its processing is finished). 53 * 54 * conn_recv is used to pass up packets to the ULP. 55 * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for 56 * a listener, and changes to tcp_input_listener as the listener has picked a 57 * good squeue. For other cases it is set to tcp_input_data. 58 * 59 * conn_recvicmp is used to pass up ICMP errors to the ULP. 60 * 61 * Classifier uses several hash tables: 62 * 63 * ipcl_conn_fanout: contains all TCP connections in CONNECTED state 64 * ipcl_bind_fanout: contains all connections in BOUND state 65 * ipcl_proto_fanout: IPv4 protocol fanout 66 * ipcl_proto_fanout_v6: IPv6 protocol fanout 67 * ipcl_udp_fanout: contains all UDP connections 68 * ipcl_iptun_fanout: contains all IP tunnel connections 69 * ipcl_globalhash_fanout: contains all connections 70 * 71 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering) 72 * which need to view all existing connections. 73 * 74 * All tables are protected by per-bucket locks. When both per-bucket lock and 75 * connection lock need to be held, the per-bucket lock should be acquired 76 * first, followed by the connection lock. 77 * 78 * All functions doing search in one of these tables increment a reference 79 * counter on the connection found (if any). This reference should be dropped 80 * when the caller has finished processing the connection. 81 * 82 * 83 * INTERFACES: 84 * =========== 85 * 86 * Connection Lookup: 87 * ------------------ 88 * 89 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack) 90 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack) 91 * 92 * Finds connection for an incoming IPv4 or IPv6 packet. 
 *	Returns NULL if
 *	it can't find any associated connection. If the connection is found, its
 *	reference counter is incremented.
 *
 *	mp:	mblock, containing packet header. The full header should fit
 *		into a single mblock. It should also contain at least full IP
 *		and TCP or UDP header.
 *
 *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 *
 *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
 *		 the packet.
 *
 *	ira->ira_zoneid: The zone in which the returned connection must be; the
 *		zoneid corresponding to the ire_zoneid on the IRE located for
 *		the packet's destination address.
 *
 *	ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
 *		IRAF_TX_SHARED_ADDR flags
 *
 *	For TCP connections, the lookup order is as follows:
 *		5-tuple {src, dst, protocol, local port, remote port}
 *			lookup in ipcl_conn_fanout table.
 *		3-tuple {dst, remote port, protocol} lookup in
 *			ipcl_bind_fanout table.
 *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout. Note that
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
 *
 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 * determine which actual zone gets the segment.  This is used only in a
 * labeled environment.  The matching rules are:
 *
 *	- If it's not a multilevel port, then the label on the packet selects
 *	  the zone.  Unlabeled packets are delivered to the global zone.
 *
 *	- If it's a multilevel port, then only the zone registered to receive
 *	  packets on that port matches.
 *
 * Also, in a labeled environment, packet labels need to be checked.
 * For fully
 * bound TCP connections, we can assume that the packet label was checked
 * during connection establishment, and doesn't need to be checked on each
 * packet.  For others, though, we need to check for strict equality or, for
 * multilevel ports, membership in the range or set.  This part currently does
 * a tnrh lookup on each packet, but could be optimized to use cached results
 * if that were necessary.  (SCTP doesn't come through here, but if it did,
 * we would apply the same rules as TCP.)
 *
 * An implication of the above is that fully-bound TCP sockets must always use
 * distinct 4-tuples; they can't be discriminated by label alone.
 *
 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 * as there's no connection set-up handshake and no shared state.
 *
 * Labels on looped-back packets within a single zone do not need to be
 * checked, as all processes in the same zone have the same label.
 *
 * Finally, for unlabeled packets received by a labeled system, special rules
 * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
 * socket in the zone whose label matches the default label of the sender, if
 * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 * receiver's label must dominate the sender's default label.
 *
 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 *					 ip_stack);
 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
 *
 * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
 *					 zoneid, ip_stack);
 * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 *					 zoneid, ip_stack);
 *
 *	Lookup routine to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter interface index is also compared.
 *
 * void ipcl_walk(func, arg, ip_stack)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic so connections may be
 *	created and destroyed during the walk. The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp);
 * int ipcl_conn_insert_v4(connp);
 * int ipcl_conn_insert_v6(connp);
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *
 *	Return value :
 *		0		if connp was inserted
 *		EADDRINUSE	if the connection with the same tuple
 *				already exists.
 *
 * int ipcl_bind_insert(connp);
 * int ipcl_bind_insert_v4(connp);
 * int ipcl_bind_insert_v6(connp);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
 *
 * Connection Creation/Destruction
 * -------------------------------
 *
 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 *
 *	Creates a new conn based on the type flag, inserts it into
 *	globalhash table.
 *
 *	type:	This flag determines the type of conn_t which needs to be
 *		created i.e., which kmem_cache it comes from.
221 * IPCL_TCPCONN indicates a TCP connection 222 * IPCL_SCTPCONN indicates a SCTP connection 223 * IPCL_UDPCONN indicates a UDP conn_t. 224 * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t. 225 * IPCL_RTSCONN indicates a RTS conn_t. 226 * IPCL_DCCPCONN indicates a DCCP conn_t. 227 * IPCL_IPCCONN indicates all other connections. 228 * 229 * void ipcl_conn_destroy(connp) 230 * 231 * Destroys the connection state, removes it from the global 232 * connection hash table and frees its memory. 233 */ 234 235 #include <sys/types.h> 236 #include <sys/stream.h> 237 #include <sys/stropts.h> 238 #include <sys/sysmacros.h> 239 #include <sys/strsubr.h> 240 #include <sys/strsun.h> 241 #define _SUN_TPI_VERSION 2 242 #include <sys/ddi.h> 243 #include <sys/cmn_err.h> 244 #include <sys/debug.h> 245 246 #include <sys/systm.h> 247 #include <sys/param.h> 248 #include <sys/kmem.h> 249 #include <sys/isa_defs.h> 250 #include <inet/common.h> 251 #include <netinet/ip6.h> 252 #include <netinet/icmp6.h> 253 254 #include <inet/ip.h> 255 #include <inet/ip_if.h> 256 #include <inet/ip_ire.h> 257 #include <inet/ip6.h> 258 #include <inet/ip_ndp.h> 259 #include <inet/ip_impl.h> 260 #include <inet/udp_impl.h> 261 #include <inet/dccp/dccp_impl.h> 262 #include <inet/sctp_ip.h> 263 #include <inet/sctp/sctp_impl.h> 264 #include <inet/rawip_impl.h> 265 #include <inet/rts_impl.h> 266 #include <inet/iptun/iptun_impl.h> 267 268 #include <sys/cpuvar.h> 269 270 #include <inet/ipclassifier.h> 271 #include <inet/tcp.h> 272 #include <inet/ipsec_impl.h> 273 274 #include <sys/tsol/tnet.h> 275 #include <sys/sockio.h> 276 277 /* Old value for compatibility. Setable in /etc/system */ 278 uint_t tcp_conn_hash_size = 0; 279 280 /* New value. Zero means choose automatically. 
Settable in /etc/system */
uint_t ipcl_conn_hash_size = 0;
/*
 * Used by ipcl_init() only when neither ipcl_conn_hash_size nor
 * tcp_conn_hash_size is set: the table size is derived from free memory
 * (in bytes) divided by ipcl_conn_hash_memfactor and is capped at
 * ipcl_conn_hash_maxsize.
 */
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/* bind/dccp/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_dccp_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 * expect that most large deployments would have hundreds of tunnels, and
 * thousands in the extreme case.
 */
uint_t ipcl_iptun_fanout_size = 6143;

/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 *
 * The initializer is indexed by N.  The entries for N of 0-2 and the final
 * entry are 0; ipcl_init() treats a 0 entry as "out of range".
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}

/*
 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 * are aligned on cache lines.
313 */ 314 typedef union itc_s { 315 conn_t itc_conn; 316 char itcu_filler[CACHE_ALIGN(conn_s)]; 317 } itc_t; 318 319 struct kmem_cache *tcp_conn_cache; 320 struct kmem_cache *ip_conn_cache; 321 extern struct kmem_cache *sctp_conn_cache; 322 struct kmem_cache *udp_conn_cache; 323 struct kmem_cache *rawip_conn_cache; 324 struct kmem_cache *rts_conn_cache; 325 struct kmem_cache *dccp_conn_cache; 326 327 extern void tcp_timermp_free(tcp_t *); 328 extern mblk_t *tcp_timermp_alloc(int); 329 330 static int ip_conn_constructor(void *, void *, int); 331 static void ip_conn_destructor(void *, void *); 332 333 static int tcp_conn_constructor(void *, void *, int); 334 static void tcp_conn_destructor(void *, void *); 335 336 static int udp_conn_constructor(void *, void *, int); 337 static void udp_conn_destructor(void *, void *); 338 339 static int rawip_conn_constructor(void *, void *, int); 340 static void rawip_conn_destructor(void *, void *); 341 342 static int rts_conn_constructor(void *, void *, int); 343 static void rts_conn_destructor(void *, void *); 344 345 static int dccp_conn_constructor(void *, void *, int); 346 static void dccp_conn_destructor(void *, void *); 347 348 /* 349 * Global (for all stack instances) init routine 350 */ 351 void 352 ipcl_g_init(void) 353 { 354 ip_conn_cache = kmem_cache_create("ip_conn_cache", 355 sizeof (conn_t), CACHE_ALIGN_SIZE, 356 ip_conn_constructor, ip_conn_destructor, 357 NULL, NULL, NULL, 0); 358 359 tcp_conn_cache = kmem_cache_create("tcp_conn_cache", 360 sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE, 361 tcp_conn_constructor, tcp_conn_destructor, 362 tcp_conn_reclaim, NULL, NULL, 0); 363 364 udp_conn_cache = kmem_cache_create("udp_conn_cache", 365 sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE, 366 udp_conn_constructor, udp_conn_destructor, 367 NULL, NULL, NULL, 0); 368 369 rawip_conn_cache = kmem_cache_create("rawip_conn_cache", 370 sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE, 371 rawip_conn_constructor, 
rawip_conn_destructor, 372 NULL, NULL, NULL, 0); 373 374 rts_conn_cache = kmem_cache_create("rts_conn_cache", 375 sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE, 376 rts_conn_constructor, rts_conn_destructor, 377 NULL, NULL, NULL, 0); 378 379 /* XXX:DCCP reclaim */ 380 dccp_conn_cache = kmem_cache_create("dccp_conn_cache", 381 sizeof (itc_t) + sizeof (dccp_t), CACHE_ALIGN_SIZE, 382 dccp_conn_constructor, dccp_conn_destructor, 383 NULL, NULL, NULL, 0); 384 } 385 386 /* 387 * ipclassifier intialization routine, sets up hash tables. 388 */ 389 void 390 ipcl_init(ip_stack_t *ipst) 391 { 392 int i; 393 int sizes[] = P2Ps(); 394 395 /* 396 * Calculate size of conn fanout table from /etc/system settings 397 */ 398 if (ipcl_conn_hash_size != 0) { 399 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; 400 } else if (tcp_conn_hash_size != 0) { 401 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; 402 } else { 403 extern pgcnt_t freemem; 404 405 ipst->ips_ipcl_conn_fanout_size = 406 (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 407 408 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { 409 ipst->ips_ipcl_conn_fanout_size = 410 ipcl_conn_hash_maxsize; 411 } 412 } 413 414 for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 415 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { 416 break; 417 } 418 } 419 if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { 420 /* Out of range, use the 2^16 value */ 421 ipst->ips_ipcl_conn_fanout_size = sizes[16]; 422 } 423 424 /* Take values from /etc/system */ 425 ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; 426 ipst->ips_ipcl_dccp_fanout_size = ipcl_dccp_fanout_size; 427 ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; 428 ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; 429 ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size; 430 431 ASSERT(ipst->ips_ipcl_conn_fanout == NULL); 432 433 ipst->ips_ipcl_conn_fanout = kmem_zalloc( 434 ipst->ips_ipcl_conn_fanout_size * sizeof 
(connf_t), KM_SLEEP); 435 436 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 437 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL, 438 MUTEX_DEFAULT, NULL); 439 } 440 441 ipst->ips_ipcl_bind_fanout = kmem_zalloc( 442 ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); 443 444 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 445 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, 446 MUTEX_DEFAULT, NULL); 447 } 448 449 ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX * 450 sizeof (connf_t), KM_SLEEP); 451 for (i = 0; i < IPPROTO_MAX; i++) { 452 mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL, 453 MUTEX_DEFAULT, NULL); 454 } 455 456 ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * 457 sizeof (connf_t), KM_SLEEP); 458 for (i = 0; i < IPPROTO_MAX; i++) { 459 mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, 460 MUTEX_DEFAULT, NULL); 461 } 462 463 ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); 464 mutex_init(&ipst->ips_rts_clients->connf_lock, 465 NULL, MUTEX_DEFAULT, NULL); 466 467 ipst->ips_ipcl_udp_fanout = kmem_zalloc( 468 ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); 469 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 470 mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, 471 MUTEX_DEFAULT, NULL); 472 } 473 474 ipst->ips_ipcl_iptun_fanout = kmem_zalloc( 475 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP); 476 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { 477 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL, 478 MUTEX_DEFAULT, NULL); 479 } 480 481 ipst->ips_ipcl_raw_fanout = kmem_zalloc( 482 ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); 483 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 484 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, 485 MUTEX_DEFAULT, NULL); 486 } 487 488 ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( 489 sizeof (connf_t) * 
CONN_G_HASH_SIZE, KM_SLEEP); 490 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 491 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, 492 NULL, MUTEX_DEFAULT, NULL); 493 } 494 495 ipst->ips_ipcl_dccp_fanout = kmem_zalloc( 496 ipst->ips_ipcl_dccp_fanout_size * sizeof (connf_t), KM_SLEEP); 497 for (i = 0; i < ipst->ips_ipcl_dccp_fanout_size; i++) { 498 mutex_init(&ipst->ips_ipcl_dccp_fanout[i].connf_lock, NULL, 499 MUTEX_DEFAULT, NULL); 500 } 501 } 502 503 void 504 ipcl_g_destroy(void) 505 { 506 kmem_cache_destroy(ip_conn_cache); 507 kmem_cache_destroy(tcp_conn_cache); 508 kmem_cache_destroy(udp_conn_cache); 509 kmem_cache_destroy(rawip_conn_cache); 510 kmem_cache_destroy(rts_conn_cache); 511 kmem_cache_destroy(dccp_conn_cache); 512 } 513 514 /* 515 * All user-level and kernel use of the stack must be gone 516 * by now. 517 */ 518 void 519 ipcl_destroy(ip_stack_t *ipst) 520 { 521 int i; 522 523 for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 524 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); 525 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); 526 } 527 kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * 528 sizeof (connf_t)); 529 ipst->ips_ipcl_conn_fanout = NULL; 530 531 for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 532 ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); 533 mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); 534 } 535 kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * 536 sizeof (connf_t)); 537 ipst->ips_ipcl_bind_fanout = NULL; 538 539 for (i = 0; i < IPPROTO_MAX; i++) { 540 ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL); 541 mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock); 542 } 543 kmem_free(ipst->ips_ipcl_proto_fanout_v4, 544 IPPROTO_MAX * sizeof (connf_t)); 545 ipst->ips_ipcl_proto_fanout_v4 = NULL; 546 547 for (i = 0; i < IPPROTO_MAX; i++) { 548 ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); 549 
mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); 550 } 551 kmem_free(ipst->ips_ipcl_proto_fanout_v6, 552 IPPROTO_MAX * sizeof (connf_t)); 553 ipst->ips_ipcl_proto_fanout_v6 = NULL; 554 555 for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 556 ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); 557 mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); 558 } 559 kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * 560 sizeof (connf_t)); 561 ipst->ips_ipcl_udp_fanout = NULL; 562 563 for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { 564 ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL); 565 mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock); 566 } 567 kmem_free(ipst->ips_ipcl_iptun_fanout, 568 ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t)); 569 ipst->ips_ipcl_iptun_fanout = NULL; 570 571 for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 572 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); 573 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); 574 } 575 kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * 576 sizeof (connf_t)); 577 ipst->ips_ipcl_raw_fanout = NULL; 578 579 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 580 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); 581 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 582 } 583 kmem_free(ipst->ips_ipcl_globalhash_fanout, 584 sizeof (connf_t) * CONN_G_HASH_SIZE); 585 ipst->ips_ipcl_globalhash_fanout = NULL; 586 587 for (i = 0; i < ipst->ips_ipcl_dccp_fanout_size; i++) { 588 ASSERT(ipst->ips_ipcl_dccp_fanout[i].connf_head == NULL); 589 mutex_destroy(&ipst->ips_ipcl_dccp_fanout[i].connf_lock); 590 } 591 kmem_free(ipst->ips_ipcl_dccp_fanout, ipst->ips_ipcl_dccp_fanout_size * 592 sizeof (connf_t)); 593 ipst->ips_ipcl_dccp_fanout = NULL; 594 595 ASSERT(ipst->ips_rts_clients->connf_head == NULL); 596 mutex_destroy(&ipst->ips_rts_clients->connf_lock); 597 kmem_free(ipst->ips_rts_clients, sizeof 
(connf_t)); 598 ipst->ips_rts_clients = NULL; 599 } 600 601 /* 602 * conn creation routine. initialize the conn, sets the reference 603 * and inserts it in the global hash table. 604 */ 605 conn_t * 606 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 607 { 608 conn_t *connp; 609 struct kmem_cache *conn_cache; 610 611 switch (type) { 612 case IPCL_SCTPCONN: 613 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 614 return (NULL); 615 sctp_conn_init(connp); 616 netstack_hold(ns); 617 connp->conn_netstack = ns; 618 connp->conn_ixa->ixa_ipst = ns->netstack_ip; 619 connp->conn_ixa->ixa_conn_id = (long)connp; 620 ipcl_globalhash_insert(connp); 621 return (connp); 622 623 case IPCL_TCPCONN: 624 conn_cache = tcp_conn_cache; 625 break; 626 627 case IPCL_UDPCONN: 628 conn_cache = udp_conn_cache; 629 break; 630 631 case IPCL_RAWIPCONN: 632 conn_cache = rawip_conn_cache; 633 break; 634 635 case IPCL_RTSCONN: 636 conn_cache = rts_conn_cache; 637 break; 638 639 case IPCL_IPCCONN: 640 conn_cache = ip_conn_cache; 641 break; 642 643 case IPCL_DCCPCONN: 644 conn_cache = dccp_conn_cache; 645 break; 646 647 default: 648 connp = NULL; 649 ASSERT(0); 650 } 651 652 if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL) 653 return (NULL); 654 655 connp->conn_ref = 1; 656 netstack_hold(ns); 657 connp->conn_netstack = ns; 658 connp->conn_ixa->ixa_ipst = ns->netstack_ip; 659 connp->conn_ixa->ixa_conn_id = (long)connp; 660 ipcl_globalhash_insert(connp); 661 return (connp); 662 } 663 664 void 665 ipcl_conn_destroy(conn_t *connp) 666 { 667 mblk_t *mp; 668 netstack_t *ns = connp->conn_netstack; 669 670 ASSERT(!MUTEX_HELD(&connp->conn_lock)); 671 ASSERT(connp->conn_ref == 0); 672 ASSERT(connp->conn_ioctlref == 0); 673 674 DTRACE_PROBE1(conn__destroy, conn_t *, connp); 675 676 if (connp->conn_cred != NULL) { 677 crfree(connp->conn_cred); 678 connp->conn_cred = NULL; 679 /* ixa_cred done in ipcl_conn_cleanup below */ 680 } 681 682 if (connp->conn_ht_iphc != NULL) { 683 
kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated); 684 connp->conn_ht_iphc = NULL; 685 connp->conn_ht_iphc_allocated = 0; 686 connp->conn_ht_iphc_len = 0; 687 connp->conn_ht_ulp = NULL; 688 connp->conn_ht_ulp_len = 0; 689 } 690 ip_pkt_free(&connp->conn_xmit_ipp); 691 692 ipcl_globalhash_remove(connp); 693 694 if (connp->conn_latch != NULL) { 695 IPLATCH_REFRELE(connp->conn_latch); 696 connp->conn_latch = NULL; 697 } 698 if (connp->conn_latch_in_policy != NULL) { 699 IPPOL_REFRELE(connp->conn_latch_in_policy); 700 connp->conn_latch_in_policy = NULL; 701 } 702 if (connp->conn_latch_in_action != NULL) { 703 IPACT_REFRELE(connp->conn_latch_in_action); 704 connp->conn_latch_in_action = NULL; 705 } 706 if (connp->conn_policy != NULL) { 707 IPPH_REFRELE(connp->conn_policy, ns); 708 connp->conn_policy = NULL; 709 } 710 711 if (connp->conn_ipsec_opt_mp != NULL) { 712 freemsg(connp->conn_ipsec_opt_mp); 713 connp->conn_ipsec_opt_mp = NULL; 714 } 715 716 if (connp->conn_flags & IPCL_TCPCONN) { 717 tcp_t *tcp = connp->conn_tcp; 718 719 tcp_free(tcp); 720 mp = tcp->tcp_timercache; 721 722 tcp->tcp_tcps = NULL; 723 724 /* 725 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate 726 * the mblk. 
727 */ 728 if (tcp->tcp_rsrv_mp != NULL) { 729 freeb(tcp->tcp_rsrv_mp); 730 tcp->tcp_rsrv_mp = NULL; 731 mutex_destroy(&tcp->tcp_rsrv_mp_lock); 732 } 733 734 ipcl_conn_cleanup(connp); 735 connp->conn_flags = IPCL_TCPCONN; 736 if (ns != NULL) { 737 ASSERT(tcp->tcp_tcps == NULL); 738 connp->conn_netstack = NULL; 739 connp->conn_ixa->ixa_ipst = NULL; 740 netstack_rele(ns); 741 } 742 743 bzero(tcp, sizeof (tcp_t)); 744 745 tcp->tcp_timercache = mp; 746 tcp->tcp_connp = connp; 747 kmem_cache_free(tcp_conn_cache, connp); 748 return; 749 } 750 751 if (connp->conn_flags & IPCL_SCTPCONN) { 752 ASSERT(ns != NULL); 753 sctp_free(connp); 754 return; 755 } 756 757 if (connp->conn_flags & IPCL_DCCPCONN) { 758 dccp_t *dccp = connp->conn_dccp; 759 760 cmn_err(CE_NOTE, "ipclassifier: conn_flags DCCP cache_free"); 761 762 /* XXX:DCCP */ 763 /* Crash bug here: udp_conn_cache and dccp_conn_cache */ 764 /* 765 ipcl_conn_cleanup(connp); 766 connp->conn_flags = IPCL_DCCPCONN; 767 bzero(dccp, sizeof (dccp_t)); 768 dccp->dccp_connp = connp; 769 kmem_cache_free(dccp_conn_cache, connp); 770 return; 771 */ 772 } 773 774 ipcl_conn_cleanup(connp); 775 if (ns != NULL) { 776 connp->conn_netstack = NULL; 777 connp->conn_ixa->ixa_ipst = NULL; 778 netstack_rele(ns); 779 } 780 781 /* leave conn_priv aka conn_udp, conn_icmp, etc in place. 
*/ 782 if (connp->conn_flags & IPCL_UDPCONN) { 783 connp->conn_flags = IPCL_UDPCONN; 784 kmem_cache_free(udp_conn_cache, connp); 785 } else if (connp->conn_flags & IPCL_RAWIPCONN) { 786 connp->conn_flags = IPCL_RAWIPCONN; 787 connp->conn_proto = IPPROTO_ICMP; 788 connp->conn_ixa->ixa_protocol = connp->conn_proto; 789 kmem_cache_free(rawip_conn_cache, connp); 790 } else if (connp->conn_flags & IPCL_RTSCONN) { 791 connp->conn_flags = IPCL_RTSCONN; 792 kmem_cache_free(rts_conn_cache, connp); 793 } else { 794 connp->conn_flags = IPCL_IPCCONN; 795 ASSERT(connp->conn_flags & IPCL_IPCCONN); 796 ASSERT(connp->conn_priv == NULL); 797 kmem_cache_free(ip_conn_cache, connp); 798 } 799 } 800 801 /* 802 * Running in cluster mode - deregister listener information 803 */ 804 static void 805 ipcl_conn_unlisten(conn_t *connp) 806 { 807 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 808 ASSERT(connp->conn_lport != 0); 809 810 if (cl_inet_unlisten != NULL) { 811 sa_family_t addr_family; 812 uint8_t *laddrp; 813 814 if (connp->conn_ipversion == IPV6_VERSION) { 815 addr_family = AF_INET6; 816 laddrp = (uint8_t *)&connp->conn_bound_addr_v6; 817 } else { 818 addr_family = AF_INET; 819 laddrp = (uint8_t *)&connp->conn_bound_addr_v4; 820 } 821 (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid, 822 IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL); 823 } 824 connp->conn_flags &= ~IPCL_CL_LISTENER; 825 } 826 827 /* 828 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 829 * which table the conn belonged to). So for debugging we can see which hash 830 * table this connection was in. 
 *
 * IPCL_HASH_REMOVE() does nothing if the conn is not on a fanout list
 * (conn_fanout is NULL).  Otherwise, under that bucket's connf_lock, it
 * unlinks the conn, clears its list linkage, sets IPCL_REMOVED, calls
 * ipcl_conn_unlisten() for IPCL_CL_LISTENER conns and drops one reference
 * with CONN_DEC_REF().  The caller must not hold conn_lock.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL)				\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		if ((connp)->conn_prev != NULL)				\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		else							\
			connfp->connf_head = (connp)->conn_next;	\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}

/*
 * Remove 'connp' from the fanout bucket it is currently on, if any.
 * conn_proto is sampled before the removal; when it is IPPROTO_RSVP,
 * ill_set_inputfn_all() is called on the conn's IP stack afterwards.
 */
void
ipcl_hash_remove(conn_t *connp)
{
	uint8_t		protocol = connp->conn_proto;

	IPCL_HASH_REMOVE(connp);
	if (protocol == IPPROTO_RSVP)
		ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
}

/*
 * The whole purpose of this function is to allow removal of
 * a conn_t from the connected hash for timewait reclaim.
 * This is essentially a TW reclaim fastpath where timewait
 * collector checks under fanout lock (so no one else can
 * get access to the conn_t) that refcnt is 2 i.e. one for
 * TCP and one for the classifier hash list. If ref count
 * is indeed 2, we can just remove the conn under lock and
 * avoid cleaning up the conn under squeue. This gives us
 * improved performance.
876 */ 877 void 878 ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 879 { 880 ASSERT(MUTEX_HELD(&connfp->connf_lock)); 881 ASSERT(MUTEX_HELD(&connp->conn_lock)); 882 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 883 884 if ((connp)->conn_next != NULL) { 885 (connp)->conn_next->conn_prev = (connp)->conn_prev; 886 } 887 if ((connp)->conn_prev != NULL) { 888 (connp)->conn_prev->conn_next = (connp)->conn_next; 889 } else { 890 connfp->connf_head = (connp)->conn_next; 891 } 892 (connp)->conn_fanout = NULL; 893 (connp)->conn_next = NULL; 894 (connp)->conn_prev = NULL; 895 (connp)->conn_flags |= IPCL_REMOVED; 896 ASSERT((connp)->conn_ref == 2); 897 (connp)->conn_ref--; 898 } 899 900 #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 901 ASSERT((connp)->conn_fanout == NULL); \ 902 ASSERT((connp)->conn_next == NULL); \ 903 ASSERT((connp)->conn_prev == NULL); \ 904 if ((connfp)->connf_head != NULL) { \ 905 (connfp)->connf_head->conn_prev = (connp); \ 906 (connp)->conn_next = (connfp)->connf_head; \ 907 } \ 908 (connp)->conn_fanout = (connfp); \ 909 (connfp)->connf_head = (connp); \ 910 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 911 IPCL_CONNECTED; \ 912 CONN_INC_REF(connp); \ 913 } 914 915 #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 916 IPCL_HASH_REMOVE((connp)); \ 917 mutex_enter(&(connfp)->connf_lock); \ 918 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 919 mutex_exit(&(connfp)->connf_lock); \ 920 } 921 922 #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 923 conn_t *pconnp = NULL, *nconnp; \ 924 IPCL_HASH_REMOVE((connp)); \ 925 mutex_enter(&(connfp)->connf_lock); \ 926 nconnp = (connfp)->connf_head; \ 927 while (nconnp != NULL && \ 928 !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) { \ 929 pconnp = nconnp; \ 930 nconnp = nconnp->conn_next; \ 931 } \ 932 if (pconnp != NULL) { \ 933 pconnp->conn_next = (connp); \ 934 (connp)->conn_prev = pconnp; \ 935 } else { \ 936 (connfp)->connf_head = (connp); \ 937 } \ 938 if 
(nconnp != NULL) { \ 939 (connp)->conn_next = nconnp; \ 940 nconnp->conn_prev = (connp); \ 941 } \ 942 (connp)->conn_fanout = (connfp); \ 943 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 944 IPCL_BOUND; \ 945 CONN_INC_REF(connp); \ 946 mutex_exit(&(connfp)->connf_lock); \ 947 } 948 949 #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 950 conn_t **list, *prev, *next; \ 951 boolean_t isv4mapped = \ 952 IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6); \ 953 IPCL_HASH_REMOVE((connp)); \ 954 mutex_enter(&(connfp)->connf_lock); \ 955 list = &(connfp)->connf_head; \ 956 prev = NULL; \ 957 while ((next = *list) != NULL) { \ 958 if (isv4mapped && \ 959 IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) && \ 960 connp->conn_zoneid == next->conn_zoneid) { \ 961 (connp)->conn_next = next; \ 962 if (prev != NULL) \ 963 prev = next->conn_prev; \ 964 next->conn_prev = (connp); \ 965 break; \ 966 } \ 967 list = &next->conn_next; \ 968 prev = next; \ 969 } \ 970 (connp)->conn_prev = prev; \ 971 *list = (connp); \ 972 (connp)->conn_fanout = (connfp); \ 973 (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 974 IPCL_BOUND; \ 975 CONN_INC_REF((connp)); \ 976 mutex_exit(&(connfp)->connf_lock); \ 977 } 978 979 void 980 ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 981 { 982 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 983 } 984 985 /* 986 * Because the classifier is used to classify inbound packets, the destination 987 * address is meant to be our local tunnel address (tunnel source), and the 988 * source the remote tunnel address (tunnel destination). 989 * 990 * Note that conn_proto can't be used for fanout since the upper protocol 991 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel. 
992 */ 993 conn_t * 994 ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst) 995 { 996 connf_t *connfp; 997 conn_t *connp; 998 999 /* first look for IPv4 tunnel links */ 1000 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)]; 1001 mutex_enter(&connfp->connf_lock); 1002 for (connp = connfp->connf_head; connp != NULL; 1003 connp = connp->conn_next) { 1004 if (IPCL_IPTUN_MATCH(connp, *dst, *src)) 1005 break; 1006 } 1007 if (connp != NULL) 1008 goto done; 1009 1010 mutex_exit(&connfp->connf_lock); 1011 1012 /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */ 1013 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, 1014 INADDR_ANY)]; 1015 mutex_enter(&connfp->connf_lock); 1016 for (connp = connfp->connf_head; connp != NULL; 1017 connp = connp->conn_next) { 1018 if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY)) 1019 break; 1020 } 1021 done: 1022 if (connp != NULL) 1023 CONN_INC_REF(connp); 1024 mutex_exit(&connfp->connf_lock); 1025 return (connp); 1026 } 1027 1028 conn_t * 1029 ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst) 1030 { 1031 connf_t *connfp; 1032 conn_t *connp; 1033 1034 /* Look for an IPv6 tunnel link */ 1035 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)]; 1036 mutex_enter(&connfp->connf_lock); 1037 for (connp = connfp->connf_head; connp != NULL; 1038 connp = connp->conn_next) { 1039 if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) { 1040 CONN_INC_REF(connp); 1041 break; 1042 } 1043 } 1044 mutex_exit(&connfp->connf_lock); 1045 return (connp); 1046 } 1047 1048 /* 1049 * This function is used only for inserting SCTP raw socket now. 1050 * This may change later. 1051 * 1052 * Note that only one raw socket can be bound to a port. The param 1053 * lport is in network byte order. 
1054 */ 1055 static int 1056 ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 1057 { 1058 connf_t *connfp; 1059 conn_t *oconnp; 1060 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1061 1062 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 1063 1064 /* Check for existing raw socket already bound to the port. */ 1065 mutex_enter(&connfp->connf_lock); 1066 for (oconnp = connfp->connf_head; oconnp != NULL; 1067 oconnp = oconnp->conn_next) { 1068 if (oconnp->conn_lport == lport && 1069 oconnp->conn_zoneid == connp->conn_zoneid && 1070 oconnp->conn_family == connp->conn_family && 1071 ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) || 1072 IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) || 1073 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) || 1074 IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) || 1075 IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6, 1076 &connp->conn_laddr_v6))) { 1077 break; 1078 } 1079 } 1080 mutex_exit(&connfp->connf_lock); 1081 if (oconnp != NULL) 1082 return (EADDRNOTAVAIL); 1083 1084 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) || 1085 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 1086 if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) || 1087 IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) { 1088 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1089 } else { 1090 IPCL_HASH_INSERT_BOUND(connfp, connp); 1091 } 1092 } else { 1093 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1094 } 1095 return (0); 1096 } 1097 1098 static int 1099 ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst) 1100 { 1101 connf_t *connfp; 1102 conn_t *tconnp; 1103 ipaddr_t laddr = connp->conn_laddr_v4; 1104 ipaddr_t faddr = connp->conn_faddr_v4; 1105 1106 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)]; 1107 mutex_enter(&connfp->connf_lock); 1108 for (tconnp = connfp->connf_head; tconnp != NULL; 1109 tconnp = tconnp->conn_next) { 1110 if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) { 1111 /* A tunnel is already bound to 
these addresses. */ 1112 mutex_exit(&connfp->connf_lock); 1113 return (EADDRINUSE); 1114 } 1115 } 1116 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1117 mutex_exit(&connfp->connf_lock); 1118 return (0); 1119 } 1120 1121 static int 1122 ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst) 1123 { 1124 connf_t *connfp; 1125 conn_t *tconnp; 1126 in6_addr_t *laddr = &connp->conn_laddr_v6; 1127 in6_addr_t *faddr = &connp->conn_faddr_v6; 1128 1129 connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)]; 1130 mutex_enter(&connfp->connf_lock); 1131 for (tconnp = connfp->connf_head; tconnp != NULL; 1132 tconnp = tconnp->conn_next) { 1133 if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) { 1134 /* A tunnel is already bound to these addresses. */ 1135 mutex_exit(&connfp->connf_lock); 1136 return (EADDRINUSE); 1137 } 1138 } 1139 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1140 mutex_exit(&connfp->connf_lock); 1141 return (0); 1142 } 1143 1144 /* 1145 * Check for a MAC exemption conflict on a labeled system. Note that for 1146 * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the 1147 * transport layer. This check is for binding all other protocols. 1148 * 1149 * Returns true if there's a conflict. 
1150 */ 1151 static boolean_t 1152 check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst) 1153 { 1154 connf_t *connfp; 1155 conn_t *tconn; 1156 1157 connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto]; 1158 mutex_enter(&connfp->connf_lock); 1159 for (tconn = connfp->connf_head; tconn != NULL; 1160 tconn = tconn->conn_next) { 1161 /* We don't allow v4 fallback for v6 raw socket */ 1162 if (connp->conn_family != tconn->conn_family) 1163 continue; 1164 /* If neither is exempt, then there's no conflict */ 1165 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) && 1166 (tconn->conn_mac_mode == CONN_MAC_DEFAULT)) 1167 continue; 1168 /* We are only concerned about sockets for a different zone */ 1169 if (connp->conn_zoneid == tconn->conn_zoneid) 1170 continue; 1171 /* If both are bound to different specific addrs, ok */ 1172 if (connp->conn_laddr_v4 != INADDR_ANY && 1173 tconn->conn_laddr_v4 != INADDR_ANY && 1174 connp->conn_laddr_v4 != tconn->conn_laddr_v4) 1175 continue; 1176 /* These two conflict; fail */ 1177 break; 1178 } 1179 mutex_exit(&connfp->connf_lock); 1180 return (tconn != NULL); 1181 } 1182 1183 static boolean_t 1184 check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst) 1185 { 1186 connf_t *connfp; 1187 conn_t *tconn; 1188 1189 connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto]; 1190 mutex_enter(&connfp->connf_lock); 1191 for (tconn = connfp->connf_head; tconn != NULL; 1192 tconn = tconn->conn_next) { 1193 /* We don't allow v4 fallback for v6 raw socket */ 1194 if (connp->conn_family != tconn->conn_family) 1195 continue; 1196 /* If neither is exempt, then there's no conflict */ 1197 if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) && 1198 (tconn->conn_mac_mode == CONN_MAC_DEFAULT)) 1199 continue; 1200 /* We are only concerned about sockets for a different zone */ 1201 if (connp->conn_zoneid == tconn->conn_zoneid) 1202 continue; 1203 /* If both are bound to different addrs, ok */ 1204 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) && 
1205 !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) && 1206 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 1207 &tconn->conn_laddr_v6)) 1208 continue; 1209 /* These two conflict; fail */ 1210 break; 1211 } 1212 mutex_exit(&connfp->connf_lock); 1213 return (tconn != NULL); 1214 } 1215 1216 /* 1217 * (v4, v6) bind hash insertion routines 1218 * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport) 1219 */ 1220 1221 int 1222 ipcl_bind_insert(conn_t *connp) 1223 { 1224 if (connp->conn_ipversion == IPV6_VERSION) 1225 return (ipcl_bind_insert_v6(connp)); 1226 else 1227 return (ipcl_bind_insert_v4(connp)); 1228 } 1229 1230 int 1231 ipcl_bind_insert_v4(conn_t *connp) 1232 { 1233 connf_t *connfp; 1234 int ret = 0; 1235 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1236 uint16_t lport = connp->conn_lport; 1237 uint8_t protocol = connp->conn_proto; 1238 1239 if (IPCL_IS_IPTUN(connp)) 1240 return (ipcl_iptun_hash_insert(connp, ipst)); 1241 1242 switch (protocol) { 1243 default: 1244 if (is_system_labeled() && 1245 check_exempt_conflict_v4(connp, ipst)) 1246 return (EADDRINUSE); 1247 /* FALLTHROUGH */ 1248 case IPPROTO_UDP: 1249 if (protocol == IPPROTO_UDP) { 1250 connfp = &ipst->ips_ipcl_udp_fanout[ 1251 IPCL_UDP_HASH(lport, ipst)]; 1252 } else { 1253 connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol]; 1254 } 1255 1256 if (connp->conn_faddr_v4 != INADDR_ANY) { 1257 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1258 } else if (connp->conn_laddr_v4 != INADDR_ANY) { 1259 IPCL_HASH_INSERT_BOUND(connfp, connp); 1260 } else { 1261 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1262 } 1263 if (protocol == IPPROTO_RSVP) 1264 ill_set_inputfn_all(ipst); 1265 break; 1266 1267 case IPPROTO_TCP: 1268 /* Insert it in the Bind Hash */ 1269 ASSERT(connp->conn_zoneid != ALL_ZONES); 1270 connfp = &ipst->ips_ipcl_bind_fanout[ 1271 IPCL_BIND_HASH(lport, ipst)]; 1272 if (connp->conn_laddr_v4 != INADDR_ANY) { 1273 IPCL_HASH_INSERT_BOUND(connfp, connp); 1274 } else { 1275 
IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1276 } 1277 if (cl_inet_listen != NULL) { 1278 ASSERT(connp->conn_ipversion == IPV4_VERSION); 1279 connp->conn_flags |= IPCL_CL_LISTENER; 1280 (*cl_inet_listen)( 1281 connp->conn_netstack->netstack_stackid, 1282 IPPROTO_TCP, AF_INET, 1283 (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL); 1284 } 1285 break; 1286 1287 case IPPROTO_SCTP: 1288 ret = ipcl_sctp_hash_insert(connp, lport); 1289 break; 1290 1291 case IPPROTO_DCCP: 1292 cmn_err(CE_NOTE, "ipcl_bind_insert_v4"); 1293 ASSERT(connp->conn_zoneid != ALL_ZONES); 1294 connfp = &ipst->ips_ipcl_dccp_fanout[ 1295 IPCL_DCCP_HASH(lport, ipst)]; 1296 if (connp->conn_laddr_v4 != INADDR_ANY) { 1297 IPCL_HASH_INSERT_BOUND(connfp, connp); 1298 } else { 1299 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1300 } 1301 /* XXX:DCCP */ 1302 break; 1303 } 1304 1305 1306 return (ret); 1307 } 1308 1309 int 1310 ipcl_bind_insert_v6(conn_t *connp) 1311 { 1312 connf_t *connfp; 1313 int ret = 0; 1314 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1315 uint16_t lport = connp->conn_lport; 1316 uint8_t protocol = connp->conn_proto; 1317 1318 if (IPCL_IS_IPTUN(connp)) { 1319 return (ipcl_iptun_hash_insert_v6(connp, ipst)); 1320 } 1321 1322 switch (protocol) { 1323 default: 1324 if (is_system_labeled() && 1325 check_exempt_conflict_v6(connp, ipst)) 1326 return (EADDRINUSE); 1327 /* FALLTHROUGH */ 1328 case IPPROTO_UDP: 1329 if (protocol == IPPROTO_UDP) { 1330 connfp = &ipst->ips_ipcl_udp_fanout[ 1331 IPCL_UDP_HASH(lport, ipst)]; 1332 } else { 1333 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 1334 } 1335 1336 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) { 1337 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1338 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { 1339 IPCL_HASH_INSERT_BOUND(connfp, connp); 1340 } else { 1341 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1342 } 1343 break; 1344 1345 case IPPROTO_TCP: 1346 /* Insert it in the Bind Hash */ 1347 
ASSERT(connp->conn_zoneid != ALL_ZONES); 1348 connfp = &ipst->ips_ipcl_bind_fanout[ 1349 IPCL_BIND_HASH(lport, ipst)]; 1350 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { 1351 IPCL_HASH_INSERT_BOUND(connfp, connp); 1352 } else { 1353 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1354 } 1355 if (cl_inet_listen != NULL) { 1356 sa_family_t addr_family; 1357 uint8_t *laddrp; 1358 1359 if (connp->conn_ipversion == IPV6_VERSION) { 1360 addr_family = AF_INET6; 1361 laddrp = 1362 (uint8_t *)&connp->conn_bound_addr_v6; 1363 } else { 1364 addr_family = AF_INET; 1365 laddrp = (uint8_t *)&connp->conn_bound_addr_v4; 1366 } 1367 connp->conn_flags |= IPCL_CL_LISTENER; 1368 (*cl_inet_listen)( 1369 connp->conn_netstack->netstack_stackid, 1370 IPPROTO_TCP, addr_family, laddrp, lport, NULL); 1371 } 1372 break; 1373 1374 case IPPROTO_SCTP: 1375 ret = ipcl_sctp_hash_insert(connp, lport); 1376 break; 1377 1378 case IPPROTO_DCCP: 1379 /* XXX:DCCP */ 1380 break; 1381 } 1382 1383 return (ret); 1384 } 1385 1386 /* 1387 * ipcl_conn_hash insertion routines. 1388 * The caller has already set conn_proto and the addresses/ports in the conn_t. 1389 */ 1390 1391 int 1392 ipcl_conn_insert(conn_t *connp) 1393 { 1394 if (connp->conn_ipversion == IPV6_VERSION) 1395 return (ipcl_conn_insert_v6(connp)); 1396 else 1397 return (ipcl_conn_insert_v4(connp)); 1398 } 1399 1400 int 1401 ipcl_conn_insert_v4(conn_t *connp) 1402 { 1403 connf_t *connfp; 1404 conn_t *tconnp; 1405 int ret = 0; 1406 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1407 uint16_t lport = connp->conn_lport; 1408 uint8_t protocol = connp->conn_proto; 1409 1410 if (IPCL_IS_IPTUN(connp)) 1411 return (ipcl_iptun_hash_insert(connp, ipst)); 1412 1413 switch (protocol) { 1414 case IPPROTO_TCP: 1415 /* 1416 * For TCP, we check whether the connection tuple already 1417 * exists before allowing the connection to proceed. We 1418 * also allow indexing on the zoneid. 
This is to allow 1419 * multiple shared stack zones to have the same tcp 1420 * connection tuple. In practice this only happens for 1421 * INADDR_LOOPBACK as it's the only local address which 1422 * doesn't have to be unique. 1423 */ 1424 connfp = &ipst->ips_ipcl_conn_fanout[ 1425 IPCL_CONN_HASH(connp->conn_faddr_v4, 1426 connp->conn_ports, ipst)]; 1427 mutex_enter(&connfp->connf_lock); 1428 for (tconnp = connfp->connf_head; tconnp != NULL; 1429 tconnp = tconnp->conn_next) { 1430 if (IPCL_CONN_MATCH(tconnp, connp->conn_proto, 1431 connp->conn_faddr_v4, connp->conn_laddr_v4, 1432 connp->conn_ports) && 1433 IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) { 1434 /* Already have a conn. bail out */ 1435 mutex_exit(&connfp->connf_lock); 1436 return (EADDRINUSE); 1437 } 1438 } 1439 if (connp->conn_fanout != NULL) { 1440 /* 1441 * Probably a XTI/TLI application trying to do a 1442 * rebind. Let it happen. 1443 */ 1444 mutex_exit(&connfp->connf_lock); 1445 IPCL_HASH_REMOVE(connp); 1446 mutex_enter(&connfp->connf_lock); 1447 } 1448 1449 ASSERT(connp->conn_recv != NULL); 1450 ASSERT(connp->conn_recvicmp != NULL); 1451 1452 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1453 mutex_exit(&connfp->connf_lock); 1454 break; 1455 1456 case IPPROTO_SCTP: 1457 /* 1458 * The raw socket may have already been bound, remove it 1459 * from the hash first. 1460 */ 1461 IPCL_HASH_REMOVE(connp); 1462 ret = ipcl_sctp_hash_insert(connp, lport); 1463 break; 1464 1465 case IPPROTO_DCCP: 1466 cmn_err(CE_NOTE, "ipclassifier.c: ipcl_conn_insert_v4"); 1467 1468 connfp = &ipst->ips_ipcl_conn_fanout[ 1469 IPCL_CONN_HASH(connp->conn_faddr_v4, 1470 connp->conn_ports, ipst)]; 1471 mutex_enter(&connfp->connf_lock); 1472 1473 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1474 mutex_exit(&connfp->connf_lock); 1475 break; 1476 1477 default: 1478 /* 1479 * Check for conflicts among MAC exempt bindings. 
For 1480 * transports with port numbers, this is done by the upper 1481 * level per-transport binding logic. For all others, it's 1482 * done here. 1483 */ 1484 if (is_system_labeled() && 1485 check_exempt_conflict_v4(connp, ipst)) 1486 return (EADDRINUSE); 1487 /* FALLTHROUGH */ 1488 1489 case IPPROTO_UDP: 1490 if (protocol == IPPROTO_UDP) { 1491 connfp = &ipst->ips_ipcl_udp_fanout[ 1492 IPCL_UDP_HASH(lport, ipst)]; 1493 } else { 1494 connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol]; 1495 } 1496 1497 if (connp->conn_faddr_v4 != INADDR_ANY) { 1498 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1499 } else if (connp->conn_laddr_v4 != INADDR_ANY) { 1500 IPCL_HASH_INSERT_BOUND(connfp, connp); 1501 } else { 1502 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1503 } 1504 break; 1505 } 1506 1507 return (ret); 1508 } 1509 1510 int 1511 ipcl_conn_insert_v6(conn_t *connp) 1512 { 1513 connf_t *connfp; 1514 conn_t *tconnp; 1515 int ret = 0; 1516 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1517 uint16_t lport = connp->conn_lport; 1518 uint8_t protocol = connp->conn_proto; 1519 uint_t ifindex = connp->conn_bound_if; 1520 1521 if (IPCL_IS_IPTUN(connp)) 1522 return (ipcl_iptun_hash_insert_v6(connp, ipst)); 1523 1524 switch (protocol) { 1525 case IPPROTO_TCP: 1526 1527 /* 1528 * For tcp, we check whether the connection tuple already 1529 * exists before allowing the connection to proceed. We 1530 * also allow indexing on the zoneid. This is to allow 1531 * multiple shared stack zones to have the same tcp 1532 * connection tuple. In practice this only happens for 1533 * ipv6_loopback as it's the only local address which 1534 * doesn't have to be unique. 1535 */ 1536 connfp = &ipst->ips_ipcl_conn_fanout[ 1537 IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports, 1538 ipst)]; 1539 mutex_enter(&connfp->connf_lock); 1540 for (tconnp = connfp->connf_head; tconnp != NULL; 1541 tconnp = tconnp->conn_next) { 1542 /* NOTE: need to match zoneid. 
Bug in onnv-gate */ 1543 if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto, 1544 connp->conn_faddr_v6, connp->conn_laddr_v6, 1545 connp->conn_ports) && 1546 (tconnp->conn_bound_if == 0 || 1547 tconnp->conn_bound_if == ifindex) && 1548 IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) { 1549 /* Already have a conn. bail out */ 1550 mutex_exit(&connfp->connf_lock); 1551 return (EADDRINUSE); 1552 } 1553 } 1554 if (connp->conn_fanout != NULL) { 1555 /* 1556 * Probably a XTI/TLI application trying to do a 1557 * rebind. Let it happen. 1558 */ 1559 mutex_exit(&connfp->connf_lock); 1560 IPCL_HASH_REMOVE(connp); 1561 mutex_enter(&connfp->connf_lock); 1562 } 1563 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 1564 mutex_exit(&connfp->connf_lock); 1565 break; 1566 1567 case IPPROTO_SCTP: 1568 IPCL_HASH_REMOVE(connp); 1569 ret = ipcl_sctp_hash_insert(connp, lport); 1570 break; 1571 1572 case IPPROTO_DCCP: 1573 /* XXX:DCCP */ 1574 break; 1575 1576 default: 1577 if (is_system_labeled() && 1578 check_exempt_conflict_v6(connp, ipst)) 1579 return (EADDRINUSE); 1580 /* FALLTHROUGH */ 1581 case IPPROTO_UDP: 1582 if (protocol == IPPROTO_UDP) { 1583 connfp = &ipst->ips_ipcl_udp_fanout[ 1584 IPCL_UDP_HASH(lport, ipst)]; 1585 } else { 1586 connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 1587 } 1588 1589 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) { 1590 IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1591 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { 1592 IPCL_HASH_INSERT_BOUND(connfp, connp); 1593 } else { 1594 IPCL_HASH_INSERT_WILDCARD(connfp, connp); 1595 } 1596 break; 1597 } 1598 1599 return (ret); 1600 } 1601 1602 /* 1603 * v4 packet classifying function. looks up the fanout table to 1604 * find the conn, the packet belongs to. returns the conn with 1605 * the reference held, null otherwise. 1606 * 1607 * If zoneid is ALL_ZONES, then the search rules described in the "Connection 1608 * Lookup" comment block are applied. 
Labels are also checked as described 1609 * above. If the packet is from the inside (looped back), and is from the same 1610 * zone, then label checks are omitted. 1611 */ 1612 conn_t * 1613 ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, 1614 ip_recv_attr_t *ira, ip_stack_t *ipst) 1615 { 1616 ipha_t *ipha; 1617 connf_t *connfp, *bind_connfp; 1618 uint16_t lport; 1619 uint16_t fport; 1620 uint32_t ports; 1621 conn_t *connp; 1622 uint16_t *up; 1623 zoneid_t zoneid = ira->ira_zoneid; 1624 1625 ipha = (ipha_t *)mp->b_rptr; 1626 up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 1627 1628 switch (protocol) { 1629 case IPPROTO_TCP: 1630 ports = *(uint32_t *)up; 1631 connfp = 1632 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, 1633 ports, ipst)]; 1634 mutex_enter(&connfp->connf_lock); 1635 for (connp = connfp->connf_head; connp != NULL; 1636 connp = connp->conn_next) { 1637 if (IPCL_CONN_MATCH(connp, protocol, 1638 ipha->ipha_src, ipha->ipha_dst, ports) && 1639 (connp->conn_zoneid == zoneid || 1640 connp->conn_allzones || 1641 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1642 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1643 (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 1644 break; 1645 } 1646 1647 if (connp != NULL) { 1648 /* 1649 * We have a fully-bound TCP connection. 1650 * 1651 * For labeled systems, there's no need to check the 1652 * label here. It's known to be good as we checked 1653 * before allowing the connection to become bound. 
1654 */ 1655 CONN_INC_REF(connp); 1656 mutex_exit(&connfp->connf_lock); 1657 return (connp); 1658 } 1659 1660 mutex_exit(&connfp->connf_lock); 1661 lport = up[1]; 1662 bind_connfp = 1663 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 1664 mutex_enter(&bind_connfp->connf_lock); 1665 for (connp = bind_connfp->connf_head; connp != NULL; 1666 connp = connp->conn_next) { 1667 if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst, 1668 lport) && 1669 (connp->conn_zoneid == zoneid || 1670 connp->conn_allzones || 1671 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1672 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1673 (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 1674 break; 1675 } 1676 1677 /* 1678 * If the matching connection is SLP on a private address, then 1679 * the label on the packet must match the local zone's label. 1680 * Otherwise, it must be in the label range defined by tnrh. 1681 * This is ensured by tsol_receive_local. 1682 * 1683 * Note that we don't check tsol_receive_local for 1684 * the connected case. 
1685 */ 1686 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 1687 !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 1688 ira, connp)) { 1689 DTRACE_PROBE3(tx__ip__log__info__classify__tcp, 1690 char *, "connp(1) could not receive mp(2)", 1691 conn_t *, connp, mblk_t *, mp); 1692 connp = NULL; 1693 } 1694 1695 if (connp != NULL) { 1696 /* Have a listener at least */ 1697 CONN_INC_REF(connp); 1698 mutex_exit(&bind_connfp->connf_lock); 1699 return (connp); 1700 } 1701 1702 mutex_exit(&bind_connfp->connf_lock); 1703 break; 1704 1705 case IPPROTO_UDP: 1706 lport = up[1]; 1707 fport = up[0]; 1708 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 1709 mutex_enter(&connfp->connf_lock); 1710 for (connp = connfp->connf_head; connp != NULL; 1711 connp = connp->conn_next) { 1712 if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 1713 fport, ipha->ipha_src) && 1714 (connp->conn_zoneid == zoneid || 1715 connp->conn_allzones || 1716 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1717 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE)))) 1718 break; 1719 } 1720 1721 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 1722 !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 1723 ira, connp)) { 1724 DTRACE_PROBE3(tx__ip__log__info__classify__udp, 1725 char *, "connp(1) could not receive mp(2)", 1726 conn_t *, connp, mblk_t *, mp); 1727 connp = NULL; 1728 } 1729 1730 if (connp != NULL) { 1731 CONN_INC_REF(connp); 1732 mutex_exit(&connfp->connf_lock); 1733 return (connp); 1734 } 1735 1736 /* 1737 * We shouldn't come here for multicast/broadcast packets 1738 */ 1739 mutex_exit(&connfp->connf_lock); 1740 1741 break; 1742 1743 case IPPROTO_DCCP: 1744 fport = up[0]; 1745 lport = up[1]; 1746 connfp = &ipst->ips_ipcl_dccp_fanout[IPCL_DCCP_HASH( 1747 lport, ipst)]; 1748 mutex_enter(&connfp->connf_lock); 1749 for (connp = connfp->connf_head; connp != NULL; 1750 connp = connp->conn_next) { 1751 cmn_err(CE_NOTE, "connfp found"); 1752 /* XXX:DCCP */ 1753 if 
(IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 1754 fport, ipha->ipha_src)) { 1755 break; 1756 } 1757 } 1758 1759 if (connp != NULL) { 1760 CONN_INC_REF(connp); 1761 mutex_exit(&connfp->connf_lock); 1762 return (connp); 1763 } 1764 1765 mutex_exit(&connfp->connf_lock); 1766 break; 1767 1768 case IPPROTO_ENCAP: 1769 case IPPROTO_IPV6: 1770 return (ipcl_iptun_classify_v4(&ipha->ipha_src, 1771 &ipha->ipha_dst, ipst)); 1772 } 1773 1774 return (NULL); 1775 } 1776 1777 conn_t * 1778 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, 1779 ip_recv_attr_t *ira, ip_stack_t *ipst) 1780 { 1781 ip6_t *ip6h; 1782 connf_t *connfp, *bind_connfp; 1783 uint16_t lport; 1784 uint16_t fport; 1785 tcpha_t *tcpha; 1786 uint32_t ports; 1787 conn_t *connp; 1788 uint16_t *up; 1789 zoneid_t zoneid = ira->ira_zoneid; 1790 1791 ip6h = (ip6_t *)mp->b_rptr; 1792 1793 switch (protocol) { 1794 case IPPROTO_TCP: 1795 tcpha = (tcpha_t *)&mp->b_rptr[hdr_len]; 1796 up = &tcpha->tha_lport; 1797 ports = *(uint32_t *)up; 1798 1799 connfp = 1800 &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, 1801 ports, ipst)]; 1802 mutex_enter(&connfp->connf_lock); 1803 for (connp = connfp->connf_head; connp != NULL; 1804 connp = connp->conn_next) { 1805 if (IPCL_CONN_MATCH_V6(connp, protocol, 1806 ip6h->ip6_src, ip6h->ip6_dst, ports) && 1807 (connp->conn_zoneid == zoneid || 1808 connp->conn_allzones || 1809 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1810 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1811 (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 1812 break; 1813 } 1814 1815 if (connp != NULL) { 1816 /* 1817 * We have a fully-bound TCP connection. 1818 * 1819 * For labeled systems, there's no need to check the 1820 * label here. It's known to be good as we checked 1821 * before allowing the connection to become bound. 
1822 */ 1823 CONN_INC_REF(connp); 1824 mutex_exit(&connfp->connf_lock); 1825 return (connp); 1826 } 1827 1828 mutex_exit(&connfp->connf_lock); 1829 1830 lport = up[1]; 1831 bind_connfp = 1832 &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 1833 mutex_enter(&bind_connfp->connf_lock); 1834 for (connp = bind_connfp->connf_head; connp != NULL; 1835 connp = connp->conn_next) { 1836 if (IPCL_BIND_MATCH_V6(connp, protocol, 1837 ip6h->ip6_dst, lport) && 1838 (connp->conn_zoneid == zoneid || 1839 connp->conn_allzones || 1840 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1841 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1842 (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 1843 break; 1844 } 1845 1846 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 1847 !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 1848 ira, connp)) { 1849 DTRACE_PROBE3(tx__ip__log__info__classify__tcp6, 1850 char *, "connp(1) could not receive mp(2)", 1851 conn_t *, connp, mblk_t *, mp); 1852 connp = NULL; 1853 } 1854 1855 if (connp != NULL) { 1856 /* Have a listner at least */ 1857 CONN_INC_REF(connp); 1858 mutex_exit(&bind_connfp->connf_lock); 1859 return (connp); 1860 } 1861 1862 mutex_exit(&bind_connfp->connf_lock); 1863 break; 1864 1865 case IPPROTO_UDP: 1866 up = (uint16_t *)&mp->b_rptr[hdr_len]; 1867 lport = up[1]; 1868 fport = up[0]; 1869 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 1870 mutex_enter(&connfp->connf_lock); 1871 for (connp = connfp->connf_head; connp != NULL; 1872 connp = connp->conn_next) { 1873 if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 1874 fport, ip6h->ip6_src) && 1875 (connp->conn_zoneid == zoneid || 1876 connp->conn_allzones || 1877 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1878 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1879 (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 1880 break; 1881 } 1882 1883 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 1884 !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 
1885 ira, connp)) { 1886 DTRACE_PROBE3(tx__ip__log__info__classify__udp6, 1887 char *, "connp(1) could not receive mp(2)", 1888 conn_t *, connp, mblk_t *, mp); 1889 connp = NULL; 1890 } 1891 1892 if (connp != NULL) { 1893 CONN_INC_REF(connp); 1894 mutex_exit(&connfp->connf_lock); 1895 return (connp); 1896 } 1897 1898 /* 1899 * We shouldn't come here for multicast/broadcast packets 1900 */ 1901 mutex_exit(&connfp->connf_lock); 1902 break; 1903 case IPPROTO_ENCAP: 1904 case IPPROTO_IPV6: 1905 return (ipcl_iptun_classify_v6(&ip6h->ip6_src, 1906 &ip6h->ip6_dst, ipst)); 1907 } 1908 1909 return (NULL); 1910 } 1911 1912 /* 1913 * wrapper around ipcl_classify_(v4,v6) routines. 1914 */ 1915 conn_t * 1916 ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst) 1917 { 1918 if (ira->ira_flags & IRAF_IS_IPV4) { 1919 return (ipcl_classify_v4(mp, ira->ira_protocol, 1920 ira->ira_ip_hdr_length, ira, ipst)); 1921 } else { 1922 return (ipcl_classify_v6(mp, ira->ira_protocol, 1923 ira->ira_ip_hdr_length, ira, ipst)); 1924 } 1925 } 1926 1927 /* 1928 * Only used to classify SCTP RAW sockets 1929 */ 1930 conn_t * 1931 ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports, 1932 ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst) 1933 { 1934 connf_t *connfp; 1935 conn_t *connp; 1936 in_port_t lport; 1937 int ipversion; 1938 const void *dst; 1939 zoneid_t zoneid = ira->ira_zoneid; 1940 1941 lport = ((uint16_t *)&ports)[1]; 1942 if (ira->ira_flags & IRAF_IS_IPV4) { 1943 dst = (const void *)&ipha->ipha_dst; 1944 ipversion = IPV4_VERSION; 1945 } else { 1946 dst = (const void *)&ip6h->ip6_dst; 1947 ipversion = IPV6_VERSION; 1948 } 1949 1950 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 1951 mutex_enter(&connfp->connf_lock); 1952 for (connp = connfp->connf_head; connp != NULL; 1953 connp = connp->conn_next) { 1954 /* We don't allow v4 fallback for v6 raw socket. 
*/ 1955 if (ipversion != connp->conn_ipversion) 1956 continue; 1957 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 1958 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 1959 if (ipversion == IPV4_VERSION) { 1960 if (!IPCL_CONN_MATCH(connp, protocol, 1961 ipha->ipha_src, ipha->ipha_dst, ports)) 1962 continue; 1963 } else { 1964 if (!IPCL_CONN_MATCH_V6(connp, protocol, 1965 ip6h->ip6_src, ip6h->ip6_dst, ports)) 1966 continue; 1967 } 1968 } else { 1969 if (ipversion == IPV4_VERSION) { 1970 if (!IPCL_BIND_MATCH(connp, protocol, 1971 ipha->ipha_dst, lport)) 1972 continue; 1973 } else { 1974 if (!IPCL_BIND_MATCH_V6(connp, protocol, 1975 ip6h->ip6_dst, lport)) 1976 continue; 1977 } 1978 } 1979 1980 if (connp->conn_zoneid == zoneid || 1981 connp->conn_allzones || 1982 ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1983 (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1984 (ira->ira_flags & IRAF_TX_SHARED_ADDR))) 1985 break; 1986 } 1987 1988 if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 1989 !tsol_receive_local(mp, dst, ipversion, ira, connp)) { 1990 DTRACE_PROBE3(tx__ip__log__info__classify__rawip, 1991 char *, "connp(1) could not receive mp(2)", 1992 conn_t *, connp, mblk_t *, mp); 1993 connp = NULL; 1994 } 1995 1996 if (connp != NULL) 1997 goto found; 1998 mutex_exit(&connfp->connf_lock); 1999 2000 /* Try to look for a wildcard SCTP RAW socket match. */ 2001 connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)]; 2002 mutex_enter(&connfp->connf_lock); 2003 for (connp = connfp->connf_head; connp != NULL; 2004 connp = connp->conn_next) { 2005 /* We don't allow v4 fallback for v6 raw socket. 
*/ 2006 if (ipversion != connp->conn_ipversion) 2007 continue; 2008 if (!IPCL_ZONE_MATCH(connp, zoneid)) 2009 continue; 2010 2011 if (ipversion == IPV4_VERSION) { 2012 if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst)) 2013 break; 2014 } else { 2015 if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) { 2016 break; 2017 } 2018 } 2019 } 2020 2021 if (connp != NULL) 2022 goto found; 2023 2024 mutex_exit(&connfp->connf_lock); 2025 return (NULL); 2026 2027 found: 2028 ASSERT(connp != NULL); 2029 CONN_INC_REF(connp); 2030 mutex_exit(&connfp->connf_lock); 2031 return (connp); 2032 } 2033 2034 /* ARGSUSED */ 2035 static int 2036 tcp_conn_constructor(void *buf, void *cdrarg, int kmflags) 2037 { 2038 itc_t *itc = (itc_t *)buf; 2039 conn_t *connp = &itc->itc_conn; 2040 tcp_t *tcp = (tcp_t *)&itc[1]; 2041 2042 bzero(connp, sizeof (conn_t)); 2043 bzero(tcp, sizeof (tcp_t)); 2044 2045 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2046 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2047 cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL); 2048 tcp->tcp_timercache = tcp_timermp_alloc(kmflags); 2049 if (tcp->tcp_timercache == NULL) 2050 return (ENOMEM); 2051 connp->conn_tcp = tcp; 2052 connp->conn_flags = IPCL_TCPCONN; 2053 connp->conn_proto = IPPROTO_TCP; 2054 tcp->tcp_connp = connp; 2055 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2056 2057 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2058 if (connp->conn_ixa == NULL) { 2059 tcp_timermp_free(tcp); 2060 return (ENOMEM); 2061 } 2062 connp->conn_ixa->ixa_refcnt = 1; 2063 connp->conn_ixa->ixa_protocol = connp->conn_proto; 2064 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2065 return (0); 2066 } 2067 2068 /* ARGSUSED */ 2069 static void 2070 tcp_conn_destructor(void *buf, void *cdrarg) 2071 { 2072 itc_t *itc = (itc_t *)buf; 2073 conn_t *connp = &itc->itc_conn; 2074 tcp_t *tcp = (tcp_t *)&itc[1]; 2075 2076 ASSERT(connp->conn_flags & IPCL_TCPCONN); 2077 
ASSERT(tcp->tcp_connp == connp); 2078 ASSERT(connp->conn_tcp == tcp); 2079 tcp_timermp_free(tcp); 2080 mutex_destroy(&connp->conn_lock); 2081 cv_destroy(&connp->conn_cv); 2082 cv_destroy(&connp->conn_sq_cv); 2083 rw_destroy(&connp->conn_ilg_lock); 2084 2085 /* Can be NULL if constructor failed */ 2086 if (connp->conn_ixa != NULL) { 2087 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2088 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2089 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2090 ixa_refrele(connp->conn_ixa); 2091 } 2092 } 2093 2094 /* ARGSUSED */ 2095 static int 2096 ip_conn_constructor(void *buf, void *cdrarg, int kmflags) 2097 { 2098 itc_t *itc = (itc_t *)buf; 2099 conn_t *connp = &itc->itc_conn; 2100 2101 bzero(connp, sizeof (conn_t)); 2102 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2103 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2104 connp->conn_flags = IPCL_IPCCONN; 2105 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2106 2107 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2108 if (connp->conn_ixa == NULL) 2109 return (ENOMEM); 2110 connp->conn_ixa->ixa_refcnt = 1; 2111 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2112 return (0); 2113 } 2114 2115 /* ARGSUSED */ 2116 static void 2117 ip_conn_destructor(void *buf, void *cdrarg) 2118 { 2119 itc_t *itc = (itc_t *)buf; 2120 conn_t *connp = &itc->itc_conn; 2121 2122 ASSERT(connp->conn_flags & IPCL_IPCCONN); 2123 ASSERT(connp->conn_priv == NULL); 2124 mutex_destroy(&connp->conn_lock); 2125 cv_destroy(&connp->conn_cv); 2126 rw_destroy(&connp->conn_ilg_lock); 2127 2128 /* Can be NULL if constructor failed */ 2129 if (connp->conn_ixa != NULL) { 2130 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2131 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2132 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2133 ixa_refrele(connp->conn_ixa); 2134 } 2135 } 2136 2137 /* ARGSUSED */ 2138 static int 2139 udp_conn_constructor(void *buf, void *cdrarg, int kmflags) 2140 { 2141 itc_t *itc = 
(itc_t *)buf; 2142 conn_t *connp = &itc->itc_conn; 2143 udp_t *udp = (udp_t *)&itc[1]; 2144 2145 bzero(connp, sizeof (conn_t)); 2146 bzero(udp, sizeof (udp_t)); 2147 2148 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2149 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2150 connp->conn_udp = udp; 2151 connp->conn_flags = IPCL_UDPCONN; 2152 connp->conn_proto = IPPROTO_UDP; 2153 udp->udp_connp = connp; 2154 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2155 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2156 if (connp->conn_ixa == NULL) 2157 return (ENOMEM); 2158 connp->conn_ixa->ixa_refcnt = 1; 2159 connp->conn_ixa->ixa_protocol = connp->conn_proto; 2160 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2161 return (0); 2162 } 2163 2164 /* ARGSUSED */ 2165 static void 2166 udp_conn_destructor(void *buf, void *cdrarg) 2167 { 2168 itc_t *itc = (itc_t *)buf; 2169 conn_t *connp = &itc->itc_conn; 2170 udp_t *udp = (udp_t *)&itc[1]; 2171 2172 ASSERT(connp->conn_flags & IPCL_UDPCONN); 2173 ASSERT(udp->udp_connp == connp); 2174 ASSERT(connp->conn_udp == udp); 2175 mutex_destroy(&connp->conn_lock); 2176 cv_destroy(&connp->conn_cv); 2177 rw_destroy(&connp->conn_ilg_lock); 2178 2179 /* Can be NULL if constructor failed */ 2180 if (connp->conn_ixa != NULL) { 2181 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2182 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2183 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2184 ixa_refrele(connp->conn_ixa); 2185 } 2186 } 2187 2188 /* ARGSUSED */ 2189 static int 2190 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags) 2191 { 2192 itc_t *itc = (itc_t *)buf; 2193 conn_t *connp = &itc->itc_conn; 2194 icmp_t *icmp = (icmp_t *)&itc[1]; 2195 2196 bzero(connp, sizeof (conn_t)); 2197 bzero(icmp, sizeof (icmp_t)); 2198 2199 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2200 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2201 connp->conn_icmp = icmp; 2202 connp->conn_flags = IPCL_RAWIPCONN; 
2203 connp->conn_proto = IPPROTO_ICMP; 2204 icmp->icmp_connp = connp; 2205 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2206 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2207 if (connp->conn_ixa == NULL) 2208 return (ENOMEM); 2209 connp->conn_ixa->ixa_refcnt = 1; 2210 connp->conn_ixa->ixa_protocol = connp->conn_proto; 2211 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2212 return (0); 2213 } 2214 2215 /* ARGSUSED */ 2216 static void 2217 rawip_conn_destructor(void *buf, void *cdrarg) 2218 { 2219 itc_t *itc = (itc_t *)buf; 2220 conn_t *connp = &itc->itc_conn; 2221 icmp_t *icmp = (icmp_t *)&itc[1]; 2222 2223 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 2224 ASSERT(icmp->icmp_connp == connp); 2225 ASSERT(connp->conn_icmp == icmp); 2226 mutex_destroy(&connp->conn_lock); 2227 cv_destroy(&connp->conn_cv); 2228 rw_destroy(&connp->conn_ilg_lock); 2229 2230 /* Can be NULL if constructor failed */ 2231 if (connp->conn_ixa != NULL) { 2232 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2233 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2234 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2235 ixa_refrele(connp->conn_ixa); 2236 } 2237 } 2238 2239 /* ARGSUSED */ 2240 static int 2241 rts_conn_constructor(void *buf, void *cdrarg, int kmflags) 2242 { 2243 itc_t *itc = (itc_t *)buf; 2244 conn_t *connp = &itc->itc_conn; 2245 rts_t *rts = (rts_t *)&itc[1]; 2246 2247 bzero(connp, sizeof (conn_t)); 2248 bzero(rts, sizeof (rts_t)); 2249 2250 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2251 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2252 connp->conn_rts = rts; 2253 connp->conn_flags = IPCL_RTSCONN; 2254 rts->rts_connp = connp; 2255 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2256 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2257 if (connp->conn_ixa == NULL) 2258 return (ENOMEM); 2259 connp->conn_ixa->ixa_refcnt = 1; 2260 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2261 return (0); 2262 } 2263 
2264 /* ARGSUSED */ 2265 static void 2266 rts_conn_destructor(void *buf, void *cdrarg) 2267 { 2268 itc_t *itc = (itc_t *)buf; 2269 conn_t *connp = &itc->itc_conn; 2270 rts_t *rts = (rts_t *)&itc[1]; 2271 2272 ASSERT(connp->conn_flags & IPCL_RTSCONN); 2273 ASSERT(rts->rts_connp == connp); 2274 ASSERT(connp->conn_rts == rts); 2275 mutex_destroy(&connp->conn_lock); 2276 cv_destroy(&connp->conn_cv); 2277 rw_destroy(&connp->conn_ilg_lock); 2278 2279 /* Can be NULL if constructor failed */ 2280 if (connp->conn_ixa != NULL) { 2281 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2282 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2283 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2284 ixa_refrele(connp->conn_ixa); 2285 } 2286 } 2287 2288 /* ARGSUSED */ 2289 static int 2290 dccp_conn_constructor(void *buf, void *cdrarg, int kmflags) 2291 { 2292 itc_t *itc = (itc_t *)buf; 2293 conn_t *connp = &itc->itc_conn; 2294 dccp_t *dccp = (dccp_t *)&itc[1]; 2295 2296 bzero(connp, sizeof (conn_t)); 2297 bzero(dccp, sizeof (dccp_t)); 2298 2299 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2300 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2301 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2302 2303 connp->conn_dccp = dccp; 2304 connp->conn_flags = IPCL_DCCPCONN; 2305 connp->conn_proto = IPPROTO_DCCP; 2306 dccp->dccp_connp = connp; 2307 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2308 if (connp->conn_ixa == NULL) { 2309 return (NULL); 2310 } 2311 connp->conn_ixa->ixa_refcnt = 1; 2312 connp->conn_ixa->ixa_protocol = connp->conn_proto; 2313 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2314 2315 return (0); 2316 } 2317 2318 /* ARGSUSED */ 2319 static void 2320 dccp_conn_destructor(void *buf, void *cdrarg) 2321 { 2322 itc_t *itc = (itc_t *)buf; 2323 conn_t *connp = &itc->itc_conn; 2324 dccp_t *dccp = (dccp_t *)&itc[1]; 2325 2326 ASSERT(connp->conn_flags & IPCL_DCCPCONN); 2327 ASSERT(dccp->dccp_connp == connp); 2328 ASSERT(connp->conn_dccp == 
dccp); 2329 2330 mutex_destroy(&connp->conn_lock); 2331 cv_destroy(&connp->conn_cv); 2332 rw_destroy(&connp->conn_ilg_lock); 2333 2334 if (connp->conn_ixa != NULL) { 2335 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2336 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2337 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2338 2339 ixa_refrele(connp->conn_ixa); 2340 } 2341 } 2342 2343 /* 2344 * Called as part of ipcl_conn_destroy to assert and clear any pointers 2345 * in the conn_t. 2346 * 2347 * Below we list all the pointers in the conn_t as a documentation aid. 2348 * The ones that we can not ASSERT to be NULL are #ifdef'ed out. 2349 * If you add any pointers to the conn_t please add an ASSERT here 2350 * and #ifdef it out if it can't be actually asserted to be NULL. 2351 * In any case, we bzero most of the conn_t at the end of the function. 2352 */ 2353 void 2354 ipcl_conn_cleanup(conn_t *connp) 2355 { 2356 ip_xmit_attr_t *ixa; 2357 2358 ASSERT(connp->conn_latch == NULL); 2359 ASSERT(connp->conn_latch_in_policy == NULL); 2360 ASSERT(connp->conn_latch_in_action == NULL); 2361 #ifdef notdef 2362 ASSERT(connp->conn_rq == NULL); 2363 ASSERT(connp->conn_wq == NULL); 2364 #endif 2365 ASSERT(connp->conn_cred == NULL); 2366 ASSERT(connp->conn_g_fanout == NULL); 2367 ASSERT(connp->conn_g_next == NULL); 2368 ASSERT(connp->conn_g_prev == NULL); 2369 ASSERT(connp->conn_policy == NULL); 2370 ASSERT(connp->conn_fanout == NULL); 2371 ASSERT(connp->conn_next == NULL); 2372 ASSERT(connp->conn_prev == NULL); 2373 ASSERT(connp->conn_oper_pending_ill == NULL); 2374 ASSERT(connp->conn_ilg == NULL); 2375 ASSERT(connp->conn_drain_next == NULL); 2376 ASSERT(connp->conn_drain_prev == NULL); 2377 #ifdef notdef 2378 /* conn_idl is not cleared when removed from idl list */ 2379 ASSERT(connp->conn_idl == NULL); 2380 #endif 2381 ASSERT(connp->conn_ipsec_opt_mp == NULL); 2382 #ifdef notdef 2383 /* conn_netstack is cleared by the caller; needed by ixa_cleanup */ 2384 ASSERT(connp->conn_netstack == NULL); 
2385 #endif 2386 2387 ASSERT(connp->conn_helper_info == NULL); 2388 ASSERT(connp->conn_ixa != NULL); 2389 ixa = connp->conn_ixa; 2390 ASSERT(ixa->ixa_refcnt == 1); 2391 /* Need to preserve ixa_protocol */ 2392 ixa_cleanup(ixa); 2393 ixa->ixa_flags = 0; 2394 2395 /* Clear out the conn_t fields that are not preserved */ 2396 bzero(&connp->conn_start_clr, 2397 sizeof (conn_t) - 2398 ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp)); 2399 } 2400 2401 /* 2402 * All conns are inserted in a global multi-list for the benefit of 2403 * walkers. The walk is guaranteed to walk all open conns at the time 2404 * of the start of the walk exactly once. This property is needed to 2405 * achieve some cleanups during unplumb of interfaces. This is achieved 2406 * as follows. 2407 * 2408 * ipcl_conn_create and ipcl_conn_destroy are the only functions that 2409 * call the insert and delete functions below at creation and deletion 2410 * time respectively. The conn never moves or changes its position in this 2411 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 2412 * won't increase due to walkers, once the conn deletion has started. Note 2413 * that we can't remove the conn from the global list and then wait for 2414 * the refcnt to drop to zero, since walkers would then see a truncated 2415 * list. CONN_INCIPIENT ensures that walkers don't start looking at 2416 * conns until ip_open is ready to make them globally visible. 2417 * The global round robin multi-list locks are held only to get the 2418 * next member/insertion/deletion and contention should be negligible 2419 * if the multi-list is much greater than the number of cpus. 2420 */ 2421 void 2422 ipcl_globalhash_insert(conn_t *connp) 2423 { 2424 int index; 2425 struct connf_s *connfp; 2426 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2427 2428 /* 2429 * No need for atomic here. Approximate even distribution 2430 * in the global lists is sufficient. 
2431 */ 2432 ipst->ips_conn_g_index++; 2433 index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); 2434 2435 connp->conn_g_prev = NULL; 2436 /* 2437 * Mark as INCIPIENT, so that walkers will ignore this 2438 * for now, till ip_open is ready to make it visible globally. 2439 */ 2440 connp->conn_state_flags |= CONN_INCIPIENT; 2441 2442 connfp = &ipst->ips_ipcl_globalhash_fanout[index]; 2443 /* Insert at the head of the list */ 2444 mutex_enter(&connfp->connf_lock); 2445 connp->conn_g_next = connfp->connf_head; 2446 if (connp->conn_g_next != NULL) 2447 connp->conn_g_next->conn_g_prev = connp; 2448 connfp->connf_head = connp; 2449 2450 /* The fanout bucket this conn points to */ 2451 connp->conn_g_fanout = connfp; 2452 2453 mutex_exit(&connfp->connf_lock); 2454 } 2455 2456 void 2457 ipcl_globalhash_remove(conn_t *connp) 2458 { 2459 struct connf_s *connfp; 2460 2461 /* 2462 * We were never inserted in the global multi list. 2463 * IPCL_NONE variety is never inserted in the global multilist 2464 * since it is presumed to not need any cleanup and is transient. 2465 */ 2466 if (connp->conn_g_fanout == NULL) 2467 return; 2468 2469 connfp = connp->conn_g_fanout; 2470 mutex_enter(&connfp->connf_lock); 2471 if (connp->conn_g_prev != NULL) 2472 connp->conn_g_prev->conn_g_next = connp->conn_g_next; 2473 else 2474 connfp->connf_head = connp->conn_g_next; 2475 if (connp->conn_g_next != NULL) 2476 connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 2477 mutex_exit(&connfp->connf_lock); 2478 2479 /* Better to stumble on a null pointer than to corrupt memory */ 2480 connp->conn_g_next = NULL; 2481 connp->conn_g_prev = NULL; 2482 connp->conn_g_fanout = NULL; 2483 } 2484 2485 /* 2486 * Walk the list of all conn_t's in the system, calling the function provided 2487 * With the specified argument for each. 2488 * Applies to both IPv4 and IPv6. 2489 * 2490 * CONNs may hold pointers to ills (conn_dhcpinit_ill and 2491 * conn_oper_pending_ill). 
To guard against stale pointers 2492 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 2493 * unplumbed or removed. New conn_t's that are created while we are walking 2494 * may be missed by this walk, because they are not necessarily inserted 2495 * at the tail of the list. They are new conn_t's and thus don't have any 2496 * stale pointers. The CONN_CLOSING flag ensures that no new reference 2497 * is created to the struct that is going away. 2498 */ 2499 void 2500 ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst) 2501 { 2502 int i; 2503 conn_t *connp; 2504 conn_t *prev_connp; 2505 2506 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 2507 mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2508 prev_connp = NULL; 2509 connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head; 2510 while (connp != NULL) { 2511 mutex_enter(&connp->conn_lock); 2512 if (connp->conn_state_flags & 2513 (CONN_CONDEMNED | CONN_INCIPIENT)) { 2514 mutex_exit(&connp->conn_lock); 2515 connp = connp->conn_g_next; 2516 continue; 2517 } 2518 CONN_INC_REF_LOCKED(connp); 2519 mutex_exit(&connp->conn_lock); 2520 mutex_exit( 2521 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2522 (*func)(connp, arg); 2523 if (prev_connp != NULL) 2524 CONN_DEC_REF(prev_connp); 2525 mutex_enter( 2526 &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2527 prev_connp = connp; 2528 connp = connp->conn_g_next; 2529 } 2530 mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 2531 if (prev_connp != NULL) 2532 CONN_DEC_REF(prev_connp); 2533 } 2534 } 2535 2536 /* 2537 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 2538 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 2539 * held; caller must call CONN_DEC_REF. Only checks for connected entries 2540 * (peer tcp in ESTABLISHED state). 
2541 */ 2542 conn_t * 2543 ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha, 2544 ip_stack_t *ipst) 2545 { 2546 uint32_t ports; 2547 uint16_t *pports = (uint16_t *)&ports; 2548 connf_t *connfp; 2549 conn_t *tconnp; 2550 boolean_t zone_chk; 2551 2552 /* 2553 * If either the source of destination address is loopback, then 2554 * both endpoints must be in the same Zone. Otherwise, both of 2555 * the addresses are system-wide unique (tcp is in ESTABLISHED 2556 * state) and the endpoints may reside in different Zones. 2557 */ 2558 zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 2559 ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 2560 2561 pports[0] = tcpha->tha_fport; 2562 pports[1] = tcpha->tha_lport; 2563 2564 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2565 ports, ipst)]; 2566 2567 mutex_enter(&connfp->connf_lock); 2568 for (tconnp = connfp->connf_head; tconnp != NULL; 2569 tconnp = tconnp->conn_next) { 2570 2571 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 2572 ipha->ipha_dst, ipha->ipha_src, ports) && 2573 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 2574 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 2575 2576 ASSERT(tconnp != connp); 2577 CONN_INC_REF(tconnp); 2578 mutex_exit(&connfp->connf_lock); 2579 return (tconnp); 2580 } 2581 } 2582 mutex_exit(&connfp->connf_lock); 2583 return (NULL); 2584 } 2585 2586 /* 2587 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 2588 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 2589 * held; caller must call CONN_DEC_REF. Only checks for connected entries 2590 * (peer tcp in ESTABLISHED state). 
2591 */ 2592 conn_t * 2593 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha, 2594 ip_stack_t *ipst) 2595 { 2596 uint32_t ports; 2597 uint16_t *pports = (uint16_t *)&ports; 2598 connf_t *connfp; 2599 conn_t *tconnp; 2600 boolean_t zone_chk; 2601 2602 /* 2603 * If either the source of destination address is loopback, then 2604 * both endpoints must be in the same Zone. Otherwise, both of 2605 * the addresses are system-wide unique (tcp is in ESTABLISHED 2606 * state) and the endpoints may reside in different Zones. We 2607 * don't do Zone check for link local address(es) because the 2608 * current Zone implementation treats each link local address as 2609 * being unique per system node, i.e. they belong to global Zone. 2610 */ 2611 zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 2612 IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 2613 2614 pports[0] = tcpha->tha_fport; 2615 pports[1] = tcpha->tha_lport; 2616 2617 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2618 ports, ipst)]; 2619 2620 mutex_enter(&connfp->connf_lock); 2621 for (tconnp = connfp->connf_head; tconnp != NULL; 2622 tconnp = tconnp->conn_next) { 2623 2624 /* We skip conn_bound_if check here as this is loopback tcp */ 2625 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 2626 ip6h->ip6_dst, ip6h->ip6_src, ports) && 2627 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 2628 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 2629 2630 ASSERT(tconnp != connp); 2631 CONN_INC_REF(tconnp); 2632 mutex_exit(&connfp->connf_lock); 2633 return (tconnp); 2634 } 2635 } 2636 mutex_exit(&connfp->connf_lock); 2637 return (NULL); 2638 } 2639 2640 /* 2641 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 2642 * Returns with conn reference held. Caller must call CONN_DEC_REF. 2643 * Only checks for connected entries i.e. no INADDR_ANY checks. 
2644 */ 2645 conn_t * 2646 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state, 2647 ip_stack_t *ipst) 2648 { 2649 uint32_t ports; 2650 uint16_t *pports; 2651 connf_t *connfp; 2652 conn_t *tconnp; 2653 2654 pports = (uint16_t *)&ports; 2655 pports[0] = tcpha->tha_fport; 2656 pports[1] = tcpha->tha_lport; 2657 2658 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2659 ports, ipst)]; 2660 2661 mutex_enter(&connfp->connf_lock); 2662 for (tconnp = connfp->connf_head; tconnp != NULL; 2663 tconnp = tconnp->conn_next) { 2664 2665 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 2666 ipha->ipha_dst, ipha->ipha_src, ports) && 2667 tconnp->conn_tcp->tcp_state >= min_state) { 2668 2669 CONN_INC_REF(tconnp); 2670 mutex_exit(&connfp->connf_lock); 2671 return (tconnp); 2672 } 2673 } 2674 mutex_exit(&connfp->connf_lock); 2675 return (NULL); 2676 } 2677 2678 /* 2679 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 2680 * Returns with conn reference held. Caller must call CONN_DEC_REF. 2681 * Only checks for connected entries i.e. no INADDR_ANY checks. 2682 * Match on ifindex in addition to addresses. 
2683 */ 2684 conn_t * 2685 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 2686 uint_t ifindex, ip_stack_t *ipst) 2687 { 2688 tcp_t *tcp; 2689 uint32_t ports; 2690 uint16_t *pports; 2691 connf_t *connfp; 2692 conn_t *tconnp; 2693 2694 pports = (uint16_t *)&ports; 2695 pports[0] = tcpha->tha_fport; 2696 pports[1] = tcpha->tha_lport; 2697 2698 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2699 ports, ipst)]; 2700 2701 mutex_enter(&connfp->connf_lock); 2702 for (tconnp = connfp->connf_head; tconnp != NULL; 2703 tconnp = tconnp->conn_next) { 2704 2705 tcp = tconnp->conn_tcp; 2706 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 2707 ip6h->ip6_dst, ip6h->ip6_src, ports) && 2708 tcp->tcp_state >= min_state && 2709 (tconnp->conn_bound_if == 0 || 2710 tconnp->conn_bound_if == ifindex)) { 2711 2712 CONN_INC_REF(tconnp); 2713 mutex_exit(&connfp->connf_lock); 2714 return (tconnp); 2715 } 2716 } 2717 mutex_exit(&connfp->connf_lock); 2718 return (NULL); 2719 } 2720 2721 /* 2722 * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate 2723 * a listener when changing state. 2724 */ 2725 conn_t * 2726 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, 2727 ip_stack_t *ipst) 2728 { 2729 connf_t *bind_connfp; 2730 conn_t *connp; 2731 tcp_t *tcp; 2732 2733 /* 2734 * Avoid false matches for packets sent to an IP destination of 2735 * all zeros. 
2736 */ 2737 if (laddr == 0) 2738 return (NULL); 2739 2740 ASSERT(zoneid != ALL_ZONES); 2741 2742 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 2743 mutex_enter(&bind_connfp->connf_lock); 2744 for (connp = bind_connfp->connf_head; connp != NULL; 2745 connp = connp->conn_next) { 2746 tcp = connp->conn_tcp; 2747 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 2748 IPCL_ZONE_MATCH(connp, zoneid) && 2749 (tcp->tcp_listener == NULL)) { 2750 CONN_INC_REF(connp); 2751 mutex_exit(&bind_connfp->connf_lock); 2752 return (connp); 2753 } 2754 } 2755 mutex_exit(&bind_connfp->connf_lock); 2756 return (NULL); 2757 } 2758 2759 /* 2760 * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 2761 * a listener when changing state. 2762 */ 2763 conn_t * 2764 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 2765 zoneid_t zoneid, ip_stack_t *ipst) 2766 { 2767 connf_t *bind_connfp; 2768 conn_t *connp = NULL; 2769 tcp_t *tcp; 2770 2771 /* 2772 * Avoid false matches for packets sent to an IP destination of 2773 * all zeros. 2774 */ 2775 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 2776 return (NULL); 2777 2778 ASSERT(zoneid != ALL_ZONES); 2779 2780 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 2781 mutex_enter(&bind_connfp->connf_lock); 2782 for (connp = bind_connfp->connf_head; connp != NULL; 2783 connp = connp->conn_next) { 2784 tcp = connp->conn_tcp; 2785 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 2786 IPCL_ZONE_MATCH(connp, zoneid) && 2787 (connp->conn_bound_if == 0 || 2788 connp->conn_bound_if == ifindex) && 2789 tcp->tcp_listener == NULL) { 2790 CONN_INC_REF(connp); 2791 mutex_exit(&bind_connfp->connf_lock); 2792 return (connp); 2793 } 2794 } 2795 mutex_exit(&bind_connfp->connf_lock); 2796 return (NULL); 2797 } 2798 2799 /* 2800 * ipcl_get_next_conn 2801 * get the next entry in the conn global list 2802 * and put a reference on the next_conn. 
2803 * decrement the reference on the current conn. 2804 * 2805 * This is an iterator based walker function that also provides for 2806 * some selection by the caller. It walks through the conn_hash bucket 2807 * searching for the next valid connp in the list, and selects connections 2808 * that are neither closed nor condemned. It also REFHOLDS the conn 2809 * thus ensuring that the conn exists when the caller uses the conn. 2810 */ 2811 conn_t * 2812 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 2813 { 2814 conn_t *next_connp; 2815 2816 if (connfp == NULL) 2817 return (NULL); 2818 2819 mutex_enter(&connfp->connf_lock); 2820 2821 next_connp = (connp == NULL) ? 2822 connfp->connf_head : connp->conn_g_next; 2823 2824 while (next_connp != NULL) { 2825 mutex_enter(&next_connp->conn_lock); 2826 if (!(next_connp->conn_flags & conn_flags) || 2827 (next_connp->conn_state_flags & 2828 (CONN_CONDEMNED | CONN_INCIPIENT))) { 2829 /* 2830 * This conn has been condemned or 2831 * is closing, or the flags don't match 2832 */ 2833 mutex_exit(&next_connp->conn_lock); 2834 next_connp = next_connp->conn_g_next; 2835 continue; 2836 } 2837 CONN_INC_REF_LOCKED(next_connp); 2838 mutex_exit(&next_connp->conn_lock); 2839 break; 2840 } 2841 2842 mutex_exit(&connfp->connf_lock); 2843 2844 if (connp != NULL) 2845 CONN_DEC_REF(connp); 2846 2847 return (next_connp); 2848 } 2849 2850 #ifdef CONN_DEBUG 2851 /* 2852 * Trace of the last NBUF refhold/refrele 2853 */ 2854 int 2855 conn_trace_ref(conn_t *connp) 2856 { 2857 int last; 2858 conn_trace_t *ctb; 2859 2860 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2861 last = connp->conn_trace_last; 2862 last++; 2863 if (last == CONN_TRACE_MAX) 2864 last = 0; 2865 2866 ctb = &connp->conn_trace_buf[last]; 2867 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 2868 connp->conn_trace_last = last; 2869 return (1); 2870 } 2871 2872 int 2873 conn_untrace_ref(conn_t *connp) 2874 { 2875 int last; 2876 conn_trace_t *ctb; 2877 2878 
ASSERT(MUTEX_HELD(&connp->conn_lock)); 2879 last = connp->conn_trace_last; 2880 last++; 2881 if (last == CONN_TRACE_MAX) 2882 last = 0; 2883 2884 ctb = &connp->conn_trace_buf[last]; 2885 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 2886 connp->conn_trace_last = last; 2887 return (1); 2888 } 2889 #endif