1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * IP PACKET CLASSIFIER 27 * 28 * The IP packet classifier provides mapping between IP packets and persistent 29 * connection state for connection-oriented protocols. It also provides 30 * interface for managing connection states. 31 * 32 * The connection state is kept in conn_t data structure and contains, among 33 * other things: 34 * 35 * o local/remote address and ports 36 * o Transport protocol 37 * o squeue for the connection (for TCP only) 38 * o reference counter 39 * o Connection state 40 * o hash table linkage 41 * o interface/ire information 42 * o credentials 43 * o ipsec policy 44 * o send and receive functions. 45 * o mutex lock. 46 * 47 * Connections use a reference counting scheme. They are freed when the 48 * reference counter drops to zero. 
A reference is incremented when connection 49 * is placed in a list or table, when incoming packet for the connection arrives 50 * and when connection is processed via squeue (squeue processing may be 51 * asynchronous and the reference protects the connection from being destroyed 52 * before its processing is finished). 53 * 54 * conn_recv is used to pass up packets to the ULP. 55 * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for 56 * a listener, and changes to tcp_input_listener as the listener has picked a 57 * good squeue. For other cases it is set to tcp_input_data. 58 * 59 * conn_recvicmp is used to pass up ICMP errors to the ULP. 60 * 61 * Classifier uses several hash tables: 62 * 63 * ipcl_conn_fanout: contains all TCP connections in CONNECTED state 64 * ipcl_bind_fanout: contains all connections in BOUND state 65 * ipcl_proto_fanout: IPv4 protocol fanout 66 * ipcl_proto_fanout_v6: IPv6 protocol fanout 67 * ipcl_udp_fanout: contains all UDP connections 68 * ipcl_iptun_fanout: contains all IP tunnel connections 69 * ipcl_globalhash_fanout: contains all connections 70 *` ipcl_dccp_conn_fanout: contains all DCCP connections in CONNECTED state 71 * ipcl_dccp_bind_fanout: contains all DCCP connections in BOUND state 72 * 73 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering) 74 * which need to view all existing connections. 75 * 76 * All tables are protected by per-bucket locks. When both per-bucket lock and 77 * connection lock need to be held, the per-bucket lock should be acquired 78 * first, followed by the connection lock. 79 * 80 * All functions doing search in one of these tables increment a reference 81 * counter on the connection found (if any). This reference should be dropped 82 * when the caller has finished processing the connection. 
83 * 84 * 85 * INTERFACES: 86 * =========== 87 * 88 * Connection Lookup: 89 * ------------------ 90 * 91 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack) 92 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack) 93 * 94 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if 95 * it can't find any associated connection. If the connection is found, its 96 * reference counter is incremented. 97 * 98 * mp: mblock, containing packet header. The full header should fit 99 * into a single mblock. It should also contain at least full IP 100 * and TCP or UDP header. 101 * 102 * protocol: Either IPPROTO_TCP or IPPROTO_UDP. 103 * 104 * hdr_len: The size of IP header. It is used to find TCP or UDP header in 105 * the packet. 106 * 107 * ira->ira_zoneid: The zone in which the returned connection must be; the 108 * zoneid corresponding to the ire_zoneid on the IRE located for 109 * the packet's destination address. 110 * 111 * ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and 112 * IRAF_TX_SHARED_ADDR flags 113 * 114 * For TCP connections, the lookup order is as follows: 115 * 5-tuple {src, dst, protocol, local port, remote port} 116 * lookup in ipcl_conn_fanout table. 117 * 3-tuple {dst, remote port, protocol} lookup in 118 * ipcl_bind_fanout table. 119 * 120 * For UDP connections, a 5-tuple {src, dst, protocol, local port, 121 * remote port} lookup is done on ipcl_udp_fanout. Note that, 122 * these interfaces do not handle cases where a packets belongs 123 * to multiple UDP clients, which is handled in IP itself. 124 * 125 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must 126 * determine which actual zone gets the segment. This is used only in a 127 * labeled environment. The matching rules are: 128 * 129 * - If it's not a multilevel port, then the label on the packet selects 130 * the zone. Unlabeled packets are delivered to the global zone. 
131 * 132 * - If it's a multilevel port, then only the zone registered to receive 133 * packets on that port matches. 134 * 135 * Also, in a labeled environment, packet labels need to be checked. For fully 136 * bound TCP connections, we can assume that the packet label was checked 137 * during connection establishment, and doesn't need to be checked on each 138 * packet. For others, though, we need to check for strict equality or, for 139 * multilevel ports, membership in the range or set. This part currently does 140 * a tnrh lookup on each packet, but could be optimized to use cached results 141 * if that were necessary. (SCTP doesn't come through here, but if it did, 142 * we would apply the same rules as TCP.) 143 * 144 * An implication of the above is that fully-bound TCP sockets must always use 145 * distinct 4-tuples; they can't be discriminated by label alone. 146 * 147 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets, 148 * as there's no connection set-up handshake and no shared state. 149 * 150 * Labels on looped-back packets within a single zone do not need to be 151 * checked, as all processes in the same zone have the same label. 152 * 153 * Finally, for unlabeled packets received by a labeled system, special rules 154 * apply. We consider only the MLP if there is one. Otherwise, we prefer a 155 * socket in the zone whose label matches the default label of the sender, if 156 * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the 157 * receiver's label must dominate the sender's default label. 158 * 159 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack); 160 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t, 161 * ip_stack); 162 * 163 * Lookup routine to find a exact match for {src, dst, local port, 164 * remote port) for TCP connections in ipcl_conn_fanout. The address and 165 * ports are read from the IP and TCP header respectively. 
166 * 167 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol, 168 * zoneid, ip_stack); 169 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex, 170 * zoneid, ip_stack); 171 * 172 * Lookup routine to find a listener with the tuple {lport, laddr, 173 * protocol} in the ipcl_bind_fanout table. For IPv6, an additional 174 * parameter interface index is also compared. 175 * 176 * void ipcl_walk(func, arg, ip_stack) 177 * 178 * Apply 'func' to every connection available. The 'func' is called as 179 * (*func)(connp, arg). The walk is non-atomic so connections may be 180 * created and destroyed during the walk. The CONN_CONDEMNED and 181 * CONN_INCIPIENT flags ensure that connections which are newly created 182 * or being destroyed are not selected by the walker. 183 * 184 * Table Updates 185 * ------------- 186 * 187 * int ipcl_conn_insert(connp); 188 * int ipcl_conn_insert_v4(connp); 189 * int ipcl_conn_insert_v6(connp); 190 * 191 * Insert 'connp' in the ipcl_conn_fanout. 192 * Arguements : 193 * connp conn_t to be inserted 194 * 195 * Return value : 196 * 0 if connp was inserted 197 * EADDRINUSE if the connection with the same tuple 198 * already exists. 199 * 200 * int ipcl_bind_insert(connp); 201 * int ipcl_bind_insert_v4(connp); 202 * int ipcl_bind_insert_v6(connp); 203 * 204 * Insert 'connp' in ipcl_bind_fanout. 205 * Arguements : 206 * connp conn_t to be inserted 207 * 208 * 209 * void ipcl_hash_remove(connp); 210 * 211 * Removes the 'connp' from the connection fanout table. 212 * 213 * Connection Creation/Destruction 214 * ------------------------------- 215 * 216 * conn_t *ipcl_conn_create(type, sleep, netstack_t *) 217 * 218 * Creates a new conn based on the type flag, inserts it into 219 * globalhash table. 220 * 221 * type: This flag determines the type of conn_t which needs to be 222 * created i.e., which kmem_cache it comes from. 
223 * IPCL_TCPCONN indicates a TCP connection 224 * IPCL_SCTPCONN indicates a SCTP connection 225 * IPCL_UDPCONN indicates a UDP conn_t. 226 * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t. 227 * IPCL_RTSCONN indicates a RTS conn_t. 228 * IPCL_DCCPCONN indicates a DCCP conn_t. 229 * IPCL_IPCCONN indicates all other connections. 230 * 231 * void ipcl_conn_destroy(connp) 232 * 233 * Destroys the connection state, removes it from the global 234 * connection hash table and frees its memory. 235 */ 236 237 #include <sys/types.h> 238 #include <sys/stream.h> 239 #include <sys/stropts.h> 240 #include <sys/sysmacros.h> 241 #include <sys/strsubr.h> 242 #include <sys/strsun.h> 243 #define _SUN_TPI_VERSION 2 244 #include <sys/ddi.h> 245 #include <sys/cmn_err.h> 246 #include <sys/debug.h> 247 248 #include <sys/systm.h> 249 #include <sys/param.h> 250 #include <sys/kmem.h> 251 #include <sys/isa_defs.h> 252 #include <inet/common.h> 253 #include <netinet/ip6.h> 254 #include <netinet/icmp6.h> 255 256 #include <inet/ip.h> 257 #include <inet/ip_if.h> 258 #include <inet/ip_ire.h> 259 #include <inet/ip6.h> 260 #include <inet/ip_ndp.h> 261 #include <inet/ip_impl.h> 262 #include <inet/udp_impl.h> 263 #include <inet/dccp_impl.h> 264 #include <inet/sctp_ip.h> 265 #include <inet/sctp/sctp_impl.h> 266 #include <inet/rawip_impl.h> 267 #include <inet/rts_impl.h> 268 #include <inet/iptun/iptun_impl.h> 269 270 #include <sys/cpuvar.h> 271 272 #include <inet/ipclassifier.h> 273 #include <inet/tcp.h> 274 #include <inet/ipsec_impl.h> 275 276 #include <sys/tsol/tnet.h> 277 #include <sys/sockio.h> 278 279 /* Old value for compatibility. Setable in /etc/system */ 280 uint_t tcp_conn_hash_size = 0; 281 282 /* New value. Zero means choose automatically. 
Setable in /etc/system */
uint_t ipcl_conn_hash_size = 0;
/* Bytes of physical memory per conn-fanout bucket (freemem * PAGESIZE / this) */
uint_t ipcl_conn_hash_memfactor = 8192;
/* Upper bound on the auto-computed conn-fanout size */
uint_t ipcl_conn_hash_maxsize = 82500;

/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Fanout table sizes for dccp */
uint_t ipcl_dccp_conn_fanout_size = 512;
uint_t ipcl_dccp_bind_fanout_size = 512;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 * expect that most large deployments would have hundreds of tunnels, and
 * thousands in the extreme case.
 */
uint_t ipcl_iptun_fanout_size = 6143;

/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 * Index i holds the prime for 2^i; entries of 0 mark unusable slots
 * (too small at the low end, sentinel at the high end).
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \
		6143, 12281, 24571, 49139, 98299, 196597, 393209, \
		786431, 1572853, 3145721, 6291449, 12582893, 25165813, \
		50331599, 100663291, 201326557, 0}

/*
 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 * are aligned on cache lines.
 */
typedef union itc_s {
	conn_t	itc_conn;
	/* Pad the union out to a cache-line multiple of conn_s */
	char	itcu_filler[CACHE_ALIGN(conn_s)];
} itc_t;

struct kmem_cache *tcp_conn_cache;
struct kmem_cache *ip_conn_cache;
extern struct kmem_cache *sctp_conn_cache;
struct kmem_cache *udp_conn_cache;
struct kmem_cache *rawip_conn_cache;
struct kmem_cache *rts_conn_cache;
struct kmem_cache *dccp_conn_cache;

extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

static	int	ip_conn_constructor(void *, void *, int);
static	void	ip_conn_destructor(void *, void *);

static	int	tcp_conn_constructor(void *, void *, int);
static	void	tcp_conn_destructor(void *, void *);

static	int	udp_conn_constructor(void *, void *, int);
static	void	udp_conn_destructor(void *, void *);

static	int	rawip_conn_constructor(void *, void *, int);
static	void	rawip_conn_destructor(void *, void *);

static	int	rts_conn_constructor(void *, void *, int);
static	void	rts_conn_destructor(void *, void *);

static	int	dccp_conn_constructor(void *, void *, int);
static	void	dccp_conn_destructor(void *, void *);

/*
 * Global (for all stack instances) init routine
 *
 * Creates one kmem cache per conn type.  Each per-protocol cache holds an
 * itc_t (cache-aligned conn_t) immediately followed by the protocol-private
 * state (tcp_t, udp_t, ...).  Only the TCP cache installs a reclaim
 * callback (tcp_conn_reclaim).
 */
void
ipcl_g_init(void)
{
	ip_conn_cache = kmem_cache_create("ip_conn_cache",
	    sizeof (conn_t), CACHE_ALIGN_SIZE,
	    ip_conn_constructor, ip_conn_destructor,
	    NULL, NULL, NULL, 0);

	tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
	    sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
	    tcp_conn_constructor, tcp_conn_destructor,
	    tcp_conn_reclaim, NULL, NULL, 0);

	udp_conn_cache = kmem_cache_create("udp_conn_cache",
	    sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
	    udp_conn_constructor, udp_conn_destructor,
	    NULL, NULL, NULL, 0);

	rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
	    sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
	    rawip_conn_constructor, rawip_conn_destructor,
	    NULL, NULL, NULL, 0);

	rts_conn_cache = kmem_cache_create("rts_conn_cache",
	    sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
	    rts_conn_constructor, rts_conn_destructor,
	    NULL, NULL, NULL, 0);

	/* XXX:DCCP reclaim */
	dccp_conn_cache = kmem_cache_create("dccp_conn_cache",
	    sizeof (itc_t) + sizeof (dccp_t), CACHE_ALIGN_SIZE,
	    dccp_conn_constructor, dccp_conn_destructor,
	    NULL, NULL, NULL, 0);
}

/*
 * ipclassifier intialization routine, sets up hash tables.
 *
 * Per-netstack init: computes the conn-fanout size (from /etc/system
 * overrides, or from physical memory), rounds it up to a prime from the
 * P2Ps() table, then allocates and mutex-initializes every fanout table.
 */
void
ipcl_init(ip_stack_t *ipst)
{
	int i;
	int sizes[] = P2Ps();

	/*
	 * Calculate size of conn fanout table from /etc/system settings
	 * Precedence: ipcl_conn_hash_size, then the legacy
	 * tcp_conn_hash_size, then memory-based auto-sizing capped at
	 * ipcl_conn_hash_maxsize.
	 */
	if (ipcl_conn_hash_size != 0) {
		ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
	} else if (tcp_conn_hash_size != 0) {
		ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
	} else {
		extern pgcnt_t freemem;

		ipst->ips_ipcl_conn_fanout_size =
		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;

		if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
			ipst->ips_ipcl_conn_fanout_size =
			    ipcl_conn_hash_maxsize;
		}
	}

	/*
	 * Round up to the next prime in sizes[].  Starting at index 9
	 * makes sizes[9] (383) the effective minimum table size.
	 */
	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
		if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
			break;
		}
	}
	if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
		/* Out of range, use the 2^16 value */
		ipst->ips_ipcl_conn_fanout_size = sizes[16];
	}

	/* Take values from /etc/system */
	ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
	ipst->ips_ipcl_dccp_conn_fanout_size = ipcl_dccp_conn_fanout_size;
	ipst->ips_ipcl_dccp_bind_fanout_size = ipcl_dccp_bind_fanout_size;
	ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
	ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
	ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;

	ASSERT(ipst->ips_ipcl_conn_fanout == NULL);

	/*
	 * Allocate each fanout table zeroed and init one per-bucket lock
	 * per entry (locking order: bucket lock before conn_lock).
	 */
	ipst->ips_ipcl_conn_fanout = kmem_zalloc(
	    ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);

	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
		mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}

	ipst->ips_ipcl_bind_fanout = kmem_zalloc(
	    ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);

	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
		mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}

	/* Protocol fanouts are indexed directly by IP protocol number. */
	ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
	    sizeof (connf_t), KM_SLEEP);
	for (i = 0; i < IPPROTO_MAX; i++) {
		mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}

	ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
	    sizeof (connf_t), KM_SLEEP);
	for (i = 0; i < IPPROTO_MAX; i++) {
		mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}

	/* Routing-socket clients share a single list head. */
	ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
	mutex_init(&ipst->ips_rts_clients->connf_lock,
	    NULL, MUTEX_DEFAULT, NULL);

	ipst->ips_ipcl_udp_fanout = kmem_zalloc(
	    ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
		mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}

	ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
	    ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
	for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
		mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}

	ipst->ips_ipcl_raw_fanout = kmem_zalloc(
	    ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
		mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}

	ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
	    sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
		    NULL, MUTEX_DEFAULT, NULL);
	}

	ipst->ips_ipcl_dccp_conn_fanout = kmem_zalloc(
	    ipst->ips_ipcl_dccp_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
	for (i = 0; i < ipst->ips_ipcl_dccp_conn_fanout_size; i++) {
		mutex_init(&ipst->ips_ipcl_dccp_conn_fanout[i].connf_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}

	ipst->ips_ipcl_dccp_bind_fanout = kmem_zalloc(
	    ipst->ips_ipcl_dccp_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
	for (i = 0; i < ipst->ips_ipcl_dccp_bind_fanout_size; i++) {
		mutex_init(&ipst->ips_ipcl_dccp_bind_fanout[i].connf_lock, NULL,
		    MUTEX_DEFAULT, NULL);
	}
}

/*
 * Global (all-stacks) teardown: destroys the caches created by
 * ipcl_g_init().  Note sctp_conn_cache is owned elsewhere and is not
 * destroyed here.
 */
void
ipcl_g_destroy(void)
{
	kmem_cache_destroy(ip_conn_cache);
	kmem_cache_destroy(tcp_conn_cache);
	kmem_cache_destroy(udp_conn_cache);
	kmem_cache_destroy(rawip_conn_cache);
	kmem_cache_destroy(rts_conn_cache);
	kmem_cache_destroy(dccp_conn_cache);
}

/*
 * All user-level and kernel use of the stack must be gone
 * by now.
 */
void
ipcl_destroy(ip_stack_t *ipst)
{
	int i;

	/*
	 * For each fanout table: assert every bucket is empty, destroy the
	 * per-bucket lock, free the table, and NULL the pointer so a stale
	 * reference trips immediately.  Mirrors the allocations in
	 * ipcl_init().
	 */
	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
		ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
		mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
	}
	kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
	    sizeof (connf_t));
	ipst->ips_ipcl_conn_fanout = NULL;

	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
		ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
		mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
	}
	kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
	    sizeof (connf_t));
	ipst->ips_ipcl_bind_fanout = NULL;

	for (i = 0; i < IPPROTO_MAX; i++) {
		ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
	}
	kmem_free(ipst->ips_ipcl_proto_fanout_v4,
	    IPPROTO_MAX * sizeof (connf_t));
	ipst->ips_ipcl_proto_fanout_v4 = NULL;

	for (i = 0; i < IPPROTO_MAX; i++) {
		ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
	}
	kmem_free(ipst->ips_ipcl_proto_fanout_v6,
	    IPPROTO_MAX * sizeof (connf_t));
	ipst->ips_ipcl_proto_fanout_v6 = NULL;

	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
		ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
		mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
	}
	kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
	    sizeof (connf_t));
	ipst->ips_ipcl_udp_fanout = NULL;

	for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
		ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
		mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
	}
	kmem_free(ipst->ips_ipcl_iptun_fanout,
	    ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
	ipst->ips_ipcl_iptun_fanout = NULL;

	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
		ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
		mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
	}
	kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
	    sizeof (connf_t));
	ipst->ips_ipcl_raw_fanout = NULL;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
		mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
	}
	kmem_free(ipst->ips_ipcl_globalhash_fanout,
	    sizeof (connf_t) * CONN_G_HASH_SIZE);
	ipst->ips_ipcl_globalhash_fanout = NULL;

	for (i = 0; i < ipst->ips_ipcl_dccp_conn_fanout_size; i++) {
		ASSERT(ipst->ips_ipcl_dccp_conn_fanout[i].connf_head == NULL);
		mutex_destroy(&ipst->ips_ipcl_dccp_conn_fanout[i].connf_lock);
	}
	kmem_free(ipst->ips_ipcl_dccp_conn_fanout,
	    ipst->ips_ipcl_dccp_conn_fanout_size * sizeof (connf_t));
	ipst->ips_ipcl_dccp_conn_fanout = NULL;

	for (i = 0; i < ipst->ips_ipcl_dccp_bind_fanout_size; i++) {
		ASSERT(ipst->ips_ipcl_dccp_bind_fanout[i].connf_head == NULL);
		mutex_destroy(&ipst->ips_ipcl_dccp_bind_fanout[i].connf_lock);
	}
	kmem_free(ipst->ips_ipcl_dccp_bind_fanout,
	    ipst->ips_ipcl_dccp_bind_fanout_size * sizeof (connf_t));
	ipst->ips_ipcl_dccp_bind_fanout = NULL;

	ASSERT(ipst->ips_rts_clients->connf_head == NULL);
	mutex_destroy(&ipst->ips_rts_clients->connf_lock);
	kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
	ipst->ips_rts_clients = NULL;
}

/*
 * conn creation routine. initialize the conn, sets the reference
 * and inserts it in the global hash table.
625 */ 626 conn_t * 627 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 628 { 629 conn_t *connp; 630 struct kmem_cache *conn_cache; 631 632 switch (type) { 633 case IPCL_SCTPCONN: 634 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 635 return (NULL); 636 sctp_conn_init(connp); 637 netstack_hold(ns); 638 connp->conn_netstack = ns; 639 connp->conn_ixa->ixa_ipst = ns->netstack_ip; 640 connp->conn_ixa->ixa_conn_id = (long)connp; 641 ipcl_globalhash_insert(connp); 642 return (connp); 643 644 case IPCL_TCPCONN: 645 conn_cache = tcp_conn_cache; 646 break; 647 648 case IPCL_UDPCONN: 649 conn_cache = udp_conn_cache; 650 break; 651 652 case IPCL_RAWIPCONN: 653 conn_cache = rawip_conn_cache; 654 break; 655 656 case IPCL_RTSCONN: 657 conn_cache = rts_conn_cache; 658 break; 659 660 case IPCL_IPCCONN: 661 conn_cache = ip_conn_cache; 662 break; 663 664 case IPCL_DCCPCONN: 665 conn_cache = dccp_conn_cache; 666 break; 667 668 default: 669 connp = NULL; 670 ASSERT(0); 671 } 672 673 if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL) 674 return (NULL); 675 676 connp->conn_ref = 1; 677 netstack_hold(ns); 678 connp->conn_netstack = ns; 679 connp->conn_ixa->ixa_ipst = ns->netstack_ip; 680 connp->conn_ixa->ixa_conn_id = (long)connp; 681 ipcl_globalhash_insert(connp); 682 return (connp); 683 } 684 685 void 686 ipcl_conn_destroy(conn_t *connp) 687 { 688 mblk_t *mp; 689 netstack_t *ns = connp->conn_netstack; 690 691 ASSERT(!MUTEX_HELD(&connp->conn_lock)); 692 ASSERT(connp->conn_ref == 0); 693 ASSERT(connp->conn_ioctlref == 0); 694 695 DTRACE_PROBE1(conn__destroy, conn_t *, connp); 696 697 if (connp->conn_cred != NULL) { 698 crfree(connp->conn_cred); 699 connp->conn_cred = NULL; 700 /* ixa_cred done in ipcl_conn_cleanup below */ 701 } 702 703 if (connp->conn_ht_iphc != NULL) { 704 kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated); 705 connp->conn_ht_iphc = NULL; 706 connp->conn_ht_iphc_allocated = 0; 707 connp->conn_ht_iphc_len = 0; 708 
connp->conn_ht_ulp = NULL; 709 connp->conn_ht_ulp_len = 0; 710 } 711 ip_pkt_free(&connp->conn_xmit_ipp); 712 713 ipcl_globalhash_remove(connp); 714 715 if (connp->conn_latch != NULL) { 716 IPLATCH_REFRELE(connp->conn_latch); 717 connp->conn_latch = NULL; 718 } 719 if (connp->conn_latch_in_policy != NULL) { 720 IPPOL_REFRELE(connp->conn_latch_in_policy); 721 connp->conn_latch_in_policy = NULL; 722 } 723 if (connp->conn_latch_in_action != NULL) { 724 IPACT_REFRELE(connp->conn_latch_in_action); 725 connp->conn_latch_in_action = NULL; 726 } 727 if (connp->conn_policy != NULL) { 728 IPPH_REFRELE(connp->conn_policy, ns); 729 connp->conn_policy = NULL; 730 } 731 732 if (connp->conn_ipsec_opt_mp != NULL) { 733 freemsg(connp->conn_ipsec_opt_mp); 734 connp->conn_ipsec_opt_mp = NULL; 735 } 736 737 if (connp->conn_flags & IPCL_TCPCONN) { 738 tcp_t *tcp = connp->conn_tcp; 739 740 tcp_free(tcp); 741 mp = tcp->tcp_timercache; 742 743 tcp->tcp_tcps = NULL; 744 745 /* 746 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate 747 * the mblk. 
748 */ 749 if (tcp->tcp_rsrv_mp != NULL) { 750 freeb(tcp->tcp_rsrv_mp); 751 tcp->tcp_rsrv_mp = NULL; 752 mutex_destroy(&tcp->tcp_rsrv_mp_lock); 753 } 754 755 ipcl_conn_cleanup(connp); 756 connp->conn_flags = IPCL_TCPCONN; 757 if (ns != NULL) { 758 ASSERT(tcp->tcp_tcps == NULL); 759 connp->conn_netstack = NULL; 760 connp->conn_ixa->ixa_ipst = NULL; 761 netstack_rele(ns); 762 } 763 764 bzero(tcp, sizeof (tcp_t)); 765 766 tcp->tcp_timercache = mp; 767 tcp->tcp_connp = connp; 768 kmem_cache_free(tcp_conn_cache, connp); 769 return; 770 } 771 772 if (connp->conn_flags & IPCL_SCTPCONN) { 773 ASSERT(ns != NULL); 774 sctp_free(connp); 775 return; 776 } 777 778 if (connp->conn_flags & IPCL_DCCPCONN) { 779 dccp_t *dccp = connp->conn_dccp; 780 781 cmn_err(CE_NOTE, "ipclassifier: conn_flags DCCP cache_free"); 782 783 dccp_free(dccp); 784 mp = dccp->dccp_timercache; 785 786 dccp->dccp_dccps = NULL; 787 788 ipcl_conn_cleanup(connp); 789 connp->conn_flags = IPCL_DCCPCONN; 790 if (ns != NULL) { 791 ASSERT(dccp->dccps == NULL); 792 connp->conn_netstack = NULL; 793 connp->conn_ixa->ixa_ipst = NULL; 794 netstack_rele(ns); 795 } 796 797 bzero(dccp, sizeof (dccp_t)); 798 799 dccp->dccp_timercache = mp; 800 dccp->dccp_connp = connp; 801 kmem_cache_free(dccp_conn_cache, connp); 802 return; 803 } 804 805 ipcl_conn_cleanup(connp); 806 if (ns != NULL) { 807 connp->conn_netstack = NULL; 808 connp->conn_ixa->ixa_ipst = NULL; 809 netstack_rele(ns); 810 } 811 812 /* leave conn_priv aka conn_udp, conn_icmp, etc in place. 
*/ 813 if (connp->conn_flags & IPCL_UDPCONN) { 814 connp->conn_flags = IPCL_UDPCONN; 815 kmem_cache_free(udp_conn_cache, connp); 816 } else if (connp->conn_flags & IPCL_RAWIPCONN) { 817 connp->conn_flags = IPCL_RAWIPCONN; 818 connp->conn_proto = IPPROTO_ICMP; 819 connp->conn_ixa->ixa_protocol = connp->conn_proto; 820 kmem_cache_free(rawip_conn_cache, connp); 821 } else if (connp->conn_flags & IPCL_RTSCONN) { 822 connp->conn_flags = IPCL_RTSCONN; 823 kmem_cache_free(rts_conn_cache, connp); 824 } else { 825 connp->conn_flags = IPCL_IPCCONN; 826 ASSERT(connp->conn_flags & IPCL_IPCCONN); 827 ASSERT(connp->conn_priv == NULL); 828 kmem_cache_free(ip_conn_cache, connp); 829 } 830 } 831 832 /* 833 * Running in cluster mode - deregister listener information 834 */ 835 static void 836 ipcl_conn_unlisten(conn_t *connp) 837 { 838 ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 839 ASSERT(connp->conn_lport != 0); 840 841 if (cl_inet_unlisten != NULL) { 842 sa_family_t addr_family; 843 uint8_t *laddrp; 844 845 if (connp->conn_ipversion == IPV6_VERSION) { 846 addr_family = AF_INET6; 847 laddrp = (uint8_t *)&connp->conn_bound_addr_v6; 848 } else { 849 addr_family = AF_INET; 850 laddrp = (uint8_t *)&connp->conn_bound_addr_v4; 851 } 852 (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid, 853 IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL); 854 } 855 connp->conn_flags &= ~IPCL_CL_LISTENER; 856 } 857 858 /* 859 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 860 * which table the conn belonged to). So for debugging we can see which hash 861 * table this connection was in. 
 */
/*
 * Unlink connp from its fanout bucket (if any) under the bucket lock,
 * dropping the reference the table held.  Caller must NOT hold conn_lock
 * (lock order is bucket lock before conn_lock).  Cluster listeners are
 * deregistered as a side effect.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL)				\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		if ((connp)->conn_prev != NULL)				\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		else							\
			connfp->connf_head = (connp)->conn_next;	\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}

/*
 * Public wrapper around IPCL_HASH_REMOVE.  For RSVP conns the IP input
 * functions must be recomputed since RSVP reception state changed.
 */
void
ipcl_hash_remove(conn_t *connp)
{
	/* Save before removal; IPCL_HASH_REMOVE may drop the last table ref */
	uint8_t		protocol = connp->conn_proto;

	IPCL_HASH_REMOVE(connp);
	if (protocol == IPPROTO_RSVP)
		ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
}

/*
 * The whole purpose of this function is allow removal of
 * a conn_t from the connected hash for timewait reclaim.
 * This is essentially a TW reclaim fastpath where timewait
 * collector checks under fanout lock (so no one else can
 * get access to the conn_t) that refcnt is 2 i.e. one for
 * TCP and one for the classifier hash list. If ref count
 * is indeed 2, we can just remove the conn under lock and
 * avoid cleaning up the conn under squeue. This gives us
 * improved performance.
 */
void
ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp)
{
	/* Caller holds both the bucket lock and conn_lock. */
	ASSERT(MUTEX_HELD(&connfp->connf_lock));
	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);

	if ((connp)->conn_next != NULL) {
		(connp)->conn_next->conn_prev = (connp)->conn_prev;
	}
	if ((connp)->conn_prev != NULL) {
		(connp)->conn_prev->conn_next = (connp)->conn_next;
	} else {
		connfp->connf_head = (connp)->conn_next;
	}
	(connp)->conn_fanout = NULL;
	(connp)->conn_next = NULL;
	(connp)->conn_prev = NULL;
	(connp)->conn_flags |= IPCL_REMOVED;
	/* Drop the table's reference directly; see comment above. */
	ASSERT((connp)->conn_ref == 2);
	(connp)->conn_ref--;
}

/*
 * Insert connp at the head of connfp, which the caller has locked.
 * The table takes its own reference on connp.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		(connfp)->connf_head->conn_prev = (connp);		\
		(connp)->conn_next = (connfp)->connf_head;		\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_CONNECTED;						\
	CONN_INC_REF(connp);						\
}

/* Remove connp from its current bucket, then insert into connfp. */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
	mutex_exit(&(connfp)->connf_lock);				\
}

/*
 * Insert a bound conn ahead of any wildcard (V4 match-any) entries so
 * that specific binds are found before INADDR_ANY binds on lookup.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	nconnp = (connfp)->connf_head;					\
	/* Skip past existing non-wildcard entries. */			\
	while (nconnp != NULL &&					\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) {		\
		pconnp = nconnp;					\
		nconnp = nconnp->conn_next;				\
	}								\
	if (pconnp != NULL) {						\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	} else {							\
		(connfp)->connf_head = (connp);				\
	}								\
	if (nconnp != NULL) {						\
		(connp)->conn_next = nconnp;				\
		nconnp->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}

/*
 * Insert a wildcard bind.  A v4-mapped wildcard is placed before any
 * v6-unspecified entry of the same zone (v4 wildcard is more specific
 * than the v6 one); otherwise it goes at the tail.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&	\
		    connp->conn_zoneid == next->conn_zoneid) {		\
			(connp)->conn_next = next;			\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}

/* Function form of IPCL_HASH_INSERT_WILDCARD for external callers. */
void
ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
{
	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
}

/*
 * Because the classifier is used to classify inbound packets, the destination
 * address is meant to be our local tunnel address (tunnel source), and the
 * source the remote tunnel address (tunnel destination).
 *
 * Note that conn_proto can't be used for fanout since the upper protocol
 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
 */
/*
 * Classify an inbound packet to an IPv4 tunnel conn.  Tries an exact
 * (local, remote) match first, then falls back to a 6to4-style match
 * with an unspecified remote address.  Returns the conn with a reference
 * held, or NULL.  Note the "done" label is reached with whichever bucket
 * lock is currently held; the single mutex_exit below releases it.
 */
conn_t *
ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*connp;

	/* first look for IPv4 tunnel links */
	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		if (IPCL_IPTUN_MATCH(connp, *dst, *src))
			break;
	}
	if (connp != NULL)
		goto done;

	mutex_exit(&connfp->connf_lock);

	/* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
	    INADDR_ANY)];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
			break;
	}
done:
	if (connp != NULL)
		CONN_INC_REF(connp);
	mutex_exit(&connfp->connf_lock);
	return (connp);
}

/*
 * IPv6 counterpart of ipcl_iptun_classify_v4: exact (local, remote)
 * match only.  Returns the conn with a reference held, or NULL.
 */
conn_t *
ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*connp;

	/* Look for an IPv6 tunnel link */
	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
			CONN_INC_REF(connp);
			break;
		}
	}
	mutex_exit(&connfp->connf_lock);
	return (connp);
}

/*
 * This function is used only for inserting SCTP raw socket now.
 * This may change later.
 *
 * Note that only one raw socket can be bound to a port. The param
 * lport is in network byte order.
 */
/*
 * Insert an SCTP raw socket into the raw fanout.  Fails with
 * EADDRNOTAVAIL if another raw socket in the same zone/family already
 * covers the port (any wildcard on either side, or an equal local
 * address, counts as a collision).  The conn is then hashed as
 * WILDCARD, BOUND or CONNECTED depending on which addresses are set.
 */
static int
ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
{
	connf_t	*connfp;
	conn_t	*oconnp;
	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;

	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];

	/* Check for existing raw socket already bound to the port. */
	mutex_enter(&connfp->connf_lock);
	for (oconnp = connfp->connf_head; oconnp != NULL;
	    oconnp = oconnp->conn_next) {
		if (oconnp->conn_lport == lport &&
		    oconnp->conn_zoneid == connp->conn_zoneid &&
		    oconnp->conn_family == connp->conn_family &&
		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
		    &connp->conn_laddr_v6))) {
			break;
		}
	}
	mutex_exit(&connfp->connf_lock);
	if (oconnp != NULL)
		return (EADDRNOTAVAIL);

	/* Unspecified/any foreign addr => not connected yet. */
	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		} else {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		}
	} else {
		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
	}
	return (0);
}

/*
 * Insert an IPv4 tunnel conn into the iptun fanout.  Fails with
 * EADDRINUSE if a tunnel with the same (local, remote) address pair is
 * already hashed.
 */
static int
ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*tconnp;
	ipaddr_t laddr = connp->conn_laddr_v4;
	ipaddr_t faddr = connp->conn_faddr_v4;

	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
	mutex_enter(&connfp->connf_lock);
	for (tconnp = connfp->connf_head; tconnp != NULL;
	    tconnp = tconnp->conn_next) {
		if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
			/* A tunnel is already bound to these addresses. */
			mutex_exit(&connfp->connf_lock);
			return (EADDRINUSE);
		}
	}
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
	mutex_exit(&connfp->connf_lock);
	return (0);
}

/* IPv6 counterpart of ipcl_iptun_hash_insert. */
static int
ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*tconnp;
	in6_addr_t *laddr = &connp->conn_laddr_v6;
	in6_addr_t *faddr = &connp->conn_faddr_v6;

	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
	mutex_enter(&connfp->connf_lock);
	for (tconnp = connfp->connf_head; tconnp != NULL;
	    tconnp = tconnp->conn_next) {
		if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
			/* A tunnel is already bound to these addresses. */
			mutex_exit(&connfp->connf_lock);
			return (EADDRINUSE);
		}
	}
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
	mutex_exit(&connfp->connf_lock);
	return (0);
}

/*
 * Check for a MAC exemption conflict on a labeled system. Note that for
 * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
 * transport layer. This check is for binding all other protocols.
 *
 * Returns true if there's a conflict.
 */
/*
 * Scan the v4 protocol fanout for connp's protocol and report whether
 * another zone's conn conflicts with connp under MAC-exemption rules.
 * Returns B_TRUE on conflict (the loop breaks with tconn != NULL).
 */
static boolean_t
check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*tconn;

	connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
	mutex_enter(&connfp->connf_lock);
	for (tconn = connfp->connf_head; tconn != NULL;
	    tconn = tconn->conn_next) {
		/* We don't allow v4 fallback for v6 raw socket */
		if (connp->conn_family != tconn->conn_family)
			continue;
		/* If neither is exempt, then there's no conflict */
		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
			continue;
		/* We are only concerned about sockets for a different zone */
		if (connp->conn_zoneid == tconn->conn_zoneid)
			continue;
		/* If both are bound to different specific addrs, ok */
		if (connp->conn_laddr_v4 != INADDR_ANY &&
		    tconn->conn_laddr_v4 != INADDR_ANY &&
		    connp->conn_laddr_v4 != tconn->conn_laddr_v4)
			continue;
		/* These two conflict; fail */
		break;
	}
	mutex_exit(&connfp->connf_lock);
	return (tconn != NULL);
}

/* IPv6 counterpart of check_exempt_conflict_v4. */
static boolean_t
check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
{
	connf_t	*connfp;
	conn_t	*tconn;

	connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
	mutex_enter(&connfp->connf_lock);
	for (tconn = connfp->connf_head; tconn != NULL;
	    tconn = tconn->conn_next) {
		/* We don't allow v4 fallback for v6 raw socket */
		if (connp->conn_family != tconn->conn_family)
			continue;
		/* If neither is exempt, then there's no conflict */
		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
			continue;
		/* We are only concerned about sockets for a different zone */
		if (connp->conn_zoneid == tconn->conn_zoneid)
			continue;
		/* If both are bound to different addrs, ok */
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
		    !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
		    &tconn->conn_laddr_v6))
			continue;
		/* These two conflict; fail */
		break;
	}
	mutex_exit(&connfp->connf_lock);
	return (tconn != NULL);
}

/*
 * (v4, v6) bind hash insertion routines
 * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
 */

/* Dispatch on conn_ipversion to the v4 or v6 bind-insert routine. */
int
ipcl_bind_insert(conn_t *connp)
{
	if (connp->conn_ipversion == IPV6_VERSION)
		return (ipcl_bind_insert_v6(connp));
	else
		return (ipcl_bind_insert_v4(connp));
}

/*
 * Hash an IPv4 conn into the appropriate bind-time fanout, keyed on
 * conn_proto: UDP fanout, TCP bind fanout, SCTP raw fanout, DCCP bind
 * fanout, or the per-protocol fanout for everything else.  Returns 0 or
 * an errno (EADDRINUSE on labeled-system exemption conflict).
 */
int
ipcl_bind_insert_v4(conn_t *connp)
{
	connf_t	*connfp;
	int	ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	uint16_t	lport = connp->conn_lport;
	uint8_t		protocol = connp->conn_proto;

	if (IPCL_IS_IPTUN(connp))
		return (ipcl_iptun_hash_insert(connp, ipst));

	switch (protocol) {
	default:
		if (is_system_labeled() &&
		    check_exempt_conflict_v4(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */
	case IPPROTO_UDP:
		if (protocol == IPPROTO_UDP) {
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(lport, ipst)];
		} else {
			connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
		}

		if (connp->conn_faddr_v4 != INADDR_ANY) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (connp->conn_laddr_v4 != INADDR_ANY) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		if (protocol == IPPROTO_RSVP)
			ill_set_inputfn_all(ipst);
		break;

	case IPPROTO_TCP:
		/* Insert it in the Bind Hash */
		ASSERT(connp->conn_zoneid != ALL_ZONES);
		connfp = &ipst->ips_ipcl_bind_fanout[
		    IPCL_BIND_HASH(lport, ipst)];
		if (connp->conn_laddr_v4 != INADDR_ANY) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		/* Notify the clustering subsystem, if it registered a hook. */
		if (cl_inet_listen != NULL) {
			ASSERT(connp->conn_ipversion == IPV4_VERSION);
			connp->conn_flags |= IPCL_CL_LISTENER;
			(*cl_inet_listen)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_TCP, AF_INET,
			    (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
		}
		break;

	case IPPROTO_SCTP:
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	case IPPROTO_DCCP:
		/* XXX development trace for the experimental DCCP path. */
		cmn_err(CE_NOTE, "ipclassifier.c: ipcl_bind_insert_v4");
		ASSERT(connp->conn_zoneid != ALL_ZONES);
		connfp = &ipst->ips_ipcl_dccp_bind_fanout[
		    IPCL_DCCP_BIND_HASH(lport, ipst)];
		if (connp->conn_laddr_v4 != INADDR_ANY) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}


	return (ret);
}

/*
 * IPv6 counterpart of ipcl_bind_insert_v4.  Same fanout selection, with
 * unspecified-address tests replacing the INADDR_ANY comparisons.
 */
int
ipcl_bind_insert_v6(conn_t *connp)
{
	connf_t	*connfp;
	int	ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	uint16_t	lport = connp->conn_lport;
	uint8_t		protocol = connp->conn_proto;

	if (IPCL_IS_IPTUN(connp)) {
		return (ipcl_iptun_hash_insert_v6(connp, ipst));
	}

	switch (protocol) {
	default:
		if (is_system_labeled() &&
		    check_exempt_conflict_v6(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */
	case IPPROTO_UDP:
		if (protocol == IPPROTO_UDP) {
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(lport, ipst)];
		} else {
			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
		}

		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;

	case IPPROTO_TCP:
		/* Insert it in the Bind Hash */
		ASSERT(connp->conn_zoneid != ALL_ZONES);
		connfp = &ipst->ips_ipcl_bind_fanout[
		    IPCL_BIND_HASH(lport, ipst)];
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		/*
		 * Notify the clustering subsystem; an AF_INET6 socket may
		 * still be IPv4 (mapped), hence the ipversion check.
		 */
		if (cl_inet_listen != NULL) {
			sa_family_t	addr_family;
			uint8_t		*laddrp;

			if (connp->conn_ipversion == IPV6_VERSION) {
				addr_family = AF_INET6;
				laddrp =
				    (uint8_t *)&connp->conn_bound_addr_v6;
			} else {
				addr_family = AF_INET;
				laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
			}
			connp->conn_flags |= IPCL_CL_LISTENER;
			(*cl_inet_listen)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_TCP, addr_family, laddrp, lport, NULL);
		}
		break;

	case IPPROTO_SCTP:
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	case IPPROTO_DCCP:
		/* XXX development trace for the experimental DCCP path. */
		cmn_err(CE_NOTE, "ipclassifier.c: ipcl_bind_insert_v6");
		ASSERT(connp->conn_zoneid != ALL_ZONES);
		connfp = &ipst->ips_ipcl_dccp_bind_fanout[
		    IPCL_DCCP_BIND_HASH(lport, ipst)];
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}

/*
 * ipcl_conn_hash insertion routines.
 * The caller has already set conn_proto and the addresses/ports in the conn_t.
 */

/* Dispatch on conn_ipversion to the v4 or v6 conn-insert routine. */
int
ipcl_conn_insert(conn_t *connp)
{
	if (connp->conn_ipversion == IPV6_VERSION)
		return (ipcl_conn_insert_v6(connp));
	else
		return (ipcl_conn_insert_v4(connp));
}

/*
 * Insert a fully-specified IPv4 conn into the connected-side fanouts.
 * For TCP/DCCP the 5-tuple (plus zone) must be unique; EADDRINUSE is
 * returned when a duplicate already exists.  Returns 0 on success or an
 * errno.
 */
int
ipcl_conn_insert_v4(conn_t *connp)
{
	connf_t		*connfp;
	conn_t		*tconnp;
	int		ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	uint16_t	lport = connp->conn_lport;
	uint8_t		protocol = connp->conn_proto;

	if (IPCL_IS_IPTUN(connp))
		return (ipcl_iptun_hash_insert(connp, ipst));

	switch (protocol) {
	case IPPROTO_TCP:
		/*
		 * For TCP, we check whether the connection tuple already
		 * exists before allowing the connection to proceed. We
		 * also allow indexing on the zoneid. This is to allow
		 * multiple shared stack zones to have the same tcp
		 * connection tuple. In practice this only happens for
		 * INADDR_LOOPBACK as it's the only local address which
		 * doesn't have to be unique.
		 */
		connfp = &ipst->ips_ipcl_conn_fanout[
		    IPCL_CONN_HASH(connp->conn_faddr_v4,
		    connp->conn_ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
			    connp->conn_faddr_v4, connp->conn_laddr_v4,
			    connp->conn_ports) &&
			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}
		if (connp->conn_fanout != NULL) {
			/*
			 * Probably a XTI/TLI application trying to do a
			 * rebind. Let it happen.
			 */
			/*
			 * IPCL_HASH_REMOVE takes the conn lock, so the
			 * bucket lock must be dropped first (lock order).
			 */
			mutex_exit(&connfp->connf_lock);
			IPCL_HASH_REMOVE(connp);
			mutex_enter(&connfp->connf_lock);
		}

		ASSERT(connp->conn_recv != NULL);
		ASSERT(connp->conn_recvicmp != NULL);

		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	case IPPROTO_SCTP:
		/*
		 * The raw socket may have already been bound, remove it
		 * from the hash first.
		 */
		IPCL_HASH_REMOVE(connp);
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	case IPPROTO_DCCP:
		/* XXX development trace for the experimental DCCP path. */
		cmn_err(CE_NOTE, "ipclassifier.c: ipcl_conn_insert_v4");
		connfp = &ipst->ips_ipcl_dccp_conn_fanout[IPCL_DCCP_CONN_HASH(
		    connp->conn_faddr_v4, connp->conn_ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
			    connp->conn_faddr_v4, connp->conn_laddr_v4,
			    connp->conn_ports) &&
			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}

		/* XXX:DCCP XTI/TLI application? */

		ASSERT(connp->conn_recv != NULL);
		ASSERT(connp->conn_recvicmp != NULL);

		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	default:
		/*
		 * Check for conflicts among MAC exempt bindings. For
		 * transports with port numbers, this is done by the upper
		 * level per-transport binding logic. For all others, it's
		 * done here.
		 */
		if (is_system_labeled() &&
		    check_exempt_conflict_v4(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */

	case IPPROTO_UDP:
		if (protocol == IPPROTO_UDP) {
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(lport, ipst)];
		} else {
			connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
		}

		if (connp->conn_faddr_v4 != INADDR_ANY) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (connp->conn_laddr_v4 != INADDR_ANY) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}

/*
 * IPv6 counterpart of ipcl_conn_insert_v4.  Additionally honors
 * conn_bound_if: a tuple only conflicts when the existing conn is
 * unbound or bound to the same interface index.
 */
int
ipcl_conn_insert_v6(conn_t *connp)
{
	connf_t		*connfp;
	conn_t		*tconnp;
	int		ret = 0;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	uint16_t	lport = connp->conn_lport;
	uint8_t		protocol = connp->conn_proto;
	uint_t		ifindex = connp->conn_bound_if;

	if (IPCL_IS_IPTUN(connp))
		return (ipcl_iptun_hash_insert_v6(connp, ipst));

	switch (protocol) {
	case IPPROTO_TCP:

		/*
		 * For tcp, we check whether the connection tuple already
		 * exists before allowing the connection to proceed. We
		 * also allow indexing on the zoneid. This is to allow
		 * multiple shared stack zones to have the same tcp
		 * connection tuple. In practice this only happens for
		 * ipv6_loopback as it's the only local address which
		 * doesn't have to be unique.
		 */
		connfp = &ipst->ips_ipcl_conn_fanout[
		    IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
		    ipst)];
		mutex_enter(&connfp->connf_lock);
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			/* NOTE: need to match zoneid. Bug in onnv-gate */
			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
			    connp->conn_faddr_v6, connp->conn_laddr_v6,
			    connp->conn_ports) &&
			    (tconnp->conn_bound_if == 0 ||
			    tconnp->conn_bound_if == ifindex) &&
			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}
		if (connp->conn_fanout != NULL) {
			/*
			 * Probably a XTI/TLI application trying to do a
			 * rebind. Let it happen.
			 */
			mutex_exit(&connfp->connf_lock);
			IPCL_HASH_REMOVE(connp);
			mutex_enter(&connfp->connf_lock);
		}
		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	case IPPROTO_SCTP:
		/* Raw socket may already be bound; unhash it first. */
		IPCL_HASH_REMOVE(connp);
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;

	case IPPROTO_DCCP:
		/* XXX development trace for the experimental DCCP path. */
		cmn_err(CE_NOTE, "ipclassifier.c: ipcl_conn_insert_v6");
		connfp = &ipst->ips_ipcl_dccp_conn_fanout[
		    IPCL_DCCP_CONN_HASH_V6(connp->conn_faddr_v6,
		    connp->conn_ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (tconnp = connfp->connf_head; tconnp != NULL;
		    tconnp = tconnp->conn_next) {
			/* NOTE: need to match zoneid. Bug in onnv-gate */
			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
			    connp->conn_faddr_v6, connp->conn_laddr_v6,
			    connp->conn_ports) &&
			    (tconnp->conn_bound_if == 0 ||
			    tconnp->conn_bound_if == ifindex) &&
			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
				/* Already have a conn. bail out */
				mutex_exit(&connfp->connf_lock);
				return (EADDRINUSE);
			}
		}

		/* XXX:DCCP XTI/TLI? */
		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
		mutex_exit(&connfp->connf_lock);
		break;

	default:
		if (is_system_labeled() &&
		    check_exempt_conflict_v6(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */
	case IPPROTO_UDP:
		if (protocol == IPPROTO_UDP) {
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(lport, ipst)];
		} else {
			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
		}

		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;
	}

	return (ret);
}

/*
 * v4 packet classifying function. looks up the fanout table to
 * find the conn, the packet belongs to. returns the conn with
 * the reference held, null otherwise.
 *
 * If zoneid is ALL_ZONES, then the search rules described in the "Connection
 * Lookup" comment block are applied. Labels are also checked as described
 * above. If the packet is from the inside (looped back), and is from the same
 * zone, then label checks are omitted.
 */
conn_t *
ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
    ip_recv_attr_t *ira, ip_stack_t *ipst)
{
	ipha_t	*ipha;
	connf_t	*connfp, *bind_connfp;
	uint16_t lport;
	uint16_t fport;
	uint32_t ports;
	conn_t	*connp;
	uint16_t  *up;
	zoneid_t	zoneid = ira->ira_zoneid;

	ipha = (ipha_t *)mp->b_rptr;
	/*
	 * up points at the transport-header port pair; usable for TCP,
	 * UDP and DCCP since the ports sit at the same offset.
	 */
	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);

	switch (protocol) {
	case IPPROTO_TCP:
		ports = *(uint32_t *)up;
		connfp =
		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
		    ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_CONN_MATCH(connp, protocol,
			    ipha->ipha_src, ipha->ipha_dst, ports) &&
			    (connp->conn_zoneid == zoneid ||
			    connp->conn_allzones ||
			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
				break;
		}

		if (connp != NULL) {
			/*
			 * We have a fully-bound TCP connection.
			 *
			 * For labeled systems, there's no need to check the
			 * label here. It's known to be good as we checked
			 * before allowing the connection to become bound.
			 */
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		/* No full match; fall back to the listener (bind) fanout. */
		mutex_exit(&connfp->connf_lock);
		lport = up[1];
		bind_connfp =
		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
		mutex_enter(&bind_connfp->connf_lock);
		for (connp = bind_connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
			    lport) &&
			    (connp->conn_zoneid == zoneid ||
			    connp->conn_allzones ||
			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
				break;
		}

		/*
		 * If the matching connection is SLP on a private address, then
		 * the label on the packet must match the local zone's label.
		 * Otherwise, it must be in the label range defined by tnrh.
		 * This is ensured by tsol_receive_local.
		 *
		 * Note that we don't check tsol_receive_local for
		 * the connected case.
		 */
		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
		    ira, connp)) {
			DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
			    char *, "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			/* Have a listener at least */
			CONN_INC_REF(connp);
			mutex_exit(&bind_connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&bind_connfp->connf_lock);
		break;

	case IPPROTO_UDP:
		lport = up[1];
		fport = up[0];
		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
			    fport, ipha->ipha_src) &&
			    (connp->conn_zoneid == zoneid ||
			    connp->conn_allzones ||
			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
				break;
		}

		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
		    ira, connp)) {
			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
			    char *, "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		/*
		 * We shouldn't come here for multicast/broadcast packets
		 */
		mutex_exit(&connfp->connf_lock);

		break;

	case IPPROTO_DCCP:
		ports = *(uint32_t *)up;

		/*
		 * Search for fully-bound connection.
		 */
		connfp = &ipst->ips_ipcl_dccp_conn_fanout[IPCL_DCCP_CONN_HASH(
		    ipha->ipha_src, ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			/*
			 * XXX:DCCP
			 * NOTE(review): unlike the TCP case above, this
			 * match performs no zoneid/allzones check --
			 * confirm whether that is intentional for DCCP.
			 */
			if (IPCL_CONN_MATCH(connp, protocol,
			    ipha->ipha_src, ipha->ipha_dst, ports)) {
				/* XXX */
				cmn_err(CE_NOTE, "ipclassifier.c: fully bound connection found");
				break;
			}
		}

		if (connp != NULL) {
			/*
			 * We have a fully-bound DCCP connection.
			 */
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&connfp->connf_lock);
		lport = up[1];

		/*
		 * Fully-bound connection was not found, search for listener.
		 */
		bind_connfp = &ipst->ips_ipcl_dccp_bind_fanout[
		    IPCL_DCCP_BIND_HASH(lport, ipst)];
		mutex_enter(&bind_connfp->connf_lock);
		for (connp = bind_connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
			    lport) &&
			    (connp->conn_zoneid == zoneid ||
			    connp->conn_allzones ||
			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
				break;
		}

		if (connp != NULL) {
			/* XXX development trace for experimental DCCP. */
			cmn_err(CE_NOTE, "ipclassifier.c: half-bound bind listener");
			/* Have a listener at least */
			CONN_INC_REF(connp);
			mutex_exit(&bind_connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&bind_connfp->connf_lock);
		break;

	case IPPROTO_ENCAP:
	case IPPROTO_IPV6:
		return (ipcl_iptun_classify_v4(&ipha->ipha_src,
		    &ipha->ipha_dst, ipst));
	}

	return (NULL);
}

/*
 * v6 counterpart of ipcl_classify_v4.  Note there is no DCCP case here;
 * presumably v6 DCCP classification is not implemented yet -- see the
 * v4 routine above.
 */
conn_t *
ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
    ip_recv_attr_t *ira, ip_stack_t *ipst)
{
	ip6_t		*ip6h;
	connf_t		*connfp, *bind_connfp;
	uint16_t	lport;
	uint16_t	fport;
	tcpha_t		*tcpha;
	uint32_t	ports;
	conn_t		*connp;
	uint16_t	*up;
	zoneid_t	zoneid = ira->ira_zoneid;

	ip6h = (ip6_t *)mp->b_rptr;

	switch (protocol) {
	case IPPROTO_TCP:
		tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
		up = &tcpha->tha_lport;
		ports = *(uint32_t *)up;

		connfp =
		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
		    ports, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_CONN_MATCH_V6(connp, protocol,
			    ip6h->ip6_src, ip6h->ip6_dst, ports) &&
			    (connp->conn_zoneid == zoneid ||
			    connp->conn_allzones ||
			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
				break;
		}

		if (connp != NULL) {
			/*
			 * We have a fully-bound TCP connection.
			 *
			 * For labeled systems, there's no need to check the
			 * label here. It's known to be good as we checked
			 * before allowing the connection to become bound.
			 */
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&connfp->connf_lock);

		/* No full match; fall back to the listener (bind) fanout. */
		lport = up[1];
		bind_connfp =
		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
		mutex_enter(&bind_connfp->connf_lock);
		for (connp = bind_connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_BIND_MATCH_V6(connp, protocol,
			    ip6h->ip6_dst, lport) &&
			    (connp->conn_zoneid == zoneid ||
			    connp->conn_allzones ||
			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
				break;
		}

		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
		    ira, connp)) {
			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
			    char *, "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			/* Have a listener at least */
			CONN_INC_REF(connp);
			mutex_exit(&bind_connfp->connf_lock);
			return (connp);
		}

		mutex_exit(&bind_connfp->connf_lock);
		break;

	case IPPROTO_UDP:
		up = (uint16_t *)&mp->b_rptr[hdr_len];
		lport = up[1];
		fport = up[0];
		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
		mutex_enter(&connfp->connf_lock);
		for (connp = connfp->connf_head; connp != NULL;
		    connp = connp->conn_next) {
			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
			    fport, ip6h->ip6_src) &&
			    (connp->conn_zoneid == zoneid ||
			    connp->conn_allzones ||
			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
				break;
		}

		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
		    ira, connp)) {
			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
			    char *, "connp(1) could not receive mp(2)",
			    conn_t *, connp, mblk_t *, mp);
			connp = NULL;
		}

		if (connp != NULL) {
			CONN_INC_REF(connp);
			mutex_exit(&connfp->connf_lock);
			return (connp);
		}

		/*
		 * We shouldn't come here for multicast/broadcast packets
		 */
		mutex_exit(&connfp->connf_lock);
		break;
	case IPPROTO_ENCAP:
	case IPPROTO_IPV6:
		return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
		    &ip6h->ip6_dst, ipst));
	}

	return (NULL);
}

/*
 * wrapper around ipcl_classify_(v4,v6) routines.
 */
conn_t *
ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
{
	if (ira->ira_flags & IRAF_IS_IPV4) {
		return (ipcl_classify_v4(mp, ira->ira_protocol,
		    ira->ira_ip_hdr_length, ira, ipst));
	} else {
		return (ipcl_classify_v6(mp, ira->ira_protocol,
		    ira->ira_ip_hdr_length, ira, ipst));
	}
}

/*
 * Only used to classify SCTP RAW sockets
 */
conn_t *
ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
    ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
{
	connf_t		*connfp;
	conn_t		*connp;
	in_port_t	lport;
	int		ipversion;
	const void	*dst;
	zoneid_t	zoneid = ira->ira_zoneid;

	lport = ((uint16_t *)&ports)[1];
	if (ira->ira_flags & IRAF_IS_IPV4) {
		dst = (const void *)&ipha->ipha_dst;
		ipversion = IPV4_VERSION;
	} else {
		dst = (const void *)&ip6h->ip6_dst;
		ipversion = IPV6_VERSION;
	}

	/* First pass: exact-port bucket. */
	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		/* We don't allow v4 fallback for v6 raw socket. */
		if (ipversion != connp->conn_ipversion)
			continue;
		/*
		 * Connected socket (specific foreign addr): full 5-tuple
		 * match; otherwise a bind (local addr + port) match.
		 */
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
			if (ipversion == IPV4_VERSION) {
				if (!IPCL_CONN_MATCH(connp, protocol,
				    ipha->ipha_src, ipha->ipha_dst, ports))
					continue;
			} else {
				if (!IPCL_CONN_MATCH_V6(connp, protocol,
				    ip6h->ip6_src, ip6h->ip6_dst, ports))
					continue;
			}
		} else {
			if (ipversion == IPV4_VERSION) {
				if (!IPCL_BIND_MATCH(connp, protocol,
				    ipha->ipha_dst, lport))
					continue;
			} else {
				if (!IPCL_BIND_MATCH_V6(connp, protocol,
				    ip6h->ip6_dst, lport))
					continue;
			}
		}

		if (connp->conn_zoneid == zoneid ||
		    connp->conn_allzones ||
		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
		    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
		    (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
			break;
	}

	if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
	    !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
		    char *, "connp(1) could not receive mp(2)",
		    conn_t *, connp, mblk_t *, mp);
		connp = NULL;
	}

	if (connp != NULL)
		goto found;
	mutex_exit(&connfp->connf_lock);

	/* Try to look for a wildcard SCTP RAW socket match. */
	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
	mutex_enter(&connfp->connf_lock);
	for (connp = connfp->connf_head; connp != NULL;
	    connp = connp->conn_next) {
		/* We don't allow v4 fallback for v6 raw socket. */
		if (ipversion != connp->conn_ipversion)
			continue;
		if (!IPCL_ZONE_MATCH(connp, zoneid))
			continue;

		if (ipversion == IPV4_VERSION) {
			if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
				break;
		} else {
			if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
				break;
			}
		}
	}

	if (connp != NULL)
		goto found;

	mutex_exit(&connfp->connf_lock);
	return (NULL);

found:
	ASSERT(connp != NULL);
	CONN_INC_REF(connp);
	mutex_exit(&connfp->connf_lock);
	return (connp);
}

/*
 * kmem cache constructor for the combined conn_t + tcp_t allocation
 * (itc_t): zeroes both, initializes the locks/CVs, and preallocates the
 * TCP timer mblk and transmit attributes.  Returns ENOMEM on allocation
 * failure (the timer mblk is freed if the ixa allocation fails).
 */
/* ARGSUSED */
static int
tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
{
	itc_t	*itc = (itc_t *)buf;
	conn_t	*connp = &itc->itc_conn;
	tcp_t	*tcp = (tcp_t *)&itc[1];

	bzero(connp, sizeof (conn_t));
	bzero(tcp, sizeof (tcp_t));

	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
	tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
	if (tcp->tcp_timercache == NULL)
		return (ENOMEM);
	connp->conn_tcp = tcp;
	connp->conn_flags = IPCL_TCPCONN;
	connp->conn_proto = IPPROTO_TCP;
	tcp->tcp_connp = connp;
	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);

	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
	if (connp->conn_ixa == NULL) {
		tcp_timermp_free(tcp);
		return (ENOMEM);
	}
	connp->conn_ixa->ixa_refcnt = 1;
	connp->conn_ixa->ixa_protocol = connp->conn_proto;
	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
	return (0);
}

/* ARGSUSED */
static void
tcp_conn_destructor(void *buf, void *cdrarg)
{
	itc_t	*itc = (itc_t *)buf;
	conn_t	*connp = &itc->itc_conn;
	tcp_t	*tcp = (tcp_t *)&itc[1];

	ASSERT(connp->conn_flags & IPCL_TCPCONN);
ASSERT(tcp->tcp_connp == connp); 2188 ASSERT(connp->conn_tcp == tcp); 2189 tcp_timermp_free(tcp); 2190 mutex_destroy(&connp->conn_lock); 2191 cv_destroy(&connp->conn_cv); 2192 cv_destroy(&connp->conn_sq_cv); 2193 rw_destroy(&connp->conn_ilg_lock); 2194 2195 /* Can be NULL if constructor failed */ 2196 if (connp->conn_ixa != NULL) { 2197 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2198 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2199 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2200 ixa_refrele(connp->conn_ixa); 2201 } 2202 } 2203 2204 /* ARGSUSED */ 2205 static int 2206 ip_conn_constructor(void *buf, void *cdrarg, int kmflags) 2207 { 2208 itc_t *itc = (itc_t *)buf; 2209 conn_t *connp = &itc->itc_conn; 2210 2211 bzero(connp, sizeof (conn_t)); 2212 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2213 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2214 connp->conn_flags = IPCL_IPCCONN; 2215 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2216 2217 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2218 if (connp->conn_ixa == NULL) 2219 return (ENOMEM); 2220 connp->conn_ixa->ixa_refcnt = 1; 2221 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2222 return (0); 2223 } 2224 2225 /* ARGSUSED */ 2226 static void 2227 ip_conn_destructor(void *buf, void *cdrarg) 2228 { 2229 itc_t *itc = (itc_t *)buf; 2230 conn_t *connp = &itc->itc_conn; 2231 2232 ASSERT(connp->conn_flags & IPCL_IPCCONN); 2233 ASSERT(connp->conn_priv == NULL); 2234 mutex_destroy(&connp->conn_lock); 2235 cv_destroy(&connp->conn_cv); 2236 rw_destroy(&connp->conn_ilg_lock); 2237 2238 /* Can be NULL if constructor failed */ 2239 if (connp->conn_ixa != NULL) { 2240 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2241 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2242 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2243 ixa_refrele(connp->conn_ixa); 2244 } 2245 } 2246 2247 /* ARGSUSED */ 2248 static int 2249 udp_conn_constructor(void *buf, void *cdrarg, int kmflags) 2250 { 2251 itc_t *itc = 
(itc_t *)buf; 2252 conn_t *connp = &itc->itc_conn; 2253 udp_t *udp = (udp_t *)&itc[1]; 2254 2255 bzero(connp, sizeof (conn_t)); 2256 bzero(udp, sizeof (udp_t)); 2257 2258 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2259 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2260 connp->conn_udp = udp; 2261 connp->conn_flags = IPCL_UDPCONN; 2262 connp->conn_proto = IPPROTO_UDP; 2263 udp->udp_connp = connp; 2264 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2265 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2266 if (connp->conn_ixa == NULL) 2267 return (ENOMEM); 2268 connp->conn_ixa->ixa_refcnt = 1; 2269 connp->conn_ixa->ixa_protocol = connp->conn_proto; 2270 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2271 return (0); 2272 } 2273 2274 /* ARGSUSED */ 2275 static void 2276 udp_conn_destructor(void *buf, void *cdrarg) 2277 { 2278 itc_t *itc = (itc_t *)buf; 2279 conn_t *connp = &itc->itc_conn; 2280 udp_t *udp = (udp_t *)&itc[1]; 2281 2282 ASSERT(connp->conn_flags & IPCL_UDPCONN); 2283 ASSERT(udp->udp_connp == connp); 2284 ASSERT(connp->conn_udp == udp); 2285 mutex_destroy(&connp->conn_lock); 2286 cv_destroy(&connp->conn_cv); 2287 rw_destroy(&connp->conn_ilg_lock); 2288 2289 /* Can be NULL if constructor failed */ 2290 if (connp->conn_ixa != NULL) { 2291 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2292 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2293 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2294 ixa_refrele(connp->conn_ixa); 2295 } 2296 } 2297 2298 /* ARGSUSED */ 2299 static int 2300 rawip_conn_constructor(void *buf, void *cdrarg, int kmflags) 2301 { 2302 itc_t *itc = (itc_t *)buf; 2303 conn_t *connp = &itc->itc_conn; 2304 icmp_t *icmp = (icmp_t *)&itc[1]; 2305 2306 bzero(connp, sizeof (conn_t)); 2307 bzero(icmp, sizeof (icmp_t)); 2308 2309 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2310 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2311 connp->conn_icmp = icmp; 2312 connp->conn_flags = IPCL_RAWIPCONN; 
2313 connp->conn_proto = IPPROTO_ICMP; 2314 icmp->icmp_connp = connp; 2315 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2316 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2317 if (connp->conn_ixa == NULL) 2318 return (ENOMEM); 2319 connp->conn_ixa->ixa_refcnt = 1; 2320 connp->conn_ixa->ixa_protocol = connp->conn_proto; 2321 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2322 return (0); 2323 } 2324 2325 /* ARGSUSED */ 2326 static void 2327 rawip_conn_destructor(void *buf, void *cdrarg) 2328 { 2329 itc_t *itc = (itc_t *)buf; 2330 conn_t *connp = &itc->itc_conn; 2331 icmp_t *icmp = (icmp_t *)&itc[1]; 2332 2333 ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 2334 ASSERT(icmp->icmp_connp == connp); 2335 ASSERT(connp->conn_icmp == icmp); 2336 mutex_destroy(&connp->conn_lock); 2337 cv_destroy(&connp->conn_cv); 2338 rw_destroy(&connp->conn_ilg_lock); 2339 2340 /* Can be NULL if constructor failed */ 2341 if (connp->conn_ixa != NULL) { 2342 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2343 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2344 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2345 ixa_refrele(connp->conn_ixa); 2346 } 2347 } 2348 2349 /* ARGSUSED */ 2350 static int 2351 rts_conn_constructor(void *buf, void *cdrarg, int kmflags) 2352 { 2353 itc_t *itc = (itc_t *)buf; 2354 conn_t *connp = &itc->itc_conn; 2355 rts_t *rts = (rts_t *)&itc[1]; 2356 2357 bzero(connp, sizeof (conn_t)); 2358 bzero(rts, sizeof (rts_t)); 2359 2360 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2361 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2362 connp->conn_rts = rts; 2363 connp->conn_flags = IPCL_RTSCONN; 2364 rts->rts_connp = connp; 2365 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2366 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2367 if (connp->conn_ixa == NULL) 2368 return (ENOMEM); 2369 connp->conn_ixa->ixa_refcnt = 1; 2370 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2371 return (0); 2372 } 2373 
2374 /* ARGSUSED */ 2375 static void 2376 rts_conn_destructor(void *buf, void *cdrarg) 2377 { 2378 itc_t *itc = (itc_t *)buf; 2379 conn_t *connp = &itc->itc_conn; 2380 rts_t *rts = (rts_t *)&itc[1]; 2381 2382 ASSERT(connp->conn_flags & IPCL_RTSCONN); 2383 ASSERT(rts->rts_connp == connp); 2384 ASSERT(connp->conn_rts == rts); 2385 mutex_destroy(&connp->conn_lock); 2386 cv_destroy(&connp->conn_cv); 2387 rw_destroy(&connp->conn_ilg_lock); 2388 2389 /* Can be NULL if constructor failed */ 2390 if (connp->conn_ixa != NULL) { 2391 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2392 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2393 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2394 ixa_refrele(connp->conn_ixa); 2395 } 2396 } 2397 2398 /* ARGSUSED */ 2399 static int 2400 dccp_conn_constructor(void *buf, void *cdrarg, int kmflags) 2401 { 2402 itc_t *itc = (itc_t *)buf; 2403 conn_t *connp = &itc->itc_conn; 2404 dccp_t *dccp = (dccp_t *)&itc[1]; 2405 2406 bzero(connp, sizeof (conn_t)); 2407 bzero(dccp, sizeof (dccp_t)); 2408 2409 mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2410 cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2411 rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2412 2413 dccp->dccp_timercache = dccp_timermp_alloc(kmflags); 2414 if (dccp->dccp_timercache == NULL) { 2415 return (ENOMEM); 2416 } 2417 2418 connp->conn_dccp = dccp; 2419 connp->conn_flags = IPCL_DCCPCONN; 2420 connp->conn_proto = IPPROTO_DCCP; 2421 dccp->dccp_connp = connp; 2422 2423 connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2424 if (connp->conn_ixa == NULL) { 2425 return (NULL); 2426 } 2427 2428 connp->conn_ixa->ixa_refcnt = 1; 2429 connp->conn_ixa->ixa_protocol = connp->conn_proto; 2430 connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2431 2432 return (0); 2433 } 2434 2435 /* ARGSUSED */ 2436 static void 2437 dccp_conn_destructor(void *buf, void *cdrarg) 2438 { 2439 itc_t *itc = (itc_t *)buf; 2440 conn_t *connp = &itc->itc_conn; 2441 dccp_t *dccp = 
(dccp_t *)&itc[1]; 2442 2443 ASSERT(connp->conn_flags & IPCL_DCCPCONN); 2444 ASSERT(dccp->dccp_connp == connp); 2445 ASSERT(connp->conn_dccp == dccp); 2446 2447 dccp_timermp_free(dccp); 2448 2449 mutex_destroy(&connp->conn_lock); 2450 cv_destroy(&connp->conn_cv); 2451 rw_destroy(&connp->conn_ilg_lock); 2452 2453 if (connp->conn_ixa != NULL) { 2454 ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2455 ASSERT(connp->conn_ixa->ixa_ire == NULL); 2456 ASSERT(connp->conn_ixa->ixa_nce == NULL); 2457 2458 ixa_refrele(connp->conn_ixa); 2459 } 2460 } 2461 2462 /* 2463 * Called as part of ipcl_conn_destroy to assert and clear any pointers 2464 * in the conn_t. 2465 * 2466 * Below we list all the pointers in the conn_t as a documentation aid. 2467 * The ones that we can not ASSERT to be NULL are #ifdef'ed out. 2468 * If you add any pointers to the conn_t please add an ASSERT here 2469 * and #ifdef it out if it can't be actually asserted to be NULL. 2470 * In any case, we bzero most of the conn_t at the end of the function. 
 */
void
ipcl_conn_cleanup(conn_t *connp)
{
	ip_xmit_attr_t	*ixa;

	/*
	 * All fanout/hash linkage and IPsec state must already have been
	 * torn down by the time the conn is destroyed.
	 */
	ASSERT(connp->conn_latch == NULL);
	ASSERT(connp->conn_latch_in_policy == NULL);
	ASSERT(connp->conn_latch_in_action == NULL);
#ifdef notdef
	ASSERT(connp->conn_rq == NULL);
	ASSERT(connp->conn_wq == NULL);
#endif
	ASSERT(connp->conn_cred == NULL);
	ASSERT(connp->conn_g_fanout == NULL);
	ASSERT(connp->conn_g_next == NULL);
	ASSERT(connp->conn_g_prev == NULL);
	ASSERT(connp->conn_policy == NULL);
	ASSERT(connp->conn_fanout == NULL);
	ASSERT(connp->conn_next == NULL);
	ASSERT(connp->conn_prev == NULL);
	ASSERT(connp->conn_oper_pending_ill == NULL);
	ASSERT(connp->conn_ilg == NULL);
	ASSERT(connp->conn_drain_next == NULL);
	ASSERT(connp->conn_drain_prev == NULL);
#ifdef notdef
	/* conn_idl is not cleared when removed from idl list */
	ASSERT(connp->conn_idl == NULL);
#endif
	ASSERT(connp->conn_ipsec_opt_mp == NULL);
#ifdef notdef
	/* conn_netstack is cleared by the caller; needed by ixa_cleanup */
	ASSERT(connp->conn_netstack == NULL);
#endif

	ASSERT(connp->conn_helper_info == NULL);
	/* The ixa is reused for the conn's next incarnation, not freed. */
	ASSERT(connp->conn_ixa != NULL);
	ixa = connp->conn_ixa;
	ASSERT(ixa->ixa_refcnt == 1);
	/* Need to preserve ixa_protocol */
	ixa_cleanup(ixa);
	ixa->ixa_flags = 0;

	/* Clear out the conn_t fields that are not preserved */
	bzero(&connp->conn_start_clr,
	    sizeof (conn_t) -
	    ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
}

/*
 * All conns are inserted in a global multi-list for the benefit of
 * walkers. The walk is guaranteed to walk all open conns at the time
 * of the start of the walk exactly once. This property is needed to
 * achieve some cleanups during unplumb of interfaces. This is achieved
 * as follows.
2526 * 2527 * ipcl_conn_create and ipcl_conn_destroy are the only functions that 2528 * call the insert and delete functions below at creation and deletion 2529 * time respectively. The conn never moves or changes its position in this 2530 * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 2531 * won't increase due to walkers, once the conn deletion has started. Note 2532 * that we can't remove the conn from the global list and then wait for 2533 * the refcnt to drop to zero, since walkers would then see a truncated 2534 * list. CONN_INCIPIENT ensures that walkers don't start looking at 2535 * conns until ip_open is ready to make them globally visible. 2536 * The global round robin multi-list locks are held only to get the 2537 * next member/insertion/deletion and contention should be negligible 2538 * if the multi-list is much greater than the number of cpus. 2539 */ 2540 void 2541 ipcl_globalhash_insert(conn_t *connp) 2542 { 2543 int index; 2544 struct connf_s *connfp; 2545 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2546 2547 /* 2548 * No need for atomic here. Approximate even distribution 2549 * in the global lists is sufficient. 2550 */ 2551 ipst->ips_conn_g_index++; 2552 index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); 2553 2554 connp->conn_g_prev = NULL; 2555 /* 2556 * Mark as INCIPIENT, so that walkers will ignore this 2557 * for now, till ip_open is ready to make it visible globally. 
2558 */ 2559 connp->conn_state_flags |= CONN_INCIPIENT; 2560 2561 connfp = &ipst->ips_ipcl_globalhash_fanout[index]; 2562 /* Insert at the head of the list */ 2563 mutex_enter(&connfp->connf_lock); 2564 connp->conn_g_next = connfp->connf_head; 2565 if (connp->conn_g_next != NULL) 2566 connp->conn_g_next->conn_g_prev = connp; 2567 connfp->connf_head = connp; 2568 2569 /* The fanout bucket this conn points to */ 2570 connp->conn_g_fanout = connfp; 2571 2572 mutex_exit(&connfp->connf_lock); 2573 } 2574 2575 void 2576 ipcl_globalhash_remove(conn_t *connp) 2577 { 2578 struct connf_s *connfp; 2579 2580 /* 2581 * We were never inserted in the global multi list. 2582 * IPCL_NONE variety is never inserted in the global multilist 2583 * since it is presumed to not need any cleanup and is transient. 2584 */ 2585 if (connp->conn_g_fanout == NULL) 2586 return; 2587 2588 connfp = connp->conn_g_fanout; 2589 mutex_enter(&connfp->connf_lock); 2590 if (connp->conn_g_prev != NULL) 2591 connp->conn_g_prev->conn_g_next = connp->conn_g_next; 2592 else 2593 connfp->connf_head = connp->conn_g_next; 2594 if (connp->conn_g_next != NULL) 2595 connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 2596 mutex_exit(&connfp->connf_lock); 2597 2598 /* Better to stumble on a null pointer than to corrupt memory */ 2599 connp->conn_g_next = NULL; 2600 connp->conn_g_prev = NULL; 2601 connp->conn_g_fanout = NULL; 2602 } 2603 2604 /* 2605 * Walk the list of all conn_t's in the system, calling the function provided 2606 * With the specified argument for each. 2607 * Applies to both IPv4 and IPv6. 2608 * 2609 * CONNs may hold pointers to ills (conn_dhcpinit_ill and 2610 * conn_oper_pending_ill). To guard against stale pointers 2611 * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 2612 * unplumbed or removed. New conn_t's that are created while we are walking 2613 * may be missed by this walk, because they are not necessarily inserted 2614 * at the tail of the list. 
They are new conn_t's and thus don't have any
 * stale pointers. The CONN_CLOSING flag ensures that no new reference
 * is created to the struct that is going away.
 */
void
ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
{
	int	i;
	conn_t	*connp;
	conn_t	*prev_connp;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
		prev_connp = NULL;
		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
		while (connp != NULL) {
			mutex_enter(&connp->conn_lock);
			/* Skip conns being torn down or not yet visible */
			if (connp->conn_state_flags &
			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
				mutex_exit(&connp->conn_lock);
				connp = connp->conn_g_next;
				continue;
			}
			/*
			 * Hold a reference so this conn stays on the list
			 * while the bucket lock is dropped for the callback.
			 * The reference on the previously visited conn is
			 * released only after the callback returns, which
			 * keeps our position in the list valid.
			 */
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&connp->conn_lock);
			mutex_exit(
			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
			(*func)(connp, arg);
			if (prev_connp != NULL)
				CONN_DEC_REF(prev_connp);
			mutex_enter(
			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
			prev_connp = connp;
			connp = connp->conn_g_next;
		}
		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
		/* Drop the reference on the last conn visited in the bucket */
		if (prev_connp != NULL)
			CONN_DEC_REF(prev_connp);
	}
}

/*
 * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
 * the {src, dst, lport, fport} quadruplet. Returns with conn reference
 * held; caller must call CONN_DEC_REF. Only checks for connected entries
 * (peer tcp in ESTABLISHED state).
 */
conn_t *
ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
    ip_stack_t *ipst)
{
	uint32_t ports;
	uint16_t *pports = (uint16_t *)&ports;
	connf_t	*connfp;
	conn_t	*tconnp;
	boolean_t zone_chk;

	/*
	 * If either the source or destination address is loopback, then
	 * both endpoints must be in the same Zone.
Otherwise, both of 2674 * the addresses are system-wide unique (tcp is in ESTABLISHED 2675 * state) and the endpoints may reside in different Zones. 2676 */ 2677 zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 2678 ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 2679 2680 pports[0] = tcpha->tha_fport; 2681 pports[1] = tcpha->tha_lport; 2682 2683 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2684 ports, ipst)]; 2685 2686 mutex_enter(&connfp->connf_lock); 2687 for (tconnp = connfp->connf_head; tconnp != NULL; 2688 tconnp = tconnp->conn_next) { 2689 2690 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 2691 ipha->ipha_dst, ipha->ipha_src, ports) && 2692 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 2693 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 2694 2695 ASSERT(tconnp != connp); 2696 CONN_INC_REF(tconnp); 2697 mutex_exit(&connfp->connf_lock); 2698 return (tconnp); 2699 } 2700 } 2701 mutex_exit(&connfp->connf_lock); 2702 return (NULL); 2703 } 2704 2705 /* 2706 * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 2707 * the {src, dst, lport, fport} quadruplet. Returns with conn reference 2708 * held; caller must call CONN_DEC_REF. Only checks for connected entries 2709 * (peer tcp in ESTABLISHED state). 2710 */ 2711 conn_t * 2712 ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha, 2713 ip_stack_t *ipst) 2714 { 2715 uint32_t ports; 2716 uint16_t *pports = (uint16_t *)&ports; 2717 connf_t *connfp; 2718 conn_t *tconnp; 2719 boolean_t zone_chk; 2720 2721 /* 2722 * If either the source of destination address is loopback, then 2723 * both endpoints must be in the same Zone. Otherwise, both of 2724 * the addresses are system-wide unique (tcp is in ESTABLISHED 2725 * state) and the endpoints may reside in different Zones. 
We 2726 * don't do Zone check for link local address(es) because the 2727 * current Zone implementation treats each link local address as 2728 * being unique per system node, i.e. they belong to global Zone. 2729 */ 2730 zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 2731 IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 2732 2733 pports[0] = tcpha->tha_fport; 2734 pports[1] = tcpha->tha_lport; 2735 2736 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2737 ports, ipst)]; 2738 2739 mutex_enter(&connfp->connf_lock); 2740 for (tconnp = connfp->connf_head; tconnp != NULL; 2741 tconnp = tconnp->conn_next) { 2742 2743 /* We skip conn_bound_if check here as this is loopback tcp */ 2744 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 2745 ip6h->ip6_dst, ip6h->ip6_src, ports) && 2746 tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 2747 (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 2748 2749 ASSERT(tconnp != connp); 2750 CONN_INC_REF(tconnp); 2751 mutex_exit(&connfp->connf_lock); 2752 return (tconnp); 2753 } 2754 } 2755 mutex_exit(&connfp->connf_lock); 2756 return (NULL); 2757 } 2758 2759 /* 2760 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 2761 * Returns with conn reference held. Caller must call CONN_DEC_REF. 2762 * Only checks for connected entries i.e. no INADDR_ANY checks. 
2763 */ 2764 conn_t * 2765 ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state, 2766 ip_stack_t *ipst) 2767 { 2768 uint32_t ports; 2769 uint16_t *pports; 2770 connf_t *connfp; 2771 conn_t *tconnp; 2772 2773 pports = (uint16_t *)&ports; 2774 pports[0] = tcpha->tha_fport; 2775 pports[1] = tcpha->tha_lport; 2776 2777 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2778 ports, ipst)]; 2779 2780 mutex_enter(&connfp->connf_lock); 2781 for (tconnp = connfp->connf_head; tconnp != NULL; 2782 tconnp = tconnp->conn_next) { 2783 2784 if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 2785 ipha->ipha_dst, ipha->ipha_src, ports) && 2786 tconnp->conn_tcp->tcp_state >= min_state) { 2787 2788 CONN_INC_REF(tconnp); 2789 mutex_exit(&connfp->connf_lock); 2790 return (tconnp); 2791 } 2792 } 2793 mutex_exit(&connfp->connf_lock); 2794 return (NULL); 2795 } 2796 2797 /* 2798 * Find an exact {src, dst, lport, fport} match for a bounced datagram. 2799 * Returns with conn reference held. Caller must call CONN_DEC_REF. 2800 * Only checks for connected entries i.e. no INADDR_ANY checks. 2801 * Match on ifindex in addition to addresses. 
2802 */ 2803 conn_t * 2804 ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 2805 uint_t ifindex, ip_stack_t *ipst) 2806 { 2807 tcp_t *tcp; 2808 uint32_t ports; 2809 uint16_t *pports; 2810 connf_t *connfp; 2811 conn_t *tconnp; 2812 2813 pports = (uint16_t *)&ports; 2814 pports[0] = tcpha->tha_fport; 2815 pports[1] = tcpha->tha_lport; 2816 2817 connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2818 ports, ipst)]; 2819 2820 mutex_enter(&connfp->connf_lock); 2821 for (tconnp = connfp->connf_head; tconnp != NULL; 2822 tconnp = tconnp->conn_next) { 2823 2824 tcp = tconnp->conn_tcp; 2825 if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 2826 ip6h->ip6_dst, ip6h->ip6_src, ports) && 2827 tcp->tcp_state >= min_state && 2828 (tconnp->conn_bound_if == 0 || 2829 tconnp->conn_bound_if == ifindex)) { 2830 2831 CONN_INC_REF(tconnp); 2832 mutex_exit(&connfp->connf_lock); 2833 return (tconnp); 2834 } 2835 } 2836 mutex_exit(&connfp->connf_lock); 2837 return (NULL); 2838 } 2839 2840 /* 2841 * Same as ipcl_tcp_lookup_reversed_ipv4. 
 */
conn_t *
ipcl_dccp_lookup_reversed_ipv4(ipha_t *ipha, dccpha_t *dccpha, int min_state,
    ip_stack_t *ipst)
{
	conn_t		*tconnp;
	connf_t		*connfp;
	uint16_t	*pports;
	uint32_t	ports;

	/* Reverse lookup: the packet's fport/lport become our lport/fport */
	pports = (uint16_t *)&ports;
	pports[0] = dccpha->dha_fport;
	pports[1] = dccpha->dha_lport;

	connfp = &ipst->ips_ipcl_dccp_conn_fanout[IPCL_DCCP_CONN_HASH(
	    ipha->ipha_dst, ports, ipst)];

	mutex_enter(&connfp->connf_lock);
	for (tconnp = connfp->connf_head; tconnp != NULL;
	    tconnp = tconnp->conn_next) {
		if (IPCL_CONN_MATCH(tconnp, IPPROTO_DCCP,
		    ipha->ipha_dst, ipha->ipha_src, ports) &&
		    tconnp->conn_dccp->dccp_state >= min_state) {
			/* Returns with a reference held; caller must release */
			CONN_INC_REF(tconnp);
			mutex_exit(&connfp->connf_lock);
			return (tconnp);
		}
	}
	mutex_exit(&connfp->connf_lock);

	return (NULL);
}

/*
 * Same as ipcl_tcp_lookup_reversed_ipv6.
 *
 * NOTE(review): this function is currently a stub — the entire lookup
 * below is commented out, so it always returns NULL and DCCP/IPv6
 * reverse lookups never match.  The disabled code was copied from
 * ipcl_tcp_lookup_reversed_ipv6 and still references the TCP conn
 * fanout, IPPROTO_TCP and tcp_state; a working implementation needs the
 * DCCP equivalents (ips_ipcl_dccp_conn_fanout, IPPROTO_DCCP,
 * conn_dccp->dccp_state — cf. ipcl_dccp_lookup_reversed_ipv4 above).
 * TODO: implement once a DCCP v6 conn-hash macro is available.
 */
conn_t *
ipcl_dccp_lookup_reversed_ipv6(ip6_t *ip6h, dccpha_t *dccpha, int min_state,
    uint_t ifindex, ip_stack_t *ipst)
{
	conn_t		*tconnp;
	tcp_t		*tcp;
	connf_t		*connfp;
	uint32_t	ports;
	uint16_t	*pports;

	pports = (uint16_t *)&ports;
	pports[0] = dccpha->dha_fport;
	pports[1] = dccpha->dha_lport;
/*
	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
	    ports, ipst)];

	mutex_enter(&connfp->connf_lock);
	for (tconnp = connfp->connf_head; tconnp != NULL;
	    tconnp = tconnp->conn_next) {

		tcp = tconnp->conn_tcp;
		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
		    tcp->tcp_state >= min_state &&
		    (tconnp->conn_bound_if == 0 ||
		    tconnp->conn_bound_if == ifindex)) {

			CONN_INC_REF(tconnp);
			mutex_exit(&connfp->connf_lock);
			return (tconnp);
		}
	}
	mutex_exit(&connfp->connf_lock);
*/
	return (NULL);
}

/*
 * Finds
a TCP/IPv4 listening connection; called by tcp_disconnect to locate 2918 * a listener when changing state. 2919 */ 2920 conn_t * 2921 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, 2922 ip_stack_t *ipst) 2923 { 2924 connf_t *bind_connfp; 2925 conn_t *connp; 2926 tcp_t *tcp; 2927 2928 /* 2929 * Avoid false matches for packets sent to an IP destination of 2930 * all zeros. 2931 */ 2932 if (laddr == 0) 2933 return (NULL); 2934 2935 ASSERT(zoneid != ALL_ZONES); 2936 2937 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 2938 mutex_enter(&bind_connfp->connf_lock); 2939 for (connp = bind_connfp->connf_head; connp != NULL; 2940 connp = connp->conn_next) { 2941 tcp = connp->conn_tcp; 2942 if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 2943 IPCL_ZONE_MATCH(connp, zoneid) && 2944 (tcp->tcp_listener == NULL)) { 2945 CONN_INC_REF(connp); 2946 mutex_exit(&bind_connfp->connf_lock); 2947 return (connp); 2948 } 2949 } 2950 mutex_exit(&bind_connfp->connf_lock); 2951 return (NULL); 2952 } 2953 2954 /* 2955 * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 2956 * a listener when changing state. 2957 */ 2958 conn_t * 2959 ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 2960 zoneid_t zoneid, ip_stack_t *ipst) 2961 { 2962 connf_t *bind_connfp; 2963 conn_t *connp = NULL; 2964 tcp_t *tcp; 2965 2966 /* 2967 * Avoid false matches for packets sent to an IP destination of 2968 * all zeros. 
2969 */ 2970 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 2971 return (NULL); 2972 2973 ASSERT(zoneid != ALL_ZONES); 2974 2975 bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 2976 mutex_enter(&bind_connfp->connf_lock); 2977 for (connp = bind_connfp->connf_head; connp != NULL; 2978 connp = connp->conn_next) { 2979 tcp = connp->conn_tcp; 2980 if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 2981 IPCL_ZONE_MATCH(connp, zoneid) && 2982 (connp->conn_bound_if == 0 || 2983 connp->conn_bound_if == ifindex) && 2984 tcp->tcp_listener == NULL) { 2985 CONN_INC_REF(connp); 2986 mutex_exit(&bind_connfp->connf_lock); 2987 return (connp); 2988 } 2989 } 2990 mutex_exit(&bind_connfp->connf_lock); 2991 return (NULL); 2992 } 2993 2994 /* 2995 * ipcl_get_next_conn 2996 * get the next entry in the conn global list 2997 * and put a reference on the next_conn. 2998 * decrement the reference on the current conn. 2999 * 3000 * This is an iterator based walker function that also provides for 3001 * some selection by the caller. It walks through the conn_hash bucket 3002 * searching for the next valid connp in the list, and selects connections 3003 * that are neither closed nor condemned. It also REFHOLDS the conn 3004 * thus ensuring that the conn exists when the caller uses the conn. 3005 */ 3006 conn_t * 3007 ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 3008 { 3009 conn_t *next_connp; 3010 3011 if (connfp == NULL) 3012 return (NULL); 3013 3014 mutex_enter(&connfp->connf_lock); 3015 3016 next_connp = (connp == NULL) ? 
3017 connfp->connf_head : connp->conn_g_next; 3018 3019 while (next_connp != NULL) { 3020 mutex_enter(&next_connp->conn_lock); 3021 if (!(next_connp->conn_flags & conn_flags) || 3022 (next_connp->conn_state_flags & 3023 (CONN_CONDEMNED | CONN_INCIPIENT))) { 3024 /* 3025 * This conn has been condemned or 3026 * is closing, or the flags don't match 3027 */ 3028 mutex_exit(&next_connp->conn_lock); 3029 next_connp = next_connp->conn_g_next; 3030 continue; 3031 } 3032 CONN_INC_REF_LOCKED(next_connp); 3033 mutex_exit(&next_connp->conn_lock); 3034 break; 3035 } 3036 3037 mutex_exit(&connfp->connf_lock); 3038 3039 if (connp != NULL) 3040 CONN_DEC_REF(connp); 3041 3042 return (next_connp); 3043 } 3044 3045 #ifdef CONN_DEBUG 3046 /* 3047 * Trace of the last NBUF refhold/refrele 3048 */ 3049 int 3050 conn_trace_ref(conn_t *connp) 3051 { 3052 int last; 3053 conn_trace_t *ctb; 3054 3055 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3056 last = connp->conn_trace_last; 3057 last++; 3058 if (last == CONN_TRACE_MAX) 3059 last = 0; 3060 3061 ctb = &connp->conn_trace_buf[last]; 3062 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 3063 connp->conn_trace_last = last; 3064 return (1); 3065 } 3066 3067 int 3068 conn_untrace_ref(conn_t *connp) 3069 { 3070 int last; 3071 conn_trace_t *ctb; 3072 3073 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3074 last = connp->conn_trace_last; 3075 last++; 3076 if (last == CONN_TRACE_MAX) 3077 last = 0; 3078 3079 ctb = &connp->conn_trace_buf[last]; 3080 ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 3081 connp->conn_trace_last = last; 3082 return (1); 3083 } 3084 #endif