Print this page
dccp: conn_t


  50  * and when connection is processed via squeue (squeue processing may be
  51  * asynchronous and the reference protects the connection from being destroyed
  52  * before its processing is finished).
  53  *
  54  * conn_recv is used to pass up packets to the ULP.
  55  * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
  56  * a listener, and changes to tcp_input_listener as the listener has picked a
  57  * good squeue. For other cases it is set to tcp_input_data.
  58  *
  59  * conn_recvicmp is used to pass up ICMP errors to the ULP.
  60  *
  61  * Classifier uses several hash tables:
  62  *
  63  *      ipcl_conn_fanout:       contains all TCP connections in CONNECTED state
  64  *      ipcl_bind_fanout:       contains all connections in BOUND state
  65  *      ipcl_proto_fanout:      IPv4 protocol fanout
  66  *      ipcl_proto_fanout_v6:   IPv6 protocol fanout
  67  *      ipcl_udp_fanout:        contains all UDP connections
  68  *      ipcl_iptun_fanout:      contains all IP tunnel connections
  69  *      ipcl_globalhash_fanout: contains all connections


  70  *
  71  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
  72  * which need to view all existing connections.
  73  *
  74  * All tables are protected by per-bucket locks. When both per-bucket lock and
  75  * connection lock need to be held, the per-bucket lock should be acquired
  76  * first, followed by the connection lock.
  77  *
  78  * All functions doing search in one of these tables increment a reference
  79  * counter on the connection found (if any). This reference should be dropped
  80  * when the caller has finished processing the connection.
  81  *
  82  *
  83  * INTERFACES:
  84  * ===========
  85  *
  86  * Connection Lookup:
  87  * ------------------
  88  *
  89  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)


 206  *
 207  * void ipcl_hash_remove(connp);
 208  *
 209  *      Removes the 'connp' from the connection fanout table.
 210  *
 211  * Connection Creation/Destruction
 212  * -------------------------------
 213  *
 214  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 215  *
 216  *      Creates a new conn based on the type flag, inserts it into
 217  *      globalhash table.
 218  *
 219  *      type:   This flag determines the type of conn_t which needs to be
 220  *              created i.e., which kmem_cache it comes from.
 221  *              IPCL_TCPCONN    indicates a TCP connection
 222  *              IPCL_SCTPCONN   indicates a SCTP connection
 223  *              IPCL_UDPCONN    indicates a UDP conn_t.
 224  *              IPCL_RAWIPCONN  indicates a RAWIP/ICMP conn_t.
 225  *              IPCL_RTSCONN    indicates a RTS conn_t.

 226  *              IPCL_IPCCONN    indicates all other connections.
 227  *
 228  * void ipcl_conn_destroy(connp)
 229  *
 230  *      Destroys the connection state, removes it from the global
 231  *      connection hash table and frees its memory.
 232  */
 233 
 234 #include <sys/types.h>
 235 #include <sys/stream.h>
 236 #include <sys/stropts.h>
 237 #include <sys/sysmacros.h>
 238 #include <sys/strsubr.h>
 239 #include <sys/strsun.h>
 240 #define _SUN_TPI_VERSION 2
 241 #include <sys/ddi.h>
 242 #include <sys/cmn_err.h>
 243 #include <sys/debug.h>
 244 
 245 #include <sys/systm.h>
 246 #include <sys/param.h>
 247 #include <sys/kmem.h>
 248 #include <sys/isa_defs.h>
 249 #include <inet/common.h>
 250 #include <netinet/ip6.h>
 251 #include <netinet/icmp6.h>
 252 
 253 #include <inet/ip.h>
 254 #include <inet/ip_if.h>
 255 #include <inet/ip_ire.h>
 256 #include <inet/ip6.h>
 257 #include <inet/ip_ndp.h>
 258 #include <inet/ip_impl.h>
 259 #include <inet/udp_impl.h>

 260 #include <inet/sctp_ip.h>
 261 #include <inet/sctp/sctp_impl.h>
 262 #include <inet/rawip_impl.h>
 263 #include <inet/rts_impl.h>
 264 #include <inet/iptun/iptun_impl.h>
 265 
 266 #include <sys/cpuvar.h>
 267 
 268 #include <inet/ipclassifier.h>
 269 #include <inet/tcp.h>
 270 #include <inet/ipsec_impl.h>
 271 
 272 #include <sys/tsol/tnet.h>
 273 #include <sys/sockio.h>
 274 
 275 /* Old value for compatibility. Settable in /etc/system */
 276 uint_t tcp_conn_hash_size = 0;
 277 
 278 /* New value. Zero means choose automatically.  Settable in /etc/system */
 279 uint_t ipcl_conn_hash_size = 0;
 280 uint_t ipcl_conn_hash_memfactor = 8192;
 281 uint_t ipcl_conn_hash_maxsize = 82500;
 282 
 283 /* bind/udp fanout table size */
 284 uint_t ipcl_bind_fanout_size = 512;
 285 uint_t ipcl_udp_fanout_size = 16384;
 286 




 287 /* Raw socket fanout size.  Must be a power of 2. */
 288 uint_t ipcl_raw_fanout_size = 256;
 289 
 290 /*
 291  * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 292  * expect that most large deployments would have hundreds of tunnels, and
 293  * thousands in the extreme case.
 294  */
 295 uint_t ipcl_iptun_fanout_size = 6143;
 296 
 297 /*
 298  * Power of 2^N Primes useful for hashing for N of 0-28,
 299  * these primes are the nearest prime <= 2^N - 2^(N-2).
 300  */
 301 
 302 #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,  \
 303                 6143, 12281, 24571, 49139, 98299, 196597, 393209,       \
 304                 786431, 1572853, 3145721, 6291449, 12582893, 25165813,  \
 305                 50331599, 100663291, 201326557, 0}
 306 
 307 /*
 308  * Wrapper union used so that the conn and what follows it (tcp_t, etc)
 309  * are aligned on cache lines.
      *
      * itc_conn is the member that is actually used; itcu_filler only makes
      * the union at least CACHE_ALIGN(conn_s) bytes large (presumably the size
      * of the conn structure rounded up to a cache-line multiple -- confirm
      * against the macro definition).  The transport kmem caches in this file
      * allocate sizeof (itc_t) plus the size of the protocol structure, and
      * the protocol structure is located at &itc[1].
 310  */
 311 typedef union itc_s {
 312         conn_t  itc_conn;
 313         char    itcu_filler[CACHE_ALIGN(conn_s)];
 314 } itc_t;
 315 
 316 struct kmem_cache  *tcp_conn_cache;
 317 struct kmem_cache  *ip_conn_cache;
 318 extern struct kmem_cache  *sctp_conn_cache;
 319 struct kmem_cache  *udp_conn_cache;
 320 struct kmem_cache  *rawip_conn_cache;
 321 struct kmem_cache  *rts_conn_cache;

 322 
 323 extern void     tcp_timermp_free(tcp_t *);
 324 extern mblk_t   *tcp_timermp_alloc(int);
 325 
 326 static int      ip_conn_constructor(void *, void *, int);
 327 static void     ip_conn_destructor(void *, void *);
 328 
 329 static int      tcp_conn_constructor(void *, void *, int);
 330 static void     tcp_conn_destructor(void *, void *);
 331 
 332 static int      udp_conn_constructor(void *, void *, int);
 333 static void     udp_conn_destructor(void *, void *);
 334 
 335 static int      rawip_conn_constructor(void *, void *, int);
 336 static void     rawip_conn_destructor(void *, void *);
 337 
 338 static int      rts_conn_constructor(void *, void *, int);
 339 static void     rts_conn_destructor(void *, void *);
 340 



 341 /*
 342  * Global (for all stack instances) init routine
 343  */
 344 void
 345 ipcl_g_init(void)
 346 {
 347         ip_conn_cache = kmem_cache_create("ip_conn_cache",
 348             sizeof (conn_t), CACHE_ALIGN_SIZE,
 349             ip_conn_constructor, ip_conn_destructor,
 350             NULL, NULL, NULL, 0);
 351 
 352         tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
 353             sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
 354             tcp_conn_constructor, tcp_conn_destructor,
 355             tcp_conn_reclaim, NULL, NULL, 0);
 356 
 357         udp_conn_cache = kmem_cache_create("udp_conn_cache",
 358             sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
 359             udp_conn_constructor, udp_conn_destructor,
 360             NULL, NULL, NULL, 0);
 361 
 362         rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
 363             sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
 364             rawip_conn_constructor, rawip_conn_destructor,
 365             NULL, NULL, NULL, 0);
 366 
 367         rts_conn_cache = kmem_cache_create("rts_conn_cache",
 368             sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
 369             rts_conn_constructor, rts_conn_destructor,
 370             NULL, NULL, NULL, 0);






 371 }
 372 
 373 /*
 374  * ipclassifier initialization routine, sets up hash tables.
 375  */
 376 void
 377 ipcl_init(ip_stack_t *ipst)
 378 {
 379         int i;
 380         int sizes[] = P2Ps();
 381 
 382         /*
 383          * Calculate size of conn fanout table from /etc/system settings
 384          */
 385         if (ipcl_conn_hash_size != 0) {
 386                 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
 387         } else if (tcp_conn_hash_size != 0) {
 388                 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
 389         } else {
 390                 extern pgcnt_t freemem;


 393                     (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
 394 
 395                 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
 396                         ipst->ips_ipcl_conn_fanout_size =
 397                             ipcl_conn_hash_maxsize;
 398                 }
 399         }
 400 
 401         for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
 402                 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
 403                         break;
 404                 }
 405         }
 406         if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
 407                 /* Out of range, use the 2^16 value */
 408                 ipst->ips_ipcl_conn_fanout_size = sizes[16];
 409         }
 410 
 411         /* Take values from /etc/system */
 412         ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;


 413         ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
 414         ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
 415         ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
 416 
 417         ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
 418 
 419         ipst->ips_ipcl_conn_fanout = kmem_zalloc(
 420             ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
 421 
 422         for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 423                 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
 424                     MUTEX_DEFAULT, NULL);
 425         }
 426 
 427         ipst->ips_ipcl_bind_fanout = kmem_zalloc(
 428             ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
 429 
 430         for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
 431                 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
 432                     MUTEX_DEFAULT, NULL);


 460         ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
 461             ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
 462         for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
 463                 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
 464                     MUTEX_DEFAULT, NULL);
 465         }
 466 
 467         ipst->ips_ipcl_raw_fanout = kmem_zalloc(
 468             ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
 469         for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 470                 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
 471                     MUTEX_DEFAULT, NULL);
 472         }
 473 
 474         ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
 475             sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
 476         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 477                 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
 478                     NULL, MUTEX_DEFAULT, NULL);
 479         }














 480 }
 481 
 482 void
 483 ipcl_g_destroy(void)
 484 {
 485         kmem_cache_destroy(ip_conn_cache);
 486         kmem_cache_destroy(tcp_conn_cache);
 487         kmem_cache_destroy(udp_conn_cache);
 488         kmem_cache_destroy(rawip_conn_cache);
 489         kmem_cache_destroy(rts_conn_cache);

 490 }
 491 
 492 /*
 493  * All user-level and kernel use of the stack must be gone
 494  * by now.
 495  */
 496 void
 497 ipcl_destroy(ip_stack_t *ipst)
 498 {
 499         int i;
 500 
 501         for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 502                 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
 503                 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
 504         }
 505         kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
 506             sizeof (connf_t));
 507         ipst->ips_ipcl_conn_fanout = NULL;
 508 
 509         for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {


 545         kmem_free(ipst->ips_ipcl_iptun_fanout,
 546             ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
 547         ipst->ips_ipcl_iptun_fanout = NULL;
 548 
 549         for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 550                 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
 551                 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
 552         }
 553         kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
 554             sizeof (connf_t));
 555         ipst->ips_ipcl_raw_fanout = NULL;
 556 
 557         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 558                 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
 559                 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
 560         }
 561         kmem_free(ipst->ips_ipcl_globalhash_fanout,
 562             sizeof (connf_t) * CONN_G_HASH_SIZE);
 563         ipst->ips_ipcl_globalhash_fanout = NULL;
 564 
















 565         ASSERT(ipst->ips_rts_clients->connf_head == NULL);
 566         mutex_destroy(&ipst->ips_rts_clients->connf_lock);
 567         kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
 568         ipst->ips_rts_clients = NULL;
 569 }
 570 
 571 /*
 572  * conn creation routine. Initializes the conn, sets the reference
 573  * and inserts it in the global hash table.
 574  */
 575 conn_t *
 576 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
 577 {
 578         conn_t  *connp;
 579         struct kmem_cache *conn_cache;
 580 
 581         switch (type) {
 582         case IPCL_SCTPCONN:
 583                 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
 584                         return (NULL);


 593         case IPCL_TCPCONN:
 594                 conn_cache = tcp_conn_cache;
 595                 break;
 596 
 597         case IPCL_UDPCONN:
 598                 conn_cache = udp_conn_cache;
 599                 break;
 600 
 601         case IPCL_RAWIPCONN:
 602                 conn_cache = rawip_conn_cache;
 603                 break;
 604 
 605         case IPCL_RTSCONN:
 606                 conn_cache = rts_conn_cache;
 607                 break;
 608 
 609         case IPCL_IPCCONN:
 610                 conn_cache = ip_conn_cache;
 611                 break;
 612 




 613         default:
 614                 connp = NULL;
 615                 ASSERT(0);
 616         }
 617 
 618         if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
 619                 return (NULL);
 620 
 621         connp->conn_ref = 1;
 622         netstack_hold(ns);
 623         connp->conn_netstack = ns;
 624         connp->conn_ixa->ixa_ipst = ns->netstack_ip;
 625         connp->conn_ixa->ixa_conn_id = (long)connp;
 626         ipcl_globalhash_insert(connp);
 627         return (connp);
 628 }
 629 
 630 void
 631 ipcl_conn_destroy(conn_t *connp)
 632 {


 703                         ASSERT(tcp->tcp_tcps == NULL);
 704                         connp->conn_netstack = NULL;
 705                         connp->conn_ixa->ixa_ipst = NULL;
 706                         netstack_rele(ns);
 707                 }
 708 
 709                 bzero(tcp, sizeof (tcp_t));
 710 
 711                 tcp->tcp_timercache = mp;
 712                 tcp->tcp_connp = connp;
 713                 kmem_cache_free(tcp_conn_cache, connp);
 714                 return;
 715         }
 716 
 717         if (connp->conn_flags & IPCL_SCTPCONN) {
 718                 ASSERT(ns != NULL);
 719                 sctp_free(connp);
 720                 return;
 721         }
 722 



























 723         ipcl_conn_cleanup(connp);
 724         if (ns != NULL) {
 725                 connp->conn_netstack = NULL;
 726                 connp->conn_ixa->ixa_ipst = NULL;
 727                 netstack_rele(ns);
 728         }
 729 
 730         /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
 731         if (connp->conn_flags & IPCL_UDPCONN) {
 732                 connp->conn_flags = IPCL_UDPCONN;
 733                 kmem_cache_free(udp_conn_cache, connp);
 734         } else if (connp->conn_flags & IPCL_RAWIPCONN) {
 735                 connp->conn_flags = IPCL_RAWIPCONN;
 736                 connp->conn_proto = IPPROTO_ICMP;
 737                 connp->conn_ixa->ixa_protocol = connp->conn_proto;
 738                 kmem_cache_free(rawip_conn_cache, connp);
 739         } else if (connp->conn_flags & IPCL_RTSCONN) {
 740                 connp->conn_flags = IPCL_RTSCONN;
 741                 kmem_cache_free(rts_conn_cache, connp);
 742         } else {


1219                 connfp = &ipst->ips_ipcl_bind_fanout[
1220                     IPCL_BIND_HASH(lport, ipst)];
1221                 if (connp->conn_laddr_v4 != INADDR_ANY) {
1222                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1223                 } else {
1224                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1225                 }
1226                 if (cl_inet_listen != NULL) {
1227                         ASSERT(connp->conn_ipversion == IPV4_VERSION);
1228                         connp->conn_flags |= IPCL_CL_LISTENER;
1229                         (*cl_inet_listen)(
1230                             connp->conn_netstack->netstack_stackid,
1231                             IPPROTO_TCP, AF_INET,
1232                             (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1233                 }
1234                 break;
1235 
1236         case IPPROTO_SCTP:
1237                 ret = ipcl_sctp_hash_insert(connp, lport);
1238                 break;










1239         }



1240 
1241         return (ret);
1242 }
1243 
1244 int
1245 ipcl_bind_insert_v6(conn_t *connp)
1246 {
1247         connf_t         *connfp;
1248         int             ret = 0;
1249         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1250         uint16_t        lport = connp->conn_lport;
1251         uint8_t         protocol = connp->conn_proto;
1252 
1253         if (IPCL_IS_IPTUN(connp)) {
1254                 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1255         }
1256 
1257         switch (protocol) {
1258         default:
1259                 if (is_system_labeled() &&


1292                         uint8_t         *laddrp;
1293 
1294                         if (connp->conn_ipversion == IPV6_VERSION) {
1295                                 addr_family = AF_INET6;
1296                                 laddrp =
1297                                     (uint8_t *)&connp->conn_bound_addr_v6;
1298                         } else {
1299                                 addr_family = AF_INET;
1300                                 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
1301                         }
1302                         connp->conn_flags |= IPCL_CL_LISTENER;
1303                         (*cl_inet_listen)(
1304                             connp->conn_netstack->netstack_stackid,
1305                             IPPROTO_TCP, addr_family, laddrp, lport, NULL);
1306                 }
1307                 break;
1308 
1309         case IPPROTO_SCTP:
1310                 ret = ipcl_sctp_hash_insert(connp, lport);
1311                 break;












1312         }
1313 
1314         return (ret);
1315 }
1316 
1317 /*
1318  * ipcl_conn_hash insertion routines.
1319  * The caller has already set conn_proto and the addresses/ports in the conn_t.
1320  */
1321 
1322 int
1323 ipcl_conn_insert(conn_t *connp)
1324 {
1325         if (connp->conn_ipversion == IPV6_VERSION)
1326                 return (ipcl_conn_insert_v6(connp));
1327         else
1328                 return (ipcl_conn_insert_v4(connp));
1329 }
1330 
1331 int


1376                         IPCL_HASH_REMOVE(connp);
1377                         mutex_enter(&connfp->connf_lock);
1378                 }
1379 
1380                 ASSERT(connp->conn_recv != NULL);
1381                 ASSERT(connp->conn_recvicmp != NULL);
1382 
1383                 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1384                 mutex_exit(&connfp->connf_lock);
1385                 break;
1386 
1387         case IPPROTO_SCTP:
1388                 /*
1389                  * The raw socket may have already been bound, remove it
1390                  * from the hash first.
1391                  */
1392                 IPCL_HASH_REMOVE(connp);
1393                 ret = ipcl_sctp_hash_insert(connp, lport);
1394                 break;
1395 


























1396         default:
1397                 /*
1398                  * Check for conflicts among MAC exempt bindings.  For
1399                  * transports with port numbers, this is done by the upper
1400                  * level per-transport binding logic.  For all others, it's
1401                  * done here.
1402                  */
1403                 if (is_system_labeled() &&
1404                     check_exempt_conflict_v4(connp, ipst))
1405                         return (EADDRINUSE);
1406                 /* FALLTHROUGH */
1407 
1408         case IPPROTO_UDP:
1409                 if (protocol == IPPROTO_UDP) {
1410                         connfp = &ipst->ips_ipcl_udp_fanout[
1411                             IPCL_UDP_HASH(lport, ipst)];
1412                 } else {
1413                         connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1414                 }
1415 


1471                         }
1472                 }
1473                 if (connp->conn_fanout != NULL) {
1474                         /*
1475                          * Probably a XTI/TLI application trying to do a
1476                          * rebind. Let it happen.
1477                          */
1478                         mutex_exit(&connfp->connf_lock);
1479                         IPCL_HASH_REMOVE(connp);
1480                         mutex_enter(&connfp->connf_lock);
1481                 }
1482                 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1483                 mutex_exit(&connfp->connf_lock);
1484                 break;
1485 
1486         case IPPROTO_SCTP:
1487                 IPCL_HASH_REMOVE(connp);
1488                 ret = ipcl_sctp_hash_insert(connp, lport);
1489                 break;
1490 


























1491         default:
1492                 if (is_system_labeled() &&
1493                     check_exempt_conflict_v6(connp, ipst))
1494                         return (EADDRINUSE);
1495                 /* FALLTHROUGH */
1496         case IPPROTO_UDP:
1497                 if (protocol == IPPROTO_UDP) {
1498                         connfp = &ipst->ips_ipcl_udp_fanout[
1499                             IPCL_UDP_HASH(lport, ipst)];
1500                 } else {
1501                         connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1502                 }
1503 
1504                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1505                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1506                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1507                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1508                 } else {
1509                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1510                 }


1638                     ira, connp)) {
1639                         DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1640                             char *, "connp(1) could not receive mp(2)",
1641                             conn_t *, connp, mblk_t *, mp);
1642                         connp = NULL;
1643                 }
1644 
1645                 if (connp != NULL) {
1646                         CONN_INC_REF(connp);
1647                         mutex_exit(&connfp->connf_lock);
1648                         return (connp);
1649                 }
1650 
1651                 /*
1652                  * We shouldn't come here for multicast/broadcast packets
1653                  */
1654                 mutex_exit(&connfp->connf_lock);
1655 
1656                 break;
1657 





























































1658         case IPPROTO_ENCAP:
1659         case IPPROTO_IPV6:
1660                 return (ipcl_iptun_classify_v4(&ipha->ipha_src,
1661                     &ipha->ipha_dst, ipst));
1662         }
1663 
1664         return (NULL);
1665 }
1666 
1667 conn_t *
1668 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1669     ip_recv_attr_t *ira, ip_stack_t *ipst)
1670 {
1671         ip6_t           *ip6h;
1672         connf_t         *connfp, *bind_connfp;
1673         uint16_t        lport;
1674         uint16_t        fport;
1675         tcpha_t         *tcpha;
1676         uint32_t        ports;
1677         conn_t          *connp;


2158         itc_t   *itc = (itc_t *)buf;
2159         conn_t  *connp = &itc->itc_conn;
2160         rts_t   *rts = (rts_t *)&itc[1];
2161 
2162         ASSERT(connp->conn_flags & IPCL_RTSCONN);
2163         ASSERT(rts->rts_connp == connp);
2164         ASSERT(connp->conn_rts == rts);
2165         mutex_destroy(&connp->conn_lock);
2166         cv_destroy(&connp->conn_cv);
2167         rw_destroy(&connp->conn_ilg_lock);
2168 
2169         /* Can be NULL if constructor failed */
2170         if (connp->conn_ixa != NULL) {
2171                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2172                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2173                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2174                 ixa_refrele(connp->conn_ixa);
2175         }
2176 }
2177 
































































2178 /*
2179  * Called as part of ipcl_conn_destroy to assert and clear any pointers
2180  * in the conn_t.
2181  *
2182  * Below we list all the pointers in the conn_t as a documentation aid.
2183  * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2184  * If you add any pointers to the conn_t please add an ASSERT here
2185  * and #ifdef it out if it can't be actually asserted to be NULL.
2186  * In any case, we bzero most of the conn_t at the end of the function.
2187  */
2188 void
2189 ipcl_conn_cleanup(conn_t *connp)
2190 {
2191         ip_xmit_attr_t  *ixa;
2192 
2193         ASSERT(connp->conn_latch == NULL);
2194         ASSERT(connp->conn_latch_in_policy == NULL);
2195         ASSERT(connp->conn_latch_in_action == NULL);
2196 #ifdef notdef
2197         ASSERT(connp->conn_rq == NULL);




  50  * and when connection is processed via squeue (squeue processing may be
  51  * asynchronous and the reference protects the connection from being destroyed
  52  * before its processing is finished).
  53  *
  54  * conn_recv is used to pass up packets to the ULP.
  55  * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
  56  * a listener, and changes to tcp_input_listener as the listener has picked a
  57  * good squeue. For other cases it is set to tcp_input_data.
  58  *
  59  * conn_recvicmp is used to pass up ICMP errors to the ULP.
  60  *
  61  * Classifier uses several hash tables:
  62  *
  63  *      ipcl_conn_fanout:       contains all TCP connections in CONNECTED state
  64  *      ipcl_bind_fanout:       contains all connections in BOUND state
  65  *      ipcl_proto_fanout:      IPv4 protocol fanout
  66  *      ipcl_proto_fanout_v6:   IPv6 protocol fanout
  67  *      ipcl_udp_fanout:        contains all UDP connections
  68  *      ipcl_iptun_fanout:      contains all IP tunnel connections
  69  *      ipcl_globalhash_fanout: contains all connections
  70  *      ipcl_dccp_conn_fanout:  contains all DCCP connections in CONNECTED state
  71  *      ipcl_dccp_bind_fanout:  contains all DCCP connections in BOUND state
  72  *
  73  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
  74  * which need to view all existing connections.
  75  *
  76  * All tables are protected by per-bucket locks. When both per-bucket lock and
  77  * connection lock need to be held, the per-bucket lock should be acquired
  78  * first, followed by the connection lock.
  79  *
  80  * All functions doing search in one of these tables increment a reference
  81  * counter on the connection found (if any). This reference should be dropped
  82  * when the caller has finished processing the connection.
  83  *
  84  *
  85  * INTERFACES:
  86  * ===========
  87  *
  88  * Connection Lookup:
  89  * ------------------
  90  *
  91  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)


 208  *
 209  * void ipcl_hash_remove(connp);
 210  *
 211  *      Removes the 'connp' from the connection fanout table.
 212  *
 213  * Connection Creation/Destruction
 214  * -------------------------------
 215  *
 216  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 217  *
 218  *      Creates a new conn based on the type flag, inserts it into
 219  *      globalhash table.
 220  *
 221  *      type:   This flag determines the type of conn_t which needs to be
 222  *              created i.e., which kmem_cache it comes from.
 223  *              IPCL_TCPCONN    indicates a TCP connection
 224  *              IPCL_SCTPCONN   indicates a SCTP connection
 225  *              IPCL_UDPCONN    indicates a UDP conn_t.
 226  *              IPCL_RAWIPCONN  indicates a RAWIP/ICMP conn_t.
 227  *              IPCL_RTSCONN    indicates a RTS conn_t.
 228  *              IPCL_DCCPCONN   indicates a DCCP conn_t.
 229  *              IPCL_IPCCONN    indicates all other connections.
 230  *
 231  * void ipcl_conn_destroy(connp)
 232  *
 233  *      Destroys the connection state, removes it from the global
 234  *      connection hash table and frees its memory.
 235  */
 236 
 237 #include <sys/types.h>
 238 #include <sys/stream.h>
 239 #include <sys/stropts.h>
 240 #include <sys/sysmacros.h>
 241 #include <sys/strsubr.h>
 242 #include <sys/strsun.h>
 243 #define _SUN_TPI_VERSION 2
 244 #include <sys/ddi.h>
 245 #include <sys/cmn_err.h>
 246 #include <sys/debug.h>
 247 
 248 #include <sys/systm.h>
 249 #include <sys/param.h>
 250 #include <sys/kmem.h>
 251 #include <sys/isa_defs.h>
 252 #include <inet/common.h>
 253 #include <netinet/ip6.h>
 254 #include <netinet/icmp6.h>
 255 
 256 #include <inet/ip.h>
 257 #include <inet/ip_if.h>
 258 #include <inet/ip_ire.h>
 259 #include <inet/ip6.h>
 260 #include <inet/ip_ndp.h>
 261 #include <inet/ip_impl.h>
 262 #include <inet/udp_impl.h>
 263 #include <inet/dccp_impl.h>
 264 #include <inet/sctp_ip.h>
 265 #include <inet/sctp/sctp_impl.h>
 266 #include <inet/rawip_impl.h>
 267 #include <inet/rts_impl.h>
 268 #include <inet/iptun/iptun_impl.h>
 269 
 270 #include <sys/cpuvar.h>
 271 
 272 #include <inet/ipclassifier.h>
 273 #include <inet/tcp.h>
 274 #include <inet/ipsec_impl.h>
 275 
 276 #include <sys/tsol/tnet.h>
 277 #include <sys/sockio.h>
 278 
 279 /* Old value for compatibility. Settable in /etc/system */
 280 uint_t tcp_conn_hash_size = 0;
 281 
 282 /* New value. Zero means choose automatically.  Settable in /etc/system */
 283 uint_t ipcl_conn_hash_size = 0;
 284 uint_t ipcl_conn_hash_memfactor = 8192;
 285 uint_t ipcl_conn_hash_maxsize = 82500;
 286 
 287 /* bind/udp fanout table size */
 288 uint_t ipcl_bind_fanout_size = 512;
 289 uint_t ipcl_udp_fanout_size = 16384;
 290 
 291 /* Fanout table sizes for dccp */
 292 uint_t ipcl_dccp_conn_fanout_size = 512;
 293 uint_t ipcl_dccp_bind_fanout_size = 512;
 294 
 295 /* Raw socket fanout size.  Must be a power of 2. */
 296 uint_t ipcl_raw_fanout_size = 256;
 297 
 298 /*
 299  * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 300  * expect that most large deployments would have hundreds of tunnels, and
 301  * thousands in the extreme case.
 302  */
 303 uint_t ipcl_iptun_fanout_size = 6143;
 304 
 305 /*
 306  * Power of 2^N Primes useful for hashing for N of 0-28,
 307  * these primes are the nearest prime <= 2^N - 2^(N-2).
 308  */
 309 
 310 #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,  \
 311                 6143, 12281, 24571, 49139, 98299, 196597, 393209,       \
 312                 786431, 1572853, 3145721, 6291449, 12582893, 25165813,  \
 313                 50331599, 100663291, 201326557, 0}
 314 
 315 /*
 316  * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 317  * are aligned on cache lines.
 318  */
 319 typedef union itc_s {
 320         conn_t  itc_conn;
 321         char    itcu_filler[CACHE_ALIGN(conn_s)];
 322 } itc_t;
 323 
 324 struct kmem_cache  *tcp_conn_cache;
 325 struct kmem_cache  *ip_conn_cache;
 326 extern struct kmem_cache  *sctp_conn_cache;
 327 struct kmem_cache  *udp_conn_cache;
 328 struct kmem_cache  *rawip_conn_cache;
 329 struct kmem_cache  *rts_conn_cache;
 330 struct kmem_cache  *dccp_conn_cache;
 331 
 332 extern void     tcp_timermp_free(tcp_t *);
 333 extern mblk_t   *tcp_timermp_alloc(int);
 334 
 335 static int      ip_conn_constructor(void *, void *, int);
 336 static void     ip_conn_destructor(void *, void *);
 337 
 338 static int      tcp_conn_constructor(void *, void *, int);
 339 static void     tcp_conn_destructor(void *, void *);
 340 
 341 static int      udp_conn_constructor(void *, void *, int);
 342 static void     udp_conn_destructor(void *, void *);
 343 
 344 static int      rawip_conn_constructor(void *, void *, int);
 345 static void     rawip_conn_destructor(void *, void *);
 346 
 347 static int      rts_conn_constructor(void *, void *, int);
 348 static void     rts_conn_destructor(void *, void *);
 349 
 350 static int      dccp_conn_constructor(void *, void *, int);
 351 static void     dccp_conn_destructor(void *, void *);
 352 
 353 /*
 354  * Global (for all stack instances) init routine
 355  */
 356 void
 357 ipcl_g_init(void)
 358 {
 359         ip_conn_cache = kmem_cache_create("ip_conn_cache",
 360             sizeof (conn_t), CACHE_ALIGN_SIZE,
 361             ip_conn_constructor, ip_conn_destructor,
 362             NULL, NULL, NULL, 0);
 363 
 364         tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
 365             sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
 366             tcp_conn_constructor, tcp_conn_destructor,
 367             tcp_conn_reclaim, NULL, NULL, 0);
 368 
 369         udp_conn_cache = kmem_cache_create("udp_conn_cache",
 370             sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
 371             udp_conn_constructor, udp_conn_destructor,
 372             NULL, NULL, NULL, 0);
 373 
 374         rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
 375             sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
 376             rawip_conn_constructor, rawip_conn_destructor,
 377             NULL, NULL, NULL, 0);
 378 
 379         rts_conn_cache = kmem_cache_create("rts_conn_cache",
 380             sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
 381             rts_conn_constructor, rts_conn_destructor,
 382             NULL, NULL, NULL, 0);
 383 
 384         /* XXX:DCCP reclaim */
 385         dccp_conn_cache = kmem_cache_create("dccp_conn_cache",
 386             sizeof (itc_t) + sizeof (dccp_t), CACHE_ALIGN_SIZE,
 387             dccp_conn_constructor, dccp_conn_destructor,
 388             NULL, NULL, NULL, 0);
 389 }
 390 
 391 /*
 392  * ipclassifier initialization routine, sets up hash tables.
 393  */
 394 void
 395 ipcl_init(ip_stack_t *ipst)
 396 {
 397         int i;
 398         int sizes[] = P2Ps();
 399 
 400         /*
 401          * Calculate size of conn fanout table from /etc/system settings
 402          */
 403         if (ipcl_conn_hash_size != 0) {
 404                 ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
 405         } else if (tcp_conn_hash_size != 0) {
 406                 ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
 407         } else {
 408                 extern pgcnt_t freemem;


 411                     (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
 412 
 413                 if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
 414                         ipst->ips_ipcl_conn_fanout_size =
 415                             ipcl_conn_hash_maxsize;
 416                 }
 417         }
 418 
 419         for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
 420                 if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
 421                         break;
 422                 }
 423         }
 424         if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
 425                 /* Out of range, use the 2^16 value */
 426                 ipst->ips_ipcl_conn_fanout_size = sizes[16];
 427         }
 428 
 429         /* Take values from /etc/system */
 430         ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
 431         ipst->ips_ipcl_dccp_conn_fanout_size = ipcl_dccp_conn_fanout_size;
 432         ipst->ips_ipcl_dccp_bind_fanout_size = ipcl_dccp_bind_fanout_size;
 433         ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
 434         ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
 435         ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
 436 
 437         ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
 438 
 439         ipst->ips_ipcl_conn_fanout = kmem_zalloc(
 440             ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
 441 
 442         for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 443                 mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
 444                     MUTEX_DEFAULT, NULL);
 445         }
 446 
 447         ipst->ips_ipcl_bind_fanout = kmem_zalloc(
 448             ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
 449 
 450         for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
 451                 mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
 452                     MUTEX_DEFAULT, NULL);


 480         ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
 481             ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
 482         for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
 483                 mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
 484                     MUTEX_DEFAULT, NULL);
 485         }
 486 
 487         ipst->ips_ipcl_raw_fanout = kmem_zalloc(
 488             ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
 489         for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 490                 mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
 491                     MUTEX_DEFAULT, NULL);
 492         }
 493 
 494         ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
 495             sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
 496         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 497                 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
 498                     NULL, MUTEX_DEFAULT, NULL);
 499         }
 500 
 501         ipst->ips_ipcl_dccp_conn_fanout = kmem_zalloc(
 502             ipst->ips_ipcl_dccp_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
 503         for (i = 0; i < ipst->ips_ipcl_dccp_conn_fanout_size; i++) {
 504                 mutex_init(&ipst->ips_ipcl_dccp_conn_fanout[i].connf_lock, NULL,
 505                     MUTEX_DEFAULT, NULL);
 506         }
 507 
 508         ipst->ips_ipcl_dccp_bind_fanout = kmem_zalloc(
 509             ipst->ips_ipcl_dccp_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
 510         for (i = 0; i < ipst->ips_ipcl_dccp_bind_fanout_size; i++) {
 511                 mutex_init(&ipst->ips_ipcl_dccp_bind_fanout[i].connf_lock, NULL,
 512                     MUTEX_DEFAULT, NULL);
 513         }
 514 }
 515 
 516 void
 517 ipcl_g_destroy(void)
 518 {
 519         kmem_cache_destroy(ip_conn_cache);
 520         kmem_cache_destroy(tcp_conn_cache);
 521         kmem_cache_destroy(udp_conn_cache);
 522         kmem_cache_destroy(rawip_conn_cache);
 523         kmem_cache_destroy(rts_conn_cache);
 524         kmem_cache_destroy(dccp_conn_cache);
 525 }
 526 
 527 /*
 528  * All user-level and kernel use of the stack must be gone
 529  * by now.
 530  */
 531 void
 532 ipcl_destroy(ip_stack_t *ipst)
 533 {
 534         int i;
 535 
 536         for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 537                 ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
 538                 mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
 539         }
 540         kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
 541             sizeof (connf_t));
 542         ipst->ips_ipcl_conn_fanout = NULL;
 543 
 544         for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {


 580         kmem_free(ipst->ips_ipcl_iptun_fanout,
 581             ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
 582         ipst->ips_ipcl_iptun_fanout = NULL;
 583 
 584         for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 585                 ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
 586                 mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
 587         }
 588         kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
 589             sizeof (connf_t));
 590         ipst->ips_ipcl_raw_fanout = NULL;
 591 
 592         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 593                 ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
 594                 mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
 595         }
 596         kmem_free(ipst->ips_ipcl_globalhash_fanout,
 597             sizeof (connf_t) * CONN_G_HASH_SIZE);
 598         ipst->ips_ipcl_globalhash_fanout = NULL;
 599 
 600         for (i = 0; i < ipst->ips_ipcl_dccp_conn_fanout_size; i++) {
 601                 ASSERT(ipst->ips_ipcl_dccp_conn_fanout[i].connf_head == NULL);
 602                 mutex_destroy(&ipst->ips_ipcl_dccp_conn_fanout[i].connf_lock);
 603         }
 604         kmem_free(ipst->ips_ipcl_dccp_conn_fanout,
 605             ipst->ips_ipcl_dccp_conn_fanout_size * sizeof (connf_t));
 606         ipst->ips_ipcl_dccp_conn_fanout = NULL;
 607 
 608         for (i = 0; i < ipst->ips_ipcl_dccp_bind_fanout_size; i++) {
 609                 ASSERT(ipst->ips_ipcl_dccp_bind_fanout[i].connf_head == NULL);
 610                 mutex_destroy(&ipst->ips_ipcl_dccp_bind_fanout[i].connf_lock);
 611         }
 612         kmem_free(ipst->ips_ipcl_dccp_bind_fanout,
 613             ipst->ips_ipcl_dccp_bind_fanout_size * sizeof (connf_t));
 614         ipst->ips_ipcl_dccp_bind_fanout = NULL;
 615 
 616         ASSERT(ipst->ips_rts_clients->connf_head == NULL);
 617         mutex_destroy(&ipst->ips_rts_clients->connf_lock);
 618         kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
 619         ipst->ips_rts_clients = NULL;
 620 }
 621 
 622 /*
 623  * conn creation routine. initialize the conn, sets the reference
 624  * and inserts it in the global hash table.
 625  */
 626 conn_t *
 627 ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
 628 {
 629         conn_t  *connp;
 630         struct kmem_cache *conn_cache;
 631 
 632         switch (type) {
 633         case IPCL_SCTPCONN:
 634                 if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
 635                         return (NULL);


 644         case IPCL_TCPCONN:
 645                 conn_cache = tcp_conn_cache;
 646                 break;
 647 
 648         case IPCL_UDPCONN:
 649                 conn_cache = udp_conn_cache;
 650                 break;
 651 
 652         case IPCL_RAWIPCONN:
 653                 conn_cache = rawip_conn_cache;
 654                 break;
 655 
 656         case IPCL_RTSCONN:
 657                 conn_cache = rts_conn_cache;
 658                 break;
 659 
 660         case IPCL_IPCCONN:
 661                 conn_cache = ip_conn_cache;
 662                 break;
 663 
 664         case IPCL_DCCPCONN:
 665                 conn_cache = dccp_conn_cache;
 666                 break;
 667 
 668         default:
 669                 connp = NULL;
 670                 ASSERT(0);
 671         }
 672 
 673         if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
 674                 return (NULL);
 675 
 676         connp->conn_ref = 1;
 677         netstack_hold(ns);
 678         connp->conn_netstack = ns;
 679         connp->conn_ixa->ixa_ipst = ns->netstack_ip;
 680         connp->conn_ixa->ixa_conn_id = (long)connp;
 681         ipcl_globalhash_insert(connp);
 682         return (connp);
 683 }
 684 
 685 void
 686 ipcl_conn_destroy(conn_t *connp)
 687 {


 758                         ASSERT(tcp->tcp_tcps == NULL);
 759                         connp->conn_netstack = NULL;
 760                         connp->conn_ixa->ixa_ipst = NULL;
 761                         netstack_rele(ns);
 762                 }
 763 
 764                 bzero(tcp, sizeof (tcp_t));
 765 
 766                 tcp->tcp_timercache = mp;
 767                 tcp->tcp_connp = connp;
 768                 kmem_cache_free(tcp_conn_cache, connp);
 769                 return;
 770         }
 771 
 772         if (connp->conn_flags & IPCL_SCTPCONN) {
 773                 ASSERT(ns != NULL);
 774                 sctp_free(connp);
 775                 return;
 776         }
 777 
 778         if (connp->conn_flags & IPCL_DCCPCONN) {
 779                 dccp_t  *dccp = connp->conn_dccp;
 780 
 781                 cmn_err(CE_NOTE, "ipclassifier: conn_flags DCCP cache_free");
 782 
 783                 dccp_free(dccp);
 784                 mp = dccp->dccp_timercache;
 785 
 786                 dccp->dccp_dccps = NULL;
 787 
 788                 ipcl_conn_cleanup(connp);
 789                 connp->conn_flags = IPCL_DCCPCONN;
 790                 if (ns != NULL) {
 791                         ASSERT(dccp->dccps == NULL);
 792                         connp->conn_netstack = NULL;
 793                         connp->conn_ixa->ixa_ipst = NULL;
 794                         netstack_rele(ns);
 795                 }
 796 
 797                 bzero(dccp, sizeof (dccp_t));
 798 
 799                 dccp->dccp_timercache = mp;
 800                 dccp->dccp_connp = connp;
 801                 kmem_cache_free(dccp_conn_cache, connp);
 802                 return;
 803         }
 804 
 805         ipcl_conn_cleanup(connp);
 806         if (ns != NULL) {
 807                 connp->conn_netstack = NULL;
 808                 connp->conn_ixa->ixa_ipst = NULL;
 809                 netstack_rele(ns);
 810         }
 811 
 812         /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
 813         if (connp->conn_flags & IPCL_UDPCONN) {
 814                 connp->conn_flags = IPCL_UDPCONN;
 815                 kmem_cache_free(udp_conn_cache, connp);
 816         } else if (connp->conn_flags & IPCL_RAWIPCONN) {
 817                 connp->conn_flags = IPCL_RAWIPCONN;
 818                 connp->conn_proto = IPPROTO_ICMP;
 819                 connp->conn_ixa->ixa_protocol = connp->conn_proto;
 820                 kmem_cache_free(rawip_conn_cache, connp);
 821         } else if (connp->conn_flags & IPCL_RTSCONN) {
 822                 connp->conn_flags = IPCL_RTSCONN;
 823                 kmem_cache_free(rts_conn_cache, connp);
 824         } else {


1301                 connfp = &ipst->ips_ipcl_bind_fanout[
1302                     IPCL_BIND_HASH(lport, ipst)];
1303                 if (connp->conn_laddr_v4 != INADDR_ANY) {
1304                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1305                 } else {
1306                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1307                 }
1308                 if (cl_inet_listen != NULL) {
1309                         ASSERT(connp->conn_ipversion == IPV4_VERSION);
1310                         connp->conn_flags |= IPCL_CL_LISTENER;
1311                         (*cl_inet_listen)(
1312                             connp->conn_netstack->netstack_stackid,
1313                             IPPROTO_TCP, AF_INET,
1314                             (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1315                 }
1316                 break;
1317 
1318         case IPPROTO_SCTP:
1319                 ret = ipcl_sctp_hash_insert(connp, lport);
1320                 break;
1321 
1322         case IPPROTO_DCCP:
1323                 cmn_err(CE_NOTE, "ipclassifier.c: ipcl_bind_insert_v4");
1324                 ASSERT(connp->conn_zoneid != ALL_ZONES);
1325                 connfp = &ipst->ips_ipcl_dccp_bind_fanout[
1326                     IPCL_DCCP_BIND_HASH(lport, ipst)];
1327                 if (connp->conn_laddr_v4 != INADDR_ANY) {
1328                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1329                 } else {
1330                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1331                 }
1332                 break;
1333         }
1334 
1335 
1336         return (ret);
1337 }
1338 
1339 int
1340 ipcl_bind_insert_v6(conn_t *connp)
1341 {
1342         connf_t         *connfp;
1343         int             ret = 0;
1344         ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1345         uint16_t        lport = connp->conn_lport;
1346         uint8_t         protocol = connp->conn_proto;
1347 
1348         if (IPCL_IS_IPTUN(connp)) {
1349                 return (ipcl_iptun_hash_insert_v6(connp, ipst));
1350         }
1351 
1352         switch (protocol) {
1353         default:
1354                 if (is_system_labeled() &&


1387                         uint8_t         *laddrp;
1388 
1389                         if (connp->conn_ipversion == IPV6_VERSION) {
1390                                 addr_family = AF_INET6;
1391                                 laddrp =
1392                                     (uint8_t *)&connp->conn_bound_addr_v6;
1393                         } else {
1394                                 addr_family = AF_INET;
1395                                 laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
1396                         }
1397                         connp->conn_flags |= IPCL_CL_LISTENER;
1398                         (*cl_inet_listen)(
1399                             connp->conn_netstack->netstack_stackid,
1400                             IPPROTO_TCP, addr_family, laddrp, lport, NULL);
1401                 }
1402                 break;
1403 
1404         case IPPROTO_SCTP:
1405                 ret = ipcl_sctp_hash_insert(connp, lport);
1406                 break;
1407 
1408         case IPPROTO_DCCP:
1409                 cmn_err(CE_NOTE, "ipclassifier.c: ipcl_bind_insert_v6");
1410                 ASSERT(connp->conn_zoneid != ALL_ZONES);
1411                 connfp = &ipst->ips_ipcl_dccp_bind_fanout[
1412                     IPCL_DCCP_BIND_HASH(lport, ipst)];
1413                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1414                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1415                 } else {
1416                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1417                 }
1418                 break;
1419         }
1420 
1421         return (ret);
1422 }
1423 
1424 /*
1425  * ipcl_conn_hash insertion routines.
1426  * The caller has already set conn_proto and the addresses/ports in the conn_t.
1427  */
1428 
1429 int
1430 ipcl_conn_insert(conn_t *connp)
1431 {
1432         if (connp->conn_ipversion == IPV6_VERSION)
1433                 return (ipcl_conn_insert_v6(connp));
1434         else
1435                 return (ipcl_conn_insert_v4(connp));
1436 }
1437 
1438 int


1483                         IPCL_HASH_REMOVE(connp);
1484                         mutex_enter(&connfp->connf_lock);
1485                 }
1486 
1487                 ASSERT(connp->conn_recv != NULL);
1488                 ASSERT(connp->conn_recvicmp != NULL);
1489 
1490                 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1491                 mutex_exit(&connfp->connf_lock);
1492                 break;
1493 
1494         case IPPROTO_SCTP:
1495                 /*
1496                  * The raw socket may have already been bound, remove it
1497                  * from the hash first.
1498                  */
1499                 IPCL_HASH_REMOVE(connp);
1500                 ret = ipcl_sctp_hash_insert(connp, lport);
1501                 break;
1502 
1503         case IPPROTO_DCCP:
1504                 cmn_err(CE_NOTE, "ipclassifier.c: ipcl_conn_insert_v4");
1505                 connfp = &ipst->ips_ipcl_dccp_conn_fanout[IPCL_DCCP_CONN_HASH(
1506                     connp->conn_faddr_v4, connp->conn_ports, ipst)];
1507                 mutex_enter(&connfp->connf_lock);
1508                 for (tconnp = connfp->connf_head; tconnp != NULL;
1509                     tconnp = tconnp->conn_next) {
1510                         if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1511                             connp->conn_faddr_v4, connp->conn_laddr_v4,
1512                             connp->conn_ports) &&
1513                             IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1514                                 /* Already have a conn. bail out */
1515                                 mutex_exit(&connfp->connf_lock);
1516                                 return (EADDRINUSE);
1517                         }
1518                 }
1519 
1520                 /* XXX:DCCP XTI/TLI application? */
1521 
1522                 ASSERT(connp->conn_recv != NULL);
1523                 ASSERT(connp->conn_recvicmp != NULL);
1524 
1525                 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1526                 mutex_exit(&connfp->connf_lock);
1527                 break;
1528 
1529         default:
1530                 /*
1531                  * Check for conflicts among MAC exempt bindings.  For
1532                  * transports with port numbers, this is done by the upper
1533                  * level per-transport binding logic.  For all others, it's
1534                  * done here.
1535                  */
1536                 if (is_system_labeled() &&
1537                     check_exempt_conflict_v4(connp, ipst))
1538                         return (EADDRINUSE);
1539                 /* FALLTHROUGH */
1540 
1541         case IPPROTO_UDP:
1542                 if (protocol == IPPROTO_UDP) {
1543                         connfp = &ipst->ips_ipcl_udp_fanout[
1544                             IPCL_UDP_HASH(lport, ipst)];
1545                 } else {
1546                         connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1547                 }
1548 


1604                         }
1605                 }
1606                 if (connp->conn_fanout != NULL) {
1607                         /*
1608                          * Probably a XTI/TLI application trying to do a
1609                          * rebind. Let it happen.
1610                          */
1611                         mutex_exit(&connfp->connf_lock);
1612                         IPCL_HASH_REMOVE(connp);
1613                         mutex_enter(&connfp->connf_lock);
1614                 }
1615                 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1616                 mutex_exit(&connfp->connf_lock);
1617                 break;
1618 
1619         case IPPROTO_SCTP:
1620                 IPCL_HASH_REMOVE(connp);
1621                 ret = ipcl_sctp_hash_insert(connp, lport);
1622                 break;
1623 
1624         case IPPROTO_DCCP:
1625                 cmn_err(CE_NOTE, "ipclassifier.c: ipcl_conn_insert_v6");
1626                 connfp = &ipst->ips_ipcl_dccp_conn_fanout[
1627                     IPCL_DCCP_CONN_HASH_V6(connp->conn_faddr_v6,
1628                     connp->conn_ports, ipst)];
1629                 mutex_enter(&connfp->connf_lock);
1630                 for (tconnp = connfp->connf_head; tconnp != NULL;
1631                     tconnp = tconnp->conn_next) {
1632                         /* NOTE: need to match zoneid. Bug in onnv-gate */
1633                         if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1634                             connp->conn_faddr_v6, connp->conn_laddr_v6,
1635                             connp->conn_ports) &&
1636                             (tconnp->conn_bound_if == 0 ||
1637                             tconnp->conn_bound_if == ifindex) &&
1638                             IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1639                                 /* Already have a conn. bail out */
1640                                 mutex_exit(&connfp->connf_lock);
1641                                 return (EADDRINUSE);
1642                         }
1643                 }
1644 
1645                 /* XXX:DCCP XTI/TLI? */
1646                 IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1647                 mutex_exit(&connfp->connf_lock);
1648                 break;
1649 
1650         default:
1651                 if (is_system_labeled() &&
1652                     check_exempt_conflict_v6(connp, ipst))
1653                         return (EADDRINUSE);
1654                 /* FALLTHROUGH */
1655         case IPPROTO_UDP:
1656                 if (protocol == IPPROTO_UDP) {
1657                         connfp = &ipst->ips_ipcl_udp_fanout[
1658                             IPCL_UDP_HASH(lport, ipst)];
1659                 } else {
1660                         connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1661                 }
1662 
1663                 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1664                         IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1665                 } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1666                         IPCL_HASH_INSERT_BOUND(connfp, connp);
1667                 } else {
1668                         IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1669                 }


1797                     ira, connp)) {
1798                         DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1799                             char *, "connp(1) could not receive mp(2)",
1800                             conn_t *, connp, mblk_t *, mp);
1801                         connp = NULL;
1802                 }
1803 
1804                 if (connp != NULL) {
1805                         CONN_INC_REF(connp);
1806                         mutex_exit(&connfp->connf_lock);
1807                         return (connp);
1808                 }
1809 
1810                 /*
1811                  * We shouldn't come here for multicast/broadcast packets
1812                  */
1813                 mutex_exit(&connfp->connf_lock);
1814 
1815                 break;
1816 
1817         case IPPROTO_DCCP:
1818                 ports = *(uint32_t *)up;
1819 
1820                 /*
1821                  * Search for fully-bound connection.
1822                  */
1823                 connfp = &ipst->ips_ipcl_dccp_conn_fanout[IPCL_DCCP_CONN_HASH(
1824                     ipha->ipha_src, ports, ipst)];
1825                 mutex_enter(&connfp->connf_lock);
1826                 for (connp = connfp->connf_head; connp != NULL;
1827                     connp = connp->conn_next) {
1828                         /* XXX:DCCP */
1829                         if (IPCL_CONN_MATCH(connp, protocol,
1830                             ipha->ipha_src, ipha->ipha_dst, ports)) {
1831                                 /* XXX */
1832                                 cmn_err(CE_NOTE, "ipclassifier.c: fully bound connection found");
1833                                 break;
1834                         }
1835                 }
1836 
1837                 if (connp != NULL) {
1838                         /*
1839                          * We have a fully-bound DCCP connection.
1840                          */
1841                         CONN_INC_REF(connp);
1842                         mutex_exit(&connfp->connf_lock);
1843                         return (connp);
1844                 }
1845 
1846                 mutex_exit(&connfp->connf_lock);
1847                 lport = up[1];
1848 
1849                 /*
1850                  * Fully-bound connection was not found, search for listener.
1851                  */
1852                 bind_connfp = &ipst->ips_ipcl_dccp_bind_fanout[
1853                     IPCL_DCCP_BIND_HASH(lport, ipst)];
1854                 mutex_enter(&bind_connfp->connf_lock);
1855                 for (connp = bind_connfp->connf_head; connp != NULL;
1856                     connp = connp->conn_next) {
1857                         if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1858                             lport) &&
1859                             (connp->conn_zoneid == zoneid ||
1860                             connp->conn_allzones ||
1861                             ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1862                             (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1863                             (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1864                                 break;
1865                 }
1866 
1867                 if (connp != NULL) {
1868                         cmn_err(CE_NOTE, "ipclassifier.c: half-bound bind listener");
1869                         /* Have a listener at least */
1870                         CONN_INC_REF(connp);
1871                         mutex_exit(&bind_connfp->connf_lock);
1872                         return (connp);
1873                 }
1874 
1875                 mutex_exit(&bind_connfp->connf_lock);
1876                 break;
1877 
1878         case IPPROTO_ENCAP:
1879         case IPPROTO_IPV6:
1880                 return (ipcl_iptun_classify_v4(&ipha->ipha_src,
1881                     &ipha->ipha_dst, ipst));
1882         }
1883 
1884         return (NULL);
1885 }
1886 
1887 conn_t *
1888 ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1889     ip_recv_attr_t *ira, ip_stack_t *ipst)
1890 {
1891         ip6_t           *ip6h;
1892         connf_t         *connfp, *bind_connfp;
1893         uint16_t        lport;
1894         uint16_t        fport;
1895         tcpha_t         *tcpha;
1896         uint32_t        ports;
1897         conn_t          *connp;


2378         itc_t   *itc = (itc_t *)buf;
2379         conn_t  *connp = &itc->itc_conn;
2380         rts_t   *rts = (rts_t *)&itc[1];
2381 
2382         ASSERT(connp->conn_flags & IPCL_RTSCONN);
2383         ASSERT(rts->rts_connp == connp);
2384         ASSERT(connp->conn_rts == rts);
2385         mutex_destroy(&connp->conn_lock);
2386         cv_destroy(&connp->conn_cv);
2387         rw_destroy(&connp->conn_ilg_lock);
2388 
2389         /* Can be NULL if constructor failed */
2390         if (connp->conn_ixa != NULL) {
2391                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2392                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2393                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2394                 ixa_refrele(connp->conn_ixa);
2395         }
2396 }
2397 
2398 /* ARGSUSED */
2399 static int
2400 dccp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2401 {
2402         itc_t   *itc = (itc_t *)buf;
2403         conn_t  *connp = &itc->itc_conn;
2404         dccp_t  *dccp = (dccp_t *)&itc[1];
2405 
2406         bzero(connp, sizeof (conn_t));
2407         bzero(dccp, sizeof (dccp_t));
2408 
2409         mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2410         cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2411         rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2412 
2413         dccp->dccp_timercache = dccp_timermp_alloc(kmflags);
2414         if (dccp->dccp_timercache == NULL) {
2415                 return (ENOMEM);
2416         }
2417 
2418         connp->conn_dccp = dccp;
2419         connp->conn_flags = IPCL_DCCPCONN;
2420         connp->conn_proto = IPPROTO_DCCP;
2421         dccp->dccp_connp = connp;
2422 
2423         connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2424         if (connp->conn_ixa == NULL) {
2425                 return (NULL);
2426         }
2427 
2428         connp->conn_ixa->ixa_refcnt = 1;
2429         connp->conn_ixa->ixa_protocol = connp->conn_proto;
2430         connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2431 
2432         return (0);
2433 }
2434 
2435 /* ARGSUSED */
2436 static void
2437 dccp_conn_destructor(void *buf, void *cdrarg)
2438 {
2439         itc_t   *itc = (itc_t *)buf;
2440         conn_t  *connp = &itc->itc_conn;
2441         dccp_t  *dccp = (dccp_t *)&itc[1];
2442 
2443         ASSERT(connp->conn_flags & IPCL_DCCPCONN);
2444         ASSERT(dccp->dccp_connp == connp);
2445         ASSERT(connp->conn_dccp == dccp);
2446 
2447         dccp_timermp_free(dccp);
2448 
2449         mutex_destroy(&connp->conn_lock);
2450         cv_destroy(&connp->conn_cv);
2451         rw_destroy(&connp->conn_ilg_lock);
2452 
2453         if (connp->conn_ixa != NULL) {
2454                 ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2455                 ASSERT(connp->conn_ixa->ixa_ire == NULL);
2456                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
2457 
2458                 ixa_refrele(connp->conn_ixa);
2459         }
2460 }
2461 
2462 /*
2463  * Called as part of ipcl_conn_destroy to assert and clear any pointers
2464  * in the conn_t.
2465  *
2466  * Below we list all the pointers in the conn_t as a documentation aid.
2467  * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2468  * If you add any pointers to the conn_t please add an ASSERT here
2469  * and #ifdef it out if it can't be actually asserted to be NULL.
2470  * In any case, we bzero most of the conn_t at the end of the function.
2471  */
2472 void
2473 ipcl_conn_cleanup(conn_t *connp)
2474 {
2475         ip_xmit_attr_t  *ixa;
2476 
2477         ASSERT(connp->conn_latch == NULL);
2478         ASSERT(connp->conn_latch_in_policy == NULL);
2479         ASSERT(connp->conn_latch_in_action == NULL);
2480 #ifdef notdef
2481         ASSERT(connp->conn_rq == NULL);