Print this page
dccp: conn_t

@@ -65,10 +65,12 @@
  *      ipcl_proto_fanout:      IPv4 protocol fanout
  *      ipcl_proto_fanout_v6:   IPv6 protocol fanout
  *      ipcl_udp_fanout:        contains all UDP connections
  *      ipcl_iptun_fanout:      contains all IP tunnel connections
  *      ipcl_globalhash_fanout: contains all connections
+ *      ipcl_dccp_conn_fanout:  contains all DCCP connections in CONNECTED state
+ *      ipcl_dccp_bind_fanout:  contains all DCCP connections in BOUND state
  *
  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
  * which need to view all existing connections.
  *
  * All tables are protected by per-bucket locks. When both per-bucket lock and

@@ -221,10 +223,11 @@
  *              IPCL_TCPCONN    indicates a TCP connection
  *              IPCL_SCTPCONN   indicates a SCTP connection
  *              IPCL_UDPCONN    indicates a UDP conn_t.
  *              IPCL_RAWIPCONN  indicates a RAWIP/ICMP conn_t.
  *              IPCL_RTSCONN    indicates a RTS conn_t.
+ *              IPCL_DCCPCONN   indicates a DCCP conn_t.
  *              IPCL_IPCCONN    indicates all other connections.
  *
  * void ipcl_conn_destroy(connp)
  *
  *      Destroys the connection state, removes it from the global

@@ -255,10 +258,11 @@
 #include <inet/ip_ire.h>
 #include <inet/ip6.h>
 #include <inet/ip_ndp.h>
 #include <inet/ip_impl.h>
 #include <inet/udp_impl.h>
+#include <inet/dccp_impl.h>
 #include <inet/sctp_ip.h>
 #include <inet/sctp/sctp_impl.h>
 #include <inet/rawip_impl.h>
 #include <inet/rts_impl.h>
 #include <inet/iptun/iptun_impl.h>

@@ -282,10 +286,14 @@
 
 /* bind/udp fanout table size */
 uint_t ipcl_bind_fanout_size = 512;
 uint_t ipcl_udp_fanout_size = 16384;
 
+/* Fanout table sizes for dccp */
+uint_t ipcl_dccp_conn_fanout_size = 512;
+uint_t ipcl_dccp_bind_fanout_size = 512;
+
 /* Raw socket fanout size.  Must be a power of 2. */
 uint_t ipcl_raw_fanout_size = 256;
 
 /*
  * The IPCL_IPTUN_HASH() function works best with a prime table size.  We

@@ -317,10 +325,11 @@
 struct kmem_cache  *ip_conn_cache;
 extern struct kmem_cache  *sctp_conn_cache;
 struct kmem_cache  *udp_conn_cache;
 struct kmem_cache  *rawip_conn_cache;
 struct kmem_cache  *rts_conn_cache;
+struct kmem_cache  *dccp_conn_cache;
 
 extern void     tcp_timermp_free(tcp_t *);
 extern mblk_t   *tcp_timermp_alloc(int);
 
 static int      ip_conn_constructor(void *, void *, int);

@@ -336,10 +345,13 @@
 static void     rawip_conn_destructor(void *, void *);
 
 static int      rts_conn_constructor(void *, void *, int);
 static void     rts_conn_destructor(void *, void *);
 
+static int      dccp_conn_constructor(void *, void *, int);
+static void     dccp_conn_destructor(void *, void *);
+
 /*
  * Global (for all stack instances) init routine
  */
 void
 ipcl_g_init(void)

@@ -366,10 +378,16 @@
 
         rts_conn_cache = kmem_cache_create("rts_conn_cache",
             sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
             rts_conn_constructor, rts_conn_destructor,
             NULL, NULL, NULL, 0);
+
+        /* XXX:DCCP reclaim */
+        dccp_conn_cache = kmem_cache_create("dccp_conn_cache",
+            sizeof (itc_t) + sizeof (dccp_t), CACHE_ALIGN_SIZE,
+            dccp_conn_constructor, dccp_conn_destructor,
+            NULL, NULL, NULL, 0);
 }
 
 /*
  * ipclassifier intialization routine, sets up hash tables.
  */

@@ -408,10 +426,12 @@
                 ipst->ips_ipcl_conn_fanout_size = sizes[16];
         }
 
         /* Take values from /etc/system */
         ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
+        ipst->ips_ipcl_dccp_conn_fanout_size = ipcl_dccp_conn_fanout_size;
+        ipst->ips_ipcl_dccp_bind_fanout_size = ipcl_dccp_bind_fanout_size;
         ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
         ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
         ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
 
         ASSERT(ipst->ips_ipcl_conn_fanout == NULL);

@@ -475,20 +495,35 @@
             sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
         for (i = 0; i < CONN_G_HASH_SIZE; i++) {
                 mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
                     NULL, MUTEX_DEFAULT, NULL);
         }
+
+        ipst->ips_ipcl_dccp_conn_fanout = kmem_zalloc(
+            ipst->ips_ipcl_dccp_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
+        for (i = 0; i < ipst->ips_ipcl_dccp_conn_fanout_size; i++) {
+                mutex_init(&ipst->ips_ipcl_dccp_conn_fanout[i].connf_lock, NULL,
+                    MUTEX_DEFAULT, NULL);
+        }
+
+        ipst->ips_ipcl_dccp_bind_fanout = kmem_zalloc(
+            ipst->ips_ipcl_dccp_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
+        for (i = 0; i < ipst->ips_ipcl_dccp_bind_fanout_size; i++) {
+                mutex_init(&ipst->ips_ipcl_dccp_bind_fanout[i].connf_lock, NULL,
+                    MUTEX_DEFAULT, NULL);
+        }
 }
 
 void
 ipcl_g_destroy(void)
 {
         kmem_cache_destroy(ip_conn_cache);
         kmem_cache_destroy(tcp_conn_cache);
         kmem_cache_destroy(udp_conn_cache);
         kmem_cache_destroy(rawip_conn_cache);
         kmem_cache_destroy(rts_conn_cache);
+        kmem_cache_destroy(dccp_conn_cache);    /* itc_t + dccp_t buffers */
 }
 
 /*
  * All user-level and kernel use of the stack must be gone
  * by now.

@@ -560,10 +595,26 @@
         }
         kmem_free(ipst->ips_ipcl_globalhash_fanout,
             sizeof (connf_t) * CONN_G_HASH_SIZE);
         ipst->ips_ipcl_globalhash_fanout = NULL;
 
+        for (i = 0; i < ipst->ips_ipcl_dccp_conn_fanout_size; i++) {
+                ASSERT(ipst->ips_ipcl_dccp_conn_fanout[i].connf_head == NULL);
+                mutex_destroy(&ipst->ips_ipcl_dccp_conn_fanout[i].connf_lock);
+        }
+        kmem_free(ipst->ips_ipcl_dccp_conn_fanout,
+            ipst->ips_ipcl_dccp_conn_fanout_size * sizeof (connf_t));
+        ipst->ips_ipcl_dccp_conn_fanout = NULL;
+
+        for (i = 0; i < ipst->ips_ipcl_dccp_bind_fanout_size; i++) {
+                ASSERT(ipst->ips_ipcl_dccp_bind_fanout[i].connf_head == NULL);
+                mutex_destroy(&ipst->ips_ipcl_dccp_bind_fanout[i].connf_lock);
+        }
+        kmem_free(ipst->ips_ipcl_dccp_bind_fanout,
+            ipst->ips_ipcl_dccp_bind_fanout_size * sizeof (connf_t));
+        ipst->ips_ipcl_dccp_bind_fanout = NULL;
+
         ASSERT(ipst->ips_rts_clients->connf_head == NULL);
         mutex_destroy(&ipst->ips_rts_clients->connf_lock);
         kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
         ipst->ips_rts_clients = NULL;
 }

@@ -608,10 +659,14 @@
 
         case IPCL_IPCCONN:
                 conn_cache = ip_conn_cache;
                 break;
 
+        case IPCL_DCCPCONN:
+                conn_cache = dccp_conn_cache;
+                break;
+
         default:
                 connp = NULL;
                 ASSERT(0);
         }
 

@@ -718,10 +773,37 @@
                 ASSERT(ns != NULL);
                 sctp_free(connp);
                 return;
         }
 
+        if (connp->conn_flags & IPCL_DCCPCONN) {
+                dccp_t  *dccp = connp->conn_dccp;
+
+                cmn_err(CE_NOTE, "ipclassifier: conn_flags DCCP cache_free");
+
+                dccp_free(dccp);
+                mp = dccp->dccp_timercache;
+
+                dccp->dccp_dccps = NULL;
+
+                ipcl_conn_cleanup(connp);
+                connp->conn_flags = IPCL_DCCPCONN;
+                if (ns != NULL) {
+                        ASSERT(dccp->dccps == NULL);
+                        connp->conn_netstack = NULL;
+                        connp->conn_ixa->ixa_ipst = NULL;
+                        netstack_rele(ns);
+                }
+
+                bzero(dccp, sizeof (dccp_t));
+
+                dccp->dccp_timercache = mp;
+                dccp->dccp_connp = connp;
+                kmem_cache_free(dccp_conn_cache, connp);
+                return;
+        }
+
         ipcl_conn_cleanup(connp);
         if (ns != NULL) {
                 connp->conn_netstack = NULL;
                 connp->conn_ixa->ixa_ipst = NULL;
                 netstack_rele(ns);

@@ -1234,12 +1316,25 @@
                 break;
 
         case IPPROTO_SCTP:
                 ret = ipcl_sctp_hash_insert(connp, lport);
                 break;
+
+        case IPPROTO_DCCP:
+                cmn_err(CE_NOTE, "ipclassifier.c: ipcl_bind_insert_v4");
+                ASSERT(connp->conn_zoneid != ALL_ZONES);
+                connfp = &ipst->ips_ipcl_dccp_bind_fanout[
+                    IPCL_DCCP_BIND_HASH(lport, ipst)];
+                if (connp->conn_laddr_v4 != INADDR_ANY) {
+                        IPCL_HASH_INSERT_BOUND(connfp, connp);
+                } else {
+                        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+                }
+                break;
         }
 
+
         return (ret);
 }
 
 int
 ipcl_bind_insert_v6(conn_t *connp)

@@ -1307,10 +1402,22 @@
                 break;
 
         case IPPROTO_SCTP:
                 ret = ipcl_sctp_hash_insert(connp, lport);
                 break;
+
+        case IPPROTO_DCCP:
+                cmn_err(CE_NOTE, "ipclassifier.c: ipcl_bind_insert_v6");
+                ASSERT(connp->conn_zoneid != ALL_ZONES);
+                connfp = &ipst->ips_ipcl_dccp_bind_fanout[
+                    IPCL_DCCP_BIND_HASH(lport, ipst)];
+                if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
+                        IPCL_HASH_INSERT_BOUND(connfp, connp);
+                } else {
+                        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
+                }
+                break;
         }
 
         return (ret);
 }
 

@@ -1391,10 +1498,36 @@
                  */
                 IPCL_HASH_REMOVE(connp);
                 ret = ipcl_sctp_hash_insert(connp, lport);
                 break;
 
+        case IPPROTO_DCCP:
+                cmn_err(CE_NOTE, "ipclassifier.c: ipcl_conn_insert_v4");
+                connfp = &ipst->ips_ipcl_dccp_conn_fanout[IPCL_DCCP_CONN_HASH(
+                    connp->conn_faddr_v4, connp->conn_ports, ipst)];
+                mutex_enter(&connfp->connf_lock);
+                for (tconnp = connfp->connf_head; tconnp != NULL;
+                    tconnp = tconnp->conn_next) {
+                        if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
+                            connp->conn_faddr_v4, connp->conn_laddr_v4,
+                            connp->conn_ports) &&
+                            IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
+                                /* Already have a conn. bail out */
+                                mutex_exit(&connfp->connf_lock);
+                                return (EADDRINUSE);
+                        }
+                }
+
+                /* XXX:DCCP XTI/TLI application? */
+
+                ASSERT(connp->conn_recv != NULL);
+                ASSERT(connp->conn_recvicmp != NULL);
+
+                IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
+                mutex_exit(&connfp->connf_lock);
+                break;
+
         default:
                 /*
                  * Check for conflicts among MAC exempt bindings.  For
                  * transports with port numbers, this is done by the upper
                  * level per-transport binding logic.  For all others, it's

@@ -1486,10 +1619,36 @@
         case IPPROTO_SCTP:
                 IPCL_HASH_REMOVE(connp);
                 ret = ipcl_sctp_hash_insert(connp, lport);
                 break;
 
+        case IPPROTO_DCCP:
+                cmn_err(CE_NOTE, "ipclassifier.c: ipcl_conn_insert_v6");
+                connfp = &ipst->ips_ipcl_dccp_conn_fanout[
+                    IPCL_DCCP_CONN_HASH_V6(connp->conn_faddr_v6,
+                    connp->conn_ports, ipst)];
+                mutex_enter(&connfp->connf_lock);
+                for (tconnp = connfp->connf_head; tconnp != NULL;
+                    tconnp = tconnp->conn_next) {
+                        /* NOTE: need to match zoneid. Bug in onnv-gate */
+                        if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
+                            connp->conn_faddr_v6, connp->conn_laddr_v6,
+                            connp->conn_ports) &&
+                            (tconnp->conn_bound_if == 0 ||
+                            tconnp->conn_bound_if == ifindex) &&
+                            IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
+                                /* Already have a conn. bail out */
+                                mutex_exit(&connfp->connf_lock);
+                                return (EADDRINUSE);
+                        }
+                }
+
+                /* XXX:DCCP XTI/TLI? */
+                IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
+                mutex_exit(&connfp->connf_lock);
+                break;
+
         default:
                 if (is_system_labeled() &&
                     check_exempt_conflict_v6(connp, ipst))
                         return (EADDRINUSE);
                 /* FALLTHROUGH */

@@ -1653,10 +1812,71 @@
                  */
                 mutex_exit(&connfp->connf_lock);
 
                 break;
 
+        case IPPROTO_DCCP:
+                ports = *(uint32_t *)up;
+
+                /*
+                 * Search for fully-bound connection.
+                 */
+                connfp = &ipst->ips_ipcl_dccp_conn_fanout[IPCL_DCCP_CONN_HASH(
+                    ipha->ipha_src, ports, ipst)];
+                mutex_enter(&connfp->connf_lock);
+                for (connp = connfp->connf_head; connp != NULL;
+                    connp = connp->conn_next) {
+                        /* XXX:DCCP */
+                        if (IPCL_CONN_MATCH(connp, protocol,
+                            ipha->ipha_src, ipha->ipha_dst, ports)) {
+                                /* XXX */
+                                cmn_err(CE_NOTE, "ipclassifier.c: fully bound connection found");
+                                break;
+                        }
+                }
+
+                if (connp != NULL) {
+                        /*
+                         * We have a fully-bound DCCP connection.
+                         */
+                        CONN_INC_REF(connp);
+                        mutex_exit(&connfp->connf_lock);
+                        return (connp);
+                }
+
+                mutex_exit(&connfp->connf_lock);
+                lport = up[1];
+
+                /*
+                 * Fully-bound connection was not found, search for listener.
+                 */
+                bind_connfp = &ipst->ips_ipcl_dccp_bind_fanout[
+                    IPCL_DCCP_BIND_HASH(lport, ipst)];
+                mutex_enter(&bind_connfp->connf_lock);
+                for (connp = bind_connfp->connf_head; connp != NULL;
+                    connp = connp->conn_next) {
+                        if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
+                            lport) &&
+                            (connp->conn_zoneid == zoneid ||
+                            connp->conn_allzones ||
+                            ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
+                            (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
+                            (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
+                                break;
+                }
+
+                if (connp != NULL) {
+                        cmn_err(CE_NOTE, "ipclassifier.c: half-bound bind listener");
+                        /* Have a listener at least */
+                        CONN_INC_REF(connp);
+                        mutex_exit(&bind_connfp->connf_lock);
+                        return (connp);
+                }
+
+                mutex_exit(&bind_connfp->connf_lock);
+                break;
+
         case IPPROTO_ENCAP:
         case IPPROTO_IPV6:
                 return (ipcl_iptun_classify_v4(&ipha->ipha_src,
                     &ipha->ipha_dst, ipst));
         }

@@ -2173,10 +2393,74 @@
                 ASSERT(connp->conn_ixa->ixa_nce == NULL);
                 ixa_refrele(connp->conn_ixa);
         }
 }
 
+/*
+ * kmem cache constructor for dccp_conn_cache buffers.
+ *
+ * Each buffer holds an itc_t immediately followed by a dccp_t.  The
+ * embedded conn_t and the dccp_t are zeroed, the conn_t's mutex, condition
+ * variable and ilg rwlock are initialized, the dccp_t timer cache and the
+ * conn_t's ip_xmit_attr_t are allocated with the caller's kmflags, and the
+ * conn_t and dccp_t are pointed at each other.
+ *
+ * Returns 0 on success and ENOMEM if either allocation fails.  If the
+ * ip_xmit_attr_t cannot be allocated, the timer cache obtained earlier is
+ * released before returning.
+ */
+/* ARGSUSED */
+static int
+dccp_conn_constructor(void *buf, void *cdrarg, int kmflags)
+{
+        itc_t   *itc = (itc_t *)buf;
+        conn_t  *connp = &itc->itc_conn;
+        dccp_t  *dccp = (dccp_t *)&itc[1];
+
+        bzero(connp, sizeof (conn_t));
+        bzero(dccp, sizeof (dccp_t));
+
+        mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
+        cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
+        rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
+
+        dccp->dccp_timercache = dccp_timermp_alloc(kmflags);
+        if (dccp->dccp_timercache == NULL) {
+                return (ENOMEM);
+        }
+
+        connp->conn_dccp = dccp;
+        connp->conn_flags = IPCL_DCCPCONN;
+        connp->conn_proto = IPPROTO_DCCP;
+        dccp->dccp_connp = connp;
+
+        connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
+        if (connp->conn_ixa == NULL) {
+                /*
+                 * Fail with a real errno: NULL evaluates to 0, which would
+                 * report success while leaving conn_ixa unset.  Also give
+                 * back the timer cache allocated above so it is not leaked.
+                 */
+                dccp_timermp_free(dccp);
+                return (ENOMEM);
+        }
+
+        connp->conn_ixa->ixa_refcnt = 1;
+        connp->conn_ixa->ixa_protocol = connp->conn_proto;
+        connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
+
+        return (0);
+}
+
+/*
+ * kmem cache destructor for dccp_conn_cache buffers.
+ *
+ * Asserts that the conn_t embedded in the itc_t and the dccp_t that follows
+ * it still reference each other, calls dccp_timermp_free() on the dccp_t,
+ * destroys the conn_t's mutex, condition variable and ilg rwlock, and, if
+ * an ip_xmit_attr_t is attached, drops the reference held on it.
+ */
+/* ARGSUSED */
+static void
+dccp_conn_destructor(void *buf, void *cdrarg)
+{
+        itc_t           *itcp = (itc_t *)buf;
+        conn_t          *connp = &itcp->itc_conn;
+        dccp_t          *dccp = (dccp_t *)(itcp + 1);
+        ip_xmit_attr_t  *ixa;
+
+        ASSERT(connp->conn_flags & IPCL_DCCPCONN);
+        ASSERT(dccp->dccp_connp == connp);
+        ASSERT(connp->conn_dccp == dccp);
+
+        dccp_timermp_free(dccp);
+
+        mutex_destroy(&connp->conn_lock);
+        cv_destroy(&connp->conn_cv);
+        rw_destroy(&connp->conn_ilg_lock);
+
+        /* Nothing more to do when no transmit attributes are attached. */
+        ixa = connp->conn_ixa;
+        if (ixa == NULL)
+                return;
+
+        ASSERT(ixa->ixa_refcnt == 1);
+        ASSERT(ixa->ixa_ire == NULL);
+        ASSERT(ixa->ixa_nce == NULL);
+
+        ixa_refrele(ixa);
+}
+
 /*
  * Called as part of ipcl_conn_destroy to assert and clear any pointers
  * in the conn_t.
  *
  * Below we list all the pointers in the conn_t as a documentation aid.

@@ -2552,10 +2836,86 @@
         mutex_exit(&connfp->connf_lock);
         return (NULL);
 }
 
 /*
+ * Same as ipcl_tcp_lookup_reversed_ipv4, but for DCCP.
+ *
+ * Walks the bucket of the DCCP connected fanout selected by hashing
+ * ipha_dst together with the port pair, where the port pair is assembled
+ * as dha_fport followed by dha_lport.  An entry qualifies when
+ * IPCL_CONN_MATCH accepts it for IPPROTO_DCCP with ipha_dst passed ahead of
+ * ipha_src, and its dccp_state is at least min_state.
+ *
+ * Returns the first qualifying conn_t with a reference taken through
+ * CONN_INC_REF (presumably to be released by the caller), or NULL when the
+ * bucket holds no qualifying entry.
+ */
+conn_t *
+ipcl_dccp_lookup_reversed_ipv4(ipha_t *ipha, dccpha_t *dccpha, int min_state,
+    ip_stack_t *ipst)
+{
+        connf_t         *bucket;
+        conn_t          *cand;
+        uint32_t        ports;
+        uint16_t        *halves = (uint16_t *)&ports;
+
+        halves[0] = dccpha->dha_fport;
+        halves[1] = dccpha->dha_lport;
+
+        bucket = &ipst->ips_ipcl_dccp_conn_fanout[IPCL_DCCP_CONN_HASH(
+            ipha->ipha_dst, ports, ipst)];
+
+        mutex_enter(&bucket->connf_lock);
+        for (cand = bucket->connf_head; cand != NULL; cand = cand->conn_next) {
+                if (IPCL_CONN_MATCH(cand, IPPROTO_DCCP,
+                    ipha->ipha_dst, ipha->ipha_src, ports) &&
+                    cand->conn_dccp->dccp_state >= min_state) {
+                        /* The reference is taken before the lock is dropped. */
+                        CONN_INC_REF(cand);
+                        break;
+                }
+        }
+        mutex_exit(&bucket->connf_lock);
+
+        /* cand is NULL here when the walk ran off the end of the bucket. */
+        return (cand);
+}
+
+
+/*
+ * Same as ipcl_tcp_lookup_reversed_ipv6, but for DCCP.
+ *
+ * Walks the bucket of the DCCP connected fanout selected by hashing
+ * ip6_dst together with the port pair (dha_fport followed by dha_lport).
+ * An entry qualifies when IPCL_CONN_MATCH_V6 accepts it for IPPROTO_DCCP
+ * with ip6_dst passed ahead of ip6_src, its dccp_state is at least
+ * min_state, and it is either not bound to an interface (conn_bound_if is
+ * 0) or bound to ifindex.
+ *
+ * Returns the first qualifying conn_t with a reference taken through
+ * CONN_INC_REF (presumably to be released by the caller), or NULL when the
+ * bucket holds no qualifying entry.
+ */
+conn_t *
+ipcl_dccp_lookup_reversed_ipv6(ip6_t *ip6h, dccpha_t *dccpha, int min_state,
+    uint_t ifindex, ip_stack_t *ipst)
+{
+        conn_t          *tconnp;
+        connf_t         *connfp;
+        uint32_t        ports;
+        uint16_t        *pports;
+
+        pports = (uint16_t *)&ports;
+        pports[0] = dccpha->dha_fport;
+        pports[1] = dccpha->dha_lport;
+
+        /*
+         * IPv6 DCCP connections are inserted into the DCCP connected
+         * fanout, so that is the table that has to be searched here.
+         */
+        connfp = &ipst->ips_ipcl_dccp_conn_fanout[
+            IPCL_DCCP_CONN_HASH_V6(ip6h->ip6_dst, ports, ipst)];
+
+        mutex_enter(&connfp->connf_lock);
+        for (tconnp = connfp->connf_head; tconnp != NULL;
+            tconnp = tconnp->conn_next) {
+                if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_DCCP,
+                    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
+                    tconnp->conn_dccp->dccp_state >= min_state &&
+                    (tconnp->conn_bound_if == 0 ||
+                    tconnp->conn_bound_if == ifindex)) {
+                        /* The reference is taken before the lock is dropped. */
+                        CONN_INC_REF(tconnp);
+                        mutex_exit(&connfp->connf_lock);
+                        return (tconnp);
+                }
+        }
+        mutex_exit(&connfp->connf_lock);
+
+        return (NULL);
+}
+
+
+/*
  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
  * a listener when changing state.
  */
 conn_t *
 ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,