1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #ifndef _INET_IPCLASSIFIER_H
  27 #define _INET_IPCLASSIFIER_H
  28 
  29 #ifdef  __cplusplus
  30 extern "C" {
  31 #endif
  32 
  33 #include <inet/common.h>
  34 #include <inet/ip.h>
  35 #include <inet/mi.h>
  36 #include <inet/tcp.h>
  37 #include <inet/dccp.h>
  38 #include <inet/ip6.h>
  39 #include <netinet/in.h>           /* for IPPROTO_* constants */
  40 #include <sys/sdt.h>
  41 #include <sys/socket_proto.h>
  42 #include <sys/sunddi.h>
  43 #include <sys/sunldi.h>
  44 
  45 typedef void (*edesc_rpf)(void *, mblk_t *, void *, ip_recv_attr_t *);
  46 struct icmph_s;
  47 struct icmp6_hdr;
  48 typedef boolean_t (*edesc_vpf)(conn_t *, void *, struct icmph_s *,
  49     struct icmp6_hdr *, ip_recv_attr_t *);
  50 
  51 /*
  52  * ==============================
  53  * =    The CONNECTION          =
  54  * ==============================
  55  */
  56 
  57 /*
  58  * The connection structure contains the common information/flags/ref needed.
  59  * Implementation will keep the connection struct, the layers (with their
  60  * respective data for event i.e. tcp_t if event was tcp_input_data) all in one
  61  * contiguous memory location.
  62  */
  63 
  64 /* Conn Flags */
  65 /* Unused                       0x00020000 */
  66 /* Unused                       0x00040000 */
  67 #define IPCL_FULLY_BOUND        0x00080000      /* Bound to correct squeue */
  68 /* Unused                       0x00100000 */
  69 /* Unused                       0x00200000 */
  70 /* Unused                       0x00400000 */
  71 #define IPCL_CL_LISTENER        0x00800000      /* Cluster listener */
  72 /* Unused                       0x01000000 */
  73 /* Unused                       0x02000000 */
  74 /* Unused                       0x04000000 */
  75 /* Unused                       0x08000000 */
  76 /* Unused                       0x10000000 */
  77 /* Unused                       0x20000000 */
  78 #define IPCL_CONNECTED          0x40000000      /* Conn in connected table */
  79 #define IPCL_BOUND              0x80000000      /* Conn in bind table */
  80 
  81 /* Flags identifying the type of conn */
  82 #define IPCL_TCPCONN            0x00000001      /* From tcp_conn_cache */
  83 #define IPCL_SCTPCONN           0x00000002      /* From sctp_conn_cache */
  84 #define IPCL_IPCCONN            0x00000004      /* From ip_conn_cache */
  85 #define IPCL_UDPCONN            0x00000008      /* From udp_conn_cache */
  86 #define IPCL_RAWIPCONN          0x00000010      /* From rawip_conn_cache */
  87 #define IPCL_RTSCONN            0x00000020      /* From rts_conn_cache */
  88 #define IPCL_DCCPCONN           0x00000040      /* From dccp_conn_cache */
  89 #define IPCL_IPTUN              0x00000080      /* iptun module above us */
  90 
  91 #define IPCL_NONSTR             0x00001000      /* A non-STREAMS socket */
  92 /* Unused                       0x10000000 */
  93 
  94 #define IPCL_REMOVED            0x00000100
  95 #define IPCL_REUSED             0x00000200
  96 
  97 #define IPCL_IS_CONNECTED(connp)                                        \
  98         ((connp)->conn_flags & IPCL_CONNECTED)
  99 
 100 #define IPCL_IS_BOUND(connp)                                            \
 101         ((connp)->conn_flags & IPCL_BOUND)
 102 
 103 /*
 104  * Can't use conn_proto since we need to tell difference
 105  * between a real TCP socket and a SOCK_RAW, IPPROTO_TCP.
 106  */
 107 #define IPCL_IS_TCP(connp)                                              \
 108         ((connp)->conn_flags & IPCL_TCPCONN)
 109 
 110 #define IPCL_IS_SCTP(connp)                                             \
 111         ((connp)->conn_flags & IPCL_SCTPCONN)
 112 
 113 #define IPCL_IS_UDP(connp)                                              \
 114         ((connp)->conn_flags & IPCL_UDPCONN)
 115 
 116 #define IPCL_IS_RAWIP(connp)                                            \
 117         ((connp)->conn_flags & IPCL_RAWIPCONN)
 118 
 119 #define IPCL_IS_RTS(connp)                                              \
 120         ((connp)->conn_flags & IPCL_RTSCONN)
 121 
 122 #define IPCL_IS_IPTUN(connp)                                            \
 123         ((connp)->conn_flags & IPCL_IPTUN)
 124 
 125 #define IPCL_IS_DCCP(connp)                                             \
 126         ((connp)->conn_flags & IPCL_DCCPCONN)
 127 
 128 #define IPCL_IS_NONSTR(connp)   ((connp)->conn_flags & IPCL_NONSTR)
 129 
 130 typedef struct connf_s connf_t;
 131 
 132 typedef struct
 133 {
 134         int     ctb_depth;
 135 #define CONN_STACK_DEPTH        15
 136         pc_t    ctb_stack[CONN_STACK_DEPTH];
 137 } conn_trace_t;
 138 
 139 typedef struct ip_helper_minor_info_s {
 140         dev_t   ip_minfo_dev;           /* Device */
 141         vmem_t  *ip_minfo_arena;        /* Arena */
 142 } ip_helper_minfo_t;
 143 
 144 /*
 145  * ip helper stream info
 146  */
 147 typedef struct ip_helper_stream_info_s {
 148         ldi_handle_t            iphs_handle;
 149         queue_t                 *iphs_rq;
 150         queue_t                 *iphs_wq;
 151         ip_helper_minfo_t       *iphs_minfo;
 152 } ip_helper_stream_info_t;
 153 
 154 /*
 155  * Mandatory Access Control mode, in conn_t's conn_mac_mode field.
 156  *      CONN_MAC_DEFAULT: strict enforcement of MAC.
 157  *      CONN_MAC_AWARE:   allows communications between unlabeled systems
 158  *                        and privileged daemons
 159  *      CONN_MAC_IMPLICIT: allows communications without explicit labels
 160  *                         on the wire with privileged daemons.
 161  *
 162  * CONN_MAC_IMPLICIT is intended specifically for labeled IPsec key management
 163  * in networks which don't pass CIPSO-labeled packets.
 164  */
 165 #define CONN_MAC_DEFAULT 0
 166 #define CONN_MAC_AWARE 1
 167 #define CONN_MAC_IMPLICIT 2
 168 
 169 /*
 170  * conn receive ancillary definition.
 171  *
 172  * These are the set of socket options that make the receive side
 173  * potentially pass up ancillary data items.
 174  * We have a union with an integer so that we can quickly check whether
 175  * any ancillary data items need to be added.
 176  */
 177 typedef struct crb_s {
 178         union {
 179                 uint32_t        crbu_all;
 180                 struct {
 181                         uint32_t
 182         crbb_recvdstaddr : 1,           /* IP_RECVDSTADDR option */
 183         crbb_recvopts : 1,              /* IP_RECVOPTS option */
 184         crbb_recvif : 1,                /* IP_RECVIF option */
 185         crbb_recvslla : 1,              /* IP_RECVSLLA option */
 186 
 187         crbb_recvttl : 1,               /* IP_RECVTTL option */
 188         crbb_ip_recvpktinfo : 1,        /* IP*_RECVPKTINFO option  */
 189         crbb_ipv6_recvhoplimit : 1,     /* IPV6_RECVHOPLIMIT option */
 190         crbb_ipv6_recvhopopts : 1,      /* IPV6_RECVHOPOPTS option */
 191 
 192         crbb_ipv6_recvdstopts : 1,      /* IPV6_RECVDSTOPTS option */
 193         crbb_ipv6_recvrthdr : 1,        /* IPV6_RECVRTHDR option */
 194         crbb_old_ipv6_recvdstopts : 1,  /* old form of IPV6_DSTOPTS */
 195         crbb_ipv6_recvrthdrdstopts : 1, /* IPV6_RECVRTHDRDSTOPTS */
 196 
 197         crbb_ipv6_recvtclass : 1,       /* IPV6_RECVTCLASS */
 198         crbb_recvucred : 1,             /* IP_RECVUCRED option */
 199         crbb_timestamp : 1;             /* SO_TIMESTAMP "socket" option */
 200 
 201                 } crbb;
 202         } crbu;
 203 } crb_t;
 204 
 205 #define crb_all                         crbu.crbu_all
 206 #define crb_recvdstaddr                 crbu.crbb.crbb_recvdstaddr
 207 #define crb_recvopts                    crbu.crbb.crbb_recvopts
 208 #define crb_recvif                      crbu.crbb.crbb_recvif
 209 #define crb_recvslla                    crbu.crbb.crbb_recvslla
 210 #define crb_recvttl                     crbu.crbb.crbb_recvttl
 211 #define crb_ip_recvpktinfo              crbu.crbb.crbb_ip_recvpktinfo
 212 #define crb_ipv6_recvhoplimit           crbu.crbb.crbb_ipv6_recvhoplimit
 213 #define crb_ipv6_recvhopopts            crbu.crbb.crbb_ipv6_recvhopopts
 214 #define crb_ipv6_recvdstopts            crbu.crbb.crbb_ipv6_recvdstopts
 215 #define crb_ipv6_recvrthdr              crbu.crbb.crbb_ipv6_recvrthdr
 216 #define crb_old_ipv6_recvdstopts        crbu.crbb.crbb_old_ipv6_recvdstopts
 217 #define crb_ipv6_recvrthdrdstopts       crbu.crbb.crbb_ipv6_recvrthdrdstopts
 218 #define crb_ipv6_recvtclass             crbu.crbb.crbb_ipv6_recvtclass
 219 #define crb_recvucred                   crbu.crbb.crbb_recvucred
 220 #define crb_timestamp                   crbu.crbb.crbb_timestamp
 221 
 222 /*
 223  * The initial fields in the conn_t are setup by the kmem_cache constructor,
 224  * and are preserved when it is freed. Fields after that are bzero'ed when
 225  * the conn_t is freed.
 226  *
 227  * Much of the conn_t is protected by conn_lock.
 228  *
 229  * conn_lock is also used by some ULPs (like UDP and RAWIP) to protect
 230  * their state.
 231  */
struct conn_s {
        kmutex_t        conn_lock;
        uint32_t        conn_ref;               /* Reference counter */
        uint32_t        conn_flags;             /* Conn Flags */

        /*
         * Per-protocol private state. All members share storage; the
         * conn_tcp, conn_udp, ... macros below name the individual views.
         */
        union {
                tcp_t           *cp_tcp;        /* Pointer to the tcp struct */
                struct udp_s    *cp_udp;        /* Pointer to the udp struct */
                struct icmp_s   *cp_icmp;       /* Pointer to rawip struct */
                struct rts_s    *cp_rts;        /* Pointer to rts struct */
                struct iptun_s  *cp_iptun;      /* Pointer to iptun_t */
                struct sctp_s   *cp_sctp;       /* For IPCL_SCTPCONN */
                struct dccp_s   *cp_dccp;       /* Pointer to dccp struct */
                void            *cp_priv;
        } conn_proto_priv;
#define conn_tcp        conn_proto_priv.cp_tcp
#define conn_udp        conn_proto_priv.cp_udp
#define conn_icmp       conn_proto_priv.cp_icmp
#define conn_rts        conn_proto_priv.cp_rts
#define conn_iptun      conn_proto_priv.cp_iptun
#define conn_sctp       conn_proto_priv.cp_sctp
#define conn_dccp       conn_proto_priv.cp_dccp
#define conn_priv       conn_proto_priv.cp_priv

        kcondvar_t      conn_cv;
        uint8_t         conn_proto;             /* protocol type */

        edesc_rpf       conn_recv;              /* Pointer to recv routine */
        edesc_rpf       conn_recvicmp;          /* For ICMP error */
        edesc_vpf       conn_verifyicmp;        /* Verify ICMP error */

        ip_xmit_attr_t  *conn_ixa;              /* Options if no ancil data */

        /* Fields after this are bzero'ed when the conn_t is freed. */
#define conn_start_clr  conn_recv_ancillary

        /* Options for receive-side ancillary data */
        crb_t           conn_recv_ancillary;

        squeue_t        *conn_sqp;              /* Squeue for processing */
        uint_t          conn_state_flags;       /* IP state flags */

        int             conn_lingertime;        /* linger time (in seconds) */

        /* 20 one-bit flags followed by 12 bits of padding: 32 bits total */
        unsigned int
                conn_on_sqp : 1,                /* Conn is being processed */
                conn_linger : 1,                /* SO_LINGER state */
                conn_useloopback : 1,           /* SO_USELOOPBACK state */
                conn_broadcast : 1,             /* SO_BROADCAST state */

                conn_reuseaddr : 1,             /* SO_REUSEADDR state */
                conn_keepalive : 1,             /* SO_KEEPALIVE state */
                conn_multi_router : 1,          /* Wants all multicast pkts */
                conn_unspec_src : 1,            /* IP_UNSPEC_SRC */

                conn_policy_cached : 1,         /* Is policy cached/latched ? */
                conn_in_enforce_policy : 1,     /* Enforce Policy on inbound */
                conn_out_enforce_policy : 1,    /* Enforce Policy on outbound */
                conn_debug : 1,                 /* SO_DEBUG */

                conn_ipv6_v6only : 1,           /* IPV6_V6ONLY */
                conn_oobinline : 1,             /* SO_OOBINLINE state */
                conn_dgram_errind : 1,          /* SO_DGRAM_ERRIND state */
                conn_exclbind : 1,              /* SO_EXCLBIND state */

                conn_mdt_ok : 1,                /* MDT is permitted */
                conn_allzones : 1,              /* SO_ALLZONES */
                conn_ipv6_recvpathmtu : 1,      /* IPV6_RECVPATHMTU */
                conn_mcbc_bind : 1,             /* Bound to multi/broadcast */

                conn_pad_to_bit_31 : 12;

        boolean_t       conn_blocked;           /* conn is flow-controlled */

        squeue_t        *conn_initial_sqp;      /* Squeue at open time */
        squeue_t        *conn_final_sqp;        /* Squeue after connect */
        ill_t           *conn_dhcpinit_ill;     /* IP_DHCPINIT_IF */
        ipsec_latch_t   *conn_latch;            /* latched IDS */
        struct ipsec_policy_s   *conn_latch_in_policy; /* latched policy (in) */
        struct ipsec_action_s   *conn_latch_in_action; /* latched action (in) */
        uint_t          conn_bound_if;          /* IP*_BOUND_IF */
        queue_t         *conn_rq;               /* Read queue */
        queue_t         *conn_wq;               /* Write queue */
        dev_t           conn_dev;               /* Minor number */
        vmem_t          *conn_minor_arena;      /* Minor arena */
        ip_helper_stream_info_t *conn_helper_info;

        cred_t          *conn_cred;             /* Credentials */
        pid_t           conn_cpid;              /* pid from open/connect */
        uint64_t        conn_open_time;         /* time when this was opened */

        connf_t         *conn_g_fanout;         /* Global Hash bucket head */
        struct conn_s   *conn_g_next;           /* Global Hash chain next */
        struct conn_s   *conn_g_prev;           /* Global Hash chain prev */
        struct ipsec_policy_head_s *conn_policy; /* Configured policy */
        in6_addr_t      conn_bound_addr_v6;     /* Address in bind() */
#define conn_bound_addr_v4      V4_PART_OF_V6(conn_bound_addr_v6)
        connf_t         *conn_fanout;           /* Hash bucket we're part of */
        struct conn_s   *conn_next;             /* Hash chain next */
        struct conn_s   *conn_prev;             /* Hash chain prev */

        struct {
                in6_addr_t connua_laddr;        /* Local address - match */
                in6_addr_t connua_faddr;        /* Remote address */
        } connua_v6addr;
#define conn_laddr_v4   V4_PART_OF_V6(connua_v6addr.connua_laddr)
#define conn_faddr_v4   V4_PART_OF_V6(connua_v6addr.connua_faddr)
#define conn_laddr_v6   connua_v6addr.connua_laddr
#define conn_faddr_v6   connua_v6addr.connua_faddr
        in6_addr_t      conn_saddr_v6;          /* Local address - source */
#define conn_saddr_v4   V4_PART_OF_V6(conn_saddr_v6)

        /*
         * Remote and local port overlaid with one 32-bit word, so both
         * can be compared at once through conn_ports.
         */
        union {
                /* Used for classifier match performance */
                uint32_t                connu_ports2;
                struct {
                        in_port_t       connu_fport;    /* Remote port */
                        in_port_t       connu_lport;    /* Local port */
                } connu_ports;
        } u_port;
#define conn_fport      u_port.connu_ports.connu_fport
#define conn_lport      u_port.connu_ports.connu_lport
#define conn_ports      u_port.connu_ports2

        uint_t          conn_incoming_ifindex;  /* IP{,V6}_BOUND_IF, scopeid */
        ill_t           *conn_oper_pending_ill; /* pending shared ioctl */

        krwlock_t       conn_ilg_lock;          /* Protects conn_ilg_* */
        ilg_t           *conn_ilg;              /* Group memberships */

        kcondvar_t      conn_refcv;             /* For conn_oper_pending_ill */

        struct conn_s   *conn_drain_next;       /* Next conn in drain list */
        struct conn_s   *conn_drain_prev;       /* Prev conn in drain list */
        idl_t           *conn_idl;              /* Ptr to the drain list head */
        mblk_t          *conn_ipsec_opt_mp;     /* ipsec option mblk */
        zoneid_t        conn_zoneid;            /* zone connection is in */
        int             conn_rtaware;           /* RT_AWARE sockopt value */
        kcondvar_t      conn_sq_cv;             /* For non-STREAMS socket IO */
        sock_upcalls_t  *conn_upcalls;          /* Upcalls to sockfs */
        sock_upper_handle_t conn_upper_handle;  /* Upper handle: sonode * */

        /* 9 bits in use followed by 23 spare bits: 32 bits total */
        unsigned int
                conn_mlp_type : 2,              /* mlp_type_t; tsol/tndb.h */
                conn_anon_mlp : 1,              /* user wants anon MLP */
                conn_anon_port : 1,             /* user bound anonymously */

                conn_mac_mode : 2,              /* normal/loose/implicit MAC */
                conn_anon_priv_bind : 1,        /* *_ANON_PRIV_BIND state */
                conn_zone_is_global : 1,        /* GLOBAL_ZONEID */
                conn_isvrrp : 1,                /* VRRP control socket */
                conn_spare : 23;

        boolean_t       conn_flow_cntrld;
        netstack_t      *conn_netstack; /* Corresponds to a netstack_hold */

        /*
         * IP format that packets received for this struct should use.
         * Value can be IP4_VERSION or IPV6_VERSION.
         * The sending version is encoded using IXAF_IS_IPV4.
         */
        ushort_t        conn_ipversion;

        /* Written to only once at the time of opening the endpoint */
        sa_family_t     conn_family;            /* Family from socket() call */
        uint_t          conn_so_type;           /* Type from socket() call */

        uint_t          conn_sndbuf;            /* SO_SNDBUF state */
        uint_t          conn_rcvbuf;            /* SO_RCVBUF state */
        uint_t          conn_wroff;             /* Current write offset */

        uint_t          conn_sndlowat;          /* Send buffer low water mark */
        uint_t          conn_rcvlowat;          /* Recv buffer low water mark */

        uint8_t         conn_default_ttl;       /* Default TTL/hoplimit */

        uint32_t        conn_flowinfo;  /* Connected flow id and tclass */

        /*
         * The most recent address for sendto. Initially set to zero,
         * which is always different from the destination address
         * since the send interprets zero as the loopback address.
         */
        in6_addr_t      conn_v6lastdst;
#define conn_v4lastdst  V4_PART_OF_V6(conn_v6lastdst)
        ushort_t        conn_lastipversion;
        in_port_t       conn_lastdstport;
        uint32_t        conn_lastflowinfo;      /* IPv6-only */
        uint_t          conn_lastscopeid;       /* IPv6-only */
        uint_t          conn_lastsrcid;         /* Only for AF_INET6 */
        /*
         * When we are not connected conn_saddr might be unspecified.
         * We track the source that was used with conn_v6lastdst here.
         */
        in6_addr_t      conn_v6lastsrc;
#define conn_v4lastsrc  V4_PART_OF_V6(conn_v6lastsrc)

        /* Templates for transmitting packets */
        ip_pkt_t        conn_xmit_ipp;          /* Options if no ancil data */

        /*
         * Header template - conn_ht_ulp is a pointer into conn_ht_iphc.
         * Note that ixa_ip_hdr_length indicates the offset of ht_ulp in
         * ht_iphc
         *
         * The header template is maintained for connected endpoints (and
         * updated when sticky options are changed) and also for the lastdst.
         * There is no conflict between those usages since SOCK_DGRAM and
         * SOCK_RAW can not be used to specify a destination address (with
         * sendto/sendmsg) if the socket has been connected.
         */
        uint8_t         *conn_ht_iphc;          /* Start of IP header */
        uint_t          conn_ht_iphc_allocated; /* Allocated buffer size */
        uint_t          conn_ht_iphc_len;       /* IP+ULP size */
        uint8_t         *conn_ht_ulp;           /* Upper-layer header */
        uint_t          conn_ht_ulp_len;        /* ULP header len */

        /* Checksum to compensate for source routed packets. Host byte order */
        uint32_t        conn_sum;

        uint32_t        conn_ioctlref;          /* ioctl ref count */
#ifdef CONN_DEBUG
#define CONN_TRACE_MAX  10
        int             conn_trace_last;        /* ndx of last used tracebuf */
        conn_trace_t    conn_trace_buf[CONN_TRACE_MAX];
#endif
};
 459 
/*
 * connf_t - connection fanout data.
 *
 * The hash tables and their linkage (conn_t.{conn_next, conn_prev}) are
 * protected by the per-bucket lock. Each conn_t inserted in the list
 * points back at the connf_t that heads the bucket.
 */
struct connf_s {
        struct conn_s   *connf_head;    /* First conn_t on this bucket's list */
        kmutex_t        connf_lock;     /* The per-bucket lock */
};
 471 
 472 #define CONN_INC_REF(connp)     {                               \
 473         mutex_enter(&(connp)->conn_lock);                        \
 474         DTRACE_PROBE1(conn__inc__ref, conn_t *, connp);         \
 475         ASSERT(conn_trace_ref(connp));                          \
 476         (connp)->conn_ref++;                                 \
 477         ASSERT((connp)->conn_ref != 0);                              \
 478         mutex_exit(&(connp)->conn_lock);                 \
 479 }
 480 
 481 #define CONN_INC_REF_LOCKED(connp)      {                       \
 482         DTRACE_PROBE1(conn__inc__ref, conn_t *, connp);         \
 483         ASSERT(MUTEX_HELD(&(connp)->conn_lock));         \
 484         ASSERT(conn_trace_ref(connp));                          \
 485         (connp)->conn_ref++;                                 \
 486         ASSERT((connp)->conn_ref != 0);                              \
 487 }
 488 
/*
 * Drop one reference on connp, under conn_lock.
 *
 * Panics through cmn_err(CE_PANIC) if the count is already zero, or if
 * it is one while conn_on_sqp is set (see the comment in the body).
 * When the count reaches zero, conn_lock is released and the conn is
 * handed to ipcl_conn_destroy(); otherwise waiters on conn_cv are woken
 * with cv_broadcast() before the lock is released.
 *
 * The expansion is a brace-enclosed compound statement.
 */
#define CONN_DEC_REF(connp)     {                                       \
        mutex_enter(&(connp)->conn_lock);                                \
        DTRACE_PROBE1(conn__dec__ref, conn_t *, connp);                 \
        /*                                                              \
         * The squeue framework always does a CONN_DEC_REF after return \
         * from TCP. Hence the refcnt must be at least 2 if conn_on_sqp \
         * is B_TRUE and conn_ref is being decremented. This is to      \
         * account for the mblk being currently processed.              \
         */                                                             \
        if ((connp)->conn_ref == 0 ||                                        \
            ((connp)->conn_ref == 1 && (connp)->conn_on_sqp))             \
                cmn_err(CE_PANIC, "CONN_DEC_REF: connp(%p) has ref "    \
                        "= %d\n", (void *)(connp), (connp)->conn_ref);       \
        ASSERT(conn_untrace_ref(connp));                                \
        (connp)->conn_ref--;                                         \
        if ((connp)->conn_ref == 0) {                                        \
                /* Refcnt can't increase again, safe to drop lock */    \
                mutex_exit(&(connp)->conn_lock);                 \
                ipcl_conn_destroy(connp);                               \
        } else {                                                        \
                cv_broadcast(&(connp)->conn_cv);                 \
                mutex_exit(&(connp)->conn_lock);                 \
        }                                                               \
}
 513 
 514 /*
 515  * For use with subsystems within ip which use ALL_ZONES as a wildcard
 516  */
 517 #define IPCL_ZONEID(connp)                                              \
 518         ((connp)->conn_allzones ? ALL_ZONES : (connp)->conn_zoneid)
 519 
 520 /*
 521  * For matching between a conn_t and a zoneid.
 522  */
 523 #define IPCL_ZONE_MATCH(connp, zoneid)                                  \
 524         (((connp)->conn_allzones) ||                                 \
 525             ((zoneid) == ALL_ZONES) ||                                  \
 526             (connp)->conn_zoneid == (zoneid))
 527 
 528 /*
 529  * On a labeled system, we must treat bindings to ports
 530  * on shared IP addresses by sockets with MAC exemption
 531  * privilege as being in all zones, as there's
 532  * otherwise no way to identify the right receiver.
 533  */
 534 
 535 #define IPCL_CONNS_MAC(conn1, conn2)                                    \
 536         (((conn1)->conn_mac_mode != CONN_MAC_DEFAULT) ||             \
 537         ((conn2)->conn_mac_mode != CONN_MAC_DEFAULT))
 538 
 539 #define IPCL_BIND_ZONE_MATCH(conn1, conn2)                              \
 540         (IPCL_CONNS_MAC(conn1, conn2) ||                                \
 541         IPCL_ZONE_MATCH(conn1, conn2->conn_zoneid) ||                        \
 542         IPCL_ZONE_MATCH(conn2, conn1->conn_zoneid))
 543 
 544 
 545 #define _IPCL_V4_MATCH(v6addr, v4addr)  \
 546         (V4_PART_OF_V6((v6addr)) == (v4addr) && IN6_IS_ADDR_V4MAPPED(&(v6addr)))
 547 
 548 #define _IPCL_V4_MATCH_ANY(addr)        \
 549         (IN6_IS_ADDR_V4MAPPED_ANY(&(addr)) || IN6_IS_ADDR_UNSPECIFIED(&(addr)))
 550 
 551 
 552 /*
 553  * IPCL_PROTO_MATCH() and IPCL_PROTO_MATCH_V6() only matches conns with
 554  * the specified ira_zoneid or conn_allzones by calling conn_wantpacket.
 555  */
 556 #define IPCL_PROTO_MATCH(connp, ira, ipha)                              \
 557         ((((connp)->conn_laddr_v4 == INADDR_ANY) ||                  \
 558         (((connp)->conn_laddr_v4 == ((ipha)->ipha_dst)) &&                \
 559             (((connp)->conn_faddr_v4 == INADDR_ANY) ||                       \
 560         ((connp)->conn_faddr_v4 == ((ipha)->ipha_src))))) &&              \
 561         conn_wantpacket((connp), (ira), (ipha)))
 562 
 563 #define IPCL_PROTO_MATCH_V6(connp, ira, ip6h)                           \
 564         ((IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_laddr_v6) ||            \
 565         (IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &((ip6h)->ip6_dst)) &&   \
 566         (IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_faddr_v6) ||                   \
 567         IN6_ARE_ADDR_EQUAL(&(connp)->conn_faddr_v6, &((ip6h)->ip6_src))))) && \
 568         (conn_wantpacket_v6((connp), (ira), (ip6h))))
 569 
 570 #define IPCL_CONN_HASH(src, ports, ipst)                                \
 571         ((unsigned)(ntohl((src)) ^ ((ports) >> 24) ^ ((ports) >> 16) ^      \
 572         ((ports) >> 8) ^ (ports)) % (ipst)->ips_ipcl_conn_fanout_size)
 573 
 574 #define IPCL_CONN_HASH_V6(src, ports, ipst)                             \
 575         IPCL_CONN_HASH(V4_PART_OF_V6((src)), (ports), (ipst))
 576 
 577 #define IPCL_CONN_MATCH(connp, proto, src, dst, ports)                  \
 578         ((connp)->conn_proto == (proto) &&                           \
 579                 (connp)->conn_ports == (ports) &&                            \
 580                 _IPCL_V4_MATCH((connp)->conn_faddr_v6, (src)) &&     \
 581                 _IPCL_V4_MATCH((connp)->conn_laddr_v6, (dst)) &&     \
 582                 !(connp)->conn_ipv6_v6only)
 583 
 584 #define IPCL_CONN_MATCH_V6(connp, proto, src, dst, ports)               \
 585         ((connp)->conn_proto == (proto) &&                           \
 586                 (connp)->conn_ports == (ports) &&                            \
 587                 IN6_ARE_ADDR_EQUAL(&(connp)->conn_faddr_v6, &(src)) &&       \
 588                 IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &(dst)))
 589 
 590 #define IPCL_PORT_HASH(port, size) \
 591         ((((port) >> 8) ^ (port)) & ((size) - 1))
 592 
 593 #define IPCL_BIND_HASH(lport, ipst)                                     \
 594         ((unsigned)(((lport) >> 8) ^ (lport)) % \
 595             (ipst)->ips_ipcl_bind_fanout_size)
 596 
 597 #define IPCL_BIND_MATCH(connp, proto, laddr, lport)                     \
 598         ((connp)->conn_proto == (proto) &&                           \
 599                 (connp)->conn_lport == (lport) &&                    \
 600                 (_IPCL_V4_MATCH_ANY((connp)->conn_laddr_v6) ||               \
 601                 _IPCL_V4_MATCH((connp)->conn_laddr_v6, (laddr))) &&  \
 602                 !(connp)->conn_ipv6_v6only)
 603 
 604 #define IPCL_BIND_MATCH_V6(connp, proto, laddr, lport)                  \
 605         ((connp)->conn_proto == (proto) &&                           \
 606                 (connp)->conn_lport == (lport) &&                    \
 607                 (IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &(laddr)) || \
 608                 IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_laddr_v6)))
 609 
 610 /*
 611  * We compare conn_laddr since it captures both connected and a bind to
 612  * a multicast or broadcast address.
 613  * The caller needs to match the zoneid and also call conn_wantpacket
 614  * for multicast, broadcast, or when conn_incoming_ifindex is set.
 615  */
 616 #define IPCL_UDP_MATCH(connp, lport, laddr, fport, faddr)               \
 617         (((connp)->conn_lport == (lport)) &&                         \
 618         ((_IPCL_V4_MATCH_ANY((connp)->conn_laddr_v6) ||                      \
 619         (_IPCL_V4_MATCH((connp)->conn_laddr_v6, (laddr)) &&          \
 620         (_IPCL_V4_MATCH_ANY((connp)->conn_faddr_v6) ||                       \
 621         (_IPCL_V4_MATCH((connp)->conn_faddr_v6, (faddr)) &&          \
 622         (connp)->conn_fport == (fport)))))) &&                               \
 623         !(connp)->conn_ipv6_v6only)
 624 
 625 /*
 626  * We compare conn_laddr since it captures both connected and a bind to
 627  * a multicast or broadcast address.
 628  * The caller needs to match the zoneid and also call conn_wantpacket_v6
 629  * for multicast or when conn_incoming_ifindex is set.
 630  */
 631 #define IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)    \
 632         (((connp)->conn_lport == (lport)) &&                 \
 633         (IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_laddr_v6) ||     \
 634         (IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &(laddr)) &&    \
 635         (IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_faddr_v6) ||     \
 636         (IN6_ARE_ADDR_EQUAL(&(connp)->conn_faddr_v6, &(faddr)) &&    \
 637         (connp)->conn_fport == (fport))))))
 638 
 639 #define IPCL_IPTUN_HASH(laddr, faddr)                                   \
 640         ((ntohl(laddr) ^ ((ntohl(faddr) << 24) | (ntohl(faddr) >> 8))) % \
 641         ipcl_iptun_fanout_size)
 642 
 643 #define IPCL_IPTUN_HASH_V6(laddr, faddr)                                \
 644         IPCL_IPTUN_HASH((laddr)->s6_addr32[0] ^ (laddr)->s6_addr32[1] ^   \
 645             (faddr)->s6_addr32[2] ^ (faddr)->s6_addr32[3],                \
 646             (faddr)->s6_addr32[0] ^ (faddr)->s6_addr32[1] ^               \
 647             (laddr)->s6_addr32[2] ^ (laddr)->s6_addr32[3])
 648 
 649 #define IPCL_IPTUN_MATCH(connp, laddr, faddr)                   \
 650         (_IPCL_V4_MATCH((connp)->conn_laddr_v6, (laddr)) &&  \
 651         _IPCL_V4_MATCH((connp)->conn_faddr_v6, (faddr)))
 652 
 653 #define IPCL_IPTUN_MATCH_V6(connp, laddr, faddr)                \
 654         (IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, (laddr)) && \
 655         IN6_ARE_ADDR_EQUAL(&(connp)->conn_faddr_v6, (faddr)))
 656 
 657 #define IPCL_UDP_HASH(lport, ipst)      \
 658         IPCL_PORT_HASH(lport, (ipst)->ips_ipcl_udp_fanout_size)
 659 
 660 #define IPCL_DCCP_CONN_HASH(src, ports, ipst)                           \
 661         ((unsigned)(ntohl((src)) ^ ((ports) >> 24) ^ ((ports) >> 16) ^      \
 662         ((ports) >> 8) ^ (ports)) % (ipst)->ips_ipcl_dccp_conn_fanout_size)
 663 
 664 #define IPCL_DCCP_CONN_HASH_V6(src, ports, ipst)                        \
 665         IPCL_DCCP_CONN_HASH(V4_PART_OF_V6((src)), (ports), (ipst))
 666 
 667 #define IPCL_DCCP_BIND_HASH(lport, ipst)                                \
 668         ((unsigned)(((lport) >> 8) ^ (lport)) %                           \
 669             (ipst)->ips_ipcl_dccp_bind_fanout_size)
 670 
 671 
/*
 * NOTE(review): presumably the number of buckets in the global conn hash
 * maintained by ipcl_globalhash_insert()/ipcl_globalhash_remove() --
 * confirm at the use site.
 */
#define CONN_G_HASH_SIZE        1024
 673 
 674 /* Raw socket hash function. */
 675 #define IPCL_RAW_HASH(lport, ipst)      \
 676         IPCL_PORT_HASH(lport, (ipst)->ips_ipcl_raw_fanout_size)
 677 
 678 /*
 679  * This is similar to IPCL_BIND_MATCH except that the local port check
 680  * is changed to a wildcard port check.
 681  * We compare conn_laddr since it captures both connected and a bind to
 682  * a multicast or broadcast address.
 683  */
 684 #define IPCL_RAW_MATCH(connp, proto, laddr)                     \
 685         ((connp)->conn_proto == (proto) &&                   \
 686         (connp)->conn_lport == 0 &&                          \
 687         (_IPCL_V4_MATCH_ANY((connp)->conn_laddr_v6) ||               \
 688         _IPCL_V4_MATCH((connp)->conn_laddr_v6, (laddr))))
 689 
 690 #define IPCL_RAW_MATCH_V6(connp, proto, laddr)                  \
 691         ((connp)->conn_proto == (proto) &&                   \
 692         (connp)->conn_lport == 0 &&                          \
 693         (IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_laddr_v6) ||     \
 694         IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &(laddr))))
 695 
/* Function prototypes */

/*
 * Classifier setup and teardown.  The ipcl_g_* pair takes no arguments and
 * the other pair takes an ip_stack_t; presumably global versus per-stack
 * state -- confirm against the implementation.
 */
extern void ipcl_g_init(void);
extern void ipcl_init(ip_stack_t *);
extern void ipcl_g_destroy(void);
extern void ipcl_destroy(ip_stack_t *);
/*
 * conn_t creation and destruction.
 * NOTE(review): the meaning of the uint32_t and int arguments to
 * ipcl_conn_create() is not visible from this header.
 */
extern conn_t *ipcl_conn_create(uint32_t, int, netstack_t *);
extern void ipcl_conn_destroy(conn_t *);
 703 
/*
 * Insertion into / removal from a fanout list (connf_t).
 * NOTE(review): the _locked variant takes the connf_t explicitly;
 * presumably the caller already holds that fanout's lock -- confirm.
 */
void ipcl_hash_insert_wildcard(connf_t *, conn_t *);
void ipcl_hash_remove(conn_t *);
void ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp);
 707 
/*
 * Insert a conn into the bind or connection fanout.  Each has a generic
 * entry point plus _v4/_v6 variants; all return int.
 * NOTE(review): presumably 0 on success and an errno value otherwise --
 * confirm against the implementation.
 */
extern int      ipcl_bind_insert(conn_t *);
extern int      ipcl_bind_insert_v4(conn_t *);
extern int      ipcl_bind_insert_v6(conn_t *);
extern int      ipcl_conn_insert(conn_t *);
extern int      ipcl_conn_insert_v4(conn_t *);
extern int      ipcl_conn_insert_v6(conn_t *);
/*
 * NOTE(review): presumably steps through a fanout list starting after the
 * given conn; the role of the uint32_t argument is not visible here.
 */
extern conn_t   *ipcl_get_next_conn(connf_t *, conn_t *, uint32_t);
 715 
/*
 * Classification entry points: each takes packet data and/or addresses and
 * returns a conn_t pointer.
 * NOTE(review): presumably NULL when nothing matches, and presumably the
 * returned conn carries a reference -- confirm against the implementation.
 */
conn_t *ipcl_classify_v4(mblk_t *, uint8_t, uint_t, ip_recv_attr_t *,
            ip_stack_t *);
conn_t *ipcl_classify_v6(mblk_t *, uint8_t, uint_t, ip_recv_attr_t *,
            ip_stack_t *);
conn_t *ipcl_classify(mblk_t *, ip_recv_attr_t *, ip_stack_t *);
conn_t *ipcl_classify_raw(mblk_t *, uint8_t, uint32_t, ipha_t *,
    ip6_t *, ip_recv_attr_t *, ip_stack_t *);
conn_t *ipcl_iptun_classify_v4(ipaddr_t *, ipaddr_t *, ip_stack_t *);
conn_t *ipcl_iptun_classify_v6(in6_addr_t *, in6_addr_t *, ip_stack_t *);
/* Global conn hash maintenance and traversal. */
void    ipcl_globalhash_insert(conn_t *);
void    ipcl_globalhash_remove(conn_t *);
void    ipcl_walk(pfv_t, void *, ip_stack_t *);
/*
 * TCP lookups keyed by an IP header and a TCP header.
 * NOTE(review): "reversed" presumably means the header's addresses and
 * ports are swapped before the lookup -- confirm.
 */
conn_t  *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack_t *);
conn_t  *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
            ip_stack_t *);
/*
 * Disabled (commented-out) DCCP counterparts of the reversed TCP lookups
 * declared above:
 *
 * conn_t  *ipcl_dccp_lookup_reversed_ipv4(ipha_t *, dccpha_t *, int,
 *             ip_stack_t *);
 * conn_t  *ipcl_dccp_lookup_reversed_ipv6(ip6_t *, dccpha_t *, int, uint_t,
 *             ip_stack_t *);
 */
/*
 * Listener lookup by port, address and zone.
 * NOTE(review): the uint_t in the v6 variant is presumably an interface
 * index -- confirm.
 */
conn_t  *ipcl_lookup_listener_v4(uint16_t, ipaddr_t, zoneid_t, ip_stack_t *);
conn_t  *ipcl_lookup_listener_v6(uint16_t, in6_addr_t *, uint_t, zoneid_t,
            ip_stack_t *);
/* conn reference tracing hooks and conn cleanup. */
int     conn_trace_ref(conn_t *);
int     conn_untrace_ref(conn_t *);
void    ipcl_conn_cleanup(conn_t *);
/*
 * Ancillary data for received packets: the first returns a uint_t size,
 * the second takes a uchar_t buffer and a uint_t.
 * NOTE(review): presumably size-then-fill, with the caller allocating the
 * buffer between the two calls -- confirm.
 */
extern uint_t   conn_recvancillary_size(conn_t *, crb_t, ip_recv_attr_t *,
    mblk_t *, ip_pkt_t *);
extern void     conn_recvancillary_add(conn_t *, crb_t, ip_recv_attr_t *,
    ip_pkt_t *, uchar_t *, uint_t);
/* Variants of the reversed TCP lookups that also take a conn_t. */
conn_t *ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *, ipha_t *, tcpha_t *,
            ip_stack_t *);
conn_t *ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *, ip6_t *, tcpha_t *,
            ip_stack_t *);
 750 
/*
 * Helper stream management for a conn.  Creation takes an ldi_ident_t, so
 * the stream is presumably opened through the LDI (see <sys/sunldi.h>) --
 * confirm against the implementation.
 */
extern int ip_create_helper_stream(conn_t *, ldi_ident_t);
extern void ip_free_helper_stream(conn_t *);
extern int      ip_helper_stream_setup(queue_t *, dev_t *, int, int,
    cred_t *, boolean_t);
 755 
 756 #ifdef  __cplusplus
 757 }
 758 #endif
 759 
 760 #endif  /* _INET_IPCLASSIFIER_H */