Print this page
    
%B
    
      
        | Split | Close | 
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/uts/common/inet/ip/ipclassifier.c
          +++ new/usr/src/uts/common/inet/ip/ipclassifier.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   */
  24   24  
  25   25  /*
  26   26   * IP PACKET CLASSIFIER
  27   27   *
  28   28   * The IP packet classifier provides mapping between IP packets and persistent
  29   29   * connection state for connection-oriented protocols. It also provides
  30   30   * interface for managing connection states.
  31   31   *
  32   32   * The connection state is kept in conn_t data structure and contains, among
  33   33   * other things:
  34   34   *
  35   35   *      o local/remote address and ports
  36   36   *      o Transport protocol
  37   37   *      o squeue for the connection (for TCP only)
  38   38   *      o reference counter
  39   39   *      o Connection state
  40   40   *      o hash table linkage
  41   41   *      o interface/ire information
  42   42   *      o credentials
  43   43   *      o ipsec policy
  44   44   *      o send and receive functions.
  45   45   *      o mutex lock.
  46   46   *
  47   47   * Connections use a reference counting scheme. They are freed when the
  48   48   * reference counter drops to zero. A reference is incremented when connection
  49   49   * is placed in a list or table, when incoming packet for the connection arrives
  50   50   * and when connection is processed via squeue (squeue processing may be
  51   51   * asynchronous and the reference protects the connection from being destroyed
  52   52   * before its processing is finished).
  53   53   *
  54   54   * conn_recv is used to pass up packets to the ULP.
  55   55   * For TCP, conn_recv changes: it is tcp_input_listener_unbound initially for
  56   56   * a listener, and changes to tcp_input_listener once the listener has picked a
  57   57   * good squeue. For other cases it is set to tcp_input_data.
  58   58   *
  59   59   * conn_recvicmp is used to pass up ICMP errors to the ULP.
  60   60   *
  61   61   * Classifier uses several hash tables:
  62   62   *
  63   63   *      ipcl_conn_fanout:       contains all TCP connections in CONNECTED state
  64   64   *      ipcl_bind_fanout:       contains all connections in BOUND state
  65   65   *      ipcl_proto_fanout:      IPv4 protocol fanout
  66   66   *      ipcl_proto_fanout_v6:   IPv6 protocol fanout
  67   67   *      ipcl_udp_fanout:        contains all UDP connections
  68   68   *      ipcl_iptun_fanout:      contains all IP tunnel connections
  69   69   *      ipcl_globalhash_fanout: contains all connections
  70   70   *
  71   71   * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
  72   72   * which need to view all existing connections.
  73   73   *
  74   74   * All tables are protected by per-bucket locks. When both per-bucket lock and
  75   75   * connection lock need to be held, the per-bucket lock should be acquired
  76   76   * first, followed by the connection lock.
  77   77   *
  78   78   * All functions doing search in one of these tables increment a reference
  79   79   * counter on the connection found (if any). This reference should be dropped
  80   80   * when the caller has finished processing the connection.
  81   81   *
  82   82   *
  83   83   * INTERFACES:
  84   84   * ===========
  85   85   *
  86   86   * Connection Lookup:
  87   87   * ------------------
  88   88   *
  89   89   * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
  90   90   * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
  91   91   *
  92   92   * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
  93   93   * it can't find any associated connection. If the connection is found, its
  94   94   * reference counter is incremented.
  95   95   *
  96   96   *      mp:     mblock, containing packet header. The full header should fit
  97   97   *              into a single mblock. It should also contain at least full IP
  98   98   *              and TCP or UDP header.
  99   99   *
 100  100   *      protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 101  101   *
 102  102   *      hdr_len: The size of IP header. It is used to find TCP or UDP header in
 103  103   *               the packet.
 104  104   *
 105  105   *      ira->ira_zoneid: The zone in which the returned connection must be; the
 106  106   *              zoneid corresponding to the ire_zoneid on the IRE located for
 107  107   *              the packet's destination address.
 108  108   *
 109  109   *      ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
 110  110   *              IRAF_TX_SHARED_ADDR flags
 111  111   *
 112  112   *      For TCP connections, the lookup order is as follows:
 113  113   *              5-tuple {src, dst, protocol, local port, remote port}
 114  114   *                      lookup in ipcl_conn_fanout table.
 115  115   *              3-tuple {dst, remote port, protocol} lookup in
 116  116   *                      ipcl_bind_fanout table.
 117  117   *
 118  118   *      For UDP connections, a 5-tuple {src, dst, protocol, local port,
 119  119   *      remote port} lookup is done on ipcl_udp_fanout. Note that
 120  120   *      these interfaces do not handle the case where a packet belongs
 121  121   *      to multiple UDP clients; that case is handled in IP itself.
 122  122   *
 123  123   * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 124  124   * determine which actual zone gets the segment.  This is used only in a
 125  125   * labeled environment.  The matching rules are:
 126  126   *
 127  127   *      - If it's not a multilevel port, then the label on the packet selects
 128  128   *        the zone.  Unlabeled packets are delivered to the global zone.
 129  129   *
 130  130   *      - If it's a multilevel port, then only the zone registered to receive
 131  131   *        packets on that port matches.
 132  132   *
 133  133   * Also, in a labeled environment, packet labels need to be checked.  For fully
 134  134   * bound TCP connections, we can assume that the packet label was checked
 135  135   * during connection establishment, and doesn't need to be checked on each
 136  136   * packet.  For others, though, we need to check for strict equality or, for
 137  137   * multilevel ports, membership in the range or set.  This part currently does
 138  138   * a tnrh lookup on each packet, but could be optimized to use cached results
 139  139   * if that were necessary.  (SCTP doesn't come through here, but if it did,
 140  140   * we would apply the same rules as TCP.)
 141  141   *
 142  142   * An implication of the above is that fully-bound TCP sockets must always use
 143  143   * distinct 4-tuples; they can't be discriminated by label alone.
 144  144   *
 145  145   * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 146  146   * as there's no connection set-up handshake and no shared state.
 147  147   *
 148  148   * Labels on looped-back packets within a single zone do not need to be
 149  149   * checked, as all processes in the same zone have the same label.
 150  150   *
 151  151   * Finally, for unlabeled packets received by a labeled system, special rules
 152  152   * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
 153  153   * socket in the zone whose label matches the default label of the sender, if
 154  154   * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 155  155   * receiver's label must dominate the sender's default label.
 156  156   *
 157  157   * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
 158  158   * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 159  159   *                                       ip_stack);
 160  160   *
 161  161   *      Lookup routine to find an exact match for {src, dst, local port,
 162  162   *      remote port} for TCP connections in ipcl_conn_fanout. The address and
 163  163   *      ports are read from the IP and TCP header respectively.
 164  164   *
 165  165   * conn_t       *ipcl_lookup_listener_v4(lport, laddr, protocol,
 166  166   *                                       zoneid, ip_stack);
 167  167   * conn_t       *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 168  168   *                                       zoneid, ip_stack);
 169  169   *
 170  170   *      Lookup routine to find a listener with the tuple {lport, laddr,
 171  171   *      protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 172  172   *      parameter interface index is also compared.
 173  173   *
 174  174   * void ipcl_walk(func, arg, ip_stack)
 175  175   *
 176  176   *      Apply 'func' to every connection available. The 'func' is called as
 177  177   *      (*func)(connp, arg). The walk is non-atomic so connections may be
 178  178   *      created and destroyed during the walk. The CONN_CONDEMNED and
 179  179   *      CONN_INCIPIENT flags ensure that connections which are newly created
 180  180   *      or being destroyed are not selected by the walker.
 181  181   *
 182  182   * Table Updates
 183  183   * -------------
 184  184   *
 185  185   * int ipcl_conn_insert(connp);
 186  186   * int ipcl_conn_insert_v4(connp);
 187  187   * int ipcl_conn_insert_v6(connp);
 188  188   *
 189  189   *      Insert 'connp' in the ipcl_conn_fanout.
 190  190   *      Arguments :
 191  191   *              connp           conn_t to be inserted
 192  192   *
 193  193   *      Return value :
 194  194   *              0               if connp was inserted
 195  195   *              EADDRINUSE      if the connection with the same tuple
 196  196   *                              already exists.
 197  197   *
 198  198   * int ipcl_bind_insert(connp);
 199  199   * int ipcl_bind_insert_v4(connp);
 200  200   * int ipcl_bind_insert_v6(connp);
 201  201   *
 202  202   *      Insert 'connp' in ipcl_bind_fanout.
 203  203   *      Arguments :
 204  204   *              connp           conn_t to be inserted
 205  205   *
 206  206   *
 207  207   * void ipcl_hash_remove(connp);
 208  208   *
 209  209   *      Removes the 'connp' from the connection fanout table.
 210  210   *
 211  211   * Connection Creation/Destruction
 212  212   * -------------------------------
 213  213   *
 214  214   * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 215  215   *
  
    | ↓ open down ↓ | 215 lines elided | ↑ open up ↑ | 
 216  216   *      Creates a new conn based on the type flag, inserts it into
 217  217   *      globalhash table.
 218  218   *
 219  219   *      type:   This flag determines the type of conn_t which needs to be
 220  220   *              created i.e., which kmem_cache it comes from.
 221  221   *              IPCL_TCPCONN    indicates a TCP connection
 222  222   *              IPCL_SCTPCONN   indicates a SCTP connection
 223  223   *              IPCL_UDPCONN    indicates a UDP conn_t.
 224  224   *              IPCL_RAWIPCONN  indicates a RAWIP/ICMP conn_t.
 225  225   *              IPCL_RTSCONN    indicates a RTS conn_t.
      226 + *              IPCL_DCCPCONN   indicates a DCCP conn_t.
 226  227   *              IPCL_IPCCONN    indicates all other connections.
 227  228   *
 228  229   * void ipcl_conn_destroy(connp)
 229  230   *
 230  231   *      Destroys the connection state, removes it from the global
 231  232   *      connection hash table and frees its memory.
 232  233   */
 233  234  
 234  235  #include <sys/types.h>
 235  236  #include <sys/stream.h>
 236  237  #include <sys/stropts.h>
 237  238  #include <sys/sysmacros.h>
 238  239  #include <sys/strsubr.h>
 239  240  #include <sys/strsun.h>
 240  241  #define _SUN_TPI_VERSION 2
 241  242  #include <sys/ddi.h>
 242  243  #include <sys/cmn_err.h>
 243  244  #include <sys/debug.h>
 244  245  
 245  246  #include <sys/systm.h>
 246  247  #include <sys/param.h>
 247  248  #include <sys/kmem.h>
 248  249  #include <sys/isa_defs.h>
 249  250  #include <inet/common.h>
  
    | ↓ open down ↓ | 14 lines elided | ↑ open up ↑ | 
 250  251  #include <netinet/ip6.h>
 251  252  #include <netinet/icmp6.h>
 252  253  
 253  254  #include <inet/ip.h>
 254  255  #include <inet/ip_if.h>
 255  256  #include <inet/ip_ire.h>
 256  257  #include <inet/ip6.h>
 257  258  #include <inet/ip_ndp.h>
 258  259  #include <inet/ip_impl.h>
 259  260  #include <inet/udp_impl.h>
      261 +#include <inet/dccp/dccp_impl.h>
 260  262  #include <inet/sctp_ip.h>
 261  263  #include <inet/sctp/sctp_impl.h>
 262  264  #include <inet/rawip_impl.h>
 263  265  #include <inet/rts_impl.h>
 264  266  #include <inet/iptun/iptun_impl.h>
 265  267  
 266  268  #include <sys/cpuvar.h>
 267  269  
 268  270  #include <inet/ipclassifier.h>
 269  271  #include <inet/tcp.h>
 270  272  #include <inet/ipsec_impl.h>
 271  273  
 272  274  #include <sys/tsol/tnet.h>
  
    | ↓ open down ↓ | 3 lines elided | ↑ open up ↑ | 
 273  275  #include <sys/sockio.h>
 274  276  
 275  277  /* Old value for compatibility. Settable in /etc/system */
 276  278  uint_t tcp_conn_hash_size = 0;
 277  279  
 278  280  /* New value. Zero means choose automatically.  Settable in /etc/system */
 279  281  uint_t ipcl_conn_hash_size = 0;
 280  282  uint_t ipcl_conn_hash_memfactor = 8192;
 281  283  uint_t ipcl_conn_hash_maxsize = 82500;
 282  284  
 283      -/* bind/udp fanout table size */
      285 +/* bind/dccp/udp fanout table size */
 284  286  uint_t ipcl_bind_fanout_size = 512;
      287 +uint_t ipcl_dccp_fanout_size = 512;
 285  288  uint_t ipcl_udp_fanout_size = 16384;
 286  289  
 287  290  /* Raw socket fanout size.  Must be a power of 2. */
 288  291  uint_t ipcl_raw_fanout_size = 256;
 289  292  
 290  293  /*
 291  294   * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 292  295   * expect that most large deployments would have hundreds of tunnels, and
 293  296   * thousands in the extreme case.
 294  297   */
 295  298  uint_t ipcl_iptun_fanout_size = 6143;
 296  299  
 297  300  /*
 298  301   * Power of 2^N Primes useful for hashing for N of 0-28,
 299  302   * these primes are the nearest prime <= 2^N - 2^(N-2).
 300  303   */
 301  304  
 302  305  #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,  \
 303  306                  6143, 12281, 24571, 49139, 98299, 196597, 393209,       \
 304  307                  786431, 1572853, 3145721, 6291449, 12582893, 25165813,  \
 305  308                  50331599, 100663291, 201326557, 0}
 306  309  
 307  310  /*
 308  311   * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 309  312   * are aligned on cache lines.
 310  313   */
 311  314  typedef union itc_s {
  
    | ↓ open down ↓ | 17 lines elided | ↑ open up ↑ | 
 312  315          conn_t  itc_conn;
 313  316          char    itcu_filler[CACHE_ALIGN(conn_s)];
 314  317  } itc_t;
 315  318  
 316  319  struct kmem_cache  *tcp_conn_cache;
 317  320  struct kmem_cache  *ip_conn_cache;
 318  321  extern struct kmem_cache  *sctp_conn_cache;
 319  322  struct kmem_cache  *udp_conn_cache;
 320  323  struct kmem_cache  *rawip_conn_cache;
 321  324  struct kmem_cache  *rts_conn_cache;
      325 +struct kmem_cache  *dccp_conn_cache;
 322  326  
 323  327  extern void     tcp_timermp_free(tcp_t *);
 324  328  extern mblk_t   *tcp_timermp_alloc(int);
 325  329  
 326  330  static int      ip_conn_constructor(void *, void *, int);
 327  331  static void     ip_conn_destructor(void *, void *);
 328  332  
 329  333  static int      tcp_conn_constructor(void *, void *, int);
 330  334  static void     tcp_conn_destructor(void *, void *);
 331  335  
 332  336  static int      udp_conn_constructor(void *, void *, int);
 333  337  static void     udp_conn_destructor(void *, void *);
 334  338  
 335  339  static int      rawip_conn_constructor(void *, void *, int);
 336  340  static void     rawip_conn_destructor(void *, void *);
 337  341  
 338  342  static int      rts_conn_constructor(void *, void *, int);
 339  343  static void     rts_conn_destructor(void *, void *);
 340  344  
      345 +static int      dccp_conn_constructor(void *, void *, int);
      346 +static void     dccp_conn_destructor(void *, void *);
      347 +
 341  348  /*
 342  349   * Global (for all stack instances) init routine
 343  350   */
 344  351  void
 345  352  ipcl_g_init(void)
 346  353  {
 347  354          ip_conn_cache = kmem_cache_create("ip_conn_cache",
 348  355              sizeof (conn_t), CACHE_ALIGN_SIZE,
 349  356              ip_conn_constructor, ip_conn_destructor,
 350  357              NULL, NULL, NULL, 0);
 351  358  
 352  359          tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
 353  360              sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
 354  361              tcp_conn_constructor, tcp_conn_destructor,
 355  362              tcp_conn_reclaim, NULL, NULL, 0);
 356  363  
 357  364          udp_conn_cache = kmem_cache_create("udp_conn_cache",
 358  365              sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
 359  366              udp_conn_constructor, udp_conn_destructor,
 360  367              NULL, NULL, NULL, 0);
  
    | ↓ open down ↓ | 10 lines elided | ↑ open up ↑ | 
 361  368  
 362  369          rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
 363  370              sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
 364  371              rawip_conn_constructor, rawip_conn_destructor,
 365  372              NULL, NULL, NULL, 0);
 366  373  
 367  374          rts_conn_cache = kmem_cache_create("rts_conn_cache",
 368  375              sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
 369  376              rts_conn_constructor, rts_conn_destructor,
 370  377              NULL, NULL, NULL, 0);
      378 +
      379 +        /* XXX:DCCP: no reclaim callback is registered for this cache yet */
      380 +        dccp_conn_cache = kmem_cache_create("dccp_conn_cache",
      381 +            sizeof (itc_t) + sizeof (dccp_t), CACHE_ALIGN_SIZE,
      382 +            dccp_conn_constructor, dccp_conn_destructor,
      383 +            NULL, NULL, NULL, 0);
 371  384  }
 372  385  
 373  386  /*
 374  387   * ipclassifier initialization routine, sets up hash tables.
 375  388   */
 376  389  void
 377  390  ipcl_init(ip_stack_t *ipst)
 378  391  {
 379  392          int i;
 380  393          int sizes[] = P2Ps();
 381  394  
 382  395          /*
 383  396           * Calculate size of conn fanout table from /etc/system settings
 384  397           */
 385  398          if (ipcl_conn_hash_size != 0) {
 386  399                  ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
 387  400          } else if (tcp_conn_hash_size != 0) {
 388  401                  ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
 389  402          } else {
 390  403                  extern pgcnt_t freemem;
 391  404  
 392  405                  ipst->ips_ipcl_conn_fanout_size =
 393  406                      (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
 394  407  
 395  408                  if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
 396  409                          ipst->ips_ipcl_conn_fanout_size =
 397  410                              ipcl_conn_hash_maxsize;
 398  411                  }
 399  412          }
 400  413  
 401  414          for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
 402  415                  if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
  
    | ↓ open down ↓ | 22 lines elided | ↑ open up ↑ | 
 403  416                          break;
 404  417                  }
 405  418          }
 406  419          if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
 407  420                  /* Out of range, use the 2^16 value */
 408  421                  ipst->ips_ipcl_conn_fanout_size = sizes[16];
 409  422          }
 410  423  
 411  424          /* Take values from /etc/system */
 412  425          ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
      426 +        ipst->ips_ipcl_dccp_fanout_size = ipcl_dccp_fanout_size;
 413  427          ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
 414  428          ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
 415  429          ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
 416  430  
 417  431          ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
 418  432  
 419  433          ipst->ips_ipcl_conn_fanout = kmem_zalloc(
 420  434              ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
 421  435  
 422  436          for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 423  437                  mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
 424  438                      MUTEX_DEFAULT, NULL);
 425  439          }
 426  440  
 427  441          ipst->ips_ipcl_bind_fanout = kmem_zalloc(
 428  442              ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
 429  443  
 430  444          for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
 431  445                  mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
 432  446                      MUTEX_DEFAULT, NULL);
 433  447          }
 434  448  
 435  449          ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
 436  450              sizeof (connf_t), KM_SLEEP);
 437  451          for (i = 0; i < IPPROTO_MAX; i++) {
 438  452                  mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
 439  453                      MUTEX_DEFAULT, NULL);
 440  454          }
 441  455  
 442  456          ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
 443  457              sizeof (connf_t), KM_SLEEP);
 444  458          for (i = 0; i < IPPROTO_MAX; i++) {
 445  459                  mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
 446  460                      MUTEX_DEFAULT, NULL);
 447  461          }
 448  462  
 449  463          ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
 450  464          mutex_init(&ipst->ips_rts_clients->connf_lock,
 451  465              NULL, MUTEX_DEFAULT, NULL);
 452  466  
 453  467          ipst->ips_ipcl_udp_fanout = kmem_zalloc(
 454  468              ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
 455  469          for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
 456  470                  mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
 457  471                      MUTEX_DEFAULT, NULL);
 458  472          }
 459  473  
 460  474          ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
 461  475              ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
 462  476          for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
 463  477                  mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
 464  478                      MUTEX_DEFAULT, NULL);
 465  479          }
 466  480  
 467  481          ipst->ips_ipcl_raw_fanout = kmem_zalloc(
 468  482              ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
 469  483          for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
  
    | ↓ open down ↓ | 47 lines elided | ↑ open up ↑ | 
 470  484                  mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
 471  485                      MUTEX_DEFAULT, NULL);
 472  486          }
 473  487  
 474  488          ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
 475  489              sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
 476  490          for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 477  491                  mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
 478  492                      NULL, MUTEX_DEFAULT, NULL);
 479  493          }
      494 +
      495 +        ipst->ips_ipcl_dccp_fanout = kmem_zalloc(
      496 +            ipst->ips_ipcl_dccp_fanout_size * sizeof (connf_t), KM_SLEEP);
      497 +        for (i = 0; i < ipst->ips_ipcl_dccp_fanout_size; i++) {
      498 +                mutex_init(&ipst->ips_ipcl_dccp_fanout[i].connf_lock, NULL,
      499 +                    MUTEX_DEFAULT, NULL);
      500 +        }
 480  501  }
 481  502  
 482  503  void
 483  504  ipcl_g_destroy(void)
 484  505  {
 485  506          kmem_cache_destroy(ip_conn_cache);
 486  507          kmem_cache_destroy(tcp_conn_cache);
 487  508          kmem_cache_destroy(udp_conn_cache);
 488  509          kmem_cache_destroy(rawip_conn_cache);
 489  510          kmem_cache_destroy(rts_conn_cache);
      511 +        kmem_cache_destroy(dccp_conn_cache);
 490  512  }
 491  513  
 492  514  /*
 493  515   * All user-level and kernel use of the stack must be gone
 494  516   * by now.
 495  517   */
 496  518  void
 497  519  ipcl_destroy(ip_stack_t *ipst)
 498  520  {
 499  521          int i;
 500  522  
 501  523          for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
 502  524                  ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
 503  525                  mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
 504  526          }
 505  527          kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
 506  528              sizeof (connf_t));
 507  529          ipst->ips_ipcl_conn_fanout = NULL;
 508  530  
 509  531          for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
 510  532                  ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
 511  533                  mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
 512  534          }
 513  535          kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
 514  536              sizeof (connf_t));
 515  537          ipst->ips_ipcl_bind_fanout = NULL;
 516  538  
 517  539          for (i = 0; i < IPPROTO_MAX; i++) {
 518  540                  ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
 519  541                  mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
 520  542          }
 521  543          kmem_free(ipst->ips_ipcl_proto_fanout_v4,
 522  544              IPPROTO_MAX * sizeof (connf_t));
 523  545          ipst->ips_ipcl_proto_fanout_v4 = NULL;
 524  546  
 525  547          for (i = 0; i < IPPROTO_MAX; i++) {
 526  548                  ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
 527  549                  mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
 528  550          }
 529  551          kmem_free(ipst->ips_ipcl_proto_fanout_v6,
 530  552              IPPROTO_MAX * sizeof (connf_t));
 531  553          ipst->ips_ipcl_proto_fanout_v6 = NULL;
 532  554  
 533  555          for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
 534  556                  ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
 535  557                  mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
 536  558          }
 537  559          kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
 538  560              sizeof (connf_t));
 539  561          ipst->ips_ipcl_udp_fanout = NULL;
 540  562  
 541  563          for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
 542  564                  ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
 543  565                  mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
 544  566          }
 545  567          kmem_free(ipst->ips_ipcl_iptun_fanout,
 546  568              ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
 547  569          ipst->ips_ipcl_iptun_fanout = NULL;
 548  570  
 549  571          for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
 550  572                  ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
 551  573                  mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
 552  574          }
 553  575          kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
 554  576              sizeof (connf_t));
  
    | ↓ open down ↓ | 55 lines elided | ↑ open up ↑ | 
 555  577          ipst->ips_ipcl_raw_fanout = NULL;
 556  578  
 557  579          for (i = 0; i < CONN_G_HASH_SIZE; i++) {
 558  580                  ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
 559  581                  mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
 560  582          }
 561  583          kmem_free(ipst->ips_ipcl_globalhash_fanout,
 562  584              sizeof (connf_t) * CONN_G_HASH_SIZE);
 563  585          ipst->ips_ipcl_globalhash_fanout = NULL;
 564  586  
      587 +        for (i = 0; i < ipst->ips_ipcl_dccp_fanout_size; i++) {
      588 +                ASSERT(ipst->ips_ipcl_dccp_fanout[i].connf_head == NULL);
      589 +                mutex_destroy(&ipst->ips_ipcl_dccp_fanout[i].connf_lock);
      590 +        }
      591 +        kmem_free(ipst->ips_ipcl_dccp_fanout, ipst->ips_ipcl_dccp_fanout_size *
      592 +            sizeof (connf_t));
      593 +        ipst->ips_ipcl_dccp_fanout = NULL;
      594 +
 565  595          ASSERT(ipst->ips_rts_clients->connf_head == NULL);
 566  596          mutex_destroy(&ipst->ips_rts_clients->connf_lock);
 567  597          kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
 568  598          ipst->ips_rts_clients = NULL;
 569  599  }
 570  600  
 571  601  /*
 572  602   * Conn creation routine: initializes the conn, sets its reference
 573  603   * count, and inserts it into the global hash table.
 574  604   */
 575  605  conn_t *
 576  606  ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
 577  607  {
 578  608          conn_t  *connp;
 579  609          struct kmem_cache *conn_cache;
 580  610  
 581  611          switch (type) {
 582  612          case IPCL_SCTPCONN:
 583  613                  if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
 584  614                          return (NULL);
 585  615                  sctp_conn_init(connp);
 586  616                  netstack_hold(ns);
 587  617                  connp->conn_netstack = ns;
 588  618                  connp->conn_ixa->ixa_ipst = ns->netstack_ip;
 589  619                  connp->conn_ixa->ixa_conn_id = (long)connp;
 590  620                  ipcl_globalhash_insert(connp);
 591  621                  return (connp);
 592  622  
 593  623          case IPCL_TCPCONN:
 594  624                  conn_cache = tcp_conn_cache;
 595  625                  break;
 596  626  
 597  627          case IPCL_UDPCONN:
 598  628                  conn_cache = udp_conn_cache;
 599  629                  break;
 600  630  
 601  631          case IPCL_RAWIPCONN:
 602  632                  conn_cache = rawip_conn_cache;
  
    | ↓ open down ↓ | 28 lines elided | ↑ open up ↑ | 
 603  633                  break;
 604  634  
 605  635          case IPCL_RTSCONN:
 606  636                  conn_cache = rts_conn_cache;
 607  637                  break;
 608  638  
 609  639          case IPCL_IPCCONN:
 610  640                  conn_cache = ip_conn_cache;
 611  641                  break;
 612  642  
      643 +        case IPCL_DCCPCONN:
      644 +                conn_cache = dccp_conn_cache;
      645 +                break;
      646 +
 613  647          default:
 614  648                  connp = NULL;
 615  649                  ASSERT(0);
 616  650          }
 617  651  
 618  652          if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
 619  653                  return (NULL);
 620  654  
 621  655          connp->conn_ref = 1;
 622  656          netstack_hold(ns);
 623  657          connp->conn_netstack = ns;
 624  658          connp->conn_ixa->ixa_ipst = ns->netstack_ip;
 625  659          connp->conn_ixa->ixa_conn_id = (long)connp;
 626  660          ipcl_globalhash_insert(connp);
 627  661          return (connp);
 628  662  }
 629  663  
 630  664  void
 631  665  ipcl_conn_destroy(conn_t *connp)
 632  666  {
                   /*
                    * Tear down a conn whose last reference is gone (conn_ref and
                    * conn_ioctlref must both be zero, conn_lock not held): release
                    * the state hanging off it, take it out of the global hash and
                    * hand it back to the kmem cache selected by conn_flags.
                    */
 633  667          mblk_t  *mp;
 634  668          netstack_t      *ns = connp->conn_netstack;
 635  669  
 636  670          ASSERT(!MUTEX_HELD(&connp->conn_lock));
 637  671          ASSERT(connp->conn_ref == 0);
 638  672          ASSERT(connp->conn_ioctlref == 0);
 639  673  
 640  674          DTRACE_PROBE1(conn__destroy, conn_t *, connp);
 641  675  
 642  676          if (connp->conn_cred != NULL) {
 643  677                  crfree(connp->conn_cred);
 644  678                  connp->conn_cred = NULL;
 645  679                  /* ixa_cred done in ipcl_conn_cleanup below */
 646  680          }
 647  681  
 648  682          if (connp->conn_ht_iphc != NULL) {
 649  683                  kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
 650  684                  connp->conn_ht_iphc = NULL;
 651  685                  connp->conn_ht_iphc_allocated = 0;
 652  686                  connp->conn_ht_iphc_len = 0;
 653  687                  connp->conn_ht_ulp = NULL;
 654  688                  connp->conn_ht_ulp_len = 0;
 655  689          }
 656  690          ip_pkt_free(&connp->conn_xmit_ipp);
 657  691  
 658  692          ipcl_globalhash_remove(connp);
 659  693  
 660  694          if (connp->conn_latch != NULL) {
 661  695                  IPLATCH_REFRELE(connp->conn_latch);
 662  696                  connp->conn_latch = NULL;
 663  697          }
 664  698          if (connp->conn_latch_in_policy != NULL) {
 665  699                  IPPOL_REFRELE(connp->conn_latch_in_policy);
 666  700                  connp->conn_latch_in_policy = NULL;
 667  701          }
 668  702          if (connp->conn_latch_in_action != NULL) {
 669  703                  IPACT_REFRELE(connp->conn_latch_in_action);
 670  704                  connp->conn_latch_in_action = NULL;
 671  705          }
 672  706          if (connp->conn_policy != NULL) {
 673  707                  IPPH_REFRELE(connp->conn_policy, ns);
 674  708                  connp->conn_policy = NULL;
 675  709          }
 676  710  
 677  711          if (connp->conn_ipsec_opt_mp != NULL) {
 678  712                  freemsg(connp->conn_ipsec_opt_mp);
 679  713                  connp->conn_ipsec_opt_mp = NULL;
 680  714          }
 681  715  
 682  716          if (connp->conn_flags & IPCL_TCPCONN) {
 683  717                  tcp_t *tcp = connp->conn_tcp;
 684  718  
 685  719                  tcp_free(tcp);
                           /* Saved here and restored after the bzero() below. */
 686  720                  mp = tcp->tcp_timercache;
 687  721  
 688  722                  tcp->tcp_tcps = NULL;
 689  723  
 690  724                  /*
 691  725                   * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
 692  726                   * the mblk.
 693  727                   */
 694  728                  if (tcp->tcp_rsrv_mp != NULL) {
 695  729                          freeb(tcp->tcp_rsrv_mp);
 696  730                          tcp->tcp_rsrv_mp = NULL;
 697  731                          mutex_destroy(&tcp->tcp_rsrv_mp_lock);
 698  732                  }
 699  733  
 700  734                  ipcl_conn_cleanup(connp);
 701  735                  connp->conn_flags = IPCL_TCPCONN;
 702  736                  if (ns != NULL) {
 703  737                          ASSERT(tcp->tcp_tcps == NULL);
 704  738                          connp->conn_netstack = NULL;
 705  739                          connp->conn_ixa->ixa_ipst = NULL;
 706  740                          netstack_rele(ns);
 707  741                  }
 708  742  
 709  743                  bzero(tcp, sizeof (tcp_t));
 710  744  
 711  745                  tcp->tcp_timercache = mp;
 712  746                  tcp->tcp_connp = connp;
 
    | ↓ open down ↓ | 90 lines elided | ↑ open up ↑ | 
 713  747                  kmem_cache_free(tcp_conn_cache, connp);
 714  748                  return;
 715  749          }
 716  750  
 717  751          if (connp->conn_flags & IPCL_SCTPCONN) {
 718  752                  ASSERT(ns != NULL);
 719  753                  sctp_free(connp);
 720  754                  return;
 721  755          }
 722  756  
      757 +        if (connp->conn_flags & IPCL_DCCPCONN) {
      758 +                /*
      759 +                 * The conn came from dccp_conn_cache (see ipcl_conn_create),
      760 +                 * so it must go back there; falling through would hand it to
      761 +                 * ip_conn_cache.  conn_priv (conn_dccp) is left in place.
      762 +                 */
      763 +                ipcl_conn_cleanup(connp);
      764 +                if (ns != NULL) {
      765 +                        connp->conn_netstack = NULL;
      766 +                        connp->conn_ixa->ixa_ipst = NULL;
      767 +                        netstack_rele(ns);
      768 +                }
      769 +                connp->conn_flags = IPCL_DCCPCONN;
      770 +                kmem_cache_free(dccp_conn_cache, connp);
      771 +                return;
      772 +        }
      773 +
 723  774          ipcl_conn_cleanup(connp);
 724  775          if (ns != NULL) {
 725  776                  connp->conn_netstack = NULL;
 726  777                  connp->conn_ixa->ixa_ipst = NULL;
 727  778                  netstack_rele(ns);
 728  779          }
 729  780  
 730  781          /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
 731  782          if (connp->conn_flags & IPCL_UDPCONN) {
 732  783                  connp->conn_flags = IPCL_UDPCONN;
 733  784                  kmem_cache_free(udp_conn_cache, connp);
 734  785          } else if (connp->conn_flags & IPCL_RAWIPCONN) {
 735  786                  connp->conn_flags = IPCL_RAWIPCONN;
 736  787                  connp->conn_proto = IPPROTO_ICMP;
 737  788                  connp->conn_ixa->ixa_protocol = connp->conn_proto;
 738  789                  kmem_cache_free(rawip_conn_cache, connp);
 739  790          } else if (connp->conn_flags & IPCL_RTSCONN) {
 740  791                  connp->conn_flags = IPCL_RTSCONN;
 741  792                  kmem_cache_free(rts_conn_cache, connp);
 742  793          } else {
 743  794                  connp->conn_flags = IPCL_IPCCONN;
 744  795                  ASSERT(connp->conn_flags & IPCL_IPCCONN);
 745  796                  ASSERT(connp->conn_priv == NULL);
 746  797                  kmem_cache_free(ip_conn_cache, connp);
 747  798          }
 748  799  }
 749  800  
 750  801  /*
 751  802   * Running in cluster mode - deregister listener information
            *
            * If the cl_inet_unlisten hook is set, it is called with the conn's stack
            * id, IPPROTO_TCP, the address family, the bound address and the local
            * port.  IPCL_CL_LISTENER is cleared whether or not the hook is set.
            * The caller must pass a conn that has IPCL_CL_LISTENER set and a
            * non-zero conn_lport (both are asserted).
 752  803   */
 753  804  static void
 754  805  ipcl_conn_unlisten(conn_t *connp)
 755  806  {
 756  807          ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
 757  808          ASSERT(connp->conn_lport != 0);
 758  809  
 759  810          if (cl_inet_unlisten != NULL) {
 760  811                  sa_family_t     addr_family;
 761  812                  uint8_t         *laddrp;
 762  813  
                           /* Pick the family and bound address by IP version. */
 763  814                  if (connp->conn_ipversion == IPV6_VERSION) {
 764  815                          addr_family = AF_INET6;
 765  816                          laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
 766  817                  } else {
 767  818                          addr_family = AF_INET;
 768  819                          laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
 769  820                  }
 770  821                  (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
 771  822                      IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
 772  823          }
 773  824          connp->conn_flags &= ~IPCL_CL_LISTENER;
 774  825  }
 775  826  
 776  827  /*
 777  828   * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 778  829   * which table the conn belonged to). So for debugging we can see which hash
 779  830   * table this connection was in.
            *
            * The macro does nothing when conn_fanout is NULL.  Otherwise it takes the
            * bucket's connf_lock, unlinks the conn, clears its list pointers, calls
            * ipcl_conn_unlisten() if IPCL_CL_LISTENER is set, and drops one reference
            * with CONN_DEC_REF.  conn_lock must not be held by the caller (asserted).
 780  831   */
 781  832  #define IPCL_HASH_REMOVE(connp) {                                       \
 782  833          connf_t *connfp = (connp)->conn_fanout;                         \
 783  834          ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));                     \
 784  835          if (connfp != NULL) {                                           \
 785  836                  mutex_enter(&connfp->connf_lock);                       \
 786  837                  if ((connp)->conn_next != NULL)                         \
 787  838                          (connp)->conn_next->conn_prev =                 \
 788  839                              (connp)->conn_prev;                         \
 789  840                  if ((connp)->conn_prev != NULL)                         \
 790  841                          (connp)->conn_prev->conn_next =                 \
 791  842                              (connp)->conn_next;                         \
 792  843                  else                                                    \
 793  844                          connfp->connf_head = (connp)->conn_next;        \
 794  845                  (connp)->conn_fanout = NULL;                            \
 795  846                  (connp)->conn_next = NULL;                              \
 796  847                  (connp)->conn_prev = NULL;                              \
 797  848                  (connp)->conn_flags |= IPCL_REMOVED;                    \
 798  849                  if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)      \
 799  850                          ipcl_conn_unlisten((connp));                    \
 800  851                  CONN_DEC_REF((connp));                                  \
 801  852                  mutex_exit(&connfp->connf_lock);                        \
 802  853          }                                                               \
 803  854  }
 804  855  
 805  856  void
 806  857  ipcl_hash_remove(conn_t *connp)
 807  858  {
                   /*
                    * Function form of IPCL_HASH_REMOVE.  conn_proto is read before
                    * the removal; if it is IPPROTO_RSVP, ill_set_inputfn_all() is
                    * called for the conn's IP stack once the conn is off the list.
                    */
 808  859          uint8_t         protocol = connp->conn_proto;
 809  860  
 810  861          IPCL_HASH_REMOVE(connp);
 811  862          if (protocol == IPPROTO_RSVP)
 812  863                  ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
 813  864  }
 814  865  
 815  866  /*
 816  867   * The whole purpose of this function is to allow removal of
 817  868   * a conn_t from the connected hash for timewait reclaim.
 818  869   * This is essentially a TW reclaim fastpath where timewait
 819  870   * collector checks under fanout lock (so no one else can
 820  871   * get access to the conn_t) that refcnt is 2 i.e. one for
 821  872   * TCP and one for the classifier hash list. If ref count
 822  873   * is indeed 2, we can just remove the conn under lock and
 823  874   * avoid cleaning up the conn under squeue. This gives us
 824  875   * improved performance.
            *
            * The caller must hold both connfp->connf_lock and connp->conn_lock, and
            * the conn must not have IPCL_CL_LISTENER set (all three are asserted).
            * The hash list's reference is dropped by decrementing conn_ref directly.
 825  876   */
 826  877  void
 827  878  ipcl_hash_remove_locked(conn_t *connp, connf_t  *connfp)
 828  879  {
 829  880          ASSERT(MUTEX_HELD(&connfp->connf_lock));
 830  881          ASSERT(MUTEX_HELD(&connp->conn_lock));
 831  882          ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
 832  883  
                   /* Unlink from the doubly linked bucket list. */
 833  884          if ((connp)->conn_next != NULL) {
 834  885                  (connp)->conn_next->conn_prev = (connp)->conn_prev;
 835  886          }
 836  887          if ((connp)->conn_prev != NULL) {
 837  888                  (connp)->conn_prev->conn_next = (connp)->conn_next;
 838  889          } else {
 839  890                  connfp->connf_head = (connp)->conn_next;
 840  891          }
 841  892          (connp)->conn_fanout = NULL;
 842  893          (connp)->conn_next = NULL;
 843  894          (connp)->conn_prev = NULL;
 844  895          (connp)->conn_flags |= IPCL_REMOVED;
 845  896          ASSERT((connp)->conn_ref == 2);
 846  897          (connp)->conn_ref--;
 847  898  }
 848  899  
 849  900  #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {              \
 850  901          ASSERT((connp)->conn_fanout == NULL);   /* not on any list */   \
 851  902          ASSERT((connp)->conn_next == NULL);                             \
 852  903          ASSERT((connp)->conn_prev == NULL);                             \
 853  904          if ((connfp)->connf_head != NULL) {    /* push at the head */   \
 854  905                  (connfp)->connf_head->conn_prev = (connp);              \
 855  906                  (connp)->conn_next = (connfp)->connf_head;              \
 856  907          }                                                               \
 857  908          (connp)->conn_fanout = (connfp);                                \
 858  909          (connfp)->connf_head = (connp);                                 \
 859  910          (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |   \
 860  911              IPCL_CONNECTED;                                             \
 861  912          CONN_INC_REF(connp);    /* reference held by the list */        \
 862  913  }
 863  914  
 864  915  #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) {                     \
 865  916          IPCL_HASH_REMOVE((connp));      /* leave any current bucket */  \
 866  917          mutex_enter(&(connfp)->connf_lock);                             \
 867  918          IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); /* at head */ \
 868  919          mutex_exit(&(connfp)->connf_lock);                              \
 869  920  }
 870  921  
 871  922  #define IPCL_HASH_INSERT_BOUND(connfp, connp) {                         \
 872  923          conn_t *pconnp = NULL, *nconnp;                                 \
 873  924          IPCL_HASH_REMOVE((connp));      /* leave any current bucket */  \
 874  925          mutex_enter(&(connfp)->connf_lock);                             \
 875  926          nconnp = (connfp)->connf_head;                                  \
 876  927          while (nconnp != NULL &&    /* find first MATCH_ANY entry */    \
 877  928              !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) {               \
 878  929                  pconnp = nconnp;                                        \
 879  930                  nconnp = nconnp->conn_next;                             \
 880  931          }                                                               \
 881  932          if (pconnp != NULL) {    /* link after predecessor */           \
 882  933                  pconnp->conn_next = (connp);                            \
 883  934                  (connp)->conn_prev = pconnp;                            \
 884  935          } else {                                                        \
 885  936                  (connfp)->connf_head = (connp);                         \
 886  937          }                                                               \
 887  938          if (nconnp != NULL) {    /* and before nconnp */                \
 888  939                  (connp)->conn_next = nconnp;                            \
 889  940                  nconnp->conn_prev = (connp);                            \
 890  941          }                                                               \
 891  942          (connp)->conn_fanout = (connfp);                                \
 892  943          (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |   \
 893  944              IPCL_BOUND;                                                 \
 894  945          CONN_INC_REF(connp);    /* reference held by the list */        \
 895  946          mutex_exit(&(connfp)->connf_lock);                              \
 896  947  }
 897  948  
 898  949  #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) {                      \
                   /* Move connp to the tail of connfp; a v4-mapped conn is */     \
                   /* instead put ahead of a same-zone unspecified-addr conn. */   \
 899  950          conn_t **list, *prev, *next;                                    \
 900  951          boolean_t isv4mapped =                                          \
 901  952              IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);              \
 902  953          IPCL_HASH_REMOVE((connp));      /* leave any current bucket */  \
 903  954          mutex_enter(&(connfp)->connf_lock);                             \
 904  955          list = &(connfp)->connf_head;                                   \
 905  956          prev = NULL;                                                    \
 906  957          while ((next = *list) != NULL) {                                \
 907  958                  if (isv4mapped &&                                       \
 908  959                      IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&    \
 909  960                      (connp)->conn_zoneid == next->conn_zoneid) {        \
 910  961                          (connp)->conn_next = next;                      \
 911  962                          if (prev != NULL)                               \
 912  963                                  prev = next->conn_prev;                 \
 913  964                          next->conn_prev = (connp);                      \
 914  965                          break;                                          \
 915  966                  }                                                       \
 916  967                  list = &next->conn_next;                                \
 917  968                  prev = next;                                            \
 918  969          }                                                               \
 919  970          (connp)->conn_prev = prev;                                      \
 920  971          *list = (connp);                                                \
 921  972          (connp)->conn_fanout = (connfp);                                \
 922  973          (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |   \
 923  974              IPCL_BOUND;                                                 \
 924  975          CONN_INC_REF((connp));  /* reference held by the list */        \
 925  976          mutex_exit(&(connfp)->connf_lock);                              \
 926  977  }
 927  978  
 928  979  void
 929  980  ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
 930  981  {
                   /*
                    * Function wrapper around the IPCL_HASH_INSERT_WILDCARD macro;
                    * presumably for callers outside this file - verify.
                    */
 931  982          IPCL_HASH_INSERT_WILDCARD(connfp, connp);
 932  983  }
 933  984  
 934  985  /*
 935  986   * Because the classifier is used to classify inbound packets, the destination
 936  987   * address is meant to be our local tunnel address (tunnel source), and the
 937  988   * source the remote tunnel address (tunnel destination).
 938  989   *
 939  990   * Note that conn_proto can't be used for fanout since the upper protocol
 940  991   * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
            *
            * The (dst, src) bucket is searched first, then the (dst, INADDR_ANY)
            * bucket.  Returns the matching conn with a reference added by
            * CONN_INC_REF, or NULL if neither lookup finds one.
 941  992   */
 942  993  conn_t *
 943  994  ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
 944  995  {
 945  996          connf_t *connfp;
 946  997          conn_t  *connp;
 947  998  
 948  999          /* first look for IPv4 tunnel links */
 949 1000          connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
 950 1001          mutex_enter(&connfp->connf_lock);
 951 1002          for (connp = connfp->connf_head; connp != NULL;
 952 1003              connp = connp->conn_next) {
 953 1004                  if (IPCL_IPTUN_MATCH(connp, *dst, *src))
 954 1005                          break;
 955 1006          }
                   /* On a hit, jump to done with the first bucket's lock still held. */
 956 1007          if (connp != NULL)
 957 1008                  goto done;
 958 1009  
 959 1010          mutex_exit(&connfp->connf_lock);
 960 1011  
 961 1012          /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
 962 1013          connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
 963 1014              INADDR_ANY)];
 964 1015          mutex_enter(&connfp->connf_lock);
 965 1016          for (connp = connfp->connf_head; connp != NULL;
 966 1017              connp = connp->conn_next) {
 967 1018                  if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
 968 1019                          break;
 969 1020          }
 970 1021  done:
                   /* connfp is whichever bucket was searched last; its lock is held. */
 971 1022          if (connp != NULL)
 972 1023                  CONN_INC_REF(connp);
 973 1024          mutex_exit(&connfp->connf_lock);
 974 1025          return (connp);
 975 1026  }
 976 1027  
 977 1028  conn_t *
 978 1029  ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
 979 1030  {
                   /*
                    * IPv6 tunnel lookup: searches the single (dst, src) bucket of
                    * ips_ipcl_iptun_fanout under its connf_lock.  Returns the
                    * matching conn with a reference added by CONN_INC_REF, or NULL.
                    */
 980 1031          connf_t *connfp;
 981 1032          conn_t  *connp;
 982 1033  
 983 1034          /* Look for an IPv6 tunnel link */
 984 1035          connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
 985 1036          mutex_enter(&connfp->connf_lock);
 986 1037          for (connp = connfp->connf_head; connp != NULL;
 987 1038              connp = connp->conn_next) {
 988 1039                  if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
 989 1040                          CONN_INC_REF(connp);
 990 1041                          break;
 991 1042                  }
 992 1043          }
 993 1044          mutex_exit(&connfp->connf_lock);
 994 1045          return (connp);
 995 1046  }
 996 1047  
 997 1048  /*
 998 1049   * This function is currently used only for inserting an SCTP raw socket.
 999 1050   * This may change later.
1000 1051   *
1001 1052   * Note that only one raw socket can be bound to a port.  The param
1002 1053   * lport is in network byte order.
            *
            * Returns EADDRNOTAVAIL if a conn in the same zone and address family is
            * already on the bucket with the same lport and an overlapping local
            * address (either one unspecified or v4-mapped-any, or both equal);
            * otherwise inserts connp into the raw fanout and returns 0.
1003 1054   */
1004 1055  static int
1005 1056  ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
1006 1057  {
1007 1058          connf_t *connfp;
1008 1059          conn_t  *oconnp;
1009 1060          ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1010 1061  
1011 1062          connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1012 1063  
1013 1064          /* Check for existing raw socket already bound to the port. */
1014 1065          mutex_enter(&connfp->connf_lock);
1015 1066          for (oconnp = connfp->connf_head; oconnp != NULL;
1016 1067              oconnp = oconnp->conn_next) {
1017 1068                  if (oconnp->conn_lport == lport &&
1018 1069                      oconnp->conn_zoneid == connp->conn_zoneid &&
1019 1070                      oconnp->conn_family == connp->conn_family &&
1020 1071                      ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1021 1072                      IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
1022 1073                      IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
1023 1074                      IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
1024 1075                      IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
1025 1076                      &connp->conn_laddr_v6))) {
1026 1077                          break;
1027 1078                  }
1028 1079          }
1029 1080          mutex_exit(&connfp->connf_lock);
1030 1081          if (oconnp != NULL)
1031 1082                  return (EADDRNOTAVAIL);
1032 1083  
                   /*
                    * NOTE(review): connf_lock is dropped above and re-taken inside
                    * the insert macros, so the check and the insert are not atomic;
                    * confirm that callers serialize binds to the same port.
                    *
                    * No foreign address: insert as wildcard or bound depending on
                    * the local address.  Otherwise insert as connected.
                    */
1033 1084          if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
1034 1085              IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1035 1086                  if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1036 1087                      IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
1037 1088                          IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1038 1089                  } else {
1039 1090                          IPCL_HASH_INSERT_BOUND(connfp, connp);
1040 1091                  }
1041 1092          } else {
1042 1093                  IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1043 1094          }
1044 1095          return (0);
1045 1096  }
1046 1097  
1047 1098  static int
1048 1099  ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
1049 1100  {
                   /*
                    * Insert an IPv4 tunnel conn into ips_ipcl_iptun_fanout, keyed by
                    * its (laddr, faddr) pair.  The duplicate check and the insert
                    * happen under the same connf_lock.  Returns EADDRINUSE if a conn
                    * matching both addresses is already on the bucket, else 0.
                    */
1050 1101          connf_t *connfp;
1051 1102          conn_t  *tconnp;
1052 1103          ipaddr_t laddr = connp->conn_laddr_v4;
1053 1104          ipaddr_t faddr = connp->conn_faddr_v4;
1054 1105  
1055 1106          connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
1056 1107          mutex_enter(&connfp->connf_lock);
1057 1108          for (tconnp = connfp->connf_head; tconnp != NULL;
1058 1109              tconnp = tconnp->conn_next) {
1059 1110                  if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
1060 1111                          /* A tunnel is already bound to these addresses. */
1061 1112                          mutex_exit(&connfp->connf_lock);
1062 1113                          return (EADDRINUSE);
1063 1114                  }
1064 1115          }
                   /* The macro asserts that connp is not already on a fanout list. */
1065 1116          IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1066 1117          mutex_exit(&connfp->connf_lock);
1067 1118          return (0);
1068 1119  }
1069 1120  
1070 1121  static int
1071 1122  ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
1072 1123  {
                   /*
                    * Insert an IPv6 tunnel conn into ips_ipcl_iptun_fanout, keyed by
                    * its (laddr, faddr) pair.  The duplicate check and the insert
                    * happen under the same connf_lock.  Returns EADDRINUSE if a conn
                    * matching both addresses is already on the bucket, else 0.
                    */
1073 1124          connf_t *connfp;
1074 1125          conn_t  *tconnp;
1075 1126          in6_addr_t *laddr = &connp->conn_laddr_v6;
1076 1127          in6_addr_t *faddr = &connp->conn_faddr_v6;
1077 1128  
1078 1129          connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
1079 1130          mutex_enter(&connfp->connf_lock);
1080 1131          for (tconnp = connfp->connf_head; tconnp != NULL;
1081 1132              tconnp = tconnp->conn_next) {
1082 1133                  if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
1083 1134                          /* A tunnel is already bound to these addresses. */
1084 1135                          mutex_exit(&connfp->connf_lock);
1085 1136                          return (EADDRINUSE);
1086 1137                  }
1087 1138          }
                   /* The macro asserts that connp is not already on a fanout list. */
1088 1139          IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1089 1140          mutex_exit(&connfp->connf_lock);
1090 1141          return (0);
1091 1142  }
1092 1143  
1093 1144  /*
1094 1145   * Check for a MAC exemption conflict on a labeled system.  Note that for
1095 1146   * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
1096 1147   * transport layer.  This check is for binding all other protocols.
1097 1148   *
            * Scans the ips_ipcl_proto_fanout_v4 bucket for connp's protocol under
            * its connf_lock.  A conflict is an existing conn of the same family in
            * a different zone, where at least one of the two is not
            * CONN_MAC_DEFAULT, unless both are bound to different specific
            * addresses.
            *
1098 1149   * Returns true if there's a conflict.
1099 1150   */
1100 1151  static boolean_t
1101 1152  check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
1102 1153  {
1103 1154          connf_t *connfp;
1104 1155          conn_t *tconn;
1105 1156  
1106 1157          connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
1107 1158          mutex_enter(&connfp->connf_lock);
1108 1159          for (tconn = connfp->connf_head; tconn != NULL;
1109 1160              tconn = tconn->conn_next) {
1110 1161                  /* We don't allow v4 fallback for v6 raw socket */
1111 1162                  if (connp->conn_family != tconn->conn_family)
1112 1163                          continue;
1113 1164                  /* If neither is exempt, then there's no conflict */
1114 1165                  if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1115 1166                      (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1116 1167                          continue;
1117 1168                  /* We are only concerned about sockets for a different zone */
1118 1169                  if (connp->conn_zoneid == tconn->conn_zoneid)
1119 1170                          continue;
1120 1171                  /* If both are bound to different specific addrs, ok */
1121 1172                  if (connp->conn_laddr_v4 != INADDR_ANY &&
1122 1173                      tconn->conn_laddr_v4 != INADDR_ANY &&
1123 1174                      connp->conn_laddr_v4 != tconn->conn_laddr_v4)
1124 1175                          continue;
1125 1176                  /* These two conflict; fail */
1126 1177                  break;
1127 1178          }
1128 1179          mutex_exit(&connfp->connf_lock);
                   /* tconn is non-NULL only if the loop stopped on a conflict. */
1129 1180          return (tconn != NULL);
1130 1181  }
1131 1182  
1132 1183  static boolean_t
1133 1184  check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
1134 1185  {
                   /*
                    * IPv6 counterpart of check_exempt_conflict_v4: scans the
                    * ips_ipcl_proto_fanout_v6 bucket for connp's protocol under its
                    * connf_lock.  Returns true if an existing conn conflicts.
                    */
1135 1186          connf_t *connfp;
1136 1187          conn_t *tconn;
1137 1188  
1138 1189          connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
1139 1190          mutex_enter(&connfp->connf_lock);
1140 1191          for (tconn = connfp->connf_head; tconn != NULL;
1141 1192              tconn = tconn->conn_next) {
1142 1193                  /* We don't allow v4 fallback for v6 raw socket */
1143 1194                  if (connp->conn_family != tconn->conn_family)
1144 1195                          continue;
1145 1196                  /* If neither is exempt, then there's no conflict */
1146 1197                  if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
1147 1198                      (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
1148 1199                          continue;
1149 1200                  /* We are only concerned about sockets for a different zone */
1150 1201                  if (connp->conn_zoneid == tconn->conn_zoneid)
1151 1202                          continue;
1152 1203                  /* If both are bound to different addrs, ok */
1153 1204                  if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1154 1205                      !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1155 1206                      !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1156 1207                      &tconn->conn_laddr_v6))
1157 1208                          continue;
1158 1209                  /* These two conflict; fail */
1159 1210                  break;
1160 1211          }
1161 1212          mutex_exit(&connfp->connf_lock);
                   /* tconn is non-NULL only if the loop stopped on a conflict. */
1162 1213          return (tconn != NULL);
1163 1214  }
1164 1215  
1165 1216  /*
1166 1217   * (v4, v6) bind hash insertion routines
1167 1218   * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
1168 1219   */
1169 1220  
1170 1221  int
1171 1222  ipcl_bind_insert(conn_t *connp)
1172 1223  {
                   /*
                    * Dispatch on conn_ipversion to the v6 or v4 bind-insert routine
                    * and return its result unchanged.
                    */
1173 1224          if (connp->conn_ipversion == IPV6_VERSION)
1174 1225                  return (ipcl_bind_insert_v6(connp));
1175 1226          else
1176 1227                  return (ipcl_bind_insert_v4(connp));
1177 1228  }
1178 1229  
1179 1230  int
1180 1231  ipcl_bind_insert_v4(conn_t *connp)
1181 1232  {
1182 1233          connf_t *connfp;
1183 1234          int     ret = 0;
1184 1235          ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1185 1236          uint16_t        lport = connp->conn_lport;
1186 1237          uint8_t         protocol = connp->conn_proto;
1187 1238  
1188 1239          if (IPCL_IS_IPTUN(connp))
1189 1240                  return (ipcl_iptun_hash_insert(connp, ipst));
1190 1241  
1191 1242          switch (protocol) {
1192 1243          default:
1193 1244                  if (is_system_labeled() &&
1194 1245                      check_exempt_conflict_v4(connp, ipst))
1195 1246                          return (EADDRINUSE);
1196 1247                  /* FALLTHROUGH */
1197 1248          case IPPROTO_UDP:
1198 1249                  if (protocol == IPPROTO_UDP) {
1199 1250                          connfp = &ipst->ips_ipcl_udp_fanout[
1200 1251                              IPCL_UDP_HASH(lport, ipst)];
1201 1252                  } else {
1202 1253                          connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1203 1254                  }
1204 1255  
1205 1256                  if (connp->conn_faddr_v4 != INADDR_ANY) {
1206 1257                          IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1207 1258                  } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1208 1259                          IPCL_HASH_INSERT_BOUND(connfp, connp);
1209 1260                  } else {
1210 1261                          IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1211 1262                  }
1212 1263                  if (protocol == IPPROTO_RSVP)
1213 1264                          ill_set_inputfn_all(ipst);
1214 1265                  break;
1215 1266  
1216 1267          case IPPROTO_TCP:
1217 1268                  /* Insert it in the Bind Hash */
1218 1269                  ASSERT(connp->conn_zoneid != ALL_ZONES);
1219 1270                  connfp = &ipst->ips_ipcl_bind_fanout[
1220 1271                      IPCL_BIND_HASH(lport, ipst)];
1221 1272                  if (connp->conn_laddr_v4 != INADDR_ANY) {
1222 1273                          IPCL_HASH_INSERT_BOUND(connfp, connp);
1223 1274                  } else {
1224 1275                          IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1225 1276                  }
1226 1277                  if (cl_inet_listen != NULL) {
1227 1278                          ASSERT(connp->conn_ipversion == IPV4_VERSION);
1228 1279                          connp->conn_flags |= IPCL_CL_LISTENER;
  
    | ↓ open down ↓ | 496 lines elided | ↑ open up ↑ | 
1229 1280                          (*cl_inet_listen)(
1230 1281                              connp->conn_netstack->netstack_stackid,
1231 1282                              IPPROTO_TCP, AF_INET,
1232 1283                              (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
1233 1284                  }
1234 1285                  break;
1235 1286  
1236 1287          case IPPROTO_SCTP:
1237 1288                  ret = ipcl_sctp_hash_insert(connp, lport);
1238 1289                  break;
     1290 +
     1291 +        case IPPROTO_DCCP:
     1292 +                cmn_err(CE_NOTE, "ipcl_bind_insert_v4");
     1293 +                ASSERT(connp->conn_zoneid != ALL_ZONES);
     1294 +                connfp = &ipst->ips_ipcl_dccp_fanout[
     1295 +                    IPCL_DCCP_HASH(lport, ipst)];
     1296 +                if (connp->conn_laddr_v4 != INADDR_ANY) {
     1297 +                        IPCL_HASH_INSERT_BOUND(connfp, connp);
     1298 +                } else {
     1299 +                        IPCL_HASH_INSERT_WILDCARD(connfp, connp);
     1300 +                }
     1301 +                /* XXX:DCCP */
     1302 +                break;
1239 1303          }
1240 1304  
     1305 +
1241 1306          return (ret);
1242 1307  }
1243 1308  
1244 1309  int
1245 1310  ipcl_bind_insert_v6(conn_t *connp)
1246 1311  {
1247 1312          connf_t         *connfp;
1248 1313          int             ret = 0;
1249 1314          ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1250 1315          uint16_t        lport = connp->conn_lport;
1251 1316          uint8_t         protocol = connp->conn_proto;
1252 1317  
1253 1318          if (IPCL_IS_IPTUN(connp)) {
1254 1319                  return (ipcl_iptun_hash_insert_v6(connp, ipst));
1255 1320          }
1256 1321  
1257 1322          switch (protocol) {
1258 1323          default:
1259 1324                  if (is_system_labeled() &&
1260 1325                      check_exempt_conflict_v6(connp, ipst))
1261 1326                          return (EADDRINUSE);
1262 1327                  /* FALLTHROUGH */
1263 1328          case IPPROTO_UDP:
1264 1329                  if (protocol == IPPROTO_UDP) {
1265 1330                          connfp = &ipst->ips_ipcl_udp_fanout[
1266 1331                              IPCL_UDP_HASH(lport, ipst)];
1267 1332                  } else {
1268 1333                          connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1269 1334                  }
1270 1335  
1271 1336                  if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1272 1337                          IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1273 1338                  } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1274 1339                          IPCL_HASH_INSERT_BOUND(connfp, connp);
1275 1340                  } else {
1276 1341                          IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1277 1342                  }
1278 1343                  break;
1279 1344  
1280 1345          case IPPROTO_TCP:
1281 1346                  /* Insert it in the Bind Hash */
1282 1347                  ASSERT(connp->conn_zoneid != ALL_ZONES);
1283 1348                  connfp = &ipst->ips_ipcl_bind_fanout[
1284 1349                      IPCL_BIND_HASH(lport, ipst)];
1285 1350                  if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1286 1351                          IPCL_HASH_INSERT_BOUND(connfp, connp);
1287 1352                  } else {
1288 1353                          IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1289 1354                  }
1290 1355                  if (cl_inet_listen != NULL) {
1291 1356                          sa_family_t     addr_family;
1292 1357                          uint8_t         *laddrp;
1293 1358  
1294 1359                          if (connp->conn_ipversion == IPV6_VERSION) {
1295 1360                                  addr_family = AF_INET6;
1296 1361                                  laddrp =
1297 1362                                      (uint8_t *)&connp->conn_bound_addr_v6;
1298 1363                          } else {
1299 1364                                  addr_family = AF_INET;
1300 1365                                  laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
1301 1366                          }
  
    | ↓ open down ↓ | 51 lines elided | ↑ open up ↑ | 
1302 1367                          connp->conn_flags |= IPCL_CL_LISTENER;
1303 1368                          (*cl_inet_listen)(
1304 1369                              connp->conn_netstack->netstack_stackid,
1305 1370                              IPPROTO_TCP, addr_family, laddrp, lport, NULL);
1306 1371                  }
1307 1372                  break;
1308 1373  
1309 1374          case IPPROTO_SCTP:
1310 1375                  ret = ipcl_sctp_hash_insert(connp, lport);
1311 1376                  break;
     1377 +
     1378 +        case IPPROTO_DCCP:
     1379 +                /* XXX:DCCP */
     1380 +                break;
1312 1381          }
1313 1382  
1314 1383          return (ret);
1315 1384  }
1316 1385  
1317 1386  /*
1318 1387   * ipcl_conn_hash insertion routines.
1319 1388   * The caller has already set conn_proto and the addresses/ports in the conn_t.
1320 1389   */
1321 1390  
1322 1391  int
1323 1392  ipcl_conn_insert(conn_t *connp)
1324 1393  {
1325 1394          if (connp->conn_ipversion == IPV6_VERSION)
1326 1395                  return (ipcl_conn_insert_v6(connp));
1327 1396          else
1328 1397                  return (ipcl_conn_insert_v4(connp));
1329 1398  }
1330 1399  
1331 1400  int
1332 1401  ipcl_conn_insert_v4(conn_t *connp)
1333 1402  {
1334 1403          connf_t         *connfp;
1335 1404          conn_t          *tconnp;
1336 1405          int             ret = 0;
1337 1406          ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1338 1407          uint16_t        lport = connp->conn_lport;
1339 1408          uint8_t         protocol = connp->conn_proto;
1340 1409  
1341 1410          if (IPCL_IS_IPTUN(connp))
1342 1411                  return (ipcl_iptun_hash_insert(connp, ipst));
1343 1412  
1344 1413          switch (protocol) {
1345 1414          case IPPROTO_TCP:
1346 1415                  /*
1347 1416                   * For TCP, we check whether the connection tuple already
1348 1417                   * exists before allowing the connection to proceed.  We
1349 1418                   * also allow indexing on the zoneid. This is to allow
1350 1419                   * multiple shared stack zones to have the same tcp
1351 1420                   * connection tuple. In practice this only happens for
1352 1421                   * INADDR_LOOPBACK as it's the only local address which
1353 1422                   * doesn't have to be unique.
1354 1423                   */
1355 1424                  connfp = &ipst->ips_ipcl_conn_fanout[
1356 1425                      IPCL_CONN_HASH(connp->conn_faddr_v4,
1357 1426                      connp->conn_ports, ipst)];
1358 1427                  mutex_enter(&connfp->connf_lock);
1359 1428                  for (tconnp = connfp->connf_head; tconnp != NULL;
1360 1429                      tconnp = tconnp->conn_next) {
1361 1430                          if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1362 1431                              connp->conn_faddr_v4, connp->conn_laddr_v4,
1363 1432                              connp->conn_ports) &&
1364 1433                              IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1365 1434                                  /* Already have a conn. bail out */
1366 1435                                  mutex_exit(&connfp->connf_lock);
1367 1436                                  return (EADDRINUSE);
1368 1437                          }
1369 1438                  }
1370 1439                  if (connp->conn_fanout != NULL) {
1371 1440                          /*
1372 1441                           * Probably a XTI/TLI application trying to do a
1373 1442                           * rebind. Let it happen.
1374 1443                           */
1375 1444                          mutex_exit(&connfp->connf_lock);
1376 1445                          IPCL_HASH_REMOVE(connp);
1377 1446                          mutex_enter(&connfp->connf_lock);
1378 1447                  }
1379 1448  
1380 1449                  ASSERT(connp->conn_recv != NULL);
1381 1450                  ASSERT(connp->conn_recvicmp != NULL);
1382 1451  
1383 1452                  IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1384 1453                  mutex_exit(&connfp->connf_lock);
1385 1454                  break;
  
    | ↓ open down ↓ | 64 lines elided | ↑ open up ↑ | 
1386 1455  
1387 1456          case IPPROTO_SCTP:
1388 1457                  /*
1389 1458                   * The raw socket may have already been bound, remove it
1390 1459                   * from the hash first.
1391 1460                   */
1392 1461                  IPCL_HASH_REMOVE(connp);
1393 1462                  ret = ipcl_sctp_hash_insert(connp, lport);
1394 1463                  break;
1395 1464  
     1465 +        case IPPROTO_DCCP:
     1466 +                cmn_err(CE_NOTE, "insert v4");
     1467 +
     1468 +                connfp = &ipst->ips_ipcl_conn_fanout[
     1469 +                    IPCL_CONN_HASH(connp->conn_faddr_v4,
     1470 +                    connp->conn_ports, ipst)];
     1471 +                mutex_enter(&connfp->connf_lock);
     1472 +                IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
     1473 +                mutex_exit(&connfp->connf_lock);
     1474 +                /* XXX:DCCP */
     1475 +                break;
     1476 +
1396 1477          default:
1397 1478                  /*
1398 1479                   * Check for conflicts among MAC exempt bindings.  For
1399 1480                   * transports with port numbers, this is done by the upper
1400 1481                   * level per-transport binding logic.  For all others, it's
1401 1482                   * done here.
1402 1483                   */
1403 1484                  if (is_system_labeled() &&
1404 1485                      check_exempt_conflict_v4(connp, ipst))
1405 1486                          return (EADDRINUSE);
1406 1487                  /* FALLTHROUGH */
1407 1488  
1408 1489          case IPPROTO_UDP:
1409 1490                  if (protocol == IPPROTO_UDP) {
1410 1491                          connfp = &ipst->ips_ipcl_udp_fanout[
1411 1492                              IPCL_UDP_HASH(lport, ipst)];
1412 1493                  } else {
1413 1494                          connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
1414 1495                  }
1415 1496  
1416 1497                  if (connp->conn_faddr_v4 != INADDR_ANY) {
1417 1498                          IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1418 1499                  } else if (connp->conn_laddr_v4 != INADDR_ANY) {
1419 1500                          IPCL_HASH_INSERT_BOUND(connfp, connp);
1420 1501                  } else {
1421 1502                          IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1422 1503                  }
1423 1504                  break;
1424 1505          }
1425 1506  
1426 1507          return (ret);
1427 1508  }
1428 1509  
1429 1510  int
1430 1511  ipcl_conn_insert_v6(conn_t *connp)
1431 1512  {
1432 1513          connf_t         *connfp;
1433 1514          conn_t          *tconnp;
1434 1515          int             ret = 0;
1435 1516          ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
1436 1517          uint16_t        lport = connp->conn_lport;
1437 1518          uint8_t         protocol = connp->conn_proto;
1438 1519          uint_t          ifindex = connp->conn_bound_if;
1439 1520  
1440 1521          if (IPCL_IS_IPTUN(connp))
1441 1522                  return (ipcl_iptun_hash_insert_v6(connp, ipst));
1442 1523  
1443 1524          switch (protocol) {
1444 1525          case IPPROTO_TCP:
1445 1526  
1446 1527                  /*
1447 1528                   * For tcp, we check whether the connection tuple already
1448 1529                   * exists before allowing the connection to proceed.  We
1449 1530                   * also allow indexing on the zoneid. This is to allow
1450 1531                   * multiple shared stack zones to have the same tcp
1451 1532                   * connection tuple. In practice this only happens for
1452 1533                   * ipv6_loopback as it's the only local address which
1453 1534                   * doesn't have to be unique.
1454 1535                   */
1455 1536                  connfp = &ipst->ips_ipcl_conn_fanout[
1456 1537                      IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
1457 1538                      ipst)];
1458 1539                  mutex_enter(&connfp->connf_lock);
1459 1540                  for (tconnp = connfp->connf_head; tconnp != NULL;
1460 1541                      tconnp = tconnp->conn_next) {
1461 1542                          /* NOTE: need to match zoneid. Bug in onnv-gate */
1462 1543                          if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1463 1544                              connp->conn_faddr_v6, connp->conn_laddr_v6,
1464 1545                              connp->conn_ports) &&
1465 1546                              (tconnp->conn_bound_if == 0 ||
1466 1547                              tconnp->conn_bound_if == ifindex) &&
1467 1548                              IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
1468 1549                                  /* Already have a conn. bail out */
1469 1550                                  mutex_exit(&connfp->connf_lock);
1470 1551                                  return (EADDRINUSE);
1471 1552                          }
1472 1553                  }
1473 1554                  if (connp->conn_fanout != NULL) {
1474 1555                          /*
1475 1556                           * Probably a XTI/TLI application trying to do a
1476 1557                           * rebind. Let it happen.
1477 1558                           */
1478 1559                          mutex_exit(&connfp->connf_lock);
1479 1560                          IPCL_HASH_REMOVE(connp);
1480 1561                          mutex_enter(&connfp->connf_lock);
  
    | ↓ open down ↓ | 75 lines elided | ↑ open up ↑ | 
1481 1562                  }
1482 1563                  IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
1483 1564                  mutex_exit(&connfp->connf_lock);
1484 1565                  break;
1485 1566  
1486 1567          case IPPROTO_SCTP:
1487 1568                  IPCL_HASH_REMOVE(connp);
1488 1569                  ret = ipcl_sctp_hash_insert(connp, lport);
1489 1570                  break;
1490 1571  
     1572 +        case IPPROTO_DCCP:
     1573 +                /* XXX:DCCP */
     1574 +                break;
     1575 +
1491 1576          default:
1492 1577                  if (is_system_labeled() &&
1493 1578                      check_exempt_conflict_v6(connp, ipst))
1494 1579                          return (EADDRINUSE);
1495 1580                  /* FALLTHROUGH */
1496 1581          case IPPROTO_UDP:
1497 1582                  if (protocol == IPPROTO_UDP) {
1498 1583                          connfp = &ipst->ips_ipcl_udp_fanout[
1499 1584                              IPCL_UDP_HASH(lport, ipst)];
1500 1585                  } else {
1501 1586                          connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
1502 1587                  }
1503 1588  
1504 1589                  if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
1505 1590                          IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1506 1591                  } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
1507 1592                          IPCL_HASH_INSERT_BOUND(connfp, connp);
1508 1593                  } else {
1509 1594                          IPCL_HASH_INSERT_WILDCARD(connfp, connp);
1510 1595                  }
1511 1596                  break;
1512 1597          }
1513 1598  
1514 1599          return (ret);
1515 1600  }
1516 1601  
1517 1602  /*
1518 1603   * v4 packet classifying function. Looks up the fanout tables to
1519 1604   * find the conn that the packet belongs to. Returns the conn with
1520 1605   * a reference held, or NULL if no conn matches.
1521 1606   *
1522 1607   * If zoneid is ALL_ZONES, then the search rules described in the "Connection
1523 1608   * Lookup" comment block are applied.  Labels are also checked as described
1524 1609   * above.  If the packet is from the inside (looped back), and is from the same
1525 1610   * zone, then label checks are omitted.
1526 1611   */
1527 1612  conn_t *
1528 1613  ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1529 1614      ip_recv_attr_t *ira, ip_stack_t *ipst)
1530 1615  {
1531 1616          ipha_t  *ipha;
1532 1617          connf_t *connfp, *bind_connfp;
1533 1618          uint16_t lport;
1534 1619          uint16_t fport;
1535 1620          uint32_t ports;
1536 1621          conn_t  *connp;
1537 1622          uint16_t  *up;
1538 1623          zoneid_t        zoneid = ira->ira_zoneid;
1539 1624  
1540 1625          ipha = (ipha_t *)mp->b_rptr;
1541 1626          up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
1542 1627  
1543 1628          switch (protocol) {
1544 1629          case IPPROTO_TCP:
1545 1630                  ports = *(uint32_t *)up;
1546 1631                  connfp =
1547 1632                      &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1548 1633                      ports, ipst)];
1549 1634                  mutex_enter(&connfp->connf_lock);
1550 1635                  for (connp = connfp->connf_head; connp != NULL;
1551 1636                      connp = connp->conn_next) {
1552 1637                          if (IPCL_CONN_MATCH(connp, protocol,
1553 1638                              ipha->ipha_src, ipha->ipha_dst, ports) &&
1554 1639                              (connp->conn_zoneid == zoneid ||
1555 1640                              connp->conn_allzones ||
1556 1641                              ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1557 1642                              (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1558 1643                              (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1559 1644                                  break;
1560 1645                  }
1561 1646  
1562 1647                  if (connp != NULL) {
1563 1648                          /*
1564 1649                           * We have a fully-bound TCP connection.
1565 1650                           *
1566 1651                           * For labeled systems, there's no need to check the
1567 1652                           * label here.  It's known to be good as we checked
1568 1653                           * before allowing the connection to become bound.
1569 1654                           */
1570 1655                          CONN_INC_REF(connp);
1571 1656                          mutex_exit(&connfp->connf_lock);
1572 1657                          return (connp);
1573 1658                  }
1574 1659  
1575 1660                  mutex_exit(&connfp->connf_lock);
1576 1661                  lport = up[1];
1577 1662                  bind_connfp =
1578 1663                      &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1579 1664                  mutex_enter(&bind_connfp->connf_lock);
1580 1665                  for (connp = bind_connfp->connf_head; connp != NULL;
1581 1666                      connp = connp->conn_next) {
1582 1667                          if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1583 1668                              lport) &&
1584 1669                              (connp->conn_zoneid == zoneid ||
1585 1670                              connp->conn_allzones ||
1586 1671                              ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1587 1672                              (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1588 1673                              (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1589 1674                                  break;
1590 1675                  }
1591 1676  
1592 1677                  /*
1593 1678                   * If the matching connection is SLP on a private address, then
1594 1679                   * the label on the packet must match the local zone's label.
1595 1680                   * Otherwise, it must be in the label range defined by tnrh.
1596 1681                   * This is ensured by tsol_receive_local.
1597 1682                   *
1598 1683                   * Note that we don't check tsol_receive_local for
1599 1684                   * the connected case.
1600 1685                   */
1601 1686                  if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1602 1687                      !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1603 1688                      ira, connp)) {
1604 1689                          DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
1605 1690                              char *, "connp(1) could not receive mp(2)",
1606 1691                              conn_t *, connp, mblk_t *, mp);
1607 1692                          connp = NULL;
1608 1693                  }
1609 1694  
1610 1695                  if (connp != NULL) {
1611 1696                          /* Have a listener at least */
1612 1697                          CONN_INC_REF(connp);
1613 1698                          mutex_exit(&bind_connfp->connf_lock);
1614 1699                          return (connp);
1615 1700                  }
1616 1701  
1617 1702                  mutex_exit(&bind_connfp->connf_lock);
1618 1703                  break;
1619 1704  
1620 1705          case IPPROTO_UDP:
1621 1706                  lport = up[1];
1622 1707                  fport = up[0];
1623 1708                  connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1624 1709                  mutex_enter(&connfp->connf_lock);
1625 1710                  for (connp = connfp->connf_head; connp != NULL;
1626 1711                      connp = connp->conn_next) {
1627 1712                          if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
1628 1713                              fport, ipha->ipha_src) &&
1629 1714                              (connp->conn_zoneid == zoneid ||
1630 1715                              connp->conn_allzones ||
1631 1716                              ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1632 1717                              (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
1633 1718                                  break;
1634 1719                  }
1635 1720  
1636 1721                  if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1637 1722                      !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1638 1723                      ira, connp)) {
1639 1724                          DTRACE_PROBE3(tx__ip__log__info__classify__udp,
1640 1725                              char *, "connp(1) could not receive mp(2)",
1641 1726                              conn_t *, connp, mblk_t *, mp);
1642 1727                          connp = NULL;
1643 1728                  }
1644 1729  
1645 1730                  if (connp != NULL) {
1646 1731                          CONN_INC_REF(connp);
1647 1732                          mutex_exit(&connfp->connf_lock);
  
    | ↓ open down ↓ | 147 lines elided | ↑ open up ↑ | 
1648 1733                          return (connp);
1649 1734                  }
1650 1735  
1651 1736                  /*
1652 1737                   * We shouldn't come here for multicast/broadcast packets
1653 1738                   */
1654 1739                  mutex_exit(&connfp->connf_lock);
1655 1740  
1656 1741                  break;
1657 1742  
     1743 +        case IPPROTO_DCCP:
     1744 +                fport = up[0];
     1745 +                lport = up[1];
     1746 +                connfp = &ipst->ips_ipcl_dccp_fanout[IPCL_DCCP_HASH(
     1747 +                    lport, ipst)];
     1748 +                mutex_enter(&connfp->connf_lock);
     1749 +                for (connp = connfp->connf_head; connp != NULL;
     1750 +                    connp = connp->conn_next) {
     1751 +                        cmn_err(CE_NOTE, "connfp found");
     1752 +                        /* XXX:DCCP */
     1753 +                        if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
     1754 +                            fport, ipha->ipha_src)) {
     1755 +                                break;
     1756 +                        }
     1757 +                }
     1758 +
     1759 +                if (connp != NULL) {
     1760 +                        CONN_INC_REF(connp);
     1761 +                        mutex_exit(&connfp->connf_lock);
     1762 +                        return (connp);
     1763 +                }
     1764 +
     1765 +                mutex_exit(&connfp->connf_lock);
     1766 +                break;
     1767 +
1658 1768          case IPPROTO_ENCAP:
1659 1769          case IPPROTO_IPV6:
1660 1770                  return (ipcl_iptun_classify_v4(&ipha->ipha_src,
1661 1771                      &ipha->ipha_dst, ipst));
1662 1772          }
1663 1773  
1664 1774          return (NULL);
1665 1775  }
1666 1776  
1667 1777  conn_t *
1668 1778  ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1669 1779      ip_recv_attr_t *ira, ip_stack_t *ipst)
1670 1780  {
1671 1781          ip6_t           *ip6h;
1672 1782          connf_t         *connfp, *bind_connfp;
1673 1783          uint16_t        lport;
1674 1784          uint16_t        fport;
1675 1785          tcpha_t         *tcpha;
1676 1786          uint32_t        ports;
1677 1787          conn_t          *connp;
1678 1788          uint16_t        *up;
1679 1789          zoneid_t        zoneid = ira->ira_zoneid;
1680 1790  
1681 1791          ip6h = (ip6_t *)mp->b_rptr;
1682 1792  
1683 1793          switch (protocol) {
1684 1794          case IPPROTO_TCP:
1685 1795                  tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
1686 1796                  up = &tcpha->tha_lport;
1687 1797                  ports = *(uint32_t *)up;
1688 1798  
1689 1799                  connfp =
1690 1800                      &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1691 1801                      ports, ipst)];
1692 1802                  mutex_enter(&connfp->connf_lock);
1693 1803                  for (connp = connfp->connf_head; connp != NULL;
1694 1804                      connp = connp->conn_next) {
1695 1805                          if (IPCL_CONN_MATCH_V6(connp, protocol,
1696 1806                              ip6h->ip6_src, ip6h->ip6_dst, ports) &&
1697 1807                              (connp->conn_zoneid == zoneid ||
1698 1808                              connp->conn_allzones ||
1699 1809                              ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1700 1810                              (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1701 1811                              (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1702 1812                                  break;
1703 1813                  }
1704 1814  
1705 1815                  if (connp != NULL) {
1706 1816                          /*
1707 1817                           * We have a fully-bound TCP connection.
1708 1818                           *
1709 1819                           * For labeled systems, there's no need to check the
1710 1820                           * label here.  It's known to be good as we checked
1711 1821                           * before allowing the connection to become bound.
1712 1822                           */
1713 1823                          CONN_INC_REF(connp);
1714 1824                          mutex_exit(&connfp->connf_lock);
1715 1825                          return (connp);
1716 1826                  }
1717 1827  
1718 1828                  mutex_exit(&connfp->connf_lock);
1719 1829  
1720 1830                  lport = up[1];
1721 1831                  bind_connfp =
1722 1832                      &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
1723 1833                  mutex_enter(&bind_connfp->connf_lock);
1724 1834                  for (connp = bind_connfp->connf_head; connp != NULL;
1725 1835                      connp = connp->conn_next) {
1726 1836                          if (IPCL_BIND_MATCH_V6(connp, protocol,
1727 1837                              ip6h->ip6_dst, lport) &&
1728 1838                              (connp->conn_zoneid == zoneid ||
1729 1839                              connp->conn_allzones ||
1730 1840                              ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1731 1841                              (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1732 1842                              (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1733 1843                                  break;
1734 1844                  }
1735 1845  
1736 1846                  if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1737 1847                      !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1738 1848                      ira, connp)) {
1739 1849                          DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
1740 1850                              char *, "connp(1) could not receive mp(2)",
1741 1851                              conn_t *, connp, mblk_t *, mp);
1742 1852                          connp = NULL;
1743 1853                  }
1744 1854  
1745 1855                  if (connp != NULL) {
1746 1856                          /* Have a listner at least */
1747 1857                          CONN_INC_REF(connp);
1748 1858                          mutex_exit(&bind_connfp->connf_lock);
1749 1859                          return (connp);
1750 1860                  }
1751 1861  
1752 1862                  mutex_exit(&bind_connfp->connf_lock);
1753 1863                  break;
1754 1864  
1755 1865          case IPPROTO_UDP:
1756 1866                  up = (uint16_t *)&mp->b_rptr[hdr_len];
1757 1867                  lport = up[1];
1758 1868                  fport = up[0];
1759 1869                  connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
1760 1870                  mutex_enter(&connfp->connf_lock);
1761 1871                  for (connp = connfp->connf_head; connp != NULL;
1762 1872                      connp = connp->conn_next) {
1763 1873                          if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
1764 1874                              fport, ip6h->ip6_src) &&
1765 1875                              (connp->conn_zoneid == zoneid ||
1766 1876                              connp->conn_allzones ||
1767 1877                              ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1768 1878                              (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1769 1879                              (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
1770 1880                                  break;
1771 1881                  }
1772 1882  
1773 1883                  if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1774 1884                      !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1775 1885                      ira, connp)) {
1776 1886                          DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
1777 1887                              char *, "connp(1) could not receive mp(2)",
1778 1888                              conn_t *, connp, mblk_t *, mp);
1779 1889                          connp = NULL;
1780 1890                  }
1781 1891  
1782 1892                  if (connp != NULL) {
1783 1893                          CONN_INC_REF(connp);
1784 1894                          mutex_exit(&connfp->connf_lock);
1785 1895                          return (connp);
1786 1896                  }
1787 1897  
1788 1898                  /*
1789 1899                   * We shouldn't come here for multicast/broadcast packets
1790 1900                   */
1791 1901                  mutex_exit(&connfp->connf_lock);
1792 1902                  break;
1793 1903          case IPPROTO_ENCAP:
1794 1904          case IPPROTO_IPV6:
1795 1905                  return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
1796 1906                      &ip6h->ip6_dst, ipst));
1797 1907          }
1798 1908  
1799 1909          return (NULL);
1800 1910  }
1801 1911  
1802 1912  /*
1803 1913   * Wrapper around ipcl_classify_(v4,v6) routines.
           *
           * Picks the IPv4 or IPv6 classifier according to whether IRAF_IS_IPV4 is
           * set in ira->ira_flags, forwards the protocol and IP header length stored
           * in the receive attributes, and returns that routine's result unchanged.
1804 1914   */
1805 1915  conn_t *
1806 1916  ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
1807 1917  {
1808 1918          if (ira->ira_flags & IRAF_IS_IPV4) {
1809 1919                  return (ipcl_classify_v4(mp, ira->ira_protocol,
1810 1920                      ira->ira_ip_hdr_length, ira, ipst));
1811 1921          } else {
1812 1922                  return (ipcl_classify_v6(mp, ira->ira_protocol,
1813 1923                      ira->ira_ip_hdr_length, ira, ipst));
1814 1924          }
1815 1925  }
1816 1926  
1817 1927  /*
1818 1928   * Only used to classify SCTP RAW sockets
           *
           * Searches the raw fanout bucket hashed on the local port taken from
           * `ports`; if nothing acceptable is found there, searches the bucket
           * hashed on port 0 for a wildcard match.  Only conns whose conn_ipversion
           * equals the packet's IP version are considered.
           *
           * ipha is dereferenced when IRAF_IS_IPV4 is set in ira->ira_flags,
           * ip6h otherwise.
           *
           * Returns the matching conn with a reference added via CONN_INC_REF,
           * or NULL when neither bucket yields a match.
1819 1929   */
1820 1930  conn_t *
1821 1931  ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
1822 1932      ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
1823 1933  {
1824 1934          connf_t         *connfp;
1825 1935          conn_t          *connp;
1826 1936          in_port_t       lport;
1827 1937          int             ipversion;
1828 1938          const void      *dst;
1829 1939          zoneid_t        zoneid = ira->ira_zoneid;
1830 1940  
                   /* The local port is the second 16-bit half of `ports`. */
1831 1941          lport = ((uint16_t *)&ports)[1];
1832 1942          if (ira->ira_flags & IRAF_IS_IPV4) {
1833 1943                  dst = (const void *)&ipha->ipha_dst;
1834 1944                  ipversion = IPV4_VERSION;
1835 1945          } else {
1836 1946                  dst = (const void *)&ip6h->ip6_dst;
1837 1947                  ipversion = IPV6_VERSION;
1838 1948          }
1839 1949  
1840 1950          connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
1841 1951          mutex_enter(&connfp->connf_lock);
1842 1952          for (connp = connfp->connf_head; connp != NULL;
1843 1953              connp = connp->conn_next) {
1844 1954                  /* We don't allow v4 fallback for v6 raw socket. */
1845 1955                  if (ipversion != connp->conn_ipversion)
1846 1956                          continue;
                           /*
                            * A conn with a specific foreign address must match on
                            * both addresses and ports; otherwise only the
                            * destination address and local port are compared.
                            */
1847 1957                  if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1848 1958                      !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1849 1959                          if (ipversion == IPV4_VERSION) {
1850 1960                                  if (!IPCL_CONN_MATCH(connp, protocol,
1851 1961                                      ipha->ipha_src, ipha->ipha_dst, ports))
1852 1962                                          continue;
1853 1963                          } else {
1854 1964                                  if (!IPCL_CONN_MATCH_V6(connp, protocol,
1855 1965                                      ip6h->ip6_src, ip6h->ip6_dst, ports))
1856 1966                                          continue;
1857 1967                          }
1858 1968                  } else {
1859 1969                          if (ipversion == IPV4_VERSION) {
1860 1970                                  if (!IPCL_BIND_MATCH(connp, protocol,
1861 1971                                      ipha->ipha_dst, lport))
1862 1972                                          continue;
1863 1973                          } else {
1864 1974                                  if (!IPCL_BIND_MATCH_V6(connp, protocol,
1865 1975                                      ip6h->ip6_dst, lport))
1866 1976                                          continue;
1867 1977                          }
1868 1978                  }
1869 1979  
                           /*
                            * Accept the conn if it is in the packet's zone, is an
                            * all-zones conn, or has a non-default MAC mode while
                            * both IRAF_TX_MAC_EXEMPTABLE and IRAF_TX_SHARED_ADDR
                            * are set on the packet.
                            */
1870 1980                  if (connp->conn_zoneid == zoneid ||
1871 1981                      connp->conn_allzones ||
1872 1982                      ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1873 1983                      (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1874 1984                      (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
1875 1985                          break;
1876 1986          }
1877 1987  
                   /*
                    * When IRAF_SYSTEM_LABELED is set, discard the candidate if
                    * tsol_receive_local() rejects it.
                    */
1878 1988          if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1879 1989              !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
1880 1990                  DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
1881 1991                      char *, "connp(1) could not receive mp(2)",
1882 1992                      conn_t *, connp, mblk_t *, mp);
1883 1993                  connp = NULL;
1884 1994          }
1885 1995  
1886 1996          if (connp != NULL)
1887 1997                  goto found;
                   /* Drop the first bucket's lock before taking the wildcard one. */
1888 1998          mutex_exit(&connfp->connf_lock);
1889 1999  
1890 2000          /* Try to look for a wildcard SCTP RAW socket match. */
                   /*
                    * NOTE(review): unlike the first pass, this pass does not call
                    * tsol_receive_local() on the candidate -- confirm intended.
                    */
1891 2001          connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
1892 2002          mutex_enter(&connfp->connf_lock);
1893 2003          for (connp = connfp->connf_head; connp != NULL;
1894 2004              connp = connp->conn_next) {
1895 2005                  /* We don't allow v4 fallback for v6 raw socket. */
1896 2006                  if (ipversion != connp->conn_ipversion)
1897 2007                          continue;
1898 2008                  if (!IPCL_ZONE_MATCH(connp, zoneid))
1899 2009                          continue;
1900 2010  
1901 2011                  if (ipversion == IPV4_VERSION) {
1902 2012                          if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
1903 2013                                  break;
1904 2014                  } else {
1905 2015                          if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
1906 2016                                  break;
1907 2017                          }
1908 2018                  }
1909 2019          }
1910 2020  
1911 2021          if (connp != NULL)
1912 2022                  goto found;
1913 2023  
1914 2024          mutex_exit(&connfp->connf_lock);
1915 2025          return (NULL);
1916 2026  
                   /*
                    * Reached with connf_lock of the bucket that produced connp still
                    * held; the reference is taken before that lock is dropped.
                    */
1917 2027  found:
1918 2028          ASSERT(connp != NULL);
1919 2029          CONN_INC_REF(connp);
1920 2030          mutex_exit(&connfp->connf_lock);
1921 2031          return (connp);
1922 2032  }
1923 2033  
           /*
            * Constructor for a TCP conn.  buf is an itc_t whose embedded conn_t is
            * followed immediately by the tcp_t (&itc[1]).  Both are zeroed, the
            * conn's mutex, condition variables and conn_ilg_lock are initialised,
            * conn and tcp are cross-linked, and the timer cache and ip_xmit_attr_t
            * are allocated using kmflags.
            *
            * Returns 0 on success, or ENOMEM if either allocation fails.
            */
1924 2034  /* ARGSUSED */
1925 2035  static int
1926 2036  tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
1927 2037  {
1928 2038          itc_t   *itc = (itc_t *)buf;
1929 2039          conn_t  *connp = &itc->itc_conn;
1930 2040          tcp_t   *tcp = (tcp_t *)&itc[1];
1931 2041  
1932 2042          bzero(connp, sizeof (conn_t));
1933 2043          bzero(tcp, sizeof (tcp_t));
1934 2044  
1935 2045          mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1936 2046          cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1937 2047          cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
1938 2048          tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
                   /*
                    * NOTE(review): conn_lock and the two cvs initialised above are
                    * not destroyed on this failure path -- confirm acceptable.
                    */
1939 2049          if (tcp->tcp_timercache == NULL)
1940 2050                  return (ENOMEM);
1941 2051          connp->conn_tcp = tcp;
1942 2052          connp->conn_flags = IPCL_TCPCONN;
1943 2053          connp->conn_proto = IPPROTO_TCP;
1944 2054          tcp->tcp_connp = connp;
1945 2055          rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1946 2056  
1947 2057          connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1948 2058          if (connp->conn_ixa == NULL) {
                           /* Give back the timer cache allocated above. */
1949 2059                  tcp_timermp_free(tcp);
1950 2060                  return (ENOMEM);
1951 2061          }
                   /* The conn holds the single initial reference on its ixa. */
1952 2062          connp->conn_ixa->ixa_refcnt = 1;
1953 2063          connp->conn_ixa->ixa_protocol = connp->conn_proto;
1954 2064          connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
1955 2065          return (0);
1956 2066  }
1957 2067  
           /*
            * Destructor matching tcp_conn_constructor.  Asserts that the conn/tcp
            * cross-links are intact, frees the timer cache, destroys the conn's
            * mutex, condition variables and conn_ilg_lock, and releases the
            * reference on conn_ixa if one is present.
            */
1958 2068  /* ARGSUSED */
1959 2069  static void
1960 2070  tcp_conn_destructor(void *buf, void *cdrarg)
1961 2071  {
1962 2072          itc_t   *itc = (itc_t *)buf;
1963 2073          conn_t  *connp = &itc->itc_conn;
1964 2074          tcp_t   *tcp = (tcp_t *)&itc[1];
1965 2075  
1966 2076          ASSERT(connp->conn_flags & IPCL_TCPCONN);
1967 2077          ASSERT(tcp->tcp_connp == connp);
1968 2078          ASSERT(connp->conn_tcp == tcp);
1969 2079          tcp_timermp_free(tcp);
1970 2080          mutex_destroy(&connp->conn_lock);
1971 2081          cv_destroy(&connp->conn_cv);
1972 2082          cv_destroy(&connp->conn_sq_cv);
1973 2083          rw_destroy(&connp->conn_ilg_lock);
1974 2084  
1975 2085          /* Can be NULL if constructor failed */
1976 2086          if (connp->conn_ixa != NULL) {
                           /* Only the conn's own reference may remain. */
1977 2087                  ASSERT(connp->conn_ixa->ixa_refcnt == 1);
1978 2088                  ASSERT(connp->conn_ixa->ixa_ire == NULL);
1979 2089                  ASSERT(connp->conn_ixa->ixa_nce == NULL);
1980 2090                  ixa_refrele(connp->conn_ixa);
1981 2091          }
1982 2092  }
1983 2093  
           /*
            * Constructor for a plain IP conn.  Only the conn_t embedded in the
            * itc_t is used.  It is zeroed, its mutex, condition variable and
            * conn_ilg_lock are initialised, conn_flags is set to IPCL_IPCCONN, and
            * an ip_xmit_attr_t is allocated using kmflags.  Unlike the
            * protocol-specific constructors, neither conn_proto nor ixa_protocol
            * is set here.
            *
            * Returns 0 on success, or ENOMEM if the allocation fails.
            */
1984 2094  /* ARGSUSED */
1985 2095  static int
1986 2096  ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
1987 2097  {
1988 2098          itc_t   *itc = (itc_t *)buf;
1989 2099          conn_t  *connp = &itc->itc_conn;
1990 2100  
1991 2101          bzero(connp, sizeof (conn_t));
1992 2102          mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1993 2103          cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1994 2104          connp->conn_flags = IPCL_IPCCONN;
1995 2105          rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1996 2106  
1997 2107          connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1998 2108          if (connp->conn_ixa == NULL)
1999 2109                  return (ENOMEM);
                   /* The conn holds the single initial reference on its ixa. */
2000 2110          connp->conn_ixa->ixa_refcnt = 1;
2001 2111          connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2002 2112          return (0);
2003 2113  }
2004 2114  
           /*
            * Destructor matching ip_conn_constructor.  Asserts the conn is an
            * IPCL_IPCCONN with no conn_priv attached, destroys its mutex,
            * condition variable and conn_ilg_lock, and releases the reference on
            * conn_ixa if one is present.
            */
2005 2115  /* ARGSUSED */
2006 2116  static void
2007 2117  ip_conn_destructor(void *buf, void *cdrarg)
2008 2118  {
2009 2119          itc_t   *itc = (itc_t *)buf;
2010 2120          conn_t  *connp = &itc->itc_conn;
2011 2121  
2012 2122          ASSERT(connp->conn_flags & IPCL_IPCCONN);
2013 2123          ASSERT(connp->conn_priv == NULL);
2014 2124          mutex_destroy(&connp->conn_lock);
2015 2125          cv_destroy(&connp->conn_cv);
2016 2126          rw_destroy(&connp->conn_ilg_lock);
2017 2127  
2018 2128          /* Can be NULL if constructor failed */
2019 2129          if (connp->conn_ixa != NULL) {
                           /* Only the conn's own reference may remain. */
2020 2130                  ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2021 2131                  ASSERT(connp->conn_ixa->ixa_ire == NULL);
2022 2132                  ASSERT(connp->conn_ixa->ixa_nce == NULL);
2023 2133                  ixa_refrele(connp->conn_ixa);
2024 2134          }
2025 2135  }
2026 2136  
           /*
            * Constructor for a UDP conn.  buf is an itc_t whose embedded conn_t is
            * followed immediately by the udp_t (&itc[1]).  Both are zeroed, the
            * conn's mutex, condition variable and conn_ilg_lock are initialised,
            * conn and udp are cross-linked, and an ip_xmit_attr_t is allocated
            * using kmflags.
            *
            * Returns 0 on success, or ENOMEM if the allocation fails.
            */
2027 2137  /* ARGSUSED */
2028 2138  static int
2029 2139  udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2030 2140  {
2031 2141          itc_t   *itc = (itc_t *)buf;
2032 2142          conn_t  *connp = &itc->itc_conn;
2033 2143          udp_t   *udp = (udp_t *)&itc[1];
2034 2144  
2035 2145          bzero(connp, sizeof (conn_t));
2036 2146          bzero(udp, sizeof (udp_t));
2037 2147  
2038 2148          mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2039 2149          cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2040 2150          connp->conn_udp = udp;
2041 2151          connp->conn_flags = IPCL_UDPCONN;
2042 2152          connp->conn_proto = IPPROTO_UDP;
2043 2153          udp->udp_connp = connp;
2044 2154          rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2045 2155          connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2046 2156          if (connp->conn_ixa == NULL)
2047 2157                  return (ENOMEM);
                   /* The conn holds the single initial reference on its ixa. */
2048 2158          connp->conn_ixa->ixa_refcnt = 1;
2049 2159          connp->conn_ixa->ixa_protocol = connp->conn_proto;
2050 2160          connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2051 2161          return (0);
2052 2162  }
2053 2163  
           /*
            * Destructor matching udp_conn_constructor.  Asserts that the conn/udp
            * cross-links are intact, destroys the conn's mutex, condition variable
            * and conn_ilg_lock, and releases the reference on conn_ixa if one is
            * present.
            */
2054 2164  /* ARGSUSED */
2055 2165  static void
2056 2166  udp_conn_destructor(void *buf, void *cdrarg)
2057 2167  {
2058 2168          itc_t   *itc = (itc_t *)buf;
2059 2169          conn_t  *connp = &itc->itc_conn;
2060 2170          udp_t   *udp = (udp_t *)&itc[1];
2061 2171  
2062 2172          ASSERT(connp->conn_flags & IPCL_UDPCONN);
2063 2173          ASSERT(udp->udp_connp == connp);
2064 2174          ASSERT(connp->conn_udp == udp);
2065 2175          mutex_destroy(&connp->conn_lock);
2066 2176          cv_destroy(&connp->conn_cv);
2067 2177          rw_destroy(&connp->conn_ilg_lock);
2068 2178  
2069 2179          /* Can be NULL if constructor failed */
2070 2180          if (connp->conn_ixa != NULL) {
                           /* Only the conn's own reference may remain. */
2071 2181                  ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2072 2182                  ASSERT(connp->conn_ixa->ixa_ire == NULL);
2073 2183                  ASSERT(connp->conn_ixa->ixa_nce == NULL);
2074 2184                  ixa_refrele(connp->conn_ixa);
2075 2185          }
2076 2186  }
2077 2187  
           /*
            * Constructor for a raw IP conn.  buf is an itc_t whose embedded conn_t
            * is followed immediately by the icmp_t (&itc[1]).  Both are zeroed,
            * the conn's mutex, condition variable and conn_ilg_lock are
            * initialised, conn and icmp are cross-linked, conn_proto is set to
            * IPPROTO_ICMP, and an ip_xmit_attr_t is allocated using kmflags.
            *
            * Returns 0 on success, or ENOMEM if the allocation fails.
            */
2078 2188  /* ARGSUSED */
2079 2189  static int
2080 2190  rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2081 2191  {
2082 2192          itc_t   *itc = (itc_t *)buf;
2083 2193          conn_t  *connp = &itc->itc_conn;
2084 2194          icmp_t  *icmp = (icmp_t *)&itc[1];
2085 2195  
2086 2196          bzero(connp, sizeof (conn_t));
2087 2197          bzero(icmp, sizeof (icmp_t));
2088 2198  
2089 2199          mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2090 2200          cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2091 2201          connp->conn_icmp = icmp;
2092 2202          connp->conn_flags = IPCL_RAWIPCONN;
2093 2203          connp->conn_proto = IPPROTO_ICMP;
2094 2204          icmp->icmp_connp = connp;
2095 2205          rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2096 2206          connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2097 2207          if (connp->conn_ixa == NULL)
2098 2208                  return (ENOMEM);
                   /* The conn holds the single initial reference on its ixa. */
2099 2209          connp->conn_ixa->ixa_refcnt = 1;
2100 2210          connp->conn_ixa->ixa_protocol = connp->conn_proto;
2101 2211          connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2102 2212          return (0);
2103 2213  }
2104 2214  
           /*
            * Destructor matching rawip_conn_constructor.  Asserts that the
            * conn/icmp cross-links are intact, destroys the conn's mutex,
            * condition variable and conn_ilg_lock, and releases the reference on
            * conn_ixa if one is present.
            */
2105 2215  /* ARGSUSED */
2106 2216  static void
2107 2217  rawip_conn_destructor(void *buf, void *cdrarg)
2108 2218  {
2109 2219          itc_t   *itc = (itc_t *)buf;
2110 2220          conn_t  *connp = &itc->itc_conn;
2111 2221          icmp_t  *icmp = (icmp_t *)&itc[1];
2112 2222  
2113 2223          ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2114 2224          ASSERT(icmp->icmp_connp == connp);
2115 2225          ASSERT(connp->conn_icmp == icmp);
2116 2226          mutex_destroy(&connp->conn_lock);
2117 2227          cv_destroy(&connp->conn_cv);
2118 2228          rw_destroy(&connp->conn_ilg_lock);
2119 2229  
2120 2230          /* Can be NULL if constructor failed */
2121 2231          if (connp->conn_ixa != NULL) {
                           /* Only the conn's own reference may remain. */
2122 2232                  ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2123 2233                  ASSERT(connp->conn_ixa->ixa_ire == NULL);
2124 2234                  ASSERT(connp->conn_ixa->ixa_nce == NULL);
2125 2235                  ixa_refrele(connp->conn_ixa);
2126 2236          }
2127 2237  }
2128 2238  
           /*
            * Constructor for a routing-socket conn.  buf is an itc_t whose embedded
            * conn_t is followed immediately by the rts_t (&itc[1]).  Both are
            * zeroed, the conn's mutex, condition variable and conn_ilg_lock are
            * initialised, conn and rts are cross-linked, and an ip_xmit_attr_t is
            * allocated using kmflags.  Neither conn_proto nor ixa_protocol is set.
            *
            * Returns 0 on success, or ENOMEM if the allocation fails.
            */
2129 2239  /* ARGSUSED */
2130 2240  static int
2131 2241  rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2132 2242  {
2133 2243          itc_t   *itc = (itc_t *)buf;
2134 2244          conn_t  *connp = &itc->itc_conn;
2135 2245          rts_t   *rts = (rts_t *)&itc[1];
2136 2246  
2137 2247          bzero(connp, sizeof (conn_t));
2138 2248          bzero(rts, sizeof (rts_t));
2139 2249  
2140 2250          mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2141 2251          cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2142 2252          connp->conn_rts = rts;
2143 2253          connp->conn_flags = IPCL_RTSCONN;
2144 2254          rts->rts_connp = connp;
2145 2255          rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2146 2256          connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2147 2257          if (connp->conn_ixa == NULL)
2148 2258                  return (ENOMEM);
                   /* The conn holds the single initial reference on its ixa. */
2149 2259          connp->conn_ixa->ixa_refcnt = 1;
2150 2260          connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2151 2261          return (0);
2152 2262  }
2153 2263  
           /*
            * Destructor matching rts_conn_constructor.  Asserts that the conn/rts
            * cross-links are intact, destroys the conn's mutex, condition variable
            * and conn_ilg_lock, and releases the reference on conn_ixa if one is
            * present.
            */
2154 2264  /* ARGSUSED */
2155 2265  static void
2156 2266  rts_conn_destructor(void *buf, void *cdrarg)
2157 2267  {
2158 2268          itc_t   *itc = (itc_t *)buf;
2159 2269          conn_t  *connp = &itc->itc_conn;
2160 2270          rts_t   *rts = (rts_t *)&itc[1];
2161 2271  
2162 2272          ASSERT(connp->conn_flags & IPCL_RTSCONN);
2163 2273          ASSERT(rts->rts_connp == connp);
2164 2274          ASSERT(connp->conn_rts == rts);
2165 2275          mutex_destroy(&connp->conn_lock);
2166 2276          cv_destroy(&connp->conn_cv);
2167 2277          rw_destroy(&connp->conn_ilg_lock);
  
    | ↓ open down ↓ | 500 lines elided | ↑ open up ↑ | 
2168 2278  
2169 2279          /* Can be NULL if constructor failed */
2170 2280          if (connp->conn_ixa != NULL) {
                           /* Only the conn's own reference may remain. */
2171 2281                  ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2172 2282                  ASSERT(connp->conn_ixa->ixa_ire == NULL);
2173 2283                  ASSERT(connp->conn_ixa->ixa_nce == NULL);
2174 2284                  ixa_refrele(connp->conn_ixa);
2175 2285          }
2176 2286  }
2177 2287  
           /*
            * Constructor for a DCCP conn.  buf is an itc_t whose embedded conn_t is
            * followed immediately by the dccp_t (&itc[1]).  Both are zeroed, the
            * conn's mutex, condition variable and conn_ilg_lock are initialised,
            * conn and dccp are cross-linked, and an ip_xmit_attr_t is allocated
            * using kmflags.
            *
            * Returns 0 on success, or ENOMEM if the allocation fails.
            */
     2288 +/* ARGSUSED */
     2289 +static int
     2290 +dccp_conn_constructor(void *buf, void *cdrarg, int kmflags)
     2291 +{
     2292 +        itc_t   *itc = (itc_t *)buf;
     2293 +        conn_t  *connp = &itc->itc_conn;
     2294 +        dccp_t  *dccp = (dccp_t *)&itc[1];
     2295 +
     2296 +        bzero(connp, sizeof (conn_t));
     2297 +        bzero(dccp, sizeof (dccp_t));
     2298 +
     2299 +        mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
     2300 +        cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
     2301 +        rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
     2302 +
     2303 +        connp->conn_dccp = dccp;
     2304 +        connp->conn_flags = IPCL_DCCPCONN;
     2305 +        connp->conn_proto = IPPROTO_DCCP;
     2306 +        dccp->dccp_connp = connp;
     2307 +        connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
                   /*
                    * Report the failure with a non-zero errno, as the other conn
                    * constructors do.  Returning NULL here would be 0, i.e. success,
                    * and would leave the conn with a NULL conn_ixa.
                    */
     2308 +        if (connp->conn_ixa == NULL)
     2309 +                return (ENOMEM);
                   /* The conn holds the single initial reference on its ixa. */
     2310 +        connp->conn_ixa->ixa_refcnt = 1;
     2311 +        connp->conn_ixa->ixa_protocol = connp->conn_proto;
     2312 +        connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
     2313 +
     2314 +        return (0);
     2315 +}
     2316 +
           /*
            * Destructor matching dccp_conn_constructor.  Asserts that the
            * conn/dccp cross-links are intact, destroys the conn's mutex,
            * condition variable and conn_ilg_lock, and releases the reference on
            * conn_ixa if one is present.
            */
     2317 +/* ARGSUSED */
     2318 +static void
     2319 +dccp_conn_destructor(void *buf, void *cdrarg)
     2320 +{
     2321 +        itc_t   *itc = (itc_t *)buf;
     2322 +        conn_t  *connp = &itc->itc_conn;
     2323 +        dccp_t  *dccp = (dccp_t *)&itc[1];
     2324 +
     2325 +        ASSERT(connp->conn_flags & IPCL_DCCPCONN);
     2326 +        ASSERT(dccp->dccp_connp == connp);
     2327 +        ASSERT(connp->conn_dccp == dccp);
     2328 +
     2329 +        mutex_destroy(&connp->conn_lock);
     2330 +        cv_destroy(&connp->conn_cv);
     2331 +        rw_destroy(&connp->conn_ilg_lock);
     2332 +
                   /* conn_ixa is only released when it was actually allocated. */
     2333 +        if (connp->conn_ixa != NULL) {
                           /* Only the conn's own reference may remain. */
     2334 +                ASSERT(connp->conn_ixa->ixa_refcnt == 1);
     2335 +                ASSERT(connp->conn_ixa->ixa_ire == NULL);
     2336 +                ASSERT(connp->conn_ixa->ixa_nce == NULL);
     2337 +
     2338 +                ixa_refrele(connp->conn_ixa);
     2339 +        }
     2340 +}
     2341 +
2178 2342  /*
2179 2343   * Called as part of ipcl_conn_destroy to assert and clear any pointers
2180 2344   * in the conn_t.
2181 2345   *
2182 2346   * Below we list all the pointers in the conn_t as a documentation aid.
2183 2347   * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2184 2348   * If you add any pointers to the conn_t please add an ASSERT here
2185 2349   * and #ifdef it out if it can't be actually asserted to be NULL.
2186 2350   * In any case, we bzero most of the conn_t at the end of the function.
           *
           * conn_ixa itself is kept: it is cleaned with ixa_cleanup() and has its
           * ixa_flags reset, but it is neither freed nor replaced here.
2187 2351   */
2188 2352  void
2189 2353  ipcl_conn_cleanup(conn_t *connp)
2190 2354  {
2191 2355          ip_xmit_attr_t  *ixa;
2192 2356  
2193 2357          ASSERT(connp->conn_latch == NULL);
2194 2358          ASSERT(connp->conn_latch_in_policy == NULL);
2195 2359          ASSERT(connp->conn_latch_in_action == NULL);
2196 2360  #ifdef notdef
2197 2361          ASSERT(connp->conn_rq == NULL);
2198 2362          ASSERT(connp->conn_wq == NULL);
2199 2363  #endif
2200 2364          ASSERT(connp->conn_cred == NULL);
2201 2365          ASSERT(connp->conn_g_fanout == NULL);
2202 2366          ASSERT(connp->conn_g_next == NULL);
2203 2367          ASSERT(connp->conn_g_prev == NULL);
2204 2368          ASSERT(connp->conn_policy == NULL);
2205 2369          ASSERT(connp->conn_fanout == NULL);
2206 2370          ASSERT(connp->conn_next == NULL);
2207 2371          ASSERT(connp->conn_prev == NULL);
2208 2372          ASSERT(connp->conn_oper_pending_ill == NULL);
2209 2373          ASSERT(connp->conn_ilg == NULL);
2210 2374          ASSERT(connp->conn_drain_next == NULL);
2211 2375          ASSERT(connp->conn_drain_prev == NULL);
2212 2376  #ifdef notdef
2213 2377          /* conn_idl is not cleared when removed from idl list */
2214 2378          ASSERT(connp->conn_idl == NULL);
2215 2379  #endif
2216 2380          ASSERT(connp->conn_ipsec_opt_mp == NULL);
2217 2381  #ifdef notdef
2218 2382          /* conn_netstack is cleared by the caller; needed by ixa_cleanup */
2219 2383          ASSERT(connp->conn_netstack == NULL);
2220 2384  #endif
2221 2385  
2222 2386          ASSERT(connp->conn_helper_info == NULL);
2223 2387          ASSERT(connp->conn_ixa != NULL);
2224 2388          ixa = connp->conn_ixa;
                   /* Only the conn's own reference may remain at this point. */
2225 2389          ASSERT(ixa->ixa_refcnt == 1);
2226 2390          /* Need to preserve ixa_protocol */
2227 2391          ixa_cleanup(ixa);
2228 2392          ixa->ixa_flags = 0;
2229 2393  
2230 2394          /* Clear out the conn_t fields that are not preserved */
                   /* That is, everything from conn_start_clr to the end of conn_t. */
2231 2395          bzero(&connp->conn_start_clr,
2232 2396              sizeof (conn_t) -
2233 2397              ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
2234 2398  }
2235 2399  
2236 2400  /*
2237 2401   * All conns are inserted in a global multi-list for the benefit of
2238 2402   * walkers. The walk is guaranteed to walk all open conns at the time
2239 2403   * of the start of the walk exactly once. This property is needed to
2240 2404   * achieve some cleanups during unplumb of interfaces. This is achieved
2241 2405   * as follows.
2242 2406   *
2243 2407   * ipcl_conn_create and ipcl_conn_destroy are the only functions that
2244 2408   * call the insert and delete functions below at creation and deletion
2245 2409   * time respectively. The conn never moves or changes its position in this
2246 2410   * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
2247 2411   * won't increase due to walkers, once the conn deletion has started. Note
2248 2412   * that we can't remove the conn from the global list and then wait for
2249 2413   * the refcnt to drop to zero, since walkers would then see a truncated
2250 2414   * list. CONN_INCIPIENT ensures that walkers don't start looking at
2251 2415   * conns until ip_open is ready to make them globally visible.
2252 2416   * The global round robin multi-list locks are held only to get the
2253 2417   * next member/insertion/deletion and contention should be negligible
2254 2418   * if the multi-list is much greater than the number of cpus.
           *
           * ipcl_globalhash_insert() picks the next list round-robin via
           * ips_conn_g_index, marks the conn CONN_INCIPIENT, and links it at the
           * head of that list under the list's connf_lock.
2255 2419   */
2256 2420  void
2257 2421  ipcl_globalhash_insert(conn_t *connp)
2258 2422  {
2259 2423          int     index;
2260 2424          struct connf_s  *connfp;
2261 2425          ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
2262 2426  
2263 2427          /*
2264 2428           * No need for atomic here. Approximate even distribution
2265 2429           * in the global lists is sufficient.
2266 2430           */
2267 2431          ipst->ips_conn_g_index++;
                   /* The mask is only a modulo if CONN_G_HASH_SIZE is a power of two. */
2268 2432          index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
2269 2433  
2270 2434          connp->conn_g_prev = NULL;
2271 2435          /*
2272 2436           * Mark as INCIPIENT, so that walkers will ignore this
2273 2437           * for now, till ip_open is ready to make it visible globally.
2274 2438           */
2275 2439          connp->conn_state_flags |= CONN_INCIPIENT;
2276 2440  
2277 2441          connfp = &ipst->ips_ipcl_globalhash_fanout[index];
2278 2442          /* Insert at the head of the list */
2279 2443          mutex_enter(&connfp->connf_lock);
2280 2444          connp->conn_g_next = connfp->connf_head;
2281 2445          if (connp->conn_g_next != NULL)
2282 2446                  connp->conn_g_next->conn_g_prev = connp;
2283 2447          connfp->connf_head = connp;
2284 2448  
2285 2449          /* The fanout bucket this conn points to */
2286 2450          connp->conn_g_fanout = connfp;
2287 2451  
2288 2452          mutex_exit(&connfp->connf_lock);
2289 2453  }
2290 2454  
           /*
            * Unlink connp from the global list recorded in conn_g_fanout, under
            * that list's connf_lock, then clear the conn's global-list pointers.
            * Does nothing if the conn was never inserted (conn_g_fanout is NULL).
            */
2291 2455  void
2292 2456  ipcl_globalhash_remove(conn_t *connp)
2293 2457  {
2294 2458          struct connf_s  *connfp;
2295 2459  
2296 2460          /*
2297 2461           * We were never inserted in the global multi list.
2298 2462           * IPCL_NONE variety is never inserted in the global multilist
2299 2463           * since it is presumed to not need any cleanup and is transient.
2300 2464           */
2301 2465          if (connp->conn_g_fanout == NULL)
2302 2466                  return;
2303 2467  
2304 2468          connfp = connp->conn_g_fanout;
2305 2469          mutex_enter(&connfp->connf_lock);
                   /* No predecessor means connp is the list head. */
2306 2470          if (connp->conn_g_prev != NULL)
2307 2471                  connp->conn_g_prev->conn_g_next = connp->conn_g_next;
2308 2472          else
2309 2473                  connfp->connf_head = connp->conn_g_next;
2310 2474          if (connp->conn_g_next != NULL)
2311 2475                  connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2312 2476          mutex_exit(&connfp->connf_lock);
2313 2477  
2314 2478          /* Better to stumble on a null pointer than to corrupt memory */
2315 2479          connp->conn_g_next = NULL;
2316 2480          connp->conn_g_prev = NULL;
2317 2481          connp->conn_g_fanout = NULL;
2318 2482  }
2319 2483  
2320 2484  /*
2321 2485   * Walk the list of all conn_t's in the system, calling the function provided
2322 2486   * with the specified argument for each.
2323 2487   * Applies to both IPv4 and IPv6.
2324 2488   *
2325 2489   * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2326 2490   * conn_oper_pending_ill). To guard against stale pointers
2327 2491   * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
2328 2492   * unplumbed or removed. New conn_t's that are created while we are walking
2329 2493   * may be missed by this walk, because they are not necessarily inserted
2330 2494   * at the tail of the list. They are new conn_t's and thus don't have any
2331 2495   * stale pointers. The CONN_CLOSING flag ensures that no new reference
2332 2496   * is created to the struct that is going away.
           *
           * func is invoked as (*func)(connp, arg) with no list lock held and with
           * a reference held on connp.  Conns marked CONN_CONDEMNED or
           * CONN_INCIPIENT are skipped.
2333 2497   */
2334 2498  void
2335 2499  ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
2336 2500  {
2337 2501          int     i;
2338 2502          conn_t  *connp;
2339 2503          conn_t  *prev_connp;
2340 2504  
2341 2505          for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2342 2506                  mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2343 2507                  prev_connp = NULL;
2344 2508                  connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
2345 2509                  while (connp != NULL) {
2346 2510                          mutex_enter(&connp->conn_lock);
2347 2511                          if (connp->conn_state_flags &
2348 2512                              (CONN_CONDEMNED | CONN_INCIPIENT)) {
2349 2513                                  mutex_exit(&connp->conn_lock);
2350 2514                                  connp = connp->conn_g_next;
2351 2515                                  continue;
2352 2516                          }
                                   /* Take the reference while conn_lock is held. */
2353 2517                          CONN_INC_REF_LOCKED(connp);
2354 2518                          mutex_exit(&connp->conn_lock);
                                   /* Call func without the list lock held. */
2355 2519                          mutex_exit(
2356 2520                              &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2357 2521                          (*func)(connp, arg);
                                   /*
                                    * The previous conn's reference is dropped only
                                    * now; the current conn's reference is kept until
                                    * the list lock is retaken and conn_g_next read.
                                    */
2358 2522                          if (prev_connp != NULL)
2359 2523                                  CONN_DEC_REF(prev_connp);
2360 2524                          mutex_enter(
2361 2525                              &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
2362 2526                          prev_connp = connp;
2363 2527                          connp = connp->conn_g_next;
2364 2528                  }
2365 2529                  mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
                           /* Release the reference on the last conn visited. */
2366 2530                  if (prev_connp != NULL)
2367 2531                          CONN_DEC_REF(prev_connp);
2368 2532          }
2369 2533  }
2370 2534  
2371 2535  /*
2372 2536   * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
2373 2537   * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2374 2538   * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2375 2539   * (peer tcp in ESTABLISHED state).
           *
           * Returns NULL if no such conn is found in the connected fanout bucket
           * selected by hashing ipha_dst and the swapped port pair.
2376 2540   */
2377 2541  conn_t *
2378 2542  ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2379 2543      ip_stack_t *ipst)
2380 2544  {
2381 2545          uint32_t ports;
2382 2546          uint16_t *pports = (uint16_t *)&ports;
2383 2547          connf_t *connfp;
2384 2548          conn_t  *tconnp;
2385 2549          boolean_t zone_chk;
2386 2550  
2387 2551          /*
2388 2552           * If either the source or destination address is loopback, then
2389 2553           * both endpoints must be in the same Zone.  Otherwise, both of
2390 2554           * the addresses are system-wide unique (tcp is in ESTABLISHED
2391 2555           * state) and the endpoints may reside in different Zones.
2392 2556           */
2393 2557          zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
2394 2558              ipha->ipha_dst == htonl(INADDR_LOOPBACK));
2395 2559  
                   /* Build the port pair with tha_fport first, then tha_lport. */
2396 2560          pports[0] = tcpha->tha_fport;
2397 2561          pports[1] = tcpha->tha_lport;
2398 2562  
2399 2563          connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2400 2564              ports, ipst)];
2401 2565  
2402 2566          mutex_enter(&connfp->connf_lock);
2403 2567          for (tconnp = connfp->connf_head; tconnp != NULL;
2404 2568              tconnp = tconnp->conn_next) {
2405 2569  
                           /* Addresses are passed as (dst, src) for the reverse match. */
2406 2570                  if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2407 2571                      ipha->ipha_dst, ipha->ipha_src, ports) &&
2408 2572                      tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2409 2573                      (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2410 2574  
2411 2575                          ASSERT(tconnp != connp);
                                   /* Take the reference before dropping the lock. */
2412 2576                          CONN_INC_REF(tconnp);
2413 2577                          mutex_exit(&connfp->connf_lock);
2414 2578                          return (tconnp);
2415 2579                  }
2416 2580          }
2417 2581          mutex_exit(&connfp->connf_lock);
2418 2582          return (NULL);
2419 2583  }
2420 2584  
2421 2585  /*
2422 2586   * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
2423 2587   * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
2424 2588   * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2425 2589   * (peer tcp in ESTABLISHED state).  Returns NULL if nothing matches.
            *
            * connp is the caller's own conn: its conn_zoneid is what a candidate
            * is compared against when the zone check applies, and the match is
            * asserted never to be connp itself.
2426 2590   */
2427 2591  conn_t *
2428 2592  ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2429 2593      ip_stack_t *ipst)
2430 2594  {
2431 2595          uint32_t ports;
2432 2596          uint16_t *pports = (uint16_t *)&ports;
2433 2597          connf_t *connfp;
2434 2598          conn_t  *tconnp;
2435 2599          boolean_t zone_chk;
2436 2600  
2437 2601          /*
2438 2602           * If either the source or destination address is loopback, then
2439 2603           * both endpoints must be in the same Zone.  Otherwise, both of
2440 2604           * the addresses are system-wide unique (tcp is in ESTABLISHED
2441 2605           * state) and the endpoints may reside in different Zones.  We
2442 2606           * don't do Zone check for link local address(es) because the
2443 2607           * current Zone implementation treats each link local address as
2444 2608           * being unique per system node, i.e. they belong to global Zone.
2445 2609           */
2446 2610          zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
2447 2611              IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
2448 2612  
                   /* Port key: tha_fport in the first 16 bits, tha_lport in the second. */
2449 2613          pports[0] = tcpha->tha_fport;
2450 2614          pports[1] = tcpha->tha_lport;
2451 2615  
                   /* Select the fanout bucket from the destination address and ports. */
2452 2616          connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2453 2617              ports, ipst)];
2454 2618  
2455 2619          mutex_enter(&connfp->connf_lock);
2456 2620          for (tconnp = connfp->connf_head; tconnp != NULL;
2457 2621              tconnp = tconnp->conn_next) {
2458 2622  
2459 2623                  /* We skip conn_bound_if check here as this is loopback tcp */
2460 2624                  if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2461 2625                      ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2462 2626                      tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
2463 2627                      (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
2464 2628  
2465 2629                          ASSERT(tconnp != connp);
                                   /* Hold the conn before dropping the bucket lock. */
2466 2630                          CONN_INC_REF(tconnp);
2467 2631                          mutex_exit(&connfp->connf_lock);
2468 2632                          return (tconnp);
2469 2633                  }
2470 2634          }
2471 2635          mutex_exit(&connfp->connf_lock);
2472 2636          return (NULL);
2473 2637  }
2474 2638  
2475 2639  /*
2476 2640   * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2477 2641   * Returns with conn reference held. Caller must call CONN_DEC_REF.
2478 2642   * Only checks for connected entries i.e. no INADDR_ANY checks.
            *
            * A candidate qualifies only if its tcp_state is at least min_state.
            * Returns NULL if no conn in the hash bucket qualifies.
2479 2643   */
2480 2644  conn_t *
2481 2645  ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2482 2646      ip_stack_t *ipst)
2483 2647  {
2484 2648          uint32_t ports;
2485 2649          uint16_t *pports;
2486 2650          connf_t *connfp;
2487 2651          conn_t  *tconnp;
2488 2652  
                   /* Port key: tha_fport in the first 16 bits, tha_lport in the second. */
2489 2653          pports = (uint16_t *)&ports;
2490 2654          pports[0] = tcpha->tha_fport;
2491 2655          pports[1] = tcpha->tha_lport;
2492 2656  
                   /* Select the fanout bucket from the destination address and ports. */
2493 2657          connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2494 2658              ports, ipst)];
2495 2659  
2496 2660          mutex_enter(&connfp->connf_lock);
2497 2661          for (tconnp = connfp->connf_head; tconnp != NULL;
2498 2662              tconnp = tconnp->conn_next) {
2499 2663  
2500 2664                  if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
2501 2665                      ipha->ipha_dst, ipha->ipha_src, ports) &&
2502 2666                      tconnp->conn_tcp->tcp_state >= min_state) {
2503 2667  
                                   /* Hold the conn before dropping the bucket lock. */
2504 2668                          CONN_INC_REF(tconnp);
2505 2669                          mutex_exit(&connfp->connf_lock);
2506 2670                          return (tconnp);
2507 2671                  }
2508 2672          }
2509 2673          mutex_exit(&connfp->connf_lock);
2510 2674          return (NULL);
2511 2675  }
2512 2676  
2513 2677  /*
2514 2678   * Find an exact {src, dst, lport, fport} match for a bounced datagram.
2515 2679   * Returns with conn reference held. Caller must call CONN_DEC_REF.
2516 2680   * Only checks for connected entries i.e. no INADDR_ANY checks.
2517 2681   * Match on ifindex in addition to addresses: a conn whose conn_bound_if
            * is 0 matches any ifindex, otherwise conn_bound_if must equal ifindex.
            *
            * A candidate qualifies only if its tcp_state is at least min_state.
            * Returns NULL if no conn in the hash bucket qualifies.
2518 2682   */
2519 2683  conn_t *
2520 2684  ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2521 2685      uint_t ifindex, ip_stack_t *ipst)
2522 2686  {
2523 2687          tcp_t   *tcp;
2524 2688          uint32_t ports;
2525 2689          uint16_t *pports;
2526 2690          connf_t *connfp;
2527 2691          conn_t  *tconnp;
2528 2692  
                   /* Port key: tha_fport in the first 16 bits, tha_lport in the second. */
2529 2693          pports = (uint16_t *)&ports;
2530 2694          pports[0] = tcpha->tha_fport;
2531 2695          pports[1] = tcpha->tha_lport;
2532 2696  
                   /* Select the fanout bucket from the destination address and ports. */
2533 2697          connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2534 2698              ports, ipst)];
2535 2699  
2536 2700          mutex_enter(&connfp->connf_lock);
2537 2701          for (tconnp = connfp->connf_head; tconnp != NULL;
2538 2702              tconnp = tconnp->conn_next) {
2539 2703  
2540 2704                  tcp = tconnp->conn_tcp;
2541 2705                  if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
2542 2706                      ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2543 2707                      tcp->tcp_state >= min_state &&
2544 2708                      (tconnp->conn_bound_if == 0 ||
2545 2709                      tconnp->conn_bound_if == ifindex)) {
2546 2710  
                                   /* Hold the conn before dropping the bucket lock. */
2547 2711                          CONN_INC_REF(tconnp);
2548 2712                          mutex_exit(&connfp->connf_lock);
2549 2713                          return (tconnp);
2550 2714                  }
2551 2715          }
2552 2716          mutex_exit(&connfp->connf_lock);
2553 2717          return (NULL);
2554 2718  }
2555 2719  
2556 2720  /*
2557 2721   * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
2558 2722   * a listener when changing state.
            *
            * Walks the bind fanout bucket for lport and returns the first TCP conn
            * that matches {laddr, lport} and zoneid and whose tcp_listener is NULL.
            * The conn is returned with a reference taken via CONN_INC_REF.
            * Returns NULL if laddr is 0 or if no conn matches.  zoneid must not be
            * ALL_ZONES.
2559 2723   */
2560 2724  conn_t *
2561 2725  ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2562 2726      ip_stack_t *ipst)
2563 2727  {
2564 2728          connf_t         *bind_connfp;
2565 2729          conn_t          *connp;
2566 2730          tcp_t           *tcp;
2567 2731  
2568 2732          /*
2569 2733           * Avoid false matches for packets sent to an IP destination of
2570 2734           * all zeros.
2571 2735           */
2572 2736          if (laddr == 0)
2573 2737                  return (NULL);
2574 2738  
2575 2739          ASSERT(zoneid != ALL_ZONES);
2576 2740  
2577 2741          bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2578 2742          mutex_enter(&bind_connfp->connf_lock);
2579 2743          for (connp = bind_connfp->connf_head; connp != NULL;
2580 2744              connp = connp->conn_next) {
2581 2745                  tcp = connp->conn_tcp;
                           /*
                            * NOTE(review): tcp_listener == NULL presumably excludes conns
                            * that still belong to another listener -- confirm in tcp.
                            */
2582 2746                  if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
2583 2747                      IPCL_ZONE_MATCH(connp, zoneid) &&
2584 2748                      (tcp->tcp_listener == NULL)) {
                                   /* Hold the conn before dropping the bucket lock. */
2585 2749                          CONN_INC_REF(connp);
2586 2750                          mutex_exit(&bind_connfp->connf_lock);
2587 2751                          return (connp);
2588 2752                  }
2589 2753          }
2590 2754          mutex_exit(&bind_connfp->connf_lock);
2591 2755          return (NULL);
2592 2756  }
2593 2757  
2594 2758  /*
2595 2759   * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
2596 2760   * a listener when changing state.
            *
            * Walks the bind fanout bucket for lport and returns the first TCP conn
            * that matches {*laddr, lport} and zoneid, whose conn_bound_if is either
            * 0 or equal to ifindex, and whose tcp_listener is NULL.  The conn is
            * returned with a reference taken via CONN_INC_REF.  Returns NULL if
            * *laddr is the unspecified address or if no conn matches.  zoneid must
            * not be ALL_ZONES.
2597 2761   */
2598 2762  conn_t *
2599 2763  ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2600 2764      zoneid_t zoneid, ip_stack_t *ipst)
2601 2765  {
2602 2766          connf_t         *bind_connfp;
2603 2767          conn_t          *connp = NULL;
2604 2768          tcp_t           *tcp;
2605 2769  
2606 2770          /*
2607 2771           * Avoid false matches for packets sent to an IP destination of
2608 2772           * all zeros.
2609 2773           */
2610 2774          if (IN6_IS_ADDR_UNSPECIFIED(laddr))
2611 2775                  return (NULL);
2612 2776  
2613 2777          ASSERT(zoneid != ALL_ZONES);
2614 2778  
2615 2779          bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
2616 2780          mutex_enter(&bind_connfp->connf_lock);
2617 2781          for (connp = bind_connfp->connf_head; connp != NULL;
2618 2782              connp = connp->conn_next) {
2619 2783                  tcp = connp->conn_tcp;
                           /*
                            * NOTE(review): tcp_listener == NULL presumably excludes conns
                            * that still belong to another listener -- confirm in tcp.
                            */
2620 2784                  if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
2621 2785                      IPCL_ZONE_MATCH(connp, zoneid) &&
2622 2786                      (connp->conn_bound_if == 0 ||
2623 2787                      connp->conn_bound_if == ifindex) &&
2624 2788                      tcp->tcp_listener == NULL) {
                                   /* Hold the conn before dropping the bucket lock. */
2625 2789                          CONN_INC_REF(connp);
2626 2790                          mutex_exit(&bind_connfp->connf_lock);
2627 2791                          return (connp);
2628 2792                  }
2629 2793          }
2630 2794          mutex_exit(&bind_connfp->connf_lock);
2631 2795          return (NULL);
2632 2796  }
2633 2797  
2634 2798  /*
2635 2799   * ipcl_get_next_conn
2636 2800   *      get the next entry in the conn global list
2637 2801   *      and put a reference on the next_conn.
2638 2802   *      decrement the reference on the current conn.
2639 2803   *
2640 2804   * This is an iterator based walker function that also provides for
2641 2805   * some selection by the caller. It walks the bucket's conn_g_next chain
2642 2806   * searching for the next valid connp in the list, and selects connections
2643 2807   * that are neither incipient nor condemned. It also REFHOLDS the conn
2644 2808   * thus ensuring that the conn exists when the caller uses the conn.
            *
            * connfp is the bucket to walk; NULL yields NULL.  connp is the conn
            * returned by the previous call, or NULL to start from connf_head.
            * conn_flags is a mask: a conn is selected only if at least one of
            * these bits is set in its own conn_flags.  Returns NULL when the
            * chain is exhausted.
2645 2809   */
2646 2810  conn_t *
2647 2811  ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2648 2812  {
2649 2813          conn_t  *next_connp;
2650 2814  
2651 2815          if (connfp == NULL)
2652 2816                  return (NULL);
2653 2817  
2654 2818          mutex_enter(&connfp->connf_lock);
2655 2819  
2656 2820          next_connp = (connp == NULL) ?
2657 2821              connfp->connf_head : connp->conn_g_next;
2658 2822  
2659 2823          while (next_connp != NULL) {
2660 2824                  mutex_enter(&next_connp->conn_lock);
2661 2825                  if (!(next_connp->conn_flags & conn_flags) ||
2662 2826                      (next_connp->conn_state_flags &
2663 2827                      (CONN_CONDEMNED | CONN_INCIPIENT))) {
2664 2828                          /*
2665 2829                           * This conn has been condemned or
2666 2830                           * is incipient, or the flags don't match
2667 2831                           */
2668 2832                          mutex_exit(&next_connp->conn_lock);
2669 2833                          next_connp = next_connp->conn_g_next;
2670 2834                          continue;
2671 2835                  }
                           /* Take the hold while conn_lock is still held. */
2672 2836                  CONN_INC_REF_LOCKED(next_connp);
2673 2837                  mutex_exit(&next_connp->conn_lock);
2674 2838                  break;
2675 2839          }
2676 2840  
2677 2841          mutex_exit(&connfp->connf_lock);
2678 2842  
                   /* The previous conn is released only after the bucket lock is dropped. */
2679 2843          if (connp != NULL)
2680 2844                  CONN_DEC_REF(connp);
2681 2845  
2682 2846          return (next_connp);
2683 2847  }
2684 2848  
2685 2849  #ifdef CONN_DEBUG
2686 2850  /*
2687 2851   * Trace of the last CONN_TRACE_MAX refhold/refrele
            *
            * conn_trace_ref() advances conn_trace_last to the next slot of
            * conn_trace_buf (wrapping to 0 at CONN_TRACE_MAX) and stores the
            * result of getpcstack() in that slot.  The caller must hold conn_lock.
            * Always returns 1.
2688 2852   */
2689 2853  int
2690 2854  conn_trace_ref(conn_t *connp)
2691 2855  {
2692 2856          int     last;
2693 2857          conn_trace_t    *ctb;
2694 2858  
2695 2859          ASSERT(MUTEX_HELD(&connp->conn_lock));
                   /* Advance to the next slot, wrapping around the buffer. */
2696 2860          last = connp->conn_trace_last;
2697 2861          last++;
2698 2862          if (last == CONN_TRACE_MAX)
2699 2863                  last = 0;
2700 2864  
                   /* Save the stack filled in by getpcstack() and the value it returns. */
2701 2865          ctb = &connp->conn_trace_buf[last];
2702 2866          ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2703 2867          connp->conn_trace_last = last;
2704 2868          return (1);
2705 2869  }
2706 2870  
           /*
            * conn_untrace_ref() has the same body as conn_trace_ref(): it records
            * into the next slot of the same conn_trace_buf, requires conn_lock to
            * be held, and always returns 1.
            */
2707 2871  int
2708 2872  conn_untrace_ref(conn_t *connp)
2709 2873  {
2710 2874          int     last;
2711 2875          conn_trace_t    *ctb;
2712 2876  
2713 2877          ASSERT(MUTEX_HELD(&connp->conn_lock));
                   /* Advance to the next slot, wrapping around the buffer. */
2714 2878          last = connp->conn_trace_last;
2715 2879          last++;
2716 2880          if (last == CONN_TRACE_MAX)
2717 2881                  last = 0;
2718 2882  
                   /* Save the stack filled in by getpcstack() and the value it returns. */
2719 2883          ctb = &connp->conn_trace_buf[last];
2720 2884          ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
2721 2885          connp->conn_trace_last = last;
2722 2886          return (1);
2723 2887  }
2724 2888  #endif
  
    | ↓ open down ↓ | 537 lines elided | ↑ open up ↑ | 
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX