1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 #ifndef _SYS_IB_EOIB_EIB_IMPL_H
  27 #define _SYS_IB_EOIB_EIB_IMPL_H
  28 
  29 #ifdef __cplusplus
  30 extern "C" {
  31 #endif
  32 
  33 #include <sys/ddi.h>
  34 #include <sys/mac.h>
  35 #include <sys/sunddi.h>
  36 #include <sys/varargs.h>
  37 #include <sys/vlan.h>
  38 #include <sys/ib/ibtl/ibti.h>
  39 #include <sys/ib/ibtl/ibvti.h>
  40 #include <sys/ib/ib_pkt_hdrs.h>
  41 
  42 #include <sys/ib/clients/eoib/fip.h>
  43 #include <sys/ib/clients/eoib/eib.h>
  44 
  45 /*
  46  * Driver specific constants
  47  */
  48 #define EIB_E_SUCCESS                   0
  49 #define EIB_E_FAILURE                   -1
  50 #define EIB_MAX_LINE                    128
  51 #define EIB_MAX_SGL                     59
  52 #define EIB_MAX_POST_MULTIPLE           4
  53 #define EIB_MAX_PAYLOAD_HDR_SZ          160
  54 #define EIB_TX_COPY_THRESH              4096    /* greater than mtu */
  55 #define EIB_MAX_VNICS                   64      /* do not change this */
  56 #define EIB_LOGIN_TIMEOUT_USEC          8000000
  57 #define EIB_RWR_CHUNK_SZ                8
  58 #define EIB_IPHDR_ALIGN_ROOM            32
  59 #define EIB_IP_HDR_ALIGN                2
  60 #define EIB_MAX_RX_PKTS_ONINTR          0x800
  61 #define EIB_MAX_LOGIN_ATTEMPTS          3
  62 #define EIB_MAX_VHUB_TBL_ATTEMPTS       3
  63 #define EIB_MAX_KA_ATTEMPTS             3
  64 #define EIB_MAX_ATTEMPTS                10
  65 #define EIB_DELAY_HALF_SECOND           500000
  66 #define EIB_GRH_SZ                      (sizeof (ib_grh_t))
  67 
  68 /*
  69  * Debug messages
  70  */
  71 #define EIB_MSGS_CRIT           0x01
  72 #define EIB_MSGS_ERR            0x02
  73 #define EIB_MSGS_WARN           0x04
  74 #define EIB_MSGS_DEBUG          0x08
  75 #define EIB_MSGS_ARGS           0x10
  76 #define EIB_MSGS_PKT            0x20
  77 #define EIB_MSGS_VERBOSE        0x40
  78 #define EIB_MSGS_DEFAULT        (EIB_MSGS_CRIT | EIB_MSGS_ERR | EIB_MSGS_WARN)
  79 
  80 #define EIB_LOGSZ_DEFAULT       0x20000
  81 
  82 #define EIB_DPRINTF_CRIT        eib_dprintf_crit
  83 #define EIB_DPRINTF_ERR         eib_dprintf_err
  84 #define EIB_DPRINTF_WARN        eib_dprintf_warn
  85 #ifdef EIB_DEBUG
  86 #define EIB_DPRINTF_DEBUG       eib_dprintf_debug
  87 #define EIB_DPRINTF_ARGS        eib_dprintf_args
  88 #define EIB_DPRINTF_PKT         eib_dprintf_pkt
  89 #define EIB_DPRINTF_VERBOSE     eib_dprintf_verbose
  90 #else
  91 #define EIB_DPRINTF_DEBUG       0 &&
  92 #define EIB_DPRINTF_ARGS        0 &&
  93 #define EIB_DPRINTF_PKT         0 &&
  94 #define EIB_DPRINTF_VERBOSE     0 &&
  95 #endif
  96 
  97 /*
  98  *  EoIB threads to provide various services
  99  */
 100 #define EIB_EVENTS_HDLR         "eib_events_handler"
 101 #define EIB_RWQES_REFILLER      "eib_rwqes_refiller"
 102 #define EIB_VNIC_CREATOR        "eib_vnic_creator"
 103 #define EIB_TXWQES_MONITOR      "eib_txwqe_monitor"
 104 #define EIB_LSOBUFS_MONITOR     "eib_lsobufs_monitor"
 105 
 106 /*
 107  * Macro for finding the least significant bit set in a 64-bit unsigned int
 108  */
 109 #define EIB_FIND_LSB_SET(val64) eib_setbit_mod67[((-(val64) & (val64)) % 67)]
 110 
 111 /*
 112  * LSO buffers
 113  *
 114  * Under normal circumstances we should never need to use any buffer
 115  * that's larger than MTU.  Unfortunately, IB HCA has limitations
 116  * on the length of SGL that are much smaller than those for regular
 117  * ethernet NICs.  Since the network layer doesn't care to limit the
 118  * number of mblk fragments in any send mp chain, we end up having to
 119  * use these larger buffers occasionally.
 120  */
 121 #define EIB_LSO_MAXLEN                  65536
 122 #define EIB_LSO_BUFSZ                   8192
 123 #define EIB_LSO_NUM_BUFS                1024
 124 #define EIB_LSO_FREE_BUFS_THRESH        (EIB_LSO_NUM_BUFS >> 5)
 125 
 126 typedef struct eib_lsobuf_s {
 127         struct eib_lsobuf_s *lb_next;
 128         uint8_t         *lb_buf;
 129         int             lb_isfree;
 130 } eib_lsobuf_t;
 131 
 132 typedef struct eib_lsobkt_s {
 133         kmutex_t        bk_lock;
 134         kcondvar_t      bk_cv;
 135         uint_t          bk_status;
 136         uint8_t         *bk_mem;
 137         eib_lsobuf_t    *bk_bufl;
 138         eib_lsobuf_t    *bk_free_head;
 139         ibt_mr_hdl_t    bk_mr_hdl;
 140         ibt_lkey_t      bk_lkey;
 141         uint_t          bk_nelem;
 142         uint_t          bk_nfree;
 143 } eib_lsobkt_t;
 144 
 145 #define EIB_LBUF_SHORT          0x1
 146 #define EIB_LBUF_MONITOR_DIE    0x2
 147 
 148 /*
 149  * The admin partition is only used for sending login and logout messages
 150  * and receiving login acknowledgements from the gateway.  While packets
 151  * going out on several vlans at the same time could result in multiple
 152  * vnic creations happening at the same time (and therefore multiple login
 153  * packets), we serialize the vnic creation via the vnic creator thread, so
 154  * we shouldn't need a lot of send wqes or receive wqes.  Note also that we
 155  * keep the cq size request to slightly less than a 2^n boundary to allow
 156  * the alloc cq routine to return the closest 2^n boundary as the real cq
 157  * size without wasting too much memory.
 158  */
 159 #define EIB_ADMIN_MAX_SWQE      30
 160 #define EIB_ADMIN_MAX_RWQE      30
 161 #define EIB_ADMIN_CQ_SIZE       (EIB_ADMIN_MAX_SWQE + EIB_ADMIN_MAX_RWQE + 1)
 162 
 163 /*
 164  * The control qp is per vhub partition, and is used to send and receive
 165  * vhub control messages such as vhub table request/response, vhub
 166  * update response and vnic alive messages.  While the vhub table response
 167  * and vhub update messages might take a few rwqes, the vhub table request
 168  * is made only once per vnic, and the vnic alive message is periodic
 169  * and uses a single swqe as well.  Per vnic, we should certainly not need
 170  * too many swqes/rwqes.
 171  */
 172 #define EIB_CTL_MAX_SWQE        30
 173 #define EIB_CTL_MAX_RWQE        30
 174 #define EIB_CTL_CQ_SIZE         (EIB_CTL_MAX_SWQE + EIB_CTL_MAX_RWQE + 1)
 175 
 176 /*
 177  * For the vNIC's data channel, there are three items that are of importance:
 178  * the constraints defined below, the hca_max_chan_sz attribute and the value of
 179  * (hca_max_cq_sz - 1).  The maximum limit on swqe/rwqe is set to the minimum
 180  * of these three values.
 181  *
 182  * While the total number of RWQEs posted to the data channel of any vNIC will
 183  * not exceed EIB_DATA_MAX_RWQE, we also do not want to acquire and post all of
 184  * it during the data channel initialization, since that is a lot of wqes for
 185  * one vnic to consume when we don't even know if the vnic will need it at all.
 186  * We post an initial set of EIB_DATA_RWQE_BKT rwqes, and slowly post more and
 187  * more sets as we see them being consumed, until we hit the hard limit of
 188  * EIB_DATA_MAX_RWQE.
 189  */
 190 #define EIB_DATA_MAX_SWQE       4000
 191 #define EIB_DATA_MAX_RWQE       4000
 192 #define EIB_DATA_RWQE_BKT       512
 193 
 194 /*
 195  * vNIC data channel CQ moderation parameters
 196  */
 197 #define EIB_TX_COMP_COUNT               10
 198 #define EIB_TX_COMP_USEC                300
 199 #define EIB_RX_COMP_COUNT               4
 200 #define EIB_RX_COMP_USEC                10
 201 
 202 /*
 203  * qe_info masks (blk:ndx:type:flags)
 204  */
 205 #define EIB_WQEBLK_SHIFT                24
 206 #define EIB_WQEBLK_MASK                 0xFF
 207 #define EIB_WQENDX_SHIFT                16
 208 #define EIB_WQENDX_MASK                 0xFF
 209 #define EIB_WQETYP_SHIFT                8
 210 #define EIB_WQETYP_MASK                 0xFF
 211 #define EIB_WQEFLGS_SHIFT               0
 212 #define EIB_WQEFLGS_MASK                0xFF
 213 
 214 /*
 215  * Macros to get the bit fields from qe_info
 216  */
 217 #define EIB_WQE_BLK(info)       (((info) >> EIB_WQEBLK_SHIFT) & EIB_WQEBLK_MASK)
 218 #define EIB_WQE_NDX(info)       (((info) >> EIB_WQENDX_SHIFT) & EIB_WQENDX_MASK)
 219 #define EIB_WQE_TYPE(info)      (((info) >> EIB_WQETYP_SHIFT) & EIB_WQETYP_MASK)
 220 #define EIB_WQE_FLAGS(info)     ((info) & EIB_WQEFLGS_MASK)
 221 
 222 /*
 223  * Values for type and flags in qe_info
 224  */
 225 #define EIB_WQE_TX                      0x1
 226 #define EIB_WQE_RX                      0x2
 227 
 228 /*
 229  * Flags for rx wqes/buffers
 230  */
 231 #define EIB_WQE_FLG_POSTED_TO_HCA       0x1
 232 #define EIB_WQE_FLG_WITH_NW             0x2
 233 
 234 /*
 235  * Flags for tx wqes/buffers
 236  */
 237 #define EIB_WQE_FLG_BUFTYPE_LSO         0x4
 238 #define EIB_WQE_FLG_BUFTYPE_MAPPED      0x8
 239 
 240 /*
 241  * Send/Recv workq entries
 242  */
 243 typedef struct eib_wqe_s {
 244         struct eib_wqe_pool_s   *qe_pool;
 245         uint8_t                 *qe_cpbuf;
 246         uint8_t                 *qe_payload_hdr;
 247         uint_t                  qe_bufsz;
 248         uint_t                  qe_info;
 249         int                     qe_vnic_inst;
 250         ibt_ud_dest_hdl_t       qe_dest;
 251         frtn_t                  qe_frp;
 252 
 253         mblk_t                  *qe_mp;
 254         ibt_mi_hdl_t            qe_iov_hdl;
 255         ibt_all_wr_t            qe_wr;
 256         ibt_wr_ds_t             qe_sgl;
 257         ibt_wr_ds_t             qe_big_sgl[EIB_MAX_SGL];
 258         struct eib_wqe_s        *qe_nxt_post;
 259         struct eib_chan_s       *qe_chan;
 260 } eib_wqe_t;
 261 
 262 /*
 263  * The wqe in-use/free status in EoIB is managed via a 2-level bitmap
 264  * logic.
 265  *
 266  * Each set of 64 wqes (a "wqe block") is managed by a single 64-bit
 267  * integer bitmap.  The free status of a set of 64 such wqe blocks (a
 268  * "wqe pool") is managed by one 64-bit integer bitmap (if any wqe in
 269  * the wqe block is free, the bit in the map is 1, otherwise it is 0).
 270  *
 271  * The maximum pool size is 4096 wqes, but this can easily be extended
 272  * to support more wqes using additional pools of wqes.
 273  *
 274  * Note that an entire pool of wqes is allocated via a single allocation,
 275  * the wqe addresses in a pool are all contiguous.  The tx/rx copy buffers
 276  * for a wqe pool are also allocated via a single allocation.
 277  */
 278 #define EIB_BLKS_PER_POOL       64
 279 #define EIB_WQES_PER_BLK        64      /* do not change this */
 280 #define EIB_WQES_PER_POOL       (EIB_BLKS_PER_POOL * EIB_WQES_PER_BLK)
 281 
 282 #define EIB_WQE_SZ              (sizeof (eib_wqe_t))
 283 #define EIB_WQEBLK_SZ           (EIB_WQES_PER_BLK * EIB_WQE_SZ)
 284 
 285 typedef struct eib_wqe_pool_s {
 286         struct eib_wqe_pool_s   *wp_next;
 287         struct eib_s            *wp_ss;
 288         ib_vaddr_t              wp_vaddr;
 289         ib_memlen_t             wp_memsz;
 290         ibt_mr_hdl_t            wp_mr;
 291         ibt_lkey_t              wp_lkey;
 292         uint_t                  wp_nfree_lwm;
 293         int                     wp_type;
 294 
 295         kmutex_t                wp_lock;
 296         kcondvar_t              wp_cv;
 297         uint_t                  wp_status;
 298         uint_t                  wp_nfree;
 299         uint64_t                wp_free_blks;
 300         uint64_t                wp_free_wqes[EIB_BLKS_PER_POOL];
 301         struct eib_wqe_s        *wp_wqe;
 302 } eib_wqe_pool_t;
 303 
 304 /*
 305  * Values for wp_type
 306  */
 307 #define EIB_WP_TYPE_TX          0x1
 308 #define EIB_WP_TYPE_RX          0x2
 309 
 310 /*
 311  * Values for wp_status (bit fields)
 312  */
 313 #define EIB_TXWQE_SHORT         0x1     /* only for tx wqe pool */
 314 #define EIB_TXWQE_MONITOR_DIE   0x2     /* only for tx wqe pool */
 315 
 316 #define EIB_RXWQE_SHORT         0x1     /* only for rx wqe pool */
 317 
 318 /*
 319  * The low-water-mark is an indication of when wqe grabs for low-priority
 320  * qps should start to get refused (swqe grabs for control messages such
 321  * as keepalives and rwqe grabs for posting back to control qps will still
 322  * be allowed).  The high-water-mark is an indication of when normal
 323  * behavior should resume.
 324  */
 325 #define EIB_NFREE_SWQES_LWM     (EIB_WQES_PER_POOL / 64)        /* 1/64 */
 326 #define EIB_NFREE_SWQES_HWM     (EIB_WQES_PER_POOL / 32)        /* 1/32 */
 327 #define EIB_NFREE_RWQES_LWM     (EIB_WQES_PER_POOL / 10)        /* 10% */
 328 #define EIB_NFREE_RWQES_HWM     (EIB_WQES_PER_POOL / 5)         /* 20% */
 329 
 330 /*
 331  * The "rwqes low" is used to determine when we should start using allocb()
 332  * to copy and send received mblks in the rx path.  It should be a little
 333  * above the rwqes low-water-mark, but less than the high-water-mark.
 334  */
 335 #define EIB_NFREE_RWQES_LOW     \
 336         ((EIB_NFREE_RWQES_LWM + EIB_NFREE_RWQES_HWM) / 2)
 337 
 338 #define EIB_WPRI_HI             1       /* for keepalive posts */
 339 #define EIB_WPRI_LO             2       /* for all other posts */
 340 
 341 /*
 342  * Multicast GID Layout: the multicast gid is specified in big-endian
 343  * representation, as a collection of different-sized fields in the
 344  * EoIB specification.  On Solaris, the multicast gid is represented
 345  * as a collection of two 8-byte fields (in ib_gid_t).
 346  */
 347 typedef struct eib_mgid_spec_s {
 348         uint8_t                 sp_mgid_prefix[FIP_MGID_PREFIX_LEN];
 349         uint8_t                 sp_type;
 350         uint8_t                 sp_dmac[ETHERADDRL];
 351         uint8_t                 sp_rss_hash;
 352         uint8_t                 sp_vhub_id[FIP_VHUBID_LEN];
 353 } eib_mgid_spec_t;
 354 
 355 /*
 356  * Values for sp_type in mgid as per EoIB specification
 357  */
 358 #define EIB_MGID_VHUB_DATA      0x0
 359 #define EIB_MGID_VHUB_UPDATE    0x2
 360 #define EIB_MGID_VHUB_TABLE     0x3
 361 
 362 typedef union eib_mgid_s {
 363         eib_mgid_spec_t         gd_spec;
 364         ib_gid_t                gd_sol;
 365 } eib_mgid_t;
 366 
 367 /*
 368  * Gateway properties handed over to us by the EoIB nexus
 369  */
 370 typedef struct eib_gw_props_s {
 371         kmutex_t                pp_gw_lock;
 372 
 373         ib_guid_t               pp_gw_system_guid;
 374         ib_guid_t               pp_gw_guid;
 375         ib_sn_prefix_t          pp_gw_sn_prefix;
 376 
 377         uint_t                  pp_gw_adv_period;
 378         uint_t                  pp_gw_ka_period;
 379         uint_t                  pp_vnic_ka_period;
 380 
 381         ib_qpn_t                pp_gw_ctrl_qpn;
 382         ib_lid_t                pp_gw_lid;
 383         uint16_t                pp_gw_portid;
 384 
 385         uint16_t                pp_gw_num_net_vnics;
 386         uint8_t                 pp_gw_flag_available;
 387         uint8_t                 pp_gw_is_host_adm_vnics;
 388         uint8_t                 pp_gw_sl;
 389         uint8_t                 pp_gw_n_rss_qpn;
 390 
 391         uint8_t                 *pp_gw_system_name;
 392         uint8_t                 *pp_gw_port_name;
 393         uint8_t                 *pp_gw_vendor_id;
 394 
 395         clock_t                 pp_gw_ka_ticks;         /* 2.5 x gw_ka_period */
 396         clock_t                 pp_vnic_ka_ticks;       /* vnic_ka_period */
 397 } eib_gw_props_t;
 398 
 399 /*
 400  * Port-specific properties
 401  */
 402 typedef struct eib_props_s {
 403         uint64_t                ep_ifspeed;
 404         ib_guid_t               ep_hca_guid;
 405         uint8_t                 ep_port_num;
 406         ib_gid_t                ep_sgid;
 407         ib_lid_t                ep_blid;
 408         uint16_t                ep_mtu;
 409         ibt_srate_t             ep_srate;
 410 } eib_props_t;
 411 
 412 /*
 413  * Capabilities derived from HCA attributes
 414  */
 415 typedef struct eib_caps_s {
 416         uint_t                  cp_lso_maxlen;
 417         uint32_t                cp_cksum_flags;
 418         int                     cp_resv_lkey_capab;
 419         ibt_lkey_t              cp_resv_lkey;
 420 
 421         uint_t                  cp_max_swqe;
 422         uint_t                  cp_max_rwqe;
 423         uint_t                  cp_max_sgl;
 424         uint_t                  cp_hiwm_sgl;
 425 } eib_caps_t;
 426 
 427 /*
 428  * List of multicast groups the vnic joined
 429  */
 430 typedef struct eib_mcg_s {
 431         struct eib_mcg_s        *mg_next;
 432         ib_gid_t                mg_rgid;
 433         ib_gid_t                mg_mgid;
 434         uint8_t                 mg_join_state;
 435         uint8_t                 mg_mac[ETHERADDRL];
 436         ibt_mcg_info_t          *mg_mcginfo;
 437 } eib_mcg_t;
 438 
 439 /*
 440  * Admin/control/data channel information
 441  */
 442 typedef struct eib_chan_s {
 443         ibt_channel_hdl_t       ch_chan;
 444         ib_qpn_t                ch_qpn;
 445 
 446         ibt_wc_t                *ch_wc;
 447         ibt_cq_hdl_t            ch_cq_hdl;
 448         uint_t                  ch_cq_sz;
 449 
 450         ibt_wc_t                *ch_rcv_wc;
 451         ibt_cq_hdl_t            ch_rcv_cq_hdl;
 452         uint_t                  ch_rcv_cq_sz;
 453 
 454         int                     ch_vnic_inst;
 455         uint_t                  ch_max_swqes;
 456         uint_t                  ch_max_rwqes;
 457         uint_t                  ch_lwm_rwqes;
 458         uint_t                  ch_rwqe_bktsz;
 459         uint_t                  ch_ip_hdr_align;
 460         boolean_t               ch_alloc_mp;
 461         boolean_t               ch_tear_down;
 462 
 463         kmutex_t                ch_pkey_lock;
 464         ib_pkey_t               ch_pkey;
 465         uint16_t                ch_pkey_ix;
 466 
 467         kmutex_t                ch_cep_lock;
 468         kcondvar_t              ch_cep_cv;
 469         ibt_cep_state_t         ch_cep_state;
 470 
 471         kmutex_t                ch_tx_lock;
 472         kcondvar_t              ch_tx_cv;
 473         uint_t                  ch_tx_posted;
 474         boolean_t               ch_tx_busy;
 475         struct eib_wqe_s        *ch_tx;
 476         struct eib_wqe_s        *ch_tx_tail;
 477 
 478         kmutex_t                ch_rx_lock;
 479         kcondvar_t              ch_rx_cv;
 480         uint_t                  ch_rx_posted;
 481         boolean_t               ch_rx_refilling;
 482 
 483         kmutex_t                ch_vhub_lock;
 484         struct eib_mcg_s        *ch_vhub_table;
 485         struct eib_mcg_s        *ch_vhub_update;
 486         struct eib_mcg_s        *ch_vhub_data;
 487 
 488         struct eib_chan_s       *ch_rxpost_next;
 489 } eib_chan_t;
 490 
 491 /*
 492  * States for vNIC state machine during login
 493  */
 494 #define EIB_LOGIN_INIT          0
 495 #define EIB_LOGIN_ACK_WAIT      1
 496 #define EIB_LOGIN_ACK_RCVD      2
 497 #define EIB_LOGIN_NACK_RCVD     3
 498 #define EIB_LOGIN_TBL_WAIT      4
 499 #define EIB_LOGIN_TBL_INPROG    5
 500 #define EIB_LOGIN_TBL_DONE      6
 501 #define EIB_LOGIN_TBL_FAILED    7
 502 #define EIB_LOGIN_DONE          8
 503 #define EIB_LOGIN_TIMED_OUT     9
 504 #define EIB_LOGOUT_DONE         10
 505 
 506 typedef struct eib_login_data_s {
 507         ib_guid_t               ld_gw_guid;
 508         ib_lid_t                ld_gw_lid;
 509         uint_t                  ld_syndrome;
 510         uint16_t                ld_gw_port_id;
 511         ib_qpn_t                ld_gw_data_qpn;
 512         ib_qpn_t                ld_gw_ctl_qpn;
 513         uint16_t                ld_vnic_id;     /* includes set msbit */
 514         uint16_t                ld_vhub_mtu;
 515         uint16_t                ld_vhub_pkey;
 516         uint16_t                ld_assigned_vlan;
 517         uint8_t                 ld_gw_sl;
 518         uint8_t                 ld_n_rss_mcgid;
 519         uint8_t                 ld_n_mac_mcgid;
 520         uint8_t                 ld_vnic_name[FIP_VNIC_NAME_LEN];
 521         uint8_t                 ld_assigned_mac[ETHERADDRL];
 522         uint8_t                 ld_gw_mgid_prefix[FIP_MGID_PREFIX_LEN];
 523         uint8_t                 ld_vlan_in_packets;
 524         uint32_t                ld_vhub_id;
 525 } eib_login_data_t;
 526 
 527 #define EIB_UNICAST_MAC(mac)            (((mac)[0] & 0x01) == 0)
 528 
 529 /*
 530  * Map to translate between DMAC and {qpn, lid, sl}
 531  */
 532 typedef struct eib_vhub_map_s {
 533         struct eib_vhub_map_s   *mp_next;
 534         uint32_t                mp_tusn;
 535         ib_qpn_t                mp_qpn;
 536         ib_lid_t                mp_lid;
 537         uint8_t                 mp_mac[ETHERADDRL];
 538         uint8_t                 mp_sl;
 539         uint8_t                 mp_v_rss_type;
 540 } eib_vhub_map_t;
 541 
 542 /*
 543  * Per-vNIC vHUB Table
 544  */
 545 #define EIB_TB_NBUCKETS         13
 546 typedef struct eib_vhub_table_s {
 547         kmutex_t                tb_lock;
 548         struct eib_vhub_map_s   *tb_gateway;
 549         struct eib_vhub_map_s   *tb_unicast_miss;
 550         struct eib_vhub_map_s   *tb_vhub_multicast;
 551         struct eib_vhub_map_s   *tb_vnic_entry[EIB_TB_NBUCKETS];
 552         struct eib_vhub_map_s   *tb_mcast_entry[EIB_TB_NBUCKETS];
 553 
 554         uint32_t                tb_tusn;
 555         uint8_t                 tb_eport_state;
 556 
 557         uint16_t                tb_entries_seen;
 558         uint16_t                tb_entries_in_table;
 559         uint32_t                tb_checksum;
 560 } eib_vhub_table_t;
 561 
 562 typedef struct eib_vhub_update_s {
 563         kmutex_t                up_lock;
 564         eib_vhub_map_t          *up_vnic_entry;
 565         uint32_t                up_tusn;
 566         uint8_t                 up_eport_state;
 567 } eib_vhub_update_t;
 568 
 569 typedef struct eib_ether_hdr_s {
 570         int                     eh_tagless;
 571         uint16_t                eh_ether_type;
 572         uint16_t                eh_vlan;
 573         uint8_t                 eh_dmac[ETHERADDRL];
 574         uint8_t                 eh_smac[ETHERADDRL];
 575 } eib_ether_hdr_t;
 576 
 577 /*
 578  * vNIC Information
 579  */
 580 typedef struct eib_vnic_s {
 581         struct eib_s            *vn_ss;
 582         eib_chan_t              *vn_ctl_chan;
 583         eib_chan_t              *vn_data_chan;
 584         int                     vn_instance;
 585         uint16_t                vn_vlan;
 586         uint16_t                vn_id;
 587         uint8_t                 vn_macaddr[ETHERADDRL];
 588         struct eib_login_data_s vn_login_data;
 589 
 590         kmutex_t                vn_lock;
 591         kcondvar_t              vn_cv;
 592         uint_t                  vn_state;
 593         struct eib_vhub_table_s *vn_vhub_table;
 594         struct eib_vhub_update_s *vn_vhub_update;
 595 
 596         ddi_softint_handle_t    vn_ctl_si_hdl;
 597         ddi_softint_handle_t    vn_data_tx_si_hdl;
 598         ddi_softint_handle_t    vn_data_rx_si_hdl;
 599 } eib_vnic_t;
 600 
 601 
 602 /*
 603  * Base NIC's mac state flags. The lock protects the starting/stopping
 604  * bits.  Access to the rest of the mac state is protected by these
 605  * two bits.
 606  */
 607 #define EIB_NIC_STARTING        0x01
 608 #define EIB_NIC_STOPPING        0x02
 609 #define EIB_NIC_STARTED         0x80
 610 #define EIB_NIC_RESTARTING      (EIB_NIC_STARTING | EIB_NIC_STOPPING)
 611 
 612 typedef struct eib_node_state_s {
 613         kmutex_t                ns_lock;
 614         kcondvar_t              ns_cv;
 615         uint_t                  ns_nic_state;
 616         link_state_t            ns_link_state;
 617 } eib_node_state_t;
 618 
 619 /*
 620  * MIB-II statistics to report to the mac layer
 621  */
 622 typedef struct eib_stats_s {
 623         uint64_t                st_obytes;      /* bytes sent out */
 624         uint64_t                st_opkts;       /* pkts sent out */
 625         uint64_t                st_brdcstxmit;  /* broadcast pkts transmitted */
 626         uint64_t                st_multixmit;   /* multicast pkts transmitted */
 627         uint64_t                st_oerrors;     /* transmit errors */
 628         uint64_t                st_noxmitbuf;   /* transmit pkts discarded */
 629 
 630         uint64_t                st_rbytes;      /* bytes received */
 631         uint64_t                st_ipkts;       /* pkts received */
 632         uint64_t                st_brdcstrcv;   /* broadcast pkts received */
 633         uint64_t                st_multircv;    /* multicast pkts received */
 634         uint64_t                st_ierrors;     /* receive errors */
 635         uint64_t                st_norcvbuf;    /* receive pkts discarded */
 636 } eib_stats_t;
 637 
 638 #define EIB_UPDATE_COUNTER(addr, val)   (atomic_add_64((addr), (val)))
 639 #define EIB_INCR_COUNTER(addr)          (atomic_inc_64((addr)))
 640 #define EIB_DECR_COUNTER(addr)          (atomic_dec_64((addr)))
 641 
 642 /*
 643  * Cache of address vectors with dlid as the key. Currently we use
 644  * eib state structure's  ei_lock to protect the individual address
 645  * vector's fields.  This is a lock granularity that's slightly
 646  * bigger than ideal, but it should do for now.
 647  */
 648 #define EIB_AV_NBUCKETS         17
 649 typedef struct eib_avect_s {
 650         struct eib_avect_s      *av_next;
 651         ibt_adds_vect_t         av_vect;
 652         uint_t                  av_ref;
 653 } eib_avect_t;
 654 
 655 /*
 656  * vNIC creation and deletion are serialized by a non-zero value
 657  * to the ei_vnic_state member (i.e. only one vnic may be created
 658  * or deleted at a time). The code makes sure to access/update
 659  * the ei_active_vnics member only after a successful setting of
 660  * ei_vnic_state.
 661  */
 662 #define EIB_VN_BEING_CREATED    0x01
 663 #define EIB_VN_BEING_DELETED    0x02
 664 #define EIB_VN_BEING_MODIFIED   (EIB_VN_BEING_CREATED | EIB_VN_BEING_DELETED)
 665 
 666 /*
 667  * All possible EoIB event work items that need to be handled
 668  */
 669 #define EIB_EV_NONE             0
 670 #define EIB_EV_PORT_DOWN        1
 671 #define EIB_EV_PORT_UP          2
 672 #define EIB_EV_PKEY_CHANGE      3
 673 #define EIB_EV_SGID_CHANGE      4
 674 #define EIB_EV_CLNT_REREG       5
 675 #define EIB_EV_GW_EPORT_DOWN    6
 676 #define EIB_EV_GW_DOWN          7
 677 #define EIB_EV_GW_UP            8
 678 #define EIB_EV_GW_INFO_UPDATE   9
 679 #define EIB_EV_MCG_DELETED      10
 680 #define EIB_EV_MCG_CREATED      11
 681 #define EIB_EV_SHUTDOWN         12
 682 
 683 typedef struct eib_event_s {
 684         struct eib_event_s      *ev_next;
 685         uint_t                  ev_code;
 686         void                    *ev_arg;
 687 } eib_event_t;
 688 
 689 /*
 690  * Work element for new vnic creation
 691  */
 692 typedef struct eib_vnic_req_s {
 693         struct eib_vnic_req_s   *vr_next;
 694         uint_t                  vr_req;
 695         uint8_t                 vr_mac[ETHERADDRL];
 696         uint16_t                vr_vlan;
 697 } eib_vnic_req_t;
 698 
 699 /*
 700  * Values for vr_req
 701  */
 702 #define EIB_CR_REQ_NEW_VNIC     1
 703 #define EIB_CR_REQ_FLUSH        2
 704 #define EIB_CR_REQ_DIE          3
 705 
 706 /*
 707  * Work element for vnics kept alive by the keepalive manager thread
 708  * and bitfield values for ei_ka_vnics_event.
 709  */
 710 typedef struct eib_ka_vnics_s {
 711         struct eib_ka_vnics_s   *ka_next;
 712         struct eib_vnic_s       *ka_vnic;
 713 } eib_ka_vnics_t;
 714 
 715 #define EIB_KA_VNICS_DIE        0x1
 716 #define EIB_KA_VNICS_TIMED_OUT  0x2
 717 
 718 /*
 719  * EoIB per-instance state
 720  */
 721 typedef struct eib_s {
 722         ibt_clnt_hdl_t          ei_ibt_hdl;
 723         ibt_hca_hdl_t           ei_hca_hdl;
 724         ibt_pd_hdl_t            ei_pd_hdl;
 725         mac_handle_t            ei_mac_hdl;
 726 
 727         ddi_softint_handle_t    ei_admin_si_hdl;
 728         ddi_callback_id_t       ei_login_ack_cb;
 729         ddi_callback_id_t       ei_gw_alive_cb;
 730         ddi_callback_id_t       ei_gw_info_cb;
 731 
 732         ibt_hca_attr_t          *ei_hca_attrs;
 733         dev_info_t              *ei_dip;
 734         uint_t                  ei_instance;
 735 
 736         struct eib_gw_props_s   *ei_gw_props;
 737         struct eib_props_s      *ei_props;
 738         struct eib_caps_s       *ei_caps;
 739         struct eib_stats_s      *ei_stats;
 740 
 741         struct eib_node_state_s *ei_node_state;
 742         struct eib_chan_s       *ei_admin_chan;
 743 
 744         struct eib_wqe_pool_s   *ei_tx;
 745         struct eib_wqe_pool_s   *ei_rx;
 746         struct eib_lsobkt_s     *ei_lso;
 747 
 748         kmutex_t                ei_vnic_lock;
 749         kcondvar_t              ei_vnic_cv;
 750         uint_t                  ei_vnic_state;
 751         uint64_t                ei_active_vnics;
 752         uint64_t                ei_zombie_vnics;
 753         uint64_t                ei_rejoin_vnics;
 754         struct eib_vnic_s       *ei_vnic[EIB_MAX_VNICS];
 755         struct eib_vnic_s       *ei_vnic_pending;
 756         int64_t                 ei_gw_last_heartbeat;
 757         boolean_t               ei_gw_unreachable;
 758         uint8_t                 ei_gw_eport_state;
 759 
 760         kmutex_t                ei_av_lock;
 761         struct eib_avect_s      *ei_av[EIB_AV_NBUCKETS];
 762 
 763         kmutex_t                ei_ev_lock;
 764         kcondvar_t              ei_ev_cv;
 765         struct eib_event_s      *ei_event;
 766 
 767         kmutex_t                ei_rxpost_lock;
 768         kcondvar_t              ei_rxpost_cv;
 769         uint_t                  ei_rxpost_die;
 770         struct eib_chan_s       *ei_rxpost;
 771 
 772         kmutex_t                ei_vnic_req_lock;
 773         kcondvar_t              ei_vnic_req_cv;
 774         struct eib_vnic_req_s   *ei_vnic_req;
 775         struct eib_vnic_req_s   *ei_failed_vnic_req;
 776         struct eib_vnic_req_s   *ei_pending_vnic_req;
 777 
 778         kmutex_t                ei_ka_vnics_lock;
 779         kcondvar_t              ei_ka_vnics_cv;
 780         uint_t                  ei_ka_vnics_event;
 781         struct eib_ka_vnics_s   *ei_ka_vnics;
 782 
 783         kt_did_t                ei_txwqe_monitor;
 784         kt_did_t                ei_lsobufs_monitor;
 785         kt_did_t                ei_rwqes_refiller;
 786         kt_did_t                ei_vnic_creator;
 787         kt_did_t                ei_events_handler;
 788         kt_did_t                ei_keepalives_manager;
 789 } eib_t;
 790 
 791 /*
 792  * Private read-only datalink properties
      *
      * String names of the driver-private link properties.  Every name
      * begins with an underscore; NOTE(review): this looks like the mac(9E)
      * naming convention for private properties -- confirm.
      *
      * EIB_DLPROP_GW_EPORT_STATE expands to _eib_eport_state: the GW part
      * of the macro name does not appear in the property string itself.
 793  */
 794 #define EIB_DLPROP_GW_EPORT_STATE       "_eib_eport_state"
 795 #define EIB_DLPROP_HCA_GUID             "_eib_hca_guid"
 796 #define EIB_DLPROP_PORT_GUID            "_eib_port_guid"
 797 
 798 /*
 799  * FUNCTION PROTOTYPES FOR CROSS-FILE LINKAGE
 800  */
 801 
 802 /*
 803  * FIP protocol related
      *
      * The login, heartbeat, vhub_table and logout routines share one
      * shape: the eib_t instance, the vnic concerned, and an int pointer
      * (presumably an out-parameter for an error code -- confirm against
      * the definitions).
      *
      * The two parse routines take a uint8_t pointer, presumably the
      * received packet bytes.  eib_fip_parse_login_ack() also takes an
      * eib_login_data_t pointer (presumably filled in from the packet --
      * confirm), while eib_fip_parse_ctl_pkt() takes the vnic and no
      * eib_t.
 804  */
 805 extern int eib_fip_login(eib_t *, eib_vnic_t *, int *);
 806 extern int eib_fip_heartbeat(eib_t *, eib_vnic_t *, int *);
 807 extern int eib_fip_vhub_table(eib_t *, eib_vnic_t *, int *);
 808 extern int eib_fip_logout(eib_t *, eib_vnic_t *, int *);
 809 extern int eib_fip_parse_login_ack(eib_t *, uint8_t *, eib_login_data_t *);
 810 extern int eib_fip_parse_ctl_pkt(uint8_t *, eib_vnic_t *);
 811 
 812 /*
 813  * Service threads and other handlers
      *
      * Each eib_stop_*() routine below has a same-named counterpart
      * declared above it: events handler, rwqe refill, vnic creator,
      * tx wqe monitor, lso buf monitor and keepalives manager.
      *
      * eib_stop_monitor_lso_bufs() is the only stop routine that returns
      * int and takes a boolean_t; the others return void and take only
      * the eib_t.  The meaning of that boolean_t is not visible in this
      * header.
      *
      * The three *_cb routines share the parameter list
      * (dev_info_t *, ddi_eventcookie_t, void *, void *).
      * NOTE(review): this matches the handler type accepted by
      * ddi_add_event_handler(9F) -- confirm they are registered that way.
 814  */
 815 extern void eib_events_handler(eib_t *);
 816 extern void eib_svc_enqueue_event(eib_t *, eib_event_t *);
 817 extern void eib_refill_rwqes(eib_t *);
 818 extern void eib_vnic_creator(eib_t *);
 819 extern void eib_monitor_tx_wqes(eib_t *);
 820 extern void eib_monitor_lso_bufs(eib_t *);
 821 extern void eib_manage_keepalives(eib_t *);
 822 extern void eib_stop_events_handler(eib_t *);
 823 extern void eib_stop_refill_rwqes(eib_t *);
 824 extern void eib_stop_vnic_creator(eib_t *);
 825 extern void eib_stop_monitor_tx_wqes(eib_t *);
 826 extern int eib_stop_monitor_lso_bufs(eib_t *, boolean_t);
 827 extern void eib_stop_manage_keepalives(eib_t *);
 828 extern void eib_flush_vnic_reqs(eib_t *);
 829 extern void eib_gw_info_cb(dev_info_t *, ddi_eventcookie_t, void *, void *);
 830 extern void eib_gw_alive_cb(dev_info_t *, ddi_eventcookie_t, void *, void *);
 831 extern void eib_login_ack_cb(dev_info_t *, ddi_eventcookie_t, void *, void *);
 832 
 833 /*
 834  * Admin QP related
      *
      * eib_adm_comp_handler() takes two caddr_t arguments and returns
      * uint_t.  NOTE(review): presumably an interrupt-handler style
      * completion routine (ddi_intr_handler_t shape) -- confirm.
      *
      * eib_rb_adm_setup_qp() is presumably the rollback (rb) counterpart
      * of eib_adm_setup_qp() -- confirm against the definitions.
 835  */
 836 extern int eib_adm_setup_qp(eib_t *, int *);
 837 extern uint_t eib_adm_comp_handler(caddr_t, caddr_t);
 838 extern void eib_rb_adm_setup_qp(eib_t *);
 839 
 840 /*
 841  * Control QP related
      *
      * Unlike the admin QP routines, the create and rollback routines
      * here also take the vnic, i.e. they operate per vnic rather than
      * per eib_t instance alone.  eib_ctl_comp_handler() has the same
      * (caddr_t, caddr_t) parameter list and uint_t return as the admin
      * QP completion handler.
 842  */
 843 extern int eib_ctl_create_qp(eib_t *, eib_vnic_t *, int *);
 844 extern uint_t eib_ctl_comp_handler(caddr_t, caddr_t);
 845 extern void eib_rb_ctl_create_qp(eib_t *, eib_vnic_t *);
 846 
 847 /*
 848  * Data QP related
      *
      * Separate completion handlers are declared for the rx and tx
      * sides, both with the (caddr_t, caddr_t) parameter list.
      *
      * eib_data_rx_recycle() takes a single caddr_t.  NOTE(review):
      * presumably a buffer free/recycle callback -- confirm how it is
      * registered.
      *
      * eib_data_lookup_vnic() takes a uint8_t pointer and a uint16_t
      * (presumably a MAC address and a VLAN id -- confirm) and hands
      * results back through the eib_vnic_t ** and boolean_t * arguments.
 849  */
 850 extern int eib_data_create_qp(eib_t *, eib_vnic_t *, int *);
 851 extern uint_t eib_data_rx_comp_handler(caddr_t, caddr_t);
 852 extern uint_t eib_data_tx_comp_handler(caddr_t, caddr_t);
 853 extern void eib_data_rx_recycle(caddr_t);
 854 extern void eib_data_post_tx(eib_vnic_t *, eib_wqe_t *);
 855 extern void eib_data_parse_ether_hdr(mblk_t *, eib_ether_hdr_t *);
 856 extern int eib_data_lookup_vnic(eib_t *, uint8_t *, uint16_t, eib_vnic_t **,
 857     boolean_t *);
 858 extern int eib_data_prepare_frame(eib_vnic_t *, eib_wqe_t *, mblk_t *,
 859     eib_ether_hdr_t *);
 860 extern void eib_rb_data_create_qp(eib_t *, eib_vnic_t *);
 861 
 862 /*
 863  * Resource related
      *
      * Grab and return routines are declared for three resource kinds:
      * send wqes (swqe), receive wqes (rwqe) and lso buffers (lsobufs).
      *
      * The plural wqe grab routines take an array of eib_wqe_t pointers,
      * a uint_t, a uint_t pointer and an int.  NOTE(review): presumably
      * requested count, count actually obtained, and a priority or flag
      * value -- confirm against the definitions.  The singular grab
      * routines return the wqe pointer directly.
      *
      * eib_rb_rsrc_setup_bufs() is presumably the rollback counterpart
      * of eib_rsrc_setup_bufs(); the meaning of its boolean_t is not
      * visible in this header.
 864  */
 865 extern int eib_rsrc_setup_bufs(eib_t *, int *);
 866 extern int eib_rsrc_grab_swqes(eib_t *, eib_wqe_t **, uint_t, uint_t *, int);
 867 extern int eib_rsrc_grab_rwqes(eib_t *, eib_wqe_t **, uint_t, uint_t *, int);
 868 extern int eib_rsrc_grab_lsobufs(eib_t *, uint_t, ibt_wr_ds_t *, uint32_t *);
 869 extern eib_wqe_t *eib_rsrc_grab_swqe(eib_t *, int);
 870 extern eib_wqe_t *eib_rsrc_grab_rwqe(eib_t *, int);
 871 extern void eib_rsrc_return_swqe(eib_t *, eib_wqe_t *, eib_chan_t *);
 872 extern void eib_rsrc_return_rwqe(eib_t *, eib_wqe_t *, eib_chan_t *);
 873 extern void eib_rsrc_return_lsobufs(eib_t *, ibt_wr_ds_t *, uint32_t);
 874 extern void eib_rsrc_decr_posted_swqe(eib_t *, eib_chan_t *);
 875 extern void eib_rsrc_decr_posted_rwqe(eib_t *, eib_chan_t *);
 876 extern void eib_rsrc_txwqes_needed(eib_t *);
 877 extern void eib_rsrc_lsobufs_needed(eib_t *);
 878 extern boolean_t eib_rsrc_rxpool_low(eib_wqe_t *);
 879 extern void eib_rb_rsrc_setup_bufs(eib_t *, boolean_t);
 880 
 881 /*
 882  * IBT related
      *
      * eib_ibt_hold_avect() returns an eib_avect_t pointer for an
      * (ib_lid_t, uint8_t) pair and eib_ibt_release_avect() takes one
      * back.  NOTE(review): presumably a reference-counted lookup keyed
      * by destination lid and service level -- confirm.
      *
      * eib_ibt_async_handler() takes (void *, ibt_hca_hdl_t,
      * ibt_async_code_t, ibt_async_event_t *).  NOTE(review): this looks
      * like the IBTF async handler type -- confirm where it is
      * registered.
      *
      * eib_rb_ibt_hca_init() takes a uint_t in addition to the eib_t;
      * presumably a mask of completed init steps to undo -- confirm.
 883  */
 884 extern int eib_ibt_hca_init(eib_t *);
 885 extern void eib_ibt_link_mod(eib_t *);
 886 extern int eib_ibt_modify_chan_pkey(eib_t *, eib_chan_t *, ib_pkey_t);
 887 extern eib_avect_t *eib_ibt_hold_avect(eib_t *, ib_lid_t, uint8_t);
 888 extern void eib_ibt_release_avect(eib_t *, eib_avect_t *);
 889 extern void eib_ibt_free_avects(eib_t *);
 890 extern void eib_ibt_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
 891     ibt_async_event_t *);
 892 extern void eib_ibt_record_capab(eib_t *, ibt_hca_attr_t *, eib_caps_t *);
 893 extern void eib_rb_ibt_hca_init(eib_t *, uint_t);
 894 
 895 /*
 896  * Chan related
      *
      * eib_chan_init() takes no arguments and returns an eib_chan_t
      * pointer; eib_chan_fini() takes such a pointer and returns nothing.
      *
      * eib_chan_post_rx() takes a uint_t pointer (presumably reports how
      * many receive wqes were posted -- confirm), whereas
      * eib_chan_post_recv() takes one specific wqe.
 897  */
 898 extern eib_chan_t *eib_chan_init(void);
 899 extern void eib_chan_fini(eib_chan_t *);
 900 extern int eib_chan_post_rx(eib_t *, eib_chan_t *, uint_t *);
 901 extern int eib_chan_post_recv(eib_t *, eib_chan_t *, eib_wqe_t *);
 902 
 903 /*
 904  * Mac layer related
      *
      * The nic state routines (set, clr, upd, get) all work on uint_t
      * values.  NOTE(review): the set/clr naming suggests the state is
      * a set of bit flags, with upd taking a clear mask and a set mask
      * -- confirm the argument order against the definition.
      *
      * eib_mac_link_state(), eib_mac_link_down() and eib_mac_link_up()
      * each take a trailing boolean_t whose meaning is not visible in
      * this header.
      *
      * eib_mac_hca_portstate() returns results through an ib_lid_t
      * pointer and an int pointer.
 905  */
 906 extern void eib_mac_set_nic_state(eib_t *, uint_t);
 907 extern void eib_mac_clr_nic_state(eib_t *, uint_t);
 908 extern void eib_mac_upd_nic_state(eib_t *, uint_t, uint_t);
 909 extern uint_t eib_mac_get_nic_state(eib_t *);
 910 extern void eib_mac_link_state(eib_t *, link_state_t, boolean_t);
 911 extern void eib_mac_link_down(eib_t *, boolean_t);
 912 extern void eib_mac_link_up(eib_t *, boolean_t);
 913 extern int eib_mac_start(eib_t *);
 914 extern void eib_mac_stop(eib_t *);
 915 extern int eib_mac_multicast(eib_t *, boolean_t, uint8_t *);
 916 extern int eib_mac_promisc(eib_t *, boolean_t);
 917 extern int eib_mac_tx(eib_t *, mblk_t *);
 918 extern int eib_mac_hca_portstate(eib_t *, ib_lid_t *, int *);
 919 
 920 /*
 921  * VNIC related
      *
      * Several routines here (eib_vnic_create, eib_vnic_setup_dest,
      * eib_vnic_need_new) take a (uint8_t *, uint16_t) pair.
      * NOTE(review): presumably a MAC address and a VLAN id -- confirm.
      *
      * eib_vnic_create() returns the new vnic through its eib_vnic_t **
      * argument.  eib_rb_vnic_create() takes an extra uint_t, presumably
      * a mask of creation steps to roll back -- confirm.
      *
      * eib_vnic_get_data_chan() and eib_vnic_restart() identify the
      * vnic by an int rather than by pointer; presumably an index into
      * the ei_vnic[] array of eib_t -- confirm.
 922  */
 923 extern int eib_vnic_create(eib_t *, uint8_t *, uint16_t, eib_vnic_t **, int *);
 924 extern void eib_vnic_delete(eib_t *, eib_vnic_t *);
 925 extern int eib_vnic_wait_for_login_ack(eib_t *, eib_vnic_t *, int *);
 926 extern void eib_vnic_login_ack(eib_t *, eib_login_data_t *);
 927 extern int eib_vnic_wait_for_table(eib_t *, eib_vnic_t *, int *);
 928 extern void eib_vnic_vhub_table_done(eib_vnic_t *, uint_t);
 929 extern int eib_vnic_join_data_mcg(eib_t *, eib_vnic_t *, uint8_t *,
 930     boolean_t, int *);
 931 extern int eib_vnic_setup_dest(eib_vnic_t *, eib_wqe_t *, uint8_t *, uint16_t);
 932 extern void eib_vnic_leave_data_mcg(eib_t *, eib_vnic_t *, uint8_t *);
 933 extern void eib_vnic_init_tables(eib_t *, eib_vnic_t *);
 934 extern void eib_vnic_fini_tables(eib_t *, eib_vnic_t *, boolean_t);
 935 extern eib_chan_t *eib_vnic_get_data_chan(eib_t *, int);
 936 extern void eib_vnic_need_new(eib_t *, uint8_t *, uint16_t);
 937 extern void eib_vnic_enqueue_req(eib_t *, eib_vnic_req_t *);
 938 extern void eib_vnic_resurrect_zombies(eib_t *, uint8_t *);
 939 extern void eib_vnic_restart(eib_t *, int, uint8_t *);
 940 extern void eib_vnic_rejoin_mcgs(eib_t *);
 941 extern void eib_rb_vnic_create(eib_t *, eib_vnic_t *, uint_t);
 942 
 943 /*
 944  * Logging and other stuff
      *
      * The crit, err and warn printers are always declared.  The debug,
      * args, pkt and verbose printers are declared only when EIB_DEBUG
      * is defined, so any direct call to them must be compiled under the
      * same condition.
      *
      * All printers except eib_dprintf_pkt() are variadic and take a
      * format string; eib_dprintf_pkt() takes a uint8_t pointer and a
      * uint_t (presumably packet bytes and length -- confirm).  The
      * meaning of the leading int argument is not visible in this
      * header.
      *
      * The property routines follow: eib_get_props(), its presumed
      * rollback counterpart eib_rb_get_props(), and eib_update_props(),
      * which takes an eib_gw_info_t pointer.
 945  */
 946 extern void eib_debug_init(void);
 947 extern void eib_debug_fini(void);
 948 extern void eib_dprintf_crit(int, const char *fmt, ...);
 949 extern void eib_dprintf_err(int, const char *fmt, ...);
 950 extern void eib_dprintf_warn(int, const char *fmt, ...);
 951 #ifdef EIB_DEBUG
 952 extern void eib_dprintf_debug(int, const char *fmt, ...);
 953 extern void eib_dprintf_args(int, const char *fmt, ...);
 954 extern void eib_dprintf_pkt(int, uint8_t *, uint_t);
 955 extern void eib_dprintf_verbose(int, const char *fmt, ...);
 956 #endif
 957 extern int eib_get_props(eib_t *);
 958 extern void eib_update_props(eib_t *, eib_gw_info_t *);
 959 extern void eib_rb_get_props(eib_t *);
 960 
 961 /*
 962  * EoIB specific global variables
      *
      * Declarations only; the definitions live in some other file of
      * the driver.  The arrays are declared with unspecified size, so
      * sizeof cannot be applied to them through this header.
      *
      * NOTE(review): eib_setbit_mod67 is presumably a lookup table that
      * maps (single-bit 64-bit value mod 67) to a bit position, for use
      * with the uint64_t vnic bitmaps in eib_t -- confirm.
      * NOTE(review): eib_pvt_props is presumably the list of private
      * property names (the EIB_DLPROP_* strings) -- confirm.
 963  */
 964 extern ib_gid_t eib_reserved_gid;
 965 extern uint8_t eib_zero_mac[];
 966 extern uint8_t eib_broadcast_mac[];
 967 extern int eib_setbit_mod67[];
 968 extern char *eib_pvt_props[];
 969 
 970 /*
 971  * HW/FW workarounds
      *
      * Plain int globals, declared here and defined elsewhere in the
      * driver.  Every name starts with eib_wa_no_; NOTE(review):
      * presumably a nonzero value means the named feature is absent or
      * must not be relied on -- confirm the polarity at the definitions
      * before changing any of them.
 972  */
 973 extern int eib_wa_no_desc_list_len;
 974 extern int eib_wa_no_cksum_offload;
 975 extern int eib_wa_no_lso;
 976 extern int eib_wa_no_mcast_entries;
 977 extern int eib_wa_no_av_discover;
 978 extern int eib_wa_no_good_vp_flag;
 979 extern int eib_wa_no_good_vhub_cksum;
 980 
 981 /*
 982  * Miscellaneous externs
      *
      * Symbols without the eib_ prefix, i.e. not owned by this driver,
      * that are declared by hand here.  NOTE(review): if a system header
      * already declares them, these local declarations must stay in
      * sync with it -- confirm.
 983  */
 984 extern void freemsgchain(mblk_t *);
 985 extern pri_t minclsyspri;
 986 
 987 #ifdef __cplusplus
 988 }
 989 #endif
 990 
 991 #endif  /* _SYS_IB_EOIB_EIB_IMPL_H */