1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2019, Joyent, Inc.
  28  */
  29 
  30 #ifndef _SYS_IB_EOIB_EIB_IMPL_H
  31 #define _SYS_IB_EOIB_EIB_IMPL_H
  32 
  33 #ifdef __cplusplus
  34 extern "C" {
  35 #endif
  36 
  37 #include <sys/ddi.h>
  38 #include <sys/mac.h>
  39 #include <sys/sunddi.h>
  40 #include <sys/varargs.h>
  41 #include <sys/vlan.h>
  42 #include <sys/ib/ibtl/ibti.h>
  43 #include <sys/ib/ibtl/ibvti.h>
  44 #include <sys/ib/ib_pkt_hdrs.h>
  45 
  46 #include <sys/ib/clients/eoib/fip.h>
  47 #include <sys/ib/clients/eoib/eib.h>
  48 
  49 /*
  50  * Driver specific constants
  51  */
  52 #define EIB_E_SUCCESS                   0
  53 #define EIB_E_FAILURE                   -1      /* NOTE(review): unparenthesized negative literal */
  54 #define EIB_MAX_LINE                    128
  55 #define EIB_MAX_SGL                     59      /* dimension of qe_big_sgl[] in eib_wqe_t */
  56 #define EIB_MAX_POST_MULTIPLE           4
  57 #define EIB_MAX_PAYLOAD_HDR_SZ          160
  58 #define EIB_TX_COPY_THRESH              4096    /* greater than mtu */
  59 #define EIB_MAX_VNICS                   64      /* do not change this */
  60 #define EIB_LOGIN_TIMEOUT_USEC          8000000 /* 8 seconds, in microseconds */
  61 #define EIB_RWR_CHUNK_SZ                8
  62 #define EIB_IPHDR_ALIGN_ROOM            32
  63 #define EIB_IP_HDR_ALIGN                2
  64 #define EIB_MAX_RX_PKTS_ONINTR          0x800   /* 2048 */
  65 #define EIB_MAX_LOGIN_ATTEMPTS          3
  66 #define EIB_MAX_VHUB_TBL_ATTEMPTS       3
  67 #define EIB_MAX_KA_ATTEMPTS             3
  68 #define EIB_MAX_ATTEMPTS                10
  69 #define EIB_DELAY_HALF_SECOND           500000  /* presumably microseconds */
  70 #define EIB_GRH_SZ                      (sizeof (ib_grh_t))     /* bytes in an ib_grh_t */
  71 
  72 /*
  73  * Debug messages
  74  */
  75 #define EIB_MSGS_CRIT           0x01    /* one bit per message class */
  76 #define EIB_MSGS_ERR            0x02
  77 #define EIB_MSGS_WARN           0x04
  78 #define EIB_MSGS_DEBUG          0x08
  79 #define EIB_MSGS_ARGS           0x10
  80 #define EIB_MSGS_PKT            0x20
  81 #define EIB_MSGS_VERBOSE        0x40
  82 #define EIB_MSGS_DEFAULT        (EIB_MSGS_CRIT | EIB_MSGS_ERR | EIB_MSGS_WARN)  /* 0x07 */
  83 
  84 #define EIB_LOGSZ_DEFAULT       0x20000 /* 131072 */
  85 
  86 #define EIB_DPRINTF_CRIT        eib_dprintf_crit        /* always mapped to a function */
  87 #define EIB_DPRINTF_ERR         eib_dprintf_err
  88 #define EIB_DPRINTF_WARN        eib_dprintf_warn
  89 #ifdef EIB_DEBUG
  90 #define EIB_DPRINTF_DEBUG       eib_dprintf_debug
  91 #define EIB_DPRINTF_ARGS        eib_dprintf_args
  92 #define EIB_DPRINTF_PKT         eib_dprintf_pkt
  93 #define EIB_DPRINTF_VERBOSE     eib_dprintf_verbose
  94 #else   /* !EIB_DEBUG: the four macros below discard their arguments */
  95 #define EIB_DPRINTF_DEBUG(...)  (void)(0)
  96 #define EIB_DPRINTF_ARGS(...)   (void)(0)
  97 #define EIB_DPRINTF_PKT(...)    (void)(0)
  98 #define EIB_DPRINTF_VERBOSE(...) (void)(0)
  99 #endif  /* EIB_DEBUG */
 100 
 101 /*
 102  *  EoIB threads to provide various services
 103  */
 104 #define EIB_EVENTS_HDLR         "eib_events_handler"    /* name strings; presumably one per service thread */
 105 #define EIB_RWQES_REFILLER      "eib_rwqes_refiller"
 106 #define EIB_VNIC_CREATOR        "eib_vnic_creator"
 107 #define EIB_TXWQES_MONITOR      "eib_txwqe_monitor"     /* note: string says "txwqe", macro says TXWQES */
 108 #define EIB_LSOBUFS_MONITOR     "eib_lsobufs_monitor"
 109 
 110 /*
 111  * Macro for finding the least significant bit set in a 64-bit unsigned int
 112  */
 113 #define EIB_FIND_LSB_SET(val64) eib_setbit_mod67[((-(val64) & (val64)) % 67)]   /* (-(v) & (v)) isolates the lowest set bit; its value mod 67 indexes eib_setbit_mod67[] (declared elsewhere). val64 is evaluated twice. */
 114 
 115 /*
 116  * LSO buffers
 117  *
 118  * Under normal circumstances we should never need to use any buffer
 119  * that's larger than MTU.  Unfortunately, IB HCA has limitations
 120  * on the length of SGL that are much smaller than those for regular
 121  * ethernet NICs.  Since the network layer doesn't care to limit the
 122  * number of mblk fragments in any send mp chain, we end up having to
 123  * use these larger buffers occasionally.
 124  */
 125 #define EIB_LSO_MAXLEN                  65536
 126 #define EIB_LSO_BUFSZ                   8192
 127 #define EIB_LSO_NUM_BUFS                1024
 128 #define EIB_LSO_FREE_BUFS_THRESH        (EIB_LSO_NUM_BUFS >> 5) /* 1024 / 32 == 32 */
 129 
 130 typedef struct eib_lsobuf_s {   /* descriptor for one LSO buffer */
 131         struct eib_lsobuf_s *lb_next;   /* link to another descriptor */
 132         uint8_t         *lb_buf;        /* buffer memory for this descriptor */
 133         int             lb_isfree;      /* presumably non-zero when unused - confirm */
 134 } eib_lsobuf_t;
 135 
 136 typedef struct eib_lsobkt_s {   /* bucket of LSO buffers (eib_t's ei_lso points to one) */
 137         kmutex_t        bk_lock;
 138         kcondvar_t      bk_cv;
 139         uint_t          bk_status;      /* presumably EIB_LBUF_* bits below - confirm */
 140         uint8_t         *bk_mem;
 141         eib_lsobuf_t    *bk_bufl;
 142         eib_lsobuf_t    *bk_free_head;
 143         ibt_mr_hdl_t    bk_mr_hdl;      /* IBT memory region handle */
 144         ibt_lkey_t      bk_lkey;
 145         uint_t          bk_nelem;
 146         uint_t          bk_nfree;
 147 } eib_lsobkt_t;
 148 
 149 #define EIB_LBUF_SHORT          0x1     /* distinct bits, so they may be OR'ed */
 150 #define EIB_LBUF_MONITOR_DIE    0x2
 151 
 152 /*
 153  * The admin partition is only used for sending login and logout messages
 154  * and receiving login acknowledgements from the gateway.  While packets
 155  * going out on several vlans at the same time could result in multiple
 156  * vnic creations happening at the same time (and therefore multiple login
 157  * packets), we serialize the vnic creation via the vnic creator thread, so
 158  * we shouldn't need a lot of send wqes or receive wqes.  Note also that we
 159  * keep the cq size request to slightly less than a 2^n boundary to allow
 160  * the alloc cq routine to return the closest 2^n boundary as the real cq
 161  * size without wasting too much memory.
 162  */
 163 #define EIB_ADMIN_MAX_SWQE      30
 164 #define EIB_ADMIN_MAX_RWQE      30
 165 #define EIB_ADMIN_CQ_SIZE       (EIB_ADMIN_MAX_SWQE + EIB_ADMIN_MAX_RWQE + 1)   /* 61, just under 64 */
 166 
 167 /*
 168  * The control qp is per vhub partition, and is used to send and receive
 169  * vhub control messages such as vhub table request/response, vhub
 170  * update response and vnic alive messages.  While the vhub table response
 171  * and vhub update messages might take a few rwqes, the vhub table request
 172  * is made only once per vnic, and the vnic alive message is periodic
 173  * and uses a single swqe as well.  Per vnic, we should certainly not need
 174  * too many swqes/rwqes.
 175  */
 176 #define EIB_CTL_MAX_SWQE        30
 177 #define EIB_CTL_MAX_RWQE        30
 178 #define EIB_CTL_CQ_SIZE         (EIB_CTL_MAX_SWQE + EIB_CTL_MAX_RWQE + 1)       /* 61, just under 64 */
 179 
 180 /*
 181  * For the vNIC's data channel, there are three items that are of importance:
 182  * the constraints defined below, the hca_max_chan_sz attribute and the value of
 183  * (hca_max_cq_sz - 1).  The maximum limit on swqe/rwqe is set to the minimum
 184  * of these three values.
 185  *
 186  * While the total number of RWQEs posted to the data channel of any vNIC will
 187  * not exceed EIB_DATA_MAX_RWQE, we also do not want to acquire and post all of
 188  * it during the data channel initialization, since that is a lot of wqes for
 189  * one vnic to consume when we don't even know if the vnic will need it at all.
 190  * We post an initial set of EIB_DATA_RWQE_BKT rwqes, and slowly post more and
 191  * more sets as we see them being consumed, until we hit the hard limit of
 192  * EIB_DATA_MAX_RWQE.
 193  */
 194 #define EIB_DATA_MAX_SWQE       4000
 195 #define EIB_DATA_MAX_RWQE       4000    /* hard limit on rwqes posted per data channel */
 196 #define EIB_DATA_RWQE_BKT       512     /* size of each set of rwqes posted at a time */
 197 
 198 /*
 199  * vNIC data channel CQ moderation parameters
 200  */
 201 #define EIB_TX_COMP_COUNT               10      /* tx: completion count */
 202 #define EIB_TX_COMP_USEC                300     /* tx: interval, presumably microseconds */
 203 #define EIB_RX_COMP_COUNT               4       /* rx: completion count */
 204 #define EIB_RX_COMP_USEC                10      /* rx: interval, presumably microseconds */
 205 
 206 /*
 207  * qe_info masks (blk:ndx:type:flags)
 208  */
 209 #define EIB_WQEBLK_SHIFT                24      /* blk:   bits 31..24 */
 210 #define EIB_WQEBLK_MASK                 0xFF
 211 #define EIB_WQENDX_SHIFT                16      /* ndx:   bits 23..16 */
 212 #define EIB_WQENDX_MASK                 0xFF
 213 #define EIB_WQETYP_SHIFT                8       /* type:  bits 15..8 */
 214 #define EIB_WQETYP_MASK                 0xFF
 215 #define EIB_WQEFLGS_SHIFT               0       /* flags: bits 7..0 */
 216 #define EIB_WQEFLGS_MASK                0xFF
 217 
 218 /*
 219  * Macros to get the bit fields from qe_info
 220  */
 221 #define EIB_WQE_BLK(info)       (((info) >> EIB_WQEBLK_SHIFT) & EIB_WQEBLK_MASK)
 222 #define EIB_WQE_NDX(info)       (((info) >> EIB_WQENDX_SHIFT) & EIB_WQENDX_MASK)
 223 #define EIB_WQE_TYPE(info)      (((info) >> EIB_WQETYP_SHIFT) & EIB_WQETYP_MASK)
 224 #define EIB_WQE_FLAGS(info)     ((info) & EIB_WQEFLGS_MASK)     /* shift is 0, so no shift is applied */
 225 
 226 /*
 227  * Values for type and flags in qe_info
 228  */
 229 #define EIB_WQE_TX                      0x1     /* type values */
 230 #define EIB_WQE_RX                      0x2
 231 
 232 /*
 233  * Flags for rx wqes/buffers
 234  */
 235 #define EIB_WQE_FLG_POSTED_TO_HCA       0x1
 236 #define EIB_WQE_FLG_WITH_NW             0x2
 237 
 238 /*
 239  * Flags for tx wqes/buffers
 240  */
 241 #define EIB_WQE_FLG_BUFTYPE_LSO         0x4     /* tx flag bits do not overlap the rx flag bits */
 242 #define EIB_WQE_FLG_BUFTYPE_MAPPED      0x8
 243 
 244 /*
 245  * Send/Recv workq entries
 246  */
 247 typedef struct eib_wqe_s {
 248         struct eib_wqe_pool_s   *qe_pool;       /* pointer to a wqe pool; presumably the owner */
 249         uint8_t                 *qe_cpbuf;
 250         uint8_t                 *qe_payload_hdr;
 251         uint_t                  qe_bufsz;
 252         uint_t                  qe_info;        /* blk:ndx:type:flags, see EIB_WQE_* accessors */
 253         int                     qe_vnic_inst;
 254         ibt_ud_dest_hdl_t       qe_dest;
 255         frtn_t                  qe_frp;
 256 
 257         mblk_t                  *qe_mp;
 258         ibt_mi_hdl_t            qe_iov_hdl;
 259         ibt_all_wr_t            qe_wr;
 260         ibt_wr_ds_t             qe_sgl;         /* single scatter/gather entry */
 261         ibt_wr_ds_t             qe_big_sgl[EIB_MAX_SGL];        /* EIB_MAX_SGL entries */
 262         struct eib_wqe_s        *qe_nxt_post;   /* link to another wqe */
 263         struct eib_chan_s       *qe_chan;
 264 } eib_wqe_t;
 265 
 266 /*
 267  * The wqe in-use/free status in EoIB is managed via a 2-level bitmap
 268  * logic.
 269  *
 270  * Each set of 64 wqes (a "wqe block") is managed by a single 64-bit
 271  * integer bitmap.  The free status of a set of 64 such wqe blocks (a
 272  * "wqe pool") is managed by one 64-bit integer bitmap (if any wqe in
 273  * the wqe block is free, the bit in the map is 1, otherwise it is 0).
 274  *
 275  * The maximum pool size is 4096 wqes, but this can easily be extended
 276  * to support more wqes using additional pools of wqes.
 277  *
 278  * Note that an entire pool of wqes is allocated via a single allocation,
 279  * the wqe addresses in a pool are all contiguous.  The tx/rx copy buffers
 280  * for a wqe pool are also allocated via a single allocation.
 281  */
 282 #define EIB_BLKS_PER_POOL       64
 283 #define EIB_WQES_PER_BLK        64      /* do not change this */
 284 #define EIB_WQES_PER_POOL       (EIB_BLKS_PER_POOL * EIB_WQES_PER_BLK)  /* 64 * 64 == 4096 */
 285 
 286 #define EIB_WQE_SZ              (sizeof (eib_wqe_t))
 287 #define EIB_WQEBLK_SZ           (EIB_WQES_PER_BLK * EIB_WQE_SZ)         /* bytes in one block of 64 wqes */
 288 
 289 typedef struct eib_wqe_pool_s { /* one pool of up to EIB_WQES_PER_POOL wqes */
 290         struct eib_wqe_pool_s   *wp_next;
 291         struct eib_s            *wp_ss;         /* pointer to the per-instance state */
 292         ib_vaddr_t              wp_vaddr;
 293         ib_memlen_t             wp_memsz;
 294         ibt_mr_hdl_t            wp_mr;
 295         ibt_lkey_t              wp_lkey;
 296         uint_t                  wp_nfree_lwm;
 297         int                     wp_type;        /* EIB_WP_TYPE_TX or EIB_WP_TYPE_RX */
 298 
 299         kmutex_t                wp_lock;
 300         kcondvar_t              wp_cv;
 301         uint_t                  wp_status;      /* bit fields: EIB_TXWQE_* / EIB_RXWQE_* */
 302         uint_t                  wp_nfree;
 303         uint64_t                wp_free_blks;   /* bit is 1 if that wqe block has any free wqe */
 304         uint64_t                wp_free_wqes[EIB_BLKS_PER_POOL];        /* one 64-bit bitmap per wqe block */
 305         struct eib_wqe_s        *wp_wqe;
 306 } eib_wqe_pool_t;
 307 
 308 /*
 309  * Values for wp_type
 310  */
 311 #define EIB_WP_TYPE_TX          0x1
 312 #define EIB_WP_TYPE_RX          0x2
 313 
 314 /*
 315  * Values for wp_status (bit fields)
 316  */
 317 #define EIB_TXWQE_SHORT         0x1     /* only for tx wqe pool */
 318 #define EIB_TXWQE_MONITOR_DIE   0x2     /* only for tx wqe pool */
 319 
 320 #define EIB_RXWQE_SHORT         0x1     /* only for rx wqe pool; same bit value as EIB_TXWQE_SHORT */
 321 
 322 /*
 323  * The low-water-mark is an indication of when wqe grabs for low-priority
 324  * qps should start to get refused (swqe grabs for control messages such
 325  * as keepalives and rwqe grabs for posting back to control qps will still
 326  * be allowed).  The high-water-mark is an indication of when normal
 327  * behavior should resume.
 328  */
 329 #define EIB_NFREE_SWQES_LWM     (EIB_WQES_PER_POOL / 64)        /* 1/64 */
 330 #define EIB_NFREE_SWQES_HWM     (EIB_WQES_PER_POOL / 32)        /* 1/32 */
 331 #define EIB_NFREE_RWQES_LWM     (EIB_WQES_PER_POOL / 10)        /* 10% */
 332 #define EIB_NFREE_RWQES_HWM     (EIB_WQES_PER_POOL / 5)         /* 20% */
 333 
 334 /*
 335  * The "rwqes low" is used to determine when we should start using allocb()
 336  * to copy and send received mblks in the rx path.  It should be a little
 337  * above the rwqes low-water-mark, but less than the high-water-mark.
 338  */
 339 #define EIB_NFREE_RWQES_LOW     \
 340         ((EIB_NFREE_RWQES_LWM + EIB_NFREE_RWQES_HWM) / 2)       /* midpoint of LWM and HWM (integer division) */
 341 
 342 #define EIB_WPRI_HI             1       /* for keepalive posts */
 343 #define EIB_WPRI_LO             2       /* for all other posts */
 344 
 345 /*
 346  * Multicast GID Layout: the multicast gid is specified in big-endian
 347  * representation, as a collection of different-sized fields in the
 348  * EoIB specification.  On Solaris, the multicast gid is represented
 349  * as a collection of two 8-byte fields (in ib_gid_t).
 350  */
 351 typedef struct eib_mgid_spec_s {        /* mgid as the EoIB spec's individual fields */
 352         uint8_t                 sp_mgid_prefix[FIP_MGID_PREFIX_LEN];
 353         uint8_t                 sp_type;        /* one of EIB_MGID_VHUB_* */
 354         uint8_t                 sp_dmac[ETHERADDRL];
 355         uint8_t                 sp_rss_hash;
 356         uint8_t                 sp_vhub_id[FIP_VHUBID_LEN];
 357 } eib_mgid_spec_t;
 358 
 359 /*
 360  * Values for sp_type in mgid as per EoIB specification
 361  */
 362 #define EIB_MGID_VHUB_DATA      0x0
 363 #define EIB_MGID_VHUB_UPDATE    0x2     /* note: 0x1 is not defined here */
 364 #define EIB_MGID_VHUB_TABLE     0x3
 365 
 366 typedef union eib_mgid_s {      /* two views of the same multicast gid storage */
 367         eib_mgid_spec_t         gd_spec;        /* EoIB-specification field layout */
 368         ib_gid_t                gd_sol;         /* Solaris ib_gid_t representation */
 369 } eib_mgid_t;
 370 
 371 /*
 372  * Gateway properties handed over to us by the EoIB nexus
 373  */
 374 typedef struct eib_gw_props_s {
 375         kmutex_t                pp_gw_lock;     /* presumably guards the fields below - confirm */
 376 
 377         ib_guid_t               pp_gw_system_guid;
 378         ib_guid_t               pp_gw_guid;
 379         ib_sn_prefix_t          pp_gw_sn_prefix;
 380 
 381         uint_t                  pp_gw_adv_period;
 382         uint_t                  pp_gw_ka_period;
 383         uint_t                  pp_vnic_ka_period;
 384 
 385         ib_qpn_t                pp_gw_ctrl_qpn;
 386         ib_lid_t                pp_gw_lid;
 387         uint16_t                pp_gw_portid;
 388 
 389         uint16_t                pp_gw_num_net_vnics;
 390         uint8_t                 pp_gw_flag_available;
 391         uint8_t                 pp_gw_is_host_adm_vnics;
 392         uint8_t                 pp_gw_sl;
 393         uint8_t                 pp_gw_n_rss_qpn;
 394 
 395         uint8_t                 *pp_gw_system_name;     /* pointers; storage is not part of this struct */
 396         uint8_t                 *pp_gw_port_name;
 397         uint8_t                 *pp_gw_vendor_id;
 398 
 399         clock_t                 pp_gw_ka_ticks;         /* 2.5 x gw_ka_period */
 400         clock_t                 pp_vnic_ka_ticks;       /* vnic_ka_period */
 401 } eib_gw_props_t;
 402 
 403 /*
 404  * Port-specific properties
 405  */
 406 typedef struct eib_props_s {
 407         uint64_t                ep_ifspeed;
 408         ib_guid_t               ep_hca_guid;
 409         uint8_t                 ep_port_num;
 410         ib_gid_t                ep_sgid;        /* presumably the port's source gid - confirm */
 411         ib_lid_t                ep_blid;
 412         uint16_t                ep_mtu;
 413         ibt_srate_t             ep_srate;
 414 } eib_props_t;
 415 
 416 /*
 417  * Capabilities derived from HCA attributes
 418  */
 419 typedef struct eib_caps_s {
 420         uint_t                  cp_lso_maxlen;
 421         uint32_t                cp_cksum_flags;
 422         int                     cp_resv_lkey_capab;     /* presumably non-zero if cp_resv_lkey is usable - confirm */
 423         ibt_lkey_t              cp_resv_lkey;
 424 
 425         uint_t                  cp_max_swqe;
 426         uint_t                  cp_max_rwqe;
 427         uint_t                  cp_max_sgl;
 428         uint_t                  cp_hiwm_sgl;
 429 } eib_caps_t;
 430 
 431 /*
 432  * List of multicast groups the vnic joined
 433  */
 434 typedef struct eib_mcg_s {
 435         struct eib_mcg_s        *mg_next;       /* next entry in the list */
 436         ib_gid_t                mg_rgid;
 437         ib_gid_t                mg_mgid;
 438         uint8_t                 mg_join_state;
 439         uint8_t                 mg_mac[ETHERADDRL];
 440         ibt_mcg_info_t          *mg_mcginfo;
 441 } eib_mcg_t;
 442 
 443 /*
 444  * Admin/control/data channel information
 445  */
 446 typedef struct eib_chan_s {
 447         ibt_channel_hdl_t       ch_chan;        /* IBT channel handle */
 448         ib_qpn_t                ch_qpn;
 449 
 450         ibt_wc_t                *ch_wc;         /* first of two wc/cq-handle/cq-size triples */
 451         ibt_cq_hdl_t            ch_cq_hdl;
 452         uint_t                  ch_cq_sz;
 453 
 454         ibt_wc_t                *ch_rcv_wc;     /* second ("rcv") wc/cq-handle/cq-size triple */
 455         ibt_cq_hdl_t            ch_rcv_cq_hdl;
 456         uint_t                  ch_rcv_cq_sz;
 457 
 458         int                     ch_vnic_inst;
 459         uint_t                  ch_max_swqes;
 460         uint_t                  ch_max_rwqes;
 461         uint_t                  ch_lwm_rwqes;
 462         uint_t                  ch_rwqe_bktsz;
 463         uint_t                  ch_ip_hdr_align;
 464         boolean_t               ch_alloc_mp;
 465         boolean_t               ch_tear_down;
 466 
 467         kmutex_t                ch_pkey_lock;   /* presumably guards ch_pkey/ch_pkey_ix - confirm */
 468         ib_pkey_t               ch_pkey;
 469         uint16_t                ch_pkey_ix;
 470 
 471         kmutex_t                ch_cep_lock;    /* presumably guards ch_cep_state - confirm */
 472         kcondvar_t              ch_cep_cv;
 473         ibt_cep_state_t         ch_cep_state;
 474 
 475         kmutex_t                ch_tx_lock;     /* presumably guards the ch_tx* fields - confirm */
 476         kcondvar_t              ch_tx_cv;
 477         uint_t                  ch_tx_posted;
 478         boolean_t               ch_tx_busy;
 479         struct eib_wqe_s        *ch_tx;
 480         struct eib_wqe_s        *ch_tx_tail;
 481 
 482         kmutex_t                ch_rx_lock;     /* presumably guards the ch_rx* fields - confirm */
 483         kcondvar_t              ch_rx_cv;
 484         uint_t                  ch_rx_posted;
 485         boolean_t               ch_rx_refilling;
 486 
 487         kmutex_t                ch_vhub_lock;   /* presumably guards the ch_vhub_* pointers - confirm */
 488         struct eib_mcg_s        *ch_vhub_table;
 489         struct eib_mcg_s        *ch_vhub_update;
 490         struct eib_mcg_s        *ch_vhub_data;
 491 
 492         struct eib_chan_s       *ch_rxpost_next;        /* link to another channel */
 493 } eib_chan_t;
 494 
 495 /*
 496  * States for vNIC state machine during login
 497  */
 498 #define EIB_LOGIN_INIT          0       /* values 0..10 are consecutive and distinct */
 499 #define EIB_LOGIN_ACK_WAIT      1
 500 #define EIB_LOGIN_ACK_RCVD      2
 501 #define EIB_LOGIN_NACK_RCVD     3
 502 #define EIB_LOGIN_TBL_WAIT      4
 503 #define EIB_LOGIN_TBL_INPROG    5
 504 #define EIB_LOGIN_TBL_DONE      6
 505 #define EIB_LOGIN_TBL_FAILED    7
 506 #define EIB_LOGIN_DONE          8
 507 #define EIB_LOGIN_TIMED_OUT     9
 508 #define EIB_LOGOUT_DONE         10
 509 
 510 typedef struct eib_login_data_s {       /* embedded in eib_vnic_t as vn_login_data */
 511         ib_guid_t               ld_gw_guid;
 512         ib_lid_t                ld_gw_lid;
 513         uint_t                  ld_syndrome;
 514         uint16_t                ld_gw_port_id;
 515         ib_qpn_t                ld_gw_data_qpn;
 516         ib_qpn_t                ld_gw_ctl_qpn;
 517         uint16_t                ld_vnic_id;     /* includes set msbit */
 518         uint16_t                ld_vhub_mtu;
 519         uint16_t                ld_vhub_pkey;
 520         uint16_t                ld_assigned_vlan;
 521         uint8_t                 ld_gw_sl;
 522         uint8_t                 ld_n_rss_mcgid;
 523         uint8_t                 ld_n_mac_mcgid;
 524         uint8_t                 ld_vnic_name[FIP_VNIC_NAME_LEN];
 525         uint8_t                 ld_assigned_mac[ETHERADDRL];
 526         uint8_t                 ld_gw_mgid_prefix[FIP_MGID_PREFIX_LEN];
 527         uint8_t                 ld_vlan_in_packets;
 528         uint32_t                ld_vhub_id;     /* 32-bit, unlike the byte-array sp_vhub_id in eib_mgid_spec_t */
 529 } eib_login_data_t;
 530 
 531 #define EIB_UNICAST_MAC(mac)            (((mac)[0] & 0x01) == 0)        /* true when the low bit of the first octet is clear */
 532 
 533 /*
 534  * Map to translate between DMAC and {qpn, lid, sl}
 535  */
 536 typedef struct eib_vhub_map_s {
 537         struct eib_vhub_map_s   *mp_next;       /* link to another map entry */
 538         uint32_t                mp_tusn;
 539         ib_qpn_t                mp_qpn;         /* qpn, lid and sl are the translation targets */
 540         ib_lid_t                mp_lid;
 541         uint8_t                 mp_mac[ETHERADDRL];     /* the DMAC being translated */
 542         uint8_t                 mp_sl;
 543         uint8_t                 mp_v_rss_type;
 544 } eib_vhub_map_t;
 545 
 546 /*
 547  * Per-vNIC vHUB Table
 548  */
 549 #define EIB_TB_NBUCKETS         13      /* dimension of the two entry arrays below */
 550 typedef struct eib_vhub_table_s {
 551         kmutex_t                tb_lock;
 552         struct eib_vhub_map_s   *tb_gateway;
 553         struct eib_vhub_map_s   *tb_unicast_miss;
 554         struct eib_vhub_map_s   *tb_vhub_multicast;
 555         struct eib_vhub_map_s   *tb_vnic_entry[EIB_TB_NBUCKETS];        /* presumably hash buckets chained via mp_next - confirm */
 556         struct eib_vhub_map_s   *tb_mcast_entry[EIB_TB_NBUCKETS];
 557 
 558         uint32_t                tb_tusn;
 559         uint8_t                 tb_eport_state;
 560 
 561         uint16_t                tb_entries_seen;
 562         uint16_t                tb_entries_in_table;
 563         uint32_t                tb_checksum;
 564 } eib_vhub_table_t;
 565 
 566 typedef struct eib_vhub_update_s {      /* referenced from eib_vnic_t via vn_vhub_update */
 567         kmutex_t                up_lock;
 568         eib_vhub_map_t          *up_vnic_entry;
 569         uint32_t                up_tusn;
 570         uint8_t                 up_eport_state;
 571 } eib_vhub_update_t;
 572 
 573 typedef struct eib_ether_hdr_s {        /* ethernet header fields held as separate members */
 574         int                     eh_tagless;     /* presumably non-zero when no vlan tag is present - confirm */
 575         uint16_t                eh_ether_type;
 576         uint16_t                eh_vlan;
 577         uint8_t                 eh_dmac[ETHERADDRL];
 578         uint8_t                 eh_smac[ETHERADDRL];
 579 } eib_ether_hdr_t;
 580 
 581 /*
 582  * vNIC Information
 583  */
 584 typedef struct eib_vnic_s {
 585         struct eib_s            *vn_ss;         /* pointer to the per-instance state */
 586         eib_chan_t              *vn_ctl_chan;
 587         eib_chan_t              *vn_data_chan;
 588         int                     vn_instance;
 589         uint16_t                vn_vlan;
 590         uint16_t                vn_id;
 591         uint8_t                 vn_macaddr[ETHERADDRL];
 592         struct eib_login_data_s vn_login_data;  /* embedded by value, not a pointer */
 593 
 594         kmutex_t                vn_lock;
 595         kcondvar_t              vn_cv;
 596         uint_t                  vn_state;       /* presumably one of the EIB_LOGIN_ / EIB_LOGOUT_ states - confirm */
 597         struct eib_vhub_table_s *vn_vhub_table;
 598         struct eib_vhub_update_s *vn_vhub_update;
 599 
 600         ddi_softint_handle_t    vn_ctl_si_hdl;  /* soft interrupt handles */
 601         ddi_softint_handle_t    vn_data_tx_si_hdl;
 602         ddi_softint_handle_t    vn_data_rx_si_hdl;
 603 } eib_vnic_t;
 604 
 605 
 606 /*
 607  * Base NIC's mac state flags. The lock protects the starting/stopping
 608  * bits.  Access to the rest of the mac state is protected by these
 609  * two bits.
 610  */
 611 #define EIB_NIC_STARTING        0x01
 612 #define EIB_NIC_STOPPING        0x02
 613 #define EIB_NIC_STARTED         0x80
 614 #define EIB_NIC_RESTARTING      (EIB_NIC_STARTING | EIB_NIC_STOPPING)   /* both bits set: 0x03 */
 615 
 616 typedef struct eib_node_state_s {
 617         kmutex_t                ns_lock;
 618         kcondvar_t              ns_cv;
 619         uint_t                  ns_nic_state;   /* presumably holds the EIB_NIC_* flags - confirm */
 620         link_state_t            ns_link_state;
 621 } eib_node_state_t;
 622 
 623 /*
 624  * MIB-II statistics to report to the mac layer
 625  */
 626 typedef struct eib_stats_s {    /* every counter is a uint64_t */
 627         uint64_t                st_obytes;      /* bytes sent out */
 628         uint64_t                st_opkts;       /* pkts sent out */
 629         uint64_t                st_brdcstxmit;  /* broadcast pkts transmitted */
 630         uint64_t                st_multixmit;   /* multicast pkts transmitted */
 631         uint64_t                st_oerrors;     /* transmit errors */
 632         uint64_t                st_noxmitbuf;   /* transmit pkts discarded */
 633 
 634         uint64_t                st_rbytes;      /* bytes received */
 635         uint64_t                st_ipkts;       /* pkts received */
 636         uint64_t                st_brdcstrcv;   /* broadcast pkts received */
 637         uint64_t                st_multircv;    /* multicast pkts received */
 638         uint64_t                st_ierrors;     /* receive errors */
 639         uint64_t                st_norcvbuf;    /* receive pkts discarded */
 640 } eib_stats_t;
 641 
 642 #define EIB_UPDATE_COUNTER(addr, val)   (atomic_add_64((addr), (val)))  /* wrapper over atomic_add_64() */
 643 #define EIB_INCR_COUNTER(addr)          (atomic_inc_64((addr)))         /* wrapper over atomic_inc_64() */
 644 #define EIB_DECR_COUNTER(addr)          (atomic_dec_64((addr)))         /* wrapper over atomic_dec_64() */
 645 
 646 /*
 647  * Cache of address vectors with dlid as the key. Currently we use
 648  * the eib state structure's ei_av_lock to protect the individual
 649  * address vector's fields.  This is a lock granularity that's slightly
 650  * bigger than ideal, but it should do for now.
 651  */
 652 #define EIB_AV_NBUCKETS         17      /* dimension of ei_av[] in eib_t */
 653 typedef struct eib_avect_s {
 654         struct eib_avect_s      *av_next;       /* link to another cached entry */
 655         ibt_adds_vect_t         av_vect;        /* the address vector itself */
 656         uint_t                  av_ref;         /* presumably a reference count - confirm */
 657 } eib_avect_t;
 658 
 659 /*
 660  * vNIC creation and deletion are serialized by setting the
 661  * ei_vnic_state member to a non-zero value (i.e. only one vnic
 662  * may be created or deleted at a time). The code makes sure to
 663  * access/update the ei_active_vnics member only after successfully
 664  * setting ei_vnic_state.
 665  */
 666 #define EIB_VN_BEING_CREATED    0x01
 667 #define EIB_VN_BEING_DELETED    0x02
 668 #define EIB_VN_BEING_MODIFIED   (EIB_VN_BEING_CREATED | EIB_VN_BEING_DELETED)   /* mask of both bits: 0x03 */
 669 
 670 /*
 671  * All possible EoIB event work items that need to be handled
 672  */
 673 #define EIB_EV_NONE             0       /* codes are consecutive integers, not bit flags */
 674 #define EIB_EV_PORT_DOWN        1
 675 #define EIB_EV_PORT_UP          2
 676 #define EIB_EV_PKEY_CHANGE      3
 677 #define EIB_EV_SGID_CHANGE      4
 678 #define EIB_EV_CLNT_REREG       5
 679 #define EIB_EV_GW_EPORT_DOWN    6
 680 #define EIB_EV_GW_DOWN          7
 681 #define EIB_EV_GW_UP            8
 682 #define EIB_EV_GW_INFO_UPDATE   9
 683 #define EIB_EV_MCG_DELETED      10
 684 #define EIB_EV_MCG_CREATED      11
 685 #define EIB_EV_SHUTDOWN         12
 686 
 687 typedef struct eib_event_s {    /* one queued event work item */
 688         struct eib_event_s      *ev_next;       /* link to another event */
 689         uint_t                  ev_code;        /* presumably one of EIB_EV_* - confirm */
 690         void                    *ev_arg;        /* untyped argument; meaning not visible here */
 691 } eib_event_t;
 692 
 693 /*
 694  * Work element for new vnic creation
 695  */
 696 typedef struct eib_vnic_req_s {
 697         struct eib_vnic_req_s   *vr_next;       /* link to another request */
 698         uint_t                  vr_req;         /* one of EIB_CR_REQ_* */
 699         uint8_t                 vr_mac[ETHERADDRL];
 700         uint16_t                vr_vlan;
 701 } eib_vnic_req_t;
 702 
 703 /*
 704  * Values for vr_req
 705  */
 706 #define EIB_CR_REQ_NEW_VNIC     1       /* request codes start at 1; 0 is not defined here */
 707 #define EIB_CR_REQ_FLUSH        2
 708 #define EIB_CR_REQ_DIE          3
 709 
 710 /*
 711  * Work element for vnics kept alive by the keepalive manager thread
 712  * and bitfield values for ei_ka_vnics_event.
 713  */
 714 typedef struct eib_ka_vnics_s {
 715         struct eib_ka_vnics_s   *ka_next;       /* link to another element */
 716         struct eib_vnic_s       *ka_vnic;       /* the vnic this element refers to */
 717 } eib_ka_vnics_t;
 718 
 719 #define EIB_KA_VNICS_DIE        0x1     /* bit values for ei_ka_vnics_event */
 720 #define EIB_KA_VNICS_TIMED_OUT  0x2
 721 
 722 /*
 723  * EoIB per-instance state
 724  */
 725 typedef struct eib_s {
 726         ibt_clnt_hdl_t          ei_ibt_hdl;     /* IBT client/HCA/PD handles */
 727         ibt_hca_hdl_t           ei_hca_hdl;
 728         ibt_pd_hdl_t            ei_pd_hdl;
 729         mac_handle_t            ei_mac_hdl;
 730 
 731         ddi_softint_handle_t    ei_admin_si_hdl;
 732         ddi_callback_id_t       ei_login_ack_cb;        /* callback ids; cf. eib_login_ack_cb() etc. */
 733         ddi_callback_id_t       ei_gw_alive_cb;
 734         ddi_callback_id_t       ei_gw_info_cb;
 735 
 736         ibt_hca_attr_t          *ei_hca_attrs;
 737         dev_info_t              *ei_dip;
 738         uint_t                  ei_instance;
 739 
 740         struct eib_gw_props_s   *ei_gw_props;
 741         struct eib_props_s      *ei_props;
 742         struct eib_caps_s       *ei_caps;
 743         struct eib_stats_s      *ei_stats;
 744 
 745         struct eib_node_state_s *ei_node_state;
 746         struct eib_chan_s       *ei_admin_chan;
 747 
 748         struct eib_wqe_pool_s   *ei_tx;         /* tx wqe pool */
 749         struct eib_wqe_pool_s   *ei_rx;         /* rx wqe pool */
 750         struct eib_lsobkt_s     *ei_lso;        /* LSO buffer bucket */
 751 
 752         kmutex_t                ei_vnic_lock;
 753         kcondvar_t              ei_vnic_cv;
 754         uint_t                  ei_vnic_state;  /* EIB_VN_BEING_*; non-zero serializes vnic create/delete */
 755         uint64_t                ei_active_vnics;        /* presumably one bit per vnic (EIB_MAX_VNICS is 64) - confirm */
 756         uint64_t                ei_zombie_vnics;
 757         uint64_t                ei_rejoin_vnics;
 758         struct eib_vnic_s       *ei_vnic[EIB_MAX_VNICS];
 759         struct eib_vnic_s       *ei_vnic_pending;
 760         int64_t                 ei_gw_last_heartbeat;
 761         boolean_t               ei_gw_unreachable;
 762         uint8_t                 ei_gw_eport_state;
 763 
 764         kmutex_t                ei_av_lock;
 765         struct eib_avect_s      *ei_av[EIB_AV_NBUCKETS];        /* address vector cache buckets */
 766 
 767         kmutex_t                ei_ev_lock;
 768         kcondvar_t              ei_ev_cv;
 769         struct eib_event_s      *ei_event;      /* list of eib_event_t work items */
 770 
 771         kmutex_t                ei_rxpost_lock;
 772         kcondvar_t              ei_rxpost_cv;
 773         uint_t                  ei_rxpost_die;
 774         struct eib_chan_s       *ei_rxpost;
 775 
 776         kmutex_t                ei_vnic_req_lock;
 777         kcondvar_t              ei_vnic_req_cv;
 778         struct eib_vnic_req_s   *ei_vnic_req;
 779         struct eib_vnic_req_s   *ei_failed_vnic_req;
 780         struct eib_vnic_req_s   *ei_pending_vnic_req;
 781 
 782         kmutex_t                ei_ka_vnics_lock;
 783         kcondvar_t              ei_ka_vnics_cv;
 784         uint_t                  ei_ka_vnics_event;      /* EIB_KA_VNICS_* bits */
 785         struct eib_ka_vnics_s   *ei_ka_vnics;
 786 
 787         kt_did_t                ei_txwqe_monitor;       /* thread ids (kt_did_t), one per service thread */
 788         kt_did_t                ei_lsobufs_monitor;
 789         kt_did_t                ei_rwqes_refiller;
 790         kt_did_t                ei_vnic_creator;
 791         kt_did_t                ei_events_handler;
 792         kt_did_t                ei_keepalives_manager;
 793 } eib_t;
 794 
 795 /*
 796  * Private read-only datalink properties
      *
      * Name strings for three driver-private properties: the gateway eport
      * state, the HCA GUID and the port GUID.  The leading underscore in each
      * name presumably follows the MAC framework's convention for private
      * properties, and these are presumably the names listed in
      * eib_pvt_props[] -- TODO confirm against the definition.
 797  */
 798 #define EIB_DLPROP_GW_EPORT_STATE       "_eib_eport_state"
 799 #define EIB_DLPROP_HCA_GUID             "_eib_hca_guid"
 800 #define EIB_DLPROP_PORT_GUID            "_eib_port_guid"
 801 
 802 /*
 803  * FUNCTION PROTOTYPES FOR CROSS-FILE LINKAGE
 804  */
 805 
 806 /*
 807  * FIP protocol related
      *
      * Every routine here returns int; presumably EIB_E_SUCCESS or
      * EIB_E_FAILURE (TODO confirm against the definitions).  The first four
      * share one signature: the driver instance, a vnic and a trailing
      * int *.  That int * is presumably an out-parameter for an error code;
      * the prototypes alone do not say.  The two parse routines take a raw
      * uint8_t * buffer with no length argument.
 808  */
 809 extern int eib_fip_login(eib_t *, eib_vnic_t *, int *);
 810 extern int eib_fip_heartbeat(eib_t *, eib_vnic_t *, int *);
 811 extern int eib_fip_vhub_table(eib_t *, eib_vnic_t *, int *);
 812 extern int eib_fip_logout(eib_t *, eib_vnic_t *, int *);
 813 extern int eib_fip_parse_login_ack(eib_t *, uint8_t *, eib_login_data_t *);
 814 extern int eib_fip_parse_ctl_pkt(uint8_t *, eib_vnic_t *);
 815 
 816 /*
 817  * Service threads and other handlers
      *
      * The first group takes only the driver instance (eib_svc_enqueue_event
      * also takes the event to queue).  Six of them have a matching
      * eib_stop_*() routine declared below; these are presumably the bodies
      * of the kernel threads whose ids are kept in the kt_did_t members of
      * eib_t -- TODO confirm.  eib_stop_monitor_lso_bufs() is the only stop
      * routine that returns a value and takes an extra boolean_t; the meaning
      * of both is not visible from here.
      *
      * The three *_cb routines share the signature
      * (dev_info_t *, ddi_eventcookie_t, void *, void *), which has the shape
      * of an NDI event callback -- NOTE(review): confirm how they are
      * registered.
 818  */
 819 extern void eib_events_handler(eib_t *);
 820 extern void eib_svc_enqueue_event(eib_t *, eib_event_t *);
 821 extern void eib_refill_rwqes(eib_t *);
 822 extern void eib_vnic_creator(eib_t *);
 823 extern void eib_monitor_tx_wqes(eib_t *);
 824 extern void eib_monitor_lso_bufs(eib_t *);
 825 extern void eib_manage_keepalives(eib_t *);
 826 extern void eib_stop_events_handler(eib_t *);
 827 extern void eib_stop_refill_rwqes(eib_t *);
 828 extern void eib_stop_vnic_creator(eib_t *);
 829 extern void eib_stop_monitor_tx_wqes(eib_t *);
 830 extern int eib_stop_monitor_lso_bufs(eib_t *, boolean_t);
 831 extern void eib_stop_manage_keepalives(eib_t *);
 832 extern void eib_flush_vnic_reqs(eib_t *);
 833 extern void eib_gw_info_cb(dev_info_t *, ddi_eventcookie_t, void *, void *);
 834 extern void eib_gw_alive_cb(dev_info_t *, ddi_eventcookie_t, void *, void *);
 835 extern void eib_login_ack_cb(dev_info_t *, ddi_eventcookie_t, void *, void *);
 836 
 837 /*
 838  * Admin QP related
      *
      * eib_adm_setup_qp() returns int and takes a trailing int *, presumably
      * an error-code out-parameter (TODO confirm).  eib_rb_adm_setup_qp() is
      * presumably its rollback, going by the "rb" naming used across this
      * header.  eib_adm_comp_handler() has the (caddr_t, caddr_t) -> uint_t
      * shape of a DDI interrupt handler -- NOTE(review): confirm how it is
      * registered.
 839  */
 840 extern int eib_adm_setup_qp(eib_t *, int *);
 841 extern uint_t eib_adm_comp_handler(caddr_t, caddr_t);
 842 extern void eib_rb_adm_setup_qp(eib_t *);
 843 
 844 /*
 845  * Control QP related
      *
      * Unlike the admin QP routines, create and rollback here take a vnic as
      * well as the driver instance.  eib_rb_ctl_create_qp() is presumably the
      * rollback for eib_ctl_create_qp() (TODO confirm).
      * eib_ctl_comp_handler() has the (caddr_t, caddr_t) -> uint_t shape of a
      * DDI interrupt handler -- NOTE(review): confirm how it is registered.
 846  */
 847 extern int eib_ctl_create_qp(eib_t *, eib_vnic_t *, int *);
 848 extern uint_t eib_ctl_comp_handler(caddr_t, caddr_t);
 849 extern void eib_rb_ctl_create_qp(eib_t *, eib_vnic_t *);
 850 
 851 /*
 852  * Data QP related
      *
      * Separate completion handlers are declared for the rx and tx sides,
      * both with the (caddr_t, caddr_t) -> uint_t shape of a DDI interrupt
      * handler.  eib_data_rx_recycle() takes a single caddr_t; presumably it
      * is a buffer free callback -- TODO confirm.
      *
      * eib_data_lookup_vnic() takes the same (uint8_t *, uint16_t) pair as
      * eib_vnic_create() and eib_vnic_need_new(); presumably a MAC address
      * and a VLAN id -- verify against the callers.  It returns the vnic
      * through the eib_vnic_t ** argument; the meaning of the boolean_t *
      * output is not visible from here.
      *
      * eib_rb_data_create_qp() is presumably the rollback for
      * eib_data_create_qp().
 853  */
 854 extern int eib_data_create_qp(eib_t *, eib_vnic_t *, int *);
 855 extern uint_t eib_data_rx_comp_handler(caddr_t, caddr_t);
 856 extern uint_t eib_data_tx_comp_handler(caddr_t, caddr_t);
 857 extern void eib_data_rx_recycle(caddr_t);
 858 extern void eib_data_post_tx(eib_vnic_t *, eib_wqe_t *);
 859 extern void eib_data_parse_ether_hdr(mblk_t *, eib_ether_hdr_t *);
 860 extern int eib_data_lookup_vnic(eib_t *, uint8_t *, uint16_t, eib_vnic_t **,
 861     boolean_t *);
 862 extern int eib_data_prepare_frame(eib_vnic_t *, eib_wqe_t *, mblk_t *,
 863     eib_ether_hdr_t *);
 864 extern void eib_rb_data_create_qp(eib_t *, eib_vnic_t *);
 865 
 866 /*
 867  * Resource related
      *
      * The wqe grab routines come in two forms.  The plural form fills an
      * array of eib_wqe_t * and takes a uint_t plus a uint_t *, presumably the
      * number requested and the number actually obtained (TODO confirm).  The
      * singular form returns one eib_wqe_t *; whether it can return NULL is
      * not visible from here.  The trailing int on all four is presumably a
      * priority or wait selector -- verify against the definitions.
      *
      * The lsobufs grab/return pair works on ibt_wr_ds_t entries rather than
      * wqes.  eib_rb_rsrc_setup_bufs() is presumably the rollback for
      * eib_rsrc_setup_bufs(); the meaning of its boolean_t is not visible
      * here.
 868  */
 869 extern int eib_rsrc_setup_bufs(eib_t *, int *);
 870 extern int eib_rsrc_grab_swqes(eib_t *, eib_wqe_t **, uint_t, uint_t *, int);
 871 extern int eib_rsrc_grab_rwqes(eib_t *, eib_wqe_t **, uint_t, uint_t *, int);
 872 extern int eib_rsrc_grab_lsobufs(eib_t *, uint_t, ibt_wr_ds_t *, uint32_t *);
 873 extern eib_wqe_t *eib_rsrc_grab_swqe(eib_t *, int);
 874 extern eib_wqe_t *eib_rsrc_grab_rwqe(eib_t *, int);
 875 extern void eib_rsrc_return_swqe(eib_t *, eib_wqe_t *, eib_chan_t *);
 876 extern void eib_rsrc_return_rwqe(eib_t *, eib_wqe_t *, eib_chan_t *);
 877 extern void eib_rsrc_return_lsobufs(eib_t *, ibt_wr_ds_t *, uint32_t);
 878 extern void eib_rsrc_decr_posted_swqe(eib_t *, eib_chan_t *);
 879 extern void eib_rsrc_decr_posted_rwqe(eib_t *, eib_chan_t *);
 880 extern void eib_rsrc_txwqes_needed(eib_t *);
 881 extern void eib_rsrc_lsobufs_needed(eib_t *);
 882 extern boolean_t eib_rsrc_rxpool_low(eib_wqe_t *);
 883 extern void eib_rb_rsrc_setup_bufs(eib_t *, boolean_t);
 884 
 885 /*
 886  * IBT related
      *
      * eib_ibt_hold_avect() and eib_ibt_release_avect() form a pair around
      * an eib_avect_t; the hold side is keyed by an ib_lid_t and a uint8_t
      * (presumably the service level -- TODO confirm).
      * eib_ibt_async_handler() has the (void *, ibt_hca_hdl_t,
      * ibt_async_code_t, ibt_async_event_t *) shape of an IBTF async
      * handler -- NOTE(review): confirm where it is registered.
      * eib_rb_ibt_hca_init() is presumably the rollback for
      * eib_ibt_hca_init(); its uint_t argument is not explained here.
 887  */
 888 extern int eib_ibt_hca_init(eib_t *);
 889 extern void eib_ibt_link_mod(eib_t *);
 890 extern int eib_ibt_modify_chan_pkey(eib_t *, eib_chan_t *, ib_pkey_t);
 891 extern eib_avect_t *eib_ibt_hold_avect(eib_t *, ib_lid_t, uint8_t);
 892 extern void eib_ibt_release_avect(eib_t *, eib_avect_t *);
 893 extern void eib_ibt_free_avects(eib_t *);
 894 extern void eib_ibt_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
 895     ibt_async_event_t *);
 896 extern void eib_ibt_record_capab(eib_t *, ibt_hca_attr_t *, eib_caps_t *);
 897 extern void eib_rb_ibt_hca_init(eib_t *, uint_t);
 898 
 899 /*
 900  * Chan related
      *
      * eib_chan_init() takes no arguments and returns an eib_chan_t *, which
      * eib_chan_fini() accepts; presumably an allocate/free pair (TODO
      * confirm).  Two receive-posting routines are declared:
      * eib_chan_post_rx() takes a uint_t * and eib_chan_post_recv() takes a
      * single wqe.  What the uint_t * carries is not visible from here.
 901  */
 902 extern eib_chan_t *eib_chan_init(void);
 903 extern void eib_chan_fini(eib_chan_t *);
 904 extern int eib_chan_post_rx(eib_t *, eib_chan_t *, uint_t *);
 905 extern int eib_chan_post_recv(eib_t *, eib_chan_t *, eib_wqe_t *);
 906 
 907 /*
 908  * Mac layer related
      *
      * The nic state routines (set/clr/upd/get) all work in terms of uint_t
      * values; presumably bit flags, given the set/clear naming -- TODO
      * confirm.  The three link routines each take a boolean_t whose meaning
      * is not visible from here.  eib_mac_hca_portstate() returns data
      * through an ib_lid_t * and an int *.
 909  */
 910 extern void eib_mac_set_nic_state(eib_t *, uint_t);
 911 extern void eib_mac_clr_nic_state(eib_t *, uint_t);
 912 extern void eib_mac_upd_nic_state(eib_t *, uint_t, uint_t);
 913 extern uint_t eib_mac_get_nic_state(eib_t *);
 914 extern void eib_mac_link_state(eib_t *, link_state_t, boolean_t);
 915 extern void eib_mac_link_down(eib_t *, boolean_t);
 916 extern void eib_mac_link_up(eib_t *, boolean_t);
 917 extern int eib_mac_start(eib_t *);
 918 extern void eib_mac_stop(eib_t *);
 919 extern int eib_mac_multicast(eib_t *, boolean_t, uint8_t *);
 920 extern int eib_mac_promisc(eib_t *, boolean_t);
 921 extern int eib_mac_tx(eib_t *, mblk_t *);
 922 extern int eib_mac_hca_portstate(eib_t *, ib_lid_t *, int *);
 923 
 924 /*
 925  * VNIC related
      *
      * eib_vnic_create() hands back the new vnic through its eib_vnic_t **
      * argument.  Its (uint8_t *, uint16_t) pair also appears on
      * eib_vnic_need_new() and eib_vnic_setup_dest(); presumably a MAC
      * address and a VLAN id -- TODO confirm.  eib_rb_vnic_create() is
      * presumably the rollback for a partly completed create, with the
      * uint_t selecting how much to undo; verify against the definition.
      *
      * The two eib_vnic_wait_for_*() routines each have a counterpart that
      * presumably wakes the waiter: eib_vnic_login_ack() and
      * eib_vnic_vhub_table_done().  This pairing is inferred from the names
      * only.
 926  */
 927 extern int eib_vnic_create(eib_t *, uint8_t *, uint16_t, eib_vnic_t **, int *);
 928 extern void eib_vnic_delete(eib_t *, eib_vnic_t *);
 929 extern int eib_vnic_wait_for_login_ack(eib_t *, eib_vnic_t *, int *);
 930 extern void eib_vnic_login_ack(eib_t *, eib_login_data_t *);
 931 extern int eib_vnic_wait_for_table(eib_t *, eib_vnic_t *, int *);
 932 extern void eib_vnic_vhub_table_done(eib_vnic_t *, uint_t);
 933 extern int eib_vnic_join_data_mcg(eib_t *, eib_vnic_t *, uint8_t *,
 934     boolean_t, int *);
 935 extern int eib_vnic_setup_dest(eib_vnic_t *, eib_wqe_t *, uint8_t *, uint16_t);
 936 extern void eib_vnic_leave_data_mcg(eib_t *, eib_vnic_t *, uint8_t *);
 937 extern void eib_vnic_init_tables(eib_t *, eib_vnic_t *);
 938 extern void eib_vnic_fini_tables(eib_t *, eib_vnic_t *, boolean_t);
 939 extern eib_chan_t *eib_vnic_get_data_chan(eib_t *, int);
 940 extern void eib_vnic_need_new(eib_t *, uint8_t *, uint16_t);
 941 extern void eib_vnic_enqueue_req(eib_t *, eib_vnic_req_t *);
 942 extern void eib_vnic_resurrect_zombies(eib_t *, uint8_t *);
 943 extern void eib_vnic_restart(eib_t *, int, uint8_t *);
 944 extern void eib_vnic_rejoin_mcgs(eib_t *);
 945 extern void eib_rb_vnic_create(eib_t *, eib_vnic_t *, uint_t);
 946 
 947 /*
 948  * Logging and other stuff
      *
      * The crit, err and warn loggers are always declared.  The debug, args,
      * pkt and verbose variants are declared only when EIB_DEBUG is defined,
      * so unguarded calls to them will not compile in non-debug builds.
      * The leading int is presumably the driver instance number -- TODO
      * confirm.
      *
      * NOTE(review): none of the variadic loggers carries a printf-format
      * attribute, so the compiler does not check arguments against fmt.
      *
      * The three *_props routines are the "other stuff";
      * eib_rb_get_props() is presumably the rollback for eib_get_props().
 949  */
 950 extern void eib_debug_init(void);
 951 extern void eib_debug_fini(void);
 952 extern void eib_dprintf_crit(int, const char *fmt, ...);
 953 extern void eib_dprintf_err(int, const char *fmt, ...);
 954 extern void eib_dprintf_warn(int, const char *fmt, ...);
 955 #ifdef EIB_DEBUG
 956 extern void eib_dprintf_debug(int, const char *fmt, ...);
 957 extern void eib_dprintf_args(int, const char *fmt, ...);
 958 extern void eib_dprintf_pkt(int, uint8_t *, uint_t);
 959 extern void eib_dprintf_verbose(int, const char *fmt, ...);
 960 #endif
 961 extern int eib_get_props(eib_t *);
 962 extern void eib_update_props(eib_t *, eib_gw_info_t *);
 963 extern void eib_rb_get_props(eib_t *);
 964 
 965 /*
 966  * EoIB specific global variables
      *
      * The arrays are declared with no bound, so sizeof cannot be applied to
      * them through this header; their lengths are known only where they are
      * defined.  eib_setbit_mod67[] is presumably a lookup table indexed by
      * a single-bit 64-bit mask taken modulo 67 (TODO confirm), and
      * eib_pvt_props[] presumably holds the EIB_DLPROP_* names.
 967  */
 968 extern ib_gid_t eib_reserved_gid;
 969 extern uint8_t eib_zero_mac[];
 970 extern uint8_t eib_broadcast_mac[];
 971 extern int eib_setbit_mod67[];
 972 extern char *eib_pvt_props[];
 973 
 974 /*
 975  * HW/FW workarounds
      *
      * Plain int globals, one per workaround.  Each name reads as "no <X>";
      * presumably a non-zero value tells the driver not to rely on that
      * feature or field -- TODO confirm the polarity and defaults where
      * these are defined.
 976  */
 977 extern int eib_wa_no_desc_list_len;
 978 extern int eib_wa_no_cksum_offload;
 979 extern int eib_wa_no_lso;
 980 extern int eib_wa_no_mcast_entries;
 981 extern int eib_wa_no_av_discover;
 982 extern int eib_wa_no_good_vp_flag;
 983 extern int eib_wa_no_good_vhub_cksum;
 984 
 985 /*
 986  * Miscellaneous externs
      *
      * Neither symbol carries the eib_ prefix, so they are presumably
      * provided by the kernel rather than by this driver, and declared here
      * because the headers included above do not expose them.
      * NOTE(review): check whether a system header now declares them, in
      * which case these local declarations could go.
 987  */
 988 extern void freemsgchain(mblk_t *);
 989 extern pri_t minclsyspri;
 990 
 991 #ifdef __cplusplus
 992 }
 993 #endif
 994 
 995 #endif  /* _SYS_IB_EOIB_EIB_IMPL_H */