1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2019, Joyent, Inc. 28 */ 29 30 #ifndef _SYS_IB_EOIB_EIB_IMPL_H 31 #define _SYS_IB_EOIB_EIB_IMPL_H 32 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 #include <sys/ddi.h> 38 #include <sys/mac.h> 39 #include <sys/sunddi.h> 40 #include <sys/varargs.h> 41 #include <sys/vlan.h> 42 #include <sys/ib/ibtl/ibti.h> 43 #include <sys/ib/ibtl/ibvti.h> 44 #include <sys/ib/ib_pkt_hdrs.h> 45 46 #include <sys/ib/clients/eoib/fip.h> 47 #include <sys/ib/clients/eoib/eib.h> 48 49 /* 50 * Driver specific constants 51 */ 52 #define EIB_E_SUCCESS 0 53 #define EIB_E_FAILURE -1 54 #define EIB_MAX_LINE 128 55 #define EIB_MAX_SGL 59 56 #define EIB_MAX_POST_MULTIPLE 4 57 #define EIB_MAX_PAYLOAD_HDR_SZ 160 58 #define EIB_TX_COPY_THRESH 4096 /* greater than mtu */ 59 #define EIB_MAX_VNICS 64 /* do not change this */ 60 #define EIB_LOGIN_TIMEOUT_USEC 8000000 61 #define EIB_RWR_CHUNK_SZ 8 62 #define EIB_IPHDR_ALIGN_ROOM 32 63 #define EIB_IP_HDR_ALIGN 2 64 #define EIB_MAX_RX_PKTS_ONINTR 0x800 65 #define EIB_MAX_LOGIN_ATTEMPTS 3 66 #define EIB_MAX_VHUB_TBL_ATTEMPTS 3 67 #define EIB_MAX_KA_ATTEMPTS 3 68 #define EIB_MAX_ATTEMPTS 10 69 #define EIB_DELAY_HALF_SECOND 500000 70 #define EIB_GRH_SZ (sizeof (ib_grh_t)) 71 72 /* 73 * Debug messages 74 */ 75 #define EIB_MSGS_CRIT 0x01 76 #define EIB_MSGS_ERR 0x02 77 #define EIB_MSGS_WARN 0x04 78 #define EIB_MSGS_DEBUG 0x08 79 #define EIB_MSGS_ARGS 0x10 80 #define EIB_MSGS_PKT 0x20 81 #define EIB_MSGS_VERBOSE 0x40 82 #define EIB_MSGS_DEFAULT (EIB_MSGS_CRIT | EIB_MSGS_ERR | EIB_MSGS_WARN) 83 84 #define EIB_LOGSZ_DEFAULT 0x20000 85 86 #define EIB_DPRINTF_CRIT eib_dprintf_crit 87 #define EIB_DPRINTF_ERR eib_dprintf_err 88 #define EIB_DPRINTF_WARN eib_dprintf_warn 89 #ifdef EIB_DEBUG 90 #define EIB_DPRINTF_DEBUG eib_dprintf_debug 91 #define EIB_DPRINTF_ARGS eib_dprintf_args 92 #define EIB_DPRINTF_PKT eib_dprintf_pkt 93 #define EIB_DPRINTF_VERBOSE eib_dprintf_verbose 94 #else 95 #define EIB_DPRINTF_DEBUG(...) (void)(0) 96 #define EIB_DPRINTF_ARGS(...) (void)(0) 97 #define EIB_DPRINTF_PKT(...) (void)(0) 98 #define EIB_DPRINTF_VERBOSE(...) (void)(0) 99 #endif 100 101 /* 102 * EoIB threads to provide various services 103 */ 104 #define EIB_EVENTS_HDLR "eib_events_handler" 105 #define EIB_RWQES_REFILLER "eib_rwqes_refiller" 106 #define EIB_VNIC_CREATOR "eib_vnic_creator" 107 #define EIB_TXWQES_MONITOR "eib_txwqe_monitor" 108 #define EIB_LSOBUFS_MONITOR "eib_lsobufs_monitor" 109 110 /* 111 * Macro for finding the least significant bit set in a 64-bit unsigned int 112 */ 113 #define EIB_FIND_LSB_SET(val64) eib_setbit_mod67[((-(val64) & (val64)) % 67)] 114 115 /* 116 * LSO buffers 117 * 118 * Under normal circumstances we should never need to use any buffer 119 * that's larger than MTU. Unfortunately, IB HCA has limitations 120 * on the length of SGL that are much smaller than those for regular 121 * ethernet NICs. Since the network layer doesn't care to limit the 122 * number of mblk fragments in any send mp chain, we end up having to 123 * use these larger buffers occasionally. 124 */ 125 #define EIB_LSO_MAXLEN 65536 126 #define EIB_LSO_BUFSZ 8192 127 #define EIB_LSO_NUM_BUFS 1024 128 #define EIB_LSO_FREE_BUFS_THRESH (EIB_LSO_NUM_BUFS >> 5) 129 130 typedef struct eib_lsobuf_s { 131 struct eib_lsobuf_s *lb_next; 132 uint8_t *lb_buf; 133 int lb_isfree; 134 } eib_lsobuf_t; 135 136 typedef struct eib_lsobkt_s { 137 kmutex_t bk_lock; 138 kcondvar_t bk_cv; 139 uint_t bk_status; 140 uint8_t *bk_mem; 141 eib_lsobuf_t *bk_bufl; 142 eib_lsobuf_t *bk_free_head; 143 ibt_mr_hdl_t bk_mr_hdl; 144 ibt_lkey_t bk_lkey; 145 uint_t bk_nelem; 146 uint_t bk_nfree; 147 } eib_lsobkt_t; 148 149 #define EIB_LBUF_SHORT 0x1 150 #define EIB_LBUF_MONITOR_DIE 0x2 151 152 /* 153 * The admin partition is only used for sending login and logout messages 154 * and receiving login acknowledgements from the gateway. While packets 155 * going out on several vlans at the same time could result in multiple 156 * vnic creations happening at the same time (and therefore multiple login 157 * packets), we serialize the vnic creation via the vnic creator thread, so 158 * we shouldn't need a lot of send wqes or receive wqes. Note also that we 159 * keep the cq size request to slightly less than a 2^n boundary to allow 160 * the alloc cq routine to return the closest 2^n boundary as the real cq 161 * size without wasting too much memory. 162 */ 163 #define EIB_ADMIN_MAX_SWQE 30 164 #define EIB_ADMIN_MAX_RWQE 30 165 #define EIB_ADMIN_CQ_SIZE (EIB_ADMIN_MAX_SWQE + EIB_ADMIN_MAX_RWQE + 1) 166 167 /* 168 * The control qp is per vhub partition, and is used to send and receive 169 * vhub control messages such as vhub table request/response, vhub 170 * update response and vnic alive messages. While the vhub table response 171 * and vhub update messages might take a few rwqes, the vhub table request 172 * is made only once per vnic, and the vnic alive message is periodic 173 * and uses a single swqe as well. Per vnic, we should certainly not need 174 * too many swqes/rwqes. 175 */ 176 #define EIB_CTL_MAX_SWQE 30 177 #define EIB_CTL_MAX_RWQE 30 178 #define EIB_CTL_CQ_SIZE (EIB_CTL_MAX_SWQE + EIB_CTL_MAX_RWQE + 1) 179 180 /* 181 * For the vNIC's data channel, there are three items that are of importance: 182 * the constraints defined below, the hca_max_chan_sz attribute and the value of 183 * (hca_max_cq_sz - 1). The maximum limit on swqe/rwqe is set to the minimum 184 * of these three values. 185 * 186 * While the total number of RWQEs posted to the data channel of any vNIC will 187 * not exceed EIB_DATA_MAX_RWQE, we also do not want to acquire and post all of 188 * it during the data channel initialization, since that is a lot of wqes for 189 * one vnic to consume when we don't even know if the vnic will need it at all. 190 * We post an initial set of EIB_DATA_RWQE_BKT rwqes, and slowly post more and 191 * more sets as we see them being consumed, until we hit the hard limit of 192 * EIB_DATA_MAX_RWQE. 193 */ 194 #define EIB_DATA_MAX_SWQE 4000 195 #define EIB_DATA_MAX_RWQE 4000 196 #define EIB_DATA_RWQE_BKT 512 197 198 /* 199 * vNIC data channel CQ moderation parameters 200 */ 201 #define EIB_TX_COMP_COUNT 10 202 #define EIB_TX_COMP_USEC 300 203 #define EIB_RX_COMP_COUNT 4 204 #define EIB_RX_COMP_USEC 10 205 206 /* 207 * qe_info masks (blk:ndx:type:flags) 208 */ 209 #define EIB_WQEBLK_SHIFT 24 210 #define EIB_WQEBLK_MASK 0xFF 211 #define EIB_WQENDX_SHIFT 16 212 #define EIB_WQENDX_MASK 0xFF 213 #define EIB_WQETYP_SHIFT 8 214 #define EIB_WQETYP_MASK 0xFF 215 #define EIB_WQEFLGS_SHIFT 0 216 #define EIB_WQEFLGS_MASK 0xFF 217 218 /* 219 * Macros to get the bit fields from qe_info 220 */ 221 #define EIB_WQE_BLK(info) (((info) >> EIB_WQEBLK_SHIFT) & EIB_WQEBLK_MASK) 222 #define EIB_WQE_NDX(info) (((info) >> EIB_WQENDX_SHIFT) & EIB_WQENDX_MASK) 223 #define EIB_WQE_TYPE(info) (((info) >> EIB_WQETYP_SHIFT) & EIB_WQETYP_MASK) 224 #define EIB_WQE_FLAGS(info) ((info) & EIB_WQEFLGS_MASK) 225 226 /* 227 * Values for type and flags in qe_info 228 */ 229 #define EIB_WQE_TX 0x1 230 #define EIB_WQE_RX 0x2 231 232 /* 233 * Flags for rx wqes/buffers 234 */ 235 #define EIB_WQE_FLG_POSTED_TO_HCA 0x1 236 #define EIB_WQE_FLG_WITH_NW 0x2 237 238 /* 239 * Flags for tx wqes/buffers 240 */ 241 #define EIB_WQE_FLG_BUFTYPE_LSO 0x4 242 #define EIB_WQE_FLG_BUFTYPE_MAPPED 0x8 243 244 /* 245 * Send/Recv workq entries 246 */ 247 typedef struct eib_wqe_s { 248 struct eib_wqe_pool_s *qe_pool; 249 uint8_t *qe_cpbuf; 250 uint8_t *qe_payload_hdr; 251 uint_t qe_bufsz; 252 uint_t qe_info; 253 int qe_vnic_inst; 254 ibt_ud_dest_hdl_t qe_dest; 255 frtn_t qe_frp; 256 257 mblk_t *qe_mp; 258 ibt_mi_hdl_t qe_iov_hdl; 259 ibt_all_wr_t qe_wr; 260 ibt_wr_ds_t qe_sgl; 261 ibt_wr_ds_t qe_big_sgl[EIB_MAX_SGL]; 262 struct eib_wqe_s *qe_nxt_post; 263 struct eib_chan_s *qe_chan; 264 } eib_wqe_t; 265 266 /* 267 * The wqe in-use/free status in EoIB is managed via a 2-level bitmap 268 * logic. 269 * 270 * Each set of 64 wqes (a "wqe block") is managed by a single 64-bit 271 * integer bitmap. The free status of a set of 64 such wqe blocks (a 272 * "wqe pool") is managed by one 64-bit integer bitmap (if any wqe in 273 * the wqe block is free, the bit in the map is 1, otherwise it is 0). 274 * 275 * The maximum pool size is 4096 wqes, but this can easily be extended 276 * to support more wqes using additional pools of wqes. 277 * 278 * Note that an entire pool of wqes is allocated via a single allocation, 279 * the wqe addresses in a pool are all contiguous. The tx/rx copy buffers 280 * for a wqe pool are also allocated via a single allocation. 281 */ 282 #define EIB_BLKS_PER_POOL 64 283 #define EIB_WQES_PER_BLK 64 /* do not change this */ 284 #define EIB_WQES_PER_POOL (EIB_BLKS_PER_POOL * EIB_WQES_PER_BLK) 285 286 #define EIB_WQE_SZ (sizeof (eib_wqe_t)) 287 #define EIB_WQEBLK_SZ (EIB_WQES_PER_BLK * EIB_WQE_SZ) 288 289 typedef struct eib_wqe_pool_s { 290 struct eib_wqe_pool_s *wp_next; 291 struct eib_s *wp_ss; 292 ib_vaddr_t wp_vaddr; 293 ib_memlen_t wp_memsz; 294 ibt_mr_hdl_t wp_mr; 295 ibt_lkey_t wp_lkey; 296 uint_t wp_nfree_lwm; 297 int wp_type; 298 299 kmutex_t wp_lock; 300 kcondvar_t wp_cv; 301 uint_t wp_status; 302 uint_t wp_nfree; 303 uint64_t wp_free_blks; 304 uint64_t wp_free_wqes[EIB_BLKS_PER_POOL]; 305 struct eib_wqe_s *wp_wqe; 306 } eib_wqe_pool_t; 307 308 /* 309 * Values for wp_type 310 */ 311 #define EIB_WP_TYPE_TX 0x1 312 #define EIB_WP_TYPE_RX 0x2 313 314 /* 315 * Values for wp_status (bit fields) 316 */ 317 #define EIB_TXWQE_SHORT 0x1 /* only for tx wqe pool */ 318 #define EIB_TXWQE_MONITOR_DIE 0x2 /* only for tx wqe pool */ 319 320 #define EIB_RXWQE_SHORT 0x1 /* only for rx wqe pool */ 321 322 /* 323 * The low-water-mark is an indication of when wqe grabs for low-priority 324 * qps should start to get refused (swqe grabs for control messages such 325 * as keepalives and rwqe grabs for posting back to control qps will still 326 * be allowed). The high-water-mark is an indication of when normal 327 * behavior should resume. 328 */ 329 #define EIB_NFREE_SWQES_LWM (EIB_WQES_PER_POOL / 64) /* 1/64 */ 330 #define EIB_NFREE_SWQES_HWM (EIB_WQES_PER_POOL / 32) /* 1/32 */ 331 #define EIB_NFREE_RWQES_LWM (EIB_WQES_PER_POOL / 10) /* 10% */ 332 #define EIB_NFREE_RWQES_HWM (EIB_WQES_PER_POOL / 5) /* 20% */ 333 334 /* 335 * The "rwqes low" is used to determine when we should start using allocb() 336 * to copy and send received mblks in the rx path. It should be a little 337 * above the rwqes low-water-mark, but less than the high-water-mark. 338 */ 339 #define EIB_NFREE_RWQES_LOW \ 340 ((EIB_NFREE_RWQES_LWM + EIB_NFREE_RWQES_HWM) / 2) 341 342 #define EIB_WPRI_HI 1 /* for keepalive posts */ 343 #define EIB_WPRI_LO 2 /* for all other posts */ 344 345 /* 346 * Multicast GID Layout: the multicast gid is specified in big-endian 347 * representation, as a collection of different-sized fields in the 348 * EoIB specification. On Solaris, the multicast gid is represented 349 * as a collection of two 8-byte fields (in ib_gid_t). 350 */ 351 typedef struct eib_mgid_spec_s { 352 uint8_t sp_mgid_prefix[FIP_MGID_PREFIX_LEN]; 353 uint8_t sp_type; 354 uint8_t sp_dmac[ETHERADDRL]; 355 uint8_t sp_rss_hash; 356 uint8_t sp_vhub_id[FIP_VHUBID_LEN]; 357 } eib_mgid_spec_t; 358 359 /* 360 * Values for sp_type in mgid as per EoIB specification 361 */ 362 #define EIB_MGID_VHUB_DATA 0x0 363 #define EIB_MGID_VHUB_UPDATE 0x2 364 #define EIB_MGID_VHUB_TABLE 0x3 365 366 typedef union eib_mgid_s { 367 eib_mgid_spec_t gd_spec; 368 ib_gid_t gd_sol; 369 } eib_mgid_t; 370 371 /* 372 * Gateway properties handed over to us by the EoIB nexus 373 */ 374 typedef struct eib_gw_props_s { 375 kmutex_t pp_gw_lock; 376 377 ib_guid_t pp_gw_system_guid; 378 ib_guid_t pp_gw_guid; 379 ib_sn_prefix_t pp_gw_sn_prefix; 380 381 uint_t pp_gw_adv_period; 382 uint_t pp_gw_ka_period; 383 uint_t pp_vnic_ka_period; 384 385 ib_qpn_t pp_gw_ctrl_qpn; 386 ib_lid_t pp_gw_lid; 387 uint16_t pp_gw_portid; 388 389 uint16_t pp_gw_num_net_vnics; 390 uint8_t pp_gw_flag_available; 391 uint8_t pp_gw_is_host_adm_vnics; 392 uint8_t pp_gw_sl; 393 uint8_t pp_gw_n_rss_qpn; 394 395 uint8_t *pp_gw_system_name; 396 uint8_t *pp_gw_port_name; 397 uint8_t *pp_gw_vendor_id; 398 399 clock_t pp_gw_ka_ticks; /* 2.5 x gw_ka_period */ 400 clock_t pp_vnic_ka_ticks; /* vnic_ka_period */ 401 } eib_gw_props_t; 402 403 /* 404 * Port-specific properties 405 */ 406 typedef struct eib_props_s { 407 uint64_t ep_ifspeed; 408 ib_guid_t ep_hca_guid; 409 uint8_t ep_port_num; 410 ib_gid_t ep_sgid; 411 ib_lid_t ep_blid; 412 uint16_t ep_mtu; 413 ibt_srate_t ep_srate; 414 } eib_props_t; 415 416 /* 417 * Capabilities derived from HCA attributes 418 */ 419 typedef struct eib_caps_s { 420 uint_t cp_lso_maxlen; 421 uint32_t cp_cksum_flags; 422 int cp_resv_lkey_capab; 423 ibt_lkey_t cp_resv_lkey; 424 425 uint_t cp_max_swqe; 426 uint_t cp_max_rwqe; 427 uint_t cp_max_sgl; 428 uint_t cp_hiwm_sgl; 429 } eib_caps_t; 430 431 /* 432 * List of multicast groups the vnic joined 433 */ 434 typedef struct eib_mcg_s { 435 struct eib_mcg_s *mg_next; 436 ib_gid_t mg_rgid; 437 ib_gid_t mg_mgid; 438 uint8_t mg_join_state; 439 uint8_t mg_mac[ETHERADDRL]; 440 ibt_mcg_info_t *mg_mcginfo; 441 } eib_mcg_t; 442 443 /* 444 * Admin/control/data channel information 445 */ 446 typedef struct eib_chan_s { 447 ibt_channel_hdl_t ch_chan; 448 ib_qpn_t ch_qpn; 449 450 ibt_wc_t *ch_wc; 451 ibt_cq_hdl_t ch_cq_hdl; 452 uint_t ch_cq_sz; 453 454 ibt_wc_t *ch_rcv_wc; 455 ibt_cq_hdl_t ch_rcv_cq_hdl; 456 uint_t ch_rcv_cq_sz; 457 458 int ch_vnic_inst; 459 uint_t ch_max_swqes; 460 uint_t ch_max_rwqes; 461 uint_t ch_lwm_rwqes; 462 uint_t ch_rwqe_bktsz; 463 uint_t ch_ip_hdr_align; 464 boolean_t ch_alloc_mp; 465 boolean_t ch_tear_down; 466 467 kmutex_t ch_pkey_lock; 468 ib_pkey_t ch_pkey; 469 uint16_t ch_pkey_ix; 470 471 kmutex_t ch_cep_lock; 472 kcondvar_t ch_cep_cv; 473 ibt_cep_state_t ch_cep_state; 474 475 kmutex_t ch_tx_lock; 476 kcondvar_t ch_tx_cv; 477 uint_t ch_tx_posted; 478 boolean_t ch_tx_busy; 479 struct eib_wqe_s *ch_tx; 480 struct eib_wqe_s *ch_tx_tail; 481 482 kmutex_t ch_rx_lock; 483 kcondvar_t ch_rx_cv; 484 uint_t ch_rx_posted; 485 boolean_t ch_rx_refilling; 486 487 kmutex_t ch_vhub_lock; 488 struct eib_mcg_s *ch_vhub_table; 489 struct eib_mcg_s *ch_vhub_update; 490 struct eib_mcg_s *ch_vhub_data; 491 492 struct eib_chan_s *ch_rxpost_next; 493 } eib_chan_t; 494 495 /* 496 * States for vNIC state machine during login 497 */ 498 #define EIB_LOGIN_INIT 0 499 #define EIB_LOGIN_ACK_WAIT 1 500 #define EIB_LOGIN_ACK_RCVD 2 501 #define EIB_LOGIN_NACK_RCVD 3 502 #define EIB_LOGIN_TBL_WAIT 4 503 #define EIB_LOGIN_TBL_INPROG 5 504 #define EIB_LOGIN_TBL_DONE 6 505 #define EIB_LOGIN_TBL_FAILED 7 506 #define EIB_LOGIN_DONE 8 507 #define EIB_LOGIN_TIMED_OUT 9 508 #define EIB_LOGOUT_DONE 10 509 510 typedef struct eib_login_data_s { 511 ib_guid_t ld_gw_guid; 512 ib_lid_t ld_gw_lid; 513 uint_t ld_syndrome; 514 uint16_t ld_gw_port_id; 515 ib_qpn_t ld_gw_data_qpn; 516 ib_qpn_t ld_gw_ctl_qpn; 517 uint16_t ld_vnic_id; /* includes set msbit */ 518 uint16_t ld_vhub_mtu; 519 uint16_t ld_vhub_pkey; 520 uint16_t ld_assigned_vlan; 521 uint8_t ld_gw_sl; 522 uint8_t ld_n_rss_mcgid; 523 uint8_t ld_n_mac_mcgid; 524 uint8_t ld_vnic_name[FIP_VNIC_NAME_LEN]; 525 uint8_t ld_assigned_mac[ETHERADDRL]; 526 uint8_t ld_gw_mgid_prefix[FIP_MGID_PREFIX_LEN]; 527 uint8_t ld_vlan_in_packets; 528 uint32_t ld_vhub_id; 529 } eib_login_data_t; 530 531 #define EIB_UNICAST_MAC(mac) (((mac)[0] & 0x01) == 0) 532 533 /* 534 * Map to translate between DMAC and {qpn, lid, sl} 535 */ 536 typedef struct eib_vhub_map_s { 537 struct eib_vhub_map_s *mp_next; 538 uint32_t mp_tusn; 539 ib_qpn_t mp_qpn; 540 ib_lid_t mp_lid; 541 uint8_t mp_mac[ETHERADDRL]; 542 uint8_t mp_sl; 543 uint8_t mp_v_rss_type; 544 } eib_vhub_map_t; 545 546 /* 547 * Per-vNIC vHUB Table 548 */ 549 #define EIB_TB_NBUCKETS 13 550 typedef struct eib_vhub_table_s { 551 kmutex_t tb_lock; 552 struct eib_vhub_map_s *tb_gateway; 553 struct eib_vhub_map_s *tb_unicast_miss; 554 struct eib_vhub_map_s *tb_vhub_multicast; 555 struct eib_vhub_map_s *tb_vnic_entry[EIB_TB_NBUCKETS]; 556 struct eib_vhub_map_s *tb_mcast_entry[EIB_TB_NBUCKETS]; 557 558 uint32_t tb_tusn; 559 uint8_t tb_eport_state; 560 561 uint16_t tb_entries_seen; 562 uint16_t tb_entries_in_table; 563 uint32_t tb_checksum; 564 } eib_vhub_table_t; 565 566 typedef struct eib_vhub_update_s { 567 kmutex_t up_lock; 568 eib_vhub_map_t *up_vnic_entry; 569 uint32_t up_tusn; 570 uint8_t up_eport_state; 571 } eib_vhub_update_t; 572 573 typedef struct eib_ether_hdr_s { 574 int eh_tagless; 575 uint16_t eh_ether_type; 576 uint16_t eh_vlan; 577 uint8_t eh_dmac[ETHERADDRL]; 578 uint8_t eh_smac[ETHERADDRL]; 579 } eib_ether_hdr_t; 580 581 /* 582 * vNIC Information 583 */ 584 typedef struct eib_vnic_s { 585 struct eib_s *vn_ss; 586 eib_chan_t *vn_ctl_chan; 587 eib_chan_t *vn_data_chan; 588 int vn_instance; 589 uint16_t vn_vlan; 590 uint16_t vn_id; 591 uint8_t vn_macaddr[ETHERADDRL]; 592 struct eib_login_data_s vn_login_data; 593 594 kmutex_t vn_lock; 595 kcondvar_t vn_cv; 596 uint_t vn_state; 597 struct eib_vhub_table_s *vn_vhub_table; 598 struct eib_vhub_update_s *vn_vhub_update; 599 600 ddi_softint_handle_t vn_ctl_si_hdl; 601 ddi_softint_handle_t vn_data_tx_si_hdl; 602 ddi_softint_handle_t vn_data_rx_si_hdl; 603 } eib_vnic_t; 604 605 606 /* 607 * Base NIC's mac state flags. The lock protects the starting/stopping 608 * bits. Access to the rest of the mac state is protected by these 609 * two bits. 610 */ 611 #define EIB_NIC_STARTING 0x01 612 #define EIB_NIC_STOPPING 0x02 613 #define EIB_NIC_STARTED 0x80 614 #define EIB_NIC_RESTARTING (EIB_NIC_STARTING | EIB_NIC_STOPPING) 615 616 typedef struct eib_node_state_s { 617 kmutex_t ns_lock; 618 kcondvar_t ns_cv; 619 uint_t ns_nic_state; 620 link_state_t ns_link_state; 621 } eib_node_state_t; 622 623 /* 624 * MIB-II statistics to report to the mac layer 625 */ 626 typedef struct eib_stats_s { 627 uint64_t st_obytes; /* bytes sent out */ 628 uint64_t st_opkts; /* pkts sent out */ 629 uint64_t st_brdcstxmit; /* broadcast pkts transmitted */ 630 uint64_t st_multixmit; /* multicast pkts transmitted */ 631 uint64_t st_oerrors; /* transmit errors */ 632 uint64_t st_noxmitbuf; /* transmit pkts discarded */ 633 634 uint64_t st_rbytes; /* bytes received */ 635 uint64_t st_ipkts; /* pkts received */ 636 uint64_t st_brdcstrcv; /* broadcast pkts received */ 637 uint64_t st_multircv; /* multicast pkts received */ 638 uint64_t st_ierrors; /* receive errors */ 639 uint64_t st_norcvbuf; /* receive pkts discarded */ 640 } eib_stats_t; 641 642 #define EIB_UPDATE_COUNTER(addr, val) (atomic_add_64((addr), (val))) 643 #define EIB_INCR_COUNTER(addr) (atomic_inc_64((addr))) 644 #define EIB_DECR_COUNTER(addr) (atomic_dec_64((addr))) 645 646 /* 647 * Cache of address vectors with dlid as the key. Currently we use 648 * eib state structure's ei_lock to protect the individual address 649 * vector's fields. This is a lock granularity that's slightly 650 * bigger than ideal, but it should do for now. 651 */ 652 #define EIB_AV_NBUCKETS 17 653 typedef struct eib_avect_s { 654 struct eib_avect_s *av_next; 655 ibt_adds_vect_t av_vect; 656 uint_t av_ref; 657 } eib_avect_t; 658 659 /* 660 * vNIC creation and deletion are serialized by a non-zero value 661 * to the ei_vnic_state member (i.e. only one vnic may be created 662 * or deleted at a time). The code makes sure to access/update 663 * the ei_active_vnics member only after a successful setting of 664 * ei_vnic_state. 665 */ 666 #define EIB_VN_BEING_CREATED 0x01 667 #define EIB_VN_BEING_DELETED 0x02 668 #define EIB_VN_BEING_MODIFIED (EIB_VN_BEING_CREATED | EIB_VN_BEING_DELETED) 669 670 /* 671 * All possible EoIB event work items that need to be handled 672 */ 673 #define EIB_EV_NONE 0 674 #define EIB_EV_PORT_DOWN 1 675 #define EIB_EV_PORT_UP 2 676 #define EIB_EV_PKEY_CHANGE 3 677 #define EIB_EV_SGID_CHANGE 4 678 #define EIB_EV_CLNT_REREG 5 679 #define EIB_EV_GW_EPORT_DOWN 6 680 #define EIB_EV_GW_DOWN 7 681 #define EIB_EV_GW_UP 8 682 #define EIB_EV_GW_INFO_UPDATE 9 683 #define EIB_EV_MCG_DELETED 10 684 #define EIB_EV_MCG_CREATED 11 685 #define EIB_EV_SHUTDOWN 12 686 687 typedef struct eib_event_s { 688 struct eib_event_s *ev_next; 689 uint_t ev_code; 690 void *ev_arg; 691 } eib_event_t; 692 693 /* 694 * Work element for new vnic creation 695 */ 696 typedef struct eib_vnic_req_s { 697 struct eib_vnic_req_s *vr_next; 698 uint_t vr_req; 699 uint8_t vr_mac[ETHERADDRL]; 700 uint16_t vr_vlan; 701 } eib_vnic_req_t; 702 703 /* 704 * Values for vr_req 705 */ 706 #define EIB_CR_REQ_NEW_VNIC 1 707 #define EIB_CR_REQ_FLUSH 2 708 #define EIB_CR_REQ_DIE 3 709 710 /* 711 * Work element for vnics kept alive by the keepalive manager thread 712 * and bitfield values for ei_ka_vnics_event. 713 */ 714 typedef struct eib_ka_vnics_s { 715 struct eib_ka_vnics_s *ka_next; 716 struct eib_vnic_s *ka_vnic; 717 } eib_ka_vnics_t; 718 719 #define EIB_KA_VNICS_DIE 0x1 720 #define EIB_KA_VNICS_TIMED_OUT 0x2 721 722 /* 723 * EoIB per-instance state 724 */ 725 typedef struct eib_s { 726 ibt_clnt_hdl_t ei_ibt_hdl; 727 ibt_hca_hdl_t ei_hca_hdl; 728 ibt_pd_hdl_t ei_pd_hdl; 729 mac_handle_t ei_mac_hdl; 730 731 ddi_softint_handle_t ei_admin_si_hdl; 732 ddi_callback_id_t ei_login_ack_cb; 733 ddi_callback_id_t ei_gw_alive_cb; 734 ddi_callback_id_t ei_gw_info_cb; 735 736 ibt_hca_attr_t *ei_hca_attrs; 737 dev_info_t *ei_dip; 738 uint_t ei_instance; 739 740 struct eib_gw_props_s *ei_gw_props; 741 struct eib_props_s *ei_props; 742 struct eib_caps_s *ei_caps; 743 struct eib_stats_s *ei_stats; 744 745 struct eib_node_state_s *ei_node_state; 746 struct eib_chan_s *ei_admin_chan; 747 748 struct eib_wqe_pool_s *ei_tx; 749 struct eib_wqe_pool_s *ei_rx; 750 struct eib_lsobkt_s *ei_lso; 751 752 kmutex_t ei_vnic_lock; 753 kcondvar_t ei_vnic_cv; 754 uint_t ei_vnic_state; 755 uint64_t ei_active_vnics; 756 uint64_t ei_zombie_vnics; 757 uint64_t ei_rejoin_vnics; 758 struct eib_vnic_s *ei_vnic[EIB_MAX_VNICS]; 759 struct eib_vnic_s *ei_vnic_pending; 760 int64_t ei_gw_last_heartbeat; 761 boolean_t ei_gw_unreachable; 762 uint8_t ei_gw_eport_state; 763 764 kmutex_t ei_av_lock; 765 struct eib_avect_s *ei_av[EIB_AV_NBUCKETS]; 766 767 kmutex_t ei_ev_lock; 768 kcondvar_t ei_ev_cv; 769 struct eib_event_s *ei_event; 770 771 kmutex_t ei_rxpost_lock; 772 kcondvar_t ei_rxpost_cv; 773 uint_t ei_rxpost_die; 774 struct eib_chan_s *ei_rxpost; 775 776 kmutex_t ei_vnic_req_lock; 777 kcondvar_t ei_vnic_req_cv; 778 struct eib_vnic_req_s *ei_vnic_req; 779 struct eib_vnic_req_s *ei_failed_vnic_req; 780 struct eib_vnic_req_s *ei_pending_vnic_req; 781 782 kmutex_t ei_ka_vnics_lock; 783 kcondvar_t ei_ka_vnics_cv; 784 uint_t ei_ka_vnics_event; 785 struct eib_ka_vnics_s *ei_ka_vnics; 786 787 kt_did_t ei_txwqe_monitor; 788 kt_did_t ei_lsobufs_monitor; 789 kt_did_t ei_rwqes_refiller; 790 kt_did_t ei_vnic_creator; 791 kt_did_t ei_events_handler; 792 kt_did_t ei_keepalives_manager; 793 } eib_t; 794 795 /* 796 * Private read-only datalink properties 797 */ 798 #define EIB_DLPROP_GW_EPORT_STATE "_eib_eport_state" 799 #define EIB_DLPROP_HCA_GUID "_eib_hca_guid" 800 #define EIB_DLPROP_PORT_GUID "_eib_port_guid" 801 802 /* 803 * FUNCTION PROTOTYPES FOR CROSS-FILE LINKAGE 804 */ 805 806 /* 807 * FIP protocol related 808 */ 809 extern int eib_fip_login(eib_t *, eib_vnic_t *, int *); 810 extern int eib_fip_heartbeat(eib_t *, eib_vnic_t *, int *); 811 extern int eib_fip_vhub_table(eib_t *, eib_vnic_t *, int *); 812 extern int eib_fip_logout(eib_t *, eib_vnic_t *, int *); 813 extern int eib_fip_parse_login_ack(eib_t *, uint8_t *, eib_login_data_t *); 814 extern int eib_fip_parse_ctl_pkt(uint8_t *, eib_vnic_t *); 815 816 /* 817 * Service threads and other handlers 818 */ 819 extern void eib_events_handler(eib_t *); 820 extern void eib_svc_enqueue_event(eib_t *, eib_event_t *); 821 extern void eib_refill_rwqes(eib_t *); 822 extern void eib_vnic_creator(eib_t *); 823 extern void eib_monitor_tx_wqes(eib_t *); 824 extern void eib_monitor_lso_bufs(eib_t *); 825 extern void eib_manage_keepalives(eib_t *); 826 extern void eib_stop_events_handler(eib_t *); 827 extern void eib_stop_refill_rwqes(eib_t *); 828 extern void eib_stop_vnic_creator(eib_t *); 829 extern void eib_stop_monitor_tx_wqes(eib_t *); 830 extern int eib_stop_monitor_lso_bufs(eib_t *, boolean_t); 831 extern void eib_stop_manage_keepalives(eib_t *); 832 extern void eib_flush_vnic_reqs(eib_t *); 833 extern void eib_gw_info_cb(dev_info_t *, ddi_eventcookie_t, void *, void *); 834 extern void eib_gw_alive_cb(dev_info_t *, ddi_eventcookie_t, void *, void *); 835 extern void eib_login_ack_cb(dev_info_t *, ddi_eventcookie_t, void *, void *); 836 837 /* 838 * Admin QP related 839 */ 840 extern int eib_adm_setup_qp(eib_t *, int *); 841 extern uint_t eib_adm_comp_handler(caddr_t, caddr_t); 842 extern void eib_rb_adm_setup_qp(eib_t *); 843 844 /* 845 * Control QP related 846 */ 847 extern int eib_ctl_create_qp(eib_t *, eib_vnic_t *, int *); 848 extern uint_t eib_ctl_comp_handler(caddr_t, caddr_t); 849 extern void eib_rb_ctl_create_qp(eib_t *, eib_vnic_t *); 850 851 /* 852 * Data QP related 853 */ 854 extern int eib_data_create_qp(eib_t *, eib_vnic_t *, int *); 855 extern uint_t eib_data_rx_comp_handler(caddr_t, caddr_t); 856 extern uint_t eib_data_tx_comp_handler(caddr_t, caddr_t); 857 extern void eib_data_rx_recycle(caddr_t); 858 extern void eib_data_post_tx(eib_vnic_t *, eib_wqe_t *); 859 extern void eib_data_parse_ether_hdr(mblk_t *, eib_ether_hdr_t *); 860 extern int eib_data_lookup_vnic(eib_t *, uint8_t *, uint16_t, eib_vnic_t **, 861 boolean_t *); 862 extern int eib_data_prepare_frame(eib_vnic_t *, eib_wqe_t *, mblk_t *, 863 eib_ether_hdr_t *); 864 extern void eib_rb_data_create_qp(eib_t *, eib_vnic_t *); 865 866 /* 867 * Resource related 868 */ 869 extern int eib_rsrc_setup_bufs(eib_t *, int *); 870 extern int eib_rsrc_grab_swqes(eib_t *, eib_wqe_t **, uint_t, uint_t *, int); 871 extern int eib_rsrc_grab_rwqes(eib_t *, eib_wqe_t **, uint_t, uint_t *, int); 872 extern int eib_rsrc_grab_lsobufs(eib_t *, uint_t, ibt_wr_ds_t *, uint32_t *); 873 extern eib_wqe_t *eib_rsrc_grab_swqe(eib_t *, int); 874 extern eib_wqe_t *eib_rsrc_grab_rwqe(eib_t *, int); 875 extern void eib_rsrc_return_swqe(eib_t *, eib_wqe_t *, eib_chan_t *); 876 extern void eib_rsrc_return_rwqe(eib_t *, eib_wqe_t *, eib_chan_t *); 877 extern void eib_rsrc_return_lsobufs(eib_t *, ibt_wr_ds_t *, uint32_t); 878 extern void eib_rsrc_decr_posted_swqe(eib_t *, eib_chan_t *); 879 extern void eib_rsrc_decr_posted_rwqe(eib_t *, eib_chan_t *); 880 extern void eib_rsrc_txwqes_needed(eib_t *); 881 extern void eib_rsrc_lsobufs_needed(eib_t *); 882 extern boolean_t eib_rsrc_rxpool_low(eib_wqe_t *); 883 extern void eib_rb_rsrc_setup_bufs(eib_t *, boolean_t); 884 885 /* 886 * IBT related 887 */ 888 extern int eib_ibt_hca_init(eib_t *); 889 extern void eib_ibt_link_mod(eib_t *); 890 extern int eib_ibt_modify_chan_pkey(eib_t *, eib_chan_t *, ib_pkey_t); 891 extern eib_avect_t *eib_ibt_hold_avect(eib_t *, ib_lid_t, uint8_t); 892 extern void eib_ibt_release_avect(eib_t *, eib_avect_t *); 893 extern void eib_ibt_free_avects(eib_t *); 894 extern void eib_ibt_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t, 895 ibt_async_event_t *); 896 extern void eib_ibt_record_capab(eib_t *, ibt_hca_attr_t *, eib_caps_t *); 897 extern void eib_rb_ibt_hca_init(eib_t *, uint_t); 898 899 /* 900 * Chan related 901 */ 902 extern eib_chan_t *eib_chan_init(void); 903 extern void eib_chan_fini(eib_chan_t *); 904 extern int eib_chan_post_rx(eib_t *, eib_chan_t *, uint_t *); 905 extern int eib_chan_post_recv(eib_t *, eib_chan_t *, eib_wqe_t *); 906 907 /* 908 * Mac layer related 909 */ 910 extern void eib_mac_set_nic_state(eib_t *, uint_t); 911 extern void eib_mac_clr_nic_state(eib_t *, uint_t); 912 extern void eib_mac_upd_nic_state(eib_t *, uint_t, uint_t); 913 extern uint_t eib_mac_get_nic_state(eib_t *); 914 extern void eib_mac_link_state(eib_t *, link_state_t, boolean_t); 915 extern void eib_mac_link_down(eib_t *, boolean_t); 916 extern void eib_mac_link_up(eib_t *, boolean_t); 917 extern int eib_mac_start(eib_t *); 918 extern void eib_mac_stop(eib_t *); 919 extern int eib_mac_multicast(eib_t *, boolean_t, uint8_t *); 920 extern int eib_mac_promisc(eib_t *, boolean_t); 921 extern int eib_mac_tx(eib_t *, mblk_t *); 922 extern int eib_mac_hca_portstate(eib_t *, ib_lid_t *, int *); 923 924 /* 925 * VNIC related 926 */ 927 extern int eib_vnic_create(eib_t *, uint8_t *, uint16_t, eib_vnic_t **, int *); 928 extern void eib_vnic_delete(eib_t *, eib_vnic_t *); 929 extern int eib_vnic_wait_for_login_ack(eib_t *, eib_vnic_t *, int *); 930 extern void eib_vnic_login_ack(eib_t *, eib_login_data_t *); 931 extern int eib_vnic_wait_for_table(eib_t *, eib_vnic_t *, int *); 932 extern void eib_vnic_vhub_table_done(eib_vnic_t *, uint_t); 933 extern int eib_vnic_join_data_mcg(eib_t *, eib_vnic_t *, uint8_t *, 934 boolean_t, int *); 935 extern int eib_vnic_setup_dest(eib_vnic_t *, eib_wqe_t *, uint8_t *, uint16_t); 936 extern void eib_vnic_leave_data_mcg(eib_t *, eib_vnic_t *, uint8_t *); 937 extern void eib_vnic_init_tables(eib_t *, eib_vnic_t *); 938 extern void eib_vnic_fini_tables(eib_t *, eib_vnic_t *, boolean_t); 939 extern eib_chan_t *eib_vnic_get_data_chan(eib_t *, int); 940 extern void eib_vnic_need_new(eib_t *, uint8_t *, uint16_t); 941 extern void eib_vnic_enqueue_req(eib_t *, eib_vnic_req_t *); 942 extern void eib_vnic_resurrect_zombies(eib_t *, uint8_t *); 943 extern void eib_vnic_restart(eib_t *, int, uint8_t *); 944 extern void eib_vnic_rejoin_mcgs(eib_t *); 945 extern void eib_rb_vnic_create(eib_t *, eib_vnic_t *, uint_t); 946 947 /* 948 * Logging and other stuff 949 */ 950 extern void eib_debug_init(void); 951 extern void eib_debug_fini(void); 952 extern void eib_dprintf_crit(int, const char *fmt, ...); 953 extern void eib_dprintf_err(int, const char *fmt, ...); 954 extern void eib_dprintf_warn(int, const char *fmt, ...); 955 #ifdef EIB_DEBUG 956 extern void eib_dprintf_debug(int, const char *fmt, ...); 957 extern void eib_dprintf_args(int, const char *fmt, ...); 958 extern void eib_dprintf_pkt(int, uint8_t *, uint_t); 959 extern void eib_dprintf_verbose(int, const char *fmt, ...); 960 #endif 961 extern int eib_get_props(eib_t *); 962 extern void eib_update_props(eib_t *, eib_gw_info_t *); 963 extern void eib_rb_get_props(eib_t *); 964 965 /* 966 * EoIB specific global variables 967 */ 968 extern ib_gid_t eib_reserved_gid; 969 extern uint8_t eib_zero_mac[]; 970 extern uint8_t eib_broadcast_mac[]; 971 extern int eib_setbit_mod67[]; 972 extern char *eib_pvt_props[]; 973 974 /* 975 * HW/FW workarounds 976 */ 977 extern int eib_wa_no_desc_list_len; 978 extern int eib_wa_no_cksum_offload; 979 extern int eib_wa_no_lso; 980 extern int eib_wa_no_mcast_entries; 981 extern int eib_wa_no_av_discover; 982 extern int eib_wa_no_good_vp_flag; 983 extern int eib_wa_no_good_vhub_cksum; 984 985 /* 986 * Miscellaneous externs 987 */ 988 extern void freemsgchain(mblk_t *); 989 extern pri_t minclsyspri; 990 991 #ifdef __cplusplus 992 } 993 #endif 994 995 #endif /* _SYS_IB_EOIB_EIB_IMPL_H */