1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #ifndef _SYS_IB_EOIB_ENX_IMPL_H 27 #define _SYS_IB_EOIB_ENX_IMPL_H 28 29 #ifdef __cplusplus 30 extern "C" { 31 #endif 32 33 #include <sys/ddi.h> 34 #include <sys/sunddi.h> 35 #include <sys/varargs.h> 36 #include <sys/ib/ibtl/ibti.h> 37 #include <sys/ib/ibtl/ibvti.h> 38 #include <sys/ib/ib_pkt_hdrs.h> 39 #include <sys/ib/ibtl/impl/ibtl_ibnex.h> 40 #include <sys/ib/mgt/sm_attr.h> 41 42 #include <sys/ib/clients/eoib/fip.h> 43 #include <sys/ib/clients/eoib/eib.h> 44 45 /* 46 * Driver specific constants 47 */ 48 #define ENX_E_SUCCESS 0 49 #define ENX_E_FAILURE -1 50 #define ENX_MAX_LINE 128 51 #define ENX_GRH_SZ (sizeof (ib_grh_t)) 52 53 /* 54 * Debug messages 55 */ 56 #define ENX_MSGS_CRIT 0x01 57 #define ENX_MSGS_ERR 0x02 58 #define ENX_MSGS_WARN 0x04 59 #define ENX_MSGS_DEBUG 0x08 60 #define ENX_MSGS_ARGS 0x10 61 #define ENX_MSGS_VERBOSE 0x20 62 #define ENX_MSGS_DEFAULT (ENX_MSGS_CRIT | ENX_MSGS_ERR | ENX_MSGS_WARN) 63 64 #define ENX_LOGSZ_DEFAULT 0x20000 65 66 #define ENX_DPRINTF_CRIT eibnx_dprintf_crit 67 #define ENX_DPRINTF_ERR eibnx_dprintf_err 68 #define ENX_DPRINTF_WARN eibnx_dprintf_warn 69 #ifdef ENX_DEBUG 70 #define ENX_DPRINTF_DEBUG eibnx_dprintf_debug 71 #define ENX_DPRINTF_ARGS eibnx_dprintf_args 72 #define ENX_DPRINTF_VERBOSE eibnx_dprintf_verbose 73 #else 74 #define ENX_DPRINTF_DEBUG 0 && 75 #define ENX_DPRINTF_ARGS 0 && 76 #define ENX_DPRINTF_VERBOSE 0 && 77 #endif 78 79 /* 80 * EoIB Nexus service threads 81 */ 82 #define ENX_PORT_MONITOR "eibnx_port_%d_monitor" 83 #define ENX_NODE_CREATOR "eibnx_node_creator" 84 85 /* 86 * Default period (us) for unicast solicitations to discovered gateways. 87 * EoIB specification requires that hosts send solicitation atleast every 88 * 4 * GW_ADV_PERIOD. 89 */ 90 #define ENX_DFL_SOLICIT_PERIOD_USEC 32000000 91 92 /* 93 * Portinfo list per HCA 94 */ 95 typedef struct eibnx_port_s { 96 struct eibnx_port_s *po_next; 97 ibt_hca_portinfo_t *po_pi; 98 uint_t po_pi_size; 99 } eibnx_port_t; 100 101 /* 102 * HCA details 103 */ 104 typedef struct eibnx_hca_s { 105 struct eibnx_hca_s *hc_next; 106 ib_guid_t hc_guid; 107 ibt_hca_hdl_t hc_hdl; 108 ibt_pd_hdl_t hc_pd; 109 eibnx_port_t *hc_port; 110 } eibnx_hca_t; 111 112 /* 113 * The port_monitor thread in EoIB nexus driver only sends two types of 114 * packets: multicast solicitation the first time around, and periodic 115 * unicast solicitations later to gateways that have been discovered. So 116 * we need a couple of send wqes for the multicast solicitation and 117 * probably as many send wqes as the number of gateways that may be 118 * discovered from each port, for sending the unicast solicitations. 119 * For unicast solicitations though, the UD destination needs to be set 120 * up at the time we receive the advertisement from the gateway, using 121 * ibt_modify_reply_ud_dest(), so we'll assign one send wqe for each 122 * gateway that we discover. This means that we need to acquire these 123 * send wqe entries during rx processing in the completion handler, which 124 * means we must avoid sleeping in trying to acquire the swqe. Therefore, 125 * we'll pre-allocate these unicast solication send wqes to be atleast 126 * twice the number of recv wqes. 127 * 128 * The receive packets expected by the EoIB nexus driver are the multicast 129 * and unicast messages on the SOLICIT and ADVERTISE groups. These 130 * shouldn't be too many, and should be tuned as we gain experience on 131 * the traffic pattern. We'll start with 16. 132 */ 133 #define ENX_NUM_SWQE 46 134 #define ENX_NUM_RWQE 16 135 #define ENX_CQ_SIZE (ENX_NUM_SWQE + ENX_NUM_RWQE + 2) 136 137 /* 138 * qe_type values 139 */ 140 #define ENX_QETYP_RWQE 0x1 141 #define ENX_QETYP_SWQE 0x2 142 143 /* 144 * qe_flags bitmasks (protected by qe_lock). None of the 145 * flag values may be zero. 146 */ 147 #define ENX_QEFL_INUSE 0x01 148 #define ENX_QEFL_POSTED 0x02 149 #define ENX_QEFL_RELONCOMP 0x04 150 151 /* 152 * Recv and send workq entries 153 */ 154 typedef struct eibnx_wqe_s { 155 uint_t qe_type; 156 uint_t qe_bufsz; 157 ibt_wr_ds_t qe_sgl; 158 ibt_all_wr_t qe_wr; 159 kmutex_t qe_lock; 160 uint_t qe_flags; 161 } eibnx_wqe_t; 162 163 /* 164 * Tx descriptor 165 */ 166 typedef struct eibnx_tx_s { 167 ib_vaddr_t tx_vaddr; 168 ibt_mr_hdl_t tx_mr; 169 ibt_lkey_t tx_lkey; 170 eibnx_wqe_t tx_wqe[ENX_NUM_SWQE]; 171 } eibnx_tx_t; 172 173 /* 174 * Rx descriptor 175 */ 176 typedef struct eibnx_rx_s { 177 ib_vaddr_t rx_vaddr; 178 ibt_mr_hdl_t rx_mr; 179 ibt_lkey_t rx_lkey; 180 eibnx_wqe_t rx_wqe[ENX_NUM_RWQE]; 181 } eibnx_rx_t; 182 183 /* 184 * Details about the address of each gateway we discover. 185 */ 186 typedef struct eibnx_gw_addr_s { 187 ibt_adds_vect_t *ga_vect; 188 ib_gid_t ga_gid; 189 ib_qpn_t ga_qpn; 190 ib_qkey_t ga_qkey; 191 ib_pkey_t ga_pkey; 192 } eibnx_gw_addr_t; 193 194 /* 195 * States for each GW 196 */ 197 #define ENX_GW_STATE_UNAVAILABLE 1 /* GW nackd availability */ 198 #define ENX_GW_STATE_AVAILABLE 2 /* GW mcasted availability */ 199 #define ENX_GW_STATE_READY_TO_LOGIN 3 /* GW ucasted availability */ 200 201 typedef struct eibnx_gw_info_s { 202 struct eibnx_gw_info_s *gw_next; 203 eibnx_wqe_t *gw_swqe; 204 uint_t gw_state; 205 206 kmutex_t gw_adv_lock; 207 uint_t gw_adv_flag; 208 int64_t gw_adv_last_lbolt; 209 int64_t gw_adv_timeout_ticks; 210 211 eibnx_gw_addr_t gw_addr; 212 213 ib_guid_t gw_system_guid; 214 ib_guid_t gw_guid; 215 216 uint32_t gw_adv_period; 217 uint32_t gw_ka_period; 218 uint32_t gw_vnic_ka_period; 219 ib_qpn_t gw_ctrl_qpn; 220 221 ib_lid_t gw_lid; 222 uint16_t gw_portid; 223 uint16_t gw_num_net_vnics; 224 225 uint8_t gw_is_host_adm_vnics; 226 uint8_t gw_sl; 227 uint8_t gw_n_rss_qpn; 228 uint8_t gw_flag_ucast_advt; 229 uint8_t gw_flag_available; 230 231 uint8_t gw_system_name[EIB_GW_SYSNAME_LEN]; 232 uint8_t gw_port_name[EIB_GW_PORTNAME_LEN]; 233 uint8_t gw_vendor_id[EIB_GW_VENDOR_LEN]; 234 } eibnx_gw_info_t; 235 236 /* 237 * Values for gw_adv_flag (non-zero only) 238 */ 239 #define ENX_GW_DEAD 1 240 #define ENX_GW_ALIVE 2 241 #define ENX_GW_AWARE 3 242 243 /* 244 * Currently, we only expect the advertisement type of packets 245 * from the gw. But we do get login acks from the gateway also 246 * here in the nexus, so we'll need an identifier for that. 247 */ 248 typedef enum { 249 FIP_GW_ADVERTISE_MCAST = 0, 250 FIP_GW_ADVERTISE_UCAST, 251 FIP_VNIC_LOGIN_ACK 252 } eibnx_gw_pkt_type_t; 253 254 /* 255 * Currently, the only gw response handled by the eibnx driver 256 * are the ucast/mcast advertisements. Information collected from 257 * both these responses may be packed into a eibnx_gw_info_t. 258 * In the future, if we decide to handle other types of responses 259 * from the gw, we could simply add the new types to the union. 260 */ 261 typedef struct eibnx_gw_msg_s { 262 eibnx_gw_pkt_type_t gm_type; 263 union { 264 eibnx_gw_info_t gm_info; 265 } u; 266 } eibnx_gw_msg_t; 267 268 /* 269 * List to hold the devinfo nodes of eoib instances 270 */ 271 typedef struct eibnx_child_s { 272 struct eibnx_child_s *ch_next; 273 dev_info_t *ch_dip; 274 eibnx_gw_info_t *ch_gwi; 275 char *ch_node_name; 276 } eibnx_child_t; 277 278 /* 279 * Event bitmasks for the port-monitor to wait on. None of these flags 280 * may be zero. 281 */ 282 #define ENX_EVENT_LINK_UP 0x01 283 #define ENX_EVENT_MCGS_AVAILABLE 0x02 284 #define ENX_EVENT_TIMED_OUT 0x04 285 #define ENX_EVENT_DIE 0x08 286 #define ENX_EVENT_COMPLETION 0x10 287 288 /* 289 * MCG Query/Join status 290 */ 291 #define ENX_MCGS_FOUND 0x1 292 #define ENX_MCGS_JOINED 0x2 293 294 /* 295 * Information that each port-monitor thread cares about 296 */ 297 typedef struct eibnx_thr_info_s { 298 struct eibnx_thr_info_s *ti_next; 299 uint_t ti_progress; 300 301 /* 302 * Our kernel thread id 303 */ 304 kt_did_t ti_kt_did; 305 306 /* 307 * HCA, port and protection domain information 308 */ 309 ib_guid_t ti_hca_guid; 310 ibt_hca_hdl_t ti_hca; 311 ibt_pd_hdl_t ti_pd; 312 ibt_hca_portinfo_t *ti_pi; 313 char *ti_ident; 314 315 /* 316 * Well-known multicast groups for solicitations 317 * and advertisements. 318 */ 319 kmutex_t ti_mcg_lock; 320 uint_t ti_mcg_status; 321 ibt_mcg_info_t *ti_advertise_mcg; 322 ibt_mcg_info_t *ti_solicit_mcg; 323 uint_t ti_mcast_done; 324 325 /* 326 * Completion queue stuff 327 */ 328 ibt_cq_hdl_t ti_cq_hdl; 329 uint_t ti_cq_sz; 330 ibt_wc_t *ti_wc; 331 ddi_softint_handle_t ti_softint_hdl; 332 333 /* 334 * Channel related 335 */ 336 ibt_channel_hdl_t ti_chan; 337 ib_qpn_t ti_qpn; 338 339 /* 340 * Transmit/Receive stuff 341 */ 342 eibnx_tx_t ti_snd; 343 eibnx_rx_t ti_rcv; 344 345 /* 346 * GW related stuff 347 */ 348 kmutex_t ti_gw_lock; 349 eibnx_gw_info_t *ti_gw; 350 351 /* 352 * Devinfo nodes for the eoib children 353 */ 354 kmutex_t ti_child_lock; 355 eibnx_child_t *ti_child; 356 357 /* 358 * Events that we wait on and/or handle 359 */ 360 kmutex_t ti_event_lock; 361 kcondvar_t ti_event_cv; 362 uint_t ti_event; 363 } eibnx_thr_info_t; 364 365 /* 366 * Workq entry for creation of eoib nodes 367 */ 368 typedef struct eibnx_nodeq_s { 369 struct eibnx_nodeq_s *nc_next; 370 eibnx_thr_info_t *nc_info; 371 eibnx_gw_info_t *nc_gwi; 372 } eibnx_nodeq_t; 373 374 /* 375 * Bus config status flags. The in-prog is protected by 376 * nx_lock, and the rest of the flags (currently only 377 * buscfg-complete) is protected by the in-prog bit itself. 378 */ 379 #define NX_FL_BUSOP_INPROG 0x1 380 #define NX_FL_BUSCFG_COMPLETE 0x2 381 #define NX_FL_BUSOP_MASK 0x3 382 383 /* 384 * EoIB nexus per-instance state 385 */ 386 typedef struct eibnx_s { 387 dev_info_t *nx_dip; 388 ibt_clnt_hdl_t nx_ibt_hdl; 389 390 kmutex_t nx_lock; 391 eibnx_hca_t *nx_hca; 392 eibnx_thr_info_t *nx_thr_info; 393 boolean_t nx_monitors_up; 394 395 kmutex_t nx_nodeq_lock; 396 kcondvar_t nx_nodeq_cv; 397 eibnx_nodeq_t *nx_nodeq; 398 kt_did_t nx_nodeq_kt_did; 399 uint_t nx_nodeq_thr_die; 400 401 kmutex_t nx_busop_lock; 402 kcondvar_t nx_busop_cv; 403 uint_t nx_busop_flags; 404 } eibnx_t; 405 406 407 /* 408 * Event tags for EoIB Nexus events delivered to EoIB instances 409 */ 410 #define ENX_EVENT_TAG_GW_INFO_UPDATE 0 411 #define ENX_EVENT_TAG_GW_AVAILABLE 1 412 #define ENX_EVENT_TAG_LOGIN_ACK 2 413 414 /* 415 * FUNCTION PROTOTYPES FOR CROSS-FILE LINKAGE 416 */ 417 418 /* 419 * Threads and Event Handlers 420 */ 421 void eibnx_port_monitor(eibnx_thr_info_t *); 422 void eibnx_subnet_notices_handler(void *, ib_gid_t, ibt_subnet_event_code_t, 423 ibt_subnet_event_t *); 424 void eibnx_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t, 425 ibt_async_event_t *); 426 boolean_t eibnx_is_gw_dead(eibnx_gw_info_t *); 427 void eibnx_create_eoib_node(void); 428 void eibnx_comp_intr(ibt_cq_hdl_t, void *); 429 uint_t eibnx_comp_handler(caddr_t, caddr_t); 430 431 /* 432 * IBT related functions 433 */ 434 int eibnx_ibt_init(eibnx_t *); 435 int eibnx_find_mgroups(eibnx_thr_info_t *); 436 int eibnx_setup_cq(eibnx_thr_info_t *); 437 int eibnx_setup_ud_channel(eibnx_thr_info_t *); 438 int eibnx_setup_bufs(eibnx_thr_info_t *); 439 int eibnx_setup_cq_handler(eibnx_thr_info_t *); 440 int eibnx_join_mcgs(eibnx_thr_info_t *); 441 int eibnx_rejoin_mcgs(eibnx_thr_info_t *); 442 int eibnx_ibt_fini(eibnx_t *); 443 444 void eibnx_rb_find_mgroups(eibnx_thr_info_t *); 445 void eibnx_rb_setup_cq(eibnx_thr_info_t *); 446 void eibnx_rb_setup_ud_channel(eibnx_thr_info_t *); 447 void eibnx_rb_setup_bufs(eibnx_thr_info_t *); 448 void eibnx_rb_setup_cq_handler(eibnx_thr_info_t *); 449 void eibnx_rb_join_mcgs(eibnx_thr_info_t *); 450 451 eibnx_hca_t *eibnx_prepare_hca(ib_guid_t); 452 int eibnx_cleanup_hca(eibnx_hca_t *); 453 454 /* 455 * FIP packetizing related functions 456 */ 457 int eibnx_fip_solicit_mcast(eibnx_thr_info_t *); 458 int eibnx_fip_solicit_ucast(eibnx_thr_info_t *, clock_t *); 459 int eibnx_fip_parse_pkt(uint8_t *, eibnx_gw_msg_t *); 460 461 /* 462 * Queue and List related routines 463 */ 464 eibnx_wqe_t *eibnx_acquire_swqe(eibnx_thr_info_t *, int); 465 void eibnx_return_swqe(eibnx_wqe_t *); 466 void eibnx_return_rwqe(eibnx_thr_info_t *, eibnx_wqe_t *); 467 void eibnx_release_swqe(eibnx_wqe_t *); 468 469 void eibnx_enqueue_child(eibnx_thr_info_t *, eibnx_gw_info_t *, char *, 470 dev_info_t *); 471 int eibnx_update_child(eibnx_thr_info_t *, eibnx_gw_info_t *, dev_info_t *); 472 dev_info_t *eibnx_find_child_dip_by_inst(eibnx_thr_info_t *, int); 473 dev_info_t *eibnx_find_child_dip_by_gw(eibnx_thr_info_t *, uint16_t); 474 475 eibnx_gw_info_t *eibnx_find_gw_in_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *); 476 eibnx_gw_info_t *eibnx_add_gw_to_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *, 477 ibt_wc_t *, uint8_t *); 478 void eibnx_replace_gw_in_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *, 479 eibnx_gw_info_t *, ibt_wc_t *, uint8_t *, boolean_t *); 480 void eibnx_queue_for_creation(eibnx_thr_info_t *, eibnx_gw_info_t *); 481 482 /* 483 * Logging and Error reporting routines 484 */ 485 void eibnx_debug_init(void); 486 void eibnx_debug_fini(void); 487 void eibnx_dprintf_crit(const char *fmt, ...); 488 void eibnx_dprintf_err(const char *fmt, ...); 489 void eibnx_dprintf_warn(const char *fmt, ...); 490 #ifdef ENX_DEBUG 491 void eibnx_dprintf_debug(const char *fmt, ...); 492 void eibnx_dprintf_args(const char *fmt, ...); 493 void eibnx_dprintf_verbose(const char *fmt, ...); 494 #endif 495 496 /* 497 * Miscellaneous 498 */ 499 void eibnx_cleanup_port_nodes(eibnx_thr_info_t *); 500 void eibnx_create_node_props(dev_info_t *, eibnx_thr_info_t *, 501 eibnx_gw_info_t *); 502 int eibnx_name_child(dev_info_t *, char *, size_t); 503 void eibnx_busop_inprog_enter(eibnx_t *); 504 void eibnx_busop_inprog_exit(eibnx_t *); 505 eibnx_thr_info_t *eibnx_start_port_monitor(eibnx_hca_t *, eibnx_port_t *); 506 void eibnx_stop_port_monitor(eibnx_thr_info_t *); 507 void eibnx_terminate_monitors(void); 508 int eibnx_configure_node(eibnx_thr_info_t *, eibnx_gw_info_t *, dev_info_t **); 509 int eibnx_unconfigure_node(eibnx_thr_info_t *, eibnx_gw_info_t *); 510 int eibnx_locate_node_name(char *, eibnx_thr_info_t **, eibnx_gw_info_t **); 511 int eibnx_locate_unconfigured_node(eibnx_thr_info_t **, eibnx_gw_info_t **); 512 513 /* 514 * Devctl cbops (currently dummy) 515 */ 516 int eibnx_devctl_open(dev_t *, int, int, cred_t *); 517 int eibnx_devctl_close(dev_t, int, int, cred_t *); 518 int eibnx_devctl_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 519 520 /* 521 * External variable references 522 */ 523 extern pri_t minclsyspri; 524 extern eibnx_t *enx_global_ss; 525 extern ib_gid_t enx_solicit_mgid; 526 extern ib_gid_t enx_advertise_mgid; 527 528 #ifdef __cplusplus 529 } 530 #endif 531 532 #endif /* _SYS_IB_EOIB_ENX_IMPL_H */