/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

  26 #ifndef _SYS_IB_EOIB_ENX_IMPL_H
  27 #define _SYS_IB_EOIB_ENX_IMPL_H
  28 
  29 #ifdef __cplusplus
  30 extern "C" {
  31 #endif
  32 
  33 #include <sys/ddi.h>
  34 #include <sys/sunddi.h>
  35 #include <sys/varargs.h>
  36 #include <sys/ib/ibtl/ibti.h>
  37 #include <sys/ib/ibtl/ibvti.h>
  38 #include <sys/ib/ib_pkt_hdrs.h>
  39 #include <sys/ib/ibtl/impl/ibtl_ibnex.h>
  40 #include <sys/ib/mgt/sm_attr.h>
  41 
  42 #include <sys/ib/clients/eoib/fip.h>
  43 #include <sys/ib/clients/eoib/eib.h>
  44 
  45 /*
  46  * Driver specific constants
  47  */
  48 #define ENX_E_SUCCESS           0
  49 #define ENX_E_FAILURE           -1
  50 #define ENX_MAX_LINE            128
  51 #define ENX_GRH_SZ              (sizeof (ib_grh_t))
  52 
  53 /*
  54  * Debug messages
  55  */
  56 #define ENX_MSGS_CRIT           0x01
  57 #define ENX_MSGS_ERR            0x02
  58 #define ENX_MSGS_WARN           0x04
  59 #define ENX_MSGS_DEBUG          0x08
  60 #define ENX_MSGS_ARGS           0x10
  61 #define ENX_MSGS_VERBOSE        0x20
  62 #define ENX_MSGS_DEFAULT        (ENX_MSGS_CRIT | ENX_MSGS_ERR | ENX_MSGS_WARN)
  63 
  64 #define ENX_LOGSZ_DEFAULT       0x20000
  65 
  66 #define ENX_DPRINTF_CRIT        eibnx_dprintf_crit
  67 #define ENX_DPRINTF_ERR         eibnx_dprintf_err
  68 #define ENX_DPRINTF_WARN        eibnx_dprintf_warn
  69 #ifdef ENX_DEBUG
  70 #define ENX_DPRINTF_DEBUG       eibnx_dprintf_debug
  71 #define ENX_DPRINTF_ARGS        eibnx_dprintf_args
  72 #define ENX_DPRINTF_VERBOSE     eibnx_dprintf_verbose
  73 #else
  74 #define ENX_DPRINTF_DEBUG       0 &&
  75 #define ENX_DPRINTF_ARGS        0 &&
  76 #define ENX_DPRINTF_VERBOSE     0 &&
  77 #endif
  78 
  79 /*
  80  *  EoIB Nexus service threads
  81  */
  82 #define ENX_PORT_MONITOR        "eibnx_port_%d_monitor"
  83 #define ENX_NODE_CREATOR        "eibnx_node_creator"
  84 
  85 /*
  86  * Default period (us) for unicast solicitations to discovered gateways.
  87  * EoIB specification requires that hosts send solicitation atleast every
  88  * 4 * GW_ADV_PERIOD.
  89  */
  90 #define ENX_DFL_SOLICIT_PERIOD_USEC     32000000
  91 
  92 /*
  93  * Portinfo list per HCA
  94  */
  95 typedef struct eibnx_port_s {
  96         struct eibnx_port_s     *po_next;
  97         ibt_hca_portinfo_t      *po_pi;
  98         uint_t                  po_pi_size;
  99 } eibnx_port_t;
 100 
 101 /*
 102  * HCA details
 103  */
 104 typedef struct eibnx_hca_s {
 105         struct eibnx_hca_s      *hc_next;
 106         ib_guid_t               hc_guid;
 107         ibt_hca_hdl_t           hc_hdl;
 108         ibt_pd_hdl_t            hc_pd;
 109         eibnx_port_t            *hc_port;
 110 } eibnx_hca_t;
 111 
 112 /*
 113  * The port_monitor thread in EoIB nexus driver only sends two types of
 114  * packets: multicast solicitation the first time around, and periodic
 115  * unicast solicitations later to gateways that have been discovered. So
 116  * we need a couple of send wqes for the multicast solicitation and
 117  * probably as many send wqes as the number of gateways that may be
 118  * discovered from each port, for sending the unicast solicitations.
 119  * For unicast solicitations though, the UD destination needs to be set
 120  * up at the time we receive the advertisement from the gateway, using
 121  * ibt_modify_reply_ud_dest(), so we'll assign one send wqe for each
 122  * gateway that we discover.  This means that we need to acquire these
 123  * send wqe entries during rx processing in the completion handler, which
 124  * means we must avoid sleeping in trying to acquire the swqe. Therefore,
 125  * we'll pre-allocate these unicast solication send wqes to be atleast
 126  * twice the number of recv wqes.
 127  *
 128  * The receive packets expected by the EoIB nexus driver are the multicast
 129  * and unicast messages on the SOLICIT and ADVERTISE groups. These
 130  * shouldn't be too many, and should be tuned as we gain experience on
 131  * the traffic pattern.  We'll start with 16.
 132  */
 133 #define ENX_NUM_SWQE                    46
 134 #define ENX_NUM_RWQE                    16
 135 #define ENX_CQ_SIZE                     (ENX_NUM_SWQE + ENX_NUM_RWQE + 2)
 136 
 137 /*
 138  * qe_type values
 139  */
 140 #define ENX_QETYP_RWQE                  0x1
 141 #define ENX_QETYP_SWQE                  0x2
 142 
 143 /*
 144  * qe_flags bitmasks (protected by qe_lock). None of the
 145  * flag values may be zero.
 146  */
 147 #define ENX_QEFL_INUSE                  0x01
 148 #define ENX_QEFL_POSTED                 0x02
 149 #define ENX_QEFL_RELONCOMP              0x04
 150 
 151 /*
 152  * Recv and send workq entries
 153  */
 154 typedef struct eibnx_wqe_s {
 155         uint_t                  qe_type;
 156         uint_t                  qe_bufsz;
 157         ibt_wr_ds_t             qe_sgl;
 158         ibt_all_wr_t            qe_wr;
 159         kmutex_t                qe_lock;
 160         uint_t                  qe_flags;
 161 } eibnx_wqe_t;
 162 
 163 /*
 164  * Tx descriptor
 165  */
 166 typedef struct eibnx_tx_s {
 167         ib_vaddr_t              tx_vaddr;
 168         ibt_mr_hdl_t            tx_mr;
 169         ibt_lkey_t              tx_lkey;
 170         eibnx_wqe_t             tx_wqe[ENX_NUM_SWQE];
 171 } eibnx_tx_t;
 172 
 173 /*
 174  * Rx descriptor
 175  */
 176 typedef struct eibnx_rx_s {
 177         ib_vaddr_t              rx_vaddr;
 178         ibt_mr_hdl_t            rx_mr;
 179         ibt_lkey_t              rx_lkey;
 180         eibnx_wqe_t             rx_wqe[ENX_NUM_RWQE];
 181 } eibnx_rx_t;
 182 
 183 /*
 184  * Details about the address of each gateway we discover.
 185  */
 186 typedef struct eibnx_gw_addr_s {
 187         ibt_adds_vect_t         *ga_vect;
 188         ib_gid_t                ga_gid;
 189         ib_qpn_t                ga_qpn;
 190         ib_qkey_t               ga_qkey;
 191         ib_pkey_t               ga_pkey;
 192 } eibnx_gw_addr_t;
 193 
 194 /*
 195  * States for each GW
 196  */
 197 #define ENX_GW_STATE_UNAVAILABLE        1       /* GW nackd availability */
 198 #define ENX_GW_STATE_AVAILABLE          2       /* GW mcasted availability */
 199 #define ENX_GW_STATE_READY_TO_LOGIN     3       /* GW ucasted availability */
 200 
 201 typedef struct eibnx_gw_info_s {
 202         struct eibnx_gw_info_s  *gw_next;
 203         eibnx_wqe_t             *gw_swqe;
 204         uint_t                  gw_state;
 205 
 206         kmutex_t                gw_adv_lock;
 207         uint_t                  gw_adv_flag;
 208         int64_t                 gw_adv_last_lbolt;
 209         int64_t                 gw_adv_timeout_ticks;
 210 
 211         eibnx_gw_addr_t         gw_addr;
 212 
 213         ib_guid_t               gw_system_guid;
 214         ib_guid_t               gw_guid;
 215 
 216         uint32_t                gw_adv_period;
 217         uint32_t                gw_ka_period;
 218         uint32_t                gw_vnic_ka_period;
 219         ib_qpn_t                gw_ctrl_qpn;
 220 
 221         ib_lid_t                gw_lid;
 222         uint16_t                gw_portid;
 223         uint16_t                gw_num_net_vnics;
 224 
 225         uint8_t                 gw_is_host_adm_vnics;
 226         uint8_t                 gw_sl;
 227         uint8_t                 gw_n_rss_qpn;
 228         uint8_t                 gw_flag_ucast_advt;
 229         uint8_t                 gw_flag_available;
 230 
 231         uint8_t                 gw_system_name[EIB_GW_SYSNAME_LEN];
 232         uint8_t                 gw_port_name[EIB_GW_PORTNAME_LEN];
 233         uint8_t                 gw_vendor_id[EIB_GW_VENDOR_LEN];
 234 } eibnx_gw_info_t;
 235 
 236 /*
 237  * Values for gw_adv_flag (non-zero only)
 238  */
 239 #define ENX_GW_DEAD             1
 240 #define ENX_GW_ALIVE            2
 241 #define ENX_GW_AWARE            3
 242 
 243 /*
 244  * Currently, we only expect the advertisement type of packets
 245  * from the gw. But we do get login acks from the gateway also
 246  * here in the nexus, so we'll need an identifier for that.
 247  */
 248 typedef enum {
 249         FIP_GW_ADVERTISE_MCAST = 0,
 250         FIP_GW_ADVERTISE_UCAST,
 251         FIP_VNIC_LOGIN_ACK
 252 } eibnx_gw_pkt_type_t;
 253 
 254 /*
 255  * Currently, the only gw response handled by the eibnx driver
 256  * are the ucast/mcast advertisements.  Information collected from
 257  * both these responses may be packed into a eibnx_gw_info_t.
 258  * In the future, if we decide to handle other types of responses
 259  * from the gw, we could simply add the new types to the union.
 260  */
 261 typedef struct eibnx_gw_msg_s {
 262         eibnx_gw_pkt_type_t     gm_type;
 263         union {
 264                 eibnx_gw_info_t gm_info;
 265         } u;
 266 } eibnx_gw_msg_t;
 267 
 268 /*
 269  * List to hold the devinfo nodes of eoib instances
 270  */
 271 typedef struct eibnx_child_s {
 272         struct eibnx_child_s    *ch_next;
 273         dev_info_t              *ch_dip;
 274         eibnx_gw_info_t         *ch_gwi;
 275         char                    *ch_node_name;
 276 } eibnx_child_t;
 277 
 278 /*
 279  * Event bitmasks for the port-monitor to wait on. None of these flags
 280  * may be zero.
 281  */
 282 #define ENX_EVENT_LINK_UP               0x01
 283 #define ENX_EVENT_MCGS_AVAILABLE        0x02
 284 #define ENX_EVENT_TIMED_OUT             0x04
 285 #define ENX_EVENT_DIE                   0x08
 286 #define ENX_EVENT_COMPLETION            0x10
 287 
 288 /*
 289  * MCG Query/Join status
 290  */
 291 #define ENX_MCGS_FOUND                  0x1
 292 #define ENX_MCGS_JOINED                 0x2
 293 
 294 /*
 295  * Information that each port-monitor thread cares about
 296  */
 297 typedef struct eibnx_thr_info_s {
 298         struct eibnx_thr_info_s *ti_next;
 299         uint_t                  ti_progress;
 300 
 301         /*
 302          * Our kernel thread id
 303          */
 304         kt_did_t                ti_kt_did;
 305 
 306         /*
 307          * HCA, port and protection domain information
 308          */
 309         ib_guid_t               ti_hca_guid;
 310         ibt_hca_hdl_t           ti_hca;
 311         ibt_pd_hdl_t            ti_pd;
 312         ibt_hca_portinfo_t      *ti_pi;
 313         char                    *ti_ident;
 314 
 315         /*
 316          * Well-known multicast groups for solicitations
 317          * and advertisements.
 318          */
 319         kmutex_t                ti_mcg_lock;
 320         uint_t                  ti_mcg_status;
 321         ibt_mcg_info_t          *ti_advertise_mcg;
 322         ibt_mcg_info_t          *ti_solicit_mcg;
 323         uint_t                  ti_mcast_done;
 324 
 325         /*
 326          * Completion queue stuff
 327          */
 328         ibt_cq_hdl_t            ti_cq_hdl;
 329         uint_t                  ti_cq_sz;
 330         ibt_wc_t                *ti_wc;
 331         ddi_softint_handle_t    ti_softint_hdl;
 332 
 333         /*
 334          * Channel related
 335          */
 336         ibt_channel_hdl_t       ti_chan;
 337         ib_qpn_t                ti_qpn;
 338 
 339         /*
 340          * Transmit/Receive stuff
 341          */
 342         eibnx_tx_t              ti_snd;
 343         eibnx_rx_t              ti_rcv;
 344 
 345         /*
 346          * GW related stuff
 347          */
 348         kmutex_t                ti_gw_lock;
 349         eibnx_gw_info_t         *ti_gw;
 350 
 351         /*
 352          * Devinfo nodes for the eoib children
 353          */
 354         kmutex_t                ti_child_lock;
 355         eibnx_child_t           *ti_child;
 356 
 357         /*
 358          * Events that we wait on and/or handle
 359          */
 360         kmutex_t                ti_event_lock;
 361         kcondvar_t              ti_event_cv;
 362         uint_t                  ti_event;
 363 } eibnx_thr_info_t;
 364 
 365 /*
 366  * Workq entry for creation of eoib nodes
 367  */
 368 typedef struct eibnx_nodeq_s {
 369         struct eibnx_nodeq_s    *nc_next;
 370         eibnx_thr_info_t        *nc_info;
 371         eibnx_gw_info_t         *nc_gwi;
 372 } eibnx_nodeq_t;
 373 
 374 /*
 375  * Bus config status flags.  The in-prog is protected by
 376  * nx_lock, and the rest of the flags (currently only
 377  * buscfg-complete) is protected by the in-prog bit itself.
 378  */
 379 #define NX_FL_BUSOP_INPROG              0x1
 380 #define NX_FL_BUSCFG_COMPLETE           0x2
 381 #define NX_FL_BUSOP_MASK                0x3
 382 
 383 /*
 384  * EoIB nexus per-instance state
 385  */
 386 typedef struct eibnx_s {
 387         dev_info_t              *nx_dip;
 388         ibt_clnt_hdl_t          nx_ibt_hdl;
 389 
 390         kmutex_t                nx_lock;
 391         eibnx_hca_t             *nx_hca;
 392         eibnx_thr_info_t        *nx_thr_info;
 393         boolean_t               nx_monitors_up;
 394 
 395         kmutex_t                nx_nodeq_lock;
 396         kcondvar_t              nx_nodeq_cv;
 397         eibnx_nodeq_t           *nx_nodeq;
 398         kt_did_t                nx_nodeq_kt_did;
 399         uint_t                  nx_nodeq_thr_die;
 400 
 401         kmutex_t                nx_busop_lock;
 402         kcondvar_t              nx_busop_cv;
 403         uint_t                  nx_busop_flags;
 404 } eibnx_t;
 405 
 406 
 407 /*
 408  * Event tags for EoIB Nexus events delivered to EoIB instances
 409  */
 410 #define ENX_EVENT_TAG_GW_INFO_UPDATE            0
 411 #define ENX_EVENT_TAG_GW_AVAILABLE              1
 412 #define ENX_EVENT_TAG_LOGIN_ACK                 2
 413 
 414 /*
 415  * FUNCTION PROTOTYPES FOR CROSS-FILE LINKAGE
 416  */
 417 
 418 /*
 419  * Threads and Event Handlers
 420  */
 421 void eibnx_port_monitor(eibnx_thr_info_t *);
 422 void eibnx_subnet_notices_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
 423     ibt_subnet_event_t *);
 424 void eibnx_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
 425     ibt_async_event_t *);
 426 boolean_t eibnx_is_gw_dead(eibnx_gw_info_t *);
 427 void eibnx_create_eoib_node(void);
 428 void eibnx_comp_intr(ibt_cq_hdl_t, void *);
 429 uint_t eibnx_comp_handler(caddr_t, caddr_t);
 430 
 431 /*
 432  * IBT related functions
 433  */
 434 int eibnx_ibt_init(eibnx_t *);
 435 int eibnx_find_mgroups(eibnx_thr_info_t *);
 436 int eibnx_setup_cq(eibnx_thr_info_t *);
 437 int eibnx_setup_ud_channel(eibnx_thr_info_t *);
 438 int eibnx_setup_bufs(eibnx_thr_info_t *);
 439 int eibnx_setup_cq_handler(eibnx_thr_info_t *);
 440 int eibnx_join_mcgs(eibnx_thr_info_t *);
 441 int eibnx_rejoin_mcgs(eibnx_thr_info_t *);
 442 int eibnx_ibt_fini(eibnx_t *);
 443 
 444 void eibnx_rb_find_mgroups(eibnx_thr_info_t *);
 445 void eibnx_rb_setup_cq(eibnx_thr_info_t *);
 446 void eibnx_rb_setup_ud_channel(eibnx_thr_info_t *);
 447 void eibnx_rb_setup_bufs(eibnx_thr_info_t *);
 448 void eibnx_rb_setup_cq_handler(eibnx_thr_info_t *);
 449 void eibnx_rb_join_mcgs(eibnx_thr_info_t *);
 450 
 451 eibnx_hca_t *eibnx_prepare_hca(ib_guid_t);
 452 int eibnx_cleanup_hca(eibnx_hca_t *);
 453 
 454 /*
 455  * FIP packetizing related functions
 456  */
 457 int eibnx_fip_solicit_mcast(eibnx_thr_info_t *);
 458 int eibnx_fip_solicit_ucast(eibnx_thr_info_t *, clock_t *);
 459 int eibnx_fip_parse_pkt(uint8_t *, eibnx_gw_msg_t *);
 460 
 461 /*
 462  * Queue and List related routines
 463  */
 464 eibnx_wqe_t *eibnx_acquire_swqe(eibnx_thr_info_t *, int);
 465 void eibnx_return_swqe(eibnx_wqe_t *);
 466 void eibnx_return_rwqe(eibnx_thr_info_t *, eibnx_wqe_t *);
 467 void eibnx_release_swqe(eibnx_wqe_t *);
 468 
 469 void eibnx_enqueue_child(eibnx_thr_info_t *, eibnx_gw_info_t *, char *,
 470     dev_info_t *);
 471 int eibnx_update_child(eibnx_thr_info_t *, eibnx_gw_info_t *, dev_info_t *);
 472 dev_info_t *eibnx_find_child_dip_by_inst(eibnx_thr_info_t *, int);
 473 dev_info_t *eibnx_find_child_dip_by_gw(eibnx_thr_info_t *, uint16_t);
 474 
 475 eibnx_gw_info_t *eibnx_find_gw_in_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *);
 476 eibnx_gw_info_t *eibnx_add_gw_to_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *,
 477     ibt_wc_t *, uint8_t *);
 478 void eibnx_replace_gw_in_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *,
 479     eibnx_gw_info_t *, ibt_wc_t *, uint8_t *, boolean_t *);
 480 void eibnx_queue_for_creation(eibnx_thr_info_t *, eibnx_gw_info_t *);
 481 
 482 /*
 483  * Logging and Error reporting routines
 484  */
 485 void eibnx_debug_init(void);
 486 void eibnx_debug_fini(void);
 487 void eibnx_dprintf_crit(const char *fmt, ...);
 488 void eibnx_dprintf_err(const char *fmt, ...);
 489 void eibnx_dprintf_warn(const char *fmt, ...);
 490 #ifdef ENX_DEBUG
 491 void eibnx_dprintf_debug(const char *fmt, ...);
 492 void eibnx_dprintf_args(const char *fmt, ...);
 493 void eibnx_dprintf_verbose(const char *fmt, ...);
 494 #endif
 495 
 496 /*
 497  * Miscellaneous
 498  */
 499 void eibnx_cleanup_port_nodes(eibnx_thr_info_t *);
 500 void eibnx_create_node_props(dev_info_t *, eibnx_thr_info_t *,
 501     eibnx_gw_info_t *);
 502 int eibnx_name_child(dev_info_t *, char *, size_t);
 503 void eibnx_busop_inprog_enter(eibnx_t *);
 504 void eibnx_busop_inprog_exit(eibnx_t *);
 505 eibnx_thr_info_t *eibnx_start_port_monitor(eibnx_hca_t *, eibnx_port_t *);
 506 void eibnx_stop_port_monitor(eibnx_thr_info_t *);
 507 void eibnx_terminate_monitors(void);
 508 int eibnx_configure_node(eibnx_thr_info_t *, eibnx_gw_info_t *, dev_info_t **);
 509 int eibnx_unconfigure_node(eibnx_thr_info_t *, eibnx_gw_info_t *);
 510 int eibnx_locate_node_name(char *, eibnx_thr_info_t **, eibnx_gw_info_t **);
 511 int eibnx_locate_unconfigured_node(eibnx_thr_info_t **, eibnx_gw_info_t **);
 512 
 513 /*
 514  * Devctl cbops (currently dummy)
 515  */
 516 int eibnx_devctl_open(dev_t *, int, int, cred_t *);
 517 int eibnx_devctl_close(dev_t, int, int, cred_t *);
 518 int eibnx_devctl_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 519 
 520 /*
 521  * External variable references
 522  */
 523 extern pri_t minclsyspri;
 524 extern eibnx_t *enx_global_ss;
 525 extern ib_gid_t enx_solicit_mgid;
 526 extern ib_gid_t enx_advertise_mgid;
 527 
 528 #ifdef __cplusplus
 529 }
 530 #endif
 531 
 532 #endif  /* _SYS_IB_EOIB_ENX_IMPL_H */