1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2019, Joyent, Inc.
  28  */
  29 
  30 #ifndef _SYS_IB_EOIB_ENX_IMPL_H
  31 #define _SYS_IB_EOIB_ENX_IMPL_H
  32 
  33 #ifdef __cplusplus
  34 extern "C" {
  35 #endif
  36 
  37 #include <sys/ddi.h>
  38 #include <sys/sunddi.h>
  39 #include <sys/varargs.h>
  40 #include <sys/ib/ibtl/ibti.h>
  41 #include <sys/ib/ibtl/ibvti.h>
  42 #include <sys/ib/ib_pkt_hdrs.h>
  43 #include <sys/ib/ibtl/impl/ibtl_ibnex.h>
  44 #include <sys/ib/mgt/sm_attr.h>
  45 
  46 #include <sys/ib/clients/eoib/fip.h>
  47 #include <sys/ib/clients/eoib/eib.h>
  48 
  49 /*
  50  * Driver specific constants
      *
      * ENX_E_SUCCESS and ENX_E_FAILURE are the driver's integer status
      * codes; presumably they are what the int-returning eibnx_*() routines
      * declared in this header return (confirm against the .c files).
      * ENX_GRH_SZ is the size in bytes of an ib_grh_t.  The consumer of
      * ENX_MAX_LINE is not visible in this header.
      *
      * NOTE(review): ENX_E_FAILURE expands to an unparenthesized -1;
      * consider (-1) to follow the usual macro-hygiene convention.
  51  */
  52 #define ENX_E_SUCCESS           0
  53 #define ENX_E_FAILURE           -1
  54 #define ENX_MAX_LINE            128
  55 #define ENX_GRH_SZ              (sizeof (ib_grh_t))
  56 
  57 /*
  58  * Debug messages
      *
      * Each ENX_MSGS_* value is a distinct single-bit mask, one per message
      * class.  ENX_MSGS_DEFAULT selects the critical, error and warning
      * classes.  How the mask is consumed is not visible in this header
      * (presumably by the eibnx_dprintf_*() routines -- confirm).
  59  */
  60 #define ENX_MSGS_CRIT           0x01
  61 #define ENX_MSGS_ERR            0x02
  62 #define ENX_MSGS_WARN           0x04
  63 #define ENX_MSGS_DEBUG          0x08
  64 #define ENX_MSGS_ARGS           0x10
  65 #define ENX_MSGS_VERBOSE        0x20
  66 #define ENX_MSGS_DEFAULT        (ENX_MSGS_CRIT | ENX_MSGS_ERR | ENX_MSGS_WARN)
  67 
  68 #define ENX_LOGSZ_DEFAULT       0x20000 /* presumably default log size in bytes -- confirm */
  69 
     /*
      * The critical, error and warning printfs always map to real functions.
      * The debug, args and verbose variants map to functions only when
      * ENX_DEBUG is defined; otherwise they are variadic function-like
      * macros that expand to a no-op expression, so their arguments are
      * dropped by the preprocessor and never evaluated.
      *
      * NOTE(review): the no-op expansion is (void)(0) without an outer pair
      * of parentheses; ((void)0) is the more conventional spelling.
      */
  70 #define ENX_DPRINTF_CRIT        eibnx_dprintf_crit
  71 #define ENX_DPRINTF_ERR         eibnx_dprintf_err
  72 #define ENX_DPRINTF_WARN        eibnx_dprintf_warn
  73 #ifdef ENX_DEBUG
  74 #define ENX_DPRINTF_DEBUG       eibnx_dprintf_debug
  75 #define ENX_DPRINTF_ARGS        eibnx_dprintf_args
  76 #define ENX_DPRINTF_VERBOSE     eibnx_dprintf_verbose
  77 #else
  78 #define ENX_DPRINTF_DEBUG(...)  (void)(0)
  79 #define ENX_DPRINTF_ARGS(...)   (void)(0)
  80 #define ENX_DPRINTF_VERBOSE(...) (void)(0)
  81 #endif
  82 
  83 /*
  84  *  EoIB Nexus service threads
      *
      * ENX_PORT_MONITOR is a printf-style format that takes one integer
      * argument (presumably the port number -- confirm at the call site).
      * ENX_NODE_CREATOR is a fixed string.
  85  */
  86 #define ENX_PORT_MONITOR        "eibnx_port_%d_monitor"
  87 #define ENX_NODE_CREATOR        "eibnx_node_creator"
  88 
  89 /*
  90  * Default period (us) for unicast solicitations to discovered gateways.
  91  * EoIB specification requires that hosts send solicitation at least every
  92  * 4 * GW_ADV_PERIOD.
      *
      * The value below is 32,000,000 microseconds, i.e. 32 seconds.
  93  */
  94 #define ENX_DFL_SOLICIT_PERIOD_USEC     32000000
  95 
  96 /*
  97  * Portinfo list per HCA
  98  */
  99 typedef struct eibnx_port_s {
 100         struct eibnx_port_s     *po_next;
 101         ibt_hca_portinfo_t      *po_pi;
 102         uint_t                  po_pi_size;
 103 } eibnx_port_t;
 104 
 105 /*
 106  * HCA details
 107  */
 108 typedef struct eibnx_hca_s {
 109         struct eibnx_hca_s      *hc_next;
 110         ib_guid_t               hc_guid;
 111         ibt_hca_hdl_t           hc_hdl;
 112         ibt_pd_hdl_t            hc_pd;
 113         eibnx_port_t            *hc_port;
 114 } eibnx_hca_t;
 115 
 116 /*
 117  * The port_monitor thread in EoIB nexus driver only sends two types of
 118  * packets: multicast solicitation the first time around, and periodic
 119  * unicast solicitations later to gateways that have been discovered. So
 120  * we need a couple of send wqes for the multicast solicitation and
 121  * probably as many send wqes as the number of gateways that may be
 122  * discovered from each port, for sending the unicast solicitations.
 123  * For unicast solicitations though, the UD destination needs to be set
 124  * up at the time we receive the advertisement from the gateway, using
 125  * ibt_modify_reply_ud_dest(), so we'll assign one send wqe for each
 126  * gateway that we discover.  This means that we need to acquire these
 127  * send wqe entries during rx processing in the completion handler, which
 128  * means we must avoid sleeping in trying to acquire the swqe. Therefore,
 129  * we'll pre-allocate these unicast solicitation send wqes to be at least
 130  * twice the number of recv wqes.
 131  *
 132  * The receive packets expected by the EoIB nexus driver are the multicast
 133  * and unicast messages on the SOLICIT and ADVERTISE groups. These
 134  * shouldn't be too many, and should be tuned as we gain experience on
 135  * the traffic pattern.  We'll start with 16.
 136  */
 137 #define ENX_NUM_SWQE                    46      /* send wqes; more than 2 * ENX_NUM_RWQE */
 138 #define ENX_NUM_RWQE                    16      /* recv wqes */
 139 #define ENX_CQ_SIZE                     (ENX_NUM_SWQE + ENX_NUM_RWQE + 2)       /* both counts plus 2 */
 140 
 141 /*
 142  * qe_type values
      *
      * Stored in the qe_type member of eibnx_wqe_t.  RWQE and SWQE presumably
      * stand for receive and send work queue entry respectively.
 143  */
 144 #define ENX_QETYP_RWQE                  0x1
 145 #define ENX_QETYP_SWQE                  0x2
 146 
 147 /*
 148  * qe_flags bitmasks (protected by qe_lock). None of the
 149  * flag values may be zero.
      *
      * Each flag is a distinct single bit, so they can be combined in the
      * qe_flags member of eibnx_wqe_t.
 150  */
 151 #define ENX_QEFL_INUSE                  0x01
 152 #define ENX_QEFL_POSTED                 0x02
 153 #define ENX_QEFL_RELONCOMP              0x04
 154 
 155 /*
 156  * Recv and send workq entries
      *
      * The same structure is used for both directions; qe_type tells which
      * kind a given entry is.
 157  */
 158 typedef struct eibnx_wqe_s {
 159         uint_t                  qe_type;        /* one of ENX_QETYP_* */
 160         uint_t                  qe_bufsz;       /* presumably buffer size in bytes -- confirm */
 161         ibt_wr_ds_t             qe_sgl;         /* one data segment descriptor */
 162         ibt_all_wr_t            qe_wr;          /* work request for this entry */
 163         kmutex_t                qe_lock;        /* protects qe_flags */
 164         uint_t                  qe_flags;       /* ENX_QEFL_* bitmask */
 165 } eibnx_wqe_t;
 166 
 167 /*
 168  * Tx descriptor
      *
      * Holds the fixed pool of ENX_NUM_SWQE send wqes together with one
      * address / memory-region handle / lkey triple (presumably describing
      * a single registered buffer area shared by all send wqes -- confirm).
 169  */
 170 typedef struct eibnx_tx_s {
 171         ib_vaddr_t              tx_vaddr;
 172         ibt_mr_hdl_t            tx_mr;
 173         ibt_lkey_t              tx_lkey;
 174         eibnx_wqe_t             tx_wqe[ENX_NUM_SWQE];   /* send wqe pool */
 175 } eibnx_tx_t;
 176 
 177 /*
 178  * Rx descriptor
      *
      * Holds the fixed pool of ENX_NUM_RWQE recv wqes together with one
      * address / memory-region handle / lkey triple (presumably describing
      * a single registered buffer area shared by all recv wqes -- confirm).
 179  */
 180 typedef struct eibnx_rx_s {
 181         ib_vaddr_t              rx_vaddr;
 182         ibt_mr_hdl_t            rx_mr;
 183         ibt_lkey_t              rx_lkey;
 184         eibnx_wqe_t             rx_wqe[ENX_NUM_RWQE];   /* recv wqe pool */
 185 } eibnx_rx_t;
 186 
 187 /*
 188  * Details about the address of each gateway we discover.
      *
      * Embedded by value as the gw_addr member of eibnx_gw_info_t.
 189  */
 190 typedef struct eibnx_gw_addr_s {
 191         ibt_adds_vect_t         *ga_vect;       /* pointer to an address vector; owner not shown here */
 192         ib_gid_t                ga_gid;         /* GID */
 193         ib_qpn_t                ga_qpn;         /* queue pair number */
 194         ib_qkey_t               ga_qkey;        /* Q_Key */
 195         ib_pkey_t               ga_pkey;        /* P_Key */
 196 } eibnx_gw_addr_t;
 197 
 198 /*
 199  * States for each GW
      *
      * Presumably the values held in the gw_state member of eibnx_gw_info_t
      * (confirm).  They are consecutive integers, not bit flags.
 200  */
 201 #define ENX_GW_STATE_UNAVAILABLE        1       /* GW nackd availability */
 202 #define ENX_GW_STATE_AVAILABLE          2       /* GW mcasted availability */
 203 #define ENX_GW_STATE_READY_TO_LOGIN     3       /* GW ucasted availability */
 204 
     /*
      * Per-gateway information.  Entries can be chained through gw_next;
      * eibnx_thr_info_t keeps a pointer to this type in ti_gw.  The same
      * structure is also embedded in eibnx_gw_msg_t.
      */
 205 typedef struct eibnx_gw_info_s {
 206         struct eibnx_gw_info_s  *gw_next;       /* next gateway in the list */
 207         eibnx_wqe_t             *gw_swqe;       /* presumably the send wqe assigned to this gw */
 208         uint_t                  gw_state;       /* presumably ENX_GW_STATE_* -- confirm */
 209 
 210         kmutex_t                gw_adv_lock;    /* presumably guards the gw_adv_* members below */
 211         uint_t                  gw_adv_flag;    /* ENX_GW_DEAD, ENX_GW_ALIVE or ENX_GW_AWARE */
 212         int64_t                 gw_adv_last_lbolt;
 213         int64_t                 gw_adv_timeout_ticks;
 214 
 215         eibnx_gw_addr_t         gw_addr;        /* address details, stored by value */
 216 
 217         ib_guid_t               gw_system_guid;
 218         ib_guid_t               gw_guid;
 219 
 220         uint32_t                gw_adv_period;
 221         uint32_t                gw_ka_period;
 222         uint32_t                gw_vnic_ka_period;
 223         ib_qpn_t                gw_ctrl_qpn;
 224 
 225         ib_lid_t                gw_lid;
 226         uint16_t                gw_portid;
 227         uint16_t                gw_num_net_vnics;
 228 
 229         uint8_t                 gw_is_host_adm_vnics;
 230         uint8_t                 gw_sl;
 231         uint8_t                 gw_n_rss_qpn;
 232         uint8_t                 gw_flag_ucast_advt;
 233         uint8_t                 gw_flag_available;
 234 
         /* Fixed-size byte arrays; whether they are NUL-terminated is not shown here */
 235         uint8_t                 gw_system_name[EIB_GW_SYSNAME_LEN];
 236         uint8_t                 gw_port_name[EIB_GW_PORTNAME_LEN];
 237         uint8_t                 gw_vendor_id[EIB_GW_VENDOR_LEN];
 238 } eibnx_gw_info_t;
 239 
 240 /*
 241  * Values for gw_adv_flag (non-zero only)
      *
      * These are mutually exclusive enumerated values, not bit flags:
      * ENX_GW_AWARE (3) is numerically equal to ENX_GW_DEAD | ENX_GW_ALIVE,
      * so they must be compared with ==, never OR-ed or tested with &.
 242  */
 243 #define ENX_GW_DEAD             1
 244 #define ENX_GW_ALIVE            2
 245 #define ENX_GW_AWARE            3
 246 
 247 /*
 248  * Currently, we only expect the advertisement type of packets
 249  * from the gw. But we do get login acks from the gateway also
 250  * here in the nexus, so we'll need an identifier for that.
      *
      * The enumerators are numbered consecutively starting at 0.  This type
      * is the gm_type discriminator of eibnx_gw_msg_t.
 251  */
 252 typedef enum {
 253         FIP_GW_ADVERTISE_MCAST = 0,
 254         FIP_GW_ADVERTISE_UCAST,
 255         FIP_VNIC_LOGIN_ACK
 256 } eibnx_gw_pkt_type_t;
 257 
 258 /*
 259  * Currently, the only gw responses handled by the eibnx driver
 260  * are the ucast/mcast advertisements.  Information collected from
 261  * both these responses may be packed into a eibnx_gw_info_t.
 262  * In the future, if we decide to handle other types of responses
 263  * from the gw, we could simply add the new types to the union.
 264  */
 265 typedef struct eibnx_gw_msg_s {
 266         eibnx_gw_pkt_type_t     gm_type;        /* which kind of gw packet this is */
 267         union {
 268                 eibnx_gw_info_t gm_info;        /* the only member at present */
 269         } u;
 270 } eibnx_gw_msg_t;
 271 
 272 /*
 273  * List to hold the devinfo nodes of eoib instances
      *
      * Singly linked through ch_next; eibnx_thr_info_t keeps a pointer to
      * this type in ti_child, alongside ti_child_lock.
 274  */
 275 typedef struct eibnx_child_s {
 276         struct eibnx_child_s    *ch_next;       /* next child in the list */
 277         dev_info_t              *ch_dip;        /* devinfo node of the eoib instance */
 278         eibnx_gw_info_t         *ch_gwi;        /* presumably the gateway this child is bound to */
 279         char                    *ch_node_name;  /* node name string; ownership not shown here */
 280 } eibnx_child_t;
 281 
 282 /*
 283  * Event bitmasks for the port-monitor to wait on. None of these flags
 284  * may be zero.
      *
      * Each event is a distinct single bit, so several may be pending at
      * once.  Presumably kept in the ti_event member of eibnx_thr_info_t
      * (confirm).
 285  */
 286 #define ENX_EVENT_LINK_UP               0x01
 287 #define ENX_EVENT_MCGS_AVAILABLE        0x02
 288 #define ENX_EVENT_TIMED_OUT             0x04
 289 #define ENX_EVENT_DIE                   0x08
 290 #define ENX_EVENT_COMPLETION            0x10
 291 
 292 /*
 293  * MCG Query/Join status
      *
      * Two distinct bits.  Presumably kept in the ti_mcg_status member of
      * eibnx_thr_info_t (confirm).
 294  */
 295 #define ENX_MCGS_FOUND                  0x1
 296 #define ENX_MCGS_JOINED                 0x2
 297 
 298 /*
 299  * Information that each port-monitor thread cares about
      *
      * Instances can be chained through ti_next; eibnx_t keeps a pointer to
      * this type in nx_thr_info.  The send and receive descriptors (and
      * therefore all ENX_NUM_SWQE + ENX_NUM_RWQE wqes) are embedded by value,
      * so this structure is large.
 300  */
 301 typedef struct eibnx_thr_info_s {
 302         struct eibnx_thr_info_s *ti_next;       /* next port-monitor's info */
 303         uint_t                  ti_progress;    /* meaning not defined in this header */
 304 
 305         /*
 306          * Our kernel thread id
 307          */
 308         kt_did_t                ti_kt_did;
 309 
 310         /*
 311          * HCA, port and protection domain information
 312          */
 313         ib_guid_t               ti_hca_guid;
 314         ibt_hca_hdl_t           ti_hca;
 315         ibt_pd_hdl_t            ti_pd;
 316         ibt_hca_portinfo_t      *ti_pi;
 317         char                    *ti_ident;
 318 
 319         /*
 320          * Well-known multicast groups for solicitations
 321          * and advertisements.
 322          */
 323         kmutex_t                ti_mcg_lock;
 324         uint_t                  ti_mcg_status;  /* presumably ENX_MCGS_* bits -- confirm */
 325         ibt_mcg_info_t          *ti_advertise_mcg;
 326         ibt_mcg_info_t          *ti_solicit_mcg;
 327         uint_t                  ti_mcast_done;
 328 
 329         /*
 330          * Completion queue stuff
 331          */
 332         ibt_cq_hdl_t            ti_cq_hdl;
 333         uint_t                  ti_cq_sz;
 334         ibt_wc_t                *ti_wc;
 335         ddi_softint_handle_t    ti_softint_hdl;
 336 
 337         /*
 338          * Channel related
 339          */
 340         ibt_channel_hdl_t       ti_chan;
 341         ib_qpn_t                ti_qpn;
 342 
 343         /*
 344          * Transmit/Receive stuff
 345          */
 346         eibnx_tx_t              ti_snd;         /* embedded by value */
 347         eibnx_rx_t              ti_rcv;         /* embedded by value */
 348 
 349         /*
 350          * GW related stuff
 351          */
 352         kmutex_t                ti_gw_lock;
 353         eibnx_gw_info_t         *ti_gw;         /* head of the gateway list */
 354 
 355         /*
 356          * Devinfo nodes for the eoib children
 357          */
 358         kmutex_t                ti_child_lock;
 359         eibnx_child_t           *ti_child;      /* head of the child list */
 360 
 361         /*
 362          * Events that we wait on and/or handle
 363          */
 364         kmutex_t                ti_event_lock;
 365         kcondvar_t              ti_event_cv;
 366         uint_t                  ti_event;       /* presumably ENX_EVENT_* bits -- confirm */
 367 } eibnx_thr_info_t;
 368 
 369 /*
 370  * Workq entry for creation of eoib nodes
      *
      * Entries are chained through nc_next; eibnx_t keeps a pointer to this
      * type in nx_nodeq, next to nx_nodeq_lock and nx_nodeq_cv.  Each entry
      * pairs a port-monitor's info with a gateway.
 371  */
 372 typedef struct eibnx_nodeq_s {
 373         struct eibnx_nodeq_s    *nc_next;       /* next queued request */
 374         eibnx_thr_info_t        *nc_info;       /* port-monitor info for the request */
 375         eibnx_gw_info_t         *nc_gwi;        /* gateway for the request */
 376 } eibnx_nodeq_t;
 377 
 378 /*
 379  * Bus config status flags.  The in-prog is protected by
 380  * nx_lock, and the rest of the flags (currently only
 381  * buscfg-complete) is protected by the in-prog bit itself.
      *
      * NX_FL_BUSOP_MASK is the OR of the two flag bits.
      *
      * NOTE(review): eibnx_t declares a dedicated nx_busop_lock and
      * nx_busop_cv next to nx_busop_flags, in addition to nx_lock.  Confirm
      * which mutex actually guards the in-prog bit and update the wording
      * above if it is nx_busop_lock.
 382  */
 383 #define NX_FL_BUSOP_INPROG              0x1
 384 #define NX_FL_BUSCFG_COMPLETE           0x2
 385 #define NX_FL_BUSOP_MASK                0x3
 386 
 387 /*
 388  * EoIB nexus per-instance state
      *
      * The members fall into three groups, each with its own mutex declared
      * at the head of the group: HCA/port-monitor bookkeeping, the node
      * creation work queue, and bus-operation status.  Which members each
      * mutex covers is inferred from this grouping only -- confirm against
      * the code.  A pointer to this type is exported as enx_global_ss.
 389  */
 390 typedef struct eibnx_s {
 391         dev_info_t              *nx_dip;        /* devinfo node of the nexus */
 392         ibt_clnt_hdl_t          nx_ibt_hdl;     /* IBT client handle */
 393 
 394         kmutex_t                nx_lock;
 395         eibnx_hca_t             *nx_hca;        /* head of the HCA list */
 396         eibnx_thr_info_t        *nx_thr_info;   /* head of the port-monitor list */
 397         boolean_t               nx_monitors_up;
 398 
 399         kmutex_t                nx_nodeq_lock;
 400         kcondvar_t              nx_nodeq_cv;
 401         eibnx_nodeq_t           *nx_nodeq;      /* head of the node creation queue */
 402         kt_did_t                nx_nodeq_kt_did;
 403         uint_t                  nx_nodeq_thr_die;
 404 
 405         kmutex_t                nx_busop_lock;
 406         kcondvar_t              nx_busop_cv;
 407         uint_t                  nx_busop_flags; /* NX_FL_* bits */
 408 } eibnx_t;
 409 
 410 
 411 /*
 412  * Event tags for EoIB Nexus events delivered to EoIB instances
      *
      * Consecutive integers starting at 0 (not bit flags), and a separate
      * namespace from the ENX_EVENT_* port-monitor bitmasks.
 413  */
 414 #define ENX_EVENT_TAG_GW_INFO_UPDATE            0
 415 #define ENX_EVENT_TAG_GW_AVAILABLE              1
 416 #define ENX_EVENT_TAG_LOGIN_ACK                 2
 417 
 418 /*
 419  * FUNCTION PROTOTYPES FOR CROSS-FILE LINKAGE
      *
      * Only the signatures are visible here; the definitions live in the
      * driver's .c files.
 420  */
 421 
 422 /*
 423  * Threads and Event Handlers
      *
      * eibnx_port_monitor() takes the per-thread info structure and
      * eibnx_create_eoib_node() takes no arguments; presumably these are
      * the bodies of the service threads named by ENX_PORT_MONITOR and
      * ENX_NODE_CREATOR (confirm).  eibnx_comp_intr() and
      * eibnx_comp_handler() have the shapes of a CQ handler and a soft
      * interrupt handler respectively (confirm where they are registered).
 424  */
 425 void eibnx_port_monitor(eibnx_thr_info_t *);
 426 void eibnx_subnet_notices_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
 427     ibt_subnet_event_t *);
 428 void eibnx_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
 429     ibt_async_event_t *);
 430 boolean_t eibnx_is_gw_dead(eibnx_gw_info_t *);
 431 void eibnx_create_eoib_node(void);
 432 void eibnx_comp_intr(ibt_cq_hdl_t, void *);
 433 uint_t eibnx_comp_handler(caddr_t, caddr_t);
 434 
 435 /*
 436  * IBT related functions
      *
      * The int-returning routines presumably return ENX_E_SUCCESS or
      * ENX_E_FAILURE (confirm).  Each eibnx_rb_<step>() has a matching
      * eibnx_<step>() above it; presumably "rb" means rollback, i.e. the
      * routine undoes that step (confirm).
 437  */
 438 int eibnx_ibt_init(eibnx_t *);
 439 int eibnx_find_mgroups(eibnx_thr_info_t *);
 440 int eibnx_setup_cq(eibnx_thr_info_t *);
 441 int eibnx_setup_ud_channel(eibnx_thr_info_t *);
 442 int eibnx_setup_bufs(eibnx_thr_info_t *);
 443 int eibnx_setup_cq_handler(eibnx_thr_info_t *);
 444 int eibnx_join_mcgs(eibnx_thr_info_t *);
 445 int eibnx_rejoin_mcgs(eibnx_thr_info_t *);
 446 int eibnx_ibt_fini(eibnx_t *);
 447 
 448 void eibnx_rb_find_mgroups(eibnx_thr_info_t *);
 449 void eibnx_rb_setup_cq(eibnx_thr_info_t *);
 450 void eibnx_rb_setup_ud_channel(eibnx_thr_info_t *);
 451 void eibnx_rb_setup_bufs(eibnx_thr_info_t *);
 452 void eibnx_rb_setup_cq_handler(eibnx_thr_info_t *);
 453 void eibnx_rb_join_mcgs(eibnx_thr_info_t *);
 454 
 455 eibnx_hca_t *eibnx_prepare_hca(ib_guid_t);
 456 int eibnx_cleanup_hca(eibnx_hca_t *);
 457 
 458 /*
 459  * FIP packetizing related functions
      *
      * eibnx_fip_solicit_ucast() takes a clock_t pointer whose direction
      * (in or out) is not visible here.  eibnx_fip_parse_pkt() takes a raw
      * byte pointer and an eibnx_gw_msg_t to fill in (presumably an output
      * parameter -- confirm); no length is passed for the byte buffer.
 460  */
 461 int eibnx_fip_solicit_mcast(eibnx_thr_info_t *);
 462 int eibnx_fip_solicit_ucast(eibnx_thr_info_t *, clock_t *);
 463 int eibnx_fip_parse_pkt(uint8_t *, eibnx_gw_msg_t *);
 464 
 465 /*
 466  * Queue and List related routines
      *
      * The first group manages send/recv wqes, the second the list of eoib
      * child nodes, and the third the gateway list and the node creation
      * queue.  The meaning of the int argument to eibnx_acquire_swqe() is
      * not visible in this header (possibly a sleep/no-sleep flag --
      * confirm).  The difference between eibnx_return_swqe() and
      * eibnx_release_swqe() is likewise defined only in the .c files.
 467  */
 468 eibnx_wqe_t *eibnx_acquire_swqe(eibnx_thr_info_t *, int);
 469 void eibnx_return_swqe(eibnx_wqe_t *);
 470 void eibnx_return_rwqe(eibnx_thr_info_t *, eibnx_wqe_t *);
 471 void eibnx_release_swqe(eibnx_wqe_t *);
 472 
 473 void eibnx_enqueue_child(eibnx_thr_info_t *, eibnx_gw_info_t *, char *,
 474     dev_info_t *);
 475 int eibnx_update_child(eibnx_thr_info_t *, eibnx_gw_info_t *, dev_info_t *);
 476 dev_info_t *eibnx_find_child_dip_by_inst(eibnx_thr_info_t *, int);
 477 dev_info_t *eibnx_find_child_dip_by_gw(eibnx_thr_info_t *, uint16_t);
 478 
 479 eibnx_gw_info_t *eibnx_find_gw_in_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *);
 480 eibnx_gw_info_t *eibnx_add_gw_to_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *,
 481     ibt_wc_t *, uint8_t *);
 482 void eibnx_replace_gw_in_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *,
 483     eibnx_gw_info_t *, ibt_wc_t *, uint8_t *, boolean_t *);
 484 void eibnx_queue_for_creation(eibnx_thr_info_t *, eibnx_gw_info_t *);
 485 
 486 /*
 487  * Logging and Error reporting routines
      *
      * The printf-style routines are the targets of the ENX_DPRINTF_*
      * macros.  The debug, args and verbose variants are declared only when
      * ENX_DEBUG is defined, matching the macro definitions, so they must be
      * called through the macros rather than directly.
 488  */
 489 void eibnx_debug_init(void);
 490 void eibnx_debug_fini(void);
 491 void eibnx_dprintf_crit(const char *fmt, ...);
 492 void eibnx_dprintf_err(const char *fmt, ...);
 493 void eibnx_dprintf_warn(const char *fmt, ...);
 494 #ifdef ENX_DEBUG
 495 void eibnx_dprintf_debug(const char *fmt, ...);
 496 void eibnx_dprintf_args(const char *fmt, ...);
 497 void eibnx_dprintf_verbose(const char *fmt, ...);
 498 #endif
 499 
 500 /*
 501  * Miscellaneous
      *
      * eibnx_busop_inprog_enter()/eibnx_busop_inprog_exit() form a pair
      * operating on the nexus state (presumably setting and clearing
      * NX_FL_BUSOP_INPROG in nx_busop_flags -- confirm).
      * eibnx_start_port_monitor()/eibnx_stop_port_monitor() likewise pair
      * up around an eibnx_thr_info_t.  The eibnx_locate_*() routines and
      * eibnx_configure_node() take pointer-to-pointer arguments, presumably
      * used to return results to the caller (confirm).
 502  */
 503 void eibnx_cleanup_port_nodes(eibnx_thr_info_t *);
 504 void eibnx_create_node_props(dev_info_t *, eibnx_thr_info_t *,
 505     eibnx_gw_info_t *);
 506 int eibnx_name_child(dev_info_t *, char *, size_t);
 507 void eibnx_busop_inprog_enter(eibnx_t *);
 508 void eibnx_busop_inprog_exit(eibnx_t *);
 509 eibnx_thr_info_t *eibnx_start_port_monitor(eibnx_hca_t *, eibnx_port_t *);
 510 void eibnx_stop_port_monitor(eibnx_thr_info_t *);
 511 void eibnx_terminate_monitors(void);
 512 int eibnx_configure_node(eibnx_thr_info_t *, eibnx_gw_info_t *, dev_info_t **);
 513 int eibnx_unconfigure_node(eibnx_thr_info_t *, eibnx_gw_info_t *);
 514 int eibnx_locate_node_name(char *, eibnx_thr_info_t **, eibnx_gw_info_t **);
 515 int eibnx_locate_unconfigured_node(eibnx_thr_info_t **, eibnx_gw_info_t **);
 516 
 517 /*
 518  * Devctl cbops (currently dummy)
      *
      * The three signatures have the argument lists of character device
      * open, close and ioctl entry points.
 519  */
 520 int eibnx_devctl_open(dev_t *, int, int, cred_t *);
 521 int eibnx_devctl_close(dev_t, int, int, cred_t *);
 522 int eibnx_devctl_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 523 
 524 /*
 525  * External variable references
      *
      * These are declarations only; the objects are defined elsewhere.
      * enx_global_ss points to the nexus per-instance state.  The two MGIDs
      * presumably identify the well-known solicit and advertise multicast
      * groups (confirm).  minclsyspri is not an enx_* symbol and is
      * presumably a kernel-provided priority value (confirm).
 526  */
 527 extern pri_t minclsyspri;
 528 extern eibnx_t *enx_global_ss;
 529 extern ib_gid_t enx_solicit_mgid;
 530 extern ib_gid_t enx_advertise_mgid;
 531 
 532 #ifdef __cplusplus
 533 }
 534 #endif
 535 
 536 #endif  /* _SYS_IB_EOIB_ENX_IMPL_H */