1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * ibcm_impl.c
  28  *
  29  * contains internal functions of IB CM module.
  30  *
  31  * TBD:
  32  * 1. HCA CATASTROPHIC/RECOVERED not handled yet
  33  */
  34 
  35 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
  36 #include <sys/disp.h>
  37 
  38 
  39 /* function prototypes */
  40 static ibcm_status_t    ibcm_init(void);
  41 static ibcm_status_t    ibcm_fini(void);
  42 
  43 /* Routines to initialize and destroy CM global locks and CVs */
  44 static void             ibcm_init_locks(void);
  45 static void             ibcm_fini_locks(void);
  46 
  47 /* Routines that initialize/teardown CM's global hca structures */
  48 static void             ibcm_init_hcas();
  49 static ibcm_status_t    ibcm_fini_hcas();
  50 
  51 static void             ibcm_init_classportinfo();
  52 static void             ibcm_stop_timeout_thread();
  53 
  54 /* Routines that handle HCA attach/detach asyncs */
  55 static void             ibcm_hca_attach(ib_guid_t);
  56 static ibcm_status_t    ibcm_hca_detach(ibcm_hca_info_t *);
  57 
  58 /* Routines that initialize the HCA's port related fields */
  59 static ibt_status_t     ibcm_hca_init_port(ibcm_hca_info_t *hcap,
  60                             uint8_t port_index);
  61 static ibcm_status_t    ibcm_hca_fini_port(ibcm_hca_info_t *hcap,
  62                             uint8_t port_index);
  63 
  64 static void ibcm_rc_flow_control_init(void);
  65 static void ibcm_rc_flow_control_fini(void);
  66 
  67 /*
  68  * Routines that check if hca's avl trees and sidr lists are free of any
  69  * active client resources ie., RC or UD state structures in certain states
  70  */
  71 static ibcm_status_t    ibcm_check_avl_clean(ibcm_hca_info_t *hcap);
  72 static ibcm_status_t    ibcm_check_sidr_clean(ibcm_hca_info_t *hcap);
  73 
  74 /* Add a new hca structure to CM's global hca list */
  75 static ibcm_hca_info_t  *ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports);
  76 
  77 static void             ibcm_comm_est_handler(ibt_async_event_t *);
  78 void                    ibcm_async_handler(void *, ibt_hca_hdl_t,
  79                             ibt_async_code_t, ibt_async_event_t *);
  80 
  81 /* Global variables */
  82 char                    cmlog[] = "ibcm";       /* for debug log messages */
  83 ibt_clnt_hdl_t          ibcm_ibt_handle;        /* IBT handle */
  84 kmutex_t                ibcm_svc_info_lock;     /* list lock */
  85 kcondvar_t              ibcm_svc_info_cv;       /* cv for deregister */
  86 kmutex_t                ibcm_recv_mutex;
  87 avl_tree_t              ibcm_svc_avl_tree;
  88 taskq_t                 *ibcm_taskq = NULL;
  89 int                     taskq_dispatch_fail_cnt;
  90 
  91 kmutex_t                ibcm_mcglist_lock;      /* MCG list lock */
  92 kmutex_t                ibcm_trace_mutex;       /* Trace mutex */
  93 kmutex_t                ibcm_trace_print_mutex; /* Trace print mutex */
  94 int                     ibcm_conn_max_trcnt = IBCM_MAX_CONN_TRCNT;
  95 
  96 int                     ibcm_enable_trace = 2;  /* Trace level 4 by default */
  97 int                     ibcm_dtrace = 0; /* conditionally enable more dtrace */
  98 
  99 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_info_s::{svc_bind_list
 100     svc_ref_cnt svc_to_delete}))
 101 
 102 _NOTE(MUTEX_PROTECTS_DATA(ibcm_svc_info_lock, ibcm_svc_bind_s::{sbind_link}))
 103 
 104 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_mutex, ibcm_conn_trace_s))
 105 
 106 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_conn_trace_s))
 107 
 108 _NOTE(MUTEX_PROTECTS_DATA(ibcm_trace_print_mutex, ibcm_debug_buf))
 109 
 110 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_debug_buf))
 111 
 112 /*
 113  * Initial state is INIT. All hca dr's return success immediately in this
 114  * state, without adding or deleting any hca's to CM.
 115  */
 116 ibcm_finit_state_t      ibcm_finit_state = IBCM_FINIT_INIT;
 117 
 118 /* mutex and cv to manage hca's reference and resource count(s) */
 119 kmutex_t                ibcm_global_hca_lock;
 120 kcondvar_t              ibcm_global_hca_cv;
 121 
 122 /* mutex and cv to sa session open */
 123 kmutex_t                ibcm_sa_open_lock;
 124 kcondvar_t              ibcm_sa_open_cv;
 125 int                     ibcm_sa_timeout_delay = 1;              /* in ticks */
 126 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sa_open_lock,
 127     ibcm_port_info_s::{port_ibmf_saa_hdl port_saa_open_in_progress}))
 128 
 129 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_port_info_s::{port_ibmf_saa_hdl}))
 130 
 131 /* serialize sm notice callbacks */
 132 kmutex_t                ibcm_sm_notice_serialize_lock;
 133 
 134 _NOTE(LOCK_ORDER(ibcm_sm_notice_serialize_lock ibcm_global_hca_lock))
 135 
 136 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock, ibcm_hca_info_s::{hca_state
 137     hca_svc_cnt hca_acc_cnt hca_res_cnt hca_next}))
 138 
 139 _NOTE(MUTEX_PROTECTS_DATA(ibcm_global_hca_lock,
 140     ibcm_port_info_s::{port_ibmf_hdl}))
 141 
 142 _NOTE(MUTEX_PROTECTS_DATA(ibcm_sm_notice_serialize_lock,
 143     ibcm_port_info_s::{port_event_status}))
 144 
 145 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibcm_hca_info_s::{hca_state}))
 146 _NOTE(DATA_READABLE_WITHOUT_LOCK(
 147     ibcm_hca_info_s::{hca_port_info.port_ibmf_hdl}))
 148 
 149 /* mutex for CM's qp list management */
 150 kmutex_t                ibcm_qp_list_lock;
 151 
 152 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_port_info_s::{port_qplist}))
 153 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
 154 _NOTE(MUTEX_PROTECTS_DATA(ibcm_qp_list_lock, ibcm_qp_list_s))
 155 
 156 kcondvar_t              ibcm_timeout_list_cv;
 157 kcondvar_t              ibcm_timeout_thread_done_cv;
 158 kt_did_t                ibcm_timeout_thread_did;
 159 ibcm_state_data_t       *ibcm_timeout_list_hdr, *ibcm_timeout_list_tail;
 160 ibcm_ud_state_data_t    *ibcm_ud_timeout_list_hdr, *ibcm_ud_timeout_list_tail;
 161 kmutex_t                ibcm_timeout_list_lock;
 162 uint8_t                 ibcm_timeout_list_flags = 0;
 163 pri_t                   ibcm_timeout_thread_pri = MINCLSYSPRI;
 164 
 165 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
 166     ibcm_state_data_s::timeout_next))
 167 
 168 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock,
 169     ibcm_ud_state_data_s::ud_timeout_next))
 170 
 171 /*
 172  * Flow control logic for open_rc_channel uses the following.
 173  */
 174 
 175 struct ibcm_open_s {
 176         kmutex_t                mutex;
 177         kcondvar_t              cv;
 178         uint8_t                 task_running;
 179         uint_t                  queued;
 180         uint_t                  exit_deferred;
 181         uint_t                  in_progress;
 182         uint_t                  in_progress_max;
 183         uint_t                  sends;
 184         uint_t                  sends_max;
 185         uint_t                  sends_lowat;
 186         uint_t                  sends_hiwat;
 187         ibcm_state_data_t       *tail;
 188         ibcm_state_data_t       head;
 189 } ibcm_open;
 190 
 191 /*
 192  * Flow control logic for SA access and close_rc_channel calls follows.
 193  */
 194 
 195 int ibcm_close_simul_max        = 12;
 196 int ibcm_lapr_simul_max         = 12;
 197 int ibcm_saa_simul_max          = 8;
 198 
 199 typedef struct ibcm_flow1_s {
 200         struct ibcm_flow1_s     *link;
 201         kcondvar_t              cv;
 202         uint8_t                 waiters;        /* 1 to IBCM_FLOW_SIMUL_MAX */
 203 } ibcm_flow1_t;
 204 
 205 typedef struct ibcm_flow_s {
 206         ibcm_flow1_t            *list;
 207         uint_t                  simul;  /* #requests currently outstanding */
 208         uint_t                  simul_max;
 209         uint_t                  waiters_per_chunk;
 210         uint_t                  lowat;
 211         uint_t                  lowat_default;
 212         /* statistics */
 213         uint_t                  total;
 214 } ibcm_flow_t;
 215 
 216 ibcm_flow_t ibcm_saa_flow;
 217 ibcm_flow_t ibcm_close_flow;
 218 ibcm_flow_t ibcm_lapr_flow;
 219 
 220 /* NONBLOCKING close requests are queued */
 221 struct ibcm_close_s {
 222         kmutex_t                mutex;
 223         ibcm_state_data_t       *tail;
 224         ibcm_state_data_t       head;
 225 } ibcm_close;
 226 
 227 static ibt_clnt_modinfo_t ibcm_ibt_modinfo = {  /* Client's modinfop */
 228         IBTI_V_CURR,
 229         IBT_CM,
 230         ibcm_async_handler,
 231         NULL,
 232         "IBCM"
 233 };
 234 
 235 /* IBCM's list of HCAs registered with it */
 236 static ibcm_hca_info_t  *ibcm_hca_listp = NULL; /* CM's HCA list */
 237 
 238 /* Array of CM state call table functions */
 239 ibcm_state_handler_t    ibcm_sm_funcs_tbl[] = {
 240         ibcm_process_req_msg,
 241         ibcm_process_mra_msg,
 242         ibcm_process_rej_msg,
 243         ibcm_process_rep_msg,
 244         ibcm_process_rtu_msg,
 245         ibcm_process_dreq_msg,
 246         ibcm_process_drep_msg,
 247         ibcm_process_sidr_req_msg,
 248         ibcm_process_sidr_rep_msg,
 249         ibcm_process_lap_msg,
 250         ibcm_process_apr_msg
 251 };
 252 
 253 /* the following globals are CM tunables */
 254 ibt_rnr_nak_time_t      ibcm_default_rnr_nak_time = IBT_RNR_NAK_655ms;
 255 
 256 uint8_t         ibcm_max_retries = IBCM_MAX_RETRIES;
 257 clock_t         ibcm_local_processing_time = IBCM_LOCAL_RESPONSE_TIME;
 258 clock_t         ibcm_remote_response_time = IBCM_REMOTE_RESPONSE_TIME;
 259 ib_time_t       ibcm_max_sidr_rep_proctime = IBCM_MAX_SIDR_PROCESS_TIME;
 260 ib_time_t       ibcm_max_sidr_pktlife_time = IBCM_MAX_SIDR_PKT_LIFE_TIME;
 261 
 262 ib_time_t       ibcm_max_sidr_rep_store_time = 18;
 263 uint32_t        ibcm_wait_for_acc_cnt_timeout = 2000000;        /* 2 sec */
 264 
 265 ib_time_t       ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT;
 266 ib_time_t       ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT;
 267 
 268 /*
 269  * This delay accounts for time involved in various activities as follows :
 270  *
 271  * IBMF delays for posting the MADs in non-blocking mode
 272  * IBMF delays for receiving the MADs and delivering to CM
 273  * CM delays in processing the MADs before invoking client handlers,
 274  * Any other delays associated with HCA driver in processing the MADs and
 275  *      other subsystems that CM may invoke (ex : SA, HCA driver)
 276  */
 277 uint32_t        ibcm_sw_delay   = 1000; /* 1000us / 1ms */
 278 uint32_t        ibcm_max_sa_retries = IBCM_MAX_SA_RETRIES + 1;
 279 
 280 /*      approx boot time */
 281 uint32_t        ibcm_adj_btime = 4;     /* 4 seconds */
 282 
 283 /*
 284  * The information in ibcm_clpinfo is kept in wireformat and is setup at
 285  * init time, and used read-only after that
 286  */
 287 ibcm_classportinfo_msg_t        ibcm_clpinfo;
 288 
 289 char    *event_str[] = {
 290         "NEVER SEE THIS             ",
 291         "SESSION_ID                 ",
 292         "CHAN_HDL                   ",
 293         "LOCAL_COMID/HCA/PORT       ",
 294         "LOCAL_QPN                  ",
 295         "REMOTE_COMID/HCA           ",
 296         "REMOTE_QPN                 ",
 297         "BASE_TIME                  ",
 298         "INCOMING_REQ               ",
 299         "INCOMING_REP               ",
 300         "INCOMING_RTU               ",
 301         "INCOMING_COMEST            ",
 302         "INCOMING_MRA               ",
 303         "INCOMING_REJ               ",
 304         "INCOMING_LAP               ",
 305         "INCOMING_APR               ",
 306         "INCOMING_DREQ              ",
 307         "INCOMING_DREP              ",
 308         "OUTGOING_REQ               ",
 309         "OUTGOING_REP               ",
 310         "OUTGOING_RTU               ",
 311         "OUTGOING_LAP               ",
 312         "OUTGOING_APR               ",
 313         "OUTGOING_MRA               ",
 314         "OUTGOING_REJ               ",
 315         "OUTGOING_DREQ              ",
 316         "OUTGOING_DREP              ",
 317         "REQ_POST_COMPLETE          ",
 318         "REP_POST_COMPLETE          ",
 319         "RTU_POST_COMPLETE          ",
 320         "MRA_POST_COMPLETE          ",
 321         "REJ_POST_COMPLETE          ",
 322         "LAP_POST_COMPLETE          ",
 323         "APR_POST_COMPLETE          ",
 324         "DREQ_POST_COMPLETE         ",
 325         "DREP_POST_COMPLETE         ",
 326         "TIMEOUT_REP                ",
 327         "CALLED_REQ_RCVD_EVENT      ",
 328         "RET_REQ_RCVD_EVENT         ",
 329         "CALLED_REP_RCVD_EVENT      ",
 330         "RET_REP_RCVD_EVENT         ",
 331         "CALLED_CONN_EST_EVENT      ",
 332         "RET_CONN_EST_EVENT         ",
 333         "CALLED_CONN_FAIL_EVENT     ",
 334         "RET_CONN_FAIL_EVENT        ",
 335         "CALLED_CONN_CLOSE_EVENT    ",
 336         "RET_CONN_CLOSE_EVENT       ",
 337         "INIT_INIT                  ",
 338         "INIT_INIT_FAIL             ",
 339         "INIT_RTR                   ",
 340         "INIT_RTR_FAIL              ",
 341         "RTR_RTS                    ",
 342         "RTR_RTS_FAIL               ",
 343         "RTS_RTS                    ",
 344         "RTS_RTS_FAIL               ",
 345         "TO_ERROR                   ",
 346         "ERROR_FAIL                 ",
 347         "SET_ALT                    ",
 348         "SET_ALT_FAIL               ",
 349         "STALE_DETECT               ",
 350         "OUTGOING_REQ_RETRY         ",
 351         "OUTGOING_REP_RETRY         ",
 352         "OUTGOING_LAP_RETRY         ",
 353         "OUTGOING_MRA_RETRY         ",
 354         "OUTGOING_DREQ_RETRY        ",
 355         "NEVER SEE THIS             "
 356 };
 357 
 358 char    ibcm_debug_buf[IBCM_DEBUG_BUF_SIZE];
 359 
 360 _NOTE(SCHEME_PROTECTS_DATA("used in a localized function consistently",
 361     ibcm_debug_buf))
 362 _NOTE(READ_ONLY_DATA(ibcm_taskq))
 363 
 364 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_flags))
 365 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_timeout_list_hdr))
 366 _NOTE(MUTEX_PROTECTS_DATA(ibcm_timeout_list_lock, ibcm_ud_timeout_list_hdr))
 367 
 368 #ifdef DEBUG
 369 int             ibcm_test_mode = 0;     /* set to 1, if running tests */
 370 #endif
 371 
 372 
 373 /* Module Driver Info */
 374 static struct modlmisc ibcm_modlmisc = {
 375         &mod_miscops,
 376         "IB Communication Manager"
 377 };
 378 
 379 /* Module Linkage */
 380 static struct modlinkage ibcm_modlinkage = {
 381         MODREV_1,
 382         &ibcm_modlmisc,
 383         NULL
 384 };
 385 
 386 
 387 int
 388 _init(void)
 389 {
 390         int             rval;
 391         ibcm_status_t   status;
 392 
 393         status = ibcm_init();
 394         if (status != IBCM_SUCCESS) {
 395                 IBTF_DPRINTF_L2(cmlog, "_init: ibcm failed %d", status);
 396                 return (EINVAL);
 397         }
 398 
 399         rval = mod_install(&ibcm_modlinkage);
 400         if (rval != 0) {
 401                 IBTF_DPRINTF_L2(cmlog, "_init: ibcm mod_install failed %d",
 402                     rval);
 403                 (void) ibcm_fini();
 404         }
 405 
 406         IBTF_DPRINTF_L5(cmlog, "_init: ibcm successful");
 407         return (rval);
 408 
 409 }
 410 
 411 
 412 int
 413 _info(struct modinfo *modinfop)
 414 {
 415         return (mod_info(&ibcm_modlinkage, modinfop));
 416 }
 417 
 418 
 419 int
 420 _fini(void)
 421 {
 422         int status;
 423 
 424         if (ibcm_fini() != IBCM_SUCCESS)
 425                 return (EBUSY);
 426 
 427         if ((status = mod_remove(&ibcm_modlinkage)) != 0) {
 428                 IBTF_DPRINTF_L2(cmlog, "_fini: ibcm mod_remove failed %d",
 429                     status);
 430                 return (status);
 431         }
 432 
 433         IBTF_DPRINTF_L5(cmlog, "_fini: ibcm successful");
 434 
 435         return (status);
 436 }
 437 
 438 /* Initializes all global mutex and CV in cm module */
 439 static void
 440 ibcm_init_locks()
 441 {
 442 
 443         /* Verify CM MAD sizes */
 444 #ifdef DEBUG
 445 
 446         if (ibcm_test_mode > 1) {
 447 
 448                 IBTF_DPRINTF_L1(cmlog, "REQ MAD SIZE %d",
 449                     sizeof (ibcm_req_msg_t));
 450                 IBTF_DPRINTF_L1(cmlog, "REP MAD SIZE %d",
 451                     sizeof (ibcm_rep_msg_t));
 452                 IBTF_DPRINTF_L1(cmlog, "RTU MAD SIZE %d",
 453                     sizeof (ibcm_rtu_msg_t));
 454                 IBTF_DPRINTF_L1(cmlog, "MRA MAD SIZE %d",
 455                     sizeof (ibcm_mra_msg_t));
 456                 IBTF_DPRINTF_L1(cmlog, "REJ MAD SIZE %d",
 457                     sizeof (ibcm_rej_msg_t));
 458                 IBTF_DPRINTF_L1(cmlog, "LAP MAD SIZE %d",
 459                     sizeof (ibcm_lap_msg_t));
 460                 IBTF_DPRINTF_L1(cmlog, "APR MAD SIZE %d",
 461                     sizeof (ibcm_apr_msg_t));
 462                 IBTF_DPRINTF_L1(cmlog, "DREQ MAD SIZE %d",
 463                     sizeof (ibcm_dreq_msg_t));
 464                 IBTF_DPRINTF_L1(cmlog, "DREP MAD SIZE %d",
 465                     sizeof (ibcm_drep_msg_t));
 466                 IBTF_DPRINTF_L1(cmlog, "SIDR REQ MAD SIZE %d",
 467                     sizeof (ibcm_sidr_req_msg_t));
 468                 IBTF_DPRINTF_L1(cmlog, "SIDR REP MAD SIZE %d",
 469                     sizeof (ibcm_sidr_rep_msg_t));
 470         }
 471 
 472 #endif
 473 
 474         /* Create all global locks within cm module */
 475         mutex_init(&ibcm_svc_info_lock, NULL, MUTEX_DEFAULT, NULL);
 476         mutex_init(&ibcm_mcglist_lock, NULL, MUTEX_DEFAULT, NULL);
 477         mutex_init(&ibcm_timeout_list_lock, NULL, MUTEX_DEFAULT, NULL);
 478         mutex_init(&ibcm_global_hca_lock, NULL, MUTEX_DEFAULT, NULL);
 479         mutex_init(&ibcm_sa_open_lock, NULL, MUTEX_DEFAULT, NULL);
 480         mutex_init(&ibcm_recv_mutex, NULL, MUTEX_DEFAULT, NULL);
 481         mutex_init(&ibcm_sm_notice_serialize_lock, NULL, MUTEX_DEFAULT, NULL);
 482         mutex_init(&ibcm_qp_list_lock, NULL, MUTEX_DEFAULT, NULL);
 483         mutex_init(&ibcm_trace_mutex, NULL, MUTEX_DEFAULT, NULL);
 484         mutex_init(&ibcm_trace_print_mutex, NULL, MUTEX_DEFAULT, NULL);
 485         cv_init(&ibcm_svc_info_cv, NULL, CV_DRIVER, NULL);
 486         cv_init(&ibcm_timeout_list_cv, NULL, CV_DRIVER, NULL);
 487         cv_init(&ibcm_timeout_thread_done_cv, NULL, CV_DRIVER, NULL);
 488         cv_init(&ibcm_global_hca_cv, NULL, CV_DRIVER, NULL);
 489         cv_init(&ibcm_sa_open_cv, NULL, CV_DRIVER, NULL);
 490         avl_create(&ibcm_svc_avl_tree, ibcm_svc_compare,
 491             sizeof (ibcm_svc_info_t),
 492             offsetof(struct ibcm_svc_info_s, svc_link));
 493 
 494         IBTF_DPRINTF_L5(cmlog, "ibcm_init_locks: done");
 495 }
 496 
 497 /* Destroys all global mutex and CV in cm module */
 498 static void
 499 ibcm_fini_locks()
 500 {
 501         /* Destroy all global locks within cm module */
 502         mutex_destroy(&ibcm_svc_info_lock);
 503         mutex_destroy(&ibcm_mcglist_lock);
 504         mutex_destroy(&ibcm_timeout_list_lock);
 505         mutex_destroy(&ibcm_global_hca_lock);
 506         mutex_destroy(&ibcm_sa_open_lock);
 507         mutex_destroy(&ibcm_recv_mutex);
 508         mutex_destroy(&ibcm_sm_notice_serialize_lock);
 509         mutex_destroy(&ibcm_qp_list_lock);
 510         mutex_destroy(&ibcm_trace_mutex);
 511         mutex_destroy(&ibcm_trace_print_mutex);
 512         cv_destroy(&ibcm_svc_info_cv);
 513         cv_destroy(&ibcm_timeout_list_cv);
 514         cv_destroy(&ibcm_timeout_thread_done_cv);
 515         cv_destroy(&ibcm_global_hca_cv);
 516         cv_destroy(&ibcm_sa_open_cv);
 517         avl_destroy(&ibcm_svc_avl_tree);
 518 
 519         IBTF_DPRINTF_L5(cmlog, "ibcm_fini_locks: done");
 520 }
 521 
 522 
 523 /* Initialize CM's classport info */
 524 static void
 525 ibcm_init_classportinfo()
 526 {
 527         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
 528 
 529         ibcm_clpinfo.BaseVersion = IBCM_MAD_BASE_VERSION;
 530         ibcm_clpinfo.ClassVersion = IBCM_MAD_CLASS_VERSION;
 531 
 532         /* For now, CM supports same capabilities at all ports */
 533         ibcm_clpinfo.CapabilityMask =
 534             h2b16(IBCM_CPINFO_CAP_RC | IBCM_CPINFO_CAP_SIDR);
 535 
 536         /* Bits 0-7 are all 0 for Communication Mgmt Class */
 537 
 538         /* For now, CM has the same respvalue at all ports */
 539         ibcm_clpinfo.RespTimeValue_plus =
 540             h2b32(ibt_usec2ib(ibcm_local_processing_time) & 0x1f);
 541 
 542         /* For now, redirect fields are set to 0 */
 543         /* Trap fields are not applicable to CM, hence set to 0 */
 544 
 545         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_clpinfo));
 546         IBTF_DPRINTF_L5(cmlog, "ibcm_init_classportinfo: done");
 547 }
 548 
 549 /*
 550  * ibcm_init():
 551  *      - call ibt_attach()
 552  *      - create AVL trees
 553  *      - Attach HCA handlers that are already present before
 554  *      CM got loaded.
 555  *
 556  * Arguments:   NONE
 557  *
 558  * Return values:
 559  *      IBCM_SUCCESS - success
 560  */
 561 static ibcm_status_t
 562 ibcm_init(void)
 563 {
 564         ibt_status_t    status;
 565         kthread_t       *t;
 566 
 567         IBTF_DPRINTF_L3(cmlog, "ibcm_init:");
 568 
 569         ibcm_init_classportinfo();
 570 
 571         if (ibcm_init_ids() != IBCM_SUCCESS) {
 572                 IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
 573                     "fatal error: vmem_create() failed");
 574                 return (IBCM_FAILURE);
 575         }
 576         ibcm_init_locks();
 577 
 578         if (ibcm_ar_init() != IBCM_SUCCESS) {
 579                 IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
 580                     "fatal error: ibcm_ar_init() failed");
 581                 ibcm_fini_ids();
 582                 ibcm_fini_locks();
 583                 return (IBCM_FAILURE);
 584         }
 585         ibcm_rc_flow_control_init();
 586 
 587         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_taskq))
 588         ibcm_taskq = system_taskq;
 589         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_taskq))
 590 
 591         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
 592         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))
 593 
 594         /* Start the timeout list processing thread */
 595         ibcm_timeout_list_flags = 0;
 596         t = thread_create(NULL, 0, ibcm_process_tlist, 0, 0, &p0, TS_RUN,
 597             ibcm_timeout_thread_pri);
 598         ibcm_timeout_thread_did = t->t_did;
 599 
 600         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_flags))
 601         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_thread_did))
 602 
 603         /*
 604          * NOTE : if ibt_attach is done after ibcm_init_hcas, then some
 605          * HCA DR events may be lost. CM could call re-init hca list
 606          * again, but it is more complicated. Some HCA's DR's lost may
 607          * be HCA detach, which makes hca list re-syncing and locking more
 608          * complex
 609          */
 610         status = ibt_attach(&ibcm_ibt_modinfo, NULL, NULL, &ibcm_ibt_handle);
 611         if (status != IBT_SUCCESS) {
 612                 IBTF_DPRINTF_L2(cmlog, "ibcm_init(): ibt_attach failed %d",
 613                     status);
 614                 (void) ibcm_ar_fini();
 615                 ibcm_stop_timeout_thread();
 616                 ibcm_fini_ids();
 617                 ibcm_fini_locks();
 618                 ibcm_rc_flow_control_fini();
 619                 return (IBCM_FAILURE);
 620         }
 621 
 622         /* Block all HCA attach/detach asyncs */
 623         mutex_enter(&ibcm_global_hca_lock);
 624 
 625         ibcm_init_hcas();
 626         ibcm_finit_state = IBCM_FINIT_IDLE;
 627 
 628         ibcm_path_cache_init();
 629         /*
 630          * This callback will be used by IBTL to get the Node record for a
 631          * given LID via the speccified HCA and port.
 632          */
 633         ibtl_cm_set_node_info_cb(ibcm_ibtl_node_info);
 634 
 635         /* Unblock any waiting HCA DR asyncs in CM */
 636         mutex_exit(&ibcm_global_hca_lock);
 637 
 638         IBTF_DPRINTF_L4(cmlog, "ibcm_init: done");
 639         return (IBCM_SUCCESS);
 640 }
 641 
 642 /* Allocates and initializes the "per hca" global data in CM */
 643 static void
 644 ibcm_init_hcas()
 645 {
 646         uint_t  num_hcas = 0;
 647         ib_guid_t *guid_array;
 648         int i;
 649 
 650         IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas:");
 651 
 652         /* Get the number of HCAs */
 653         num_hcas = ibt_get_hca_list(&guid_array);
 654         IBTF_DPRINTF_L4(cmlog, "ibcm_init_hcas: ibt_get_hca_list() "
 655             "returned %d hcas", num_hcas);
 656 
 657         ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
 658 
 659         for (i = 0; i < num_hcas; i++)
 660                 ibcm_hca_attach(guid_array[i]);
 661 
 662         if (num_hcas)
 663                 ibt_free_hca_list(guid_array, num_hcas);
 664 
 665         IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas: done");
 666 }
 667 
 668 
 669 /*
 670  * ibcm_fini():
 671  *      - Deregister w/ ibt
 672  *      - Cleanup IBCM HCA listp
 673  *      - Destroy mutexes
 674  *
 675  * Arguments:   NONE
 676  *
 677  * Return values:
 678  *      IBCM_SUCCESS - success
 679  */
 680 static ibcm_status_t
 681 ibcm_fini(void)
 682 {
 683         ibt_status_t    status;
 684 
 685         IBTF_DPRINTF_L3(cmlog, "ibcm_fini:");
 686 
 687         /*
 688          * CM assumes that the all general clients got rid of all the
 689          * established connections and service registrations, completed all
 690          * pending SIDR operations before a call to ibcm_fini()
 691          */
 692 
 693         if (ibcm_ar_fini() != IBCM_SUCCESS) {
 694                 IBTF_DPRINTF_L2(cmlog, "ibcm_fini: ibcm_ar_fini failed");
 695                 return (IBCM_FAILURE);
 696         }
 697 
 698         /* cleanup the svcinfo list */
 699         mutex_enter(&ibcm_svc_info_lock);
 700         if (avl_first(&ibcm_svc_avl_tree) != NULL) {
 701                 IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
 702                     "ibcm_svc_avl_tree is not empty");
 703                 mutex_exit(&ibcm_svc_info_lock);
 704                 return (IBCM_FAILURE);
 705         }
 706         mutex_exit(&ibcm_svc_info_lock);
 707 
 708         /* disables any new hca attach/detaches */
 709         mutex_enter(&ibcm_global_hca_lock);
 710 
 711         ibcm_finit_state = IBCM_FINIT_BUSY;
 712 
 713         if (ibcm_fini_hcas() != IBCM_SUCCESS) {
 714                 IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
 715                     "some hca's still have client resources");
 716 
 717                 /* First, re-initialize the hcas */
 718                 ibcm_init_hcas();
 719                 /* and then enable the HCA asyncs */
 720                 ibcm_finit_state = IBCM_FINIT_IDLE;
 721                 mutex_exit(&ibcm_global_hca_lock);
 722                 if (ibcm_ar_init() != IBCM_SUCCESS) {
 723                         IBTF_DPRINTF_L1(cmlog, "ibcm_fini:ibcm_ar_init failed");
 724                 }
 725                 return (IBCM_FAILURE);
 726         }
 727 
 728         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
 729         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))
 730 
 731         ASSERT(ibcm_timeout_list_hdr == NULL);
 732         ASSERT(ibcm_ud_timeout_list_hdr == NULL);
 733 
 734         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_timeout_list_hdr))
 735         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibcm_ud_timeout_list_hdr))
 736 
 737         /* Release any pending asyncs on ibcm_global_hca_lock */
 738         ibcm_finit_state = IBCM_FINIT_SUCCESS;
 739         mutex_exit(&ibcm_global_hca_lock);
 740 
 741         ibcm_stop_timeout_thread();
 742 
 743         ibtl_cm_set_node_info_cb(NULL);
 744         /*
 745          * Detach from IBTL. Waits until all pending asyncs are complete.
 746          * Above cv_broadcast wakes up any waiting hca attach/detach asyncs
 747          */
 748         status = ibt_detach(ibcm_ibt_handle);
 749 
 750         /* if detach fails, CM didn't free up some resources, so assert */
 751         if (status != IBT_SUCCESS)
 752                 IBTF_DPRINTF_L1(cmlog, "ibcm_fini: ibt_detach failed %d",
 753                     status);
 754 
 755         ibcm_rc_flow_control_fini();
 756 
 757         ibcm_path_cache_fini();
 758 
 759         ibcm_fini_ids();
 760         ibcm_fini_locks();
 761         IBTF_DPRINTF_L3(cmlog, "ibcm_fini: done");
 762         return (IBCM_SUCCESS);
 763 }
 764 
 765 /* This routine exit's the ibcm timeout thread  */
 766 static void
 767 ibcm_stop_timeout_thread()
 768 {
 769         mutex_enter(&ibcm_timeout_list_lock);
 770 
 771         /* Stop the timeout list processing thread */
 772         ibcm_timeout_list_flags =
 773             ibcm_timeout_list_flags | IBCM_TIMEOUT_THREAD_EXIT;
 774 
 775         /* Wake up, if the timeout thread is on a cv_wait */
 776         cv_signal(&ibcm_timeout_list_cv);
 777 
 778         mutex_exit(&ibcm_timeout_list_lock);
 779         thread_join(ibcm_timeout_thread_did);
 780 
 781         IBTF_DPRINTF_L5(cmlog, "ibcm_stop_timeout_thread: done");
 782 }
 783 
 784 
 785 /* Attempts to release all the hca's associated with CM */
 786 static ibcm_status_t
 787 ibcm_fini_hcas()
 788 {
 789         ibcm_hca_info_t *hcap, *next;
 790 
 791         IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas:");
 792 
 793         ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
 794 
 795         hcap = ibcm_hca_listp;
 796         while (hcap != NULL) {
 797                 next = hcap->hca_next;
 798                 if (ibcm_hca_detach(hcap) != IBCM_SUCCESS) {
 799                         ibcm_hca_listp = hcap;
 800                         return (IBCM_FAILURE);
 801                 }
 802                 hcap = next;
 803         }
 804 
 805         IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas: SUCCEEDED");
 806         return (IBCM_SUCCESS);
 807 }
 808 
 809 
 810 /*
 811  * ibcm_hca_attach():
 812  *      Called as an asynchronous event to notify CM of an attach of HCA.
 813  *      Here ibcm_hca_info_t is initialized and all fields are
 814  *      filled in along with SA Access handles and IBMA handles.
 815  *      Also called from ibcm_init to initialize ibcm_hca_info_t's for each
 816  *      hca's
 817  *
 818  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
 819  *      hca_guid        - HCA's guid
 820  *
 821  * Return values: NONE
 822  */
 823 static void
 824 ibcm_hca_attach(ib_guid_t hcaguid)
 825 {
 826         int                     i;
 827         ibt_status_t            status;
 828         uint8_t                 nports = 0;
 829         ibcm_hca_info_t         *hcap;
 830         ibt_hca_attr_t          hca_attrs;
 831 
 832         IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: guid = 0x%llX", hcaguid);
 833 
 834         ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
 835 
 836         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*hcap))
 837 
 838         status = ibt_query_hca_byguid(hcaguid, &hca_attrs);
 839         if (status != IBT_SUCCESS) {
 840                 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
 841                     "ibt_query_hca_byguid failed = %d", status);
 842                 return;
 843         }
 844         nports = hca_attrs.hca_nports;
 845 
 846         IBTF_DPRINTF_L4(cmlog, "ibcm_hca_attach: num ports = %x", nports);
 847 
 848         if ((hcap = ibcm_add_hca_entry(hcaguid, nports)) == NULL)
 849                 return;
 850 
 851         hcap->hca_guid = hcaguid;    /* Set GUID */
 852         hcap->hca_num_ports = nports;        /* Set number of ports */
 853 
 854         if (ibcm_init_hca_ids(hcap) != IBCM_SUCCESS) {
 855                 ibcm_delete_hca_entry(hcap);
 856                 return;
 857         }
 858 
 859         /* Store the static hca attribute data */
 860         hcap->hca_caps = hca_attrs.hca_flags;
 861         hcap->hca_vendor_id = hca_attrs.hca_vendor_id;
 862         hcap->hca_device_id = hca_attrs.hca_device_id;
 863         hcap->hca_ack_delay = hca_attrs.hca_local_ack_delay;
 864         hcap->hca_max_rdma_in_qp = hca_attrs.hca_max_rdma_in_qp;
 865         hcap->hca_max_rdma_out_qp = hca_attrs.hca_max_rdma_out_qp;
 866 
 867         /* loop thru nports and initialize IBMF handles */
 868         for (i = 0; i < hcap->hca_num_ports; i++) {
 869                 status = ibt_get_port_state_byguid(hcaguid, i + 1, NULL, NULL);
 870                 if (status != IBT_SUCCESS) {
 871                         IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
 872                             "port_num %d state DOWN", i + 1);
 873                 }
 874 
 875                 hcap->hca_port_info[i].port_hcap = hcap;
 876                 hcap->hca_port_info[i].port_num = i+1;
 877 
 878                 if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
 879                         IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
 880                             "ibcm_hca_init_port failed %d port_num %d",
 881                             status, i+1);
 882         }
 883 
 884         /* create the "active" CM AVL tree */
 885         avl_create(&hcap->hca_active_tree, ibcm_active_node_compare,
 886             sizeof (ibcm_state_data_t),
 887             offsetof(struct ibcm_state_data_s, avl_active_link));
 888 
 889         /* create the "passive" CM AVL tree */
 890         avl_create(&hcap->hca_passive_tree, ibcm_passive_node_compare,
 891             sizeof (ibcm_state_data_t),
 892             offsetof(struct ibcm_state_data_s, avl_passive_link));
 893 
 894         /* create the "passive comid" CM AVL tree */
 895         avl_create(&hcap->hca_passive_comid_tree,
 896             ibcm_passive_comid_node_compare,
 897             sizeof (ibcm_state_data_t),
 898             offsetof(struct ibcm_state_data_s, avl_passive_comid_link));
 899 
 900         /*
 901          * Mark the state of the HCA to "attach" only at the end
 902          * Now CM starts accepting incoming MADs and client API calls
 903          */
 904         hcap->hca_state = IBCM_HCA_ACTIVE;
 905 
 906         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*hcap))
 907 
 908         IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: ATTACH Done");
 909 }
 910 
 911 /*
 912  * ibcm_hca_detach():
 913  *      Called as an asynchronous event to notify CM of a detach of HCA.
 914  *      Here ibcm_hca_info_t is freed up and all fields that
 915  *      were initialized earlier are cleaned up
 916  *
 917  * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
 918  *      hca_guid    - HCA's guid
 919  *
 920  * Return values:
 921  *      IBCM_SUCCESS    - able to detach HCA
 922  *      IBCM_FAILURE    - failed to detach HCA
 923  */
 924 static ibcm_status_t
 925 ibcm_hca_detach(ibcm_hca_info_t *hcap)
 926 {
 927         int             port_index, i;
 928         ibcm_status_t   status = IBCM_SUCCESS;
 929         clock_t         absolute_time;
 930 
 931         IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: hcap = 0x%p guid = 0x%llX",
 932             hcap, hcap->hca_guid);
 933 
 934         ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
 935 
 936         /*
 937          * Declare hca is going away to all CM clients. Wait until the
 938          * access count becomes zero.
 939          */
 940         hcap->hca_state = IBCM_HCA_NOT_ACTIVE;
 941 
 942         /* wait on response CV */
 943         absolute_time = ddi_get_lbolt() +
 944             drv_usectohz(ibcm_wait_for_acc_cnt_timeout);
 945 
 946         while (hcap->hca_acc_cnt > 0)
 947                 if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
 948                     absolute_time) == -1)
 949                         break;
 950 
 951         if (hcap->hca_acc_cnt != 0) {
 952                 /* We got a timeout */
 953                 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
 954                     " to timeout on hca_acc_cnt %u, \n Some CM Clients are "
 955                     "still active, looks like we need to wait some more time "
 956                     "(ibcm_wait_for_acc_cnt_timeout).", hcap->hca_acc_cnt);
 957                 hcap->hca_state = IBCM_HCA_ACTIVE;
 958                 return (IBCM_FAILURE);
 959         }
 960 
 961         /*
 962          * First make sure, there are no active users of ibma handles,
 963          * and then de-register handles.
 964          */
 965 
 966         /* make sure that there are no "Service"s registered w/ this HCA. */
 967         if (hcap->hca_svc_cnt != 0) {
 968                 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
 969                     "Active services still there %d", hcap->hca_svc_cnt);
 970                 hcap->hca_state = IBCM_HCA_ACTIVE;
 971                 return (IBCM_FAILURE);
 972         }
 973 
 974         if (ibcm_check_sidr_clean(hcap) != IBCM_SUCCESS) {
 975                 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach:"
 976                     "There are active SIDR operations");
 977                 hcap->hca_state = IBCM_HCA_ACTIVE;
 978                 return (IBCM_FAILURE);
 979         }
 980 
 981         if (ibcm_check_avl_clean(hcap) != IBCM_SUCCESS) {
 982                 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
 983                     "There are active RC connections");
 984                 hcap->hca_state = IBCM_HCA_ACTIVE;
 985                 return (IBCM_FAILURE);
 986         }
 987 
 988         /*
 989          * Now, wait until all rc and sidr stateps go away
 990          * All these stateps must be short lived ones, waiting to be cleaned
 991          * up after some timeout value, based on the current state.
 992          */
 993         IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d",
 994             hcap->hca_guid, hcap->hca_res_cnt);
 995 
 996         while (hcap->hca_res_cnt > 0)
 997                 cv_wait(&ibcm_global_hca_cv, &ibcm_global_hca_lock);
 998 
 999         /* Re-assert the while loop step above */
1000         ASSERT(hcap->hca_sidr_list == NULL);
1001         avl_destroy(&hcap->hca_active_tree);
1002         avl_destroy(&hcap->hca_passive_tree);
1003         avl_destroy(&hcap->hca_passive_comid_tree);
1004 
1005         /*
1006          * Unregister all ports from IBMA
1007          * If there is a failure, re-initialize any free'd ibma handles. This
1008          * is required to receive the incoming mads
1009          */
1010         status = IBCM_SUCCESS;
1011         for (port_index = 0; port_index < hcap->hca_num_ports; port_index++) {
1012                 if ((status = ibcm_hca_fini_port(hcap, port_index)) !=
1013                     IBCM_SUCCESS) {
1014                         IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1015                             "Failed to free IBMA Handle for port_num %d",
1016                             port_index + 1);
1017                         break;
1018                 }
1019         }
1020 
1021         /* If detach fails, re-initialize ibma handles for incoming mads */
1022         if (status != IBCM_SUCCESS)  {
1023                 for (i = 0; i < port_index; i++) {
1024                         if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
1025                                 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
1026                                     "Failed to re-allocate IBMA Handles for"
1027                                     " port_num %d", port_index + 1);
1028                 }
1029                 hcap->hca_state = IBCM_HCA_ACTIVE;
1030                 return (IBCM_FAILURE);
1031         }
1032 
1033         ibcm_fini_hca_ids(hcap);
1034         ibcm_delete_hca_entry(hcap);
1035 
1036         IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: DETACH succeeded");
1037         return (IBCM_SUCCESS);
1038 }
1039 
1040 /* Checks, if there are any active sidr state entries in the specified hca */
1041 static ibcm_status_t
1042 ibcm_check_sidr_clean(ibcm_hca_info_t *hcap)
1043 {
1044         ibcm_ud_state_data_t    *usp;
1045         uint32_t                transient_cnt = 0;
1046 
1047         IBTF_DPRINTF_L5(cmlog, "ibcm_check_sidr_clean:");
1048 
1049         rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
1050         usp = hcap->hca_sidr_list;   /* Point to the list */
1051         while (usp != NULL) {
1052                 mutex_enter(&usp->ud_state_mutex);
1053                 if ((usp->ud_state != IBCM_STATE_SIDR_REP_SENT) &&
1054                     (usp->ud_state != IBCM_STATE_TIMED_OUT) &&
1055                     (usp->ud_state != IBCM_STATE_DELETE)) {
1056 
1057                         IBTF_DPRINTF_L3(cmlog, "ibcm_check_sidr_clean:"
1058                             "usp = %p not in transient state = %d", usp,
1059                             usp->ud_state);
1060 
1061                         mutex_exit(&usp->ud_state_mutex);
1062                         rw_exit(&hcap->hca_sidr_list_lock);
1063                         return (IBCM_FAILURE);
1064                 } else {
1065                         mutex_exit(&usp->ud_state_mutex);
1066                         ++transient_cnt;
1067                 }
1068 
1069                 usp = usp->ud_nextp;
1070         }
1071         rw_exit(&hcap->hca_sidr_list_lock);
1072 
1073         IBTF_DPRINTF_L4(cmlog, "ibcm_check_sidr_clean: transient_cnt %d",
1074             transient_cnt);
1075 
1076         return (IBCM_SUCCESS);
1077 }
1078 
1079 /* Checks, if there are any active rc state entries, in the specified hca */
1080 static ibcm_status_t
1081 ibcm_check_avl_clean(ibcm_hca_info_t *hcap)
1082 
1083 {
1084         ibcm_state_data_t       *sp;
1085         avl_tree_t              *avl_tree;
1086         uint32_t                transient_cnt = 0;
1087 
1088         IBTF_DPRINTF_L5(cmlog, "ibcm_check_avl_clean:");
1089         /*
1090          * Both the trees ie., active and passive must reference to all
1091          * statep's, so let's use one
1092          */
1093         avl_tree = &hcap->hca_active_tree;
1094 
1095         rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
1096 
1097         for (sp = avl_first(avl_tree); sp != NULL;
1098             sp = avl_walk(avl_tree, sp, AVL_AFTER)) {
1099                 mutex_enter(&sp->state_mutex);
1100                 if ((sp->state != IBCM_STATE_TIMEWAIT) &&
1101                     (sp->state != IBCM_STATE_REJ_SENT) &&
1102                     (sp->state != IBCM_STATE_DELETE)) {
1103                         IBTF_DPRINTF_L3(cmlog, "ibcm_check_avl_clean: "
1104                             "sp = %p not in transient state = %d", sp,
1105                             sp->state);
1106                         mutex_exit(&sp->state_mutex);
1107                         rw_exit(&hcap->hca_state_rwlock);
1108                         return (IBCM_FAILURE);
1109                 } else {
1110                         mutex_exit(&sp->state_mutex);
1111                         ++transient_cnt;
1112                 }
1113         }
1114 
1115         rw_exit(&hcap->hca_state_rwlock);
1116 
1117         IBTF_DPRINTF_L4(cmlog, "ibcm_check_avl_clean: transient_cnt %d",
1118             transient_cnt);
1119 
1120         return (IBCM_SUCCESS);
1121 }
1122 
1123 /* Adds a new entry into CM's global hca list, if hca_guid is not there yet */
1124 static ibcm_hca_info_t *
1125 ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports)
1126 {
1127         ibcm_hca_info_t *hcap;
1128 
1129         IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: guid = 0x%llX",
1130             hcaguid);
1131 
1132         ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1133 
1134         /*
1135          * Check if this hca_guid already in the list
1136          * If yes, then ignore this and return NULL
1137          */
1138 
1139         hcap = ibcm_hca_listp;
1140 
1141         /* search for this HCA */
1142         while (hcap != NULL) {
1143                 if (hcap->hca_guid == hcaguid) {
1144                         /* already exists */
1145                         IBTF_DPRINTF_L2(cmlog, "ibcm_add_hca_entry: "
1146                             "hcap %p guid 0x%llX, entry already exists !!",
1147                             hcap, hcap->hca_guid);
1148                         return (NULL);
1149                 }
1150                 hcap = hcap->hca_next;
1151         }
1152 
1153         /* Allocate storage for the new HCA entry found */
1154         hcap = kmem_zalloc(sizeof (ibcm_hca_info_t) +
1155             (nports - 1) * sizeof (ibcm_port_info_t), KM_SLEEP);
1156 
1157         /* initialize RW lock */
1158         rw_init(&hcap->hca_state_rwlock, NULL, RW_DRIVER, NULL);
1159         /* initialize SIDR list lock */
1160         rw_init(&hcap->hca_sidr_list_lock, NULL, RW_DRIVER, NULL);
1161         /* Insert "hcap" into the global HCA list maintained by CM */
1162         hcap->hca_next = ibcm_hca_listp;
1163         ibcm_hca_listp = hcap;
1164 
1165         IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: done hcap = 0x%p", hcap);
1166 
1167         return (hcap);
1168 
1169 }
1170 
1171 /* deletes the given ibcm_hca_info_t from CM's global hca list */
1172 void
1173 ibcm_delete_hca_entry(ibcm_hca_info_t *hcap)
1174 {
1175         ibcm_hca_info_t *headp, *prevp = NULL;
1176 
1177         /* ibcm_hca_global_lock is held */
1178         IBTF_DPRINTF_L5(cmlog, "ibcm_delete_hca_entry: guid = 0x%llX "
1179             "hcap = 0x%p", hcap->hca_guid, hcap);
1180 
1181         ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1182 
1183         headp = ibcm_hca_listp;
1184         while (headp != NULL) {
1185                 if (headp == hcap) {
1186                         IBTF_DPRINTF_L3(cmlog, "ibcm_delete_hca_entry: "
1187                             "deleting hcap %p hcaguid %llX", hcap,
1188                             hcap->hca_guid);
1189                         if (prevp) {
1190                                 prevp->hca_next = headp->hca_next;
1191                         } else {
1192                                 prevp = headp->hca_next;
1193                                 ibcm_hca_listp = prevp;
1194                         }
1195                         rw_destroy(&hcap->hca_state_rwlock);
1196                         rw_destroy(&hcap->hca_sidr_list_lock);
1197                         kmem_free(hcap, sizeof (ibcm_hca_info_t) +
1198                             (hcap->hca_num_ports - 1) *
1199                             sizeof (ibcm_port_info_t));
1200                         return;
1201                 }
1202 
1203                 prevp = headp;
1204                 headp = headp->hca_next;
1205         }
1206 }
1207 
1208 /*
1209  * ibcm_find_hca_entry:
1210  *      Given a HCA's GUID find out ibcm_hca_info_t entry for that HCA
1211  *      This entry can be then used to access AVL tree/SIDR list etc.
1212  *      If entry exists and in HCA ATTACH state, then hca's ref cnt is
1213  *      incremented and entry returned. Else NULL returned.
1214  *
1215  *      All functions that use ibcm_find_hca_entry and get a non-NULL
1216  *      return values must call ibcm_dec_hca_acc_cnt to decrement the
1217  *      respective hca ref cnt. There shouldn't be any usage of
1218  *      ibcm_hca_info_t * returned from ibcm_find_hca_entry,
1219  *      after decrementing the hca_acc_cnt
1220  *
1221  * INPUTS:
1222  *      hca_guid        - HCA's guid
1223  *
1224  * RETURN VALUE:
1225  *      hcap            - if a match is found, else NULL
1226  */
1227 ibcm_hca_info_t *
1228 ibcm_find_hca_entry(ib_guid_t hca_guid)
1229 {
1230         ibcm_hca_info_t *hcap;
1231 
1232         IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: guid = 0x%llX", hca_guid);
1233 
1234         mutex_enter(&ibcm_global_hca_lock);
1235 
1236         hcap = ibcm_hca_listp;
1237         /* search for this HCA */
1238         while (hcap != NULL) {
1239                 if (hcap->hca_guid == hca_guid)
1240                         break;
1241                 hcap = hcap->hca_next;
1242         }
1243 
1244         /* if no hcap for the hca_guid, return NULL */
1245         if (hcap == NULL) {
1246                 mutex_exit(&ibcm_global_hca_lock);
1247                 return (NULL);
1248         }
1249 
1250         /* return hcap, only if it valid to use */
1251         if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1252                 ++(hcap->hca_acc_cnt);
1253 
1254                 IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: "
1255                     "found hcap = 0x%p hca_acc_cnt %u", hcap,
1256                     hcap->hca_acc_cnt);
1257 
1258                 mutex_exit(&ibcm_global_hca_lock);
1259                 return (hcap);
1260         } else {
1261                 mutex_exit(&ibcm_global_hca_lock);
1262 
1263                 IBTF_DPRINTF_L2(cmlog, "ibcm_find_hca_entry: "
1264                     "found hcap = 0x%p not in active state", hcap);
1265                 return (NULL);
1266         }
1267 }
1268 
1269 /*
1270  * Searches for ibcm_hca_info_t entry based on hca_guid, but doesn't increment
1271  * the hca's reference count. This function is used, where the calling context
1272  * is attempting to delete hcap itself and hence acc_cnt cannot be incremented
1273  * OR assumes that valid hcap must be available in ibcm's global hca list.
1274  */
1275 ibcm_hca_info_t *
1276 ibcm_find_hcap_entry(ib_guid_t hca_guid)
1277 {
1278         ibcm_hca_info_t *hcap;
1279 
1280         IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: guid = 0x%llX", hca_guid);
1281 
1282         ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1283 
1284         hcap = ibcm_hca_listp;
1285         /* search for this HCA */
1286         while (hcap != NULL) {
1287                 if (hcap->hca_guid == hca_guid)
1288                         break;
1289                 hcap = hcap->hca_next;
1290         }
1291 
1292         if (hcap == NULL)
1293                 IBTF_DPRINTF_L2(cmlog, "ibcm_find_hcap_entry: No hcap found for"
1294                     " hca_guid 0x%llX", hca_guid);
1295         else
1296                 IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: hcap found for"
1297                     " hca_guid 0x%llX", hca_guid);
1298 
1299         return (hcap);
1300 }
1301 
1302 /* increment the hca's temporary reference count */
1303 ibcm_status_t
1304 ibcm_inc_hca_acc_cnt(ibcm_hca_info_t *hcap)
1305 {
1306         mutex_enter(&ibcm_global_hca_lock);
1307         if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1308                 ++(hcap->hca_acc_cnt);
1309                 IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_acc_cnt: "
1310                     "hcap = 0x%p  acc_cnt = %d ", hcap, hcap->hca_acc_cnt);
1311                 mutex_exit(&ibcm_global_hca_lock);
1312                 return (IBCM_SUCCESS);
1313         } else {
1314                 IBTF_DPRINTF_L2(cmlog, "ibcm_inc_hca_acc_cnt: "
1315                     "hcap INACTIVE 0x%p  acc_cnt = %d ", hcap,
1316                     hcap->hca_acc_cnt);
1317                 mutex_exit(&ibcm_global_hca_lock);
1318                 return (IBCM_FAILURE);
1319         }
1320 }
1321 
1322 /* decrement the hca's ref count, and wake up any waiting threads */
1323 void
1324 ibcm_dec_hca_acc_cnt(ibcm_hca_info_t *hcap)
1325 {
1326         mutex_enter(&ibcm_global_hca_lock);
1327         ASSERT(hcap->hca_acc_cnt > 0);
1328         --(hcap->hca_acc_cnt);
1329         IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_acc_cnt: hcap = 0x%p "
1330             "acc_cnt = %d", hcap, hcap->hca_acc_cnt);
1331         if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1332             (hcap->hca_acc_cnt == 0)) {
1333                 IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_acc_cnt: "
1334                     "cv_broadcast for hcap = 0x%p", hcap);
1335                 cv_broadcast(&ibcm_global_hca_cv);
1336         }
1337         mutex_exit(&ibcm_global_hca_lock);
1338 }
1339 
1340 /* increment the hca's resource count */
1341 void
1342 ibcm_inc_hca_res_cnt(ibcm_hca_info_t *hcap)
1343 
1344 {
1345         mutex_enter(&ibcm_global_hca_lock);
1346         ++(hcap->hca_res_cnt);
1347         IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_res_cnt: hcap = 0x%p "
1348             "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1349         mutex_exit(&ibcm_global_hca_lock);
1350 }
1351 
1352 /* decrement the hca's resource count, and wake up any waiting threads */
1353 void
1354 ibcm_dec_hca_res_cnt(ibcm_hca_info_t *hcap)
1355 {
1356         mutex_enter(&ibcm_global_hca_lock);
1357         ASSERT(hcap->hca_res_cnt > 0);
1358         --(hcap->hca_res_cnt);
1359         IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_res_cnt: hcap = 0x%p "
1360             "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1361         if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
1362             (hcap->hca_res_cnt == 0)) {
1363                 IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_res_cnt: "
1364                     "cv_broadcast for hcap = 0x%p", hcap);
1365                 cv_broadcast(&ibcm_global_hca_cv);
1366         }
1367         mutex_exit(&ibcm_global_hca_lock);
1368 }
1369 
1370 /* increment the hca's service count */
1371 void
1372 ibcm_inc_hca_svc_cnt(ibcm_hca_info_t *hcap)
1373 
1374 {
1375         mutex_enter(&ibcm_global_hca_lock);
1376         ++(hcap->hca_svc_cnt);
1377         IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_svc_cnt: hcap = 0x%p "
1378             "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1379         mutex_exit(&ibcm_global_hca_lock);
1380 }
1381 
1382 /* decrement the hca's service count */
1383 void
1384 ibcm_dec_hca_svc_cnt(ibcm_hca_info_t *hcap)
1385 {
1386         mutex_enter(&ibcm_global_hca_lock);
1387         ASSERT(hcap->hca_svc_cnt > 0);
1388         --(hcap->hca_svc_cnt);
1389         IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_svc_cnt: hcap = 0x%p "
1390             "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1391         mutex_exit(&ibcm_global_hca_lock);
1392 }
1393 
/*
 * The following code manages three classes of requests that CM makes to
 * the fabric.  Those three classes are SA_ACCESS, REQ/REP/RTU, and DREQ/DREP.
 * The main issue is that the fabric can become very busy, and the CM
 * protocols rely on responses arriving within a predefined timeout
 * value.  Limiting how many simultaneous sessions are allowed has been
 * observed to make the CM protocols succeed, when they should, with
 * extremely high reliability.
 *
 * SA_ACCESS and DREQ/DREP are managed at the thread level, whereby the
 * thread blocks until there are fewer than some number of threads doing
 * similar requests.
 *
 * REQ/REP/RTU requests beyond a given limit are added to a list,
 * allowing the thread to return immediately to its caller in the
 * case where the "mode" is IBT_NONBLOCKING.  This is the mode used
 * by uDAPL and seems to be an important feature/behavior.
 */
1412 
1413 static int
1414 ibcm_ok_to_start(struct ibcm_open_s *openp)
1415 {
1416         return (openp->sends < openp->sends_hiwat &&
1417             openp->in_progress < openp->in_progress_max);
1418 }
1419 
1420 void
1421 ibcm_open_done(ibcm_state_data_t *statep)
1422 {
1423         int run;
1424         ibcm_state_data_t **linkp, *tmp;
1425 
1426         ASSERT(MUTEX_HELD(&statep->state_mutex));
1427         if (statep->open_flow == 1) {
1428                 statep->open_flow = 0;
1429                 mutex_enter(&ibcm_open.mutex);
1430                 if (statep->open_link == NULL) {
1431                         ibcm_open.in_progress--;
1432                         run = ibcm_ok_to_start(&ibcm_open);
1433                 } else {
1434                         ibcm_open.queued--;
1435                         linkp = &ibcm_open.head.open_link;
1436                         while (*linkp != statep)
1437                                 linkp = &((*linkp)->open_link);
1438                         *linkp = statep->open_link;
1439                         statep->open_link = NULL;
1440                         /*
1441                          * If we remove what tail pointed to, we need
1442                          * to reassign tail (it is never NULL).
1443                          * tail points to head for the empty list.
1444                          */
1445                         if (ibcm_open.tail == statep) {
1446                                 tmp = &ibcm_open.head;
1447                                 while (tmp->open_link != &ibcm_open.head)
1448                                         tmp = tmp->open_link;
1449                                 ibcm_open.tail = tmp;
1450                         }
1451                         run = 0;
1452                 }
1453                 mutex_exit(&ibcm_open.mutex);
1454                 if (run)
1455                         ibcm_run_tlist_thread();
1456         }
1457 }
1458 
1459 /* dtrace */
1460 void
1461 ibcm_open_wait(hrtime_t delta)
1462 {
1463         if (delta > 1000000)
1464                 IBTF_DPRINTF_L2(cmlog, "ibcm_open_wait: flow more %lld", delta);
1465 }
1466 
1467 void
1468 ibcm_open_start(ibcm_state_data_t *statep)
1469 {
1470         ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REQ);
1471 
1472         mutex_enter(&statep->state_mutex);
1473         ibcm_open_wait(gethrtime() - statep->post_time);
1474         mutex_exit(&statep->state_mutex);
1475 
1476         ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_req_complete,
1477             statep);
1478 
1479         mutex_enter(&statep->state_mutex);
1480         IBCM_REF_CNT_DECR(statep);
1481         mutex_exit(&statep->state_mutex);
1482 }
1483 
1484 void
1485 ibcm_open_enqueue(ibcm_state_data_t *statep)
1486 {
1487         int run;
1488 
1489         mutex_enter(&statep->state_mutex);
1490         statep->post_time = gethrtime();
1491         mutex_exit(&statep->state_mutex);
1492         mutex_enter(&ibcm_open.mutex);
1493         if (ibcm_open.queued == 0 && ibcm_ok_to_start(&ibcm_open)) {
1494                 ibcm_open.in_progress++;
1495                 mutex_exit(&ibcm_open.mutex);
1496                 ibcm_open_start(statep);
1497         } else {
1498                 ibcm_open.queued++;
1499                 statep->open_link = &ibcm_open.head;
1500                 ibcm_open.tail->open_link = statep;
1501                 ibcm_open.tail = statep;
1502                 run = ibcm_ok_to_start(&ibcm_open);
1503                 mutex_exit(&ibcm_open.mutex);
1504                 if (run)
1505                         ibcm_run_tlist_thread();
1506         }
1507 }
1508 
1509 ibcm_state_data_t *
1510 ibcm_open_dequeue(void)
1511 {
1512         ibcm_state_data_t *statep;
1513 
1514         ASSERT(MUTEX_HELD(&ibcm_open.mutex));
1515         ibcm_open.queued--;
1516         ibcm_open.in_progress++;
1517         statep = ibcm_open.head.open_link;
1518         ibcm_open.head.open_link = statep->open_link;
1519         statep->open_link = NULL;
1520         /*
1521          * If we remove what tail pointed to, we need
1522          * to reassign tail (it is never NULL).
1523          * tail points to head for the empty list.
1524          */
1525         if (ibcm_open.tail == statep)
1526                 ibcm_open.tail = &ibcm_open.head;
1527         return (statep);
1528 }
1529 
1530 void
1531 ibcm_check_for_opens(void)
1532 {
1533         ibcm_state_data_t       *statep;
1534 
1535         mutex_enter(&ibcm_open.mutex);
1536 
1537         while (ibcm_open.queued > 0) {
1538                 if (ibcm_ok_to_start(&ibcm_open)) {
1539                         statep = ibcm_open_dequeue();
1540                         mutex_exit(&ibcm_open.mutex);
1541 
1542                         ibcm_open_start(statep);
1543 
1544                         mutex_enter(&ibcm_open.mutex);
1545                 } else {
1546                         break;
1547                 }
1548         }
1549         mutex_exit(&ibcm_open.mutex);
1550 }
1551 
1552 
1553 static void
1554 ibcm_flow_init(ibcm_flow_t *flow, uint_t simul_max)
1555 {
1556         flow->list                   = NULL;
1557         flow->simul                  = 0;
1558         flow->waiters_per_chunk              = 4;
1559         flow->simul_max                      = simul_max;
1560         flow->lowat                  = simul_max - flow->waiters_per_chunk;
1561         flow->lowat_default          = flow->lowat;
1562         /* stats */
1563         flow->total                  = 0;
1564 }
1565 
1566 static void
1567 ibcm_rc_flow_control_init(void)
1568 {
1569         mutex_init(&ibcm_open.mutex, NULL, MUTEX_DEFAULT, NULL);
1570         mutex_enter(&ibcm_open.mutex);
1571         ibcm_flow_init(&ibcm_close_flow, ibcm_close_simul_max);
1572         ibcm_flow_init(&ibcm_lapr_flow, ibcm_lapr_simul_max);
1573         ibcm_flow_init(&ibcm_saa_flow, ibcm_saa_simul_max);
1574 
1575         ibcm_open.queued                = 0;
1576         ibcm_open.exit_deferred         = 0;
1577         ibcm_open.in_progress           = 0;
1578         ibcm_open.in_progress_max       = 16;
1579         ibcm_open.sends                 = 0;
1580         ibcm_open.sends_max             = 0;
1581         ibcm_open.sends_lowat           = 8;
1582         ibcm_open.sends_hiwat           = 16;
1583         ibcm_open.tail                  = &ibcm_open.head;
1584         ibcm_open.head.open_link        = NULL;
1585         mutex_exit(&ibcm_open.mutex);
1586 
1587         mutex_init(&ibcm_close.mutex, NULL, MUTEX_DEFAULT, NULL);
1588         mutex_enter(&ibcm_close.mutex);
1589         ibcm_close.tail                 = &ibcm_close.head;
1590         ibcm_close.head.close_link      = NULL;
1591         mutex_exit(&ibcm_close.mutex);
1592 }
1593 
1594 static void
1595 ibcm_rc_flow_control_fini(void)
1596 {
1597         mutex_destroy(&ibcm_open.mutex);
1598         mutex_destroy(&ibcm_close.mutex);
1599 }
1600 
1601 static ibcm_flow1_t *
1602 ibcm_flow_find(ibcm_flow_t *flow)
1603 {
1604         ibcm_flow1_t *flow1;
1605         ibcm_flow1_t *f;
1606 
1607         f = flow->list;
1608         if (f) {        /* most likely code path */
1609                 while (f->link != NULL)
1610                         f = f->link;
1611                 if (f->waiters < flow->waiters_per_chunk)
1612                         return (f);
1613         }
1614 
1615         /* There was no flow1 list element ready for another waiter */
1616         mutex_exit(&ibcm_open.mutex);
1617         flow1 = kmem_alloc(sizeof (*flow1), KM_SLEEP);
1618         mutex_enter(&ibcm_open.mutex);
1619 
1620         f = flow->list;
1621         if (f) {
1622                 while (f->link != NULL)
1623                         f = f->link;
1624                 if (f->waiters < flow->waiters_per_chunk) {
1625                         kmem_free(flow1, sizeof (*flow1));
1626                         return (f);
1627                 }
1628                 f->link = flow1;
1629         } else {
1630                 flow->list = flow1;
1631         }
1632         cv_init(&flow1->cv, NULL, CV_DRIVER, NULL);
1633         flow1->waiters = 0;
1634         flow1->link = NULL;
1635         return (flow1);
1636 }
1637 
1638 static void
1639 ibcm_flow_enter(ibcm_flow_t *flow)
1640 {
1641         mutex_enter(&ibcm_open.mutex);
1642         if (flow->list == NULL && flow->simul < flow->simul_max) {
1643                 flow->simul++;
1644                 flow->total++;
1645                 mutex_exit(&ibcm_open.mutex);
1646         } else {
1647                 ibcm_flow1_t *flow1;
1648 
1649                 flow1 = ibcm_flow_find(flow);
1650                 flow1->waiters++;
1651                 cv_wait(&flow1->cv, &ibcm_open.mutex);
1652                 if (--flow1->waiters == 0) {
1653                         cv_destroy(&flow1->cv);
1654                         mutex_exit(&ibcm_open.mutex);
1655                         kmem_free(flow1, sizeof (*flow1));
1656                 } else
1657                         mutex_exit(&ibcm_open.mutex);
1658         }
1659 }
1660 
1661 static void
1662 ibcm_flow_exit(ibcm_flow_t *flow)
1663 {
1664         mutex_enter(&ibcm_open.mutex);
1665         if (--flow->simul < flow->lowat) {
1666                 if (flow->lowat < flow->lowat_default)
1667                         flow->lowat++;
1668                 if (flow->list) {
1669                         ibcm_flow1_t *flow1;
1670 
1671                         flow1 = flow->list;
1672                         flow->list = flow1->link; /* unlink */
1673                         flow1->link = NULL;          /* be clean */
1674                         flow->total += flow1->waiters;
1675                         flow->simul += flow1->waiters;
1676                         cv_broadcast(&flow1->cv);
1677                 }
1678         }
1679         mutex_exit(&ibcm_open.mutex);
1680 }
1681 
1682 void
1683 ibcm_flow_inc(void)
1684 {
1685         mutex_enter(&ibcm_open.mutex);
1686         if (++ibcm_open.sends > ibcm_open.sends_max) {
1687                 ibcm_open.sends_max = ibcm_open.sends;
1688                 IBTF_DPRINTF_L2(cmlog, "ibcm_flow_inc: sends max = %d",
1689                     ibcm_open.sends_max);
1690         }
1691         mutex_exit(&ibcm_open.mutex);
1692 }
1693 
1694 static void
1695 ibcm_check_send_cmpltn_time(hrtime_t delta, char *event_msg)
1696 {
1697         if (delta > 4000000LL) {
1698                 IBTF_DPRINTF_L2(cmlog, "ibcm_check_send_cmpltn_time: "
1699                     "%s: %lldns", event_msg, delta);
1700         }
1701 }
1702 
1703 void
1704 ibcm_flow_dec(hrtime_t time, char *mad_type)
1705 {
1706         int flow_exit = 0;
1707         int run = 0;
1708 
1709         if (ibcm_dtrace)
1710                 ibcm_check_send_cmpltn_time(gethrtime() - time, mad_type);
1711         mutex_enter(&ibcm_open.mutex);
1712         ibcm_open.sends--;
1713         if (ibcm_open.sends < ibcm_open.sends_lowat) {
1714                 run = ibcm_ok_to_start(&ibcm_open);
1715                 if (ibcm_open.exit_deferred) {
1716                         ibcm_open.exit_deferred--;
1717                         flow_exit = 1;
1718                 }
1719         }
1720         mutex_exit(&ibcm_open.mutex);
1721         if (flow_exit)
1722                 ibcm_flow_exit(&ibcm_close_flow);
1723         if (run)
1724                 ibcm_run_tlist_thread();
1725 }
1726 
1727 void
1728 ibcm_close_enqueue(ibcm_state_data_t *statep)
1729 {
1730         mutex_enter(&ibcm_close.mutex);
1731         statep->close_link = NULL;
1732         ibcm_close.tail->close_link = statep;
1733         ibcm_close.tail = statep;
1734         mutex_exit(&ibcm_close.mutex);
1735         ibcm_run_tlist_thread();
1736 }
1737 
1738 void
1739 ibcm_check_for_async_close()
1740 {
1741         ibcm_state_data_t       *statep;
1742 
1743         mutex_enter(&ibcm_close.mutex);
1744 
1745         while (ibcm_close.head.close_link) {
1746                 statep = ibcm_close.head.close_link;
1747                 ibcm_close.head.close_link = statep->close_link;
1748                 statep->close_link = NULL;
1749                 if (ibcm_close.tail == statep)
1750                         ibcm_close.tail = &ibcm_close.head;
1751                 mutex_exit(&ibcm_close.mutex);
1752                 ibcm_close_start(statep);
1753                 mutex_enter(&ibcm_close.mutex);
1754         }
1755         mutex_exit(&ibcm_close.mutex);
1756 }
1757 
1758 void
1759 ibcm_close_enter(void)
1760 {
1761         ibcm_flow_enter(&ibcm_close_flow);
1762 }
1763 
1764 void
1765 ibcm_close_exit(void)
1766 {
1767         int flow_exit;
1768 
1769         mutex_enter(&ibcm_open.mutex);
1770         if (ibcm_open.sends < ibcm_open.sends_lowat ||
1771             ibcm_open.exit_deferred >= 4)
1772                 flow_exit = 1;
1773         else {
1774                 flow_exit = 0;
1775                 ibcm_open.exit_deferred++;
1776         }
1777         mutex_exit(&ibcm_open.mutex);
1778         if (flow_exit)
1779                 ibcm_flow_exit(&ibcm_close_flow);
1780 }
1781 
1782 /*
1783  * This function needs to be called twice to finish our flow
1784  * control accounting when closing down a connection.  One
1785  * call has send_done set to 1, while the other has it set to 0.
1786  * Because of retries, this could get called more than once
1787  * with either 0 or 1, but additional calls have no effect.
1788  */
1789 void
1790 ibcm_close_done(ibcm_state_data_t *statep, int send_done)
1791 {
1792         int flow_exit;
1793 
1794         ASSERT(MUTEX_HELD(&statep->state_mutex));
1795         if (statep->close_flow == 1) {
1796                 if (send_done)
1797                         statep->close_flow = 3;
1798                 else
1799                         statep->close_flow = 2;
1800         } else if ((send_done && statep->close_flow == 2) ||
1801             (!send_done && statep->close_flow == 3)) {
1802                 statep->close_flow = 0;
1803                 mutex_enter(&ibcm_open.mutex);
1804                 if (ibcm_open.sends < ibcm_open.sends_lowat ||
1805                     ibcm_open.exit_deferred >= 4)
1806                         flow_exit = 1;
1807                 else {
1808                         flow_exit = 0;
1809                         ibcm_open.exit_deferred++;
1810                 }
1811                 mutex_exit(&ibcm_open.mutex);
1812                 if (flow_exit)
1813                         ibcm_flow_exit(&ibcm_close_flow);
1814         }
1815 }
1816 
1817 void
1818 ibcm_lapr_enter(void)
1819 {
1820         ibcm_flow_enter(&ibcm_lapr_flow);
1821 }
1822 
1823 void
1824 ibcm_lapr_exit(void)
1825 {
1826         ibcm_flow_exit(&ibcm_lapr_flow);
1827 }
1828 
1829 void
1830 ibcm_sa_access_enter()
1831 {
1832         ibcm_flow_enter(&ibcm_saa_flow);
1833 }
1834 
1835 void
1836 ibcm_sa_access_exit()
1837 {
1838         ibcm_flow_exit(&ibcm_saa_flow);
1839 }
1840 
1841 static void
1842 ibcm_sm_notice_handler(ibmf_saa_handle_t saa_handle,
1843     ibmf_saa_subnet_event_t saa_event_code,
1844     ibmf_saa_event_details_t *saa_event_details,
1845     void *callback_arg)
1846 {
1847         ibcm_port_info_t        *portp = (ibcm_port_info_t *)callback_arg;
1848         ibt_subnet_event_code_t code;
1849         ibt_subnet_event_t      event;
1850         uint8_t                 event_status;
1851 
1852         IBTF_DPRINTF_L3(cmlog, "ibcm_sm_notice_handler: saa_hdl %p, code = %d",
1853             saa_handle, saa_event_code);
1854 
1855         mutex_enter(&ibcm_sm_notice_serialize_lock);
1856 
1857         switch (saa_event_code) {
1858         case IBMF_SAA_EVENT_MCG_CREATED:
1859                 code = IBT_SM_EVENT_MCG_CREATED;
1860                 break;
1861         case IBMF_SAA_EVENT_MCG_DELETED:
1862                 code = IBT_SM_EVENT_MCG_DELETED;
1863                 break;
1864         case IBMF_SAA_EVENT_GID_AVAILABLE:
1865                 code = IBT_SM_EVENT_GID_AVAIL;
1866                 ibcm_path_cache_purge();
1867                 break;
1868         case IBMF_SAA_EVENT_GID_UNAVAILABLE:
1869                 code = IBT_SM_EVENT_GID_UNAVAIL;
1870                 ibcm_path_cache_purge();
1871                 break;
1872         case IBMF_SAA_EVENT_SUBSCRIBER_STATUS_CHG:
1873                 event_status =
1874                     saa_event_details->ie_producer_event_status_mask &
1875                     IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
1876                 if (event_status == (portp->port_event_status &
1877                     IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) {
1878                         mutex_exit(&ibcm_sm_notice_serialize_lock);
1879                         return; /* no change */
1880                 }
1881                 portp->port_event_status = event_status;
1882                 if (event_status == IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)
1883                         code = IBT_SM_EVENT_AVAILABLE;
1884                 else
1885                         code = IBT_SM_EVENT_UNAVAILABLE;
1886                 break;
1887         default:
1888                 mutex_exit(&ibcm_sm_notice_serialize_lock);
1889                 return;
1890         }
1891 
1892         mutex_enter(&ibcm_global_hca_lock);
1893 
1894         /* don't send the event if we're tearing down */
1895         if (!IBCM_ACCESS_HCA_OK(portp->port_hcap)) {
1896                 mutex_exit(&ibcm_global_hca_lock);
1897                 mutex_exit(&ibcm_sm_notice_serialize_lock);
1898                 return;
1899         }
1900 
1901         ++(portp->port_hcap->hca_acc_cnt);
1902         mutex_exit(&ibcm_global_hca_lock);
1903 
1904         event.sm_notice_gid = saa_event_details->ie_gid;
1905         ibtl_cm_sm_notice_handler(portp->port_sgid0, code, &event);
1906 
1907         mutex_exit(&ibcm_sm_notice_serialize_lock);
1908 
1909         ibcm_dec_hca_acc_cnt(portp->port_hcap);
1910 }
1911 
1912 void
1913 ibt_register_subnet_notices(ibt_clnt_hdl_t ibt_hdl,
1914     ibt_sm_notice_handler_t sm_notice_handler, void *private)
1915 {
1916         ibcm_port_info_t        *portp;
1917         ibcm_hca_info_t         *hcap;
1918         uint8_t                 port;
1919         int                     num_failed_sgids;
1920         ibtl_cm_sm_init_fail_t  *ifail;
1921         ib_gid_t                *sgidp;
1922 
1923         IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices(%p, %s)",
1924             ibt_hdl, ibtl_cm_get_clnt_name(ibt_hdl));
1925 
1926         mutex_enter(&ibcm_sm_notice_serialize_lock);
1927 
1928         ibtl_cm_set_sm_notice_handler(ibt_hdl, sm_notice_handler, private);
1929         if (sm_notice_handler == NULL) {
1930                 mutex_exit(&ibcm_sm_notice_serialize_lock);
1931                 return;
1932         }
1933 
1934         /* for each port, if service is not available, make a call */
1935         mutex_enter(&ibcm_global_hca_lock);
1936         num_failed_sgids = 0;
1937         hcap = ibcm_hca_listp;
1938         while (hcap != NULL) {
1939                 portp = hcap->hca_port_info;
1940                 for (port = 0; port < hcap->hca_num_ports; port++) {
1941                         if (!(portp->port_event_status &
1942                             IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
1943                                 num_failed_sgids++;
1944                         portp++;
1945                 }
1946                 hcap = hcap->hca_next;
1947         }
1948         if (num_failed_sgids != 0) {
1949                 ifail = kmem_alloc(sizeof (*ifail) +
1950                     (num_failed_sgids - 1) * sizeof (ib_gid_t), KM_SLEEP);
1951                 ifail->smf_num_sgids = num_failed_sgids;
1952                 ifail->smf_ibt_hdl = ibt_hdl;
1953                 sgidp = &ifail->smf_sgid[0];
1954                 hcap = ibcm_hca_listp;
1955                 while (hcap != NULL) {
1956                         portp = hcap->hca_port_info;
1957                         for (port = 0; port < hcap->hca_num_ports; port++) {
1958                                 if (!(portp->port_event_status &
1959                                     IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
1960                                         *sgidp++ = portp->port_sgid0;
1961                                 portp++;
1962                         }
1963                         hcap = hcap->hca_next;
1964                 }
1965         }
1966         mutex_exit(&ibcm_global_hca_lock);
1967 
1968         if (num_failed_sgids != 0) {
1969                 ibtl_cm_sm_notice_init_failure(ifail);
1970                 kmem_free(ifail, sizeof (*ifail) +
1971                     (num_failed_sgids - 1) * sizeof (ib_gid_t));
1972         }
1973         mutex_exit(&ibcm_sm_notice_serialize_lock);
1974 }
1975 
1976 /* The following is run from a taskq because we've seen the stack overflow. */
1977 static void
1978 ibcm_init_saa(void *arg)
1979 {
1980         ibcm_port_info_t                *portp = (ibcm_port_info_t *)arg;
1981         int                             status;
1982         ib_guid_t                       port_guid;
1983         ibmf_saa_subnet_event_args_t    event_args;
1984 
1985         port_guid = portp->port_sgid0.gid_guid;
1986 
1987         IBTF_DPRINTF_L3(cmlog, "ibcm_init_saa: port guid %llX", port_guid);
1988 
1989         event_args.is_event_callback_arg = portp;
1990         event_args.is_event_callback = ibcm_sm_notice_handler;
1991 
1992         if ((status = ibmf_sa_session_open(port_guid, 0, &event_args,
1993             IBMF_VERSION, 0, &portp->port_ibmf_saa_hdl)) != IBMF_SUCCESS) {
1994                 IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
1995                     "ibmf_sa_session_open failed for port guid %llX "
1996                     "status = %d", port_guid, status);
1997         } else {
1998                 IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
1999                     "registered sa_hdl 0x%p for port guid %llX",
2000                     portp->port_ibmf_saa_hdl, port_guid);
2001         }
2002 
2003         mutex_enter(&ibcm_sa_open_lock);
2004         portp->port_saa_open_in_progress = 0;
2005         cv_broadcast(&ibcm_sa_open_cv);
2006         mutex_exit(&ibcm_sa_open_lock);
2007 }
2008 
2009 void
2010 ibcm_init_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
2011 {
2012         ibmf_saa_handle_t       saa_handle;
2013         uint8_t                 port_index = port - 1;
2014         ibcm_port_info_t        *portp = &hcap->hca_port_info[port_index];
2015         ibt_status_t            ibt_status;
2016 
2017         if (port_index >= hcap->hca_num_ports)
2018                 return;
2019 
2020         mutex_enter(&ibcm_sa_open_lock);
2021         if (portp->port_saa_open_in_progress) {
2022                 mutex_exit(&ibcm_sa_open_lock);
2023                 return;
2024         }
2025 
2026         saa_handle = portp->port_ibmf_saa_hdl;
2027         if (saa_handle != NULL) {
2028                 mutex_exit(&ibcm_sa_open_lock);
2029                 return;
2030         }
2031 
2032         portp->port_saa_open_in_progress = 1;
2033         mutex_exit(&ibcm_sa_open_lock);
2034 
2035         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(portp->port_event_status))
2036 
2037         /* The assumption is that we're getting event notifications */
2038         portp->port_event_status = IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
2039 
2040         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(portp->port_event_status))
2041 
2042         ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
2043             portp->port_num, &portp->port_sgid0, NULL);
2044         if (ibt_status != IBT_SUCCESS) {
2045                 IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa_handle: "
2046                     "ibt_get_port_state_byguid failed for guid %llX "
2047                     "with status %d", portp->port_hcap->hca_guid, ibt_status);
2048                 mutex_enter(&ibcm_sa_open_lock);
2049                 portp->port_saa_open_in_progress = 0;
2050                 cv_broadcast(&ibcm_sa_open_cv);
2051                 mutex_exit(&ibcm_sa_open_lock);
2052                 return;
2053         }
2054         /* if the port is UP, try sa_session_open */
2055         (void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
2056 }
2057 
2058 
2059 ibmf_saa_handle_t
2060 ibcm_get_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
2061 {
2062         ibmf_saa_handle_t       saa_handle;
2063         uint8_t                 port_index = port - 1;
2064         ibcm_port_info_t        *portp = &hcap->hca_port_info[port_index];
2065         ibt_status_t            ibt_status;
2066 
2067         if (port_index >= hcap->hca_num_ports)
2068                 return (NULL);
2069 
2070         mutex_enter(&ibcm_sa_open_lock);
2071         while (portp->port_saa_open_in_progress) {
2072                 cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
2073         }
2074 
2075         saa_handle = portp->port_ibmf_saa_hdl;
2076         if (saa_handle != NULL) {
2077                 mutex_exit(&ibcm_sa_open_lock);
2078                 return (saa_handle);
2079         }
2080 
2081         portp->port_saa_open_in_progress = 1;
2082         mutex_exit(&ibcm_sa_open_lock);
2083 
2084         ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
2085             portp->port_num, &portp->port_sgid0, NULL);
2086         if (ibt_status != IBT_SUCCESS) {
2087                 IBTF_DPRINTF_L2(cmlog, "ibcm_get_saa_handle: "
2088                     "ibt_get_port_state_byguid failed for guid %llX "
2089                     "with status %d", portp->port_hcap->hca_guid, ibt_status);
2090                 mutex_enter(&ibcm_sa_open_lock);
2091                 portp->port_saa_open_in_progress = 0;
2092                 cv_broadcast(&ibcm_sa_open_cv);
2093                 mutex_exit(&ibcm_sa_open_lock);
2094                 return (NULL);
2095         }
2096         /* if the port is UP, try sa_session_open */
2097         (void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
2098 
2099         mutex_enter(&ibcm_sa_open_lock);
2100         while (portp->port_saa_open_in_progress) {
2101                 cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
2102         }
2103         saa_handle = portp->port_ibmf_saa_hdl;
2104         mutex_exit(&ibcm_sa_open_lock);
2105         return (saa_handle);
2106 }
2107 
2108 
2109 /*
2110  * ibcm_hca_init_port():
2111  *      - Register port with IBMA
2112  *
2113  * Arguments:
2114  *      hcap            - HCA's guid
2115  *      port_index      - port number minus 1
2116  *
2117  * Return values:
2118  *      IBCM_SUCCESS - success
2119  */
2120 ibt_status_t
2121 ibcm_hca_init_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2122 {
2123         int                     status;
2124         ibmf_register_info_t    *ibmf_reg;
2125 
2126         IBTF_DPRINTF_L4(cmlog, "ibcm_hca_init_port: hcap = 0x%p port_num %d",
2127             hcap, port_index + 1);
2128 
2129         ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2130 
2131         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hcap->hca_port_info))
2132 
2133         if (hcap->hca_port_info[port_index].port_ibmf_hdl == NULL) {
2134                 /* Register with IBMF */
2135                 ibmf_reg = &hcap->hca_port_info[port_index].port_ibmf_reg;
2136                 ibmf_reg->ir_ci_guid = hcap->hca_guid;
2137                 ibmf_reg->ir_port_num = port_index + 1;
2138                 ibmf_reg->ir_client_class = COMM_MGT_MANAGER_AGENT;
2139 
2140                 /*
2141                  * register with management framework
2142                  */
2143                 status = ibmf_register(ibmf_reg, IBMF_VERSION,
2144                     IBMF_REG_FLAG_NO_OFFLOAD, NULL, NULL,
2145                     &(hcap->hca_port_info[port_index].port_ibmf_hdl),
2146                     &(hcap->hca_port_info[port_index].port_ibmf_caps));
2147 
2148                 if (status != IBMF_SUCCESS) {
2149                         IBTF_DPRINTF_L2(cmlog, "ibcm_hca_init_port: "
2150                             "ibmf_register failed for port_num %x, "
2151                             "status = %x", port_index + 1, status);
2152                         return (ibcm_ibmf_analyze_error(status));
2153                 }
2154 
2155                 hcap->hca_port_info[port_index].port_qp1.qp_cm =
2156                     IBMF_QP_HANDLE_DEFAULT;
2157                 hcap->hca_port_info[port_index].port_qp1.qp_port =
2158                     &(hcap->hca_port_info[port_index]);
2159 
2160                 /*
2161                  * Register the read callback with IBMF.
2162                  * Since we just did an ibmf_register, handle is
2163                  * valid and ibcm_recv_cb() is valid so we can
2164                  * safely assert for success of ibmf_setup_recv_cb()
2165                  *
2166                  * Depending on the "state" of the HCA,
2167                  * CM may drop incoming packets
2168                  */
2169                 status = ibmf_setup_async_cb(
2170                     hcap->hca_port_info[port_index].port_ibmf_hdl,
2171                     IBMF_QP_HANDLE_DEFAULT, ibcm_recv_cb,
2172                     &(hcap->hca_port_info[port_index].port_qp1), 0);
2173                 ASSERT(status == IBMF_SUCCESS);
2174 
2175                 IBTF_DPRINTF_L5(cmlog, "ibcm_hca_init_port: "
2176                     "IBMF hdl[%x] = 0x%p", port_index,
2177                     hcap->hca_port_info[port_index].port_ibmf_hdl);
2178 
2179                 /* Attempt to get the saa_handle for this port */
2180                 ibcm_init_saa_handle(hcap, port_index + 1);
2181         }
2182 
2183         return (IBT_SUCCESS);
2184 }
2185 
2186 /*
2187  * useful, to re attempt to initialize port ibma handles from elsewhere in
2188  * cm code
2189  */
2190 ibt_status_t
2191 ibcm_hca_reinit_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2192 {
2193         ibt_status_t    status;
2194 
2195         IBTF_DPRINTF_L5(cmlog, "ibcm_hca_reinit_port: hcap 0x%p port_num %d",
2196             hcap, port_index + 1);
2197 
2198         mutex_enter(&ibcm_global_hca_lock);
2199         status = ibcm_hca_init_port(hcap, port_index);
2200         mutex_exit(&ibcm_global_hca_lock);
2201         return (status);
2202 }
2203 
2204 
2205 /*
2206  * ibcm_hca_fini_port():
2207  *      - Deregister port with IBMA
2208  *
2209  * Arguments:
2210  *      hcap            - HCA's guid
2211  *      port_index      - port number minus 1
2212  *
2213  * Return values:
2214  *      IBCM_SUCCESS - success
2215  */
2216 static ibcm_status_t
2217 ibcm_hca_fini_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2218 {
2219         int                     ibmf_status;
2220         ibcm_status_t           ibcm_status;
2221 
2222         IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: hcap = 0x%p port_num %d ",
2223             hcap, port_index + 1);
2224 
2225         ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2226 
2227         if (hcap->hca_port_info[port_index].port_ibmf_saa_hdl != NULL) {
2228                 IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2229                     "ibmf_sa_session_close IBMF SAA hdl %p",
2230                     hcap->hca_port_info[port_index].port_ibmf_saa_hdl);
2231 
2232                 ibmf_status = ibmf_sa_session_close(
2233                     &hcap->hca_port_info[port_index].port_ibmf_saa_hdl, 0);
2234                 if (ibmf_status != IBMF_SUCCESS) {
2235                         IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2236                             "ibmf_sa_session_close of port %d returned %x",
2237                             port_index + 1, ibmf_status);
2238                         return (IBCM_FAILURE);
2239                 }
2240         }
2241 
2242         if (hcap->hca_port_info[port_index].port_ibmf_hdl != NULL) {
2243                 IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2244                     "ibmf_unregister IBMF Hdl %p",
2245                     hcap->hca_port_info[port_index].port_ibmf_hdl);
2246 
2247                 /* clean-up all the ibmf qp's allocated on this port */
2248                 ibcm_status = ibcm_free_allqps(hcap, port_index + 1);
2249 
2250                 if (ibcm_status != IBCM_SUCCESS) {
2251 
2252                         IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2253                             "ibcm_free_allqps failed for port_num %d",
2254                             port_index + 1);
2255                         return (IBCM_FAILURE);
2256                 }
2257 
2258                 /* Tear down the receive callback */
2259                 ibmf_status = ibmf_tear_down_async_cb(
2260                     hcap->hca_port_info[port_index].port_ibmf_hdl,
2261                     IBMF_QP_HANDLE_DEFAULT, 0);
2262 
2263                 if (ibmf_status != IBMF_SUCCESS) {
2264                         IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2265                             "ibmf_tear_down_async_cb failed %d port_num %d",
2266                             ibmf_status, port_index + 1);
2267                         return (IBCM_FAILURE);
2268                 }
2269 
2270                 /* Now, unregister with IBMF */
2271                 ibmf_status = ibmf_unregister(
2272                     &hcap->hca_port_info[port_index].port_ibmf_hdl, 0);
2273                 IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: "
2274                     "ibmf_unregister of port_num %x returned %x",
2275                     port_index + 1, ibmf_status);
2276 
2277                 if (ibmf_status == IBMF_SUCCESS)
2278                         hcap->hca_port_info[port_index].port_ibmf_hdl = NULL;
2279                 else {
2280                         IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2281                             "ibmf_unregister failed %d port_num %d",
2282                             ibmf_status, port_index + 1);
2283                         return (IBCM_FAILURE);
2284                 }
2285         }
2286         return (IBCM_SUCCESS);
2287 }
2288 
2289 /*
2290  * ibcm_comm_est_handler():
2291  *      Check if the given channel is in ESTABLISHED state or not
2292  *
2293  * Arguments:
2294  *      eventp  - A pointer to an ibt_async_event_t struct
2295  *
2296  * Return values: NONE
2297  */
2298 static void
2299 ibcm_comm_est_handler(ibt_async_event_t *eventp)
2300 {
2301         ibcm_state_data_t       *statep;
2302 
2303         IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler:");
2304 
2305         /* Both QP and EEC handles can't be NULL */
2306         if (eventp->ev_chan_hdl == NULL) {
2307                 IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
2308                     "both QP and EEC handles are NULL");
2309                 return;
2310         }
2311 
2312         /* get the "statep" from qp/eec handles */
2313         IBCM_GET_CHAN_PRIVATE(eventp->ev_chan_hdl, statep);
2314         if (statep == NULL) {
2315                 IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: statep is NULL");
2316                 return;
2317         }
2318 
2319         mutex_enter(&statep->state_mutex);
2320 
2321         IBCM_RELEASE_CHAN_PRIVATE(eventp->ev_chan_hdl);
2322 
2323         IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: statep = %p", statep);
2324 
2325         IBCM_REF_CNT_INCR(statep);
2326 
2327         if ((statep->state == IBCM_STATE_REP_SENT) ||
2328             (statep->state == IBCM_STATE_MRA_REP_RCVD)) {
2329                 timeout_id_t    timer_val = statep->timerid;
2330 
2331                 statep->state = IBCM_STATE_TRANSIENT_ESTABLISHED;
2332 
2333                 if (timer_val) {
2334                         statep->timerid = 0;
2335                         mutex_exit(&statep->state_mutex);
2336                         (void) untimeout(timer_val);
2337                 } else
2338                         mutex_exit(&statep->state_mutex);
2339 
2340                 /* CM doesn't have RTU message here */
2341                 ibcm_cep_state_rtu(statep, NULL);
2342 
2343         } else {
2344                 if (statep->state == IBCM_STATE_ESTABLISHED ||
2345                     statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
2346                         IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: "
2347                             "Channel already in ESTABLISHED state");
2348                 } else {
2349                         /* An unexpected behavior from remote */
2350                         IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
2351                             "Unexpected in state = %d", statep->state);
2352                 }
2353                 mutex_exit(&statep->state_mutex);
2354 
2355                 ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_COMEST);
2356         }
2357 
2358         mutex_enter(&statep->state_mutex);
2359         IBCM_REF_CNT_DECR(statep);
2360         mutex_exit(&statep->state_mutex);
2361 }
2362 
2363 
2364 /*
2365  * ibcm_async_handler():
2366  *      CM's Async Handler
2367  *      (Handles ATTACH, DETACH, COM_EST events)
2368  *
2369  * Arguments:
2370  *      eventp  - A pointer to an ibt_async_event_t struct
2371  *
2372  * Return values: None
2373  *
2374  * NOTE : CM assumes that all HCA DR events are delivered sequentially
2375  * i.e., until ibcm_async_handler  completes for a given HCA DR, framework
2376  * shall not invoke ibcm_async_handler with another DR event for the same
2377  * HCA
2378  */
2379 /* ARGSUSED */
2380 void
2381 ibcm_async_handler(void *clnt_hdl, ibt_hca_hdl_t hca_hdl,
2382     ibt_async_code_t code, ibt_async_event_t *eventp)
2383 {
2384         ibcm_hca_info_t         *hcap;
2385         ibcm_port_up_t          *pup;
2386 
2387         IBTF_DPRINTF_L3(cmlog, "ibcm_async_handler: "
2388             "clnt_hdl = %p, code = 0x%x, eventp = 0x%p",
2389             clnt_hdl, code, eventp);
2390 
2391         mutex_enter(&ibcm_global_hca_lock);
2392 
2393         /* If fini is going to complete successfully, then return */
2394         if (ibcm_finit_state != IBCM_FINIT_IDLE) {
2395 
2396                 /*
2397                  * This finit state implies one of the following:
2398                  * Init either didn't start or didn't complete OR
2399                  * Fini is about to return SUCCESS and release the global lock.
2400                  * In all these cases, it is safe to ignore the async.
2401                  */
2402 
2403                 IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler: ignoring event %x, "
2404                     "as either init didn't complete or fini about to succeed",
2405                     code);
2406                 mutex_exit(&ibcm_global_hca_lock);
2407                 return;
2408         }
2409 
2410         switch (code) {
2411         case IBT_PORT_CHANGE_EVENT:
2412                 if ((eventp->ev_port_flags & IBT_PORT_CHANGE_SM_LID) == 0)
2413                         break;
2414         /* FALLTHROUGH */
2415         case IBT_CLNT_REREG_EVENT:
2416         case IBT_EVENT_PORT_UP:
2417                 mutex_exit(&ibcm_global_hca_lock);
2418                 pup = kmem_alloc(sizeof (ibcm_port_up_t), KM_SLEEP);
2419                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pup))
2420                 pup->pup_hca_guid = eventp->ev_hca_guid;
2421                 pup->pup_port = eventp->ev_port;
2422                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pup))
2423                 (void) taskq_dispatch(ibcm_taskq,
2424                     ibcm_service_record_rewrite_task, pup, TQ_SLEEP);
2425                 ibcm_path_cache_purge();
2426                 return;
2427 
2428         case IBT_HCA_ATTACH_EVENT:
2429 
2430                 /* eventp->ev_hcaguid is the HCA GUID of interest */
2431                 ibcm_hca_attach(eventp->ev_hca_guid);
2432                 break;
2433 
2434         case IBT_HCA_DETACH_EVENT:
2435 
2436                 /* eventp->ev_hca_guid is the HCA GUID of interest */
2437                 if ((hcap = ibcm_find_hcap_entry(eventp->ev_hca_guid)) ==
2438                     NULL) {
2439                         IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler:"
2440                             " hca %llX doesn't exist", eventp->ev_hca_guid);
2441                         break;
2442                 }
2443 
2444                 (void) ibcm_hca_detach(hcap);
2445                 break;
2446 
2447         case IBT_EVENT_COM_EST_QP:
2448                 /* eventp->ev_qp_hdl is the ibt_qp_hdl_t of interest */
2449         case IBT_EVENT_COM_EST_EEC:
2450                 /* eventp->ev_eec_hdl is the ibt_eec_hdl_t of interest */
2451                 ibcm_comm_est_handler(eventp);
2452                 break;
2453         default:
2454                 break;
2455         }
2456 
2457         /* Unblock, any blocked fini/init operations */
2458         mutex_exit(&ibcm_global_hca_lock);
2459 }