1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * ibcm_impl.c
28 *
29 * contains internal functions of IB CM module.
30 *
31 * TBD:
32 * 1. HCA CATASTROPHIC/RECOVERED not handled yet
33 */
34
35 #include <sys/ib/mgt/ibcm/ibcm_impl.h>
36 #include <sys/disp.h>
37
38
/* function prototypes */
static ibcm_status_t	ibcm_init(void);
static ibcm_status_t	ibcm_fini(void);

/* Routines to initialize and destroy CM global locks and CVs */
static void		ibcm_init_locks(void);
static void		ibcm_fini_locks(void);

/* Routines that initialize/teardown CM's global hca structures */
static void		ibcm_init_hcas();
static ibcm_status_t	ibcm_fini_hcas();

static void		ibcm_init_classportinfo();
static void		ibcm_stop_timeout_thread();

/* Routines that handle HCA attach/detach asyncs */
static void		ibcm_hca_attach(ib_guid_t);
static ibcm_status_t	ibcm_hca_detach(ibcm_hca_info_t *);

/* Routines that initialize the HCA's port related fields */
static ibt_status_t	ibcm_hca_init_port(ibcm_hca_info_t *hcap,
			    uint8_t port_index);
static ibcm_status_t	ibcm_hca_fini_port(ibcm_hca_info_t *hcap,
			    uint8_t port_index);

/* Set up / tear down the open_rc_channel flow control state (ibcm_open) */
static void		ibcm_rc_flow_control_init(void);
static void		ibcm_rc_flow_control_fini(void);

/*
 * Routines that check if hca's avl trees and sidr lists are free of any
 * active client resources ie., RC or UD state structures in certain states
 */
static ibcm_status_t	ibcm_check_avl_clean(ibcm_hca_info_t *hcap);
static ibcm_status_t	ibcm_check_sidr_clean(ibcm_hca_info_t *hcap);

/* Add a new hca structure to CM's global hca list */
static ibcm_hca_info_t	*ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports);

/* Async event handlers; ibcm_async_handler is registered via ibcm_ibt_modinfo */
static void		ibcm_comm_est_handler(ibt_async_event_t *);
void			ibcm_async_handler(void *, ibt_hca_hdl_t,
			    ibt_async_code_t, ibt_async_event_t *);
80
/* Global variables */
char			cmlog[] = "ibcm";	/* for debug log messages */
ibt_clnt_hdl_t		ibcm_ibt_handle;	/* IBT handle */
kmutex_t		ibcm_svc_info_lock;	/* list lock */
kcondvar_t		ibcm_svc_info_cv;	/* cv for deregister */
kmutex_t		ibcm_recv_mutex;	/* NOTE(review): presumably serializes MAD receive processing — confirm */
avl_tree_t		ibcm_svc_avl_tree;	/* registered services, see ibcm_init_locks() */
taskq_t			*ibcm_taskq = NULL;	/* set to system_taskq in ibcm_init() */
int			taskq_dispatch_fail_cnt; /* stat: failed taskq dispatches */

kmutex_t		ibcm_mcglist_lock;	/* MCG list lock */
kmutex_t		ibcm_trace_mutex;	/* Trace mutex */
kmutex_t		ibcm_trace_print_mutex;	/* Trace print mutex */
int			ibcm_conn_max_trcnt = IBCM_MAX_CONN_TRCNT;

int			ibcm_enable_trace = 2;	/* trace level; default is 2 */
int			ibcm_dtrace = 0; /* conditionally enable more dtrace */

/*
 * Initial state is INIT. All hca dr's return success immediately in this
 * state, without adding or deleting any hca's to CM.
 */
ibcm_finit_state_t	ibcm_finit_state = IBCM_FINIT_INIT;

/* mutex and cv to manage hca's reference and resource count(s) */
kmutex_t		ibcm_global_hca_lock;
kcondvar_t		ibcm_global_hca_cv;

/* mutex and cv to sa session open */
kmutex_t		ibcm_sa_open_lock;
kcondvar_t		ibcm_sa_open_cv;
int			ibcm_sa_timeout_delay = 1;	/* in ticks */

/* serialize sm notice callbacks */
kmutex_t		ibcm_sm_notice_serialize_lock;

/* mutex for CM's qp list management */
kmutex_t		ibcm_qp_list_lock;

/* State for the timeout-list processing thread (ibcm_process_tlist) */
kcondvar_t		ibcm_timeout_list_cv;
kcondvar_t		ibcm_timeout_thread_done_cv;
kt_did_t		ibcm_timeout_thread_did;	/* for thread_join() */
ibcm_state_data_t	*ibcm_timeout_list_hdr, *ibcm_timeout_list_tail;
ibcm_ud_state_data_t	*ibcm_ud_timeout_list_hdr, *ibcm_ud_timeout_list_tail;
kmutex_t		ibcm_timeout_list_lock;
uint8_t			ibcm_timeout_list_flags = 0;
pri_t			ibcm_timeout_thread_pri = MINCLSYSPRI;

/*
 * Flow control logic for open_rc_channel uses the following.
 */

struct ibcm_open_s {
	kmutex_t		mutex;		/* protects all fields below */
	kcondvar_t		cv;
	uint8_t			task_running;
	uint_t			queued;
	uint_t			exit_deferred;
	uint_t			in_progress;
	uint_t			in_progress_max;
	uint_t			sends;
	uint_t			sends_max;
	uint_t			sends_lowat;
	uint_t			sends_hiwat;
	ibcm_state_data_t	*tail;
	ibcm_state_data_t	head;		/* list anchor, not a real statep */
} ibcm_open;
148
/*
 * Flow control logic for SA access and close_rc_channel calls follows.
 */

/* Tunables: maximum simultaneously outstanding requests of each kind */
int ibcm_close_simul_max	= 12;
int ibcm_lapr_simul_max		= 12;
int ibcm_saa_simul_max		= 8;

typedef struct ibcm_flow1_s {
	struct ibcm_flow1_s	*link;
	kcondvar_t		cv;
	uint8_t			waiters;	/* 1 to IBCM_FLOW_SIMUL_MAX */
} ibcm_flow1_t;

typedef struct ibcm_flow_s {
	ibcm_flow1_t		*list;
	uint_t			simul;	/* #requests currently outstanding */
	uint_t			simul_max;
	uint_t			waiters_per_chunk;
	uint_t			lowat;
	uint_t			lowat_default;
	/* statistics */
	uint_t			total;
} ibcm_flow_t;

/* One flow-control instance per throttled operation type */
ibcm_flow_t ibcm_saa_flow;
ibcm_flow_t ibcm_close_flow;
ibcm_flow_t ibcm_lapr_flow;

/* NONBLOCKING close requests are queued */
struct ibcm_close_s {
	kmutex_t		mutex;
	ibcm_state_data_t	*tail;
	ibcm_state_data_t	head;	/* list anchor, not a real statep */
} ibcm_close;

static ibt_clnt_modinfo_t ibcm_ibt_modinfo = {	/* Client's modinfop */
	IBTI_V_CURR,
	IBT_CM,
	ibcm_async_handler,
	NULL,
	"IBCM"
};

/* IBCM's list of HCAs registered with it */
static ibcm_hca_info_t *ibcm_hca_listp = NULL;	/* CM's HCA list */

/*
 * Array of CM state call table functions.
 * NOTE(review): order is significant — presumably indexed by incoming CM
 * MAD message type; confirm against the MAD dispatch code before reordering.
 */
ibcm_state_handler_t	ibcm_sm_funcs_tbl[] = {
	ibcm_process_req_msg,
	ibcm_process_mra_msg,
	ibcm_process_rej_msg,
	ibcm_process_rep_msg,
	ibcm_process_rtu_msg,
	ibcm_process_dreq_msg,
	ibcm_process_drep_msg,
	ibcm_process_sidr_req_msg,
	ibcm_process_sidr_rep_msg,
	ibcm_process_lap_msg,
	ibcm_process_apr_msg
};

/* the following globals are CM tunables */
ibt_rnr_nak_time_t	ibcm_default_rnr_nak_time = IBT_RNR_NAK_655ms;

uint8_t		ibcm_max_retries = IBCM_MAX_RETRIES;
clock_t		ibcm_local_processing_time = IBCM_LOCAL_RESPONSE_TIME;
clock_t		ibcm_remote_response_time = IBCM_REMOTE_RESPONSE_TIME;
ib_time_t	ibcm_max_sidr_rep_proctime = IBCM_MAX_SIDR_PROCESS_TIME;
ib_time_t	ibcm_max_sidr_pktlife_time = IBCM_MAX_SIDR_PKT_LIFE_TIME;

ib_time_t	ibcm_max_sidr_rep_store_time = 18;
uint32_t	ibcm_wait_for_acc_cnt_timeout = 2000000;	/* 2 sec */

ib_time_t	ibcm_max_ib_pkt_lt = IBCM_MAX_IB_PKT_LT;
ib_time_t	ibcm_max_ib_mad_pkt_lt = IBCM_MAX_IB_MAD_PKT_LT;

/*
 * This delay accounts for time involved in various activities as follows :
 *
 * IBMF delays for posting the MADs in non-blocking mode
 * IBMF delays for receiving the MADs and delivering to CM
 * CM delays in processing the MADs before invoking client handlers,
 * Any other delays associated with HCA driver in processing the MADs and
 * 	other subsystems that CM may invoke (ex : SA, HCA driver)
 */
uint32_t	ibcm_sw_delay	= 1000;	/* 1000us / 1ms */
uint32_t	ibcm_max_sa_retries = IBCM_MAX_SA_RETRIES + 1;

/* approx boot time */
uint32_t	ibcm_adj_btime = 4;	/* 4 seconds */

/*
 * The information in ibcm_clpinfo is kept in wireformat and is setup at
 * init time, and used read-only after that
 */
ibcm_classportinfo_msg_t	ibcm_clpinfo;
246
/*
 * Human-readable names for connection trace events, used when formatting
 * the connection trace buffer. The first and last entries are sentinels
 * for out-of-range event codes. Do not change the strings; their padding
 * appears intended to keep trace output column-aligned.
 */
char	*event_str[] = {
	"NEVER SEE THIS ",
	"SESSION_ID ",
	"CHAN_HDL ",
	"LOCAL_COMID/HCA/PORT ",
	"LOCAL_QPN ",
	"REMOTE_COMID/HCA ",
	"REMOTE_QPN ",
	"BASE_TIME ",
	"INCOMING_REQ ",
	"INCOMING_REP ",
	"INCOMING_RTU ",
	"INCOMING_COMEST ",
	"INCOMING_MRA ",
	"INCOMING_REJ ",
	"INCOMING_LAP ",
	"INCOMING_APR ",
	"INCOMING_DREQ ",
	"INCOMING_DREP ",
	"OUTGOING_REQ ",
	"OUTGOING_REP ",
	"OUTGOING_RTU ",
	"OUTGOING_LAP ",
	"OUTGOING_APR ",
	"OUTGOING_MRA ",
	"OUTGOING_REJ ",
	"OUTGOING_DREQ ",
	"OUTGOING_DREP ",
	"REQ_POST_COMPLETE ",
	"REP_POST_COMPLETE ",
	"RTU_POST_COMPLETE ",
	"MRA_POST_COMPLETE ",
	"REJ_POST_COMPLETE ",
	"LAP_POST_COMPLETE ",
	"APR_POST_COMPLETE ",
	"DREQ_POST_COMPLETE ",
	"DREP_POST_COMPLETE ",
	"TIMEOUT_REP ",
	"CALLED_REQ_RCVD_EVENT ",
	"RET_REQ_RCVD_EVENT ",
	"CALLED_REP_RCVD_EVENT ",
	"RET_REP_RCVD_EVENT ",
	"CALLED_CONN_EST_EVENT ",
	"RET_CONN_EST_EVENT ",
	"CALLED_CONN_FAIL_EVENT ",
	"RET_CONN_FAIL_EVENT ",
	"CALLED_CONN_CLOSE_EVENT ",
	"RET_CONN_CLOSE_EVENT ",
	"INIT_INIT ",
	"INIT_INIT_FAIL ",
	"INIT_RTR ",
	"INIT_RTR_FAIL ",
	"RTR_RTS ",
	"RTR_RTS_FAIL ",
	"RTS_RTS ",
	"RTS_RTS_FAIL ",
	"TO_ERROR ",
	"ERROR_FAIL ",
	"SET_ALT ",
	"SET_ALT_FAIL ",
	"STALE_DETECT ",
	"OUTGOING_REQ_RETRY ",
	"OUTGOING_REP_RETRY ",
	"OUTGOING_LAP_RETRY ",
	"OUTGOING_MRA_RETRY ",
	"OUTGOING_DREQ_RETRY ",
	"NEVER SEE THIS "
};

/* scratch buffer for formatting debug/trace output */
char	ibcm_debug_buf[IBCM_DEBUG_BUF_SIZE];

#ifdef DEBUG
int	ibcm_test_mode = 0;	/* set to 1, if running tests */
#endif


/* Module Driver Info */
static struct modlmisc ibcm_modlmisc = {
	&mod_miscops,
	"IB Communication Manager"
};

/* Module Linkage */
static struct modlinkage ibcm_modlinkage = {
	MODREV_1,
	&ibcm_modlmisc,
	NULL
};
335
336
337 int
338 _init(void)
339 {
340 int rval;
341 ibcm_status_t status;
342
343 status = ibcm_init();
344 if (status != IBCM_SUCCESS) {
345 IBTF_DPRINTF_L2(cmlog, "_init: ibcm failed %d", status);
346 return (EINVAL);
347 }
348
349 rval = mod_install(&ibcm_modlinkage);
350 if (rval != 0) {
351 IBTF_DPRINTF_L2(cmlog, "_init: ibcm mod_install failed %d",
352 rval);
353 (void) ibcm_fini();
354 }
355
356 IBTF_DPRINTF_L5(cmlog, "_init: ibcm successful");
357 return (rval);
358
359 }
360
361
/*
 * _info:
 *	Loadable-module entry point; reports this module's information
 *	through the standard mod_info() mechanism.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&ibcm_modlinkage, modinfop));
}
367
368
369 int
370 _fini(void)
371 {
372 int status;
373
374 if (ibcm_fini() != IBCM_SUCCESS)
375 return (EBUSY);
376
377 if ((status = mod_remove(&ibcm_modlinkage)) != 0) {
378 IBTF_DPRINTF_L2(cmlog, "_fini: ibcm mod_remove failed %d",
379 status);
380 return (status);
381 }
382
383 IBTF_DPRINTF_L5(cmlog, "_fini: ibcm successful");
384
385 return (status);
386 }
387
388 /* Initializes all global mutex and CV in cm module */
389 static void
390 ibcm_init_locks()
391 {
392
393 /* Verify CM MAD sizes */
394 #ifdef DEBUG
395
396 if (ibcm_test_mode > 1) {
397
398 IBTF_DPRINTF_L1(cmlog, "REQ MAD SIZE %d",
399 sizeof (ibcm_req_msg_t));
400 IBTF_DPRINTF_L1(cmlog, "REP MAD SIZE %d",
401 sizeof (ibcm_rep_msg_t));
402 IBTF_DPRINTF_L1(cmlog, "RTU MAD SIZE %d",
403 sizeof (ibcm_rtu_msg_t));
404 IBTF_DPRINTF_L1(cmlog, "MRA MAD SIZE %d",
405 sizeof (ibcm_mra_msg_t));
406 IBTF_DPRINTF_L1(cmlog, "REJ MAD SIZE %d",
407 sizeof (ibcm_rej_msg_t));
408 IBTF_DPRINTF_L1(cmlog, "LAP MAD SIZE %d",
409 sizeof (ibcm_lap_msg_t));
410 IBTF_DPRINTF_L1(cmlog, "APR MAD SIZE %d",
411 sizeof (ibcm_apr_msg_t));
412 IBTF_DPRINTF_L1(cmlog, "DREQ MAD SIZE %d",
413 sizeof (ibcm_dreq_msg_t));
414 IBTF_DPRINTF_L1(cmlog, "DREP MAD SIZE %d",
415 sizeof (ibcm_drep_msg_t));
416 IBTF_DPRINTF_L1(cmlog, "SIDR REQ MAD SIZE %d",
417 sizeof (ibcm_sidr_req_msg_t));
418 IBTF_DPRINTF_L1(cmlog, "SIDR REP MAD SIZE %d",
419 sizeof (ibcm_sidr_rep_msg_t));
420 }
421
422 #endif
423
424 /* Create all global locks within cm module */
425 mutex_init(&ibcm_svc_info_lock, NULL, MUTEX_DEFAULT, NULL);
426 mutex_init(&ibcm_mcglist_lock, NULL, MUTEX_DEFAULT, NULL);
427 mutex_init(&ibcm_timeout_list_lock, NULL, MUTEX_DEFAULT, NULL);
428 mutex_init(&ibcm_global_hca_lock, NULL, MUTEX_DEFAULT, NULL);
429 mutex_init(&ibcm_sa_open_lock, NULL, MUTEX_DEFAULT, NULL);
430 mutex_init(&ibcm_recv_mutex, NULL, MUTEX_DEFAULT, NULL);
431 mutex_init(&ibcm_sm_notice_serialize_lock, NULL, MUTEX_DEFAULT, NULL);
432 mutex_init(&ibcm_qp_list_lock, NULL, MUTEX_DEFAULT, NULL);
433 mutex_init(&ibcm_trace_mutex, NULL, MUTEX_DEFAULT, NULL);
434 mutex_init(&ibcm_trace_print_mutex, NULL, MUTEX_DEFAULT, NULL);
435 cv_init(&ibcm_svc_info_cv, NULL, CV_DRIVER, NULL);
436 cv_init(&ibcm_timeout_list_cv, NULL, CV_DRIVER, NULL);
437 cv_init(&ibcm_timeout_thread_done_cv, NULL, CV_DRIVER, NULL);
438 cv_init(&ibcm_global_hca_cv, NULL, CV_DRIVER, NULL);
439 cv_init(&ibcm_sa_open_cv, NULL, CV_DRIVER, NULL);
440 avl_create(&ibcm_svc_avl_tree, ibcm_svc_compare,
441 sizeof (ibcm_svc_info_t),
442 offsetof(struct ibcm_svc_info_s, svc_link));
443
444 IBTF_DPRINTF_L5(cmlog, "ibcm_init_locks: done");
445 }
446
447 /* Destroys all global mutex and CV in cm module */
448 static void
449 ibcm_fini_locks()
450 {
451 /* Destroy all global locks within cm module */
452 mutex_destroy(&ibcm_svc_info_lock);
453 mutex_destroy(&ibcm_mcglist_lock);
454 mutex_destroy(&ibcm_timeout_list_lock);
455 mutex_destroy(&ibcm_global_hca_lock);
456 mutex_destroy(&ibcm_sa_open_lock);
457 mutex_destroy(&ibcm_recv_mutex);
458 mutex_destroy(&ibcm_sm_notice_serialize_lock);
459 mutex_destroy(&ibcm_qp_list_lock);
460 mutex_destroy(&ibcm_trace_mutex);
461 mutex_destroy(&ibcm_trace_print_mutex);
462 cv_destroy(&ibcm_svc_info_cv);
463 cv_destroy(&ibcm_timeout_list_cv);
464 cv_destroy(&ibcm_timeout_thread_done_cv);
465 cv_destroy(&ibcm_global_hca_cv);
466 cv_destroy(&ibcm_sa_open_cv);
467 avl_destroy(&ibcm_svc_avl_tree);
468
469 IBTF_DPRINTF_L5(cmlog, "ibcm_fini_locks: done");
470 }
471
472
473 /* Initialize CM's classport info */
474 static void
475 ibcm_init_classportinfo()
476 {
477 ibcm_clpinfo.BaseVersion = IBCM_MAD_BASE_VERSION;
478 ibcm_clpinfo.ClassVersion = IBCM_MAD_CLASS_VERSION;
479
480 /* For now, CM supports same capabilities at all ports */
481 ibcm_clpinfo.CapabilityMask =
482 h2b16(IBCM_CPINFO_CAP_RC | IBCM_CPINFO_CAP_SIDR);
483
484 /* Bits 0-7 are all 0 for Communication Mgmt Class */
485
486 /* For now, CM has the same respvalue at all ports */
487 ibcm_clpinfo.RespTimeValue_plus =
488 h2b32(ibt_usec2ib(ibcm_local_processing_time) & 0x1f);
489
490 /* For now, redirect fields are set to 0 */
491 /* Trap fields are not applicable to CM, hence set to 0 */
492
493 IBTF_DPRINTF_L5(cmlog, "ibcm_init_classportinfo: done");
494 }
495
/*
 * ibcm_init():
 *	- call ibt_attach()
 *	- create AVL trees
 *	- Attach HCA handlers that are already present before
 *	  CM got loaded.
 *
 * Arguments:	NONE
 *
 * Return values:
 *	IBCM_SUCCESS - success
 *	IBCM_FAILURE - a setup step failed; all steps completed before the
 *	    failure are rolled back before returning
 */
static ibcm_status_t
ibcm_init(void)
{
	ibt_status_t	status;
	kthread_t	*t;

	IBTF_DPRINTF_L3(cmlog, "ibcm_init:");

	ibcm_init_classportinfo();

	if (ibcm_init_ids() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
		    "fatal error: vmem_create() failed");
		return (IBCM_FAILURE);
	}
	ibcm_init_locks();

	if (ibcm_ar_init() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L1(cmlog, "ibcm_init: "
		    "fatal error: ibcm_ar_init() failed");
		/* roll back the steps done so far, in reverse order */
		ibcm_fini_ids();
		ibcm_fini_locks();
		return (IBCM_FAILURE);
	}
	ibcm_rc_flow_control_init();

	ibcm_taskq = system_taskq;

	/* Start the timeout list processing thread */
	ibcm_timeout_list_flags = 0;
	t = thread_create(NULL, 0, ibcm_process_tlist, 0, 0, &p0, TS_RUN,
	    ibcm_timeout_thread_pri);
	/* saved so ibcm_stop_timeout_thread() can thread_join() it */
	ibcm_timeout_thread_did = t->t_did;

	/*
	 * NOTE : if ibt_attach is done after ibcm_init_hcas, then some
	 * HCA DR events may be lost. CM could call re-init hca list
	 * again, but it is more complicated. Some HCA's DR's lost may
	 * be HCA detach, which makes hca list re-syncing and locking more
	 * complex
	 */
	status = ibt_attach(&ibcm_ibt_modinfo, NULL, NULL, &ibcm_ibt_handle);
	if (status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init(): ibt_attach failed %d",
		    status);
		/* undo everything initialized above */
		(void) ibcm_ar_fini();
		ibcm_stop_timeout_thread();
		ibcm_fini_ids();
		ibcm_fini_locks();
		ibcm_rc_flow_control_fini();
		return (IBCM_FAILURE);
	}

	/* Block all HCA attach/detach asyncs */
	mutex_enter(&ibcm_global_hca_lock);

	ibcm_init_hcas();
	ibcm_finit_state = IBCM_FINIT_IDLE;

	ibcm_path_cache_init();
	/*
	 * This callback will be used by IBTL to get the Node record for a
	 * given LID via the specified HCA and port.
	 */
	ibtl_cm_set_node_info_cb(ibcm_ibtl_node_info);

	/* Unblock any waiting HCA DR asyncs in CM */
	mutex_exit(&ibcm_global_hca_lock);

	IBTF_DPRINTF_L4(cmlog, "ibcm_init: done");
	return (IBCM_SUCCESS);
}
580
581 /* Allocates and initializes the "per hca" global data in CM */
582 static void
583 ibcm_init_hcas()
584 {
585 uint_t num_hcas = 0;
586 ib_guid_t *guid_array;
587 int i;
588
589 IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas:");
590
591 /* Get the number of HCAs */
592 num_hcas = ibt_get_hca_list(&guid_array);
593 IBTF_DPRINTF_L4(cmlog, "ibcm_init_hcas: ibt_get_hca_list() "
594 "returned %d hcas", num_hcas);
595
596 ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
597
598 for (i = 0; i < num_hcas; i++)
599 ibcm_hca_attach(guid_array[i]);
600
601 if (num_hcas)
602 ibt_free_hca_list(guid_array, num_hcas);
603
604 IBTF_DPRINTF_L5(cmlog, "ibcm_init_hcas: done");
605 }
606
607
/*
 * ibcm_fini():
 *	- Deregister w/ ibt
 *	- Cleanup IBCM HCA listp
 *	- Destroy mutexes
 *
 * Arguments:	NONE
 *
 * Return values:
 *	IBCM_SUCCESS - success
 *	IBCM_FAILURE - services still registered or client resources still
 *	    active; CM state is restored and the module must stay loaded
 */
static ibcm_status_t
ibcm_fini(void)
{
	ibt_status_t	status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_fini:");

	/*
	 * CM assumes that the all general clients got rid of all the
	 * established connections and service registrations, completed all
	 * pending SIDR operations before a call to ibcm_fini()
	 */

	if (ibcm_ar_fini() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: ibcm_ar_fini failed");
		return (IBCM_FAILURE);
	}

	/* cleanup the svcinfo list */
	mutex_enter(&ibcm_svc_info_lock);
	if (avl_first(&ibcm_svc_avl_tree) != NULL) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
		    "ibcm_svc_avl_tree is not empty");
		mutex_exit(&ibcm_svc_info_lock);
		return (IBCM_FAILURE);
	}
	mutex_exit(&ibcm_svc_info_lock);

	/* disables any new hca attach/detaches */
	mutex_enter(&ibcm_global_hca_lock);

	ibcm_finit_state = IBCM_FINIT_BUSY;

	if (ibcm_fini_hcas() != IBCM_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_fini: "
		    "some hca's still have client resources");

		/* First, re-initialize the hcas */
		ibcm_init_hcas();
		/* and then enable the HCA asyncs */
		ibcm_finit_state = IBCM_FINIT_IDLE;
		mutex_exit(&ibcm_global_hca_lock);
		/* re-do ibcm_ar_init(), since ibcm_ar_fini() succeeded above */
		if (ibcm_ar_init() != IBCM_SUCCESS) {
			IBTF_DPRINTF_L1(cmlog, "ibcm_fini:ibcm_ar_init failed");
		}
		return (IBCM_FAILURE);
	}

	/* all state structures must have been drained by ibcm_fini_hcas() */
	ASSERT(ibcm_timeout_list_hdr == NULL);
	ASSERT(ibcm_ud_timeout_list_hdr == NULL);

	/* Release any pending asyncs on ibcm_global_hca_lock */
	ibcm_finit_state = IBCM_FINIT_SUCCESS;
	mutex_exit(&ibcm_global_hca_lock);

	ibcm_stop_timeout_thread();

	/* remove the node-info callback installed in ibcm_init() */
	ibtl_cm_set_node_info_cb(NULL);
	/*
	 * Detach from IBTL. Waits until all pending asyncs are complete.
	 * The IBCM_FINIT_SUCCESS transition above lets any waiting hca
	 * attach/detach asyncs finish.
	 */
	status = ibt_detach(ibcm_ibt_handle);

	/* if detach fails, CM didn't free up some resources, so assert */
	if (status != IBT_SUCCESS)
		IBTF_DPRINTF_L1(cmlog, "ibcm_fini: ibt_detach failed %d",
		    status);

	ibcm_rc_flow_control_fini();

	ibcm_path_cache_fini();

	ibcm_fini_ids();
	ibcm_fini_locks();
	IBTF_DPRINTF_L3(cmlog, "ibcm_fini: done");
	return (IBCM_SUCCESS);
}
697
698 /* This routine exit's the ibcm timeout thread */
699 static void
700 ibcm_stop_timeout_thread()
701 {
702 mutex_enter(&ibcm_timeout_list_lock);
703
704 /* Stop the timeout list processing thread */
705 ibcm_timeout_list_flags =
706 ibcm_timeout_list_flags | IBCM_TIMEOUT_THREAD_EXIT;
707
708 /* Wake up, if the timeout thread is on a cv_wait */
709 cv_signal(&ibcm_timeout_list_cv);
710
711 mutex_exit(&ibcm_timeout_list_lock);
712 thread_join(ibcm_timeout_thread_did);
713
714 IBTF_DPRINTF_L5(cmlog, "ibcm_stop_timeout_thread: done");
715 }
716
717
718 /* Attempts to release all the hca's associated with CM */
719 static ibcm_status_t
720 ibcm_fini_hcas()
721 {
722 ibcm_hca_info_t *hcap, *next;
723
724 IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas:");
725
726 ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
727
728 hcap = ibcm_hca_listp;
729 while (hcap != NULL) {
730 next = hcap->hca_next;
731 if (ibcm_hca_detach(hcap) != IBCM_SUCCESS) {
732 ibcm_hca_listp = hcap;
733 return (IBCM_FAILURE);
734 }
735 hcap = next;
736 }
737
738 IBTF_DPRINTF_L4(cmlog, "ibcm_fini_hcas: SUCCEEDED");
739 return (IBCM_SUCCESS);
740 }
741
742
743 /*
744 * ibcm_hca_attach():
745 * Called as an asynchronous event to notify CM of an attach of HCA.
746 * Here ibcm_hca_info_t is initialized and all fields are
747 * filled in along with SA Access handles and IBMA handles.
748 * Also called from ibcm_init to initialize ibcm_hca_info_t's for each
749 * hca's
750 *
751 * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
752 * hca_guid - HCA's guid
753 *
754 * Return values: NONE
755 */
756 static void
757 ibcm_hca_attach(ib_guid_t hcaguid)
758 {
759 int i;
760 ibt_status_t status;
761 uint8_t nports = 0;
762 ibcm_hca_info_t *hcap;
763 ibt_hca_attr_t hca_attrs;
764
765 IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: guid = 0x%llX", hcaguid);
766
767 ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
768
769 status = ibt_query_hca_byguid(hcaguid, &hca_attrs);
770 if (status != IBT_SUCCESS) {
771 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
772 "ibt_query_hca_byguid failed = %d", status);
773 return;
774 }
775 nports = hca_attrs.hca_nports;
776
777 IBTF_DPRINTF_L4(cmlog, "ibcm_hca_attach: num ports = %x", nports);
778
779 if ((hcap = ibcm_add_hca_entry(hcaguid, nports)) == NULL)
780 return;
781
782 hcap->hca_guid = hcaguid; /* Set GUID */
783 hcap->hca_num_ports = nports; /* Set number of ports */
784
785 if (ibcm_init_hca_ids(hcap) != IBCM_SUCCESS) {
786 ibcm_delete_hca_entry(hcap);
787 return;
788 }
789
790 /* Store the static hca attribute data */
791 hcap->hca_caps = hca_attrs.hca_flags;
792 hcap->hca_vendor_id = hca_attrs.hca_vendor_id;
793 hcap->hca_device_id = hca_attrs.hca_device_id;
794 hcap->hca_ack_delay = hca_attrs.hca_local_ack_delay;
795 hcap->hca_max_rdma_in_qp = hca_attrs.hca_max_rdma_in_qp;
796 hcap->hca_max_rdma_out_qp = hca_attrs.hca_max_rdma_out_qp;
797
798 /* loop thru nports and initialize IBMF handles */
799 for (i = 0; i < hcap->hca_num_ports; i++) {
800 status = ibt_get_port_state_byguid(hcaguid, i + 1, NULL, NULL);
801 if (status != IBT_SUCCESS) {
802 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
803 "port_num %d state DOWN", i + 1);
804 }
805
806 hcap->hca_port_info[i].port_hcap = hcap;
807 hcap->hca_port_info[i].port_num = i+1;
808
809 if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
810 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_attach: "
811 "ibcm_hca_init_port failed %d port_num %d",
812 status, i+1);
813 }
814
815 /* create the "active" CM AVL tree */
816 avl_create(&hcap->hca_active_tree, ibcm_active_node_compare,
817 sizeof (ibcm_state_data_t),
818 offsetof(struct ibcm_state_data_s, avl_active_link));
819
820 /* create the "passive" CM AVL tree */
821 avl_create(&hcap->hca_passive_tree, ibcm_passive_node_compare,
822 sizeof (ibcm_state_data_t),
823 offsetof(struct ibcm_state_data_s, avl_passive_link));
824
825 /* create the "passive comid" CM AVL tree */
826 avl_create(&hcap->hca_passive_comid_tree,
827 ibcm_passive_comid_node_compare,
828 sizeof (ibcm_state_data_t),
829 offsetof(struct ibcm_state_data_s, avl_passive_comid_link));
830
831 /*
832 * Mark the state of the HCA to "attach" only at the end
833 * Now CM starts accepting incoming MADs and client API calls
834 */
835 hcap->hca_state = IBCM_HCA_ACTIVE;
836
837 IBTF_DPRINTF_L3(cmlog, "ibcm_hca_attach: ATTACH Done");
838 }
839
840 /*
841 * ibcm_hca_detach():
842 * Called as an asynchronous event to notify CM of a detach of HCA.
843 * Here ibcm_hca_info_t is freed up and all fields that
844 * were initialized earlier are cleaned up
845 *
846 * Arguments: (WILL CHANGE BASED ON ASYNC EVENT CODE)
847 * hca_guid - HCA's guid
848 *
849 * Return values:
850 * IBCM_SUCCESS - able to detach HCA
851 * IBCM_FAILURE - failed to detach HCA
852 */
853 static ibcm_status_t
854 ibcm_hca_detach(ibcm_hca_info_t *hcap)
855 {
856 int port_index, i;
857 ibcm_status_t status = IBCM_SUCCESS;
858 clock_t absolute_time;
859
860 IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: hcap = 0x%p guid = 0x%llX",
861 hcap, hcap->hca_guid);
862
863 ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
864
865 /*
866 * Declare hca is going away to all CM clients. Wait until the
867 * access count becomes zero.
868 */
869 hcap->hca_state = IBCM_HCA_NOT_ACTIVE;
870
871 /* wait on response CV */
872 absolute_time = ddi_get_lbolt() +
873 drv_usectohz(ibcm_wait_for_acc_cnt_timeout);
874
875 while (hcap->hca_acc_cnt > 0)
876 if (cv_timedwait(&ibcm_global_hca_cv, &ibcm_global_hca_lock,
877 absolute_time) == -1)
878 break;
879
880 if (hcap->hca_acc_cnt != 0) {
881 /* We got a timeout */
882 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: Aborting due"
883 " to timeout on hca_acc_cnt %u, \n Some CM Clients are "
884 "still active, looks like we need to wait some more time "
885 "(ibcm_wait_for_acc_cnt_timeout).", hcap->hca_acc_cnt);
886 hcap->hca_state = IBCM_HCA_ACTIVE;
887 return (IBCM_FAILURE);
888 }
889
890 /*
891 * First make sure, there are no active users of ibma handles,
892 * and then de-register handles.
893 */
894
895 /* make sure that there are no "Service"s registered w/ this HCA. */
896 if (hcap->hca_svc_cnt != 0) {
897 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
898 "Active services still there %d", hcap->hca_svc_cnt);
899 hcap->hca_state = IBCM_HCA_ACTIVE;
900 return (IBCM_FAILURE);
901 }
902
903 if (ibcm_check_sidr_clean(hcap) != IBCM_SUCCESS) {
904 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach:"
905 "There are active SIDR operations");
906 hcap->hca_state = IBCM_HCA_ACTIVE;
907 return (IBCM_FAILURE);
908 }
909
910 if (ibcm_check_avl_clean(hcap) != IBCM_SUCCESS) {
911 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
912 "There are active RC connections");
913 hcap->hca_state = IBCM_HCA_ACTIVE;
914 return (IBCM_FAILURE);
915 }
916
917 /*
918 * Now, wait until all rc and sidr stateps go away
919 * All these stateps must be short lived ones, waiting to be cleaned
920 * up after some timeout value, based on the current state.
921 */
922 IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach:hca_guid = 0x%llX res_cnt = %d",
923 hcap->hca_guid, hcap->hca_res_cnt);
924
925 while (hcap->hca_res_cnt > 0)
926 cv_wait(&ibcm_global_hca_cv, &ibcm_global_hca_lock);
927
928 /* Re-assert the while loop step above */
929 ASSERT(hcap->hca_sidr_list == NULL);
930 avl_destroy(&hcap->hca_active_tree);
931 avl_destroy(&hcap->hca_passive_tree);
932 avl_destroy(&hcap->hca_passive_comid_tree);
933
934 /*
935 * Unregister all ports from IBMA
936 * If there is a failure, re-initialize any free'd ibma handles. This
937 * is required to receive the incoming mads
938 */
939 status = IBCM_SUCCESS;
940 for (port_index = 0; port_index < hcap->hca_num_ports; port_index++) {
941 if ((status = ibcm_hca_fini_port(hcap, port_index)) !=
942 IBCM_SUCCESS) {
943 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
944 "Failed to free IBMA Handle for port_num %d",
945 port_index + 1);
946 break;
947 }
948 }
949
950 /* If detach fails, re-initialize ibma handles for incoming mads */
951 if (status != IBCM_SUCCESS) {
952 for (i = 0; i < port_index; i++) {
953 if (ibcm_hca_init_port(hcap, i) != IBT_SUCCESS)
954 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_detach: "
955 "Failed to re-allocate IBMA Handles for"
956 " port_num %d", port_index + 1);
957 }
958 hcap->hca_state = IBCM_HCA_ACTIVE;
959 return (IBCM_FAILURE);
960 }
961
962 ibcm_fini_hca_ids(hcap);
963 ibcm_delete_hca_entry(hcap);
964
965 IBTF_DPRINTF_L3(cmlog, "ibcm_hca_detach: DETACH succeeded");
966 return (IBCM_SUCCESS);
967 }
968
969 /* Checks, if there are any active sidr state entries in the specified hca */
970 static ibcm_status_t
971 ibcm_check_sidr_clean(ibcm_hca_info_t *hcap)
972 {
973 ibcm_ud_state_data_t *usp;
974 uint32_t transient_cnt = 0;
975
976 IBTF_DPRINTF_L5(cmlog, "ibcm_check_sidr_clean:");
977
978 rw_enter(&hcap->hca_sidr_list_lock, RW_WRITER);
979 usp = hcap->hca_sidr_list; /* Point to the list */
980 while (usp != NULL) {
981 mutex_enter(&usp->ud_state_mutex);
982 if ((usp->ud_state != IBCM_STATE_SIDR_REP_SENT) &&
983 (usp->ud_state != IBCM_STATE_TIMED_OUT) &&
984 (usp->ud_state != IBCM_STATE_DELETE)) {
985
986 IBTF_DPRINTF_L3(cmlog, "ibcm_check_sidr_clean:"
987 "usp = %p not in transient state = %d", usp,
988 usp->ud_state);
989
990 mutex_exit(&usp->ud_state_mutex);
991 rw_exit(&hcap->hca_sidr_list_lock);
992 return (IBCM_FAILURE);
993 } else {
994 mutex_exit(&usp->ud_state_mutex);
995 ++transient_cnt;
996 }
997
998 usp = usp->ud_nextp;
999 }
1000 rw_exit(&hcap->hca_sidr_list_lock);
1001
1002 IBTF_DPRINTF_L4(cmlog, "ibcm_check_sidr_clean: transient_cnt %d",
1003 transient_cnt);
1004
1005 return (IBCM_SUCCESS);
1006 }
1007
1008 /* Checks, if there are any active rc state entries, in the specified hca */
1009 static ibcm_status_t
1010 ibcm_check_avl_clean(ibcm_hca_info_t *hcap)
1011
1012 {
1013 ibcm_state_data_t *sp;
1014 avl_tree_t *avl_tree;
1015 uint32_t transient_cnt = 0;
1016
1017 IBTF_DPRINTF_L5(cmlog, "ibcm_check_avl_clean:");
1018 /*
1019 * Both the trees ie., active and passive must reference to all
1020 * statep's, so let's use one
1021 */
1022 avl_tree = &hcap->hca_active_tree;
1023
1024 rw_enter(&hcap->hca_state_rwlock, RW_WRITER);
1025
1026 for (sp = avl_first(avl_tree); sp != NULL;
1027 sp = avl_walk(avl_tree, sp, AVL_AFTER)) {
1028 mutex_enter(&sp->state_mutex);
1029 if ((sp->state != IBCM_STATE_TIMEWAIT) &&
1030 (sp->state != IBCM_STATE_REJ_SENT) &&
1031 (sp->state != IBCM_STATE_DELETE)) {
1032 IBTF_DPRINTF_L3(cmlog, "ibcm_check_avl_clean: "
1033 "sp = %p not in transient state = %d", sp,
1034 sp->state);
1035 mutex_exit(&sp->state_mutex);
1036 rw_exit(&hcap->hca_state_rwlock);
1037 return (IBCM_FAILURE);
1038 } else {
1039 mutex_exit(&sp->state_mutex);
1040 ++transient_cnt;
1041 }
1042 }
1043
1044 rw_exit(&hcap->hca_state_rwlock);
1045
1046 IBTF_DPRINTF_L4(cmlog, "ibcm_check_avl_clean: transient_cnt %d",
1047 transient_cnt);
1048
1049 return (IBCM_SUCCESS);
1050 }
1051
/*
 * Adds a new entry into CM's global hca list, if hca_guid is not there yet.
 *
 * Caller must hold ibcm_global_hca_lock (asserted).  Returns the newly
 * allocated, zeroed ibcm_hca_info_t on success, or NULL if an entry with
 * the same GUID already exists.  The new entry is prepended to the
 * ibcm_hca_listp singly-linked list.
 */
static ibcm_hca_info_t	*
ibcm_add_hca_entry(ib_guid_t hcaguid, uint_t nports)
{
	ibcm_hca_info_t	*hcap;

	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: guid = 0x%llX",
	    hcaguid);

	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));

	/*
	 * Check if this hca_guid already in the list
	 * If yes, then ignore this and return NULL
	 */

	hcap = ibcm_hca_listp;

	/* search for this HCA */
	while (hcap != NULL) {
		if (hcap->hca_guid == hcaguid) {
			/* already exists */
			IBTF_DPRINTF_L2(cmlog, "ibcm_add_hca_entry: "
			    "hcap %p guid 0x%llX, entry already exists !!",
			    hcap, hcap->hca_guid);
			return (NULL);
		}
		hcap = hcap->hca_next;
	}

	/*
	 * Allocate storage for the new HCA entry found.
	 * (nports - 1) because ibcm_hca_info_t apparently already embeds
	 * one ibcm_port_info_t as its trailing array element; the free in
	 * ibcm_delete_hca_entry() uses the matching size.
	 */
	hcap = kmem_zalloc(sizeof (ibcm_hca_info_t) +
	    (nports - 1) * sizeof (ibcm_port_info_t), KM_SLEEP);

	/* initialize RW lock */
	rw_init(&hcap->hca_state_rwlock, NULL, RW_DRIVER, NULL);
	/* initialize SIDR list lock */
	rw_init(&hcap->hca_sidr_list_lock, NULL, RW_DRIVER, NULL);
	/* Insert "hcap" into the global HCA list maintained by CM */
	hcap->hca_next = ibcm_hca_listp;
	ibcm_hca_listp = hcap;

	IBTF_DPRINTF_L5(cmlog, "ibcm_add_hca_entry: done hcap = 0x%p", hcap);

	return (hcap);

}
1099
/*
 * Deletes the given ibcm_hca_info_t from CM's global hca list and frees
 * its storage (including the trailing per-port array).  Caller must hold
 * ibcm_global_hca_lock (asserted).  A no-op if hcap is not on the list.
 */
void
ibcm_delete_hca_entry(ibcm_hca_info_t *hcap)
{
	ibcm_hca_info_t	*headp, *prevp = NULL;

	/* ibcm_global_hca_lock is held */
	IBTF_DPRINTF_L5(cmlog, "ibcm_delete_hca_entry: guid = 0x%llX "
	    "hcap = 0x%p", hcap->hca_guid, hcap);

	ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));

	headp = ibcm_hca_listp;
	while (headp != NULL) {
		if (headp == hcap) {
			IBTF_DPRINTF_L3(cmlog, "ibcm_delete_hca_entry: "
			    "deleting hcap %p hcaguid %llX", hcap,
			    hcap->hca_guid);
			if (prevp) {
				/* unlink from the middle/end of the list */
				prevp->hca_next = headp->hca_next;
			} else {
				/* hcap is the list head; advance the head */
				prevp = headp->hca_next;
				ibcm_hca_listp = prevp;
			}
			rw_destroy(&hcap->hca_state_rwlock);
			rw_destroy(&hcap->hca_sidr_list_lock);
			/* size must mirror the ibcm_add_hca_entry() alloc */
			kmem_free(hcap, sizeof (ibcm_hca_info_t) +
			    (hcap->hca_num_ports - 1) *
			    sizeof (ibcm_port_info_t));
			return;
		}

		prevp = headp;
		headp = headp->hca_next;
	}
}
1136
1137 /*
1138 * ibcm_find_hca_entry:
1139 * Given a HCA's GUID find out ibcm_hca_info_t entry for that HCA
1140 * This entry can be then used to access AVL tree/SIDR list etc.
1141 * If entry exists and in HCA ATTACH state, then hca's ref cnt is
1142 * incremented and entry returned. Else NULL returned.
1143 *
1144 * All functions that use ibcm_find_hca_entry and get a non-NULL
1145 * return values must call ibcm_dec_hca_acc_cnt to decrement the
1146 * respective hca ref cnt. There shouldn't be any usage of
1147 * ibcm_hca_info_t * returned from ibcm_find_hca_entry,
1148 * after decrementing the hca_acc_cnt
1149 *
1150 * INPUTS:
1151 * hca_guid - HCA's guid
1152 *
1153 * RETURN VALUE:
1154 * hcap - if a match is found, else NULL
1155 */
1156 ibcm_hca_info_t *
1157 ibcm_find_hca_entry(ib_guid_t hca_guid)
1158 {
1159 ibcm_hca_info_t *hcap;
1160
1161 IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: guid = 0x%llX", hca_guid);
1162
1163 mutex_enter(&ibcm_global_hca_lock);
1164
1165 hcap = ibcm_hca_listp;
1166 /* search for this HCA */
1167 while (hcap != NULL) {
1168 if (hcap->hca_guid == hca_guid)
1169 break;
1170 hcap = hcap->hca_next;
1171 }
1172
1173 /* if no hcap for the hca_guid, return NULL */
1174 if (hcap == NULL) {
1175 mutex_exit(&ibcm_global_hca_lock);
1176 return (NULL);
1177 }
1178
1179 /* return hcap, only if it valid to use */
1180 if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1181 ++(hcap->hca_acc_cnt);
1182
1183 IBTF_DPRINTF_L5(cmlog, "ibcm_find_hca_entry: "
1184 "found hcap = 0x%p hca_acc_cnt %u", hcap,
1185 hcap->hca_acc_cnt);
1186
1187 mutex_exit(&ibcm_global_hca_lock);
1188 return (hcap);
1189 } else {
1190 mutex_exit(&ibcm_global_hca_lock);
1191
1192 IBTF_DPRINTF_L2(cmlog, "ibcm_find_hca_entry: "
1193 "found hcap = 0x%p not in active state", hcap);
1194 return (NULL);
1195 }
1196 }
1197
1198 /*
1199 * Searches for ibcm_hca_info_t entry based on hca_guid, but doesn't increment
1200 * the hca's reference count. This function is used, where the calling context
1201 * is attempting to delete hcap itself and hence acc_cnt cannot be incremented
1202 * OR assumes that valid hcap must be available in ibcm's global hca list.
1203 */
1204 ibcm_hca_info_t *
1205 ibcm_find_hcap_entry(ib_guid_t hca_guid)
1206 {
1207 ibcm_hca_info_t *hcap;
1208
1209 IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: guid = 0x%llX", hca_guid);
1210
1211 ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
1212
1213 hcap = ibcm_hca_listp;
1214 /* search for this HCA */
1215 while (hcap != NULL) {
1216 if (hcap->hca_guid == hca_guid)
1217 break;
1218 hcap = hcap->hca_next;
1219 }
1220
1221 if (hcap == NULL)
1222 IBTF_DPRINTF_L2(cmlog, "ibcm_find_hcap_entry: No hcap found for"
1223 " hca_guid 0x%llX", hca_guid);
1224 else
1225 IBTF_DPRINTF_L5(cmlog, "ibcm_find_hcap_entry: hcap found for"
1226 " hca_guid 0x%llX", hca_guid);
1227
1228 return (hcap);
1229 }
1230
1231 /* increment the hca's temporary reference count */
1232 ibcm_status_t
1233 ibcm_inc_hca_acc_cnt(ibcm_hca_info_t *hcap)
1234 {
1235 mutex_enter(&ibcm_global_hca_lock);
1236 if (hcap->hca_state == IBCM_HCA_ACTIVE) {
1237 ++(hcap->hca_acc_cnt);
1238 IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_acc_cnt: "
1239 "hcap = 0x%p acc_cnt = %d ", hcap, hcap->hca_acc_cnt);
1240 mutex_exit(&ibcm_global_hca_lock);
1241 return (IBCM_SUCCESS);
1242 } else {
1243 IBTF_DPRINTF_L2(cmlog, "ibcm_inc_hca_acc_cnt: "
1244 "hcap INACTIVE 0x%p acc_cnt = %d ", hcap,
1245 hcap->hca_acc_cnt);
1246 mutex_exit(&ibcm_global_hca_lock);
1247 return (IBCM_FAILURE);
1248 }
1249 }
1250
/*
 * Decrement the hca's ref count, and wake up any waiting threads.
 *
 * When the HCA is being torn down (NOT_ACTIVE) and this was the last
 * access reference, broadcast on ibcm_global_hca_cv so the detach path
 * waiting for the count to drain can proceed.
 */
void
ibcm_dec_hca_acc_cnt(ibcm_hca_info_t *hcap)
{
	mutex_enter(&ibcm_global_hca_lock);
	ASSERT(hcap->hca_acc_cnt > 0);
	--(hcap->hca_acc_cnt);
	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_acc_cnt: hcap = 0x%p "
	    "acc_cnt = %d", hcap, hcap->hca_acc_cnt);
	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
	    (hcap->hca_acc_cnt == 0)) {
		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_acc_cnt: "
		    "cv_broadcast for hcap = 0x%p", hcap);
		cv_broadcast(&ibcm_global_hca_cv);
	}
	mutex_exit(&ibcm_global_hca_lock);
}
1268
1269 /* increment the hca's resource count */
1270 void
1271 ibcm_inc_hca_res_cnt(ibcm_hca_info_t *hcap)
1272
1273 {
1274 mutex_enter(&ibcm_global_hca_lock);
1275 ++(hcap->hca_res_cnt);
1276 IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_res_cnt: hcap = 0x%p "
1277 "ref_cnt = %d", hcap, hcap->hca_res_cnt);
1278 mutex_exit(&ibcm_global_hca_lock);
1279 }
1280
/*
 * Decrement the hca's resource count, and wake up any waiting threads.
 *
 * Mirrors ibcm_dec_hca_acc_cnt(): when the HCA is NOT_ACTIVE and the
 * last resource is released, signal the detach path waiting on
 * ibcm_global_hca_cv.
 */
void
ibcm_dec_hca_res_cnt(ibcm_hca_info_t *hcap)
{
	mutex_enter(&ibcm_global_hca_lock);
	ASSERT(hcap->hca_res_cnt > 0);
	--(hcap->hca_res_cnt);
	IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_res_cnt: hcap = 0x%p "
	    "ref_cnt = %d", hcap, hcap->hca_res_cnt);
	if ((hcap->hca_state == IBCM_HCA_NOT_ACTIVE) &&
	    (hcap->hca_res_cnt == 0)) {
		IBTF_DPRINTF_L3(cmlog, "ibcm_dec_hca_res_cnt: "
		    "cv_broadcast for hcap = 0x%p", hcap);
		cv_broadcast(&ibcm_global_hca_cv);
	}
	mutex_exit(&ibcm_global_hca_lock);
}
1298
1299 /* increment the hca's service count */
1300 void
1301 ibcm_inc_hca_svc_cnt(ibcm_hca_info_t *hcap)
1302
1303 {
1304 mutex_enter(&ibcm_global_hca_lock);
1305 ++(hcap->hca_svc_cnt);
1306 IBTF_DPRINTF_L5(cmlog, "ibcm_inc_hca_svc_cnt: hcap = 0x%p "
1307 "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1308 mutex_exit(&ibcm_global_hca_lock);
1309 }
1310
1311 /* decrement the hca's service count */
1312 void
1313 ibcm_dec_hca_svc_cnt(ibcm_hca_info_t *hcap)
1314 {
1315 mutex_enter(&ibcm_global_hca_lock);
1316 ASSERT(hcap->hca_svc_cnt > 0);
1317 --(hcap->hca_svc_cnt);
1318 IBTF_DPRINTF_L5(cmlog, "ibcm_dec_hca_svc_cnt: hcap = 0x%p "
1319 "svc_cnt = %d", hcap, hcap->hca_svc_cnt);
1320 mutex_exit(&ibcm_global_hca_lock);
1321 }
1322
1323 /*
1324 * The following code manages three classes of requests that CM makes to
1325 * the fabric. Those three classes are SA_ACCESS, REQ/REP/RTU, and DREQ/DREP.
1326 * The main issue is that the fabric can become very busy, and the CM
1327 * protocols rely on responses being made based on a predefined timeout
 * value. By managing how many simultaneous sessions are allowed, we
 * have observed extremely high reliability of the CM protocol
 * succeeding when it should.
1331 *
1332 * SA_ACCESS and DREQ/DREP are managed at the thread level, whereby the
1333 * thread blocks until there are less than some number of threads doing
1334 * similar requests.
1335 *
1336 * REQ/REP/RTU requests beyond a given limit are added to a list,
1337 * allowing the thread to return immediately to its caller in the
1338 * case where the "mode" is IBT_NONBLOCKING. This is the mode used
1339 * by uDAPL and seems to be an important feature/behavior.
1340 */
1341
1342 static int
1343 ibcm_ok_to_start(struct ibcm_open_s *openp)
1344 {
1345 return (openp->sends < openp->sends_hiwat &&
1346 openp->in_progress < openp->in_progress_max);
1347 }
1348
/*
 * Finish open-flow accounting for statep once its REQ exchange is done.
 *
 * Caller holds statep->state_mutex (asserted).  If the statep was still
 * participating in open flow control (open_flow == 1):
 *   - if it was actively in progress (open_link == NULL), drop the
 *     in_progress count and possibly kick the timeout-list thread to
 *     start queued opens;
 *   - otherwise it is still on the pending-open queue, so unlink it and
 *     repair the tail pointer if needed.
 */
void
ibcm_open_done(ibcm_state_data_t *statep)
{
	int run;
	ibcm_state_data_t **linkp, *tmp;

	ASSERT(MUTEX_HELD(&statep->state_mutex));
	if (statep->open_flow == 1) {
		statep->open_flow = 0;
		mutex_enter(&ibcm_open.mutex);
		if (statep->open_link == NULL) {
			/* was in progress, not queued */
			ibcm_open.in_progress--;
			run = ibcm_ok_to_start(&ibcm_open);
		} else {
			/* still queued: unlink from the singly-linked list */
			ibcm_open.queued--;
			linkp = &ibcm_open.head.open_link;
			while (*linkp != statep)
				linkp = &((*linkp)->open_link);
			*linkp = statep->open_link;
			statep->open_link = NULL;
			/*
			 * If we remove what tail pointed to, we need
			 * to reassign tail (it is never NULL).
			 * tail points to head for the empty list.
			 */
			if (ibcm_open.tail == statep) {
				tmp = &ibcm_open.head;
				while (tmp->open_link != &ibcm_open.head)
					tmp = tmp->open_link;
				ibcm_open.tail = tmp;
			}
			run = 0;
		}
		mutex_exit(&ibcm_open.mutex);
		if (run)
			ibcm_run_tlist_thread();
	}
}
1387
1388 /* dtrace */
1389 void
1390 ibcm_open_wait(hrtime_t delta)
1391 {
1392 if (delta > 1000000)
1393 IBTF_DPRINTF_L2(cmlog, "ibcm_open_wait: flow more %lld", delta);
1394 }
1395
/*
 * Actually post the (stored) REQ MAD for statep and drop the reference
 * that was held while the open was queued/in-progress.  Records how long
 * the statep waited since it was posted (see ibcm_open_wait()).
 */
void
ibcm_open_start(ibcm_state_data_t *statep)
{
	ibcm_insert_trace(statep, IBCM_TRACE_OUTGOING_REQ);

	mutex_enter(&statep->state_mutex);
	ibcm_open_wait(gethrtime() - statep->post_time);
	mutex_exit(&statep->state_mutex);

	/* completion handled asynchronously via ibcm_post_req_complete */
	ibcm_post_rc_mad(statep, statep->stored_msg, ibcm_post_req_complete,
	    statep);

	mutex_enter(&statep->state_mutex);
	IBCM_REF_CNT_DECR(statep);
	mutex_exit(&statep->state_mutex);
}
1412
/*
 * Submit statep's REQ under open flow control: start it immediately if
 * nothing is queued and the limits allow, otherwise append it to the
 * pending-open queue (head/tail list rooted at ibcm_open.head) and
 * optionally kick the timeout-list thread to drain the queue.
 */
void
ibcm_open_enqueue(ibcm_state_data_t *statep)
{
	int run;

	/* record when we were posted, for wait-time diagnostics */
	mutex_enter(&statep->state_mutex);
	statep->post_time = gethrtime();
	mutex_exit(&statep->state_mutex);
	mutex_enter(&ibcm_open.mutex);
	if (ibcm_open.queued == 0 && ibcm_ok_to_start(&ibcm_open)) {
		ibcm_open.in_progress++;
		mutex_exit(&ibcm_open.mutex);
		ibcm_open_start(statep);
	} else {
		/* open_link == &head marks "queued, at end of list" */
		ibcm_open.queued++;
		statep->open_link = &ibcm_open.head;
		ibcm_open.tail->open_link = statep;
		ibcm_open.tail = statep;
		run = ibcm_ok_to_start(&ibcm_open);
		mutex_exit(&ibcm_open.mutex);
		if (run)
			ibcm_run_tlist_thread();
	}
}
1437
/*
 * Remove and return the first statep from the pending-open queue,
 * moving it to in-progress accounting.  Caller holds ibcm_open.mutex
 * (asserted) and must have checked that the queue is non-empty.
 */
ibcm_state_data_t *
ibcm_open_dequeue(void)
{
	ibcm_state_data_t *statep;

	ASSERT(MUTEX_HELD(&ibcm_open.mutex));
	ibcm_open.queued--;
	ibcm_open.in_progress++;
	statep = ibcm_open.head.open_link;
	ibcm_open.head.open_link = statep->open_link;
	statep->open_link = NULL;
	/*
	 * If we remove what tail pointed to, we need
	 * to reassign tail (it is never NULL).
	 * tail points to head for the empty list.
	 */
	if (ibcm_open.tail == statep)
		ibcm_open.tail = &ibcm_open.head;
	return (statep);
}
1458
/*
 * Drain the pending-open queue: while flow control permits, dequeue the
 * next statep and start its REQ.  Drops ibcm_open.mutex around each
 * ibcm_open_start() call since posting a MAD must not be done under the
 * flow-control lock.
 */
void
ibcm_check_for_opens(void)
{
	ibcm_state_data_t 	*statep;

	mutex_enter(&ibcm_open.mutex);

	while (ibcm_open.queued > 0) {
		if (ibcm_ok_to_start(&ibcm_open)) {
			statep = ibcm_open_dequeue();
			mutex_exit(&ibcm_open.mutex);

			ibcm_open_start(statep);

			mutex_enter(&ibcm_open.mutex);
		} else {
			break;
		}
	}
	mutex_exit(&ibcm_open.mutex);
}
1480
1481
1482 static void
1483 ibcm_flow_init(ibcm_flow_t *flow, uint_t simul_max)
1484 {
1485 flow->list = NULL;
1486 flow->simul = 0;
1487 flow->waiters_per_chunk = 4;
1488 flow->simul_max = simul_max;
1489 flow->lowat = simul_max - flow->waiters_per_chunk;
1490 flow->lowat_default = flow->lowat;
1491 /* stats */
1492 flow->total = 0;
1493 }
1494
/*
 * One-time initialization of CM's RC flow-control state: the three
 * thread-level flows (close, LAP/APR, SA access), the REQ/REP/RTU open
 * queue limits, and the async-close queue.  ibcm_open.mutex protects
 * all three ibcm_flow_t structures as well as the open queue.
 */
static void
ibcm_rc_flow_control_init(void)
{
	mutex_init(&ibcm_open.mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_enter(&ibcm_open.mutex);
	ibcm_flow_init(&ibcm_close_flow, ibcm_close_simul_max);
	ibcm_flow_init(&ibcm_lapr_flow, ibcm_lapr_simul_max);
	ibcm_flow_init(&ibcm_saa_flow, ibcm_saa_simul_max);

	/* empty open queue; tail points at head for an empty list */
	ibcm_open.queued = 0;
	ibcm_open.exit_deferred = 0;
	ibcm_open.in_progress = 0;
	ibcm_open.in_progress_max = 16;
	ibcm_open.sends = 0;
	ibcm_open.sends_max = 0;
	ibcm_open.sends_lowat = 8;
	ibcm_open.sends_hiwat = 16;
	ibcm_open.tail = &ibcm_open.head;
	ibcm_open.head.open_link = NULL;
	mutex_exit(&ibcm_open.mutex);

	mutex_init(&ibcm_close.mutex, NULL, MUTEX_DEFAULT, NULL);
	mutex_enter(&ibcm_close.mutex);
	ibcm_close.tail = &ibcm_close.head;
	ibcm_close.head.close_link = NULL;
	mutex_exit(&ibcm_close.mutex);
}
1522
1523 static void
1524 ibcm_rc_flow_control_fini(void)
1525 {
1526 mutex_destroy(&ibcm_open.mutex);
1527 mutex_destroy(&ibcm_close.mutex);
1528 }
1529
/*
 * Find (or allocate) the ibcm_flow1_t chunk that the next waiter on
 * this flow should block on.  Waiters are grouped into chunks of
 * flow->waiters_per_chunk, each with its own CV, so a wakeup releases
 * a whole chunk at once (see ibcm_flow_exit()).
 *
 * Called and returns with ibcm_open.mutex held, but drops it around
 * the KM_SLEEP allocation; the list must therefore be re-examined
 * afterwards, and the allocation freed if another thread filled the
 * gap meanwhile.
 */
static ibcm_flow1_t *
ibcm_flow_find(ibcm_flow_t *flow)
{
	ibcm_flow1_t *flow1;
	ibcm_flow1_t *f;

	f = flow->list;
	if (f) {	/* most likely code path */
		while (f->link != NULL)
			f = f->link;
		if (f->waiters < flow->waiters_per_chunk)
			return (f);
	}

	/* There was no flow1 list element ready for another waiter */
	mutex_exit(&ibcm_open.mutex);
	flow1 = kmem_alloc(sizeof (*flow1), KM_SLEEP);
	mutex_enter(&ibcm_open.mutex);

	/* re-check: the list may have changed while the mutex was dropped */
	f = flow->list;
	if (f) {
		while (f->link != NULL)
			f = f->link;
		if (f->waiters < flow->waiters_per_chunk) {
			/* someone else made room; discard our allocation */
			kmem_free(flow1, sizeof (*flow1));
			return (f);
		}
		f->link = flow1;
	} else {
		flow->list = flow1;
	}
	cv_init(&flow1->cv, NULL, CV_DRIVER, NULL);
	flow1->waiters = 0;
	flow1->link = NULL;
	return (flow1);
}
1566
/*
 * Enter a flow-controlled section: if below the concurrency limit and
 * nobody is already waiting, proceed immediately; otherwise join a
 * waiter chunk and block until ibcm_flow_exit() broadcasts it.  The
 * last waiter to leave a chunk destroys its CV and frees it.
 */
static void
ibcm_flow_enter(ibcm_flow_t *flow)
{
	mutex_enter(&ibcm_open.mutex);
	if (flow->list == NULL && flow->simul < flow->simul_max) {
		flow->simul++;
		flow->total++;
		mutex_exit(&ibcm_open.mutex);
	} else {
		ibcm_flow1_t *flow1;

		flow1 = ibcm_flow_find(flow);
		flow1->waiters++;
		/* woken by ibcm_flow_exit() after this chunk is unlinked */
		cv_wait(&flow1->cv, &ibcm_open.mutex);
		if (--flow1->waiters == 0) {
			cv_destroy(&flow1->cv);
			mutex_exit(&ibcm_open.mutex);
			kmem_free(flow1, sizeof (*flow1));
		} else
			mutex_exit(&ibcm_open.mutex);
	}
}
1589
/*
 * Leave a flow-controlled section.  When the in-flight count drops
 * below the (adaptive) low-water mark, creep lowat back toward its
 * default and release the oldest waiter chunk in one broadcast,
 * charging all of its waiters to the flow at once.
 */
static void
ibcm_flow_exit(ibcm_flow_t *flow)
{
	mutex_enter(&ibcm_open.mutex);
	if (--flow->simul < flow->lowat) {
		if (flow->lowat < flow->lowat_default)
			flow->lowat++;
		if (flow->list) {
			ibcm_flow1_t *flow1;

			flow1 = flow->list;
			flow->list = flow1->link;	/* unlink */
			flow1->link = NULL;	/* be clean */
			flow->total += flow1->waiters;
			flow->simul += flow1->waiters;
			/* wake the whole chunk; last waiter out frees it */
			cv_broadcast(&flow1->cv);
		}
	}
	mutex_exit(&ibcm_open.mutex);
}
1610
/*
 * Account one more outstanding MAD send, tracking the high-water mark
 * (sends_max) for diagnostics.
 */
void
ibcm_flow_inc(void)
{
	mutex_enter(&ibcm_open.mutex);
	if (++ibcm_open.sends > ibcm_open.sends_max) {
		ibcm_open.sends_max = ibcm_open.sends;
		IBTF_DPRINTF_L2(cmlog, "ibcm_flow_inc: sends max = %d",
		    ibcm_open.sends_max);
	}
	mutex_exit(&ibcm_open.mutex);
}
1622
1623 static void
1624 ibcm_check_send_cmpltn_time(hrtime_t delta, char *event_msg)
1625 {
1626 if (delta > 4000000LL) {
1627 IBTF_DPRINTF_L2(cmlog, "ibcm_check_send_cmpltn_time: "
1628 "%s: %lldns", event_msg, delta);
1629 }
1630 }
1631
/*
 * Account completion of one outstanding MAD send.  When sends falls
 * below the low-water mark this may (a) allow queued opens to start
 * (kick the timeout-list thread) and (b) complete one deferred
 * close-flow exit (see ibcm_close_exit()/ibcm_close_done()).
 * `time` is when the send was posted; used for slow-send diagnostics.
 */
void
ibcm_flow_dec(hrtime_t time, char *mad_type)
{
	int flow_exit = 0;
	int run = 0;

	if (ibcm_dtrace)
		ibcm_check_send_cmpltn_time(gethrtime() - time, mad_type);
	mutex_enter(&ibcm_open.mutex);
	ibcm_open.sends--;
	if (ibcm_open.sends < ibcm_open.sends_lowat) {
		run = ibcm_ok_to_start(&ibcm_open);
		if (ibcm_open.exit_deferred) {
			ibcm_open.exit_deferred--;
			flow_exit = 1;
		}
	}
	mutex_exit(&ibcm_open.mutex);
	/* both calls must be made without holding ibcm_open.mutex */
	if (flow_exit)
		ibcm_flow_exit(&ibcm_close_flow);
	if (run)
		ibcm_run_tlist_thread();
}
1655
/*
 * Append statep to the asynchronous-close queue and kick the
 * timeout-list thread, which drains the queue via
 * ibcm_check_for_async_close().
 */
void
ibcm_close_enqueue(ibcm_state_data_t *statep)
{
	mutex_enter(&ibcm_close.mutex);
	statep->close_link = NULL;
	ibcm_close.tail->close_link = statep;
	ibcm_close.tail = statep;
	mutex_exit(&ibcm_close.mutex);
	ibcm_run_tlist_thread();
}
1666
1667 void
1668 ibcm_check_for_async_close()
1669 {
1670 ibcm_state_data_t *statep;
1671
1672 mutex_enter(&ibcm_close.mutex);
1673
1674 while (ibcm_close.head.close_link) {
1675 statep = ibcm_close.head.close_link;
1676 ibcm_close.head.close_link = statep->close_link;
1677 statep->close_link = NULL;
1678 if (ibcm_close.tail == statep)
1679 ibcm_close.tail = &ibcm_close.head;
1680 mutex_exit(&ibcm_close.mutex);
1681 ibcm_close_start(statep);
1682 mutex_enter(&ibcm_close.mutex);
1683 }
1684 mutex_exit(&ibcm_close.mutex);
1685 }
1686
/*
 * Block until the close flow (DREQ/DREP) has room for another
 * concurrent close; pairs with ibcm_close_exit()/ibcm_close_done().
 */
void
ibcm_close_enter(void)
{
	ibcm_flow_enter(&ibcm_close_flow);
}
1692
/*
 * Release one close-flow slot.  If MAD sends are currently heavy
 * (at or above sends_lowat) the exit is deferred — up to 4 may be
 * pending — and will be completed later by ibcm_flow_dec() when the
 * send count drains.
 */
void
ibcm_close_exit(void)
{
	int flow_exit;

	mutex_enter(&ibcm_open.mutex);
	if (ibcm_open.sends < ibcm_open.sends_lowat ||
	    ibcm_open.exit_deferred >= 4)
		flow_exit = 1;
	else {
		flow_exit = 0;
		ibcm_open.exit_deferred++;
	}
	mutex_exit(&ibcm_open.mutex);
	if (flow_exit)
		ibcm_flow_exit(&ibcm_close_flow);
}
1710
1711 /*
1712 * This function needs to be called twice to finish our flow
1713 * control accounting when closing down a connection. One
1714 * call has send_done set to 1, while the other has it set to 0.
1715 * Because of retries, this could get called more than once
1716 * with either 0 or 1, but additional calls have no effect.
1717 */
1718 void
1719 ibcm_close_done(ibcm_state_data_t *statep, int send_done)
1720 {
1721 int flow_exit;
1722
1723 ASSERT(MUTEX_HELD(&statep->state_mutex));
1724 if (statep->close_flow == 1) {
1725 if (send_done)
1726 statep->close_flow = 3;
1727 else
1728 statep->close_flow = 2;
1729 } else if ((send_done && statep->close_flow == 2) ||
1730 (!send_done && statep->close_flow == 3)) {
1731 statep->close_flow = 0;
1732 mutex_enter(&ibcm_open.mutex);
1733 if (ibcm_open.sends < ibcm_open.sends_lowat ||
1734 ibcm_open.exit_deferred >= 4)
1735 flow_exit = 1;
1736 else {
1737 flow_exit = 0;
1738 ibcm_open.exit_deferred++;
1739 }
1740 mutex_exit(&ibcm_open.mutex);
1741 if (flow_exit)
1742 ibcm_flow_exit(&ibcm_close_flow);
1743 }
1744 }
1745
/*
 * Block until the LAP/APR flow has room for another concurrent
 * request; pairs with ibcm_lapr_exit().
 */
void
ibcm_lapr_enter(void)
{
	ibcm_flow_enter(&ibcm_lapr_flow);
}
1751
/* Release one LAP/APR flow slot; pairs with ibcm_lapr_enter(). */
void
ibcm_lapr_exit(void)
{
	ibcm_flow_exit(&ibcm_lapr_flow);
}
1757
1758 void
1759 ibcm_sa_access_enter()
1760 {
1761 ibcm_flow_enter(&ibcm_saa_flow);
1762 }
1763
1764 void
1765 ibcm_sa_access_exit()
1766 {
1767 ibcm_flow_exit(&ibcm_saa_flow);
1768 }
1769
/*
 * IBMF SAA subnet-event callback for one port (callback_arg is the
 * port's ibcm_port_info_t).  Translates the SAA event code into the
 * corresponding IBT subnet event, purges the path cache on GID
 * availability changes, and forwards the event to IBTL clients —
 * unless the owning HCA is being torn down.  Serialized by
 * ibcm_sm_notice_serialize_lock.
 */
static void
ibcm_sm_notice_handler(ibmf_saa_handle_t saa_handle,
    ibmf_saa_subnet_event_t saa_event_code,
    ibmf_saa_event_details_t *saa_event_details,
    void *callback_arg)
{
	ibcm_port_info_t	*portp = (ibcm_port_info_t *)callback_arg;
	ibt_subnet_event_code_t code;
	ibt_subnet_event_t	event;
	uint8_t			event_status;

	IBTF_DPRINTF_L3(cmlog, "ibcm_sm_notice_handler: saa_hdl %p, code = %d",
	    saa_handle, saa_event_code);

	mutex_enter(&ibcm_sm_notice_serialize_lock);

	switch (saa_event_code) {
	case IBMF_SAA_EVENT_MCG_CREATED:
		code = IBT_SM_EVENT_MCG_CREATED;
		break;
	case IBMF_SAA_EVENT_MCG_DELETED:
		code = IBT_SM_EVENT_MCG_DELETED;
		break;
	case IBMF_SAA_EVENT_GID_AVAILABLE:
		code = IBT_SM_EVENT_GID_AVAIL;
		/* cached paths may now be stale/incomplete */
		ibcm_path_cache_purge();
		break;
	case IBMF_SAA_EVENT_GID_UNAVAILABLE:
		code = IBT_SM_EVENT_GID_UNAVAIL;
		ibcm_path_cache_purge();
		break;
	case IBMF_SAA_EVENT_SUBSCRIBER_STATUS_CHG:
		/* only the producer-SM bit of the status mask is tracked */
		event_status =
		    saa_event_details->ie_producer_event_status_mask &
		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;
		if (event_status == (portp->port_event_status &
		    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)) {
			mutex_exit(&ibcm_sm_notice_serialize_lock);
			return;	/* no change */
		}
		portp->port_event_status = event_status;
		if (event_status == IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM)
			code = IBT_SM_EVENT_AVAILABLE;
		else
			code = IBT_SM_EVENT_UNAVAILABLE;
		break;
	default:
		/* unrecognized event: nothing to forward */
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	mutex_enter(&ibcm_global_hca_lock);

	/* don't send the event if we're tearing down */
	if (!IBCM_ACCESS_HCA_OK(portp->port_hcap)) {
		mutex_exit(&ibcm_global_hca_lock);
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	/* hold the HCA across the client callback */
	++(portp->port_hcap->hca_acc_cnt);
	mutex_exit(&ibcm_global_hca_lock);

	event.sm_notice_gid = saa_event_details->ie_gid;
	ibtl_cm_sm_notice_handler(portp->port_sgid0, code, &event);

	mutex_exit(&ibcm_sm_notice_serialize_lock);

	ibcm_dec_hca_acc_cnt(portp->port_hcap);
}
1840
/*
 * IBT client entry point: register (or clear, when sm_notice_handler is
 * NULL) a subnet-notice handler for the client.  After registering, any
 * port whose SM is currently not known to be available is reported to
 * the client immediately via ibtl_cm_sm_notice_init_failure().
 *
 * Two passes over the HCA list are made under ibcm_global_hca_lock:
 * one to count the failed SGIDs (to size the allocation), one to fill
 * them in.  Both passes and the callback are serialized against SAA
 * event delivery by ibcm_sm_notice_serialize_lock.
 */
void
ibt_register_subnet_notices(ibt_clnt_hdl_t ibt_hdl,
    ibt_sm_notice_handler_t sm_notice_handler, void *private)
{
	ibcm_port_info_t	*portp;
	ibcm_hca_info_t		*hcap;
	uint8_t			port;
	int			num_failed_sgids;
	ibtl_cm_sm_init_fail_t	*ifail;
	ib_gid_t		*sgidp;

	IBTF_DPRINTF_L3(cmlog, "ibt_register_subnet_notices(%p, %s)",
	    ibt_hdl, ibtl_cm_get_clnt_name(ibt_hdl));

	mutex_enter(&ibcm_sm_notice_serialize_lock);

	ibtl_cm_set_sm_notice_handler(ibt_hdl, sm_notice_handler, private);
	if (sm_notice_handler == NULL) {
		/* handler cleared: nothing further to report */
		mutex_exit(&ibcm_sm_notice_serialize_lock);
		return;
	}

	/* for each port, if service is not available, make a call */
	mutex_enter(&ibcm_global_hca_lock);
	num_failed_sgids = 0;
	hcap = ibcm_hca_listp;
	while (hcap != NULL) {
		portp = hcap->hca_port_info;
		for (port = 0; port < hcap->hca_num_ports; port++) {
			if (!(portp->port_event_status &
			    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
				num_failed_sgids++;
			portp++;
		}
		hcap = hcap->hca_next;
	}
	if (num_failed_sgids != 0) {
		/* -1: ifail apparently embeds the first smf_sgid element */
		ifail = kmem_alloc(sizeof (*ifail) +
		    (num_failed_sgids - 1) * sizeof (ib_gid_t), KM_SLEEP);
		ifail->smf_num_sgids = num_failed_sgids;
		ifail->smf_ibt_hdl = ibt_hdl;
		sgidp = &ifail->smf_sgid[0];
		hcap = ibcm_hca_listp;
		/* second pass: collect the SGIDs counted above */
		while (hcap != NULL) {
			portp = hcap->hca_port_info;
			for (port = 0; port < hcap->hca_num_ports; port++) {
				if (!(portp->port_event_status &
				    IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM))
					*sgidp++ = portp->port_sgid0;
				portp++;
			}
			hcap = hcap->hca_next;
		}
	}
	mutex_exit(&ibcm_global_hca_lock);

	if (num_failed_sgids != 0) {
		ibtl_cm_sm_notice_init_failure(ifail);
		kmem_free(ifail, sizeof (*ifail) +
		    (num_failed_sgids - 1) * sizeof (ib_gid_t));
	}
	mutex_exit(&ibcm_sm_notice_serialize_lock);
}
1904
/* The following is run from a taskq because we've seen the stack overflow. */
/*
 * Taskq worker: open an IBMF SAA session for the port (arg is its
 * ibcm_port_info_t), registering ibcm_sm_notice_handler for subnet
 * events.  On failure the error is only logged; port_ibmf_saa_hdl stays
 * NULL.  Always clears port_saa_open_in_progress and wakes threads
 * blocked in ibcm_get_saa_handle()/ibcm_init_saa_handle().
 */
static void
ibcm_init_saa(void *arg)
{
	ibcm_port_info_t		*portp = (ibcm_port_info_t *)arg;
	int				status;
	ib_guid_t			port_guid;
	ibmf_saa_subnet_event_args_t	event_args;

	port_guid = portp->port_sgid0.gid_guid;

	IBTF_DPRINTF_L3(cmlog, "ibcm_init_saa: port guid %llX", port_guid);

	event_args.is_event_callback_arg = portp;
	event_args.is_event_callback = ibcm_sm_notice_handler;

	if ((status = ibmf_sa_session_open(port_guid, 0, &event_args,
	    IBMF_VERSION, 0, &portp->port_ibmf_saa_hdl)) != IBMF_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
		    "ibmf_sa_session_open failed for port guid %llX "
		    "status = %d", port_guid, status);
	} else {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa: "
		    "registered sa_hdl 0x%p for port guid %llX",
		    portp->port_ibmf_saa_hdl, port_guid);
	}

	/* done (success or not): release waiters on the open CV */
	mutex_enter(&ibcm_sa_open_lock);
	portp->port_saa_open_in_progress = 0;
	cv_broadcast(&ibcm_sa_open_cv);
	mutex_exit(&ibcm_sa_open_lock);
}
1937
/*
 * Kick off (but do not wait for) SAA session setup for the given
 * 1-based port number.  Returns immediately if the port is invalid,
 * an open is already in progress, or a handle already exists.  The
 * actual ibmf_sa_session_open runs in ibcm_init_saa() on the taskq.
 */
void
ibcm_init_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
{
	ibmf_saa_handle_t	saa_handle;
	uint8_t			port_index = port - 1;
	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
	ibt_status_t		ibt_status;

	if (port_index >= hcap->hca_num_ports)
		return;

	mutex_enter(&ibcm_sa_open_lock);
	if (portp->port_saa_open_in_progress) {
		mutex_exit(&ibcm_sa_open_lock);
		return;
	}

	saa_handle = portp->port_ibmf_saa_hdl;
	if (saa_handle != NULL) {
		/* already open; nothing to do */
		mutex_exit(&ibcm_sa_open_lock);
		return;
	}

	portp->port_saa_open_in_progress = 1;
	mutex_exit(&ibcm_sa_open_lock);

	/* The assumption is that we're getting event notifications */
	portp->port_event_status = IBMF_SAA_EVENT_STATUS_MASK_PRODUCER_SM;

	/* refresh port_sgid0 before handing portp to the taskq worker */
	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
	    portp->port_num, &portp->port_sgid0, NULL);
	if (ibt_status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_init_saa_handle: "
		    "ibt_get_port_state_byguid failed for guid %llX "
		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
		/* failed: clear in-progress and release any waiters */
		mutex_enter(&ibcm_sa_open_lock);
		portp->port_saa_open_in_progress = 0;
		cv_broadcast(&ibcm_sa_open_cv);
		mutex_exit(&ibcm_sa_open_lock);
		return;
	}
	/* if the port is UP, try sa_session_open */
	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);
}
1982
1983
/*
 * Return the port's SAA handle, opening the session synchronously if
 * necessary.  Unlike ibcm_init_saa_handle() this waits (on
 * ibcm_sa_open_cv) for any in-progress open — including the one it may
 * itself dispatch — and so may return NULL only when the open failed
 * or the port number is invalid.  `port` is 1-based.
 */
ibmf_saa_handle_t
ibcm_get_saa_handle(ibcm_hca_info_t *hcap, uint8_t port)
{
	ibmf_saa_handle_t	saa_handle;
	uint8_t			port_index = port - 1;
	ibcm_port_info_t	*portp = &hcap->hca_port_info[port_index];
	ibt_status_t		ibt_status;

	if (port_index >= hcap->hca_num_ports)
		return (NULL);

	/* wait out any open already started by another thread */
	mutex_enter(&ibcm_sa_open_lock);
	while (portp->port_saa_open_in_progress) {
		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
	}

	saa_handle = portp->port_ibmf_saa_hdl;
	if (saa_handle != NULL) {
		mutex_exit(&ibcm_sa_open_lock);
		return (saa_handle);
	}

	portp->port_saa_open_in_progress = 1;
	mutex_exit(&ibcm_sa_open_lock);

	/* refresh port_sgid0 before handing portp to the taskq worker */
	ibt_status = ibt_get_port_state_byguid(portp->port_hcap->hca_guid,
	    portp->port_num, &portp->port_sgid0, NULL);
	if (ibt_status != IBT_SUCCESS) {
		IBTF_DPRINTF_L2(cmlog, "ibcm_get_saa_handle: "
		    "ibt_get_port_state_byguid failed for guid %llX "
		    "with status %d", portp->port_hcap->hca_guid, ibt_status);
		mutex_enter(&ibcm_sa_open_lock);
		portp->port_saa_open_in_progress = 0;
		cv_broadcast(&ibcm_sa_open_cv);
		mutex_exit(&ibcm_sa_open_lock);
		return (NULL);
	}
	/* if the port is UP, try sa_session_open */
	(void) taskq_dispatch(ibcm_taskq, ibcm_init_saa, portp, TQ_SLEEP);

	/* wait for the taskq worker we just dispatched to finish */
	mutex_enter(&ibcm_sa_open_lock);
	while (portp->port_saa_open_in_progress) {
		cv_wait(&ibcm_sa_open_cv, &ibcm_sa_open_lock);
	}
	saa_handle = portp->port_ibmf_saa_hdl;
	mutex_exit(&ibcm_sa_open_lock);
	return (saa_handle);
}
2032
2033
2034 /*
2035 * ibcm_hca_init_port():
2036 * - Register port with IBMA
2037 *
2038 * Arguments:
2039 * hcap - HCA's guid
2040 * port_index - port number minus 1
2041 *
2042 * Return values:
2043 * IBCM_SUCCESS - success
2044 */
2045 ibt_status_t
2046 ibcm_hca_init_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2047 {
2048 int status;
2049 ibmf_register_info_t *ibmf_reg;
2050
2051 IBTF_DPRINTF_L4(cmlog, "ibcm_hca_init_port: hcap = 0x%p port_num %d",
2052 hcap, port_index + 1);
2053
2054 ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2055
2056 if (hcap->hca_port_info[port_index].port_ibmf_hdl == NULL) {
2057 /* Register with IBMF */
2058 ibmf_reg = &hcap->hca_port_info[port_index].port_ibmf_reg;
2059 ibmf_reg->ir_ci_guid = hcap->hca_guid;
2060 ibmf_reg->ir_port_num = port_index + 1;
2061 ibmf_reg->ir_client_class = COMM_MGT_MANAGER_AGENT;
2062
2063 /*
2064 * register with management framework
2065 */
2066 status = ibmf_register(ibmf_reg, IBMF_VERSION,
2067 IBMF_REG_FLAG_NO_OFFLOAD, NULL, NULL,
2068 &(hcap->hca_port_info[port_index].port_ibmf_hdl),
2069 &(hcap->hca_port_info[port_index].port_ibmf_caps));
2070
2071 if (status != IBMF_SUCCESS) {
2072 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_init_port: "
2073 "ibmf_register failed for port_num %x, "
2074 "status = %x", port_index + 1, status);
2075 return (ibcm_ibmf_analyze_error(status));
2076 }
2077
2078 hcap->hca_port_info[port_index].port_qp1.qp_cm =
2079 IBMF_QP_HANDLE_DEFAULT;
2080 hcap->hca_port_info[port_index].port_qp1.qp_port =
2081 &(hcap->hca_port_info[port_index]);
2082
2083 /*
2084 * Register the read callback with IBMF.
2085 * Since we just did an ibmf_register, handle is
2086 * valid and ibcm_recv_cb() is valid so we can
2087 * safely assert for success of ibmf_setup_recv_cb()
2088 *
2089 * Depending on the "state" of the HCA,
2090 * CM may drop incoming packets
2091 */
2092 status = ibmf_setup_async_cb(
2093 hcap->hca_port_info[port_index].port_ibmf_hdl,
2094 IBMF_QP_HANDLE_DEFAULT, ibcm_recv_cb,
2095 &(hcap->hca_port_info[port_index].port_qp1), 0);
2096 ASSERT(status == IBMF_SUCCESS);
2097
2098 IBTF_DPRINTF_L5(cmlog, "ibcm_hca_init_port: "
2099 "IBMF hdl[%x] = 0x%p", port_index,
2100 hcap->hca_port_info[port_index].port_ibmf_hdl);
2101
2102 /* Attempt to get the saa_handle for this port */
2103 ibcm_init_saa_handle(hcap, port_index + 1);
2104 }
2105
2106 return (IBT_SUCCESS);
2107 }
2108
2109 /*
2110 * useful, to re attempt to initialize port ibma handles from elsewhere in
2111 * cm code
2112 */
2113 ibt_status_t
2114 ibcm_hca_reinit_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2115 {
2116 ibt_status_t status;
2117
2118 IBTF_DPRINTF_L5(cmlog, "ibcm_hca_reinit_port: hcap 0x%p port_num %d",
2119 hcap, port_index + 1);
2120
2121 mutex_enter(&ibcm_global_hca_lock);
2122 status = ibcm_hca_init_port(hcap, port_index);
2123 mutex_exit(&ibcm_global_hca_lock);
2124 return (status);
2125 }
2126
2127
2128 /*
2129 * ibcm_hca_fini_port():
2130 * - Deregister port with IBMA
2131 *
2132 * Arguments:
2133 * hcap - HCA's guid
2134 * port_index - port number minus 1
2135 *
2136 * Return values:
2137 * IBCM_SUCCESS - success
2138 */
2139 static ibcm_status_t
2140 ibcm_hca_fini_port(ibcm_hca_info_t *hcap, uint8_t port_index)
2141 {
2142 int ibmf_status;
2143 ibcm_status_t ibcm_status;
2144
2145 IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: hcap = 0x%p port_num %d ",
2146 hcap, port_index + 1);
2147
2148 ASSERT(MUTEX_HELD(&ibcm_global_hca_lock));
2149
2150 if (hcap->hca_port_info[port_index].port_ibmf_saa_hdl != NULL) {
2151 IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2152 "ibmf_sa_session_close IBMF SAA hdl %p",
2153 hcap->hca_port_info[port_index].port_ibmf_saa_hdl);
2154
2155 ibmf_status = ibmf_sa_session_close(
2156 &hcap->hca_port_info[port_index].port_ibmf_saa_hdl, 0);
2157 if (ibmf_status != IBMF_SUCCESS) {
2158 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2159 "ibmf_sa_session_close of port %d returned %x",
2160 port_index + 1, ibmf_status);
2161 return (IBCM_FAILURE);
2162 }
2163 }
2164
2165 if (hcap->hca_port_info[port_index].port_ibmf_hdl != NULL) {
2166 IBTF_DPRINTF_L5(cmlog, "ibcm_hca_fini_port: "
2167 "ibmf_unregister IBMF Hdl %p",
2168 hcap->hca_port_info[port_index].port_ibmf_hdl);
2169
2170 /* clean-up all the ibmf qp's allocated on this port */
2171 ibcm_status = ibcm_free_allqps(hcap, port_index + 1);
2172
2173 if (ibcm_status != IBCM_SUCCESS) {
2174
2175 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2176 "ibcm_free_allqps failed for port_num %d",
2177 port_index + 1);
2178 return (IBCM_FAILURE);
2179 }
2180
2181 /* Tear down the receive callback */
2182 ibmf_status = ibmf_tear_down_async_cb(
2183 hcap->hca_port_info[port_index].port_ibmf_hdl,
2184 IBMF_QP_HANDLE_DEFAULT, 0);
2185
2186 if (ibmf_status != IBMF_SUCCESS) {
2187 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2188 "ibmf_tear_down_async_cb failed %d port_num %d",
2189 ibmf_status, port_index + 1);
2190 return (IBCM_FAILURE);
2191 }
2192
2193 /* Now, unregister with IBMF */
2194 ibmf_status = ibmf_unregister(
2195 &hcap->hca_port_info[port_index].port_ibmf_hdl, 0);
2196 IBTF_DPRINTF_L4(cmlog, "ibcm_hca_fini_port: "
2197 "ibmf_unregister of port_num %x returned %x",
2198 port_index + 1, ibmf_status);
2199
2200 if (ibmf_status == IBMF_SUCCESS)
2201 hcap->hca_port_info[port_index].port_ibmf_hdl = NULL;
2202 else {
2203 IBTF_DPRINTF_L2(cmlog, "ibcm_hca_fini_port: "
2204 "ibmf_unregister failed %d port_num %d",
2205 ibmf_status, port_index + 1);
2206 return (IBCM_FAILURE);
2207 }
2208 }
2209 return (IBCM_SUCCESS);
2210 }
2211
2212 /*
2213 * ibcm_comm_est_handler():
2214 * Check if the given channel is in ESTABLISHED state or not
2215 *
2216 * Arguments:
2217 * eventp - A pointer to an ibt_async_event_t struct
2218 *
2219 * Return values: NONE
2220 */
2221 static void
2222 ibcm_comm_est_handler(ibt_async_event_t *eventp)
2223 {
2224 ibcm_state_data_t *statep;
2225
2226 IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler:");
2227
2228 /* Both QP and EEC handles can't be NULL */
2229 if (eventp->ev_chan_hdl == NULL) {
2230 IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
2231 "both QP and EEC handles are NULL");
2232 return;
2233 }
2234
2235 /* get the "statep" from qp/eec handles */
2236 IBCM_GET_CHAN_PRIVATE(eventp->ev_chan_hdl, statep);
2237 if (statep == NULL) {
2238 IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: statep is NULL");
2239 return;
2240 }
2241
2242 mutex_enter(&statep->state_mutex);
2243
2244 IBCM_RELEASE_CHAN_PRIVATE(eventp->ev_chan_hdl);
2245
2246 IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: statep = %p", statep);
2247
2248 IBCM_REF_CNT_INCR(statep);
2249
2250 if ((statep->state == IBCM_STATE_REP_SENT) ||
2251 (statep->state == IBCM_STATE_MRA_REP_RCVD)) {
2252 timeout_id_t timer_val = statep->timerid;
2253
2254 statep->state = IBCM_STATE_TRANSIENT_ESTABLISHED;
2255
2256 if (timer_val) {
2257 statep->timerid = 0;
2258 mutex_exit(&statep->state_mutex);
2259 (void) untimeout(timer_val);
2260 } else
2261 mutex_exit(&statep->state_mutex);
2262
2263 /* CM doesn't have RTU message here */
2264 ibcm_cep_state_rtu(statep, NULL);
2265
2266 } else {
2267 if (statep->state == IBCM_STATE_ESTABLISHED ||
2268 statep->state == IBCM_STATE_TRANSIENT_ESTABLISHED) {
2269 IBTF_DPRINTF_L4(cmlog, "ibcm_comm_est_handler: "
2270 "Channel already in ESTABLISHED state");
2271 } else {
2272 /* An unexpected behavior from remote */
2273 IBTF_DPRINTF_L2(cmlog, "ibcm_comm_est_handler: "
2274 "Unexpected in state = %d", statep->state);
2275 }
2276 mutex_exit(&statep->state_mutex);
2277
2278 ibcm_insert_trace(statep, IBCM_TRACE_INCOMING_COMEST);
2279 }
2280
2281 mutex_enter(&statep->state_mutex);
2282 IBCM_REF_CNT_DECR(statep);
2283 mutex_exit(&statep->state_mutex);
2284 }
2285
2286
2287 /*
2288 * ibcm_async_handler():
2289 * CM's Async Handler
2290 * (Handles ATTACH, DETACH, COM_EST events)
2291 *
2292 * Arguments:
2293 * eventp - A pointer to an ibt_async_event_t struct
2294 *
2295 * Return values: None
2296 *
2297 * NOTE : CM assumes that all HCA DR events are delivered sequentially
2298 * i.e., until ibcm_async_handler completes for a given HCA DR, framework
2299 * shall not invoke ibcm_async_handler with another DR event for the same
2300 * HCA
2301 */
2302 /* ARGSUSED */
2303 void
2304 ibcm_async_handler(void *clnt_hdl, ibt_hca_hdl_t hca_hdl,
2305 ibt_async_code_t code, ibt_async_event_t *eventp)
2306 {
2307 ibcm_hca_info_t *hcap;
2308 ibcm_port_up_t *pup;
2309
2310 IBTF_DPRINTF_L3(cmlog, "ibcm_async_handler: "
2311 "clnt_hdl = %p, code = 0x%x, eventp = 0x%p",
2312 clnt_hdl, code, eventp);
2313
2314 mutex_enter(&ibcm_global_hca_lock);
2315
2316 /* If fini is going to complete successfully, then return */
2317 if (ibcm_finit_state != IBCM_FINIT_IDLE) {
2318
2319 /*
2320 * This finit state implies one of the following:
2321 * Init either didn't start or didn't complete OR
2322 * Fini is about to return SUCCESS and release the global lock.
2323 * In all these cases, it is safe to ignore the async.
2324 */
2325
2326 IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler: ignoring event %x, "
2327 "as either init didn't complete or fini about to succeed",
2328 code);
2329 mutex_exit(&ibcm_global_hca_lock);
2330 return;
2331 }
2332
2333 switch (code) {
2334 case IBT_PORT_CHANGE_EVENT:
2335 if ((eventp->ev_port_flags & IBT_PORT_CHANGE_SM_LID) == 0)
2336 break;
2337 /* FALLTHROUGH */
2338 case IBT_CLNT_REREG_EVENT:
2339 case IBT_EVENT_PORT_UP:
2340 mutex_exit(&ibcm_global_hca_lock);
2341 pup = kmem_alloc(sizeof (ibcm_port_up_t), KM_SLEEP);
2342 pup->pup_hca_guid = eventp->ev_hca_guid;
2343 pup->pup_port = eventp->ev_port;
2344 (void) taskq_dispatch(ibcm_taskq,
2345 ibcm_service_record_rewrite_task, pup, TQ_SLEEP);
2346 ibcm_path_cache_purge();
2347 return;
2348
2349 case IBT_HCA_ATTACH_EVENT:
2350
2351 /* eventp->ev_hcaguid is the HCA GUID of interest */
2352 ibcm_hca_attach(eventp->ev_hca_guid);
2353 break;
2354
2355 case IBT_HCA_DETACH_EVENT:
2356
2357 /* eventp->ev_hca_guid is the HCA GUID of interest */
2358 if ((hcap = ibcm_find_hcap_entry(eventp->ev_hca_guid)) ==
2359 NULL) {
2360 IBTF_DPRINTF_L2(cmlog, "ibcm_async_handler:"
2361 " hca %llX doesn't exist", eventp->ev_hca_guid);
2362 break;
2363 }
2364
2365 (void) ibcm_hca_detach(hcap);
2366 break;
2367
2368 case IBT_EVENT_COM_EST_QP:
2369 /* eventp->ev_qp_hdl is the ibt_qp_hdl_t of interest */
2370 case IBT_EVENT_COM_EST_EEC:
2371 /* eventp->ev_eec_hdl is the ibt_eec_hdl_t of interest */
2372 ibcm_comm_est_handler(eventp);
2373 break;
2374 default:
2375 break;
2376 }
2377
2378 /* Unblock, any blocked fini/init operations */
2379 mutex_exit(&ibcm_global_hca_lock);
2380 }