/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * UDAPL kernel agent
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/kstat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/taskq.h>
#include <sys/open.h>
#include <sys/uio.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/esunddi.h>
#include <sys/avl.h>
#include <sys/cred.h>
#include <sys/note.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <daplt_if.h>
#include <daplt.h>

/*
 * The following variables support the debug log buffer scheme.
 */
#ifdef DEBUG
static char daplka_dbgbuf[0x80000];
#else /* DEBUG */
static char daplka_dbgbuf[0x4000];
#endif /* DEBUG */
static int daplka_dbgsize = sizeof (daplka_dbgbuf);
static size_t daplka_dbgnext;
static int daplka_dbginit = 0;
static kmutex_t daplka_dbglock;

static int daplka_dbg = 0x0103;
static void daplka_console(const char *, ...);
static void daplka_debug(const char *, ...);
static int daplka_apm = 0x1;			/* default enable */
static int daplka_failback = 0x1;		/* default enable */
static int daplka_query_aft_setaltpath = 10;

#define	DERR				\
	if (daplka_dbg & 0x100)		\
		daplka_debug

#ifdef DEBUG

#define	DINFO				\
	daplka_console

#define	D1				\
	if (daplka_dbg & 0x01)		\
		daplka_debug
#define	D2				\
	if (daplka_dbg & 0x02)		\
		daplka_debug
#define	D3				\
	if (daplka_dbg & 0x04)		\
		daplka_debug
#define	D4				\
	if (daplka_dbg & 0x08)		\
		daplka_debug

#else /* DEBUG */

#define	DINFO	if (0) printf
#define	D1	if (0) printf
#define	D2	if (0) printf
#define	D3	if (0) printf
#define	D4	if (0) printf

#endif /* DEBUG */
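
/*
 * Usage sketch (an illustrative aside, not part of the original
 * source): the macros above expand to a call to daplka_debug()
 * guarded by a bit in daplka_dbg, so they are invoked like printf.
 * D1 through D4 compile down to "if (0) printf" in non-DEBUG builds,
 * while DERR stays active whenever the 0x100 bit is set in daplka_dbg:
 *
 *	D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)hkey);
 *	DERR("ep_create: copyin error %d\n", retval);
 *
 * because each macro expands to an unbraced "if", an invocation
 * should not be the lone statement of an outer if/else without
 * braces.
 */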

/*
 * driver entry points
 */
static int daplka_open(dev_t *, int, int, struct cred *);
static int daplka_close(dev_t, int, int, struct cred *);
static int daplka_attach(dev_info_t *, ddi_attach_cmd_t);
static int daplka_detach(dev_info_t *, ddi_detach_cmd_t);
static int daplka_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int daplka_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * types of ioctls
 */
static int daplka_common_ioctl(int, minor_t, intptr_t, int, cred_t *, int *);
static int daplka_misc_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_sp_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);

/*
 * common ioctls and supporting functions
 */
static int daplka_ia_create(minor_t, intptr_t, int, cred_t *, int *);
static int daplka_ia_destroy(daplka_resource_t *);

/*
 * EP ioctls and supporting functions
 */
static int daplka_ep_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_modify(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_connect(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_disconnect(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_reinit(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_destroy(daplka_resource_t *);
static void daplka_hash_ep_free(void *);
static int daplka_ep_failback(void *objp, void *arg);
static int daplka_ep_altpath(daplka_ep_resource_t *, ib_gid_t *);

static uint32_t daplka_ep_get_state(daplka_ep_resource_t *);
static void daplka_ep_set_state(daplka_ep_resource_t *, uint32_t, uint32_t);
static boolean_t daplka_ep_transition_is_valid(uint32_t, uint32_t);
static daplka_timer_info_t *daplka_timer_info_alloc(daplka_ep_resource_t *);
static void daplka_timer_info_free(daplka_timer_info_t *);
static void daplka_timer_handler(void *);
static void daplka_timer_dispatch(void *);
static void daplka_timer_thread(void *);
static int daplka_cancel_timer(daplka_ep_resource_t *);
static void daplka_hash_timer_free(void *);

/*
 * EVD ioctls and supporting functions
 */
static int daplka_evd_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cq_resize(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_event_poll(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_destroy(daplka_resource_t *);
static void daplka_cq_handler(ibt_cq_hdl_t, void *);
static void daplka_evd_wakeup(daplka_evd_resource_t *,
    daplka_evd_event_list_t *, daplka_evd_event_t *);
static void daplka_evd_event_enqueue(daplka_evd_event_list_t *,
    daplka_evd_event_t *);
static daplka_evd_event_t *daplka_evd_event_dequeue(daplka_evd_event_list_t *);
static void daplka_hash_evd_free(void *);


/*
 * SRQ ioctls and supporting functions
 */
static int daplka_srq_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_resize(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_destroy(daplka_resource_t *);
static void daplka_hash_srq_free(void *);

/*
 * Miscellaneous ioctls
 */
static int daplka_cr_accept(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cr_reject(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cr_handoff(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ia_query(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);

/*
 * PD ioctls and supporting functions
 */
static int daplka_pd_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_destroy(daplka_resource_t *);
static void daplka_hash_pd_free(void *);

/*
 * SP ioctls and supporting functions
 */
static int daplka_service_register(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_service_deregister(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_sp_destroy(daplka_resource_t *);
static void daplka_hash_sp_free(void *);
static void daplka_hash_sp_unref(void *);

/*
 * MR ioctls and supporting functions
 */
static int daplka_mr_register(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_register_lmr(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_register_shared(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_deregister(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_sync(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_destroy(daplka_resource_t *);
static void daplka_hash_mr_free(void *);
static void daplka_shared_mr_free(daplka_mr_resource_t *);

/*
 * MW ioctls and supporting functions
 */
static int daplka_mw_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mw_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mw_destroy(daplka_resource_t *);
static void daplka_hash_mw_free(void *);

/*
 * CNO ioctls and supporting functions
 */
static int daplka_cno_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_wait(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_destroy(daplka_resource_t *);
static void daplka_hash_cno_free(void *);

/*
 * CM handlers
 */
static ibt_cm_status_t daplka_cm_rc_handler(void *, ibt_cm_event_t *,
    ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

static ibt_cm_status_t daplka_cm_service_handler(void *, ibt_cm_event_t *,
    ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

static ibt_cm_status_t daplka_cm_service_req(daplka_sp_resource_t *,
    ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

/*
 * resource management routines
 */
static int daplka_resource_reserve(minor_t *);
static int daplka_resource_insert(minor_t, daplka_resource_t *);
static daplka_resource_t *daplka_resource_remove(minor_t rnum);
static daplka_resource_t *daplka_resource_lookup(minor_t);
static void daplka_resource_init(void);
static void daplka_resource_fini(void);
static struct daplka_resource_table daplka_resource;

/*
 * hash table routines
 */
static int daplka_hash_insert(daplka_hash_table_t *, uint64_t *, void *);
static int daplka_hash_remove(daplka_hash_table_t *, uint64_t, void **);
static void daplka_hash_walk(daplka_hash_table_t *, int (*)(void *, void *),
    void *, krw_t);
static void *daplka_hash_lookup(daplka_hash_table_t *, uint64_t);
static int daplka_hash_create(daplka_hash_table_t *, uint_t,
    void (*)(void *), void (*)(void *));
static void daplka_hash_destroy(daplka_hash_table_t *);
static uint32_t daplka_hash_getsize(daplka_hash_table_t *);
static void daplka_hash_generic_lookup(void *);
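
/*
 * Hash table usage sketch (an illustrative aside, not part of the
 * original source): a table is created with a per-object "free"
 * callback and an optional "lookup" callback invoked on each
 * successful lookup (daplka_hash_generic_lookup takes a reference
 * on the caller's behalf). as used in this file, a zero *hkeyp asks
 * daplka_hash_insert to assign a key, while the timer code supplies
 * its own pre-generated key:
 *
 *	uint64_t hkey = 0;
 *	retval = daplka_hash_create(&htbl, nbuckets,
 *	    daplka_hash_ep_free, daplka_hash_generic_lookup);
 *	retval = daplka_hash_insert(&htbl, &hkey, (void *)ep_rp);
 *	ep_rp = daplka_hash_lookup(&htbl, hkey);
 *	...
 *	DAPLKA_RS_UNREF(ep_rp);
 *
 * "htbl" and "nbuckets" are placeholder names for illustration.
 */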

static uint32_t daplka_timer_hkey_gen();

/*
 * async event handlers
 */
static void daplka_async_event_create(ibt_async_code_t, ibt_async_event_t *,
    uint64_t, daplka_ia_resource_t *);
static void daplka_rc_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_cq_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_un_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_sm_notice_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
    ibt_subnet_event_t *event);
static void daplka_sm_gid_avail(ib_gid_t *, ib_gid_t *);

/*
 * IBTF wrappers and default limits used for resource accounting
 */
static boolean_t daplka_accounting_enabled = B_TRUE;
static uint32_t daplka_max_qp_percent = 100;
static uint32_t daplka_max_cq_percent = 100;
static uint32_t daplka_max_pd_percent = 100;
static uint32_t daplka_max_mw_percent = 100;
static uint32_t daplka_max_mr_percent = 100;
static uint32_t daplka_max_srq_percent = 100;

static ibt_status_t
daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *, ibt_hca_hdl_t,
    ibt_chan_alloc_flags_t, ibt_rc_chan_alloc_args_t *,
    ibt_channel_hdl_t *, ibt_chan_sizes_t *);

static ibt_status_t
daplka_ibt_free_channel(daplka_ep_resource_t *, ibt_channel_hdl_t);

static ibt_status_t
daplka_ibt_alloc_cq(daplka_evd_resource_t *, ibt_hca_hdl_t,
    ibt_cq_attr_t *, ibt_cq_hdl_t *, uint_t *);

static ibt_status_t
daplka_ibt_free_cq(daplka_evd_resource_t *, ibt_cq_hdl_t);

static ibt_status_t
daplka_ibt_alloc_pd(daplka_pd_resource_t *, ibt_hca_hdl_t,
    ibt_pd_flags_t, ibt_pd_hdl_t *);

static ibt_status_t
daplka_ibt_free_pd(daplka_pd_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t);

static ibt_status_t
daplka_ibt_alloc_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
    ibt_mw_flags_t, ibt_mw_hdl_t *, ibt_rkey_t *);

static ibt_status_t
daplka_ibt_free_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_mw_hdl_t);

static ibt_status_t
daplka_ibt_register_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
    ibt_mr_attr_t *, ibt_mr_hdl_t *, ibt_mr_desc_t *);

static ibt_status_t
daplka_ibt_register_shared_mr(daplka_mr_resource_t *, ibt_hca_hdl_t,
    ibt_mr_hdl_t, ibt_pd_hdl_t, ibt_smr_attr_t *, ibt_mr_hdl_t *,
    ibt_mr_desc_t *);

static ibt_status_t
daplka_ibt_deregister_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_mr_hdl_t);

static ibt_status_t
daplka_ibt_alloc_srq(daplka_srq_resource_t *, ibt_hca_hdl_t, ibt_srq_flags_t,
    ibt_pd_hdl_t, ibt_srq_sizes_t *, ibt_srq_hdl_t *, ibt_srq_sizes_t *);

static ibt_status_t
daplka_ibt_free_srq(daplka_srq_resource_t *, ibt_srq_hdl_t);

/*
 * macros for manipulating resource objects.
 * these macros can be used on objects that begin with a
 * daplka_resource_t header.
 */
#define	DAPLKA_RS_REFCNT(rp) ((rp)->header.rs_refcnt)

#define	DAPLKA_RS_REF(rp) {				\
	mutex_enter(&(rp)->header.rs_reflock);		\
	(rp)->header.rs_refcnt++;			\
	ASSERT((rp)->header.rs_refcnt != 0);		\
	mutex_exit(&(rp)->header.rs_reflock);		\
}

#define	DAPLKA_RS_UNREF(rp) {					\
	mutex_enter(&(rp)->header.rs_reflock);			\
	ASSERT((rp)->header.rs_refcnt != 0);			\
	if (--(rp)->header.rs_refcnt == 0) {			\
		ASSERT((rp)->header.rs_free != NULL);		\
		mutex_exit(&(rp)->header.rs_reflock);		\
		(rp)->header.rs_free((daplka_resource_t *)rp);	\
	} else {						\
		mutex_exit(&(rp)->header.rs_reflock);		\
	}							\
}

#define	DAPLKA_RS_INIT(rp, type, rnum, free_func) {	\
	(rp)->header.rs_refcnt = 1;			\
	(rp)->header.rs_type = (type);			\
	(rp)->header.rs_rnum = (rnum);			\
	(rp)->header.rs_charged = 0;			\
	(rp)->header.rs_free = (free_func);		\
	mutex_init(&(rp)->header.rs_reflock, NULL,	\
	    MUTEX_DRIVER, NULL);			\
}

#define	DAPLKA_RS_FINI(rp) {				\
	mutex_destroy(&(rp)->header.rs_reflock);	\
}

#define	DAPLKA_RS_ACCT_INC(rp, cnt) {				\
	atomic_add_32(&(rp)->header.rs_charged, (cnt));		\
}
#define	DAPLKA_RS_ACCT_DEC(rp, cnt) {				\
	atomic_add_32(&(rp)->header.rs_charged, -(cnt));	\
}
#define	DAPLKA_RS_ACCT_CHARGED(rp) ((rp)->header.rs_charged)

#define	DAPLKA_RS_RNUM(rp) ((rp)->header.rs_rnum)
#define	DAPLKA_RS_TYPE(rp) ((rp)->header.rs_type)
#define	DAPLKA_RS_RESERVED(rp) ((intptr_t)(rp) == DAPLKA_RC_RESERVED)
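
/*
 * Reference counting sketch (an illustrative aside): a resource is
 * born with one reference from DAPLKA_RS_INIT, and the final
 * DAPLKA_RS_UNREF fires the rs_free destructor registered at init
 * time. hash table lookups take an extra reference on the caller's
 * behalf (via the table's lookup callback), which the caller must
 * drop when done:
 *
 *	DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP,
 *	    DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy);
 *	...
 *	ep_rp = daplka_hash_lookup(&ia_rp->ia_ep_htbl, hkey);
 *	... operate on ep_rp ...
 *	DAPLKA_RS_UNREF(ep_rp);		drops the lookup reference
 */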

/*
 * depending on the timeout value, does a cv_wait_sig or cv_timedwait_sig
 */
#define	DAPLKA_EVD_WAIT(cvp, mp, timeout)			\
	((timeout) == LONG_MAX) ? cv_wait_sig((cvp), (mp)) :	\
	cv_timedwait_sig((cvp), (mp), (timeout))
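
/*
 * Semantics sketch (an illustrative aside): with timeout == LONG_MAX
 * the macro blocks indefinitely in cv_wait_sig(); otherwise it uses
 * cv_timedwait_sig(), whose third argument is an absolute lbolt tick
 * value. in both cases a return of 0 means the wait was interrupted
 * by a signal, and cv_timedwait_sig() returns -1 when the timeout
 * expires. a caller would look roughly like this (the evd_lock and
 * evd_cv field names are assumed for illustration):
 *
 *	mutex_enter(&evd_rp->evd_lock);
 *	rval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv, &evd_rp->evd_lock,
 *	    timeout);
 *	mutex_exit(&evd_rp->evd_lock);
 */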

#define	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt++)
#define	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt--)

#define	DAPLKA_HOLD_HCA(dp, hca) {		\
	mutex_enter(&(dp)->daplka_mutex);	\
	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);	\
	mutex_exit(&(dp)->daplka_mutex);	\
}

#define	DAPLKA_RELE_HCA(dp, hca) {		\
	mutex_enter(&(dp)->daplka_mutex);	\
	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca);	\
	mutex_exit(&(dp)->daplka_mutex);	\
}

#define	DAPLKA_HCA_BUSY(hca)		\
	((hca)->hca_ref_cnt != 0 ||	\
	(hca)->hca_qp_count != 0 ||	\
	(hca)->hca_cq_count != 0 ||	\
	(hca)->hca_pd_count != 0 ||	\
	(hca)->hca_mw_count != 0 ||	\
	(hca)->hca_mr_count != 0)


static struct cb_ops daplka_cb_ops = {
	daplka_open,		/* cb_open */
	daplka_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	daplka_ioctl,		/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP,		/* cb_flag */
	CB_REV,			/* rev */
	nodev,			/* int (*cb_aread)() */
	nodev			/* int (*cb_awrite)() */
};

static struct dev_ops daplka_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	daplka_info,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	daplka_attach,		/* devo_attach */
	daplka_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&daplka_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	nulldev,		/* power */
	ddi_quiesce_not_needed,	/* devo_quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"uDAPL Service Driver",
	&daplka_ops,
};

static struct modlinkage modlinkage = {
#ifdef _LP64
	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL, NULL, NULL, NULL }
#else
	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL }
#endif
};

/*
 * daplka_dev holds global driver state and a list of HCAs
 */
static daplka_t *daplka_dev = NULL;
static void *daplka_state = NULL;

/*
 * global SP hash table
 */
static daplka_hash_table_t daplka_global_sp_htbl;

/*
 * timer_info hash table
 */
static daplka_hash_table_t daplka_timer_info_htbl;
static uint32_t daplka_timer_hkey = 0;

/*
 * shared MR avl tree
 */
static avl_tree_t daplka_shared_mr_tree;
static kmutex_t daplka_shared_mr_lock;
static int daplka_shared_mr_cmp(const void *, const void *);

/*
 * default kmem flags used by this driver
 */
static int daplka_km_flags = KM_SLEEP;

/*
 * taskq used for handling background tasks
 */
static taskq_t *daplka_taskq = NULL;

/*
 * daplka_cm_delay is the length of time the active
 * side needs to wait before timing out on the REP message.
 */
static clock_t daplka_cm_delay = 60000000;

/*
 * modunload will fail if pending_close is non-zero
 */
static uint32_t daplka_pending_close = 0;

static struct ibt_clnt_modinfo_s daplka_clnt_modinfo = {
	IBTI_V_CURR,
	IBT_USER,
	daplka_async_handler,
	NULL,
	DAPLKA_DRV_NAME
};

/*
 * Module Installation
 */
int
_init(void)
{
	int status;

	status = ddi_soft_state_init(&daplka_state, sizeof (daplka_t), 1);
	if (status != 0) {
		return (status);
	}

	mutex_init(&daplka_dbglock, NULL, MUTEX_DRIVER, NULL);
	bzero(daplka_dbgbuf, sizeof (daplka_dbgbuf));
	daplka_dbgnext = 0;
	daplka_dbginit = 1;

	daplka_resource_init();

	status = mod_install(&modlinkage);
	if (status != DDI_SUCCESS) {
		/* undo inits done before mod_install */
		daplka_resource_fini();
		mutex_destroy(&daplka_dbglock);
		ddi_soft_state_fini(&daplka_state);
	}
	return (status);
}

/*
 * Module Removal
 */
int
_fini(void)
{
	int status;

	/*
	 * mod_remove causes detach to be called
	 */
	if ((status = mod_remove(&modlinkage)) != 0) {
		DERR("fini: mod_remove failed: 0x%x\n", status);
		return (status);
	}

	daplka_resource_fini();
	mutex_destroy(&daplka_dbglock);
	ddi_soft_state_fini(&daplka_state);

	return (status);
}

/*
 * Return Module Info.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static void
daplka_enqueue_hca(daplka_t *dp, daplka_hca_t *hca)
{
	daplka_hca_t *h;

	ASSERT(mutex_owned(&dp->daplka_mutex));

	if (dp->daplka_hca_list_head == NULL) {
		dp->daplka_hca_list_head = hca;
	} else {
		h = dp->daplka_hca_list_head;
		while (h->hca_next != NULL)
			h = h->hca_next;

		h->hca_next = hca;
	}
}

static void
daplka_dequeue_hca(daplka_t *dp, daplka_hca_t *hca)
{
	daplka_hca_t *h;

	ASSERT(mutex_owned(&dp->daplka_mutex));

	if (dp->daplka_hca_list_head == hca)
		dp->daplka_hca_list_head = hca->hca_next;
	else {
		h = dp->daplka_hca_list_head;
		while (h->hca_next != hca)
			h = h->hca_next;
		h->hca_next = hca->hca_next;
	}
}

static int
daplka_init_hca(daplka_t *dp, ib_guid_t hca_guid)
{
	daplka_hca_t *hca;
	ibt_hca_portinfo_t *pinfop;
	uint_t size;
	int j;
	ibt_status_t status;

	hca = kmem_zalloc(sizeof (daplka_hca_t), KM_SLEEP);

	hca->hca_guid = hca_guid;

	/*
	 * open the HCA for use
	 */
	status = ibt_open_hca(dp->daplka_clnt_hdl, hca_guid, &hca->hca_hdl);
	if (status != IBT_SUCCESS) {
		if (status == IBT_HCA_IN_USE) {
			DERR("ibt_open_hca() returned IBT_HCA_IN_USE\n");
		} else {
			DERR("ibt_open_hca() returned %d\n", status);
		}
		kmem_free(hca, sizeof (daplka_hca_t));
		return (status);
	}

	/*
	 * query HCA to get its info
	 */
	status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
	if (status != IBT_SUCCESS) {
		DERR("ibt_query_hca returned %d (hca_guid 0x%llx)\n",
		    status, (longlong_t)hca_guid);
		goto out;
	}

	/*
	 * query HCA to get info of all ports
	 */
	status = ibt_query_hca_ports(hca->hca_hdl,
	    0, &pinfop, &hca->hca_nports, &size);
	if (status != IBT_SUCCESS) {
		DERR("ibt_query_all_ports returned %d "
		    "(hca_guid 0x%llx)\n", status,
		    (longlong_t)hca_guid);
		goto out;
	}
	hca->hca_ports = pinfop;
	hca->hca_pinfosz = size;

	DERR("hca guid 0x%llx, nports %d\n",
	    (longlong_t)hca_guid, hca->hca_nports);
	for (j = 0; j < hca->hca_nports; j++) {
		DERR("port %d: state %d prefix 0x%016llx "
		    "guid %016llx\n",
		    pinfop[j].p_port_num, pinfop[j].p_linkstate,
		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_prefix,
		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_guid);
	}

	mutex_enter(&dp->daplka_mutex);
	daplka_enqueue_hca(dp, hca);
	mutex_exit(&dp->daplka_mutex);

	return (IBT_SUCCESS);

out:
	(void) ibt_close_hca(hca->hca_hdl);
	kmem_free(hca, sizeof (daplka_hca_t));
	return (status);
}

/*
 * this function obtains the list of HCAs from IBTF.
 * the HCAs are then opened and the returned handles
 * and attributes are stored into the global daplka_dev
 * structure.
 */
static int
daplka_init_hcas(daplka_t *dp)
{
	int i;
	ib_guid_t *hca_guids;
	uint32_t hca_count;

	/*
	 * get the num & list of HCAs present
	 */
	hca_count = ibt_get_hca_list(&hca_guids);
	DERR("No. of HCAs present %d\n", hca_count);

	if (hca_count != 0) {
		/*
		 * get the info for each available HCA
		 */
		for (i = 0; i < hca_count; i++)
			(void) daplka_init_hca(dp, hca_guids[i]);

		ibt_free_hca_list(hca_guids, hca_count);
	}

	if (dp->daplka_hca_list_head != NULL)
		return (IBT_SUCCESS);
	else
		return (IBT_FAILURE);
}

static int
daplka_fini_hca(daplka_t *dp, daplka_hca_t *hca)
{
	ibt_status_t status;

	if (hca->hca_hdl != NULL) {
		status = ibt_close_hca(hca->hca_hdl);
		if (status != IBT_SUCCESS) {
			DERR("ibt_close_hca returned %d"
			    " (hca_guid 0x%llx)\n", status,
			    (longlong_t)hca->hca_guid);
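
			/*
			 * the close failed; put the HCA back on the
			 * global list so that a subsequent unload
			 * attempt can find it and retry.
			 */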
			mutex_enter(&dp->daplka_mutex);
			daplka_enqueue_hca(dp, hca);
			mutex_exit(&dp->daplka_mutex);

			return (status);
		}
	}

	if (hca->hca_ports != NULL)
		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);

	kmem_free(hca, sizeof (daplka_hca_t));
	return (IBT_SUCCESS);
}

/*
 * closes all HCAs and frees up the HCA list
 */
static int
daplka_fini_hcas(daplka_t *dp)
{
	ibt_status_t status;
	daplka_hca_t *hca;

	mutex_enter(&daplka_dev->daplka_mutex);
	while ((hca = dp->daplka_hca_list_head) != NULL) {
		if (DAPLKA_HCA_BUSY(hca)) {
			mutex_exit(&daplka_dev->daplka_mutex);
			return (IBT_HCA_RESOURCES_NOT_FREED);
		}
		daplka_dequeue_hca(daplka_dev, hca);
		mutex_exit(&daplka_dev->daplka_mutex);

		if ((status = daplka_fini_hca(dp, hca)) != IBT_SUCCESS)
			return (status);

		mutex_enter(&daplka_dev->daplka_mutex);
	}
	mutex_exit(&daplka_dev->daplka_mutex);

	DERR("dapl kernel agent unloaded\n");
	return (IBT_SUCCESS);
}


/*
 * Attach the device, create and fill in daplka_dev
 */
static int
daplka_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	daplka_t *dp;
	int instance, retval, err;
	boolean_t sp_htbl_allocated = B_FALSE;
	boolean_t timer_htbl_allocated = B_FALSE;
	boolean_t shared_mr_tree_allocated = B_FALSE;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	/*
	 * Allocate soft data structure
	 */
	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(daplka_state, instance) != DDI_SUCCESS) {
		DERR("attach: bad state zalloc\n");
		return (DDI_FAILURE);
	}

	dp = ddi_get_soft_state(daplka_state, instance);
	if (dp == NULL) {
		ddi_soft_state_free(daplka_state, instance);
		DERR("attach: cannot get soft state\n");
		return (DDI_FAILURE);
	}
	/*
	 * Stuff private info into dip.
	 */
	dp->daplka_dip = dip;
	ddi_set_driver_private(dip, dp);
	daplka_dev = dp;
	mutex_init(&dp->daplka_mutex, NULL, MUTEX_DRIVER, NULL);

	/*
	 * Register driver with IBTF
	 */
	retval = ibt_attach(&daplka_clnt_modinfo, dip, dp,
	    &dp->daplka_clnt_hdl);
	if (retval != IBT_SUCCESS) {
		DERR("attach: ibt_attach failed: error = %d\n", retval);
		retval = DDI_FAILURE;
		goto error;
	}
	/* Register to receive SM events */
	ibt_register_subnet_notices(dp->daplka_clnt_hdl,
	    daplka_sm_notice_handler, NULL);

	retval = daplka_init_hcas(dp);
	if (retval != IBT_SUCCESS) {
		DERR("attach: hca_init failed: error = %d\n", retval);
		retval = DDI_FAILURE;
		goto error;
	}
	/*
	 * this table is used by cr_handoff
	 */
	retval = daplka_hash_create(&daplka_global_sp_htbl,
	    DAPLKA_G_SP_HTBL_SZ, daplka_hash_sp_unref,
	    daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("attach: cannot create sp hash table\n");
		retval = DDI_FAILURE;
		goto error;
	}
	sp_htbl_allocated = B_TRUE;

	/*
	 * this table stores per EP timer information.
	 * timer_info_t objects are inserted into this table whenever
	 * an EP timer is set. timers get removed when they expire
	 * or when they get cancelled.
	 */
	retval = daplka_hash_create(&daplka_timer_info_htbl,
	    DAPLKA_TIMER_HTBL_SZ, daplka_hash_timer_free, NULL);
	if (retval != 0) {
		DERR("attach: cannot create timer hash table\n");
		retval = DDI_FAILURE;
		goto error;
	}
	timer_htbl_allocated = B_TRUE;

	/*
	 * this taskq is currently only used for processing timers.
	 * other processing may also use this taskq in the future.
	 */
	daplka_taskq = taskq_create(DAPLKA_DRV_NAME, DAPLKA_TQ_NTHREADS,
	    maxclsyspri, 1, DAPLKA_TQ_NTHREADS, TASKQ_DYNAMIC);
	if (daplka_taskq == NULL) {
		DERR("attach: cannot create daplka_taskq\n");
		retval = DDI_FAILURE;
		goto error;
	}

	/*
	 * daplka_shared_mr_tree holds daplka_shared_mr_t objects that
	 * get retrieved or created when daplka_mr_register_shared is
	 * called.
	 */
	mutex_init(&daplka_shared_mr_lock, NULL, MUTEX_DRIVER, NULL);

	avl_create(&daplka_shared_mr_tree, daplka_shared_mr_cmp,
	    sizeof (daplka_shared_mr_t),
	    offsetof(daplka_shared_mr_t, smr_node));
	shared_mr_tree_allocated = B_TRUE;

	/*
	 * Create the filesystem device node.
	 */
	if (ddi_create_minor_node(dip, DAPLKA_MINOR_NAME, S_IFCHR,
	    0, DDI_PSEUDO, NULL) != DDI_SUCCESS) {
		DERR("attach: bad create_minor_node\n");
		retval = DDI_FAILURE;
		goto error;
	}
	dp->daplka_status = DAPLKA_STATE_ATTACHED;
	ddi_report_dev(dip);
	return (DDI_SUCCESS);

error:
	if (shared_mr_tree_allocated) {
		avl_destroy(&daplka_shared_mr_tree);
		mutex_destroy(&daplka_shared_mr_lock);
	}

	if (daplka_taskq) {
		taskq_destroy(daplka_taskq);
		daplka_taskq = NULL;
	}

	if (timer_htbl_allocated) {
		daplka_hash_destroy(&daplka_timer_info_htbl);
	}

	if (sp_htbl_allocated) {
		daplka_hash_destroy(&daplka_global_sp_htbl);
	}

	err = daplka_fini_hcas(dp);
	if (err != IBT_SUCCESS) {
		DERR("attach: hca_fini returned %d\n", err);
	}

	if (dp->daplka_clnt_hdl != NULL) {
		/* unregister SM event notification */
		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
		    (ibt_sm_notice_handler_t)NULL, NULL);
		err = ibt_detach(dp->daplka_clnt_hdl);

		if (err != IBT_SUCCESS) {
			DERR("attach: ibt_detach returned %d\n", err);
		}
	}
	mutex_destroy(&dp->daplka_mutex);

	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
		ddi_remove_minor_node(dip, NULL);
	}
	ddi_soft_state_free(daplka_state, instance);
	return (retval);
}

/*
 * Detach - Free resources allocated in attach
 */
/* ARGSUSED */
static int
daplka_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance, err;
	void *cookie = NULL;
	daplka_t *dp;

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}
	if (daplka_resource.daplka_rc_cnt > 0 ||
	    daplka_pending_close > 0) {
		DERR("detach: driver in use\n");
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	dp = ddi_get_soft_state(daplka_state, instance);
	if (dp == NULL) {
		DERR("detach: cannot get soft state\n");
		return (DDI_FAILURE);
	}
	err = daplka_fini_hcas(dp);
	if (err != IBT_SUCCESS) {
		DERR("detach: hca_fini returned %d\n", err);
		return (DDI_FAILURE);
	}
	if (dp->daplka_clnt_hdl != NULL) {
		/* unregister SM event notification */
		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
		    (ibt_sm_notice_handler_t)NULL, NULL);
		err = ibt_detach(dp->daplka_clnt_hdl);
		if (err != IBT_SUCCESS) {
			DERR("detach: ibt_detach returned %d\n", err);
			return (DDI_FAILURE);
		}
		dp->daplka_clnt_hdl = NULL;
	}
	mutex_destroy(&dp->daplka_mutex);
	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
		ddi_remove_minor_node(dip, NULL);
	}
	dp->daplka_status = DAPLKA_STATE_DETACHED;
	ddi_soft_state_free(daplka_state, instance);
	daplka_dev = NULL;

	/*
	 * by the time we get here, all clients of dapl should
	 * have exited and completed their cleanup properly.
	 * we can assert that all global data structures are now
	 * empty.
	 */
	ASSERT(avl_destroy_nodes(&daplka_shared_mr_tree, &cookie) == NULL);
	avl_destroy(&daplka_shared_mr_tree);
	mutex_destroy(&daplka_shared_mr_lock);

	ASSERT(daplka_hash_getsize(&daplka_timer_info_htbl) == 0);
	daplka_hash_destroy(&daplka_timer_info_htbl);

	ASSERT(daplka_hash_getsize(&daplka_global_sp_htbl) == 0);
	daplka_hash_destroy(&daplka_global_sp_htbl);

	taskq_destroy(daplka_taskq);

	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
daplka_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (daplka_dev != NULL) {
			*result = daplka_dev->daplka_dip;
			return (DDI_SUCCESS);
		} else {
			return (DDI_FAILURE);
		}

	case DDI_INFO_DEVT2INSTANCE:
		*result = 0;
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

/*
 * creates an EP resource.
 * an EP resource contains an RC channel. an EP resource holds a
 * reference to a send_evd (for the send CQ), recv_evd (for the
 * recv CQ), a connection evd and a PD. these references ensure
 * that the referenced resources are not freed until the EP itself
 * gets freed.
 */
/* ARGSUSED */
static int
daplka_ep_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp;
	daplka_pd_resource_t *pd_rp;
	dapl_ep_create_t args;
	ibt_rc_chan_alloc_args_t chan_args;
	ibt_chan_alloc_flags_t achan_flags;
	ibt_chan_sizes_t chan_real_sizes;
	ibt_hca_attr_t *hca_attrp;
	uint64_t ep_hkey = 0;
	boolean_t inserted = B_FALSE;
	uint32_t old_state, new_state;
	int retval;
	ibt_status_t status;

	D3("ep_create: enter\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = kmem_zalloc(sizeof (daplka_ep_resource_t), daplka_km_flags);
	if (ep_rp == NULL) {
		DERR("ep_create: cannot allocate ep_rp\n");
		return (ENOMEM);
	}
	DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP,
	    DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy);

	mutex_init(&ep_rp->ep_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ep_rp->ep_cv, NULL, CV_DRIVER, NULL);
	ep_rp->ep_hca = ia_rp->ia_hca;
	ep_rp->ep_cookie = args.ep_cookie;
	ep_rp->ep_timer_hkey = 0;

	/*
	 * we don't have to use ep_get_state here because ep_rp is not in
	 * ep_htbl yet. refer to the description of daplka_ep_set_state
	 * for details about the EP state machine.
	 */
	ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
	new_state = old_state = DAPLKA_EP_STATE_CLOSED;

	/* get reference to send evd and get cq handle */
	ep_rp->ep_snd_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_snd_evd_hkey);
	if (ep_rp->ep_snd_evd == NULL) {
		DERR("ep_create: ep_snd_evd %llx not found\n",
		    args.ep_snd_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_scq = ep_rp->ep_snd_evd->evd_cq_hdl;
	if (chan_args.rc_scq == NULL) {
		DERR("ep_create: ep_snd_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to recv evd and get cq handle */
	ep_rp->ep_rcv_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_rcv_evd_hkey);
	if (ep_rp->ep_rcv_evd == NULL) {
		DERR("ep_create: ep_rcv_evd %llx not found\n",
		    args.ep_rcv_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_rcq = ep_rp->ep_rcv_evd->evd_cq_hdl;
	if (chan_args.rc_rcq == NULL) {
		DERR("ep_create: ep_rcv_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to conn evd */
	ep_rp->ep_conn_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_conn_evd_hkey);
	if (ep_rp->ep_conn_evd == NULL) {
		DERR("ep_create: ep_conn_evd %llx not found\n",
		    args.ep_conn_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to SRQ if needed */
	if (args.ep_srq_attached) {
		ep_rp->ep_srq_res = (daplka_srq_resource_t *)daplka_hash_lookup(
		    &ia_rp->ia_srq_htbl, args.ep_srq_hkey);
		if (ep_rp->ep_srq_res == NULL) {
			DERR("ep_create: ep_srq %llx not found\n",
			    (longlong_t)args.ep_srq_hkey);
			retval = EINVAL;
			goto cleanup;
		}
		ASSERT(DAPLKA_RS_TYPE(ep_rp->ep_srq_res) == DAPL_TYPE_SRQ);
		D3("ep_create: ep_srq %p %llx\n", ep_rp->ep_srq_res,
		    (longlong_t)args.ep_srq_hkey);
	} else {
		ep_rp->ep_srq_res = NULL;
	}

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.ep_pd_hkey);
	if (pd_rp == NULL) {
		DERR("ep_create: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	ep_rp->ep_pd_res = pd_rp;
	chan_args.rc_pd = pd_rp->pd_hdl;


	/*
	 * these checks ensure that the requested channel sizes
	 * are within the limits supported by the chosen HCA.
	 */
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.ep_ch_sizes.dcs_sq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_sq_sgl %d\n",
		    args.ep_ch_sizes.dcs_sq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_rq_sgl %d\n",
		    args.ep_ch_sizes.dcs_rq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_sq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_sq %d\n",
		    args.ep_ch_sizes.dcs_sq);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_rq %d\n",
		    args.ep_ch_sizes.dcs_rq);
		retval = EINVAL;
		goto cleanup;
	}

	chan_args.rc_sizes.cs_sq_sgl = args.ep_ch_sizes.dcs_sq_sgl;
	chan_args.rc_sizes.cs_rq_sgl = args.ep_ch_sizes.dcs_rq_sgl;
	chan_args.rc_sizes.cs_sq = args.ep_ch_sizes.dcs_sq;
	chan_args.rc_sizes.cs_rq = args.ep_ch_sizes.dcs_rq;
	chan_args.rc_flags = IBT_WR_SIGNALED;
	chan_args.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
	chan_args.rc_hca_port_num = ia_rp->ia_port_num;
	chan_args.rc_clone_chan = NULL;
	if (args.ep_srq_attached) {
		chan_args.rc_srq = ep_rp->ep_srq_res->srq_hdl;
	} else {
		chan_args.rc_srq = NULL;
	}

	D3("ep_create: sq_sgl %d, rq_sgl %d, sq %d, rq %d, "
	    "sig_type 0x%x, control 0x%x, portnum %d, clone_chan 0x%p\n",
	    args.ep_ch_sizes.dcs_sq_sgl, args.ep_ch_sizes.dcs_rq_sgl,
	    args.ep_ch_sizes.dcs_sq, args.ep_ch_sizes.dcs_rq,
	    chan_args.rc_flags, chan_args.rc_control,
	    chan_args.rc_hca_port_num, chan_args.rc_clone_chan);

	if (args.ep_srq_attached) {
		achan_flags = IBT_ACHAN_USER_MAP | IBT_ACHAN_USES_SRQ;
	} else {
		achan_flags = IBT_ACHAN_USER_MAP;
	}
	/* create rc channel */
	status = daplka_ibt_alloc_rc_channel(ep_rp, ia_rp->ia_hca_hdl,
	    achan_flags, &chan_args, &ep_rp->ep_chan_hdl,
	    &chan_real_sizes);
	if (status != IBT_SUCCESS) {
		DERR("ep_create: alloc_rc_channel returned %d\n", status);
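		/*
		 * hand the IBTF status back to the library through
		 * *rvalp; retval stays 0 so that userland can tell
		 * an IBTF failure apart from a copyin/copyout
		 * (EFAULT) or lookup (EINVAL) error. the same
		 * convention is used throughout the ioctl paths.
		 */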
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	args.ep_ch_real_sizes.dcs_sq = chan_real_sizes.cs_sq;
	args.ep_ch_real_sizes.dcs_rq = chan_real_sizes.cs_rq;
	args.ep_ch_real_sizes.dcs_sq_sgl = chan_real_sizes.cs_sq_sgl;
	args.ep_ch_real_sizes.dcs_rq_sgl = chan_real_sizes.cs_rq_sgl;

	/*
	 * store ep ptr with chan_hdl.
	 * this ep_ptr is used by the CM handlers (both active and
	 * passive)
	 * mutex is only needed for race of "destroy" and "async"
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	ibt_set_chan_private(ep_rp->ep_chan_hdl, (void *)ep_rp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_CHANNEL, (void *)ep_rp->ep_chan_hdl,
	    &args.ep_qp_data_out, sizeof (args.ep_qp_data_out));

	if (status != IBT_SUCCESS) {
		DERR("ep_create: ibt_ci_data_out error(%d)\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into ep hash table */
	retval = daplka_hash_insert(&ia_rp->ia_ep_htbl,
	    &ep_hkey, (void *)ep_rp);
	if (retval != 0) {
		DERR("ep_create: cannot insert ep resource into ep_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	/*
	 * at this point, the ep_rp can be looked up by other threads
	 * if they manage to guess the correct hkey. but they are not
	 * permitted to operate on ep_rp until we transition to the
	 * CLOSED state.
	 */

	/* return hkey to library */
	args.ep_hkey = ep_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

	daplka_ep_set_state(ep_rp, old_state, new_state);
	D3("ep_create: exit\n");
	return (0);

cleanup:
	if (inserted) {
		daplka_ep_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_ep_htbl, ep_hkey,
		    (void **)&free_rp);
		if (free_rp != ep_rp) {
			/*
			 * this case is impossible because ep_free will
			 * wait until our state transition is complete.
			 */
			DERR("ep_create: cannot remove ep from hash table\n");
			ASSERT(B_FALSE);
			return (retval);
		}
	}
	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * daplka_ep_get_state retrieves the current state of the EP and
 * sets the state to TRANSITIONING. if the current state is already
 * TRANSITIONING, this function will wait until the state becomes one
 * of the other EP states. Most of the EP related ioctls follow the
 * call sequence:
 *
 *	new_state = old_state = daplka_ep_get_state(ep_rp);
 *	...
 *	...some code that affects the EP
 *	...
 *	new_state = <NEW_STATE>;
 *	daplka_ep_set_state(ep_rp, old_state, new_state);
 *
 * this call sequence ensures that only one thread may access the EP
 * during the time ep_state is in TRANSITIONING. daplka_ep_set_state
 * transitions ep_state to new_state and wakes up any waiters blocking
 * on ep_cv.
 *
 */
static uint32_t
daplka_ep_get_state(daplka_ep_resource_t *ep_rp)
{
	uint32_t old_state = 0;

	mutex_enter(&ep_rp->ep_lock);
	while (ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING) {
		D2("get_state: wait for state transition to complete\n");
		cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
		D2("get_state: done, curr state = %d\n", ep_rp->ep_state);
	}
	ASSERT(ep_rp->ep_state != DAPLKA_EP_STATE_TRANSITIONING);
	old_state = ep_rp->ep_state;

	/*
	 * an ep that is in the FREED state cannot transition
	 * back to any of the regular states
	 */
	if (old_state != DAPLKA_EP_STATE_FREED) {
		ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
	}
	mutex_exit(&ep_rp->ep_lock);
	return (old_state);
}

/*
 * EP state transition diagram
 *
 *              CLOSED<----------------------
 *                |                         |
 *                |                         |
 *     ------------------------             |
 *     |                      |             |
 *     |                      |             |
 *     v                      v             |
 *  CONNECTING            ACCEPTING         |
 *    |  |  |              |  |  |          |
 *    |  |  |              |  |  |          |
 *    |  |  |              |  |  |          |
 *    |  |  |______________|__|  |          |
 *    |  |        |           |  |          |
 *    |  |________|___________|  |          |
 *    |           |    |         |          |
 *    |           v    |         |---->DISCONNECTED
 *    |       CONNECTED|                    ^
 *    v           |    |                    |
 *  ABORTING      |----|--------------------|
 *    |           |    |                    |
 *    |           |    v                    |
 *    |           |-->DISCONNECTING---------|
 *    |                                     |
 *    |-------------------------------------|
 *
 * *not shown in this diagram:
 *	-loopback transitions
 *	-transitions to the FREED state
 */
static boolean_t
daplka_ep_transition_is_valid(uint32_t old_state, uint32_t new_state)
{
	boolean_t valid = B_FALSE;

	/*
	 * resetting to the same state is a no-op and is always
	 * permitted. transitioning to the FREED state indicates
	 * that the ep is about to be freed and no further operation
	 * is allowed on it. to support abrupt close, the ep is
	 * permitted to transition to the FREED state from any state.
	 */
	if (old_state == new_state ||
	    new_state == DAPLKA_EP_STATE_FREED) {
		return (B_TRUE);
	}

	switch (old_state) {
	case DAPLKA_EP_STATE_CLOSED:
		/*
		 * this is the initial ep_state.
		 * a transition to CONNECTING or ACCEPTING may occur
		 * upon calling daplka_ep_connect or daplka_cr_accept,
		 * respectively.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTING ||
		    new_state == DAPLKA_EP_STATE_ACCEPTING) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_CONNECTING:
		/*
		 * we transition to this state if daplka_ep_connect
		 * is successful. from this state, we can transition
		 * to CONNECTED if daplka_cm_rc_conn_est gets called;
		 * or to DISCONNECTED if daplka_cm_rc_conn_closed or
		 * daplka_cm_rc_event_failure gets called. If the
		 * client calls daplka_ep_disconnect, we transition
		 * to DISCONNECTING. If a timer was set at ep_connect
		 * time and if the timer expires prior to any of the
		 * CM callbacks, we transition to ABORTING and then
		 * to DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTED ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED ||
		    new_state == DAPLKA_EP_STATE_ABORTING) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_ACCEPTING:
		/*
		 * we transition to this state if daplka_cr_accept
		 * is successful. from this state, we can transition
		 * to CONNECTED if daplka_cm_service_conn_est gets called;
		 * or to DISCONNECTED if daplka_cm_service_conn_closed or
		 * daplka_cm_service_event_failure gets called. If the
		 * client calls daplka_ep_disconnect, we transition to
		 * DISCONNECTING.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTED ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_CONNECTED:
		/*
		 * we transition to this state if an active or passive
		 * connection gets established. if the client calls
		 * daplka_ep_disconnect, we transition to the
		 * DISCONNECTING state. subsequent CM callbacks will
		 * cause ep_state to be set to DISCONNECTED. If the
		 * remote peer terminates the connection before we do,
		 * it is possible for us to transition directly from
		 * CONNECTED to DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_DISCONNECTING:
		/*
		 * we transition to this state if the client calls
		 * daplka_ep_disconnect.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_ABORTING:
		/*
		 * we transition to this state if the active side
		 * EP timer has expired. this is only a transient
		 * state that is set during timer processing. when
		 * timer processing completes, ep_state will become
		 * DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_DISCONNECTED:
		/*
		 * we transition to this state if we get a closed
		 * or event_failure CM callback. an expired timer
		 * can also cause us to be in this state. this
		 * is the only state in which we permit the
		 * ep_reinit operation.
		 */
		if (new_state == DAPLKA_EP_STATE_CLOSED) {
			valid = B_TRUE;
		}
		break;
	default:
		break;
	}

	if (!valid) {
		DERR("ep_transition: invalid state change %d -> %d\n",
		    old_state, new_state);
	}
	return (valid);
}

/*
 * first check if the transition is valid. then set ep_state
 * to new_state and wake up all waiters.
 */
static void
daplka_ep_set_state(daplka_ep_resource_t *ep_rp, uint32_t old_state,
    uint32_t new_state)
{
	boolean_t valid;

	ASSERT(new_state != DAPLKA_EP_STATE_TRANSITIONING);

	valid = daplka_ep_transition_is_valid(old_state, new_state);
	mutex_enter(&ep_rp->ep_lock);
	if (ep_rp->ep_state != DAPLKA_EP_STATE_FREED) {
		if (valid) {
			ep_rp->ep_state = new_state;
		} else {
			/*
			 * this case is impossible.
			 * we have a serious problem if we get here.
			 * instead of panicking, we reset the state to
			 * old_state. doing this would at least prevent
			 * threads from hanging due to ep_state being
			 * stuck in TRANSITIONING.
			 */
			ep_rp->ep_state = old_state;
			ASSERT(B_FALSE);
		}
	}
	cv_broadcast(&ep_rp->ep_cv);
	mutex_exit(&ep_rp->ep_lock);
}
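
/*
 * Illustrative instance of the get_state/set_state protocol described
 * above (a sketch of the pattern, not new driver logic): the state
 * freeze brackets the actual work, and set_state is called on every
 * exit path so that waiters never see a permanently TRANSITIONING EP:
 *
 *	new_state = old_state = daplka_ep_get_state(ep_rp);
 *	if (old_state != DAPLKA_EP_STATE_CLOSED) {
 *		retval = EINVAL;
 *		goto cleanup;
 *	}
 *	... modify the EP; other threads see TRANSITIONING and wait ...
 *	new_state = DAPLKA_EP_STATE_CONNECTING;
 * cleanup:
 *	daplka_ep_set_state(ep_rp, old_state, new_state);
 */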

/*
 * modifies RC channel attributes.
 * currently, only the rdma_in and rdma_out attributes may
 * be modified. the channel must be in quiescent state when
 * this function is called.
 */
/* ARGSUSED */
static int
daplka_ep_modify(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp = NULL;
	ibt_cep_modify_flags_t good_flags;
	ibt_rc_chan_modify_attr_t rcm_attr;
	ibt_hca_attr_t *hca_attrp;
	dapl_ep_modify_t args;
	ibt_status_t status;
	uint32_t old_state, new_state;
	int retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_modify_t),
	    mode);
	if (retval != 0) {
		DERR("ep_modify: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epm_hkey);
	if (ep_rp == NULL) {
		DERR("ep_modify: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	new_state = old_state = daplka_ep_get_state(ep_rp);

	if (old_state != DAPLKA_EP_STATE_CLOSED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
		DERR("ep_modify: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	good_flags = IBT_CEP_SET_RDMARA_OUT | IBT_CEP_SET_RDMARA_IN;
	if ((args.epm_flags & ~good_flags) != 0) {
		DERR("ep_modify: invalid flags 0x%x\n", args.epm_flags);
		retval = EINVAL;
		goto cleanup;
	}

	hca_attrp = &ia_rp->ia_hca->hca_attr;

	bzero(&rcm_attr, sizeof (ibt_rc_chan_modify_attr_t));
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_OUT) != 0) {
		if (args.epm_rdma_ra_out > hca_attrp->hca_max_rdma_out_chan) {
			DERR("ep_modify: invalid epm_rdma_ra_out %d\n",
			    args.epm_rdma_ra_out);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_out = args.epm_rdma_ra_out;
	}
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_IN) != 0) {
		if (args.epm_rdma_ra_in > hca_attrp->hca_max_rdma_in_chan) {
			DERR("ep_modify: epm_rdma_ra_in %d\n",
			    args.epm_rdma_ra_in);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_in = args.epm_rdma_ra_in;
	}
	status = ibt_modify_rc_channel(ep_rp->ep_chan_hdl, args.epm_flags,
	    &rcm_attr, NULL);
	if (status != IBT_SUCCESS) {
		DERR("ep_modify: modify_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/*
	 * ep_modify does not change ep_state
	 */
cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * Frees an EP resource.
 * an EP may only be freed when it is in the CLOSED or
 * DISCONNECTED state.
 */
/* ARGSUSED */
static int
daplka_ep_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp = NULL;
	dapl_ep_free_t args;
	uint32_t old_state, new_state;
	int retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_free_t), mode);
	if (retval != 0) {
		DERR("ep_free: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epf_hkey);
	if (ep_rp == NULL) {
		DERR("ep_free: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	new_state = old_state = daplka_ep_get_state(ep_rp);

	/*
	 * ep cannot be freed if it is in an invalid state.
	 */
	if (old_state != DAPLKA_EP_STATE_CLOSED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
		DERR("ep_free: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}
	ep_rp = NULL;
	retval = daplka_hash_remove(&ia_rp->ia_ep_htbl,
	    args.epf_hkey, (void **)&ep_rp);
	if (retval != 0 || ep_rp == NULL) {
		/*
		 * this is only possible if we have two threads
		 * calling ep_free in parallel.
		 */
		DERR("ep_free: cannot find ep resource\n");
		goto cleanup;
	}
	/* there should not be any outstanding timers */
	ASSERT(ep_rp->ep_timer_hkey == 0);

	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/* remove reference obtained by lookup */
	DAPLKA_RS_UNREF(ep_rp);

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(ep_rp);
	return (0);

cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/* remove reference obtained by lookup */
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}
1854 */ 1855 ASSERT(old_state == DAPLKA_EP_STATE_ABORTING); 1856 1857 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_SLEEP); 1858 ASSERT(disc_ev != NULL); 1859 1860 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT; 1861 disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie; 1862 disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE; 1863 disc_ev->ee_cmev.ec_cm_psep_cookie = 0; 1864 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL; 1865 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0; 1866 1867 D2("timer_thread: enqueue event(%p) evdp(%p)\n", 1868 disc_ev, ep_rp->ep_conn_evd); 1869 1870 new_state = DAPLKA_EP_STATE_DISCONNECTED; 1871 daplka_ep_set_state(ep_rp, old_state, new_state); 1872 1873 daplka_evd_wakeup(ep_rp->ep_conn_evd, 1874 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev); 1875 1876 /* this function will also unref ep_rp */ 1877 daplka_timer_info_free(timerp); 1878 } 1879 1880 /* 1881 * dispatches a thread to continue with timer processing. 1882 */ 1883 static void 1884 daplka_timer_dispatch(void *arg) 1885 { 1886 /* 1887 * keep rescheduling this function until 1888 * taskq_dispatch succeeds. 1889 */ 1890 if (taskq_dispatch(daplka_taskq, 1891 daplka_timer_thread, arg, TQ_NOSLEEP) == 0) { 1892 DERR("timer_dispatch: taskq_dispatch failed, retrying...\n"); 1893 (void) timeout(daplka_timer_dispatch, arg, 10); 1894 } 1895 } 1896 1897 /* 1898 * this function is called by the kernel's callout thread. 1899 * we first attempt to remove the timer object from the 1900 * global timer table. if it is found, we dispatch a thread 1901 * to continue processing the timer object. if it is not 1902 * found, that means the timer has been cancelled by someone 1903 * else. 1904 */ 1905 static void 1906 daplka_timer_handler(void *arg) 1907 { 1908 uint64_t timer_hkey = (uintptr_t)arg; 1909 daplka_timer_info_t *timerp = NULL; 1910 1911 D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)timer_hkey); 1912 1913 (void) daplka_hash_remove(&daplka_timer_info_htbl, 1914 timer_hkey, (void **)&timerp); 1915 if (timerp == NULL) { 1916 D2("timer_handler: timer already cancelled\n"); 1917 return; 1918 } 1919 daplka_timer_dispatch((void *)timerp); 1920 } 1921 1922 /* 1923 * allocates a timer_info object. 1924 * a reference to an EP is held by this object. this ensures 1925 * that the EP stays valid when a timer is outstanding. 1926 */ 1927 static daplka_timer_info_t * 1928 daplka_timer_info_alloc(daplka_ep_resource_t *ep_rp) 1929 { 1930 daplka_timer_info_t *timerp; 1931 1932 timerp = kmem_zalloc(sizeof (*timerp), daplka_km_flags); 1933 if (timerp == NULL) { 1934 DERR("timer_info_alloc: cannot allocate timer info\n"); 1935 return (NULL); 1936 } 1937 timerp->ti_ep_res = ep_rp; 1938 timerp->ti_tmo_id = 0; 1939 1940 return (timerp); 1941 } 1942 1943 /* 1944 * Frees the timer_info object. 1945 * we release the EP reference before freeing the object. 1946 */ 1947 static void 1948 daplka_timer_info_free(daplka_timer_info_t *timerp) 1949 { 1950 ASSERT(timerp->ti_ep_res != NULL); 1951 DAPLKA_RS_UNREF(timerp->ti_ep_res); 1952 timerp->ti_ep_res = NULL; 1953 ASSERT(timerp->ti_tmo_id == 0); 1954 kmem_free(timerp, sizeof (*timerp)); 1955 } 1956 1957 /* 1958 * cancels the timer set by ep_connect. 1959 * returns -1 if timer handling is in progress 1960 * and 0 otherwise. 1961 */ 1962 static int 1963 daplka_cancel_timer(daplka_ep_resource_t *ep_rp) 1964 { 1965 /* 1966 * this function can only be called when ep_state 1967 * is frozen.
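 *
 * a minimal sketch of the freeze/unfreeze convention assumed here
 * (illustrative): daplka_ep_get_state freezes ep_state by marking
 * it TRANSITIONING, and daplka_ep_set_state installs the new state
 * and unfreezes it. a typical caller, modeled on hash_ep_free
 * below, looks like:
 *
 *	new_state = old_state = daplka_ep_get_state(ep_rp);
 *	retval = daplka_cancel_timer(ep_rp);
 *	... other threads calling daplka_ep_get_state block
 *	    until we unfreeze ...
 *	daplka_ep_set_state(ep_rp, old_state, new_state);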
1968 */ 1969 ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING); 1970 if (ep_rp->ep_timer_hkey != 0) { 1971 daplka_timer_info_t *timerp = NULL; 1972 1973 (void) daplka_hash_remove(&daplka_timer_info_htbl, 1974 ep_rp->ep_timer_hkey, (void **)&timerp); 1975 if (timerp == NULL) { 1976 /* 1977 * this is possible if the timer_handler has 1978 * removed the timerp but the taskq thread has 1979 * not transitioned the ep_state to DISCONNECTED. 1980 * we need to reset the ep_state to allow the 1981 * taskq thread to continue with its work. the 1982 * taskq thread will set the ep_timer_hkey to 0 1983 * so we don't have to do it here. 1984 */ 1985 DERR("cancel_timer: timer is being processed\n"); 1986 return (-1); 1987 } 1988 /* 1989 * we got the timer object. if the handler fires at 1990 * this point, it will not be able to find the object 1991 * and will return immediately. normally, ti_tmo_id gets 1992 * cleared when the handler fires. 1993 */ 1994 ASSERT(timerp->ti_tmo_id != 0); 1995 1996 /* 1997 * note that untimeout can possibly call the handler. 1998 * we are safe because the handler will be a no-op. 1999 */ 2000 (void) untimeout(timerp->ti_tmo_id); 2001 timerp->ti_tmo_id = 0; 2002 daplka_timer_info_free(timerp); 2003 ep_rp->ep_timer_hkey = 0; 2004 } 2005 return (0); 2006 } 2007 2008 /* 2009 * this function is called by daplka_hash_destroy for 2010 * freeing timer_info objects 2011 */ 2012 static void 2013 daplka_hash_timer_free(void *obj) 2014 { 2015 daplka_timer_info_free((daplka_timer_info_t *)obj); 2016 } 2017 2018 /* ARGSUSED */ 2019 static uint16_t 2020 daplka_hellomsg_cksum(DAPL_PRIVATE *dp) 2021 { 2022 uint8_t *bp; 2023 int i; 2024 uint16_t cksum = 0; 2025 2026 bp = (uint8_t *)dp; 2027 for (i = 0; i < sizeof (DAPL_PRIVATE); i++) { 2028 cksum += bp[i]; 2029 } 2030 return (cksum); 2031 } 2032 2033 /* 2034 * ep_connect is called by the client to initiate a connection to a 2035 * remote service point. It is a non-blocking call. If a non-zero 2036 * timeout is specified by the client, a timer will be set just before 2037 * returning from ep_connect. Upon a successful return from ep_connect, 2038 * the client will call evd_wait to wait for the connection to complete. 2039 * If the connection is rejected or has failed due to an error, the 2040 * client will be notified with an event containing the appropriate error 2041 * code. If the connection is accepted, the client will be notified with 2042 * the CONN_ESTABLISHED event. If the timer expires before either of the 2043 * above events (error or established), a TIMED_OUT event will be delivered 2044 * to the client. 2045 * 2046 * the complicated part of the timer logic is the handling of race 2047 * conditions with CM callbacks. we need to ensure that either the CM or 2048 * the timer thread gets to deliver an event, but not both. when the 2049 * CM callback is about to deliver an event, it always tries to cancel 2050 * the outstanding timer. if cancel_timer indicates that the timer is 2051 * already being processed, the CM callback will simply return without 2052 * delivering an event. when the timer thread executes, it tries to check 2053 * if the EP is still in CONNECTING state (timers only work on the active 2054 * side). if the EP is not in this state, the timer thread will return 2055 * without delivering an event.
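 *
 * a sketch of the CM-callback side of this protocol (illustrative
 * only; the real handler, daplka_cm_rc_handler, does more than
 * what is shown here):
 *
 *	new_state = old_state = daplka_ep_get_state(ep_rp);
 *	if (daplka_cancel_timer(ep_rp) != 0) {
 *		... the timer thread owns event delivery;
 *		    restore the state and return without
 *		    enqueueing anything ...
 *		daplka_ep_set_state(ep_rp, old_state, new_state);
 *		return;
 *	}
 *	... safe to deliver CONN_ESTABLISHED or an error event ...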
2056 */ 2057 /* ARGSUSED */ 2058 static int 2059 daplka_ep_connect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 2060 cred_t *cred, int *rvalp) 2061 { 2062 daplka_ep_resource_t *ep_rp = NULL; 2063 dapl_ep_connect_t args; 2064 daplka_timer_info_t *timerp = NULL; 2065 uint32_t old_state, new_state; 2066 boolean_t timer_inserted = B_FALSE; 2067 uint64_t timer_hkey = 0; 2068 ibt_path_info_t path_info; 2069 ibt_path_attr_t path_attr; 2070 ibt_hca_attr_t *hca_attrp; 2071 ibt_chan_open_args_t chan_args; 2072 ibt_status_t status = IBT_SUCCESS; 2073 uint8_t num_paths; 2074 void *priv_data; 2075 DAPL_PRIVATE *dp; 2076 int retval = 0; 2077 ib_gid_t *sgid; 2078 ib_gid_t *dgid; 2079 uint64_t dgid_ored; 2080 ibt_ar_t ar_query_s; 2081 ibt_ar_t ar_result_s; 2082 ibt_path_flags_t pathflags; 2083 2084 D3("ep_connect: enter\n"); 2085 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_connect_t), 2086 mode); 2087 if (retval != 0) { 2088 DERR("ep_connect: copyin error %d\n", retval); 2089 return (EFAULT); 2090 } 2091 ep_rp = (daplka_ep_resource_t *) 2092 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epc_hkey); 2093 if (ep_rp == NULL) { 2094 DERR("ep_connect: cannot find ep resource\n"); 2095 return (EINVAL); 2096 } 2097 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP); 2098 2099 new_state = old_state = daplka_ep_get_state(ep_rp); 2100 if (old_state != DAPLKA_EP_STATE_CLOSED) { 2101 DERR("ep_connect: invalid state %d\n", old_state); 2102 retval = EINVAL; 2103 goto cleanup; 2104 } 2105 if (args.epc_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) { 2106 DERR("ep_connect: private data len (%d) exceeded " 2107 "max size %d\n", args.epc_priv_sz, 2108 DAPL_MAX_PRIVATE_DATA_SIZE); 2109 retval = EINVAL; 2110 goto cleanup; 2111 } 2112 2113 /* 2114 * check whether remote IP address to dgid resolution needs an ATS query 2115 */ 2116 dgid = &args.epc_dgid; 2117 dgid_ored = dgid->gid_guid | dgid->gid_prefix; 2118 #if defined(DAPLKA_DEBUG_FORCE_ATS) 2119 dgid_ored = 0ULL; 2120 #endif /* DAPLKA_DEBUG_FORCE_ATS */ 2121 /* check for unidentified dgid */ 2122 if (dgid_ored == 0ULL) { 2123 /* 2124 * setup for ibt_query_ar() 2125 */ 2126 sgid = &ia_rp->ia_hca_sgid; 2127 ar_query_s.ar_gid.gid_guid = 0ULL; 2128 ar_query_s.ar_gid.gid_prefix = 0ULL; 2129 ar_query_s.ar_pkey = 0; 2130 bcopy(args.epc_raddr_sadata.iad_sadata, 2131 ar_query_s.ar_data, DAPL_ATS_NBYTES); 2132 #define UR(b) ar_query_s.ar_data[(b)] 2133 D3("daplka_ep_connect: SA[8] %d.%d.%d.%d\n", 2134 UR(8), UR(9), UR(10), UR(11)); 2135 D3("daplka_ep_connect: SA[12] %d.%d.%d.%d\n", 2136 UR(12), UR(13), UR(14), UR(15)); 2137 status = ibt_query_ar(sgid, &ar_query_s, &ar_result_s); 2138 if (status != IBT_SUCCESS) { 2139 DERR("ep_connect: ibt_query_ar returned %d\n", status); 2140 *rvalp = (int)status; 2141 retval = 0; 2142 goto cleanup; 2143 } 2144 /* 2145 * dgid identified from SA record 2146 */ 2147 dgid = &ar_result_s.ar_gid; 2148 D2("daplka_ep_connect: ATS dgid=%llx:%llx\n", 2149 (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid); 2150 } 2151 2152 bzero(&path_info, sizeof (ibt_path_info_t)); 2153 bzero(&path_attr, sizeof (ibt_path_attr_t)); 2154 bzero(&chan_args, sizeof (ibt_chan_open_args_t)); 2155 2156 path_attr.pa_dgids = dgid; 2157 path_attr.pa_num_dgids = 1; 2158 /* 2159 * don't set sid in path_attr; this saves one SA query and also 2160 * keeps the server side from writing the service record 2161 */ 2162 path_attr.pa_sgid = ia_rp->ia_hca_sgid; 2163 path_attr.pa_pkey = ia_rp->ia_port_pkey; 2164 2165 /* save the connection ep - struct copy */ 2166 ep_rp->ep_sgid = ia_rp->ia_hca_sgid; 2167
ep_rp->ep_dgid = *dgid; 2168 2169 num_paths = 0; 2170 pathflags = IBT_PATH_PKEY; 2171 /* enable APM on remote port but not on loopback case */ 2172 if (daplka_apm && ((dgid->gid_prefix != path_attr.pa_sgid.gid_prefix) || 2173 (dgid->gid_guid != path_attr.pa_sgid.gid_guid))) { 2174 pathflags |= IBT_PATH_APM; 2175 } 2176 status = ibt_get_paths(daplka_dev->daplka_clnt_hdl, 2177 pathflags, &path_attr, 1, &path_info, &num_paths); 2178 2179 if (status != IBT_SUCCESS && status != IBT_INSUFF_DATA) { 2180 DERR("ep_connect: ibt_get_paths returned %d paths %d\n", 2181 status, num_paths); 2182 *rvalp = (int)status; 2183 retval = 0; 2184 goto cleanup; 2185 } 2186 /* fill in the sid directly to path_info */ 2187 path_info.pi_sid = args.epc_sid; 2188 hca_attrp = &ia_rp->ia_hca->hca_attr; 2189 2190 /* fill in open channel args */ 2191 chan_args.oc_path = &path_info; 2192 chan_args.oc_cm_handler = daplka_cm_rc_handler; 2193 chan_args.oc_cm_clnt_private = (void *)ep_rp; 2194 chan_args.oc_rdma_ra_out = hca_attrp->hca_max_rdma_out_chan; 2195 chan_args.oc_rdma_ra_in = hca_attrp->hca_max_rdma_in_chan; 2196 chan_args.oc_path_retry_cnt = 7; /* 3-bit field */ 2197 chan_args.oc_path_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY; 2198 2199 ASSERT(args.epc_priv_sz > 0); 2200 priv_data = (void *)args.epc_priv; 2201 2202 chan_args.oc_priv_data_len = args.epc_priv_sz; 2203 chan_args.oc_priv_data = priv_data; 2204 2205 /* 2206 * calculate checksum value of hello message and 2207 * put hello message in networking byte order 2208 */ 2209 dp = (DAPL_PRIVATE *)priv_data; 2210 dp->hello_msg.hi_port = htons(dp->hello_msg.hi_port); 2211 dp->hello_msg.hi_checksum = 0; 2212 dp->hello_msg.hi_checksum = htons(daplka_hellomsg_cksum(dp)); 2213 2214 if (args.epc_timeout > 0) { 2215 /* 2216 * increment refcnt before passing reference to 2217 * timer_info_alloc. 2218 */ 2219 DAPLKA_RS_REF(ep_rp); 2220 timerp = daplka_timer_info_alloc(ep_rp); 2221 if (timerp == NULL) { 2222 DERR("ep_connect: cannot allocate timer\n"); 2223 /* 2224 * we need to remove the reference if 2225 * allocation failed. 2226 */ 2227 DAPLKA_RS_UNREF(ep_rp); 2228 retval = ENOMEM; 2229 goto cleanup; 2230 } 2231 /* 2232 * We generate our own hkeys so that timer_hkey can fit 2233 * into a pointer and passed as an arg to timeout() 2234 */ 2235 timer_hkey = (uint64_t)daplka_timer_hkey_gen(); 2236 retval = daplka_hash_insert(&daplka_timer_info_htbl, 2237 &timer_hkey, (void *)timerp); 2238 if (retval != 0) { 2239 DERR("ep_connect: cannot insert timer info\n"); 2240 goto cleanup; 2241 } 2242 ASSERT(ep_rp->ep_timer_hkey == 0); 2243 ep_rp->ep_timer_hkey = timer_hkey; 2244 timer_inserted = B_TRUE; 2245 D2("ep_connect: timer_hkey = 0x%llx\n", 2246 (longlong_t)timer_hkey); 2247 } 2248 status = ibt_open_rc_channel(ep_rp->ep_chan_hdl, IBT_OCHAN_NO_FLAGS, 2249 IBT_NONBLOCKING, &chan_args, NULL); 2250 2251 if (status != IBT_SUCCESS) { 2252 DERR("ep_connect: ibt_open_rc_channel returned %d\n", status); 2253 *rvalp = (int)status; 2254 retval = 0; 2255 goto cleanup; 2256 } 2257 /* 2258 * if a cm callback gets called at this point, it'll have to wait until 2259 * ep_state becomes connecting (or some other state if another thread 2260 * manages to get ahead of the callback). this guarantees that the 2261 * callback will not touch the timer until it gets set. 
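 *
 * illustrative ordering (assuming the freeze convention described
 * earlier):
 *
 *	this thread:	ibt_open_rc_channel succeeds; timeout()
 *			arms the timer and sets ti_tmo_id;
 *			set_state(CONNECTING) unfreezes ep_state
 *	cm callback:	daplka_ep_get_state blocks while ep_state
 *			is TRANSITIONING, so by the time the
 *			callback can run, the timer is fully set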
2262 */ 2263 if (timerp != NULL) { 2264 clock_t tmo; 2265 2266 tmo = drv_usectohz((clock_t)args.epc_timeout); 2267 /* 2268 * We generate our own 32 bit timer_hkey so that it can fit 2269 * into a pointer 2270 */ 2271 ASSERT(timer_hkey != 0); 2272 timerp->ti_tmo_id = timeout(daplka_timer_handler, 2273 (void *)(uintptr_t)timer_hkey, tmo); 2274 } 2275 new_state = DAPLKA_EP_STATE_CONNECTING; 2276 2277 cleanup:; 2278 if (timerp != NULL && (retval != 0 || status != IBT_SUCCESS)) { 2279 /* 2280 * if ibt_open_rc_channel failed, the timerp must still 2281 * be in daplka_timer_info_htbl because neither the cm 2282 * callback nor the timer_handler will be called. 2283 */ 2284 if (timer_inserted) { 2285 daplka_timer_info_t *new_timerp = NULL; 2286 2287 ASSERT(timer_hkey != 0); 2288 (void) daplka_hash_remove(&daplka_timer_info_htbl, 2289 timer_hkey, (void **)&new_timerp); 2290 ASSERT(new_timerp == timerp); 2291 ep_rp->ep_timer_hkey = 0; 2292 } 2293 daplka_timer_info_free(timerp); 2294 } 2295 daplka_ep_set_state(ep_rp, old_state, new_state); 2296 DAPLKA_RS_UNREF(ep_rp); 2297 D3("ep_connect: exit\n"); 2298 return (retval); 2299 } 2300 2301 /* 2302 * ep_disconnect closes a connection with a remote peer. 2303 * if a connection has not been established, ep_disconnect 2304 * will instead flush all recv bufs posted to this channel. 2305 * if the EP state is CONNECTED, CONNECTING or ACCEPTING upon 2306 * entry to ep_disconnect, the EP state will transition to 2307 * DISCONNECTING upon exit. the CM callbacks triggered by 2308 * ibt_close_rc_channel will cause EP state to become 2309 * DISCONNECTED. This function is a no-op if EP state is 2310 * DISCONNECTED. 2311 */ 2312 /* ARGSUSED */ 2313 static int 2314 daplka_ep_disconnect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 2315 cred_t *cred, int *rvalp) 2316 { 2317 daplka_ep_resource_t *ep_rp = NULL; 2318 dapl_ep_disconnect_t args; 2319 ibt_status_t status; 2320 uint32_t old_state, new_state; 2321 int retval = 0; 2322 2323 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_disconnect_t), 2324 mode); 2325 if (retval != 0) { 2326 DERR("ep_disconnect: copyin error %d\n", retval); 2327 return (EFAULT); 2328 } 2329 ep_rp = (daplka_ep_resource_t *) 2330 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epd_hkey); 2331 if (ep_rp == NULL) { 2332 DERR("ep_disconnect: cannot find ep resource\n"); 2333 return (EINVAL); 2334 } 2335 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP); 2336 2337 new_state = old_state = daplka_ep_get_state(ep_rp); 2338 if (old_state != DAPLKA_EP_STATE_CONNECTED && 2339 old_state != DAPLKA_EP_STATE_CONNECTING && 2340 old_state != DAPLKA_EP_STATE_ACCEPTING && 2341 old_state != DAPLKA_EP_STATE_DISCONNECTED && 2342 old_state != DAPLKA_EP_STATE_DISCONNECTING && 2343 old_state != DAPLKA_EP_STATE_CLOSED) { 2344 DERR("ep_disconnect: invalid state %d\n", old_state); 2345 retval = EINVAL; 2346 goto cleanup; 2347 } 2348 2349 if ((old_state == DAPLKA_EP_STATE_DISCONNECTED) || 2350 (old_state == DAPLKA_EP_STATE_DISCONNECTING)) { 2351 D2("ep_disconnect: ep already disconnected\n"); 2352 retval = 0; 2353 /* we leave the state as DISCONNECTED */ 2354 goto cleanup; 2355 } 2356 if (old_state == DAPLKA_EP_STATE_CONNECTING || 2357 old_state == DAPLKA_EP_STATE_ACCEPTING) { 2358 D2("ep_disconnect: aborting, old_state = %d\n", old_state); 2359 } 2360 2361 /* 2362 * according to the udapl spec, ep_disconnect should 2363 * flush the channel if the channel is not CONNECTED. 
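 * (flushing transitions the channel into the error state, which
 * causes the posted recv buffers to complete in error and be
 * returned to the client)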
2364 */ 2365 if (old_state == DAPLKA_EP_STATE_CLOSED) { 2366 status = ibt_flush_channel(ep_rp->ep_chan_hdl); 2367 if (status != IBT_SUCCESS) { 2368 DERR("ep_disconnect: ibt_flush_channel failed %d\n", 2369 status); 2370 *rvalp = (int)status; 2371 } 2372 retval = 0; 2373 /* we leave the state as CLOSED */ 2374 goto cleanup; 2375 } 2376 2377 new_state = DAPLKA_EP_STATE_DISCONNECTING; 2378 daplka_ep_set_state(ep_rp, old_state, new_state); 2379 status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_NONBLOCKING, 2380 NULL, 0, NULL, NULL, NULL); 2381 2382 if (status == IBT_SUCCESS) { 2383 DAPLKA_RS_UNREF(ep_rp); 2384 return (retval); 2385 } else { 2386 DERR("ep_disconnect: ibt_close_rc_channel returned %d\n", 2387 status); 2388 *rvalp = (int)status; 2389 retval = 0; 2390 new_state = old_state; 2391 } 2392 2393 cleanup:; 2394 daplka_ep_set_state(ep_rp, old_state, new_state); 2395 DAPLKA_RS_UNREF(ep_rp); 2396 return (retval); 2397 } 2398 2399 /* 2400 * this function resets the EP to a usable state (i.e. from 2401 * DISCONNECTED to CLOSED). this is done with the ibt_recycle_rc 2402 * interface (see the call below), which returns the existing 2403 * channel to a connectable state without having to tear it down 2404 * and replace it with a new one. 2405 */ 2406 /* ARGSUSED */ 2407 static int 2408 daplka_ep_reinit(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 2409 cred_t *cred, int *rvalp) 2410 { 2411 daplka_ep_resource_t *ep_rp = NULL; 2412 dapl_ep_reinit_t args; 2413 ibt_status_t status; 2414 uint32_t old_state, new_state; 2415 int retval = 0; 2416 2417 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_reinit_t), 2418 mode); 2419 if (retval != 0) { 2420 DERR("reinit: copyin error %d\n", retval); 2421 return (EFAULT); 2422 } 2423 ep_rp = (daplka_ep_resource_t *) 2424 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epri_hkey); 2425 if (ep_rp == NULL) { 2426 DERR("reinit: cannot find ep resource\n"); 2427 return (EINVAL); 2428 } 2429 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP); 2430 new_state = old_state = daplka_ep_get_state(ep_rp); 2431 if ((old_state != DAPLKA_EP_STATE_CLOSED) && 2432 (old_state != DAPLKA_EP_STATE_DISCONNECTED)) { 2433 DERR("reinit: invalid state %d\n", old_state); 2434 retval = EINVAL; 2435 goto cleanup; 2436 } 2437 2438 status = ibt_recycle_rc(ep_rp->ep_chan_hdl, 2439 IBT_CEP_RDMA_RD|IBT_CEP_RDMA_WR, 2440 ia_rp->ia_port_num, NULL, NULL); 2441 if (status != IBT_SUCCESS) { 2442 DERR("reinit: unable to recycle channel\n"); 2443 *rvalp = (int)status; 2444 retval = 0; 2445 goto cleanup; 2446 } 2447 new_state = DAPLKA_EP_STATE_CLOSED; 2448 2449 cleanup:; 2450 daplka_ep_set_state(ep_rp, old_state, new_state); 2451 DAPLKA_RS_UNREF(ep_rp); 2452 return (retval); 2453 } 2454 2455 /* 2456 * destroys an EP resource. 2457 * called when refcnt drops to zero. 2458 */ 2459 static int 2460 daplka_ep_destroy(daplka_resource_t *gen_rp) 2461 { 2462 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)gen_rp; 2463 ibt_status_t status; 2464 2465 ASSERT(DAPLKA_RS_REFCNT(ep_rp) == 0); 2466 ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_FREED); 2467 2468 /* 2469 * by the time we get here, we can be sure that 2470 * there is no outstanding timer.
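 * (each outstanding timer holds a reference on the EP, taken in
 * timer_info_alloc, so a refcnt of zero implies that no timer can
 * still be referring to this EP)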
2471 */ 2472 ASSERT(ep_rp->ep_timer_hkey == 0); 2473 2474 D3("ep_destroy: entering, ep_rp 0x%p, rnum %d\n", 2475 ep_rp, DAPLKA_RS_RNUM(ep_rp)); 2476 /* 2477 * free rc channel 2478 */ 2479 if (ep_rp->ep_chan_hdl != NULL) { 2480 mutex_enter(&daplka_dev->daplka_mutex); 2481 ibt_set_chan_private(ep_rp->ep_chan_hdl, NULL); 2482 mutex_exit(&daplka_dev->daplka_mutex); 2483 status = daplka_ibt_free_channel(ep_rp, ep_rp->ep_chan_hdl); 2484 if (status != IBT_SUCCESS) { 2485 DERR("ep_destroy: ibt_free_channel returned %d\n", 2486 status); 2487 } 2488 ep_rp->ep_chan_hdl = NULL; 2489 D3("ep_destroy: qp freed, rnum %d\n", DAPLKA_RS_RNUM(ep_rp)); 2490 } 2491 /* 2492 * release all references 2493 */ 2494 if (ep_rp->ep_snd_evd != NULL) { 2495 DAPLKA_RS_UNREF(ep_rp->ep_snd_evd); 2496 ep_rp->ep_snd_evd = NULL; 2497 } 2498 if (ep_rp->ep_rcv_evd != NULL) { 2499 DAPLKA_RS_UNREF(ep_rp->ep_rcv_evd); 2500 ep_rp->ep_rcv_evd = NULL; 2501 } 2502 if (ep_rp->ep_conn_evd != NULL) { 2503 DAPLKA_RS_UNREF(ep_rp->ep_conn_evd); 2504 ep_rp->ep_conn_evd = NULL; 2505 } 2506 if (ep_rp->ep_srq_res != NULL) { 2507 DAPLKA_RS_UNREF(ep_rp->ep_srq_res); 2508 ep_rp->ep_srq_res = NULL; 2509 } 2510 if (ep_rp->ep_pd_res != NULL) { 2511 DAPLKA_RS_UNREF(ep_rp->ep_pd_res); 2512 ep_rp->ep_pd_res = NULL; 2513 } 2514 cv_destroy(&ep_rp->ep_cv); 2515 mutex_destroy(&ep_rp->ep_lock); 2516 2517 DAPLKA_RS_FINI(ep_rp); 2518 kmem_free(ep_rp, sizeof (daplka_ep_resource_t)); 2519 D3("ep_destroy: exiting, ep_rp 0x%p\n", ep_rp); 2520 return (0); 2521 } 2522 2523 /* 2524 * this function is called by daplka_hash_destroy for 2525 * freeing EP resource objects 2526 */ 2527 static void 2528 daplka_hash_ep_free(void *obj) 2529 { 2530 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)obj; 2531 ibt_status_t status; 2532 uint32_t old_state, new_state; 2533 int retval; 2534 2535 old_state = daplka_ep_get_state(ep_rp); 2536 retval = daplka_cancel_timer(ep_rp); 2537 new_state = DAPLKA_EP_STATE_FREED; 2538 daplka_ep_set_state(ep_rp, old_state, new_state); 2539 2540 if (retval != 0) { 2541 D2("hash_ep_free: ep_rp 0x%p " 2542 "timer is still being processed\n", ep_rp); 2543 mutex_enter(&ep_rp->ep_lock); 2544 if (ep_rp->ep_timer_hkey != 0) { 2545 D2("hash_ep_free: ep_rp 0x%p " 2546 "waiting for timer_hkey to be 0\n", ep_rp); 2547 cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock); 2548 } 2549 mutex_exit(&ep_rp->ep_lock); 2550 } 2551 2552 /* call ibt_close_rc_channel regardless of what state we are in */ 2553 status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING, 2554 NULL, 0, NULL, NULL, NULL); 2555 if (status != IBT_SUCCESS) { 2556 if (old_state == DAPLKA_EP_STATE_CONNECTED || 2557 old_state == DAPLKA_EP_STATE_CONNECTING || 2558 old_state == DAPLKA_EP_STATE_ACCEPTING) { 2559 DERR("hash_ep_free: ep_rp 0x%p state %d " 2560 "unexpected error %d from close_rc_channel\n", 2561 ep_rp, old_state, status); 2562 } 2563 D2("hash_ep_free: close_rc_channel, status %d\n", status); 2564 } 2565 2566 DAPLKA_RS_UNREF(ep_rp); 2567 } 2568 2569 /* 2570 * creates an EVD resource. 2571 * an EVD is used by the client to wait for events from one 2572 * or more sources.
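 *
 * a rough sketch of the call flow as seen from the library
 * (illustrative; the field names are from dapl_evd_create_t and
 * the ioctl plumbing is elided):
 *
 *	args.evd_flags = DAT_EVD_DTO_FLAG;
 *	args.evd_cq_size = <requested size>;
 *	args.evd_cno_hkey = <hkey from cno_alloc, or 0>;
 *	ioctl(fd, ..., &args);
 *	... on success, args.evd_hkey identifies the EVD and
 *	    args.evd_cq_data_out lets the library mmap the CQ ...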
2573 */ 2574 /* ARGSUSED */ 2575 static int 2576 daplka_evd_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 2577 cred_t *cred, int *rvalp) 2578 { 2579 daplka_evd_resource_t *evd_rp = NULL; 2580 daplka_async_evd_hkey_t *async_evd; 2581 ibt_hca_attr_t *hca_attrp; 2582 ibt_cq_attr_t cq_attr; 2583 dapl_evd_create_t args; 2584 uint64_t evd_hkey = 0; 2585 boolean_t inserted = B_FALSE; 2586 int retval = 0; 2587 ibt_status_t status; 2588 2589 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_create_t), 2590 mode); 2591 if (retval != 0) { 2592 DERR("evd_create: copyin error %d", retval); 2593 return (EFAULT); 2594 } 2595 if ((args.evd_flags & 2596 ~(DAT_EVD_DEFAULT_FLAG | DAT_EVD_SOFTWARE_FLAG)) != 0) { 2597 DERR("evd_create: invalid flags 0x%x\n", args.evd_flags); 2598 return (EINVAL); 2599 } 2600 2601 evd_rp = kmem_zalloc(sizeof (daplka_evd_resource_t), daplka_km_flags); 2602 DAPLKA_RS_INIT(evd_rp, DAPL_TYPE_EVD, 2603 DAPLKA_RS_RNUM(ia_rp), daplka_evd_destroy); 2604 2605 mutex_init(&evd_rp->evd_lock, NULL, MUTEX_DRIVER, NULL); 2606 cv_init(&evd_rp->evd_cv, NULL, CV_DRIVER, NULL); 2607 evd_rp->evd_hca = ia_rp->ia_hca; 2608 evd_rp->evd_flags = args.evd_flags; 2609 evd_rp->evd_hca_hdl = ia_rp->ia_hca_hdl; 2610 evd_rp->evd_cookie = args.evd_cookie; 2611 evd_rp->evd_cno_res = NULL; 2612 evd_rp->evd_cr_events.eel_event_type = DAPLKA_EVD_CM_EVENTS; 2613 evd_rp->evd_conn_events.eel_event_type = DAPLKA_EVD_CM_EVENTS; 2614 evd_rp->evd_async_events.eel_event_type = DAPLKA_EVD_ASYNC_EVENTS; 2615 2616 /* 2617 * if the client specified a non-zero cno_hkey, we 2618 * look up the cno and save the reference for later use. 2619 */ 2620 if (args.evd_cno_hkey > 0) { 2621 daplka_cno_resource_t *cno_rp; 2622 2623 cno_rp = (daplka_cno_resource_t *) 2624 daplka_hash_lookup(&ia_rp->ia_cno_htbl, 2625 args.evd_cno_hkey); 2626 if (cno_rp == NULL) { 2627 DERR("evd_create: cannot find cno resource\n"); 2628 retval = EINVAL; goto cleanup; 2629 } 2630 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO); 2631 evd_rp->evd_cno_res = cno_rp; 2632 } 2633 hca_attrp = &ia_rp->ia_hca->hca_attr; 2634 if ((evd_rp->evd_flags & 2635 (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) != 0) { 2636 if (args.evd_cq_size > hca_attrp->hca_max_cq_sz) { 2637 DERR("evd_create: invalid cq size %d", 2638 args.evd_cq_size); 2639 retval = EINVAL; 2640 goto cleanup; 2641 } 2642 cq_attr.cq_size = args.evd_cq_size; 2643 cq_attr.cq_sched = NULL; 2644 cq_attr.cq_flags = IBT_CQ_USER_MAP; 2645 2646 status = daplka_ibt_alloc_cq(evd_rp, evd_rp->evd_hca_hdl, 2647 &cq_attr, &evd_rp->evd_cq_hdl, &evd_rp->evd_cq_real_size); 2648 2649 if (status != IBT_SUCCESS) { 2650 DERR("evd_create: ibt_alloc_cq returned %d", status); 2651 *rvalp = (int)status; 2652 retval = 0; 2653 goto cleanup; 2654 } 2655 2656 /* 2657 * store evd ptr with cq_hdl 2658 * mutex is only needed for race of "destroy" and "async" 2659 */ 2660 mutex_enter(&daplka_dev->daplka_mutex); 2661 ibt_set_cq_private(evd_rp->evd_cq_hdl, (void *)evd_rp); 2662 mutex_exit(&daplka_dev->daplka_mutex); 2663 2664 /* Get HCA-specific data_out info */ 2665 status = ibt_ci_data_out(evd_rp->evd_hca_hdl, 2666 IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl, 2667 &args.evd_cq_data_out, sizeof (args.evd_cq_data_out)); 2668 2669 if (status != IBT_SUCCESS) { 2670 DERR("evd_create: ibt_ci_data_out error(%d)", status); 2671 *rvalp = (int)status; 2672 retval = 0; 2673 goto cleanup; 2674 } 2675 2676 args.evd_cq_real_size = evd_rp->evd_cq_real_size; 2677 2678 ibt_set_cq_handler(evd_rp->evd_cq_hdl, daplka_cq_handler, 2679 (void
*)evd_rp); 2680 } 2681 2682 retval = daplka_hash_insert(&ia_rp->ia_evd_htbl, 2683 &evd_hkey, (void *)evd_rp); 2684 if (retval != 0) { 2685 DERR("evd_create: cannot insert evd %d\n", retval); 2686 goto cleanup; 2687 } 2688 inserted = B_TRUE; 2689 2690 /* 2691 * If this evd handles async events, we need to add it to the IA 2692 * resource's async evd list 2693 */ 2694 if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) { 2695 async_evd = kmem_zalloc(sizeof (daplka_async_evd_hkey_t), 2696 daplka_km_flags); 2697 /* add the evd to the head of the list */ 2698 mutex_enter(&ia_rp->ia_lock); 2699 async_evd->aeh_evd_hkey = evd_hkey; 2700 async_evd->aeh_next = ia_rp->ia_async_evd_hkeys; 2701 ia_rp->ia_async_evd_hkeys = async_evd; 2702 mutex_exit(&ia_rp->ia_lock); 2703 } 2704 2705 args.evd_hkey = evd_hkey; 2706 retval = copyout(&args, (void *)arg, sizeof (dapl_evd_create_t)); 2707 if (retval != 0) { 2708 DERR("evd_create: copyout error %d\n", retval); 2709 retval = EFAULT; 2710 goto cleanup; 2711 } 2712 return (0); 2713 2714 cleanup:; 2715 if (inserted) { 2716 daplka_evd_resource_t *free_rp = NULL; 2717 2718 (void) daplka_hash_remove(&ia_rp->ia_evd_htbl, evd_hkey, 2719 (void **)&free_rp); 2720 if (free_rp != evd_rp) { 2721 DERR("evd_create: cannot remove evd\n"); 2722 /* 2723 * we can only get here if another thread 2724 * has completed the cleanup in evd_free 2725 */ 2726 return (retval); 2727 } 2728 } 2729 DAPLKA_RS_UNREF(evd_rp); 2730 return (retval); 2731 } 2732 2733 /* 2734 * resizes CQ and returns new mapping info to library. 2735 */ 2736 /* ARGSUSED */ 2737 static int 2738 daplka_cq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 2739 cred_t *cred, int *rvalp) 2740 { 2741 daplka_evd_resource_t *evd_rp = NULL; 2742 ibt_hca_attr_t *hca_attrp; 2743 dapl_cq_resize_t args; 2744 ibt_status_t status; 2745 int retval = 0; 2746 2747 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cq_resize_t), 2748 mode); 2749 if (retval != 0) { 2750 DERR("cq_resize: copyin error %d\n", retval); 2751 return (EFAULT); 2752 } 2753 2754 /* get evd resource */ 2755 evd_rp = (daplka_evd_resource_t *) 2756 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.cqr_evd_hkey); 2757 if (evd_rp == NULL) { 2758 DERR("cq_resize: cannot find evd resource\n"); 2759 return (EINVAL); 2760 } 2761 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD); 2762 2763 hca_attrp = &ia_rp->ia_hca->hca_attr; 2764 if (args.cqr_cq_new_size > hca_attrp->hca_max_cq_sz) { 2765 DERR("cq_resize: invalid cq size %d", args.cqr_cq_new_size); 2766 retval = EINVAL; 2767 goto cleanup; 2768 } 2769 /* 2770 * If ibt_resize_cq fails, it is primarily due to resource 2771 * shortage. Per the IB spec a resize will never lose events and 2772 * a resize error leaves the CQ intact. Therefore even if the 2773 * resize request fails we proceed and get the mapping data 2774 * from the CQ so that the library can mmap it.
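 * so, on a resize failure the ioctl itself still succeeds: the
 * library sees the old size in cqr_cq_real_size along with the
 * mapping data for the intact CQ.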
2775 */ 2776 status = ibt_resize_cq(evd_rp->evd_cq_hdl, args.cqr_cq_new_size, 2777 &args.cqr_cq_real_size); 2778 if (status != IBT_SUCCESS) { 2779 /* we return the size of the old CQ if resize fails */ 2780 args.cqr_cq_real_size = evd_rp->evd_cq_real_size; 2781 ASSERT(status != IBT_CQ_HDL_INVALID); 2782 DERR("cq_resize: ibt_resize_cq failed:%d\n", status); 2783 } else { 2784 mutex_enter(&evd_rp->evd_lock); 2785 evd_rp->evd_cq_real_size = args.cqr_cq_real_size; 2786 mutex_exit(&evd_rp->evd_lock); 2787 } 2788 2789 D2("cq_resize(%d): done new_sz(%u) real_sz(%u)\n", 2790 DAPLKA_RS_RNUM(evd_rp), 2791 args.cqr_cq_new_size, args.cqr_cq_real_size); 2792 2793 /* Get HCA-specific data_out info */ 2794 status = ibt_ci_data_out(evd_rp->evd_hca_hdl, 2795 IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl, 2796 &args.cqr_cq_data_out, sizeof (args.cqr_cq_data_out)); 2797 if (status != IBT_SUCCESS) { 2798 DERR("cq_resize: ibt_ci_data_out error(%d)\n", status); 2799 /* return ibt_ci_data_out status */ 2800 *rvalp = (int)status; 2801 retval = 0; 2802 goto cleanup; 2803 } 2804 2805 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cq_resize_t), 2806 mode); 2807 if (retval != 0) { 2808 DERR("cq_resize: copyout error %d\n", retval); 2809 retval = EFAULT; 2810 goto cleanup; 2811 } 2812 2813 cleanup:; 2814 if (evd_rp != NULL) { 2815 DAPLKA_RS_UNREF(evd_rp); 2816 } 2817 return (retval); 2818 } 2819 2820 /* 2821 * Routine to copyin the event poll message so that 32 bit libraries 2822 * can be safely supported 2823 */ 2824 int 2825 daplka_event_poll_copyin(intptr_t inarg, dapl_event_poll_t *outarg, int mode) 2826 { 2827 int retval; 2828 2829 #ifdef _MULTI_DATAMODEL 2830 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 2831 dapl_event_poll32_t args32; 2832 2833 retval = ddi_copyin((void *)inarg, &args32, 2834 sizeof (dapl_event_poll32_t), mode); 2835 if (retval != 0) { 2836 DERR("event_poll_copyin: 32bit error %d\n", retval); 2837 return (EFAULT); 2838 } 2839 2840 outarg->evp_evd_hkey = args32.evp_evd_hkey; 2841 outarg->evp_threshold = args32.evp_threshold; 2842 outarg->evp_timeout = args32.evp_timeout; 2843 outarg->evp_ep = (dapl_ib_event_t *)(uintptr_t)args32.evp_ep; 2844 outarg->evp_num_ev = args32.evp_num_ev; 2845 outarg->evp_num_polled = args32.evp_num_polled; 2846 return (0); 2847 } 2848 #endif 2849 retval = ddi_copyin((void *)inarg, outarg, sizeof (dapl_event_poll_t), 2850 mode); 2851 if (retval != 0) { 2852 DERR("event_poll: copyin error %d\n", retval); 2853 return (EFAULT); 2854 } 2855 2856 return (0); 2857 } 2858 2859 /* 2860 * Routine to copyout the event poll message so that 32 bit libraries 2861 * can be safely supported 2862 */ 2863 int 2864 daplka_event_poll_copyout(dapl_event_poll_t *inarg, intptr_t outarg, int mode) 2865 { 2866 int retval; 2867 2868 #ifdef _MULTI_DATAMODEL 2869 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) { 2870 dapl_event_poll32_t args32; 2871 2872 args32.evp_evd_hkey = inarg->evp_evd_hkey; 2873 args32.evp_threshold = inarg->evp_threshold; 2874 args32.evp_timeout = inarg->evp_timeout; 2875 args32.evp_ep = (caddr32_t)(uintptr_t)inarg->evp_ep; 2876 args32.evp_num_ev = inarg->evp_num_ev; 2877 args32.evp_num_polled = inarg->evp_num_polled; 2878 2879 retval = ddi_copyout((void *)&args32, (void *)outarg, 2880 sizeof (dapl_event_poll32_t), mode); 2881 if (retval != 0) { 2882 DERR("event_poll_copyout: 32bit error %d\n", retval); 2883 return (EFAULT); 2884 } 2885 return (0); 2886 } 2887 #endif 2888 retval = ddi_copyout((void *)inarg, (void *)outarg, 2889 sizeof 
(dapl_event_poll_t), mode); 2890 if (retval != 0) { 2891 DERR("event_poll_copyout: error %d\n", retval); 2892 return (EFAULT); 2893 } 2894 2895 return (0); 2896 } 2897 2898 /* 2899 * function to handle CM REQ RCV private data from Solaris or third parties 2900 */ 2901 /* ARGSUSED */ 2902 static void 2903 daplka_crevent_privdata_post(daplka_ia_resource_t *ia_rp, 2904 dapl_ib_event_t *evd_rp, daplka_evd_event_t *cr_ev) 2905 { 2906 DAPL_PRIVATE *dp; 2907 ib_gid_t *lgid; 2908 ibt_ar_t ar_query_s; 2909 ibt_ar_t ar_result_s; 2910 DAPL_HELLO_MSG *hip; 2911 uint32_t ipaddr_ord; 2912 ibt_priv_data_len_t clen; 2913 ibt_priv_data_len_t olen; 2914 ibt_status_t status; 2915 uint16_t cksum; 2916 2917 /* 2918 * get private data and len 2919 */ 2920 dp = (DAPL_PRIVATE *)cr_ev->ee_cmev.ec_cm_ev_priv_data; 2921 clen = cr_ev->ee_cmev.ec_cm_ev_priv_data_len; 2922 #if defined(DAPLKA_DEBUG_FORCE_ATS) 2923 /* skip the DAPL_PRIVATE checksum check */ 2924 #else 2925 /* for remote connects */ 2926 /* look up hello message in the CM private data area */ 2927 if (clen >= sizeof (DAPL_PRIVATE) && 2928 (dp->hello_msg.hi_vers == DAPL_HELLO_MSG_VERS)) { 2929 cksum = ntohs(dp->hello_msg.hi_checksum); 2930 dp->hello_msg.hi_checksum = 0; 2931 if (daplka_hellomsg_cksum(dp) == cksum) { 2932 D2("daplka_crevent_privdata_post: Solaris msg\n"); 2933 evd_rp->ibe_ce.ibce_priv_data_size = clen; 2934 dp->hello_msg.hi_checksum = DAPL_CHECKSUM; 2935 dp->hello_msg.hi_port = ntohs(dp->hello_msg.hi_port); 2936 bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen); 2937 kmem_free(dp, clen); 2938 return; 2939 } 2940 } 2941 #endif /* DAPLKA_DEBUG_FORCE_ATS */ 2942 2943 D2("daplka_crevent_privdata_post: 3rd party msg\n"); 2944 /* transpose CM private data into hello message */ 2945 if (clen) { 2946 olen = clen; 2947 if (clen > DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE) { 2948 clen = DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE; 2949 } 2950 bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen); 2951 kmem_free(dp, olen); 2952 } else { 2953 bzero(evd_rp->ibe_ce.ibce_priv_data_ptr, 2954 DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE); 2955 } 2956 evd_rp->ibe_ce.ibce_priv_data_size = sizeof (DAPL_PRIVATE); 2957 dp = (DAPL_PRIVATE *)evd_rp->ibe_ce.ibce_priv_data_ptr; 2958 /* 2959 * fill in hello message 2960 */ 2961 hip = &dp->hello_msg; 2962 hip->hi_checksum = DAPL_CHECKSUM; 2963 hip->hi_clen = clen; 2964 hip->hi_mid = 0; 2965 hip->hi_vers = DAPL_HELLO_MSG_VERS; 2966 hip->hi_port = 0; 2967 2968 /* assign sgid and dgid */ 2969 lgid = &ia_rp->ia_hca_sgid; 2970 ar_query_s.ar_gid.gid_prefix = 2971 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix; 2972 ar_query_s.ar_gid.gid_guid = 2973 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid; 2974 ar_query_s.ar_pkey = ia_rp->ia_port_pkey; 2975 bzero(ar_query_s.ar_data, DAPL_ATS_NBYTES); 2976 2977 /* reverse ip address lookup through ATS */ 2978 status = ibt_query_ar(lgid, &ar_query_s, &ar_result_s); 2979 if (status == IBT_SUCCESS) { 2980 bcopy(ar_result_s.ar_data, hip->hi_saaddr, DAPL_ATS_NBYTES); 2981 /* determine the address families */ 2982 ipaddr_ord = hip->hi_v4pad[0] | hip->hi_v4pad[1] | 2983 hip->hi_v4pad[2]; 2984 if (ipaddr_ord == 0) { 2985 hip->hi_ipv = AF_INET; 2986 } else { 2987 hip->hi_ipv = AF_INET6; 2988 } 2989 2990 #define UL(b) ar_result_s.ar_data[(b)] 2991 D3("daplka_privdata_post: family=%d :SA[8] %d.%d.%d.%d\n", 2992 hip->hi_ipv, UL(8), UL(9), UL(10), UL(11)); 2993 D3("daplka_privdata_post: SA[12] %d.%d.%d.%d\n", 2994 UL(12), UL(13), UL(14), UL(15)); 2995 } else { 2996 /* non-conforming third parties */ 2997 hip->hi_ipv =
AF_UNSPEC; 2998 bzero(hip->hi_saaddr, DAPL_ATS_NBYTES); 2999 } 3000 } 3001 3002 /* 3003 * this function is called by evd_wait and evd_dequeue to wait for 3004 * connection events and CQ notifications. typically this function 3005 * is called when the userland CQ is empty and the client has 3006 * specified a non-zero timeout to evd_wait. if the client is 3007 * interested in CQ events, the CQ must be armed in userland prior 3008 * to calling this function. 3009 */ 3010 /* ARGSUSED */ 3011 static int 3012 daplka_event_poll(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3013 cred_t *cred, int *rvalp) 3014 { 3015 daplka_evd_resource_t *evd_rp = NULL; 3016 dapl_event_poll_t args; 3017 daplka_evd_event_t *head; 3018 dapl_ib_event_t evp_arr[NUM_EVENTS_PER_POLL]; 3019 dapl_ib_event_t *evp; 3020 dapl_ib_event_t *evp_start; 3021 size_t evp_size; 3022 int threshold; 3023 clock_t timeout; 3024 uint32_t max_events; 3025 uint32_t num_events = 0; 3026 void *pd; 3027 ibt_priv_data_len_t n; 3028 int retval = 0; 3029 int rc; 3030 3031 retval = daplka_event_poll_copyin(arg, &args, mode); 3032 if (retval != 0) { 3033 return (EFAULT); 3034 } 3035 3036 if ((args.evp_num_ev > 0) && (args.evp_ep == NULL)) { 3037 DERR("event_poll: evp_ep cannot be NULL if num_wc=%d", 3038 args.evp_num_ev); 3039 return (EINVAL); 3040 } 3041 /* 3042 * Note: dequeue requests have a threshold = 0, timeout = 0 3043 */ 3044 threshold = args.evp_threshold; 3045 3046 max_events = args.evp_num_ev; 3047 /* ensure library is passing sensible values */ 3048 if (max_events < threshold) { 3049 DERR("event_poll: max_events(%d) < threshold(%d)\n", 3050 max_events, threshold); 3051 return (EINVAL); 3052 } 3053 /* Do a sanity check to avoid excessive memory allocation */ 3054 if (max_events > DAPL_EVD_MAX_EVENTS) { 3055 DERR("event_poll: max_events(%d) > %d", 3056 max_events, DAPL_EVD_MAX_EVENTS); 3057 return (EINVAL); 3058 } 3059 D4("event_poll: threshold(%d) timeout(0x%llx) max_events(%d)\n", 3060 threshold, (longlong_t)args.evp_timeout, max_events); 3061 3062 /* get evd resource */ 3063 evd_rp = (daplka_evd_resource_t *) 3064 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evp_evd_hkey); 3065 if (evd_rp == NULL) { 3066 DERR("event_poll: cannot find evd resource\n"); 3067 return (EINVAL); 3068 } 3069 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD); 3070 3071 /* 3072 * Use event array on the stack if possible 3073 */ 3074 if (max_events <= NUM_EVENTS_PER_POLL) { 3075 evp_start = evp = &evp_arr[0]; 3076 } else { 3077 evp_size = max_events * sizeof (dapl_ib_event_t); 3078 evp_start = evp = kmem_zalloc(evp_size, daplka_km_flags); 3079 if (evp == NULL) { 3080 DERR("event_poll: kmem_zalloc failed, evp_size %d", 3081 evp_size); 3082 retval = ENOMEM; 3083 goto cleanup; 3084 } 3085 } 3086 3087 /* 3088 * The Event poll algorithm is as follows - 3089 * The library passes a buffer big enough to hold "max_events" 3090 * events. max_events is >= threshold. If at any stage we get 3091 * max_events no. of events we bail. The events are polled in 3092 * the following order - 3093 * 1) Check for CR events in the evd_cr_events list 3094 * 2) Check for Connection events in the evd_connection_events list 3095 * 3096 * If after the above 2 steps we don't have enough(>= threshold) events 3097 * we block for CQ notification and sleep. Upon being woken up we start 3098 * at step 1 again. 
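 *
 * in pseudocode, the loop below is roughly (a sketch, not a
 * literal copy; note that async events are drained as well):
 *
 *	for (;;) {
 *		drain CR events;	stop if num == max_events
 *		drain CONN events;	stop if num == max_events
 *		drain ASYNC events;	stop if num == max_events
 *		if (threshold > 0 && num >= threshold)
 *			break;
 *		wait on evd_cv until new events, EINTR or ETIME;
 *	}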
3099 */ 3100 3101 /* 3102 * Note: this could be 0 or INFINITE or any other value in microseconds 3103 */ 3104 if (args.evp_timeout > 0) { 3105 if (args.evp_timeout >= LONG_MAX) { 3106 timeout = LONG_MAX; 3107 } else { 3108 clock_t curr_time = ddi_get_lbolt(); 3109 3110 timeout = curr_time + 3111 drv_usectohz((clock_t)args.evp_timeout); 3112 /* 3113 * use the max value if we wrapped around 3114 */ 3115 if (timeout <= curr_time) { 3116 timeout = LONG_MAX; 3117 } 3118 } 3119 } else { 3120 timeout = 0; 3121 } 3122 3123 mutex_enter(&evd_rp->evd_lock); 3124 for (;;) { 3125 /* 3126 * If this evd is waiting for CM events, check that now. 3127 */ 3128 if ((evd_rp->evd_flags & DAT_EVD_CR_FLAG) && 3129 (evd_rp->evd_cr_events.eel_num_elements > 0)) { 3130 /* dequeue events from evd_cr_events list */ 3131 while (head = daplka_evd_event_dequeue( 3132 &evd_rp->evd_cr_events)) { 3133 /* 3134 * populate the evp array 3135 */ 3136 evp[num_events].ibe_ev_family = DAPL_CR_EVENTS; 3137 evp[num_events].ibe_ce.ibce_event = 3138 head->ee_cmev.ec_cm_ev_type; 3139 evp[num_events].ibe_ce.ibce_cookie = 3140 (uint64_t)head->ee_cmev.ec_cm_cookie; 3141 evp[num_events].ibe_ce.ibce_psep_cookie = 3142 head->ee_cmev.ec_cm_psep_cookie; 3143 daplka_crevent_privdata_post(ia_rp, 3144 &evp[num_events], head); 3145 kmem_free(head, sizeof (daplka_evd_event_t)); 3146 3147 if (++num_events == max_events) { 3148 mutex_exit(&evd_rp->evd_lock); 3149 goto maxevent_reached; 3150 } 3151 } 3152 } 3153 3154 if ((evd_rp->evd_flags & DAT_EVD_CONNECTION_FLAG) && 3155 (evd_rp->evd_conn_events.eel_num_elements > 0)) { 3156 /* dequeue events from evd_connection_events list */ 3157 while ((head = daplka_evd_event_dequeue 3158 (&evd_rp->evd_conn_events))) { 3159 /* 3160 * populate the evp array 3161 */ 3162 if (head->ee_cmev.ec_cm_is_passive) { 3163 evp[num_events].ibe_ev_family = 3164 DAPL_PASSIVE_CONNECTION_EVENTS; 3165 } else { 3166 evp[num_events].ibe_ev_family = 3167 DAPL_ACTIVE_CONNECTION_EVENTS; 3168 } 3169 evp[num_events].ibe_ce.ibce_event = 3170 head->ee_cmev.ec_cm_ev_type; 3171 evp[num_events].ibe_ce.ibce_cookie = 3172 (uint64_t)head->ee_cmev.ec_cm_cookie; 3173 evp[num_events].ibe_ce.ibce_psep_cookie = 3174 head->ee_cmev.ec_cm_psep_cookie; 3175 3176 if (head->ee_cmev.ec_cm_ev_priv_data_len > 0) { 3177 pd = head->ee_cmev.ec_cm_ev_priv_data; 3178 n = head-> 3179 ee_cmev.ec_cm_ev_priv_data_len; 3180 bcopy(pd, (void *)evp[num_events]. 3181 ibe_ce.ibce_priv_data_ptr, n); 3182 evp[num_events].ibe_ce.
3184 ibce_priv_data_size = n; 3185 kmem_free(pd, n); 3186 } 3187 3188 kmem_free(head, sizeof (daplka_evd_event_t)); 3189 3190 if (++num_events == max_events) { 3191 mutex_exit(&evd_rp->evd_lock); 3192 goto maxevent_reached; 3193 } 3194 } 3195 } 3196 3197 if ((evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) && 3198 (evd_rp->evd_async_events.eel_num_elements > 0)) { 3199 /* dequeue events from evd_async_events list */ 3200 while (head = daplka_evd_event_dequeue( 3201 &evd_rp->evd_async_events)) { 3202 /* 3203 * populate the evp array 3204 */ 3205 evp[num_events].ibe_ev_family = 3206 DAPL_ASYNC_EVENTS; 3207 evp[num_events].ibe_async.ibae_type = 3208 head->ee_aev.ibae_type; 3209 evp[num_events].ibe_async.ibae_hca_guid = 3210 head->ee_aev.ibae_hca_guid; 3211 evp[num_events].ibe_async.ibae_cookie = 3212 head->ee_aev.ibae_cookie; 3213 evp[num_events].ibe_async.ibae_port = 3214 head->ee_aev.ibae_port; 3215 3216 kmem_free(head, sizeof (daplka_evd_event_t)); 3217 3218 if (++num_events == max_events) { 3219 break; 3220 } 3221 } 3222 } 3223 3224 /* 3225 * We have sufficient events for this call so no need to wait 3226 */ 3227 if ((threshold > 0) && (num_events >= threshold)) { 3228 mutex_exit(&evd_rp->evd_lock); 3229 break; 3230 } 3231 3232 evd_rp->evd_waiters++; 3233 /* 3234 * There are no new events and a timeout was specified. 3235 * Note: for CQ events threshold is 0 but timeout is 3236 * not necessarily 0. 3237 */ 3238 while ((evd_rp->evd_newevents == DAPLKA_EVD_NO_EVENTS) && 3239 timeout) { 3240 retval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv, 3241 &evd_rp->evd_lock, timeout); 3242 if (retval == 0) { 3243 retval = EINTR; 3244 break; 3245 } else if (retval == -1) { 3246 retval = ETIME; 3247 break; 3248 } else { 3249 retval = 0; 3250 continue; 3251 } 3252 } 3253 evd_rp->evd_waiters--; 3254 if (evd_rp->evd_newevents != DAPLKA_EVD_NO_EVENTS) { 3255 /* 3256 * we got woken up either by the CQ handler due to 3257 * events in the CQ (we need to go to userland to 3258 * check for CQ events) or by the posting of S/W events 3259 */ 3260 3261 /* check for userland events only */ 3262 if (!(evd_rp->evd_newevents & 3263 ~DAPLKA_EVD_ULAND_EVENTS)) { 3264 evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS; 3265 mutex_exit(&evd_rp->evd_lock); 3266 break; 3267 } 3268 /* 3269 * Clear newevents since we are going to loop back 3270 * and check for both CM and CQ events 3271 */ 3272 evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS; 3273 } else { /* error */ 3274 mutex_exit(&evd_rp->evd_lock); 3275 break; 3276 } 3277 } 3278 3279 maxevent_reached: 3280 args.evp_num_polled = num_events; 3281 3282 /* 3283 * At this point retval might have a value that we want to return 3284 * back to the user. So the copyouts shouldn't tamper with retval.
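 * (this is why the copyouts below use the separate local rc;
 * retval is only overridden with EFAULT when a copyout actually
 * fails)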
3285 */ 3286 if (args.evp_num_polled > 0) { /* copyout the events */ 3287 rc = ddi_copyout(evp, args.evp_ep, args.evp_num_polled * 3288 sizeof (dapl_ib_event_t), mode); 3289 if (rc != 0) { /* XXX: we are losing events here */ 3290 DERR("event_poll: event array copyout error %d", rc); 3291 retval = EFAULT; 3292 goto cleanup; 3293 } 3294 rc = daplka_event_poll_copyout(&args, arg, mode); 3295 if (rc != 0) { /* XXX: we are losing events here */ 3296 DERR("event_poll: copyout error %d\n", rc); 3297 retval = EFAULT; 3298 goto cleanup; 3299 } 3300 } 3301 3302 cleanup:; 3303 if ((max_events > NUM_EVENTS_PER_POLL) && (evp_start != NULL)) { 3304 kmem_free(evp_start, evp_size); 3305 } 3306 3307 if (evd_rp != NULL) { 3308 DAPLKA_RS_UNREF(evd_rp); 3309 } 3310 return (retval); 3311 } 3312 3313 /* ARGSUSED */ 3314 static int 3315 daplka_event_wakeup(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3316 cred_t *cred, int *rvalp) 3317 { 3318 dapl_event_wakeup_t args; 3319 daplka_evd_resource_t *evd_rp; 3320 int retval; 3321 3322 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_event_wakeup_t), 3323 mode); 3324 if (retval != 0) { 3325 DERR("event_wakeup: copyin error %d\n", retval); 3326 return (EFAULT); 3327 } 3328 3329 /* get evd resource */ 3330 evd_rp = (daplka_evd_resource_t *) 3331 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evw_hkey); 3332 if (evd_rp == NULL) { 3333 DERR("event_wakeup: cannot find evd resource\n"); 3334 return (EINVAL); 3335 } 3336 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD); 3337 3338 daplka_evd_wakeup(evd_rp, NULL, NULL); 3339 3340 DAPLKA_RS_UNREF(evd_rp); 3341 3342 return (retval); 3343 } 3344 3345 /* ARGSUSED */ 3346 static int 3347 daplka_evd_modify_cno(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3348 cred_t *cred, int *rvalp) 3349 { 3350 dapl_evd_modify_cno_t args; 3351 daplka_evd_resource_t *evd_rp; 3352 daplka_cno_resource_t *cno_rp; 3353 daplka_cno_resource_t *old_cno_rp; 3354 int retval; 3355 3356 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_modify_cno_t), 3357 mode); 3358 if (retval != 0) { 3359 DERR("evd_modify_cno: copyin error %d\n", retval); 3360 return (EFAULT); 3361 } 3362 3363 /* get evd resource */ 3364 evd_rp = (daplka_evd_resource_t *) 3365 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evmc_hkey); 3366 if (evd_rp == NULL) { 3367 DERR("evd_modify_cno: cannot find evd resource\n"); 3368 retval = EINVAL; 3369 goto cleanup; 3370 } 3371 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD); 3372 3373 if (args.evmc_cno_hkey > 0) { 3374 /* get cno resource corresponding to the new CNO */ 3375 cno_rp = (daplka_cno_resource_t *) 3376 daplka_hash_lookup(&ia_rp->ia_cno_htbl, 3377 args.evmc_cno_hkey); 3378 if (cno_rp == NULL) { 3379 DERR("evd_modify_cno: cannot find CNO resource\n"); 3380 retval = EINVAL; 3381 goto cleanup; 3382 } 3383 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO); 3384 } else { 3385 cno_rp = NULL; 3386 } 3387 3388 mutex_enter(&evd_rp->evd_lock); 3389 old_cno_rp = evd_rp->evd_cno_res; 3390 evd_rp->evd_cno_res = cno_rp; 3391 mutex_exit(&evd_rp->evd_lock); 3392 3393 /* 3394 * drop the refcnt on the old CNO, the refcnt on the new CNO is 3395 * retained since the evd holds a reference to it. 3396 */ 3397 if (old_cno_rp) { 3398 DAPLKA_RS_UNREF(old_cno_rp); 3399 } 3400 3401 cleanup: 3402 if (evd_rp) { 3403 DAPLKA_RS_UNREF(evd_rp); 3404 } 3405 3406 return (retval); 3407 } 3408 3409 /* 3410 * Frees the EVD and associated resources. 
3411 * If there are other threads still using this EVD, the destruction 3412 * is deferred until the EVD's refcnt drops to zero. 3413 */ 3414 /* ARGSUSED */ 3415 static int 3416 daplka_evd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3417 cred_t *cred, int *rvalp) 3418 { 3419 daplka_evd_resource_t *evd_rp = NULL; 3420 daplka_async_evd_hkey_t *curr; 3421 daplka_async_evd_hkey_t *prev; 3422 dapl_evd_free_t args; 3423 int retval = 0; 3424 3425 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_free_t), mode); 3426 if (retval != 0) { 3427 DERR("evd_free: copyin error %d\n", retval); 3428 return (EFAULT); 3429 } 3430 retval = daplka_hash_remove(&ia_rp->ia_evd_htbl, args.evf_hkey, 3431 (void **)&evd_rp); 3432 if (retval != 0 || evd_rp == NULL) { 3433 DERR("evd_free: cannot find evd resource\n"); 3434 return (EINVAL); 3435 } 3436 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD); 3437 3438 /* If this is an async evd remove it from the IA's async evd list */ 3439 if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) { 3440 mutex_enter(&ia_rp->ia_lock); 3441 curr = prev = ia_rp->ia_async_evd_hkeys; 3442 while (curr != NULL) { 3443 if (curr->aeh_evd_hkey == args.evf_hkey) { 3444 /* unlink curr from the list */ 3445 if (curr == prev) { 3446 /* 3447 * if first element in the list update 3448 * the list head 3449 */ 3450 ia_rp->ia_async_evd_hkeys = 3451 curr->aeh_next; 3452 } else { 3453 prev->aeh_next = curr->aeh_next; 3454 } 3455 break; 3456 } 3457 prev = curr; 3458 curr = curr->aeh_next; 3459 } 3460 mutex_exit(&ia_rp->ia_lock); 3461 /* free the curr entry */ 3462 kmem_free(curr, sizeof (daplka_async_evd_hkey_t)); 3463 } 3464 3465 /* UNREF calls the actual free function when refcnt is zero */ 3466 DAPLKA_RS_UNREF(evd_rp); 3467 return (0); 3468 } 3469 3470 /* 3471 * destroys EVD resource. 3472 * called when refcnt drops to zero.
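 * the CQ handler and the CQ private pointer are cleared before
 * the CQ is freed so that a late completion or async callback
 * cannot find a half-destroyed EVD.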
3473 */ 3474 static int 3475 daplka_evd_destroy(daplka_resource_t *gen_rp) 3476 { 3477 daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)gen_rp; 3478 ibt_status_t status; 3479 daplka_evd_event_t *evt; 3480 ibt_priv_data_len_t len; 3481 3482 D3("evd_destroy: entering, evd_rp 0x%p, rnum %d\n", 3483 evd_rp, DAPLKA_RS_RNUM(evd_rp)); 3484 /* 3485 * free CQ 3486 */ 3487 if (evd_rp->evd_cq_hdl) { 3488 ibt_set_cq_handler(evd_rp->evd_cq_hdl, NULL, NULL); 3489 mutex_enter(&daplka_dev->daplka_mutex); 3490 ibt_set_cq_private(evd_rp->evd_cq_hdl, NULL); 3491 mutex_exit(&daplka_dev->daplka_mutex); 3492 3493 status = daplka_ibt_free_cq(evd_rp, evd_rp->evd_cq_hdl); 3494 if (status != IBT_SUCCESS) { 3495 DERR("evd_destroy: ibt_free_cq returned %d\n", status); 3496 } 3497 evd_rp->evd_cq_hdl = NULL; 3498 D2("evd_destroy: cq freed, rnum %d\n", DAPLKA_RS_RNUM(evd_rp)); 3499 } 3500 3501 /* 3502 * release reference on CNO 3503 */ 3504 if (evd_rp->evd_cno_res != NULL) { 3505 mutex_enter(&evd_rp->evd_cno_res->cno_lock); 3506 if (evd_rp->evd_cno_res->cno_evd_cookie == 3507 evd_rp->evd_cookie) { 3508 evd_rp->evd_cno_res->cno_evd_cookie = 0; 3509 } 3510 mutex_exit(&evd_rp->evd_cno_res->cno_lock); 3511 DAPLKA_RS_UNREF(evd_rp->evd_cno_res); 3512 evd_rp->evd_cno_res = NULL; 3513 } 3514 3515 /* 3516 * discard all remaining events 3517 */ 3518 mutex_enter(&evd_rp->evd_lock); 3519 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_cr_events))) { 3520 D2("evd_destroy: discarding CR event: %d\n", 3521 evt->ee_cmev.ec_cm_ev_type); 3522 len = evt->ee_cmev.ec_cm_ev_priv_data_len; 3523 if (len > 0) { 3524 kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len); 3525 evt->ee_cmev.ec_cm_ev_priv_data = NULL; 3526 evt->ee_cmev.ec_cm_ev_priv_data_len = 0; 3527 } 3528 kmem_free(evt, sizeof (*evt)); 3529 } 3530 ASSERT(evd_rp->evd_cr_events.eel_num_elements == 0); 3531 3532 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_conn_events))) { 3533 D2("evd_destroy: discarding CONN event: %d\n", 3534 evt->ee_cmev.ec_cm_ev_type); 3535 len = evt->ee_cmev.ec_cm_ev_priv_data_len; 3536 if (len > 0) { 3537 kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len); 3538 evt->ee_cmev.ec_cm_ev_priv_data = NULL; 3539 evt->ee_cmev.ec_cm_ev_priv_data_len = 0; 3540 } 3541 kmem_free(evt, sizeof (*evt)); 3542 } 3543 ASSERT(evd_rp->evd_conn_events.eel_num_elements == 0); 3544 3545 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_async_events))) { 3546 DERR("evd_destroy: discarding ASYNC event: %d\n", 3547 evt->ee_aev.ibae_type); 3548 kmem_free(evt, sizeof (*evt)); 3549 } 3550 ASSERT(evd_rp->evd_async_events.eel_num_elements == 0); 3551 mutex_exit(&evd_rp->evd_lock); 3552 3553 mutex_destroy(&evd_rp->evd_lock); 3554 DAPLKA_RS_FINI(evd_rp); 3555 kmem_free(evd_rp, sizeof (daplka_evd_resource_t)); 3556 D3("evd_destroy: exiting, evd_rp 0x%p\n", evd_rp); 3557 return (0); 3558 } 3559 3560 static void 3561 daplka_hash_evd_free(void *obj) 3562 { 3563 daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)obj; 3564 3565 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD); 3566 DAPLKA_RS_UNREF(evd_rp); 3567 } 3568 3569 /* 3570 * this handler fires when new completions arrive. 3571 */ 3572 /* ARGSUSED */ 3573 static void 3574 daplka_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg) 3575 { 3576 D3("cq_handler: fired setting evd_newevents\n"); 3577 daplka_evd_wakeup((daplka_evd_resource_t *)arg, NULL, NULL); 3578 } 3579 3580 /* 3581 * this routine wakes up a client from evd_wait. if evtq and evt 3582 * are non-null, the event evt will be enqueued prior to waking 3583 * up the client. 
if the evd is associated with a CNO and if there 3584 * are no waiters on the evd, the CNO will be notified. 3585 */ 3586 static void 3587 daplka_evd_wakeup(daplka_evd_resource_t *evd_rp, daplka_evd_event_list_t *evtq, 3588 daplka_evd_event_t *evt) 3589 { 3590 uint32_t waiters = 0; 3591 3592 mutex_enter(&evd_rp->evd_lock); 3593 if (evtq != NULL && evt != NULL) { 3594 ASSERT(evtq == &evd_rp->evd_cr_events || 3595 evtq == &evd_rp->evd_conn_events || 3596 evtq == &evd_rp->evd_async_events); 3597 daplka_evd_event_enqueue(evtq, evt); 3598 ASSERT((evtq->eel_event_type == DAPLKA_EVD_CM_EVENTS) || 3599 (evtq->eel_event_type == DAPLKA_EVD_ASYNC_EVENTS)); 3600 evd_rp->evd_newevents |= evtq->eel_event_type; 3601 } else { 3602 evd_rp->evd_newevents |= DAPLKA_EVD_ULAND_EVENTS; 3603 } 3604 waiters = evd_rp->evd_waiters; 3605 cv_broadcast(&evd_rp->evd_cv); 3606 mutex_exit(&evd_rp->evd_lock); 3607 3608 /* 3609 * only wakeup the CNO if there are no waiters on this evd. 3610 */ 3611 if (evd_rp->evd_cno_res != NULL && waiters == 0) { 3612 mutex_enter(&evd_rp->evd_cno_res->cno_lock); 3613 evd_rp->evd_cno_res->cno_evd_cookie = evd_rp->evd_cookie; 3614 cv_broadcast(&evd_rp->evd_cno_res->cno_cv); 3615 mutex_exit(&evd_rp->evd_cno_res->cno_lock); 3616 } 3617 } 3618 3619 /* 3620 * daplka_evd_event_enqueue adds elem to the end of the event list. 3621 * The caller is expected to acquire appropriate locks before 3622 * calling enqueue. 3623 */ 3624 static void 3625 daplka_evd_event_enqueue(daplka_evd_event_list_t *evlist, 3626 daplka_evd_event_t *elem) 3627 { 3628 if (evlist->eel_tail) { 3629 evlist->eel_tail->ee_next = elem; 3630 evlist->eel_tail = elem; 3631 } else { 3632 /* list is empty */ 3633 ASSERT(evlist->eel_head == NULL); 3634 evlist->eel_head = elem; 3635 evlist->eel_tail = elem; 3636 } 3637 evlist->eel_num_elements++; 3638 } 3639 3640 /* 3641 * daplka_evd_event_dequeue removes and returns the first element of event 3642 * list. NULL is returned if the list is empty. The caller is expected to 3643 * acquire appropriate locks before calling dequeue. 3644 */ 3645 static daplka_evd_event_t * 3646 daplka_evd_event_dequeue(daplka_evd_event_list_t *evlist) 3647 { 3648 daplka_evd_event_t *head; 3649 3650 head = evlist->eel_head; 3651 if (head == NULL) { 3652 return (NULL); 3653 } 3654 3655 evlist->eel_head = head->ee_next; 3656 evlist->eel_num_elements--; 3657 /* if it was the last element update the tail pointer too */ 3658 if (evlist->eel_head == NULL) { 3659 ASSERT(evlist->eel_num_elements == 0); 3660 evlist->eel_tail = NULL; 3661 } 3662 return (head); 3663 } 3664 3665 /* 3666 * A CNO allows the client to wait for notifications from multiple EVDs. 3667 * To use a CNO, the client needs to follow the procedure below: 3668 * 1. allocate a CNO. this returns a cno_hkey that identifies the CNO. 3669 * 2. create one or more EVDs using the returned cno_hkey. 3670 * 3. call cno_wait. when one of the associated EVDs gets notified, the 3671 * CNO will also get notified. cno_wait will then return with an 3672 * evd_cookie identifying the EVD that triggered the event. 3673 * 3674 * A note about cno_wait: 3675 * - unlike an EVD, a CNO does not maintain a queue of notifications. For 3676 * example, suppose multiple EVDs triggered a CNO before the client calls 3677 * cno_wait; when the client calls cno_wait, it will return with the 3678 * evd_cookie that identifies the *last* EVD that triggered the CNO.
It 3679 * is the responsibility of the client, upon returning from cno_wait, to 3680 * check on all EVDs that can potentially trigger the CNO. the returned 3681 * evd_cookie is only meant to be a hint. there is no guarantee that the 3682 * EVD identified by the evd_cookie still contains an event or still 3683 * exists by the time cno_wait returns. 3684 */ 3685 3686 /* 3687 * allocates a CNO. 3688 * the returned cno_hkey may subsequently be used in evd_create. 3689 */ 3690 /* ARGSUSED */ 3691 static int 3692 daplka_cno_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3693 cred_t *cred, int *rvalp) 3694 { 3695 dapl_cno_alloc_t args; 3696 daplka_cno_resource_t *cno_rp = NULL; 3697 uint64_t cno_hkey = 0; 3698 boolean_t inserted = B_FALSE; 3699 int retval = 0; 3700 3701 cno_rp = kmem_zalloc(sizeof (*cno_rp), daplka_km_flags); 3702 if (cno_rp == NULL) { 3703 DERR("cno_alloc: cannot allocate cno resource\n"); 3704 return (ENOMEM); 3705 } 3706 DAPLKA_RS_INIT(cno_rp, DAPL_TYPE_CNO, 3707 DAPLKA_RS_RNUM(ia_rp), daplka_cno_destroy); 3708 3709 mutex_init(&cno_rp->cno_lock, NULL, MUTEX_DRIVER, NULL); 3710 cv_init(&cno_rp->cno_cv, NULL, CV_DRIVER, NULL); 3711 cno_rp->cno_evd_cookie = 0; 3712 3713 /* insert into cno hash table */ 3714 retval = daplka_hash_insert(&ia_rp->ia_cno_htbl, 3715 &cno_hkey, (void *)cno_rp); 3716 if (retval != 0) { 3717 DERR("cno_alloc: cannot insert cno resource\n"); 3718 goto cleanup; 3719 } 3720 inserted = B_TRUE; 3721 3722 /* return hkey to library */ 3723 args.cno_hkey = cno_hkey; 3724 3725 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cno_alloc_t), 3726 mode); 3727 if (retval != 0) { 3728 DERR("cno_alloc: copyout error %d\n", retval); 3729 retval = EFAULT; 3730 goto cleanup; 3731 } 3732 return (0); 3733 3734 cleanup:; 3735 if (inserted) { 3736 daplka_cno_resource_t *free_rp = NULL; 3737 3738 (void) daplka_hash_remove(&ia_rp->ia_cno_htbl, cno_hkey, 3739 (void **)&free_rp); 3740 if (free_rp != cno_rp) { 3741 DERR("cno_alloc: cannot remove cno\n"); 3742 /* 3743 * we can only get here if another thread 3744 * has completed the cleanup in cno_free 3745 */ 3746 return (retval); 3747 } 3748 } 3749 DAPLKA_RS_UNREF(cno_rp); 3750 return (retval); 3751 } 3752 3753 /* 3754 * destroys a CNO. 3755 * this gets called when a CNO resource's refcnt drops to zero. 3756 */ 3757 static int 3758 daplka_cno_destroy(daplka_resource_t *gen_rp) 3759 { 3760 daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)gen_rp; 3761 3762 ASSERT(DAPLKA_RS_REFCNT(cno_rp) == 0); 3763 D2("cno_destroy: entering, cno_rp %p, rnum %d\n", 3764 cno_rp, DAPLKA_RS_RNUM(cno_rp)); 3765 3766 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO); 3767 cv_destroy(&cno_rp->cno_cv); 3768 mutex_destroy(&cno_rp->cno_lock); 3769 3770 DAPLKA_RS_FINI(cno_rp); 3771 kmem_free(cno_rp, sizeof (daplka_cno_resource_t)); 3772 D2("cno_destroy: exiting, cno_rp %p\n", cno_rp); 3773 return (0); 3774 } 3775 3776 static void 3777 daplka_hash_cno_free(void *obj) 3778 { 3779 daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)obj; 3780 3781 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO); 3782 DAPLKA_RS_UNREF(cno_rp); 3783 } 3784 3785 /* 3786 * removes the CNO from the cno hash table and frees the CNO 3787 * if there are no references to it. if there are references to 3788 * it, the CNO will be destroyed when the last of the references 3789 * is released. once the CNO is removed from the cno hash table, 3790 * the client will no longer be able to call cno_wait on the CNO. 
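 *
 * for context, a minimal sketch of the overall userland flow implied
 * by cno_alloc/cno_wait/cno_free; the ioctl command names here are
 * assumptions inferred from the dapl_cno_*_t argument structs and
 * should be checked against daplt_if.h:
 *
 *	dapl_cno_alloc_t ca;
 *	(void) ioctl(fd, DAPL_CNO_ALLOC, &ca);	(returns ca.cno_hkey)
 *	... create one or more EVDs, passing ca.cno_hkey ...
 *	dapl_cno_wait_t cw;
 *	cw.cnw_hkey = ca.cno_hkey;
 *	cw.cnw_timeout = 1000000;		(in microseconds)
 *	if (ioctl(fd, DAPL_CNO_WAIT, &cw) == 0)
 *		... poll all EVDs; cw.cnw_evd_cookie is only a hint ...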
3791 */ 3792 /* ARGSUSED */ 3793 static int 3794 daplka_cno_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3795 cred_t *cred, int *rvalp) 3796 { 3797 daplka_cno_resource_t *cno_rp = NULL; 3798 dapl_cno_free_t args; 3799 int retval = 0; 3800 3801 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_free_t), mode); 3802 if (retval != 0) { 3803 DERR("cno_free: copyin error %d\n", retval); 3804 return (EINVAL); 3805 } 3806 3807 retval = daplka_hash_remove(&ia_rp->ia_cno_htbl, 3808 args.cnf_hkey, (void **)&cno_rp); 3809 if (retval != 0 || cno_rp == NULL) { 3810 DERR("cno_free: cannot find cno resource\n"); 3811 return (EINVAL); 3812 } 3813 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO); 3814 3815 /* UNREF calls the actual free function when refcnt is zero */ 3816 DAPLKA_RS_UNREF(cno_rp); 3817 return (0); 3818 } 3819 3820 /* 3821 * wait for a notification from one of the associated EVDs. 3822 */ 3823 /* ARGSUSED */ 3824 static int 3825 daplka_cno_wait(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3826 cred_t *cred, int *rvalp) 3827 { 3828 daplka_cno_resource_t *cno_rp = NULL; 3829 dapl_cno_wait_t args; 3830 int retval = 0; 3831 uint64_t evd_cookie = 0; 3832 clock_t timeout, curr_time; 3833 3834 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_wait_t), mode); 3835 if (retval != 0) { 3836 DERR("cno_wait: copyin error %d\n", retval); 3837 return (EINVAL); 3838 } 3839 /* get cno resource */ 3840 cno_rp = (daplka_cno_resource_t *) 3841 daplka_hash_lookup(&ia_rp->ia_cno_htbl, args.cnw_hkey); 3842 if (cno_rp == NULL) { 3843 DERR("cno_wait: cannot find cno resource\n"); 3844 return (EINVAL); 3845 } 3846 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO); 3847 3848 curr_time = ddi_get_lbolt(); 3849 timeout = curr_time + drv_usectohz(args.cnw_timeout); 3850 3851 /* 3852 * use the max value if we wrapped around 3853 */ 3854 if (args.cnw_timeout > 0 && timeout <= curr_time) { 3855 /* 3856 * the size of clock_t (a long) differs between 32-bit and 64-bit kernels 3857 */ 3858 timeout = LONG_MAX >> 4; 3859 } 3860 mutex_enter(&cno_rp->cno_lock); 3861 while (cno_rp->cno_evd_cookie == 0) { 3862 int rval = 0; 3863 3864 rval = cv_timedwait_sig(&cno_rp->cno_cv, 3865 &cno_rp->cno_lock, timeout); 3866 if (rval == 0) { 3867 DERR("cno_wait: interrupted\n"); 3868 mutex_exit(&cno_rp->cno_lock); 3869 retval = EINTR; 3870 goto cleanup; 3871 } else if (rval == -1) { 3872 DERR("cno_wait: timed out\n"); 3873 mutex_exit(&cno_rp->cno_lock); 3874 retval = ETIME; 3875 goto cleanup; 3876 } 3877 } 3878 evd_cookie = cno_rp->cno_evd_cookie; 3879 cno_rp->cno_evd_cookie = 0; 3880 mutex_exit(&cno_rp->cno_lock); 3881 3882 ASSERT(evd_cookie != 0); 3883 D2("cno_wait: returning evd_cookie 0x%p\n", 3884 (void *)(uintptr_t)evd_cookie); 3885 args.cnw_evd_cookie = evd_cookie; 3886 retval = ddi_copyout((void *)&args, (void *)arg, 3887 sizeof (dapl_cno_wait_t), mode); 3888 if (retval != 0) { 3889 DERR("cno_wait: copyout error %d\n", retval); 3890 retval = EFAULT; 3891 goto cleanup; 3892 } 3893 3894 cleanup:; 3895 if (cno_rp != NULL) { 3896 DAPLKA_RS_UNREF(cno_rp); 3897 } 3898 return (retval); 3899 } 3900 3901 /* 3902 * this function is called by the client when it decides to 3903 * accept a connection request. a connection request is generated 3904 * when the active side generates a REQ MAD to a service point on 3905 * the destination node. this causes the CM service handler 3906 * (daplka_cm_service_req) on the passive side to be invoked.
This 3907 * handler will then enqueue this connection request to the backlog 3908 * array of the service point. A connection event containing the 3909 * backlog array index and connection request private data is passed 3910 * to the client's service point EVD (sp_evd_res). once the event 3911 * is passed up to userland, the client may examine the request 3912 * to decide whether to call daplka_cr_accept or daplka_cr_reject. 3913 */ 3914 /* ARGSUSED */ 3915 static int 3916 daplka_cr_accept(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 3917 cred_t *cred, int *rvalp) 3918 { 3919 daplka_ep_resource_t *ep_rp = NULL; 3920 daplka_sp_resource_t *sp_rp = NULL; 3921 dapl_cr_accept_t args; 3922 daplka_sp_conn_pend_t *conn; 3923 ibt_cm_proceed_reply_t proc_reply; 3924 ibt_status_t status; 3925 uint16_t bkl_index; 3926 uint32_t old_state, new_state; 3927 int retval = 0; 3928 void *priv_data = NULL, *sid; 3929 3930 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_accept_t), 3931 mode); 3932 if (retval != 0) { 3933 DERR("cr_accept: copyin error %d\n", retval); 3934 return (EFAULT); 3935 } 3936 if (args.cra_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) { 3937 DERR("cr_accept: private data len (%d) exceeded " 3938 "max size %d\n", args.cra_priv_sz, 3939 DAPL_MAX_PRIVATE_DATA_SIZE); 3940 return (EINVAL); 3941 } 3942 priv_data = (args.cra_priv_sz > 0) ? (void *)args.cra_priv : NULL; 3943 3944 D2("cr_accept: priv(0x%p) priv_len(%u) psep(0x%llx)\n", priv_data, 3945 args.cra_priv_sz, (longlong_t)args.cra_bkl_cookie); 3946 3947 /* get sp resource */ 3948 sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl, 3949 args.cra_sp_hkey); 3950 if (sp_rp == NULL) { 3951 DERR("cr_accept: cannot find sp resource\n"); 3952 return (EINVAL); 3953 } 3954 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP); 3955 3956 /* get ep resource */ 3957 ep_rp = (daplka_ep_resource_t *)daplka_hash_lookup(&ia_rp->ia_ep_htbl, 3958 args.cra_ep_hkey); 3959 if (ep_rp == NULL) { 3960 DERR("cr_accept: cannot find ep resource\n"); 3961 retval = EINVAL; 3962 goto cleanup; 3963 } 3964 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP); 3965 3966 /* 3967 * accept is only allowed if ep_state is CLOSED. 3968 * note that after this point, the ep_state is frozen 3969 * (i.e. TRANSITIONING) until we transition ep_state 3970 * to ACCEPTING or back to CLOSED if we get an error. 3971 */ 3972 new_state = old_state = daplka_ep_get_state(ep_rp); 3973 if (old_state != DAPLKA_EP_STATE_CLOSED) { 3974 DERR("cr_accept: invalid ep state %d\n", old_state); 3975 retval = EINVAL; 3976 goto cleanup; 3977 } 3978 3979 mutex_enter(&sp_rp->sp_lock); 3980 bkl_index = DAPLKA_GET_PSEP_INDEX(args.cra_bkl_cookie); 3981 /* 3982 * make sure the backlog index is not bogus. 3983 */ 3984 if (bkl_index >= sp_rp->sp_backlog_size) { 3985 DERR("cr_accept: invalid backlog index 0x%llx %d\n", 3986 (longlong_t)args.cra_bkl_cookie, bkl_index); 3987 mutex_exit(&sp_rp->sp_lock); 3988 retval = EINVAL; 3989 goto cleanup; 3990 } 3991 /* 3992 * make sure the backlog index indeed refers 3993 * to a pending connection.
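 *
 * the backlog cookie handed to userland evidently packs the backlog
 * slot index into its low-order bits, since DAPLKA_GET_PSEP_INDEX()
 * recovers a uint16_t index from the 64-bit cookie. a plausible
 * definition (an assumption; the macro lives in a header not shown
 * here) would be:
 *
 *	#define	DAPLKA_GET_PSEP_INDEX(cookie)	\
 *		((uint16_t)((cookie) & 0xffff))
 *
 * either way, the recovered index must still be bounds-checked and
 * the slot state verified before use, as done below.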
3994 */ 3995 conn = &sp_rp->sp_backlog[bkl_index]; 3996 if (conn->spcp_state != DAPLKA_SPCP_PENDING) { 3997 DERR("cr_accept: invalid conn state %d\n", 3998 conn->spcp_state); 3999 mutex_exit(&sp_rp->sp_lock); 4000 retval = EINVAL; 4001 goto cleanup; 4002 } 4003 if (conn->spcp_sid == NULL) { 4004 DERR("cr_accept: sid == NULL\n"); 4005 mutex_exit(&sp_rp->sp_lock); 4006 retval = EINVAL; 4007 goto cleanup; 4008 } 4009 if (ep_rp->ep_chan_hdl == NULL) { 4010 /* 4011 * an ep_rp with a NULL chan_hdl is impossible. 4012 */ 4013 DERR("cr_accept: ep_chan_hdl == NULL\n"); 4014 mutex_exit(&sp_rp->sp_lock); 4015 ASSERT(B_FALSE); 4016 retval = EINVAL; 4017 goto cleanup; 4018 } 4019 proc_reply.rep.cm_channel = ep_rp->ep_chan_hdl; 4020 proc_reply.rep.cm_rdma_ra_out = conn->spcp_rdma_ra_out; 4021 proc_reply.rep.cm_rdma_ra_in = conn->spcp_rdma_ra_in; 4022 proc_reply.rep.cm_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY; 4023 sid = conn->spcp_sid; 4024 4025 /* 4026 * this clears our slot in the backlog array. 4027 * this slot may now be used by other pending connections. 4028 */ 4029 conn->spcp_sid = NULL; 4030 conn->spcp_state = DAPLKA_SPCP_INIT; 4031 conn->spcp_req_len = 0; 4032 mutex_exit(&sp_rp->sp_lock); 4033 4034 /* 4035 * Set the unique cookie corresponding to the CR to this EP 4036 * so that it can be used in passive-side CM callbacks 4037 */ 4038 ep_rp->ep_psep_cookie = args.cra_bkl_cookie; 4039 4040 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, IBT_CM_ACCEPT, 4041 &proc_reply, priv_data, (ibt_priv_data_len_t)args.cra_priv_sz); 4042 4043 if (status != IBT_SUCCESS) { 4044 DERR("cr_accept: ibt_cm_proceed returned %d\n", status); 4045 *rvalp = (int)status; 4046 retval = 0; 4047 } 4048 /* 4049 * note that the CM handler may actually be called at this 4050 * point. but since ep_state is still in TRANSITIONING, the 4051 * handler will wait until we transition to ACCEPTING. this 4052 * prevents the case where we set ep_state to ACCEPTING after 4053 * daplka_service_conn_est sets ep_state to CONNECTED. 4054 */ 4055 new_state = DAPLKA_EP_STATE_ACCEPTING; 4056 4057 cleanup:; 4058 if (sp_rp != NULL) { 4059 DAPLKA_RS_UNREF(sp_rp); 4060 } 4061 if (ep_rp != NULL) { 4062 daplka_ep_set_state(ep_rp, old_state, new_state); 4063 DAPLKA_RS_UNREF(ep_rp); 4064 } 4065 return (retval); 4066 } 4067 4068 /* 4069 * this function is called by the client to reject a 4070 * connection request. 4071 */ 4072 /* ARGSUSED */ 4073 static int 4074 daplka_cr_reject(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 4075 cred_t *cred, int *rvalp) 4076 { 4077 dapl_cr_reject_t args; 4078 daplka_sp_resource_t *sp_rp = NULL; 4079 daplka_sp_conn_pend_t *conn; 4080 ibt_cm_proceed_reply_t proc_reply; 4081 ibt_cm_status_t proc_status; 4082 ibt_status_t status; 4083 uint16_t bkl_index; 4084 int retval = 0; 4085 void *sid; 4086 4087 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_reject_t), 4088 mode); 4089 if (retval != 0) { 4090 DERR("cr_reject: copyin error %d\n", retval); 4091 return (EFAULT); 4092 } 4093 /* get sp resource */ 4094 sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl, 4095 args.crr_sp_hkey); 4096 if (sp_rp == NULL) { 4097 DERR("cr_reject: cannot find sp resource\n"); 4098 return (EINVAL); 4099 } 4100 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP); 4101 4102 D2("cr_reject: psep(0x%llx)\n", (longlong_t)args.crr_bkl_cookie); 4103 4104 mutex_enter(&sp_rp->sp_lock); 4105 bkl_index = DAPLKA_GET_PSEP_INDEX(args.crr_bkl_cookie); 4106 /* 4107 * make sure the backlog index is not bogus.
4108 */ 4109 if (bkl_index >= sp_rp->sp_backlog_size) { 4110 DERR("cr_reject: invalid backlog index 0x%llx %d\n", 4111 (longlong_t)args.crr_bkl_cookie, bkl_index); 4112 mutex_exit(&sp_rp->sp_lock); 4113 retval = EINVAL; 4114 goto cleanup; 4115 } 4116 /* 4117 * make sure the backlog index indeed refers 4118 * to a pending connection. 4119 */ 4120 conn = &sp_rp->sp_backlog[bkl_index]; 4121 if (conn->spcp_state != DAPLKA_SPCP_PENDING) { 4122 DERR("cr_reject: invalid conn state %d\n", 4123 conn->spcp_state); 4124 mutex_exit(&sp_rp->sp_lock); 4125 retval = EINVAL; 4126 goto cleanup; 4127 } 4128 if (conn->spcp_sid == NULL) { 4129 DERR("cr_reject: sid == NULL\n"); 4130 mutex_exit(&sp_rp->sp_lock); 4131 retval = EINVAL; 4132 goto cleanup; 4133 } 4134 bzero(&proc_reply, sizeof (proc_reply)); 4135 sid = conn->spcp_sid; 4136 4137 /* 4138 * this clears our slot in the backlog array. 4139 * this slot may now be used by other pending connections. 4140 */ 4141 conn->spcp_sid = NULL; 4142 conn->spcp_state = DAPLKA_SPCP_INIT; 4143 conn->spcp_req_len = 0; 4144 4145 switch (args.crr_reason) { 4146 case DAPL_IB_CM_REJ_REASON_CONSUMER_REJ: 4147 /* results in IBT_CM_CONSUMER as the reason for reject */ 4148 proc_status = IBT_CM_REJECT; 4149 break; 4150 case DAPL_IB_CME_LOCAL_FAILURE: 4151 /*FALLTHRU*/ 4152 case DAPL_IB_CME_DESTINATION_UNREACHABLE: 4153 /* results in IBT_CM_NO_RESC as the reason for reject */ 4154 proc_status = IBT_CM_NO_RESOURCE; 4155 break; 4156 default: 4157 /* unexpected reason code */ 4158 ASSERT(!"unexpected reject reason code"); 4159 proc_status = IBT_CM_NO_RESOURCE; 4160 break; 4161 } 4162 4163 mutex_exit(&sp_rp->sp_lock); 4164 4165 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, proc_status, 4166 &proc_reply, NULL, 0); 4167 4168 if (status != IBT_SUCCESS) { 4169 DERR("cr_reject: ibt_cm_proceed returned %d\n", status); 4170 *rvalp = (int)status; 4171 retval = 0; 4172 } 4173 4174 cleanup:; 4175 if (sp_rp != NULL) { 4176 DAPLKA_RS_UNREF(sp_rp); 4177 } 4178 return (retval); 4179 } 4180 4181 4182 /* 4183 * daplka_sp_match is used by daplka_hash_walk for finding SPs 4184 */ 4185 typedef struct daplka_sp_match_s { 4186 uint64_t spm_conn_qual; 4187 daplka_sp_resource_t *spm_sp_rp; 4188 } daplka_sp_match_t; 4189 4190 static int 4191 daplka_sp_match(void *objp, void *arg) 4192 { 4193 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)objp; 4194 4195 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP); 4196 if (sp_rp->sp_conn_qual == 4197 ((daplka_sp_match_t *)arg)->spm_conn_qual) { 4198 ((daplka_sp_match_t *)arg)->spm_sp_rp = sp_rp; 4199 D2("daplka_sp_match: found sp, conn_qual %016llu\n", 4200 (longlong_t)((daplka_sp_match_t *)arg)->spm_conn_qual); 4201 DAPLKA_RS_REF(sp_rp); 4202 return (1); 4203 } 4204 return (0); 4205 } 4206 4207 /* 4208 * cr_handoff allows the client to hand off a connection request from 4209 * one service point to another.
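 *
 * in outline: the pending request is pulled out of the source SP's
 * backlog, the destination SP is found by walking
 * daplka_global_sp_htbl for a matching conn_qual, a same-ruid
 * permission check is applied, and the saved session id plus a copy
 * of the REQ private data are replayed into daplka_cm_service_req()
 * as a synthesized IBT_CM_EVENT_REQ_RCV event. if the destination
 * handler does not return IBT_CM_DEFER, the request is rejected
 * with IBT_CM_NO_RESOURCE.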
4210 */ 4211 /* ARGSUSED */ 4212 static int 4213 daplka_cr_handoff(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 4214 cred_t *cred, int *rvalp) 4215 { 4216 dapl_cr_handoff_t args; 4217 daplka_sp_resource_t *sp_rp = NULL, *new_sp_rp = NULL; 4218 daplka_sp_conn_pend_t *conn; 4219 daplka_sp_match_t sp_match; 4220 ibt_cm_event_t fake_event; 4221 ibt_cm_status_t cm_status; 4222 ibt_status_t status; 4223 uint16_t bkl_index; 4224 void *sid, *priv = NULL; 4225 int retval = 0, priv_len = 0; 4226 4227 D3("cr_handoff: entering\n"); 4228 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_handoff_t), 4229 mode); 4230 if (retval != 0) { 4231 DERR("cr_handoff: copyin error %d\n", retval); 4232 return (EFAULT); 4233 } 4234 /* get sp resource */ 4235 sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl, 4236 args.crh_sp_hkey); 4237 if (sp_rp == NULL) { 4238 DERR("cr_handoff: cannot find sp resource\n"); 4239 return (EINVAL); 4240 } 4241 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP); 4242 4243 /* 4244 * find the destination service point. 4245 */ 4246 sp_match.spm_conn_qual = args.crh_conn_qual; 4247 sp_match.spm_sp_rp = NULL; 4248 daplka_hash_walk(&daplka_global_sp_htbl, daplka_sp_match, 4249 (void *)&sp_match, RW_READER); 4250 4251 /* 4252 * return if we cannot find the service point 4253 */ 4254 if (sp_match.spm_sp_rp == NULL) { 4255 DERR("cr_handoff: new sp not found, conn qual = %llu\n", 4256 (longlong_t)args.crh_conn_qual); 4257 retval = EINVAL; 4258 goto cleanup; 4259 } 4260 new_sp_rp = sp_match.spm_sp_rp; 4261 4262 /* 4263 * the spec does not discuss the security implications of this 4264 * function. to be safe, we currently only allow processes 4265 * owned by the same user to handoff connection requests 4266 * to each other. 4267 */ 4268 if (crgetruid(cred) != new_sp_rp->sp_ruid) { 4269 DERR("cr_handoff: permission denied\n"); 4270 retval = EPERM; 4271 goto cleanup; 4272 } 4273 4274 D2("cr_handoff: psep(0x%llx)\n", (longlong_t)args.crh_bkl_cookie); 4275 4276 mutex_enter(&sp_rp->sp_lock); 4277 bkl_index = DAPLKA_GET_PSEP_INDEX(args.crh_bkl_cookie); 4278 /* 4279 * make sure the backlog index is not bogus. 4280 */ 4281 if (bkl_index >= sp_rp->sp_backlog_size) { 4282 DERR("cr_handoff: invalid backlog index 0x%llx %d\n", 4283 (longlong_t)args.crh_bkl_cookie, bkl_index); 4284 mutex_exit(&sp_rp->sp_lock); 4285 retval = EINVAL; 4286 goto cleanup; 4287 } 4288 /* 4289 * make sure the backlog index indeed refers 4290 * to a pending connection. 4291 */ 4292 conn = &sp_rp->sp_backlog[bkl_index]; 4293 if (conn->spcp_state != DAPLKA_SPCP_PENDING) { 4294 DERR("cr_handoff: invalid conn state %d\n", 4295 conn->spcp_state); 4296 mutex_exit(&sp_rp->sp_lock); 4297 retval = EINVAL; 4298 goto cleanup; 4299 } 4300 if (conn->spcp_sid == NULL) { 4301 DERR("cr_handoff: sid == NULL\n"); 4302 mutex_exit(&sp_rp->sp_lock); 4303 retval = EINVAL; 4304 goto cleanup; 4305 } 4306 sid = conn->spcp_sid; 4307 priv = NULL; 4308 priv_len = conn->spcp_req_len; 4309 if (priv_len > 0) { 4310 priv = kmem_zalloc(priv_len, daplka_km_flags); 4311 if (priv == NULL) { 4312 mutex_exit(&sp_rp->sp_lock); 4313 retval = ENOMEM; 4314 goto cleanup; 4315 } 4316 bcopy(conn->spcp_req_data, priv, priv_len); 4317 } 4318 /* 4319 * this clears our slot in the backlog array. 4320 * this slot may now be used by other pending connections. 
4321 */ 4322 conn->spcp_sid = NULL; 4323 conn->spcp_state = DAPLKA_SPCP_INIT; 4324 conn->spcp_req_len = 0; 4325 mutex_exit(&sp_rp->sp_lock); 4326 4327 /* fill fake_event and call service_req handler */ 4328 bzero(&fake_event, sizeof (fake_event)); 4329 fake_event.cm_type = IBT_CM_EVENT_REQ_RCV; 4330 fake_event.cm_session_id = sid; 4331 fake_event.cm_priv_data_len = priv_len; 4332 fake_event.cm_priv_data = priv; 4333 4334 cm_status = daplka_cm_service_req(new_sp_rp, 4335 &fake_event, NULL, priv, (ibt_priv_data_len_t)priv_len); 4336 if (cm_status != IBT_CM_DEFER) { 4337 ibt_cm_proceed_reply_t proc_reply; 4338 4339 DERR("cr_handoff: service_req returned %d\n", cm_status); 4340 /* 4341 * if for some reason cm_service_req failed, we 4342 * reject the connection. 4343 */ 4344 bzero(&proc_reply, sizeof (proc_reply)); 4345 4346 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, 4347 IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0); 4348 if (status != IBT_SUCCESS) { 4349 DERR("cr_handoff: ibt_cm_proceed returned %d\n", 4350 status); 4351 } 4352 *rvalp = (int)status; 4353 retval = 0; 4354 } 4355 4356 cleanup:; 4357 if (priv_len > 0 && priv != NULL) { 4358 kmem_free(priv, priv_len); 4359 } 4360 if (new_sp_rp != NULL) { 4361 DAPLKA_RS_UNREF(new_sp_rp); 4362 } 4363 if (sp_rp != NULL) { 4364 DAPLKA_RS_UNREF(sp_rp); 4365 } 4366 D3("cr_handoff: exiting\n"); 4367 return (retval); 4368 } 4369 4370 /* 4371 * returns the hca attributes 4372 */ 4373 /* ARGSUSED */ 4374 static int 4375 daplka_ia_query(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 4376 cred_t *cred, int *rvalp) 4377 { 4378 dapl_ia_query_t args; 4379 int retval; 4380 ibt_hca_attr_t *hcap; 4381 4382 hcap = &ia_rp->ia_hca->hca_attr; 4383 4384 /* 4385 * Take the ibt_hca_attr_t fields and stuff them into the dapl_hca_attr_t 4386 */ 4387 args.hca_attr.dhca_vendor_id = hcap->hca_vendor_id; 4388 args.hca_attr.dhca_device_id = hcap->hca_device_id; 4389 args.hca_attr.dhca_version_id = hcap->hca_version_id; 4390 args.hca_attr.dhca_max_chans = hcap->hca_max_chans; 4391 args.hca_attr.dhca_max_chan_sz = hcap->hca_max_chan_sz; 4392 args.hca_attr.dhca_max_sgl = hcap->hca_max_sgl; 4393 args.hca_attr.dhca_max_cq = hcap->hca_max_cq; 4394 args.hca_attr.dhca_max_cq_sz = hcap->hca_max_cq_sz; 4395 args.hca_attr.dhca_max_memr = hcap->hca_max_memr; 4396 args.hca_attr.dhca_max_memr_len = hcap->hca_max_memr_len; 4397 args.hca_attr.dhca_max_mem_win = hcap->hca_max_mem_win; 4398 args.hca_attr.dhca_max_rdma_in_chan = hcap->hca_max_rdma_in_chan; 4399 args.hca_attr.dhca_max_rdma_out_chan = hcap->hca_max_rdma_out_chan; 4400 args.hca_attr.dhca_max_partitions = hcap->hca_max_partitions; 4401 args.hca_attr.dhca_nports = hcap->hca_nports; 4402 args.hca_attr.dhca_node_guid = hcap->hca_node_guid; 4403 args.hca_attr.dhca_max_pd = hcap->hca_max_pd; 4404 args.hca_attr.dhca_max_srqs = hcap->hca_max_srqs; 4405 args.hca_attr.dhca_max_srqs_sz = hcap->hca_max_srqs_sz; 4406 args.hca_attr.dhca_max_srq_sgl = hcap->hca_max_srq_sgl; 4407 4408 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ia_query_t), 4409 mode); 4410 if (retval != 0) { 4411 DERR("ia_query: copyout error %d\n", retval); 4412 return (EFAULT); 4413 } 4414 return (0); 4415 } 4416 4417 /* 4418 * This routine is passed to hash walk in the daplka_pre_mr_cleanup_callback; 4419 * it frees the mw embedded in the mw resource object.
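 *
 * judging from daplka_sp_match() above (which returns 1 to stop the
 * walk once a match is found) and this routine (which always returns
 * 0), the daplka_hash_walk() callback convention appears to be:
 * return 0 to keep visiting elements, nonzero to terminate the walk
 * early. this is an inference from usage; the hash table
 * implementation itself is not part of this file.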
4420 */ 4421 4422 /* ARGSUSED */ 4423 static int 4424 daplka_mr_cb_freemw(void *objp, void *arg) 4425 { 4426 daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)objp; 4427 ibt_mw_hdl_t mw_hdl; 4428 ibt_status_t status; 4429 4430 D3("mr_cb_freemw: entering, mw_rp 0x%p\n", mw_rp); 4431 DAPLKA_RS_REF(mw_rp); 4432 4433 mutex_enter(&mw_rp->mw_lock); 4434 mw_hdl = mw_rp->mw_hdl; 4435 /* 4436 * we set mw_hdl to NULL so it won't get freed again 4437 */ 4438 mw_rp->mw_hdl = NULL; 4439 mutex_exit(&mw_rp->mw_lock); 4440 4441 if (mw_hdl != NULL) { 4442 status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, mw_hdl); 4443 if (status != IBT_SUCCESS) { 4444 DERR("mr_cb_freemw: ibt_free_mw returned %d\n", status); 4445 } 4446 D3("mr_cb_freemw: mw freed\n"); 4447 } 4448 4449 DAPLKA_RS_UNREF(mw_rp); 4450 return (0); 4451 } 4452 4453 /* 4454 * This routine is called from HCA driver's umem lock undo callback 4455 * when the memory associated with an MR is being unmapped. In this callback 4456 * we free all the MW associated with the IA and post an unaffiliated 4457 * async event to tell the app that there was a catastrophic event. 4458 * This allows the HCA to deregister the MR in its callback processing. 4459 */ 4460 static void 4461 daplka_pre_mr_cleanup_callback(void *arg1, void *arg2 /*ARGSUSED*/) 4462 { 4463 daplka_mr_resource_t *mr_rp; 4464 daplka_ia_resource_t *ia_rp; 4465 #ifdef _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB 4466 ibt_async_event_t event; 4467 ibt_hca_attr_t *hca_attrp; 4468 #endif 4469 minor_t rnum; 4470 4471 mr_rp = (daplka_mr_resource_t *)arg1; 4472 rnum = DAPLKA_RS_RNUM(mr_rp); 4473 daplka_shared_mr_free(mr_rp); 4474 4475 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum); 4476 if (ia_rp == NULL) { 4477 DERR("daplka_mr_unlock_callback: resource not found, rnum %d\n", 4478 rnum); 4479 return; 4480 } 4481 4482 DERR("daplka_mr_unlock_callback: resource(%p) rnum(%d)\n", ia_rp, rnum); 4483 4484 mutex_enter(&ia_rp->ia_lock); 4485 /* 4486 * MW is being alloced OR MW freeze has already begun. In 4487 * both these cases we wait for that to complete before 4488 * continuing. 4489 */ 4490 while ((ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) || 4491 (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)) { 4492 cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock); 4493 } 4494 4495 switch (ia_rp->ia_state) { 4496 case DAPLKA_IA_INIT: 4497 ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS; 4498 mutex_exit(&ia_rp->ia_lock); 4499 break; 4500 case DAPLKA_IA_MW_FROZEN: 4501 /* the mw on this ia have been freed */ 4502 D2("daplka_mr_unlock_callback: ia_state %d nothing to do\n", 4503 ia_rp->ia_state); 4504 mutex_exit(&ia_rp->ia_lock); 4505 goto cleanup; 4506 default: 4507 ASSERT(!"daplka_mr_unlock_callback: IA state invalid"); 4508 DERR("daplka_mr_unlock_callback: invalid ia_state %d\n", 4509 ia_rp->ia_state); 4510 mutex_exit(&ia_rp->ia_lock); 4511 goto cleanup; 4512 } 4513 4514 /* 4515 * Walk the mw hash table and free the mws. Acquire a writer 4516 * lock since we don't want anyone else traversing this tree 4517 * while we are freeing the MW. 4518 */ 4519 daplka_hash_walk(&ia_rp->ia_mw_htbl, daplka_mr_cb_freemw, NULL, 4520 RW_WRITER); 4521 4522 mutex_enter(&ia_rp->ia_lock); 4523 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS); 4524 ia_rp->ia_state = DAPLKA_IA_MW_FROZEN; 4525 cv_broadcast(&ia_rp->ia_cv); 4526 mutex_exit(&ia_rp->ia_lock); 4527 4528 /* 4529 * Currently commented out because Oracle skgxp is incapable 4530 * of handling async events correctly. 
4531 */ 4532 #ifdef _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB 4533 /* 4534 * Enqueue an unaffiliated async error event to indicate this 4535 * IA has encountered a problem that caused the MWs to be freed 4536 */ 4537 4538 /* Create a fake event, only relevant field is the hca_guid */ 4539 bzero(&event, sizeof (ibt_async_event_t)); 4540 hca_attrp = &ia_rp->ia_hca->hca_attr; 4541 event.ev_hca_guid = hca_attrp->hca_node_guid; 4542 4543 daplka_async_event_create(IBT_ERROR_LOCAL_CATASTROPHIC, &event, 0, 4544 ia_rp); 4545 #endif /* _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB */ 4546 4547 cleanup:; 4548 D2("daplka_mr_unlock_callback: resource(%p) done\n", ia_rp); 4549 DAPLKA_RS_UNREF(ia_rp); 4550 } 4551 4552 /* 4553 * registers a memory region. 4554 * memory locking will be done by the HCA driver. 4555 */ 4556 /* ARGSUSED */ 4557 static int 4558 daplka_mr_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 4559 cred_t *cred, int *rvalp) 4560 { 4561 boolean_t inserted = B_FALSE; 4562 daplka_mr_resource_t *mr_rp; 4563 daplka_pd_resource_t *pd_rp; 4564 dapl_mr_register_t args; 4565 ibt_mr_data_in_t mr_cb_data_in; 4566 uint64_t mr_hkey = 0; 4567 ibt_status_t status; 4568 int retval; 4569 4570 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_register_t), 4571 mode); 4572 if (retval != 0) { 4573 DERR("mr_register: copyin error %d\n", retval); 4574 return (EINVAL); 4575 } 4576 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags); 4577 if (mr_rp == NULL) { 4578 DERR("mr_register: cannot allocate mr resource\n"); 4579 return (ENOMEM); 4580 } 4581 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR, 4582 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy); 4583 4584 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL); 4585 mr_rp->mr_hca = ia_rp->ia_hca; 4586 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl; 4587 mr_rp->mr_next = NULL; 4588 mr_rp->mr_shared_mr = NULL; 4589 4590 /* get pd handle */ 4591 pd_rp = (daplka_pd_resource_t *) 4592 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mr_pd_hkey); 4593 if (pd_rp == NULL) { 4594 DERR("mr_register: cannot find pd resource\n"); 4595 retval = EINVAL; 4596 goto cleanup; 4597 } 4598 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD); 4599 mr_rp->mr_pd_res = pd_rp; 4600 4601 mr_rp->mr_attr.mr_vaddr = args.mr_vaddr; 4602 mr_rp->mr_attr.mr_len = args.mr_len; 4603 mr_rp->mr_attr.mr_as = curproc->p_as; 4604 mr_rp->mr_attr.mr_flags = args.mr_flags | IBT_MR_NOSLEEP; 4605 4606 D3("mr_register: mr_vaddr %p, mr_len %llu, mr_flags 0x%x\n", 4607 (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr, 4608 (longlong_t)mr_rp->mr_attr.mr_len, 4609 mr_rp->mr_attr.mr_flags); 4610 4611 status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl, 4612 mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, &mr_rp->mr_hdl, 4613 &mr_rp->mr_desc); 4614 4615 if (status != IBT_SUCCESS) { 4616 DERR("mr_register: ibt_register_mr error %d\n", status); 4617 *rvalp = (int)status; 4618 retval = 0; 4619 goto cleanup; 4620 } 4621 4622 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION; 4623 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback; 4624 mr_cb_data_in.mr_arg1 = (void *)mr_rp; 4625 mr_cb_data_in.mr_arg2 = NULL; 4626 4627 /* Pass the service driver mr cleanup handler to the hca driver */ 4628 status = ibt_ci_data_in(ia_rp->ia_hca_hdl, 4629 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl, 4630 &mr_cb_data_in, sizeof (mr_cb_data_in)); 4631 4632 if (status != IBT_SUCCESS) { 4633 DERR("mr_register: ibt_ci_data_in error(%d) ver(%d)", 4634 status, mr_cb_data_in.mr_rev); 4635 *rvalp = (int)status; 4636 retval = 0; 4637 goto cleanup; 4638 } 4639
4640 /* insert into mr hash table */ 4641 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, 4642 &mr_hkey, (void *)mr_rp); 4643 if (retval != 0) { 4644 DERR("mr_register: cannot insert mr resource into mr_htbl\n"); 4645 goto cleanup; 4646 } 4647 inserted = B_TRUE; 4648 4649 args.mr_lkey = mr_rp->mr_desc.md_lkey; 4650 args.mr_rkey = mr_rp->mr_desc.md_rkey; 4651 args.mr_hkey = mr_hkey; 4652 4653 retval = ddi_copyout((void *)&args, (void *)arg, 4654 sizeof (dapl_mr_register_t), mode); 4655 if (retval != 0) { 4656 DERR("mr_register: copyout error %d\n", retval); 4657 retval = EFAULT; 4658 goto cleanup; 4659 } 4660 return (0); 4661 4662 cleanup:; 4663 if (inserted) { 4664 daplka_mr_resource_t *free_rp = NULL; 4665 4666 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey, 4667 (void **)&free_rp); 4668 if (free_rp != mr_rp) { 4669 DERR("mr_register: cannot remove mr from hash table\n"); 4670 /* 4671 * we can only get here if another thread 4672 * has completed the cleanup in mr_deregister 4673 */ 4674 return (retval); 4675 } 4676 } 4677 DAPLKA_RS_UNREF(mr_rp); 4678 return (retval); 4679 } 4680 4681 /* 4682 * registers a shared memory region. 4683 * the client calls this function with the intention to share the memory 4684 * region with other clients. it is assumed that, prior to calling this 4685 * function, the client(s) are already sharing parts of their address 4686 * space using a mechanism such as SYSV shared memory. the first client 4687 * that calls this function will create and insert a daplka_shared_mr_t 4688 * object into the global daplka_shared_mr_tree. this shared mr object 4689 * will be identified by a unique 40-byte key and will maintain a list 4690 * of mr resources. every time this function gets called with the same 4691 * 40-byte key, a new mr resource (containing a new mr handle generated 4692 * by ibt_register_mr or ibt_register_shared_mr) is created and inserted 4693 * into this list. similarly, every time a shared mr gets deregistered 4694 * or invalidated by a callback, the mr resource gets removed from this 4695 * list. the shared mr object has a reference count. when it drops to 4696 * zero, the shared mr object will be removed from the global avl tree 4697 * and be freed. 4698 */ 4699 /* ARGSUSED */ 4700 static int 4701 daplka_mr_register_shared(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 4702 cred_t *cred, int *rvalp) 4703 { 4704 dapl_mr_register_shared_t args; 4705 daplka_shared_mr_t *smrp = NULL; 4706 daplka_shared_mr_t tmp_smr; 4707 ibt_mr_data_in_t mr_cb_data_in; 4708 avl_index_t where; 4709 boolean_t inserted = B_FALSE; 4710 daplka_mr_resource_t *mr_rp = NULL; 4711 daplka_pd_resource_t *pd_rp; 4712 uint64_t mr_hkey = 0; 4713 ibt_status_t status; 4714 int retval; 4715 4716 retval = ddi_copyin((void *)arg, &args, 4717 sizeof (dapl_mr_register_shared_t), mode); 4718 if (retval != 0) { 4719 DERR("mr_register_shared: copyin error %d\n", retval); 4720 return (EINVAL); 4721 } 4722 4723 mutex_enter(&daplka_shared_mr_lock); 4724 /* 4725 * find smrp from the global avl tree. 4726 * the 40-byte key is used as the lookup key. 
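 *
 * this is the standard Solaris AVL idiom: on a miss, avl_find()
 * records in 'where' the position at which avl_insert() can later
 * place a new node without a second search. a minimal generic sketch
 * of the pattern (illustrative only, using a hypothetical node_t):
 *
 *	avl_index_t where;
 *	node_t tmpl, *np;
 *
 *	tmpl.key = key;
 *	np = avl_find(&tree, &tmpl, &where);
 *	if (np == NULL) {
 *		np = kmem_zalloc(sizeof (*np), KM_SLEEP);
 *		np->key = key;
 *		avl_insert(&tree, np, where);
 *	}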
4727 */ 4728 tmp_smr.smr_cookie = args.mrs_shm_cookie; 4729 smrp = (daplka_shared_mr_t *) 4730 avl_find(&daplka_shared_mr_tree, &tmp_smr, &where); 4731 if (smrp != NULL) { 4732 D2("mr_register_shared: smrp 0x%p, found cookie:\n" 4733 "0x%016llx%016llx%016llx%016llx%016llx\n", smrp, 4734 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4], 4735 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3], 4736 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2], 4737 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1], 4738 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]); 4739 4740 /* 4741 * if the smrp exists, other threads could still be 4742 * accessing it. we wait until they are done before 4743 * we continue. 4744 */ 4745 smrp->smr_refcnt++; 4746 while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) { 4747 D2("mr_register_shared: smrp 0x%p, " 4748 "waiting in transitioning state, refcnt %d\n", 4749 smrp, smrp->smr_refcnt); 4750 cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock); 4751 } 4752 ASSERT(smrp->smr_state == DAPLKA_SMR_READY); 4753 D2("mr_register_shared: smrp 0x%p, refcnt %d, ready\n", 4754 smrp, smrp->smr_refcnt); 4755 4756 /* 4757 * we set smr_state to TRANSITIONING to temporarily 4758 * prevent other threads from trying to access smrp. 4759 */ 4760 smrp->smr_state = DAPLKA_SMR_TRANSITIONING; 4761 } else { 4762 D2("mr_register_shared: cannot find cookie:\n" 4763 "0x%016llx%016llx%016llx%016llx%016llx\n", 4764 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4], 4765 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3], 4766 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2], 4767 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1], 4768 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]); 4769 4770 /* 4771 * if we cannot find smrp, we need to create and 4772 * insert one into daplka_shared_mr_tree 4773 */ 4774 smrp = kmem_zalloc(sizeof (daplka_shared_mr_t), 4775 daplka_km_flags); 4776 if (smrp == NULL) { 4777 retval = ENOMEM; 4778 mutex_exit(&daplka_shared_mr_lock); 4779 goto cleanup; 4780 } 4781 smrp->smr_refcnt = 1; 4782 smrp->smr_cookie = args.mrs_shm_cookie; 4783 smrp->smr_state = DAPLKA_SMR_TRANSITIONING; 4784 smrp->smr_mr_list = NULL; 4785 cv_init(&smrp->smr_cv, NULL, CV_DRIVER, NULL); 4786 avl_insert(&daplka_shared_mr_tree, smrp, where); 4787 } 4788 mutex_exit(&daplka_shared_mr_lock); 4789 4790 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags); 4791 if (mr_rp == NULL) { 4792 DERR("mr_register_shared: cannot allocate mr resource\n"); 4793 goto cleanup; 4794 } 4795 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR, 4796 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy); 4797 4798 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL); 4799 mr_rp->mr_hca = ia_rp->ia_hca; 4800 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl; 4801 mr_rp->mr_next = NULL; 4802 mr_rp->mr_shared_mr = NULL; 4803 4804 /* get pd handle */ 4805 pd_rp = (daplka_pd_resource_t *) 4806 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mrs_pd_hkey); 4807 if (pd_rp == NULL) { 4808 DERR("mr_register_shared: cannot find pd resource\n"); 4809 retval = EINVAL; 4810 goto cleanup; 4811 } 4812 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD); 4813 mr_rp->mr_pd_res = pd_rp; 4814 4815 mr_rp->mr_attr.mr_vaddr = args.mrs_vaddr; 4816 mr_rp->mr_attr.mr_len = args.mrs_len; 4817 mr_rp->mr_attr.mr_flags = args.mrs_flags | IBT_MR_NOSLEEP; 4818 mr_rp->mr_attr.mr_as = curproc->p_as; 4819 4820 D2("mr_register_shared: mr_vaddr 0x%p, mr_len %llu, " 4821 "mr_flags 0x%x, mr_as 0x%p, mr_exists %d, smrp 0x%p\n", 4822 (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr, 4823 (longlong_t)mr_rp->mr_attr.mr_len, 4824 
mr_rp->mr_attr.mr_flags, mr_rp->mr_attr.mr_as, 4825 (int)(smrp->smr_mr_list != NULL), smrp); 4826 4827 /* 4828 * since we are in TRANSITIONING state, we are guaranteed 4829 * that we have exclusive access to smr_mr_list. 4830 */ 4831 if (smrp->smr_mr_list != NULL) { 4832 ibt_smr_attr_t mem_sattr; 4833 4834 /* 4835 * a non-null smr_mr_list indicates that someone 4836 * else has already inserted an mr_resource into 4837 * smr_mr_list. we use the mr_handle from the first 4838 * element as an arg to ibt_register_shared_mr. 4839 */ 4840 mem_sattr.mr_vaddr = smrp->smr_mr_list->mr_desc.md_vaddr; 4841 mem_sattr.mr_flags = mr_rp->mr_attr.mr_flags; 4842 4843 D2("mr_register_shared: mem_sattr vaddr 0x%p flags 0x%x\n", 4844 (void *)(uintptr_t)mem_sattr.mr_vaddr, mem_sattr.mr_flags); 4845 status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl, 4846 smrp->smr_mr_list->mr_hdl, mr_rp->mr_pd_res->pd_hdl, 4847 &mem_sattr, &mr_rp->mr_hdl, &mr_rp->mr_desc); 4848 4849 if (status != IBT_SUCCESS) { 4850 DERR("mr_register_shared: " 4851 "ibt_register_shared_mr error %d\n", status); 4852 *rvalp = (int)status; 4853 retval = 0; 4854 goto cleanup; 4855 } 4856 } else { 4857 /* 4858 * an mr does not exist yet. we need to create one 4859 * using ibt_register_mr. 4860 */ 4861 status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl, 4862 mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, 4863 &mr_rp->mr_hdl, &mr_rp->mr_desc); 4864 4865 if (status != IBT_SUCCESS) { 4866 DERR("mr_register_shared: " 4867 "ibt_register_mr error %d\n", status); 4868 *rvalp = (int)status; 4869 retval = 0; 4870 goto cleanup; 4871 } 4872 } 4873 4874 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION; 4875 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback; 4876 mr_cb_data_in.mr_arg1 = (void *)mr_rp; 4877 mr_cb_data_in.mr_arg2 = NULL; 4878 4879 /* Pass the service driver mr cleanup handler to the hca driver */ 4880 status = ibt_ci_data_in(ia_rp->ia_hca_hdl, 4881 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl, 4882 &mr_cb_data_in, sizeof (mr_cb_data_in)); 4883 4884 if (status != IBT_SUCCESS) { 4885 DERR("mr_register_shared: ibt_ci_data_in error(%d) ver(%d)", 4886 status, mr_cb_data_in.mr_rev); 4887 *rvalp = (int)status; 4888 retval = 0; 4889 goto cleanup; 4890 } 4891 4892 /* 4893 * we bump reference of mr_rp and enqueue it onto smrp. 4894 */ 4895 DAPLKA_RS_REF(mr_rp); 4896 mr_rp->mr_next = smrp->smr_mr_list; 4897 smrp->smr_mr_list = mr_rp; 4898 mr_rp->mr_shared_mr = smrp; 4899 4900 /* insert into mr hash table */ 4901 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, 4902 &mr_hkey, (void *)mr_rp); 4903 if (retval != 0) { 4904 DERR("mr_register_shared: cannot insert mr resource\n"); 4905 goto cleanup; 4906 } 4907 inserted = B_TRUE; 4908 4909 /* 4910 * at this point, there are two references to our mr resource. 4911 * one is kept in ia_mr_htbl. the other is kept in the list 4912 * within this shared mr object (smrp). when we deregister this 4913 * mr or when a callback invalidates this mr, the reference kept 4914 * by this shared mr object will be removed. 
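 *
 * the resulting reference graph, for illustration:
 *
 *	ia_mr_htbl[mr_hkey] --ref--> mr_rp <--ref-- smrp->smr_mr_list
 *
 * mr_deregister drops both references (the list reference via
 * daplka_shared_mr_free and the hash table's reference via the final
 * UNREF); an invalidation callback drops only the smr_mr_list
 * reference, so the hash table's reference keeps mr_rp alive until
 * the client frees its handle.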
4915 */ 4916 4917 args.mrs_lkey = mr_rp->mr_desc.md_lkey; 4918 args.mrs_rkey = mr_rp->mr_desc.md_rkey; 4919 args.mrs_hkey = mr_hkey; 4920 4921 retval = ddi_copyout((void *)&args, (void *)arg, 4922 sizeof (dapl_mr_register_shared_t), mode); 4923 if (retval != 0) { 4924 DERR("mr_register_shared: copyout error %d\n", retval); 4925 retval = EFAULT; 4926 goto cleanup; 4927 } 4928 4929 /* 4930 * set the state to READY to allow others to continue 4931 */ 4932 mutex_enter(&daplka_shared_mr_lock); 4933 smrp->smr_state = DAPLKA_SMR_READY; 4934 cv_broadcast(&smrp->smr_cv); 4935 mutex_exit(&daplka_shared_mr_lock); 4936 return (0); 4937 4938 cleanup:; 4939 if (inserted) { 4940 daplka_mr_resource_t *free_rp = NULL; 4941 4942 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey, 4943 (void **)&free_rp); 4944 if (free_rp != mr_rp) { 4945 DERR("mr_register_shared: " 4946 "cannot remove mr from hash table\n"); 4947 /* 4948 * we can only get here if another thread 4949 * has completed the cleanup in mr_deregister 4950 */ 4951 return (retval); 4952 } 4953 } 4954 if (smrp != NULL) { 4955 mutex_enter(&daplka_shared_mr_lock); 4956 ASSERT(smrp->smr_refcnt > 0); 4957 smrp->smr_refcnt--; 4958 4959 if (smrp->smr_refcnt == 0) { 4960 DERR("mr_register_shared: freeing smrp 0x%p\n", smrp); 4961 avl_remove(&daplka_shared_mr_tree, smrp); 4962 if (smrp->smr_mr_list != NULL) { 4963 /* 4964 * the refcnt is 0. if there is anything 4965 * left on the list, it must be ours. 4966 */ 4967 ASSERT(smrp->smr_mr_list == mr_rp); 4968 DAPLKA_RS_UNREF(mr_rp); 4969 smrp->smr_mr_list = NULL; 4970 ASSERT(mr_rp->mr_shared_mr == smrp); 4971 mr_rp->mr_shared_mr = NULL; 4972 ASSERT(mr_rp->mr_next == NULL); 4973 } 4974 smrp->smr_state = DAPLKA_SMR_FREED; 4975 cv_destroy(&smrp->smr_cv); 4976 kmem_free(smrp, sizeof (daplka_shared_mr_t)); 4977 } else { 4978 DERR("mr_register_shared: resetting smr_state " 4979 "smrp 0x%p, %d waiters remain\n", smrp, 4980 smrp->smr_refcnt); 4981 ASSERT(smrp->smr_state == DAPLKA_SMR_TRANSITIONING); 4982 if (smrp->smr_mr_list != NULL && mr_rp != NULL) { 4983 daplka_mr_resource_t **mpp; 4984 4985 /* 4986 * search and remove mr_rp from smr_mr_list 4987 */ 4988 mpp = &smrp->smr_mr_list; 4989 while (*mpp != NULL) { 4990 if (*mpp == mr_rp) { 4991 *mpp = (*mpp)->mr_next; 4992 DAPLKA_RS_UNREF(mr_rp); 4993 ASSERT(mr_rp->mr_shared_mr == 4994 smrp); 4995 mr_rp->mr_shared_mr = NULL; 4996 mr_rp->mr_next = NULL; 4997 break; 4998 } 4999 mpp = &(*mpp)->mr_next; 5000 } 5001 } 5002 /* 5003 * note that smr_state == READY does not necessarily 5004 * mean that smr_mr_list is non empty. for this case, 5005 * we are doing cleanup because of a failure. we set 5006 * the state to READY to allow other threads to 5007 * continue. 5008 */ 5009 smrp->smr_state = DAPLKA_SMR_READY; 5010 cv_broadcast(&smrp->smr_cv); 5011 } 5012 mutex_exit(&daplka_shared_mr_lock); 5013 } 5014 if (mr_rp != NULL) { 5015 DAPLKA_RS_UNREF(mr_rp); 5016 } 5017 return (retval); 5018 } 5019 5020 /* 5021 * registers a memory region using the attributes of an 5022 * existing region. 
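 * the new region piggybacks on the original region's registration:
 * the IO virtual address saved in the original mr_desc is passed
 * back in through ibt_register_shared_mr(), so the HCA can reuse the
 * existing mapping rather than set up a new one (the exact pinning
 * behavior is a property of the HCA driver).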
5023 */ 5024 /* ARGSUSED */ 5025 static int 5026 daplka_mr_register_lmr(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5027 cred_t *cred, int *rvalp) 5028 { 5029 boolean_t inserted = B_FALSE; 5030 dapl_mr_register_lmr_t args; 5031 ibt_mr_data_in_t mr_cb_data_in; 5032 daplka_mr_resource_t *orig_mr_rp = NULL; 5033 daplka_mr_resource_t *mr_rp; 5034 ibt_smr_attr_t mem_sattr; 5035 uint64_t mr_hkey = 0; 5036 ibt_status_t status; 5037 int retval; 5038 5039 retval = ddi_copyin((void *)arg, &args, 5040 sizeof (dapl_mr_register_lmr_t), mode); 5041 if (retval != 0) { 5042 DERR("mr_register_lmr: copyin error %d\n", retval); 5043 return (EINVAL); 5044 } 5045 orig_mr_rp = (daplka_mr_resource_t *) 5046 daplka_hash_lookup(&ia_rp->ia_mr_htbl, args.mrl_orig_hkey); 5047 if (orig_mr_rp == NULL) { 5048 DERR("mr_register_lmr: cannot find mr resource\n"); 5049 return (EINVAL); 5050 } 5051 ASSERT(DAPLKA_RS_TYPE(orig_mr_rp) == DAPL_TYPE_MR); 5052 5053 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags); 5054 if (mr_rp == NULL) { 5055 DERR("mr_register_lmr: cannot allocate mr resource\n"); 5056 retval = ENOMEM; 5057 goto cleanup; 5058 } 5059 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR, 5060 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy); 5061 5062 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL); 5063 mr_rp->mr_hca = ia_rp->ia_hca; 5064 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl; 5065 mr_rp->mr_next = NULL; 5066 mr_rp->mr_shared_mr = NULL; 5067 5068 DAPLKA_RS_REF(orig_mr_rp->mr_pd_res); 5069 mr_rp->mr_pd_res = orig_mr_rp->mr_pd_res; 5070 mr_rp->mr_attr = orig_mr_rp->mr_attr; 5071 5072 /* Pass the IO addr that was returned while allocating the orig MR */ 5073 mem_sattr.mr_vaddr = orig_mr_rp->mr_desc.md_vaddr; 5074 mem_sattr.mr_flags = args.mrl_flags | IBT_MR_NOSLEEP; 5075 5076 status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl, 5077 orig_mr_rp->mr_hdl, mr_rp->mr_pd_res->pd_hdl, &mem_sattr, 5078 &mr_rp->mr_hdl, &mr_rp->mr_desc); 5079 5080 if (status != IBT_SUCCESS) { 5081 DERR("mr_register_lmr: ibt_register_shared_mr error %d\n", 5082 status); 5083 *rvalp = (int)status; 5084 retval = 0; 5085 goto cleanup; 5086 } 5087 5088 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION; 5089 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback; 5090 mr_cb_data_in.mr_arg1 = (void *)mr_rp; 5091 mr_cb_data_in.mr_arg2 = NULL; 5092 5093 /* Pass the service driver mr cleanup handler to the hca driver */ 5094 status = ibt_ci_data_in(ia_rp->ia_hca_hdl, 5095 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl, 5096 &mr_cb_data_in, sizeof (mr_cb_data_in)); 5097 5098 if (status != IBT_SUCCESS) { 5099 DERR("mr_register_lmr: ibt_ci_data_in error(%d) ver(%d)", 5100 status, mr_cb_data_in.mr_rev); 5101 *rvalp = (int)status; 5102 retval = 0; 5103 goto cleanup; 5104 } 5105 mr_rp->mr_attr.mr_len = orig_mr_rp->mr_attr.mr_len; 5106 mr_rp->mr_attr.mr_flags = mem_sattr.mr_flags; 5107 5108 /* insert into mr hash table */ 5109 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, &mr_hkey, 5110 (void *)mr_rp); 5111 if (retval != 0) { 5112 DERR("mr_register_lmr: cannot insert mr resource into mr_htbl\n"); 5113 goto cleanup; 5114 } 5115 inserted = B_TRUE; 5116 5117 args.mrl_lkey = mr_rp->mr_desc.md_lkey; 5118 args.mrl_rkey = mr_rp->mr_desc.md_rkey; 5119 args.mrl_hkey = mr_hkey; 5120 5121 retval = ddi_copyout((void *)&args, (void *)arg, 5122 sizeof (dapl_mr_register_lmr_t), mode); 5123 if (retval != 0) { 5124 DERR("mr_register_lmr: copyout error %d\n", retval); 5125 retval = EFAULT; 5126 goto cleanup; 5127 } 5128 if (orig_mr_rp != NULL) { 5129
DAPLKA_RS_UNREF(orig_mr_rp); 5130 } 5131 return (0); 5132 5133 cleanup:; 5134 if (inserted) { 5135 daplka_mr_resource_t *free_rp = NULL; 5136 5137 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey, 5138 (void **)&free_rp); 5139 if (free_rp != mr_rp) { 5140 DERR("mr_register_lmr: cannot remove mr from hash table\n"); 5141 /* 5142 * we can only get here if another thread 5143 * has completed the cleanup in mr_deregister 5144 */ 5145 return (retval); 5146 } 5147 } 5148 if (orig_mr_rp != NULL) { 5149 DAPLKA_RS_UNREF(orig_mr_rp); 5150 } 5151 if (mr_rp != NULL) { 5152 DAPLKA_RS_UNREF(mr_rp); 5153 } 5154 return (retval); 5155 } 5156 5157 /* 5158 * this function is called by mr_deregister and mr_cleanup_callback to 5159 * remove a mr resource from the shared mr object mr_rp->mr_shared_mr. 5160 * if mr_shared_mr is already NULL, that means the region being 5161 * deregistered or invalidated is not a shared mr region and we can 5162 * return immediately. 5163 */ 5164 static void 5165 daplka_shared_mr_free(daplka_mr_resource_t *mr_rp) 5166 { 5167 daplka_shared_mr_t *smrp; 5168 5169 /* 5170 * we need a lock because mr_callback also checks this field. 5171 * for the rare case that mr_deregister and mr_cleanup_callback 5172 * gets called simultaneously, we are guaranteed that smrp won't 5173 * be dereferenced twice because either function will find 5174 * mr_shared_mr to be NULL. 5175 */ 5176 mutex_enter(&mr_rp->mr_lock); 5177 smrp = mr_rp->mr_shared_mr; 5178 mr_rp->mr_shared_mr = NULL; 5179 mutex_exit(&mr_rp->mr_lock); 5180 5181 if (smrp != NULL) { 5182 daplka_mr_resource_t **mpp; 5183 boolean_t mr_found = B_FALSE; 5184 5185 mutex_enter(&daplka_shared_mr_lock); 5186 ASSERT(smrp->smr_refcnt > 0); 5187 while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) { 5188 cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock); 5189 } 5190 ASSERT(smrp->smr_state == DAPLKA_SMR_READY); 5191 smrp->smr_state = DAPLKA_SMR_TRANSITIONING; 5192 smrp->smr_refcnt--; 5193 5194 /* 5195 * search and remove mr_rp from smr_mr_list. 5196 * also UNREF mr_rp because it is no longer 5197 * on the list. 5198 */ 5199 mpp = &smrp->smr_mr_list; 5200 while (*mpp != NULL) { 5201 if (*mpp == mr_rp) { 5202 *mpp = (*mpp)->mr_next; 5203 DAPLKA_RS_UNREF(mr_rp); 5204 mr_rp->mr_next = NULL; 5205 mr_found = B_TRUE; 5206 break; 5207 } 5208 mpp = &(*mpp)->mr_next; 5209 } 5210 /* 5211 * since mr_cleanup_callback may not touch smr_mr_list 5212 * at this time (due to smr_state), we can be sure 5213 * that we can find and remove mr_rp from smr_mr_list 5214 */ 5215 ASSERT(mr_found); 5216 if (smrp->smr_refcnt == 0) { 5217 D3("shared_mr_free: freeing smrp 0x%p\n", smrp); 5218 avl_remove(&daplka_shared_mr_tree, smrp); 5219 ASSERT(smrp->smr_mr_list == NULL); 5220 smrp->smr_state = DAPLKA_SMR_FREED; 5221 cv_destroy(&smrp->smr_cv); 5222 kmem_free(smrp, sizeof (daplka_shared_mr_t)); 5223 } else { 5224 D3("shared_mr_free: smrp 0x%p, refcnt %d\n", 5225 smrp, smrp->smr_refcnt); 5226 smrp->smr_state = DAPLKA_SMR_READY; 5227 cv_broadcast(&smrp->smr_cv); 5228 } 5229 mutex_exit(&daplka_shared_mr_lock); 5230 } 5231 } 5232 5233 /* 5234 * deregisters a memory region. 5235 * if mr is shared, remove reference from global shared mr object. 5236 * release the initial reference to the mr. if the mr's refcnt is 5237 * zero, call mr_destroy to free mr.
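 *
 * a sketch of what the DAPLKA_RS_UNREF() convention used throughout
 * this file appears to amount to (assumed from usage; the macro and
 * field names below are hypothetical, the real definitions live in
 * the header): atomically drop the reference count and invoke the
 * destructor registered via DAPLKA_RS_INIT() when it reaches zero,
 * roughly:
 *
 *	if (atomic_dec_32_nv(&rs->rs_refcnt) == 0)
 *		rs->rs_free((daplka_resource_t *)rs);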
5238 */ 5239 /* ARGSUSED */ 5240 static int 5241 daplka_mr_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5242 cred_t *cred, int *rvalp) 5243 { 5244 daplka_mr_resource_t *mr_rp; 5245 dapl_mr_deregister_t args; 5246 int retval; 5247 5248 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_deregister_t), 5249 mode); 5250 if (retval != 0) { 5251 DERR("mr_deregister: copyin error %d\n", retval); 5252 return (EINVAL); 5253 } 5254 retval = daplka_hash_remove(&ia_rp->ia_mr_htbl, 5255 args.mrd_hkey, (void **)&mr_rp); 5256 if (retval != 0 || mr_rp == NULL) { 5257 DERR("mr_deregister: cannot find mr resource\n"); 5258 return (EINVAL); 5259 } 5260 ASSERT(DAPLKA_RS_TYPE(mr_rp) == DAPL_TYPE_MR); 5261 5262 daplka_shared_mr_free(mr_rp); 5263 DAPLKA_RS_UNREF(mr_rp); 5264 return (0); 5265 } 5266 5267 /* 5268 * sync local memory regions on RDMA read or write. 5269 */ 5270 /* ARGSUSED */ 5271 static int 5272 daplka_mr_sync(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5273 cred_t *cred, int *rvalp) 5274 { 5275 dapl_mr_sync_t args; 5276 daplka_mr_resource_t *mr_rp[DAPL_MR_PER_SYNC]; 5277 ibt_mr_sync_t mrs[DAPL_MR_PER_SYNC]; 5278 uint32_t sync_direction_flags; 5279 ibt_status_t status; 5280 int i, j; 5281 int retval; 5282 5283 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_sync_t), mode); 5284 if (retval != 0) { 5285 DERR("mr_sync: copyin error %d\n", retval); 5286 return (EFAULT); 5287 } 5288 5289 /* number of segments bound check */ 5290 if (args.mrs_numseg > DAPL_MR_PER_SYNC) { 5291 DERR("mr_sync: number of segments too large\n"); 5292 return (EINVAL); 5293 } 5294 5295 /* translate MR sync direction flag */ 5296 if (args.mrs_flags == DAPL_MR_SYNC_RDMA_RD) { 5297 sync_direction_flags = IBT_SYNC_READ; 5298 } else if (args.mrs_flags == DAPL_MR_SYNC_RDMA_WR) { 5299 sync_direction_flags = IBT_SYNC_WRITE; 5300 } else { 5301 DERR("mr_sync: unknown flags\n"); 5302 return (EINVAL); 5303 } 5304 5305 /* 5306 * all the segments are going to be sync'd by ibtl together 5307 */ 5308 for (i = 0; i < args.mrs_numseg; i++) { 5309 mr_rp[i] = (daplka_mr_resource_t *)daplka_hash_lookup( 5310 &ia_rp->ia_mr_htbl, args.mrs_vec[i].mrsv_hkey); 5311 if (mr_rp[i] == NULL) { 5312 for (j = 0; j < i; j++) { 5313 DAPLKA_RS_UNREF(mr_rp[j]); 5314 } 5315 DERR("mr_sync: lookup error\n"); 5316 return (EINVAL); 5317 } 5318 ASSERT(DAPLKA_RS_TYPE(mr_rp[i]) == DAPL_TYPE_MR); 5319 mrs[i].ms_handle = mr_rp[i]->mr_hdl; 5320 mrs[i].ms_vaddr = args.mrs_vec[i].mrsv_va; 5321 mrs[i].ms_len = args.mrs_vec[i].mrsv_len; 5322 mrs[i].ms_flags = sync_direction_flags; 5323 } 5324 5325 status = ibt_sync_mr(ia_rp->ia_hca_hdl, mrs, args.mrs_numseg); 5326 if (status != IBT_SUCCESS) { 5327 DERR("mr_sync: ibt_sync_mr error %d\n", status); 5328 *rvalp = (int)status; 5329 } 5330 for (i = 0; i < args.mrs_numseg; i++) { 5331 DAPLKA_RS_UNREF(mr_rp[i]); 5332 } 5333 return (0); 5334 } 5335 5336 /* 5337 * destroys a memory region. 5338 * called when refcnt drops to zero. 
5339 */ 5340 static int 5341 daplka_mr_destroy(daplka_resource_t *gen_rp) 5342 { 5343 daplka_mr_resource_t *mr_rp = (daplka_mr_resource_t *)gen_rp; 5344 ibt_status_t status; 5345 5346 ASSERT(DAPLKA_RS_REFCNT(mr_rp) == 0); 5347 ASSERT(mr_rp->mr_shared_mr == NULL); 5348 D3("mr_destroy: entering, mr_rp 0x%p, rnum %d\n", 5349 mr_rp, DAPLKA_RS_RNUM(mr_rp)); 5350 5351 /* 5352 * deregister mr 5353 */ 5354 if (mr_rp->mr_hdl) { 5355 status = daplka_ibt_deregister_mr(mr_rp, mr_rp->mr_hca_hdl, 5356 mr_rp->mr_hdl); 5357 if (status != IBT_SUCCESS) { 5358 DERR("mr_destroy: ibt_deregister_mr returned %d\n", 5359 status); 5360 } 5361 mr_rp->mr_hdl = NULL; 5362 D3("mr_destroy: mr deregistered\n"); 5363 } 5364 mr_rp->mr_attr.mr_vaddr = NULL; 5365 5366 /* 5367 * release reference on PD 5368 */ 5369 if (mr_rp->mr_pd_res != NULL) { 5370 DAPLKA_RS_UNREF(mr_rp->mr_pd_res); 5371 mr_rp->mr_pd_res = NULL; 5372 } 5373 mutex_destroy(&mr_rp->mr_lock); 5374 DAPLKA_RS_FINI(mr_rp); 5375 kmem_free(mr_rp, sizeof (daplka_mr_resource_t)); 5376 D3("mr_destroy: exiting, mr_rp 0x%p\n", mr_rp); 5377 return (0); 5378 } 5379 5380 /* 5381 * this function is called by daplka_hash_destroy for 5382 * freeing MR resource objects 5383 */ 5384 static void 5385 daplka_hash_mr_free(void *obj) 5386 { 5387 daplka_mr_resource_t *mr_rp = (daplka_mr_resource_t *)obj; 5388 5389 daplka_shared_mr_free(mr_rp); 5390 DAPLKA_RS_UNREF(mr_rp); 5391 } 5392 5393 /* 5394 * comparison function used for finding a shared mr object 5395 * from the global shared mr avl tree. 5396 */ 5397 static int 5398 daplka_shared_mr_cmp(const void *smr1, const void *smr2) 5399 { 5400 daplka_shared_mr_t *s1 = (daplka_shared_mr_t *)smr1; 5401 daplka_shared_mr_t *s2 = (daplka_shared_mr_t *)smr2; 5402 int i; 5403 5404 for (i = 4; i >= 0; i--) { 5405 if (s1->smr_cookie.mc_uint_arr[i] < 5406 s2->smr_cookie.mc_uint_arr[i]) { 5407 return (-1); 5408 } 5409 if (s1->smr_cookie.mc_uint_arr[i] > 5410 s2->smr_cookie.mc_uint_arr[i]) { 5411 return (1); 5412 } 5413 } 5414 return (0); 5415 } 5416 5417 /* 5418 * allocates a protection domain. 
5419 */ 5420 /* ARGSUSED */ 5421 static int 5422 daplka_pd_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5423 cred_t *cred, int *rvalp) 5424 { 5425 dapl_pd_alloc_t args; 5426 daplka_pd_resource_t *pd_rp; 5427 ibt_status_t status; 5428 uint64_t pd_hkey = 0; 5429 boolean_t inserted = B_FALSE; 5430 int retval; 5431 5432 pd_rp = kmem_zalloc(sizeof (*pd_rp), daplka_km_flags); 5433 if (pd_rp == NULL) { 5434 DERR("pd_alloc: cannot allocate pd resource\n"); 5435 return (ENOMEM); 5436 } 5437 DAPLKA_RS_INIT(pd_rp, DAPL_TYPE_PD, 5438 DAPLKA_RS_RNUM(ia_rp), daplka_pd_destroy); 5439 5440 pd_rp->pd_hca = ia_rp->ia_hca; 5441 pd_rp->pd_hca_hdl = ia_rp->ia_hca_hdl; 5442 status = daplka_ibt_alloc_pd(pd_rp, pd_rp->pd_hca_hdl, 5443 IBT_PD_NO_FLAGS, &pd_rp->pd_hdl); 5444 if (status != IBT_SUCCESS) { 5445 DERR("pd_alloc: ibt_alloc_pd returned %d\n", status); 5446 *rvalp = (int)status; 5447 retval = 0; 5448 goto cleanup; 5449 } 5450 5451 /* insert into pd hash table */ 5452 retval = daplka_hash_insert(&ia_rp->ia_pd_htbl, 5453 &pd_hkey, (void *)pd_rp); 5454 if (retval != 0) { 5455 DERR("pd_alloc: cannot insert pd resource into pd_htbl\n"); 5456 goto cleanup; 5457 } 5458 inserted = B_TRUE; 5459 5460 /* return hkey to library */ 5461 args.pda_hkey = pd_hkey; 5462 5463 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_pd_alloc_t), 5464 mode); 5465 if (retval != 0) { 5466 DERR("pd_alloc: copyout error %d\n", retval); 5467 retval = EFAULT; 5468 goto cleanup; 5469 } 5470 return (0); 5471 5472 cleanup:; 5473 if (inserted) { 5474 daplka_pd_resource_t *free_rp = NULL; 5475 5476 (void) daplka_hash_remove(&ia_rp->ia_pd_htbl, pd_hkey, 5477 (void **)&free_rp); 5478 if (free_rp != pd_rp) { 5479 DERR("pd_alloc: cannot remove pd from hash table\n"); 5480 /* 5481 * we can only get here if another thread 5482 * has completed the cleanup in pd_free 5483 */ 5484 return (retval); 5485 } 5486 } 5487 DAPLKA_RS_UNREF(pd_rp); 5488 return (retval); 5489 } 5490 5491 /* 5492 * destroys a protection domain. 5493 * called when refcnt drops to zero. 5494 */ 5495 static int 5496 daplka_pd_destroy(daplka_resource_t *gen_rp) 5497 { 5498 daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)gen_rp; 5499 ibt_status_t status; 5500 5501 ASSERT(DAPLKA_RS_REFCNT(pd_rp) == 0); 5502 D3("pd_destroy: entering, pd_rp %p, rnum %d\n", 5503 pd_rp, DAPLKA_RS_RNUM(pd_rp)); 5504 5505 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD); 5506 if (pd_rp->pd_hdl != NULL) { 5507 status = daplka_ibt_free_pd(pd_rp, pd_rp->pd_hca_hdl, 5508 pd_rp->pd_hdl); 5509 if (status != IBT_SUCCESS) { 5510 DERR("pd_destroy: ibt_free_pd returned %d\n", status); 5511 } 5512 } 5513 DAPLKA_RS_FINI(pd_rp); 5514 kmem_free(pd_rp, sizeof (daplka_pd_resource_t)); 5515 D3("pd_destroy: exiting, pd_rp %p\n", pd_rp); 5516 return (0); 5517 } 5518 5519 static void 5520 daplka_hash_pd_free(void *obj) 5521 { 5522 daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)obj; 5523 5524 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD); 5525 DAPLKA_RS_UNREF(pd_rp); 5526 } 5527 5528 /* 5529 * removes the pd reference from ia_pd_htbl and releases the 5530 * initial reference to the pd. also destroys the pd if the refcnt 5531 * is zero. 
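 *
 * this alloc/free pairing follows the same hkey pattern used by the
 * other resource types in this file: alloc inserts the resource into
 * a per-IA hash table and copies the generated hkey out to the
 * library; free looks up and removes the entry by hkey, then drops
 * the initial reference so the destructor runs once any concurrent
 * holders release theirs.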
 */
/* ARGSUSED */
static int
daplka_pd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_pd_resource_t *pd_rp;
	dapl_pd_free_t args;
	int retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_pd_free_t), mode);
	if (retval != 0) {
		DERR("pd_free: copyin error %d\n", retval);
		return (EINVAL);
	}

	retval = daplka_hash_remove(&ia_rp->ia_pd_htbl,
	    args.pdf_hkey, (void **)&pd_rp);
	if (retval != 0 || pd_rp == NULL) {
		DERR("pd_free: cannot find pd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(pd_rp);
	return (0);
}

/*
 * allocates a memory window
 */
/* ARGSUSED */
static int
daplka_mw_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_pd_resource_t *pd_rp;
	daplka_mw_resource_t *mw_rp;
	dapl_mw_alloc_t args;
	ibt_status_t status;
	boolean_t inserted = B_FALSE;
	uint64_t mw_hkey;
	ibt_rkey_t mw_rkey;
	int retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_alloc_t), mode);
	if (retval != 0) {
		DERR("mw_alloc: copyin error %d\n", retval);
		return (EFAULT);
	}

	/*
	 * Allocate and initialize a MW resource
	 */
	mw_rp = kmem_zalloc(sizeof (daplka_mw_resource_t), daplka_km_flags);
	if (mw_rp == NULL) {
		DERR("mw_alloc: cannot allocate mw resource\n");
		return (ENOMEM);
	}
	DAPLKA_RS_INIT(mw_rp, DAPL_TYPE_MW,
	    DAPLKA_RS_RNUM(ia_rp), daplka_mw_destroy);

	mutex_init(&mw_rp->mw_lock, NULL, MUTEX_DRIVER, NULL);
	mw_rp->mw_hca = ia_rp->ia_hca;
	mw_rp->mw_hca_hdl = ia_rp->ia_hca_hdl;

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mw_pd_hkey);
	if (pd_rp == NULL) {
		DERR("mw_alloc: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);

	mw_rp->mw_pd_res = pd_rp;

	status = daplka_ibt_alloc_mw(mw_rp, mw_rp->mw_hca_hdl,
	    pd_rp->pd_hdl, IBT_MW_NOSLEEP, &mw_rp->mw_hdl, &mw_rkey);

	if (status != IBT_SUCCESS) {
		DERR("mw_alloc: ibt_alloc_mw returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	mutex_enter(&ia_rp->ia_lock);
	switch (ia_rp->ia_state) {
	case DAPLKA_IA_INIT:
		ia_rp->ia_state = DAPLKA_IA_MW_ALLOC_IN_PROGRESS;
		ia_rp->ia_mw_alloccnt++;
		retval = 0;
		break;
	case DAPLKA_IA_MW_ALLOC_IN_PROGRESS:
		/* another mw_alloc is already in progress; increase cnt */
		ia_rp->ia_mw_alloccnt++;
		retval = 0;
		break;
	case DAPLKA_IA_MW_FREEZE_IN_PROGRESS:
		/* FALLTHRU */
	case DAPLKA_IA_MW_FROZEN:
		/*
		 * the IA is being frozen or is already frozen; don't
		 * allow more MWs to be allocated.
5638 */ 5639 DERR("mw_alloc: IA is freezing MWs (state=%d)\n", 5640 ia_rp->ia_state); 5641 retval = EINVAL; 5642 break; 5643 default: 5644 ASSERT(!"Invalid IA state in mw_alloc"); 5645 DERR("mw_alloc: IA state=%d invalid\n", ia_rp->ia_state); 5646 retval = EINVAL; 5647 break; 5648 } 5649 mutex_exit(&ia_rp->ia_lock); 5650 /* retval is 0 when ia_mw_alloccnt is incremented */ 5651 if (retval != 0) { 5652 goto cleanup; 5653 } 5654 5655 /* insert into mw hash table */ 5656 mw_hkey = 0; 5657 retval = daplka_hash_insert(&ia_rp->ia_mw_htbl, &mw_hkey, 5658 (void *)mw_rp); 5659 if (retval != 0) { 5660 DERR("mw_alloc: cannot insert mw resource into mw_htbl\n"); 5661 mutex_enter(&ia_rp->ia_lock); 5662 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS); 5663 ia_rp->ia_mw_alloccnt--; 5664 if (ia_rp->ia_mw_alloccnt == 0) { 5665 ia_rp->ia_state = DAPLKA_IA_INIT; 5666 cv_broadcast(&ia_rp->ia_cv); 5667 } 5668 mutex_exit(&ia_rp->ia_lock); 5669 goto cleanup; 5670 } 5671 inserted = B_TRUE; 5672 5673 D3("mw_alloc: ibt_alloc_mw mw_hdl(%p) mw_rkey(0x%llx)\n", 5674 mw_rp->mw_hdl, (longlong_t)mw_rkey); 5675 5676 mutex_enter(&ia_rp->ia_lock); 5677 /* 5678 * We are done with mw_alloc if this was the last mw_alloc 5679 * change state back to DAPLKA_IA_INIT and wake up waiters 5680 * specifically the unlock callback. 5681 */ 5682 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS); 5683 ia_rp->ia_mw_alloccnt--; 5684 if (ia_rp->ia_mw_alloccnt == 0) { 5685 ia_rp->ia_state = DAPLKA_IA_INIT; 5686 cv_broadcast(&ia_rp->ia_cv); 5687 } 5688 mutex_exit(&ia_rp->ia_lock); 5689 5690 args.mw_hkey = mw_hkey; 5691 args.mw_rkey = mw_rkey; 5692 5693 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_mw_alloc_t), 5694 mode); 5695 if (retval != 0) { 5696 DERR("mw_alloc: copyout error %d\n", retval); 5697 retval = EFAULT; 5698 goto cleanup; 5699 } 5700 return (0); 5701 5702 cleanup:; 5703 if (inserted) { 5704 daplka_mw_resource_t *free_rp = NULL; 5705 5706 (void) daplka_hash_remove(&ia_rp->ia_mw_htbl, mw_hkey, 5707 (void **)&free_rp); 5708 if (free_rp != mw_rp) { 5709 DERR("mw_alloc: cannot remove mw from hash table\n"); 5710 /* 5711 * we can only get here if another thread 5712 * has completed the cleanup in mw_free 5713 */ 5714 return (retval); 5715 } 5716 } 5717 DAPLKA_RS_UNREF(mw_rp); 5718 return (retval); 5719 } 5720 5721 /* 5722 * removes the mw reference from ia_mw_htbl and releases the 5723 * initial reference to the mw. also destroys the mw if the refcnt 5724 * is zero. 5725 */ 5726 /* ARGSUSED */ 5727 static int 5728 daplka_mw_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 5729 cred_t *cred, int *rvalp) 5730 { 5731 daplka_mw_resource_t *mw_rp = NULL; 5732 dapl_mw_free_t args; 5733 int retval = 0; 5734 5735 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_free_t), mode); 5736 if (retval != 0) { 5737 DERR("mw_free: copyin error %d\n", retval); 5738 return (EFAULT); 5739 } 5740 5741 retval = daplka_hash_remove(&ia_rp->ia_mw_htbl, args.mw_hkey, 5742 (void **)&mw_rp); 5743 if (retval != 0 || mw_rp == NULL) { 5744 DERR("mw_free: cannot find mw resrc (0x%llx)\n", 5745 (longlong_t)args.mw_hkey); 5746 return (EINVAL); 5747 } 5748 5749 ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW); 5750 5751 /* UNREF calls the actual free function when refcnt is zero */ 5752 DAPLKA_RS_UNREF(mw_rp); 5753 return (retval); 5754 } 5755 5756 /* 5757 * destroys the memory window. 5758 * called when refcnt drops to zero. 
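 *
 * note: the DAPLKA_IA_MW_ALLOC_IN_PROGRESS accounting in mw_alloc
 * above exists so that a freeze of the IA can first drain in-flight
 * allocations; a sketch of the assumed waiter (the freeze code is
 * outside this region):
 *
 *	mutex_enter(&ia_rp->ia_lock);
 *	while (ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS)
 *		cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
 *	ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS;
 *	mutex_exit(&ia_rp->ia_lock);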
 */
static int
daplka_mw_destroy(daplka_resource_t *gen_rp)
{
	daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)gen_rp;
	ibt_status_t status;

	ASSERT(DAPLKA_RS_REFCNT(mw_rp) == 0);
	D3("mw_destroy: entering, mw_rp 0x%p, rnum %d\n",
	    mw_rp, DAPLKA_RS_RNUM(mw_rp));

	/*
	 * free memory window
	 */
	if (mw_rp->mw_hdl) {
		status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl,
		    mw_rp->mw_hdl);
		if (status != IBT_SUCCESS) {
			DERR("mw_destroy: ibt_free_mw returned %d\n", status);
		}
		mw_rp->mw_hdl = NULL;
		D3("mw_destroy: mw freed\n");
	}

	/*
	 * release reference on PD
	 */
	if (mw_rp->mw_pd_res != NULL) {
		DAPLKA_RS_UNREF(mw_rp->mw_pd_res);
		mw_rp->mw_pd_res = NULL;
	}
	mutex_destroy(&mw_rp->mw_lock);
	DAPLKA_RS_FINI(mw_rp);
	kmem_free(mw_rp, sizeof (daplka_mw_resource_t));
	D3("mw_destroy: exiting, mw_rp 0x%p\n", mw_rp);
	return (0);
}

static void
daplka_hash_mw_free(void *obj)
{
	daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)obj;

	ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
	DAPLKA_RS_UNREF(mw_rp);
}

/*
 * SRQ ioctls and supporting functions
 */
/* ARGSUSED */
static int
daplka_srq_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_srq_resource_t *srq_rp;
	daplka_pd_resource_t *pd_rp;
	dapl_srq_create_t args;
	ibt_srq_sizes_t srq_sizes;
	ibt_srq_sizes_t srq_real_sizes;
	ibt_hca_attr_t *hca_attrp;
	uint64_t srq_hkey = 0;
	boolean_t inserted = B_FALSE;
	int retval;
	ibt_status_t status;

	D3("srq_create: enter\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_create_t),
	    mode);
	if (retval != 0) {
		DERR("srq_create: copyin error %d\n", retval);
		return (EFAULT);
	}
	srq_rp = kmem_zalloc(sizeof (daplka_srq_resource_t), daplka_km_flags);
	if (srq_rp == NULL) {
		DERR("srq_create: cannot allocate srq_rp\n");
		return (ENOMEM);
	}
	DAPLKA_RS_INIT(srq_rp, DAPL_TYPE_SRQ,
	    DAPLKA_RS_RNUM(ia_rp), daplka_srq_destroy);

	srq_rp->srq_hca = ia_rp->ia_hca;
	srq_rp->srq_hca_hdl = ia_rp->ia_hca_hdl;
	mutex_init(&srq_rp->srq_lock, NULL, MUTEX_DRIVER, NULL);

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.srqc_pd_hkey);
	if (pd_rp == NULL) {
		DERR("srq_create: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	srq_rp->srq_pd_res = pd_rp;

	/*
	 * these checks ensure that the requested SRQ sizes
	 * are within the limits supported by the chosen HCA.
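 * for example, with an hca_max_srqs_sz of 16K work requests, a
 * request for an srqs_sz of 32K fails here with EINVAL instead of
 * being passed to the HCA driver; srqs_sgl is bounded the same way
 * by hca_max_srq_sgl. (the figures are illustrative, not HCA data.)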
 */
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.srqc_sizes.srqs_sz > hca_attrp->hca_max_srqs_sz) {
		DERR("srq_create: invalid srqs_sz %d\n",
		    args.srqc_sizes.srqs_sz);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.srqc_sizes.srqs_sgl > hca_attrp->hca_max_srq_sgl) {
		DERR("srq_create: invalid srqs_sgl %d\n",
		    args.srqc_sizes.srqs_sgl);
		retval = EINVAL;
		goto cleanup;
	}

	D3("srq_create: srq_sgl %d, srq_sz %d\n",
	    args.srqc_sizes.srqs_sgl, args.srqc_sizes.srqs_sz);

	srq_sizes.srq_wr_sz = args.srqc_sizes.srqs_sz;
	srq_sizes.srq_sgl_sz = args.srqc_sizes.srqs_sgl;

	/* create srq */
	status = daplka_ibt_alloc_srq(srq_rp, ia_rp->ia_hca_hdl,
	    IBT_SRQ_USER_MAP, pd_rp->pd_hdl, &srq_sizes, &srq_rp->srq_hdl,
	    &srq_real_sizes);
	if (status != IBT_SUCCESS) {
		DERR("srq_create: alloc_srq returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
	args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
	    &args.srqc_data_out, sizeof (args.srqc_data_out));

	if (status != IBT_SUCCESS) {
		DERR("srq_create: ibt_ci_data_out error(%d)\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	srq_rp->srq_real_size = srq_real_sizes.srq_wr_sz;

	/* preparing to copyout map_data back to the library */
	args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
	args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;

	/* insert into srq hash table */
	retval = daplka_hash_insert(&ia_rp->ia_srq_htbl,
	    &srq_hkey, (void *)srq_rp);
	if (retval != 0) {
		DERR("srq_create: cannot insert srq resource into srq_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	/* return hkey to library */
	args.srqc_hkey = srq_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_create_t),
	    mode);
	if (retval != 0) {
		DERR("srq_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

	D3("srq_create: %p, 0x%llx\n", srq_rp->srq_hdl, (longlong_t)srq_hkey);
	D3("    sz(%d) sgl(%d)\n",
	    args.srqc_real_sizes.srqs_sz, args.srqc_real_sizes.srqs_sgl);
	D3("srq_create: exit\n");
	return (0);

cleanup:
	if (inserted) {
		daplka_srq_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_srq_htbl, srq_hkey,
		    (void **)&free_rp);
		if (free_rp != srq_rp) {
			/*
			 * this case is impossible because srq_free will
			 * wait until our state transition is complete.
 */
			DERR("srq_create: cannot remove srq from hash table\n");
			ASSERT(B_FALSE);
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(srq_rp);
	return (retval);
}

/*
 * Resize an existing SRQ
 */
/* ARGSUSED */
static int
daplka_srq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_srq_resource_t *srq_rp = NULL;
	ibt_hca_attr_t *hca_attrp;
	dapl_srq_resize_t args;
	ibt_status_t status;
	int retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("srq_resize: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get srq resource */
	srq_rp = (daplka_srq_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_srq_htbl, args.srqr_hkey);
	if (srq_rp == NULL) {
		DERR("srq_resize: cannot find srq resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);

	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.srqr_new_size > hca_attrp->hca_max_srqs_sz) {
		DERR("srq_resize: invalid srq size %d", args.srqr_new_size);
		retval = EINVAL;
		goto cleanup;
	}

	mutex_enter(&srq_rp->srq_lock);
	/*
	 * If ibt_modify_srq fails, it is primarily due to resource
	 * shortage. Per the IB spec, a resize never loses events and
	 * a resize error leaves the SRQ intact. Therefore, even if the
	 * resize request fails, we proceed and get the mapping data
	 * from the SRQ so that the library can mmap it.
	 */
	status = ibt_modify_srq(srq_rp->srq_hdl, IBT_SRQ_SET_SIZE,
	    args.srqr_new_size, 0, &args.srqr_real_size);
	if (status != IBT_SUCCESS) {
		/* we return the size of the old SRQ if resize fails */
		args.srqr_real_size = srq_rp->srq_real_size;
		ASSERT(status != IBT_SRQ_HDL_INVALID);
		DERR("srq_resize: ibt_modify_srq failed:%d\n", status);
	} else {
		srq_rp->srq_real_size = args.srqr_real_size;
	}
	mutex_exit(&srq_rp->srq_lock);

	D2("srq_resize(%d): done new_sz(%u) real_sz(%u)\n",
	    DAPLKA_RS_RNUM(srq_rp), args.srqr_new_size, args.srqr_real_size);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(srq_rp->srq_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
	    &args.srqr_data_out, sizeof (args.srqr_data_out));
	if (status != IBT_SUCCESS) {
		DERR("srq_resize: ibt_ci_data_out error(%d)\n", status);
		/* return ibt_ci_data_out status */
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("srq_resize: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

cleanup:;
	if (srq_rp != NULL) {
		DAPLKA_RS_UNREF(srq_rp);
	}
	return (retval);
}

/*
 * Frees an SRQ resource.
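 *
 * the userland side is assumed to mirror pd_free (the ioctl command
 * name below is an assumption, not defined in this region):
 *
 *	dapl_srq_free_t args;
 *
 *	args.srqf_hkey = srq_hkey;
 *	(void) ioctl(ia_fd, DAPL_SRQ_FREE, &args);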
 */
/* ARGSUSED */
static int
daplka_srq_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_srq_resource_t *srq_rp = NULL;
	dapl_srq_free_t args;
	int retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_free_t), mode);
	if (retval != 0) {
		DERR("srq_free: copyin error %d\n", retval);
		return (EFAULT);
	}

	retval = daplka_hash_remove(&ia_rp->ia_srq_htbl,
	    args.srqf_hkey, (void **)&srq_rp);
	if (retval != 0 || srq_rp == NULL) {
		/*
		 * this is only possible if we have two threads
		 * calling srq_free in parallel.
		 */
		DERR("srq_free: cannot find resource retval(%d) 0x%llx\n",
		    retval, args.srqf_hkey);
		return (EINVAL);
	}

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(srq_rp);
	return (0);
}

/*
 * destroys an SRQ resource.
 * called when refcnt drops to zero.
 */
static int
daplka_srq_destroy(daplka_resource_t *gen_rp)
{
	daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)gen_rp;
	ibt_status_t status;

	ASSERT(DAPLKA_RS_REFCNT(srq_rp) == 0);

	D3("srq_destroy: entering, srq_rp 0x%p, rnum %d\n",
	    srq_rp, DAPLKA_RS_RNUM(srq_rp));
	/*
	 * destroy the srq
	 */
	if (srq_rp->srq_hdl != NULL) {
		status = daplka_ibt_free_srq(srq_rp, srq_rp->srq_hdl);
		if (status != IBT_SUCCESS) {
			DERR("srq_destroy: ibt_free_srq returned %d\n",
			    status);
		}
		srq_rp->srq_hdl = NULL;
		D3("srq_destroy: srq freed, rnum %d\n", DAPLKA_RS_RNUM(srq_rp));
	}
	/*
	 * release all references
	 */
	if (srq_rp->srq_pd_res != NULL) {
		DAPLKA_RS_UNREF(srq_rp->srq_pd_res);
		srq_rp->srq_pd_res = NULL;
	}

	mutex_destroy(&srq_rp->srq_lock);
	DAPLKA_RS_FINI(srq_rp);
	kmem_free(srq_rp, sizeof (daplka_srq_resource_t));
	D3("srq_destroy: exiting, srq_rp 0x%p\n", srq_rp);
	return (0);
}

static void
daplka_hash_srq_free(void *obj)
{
	daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)obj;

	ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
	DAPLKA_RS_UNREF(srq_rp);
}

/*
 * This function tells the CM to start listening on a service id.
 * It must be called by the passive side client before the client
 * can receive connection requests from remote endpoints. If the
 * client specifies a non-zero service id (connection qualifier in
 * dapl terms), this function will attempt to bind to this service
 * id and return an error if the id is already in use. If the client
 * specifies zero as the service id, this function will try to find
 * the next available service id and return it back to the client.
 * To support the cr_handoff function, this function will, in addition
 * to creating and inserting an SP resource into the per-IA SP hash
 * table, insert the SP resource into a global SP table. This table
 * maintains all active service points created by all dapl clients.
 * CR handoff locates the target SP by iterating through this global
 * table.
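 *
 * as an example of the sid selection described above, the call made
 * below behaves roughly as follows (a sketch, not a contract):
 *
 *	ib_svc_id_t retsid;
 *
 *	status = ibt_register_service(clnt_hdl, &sd_args,
 *	    args.sr_sid, 1, &sp_rp->sp_srv_hdl, &retsid);
 *
 * with sr_sid == 0 the CM picks the next available id and returns it
 * in retsid, which is copied back to the client in sr_retsid.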
6145 */ 6146 /* ARGSUSED */ 6147 static int 6148 daplka_service_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 6149 cred_t *cred, int *rvalp) 6150 { 6151 daplka_evd_resource_t *evd_rp = NULL; 6152 daplka_sp_resource_t *sp_rp = NULL; 6153 dapl_service_register_t args; 6154 ibt_srv_desc_t sd_args; 6155 ibt_srv_bind_t sb_args; 6156 ibt_status_t status; 6157 ib_svc_id_t retsid = 0; 6158 uint64_t sp_hkey = 0; 6159 boolean_t bumped = B_FALSE; 6160 int backlog_size; 6161 int retval = 0; 6162 6163 retval = ddi_copyin((void *)arg, &args, 6164 sizeof (dapl_service_register_t), mode); 6165 if (retval != 0) { 6166 DERR("service_register: copyin error %d\n", retval); 6167 return (EINVAL); 6168 } 6169 6170 sp_rp = kmem_zalloc(sizeof (*sp_rp), daplka_km_flags); 6171 if (sp_rp == NULL) { 6172 DERR("service_register: cannot allocate sp resource\n"); 6173 return (ENOMEM); 6174 } 6175 DAPLKA_RS_INIT(sp_rp, DAPL_TYPE_SP, 6176 DAPLKA_RS_RNUM(ia_rp), daplka_sp_destroy); 6177 6178 /* check if evd exists */ 6179 evd_rp = (daplka_evd_resource_t *) 6180 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.sr_evd_hkey); 6181 if (evd_rp == NULL) { 6182 DERR("service_register: evd resource not found\n"); 6183 retval = EINVAL; 6184 goto cleanup; 6185 } 6186 /* 6187 * initialize backlog size 6188 */ 6189 if (evd_rp && evd_rp->evd_cq_real_size > 0) { 6190 backlog_size = evd_rp->evd_cq_real_size + 1; 6191 } else { 6192 backlog_size = DAPLKA_DEFAULT_SP_BACKLOG; 6193 } 6194 D2("service_register: args.sr_sid = %llu\n", (longlong_t)args.sr_sid); 6195 6196 /* save the userland sp ptr */ 6197 sp_rp->sp_cookie = args.sr_sp_cookie; 6198 sp_rp->sp_backlog_size = backlog_size; 6199 D3("service_register: backlog set to %d\n", sp_rp->sp_backlog_size); 6200 sp_rp->sp_backlog = kmem_zalloc(sp_rp->sp_backlog_size * 6201 sizeof (daplka_sp_conn_pend_t), daplka_km_flags); 6202 6203 /* save evd resource pointer */ 6204 sp_rp->sp_evd_res = evd_rp; 6205 6206 /* 6207 * save ruid here so that we can do a comparison later 6208 * when someone does cr_handoff. the check will prevent 6209 * a malicious app from passing a CR to us. 
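 *
 * a sketch of the assumed check on the handoff path (the handoff
 * code itself is outside this region, so the exact form is an
 * assumption):
 *
 *	if (crgetruid(cred) != sp_rp->sp_ruid)
 *		return (EPERM);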
6210 */ 6211 sp_rp->sp_ruid = crgetruid(cred); 6212 6213 /* fill in args for register_service */ 6214 sd_args.sd_ud_handler = NULL; 6215 sd_args.sd_handler = daplka_cm_service_handler; 6216 sd_args.sd_flags = IBT_SRV_NO_FLAGS; 6217 6218 status = ibt_register_service(daplka_dev->daplka_clnt_hdl, 6219 &sd_args, args.sr_sid, 1, &sp_rp->sp_srv_hdl, &retsid); 6220 6221 if (status != IBT_SUCCESS) { 6222 DERR("service_register: ibt_register_service returned %d\n", 6223 status); 6224 *rvalp = (int)status; 6225 retval = 0; 6226 goto cleanup; 6227 } 6228 /* save returned sid */ 6229 sp_rp->sp_conn_qual = retsid; 6230 args.sr_retsid = retsid; 6231 6232 /* fill in args for bind_service */ 6233 sb_args.sb_pkey = ia_rp->ia_port_pkey; 6234 sb_args.sb_lease = 0xffffffff; 6235 sb_args.sb_key[0] = 0x1234; 6236 sb_args.sb_key[1] = 0x5678; 6237 sb_args.sb_name = DAPLKA_DRV_NAME; 6238 6239 D2("service_register: bind(0x%llx:0x%llx)\n", 6240 (longlong_t)ia_rp->ia_hca_sgid.gid_prefix, 6241 (longlong_t)ia_rp->ia_hca_sgid.gid_guid); 6242 6243 status = ibt_bind_service(sp_rp->sp_srv_hdl, ia_rp->ia_hca_sgid, 6244 &sb_args, (void *)sp_rp, &sp_rp->sp_bind_hdl); 6245 if (status != IBT_SUCCESS) { 6246 DERR("service_register: ibt_bind_service returned %d\n", 6247 status); 6248 *rvalp = (int)status; 6249 retval = 0; 6250 goto cleanup; 6251 } 6252 6253 /* 6254 * need to bump refcnt because the global hash table will 6255 * have a reference to sp_rp 6256 */ 6257 DAPLKA_RS_REF(sp_rp); 6258 bumped = B_TRUE; 6259 6260 /* insert into global sp hash table */ 6261 sp_rp->sp_global_hkey = 0; 6262 retval = daplka_hash_insert(&daplka_global_sp_htbl, 6263 &sp_rp->sp_global_hkey, (void *)sp_rp); 6264 if (retval != 0) { 6265 DERR("service_register: cannot insert sp resource\n"); 6266 goto cleanup; 6267 } 6268 6269 /* insert into per-IA sp hash table */ 6270 retval = daplka_hash_insert(&ia_rp->ia_sp_htbl, 6271 &sp_hkey, (void *)sp_rp); 6272 if (retval != 0) { 6273 DERR("service_register: cannot insert sp resource\n"); 6274 goto cleanup; 6275 } 6276 6277 /* pass index to application */ 6278 args.sr_sp_hkey = sp_hkey; 6279 retval = ddi_copyout(&args, (void *)arg, 6280 sizeof (dapl_service_register_t), mode); 6281 if (retval != 0) { 6282 DERR("service_register: copyout error %d\n", retval); 6283 retval = EFAULT; 6284 goto cleanup; 6285 } 6286 return (0); 6287 6288 cleanup:; 6289 ASSERT(sp_rp != NULL); 6290 /* remove from ia table */ 6291 if (sp_hkey != 0) { 6292 daplka_sp_resource_t *free_rp = NULL; 6293 6294 (void) daplka_hash_remove(&ia_rp->ia_sp_htbl, 6295 sp_hkey, (void **)&free_rp); 6296 if (free_rp != sp_rp) { 6297 DERR("service_register: cannot remove sp\n"); 6298 /* 6299 * we can only get here if another thread 6300 * has completed the cleanup in svc_deregister 6301 */ 6302 return (retval); 6303 } 6304 } 6305 6306 /* remove from global table */ 6307 if (sp_rp->sp_global_hkey != 0) { 6308 daplka_sp_resource_t *free_rp = NULL; 6309 6310 /* 6311 * we get here if either the hash_insert into 6312 * ia_sp_htbl failed or the ddi_copyout failed. 6313 * hash_insert failure implies that we are the 6314 * only thread with a reference to sp. ddi_copyout 6315 * failure implies that svc_deregister could have 6316 * picked up the sp and destroyed it. but since 6317 * we got to this point, we must have removed 6318 * the sp ourselves in hash_remove above and 6319 * that the sp can be destroyed by us. 
6320 */ 6321 (void) daplka_hash_remove(&daplka_global_sp_htbl, 6322 sp_rp->sp_global_hkey, (void **)&free_rp); 6323 if (free_rp != sp_rp) { 6324 DERR("service_register: cannot remove sp\n"); 6325 /* 6326 * this case is impossible. see explanation above. 6327 */ 6328 ASSERT(B_FALSE); 6329 return (retval); 6330 } 6331 sp_rp->sp_global_hkey = 0; 6332 } 6333 /* unreference sp */ 6334 if (bumped) { 6335 DAPLKA_RS_UNREF(sp_rp); 6336 } 6337 6338 /* destroy sp resource */ 6339 DAPLKA_RS_UNREF(sp_rp); 6340 return (retval); 6341 } 6342 6343 /* 6344 * deregisters the service and removes SP from the global table. 6345 */ 6346 /* ARGSUSED */ 6347 static int 6348 daplka_service_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode, 6349 cred_t *cred, int *rvalp) 6350 { 6351 dapl_service_deregister_t args; 6352 daplka_sp_resource_t *sp_rp = NULL, *g_sp_rp = NULL; 6353 int retval; 6354 6355 retval = ddi_copyin((void *)arg, &args, 6356 sizeof (dapl_service_deregister_t), mode); 6357 6358 if (retval != 0) { 6359 DERR("service_deregister: copyin error %d\n", retval); 6360 return (EINVAL); 6361 } 6362 6363 retval = daplka_hash_remove(&ia_rp->ia_sp_htbl, 6364 args.sdr_sp_hkey, (void **)&sp_rp); 6365 if (retval != 0 || sp_rp == NULL) { 6366 DERR("service_deregister: cannot find sp resource\n"); 6367 return (EINVAL); 6368 } 6369 6370 retval = daplka_hash_remove(&daplka_global_sp_htbl, 6371 sp_rp->sp_global_hkey, (void **)&g_sp_rp); 6372 if (retval != 0 || g_sp_rp == NULL) { 6373 DERR("service_deregister: cannot find sp resource\n"); 6374 } 6375 6376 /* remove the global reference */ 6377 if (g_sp_rp == sp_rp) { 6378 DAPLKA_RS_UNREF(g_sp_rp); 6379 } 6380 6381 DAPLKA_RS_UNREF(sp_rp); 6382 return (0); 6383 } 6384 6385 /* 6386 * destroys a service point. 6387 * called when the refcnt drops to zero. 6388 */ 6389 static int 6390 daplka_sp_destroy(daplka_resource_t *gen_rp) 6391 { 6392 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)gen_rp; 6393 ibt_status_t status; 6394 6395 ASSERT(DAPLKA_RS_REFCNT(sp_rp) == 0); 6396 D3("sp_destroy: entering, sp_rp %p, rnum %d\n", 6397 sp_rp, DAPLKA_RS_RNUM(sp_rp)); 6398 6399 /* 6400 * it is possible for pending connections to remain 6401 * on an SP. We need to clean them up here. 
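 *
 * each pending CR below is failed back to its initiator with
 * ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, IBT_CM_NO_RESOURCE, ...),
 * which completes the REQ_RCV that daplka_cm_service_req had
 * deferred with IBT_CM_DEFER.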
6402 */ 6403 if (sp_rp->sp_backlog != NULL) { 6404 ibt_cm_proceed_reply_t proc_reply; 6405 int i, cnt = 0; 6406 void *spcp_sidp; 6407 6408 for (i = 0; i < sp_rp->sp_backlog_size; i++) { 6409 if (sp_rp->sp_backlog[i].spcp_state == 6410 DAPLKA_SPCP_PENDING) { 6411 cnt++; 6412 if (sp_rp->sp_backlog[i].spcp_sid == NULL) { 6413 DERR("sp_destroy: " 6414 "spcp_sid == NULL!\n"); 6415 continue; 6416 } 6417 mutex_enter(&sp_rp->sp_lock); 6418 spcp_sidp = sp_rp->sp_backlog[i].spcp_sid; 6419 sp_rp->sp_backlog[i].spcp_state = 6420 DAPLKA_SPCP_INIT; 6421 sp_rp->sp_backlog[i].spcp_sid = NULL; 6422 sp_rp->sp_backlog[i].spcp_req_len = 0; 6423 mutex_exit(&sp_rp->sp_lock); 6424 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, 6425 spcp_sidp, 6426 IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0); 6427 if (status != IBT_SUCCESS) { 6428 DERR("sp_destroy: proceed failed %d\n", 6429 status); 6430 } 6431 } 6432 } 6433 if (cnt > 0) { 6434 DERR("sp_destroy: found %d pending " 6435 "connections\n", cnt); 6436 } 6437 } 6438 6439 if (sp_rp->sp_srv_hdl != NULL && sp_rp->sp_bind_hdl != NULL) { 6440 status = ibt_unbind_service(sp_rp->sp_srv_hdl, 6441 sp_rp->sp_bind_hdl); 6442 if (status != IBT_SUCCESS) { 6443 DERR("sp_destroy: ibt_unbind_service " 6444 "failed: %d\n", status); 6445 } 6446 } 6447 6448 if (sp_rp->sp_srv_hdl != NULL) { 6449 status = ibt_deregister_service(daplka_dev->daplka_clnt_hdl, 6450 sp_rp->sp_srv_hdl); 6451 if (status != IBT_SUCCESS) { 6452 DERR("sp_destroy: ibt_deregister_service " 6453 "failed: %d\n", status); 6454 } 6455 } 6456 if (sp_rp->sp_backlog != NULL) { 6457 kmem_free(sp_rp->sp_backlog, 6458 sp_rp->sp_backlog_size * sizeof (daplka_sp_conn_pend_t)); 6459 sp_rp->sp_backlog = NULL; 6460 sp_rp->sp_backlog_size = 0; 6461 } 6462 6463 /* 6464 * release reference to evd 6465 */ 6466 if (sp_rp->sp_evd_res != NULL) { 6467 DAPLKA_RS_UNREF(sp_rp->sp_evd_res); 6468 } 6469 sp_rp->sp_bind_hdl = NULL; 6470 sp_rp->sp_srv_hdl = NULL; 6471 DAPLKA_RS_FINI(sp_rp); 6472 kmem_free(sp_rp, sizeof (*sp_rp)); 6473 D3("sp_destroy: exiting, sp_rp %p\n", sp_rp); 6474 return (0); 6475 } 6476 6477 /* 6478 * this function is called by daplka_hash_destroy for 6479 * freeing SP resource objects 6480 */ 6481 static void 6482 daplka_hash_sp_free(void *obj) 6483 { 6484 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj; 6485 daplka_sp_resource_t *g_sp_rp; 6486 int retval; 6487 6488 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP); 6489 6490 retval = daplka_hash_remove(&daplka_global_sp_htbl, 6491 sp_rp->sp_global_hkey, (void **)&g_sp_rp); 6492 if (retval != 0 || g_sp_rp == NULL) { 6493 DERR("sp_free: cannot find sp resource\n"); 6494 } 6495 if (g_sp_rp == sp_rp) { 6496 DAPLKA_RS_UNREF(g_sp_rp); 6497 } 6498 6499 DAPLKA_RS_UNREF(sp_rp); 6500 } 6501 6502 static void 6503 daplka_hash_sp_unref(void *obj) 6504 { 6505 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj; 6506 6507 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP); 6508 DAPLKA_RS_UNREF(sp_rp); 6509 } 6510 6511 /* 6512 * Passive side CM handlers 6513 */ 6514 6515 /* 6516 * processes the REQ_RCV event 6517 */ 6518 /* ARGSUSED */ 6519 static ibt_cm_status_t 6520 daplka_cm_service_req(daplka_sp_resource_t *spp, ibt_cm_event_t *event, 6521 ibt_cm_return_args_t *ret_args, void *pr_data, ibt_priv_data_len_t pr_len) 6522 { 6523 daplka_sp_conn_pend_t *conn = NULL; 6524 daplka_evd_event_t *cr_ev = NULL; 6525 ibt_cm_status_t cm_status = IBT_CM_DEFAULT; 6526 uint16_t bkl_index; 6527 ibt_status_t status; 6528 6529 /* 6530 * acquire a slot in the connection backlog of this service point 
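 *
 * (a slot moves DAPLKA_SPCP_INIT -> DAPLKA_SPCP_PENDING here; the
 * cleanup path below, and sp_destroy above, move it back to
 * DAPLKA_SPCP_INIT; the accept/reject paths are assumed to do the
 * same once the CR is resolved)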
6531 */ 6532 mutex_enter(&spp->sp_lock); 6533 for (bkl_index = 0; bkl_index < spp->sp_backlog_size; bkl_index++) { 6534 if (spp->sp_backlog[bkl_index].spcp_state == DAPLKA_SPCP_INIT) { 6535 conn = &spp->sp_backlog[bkl_index]; 6536 ASSERT(conn->spcp_sid == NULL); 6537 conn->spcp_state = DAPLKA_SPCP_PENDING; 6538 conn->spcp_sid = event->cm_session_id; 6539 break; 6540 } 6541 } 6542 mutex_exit(&spp->sp_lock); 6543 6544 /* 6545 * too many pending connections 6546 */ 6547 if (bkl_index == spp->sp_backlog_size) { 6548 DERR("service_req: connection pending exceeded %d limit\n", 6549 spp->sp_backlog_size); 6550 return (IBT_CM_NO_RESOURCE); 6551 } 6552 6553 /* 6554 * save data for cr_handoff 6555 */ 6556 if (pr_data != NULL && pr_len > 0) { 6557 int trunc_len = pr_len; 6558 6559 if (trunc_len > DAPL_MAX_PRIVATE_DATA_SIZE) { 6560 DERR("service_req: private data truncated\n"); 6561 trunc_len = DAPL_MAX_PRIVATE_DATA_SIZE; 6562 } 6563 conn->spcp_req_len = trunc_len; 6564 bcopy(pr_data, conn->spcp_req_data, trunc_len); 6565 } else { 6566 conn->spcp_req_len = 0; 6567 } 6568 conn->spcp_rdma_ra_in = event->cm_event.req.req_rdma_ra_in; 6569 conn->spcp_rdma_ra_out = event->cm_event.req.req_rdma_ra_out; 6570 6571 /* 6572 * create a CR event 6573 */ 6574 cr_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 6575 if (cr_ev == NULL) { 6576 DERR("service_req: could not alloc cr_ev\n"); 6577 cm_status = IBT_CM_NO_RESOURCE; 6578 goto cleanup; 6579 } 6580 6581 cr_ev->ee_next = NULL; 6582 cr_ev->ee_cmev.ec_cm_cookie = spp->sp_cookie; 6583 cr_ev->ee_cmev.ec_cm_is_passive = B_TRUE; 6584 cr_ev->ee_cmev.ec_cm_psep_cookie = DAPLKA_CREATE_PSEP_COOKIE(bkl_index); 6585 /* 6586 * save the requestor gid 6587 * daplka_event_poll needs this if this is a third party REQ_RCV 6588 */ 6589 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix = 6590 event->cm_event.req.req_prim_addr.av_dgid.gid_prefix; 6591 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid = 6592 event->cm_event.req.req_prim_addr.av_dgid.gid_guid; 6593 6594 /* 6595 * set event type 6596 */ 6597 if (pr_len == 0) { 6598 cr_ev->ee_cmev.ec_cm_ev_type = 6599 DAPL_IB_CME_CONNECTION_REQUEST_PENDING; 6600 } else { 6601 cr_ev->ee_cmev.ec_cm_ev_priv_data = 6602 kmem_zalloc(pr_len, KM_NOSLEEP); 6603 if (cr_ev->ee_cmev.ec_cm_ev_priv_data == NULL) { 6604 DERR("service_req: could not alloc priv\n"); 6605 cm_status = IBT_CM_NO_RESOURCE; 6606 goto cleanup; 6607 } 6608 bcopy(pr_data, cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len); 6609 cr_ev->ee_cmev.ec_cm_ev_type = 6610 DAPL_IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA; 6611 } 6612 cr_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len; 6613 6614 /* 6615 * tell the active side to expect the processing time to be 6616 * at most equal to daplka_cm_delay 6617 */ 6618 status = ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id, 6619 daplka_cm_delay, NULL, 0); 6620 if (status != IBT_SUCCESS) { 6621 DERR("service_req: ibt_cm_delay failed %d\n", status); 6622 cm_status = IBT_CM_NO_RESOURCE; 6623 goto cleanup; 6624 } 6625 6626 /* 6627 * enqueue cr_ev onto the cr_events list of the EVD 6628 * corresponding to the SP 6629 */ 6630 D2("service_req: enqueue event(%p) evdp(%p) priv_data(%p) " 6631 "priv_len(%d) psep(0x%llx)\n", cr_ev, spp->sp_evd_res, 6632 cr_ev->ee_cmev.ec_cm_ev_priv_data, 6633 (int)cr_ev->ee_cmev.ec_cm_ev_priv_data_len, 6634 (longlong_t)cr_ev->ee_cmev.ec_cm_psep_cookie); 6635 6636 daplka_evd_wakeup(spp->sp_evd_res, 6637 &spp->sp_evd_res->evd_cr_events, cr_ev); 6638 6639 return (IBT_CM_DEFER); 6640 6641 cleanup:; 6642 /* 6643 * free the cr 
event 6644 */ 6645 if (cr_ev != NULL) { 6646 if (cr_ev->ee_cmev.ec_cm_ev_priv_data != NULL) { 6647 kmem_free(cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len); 6648 cr_ev->ee_cmev.ec_cm_ev_priv_data = NULL; 6649 cr_ev->ee_cmev.ec_cm_ev_priv_data_len = 0; 6650 } 6651 kmem_free(cr_ev, sizeof (daplka_evd_event_t)); 6652 } 6653 /* 6654 * release our slot in the backlog array 6655 */ 6656 if (conn != NULL) { 6657 mutex_enter(&spp->sp_lock); 6658 ASSERT(conn->spcp_state == DAPLKA_SPCP_PENDING); 6659 ASSERT(conn->spcp_sid == event->cm_session_id); 6660 conn->spcp_state = DAPLKA_SPCP_INIT; 6661 conn->spcp_req_len = 0; 6662 conn->spcp_sid = NULL; 6663 mutex_exit(&spp->sp_lock); 6664 } 6665 return (cm_status); 6666 } 6667 6668 /* 6669 * processes the CONN_CLOSED event 6670 */ 6671 /* ARGSUSED */ 6672 static ibt_cm_status_t 6673 daplka_cm_service_conn_closed(daplka_sp_resource_t *sp_rp, 6674 ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, 6675 void *priv_data, ibt_priv_data_len_t len) 6676 { 6677 daplka_ep_resource_t *ep_rp; 6678 daplka_evd_event_t *disc_ev; 6679 uint32_t old_state, new_state; 6680 6681 ep_rp = (daplka_ep_resource_t *) 6682 ibt_get_chan_private(event->cm_channel); 6683 if (ep_rp == NULL) { 6684 DERR("service_conn_closed: ep_rp == NULL\n"); 6685 return (IBT_CM_ACCEPT); 6686 } 6687 6688 /* 6689 * verify that the ep_state is either CONNECTED or 6690 * DISCONNECTING. if it is not in either states return 6691 * without generating an event. 6692 */ 6693 new_state = old_state = daplka_ep_get_state(ep_rp); 6694 if (old_state != DAPLKA_EP_STATE_CONNECTED && 6695 old_state != DAPLKA_EP_STATE_DISCONNECTING) { 6696 /* 6697 * we can get here if the connection is being aborted 6698 */ 6699 D2("service_conn_closed: conn aborted, state = %d, " 6700 "closed = %d\n", old_state, (int)event->cm_event.closed); 6701 daplka_ep_set_state(ep_rp, old_state, new_state); 6702 return (IBT_CM_ACCEPT); 6703 } 6704 6705 /* 6706 * create a DAPL_IB_CME_DISCONNECTED event 6707 */ 6708 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 6709 if (disc_ev == NULL) { 6710 DERR("service_conn_closed: cannot alloc disc_ev\n"); 6711 daplka_ep_set_state(ep_rp, old_state, new_state); 6712 return (IBT_CM_ACCEPT); 6713 } 6714 6715 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED; 6716 disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie; 6717 disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE; 6718 disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie; 6719 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL; 6720 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0; 6721 6722 D2("service_conn_closed: enqueue event(%p) evdp(%p) psep(0x%llx)\n", 6723 disc_ev, sp_rp->sp_evd_res, (longlong_t)ep_rp->ep_psep_cookie); 6724 6725 /* 6726 * transition ep_state to DISCONNECTED 6727 */ 6728 new_state = DAPLKA_EP_STATE_DISCONNECTED; 6729 daplka_ep_set_state(ep_rp, old_state, new_state); 6730 6731 /* 6732 * enqueue event onto the conn_evd owned by ep_rp 6733 */ 6734 daplka_evd_wakeup(ep_rp->ep_conn_evd, 6735 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev); 6736 6737 return (IBT_CM_ACCEPT); 6738 } 6739 6740 /* 6741 * processes the CONN_EST event 6742 */ 6743 /* ARGSUSED */ 6744 static ibt_cm_status_t 6745 daplka_cm_service_conn_est(daplka_sp_resource_t *sp_rp, ibt_cm_event_t *event, 6746 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 6747 { 6748 daplka_ep_resource_t *ep_rp; 6749 daplka_evd_event_t *conn_ev; 6750 void *pr_data = event->cm_priv_data; 6751 ibt_priv_data_len_t pr_len = event->cm_priv_data_len; 6752 uint32_t 
	    old_state, new_state;

	ep_rp = (daplka_ep_resource_t *)
	    ibt_get_chan_private(event->cm_channel);
	if (ep_rp == NULL) {
		DERR("service_conn_est: ep_rp == NULL\n");
		return (IBT_CM_ACCEPT);
	}

	/*
	 * verify that ep_state is ACCEPTING. if it is not in this
	 * state, return without generating an event.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_ACCEPTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("service_conn_est: conn aborted, state = %d\n",
		    old_state);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_CONNECTED event
	 */
	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (conn_ev == NULL) {
		DERR("service_conn_est: conn_ev alloc failed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
	conn_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
	conn_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	conn_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;

	/*
	 * copy private data into event
	 */
	if (pr_len > 0) {
		conn_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);
		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("service_conn_est: pr_data alloc failed\n");
			daplka_ep_set_state(ep_rp, old_state, new_state);
			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
			return (IBT_CM_ACCEPT);
		}
		bcopy(pr_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
	}
	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	D2("service_conn_est: enqueue event(%p) evdp(%p)\n",
	    conn_ev, ep_rp->ep_conn_evd);

	/*
	 * transition ep_state to CONNECTED
	 */
	new_state = DAPLKA_EP_STATE_CONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);

	return (IBT_CM_ACCEPT);
}

/*
 * processes the FAILURE event
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_event_failure(daplka_sp_resource_t *sp_rp,
    ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, void *priv_data,
    ibt_priv_data_len_t len)
{
	daplka_evd_event_t *disc_ev;
	daplka_ep_resource_t *ep_rp;
	uint32_t old_state, new_state;
	ibt_rc_chan_query_attr_t chan_attrs;
	ibt_status_t status;

	/*
	 * check that we still have a valid cm_channel before continuing
	 */
	if (event->cm_channel == NULL) {
		DERR("service_event_failure: event->cm_channel == NULL\n");
		return (IBT_CM_ACCEPT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    ibt_get_chan_private(event->cm_channel);
	if (ep_rp == NULL) {
		DERR("service_event_failure: ep_rp == NULL\n");
		return (IBT_CM_ACCEPT);
	}

	/*
	 * verify that ep_state is ACCEPTING or DISCONNECTING. if it
	 * is not in either state, return without generating an event.
6857 */ 6858 new_state = old_state = daplka_ep_get_state(ep_rp); 6859 if (old_state != DAPLKA_EP_STATE_ACCEPTING && 6860 old_state != DAPLKA_EP_STATE_DISCONNECTING) { 6861 /* 6862 * we can get here if the connection is being aborted 6863 */ 6864 DERR("service_event_failure: conn aborted, state = %d, " 6865 "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state, 6866 (int)event->cm_event.failed.cf_code, 6867 (int)event->cm_event.failed.cf_msg, 6868 (int)event->cm_event.failed.cf_reason); 6869 6870 daplka_ep_set_state(ep_rp, old_state, new_state); 6871 return (IBT_CM_ACCEPT); 6872 } 6873 6874 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t)); 6875 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs); 6876 6877 if ((status == IBT_SUCCESS) && 6878 (chan_attrs.rc_state != IBT_STATE_ERROR)) { 6879 DERR("service_event_failure: conn abort qpn %d state %d\n", 6880 chan_attrs.rc_qpn, chan_attrs.rc_state); 6881 6882 /* explicit transition the QP to ERROR state */ 6883 status = ibt_flush_channel(ep_rp->ep_chan_hdl); 6884 } 6885 6886 /* 6887 * create an event 6888 */ 6889 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 6890 if (disc_ev == NULL) { 6891 DERR("service_event_failure: cannot alloc disc_ev\n"); 6892 daplka_ep_set_state(ep_rp, old_state, new_state); 6893 return (IBT_CM_ACCEPT); 6894 } 6895 6896 /* 6897 * fill in the appropriate event type 6898 */ 6899 if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) { 6900 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT; 6901 } else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) { 6902 switch (event->cm_event.failed.cf_reason) { 6903 case IBT_CM_INVALID_CID: 6904 disc_ev->ee_cmev.ec_cm_ev_type = 6905 DAPL_IB_CME_DESTINATION_REJECT; 6906 break; 6907 default: 6908 disc_ev->ee_cmev.ec_cm_ev_type = 6909 DAPL_IB_CME_LOCAL_FAILURE; 6910 break; 6911 } 6912 } else { 6913 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE; 6914 } 6915 disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie; 6916 disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE; 6917 disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie; 6918 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0; 6919 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL; 6920 6921 D2("service_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) " 6922 "cf_msg(%d) cf_reason(%d) psep(0x%llx)\n", disc_ev, 6923 ep_rp->ep_conn_evd, (int)event->cm_event.failed.cf_code, 6924 (int)event->cm_event.failed.cf_msg, 6925 (int)event->cm_event.failed.cf_reason, 6926 (longlong_t)ep_rp->ep_psep_cookie); 6927 6928 /* 6929 * transition ep_state to DISCONNECTED 6930 */ 6931 new_state = DAPLKA_EP_STATE_DISCONNECTED; 6932 daplka_ep_set_state(ep_rp, old_state, new_state); 6933 6934 /* 6935 * enqueue event onto the conn_evd owned by ep_rp 6936 */ 6937 daplka_evd_wakeup(ep_rp->ep_conn_evd, 6938 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev); 6939 6940 return (IBT_CM_ACCEPT); 6941 } 6942 6943 /* 6944 * this is the passive side CM handler. it gets registered 6945 * when an SP resource is created in daplka_service_register. 
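 *
 * dispatch summary (from the cases below): REQ_RCV is deferred with
 * IBT_CM_DEFER until the client resolves the CR; REP_RCV and MRA_RCV
 * get IBT_CM_DEFAULT processing; CONN_CLOSED, CONN_EST, FAILURE and
 * LAP_RCV return IBT_CM_ACCEPT after queueing any needed event.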
6946 */ 6947 static ibt_cm_status_t 6948 daplka_cm_service_handler(void *cm_private, ibt_cm_event_t *event, 6949 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 6950 { 6951 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)cm_private; 6952 6953 if (sp_rp == NULL) { 6954 DERR("service_handler: sp_rp == NULL\n"); 6955 return (IBT_CM_NO_RESOURCE); 6956 } 6957 /* 6958 * default is not to return priv data 6959 */ 6960 if (ret_args != NULL) { 6961 ret_args->cm_ret_len = 0; 6962 } 6963 6964 switch (event->cm_type) { 6965 case IBT_CM_EVENT_REQ_RCV: 6966 D2("service_handler: IBT_CM_EVENT_REQ_RCV\n"); 6967 return (daplka_cm_service_req(sp_rp, event, ret_args, 6968 event->cm_priv_data, event->cm_priv_data_len)); 6969 6970 case IBT_CM_EVENT_REP_RCV: 6971 /* passive side should not receive this event */ 6972 D2("service_handler: IBT_CM_EVENT_REP_RCV\n"); 6973 return (IBT_CM_DEFAULT); 6974 6975 case IBT_CM_EVENT_CONN_CLOSED: 6976 D2("service_handler: IBT_CM_EVENT_CONN_CLOSED %d\n", 6977 event->cm_event.closed); 6978 return (daplka_cm_service_conn_closed(sp_rp, event, ret_args, 6979 priv_data, len)); 6980 6981 case IBT_CM_EVENT_MRA_RCV: 6982 /* passive side does default processing MRA event */ 6983 D2("service_handler: IBT_CM_EVENT_MRA_RCV\n"); 6984 return (IBT_CM_DEFAULT); 6985 6986 case IBT_CM_EVENT_CONN_EST: 6987 D2("service_handler: IBT_CM_EVENT_CONN_EST\n"); 6988 return (daplka_cm_service_conn_est(sp_rp, event, ret_args, 6989 priv_data, len)); 6990 6991 case IBT_CM_EVENT_FAILURE: 6992 D2("service_handler: IBT_CM_EVENT_FAILURE\n"); 6993 return (daplka_cm_service_event_failure(sp_rp, event, ret_args, 6994 priv_data, len)); 6995 case IBT_CM_EVENT_LAP_RCV: 6996 /* active side had initiated a path migration operation */ 6997 D2("service_handler: IBT_CM_EVENT_LAP_RCV\n"); 6998 return (IBT_CM_ACCEPT); 6999 default: 7000 DERR("service_handler: invalid event %d\n", event->cm_type); 7001 break; 7002 } 7003 return (IBT_CM_DEFAULT); 7004 } 7005 7006 /* 7007 * Active side CM handlers 7008 */ 7009 7010 /* 7011 * Processes the REP_RCV event. When the passive side accepts the 7012 * connection, this handler is called. We make a copy of the private 7013 * data into the ep so that it can be passed back to userland in when 7014 * the CONN_EST event occurs. 7015 */ 7016 /* ARGSUSED */ 7017 static ibt_cm_status_t 7018 daplka_cm_rc_rep_rcv(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event, 7019 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 7020 { 7021 void *pr_data = event->cm_priv_data; 7022 ibt_priv_data_len_t pr_len = event->cm_priv_data_len; 7023 uint32_t old_state, new_state; 7024 7025 D2("rc_rep_rcv: pr_data(0x%p), pr_len(%d)\n", pr_data, 7026 (int)pr_len); 7027 7028 ASSERT(ep_rp != NULL); 7029 new_state = old_state = daplka_ep_get_state(ep_rp); 7030 if (old_state != DAPLKA_EP_STATE_CONNECTING) { 7031 /* 7032 * we can get here if the connection is being aborted 7033 */ 7034 DERR("rc_rep_rcv: conn aborted, state = %d\n", old_state); 7035 daplka_ep_set_state(ep_rp, old_state, new_state); 7036 return (IBT_CM_NO_CHANNEL); 7037 } 7038 7039 /* 7040 * we do not cancel the timer here because the connection 7041 * handshake is still in progress. 7042 */ 7043 7044 /* 7045 * save the private data. it will be passed up when 7046 * the connection is established. 7047 */ 7048 if (pr_len > 0) { 7049 ep_rp->ep_priv_len = pr_len; 7050 bcopy(pr_data, ep_rp->ep_priv_data, (size_t)pr_len); 7051 } 7052 7053 /* 7054 * we do not actually transition to a different state. 
7055 * the state will change when we get a conn_est, failure, 7056 * closed, or timeout event. 7057 */ 7058 daplka_ep_set_state(ep_rp, old_state, new_state); 7059 return (IBT_CM_ACCEPT); 7060 } 7061 7062 /* 7063 * Processes the CONN_CLOSED event. This gets called when either 7064 * the active or passive side closes the rc channel. 7065 */ 7066 /* ARGSUSED */ 7067 static ibt_cm_status_t 7068 daplka_cm_rc_conn_closed(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event, 7069 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 7070 { 7071 daplka_evd_event_t *disc_ev; 7072 uint32_t old_state, new_state; 7073 7074 ASSERT(ep_rp != NULL); 7075 old_state = new_state = daplka_ep_get_state(ep_rp); 7076 if (old_state != DAPLKA_EP_STATE_CONNECTED && 7077 old_state != DAPLKA_EP_STATE_DISCONNECTING) { 7078 /* 7079 * we can get here if the connection is being aborted 7080 */ 7081 D2("rc_conn_closed: conn aborted, state = %d, " 7082 "closed = %d\n", old_state, (int)event->cm_event.closed); 7083 daplka_ep_set_state(ep_rp, old_state, new_state); 7084 return (IBT_CM_ACCEPT); 7085 } 7086 7087 /* 7088 * it's ok for the timer to fire at this point. the 7089 * taskq thread that processes the timer will just wait 7090 * until we are done with our state transition. 7091 */ 7092 if (daplka_cancel_timer(ep_rp) != 0) { 7093 /* 7094 * daplka_cancel_timer returns -1 if the timer is 7095 * being processed and 0 for all other cases. 7096 * we need to reset ep_state to allow timer processing 7097 * to continue. 7098 */ 7099 DERR("rc_conn_closed: timer is being processed\n"); 7100 daplka_ep_set_state(ep_rp, old_state, new_state); 7101 return (IBT_CM_ACCEPT); 7102 } 7103 7104 /* 7105 * create a DAPL_IB_CME_DISCONNECTED event 7106 */ 7107 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 7108 if (disc_ev == NULL) { 7109 DERR("rc_conn_closed: could not alloc ev\n"); 7110 daplka_ep_set_state(ep_rp, old_state, new_state); 7111 return (IBT_CM_ACCEPT); 7112 } 7113 7114 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED; 7115 disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie; 7116 disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE; 7117 disc_ev->ee_cmev.ec_cm_psep_cookie = 0; 7118 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL; 7119 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0; 7120 7121 D2("rc_conn_closed: enqueue event(%p) evdp(%p) closed(%d)\n", 7122 disc_ev, ep_rp->ep_conn_evd, (int)event->cm_event.closed); 7123 7124 /* 7125 * transition ep_state to DISCONNECTED 7126 */ 7127 new_state = DAPLKA_EP_STATE_DISCONNECTED; 7128 daplka_ep_set_state(ep_rp, old_state, new_state); 7129 7130 /* 7131 * enqueue event onto the conn_evd owned by ep_rp 7132 */ 7133 daplka_evd_wakeup(ep_rp->ep_conn_evd, 7134 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev); 7135 7136 return (IBT_CM_ACCEPT); 7137 } 7138 7139 /* 7140 * processes the CONN_EST event 7141 */ 7142 /* ARGSUSED */ 7143 static ibt_cm_status_t 7144 daplka_cm_rc_conn_est(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event, 7145 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 7146 { 7147 daplka_evd_event_t *conn_ev; 7148 uint32_t old_state, new_state; 7149 7150 ASSERT(ep_rp != NULL); 7151 old_state = new_state = daplka_ep_get_state(ep_rp); 7152 if (old_state != DAPLKA_EP_STATE_CONNECTING) { 7153 /* 7154 * we can get here if the connection is being aborted 7155 */ 7156 DERR("rc_conn_est: conn aborted, state = %d\n", old_state); 7157 daplka_ep_set_state(ep_rp, old_state, new_state); 7158 return (IBT_CM_ACCEPT); 7159 } 7160 7161 /* 7162 * 
it's ok for the timer to fire at this point. the 7163 * taskq thread that processes the timer will just wait 7164 * until we are done with our state transition. 7165 */ 7166 if (daplka_cancel_timer(ep_rp) != 0) { 7167 /* 7168 * daplka_cancel_timer returns -1 if the timer is 7169 * being processed and 0 for all other cases. 7170 * we need to reset ep_state to allow timer processing 7171 * to continue. 7172 */ 7173 DERR("rc_conn_est: timer is being processed\n"); 7174 daplka_ep_set_state(ep_rp, old_state, new_state); 7175 return (IBT_CM_ACCEPT); 7176 } 7177 7178 /* 7179 * create a DAPL_IB_CME_CONNECTED event 7180 */ 7181 conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 7182 if (conn_ev == NULL) { 7183 DERR("rc_conn_est: could not alloc ev\n"); 7184 daplka_ep_set_state(ep_rp, old_state, new_state); 7185 return (IBT_CM_ACCEPT); 7186 } 7187 7188 conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED; 7189 conn_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie; 7190 conn_ev->ee_cmev.ec_cm_is_passive = B_FALSE; 7191 conn_ev->ee_cmev.ec_cm_psep_cookie = 0; 7192 7193 /* 7194 * The private data passed back in the connection established 7195 * event is what was recvd in the daplka_cm_rc_rep_rcv handler and 7196 * saved in ep resource structure. 7197 */ 7198 if (ep_rp->ep_priv_len > 0) { 7199 conn_ev->ee_cmev.ec_cm_ev_priv_data = 7200 kmem_zalloc(ep_rp->ep_priv_len, KM_NOSLEEP); 7201 7202 if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) { 7203 DERR("rc_conn_est: could not alloc pr_data\n"); 7204 kmem_free(conn_ev, sizeof (daplka_evd_event_t)); 7205 daplka_ep_set_state(ep_rp, old_state, new_state); 7206 return (IBT_CM_ACCEPT); 7207 } 7208 bcopy(ep_rp->ep_priv_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, 7209 ep_rp->ep_priv_len); 7210 } 7211 conn_ev->ee_cmev.ec_cm_ev_priv_data_len = ep_rp->ep_priv_len; 7212 7213 D2("rc_conn_est: enqueue event(%p) evdp(%p) pr_data(0x%p), " 7214 "pr_len(%d)\n", conn_ev, ep_rp->ep_conn_evd, 7215 conn_ev->ee_cmev.ec_cm_ev_priv_data, 7216 (int)conn_ev->ee_cmev.ec_cm_ev_priv_data_len); 7217 7218 /* 7219 * transition ep_state to CONNECTED 7220 */ 7221 new_state = DAPLKA_EP_STATE_CONNECTED; 7222 daplka_ep_set_state(ep_rp, old_state, new_state); 7223 7224 /* 7225 * enqueue event onto the conn_evd owned by ep_rp 7226 */ 7227 daplka_evd_wakeup(ep_rp->ep_conn_evd, 7228 &ep_rp->ep_conn_evd->evd_conn_events, conn_ev); 7229 7230 return (IBT_CM_ACCEPT); 7231 } 7232 7233 /* 7234 * processes the FAILURE event 7235 */ 7236 /* ARGSUSED */ 7237 static ibt_cm_status_t 7238 daplka_cm_rc_event_failure(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event, 7239 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 7240 { 7241 daplka_evd_event_t *disc_ev; 7242 ibt_priv_data_len_t pr_len = event->cm_priv_data_len; 7243 void *pr_data = event->cm_priv_data; 7244 uint32_t old_state, new_state; 7245 ibt_rc_chan_query_attr_t chan_attrs; 7246 ibt_status_t status; 7247 7248 ASSERT(ep_rp != NULL); 7249 old_state = new_state = daplka_ep_get_state(ep_rp); 7250 if (old_state != DAPLKA_EP_STATE_CONNECTING && 7251 old_state != DAPLKA_EP_STATE_DISCONNECTING) { 7252 /* 7253 * we can get here if the connection is being aborted 7254 */ 7255 DERR("rc_event_failure: conn aborted, state = %d, " 7256 "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state, 7257 (int)event->cm_event.failed.cf_code, 7258 (int)event->cm_event.failed.cf_msg, 7259 (int)event->cm_event.failed.cf_reason); 7260 7261 daplka_ep_set_state(ep_rp, old_state, new_state); 7262 return (IBT_CM_ACCEPT); 7263 } 7264 7265 /* 
7266 * it's ok for the timer to fire at this point. the 7267 * taskq thread that processes the timer will just wait 7268 * until we are done with our state transition. 7269 */ 7270 if (daplka_cancel_timer(ep_rp) != 0) { 7271 /* 7272 * daplka_cancel_timer returns -1 if the timer is 7273 * being processed and 0 for all other cases. 7274 * we need to reset ep_state to allow timer processing 7275 * to continue. 7276 */ 7277 DERR("rc_event_failure: timer is being processed\n"); 7278 daplka_ep_set_state(ep_rp, old_state, new_state); 7279 return (IBT_CM_ACCEPT); 7280 } 7281 7282 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t)); 7283 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs); 7284 7285 if ((status == IBT_SUCCESS) && 7286 (chan_attrs.rc_state != IBT_STATE_ERROR)) { 7287 DERR("rc_event_failure: conn abort qpn %d state %d\n", 7288 chan_attrs.rc_qpn, chan_attrs.rc_state); 7289 7290 /* explicit transition the QP to ERROR state */ 7291 status = ibt_flush_channel(ep_rp->ep_chan_hdl); 7292 } 7293 7294 /* 7295 * create an event 7296 */ 7297 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 7298 if (disc_ev == NULL) { 7299 DERR("rc_event_failure: cannot alloc disc_ev\n"); 7300 daplka_ep_set_state(ep_rp, old_state, new_state); 7301 return (IBT_CM_ACCEPT); 7302 } 7303 7304 /* 7305 * copy private data into event 7306 */ 7307 if (pr_len > 0) { 7308 disc_ev->ee_cmev.ec_cm_ev_priv_data = 7309 kmem_zalloc(pr_len, KM_NOSLEEP); 7310 7311 if (disc_ev->ee_cmev.ec_cm_ev_priv_data == NULL) { 7312 DERR("rc_event_failure: cannot alloc pr data\n"); 7313 kmem_free(disc_ev, sizeof (daplka_evd_event_t)); 7314 daplka_ep_set_state(ep_rp, old_state, new_state); 7315 return (IBT_CM_ACCEPT); 7316 } 7317 bcopy(pr_data, disc_ev->ee_cmev.ec_cm_ev_priv_data, pr_len); 7318 } 7319 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len; 7320 7321 /* 7322 * fill in the appropriate event type 7323 */ 7324 if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) { 7325 switch (event->cm_event.failed.cf_reason) { 7326 case IBT_CM_CONSUMER: 7327 disc_ev->ee_cmev.ec_cm_ev_type = 7328 DAPL_IB_CME_DESTINATION_REJECT_PRIVATE_DATA; 7329 break; 7330 case IBT_CM_NO_CHAN: 7331 case IBT_CM_NO_RESC: 7332 disc_ev->ee_cmev.ec_cm_ev_type = 7333 DAPL_IB_CME_DESTINATION_REJECT; 7334 break; 7335 default: 7336 disc_ev->ee_cmev.ec_cm_ev_type = 7337 DAPL_IB_CME_DESTINATION_REJECT; 7338 break; 7339 } 7340 } else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) { 7341 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT; 7342 } else { 7343 /* others we'll mark as local failure */ 7344 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE; 7345 } 7346 disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie; 7347 disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE; 7348 disc_ev->ee_cmev.ec_cm_psep_cookie = 0; 7349 7350 D2("rc_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) " 7351 "cf_msg(%d) cf_reason(%d)\n", disc_ev, ep_rp->ep_conn_evd, 7352 (int)event->cm_event.failed.cf_code, 7353 (int)event->cm_event.failed.cf_msg, 7354 (int)event->cm_event.failed.cf_reason); 7355 7356 /* 7357 * transition ep_state to DISCONNECTED 7358 */ 7359 new_state = DAPLKA_EP_STATE_DISCONNECTED; 7360 daplka_ep_set_state(ep_rp, old_state, new_state); 7361 7362 /* 7363 * enqueue event onto the conn_evd owned by ep_rp 7364 */ 7365 daplka_evd_wakeup(ep_rp->ep_conn_evd, 7366 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev); 7367 7368 return (IBT_CM_ACCEPT); 7369 } 7370 7371 /* 7372 * This is the active side CM handler. 
It gets registered when 7373 * ibt_open_rc_channel is called. 7374 */ 7375 static ibt_cm_status_t 7376 daplka_cm_rc_handler(void *cm_private, ibt_cm_event_t *event, 7377 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len) 7378 { 7379 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)cm_private; 7380 7381 if (ep_rp == NULL) { 7382 DERR("rc_handler: ep_rp == NULL\n"); 7383 return (IBT_CM_NO_CHANNEL); 7384 } 7385 /* 7386 * default is not to return priv data 7387 */ 7388 if (ret_args != NULL) { 7389 ret_args->cm_ret_len = 0; 7390 } 7391 7392 switch (event->cm_type) { 7393 case IBT_CM_EVENT_REQ_RCV: 7394 /* active side should not receive this event */ 7395 D2("rc_handler: IBT_CM_EVENT_REQ_RCV\n"); 7396 break; 7397 7398 case IBT_CM_EVENT_REP_RCV: 7399 /* connection accepted by passive side */ 7400 D2("rc_handler: IBT_CM_EVENT_REP_RCV\n"); 7401 return (daplka_cm_rc_rep_rcv(ep_rp, event, ret_args, 7402 priv_data, len)); 7403 7404 case IBT_CM_EVENT_CONN_CLOSED: 7405 D2("rc_handler: IBT_CM_EVENT_CONN_CLOSED %d\n", 7406 event->cm_event.closed); 7407 return (daplka_cm_rc_conn_closed(ep_rp, event, ret_args, 7408 priv_data, len)); 7409 7410 case IBT_CM_EVENT_MRA_RCV: 7411 /* let IBTF do the default processing of the MRA event */ 7412 D2("rc_handler: IBT_CM_EVENT_MRA_RCV\n"); 7413 return (IBT_CM_DEFAULT); 7414 7415 case IBT_CM_EVENT_CONN_EST: 7416 D2("rc_handler: IBT_CM_EVENT_CONN_EST\n"); 7417 return (daplka_cm_rc_conn_est(ep_rp, event, ret_args, 7418 priv_data, len)); 7419 7420 case IBT_CM_EVENT_FAILURE: 7421 D2("rc_handler: IBT_CM_EVENT_FAILURE\n"); 7422 return (daplka_cm_rc_event_failure(ep_rp, event, ret_args, 7423 priv_data, len)); 7424 7425 default: 7426 D2("rc_handler: invalid event %d\n", event->cm_type); 7427 break; 7428 } 7429 return (IBT_CM_DEFAULT); 7430 } 7431 7432 /* 7433 * creates an IA resource and inserts it into the global resource table.
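It is reached via the DAPL_IA_CREATE ioctl on a freshly reserved minor (see daplka_common_ioctl below). A hedged sketch of the caller's side, assuming only the dapl_ia_create_t layout consumed below; fd, hca_guid, port, pkey and sadata are illustrative caller variables:
 *
 *	dapl_ia_create_t args;
 *
 *	bzero(&args, sizeof (args));
 *	args.ia_version = DAPL_IF_VERSION;
 *	args.ia_guid = hca_guid;	(GUID of the HCA to bind to)
 *	args.ia_port = port;		(1-based port on that HCA)
 *	args.ia_pkey = pkey;
 *	bcopy(sadata, args.ia_sadata, DAPL_ATS_NBYTES);
 *	if (ioctl(fd, DAPL_IA_CREATE, &args) == 0)
 *		rnum = args.ia_resnum;	(slot filled in on copyout)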
7434 */ 7435 /* ARGSUSED */ 7436 static int 7437 daplka_ia_create(minor_t rnum, intptr_t arg, int mode, 7438 cred_t *cred, int *rvalp) 7439 { 7440 daplka_ia_resource_t *ia_rp, *tmp_rp; 7441 boolean_t inserted = B_FALSE; 7442 dapl_ia_create_t args; 7443 ibt_hca_hdl_t hca_hdl; 7444 ibt_status_t status; 7445 ib_gid_t sgid; 7446 int retval; 7447 ibt_hca_portinfo_t *pinfop; 7448 uint_t pinfon; 7449 uint_t size; 7450 ibt_ar_t ar_s; 7451 daplka_hca_t *hca; 7452 7453 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ia_create_t), 7454 mode); 7455 if (retval != 0) { 7456 DERR("ia_create: copyin error %d\n", retval); 7457 return (EFAULT); 7458 } 7459 if (args.ia_version != DAPL_IF_VERSION) { 7460 DERR("ia_create: invalid version %d, expected version %d\n", 7461 args.ia_version, DAPL_IF_VERSION); 7462 return (EINVAL); 7463 } 7464 7465 /* 7466 * find the hca with the matching guid 7467 */ 7468 mutex_enter(&daplka_dev->daplka_mutex); 7469 for (hca = daplka_dev->daplka_hca_list_head; hca != NULL; 7470 hca = hca->hca_next) { 7471 if (hca->hca_guid == args.ia_guid) { 7472 DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca); 7473 break; 7474 } 7475 } 7476 mutex_exit(&daplka_dev->daplka_mutex); 7477 7478 if (hca == NULL) { 7479 DERR("ia_create: guid 0x%016llx not found\n", 7480 (longlong_t)args.ia_guid); 7481 return (EINVAL); 7482 } 7483 7484 /* 7485 * check whether port number is valid and whether it is up 7486 */ 7487 if (args.ia_port > hca->hca_nports) { 7488 DERR("ia_create: invalid hca_port %d\n", args.ia_port); 7489 DAPLKA_RELE_HCA(daplka_dev, hca); 7490 return (EINVAL); 7491 } 7492 hca_hdl = hca->hca_hdl; 7493 if (hca_hdl == NULL) { 7494 DERR("ia_create: hca_hdl == NULL\n"); 7495 DAPLKA_RELE_HCA(daplka_dev, hca); 7496 return (EINVAL); 7497 } 7498 status = ibt_query_hca_ports(hca_hdl, (uint8_t)args.ia_port, 7499 &pinfop, &pinfon, &size); 7500 if (status != IBT_SUCCESS) { 7501 DERR("ia_create: ibt_query_hca_ports returned %d\n", status); 7502 *rvalp = (int)status; 7503 DAPLKA_RELE_HCA(daplka_dev, hca); 7504 return (0); 7505 } 7506 sgid = pinfop->p_sgid_tbl[0]; 7507 ibt_free_portinfo(pinfop, size); 7508 7509 ia_rp = kmem_zalloc(sizeof (daplka_ia_resource_t), daplka_km_flags); 7510 DAPLKA_RS_INIT(ia_rp, DAPL_TYPE_IA, rnum, daplka_ia_destroy); 7511 7512 mutex_init(&ia_rp->ia_lock, NULL, MUTEX_DRIVER, NULL); 7513 cv_init(&ia_rp->ia_cv, NULL, CV_DRIVER, NULL); 7514 ia_rp->ia_hca_hdl = hca_hdl; 7515 ia_rp->ia_hca_sgid = sgid; 7516 ia_rp->ia_hca = hca; 7517 ia_rp->ia_port_num = args.ia_port; 7518 ia_rp->ia_port_pkey = args.ia_pkey; 7519 ia_rp->ia_pid = ddi_get_pid(); 7520 ia_rp->ia_async_evd_hkeys = NULL; 7521 ia_rp->ia_ar_registered = B_FALSE; 7522 bcopy(args.ia_sadata, ia_rp->ia_sadata, DAPL_ATS_NBYTES); 7523 7524 /* register Address Record */ 7525 ar_s.ar_gid = ia_rp->ia_hca_sgid; 7526 ar_s.ar_pkey = ia_rp->ia_port_pkey; 7527 bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES); 7528 #define UC(b) ar_s.ar_data[(b)] 7529 D3("daplka_ia_create: SA[8] %d.%d.%d.%d\n", 7530 UC(8), UC(9), UC(10), UC(11)); 7531 D3("daplka_ia_create: SA[12] %d.%d.%d.%d\n", 7532 UC(12), UC(13), UC(14), UC(15)); 7533 retval = ibt_register_ar(daplka_dev->daplka_clnt_hdl, &ar_s); 7534 if (retval != IBT_SUCCESS) { 7535 DERR("ia_create: failed to register Address Record.\n"); 7536 retval = EINVAL; 7537 goto cleanup; 7538 } 7539 ia_rp->ia_ar_registered = B_TRUE; 7540 7541 /* 7542 * create hash tables for all object types 7543 */ 7544 retval = daplka_hash_create(&ia_rp->ia_ep_htbl, DAPLKA_EP_HTBL_SZ, 7545 daplka_hash_ep_free, 
daplka_hash_generic_lookup); 7546 if (retval != 0) { 7547 DERR("ia_create: cannot create ep hash table\n"); 7548 goto cleanup; 7549 } 7550 retval = daplka_hash_create(&ia_rp->ia_mr_htbl, DAPLKA_MR_HTBL_SZ, 7551 daplka_hash_mr_free, daplka_hash_generic_lookup); 7552 if (retval != 0) { 7553 DERR("ia_create: cannot create mr hash table\n"); 7554 goto cleanup; 7555 } 7556 retval = daplka_hash_create(&ia_rp->ia_mw_htbl, DAPLKA_MW_HTBL_SZ, 7557 daplka_hash_mw_free, daplka_hash_generic_lookup); 7558 if (retval != 0) { 7559 DERR("ia_create: cannot create mw hash table\n"); 7560 goto cleanup; 7561 } 7562 retval = daplka_hash_create(&ia_rp->ia_pd_htbl, DAPLKA_PD_HTBL_SZ, 7563 daplka_hash_pd_free, daplka_hash_generic_lookup); 7564 if (retval != 0) { 7565 DERR("ia_create: cannot create pd hash table\n"); 7566 goto cleanup; 7567 } 7568 retval = daplka_hash_create(&ia_rp->ia_evd_htbl, DAPLKA_EVD_HTBL_SZ, 7569 daplka_hash_evd_free, daplka_hash_generic_lookup); 7570 if (retval != 0) { 7571 DERR("ia_create: cannot create evd hash table\n"); 7572 goto cleanup; 7573 } 7574 retval = daplka_hash_create(&ia_rp->ia_cno_htbl, DAPLKA_CNO_HTBL_SZ, 7575 daplka_hash_cno_free, daplka_hash_generic_lookup); 7576 if (retval != 0) { 7577 DERR("ia_create: cannot create cno hash table\n"); 7578 goto cleanup; 7579 } 7580 retval = daplka_hash_create(&ia_rp->ia_sp_htbl, DAPLKA_SP_HTBL_SZ, 7581 daplka_hash_sp_free, daplka_hash_generic_lookup); 7582 if (retval != 0) { 7583 DERR("ia_create: cannot create sp hash table\n"); 7584 goto cleanup; 7585 } 7586 retval = daplka_hash_create(&ia_rp->ia_srq_htbl, DAPLKA_SRQ_HTBL_SZ, 7587 daplka_hash_srq_free, daplka_hash_generic_lookup); 7588 if (retval != 0) { 7589 DERR("ia_create: cannot create srq hash table\n"); 7590 goto cleanup; 7591 } 7592 /* 7593 * insert ia_rp into the global resource table 7594 */ 7595 retval = daplka_resource_insert(rnum, (daplka_resource_t *)ia_rp); 7596 if (retval != 0) { 7597 DERR("ia_create: cannot insert resource\n"); 7598 goto cleanup; 7599 } 7600 inserted = B_TRUE; 7601 7602 args.ia_resnum = rnum; 7603 retval = copyout(&args, (void *)arg, sizeof (dapl_ia_create_t)); 7604 if (retval != 0) { 7605 DERR("ia_create: copyout error %d\n", retval); 7606 retval = EFAULT; 7607 goto cleanup; 7608 } 7609 return (0); 7610 7611 cleanup:; 7612 if (inserted) { 7613 tmp_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum); 7614 if (tmp_rp != ia_rp) { 7615 /* 7616 * we can return here because another thread must 7617 * have freed up the resource 7618 */ 7619 DERR("ia_create: cannot remove resource\n"); 7620 return (retval); 7621 } 7622 } 7623 DAPLKA_RS_UNREF(ia_rp); 7624 return (retval); 7625 } 7626 7627 /* 7628 * destroys an IA resource 7629 */ 7630 static int 7631 daplka_ia_destroy(daplka_resource_t *gen_rp) 7632 { 7633 daplka_ia_resource_t *ia_rp = (daplka_ia_resource_t *)gen_rp; 7634 daplka_async_evd_hkey_t *hkp; 7635 int cnt; 7636 ibt_ar_t ar_s; 7637 7638 D3("ia_destroy: entering, ia_rp 0x%p\n", ia_rp); 7639 7640 /* deregister Address Record */ 7641 if (ia_rp->ia_ar_registered) { 7642 ar_s.ar_gid = ia_rp->ia_hca_sgid; 7643 ar_s.ar_pkey = ia_rp->ia_port_pkey; 7644 bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES); 7645 (void) ibt_deregister_ar(daplka_dev->daplka_clnt_hdl, &ar_s); 7646 ia_rp->ia_ar_registered = B_FALSE; 7647 } 7648 7649 /* 7650 * destroy hash tables. make sure resources are 7651 * destroyed in the correct order. 
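dependent objects go first: MWs and MRs are torn down before the PDs they were allocated from, EPs before the SRQs and EVDs they reference, and EVDs before the CNOs they may be attached to.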
7652 */ 7653 daplka_hash_destroy(&ia_rp->ia_mw_htbl); 7654 daplka_hash_destroy(&ia_rp->ia_mr_htbl); 7655 daplka_hash_destroy(&ia_rp->ia_ep_htbl); 7656 daplka_hash_destroy(&ia_rp->ia_srq_htbl); 7657 daplka_hash_destroy(&ia_rp->ia_evd_htbl); 7658 daplka_hash_destroy(&ia_rp->ia_cno_htbl); 7659 daplka_hash_destroy(&ia_rp->ia_pd_htbl); 7660 daplka_hash_destroy(&ia_rp->ia_sp_htbl); 7661 7662 /* 7663 * free the async evd list 7664 */ 7665 cnt = 0; 7666 hkp = ia_rp->ia_async_evd_hkeys; 7667 while (hkp != NULL) { 7668 daplka_async_evd_hkey_t *free_hkp; 7669 7670 cnt++; 7671 free_hkp = hkp; 7672 hkp = hkp->aeh_next; 7673 kmem_free(free_hkp, sizeof (*free_hkp)); 7674 } 7675 if (cnt > 0) { 7676 D3("ia_destroy: freed %d hkeys\n", cnt); 7677 } 7678 mutex_destroy(&ia_rp->ia_lock); 7679 cv_destroy(&ia_rp->ia_cv); 7680 ia_rp->ia_hca_hdl = NULL; 7681 7682 DAPLKA_RS_FINI(ia_rp); 7683 7684 if (ia_rp->ia_hca) 7685 DAPLKA_RELE_HCA(daplka_dev, ia_rp->ia_hca); 7686 7687 kmem_free(ia_rp, sizeof (daplka_ia_resource_t)); 7688 D3("ia_destroy: exiting, ia_rp 0x%p\n", ia_rp); 7689 return (0); 7690 } 7691 7692 static void 7693 daplka_async_event_create(ibt_async_code_t code, ibt_async_event_t *event, 7694 uint64_t cookie, daplka_ia_resource_t *ia_rp) 7695 { 7696 daplka_evd_event_t *evp; 7697 daplka_evd_resource_t *async_evd; 7698 daplka_async_evd_hkey_t *curr; 7699 7700 mutex_enter(&ia_rp->ia_lock); 7701 curr = ia_rp->ia_async_evd_hkeys; 7702 while (curr != NULL) { 7703 /* 7704 * Note: this allocation does not zero out the buffer 7705 * since we init all the fields. 7706 */ 7707 evp = kmem_alloc(sizeof (daplka_evd_event_t), KM_NOSLEEP); 7708 if (evp == NULL) { 7709 DERR("async_event_enqueue: ia_rp(%p) evd_hkey(%llx) " 7710 "event alloc failed\n", ia_rp, 7711 (longlong_t)curr->aeh_evd_hkey); 7712 curr = curr->aeh_next; 7713 continue; 7714 } 7715 evp->ee_next = NULL; 7716 evp->ee_aev.ibae_type = code; 7717 evp->ee_aev.ibae_hca_guid = event->ev_hca_guid; 7718 evp->ee_aev.ibae_cookie = cookie; 7719 evp->ee_aev.ibae_port = event->ev_port; 7720 /* 7721 * Lookup the async evd corresponding to this ia and enqueue 7722 * evp and wake up any waiter.
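The lookup returns the evd held (the table's lookup_func takes a reference); that reference is dropped below once the event has been enqueued.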
7723 */ 7724 async_evd = (daplka_evd_resource_t *) 7725 daplka_hash_lookup(&ia_rp->ia_evd_htbl, curr->aeh_evd_hkey); 7726 if (async_evd == NULL) { /* async evd is being freed */ 7727 DERR("async_event_enqueue: ia_rp(%p) async_evd %llx " 7728 "!found\n", ia_rp, (longlong_t)curr->aeh_evd_hkey); 7729 kmem_free(evp, sizeof (daplka_evd_event_t)); 7730 curr = curr->aeh_next; 7731 continue; 7732 } 7733 daplka_evd_wakeup(async_evd, &async_evd->evd_async_events, evp); 7734 7735 /* decrement refcnt on async_evd */ 7736 DAPLKA_RS_UNREF(async_evd); 7737 curr = curr->aeh_next; 7738 } 7739 mutex_exit(&ia_rp->ia_lock); 7740 } 7741 /* 7742 * This routine is called in kernel context 7743 */ 7744 7745 /* ARGSUSED */ 7746 static void 7747 daplka_rc_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl, 7748 ibt_async_code_t code, ibt_async_event_t *event) 7749 { 7750 daplka_ep_resource_t *epp; 7751 daplka_ia_resource_t *ia_rp; 7752 minor_t ia_rnum; 7753 7754 if (event->ev_chan_hdl == NULL) { 7755 DERR("daplka_rc_async_handler: ev_chan_hdl is NULL\n"); 7756 return; 7757 } 7758 7759 mutex_enter(&daplka_dev->daplka_mutex); 7760 epp = ibt_get_chan_private(event->ev_chan_hdl); 7761 if (epp == NULL) { 7762 mutex_exit(&daplka_dev->daplka_mutex); 7763 DERR("daplka_rc_async_handler: chan_private is NULL\n"); 7764 return; 7765 } 7766 7767 /* grab a reference to this ep */ 7768 DAPLKA_RS_REF(epp); 7769 mutex_exit(&daplka_dev->daplka_mutex); 7770 7771 /* 7772 * The endpoint resource has the resource number corresponding to 7773 * the IA resource. Use that to lookup the ia resource entry 7774 */ 7775 ia_rnum = DAPLKA_RS_RNUM(epp); 7776 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum); 7777 if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) { 7778 D2("daplka_rc_async_handler: resource (%d) not found\n", 7779 ia_rnum); 7780 DAPLKA_RS_UNREF(epp); 7781 return; 7782 } 7783 7784 /* 7785 * Create an async event and chain it to the async evd 7786 */ 7787 daplka_async_event_create(code, event, epp->ep_cookie, ia_rp); 7788 7789 DAPLKA_RS_UNREF(ia_rp); 7790 DAPLKA_RS_UNREF(epp); 7791 } 7792 7793 /* 7794 * This routine is called in kernel context 7795 */ 7796 7797 /* ARGSUSED */ 7798 static void 7799 daplka_cq_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl, 7800 ibt_async_code_t code, ibt_async_event_t *event) 7801 { 7802 daplka_evd_resource_t *evdp; 7803 daplka_ia_resource_t *ia_rp; 7804 minor_t ia_rnum; 7805 7806 if (event->ev_cq_hdl == NULL) 7807 return; 7808 7809 mutex_enter(&daplka_dev->daplka_mutex); 7810 evdp = ibt_get_cq_private(event->ev_cq_hdl); 7811 if (evdp == NULL) { 7812 mutex_exit(&daplka_dev->daplka_mutex); 7813 DERR("daplka_cq_async_handler: get cq private(%p) failed\n", 7814 event->ev_cq_hdl); 7815 return; 7816 } 7817 /* grab a reference to this evd resource */ 7818 DAPLKA_RS_REF(evdp); 7819 mutex_exit(&daplka_dev->daplka_mutex); 7820 7821 /* 7822 * The EVD resource has the resource number corresponding to 7823 * the IA resource.
Use that to lookup the ia resource entry 7824 */ 7825 ia_rnum = DAPLKA_RS_RNUM(evdp); 7826 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum); 7827 if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) { 7828 DERR("daplka_cq_async_handler: resource (%d) not found\n", 7829 ia_rnum); 7830 DAPLKA_RS_UNREF(evdp); 7831 return; 7832 } 7833 7834 /* 7835 * Create an async event and chain it to the async evd 7836 */ 7837 daplka_async_event_create(code, event, evdp->evd_cookie, ia_rp); 7838 7839 /* release all the refcounts that were acquired */ 7840 DAPLKA_RS_UNREF(ia_rp); 7841 DAPLKA_RS_UNREF(evdp); 7842 } 7843 7844 /* 7845 * This routine is called in kernel context, handles unaffiliated async errors 7846 */ 7847 7848 /* ARGSUSED */ 7849 static void 7850 daplka_un_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl, 7851 ibt_async_code_t code, ibt_async_event_t *event) 7852 { 7853 int i, j; 7854 daplka_resource_blk_t *blk; 7855 daplka_resource_t *rp; 7856 daplka_ia_resource_t *ia_rp; 7857 7858 /* 7859 * Walk the resource table looking for an ia that matches the 7860 * hca_hdl. 7861 */ 7862 rw_enter(&daplka_resource.daplka_rct_lock, RW_READER); 7863 for (i = 0; i < daplka_resource.daplka_rc_len; i++) { 7864 blk = daplka_resource.daplka_rc_root[i]; 7865 if (blk == NULL) 7866 continue; 7867 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) { 7868 rp = blk->daplka_rcblk_blks[j]; 7869 if ((rp == NULL) || 7870 ((intptr_t)rp == DAPLKA_RC_RESERVED) || 7871 (rp->rs_type != DAPL_TYPE_IA)) { 7872 continue; 7873 } 7874 /* 7875 * rp is an IA resource; check if it belongs 7876 * to the hca/port for which we got the event 7877 */ 7878 ia_rp = (daplka_ia_resource_t *)rp; 7879 DAPLKA_RS_REF(ia_rp); 7880 if ((hca_hdl == ia_rp->ia_hca_hdl) && 7881 (event->ev_port == ia_rp->ia_port_num)) { 7882 /* 7883 * walk the ep hash table. Acquire a 7884 * reader lock. NULL dgid indicates 7885 * local port up event.
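With a NULL dgid every connected, non-loopback EP under this IA becomes a failback candidate (see daplka_ep_failback below).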
7886 */ 7887 daplka_hash_walk(&ia_rp->ia_ep_htbl, 7888 daplka_ep_failback, NULL, RW_READER); 7889 } 7890 DAPLKA_RS_UNREF(ia_rp); 7891 } 7892 } 7893 rw_exit(&daplka_resource.daplka_rct_lock); 7894 } 7895 7896 static int 7897 daplka_handle_hca_detach_event(ibt_async_event_t *event) 7898 { 7899 daplka_hca_t *hca; 7900 7901 /* 7902 * find the hca with the matching guid 7903 */ 7904 mutex_enter(&daplka_dev->daplka_mutex); 7905 for (hca = daplka_dev->daplka_hca_list_head; hca != NULL; 7906 hca = hca->hca_next) { 7907 if (hca->hca_guid == event->ev_hca_guid) { 7908 if (DAPLKA_HCA_BUSY(hca)) { 7909 mutex_exit(&daplka_dev->daplka_mutex); 7910 return (IBT_HCA_RESOURCES_NOT_FREED); 7911 } 7912 daplka_dequeue_hca(daplka_dev, hca); 7913 break; 7914 } 7915 } 7916 mutex_exit(&daplka_dev->daplka_mutex); 7917 7918 if (hca == NULL) 7919 return (IBT_FAILURE); 7920 7921 return (daplka_fini_hca(daplka_dev, hca)); 7922 } 7923 7924 /* 7925 * This routine is called in kernel context 7926 */ 7927 static void 7928 daplka_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl, 7929 ibt_async_code_t code, ibt_async_event_t *event) 7930 { 7931 switch (code) { 7932 case IBT_ERROR_CATASTROPHIC_CHAN: 7933 case IBT_ERROR_INVALID_REQUEST_CHAN: 7934 case IBT_ERROR_ACCESS_VIOLATION_CHAN: 7935 case IBT_ERROR_PATH_MIGRATE_REQ: 7936 D2("daplka_async_handler(): Channel affiliated=0x%x\n", code); 7937 /* These events are affiliated with the RC channel */ 7938 daplka_rc_async_handler(clnt_private, hca_hdl, code, event); 7939 break; 7940 case IBT_ERROR_CQ: 7941 /* This event is affiliated with the CQ */ 7942 D2("daplka_async_handler(): IBT_ERROR_CQ\n"); 7943 daplka_cq_async_handler(clnt_private, hca_hdl, code, event); 7944 break; 7945 case IBT_ERROR_PORT_DOWN: 7946 D2("daplka_async_handler(): IBT_PORT_DOWN\n"); 7947 break; 7948 case IBT_EVENT_PORT_UP: 7949 D2("daplka_async_handler(): IBT_PORT_UP\n"); 7950 if (daplka_apm) { 7951 daplka_un_async_handler(clnt_private, hca_hdl, code, 7952 event); 7953 } 7954 break; 7955 case IBT_HCA_ATTACH_EVENT: 7956 /* 7957 * NOTE: In some error recovery paths, it is possible to 7958 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs. 7959 */ 7960 D2("daplka_async_handler(): IBT_HCA_ATTACH\n"); 7961 (void) daplka_init_hca(daplka_dev, event->ev_hca_guid); 7962 break; 7963 case IBT_HCA_DETACH_EVENT: 7964 D2("daplka_async_handler(): IBT_HCA_DETACH\n"); 7965 /* Free all hca resources and close the HCA.
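If the HCA is still in use, the handler returns IBT_HCA_RESOURCES_NOT_FREED and leaves the HCA on our list.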
*/ 7966 (void) daplka_handle_hca_detach_event(event); 7967 break; 7968 case IBT_EVENT_PATH_MIGRATED: 7969 /* This event is affiliated with APM */ 7970 D2("daplka_async_handler(): IBT_PATH_MIGRATED.\n"); 7971 break; 7972 default: 7973 D2("daplka_async_handler(): unhandled code = 0x%x\n", code); 7974 break; 7975 } 7976 } 7977 7978 /* 7979 * This routine is called in kernel context related to Subnet events 7980 */ 7981 /*ARGSUSED*/ 7982 static void 7983 daplka_sm_notice_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code, 7984 ibt_subnet_event_t *event) 7985 { 7986 ib_gid_t *sgid = &gid; 7987 ib_gid_t *dgid; 7988 7989 dgid = &event->sm_notice_gid; 7990 switch (code) { 7991 case IBT_SM_EVENT_GID_AVAIL: 7992 /* This event is affiliated with remote port up */ 7993 D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_AVAIL\n"); 7994 if (daplka_apm) 7995 daplka_sm_gid_avail(sgid, dgid); 7996 return; 7997 case IBT_SM_EVENT_GID_UNAVAIL: 7998 /* This event is affiliated with remote port down */ 7999 D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_UNAVAIL\n"); 8000 return; 8001 default: 8002 D2("daplka_sm_notice_handler(): unhandled IBT_SM_EVENT_[%d]\n", 8003 code); 8004 return; 8005 } 8006 } 8007 8008 /* 8009 * This routine is called in kernel context, handles Subnet GID avail events 8010 * which correspond to remote port up. Setting up alternate path or path 8011 * migration (failback) has to be initiated from the active side of the 8012 * original connect. 8013 */ 8014 static void 8015 daplka_sm_gid_avail(ib_gid_t *sgid, ib_gid_t *dgid) 8016 { 8017 int i, j; 8018 daplka_resource_blk_t *blk; 8019 daplka_resource_t *rp; 8020 daplka_ia_resource_t *ia_rp; 8021 8022 D2("daplka_sm_gid_avail: sgid=%llx:%llx dgid=%llx:%llx\n", 8023 (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid, 8024 (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid); 8025 8026 /* 8027 * Walk the resource table looking for an ia that matches the sgid 8028 */ 8029 rw_enter(&daplka_resource.daplka_rct_lock, RW_READER); 8030 for (i = 0; i < daplka_resource.daplka_rc_len; i++) { 8031 blk = daplka_resource.daplka_rc_root[i]; 8032 if (blk == NULL) 8033 continue; 8034 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) { 8035 rp = blk->daplka_rcblk_blks[j]; 8036 if ((rp == NULL) || 8037 ((intptr_t)rp == DAPLKA_RC_RESERVED) || 8038 (rp->rs_type != DAPL_TYPE_IA)) { 8039 continue; 8040 } 8041 /* 8042 * rp is an IA resource; check if its gid 8043 * matches the calling sgid 8044 */ 8045 ia_rp = (daplka_ia_resource_t *)rp; 8046 DAPLKA_RS_REF(ia_rp); 8047 if ((sgid->gid_prefix == 8048 ia_rp->ia_hca_sgid.gid_prefix) && 8049 (sgid->gid_guid == ia_rp->ia_hca_sgid.gid_guid)) { 8050 /* 8051 * walk the ep hash table. Acquire a 8052 * reader lock.
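The dgid of the remote port that just became available is passed as the walk argument so that only EPs connected to that port are failed back.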
8053 */ 8054 daplka_hash_walk(&ia_rp->ia_ep_htbl, 8055 daplka_ep_failback, 8056 (void *)dgid, RW_READER); 8057 } 8058 DAPLKA_RS_UNREF(ia_rp); 8059 } 8060 } 8061 rw_exit(&daplka_resource.daplka_rct_lock); 8062 } 8063 8064 /* 8065 * This routine is called in kernel context to get and set an alternate path 8066 */ 8067 static int 8068 daplka_ep_altpath(daplka_ep_resource_t *ep_rp, ib_gid_t *dgid) 8069 { 8070 ibt_alt_path_info_t path_info; 8071 ibt_alt_path_attr_t path_attr; 8072 ibt_ap_returns_t ap_rets; 8073 ibt_status_t status; 8074 8075 D2("daplka_ep_altpath : ibt_get_alt_path()\n"); 8076 bzero(&path_info, sizeof (ibt_alt_path_info_t)); 8077 bzero(&path_attr, sizeof (ibt_alt_path_attr_t)); 8078 if (dgid != NULL) { 8079 path_attr.apa_sgid = ep_rp->ep_sgid; 8080 path_attr.apa_dgid = *dgid; 8081 } 8082 status = ibt_get_alt_path(ep_rp->ep_chan_hdl, IBT_PATH_AVAIL, 8083 &path_attr, &path_info); 8084 if (status != IBT_SUCCESS) { 8085 DERR("daplka_ep_altpath : ibt_get_alt_path failed %d\n", 8086 status); 8087 return (1); 8088 } 8089 8090 D2("daplka_ep_altpath : ibt_set_alt_path()\n"); 8091 bzero(&ap_rets, sizeof (ibt_ap_returns_t)); 8092 status = ibt_set_alt_path(ep_rp->ep_chan_hdl, IBT_BLOCKING, 8093 &path_info, NULL, 0, &ap_rets); 8094 if ((status != IBT_SUCCESS) || 8095 (ap_rets.ap_status != IBT_CM_AP_LOADED)) { 8096 DERR("daplka_ep_altpath : ibt_set_alt_path failed " 8097 "status %d ap_status %d\n", status, ap_rets.ap_status); 8098 return (1); 8099 } 8100 return (0); 8101 } 8102 8103 /* 8104 * This routine is called in kernel context to failback to the original path 8105 */ 8106 static int 8107 daplka_ep_failback(void *objp, void *arg) 8108 { 8109 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)objp; 8110 ib_gid_t *dgid; 8111 ibt_status_t status; 8112 ibt_rc_chan_query_attr_t chan_attrs; 8113 int i; 8114 8115 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP); 8116 D2("daplka_ep_failback ep : sgid=%llx:%llx dgid=%llx:%llx\n", 8117 (longlong_t)ep_rp->ep_sgid.gid_prefix, 8118 (longlong_t)ep_rp->ep_sgid.gid_guid, 8119 (longlong_t)ep_rp->ep_dgid.gid_prefix, 8120 (longlong_t)ep_rp->ep_dgid.gid_guid); 8121 8122 /* 8123 * daplka_ep_failback is called from daplka_hash_walk 8124 * which holds the read lock on hash table to protect 8125 * the endpoint resource from removal 8126 */ 8127 mutex_enter(&ep_rp->ep_lock); 8128 /* check for unconnected endpoints */ 8129 /* first check for ep state */ 8130 if (ep_rp->ep_state != DAPLKA_EP_STATE_CONNECTED) { 8131 mutex_exit(&ep_rp->ep_lock); 8132 D2("daplka_ep_failback : endpoints not connected\n"); 8133 return (0); 8134 } 8135 8136 /* second check for gids */ 8137 if (((ep_rp->ep_sgid.gid_prefix == 0) && 8138 (ep_rp->ep_sgid.gid_guid == 0)) || 8139 ((ep_rp->ep_dgid.gid_prefix == 0) && 8140 (ep_rp->ep_dgid.gid_guid == 0))) { 8141 mutex_exit(&ep_rp->ep_lock); 8142 D2("daplka_ep_failback : skip unconnected endpoints\n"); 8143 return (0); 8144 } 8145 8146 /* 8147 * matching destination ep 8148 * when dgid is NULL, the async event is a local port up. 8149 * dgid becomes wild card, i.e. 
all endpoints match 8150 */ 8151 dgid = (ib_gid_t *)arg; 8152 if (dgid == NULL) { 8153 /* ignore loopback ep */ 8154 if ((ep_rp->ep_sgid.gid_prefix == ep_rp->ep_dgid.gid_prefix) && 8155 (ep_rp->ep_sgid.gid_guid == ep_rp->ep_dgid.gid_guid)) { 8156 mutex_exit(&ep_rp->ep_lock); 8157 D2("daplka_ep_failback : skip loopback endpoints\n"); 8158 return (0); 8159 } 8160 } else { 8161 /* matching remote ep */ 8162 if ((ep_rp->ep_dgid.gid_prefix != dgid->gid_prefix) || 8163 (ep_rp->ep_dgid.gid_guid != dgid->gid_guid)) { 8164 mutex_exit(&ep_rp->ep_lock); 8165 D2("daplka_ep_failback : unrelated endpoints\n"); 8166 return (0); 8167 } 8168 } 8169 8170 /* call get and set altpath with original dgid used in ep_connect */ 8171 if (daplka_ep_altpath(ep_rp, &ep_rp->ep_dgid)) { 8172 mutex_exit(&ep_rp->ep_lock); 8173 return (0); 8174 } 8175 8176 /* 8177 * wait for migration state to be ARMed 8178 * e.g. a post_send msg will transition mig_state from REARM to ARM 8179 */ 8180 for (i = 0; i < daplka_query_aft_setaltpath; i++) { 8181 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t)); 8182 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs); 8183 if (status != IBT_SUCCESS) { 8184 mutex_exit(&ep_rp->ep_lock); 8185 DERR("daplka_ep_failback : ibt_query_rc_channel err\n"); 8186 return (0); 8187 } 8188 if (chan_attrs.rc_mig_state == IBT_STATE_ARMED) 8189 break; 8190 } 8191 8192 D2("daplka_ep_failback : query[%d] mig_st=%d\n", 8193 i, chan_attrs.rc_mig_state); 8194 D2("daplka_ep_failback : P sgid=%llx:%llx dgid=%llx:%llx\n", 8195 (longlong_t) 8196 chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_prefix, 8197 (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_guid, 8198 (longlong_t) 8199 chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_prefix, 8200 (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_guid); 8201 D2("daplka_ep_failback : A sgid=%llx:%llx dgid=%llx:%llx\n", 8202 (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_prefix, 8203 (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_guid, 8204 (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_prefix, 8205 (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_guid); 8206 8207 /* skip failback on ARMed state not reached or env override */ 8208 if ((i >= daplka_query_aft_setaltpath) || (daplka_failback == 0)) { 8209 mutex_exit(&ep_rp->ep_lock); 8210 DERR("daplka_ep_failback : ARMed state not reached\n"); 8211 return (0); 8212 } 8213 8214 D2("daplka_ep_failback : ibt_migrate_path() to original ep\n"); 8215 status = ibt_migrate_path(ep_rp->ep_chan_hdl); 8216 if (status != IBT_SUCCESS) { 8217 mutex_exit(&ep_rp->ep_lock); 8218 DERR("daplka_ep_failback : migration failed " 8219 "status %d\n", status); 8220 return (0); 8221 } 8222 8223 /* call get and set altpath with NULL dgid to indicate unspecified dgid */ 8224 (void) daplka_ep_altpath(ep_rp, NULL); 8225 mutex_exit(&ep_rp->ep_lock); 8226 return (0); 8227 } 8228 8229 /* 8230 * IBTF wrappers used for resource accounting 8231 */ 8232 static ibt_status_t 8233 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *ep_rp, ibt_hca_hdl_t hca_hdl, 8234 ibt_chan_alloc_flags_t flags, ibt_rc_chan_alloc_args_t *args, 8235 ibt_channel_hdl_t *chan_hdl_p, ibt_chan_sizes_t *sizes) 8236 { 8237 daplka_hca_t *hca_p; 8238 uint32_t max_qps; 8239 boolean_t acct_enabled; 8240 ibt_status_t status; 8241 8242 acct_enabled = daplka_accounting_enabled; 8243 hca_p = ep_rp->ep_hca; 8244 max_qps = daplka_max_qp_percent * hca_p->hca_attr.hca_max_chans / 100; 8245 8246 if (acct_enabled) { 8247 if
(daplka_max_qp_percent != 0 && 8248 max_qps <= hca_p->hca_qp_count) { 8249 DERR("ibt_alloc_rc_channel: resource limit exceeded " 8250 "(limit %d, count %d)\n", max_qps, 8251 hca_p->hca_qp_count); 8252 return (IBT_INSUFF_RESOURCE); 8253 } 8254 DAPLKA_RS_ACCT_INC(ep_rp, 1); 8255 atomic_inc_32(&hca_p->hca_qp_count); 8256 } 8257 status = ibt_alloc_rc_channel(hca_hdl, flags, args, chan_hdl_p, sizes); 8258 8259 if (status != IBT_SUCCESS && acct_enabled) { 8260 DAPLKA_RS_ACCT_DEC(ep_rp, 1); 8261 atomic_dec_32(&hca_p->hca_qp_count); 8262 } 8263 return (status); 8264 } 8265 8266 static ibt_status_t 8267 daplka_ibt_free_channel(daplka_ep_resource_t *ep_rp, ibt_channel_hdl_t chan_hdl) 8268 { 8269 daplka_hca_t *hca_p; 8270 ibt_status_t status; 8271 8272 hca_p = ep_rp->ep_hca; 8273 8274 status = ibt_free_channel(chan_hdl); 8275 if (status != IBT_SUCCESS) { 8276 return (status); 8277 } 8278 if (DAPLKA_RS_ACCT_CHARGED(ep_rp) > 0) { 8279 DAPLKA_RS_ACCT_DEC(ep_rp, 1); 8280 atomic_dec_32(&hca_p->hca_qp_count); 8281 } 8282 return (status); 8283 } 8284 8285 static ibt_status_t 8286 daplka_ibt_alloc_cq(daplka_evd_resource_t *evd_rp, ibt_hca_hdl_t hca_hdl, 8287 ibt_cq_attr_t *cq_attr, ibt_cq_hdl_t *ibt_cq_p, uint32_t *real_size) 8288 { 8289 daplka_hca_t *hca_p; 8290 uint32_t max_cqs; 8291 boolean_t acct_enabled; 8292 ibt_status_t status; 8293 8294 acct_enabled = daplka_accounting_enabled; 8295 hca_p = evd_rp->evd_hca; 8296 max_cqs = daplka_max_cq_percent * hca_p->hca_attr.hca_max_cq / 100; 8297 8298 if (acct_enabled) { 8299 if (daplka_max_cq_percent != 0 && 8300 max_cqs <= hca_p->hca_cq_count) { 8301 DERR("ibt_alloc_cq: resource limit exceeded " 8302 "(limit %d, count %d)\n", max_cqs, 8303 hca_p->hca_cq_count); 8304 return (IBT_INSUFF_RESOURCE); 8305 } 8306 DAPLKA_RS_ACCT_INC(evd_rp, 1); 8307 atomic_inc_32(&hca_p->hca_cq_count); 8308 } 8309 status = ibt_alloc_cq(hca_hdl, cq_attr, ibt_cq_p, real_size); 8310 8311 if (status != IBT_SUCCESS && acct_enabled) { 8312 DAPLKA_RS_ACCT_DEC(evd_rp, 1); 8313 atomic_dec_32(&hca_p->hca_cq_count); 8314 } 8315 return (status); 8316 } 8317 8318 static ibt_status_t 8319 daplka_ibt_free_cq(daplka_evd_resource_t *evd_rp, ibt_cq_hdl_t cq_hdl) 8320 { 8321 daplka_hca_t *hca_p; 8322 ibt_status_t status; 8323 8324 hca_p = evd_rp->evd_hca; 8325 8326 status = ibt_free_cq(cq_hdl); 8327 if (status != IBT_SUCCESS) { 8328 return (status); 8329 } 8330 if (DAPLKA_RS_ACCT_CHARGED(evd_rp) > 0) { 8331 DAPLKA_RS_ACCT_DEC(evd_rp, 1); 8332 atomic_dec_32(&hca_p->hca_cq_count); 8333 } 8334 return (status); 8335 } 8336 8337 static ibt_status_t 8338 daplka_ibt_alloc_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl, 8339 ibt_pd_flags_t flags, ibt_pd_hdl_t *pd_hdl_p) 8340 { 8341 daplka_hca_t *hca_p; 8342 uint32_t max_pds; 8343 boolean_t acct_enabled; 8344 ibt_status_t status; 8345 8346 acct_enabled = daplka_accounting_enabled; 8347 hca_p = pd_rp->pd_hca; 8348 max_pds = daplka_max_pd_percent * hca_p->hca_attr.hca_max_pd / 100; 8349 8350 if (acct_enabled) { 8351 if (daplka_max_pd_percent != 0 && 8352 max_pds <= hca_p->hca_pd_count) { 8353 DERR("ibt_alloc_pd: resource limit exceeded " 8354 "(limit %d, count %d)\n", max_pds, 8355 hca_p->hca_pd_count); 8356 return (IBT_INSUFF_RESOURCE); 8357 } 8358 DAPLKA_RS_ACCT_INC(pd_rp, 1); 8359 atomic_inc_32(&hca_p->hca_pd_count); 8360 } 8361 status = ibt_alloc_pd(hca_hdl, flags, pd_hdl_p); 8362 8363 if (status != IBT_SUCCESS && acct_enabled) { 8364 DAPLKA_RS_ACCT_DEC(pd_rp, 1); 8365 atomic_dec_32(&hca_p->hca_pd_count); 8366 } 8367 return (status); 8368 } 8369 8370 
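/*
 * All of the alloc-side wrappers in this section share one pattern;
 * a condensed sketch of the logic above and below (not a separate
 * API; names match the surrounding code, <type> stands for the
 * resource kind):
 *
 *	if (daplka_accounting_enabled) {
 *		if (limit != 0 && limit <= hca_p->hca_<type>_count)
 *			return (IBT_INSUFF_RESOURCE);
 *		DAPLKA_RS_ACCT_INC(rp, 1);	(charge the resource)
 *		atomic_inc_32(&hca_p->hca_<type>_count); (charge the HCA)
 *	}
 *	status = ibt_alloc_<type>(...);
 *	if (status != IBT_SUCCESS && acct_enabled)
 *		back out both charges;
 *
 * The free-side wrappers only uncharge when DAPLKA_RS_ACCT_CHARGED()
 * shows an outstanding charge, so toggling daplka_accounting_enabled
 * at runtime cannot underflow the counters.
 */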
static ibt_status_t 8371 daplka_ibt_free_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl, 8372 ibt_pd_hdl_t pd_hdl) 8373 { 8374 daplka_hca_t *hca_p; 8375 ibt_status_t status; 8376 8377 hca_p = pd_rp->pd_hca; 8378 8379 status = ibt_free_pd(hca_hdl, pd_hdl); 8380 if (status != IBT_SUCCESS) { 8381 return (status); 8382 } 8383 if (DAPLKA_RS_ACCT_CHARGED(pd_rp) > 0) { 8384 DAPLKA_RS_ACCT_DEC(pd_rp, 1); 8385 atomic_dec_32(&hca_p->hca_pd_count); 8386 } 8387 return (status); 8388 } 8389 8390 static ibt_status_t 8391 daplka_ibt_alloc_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl, 8392 ibt_pd_hdl_t pd_hdl, ibt_mw_flags_t flags, ibt_mw_hdl_t *mw_hdl_p, 8393 ibt_rkey_t *rkey_p) 8394 { 8395 daplka_hca_t *hca_p; 8396 uint32_t max_mws; 8397 boolean_t acct_enabled; 8398 ibt_status_t status; 8399 8400 acct_enabled = daplka_accounting_enabled; 8401 hca_p = mw_rp->mw_hca; 8402 max_mws = daplka_max_mw_percent * hca_p->hca_attr.hca_max_mem_win / 100; 8403 8404 if (acct_enabled) { 8405 if (daplka_max_mw_percent != 0 && 8406 max_mws <= hca_p->hca_mw_count) { 8407 DERR("ibt_alloc_mw: resource limit exceeded " 8408 "(limit %d, count %d)\n", max_mws, 8409 hca_p->hca_mw_count); 8410 return (IBT_INSUFF_RESOURCE); 8411 } 8412 DAPLKA_RS_ACCT_INC(mw_rp, 1); 8413 atomic_inc_32(&hca_p->hca_mw_count); 8414 } 8415 status = ibt_alloc_mw(hca_hdl, pd_hdl, flags, mw_hdl_p, rkey_p); 8416 8417 if (status != IBT_SUCCESS && acct_enabled) { 8418 DAPLKA_RS_ACCT_DEC(mw_rp, 1); 8419 atomic_dec_32(&hca_p->hca_mw_count); 8420 } 8421 return (status); 8422 } 8423 8424 static ibt_status_t 8425 daplka_ibt_free_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl, 8426 ibt_mw_hdl_t mw_hdl) 8427 { 8428 daplka_hca_t *hca_p; 8429 ibt_status_t status; 8430 8431 hca_p = mw_rp->mw_hca; 8432 8433 status = ibt_free_mw(hca_hdl, mw_hdl); 8434 if (status != IBT_SUCCESS) { 8435 return (status); 8436 } 8437 if (DAPLKA_RS_ACCT_CHARGED(mw_rp) > 0) { 8438 DAPLKA_RS_ACCT_DEC(mw_rp, 1); 8439 atomic_dec_32(&hca_p->hca_mw_count); 8440 } 8441 return (status); 8442 } 8443 8444 static ibt_status_t 8445 daplka_ibt_register_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl, 8446 ibt_pd_hdl_t pd_hdl, ibt_mr_attr_t *mr_attr, ibt_mr_hdl_t *mr_hdl_p, 8447 ibt_mr_desc_t *mr_desc_p) 8448 { 8449 daplka_hca_t *hca_p; 8450 uint32_t max_mrs; 8451 boolean_t acct_enabled; 8452 ibt_status_t status; 8453 8454 acct_enabled = daplka_accounting_enabled; 8455 hca_p = mr_rp->mr_hca; 8456 max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100; 8457 8458 if (acct_enabled) { 8459 if (daplka_max_mr_percent != 0 && 8460 max_mrs <= hca_p->hca_mr_count) { 8461 DERR("ibt_register_mr: resource limit exceeded " 8462 "(limit %d, count %d)\n", max_mrs, 8463 hca_p->hca_mr_count); 8464 return (IBT_INSUFF_RESOURCE); 8465 } 8466 DAPLKA_RS_ACCT_INC(mr_rp, 1); 8467 atomic_inc_32(&hca_p->hca_mr_count); 8468 } 8469 status = ibt_register_mr(hca_hdl, pd_hdl, mr_attr, mr_hdl_p, mr_desc_p); 8470 8471 if (status != IBT_SUCCESS && acct_enabled) { 8472 DAPLKA_RS_ACCT_DEC(mr_rp, 1); 8473 atomic_dec_32(&hca_p->hca_mr_count); 8474 } 8475 return (status); 8476 } 8477 8478 static ibt_status_t 8479 daplka_ibt_register_shared_mr(daplka_mr_resource_t *mr_rp, 8480 ibt_hca_hdl_t hca_hdl, ibt_mr_hdl_t mr_hdl, ibt_pd_hdl_t pd_hdl, 8481 ibt_smr_attr_t *smr_attr_p, ibt_mr_hdl_t *mr_hdl_p, 8482 ibt_mr_desc_t *mr_desc_p) 8483 { 8484 daplka_hca_t *hca_p; 8485 uint32_t max_mrs; 8486 boolean_t acct_enabled; 8487 ibt_status_t status; 8488 8489 acct_enabled = daplka_accounting_enabled; 8490 hca_p = 
mr_rp->mr_hca; 8491 max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100; 8492 8493 if (acct_enabled) { 8494 if (daplka_max_mr_percent != 0 && 8495 max_mrs <= hca_p->hca_mr_count) { 8496 DERR("ibt_register_shared_mr: resource limit exceeded " 8497 "(limit %d, count %d)\n", max_mrs, 8498 hca_p->hca_mr_count); 8499 return (IBT_INSUFF_RESOURCE); 8500 } 8501 DAPLKA_RS_ACCT_INC(mr_rp, 1); 8502 atomic_inc_32(&hca_p->hca_mr_count); 8503 } 8504 status = ibt_register_shared_mr(hca_hdl, mr_hdl, pd_hdl, 8505 smr_attr_p, mr_hdl_p, mr_desc_p); 8506 8507 if (status != IBT_SUCCESS && acct_enabled) { 8508 DAPLKA_RS_ACCT_DEC(mr_rp, 1); 8509 atomic_dec_32(&hca_p->hca_mr_count); 8510 } 8511 return (status); 8512 } 8513 8514 static ibt_status_t 8515 daplka_ibt_deregister_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl, 8516 ibt_mr_hdl_t mr_hdl) 8517 { 8518 daplka_hca_t *hca_p; 8519 ibt_status_t status; 8520 8521 hca_p = mr_rp->mr_hca; 8522 8523 status = ibt_deregister_mr(hca_hdl, mr_hdl); 8524 if (status != IBT_SUCCESS) { 8525 return (status); 8526 } 8527 if (DAPLKA_RS_ACCT_CHARGED(mr_rp) > 0) { 8528 DAPLKA_RS_ACCT_DEC(mr_rp, 1); 8529 atomic_dec_32(&hca_p->hca_mr_count); 8530 } 8531 return (status); 8532 } 8533 8534 static ibt_status_t 8535 daplka_ibt_alloc_srq(daplka_srq_resource_t *srq_rp, ibt_hca_hdl_t hca_hdl, 8536 ibt_srq_flags_t flags, ibt_pd_hdl_t pd, ibt_srq_sizes_t *reqsz, 8537 ibt_srq_hdl_t *srq_hdl_p, ibt_srq_sizes_t *realsz) 8538 { 8539 daplka_hca_t *hca_p; 8540 uint32_t max_srqs; 8541 boolean_t acct_enabled; 8542 ibt_status_t status; 8543 8544 acct_enabled = daplka_accounting_enabled; 8545 hca_p = srq_rp->srq_hca; 8546 max_srqs = daplka_max_srq_percent * hca_p->hca_attr.hca_max_srqs / 100; 8547 8548 if (acct_enabled) { 8549 if (daplka_max_srq_percent != 0 && 8550 max_srqs <= hca_p->hca_srq_count) { 8551 DERR("ibt_alloc_srq: resource limit exceeded " 8552 "(limit %d, count %d)\n", max_srqs, 8553 hca_p->hca_srq_count); 8554 return (IBT_INSUFF_RESOURCE); 8555 } 8556 DAPLKA_RS_ACCT_INC(srq_rp, 1); 8557 atomic_inc_32(&hca_p->hca_srq_count); 8558 } 8559 status = ibt_alloc_srq(hca_hdl, flags, pd, reqsz, srq_hdl_p, realsz); 8560 8561 if (status != IBT_SUCCESS && acct_enabled) { 8562 DAPLKA_RS_ACCT_DEC(srq_rp, 1); 8563 atomic_dec_32(&hca_p->hca_srq_count); 8564 } 8565 return (status); 8566 } 8567 8568 static ibt_status_t 8569 daplka_ibt_free_srq(daplka_srq_resource_t *srq_rp, ibt_srq_hdl_t srq_hdl) 8570 { 8571 daplka_hca_t *hca_p; 8572 ibt_status_t status; 8573 8574 hca_p = srq_rp->srq_hca; 8575 8576 D3("ibt_free_srq: %p %p\n", srq_rp, srq_hdl); 8577 8578 status = ibt_free_srq(srq_hdl); 8579 if (status != IBT_SUCCESS) { 8580 return (status); 8581 } 8582 if (DAPLKA_RS_ACCT_CHARGED(srq_rp) > 0) { 8583 DAPLKA_RS_ACCT_DEC(srq_rp, 1); 8584 atomic_dec_32(&hca_p->hca_srq_count); 8585 } 8586 return (status); 8587 } 8588 8589 8590 static int 8591 daplka_common_ioctl(int cmd, minor_t rnum, intptr_t arg, int mode, 8592 cred_t *cred, int *rvalp) 8593 { 8594 int error; 8595 8596 switch (cmd) { 8597 case DAPL_IA_CREATE: 8598 error = daplka_ia_create(rnum, arg, mode, cred, rvalp); 8599 break; 8600 8601 /* can potentially add other commands here */ 8602 8603 default: 8604 DERR("daplka_common_ioctl: cmd not supported\n"); 8605 error = DDI_FAILURE; 8606 } 8607 return (error); 8608 } 8609 8610 static int 8611 daplka_evd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8612 cred_t *cred, int *rvalp) 8613 { 8614 int error; 8615 8616 switch (cmd) { 8617 case DAPL_EVD_CREATE: 8618 error = 
daplka_evd_create(rp, arg, mode, cred, rvalp); 8619 break; 8620 8621 case DAPL_CQ_RESIZE: 8622 error = daplka_cq_resize(rp, arg, mode, cred, rvalp); 8623 break; 8624 8625 case DAPL_EVENT_POLL: 8626 error = daplka_event_poll(rp, arg, mode, cred, rvalp); 8627 break; 8628 8629 case DAPL_EVENT_WAKEUP: 8630 error = daplka_event_wakeup(rp, arg, mode, cred, rvalp); 8631 break; 8632 8633 case DAPL_EVD_MODIFY_CNO: 8634 error = daplka_evd_modify_cno(rp, arg, mode, cred, rvalp); 8635 break; 8636 8637 case DAPL_EVD_FREE: 8638 error = daplka_evd_free(rp, arg, mode, cred, rvalp); 8639 break; 8640 8641 default: 8642 DERR("daplka_evd_ioctl: cmd not supported\n"); 8643 error = DDI_FAILURE; 8644 } 8645 return (error); 8646 } 8647 8648 static int 8649 daplka_ep_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8650 cred_t *cred, int *rvalp) 8651 { 8652 int error; 8653 8654 switch (cmd) { 8655 case DAPL_EP_MODIFY: 8656 error = daplka_ep_modify(rp, arg, mode, cred, rvalp); 8657 break; 8658 8659 case DAPL_EP_FREE: 8660 error = daplka_ep_free(rp, arg, mode, cred, rvalp); 8661 break; 8662 8663 case DAPL_EP_CONNECT: 8664 error = daplka_ep_connect(rp, arg, mode, cred, rvalp); 8665 break; 8666 8667 case DAPL_EP_DISCONNECT: 8668 error = daplka_ep_disconnect(rp, arg, mode, cred, rvalp); 8669 break; 8670 8671 case DAPL_EP_REINIT: 8672 error = daplka_ep_reinit(rp, arg, mode, cred, rvalp); 8673 break; 8674 8675 case DAPL_EP_CREATE: 8676 error = daplka_ep_create(rp, arg, mode, cred, rvalp); 8677 break; 8678 8679 default: 8680 DERR("daplka_ep_ioctl: cmd not supported\n"); 8681 error = DDI_FAILURE; 8682 } 8683 return (error); 8684 } 8685 8686 static int 8687 daplka_mr_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8688 cred_t *cred, int *rvalp) 8689 { 8690 int error; 8691 8692 switch (cmd) { 8693 case DAPL_MR_REGISTER: 8694 error = daplka_mr_register(rp, arg, mode, cred, rvalp); 8695 break; 8696 8697 case DAPL_MR_REGISTER_LMR: 8698 error = daplka_mr_register_lmr(rp, arg, mode, cred, rvalp); 8699 break; 8700 8701 case DAPL_MR_REGISTER_SHARED: 8702 error = daplka_mr_register_shared(rp, arg, mode, cred, rvalp); 8703 break; 8704 8705 case DAPL_MR_DEREGISTER: 8706 error = daplka_mr_deregister(rp, arg, mode, cred, rvalp); 8707 break; 8708 8709 case DAPL_MR_SYNC: 8710 error = daplka_mr_sync(rp, arg, mode, cred, rvalp); 8711 break; 8712 8713 default: 8714 DERR("daplka_mr_ioctl: cmd not supported\n"); 8715 error = DDI_FAILURE; 8716 } 8717 return (error); 8718 } 8719 8720 static int 8721 daplka_mw_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8722 cred_t *cred, int *rvalp) 8723 { 8724 int error; 8725 8726 switch (cmd) { 8727 case DAPL_MW_ALLOC: 8728 error = daplka_mw_alloc(rp, arg, mode, cred, rvalp); 8729 break; 8730 8731 case DAPL_MW_FREE: 8732 error = daplka_mw_free(rp, arg, mode, cred, rvalp); 8733 break; 8734 8735 default: 8736 DERR("daplka_mw_ioctl: cmd not supported\n"); 8737 error = DDI_FAILURE; 8738 } 8739 return (error); 8740 } 8741 8742 static int 8743 daplka_cno_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8744 cred_t *cred, int *rvalp) 8745 { 8746 int error; 8747 8748 switch (cmd) { 8749 case DAPL_CNO_ALLOC: 8750 error = daplka_cno_alloc(rp, arg, mode, cred, rvalp); 8751 break; 8752 8753 case DAPL_CNO_FREE: 8754 error = daplka_cno_free(rp, arg, mode, cred, rvalp); 8755 break; 8756 8757 case DAPL_CNO_WAIT: 8758 error = daplka_cno_wait(rp, arg, mode, cred, rvalp); 8759 break; 8760 8761 default: 8762 DERR("daplka_cno_ioctl: cmd not supported\n"); 8763 
error = DDI_FAILURE; 8764 } 8765 return (error); 8766 } 8767 8768 static int 8769 daplka_pd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8770 cred_t *cred, int *rvalp) 8771 { 8772 int error; 8773 8774 switch (cmd) { 8775 case DAPL_PD_ALLOC: 8776 error = daplka_pd_alloc(rp, arg, mode, cred, rvalp); 8777 break; 8778 8779 case DAPL_PD_FREE: 8780 error = daplka_pd_free(rp, arg, mode, cred, rvalp); 8781 break; 8782 8783 default: 8784 DERR("daplka_pd_ioctl: cmd not supported\n"); 8785 error = DDI_FAILURE; 8786 } 8787 return (error); 8788 } 8789 8790 static int 8791 daplka_sp_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8792 cred_t *cred, int *rvalp) 8793 { 8794 int error; 8795 8796 switch (cmd) { 8797 case DAPL_SERVICE_REGISTER: 8798 error = daplka_service_register(rp, arg, mode, cred, rvalp); 8799 break; 8800 8801 case DAPL_SERVICE_DEREGISTER: 8802 error = daplka_service_deregister(rp, arg, mode, cred, rvalp); 8803 break; 8804 8805 default: 8806 DERR("daplka_sp_ioctl: cmd not supported\n"); 8807 error = DDI_FAILURE; 8808 } 8809 return (error); 8810 } 8811 8812 static int 8813 daplka_srq_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8814 cred_t *cred, int *rvalp) 8815 { 8816 int error; 8817 8818 switch (cmd) { 8819 case DAPL_SRQ_CREATE: 8820 error = daplka_srq_create(rp, arg, mode, cred, rvalp); 8821 break; 8822 8823 case DAPL_SRQ_RESIZE: 8824 error = daplka_srq_resize(rp, arg, mode, cred, rvalp); 8825 break; 8826 8827 case DAPL_SRQ_FREE: 8828 error = daplka_srq_free(rp, arg, mode, cred, rvalp); 8829 break; 8830 8831 default: 8832 DERR("daplka_srq_ioctl: cmd(%d) not supported\n", cmd); 8833 error = DDI_FAILURE; 8834 break; 8835 } 8836 return (error); 8837 } 8838 8839 static int 8840 daplka_misc_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode, 8841 cred_t *cred, int *rvalp) 8842 { 8843 int error; 8844 8845 switch (cmd) { 8846 case DAPL_CR_ACCEPT: 8847 error = daplka_cr_accept(rp, arg, mode, cred, rvalp); 8848 break; 8849 8850 case DAPL_CR_REJECT: 8851 error = daplka_cr_reject(rp, arg, mode, cred, rvalp); 8852 break; 8853 8854 case DAPL_IA_QUERY: 8855 error = daplka_ia_query(rp, arg, mode, cred, rvalp); 8856 break; 8857 8858 case DAPL_CR_HANDOFF: 8859 error = daplka_cr_handoff(rp, arg, mode, cred, rvalp); 8860 break; 8861 8862 default: 8863 DERR("daplka_misc_ioctl: cmd not supported\n"); 8864 error = DDI_FAILURE; 8865 } 8866 return (error); 8867 } 8868 8869 /*ARGSUSED*/ 8870 static int 8871 daplka_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred, 8872 int *rvalp) 8873 { 8874 daplka_ia_resource_t *ia_rp; 8875 minor_t rnum; 8876 int error = 0; 8877 8878 rnum = getminor(dev); 8879 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum); 8880 if (ia_rp == NULL) { 8881 DERR("ioctl: resource not found, rnum %d\n", rnum); 8882 return (ENXIO); 8883 } 8884 8885 D4("ioctl: rnum = %d, cmd = 0x%x\n", rnum, cmd); 8886 if (DAPLKA_RS_RESERVED(ia_rp)) { 8887 error = daplka_common_ioctl(cmd, rnum, arg, mode, cred, rvalp); 8888 return (error); 8889 } 8890 if (DAPLKA_RS_TYPE(ia_rp) != DAPL_TYPE_IA) { 8891 DERR("ioctl: invalid type %d\n", DAPLKA_RS_TYPE(ia_rp)); 8892 error = EINVAL; 8893 goto cleanup; 8894 } 8895 if (ia_rp->ia_pid != ddi_get_pid()) { 8896 DERR("ioctl: ia_pid %d != pid %d\n", 8897 ia_rp->ia_pid, ddi_get_pid()); 8898 error = EINVAL; 8899 goto cleanup; 8900 } 8901 8902 switch (cmd & DAPL_TYPE_MASK) { 8903 case DAPL_TYPE_EVD: 8904 error = daplka_evd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8905 break; 8906 
8907 case DAPL_TYPE_EP: 8908 error = daplka_ep_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8909 break; 8910 8911 case DAPL_TYPE_MR: 8912 error = daplka_mr_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8913 break; 8914 8915 case DAPL_TYPE_MW: 8916 error = daplka_mw_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8917 break; 8918 8919 case DAPL_TYPE_PD: 8920 error = daplka_pd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8921 break; 8922 8923 case DAPL_TYPE_SP: 8924 error = daplka_sp_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8925 break; 8926 8927 case DAPL_TYPE_CNO: 8928 error = daplka_cno_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8929 break; 8930 8931 case DAPL_TYPE_MISC: 8932 error = daplka_misc_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8933 break; 8934 8935 case DAPL_TYPE_SRQ: 8936 error = daplka_srq_ioctl(cmd, ia_rp, arg, mode, cred, rvalp); 8937 break; 8938 8939 default: 8940 DERR("ioctl: invalid dapl type = %d\n", DAPLKA_RS_TYPE(ia_rp)); 8941 error = DDI_FAILURE; 8942 } 8943 8944 cleanup:; 8945 DAPLKA_RS_UNREF(ia_rp); 8946 return (error); 8947 } 8948 8949 /* ARGSUSED */ 8950 static int 8951 daplka_open(dev_t *devp, int flag, int otyp, struct cred *cred) 8952 { 8953 minor_t rnum; 8954 8955 /* 8956 * Char only 8957 */ 8958 if (otyp != OTYP_CHR) { 8959 return (EINVAL); 8960 } 8961 8962 /* 8963 * Only minor zero can be opened; clones are used for resources. 8964 */ 8965 if (getminor(*devp) != DAPLKA_DRIVER_MINOR) { 8966 DERR("daplka_open: bad minor %d\n", getminor(*devp)); 8967 return (ENODEV); 8968 } 8969 8970 /* 8971 * - allocate new minor number 8972 * - update devp argument to new device 8973 */ 8974 if (daplka_resource_reserve(&rnum) == 0) { 8975 *devp = makedevice(getmajor(*devp), rnum); 8976 } else { 8977 return (ENOMEM); 8978 } 8979 8980 return (DDI_SUCCESS); 8981 } 8982 8983 /* ARGSUSED */ 8984 static int 8985 daplka_close(dev_t dev, int flag, int otyp, struct cred *cred) 8986 { 8987 daplka_ia_resource_t *ia_rp; 8988 minor_t rnum = getminor(dev); 8989 8990 /* 8991 * Char only 8992 */ 8993 if (otyp != OTYP_CHR) { 8994 return (EINVAL); 8995 } 8996 D2("daplka_close: closing rnum = %d\n", rnum); 8997 atomic_inc_32(&daplka_pending_close); 8998 8999 /* 9000 * remove from resource table. 9001 */ 9002 ia_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum); 9003 9004 /* 9005 * remove the initial reference 9006 */ 9007 if (ia_rp != NULL) { 9008 DAPLKA_RS_UNREF(ia_rp); 9009 } 9010 atomic_dec_32(&daplka_pending_close); 9011 return (DDI_SUCCESS); 9012 } 9013 9014 9015 /* 9016 * Resource management routines 9017 * 9018 * We start with no resource array. Each time we run out of slots, we 9019 * reallocate a larger root array, copy the existing block pointers into 9020 * it, and then allocate a new resource blk and add it to the table. 9021 * 9022 * The resource control block contains: 9023 * root - array of pointers to resource blks 9024 * sz - current size of array. 9025 * len - last valid entry in array. 9026 * 9027 * A search operation based on a resource number is as follows: 9028 * index = rnum / RESOURCE_BLKSZ; 9029 * ASSERT(index < resource_block.len); 9030 * ASSERT(index < resource_block.sz); 9031 * offset = rnum % RESOURCE_BLKSZ; 9032 * ASSERT(offset >= resource_block.root[index]->base); 9033 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ); 9034 * return resource_block.root[index]->blks[offset]; 9035 * 9036 * A resource blk is freed when its used count reaches zero.
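For example, if RESOURCE_BLKSZ were 16, rnum 37 would resolve to index 2, offset 5, i.e. resource_block.root[2]->blks[5] (37 / 16 == 2, 37 % 16 == 5).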
9037 */ 9038 9039 /* 9040 * initializes the global resource table 9041 */ 9042 static void 9043 daplka_resource_init(void) 9044 { 9045 rw_init(&daplka_resource.daplka_rct_lock, NULL, RW_DRIVER, NULL); 9046 daplka_resource.daplka_rc_len = 0; 9047 daplka_resource.daplka_rc_sz = 0; 9048 daplka_resource.daplka_rc_cnt = 0; 9049 daplka_resource.daplka_rc_flag = 0; 9050 daplka_resource.daplka_rc_root = NULL; 9051 } 9052 9053 /* 9054 * destroys the global resource table 9055 */ 9056 static void 9057 daplka_resource_fini(void) 9058 { 9059 int i; 9060 9061 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER); 9062 for (i = 0; i < daplka_resource.daplka_rc_len; i++) { 9063 daplka_resource_blk_t *blk; 9064 int j; 9065 9066 blk = daplka_resource.daplka_rc_root[i]; 9067 if (blk == NULL) { 9068 continue; 9069 } 9070 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) { 9071 if (blk->daplka_rcblk_blks[j] != NULL) { 9072 DERR("resource_fini: non-null slot %d, %p\n", 9073 j, blk->daplka_rcblk_blks[j]); 9074 } 9075 } 9076 kmem_free(blk, sizeof (*blk)); 9077 daplka_resource.daplka_rc_root[i] = NULL; 9078 } 9079 if (daplka_resource.daplka_rc_root != NULL) { 9080 uint_t sz; 9081 9082 sz = daplka_resource.daplka_rc_sz * 9083 sizeof (daplka_resource_blk_t *); 9084 kmem_free(daplka_resource.daplka_rc_root, (uint_t)sz); 9085 daplka_resource.daplka_rc_root = NULL; 9086 daplka_resource.daplka_rc_len = 0; 9087 daplka_resource.daplka_rc_sz = 0; 9088 } 9089 rw_exit(&daplka_resource.daplka_rct_lock); 9090 rw_destroy(&daplka_resource.daplka_rct_lock); 9091 } 9092 9093 /* 9094 * reserves a slot in the global resource table. 9095 * this is called by the open() syscall. it is needed because 9096 * at open() time, we do not have sufficient information to 9097 * create an IA resource. the library needs to subsequently 9098 * call daplka_ia_create to insert an IA resource into this 9099 * reserved slot. 9100 */ 9101 static int 9102 daplka_resource_reserve(minor_t *rnum) 9103 { 9104 int i, j, empty = -1; 9105 daplka_resource_blk_t *blk; 9106 9107 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER); 9108 /* 9109 * Try to find an empty slot 9110 */ 9111 for (i = 0; i < daplka_resource.daplka_rc_len; i++) { 9112 blk = daplka_resource.daplka_rc_root[i]; 9113 if (blk != NULL && blk->daplka_rcblk_avail > 0) { 9114 9115 D3("resource_alloc: available blks %d\n", 9116 blk->daplka_rcblk_avail); 9117 9118 /* 9119 * found an empty slot in this blk 9120 */ 9121 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) { 9122 if (blk->daplka_rcblk_blks[j] == NULL) { 9123 *rnum = (minor_t) 9124 (j + (i * DAPLKA_RC_BLKSZ)); 9125 blk->daplka_rcblk_blks[j] = 9126 (daplka_resource_t *) 9127 DAPLKA_RC_RESERVED; 9128 blk->daplka_rcblk_avail--; 9129 daplka_resource.daplka_rc_cnt++; 9130 rw_exit(&daplka_resource. 9131 daplka_rct_lock); 9132 return (0); 9133 } 9134 } 9135 } else if (blk == NULL && empty < 0) { 9136 /* 9137 * remember first empty slot 9138 */ 9139 empty = i; 9140 } 9141 } 9142 9143 /* 9144 * Couldn't find anything, allocate a new blk 9145 * Do we need to reallocate the root array 9146 */ 9147 if (empty < 0) { 9148 if (daplka_resource.daplka_rc_len == 9149 daplka_resource.daplka_rc_sz) { 9150 /* 9151 * Allocate new array and copy current stuff into it 9152 */ 9153 daplka_resource_blk_t **p; 9154 uint_t newsz = (uint_t)daplka_resource.daplka_rc_sz + 9155 DAPLKA_RC_BLKSZ; 9156 9157 D3("resource_alloc: increasing no. 
of buckets to %d\n", 9158 newsz); 9159 9160 p = kmem_zalloc(newsz * sizeof (*p), daplka_km_flags); 9161 9162 if (daplka_resource.daplka_rc_root) { 9163 uint_t oldsz; 9164 9165 oldsz = (uint_t)(daplka_resource.daplka_rc_sz * 9166 (int)sizeof (*p)); 9167 9168 /* 9169 * Copy old data into new space and 9170 * free old stuff 9171 */ 9172 bcopy(daplka_resource.daplka_rc_root, p, oldsz); 9173 kmem_free(daplka_resource.daplka_rc_root, 9174 oldsz); 9175 } 9176 9177 daplka_resource.daplka_rc_root = p; 9178 daplka_resource.daplka_rc_sz = (int)newsz; 9179 } 9180 9181 empty = daplka_resource.daplka_rc_len; 9182 daplka_resource.daplka_rc_len++; 9183 9184 D3("resource_alloc: daplka_rc_len %d\n", 9185 daplka_resource.daplka_rc_len); 9186 } 9187 9188 /* 9189 * Allocate a new blk 9190 */ 9191 blk = kmem_zalloc(sizeof (*blk), daplka_km_flags); 9192 ASSERT(daplka_resource.daplka_rc_root[empty] == NULL); 9193 daplka_resource.daplka_rc_root[empty] = blk; 9194 blk->daplka_rcblk_avail = DAPLKA_RC_BLKSZ - 1; 9195 9196 /* 9197 * Allocate slot 9198 */ 9199 *rnum = (minor_t)(empty * DAPLKA_RC_BLKSZ); 9200 blk->daplka_rcblk_blks[0] = (daplka_resource_t *)DAPLKA_RC_RESERVED; 9201 daplka_resource.daplka_rc_cnt++; 9202 rw_exit(&daplka_resource.daplka_rct_lock); 9203 9204 return (0); 9205 } 9206 9207 /* 9208 * removes resource from global resource table 9209 */ 9210 static daplka_resource_t * 9211 daplka_resource_remove(minor_t rnum) 9212 { 9213 int i, j; 9214 daplka_resource_blk_t *blk; 9215 daplka_resource_t *p; 9216 9217 i = (int)(rnum / DAPLKA_RC_BLKSZ); 9218 j = (int)(rnum % DAPLKA_RC_BLKSZ); 9219 9220 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER); 9221 if (i >= daplka_resource.daplka_rc_len) { 9222 rw_exit(&daplka_resource.daplka_rct_lock); 9223 DERR("resource_remove: invalid rnum %d\n", rnum); 9224 return (NULL); 9225 } 9226 9227 ASSERT(daplka_resource.daplka_rc_root); 9228 ASSERT(i < daplka_resource.daplka_rc_len); 9229 ASSERT(i < daplka_resource.daplka_rc_sz); 9230 blk = daplka_resource.daplka_rc_root[i]; 9231 if (blk == NULL) { 9232 rw_exit(&daplka_resource.daplka_rct_lock); 9233 DERR("resource_remove: invalid rnum %d\n", rnum); 9234 return (NULL); 9235 } 9236 9237 if (blk->daplka_rcblk_blks[j] == NULL) { 9238 rw_exit(&daplka_resource.daplka_rct_lock); 9239 DERR("resource_remove: blk->daplka_rcblk_blks[j] == NULL\n"); 9240 return (NULL); 9241 } 9242 p = blk->daplka_rcblk_blks[j]; 9243 blk->daplka_rcblk_blks[j] = NULL; 9244 blk->daplka_rcblk_avail++; 9245 if (blk->daplka_rcblk_avail == DAPLKA_RC_BLKSZ) { 9246 /* 9247 * free this blk 9248 */ 9249 kmem_free(blk, sizeof (*blk)); 9250 daplka_resource.daplka_rc_root[i] = NULL; 9251 } 9252 daplka_resource.daplka_rc_cnt--; 9253 rw_exit(&daplka_resource.daplka_rct_lock); 9254 9255 if ((intptr_t)p == DAPLKA_RC_RESERVED) { 9256 return (NULL); 9257 } else { 9258 return (p); 9259 } 9260 } 9261 9262 /* 9263 * inserts resource into the slot designated by rnum 9264 */ 9265 static int 9266 daplka_resource_insert(minor_t rnum, daplka_resource_t *rp) 9267 { 9268 int i, j, error = -1; 9269 daplka_resource_blk_t *blk; 9270 9271 /* 9272 * Find resource and lock it in WRITER mode 9273 * search for available resource slot 9274 */ 9275 9276 i = (int)(rnum / DAPLKA_RC_BLKSZ); 9277 j = (int)(rnum % DAPLKA_RC_BLKSZ); 9278 9279 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER); 9280 if (i >= daplka_resource.daplka_rc_len) { 9281 rw_exit(&daplka_resource.daplka_rct_lock); 9282 DERR("resource_insert: resource %d not found\n", rnum); 9283 return (-1); 9284 } 9285 9286 blk = 
daplka_resource.daplka_rc_root[i]; 9287 if (blk != NULL) { 9288 ASSERT(i < daplka_resource.daplka_rc_len); 9289 ASSERT(i < daplka_resource.daplka_rc_sz); 9290 9291 if ((intptr_t)blk->daplka_rcblk_blks[j] == DAPLKA_RC_RESERVED) { 9292 blk->daplka_rcblk_blks[j] = rp; 9293 error = 0; 9294 } else { 9295 DERR("resource_insert: %d not reserved, blk = %p\n", 9296 rnum, blk->daplka_rcblk_blks[j]); 9297 } 9298 } else { 9299 DERR("resource_insert: resource %d not found\n", rnum); 9300 } 9301 rw_exit(&daplka_resource.daplka_rct_lock); 9302 return (error); 9303 } 9304 9305 /* 9306 * finds resource using minor device number 9307 */ 9308 static daplka_resource_t * 9309 daplka_resource_lookup(minor_t rnum) 9310 { 9311 int i, j; 9312 daplka_resource_blk_t *blk; 9313 daplka_resource_t *rp; 9314 9315 /* 9316 * Find resource and lock it in READER mode 9317 * search for available resource slot 9318 */ 9319 9320 i = (int)(rnum / DAPLKA_RC_BLKSZ); 9321 j = (int)(rnum % DAPLKA_RC_BLKSZ); 9322 9323 rw_enter(&daplka_resource.daplka_rct_lock, RW_READER); 9324 if (i >= daplka_resource.daplka_rc_len) { 9325 rw_exit(&daplka_resource.daplka_rct_lock); 9326 DERR("resource_lookup: resource %d not found\n", rnum); 9327 return (NULL); 9328 } 9329 9330 blk = daplka_resource.daplka_rc_root[i]; 9331 if (blk != NULL) { 9332 ASSERT(i < daplka_resource.daplka_rc_len); 9333 ASSERT(i < daplka_resource.daplka_rc_sz); 9334 9335 rp = blk->daplka_rcblk_blks[j]; 9336 if (rp == NULL || (intptr_t)rp == DAPLKA_RC_RESERVED) { 9337 D3("resource_lookup: %d not found, blk = %p\n", 9338 rnum, blk->daplka_rcblk_blks[j]); 9339 } else { 9340 DAPLKA_RS_REF((daplka_ia_resource_t *)rp); 9341 } 9342 } else { 9343 DERR("resource_lookup: resource %d not found\n", rnum); 9344 rp = NULL; 9345 } 9346 rw_exit(&daplka_resource.daplka_rct_lock); 9347 return (rp); 9348 } 9349 9350 /* 9351 * generic hash table implementation 9352 */ 9353 9354 /* 9355 * daplka_hash_create: 9356 * initializes a hash table with the specified parameters 9357 * 9358 * input: 9359 * htblp pointer to hash table 9360 * 9361 * nbuckets number of buckets (must be power of 2) 9362 * 9363 * free_func this function is called on each hash 9364 * table element when daplka_hash_destroy 9365 * is called 9366 * 9367 * lookup_func if daplka_hash_lookup is able to find 9368 * the desired object, this function is 9369 * applied on the object before 9370 * daplka_hash_lookup returns 9371 * output: 9372 * none 9373 * 9374 * return value(s): 9375 * EINVAL nbuckets is not a power of 2 9376 * ENOMEM cannot allocate buckets 9377 * 0 success 9378 */ 9379 static int 9380 daplka_hash_create(daplka_hash_table_t *htblp, uint_t nbuckets, 9381 void (*free_func)(void *), void (*lookup_func)(void *)) 9382 { 9383 int i; 9384 9385 if ((nbuckets & ~(nbuckets - 1)) != nbuckets) { 9386 DERR("hash_create: nbuckets not power of 2\n"); 9387 return (EINVAL); 9388 } 9389 9390 htblp->ht_buckets = 9391 kmem_zalloc(sizeof (daplka_hash_bucket_t) * nbuckets, 9392 daplka_km_flags); 9393 if (htblp->ht_buckets == NULL) { 9394 DERR("hash_create: cannot allocate buckets\n"); 9395 return (ENOMEM); 9396 } 9397 for (i = 0; i < nbuckets; i++) { 9398 htblp->ht_buckets[i].hb_count = 0; 9399 htblp->ht_buckets[i].hb_entries = NULL; 9400 } 9401 rw_init(&htblp->ht_table_lock, NULL, RW_DRIVER, NULL); 9402 mutex_init(&htblp->ht_key_lock, NULL, MUTEX_DRIVER, NULL); 9403 9404 htblp->ht_count = 0; 9405 htblp->ht_next_hkey = (uint64_t)gethrtime(); 9406 htblp->ht_nbuckets = nbuckets; 9407 htblp->ht_free_func = free_func; 9408 htblp->ht_lookup_func 

/*
 * generic hash table implementation
 */

/*
 * daplka_hash_create:
 *	initializes a hash table with the specified parameters
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	nbuckets		number of buckets (must be a power of 2)
 *
 *	free_func		this function is called on each hash
 *				table element when daplka_hash_destroy
 *				is called
 *
 *	lookup_func		if daplka_hash_lookup is able to find
 *				the desired object, this function is
 *				applied on the object before
 *				daplka_hash_lookup returns
 *
 * output:
 *	none
 *
 * return value(s):
 *	EINVAL			nbuckets is not a power of 2
 *	ENOMEM			cannot allocate buckets
 *	0			success
 */
static int
daplka_hash_create(daplka_hash_table_t *htblp, uint_t nbuckets,
    void (*free_func)(void *), void (*lookup_func)(void *))
{
	int i;

	if ((nbuckets & ~(nbuckets - 1)) != nbuckets) {
		DERR("hash_create: nbuckets not power of 2\n");
		return (EINVAL);
	}

	htblp->ht_buckets =
	    kmem_zalloc(sizeof (daplka_hash_bucket_t) * nbuckets,
	    daplka_km_flags);
	if (htblp->ht_buckets == NULL) {
		DERR("hash_create: cannot allocate buckets\n");
		return (ENOMEM);
	}
	for (i = 0; i < nbuckets; i++) {
		htblp->ht_buckets[i].hb_count = 0;
		htblp->ht_buckets[i].hb_entries = NULL;
	}
	rw_init(&htblp->ht_table_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&htblp->ht_key_lock, NULL, MUTEX_DRIVER, NULL);

	htblp->ht_count = 0;
	htblp->ht_next_hkey = (uint64_t)gethrtime();
	htblp->ht_nbuckets = nbuckets;
	htblp->ht_free_func = free_func;
	htblp->ht_lookup_func = lookup_func;
	htblp->ht_initialized = B_TRUE;
	D3("hash_create: done, buckets = %d\n", nbuckets);
	return (0);
}

/*
 * daplka_hash_insert:
 *	inserts an object into a hash table
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	hkeyp			pointer to hash key.
 *				a non-zero *hkeyp means the caller has
 *				generated its own hkey. if *hkeyp is
 *				zero, this function generates an hkey
 *				for the caller. callers should normally
 *				leave hkey generation to this function
 *				because the generated hkeys are more
 *				evenly distributed.
 *
 *	objp			pointer to object to be inserted into
 *				hash table
 *
 * output:
 *	hkeyp			the generated hkey is returned via
 *				this pointer
 *
 * return value(s):
 *	EINVAL			invalid parameter
 *	ENOMEM			cannot allocate hash entry
 *	0			success
 */
static int
daplka_hash_insert(daplka_hash_table_t *htblp, uint64_t *hkeyp, void *objp)
{
	daplka_hash_entry_t *hep, *curr_hep;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket;
	uint64_t hkey;

	if (hkeyp == NULL) {
		DERR("hash_insert: hkeyp == NULL\n");
		return (EINVAL);
	}
	hep = kmem_zalloc(sizeof (*hep), daplka_km_flags);
	if (hep == NULL) {
		DERR("hash_insert: cannot alloc hash_entry\n");
		return (ENOMEM);
	}
	if (*hkeyp == 0) {
		/* generate a new key */
		mutex_enter(&htblp->ht_key_lock);
		hkey = ++htblp->ht_next_hkey;
		if (hkey == 0) {
			hkey = htblp->ht_next_hkey = (uint64_t)gethrtime();
		}
		mutex_exit(&htblp->ht_key_lock);
	} else {
		/* use the caller-generated key */
		hkey = *hkeyp;
	}

	/* only works if ht_nbuckets is a power of 2 */
	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
	ASSERT(objp != NULL);
	ASSERT(bucket < htblp->ht_nbuckets);

	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	hep->he_hkey = hkey;
	hep->he_objp = objp;

	/* look for duplicate entries */
	hbp = &htblp->ht_buckets[bucket];
	curr_hep = hbp->hb_entries;
	while (curr_hep != NULL) {
		if (curr_hep->he_hkey == hep->he_hkey) {
			break;
		}
		curr_hep = curr_hep->he_next;
	}
	if (curr_hep != NULL) {
		DERR("hash_insert: found duplicate hash entry: "
		    "bucket %d, hkey 0x%016llx\n",
		    bucket, (longlong_t)hep->he_hkey);
		kmem_free(hep, sizeof (*hep));
		rw_exit(&htblp->ht_table_lock);
		return (EINVAL);
	}
	hep->he_next = hbp->hb_entries;
	hbp->hb_entries = hep;
	hbp->hb_count++;
	htblp->ht_count++;
	rw_exit(&htblp->ht_table_lock);

	if (*hkeyp == 0) {
		*hkeyp = hkey;
		ASSERT(*hkeyp != 0);
	}
	D3("hash_insert: htblp 0x%p, hkey = 0x%016llx, bucket = %d\n",
	    htblp, (longlong_t)*hkeyp, bucket);
	return (0);
}
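
/*
 * Usage sketch (editorial illustration only; my_obj, my_obj_free and
 * my_obj_hold are hypothetical and not part of this driver). Passing
 * a zeroed hkey asks daplka_hash_insert to generate the key:
 *
 *	daplka_hash_table_t htbl;
 *	uint64_t hkey = 0;
 *	int rv;
 *
 *	rv = daplka_hash_create(&htbl, 256, my_obj_free, my_obj_hold);
 *	if (rv == 0) {
 *		rv = daplka_hash_insert(&htbl, &hkey, my_obj);
 *	}
 *
 * On success, hkey holds the generated non-zero key that
 * daplka_hash_lookup and daplka_hash_remove expect; 256 satisfies the
 * power-of-2 requirement, so the bucket is computed as (hkey & 255)
 * with no modulo.
 */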

/*
 * daplka_hash_remove:
 *	removes the object identified by hkey from the hash table
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	hkey			hkey that identifies the object to be
 *				removed
 *
 * output:
 *	objpp			pointer to pointer to object.
 *				if the remove is successful, the
 *				removed object is returned via *objpp.
 *
 * return value(s):
 *	EINVAL			cannot find hash entry
 *	0			success
 */
static int
daplka_hash_remove(daplka_hash_table_t *htblp, uint64_t hkey, void **objpp)
{
	daplka_hash_entry_t *free_hep, **curr_hepp;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket;

	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));

	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	hbp = &htblp->ht_buckets[bucket];

	curr_hepp = &hbp->hb_entries;
	while (*curr_hepp != NULL) {
		if ((*curr_hepp)->he_hkey == hkey) {
			break;
		}
		curr_hepp = &(*curr_hepp)->he_next;
	}
	if (*curr_hepp == NULL) {
		DERR("hash_remove: cannot find hash entry: "
		    "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
		rw_exit(&htblp->ht_table_lock);
		return (EINVAL);
	} else {
		if (objpp != NULL) {
			*objpp = (*curr_hepp)->he_objp;
		}
		free_hep = *curr_hepp;
		*curr_hepp = (*curr_hepp)->he_next;
		kmem_free(free_hep, sizeof (*free_hep));
	}
	hbp->hb_count--;
	htblp->ht_count--;
	D3("hash_remove: removed entry, hkey 0x%016llx, bucket %d, "
	    "hb_count %d, ht_count %d\n",
	    (longlong_t)hkey, bucket, hbp->hb_count, htblp->ht_count);
	rw_exit(&htblp->ht_table_lock);
	return (0);
}
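
/*
 * Editorial note: the removal above uses the classic
 * pointer-to-pointer idiom. curr_hepp starts at &hbp->hb_entries and
 * advances through the he_next fields, so unlinking the list head and
 * unlinking an interior entry are the same single assignment. A
 * minimal standalone sketch of the idiom (struct node is
 * illustrative, not a driver type):
 *
 *	struct node { struct node *next; int key; };
 *
 *	static void
 *	unlink_key(struct node **headp, int key)
 *	{
 *		struct node **pp;
 *
 *		for (pp = headp; *pp != NULL; pp = &(*pp)->next) {
 *			if ((*pp)->key == key) {
 *				*pp = (*pp)->next;
 *				break;
 *			}
 *		}
 *	}
 */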

/*
 * daplka_hash_walk:
 *	walks through the entire hash table, applying func on each of
 *	the inserted objects. stops walking if func returns non-zero.
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	func			function to be applied on each object
 *
 *	farg			second argument to func
 *
 *	lockmode		can be RW_WRITER or RW_READER. this
 *				allows the caller to choose what type
 *				of lock to acquire before walking the
 *				table.
 *
 * output:
 *	none
 *
 * return value(s):
 *	none
 */
static void
daplka_hash_walk(daplka_hash_table_t *htblp, int (*func)(void *, void *),
    void *farg, krw_t lockmode)
{
	daplka_hash_entry_t *curr_hep;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket, retval = 0;

	ASSERT(lockmode == RW_WRITER || lockmode == RW_READER);

	if (lockmode == RW_WRITER) {
		rw_enter(&htblp->ht_table_lock, RW_WRITER);
	} else {
		rw_enter(&htblp->ht_table_lock, RW_READER);
	}
	for (bucket = 0; bucket < htblp->ht_nbuckets && retval == 0;
	    bucket++) {
		hbp = &htblp->ht_buckets[bucket];
		curr_hep = hbp->hb_entries;
		while (curr_hep != NULL) {
			retval = (*func)(curr_hep->he_objp, farg);
			if (retval != 0) {
				break;
			}
			curr_hep = curr_hep->he_next;
		}
	}
	rw_exit(&htblp->ht_table_lock);
}

/*
 * daplka_hash_lookup:
 *	finds an object from its hkey
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	hkey			hkey that identifies the object to be
 *				looked up
 *
 * output:
 *	none
 *
 * return value(s):
 *	NULL if not found
 *	object pointer if found
 */
static void *
daplka_hash_lookup(daplka_hash_table_t *htblp, uint64_t hkey)
{
	daplka_hash_entry_t *curr_hep;
	uint32_t bucket;
	void *objp;

	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));

	rw_enter(&htblp->ht_table_lock, RW_READER);
	curr_hep = htblp->ht_buckets[bucket].hb_entries;
	while (curr_hep != NULL) {
		if (curr_hep->he_hkey == hkey) {
			break;
		}
		curr_hep = curr_hep->he_next;
	}
	if (curr_hep == NULL) {
		DERR("hash_lookup: cannot find hash entry: "
		    "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
		rw_exit(&htblp->ht_table_lock);
		return (NULL);
	}
	objp = curr_hep->he_objp;
	ASSERT(objp != NULL);
	if (htblp->ht_lookup_func != NULL) {
		(*htblp->ht_lookup_func)(objp);
	}
	rw_exit(&htblp->ht_table_lock);
	return (objp);
}
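
/*
 * Walk sketch (editorial illustration; count_busy and my_is_busy are
 * hypothetical): the callback receives each inserted object plus the
 * caller's farg, and returning non-zero stops the walk early.
 *
 *	static int
 *	count_busy(void *objp, void *arg)
 *	{
 *		if (my_is_busy(objp))
 *			(*(uint32_t *)arg)++;
 *		return (0);
 *	}
 *
 *	uint32_t busy = 0;
 *	daplka_hash_walk(&htbl, count_busy, &busy, RW_READER);
 *
 * RW_READER suffices when the callback only reads. Note that the
 * table lock is held across every callback, so a callback must not
 * call back into the insert/remove paths of the same table.
 */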

/*
 * daplka_hash_destroy:
 *	destroys the hash table and applies free_func on all inserted
 *	objects.
 *
 * input:
 *	htblp			pointer to hash table
 *
 * output:
 *	none
 *
 * return value(s):
 *	none
 */
static void
daplka_hash_destroy(daplka_hash_table_t *htblp)
{
	daplka_hash_entry_t *curr_hep, *free_hep;
	daplka_hash_entry_t *free_list = NULL;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket, cnt, total = 0;

	if (!htblp->ht_initialized) {
		DERR("hash_destroy: not initialized\n");
		return;
	}
	/* free all elements from the hash table */
	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	for (bucket = 0; bucket < htblp->ht_nbuckets; bucket++) {
		hbp = &htblp->ht_buckets[bucket];

		/* build a list of elements to be freed */
		curr_hep = hbp->hb_entries;
		cnt = 0;
		while (curr_hep != NULL) {
			cnt++;
			free_hep = curr_hep;
			curr_hep = curr_hep->he_next;

			free_hep->he_next = free_list;
			free_list = free_hep;
		}
		ASSERT(cnt == hbp->hb_count);
		total += cnt;
		hbp->hb_count = 0;
		hbp->hb_entries = NULL;
	}
	ASSERT(total == htblp->ht_count);
	D3("hash_destroy: htblp 0x%p, nbuckets %d, freed %d hash entries\n",
	    htblp, htblp->ht_nbuckets, total);
	rw_exit(&htblp->ht_table_lock);

	/* free all objects, now without holding the hash table lock */
	cnt = 0;
	while (free_list != NULL) {
		cnt++;
		free_hep = free_list;
		free_list = free_list->he_next;
		if (htblp->ht_free_func != NULL) {
			(*htblp->ht_free_func)(free_hep->he_objp);
		}
		kmem_free(free_hep, sizeof (*free_hep));
	}
	ASSERT(total == cnt);

	/* free the hash buckets and destroy the locks */
	kmem_free(htblp->ht_buckets,
	    sizeof (daplka_hash_bucket_t) * htblp->ht_nbuckets);

	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	htblp->ht_buckets = NULL;
	htblp->ht_count = 0;
	htblp->ht_nbuckets = 0;
	htblp->ht_free_func = NULL;
	htblp->ht_lookup_func = NULL;
	htblp->ht_initialized = B_FALSE;
	rw_exit(&htblp->ht_table_lock);

	mutex_destroy(&htblp->ht_key_lock);
	rw_destroy(&htblp->ht_table_lock);
}

/*
 * daplka_hash_getsize:
 *	returns the number of objects in the hash table
 *
 * input:
 *	htblp			pointer to hash table
 *
 * output:
 *	none
 *
 * return value(s):
 *	number of objects in the hash table
 */
static uint32_t
daplka_hash_getsize(daplka_hash_table_t *htblp)
{
	uint32_t sz;

	rw_enter(&htblp->ht_table_lock, RW_READER);
	sz = htblp->ht_count;
	rw_exit(&htblp->ht_table_lock);

	return (sz);
}

/*
 * this function is used as the ht_lookup_func above when lookup is
 * called. other types of objects may use a more elaborate lookup_func.
 */
static void
daplka_hash_generic_lookup(void *obj)
{
	daplka_resource_t *rp = (daplka_resource_t *)obj;

	mutex_enter(&rp->rs_reflock);
	rp->rs_refcnt++;
	ASSERT(rp->rs_refcnt != 0);
	mutex_exit(&rp->rs_reflock);
}

/*
 * Generates a non-zero 32-bit hash key used for the timer hash table.
 */
static uint32_t
daplka_timer_hkey_gen(void)
{
	uint32_t new_hkey;

	do {
		new_hkey = atomic_inc_32_nv(&daplka_timer_hkey);
	} while (new_hkey == 0);

	return (new_hkey);
}
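
/*
 * Editorial note: the do/while above exists because a zero key is
 * special to daplka_hash_insert (zero *hkeyp means "generate a key
 * for me"), so timer hkeys must never be 0. atomic_inc_32_nv()
 * returns the post-increment value, so when the 32-bit counter wraps
 * the loop simply increments once more (values are illustrative):
 *
 *	daplka_timer_hkey = 0xffffffff;
 *	new_hkey = atomic_inc_32_nv(&daplka_timer_hkey);  wraps to 0
 *	new_hkey = atomic_inc_32_nv(&daplka_timer_hkey);  returns 1
 */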

/*
 * The DAPL KA debug logging routines
 */

/*
 * Adds the string str to the end of the debug log.
 */
static void
daplka_dbglog(char *str)
{
	size_t length;
	size_t remlen;

	/*
	 * If the log has not been initialized yet, there is
	 * nowhere to write; simply return.
	 */
	if (!daplka_dbginit) {
		return;
	}
	mutex_enter(&daplka_dbglock);
	/*
	 * Note the log is circular; if this string would run over the
	 * end, we copy the first piece to the end and then the last
	 * piece to the beginning of the log.
	 */
	length = strlen(str);

	remlen = (size_t)sizeof (daplka_dbgbuf) - daplka_dbgnext - 1;

	if (length > remlen) {
		if (remlen)
			bcopy(str, daplka_dbgbuf + daplka_dbgnext, remlen);
		daplka_dbgbuf[sizeof (daplka_dbgbuf) - 1] = '\0';
		str += remlen;
		length -= remlen;
		daplka_dbgnext = 0;
	}
	bcopy(str, daplka_dbgbuf + daplka_dbgnext, length);
	daplka_dbgnext += length;

	if (daplka_dbgnext >= sizeof (daplka_dbgbuf))
		daplka_dbgnext = 0;
	mutex_exit(&daplka_dbglock);
}

/*
 * Adds a printf-style message to whichever debug logs we're currently
 * using.
 */
static void
daplka_debug(const char *fmt, ...)
{
	char buff[512];
	va_list ap;
	/*
	 * This function prepends the thread id and a timestamp derived
	 * from gethrtime() to the specified string. The timestamp unit
	 * is 10 microseconds (gethrtime() = X ns = X/10000 units), and
	 * it wraps around every 10000 seconds.
	 */
	int micro_time = (int)((gethrtime() / 10000) % 1000000000);
	(void) sprintf(buff, "th %p tm %9d: ", (void *)curthread, micro_time);

	va_start(ap, fmt);
	(void) vsprintf(buff + strlen(buff), fmt, ap);
	va_end(ap);

	daplka_dbglog(buff);
}

static void
daplka_console(const char *fmt, ...)
{
	char buff[512];
	va_list ap;

	va_start(ap, fmt);
	(void) vsprintf(buff, fmt, ap);
	va_end(ap);

	cmn_err(CE_CONT, "%s", buff);
}
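
/*
 * Editorial wrap example for daplka_dbglog() (scaled down to an
 * imaginary 16-byte buffer for illustration): with daplka_dbgnext at
 * 10, the free tail is remlen = 16 - 10 - 1 = 5 bytes. Logging an
 * 8-byte string copies its first 5 bytes to offsets 10..14, NUL
 * terminates offset 15, resets daplka_dbgnext to 0, and copies the
 * remaining 3 bytes to offsets 0..2. Entries that straddle the seam
 * are therefore split in the buffer, and a reader of daplka_dbgbuf
 * must stitch them back together.
 */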