1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  *
  26  * UDAPL kernel agent
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/errno.h>
  31 #include <sys/debug.h>
  32 #include <sys/stropts.h>
  33 #include <sys/stream.h>
  34 #include <sys/strlog.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/kmem.h>
  37 #include <sys/conf.h>
  38 #include <sys/stat.h>
  39 #include <sys/modctl.h>
  40 #include <sys/kstat.h>
  41 #include <sys/ddi.h>
  42 #include <sys/sunddi.h>
  43 #include <sys/strsun.h>
  44 #include <sys/taskq.h>
  45 #include <sys/open.h>
  46 #include <sys/uio.h>
  47 #include <sys/cpuvar.h>
  48 #include <sys/atomic.h>
  49 #include <sys/sysmacros.h>
  50 #include <sys/esunddi.h>
  51 #include <sys/avl.h>
  52 #include <sys/cred.h>
  53 #include <sys/note.h>
  54 #include <sys/ib/ibtl/ibti.h>
  55 #include <sys/socket.h>
  56 #include <netinet/in.h>
  57 #include <daplt_if.h>
  58 #include <daplt.h>
  59 
  60 /*
  61  * The following variables support the debug log buffer scheme.
  62  */
  63 #ifdef  DEBUG
  64 static char daplka_dbgbuf[0x80000];
  65 #else /* DEBUG */
  66 static char daplka_dbgbuf[0x4000];
  67 #endif /* DEBUG */
  68 static int daplka_dbgsize = sizeof (daplka_dbgbuf);
  69 static size_t daplka_dbgnext;
  70 static int daplka_dbginit = 0;
  71 static kmutex_t daplka_dbglock;
  72 
  73 static int daplka_dbg = 0x0103;
  74 static void daplka_console(const char *, ...);
  75 static void daplka_debug(const char *, ...);
  76 static int daplka_apm = 0x1;                    /* default enable */
  77 static int daplka_failback = 0x1;               /* default enable */
  78 static int daplka_query_aft_setaltpath = 10;
  79 
/*
 * DERR logs error-level messages into the debug ring buffer whenever
 * bit 0x100 of daplka_dbg is set.  The macro expands to a bare
 * "if (flag)" followed by the logging function name, so a call such
 * as DERR("msg") becomes: if (daplka_dbg & 0x100) daplka_debug("msg").
 * NOTE(review): this open "if" form is subject to the classic
 * dangling-else hazard when used unbraced inside an if/else; visible
 * call sites are safe, but new callers should brace their bodies.
 */
#define	DERR				\
	if (daplka_dbg & 0x100)		\
	    daplka_debug

#ifdef DEBUG

/* DINFO always prints to the console on DEBUG kernels */
#define	DINFO				\
	daplka_console

/*
 * D1-D4 are increasing-verbosity trace macros, gated by bits
 * 0x01, 0x02, 0x04 and 0x08 of daplka_dbg respectively.  They share
 * the same open-"if" expansion shape (and caveat) as DERR above.
 */
#define	D1				\
	if (daplka_dbg & 0x01)		\
	    daplka_debug
#define	D2				\
	if (daplka_dbg & 0x02)		\
	    daplka_debug
#define	D3				\
	if (daplka_dbg & 0x04)		\
	    daplka_debug
#define	D4				\
	if (daplka_dbg & 0x08)		\
	    daplka_debug

#else /* DEBUG */

/* non-DEBUG kernels: compile the trace calls away ("if (0)") */
#define	DINFO	if (0) printf
#define	D1	if (0) printf
#define	D2	if (0) printf
#define	D3	if (0) printf
#define	D4	if (0) printf

#endif /* DEBUG */
 111 
 112 /*
 113  * driver entry points
 114  */
 115 static int daplka_open(dev_t *, int, int, struct cred *);
 116 static int daplka_close(dev_t, int, int, struct cred *);
 117 static int daplka_attach(dev_info_t *, ddi_attach_cmd_t);
 118 static int daplka_detach(dev_info_t *, ddi_detach_cmd_t);
 119 static int daplka_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
 120 static int daplka_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 121 
 122 /*
 123  * types of ioctls
 124  */
 125 static int daplka_common_ioctl(int, minor_t, intptr_t, int, cred_t *, int *);
 126 static int daplka_misc_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 127     cred_t *, int *);
 128 static int daplka_ep_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 129     cred_t *, int *);
 130 static int daplka_evd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 131     cred_t *, int *);
 132 static int daplka_mr_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 133     cred_t *, int *);
 134 static int daplka_cno_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 135     cred_t *, int *);
 136 static int daplka_pd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 137     cred_t *, int *);
 138 static int daplka_sp_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 139     cred_t *, int *);
 140 static int daplka_srq_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 141     cred_t *, int *);
 142 
 143 /*
 144  * common ioctls and supporting functions
 145  */
 146 static int daplka_ia_create(minor_t, intptr_t, int, cred_t *, int *);
 147 static int daplka_ia_destroy(daplka_resource_t *);
 148 
 149 /*
 150  * EP ioctls and supporting functions
 151  */
 152 static int daplka_ep_create(daplka_ia_resource_t *, intptr_t, int,
 153     cred_t *, int *);
 154 static int daplka_ep_modify(daplka_ia_resource_t *, intptr_t, int,
 155     cred_t *, int *);
 156 static int daplka_ep_free(daplka_ia_resource_t *, intptr_t, int,
 157     cred_t *, int *);
 158 static int daplka_ep_connect(daplka_ia_resource_t *, intptr_t, int,
 159     cred_t *, int *);
 160 static int daplka_ep_disconnect(daplka_ia_resource_t *, intptr_t, int,
 161     cred_t *, int *);
 162 static int daplka_ep_reinit(daplka_ia_resource_t *, intptr_t, int,
 163     cred_t *, int *);
 164 static int daplka_ep_destroy(daplka_resource_t *);
 165 static void daplka_hash_ep_free(void *);
 166 static int daplka_ep_failback(void *objp, void *arg);
 167 static int daplka_ep_altpath(daplka_ep_resource_t *, ib_gid_t *);
 168 
 169 static uint32_t daplka_ep_get_state(daplka_ep_resource_t *);
 170 static void daplka_ep_set_state(daplka_ep_resource_t *, uint32_t, uint32_t);
 171 static boolean_t daplka_ep_transition_is_valid(uint32_t, uint32_t);
 172 static daplka_timer_info_t *daplka_timer_info_alloc(daplka_ep_resource_t *);
 173 static void daplka_timer_info_free(daplka_timer_info_t *);
 174 static void daplka_timer_handler(void *);
 175 static void daplka_timer_dispatch(void *);
 176 static void daplka_timer_thread(void *);
 177 static int daplka_cancel_timer(daplka_ep_resource_t *);
 178 static void daplka_hash_timer_free(void *);
 179 
 180 /*
 181  * EVD ioctls and supporting functions
 182  */
 183 static int daplka_evd_create(daplka_ia_resource_t *, intptr_t, int,
 184     cred_t *, int *);
 185 static int daplka_cq_resize(daplka_ia_resource_t *, intptr_t, int,
 186     cred_t *, int *);
 187 static int daplka_evd_free(daplka_ia_resource_t *, intptr_t, int,
 188     cred_t *, int *);
 189 static int daplka_event_poll(daplka_ia_resource_t *, intptr_t, int,
 190     cred_t *, int *);
 191 static int daplka_evd_destroy(daplka_resource_t *);
 192 static void daplka_cq_handler(ibt_cq_hdl_t, void *);
 193 static void daplka_evd_wakeup(daplka_evd_resource_t *,
 194     daplka_evd_event_list_t *, daplka_evd_event_t *);
 195 static void daplka_evd_event_enqueue(daplka_evd_event_list_t *,
 196     daplka_evd_event_t *);
 197 static daplka_evd_event_t *daplka_evd_event_dequeue(daplka_evd_event_list_t *);
 198 static void daplka_hash_evd_free(void *);
 199 
 200 
 201 /*
 202  * SRQ ioctls and supporting functions
 203  */
 204 static int daplka_srq_create(daplka_ia_resource_t *, intptr_t, int,
 205     cred_t *, int *);
 206 static int daplka_srq_resize(daplka_ia_resource_t *, intptr_t, int,
 207     cred_t *, int *);
 208 static int daplka_srq_free(daplka_ia_resource_t *, intptr_t, int,
 209     cred_t *, int *);
 210 static int daplka_srq_destroy(daplka_resource_t *);
 211 static void daplka_hash_srq_free(void *);
 212 
 213 /*
 214  * Miscellaneous ioctls
 215  */
 216 static int daplka_cr_accept(daplka_ia_resource_t *, intptr_t, int,
 217     cred_t *, int *);
 218 static int daplka_cr_reject(daplka_ia_resource_t *, intptr_t, int,
 219     cred_t *, int *);
 220 static int daplka_cr_handoff(daplka_ia_resource_t *, intptr_t, int,
 221     cred_t *, int *);
 222 static int daplka_ia_query(daplka_ia_resource_t *, intptr_t, int,
 223     cred_t *, int *);
 224 
 225 /*
 226  * PD ioctls and supporting functions
 227  */
 228 static int daplka_pd_alloc(daplka_ia_resource_t *, intptr_t, int,
 229     cred_t *, int *);
 230 static int daplka_pd_free(daplka_ia_resource_t *, intptr_t, int,
 231     cred_t *, int *);
 232 static int daplka_pd_destroy(daplka_resource_t *);
 233 static void daplka_hash_pd_free(void *);
 234 
 235 /*
 236  * SP ioctls and supporting functions
 237  */
 238 static int daplka_service_register(daplka_ia_resource_t *, intptr_t, int,
 239     cred_t *, int *);
 240 static int daplka_service_deregister(daplka_ia_resource_t *, intptr_t, int,
 241     cred_t *, int *);
 242 static int daplka_sp_destroy(daplka_resource_t *);
 243 static void daplka_hash_sp_free(void *);
 244 static void daplka_hash_sp_unref(void *);
 245 
 246 /*
 247  * MR ioctls and supporting functions
 248  */
 249 static int daplka_mr_register(daplka_ia_resource_t *, intptr_t, int,
 250     cred_t *, int *);
 251 static int daplka_mr_register_lmr(daplka_ia_resource_t *, intptr_t, int,
 252     cred_t *, int *);
 253 static int daplka_mr_register_shared(daplka_ia_resource_t *, intptr_t, int,
 254     cred_t *, int *);
 255 static int daplka_mr_deregister(daplka_ia_resource_t *, intptr_t, int,
 256     cred_t *, int *);
 257 static int daplka_mr_sync(daplka_ia_resource_t *, intptr_t, int,
 258     cred_t *, int *);
 259 static int daplka_mr_destroy(daplka_resource_t *);
 260 static void daplka_hash_mr_free(void *);
 261 static void daplka_shared_mr_free(daplka_mr_resource_t *);
 262 
 263 /*
 264  * MW ioctls and supporting functions
 265  */
 266 static int daplka_mw_alloc(daplka_ia_resource_t *, intptr_t, int,
 267     cred_t *, int *);
 268 static int daplka_mw_free(daplka_ia_resource_t *, intptr_t, int,
 269     cred_t *, int *);
 270 static int daplka_mw_destroy(daplka_resource_t *);
 271 static void daplka_hash_mw_free(void *);
 272 
 273 /*
 274  * CNO ioctls and supporting functions
 275  */
 276 static int daplka_cno_alloc(daplka_ia_resource_t *, intptr_t, int,
 277     cred_t *, int *);
 278 static int daplka_cno_free(daplka_ia_resource_t *, intptr_t, int,
 279     cred_t *, int *);
 280 static int daplka_cno_wait(daplka_ia_resource_t *, intptr_t, int,
 281     cred_t *, int *);
 282 static int daplka_cno_destroy(daplka_resource_t *);
 283 static void daplka_hash_cno_free(void *);
 284 
 285 /*
 286  * CM handlers
 287  */
 288 static  ibt_cm_status_t daplka_cm_rc_handler(void *, ibt_cm_event_t *,
 289     ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
 290 
 291 static  ibt_cm_status_t daplka_cm_service_handler(void *, ibt_cm_event_t *,
 292     ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
 293 
 294 static ibt_cm_status_t daplka_cm_service_req(daplka_sp_resource_t *,
 295     ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
 296 
 297 /*
 298  * resource management routines
 299  */
 300 static int daplka_resource_reserve(minor_t *);
 301 static int daplka_resource_insert(minor_t, daplka_resource_t *);
 302 static daplka_resource_t *daplka_resource_remove(minor_t rnum);
 303 static daplka_resource_t *daplka_resource_lookup(minor_t);
 304 static void daplka_resource_init(void);
 305 static void daplka_resource_fini(void);
 306 static struct daplka_resource_table daplka_resource;
 307 
 308 /*
 309  * hash table routines
 310  */
 311 static int daplka_hash_insert(daplka_hash_table_t *, uint64_t *, void *);
 312 static int daplka_hash_remove(daplka_hash_table_t *, uint64_t, void **);
 313 static void daplka_hash_walk(daplka_hash_table_t *, int (*)(void *, void *),
 314     void *, krw_t);
 315 static void *daplka_hash_lookup(daplka_hash_table_t *, uint64_t);
 316 static int daplka_hash_create(daplka_hash_table_t *, uint_t,
 317     void (*)(void *), void (*)(void *));
 318 static void daplka_hash_destroy(daplka_hash_table_t *);
 319 static uint32_t daplka_hash_getsize(daplka_hash_table_t *);
 320 static void daplka_hash_generic_lookup(void *);
 321 
 322 static uint32_t daplka_timer_hkey_gen();
 323 
 324 /*
 325  * async event handlers
 326  */
 327 static void daplka_async_event_create(ibt_async_code_t, ibt_async_event_t *,
 328     uint64_t, daplka_ia_resource_t *);
 329 static void daplka_rc_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
 330     ibt_async_event_t *);
 331 static void daplka_cq_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
 332     ibt_async_event_t *);
 333 static void daplka_un_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
 334     ibt_async_event_t *);
 335 static void daplka_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
 336     ibt_async_event_t *);
 337 static void daplka_sm_notice_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
 338     ibt_subnet_event_t *event);
 339 static void daplka_sm_gid_avail(ib_gid_t *, ib_gid_t *);
 340 
 341 /*
 342  * IBTF wrappers and default limits used for resource accounting
 343  */
 344 static boolean_t        daplka_accounting_enabled = B_TRUE;
 345 static uint32_t         daplka_max_qp_percent = 100;
 346 static uint32_t         daplka_max_cq_percent = 100;
 347 static uint32_t         daplka_max_pd_percent = 100;
 348 static uint32_t         daplka_max_mw_percent = 100;
 349 static uint32_t         daplka_max_mr_percent = 100;
 350 static uint32_t         daplka_max_srq_percent = 100;
 351 
 352 static ibt_status_t
 353 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *, ibt_hca_hdl_t,
 354     ibt_chan_alloc_flags_t, ibt_rc_chan_alloc_args_t *,
 355     ibt_channel_hdl_t *, ibt_chan_sizes_t *);
 356 
 357 static ibt_status_t
 358 daplka_ibt_free_channel(daplka_ep_resource_t *, ibt_channel_hdl_t);
 359 
 360 static ibt_status_t
 361 daplka_ibt_alloc_cq(daplka_evd_resource_t *, ibt_hca_hdl_t,
 362     ibt_cq_attr_t *, ibt_cq_hdl_t *, uint_t *);
 363 
 364 static ibt_status_t
 365 daplka_ibt_free_cq(daplka_evd_resource_t *, ibt_cq_hdl_t);
 366 
 367 static ibt_status_t
 368 daplka_ibt_alloc_pd(daplka_pd_resource_t *, ibt_hca_hdl_t,
 369     ibt_pd_flags_t, ibt_pd_hdl_t *);
 370 
 371 static ibt_status_t
 372 daplka_ibt_free_pd(daplka_pd_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t);
 373 
 374 static ibt_status_t
 375 daplka_ibt_alloc_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
 376     ibt_mw_flags_t, ibt_mw_hdl_t *, ibt_rkey_t *);
 377 
 378 static ibt_status_t
 379 daplka_ibt_free_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_mw_hdl_t);
 380 
 381 static ibt_status_t
 382 daplka_ibt_register_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
 383     ibt_mr_attr_t *, ibt_mr_hdl_t *, ibt_mr_desc_t *);
 384 
 385 static ibt_status_t
 386 daplka_ibt_register_shared_mr(daplka_mr_resource_t *, ibt_hca_hdl_t,
 387     ibt_mr_hdl_t, ibt_pd_hdl_t, ibt_smr_attr_t *, ibt_mr_hdl_t *,
 388     ibt_mr_desc_t *);
 389 
 390 static ibt_status_t
 391 daplka_ibt_deregister_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_mr_hdl_t);
 392 
 393 static ibt_status_t
 394 daplka_ibt_alloc_srq(daplka_srq_resource_t *, ibt_hca_hdl_t, ibt_srq_flags_t,
 395     ibt_pd_hdl_t, ibt_srq_sizes_t *, ibt_srq_hdl_t *, ibt_srq_sizes_t *);
 396 
 397 static ibt_status_t
 398 daplka_ibt_free_srq(daplka_srq_resource_t *, ibt_srq_hdl_t);
 399 
 400 /*
 401  * macros for manipulating resource objects.
 402  * these macros can be used on objects that begin with a
 403  * daplka_resource_t header.
 404  */
 405 #define DAPLKA_RS_REFCNT(rp) ((rp)->header.rs_refcnt)
 406 
 407 #define DAPLKA_RS_REF(rp) {                     \
 408         mutex_enter(&(rp)->header.rs_reflock);   \
 409         (rp)->header.rs_refcnt++;            \
 410         ASSERT((rp)->header.rs_refcnt != 0); \
 411         mutex_exit(&(rp)->header.rs_reflock);    \
 412 }
 413 
 414 #define DAPLKA_RS_UNREF(rp) {                                   \
 415         mutex_enter(&(rp)->header.rs_reflock);                   \
 416         ASSERT((rp)->header.rs_refcnt != 0);                 \
 417         if (--(rp)->header.rs_refcnt == 0) {                 \
 418                 ASSERT((rp)->header.rs_free != NULL);                \
 419                 mutex_exit(&(rp)->header.rs_reflock);            \
 420                 (rp)->header.rs_free((daplka_resource_t *)rp);       \
 421         } else {                                                \
 422                 mutex_exit(&(rp)->header.rs_reflock);            \
 423         }                                                       \
 424 }
 425 
 426 #define DAPLKA_RS_INIT(rp, type, rnum, free_func) {     \
 427         (rp)->header.rs_refcnt = 1;                  \
 428         (rp)->header.rs_type = (type);                       \
 429         (rp)->header.rs_rnum = (rnum);                       \
 430         (rp)->header.rs_charged = 0;                 \
 431         (rp)->header.rs_free = (free_func);          \
 432         mutex_init(&(rp)->header.rs_reflock, NULL,       \
 433             MUTEX_DRIVER, NULL);                        \
 434 }
 435 
 436 #define DAPLKA_RS_FINI(rp) {                            \
 437         mutex_destroy(&(rp)->header.rs_reflock); \
 438 }
 439 
 440 #define DAPLKA_RS_ACCT_INC(rp, cnt) {                           \
 441         atomic_add_32(&(rp)->header.rs_charged, (cnt));          \
 442 }
 443 #define DAPLKA_RS_ACCT_DEC(rp, cnt) {                           \
 444         atomic_add_32(&(rp)->header.rs_charged, -(cnt)); \
 445 }
 446 #define DAPLKA_RS_ACCT_CHARGED(rp) ((rp)->header.rs_charged)
 447 
 448 #define DAPLKA_RS_RNUM(rp) ((rp)->header.rs_rnum)
 449 #define DAPLKA_RS_TYPE(rp) ((rp)->header.rs_type)
 450 #define DAPLKA_RS_RESERVED(rp) ((intptr_t)(rp) == DAPLKA_RC_RESERVED)
 451 
 452 /*
 453  * depending on the timeout value does a cv_wait_sig or cv_timedwait_sig
 454  */
 455 #define DAPLKA_EVD_WAIT(cvp, mp, timeout)                       \
 456         ((timeout) == LONG_MAX) ? cv_wait_sig((cvp), (mp)) :    \
 457         cv_timedwait_sig((cvp), (mp), (timeout))
 458 
/*
 * HCA reference counting.  The _WITHOUT_LOCK variants assume the
 * caller already holds dp->daplka_mutex; the locked variants wrap
 * them with the mutex themselves.
 */
#define	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt++)
#define	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt--)

#define	DAPLKA_HOLD_HCA(dp, hca) {			\
	mutex_enter(&(dp)->daplka_mutex);		\
	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);		\
	mutex_exit(&(dp)->daplka_mutex);		\
}

#define	DAPLKA_RELE_HCA(dp, hca) {			\
	mutex_enter(&(dp)->daplka_mutex);		\
	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca);		\
	mutex_exit(&(dp)->daplka_mutex);		\
}

/*
 * an HCA is busy (and may not be closed/unloaded) while it is
 * referenced or while any QP/CQ/PD/MW/MR object still counts
 * against it.  caller must hold dp->daplka_mutex for a stable read.
 */
#define	DAPLKA_HCA_BUSY(hca)				\
	((hca)->hca_ref_cnt != 0 ||			\
	(hca)->hca_qp_count != 0 ||			\
	(hca)->hca_cq_count != 0 ||			\
	(hca)->hca_pd_count != 0 ||			\
	(hca)->hca_mw_count != 0 ||			\
	(hca)->hca_mr_count != 0)
 481 
 482 
 483 static struct cb_ops daplka_cb_ops = {
 484         daplka_open,            /* cb_open */
 485         daplka_close,           /* cb_close */
 486         nodev,                  /* cb_strategy */
 487         nodev,                  /* cb_print */
 488         nodev,                  /* cb_dump */
 489         nodev,                  /* cb_read */
 490         nodev,                  /* cb_write */
 491         daplka_ioctl,           /* cb_ioctl */
 492         nodev,                  /* cb_devmap */
 493         nodev,                  /* cb_mmap */
 494         nodev,                  /* cb_segmap */
 495         nochpoll,               /* cb_chpoll */
 496         ddi_prop_op,            /* cb_prop_op */
 497         NULL,                   /* cb_stream */
 498         D_NEW | D_MP,           /* cb_flag */
 499         CB_REV,                 /* rev */
 500         nodev,                  /* int (*cb_aread)() */
 501         nodev                   /* int (*cb_awrite)() */
 502 };
 503 
 504 static struct dev_ops daplka_ops = {
 505         DEVO_REV,               /* devo_rev */
 506         0,                      /* devo_refcnt */
 507         daplka_info,            /* devo_getinfo */
 508         nulldev,                /* devo_identify */
 509         nulldev,                /* devo_probe */
 510         daplka_attach,          /* devo_attach */
 511         daplka_detach,          /* devo_detach */
 512         nodev,                  /* devo_reset */
 513         &daplka_cb_ops,             /* devo_cb_ops */
 514         (struct bus_ops *)NULL, /* devo_bus_ops */
 515         nulldev,                /* power */
 516         ddi_quiesce_not_needed, /* devo_quiesce */
 517 };
 518 
 519 /*
 520  * Module linkage information for the kernel.
 521  */
 522 static struct modldrv modldrv = {
 523         &mod_driverops,
 524         "uDAPL Service Driver",
 525         &daplka_ops,
 526 };
 527 
 528 static struct modlinkage modlinkage = {
 529 #ifdef _LP64
 530         MODREV_1, { (void *) &modldrv, NULL, NULL, NULL, NULL, NULL, NULL }
 531 #else
 532         MODREV_1, { (void *) &modldrv, NULL, NULL, NULL }
 533 #endif
 534 };
 535 
 536 /*
 537  * daplka_dev holds global driver state and a list of HCAs
 538  */
 539 static daplka_t *daplka_dev = NULL;
 540 static void *daplka_state = NULL;
 541 
 542 /*
 543  * global SP hash table
 544  */
 545 static daplka_hash_table_t daplka_global_sp_htbl;
 546 
 547 /*
 548  * timer_info hash table
 549  */
 550 static daplka_hash_table_t daplka_timer_info_htbl;
 551 static uint32_t daplka_timer_hkey = 0;
 552 
 553 /*
 554  * shared MR avl tree
 555  */
 556 static avl_tree_t daplka_shared_mr_tree;
 557 static kmutex_t daplka_shared_mr_lock;
 558 static int daplka_shared_mr_cmp(const void *, const void *);
 559 
 560 /*
 561  * default kmem flags used by this driver
 562  */
 563 static int daplka_km_flags = KM_SLEEP;
 564 
 565 /*
 566  * taskq used for handling background tasks
 567  */
 568 static taskq_t *daplka_taskq = NULL;
 569 
 570 /*
 571  * daplka_cm_delay is the length of time the active
 572  * side needs to wait before timing out on the REP message.
 573  */
 574 static clock_t daplka_cm_delay = 60000000;
 575 
 576 /*
 577  * modunload will fail if pending_close is non-zero
 578  */
 579 static uint32_t daplka_pending_close = 0;
 580 
 581 static struct ibt_clnt_modinfo_s daplka_clnt_modinfo = {
 582         IBTI_V_CURR,
 583         IBT_USER,
 584         daplka_async_handler,
 585         NULL,
 586         DAPLKA_DRV_NAME
 587 };
 588 
 589 /*
 590  * Module Installation
 591  */
 592 int
 593 _init(void)
 594 {
 595         int status;
 596 
 597         status = ddi_soft_state_init(&daplka_state, sizeof (daplka_t), 1);
 598         if (status != 0) {
 599                 return (status);
 600         }
 601 
 602         mutex_init(&daplka_dbglock, NULL, MUTEX_DRIVER, NULL);
 603         bzero(daplka_dbgbuf, sizeof (daplka_dbgbuf));
 604         daplka_dbgnext = 0;
 605         daplka_dbginit = 1;
 606 
 607         daplka_resource_init();
 608 
 609         status = mod_install(&modlinkage);
 610         if (status != DDI_SUCCESS) {
 611                 /* undo inits done before mod_install */
 612                 daplka_resource_fini();
 613                 mutex_destroy(&daplka_dbglock);
 614                 ddi_soft_state_fini(&daplka_state);
 615         }
 616         return (status);
 617 }
 618 
 619 /*
 620  * Module Removal
 621  */
 622 int
 623 _fini(void)
 624 {
 625         int     status;
 626 
 627         /*
 628          * mod_remove causes detach to be called
 629          */
 630         if ((status = mod_remove(&modlinkage)) != 0) {
 631                 DERR("fini: mod_remove failed: 0x%x\n", status);
 632                 return (status);
 633         }
 634 
 635         daplka_resource_fini();
 636         mutex_destroy(&daplka_dbglock);
 637         ddi_soft_state_fini(&daplka_state);
 638 
 639         return (status);
 640 }
 641 
 642 /*
 643  * Return Module Info.
 644  */
 645 int
 646 _info(struct modinfo *modinfop)
 647 {
 648         return (mod_info(&modlinkage, modinfop));
 649 }
 650 
 651 static void
 652 daplka_enqueue_hca(daplka_t *dp, daplka_hca_t *hca)
 653 {
 654         daplka_hca_t *h;
 655 
 656         ASSERT(mutex_owned(&dp->daplka_mutex));
 657 
 658         if (dp->daplka_hca_list_head == NULL) {
 659                 dp->daplka_hca_list_head = hca;
 660         } else {
 661                 h = dp->daplka_hca_list_head;
 662                 while (h->hca_next != NULL)
 663                         h = h->hca_next;
 664 
 665                 h->hca_next = hca;
 666         }
 667 }
 668 
 669 static void
 670 daplka_dequeue_hca(daplka_t *dp, daplka_hca_t *hca)
 671 {
 672         daplka_hca_t *h;
 673 
 674         ASSERT(mutex_owned(&dp->daplka_mutex));
 675 
 676         if (dp->daplka_hca_list_head == hca)
 677                 dp->daplka_hca_list_head = hca->hca_next;
 678         else {
 679                 h = dp->daplka_hca_list_head;
 680                 while (h->hca_next != hca)
 681                         h = h->hca_next;
 682                 h->hca_next = hca->hca_next;
 683         }
 684 }
 685 
/*
 * Open the HCA identified by hca_guid, query its attributes and the
 * portinfo of every port, and enqueue the resulting daplka_hca_t on
 * dp's HCA list.  Returns IBT_SUCCESS or the failing ibt_* status;
 * on failure everything allocated/opened here is released.
 */
static int
daplka_init_hca(daplka_t *dp, ib_guid_t hca_guid)
{
	daplka_hca_t		*hca;
	ibt_hca_portinfo_t	*pinfop;
	uint_t			size;
	int			j;
	ibt_status_t		status;

	hca = kmem_zalloc(sizeof (daplka_hca_t), KM_SLEEP);

	hca->hca_guid = hca_guid;

	/*
	 * open the HCA for use
	 */
	status = ibt_open_hca(dp->daplka_clnt_hdl, hca_guid, &hca->hca_hdl);
	if (status != IBT_SUCCESS) {
		if (status == IBT_HCA_IN_USE) {
			DERR("ibt_open_hca() returned IBT_HCA_IN_USE\n");
		} else {
			DERR("ibt_open_hca() returned %d\n", status);
		}
		/* nothing opened yet: just free the shell and bail */
		kmem_free(hca, sizeof (daplka_hca_t));
		return (status);
	}

	/*
	 * query HCA to get its info
	 */
	status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
	if (status != IBT_SUCCESS) {
		DERR("ibt_query_hca returned %d (hca_guid 0x%llx)\n",
		    status, (longlong_t)hca_guid);
		goto out;
	}

	/*
	 * query HCA to get info of all ports (port 0 means "all")
	 */
	status = ibt_query_hca_ports(hca->hca_hdl,
	    0, &pinfop, &hca->hca_nports, &size);
	if (status != IBT_SUCCESS) {
		DERR("ibt_query_all_ports returned %d "
		    "(hca_guid 0x%llx)\n", status,
		    (longlong_t)hca_guid);
		goto out;
	}
	/* remember the portinfo buffer/size so teardown can free it */
	hca->hca_ports = pinfop;
	hca->hca_pinfosz = size;

	DERR("hca guid 0x%llx, nports %d\n",
	    (longlong_t)hca_guid, hca->hca_nports);
	for (j = 0; j < hca->hca_nports; j++) {
		DERR("port %d: state %d prefix 0x%016llx "
		    "guid %016llx\n",
		    pinfop[j].p_port_num, pinfop[j].p_linkstate,
		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_prefix,
		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_guid);
	}

	/* publish the fully initialized HCA on the device's list */
	mutex_enter(&dp->daplka_mutex);
	daplka_enqueue_hca(dp, hca);
	mutex_exit(&dp->daplka_mutex);

	return (IBT_SUCCESS);

out:
	/* close the HCA and release everything allocated above */
	(void) ibt_close_hca(hca->hca_hdl);
	kmem_free(hca, sizeof (daplka_hca_t));
	return (status);
}
 758 
 759 /*
 760  * this function obtains the list of HCAs from IBTF.
 761  * the HCAs are then opened and the returned handles
 762  * and attributes are stored into the global daplka_dev
 763  * structure.
 764  */
 765 static int
 766 daplka_init_hcas(daplka_t *dp)
 767 {
 768         int             i;
 769         ib_guid_t       *hca_guids;
 770         uint32_t        hca_count;
 771 
 772         /*
 773          * get the num & list of HCAs present
 774          */
 775         hca_count = ibt_get_hca_list(&hca_guids);
 776         DERR("No. of HCAs present %d\n", hca_count);
 777 
 778         if (hca_count != 0) {
 779                 /*
 780                  * get the info for each available HCA
 781                  */
 782                 for (i = 0; i < hca_count; i++)
 783                         (void) daplka_init_hca(dp, hca_guids[i]);
 784 
 785                 ibt_free_hca_list(hca_guids, hca_count);
 786         }
 787 
 788         if (dp->daplka_hca_list_head != NULL)
 789                 return (IBT_SUCCESS);
 790         else
 791                 return (IBT_FAILURE);
 792 }
 793 
 794 static int
 795 daplka_fini_hca(daplka_t *dp, daplka_hca_t *hca)
 796 {
 797         ibt_status_t    status;
 798 
 799         if (hca->hca_hdl != NULL) {
 800                 status = ibt_close_hca(hca->hca_hdl);
 801                 if (status != IBT_SUCCESS) {
 802                         DERR("ibt_close_hca returned %d"
 803                             " (hca_guid 0x%llx)\n", status,
 804                             (longlong_t)hca->hca_guid);
 805 
 806                         mutex_enter(&dp->daplka_mutex);
 807                         daplka_enqueue_hca(dp, hca);
 808                         mutex_exit(&dp->daplka_mutex);
 809 
 810                         return (status);
 811                 }
 812         }
 813 
 814         if (hca->hca_ports != NULL)
 815                 ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
 816 
 817         kmem_free(hca, sizeof (daplka_hca_t));
 818         return (IBT_SUCCESS);
 819 }
 820 
 821 /*
 822  * closes all HCAs and frees up the HCA list
 823  */
 824 static int
 825 daplka_fini_hcas(daplka_t *dp)
 826 {
 827         ibt_status_t    status;
 828         daplka_hca_t    *hca;
 829 
 830         mutex_enter(&daplka_dev->daplka_mutex);
 831         while ((hca = dp->daplka_hca_list_head) != NULL) {
 832                 if (DAPLKA_HCA_BUSY(hca)) {
 833                         mutex_exit(&daplka_dev->daplka_mutex);
 834                         return (IBT_HCA_RESOURCES_NOT_FREED);
 835                 }
 836                 daplka_dequeue_hca(daplka_dev, hca);
 837                 mutex_exit(&daplka_dev->daplka_mutex);
 838 
 839                 if ((status = daplka_fini_hca(dp, hca)) != IBT_SUCCESS)
 840                         return (status);
 841 
 842                 mutex_enter(&daplka_dev->daplka_mutex);
 843         }
 844         mutex_exit(&daplka_dev->daplka_mutex);
 845 
 846         DERR("dapl kernel agent unloaded\n");
 847         return (IBT_SUCCESS);
 848 }
 849 
 850 
 851 /*
 852  * Attach the device, create and fill in daplka_dev
 853  */
 854 static int
 855 daplka_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 856 {
 857         daplka_t        *dp;
 858         int             instance, retval, err;
 859         boolean_t       sp_htbl_allocated = B_FALSE;
 860         boolean_t       timer_htbl_allocated = B_FALSE;
 861         boolean_t       shared_mr_tree_allocated = B_FALSE;
 862 
 863         switch (cmd) {
 864         case DDI_ATTACH:
 865                 break;
 866         case DDI_RESUME:
 867                 return (DDI_SUCCESS);
 868         default:
 869                 return (DDI_FAILURE);
 870         }
 871 
 872         /*
 873          * Allocate soft data structure
 874          */
 875         instance = ddi_get_instance(dip);
 876         if (ddi_soft_state_zalloc(daplka_state, instance) != DDI_SUCCESS) {
 877                 DERR("attach: bad state zalloc\n");
 878                 return (DDI_FAILURE);
 879         }
 880 
 881         dp = ddi_get_soft_state(daplka_state, instance);
 882         if (dp == NULL) {
 883                 ddi_soft_state_free(daplka_state, instance);
 884                 DERR("attach: cannot get soft state\n");
 885                 return (DDI_FAILURE);
 886         }
 887         /*
 888          * Stuff private info into dip.
 889          */
 890         dp->daplka_dip = dip;
 891         ddi_set_driver_private(dip, dp);
 892         daplka_dev = dp;
 893         mutex_init(&dp->daplka_mutex, NULL, MUTEX_DRIVER, NULL);
 894 
 895         /*
 896          * Register driver with IBTF
 897          */
 898         retval = ibt_attach(&daplka_clnt_modinfo, dip, dp,
 899             &dp->daplka_clnt_hdl);
 900         if (retval != IBT_SUCCESS) {
 901                 DERR("attach: ibt_attach failed: error = %d\n", retval);
 902                 retval = DDI_FAILURE;
 903                 goto error;
 904         }
 905         /* Register to receive SM events */
 906         ibt_register_subnet_notices(dp->daplka_clnt_hdl,
 907             daplka_sm_notice_handler, NULL);
 908 
 909         retval = daplka_init_hcas(dp);
 910         if (retval != IBT_SUCCESS) {
 911                 DERR("attach: hca_init failed: error = %d\n", retval);
 912                 retval = DDI_FAILURE;
 913                 goto error;
 914         }
 915         /*
 916          * this table is used by cr_handoff
 917          */
 918         retval = daplka_hash_create(&daplka_global_sp_htbl,
 919             DAPLKA_G_SP_HTBL_SZ, daplka_hash_sp_unref,
 920             daplka_hash_generic_lookup);
 921         if (retval != 0) {
 922                 DERR("attach: cannot create sp hash table\n");
 923                 retval = DDI_FAILURE;
 924                 goto error;
 925         }
 926         sp_htbl_allocated = B_TRUE;
 927 
 928         /*
 929          * this table stores per EP timer information.
 930          * timer_info_t objects are inserted into this table whenever
 931          * a EP timer is set. timers get removed when they expire
 932          * or when they get cancelled.
 933          */
 934         retval = daplka_hash_create(&daplka_timer_info_htbl,
 935             DAPLKA_TIMER_HTBL_SZ, daplka_hash_timer_free, NULL);
 936         if (retval != 0) {
 937                 DERR("attach: cannot create timer hash table\n");
 938                 retval = DDI_FAILURE;
 939                 goto error;
 940         }
 941         timer_htbl_allocated = B_TRUE;
 942 
 943         /*
 944          * this taskq is currently only used for processing timers.
 945          * other processing may also use this taskq in the future.
 946          */
 947         daplka_taskq = taskq_create(DAPLKA_DRV_NAME, DAPLKA_TQ_NTHREADS,
 948             maxclsyspri, 1, DAPLKA_TQ_NTHREADS, TASKQ_DYNAMIC);
 949         if (daplka_taskq == NULL) {
 950                 DERR("attach: cannot create daplka_taskq\n");
 951                 retval = DDI_FAILURE;
 952                 goto error;
 953         }
 954 
 955         /*
 956          * daplka_shared_mr_tree holds daplka_shared_mr_t objects that
 957          * gets retrieved or created when daplka_mr_register_shared is
 958          * called.
 959          */
 960         mutex_init(&daplka_shared_mr_lock, NULL, MUTEX_DRIVER, NULL);
 961 
 962         avl_create(&daplka_shared_mr_tree, daplka_shared_mr_cmp,
 963             sizeof (daplka_shared_mr_t),
 964             offsetof(daplka_shared_mr_t, smr_node));
 965         shared_mr_tree_allocated = B_TRUE;
 966 
 967         /*
 968          * Create the filesystem device node.
 969          */
 970         if (ddi_create_minor_node(dip, DAPLKA_MINOR_NAME, S_IFCHR,
 971             0, DDI_PSEUDO, NULL) != DDI_SUCCESS) {
 972                 DERR("attach: bad create_minor_node\n");
 973                 retval = DDI_FAILURE;
 974                 goto error;
 975         }
 976         dp->daplka_status = DAPLKA_STATE_ATTACHED;
 977         ddi_report_dev(dip);
 978         return (DDI_SUCCESS);
 979 
 980 error:
 981         if (shared_mr_tree_allocated) {
 982                 avl_destroy(&daplka_shared_mr_tree);
 983                 mutex_destroy(&daplka_shared_mr_lock);
 984         }
 985 
 986         if (daplka_taskq) {
 987                 taskq_destroy(daplka_taskq);
 988                 daplka_taskq = NULL;
 989         }
 990 
 991         if (timer_htbl_allocated) {
 992                 daplka_hash_destroy(&daplka_timer_info_htbl);
 993         }
 994 
 995         if (sp_htbl_allocated) {
 996                 daplka_hash_destroy(&daplka_global_sp_htbl);
 997         }
 998 
 999         err = daplka_fini_hcas(dp);
1000         if (err != IBT_SUCCESS) {
1001                 DERR("attach: hca_fini returned %d\n", err);
1002         }
1003 
1004         if (dp->daplka_clnt_hdl != NULL) {
1005                 /* unregister SM event notification */
1006                 ibt_register_subnet_notices(dp->daplka_clnt_hdl,
1007                     (ibt_sm_notice_handler_t)NULL, NULL);
1008                 err = ibt_detach(dp->daplka_clnt_hdl);
1009 
1010                 if (err != IBT_SUCCESS) {
1011                         DERR("attach: ibt_detach returned %d\n", err);
1012                 }
1013         }
1014         mutex_destroy(&dp->daplka_mutex);
1015 
1016         if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
1017                 ddi_remove_minor_node(dip, NULL);
1018         }
1019         ddi_soft_state_free(daplka_state, instance);
1020         return (retval);
1021 }
1022 
1023 /*
1024  * Detach - Free resources allocated in attach
1025  */
1026 /* ARGSUSED */
1027 static int
1028 daplka_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1029 {
1030         int             instance, err;
1031         void            *cookie = NULL;
1032         daplka_t        *dp;
1033 
1034         if (cmd != DDI_DETACH) {
1035                 return (DDI_FAILURE);
1036         }
1037         if (daplka_resource.daplka_rc_cnt > 0 ||
1038             daplka_pending_close > 0) {
1039                 DERR("detach: driver in use\n");
1040                 return (DDI_FAILURE);
1041         }
1042 
1043         instance = ddi_get_instance(dip);
1044         dp = ddi_get_soft_state(daplka_state, instance);
1045         if (dp == NULL) {
1046                 DERR("detach: cannot get soft state\n");
1047                 return (DDI_FAILURE);
1048         }
1049         err = daplka_fini_hcas(dp);
1050         if (err != IBT_SUCCESS) {
1051                 DERR("detach: hca_fini returned %d\n", err);
1052                 return (DDI_FAILURE);
1053         }
1054         if (dp->daplka_clnt_hdl != NULL) {
1055                 /* unregister SM event notification */
1056                 ibt_register_subnet_notices(dp->daplka_clnt_hdl,
1057                     (ibt_sm_notice_handler_t)NULL, NULL);
1058                 err = ibt_detach(dp->daplka_clnt_hdl);
1059                 if (err != IBT_SUCCESS) {
1060                         DERR("detach: ibt_detach returned %d\n", err);
1061                         return (DDI_FAILURE);
1062                 }
1063                 dp->daplka_clnt_hdl = NULL;
1064         }
1065         mutex_destroy(&dp->daplka_mutex);
1066         if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
1067                 ddi_remove_minor_node(dip, NULL);
1068         }
1069         dp->daplka_status = DAPLKA_STATE_DETACHED;
1070         ddi_soft_state_free(daplka_state, instance);
1071         daplka_dev = NULL;
1072 
1073         /*
1074          * by the time we get here, all clients of dapl should
1075          * have exited and completed their cleanup properly.
1076          * we can assert that all global data structures are now
1077          * empty.
1078          */
1079         ASSERT(avl_destroy_nodes(&daplka_shared_mr_tree, &cookie) == NULL);
1080         avl_destroy(&daplka_shared_mr_tree);
1081         mutex_destroy(&daplka_shared_mr_lock);
1082 
1083         ASSERT(daplka_hash_getsize(&daplka_timer_info_htbl) == 0);
1084         daplka_hash_destroy(&daplka_timer_info_htbl);
1085 
1086         ASSERT(daplka_hash_getsize(&daplka_global_sp_htbl) == 0);
1087         daplka_hash_destroy(&daplka_global_sp_htbl);
1088 
1089         taskq_destroy(daplka_taskq);
1090 
1091         return (DDI_SUCCESS);
1092 }
1093 
1094 /* ARGSUSED */
1095 static int
1096 daplka_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1097 {
1098         switch (infocmd) {
1099         case DDI_INFO_DEVT2DEVINFO:
1100                 if (daplka_dev !=  NULL) {
1101                         *result = daplka_dev->daplka_dip;
1102                         return (DDI_SUCCESS);
1103                 } else {
1104                         return (DDI_FAILURE);
1105                 }
1106 
1107         case DDI_INFO_DEVT2INSTANCE:
1108                 *result = 0;
1109                 return (DDI_SUCCESS);
1110 
1111         default:
1112                 return (DDI_FAILURE);
1113         }
1114 }
1115 
1116 /*
1117  * creates a EP resource.
1118  * A EP resource contains a RC channel. A EP resource holds a
1119  * reference to a send_evd (for the send CQ), recv_evd (for the
1120  * recv CQ), a connection evd and a PD. These references ensure
1121  * that the referenced resources are not freed until the EP itself
1122  * gets freed.
1123  */
1124 /* ARGSUSED */
1125 static int
1126 daplka_ep_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
1127         cred_t *cred, int *rvalp)
1128 {
1129         daplka_ep_resource_t            *ep_rp;
1130         daplka_pd_resource_t            *pd_rp;
1131         dapl_ep_create_t                args;
1132         ibt_rc_chan_alloc_args_t        chan_args;
1133         ibt_chan_alloc_flags_t          achan_flags;
1134         ibt_chan_sizes_t                chan_real_sizes;
1135         ibt_hca_attr_t                  *hca_attrp;
1136         uint64_t                        ep_hkey = 0;
1137         boolean_t                       inserted = B_FALSE;
1138         uint32_t                        old_state, new_state;
1139         int                             retval;
1140         ibt_status_t                    status;
1141 
1142         D3("ep_create: enter\n");
1143         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_create_t),
1144             mode);
1145         if (retval != 0) {
1146                 DERR("ep_create: copyin error %d\n", retval);
1147                 return (EFAULT);
1148         }
1149         ep_rp = kmem_zalloc(sizeof (daplka_ep_resource_t), daplka_km_flags);
1150         if (ep_rp == NULL) {
1151                 DERR("ep_create: cannot allocate ep_rp\n");
1152                 return (ENOMEM);
1153         }
1154         DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP,
1155             DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy);
1156 
1157         mutex_init(&ep_rp->ep_lock, NULL, MUTEX_DRIVER, NULL);
1158         cv_init(&ep_rp->ep_cv, NULL, CV_DRIVER, NULL);
1159         ep_rp->ep_hca = ia_rp->ia_hca;
1160         ep_rp->ep_cookie = args.ep_cookie;
1161         ep_rp->ep_timer_hkey = 0;
1162 
1163         /*
1164          * we don't have to use ep_get_state here because ep_rp is not in
1165          * ep_htbl yet. refer to the description of daplka_ep_set_state
1166          * for details about the EP state machine.
1167          */
1168         ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
1169         new_state = old_state = DAPLKA_EP_STATE_CLOSED;
1170 
1171         /* get reference to send evd and get cq handle */
1172         ep_rp->ep_snd_evd = (daplka_evd_resource_t *)
1173             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_snd_evd_hkey);
1174         if (ep_rp->ep_snd_evd == NULL) {
1175                 DERR("ep_create: ep_snd_evd %llx not found\n",
1176                     args.ep_snd_evd_hkey);
1177                 retval = EINVAL;
1178                 goto cleanup;
1179         }
1180         chan_args.rc_scq = ep_rp->ep_snd_evd->evd_cq_hdl;
1181         if (chan_args.rc_scq == NULL) {
1182                 DERR("ep_create: ep_snd_evd cq invalid\n");
1183                 retval = EINVAL;
1184                 goto cleanup;
1185         }
1186 
1187         /* get reference to recv evd and get cq handle */
1188         ep_rp->ep_rcv_evd = (daplka_evd_resource_t *)
1189             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_rcv_evd_hkey);
1190         if (ep_rp->ep_rcv_evd == NULL) {
1191                 DERR("ep_create: ep_rcv_evd %llx not found\n",
1192                     args.ep_rcv_evd_hkey);
1193                 retval = EINVAL;
1194                 goto cleanup;
1195         }
1196         chan_args.rc_rcq = ep_rp->ep_rcv_evd->evd_cq_hdl;
1197         if (chan_args.rc_rcq == NULL) {
1198                 DERR("ep_create: ep_rcv_evd cq invalid\n");
1199                 retval = EINVAL;
1200                 goto cleanup;
1201         }
1202 
1203         /* get reference to conn evd */
1204         ep_rp->ep_conn_evd = (daplka_evd_resource_t *)
1205             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_conn_evd_hkey);
1206         if (ep_rp->ep_conn_evd == NULL) {
1207                 DERR("ep_create: ep_conn_evd %llx not found\n",
1208                     args.ep_conn_evd_hkey);
1209                 retval = EINVAL;
1210                 goto cleanup;
1211         }
1212 
1213         /* get reference to SRQ if needed */
1214         if (args.ep_srq_attached) {
1215                 ep_rp->ep_srq_res = (daplka_srq_resource_t *)daplka_hash_lookup(
1216                     &ia_rp->ia_srq_htbl, args.ep_srq_hkey);
1217                 if (ep_rp->ep_srq_res == NULL) {
1218                         DERR("ep_create: ep_srq %llx not found\n",
1219                             (longlong_t)args.ep_srq_hkey);
1220                         retval = EINVAL;
1221                         goto cleanup;
1222                 }
1223                 ASSERT(DAPLKA_RS_TYPE(ep_rp->ep_srq_res) == DAPL_TYPE_SRQ);
1224                 D3("ep_create: ep_srq %p %llx\n", ep_rp->ep_srq_res,
1225                     (longlong_t)args.ep_srq_hkey);
1226         } else {
1227                 ep_rp->ep_srq_res = NULL;
1228         }
1229 
1230         /* get pd handle */
1231         pd_rp = (daplka_pd_resource_t *)
1232             daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.ep_pd_hkey);
1233         if (pd_rp == NULL) {
1234                 DERR("ep_create: cannot find pd resource\n");
1235                 retval = EINVAL;
1236                 goto cleanup;
1237         }
1238         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
1239         ep_rp->ep_pd_res = pd_rp;
1240         chan_args.rc_pd = pd_rp->pd_hdl;
1241 
1242 
1243         /*
1244          * these checks ensure that the requested channel sizes
1245          * are within the limits supported by the chosen HCA.
1246          */
1247         hca_attrp = &ia_rp->ia_hca->hca_attr;
1248         if (args.ep_ch_sizes.dcs_sq_sgl > hca_attrp->hca_max_sgl) {
1249                 DERR("ep_create: invalid cs_sq_sgl %d\n",
1250                     args.ep_ch_sizes.dcs_sq_sgl);
1251                 retval = EINVAL;
1252                 goto cleanup;
1253         }
1254         if (args.ep_ch_sizes.dcs_rq_sgl > hca_attrp->hca_max_sgl) {
1255                 DERR("ep_create: invalid cs_rq_sgl %d\n",
1256                     args.ep_ch_sizes.dcs_rq_sgl);
1257                 retval = EINVAL;
1258                 goto cleanup;
1259         }
1260         if (args.ep_ch_sizes.dcs_sq > hca_attrp->hca_max_chan_sz) {
1261                 DERR("ep_create: invalid cs_sq %d\n",
1262                     args.ep_ch_sizes.dcs_sq);
1263                 retval = EINVAL;
1264                 goto cleanup;
1265         }
1266         if (args.ep_ch_sizes.dcs_rq > hca_attrp->hca_max_chan_sz) {
1267                 DERR("ep_create: invalid cs_rq %d\n",
1268                     args.ep_ch_sizes.dcs_rq);
1269                 retval = EINVAL;
1270                 goto cleanup;
1271         }
1272 
1273         chan_args.rc_sizes.cs_sq_sgl = args.ep_ch_sizes.dcs_sq_sgl;
1274         chan_args.rc_sizes.cs_rq_sgl = args.ep_ch_sizes.dcs_rq_sgl;
1275         chan_args.rc_sizes.cs_sq = args.ep_ch_sizes.dcs_sq;
1276         chan_args.rc_sizes.cs_rq = args.ep_ch_sizes.dcs_rq;
1277         chan_args.rc_flags = IBT_WR_SIGNALED;
1278         chan_args.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
1279         chan_args.rc_hca_port_num = ia_rp->ia_port_num;
1280         chan_args.rc_clone_chan = NULL;
1281         if (args.ep_srq_attached) {
1282                 chan_args.rc_srq = ep_rp->ep_srq_res->srq_hdl;
1283         } else {
1284                 chan_args.rc_srq = NULL;
1285         }
1286 
1287         D3("ep_create: sq_sgl %d, rq_sgl %d, sq %d, rq %d, "
1288             "sig_type 0x%x, control 0x%x, portnum %d, clone_chan 0x%p\n",
1289             args.ep_ch_sizes.dcs_sq_sgl, args.ep_ch_sizes.dcs_rq_sgl,
1290             args.ep_ch_sizes.dcs_sq, args.ep_ch_sizes.dcs_rq,
1291             chan_args.rc_flags, chan_args.rc_control,
1292             chan_args.rc_hca_port_num, chan_args.rc_clone_chan);
1293 
1294         if (args.ep_srq_attached) {
1295                 achan_flags = IBT_ACHAN_USER_MAP | IBT_ACHAN_USES_SRQ;
1296         } else {
1297                 achan_flags = IBT_ACHAN_USER_MAP;
1298         }
1299         /* create rc channel */
1300         status = daplka_ibt_alloc_rc_channel(ep_rp, ia_rp->ia_hca_hdl,
1301             achan_flags, &chan_args, &ep_rp->ep_chan_hdl,
1302             &chan_real_sizes);
1303         if (status != IBT_SUCCESS) {
1304                 DERR("ep_create: alloc_rc_channel returned %d\n", status);
1305                 *rvalp = (int)status;
1306                 retval = 0;
1307                 goto cleanup;
1308         }
1309 
1310         args.ep_ch_real_sizes.dcs_sq = chan_real_sizes.cs_sq;
1311         args.ep_ch_real_sizes.dcs_rq = chan_real_sizes.cs_rq;
1312         args.ep_ch_real_sizes.dcs_sq_sgl = chan_real_sizes.cs_sq_sgl;
1313         args.ep_ch_real_sizes.dcs_rq_sgl = chan_real_sizes.cs_rq_sgl;
1314 
1315         /*
1316          * store ep ptr with chan_hdl.
1317          * this ep_ptr is used by the CM handlers (both active and
1318          * passive)
1319          * mutex is only needed for race of "destroy" and "async"
1320          */
1321         mutex_enter(&daplka_dev->daplka_mutex);
1322         ibt_set_chan_private(ep_rp->ep_chan_hdl, (void *)ep_rp);
1323         mutex_exit(&daplka_dev->daplka_mutex);
1324 
1325         /* Get HCA-specific data_out info */
1326         status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
1327             IBT_CI_NO_FLAGS, IBT_HDL_CHANNEL, (void *)ep_rp->ep_chan_hdl,
1328             &args.ep_qp_data_out, sizeof (args.ep_qp_data_out));
1329 
1330         if (status != IBT_SUCCESS) {
1331                 DERR("ep_create: ibt_ci_data_out error(%d)\n",
1332                     status);
1333                 *rvalp = (int)status;
1334                 retval = 0;
1335                 goto cleanup;
1336         }
1337 
1338         /* insert into ep hash table */
1339         retval = daplka_hash_insert(&ia_rp->ia_ep_htbl,
1340             &ep_hkey, (void *)ep_rp);
1341         if (retval != 0) {
1342                 DERR("ep_create: cannot insert ep resource into ep_htbl\n");
1343                 goto cleanup;
1344         }
1345         inserted = B_TRUE;
1346 
1347         /*
1348          * at this point, the ep_rp can be looked up by other threads
1349          * if they manage to guess the correct hkey. but they are not
1350          * permitted to operate on ep_rp until we transition to the
1351          * CLOSED state.
1352          */
1353 
1354         /* return hkey to library */
1355         args.ep_hkey = ep_hkey;
1356 
1357         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ep_create_t),
1358             mode);
1359         if (retval != 0) {
1360                 DERR("ep_create: copyout error %d\n", retval);
1361                 retval = EFAULT;
1362                 goto cleanup;
1363         }
1364 
1365         daplka_ep_set_state(ep_rp, old_state, new_state);
1366         D3("ep_create: exit\n");
1367         return (0);
1368 
1369 cleanup:
1370         if (inserted) {
1371                 daplka_ep_resource_t *free_rp = NULL;
1372 
1373                 (void) daplka_hash_remove(&ia_rp->ia_ep_htbl, ep_hkey,
1374                     (void **)&free_rp);
1375                 if (free_rp != ep_rp) {
1376                         /*
1377                          * this case is impossible because ep_free will
1378                          * wait until our state transition is complete.
1379                          */
1380                         DERR("ep_create: cannot remove ep from hash table\n");
1381                         ASSERT(B_FALSE);
1382                         return (retval);
1383                 }
1384         }
1385         new_state = DAPLKA_EP_STATE_FREED;
1386         daplka_ep_set_state(ep_rp, old_state, new_state);
1387         DAPLKA_RS_UNREF(ep_rp);
1388         return (retval);
1389 }
1390 
1391 /*
1392  * daplka_ep_get_state retrieves the current state of the EP and
1393  * sets the state to TRANSITIONING. if the current state is already
1394  * TRANSITIONING, this function will wait until the state becomes one
1395  * of the other EP states. Most of the EP related ioctls follow the
1396  * call sequence:
1397  *
1398  *      new_state = old_state = daplka_ep_get_state(ep_rp);
1399  *      ...
1400  *      ...some code that affects the EP
1401  *      ...
1402  *      new_state = <NEW_STATE>;
1403  *      daplka_ep_set_state(ep_rp, old_state, new_state);
1404  *
1405  * this call sequence ensures that only one thread may access the EP
1406  * during the time ep_state is in TRANSITIONING. daplka_ep_set_state
1407  * transitions ep_state to new_state and wakes up any waiters blocking
1408  * on ep_cv.
1409  *
1410  */
1411 static uint32_t
1412 daplka_ep_get_state(daplka_ep_resource_t *ep_rp)
1413 {
1414         uint32_t        old_state = 0;
1415 
1416         mutex_enter(&ep_rp->ep_lock);
1417         while (ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING) {
1418                 D2("get_state: wait for state transition to complete\n");
1419                 cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
1420                 D2("get_state: done, curr state = %d\n", ep_rp->ep_state);
1421         }
1422         ASSERT(ep_rp->ep_state != DAPLKA_EP_STATE_TRANSITIONING);
1423         old_state = ep_rp->ep_state;
1424 
1425         /*
1426          * an ep that is in the FREED state cannot transition
1427          * back to any of the regular states
1428          */
1429         if (old_state != DAPLKA_EP_STATE_FREED) {
1430                 ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
1431         }
1432         mutex_exit(&ep_rp->ep_lock);
1433         return (old_state);
1434 }
1435 
1436 /*
1437  * EP state transition diagram
1438  *
1439  *              CLOSED<-------------------
1440  *                |                      |
1441  *                |                      |
1442  *     ------------------------          |
1443  *     |                      |          |
1444  *     |                      |          |
1445  *     v                      v          |
1446  *   CONNECTING       ACCEPTING          |
1447  *     |  |   |       |       |          |
1448  *     |  |   |       |       |          |
1449  *     |  |   |       |       |          |
1450  *     |  |   |_______|_______|          |
1451  *     |  |           |   |   |          |
1452  *     |  |___________|   |   |          |
1453  *     |        |         |   |          |
1454  *     |        v         |   |---->DISCONNECTED
1455  *     |     CONNECTED    |              ^
1456  *     v        |         |              |
1457  *    ABORTING  |---------|--------------|
1458  *     |        |         |              |
1459  *     |        |         v              |
1460  *     |        |-------->DISCONNECTING--|
1461  *     |                                 |
1462  *     |---------------------------------|
1463  *
1464  *      *not shown in this diagram:
1465  *          -loopback transitions
1466  *          -transitions to the FREED state
1467  */
1468 static boolean_t
1469 daplka_ep_transition_is_valid(uint32_t old_state, uint32_t new_state)
1470 {
1471         boolean_t valid = B_FALSE;
1472 
1473         /*
1474          * reseting to the same state is a no-op and is always
1475          * permitted. transitioning to the FREED state indicates
1476          * that the ep is about to be freed and no further operation
1477          * is allowed on it. to support abrupt close, the ep is
1478          * permitted to transition to the FREED state from any state.
1479          */
1480         if (old_state == new_state ||
1481             new_state == DAPLKA_EP_STATE_FREED) {
1482                 return (B_TRUE);
1483         }
1484 
1485         switch (old_state) {
1486         case DAPLKA_EP_STATE_CLOSED:
1487                 /*
1488                  * this is the initial ep_state.
1489                  * a transition to CONNECTING or ACCEPTING may occur
1490                  * upon calling daplka_ep_connect or daplka_cr_accept,
1491                  * respectively.
1492                  */
1493                 if (new_state == DAPLKA_EP_STATE_CONNECTING ||
1494                     new_state == DAPLKA_EP_STATE_ACCEPTING) {
1495                         valid = B_TRUE;
1496                 }
1497                 break;
1498         case DAPLKA_EP_STATE_CONNECTING:
1499                 /*
1500                  * we transition to this state if daplka_ep_connect
1501                  * is successful. from this state, we can transition
1502                  * to CONNECTED if daplka_cm_rc_conn_est gets called;
1503                  * or to DISCONNECTED if daplka_cm_rc_conn_closed or
1504                  * daplka_cm_rc_event_failure gets called. If the
1505                  * client calls daplka_ep_disconnect, we transition
1506                  * to DISCONNECTING. If a timer was set at ep_connect
1507                  * time and if the timer expires prior to any of the
1508                  * CM callbacks, we transition to ABORTING and then
1509                  * to DISCONNECTED.
1510                  */
1511                 if (new_state == DAPLKA_EP_STATE_CONNECTED ||
1512                     new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1513                     new_state == DAPLKA_EP_STATE_DISCONNECTED ||
1514                     new_state == DAPLKA_EP_STATE_ABORTING) {
1515                         valid = B_TRUE;
1516                 }
1517                 break;
1518         case DAPLKA_EP_STATE_ACCEPTING:
1519                 /*
1520                  * we transition to this state if daplka_cr_accept
1521                  * is successful. from this state, we can transition
1522                  * to CONNECTED if daplka_cm_service_conn_est gets called;
1523                  * or to DISCONNECTED if daplka_cm_service_conn_closed or
1524                  * daplka_cm_service_event_failure gets called. If the
1525                  * client calls daplka_ep_disconnect, we transition to
1526                  * DISCONNECTING.
1527                  */
1528                 if (new_state == DAPLKA_EP_STATE_CONNECTED ||
1529                     new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1530                     new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1531                         valid = B_TRUE;
1532                 }
1533                 break;
1534         case DAPLKA_EP_STATE_CONNECTED:
1535                 /*
1536                  * we transition to this state if a active or passive
1537                  * connection gets established. if the client calls
1538                  * daplka_ep_disconnect, we transition to the
1539                  * DISCONNECTING state. subsequent CM callbacks will
1540                  * cause ep_state to be set to DISCONNECTED. If the
1541                  * remote peer terminates the connection before we do,
1542                  * it is possible for us to transition directly from
1543                  * CONNECTED to DISCONNECTED.
1544                  */
1545                 if (new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1546                     new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1547                         valid = B_TRUE;
1548                 }
1549                 break;
1550         case DAPLKA_EP_STATE_DISCONNECTING:
1551                 /*
1552                  * we transition to this state if the client calls
1553                  * daplka_ep_disconnect.
1554                  */
1555                 if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1556                         valid = B_TRUE;
1557                 }
1558                 break;
1559         case DAPLKA_EP_STATE_ABORTING:
1560                 /*
1561                  * we transition to this state if the active side
1562                  * EP timer has expired. this is only a transient
1563                  * state that is set during timer processing. when
1564                  * timer processing completes, ep_state will become
1565                  * DISCONNECTED.
1566                  */
1567                 if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1568                         valid = B_TRUE;
1569                 }
1570                 break;
1571         case DAPLKA_EP_STATE_DISCONNECTED:
1572                 /*
1573                  * we transition to this state if we get a closed
1574                  * or event_failure CM callback. an expired timer
1575                  * can also cause us to be in this state. this
1576                  * is the only state in which we permit the
1577                  * ep_reinit operation.
1578                  */
1579                 if (new_state == DAPLKA_EP_STATE_CLOSED) {
1580                         valid = B_TRUE;
1581                 }
1582                 break;
1583         default:
1584                 break;
1585         }
1586 
1587         if (!valid) {
1588                 DERR("ep_transition: invalid state change %d -> %d\n",
1589                     old_state, new_state);
1590         }
1591         return (valid);
1592 }
1593 
1594 /*
1595  * first check if the transition is valid. then set ep_state
1596  * to new_state and wake up all waiters.
1597  */
1598 static void
1599 daplka_ep_set_state(daplka_ep_resource_t *ep_rp, uint32_t old_state,
1600         uint32_t new_state)
1601 {
1602         boolean_t       valid;
1603 
1604         ASSERT(new_state != DAPLKA_EP_STATE_TRANSITIONING);
1605 
1606         valid = daplka_ep_transition_is_valid(old_state, new_state);
1607         mutex_enter(&ep_rp->ep_lock);
1608         if (ep_rp->ep_state != DAPLKA_EP_STATE_FREED) {
1609                 if (valid) {
1610                         ep_rp->ep_state = new_state;
1611                 } else {
1612                         /*
1613                          * this case is impossible.
1614                          * we have a serious problem if we get here.
1615                          * instead of panicing, we reset the state to
1616                          * old_state. doing this would at least prevent
1617                          * threads from hanging due to ep_state being
1618                          * stuck in TRANSITIONING.
1619                          */
1620                         ep_rp->ep_state = old_state;
1621                         ASSERT(B_FALSE);
1622                 }
1623         }
1624         cv_broadcast(&ep_rp->ep_cv);
1625         mutex_exit(&ep_rp->ep_lock);
1626 }
1627 
1628 /*
1629  * modifies RC channel attributes.
1630  * currently, only the rdma_in and rdma_out attributes may
1631  * be modified. the channel must be in quiescent state when
1632  * this function is called.
1633  */
1634 /* ARGSUSED */
1635 static int
1636 daplka_ep_modify(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
1637         cred_t *cred, int *rvalp)
1638 {
1639         daplka_ep_resource_t            *ep_rp = NULL;
1640         ibt_cep_modify_flags_t          good_flags;
1641         ibt_rc_chan_modify_attr_t       rcm_attr;
1642         ibt_hca_attr_t                  *hca_attrp;
1643         dapl_ep_modify_t                args;
1644         ibt_status_t                    status;
1645         uint32_t                        old_state, new_state;
1646         int                             retval = 0;
1647 
1648         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_modify_t),
1649             mode);
1650         if (retval != 0) {
1651                 DERR("ep_modify: copyin error %d\n", retval);
1652                 return (EFAULT);
1653         }
1654         ep_rp = (daplka_ep_resource_t *)
1655             daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epm_hkey);
1656         if (ep_rp == NULL) {
1657                 DERR("ep_modify: cannot find ep resource\n");
1658                 return (EINVAL);
1659         }
1660         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
1661         new_state = old_state = daplka_ep_get_state(ep_rp);
1662 
1663         if (old_state != DAPLKA_EP_STATE_CLOSED &&
1664             old_state != DAPLKA_EP_STATE_DISCONNECTED) {
1665                 DERR("ep_modify: invalid state %d\n", old_state);
1666                 retval = EINVAL;
1667                 goto cleanup;
1668         }
1669 
1670         good_flags = IBT_CEP_SET_RDMARA_OUT | IBT_CEP_SET_RDMARA_IN;
1671         if ((args.epm_flags & ~good_flags) != 0) {
1672                 DERR("ep_modify: invalid flags 0x%x\n", args.epm_flags);
1673                 retval = EINVAL;
1674                 goto cleanup;
1675         }
1676 
1677         hca_attrp = &ia_rp->ia_hca->hca_attr;
1678 
1679         bzero(&rcm_attr, sizeof (ibt_rc_chan_modify_attr_t));
1680         if ((args.epm_flags & IBT_CEP_SET_RDMARA_OUT) != 0) {
1681                 if (args.epm_rdma_ra_out > hca_attrp->hca_max_rdma_out_chan) {
1682                         DERR("ep_modify: invalid epm_rdma_ra_out %d\n",
1683                             args.epm_rdma_ra_out);
1684                         retval = EINVAL;
1685                         goto cleanup;
1686                 }
1687                 rcm_attr.rc_rdma_ra_out = args.epm_rdma_ra_out;
1688         }
1689         if ((args.epm_flags & IBT_CEP_SET_RDMARA_IN) != 0) {
1690                 if (args.epm_rdma_ra_in > hca_attrp->hca_max_rdma_in_chan) {
1691                         DERR("ep_modify: epm_rdma_ra_in %d\n",
1692                             args.epm_rdma_ra_in);
1693                         retval = EINVAL;
1694                         goto cleanup;
1695                 }
1696                 rcm_attr.rc_rdma_ra_in = args.epm_rdma_ra_in;
1697         }
1698         status = ibt_modify_rc_channel(ep_rp->ep_chan_hdl, args.epm_flags,
1699             &rcm_attr, NULL);
1700         if (status != IBT_SUCCESS) {
1701                 DERR("ep_modify: modify_rc_channel returned %d\n", status);
1702                 *rvalp = (int)status;
1703                 retval = 0;
1704                 goto cleanup;
1705         }
1706 
1707         /*
1708          * ep_modify does not change ep_state
1709          */
1710 cleanup:;
1711         daplka_ep_set_state(ep_rp, old_state, new_state);
1712         DAPLKA_RS_UNREF(ep_rp);
1713         return (retval);
1714 }
1715 
1716 /*
1717  * Frees a EP resource.
1718  * a EP may only be freed when it is in the CLOSED or
1719  * DISCONNECTED state.
1720  */
1721 /* ARGSUSED */
1722 static int
1723 daplka_ep_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
1724         cred_t *cred, int *rvalp)
1725 {
1726         daplka_ep_resource_t    *ep_rp = NULL;
1727         dapl_ep_free_t          args;
1728         uint32_t                old_state, new_state;
1729         int                     retval;
1730 
1731         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_free_t), mode);
1732         if (retval != 0) {
1733                 DERR("ep_free: copyin error %d\n", retval);
1734                 return (EFAULT);
1735         }
1736         ep_rp = (daplka_ep_resource_t *)
1737             daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epf_hkey);
1738         if (ep_rp == NULL) {
1739                 DERR("ep_free: cannot find ep resource\n");
1740                 return (EINVAL);
1741         }
1742         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
1743         new_state = old_state = daplka_ep_get_state(ep_rp);
1744 
1745         /*
1746          * ep cannot be freed if it is in an invalid state.
1747          */
1748         if (old_state != DAPLKA_EP_STATE_CLOSED &&
1749             old_state != DAPLKA_EP_STATE_DISCONNECTED) {
1750                 DERR("ep_free: invalid state %d\n", old_state);
1751                 retval = EINVAL;
1752                 goto cleanup;
1753         }
1754         ep_rp = NULL;
1755         retval = daplka_hash_remove(&ia_rp->ia_ep_htbl,
1756             args.epf_hkey, (void **)&ep_rp);
1757         if (retval != 0 || ep_rp == NULL) {
1758                 /*
1759                  * this is only possible if we have two threads
1760                  * calling ep_free in parallel.
1761                  */
1762                 DERR("ep_free: cannot find ep resource\n");
1763                 goto cleanup;
1764         }
1765         /* there should not be any outstanding timers */
1766         ASSERT(ep_rp->ep_timer_hkey == 0);
1767 
1768         new_state = DAPLKA_EP_STATE_FREED;
1769         daplka_ep_set_state(ep_rp, old_state, new_state);
1770 
1771         /* remove reference obtained by lookup */
1772         DAPLKA_RS_UNREF(ep_rp);
1773 
1774         /* UNREF calls the actual free function when refcnt is zero */
1775         DAPLKA_RS_UNREF(ep_rp);
1776         return (0);
1777 
1778 cleanup:;
1779         daplka_ep_set_state(ep_rp, old_state, new_state);
1780 
1781         /* remove reference obtained by lookup */
1782         DAPLKA_RS_UNREF(ep_rp);
1783         return (retval);
1784 }
1785 
1786 /*
1787  * The following routines supports the timeout feature of ep_connect.
1788  * Refer to the description of ep_connect for details.
1789  */
1790 
1791 /*
1792  * this is the timer processing thread.
1793  */
1794 static void
1795 daplka_timer_thread(void *arg)
1796 {
1797         daplka_timer_info_t     *timerp = (daplka_timer_info_t *)arg;
1798         daplka_ep_resource_t    *ep_rp;
1799         daplka_evd_event_t      *disc_ev = NULL;
1800         ibt_status_t            status;
1801         int                     old_state, new_state;
1802 
1803         ep_rp = timerp->ti_ep_res;
1804         ASSERT(ep_rp != NULL);
1805         ASSERT(timerp->ti_tmo_id != 0);
1806         timerp->ti_tmo_id = 0;
1807 
1808         new_state = old_state = daplka_ep_get_state(ep_rp);
1809         if (old_state != DAPLKA_EP_STATE_CONNECTING) {
1810                 /* unblock hash_ep_free */
1811                 mutex_enter(&ep_rp->ep_lock);
1812                 ASSERT(ep_rp->ep_timer_hkey != 0);
1813                 ep_rp->ep_timer_hkey = 0;
1814                 cv_broadcast(&ep_rp->ep_cv);
1815                 mutex_exit(&ep_rp->ep_lock);
1816 
1817                 /* reset state to original state */
1818                 daplka_ep_set_state(ep_rp, old_state, new_state);
1819 
1820                 /* this function will also unref ep_rp */
1821                 daplka_timer_info_free(timerp);
1822                 return;
1823         }
1824 
1825         ASSERT(ep_rp->ep_timer_hkey != 0);
1826         ep_rp->ep_timer_hkey = 0;
1827 
1828         /*
1829          * we cannot keep ep_state in TRANSITIONING if we call
1830          * ibt_close_rc_channel in blocking mode. this would cause
1831          * a deadlock because the cm callbacks will be blocked and
1832          * will not be able to wake us up.
1833          */
1834         new_state = DAPLKA_EP_STATE_ABORTING;
1835         daplka_ep_set_state(ep_rp, old_state, new_state);
1836 
1837         /*
1838          * when we return from close_rc_channel, all callbacks should have
1839          * completed. we can also be certain that these callbacks did not
1840          * enqueue any events to conn_evd.
1841          */
1842         status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
1843             NULL, 0, NULL, NULL, NULL);
1844         if (status != IBT_SUCCESS) {
1845                 DERR("timer_thread: ibt_close_rc_channel returned %d\n",
1846                     status);
1847         }
1848         old_state = daplka_ep_get_state(ep_rp);
1849 
1850         /*
1851          * this is the only thread that can transition ep_state out
1852          * of ABORTING. all other ep operations would fail when
1853          * ep_state is in ABORTING.
1854          */
1855         ASSERT(old_state == DAPLKA_EP_STATE_ABORTING);
1856 
1857         disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_SLEEP);
1858         ASSERT(disc_ev != NULL);
1859 
1860         disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
1861         disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
1862         disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
1863         disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
1864         disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
1865         disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
1866 
1867         D2("timer_thread: enqueue event(%p) evdp(%p)\n",
1868             disc_ev, ep_rp->ep_conn_evd);
1869 
1870         new_state = DAPLKA_EP_STATE_DISCONNECTED;
1871         daplka_ep_set_state(ep_rp, old_state, new_state);
1872 
1873         daplka_evd_wakeup(ep_rp->ep_conn_evd,
1874             &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
1875 
1876         /* this function will also unref ep_rp */
1877         daplka_timer_info_free(timerp);
1878 }
1879 
1880 /*
1881  * dispatches a thread to continue with timer processing.
1882  */
1883 static void
1884 daplka_timer_dispatch(void *arg)
1885 {
1886         /*
1887          * keep rescheduling this function until
1888          * taskq_dispatch succeeds.
1889          */
1890         if (taskq_dispatch(daplka_taskq,
1891             daplka_timer_thread, arg, TQ_NOSLEEP) == 0) {
1892                 DERR("timer_dispatch: taskq_dispatch failed, retrying...\n");
1893                 (void) timeout(daplka_timer_dispatch, arg, 10);
1894         }
1895 }
1896 
1897 /*
1898  * this function is called by the kernel's callout thread.
1899  * we first attempt to remove the timer object from the
1900  * global timer table. if it is found, we dispatch a thread
1901  * to continue processing the timer object. if it is not
1902  * found, that means the timer has been cancelled by someone
1903  * else.
1904  */
1905 static void
1906 daplka_timer_handler(void *arg)
1907 {
1908         uint64_t                timer_hkey = (uintptr_t)arg;
1909         daplka_timer_info_t     *timerp = NULL;
1910 
1911         D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)timer_hkey);
1912 
1913         (void) daplka_hash_remove(&daplka_timer_info_htbl,
1914             timer_hkey, (void **)&timerp);
1915         if (timerp == NULL) {
1916                 D2("timer_handler: timer already cancelled\n");
1917                 return;
1918         }
1919         daplka_timer_dispatch((void *)timerp);
1920 }
1921 
1922 /*
1923  * allocates a timer_info object.
1924  * a reference to a EP is held by this object. this ensures
1925  * that the EP stays valid when a timer is outstanding.
1926  */
1927 static daplka_timer_info_t *
1928 daplka_timer_info_alloc(daplka_ep_resource_t *ep_rp)
1929 {
1930         daplka_timer_info_t     *timerp;
1931 
1932         timerp = kmem_zalloc(sizeof (*timerp), daplka_km_flags);
1933         if (timerp == NULL) {
1934                 DERR("timer_info_alloc: cannot allocate timer info\n");
1935                 return (NULL);
1936         }
1937         timerp->ti_ep_res = ep_rp;
1938         timerp->ti_tmo_id = 0;
1939 
1940         return (timerp);
1941 }
1942 
1943 /*
1944  * Frees the timer_info object.
1945  * we release the EP reference before freeing the object.
1946  */
1947 static void
1948 daplka_timer_info_free(daplka_timer_info_t *timerp)
1949 {
1950         ASSERT(timerp->ti_ep_res != NULL);
1951         DAPLKA_RS_UNREF(timerp->ti_ep_res);
1952         timerp->ti_ep_res = NULL;
1953         ASSERT(timerp->ti_tmo_id == 0);
1954         kmem_free(timerp, sizeof (*timerp));
1955 }
1956 
1957 /*
1958  * cancels the timer set by ep_connect.
1959  * returns -1 if timer handling is in progress
1960  * and 0 otherwise.
1961  */
1962 static int
1963 daplka_cancel_timer(daplka_ep_resource_t *ep_rp)
1964 {
1965         /*
1966          * this function can only be called when ep_state
1967          * is frozen.
1968          */
1969         ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING);
1970         if (ep_rp->ep_timer_hkey != 0) {
1971                 daplka_timer_info_t     *timerp = NULL;
1972 
1973                 (void) daplka_hash_remove(&daplka_timer_info_htbl,
1974                     ep_rp->ep_timer_hkey, (void **)&timerp);
1975                 if (timerp == NULL) {
1976                         /*
1977                          * this is possible if the timer_handler has
1978                          * removed the timerp but the taskq thread has
1979                          * not transitioned the ep_state to DISCONNECTED.
1980                          * we need to reset the ep_state to allow the
1981                          * taskq thread to continue with its work. the
1982                          * taskq thread will set the ep_timer_hkey to 0
1983                          * so we don't have to do it here.
1984                          */
1985                         DERR("cancel_timer: timer is being processed\n");
1986                         return (-1);
1987                 }
1988                 /*
1989                  * we got the timer object. if the handler fires at
1990                  * this point, it will not be able to find the object
1991                  * and will return immediately. normally, ti_tmo_id gets
1992                  * cleared when the handler fires.
1993                  */
1994                 ASSERT(timerp->ti_tmo_id != 0);
1995 
1996                 /*
1997                  * note that untimeout can possibly call the handler.
1998                  * we are safe because the handler will be a no-op.
1999                  */
2000                 (void) untimeout(timerp->ti_tmo_id);
2001                 timerp->ti_tmo_id = 0;
2002                 daplka_timer_info_free(timerp);
2003                 ep_rp->ep_timer_hkey = 0;
2004         }
2005         return (0);
2006 }
2007 
2008 /*
2009  * this function is called by daplka_hash_destroy for
2010  * freeing timer_info objects
2011  */
2012 static void
2013 daplka_hash_timer_free(void *obj)
2014 {
2015         daplka_timer_info_free((daplka_timer_info_t *)obj);
2016 }
2017 
2018 /* ARGSUSED */
2019 static uint16_t
2020 daplka_hellomsg_cksum(DAPL_PRIVATE *dp)
2021 {
2022         uint8_t *bp;
2023         int i;
2024         uint16_t cksum = 0;
2025 
2026         bp = (uint8_t *)dp;
2027         for (i = 0; i < sizeof (DAPL_PRIVATE); i++) {
2028                 cksum += bp[i];
2029         }
2030         return (cksum);
2031 }
2032 
2033 /*
2034  * ep_connect is called by the client to initiate a connection to a
2035  * remote service point. It is a non-blocking call. If a non-zero
2036  * timeout is specified by the client, a timer will be set just before
2037  * returning from ep_connect. Upon a successful return from ep_connect,
2038  * the client will call evd_wait to wait for the connection to complete.
2039  * If the connection is rejected or has failed due to an error, the
2040  * client will be notified with an event containing the appropriate error
2041  * code. If the connection is accepted, the client will be notified with
2042  * the CONN_ESTABLISHED event. If the timer expires before either of the
2043  * above events (error or established), a TIMED_OUT event will be delivered
2044  * to the client.
2045  *
2046  * the complicated part of the timer logic is the handling of race
2047  * conditions with CM callbacks. we need to ensure that either the CM or
2048  * the timer thread gets to deliver an event, but not both. when the
2049  * CM callback is about to deliver an event, it always tries to cancel
2050  * the outstanding timer. if cancel_timer indicates a that the timer is
2051  * already being processed, the CM callback will simply return without
2052  * delivering an event. when the timer thread executes, it tries to check
2053  * if the EP is still in CONNECTING state (timers only work on the active
2054  * side). if the EP is not in this state, the timer thread will return
2055  * without delivering an event.
2056  */
/* ARGSUSED */
static int
daplka_ep_connect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t	*ep_rp = NULL;
	dapl_ep_connect_t	args;
	daplka_timer_info_t	*timerp = NULL;
	uint32_t		old_state, new_state;
	boolean_t		timer_inserted = B_FALSE;
	uint64_t		timer_hkey = 0;
	ibt_path_info_t		path_info;
	ibt_path_attr_t		path_attr;
	ibt_hca_attr_t		*hca_attrp;
	ibt_chan_open_args_t	chan_args;
	ibt_status_t		status = IBT_SUCCESS;
	uint8_t			num_paths;
	void			*priv_data;
	DAPL_PRIVATE		*dp;
	int			retval = 0;
	ib_gid_t		*sgid;
	ib_gid_t		*dgid;
	uint64_t		dgid_ored;
	ibt_ar_t		ar_query_s;
	ibt_ar_t		ar_result_s;
	ibt_path_flags_t	pathflags;

	D3("ep_connect: enter\n");
	/* copy in ioctl arguments from userland */
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_connect_t),
	    mode);
	if (retval != 0) {
		DERR("ep_connect: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* the lookup takes a reference on ep_rp; dropped at cleanup */
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epc_hkey);
	if (ep_rp == NULL) {
		DERR("ep_connect: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	/* freeze ep_state; restored by daplka_ep_set_state at cleanup */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("ep_connect: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.epc_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
		DERR("ep_connect: private data len (%d) exceeded "
		    "max size %d\n", args.epc_priv_sz,
		    DAPL_MAX_PRIVATE_DATA_SIZE);
		retval = EINVAL;
		goto cleanup;
	}

	/*
	 * check for remote ipaddress to dgid resolution needs ATS
	 */
	dgid = &args.epc_dgid;
	dgid_ored = dgid->gid_guid | dgid->gid_prefix;
#if defined(DAPLKA_DEBUG_FORCE_ATS)
	dgid_ored = 0ULL;
#endif /* DAPLKA_DEBUG_FORCE_ATS */
	/* check for unidentified dgid */
	if (dgid_ored == 0ULL) {
		/*
		 * setup for ibt_query_ar()
		 */
		sgid = &ia_rp->ia_hca_sgid;
		ar_query_s.ar_gid.gid_guid = 0ULL;
		ar_query_s.ar_gid.gid_prefix = 0ULL;
		ar_query_s.ar_pkey = 0;
		bcopy(args.epc_raddr_sadata.iad_sadata,
		    ar_query_s.ar_data, DAPL_ATS_NBYTES);
#define UR(b) ar_query_s.ar_data[(b)]
		D3("daplka_ep_connect: SA[8] %d.%d.%d.%d\n",
		    UR(8), UR(9), UR(10), UR(11));
		D3("daplka_ep_connect: SA[12] %d.%d.%d.%d\n",
		    UR(12), UR(13), UR(14), UR(15));
		status = ibt_query_ar(sgid, &ar_query_s, &ar_result_s);
		if (status != IBT_SUCCESS) {
			DERR("ep_connect: ibt_query_ar returned %d\n", status);
			/* IBT status goes to userland via *rvalp, retval 0 */
			*rvalp = (int)status;
			retval = 0;
			goto cleanup;
		}
		/*
		 * dgid identified from SA record
		 */
		dgid = &ar_result_s.ar_gid;
		D2("daplka_ep_connect: ATS dgid=%llx:%llx\n",
		    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
	}

	bzero(&path_info, sizeof (ibt_path_info_t));
	bzero(&path_attr, sizeof (ibt_path_attr_t));
	bzero(&chan_args, sizeof (ibt_chan_open_args_t));

	path_attr.pa_dgids = dgid;
	path_attr.pa_num_dgids = 1;
	/*
	 * don't set sid in path_attr saves 1 SA query
	 * Also makes server side not to write the service record
	 */
	path_attr.pa_sgid = ia_rp->ia_hca_sgid;
	path_attr.pa_pkey = ia_rp->ia_port_pkey;

	/* save the connection ep  - struct copy */
	ep_rp->ep_sgid = ia_rp->ia_hca_sgid;
	ep_rp->ep_dgid = *dgid;

	num_paths = 0;
	pathflags = IBT_PATH_PKEY;
	/* enable APM on remote port but not on loopback case */
	if (daplka_apm && ((dgid->gid_prefix != path_attr.pa_sgid.gid_prefix) ||
	    (dgid->gid_guid != path_attr.pa_sgid.gid_guid))) {
		pathflags |= IBT_PATH_APM;
	}
	status = ibt_get_paths(daplka_dev->daplka_clnt_hdl,
	    pathflags, &path_attr, 1, &path_info, &num_paths);

	if (status != IBT_SUCCESS && status != IBT_INSUFF_DATA) {
		DERR("ep_connect: ibt_get_paths returned %d paths %d\n",
		    status, num_paths);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/* fill in the sid directly to path_info */
	path_info.pi_sid = args.epc_sid;
	hca_attrp = &ia_rp->ia_hca->hca_attr;

	/* fill in open channel args */
	chan_args.oc_path = &path_info;
	chan_args.oc_cm_handler = daplka_cm_rc_handler;
	chan_args.oc_cm_clnt_private = (void *)ep_rp;
	chan_args.oc_rdma_ra_out = hca_attrp->hca_max_rdma_out_chan;
	chan_args.oc_rdma_ra_in = hca_attrp->hca_max_rdma_in_chan;
	chan_args.oc_path_retry_cnt = 7;	/* 3-bit field */
	chan_args.oc_path_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;

	/*
	 * NOTE(review): this ASSERT vanishes in non-DEBUG builds;
	 * confirm that userland always supplies non-empty private
	 * data (the hello message below is written through it).
	 */
	ASSERT(args.epc_priv_sz > 0);
	priv_data = (void *)args.epc_priv;

	chan_args.oc_priv_data_len = args.epc_priv_sz;
	chan_args.oc_priv_data = priv_data;

	/*
	 * calculate checksum value of hello message and
	 * put hello message in networking byte order
	 */
	dp = (DAPL_PRIVATE *)priv_data;
	dp->hello_msg.hi_port = htons(dp->hello_msg.hi_port);
	dp->hello_msg.hi_checksum = 0;
	dp->hello_msg.hi_checksum = htons(daplka_hellomsg_cksum(dp));

	if (args.epc_timeout > 0) {
		/*
		 * increment refcnt before passing reference to
		 * timer_info_alloc.
		 */
		DAPLKA_RS_REF(ep_rp);
		timerp = daplka_timer_info_alloc(ep_rp);
		if (timerp == NULL) {
			DERR("ep_connect: cannot allocate timer\n");
			/*
			 * we need to remove the reference if
			 * allocation failed.
			 */
			DAPLKA_RS_UNREF(ep_rp);
			retval = ENOMEM;
			goto cleanup;
		}
		/*
		 * We generate our own hkeys so that timer_hkey can fit
		 * into a pointer and passed as an arg to timeout()
		 */
		timer_hkey = (uint64_t)daplka_timer_hkey_gen();
		retval = daplka_hash_insert(&daplka_timer_info_htbl,
		    &timer_hkey, (void *)timerp);
		if (retval != 0) {
			DERR("ep_connect: cannot insert timer info\n");
			goto cleanup;
		}
		ASSERT(ep_rp->ep_timer_hkey == 0);
		ep_rp->ep_timer_hkey = timer_hkey;
		timer_inserted = B_TRUE;
		D2("ep_connect: timer_hkey = 0x%llx\n",
		    (longlong_t)timer_hkey);
	}
	/* initiate the connection; completion reported via CM callbacks */
	status = ibt_open_rc_channel(ep_rp->ep_chan_hdl, IBT_OCHAN_NO_FLAGS,
	    IBT_NONBLOCKING, &chan_args, NULL);

	if (status != IBT_SUCCESS) {
		DERR("ep_connect: ibt_open_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/*
	 * if a cm callback gets called at this point, it'll have to wait until
	 * ep_state becomes connecting (or some other state if another thread
	 * manages to get ahead of the callback). this guarantees that the
	 * callback will not touch the timer until it gets set.
	 */
	if (timerp != NULL) {
		clock_t		tmo;

		/* epc_timeout is in microseconds; convert to ticks */
		tmo = drv_usectohz((clock_t)args.epc_timeout);
		/*
		 * We generate our own 32 bit timer_hkey so that it can fit
		 * into a pointer
		 */
		ASSERT(timer_hkey != 0);
		timerp->ti_tmo_id = timeout(daplka_timer_handler,
		    (void *)(uintptr_t)timer_hkey, tmo);
	}
	new_state = DAPLKA_EP_STATE_CONNECTING;

cleanup:;
	if (timerp != NULL && (retval != 0 || status != IBT_SUCCESS)) {
		/*
		 * if ibt_open_rc_channel failed, the timerp must still
		 * be in daplka_timer_info_htbl because neither the cm
		 * callback nor the timer_handler will be called.
		 */
		if (timer_inserted) {
			daplka_timer_info_t	*new_timerp = NULL;

			ASSERT(timer_hkey != 0);
			(void) daplka_hash_remove(&daplka_timer_info_htbl,
			    timer_hkey, (void **)&new_timerp);
			ASSERT(new_timerp == timerp);
			ep_rp->ep_timer_hkey = 0;
		}
		/* this also drops the extra EP reference taken above */
		daplka_timer_info_free(timerp);
	}
	daplka_ep_set_state(ep_rp, old_state, new_state);
	/* drop reference obtained by the lookup */
	DAPLKA_RS_UNREF(ep_rp);
	D3("ep_connect: exit\n");
	return (retval);
}
2300 
2301 /*
2302  * ep_disconnect closes a connection with a remote peer.
2303  * if a connection has not been established, ep_disconnect
2304  * will instead flush all recv bufs posted to this channel.
2305  * if the EP state is CONNECTED, CONNECTING or ACCEPTING upon
2306  * entry to ep_disconnect, the EP state will transition to
2307  * DISCONNECTING upon exit. the CM callbacks triggered by
2308  * ibt_close_rc_channel will cause EP state to become
2309  * DISCONNECTED. This function is a no-op if EP state is
2310  * DISCONNECTED.
2311  */
2312 /* ARGSUSED */
2313 static int
2314 daplka_ep_disconnect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2315         cred_t *cred, int *rvalp)
2316 {
2317         daplka_ep_resource_t    *ep_rp = NULL;
2318         dapl_ep_disconnect_t    args;
2319         ibt_status_t            status;
2320         uint32_t                old_state, new_state;
2321         int                     retval = 0;
2322 
2323         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_disconnect_t),
2324             mode);
2325         if (retval != 0) {
2326                 DERR("ep_disconnect: copyin error %d\n", retval);
2327                 return (EFAULT);
2328         }
2329         ep_rp = (daplka_ep_resource_t *)
2330             daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epd_hkey);
2331         if (ep_rp == NULL) {
2332                 DERR("ep_disconnect: cannot find ep resource\n");
2333                 return (EINVAL);
2334         }
2335         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2336 
2337         new_state = old_state = daplka_ep_get_state(ep_rp);
2338         if (old_state != DAPLKA_EP_STATE_CONNECTED &&
2339             old_state != DAPLKA_EP_STATE_CONNECTING &&
2340             old_state != DAPLKA_EP_STATE_ACCEPTING &&
2341             old_state != DAPLKA_EP_STATE_DISCONNECTED &&
2342             old_state != DAPLKA_EP_STATE_DISCONNECTING &&
2343             old_state != DAPLKA_EP_STATE_CLOSED) {
2344                 DERR("ep_disconnect: invalid state %d\n", old_state);
2345                 retval = EINVAL;
2346                 goto cleanup;
2347         }
2348 
2349         if ((old_state == DAPLKA_EP_STATE_DISCONNECTED) ||
2350             (old_state == DAPLKA_EP_STATE_DISCONNECTING)) {
2351                 D2("ep_disconnect: ep already disconnected\n");
2352                 retval = 0;
2353                 /* we leave the state as DISCONNECTED */
2354                 goto cleanup;
2355         }
2356         if (old_state == DAPLKA_EP_STATE_CONNECTING ||
2357             old_state == DAPLKA_EP_STATE_ACCEPTING) {
2358                 D2("ep_disconnect: aborting, old_state = %d\n", old_state);
2359         }
2360 
2361         /*
2362          * according to the udapl spec, ep_disconnect should
2363          * flush the channel if the channel is not CONNECTED.
2364          */
2365         if (old_state == DAPLKA_EP_STATE_CLOSED) {
2366                 status = ibt_flush_channel(ep_rp->ep_chan_hdl);
2367                 if (status != IBT_SUCCESS) {
2368                         DERR("ep_disconnect: ibt_flush_channel failed %d\n",
2369                             status);
2370                         *rvalp = (int)status;
2371                 }
2372                 retval = 0;
2373                 /* we leave the state as CLOSED */
2374                 goto cleanup;
2375         }
2376 
2377         new_state = DAPLKA_EP_STATE_DISCONNECTING;
2378         daplka_ep_set_state(ep_rp, old_state, new_state);
2379         status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_NONBLOCKING,
2380             NULL, 0, NULL, NULL, NULL);
2381 
2382         if (status == IBT_SUCCESS) {
2383                 DAPLKA_RS_UNREF(ep_rp);
2384                 return (retval);
2385         } else {
2386                 DERR("ep_disconnect: ibt_close_rc_channel returned %d\n",
2387                     status);
2388                 *rvalp = (int)status;
2389                 retval = 0;
2390                 new_state = old_state;
2391         }
2392 
2393 cleanup:;
2394         daplka_ep_set_state(ep_rp, old_state, new_state);
2395         DAPLKA_RS_UNREF(ep_rp);
2396         return (retval);
2397 }
2398 
2399 /*
2400  * this function resets the EP to a usable state (ie. from
2401  * DISCONNECTED to CLOSED). this function is best implemented using
2402  * the ibt_recycle_channel interface. until that is available, we will
2403  * instead clone and tear down the existing channel and replace the
2404  * existing channel with the cloned one.
2405  */
2406 /* ARGSUSED */
2407 static int
2408 daplka_ep_reinit(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2409         cred_t *cred, int *rvalp)
2410 {
2411         daplka_ep_resource_t            *ep_rp = NULL;
2412         dapl_ep_reinit_t                args;
2413         ibt_status_t                    status;
2414         uint32_t                        old_state, new_state;
2415         int                             retval = 0;
2416 
2417         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_reinit_t),
2418             mode);
2419         if (retval != 0) {
2420                 DERR("reinit: copyin error %d\n", retval);
2421                 return (EFAULT);
2422         }
2423         ep_rp = (daplka_ep_resource_t *)
2424             daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epri_hkey);
2425         if (ep_rp == NULL) {
2426                 DERR("reinit: cannot find ep resource\n");
2427                 return (EINVAL);
2428         }
2429         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2430         new_state = old_state = daplka_ep_get_state(ep_rp);
2431         if ((old_state != DAPLKA_EP_STATE_CLOSED) &&
2432             (old_state != DAPLKA_EP_STATE_DISCONNECTED)) {
2433                 DERR("reinit: invalid state %d\n", old_state);
2434                 retval = EINVAL;
2435                 goto cleanup;
2436         }
2437 
2438         status = ibt_recycle_rc(ep_rp->ep_chan_hdl,
2439             IBT_CEP_RDMA_RD|IBT_CEP_RDMA_WR,
2440             ia_rp->ia_port_num, NULL, NULL);
2441         if (status != IBT_SUCCESS) {
2442                 DERR("reinit: unable to clone channel\n");
2443                 *rvalp = (int)status;
2444                 retval = 0;
2445                 goto cleanup;
2446         }
2447         new_state = DAPLKA_EP_STATE_CLOSED;
2448 
2449 cleanup:;
2450         daplka_ep_set_state(ep_rp, old_state, new_state);
2451         DAPLKA_RS_UNREF(ep_rp);
2452         return (retval);
2453 }
2454 
2455 /*
2456  * destroys a EP resource.
2457  * called when refcnt drops to zero.
2458  */
2459 static int
2460 daplka_ep_destroy(daplka_resource_t *gen_rp)
2461 {
2462         daplka_ep_resource_t    *ep_rp = (daplka_ep_resource_t *)gen_rp;
2463         ibt_status_t            status;
2464 
2465         ASSERT(DAPLKA_RS_REFCNT(ep_rp) == 0);
2466         ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_FREED);
2467 
2468         /*
2469          * by the time we get here, we can be sure that
2470          * there is no outstanding timer.
2471          */
2472         ASSERT(ep_rp->ep_timer_hkey == 0);
2473 
2474         D3("ep_destroy: entering, ep_rp 0x%p, rnum %d\n",
2475             ep_rp, DAPLKA_RS_RNUM(ep_rp));
2476         /*
2477          * free rc channel
2478          */
2479         if (ep_rp->ep_chan_hdl != NULL) {
2480                 mutex_enter(&daplka_dev->daplka_mutex);
2481                 ibt_set_chan_private(ep_rp->ep_chan_hdl, NULL);
2482                 mutex_exit(&daplka_dev->daplka_mutex);
2483                 status = daplka_ibt_free_channel(ep_rp, ep_rp->ep_chan_hdl);
2484                 if (status != IBT_SUCCESS) {
2485                         DERR("ep_free: ibt_free_channel returned %d\n",
2486                             status);
2487                 }
2488                 ep_rp->ep_chan_hdl = NULL;
2489                 D3("ep_destroy: qp freed, rnum %d\n", DAPLKA_RS_RNUM(ep_rp));
2490         }
2491         /*
2492          * release all references
2493          */
2494         if (ep_rp->ep_snd_evd != NULL) {
2495                 DAPLKA_RS_UNREF(ep_rp->ep_snd_evd);
2496                 ep_rp->ep_snd_evd = NULL;
2497         }
2498         if (ep_rp->ep_rcv_evd != NULL) {
2499                 DAPLKA_RS_UNREF(ep_rp->ep_rcv_evd);
2500                 ep_rp->ep_rcv_evd = NULL;
2501         }
2502         if (ep_rp->ep_conn_evd != NULL) {
2503                 DAPLKA_RS_UNREF(ep_rp->ep_conn_evd);
2504                 ep_rp->ep_conn_evd = NULL;
2505         }
2506         if (ep_rp->ep_srq_res != NULL) {
2507                 DAPLKA_RS_UNREF(ep_rp->ep_srq_res);
2508                 ep_rp->ep_srq_res = NULL;
2509         }
2510         if (ep_rp->ep_pd_res != NULL) {
2511                 DAPLKA_RS_UNREF(ep_rp->ep_pd_res);
2512                 ep_rp->ep_pd_res = NULL;
2513         }
2514         cv_destroy(&ep_rp->ep_cv);
2515         mutex_destroy(&ep_rp->ep_lock);
2516 
2517         DAPLKA_RS_FINI(ep_rp);
2518         kmem_free(ep_rp, sizeof (daplka_ep_resource_t));
2519         D3("ep_destroy: exiting, ep_rp 0x%p\n", ep_rp);
2520         return (0);
2521 }
2522 
/*
 * this function is called by daplka_hash_destroy for
 * freeing EP resource objects
 */
static void
daplka_hash_ep_free(void *obj)
{
	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)obj;
	ibt_status_t		status;
	uint32_t		old_state, new_state;
	int			retval;

	/*
	 * cancel any pending timer, then move the EP into the FREED
	 * state so no further state transitions can occur.
	 */
	old_state = daplka_ep_get_state(ep_rp);
	retval = daplka_cancel_timer(ep_rp);
	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	if (retval != 0) {
		/*
		 * the timer could not be cancelled because it is already
		 * running; wait for ep_timer_hkey to be cleared (the timer
		 * path is expected to signal ep_cv when done).
		 * NOTE(review): cv_wait is not in a while loop, so a
		 * spurious wakeup would proceed early — confirm the only
		 * signaller is the timer completion path.
		 */
		D2("hash_ep_free: ep_rp 0x%p "
		    "timer is still being processed\n", ep_rp);
		mutex_enter(&ep_rp->ep_lock);
		if (ep_rp->ep_timer_hkey != 0) {
			D2("hash_ep_free: ep_rp 0x%p "
			    "waiting for timer_hkey to be 0\n", ep_rp);
			cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
		}
		mutex_exit(&ep_rp->ep_lock);
	}

	/* call ibt_close_rc_channel regardless of what state we are in */
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    NULL, 0, NULL, NULL, NULL);
	if (status != IBT_SUCCESS) {
		/*
		 * a close failure is only unexpected if the EP was in
		 * an active (connected/connecting/accepting) state.
		 */
		if (old_state == DAPLKA_EP_STATE_CONNECTED ||
		    old_state == DAPLKA_EP_STATE_CONNECTING ||
		    old_state == DAPLKA_EP_STATE_ACCEPTING) {
			DERR("hash_ep_free: ep_rp 0x%p state %d "
			    "unexpected error %d from close_rc_channel\n",
			    ep_rp, old_state, status);
		}
		D2("hash_ep_free: close_rc_channel, status %d\n", status);
	}

	/* drop the hash table's reference; may trigger daplka_ep_destroy */
	DAPLKA_RS_UNREF(ep_rp);
}
2568 
2569 /*
2570  * creates a EVD resource.
2571  * a EVD is used by the client to wait for events from one
2572  * or more sources.
2573  */
2574 /* ARGSUSED */
2575 static int
2576 daplka_evd_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2577         cred_t *cred, int *rvalp)
2578 {
2579         daplka_evd_resource_t           *evd_rp = NULL;
2580         daplka_async_evd_hkey_t         *async_evd;
2581         ibt_hca_attr_t                  *hca_attrp;
2582         ibt_cq_attr_t                   cq_attr;
2583         dapl_evd_create_t               args;
2584         uint64_t                        evd_hkey = 0;
2585         boolean_t                       inserted = B_FALSE;
2586         int                             retval = 0;
2587         ibt_status_t                    status;
2588 
2589         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_create_t),
2590             mode);
2591         if (retval != 0) {
2592                 DERR("evd_create: copyin error %d", retval);
2593                 return (EFAULT);
2594         }
2595         if ((args.evd_flags &
2596             ~(DAT_EVD_DEFAULT_FLAG | DAT_EVD_SOFTWARE_FLAG)) != 0) {
2597                 DERR("evd_create: invalid flags 0x%x\n", args.evd_flags);
2598                 return (EINVAL);
2599         }
2600 
2601         evd_rp = kmem_zalloc(sizeof (daplka_evd_resource_t), daplka_km_flags);
2602         DAPLKA_RS_INIT(evd_rp, DAPL_TYPE_EVD,
2603             DAPLKA_RS_RNUM(ia_rp), daplka_evd_destroy);
2604 
2605         mutex_init(&evd_rp->evd_lock, NULL, MUTEX_DRIVER, NULL);
2606         cv_init(&evd_rp->evd_cv, NULL, CV_DRIVER, NULL);
2607         evd_rp->evd_hca = ia_rp->ia_hca;
2608         evd_rp->evd_flags = args.evd_flags;
2609         evd_rp->evd_hca_hdl = ia_rp->ia_hca_hdl;
2610         evd_rp->evd_cookie = args.evd_cookie;
2611         evd_rp->evd_cno_res = NULL;
2612         evd_rp->evd_cr_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2613         evd_rp->evd_conn_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2614         evd_rp->evd_async_events.eel_event_type = DAPLKA_EVD_ASYNC_EVENTS;
2615 
2616         /*
2617          * if the client specified a non-zero cno_hkey, we
2618          * lookup the cno and save the reference for later use.
2619          */
2620         if (args.evd_cno_hkey > 0) {
2621                 daplka_cno_resource_t *cno_rp;
2622 
2623                 cno_rp = (daplka_cno_resource_t *)
2624                     daplka_hash_lookup(&ia_rp->ia_cno_htbl,
2625                     args.evd_cno_hkey);
2626                 if (cno_rp == NULL) {
2627                         DERR("evd_create: cannot find cno resource\n");
2628                         goto cleanup;
2629                 }
2630                 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
2631                 evd_rp->evd_cno_res = cno_rp;
2632         }
2633         hca_attrp = &ia_rp->ia_hca->hca_attr;
2634         if ((evd_rp->evd_flags &
2635             (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) != 0) {
2636                 if (args.evd_cq_size > hca_attrp->hca_max_cq_sz) {
2637                         DERR("evd_create: invalid cq size %d",
2638                             args.evd_cq_size);
2639                         retval = EINVAL;
2640                         goto cleanup;
2641                 }
2642                 cq_attr.cq_size = args.evd_cq_size;
2643                 cq_attr.cq_sched = NULL;
2644                 cq_attr.cq_flags = IBT_CQ_USER_MAP;
2645 
2646                 status = daplka_ibt_alloc_cq(evd_rp, evd_rp->evd_hca_hdl,
2647                     &cq_attr, &evd_rp->evd_cq_hdl, &evd_rp->evd_cq_real_size);
2648 
2649                 if (status != IBT_SUCCESS) {
2650                         DERR("evd_create: ibt_alloc_cq returned %d", status);
2651                         *rvalp = (int)status;
2652                         retval = 0;
2653                         goto cleanup;
2654                 }
2655 
2656                 /*
2657                  * store evd ptr with cq_hdl
2658                  * mutex is only needed for race of "destroy" and "async"
2659                  */
2660                 mutex_enter(&daplka_dev->daplka_mutex);
2661                 ibt_set_cq_private(evd_rp->evd_cq_hdl, (void *)evd_rp);
2662                 mutex_exit(&daplka_dev->daplka_mutex);
2663 
2664                 /* Get HCA-specific data_out info */
2665                 status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2666                     IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2667                     &args.evd_cq_data_out, sizeof (args.evd_cq_data_out));
2668 
2669                 if (status != IBT_SUCCESS) {
2670                         DERR("evd_create: ibt_ci_data_out error(%d)", status);
2671                         *rvalp = (int)status;
2672                         retval = 0;
2673                         goto cleanup;
2674                 }
2675 
2676                 args.evd_cq_real_size = evd_rp->evd_cq_real_size;
2677 
2678                 ibt_set_cq_handler(evd_rp->evd_cq_hdl, daplka_cq_handler,
2679                     (void *)evd_rp);
2680         }
2681 
2682         retval = daplka_hash_insert(&ia_rp->ia_evd_htbl,
2683             &evd_hkey, (void *)evd_rp);
2684         if (retval != 0) {
2685                 DERR("evd_ceate: cannot insert evd %d\n", retval);
2686                 goto cleanup;
2687         }
2688         inserted = B_TRUE;
2689 
2690         /*
2691          * If this evd handles async events need to add to the IA resource
2692          * async evd list
2693          */
2694         if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
2695                 async_evd = kmem_zalloc(sizeof (daplka_async_evd_hkey_t),
2696                     daplka_km_flags);
2697                 /* add the evd to the head of the list */
2698                 mutex_enter(&ia_rp->ia_lock);
2699                 async_evd->aeh_evd_hkey = evd_hkey;
2700                 async_evd->aeh_next = ia_rp->ia_async_evd_hkeys;
2701                 ia_rp->ia_async_evd_hkeys = async_evd;
2702                 mutex_exit(&ia_rp->ia_lock);
2703         }
2704 
2705         args.evd_hkey = evd_hkey;
2706         retval = copyout(&args, (void *)arg, sizeof (dapl_evd_create_t));
2707         if (retval != 0) {
2708                 DERR("evd_create: copyout error %d\n", retval);
2709                 retval = EFAULT;
2710                 goto cleanup;
2711         }
2712         return (0);
2713 
2714 cleanup:;
2715         if (inserted) {
2716                 daplka_evd_resource_t *free_rp = NULL;
2717 
2718                 (void) daplka_hash_remove(&ia_rp->ia_evd_htbl, evd_hkey,
2719                     (void **)&free_rp);
2720                 if (free_rp != evd_rp) {
2721                         DERR("evd_create: cannot remove evd\n");
2722                         /*
2723                          * we can only get here if another thread
2724                          * has completed the cleanup in evd_free
2725                          */
2726                         return (retval);
2727                 }
2728         }
2729         DAPLKA_RS_UNREF(evd_rp);
2730         return (retval);
2731 }
2732 
2733 /*
2734  * resizes CQ and returns new mapping info to library.
2735  */
2736 /* ARGSUSED */
2737 static int
2738 daplka_cq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2739         cred_t *cred, int *rvalp)
2740 {
2741         daplka_evd_resource_t           *evd_rp = NULL;
2742         ibt_hca_attr_t                  *hca_attrp;
2743         dapl_cq_resize_t                args;
2744         ibt_status_t                    status;
2745         int                             retval = 0;
2746 
2747         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cq_resize_t),
2748             mode);
2749         if (retval != 0) {
2750                 DERR("cq_resize: copyin error %d\n", retval);
2751                 return (EFAULT);
2752         }
2753 
2754         /* get evd resource */
2755         evd_rp = (daplka_evd_resource_t *)
2756             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.cqr_evd_hkey);
2757         if (evd_rp == NULL) {
2758                 DERR("cq_resize: cannot find evd resource\n");
2759                 return (EINVAL);
2760         }
2761         ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
2762 
2763         hca_attrp = &ia_rp->ia_hca->hca_attr;
2764         if (args.cqr_cq_new_size > hca_attrp->hca_max_cq_sz) {
2765                 DERR("cq_resize: invalid cq size %d", args.cqr_cq_new_size);
2766                 retval = EINVAL;
2767                 goto cleanup;
2768         }
2769         /*
2770          * If ibt_resize_cq fails that it is primarily due to resource
2771          * shortage. Per IB spec resize will never loose events and
2772          * a resize error leaves the CQ intact. Therefore even if the
2773          * resize request fails we proceed and get the mapping data
2774          * from the CQ so that the library can mmap it.
2775          */
2776         status = ibt_resize_cq(evd_rp->evd_cq_hdl, args.cqr_cq_new_size,
2777             &args.cqr_cq_real_size);
2778         if (status != IBT_SUCCESS) {
2779                 /* we return the size of the old CQ if resize fails */
2780                 args.cqr_cq_real_size = evd_rp->evd_cq_real_size;
2781                 ASSERT(status != IBT_CQ_HDL_INVALID);
2782                 DERR("cq_resize: ibt_resize_cq failed:%d\n", status);
2783         } else {
2784                 mutex_enter(&evd_rp->evd_lock);
2785                 evd_rp->evd_cq_real_size = args.cqr_cq_real_size;
2786                 mutex_exit(&evd_rp->evd_lock);
2787         }
2788 
2789         D2("cq_resize(%d): done new_sz(%u) real_sz(%u)\n",
2790             DAPLKA_RS_RNUM(evd_rp),
2791             args.cqr_cq_new_size, args.cqr_cq_real_size);
2792 
2793         /* Get HCA-specific data_out info */
2794         status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2795             IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2796             &args.cqr_cq_data_out, sizeof (args.cqr_cq_data_out));
2797         if (status != IBT_SUCCESS) {
2798                 DERR("cq_resize: ibt_ci_data_out error(%d)\n", status);
2799                 /* return ibt_ci_data_out status */
2800                 *rvalp = (int)status;
2801                 retval = 0;
2802                 goto cleanup;
2803         }
2804 
2805         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cq_resize_t),
2806             mode);
2807         if (retval != 0) {
2808                 DERR("cq_resize: copyout error %d\n", retval);
2809                 retval = EFAULT;
2810                 goto cleanup;
2811         }
2812 
2813 cleanup:;
2814         if (evd_rp != NULL) {
2815                 DAPLKA_RS_UNREF(evd_rp);
2816         }
2817         return (retval);
2818 }
2819 
2820 /*
2821  * Routine to copyin the event poll message so that 32 bit libraries
2822  * can be safely supported
2823  */
2824 int
2825 daplka_event_poll_copyin(intptr_t inarg, dapl_event_poll_t *outarg, int mode)
2826 {
2827         int     retval;
2828 
2829 #ifdef _MULTI_DATAMODEL
2830         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2831                 dapl_event_poll32_t     args32;
2832 
2833                 retval = ddi_copyin((void *)inarg, &args32,
2834                     sizeof (dapl_event_poll32_t), mode);
2835                 if (retval != 0) {
2836                         DERR("event_poll_copyin: 32bit error %d\n", retval);
2837                         return (EFAULT);
2838                 }
2839 
2840                 outarg->evp_evd_hkey = args32.evp_evd_hkey;
2841                 outarg->evp_threshold = args32.evp_threshold;
2842                 outarg->evp_timeout = args32.evp_timeout;
2843                 outarg->evp_ep = (dapl_ib_event_t *)(uintptr_t)args32.evp_ep;
2844                 outarg->evp_num_ev = args32.evp_num_ev;
2845                 outarg->evp_num_polled = args32.evp_num_polled;
2846                 return (0);
2847         }
2848 #endif
2849         retval = ddi_copyin((void *)inarg, outarg, sizeof (dapl_event_poll_t),
2850             mode);
2851         if (retval != 0) {
2852                 DERR("event_poll: copyin error %d\n", retval);
2853                 return (EFAULT);
2854         }
2855 
2856         return (0);
2857 }
2858 
2859 /*
2860  * Routine to copyout the event poll message so that 32 bit libraries
2861  * can be safely supported
2862  */
2863 int
2864 daplka_event_poll_copyout(dapl_event_poll_t *inarg, intptr_t outarg, int mode)
2865 {
2866         int     retval;
2867 
2868 #ifdef _MULTI_DATAMODEL
2869         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2870                 dapl_event_poll32_t     args32;
2871 
2872                 args32.evp_evd_hkey = inarg->evp_evd_hkey;
2873                 args32.evp_threshold = inarg->evp_threshold;
2874                 args32.evp_timeout = inarg->evp_timeout;
2875                 args32.evp_ep = (caddr32_t)(uintptr_t)inarg->evp_ep;
2876                 args32.evp_num_ev = inarg->evp_num_ev;
2877                 args32.evp_num_polled = inarg->evp_num_polled;
2878 
2879                 retval = ddi_copyout((void *)&args32, (void *)outarg,
2880                     sizeof (dapl_event_poll32_t), mode);
2881                 if (retval != 0) {
2882                         DERR("event_poll_copyout: 32bit error %d\n", retval);
2883                         return (EFAULT);
2884                 }
2885                 return (0);
2886         }
2887 #endif
2888         retval = ddi_copyout((void *)inarg, (void *)outarg,
2889             sizeof (dapl_event_poll_t), mode);
2890         if (retval != 0) {
2891                 DERR("event_poll_copyout: error %d\n", retval);
2892                 return (EFAULT);
2893         }
2894 
2895         return (0);
2896 }
2897 
/*
 * function to handle CM REQ RCV private data from Solaris or third parties.
 * transposes the raw CM private data in cr_ev into the DAPL_PRIVATE
 * hello-message layout expected by the library, filling in the source
 * address via a reverse ATS lookup when the sender is not a Solaris peer.
 */
/* ARGSUSED */
static void
daplka_crevent_privdata_post(daplka_ia_resource_t *ia_rp,
	dapl_ib_event_t *evd_rp, daplka_evd_event_t *cr_ev)
{
	DAPL_PRIVATE	*dp;
	ib_gid_t	*lgid;
	ibt_ar_t	ar_query_s;
	ibt_ar_t	ar_result_s;
	DAPL_HELLO_MSG	*hip;
	uint32_t	ipaddr_ord;
	ibt_priv_data_len_t clen;
	ibt_priv_data_len_t olen;
	ibt_status_t	status;
	uint16_t	cksum;

	/*
	 * get private data and len
	 */
	dp = (DAPL_PRIVATE *)cr_ev->ee_cmev.ec_cm_ev_priv_data;
	clen = cr_ev->ee_cmev.ec_cm_ev_priv_data_len;
#if defined(DAPLKA_DEBUG_FORCE_ATS)
	/* skip the DAPL_PRIVATE checksum check */
#else
	/* for remote connects */
	/* look up hello message in the CM private data area */
	if (clen >= sizeof (DAPL_PRIVATE) &&
	    (dp->hello_msg.hi_vers == DAPL_HELLO_MSG_VERS)) {
		/*
		 * verify the checksum with the hi_checksum field zeroed,
		 * since the sender computed it that way. a match means
		 * the peer is a Solaris uDAPL and the message can be
		 * passed through after byte-order fixup of hi_port.
		 */
		cksum = ntohs(dp->hello_msg.hi_checksum);
		dp->hello_msg.hi_checksum = 0;
		if (daplka_hellomsg_cksum(dp) == cksum) {
			D2("daplka_crevent_privdata_post: Solaris msg\n");
			evd_rp->ibe_ce.ibce_priv_data_size = clen;
			dp->hello_msg.hi_checksum = DAPL_CHECKSUM;
			dp->hello_msg.hi_port = ntohs(dp->hello_msg.hi_port);
			bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
			kmem_free(dp, clen);
			return;
		}
	}
#endif /* DAPLKA_DEBUG_FORCE_ATS */

	D2("daplka_crevent_privdata_post: 3rd party msg\n");
	/* transpose CM private data into hello message */
	if (clen) {
		/*
		 * copy at most DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE bytes,
		 * but free the buffer with its original length (olen).
		 */
		olen = clen;
		if (clen > DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE) {
			clen = DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE;
		}
		bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
		kmem_free(dp, olen);
	} else {
		bzero(evd_rp->ibe_ce.ibce_priv_data_ptr,
		    DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE);
	}
	evd_rp->ibe_ce.ibce_priv_data_size = sizeof (DAPL_PRIVATE);
	dp = (DAPL_PRIVATE *)evd_rp->ibe_ce.ibce_priv_data_ptr;
	/*
	 * fill in hello message
	 */
	hip = &dp->hello_msg;
	hip->hi_checksum = DAPL_CHECKSUM;
	hip->hi_clen = clen;
	hip->hi_mid = 0;
	hip->hi_vers = DAPL_HELLO_MSG_VERS;
	hip->hi_port = 0;

	/* assign sgid and dgid */
	lgid = &ia_rp->ia_hca_sgid;
	ar_query_s.ar_gid.gid_prefix =
	    cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix;
	ar_query_s.ar_gid.gid_guid =
	    cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid;
	ar_query_s.ar_pkey = ia_rp->ia_port_pkey;
	bzero(ar_query_s.ar_data, DAPL_ATS_NBYTES);

	/* reverse ip address lookup through ATS */
	status = ibt_query_ar(lgid, &ar_query_s, &ar_result_s);
	if (status == IBT_SUCCESS) {
		bcopy(ar_result_s.ar_data, hip->hi_saaddr, DAPL_ATS_NBYTES);
		/*
		 * determine the address family: an all-zero v4 pad
		 * indicates an IPv4-mapped address, otherwise IPv6.
		 */
		ipaddr_ord = hip->hi_v4pad[0] | hip->hi_v4pad[1] |
		    hip->hi_v4pad[2];
		if (ipaddr_ord == 0) {
			hip->hi_ipv = AF_INET;
		} else {
			hip->hi_ipv = AF_INET6;
		}

#define UL(b) ar_result_s.ar_data[(b)]
		D3("daplka_privdata_post: family=%d :SA[8] %d.%d.%d.%d\n",
		    hip->hi_ipv, UL(8), UL(9), UL(10), UL(11));
		D3("daplka_privdata_post: SA[12] %d.%d.%d.%d\n",
		    UL(12), UL(13), UL(14), UL(15));
	} else {
		/* non-conformed third parties */
		hip->hi_ipv = AF_UNSPEC;
		bzero(hip->hi_saaddr, DAPL_ATS_NBYTES);
	}
}
3001 
3002 /*
3003  * this function is called by evd_wait and evd_dequeue to wait for
3004  * connection events and CQ notifications. typically this function
3005  * is called when the userland CQ is empty and the client has
3006  * specified a non-zero timeout to evd_wait. if the client is
3007  * interested in CQ events, the CQ must be armed in userland prior
3008  * to calling this function.
3009  */
3010 /* ARGSUSED */
3011 static int
3012 daplka_event_poll(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3013         cred_t *cred, int *rvalp)
3014 {
3015         daplka_evd_resource_t   *evd_rp = NULL;
3016         dapl_event_poll_t       args;
3017         daplka_evd_event_t      *head;
3018         dapl_ib_event_t         evp_arr[NUM_EVENTS_PER_POLL];
3019         dapl_ib_event_t         *evp;
3020         dapl_ib_event_t         *evp_start;
3021         size_t                  evp_size;
3022         int                     threshold;
3023         clock_t                 timeout;
3024         uint32_t                max_events;
3025         uint32_t                num_events = 0;
3026         void                    *pd;
3027         ibt_priv_data_len_t     n;
3028         int                     retval = 0;
3029         int                     rc;
3030 
3031         retval = daplka_event_poll_copyin(arg, &args, mode);
3032         if (retval != 0) {
3033                 return (EFAULT);
3034         }
3035 
3036         if ((args.evp_num_ev > 0) && (args.evp_ep == NULL)) {
3037                 DERR("event_poll: evp_ep cannot be NULL if num_wc=%d",
3038                     args.evp_num_ev);
3039                 return (EINVAL);
3040         }
3041         /*
3042          * Note: dequeue requests have a threshold = 0, timeout = 0
3043          */
3044         threshold = args.evp_threshold;
3045 
3046         max_events = args.evp_num_ev;
3047         /* ensure library is passing sensible values */
3048         if (max_events < threshold) {
3049                 DERR("event_poll: max_events(%d) < threshold(%d)\n",
3050                     max_events, threshold);
3051                 return (EINVAL);
3052         }
3053         /* Do a sanity check to avoid excessive memory allocation */
3054         if (max_events > DAPL_EVD_MAX_EVENTS) {
3055                 DERR("event_poll: max_events(%d) > %d",
3056                     max_events, DAPL_EVD_MAX_EVENTS);
3057                 return (EINVAL);
3058         }
3059         D4("event_poll: threshold(%d) timeout(0x%llx) max_events(%d)\n",
3060             threshold, (longlong_t)args.evp_timeout, max_events);
3061 
3062         /* get evd resource */
3063         evd_rp = (daplka_evd_resource_t *)
3064             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evp_evd_hkey);
3065         if (evd_rp == NULL) {
3066                 DERR("event_poll: cannot find evd resource\n");
3067                 return (EINVAL);
3068         }
3069         ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3070 
3071         /*
3072          * Use event array on the stack if possible
3073          */
3074         if (max_events <= NUM_EVENTS_PER_POLL) {
3075                 evp_start = evp = &evp_arr[0];
3076         } else {
3077                 evp_size = max_events * sizeof (dapl_ib_event_t);
3078                 evp_start = evp = kmem_zalloc(evp_size, daplka_km_flags);
3079                 if (evp == NULL) {
3080                         DERR("event_poll: kmem_zalloc failed, evp_size %d",
3081                             evp_size);
3082                         retval = ENOMEM;
3083                         goto cleanup;
3084                 }
3085         }
3086 
3087         /*
3088          * The Event poll algorithm is as follows -
3089          * The library passes a buffer big enough to hold "max_events"
3090          * events. max_events is >= threshold. If at any stage we get
3091          * max_events no. of events we bail. The events are polled in
3092          * the following order -
3093          * 1) Check for CR events in the evd_cr_events list
3094          * 2) Check for Connection events in the evd_connection_events list
3095          *
3096          * If after the above 2 steps we don't have enough(>= threshold) events
3097          * we block for CQ notification and sleep. Upon being woken up we start
3098          * at step 1 again.
3099          */
3100 
3101         /*
3102          * Note: this could be 0 or INFINITE or anyother value in microsec
3103          */
3104         if (args.evp_timeout > 0) {
3105                 if (args.evp_timeout >= LONG_MAX) {
3106                         timeout = LONG_MAX;
3107                 } else {
3108                         clock_t curr_time = ddi_get_lbolt();
3109 
3110                         timeout = curr_time +
3111                             drv_usectohz((clock_t)args.evp_timeout);
3112                         /*
3113                          * use the max value if we wrapped around
3114                          */
3115                         if (timeout <= curr_time) {
3116                                 timeout = LONG_MAX;
3117                         }
3118                 }
3119         } else {
3120                 timeout = 0;
3121         }
3122 
3123         mutex_enter(&evd_rp->evd_lock);
3124         for (;;) {
3125                 /*
3126                  * If this evd is waiting for CM events check that now.
3127                  */
3128                 if ((evd_rp->evd_flags & DAT_EVD_CR_FLAG) &&
3129                     (evd_rp->evd_cr_events.eel_num_elements > 0)) {
3130                         /* dequeue events from evd_cr_events list */
3131                         while (head = daplka_evd_event_dequeue(
3132                             &evd_rp->evd_cr_events)) {
3133                                 /*
3134                                  * populate the evp array
3135                                  */
3136                                 evp[num_events].ibe_ev_family = DAPL_CR_EVENTS;
3137                                 evp[num_events].ibe_ce.ibce_event =
3138                                     head->ee_cmev.ec_cm_ev_type;
3139                                 evp[num_events].ibe_ce.ibce_cookie =
3140                                     (uint64_t)head->ee_cmev.ec_cm_cookie;
3141                                 evp[num_events].ibe_ce.ibce_psep_cookie =
3142                                     head->ee_cmev.ec_cm_psep_cookie;
3143                                 daplka_crevent_privdata_post(ia_rp,
3144                                     &evp[num_events], head);
3145                                 kmem_free(head, sizeof (daplka_evd_event_t));
3146 
3147                                 if (++num_events == max_events) {
3148                                         mutex_exit(&evd_rp->evd_lock);
3149                                         goto maxevent_reached;
3150                                 }
3151                         }
3152                 }
3153 
3154                 if ((evd_rp->evd_flags & DAT_EVD_CONNECTION_FLAG) &&
3155                     (evd_rp->evd_conn_events.eel_num_elements > 0)) {
3156                         /* dequeue events from evd_connection_events list */
3157                         while ((head = daplka_evd_event_dequeue
3158                             (&evd_rp->evd_conn_events))) {
3159                                 /*
3160                                  * populate the evp array -
3161                                  *
3162                                  */
3163                                 if (head->ee_cmev.ec_cm_is_passive) {
3164                                         evp[num_events].ibe_ev_family =
3165                                             DAPL_PASSIVE_CONNECTION_EVENTS;
3166                                 } else {
3167                                         evp[num_events].ibe_ev_family =
3168                                             DAPL_ACTIVE_CONNECTION_EVENTS;
3169                                 }
3170                                 evp[num_events].ibe_ce.ibce_event =
3171                                     head->ee_cmev.ec_cm_ev_type;
3172                                 evp[num_events].ibe_ce.ibce_cookie =
3173                                     (uint64_t)head->ee_cmev.ec_cm_cookie;
3174                                 evp[num_events].ibe_ce.ibce_psep_cookie =
3175                                     head->ee_cmev.ec_cm_psep_cookie;
3176 
3177                                 if (head->ee_cmev.ec_cm_ev_priv_data_len > 0) {
3178                                         pd = head->ee_cmev.ec_cm_ev_priv_data;
3179                                         n = head->
3180                                             ee_cmev.ec_cm_ev_priv_data_len;
3181                                         bcopy(pd, (void *)evp[num_events].
3182                                             ibe_ce.ibce_priv_data_ptr, n);
3183                                         evp[num_events].ibe_ce.
3184                                             ibce_priv_data_size = n;
3185                                         kmem_free(pd, n);
3186                                 }
3187 
3188                                 kmem_free(head, sizeof (daplka_evd_event_t));
3189 
3190                                 if (++num_events == max_events) {
3191                                         mutex_exit(&evd_rp->evd_lock);
3192                                         goto maxevent_reached;
3193                                 }
3194                         }
3195                 }
3196 
3197                 if ((evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) &&
3198                     (evd_rp->evd_async_events.eel_num_elements > 0)) {
3199                         /* dequeue events from evd_async_events list */
3200                         while (head = daplka_evd_event_dequeue(
3201                             &evd_rp->evd_async_events)) {
3202                                 /*
3203                                  * populate the evp array
3204                                  */
3205                                 evp[num_events].ibe_ev_family =
3206                                     DAPL_ASYNC_EVENTS;
3207                                 evp[num_events].ibe_async.ibae_type =
3208                                     head->ee_aev.ibae_type;
3209                                 evp[num_events].ibe_async.ibae_hca_guid =
3210                                     head->ee_aev.ibae_hca_guid;
3211                                 evp[num_events].ibe_async.ibae_cookie =
3212                                     head->ee_aev.ibae_cookie;
3213                                 evp[num_events].ibe_async.ibae_port =
3214                                     head->ee_aev.ibae_port;
3215 
3216                                 kmem_free(head, sizeof (daplka_evd_event_t));
3217 
3218                                 if (++num_events == max_events) {
3219                                         break;
3220                                 }
3221                         }
3222                 }
3223 
3224                 /*
3225                  * We have sufficient events for this call so no need to wait
3226                  */
3227                 if ((threshold > 0) && (num_events >= threshold)) {
3228                         mutex_exit(&evd_rp->evd_lock);
3229                         break;
3230                 }
3231 
3232                 evd_rp->evd_waiters++;
3233                 /*
3234                  * There are no new events and a timeout was specified.
3235                  * Note: for CQ events threshold is 0 but timeout is
3236                  * not necessarily 0.
3237                  */
3238                 while ((evd_rp->evd_newevents == DAPLKA_EVD_NO_EVENTS) &&
3239                     timeout) {
3240                         retval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv,
3241                             &evd_rp->evd_lock, timeout);
3242                         if (retval == 0) {
3243                                 retval = EINTR;
3244                                 break;
3245                         } else if (retval == -1) {
3246                                 retval = ETIME;
3247                                 break;
3248                         } else {
3249                                 retval = 0;
3250                                 continue;
3251                         }
3252                 }
3253                 evd_rp->evd_waiters--;
3254                 if (evd_rp->evd_newevents != DAPLKA_EVD_NO_EVENTS) {
3255                         /*
3256                          * If we got woken up by the CQ handler due to events
3257                          * in the CQ. Need to go to userland to check for
3258                          * CQ events. Or if we were woken up due to S/W events
3259                          */
3260 
3261                         /* check for userland events only */
3262                         if (!(evd_rp->evd_newevents &
3263                             ~DAPLKA_EVD_ULAND_EVENTS)) {
3264                                 evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
3265                                 mutex_exit(&evd_rp->evd_lock);
3266                                 break;
3267                         }
3268                         /*
3269                          * Clear newevents since we are going to loopback
3270                          * back and check for both CM and CQ events
3271                          */
3272                         evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
3273                 } else { /* error */
3274                         mutex_exit(&evd_rp->evd_lock);
3275                         break;
3276                 }
3277         }
3278 
3279 maxevent_reached:
3280         args.evp_num_polled = num_events;
3281 
3282         /*
3283          * At this point retval might have a value that we want to return
3284          * back to the user. So the copyouts shouldn't tamper retval.
3285          */
3286         if (args.evp_num_polled > 0) { /* copyout the events */
3287                 rc = ddi_copyout(evp, args.evp_ep, args.evp_num_polled *
3288                     sizeof (dapl_ib_event_t), mode);
3289                 if (rc != 0) { /* XXX: we are losing events here */
3290                         DERR("event_poll: event array copyout error %d", rc);
3291                         retval = EFAULT;
3292                         goto cleanup;
3293                 }
3294                 rc = daplka_event_poll_copyout(&args, arg, mode);
3295                 if (rc != 0) {  /* XXX: we are losing events here */
3296                         DERR("event_poll: copyout error %d\n", rc);
3297                         retval = EFAULT;
3298                         goto cleanup;
3299                 }
3300         }
3301 
3302 cleanup:;
3303         if ((max_events > NUM_EVENTS_PER_POLL) && (evp_start != NULL)) {
3304                 kmem_free(evp_start, evp_size);
3305         }
3306 
3307         if (evd_rp != NULL) {
3308                 DAPLKA_RS_UNREF(evd_rp);
3309         }
3310         return (retval);
3311 }
3312 
3313 /* ARGSUSED */
3314 static int
3315 daplka_event_wakeup(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3316         cred_t *cred, int *rvalp)
3317 {
3318         dapl_event_wakeup_t     args;
3319         daplka_evd_resource_t   *evd_rp;
3320         int                     retval;
3321 
3322         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_event_wakeup_t),
3323             mode);
3324         if (retval != 0) {
3325                 DERR("event_wakeup: copyin error %d\n", retval);
3326                 return (EFAULT);
3327         }
3328 
3329         /* get evd resource */
3330         evd_rp = (daplka_evd_resource_t *)
3331             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evw_hkey);
3332         if (evd_rp == NULL) {
3333                 DERR("event_wakeup: cannot find evd resource\n");
3334                 return (EINVAL);
3335         }
3336         ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3337 
3338         daplka_evd_wakeup(evd_rp, NULL, NULL);
3339 
3340         DAPLKA_RS_UNREF(evd_rp);
3341 
3342         return (retval);
3343 }
3344 
3345 /* ARGSUSED */
3346 static int
3347 daplka_evd_modify_cno(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3348         cred_t *cred, int *rvalp)
3349 {
3350         dapl_evd_modify_cno_t   args;
3351         daplka_evd_resource_t   *evd_rp;
3352         daplka_cno_resource_t   *cno_rp;
3353         daplka_cno_resource_t   *old_cno_rp;
3354         int                     retval;
3355 
3356         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_modify_cno_t),
3357             mode);
3358         if (retval != 0) {
3359                 DERR("evd_modify_cno: copyin error %d\n", retval);
3360                 return (EFAULT);
3361         }
3362 
3363         /* get evd resource */
3364         evd_rp = (daplka_evd_resource_t *)
3365             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evmc_hkey);
3366         if (evd_rp == NULL) {
3367                 DERR("evd_modify_cno: cannot find evd resource\n");
3368                 retval = EINVAL;
3369                 goto cleanup;
3370         }
3371         ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3372 
3373         if (args.evmc_cno_hkey > 0) {
3374                 /* get cno resource corresponding to the new CNO */
3375                 cno_rp = (daplka_cno_resource_t *)
3376                     daplka_hash_lookup(&ia_rp->ia_cno_htbl,
3377                     args.evmc_cno_hkey);
3378                 if (cno_rp == NULL) {
3379                         DERR("evd_modify_cno: cannot find CNO resource\n");
3380                         retval = EINVAL;
3381                         goto cleanup;
3382                 }
3383                 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3384         } else {
3385                 cno_rp = NULL;
3386         }
3387 
3388         mutex_enter(&evd_rp->evd_lock);
3389         old_cno_rp = evd_rp->evd_cno_res;
3390         evd_rp->evd_cno_res = cno_rp;
3391         mutex_exit(&evd_rp->evd_lock);
3392 
3393         /*
3394          * drop the refcnt on the old CNO, the refcnt on the new CNO is
3395          * retained since the evd holds a reference to it.
3396          */
3397         if (old_cno_rp) {
3398                 DAPLKA_RS_UNREF(old_cno_rp);
3399         }
3400 
3401 cleanup:
3402         if (evd_rp) {
3403                 DAPLKA_RS_UNREF(evd_rp);
3404         }
3405 
3406         return (retval);
3407 }
3408 
3409 /*
3410  * Frees the EVD and associated resources.
3411  * If there are other threads still using this EVD, the destruction
3412  * will defer until the EVD's refcnt drops to zero.
3413  */
3414 /* ARGSUSED */
3415 static int
3416 daplka_evd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3417         cred_t *cred, int *rvalp)
3418 {
3419         daplka_evd_resource_t   *evd_rp = NULL;
3420         daplka_async_evd_hkey_t *curr;
3421         daplka_async_evd_hkey_t *prev;
3422         dapl_evd_free_t         args;
3423         int                     retval = 0;
3424 
3425         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_free_t), mode);
3426         if (retval != 0) {
3427                 DERR("evd_free: copyin error %d\n", retval);
3428                 return (EFAULT);
3429         }
3430         retval = daplka_hash_remove(&ia_rp->ia_evd_htbl, args.evf_hkey,
3431             (void **)&evd_rp);
3432         if (retval != 0 || evd_rp == NULL) {
3433                 DERR("evd_free: cannot find evd resource\n");
3434                 return (EINVAL);
3435         }
3436         ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3437 
3438         /* If this is an async evd remove it from the IA's async evd list */
3439         if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
3440                 mutex_enter(&ia_rp->ia_lock);
3441                 curr = prev = ia_rp->ia_async_evd_hkeys;
3442                 while (curr != NULL) {
3443                         if (curr->aeh_evd_hkey == args.evf_hkey) {
3444                                 /* unlink curr from the list */
3445                                 if (curr == prev) {
3446                                         /*
3447                                          * if first element in the list update
3448                                          * the list head
3449                                          */
3450                                         ia_rp->ia_async_evd_hkeys =
3451                                             curr->aeh_next;
3452                                 } else {
3453                                         prev->aeh_next = curr->aeh_next;
3454                                 }
3455                                 break;
3456                         }
3457                         prev = curr;
3458                         curr = curr->aeh_next;
3459                 }
3460                 mutex_exit(&ia_rp->ia_lock);
3461                 /* free the curr entry */
3462                 kmem_free(curr, sizeof (daplka_async_evd_hkey_t));
3463         }
3464 
3465         /* UNREF calls the actual free function when refcnt is zero */
3466         DAPLKA_RS_UNREF(evd_rp);
3467         return (0);
3468 }
3469 
3470 /*
3471  * destroys EVD resource.
3472  * called when refcnt drops to zero.
3473  */
3474 static int
3475 daplka_evd_destroy(daplka_resource_t *gen_rp)
3476 {
3477         daplka_evd_resource_t   *evd_rp = (daplka_evd_resource_t *)gen_rp;
3478         ibt_status_t            status;
3479         daplka_evd_event_t      *evt;
3480         ibt_priv_data_len_t     len;
3481 
3482         D3("evd_destroy: entering, evd_rp 0x%p, rnum %d\n",
3483             evd_rp, DAPLKA_RS_RNUM(evd_rp));
3484         /*
3485          * free CQ
3486          */
3487         if (evd_rp->evd_cq_hdl) {
3488                 ibt_set_cq_handler(evd_rp->evd_cq_hdl, NULL, NULL);
3489                 mutex_enter(&daplka_dev->daplka_mutex);
3490                 ibt_set_cq_private(evd_rp->evd_cq_hdl, NULL);
3491                 mutex_exit(&daplka_dev->daplka_mutex);
3492 
3493                 status = daplka_ibt_free_cq(evd_rp, evd_rp->evd_cq_hdl);
3494                 if (status != IBT_SUCCESS) {
3495                         DERR("evd_destroy: ibt_free_cq returned %d\n", status);
3496                 }
3497                 evd_rp->evd_cq_hdl = NULL;
3498                 D2("evd_destroy: cq freed, rnum %d\n", DAPLKA_RS_RNUM(evd_rp));
3499         }
3500 
3501         /*
3502          * release reference on CNO
3503          */
3504         if (evd_rp->evd_cno_res != NULL) {
3505                 mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3506                 if (evd_rp->evd_cno_res->cno_evd_cookie ==
3507                     evd_rp->evd_cookie) {
3508                         evd_rp->evd_cno_res->cno_evd_cookie = 0;
3509                 }
3510                 mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3511                 DAPLKA_RS_UNREF(evd_rp->evd_cno_res);
3512                 evd_rp->evd_cno_res = NULL;
3513         }
3514 
3515         /*
3516          * discard all remaining events
3517          */
3518         mutex_enter(&evd_rp->evd_lock);
3519         while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_cr_events))) {
3520                 D2("evd_destroy: discarding CR event: %d\n",
3521                     evt->ee_cmev.ec_cm_ev_type);
3522                 len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3523                 if (len > 0) {
3524                         kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3525                         evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3526                         evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3527                 }
3528                 kmem_free(evt, sizeof (*evt));
3529         }
3530         ASSERT(evd_rp->evd_cr_events.eel_num_elements == 0);
3531 
3532         while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_conn_events))) {
3533                 D2("evd_destroy: discarding CONN event: %d\n",
3534                     evt->ee_cmev.ec_cm_ev_type);
3535                 len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3536                 if (len > 0) {
3537                         kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3538                         evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3539                         evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3540                 }
3541                 kmem_free(evt, sizeof (*evt));
3542         }
3543         ASSERT(evd_rp->evd_conn_events.eel_num_elements == 0);
3544 
3545         while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_async_events))) {
3546                 DERR("evd_destroy: discarding ASYNC event: %d\n",
3547                     evt->ee_aev.ibae_type);
3548                 kmem_free(evt, sizeof (*evt));
3549         }
3550         ASSERT(evd_rp->evd_async_events.eel_num_elements == 0);
3551         mutex_exit(&evd_rp->evd_lock);
3552 
3553         mutex_destroy(&evd_rp->evd_lock);
3554         DAPLKA_RS_FINI(evd_rp);
3555         kmem_free(evd_rp, sizeof (daplka_evd_resource_t));
3556         D3("evd_destroy: exiting, evd_rp 0x%p\n", evd_rp);
3557         return (0);
3558 }
3559 
3560 static void
3561 daplka_hash_evd_free(void *obj)
3562 {
3563         daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)obj;
3564 
3565         ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3566         DAPLKA_RS_UNREF(evd_rp);
3567 }
3568 
3569 /*
3570  * this handler fires when new completions arrive.
3571  */
3572 /* ARGSUSED */
3573 static void
3574 daplka_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
3575 {
3576         D3("cq_handler: fired setting evd_newevents\n");
3577         daplka_evd_wakeup((daplka_evd_resource_t *)arg, NULL, NULL);
3578 }
3579 
3580 /*
3581  * this routine wakes up a client from evd_wait. if evtq and evt
3582  * are non-null, the event evt will be enqueued prior to waking
3583  * up the client. if the evd is associated with a CNO and if there
3584  * are no waiters on the evd, the CNO will be notified.
3585  */
3586 static void
3587 daplka_evd_wakeup(daplka_evd_resource_t *evd_rp, daplka_evd_event_list_t *evtq,
3588         daplka_evd_event_t *evt)
3589 {
3590         uint32_t waiters = 0;
3591 
3592         mutex_enter(&evd_rp->evd_lock);
3593         if (evtq != NULL && evt != NULL) {
3594                 ASSERT(evtq == &evd_rp->evd_cr_events ||
3595                     evtq == &evd_rp->evd_conn_events ||
3596                     evtq == &evd_rp->evd_async_events);
3597                 daplka_evd_event_enqueue(evtq, evt);
3598                 ASSERT((evtq->eel_event_type == DAPLKA_EVD_CM_EVENTS) ||
3599                     (evtq->eel_event_type == DAPLKA_EVD_ASYNC_EVENTS));
3600                 evd_rp->evd_newevents |= evtq->eel_event_type;
3601         } else {
3602                 evd_rp->evd_newevents |= DAPLKA_EVD_ULAND_EVENTS;
3603         }
3604         waiters = evd_rp->evd_waiters;
3605         cv_broadcast(&evd_rp->evd_cv);
3606         mutex_exit(&evd_rp->evd_lock);
3607 
3608         /*
3609          * only wakeup the CNO if there are no waiters on this evd.
3610          */
3611         if (evd_rp->evd_cno_res != NULL && waiters == 0) {
3612                 mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3613                 evd_rp->evd_cno_res->cno_evd_cookie = evd_rp->evd_cookie;
3614                 cv_broadcast(&evd_rp->evd_cno_res->cno_cv);
3615                 mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3616         }
3617 }
3618 
3619 /*
3620  * daplka_evd_event_enqueue adds elem to the end of the event list
3621  * The caller is expected to acquire appropriate locks before
3622  * calling enqueue
3623  */
3624 static void
3625 daplka_evd_event_enqueue(daplka_evd_event_list_t *evlist,
3626     daplka_evd_event_t *elem)
3627 {
3628         if (evlist->eel_tail) {
3629                 evlist->eel_tail->ee_next = elem;
3630                 evlist->eel_tail = elem;
3631         } else {
3632                 /* list is empty */
3633                 ASSERT(evlist->eel_head == NULL);
3634                 evlist->eel_head = elem;
3635                 evlist->eel_tail = elem;
3636         }
3637         evlist->eel_num_elements++;
3638 }
3639 
3640 /*
3641  * daplka_evd_event_dequeue removes and returns the first element of event
3642  * list. NULL is returned if the list is empty. The caller is expected to
3643  * acquire appropriate locks before calling enqueue.
3644  */
3645 static daplka_evd_event_t *
3646 daplka_evd_event_dequeue(daplka_evd_event_list_t *evlist)
3647 {
3648         daplka_evd_event_t *head;
3649 
3650         head = evlist->eel_head;
3651         if (head == NULL) {
3652                 return (NULL);
3653         }
3654 
3655         evlist->eel_head = head->ee_next;
3656         evlist->eel_num_elements--;
3657         /* if it was the last element update the tail pointer too */
3658         if (evlist->eel_head == NULL) {
3659                 ASSERT(evlist->eel_num_elements == 0);
3660                 evlist->eel_tail = NULL;
3661         }
3662         return (head);
3663 }
3664 
3665 /*
3666  * A CNO allows the client to wait for notifications from multiple EVDs.
3667  * To use a CNO, the client needs to follow the procedure below:
3668  * 1. allocate a CNO. this returns a cno_hkey that identifies the CNO.
3669  * 2. create one or more EVDs using the returned cno_hkey.
3670  * 3. call cno_wait. when one of the associated EVDs get notified, the
3671  *    CNO will also get notified. cno_wait will then return with a
3672  *    evd_cookie identifying the EVD that triggered the event.
3673  *
3674  * A note about cno_wait:
3675  * -unlike a EVD, a CNO does not maintain a queue of notifications. For
3676  *  example, suppose multiple EVDs triggered a CNO before the client calls
3677  *  cno_wait; when the client calls cno_wait, it will return with the
3678  *  evd_cookie that identifies the *last* EVD that triggered the CNO. It
3679  *  is the responsibility of the client, upon returning from cno_wait, to
3680  *  check on all EVDs that can potentially trigger the CNO. the returned
3681  *  evd_cookie is only meant to be a hint. there is no guarantee that the
3682  *  EVD identified by the evd_cookie still contains an event or still
3683  *  exists by the time cno_wait returns.
3684  */
3685 
3686 /*
3687  * allocates a CNO.
3688  * the returned cno_hkey may subsequently be used in evd_create.
3689  */
3690 /* ARGSUSED */
3691 static int
3692 daplka_cno_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3693         cred_t *cred, int *rvalp)
3694 {
3695         dapl_cno_alloc_t        args;
3696         daplka_cno_resource_t   *cno_rp = NULL;
3697         uint64_t                cno_hkey = 0;
3698         boolean_t               inserted = B_FALSE;
3699         int                     retval = 0;
3700 
3701         cno_rp = kmem_zalloc(sizeof (*cno_rp), daplka_km_flags);
3702         if (cno_rp == NULL) {
3703                 DERR("cno_alloc: cannot allocate cno resource\n");
3704                 return (ENOMEM);
3705         }
3706         DAPLKA_RS_INIT(cno_rp, DAPL_TYPE_CNO,
3707             DAPLKA_RS_RNUM(ia_rp), daplka_cno_destroy);
3708 
3709         mutex_init(&cno_rp->cno_lock, NULL, MUTEX_DRIVER, NULL);
3710         cv_init(&cno_rp->cno_cv, NULL, CV_DRIVER, NULL);
3711         cno_rp->cno_evd_cookie = 0;
3712 
3713         /* insert into cno hash table */
3714         retval = daplka_hash_insert(&ia_rp->ia_cno_htbl,
3715             &cno_hkey, (void *)cno_rp);
3716         if (retval != 0) {
3717                 DERR("cno_alloc: cannot insert cno resource\n");
3718                 goto cleanup;
3719         }
3720         inserted = B_TRUE;
3721 
3722         /* return hkey to library */
3723         args.cno_hkey = cno_hkey;
3724 
3725         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cno_alloc_t),
3726             mode);
3727         if (retval != 0) {
3728                 DERR("cno_alloc: copyout error %d\n", retval);
3729                 retval = EFAULT;
3730                 goto cleanup;
3731         }
3732         return (0);
3733 
3734 cleanup:;
3735         if (inserted) {
3736                 daplka_cno_resource_t *free_rp = NULL;
3737 
3738                 (void) daplka_hash_remove(&ia_rp->ia_cno_htbl, cno_hkey,
3739                     (void **)&free_rp);
3740                 if (free_rp != cno_rp) {
3741                         DERR("cno_alloc: cannot remove cno\n");
3742                         /*
3743                          * we can only get here if another thread
3744                          * has completed the cleanup in cno_free
3745                          */
3746                         return (retval);
3747                 }
3748         }
3749         DAPLKA_RS_UNREF(cno_rp);
3750         return (retval);
3751 }
3752 
3753 /*
3754  * destroys a CNO.
3755  * this gets called when a CNO resource's refcnt drops to zero.
3756  */
3757 static int
3758 daplka_cno_destroy(daplka_resource_t *gen_rp)
3759 {
3760         daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)gen_rp;
3761 
3762         ASSERT(DAPLKA_RS_REFCNT(cno_rp) == 0);
3763         D2("cno_destroy: entering, cno_rp %p, rnum %d\n",
3764             cno_rp, DAPLKA_RS_RNUM(cno_rp));
3765 
3766         ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3767         cv_destroy(&cno_rp->cno_cv);
3768         mutex_destroy(&cno_rp->cno_lock);
3769 
3770         DAPLKA_RS_FINI(cno_rp);
3771         kmem_free(cno_rp, sizeof (daplka_cno_resource_t));
3772         D2("cno_destroy: exiting, cno_rp %p\n", cno_rp);
3773         return (0);
3774 }
3775 
3776 static void
3777 daplka_hash_cno_free(void *obj)
3778 {
3779         daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)obj;
3780 
3781         ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3782         DAPLKA_RS_UNREF(cno_rp);
3783 }
3784 
3785 /*
3786  * removes the CNO from the cno hash table and frees the CNO
3787  * if there are no references to it. if there are references to
3788  * it, the CNO will be destroyed when the last of the references
3789  * is released. once the CNO is removed from the cno hash table,
3790  * the client will no longer be able to call cno_wait on the CNO.
3791  */
3792 /* ARGSUSED */
3793 static int
3794 daplka_cno_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3795         cred_t *cred, int *rvalp)
3796 {
3797         daplka_cno_resource_t   *cno_rp = NULL;
3798         dapl_cno_free_t         args;
3799         int                     retval = 0;
3800 
3801         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_free_t), mode);
3802         if (retval != 0) {
3803                 DERR("cno_free: copyin error %d\n", retval);
3804                 return (EINVAL);
3805         }
3806 
3807         retval = daplka_hash_remove(&ia_rp->ia_cno_htbl,
3808             args.cnf_hkey, (void **)&cno_rp);
3809         if (retval != 0 || cno_rp == NULL) {
3810                 DERR("cno_free: cannot find cno resource\n");
3811                 return (EINVAL);
3812         }
3813         ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3814 
3815         /* UNREF calls the actual free function when refcnt is zero */
3816         DAPLKA_RS_UNREF(cno_rp);
3817         return (0);
3818 }
3819 
3820 /*
3821  * wait for a notification from one of the associated EVDs.
3822  */
3823 /* ARGSUSED */
3824 static int
3825 daplka_cno_wait(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3826         cred_t *cred, int *rvalp)
3827 {
3828         daplka_cno_resource_t   *cno_rp = NULL;
3829         dapl_cno_wait_t         args;
3830         int                     retval = 0;
3831         uint64_t                evd_cookie = 0;
3832         clock_t                 timeout, curr_time;
3833 
3834         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_wait_t), mode);
3835         if (retval != 0) {
3836                 DERR("cno_wait: copyin error %d\n", retval);
3837                 return (EINVAL);
3838         }
3839         /* get cno resource */
3840         cno_rp = (daplka_cno_resource_t *)
3841             daplka_hash_lookup(&ia_rp->ia_cno_htbl, args.cnw_hkey);
3842         if (cno_rp == NULL) {
3843                 DERR("cno_wait: cannot find cno resource\n");
3844                 return (EINVAL);
3845         }
3846         ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3847 
3848         curr_time = ddi_get_lbolt();
3849         timeout = curr_time + drv_usectohz(args.cnw_timeout);
3850 
3851         /*
3852          * use the max value if we wrapped around
3853          */
3854         if (args.cnw_timeout > 0 && timeout <= curr_time) {
3855                 /*
3856                  * clock_t (size long) changes between 32 and 64-bit kernels
3857                  */
3858                 timeout = LONG_MAX >> 4;
3859         }
3860         mutex_enter(&cno_rp->cno_lock);
3861         while (cno_rp->cno_evd_cookie == 0) {
3862                 int rval = 0;
3863 
3864                 rval = cv_timedwait_sig(&cno_rp->cno_cv,
3865                     &cno_rp->cno_lock, timeout);
3866                 if (rval == 0) {
3867                         DERR("cno_wait: interrupted\n");
3868                         mutex_exit(&cno_rp->cno_lock);
3869                         retval = EINTR;
3870                         goto cleanup;
3871                 } else if (rval == -1) {
3872                         DERR("cno_wait: timed out\n");
3873                         mutex_exit(&cno_rp->cno_lock);
3874                         retval = ETIME;
3875                         goto cleanup;
3876                 }
3877         }
3878         evd_cookie = cno_rp->cno_evd_cookie;
3879         cno_rp->cno_evd_cookie = 0;
3880         mutex_exit(&cno_rp->cno_lock);
3881 
3882         ASSERT(evd_cookie != 0);
3883         D2("cno_wait: returning evd_cookie 0x%p\n",
3884             (void *)(uintptr_t)evd_cookie);
3885         args.cnw_evd_cookie = evd_cookie;
3886         retval = ddi_copyout((void *)&args, (void *)arg,
3887             sizeof (dapl_cno_wait_t), mode);
3888         if (retval != 0) {
3889                 DERR("cno_wait: copyout error %d\n", retval);
3890                 retval = EFAULT;
3891                 goto cleanup;
3892         }
3893 
3894 cleanup:;
3895         if (cno_rp != NULL) {
3896                 DAPLKA_RS_UNREF(cno_rp);
3897         }
3898         return (retval);
3899 }
3900 
3901 /*
3902  * this function is called by the client when it decides to
3903  * accept a connection request. a connection request is generated
3904  * when the active side generates REQ MAD to a service point on
3905  * the destination node. this causes the CM service handler
3906  * (daplka_cm_service_req) on the passive side to be callee. This
3907  * handler will then enqueue this connection request to the backlog
3908  * array of the service point. A connection event containing the
3909  * backlog array index and connection request private data is passed
3910  * to the client's service point EVD (sp_evd_res). once the event
3911  * is passed up to the userland, the client may examine the request
3912  * to decide whether to call daplka_cr_accept or dapka_cr_reject.
3913  */
3914 /* ARGSUSED */
3915 static int
3916 daplka_cr_accept(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3917         cred_t *cred, int *rvalp)
3918 {
3919         daplka_ep_resource_t            *ep_rp = NULL;
3920         daplka_sp_resource_t            *sp_rp = NULL;
3921         dapl_cr_accept_t                args;
3922         daplka_sp_conn_pend_t           *conn;
3923         ibt_cm_proceed_reply_t          proc_reply;
3924         ibt_status_t                    status;
3925         uint16_t                        bkl_index;
3926         uint32_t                        old_state, new_state;
3927         int                             retval = 0;
3928         void                            *priv_data = NULL, *sid;
3929 
3930         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_accept_t),
3931             mode);
3932         if (retval != 0) {
3933                 DERR("cr_accept: copyin error %d\n", retval);
3934                 return (EFAULT);
3935         }
3936         if (args.cra_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
3937                 DERR("cr_accept: private data len (%d) exceeded "
3938                     "max size %d\n", args.cra_priv_sz,
3939                     DAPL_MAX_PRIVATE_DATA_SIZE);
3940                 return (EINVAL);
3941         }
3942         priv_data = (args.cra_priv_sz > 0) ? (void *)args.cra_priv : NULL;
3943 
3944         D2("cr_accept: priv(0x%p) priv_len(%u) psep(0x%llx)\n", priv_data,
3945             args.cra_priv_sz, (longlong_t)args.cra_bkl_cookie);
3946 
3947         /* get sp resource */
3948         sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
3949             args.cra_sp_hkey);
3950         if (sp_rp == NULL) {
3951                 DERR("cr_accept: cannot find sp resource\n");
3952                 return (EINVAL);
3953         }
3954         ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
3955 
3956         /* get ep resource */
3957         ep_rp = (daplka_ep_resource_t *)daplka_hash_lookup(&ia_rp->ia_ep_htbl,
3958             args.cra_ep_hkey);
3959         if (ep_rp == NULL) {
3960                 DERR("cr_accept: cannot find ep resource\n");
3961                 retval = EINVAL;
3962                 goto cleanup;
3963         }
3964         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
3965 
3966         /*
3967          * accept is only allowed if ep_state is CLOSED.
3968          * note that after this point, the ep_state is frozen
3969          * (i.e. TRANSITIONING) until we transition ep_state
3970          * to ACCEPTING or back to CLOSED if we get an error.
3971          */
3972         new_state = old_state = daplka_ep_get_state(ep_rp);
3973         if (old_state != DAPLKA_EP_STATE_CLOSED) {
3974                 DERR("cr_accept: invalid ep state %d\n", old_state);
3975                 retval = EINVAL;
3976                 goto cleanup;
3977         }
3978 
3979         mutex_enter(&sp_rp->sp_lock);
3980         bkl_index = DAPLKA_GET_PSEP_INDEX(args.cra_bkl_cookie);
3981         /*
3982          * make sure the backlog index is not bogus.
3983          */
3984         if (bkl_index >= sp_rp->sp_backlog_size) {
3985                 DERR("cr_accept: invalid backlog index 0x%llx %d\n",
3986                     (longlong_t)args.cra_bkl_cookie, bkl_index);
3987                 mutex_exit(&sp_rp->sp_lock);
3988                 retval = EINVAL;
3989                 goto cleanup;
3990         }
3991         /*
3992          * make sure the backlog index indeed refers
3993          * to a pending connection.
3994          */
3995         conn = &sp_rp->sp_backlog[bkl_index];
3996         if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
3997                 DERR("cr_accept: invalid conn state %d\n",
3998                     conn->spcp_state);
3999                 mutex_exit(&sp_rp->sp_lock);
4000                 retval = EINVAL;
4001                 goto cleanup;
4002         }
4003         if (conn->spcp_sid == NULL) {
4004                 DERR("cr_accept: sid == NULL\n");
4005                 mutex_exit(&sp_rp->sp_lock);
4006                 retval = EINVAL;
4007                 goto cleanup;
4008         }
4009         if (ep_rp->ep_chan_hdl == NULL) {
4010                 /*
4011                  * a ep_rp with a NULL chan_hdl is impossible.
4012                  */
4013                 DERR("cr_accept: ep_chan_hdl == NULL\n");
4014                 mutex_exit(&sp_rp->sp_lock);
4015                 ASSERT(B_FALSE);
4016                 retval = EINVAL;
4017                 goto cleanup;
4018         }
4019         proc_reply.rep.cm_channel = ep_rp->ep_chan_hdl;
4020         proc_reply.rep.cm_rdma_ra_out = conn->spcp_rdma_ra_out;
4021         proc_reply.rep.cm_rdma_ra_in = conn->spcp_rdma_ra_in;
4022         proc_reply.rep.cm_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
4023         sid = conn->spcp_sid;
4024 
4025         /*
4026          * this clears our slot in the backlog array.
4027          * this slot may now be used by other pending connections.
4028          */
4029         conn->spcp_sid = NULL;
4030         conn->spcp_state = DAPLKA_SPCP_INIT;
4031         conn->spcp_req_len = 0;
4032         mutex_exit(&sp_rp->sp_lock);
4033 
4034         /*
4035          * Set the unique cookie corresponding to the CR to this EP
4036          * so that is can be used in passive side CM callbacks
4037          */
4038         ep_rp->ep_psep_cookie = args.cra_bkl_cookie;
4039 
4040         status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, IBT_CM_ACCEPT,
4041             &proc_reply, priv_data, (ibt_priv_data_len_t)args.cra_priv_sz);
4042 
4043         if (status != IBT_SUCCESS) {
4044                 DERR("cr_accept: ibt_cm_proceed returned %d\n", status);
4045                 *rvalp = (int)status;
4046                 retval = 0;
4047         }
4048         /*
4049          * note that the CM handler may actually be called at this
4050          * point. but since ep_state is still in TRANSITIONING, the
4051          * handler will wait until we transition to ACCEPTING. this
4052          * prevents the case where we set ep_state to ACCEPTING after
4053          * daplka_service_conn_est sets ep_state to CONNECTED.
4054          */
4055         new_state = DAPLKA_EP_STATE_ACCEPTING;
4056 
4057 cleanup:;
4058         if (sp_rp != NULL) {
4059                 DAPLKA_RS_UNREF(sp_rp);
4060         }
4061         if (ep_rp != NULL) {
4062                 daplka_ep_set_state(ep_rp, old_state, new_state);
4063                 DAPLKA_RS_UNREF(ep_rp);
4064         }
4065         return (retval);
4066 }
4067 
4068 /*
4069  * this function is called by the client to reject a
4070  * connection request.
4071  */
4072 /* ARGSUSED */
4073 static int
4074 daplka_cr_reject(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4075         cred_t *cred, int *rvalp)
4076 {
4077         dapl_cr_reject_t        args;
4078         daplka_sp_resource_t    *sp_rp = NULL;
4079         daplka_sp_conn_pend_t   *conn;
4080         ibt_cm_proceed_reply_t  proc_reply;
4081         ibt_cm_status_t         proc_status;
4082         ibt_status_t            status;
4083         uint16_t                bkl_index;
4084         int                     retval = 0;
4085         void                    *sid;
4086 
4087         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_reject_t),
4088             mode);
4089         if (retval != 0) {
4090                 DERR("cr_reject: copyin error %d\n", retval);
4091                 return (EFAULT);
4092         }
4093         /* get sp resource */
4094         sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
4095             args.crr_sp_hkey);
4096         if (sp_rp == NULL) {
4097                 DERR("cr_reject: cannot find sp resource\n");
4098                 return (EINVAL);
4099         }
4100         ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4101 
4102         D2("cr_reject: psep(0x%llx)\n", (longlong_t)args.crr_bkl_cookie);
4103 
4104         mutex_enter(&sp_rp->sp_lock);
4105         bkl_index = DAPLKA_GET_PSEP_INDEX(args.crr_bkl_cookie);
4106         /*
4107          * make sure the backlog index is not bogus.
4108          */
4109         if (bkl_index >= sp_rp->sp_backlog_size) {
4110                 DERR("cr_reject: invalid backlog index 0x%llx %d\n",
4111                     (longlong_t)args.crr_bkl_cookie, bkl_index);
4112                 mutex_exit(&sp_rp->sp_lock);
4113                 retval = EINVAL;
4114                 goto cleanup;
4115         }
4116         /*
4117          * make sure the backlog index indeed refers
4118          * to a pending connection.
4119          */
4120         conn = &sp_rp->sp_backlog[bkl_index];
4121         if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4122                 DERR("cr_reject: invalid conn state %d\n",
4123                     conn->spcp_state);
4124                 mutex_exit(&sp_rp->sp_lock);
4125                 retval = EINVAL;
4126                 goto cleanup;
4127         }
4128         if (conn->spcp_sid == NULL) {
4129                 DERR("cr_reject: sid == NULL\n");
4130                 mutex_exit(&sp_rp->sp_lock);
4131                 retval = EINVAL;
4132                 goto cleanup;
4133         }
4134         bzero(&proc_reply, sizeof (proc_reply));
4135         sid = conn->spcp_sid;
4136 
4137         /*
4138          * this clears our slot in the backlog array.
4139          * this slot may now be used by other pending connections.
4140          */
4141         conn->spcp_sid = NULL;
4142         conn->spcp_state = DAPLKA_SPCP_INIT;
4143         conn->spcp_req_len = 0;
4144 
4145         switch (args.crr_reason) {
4146         case DAPL_IB_CM_REJ_REASON_CONSUMER_REJ:
4147                 /* results in IBT_CM_CONSUMER as the reason for reject */
4148                 proc_status = IBT_CM_REJECT;
4149                 break;
4150         case DAPL_IB_CME_LOCAL_FAILURE:
4151                 /*FALLTHRU*/
4152         case DAPL_IB_CME_DESTINATION_UNREACHABLE:
4153                 /* results in IBT_CM_NO_RESC as the reason for reject */
4154                 proc_status = IBT_CM_NO_RESOURCE;
4155                 break;
4156         default:
4157                 /* unexpect reason code */
4158                 ASSERT(!"unexpected reject reason code");
4159                 proc_status = IBT_CM_NO_RESOURCE;
4160                 break;
4161         }
4162 
4163         mutex_exit(&sp_rp->sp_lock);
4164 
4165         status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, proc_status,
4166             &proc_reply, NULL, 0);
4167 
4168         if (status != IBT_SUCCESS) {
4169                 DERR("cr_reject: ibt_cm_proceed returned %d\n", status);
4170                 *rvalp = (int)status;
4171                 retval = 0;
4172         }
4173 
4174 cleanup:;
4175         if (sp_rp != NULL) {
4176                 DAPLKA_RS_UNREF(sp_rp);
4177         }
4178         return (retval);
4179 }
4180 
4181 
4182 /*
4183  * daplka_sp_match is used by daplka_hash_walk for finding SPs
4184  */
4185 typedef struct daplka_sp_match_s {
4186         uint64_t                spm_conn_qual;
4187         daplka_sp_resource_t    *spm_sp_rp;
4188 } daplka_sp_match_t;
4189 
4190 static int
4191 daplka_sp_match(void *objp, void *arg)
4192 {
4193         daplka_sp_resource_t    *sp_rp = (daplka_sp_resource_t *)objp;
4194 
4195         ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4196         if (sp_rp->sp_conn_qual ==
4197             ((daplka_sp_match_t *)arg)->spm_conn_qual) {
4198                 ((daplka_sp_match_t *)arg)->spm_sp_rp = sp_rp;
4199                 D2("daplka_sp_match: found sp, conn_qual %016llu\n",
4200                     (longlong_t)((daplka_sp_match_t *)arg)->spm_conn_qual);
4201                 DAPLKA_RS_REF(sp_rp);
4202                 return (1);
4203         }
4204         return (0);
4205 }
4206 
4207 /*
4208  * cr_handoff allows the client to handoff a connection request from
4209  * one service point to another.
4210  */
/* ARGSUSED */
static int
daplka_cr_handoff(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
        cred_t *cred, int *rvalp)
{
        dapl_cr_handoff_t               args;
        daplka_sp_resource_t            *sp_rp = NULL, *new_sp_rp = NULL;
        daplka_sp_conn_pend_t           *conn;
        daplka_sp_match_t               sp_match;
        ibt_cm_event_t                  fake_event;
        ibt_cm_status_t                 cm_status;
        ibt_status_t                    status;
        uint16_t                        bkl_index;
        void                            *sid, *priv = NULL;
        int                             retval = 0, priv_len = 0;

        D3("cr_handoff: entering\n");
        retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_handoff_t),
            mode);
        if (retval != 0) {
                DERR("cr_handoff: copyin error %d\n", retval);
                return (EFAULT);
        }
        /* get sp resource (lookup takes a reference; dropped at cleanup) */
        sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
            args.crh_sp_hkey);
        if (sp_rp == NULL) {
                DERR("cr_handoff: cannot find sp resource\n");
                return (EINVAL);
        }
        ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);

        /*
         * find the destination service point.
         * a successful match takes a reference on the found SP
         * (see daplka_sp_match), dropped at cleanup.
         */
        sp_match.spm_conn_qual = args.crh_conn_qual;
        sp_match.spm_sp_rp = NULL;
        daplka_hash_walk(&daplka_global_sp_htbl, daplka_sp_match,
            (void *)&sp_match, RW_READER);

        /*
         * return if we cannot find the service point
         */
        if (sp_match.spm_sp_rp == NULL) {
                DERR("cr_handoff: new sp not found, conn qual = %llu\n",
                    (longlong_t)args.crh_conn_qual);
                retval = EINVAL;
                goto cleanup;
        }
        new_sp_rp = sp_match.spm_sp_rp;

        /*
         * the spec does not discuss the security implications of this
         * function. to be safe, we currently only allow processes
         * owned by the same user to handoff connection requests
         * to each other.
         */
        if (crgetruid(cred) != new_sp_rp->sp_ruid) {
                DERR("cr_handoff: permission denied\n");
                retval = EPERM;
                goto cleanup;
        }

        D2("cr_handoff: psep(0x%llx)\n", (longlong_t)args.crh_bkl_cookie);

        mutex_enter(&sp_rp->sp_lock);
        bkl_index = DAPLKA_GET_PSEP_INDEX(args.crh_bkl_cookie);
        /*
         * make sure the backlog index is not bogus.
         */
        if (bkl_index >= sp_rp->sp_backlog_size) {
                DERR("cr_handoff: invalid backlog index 0x%llx %d\n",
                    (longlong_t)args.crh_bkl_cookie, bkl_index);
                mutex_exit(&sp_rp->sp_lock);
                retval = EINVAL;
                goto cleanup;
        }
        /*
         * make sure the backlog index indeed refers
         * to a pending connection.
         */
        conn = &sp_rp->sp_backlog[bkl_index];
        if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
                DERR("cr_handoff: invalid conn state %d\n",
                    conn->spcp_state);
                mutex_exit(&sp_rp->sp_lock);
                retval = EINVAL;
                goto cleanup;
        }
        if (conn->spcp_sid == NULL) {
                DERR("cr_handoff: sid == NULL\n");
                mutex_exit(&sp_rp->sp_lock);
                retval = EINVAL;
                goto cleanup;
        }
        sid = conn->spcp_sid;
        priv = NULL;
        /*
         * copy the REQ private data out of the backlog slot while we
         * still hold sp_lock; the slot is about to be recycled.
         */
        priv_len = conn->spcp_req_len;
        if (priv_len > 0) {
                priv = kmem_zalloc(priv_len, daplka_km_flags);
                if (priv == NULL) {
                        mutex_exit(&sp_rp->sp_lock);
                        retval = ENOMEM;
                        goto cleanup;
                }
                bcopy(conn->spcp_req_data, priv, priv_len);
        }
        /*
         * this clears our slot in the backlog array.
         * this slot may now be used by other pending connections.
         */
        conn->spcp_sid = NULL;
        conn->spcp_state = DAPLKA_SPCP_INIT;
        conn->spcp_req_len = 0;
        mutex_exit(&sp_rp->sp_lock);

        /* fill fake_event and call service_req handler */
        bzero(&fake_event, sizeof (fake_event));
        fake_event.cm_type = IBT_CM_EVENT_REQ_RCV;
        fake_event.cm_session_id = sid;
        fake_event.cm_priv_data_len = priv_len;
        fake_event.cm_priv_data = priv;

        /* replay the connection request against the destination SP */
        cm_status = daplka_cm_service_req(new_sp_rp,
            &fake_event, NULL, priv, (ibt_priv_data_len_t)priv_len);
        if (cm_status != IBT_CM_DEFER) {
                ibt_cm_proceed_reply_t  proc_reply;

                DERR("cr_handoff: service_req returned %d\n", cm_status);
                /*
                 * if for some reason cm_service_req failed, we
                 * reject the connection.
                 */
                bzero(&proc_reply, sizeof (proc_reply));

                status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid,
                    IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
                if (status != IBT_SUCCESS) {
                        DERR("cr_handoff: ibt_cm_proceed returned %d\n",
                            status);
                }
                /* report the IBTF status via *rvalp; the ioctl succeeds */
                *rvalp = (int)status;
                retval = 0;
        }

cleanup:;
        if (priv_len > 0 && priv != NULL) {
                kmem_free(priv, priv_len);
        }
        if (new_sp_rp != NULL) {
                DAPLKA_RS_UNREF(new_sp_rp);
        }
        if (sp_rp != NULL) {
                DAPLKA_RS_UNREF(sp_rp);
        }
        D3("cr_handoff: exiting\n");
        return (retval);
}
4369 
4370 /*
4371  * returns a list of hca attributes
4372  */
4373 /* ARGSUSED */
4374 static int
4375 daplka_ia_query(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4376         cred_t *cred, int *rvalp)
4377 {
4378         dapl_ia_query_t         args;
4379         int                     retval;
4380         ibt_hca_attr_t          *hcap;
4381 
4382         hcap = &ia_rp->ia_hca->hca_attr;
4383 
4384         /*
4385          * Take the ibt_hca_attr_t and stuff them into dapl_hca_attr_t
4386          */
4387         args.hca_attr.dhca_vendor_id = hcap->hca_vendor_id;
4388         args.hca_attr.dhca_device_id = hcap->hca_device_id;
4389         args.hca_attr.dhca_version_id = hcap->hca_version_id;
4390         args.hca_attr.dhca_max_chans = hcap->hca_max_chans;
4391         args.hca_attr.dhca_max_chan_sz = hcap->hca_max_chan_sz;
4392         args.hca_attr.dhca_max_sgl = hcap->hca_max_sgl;
4393         args.hca_attr.dhca_max_cq = hcap->hca_max_cq;
4394         args.hca_attr.dhca_max_cq_sz = hcap->hca_max_cq_sz;
4395         args.hca_attr.dhca_max_memr = hcap->hca_max_memr;
4396         args.hca_attr.dhca_max_memr_len = hcap->hca_max_memr_len;
4397         args.hca_attr.dhca_max_mem_win = hcap->hca_max_mem_win;
4398         args.hca_attr.dhca_max_rdma_in_chan = hcap->hca_max_rdma_in_chan;
4399         args.hca_attr.dhca_max_rdma_out_chan = hcap->hca_max_rdma_out_chan;
4400         args.hca_attr.dhca_max_partitions  = hcap->hca_max_partitions;
4401         args.hca_attr.dhca_nports  = hcap->hca_nports;
4402         args.hca_attr.dhca_node_guid  = hcap->hca_node_guid;
4403         args.hca_attr.dhca_max_pd = hcap->hca_max_pd;
4404         args.hca_attr.dhca_max_srqs = hcap->hca_max_srqs;
4405         args.hca_attr.dhca_max_srqs_sz = hcap->hca_max_srqs_sz;
4406         args.hca_attr.dhca_max_srq_sgl = hcap->hca_max_srq_sgl;
4407 
4408         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ia_query_t),
4409             mode);
4410         if (retval != 0) {
4411                 DERR("ia_query: copyout error %d\n", retval);
4412                 return (EFAULT);
4413         }
4414         return (0);
4415 }
4416 
4417 /*
4418  * This routine is passed to hash walk in the daplka_pre_mr_cleanup_callback,
4419  * it frees the mw embedded in the mw resource object.
4420  */
4421 
4422 /* ARGSUSED */
4423 static int
4424 daplka_mr_cb_freemw(void *objp, void *arg)
4425 {
4426         daplka_mw_resource_t    *mw_rp = (daplka_mw_resource_t *)objp;
4427         ibt_mw_hdl_t            mw_hdl;
4428         ibt_status_t            status;
4429 
4430         D3("mr_cb_freemw: entering, mw_rp 0x%p\n", mw_rp);
4431         DAPLKA_RS_REF(mw_rp);
4432 
4433         mutex_enter(&mw_rp->mw_lock);
4434         mw_hdl = mw_rp->mw_hdl;
4435         /*
4436          * we set mw_hdl to NULL so it won't get freed again
4437          */
4438         mw_rp->mw_hdl = NULL;
4439         mutex_exit(&mw_rp->mw_lock);
4440 
4441         if (mw_hdl != NULL) {
4442                 status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, mw_hdl);
4443                 if (status != IBT_SUCCESS) {
4444                         DERR("mr_cb_freemw: ibt_free_mw returned %d\n", status);
4445                 }
4446                 D3("mr_cb_freemw: mw freed\n");
4447         }
4448 
4449         DAPLKA_RS_UNREF(mw_rp);
4450         return (0);
4451 }
4452 
4453 /*
4454  * This routine is called from HCA driver's umem lock undo callback
4455  * when the memory associated with an MR is being unmapped. In this callback
4456  * we free all the MW associated with the IA and post an unaffiliated
4457  * async event to tell the app that there was a catastrophic event.
4458  * This allows the HCA to deregister the MR in its callback processing.
4459  */
4460 static void
4461 daplka_pre_mr_cleanup_callback(void *arg1, void *arg2 /*ARGSUSED*/)
4462 {
4463         daplka_mr_resource_t    *mr_rp;
4464         daplka_ia_resource_t    *ia_rp;
4465 #ifdef  _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
4466         ibt_async_event_t       event;
4467         ibt_hca_attr_t          *hca_attrp;
4468 #endif
4469         minor_t                 rnum;
4470 
4471         mr_rp = (daplka_mr_resource_t *)arg1;
4472         rnum = DAPLKA_RS_RNUM(mr_rp);
4473         daplka_shared_mr_free(mr_rp);
4474 
4475         ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
4476         if (ia_rp == NULL) {
4477                 DERR("daplka_mr_unlock_callback: resource not found, rnum %d\n",
4478                     rnum);
4479                 return;
4480         }
4481 
4482         DERR("daplka_mr_unlock_callback: resource(%p) rnum(%d)\n", ia_rp, rnum);
4483 
4484         mutex_enter(&ia_rp->ia_lock);
4485         /*
4486          * MW is being alloced OR MW freeze has already begun. In
4487          * both these cases we wait for that to complete before
4488          * continuing.
4489          */
4490         while ((ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) ||
4491             (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)) {
4492                 cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
4493         }
4494 
4495         switch (ia_rp->ia_state) {
4496         case DAPLKA_IA_INIT:
4497                 ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS;
4498                 mutex_exit(&ia_rp->ia_lock);
4499                 break;
4500         case DAPLKA_IA_MW_FROZEN:
4501                 /* the mw on this ia have been freed */
4502                 D2("daplka_mr_unlock_callback: ia_state %d nothing to do\n",
4503                     ia_rp->ia_state);
4504                 mutex_exit(&ia_rp->ia_lock);
4505                 goto cleanup;
4506         default:
4507                 ASSERT(!"daplka_mr_unlock_callback: IA state invalid");
4508                 DERR("daplka_mr_unlock_callback: invalid ia_state %d\n",
4509                     ia_rp->ia_state);
4510                 mutex_exit(&ia_rp->ia_lock);
4511                 goto cleanup;
4512         }
4513 
4514         /*
4515          * Walk the mw hash table and free the mws. Acquire a writer
4516          * lock since we don't want anyone else traversing this tree
4517          * while we are freeing the MW.
4518          */
4519         daplka_hash_walk(&ia_rp->ia_mw_htbl, daplka_mr_cb_freemw, NULL,
4520             RW_WRITER);
4521 
4522         mutex_enter(&ia_rp->ia_lock);
4523         ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS);
4524         ia_rp->ia_state = DAPLKA_IA_MW_FROZEN;
4525         cv_broadcast(&ia_rp->ia_cv);
4526         mutex_exit(&ia_rp->ia_lock);
4527 
4528         /*
4529          * Currently commented out because Oracle skgxp is incapable
4530          * of handling async events correctly.
4531          */
4532 #ifdef  _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
4533         /*
4534          * Enqueue an unaffiliated async error event to indicate this
4535          * IA has encountered a problem that caused the MW to freed up
4536          */
4537 
4538         /* Create a fake event, only relevant field is the hca_guid */
4539         bzero(&event, sizeof (ibt_async_event_t));
4540         hca_attrp = &ia_rp->ia_hca->hca_attr;
4541         event.ev_hca_guid = hca_attrp->hca_node_guid;
4542 
4543         daplka_async_event_create(IBT_ERROR_LOCAL_CATASTROPHIC, &event, 0,
4544             ia_rp);
4545 #endif  /* _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB */
4546 
4547 cleanup:;
4548         D2("daplka_mr_unlock_callback: resource(%p) done\n", ia_rp);
4549         DAPLKA_RS_UNREF(ia_rp);
4550 }
4551 
4552 /*
4553  * registers a memory region.
4554  * memory locking will be done by the HCA driver.
4555  */
/* ARGSUSED */
static int
daplka_mr_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
        cred_t *cred, int *rvalp)
{
        boolean_t                       inserted = B_FALSE;
        daplka_mr_resource_t            *mr_rp;
        daplka_pd_resource_t            *pd_rp;
        dapl_mr_register_t              args;
        ibt_mr_data_in_t                mr_cb_data_in;
        uint64_t                        mr_hkey = 0;
        ibt_status_t                    status;
        int                             retval;

        retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_register_t),
            mode);
        if (retval != 0) {
                DERR("mr_register: copyin error %d\n", retval);
                /*
                 * NOTE(review): cr_accept/cr_reject/cr_handoff return
                 * EFAULT on copyin failure; EINVAL here looks
                 * inconsistent - confirm userland does not distinguish.
                 */
                return (EINVAL);
        }
        mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
        if (mr_rp == NULL) {
                DERR("mr_register: cannot allocate mr resource\n");
                return (ENOMEM);
        }
        /*
         * initialize the resource; the reference taken here is
         * dropped via DAPLKA_RS_UNREF on the error path, which
         * invokes daplka_mr_destroy once the count reaches zero.
         */
        DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
            DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);

        mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
        mr_rp->mr_hca = ia_rp->ia_hca;
        mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
        mr_rp->mr_next = NULL;
        mr_rp->mr_shared_mr = NULL;

        /* get pd handle (reference is held via mr_pd_res below) */
        pd_rp = (daplka_pd_resource_t *)
            daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mr_pd_hkey);
        if (pd_rp == NULL) {
                DERR("mr_register: cannot find pd resource\n");
                retval = EINVAL;
                goto cleanup;
        }
        ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
        mr_rp->mr_pd_res = pd_rp;

        /* describe the caller's virtual range in its own address space */
        mr_rp->mr_attr.mr_vaddr = args.mr_vaddr;
        mr_rp->mr_attr.mr_len = args.mr_len;
        mr_rp->mr_attr.mr_as = curproc->p_as;
        mr_rp->mr_attr.mr_flags = args.mr_flags | IBT_MR_NOSLEEP;

        D3("mr_register: mr_vaddr %p, mr_len %llu, mr_flags 0x%x\n",
            (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
            (longlong_t)mr_rp->mr_attr.mr_len,
            mr_rp->mr_attr.mr_flags);

        status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
            mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, &mr_rp->mr_hdl,
            &mr_rp->mr_desc);

        if (status != IBT_SUCCESS) {
                /* IBTF status goes back via *rvalp; the ioctl succeeds */
                DERR("mr_register: ibt_register_mr error %d\n", status);
                *rvalp = (int)status;
                retval = 0;
                goto cleanup;
        }

        mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
        mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
        mr_cb_data_in.mr_arg1 = (void *)mr_rp;
        mr_cb_data_in.mr_arg2 = NULL;

        /* Pass the service driver mr cleanup handler to the hca driver */
        status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
            IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
            &mr_cb_data_in, sizeof (mr_cb_data_in));

        if (status != IBT_SUCCESS) {
                DERR("mr_register: ibt_ci_data_in error(%d) ver(%d)",
                    status, mr_cb_data_in.mr_rev);
                *rvalp = (int)status;
                retval = 0;
                goto cleanup;
        }

        /* insert into mr hash table; mr_hkey identifies this MR to userland */
        retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
            &mr_hkey, (void *)mr_rp);
        if (retval != 0) {
                DERR("mr_register: cannot insert mr resource into mr_htbl\n");
                goto cleanup;
        }
        inserted = B_TRUE;

        /* return the local/remote keys and hash key to the caller */
        args.mr_lkey = mr_rp->mr_desc.md_lkey;
        args.mr_rkey = mr_rp->mr_desc.md_rkey;
        args.mr_hkey = mr_hkey;

        retval = ddi_copyout((void *)&args, (void *)arg,
            sizeof (dapl_mr_register_t), mode);
        if (retval != 0) {
                DERR("mr_register: copyout error %d\n", retval);
                retval = EFAULT;
                goto cleanup;
        }
        return (0);

cleanup:;
        if (inserted) {
                daplka_mr_resource_t *free_rp = NULL;

                (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
                    (void **)&free_rp);
                if (free_rp != mr_rp) {
                        DERR("mr_register: cannot remove mr from hash table\n");
                        /*
                         * we can only get here if another thread
                         * has completed the cleanup in mr_deregister
                         */
                        return (retval);
                }
        }
        DAPLKA_RS_UNREF(mr_rp);
        return (retval);
}
4680 
4681 /*
4682  * registers a shared memory region.
4683  * the client calls this function with the intention to share the memory
4684  * region with other clients. it is assumed that, prior to calling this
4685  * function, the client(s) are already sharing parts of their address
4686  * space using a mechanism such as SYSV shared memory. the first client
4687  * that calls this function will create and insert a daplka_shared_mr_t
4688  * object into the global daplka_shared_mr_tree. this shared mr object
4689  * will be identified by a unique 40-byte key and will maintain a list
4690  * of mr resources. every time this function gets called with the same
4691  * 40-byte key, a new mr resource (containing a new mr handle generated
4692  * by ibt_register_mr or ibt_register_shared_mr) is created and inserted
4693  * into this list. similarly, every time a shared mr gets deregistered
4694  * or invalidated by a callback, the mr resource gets removed from this
4695  * list. the shared mr object has a reference count. when it drops to
4696  * zero, the shared mr object will be removed from the global avl tree
4697  * and be freed.
4698  */
4699 /* ARGSUSED */
4700 static int
4701 daplka_mr_register_shared(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4702         cred_t *cred, int *rvalp)
4703 {
4704         dapl_mr_register_shared_t       args;
4705         daplka_shared_mr_t              *smrp = NULL;
4706         daplka_shared_mr_t              tmp_smr;
4707         ibt_mr_data_in_t                mr_cb_data_in;
4708         avl_index_t                     where;
4709         boolean_t                       inserted = B_FALSE;
4710         daplka_mr_resource_t            *mr_rp = NULL;
4711         daplka_pd_resource_t            *pd_rp;
4712         uint64_t                        mr_hkey = 0;
4713         ibt_status_t                    status;
4714         int                             retval;
4715 
4716         retval = ddi_copyin((void *)arg, &args,
4717             sizeof (dapl_mr_register_shared_t), mode);
4718         if (retval != 0) {
4719                 DERR("mr_register_shared: copyin error %d\n", retval);
4720                 return (EINVAL);
4721         }
4722 
4723         mutex_enter(&daplka_shared_mr_lock);
4724         /*
4725          * find smrp from the global avl tree.
4726          * the 40-byte key is used as the lookup key.
4727          */
4728         tmp_smr.smr_cookie = args.mrs_shm_cookie;
4729         smrp = (daplka_shared_mr_t *)
4730             avl_find(&daplka_shared_mr_tree, &tmp_smr, &where);
4731         if (smrp != NULL) {
4732                 D2("mr_register_shared: smrp 0x%p, found cookie:\n"
4733                     "0x%016llx%016llx%016llx%016llx%016llx\n", smrp,
4734                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4735                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4736                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4737                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4738                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4739 
4740                 /*
4741                  * if the smrp exists, other threads could still be
4742                  * accessing it. we wait until they are done before
4743                  * we continue.
4744                  */
4745                 smrp->smr_refcnt++;
4746                 while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
4747                         D2("mr_register_shared: smrp 0x%p, "
4748                             "waiting in transitioning state, refcnt %d\n",
4749                             smrp, smrp->smr_refcnt);
4750                         cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
4751                 }
4752                 ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
4753                 D2("mr_register_shared: smrp 0x%p, refcnt %d, ready\n",
4754                     smrp, smrp->smr_refcnt);
4755 
4756                 /*
4757                  * we set smr_state to TRANSITIONING to temporarily
4758                  * prevent other threads from trying to access smrp.
4759                  */
4760                 smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4761         } else {
4762                 D2("mr_register_shared: cannot find cookie:\n"
4763                     "0x%016llx%016llx%016llx%016llx%016llx\n",
4764                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4765                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4766                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4767                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4768                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4769 
4770                 /*
4771                  * if we cannot find smrp, we need to create and
4772                  * insert one into daplka_shared_mr_tree
4773                  */
4774                 smrp = kmem_zalloc(sizeof (daplka_shared_mr_t),
4775                     daplka_km_flags);
4776                 if (smrp == NULL) {
4777                         retval = ENOMEM;
4778                         mutex_exit(&daplka_shared_mr_lock);
4779                         goto cleanup;
4780                 }
4781                 smrp->smr_refcnt = 1;
4782                 smrp->smr_cookie = args.mrs_shm_cookie;
4783                 smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4784                 smrp->smr_mr_list = NULL;
4785                 cv_init(&smrp->smr_cv, NULL, CV_DRIVER, NULL);
4786                 avl_insert(&daplka_shared_mr_tree, smrp, where);
4787         }
4788         mutex_exit(&daplka_shared_mr_lock);
4789 
4790         mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
4791         if (mr_rp == NULL) {
4792                 DERR("mr_register_shared: cannot allocate mr resource\n");
4793                 goto cleanup;
4794         }
4795         DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
4796             DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
4797 
4798         mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
4799         mr_rp->mr_hca = ia_rp->ia_hca;
4800         mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
4801         mr_rp->mr_next = NULL;
4802         mr_rp->mr_shared_mr = NULL;
4803 
4804         /* get pd handle */
4805         pd_rp = (daplka_pd_resource_t *)
4806             daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mrs_pd_hkey);
4807         if (pd_rp == NULL) {
4808                 DERR("mr_register_shared: cannot find pd resource\n");
4809                 retval = EINVAL;
4810                 goto cleanup;
4811         }
4812         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
4813         mr_rp->mr_pd_res = pd_rp;
4814 
4815         mr_rp->mr_attr.mr_vaddr = args.mrs_vaddr;
4816         mr_rp->mr_attr.mr_len = args.mrs_len;
4817         mr_rp->mr_attr.mr_flags = args.mrs_flags | IBT_MR_NOSLEEP;
4818         mr_rp->mr_attr.mr_as = curproc->p_as;
4819 
4820         D2("mr_register_shared: mr_vaddr 0x%p, mr_len %llu, "
4821             "mr_flags 0x%x, mr_as 0x%p, mr_exists %d, smrp 0x%p\n",
4822             (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
4823             (longlong_t)mr_rp->mr_attr.mr_len,
4824             mr_rp->mr_attr.mr_flags, mr_rp->mr_attr.mr_as,
4825             (int)(smrp->smr_mr_list != NULL), smrp);
4826 
4827         /*
4828          * since we are in TRANSITIONING state, we are guaranteed
4829          * that we have exclusive access to smr_mr_list.
4830          */
4831         if (smrp->smr_mr_list != NULL) {
4832                 ibt_smr_attr_t  mem_sattr;
4833 
4834                 /*
4835                  * a non-null smr_mr_list indicates that someone
4836                  * else has already inserted an mr_resource into
4837                  * smr_mr_list. we use the mr_handle from the first
4838                  * element as an arg to ibt_register_shared_mr.
4839                  */
4840                 mem_sattr.mr_vaddr = smrp->smr_mr_list->mr_desc.md_vaddr;
4841                 mem_sattr.mr_flags = mr_rp->mr_attr.mr_flags;
4842 
4843                 D2("mr_register_shared: mem_sattr vaddr 0x%p flags 0x%x\n",
4844                     (void *)(uintptr_t)mem_sattr.mr_vaddr, mem_sattr.mr_flags);
4845                 status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
4846                     smrp->smr_mr_list->mr_hdl, mr_rp->mr_pd_res->pd_hdl,
4847                     &mem_sattr, &mr_rp->mr_hdl, &mr_rp->mr_desc);
4848 
4849                 if (status != IBT_SUCCESS) {
4850                         DERR("mr_register_shared: "
4851                             "ibt_register_shared_mr error %d\n", status);
4852                         *rvalp = (int)status;
4853                         retval = 0;
4854                         goto cleanup;
4855                 }
4856         } else {
4857                 /*
4858                  * an mr does not exist yet. we need to create one
4859                  * using ibt_register_mr.
4860                  */
4861                 status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
4862                     mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr,
4863                     &mr_rp->mr_hdl, &mr_rp->mr_desc);
4864 
4865                 if (status != IBT_SUCCESS) {
4866                         DERR("mr_register_shared: "
4867                             "ibt_register_mr error %d\n", status);
4868                         *rvalp = (int)status;
4869                         retval = 0;
4870                         goto cleanup;
4871                 }
4872         }
4873 
4874         mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
4875         mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
4876         mr_cb_data_in.mr_arg1 = (void *)mr_rp;
4877         mr_cb_data_in.mr_arg2 = NULL;
4878 
4879         /* Pass the service driver mr cleanup handler to the hca driver */
4880         status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
4881             IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
4882             &mr_cb_data_in, sizeof (mr_cb_data_in));
4883 
4884         if (status != IBT_SUCCESS) {
4885                 DERR("mr_register_shared: ibt_ci_data_in error(%d) ver(%d)",
4886                     status, mr_cb_data_in.mr_rev);
4887                 *rvalp = (int)status;
4888                 retval = 0;
4889                 goto cleanup;
4890         }
4891 
4892         /*
4893          * we bump reference of mr_rp and enqueue it onto smrp.
4894          */
4895         DAPLKA_RS_REF(mr_rp);
4896         mr_rp->mr_next = smrp->smr_mr_list;
4897         smrp->smr_mr_list = mr_rp;
4898         mr_rp->mr_shared_mr = smrp;
4899 
4900         /* insert into mr hash table */
4901         retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
4902             &mr_hkey, (void *)mr_rp);
4903         if (retval != 0) {
4904                 DERR("mr_register_shared: cannot insert mr resource\n");
4905                 goto cleanup;
4906         }
4907         inserted = B_TRUE;
4908 
4909         /*
4910          * at this point, there are two references to our mr resource.
4911          * one is kept in ia_mr_htbl. the other is kept in the list
4912          * within this shared mr object (smrp). when we deregister this
4913          * mr or when a callback invalidates this mr, the reference kept
4914          * by this shared mr object will be removed.
4915          */
4916 
4917         args.mrs_lkey = mr_rp->mr_desc.md_lkey;
4918         args.mrs_rkey = mr_rp->mr_desc.md_rkey;
4919         args.mrs_hkey = mr_hkey;
4920 
4921         retval = ddi_copyout((void *)&args, (void *)arg,
4922             sizeof (dapl_mr_register_shared_t), mode);
4923         if (retval != 0) {
4924                 DERR("mr_register_shared: copyout error %d\n", retval);
4925                 retval = EFAULT;
4926                 goto cleanup;
4927         }
4928 
4929         /*
4930          * set the state to READY to allow others to continue
4931          */
4932         mutex_enter(&daplka_shared_mr_lock);
4933         smrp->smr_state = DAPLKA_SMR_READY;
4934         cv_broadcast(&smrp->smr_cv);
4935         mutex_exit(&daplka_shared_mr_lock);
4936         return (0);
4937 
4938 cleanup:;
4939         if (inserted) {
4940                 daplka_mr_resource_t *free_rp = NULL;
4941 
4942                 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
4943                     (void **)&free_rp);
4944                 if (free_rp != mr_rp) {
4945                         DERR("mr_register_shared: "
4946                             "cannot remove mr from hash table\n");
4947                         /*
4948                          * we can only get here if another thread
4949                          * has completed the cleanup in mr_deregister
4950                          */
4951                         return (retval);
4952                 }
4953         }
4954         if (smrp != NULL) {
4955                 mutex_enter(&daplka_shared_mr_lock);
4956                 ASSERT(smrp->smr_refcnt > 0);
4957                 smrp->smr_refcnt--;
4958 
4959                 if (smrp->smr_refcnt == 0) {
4960                         DERR("mr_register_shared: freeing smrp 0x%p\n", smrp);
4961                         avl_remove(&daplka_shared_mr_tree, smrp);
4962                         if (smrp->smr_mr_list != NULL) {
4963                                 /*
4964                                  * the refcnt is 0. if there is anything
4965                                  * left on the list, it must be ours.
4966                                  */
4967                                 ASSERT(smrp->smr_mr_list == mr_rp);
4968                                 DAPLKA_RS_UNREF(mr_rp);
4969                                 smrp->smr_mr_list = NULL;
4970                                 ASSERT(mr_rp->mr_shared_mr == smrp);
4971                                 mr_rp->mr_shared_mr = NULL;
4972                                 ASSERT(mr_rp->mr_next == NULL);
4973                         }
4974                         smrp->smr_state = DAPLKA_SMR_FREED;
4975                         cv_destroy(&smrp->smr_cv);
4976                         kmem_free(smrp, sizeof (daplka_shared_mr_t));
4977                 } else {
4978                         DERR("mr_register_shared: resetting smr_state "
4979                             "smrp 0x%p, %d waiters remain\n", smrp,
4980                             smrp->smr_refcnt);
4981                         ASSERT(smrp->smr_state == DAPLKA_SMR_TRANSITIONING);
4982                         if (smrp->smr_mr_list != NULL && mr_rp != NULL) {
4983                                 daplka_mr_resource_t    **mpp;
4984 
4985                                 /*
4986                                  * search and remove mr_rp from smr_mr_list
4987                                  */
4988                                 mpp = &smrp->smr_mr_list;
4989                                 while (*mpp != NULL) {
4990                                         if (*mpp == mr_rp) {
4991                                                 *mpp = (*mpp)->mr_next;
4992                                                 DAPLKA_RS_UNREF(mr_rp);
4993                                                 ASSERT(mr_rp->mr_shared_mr ==
4994                                                     smrp);
4995                                                 mr_rp->mr_shared_mr = NULL;
4996                                                 mr_rp->mr_next = NULL;
4997                                                 break;
4998                                         }
4999                                         mpp = &(*mpp)->mr_next;
5000                                 }
5001                         }
5002                         /*
5003                          * note that smr_state == READY does not necessarily
5004                          * mean that smr_mr_list is non empty. for this case,
5005                          * we are doing cleanup because of a failure. we set
5006                          * the state to READY to allow other threads to
5007                          * continue.
5008                          */
5009                         smrp->smr_state = DAPLKA_SMR_READY;
5010                         cv_broadcast(&smrp->smr_cv);
5011                 }
5012                 mutex_exit(&daplka_shared_mr_lock);
5013         }
5014         if (mr_rp != NULL) {
5015                 DAPLKA_RS_UNREF(mr_rp);
5016         }
5017         return (retval);
5018 }
5019 
5020 /*
5021  * registers a memory region using the attributes of an
5022  * existing region.
5023  */
5024 /* ARGSUSED */
5025 static int
5026 daplka_mr_register_lmr(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5027         cred_t *cred, int *rvalp)
5028 {
5029         boolean_t                       inserted = B_FALSE;
5030         dapl_mr_register_lmr_t          args;
5031         ibt_mr_data_in_t                mr_cb_data_in;
5032         daplka_mr_resource_t            *orig_mr_rp = NULL;
5033         daplka_mr_resource_t            *mr_rp;
5034         ibt_smr_attr_t                  mem_sattr;
5035         uint64_t                        mr_hkey = 0;
5036         ibt_status_t                    status;
5037         int                             retval;
5038 
5039         retval = ddi_copyin((void *)arg, &args,
5040             sizeof (dapl_mr_register_lmr_t), mode);
5041         if (retval != 0) {
5042                 DERR("mr_register_lmr: copyin error %d\n", retval);
5043                 return (EINVAL);
5044         }
5045         orig_mr_rp = (daplka_mr_resource_t *)
5046             daplka_hash_lookup(&ia_rp->ia_mr_htbl, args.mrl_orig_hkey);
5047         if (orig_mr_rp == NULL) {
5048                 DERR("mr_register_lmr: cannot find mr resource\n");
5049                 return (EINVAL);
5050         }
5051         ASSERT(DAPLKA_RS_TYPE(orig_mr_rp) == DAPL_TYPE_MR);
5052 
5053         mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
5054         if (mr_rp == NULL) {
5055                 DERR("mr_register_lmr: cannot allocate mr resource\n");
5056                 retval = ENOMEM;
5057                 goto cleanup;
5058         }
5059         DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
5060             DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
5061 
5062         mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
5063         mr_rp->mr_hca = ia_rp->ia_hca;
5064         mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
5065         mr_rp->mr_next = NULL;
5066         mr_rp->mr_shared_mr = NULL;
5067 
5068         DAPLKA_RS_REF(orig_mr_rp->mr_pd_res);
5069         mr_rp->mr_pd_res = orig_mr_rp->mr_pd_res;
5070         mr_rp->mr_attr = orig_mr_rp->mr_attr;
5071 
5072         /* Pass the IO addr that was returned while allocating the orig MR */
5073         mem_sattr.mr_vaddr = orig_mr_rp->mr_desc.md_vaddr;
5074         mem_sattr.mr_flags = args.mrl_flags | IBT_MR_NOSLEEP;
5075 
5076         status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
5077             orig_mr_rp->mr_hdl, mr_rp->mr_pd_res->pd_hdl, &mem_sattr,
5078             &mr_rp->mr_hdl, &mr_rp->mr_desc);
5079 
5080         if (status != IBT_SUCCESS) {
5081                 DERR("mr_register_lmr: ibt_register_shared_mr error %d\n",
5082                     status);
5083                 *rvalp = (int)status;
5084                 retval = 0;
5085                 goto cleanup;
5086         }
5087 
5088         mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
5089         mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
5090         mr_cb_data_in.mr_arg1 = (void *)mr_rp;
5091         mr_cb_data_in.mr_arg2 = NULL;
5092 
5093         /* Pass the service driver mr cleanup handler to the hca driver */
5094         status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
5095             IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
5096             &mr_cb_data_in, sizeof (mr_cb_data_in));
5097 
5098         if (status != IBT_SUCCESS) {
5099                 DERR("mr_register_lmr: ibt_ci_data_in error(%d) ver(%d)",
5100                     status, mr_cb_data_in.mr_rev);
5101                 *rvalp = (int)status;
5102                 retval = 0;
5103                 goto cleanup;
5104         }
5105         mr_rp->mr_attr.mr_len = orig_mr_rp->mr_attr.mr_len;
5106         mr_rp->mr_attr.mr_flags = mem_sattr.mr_flags;
5107 
5108         /* insert into mr hash table */
5109         retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, &mr_hkey,
5110             (void *)mr_rp);
5111         if (retval != 0) {
5112                 DERR("mr_register: cannot insert mr resource into mr_htbl\n");
5113                 goto cleanup;
5114         }
5115         inserted = B_TRUE;
5116 
5117         args.mrl_lkey = mr_rp->mr_desc.md_lkey;
5118         args.mrl_rkey = mr_rp->mr_desc.md_rkey;
5119         args.mrl_hkey = mr_hkey;
5120 
5121         retval = ddi_copyout((void *)&args, (void *)arg,
5122             sizeof (dapl_mr_register_lmr_t), mode);
5123         if (retval != 0) {
5124                 DERR("mr_register_lmr: copyout error %d\n", retval);
5125                 retval = EFAULT;
5126                 goto cleanup;
5127         }
5128         if (orig_mr_rp != NULL) {
5129                 DAPLKA_RS_UNREF(orig_mr_rp);
5130         }
5131         return (0);
5132 
5133 cleanup:;
5134         if (inserted) {
5135                 daplka_mr_resource_t *free_rp = NULL;
5136 
5137                 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
5138                     (void **)&free_rp);
5139                 if (free_rp != mr_rp) {
5140                         DERR("mr_register: cannot remove mr from hash table\n");
5141                         /*
5142                          * we can only get here if another thread
5143                          * has completed the cleanup in mr_deregister
5144                          */
5145                         return (retval);
5146                 }
5147         }
5148         if (orig_mr_rp != NULL) {
5149                 DAPLKA_RS_UNREF(orig_mr_rp);
5150         }
5151         if (mr_rp != NULL) {
5152                 DAPLKA_RS_UNREF(mr_rp);
5153         }
5154         return (retval);
5155 }
5156 
5157 /*
5158  * this function is called by mr_deregister and mr_cleanup_callback to
5159  * remove a mr resource from the shared mr object mr_rp->mr_shared_mr.
5160  * if mr_shared_mr is already NULL, that means the region being
5161  * deregistered or invalidated is not a shared mr region and we can
5162  * return immediately.
5163  */
static void
daplka_shared_mr_free(daplka_mr_resource_t *mr_rp)
{
	daplka_shared_mr_t	*smrp;

	/*
	 * we need a lock because mr_callback also checks this field.
	 * for the rare case that mr_deregister and mr_cleanup_callback
	 * gets called simultaneously, we are guaranteed that smrp won't
	 * be dereferenced twice because either function will find
	 * mr_shared_mr to be NULL.
	 */
	mutex_enter(&mr_rp->mr_lock);
	smrp = mr_rp->mr_shared_mr;
	mr_rp->mr_shared_mr = NULL;
	mutex_exit(&mr_rp->mr_lock);

	/* non-shared region: nothing more to do */
	if (smrp != NULL) {
		daplka_mr_resource_t	**mpp;
		boolean_t		mr_found = B_FALSE;

		mutex_enter(&daplka_shared_mr_lock);
		ASSERT(smrp->smr_refcnt > 0);
		/*
		 * wait until no other thread holds the shared mr object
		 * in the TRANSITIONING state before touching smr_mr_list
		 */
		while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
			cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
		}
		ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
		/* claim exclusive access while we unlink mr_rp */
		smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
		smrp->smr_refcnt--;

		/*
		 * search and remove mr_rp from smr_mr_list.
		 * also UNREF mr_rp because it is no longer
		 * on the list.
		 */
		mpp = &smrp->smr_mr_list;
		while (*mpp != NULL) {
			if (*mpp == mr_rp) {
				*mpp = (*mpp)->mr_next;
				DAPLKA_RS_UNREF(mr_rp);
				mr_rp->mr_next = NULL;
				mr_found = B_TRUE;
				break;
			}
			mpp = &(*mpp)->mr_next;
		}
		/*
		 * since mr_clean_callback may not touch smr_mr_list
		 * at this time (due to smr_state), we can be sure
		 * that we can find and remove mr_rp from smr_mr_list
		 */
		ASSERT(mr_found);
		if (smrp->smr_refcnt == 0) {
			/* last user: remove from the tree and free */
			D3("shared_mr_free: freeing smrp 0x%p\n", smrp);
			avl_remove(&daplka_shared_mr_tree, smrp);
			ASSERT(smrp->smr_mr_list == NULL);
			smrp->smr_state = DAPLKA_SMR_FREED;
			cv_destroy(&smrp->smr_cv);
			kmem_free(smrp, sizeof (daplka_shared_mr_t));
		} else {
			D3("shared_mr_free: smrp 0x%p, refcnt %d\n",
			    smrp, smrp->smr_refcnt);
			/* let waiting threads proceed */
			smrp->smr_state = DAPLKA_SMR_READY;
			cv_broadcast(&smrp->smr_cv);
		}
		mutex_exit(&daplka_shared_mr_lock);
	}
}
5232 
5233 /*
5234  * deregisters a memory region.
5235  * if mr is shared, remove reference from global shared mr object.
5236  * release the initial reference to the mr. if the mr's refcnt is
5237  * zero, call mr_destroy to free mr.
5238  */
5239 /* ARGSUSED */
5240 static int
5241 daplka_mr_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5242         cred_t *cred, int *rvalp)
5243 {
5244         daplka_mr_resource_t    *mr_rp;
5245         dapl_mr_deregister_t    args;
5246         int                     retval;
5247 
5248         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_deregister_t),
5249             mode);
5250         if (retval != 0) {
5251                 DERR("mr_deregister: copyin error %d\n", retval);
5252                 return (EINVAL);
5253         }
5254         retval = daplka_hash_remove(&ia_rp->ia_mr_htbl,
5255             args.mrd_hkey, (void **)&mr_rp);
5256         if (retval != 0 || mr_rp == NULL) {
5257                 DERR("mr_deregister: cannot find mr resource\n");
5258                 return (EINVAL);
5259         }
5260         ASSERT(DAPLKA_RS_TYPE(mr_rp) == DAPL_TYPE_MR);
5261 
5262         daplka_shared_mr_free(mr_rp);
5263         DAPLKA_RS_UNREF(mr_rp);
5264         return (0);
5265 }
5266 
5267 /*
5268  * sync local memory regions on RDMA read or write.
5269  */
5270 /* ARGSUSED */
5271 static int
5272 daplka_mr_sync(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5273         cred_t *cred, int *rvalp)
5274 {
5275         dapl_mr_sync_t  args;
5276         daplka_mr_resource_t *mr_rp[DAPL_MR_PER_SYNC];
5277         ibt_mr_sync_t   mrs[DAPL_MR_PER_SYNC];
5278         uint32_t        sync_direction_flags;
5279         ibt_status_t    status;
5280         int             i, j;
5281         int             retval;
5282 
5283         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_sync_t), mode);
5284         if (retval != 0) {
5285                 DERR("mr_sync: copyin error %d\n", retval);
5286                 return (EFAULT);
5287         }
5288 
5289         /* number of segments bound check */
5290         if (args.mrs_numseg > DAPL_MR_PER_SYNC) {
5291                 DERR("mr_sync: number of segments too large\n");
5292                 return (EINVAL);
5293         }
5294 
5295         /* translate MR sync direction flag */
5296         if (args.mrs_flags == DAPL_MR_SYNC_RDMA_RD) {
5297                 sync_direction_flags = IBT_SYNC_READ;
5298         } else if (args.mrs_flags == DAPL_MR_SYNC_RDMA_WR) {
5299                 sync_direction_flags = IBT_SYNC_WRITE;
5300         } else {
5301                 DERR("mr_sync: unknown flags\n");
5302                 return (EINVAL);
5303         }
5304 
5305         /*
5306          * all the segments are going to be sync'd by ibtl together
5307          */
5308         for (i = 0; i < args.mrs_numseg; i++) {
5309                 mr_rp[i] = (daplka_mr_resource_t *)daplka_hash_lookup(
5310                     &ia_rp->ia_mr_htbl, args.mrs_vec[i].mrsv_hkey);
5311                 if (mr_rp[i] == NULL) {
5312                         for (j = 0; j < i; j++) {
5313                                 DAPLKA_RS_UNREF(mr_rp[j]);
5314                         }
5315                         DERR("mr_sync: lookup error\n");
5316                         return (EINVAL);
5317                 }
5318                 ASSERT(DAPLKA_RS_TYPE(mr_rp[i]) == DAPL_TYPE_MR);
5319                 mrs[i].ms_handle = mr_rp[i]->mr_hdl;
5320                 mrs[i].ms_vaddr = args.mrs_vec[i].mrsv_va;
5321                 mrs[i].ms_len = args.mrs_vec[i].mrsv_len;
5322                 mrs[i].ms_flags = sync_direction_flags;
5323         }
5324 
5325         status = ibt_sync_mr(ia_rp->ia_hca_hdl, mrs, args.mrs_numseg);
5326         if (status != IBT_SUCCESS) {
5327                 DERR("mr_sync: ibt_sync_mr error %d\n", status);
5328                 *rvalp = (int)status;
5329         }
5330         for (i = 0; i < args.mrs_numseg; i++) {
5331                 DAPLKA_RS_UNREF(mr_rp[i]);
5332         }
5333         return (0);
5334 }
5335 
5336 /*
5337  * destroys a memory region.
5338  * called when refcnt drops to zero.
5339  */
5340 static int
5341 daplka_mr_destroy(daplka_resource_t *gen_rp)
5342 {
5343         daplka_mr_resource_t    *mr_rp = (daplka_mr_resource_t *)gen_rp;
5344         ibt_status_t            status;
5345 
5346         ASSERT(DAPLKA_RS_REFCNT(mr_rp) == 0);
5347         ASSERT(mr_rp->mr_shared_mr == NULL);
5348         D3("mr_destroy: entering, mr_rp 0x%p, rnum %d\n",
5349             mr_rp, DAPLKA_RS_RNUM(mr_rp));
5350 
5351         /*
5352          * deregister mr
5353          */
5354         if (mr_rp->mr_hdl) {
5355                 status = daplka_ibt_deregister_mr(mr_rp, mr_rp->mr_hca_hdl,
5356                     mr_rp->mr_hdl);
5357                 if (status != IBT_SUCCESS) {
5358                         DERR("mr_destroy: ibt_deregister_mr returned %d\n",
5359                             status);
5360                 }
5361                 mr_rp->mr_hdl = NULL;
5362                 D3("mr_destroy: mr deregistered\n");
5363         }
5364         mr_rp->mr_attr.mr_vaddr = NULL;
5365 
5366         /*
5367          * release reference on PD
5368          */
5369         if (mr_rp->mr_pd_res != NULL) {
5370                 DAPLKA_RS_UNREF(mr_rp->mr_pd_res);
5371                 mr_rp->mr_pd_res = NULL;
5372         }
5373         mutex_destroy(&mr_rp->mr_lock);
5374         DAPLKA_RS_FINI(mr_rp);
5375         kmem_free(mr_rp, sizeof (daplka_mr_resource_t));
5376         D3("mr_destroy: exiting, mr_rp 0x%p\n", mr_rp);
5377         return (0);
5378 }
5379 
5380 /*
5381  * this function is called by daplka_hash_destroy for
5382  * freeing MR resource objects
5383  */
5384 static void
5385 daplka_hash_mr_free(void *obj)
5386 {
5387         daplka_mr_resource_t    *mr_rp = (daplka_mr_resource_t *)obj;
5388 
5389         daplka_shared_mr_free(mr_rp);
5390         DAPLKA_RS_UNREF(mr_rp);
5391 }
5392 
5393 /*
5394  * comparison function used for finding a shared mr object
5395  * from the global shared mr avl tree.
5396  */
5397 static int
5398 daplka_shared_mr_cmp(const void *smr1, const void *smr2)
5399 {
5400         daplka_shared_mr_t      *s1 = (daplka_shared_mr_t *)smr1;
5401         daplka_shared_mr_t      *s2 = (daplka_shared_mr_t *)smr2;
5402         int i;
5403 
5404         for (i = 4; i >= 0; i--) {
5405                 if (s1->smr_cookie.mc_uint_arr[i] <
5406                     s2->smr_cookie.mc_uint_arr[i]) {
5407                         return (-1);
5408                 }
5409                 if (s1->smr_cookie.mc_uint_arr[i] >
5410                     s2->smr_cookie.mc_uint_arr[i]) {
5411                         return (1);
5412                 }
5413         }
5414         return (0);
5415 }
5416 
5417 /*
5418  * allocates a protection domain.
5419  */
5420 /* ARGSUSED */
static int
daplka_pd_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	dapl_pd_alloc_t		args;
	daplka_pd_resource_t	*pd_rp;
	ibt_status_t		status;
	uint64_t		pd_hkey = 0;	/* 0: let hash_insert pick key */
	boolean_t		inserted = B_FALSE;
	int			retval;

	/* NULL only possible if daplka_km_flags is KM_NOSLEEP */
	pd_rp = kmem_zalloc(sizeof (*pd_rp), daplka_km_flags);
	if (pd_rp == NULL) {
		DERR("pd_alloc: cannot allocate pd resource\n");
		return (ENOMEM);
	}
	/* refcnt starts at 1; daplka_pd_destroy runs when it drops to 0 */
	DAPLKA_RS_INIT(pd_rp, DAPL_TYPE_PD,
	    DAPLKA_RS_RNUM(ia_rp), daplka_pd_destroy);

	pd_rp->pd_hca = ia_rp->ia_hca;
	pd_rp->pd_hca_hdl = ia_rp->ia_hca_hdl;
	status = daplka_ibt_alloc_pd(pd_rp, pd_rp->pd_hca_hdl,
	    IBT_PD_NO_FLAGS, &pd_rp->pd_hdl);
	if (status != IBT_SUCCESS) {
		/*
		 * IBT failures are reported to the library through
		 * *rvalp with an ioctl return of 0, so the library
		 * can distinguish them from driver errors.
		 */
		DERR("pd_alloc: ibt_alloc_pd returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into pd hash table; the table holds the initial ref */
	retval = daplka_hash_insert(&ia_rp->ia_pd_htbl,
	    &pd_hkey, (void *)pd_rp);
	if (retval != 0) {
		DERR("pd_alloc: cannot insert pd resource into pd_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	/* return hkey to library */
	args.pda_hkey = pd_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_pd_alloc_t),
	    mode);
	if (retval != 0) {
		DERR("pd_alloc: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_pd_resource_t *free_rp = NULL;

		/* undo the insert so no one else can look the PD up */
		(void) daplka_hash_remove(&ia_rp->ia_pd_htbl, pd_hkey,
		    (void **)&free_rp);
		if (free_rp != pd_rp) {
			DERR("pd_alloc: cannot remove pd from hash table\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in pd_free
			 */
			return (retval);
		}
	}
	/* drop the initial ref; destroys the PD if nobody else holds one */
	DAPLKA_RS_UNREF(pd_rp);
	return (retval);
}
5490 
5491 /*
5492  * destroys a protection domain.
5493  * called when refcnt drops to zero.
5494  */
5495 static int
5496 daplka_pd_destroy(daplka_resource_t *gen_rp)
5497 {
5498         daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)gen_rp;
5499         ibt_status_t status;
5500 
5501         ASSERT(DAPLKA_RS_REFCNT(pd_rp) == 0);
5502         D3("pd_destroy: entering, pd_rp %p, rnum %d\n",
5503             pd_rp, DAPLKA_RS_RNUM(pd_rp));
5504 
5505         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5506         if (pd_rp->pd_hdl != NULL) {
5507                 status = daplka_ibt_free_pd(pd_rp, pd_rp->pd_hca_hdl,
5508                     pd_rp->pd_hdl);
5509                 if (status != IBT_SUCCESS) {
5510                         DERR("pd_destroy: ibt_free_pd returned %d\n", status);
5511                 }
5512         }
5513         DAPLKA_RS_FINI(pd_rp);
5514         kmem_free(pd_rp, sizeof (daplka_pd_resource_t));
5515         D3("pd_destroy: exiting, pd_rp %p\n", pd_rp);
5516         return (0);
5517 }
5518 
5519 static void
5520 daplka_hash_pd_free(void *obj)
5521 {
5522         daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)obj;
5523 
5524         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5525         DAPLKA_RS_UNREF(pd_rp);
5526 }
5527 
5528 /*
5529  * removes the pd reference from ia_pd_htbl and releases the
5530  * initial reference to the pd. also destroys the pd if the refcnt
5531  * is zero.
5532  */
5533 /* ARGSUSED */
5534 static int
5535 daplka_pd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5536         cred_t *cred, int *rvalp)
5537 {
5538         daplka_pd_resource_t *pd_rp;
5539         dapl_pd_free_t args;
5540         int retval;
5541 
5542         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_pd_free_t), mode);
5543         if (retval != 0) {
5544                 DERR("pd_free: copyin error %d\n", retval);
5545                 return (EINVAL);
5546         }
5547 
5548         retval = daplka_hash_remove(&ia_rp->ia_pd_htbl,
5549             args.pdf_hkey, (void **)&pd_rp);
5550         if (retval != 0 || pd_rp == NULL) {
5551                 DERR("pd_free: cannot find pd resource\n");
5552                 return (EINVAL);
5553         }
5554         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5555 
5556         /* UNREF calls the actual free function when refcnt is zero */
5557         DAPLKA_RS_UNREF(pd_rp);
5558         return (0);
5559 }
5560 
5561 /*
5562  * allocates a memory window
5563  */
5564 /* ARGSUSED */
5565 static int
5566 daplka_mw_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5567         cred_t *cred, int *rvalp)
5568 {
5569         daplka_pd_resource_t    *pd_rp;
5570         daplka_mw_resource_t    *mw_rp;
5571         dapl_mw_alloc_t         args;
5572         ibt_status_t            status;
5573         boolean_t               inserted = B_FALSE;
5574         uint64_t                mw_hkey;
5575         ibt_rkey_t              mw_rkey;
5576         int                     retval;
5577 
5578         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_alloc_t), mode);
5579         if (retval != 0) {
5580                 DERR("mw_alloc: copyin error %d\n", retval);
5581                 return (EFAULT);
5582         }
5583 
5584         /*
5585          * Allocate and initialize a MW resource
5586          */
5587         mw_rp = kmem_zalloc(sizeof (daplka_mw_resource_t), daplka_km_flags);
5588         if (mw_rp == NULL) {
5589                 DERR("mw_alloc: cannot allocate mw resource\n");
5590                 return (ENOMEM);
5591         }
5592         DAPLKA_RS_INIT(mw_rp, DAPL_TYPE_MW,
5593             DAPLKA_RS_RNUM(ia_rp), daplka_mw_destroy);
5594 
5595         mutex_init(&mw_rp->mw_lock, NULL, MUTEX_DRIVER, NULL);
5596         mw_rp->mw_hca = ia_rp->ia_hca;
5597         mw_rp->mw_hca_hdl = ia_rp->ia_hca_hdl;
5598 
5599         /* get pd handle */
5600         pd_rp = (daplka_pd_resource_t *)
5601             daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mw_pd_hkey);
5602         if (pd_rp == NULL) {
5603                 DERR("mw_alloc: cannot find pd resource\n");
5604                 goto cleanup;
5605         }
5606         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5607 
5608         mw_rp->mw_pd_res = pd_rp;
5609 
5610         status = daplka_ibt_alloc_mw(mw_rp, mw_rp->mw_hca_hdl,
5611             pd_rp->pd_hdl, IBT_MW_NOSLEEP, &mw_rp->mw_hdl, &mw_rkey);
5612 
5613         if (status != IBT_SUCCESS) {
5614                 DERR("mw_alloc: ibt_alloc_mw returned %d\n", status);
5615                 *rvalp = (int)status;
5616                 retval = 0;
5617                 goto cleanup;
5618         }
5619 
5620         mutex_enter(&ia_rp->ia_lock);
5621         switch (ia_rp->ia_state) {
5622         case DAPLKA_IA_INIT:
5623                 ia_rp->ia_state = DAPLKA_IA_MW_ALLOC_IN_PROGRESS;
5624                 ia_rp->ia_mw_alloccnt++;
5625                 retval = 0;
5626                 break;
5627         case DAPLKA_IA_MW_ALLOC_IN_PROGRESS:
5628                 /* another mw_alloc is already in progress increase cnt */
5629                 ia_rp->ia_mw_alloccnt++;
5630                 retval = 0;
5631                 break;
5632         case DAPLKA_IA_MW_FREEZE_IN_PROGRESS:
5633                 /* FALLTHRU */
5634         case DAPLKA_IA_MW_FROZEN:
5635                 /*
5636                  * IA is being or already frozen don't allow more MWs to be
5637                  * allocated.
5638                  */
5639                 DERR("mw_alloc: IA is freezing MWs (state=%d)\n",
5640                     ia_rp->ia_state);
5641                 retval = EINVAL;
5642                 break;
5643         default:
5644                 ASSERT(!"Invalid IA state in mw_alloc");
5645                 DERR("mw_alloc: IA state=%d invalid\n", ia_rp->ia_state);
5646                 retval = EINVAL;
5647                 break;
5648         }
5649         mutex_exit(&ia_rp->ia_lock);
5650         /* retval is 0 when ia_mw_alloccnt is incremented */
5651         if (retval != 0) {
5652                 goto cleanup;
5653         }
5654 
5655         /* insert into mw hash table */
5656         mw_hkey = 0;
5657         retval = daplka_hash_insert(&ia_rp->ia_mw_htbl, &mw_hkey,
5658             (void *)mw_rp);
5659         if (retval != 0) {
5660                 DERR("mw_alloc: cannot insert mw resource into mw_htbl\n");
5661                 mutex_enter(&ia_rp->ia_lock);
5662                 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5663                 ia_rp->ia_mw_alloccnt--;
5664                 if (ia_rp->ia_mw_alloccnt == 0) {
5665                         ia_rp->ia_state = DAPLKA_IA_INIT;
5666                         cv_broadcast(&ia_rp->ia_cv);
5667                 }
5668                 mutex_exit(&ia_rp->ia_lock);
5669                 goto cleanup;
5670         }
5671         inserted = B_TRUE;
5672 
5673         D3("mw_alloc: ibt_alloc_mw mw_hdl(%p) mw_rkey(0x%llx)\n",
5674             mw_rp->mw_hdl, (longlong_t)mw_rkey);
5675 
5676         mutex_enter(&ia_rp->ia_lock);
5677         /*
5678          * We are done with mw_alloc if this was the last mw_alloc
5679          * change state back to DAPLKA_IA_INIT and wake up waiters
5680          * specifically the unlock callback.
5681          */
5682         ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5683         ia_rp->ia_mw_alloccnt--;
5684         if (ia_rp->ia_mw_alloccnt == 0) {
5685                 ia_rp->ia_state = DAPLKA_IA_INIT;
5686                 cv_broadcast(&ia_rp->ia_cv);
5687         }
5688         mutex_exit(&ia_rp->ia_lock);
5689 
5690         args.mw_hkey = mw_hkey;
5691         args.mw_rkey = mw_rkey;
5692 
5693         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_mw_alloc_t),
5694             mode);
5695         if (retval != 0) {
5696                 DERR("mw_alloc: copyout error %d\n", retval);
5697                 retval = EFAULT;
5698                 goto cleanup;
5699         }
5700         return (0);
5701 
5702 cleanup:;
5703         if (inserted) {
5704                 daplka_mw_resource_t *free_rp = NULL;
5705 
5706                 (void) daplka_hash_remove(&ia_rp->ia_mw_htbl, mw_hkey,
5707                     (void **)&free_rp);
5708                 if (free_rp != mw_rp) {
5709                         DERR("mw_alloc: cannot remove mw from hash table\n");
5710                         /*
5711                          * we can only get here if another thread
5712                          * has completed the cleanup in mw_free
5713                          */
5714                         return (retval);
5715                 }
5716         }
5717         DAPLKA_RS_UNREF(mw_rp);
5718         return (retval);
5719 }
5720 
5721 /*
5722  * removes the mw reference from ia_mw_htbl and releases the
5723  * initial reference to the mw. also destroys the mw if the refcnt
5724  * is zero.
5725  */
5726 /* ARGSUSED */
5727 static int
5728 daplka_mw_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5729         cred_t *cred, int *rvalp)
5730 {
5731         daplka_mw_resource_t    *mw_rp = NULL;
5732         dapl_mw_free_t          args;
5733         int                     retval = 0;
5734 
5735         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_free_t), mode);
5736         if (retval != 0) {
5737                 DERR("mw_free: copyin error %d\n", retval);
5738                 return (EFAULT);
5739         }
5740 
5741         retval = daplka_hash_remove(&ia_rp->ia_mw_htbl, args.mw_hkey,
5742             (void **)&mw_rp);
5743         if (retval != 0 || mw_rp == NULL) {
5744                 DERR("mw_free: cannot find mw resrc (0x%llx)\n",
5745                     (longlong_t)args.mw_hkey);
5746                 return (EINVAL);
5747         }
5748 
5749         ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5750 
5751         /* UNREF calls the actual free function when refcnt is zero */
5752         DAPLKA_RS_UNREF(mw_rp);
5753         return (retval);
5754 }
5755 
5756 /*
5757  * destroys the memory window.
5758  * called when refcnt drops to zero.
5759  */
5760 static int
5761 daplka_mw_destroy(daplka_resource_t *gen_rp)
5762 {
5763         daplka_mw_resource_t    *mw_rp = (daplka_mw_resource_t *)gen_rp;
5764         ibt_status_t            status;
5765 
5766         ASSERT(DAPLKA_RS_REFCNT(mw_rp) == 0);
5767         D3("mw_destroy: entering, mw_rp 0x%p, rnum %d\n",
5768             mw_rp, DAPLKA_RS_RNUM(mw_rp));
5769 
5770         /*
5771          * free memory window
5772          */
5773         if (mw_rp->mw_hdl) {
5774                 status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl,
5775                     mw_rp->mw_hdl);
5776                 if (status != IBT_SUCCESS) {
5777                         DERR("mw_destroy: ibt_free_mw returned %d\n", status);
5778                 }
5779                 mw_rp->mw_hdl = NULL;
5780                 D3("mw_destroy: mw freed\n");
5781         }
5782 
5783         /*
5784          * release reference on PD
5785          */
5786         if (mw_rp->mw_pd_res != NULL) {
5787                 DAPLKA_RS_UNREF(mw_rp->mw_pd_res);
5788                 mw_rp->mw_pd_res = NULL;
5789         }
5790         mutex_destroy(&mw_rp->mw_lock);
5791         DAPLKA_RS_FINI(mw_rp);
5792         kmem_free(mw_rp, sizeof (daplka_mw_resource_t));
5793         D3("mw_destroy: exiting, mw_rp 0x%p\n", mw_rp);
5794         return (0);
5795 }
5796 
5797 static void
5798 daplka_hash_mw_free(void *obj)
5799 {
5800         daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)obj;
5801 
5802         ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5803         DAPLKA_RS_UNREF(mw_rp);
5804 }
5805 
5806 /*
5807  * SRQ ioctls and supporting functions
5808  */
5809 /* ARGSUSED */
5810 static int
5811 daplka_srq_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5812     cred_t *cred, int *rvalp)
5813 {
5814         daplka_srq_resource_t           *srq_rp;
5815         daplka_pd_resource_t            *pd_rp;
5816         dapl_srq_create_t               args;
5817         ibt_srq_sizes_t                 srq_sizes;
5818         ibt_srq_sizes_t                 srq_real_sizes;
5819         ibt_hca_attr_t                  *hca_attrp;
5820         uint64_t                        srq_hkey = 0;
5821         boolean_t                       inserted = B_FALSE;
5822         int                             retval;
5823         ibt_status_t                    status;
5824 
5825         D3("srq_create: enter\n");
5826         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_create_t),
5827             mode);
5828         if (retval != 0) {
5829                 DERR("srq_create: copyin error %d\n", retval);
5830                 return (EFAULT);
5831         }
5832         srq_rp = kmem_zalloc(sizeof (daplka_srq_resource_t), daplka_km_flags);
5833         if (srq_rp == NULL) {
5834                 DERR("srq_create: cannot allocate ep_rp\n");
5835                 return (ENOMEM);
5836         }
5837         DAPLKA_RS_INIT(srq_rp, DAPL_TYPE_SRQ,
5838             DAPLKA_RS_RNUM(ia_rp), daplka_srq_destroy);
5839 
5840         srq_rp->srq_hca = ia_rp->ia_hca;
5841         srq_rp->srq_hca_hdl = ia_rp->ia_hca_hdl;
5842         mutex_init(&srq_rp->srq_lock, NULL, MUTEX_DRIVER, NULL);
5843 
5844         /* get pd handle */
5845         pd_rp = (daplka_pd_resource_t *)
5846             daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.srqc_pd_hkey);
5847         if (pd_rp == NULL) {
5848                 DERR("srq_create: cannot find pd resource\n");
5849                 retval = EINVAL;
5850                 goto cleanup;
5851         }
5852         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5853         srq_rp->srq_pd_res = pd_rp;
5854 
5855         /*
5856          * these checks ensure that the requested SRQ sizes
5857          * are within the limits supported by the chosen HCA.
5858          */
5859         hca_attrp = &ia_rp->ia_hca->hca_attr;
5860         if (args.srqc_sizes.srqs_sz > hca_attrp->hca_max_srqs_sz) {
5861                 DERR("srq_create: invalid srqs_sz %d\n",
5862                     args.srqc_sizes.srqs_sz);
5863                 retval = EINVAL;
5864                 goto cleanup;
5865         }
5866         if (args.srqc_sizes.srqs_sgl > hca_attrp->hca_max_srq_sgl) {
5867                 DERR("srq_create: invalid srqs_sgl %d\n",
5868                     args.srqc_sizes.srqs_sgl);
5869                 retval = EINVAL;
5870                 goto cleanup;
5871         }
5872 
5873         D3("srq_create: srq_sgl %d, srq_sz %d\n",
5874             args.srqc_sizes.srqs_sgl, args.srqc_sizes.srqs_sz);
5875 
5876         srq_sizes.srq_wr_sz = args.srqc_sizes.srqs_sz;
5877         srq_sizes.srq_sgl_sz = args.srqc_sizes.srqs_sgl;
5878 
5879         /* create srq */
5880         status = daplka_ibt_alloc_srq(srq_rp, ia_rp->ia_hca_hdl,
5881             IBT_SRQ_USER_MAP, pd_rp->pd_hdl, &srq_sizes, &srq_rp->srq_hdl,
5882             &srq_real_sizes);
5883         if (status != IBT_SUCCESS) {
5884                 DERR("srq_create: alloc_srq returned %d\n", status);
5885                 *rvalp = (int)status;
5886                 retval = 0;
5887                 goto cleanup;
5888         }
5889 
5890         args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5891         args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5892 
5893         /* Get HCA-specific data_out info */
5894         status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
5895             IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
5896             &args.srqc_data_out, sizeof (args.srqc_data_out));
5897 
5898         if (status != IBT_SUCCESS) {
5899                 DERR("srq_create: ibt_ci_data_out error(%d)\n", status);
5900                 *rvalp = (int)status;
5901                 retval = 0;
5902                 goto cleanup;
5903         }
5904 
5905         srq_rp->srq_real_size = srq_real_sizes.srq_wr_sz;
5906 
5907         /* preparing to copyout map_data back to the library */
5908         args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5909         args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5910 
5911         /* insert into srq hash table */
5912         retval = daplka_hash_insert(&ia_rp->ia_srq_htbl,
5913             &srq_hkey, (void *)srq_rp);
5914         if (retval != 0) {
5915                 DERR("srq_create: cannot insert srq resource into srq_htbl\n");
5916                 goto cleanup;
5917         }
5918         inserted = B_TRUE;
5919 
5920         /* return hkey to library */
5921         args.srqc_hkey = srq_hkey;
5922 
5923         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_create_t),
5924             mode);
5925         if (retval != 0) {
5926                 DERR("srq_create: copyout error %d\n", retval);
5927                 retval = EFAULT;
5928                 goto cleanup;
5929         }
5930 
5931         D3("srq_create: %p, 0x%llx\n", srq_rp->srq_hdl, (longlong_t)srq_hkey);
5932         D3("    sz(%d) sgl(%d)\n",
5933             args.srqc_real_sizes.srqs_sz, args.srqc_real_sizes.srqs_sgl);
5934         D3("srq_create: exit\n");
5935         return (0);
5936 
5937 cleanup:
5938         if (inserted) {
5939                 daplka_srq_resource_t *free_rp = NULL;
5940 
5941                 (void) daplka_hash_remove(&ia_rp->ia_srq_htbl, srq_hkey,
5942                     (void **)&free_rp);
5943                 if (free_rp != srq_rp) {
5944                         /*
5945                          * this case is impossible because ep_free will
5946                          * wait until our state transition is complete.
5947                          */
5948                         DERR("srq_create: cannot remove srq from hash table\n");
5949                         ASSERT(B_FALSE);
5950                         return (retval);
5951                 }
5952         }
5953         DAPLKA_RS_UNREF(srq_rp);
5954         return (retval);
5955 }
5956 
5957 /*
5958  * Resize an existing SRQ
5959  */
5960 /* ARGSUSED */
static int
daplka_srq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_srq_resource_t		*srq_rp = NULL;
	ibt_hca_attr_t			*hca_attrp;
	dapl_srq_resize_t		args;
	ibt_status_t			status;
	int				retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("srq_resize: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get srq resource; lookup adds a reference we drop in cleanup */
	srq_rp = (daplka_srq_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_srq_htbl, args.srqr_hkey);
	if (srq_rp == NULL) {
		DERR("srq_resize: cannot find srq resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);

	/* reject sizes beyond what the HCA supports */
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.srqr_new_size > hca_attrp->hca_max_srqs_sz) {
		DERR("srq_resize: invalid srq size %d", args.srqr_new_size);
		retval = EINVAL;
		goto cleanup;
	}

	mutex_enter(&srq_rp->srq_lock);
	/*
	 * If ibt_resize_srq fails that it is primarily due to resource
	 * shortage. Per IB spec resize will never loose events and
	 * a resize error leaves the SRQ intact. Therefore even if the
	 * resize request fails we proceed and get the mapping data
	 * from the SRQ so that the library can mmap it.
	 */
	status = ibt_modify_srq(srq_rp->srq_hdl, IBT_SRQ_SET_SIZE,
	    args.srqr_new_size, 0, &args.srqr_real_size);
	if (status != IBT_SUCCESS) {
		/* we return the size of the old SRQ if resize fails */
		args.srqr_real_size = srq_rp->srq_real_size;
		ASSERT(status != IBT_SRQ_HDL_INVALID);
		DERR("srq_resize: ibt_modify_srq failed:%d\n", status);
	} else {
		srq_rp->srq_real_size = args.srqr_real_size;
	}
	mutex_exit(&srq_rp->srq_lock);


	D2("srq_resize(%d): done new_sz(%u) real_sz(%u)\n",
	    DAPLKA_RS_RNUM(srq_rp), args.srqr_new_size, args.srqr_real_size);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(srq_rp->srq_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
	    &args.srqr_data_out, sizeof (args.srqr_data_out));
	if (status != IBT_SUCCESS) {
		DERR("srq_resize: ibt_ci_data_out error(%d)\n", status);
		/* return ibt_ci_data_out status */
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("srq_resize: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

cleanup:;
	/* drop the reference taken by the hash lookup above */
	if (srq_rp != NULL) {
		DAPLKA_RS_UNREF(srq_rp);
	}
	return (retval);
}
6044 
6045 /*
6046  * Frees an SRQ resource.
6047  */
6048 /* ARGSUSED */
6049 static int
6050 daplka_srq_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6051     cred_t *cred, int *rvalp)
6052 {
6053         daplka_srq_resource_t   *srq_rp = NULL;
6054         dapl_srq_free_t         args;
6055         int                     retval;
6056 
6057         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_free_t), mode);
6058         if (retval != 0) {
6059                 DERR("srq_free: copyin error %d\n", retval);
6060                 return (EFAULT);
6061         }
6062 
6063         retval = daplka_hash_remove(&ia_rp->ia_srq_htbl,
6064             args.srqf_hkey, (void **)&srq_rp);
6065         if (retval != 0 || srq_rp == NULL) {
6066                 /*
6067                  * this is only possible if we have two threads
6068                  * calling ep_free in parallel.
6069                  */
6070                 DERR("srq_free: cannot find resource retval(%d) 0x%llx\n",
6071                     retval, args.srqf_hkey);
6072                 return (EINVAL);
6073         }
6074 
6075         /* UNREF calls the actual free function when refcnt is zero */
6076         DAPLKA_RS_UNREF(srq_rp);
6077         return (0);
6078 }
6079 
6080 /*
6081  * destroys a SRQ resource.
6082  * called when refcnt drops to zero.
6083  */
6084 static int
6085 daplka_srq_destroy(daplka_resource_t *gen_rp)
6086 {
6087         daplka_srq_resource_t   *srq_rp = (daplka_srq_resource_t *)gen_rp;
6088         ibt_status_t            status;
6089 
6090         ASSERT(DAPLKA_RS_REFCNT(srq_rp) == 0);
6091 
6092         D3("srq_destroy: entering, srq_rp 0x%p, rnum %d\n",
6093             srq_rp, DAPLKA_RS_RNUM(srq_rp));
6094         /*
6095          * destroy the srq
6096          */
6097         if (srq_rp->srq_hdl != NULL) {
6098                 status = daplka_ibt_free_srq(srq_rp, srq_rp->srq_hdl);
6099                 if (status != IBT_SUCCESS) {
6100                         DERR("srq_destroy: ibt_free_srq returned %d\n",
6101                             status);
6102                 }
6103                 srq_rp->srq_hdl = NULL;
6104                 D3("srq_destroy: srq freed, rnum %d\n", DAPLKA_RS_RNUM(srq_rp));
6105         }
6106         /*
6107          * release all references
6108          */
6109         if (srq_rp->srq_pd_res != NULL) {
6110                 DAPLKA_RS_UNREF(srq_rp->srq_pd_res);
6111                 srq_rp->srq_pd_res = NULL;
6112         }
6113 
6114         mutex_destroy(&srq_rp->srq_lock);
6115         DAPLKA_RS_FINI(srq_rp);
6116         kmem_free(srq_rp, sizeof (daplka_srq_resource_t));
6117         D3("srq_destroy: exiting, srq_rp 0x%p\n", srq_rp);
6118         return (0);
6119 }
6120 
6121 static void
6122 daplka_hash_srq_free(void *obj)
6123 {
6124         daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)obj;
6125 
6126         ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
6127         DAPLKA_RS_UNREF(srq_rp);
6128 }
6129 
6130 /*
6131  * This function tells the CM to start listening on a service id.
6132  * It must be called by the passive side client before the client
6133  * can receive connection requests from remote endpoints. If the
6134  * client specifies a non-zero service id (connection qualifier in
6135  * dapl terms), this function will attempt to bind to this service
6136  * id and return an error if the id is already in use. If the client
6137  * specifies zero as the service id, this function will try to find
6138  * the next available service id and return it back to the client.
6139  * To support the cr_handoff function, this function will, in addition
6140  * to creating and inserting an SP resource into the per-IA SP hash
6141  * table, insert the SP resource into a global SP table. This table
6142  * maintains all active service points created by all dapl clients.
6143  * CR handoff locates the target SP by iterating through this global
6144  * table.
6145  */
6146 /* ARGSUSED */
6147 static int
6148 daplka_service_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6149         cred_t *cred, int *rvalp)
6150 {
6151         daplka_evd_resource_t   *evd_rp = NULL;
6152         daplka_sp_resource_t    *sp_rp = NULL;
6153         dapl_service_register_t args;
6154         ibt_srv_desc_t          sd_args;
6155         ibt_srv_bind_t          sb_args;
6156         ibt_status_t            status;
6157         ib_svc_id_t             retsid = 0;
6158         uint64_t                sp_hkey = 0;
6159         boolean_t               bumped = B_FALSE;
6160         int                     backlog_size;
6161         int                     retval = 0;
6162 
6163         retval = ddi_copyin((void *)arg, &args,
6164             sizeof (dapl_service_register_t), mode);
6165         if (retval != 0) {
6166                 DERR("service_register: copyin error %d\n", retval);
6167                 return (EINVAL);
6168         }
6169 
6170         sp_rp = kmem_zalloc(sizeof (*sp_rp), daplka_km_flags);
6171         if (sp_rp == NULL) {
6172                 DERR("service_register: cannot allocate sp resource\n");
6173                 return (ENOMEM);
6174         }
6175         DAPLKA_RS_INIT(sp_rp, DAPL_TYPE_SP,
6176             DAPLKA_RS_RNUM(ia_rp), daplka_sp_destroy);
6177 
6178         /* check if evd exists */
6179         evd_rp = (daplka_evd_resource_t *)
6180             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.sr_evd_hkey);
6181         if (evd_rp == NULL) {
6182                 DERR("service_register: evd resource not found\n");
6183                 retval = EINVAL;
6184                 goto cleanup;
6185         }
6186         /*
6187          * initialize backlog size
6188          */
6189         if (evd_rp && evd_rp->evd_cq_real_size > 0) {
6190                 backlog_size = evd_rp->evd_cq_real_size + 1;
6191         } else {
6192                 backlog_size = DAPLKA_DEFAULT_SP_BACKLOG;
6193         }
6194         D2("service_register: args.sr_sid = %llu\n", (longlong_t)args.sr_sid);
6195 
6196         /* save the userland sp ptr */
6197         sp_rp->sp_cookie = args.sr_sp_cookie;
6198         sp_rp->sp_backlog_size = backlog_size;
6199         D3("service_register: backlog set to %d\n", sp_rp->sp_backlog_size);
6200         sp_rp->sp_backlog = kmem_zalloc(sp_rp->sp_backlog_size *
6201             sizeof (daplka_sp_conn_pend_t), daplka_km_flags);
6202 
6203         /* save evd resource pointer */
6204         sp_rp->sp_evd_res = evd_rp;
6205 
6206         /*
6207          * save ruid here so that we can do a comparison later
6208          * when someone does cr_handoff. the check will prevent
6209          * a malicious app from passing a CR to us.
6210          */
6211         sp_rp->sp_ruid = crgetruid(cred);
6212 
6213         /* fill in args for register_service */
6214         sd_args.sd_ud_handler = NULL;
6215         sd_args.sd_handler = daplka_cm_service_handler;
6216         sd_args.sd_flags = IBT_SRV_NO_FLAGS;
6217 
6218         status = ibt_register_service(daplka_dev->daplka_clnt_hdl,
6219             &sd_args, args.sr_sid, 1, &sp_rp->sp_srv_hdl, &retsid);
6220 
6221         if (status != IBT_SUCCESS) {
6222                 DERR("service_register: ibt_register_service returned %d\n",
6223                     status);
6224                 *rvalp = (int)status;
6225                 retval = 0;
6226                 goto cleanup;
6227         }
6228         /* save returned sid */
6229         sp_rp->sp_conn_qual = retsid;
6230         args.sr_retsid = retsid;
6231 
6232         /* fill in args for bind_service */
6233         sb_args.sb_pkey = ia_rp->ia_port_pkey;
6234         sb_args.sb_lease = 0xffffffff;
6235         sb_args.sb_key[0] = 0x1234;
6236         sb_args.sb_key[1] = 0x5678;
6237         sb_args.sb_name = DAPLKA_DRV_NAME;
6238 
6239         D2("service_register: bind(0x%llx:0x%llx)\n",
6240             (longlong_t)ia_rp->ia_hca_sgid.gid_prefix,
6241             (longlong_t)ia_rp->ia_hca_sgid.gid_guid);
6242 
6243         status = ibt_bind_service(sp_rp->sp_srv_hdl, ia_rp->ia_hca_sgid,
6244             &sb_args, (void *)sp_rp, &sp_rp->sp_bind_hdl);
6245         if (status != IBT_SUCCESS) {
6246                 DERR("service_register: ibt_bind_service returned %d\n",
6247                     status);
6248                 *rvalp = (int)status;
6249                 retval = 0;
6250                 goto cleanup;
6251         }
6252 
6253         /*
6254          * need to bump refcnt because the global hash table will
6255          * have a reference to sp_rp
6256          */
6257         DAPLKA_RS_REF(sp_rp);
6258         bumped = B_TRUE;
6259 
6260         /* insert into global sp hash table */
6261         sp_rp->sp_global_hkey = 0;
6262         retval = daplka_hash_insert(&daplka_global_sp_htbl,
6263             &sp_rp->sp_global_hkey, (void *)sp_rp);
6264         if (retval != 0) {
6265                 DERR("service_register: cannot insert sp resource\n");
6266                 goto cleanup;
6267         }
6268 
6269         /* insert into per-IA sp hash table */
6270         retval = daplka_hash_insert(&ia_rp->ia_sp_htbl,
6271             &sp_hkey, (void *)sp_rp);
6272         if (retval != 0) {
6273                 DERR("service_register: cannot insert sp resource\n");
6274                 goto cleanup;
6275         }
6276 
6277         /* pass index to application */
6278         args.sr_sp_hkey = sp_hkey;
6279         retval = ddi_copyout(&args, (void *)arg,
6280             sizeof (dapl_service_register_t), mode);
6281         if (retval != 0) {
6282                 DERR("service_register: copyout error %d\n", retval);
6283                 retval = EFAULT;
6284                 goto cleanup;
6285         }
6286         return (0);
6287 
6288 cleanup:;
6289         ASSERT(sp_rp != NULL);
6290         /* remove from ia table */
6291         if (sp_hkey != 0) {
6292                 daplka_sp_resource_t *free_rp = NULL;
6293 
6294                 (void) daplka_hash_remove(&ia_rp->ia_sp_htbl,
6295                     sp_hkey, (void **)&free_rp);
6296                 if (free_rp != sp_rp) {
6297                         DERR("service_register: cannot remove sp\n");
6298                         /*
6299                          * we can only get here if another thread
6300                          * has completed the cleanup in svc_deregister
6301                          */
6302                         return (retval);
6303                 }
6304         }
6305 
6306         /* remove from global table */
6307         if (sp_rp->sp_global_hkey != 0) {
6308                 daplka_sp_resource_t *free_rp = NULL;
6309 
6310                 /*
6311                  * we get here if either the hash_insert into
6312                  * ia_sp_htbl failed or the ddi_copyout failed.
6313                  * hash_insert failure implies that we are the
6314                  * only thread with a reference to sp. ddi_copyout
6315                  * failure implies that svc_deregister could have
6316                  * picked up the sp and destroyed it. but since
6317                  * we got to this point, we must have removed
6318                  * the sp ourselves in hash_remove above and
6319                  * that the sp can be destroyed by us.
6320                  */
6321                 (void) daplka_hash_remove(&daplka_global_sp_htbl,
6322                     sp_rp->sp_global_hkey, (void **)&free_rp);
6323                 if (free_rp != sp_rp) {
6324                         DERR("service_register: cannot remove sp\n");
6325                         /*
6326                          * this case is impossible. see explanation above.
6327                          */
6328                         ASSERT(B_FALSE);
6329                         return (retval);
6330                 }
6331                 sp_rp->sp_global_hkey = 0;
6332         }
6333         /* unreference sp */
6334         if (bumped) {
6335                 DAPLKA_RS_UNREF(sp_rp);
6336         }
6337 
6338         /* destroy sp resource */
6339         DAPLKA_RS_UNREF(sp_rp);
6340         return (retval);
6341 }
6342 
6343 /*
6344  * deregisters the service and removes SP from the global table.
6345  */
/* ARGSUSED */
static int
daplka_service_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
	cred_t *cred, int *rvalp)
{
	dapl_service_deregister_t	args;
	daplka_sp_resource_t		*sp_rp = NULL, *g_sp_rp = NULL;
	int				retval;

	/* copy in the ioctl arguments from userland */
	retval = ddi_copyin((void *)arg, &args,
	    sizeof (dapl_service_deregister_t), mode);

	if (retval != 0) {
		DERR("service_deregister: copyin error %d\n", retval);
		return (EINVAL);
	}

	/*
	 * remove the SP from the per-IA hash table; sdr_sp_hkey is the
	 * key that daplka_service_register returned to the client
	 */
	retval = daplka_hash_remove(&ia_rp->ia_sp_htbl,
	    args.sdr_sp_hkey, (void **)&sp_rp);
	if (retval != 0 || sp_rp == NULL) {
		DERR("service_deregister: cannot find sp resource\n");
		return (EINVAL);
	}

	/*
	 * also remove the SP from the global table. failure here is
	 * tolerated (another thread may have raced us); only log it.
	 */
	retval = daplka_hash_remove(&daplka_global_sp_htbl,
	    sp_rp->sp_global_hkey, (void **)&g_sp_rp);
	if (retval != 0 || g_sp_rp == NULL) {
		DERR("service_deregister: cannot find sp resource\n");
	}

	/* remove the global reference */
	if (g_sp_rp == sp_rp) {
		DAPLKA_RS_UNREF(g_sp_rp);
	}

	/*
	 * drop the per-IA table's reference; when this is the last
	 * reference, daplka_sp_destroy runs and tears the SP down
	 */
	DAPLKA_RS_UNREF(sp_rp);
	return (0);
}
6384 
6385 /*
6386  * destroys a service point.
6387  * called when the refcnt drops to zero.
6388  */
6389 static int
6390 daplka_sp_destroy(daplka_resource_t *gen_rp)
6391 {
6392         daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)gen_rp;
6393         ibt_status_t status;
6394 
6395         ASSERT(DAPLKA_RS_REFCNT(sp_rp) == 0);
6396         D3("sp_destroy: entering, sp_rp %p, rnum %d\n",
6397             sp_rp, DAPLKA_RS_RNUM(sp_rp));
6398 
6399         /*
6400          * it is possible for pending connections to remain
6401          * on an SP. We need to clean them up here.
6402          */
6403         if (sp_rp->sp_backlog != NULL) {
6404                 ibt_cm_proceed_reply_t proc_reply;
6405                 int i, cnt = 0;
6406                 void *spcp_sidp;
6407 
6408                 for (i = 0; i < sp_rp->sp_backlog_size; i++) {
6409                         if (sp_rp->sp_backlog[i].spcp_state ==
6410                             DAPLKA_SPCP_PENDING) {
6411                                 cnt++;
6412                                 if (sp_rp->sp_backlog[i].spcp_sid == NULL) {
6413                                         DERR("sp_destroy: "
6414                                             "spcp_sid == NULL!\n");
6415                                         continue;
6416                                 }
6417                                 mutex_enter(&sp_rp->sp_lock);
6418                                 spcp_sidp = sp_rp->sp_backlog[i].spcp_sid;
6419                                 sp_rp->sp_backlog[i].spcp_state =
6420                                     DAPLKA_SPCP_INIT;
6421                                 sp_rp->sp_backlog[i].spcp_sid = NULL;
6422                                 sp_rp->sp_backlog[i].spcp_req_len = 0;
6423                                 mutex_exit(&sp_rp->sp_lock);
6424                                 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV,
6425                                     spcp_sidp,
6426                                     IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
6427                                 if (status != IBT_SUCCESS) {
6428                                         DERR("sp_destroy: proceed failed %d\n",
6429                                             status);
6430                                 }
6431                         }
6432                 }
6433                 if (cnt > 0) {
6434                         DERR("sp_destroy: found %d pending "
6435                             "connections\n", cnt);
6436                 }
6437         }
6438 
6439         if (sp_rp->sp_srv_hdl != NULL && sp_rp->sp_bind_hdl != NULL) {
6440                 status = ibt_unbind_service(sp_rp->sp_srv_hdl,
6441                     sp_rp->sp_bind_hdl);
6442                 if (status != IBT_SUCCESS) {
6443                         DERR("sp_destroy: ibt_unbind_service "
6444                             "failed: %d\n", status);
6445                 }
6446         }
6447 
6448         if (sp_rp->sp_srv_hdl != NULL) {
6449                 status = ibt_deregister_service(daplka_dev->daplka_clnt_hdl,
6450                     sp_rp->sp_srv_hdl);
6451                 if (status != IBT_SUCCESS) {
6452                         DERR("sp_destroy: ibt_deregister_service "
6453                             "failed: %d\n", status);
6454                 }
6455         }
6456         if (sp_rp->sp_backlog != NULL) {
6457                 kmem_free(sp_rp->sp_backlog,
6458                     sp_rp->sp_backlog_size * sizeof (daplka_sp_conn_pend_t));
6459                 sp_rp->sp_backlog = NULL;
6460                 sp_rp->sp_backlog_size = 0;
6461         }
6462 
6463         /*
6464          * release reference to evd
6465          */
6466         if (sp_rp->sp_evd_res != NULL) {
6467                 DAPLKA_RS_UNREF(sp_rp->sp_evd_res);
6468         }
6469         sp_rp->sp_bind_hdl = NULL;
6470         sp_rp->sp_srv_hdl = NULL;
6471         DAPLKA_RS_FINI(sp_rp);
6472         kmem_free(sp_rp, sizeof (*sp_rp));
6473         D3("sp_destroy: exiting, sp_rp %p\n", sp_rp);
6474         return (0);
6475 }
6476 
6477 /*
6478  * this function is called by daplka_hash_destroy for
6479  * freeing SP resource objects
6480  */
6481 static void
6482 daplka_hash_sp_free(void *obj)
6483 {
6484         daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6485         daplka_sp_resource_t *g_sp_rp;
6486         int retval;
6487 
6488         ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6489 
6490         retval = daplka_hash_remove(&daplka_global_sp_htbl,
6491             sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6492         if (retval != 0 || g_sp_rp == NULL) {
6493                 DERR("sp_free: cannot find sp resource\n");
6494         }
6495         if (g_sp_rp == sp_rp) {
6496                 DAPLKA_RS_UNREF(g_sp_rp);
6497         }
6498 
6499         DAPLKA_RS_UNREF(sp_rp);
6500 }
6501 
6502 static void
6503 daplka_hash_sp_unref(void *obj)
6504 {
6505         daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6506 
6507         ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6508         DAPLKA_RS_UNREF(sp_rp);
6509 }
6510 
6511 /*
6512  * Passive side CM handlers
6513  */
6514 
6515 /*
6516  * processes the REQ_RCV event
6517  */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_req(daplka_sp_resource_t *spp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *pr_data, ibt_priv_data_len_t pr_len)
{
	daplka_sp_conn_pend_t	*conn = NULL;
	daplka_evd_event_t	*cr_ev = NULL;
	ibt_cm_status_t		cm_status = IBT_CM_DEFAULT;
	uint16_t		bkl_index;
	ibt_status_t		status;

	/*
	 * acquire a slot in the connection backlog of this service point
	 */
	mutex_enter(&spp->sp_lock);
	for (bkl_index = 0; bkl_index < spp->sp_backlog_size; bkl_index++) {
		if (spp->sp_backlog[bkl_index].spcp_state == DAPLKA_SPCP_INIT) {
			conn = &spp->sp_backlog[bkl_index];
			ASSERT(conn->spcp_sid == NULL);
			/* claim the slot while still holding sp_lock */
			conn->spcp_state = DAPLKA_SPCP_PENDING;
			conn->spcp_sid = event->cm_session_id;
			break;
		}
	}
	mutex_exit(&spp->sp_lock);

	/*
	 * too many pending connections
	 */
	if (bkl_index == spp->sp_backlog_size) {
		DERR("service_req: connection pending exceeded %d limit\n",
		    spp->sp_backlog_size);
		return (IBT_CM_NO_RESOURCE);
	}

	/*
	 * save data for cr_handoff. the copy held in the backlog slot
	 * is truncated to DAPL_MAX_PRIVATE_DATA_SIZE; the full-length
	 * copy goes into the CR event below.
	 */
	if (pr_data != NULL && pr_len > 0) {
		int trunc_len = pr_len;

		if (trunc_len > DAPL_MAX_PRIVATE_DATA_SIZE) {
			DERR("service_req: private data truncated\n");
			trunc_len = DAPL_MAX_PRIVATE_DATA_SIZE;
		}
		conn->spcp_req_len = trunc_len;
		bcopy(pr_data, conn->spcp_req_data, trunc_len);
	} else {
		conn->spcp_req_len = 0;
	}
	/* record the requester's RDMA resource limits from the REQ */
	conn->spcp_rdma_ra_in = event->cm_event.req.req_rdma_ra_in;
	conn->spcp_rdma_ra_out = event->cm_event.req.req_rdma_ra_out;

	/*
	 * create a CR event. KM_NOSLEEP because we are in a CM
	 * callback context.
	 */
	cr_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (cr_ev == NULL) {
		DERR("service_req: could not alloc cr_ev\n");
		cm_status = IBT_CM_NO_RESOURCE;
		goto cleanup;
	}

	cr_ev->ee_next = NULL;
	cr_ev->ee_cmev.ec_cm_cookie = spp->sp_cookie;
	cr_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	/* encode the backlog slot index so the accept can find it */
	cr_ev->ee_cmev.ec_cm_psep_cookie = DAPLKA_CREATE_PSEP_COOKIE(bkl_index);
	/*
	 * save the requestor gid
	 * daplka_event_poll needs this if this is a third party REQ_RCV
	 */
	cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix =
	    event->cm_event.req.req_prim_addr.av_dgid.gid_prefix;
	cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid =
	    event->cm_event.req.req_prim_addr.av_dgid.gid_guid;

	/*
	 * set event type; the event carries the full, untruncated
	 * private data when any is present
	 */
	if (pr_len == 0) {
		cr_ev->ee_cmev.ec_cm_ev_type =
		    DAPL_IB_CME_CONNECTION_REQUEST_PENDING;
	} else {
		cr_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);
		if (cr_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("service_req: could not alloc priv\n");
			cm_status = IBT_CM_NO_RESOURCE;
			goto cleanup;
		}
		bcopy(pr_data, cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
		cr_ev->ee_cmev.ec_cm_ev_type =
		    DAPL_IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA;
	}
	cr_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	/*
	 * tell the active side to expect the processing time to be
	 * at most equal to daplka_cm_delay
	 */
	status = ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
	    daplka_cm_delay, NULL, 0);
	if (status != IBT_SUCCESS) {
		DERR("service_req: ibt_cm_delay failed %d\n", status);
		cm_status = IBT_CM_NO_RESOURCE;
		goto cleanup;
	}

	/*
	 * enqueue cr_ev onto the cr_events list of the EVD
	 * corresponding to the SP
	 */
	D2("service_req: enqueue event(%p) evdp(%p) priv_data(%p) "
	    "priv_len(%d) psep(0x%llx)\n", cr_ev, spp->sp_evd_res,
	    cr_ev->ee_cmev.ec_cm_ev_priv_data,
	    (int)cr_ev->ee_cmev.ec_cm_ev_priv_data_len,
	    (longlong_t)cr_ev->ee_cmev.ec_cm_psep_cookie);

	daplka_evd_wakeup(spp->sp_evd_res,
	    &spp->sp_evd_res->evd_cr_events, cr_ev);

	/* defer the CM reply until userland accepts or rejects the CR */
	return (IBT_CM_DEFER);

cleanup:;
	/*
	 * free the cr event
	 */
	if (cr_ev != NULL) {
		if (cr_ev->ee_cmev.ec_cm_ev_priv_data != NULL) {
			kmem_free(cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
			cr_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
			cr_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
		}
		kmem_free(cr_ev, sizeof (daplka_evd_event_t));
	}
	/*
	 * release our slot in the backlog array
	 */
	if (conn != NULL) {
		mutex_enter(&spp->sp_lock);
		ASSERT(conn->spcp_state == DAPLKA_SPCP_PENDING);
		ASSERT(conn->spcp_sid == event->cm_session_id);
		conn->spcp_state = DAPLKA_SPCP_INIT;
		conn->spcp_req_len = 0;
		conn->spcp_sid = NULL;
		mutex_exit(&spp->sp_lock);
	}
	return (cm_status);
}
6667 
6668 /*
6669  * processes the CONN_CLOSED event
6670  */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_conn_closed(daplka_sp_resource_t *sp_rp,
    ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args,
    void *priv_data, ibt_priv_data_len_t len)
{
	daplka_ep_resource_t	*ep_rp;
	daplka_evd_event_t	*disc_ev;
	uint32_t		old_state, new_state;

	/* the EP resource was stashed in the channel private data */
	ep_rp = (daplka_ep_resource_t *)
	    ibt_get_chan_private(event->cm_channel);
	if (ep_rp == NULL) {
		DERR("service_conn_closed: ep_rp == NULL\n");
		return (IBT_CM_ACCEPT);
	}

	/*
	 * verify that the ep_state is either CONNECTED or
	 * DISCONNECTING. if it is not in either states return
	 * without generating an event.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		D2("service_conn_closed: conn aborted, state = %d, "
		    "closed = %d\n", old_state, (int)event->cm_event.closed);
		/* set_state with old == new releases the state hold */
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_DISCONNECTED event. KM_NOSLEEP since
	 * this runs in CM callback context; on failure we drop the
	 * event but still release the EP state.
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		DERR("service_conn_closed: cannot alloc disc_ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
	disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;

	D2("service_conn_closed: enqueue event(%p) evdp(%p) psep(0x%llx)\n",
	    disc_ev, sp_rp->sp_evd_res, (longlong_t)ep_rp->ep_psep_cookie);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}
6739 
6740 /*
6741  * processes the CONN_EST event
6742  */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_service_conn_est(daplka_sp_resource_t *sp_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_ep_resource_t	*ep_rp;
	daplka_evd_event_t	*conn_ev;
	void			*pr_data = event->cm_priv_data;
	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
	uint32_t		old_state, new_state;

	/* the EP resource was stashed in the channel private data */
	ep_rp = (daplka_ep_resource_t *)
	    ibt_get_chan_private(event->cm_channel);
	if (ep_rp == NULL) {
		DERR("service_conn_est: ep_rp == NULL\n");
		return (IBT_CM_ACCEPT);
	}

	/*
	 * verify that ep_state is ACCEPTING. if it is not in this
	 * state, return without generating an event.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_ACCEPTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("service_conn_est: conn aborted, state = %d\n",
		    old_state);
		/* set_state with old == new releases the state hold */
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_CONNECTED event. KM_NOSLEEP since this
	 * runs in CM callback context; if allocation fails the event
	 * is dropped but the EP state is still released.
	 */
	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (conn_ev == NULL) {
		DERR("service_conn_est: conn_ev alloc failed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
	conn_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
	conn_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
	conn_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;

	/*
	 * copy private data into event
	 */
	if (pr_len > 0) {
		conn_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);
		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("service_conn_est: pr_data alloc failed\n");
			daplka_ep_set_state(ep_rp, old_state, new_state);
			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
			return (IBT_CM_ACCEPT);
		}
		bcopy(pr_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
	}
	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	D2("service_conn_est: enqueue event(%p) evdp(%p)\n",
	    conn_ev, ep_rp->ep_conn_evd);

	/*
	 * transition ep_state to CONNECTED
	 */
	new_state = DAPLKA_EP_STATE_CONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);

	return (IBT_CM_ACCEPT);
}
6824 
6825 /*
6826  * processes the FAILURE event
6827  */
6828 /* ARGSUSED */
6829 static ibt_cm_status_t
6830 daplka_cm_service_event_failure(daplka_sp_resource_t *sp_rp,
6831     ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, void *priv_data,
6832     ibt_priv_data_len_t len)
6833 {
6834         daplka_evd_event_t      *disc_ev;
6835         daplka_ep_resource_t    *ep_rp;
6836         uint32_t                old_state, new_state;
6837         ibt_rc_chan_query_attr_t chan_attrs;
6838         ibt_status_t            status;
6839 
6840         /*
6841          * check that we still have a valid cm_channel before continuing
6842          */
6843         if (event->cm_channel == NULL) {
6844                 DERR("serice_event_failure: event->cm_channel == NULL\n");
6845                 return (IBT_CM_ACCEPT);
6846         }
6847         ep_rp = (daplka_ep_resource_t *)
6848             ibt_get_chan_private(event->cm_channel);
6849         if (ep_rp == NULL) {
6850                 DERR("service_event_failure: ep_rp == NULL\n");
6851                 return (IBT_CM_ACCEPT);
6852         }
6853 
6854         /*
6855          * verify that ep_state is ACCEPTING or DISCONNECTING. if it
6856          * is not in either state, return without generating an event.
6857          */
6858         new_state = old_state = daplka_ep_get_state(ep_rp);
6859         if (old_state != DAPLKA_EP_STATE_ACCEPTING &&
6860             old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6861                 /*
6862                  * we can get here if the connection is being aborted
6863                  */
6864                 DERR("service_event_failure: conn aborted, state = %d, "
6865                     "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
6866                     (int)event->cm_event.failed.cf_code,
6867                     (int)event->cm_event.failed.cf_msg,
6868                     (int)event->cm_event.failed.cf_reason);
6869 
6870                 daplka_ep_set_state(ep_rp, old_state, new_state);
6871                 return (IBT_CM_ACCEPT);
6872         }
6873 
6874         bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
6875         status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
6876 
6877         if ((status == IBT_SUCCESS) &&
6878             (chan_attrs.rc_state != IBT_STATE_ERROR)) {
6879                 DERR("service_event_failure: conn abort qpn %d state %d\n",
6880                     chan_attrs.rc_qpn, chan_attrs.rc_state);
6881 
6882                 /* explicit transition the QP to ERROR state */
6883                 status = ibt_flush_channel(ep_rp->ep_chan_hdl);
6884         }
6885 
6886         /*
6887          * create an event
6888          */
6889         disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6890         if (disc_ev == NULL) {
6891                 DERR("service_event_failure: cannot alloc disc_ev\n");
6892                 daplka_ep_set_state(ep_rp, old_state, new_state);
6893                 return (IBT_CM_ACCEPT);
6894         }
6895 
6896         /*
6897          * fill in the appropriate event type
6898          */
6899         if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
6900                 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
6901         } else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
6902                 switch (event->cm_event.failed.cf_reason) {
6903                 case IBT_CM_INVALID_CID:
6904                         disc_ev->ee_cmev.ec_cm_ev_type =
6905                             DAPL_IB_CME_DESTINATION_REJECT;
6906                         break;
6907                 default:
6908                         disc_ev->ee_cmev.ec_cm_ev_type =
6909                             DAPL_IB_CME_LOCAL_FAILURE;
6910                         break;
6911                 }
6912         } else {
6913                 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
6914         }
6915         disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6916         disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6917         disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6918         disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6919         disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6920 
6921         D2("service_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
6922             "cf_msg(%d) cf_reason(%d) psep(0x%llx)\n", disc_ev,
6923             ep_rp->ep_conn_evd, (int)event->cm_event.failed.cf_code,
6924             (int)event->cm_event.failed.cf_msg,
6925             (int)event->cm_event.failed.cf_reason,
6926             (longlong_t)ep_rp->ep_psep_cookie);
6927 
6928         /*
6929          * transition ep_state to DISCONNECTED
6930          */
6931         new_state = DAPLKA_EP_STATE_DISCONNECTED;
6932         daplka_ep_set_state(ep_rp, old_state, new_state);
6933 
6934         /*
6935          * enqueue event onto the conn_evd owned by ep_rp
6936          */
6937         daplka_evd_wakeup(ep_rp->ep_conn_evd,
6938             &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6939 
6940         return (IBT_CM_ACCEPT);
6941 }
6942 
6943 /*
6944  * this is the passive side CM handler. it gets registered
6945  * when an SP resource is created in daplka_service_register.
6946  */
6947 static ibt_cm_status_t
6948 daplka_cm_service_handler(void *cm_private, ibt_cm_event_t *event,
6949 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6950 {
6951         daplka_sp_resource_t    *sp_rp = (daplka_sp_resource_t *)cm_private;
6952 
6953         if (sp_rp == NULL) {
6954                 DERR("service_handler: sp_rp == NULL\n");
6955                 return (IBT_CM_NO_RESOURCE);
6956         }
6957         /*
6958          * default is not to return priv data
6959          */
6960         if (ret_args != NULL) {
6961                 ret_args->cm_ret_len = 0;
6962         }
6963 
6964         switch (event->cm_type) {
6965         case IBT_CM_EVENT_REQ_RCV:
6966                 D2("service_handler: IBT_CM_EVENT_REQ_RCV\n");
6967                 return (daplka_cm_service_req(sp_rp, event, ret_args,
6968                     event->cm_priv_data, event->cm_priv_data_len));
6969 
6970         case IBT_CM_EVENT_REP_RCV:
6971                 /* passive side should not receive this event */
6972                 D2("service_handler: IBT_CM_EVENT_REP_RCV\n");
6973                 return (IBT_CM_DEFAULT);
6974 
6975         case IBT_CM_EVENT_CONN_CLOSED:
6976                 D2("service_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
6977                     event->cm_event.closed);
6978                 return (daplka_cm_service_conn_closed(sp_rp, event, ret_args,
6979                     priv_data, len));
6980 
6981         case IBT_CM_EVENT_MRA_RCV:
6982                 /* passive side does default processing MRA event */
6983                 D2("service_handler: IBT_CM_EVENT_MRA_RCV\n");
6984                 return (IBT_CM_DEFAULT);
6985 
6986         case IBT_CM_EVENT_CONN_EST:
6987                 D2("service_handler: IBT_CM_EVENT_CONN_EST\n");
6988                 return (daplka_cm_service_conn_est(sp_rp, event, ret_args,
6989                     priv_data, len));
6990 
6991         case IBT_CM_EVENT_FAILURE:
6992                 D2("service_handler: IBT_CM_EVENT_FAILURE\n");
6993                 return (daplka_cm_service_event_failure(sp_rp, event, ret_args,
6994                     priv_data, len));
6995         case IBT_CM_EVENT_LAP_RCV:
6996                 /* active side had initiated a path migration operation */
6997                 D2("service_handler: IBT_CM_EVENT_LAP_RCV\n");
6998                 return (IBT_CM_ACCEPT);
6999         default:
7000                 DERR("service_handler: invalid event %d\n", event->cm_type);
7001                 break;
7002         }
7003         return (IBT_CM_DEFAULT);
7004 }
7005 
7006 /*
7007  * Active side CM handlers
7008  */
7009 
7010 /*
7011  * Processes the REP_RCV event. When the passive side accepts the
7012  * connection, this handler is called. We make a copy of the private
7013  * data into the ep so that it can be passed back to userland in when
7014  * the CONN_EST event occurs.
7015  */
7016 /* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_rep_rcv(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	void			*pr_data = event->cm_priv_data;
	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
	uint32_t		old_state, new_state;

	D2("rc_rep_rcv: pr_data(0x%p), pr_len(%d)\n", pr_data,
	    (int)pr_len);

	ASSERT(ep_rp != NULL);
	/*
	 * get_state/set_state appear to bracket an ep state
	 * transition; set_state must be called on every exit
	 * path to publish the state back.
	 */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("rc_rep_rcv: conn aborted, state = %d\n", old_state);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_NO_CHANNEL);
	}

	/*
	 * we do not cancel the timer here because the connection
	 * handshake is still in progress.
	 */

	/*
	 * save the private data. it will be passed up when
	 * the connection is established.
	 * NOTE(review): no bounds check of pr_len against the size
	 * of ep_priv_data here; presumably IBTF caps REP private
	 * data at a size ep_priv_data accommodates -- confirm
	 * against the daplka_ep_resource_t declaration.
	 */
	if (pr_len > 0) {
		ep_rp->ep_priv_len = pr_len;
		bcopy(pr_data, ep_rp->ep_priv_data, (size_t)pr_len);
	}

	/*
	 * we do not actually transition to a different state.
	 * the state will change when we get a conn_est, failure,
	 * closed, or timeout event.
	 */
	daplka_ep_set_state(ep_rp, old_state, new_state);
	return (IBT_CM_ACCEPT);
}
7061 
7062 /*
7063  * Processes the CONN_CLOSED event. This gets called when either
7064  * the active or passive side closes the rc channel.
7065  */
7066 /* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_conn_closed(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*disc_ev;
	uint32_t		old_state, new_state;

	ASSERT(ep_rp != NULL);
	old_state = new_state = daplka_ep_get_state(ep_rp);
	/* only CONNECTED/DISCONNECTING endpoints produce a user event */
	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		D2("rc_conn_closed: conn aborted, state = %d, "
		    "closed = %d\n", old_state, (int)event->cm_event.closed);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * it's ok for the timer to fire at this point. the
	 * taskq thread that processes the timer will just wait
	 * until we are done with our state transition.
	 */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/*
		 * daplka_cancel_timer returns -1 if the timer is
		 * being processed and 0 for all other cases.
		 * we need to reset ep_state to allow timer processing
		 * to continue.
		 */
		DERR("rc_conn_closed: timer is being processed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_DISCONNECTED event.
	 * KM_NOSLEEP because this runs in CM callback context; if
	 * allocation fails the event is simply dropped.
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		DERR("rc_conn_closed: could not alloc ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/* active side: is_passive false, no passive-side cookie */
	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;

	D2("rc_conn_closed: enqueue event(%p) evdp(%p) closed(%d)\n",
	    disc_ev, ep_rp->ep_conn_evd, (int)event->cm_event.closed);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}
7138 
7139 /*
7140  * processes the CONN_EST event
7141  */
7142 /* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_conn_est(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*conn_ev;
	uint32_t		old_state, new_state;

	ASSERT(ep_rp != NULL);
	old_state = new_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("rc_conn_est: conn aborted, state = %d\n", old_state);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * it's ok for the timer to fire at this point. the
	 * taskq thread that processes the timer will just wait
	 * until we are done with our state transition.
	 */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/*
		 * daplka_cancel_timer returns -1 if the timer is
		 * being processed and 0 for all other cases.
		 * we need to reset ep_state to allow timer processing
		 * to continue.
		 */
		DERR("rc_conn_est: timer is being processed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_CONNECTED event.
	 * KM_NOSLEEP because this runs in CM callback context; if
	 * allocation fails the event is dropped and the state is
	 * left unchanged (a later timeout/failure cleans up).
	 */
	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (conn_ev == NULL) {
		DERR("rc_conn_est: could not alloc ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
	conn_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	conn_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	conn_ev->ee_cmev.ec_cm_psep_cookie = 0;

	/*
	 * The private data passed back in the connection established
	 * event is what was recvd in the daplka_cm_rc_rep_rcv handler and
	 * saved in ep resource structure.
	 */
	if (ep_rp->ep_priv_len > 0) {
		conn_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(ep_rp->ep_priv_len, KM_NOSLEEP);

		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			/* drop the whole event rather than deliver a
			 * CONNECTED event with missing private data */
			DERR("rc_conn_est: could not alloc pr_data\n");
			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
			daplka_ep_set_state(ep_rp, old_state, new_state);
			return (IBT_CM_ACCEPT);
		}
		bcopy(ep_rp->ep_priv_data, conn_ev->ee_cmev.ec_cm_ev_priv_data,
		    ep_rp->ep_priv_len);
	}
	/* when ep_priv_len == 0, priv_data stays NULL (zalloc'd) */
	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = ep_rp->ep_priv_len;

	D2("rc_conn_est: enqueue event(%p) evdp(%p) pr_data(0x%p), "
	    "pr_len(%d)\n", conn_ev, ep_rp->ep_conn_evd,
	    conn_ev->ee_cmev.ec_cm_ev_priv_data,
	    (int)conn_ev->ee_cmev.ec_cm_ev_priv_data_len);

	/*
	 * transition ep_state to CONNECTED
	 */
	new_state = DAPLKA_EP_STATE_CONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);

	return (IBT_CM_ACCEPT);
}
7232 
7233 /*
7234  * processes the FAILURE event
7235  */
7236 /* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_event_failure(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*disc_ev;
	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
	void			*pr_data = event->cm_priv_data;
	uint32_t		old_state, new_state;
	ibt_rc_chan_query_attr_t chan_attrs;
	ibt_status_t		status;

	ASSERT(ep_rp != NULL);
	old_state = new_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("rc_event_failure: conn aborted, state = %d, "
		    "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
		    (int)event->cm_event.failed.cf_code,
		    (int)event->cm_event.failed.cf_msg,
		    (int)event->cm_event.failed.cf_reason);

		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * it's ok for the timer to fire at this point. the
	 * taskq thread that processes the timer will just wait
	 * until we are done with our state transition.
	 */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/*
		 * daplka_cancel_timer returns -1 if the timer is
		 * being processed and 0 for all other cases.
		 * we need to reset ep_state to allow timer processing
		 * to continue.
		 */
		DERR("rc_event_failure: timer is being processed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * if the channel is not already in the ERROR state, force
	 * it there so pending work completes with flush errors.
	 */
	bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
	status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);

	if ((status == IBT_SUCCESS) &&
	    (chan_attrs.rc_state != IBT_STATE_ERROR)) {
		DERR("rc_event_failure: conn abort qpn %d state %d\n",
		    chan_attrs.rc_qpn, chan_attrs.rc_state);

		/* explicit transition the QP to ERROR state */
		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
		/*
		 * NOTE(review): the flush status is never checked;
		 * presumably a flush failure is tolerated because the
		 * endpoint is being torn down regardless -- confirm.
		 */
	}

	/*
	 * create an event. KM_NOSLEEP since we are in CM callback
	 * context; if allocation fails the event is dropped.
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		DERR("rc_event_failure: cannot alloc disc_ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * copy private data into event
	 */
	if (pr_len > 0) {
		disc_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);

		if (disc_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("rc_event_failure: cannot alloc pr data\n");
			kmem_free(disc_ev, sizeof (daplka_evd_event_t));
			daplka_ep_set_state(ep_rp, old_state, new_state);
			return (IBT_CM_ACCEPT);
		}
		bcopy(pr_data, disc_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
	}
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	/*
	 * fill in the appropriate event type: REJ maps to a
	 * destination reject (consumer rejects carry private data),
	 * CM timeouts map to TIMED_OUT, everything else is treated
	 * as a local failure.
	 */
	if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
		switch (event->cm_event.failed.cf_reason) {
		case IBT_CM_CONSUMER:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
			break;
		case IBT_CM_NO_CHAN:
		case IBT_CM_NO_RESC:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT;
			break;
		default:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT;
			break;
		}
	} else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
	} else {
		/* others we'll mark as local failure */
		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
	}
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;

	D2("rc_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
	    "cf_msg(%d) cf_reason(%d)\n", disc_ev, ep_rp->ep_conn_evd,
	    (int)event->cm_event.failed.cf_code,
	    (int)event->cm_event.failed.cf_msg,
	    (int)event->cm_event.failed.cf_reason);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}
7370 
7371 /*
7372  * This is the active side CM handler. It gets registered when
7373  * ibt_open_rc_channel is called.
7374  */
7375 static ibt_cm_status_t
7376 daplka_cm_rc_handler(void *cm_private, ibt_cm_event_t *event,
7377     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7378 {
7379         daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)cm_private;
7380 
7381         if (ep_rp == NULL) {
7382                 DERR("rc_handler: ep_rp == NULL\n");
7383                 return (IBT_CM_NO_CHANNEL);
7384         }
7385         /*
7386          * default is not to return priv data
7387          */
7388         if (ret_args != NULL) {
7389                 ret_args->cm_ret_len = 0;
7390         }
7391 
7392         switch (event->cm_type) {
7393         case IBT_CM_EVENT_REQ_RCV:
7394                 /* active side should not receive this event */
7395                 D2("rc_handler: IBT_CM_EVENT_REQ_RCV\n");
7396                 break;
7397 
7398         case IBT_CM_EVENT_REP_RCV:
7399                 /* connection accepted by passive side */
7400                 D2("rc_handler: IBT_CM_EVENT_REP_RCV\n");
7401                 return (daplka_cm_rc_rep_rcv(ep_rp, event, ret_args,
7402                     priv_data, len));
7403 
7404         case IBT_CM_EVENT_CONN_CLOSED:
7405                 D2("rc_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
7406                     event->cm_event.closed);
7407                 return (daplka_cm_rc_conn_closed(ep_rp, event, ret_args,
7408                     priv_data, len));
7409 
7410         case IBT_CM_EVENT_MRA_RCV:
7411                 /* passive side does default processing MRA event */
7412                 D2("rc_handler: IBT_CM_EVENT_MRA_RCV\n");
7413                 return (IBT_CM_DEFAULT);
7414 
7415         case IBT_CM_EVENT_CONN_EST:
7416                 D2("rc_handler: IBT_CM_EVENT_CONN_EST\n");
7417                 return (daplka_cm_rc_conn_est(ep_rp, event, ret_args,
7418                     priv_data, len));
7419 
7420         case IBT_CM_EVENT_FAILURE:
7421                 D2("rc_handler: IBT_CM_EVENT_FAILURE\n");
7422                 return (daplka_cm_rc_event_failure(ep_rp, event, ret_args,
7423                     priv_data, len));
7424 
7425         default:
7426                 D2("rc_handler: invalid event %d\n", event->cm_type);
7427                 break;
7428         }
7429         return (IBT_CM_DEFAULT);
7430 }
7431 
7432 /*
7433  * creates an IA resource and inserts it into the global resource table.
7434  */
7435 /* ARGSUSED */
7436 static int
7437 daplka_ia_create(minor_t rnum, intptr_t arg, int mode,
7438         cred_t *cred, int *rvalp)
7439 {
7440         daplka_ia_resource_t    *ia_rp, *tmp_rp;
7441         boolean_t               inserted = B_FALSE;
7442         dapl_ia_create_t        args;
7443         ibt_hca_hdl_t           hca_hdl;
7444         ibt_status_t            status;
7445         ib_gid_t                sgid;
7446         int                     retval;
7447         ibt_hca_portinfo_t      *pinfop;
7448         uint_t                  pinfon;
7449         uint_t                  size;
7450         ibt_ar_t                ar_s;
7451         daplka_hca_t            *hca;
7452 
7453         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ia_create_t),
7454             mode);
7455         if (retval != 0) {
7456                 DERR("ia_create: copyin error %d\n", retval);
7457                 return (EFAULT);
7458         }
7459         if (args.ia_version != DAPL_IF_VERSION) {
7460                 DERR("ia_create: invalid version %d, expected version %d\n",
7461                     args.ia_version, DAPL_IF_VERSION);
7462                 return (EINVAL);
7463         }
7464 
7465         /*
7466          * find the hca with the matching guid
7467          */
7468         mutex_enter(&daplka_dev->daplka_mutex);
7469         for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
7470             hca = hca->hca_next) {
7471                 if (hca->hca_guid == args.ia_guid) {
7472                         DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);
7473                         break;
7474                 }
7475         }
7476         mutex_exit(&daplka_dev->daplka_mutex);
7477 
7478         if (hca == NULL) {
7479                 DERR("ia_create: guid 0x%016llx not found\n",
7480                     (longlong_t)args.ia_guid);
7481                 return (EINVAL);
7482         }
7483 
7484         /*
7485          * check whether port number is valid and whether it is up
7486          */
7487         if (args.ia_port > hca->hca_nports) {
7488                 DERR("ia_create: invalid hca_port %d\n", args.ia_port);
7489                 DAPLKA_RELE_HCA(daplka_dev, hca);
7490                 return (EINVAL);
7491         }
7492         hca_hdl = hca->hca_hdl;
7493         if (hca_hdl == NULL) {
7494                 DERR("ia_create: hca_hdl == NULL\n");
7495                 DAPLKA_RELE_HCA(daplka_dev, hca);
7496                 return (EINVAL);
7497         }
7498         status = ibt_query_hca_ports(hca_hdl, (uint8_t)args.ia_port,
7499             &pinfop, &pinfon, &size);
7500         if (status != IBT_SUCCESS) {
7501                 DERR("ia_create: ibt_query_hca_ports returned %d\n", status);
7502                 *rvalp = (int)status;
7503                 DAPLKA_RELE_HCA(daplka_dev, hca);
7504                 return (0);
7505         }
7506         sgid = pinfop->p_sgid_tbl[0];
7507         ibt_free_portinfo(pinfop, size);
7508 
7509         ia_rp = kmem_zalloc(sizeof (daplka_ia_resource_t), daplka_km_flags);
7510         DAPLKA_RS_INIT(ia_rp, DAPL_TYPE_IA, rnum, daplka_ia_destroy);
7511 
7512         mutex_init(&ia_rp->ia_lock, NULL, MUTEX_DRIVER, NULL);
7513         cv_init(&ia_rp->ia_cv, NULL, CV_DRIVER, NULL);
7514         ia_rp->ia_hca_hdl = hca_hdl;
7515         ia_rp->ia_hca_sgid = sgid;
7516         ia_rp->ia_hca = hca;
7517         ia_rp->ia_port_num = args.ia_port;
7518         ia_rp->ia_port_pkey = args.ia_pkey;
7519         ia_rp->ia_pid = ddi_get_pid();
7520         ia_rp->ia_async_evd_hkeys = NULL;
7521         ia_rp->ia_ar_registered = B_FALSE;
7522         bcopy(args.ia_sadata, ia_rp->ia_sadata, DAPL_ATS_NBYTES);
7523 
7524         /* register Address Record */
7525         ar_s.ar_gid = ia_rp->ia_hca_sgid;
7526         ar_s.ar_pkey = ia_rp->ia_port_pkey;
7527         bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
7528 #define UC(b) ar_s.ar_data[(b)]
7529         D3("daplka_ia_create: SA[8] %d.%d.%d.%d\n",
7530             UC(8), UC(9), UC(10), UC(11));
7531         D3("daplka_ia_create: SA[12] %d.%d.%d.%d\n",
7532             UC(12), UC(13), UC(14), UC(15));
7533         retval = ibt_register_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
7534         if (retval != IBT_SUCCESS) {
7535                 DERR("ia_create: failed to register Address Record.\n");
7536                 retval = EINVAL;
7537                 goto cleanup;
7538         }
7539         ia_rp->ia_ar_registered = B_TRUE;
7540 
7541         /*
7542          * create hash tables for all object types
7543          */
7544         retval = daplka_hash_create(&ia_rp->ia_ep_htbl, DAPLKA_EP_HTBL_SZ,
7545             daplka_hash_ep_free, daplka_hash_generic_lookup);
7546         if (retval != 0) {
7547                 DERR("ia_create: cannot create ep hash table\n");
7548                 goto cleanup;
7549         }
7550         retval = daplka_hash_create(&ia_rp->ia_mr_htbl, DAPLKA_MR_HTBL_SZ,
7551             daplka_hash_mr_free, daplka_hash_generic_lookup);
7552         if (retval != 0) {
7553                 DERR("ia_create: cannot create mr hash table\n");
7554                 goto cleanup;
7555         }
7556         retval = daplka_hash_create(&ia_rp->ia_mw_htbl, DAPLKA_MW_HTBL_SZ,
7557             daplka_hash_mw_free, daplka_hash_generic_lookup);
7558         if (retval != 0) {
7559                 DERR("ia_create: cannot create mw hash table\n");
7560                 goto cleanup;
7561         }
7562         retval = daplka_hash_create(&ia_rp->ia_pd_htbl, DAPLKA_PD_HTBL_SZ,
7563             daplka_hash_pd_free, daplka_hash_generic_lookup);
7564         if (retval != 0) {
7565                 DERR("ia_create: cannot create pd hash table\n");
7566                 goto cleanup;
7567         }
7568         retval = daplka_hash_create(&ia_rp->ia_evd_htbl, DAPLKA_EVD_HTBL_SZ,
7569             daplka_hash_evd_free, daplka_hash_generic_lookup);
7570         if (retval != 0) {
7571                 DERR("ia_create: cannot create evd hash table\n");
7572                 goto cleanup;
7573         }
7574         retval = daplka_hash_create(&ia_rp->ia_cno_htbl, DAPLKA_CNO_HTBL_SZ,
7575             daplka_hash_cno_free, daplka_hash_generic_lookup);
7576         if (retval != 0) {
7577                 DERR("ia_create: cannot create cno hash table\n");
7578                 goto cleanup;
7579         }
7580         retval = daplka_hash_create(&ia_rp->ia_sp_htbl, DAPLKA_SP_HTBL_SZ,
7581             daplka_hash_sp_free, daplka_hash_generic_lookup);
7582         if (retval != 0) {
7583                 DERR("ia_create: cannot create sp hash table\n");
7584                 goto cleanup;
7585         }
7586         retval = daplka_hash_create(&ia_rp->ia_srq_htbl, DAPLKA_SRQ_HTBL_SZ,
7587             daplka_hash_srq_free, daplka_hash_generic_lookup);
7588         if (retval != 0) {
7589                 DERR("ia_create: cannot create srq hash table\n");
7590                 goto cleanup;
7591         }
7592         /*
7593          * insert ia_rp into the global resource table
7594          */
7595         retval = daplka_resource_insert(rnum, (daplka_resource_t *)ia_rp);
7596         if (retval != 0) {
7597                 DERR("ia_create: cannot insert resource\n");
7598                 goto cleanup;
7599         }
7600         inserted = B_TRUE;
7601 
7602         args.ia_resnum = rnum;
7603         retval = copyout(&args, (void *)arg, sizeof (dapl_ia_create_t));
7604         if (retval != 0) {
7605                 DERR("ia_create: copyout error %d\n", retval);
7606                 retval = EFAULT;
7607                 goto cleanup;
7608         }
7609         return (0);
7610 
7611 cleanup:;
7612         if (inserted) {
7613                 tmp_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
7614                 if (tmp_rp != ia_rp) {
7615                         /*
7616                          * we can return here because another thread must
7617                          * have freed up the resource
7618                          */
7619                         DERR("ia_create: cannot remove resource\n");
7620                         return (retval);
7621                 }
7622         }
7623         DAPLKA_RS_UNREF(ia_rp);
7624         return (retval);
7625 }
7626 
7627 /*
7628  * destroys an IA resource
7629  */
static int
daplka_ia_destroy(daplka_resource_t *gen_rp)
{
	daplka_ia_resource_t	*ia_rp = (daplka_ia_resource_t *)gen_rp;
	daplka_async_evd_hkey_t *hkp;
	int			cnt;
	ibt_ar_t		ar_s;

	D3("ia_destroy: entering, ia_rp 0x%p\n", ia_rp);

	/* deregister Address Record */
	if (ia_rp->ia_ar_registered) {
		ar_s.ar_gid = ia_rp->ia_hca_sgid;
		ar_s.ar_pkey = ia_rp->ia_port_pkey;
		bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
		/* failure here is ignored; teardown proceeds regardless */
		(void) ibt_deregister_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
		ia_rp->ia_ar_registered = B_FALSE;
	}

	/*
	 * destroy hash tables. make sure resources are
	 * destroyed in the correct order.
	 */
	daplka_hash_destroy(&ia_rp->ia_mw_htbl);
	daplka_hash_destroy(&ia_rp->ia_mr_htbl);
	daplka_hash_destroy(&ia_rp->ia_ep_htbl);
	daplka_hash_destroy(&ia_rp->ia_srq_htbl);
	daplka_hash_destroy(&ia_rp->ia_evd_htbl);
	daplka_hash_destroy(&ia_rp->ia_cno_htbl);
	daplka_hash_destroy(&ia_rp->ia_pd_htbl);
	daplka_hash_destroy(&ia_rp->ia_sp_htbl);

	/*
	 * free the async evd list
	 */
	cnt = 0;
	hkp = ia_rp->ia_async_evd_hkeys;
	while (hkp != NULL) {
		daplka_async_evd_hkey_t *free_hkp;

		cnt++;
		free_hkp = hkp;
		hkp = hkp->aeh_next;
		kmem_free(free_hkp, sizeof (*free_hkp));
	}
	if (cnt > 0) {
		D3("ia_destroy: freed %d hkeys\n", cnt);
	}
	mutex_destroy(&ia_rp->ia_lock);
	cv_destroy(&ia_rp->ia_cv);
	ia_rp->ia_hca_hdl = NULL;

	DAPLKA_RS_FINI(ia_rp);

	/* drop the hca hold taken when the IA was created */
	if (ia_rp->ia_hca)
		DAPLKA_RELE_HCA(daplka_dev, ia_rp->ia_hca);

	kmem_free(ia_rp, sizeof (daplka_ia_resource_t));
	/* only the (now stale) pointer value is printed, no dereference */
	D3("ia_destroy: exiting, ia_rp 0x%p\n", ia_rp);
	return (0);
}
7691 
7692 static void
7693 daplka_async_event_create(ibt_async_code_t code, ibt_async_event_t *event,
7694     uint64_t cookie, daplka_ia_resource_t *ia_rp)
7695 {
7696         daplka_evd_event_t      *evp;
7697         daplka_evd_resource_t   *async_evd;
7698         daplka_async_evd_hkey_t *curr;
7699 
7700         mutex_enter(&ia_rp->ia_lock);
7701         curr = ia_rp->ia_async_evd_hkeys;
7702         while (curr != NULL) {
7703                 /*
7704                  * Note: this allocation does not zero out the buffer
7705                  * since we init all the fields.
7706                  */
7707                 evp = kmem_alloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7708                 if (evp == NULL) {
7709                         DERR("async_event_enqueue: event alloc failed"
7710                             "!found\n", ia_rp, curr->aeh_evd_hkey);
7711                         curr = curr->aeh_next;
7712                         continue;
7713                 }
7714                 evp->ee_next = NULL;
7715                 evp->ee_aev.ibae_type = code;
7716                 evp->ee_aev.ibae_hca_guid = event->ev_hca_guid;
7717                 evp->ee_aev.ibae_cookie = cookie;
7718                 evp->ee_aev.ibae_port = event->ev_port;
7719 
7720                 /*
7721                  * Lookup the async evd corresponding to this ia and enqueue
7722                  * evp and wakeup any waiter.
7723                  */
7724                 async_evd = (daplka_evd_resource_t *)
7725                     daplka_hash_lookup(&ia_rp->ia_evd_htbl, curr->aeh_evd_hkey);
7726                 if (async_evd == NULL) { /* async evd is being freed */
7727                         DERR("async_event_enqueue: ia_rp(%p) asycn_evd %llx "
7728                             "!found\n", ia_rp, (longlong_t)curr->aeh_evd_hkey);
7729                         kmem_free(evp, sizeof (daplka_evd_event_t));
7730                         curr = curr->aeh_next;
7731                         continue;
7732                 }
7733                 daplka_evd_wakeup(async_evd, &async_evd->evd_async_events, evp);
7734 
7735                 /* decrement refcnt on async_evd */
7736                 DAPLKA_RS_UNREF(async_evd);
7737                 curr = curr->aeh_next;
7738         }
7739         mutex_exit(&ia_rp->ia_lock);
7740 }
7741 /*
7742  * This routine is called in kernel context
7743  */
7744 
7745 /* ARGSUSED */
7746 static void
7747 daplka_rc_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7748     ibt_async_code_t code, ibt_async_event_t *event)
7749 {
7750         daplka_ep_resource_t            *epp;
7751         daplka_ia_resource_t            *ia_rp;
7752         minor_t                         ia_rnum;
7753 
7754         if (event->ev_chan_hdl == NULL) {
7755                 DERR("daplka_rc_async_handler: ev_chan_hdl is NULL\n");
7756                 return;
7757         }
7758 
7759         mutex_enter(&daplka_dev->daplka_mutex);
7760         epp = ibt_get_chan_private(event->ev_chan_hdl);
7761         if (epp == NULL) {
7762                 mutex_exit(&daplka_dev->daplka_mutex);
7763                 DERR("daplka_rc_async_handler: chan_private is NULL\n");
7764                 return;
7765         }
7766 
7767         /* grab a reference to this ep */
7768         DAPLKA_RS_REF(epp);
7769         mutex_exit(&daplka_dev->daplka_mutex);
7770 
7771         /*
7772          * The endpoint resource has the resource number corresponding to
7773          * the IA resource. Use that to lookup the ia resource entry
7774          */
7775         ia_rnum = DAPLKA_RS_RNUM(epp);
7776         ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
7777         if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
7778                 D2("daplka_rc_async_handler: resource (%d) not found\n",
7779                     ia_rnum);
7780                 DAPLKA_RS_UNREF(epp);
7781                 return;
7782         }
7783 
7784         /*
7785          * Create an async event and chain it to the async evd
7786          */
7787         daplka_async_event_create(code, event, epp->ep_cookie, ia_rp);
7788 
7789         DAPLKA_RS_UNREF(ia_rp);
7790         DAPLKA_RS_UNREF(epp);
7791 }
7792 
7793 /*
7794  * This routine is called in kernel context
7795  */
7796 
7797 /* ARGSUSED */
7798 static void
7799 daplka_cq_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7800     ibt_async_code_t code, ibt_async_event_t *event)
7801 {
7802         daplka_evd_resource_t           *evdp;
7803         daplka_ia_resource_t            *ia_rp;
7804         minor_t                         ia_rnum;
7805 
7806         if (event->ev_cq_hdl == NULL)
7807                 return;
7808 
7809         mutex_enter(&daplka_dev->daplka_mutex);
7810         evdp = ibt_get_cq_private(event->ev_cq_hdl);
7811         if (evdp == NULL) {
7812                 mutex_exit(&daplka_dev->daplka_mutex);
7813                 DERR("daplka_cq_async_handler: get cq private(%p) failed\n",
7814                     event->ev_cq_hdl);
7815                 return;
7816         }
7817         /* grab a reference to this evd resource */
7818         DAPLKA_RS_REF(evdp);
7819         mutex_exit(&daplka_dev->daplka_mutex);
7820 
7821         /*
7822          * The endpoint resource has the resource number corresponding to
7823          * the IA resource. Use that to lookup the ia resource entry
7824          */
7825         ia_rnum = DAPLKA_RS_RNUM(evdp);
7826         ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
7827         if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
7828                 DERR("daplka_cq_async_handler: resource (%d) not found\n",
7829                     ia_rnum);
7830                 DAPLKA_RS_UNREF(evdp);
7831                 return;
7832         }
7833 
7834         /*
7835          * Create an async event and chain it to the async evd
7836          */
7837         daplka_async_event_create(code, event, evdp->evd_cookie, ia_rp);
7838 
7839         /* release all the refcount that were acquired */
7840         DAPLKA_RS_UNREF(ia_rp);
7841         DAPLKA_RS_UNREF(evdp);
7842 }
7843 
7844 /*
7845  * This routine is called in kernel context, handles unaffiliated async errors
7846  */
7847 
7848 /* ARGSUSED */
7849 static void
7850 daplka_un_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7851     ibt_async_code_t code, ibt_async_event_t *event)
7852 {
7853         int                     i, j;
7854         daplka_resource_blk_t   *blk;
7855         daplka_resource_t       *rp;
7856         daplka_ia_resource_t    *ia_rp;
7857 
7858         /*
7859          * Walk the resource table looking for an ia that matches the
7860          * hca_hdl.
7861          */
7862         rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
7863         for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
7864                 blk = daplka_resource.daplka_rc_root[i];
7865                 if (blk == NULL)
7866                         continue;
7867                 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
7868                         rp = blk->daplka_rcblk_blks[j];
7869                         if ((rp == NULL) ||
7870                             ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
7871                             (rp->rs_type != DAPL_TYPE_IA)) {
7872                                 continue;
7873                         }
7874                         /*
7875                          * rp is an IA resource check if it belongs
7876                          * to the hca/port for which we got the event
7877                          */
7878                         ia_rp = (daplka_ia_resource_t *)rp;
7879                         DAPLKA_RS_REF(ia_rp);
7880                         if ((hca_hdl == ia_rp->ia_hca_hdl) &&
7881                             (event->ev_port == ia_rp->ia_port_num)) {
7882                                 /*
7883                                  * walk the ep hash table. Acquire a
7884                                  * reader lock. NULL dgid indicates
7885                                  * local port up event.
7886                                  */
7887                                 daplka_hash_walk(&ia_rp->ia_ep_htbl,
7888                                     daplka_ep_failback, NULL, RW_READER);
7889                         }
7890                         DAPLKA_RS_UNREF(ia_rp);
7891                 }
7892         }
7893         rw_exit(&daplka_resource.daplka_rct_lock);
7894 }
7895 
7896 static int
7897 daplka_handle_hca_detach_event(ibt_async_event_t *event)
7898 {
7899         daplka_hca_t    *hca;
7900 
7901         /*
7902          * find the hca with the matching guid
7903          */
7904         mutex_enter(&daplka_dev->daplka_mutex);
7905         for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
7906             hca = hca->hca_next) {
7907                 if (hca->hca_guid == event->ev_hca_guid) {
7908                         if (DAPLKA_HCA_BUSY(hca)) {
7909                                 mutex_exit(&daplka_dev->daplka_mutex);
7910                                 return (IBT_HCA_RESOURCES_NOT_FREED);
7911                         }
7912                         daplka_dequeue_hca(daplka_dev, hca);
7913                         break;
7914                 }
7915         }
7916         mutex_exit(&daplka_dev->daplka_mutex);
7917 
7918         if (hca == NULL)
7919                 return (IBT_FAILURE);
7920 
7921         return (daplka_fini_hca(daplka_dev, hca));
7922 }
7923 
7924 /*
7925  * This routine is called in kernel context
7926  */
7927 static void
7928 daplka_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7929     ibt_async_code_t code, ibt_async_event_t *event)
7930 {
7931         switch (code) {
7932         case IBT_ERROR_CATASTROPHIC_CHAN:
7933         case IBT_ERROR_INVALID_REQUEST_CHAN:
7934         case IBT_ERROR_ACCESS_VIOLATION_CHAN:
7935         case IBT_ERROR_PATH_MIGRATE_REQ:
7936                 D2("daplka_async_handler(): Channel affiliated=0x%x\n", code);
7937                 /* These events are affiliated with a the RC channel */
7938                 daplka_rc_async_handler(clnt_private, hca_hdl, code, event);
7939                 break;
7940         case IBT_ERROR_CQ:
7941                 /* This event is affiliated with a the CQ */
7942                 D2("daplka_async_handler(): IBT_ERROR_CQ\n");
7943                 daplka_cq_async_handler(clnt_private, hca_hdl, code, event);
7944                 break;
7945         case IBT_ERROR_PORT_DOWN:
7946                 D2("daplka_async_handler(): IBT_PORT_DOWN\n");
7947                 break;
7948         case IBT_EVENT_PORT_UP:
7949                 D2("daplka_async_handler(): IBT_PORT_UP\n");
7950                 if (daplka_apm) {
7951                         daplka_un_async_handler(clnt_private, hca_hdl, code,
7952                             event);
7953                 }
7954                 break;
7955         case IBT_HCA_ATTACH_EVENT:
7956                 /*
7957                  * NOTE: In some error recovery paths, it is possible to
7958                  * receive IBT_HCA_ATTACH_EVENTs on already known HCAs.
7959                  */
7960                 D2("daplka_async_handler(): IBT_HCA_ATTACH\n");
7961                 (void) daplka_init_hca(daplka_dev, event->ev_hca_guid);
7962                 break;
7963         case IBT_HCA_DETACH_EVENT:
7964                 D2("daplka_async_handler(): IBT_HCA_DETACH\n");
7965                 /* Free all hca resources and close the HCA. */
7966                 (void) daplka_handle_hca_detach_event(event);
7967                 break;
7968         case IBT_EVENT_PATH_MIGRATED:
7969                 /* This event is affiliated with APM */
7970                 D2("daplka_async_handler(): IBT_PATH_MIGRATED.\n");
7971                 break;
7972         default:
7973                 D2("daplka_async_handler(): unhandled code = 0x%x\n", code);
7974                 break;
7975         }
7976 }
7977 
7978 /*
7979  * This routine is called in kernel context related to Subnet events
7980  */
7981 /*ARGSUSED*/
7982 static void
7983 daplka_sm_notice_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
7984         ibt_subnet_event_t *event)
7985 {
7986         ib_gid_t *sgid = &gid;
7987         ib_gid_t *dgid;
7988 
7989         dgid = &event->sm_notice_gid;
7990         switch (code) {
7991         case IBT_SM_EVENT_GID_AVAIL:
7992                 /* This event is affiliated with remote port up */
7993                 D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_AVAIL\n");
7994                 if (daplka_apm)
7995                         daplka_sm_gid_avail(sgid, dgid);
7996                 return;
7997         case IBT_SM_EVENT_GID_UNAVAIL:
7998                 /* This event is affiliated with remote port down */
7999                 D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_UNAVAIL\n");
8000                 return;
8001         default:
8002                 D2("daplka_sm_notice_handler(): unhandled IBT_SM_EVENT_[%d]\n",
8003                     code);
8004                 return;
8005         }
8006 }
8007 
8008 /*
8009  * This routine is called in kernel context, handles Subnet GID avail events
8010  * which correspond to remote port up. Setting up alternate path or path
8011  * migration (failback) has to be initiated from the active side of the
8012  * original connect.
8013  */
8014 static void
8015 daplka_sm_gid_avail(ib_gid_t *sgid, ib_gid_t *dgid)
8016 {
8017         int                     i, j;
8018         daplka_resource_blk_t   *blk;
8019         daplka_resource_t       *rp;
8020         daplka_ia_resource_t    *ia_rp;
8021 
8022         D2("daplka_sm_gid_avail: sgid=%llx:%llx dgid=%llx:%llx\n",
8023             (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid,
8024             (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
8025 
8026         /*
8027          * Walk the resource table looking for an ia that matches the sgid
8028          */
8029         rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
8030         for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
8031                 blk = daplka_resource.daplka_rc_root[i];
8032                 if (blk == NULL)
8033                         continue;
8034                 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
8035                         rp = blk->daplka_rcblk_blks[j];
8036                         if ((rp == NULL) ||
8037                             ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
8038                             (rp->rs_type != DAPL_TYPE_IA)) {
8039                                 continue;
8040                         }
8041                         /*
8042                          * rp is an IA resource check if its gid
8043                          * matches with the calling sgid
8044                          */
8045                         ia_rp = (daplka_ia_resource_t *)rp;
8046                         DAPLKA_RS_REF(ia_rp);
8047                         if ((sgid->gid_prefix ==
8048                             ia_rp->ia_hca_sgid.gid_prefix) &&
8049                             (sgid->gid_guid == ia_rp->ia_hca_sgid.gid_guid)) {
8050                                 /*
8051                                  * walk the ep hash table. Acquire a
8052                                  * reader lock.
8053                                  */
8054                                 daplka_hash_walk(&ia_rp->ia_ep_htbl,
8055                                     daplka_ep_failback,
8056                                     (void *)dgid, RW_READER);
8057                         }
8058                         DAPLKA_RS_UNREF(ia_rp);
8059                 }
8060         }
8061         rw_exit(&daplka_resource.daplka_rct_lock);
8062 }
8063 
8064 /*
8065  * This routine is called in kernel context to get and set an alternate path
8066  */
8067 static int
8068 daplka_ep_altpath(daplka_ep_resource_t *ep_rp, ib_gid_t *dgid)
8069 {
8070         ibt_alt_path_info_t path_info;
8071         ibt_alt_path_attr_t path_attr;
8072         ibt_ap_returns_t ap_rets;
8073         ibt_status_t status;
8074 
8075         D2("daplka_ep_altpath : ibt_get_alt_path()\n");
8076         bzero(&path_info, sizeof (ibt_alt_path_info_t));
8077         bzero(&path_attr, sizeof (ibt_alt_path_attr_t));
8078         if (dgid != NULL) {
8079                 path_attr.apa_sgid = ep_rp->ep_sgid;
8080                 path_attr.apa_dgid = *dgid;
8081         }
8082         status = ibt_get_alt_path(ep_rp->ep_chan_hdl, IBT_PATH_AVAIL,
8083             &path_attr, &path_info);
8084         if (status != IBT_SUCCESS) {
8085                 DERR("daplka_ep_altpath : ibt_get_alt_path failed %d\n",
8086                     status);
8087                 return (1);
8088         }
8089 
8090         D2("daplka_ep_altpath : ibt_set_alt_path()\n");
8091         bzero(&ap_rets, sizeof (ibt_ap_returns_t));
8092         status = ibt_set_alt_path(ep_rp->ep_chan_hdl, IBT_BLOCKING,
8093             &path_info, NULL, 0, &ap_rets);
8094         if ((status != IBT_SUCCESS) ||
8095             (ap_rets.ap_status != IBT_CM_AP_LOADED)) {
8096                 DERR("daplka_ep_altpath : ibt_set_alt_path failed "
8097                     "status %d ap_status %d\n", status, ap_rets.ap_status);
8098                 return (1);
8099         }
8100         return (0);
8101 }
8102 
8103 /*
8104  * This routine is called in kernel context to failback to the original path
8105  */
8106 static int
8107 daplka_ep_failback(void *objp, void *arg)
8108 {
8109         daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)objp;
8110         ib_gid_t *dgid;
8111         ibt_status_t status;
8112         ibt_rc_chan_query_attr_t chan_attrs;
8113         int i;
8114 
8115         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
8116         D2("daplka_ep_failback ep : sgid=%llx:%llx dgid=%llx:%llx\n",
8117             (longlong_t)ep_rp->ep_sgid.gid_prefix,
8118             (longlong_t)ep_rp->ep_sgid.gid_guid,
8119             (longlong_t)ep_rp->ep_dgid.gid_prefix,
8120             (longlong_t)ep_rp->ep_dgid.gid_guid);
8121 
8122         /*
8123          * daplka_ep_failback is called from daplka_hash_walk
8124          * which holds the read lock on hash table to protect
8125          * the endpoint resource from removal
8126          */
8127         mutex_enter(&ep_rp->ep_lock);
8128         /* check for unconnected endpoints */
8129         /* first check for ep state */
8130         if (ep_rp->ep_state != DAPLKA_EP_STATE_CONNECTED) {
8131                 mutex_exit(&ep_rp->ep_lock);
8132                 D2("daplka_ep_failback : endpoints not connected\n");
8133                 return (0);
8134         }
8135 
8136         /* second check for gids */
8137         if (((ep_rp->ep_sgid.gid_prefix == 0) &&
8138             (ep_rp->ep_sgid.gid_guid == 0)) ||
8139             ((ep_rp->ep_dgid.gid_prefix == 0) &&
8140             (ep_rp->ep_dgid.gid_guid == 0))) {
8141                 mutex_exit(&ep_rp->ep_lock);
8142                 D2("daplka_ep_failback : skip unconnected endpoints\n");
8143                 return (0);
8144         }
8145 
8146         /*
8147          * matching destination ep
8148          * when dgid is NULL, the async event is a local port up.
8149          * dgid becomes wild card, i.e. all endpoints match
8150          */
8151         dgid = (ib_gid_t *)arg;
8152         if (dgid == NULL) {
8153                 /* ignore loopback ep */
8154                 if ((ep_rp->ep_sgid.gid_prefix == ep_rp->ep_dgid.gid_prefix) &&
8155                     (ep_rp->ep_sgid.gid_guid == ep_rp->ep_dgid.gid_guid)) {
8156                         mutex_exit(&ep_rp->ep_lock);
8157                         D2("daplka_ep_failback : skip loopback endpoints\n");
8158                         return (0);
8159                 }
8160         } else {
8161                 /* matching remote ep */
8162                 if ((ep_rp->ep_dgid.gid_prefix != dgid->gid_prefix) ||
8163                     (ep_rp->ep_dgid.gid_guid != dgid->gid_guid)) {
8164                         mutex_exit(&ep_rp->ep_lock);
8165                         D2("daplka_ep_failback : unrelated endpoints\n");
8166                         return (0);
8167                 }
8168         }
8169 
8170         /* call get and set altpath with original dgid used in ep_connect */
8171         if (daplka_ep_altpath(ep_rp, &ep_rp->ep_dgid)) {
8172                 mutex_exit(&ep_rp->ep_lock);
8173                 return (0);
8174         }
8175 
8176         /*
8177          * wait for migration state to be ARMed
8178          * e.g. a post_send msg will transit mig_state from REARM to ARM
8179          */
8180         for (i = 0; i < daplka_query_aft_setaltpath; i++) {
8181                 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
8182                 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
8183                 if (status != IBT_SUCCESS) {
8184                         mutex_exit(&ep_rp->ep_lock);
8185                         DERR("daplka_ep_altpath : ibt_query_rc_channel err\n");
8186                         return (0);
8187                 }
8188                 if (chan_attrs.rc_mig_state == IBT_STATE_ARMED)
8189                         break;
8190         }
8191 
8192         D2("daplka_ep_altpath : query[%d] mig_st=%d\n",
8193             i, chan_attrs.rc_mig_state);
8194         D2("daplka_ep_altpath : P sgid=%llx:%llx dgid=%llx:%llx\n",
8195             (longlong_t)
8196             chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_prefix,
8197             (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_guid,
8198             (longlong_t)
8199             chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_prefix,
8200             (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_guid);
8201         D2("daplka_ep_altpath : A sgid=%llx:%llx dgid=%llx:%llx\n",
8202             (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_prefix,
8203             (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_guid,
8204             (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_prefix,
8205             (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_guid);
8206 
8207         /* skip failback on ARMed state not reached or env override */
8208         if ((i >= daplka_query_aft_setaltpath) || (daplka_failback == 0)) {
8209                 mutex_exit(&ep_rp->ep_lock);
8210                 DERR("daplka_ep_altpath : ARMed state not reached\n");
8211                 return (0);
8212         }
8213 
8214         D2("daplka_ep_failback : ibt_migrate_path() to original ep\n");
8215         status = ibt_migrate_path(ep_rp->ep_chan_hdl);
8216         if (status != IBT_SUCCESS) {
8217                 mutex_exit(&ep_rp->ep_lock);
8218                 DERR("daplka_ep_failback : migration failed "
8219                     "status %d\n", status);
8220                 return (0);
8221         }
8222 
8223         /* call get and altpath with NULL dgid to indicate unspecified dgid */
8224         (void) daplka_ep_altpath(ep_rp, NULL);
8225         mutex_exit(&ep_rp->ep_lock);
8226         return (0);
8227 }
8228 
8229 /*
8230  * IBTF wrappers used for resource accounting
8231  */
8232 static ibt_status_t
8233 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *ep_rp, ibt_hca_hdl_t hca_hdl,
8234     ibt_chan_alloc_flags_t flags, ibt_rc_chan_alloc_args_t *args,
8235     ibt_channel_hdl_t *chan_hdl_p, ibt_chan_sizes_t *sizes)
8236 {
8237         daplka_hca_t    *hca_p;
8238         uint32_t        max_qps;
8239         boolean_t       acct_enabled;
8240         ibt_status_t    status;
8241 
8242         acct_enabled = daplka_accounting_enabled;
8243         hca_p = ep_rp->ep_hca;
8244         max_qps = daplka_max_qp_percent * hca_p->hca_attr.hca_max_chans / 100;
8245 
8246         if (acct_enabled) {
8247                 if (daplka_max_qp_percent != 0 &&
8248                     max_qps <= hca_p->hca_qp_count) {
8249                         DERR("ibt_alloc_rc_channel: resource limit exceeded "
8250                             "(limit %d, count %d)\n", max_qps,
8251                             hca_p->hca_qp_count);
8252                         return (IBT_INSUFF_RESOURCE);
8253                 }
8254                 DAPLKA_RS_ACCT_INC(ep_rp, 1);
8255                 atomic_inc_32(&hca_p->hca_qp_count);
8256         }
8257         status = ibt_alloc_rc_channel(hca_hdl, flags, args, chan_hdl_p, sizes);
8258 
8259         if (status != IBT_SUCCESS && acct_enabled) {
8260                 DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8261                 atomic_dec_32(&hca_p->hca_qp_count);
8262         }
8263         return (status);
8264 }
8265 
8266 static ibt_status_t
8267 daplka_ibt_free_channel(daplka_ep_resource_t *ep_rp, ibt_channel_hdl_t chan_hdl)
8268 {
8269         daplka_hca_t    *hca_p;
8270         ibt_status_t    status;
8271 
8272         hca_p = ep_rp->ep_hca;
8273 
8274         status = ibt_free_channel(chan_hdl);
8275         if (status != IBT_SUCCESS) {
8276                 return (status);
8277         }
8278         if (DAPLKA_RS_ACCT_CHARGED(ep_rp) > 0) {
8279                 DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8280                 atomic_dec_32(&hca_p->hca_qp_count);
8281         }
8282         return (status);
8283 }
8284 
8285 static ibt_status_t
8286 daplka_ibt_alloc_cq(daplka_evd_resource_t *evd_rp, ibt_hca_hdl_t hca_hdl,
8287     ibt_cq_attr_t *cq_attr, ibt_cq_hdl_t *ibt_cq_p, uint32_t *real_size)
8288 {
8289         daplka_hca_t    *hca_p;
8290         uint32_t        max_cqs;
8291         boolean_t       acct_enabled;
8292         ibt_status_t    status;
8293 
8294         acct_enabled = daplka_accounting_enabled;
8295         hca_p = evd_rp->evd_hca;
8296         max_cqs = daplka_max_cq_percent * hca_p->hca_attr.hca_max_cq / 100;
8297 
8298         if (acct_enabled) {
8299                 if (daplka_max_cq_percent != 0 &&
8300                     max_cqs <= hca_p->hca_cq_count) {
8301                         DERR("ibt_alloc_cq: resource limit exceeded "
8302                             "(limit %d, count %d)\n", max_cqs,
8303                             hca_p->hca_cq_count);
8304                         return (IBT_INSUFF_RESOURCE);
8305                 }
8306                 DAPLKA_RS_ACCT_INC(evd_rp, 1);
8307                 atomic_inc_32(&hca_p->hca_cq_count);
8308         }
8309         status = ibt_alloc_cq(hca_hdl, cq_attr, ibt_cq_p, real_size);
8310 
8311         if (status != IBT_SUCCESS && acct_enabled) {
8312                 DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8313                 atomic_dec_32(&hca_p->hca_cq_count);
8314         }
8315         return (status);
8316 }
8317 
8318 static ibt_status_t
8319 daplka_ibt_free_cq(daplka_evd_resource_t *evd_rp, ibt_cq_hdl_t cq_hdl)
8320 {
8321         daplka_hca_t    *hca_p;
8322         ibt_status_t    status;
8323 
8324         hca_p = evd_rp->evd_hca;
8325 
8326         status = ibt_free_cq(cq_hdl);
8327         if (status != IBT_SUCCESS) {
8328                 return (status);
8329         }
8330         if (DAPLKA_RS_ACCT_CHARGED(evd_rp) > 0) {
8331                 DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8332                 atomic_dec_32(&hca_p->hca_cq_count);
8333         }
8334         return (status);
8335 }
8336 
8337 static ibt_status_t
8338 daplka_ibt_alloc_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8339     ibt_pd_flags_t flags, ibt_pd_hdl_t *pd_hdl_p)
8340 {
8341         daplka_hca_t    *hca_p;
8342         uint32_t        max_pds;
8343         boolean_t       acct_enabled;
8344         ibt_status_t    status;
8345 
8346         acct_enabled = daplka_accounting_enabled;
8347         hca_p = pd_rp->pd_hca;
8348         max_pds = daplka_max_pd_percent * hca_p->hca_attr.hca_max_pd / 100;
8349 
8350         if (acct_enabled) {
8351                 if (daplka_max_pd_percent != 0 &&
8352                     max_pds <= hca_p->hca_pd_count) {
8353                         DERR("ibt_alloc_pd: resource limit exceeded "
8354                             "(limit %d, count %d)\n", max_pds,
8355                             hca_p->hca_pd_count);
8356                         return (IBT_INSUFF_RESOURCE);
8357                 }
8358                 DAPLKA_RS_ACCT_INC(pd_rp, 1);
8359                 atomic_inc_32(&hca_p->hca_pd_count);
8360         }
8361         status = ibt_alloc_pd(hca_hdl, flags, pd_hdl_p);
8362 
8363         if (status != IBT_SUCCESS && acct_enabled) {
8364                 DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8365                 atomic_dec_32(&hca_p->hca_pd_count);
8366         }
8367         return (status);
8368 }
8369 
8370 static ibt_status_t
8371 daplka_ibt_free_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8372     ibt_pd_hdl_t pd_hdl)
8373 {
8374         daplka_hca_t    *hca_p;
8375         ibt_status_t    status;
8376 
8377         hca_p = pd_rp->pd_hca;
8378 
8379         status = ibt_free_pd(hca_hdl, pd_hdl);
8380         if (status != IBT_SUCCESS) {
8381                 return (status);
8382         }
8383         if (DAPLKA_RS_ACCT_CHARGED(pd_rp) > 0) {
8384                 DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8385                 atomic_dec_32(&hca_p->hca_pd_count);
8386         }
8387         return (status);
8388 }
8389 
8390 static ibt_status_t
8391 daplka_ibt_alloc_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8392     ibt_pd_hdl_t pd_hdl, ibt_mw_flags_t flags, ibt_mw_hdl_t *mw_hdl_p,
8393     ibt_rkey_t *rkey_p)
8394 {
8395         daplka_hca_t    *hca_p;
8396         uint32_t        max_mws;
8397         boolean_t       acct_enabled;
8398         ibt_status_t    status;
8399 
8400         acct_enabled = daplka_accounting_enabled;
8401         hca_p = mw_rp->mw_hca;
8402         max_mws = daplka_max_mw_percent * hca_p->hca_attr.hca_max_mem_win / 100;
8403 
8404         if (acct_enabled) {
8405                 if (daplka_max_mw_percent != 0 &&
8406                     max_mws <= hca_p->hca_mw_count) {
8407                         DERR("ibt_alloc_mw: resource limit exceeded "
8408                             "(limit %d, count %d)\n", max_mws,
8409                             hca_p->hca_mw_count);
8410                         return (IBT_INSUFF_RESOURCE);
8411                 }
8412                 DAPLKA_RS_ACCT_INC(mw_rp, 1);
8413                 atomic_inc_32(&hca_p->hca_mw_count);
8414         }
8415         status = ibt_alloc_mw(hca_hdl, pd_hdl, flags, mw_hdl_p, rkey_p);
8416 
8417         if (status != IBT_SUCCESS && acct_enabled) {
8418                 DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8419                 atomic_dec_32(&hca_p->hca_mw_count);
8420         }
8421         return (status);
8422 }
8423 
8424 static ibt_status_t
8425 daplka_ibt_free_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8426     ibt_mw_hdl_t mw_hdl)
8427 {
8428         daplka_hca_t    *hca_p;
8429         ibt_status_t    status;
8430 
8431         hca_p = mw_rp->mw_hca;
8432 
8433         status = ibt_free_mw(hca_hdl, mw_hdl);
8434         if (status != IBT_SUCCESS) {
8435                 return (status);
8436         }
8437         if (DAPLKA_RS_ACCT_CHARGED(mw_rp) > 0) {
8438                 DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8439                 atomic_dec_32(&hca_p->hca_mw_count);
8440         }
8441         return (status);
8442 }
8443 
8444 static ibt_status_t
8445 daplka_ibt_register_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8446     ibt_pd_hdl_t pd_hdl, ibt_mr_attr_t *mr_attr, ibt_mr_hdl_t *mr_hdl_p,
8447     ibt_mr_desc_t *mr_desc_p)
8448 {
8449         daplka_hca_t    *hca_p;
8450         uint32_t        max_mrs;
8451         boolean_t       acct_enabled;
8452         ibt_status_t    status;
8453 
8454         acct_enabled = daplka_accounting_enabled;
8455         hca_p = mr_rp->mr_hca;
8456         max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
8457 
8458         if (acct_enabled) {
8459                 if (daplka_max_mr_percent != 0 &&
8460                     max_mrs <= hca_p->hca_mr_count) {
8461                         DERR("ibt_register_mr: resource limit exceeded "
8462                             "(limit %d, count %d)\n", max_mrs,
8463                             hca_p->hca_mr_count);
8464                         return (IBT_INSUFF_RESOURCE);
8465                 }
8466                 DAPLKA_RS_ACCT_INC(mr_rp, 1);
8467                 atomic_inc_32(&hca_p->hca_mr_count);
8468         }
8469         status = ibt_register_mr(hca_hdl, pd_hdl, mr_attr, mr_hdl_p, mr_desc_p);
8470 
8471         if (status != IBT_SUCCESS && acct_enabled) {
8472                 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8473                 atomic_dec_32(&hca_p->hca_mr_count);
8474         }
8475         return (status);
8476 }
8477 
8478 static ibt_status_t
8479 daplka_ibt_register_shared_mr(daplka_mr_resource_t *mr_rp,
8480     ibt_hca_hdl_t hca_hdl, ibt_mr_hdl_t mr_hdl, ibt_pd_hdl_t pd_hdl,
8481     ibt_smr_attr_t *smr_attr_p, ibt_mr_hdl_t *mr_hdl_p,
8482     ibt_mr_desc_t *mr_desc_p)
8483 {
8484         daplka_hca_t    *hca_p;
8485         uint32_t        max_mrs;
8486         boolean_t       acct_enabled;
8487         ibt_status_t    status;
8488 
8489         acct_enabled = daplka_accounting_enabled;
8490         hca_p = mr_rp->mr_hca;
8491         max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
8492 
8493         if (acct_enabled) {
8494                 if (daplka_max_mr_percent != 0 &&
8495                     max_mrs <= hca_p->hca_mr_count) {
8496                         DERR("ibt_register_shared_mr: resource limit exceeded "
8497                             "(limit %d, count %d)\n", max_mrs,
8498                             hca_p->hca_mr_count);
8499                         return (IBT_INSUFF_RESOURCE);
8500                 }
8501                 DAPLKA_RS_ACCT_INC(mr_rp, 1);
8502                 atomic_inc_32(&hca_p->hca_mr_count);
8503         }
8504         status = ibt_register_shared_mr(hca_hdl, mr_hdl, pd_hdl,
8505             smr_attr_p, mr_hdl_p, mr_desc_p);
8506 
8507         if (status != IBT_SUCCESS && acct_enabled) {
8508                 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8509                 atomic_dec_32(&hca_p->hca_mr_count);
8510         }
8511         return (status);
8512 }
8513 
8514 static ibt_status_t
8515 daplka_ibt_deregister_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8516     ibt_mr_hdl_t mr_hdl)
8517 {
8518         daplka_hca_t    *hca_p;
8519         ibt_status_t    status;
8520 
8521         hca_p = mr_rp->mr_hca;
8522 
8523         status = ibt_deregister_mr(hca_hdl, mr_hdl);
8524         if (status != IBT_SUCCESS) {
8525                 return (status);
8526         }
8527         if (DAPLKA_RS_ACCT_CHARGED(mr_rp) > 0) {
8528                 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8529                 atomic_dec_32(&hca_p->hca_mr_count);
8530         }
8531         return (status);
8532 }
8533 
8534 static ibt_status_t
8535 daplka_ibt_alloc_srq(daplka_srq_resource_t *srq_rp, ibt_hca_hdl_t hca_hdl,
8536     ibt_srq_flags_t flags, ibt_pd_hdl_t pd, ibt_srq_sizes_t *reqsz,
8537     ibt_srq_hdl_t *srq_hdl_p, ibt_srq_sizes_t *realsz)
8538 {
8539         daplka_hca_t    *hca_p;
8540         uint32_t        max_srqs;
8541         boolean_t       acct_enabled;
8542         ibt_status_t    status;
8543 
8544         acct_enabled = daplka_accounting_enabled;
8545         hca_p = srq_rp->srq_hca;
8546         max_srqs = daplka_max_srq_percent * hca_p->hca_attr.hca_max_srqs / 100;
8547 
8548         if (acct_enabled) {
8549                 if (daplka_max_srq_percent != 0 &&
8550                     max_srqs <= hca_p->hca_srq_count) {
8551                         DERR("ibt_alloc_srq: resource limit exceeded "
8552                             "(limit %d, count %d)\n", max_srqs,
8553                             hca_p->hca_srq_count);
8554                         return (IBT_INSUFF_RESOURCE);
8555                 }
8556                 DAPLKA_RS_ACCT_INC(srq_rp, 1);
8557                 atomic_inc_32(&hca_p->hca_srq_count);
8558         }
8559         status = ibt_alloc_srq(hca_hdl, flags, pd, reqsz, srq_hdl_p, realsz);
8560 
8561         if (status != IBT_SUCCESS && acct_enabled) {
8562                 DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8563                 atomic_dec_32(&hca_p->hca_srq_count);
8564         }
8565         return (status);
8566 }
8567 
8568 static ibt_status_t
8569 daplka_ibt_free_srq(daplka_srq_resource_t *srq_rp, ibt_srq_hdl_t srq_hdl)
8570 {
8571         daplka_hca_t    *hca_p;
8572         ibt_status_t    status;
8573 
8574         hca_p = srq_rp->srq_hca;
8575 
8576         D3("ibt_free_srq: %p %p\n", srq_rp, srq_hdl);
8577 
8578         status = ibt_free_srq(srq_hdl);
8579         if (status != IBT_SUCCESS) {
8580                 return (status);
8581         }
8582         if (DAPLKA_RS_ACCT_CHARGED(srq_rp) > 0) {
8583                 DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8584                 atomic_dec_32(&hca_p->hca_srq_count);
8585         }
8586         return (status);
8587 }
8588 
8589 
8590 static int
8591 daplka_common_ioctl(int cmd, minor_t rnum, intptr_t arg, int mode,
8592         cred_t *cred, int *rvalp)
8593 {
8594         int error;
8595 
8596         switch (cmd) {
8597         case DAPL_IA_CREATE:
8598                 error = daplka_ia_create(rnum, arg, mode, cred, rvalp);
8599                 break;
8600 
8601         /* can potentially add other commands here */
8602 
8603         default:
8604                 DERR("daplka_common_ioctl: cmd not supported\n");
8605                 error = DDI_FAILURE;
8606         }
8607         return (error);
8608 }
8609 
8610 static int
8611 daplka_evd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8612         cred_t *cred, int *rvalp)
8613 {
8614         int error;
8615 
8616         switch (cmd) {
8617         case DAPL_EVD_CREATE:
8618                 error = daplka_evd_create(rp, arg, mode, cred, rvalp);
8619                 break;
8620 
8621         case DAPL_CQ_RESIZE:
8622                 error = daplka_cq_resize(rp, arg, mode, cred, rvalp);
8623                 break;
8624 
8625         case DAPL_EVENT_POLL:
8626                 error = daplka_event_poll(rp, arg, mode, cred, rvalp);
8627                 break;
8628 
8629         case DAPL_EVENT_WAKEUP:
8630                 error = daplka_event_wakeup(rp, arg, mode, cred, rvalp);
8631                 break;
8632 
8633         case DAPL_EVD_MODIFY_CNO:
8634                 error = daplka_evd_modify_cno(rp, arg, mode, cred, rvalp);
8635                 break;
8636 
8637         case DAPL_EVD_FREE:
8638                 error = daplka_evd_free(rp, arg, mode, cred, rvalp);
8639                 break;
8640 
8641         default:
8642                 DERR("daplka_evd_ioctl: cmd not supported\n");
8643                 error = DDI_FAILURE;
8644         }
8645         return (error);
8646 }
8647 
8648 static int
8649 daplka_ep_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8650         cred_t *cred, int *rvalp)
8651 {
8652         int error;
8653 
8654         switch (cmd) {
8655         case DAPL_EP_MODIFY:
8656                 error = daplka_ep_modify(rp, arg, mode, cred, rvalp);
8657                 break;
8658 
8659         case DAPL_EP_FREE:
8660                 error = daplka_ep_free(rp, arg, mode, cred, rvalp);
8661                 break;
8662 
8663         case DAPL_EP_CONNECT:
8664                 error = daplka_ep_connect(rp, arg, mode, cred, rvalp);
8665                 break;
8666 
8667         case DAPL_EP_DISCONNECT:
8668                 error = daplka_ep_disconnect(rp, arg, mode, cred, rvalp);
8669                 break;
8670 
8671         case DAPL_EP_REINIT:
8672                 error = daplka_ep_reinit(rp, arg, mode, cred, rvalp);
8673                 break;
8674 
8675         case DAPL_EP_CREATE:
8676                 error = daplka_ep_create(rp, arg, mode, cred, rvalp);
8677                 break;
8678 
8679         default:
8680                 DERR("daplka_ep_ioctl: cmd not supported\n");
8681                 error = DDI_FAILURE;
8682         }
8683         return (error);
8684 }
8685 
8686 static int
8687 daplka_mr_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8688         cred_t *cred, int *rvalp)
8689 {
8690         int error;
8691 
8692         switch (cmd) {
8693         case DAPL_MR_REGISTER:
8694                 error = daplka_mr_register(rp, arg, mode, cred, rvalp);
8695                 break;
8696 
8697         case DAPL_MR_REGISTER_LMR:
8698                 error = daplka_mr_register_lmr(rp, arg, mode, cred, rvalp);
8699                 break;
8700 
8701         case DAPL_MR_REGISTER_SHARED:
8702                 error = daplka_mr_register_shared(rp, arg, mode, cred, rvalp);
8703                 break;
8704 
8705         case DAPL_MR_DEREGISTER:
8706                 error = daplka_mr_deregister(rp, arg, mode, cred, rvalp);
8707                 break;
8708 
8709         case DAPL_MR_SYNC:
8710                 error = daplka_mr_sync(rp, arg, mode, cred, rvalp);
8711                 break;
8712 
8713         default:
8714                 DERR("daplka_mr_ioctl: cmd not supported\n");
8715                 error = DDI_FAILURE;
8716         }
8717         return (error);
8718 }
8719 
8720 static int
8721 daplka_mw_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8722         cred_t *cred, int *rvalp)
8723 {
8724         int error;
8725 
8726         switch (cmd) {
8727         case DAPL_MW_ALLOC:
8728                 error = daplka_mw_alloc(rp, arg, mode, cred, rvalp);
8729                 break;
8730 
8731         case DAPL_MW_FREE:
8732                 error = daplka_mw_free(rp, arg, mode, cred, rvalp);
8733                 break;
8734 
8735         default:
8736                 DERR("daplka_mw_ioctl: cmd not supported\n");
8737                 error = DDI_FAILURE;
8738         }
8739         return (error);
8740 }
8741 
8742 static int
8743 daplka_cno_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8744         cred_t *cred, int *rvalp)
8745 {
8746         int error;
8747 
8748         switch (cmd) {
8749         case DAPL_CNO_ALLOC:
8750                 error = daplka_cno_alloc(rp, arg, mode, cred, rvalp);
8751                 break;
8752 
8753         case DAPL_CNO_FREE:
8754                 error = daplka_cno_free(rp, arg, mode, cred, rvalp);
8755                 break;
8756 
8757         case DAPL_CNO_WAIT:
8758                 error = daplka_cno_wait(rp, arg, mode, cred, rvalp);
8759                 break;
8760 
8761         default:
8762                 DERR("daplka_cno_ioctl: cmd not supported\n");
8763                 error = DDI_FAILURE;
8764         }
8765         return (error);
8766 }
8767 
8768 static int
8769 daplka_pd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8770         cred_t *cred, int *rvalp)
8771 {
8772         int error;
8773 
8774         switch (cmd) {
8775         case DAPL_PD_ALLOC:
8776                 error = daplka_pd_alloc(rp, arg, mode, cred, rvalp);
8777                 break;
8778 
8779         case DAPL_PD_FREE:
8780                 error = daplka_pd_free(rp, arg, mode, cred, rvalp);
8781                 break;
8782 
8783         default:
8784                 DERR("daplka_pd_ioctl: cmd not supported\n");
8785                 error = DDI_FAILURE;
8786         }
8787         return (error);
8788 }
8789 
8790 static int
8791 daplka_sp_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8792         cred_t *cred, int *rvalp)
8793 {
8794         int error;
8795 
8796         switch (cmd) {
8797         case DAPL_SERVICE_REGISTER:
8798                 error = daplka_service_register(rp, arg, mode, cred, rvalp);
8799                 break;
8800 
8801         case DAPL_SERVICE_DEREGISTER:
8802                 error = daplka_service_deregister(rp, arg, mode, cred, rvalp);
8803                 break;
8804 
8805         default:
8806                 DERR("daplka_sp_ioctl: cmd not supported\n");
8807                 error = DDI_FAILURE;
8808         }
8809         return (error);
8810 }
8811 
8812 static int
8813 daplka_srq_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8814         cred_t *cred, int *rvalp)
8815 {
8816         int error;
8817 
8818         switch (cmd) {
8819         case DAPL_SRQ_CREATE:
8820                 error = daplka_srq_create(rp, arg, mode, cred, rvalp);
8821                 break;
8822 
8823         case DAPL_SRQ_RESIZE:
8824                 error = daplka_srq_resize(rp, arg, mode, cred, rvalp);
8825                 break;
8826 
8827         case DAPL_SRQ_FREE:
8828                 error = daplka_srq_free(rp, arg, mode, cred, rvalp);
8829                 break;
8830 
8831         default:
8832                 DERR("daplka_srq_ioctl: cmd(%d) not supported\n", cmd);
8833                 error = DDI_FAILURE;
8834                 break;
8835         }
8836         return (error);
8837 }
8838 
8839 static int
8840 daplka_misc_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8841         cred_t *cred, int *rvalp)
8842 {
8843         int error;
8844 
8845         switch (cmd) {
8846         case DAPL_CR_ACCEPT:
8847                 error = daplka_cr_accept(rp, arg, mode, cred, rvalp);
8848                 break;
8849 
8850         case DAPL_CR_REJECT:
8851                 error = daplka_cr_reject(rp, arg, mode, cred, rvalp);
8852                 break;
8853 
8854         case DAPL_IA_QUERY:
8855                 error = daplka_ia_query(rp, arg, mode, cred, rvalp);
8856                 break;
8857 
8858         case DAPL_CR_HANDOFF:
8859                 error = daplka_cr_handoff(rp, arg, mode, cred, rvalp);
8860                 break;
8861 
8862         default:
8863                 DERR("daplka_misc_ioctl: cmd not supported\n");
8864                 error = DDI_FAILURE;
8865         }
8866         return (error);
8867 }
8868 
8869 /*ARGSUSED*/
8870 static int
8871 daplka_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
8872         int *rvalp)
8873 {
8874         daplka_ia_resource_t    *ia_rp;
8875         minor_t                 rnum;
8876         int                     error = 0;
8877 
8878         rnum = getminor(dev);
8879         ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
8880         if (ia_rp == NULL) {
8881                 DERR("ioctl: resource not found, rnum %d\n", rnum);
8882                 return (ENXIO);
8883         }
8884 
8885         D4("ioctl: rnum = %d, cmd = 0x%x\n", rnum, cmd);
8886         if (DAPLKA_RS_RESERVED(ia_rp)) {
8887                 error = daplka_common_ioctl(cmd, rnum, arg, mode, cred, rvalp);
8888                 return (error);
8889         }
8890         if (DAPLKA_RS_TYPE(ia_rp) != DAPL_TYPE_IA) {
8891                 DERR("ioctl: invalid type %d\n", DAPLKA_RS_TYPE(ia_rp));
8892                 error = EINVAL;
8893                 goto cleanup;
8894         }
8895         if (ia_rp->ia_pid != ddi_get_pid()) {
8896                 DERR("ioctl: ia_pid %d != pid %d\n",
8897                     ia_rp->ia_pid, ddi_get_pid());
8898                 error = EINVAL;
8899                 goto cleanup;
8900         }
8901 
8902         switch (cmd & DAPL_TYPE_MASK) {
8903         case DAPL_TYPE_EVD:
8904                 error = daplka_evd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8905                 break;
8906 
8907         case DAPL_TYPE_EP:
8908                 error = daplka_ep_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8909                 break;
8910 
8911         case DAPL_TYPE_MR:
8912                 error = daplka_mr_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8913                 break;
8914 
8915         case DAPL_TYPE_MW:
8916                 error = daplka_mw_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8917                 break;
8918 
8919         case DAPL_TYPE_PD:
8920                 error = daplka_pd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8921                 break;
8922 
8923         case DAPL_TYPE_SP:
8924                 error = daplka_sp_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8925                 break;
8926 
8927         case DAPL_TYPE_CNO:
8928                 error = daplka_cno_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8929                 break;
8930 
8931         case DAPL_TYPE_MISC:
8932                 error = daplka_misc_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8933                 break;
8934 
8935         case DAPL_TYPE_SRQ:
8936                 error = daplka_srq_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8937                 break;
8938 
8939         default:
8940                 DERR("ioctl: invalid dapl type = %d\n", DAPLKA_RS_TYPE(ia_rp));
8941                 error = DDI_FAILURE;
8942         }
8943 
8944 cleanup:;
8945         DAPLKA_RS_UNREF(ia_rp);
8946         return (error);
8947 }
8948 
8949 /* ARGSUSED */
8950 static int
8951 daplka_open(dev_t *devp, int flag, int otyp, struct cred *cred)
8952 {
8953         minor_t rnum;
8954 
8955         /*
8956          * Char only
8957          */
8958         if (otyp != OTYP_CHR) {
8959                 return (EINVAL);
8960         }
8961 
8962         /*
8963          * Only zero can be opened, clones are used for resources.
8964          */
8965         if (getminor(*devp) != DAPLKA_DRIVER_MINOR) {
8966                 DERR("daplka_open: bad minor %d\n", getminor(*devp));
8967                 return (ENODEV);
8968         }
8969 
8970         /*
8971          * - allocate new minor number
8972          * - update devp argument to new device
8973          */
8974         if (daplka_resource_reserve(&rnum) == 0) {
8975                 *devp = makedevice(getmajor(*devp), rnum);
8976         } else {
8977                 return (ENOMEM);
8978         }
8979 
8980         return (DDI_SUCCESS);
8981 }
8982 
8983 /* ARGSUSED */
8984 static int
8985 daplka_close(dev_t dev, int flag, int otyp, struct cred *cred)
8986 {
8987         daplka_ia_resource_t    *ia_rp;
8988         minor_t                 rnum = getminor(dev);
8989 
8990         /*
8991          * Char only
8992          */
8993         if (otyp != OTYP_CHR) {
8994                 return (EINVAL);
8995         }
8996         D2("daplka_close: closing rnum = %d\n", rnum);
8997         atomic_inc_32(&daplka_pending_close);
8998 
8999         /*
9000          * remove from resource table.
9001          */
9002         ia_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
9003 
9004         /*
9005          * remove the initial reference
9006          */
9007         if (ia_rp != NULL) {
9008                 DAPLKA_RS_UNREF(ia_rp);
9009         }
9010         atomic_dec_32(&daplka_pending_close);
9011         return (DDI_SUCCESS);
9012 }
9013 
9014 
9015 /*
9016  * Resource management routines
9017  *
9018  * We start with no resource array. Each time we run out of slots, we
9019  * reallocate a new larger array and copy the pointer to the new array and
9020  * a new resource blk is allocated and added to the hash table.
9021  *
9022  * The resource control block contains:
9023  *      root    - array of pointer of resource blks
9024  *      sz      - current size of array.
9025  *      len     - last valid entry in array.
9026  *
9027  * A search operation based on a resource number is as follows:
9028  *      index = rnum / RESOURCE_BLKSZ;
9029  *      ASSERT(index < resource_block.len);
9030  *      ASSERT(index < resource_block.sz);
9031  *      offset = rnum % RESOURCE_BLKSZ;
9032  *      ASSERT(offset >= resource_block.root[index]->base);
9033  *      ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
9034  *      return resource_block.root[index]->blks[offset];
9035  *
9036  * A resource blk is freed when its used count reaches zero.
9037  */
9038 
9039 /*
9040  * initializes the global resource table
9041  */
9042 static void
9043 daplka_resource_init(void)
9044 {
9045         rw_init(&daplka_resource.daplka_rct_lock, NULL, RW_DRIVER, NULL);
9046         daplka_resource.daplka_rc_len = 0;
9047         daplka_resource.daplka_rc_sz = 0;
9048         daplka_resource.daplka_rc_cnt = 0;
9049         daplka_resource.daplka_rc_flag = 0;
9050         daplka_resource.daplka_rc_root = NULL;
9051 }
9052 
9053 /*
9054  * destroys the global resource table
9055  */
9056 static void
9057 daplka_resource_fini(void)
9058 {
9059         int     i;
9060 
9061         rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9062         for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
9063                 daplka_resource_blk_t   *blk;
9064                 int                     j;
9065 
9066                 blk = daplka_resource.daplka_rc_root[i];
9067                 if (blk == NULL) {
9068                         continue;
9069                 }
9070                 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
9071                         if (blk->daplka_rcblk_blks[j] != NULL) {
9072                                 DERR("resource_fini: non-null slot %d, %p\n",
9073                                     j, blk->daplka_rcblk_blks[j]);
9074                         }
9075                 }
9076                 kmem_free(blk, sizeof (*blk));
9077                 daplka_resource.daplka_rc_root[i] = NULL;
9078         }
9079         if (daplka_resource.daplka_rc_root != NULL) {
9080                 uint_t  sz;
9081 
9082                 sz = daplka_resource.daplka_rc_sz *
9083                     sizeof (daplka_resource_blk_t *);
9084                 kmem_free(daplka_resource.daplka_rc_root, (uint_t)sz);
9085                 daplka_resource.daplka_rc_root = NULL;
9086                 daplka_resource.daplka_rc_len = 0;
9087                 daplka_resource.daplka_rc_sz = 0;
9088         }
9089         rw_exit(&daplka_resource.daplka_rct_lock);
9090         rw_destroy(&daplka_resource.daplka_rct_lock);
9091 }
9092 
9093 /*
9094  * reserves a slot in the global resource table.
9095  * this is called by the open() syscall. it is needed because
9096  * at open() time, we do not have sufficient information to
9097  * create an IA resource. the library needs to subsequently
9098  * call daplka_ia_create to insert an IA resource into this
9099  * reserved slot.
9100  */
static int
daplka_resource_reserve(minor_t *rnum)
{
        int i, j, empty = -1;
        daplka_resource_blk_t *blk;

        rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
        /*
         * Try to find an empty slot
         */
        for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
                blk = daplka_resource.daplka_rc_root[i];
                if (blk != NULL && blk->daplka_rcblk_avail > 0) {

                        D3("resource_alloc: available blks %d\n",
                            blk->daplka_rcblk_avail);

                        /*
                         * found an empty slot in this blk
                         */
                        for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
                                if (blk->daplka_rcblk_blks[j] == NULL) {
                                        /*
                                         * rnum encodes both the blk index
                                         * and the slot within the blk;
                                         * the slot holds the RESERVED
                                         * sentinel until daplka_ia_create
                                         * installs a real resource via
                                         * daplka_resource_insert().
                                         */
                                        *rnum = (minor_t)
                                            (j + (i * DAPLKA_RC_BLKSZ));
                                        blk->daplka_rcblk_blks[j] =
                                            (daplka_resource_t *)
                                            DAPLKA_RC_RESERVED;
                                        blk->daplka_rcblk_avail--;
                                        daplka_resource.daplka_rc_cnt++;
                                        rw_exit(&daplka_resource.
                                            daplka_rct_lock);
                                        return (0);
                                }
                        }
                } else if (blk == NULL && empty < 0) {
                        /*
                         * remember first empty slot
                         */
                        empty = i;
                }
        }

        /*
         * Couldn't find anything, allocate a new blk
         * Do we need to reallocate the root array
         */
        if (empty < 0) {
                if (daplka_resource.daplka_rc_len ==
                    daplka_resource.daplka_rc_sz) {
                        /*
                         * Allocate new array and copy current stuff into it
                         */
                        daplka_resource_blk_t   **p;
                        uint_t newsz = (uint_t)daplka_resource.daplka_rc_sz +
                            DAPLKA_RC_BLKSZ;

                        D3("resource_alloc: increasing no. of buckets to %d\n",
                            newsz);

                        /*
                         * NOTE(review): kmem_zalloc can return NULL when
                         * daplka_km_flags is KM_NOSLEEP; presumably the
                         * flags used here are KM_SLEEP - confirm.
                         */
                        p = kmem_zalloc(newsz * sizeof (*p), daplka_km_flags);

                        if (daplka_resource.daplka_rc_root) {
                                uint_t oldsz;

                                oldsz = (uint_t)(daplka_resource.daplka_rc_sz *
                                    (int)sizeof (*p));

                                /*
                                 * Copy old data into new space and
                                 * free old stuff
                                 */
                                bcopy(daplka_resource.daplka_rc_root, p, oldsz);
                                kmem_free(daplka_resource.daplka_rc_root,
                                    oldsz);
                        }

                        daplka_resource.daplka_rc_root = p;
                        daplka_resource.daplka_rc_sz = (int)newsz;
                }

                /* grow the table by one (still-NULL) blk pointer */
                empty = daplka_resource.daplka_rc_len;
                daplka_resource.daplka_rc_len++;

                D3("resource_alloc: daplka_rc_len %d\n",
                    daplka_resource.daplka_rc_len);
        }

        /*
         * Allocate a new blk
         */
        blk = kmem_zalloc(sizeof (*blk), daplka_km_flags);
        ASSERT(daplka_resource.daplka_rc_root[empty] == NULL);
        daplka_resource.daplka_rc_root[empty] = blk;
        blk->daplka_rcblk_avail = DAPLKA_RC_BLKSZ - 1;

        /*
         * Allocate slot
         */
        /* slot 0 of the fresh blk is handed out immediately, reserved */
        *rnum = (minor_t)(empty * DAPLKA_RC_BLKSZ);
        blk->daplka_rcblk_blks[0] = (daplka_resource_t *)DAPLKA_RC_RESERVED;
        daplka_resource.daplka_rc_cnt++;
        rw_exit(&daplka_resource.daplka_rct_lock);

        return (0);
}
9206 
9207 /*
9208  * removes resource from global resource table
9209  */
static daplka_resource_t *
daplka_resource_remove(minor_t rnum)
{
        int i, j;
        daplka_resource_blk_t *blk;
        daplka_resource_t *p;

        /* rnum encodes blk index (i) and slot within the blk (j) */
        i = (int)(rnum / DAPLKA_RC_BLKSZ);
        j = (int)(rnum % DAPLKA_RC_BLKSZ);

        rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
        if (i >= daplka_resource.daplka_rc_len) {
                rw_exit(&daplka_resource.daplka_rct_lock);
                DERR("resource_remove: invalid rnum %d\n", rnum);
                return (NULL);
        }

        ASSERT(daplka_resource.daplka_rc_root);
        ASSERT(i < daplka_resource.daplka_rc_len);
        ASSERT(i < daplka_resource.daplka_rc_sz);
        blk = daplka_resource.daplka_rc_root[i];
        if (blk == NULL) {
                rw_exit(&daplka_resource.daplka_rct_lock);
                DERR("resource_remove: invalid rnum %d\n", rnum);
                return (NULL);
        }

        if (blk->daplka_rcblk_blks[j] == NULL) {
                rw_exit(&daplka_resource.daplka_rct_lock);
                DERR("resource_remove: blk->daplka_rcblk_blks[j] == NULL\n");
                return (NULL);
        }
        /* clear the slot; free the whole blk once it is fully empty */
        p = blk->daplka_rcblk_blks[j];
        blk->daplka_rcblk_blks[j] = NULL;
        blk->daplka_rcblk_avail++;
        if (blk->daplka_rcblk_avail == DAPLKA_RC_BLKSZ) {
                /*
                 * free this blk
                 */
                kmem_free(blk, sizeof (*blk));
                daplka_resource.daplka_rc_root[i] = NULL;
        }
        daplka_resource.daplka_rc_cnt--;
        rw_exit(&daplka_resource.daplka_rct_lock);

        /*
         * a slot still holding the RESERVED sentinel (reserved by open
         * but never filled by daplka_ia_create) yields NULL to the
         * caller - there is no real resource to hand back.
         */
        if ((intptr_t)p == DAPLKA_RC_RESERVED) {
                return (NULL);
        } else {
                return (p);
        }
}
9261 
9262 /*
9263  * inserts resource into the slot designated by rnum
9264  */
9265 static int
9266 daplka_resource_insert(minor_t rnum, daplka_resource_t *rp)
9267 {
9268         int i, j, error = -1;
9269         daplka_resource_blk_t *blk;
9270 
9271         /*
9272          * Find resource and lock it in WRITER mode
9273          * search for available resource slot
9274          */
9275 
9276         i = (int)(rnum / DAPLKA_RC_BLKSZ);
9277         j = (int)(rnum % DAPLKA_RC_BLKSZ);
9278 
9279         rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9280         if (i >= daplka_resource.daplka_rc_len) {
9281                 rw_exit(&daplka_resource.daplka_rct_lock);
9282                 DERR("resource_insert: resource %d not found\n", rnum);
9283                 return (-1);
9284         }
9285 
9286         blk = daplka_resource.daplka_rc_root[i];
9287         if (blk != NULL) {
9288                 ASSERT(i < daplka_resource.daplka_rc_len);
9289                 ASSERT(i < daplka_resource.daplka_rc_sz);
9290 
9291                 if ((intptr_t)blk->daplka_rcblk_blks[j] == DAPLKA_RC_RESERVED) {
9292                         blk->daplka_rcblk_blks[j] = rp;
9293                         error = 0;
9294                 } else {
9295                         DERR("resource_insert: %d not reserved, blk = %p\n",
9296                             rnum, blk->daplka_rcblk_blks[j]);
9297                 }
9298         } else {
9299                 DERR("resource_insert: resource %d not found\n", rnum);
9300         }
9301         rw_exit(&daplka_resource.daplka_rct_lock);
9302         return (error);
9303 }
9304 
9305 /*
9306  * finds resource using minor device number
9307  */
static daplka_resource_t *
daplka_resource_lookup(minor_t rnum)
{
        int i, j;
        daplka_resource_blk_t *blk;
        daplka_resource_t *rp;

        /*
         * Find resource and lock it in READER mode
         * search for available resource slot
         */

        /* rnum encodes blk index (i) and slot within the blk (j) */
        i = (int)(rnum / DAPLKA_RC_BLKSZ);
        j = (int)(rnum % DAPLKA_RC_BLKSZ);

        rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
        if (i >= daplka_resource.daplka_rc_len) {
                rw_exit(&daplka_resource.daplka_rct_lock);
                DERR("resource_lookup: resource %d not found\n", rnum);
                return (NULL);
        }

        blk = daplka_resource.daplka_rc_root[i];
        if (blk != NULL) {
                ASSERT(i < daplka_resource.daplka_rc_len);
                ASSERT(i < daplka_resource.daplka_rc_sz);

                /*
                 * note: a slot holding the RESERVED sentinel is returned
                 * as-is (NOT ref-held and NOT NULLed); daplka_ioctl()
                 * relies on this, checking DAPLKA_RS_RESERVED before
                 * treating the pointer as a real resource.
                 */
                rp = blk->daplka_rcblk_blks[j];
                if (rp == NULL || (intptr_t)rp == DAPLKA_RC_RESERVED) {
                        D3("resource_lookup: %d not found, blk = %p\n",
                            rnum, blk->daplka_rcblk_blks[j]);
                } else {
                        /* take a reference on behalf of the caller */
                        DAPLKA_RS_REF((daplka_ia_resource_t *)rp);
                }
        } else {
                DERR("resource_lookup: resource %d not found\n", rnum);
                rp = NULL;
        }
        rw_exit(&daplka_resource.daplka_rct_lock);
        return (rp);
}
9349 
9350 /*
9351  * generic hash table implementation
9352  */
9353 
9354 /*
9355  * daplka_hash_create:
9356  *      initializes a hash table with the specified parameters
9357  *
9358  * input:
9359  *      htblp                   pointer to hash table
9360  *
9361  *      nbuckets                number of buckets (must be power of 2)
9362  *
9363  *      free_func               this function is called on each hash
9364  *                              table element when daplka_hash_destroy
9365  *                              is called
9366  *
9367  *      lookup_func             if daplka_hash_lookup is able to find
9368  *                              the desired object, this function is
9369  *                              applied on the object before
9370  *                              daplka_hash_lookup returns
9371  * output:
9372  *      none
9373  *
9374  * return value(s):
9375  *      EINVAL                  nbuckets is not a power of 2
9376  *      ENOMEM                  cannot allocate buckets
9377  *      0                       success
9378  */
9379 static int
9380 daplka_hash_create(daplka_hash_table_t *htblp, uint_t nbuckets,
9381         void (*free_func)(void *), void (*lookup_func)(void *))
9382 {
9383         int i;
9384 
9385         if ((nbuckets & ~(nbuckets - 1)) != nbuckets) {
9386                 DERR("hash_create: nbuckets not power of 2\n");
9387                 return (EINVAL);
9388         }
9389 
9390         htblp->ht_buckets =
9391             kmem_zalloc(sizeof (daplka_hash_bucket_t) * nbuckets,
9392             daplka_km_flags);
9393         if (htblp->ht_buckets == NULL) {
9394                 DERR("hash_create: cannot allocate buckets\n");
9395                 return (ENOMEM);
9396         }
9397         for (i = 0; i < nbuckets; i++) {
9398                 htblp->ht_buckets[i].hb_count = 0;
9399                 htblp->ht_buckets[i].hb_entries = NULL;
9400         }
9401         rw_init(&htblp->ht_table_lock, NULL, RW_DRIVER, NULL);
9402         mutex_init(&htblp->ht_key_lock, NULL, MUTEX_DRIVER, NULL);
9403 
9404         htblp->ht_count = 0;
9405         htblp->ht_next_hkey = (uint64_t)gethrtime();
9406         htblp->ht_nbuckets = nbuckets;
9407         htblp->ht_free_func = free_func;
9408         htblp->ht_lookup_func = lookup_func;
9409         htblp->ht_initialized = B_TRUE;
9410         D3("hash_create: done, buckets = %d\n", nbuckets);
9411         return (0);
9412 }
9413 
9414 /*
9415  * daplka_hash_insert:
9416  *      inserts an object into a hash table
9417  *
9418  * input:
9419  *      htblp                   pointer to hash table
9420  *
9421  *      hkeyp                   pointer to hash key.
9422  *                              *hkeyp being non-zero means that the caller
9423  *                              has generated its own hkey. if *hkeyp is zero,
9424  *                              this function will generate an hkey for the
9425  *                              caller. it is recommended that the caller
9426  *                              leave the hkey generation to this function
9427  *                              because the hkey is more likely to be evenly
9428  *                              distributed.
9429  *
9430  *      objp                    pointer to object to be inserted into
9431  *                              hash table
9432  *
9433  * output:
9434  *      hkeyp                   the generated hkey is returned via this pointer
9435  *
9436  * return value(s):
9437  *      EINVAL                  invalid parameter
9438  *      ENOMEM                  cannot allocate hash entry
9439  *      0                       successful
9440  */
static int
daplka_hash_insert(daplka_hash_table_t *htblp, uint64_t *hkeyp, void *objp)
{
        daplka_hash_entry_t *hep, *curr_hep;
        daplka_hash_bucket_t *hbp;
        uint32_t bucket;
        uint64_t hkey;

        if (hkeyp == NULL) {
                DERR("hash_insert: hkeyp == NULL\n");
                return (EINVAL);
        }
        /* allocate the entry up front, before taking any locks */
        hep = kmem_zalloc(sizeof (*hep), daplka_km_flags);
        if (hep == NULL) {
                DERR("hash_insert: cannot alloc hash_entry\n");
                return (ENOMEM);
        }
        if (*hkeyp == 0) {
                /* generate a new key */
                mutex_enter(&htblp->ht_key_lock);
                hkey = ++htblp->ht_next_hkey;
                if (hkey == 0) {
                        /* 0 means "generate for me"; never hand it out */
                        hkey = htblp->ht_next_hkey = (uint64_t)gethrtime();
                }
                mutex_exit(&htblp->ht_key_lock);
        } else {
                /* use user generated key */
                hkey = *hkeyp;
        }

        /* only works if ht_nbuckets is a power of 2 */
        bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
        ASSERT(objp != NULL);
        ASSERT(bucket < htblp->ht_nbuckets);

        rw_enter(&htblp->ht_table_lock, RW_WRITER);
        hep->he_hkey = hkey;
        hep->he_objp = objp;

        /* look for duplicate entries */
        hbp = &htblp->ht_buckets[bucket];
        curr_hep = hbp->hb_entries;
        while (curr_hep != NULL) {
                if (curr_hep->he_hkey == hep->he_hkey) {
                        break;
                }
                curr_hep = curr_hep->he_next;
        }
        if (curr_hep != NULL) {
                /* duplicate hkey: free the unused entry and fail */
                DERR("hash_insert: found duplicate hash entry: "
                    "bucket %d, hkey 0x%016llx\n",
                    bucket, (longlong_t)hep->he_hkey);
                kmem_free(hep, sizeof (*hep));
                rw_exit(&htblp->ht_table_lock);
                return (EINVAL);
        }
        /* insert at the head of the bucket chain */
        hep->he_next = hbp->hb_entries;
        hbp->hb_entries = hep;
        hbp->hb_count++;
        htblp->ht_count++;
        rw_exit(&htblp->ht_table_lock);

        /* return the generated key to the caller */
        if (*hkeyp == 0) {
                *hkeyp = hkey;
                ASSERT(*hkeyp != 0);
        }
        D3("hash_insert: htblp 0x%p, hkey = 0x%016llx, bucket = %d\n",
            htblp, (longlong_t)*hkeyp, bucket);
        return (0);
}
9511 
9512 /*
9513  * daplka_hash_remove:
9514  *      removes object identified by hkey from hash table
9515  *
9516  * input:
9517  *      htblp                   pointer to hash table
9518  *
9519  *      hkey                    hkey that identifies the object to be removed
9520  *
9521  * output:
9522  *      objpp                   pointer to pointer to object.
9523  *                              if remove is successful, the removed object
9524  *                              will be returned via *objpp.
9525  *
9526  * return value(s):
9527  *      EINVAL                  cannot find hash entry
9528  *      0                       successful
9529  */
9530 static int
9531 daplka_hash_remove(daplka_hash_table_t *htblp, uint64_t hkey, void **objpp)
9532 {
9533         daplka_hash_entry_t     *free_hep, **curr_hepp;
9534         daplka_hash_bucket_t    *hbp;
9535         uint32_t                bucket;
9536 
9537         bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9538 
9539         rw_enter(&htblp->ht_table_lock, RW_WRITER);
9540         hbp = &htblp->ht_buckets[bucket];
9541 
9542         curr_hepp = &hbp->hb_entries;
9543         while (*curr_hepp != NULL) {
9544                 if ((*curr_hepp)->he_hkey == hkey) {
9545                         break;
9546                 }
9547                 curr_hepp = &(*curr_hepp)->he_next;
9548         }
9549         if (*curr_hepp == NULL) {
9550                 DERR("hash_remove: cannot find hash entry: "
9551                     "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9552                 rw_exit(&htblp->ht_table_lock);
9553                 return (EINVAL);
9554         } else {
9555                 if (objpp != NULL) {
9556                         *objpp = (*curr_hepp)->he_objp;
9557                 }
9558                 free_hep = *curr_hepp;
9559                 *curr_hepp = (*curr_hepp)->he_next;
9560                 kmem_free(free_hep, sizeof (*free_hep));
9561         }
9562         hbp->hb_count--;
9563         htblp->ht_count--;
9564         D3("hash_remove: removed entry, hkey 0x%016llx, bucket %d, "
9565             "hb_count %d, hb_count %d\n",
9566             (longlong_t)hkey, bucket, hbp->hb_count, htblp->ht_count);
9567         rw_exit(&htblp->ht_table_lock);
9568         return (0);
9569 }
9570 
9571 /*
9572  * daplka_hash_walk:
9573  *      walks through the entire hash table. applying func on each of
9574  *      the inserted objects. stops walking if func returns non-zero.
9575  *
9576  * input:
9577  *      htblp                   pointer to hash table
9578  *
9579  *      func                    function to be applied on each object
9580  *
9581  *      farg                    second argument to func
9582  *
9583  *      lockmode                can be RW_WRITER or RW_READER. this
9584  *                              allows the caller to choose what type
9585  *                              of lock to acquire before walking the
9586  *                              table.
9587  *
9588  * output:
9589  *      none
9590  *
9591  * return value(s):
9592  *      none
9593  */
9594 static void
9595 daplka_hash_walk(daplka_hash_table_t *htblp, int (*func)(void *, void *),
9596         void *farg, krw_t lockmode)
9597 {
9598         daplka_hash_entry_t *curr_hep;
9599         daplka_hash_bucket_t *hbp;
9600         uint32_t bucket, retval = 0;
9601 
9602         ASSERT(lockmode == RW_WRITER || lockmode == RW_READER);
9603 
9604         if (lockmode == RW_WRITER) {
9605                 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9606         } else {
9607                 rw_enter(&htblp->ht_table_lock, RW_READER);
9608         }
9609         for (bucket = 0; bucket < htblp->ht_nbuckets && retval == 0; bucket++) {
9610                 hbp = &htblp->ht_buckets[bucket];
9611                 curr_hep = hbp->hb_entries;
9612                 while (curr_hep != NULL) {
9613                         retval = (*func)(curr_hep->he_objp, farg);
9614                         if (retval != 0) {
9615                                 break;
9616                         }
9617                         curr_hep = curr_hep->he_next;
9618                 }
9619         }
9620         rw_exit(&htblp->ht_table_lock);
9621 }
9622 
9623 /*
9624  * daplka_hash_lookup:
9625  *      finds object from hkey
9626  *
9627  * input:
9628  *      htblp                   pointer to hash table
9629  *
9630  *      hkey                    hkey that identifies the object to be looked up
9631  *
9632  * output:
9633  *      none
9634  *
9635  * return value(s):
9636  *      NULL                    if not found
9637  *      object pointer          if found
9638  */
9639 static void *
9640 daplka_hash_lookup(daplka_hash_table_t *htblp, uint64_t hkey)
9641 {
9642         daplka_hash_entry_t *curr_hep;
9643         uint32_t bucket;
9644         void *objp;
9645 
9646         bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9647 
9648         rw_enter(&htblp->ht_table_lock, RW_READER);
9649         curr_hep = htblp->ht_buckets[bucket].hb_entries;
9650         while (curr_hep != NULL) {
9651                 if (curr_hep->he_hkey == hkey) {
9652                         break;
9653                 }
9654                 curr_hep = curr_hep->he_next;
9655         }
9656         if (curr_hep == NULL) {
9657                 DERR("hash_lookup: cannot find hash entry: "
9658                     "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9659                 rw_exit(&htblp->ht_table_lock);
9660                 return (NULL);
9661         }
9662         objp = curr_hep->he_objp;
9663         ASSERT(objp != NULL);
9664         if (htblp->ht_lookup_func != NULL) {
9665                 (*htblp->ht_lookup_func)(objp);
9666         }
9667         rw_exit(&htblp->ht_table_lock);
9668         return (objp);
9669 }
9670 
9671 /*
9672  * daplka_hash_destroy:
9673  *      destroys hash table. applies free_func on all inserted objects.
9674  *
9675  * input:
9676  *      htblp                   pointer to hash table
9677  *
9678  * output:
9679  *      none
9680  *
9681  * return value(s):
9682  *      none
9683  */
static void
daplka_hash_destroy(daplka_hash_table_t *htblp)
{
        daplka_hash_entry_t *curr_hep, *free_hep;
        daplka_hash_entry_t *free_list = NULL;
        daplka_hash_bucket_t *hbp;
        uint32_t bucket, cnt, total = 0;

        if (!htblp->ht_initialized) {
                DERR("hash_destroy: not initialized\n");
                return;
        }
        /* free all elements from hash table */
        rw_enter(&htblp->ht_table_lock, RW_WRITER);
        for (bucket = 0; bucket < htblp->ht_nbuckets; bucket++) {
                hbp = &htblp->ht_buckets[bucket];

                /* build list of elements to be freed */
                curr_hep = hbp->hb_entries;
                cnt = 0;
                while (curr_hep != NULL) {
                        cnt++;
                        free_hep = curr_hep;
                        curr_hep = curr_hep->he_next;

                        /* unlink and push onto the private free list */
                        free_hep->he_next = free_list;
                        free_list = free_hep;
                }
                ASSERT(cnt == hbp->hb_count);
                total += cnt;
                hbp->hb_count = 0;
                hbp->hb_entries = NULL;
        }
        ASSERT(total == htblp->ht_count);
        D3("hash_destroy: htblp 0x%p, nbuckets %d, freed %d hash entries\n",
            htblp, htblp->ht_nbuckets, total);
        rw_exit(&htblp->ht_table_lock);

        /*
         * free all objects, now without holding the hash table lock;
         * ht_free_func may block or take other locks, so it must not
         * be called with ht_table_lock held
         */
        cnt = 0;
        while (free_list != NULL) {
                cnt++;
                free_hep = free_list;
                free_list = free_list->he_next;
                if (htblp->ht_free_func != NULL) {
                        (*htblp->ht_free_func)(free_hep->he_objp);
                }
                kmem_free(free_hep, sizeof (*free_hep));
        }
        ASSERT(total == cnt);

        /* free hash buckets and destroy locks */
        kmem_free(htblp->ht_buckets,
            sizeof (daplka_hash_bucket_t) * htblp->ht_nbuckets);

        /* re-acquire the lock to reset the table to its pristine state */
        rw_enter(&htblp->ht_table_lock, RW_WRITER);
        htblp->ht_buckets = NULL;
        htblp->ht_count = 0;
        htblp->ht_nbuckets = 0;
        htblp->ht_free_func = NULL;
        htblp->ht_lookup_func = NULL;
        htblp->ht_initialized = B_FALSE;
        rw_exit(&htblp->ht_table_lock);

        /* no further users at this point; safe to tear down the locks */
        mutex_destroy(&htblp->ht_key_lock);
        rw_destroy(&htblp->ht_table_lock);
}
9751 
9752 /*
9753  * daplka_hash_getsize:
9754  *      return the number of objects in hash table
9755  *
9756  * input:
9757  *      htblp                   pointer to hash table
9758  *
9759  * output:
9760  *      none
9761  *
9762  * return value(s):
9763  *      number of objects in hash table
9764  */
9765 static uint32_t
9766 daplka_hash_getsize(daplka_hash_table_t *htblp)
9767 {
9768         uint32_t sz;
9769 
9770         rw_enter(&htblp->ht_table_lock, RW_READER);
9771         sz = htblp->ht_count;
9772         rw_exit(&htblp->ht_table_lock);
9773 
9774         return (sz);
9775 }
9776 
9777 /*
9778  * this function is used as ht_lookup_func above when lookup is called.
9779  * other types of objs may use a more elaborate lookup_func.
9780  */
9781 static void
9782 daplka_hash_generic_lookup(void *obj)
9783 {
9784         daplka_resource_t       *rp = (daplka_resource_t *)obj;
9785 
9786         mutex_enter(&rp->rs_reflock);
9787         rp->rs_refcnt++;
9788         ASSERT(rp->rs_refcnt != 0);
9789         mutex_exit(&rp->rs_reflock);
9790 }
9791 
9792 /*
9793  * Generates a non-zero 32 bit hash key used for the timer hash table.
9794  */
9795 static uint32_t
9796 daplka_timer_hkey_gen()
9797 {
9798         uint32_t new_hkey;
9799 
9800         do {
9801                 new_hkey = atomic_inc_32_nv(&daplka_timer_hkey);
9802         } while (new_hkey == 0);
9803 
9804         return (new_hkey);
9805 }
9806 
9807 
9808 /*
9809  * The DAPL KA debug logging routines
9810  */
9811 
9812 /*
9813  * Add the string str to the end of the debug log, followed by a newline.
9814  */
9815 static void
9816 daplka_dbglog(char *str)
9817 {
9818         size_t  length;
9819         size_t  remlen;
9820 
9821         /*
9822          * If this is the first time we've written to the log, initialize it.
9823          */
9824         if (!daplka_dbginit) {
9825                 return;
9826         }
9827         mutex_enter(&daplka_dbglock);
9828         /*
9829          * Note the log is circular; if this string would run over the end,
9830          * we copy the first piece to the end and then the last piece to
9831          * the beginning of the log.
9832          */
9833         length = strlen(str);
9834 
9835         remlen = (size_t)sizeof (daplka_dbgbuf) - daplka_dbgnext - 1;
9836 
9837         if (length > remlen) {
9838                 if (remlen)
9839                         bcopy(str, daplka_dbgbuf + daplka_dbgnext, remlen);
9840                 daplka_dbgbuf[sizeof (daplka_dbgbuf) - 1] = (char)NULL;
9841                 str += remlen;
9842                 length -= remlen;
9843                 daplka_dbgnext = 0;
9844         }
9845         bcopy(str, daplka_dbgbuf + daplka_dbgnext, length);
9846         daplka_dbgnext += length;
9847 
9848         if (daplka_dbgnext >= sizeof (daplka_dbgbuf))
9849                 daplka_dbgnext = 0;
9850         mutex_exit(&daplka_dbglock);
9851 }
9852 
9853 
9854 /*
9855  * Add a printf-style message to whichever debug logs we're currently using.
9856  */
9857 static void
9858 daplka_debug(const char *fmt, ...)
9859 {
9860         char    buff[512];
9861         va_list ap;
9862         /*
9863          * The system prepends the thread id and high resolution time
9864          * (nanoseconds are dropped and so are the upper digits)
9865          * to the specified string.
9866          * The unit for timestamp is 10 microseconds.
9867          * It wraps around every 10000 seconds.
9868          * Ex: gethrtime() = X ns = X/1000 us = X/10000 10 micro sec.
9869          */
9870         int     micro_time = (int)((gethrtime() / 10000) % 1000000000);
9871         (void) sprintf(buff, "th %p tm %9d: ", (void *)curthread, micro_time);
9872 
9873         va_start(ap, fmt);
9874         (void) vsprintf(buff+strlen(buff), fmt, ap);
9875         va_end(ap);
9876 
9877         daplka_dbglog(buff);
9878 }
9879 
9880 static void
9881 daplka_console(const char *fmt, ...)
9882 {
9883         char buff[512];
9884         va_list ap;
9885 
9886         va_start(ap, fmt);
9887         (void) vsprintf(buff, fmt, ap);
9888         va_end(ap);
9889 
9890         cmn_err(CE_CONT, "%s", buff);
9891 }