1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  *
  26  * UDAPL kernel agent
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/errno.h>
  31 #include <sys/debug.h>
  32 #include <sys/stropts.h>
  33 #include <sys/stream.h>
  34 #include <sys/strlog.h>
  35 #include <sys/cmn_err.h>
  36 #include <sys/kmem.h>
  37 #include <sys/conf.h>
  38 #include <sys/stat.h>
  39 #include <sys/modctl.h>
  40 #include <sys/kstat.h>
  41 #include <sys/ddi.h>
  42 #include <sys/sunddi.h>
  43 #include <sys/strsun.h>
  44 #include <sys/taskq.h>
  45 #include <sys/open.h>
  46 #include <sys/uio.h>
  47 #include <sys/cpuvar.h>
  48 #include <sys/atomic.h>
  49 #include <sys/sysmacros.h>
  50 #include <sys/esunddi.h>
  51 #include <sys/avl.h>
  52 #include <sys/cred.h>
  53 #include <sys/note.h>
  54 #include <sys/ib/ibtl/ibti.h>
  55 #include <sys/socket.h>
  56 #include <netinet/in.h>
  57 #include <daplt_if.h>
  58 #include <daplt.h>
  59 
/*
 * The following variables support the debug log buffer scheme.
 * Log text is kept in the static daplka_dbgbuf array, which is
 * larger on DEBUG builds (0x80000 bytes) than on non-DEBUG
 * builds (0x4000 bytes).
 */
#ifdef  DEBUG
static char daplka_dbgbuf[0x80000];
#else /* DEBUG */
static char daplka_dbgbuf[0x4000];
#endif /* DEBUG */
/* size of daplka_dbgbuf in bytes */
static int daplka_dbgsize = sizeof (daplka_dbgbuf);
/* reset to 0 by _init(); presumably the next write offset -- confirm */
static size_t daplka_dbgnext;
/* set to 1 by _init() after daplka_dbglock and the buffer are set up */
static int daplka_dbginit = 0;
static kmutex_t daplka_dbglock;
_NOTE(MUTEX_PROTECTS_DATA(daplka_dbglock,
    daplka_dbgbuf
    daplka_dbgnext))

/*
 * daplka_dbg is a bit mask that selects which debug macros log:
 * 0x100 gates DERR; 0x01, 0x02, 0x04 and 0x08 gate D1, D2, D3 and D4.
 * D1-D4 and DINFO are always disabled on non-DEBUG builds.
 * The default of 0x0103 turns on DERR, D1 and D2.
 */
static int daplka_dbg = 0x0103;
static void daplka_console(const char *, ...);
static void daplka_debug(const char *, ...);
static int daplka_apm = 0x1;                    /* default enable */
static int daplka_failback = 0x1;               /* default enable */
/*
 * NOTE(review): the meaning and units of this tunable are not evident
 * from this part of the file -- confirm against its users.
 */
static int daplka_query_aft_setaltpath = 10;
  82 
  83 #define DERR                            \
  84         if (daplka_dbg & 0x100)     \
  85             daplka_debug
  86 
  87 #ifdef DEBUG
  88 
  89 #define DINFO                           \
  90         daplka_console
  91 
  92 #define D1                              \
  93         if (daplka_dbg & 0x01)              \
  94             daplka_debug
  95 #define D2                              \
  96         if (daplka_dbg & 0x02)              \
  97             daplka_debug
  98 #define D3                              \
  99         if (daplka_dbg & 0x04)              \
 100             daplka_debug
 101 #define D4                              \
 102         if (daplka_dbg & 0x08)              \
 103             daplka_debug
 104 
 105 #else /* DEBUG */
 106 
 107 #define DINFO   if (0) printf
 108 #define D1      if (0) printf
 109 #define D2      if (0) printf
 110 #define D3      if (0) printf
 111 #define D4      if (0) printf
 112 
 113 #endif /* DEBUG */
 114 
 115 /*
 116  * driver entry points
 117  */
 118 static int daplka_open(dev_t *, int, int, struct cred *);
 119 static int daplka_close(dev_t, int, int, struct cred *);
 120 static int daplka_attach(dev_info_t *, ddi_attach_cmd_t);
 121 static int daplka_detach(dev_info_t *, ddi_detach_cmd_t);
 122 static int daplka_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
 123 static int daplka_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 124 
 125 /*
 126  * types of ioctls
 127  */
 128 static int daplka_common_ioctl(int, minor_t, intptr_t, int, cred_t *, int *);
 129 static int daplka_misc_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 130     cred_t *, int *);
 131 static int daplka_ep_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 132     cred_t *, int *);
 133 static int daplka_evd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 134     cred_t *, int *);
 135 static int daplka_mr_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 136     cred_t *, int *);
 137 static int daplka_cno_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 138     cred_t *, int *);
 139 static int daplka_pd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 140     cred_t *, int *);
 141 static int daplka_sp_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 142     cred_t *, int *);
 143 static int daplka_srq_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
 144     cred_t *, int *);
 145 
 146 /*
 147  * common ioctls and supporting functions
 148  */
 149 static int daplka_ia_create(minor_t, intptr_t, int, cred_t *, int *);
 150 static int daplka_ia_destroy(daplka_resource_t *);
 151 
 152 /*
 153  * EP ioctls and supporting functions
 154  */
 155 static int daplka_ep_create(daplka_ia_resource_t *, intptr_t, int,
 156     cred_t *, int *);
 157 static int daplka_ep_modify(daplka_ia_resource_t *, intptr_t, int,
 158     cred_t *, int *);
 159 static int daplka_ep_free(daplka_ia_resource_t *, intptr_t, int,
 160     cred_t *, int *);
 161 static int daplka_ep_connect(daplka_ia_resource_t *, intptr_t, int,
 162     cred_t *, int *);
 163 static int daplka_ep_disconnect(daplka_ia_resource_t *, intptr_t, int,
 164     cred_t *, int *);
 165 static int daplka_ep_reinit(daplka_ia_resource_t *, intptr_t, int,
 166     cred_t *, int *);
 167 static int daplka_ep_destroy(daplka_resource_t *);
 168 static void daplka_hash_ep_free(void *);
 169 static int daplka_ep_failback(void *objp, void *arg);
 170 static int daplka_ep_altpath(daplka_ep_resource_t *, ib_gid_t *);
 171 
 172 static uint32_t daplka_ep_get_state(daplka_ep_resource_t *);
 173 static void daplka_ep_set_state(daplka_ep_resource_t *, uint32_t, uint32_t);
 174 static boolean_t daplka_ep_transition_is_valid(uint32_t, uint32_t);
 175 static daplka_timer_info_t *daplka_timer_info_alloc(daplka_ep_resource_t *);
 176 static void daplka_timer_info_free(daplka_timer_info_t *);
 177 static void daplka_timer_handler(void *);
 178 static void daplka_timer_dispatch(void *);
 179 static void daplka_timer_thread(void *);
 180 static int daplka_cancel_timer(daplka_ep_resource_t *);
 181 static void daplka_hash_timer_free(void *);
 182 
 183 /*
 184  * EVD ioctls and supporting functions
 185  */
 186 static int daplka_evd_create(daplka_ia_resource_t *, intptr_t, int,
 187     cred_t *, int *);
 188 static int daplka_cq_resize(daplka_ia_resource_t *, intptr_t, int,
 189     cred_t *, int *);
 190 static int daplka_evd_free(daplka_ia_resource_t *, intptr_t, int,
 191     cred_t *, int *);
 192 static int daplka_event_poll(daplka_ia_resource_t *, intptr_t, int,
 193     cred_t *, int *);
 194 static int daplka_evd_destroy(daplka_resource_t *);
 195 static void daplka_cq_handler(ibt_cq_hdl_t, void *);
 196 static void daplka_evd_wakeup(daplka_evd_resource_t *,
 197     daplka_evd_event_list_t *, daplka_evd_event_t *);
 198 static void daplka_evd_event_enqueue(daplka_evd_event_list_t *,
 199     daplka_evd_event_t *);
 200 static daplka_evd_event_t *daplka_evd_event_dequeue(daplka_evd_event_list_t *);
 201 static void daplka_hash_evd_free(void *);
 202 
 203 
 204 /*
 205  * SRQ ioctls and supporting functions
 206  */
 207 static int daplka_srq_create(daplka_ia_resource_t *, intptr_t, int,
 208     cred_t *, int *);
 209 static int daplka_srq_resize(daplka_ia_resource_t *, intptr_t, int,
 210     cred_t *, int *);
 211 static int daplka_srq_free(daplka_ia_resource_t *, intptr_t, int,
 212     cred_t *, int *);
 213 static int daplka_srq_destroy(daplka_resource_t *);
 214 static void daplka_hash_srq_free(void *);
 215 
 216 /*
 217  * Miscellaneous ioctls
 218  */
 219 static int daplka_cr_accept(daplka_ia_resource_t *, intptr_t, int,
 220     cred_t *, int *);
 221 static int daplka_cr_reject(daplka_ia_resource_t *, intptr_t, int,
 222     cred_t *, int *);
 223 static int daplka_cr_handoff(daplka_ia_resource_t *, intptr_t, int,
 224     cred_t *, int *);
 225 static int daplka_ia_query(daplka_ia_resource_t *, intptr_t, int,
 226     cred_t *, int *);
 227 
 228 /*
 229  * PD ioctls and supporting functions
 230  */
 231 static int daplka_pd_alloc(daplka_ia_resource_t *, intptr_t, int,
 232     cred_t *, int *);
 233 static int daplka_pd_free(daplka_ia_resource_t *, intptr_t, int,
 234     cred_t *, int *);
 235 static int daplka_pd_destroy(daplka_resource_t *);
 236 static void daplka_hash_pd_free(void *);
 237 
 238 /*
 239  * SP ioctls and supporting functions
 240  */
 241 static int daplka_service_register(daplka_ia_resource_t *, intptr_t, int,
 242     cred_t *, int *);
 243 static int daplka_service_deregister(daplka_ia_resource_t *, intptr_t, int,
 244     cred_t *, int *);
 245 static int daplka_sp_destroy(daplka_resource_t *);
 246 static void daplka_hash_sp_free(void *);
 247 static void daplka_hash_sp_unref(void *);
 248 
 249 /*
 250  * MR ioctls and supporting functions
 251  */
 252 static int daplka_mr_register(daplka_ia_resource_t *, intptr_t, int,
 253     cred_t *, int *);
 254 static int daplka_mr_register_lmr(daplka_ia_resource_t *, intptr_t, int,
 255     cred_t *, int *);
 256 static int daplka_mr_register_shared(daplka_ia_resource_t *, intptr_t, int,
 257     cred_t *, int *);
 258 static int daplka_mr_deregister(daplka_ia_resource_t *, intptr_t, int,
 259     cred_t *, int *);
 260 static int daplka_mr_sync(daplka_ia_resource_t *, intptr_t, int,
 261     cred_t *, int *);
 262 static int daplka_mr_destroy(daplka_resource_t *);
 263 static void daplka_hash_mr_free(void *);
 264 static void daplka_shared_mr_free(daplka_mr_resource_t *);
 265 
 266 /*
 267  * MW ioctls and supporting functions
 268  */
 269 static int daplka_mw_alloc(daplka_ia_resource_t *, intptr_t, int,
 270     cred_t *, int *);
 271 static int daplka_mw_free(daplka_ia_resource_t *, intptr_t, int,
 272     cred_t *, int *);
 273 static int daplka_mw_destroy(daplka_resource_t *);
 274 static void daplka_hash_mw_free(void *);
 275 
 276 /*
 277  * CNO ioctls and supporting functions
 278  */
 279 static int daplka_cno_alloc(daplka_ia_resource_t *, intptr_t, int,
 280     cred_t *, int *);
 281 static int daplka_cno_free(daplka_ia_resource_t *, intptr_t, int,
 282     cred_t *, int *);
 283 static int daplka_cno_wait(daplka_ia_resource_t *, intptr_t, int,
 284     cred_t *, int *);
 285 static int daplka_cno_destroy(daplka_resource_t *);
 286 static void daplka_hash_cno_free(void *);
 287 
 288 /*
 289  * CM handlers
 290  */
 291 static  ibt_cm_status_t daplka_cm_rc_handler(void *, ibt_cm_event_t *,
 292     ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
 293 
 294 static  ibt_cm_status_t daplka_cm_service_handler(void *, ibt_cm_event_t *,
 295     ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
 296 
 297 static ibt_cm_status_t daplka_cm_service_req(daplka_sp_resource_t *,
 298     ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
 299 
 300 /*
 301  * resource management routines
 302  */
 303 static int daplka_resource_reserve(minor_t *);
 304 static int daplka_resource_insert(minor_t, daplka_resource_t *);
 305 static daplka_resource_t *daplka_resource_remove(minor_t rnum);
 306 static daplka_resource_t *daplka_resource_lookup(minor_t);
 307 static void daplka_resource_init(void);
 308 static void daplka_resource_fini(void);
 309 static struct daplka_resource_table daplka_resource;
 310 
 311 /*
 312  * hash table routines
 313  */
 314 static int daplka_hash_insert(daplka_hash_table_t *, uint64_t *, void *);
 315 static int daplka_hash_remove(daplka_hash_table_t *, uint64_t, void **);
 316 static void daplka_hash_walk(daplka_hash_table_t *, int (*)(void *, void *),
 317     void *, krw_t);
 318 static void *daplka_hash_lookup(daplka_hash_table_t *, uint64_t);
 319 static int daplka_hash_create(daplka_hash_table_t *, uint_t,
 320     void (*)(void *), void (*)(void *));
 321 static void daplka_hash_destroy(daplka_hash_table_t *);
 322 static uint32_t daplka_hash_getsize(daplka_hash_table_t *);
 323 static void daplka_hash_generic_lookup(void *);
 324 
 325 static uint32_t daplka_timer_hkey_gen();
 326 
 327 /*
 328  * async event handlers
 329  */
 330 static void daplka_async_event_create(ibt_async_code_t, ibt_async_event_t *,
 331     uint64_t, daplka_ia_resource_t *);
 332 static void daplka_rc_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
 333     ibt_async_event_t *);
 334 static void daplka_cq_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
 335     ibt_async_event_t *);
 336 static void daplka_un_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
 337     ibt_async_event_t *);
 338 static void daplka_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
 339     ibt_async_event_t *);
 340 static void daplka_sm_notice_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
 341     ibt_subnet_event_t *event);
 342 static void daplka_sm_gid_avail(ib_gid_t *, ib_gid_t *);
 343 
/*
 * IBTF wrappers and default limits used for resource accounting
 *
 * NOTE(review): the daplka_max_*_percent names suggest a per-resource-type
 * cap expressed as a percentage, with 100 as the default; the code that
 * consumes these values is not visible in this part of the file -- confirm.
 */
static boolean_t        daplka_accounting_enabled = B_TRUE;
static uint32_t         daplka_max_qp_percent = 100;
static uint32_t         daplka_max_cq_percent = 100;
static uint32_t         daplka_max_pd_percent = 100;
static uint32_t         daplka_max_mw_percent = 100;
static uint32_t         daplka_max_mr_percent = 100;
static uint32_t         daplka_max_srq_percent = 100;
 354 
 355 static ibt_status_t
 356 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *, ibt_hca_hdl_t,
 357     ibt_chan_alloc_flags_t, ibt_rc_chan_alloc_args_t *,
 358     ibt_channel_hdl_t *, ibt_chan_sizes_t *);
 359 
 360 static ibt_status_t
 361 daplka_ibt_free_channel(daplka_ep_resource_t *, ibt_channel_hdl_t);
 362 
 363 static ibt_status_t
 364 daplka_ibt_alloc_cq(daplka_evd_resource_t *, ibt_hca_hdl_t,
 365     ibt_cq_attr_t *, ibt_cq_hdl_t *, uint_t *);
 366 
 367 static ibt_status_t
 368 daplka_ibt_free_cq(daplka_evd_resource_t *, ibt_cq_hdl_t);
 369 
 370 static ibt_status_t
 371 daplka_ibt_alloc_pd(daplka_pd_resource_t *, ibt_hca_hdl_t,
 372     ibt_pd_flags_t, ibt_pd_hdl_t *);
 373 
 374 static ibt_status_t
 375 daplka_ibt_free_pd(daplka_pd_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t);
 376 
 377 static ibt_status_t
 378 daplka_ibt_alloc_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
 379     ibt_mw_flags_t, ibt_mw_hdl_t *, ibt_rkey_t *);
 380 
 381 static ibt_status_t
 382 daplka_ibt_free_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_mw_hdl_t);
 383 
 384 static ibt_status_t
 385 daplka_ibt_register_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
 386     ibt_mr_attr_t *, ibt_mr_hdl_t *, ibt_mr_desc_t *);
 387 
 388 static ibt_status_t
 389 daplka_ibt_register_shared_mr(daplka_mr_resource_t *, ibt_hca_hdl_t,
 390     ibt_mr_hdl_t, ibt_pd_hdl_t, ibt_smr_attr_t *, ibt_mr_hdl_t *,
 391     ibt_mr_desc_t *);
 392 
 393 static ibt_status_t
 394 daplka_ibt_deregister_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_mr_hdl_t);
 395 
 396 static ibt_status_t
 397 daplka_ibt_alloc_srq(daplka_srq_resource_t *, ibt_hca_hdl_t, ibt_srq_flags_t,
 398     ibt_pd_hdl_t, ibt_srq_sizes_t *, ibt_srq_hdl_t *, ibt_srq_sizes_t *);
 399 
 400 static ibt_status_t
 401 daplka_ibt_free_srq(daplka_srq_resource_t *, ibt_srq_hdl_t);
 402 
 403 /*
 404  * macros for manipulating resource objects.
 405  * these macros can be used on objects that begin with a
 406  * daplka_resource_t header.
 407  */
 408 #define DAPLKA_RS_REFCNT(rp) ((rp)->header.rs_refcnt)
 409 
 410 #define DAPLKA_RS_REF(rp) {                     \
 411         mutex_enter(&(rp)->header.rs_reflock);   \
 412         (rp)->header.rs_refcnt++;            \
 413         ASSERT((rp)->header.rs_refcnt != 0); \
 414         mutex_exit(&(rp)->header.rs_reflock);    \
 415 }
 416 
 417 #define DAPLKA_RS_UNREF(rp) {                                   \
 418         mutex_enter(&(rp)->header.rs_reflock);                   \
 419         ASSERT((rp)->header.rs_refcnt != 0);                 \
 420         if (--(rp)->header.rs_refcnt == 0) {                 \
 421                 ASSERT((rp)->header.rs_free != NULL);                \
 422                 mutex_exit(&(rp)->header.rs_reflock);            \
 423                 (rp)->header.rs_free((daplka_resource_t *)rp);       \
 424         } else {                                                \
 425                 mutex_exit(&(rp)->header.rs_reflock);            \
 426         }                                                       \
 427 }
 428 
 429 #define DAPLKA_RS_INIT(rp, type, rnum, free_func) {     \
 430         (rp)->header.rs_refcnt = 1;                  \
 431         (rp)->header.rs_type = (type);                       \
 432         (rp)->header.rs_rnum = (rnum);                       \
 433         (rp)->header.rs_charged = 0;                 \
 434         (rp)->header.rs_free = (free_func);          \
 435         mutex_init(&(rp)->header.rs_reflock, NULL,       \
 436             MUTEX_DRIVER, NULL);                        \
 437 }
 438 
 439 #define DAPLKA_RS_FINI(rp) {                            \
 440         mutex_destroy(&(rp)->header.rs_reflock); \
 441 }
 442 
 443 #define DAPLKA_RS_ACCT_INC(rp, cnt) {                           \
 444         atomic_add_32(&(rp)->header.rs_charged, (cnt));          \
 445 }
 446 #define DAPLKA_RS_ACCT_DEC(rp, cnt) {                           \
 447         atomic_add_32(&(rp)->header.rs_charged, -(cnt)); \
 448 }
 449 #define DAPLKA_RS_ACCT_CHARGED(rp) ((rp)->header.rs_charged)
 450 
 451 #define DAPLKA_RS_RNUM(rp) ((rp)->header.rs_rnum)
 452 #define DAPLKA_RS_TYPE(rp) ((rp)->header.rs_type)
 453 #define DAPLKA_RS_RESERVED(rp) ((intptr_t)(rp) == DAPLKA_RC_RESERVED)
 454 
 455 /*
 456  * depending on the timeout value does a cv_wait_sig or cv_timedwait_sig
 457  */
 458 #define DAPLKA_EVD_WAIT(cvp, mp, timeout)                       \
 459         ((timeout) == LONG_MAX) ? cv_wait_sig((cvp), (mp)) :    \
 460         cv_timedwait_sig((cvp), (mp), (timeout))
 461 
/*
 * HCA reference counting.
 * The *_WITHOUT_LOCK variants adjust hca_ref_cnt directly and take no
 * lock themselves. DAPLKA_HOLD_HCA/DAPLKA_RELE_HCA perform the same
 * adjustment while holding (dp)->daplka_mutex.
 */
#define DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca)       ((hca)->hca_ref_cnt++)
#define DAPLKA_RELE_HCA_WITHOUT_LOCK(hca)       ((hca)->hca_ref_cnt--)

#define DAPLKA_HOLD_HCA(dp, hca) {                      \
        mutex_enter(&(dp)->daplka_mutex);                \
        DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);              \
        mutex_exit(&(dp)->daplka_mutex);         \
}

#define DAPLKA_RELE_HCA(dp, hca) {                      \
        mutex_enter(&(dp)->daplka_mutex);                \
        DAPLKA_RELE_HCA_WITHOUT_LOCK(hca);              \
        mutex_exit(&(dp)->daplka_mutex);         \
}

/*
 * Evaluates to true while the HCA still has holders (hca_ref_cnt) or
 * any non-zero QP, CQ, PD, MW or MR count. daplka_fini_hcas checks
 * this before tearing an HCA down.
 */
#define DAPLKA_HCA_BUSY(hca)                            \
        ((hca)->hca_ref_cnt != 0 ||                  \
        (hca)->hca_qp_count != 0 ||                  \
        (hca)->hca_cq_count != 0 ||                  \
        (hca)->hca_pd_count != 0 ||                  \
        (hca)->hca_mw_count != 0 ||                  \
        (hca)->hca_mr_count != 0)
 484 
 485 
/*
 * Character/block entry points. Only open, close and ioctl are
 * implemented by this driver; the remaining slots are filled with the
 * nodev/nochpoll/ddi_prop_op defaults.
 */
static struct cb_ops daplka_cb_ops = {
        daplka_open,            /* cb_open */
        daplka_close,           /* cb_close */
        nodev,                  /* cb_strategy */
        nodev,                  /* cb_print */
        nodev,                  /* cb_dump */
        nodev,                  /* cb_read */
        nodev,                  /* cb_write */
        daplka_ioctl,           /* cb_ioctl */
        nodev,                  /* cb_devmap */
        nodev,                  /* cb_mmap */
        nodev,                  /* cb_segmap */
        nochpoll,               /* cb_chpoll */
        ddi_prop_op,            /* cb_prop_op */
        NULL,                   /* cb_stream */
        D_NEW | D_MP,           /* cb_flag */
        CB_REV,                 /* rev */
        nodev,                  /* int (*cb_aread)() */
        nodev                   /* int (*cb_awrite)() */
};

/*
 * Device operations: getinfo, attach and detach are provided by this
 * driver and daplka_cb_ops supplies the cb entry points.
 */
static struct dev_ops daplka_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* devo_refcnt */
        daplka_info,            /* devo_getinfo */
        nulldev,                /* devo_identify */
        nulldev,                /* devo_probe */
        daplka_attach,          /* devo_attach */
        daplka_detach,          /* devo_detach */
        nodev,                  /* devo_reset */
        &daplka_cb_ops,             /* devo_cb_ops */
        (struct bus_ops *)NULL, /* devo_bus_ops */
        nulldev,                /* power */
        ddi_quiesce_not_needed, /* devo_quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
        &mod_driverops,
        "uDAPL Service Driver",
        &daplka_ops,
};

/*
 * modlinkage is what _init, _fini and _info hand to mod_install,
 * mod_remove and mod_info. The linkage array is padded with NULLs;
 * the number of entries written out differs between _LP64 and
 * non-_LP64 builds.
 */
static struct modlinkage modlinkage = {
#ifdef _LP64
        MODREV_1, { (void *) &modldrv, NULL, NULL, NULL, NULL, NULL, NULL }
#else
        MODREV_1, { (void *) &modldrv, NULL, NULL, NULL }
#endif
};
 538 
/*
 * daplka_dev holds global driver state and a list of HCAs.
 * daplka_attach points it at the attaching instance's soft state.
 * daplka_state is the soft state anchor that _init sets up with
 * ddi_soft_state_init and _fini releases with ddi_soft_state_fini.
 */
static daplka_t *daplka_dev = NULL;
static void *daplka_state = NULL;

/*
 * global SP hash table
 */
static daplka_hash_table_t daplka_global_sp_htbl;

/*
 * timer_info hash table
 */
static daplka_hash_table_t daplka_timer_info_htbl;
static uint32_t daplka_timer_hkey = 0;

/*
 * shared MR avl tree
 */
static avl_tree_t daplka_shared_mr_tree;
static kmutex_t daplka_shared_mr_lock;
static int daplka_shared_mr_cmp(const void *, const void *);
_NOTE(MUTEX_PROTECTS_DATA(daplka_shared_mr_lock,
    daplka_shared_mr_tree))

/*
 * default kmem flags used by this driver
 */
static int daplka_km_flags = KM_SLEEP;

/*
 * taskq used for handling background tasks
 */
static taskq_t *daplka_taskq = NULL;

/*
 * daplka_cm_delay is the length of time the active
 * side needs to wait before timing out on the REP message.
 * NOTE(review): the unit is not stated here; 60000000 would be
 * 60 seconds if this is in microseconds -- confirm against its users.
 */
static clock_t daplka_cm_delay = 60000000;

/*
 * modunload will fail if pending_close is non-zero
 */
static uint32_t daplka_pending_close = 0;

/*
 * client registration info handed to ibt_attach by daplka_attach;
 * it names daplka_async_handler and DAPLKA_DRV_NAME.
 */
static struct ibt_clnt_modinfo_s daplka_clnt_modinfo = {
        IBTI_V_CURR,
        IBT_USER,
        daplka_async_handler,
        NULL,
        DAPLKA_DRV_NAME
};
 593 
 594 /*
 595  * Module Installation
 596  */
 597 int
 598 _init(void)
 599 {
 600         int status;
 601 
 602         status = ddi_soft_state_init(&daplka_state, sizeof (daplka_t), 1);
 603         if (status != 0) {
 604                 return (status);
 605         }
 606 
 607         mutex_init(&daplka_dbglock, NULL, MUTEX_DRIVER, NULL);
 608         bzero(daplka_dbgbuf, sizeof (daplka_dbgbuf));
 609         daplka_dbgnext = 0;
 610         daplka_dbginit = 1;
 611 
 612         daplka_resource_init();
 613 
 614         status = mod_install(&modlinkage);
 615         if (status != DDI_SUCCESS) {
 616                 /* undo inits done before mod_install */
 617                 daplka_resource_fini();
 618                 mutex_destroy(&daplka_dbglock);
 619                 ddi_soft_state_fini(&daplka_state);
 620         }
 621         return (status);
 622 }
 623 
 624 /*
 625  * Module Removal
 626  */
 627 int
 628 _fini(void)
 629 {
 630         int     status;
 631 
 632         /*
 633          * mod_remove causes detach to be called
 634          */
 635         if ((status = mod_remove(&modlinkage)) != 0) {
 636                 DERR("fini: mod_remove failed: 0x%x\n", status);
 637                 return (status);
 638         }
 639 
 640         daplka_resource_fini();
 641         mutex_destroy(&daplka_dbglock);
 642         ddi_soft_state_fini(&daplka_state);
 643 
 644         return (status);
 645 }
 646 
 647 /*
 648  * Return Module Info.
 649  */
 650 int
 651 _info(struct modinfo *modinfop)
 652 {
 653         return (mod_info(&modlinkage, modinfop));
 654 }
 655 
 656 static void
 657 daplka_enqueue_hca(daplka_t *dp, daplka_hca_t *hca)
 658 {
 659         daplka_hca_t *h;
 660 
 661         ASSERT(mutex_owned(&dp->daplka_mutex));
 662 
 663         if (dp->daplka_hca_list_head == NULL) {
 664                 dp->daplka_hca_list_head = hca;
 665         } else {
 666                 h = dp->daplka_hca_list_head;
 667                 while (h->hca_next != NULL)
 668                         h = h->hca_next;
 669 
 670                 h->hca_next = hca;
 671         }
 672 }
 673 
 674 static void
 675 daplka_dequeue_hca(daplka_t *dp, daplka_hca_t *hca)
 676 {
 677         daplka_hca_t *h;
 678 
 679         ASSERT(mutex_owned(&dp->daplka_mutex));
 680 
 681         if (dp->daplka_hca_list_head == hca)
 682                 dp->daplka_hca_list_head = hca->hca_next;
 683         else {
 684                 h = dp->daplka_hca_list_head;
 685                 while (h->hca_next != hca)
 686                         h = h->hca_next;
 687                 h->hca_next = hca->hca_next;
 688         }
 689 }
 690 
 691 static int
 692 daplka_init_hca(daplka_t *dp, ib_guid_t hca_guid)
 693 {
 694         daplka_hca_t            *hca;
 695         ibt_hca_portinfo_t      *pinfop;
 696         uint_t                  size;
 697         int                     j;
 698         ibt_status_t            status;
 699 
 700         hca = kmem_zalloc(sizeof (daplka_hca_t), KM_SLEEP);
 701 
 702         hca->hca_guid = hca_guid;
 703 
 704         /*
 705          * open the HCA for use
 706          */
 707         status = ibt_open_hca(dp->daplka_clnt_hdl, hca_guid, &hca->hca_hdl);
 708         if (status != IBT_SUCCESS) {
 709                 if (status == IBT_HCA_IN_USE) {
 710                         DERR("ibt_open_hca() returned IBT_HCA_IN_USE\n");
 711                 } else {
 712                         DERR("ibt_open_hca() returned %d\n", status);
 713                 }
 714                 kmem_free(hca, sizeof (daplka_hca_t));
 715                 return (status);
 716         }
 717 
 718         /*
 719          * query HCA to get its info
 720          */
 721         status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
 722         if (status != IBT_SUCCESS) {
 723                 DERR("ibt_query_hca returned %d (hca_guid 0x%llx)\n",
 724                     status, (longlong_t)hca_guid);
 725                 goto out;
 726         }
 727 
 728         /*
 729          * query HCA to get info of all ports
 730          */
 731         status = ibt_query_hca_ports(hca->hca_hdl,
 732             0, &pinfop, &hca->hca_nports, &size);
 733         if (status != IBT_SUCCESS) {
 734                 DERR("ibt_query_all_ports returned %d "
 735                     "(hca_guid 0x%llx)\n", status,
 736                     (longlong_t)hca_guid);
 737                 goto out;
 738         }
 739         hca->hca_ports = pinfop;
 740         hca->hca_pinfosz = size;
 741 
 742         DERR("hca guid 0x%llx, nports %d\n",
 743             (longlong_t)hca_guid, hca->hca_nports);
 744         for (j = 0; j < hca->hca_nports; j++) {
 745                 DERR("port %d: state %d prefix 0x%016llx "
 746                     "guid %016llx\n",
 747                     pinfop[j].p_port_num, pinfop[j].p_linkstate,
 748                     (longlong_t)pinfop[j].p_sgid_tbl[0].gid_prefix,
 749                     (longlong_t)pinfop[j].p_sgid_tbl[0].gid_guid);
 750         }
 751 
 752         mutex_enter(&dp->daplka_mutex);
 753         daplka_enqueue_hca(dp, hca);
 754         mutex_exit(&dp->daplka_mutex);
 755 
 756         return (IBT_SUCCESS);
 757 
 758 out:
 759         (void) ibt_close_hca(hca->hca_hdl);
 760         kmem_free(hca, sizeof (daplka_hca_t));
 761         return (status);
 762 }
 763 
 764 /*
 765  * this function obtains the list of HCAs from IBTF.
 766  * the HCAs are then opened and the returned handles
 767  * and attributes are stored into the global daplka_dev
 768  * structure.
 769  */
 770 static int
 771 daplka_init_hcas(daplka_t *dp)
 772 {
 773         int             i;
 774         ib_guid_t       *hca_guids;
 775         uint32_t        hca_count;
 776 
 777         /*
 778          * get the num & list of HCAs present
 779          */
 780         hca_count = ibt_get_hca_list(&hca_guids);
 781         DERR("No. of HCAs present %d\n", hca_count);
 782 
 783         if (hca_count != 0) {
 784                 /*
 785                  * get the info for each available HCA
 786                  */
 787                 for (i = 0; i < hca_count; i++)
 788                         (void) daplka_init_hca(dp, hca_guids[i]);
 789 
 790                 ibt_free_hca_list(hca_guids, hca_count);
 791         }
 792 
 793         if (dp->daplka_hca_list_head != NULL)
 794                 return (IBT_SUCCESS);
 795         else
 796                 return (IBT_FAILURE);
 797 }
 798 
 799 static int
 800 daplka_fini_hca(daplka_t *dp, daplka_hca_t *hca)
 801 {
 802         ibt_status_t    status;
 803 
 804         if (hca->hca_hdl != NULL) {
 805                 status = ibt_close_hca(hca->hca_hdl);
 806                 if (status != IBT_SUCCESS) {
 807                         DERR("ibt_close_hca returned %d"
 808                             " (hca_guid 0x%llx)\n", status,
 809                             (longlong_t)hca->hca_guid);
 810 
 811                         mutex_enter(&dp->daplka_mutex);
 812                         daplka_enqueue_hca(dp, hca);
 813                         mutex_exit(&dp->daplka_mutex);
 814 
 815                         return (status);
 816                 }
 817         }
 818 
 819         if (hca->hca_ports != NULL)
 820                 ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
 821 
 822         kmem_free(hca, sizeof (daplka_hca_t));
 823         return (IBT_SUCCESS);
 824 }
 825 
 826 /*
 827  * closes all HCAs and frees up the HCA list
 828  */
 829 static int
 830 daplka_fini_hcas(daplka_t *dp)
 831 {
 832         ibt_status_t    status;
 833         daplka_hca_t    *hca;
 834 
 835         mutex_enter(&daplka_dev->daplka_mutex);
 836         while ((hca = dp->daplka_hca_list_head) != NULL) {
 837                 if (DAPLKA_HCA_BUSY(hca)) {
 838                         mutex_exit(&daplka_dev->daplka_mutex);
 839                         return (IBT_HCA_RESOURCES_NOT_FREED);
 840                 }
 841                 daplka_dequeue_hca(daplka_dev, hca);
 842                 mutex_exit(&daplka_dev->daplka_mutex);
 843 
 844                 if ((status = daplka_fini_hca(dp, hca)) != IBT_SUCCESS)
 845                         return (status);
 846 
 847                 mutex_enter(&daplka_dev->daplka_mutex);
 848         }
 849         mutex_exit(&daplka_dev->daplka_mutex);
 850 
 851         DERR("dapl kernel agent unloaded\n");
 852         return (IBT_SUCCESS);
 853 }
 854 
 855 
 856 /*
 857  * Attach the device, create and fill in daplka_dev
 858  */
 859 static int
 860 daplka_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 861 {
 862         daplka_t        *dp;
 863         int             instance, retval, err;
 864         boolean_t       sp_htbl_allocated = B_FALSE;
 865         boolean_t       timer_htbl_allocated = B_FALSE;
 866         boolean_t       shared_mr_tree_allocated = B_FALSE;
 867 
 868         switch (cmd) {
 869         case DDI_ATTACH:
 870                 break;
 871         case DDI_RESUME:
 872                 return (DDI_SUCCESS);
 873         default:
 874                 return (DDI_FAILURE);
 875         }
 876 
 877         /*
 878          * Allocate soft data structure
 879          */
 880         instance = ddi_get_instance(dip);
 881         if (ddi_soft_state_zalloc(daplka_state, instance) != DDI_SUCCESS) {
 882                 DERR("attach: bad state zalloc\n");
 883                 return (DDI_FAILURE);
 884         }
 885 
 886         dp = ddi_get_soft_state(daplka_state, instance);
 887         if (dp == NULL) {
 888                 ddi_soft_state_free(daplka_state, instance);
 889                 DERR("attach: cannot get soft state\n");
 890                 return (DDI_FAILURE);
 891         }
 892         /*
 893          * Stuff private info into dip.
 894          */
 895         dp->daplka_dip = dip;
 896         ddi_set_driver_private(dip, dp);
 897         daplka_dev = dp;
 898         mutex_init(&dp->daplka_mutex, NULL, MUTEX_DRIVER, NULL);
 899 
 900         /*
 901          * Register driver with IBTF
 902          */
 903         retval = ibt_attach(&daplka_clnt_modinfo, dip, dp,
 904             &dp->daplka_clnt_hdl);
 905         if (retval != IBT_SUCCESS) {
 906                 DERR("attach: ibt_attach failed: error = %d\n", retval);
 907                 retval = DDI_FAILURE;
 908                 goto error;
 909         }
 910         /* Register to receive SM events */
 911         ibt_register_subnet_notices(dp->daplka_clnt_hdl,
 912             daplka_sm_notice_handler, NULL);
 913 
 914         retval = daplka_init_hcas(dp);
 915         if (retval != IBT_SUCCESS) {
 916                 DERR("attach: hca_init failed: error = %d\n", retval);
 917                 retval = DDI_FAILURE;
 918                 goto error;
 919         }
 920         /*
 921          * this table is used by cr_handoff
 922          */
 923         retval = daplka_hash_create(&daplka_global_sp_htbl,
 924             DAPLKA_G_SP_HTBL_SZ, daplka_hash_sp_unref,
 925             daplka_hash_generic_lookup);
 926         if (retval != 0) {
 927                 DERR("attach: cannot create sp hash table\n");
 928                 retval = DDI_FAILURE;
 929                 goto error;
 930         }
 931         sp_htbl_allocated = B_TRUE;
 932 
 933         /*
 934          * this table stores per EP timer information.
 935          * timer_info_t objects are inserted into this table whenever
 936          * a EP timer is set. timers get removed when they expire
 937          * or when they get cancelled.
 938          */
 939         retval = daplka_hash_create(&daplka_timer_info_htbl,
 940             DAPLKA_TIMER_HTBL_SZ, daplka_hash_timer_free, NULL);
 941         if (retval != 0) {
 942                 DERR("attach: cannot create timer hash table\n");
 943                 retval = DDI_FAILURE;
 944                 goto error;
 945         }
 946         timer_htbl_allocated = B_TRUE;
 947 
 948         /*
 949          * this taskq is currently only used for processing timers.
 950          * other processing may also use this taskq in the future.
 951          */
 952         daplka_taskq = taskq_create(DAPLKA_DRV_NAME, DAPLKA_TQ_NTHREADS,
 953             maxclsyspri, 1, DAPLKA_TQ_NTHREADS, TASKQ_DYNAMIC);
 954         if (daplka_taskq == NULL) {
 955                 DERR("attach: cannot create daplka_taskq\n");
 956                 retval = DDI_FAILURE;
 957                 goto error;
 958         }
 959 
 960         /*
 961          * daplka_shared_mr_tree holds daplka_shared_mr_t objects that
 962          * gets retrieved or created when daplka_mr_register_shared is
 963          * called.
 964          */
 965         mutex_init(&daplka_shared_mr_lock, NULL, MUTEX_DRIVER, NULL);
 966 
 967         avl_create(&daplka_shared_mr_tree, daplka_shared_mr_cmp,
 968             sizeof (daplka_shared_mr_t),
 969             offsetof(daplka_shared_mr_t, smr_node));
 970         shared_mr_tree_allocated = B_TRUE;
 971 
 972         /*
 973          * Create the filesystem device node.
 974          */
 975         if (ddi_create_minor_node(dip, DAPLKA_MINOR_NAME, S_IFCHR,
 976             0, DDI_PSEUDO, NULL) != DDI_SUCCESS) {
 977                 DERR("attach: bad create_minor_node\n");
 978                 retval = DDI_FAILURE;
 979                 goto error;
 980         }
 981         dp->daplka_status = DAPLKA_STATE_ATTACHED;
 982         ddi_report_dev(dip);
 983         return (DDI_SUCCESS);
 984 
 985 error:
 986         if (shared_mr_tree_allocated) {
 987                 avl_destroy(&daplka_shared_mr_tree);
 988                 mutex_destroy(&daplka_shared_mr_lock);
 989         }
 990 
 991         if (daplka_taskq) {
 992                 taskq_destroy(daplka_taskq);
 993                 daplka_taskq = NULL;
 994         }
 995 
 996         if (timer_htbl_allocated) {
 997                 daplka_hash_destroy(&daplka_timer_info_htbl);
 998         }
 999 
1000         if (sp_htbl_allocated) {
1001                 daplka_hash_destroy(&daplka_global_sp_htbl);
1002         }
1003 
1004         err = daplka_fini_hcas(dp);
1005         if (err != IBT_SUCCESS) {
1006                 DERR("attach: hca_fini returned %d\n", err);
1007         }
1008 
1009         if (dp->daplka_clnt_hdl != NULL) {
1010                 /* unregister SM event notification */
1011                 ibt_register_subnet_notices(dp->daplka_clnt_hdl,
1012                     (ibt_sm_notice_handler_t)NULL, NULL);
1013                 err = ibt_detach(dp->daplka_clnt_hdl);
1014 
1015                 if (err != IBT_SUCCESS) {
1016                         DERR("attach: ibt_detach returned %d\n", err);
1017                 }
1018         }
1019         mutex_destroy(&dp->daplka_mutex);
1020 
1021         if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
1022                 ddi_remove_minor_node(dip, NULL);
1023         }
1024         ddi_soft_state_free(daplka_state, instance);
1025         return (retval);
1026 }
1027 
1028 /*
1029  * Detach - Free resources allocated in attach
1030  */
1031 /* ARGSUSED */
1032 static int
1033 daplka_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1034 {
1035         int             instance, err;
1036         void            *cookie = NULL;
1037         daplka_t        *dp;
1038 
1039         if (cmd != DDI_DETACH) {
1040                 return (DDI_FAILURE);
1041         }
1042         if (daplka_resource.daplka_rc_cnt > 0 ||
1043             daplka_pending_close > 0) {
1044                 DERR("detach: driver in use\n");
1045                 return (DDI_FAILURE);
1046         }
1047 
1048         instance = ddi_get_instance(dip);
1049         dp = ddi_get_soft_state(daplka_state, instance);
1050         if (dp == NULL) {
1051                 DERR("detach: cannot get soft state\n");
1052                 return (DDI_FAILURE);
1053         }
1054         err = daplka_fini_hcas(dp);
1055         if (err != IBT_SUCCESS) {
1056                 DERR("detach: hca_fini returned %d\n", err);
1057                 return (DDI_FAILURE);
1058         }
1059         if (dp->daplka_clnt_hdl != NULL) {
1060                 /* unregister SM event notification */
1061                 ibt_register_subnet_notices(dp->daplka_clnt_hdl,
1062                     (ibt_sm_notice_handler_t)NULL, NULL);
1063                 err = ibt_detach(dp->daplka_clnt_hdl);
1064                 if (err != IBT_SUCCESS) {
1065                         DERR("detach: ibt_detach returned %d\n", err);
1066                         return (DDI_FAILURE);
1067                 }
1068                 dp->daplka_clnt_hdl = NULL;
1069         }
1070         mutex_destroy(&dp->daplka_mutex);
1071         if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
1072                 ddi_remove_minor_node(dip, NULL);
1073         }
1074         dp->daplka_status = DAPLKA_STATE_DETACHED;
1075         ddi_soft_state_free(daplka_state, instance);
1076         daplka_dev = NULL;
1077 
1078         /*
1079          * by the time we get here, all clients of dapl should
1080          * have exited and completed their cleanup properly.
1081          * we can assert that all global data structures are now
1082          * empty.
1083          */
1084         ASSERT(avl_destroy_nodes(&daplka_shared_mr_tree, &cookie) == NULL);
1085         avl_destroy(&daplka_shared_mr_tree);
1086         mutex_destroy(&daplka_shared_mr_lock);
1087 
1088         ASSERT(daplka_hash_getsize(&daplka_timer_info_htbl) == 0);
1089         daplka_hash_destroy(&daplka_timer_info_htbl);
1090 
1091         ASSERT(daplka_hash_getsize(&daplka_global_sp_htbl) == 0);
1092         daplka_hash_destroy(&daplka_global_sp_htbl);
1093 
1094         taskq_destroy(daplka_taskq);
1095 
1096         return (DDI_SUCCESS);
1097 }
1098 
1099 /* ARGSUSED */
1100 static int
1101 daplka_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1102 {
1103         switch (infocmd) {
1104         case DDI_INFO_DEVT2DEVINFO:
1105                 if (daplka_dev !=  NULL) {
1106                         *result = daplka_dev->daplka_dip;
1107                         return (DDI_SUCCESS);
1108                 } else {
1109                         return (DDI_FAILURE);
1110                 }
1111 
1112         case DDI_INFO_DEVT2INSTANCE:
1113                 *result = 0;
1114                 return (DDI_SUCCESS);
1115 
1116         default:
1117                 return (DDI_FAILURE);
1118         }
1119 }
1120 
1121 /*
1122  * creates a EP resource.
1123  * A EP resource contains a RC channel. A EP resource holds a
1124  * reference to a send_evd (for the send CQ), recv_evd (for the
1125  * recv CQ), a connection evd and a PD. These references ensure
1126  * that the referenced resources are not freed until the EP itself
1127  * gets freed.
1128  */
1129 /* ARGSUSED */
1130 static int
1131 daplka_ep_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
1132         cred_t *cred, int *rvalp)
1133 {
1134         daplka_ep_resource_t            *ep_rp;
1135         daplka_pd_resource_t            *pd_rp;
1136         dapl_ep_create_t                args;
1137         ibt_rc_chan_alloc_args_t        chan_args;
1138         ibt_chan_alloc_flags_t          achan_flags;
1139         ibt_chan_sizes_t                chan_real_sizes;
1140         ibt_hca_attr_t                  *hca_attrp;
1141         uint64_t                        ep_hkey = 0;
1142         boolean_t                       inserted = B_FALSE;
1143         uint32_t                        old_state, new_state;
1144         int                             retval;
1145         ibt_status_t                    status;
1146 
1147         D3("ep_create: enter\n");
1148         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_create_t),
1149             mode);
1150         if (retval != 0) {
1151                 DERR("ep_create: copyin error %d\n", retval);
1152                 return (EFAULT);
1153         }
1154         ep_rp = kmem_zalloc(sizeof (daplka_ep_resource_t), daplka_km_flags);
1155         if (ep_rp == NULL) {
1156                 DERR("ep_create: cannot allocate ep_rp\n");
1157                 return (ENOMEM);
1158         }
1159         DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP,
1160             DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy);
1161 
1162         mutex_init(&ep_rp->ep_lock, NULL, MUTEX_DRIVER, NULL);
1163         cv_init(&ep_rp->ep_cv, NULL, CV_DRIVER, NULL);
1164         ep_rp->ep_hca = ia_rp->ia_hca;
1165         ep_rp->ep_cookie = args.ep_cookie;
1166         ep_rp->ep_timer_hkey = 0;
1167 
1168         /*
1169          * we don't have to use ep_get_state here because ep_rp is not in
1170          * ep_htbl yet. refer to the description of daplka_ep_set_state
1171          * for details about the EP state machine.
1172          */
1173         ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
1174         new_state = old_state = DAPLKA_EP_STATE_CLOSED;
1175 
1176         /* get reference to send evd and get cq handle */
1177         ep_rp->ep_snd_evd = (daplka_evd_resource_t *)
1178             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_snd_evd_hkey);
1179         if (ep_rp->ep_snd_evd == NULL) {
1180                 DERR("ep_create: ep_snd_evd %llx not found\n",
1181                     args.ep_snd_evd_hkey);
1182                 retval = EINVAL;
1183                 goto cleanup;
1184         }
1185         chan_args.rc_scq = ep_rp->ep_snd_evd->evd_cq_hdl;
1186         if (chan_args.rc_scq == NULL) {
1187                 DERR("ep_create: ep_snd_evd cq invalid\n");
1188                 retval = EINVAL;
1189                 goto cleanup;
1190         }
1191 
1192         /* get reference to recv evd and get cq handle */
1193         ep_rp->ep_rcv_evd = (daplka_evd_resource_t *)
1194             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_rcv_evd_hkey);
1195         if (ep_rp->ep_rcv_evd == NULL) {
1196                 DERR("ep_create: ep_rcv_evd %llx not found\n",
1197                     args.ep_rcv_evd_hkey);
1198                 retval = EINVAL;
1199                 goto cleanup;
1200         }
1201         chan_args.rc_rcq = ep_rp->ep_rcv_evd->evd_cq_hdl;
1202         if (chan_args.rc_rcq == NULL) {
1203                 DERR("ep_create: ep_rcv_evd cq invalid\n");
1204                 retval = EINVAL;
1205                 goto cleanup;
1206         }
1207 
1208         /* get reference to conn evd */
1209         ep_rp->ep_conn_evd = (daplka_evd_resource_t *)
1210             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_conn_evd_hkey);
1211         if (ep_rp->ep_conn_evd == NULL) {
1212                 DERR("ep_create: ep_conn_evd %llx not found\n",
1213                     args.ep_conn_evd_hkey);
1214                 retval = EINVAL;
1215                 goto cleanup;
1216         }
1217 
1218         /* get reference to SRQ if needed */
1219         if (args.ep_srq_attached) {
1220                 ep_rp->ep_srq_res = (daplka_srq_resource_t *)daplka_hash_lookup(
1221                     &ia_rp->ia_srq_htbl, args.ep_srq_hkey);
1222                 if (ep_rp->ep_srq_res == NULL) {
1223                         DERR("ep_create: ep_srq %llx not found\n",
1224                             (longlong_t)args.ep_srq_hkey);
1225                         retval = EINVAL;
1226                         goto cleanup;
1227                 }
1228                 ASSERT(DAPLKA_RS_TYPE(ep_rp->ep_srq_res) == DAPL_TYPE_SRQ);
1229                 D3("ep_create: ep_srq %p %llx\n", ep_rp->ep_srq_res,
1230                     (longlong_t)args.ep_srq_hkey);
1231         } else {
1232                 ep_rp->ep_srq_res = NULL;
1233         }
1234 
1235         /* get pd handle */
1236         pd_rp = (daplka_pd_resource_t *)
1237             daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.ep_pd_hkey);
1238         if (pd_rp == NULL) {
1239                 DERR("ep_create: cannot find pd resource\n");
1240                 retval = EINVAL;
1241                 goto cleanup;
1242         }
1243         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
1244         ep_rp->ep_pd_res = pd_rp;
1245         chan_args.rc_pd = pd_rp->pd_hdl;
1246 
1247 
1248         /*
1249          * these checks ensure that the requested channel sizes
1250          * are within the limits supported by the chosen HCA.
1251          */
1252         hca_attrp = &ia_rp->ia_hca->hca_attr;
1253         if (args.ep_ch_sizes.dcs_sq_sgl > hca_attrp->hca_max_sgl) {
1254                 DERR("ep_create: invalid cs_sq_sgl %d\n",
1255                     args.ep_ch_sizes.dcs_sq_sgl);
1256                 retval = EINVAL;
1257                 goto cleanup;
1258         }
1259         if (args.ep_ch_sizes.dcs_rq_sgl > hca_attrp->hca_max_sgl) {
1260                 DERR("ep_create: invalid cs_rq_sgl %d\n",
1261                     args.ep_ch_sizes.dcs_rq_sgl);
1262                 retval = EINVAL;
1263                 goto cleanup;
1264         }
1265         if (args.ep_ch_sizes.dcs_sq > hca_attrp->hca_max_chan_sz) {
1266                 DERR("ep_create: invalid cs_sq %d\n",
1267                     args.ep_ch_sizes.dcs_sq);
1268                 retval = EINVAL;
1269                 goto cleanup;
1270         }
1271         if (args.ep_ch_sizes.dcs_rq > hca_attrp->hca_max_chan_sz) {
1272                 DERR("ep_create: invalid cs_rq %d\n",
1273                     args.ep_ch_sizes.dcs_rq);
1274                 retval = EINVAL;
1275                 goto cleanup;
1276         }
1277 
1278         chan_args.rc_sizes.cs_sq_sgl = args.ep_ch_sizes.dcs_sq_sgl;
1279         chan_args.rc_sizes.cs_rq_sgl = args.ep_ch_sizes.dcs_rq_sgl;
1280         chan_args.rc_sizes.cs_sq = args.ep_ch_sizes.dcs_sq;
1281         chan_args.rc_sizes.cs_rq = args.ep_ch_sizes.dcs_rq;
1282         chan_args.rc_flags = IBT_WR_SIGNALED;
1283         chan_args.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
1284         chan_args.rc_hca_port_num = ia_rp->ia_port_num;
1285         chan_args.rc_clone_chan = NULL;
1286         if (args.ep_srq_attached) {
1287                 chan_args.rc_srq = ep_rp->ep_srq_res->srq_hdl;
1288         } else {
1289                 chan_args.rc_srq = NULL;
1290         }
1291 
1292         D3("ep_create: sq_sgl %d, rq_sgl %d, sq %d, rq %d, "
1293             "sig_type 0x%x, control 0x%x, portnum %d, clone_chan 0x%p\n",
1294             args.ep_ch_sizes.dcs_sq_sgl, args.ep_ch_sizes.dcs_rq_sgl,
1295             args.ep_ch_sizes.dcs_sq, args.ep_ch_sizes.dcs_rq,
1296             chan_args.rc_flags, chan_args.rc_control,
1297             chan_args.rc_hca_port_num, chan_args.rc_clone_chan);
1298 
1299         if (args.ep_srq_attached) {
1300                 achan_flags = IBT_ACHAN_USER_MAP | IBT_ACHAN_USES_SRQ;
1301         } else {
1302                 achan_flags = IBT_ACHAN_USER_MAP;
1303         }
1304         /* create rc channel */
1305         status = daplka_ibt_alloc_rc_channel(ep_rp, ia_rp->ia_hca_hdl,
1306             achan_flags, &chan_args, &ep_rp->ep_chan_hdl,
1307             &chan_real_sizes);
1308         if (status != IBT_SUCCESS) {
1309                 DERR("ep_create: alloc_rc_channel returned %d\n", status);
1310                 *rvalp = (int)status;
1311                 retval = 0;
1312                 goto cleanup;
1313         }
1314 
1315         args.ep_ch_real_sizes.dcs_sq = chan_real_sizes.cs_sq;
1316         args.ep_ch_real_sizes.dcs_rq = chan_real_sizes.cs_rq;
1317         args.ep_ch_real_sizes.dcs_sq_sgl = chan_real_sizes.cs_sq_sgl;
1318         args.ep_ch_real_sizes.dcs_rq_sgl = chan_real_sizes.cs_rq_sgl;
1319 
1320         /*
1321          * store ep ptr with chan_hdl.
1322          * this ep_ptr is used by the CM handlers (both active and
1323          * passive)
1324          * mutex is only needed for race of "destroy" and "async"
1325          */
1326         mutex_enter(&daplka_dev->daplka_mutex);
1327         ibt_set_chan_private(ep_rp->ep_chan_hdl, (void *)ep_rp);
1328         mutex_exit(&daplka_dev->daplka_mutex);
1329 
1330         /* Get HCA-specific data_out info */
1331         status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
1332             IBT_CI_NO_FLAGS, IBT_HDL_CHANNEL, (void *)ep_rp->ep_chan_hdl,
1333             &args.ep_qp_data_out, sizeof (args.ep_qp_data_out));
1334 
1335         if (status != IBT_SUCCESS) {
1336                 DERR("ep_create: ibt_ci_data_out error(%d)\n",
1337                     status);
1338                 *rvalp = (int)status;
1339                 retval = 0;
1340                 goto cleanup;
1341         }
1342 
1343         /* insert into ep hash table */
1344         retval = daplka_hash_insert(&ia_rp->ia_ep_htbl,
1345             &ep_hkey, (void *)ep_rp);
1346         if (retval != 0) {
1347                 DERR("ep_create: cannot insert ep resource into ep_htbl\n");
1348                 goto cleanup;
1349         }
1350         inserted = B_TRUE;
1351 
1352         /*
1353          * at this point, the ep_rp can be looked up by other threads
1354          * if they manage to guess the correct hkey. but they are not
1355          * permitted to operate on ep_rp until we transition to the
1356          * CLOSED state.
1357          */
1358 
1359         /* return hkey to library */
1360         args.ep_hkey = ep_hkey;
1361 
1362         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ep_create_t),
1363             mode);
1364         if (retval != 0) {
1365                 DERR("ep_create: copyout error %d\n", retval);
1366                 retval = EFAULT;
1367                 goto cleanup;
1368         }
1369 
1370         daplka_ep_set_state(ep_rp, old_state, new_state);
1371         D3("ep_create: exit\n");
1372         return (0);
1373 
1374 cleanup:
1375         if (inserted) {
1376                 daplka_ep_resource_t *free_rp = NULL;
1377 
1378                 (void) daplka_hash_remove(&ia_rp->ia_ep_htbl, ep_hkey,
1379                     (void **)&free_rp);
1380                 if (free_rp != ep_rp) {
1381                         /*
1382                          * this case is impossible because ep_free will
1383                          * wait until our state transition is complete.
1384                          */
1385                         DERR("ep_create: cannot remove ep from hash table\n");
1386                         ASSERT(B_FALSE);
1387                         return (retval);
1388                 }
1389         }
1390         new_state = DAPLKA_EP_STATE_FREED;
1391         daplka_ep_set_state(ep_rp, old_state, new_state);
1392         DAPLKA_RS_UNREF(ep_rp);
1393         return (retval);
1394 }
1395 
1396 /*
1397  * daplka_ep_get_state retrieves the current state of the EP and
1398  * sets the state to TRANSITIONING. if the current state is already
1399  * TRANSITIONING, this function will wait until the state becomes one
1400  * of the other EP states. Most of the EP related ioctls follow the
1401  * call sequence:
1402  *
1403  *      new_state = old_state = daplka_ep_get_state(ep_rp);
1404  *      ...
1405  *      ...some code that affects the EP
1406  *      ...
1407  *      new_state = <NEW_STATE>;
1408  *      daplka_ep_set_state(ep_rp, old_state, new_state);
1409  *
1410  * this call sequence ensures that only one thread may access the EP
1411  * during the time ep_state is in TRANSITIONING. daplka_ep_set_state
1412  * transitions ep_state to new_state and wakes up any waiters blocking
1413  * on ep_cv.
1414  *
1415  */
1416 static uint32_t
1417 daplka_ep_get_state(daplka_ep_resource_t *ep_rp)
1418 {
1419         uint32_t        old_state = 0;
1420 
1421         mutex_enter(&ep_rp->ep_lock);
1422         while (ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING) {
1423                 D2("get_state: wait for state transition to complete\n");
1424                 cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
1425                 D2("get_state: done, curr state = %d\n", ep_rp->ep_state);
1426         }
1427         ASSERT(ep_rp->ep_state != DAPLKA_EP_STATE_TRANSITIONING);
1428         old_state = ep_rp->ep_state;
1429 
1430         /*
1431          * an ep that is in the FREED state cannot transition
1432          * back to any of the regular states
1433          */
1434         if (old_state != DAPLKA_EP_STATE_FREED) {
1435                 ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
1436         }
1437         mutex_exit(&ep_rp->ep_lock);
1438         return (old_state);
1439 }
1440 
1441 /*
1442  * EP state transition diagram
1443  *
1444  *              CLOSED<-------------------
1445  *                |                      |
1446  *                |                      |
1447  *     ------------------------          |
1448  *     |                      |          |
1449  *     |                      |          |
1450  *     v                      v          |
1451  *   CONNECTING       ACCEPTING          |
1452  *     |  |   |       |       |          |
1453  *     |  |   |       |       |          |
1454  *     |  |   |       |       |          |
1455  *     |  |   |_______|_______|          |
1456  *     |  |           |   |   |          |
1457  *     |  |___________|   |   |          |
1458  *     |        |         |   |          |
1459  *     |        v         |   |---->DISCONNECTED
1460  *     |     CONNECTED    |              ^
1461  *     v        |         |              |
1462  *    ABORTING  |---------|--------------|
1463  *     |        |         |              |
1464  *     |        |         v              |
1465  *     |        |-------->DISCONNECTING--|
1466  *     |                                 |
1467  *     |---------------------------------|
1468  *
1469  *      *not shown in this diagram:
1470  *          -loopback transitions
1471  *          -transitions to the FREED state
1472  */
1473 static boolean_t
1474 daplka_ep_transition_is_valid(uint32_t old_state, uint32_t new_state)
1475 {
1476         boolean_t valid = B_FALSE;
1477 
1478         /*
1479          * reseting to the same state is a no-op and is always
1480          * permitted. transitioning to the FREED state indicates
1481          * that the ep is about to be freed and no further operation
1482          * is allowed on it. to support abrupt close, the ep is
1483          * permitted to transition to the FREED state from any state.
1484          */
1485         if (old_state == new_state ||
1486             new_state == DAPLKA_EP_STATE_FREED) {
1487                 return (B_TRUE);
1488         }
1489 
1490         switch (old_state) {
1491         case DAPLKA_EP_STATE_CLOSED:
1492                 /*
1493                  * this is the initial ep_state.
1494                  * a transition to CONNECTING or ACCEPTING may occur
1495                  * upon calling daplka_ep_connect or daplka_cr_accept,
1496                  * respectively.
1497                  */
1498                 if (new_state == DAPLKA_EP_STATE_CONNECTING ||
1499                     new_state == DAPLKA_EP_STATE_ACCEPTING) {
1500                         valid = B_TRUE;
1501                 }
1502                 break;
1503         case DAPLKA_EP_STATE_CONNECTING:
1504                 /*
1505                  * we transition to this state if daplka_ep_connect
1506                  * is successful. from this state, we can transition
1507                  * to CONNECTED if daplka_cm_rc_conn_est gets called;
1508                  * or to DISCONNECTED if daplka_cm_rc_conn_closed or
1509                  * daplka_cm_rc_event_failure gets called. If the
1510                  * client calls daplka_ep_disconnect, we transition
1511                  * to DISCONNECTING. If a timer was set at ep_connect
1512                  * time and if the timer expires prior to any of the
1513                  * CM callbacks, we transition to ABORTING and then
1514                  * to DISCONNECTED.
1515                  */
1516                 if (new_state == DAPLKA_EP_STATE_CONNECTED ||
1517                     new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1518                     new_state == DAPLKA_EP_STATE_DISCONNECTED ||
1519                     new_state == DAPLKA_EP_STATE_ABORTING) {
1520                         valid = B_TRUE;
1521                 }
1522                 break;
1523         case DAPLKA_EP_STATE_ACCEPTING:
1524                 /*
1525                  * we transition to this state if daplka_cr_accept
1526                  * is successful. from this state, we can transition
1527                  * to CONNECTED if daplka_cm_service_conn_est gets called;
1528                  * or to DISCONNECTED if daplka_cm_service_conn_closed or
1529                  * daplka_cm_service_event_failure gets called. If the
1530                  * client calls daplka_ep_disconnect, we transition to
1531                  * DISCONNECTING.
1532                  */
1533                 if (new_state == DAPLKA_EP_STATE_CONNECTED ||
1534                     new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1535                     new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1536                         valid = B_TRUE;
1537                 }
1538                 break;
1539         case DAPLKA_EP_STATE_CONNECTED:
1540                 /*
1541                  * we transition to this state if a active or passive
1542                  * connection gets established. if the client calls
1543                  * daplka_ep_disconnect, we transition to the
1544                  * DISCONNECTING state. subsequent CM callbacks will
1545                  * cause ep_state to be set to DISCONNECTED. If the
1546                  * remote peer terminates the connection before we do,
1547                  * it is possible for us to transition directly from
1548                  * CONNECTED to DISCONNECTED.
1549                  */
1550                 if (new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1551                     new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1552                         valid = B_TRUE;
1553                 }
1554                 break;
1555         case DAPLKA_EP_STATE_DISCONNECTING:
1556                 /*
1557                  * we transition to this state if the client calls
1558                  * daplka_ep_disconnect.
1559                  */
1560                 if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1561                         valid = B_TRUE;
1562                 }
1563                 break;
1564         case DAPLKA_EP_STATE_ABORTING:
1565                 /*
1566                  * we transition to this state if the active side
1567                  * EP timer has expired. this is only a transient
1568                  * state that is set during timer processing. when
1569                  * timer processing completes, ep_state will become
1570                  * DISCONNECTED.
1571                  */
1572                 if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1573                         valid = B_TRUE;
1574                 }
1575                 break;
1576         case DAPLKA_EP_STATE_DISCONNECTED:
1577                 /*
1578                  * we transition to this state if we get a closed
1579                  * or event_failure CM callback. an expired timer
1580                  * can also cause us to be in this state. this
1581                  * is the only state in which we permit the
1582                  * ep_reinit operation.
1583                  */
1584                 if (new_state == DAPLKA_EP_STATE_CLOSED) {
1585                         valid = B_TRUE;
1586                 }
1587                 break;
1588         default:
1589                 break;
1590         }
1591 
1592         if (!valid) {
1593                 DERR("ep_transition: invalid state change %d -> %d\n",
1594                     old_state, new_state);
1595         }
1596         return (valid);
1597 }
1598 
1599 /*
1600  * first check if the transition is valid. then set ep_state
1601  * to new_state and wake up all waiters.
1602  */
1603 static void
1604 daplka_ep_set_state(daplka_ep_resource_t *ep_rp, uint32_t old_state,
1605         uint32_t new_state)
1606 {
1607         boolean_t       valid;
1608 
1609         ASSERT(new_state != DAPLKA_EP_STATE_TRANSITIONING);
1610 
1611         valid = daplka_ep_transition_is_valid(old_state, new_state);
1612         mutex_enter(&ep_rp->ep_lock);
1613         if (ep_rp->ep_state != DAPLKA_EP_STATE_FREED) {
1614                 if (valid) {
1615                         ep_rp->ep_state = new_state;
1616                 } else {
1617                         /*
1618                          * this case is impossible.
1619                          * we have a serious problem if we get here.
1620                          * instead of panicing, we reset the state to
1621                          * old_state. doing this would at least prevent
1622                          * threads from hanging due to ep_state being
1623                          * stuck in TRANSITIONING.
1624                          */
1625                         ep_rp->ep_state = old_state;
1626                         ASSERT(B_FALSE);
1627                 }
1628         }
1629         cv_broadcast(&ep_rp->ep_cv);
1630         mutex_exit(&ep_rp->ep_lock);
1631 }
1632 
1633 /*
1634  * modifies RC channel attributes.
1635  * currently, only the rdma_in and rdma_out attributes may
1636  * be modified. the channel must be in quiescent state when
1637  * this function is called.
1638  */
1639 /* ARGSUSED */
1640 static int
1641 daplka_ep_modify(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
1642         cred_t *cred, int *rvalp)
1643 {
1644         daplka_ep_resource_t            *ep_rp = NULL;
1645         ibt_cep_modify_flags_t          good_flags;
1646         ibt_rc_chan_modify_attr_t       rcm_attr;
1647         ibt_hca_attr_t                  *hca_attrp;
1648         dapl_ep_modify_t                args;
1649         ibt_status_t                    status;
1650         uint32_t                        old_state, new_state;
1651         int                             retval = 0;
1652 
1653         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_modify_t),
1654             mode);
1655         if (retval != 0) {
1656                 DERR("ep_modify: copyin error %d\n", retval);
1657                 return (EFAULT);
1658         }
1659         ep_rp = (daplka_ep_resource_t *)
1660             daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epm_hkey);
1661         if (ep_rp == NULL) {
1662                 DERR("ep_modify: cannot find ep resource\n");
1663                 return (EINVAL);
1664         }
1665         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
1666         new_state = old_state = daplka_ep_get_state(ep_rp);
1667 
1668         if (old_state != DAPLKA_EP_STATE_CLOSED &&
1669             old_state != DAPLKA_EP_STATE_DISCONNECTED) {
1670                 DERR("ep_modify: invalid state %d\n", old_state);
1671                 retval = EINVAL;
1672                 goto cleanup;
1673         }
1674 
1675         good_flags = IBT_CEP_SET_RDMARA_OUT | IBT_CEP_SET_RDMARA_IN;
1676         if ((args.epm_flags & ~good_flags) != 0) {
1677                 DERR("ep_modify: invalid flags 0x%x\n", args.epm_flags);
1678                 retval = EINVAL;
1679                 goto cleanup;
1680         }
1681 
1682         hca_attrp = &ia_rp->ia_hca->hca_attr;
1683 
1684         bzero(&rcm_attr, sizeof (ibt_rc_chan_modify_attr_t));
1685         if ((args.epm_flags & IBT_CEP_SET_RDMARA_OUT) != 0) {
1686                 if (args.epm_rdma_ra_out > hca_attrp->hca_max_rdma_out_chan) {
1687                         DERR("ep_modify: invalid epm_rdma_ra_out %d\n",
1688                             args.epm_rdma_ra_out);
1689                         retval = EINVAL;
1690                         goto cleanup;
1691                 }
1692                 rcm_attr.rc_rdma_ra_out = args.epm_rdma_ra_out;
1693         }
1694         if ((args.epm_flags & IBT_CEP_SET_RDMARA_IN) != 0) {
1695                 if (args.epm_rdma_ra_in > hca_attrp->hca_max_rdma_in_chan) {
1696                         DERR("ep_modify: epm_rdma_ra_in %d\n",
1697                             args.epm_rdma_ra_in);
1698                         retval = EINVAL;
1699                         goto cleanup;
1700                 }
1701                 rcm_attr.rc_rdma_ra_in = args.epm_rdma_ra_in;
1702         }
1703         status = ibt_modify_rc_channel(ep_rp->ep_chan_hdl, args.epm_flags,
1704             &rcm_attr, NULL);
1705         if (status != IBT_SUCCESS) {
1706                 DERR("ep_modify: modify_rc_channel returned %d\n", status);
1707                 *rvalp = (int)status;
1708                 retval = 0;
1709                 goto cleanup;
1710         }
1711 
1712         /*
1713          * ep_modify does not change ep_state
1714          */
1715 cleanup:;
1716         daplka_ep_set_state(ep_rp, old_state, new_state);
1717         DAPLKA_RS_UNREF(ep_rp);
1718         return (retval);
1719 }
1720 
1721 /*
1722  * Frees a EP resource.
1723  * a EP may only be freed when it is in the CLOSED or
1724  * DISCONNECTED state.
1725  */
1726 /* ARGSUSED */
1727 static int
1728 daplka_ep_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
1729         cred_t *cred, int *rvalp)
1730 {
1731         daplka_ep_resource_t    *ep_rp = NULL;
1732         dapl_ep_free_t          args;
1733         uint32_t                old_state, new_state;
1734         int                     retval;
1735 
1736         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_free_t), mode);
1737         if (retval != 0) {
1738                 DERR("ep_free: copyin error %d\n", retval);
1739                 return (EFAULT);
1740         }
1741         ep_rp = (daplka_ep_resource_t *)
1742             daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epf_hkey);
1743         if (ep_rp == NULL) {
1744                 DERR("ep_free: cannot find ep resource\n");
1745                 return (EINVAL);
1746         }
1747         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
1748         new_state = old_state = daplka_ep_get_state(ep_rp);
1749 
1750         /*
1751          * ep cannot be freed if it is in an invalid state.
1752          */
1753         if (old_state != DAPLKA_EP_STATE_CLOSED &&
1754             old_state != DAPLKA_EP_STATE_DISCONNECTED) {
1755                 DERR("ep_free: invalid state %d\n", old_state);
1756                 retval = EINVAL;
1757                 goto cleanup;
1758         }
1759         ep_rp = NULL;
1760         retval = daplka_hash_remove(&ia_rp->ia_ep_htbl,
1761             args.epf_hkey, (void **)&ep_rp);
1762         if (retval != 0 || ep_rp == NULL) {
1763                 /*
1764                  * this is only possible if we have two threads
1765                  * calling ep_free in parallel.
1766                  */
1767                 DERR("ep_free: cannot find ep resource\n");
1768                 goto cleanup;
1769         }
1770         /* there should not be any outstanding timers */
1771         ASSERT(ep_rp->ep_timer_hkey == 0);
1772 
1773         new_state = DAPLKA_EP_STATE_FREED;
1774         daplka_ep_set_state(ep_rp, old_state, new_state);
1775 
1776         /* remove reference obtained by lookup */
1777         DAPLKA_RS_UNREF(ep_rp);
1778 
1779         /* UNREF calls the actual free function when refcnt is zero */
1780         DAPLKA_RS_UNREF(ep_rp);
1781         return (0);
1782 
1783 cleanup:;
1784         daplka_ep_set_state(ep_rp, old_state, new_state);
1785 
1786         /* remove reference obtained by lookup */
1787         DAPLKA_RS_UNREF(ep_rp);
1788         return (retval);
1789 }
1790 
/*
 * The following routines support the timeout feature of ep_connect.
 * Refer to the description of ep_connect for details.
 */
1795 
1796 /*
1797  * this is the timer processing thread.
1798  */
1799 static void
1800 daplka_timer_thread(void *arg)
1801 {
1802         daplka_timer_info_t     *timerp = (daplka_timer_info_t *)arg;
1803         daplka_ep_resource_t    *ep_rp;
1804         daplka_evd_event_t      *disc_ev = NULL;
1805         ibt_status_t            status;
1806         int                     old_state, new_state;
1807 
1808         ep_rp = timerp->ti_ep_res;
1809         ASSERT(ep_rp != NULL);
1810         ASSERT(timerp->ti_tmo_id != 0);
1811         timerp->ti_tmo_id = 0;
1812 
1813         new_state = old_state = daplka_ep_get_state(ep_rp);
1814         if (old_state != DAPLKA_EP_STATE_CONNECTING) {
1815                 /* unblock hash_ep_free */
1816                 mutex_enter(&ep_rp->ep_lock);
1817                 ASSERT(ep_rp->ep_timer_hkey != 0);
1818                 ep_rp->ep_timer_hkey = 0;
1819                 cv_broadcast(&ep_rp->ep_cv);
1820                 mutex_exit(&ep_rp->ep_lock);
1821 
1822                 /* reset state to original state */
1823                 daplka_ep_set_state(ep_rp, old_state, new_state);
1824 
1825                 /* this function will also unref ep_rp */
1826                 daplka_timer_info_free(timerp);
1827                 return;
1828         }
1829 
1830         ASSERT(ep_rp->ep_timer_hkey != 0);
1831         ep_rp->ep_timer_hkey = 0;
1832 
1833         /*
1834          * we cannot keep ep_state in TRANSITIONING if we call
1835          * ibt_close_rc_channel in blocking mode. this would cause
1836          * a deadlock because the cm callbacks will be blocked and
1837          * will not be able to wake us up.
1838          */
1839         new_state = DAPLKA_EP_STATE_ABORTING;
1840         daplka_ep_set_state(ep_rp, old_state, new_state);
1841 
1842         /*
1843          * when we return from close_rc_channel, all callbacks should have
1844          * completed. we can also be certain that these callbacks did not
1845          * enqueue any events to conn_evd.
1846          */
1847         status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
1848             NULL, 0, NULL, NULL, NULL);
1849         if (status != IBT_SUCCESS) {
1850                 DERR("timer_thread: ibt_close_rc_channel returned %d\n",
1851                     status);
1852         }
1853         old_state = daplka_ep_get_state(ep_rp);
1854 
1855         /*
1856          * this is the only thread that can transition ep_state out
1857          * of ABORTING. all other ep operations would fail when
1858          * ep_state is in ABORTING.
1859          */
1860         ASSERT(old_state == DAPLKA_EP_STATE_ABORTING);
1861 
1862         disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_SLEEP);
1863         ASSERT(disc_ev != NULL);
1864 
1865         disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
1866         disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
1867         disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
1868         disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
1869         disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
1870         disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
1871 
1872         D2("timer_thread: enqueue event(%p) evdp(%p)\n",
1873             disc_ev, ep_rp->ep_conn_evd);
1874 
1875         new_state = DAPLKA_EP_STATE_DISCONNECTED;
1876         daplka_ep_set_state(ep_rp, old_state, new_state);
1877 
1878         daplka_evd_wakeup(ep_rp->ep_conn_evd,
1879             &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
1880 
1881         /* this function will also unref ep_rp */
1882         daplka_timer_info_free(timerp);
1883 }
1884 
1885 /*
1886  * dispatches a thread to continue with timer processing.
1887  */
1888 static void
1889 daplka_timer_dispatch(void *arg)
1890 {
1891         /*
1892          * keep rescheduling this function until
1893          * taskq_dispatch succeeds.
1894          */
1895         if (taskq_dispatch(daplka_taskq,
1896             daplka_timer_thread, arg, TQ_NOSLEEP) == 0) {
1897                 DERR("timer_dispatch: taskq_dispatch failed, retrying...\n");
1898                 (void) timeout(daplka_timer_dispatch, arg, 10);
1899         }
1900 }
1901 
1902 /*
1903  * this function is called by the kernel's callout thread.
1904  * we first attempt to remove the timer object from the
1905  * global timer table. if it is found, we dispatch a thread
1906  * to continue processing the timer object. if it is not
1907  * found, that means the timer has been cancelled by someone
1908  * else.
1909  */
1910 static void
1911 daplka_timer_handler(void *arg)
1912 {
1913         uint64_t                timer_hkey = (uintptr_t)arg;
1914         daplka_timer_info_t     *timerp = NULL;
1915 
1916         D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)timer_hkey);
1917 
1918         (void) daplka_hash_remove(&daplka_timer_info_htbl,
1919             timer_hkey, (void **)&timerp);
1920         if (timerp == NULL) {
1921                 D2("timer_handler: timer already cancelled\n");
1922                 return;
1923         }
1924         daplka_timer_dispatch((void *)timerp);
1925 }
1926 
1927 /*
1928  * allocates a timer_info object.
1929  * a reference to a EP is held by this object. this ensures
1930  * that the EP stays valid when a timer is outstanding.
1931  */
1932 static daplka_timer_info_t *
1933 daplka_timer_info_alloc(daplka_ep_resource_t *ep_rp)
1934 {
1935         daplka_timer_info_t     *timerp;
1936 
1937         timerp = kmem_zalloc(sizeof (*timerp), daplka_km_flags);
1938         if (timerp == NULL) {
1939                 DERR("timer_info_alloc: cannot allocate timer info\n");
1940                 return (NULL);
1941         }
1942         timerp->ti_ep_res = ep_rp;
1943         timerp->ti_tmo_id = 0;
1944 
1945         return (timerp);
1946 }
1947 
1948 /*
1949  * Frees the timer_info object.
1950  * we release the EP reference before freeing the object.
1951  */
1952 static void
1953 daplka_timer_info_free(daplka_timer_info_t *timerp)
1954 {
1955         ASSERT(timerp->ti_ep_res != NULL);
1956         DAPLKA_RS_UNREF(timerp->ti_ep_res);
1957         timerp->ti_ep_res = NULL;
1958         ASSERT(timerp->ti_tmo_id == 0);
1959         kmem_free(timerp, sizeof (*timerp));
1960 }
1961 
1962 /*
1963  * cancels the timer set by ep_connect.
1964  * returns -1 if timer handling is in progress
1965  * and 0 otherwise.
1966  */
1967 static int
1968 daplka_cancel_timer(daplka_ep_resource_t *ep_rp)
1969 {
1970         /*
1971          * this function can only be called when ep_state
1972          * is frozen.
1973          */
1974         ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING);
1975         if (ep_rp->ep_timer_hkey != 0) {
1976                 daplka_timer_info_t     *timerp = NULL;
1977 
1978                 (void) daplka_hash_remove(&daplka_timer_info_htbl,
1979                     ep_rp->ep_timer_hkey, (void **)&timerp);
1980                 if (timerp == NULL) {
1981                         /*
1982                          * this is possible if the timer_handler has
1983                          * removed the timerp but the taskq thread has
1984                          * not transitioned the ep_state to DISCONNECTED.
1985                          * we need to reset the ep_state to allow the
1986                          * taskq thread to continue with its work. the
1987                          * taskq thread will set the ep_timer_hkey to 0
1988                          * so we don't have to do it here.
1989                          */
1990                         DERR("cancel_timer: timer is being processed\n");
1991                         return (-1);
1992                 }
1993                 /*
1994                  * we got the timer object. if the handler fires at
1995                  * this point, it will not be able to find the object
1996                  * and will return immediately. normally, ti_tmo_id gets
1997                  * cleared when the handler fires.
1998                  */
1999                 ASSERT(timerp->ti_tmo_id != 0);
2000 
2001                 /*
2002                  * note that untimeout can possibly call the handler.
2003                  * we are safe because the handler will be a no-op.
2004                  */
2005                 (void) untimeout(timerp->ti_tmo_id);
2006                 timerp->ti_tmo_id = 0;
2007                 daplka_timer_info_free(timerp);
2008                 ep_rp->ep_timer_hkey = 0;
2009         }
2010         return (0);
2011 }
2012 
2013 /*
2014  * this function is called by daplka_hash_destroy for
2015  * freeing timer_info objects
2016  */
2017 static void
2018 daplka_hash_timer_free(void *obj)
2019 {
2020         daplka_timer_info_free((daplka_timer_info_t *)obj);
2021 }
2022 
2023 /* ARGSUSED */
2024 static uint16_t
2025 daplka_hellomsg_cksum(DAPL_PRIVATE *dp)
2026 {
2027         uint8_t *bp;
2028         int i;
2029         uint16_t cksum = 0;
2030 
2031         bp = (uint8_t *)dp;
2032         for (i = 0; i < sizeof (DAPL_PRIVATE); i++) {
2033                 cksum += bp[i];
2034         }
2035         return (cksum);
2036 }
2037 
2038 /*
2039  * ep_connect is called by the client to initiate a connection to a
2040  * remote service point. It is a non-blocking call. If a non-zero
2041  * timeout is specified by the client, a timer will be set just before
2042  * returning from ep_connect. Upon a successful return from ep_connect,
2043  * the client will call evd_wait to wait for the connection to complete.
2044  * If the connection is rejected or has failed due to an error, the
2045  * client will be notified with an event containing the appropriate error
2046  * code. If the connection is accepted, the client will be notified with
2047  * the CONN_ESTABLISHED event. If the timer expires before either of the
2048  * above events (error or established), a TIMED_OUT event will be delivered
2049  * to the client.
2050  *
2051  * the complicated part of the timer logic is the handling of race
2052  * conditions with CM callbacks. we need to ensure that either the CM or
2053  * the timer thread gets to deliver an event, but not both. when the
2054  * CM callback is about to deliver an event, it always tries to cancel
2055  * the outstanding timer. if cancel_timer indicates a that the timer is
2056  * already being processed, the CM callback will simply return without
2057  * delivering an event. when the timer thread executes, it tries to check
2058  * if the EP is still in CONNECTING state (timers only work on the active
2059  * side). if the EP is not in this state, the timer thread will return
2060  * without delivering an event.
2061  */
2062 /* ARGSUSED */
2063 static int
2064 daplka_ep_connect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2065         cred_t *cred, int *rvalp)
2066 {
2067         daplka_ep_resource_t    *ep_rp = NULL;
2068         dapl_ep_connect_t       args;
2069         daplka_timer_info_t     *timerp = NULL;
2070         uint32_t                old_state, new_state;
2071         boolean_t               timer_inserted = B_FALSE;
2072         uint64_t                timer_hkey = 0;
2073         ibt_path_info_t         path_info;
2074         ibt_path_attr_t         path_attr;
2075         ibt_hca_attr_t          *hca_attrp;
2076         ibt_chan_open_args_t    chan_args;
2077         ibt_status_t            status = IBT_SUCCESS;
2078         uint8_t                 num_paths;
2079         void                    *priv_data;
2080         DAPL_PRIVATE            *dp;
2081         int                     retval = 0;
2082         ib_gid_t                *sgid;
2083         ib_gid_t                *dgid;
2084         uint64_t                dgid_ored;
2085         ibt_ar_t                ar_query_s;
2086         ibt_ar_t                ar_result_s;
2087         ibt_path_flags_t        pathflags;
2088 
2089         D3("ep_connect: enter\n");
2090         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_connect_t),
2091             mode);
2092         if (retval != 0) {
2093                 DERR("ep_connect: copyin error %d\n", retval);
2094                 return (EFAULT);
2095         }
2096         ep_rp = (daplka_ep_resource_t *)
2097             daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epc_hkey);
2098         if (ep_rp == NULL) {
2099                 DERR("ep_connect: cannot find ep resource\n");
2100                 return (EINVAL);
2101         }
2102         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2103 
2104         new_state = old_state = daplka_ep_get_state(ep_rp);
2105         if (old_state != DAPLKA_EP_STATE_CLOSED) {
2106                 DERR("ep_connect: invalid state %d\n", old_state);
2107                 retval = EINVAL;
2108                 goto cleanup;
2109         }
2110         if (args.epc_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
2111                 DERR("ep_connect: private data len (%d) exceeded "
2112                     "max size %d\n", args.epc_priv_sz,
2113                     DAPL_MAX_PRIVATE_DATA_SIZE);
2114                 retval = EINVAL;
2115                 goto cleanup;
2116         }
2117 
2118         /*
2119          * check for remote ipaddress to dgid resolution needs ATS
2120          */
2121         dgid = &args.epc_dgid;
2122         dgid_ored = dgid->gid_guid | dgid->gid_prefix;
2123 #if defined(DAPLKA_DEBUG_FORCE_ATS)
2124         dgid_ored = 0ULL;
2125 #endif /* DAPLKA_DEBUG_FORCE_ATS */
2126         /* check for unidentified dgid */
2127         if (dgid_ored == 0ULL) {
2128                 /*
2129                  * setup for ibt_query_ar()
2130                  */
2131                 sgid = &ia_rp->ia_hca_sgid;
2132                 ar_query_s.ar_gid.gid_guid = 0ULL;
2133                 ar_query_s.ar_gid.gid_prefix = 0ULL;
2134                 ar_query_s.ar_pkey = 0;
2135                 bcopy(args.epc_raddr_sadata.iad_sadata,
2136                     ar_query_s.ar_data, DAPL_ATS_NBYTES);
2137 #define UR(b) ar_query_s.ar_data[(b)]
2138                 D3("daplka_ep_connect: SA[8] %d.%d.%d.%d\n",
2139                     UR(8), UR(9), UR(10), UR(11));
2140                 D3("daplka_ep_connect: SA[12] %d.%d.%d.%d\n",
2141                     UR(12), UR(13), UR(14), UR(15));
2142                 status = ibt_query_ar(sgid, &ar_query_s, &ar_result_s);
2143                 if (status != IBT_SUCCESS) {
2144                         DERR("ep_connect: ibt_query_ar returned %d\n", status);
2145                         *rvalp = (int)status;
2146                         retval = 0;
2147                         goto cleanup;
2148                 }
2149                 /*
2150                  * dgid identified from SA record
2151                  */
2152                 dgid = &ar_result_s.ar_gid;
2153                 D2("daplka_ep_connect: ATS dgid=%llx:%llx\n",
2154                     (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
2155         }
2156 
2157         bzero(&path_info, sizeof (ibt_path_info_t));
2158         bzero(&path_attr, sizeof (ibt_path_attr_t));
2159         bzero(&chan_args, sizeof (ibt_chan_open_args_t));
2160 
2161         path_attr.pa_dgids = dgid;
2162         path_attr.pa_num_dgids = 1;
2163         /*
2164          * don't set sid in path_attr saves 1 SA query
2165          * Also makes server side not to write the service record
2166          */
2167         path_attr.pa_sgid = ia_rp->ia_hca_sgid;
2168         path_attr.pa_pkey = ia_rp->ia_port_pkey;
2169 
2170         /* save the connection ep  - struct copy */
2171         ep_rp->ep_sgid = ia_rp->ia_hca_sgid;
2172         ep_rp->ep_dgid = *dgid;
2173 
2174         num_paths = 0;
2175         pathflags = IBT_PATH_PKEY;
2176         /* enable APM on remote port but not on loopback case */
2177         if (daplka_apm && ((dgid->gid_prefix != path_attr.pa_sgid.gid_prefix) ||
2178             (dgid->gid_guid != path_attr.pa_sgid.gid_guid))) {
2179                 pathflags |= IBT_PATH_APM;
2180         }
2181         status = ibt_get_paths(daplka_dev->daplka_clnt_hdl,
2182             pathflags, &path_attr, 1, &path_info, &num_paths);
2183 
2184         if (status != IBT_SUCCESS && status != IBT_INSUFF_DATA) {
2185                 DERR("ep_connect: ibt_get_paths returned %d paths %d\n",
2186                     status, num_paths);
2187                 *rvalp = (int)status;
2188                 retval = 0;
2189                 goto cleanup;
2190         }
2191         /* fill in the sid directly to path_info */
2192         path_info.pi_sid = args.epc_sid;
2193         hca_attrp = &ia_rp->ia_hca->hca_attr;
2194 
2195         /* fill in open channel args */
2196         chan_args.oc_path = &path_info;
2197         chan_args.oc_cm_handler = daplka_cm_rc_handler;
2198         chan_args.oc_cm_clnt_private = (void *)ep_rp;
2199         chan_args.oc_rdma_ra_out = hca_attrp->hca_max_rdma_out_chan;
2200         chan_args.oc_rdma_ra_in = hca_attrp->hca_max_rdma_in_chan;
2201         chan_args.oc_path_retry_cnt = 7;        /* 3-bit field */
2202         chan_args.oc_path_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
2203 
2204         ASSERT(args.epc_priv_sz > 0);
2205         priv_data = (void *)args.epc_priv;
2206 
2207         chan_args.oc_priv_data_len = args.epc_priv_sz;
2208         chan_args.oc_priv_data = priv_data;
2209 
2210         /*
2211          * calculate checksum value of hello message and
2212          * put hello message in networking byte order
2213          */
2214         dp = (DAPL_PRIVATE *)priv_data;
2215         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dp))
2216         dp->hello_msg.hi_port = htons(dp->hello_msg.hi_port);
2217         dp->hello_msg.hi_checksum = 0;
2218         dp->hello_msg.hi_checksum = htons(daplka_hellomsg_cksum(dp));
2219         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*dp))
2220 
2221         if (args.epc_timeout > 0) {
2222                 /*
2223                  * increment refcnt before passing reference to
2224                  * timer_info_alloc.
2225                  */
2226                 DAPLKA_RS_REF(ep_rp);
2227                 timerp = daplka_timer_info_alloc(ep_rp);
2228                 if (timerp == NULL) {
2229                         DERR("ep_connect: cannot allocate timer\n");
2230                         /*
2231                          * we need to remove the reference if
2232                          * allocation failed.
2233                          */
2234                         DAPLKA_RS_UNREF(ep_rp);
2235                         retval = ENOMEM;
2236                         goto cleanup;
2237                 }
2238                 /*
2239                  * We generate our own hkeys so that timer_hkey can fit
2240                  * into a pointer and passed as an arg to timeout()
2241                  */
2242                 timer_hkey = (uint64_t)daplka_timer_hkey_gen();
2243                 retval = daplka_hash_insert(&daplka_timer_info_htbl,
2244                     &timer_hkey, (void *)timerp);
2245                 if (retval != 0) {
2246                         DERR("ep_connect: cannot insert timer info\n");
2247                         goto cleanup;
2248                 }
2249                 ASSERT(ep_rp->ep_timer_hkey == 0);
2250                 ep_rp->ep_timer_hkey = timer_hkey;
2251                 timer_inserted = B_TRUE;
2252                 D2("ep_connect: timer_hkey = 0x%llx\n",
2253                     (longlong_t)timer_hkey);
2254         }
2255         status = ibt_open_rc_channel(ep_rp->ep_chan_hdl, IBT_OCHAN_NO_FLAGS,
2256             IBT_NONBLOCKING, &chan_args, NULL);
2257 
2258         if (status != IBT_SUCCESS) {
2259                 DERR("ep_connect: ibt_open_rc_channel returned %d\n", status);
2260                 *rvalp = (int)status;
2261                 retval = 0;
2262                 goto cleanup;
2263         }
2264         /*
2265          * if a cm callback gets called at this point, it'll have to wait until
2266          * ep_state becomes connecting (or some other state if another thread
2267          * manages to get ahead of the callback). this guarantees that the
2268          * callback will not touch the timer until it gets set.
2269          */
2270         if (timerp != NULL) {
2271                 clock_t         tmo;
2272 
2273                 tmo = drv_usectohz((clock_t)args.epc_timeout);
2274                 /*
2275                  * We generate our own 32 bit timer_hkey so that it can fit
2276                  * into a pointer
2277                  */
2278                 ASSERT(timer_hkey != 0);
2279                 timerp->ti_tmo_id = timeout(daplka_timer_handler,
2280                     (void *)(uintptr_t)timer_hkey, tmo);
2281         }
2282         new_state = DAPLKA_EP_STATE_CONNECTING;
2283 
2284 cleanup:;
2285         if (timerp != NULL && (retval != 0 || status != IBT_SUCCESS)) {
2286                 /*
2287                  * if ibt_open_rc_channel failed, the timerp must still
2288                  * be in daplka_timer_info_htbl because neither the cm
2289                  * callback nor the timer_handler will be called.
2290                  */
2291                 if (timer_inserted) {
2292                         daplka_timer_info_t     *new_timerp = NULL;
2293 
2294                         ASSERT(timer_hkey != 0);
2295                         (void) daplka_hash_remove(&daplka_timer_info_htbl,
2296                             timer_hkey, (void **)&new_timerp);
2297                         ASSERT(new_timerp == timerp);
2298                         ep_rp->ep_timer_hkey = 0;
2299                 }
2300                 daplka_timer_info_free(timerp);
2301         }
2302         daplka_ep_set_state(ep_rp, old_state, new_state);
2303         DAPLKA_RS_UNREF(ep_rp);
2304         D3("ep_connect: exit\n");
2305         return (retval);
2306 }
2307 
2308 /*
2309  * ep_disconnect closes a connection with a remote peer.
2310  * if a connection has not been established, ep_disconnect
2311  * will instead flush all recv bufs posted to this channel.
2312  * if the EP state is CONNECTED, CONNECTING or ACCEPTING upon
2313  * entry to ep_disconnect, the EP state will transition to
2314  * DISCONNECTING upon exit. the CM callbacks triggered by
2315  * ibt_close_rc_channel will cause EP state to become
2316  * DISCONNECTED. This function is a no-op if EP state is
2317  * DISCONNECTED.
2318  */
2319 /* ARGSUSED */
2320 static int
2321 daplka_ep_disconnect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2322         cred_t *cred, int *rvalp)
2323 {
2324         daplka_ep_resource_t    *ep_rp = NULL;
2325         dapl_ep_disconnect_t    args;
2326         ibt_status_t            status;
2327         uint32_t                old_state, new_state;
2328         int                     retval = 0;
2329 
2330         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_disconnect_t),
2331             mode);
2332         if (retval != 0) {
2333                 DERR("ep_disconnect: copyin error %d\n", retval);
2334                 return (EFAULT);
2335         }
2336         ep_rp = (daplka_ep_resource_t *)
2337             daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epd_hkey);
2338         if (ep_rp == NULL) {
2339                 DERR("ep_disconnect: cannot find ep resource\n");
2340                 return (EINVAL);
2341         }
2342         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2343 
2344         new_state = old_state = daplka_ep_get_state(ep_rp);
2345         if (old_state != DAPLKA_EP_STATE_CONNECTED &&
2346             old_state != DAPLKA_EP_STATE_CONNECTING &&
2347             old_state != DAPLKA_EP_STATE_ACCEPTING &&
2348             old_state != DAPLKA_EP_STATE_DISCONNECTED &&
2349             old_state != DAPLKA_EP_STATE_DISCONNECTING &&
2350             old_state != DAPLKA_EP_STATE_CLOSED) {
2351                 DERR("ep_disconnect: invalid state %d\n", old_state);
2352                 retval = EINVAL;
2353                 goto cleanup;
2354         }
2355 
2356         if ((old_state == DAPLKA_EP_STATE_DISCONNECTED) ||
2357             (old_state == DAPLKA_EP_STATE_DISCONNECTING)) {
2358                 D2("ep_disconnect: ep already disconnected\n");
2359                 retval = 0;
2360                 /* we leave the state as DISCONNECTED */
2361                 goto cleanup;
2362         }
2363         if (old_state == DAPLKA_EP_STATE_CONNECTING ||
2364             old_state == DAPLKA_EP_STATE_ACCEPTING) {
2365                 D2("ep_disconnect: aborting, old_state = %d\n", old_state);
2366         }
2367 
2368         /*
2369          * according to the udapl spec, ep_disconnect should
2370          * flush the channel if the channel is not CONNECTED.
2371          */
2372         if (old_state == DAPLKA_EP_STATE_CLOSED) {
2373                 status = ibt_flush_channel(ep_rp->ep_chan_hdl);
2374                 if (status != IBT_SUCCESS) {
2375                         DERR("ep_disconnect: ibt_flush_channel failed %d\n",
2376                             status);
2377                         *rvalp = (int)status;
2378                 }
2379                 retval = 0;
2380                 /* we leave the state as CLOSED */
2381                 goto cleanup;
2382         }
2383 
2384         new_state = DAPLKA_EP_STATE_DISCONNECTING;
2385         daplka_ep_set_state(ep_rp, old_state, new_state);
2386         status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_NONBLOCKING,
2387             NULL, 0, NULL, NULL, NULL);
2388 
2389         if (status == IBT_SUCCESS) {
2390                 DAPLKA_RS_UNREF(ep_rp);
2391                 return (retval);
2392         } else {
2393                 DERR("ep_disconnect: ibt_close_rc_channel returned %d\n",
2394                     status);
2395                 *rvalp = (int)status;
2396                 retval = 0;
2397                 new_state = old_state;
2398         }
2399 
2400 cleanup:;
2401         daplka_ep_set_state(ep_rp, old_state, new_state);
2402         DAPLKA_RS_UNREF(ep_rp);
2403         return (retval);
2404 }
2405 
2406 /*
2407  * this function resets the EP to a usable state (ie. from
2408  * DISCONNECTED to CLOSED). this function is best implemented using
2409  * the ibt_recycle_channel interface. until that is available, we will
2410  * instead clone and tear down the existing channel and replace the
2411  * existing channel with the cloned one.
2412  */
2413 /* ARGSUSED */
2414 static int
2415 daplka_ep_reinit(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2416         cred_t *cred, int *rvalp)
2417 {
2418         daplka_ep_resource_t            *ep_rp = NULL;
2419         dapl_ep_reinit_t                args;
2420         ibt_status_t                    status;
2421         uint32_t                        old_state, new_state;
2422         int                             retval = 0;
2423 
2424         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_reinit_t),
2425             mode);
2426         if (retval != 0) {
2427                 DERR("reinit: copyin error %d\n", retval);
2428                 return (EFAULT);
2429         }
2430         ep_rp = (daplka_ep_resource_t *)
2431             daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epri_hkey);
2432         if (ep_rp == NULL) {
2433                 DERR("reinit: cannot find ep resource\n");
2434                 return (EINVAL);
2435         }
2436         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2437         new_state = old_state = daplka_ep_get_state(ep_rp);
2438         if ((old_state != DAPLKA_EP_STATE_CLOSED) &&
2439             (old_state != DAPLKA_EP_STATE_DISCONNECTED)) {
2440                 DERR("reinit: invalid state %d\n", old_state);
2441                 retval = EINVAL;
2442                 goto cleanup;
2443         }
2444 
2445         status = ibt_recycle_rc(ep_rp->ep_chan_hdl,
2446             IBT_CEP_RDMA_RD|IBT_CEP_RDMA_WR,
2447             ia_rp->ia_port_num, NULL, NULL);
2448         if (status != IBT_SUCCESS) {
2449                 DERR("reinit: unable to clone channel\n");
2450                 *rvalp = (int)status;
2451                 retval = 0;
2452                 goto cleanup;
2453         }
2454         new_state = DAPLKA_EP_STATE_CLOSED;
2455 
2456 cleanup:;
2457         daplka_ep_set_state(ep_rp, old_state, new_state);
2458         DAPLKA_RS_UNREF(ep_rp);
2459         return (retval);
2460 }
2461 
2462 /*
2463  * destroys a EP resource.
2464  * called when refcnt drops to zero.
2465  */
2466 static int
2467 daplka_ep_destroy(daplka_resource_t *gen_rp)
2468 {
2469         daplka_ep_resource_t    *ep_rp = (daplka_ep_resource_t *)gen_rp;
2470         ibt_status_t            status;
2471 
2472         ASSERT(DAPLKA_RS_REFCNT(ep_rp) == 0);
2473         ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_FREED);
2474 
2475         /*
2476          * by the time we get here, we can be sure that
2477          * there is no outstanding timer.
2478          */
2479         ASSERT(ep_rp->ep_timer_hkey == 0);
2480 
2481         D3("ep_destroy: entering, ep_rp 0x%p, rnum %d\n",
2482             ep_rp, DAPLKA_RS_RNUM(ep_rp));
2483         /*
2484          * free rc channel
2485          */
2486         if (ep_rp->ep_chan_hdl != NULL) {
2487                 mutex_enter(&daplka_dev->daplka_mutex);
2488                 ibt_set_chan_private(ep_rp->ep_chan_hdl, NULL);
2489                 mutex_exit(&daplka_dev->daplka_mutex);
2490                 status = daplka_ibt_free_channel(ep_rp, ep_rp->ep_chan_hdl);
2491                 if (status != IBT_SUCCESS) {
2492                         DERR("ep_free: ibt_free_channel returned %d\n",
2493                             status);
2494                 }
2495                 ep_rp->ep_chan_hdl = NULL;
2496                 D3("ep_destroy: qp freed, rnum %d\n", DAPLKA_RS_RNUM(ep_rp));
2497         }
2498         /*
2499          * release all references
2500          */
2501         if (ep_rp->ep_snd_evd != NULL) {
2502                 DAPLKA_RS_UNREF(ep_rp->ep_snd_evd);
2503                 ep_rp->ep_snd_evd = NULL;
2504         }
2505         if (ep_rp->ep_rcv_evd != NULL) {
2506                 DAPLKA_RS_UNREF(ep_rp->ep_rcv_evd);
2507                 ep_rp->ep_rcv_evd = NULL;
2508         }
2509         if (ep_rp->ep_conn_evd != NULL) {
2510                 DAPLKA_RS_UNREF(ep_rp->ep_conn_evd);
2511                 ep_rp->ep_conn_evd = NULL;
2512         }
2513         if (ep_rp->ep_srq_res != NULL) {
2514                 DAPLKA_RS_UNREF(ep_rp->ep_srq_res);
2515                 ep_rp->ep_srq_res = NULL;
2516         }
2517         if (ep_rp->ep_pd_res != NULL) {
2518                 DAPLKA_RS_UNREF(ep_rp->ep_pd_res);
2519                 ep_rp->ep_pd_res = NULL;
2520         }
2521         cv_destroy(&ep_rp->ep_cv);
2522         mutex_destroy(&ep_rp->ep_lock);
2523 
2524         DAPLKA_RS_FINI(ep_rp);
2525         kmem_free(ep_rp, sizeof (daplka_ep_resource_t));
2526         D3("ep_destroy: exiting, ep_rp 0x%p\n", ep_rp);
2527         return (0);
2528 }
2529 
2530 /*
2531  * this function is called by daplka_hash_destroy for
2532  * freeing EP resource objects
2533  */
2534 static void
2535 daplka_hash_ep_free(void *obj)
2536 {
2537         daplka_ep_resource_t    *ep_rp = (daplka_ep_resource_t *)obj;
2538         ibt_status_t            status;
2539         uint32_t                old_state, new_state;
2540         int                     retval;
2541 
2542         old_state = daplka_ep_get_state(ep_rp);
2543         retval = daplka_cancel_timer(ep_rp);
2544         new_state = DAPLKA_EP_STATE_FREED;
2545         daplka_ep_set_state(ep_rp, old_state, new_state);
2546 
2547         if (retval != 0) {
2548                 D2("hash_ep_free: ep_rp 0x%p "
2549                     "timer is still being processed\n", ep_rp);
2550                 mutex_enter(&ep_rp->ep_lock);
2551                 if (ep_rp->ep_timer_hkey != 0) {
2552                         D2("hash_ep_free: ep_rp 0x%p "
2553                             "waiting for timer_hkey to be 0\n", ep_rp);
2554                         cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
2555                 }
2556                 mutex_exit(&ep_rp->ep_lock);
2557         }
2558 
2559         /* call ibt_close_rc_channel regardless of what state we are in */
2560         status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
2561             NULL, 0, NULL, NULL, NULL);
2562         if (status != IBT_SUCCESS) {
2563                 if (old_state == DAPLKA_EP_STATE_CONNECTED ||
2564                     old_state == DAPLKA_EP_STATE_CONNECTING ||
2565                     old_state == DAPLKA_EP_STATE_ACCEPTING) {
2566                         DERR("hash_ep_free: ep_rp 0x%p state %d "
2567                             "unexpected error %d from close_rc_channel\n",
2568                             ep_rp, old_state, status);
2569                 }
2570                 D2("hash_ep_free: close_rc_channel, status %d\n", status);
2571         }
2572 
2573         DAPLKA_RS_UNREF(ep_rp);
2574 }
2575 
2576 /*
2577  * creates a EVD resource.
2578  * a EVD is used by the client to wait for events from one
2579  * or more sources.
2580  */
2581 /* ARGSUSED */
2582 static int
2583 daplka_evd_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2584         cred_t *cred, int *rvalp)
2585 {
2586         daplka_evd_resource_t           *evd_rp = NULL;
2587         daplka_async_evd_hkey_t         *async_evd;
2588         ibt_hca_attr_t                  *hca_attrp;
2589         ibt_cq_attr_t                   cq_attr;
2590         dapl_evd_create_t               args;
2591         uint64_t                        evd_hkey = 0;
2592         boolean_t                       inserted = B_FALSE;
2593         int                             retval = 0;
2594         ibt_status_t                    status;
2595 
2596         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_create_t),
2597             mode);
2598         if (retval != 0) {
2599                 DERR("evd_create: copyin error %d", retval);
2600                 return (EFAULT);
2601         }
2602         if ((args.evd_flags &
2603             ~(DAT_EVD_DEFAULT_FLAG | DAT_EVD_SOFTWARE_FLAG)) != 0) {
2604                 DERR("evd_create: invalid flags 0x%x\n", args.evd_flags);
2605                 return (EINVAL);
2606         }
2607 
2608         evd_rp = kmem_zalloc(sizeof (daplka_evd_resource_t), daplka_km_flags);
2609         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
2610         DAPLKA_RS_INIT(evd_rp, DAPL_TYPE_EVD,
2611             DAPLKA_RS_RNUM(ia_rp), daplka_evd_destroy);
2612 
2613         mutex_init(&evd_rp->evd_lock, NULL, MUTEX_DRIVER, NULL);
2614         cv_init(&evd_rp->evd_cv, NULL, CV_DRIVER, NULL);
2615         evd_rp->evd_hca = ia_rp->ia_hca;
2616         evd_rp->evd_flags = args.evd_flags;
2617         evd_rp->evd_hca_hdl = ia_rp->ia_hca_hdl;
2618         evd_rp->evd_cookie = args.evd_cookie;
2619         evd_rp->evd_cno_res = NULL;
2620         evd_rp->evd_cr_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2621         evd_rp->evd_conn_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2622         evd_rp->evd_async_events.eel_event_type = DAPLKA_EVD_ASYNC_EVENTS;
2623 
2624         /*
2625          * if the client specified a non-zero cno_hkey, we
2626          * lookup the cno and save the reference for later use.
2627          */
2628         if (args.evd_cno_hkey > 0) {
2629                 daplka_cno_resource_t *cno_rp;
2630 
2631                 cno_rp = (daplka_cno_resource_t *)
2632                     daplka_hash_lookup(&ia_rp->ia_cno_htbl,
2633                     args.evd_cno_hkey);
2634                 if (cno_rp == NULL) {
2635                         DERR("evd_create: cannot find cno resource\n");
2636                         goto cleanup;
2637                 }
2638                 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
2639                 evd_rp->evd_cno_res = cno_rp;
2640         }
2641         hca_attrp = &ia_rp->ia_hca->hca_attr;
2642         if ((evd_rp->evd_flags &
2643             (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) != 0) {
2644                 if (args.evd_cq_size > hca_attrp->hca_max_cq_sz) {
2645                         DERR("evd_create: invalid cq size %d",
2646                             args.evd_cq_size);
2647                         retval = EINVAL;
2648                         goto cleanup;
2649                 }
2650                 cq_attr.cq_size = args.evd_cq_size;
2651                 cq_attr.cq_sched = NULL;
2652                 cq_attr.cq_flags = IBT_CQ_USER_MAP;
2653 
2654                 status = daplka_ibt_alloc_cq(evd_rp, evd_rp->evd_hca_hdl,
2655                     &cq_attr, &evd_rp->evd_cq_hdl, &evd_rp->evd_cq_real_size);
2656 
2657                 if (status != IBT_SUCCESS) {
2658                         DERR("evd_create: ibt_alloc_cq returned %d", status);
2659                         *rvalp = (int)status;
2660                         retval = 0;
2661                         goto cleanup;
2662                 }
2663 
2664                 /*
2665                  * store evd ptr with cq_hdl
2666                  * mutex is only needed for race of "destroy" and "async"
2667                  */
2668                 mutex_enter(&daplka_dev->daplka_mutex);
2669                 ibt_set_cq_private(evd_rp->evd_cq_hdl, (void *)evd_rp);
2670                 mutex_exit(&daplka_dev->daplka_mutex);
2671 
2672                 /* Get HCA-specific data_out info */
2673                 status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2674                     IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2675                     &args.evd_cq_data_out, sizeof (args.evd_cq_data_out));
2676 
2677                 if (status != IBT_SUCCESS) {
2678                         DERR("evd_create: ibt_ci_data_out error(%d)", status);
2679                         *rvalp = (int)status;
2680                         retval = 0;
2681                         goto cleanup;
2682                 }
2683 
2684                 args.evd_cq_real_size = evd_rp->evd_cq_real_size;
2685 
2686                 ibt_set_cq_handler(evd_rp->evd_cq_hdl, daplka_cq_handler,
2687                     (void *)evd_rp);
2688         }
2689 
2690         retval = daplka_hash_insert(&ia_rp->ia_evd_htbl,
2691             &evd_hkey, (void *)evd_rp);
2692         if (retval != 0) {
2693                 DERR("evd_ceate: cannot insert evd %d\n", retval);
2694                 goto cleanup;
2695         }
2696         inserted = B_TRUE;
2697         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*evd_rp))
2698 
2699         /*
2700          * If this evd handles async events need to add to the IA resource
2701          * async evd list
2702          */
2703         if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
2704                 async_evd = kmem_zalloc(sizeof (daplka_async_evd_hkey_t),
2705                     daplka_km_flags);
2706                 /* add the evd to the head of the list */
2707                 mutex_enter(&ia_rp->ia_lock);
2708                 async_evd->aeh_evd_hkey = evd_hkey;
2709                 async_evd->aeh_next = ia_rp->ia_async_evd_hkeys;
2710                 ia_rp->ia_async_evd_hkeys = async_evd;
2711                 mutex_exit(&ia_rp->ia_lock);
2712         }
2713 
2714         args.evd_hkey = evd_hkey;
2715         retval = copyout(&args, (void *)arg, sizeof (dapl_evd_create_t));
2716         if (retval != 0) {
2717                 DERR("evd_create: copyout error %d\n", retval);
2718                 retval = EFAULT;
2719                 goto cleanup;
2720         }
2721         return (0);
2722 
2723 cleanup:;
2724         if (inserted) {
2725                 daplka_evd_resource_t *free_rp = NULL;
2726 
2727                 (void) daplka_hash_remove(&ia_rp->ia_evd_htbl, evd_hkey,
2728                     (void **)&free_rp);
2729                 if (free_rp != evd_rp) {
2730                         DERR("evd_create: cannot remove evd\n");
2731                         /*
2732                          * we can only get here if another thread
2733                          * has completed the cleanup in evd_free
2734                          */
2735                         return (retval);
2736                 }
2737         }
2738         DAPLKA_RS_UNREF(evd_rp);
2739         return (retval);
2740 }
2741 
2742 /*
2743  * resizes CQ and returns new mapping info to library.
2744  */
2745 /* ARGSUSED */
2746 static int
2747 daplka_cq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2748         cred_t *cred, int *rvalp)
2749 {
2750         daplka_evd_resource_t           *evd_rp = NULL;
2751         ibt_hca_attr_t                  *hca_attrp;
2752         dapl_cq_resize_t                args;
2753         ibt_status_t                    status;
2754         int                             retval = 0;
2755 
2756         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cq_resize_t),
2757             mode);
2758         if (retval != 0) {
2759                 DERR("cq_resize: copyin error %d\n", retval);
2760                 return (EFAULT);
2761         }
2762 
2763         /* get evd resource */
2764         evd_rp = (daplka_evd_resource_t *)
2765             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.cqr_evd_hkey);
2766         if (evd_rp == NULL) {
2767                 DERR("cq_resize: cannot find evd resource\n");
2768                 return (EINVAL);
2769         }
2770         ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
2771 
2772         hca_attrp = &ia_rp->ia_hca->hca_attr;
2773         if (args.cqr_cq_new_size > hca_attrp->hca_max_cq_sz) {
2774                 DERR("cq_resize: invalid cq size %d", args.cqr_cq_new_size);
2775                 retval = EINVAL;
2776                 goto cleanup;
2777         }
2778         /*
2779          * If ibt_resize_cq fails that it is primarily due to resource
2780          * shortage. Per IB spec resize will never loose events and
2781          * a resize error leaves the CQ intact. Therefore even if the
2782          * resize request fails we proceed and get the mapping data
2783          * from the CQ so that the library can mmap it.
2784          */
2785         status = ibt_resize_cq(evd_rp->evd_cq_hdl, args.cqr_cq_new_size,
2786             &args.cqr_cq_real_size);
2787         if (status != IBT_SUCCESS) {
2788                 /* we return the size of the old CQ if resize fails */
2789                 args.cqr_cq_real_size = evd_rp->evd_cq_real_size;
2790                 ASSERT(status != IBT_CQ_HDL_INVALID);
2791                 DERR("cq_resize: ibt_resize_cq failed:%d\n", status);
2792         } else {
2793                 mutex_enter(&evd_rp->evd_lock);
2794                 evd_rp->evd_cq_real_size = args.cqr_cq_real_size;
2795                 mutex_exit(&evd_rp->evd_lock);
2796         }
2797 
2798         D2("cq_resize(%d): done new_sz(%u) real_sz(%u)\n",
2799             DAPLKA_RS_RNUM(evd_rp),
2800             args.cqr_cq_new_size, args.cqr_cq_real_size);
2801 
2802         /* Get HCA-specific data_out info */
2803         status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2804             IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2805             &args.cqr_cq_data_out, sizeof (args.cqr_cq_data_out));
2806         if (status != IBT_SUCCESS) {
2807                 DERR("cq_resize: ibt_ci_data_out error(%d)\n", status);
2808                 /* return ibt_ci_data_out status */
2809                 *rvalp = (int)status;
2810                 retval = 0;
2811                 goto cleanup;
2812         }
2813 
2814         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cq_resize_t),
2815             mode);
2816         if (retval != 0) {
2817                 DERR("cq_resize: copyout error %d\n", retval);
2818                 retval = EFAULT;
2819                 goto cleanup;
2820         }
2821 
2822 cleanup:;
2823         if (evd_rp != NULL) {
2824                 DAPLKA_RS_UNREF(evd_rp);
2825         }
2826         return (retval);
2827 }
2828 
2829 /*
2830  * Routine to copyin the event poll message so that 32 bit libraries
2831  * can be safely supported
2832  */
2833 int
2834 daplka_event_poll_copyin(intptr_t inarg, dapl_event_poll_t *outarg, int mode)
2835 {
2836         int     retval;
2837 
2838 #ifdef _MULTI_DATAMODEL
2839         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2840                 dapl_event_poll32_t     args32;
2841 
2842                 retval = ddi_copyin((void *)inarg, &args32,
2843                     sizeof (dapl_event_poll32_t), mode);
2844                 if (retval != 0) {
2845                         DERR("event_poll_copyin: 32bit error %d\n", retval);
2846                         return (EFAULT);
2847                 }
2848 
2849                 outarg->evp_evd_hkey = args32.evp_evd_hkey;
2850                 outarg->evp_threshold = args32.evp_threshold;
2851                 outarg->evp_timeout = args32.evp_timeout;
2852                 outarg->evp_ep = (dapl_ib_event_t *)(uintptr_t)args32.evp_ep;
2853                 outarg->evp_num_ev = args32.evp_num_ev;
2854                 outarg->evp_num_polled = args32.evp_num_polled;
2855                 return (0);
2856         }
2857 #endif
2858         retval = ddi_copyin((void *)inarg, outarg, sizeof (dapl_event_poll_t),
2859             mode);
2860         if (retval != 0) {
2861                 DERR("event_poll: copyin error %d\n", retval);
2862                 return (EFAULT);
2863         }
2864 
2865         return (0);
2866 }
2867 
2868 /*
2869  * Routine to copyout the event poll message so that 32 bit libraries
2870  * can be safely supported
2871  */
2872 int
2873 daplka_event_poll_copyout(dapl_event_poll_t *inarg, intptr_t outarg, int mode)
2874 {
2875         int     retval;
2876 
2877 #ifdef _MULTI_DATAMODEL
2878         if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2879                 dapl_event_poll32_t     args32;
2880 
2881                 args32.evp_evd_hkey = inarg->evp_evd_hkey;
2882                 args32.evp_threshold = inarg->evp_threshold;
2883                 args32.evp_timeout = inarg->evp_timeout;
2884                 args32.evp_ep = (caddr32_t)(uintptr_t)inarg->evp_ep;
2885                 args32.evp_num_ev = inarg->evp_num_ev;
2886                 args32.evp_num_polled = inarg->evp_num_polled;
2887 
2888                 retval = ddi_copyout((void *)&args32, (void *)outarg,
2889                     sizeof (dapl_event_poll32_t), mode);
2890                 if (retval != 0) {
2891                         DERR("event_poll_copyout: 32bit error %d\n", retval);
2892                         return (EFAULT);
2893                 }
2894                 return (0);
2895         }
2896 #endif
2897         retval = ddi_copyout((void *)inarg, (void *)outarg,
2898             sizeof (dapl_event_poll_t), mode);
2899         if (retval != 0) {
2900                 DERR("event_poll_copyout: error %d\n", retval);
2901                 return (EFAULT);
2902         }
2903 
2904         return (0);
2905 }
2906 
2907 /*
2908  * fucntion to handle CM REQ RCV private data from Solaris or third parties
2909  */
2910 /* ARGSUSED */
2911 static void
2912 daplka_crevent_privdata_post(daplka_ia_resource_t *ia_rp,
2913         dapl_ib_event_t *evd_rp, daplka_evd_event_t *cr_ev)
2914 {
2915         DAPL_PRIVATE    *dp;
2916         ib_gid_t        *lgid;
2917         ibt_ar_t        ar_query_s;
2918         ibt_ar_t        ar_result_s;
2919         DAPL_HELLO_MSG  *hip;
2920         uint32_t        ipaddr_ord;
2921         ibt_priv_data_len_t clen;
2922         ibt_priv_data_len_t olen;
2923         ibt_status_t    status;
2924         uint16_t        cksum;
2925 
2926         /*
2927          * get private data and len
2928          */
2929         dp = (DAPL_PRIVATE *)cr_ev->ee_cmev.ec_cm_ev_priv_data;
2930         clen = cr_ev->ee_cmev.ec_cm_ev_priv_data_len;
2931 #if defined(DAPLKA_DEBUG_FORCE_ATS)
2932         /* skip the DAPL_PRIVATE chekcsum check */
2933 #else
2934         /* for remote connects */
2935         /* look up hello message in the CM private data area */
2936         if (clen >= sizeof (DAPL_PRIVATE) &&
2937             (dp->hello_msg.hi_vers == DAPL_HELLO_MSG_VERS)) {
2938                 cksum = ntohs(dp->hello_msg.hi_checksum);
2939                 dp->hello_msg.hi_checksum = 0;
2940                 if (daplka_hellomsg_cksum(dp) == cksum) {
2941                         D2("daplka_crevent_privdata_post: Solaris msg\n");
2942                         evd_rp->ibe_ce.ibce_priv_data_size = clen;
2943                         dp->hello_msg.hi_checksum = DAPL_CHECKSUM;
2944                         dp->hello_msg.hi_port = ntohs(dp->hello_msg.hi_port);
2945                         bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
2946                         kmem_free(dp, clen);
2947                         return;
2948                 }
2949         }
2950 #endif /* DAPLKA_DEBUG_FORCE_ATS */
2951 
2952         D2("daplka_crevent_privdata_post: 3rd party msg\n");
2953         /* transpose CM private data into hello message */
2954         if (clen) {
2955                 olen = clen;
2956                 if (clen > DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE) {
2957                         clen = DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE;
2958                 }
2959                 bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
2960                 kmem_free(dp, olen);
2961         } else {
2962                 bzero(evd_rp->ibe_ce.ibce_priv_data_ptr,
2963                     DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE);
2964         }
2965         evd_rp->ibe_ce.ibce_priv_data_size = sizeof (DAPL_PRIVATE);
2966         dp = (DAPL_PRIVATE *)evd_rp->ibe_ce.ibce_priv_data_ptr;
2967         /*
2968          * fill in hello message
2969          */
2970         hip = &dp->hello_msg;
2971         hip->hi_checksum = DAPL_CHECKSUM;
2972         hip->hi_clen = clen;
2973         hip->hi_mid = 0;
2974         hip->hi_vers = DAPL_HELLO_MSG_VERS;
2975         hip->hi_port = 0;
2976 
2977         /* assign sgid and dgid */
2978         lgid = &ia_rp->ia_hca_sgid;
2979         ar_query_s.ar_gid.gid_prefix =
2980             cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix;
2981         ar_query_s.ar_gid.gid_guid =
2982             cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid;
2983         ar_query_s.ar_pkey = ia_rp->ia_port_pkey;
2984         bzero(ar_query_s.ar_data, DAPL_ATS_NBYTES);
2985 
2986         /* reverse ip address lookup through ATS */
2987         status = ibt_query_ar(lgid, &ar_query_s, &ar_result_s);
2988         if (status == IBT_SUCCESS) {
2989                 bcopy(ar_result_s.ar_data, hip->hi_saaddr, DAPL_ATS_NBYTES);
2990                 /* determine the address families */
2991                 ipaddr_ord = hip->hi_v4pad[0] | hip->hi_v4pad[1] |
2992                     hip->hi_v4pad[2];
2993                 if (ipaddr_ord == 0) {
2994                         hip->hi_ipv = AF_INET;
2995                 } else {
2996                         hip->hi_ipv = AF_INET6;
2997                 }
2998 
2999 #define UL(b) ar_result_s.ar_data[(b)]
3000                 D3("daplka_privdata_post: family=%d :SA[8] %d.%d.%d.%d\n",
3001                     hip->hi_ipv, UL(8), UL(9), UL(10), UL(11));
3002                 D3("daplka_privdata_post: SA[12] %d.%d.%d.%d\n",
3003                     UL(12), UL(13), UL(14), UL(15));
3004         } else {
3005                 /* non-conformed third parties */
3006                 hip->hi_ipv = AF_UNSPEC;
3007                 bzero(hip->hi_saaddr, DAPL_ATS_NBYTES);
3008         }
3009 }
3010 
3011 /*
3012  * this function is called by evd_wait and evd_dequeue to wait for
3013  * connection events and CQ notifications. typically this function
3014  * is called when the userland CQ is empty and the client has
3015  * specified a non-zero timeout to evd_wait. if the client is
3016  * interested in CQ events, the CQ must be armed in userland prior
3017  * to calling this function.
3018  */
3019 /* ARGSUSED */
3020 static int
3021 daplka_event_poll(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3022         cred_t *cred, int *rvalp)
3023 {
3024         daplka_evd_resource_t   *evd_rp = NULL;
3025         dapl_event_poll_t       args;
3026         daplka_evd_event_t      *head;
3027         dapl_ib_event_t         evp_arr[NUM_EVENTS_PER_POLL];
3028         dapl_ib_event_t         *evp;
3029         dapl_ib_event_t         *evp_start;
3030         size_t                  evp_size;
3031         int                     threshold;
3032         clock_t                 timeout;
3033         uint32_t                max_events;
3034         uint32_t                num_events = 0;
3035         void                    *pd;
3036         ibt_priv_data_len_t     n;
3037         int                     retval = 0;
3038         int                     rc;
3039 
3040         retval = daplka_event_poll_copyin(arg, &args, mode);
3041         if (retval != 0) {
3042                 return (EFAULT);
3043         }
3044 
3045         if ((args.evp_num_ev > 0) && (args.evp_ep == NULL)) {
3046                 DERR("event_poll: evp_ep cannot be NULL if num_wc=%d",
3047                     args.evp_num_ev);
3048                 return (EINVAL);
3049         }
3050         /*
3051          * Note: dequeue requests have a threshold = 0, timeout = 0
3052          */
3053         threshold = args.evp_threshold;
3054 
3055         max_events = args.evp_num_ev;
3056         /* ensure library is passing sensible values */
3057         if (max_events < threshold) {
3058                 DERR("event_poll: max_events(%d) < threshold(%d)\n",
3059                     max_events, threshold);
3060                 return (EINVAL);
3061         }
3062         /* Do a sanity check to avoid excessive memory allocation */
3063         if (max_events > DAPL_EVD_MAX_EVENTS) {
3064                 DERR("event_poll: max_events(%d) > %d",
3065                     max_events, DAPL_EVD_MAX_EVENTS);
3066                 return (EINVAL);
3067         }
3068         D4("event_poll: threshold(%d) timeout(0x%llx) max_events(%d)\n",
3069             threshold, (longlong_t)args.evp_timeout, max_events);
3070 
3071         /* get evd resource */
3072         evd_rp = (daplka_evd_resource_t *)
3073             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evp_evd_hkey);
3074         if (evd_rp == NULL) {
3075                 DERR("event_poll: cannot find evd resource\n");
3076                 return (EINVAL);
3077         }
3078         ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3079 
3080         /*
3081          * Use event array on the stack if possible
3082          */
3083         if (max_events <= NUM_EVENTS_PER_POLL) {
3084                 evp_start = evp = &evp_arr[0];
3085         } else {
3086                 evp_size = max_events * sizeof (dapl_ib_event_t);
3087                 evp_start = evp = kmem_zalloc(evp_size, daplka_km_flags);
3088                 if (evp == NULL) {
3089                         DERR("event_poll: kmem_zalloc failed, evp_size %d",
3090                             evp_size);
3091                         retval = ENOMEM;
3092                         goto cleanup;
3093                 }
3094         }
3095 
3096         /*
3097          * The Event poll algorithm is as follows -
3098          * The library passes a buffer big enough to hold "max_events"
3099          * events. max_events is >= threshold. If at any stage we get
3100          * max_events no. of events we bail. The events are polled in
3101          * the following order -
3102          * 1) Check for CR events in the evd_cr_events list
3103          * 2) Check for Connection events in the evd_connection_events list
3104          *
3105          * If after the above 2 steps we don't have enough(>= threshold) events
3106          * we block for CQ notification and sleep. Upon being woken up we start
3107          * at step 1 again.
3108          */
3109 
3110         /*
3111          * Note: this could be 0 or INFINITE or anyother value in microsec
3112          */
3113         if (args.evp_timeout > 0) {
3114                 if (args.evp_timeout >= LONG_MAX) {
3115                         timeout = LONG_MAX;
3116                 } else {
3117                         clock_t curr_time = ddi_get_lbolt();
3118 
3119                         timeout = curr_time +
3120                             drv_usectohz((clock_t)args.evp_timeout);
3121                         /*
3122                          * use the max value if we wrapped around
3123                          */
3124                         if (timeout <= curr_time) {
3125                                 timeout = LONG_MAX;
3126                         }
3127                 }
3128         } else {
3129                 timeout = 0;
3130         }
3131 
3132         mutex_enter(&evd_rp->evd_lock);
3133         for (;;) {
3134                 /*
3135                  * If this evd is waiting for CM events check that now.
3136                  */
3137                 if ((evd_rp->evd_flags & DAT_EVD_CR_FLAG) &&
3138                     (evd_rp->evd_cr_events.eel_num_elements > 0)) {
3139                         /* dequeue events from evd_cr_events list */
3140                         while (head = daplka_evd_event_dequeue(
3141                             &evd_rp->evd_cr_events)) {
3142                                 /*
3143                                  * populate the evp array
3144                                  */
3145                                 evp[num_events].ibe_ev_family = DAPL_CR_EVENTS;
3146                                 evp[num_events].ibe_ce.ibce_event =
3147                                     head->ee_cmev.ec_cm_ev_type;
3148                                 evp[num_events].ibe_ce.ibce_cookie =
3149                                     (uint64_t)head->ee_cmev.ec_cm_cookie;
3150                                 evp[num_events].ibe_ce.ibce_psep_cookie =
3151                                     head->ee_cmev.ec_cm_psep_cookie;
3152                                 daplka_crevent_privdata_post(ia_rp,
3153                                     &evp[num_events], head);
3154                                 kmem_free(head, sizeof (daplka_evd_event_t));
3155 
3156                                 if (++num_events == max_events) {
3157                                         mutex_exit(&evd_rp->evd_lock);
3158                                         goto maxevent_reached;
3159                                 }
3160                         }
3161                 }
3162 
3163                 if ((evd_rp->evd_flags & DAT_EVD_CONNECTION_FLAG) &&
3164                     (evd_rp->evd_conn_events.eel_num_elements > 0)) {
3165                         /* dequeue events from evd_connection_events list */
3166                         while ((head = daplka_evd_event_dequeue
3167                             (&evd_rp->evd_conn_events))) {
3168                                 /*
3169                                  * populate the evp array -
3170                                  *
3171                                  */
3172                                 if (head->ee_cmev.ec_cm_is_passive) {
3173                                         evp[num_events].ibe_ev_family =
3174                                             DAPL_PASSIVE_CONNECTION_EVENTS;
3175                                 } else {
3176                                         evp[num_events].ibe_ev_family =
3177                                             DAPL_ACTIVE_CONNECTION_EVENTS;
3178                                 }
3179                                 evp[num_events].ibe_ce.ibce_event =
3180                                     head->ee_cmev.ec_cm_ev_type;
3181                                 evp[num_events].ibe_ce.ibce_cookie =
3182                                     (uint64_t)head->ee_cmev.ec_cm_cookie;
3183                                 evp[num_events].ibe_ce.ibce_psep_cookie =
3184                                     head->ee_cmev.ec_cm_psep_cookie;
3185 
3186                                 if (head->ee_cmev.ec_cm_ev_priv_data_len > 0) {
3187                                         pd = head->ee_cmev.ec_cm_ev_priv_data;
3188                                         n = head->
3189                                             ee_cmev.ec_cm_ev_priv_data_len;
3190                                         bcopy(pd, (void *)evp[num_events].
3191                                             ibe_ce.ibce_priv_data_ptr, n);
3192                                         evp[num_events].ibe_ce.
3193                                             ibce_priv_data_size = n;
3194                                         kmem_free(pd, n);
3195                                 }
3196 
3197                                 kmem_free(head, sizeof (daplka_evd_event_t));
3198 
3199                                 if (++num_events == max_events) {
3200                                         mutex_exit(&evd_rp->evd_lock);
3201                                         goto maxevent_reached;
3202                                 }
3203                         }
3204                 }
3205 
3206                 if ((evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) &&
3207                     (evd_rp->evd_async_events.eel_num_elements > 0)) {
3208                         /* dequeue events from evd_async_events list */
3209                         while (head = daplka_evd_event_dequeue(
3210                             &evd_rp->evd_async_events)) {
3211                                 /*
3212                                  * populate the evp array
3213                                  */
3214                                 evp[num_events].ibe_ev_family =
3215                                     DAPL_ASYNC_EVENTS;
3216                                 evp[num_events].ibe_async.ibae_type =
3217                                     head->ee_aev.ibae_type;
3218                                 evp[num_events].ibe_async.ibae_hca_guid =
3219                                     head->ee_aev.ibae_hca_guid;
3220                                 evp[num_events].ibe_async.ibae_cookie =
3221                                     head->ee_aev.ibae_cookie;
3222                                 evp[num_events].ibe_async.ibae_port =
3223                                     head->ee_aev.ibae_port;
3224 
3225                                 kmem_free(head, sizeof (daplka_evd_event_t));
3226 
3227                                 if (++num_events == max_events) {
3228                                         break;
3229                                 }
3230                         }
3231                 }
3232 
3233                 /*
3234                  * We have sufficient events for this call so no need to wait
3235                  */
3236                 if ((threshold > 0) && (num_events >= threshold)) {
3237                         mutex_exit(&evd_rp->evd_lock);
3238                         break;
3239                 }
3240 
3241                 evd_rp->evd_waiters++;
3242                 /*
3243                  * There are no new events and a timeout was specified.
3244                  * Note: for CQ events threshold is 0 but timeout is
3245                  * not necessarily 0.
3246                  */
3247                 while ((evd_rp->evd_newevents == DAPLKA_EVD_NO_EVENTS) &&
3248                     timeout) {
3249                         retval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv,
3250                             &evd_rp->evd_lock, timeout);
3251                         if (retval == 0) {
3252                                 retval = EINTR;
3253                                 break;
3254                         } else if (retval == -1) {
3255                                 retval = ETIME;
3256                                 break;
3257                         } else {
3258                                 retval = 0;
3259                                 continue;
3260                         }
3261                 }
3262                 evd_rp->evd_waiters--;
3263                 if (evd_rp->evd_newevents != DAPLKA_EVD_NO_EVENTS) {
3264                         /*
3265                          * If we got woken up by the CQ handler due to events
3266                          * in the CQ. Need to go to userland to check for
3267                          * CQ events. Or if we were woken up due to S/W events
3268                          */
3269 
3270                         /* check for userland events only */
3271                         if (!(evd_rp->evd_newevents &
3272                             ~DAPLKA_EVD_ULAND_EVENTS)) {
3273                                 evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
3274                                 mutex_exit(&evd_rp->evd_lock);
3275                                 break;
3276                         }
3277                         /*
3278                          * Clear newevents since we are going to loopback
3279                          * back and check for both CM and CQ events
3280                          */
3281                         evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
3282                 } else { /* error */
3283                         mutex_exit(&evd_rp->evd_lock);
3284                         break;
3285                 }
3286         }
3287 
3288 maxevent_reached:
3289         args.evp_num_polled = num_events;
3290 
3291         /*
3292          * At this point retval might have a value that we want to return
3293          * back to the user. So the copyouts shouldn't tamper retval.
3294          */
3295         if (args.evp_num_polled > 0) { /* copyout the events */
3296                 rc = ddi_copyout(evp, args.evp_ep, args.evp_num_polled *
3297                     sizeof (dapl_ib_event_t), mode);
3298                 if (rc != 0) { /* XXX: we are losing events here */
3299                         DERR("event_poll: event array copyout error %d", rc);
3300                         retval = EFAULT;
3301                         goto cleanup;
3302                 }
3303                 rc = daplka_event_poll_copyout(&args, arg, mode);
3304                 if (rc != 0) {  /* XXX: we are losing events here */
3305                         DERR("event_poll: copyout error %d\n", rc);
3306                         retval = EFAULT;
3307                         goto cleanup;
3308                 }
3309         }
3310 
3311 cleanup:;
3312         if ((max_events > NUM_EVENTS_PER_POLL) && (evp_start != NULL)) {
3313                 kmem_free(evp_start, evp_size);
3314         }
3315 
3316         if (evd_rp != NULL) {
3317                 DAPLKA_RS_UNREF(evd_rp);
3318         }
3319         return (retval);
3320 }
3321 
3322 /* ARGSUSED */
3323 static int
3324 daplka_event_wakeup(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3325         cred_t *cred, int *rvalp)
3326 {
3327         dapl_event_wakeup_t     args;
3328         daplka_evd_resource_t   *evd_rp;
3329         int                     retval;
3330 
3331         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_event_wakeup_t),
3332             mode);
3333         if (retval != 0) {
3334                 DERR("event_wakeup: copyin error %d\n", retval);
3335                 return (EFAULT);
3336         }
3337 
3338         /* get evd resource */
3339         evd_rp = (daplka_evd_resource_t *)
3340             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evw_hkey);
3341         if (evd_rp == NULL) {
3342                 DERR("event_wakeup: cannot find evd resource\n");
3343                 return (EINVAL);
3344         }
3345         ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3346 
3347         daplka_evd_wakeup(evd_rp, NULL, NULL);
3348 
3349         DAPLKA_RS_UNREF(evd_rp);
3350 
3351         return (retval);
3352 }
3353 
3354 /* ARGSUSED */
3355 static int
3356 daplka_evd_modify_cno(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3357         cred_t *cred, int *rvalp)
3358 {
3359         dapl_evd_modify_cno_t   args;
3360         daplka_evd_resource_t   *evd_rp;
3361         daplka_cno_resource_t   *cno_rp;
3362         daplka_cno_resource_t   *old_cno_rp;
3363         int                     retval;
3364 
3365         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_modify_cno_t),
3366             mode);
3367         if (retval != 0) {
3368                 DERR("evd_modify_cno: copyin error %d\n", retval);
3369                 return (EFAULT);
3370         }
3371 
3372         /* get evd resource */
3373         evd_rp = (daplka_evd_resource_t *)
3374             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evmc_hkey);
3375         if (evd_rp == NULL) {
3376                 DERR("evd_modify_cno: cannot find evd resource\n");
3377                 retval = EINVAL;
3378                 goto cleanup;
3379         }
3380         ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3381 
3382         if (args.evmc_cno_hkey > 0) {
3383                 /* get cno resource corresponding to the new CNO */
3384                 cno_rp = (daplka_cno_resource_t *)
3385                     daplka_hash_lookup(&ia_rp->ia_cno_htbl,
3386                     args.evmc_cno_hkey);
3387                 if (cno_rp == NULL) {
3388                         DERR("evd_modify_cno: cannot find CNO resource\n");
3389                         retval = EINVAL;
3390                         goto cleanup;
3391                 }
3392                 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3393         } else {
3394                 cno_rp = NULL;
3395         }
3396 
3397         mutex_enter(&evd_rp->evd_lock);
3398         old_cno_rp = evd_rp->evd_cno_res;
3399         evd_rp->evd_cno_res = cno_rp;
3400         mutex_exit(&evd_rp->evd_lock);
3401 
3402         /*
3403          * drop the refcnt on the old CNO, the refcnt on the new CNO is
3404          * retained since the evd holds a reference to it.
3405          */
3406         if (old_cno_rp) {
3407                 DAPLKA_RS_UNREF(old_cno_rp);
3408         }
3409 
3410 cleanup:
3411         if (evd_rp) {
3412                 DAPLKA_RS_UNREF(evd_rp);
3413         }
3414 
3415         return (retval);
3416 }
3417 
3418 /*
3419  * Frees the EVD and associated resources.
3420  * If there are other threads still using this EVD, the destruction
3421  * will defer until the EVD's refcnt drops to zero.
3422  */
3423 /* ARGSUSED */
3424 static int
3425 daplka_evd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3426         cred_t *cred, int *rvalp)
3427 {
3428         daplka_evd_resource_t   *evd_rp = NULL;
3429         daplka_async_evd_hkey_t *curr;
3430         daplka_async_evd_hkey_t *prev;
3431         dapl_evd_free_t         args;
3432         int                     retval = 0;
3433 
3434         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_free_t), mode);
3435         if (retval != 0) {
3436                 DERR("evd_free: copyin error %d\n", retval);
3437                 return (EFAULT);
3438         }
3439         retval = daplka_hash_remove(&ia_rp->ia_evd_htbl, args.evf_hkey,
3440             (void **)&evd_rp);
3441         if (retval != 0 || evd_rp == NULL) {
3442                 DERR("evd_free: cannot find evd resource\n");
3443                 return (EINVAL);
3444         }
3445         ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3446 
3447         /* If this is an async evd remove it from the IA's async evd list */
3448         if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
3449                 mutex_enter(&ia_rp->ia_lock);
3450                 curr = prev = ia_rp->ia_async_evd_hkeys;
3451                 while (curr != NULL) {
3452                         if (curr->aeh_evd_hkey == args.evf_hkey) {
3453                                 /* unlink curr from the list */
3454                                 if (curr == prev) {
3455                                         /*
3456                                          * if first element in the list update
3457                                          * the list head
3458                                          */
3459                                         ia_rp->ia_async_evd_hkeys =
3460                                             curr->aeh_next;
3461                                 } else {
3462                                         prev->aeh_next = curr->aeh_next;
3463                                 }
3464                                 break;
3465                         }
3466                         prev = curr;
3467                         curr = curr->aeh_next;
3468                 }
3469                 mutex_exit(&ia_rp->ia_lock);
3470                 /* free the curr entry */
3471                 kmem_free(curr, sizeof (daplka_async_evd_hkey_t));
3472         }
3473 
3474         /* UNREF calls the actual free function when refcnt is zero */
3475         DAPLKA_RS_UNREF(evd_rp);
3476         return (0);
3477 }
3478 
3479 /*
3480  * destroys EVD resource.
3481  * called when refcnt drops to zero.
3482  */
3483 static int
3484 daplka_evd_destroy(daplka_resource_t *gen_rp)
3485 {
3486         daplka_evd_resource_t   *evd_rp = (daplka_evd_resource_t *)gen_rp;
3487         ibt_status_t            status;
3488         daplka_evd_event_t      *evt;
3489         ibt_priv_data_len_t     len;
3490 
3491         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
3492         D3("evd_destroy: entering, evd_rp 0x%p, rnum %d\n",
3493             evd_rp, DAPLKA_RS_RNUM(evd_rp));
3494         /*
3495          * free CQ
3496          */
3497         if (evd_rp->evd_cq_hdl) {
3498                 ibt_set_cq_handler(evd_rp->evd_cq_hdl, NULL, NULL);
3499                 mutex_enter(&daplka_dev->daplka_mutex);
3500                 ibt_set_cq_private(evd_rp->evd_cq_hdl, NULL);
3501                 mutex_exit(&daplka_dev->daplka_mutex);
3502 
3503                 status = daplka_ibt_free_cq(evd_rp, evd_rp->evd_cq_hdl);
3504                 if (status != IBT_SUCCESS) {
3505                         DERR("evd_destroy: ibt_free_cq returned %d\n", status);
3506                 }
3507                 evd_rp->evd_cq_hdl = NULL;
3508                 D2("evd_destroy: cq freed, rnum %d\n", DAPLKA_RS_RNUM(evd_rp));
3509         }
3510 
3511         /*
3512          * release reference on CNO
3513          */
3514         if (evd_rp->evd_cno_res != NULL) {
3515                 mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3516                 if (evd_rp->evd_cno_res->cno_evd_cookie ==
3517                     evd_rp->evd_cookie) {
3518                         evd_rp->evd_cno_res->cno_evd_cookie = 0;
3519                 }
3520                 mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3521                 DAPLKA_RS_UNREF(evd_rp->evd_cno_res);
3522                 evd_rp->evd_cno_res = NULL;
3523         }
3524 
3525         /*
3526          * discard all remaining events
3527          */
3528         mutex_enter(&evd_rp->evd_lock);
3529         while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_cr_events))) {
3530                 D2("evd_destroy: discarding CR event: %d\n",
3531                     evt->ee_cmev.ec_cm_ev_type);
3532                 len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3533                 if (len > 0) {
3534                         kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3535                         evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3536                         evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3537                 }
3538                 kmem_free(evt, sizeof (*evt));
3539         }
3540         ASSERT(evd_rp->evd_cr_events.eel_num_elements == 0);
3541 
3542         while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_conn_events))) {
3543                 D2("evd_destroy: discarding CONN event: %d\n",
3544                     evt->ee_cmev.ec_cm_ev_type);
3545                 len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3546                 if (len > 0) {
3547                         kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3548                         evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3549                         evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3550                 }
3551                 kmem_free(evt, sizeof (*evt));
3552         }
3553         ASSERT(evd_rp->evd_conn_events.eel_num_elements == 0);
3554 
3555         while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_async_events))) {
3556                 DERR("evd_destroy: discarding ASYNC event: %d\n",
3557                     evt->ee_aev.ibae_type);
3558                 kmem_free(evt, sizeof (*evt));
3559         }
3560         ASSERT(evd_rp->evd_async_events.eel_num_elements == 0);
3561         mutex_exit(&evd_rp->evd_lock);
3562 
3563         mutex_destroy(&evd_rp->evd_lock);
3564         DAPLKA_RS_FINI(evd_rp);
3565         kmem_free(evd_rp, sizeof (daplka_evd_resource_t));
3566         D3("evd_destroy: exiting, evd_rp 0x%p\n", evd_rp);
3567         return (0);
3568 }
3569 
3570 static void
3571 daplka_hash_evd_free(void *obj)
3572 {
3573         daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)obj;
3574 
3575         ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3576         DAPLKA_RS_UNREF(evd_rp);
3577 }
3578 
3579 /*
3580  * this handler fires when new completions arrive.
3581  */
3582 /* ARGSUSED */
3583 static void
3584 daplka_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
3585 {
3586         D3("cq_handler: fired setting evd_newevents\n");
3587         daplka_evd_wakeup((daplka_evd_resource_t *)arg, NULL, NULL);
3588 }
3589 
3590 /*
3591  * this routine wakes up a client from evd_wait. if evtq and evt
3592  * are non-null, the event evt will be enqueued prior to waking
3593  * up the client. if the evd is associated with a CNO and if there
3594  * are no waiters on the evd, the CNO will be notified.
3595  */
3596 static void
3597 daplka_evd_wakeup(daplka_evd_resource_t *evd_rp, daplka_evd_event_list_t *evtq,
3598         daplka_evd_event_t *evt)
3599 {
3600         uint32_t waiters = 0;
3601 
3602         mutex_enter(&evd_rp->evd_lock);
3603         if (evtq != NULL && evt != NULL) {
3604                 ASSERT(evtq == &evd_rp->evd_cr_events ||
3605                     evtq == &evd_rp->evd_conn_events ||
3606                     evtq == &evd_rp->evd_async_events);
3607                 daplka_evd_event_enqueue(evtq, evt);
3608                 ASSERT((evtq->eel_event_type == DAPLKA_EVD_CM_EVENTS) ||
3609                     (evtq->eel_event_type == DAPLKA_EVD_ASYNC_EVENTS));
3610                 evd_rp->evd_newevents |= evtq->eel_event_type;
3611         } else {
3612                 evd_rp->evd_newevents |= DAPLKA_EVD_ULAND_EVENTS;
3613         }
3614         waiters = evd_rp->evd_waiters;
3615         cv_broadcast(&evd_rp->evd_cv);
3616         mutex_exit(&evd_rp->evd_lock);
3617 
3618         /*
3619          * only wakeup the CNO if there are no waiters on this evd.
3620          */
3621         if (evd_rp->evd_cno_res != NULL && waiters == 0) {
3622                 mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3623                 evd_rp->evd_cno_res->cno_evd_cookie = evd_rp->evd_cookie;
3624                 cv_broadcast(&evd_rp->evd_cno_res->cno_cv);
3625                 mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3626         }
3627 }
3628 
3629 /*
3630  * daplka_evd_event_enqueue adds elem to the end of the event list
3631  * The caller is expected to acquire appropriate locks before
3632  * calling enqueue
3633  */
3634 static void
3635 daplka_evd_event_enqueue(daplka_evd_event_list_t *evlist,
3636     daplka_evd_event_t *elem)
3637 {
3638         if (evlist->eel_tail) {
3639                 evlist->eel_tail->ee_next = elem;
3640                 evlist->eel_tail = elem;
3641         } else {
3642                 /* list is empty */
3643                 ASSERT(evlist->eel_head == NULL);
3644                 evlist->eel_head = elem;
3645                 evlist->eel_tail = elem;
3646         }
3647         evlist->eel_num_elements++;
3648 }
3649 
3650 /*
3651  * daplka_evd_event_dequeue removes and returns the first element of event
3652  * list. NULL is returned if the list is empty. The caller is expected to
3653  * acquire appropriate locks before calling enqueue.
3654  */
3655 static daplka_evd_event_t *
3656 daplka_evd_event_dequeue(daplka_evd_event_list_t *evlist)
3657 {
3658         daplka_evd_event_t *head;
3659 
3660         head = evlist->eel_head;
3661         if (head == NULL) {
3662                 return (NULL);
3663         }
3664 
3665         evlist->eel_head = head->ee_next;
3666         evlist->eel_num_elements--;
3667         /* if it was the last element update the tail pointer too */
3668         if (evlist->eel_head == NULL) {
3669                 ASSERT(evlist->eel_num_elements == 0);
3670                 evlist->eel_tail = NULL;
3671         }
3672         return (head);
3673 }
3674 
/*
 * A CNO allows the client to wait for notifications from multiple EVDs.
 * To use a CNO, the client needs to follow the procedure below:
 * 1. allocate a CNO. this returns a cno_hkey that identifies the CNO.
 * 2. create one or more EVDs using the returned cno_hkey.
 * 3. call cno_wait. when one of the associated EVDs gets notified, the
 *    CNO will also get notified. cno_wait will then return with an
 *    evd_cookie identifying the EVD that triggered the event.
 *
 * A note about cno_wait:
 * -unlike an EVD, a CNO does not maintain a queue of notifications. For
 *  example, suppose multiple EVDs triggered a CNO before the client calls
 *  cno_wait; when the client calls cno_wait, it will return with the
 *  evd_cookie that identifies the *last* EVD that triggered the CNO. It
 *  is the responsibility of the client, upon returning from cno_wait, to
 *  check on all EVDs that can potentially trigger the CNO. the returned
 *  evd_cookie is only meant to be a hint. there is no guarantee that the
 *  EVD identified by the evd_cookie still contains an event or still
 *  exists by the time cno_wait returns.
 */
3695 
3696 /*
3697  * allocates a CNO.
3698  * the returned cno_hkey may subsequently be used in evd_create.
3699  */
3700 /* ARGSUSED */
3701 static int
3702 daplka_cno_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3703         cred_t *cred, int *rvalp)
3704 {
3705         dapl_cno_alloc_t        args;
3706         daplka_cno_resource_t   *cno_rp = NULL;
3707         uint64_t                cno_hkey = 0;
3708         boolean_t               inserted = B_FALSE;
3709         int                     retval = 0;
3710 
3711         cno_rp = kmem_zalloc(sizeof (*cno_rp), daplka_km_flags);
3712         if (cno_rp == NULL) {
3713                 DERR("cno_alloc: cannot allocate cno resource\n");
3714                 return (ENOMEM);
3715         }
3716         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cno_rp))
3717         DAPLKA_RS_INIT(cno_rp, DAPL_TYPE_CNO,
3718             DAPLKA_RS_RNUM(ia_rp), daplka_cno_destroy);
3719 
3720         mutex_init(&cno_rp->cno_lock, NULL, MUTEX_DRIVER, NULL);
3721         cv_init(&cno_rp->cno_cv, NULL, CV_DRIVER, NULL);
3722         cno_rp->cno_evd_cookie = 0;
3723 
3724         /* insert into cno hash table */
3725         retval = daplka_hash_insert(&ia_rp->ia_cno_htbl,
3726             &cno_hkey, (void *)cno_rp);
3727         if (retval != 0) {
3728                 DERR("cno_alloc: cannot insert cno resource\n");
3729                 goto cleanup;
3730         }
3731         inserted = B_TRUE;
3732         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cno_rp))
3733 
3734         /* return hkey to library */
3735         args.cno_hkey = cno_hkey;
3736 
3737         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cno_alloc_t),
3738             mode);
3739         if (retval != 0) {
3740                 DERR("cno_alloc: copyout error %d\n", retval);
3741                 retval = EFAULT;
3742                 goto cleanup;
3743         }
3744         return (0);
3745 
3746 cleanup:;
3747         if (inserted) {
3748                 daplka_cno_resource_t *free_rp = NULL;
3749 
3750                 (void) daplka_hash_remove(&ia_rp->ia_cno_htbl, cno_hkey,
3751                     (void **)&free_rp);
3752                 if (free_rp != cno_rp) {
3753                         DERR("cno_alloc: cannot remove cno\n");
3754                         /*
3755                          * we can only get here if another thread
3756                          * has completed the cleanup in cno_free
3757                          */
3758                         return (retval);
3759                 }
3760         }
3761         DAPLKA_RS_UNREF(cno_rp);
3762         return (retval);
3763 }
3764 
3765 /*
3766  * destroys a CNO.
3767  * this gets called when a CNO resource's refcnt drops to zero.
3768  */
3769 static int
3770 daplka_cno_destroy(daplka_resource_t *gen_rp)
3771 {
3772         daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)gen_rp;
3773 
3774         ASSERT(DAPLKA_RS_REFCNT(cno_rp) == 0);
3775         D2("cno_destroy: entering, cno_rp %p, rnum %d\n",
3776             cno_rp, DAPLKA_RS_RNUM(cno_rp));
3777 
3778         ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3779         cv_destroy(&cno_rp->cno_cv);
3780         mutex_destroy(&cno_rp->cno_lock);
3781 
3782         DAPLKA_RS_FINI(cno_rp);
3783         kmem_free(cno_rp, sizeof (daplka_cno_resource_t));
3784         D2("cno_destroy: exiting, cno_rp %p\n", cno_rp);
3785         return (0);
3786 }
3787 
3788 static void
3789 daplka_hash_cno_free(void *obj)
3790 {
3791         daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)obj;
3792 
3793         ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3794         DAPLKA_RS_UNREF(cno_rp);
3795 }
3796 
3797 /*
3798  * removes the CNO from the cno hash table and frees the CNO
3799  * if there are no references to it. if there are references to
3800  * it, the CNO will be destroyed when the last of the references
3801  * is released. once the CNO is removed from the cno hash table,
3802  * the client will no longer be able to call cno_wait on the CNO.
3803  */
3804 /* ARGSUSED */
3805 static int
3806 daplka_cno_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3807         cred_t *cred, int *rvalp)
3808 {
3809         daplka_cno_resource_t   *cno_rp = NULL;
3810         dapl_cno_free_t         args;
3811         int                     retval = 0;
3812 
3813         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_free_t), mode);
3814         if (retval != 0) {
3815                 DERR("cno_free: copyin error %d\n", retval);
3816                 return (EINVAL);
3817         }
3818 
3819         retval = daplka_hash_remove(&ia_rp->ia_cno_htbl,
3820             args.cnf_hkey, (void **)&cno_rp);
3821         if (retval != 0 || cno_rp == NULL) {
3822                 DERR("cno_free: cannot find cno resource\n");
3823                 return (EINVAL);
3824         }
3825         ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3826 
3827         /* UNREF calls the actual free function when refcnt is zero */
3828         DAPLKA_RS_UNREF(cno_rp);
3829         return (0);
3830 }
3831 
3832 /*
3833  * wait for a notification from one of the associated EVDs.
3834  */
3835 /* ARGSUSED */
3836 static int
3837 daplka_cno_wait(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3838         cred_t *cred, int *rvalp)
3839 {
3840         daplka_cno_resource_t   *cno_rp = NULL;
3841         dapl_cno_wait_t         args;
3842         int                     retval = 0;
3843         uint64_t                evd_cookie = 0;
3844         clock_t                 timeout, curr_time;
3845 
3846         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_wait_t), mode);
3847         if (retval != 0) {
3848                 DERR("cno_wait: copyin error %d\n", retval);
3849                 return (EINVAL);
3850         }
3851         /* get cno resource */
3852         cno_rp = (daplka_cno_resource_t *)
3853             daplka_hash_lookup(&ia_rp->ia_cno_htbl, args.cnw_hkey);
3854         if (cno_rp == NULL) {
3855                 DERR("cno_wait: cannot find cno resource\n");
3856                 return (EINVAL);
3857         }
3858         ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3859 
3860         curr_time = ddi_get_lbolt();
3861         timeout = curr_time + drv_usectohz(args.cnw_timeout);
3862 
3863         /*
3864          * use the max value if we wrapped around
3865          */
3866         if (args.cnw_timeout > 0 && timeout <= curr_time) {
3867                 /*
3868                  * clock_t (size long) changes between 32 and 64-bit kernels
3869                  */
3870                 timeout = LONG_MAX >> 4;
3871         }
3872         mutex_enter(&cno_rp->cno_lock);
3873         while (cno_rp->cno_evd_cookie == 0) {
3874                 int rval = 0;
3875 
3876                 rval = cv_timedwait_sig(&cno_rp->cno_cv,
3877                     &cno_rp->cno_lock, timeout);
3878                 if (rval == 0) {
3879                         DERR("cno_wait: interrupted\n");
3880                         mutex_exit(&cno_rp->cno_lock);
3881                         retval = EINTR;
3882                         goto cleanup;
3883                 } else if (rval == -1) {
3884                         DERR("cno_wait: timed out\n");
3885                         mutex_exit(&cno_rp->cno_lock);
3886                         retval = ETIME;
3887                         goto cleanup;
3888                 }
3889         }
3890         evd_cookie = cno_rp->cno_evd_cookie;
3891         cno_rp->cno_evd_cookie = 0;
3892         mutex_exit(&cno_rp->cno_lock);
3893 
3894         ASSERT(evd_cookie != 0);
3895         D2("cno_wait: returning evd_cookie 0x%p\n",
3896             (void *)(uintptr_t)evd_cookie);
3897         args.cnw_evd_cookie = evd_cookie;
3898         retval = ddi_copyout((void *)&args, (void *)arg,
3899             sizeof (dapl_cno_wait_t), mode);
3900         if (retval != 0) {
3901                 DERR("cno_wait: copyout error %d\n", retval);
3902                 retval = EFAULT;
3903                 goto cleanup;
3904         }
3905 
3906 cleanup:;
3907         if (cno_rp != NULL) {
3908                 DAPLKA_RS_UNREF(cno_rp);
3909         }
3910         return (retval);
3911 }
3912 
3913 /*
3914  * this function is called by the client when it decides to
3915  * accept a connection request. a connection request is generated
3916  * when the active side generates REQ MAD to a service point on
3917  * the destination node. this causes the CM service handler
3918  * (daplka_cm_service_req) on the passive side to be callee. This
3919  * handler will then enqueue this connection request to the backlog
3920  * array of the service point. A connection event containing the
3921  * backlog array index and connection request private data is passed
3922  * to the client's service point EVD (sp_evd_res). once the event
3923  * is passed up to the userland, the client may examine the request
3924  * to decide whether to call daplka_cr_accept or dapka_cr_reject.
3925  */
3926 /* ARGSUSED */
3927 static int
3928 daplka_cr_accept(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3929         cred_t *cred, int *rvalp)
3930 {
3931         daplka_ep_resource_t            *ep_rp = NULL;
3932         daplka_sp_resource_t            *sp_rp = NULL;
3933         dapl_cr_accept_t                args;
3934         daplka_sp_conn_pend_t           *conn;
3935         ibt_cm_proceed_reply_t          proc_reply;
3936         ibt_status_t                    status;
3937         uint16_t                        bkl_index;
3938         uint32_t                        old_state, new_state;
3939         int                             retval = 0;
3940         void                            *priv_data = NULL, *sid;
3941 
3942         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_accept_t),
3943             mode);
3944         if (retval != 0) {
3945                 DERR("cr_accept: copyin error %d\n", retval);
3946                 return (EFAULT);
3947         }
3948         if (args.cra_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
3949                 DERR("cr_accept: private data len (%d) exceeded "
3950                     "max size %d\n", args.cra_priv_sz,
3951                     DAPL_MAX_PRIVATE_DATA_SIZE);
3952                 return (EINVAL);
3953         }
3954         priv_data = (args.cra_priv_sz > 0) ? (void *)args.cra_priv : NULL;
3955 
3956         D2("cr_accept: priv(0x%p) priv_len(%u) psep(0x%llx)\n", priv_data,
3957             args.cra_priv_sz, (longlong_t)args.cra_bkl_cookie);
3958 
3959         /* get sp resource */
3960         sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
3961             args.cra_sp_hkey);
3962         if (sp_rp == NULL) {
3963                 DERR("cr_accept: cannot find sp resource\n");
3964                 return (EINVAL);
3965         }
3966         ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
3967 
3968         /* get ep resource */
3969         ep_rp = (daplka_ep_resource_t *)daplka_hash_lookup(&ia_rp->ia_ep_htbl,
3970             args.cra_ep_hkey);
3971         if (ep_rp == NULL) {
3972                 DERR("cr_accept: cannot find ep resource\n");
3973                 retval = EINVAL;
3974                 goto cleanup;
3975         }
3976         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
3977 
3978         /*
3979          * accept is only allowed if ep_state is CLOSED.
3980          * note that after this point, the ep_state is frozen
3981          * (i.e. TRANSITIONING) until we transition ep_state
3982          * to ACCEPTING or back to CLOSED if we get an error.
3983          */
3984         new_state = old_state = daplka_ep_get_state(ep_rp);
3985         if (old_state != DAPLKA_EP_STATE_CLOSED) {
3986                 DERR("cr_accept: invalid ep state %d\n", old_state);
3987                 retval = EINVAL;
3988                 goto cleanup;
3989         }
3990 
3991         mutex_enter(&sp_rp->sp_lock);
3992         bkl_index = DAPLKA_GET_PSEP_INDEX(args.cra_bkl_cookie);
3993         /*
3994          * make sure the backlog index is not bogus.
3995          */
3996         if (bkl_index >= sp_rp->sp_backlog_size) {
3997                 DERR("cr_accept: invalid backlog index 0x%llx %d\n",
3998                     (longlong_t)args.cra_bkl_cookie, bkl_index);
3999                 mutex_exit(&sp_rp->sp_lock);
4000                 retval = EINVAL;
4001                 goto cleanup;
4002         }
4003         /*
4004          * make sure the backlog index indeed refers
4005          * to a pending connection.
4006          */
4007         conn = &sp_rp->sp_backlog[bkl_index];
4008         if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4009                 DERR("cr_accept: invalid conn state %d\n",
4010                     conn->spcp_state);
4011                 mutex_exit(&sp_rp->sp_lock);
4012                 retval = EINVAL;
4013                 goto cleanup;
4014         }
4015         if (conn->spcp_sid == NULL) {
4016                 DERR("cr_accept: sid == NULL\n");
4017                 mutex_exit(&sp_rp->sp_lock);
4018                 retval = EINVAL;
4019                 goto cleanup;
4020         }
4021         if (ep_rp->ep_chan_hdl == NULL) {
4022                 /*
4023                  * a ep_rp with a NULL chan_hdl is impossible.
4024                  */
4025                 DERR("cr_accept: ep_chan_hdl == NULL\n");
4026                 mutex_exit(&sp_rp->sp_lock);
4027                 ASSERT(B_FALSE);
4028                 retval = EINVAL;
4029                 goto cleanup;
4030         }
4031         proc_reply.rep.cm_channel = ep_rp->ep_chan_hdl;
4032         proc_reply.rep.cm_rdma_ra_out = conn->spcp_rdma_ra_out;
4033         proc_reply.rep.cm_rdma_ra_in = conn->spcp_rdma_ra_in;
4034         proc_reply.rep.cm_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
4035         sid = conn->spcp_sid;
4036 
4037         /*
4038          * this clears our slot in the backlog array.
4039          * this slot may now be used by other pending connections.
4040          */
4041         conn->spcp_sid = NULL;
4042         conn->spcp_state = DAPLKA_SPCP_INIT;
4043         conn->spcp_req_len = 0;
4044         mutex_exit(&sp_rp->sp_lock);
4045 
4046         /*
4047          * Set the unique cookie corresponding to the CR to this EP
4048          * so that is can be used in passive side CM callbacks
4049          */
4050         ep_rp->ep_psep_cookie = args.cra_bkl_cookie;
4051 
4052         status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, IBT_CM_ACCEPT,
4053             &proc_reply, priv_data, (ibt_priv_data_len_t)args.cra_priv_sz);
4054 
4055         if (status != IBT_SUCCESS) {
4056                 DERR("cr_accept: ibt_cm_proceed returned %d\n", status);
4057                 *rvalp = (int)status;
4058                 retval = 0;
4059         }
4060         /*
4061          * note that the CM handler may actually be called at this
4062          * point. but since ep_state is still in TRANSITIONING, the
4063          * handler will wait until we transition to ACCEPTING. this
4064          * prevents the case where we set ep_state to ACCEPTING after
4065          * daplka_service_conn_est sets ep_state to CONNECTED.
4066          */
4067         new_state = DAPLKA_EP_STATE_ACCEPTING;
4068 
4069 cleanup:;
4070         if (sp_rp != NULL) {
4071                 DAPLKA_RS_UNREF(sp_rp);
4072         }
4073         if (ep_rp != NULL) {
4074                 daplka_ep_set_state(ep_rp, old_state, new_state);
4075                 DAPLKA_RS_UNREF(ep_rp);
4076         }
4077         return (retval);
4078 }
4079 
4080 /*
4081  * this function is called by the client to reject a
4082  * connection request.
4083  */
4084 /* ARGSUSED */
4085 static int
4086 daplka_cr_reject(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4087         cred_t *cred, int *rvalp)
4088 {
4089         dapl_cr_reject_t        args;
4090         daplka_sp_resource_t    *sp_rp = NULL;
4091         daplka_sp_conn_pend_t   *conn;
4092         ibt_cm_proceed_reply_t  proc_reply;
4093         ibt_cm_status_t         proc_status;
4094         ibt_status_t            status;
4095         uint16_t                bkl_index;
4096         int                     retval = 0;
4097         void                    *sid;
4098 
4099         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_reject_t),
4100             mode);
4101         if (retval != 0) {
4102                 DERR("cr_reject: copyin error %d\n", retval);
4103                 return (EFAULT);
4104         }
4105         /* get sp resource */
4106         sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
4107             args.crr_sp_hkey);
4108         if (sp_rp == NULL) {
4109                 DERR("cr_reject: cannot find sp resource\n");
4110                 return (EINVAL);
4111         }
4112         ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4113 
4114         D2("cr_reject: psep(0x%llx)\n", (longlong_t)args.crr_bkl_cookie);
4115 
4116         mutex_enter(&sp_rp->sp_lock);
4117         bkl_index = DAPLKA_GET_PSEP_INDEX(args.crr_bkl_cookie);
4118         /*
4119          * make sure the backlog index is not bogus.
4120          */
4121         if (bkl_index >= sp_rp->sp_backlog_size) {
4122                 DERR("cr_reject: invalid backlog index 0x%llx %d\n",
4123                     (longlong_t)args.crr_bkl_cookie, bkl_index);
4124                 mutex_exit(&sp_rp->sp_lock);
4125                 retval = EINVAL;
4126                 goto cleanup;
4127         }
4128         /*
4129          * make sure the backlog index indeed refers
4130          * to a pending connection.
4131          */
4132         conn = &sp_rp->sp_backlog[bkl_index];
4133         if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4134                 DERR("cr_reject: invalid conn state %d\n",
4135                     conn->spcp_state);
4136                 mutex_exit(&sp_rp->sp_lock);
4137                 retval = EINVAL;
4138                 goto cleanup;
4139         }
4140         if (conn->spcp_sid == NULL) {
4141                 DERR("cr_reject: sid == NULL\n");
4142                 mutex_exit(&sp_rp->sp_lock);
4143                 retval = EINVAL;
4144                 goto cleanup;
4145         }
4146         bzero(&proc_reply, sizeof (proc_reply));
4147         sid = conn->spcp_sid;
4148 
4149         /*
4150          * this clears our slot in the backlog array.
4151          * this slot may now be used by other pending connections.
4152          */
4153         conn->spcp_sid = NULL;
4154         conn->spcp_state = DAPLKA_SPCP_INIT;
4155         conn->spcp_req_len = 0;
4156 
4157         switch (args.crr_reason) {
4158         case DAPL_IB_CM_REJ_REASON_CONSUMER_REJ:
4159                 /* results in IBT_CM_CONSUMER as the reason for reject */
4160                 proc_status = IBT_CM_REJECT;
4161                 break;
4162         case DAPL_IB_CME_LOCAL_FAILURE:
4163                 /*FALLTHRU*/
4164         case DAPL_IB_CME_DESTINATION_UNREACHABLE:
4165                 /* results in IBT_CM_NO_RESC as the reason for reject */
4166                 proc_status = IBT_CM_NO_RESOURCE;
4167                 break;
4168         default:
4169                 /* unexpect reason code */
4170                 ASSERT(!"unexpected reject reason code");
4171                 proc_status = IBT_CM_NO_RESOURCE;
4172                 break;
4173         }
4174 
4175         mutex_exit(&sp_rp->sp_lock);
4176 
4177         status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, proc_status,
4178             &proc_reply, NULL, 0);
4179 
4180         if (status != IBT_SUCCESS) {
4181                 DERR("cr_reject: ibt_cm_proceed returned %d\n", status);
4182                 *rvalp = (int)status;
4183                 retval = 0;
4184         }
4185 
4186 cleanup:;
4187         if (sp_rp != NULL) {
4188                 DAPLKA_RS_UNREF(sp_rp);
4189         }
4190         return (retval);
4191 }
4192 
4193 
4194 /*
4195  * daplka_sp_match is used by daplka_hash_walk for finding SPs
4196  */
4197 typedef struct daplka_sp_match_s {
4198         uint64_t                spm_conn_qual;
4199         daplka_sp_resource_t    *spm_sp_rp;
4200 } daplka_sp_match_t;
4201 _NOTE(SCHEME_PROTECTS_DATA("daplka", daplka_sp_match_s::spm_sp_rp))
4202 
4203 static int
4204 daplka_sp_match(void *objp, void *arg)
4205 {
4206         daplka_sp_resource_t    *sp_rp = (daplka_sp_resource_t *)objp;
4207 
4208         ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4209         if (sp_rp->sp_conn_qual ==
4210             ((daplka_sp_match_t *)arg)->spm_conn_qual) {
4211                 ((daplka_sp_match_t *)arg)->spm_sp_rp = sp_rp;
4212                 D2("daplka_sp_match: found sp, conn_qual %016llu\n",
4213                     (longlong_t)((daplka_sp_match_t *)arg)->spm_conn_qual);
4214                 DAPLKA_RS_REF(sp_rp);
4215                 return (1);
4216         }
4217         return (0);
4218 }
4219 
4220 /*
4221  * cr_handoff allows the client to handoff a connection request from
4222  * one service point to another.
4223  */
4224 /* ARGSUSED */
4225 static int
4226 daplka_cr_handoff(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4227         cred_t *cred, int *rvalp)
4228 {
4229         dapl_cr_handoff_t               args;
4230         daplka_sp_resource_t            *sp_rp = NULL, *new_sp_rp = NULL;
4231         daplka_sp_conn_pend_t           *conn;
4232         daplka_sp_match_t               sp_match;
4233         ibt_cm_event_t                  fake_event;
4234         ibt_cm_status_t                 cm_status;
4235         ibt_status_t                    status;
4236         uint16_t                        bkl_index;
4237         void                            *sid, *priv = NULL;
4238         int                             retval = 0, priv_len = 0;
4239 
4240         D3("cr_handoff: entering\n");
4241         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_handoff_t),
4242             mode);
4243         if (retval != 0) {
4244                 DERR("cr_handoff: copyin error %d\n", retval);
4245                 return (EFAULT);
4246         }
4247         /* get sp resource */
4248         sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
4249             args.crh_sp_hkey);
4250         if (sp_rp == NULL) {
4251                 DERR("cr_handoff: cannot find sp resource\n");
4252                 return (EINVAL);
4253         }
4254         ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4255 
4256         /*
4257          * find the destination service point.
4258          */
4259         sp_match.spm_conn_qual = args.crh_conn_qual;
4260         sp_match.spm_sp_rp = NULL;
4261         daplka_hash_walk(&daplka_global_sp_htbl, daplka_sp_match,
4262             (void *)&sp_match, RW_READER);
4263 
4264         /*
4265          * return if we cannot find the service point
4266          */
4267         if (sp_match.spm_sp_rp == NULL) {
4268                 DERR("cr_handoff: new sp not found, conn qual = %llu\n",
4269                     (longlong_t)args.crh_conn_qual);
4270                 retval = EINVAL;
4271                 goto cleanup;
4272         }
4273         new_sp_rp = sp_match.spm_sp_rp;
4274 
4275         /*
4276          * the spec does not discuss the security implications of this
4277          * function. to be safe, we currently only allow processes
4278          * owned by the same user to handoff connection requests
4279          * to each other.
4280          */
4281         if (crgetruid(cred) != new_sp_rp->sp_ruid) {
4282                 DERR("cr_handoff: permission denied\n");
4283                 retval = EPERM;
4284                 goto cleanup;
4285         }
4286 
4287         D2("cr_handoff: psep(0x%llx)\n", (longlong_t)args.crh_bkl_cookie);
4288 
4289         mutex_enter(&sp_rp->sp_lock);
4290         bkl_index = DAPLKA_GET_PSEP_INDEX(args.crh_bkl_cookie);
4291         /*
4292          * make sure the backlog index is not bogus.
4293          */
4294         if (bkl_index >= sp_rp->sp_backlog_size) {
4295                 DERR("cr_handoff: invalid backlog index 0x%llx %d\n",
4296                     (longlong_t)args.crh_bkl_cookie, bkl_index);
4297                 mutex_exit(&sp_rp->sp_lock);
4298                 retval = EINVAL;
4299                 goto cleanup;
4300         }
4301         /*
4302          * make sure the backlog index indeed refers
4303          * to a pending connection.
4304          */
4305         conn = &sp_rp->sp_backlog[bkl_index];
4306         if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4307                 DERR("cr_handoff: invalid conn state %d\n",
4308                     conn->spcp_state);
4309                 mutex_exit(&sp_rp->sp_lock);
4310                 retval = EINVAL;
4311                 goto cleanup;
4312         }
4313         if (conn->spcp_sid == NULL) {
4314                 DERR("cr_handoff: sid == NULL\n");
4315                 mutex_exit(&sp_rp->sp_lock);
4316                 retval = EINVAL;
4317                 goto cleanup;
4318         }
4319         sid = conn->spcp_sid;
4320         priv = NULL;
4321         priv_len = conn->spcp_req_len;
4322         if (priv_len > 0) {
4323                 priv = kmem_zalloc(priv_len, daplka_km_flags);
4324                 if (priv == NULL) {
4325                         mutex_exit(&sp_rp->sp_lock);
4326                         retval = ENOMEM;
4327                         goto cleanup;
4328                 }
4329                 bcopy(conn->spcp_req_data, priv, priv_len);
4330         }
4331         /*
4332          * this clears our slot in the backlog array.
4333          * this slot may now be used by other pending connections.
4334          */
4335         conn->spcp_sid = NULL;
4336         conn->spcp_state = DAPLKA_SPCP_INIT;
4337         conn->spcp_req_len = 0;
4338         mutex_exit(&sp_rp->sp_lock);
4339 
4340         /* fill fake_event and call service_req handler */
4341         bzero(&fake_event, sizeof (fake_event));
4342         fake_event.cm_type = IBT_CM_EVENT_REQ_RCV;
4343         fake_event.cm_session_id = sid;
4344         fake_event.cm_priv_data_len = priv_len;
4345         fake_event.cm_priv_data = priv;
4346 
4347         cm_status = daplka_cm_service_req(new_sp_rp,
4348             &fake_event, NULL, priv, (ibt_priv_data_len_t)priv_len);
4349         if (cm_status != IBT_CM_DEFER) {
4350                 ibt_cm_proceed_reply_t  proc_reply;
4351 
4352                 DERR("cr_handoff: service_req returned %d\n", cm_status);
4353                 /*
4354                  * if for some reason cm_service_req failed, we
4355                  * reject the connection.
4356                  */
4357                 bzero(&proc_reply, sizeof (proc_reply));
4358 
4359                 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid,
4360                     IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
4361                 if (status != IBT_SUCCESS) {
4362                         DERR("cr_handoff: ibt_cm_proceed returned %d\n",
4363                             status);
4364                 }
4365                 *rvalp = (int)status;
4366                 retval = 0;
4367         }
4368 
4369 cleanup:;
4370         if (priv_len > 0 && priv != NULL) {
4371                 kmem_free(priv, priv_len);
4372         }
4373         if (new_sp_rp != NULL) {
4374                 DAPLKA_RS_UNREF(new_sp_rp);
4375         }
4376         if (sp_rp != NULL) {
4377                 DAPLKA_RS_UNREF(sp_rp);
4378         }
4379         D3("cr_handoff: exiting\n");
4380         return (retval);
4381 }
4382 
4383 /*
4384  * returns a list of hca attributes
4385  */
4386 /* ARGSUSED */
4387 static int
4388 daplka_ia_query(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4389         cred_t *cred, int *rvalp)
4390 {
4391         dapl_ia_query_t         args;
4392         int                     retval;
4393         ibt_hca_attr_t          *hcap;
4394 
4395         hcap = &ia_rp->ia_hca->hca_attr;
4396 
4397         /*
4398          * Take the ibt_hca_attr_t and stuff them into dapl_hca_attr_t
4399          */
4400         args.hca_attr.dhca_vendor_id = hcap->hca_vendor_id;
4401         args.hca_attr.dhca_device_id = hcap->hca_device_id;
4402         args.hca_attr.dhca_version_id = hcap->hca_version_id;
4403         args.hca_attr.dhca_max_chans = hcap->hca_max_chans;
4404         args.hca_attr.dhca_max_chan_sz = hcap->hca_max_chan_sz;
4405         args.hca_attr.dhca_max_sgl = hcap->hca_max_sgl;
4406         args.hca_attr.dhca_max_cq = hcap->hca_max_cq;
4407         args.hca_attr.dhca_max_cq_sz = hcap->hca_max_cq_sz;
4408         args.hca_attr.dhca_max_memr = hcap->hca_max_memr;
4409         args.hca_attr.dhca_max_memr_len = hcap->hca_max_memr_len;
4410         args.hca_attr.dhca_max_mem_win = hcap->hca_max_mem_win;
4411         args.hca_attr.dhca_max_rdma_in_chan = hcap->hca_max_rdma_in_chan;
4412         args.hca_attr.dhca_max_rdma_out_chan = hcap->hca_max_rdma_out_chan;
4413         args.hca_attr.dhca_max_partitions  = hcap->hca_max_partitions;
4414         args.hca_attr.dhca_nports  = hcap->hca_nports;
4415         args.hca_attr.dhca_node_guid  = hcap->hca_node_guid;
4416         args.hca_attr.dhca_max_pd = hcap->hca_max_pd;
4417         args.hca_attr.dhca_max_srqs = hcap->hca_max_srqs;
4418         args.hca_attr.dhca_max_srqs_sz = hcap->hca_max_srqs_sz;
4419         args.hca_attr.dhca_max_srq_sgl = hcap->hca_max_srq_sgl;
4420 
4421         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ia_query_t),
4422             mode);
4423         if (retval != 0) {
4424                 DERR("ia_query: copyout error %d\n", retval);
4425                 return (EFAULT);
4426         }
4427         return (0);
4428 }
4429 
4430 /*
4431  * This routine is passed to hash walk in the daplka_pre_mr_cleanup_callback,
4432  * it frees the mw embedded in the mw resource object.
4433  */
4434 
4435 /* ARGSUSED */
4436 static int
4437 daplka_mr_cb_freemw(void *objp, void *arg)
4438 {
4439         daplka_mw_resource_t    *mw_rp = (daplka_mw_resource_t *)objp;
4440         ibt_mw_hdl_t            mw_hdl;
4441         ibt_status_t            status;
4442 
4443         D3("mr_cb_freemw: entering, mw_rp 0x%p\n", mw_rp);
4444         DAPLKA_RS_REF(mw_rp);
4445 
4446         mutex_enter(&mw_rp->mw_lock);
4447         mw_hdl = mw_rp->mw_hdl;
4448         /*
4449          * we set mw_hdl to NULL so it won't get freed again
4450          */
4451         mw_rp->mw_hdl = NULL;
4452         mutex_exit(&mw_rp->mw_lock);
4453 
4454         if (mw_hdl != NULL) {
4455                 status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, mw_hdl);
4456                 if (status != IBT_SUCCESS) {
4457                         DERR("mr_cb_freemw: ibt_free_mw returned %d\n", status);
4458                 }
4459                 D3("mr_cb_freemw: mw freed\n");
4460         }
4461 
4462         DAPLKA_RS_UNREF(mw_rp);
4463         return (0);
4464 }
4465 
4466 /*
4467  * This routine is called from HCA driver's umem lock undo callback
4468  * when the memory associated with an MR is being unmapped. In this callback
4469  * we free all the MW associated with the IA and post an unaffiliated
4470  * async event to tell the app that there was a catastrophic event.
4471  * This allows the HCA to deregister the MR in its callback processing.
4472  */
4473 static void
4474 daplka_pre_mr_cleanup_callback(void *arg1, void *arg2 /*ARGSUSED*/)
4475 {
4476         daplka_mr_resource_t    *mr_rp;
4477         daplka_ia_resource_t    *ia_rp;
4478 #ifdef  _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
4479         ibt_async_event_t       event;
4480         ibt_hca_attr_t          *hca_attrp;
4481 #endif
4482         minor_t                 rnum;
4483 
4484         mr_rp = (daplka_mr_resource_t *)arg1;
4485         rnum = DAPLKA_RS_RNUM(mr_rp);
4486         daplka_shared_mr_free(mr_rp);
4487 
4488         ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
4489         if (ia_rp == NULL) {
4490                 DERR("daplka_mr_unlock_callback: resource not found, rnum %d\n",
4491                     rnum);
4492                 return;
4493         }
4494 
4495         DERR("daplka_mr_unlock_callback: resource(%p) rnum(%d)\n", ia_rp, rnum);
4496 
4497         mutex_enter(&ia_rp->ia_lock);
4498         /*
4499          * MW is being alloced OR MW freeze has already begun. In
4500          * both these cases we wait for that to complete before
4501          * continuing.
4502          */
4503         while ((ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) ||
4504             (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)) {
4505                 cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
4506         }
4507 
4508         switch (ia_rp->ia_state) {
4509         case DAPLKA_IA_INIT:
4510                 ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS;
4511                 mutex_exit(&ia_rp->ia_lock);
4512                 break;
4513         case DAPLKA_IA_MW_FROZEN:
4514                 /* the mw on this ia have been freed */
4515                 D2("daplka_mr_unlock_callback: ia_state %d nothing to do\n",
4516                     ia_rp->ia_state);
4517                 mutex_exit(&ia_rp->ia_lock);
4518                 goto cleanup;
4519         default:
4520                 ASSERT(!"daplka_mr_unlock_callback: IA state invalid");
4521                 DERR("daplka_mr_unlock_callback: invalid ia_state %d\n",
4522                     ia_rp->ia_state);
4523                 mutex_exit(&ia_rp->ia_lock);
4524                 goto cleanup;
4525         }
4526 
4527         /*
4528          * Walk the mw hash table and free the mws. Acquire a writer
4529          * lock since we don't want anyone else traversing this tree
4530          * while we are freeing the MW.
4531          */
4532         daplka_hash_walk(&ia_rp->ia_mw_htbl, daplka_mr_cb_freemw, NULL,
4533             RW_WRITER);
4534 
4535         mutex_enter(&ia_rp->ia_lock);
4536         ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS);
4537         ia_rp->ia_state = DAPLKA_IA_MW_FROZEN;
4538         cv_broadcast(&ia_rp->ia_cv);
4539         mutex_exit(&ia_rp->ia_lock);
4540 
4541         /*
4542          * Currently commented out because Oracle skgxp is incapable
4543          * of handling async events correctly.
4544          */
4545 #ifdef  _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
4546         /*
4547          * Enqueue an unaffiliated async error event to indicate this
4548          * IA has encountered a problem that caused the MW to freed up
4549          */
4550 
4551         /* Create a fake event, only relevant field is the hca_guid */
4552         bzero(&event, sizeof (ibt_async_event_t));
4553         hca_attrp = &ia_rp->ia_hca->hca_attr;
4554         event.ev_hca_guid = hca_attrp->hca_node_guid;
4555 
4556         daplka_async_event_create(IBT_ERROR_LOCAL_CATASTROPHIC, &event, 0,
4557             ia_rp);
4558 #endif  /* _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB */
4559 
4560 cleanup:;
4561         D2("daplka_mr_unlock_callback: resource(%p) done\n", ia_rp);
4562         DAPLKA_RS_UNREF(ia_rp);
4563 }
4564 
4565 /*
4566  * registers a memory region.
4567  * memory locking will be done by the HCA driver.
4568  */
4569 /* ARGSUSED */
4570 static int
4571 daplka_mr_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4572         cred_t *cred, int *rvalp)
4573 {
4574         boolean_t                       inserted = B_FALSE;
4575         daplka_mr_resource_t            *mr_rp;
4576         daplka_pd_resource_t            *pd_rp;
4577         dapl_mr_register_t              args;
4578         ibt_mr_data_in_t                mr_cb_data_in;
4579         uint64_t                        mr_hkey = 0;
4580         ibt_status_t                    status;
4581         int                             retval;
4582 
4583         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_register_t),
4584             mode);
4585         if (retval != 0) {
4586                 DERR("mr_register: copyin error %d\n", retval);
4587                 return (EINVAL);
4588         }
4589         mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
4590         if (mr_rp == NULL) {
4591                 DERR("mr_register: cannot allocate mr resource\n");
4592                 return (ENOMEM);
4593         }
4594         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4595         DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
4596             DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
4597 
4598         mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
4599         mr_rp->mr_hca = ia_rp->ia_hca;
4600         mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
4601         mr_rp->mr_next = NULL;
4602         mr_rp->mr_shared_mr = NULL;
4603 
4604         /* get pd handle */
4605         pd_rp = (daplka_pd_resource_t *)
4606             daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mr_pd_hkey);
4607         if (pd_rp == NULL) {
4608                 DERR("mr_register: cannot find pd resource\n");
4609                 retval = EINVAL;
4610                 goto cleanup;
4611         }
4612         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
4613         mr_rp->mr_pd_res = pd_rp;
4614 
4615         mr_rp->mr_attr.mr_vaddr = args.mr_vaddr;
4616         mr_rp->mr_attr.mr_len = args.mr_len;
4617         mr_rp->mr_attr.mr_as = curproc->p_as;
4618         mr_rp->mr_attr.mr_flags = args.mr_flags | IBT_MR_NOSLEEP;
4619 
4620         D3("mr_register: mr_vaddr %p, mr_len %llu, mr_flags 0x%x\n",
4621             (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
4622             (longlong_t)mr_rp->mr_attr.mr_len,
4623             mr_rp->mr_attr.mr_flags);
4624 
4625         status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
4626             mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, &mr_rp->mr_hdl,
4627             &mr_rp->mr_desc);
4628 
4629         if (status != IBT_SUCCESS) {
4630                 DERR("mr_register: ibt_register_mr error %d\n", status);
4631                 *rvalp = (int)status;
4632                 retval = 0;
4633                 goto cleanup;
4634         }
4635 
4636         mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
4637         mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
4638         mr_cb_data_in.mr_arg1 = (void *)mr_rp;
4639         mr_cb_data_in.mr_arg2 = NULL;
4640 
4641         /* Pass the service driver mr cleanup handler to the hca driver */
4642         status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
4643             IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
4644             &mr_cb_data_in, sizeof (mr_cb_data_in));
4645 
4646         if (status != IBT_SUCCESS) {
4647                 DERR("mr_register: ibt_ci_data_in error(%d) ver(%d)",
4648                     status, mr_cb_data_in.mr_rev);
4649                 *rvalp = (int)status;
4650                 retval = 0;
4651                 goto cleanup;
4652         }
4653 
4654         /* insert into mr hash table */
4655         retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
4656             &mr_hkey, (void *)mr_rp);
4657         if (retval != 0) {
4658                 DERR("mr_register: cannot insert mr resource into mr_htbl\n");
4659                 goto cleanup;
4660         }
4661         inserted = B_TRUE;
4662         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
4663 
4664         args.mr_lkey = mr_rp->mr_desc.md_lkey;
4665         args.mr_rkey = mr_rp->mr_desc.md_rkey;
4666         args.mr_hkey = mr_hkey;
4667 
4668         retval = ddi_copyout((void *)&args, (void *)arg,
4669             sizeof (dapl_mr_register_t), mode);
4670         if (retval != 0) {
4671                 DERR("mr_register: copyout error %d\n", retval);
4672                 retval = EFAULT;
4673                 goto cleanup;
4674         }
4675         return (0);
4676 
4677 cleanup:;
4678         if (inserted) {
4679                 daplka_mr_resource_t *free_rp = NULL;
4680 
4681                 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
4682                     (void **)&free_rp);
4683                 if (free_rp != mr_rp) {
4684                         DERR("mr_register: cannot remove mr from hash table\n");
4685                         /*
4686                          * we can only get here if another thread
4687                          * has completed the cleanup in mr_deregister
4688                          */
4689                         return (retval);
4690                 }
4691         }
4692         DAPLKA_RS_UNREF(mr_rp);
4693         return (retval);
4694 }
4695 
4696 /*
4697  * registers a shared memory region.
4698  * the client calls this function with the intention to share the memory
4699  * region with other clients. it is assumed that, prior to calling this
4700  * function, the client(s) are already sharing parts of their address
4701  * space using a mechanism such as SYSV shared memory. the first client
4702  * that calls this function will create and insert a daplka_shared_mr_t
4703  * object into the global daplka_shared_mr_tree. this shared mr object
4704  * will be identified by a unique 40-byte key and will maintain a list
4705  * of mr resources. every time this function gets called with the same
4706  * 40-byte key, a new mr resource (containing a new mr handle generated
4707  * by ibt_register_mr or ibt_register_shared_mr) is created and inserted
4708  * into this list. similarly, every time a shared mr gets deregistered
4709  * or invalidated by a callback, the mr resource gets removed from this
4710  * list. the shared mr object has a reference count. when it drops to
4711  * zero, the shared mr object will be removed from the global avl tree
4712  * and be freed.
4713  */
4714 /* ARGSUSED */
4715 static int
4716 daplka_mr_register_shared(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4717         cred_t *cred, int *rvalp)
4718 {
4719         dapl_mr_register_shared_t       args;
4720         daplka_shared_mr_t              *smrp = NULL;
4721         daplka_shared_mr_t              tmp_smr;
4722         ibt_mr_data_in_t                mr_cb_data_in;
4723         avl_index_t                     where;
4724         boolean_t                       inserted = B_FALSE;
4725         daplka_mr_resource_t            *mr_rp = NULL;
4726         daplka_pd_resource_t            *pd_rp;
4727         uint64_t                        mr_hkey = 0;
4728         ibt_status_t                    status;
4729         int                             retval;
4730 
4731         retval = ddi_copyin((void *)arg, &args,
4732             sizeof (dapl_mr_register_shared_t), mode);
4733         if (retval != 0) {
4734                 DERR("mr_register_shared: copyin error %d\n", retval);
4735                 return (EINVAL);
4736         }
4737 
4738         mutex_enter(&daplka_shared_mr_lock);
4739         /*
4740          * find smrp from the global avl tree.
4741          * the 40-byte key is used as the lookup key.
4742          */
4743         tmp_smr.smr_cookie = args.mrs_shm_cookie;
4744         smrp = (daplka_shared_mr_t *)
4745             avl_find(&daplka_shared_mr_tree, &tmp_smr, &where);
4746         if (smrp != NULL) {
4747                 D2("mr_register_shared: smrp 0x%p, found cookie:\n"
4748                     "0x%016llx%016llx%016llx%016llx%016llx\n", smrp,
4749                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4750                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4751                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4752                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4753                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4754 
4755                 /*
4756                  * if the smrp exists, other threads could still be
4757                  * accessing it. we wait until they are done before
4758                  * we continue.
4759                  */
4760                 smrp->smr_refcnt++;
4761                 while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
4762                         D2("mr_register_shared: smrp 0x%p, "
4763                             "waiting in transitioning state, refcnt %d\n",
4764                             smrp, smrp->smr_refcnt);
4765                         cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
4766                 }
4767                 ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
4768                 D2("mr_register_shared: smrp 0x%p, refcnt %d, ready\n",
4769                     smrp, smrp->smr_refcnt);
4770 
4771                 /*
4772                  * we set smr_state to TRANSITIONING to temporarily
4773                  * prevent other threads from trying to access smrp.
4774                  */
4775                 smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4776         } else {
4777                 D2("mr_register_shared: cannot find cookie:\n"
4778                     "0x%016llx%016llx%016llx%016llx%016llx\n",
4779                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4780                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4781                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4782                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4783                     (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4784 
4785                 /*
4786                  * if we cannot find smrp, we need to create and
4787                  * insert one into daplka_shared_mr_tree
4788                  */
4789                 smrp = kmem_zalloc(sizeof (daplka_shared_mr_t),
4790                     daplka_km_flags);
4791                 if (smrp == NULL) {
4792                         retval = ENOMEM;
4793                         mutex_exit(&daplka_shared_mr_lock);
4794                         goto cleanup;
4795                 }
4796                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
4797                 smrp->smr_refcnt = 1;
4798                 smrp->smr_cookie = args.mrs_shm_cookie;
4799                 smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4800                 smrp->smr_mr_list = NULL;
4801                 cv_init(&smrp->smr_cv, NULL, CV_DRIVER, NULL);
4802                 avl_insert(&daplka_shared_mr_tree, smrp, where);
4803                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*smrp))
4804         }
4805         mutex_exit(&daplka_shared_mr_lock);
4806 
4807         mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
4808         if (mr_rp == NULL) {
4809                 DERR("mr_register_shared: cannot allocate mr resource\n");
4810                 goto cleanup;
4811         }
4812         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4813         DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
4814             DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
4815 
4816         mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
4817         mr_rp->mr_hca = ia_rp->ia_hca;
4818         mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
4819         mr_rp->mr_next = NULL;
4820         mr_rp->mr_shared_mr = NULL;
4821 
4822         /* get pd handle */
4823         pd_rp = (daplka_pd_resource_t *)
4824             daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mrs_pd_hkey);
4825         if (pd_rp == NULL) {
4826                 DERR("mr_register_shared: cannot find pd resource\n");
4827                 retval = EINVAL;
4828                 goto cleanup;
4829         }
4830         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
4831         mr_rp->mr_pd_res = pd_rp;
4832 
4833         mr_rp->mr_attr.mr_vaddr = args.mrs_vaddr;
4834         mr_rp->mr_attr.mr_len = args.mrs_len;
4835         mr_rp->mr_attr.mr_flags = args.mrs_flags | IBT_MR_NOSLEEP;
4836         mr_rp->mr_attr.mr_as = curproc->p_as;
4837 
4838         D2("mr_register_shared: mr_vaddr 0x%p, mr_len %llu, "
4839             "mr_flags 0x%x, mr_as 0x%p, mr_exists %d, smrp 0x%p\n",
4840             (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
4841             (longlong_t)mr_rp->mr_attr.mr_len,
4842             mr_rp->mr_attr.mr_flags, mr_rp->mr_attr.mr_as,
4843             (int)(smrp->smr_mr_list != NULL), smrp);
4844 
4845         /*
4846          * since we are in TRANSITIONING state, we are guaranteed
4847          * that we have exclusive access to smr_mr_list.
4848          */
4849         if (smrp->smr_mr_list != NULL) {
4850                 ibt_smr_attr_t  mem_sattr;
4851 
4852                 /*
4853                  * a non-null smr_mr_list indicates that someone
4854                  * else has already inserted an mr_resource into
4855                  * smr_mr_list. we use the mr_handle from the first
4856                  * element as an arg to ibt_register_shared_mr.
4857                  */
4858                 mem_sattr.mr_vaddr = smrp->smr_mr_list->mr_desc.md_vaddr;
4859                 mem_sattr.mr_flags = mr_rp->mr_attr.mr_flags;
4860 
4861                 D2("mr_register_shared: mem_sattr vaddr 0x%p flags 0x%x\n",
4862                     (void *)(uintptr_t)mem_sattr.mr_vaddr, mem_sattr.mr_flags);
4863                 status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
4864                     smrp->smr_mr_list->mr_hdl, mr_rp->mr_pd_res->pd_hdl,
4865                     &mem_sattr, &mr_rp->mr_hdl, &mr_rp->mr_desc);
4866 
4867                 if (status != IBT_SUCCESS) {
4868                         DERR("mr_register_shared: "
4869                             "ibt_register_shared_mr error %d\n", status);
4870                         *rvalp = (int)status;
4871                         retval = 0;
4872                         goto cleanup;
4873                 }
4874         } else {
4875                 /*
4876                  * an mr does not exist yet. we need to create one
4877                  * using ibt_register_mr.
4878                  */
4879                 status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
4880                     mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr,
4881                     &mr_rp->mr_hdl, &mr_rp->mr_desc);
4882 
4883                 if (status != IBT_SUCCESS) {
4884                         DERR("mr_register_shared: "
4885                             "ibt_register_mr error %d\n", status);
4886                         *rvalp = (int)status;
4887                         retval = 0;
4888                         goto cleanup;
4889                 }
4890         }
4891 
4892         mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
4893         mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
4894         mr_cb_data_in.mr_arg1 = (void *)mr_rp;
4895         mr_cb_data_in.mr_arg2 = NULL;
4896 
4897         /* Pass the service driver mr cleanup handler to the hca driver */
4898         status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
4899             IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
4900             &mr_cb_data_in, sizeof (mr_cb_data_in));
4901 
4902         if (status != IBT_SUCCESS) {
4903                 DERR("mr_register_shared: ibt_ci_data_in error(%d) ver(%d)",
4904                     status, mr_cb_data_in.mr_rev);
4905                 *rvalp = (int)status;
4906                 retval = 0;
4907                 goto cleanup;
4908         }
4909 
4910         /*
4911          * we bump reference of mr_rp and enqueue it onto smrp.
4912          */
4913         DAPLKA_RS_REF(mr_rp);
4914         mr_rp->mr_next = smrp->smr_mr_list;
4915         smrp->smr_mr_list = mr_rp;
4916         mr_rp->mr_shared_mr = smrp;
4917 
4918         /* insert into mr hash table */
4919         retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
4920             &mr_hkey, (void *)mr_rp);
4921         if (retval != 0) {
4922                 DERR("mr_register_shared: cannot insert mr resource\n");
4923                 goto cleanup;
4924         }
4925         inserted = B_TRUE;
4926         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
4927 
4928         /*
4929          * at this point, there are two references to our mr resource.
4930          * one is kept in ia_mr_htbl. the other is kept in the list
4931          * within this shared mr object (smrp). when we deregister this
4932          * mr or when a callback invalidates this mr, the reference kept
4933          * by this shared mr object will be removed.
4934          */
4935 
4936         args.mrs_lkey = mr_rp->mr_desc.md_lkey;
4937         args.mrs_rkey = mr_rp->mr_desc.md_rkey;
4938         args.mrs_hkey = mr_hkey;
4939 
4940         retval = ddi_copyout((void *)&args, (void *)arg,
4941             sizeof (dapl_mr_register_shared_t), mode);
4942         if (retval != 0) {
4943                 DERR("mr_register_shared: copyout error %d\n", retval);
4944                 retval = EFAULT;
4945                 goto cleanup;
4946         }
4947 
4948         /*
4949          * set the state to READY to allow others to continue
4950          */
4951         mutex_enter(&daplka_shared_mr_lock);
4952         smrp->smr_state = DAPLKA_SMR_READY;
4953         cv_broadcast(&smrp->smr_cv);
4954         mutex_exit(&daplka_shared_mr_lock);
4955         return (0);
4956 
4957 cleanup:;
4958         if (inserted) {
4959                 daplka_mr_resource_t *free_rp = NULL;
4960 
4961                 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
4962                     (void **)&free_rp);
4963                 if (free_rp != mr_rp) {
4964                         DERR("mr_register_shared: "
4965                             "cannot remove mr from hash table\n");
4966                         /*
4967                          * we can only get here if another thread
4968                          * has completed the cleanup in mr_deregister
4969                          */
4970                         return (retval);
4971                 }
4972         }
4973         if (smrp != NULL) {
4974                 mutex_enter(&daplka_shared_mr_lock);
4975                 ASSERT(smrp->smr_refcnt > 0);
4976                 smrp->smr_refcnt--;
4977 
4978                 if (smrp->smr_refcnt == 0) {
4979                         DERR("mr_register_shared: freeing smrp 0x%p\n", smrp);
4980                         avl_remove(&daplka_shared_mr_tree, smrp);
4981                         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
4982                         if (smrp->smr_mr_list != NULL) {
4983                                 /*
4984                                  * the refcnt is 0. if there is anything
4985                                  * left on the list, it must be ours.
4986                                  */
4987                                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4988                                 ASSERT(smrp->smr_mr_list == mr_rp);
4989                                 DAPLKA_RS_UNREF(mr_rp);
4990                                 smrp->smr_mr_list = NULL;
4991                                 ASSERT(mr_rp->mr_shared_mr == smrp);
4992                                 mr_rp->mr_shared_mr = NULL;
4993                                 ASSERT(mr_rp->mr_next == NULL);
4994                         }
4995                         smrp->smr_state = DAPLKA_SMR_FREED;
4996                         cv_destroy(&smrp->smr_cv);
4997                         kmem_free(smrp, sizeof (daplka_shared_mr_t));
4998                 } else {
4999                         DERR("mr_register_shared: resetting smr_state "
5000                             "smrp 0x%p, %d waiters remain\n", smrp,
5001                             smrp->smr_refcnt);
5002                         ASSERT(smrp->smr_state == DAPLKA_SMR_TRANSITIONING);
5003                         if (smrp->smr_mr_list != NULL && mr_rp != NULL) {
5004                                 daplka_mr_resource_t    **mpp;
5005 
5006                                 /*
5007                                  * search and remove mr_rp from smr_mr_list
5008                                  */
5009                                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5010                                 mpp = &smrp->smr_mr_list;
5011                                 while (*mpp != NULL) {
5012                                         if (*mpp == mr_rp) {
5013                                                 *mpp = (*mpp)->mr_next;
5014                                                 DAPLKA_RS_UNREF(mr_rp);
5015                                                 ASSERT(mr_rp->mr_shared_mr ==
5016                                                     smrp);
5017                                                 mr_rp->mr_shared_mr = NULL;
5018                                                 mr_rp->mr_next = NULL;
5019                                                 break;
5020                                         }
5021                                         mpp = &(*mpp)->mr_next;
5022                                 }
5023                         }
5024                         /*
5025                          * note that smr_state == READY does not necessarily
5026                          * mean that smr_mr_list is non empty. for this case,
5027                          * we are doing cleanup because of a failure. we set
5028                          * the state to READY to allow other threads to
5029                          * continue.
5030                          */
5031                         smrp->smr_state = DAPLKA_SMR_READY;
5032                         cv_broadcast(&smrp->smr_cv);
5033                 }
5034                 mutex_exit(&daplka_shared_mr_lock);
5035         }
5036         if (mr_rp != NULL) {
5037                 DAPLKA_RS_UNREF(mr_rp);
5038         }
5039         return (retval);
5040 }
5041 
5042 /*
5043  * registers a memory region using the attributes of an
5044  * existing region.
5045  */
5046 /* ARGSUSED */
5047 static int
5048 daplka_mr_register_lmr(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5049         cred_t *cred, int *rvalp)
5050 {
5051         boolean_t                       inserted = B_FALSE;
5052         dapl_mr_register_lmr_t          args;
5053         ibt_mr_data_in_t                mr_cb_data_in;
5054         daplka_mr_resource_t            *orig_mr_rp = NULL;
5055         daplka_mr_resource_t            *mr_rp;
5056         ibt_smr_attr_t                  mem_sattr;
5057         uint64_t                        mr_hkey = 0;
5058         ibt_status_t                    status;
5059         int                             retval;
5060 
5061         retval = ddi_copyin((void *)arg, &args,
5062             sizeof (dapl_mr_register_lmr_t), mode);
5063         if (retval != 0) {
5064                 DERR("mr_register_lmr: copyin error %d\n", retval);
5065                 return (EINVAL);
5066         }
5067         orig_mr_rp = (daplka_mr_resource_t *)
5068             daplka_hash_lookup(&ia_rp->ia_mr_htbl, args.mrl_orig_hkey);
5069         if (orig_mr_rp == NULL) {
5070                 DERR("mr_register_lmr: cannot find mr resource\n");
5071                 return (EINVAL);
5072         }
5073         ASSERT(DAPLKA_RS_TYPE(orig_mr_rp) == DAPL_TYPE_MR);
5074 
5075         mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
5076         if (mr_rp == NULL) {
5077                 DERR("mr_register_lmr: cannot allocate mr resource\n");
5078                 retval = ENOMEM;
5079                 goto cleanup;
5080         }
5081         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5082         DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
5083             DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
5084 
5085         mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
5086         mr_rp->mr_hca = ia_rp->ia_hca;
5087         mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
5088         mr_rp->mr_next = NULL;
5089         mr_rp->mr_shared_mr = NULL;
5090 
5091         DAPLKA_RS_REF(orig_mr_rp->mr_pd_res);
5092         mr_rp->mr_pd_res = orig_mr_rp->mr_pd_res;
5093         mr_rp->mr_attr = orig_mr_rp->mr_attr;
5094 
5095         /* Pass the IO addr that was returned while allocating the orig MR */
5096         mem_sattr.mr_vaddr = orig_mr_rp->mr_desc.md_vaddr;
5097         mem_sattr.mr_flags = args.mrl_flags | IBT_MR_NOSLEEP;
5098 
5099         status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
5100             orig_mr_rp->mr_hdl, mr_rp->mr_pd_res->pd_hdl, &mem_sattr,
5101             &mr_rp->mr_hdl, &mr_rp->mr_desc);
5102 
5103         if (status != IBT_SUCCESS) {
5104                 DERR("mr_register_lmr: ibt_register_shared_mr error %d\n",
5105                     status);
5106                 *rvalp = (int)status;
5107                 retval = 0;
5108                 goto cleanup;
5109         }
5110 
5111         mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
5112         mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
5113         mr_cb_data_in.mr_arg1 = (void *)mr_rp;
5114         mr_cb_data_in.mr_arg2 = NULL;
5115 
5116         /* Pass the service driver mr cleanup handler to the hca driver */
5117         status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
5118             IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
5119             &mr_cb_data_in, sizeof (mr_cb_data_in));
5120 
5121         if (status != IBT_SUCCESS) {
5122                 DERR("mr_register_lmr: ibt_ci_data_in error(%d) ver(%d)",
5123                     status, mr_cb_data_in.mr_rev);
5124                 *rvalp = (int)status;
5125                 retval = 0;
5126                 goto cleanup;
5127         }
5128         mr_rp->mr_attr.mr_len = orig_mr_rp->mr_attr.mr_len;
5129         mr_rp->mr_attr.mr_flags = mem_sattr.mr_flags;
5130 
5131         /* insert into mr hash table */
5132         retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, &mr_hkey,
5133             (void *)mr_rp);
5134         if (retval != 0) {
5135                 DERR("mr_register: cannot insert mr resource into mr_htbl\n");
5136                 goto cleanup;
5137         }
5138         inserted = B_TRUE;
5139         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
5140 
5141         args.mrl_lkey = mr_rp->mr_desc.md_lkey;
5142         args.mrl_rkey = mr_rp->mr_desc.md_rkey;
5143         args.mrl_hkey = mr_hkey;
5144 
5145         retval = ddi_copyout((void *)&args, (void *)arg,
5146             sizeof (dapl_mr_register_lmr_t), mode);
5147         if (retval != 0) {
5148                 DERR("mr_register_lmr: copyout error %d\n", retval);
5149                 retval = EFAULT;
5150                 goto cleanup;
5151         }
5152         if (orig_mr_rp != NULL) {
5153                 DAPLKA_RS_UNREF(orig_mr_rp);
5154         }
5155         return (0);
5156 
5157 cleanup:;
5158         if (inserted) {
5159                 daplka_mr_resource_t *free_rp = NULL;
5160 
5161                 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
5162                     (void **)&free_rp);
5163                 if (free_rp != mr_rp) {
5164                         DERR("mr_register: cannot remove mr from hash table\n");
5165                         /*
5166                          * we can only get here if another thread
5167                          * has completed the cleanup in mr_deregister
5168                          */
5169                         return (retval);
5170                 }
5171         }
5172         if (orig_mr_rp != NULL) {
5173                 DAPLKA_RS_UNREF(orig_mr_rp);
5174         }
5175         if (mr_rp != NULL) {
5176                 DAPLKA_RS_UNREF(mr_rp);
5177         }
5178         return (retval);
5179 }
5180 
5181 /*
5182  * this function is called by mr_deregister and mr_cleanup_callback to
5183  * remove a mr resource from the shared mr object mr_rp->mr_shared_mr.
5184  * if mr_shared_mr is already NULL, that means the region being
5185  * deregistered or invalidated is not a shared mr region and we can
5186  * return immediately.
5187  */
5188 static void
5189 daplka_shared_mr_free(daplka_mr_resource_t *mr_rp)
5190 {
5191         daplka_shared_mr_t      *smrp;
5192 
5193         /*
5194          * we need a lock because mr_callback also checks this field.
5195          * for the rare case that mr_deregister and mr_cleanup_callback
5196          * gets called simultaneously, we are guaranteed that smrp won't
5197          * be dereferenced twice because either function will find
5198          * mr_shared_mr to be NULL.
5199          */
5200         mutex_enter(&mr_rp->mr_lock);
5201         smrp = mr_rp->mr_shared_mr;
5202         mr_rp->mr_shared_mr = NULL;
5203         mutex_exit(&mr_rp->mr_lock);
5204 
5205         if (smrp != NULL) {
5206                 daplka_mr_resource_t    **mpp;
5207                 boolean_t               mr_found = B_FALSE;
5208 
5209                 mutex_enter(&daplka_shared_mr_lock);
5210                 ASSERT(smrp->smr_refcnt > 0);
5211                 while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
5212                         cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
5213                 }
5214                 ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
5215                 smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
5216                 smrp->smr_refcnt--;
5217 
5218                 /*
5219                  * search and remove mr_rp from smr_mr_list.
5220                  * also UNREF mr_rp because it is no longer
5221                  * on the list.
5222                  */
5223                 mpp = &smrp->smr_mr_list;
5224                 while (*mpp != NULL) {
5225                         if (*mpp == mr_rp) {
5226                                 *mpp = (*mpp)->mr_next;
5227                                 DAPLKA_RS_UNREF(mr_rp);
5228                                 mr_rp->mr_next = NULL;
5229                                 mr_found = B_TRUE;
5230                                 break;
5231                         }
5232                         mpp = &(*mpp)->mr_next;
5233                 }
5234                 /*
5235                  * since mr_clean_callback may not touch smr_mr_list
5236                  * at this time (due to smr_state), we can be sure
5237                  * that we can find and remove mr_rp from smr_mr_list
5238                  */
5239                 ASSERT(mr_found);
5240                 if (smrp->smr_refcnt == 0) {
5241                         D3("shared_mr_free: freeing smrp 0x%p\n", smrp);
5242                         avl_remove(&daplka_shared_mr_tree, smrp);
5243                         ASSERT(smrp->smr_mr_list == NULL);
5244                         smrp->smr_state = DAPLKA_SMR_FREED;
5245                         cv_destroy(&smrp->smr_cv);
5246                         kmem_free(smrp, sizeof (daplka_shared_mr_t));
5247                 } else {
5248                         D3("shared_mr_free: smrp 0x%p, refcnt %d\n",
5249                             smrp, smrp->smr_refcnt);
5250                         smrp->smr_state = DAPLKA_SMR_READY;
5251                         cv_broadcast(&smrp->smr_cv);
5252                 }
5253                 mutex_exit(&daplka_shared_mr_lock);
5254         }
5255 }
5256 
5257 /*
5258  * deregisters a memory region.
5259  * if mr is shared, remove reference from global shared mr object.
5260  * release the initial reference to the mr. if the mr's refcnt is
5261  * zero, call mr_destroy to free mr.
5262  */
5263 /* ARGSUSED */
5264 static int
5265 daplka_mr_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5266         cred_t *cred, int *rvalp)
5267 {
5268         daplka_mr_resource_t    *mr_rp;
5269         dapl_mr_deregister_t    args;
5270         int                     retval;
5271 
5272         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_deregister_t),
5273             mode);
5274         if (retval != 0) {
5275                 DERR("mr_deregister: copyin error %d\n", retval);
5276                 return (EINVAL);
5277         }
5278         retval = daplka_hash_remove(&ia_rp->ia_mr_htbl,
5279             args.mrd_hkey, (void **)&mr_rp);
5280         if (retval != 0 || mr_rp == NULL) {
5281                 DERR("mr_deregister: cannot find mr resource\n");
5282                 return (EINVAL);
5283         }
5284         ASSERT(DAPLKA_RS_TYPE(mr_rp) == DAPL_TYPE_MR);
5285 
5286         daplka_shared_mr_free(mr_rp);
5287         DAPLKA_RS_UNREF(mr_rp);
5288         return (0);
5289 }
5290 
5291 /*
5292  * sync local memory regions on RDMA read or write.
5293  */
5294 /* ARGSUSED */
5295 static int
5296 daplka_mr_sync(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5297         cred_t *cred, int *rvalp)
5298 {
5299         dapl_mr_sync_t  args;
5300         daplka_mr_resource_t *mr_rp[DAPL_MR_PER_SYNC];
5301         ibt_mr_sync_t   mrs[DAPL_MR_PER_SYNC];
5302         uint32_t        sync_direction_flags;
5303         ibt_status_t    status;
5304         int             i, j;
5305         int             retval;
5306 
5307         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_sync_t), mode);
5308         if (retval != 0) {
5309                 DERR("mr_sync: copyin error %d\n", retval);
5310                 return (EFAULT);
5311         }
5312 
5313         /* number of segments bound check */
5314         if (args.mrs_numseg > DAPL_MR_PER_SYNC) {
5315                 DERR("mr_sync: number of segments too large\n");
5316                 return (EINVAL);
5317         }
5318 
5319         /* translate MR sync direction flag */
5320         if (args.mrs_flags == DAPL_MR_SYNC_RDMA_RD) {
5321                 sync_direction_flags = IBT_SYNC_READ;
5322         } else if (args.mrs_flags == DAPL_MR_SYNC_RDMA_WR) {
5323                 sync_direction_flags = IBT_SYNC_WRITE;
5324         } else {
5325                 DERR("mr_sync: unknown flags\n");
5326                 return (EINVAL);
5327         }
5328 
5329         /*
5330          * all the segments are going to be sync'd by ibtl together
5331          */
5332         for (i = 0; i < args.mrs_numseg; i++) {
5333                 mr_rp[i] = (daplka_mr_resource_t *)daplka_hash_lookup(
5334                     &ia_rp->ia_mr_htbl, args.mrs_vec[i].mrsv_hkey);
5335                 if (mr_rp[i] == NULL) {
5336                         for (j = 0; j < i; j++) {
5337                                 DAPLKA_RS_UNREF(mr_rp[j]);
5338                         }
5339                         DERR("mr_sync: lookup error\n");
5340                         return (EINVAL);
5341                 }
5342                 ASSERT(DAPLKA_RS_TYPE(mr_rp[i]) == DAPL_TYPE_MR);
5343                 mrs[i].ms_handle = mr_rp[i]->mr_hdl;
5344                 mrs[i].ms_vaddr = args.mrs_vec[i].mrsv_va;
5345                 mrs[i].ms_len = args.mrs_vec[i].mrsv_len;
5346                 mrs[i].ms_flags = sync_direction_flags;
5347         }
5348 
5349         status = ibt_sync_mr(ia_rp->ia_hca_hdl, mrs, args.mrs_numseg);
5350         if (status != IBT_SUCCESS) {
5351                 DERR("mr_sync: ibt_sync_mr error %d\n", status);
5352                 *rvalp = (int)status;
5353         }
5354         for (i = 0; i < args.mrs_numseg; i++) {
5355                 DAPLKA_RS_UNREF(mr_rp[i]);
5356         }
5357         return (0);
5358 }
5359 
5360 /*
5361  * destroys a memory region.
5362  * called when refcnt drops to zero.
5363  */
5364 static int
5365 daplka_mr_destroy(daplka_resource_t *gen_rp)
5366 {
5367         daplka_mr_resource_t    *mr_rp = (daplka_mr_resource_t *)gen_rp;
5368         ibt_status_t            status;
5369 
5370         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5371         ASSERT(DAPLKA_RS_REFCNT(mr_rp) == 0);
5372         ASSERT(mr_rp->mr_shared_mr == NULL);
5373         D3("mr_destroy: entering, mr_rp 0x%p, rnum %d\n",
5374             mr_rp, DAPLKA_RS_RNUM(mr_rp));
5375 
5376         /*
5377          * deregister mr
5378          */
5379         if (mr_rp->mr_hdl) {
5380                 status = daplka_ibt_deregister_mr(mr_rp, mr_rp->mr_hca_hdl,
5381                     mr_rp->mr_hdl);
5382                 if (status != IBT_SUCCESS) {
5383                         DERR("mr_destroy: ibt_deregister_mr returned %d\n",
5384                             status);
5385                 }
5386                 mr_rp->mr_hdl = NULL;
5387                 D3("mr_destroy: mr deregistered\n");
5388         }
5389         mr_rp->mr_attr.mr_vaddr = NULL;
5390 
5391         /*
5392          * release reference on PD
5393          */
5394         if (mr_rp->mr_pd_res != NULL) {
5395                 DAPLKA_RS_UNREF(mr_rp->mr_pd_res);
5396                 mr_rp->mr_pd_res = NULL;
5397         }
5398         mutex_destroy(&mr_rp->mr_lock);
5399         DAPLKA_RS_FINI(mr_rp);
5400         kmem_free(mr_rp, sizeof (daplka_mr_resource_t));
5401         D3("mr_destroy: exiting, mr_rp 0x%p\n", mr_rp);
5402         return (0);
5403 }
5404 
5405 /*
5406  * this function is called by daplka_hash_destroy for
5407  * freeing MR resource objects
5408  */
5409 static void
5410 daplka_hash_mr_free(void *obj)
5411 {
5412         daplka_mr_resource_t    *mr_rp = (daplka_mr_resource_t *)obj;
5413 
5414         daplka_shared_mr_free(mr_rp);
5415         DAPLKA_RS_UNREF(mr_rp);
5416 }
5417 
5418 /*
5419  * comparison function used for finding a shared mr object
5420  * from the global shared mr avl tree.
5421  */
5422 static int
5423 daplka_shared_mr_cmp(const void *smr1, const void *smr2)
5424 {
5425         daplka_shared_mr_t      *s1 = (daplka_shared_mr_t *)smr1;
5426         daplka_shared_mr_t      *s2 = (daplka_shared_mr_t *)smr2;
5427         int i;
5428 
5429         for (i = 4; i >= 0; i--) {
5430                 if (s1->smr_cookie.mc_uint_arr[i] <
5431                     s2->smr_cookie.mc_uint_arr[i]) {
5432                         return (-1);
5433                 }
5434                 if (s1->smr_cookie.mc_uint_arr[i] >
5435                     s2->smr_cookie.mc_uint_arr[i]) {
5436                         return (1);
5437                 }
5438         }
5439         return (0);
5440 }
5441 
5442 /*
5443  * allocates a protection domain.
5444  */
5445 /* ARGSUSED */
5446 static int
5447 daplka_pd_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5448         cred_t *cred, int *rvalp)
5449 {
5450         dapl_pd_alloc_t         args;
5451         daplka_pd_resource_t    *pd_rp;
5452         ibt_status_t            status;
5453         uint64_t                pd_hkey = 0;
5454         boolean_t               inserted = B_FALSE;
5455         int                     retval;
5456 
5457         pd_rp = kmem_zalloc(sizeof (*pd_rp), daplka_km_flags);
5458         if (pd_rp == NULL) {
5459                 DERR("pd_alloc: cannot allocate pd resource\n");
5460                 return (ENOMEM);
5461         }
5462         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp))
5463         DAPLKA_RS_INIT(pd_rp, DAPL_TYPE_PD,
5464             DAPLKA_RS_RNUM(ia_rp), daplka_pd_destroy);
5465 
5466         pd_rp->pd_hca = ia_rp->ia_hca;
5467         pd_rp->pd_hca_hdl = ia_rp->ia_hca_hdl;
5468         status = daplka_ibt_alloc_pd(pd_rp, pd_rp->pd_hca_hdl,
5469             IBT_PD_NO_FLAGS, &pd_rp->pd_hdl);
5470         if (status != IBT_SUCCESS) {
5471                 DERR("pd_alloc: ibt_alloc_pd returned %d\n", status);
5472                 *rvalp = (int)status;
5473                 retval = 0;
5474                 goto cleanup;
5475         }
5476 
5477         /* insert into pd hash table */
5478         retval = daplka_hash_insert(&ia_rp->ia_pd_htbl,
5479             &pd_hkey, (void *)pd_rp);
5480         if (retval != 0) {
5481                 DERR("pd_alloc: cannot insert pd resource into pd_htbl\n");
5482                 goto cleanup;
5483         }
5484         inserted = B_TRUE;
5485         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pd_rp))
5486 
5487         /* return hkey to library */
5488         args.pda_hkey = pd_hkey;
5489 
5490         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_pd_alloc_t),
5491             mode);
5492         if (retval != 0) {
5493                 DERR("pd_alloc: copyout error %d\n", retval);
5494                 retval = EFAULT;
5495                 goto cleanup;
5496         }
5497         return (0);
5498 
5499 cleanup:;
5500         if (inserted) {
5501                 daplka_pd_resource_t *free_rp = NULL;
5502 
5503                 (void) daplka_hash_remove(&ia_rp->ia_pd_htbl, pd_hkey,
5504                     (void **)&free_rp);
5505                 if (free_rp != pd_rp) {
5506                         DERR("pd_alloc: cannot remove pd from hash table\n");
5507                         /*
5508                          * we can only get here if another thread
5509                          * has completed the cleanup in pd_free
5510                          */
5511                         return (retval);
5512                 }
5513         }
5514         DAPLKA_RS_UNREF(pd_rp);
5515         return (retval);
5516 }
5517 
5518 /*
5519  * destroys a protection domain.
5520  * called when refcnt drops to zero.
5521  */
5522 static int
5523 daplka_pd_destroy(daplka_resource_t *gen_rp)
5524 {
5525         daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)gen_rp;
5526         ibt_status_t status;
5527 
5528         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp))
5529         ASSERT(DAPLKA_RS_REFCNT(pd_rp) == 0);
5530         D3("pd_destroy: entering, pd_rp %p, rnum %d\n",
5531             pd_rp, DAPLKA_RS_RNUM(pd_rp));
5532 
5533         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5534         if (pd_rp->pd_hdl != NULL) {
5535                 status = daplka_ibt_free_pd(pd_rp, pd_rp->pd_hca_hdl,
5536                     pd_rp->pd_hdl);
5537                 if (status != IBT_SUCCESS) {
5538                         DERR("pd_destroy: ibt_free_pd returned %d\n", status);
5539                 }
5540         }
5541         DAPLKA_RS_FINI(pd_rp);
5542         kmem_free(pd_rp, sizeof (daplka_pd_resource_t));
5543         D3("pd_destroy: exiting, pd_rp %p\n", pd_rp);
5544         return (0);
5545 }
5546 
5547 static void
5548 daplka_hash_pd_free(void *obj)
5549 {
5550         daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)obj;
5551 
5552         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5553         DAPLKA_RS_UNREF(pd_rp);
5554 }
5555 
5556 /*
5557  * removes the pd reference from ia_pd_htbl and releases the
5558  * initial reference to the pd. also destroys the pd if the refcnt
5559  * is zero.
5560  */
5561 /* ARGSUSED */
5562 static int
5563 daplka_pd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5564         cred_t *cred, int *rvalp)
5565 {
5566         daplka_pd_resource_t *pd_rp;
5567         dapl_pd_free_t args;
5568         int retval;
5569 
5570         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_pd_free_t), mode);
5571         if (retval != 0) {
5572                 DERR("pd_free: copyin error %d\n", retval);
5573                 return (EINVAL);
5574         }
5575 
5576         retval = daplka_hash_remove(&ia_rp->ia_pd_htbl,
5577             args.pdf_hkey, (void **)&pd_rp);
5578         if (retval != 0 || pd_rp == NULL) {
5579                 DERR("pd_free: cannot find pd resource\n");
5580                 return (EINVAL);
5581         }
5582         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5583 
5584         /* UNREF calls the actual free function when refcnt is zero */
5585         DAPLKA_RS_UNREF(pd_rp);
5586         return (0);
5587 }
5588 
5589 /*
5590  * allocates a memory window
5591  */
5592 /* ARGSUSED */
5593 static int
5594 daplka_mw_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5595         cred_t *cred, int *rvalp)
5596 {
5597         daplka_pd_resource_t    *pd_rp;
5598         daplka_mw_resource_t    *mw_rp;
5599         dapl_mw_alloc_t         args;
5600         ibt_status_t            status;
5601         boolean_t               inserted = B_FALSE;
5602         uint64_t                mw_hkey;
5603         ibt_rkey_t              mw_rkey;
5604         int                     retval;
5605 
5606         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_alloc_t), mode);
5607         if (retval != 0) {
5608                 DERR("mw_alloc: copyin error %d\n", retval);
5609                 return (EFAULT);
5610         }
5611 
5612         /*
5613          * Allocate and initialize a MW resource
5614          */
5615         mw_rp = kmem_zalloc(sizeof (daplka_mw_resource_t), daplka_km_flags);
5616         if (mw_rp == NULL) {
5617                 DERR("mw_alloc: cannot allocate mw resource\n");
5618                 return (ENOMEM);
5619         }
5620         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
5621         DAPLKA_RS_INIT(mw_rp, DAPL_TYPE_MW,
5622             DAPLKA_RS_RNUM(ia_rp), daplka_mw_destroy);
5623 
5624         mutex_init(&mw_rp->mw_lock, NULL, MUTEX_DRIVER, NULL);
5625         mw_rp->mw_hca = ia_rp->ia_hca;
5626         mw_rp->mw_hca_hdl = ia_rp->ia_hca_hdl;
5627 
5628         /* get pd handle */
5629         pd_rp = (daplka_pd_resource_t *)
5630             daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mw_pd_hkey);
5631         if (pd_rp == NULL) {
5632                 DERR("mw_alloc: cannot find pd resource\n");
5633                 goto cleanup;
5634         }
5635         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5636 
5637         mw_rp->mw_pd_res = pd_rp;
5638 
5639         status = daplka_ibt_alloc_mw(mw_rp, mw_rp->mw_hca_hdl,
5640             pd_rp->pd_hdl, IBT_MW_NOSLEEP, &mw_rp->mw_hdl, &mw_rkey);
5641 
5642         if (status != IBT_SUCCESS) {
5643                 DERR("mw_alloc: ibt_alloc_mw returned %d\n", status);
5644                 *rvalp = (int)status;
5645                 retval = 0;
5646                 goto cleanup;
5647         }
5648 
5649         mutex_enter(&ia_rp->ia_lock);
5650         switch (ia_rp->ia_state) {
5651         case DAPLKA_IA_INIT:
5652                 ia_rp->ia_state = DAPLKA_IA_MW_ALLOC_IN_PROGRESS;
5653                 ia_rp->ia_mw_alloccnt++;
5654                 retval = 0;
5655                 break;
5656         case DAPLKA_IA_MW_ALLOC_IN_PROGRESS:
5657                 /* another mw_alloc is already in progress increase cnt */
5658                 ia_rp->ia_mw_alloccnt++;
5659                 retval = 0;
5660                 break;
5661         case DAPLKA_IA_MW_FREEZE_IN_PROGRESS:
5662                 /* FALLTHRU */
5663         case DAPLKA_IA_MW_FROZEN:
5664                 /*
5665                  * IA is being or already frozen don't allow more MWs to be
5666                  * allocated.
5667                  */
5668                 DERR("mw_alloc: IA is freezing MWs (state=%d)\n",
5669                     ia_rp->ia_state);
5670                 retval = EINVAL;
5671                 break;
5672         default:
5673                 ASSERT(!"Invalid IA state in mw_alloc");
5674                 DERR("mw_alloc: IA state=%d invalid\n", ia_rp->ia_state);
5675                 retval = EINVAL;
5676                 break;
5677         }
5678         mutex_exit(&ia_rp->ia_lock);
5679         /* retval is 0 when ia_mw_alloccnt is incremented */
5680         if (retval != 0) {
5681                 goto cleanup;
5682         }
5683 
5684         /* insert into mw hash table */
5685         mw_hkey = 0;
5686         retval = daplka_hash_insert(&ia_rp->ia_mw_htbl, &mw_hkey,
5687             (void *)mw_rp);
5688         if (retval != 0) {
5689                 DERR("mw_alloc: cannot insert mw resource into mw_htbl\n");
5690                 mutex_enter(&ia_rp->ia_lock);
5691                 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5692                 ia_rp->ia_mw_alloccnt--;
5693                 if (ia_rp->ia_mw_alloccnt == 0) {
5694                         ia_rp->ia_state = DAPLKA_IA_INIT;
5695                         cv_broadcast(&ia_rp->ia_cv);
5696                 }
5697                 mutex_exit(&ia_rp->ia_lock);
5698                 goto cleanup;
5699         }
5700         inserted = B_TRUE;
5701         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mw_rp))
5702 
5703         D3("mw_alloc: ibt_alloc_mw mw_hdl(%p) mw_rkey(0x%llx)\n",
5704             mw_rp->mw_hdl, (longlong_t)mw_rkey);
5705 
5706         mutex_enter(&ia_rp->ia_lock);
5707         /*
5708          * We are done with mw_alloc if this was the last mw_alloc
5709          * change state back to DAPLKA_IA_INIT and wake up waiters
5710          * specifically the unlock callback.
5711          */
5712         ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5713         ia_rp->ia_mw_alloccnt--;
5714         if (ia_rp->ia_mw_alloccnt == 0) {
5715                 ia_rp->ia_state = DAPLKA_IA_INIT;
5716                 cv_broadcast(&ia_rp->ia_cv);
5717         }
5718         mutex_exit(&ia_rp->ia_lock);
5719 
5720         args.mw_hkey = mw_hkey;
5721         args.mw_rkey = mw_rkey;
5722 
5723         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_mw_alloc_t),
5724             mode);
5725         if (retval != 0) {
5726                 DERR("mw_alloc: copyout error %d\n", retval);
5727                 retval = EFAULT;
5728                 goto cleanup;
5729         }
5730         return (0);
5731 
5732 cleanup:;
5733         if (inserted) {
5734                 daplka_mw_resource_t *free_rp = NULL;
5735 
5736                 (void) daplka_hash_remove(&ia_rp->ia_mw_htbl, mw_hkey,
5737                     (void **)&free_rp);
5738                 if (free_rp != mw_rp) {
5739                         DERR("mw_alloc: cannot remove mw from hash table\n");
5740                         /*
5741                          * we can only get here if another thread
5742                          * has completed the cleanup in mw_free
5743                          */
5744                         return (retval);
5745                 }
5746         }
5747         DAPLKA_RS_UNREF(mw_rp);
5748         return (retval);
5749 }
5750 
5751 /*
5752  * removes the mw reference from ia_mw_htbl and releases the
5753  * initial reference to the mw. also destroys the mw if the refcnt
5754  * is zero.
5755  */
5756 /* ARGSUSED */
5757 static int
5758 daplka_mw_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5759         cred_t *cred, int *rvalp)
5760 {
5761         daplka_mw_resource_t    *mw_rp = NULL;
5762         dapl_mw_free_t          args;
5763         int                     retval = 0;
5764 
5765         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_free_t), mode);
5766         if (retval != 0) {
5767                 DERR("mw_free: copyin error %d\n", retval);
5768                 return (EFAULT);
5769         }
5770 
5771         retval = daplka_hash_remove(&ia_rp->ia_mw_htbl, args.mw_hkey,
5772             (void **)&mw_rp);
5773         if (retval != 0 || mw_rp == NULL) {
5774                 DERR("mw_free: cannot find mw resrc (0x%llx)\n",
5775                     (longlong_t)args.mw_hkey);
5776                 return (EINVAL);
5777         }
5778 
5779         ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5780 
5781         /* UNREF calls the actual free function when refcnt is zero */
5782         DAPLKA_RS_UNREF(mw_rp);
5783         return (retval);
5784 }
5785 
5786 /*
5787  * destroys the memory window.
5788  * called when refcnt drops to zero.
5789  */
5790 static int
5791 daplka_mw_destroy(daplka_resource_t *gen_rp)
5792 {
5793         daplka_mw_resource_t    *mw_rp = (daplka_mw_resource_t *)gen_rp;
5794         ibt_status_t            status;
5795 
5796         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
5797         ASSERT(DAPLKA_RS_REFCNT(mw_rp) == 0);
5798         D3("mw_destroy: entering, mw_rp 0x%p, rnum %d\n",
5799             mw_rp, DAPLKA_RS_RNUM(mw_rp));
5800 
5801         /*
5802          * free memory window
5803          */
5804         if (mw_rp->mw_hdl) {
5805                 status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl,
5806                     mw_rp->mw_hdl);
5807                 if (status != IBT_SUCCESS) {
5808                         DERR("mw_destroy: ibt_free_mw returned %d\n", status);
5809                 }
5810                 mw_rp->mw_hdl = NULL;
5811                 D3("mw_destroy: mw freed\n");
5812         }
5813 
5814         /*
5815          * release reference on PD
5816          */
5817         if (mw_rp->mw_pd_res != NULL) {
5818                 DAPLKA_RS_UNREF(mw_rp->mw_pd_res);
5819                 mw_rp->mw_pd_res = NULL;
5820         }
5821         mutex_destroy(&mw_rp->mw_lock);
5822         DAPLKA_RS_FINI(mw_rp);
5823         kmem_free(mw_rp, sizeof (daplka_mw_resource_t));
5824         D3("mw_destroy: exiting, mw_rp 0x%p\n", mw_rp);
5825         return (0);
5826 }
5827 
5828 static void
5829 daplka_hash_mw_free(void *obj)
5830 {
5831         daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)obj;
5832 
5833         ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5834         DAPLKA_RS_UNREF(mw_rp);
5835 }
5836 
5837 /*
5838  * SRQ ioctls and supporting functions
5839  */
5840 /* ARGSUSED */
5841 static int
5842 daplka_srq_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5843     cred_t *cred, int *rvalp)
5844 {
5845         daplka_srq_resource_t           *srq_rp;
5846         daplka_pd_resource_t            *pd_rp;
5847         dapl_srq_create_t               args;
5848         ibt_srq_sizes_t                 srq_sizes;
5849         ibt_srq_sizes_t                 srq_real_sizes;
5850         ibt_hca_attr_t                  *hca_attrp;
5851         uint64_t                        srq_hkey = 0;
5852         boolean_t                       inserted = B_FALSE;
5853         int                             retval;
5854         ibt_status_t                    status;
5855 
5856         D3("srq_create: enter\n");
5857         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_create_t),
5858             mode);
5859         if (retval != 0) {
5860                 DERR("srq_create: copyin error %d\n", retval);
5861                 return (EFAULT);
5862         }
5863         srq_rp = kmem_zalloc(sizeof (daplka_srq_resource_t), daplka_km_flags);
5864         if (srq_rp == NULL) {
5865                 DERR("srq_create: cannot allocate ep_rp\n");
5866                 return (ENOMEM);
5867         }
5868         DAPLKA_RS_INIT(srq_rp, DAPL_TYPE_SRQ,
5869             DAPLKA_RS_RNUM(ia_rp), daplka_srq_destroy);
5870 
5871         srq_rp->srq_hca = ia_rp->ia_hca;
5872         srq_rp->srq_hca_hdl = ia_rp->ia_hca_hdl;
5873         mutex_init(&srq_rp->srq_lock, NULL, MUTEX_DRIVER, NULL);
5874 
5875         /* get pd handle */
5876         pd_rp = (daplka_pd_resource_t *)
5877             daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.srqc_pd_hkey);
5878         if (pd_rp == NULL) {
5879                 DERR("srq_create: cannot find pd resource\n");
5880                 retval = EINVAL;
5881                 goto cleanup;
5882         }
5883         ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5884         srq_rp->srq_pd_res = pd_rp;
5885 
5886         /*
5887          * these checks ensure that the requested SRQ sizes
5888          * are within the limits supported by the chosen HCA.
5889          */
5890         hca_attrp = &ia_rp->ia_hca->hca_attr;
5891         if (args.srqc_sizes.srqs_sz > hca_attrp->hca_max_srqs_sz) {
5892                 DERR("srq_create: invalid srqs_sz %d\n",
5893                     args.srqc_sizes.srqs_sz);
5894                 retval = EINVAL;
5895                 goto cleanup;
5896         }
5897         if (args.srqc_sizes.srqs_sgl > hca_attrp->hca_max_srq_sgl) {
5898                 DERR("srq_create: invalid srqs_sgl %d\n",
5899                     args.srqc_sizes.srqs_sgl);
5900                 retval = EINVAL;
5901                 goto cleanup;
5902         }
5903 
5904         D3("srq_create: srq_sgl %d, srq_sz %d\n",
5905             args.srqc_sizes.srqs_sgl, args.srqc_sizes.srqs_sz);
5906 
5907         srq_sizes.srq_wr_sz = args.srqc_sizes.srqs_sz;
5908         srq_sizes.srq_sgl_sz = args.srqc_sizes.srqs_sgl;
5909 
5910         /* create srq */
5911         status = daplka_ibt_alloc_srq(srq_rp, ia_rp->ia_hca_hdl,
5912             IBT_SRQ_USER_MAP, pd_rp->pd_hdl, &srq_sizes, &srq_rp->srq_hdl,
5913             &srq_real_sizes);
5914         if (status != IBT_SUCCESS) {
5915                 DERR("srq_create: alloc_srq returned %d\n", status);
5916                 *rvalp = (int)status;
5917                 retval = 0;
5918                 goto cleanup;
5919         }
5920 
5921         args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5922         args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5923 
5924         /* Get HCA-specific data_out info */
5925         status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
5926             IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
5927             &args.srqc_data_out, sizeof (args.srqc_data_out));
5928 
5929         if (status != IBT_SUCCESS) {
5930                 DERR("srq_create: ibt_ci_data_out error(%d)\n", status);
5931                 *rvalp = (int)status;
5932                 retval = 0;
5933                 goto cleanup;
5934         }
5935 
5936         srq_rp->srq_real_size = srq_real_sizes.srq_wr_sz;
5937 
5938         /* preparing to copyout map_data back to the library */
5939         args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5940         args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5941 
5942         /* insert into srq hash table */
5943         retval = daplka_hash_insert(&ia_rp->ia_srq_htbl,
5944             &srq_hkey, (void *)srq_rp);
5945         if (retval != 0) {
5946                 DERR("srq_create: cannot insert srq resource into srq_htbl\n");
5947                 goto cleanup;
5948         }
5949         inserted = B_TRUE;
5950 
5951         /* return hkey to library */
5952         args.srqc_hkey = srq_hkey;
5953 
5954         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_create_t),
5955             mode);
5956         if (retval != 0) {
5957                 DERR("srq_create: copyout error %d\n", retval);
5958                 retval = EFAULT;
5959                 goto cleanup;
5960         }
5961 
5962         D3("srq_create: %p, 0x%llx\n", srq_rp->srq_hdl, (longlong_t)srq_hkey);
5963         D3("    sz(%d) sgl(%d)\n",
5964             args.srqc_real_sizes.srqs_sz, args.srqc_real_sizes.srqs_sgl);
5965         D3("srq_create: exit\n");
5966         return (0);
5967 
5968 cleanup:
5969         if (inserted) {
5970                 daplka_srq_resource_t *free_rp = NULL;
5971 
5972                 (void) daplka_hash_remove(&ia_rp->ia_srq_htbl, srq_hkey,
5973                     (void **)&free_rp);
5974                 if (free_rp != srq_rp) {
5975                         /*
5976                          * this case is impossible because ep_free will
5977                          * wait until our state transition is complete.
5978                          */
5979                         DERR("srq_create: cannot remove srq from hash table\n");
5980                         ASSERT(B_FALSE);
5981                         return (retval);
5982                 }
5983         }
5984         DAPLKA_RS_UNREF(srq_rp);
5985         return (retval);
5986 }
5987 
5988 /*
5989  * Resize an existing SRQ
5990  */
5991 /* ARGSUSED */
5992 static int
5993 daplka_srq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5994     cred_t *cred, int *rvalp)
5995 {
5996         daplka_srq_resource_t           *srq_rp = NULL;
5997         ibt_hca_attr_t                  *hca_attrp;
5998         dapl_srq_resize_t               args;
5999         ibt_status_t                    status;
6000         int                             retval = 0;
6001 
6002         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_resize_t),
6003             mode);
6004         if (retval != 0) {
6005                 DERR("srq_resize: copyin error %d\n", retval);
6006                 return (EFAULT);
6007         }
6008 
6009         /* get srq resource */
6010         srq_rp = (daplka_srq_resource_t *)
6011             daplka_hash_lookup(&ia_rp->ia_srq_htbl, args.srqr_hkey);
6012         if (srq_rp == NULL) {
6013                 DERR("srq_resize: cannot find srq resource\n");
6014                 return (EINVAL);
6015         }
6016         ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
6017 
6018         hca_attrp = &ia_rp->ia_hca->hca_attr;
6019         if (args.srqr_new_size > hca_attrp->hca_max_srqs_sz) {
6020                 DERR("srq_resize: invalid srq size %d", args.srqr_new_size);
6021                 retval = EINVAL;
6022                 goto cleanup;
6023         }
6024 
6025         mutex_enter(&srq_rp->srq_lock);
6026         /*
6027          * If ibt_resize_srq fails that it is primarily due to resource
6028          * shortage. Per IB spec resize will never loose events and
6029          * a resize error leaves the SRQ intact. Therefore even if the
6030          * resize request fails we proceed and get the mapping data
6031          * from the SRQ so that the library can mmap it.
6032          */
6033         status = ibt_modify_srq(srq_rp->srq_hdl, IBT_SRQ_SET_SIZE,
6034             args.srqr_new_size, 0, &args.srqr_real_size);
6035         if (status != IBT_SUCCESS) {
6036                 /* we return the size of the old CQ if resize fails */
6037                 args.srqr_real_size = srq_rp->srq_real_size;
6038                 ASSERT(status != IBT_SRQ_HDL_INVALID);
6039                 DERR("srq_resize: ibt_modify_srq failed:%d\n", status);
6040         } else {
6041                 srq_rp->srq_real_size = args.srqr_real_size;
6042         }
6043         mutex_exit(&srq_rp->srq_lock);
6044 
6045 
6046         D2("srq_resize(%d): done new_sz(%u) real_sz(%u)\n",
6047             DAPLKA_RS_RNUM(srq_rp), args.srqr_new_size, args.srqr_real_size);
6048 
6049         /* Get HCA-specific data_out info */
6050         status = ibt_ci_data_out(srq_rp->srq_hca_hdl,
6051             IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
6052             &args.srqr_data_out, sizeof (args.srqr_data_out));
6053         if (status != IBT_SUCCESS) {
6054                 DERR("srq_resize: ibt_ci_data_out error(%d)\n", status);
6055                 /* return ibt_ci_data_out status */
6056                 *rvalp = (int)status;
6057                 retval = 0;
6058                 goto cleanup;
6059         }
6060 
6061         retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_resize_t),
6062             mode);
6063         if (retval != 0) {
6064                 DERR("srq_resize: copyout error %d\n", retval);
6065                 retval = EFAULT;
6066                 goto cleanup;
6067         }
6068 
6069 cleanup:;
6070         if (srq_rp != NULL) {
6071                 DAPLKA_RS_UNREF(srq_rp);
6072         }
6073         return (retval);
6074 }
6075 
6076 /*
6077  * Frees an SRQ resource.
6078  */
6079 /* ARGSUSED */
6080 static int
6081 daplka_srq_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6082     cred_t *cred, int *rvalp)
6083 {
6084         daplka_srq_resource_t   *srq_rp = NULL;
6085         dapl_srq_free_t         args;
6086         int                     retval;
6087 
6088         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_free_t), mode);
6089         if (retval != 0) {
6090                 DERR("srq_free: copyin error %d\n", retval);
6091                 return (EFAULT);
6092         }
6093 
6094         retval = daplka_hash_remove(&ia_rp->ia_srq_htbl,
6095             args.srqf_hkey, (void **)&srq_rp);
6096         if (retval != 0 || srq_rp == NULL) {
6097                 /*
6098                  * this is only possible if we have two threads
6099                  * calling ep_free in parallel.
6100                  */
6101                 DERR("srq_free: cannot find resource retval(%d) 0x%llx\n",
6102                     retval, args.srqf_hkey);
6103                 return (EINVAL);
6104         }
6105 
6106         /* UNREF calls the actual free function when refcnt is zero */
6107         DAPLKA_RS_UNREF(srq_rp);
6108         return (0);
6109 }
6110 
6111 /*
6112  * destroys a SRQ resource.
6113  * called when refcnt drops to zero.
6114  */
6115 static int
6116 daplka_srq_destroy(daplka_resource_t *gen_rp)
6117 {
6118         daplka_srq_resource_t   *srq_rp = (daplka_srq_resource_t *)gen_rp;
6119         ibt_status_t            status;
6120 
6121         ASSERT(DAPLKA_RS_REFCNT(srq_rp) == 0);
6122 
6123         D3("srq_destroy: entering, srq_rp 0x%p, rnum %d\n",
6124             srq_rp, DAPLKA_RS_RNUM(srq_rp));
6125         /*
6126          * destroy the srq
6127          */
6128         if (srq_rp->srq_hdl != NULL) {
6129                 status = daplka_ibt_free_srq(srq_rp, srq_rp->srq_hdl);
6130                 if (status != IBT_SUCCESS) {
6131                         DERR("srq_destroy: ibt_free_srq returned %d\n",
6132                             status);
6133                 }
6134                 srq_rp->srq_hdl = NULL;
6135                 D3("srq_destroy: srq freed, rnum %d\n", DAPLKA_RS_RNUM(srq_rp));
6136         }
6137         /*
6138          * release all references
6139          */
6140         if (srq_rp->srq_pd_res != NULL) {
6141                 DAPLKA_RS_UNREF(srq_rp->srq_pd_res);
6142                 srq_rp->srq_pd_res = NULL;
6143         }
6144 
6145         mutex_destroy(&srq_rp->srq_lock);
6146         DAPLKA_RS_FINI(srq_rp);
6147         kmem_free(srq_rp, sizeof (daplka_srq_resource_t));
6148         D3("srq_destroy: exiting, srq_rp 0x%p\n", srq_rp);
6149         return (0);
6150 }
6151 
6152 static void
6153 daplka_hash_srq_free(void *obj)
6154 {
6155         daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)obj;
6156 
6157         ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
6158         DAPLKA_RS_UNREF(srq_rp);
6159 }
6160 
6161 /*
6162  * This function tells the CM to start listening on a service id.
6163  * It must be called by the passive side client before the client
6164  * can receive connection requests from remote endpoints. If the
6165  * client specifies a non-zero service id (connection qualifier in
6166  * dapl terms), this function will attempt to bind to this service
6167  * id and return an error if the id is already in use. If the client
6168  * specifies zero as the service id, this function will try to find
6169  * the next available service id and return it back to the client.
6170  * To support the cr_handoff function, this function will, in addition
6171  * to creating and inserting an SP resource into the per-IA SP hash
6172  * table, insert the SP resource into a global SP table. This table
6173  * maintains all active service points created by all dapl clients.
6174  * CR handoff locates the target SP by iterating through this global
6175  * table.
6176  */
6177 /* ARGSUSED */
6178 static int
6179 daplka_service_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6180         cred_t *cred, int *rvalp)
6181 {
6182         daplka_evd_resource_t   *evd_rp = NULL;
6183         daplka_sp_resource_t    *sp_rp = NULL;
6184         dapl_service_register_t args;
6185         ibt_srv_desc_t          sd_args;
6186         ibt_srv_bind_t          sb_args;
6187         ibt_status_t            status;
6188         ib_svc_id_t             retsid = 0;
6189         uint64_t                sp_hkey = 0;
6190         boolean_t               bumped = B_FALSE;
6191         int                     backlog_size;
6192         int                     retval = 0;
6193 
6194         retval = ddi_copyin((void *)arg, &args,
6195             sizeof (dapl_service_register_t), mode);
6196         if (retval != 0) {
6197                 DERR("service_register: copyin error %d\n", retval);
6198                 return (EINVAL);
6199         }
6200 
6201         sp_rp = kmem_zalloc(sizeof (*sp_rp), daplka_km_flags);
6202         if (sp_rp == NULL) {
6203                 DERR("service_register: cannot allocate sp resource\n");
6204                 return (ENOMEM);
6205         }
6206         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
6207         DAPLKA_RS_INIT(sp_rp, DAPL_TYPE_SP,
6208             DAPLKA_RS_RNUM(ia_rp), daplka_sp_destroy);
6209 
6210         /* check if evd exists */
6211         evd_rp = (daplka_evd_resource_t *)
6212             daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.sr_evd_hkey);
6213         if (evd_rp == NULL) {
6214                 DERR("service_register: evd resource not found\n");
6215                 retval = EINVAL;
6216                 goto cleanup;
6217         }
6218         /*
6219          * initialize backlog size
6220          */
6221         if (evd_rp && evd_rp->evd_cq_real_size > 0) {
6222                 backlog_size = evd_rp->evd_cq_real_size + 1;
6223         } else {
6224                 backlog_size = DAPLKA_DEFAULT_SP_BACKLOG;
6225         }
6226         D2("service_register: args.sr_sid = %llu\n", (longlong_t)args.sr_sid);
6227 
6228         /* save the userland sp ptr */
6229         sp_rp->sp_cookie = args.sr_sp_cookie;
6230         sp_rp->sp_backlog_size = backlog_size;
6231         D3("service_register: backlog set to %d\n", sp_rp->sp_backlog_size);
6232         sp_rp->sp_backlog = kmem_zalloc(sp_rp->sp_backlog_size *
6233             sizeof (daplka_sp_conn_pend_t), daplka_km_flags);
6234 
6235         /* save evd resource pointer */
6236         sp_rp->sp_evd_res = evd_rp;
6237 
6238         /*
6239          * save ruid here so that we can do a comparison later
6240          * when someone does cr_handoff. the check will prevent
6241          * a malicious app from passing a CR to us.
6242          */
6243         sp_rp->sp_ruid = crgetruid(cred);
6244 
6245         /* fill in args for register_service */
6246         sd_args.sd_ud_handler = NULL;
6247         sd_args.sd_handler = daplka_cm_service_handler;
6248         sd_args.sd_flags = IBT_SRV_NO_FLAGS;
6249 
6250         status = ibt_register_service(daplka_dev->daplka_clnt_hdl,
6251             &sd_args, args.sr_sid, 1, &sp_rp->sp_srv_hdl, &retsid);
6252 
6253         if (status != IBT_SUCCESS) {
6254                 DERR("service_register: ibt_register_service returned %d\n",
6255                     status);
6256                 *rvalp = (int)status;
6257                 retval = 0;
6258                 goto cleanup;
6259         }
6260         /* save returned sid */
6261         sp_rp->sp_conn_qual = retsid;
6262         args.sr_retsid = retsid;
6263 
6264         /* fill in args for bind_service */
6265         sb_args.sb_pkey = ia_rp->ia_port_pkey;
6266         sb_args.sb_lease = 0xffffffff;
6267         sb_args.sb_key[0] = 0x1234;
6268         sb_args.sb_key[1] = 0x5678;
6269         sb_args.sb_name = DAPLKA_DRV_NAME;
6270 
6271         D2("service_register: bind(0x%llx:0x%llx)\n",
6272             (longlong_t)ia_rp->ia_hca_sgid.gid_prefix,
6273             (longlong_t)ia_rp->ia_hca_sgid.gid_guid);
6274 
6275         status = ibt_bind_service(sp_rp->sp_srv_hdl, ia_rp->ia_hca_sgid,
6276             &sb_args, (void *)sp_rp, &sp_rp->sp_bind_hdl);
6277         if (status != IBT_SUCCESS) {
6278                 DERR("service_register: ibt_bind_service returned %d\n",
6279                     status);
6280                 *rvalp = (int)status;
6281                 retval = 0;
6282                 goto cleanup;
6283         }
6284 
6285         /*
6286          * need to bump refcnt because the global hash table will
6287          * have a reference to sp_rp
6288          */
6289         DAPLKA_RS_REF(sp_rp);
6290         bumped = B_TRUE;
6291 
6292         /* insert into global sp hash table */
6293         sp_rp->sp_global_hkey = 0;
6294         retval = daplka_hash_insert(&daplka_global_sp_htbl,
6295             &sp_rp->sp_global_hkey, (void *)sp_rp);
6296         if (retval != 0) {
6297                 DERR("service_register: cannot insert sp resource\n");
6298                 goto cleanup;
6299         }
6300         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sp_rp))
6301 
6302         /* insert into per-IA sp hash table */
6303         retval = daplka_hash_insert(&ia_rp->ia_sp_htbl,
6304             &sp_hkey, (void *)sp_rp);
6305         if (retval != 0) {
6306                 DERR("service_register: cannot insert sp resource\n");
6307                 goto cleanup;
6308         }
6309 
6310         /* pass index to application */
6311         args.sr_sp_hkey = sp_hkey;
6312         retval = ddi_copyout(&args, (void *)arg,
6313             sizeof (dapl_service_register_t), mode);
6314         if (retval != 0) {
6315                 DERR("service_register: copyout error %d\n", retval);
6316                 retval = EFAULT;
6317                 goto cleanup;
6318         }
6319         return (0);
6320 
6321 cleanup:;
6322         ASSERT(sp_rp != NULL);
6323         /* remove from ia table */
6324         if (sp_hkey != 0) {
6325                 daplka_sp_resource_t *free_rp = NULL;
6326 
6327                 (void) daplka_hash_remove(&ia_rp->ia_sp_htbl,
6328                     sp_hkey, (void **)&free_rp);
6329                 if (free_rp != sp_rp) {
6330                         DERR("service_register: cannot remove sp\n");
6331                         /*
6332                          * we can only get here if another thread
6333                          * has completed the cleanup in svc_deregister
6334                          */
6335                         return (retval);
6336                 }
6337         }
6338 
6339         /* remove from global table */
6340         if (sp_rp->sp_global_hkey != 0) {
6341                 daplka_sp_resource_t *free_rp = NULL;
6342 
6343                 /*
6344                  * we get here if either the hash_insert into
6345                  * ia_sp_htbl failed or the ddi_copyout failed.
6346                  * hash_insert failure implies that we are the
6347                  * only thread with a reference to sp. ddi_copyout
6348                  * failure implies that svc_deregister could have
6349                  * picked up the sp and destroyed it. but since
6350                  * we got to this point, we must have removed
6351                  * the sp ourselves in hash_remove above and
6352                  * that the sp can be destroyed by us.
6353                  */
6354                 (void) daplka_hash_remove(&daplka_global_sp_htbl,
6355                     sp_rp->sp_global_hkey, (void **)&free_rp);
6356                 if (free_rp != sp_rp) {
6357                         DERR("service_register: cannot remove sp\n");
6358                         /*
6359                          * this case is impossible. see explanation above.
6360                          */
6361                         ASSERT(B_FALSE);
6362                         return (retval);
6363                 }
6364                 sp_rp->sp_global_hkey = 0;
6365         }
6366         /* unreference sp */
6367         if (bumped) {
6368                 DAPLKA_RS_UNREF(sp_rp);
6369         }
6370 
6371         /* destroy sp resource */
6372         DAPLKA_RS_UNREF(sp_rp);
6373         return (retval);
6374 }
6375 
6376 /*
6377  * deregisters the service and removes SP from the global table.
6378  */
6379 /* ARGSUSED */
6380 static int
6381 daplka_service_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6382         cred_t *cred, int *rvalp)
6383 {
6384         dapl_service_deregister_t       args;
6385         daplka_sp_resource_t            *sp_rp = NULL, *g_sp_rp = NULL;
6386         int                             retval;
6387 
6388         retval = ddi_copyin((void *)arg, &args,
6389             sizeof (dapl_service_deregister_t), mode);
6390 
6391         if (retval != 0) {
6392                 DERR("service_deregister: copyin error %d\n", retval);
6393                 return (EINVAL);
6394         }
6395 
6396         retval = daplka_hash_remove(&ia_rp->ia_sp_htbl,
6397             args.sdr_sp_hkey, (void **)&sp_rp);
6398         if (retval != 0 || sp_rp == NULL) {
6399                 DERR("service_deregister: cannot find sp resource\n");
6400                 return (EINVAL);
6401         }
6402 
6403         retval = daplka_hash_remove(&daplka_global_sp_htbl,
6404             sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6405         if (retval != 0 || g_sp_rp == NULL) {
6406                 DERR("service_deregister: cannot find sp resource\n");
6407         }
6408 
6409         /* remove the global reference */
6410         if (g_sp_rp == sp_rp) {
6411                 DAPLKA_RS_UNREF(g_sp_rp);
6412         }
6413 
6414         DAPLKA_RS_UNREF(sp_rp);
6415         return (0);
6416 }
6417 
6418 /*
6419  * destroys a service point.
6420  * called when the refcnt drops to zero.
6421  */
6422 static int
6423 daplka_sp_destroy(daplka_resource_t *gen_rp)
6424 {
6425         daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)gen_rp;
6426         ibt_status_t status;
6427 
6428         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
6429         ASSERT(DAPLKA_RS_REFCNT(sp_rp) == 0);
6430         D3("sp_destroy: entering, sp_rp %p, rnum %d\n",
6431             sp_rp, DAPLKA_RS_RNUM(sp_rp));
6432 
6433         /*
6434          * it is possible for pending connections to remain
6435          * on an SP. We need to clean them up here.
6436          */
6437         if (sp_rp->sp_backlog != NULL) {
6438                 ibt_cm_proceed_reply_t proc_reply;
6439                 int i, cnt = 0;
6440                 void *spcp_sidp;
6441 
6442                 for (i = 0; i < sp_rp->sp_backlog_size; i++) {
6443                         if (sp_rp->sp_backlog[i].spcp_state ==
6444                             DAPLKA_SPCP_PENDING) {
6445                                 cnt++;
6446                                 if (sp_rp->sp_backlog[i].spcp_sid == NULL) {
6447                                         DERR("sp_destroy: "
6448                                             "spcp_sid == NULL!\n");
6449                                         continue;
6450                                 }
6451                                 mutex_enter(&sp_rp->sp_lock);
6452                                 spcp_sidp = sp_rp->sp_backlog[i].spcp_sid;
6453                                 sp_rp->sp_backlog[i].spcp_state =
6454                                     DAPLKA_SPCP_INIT;
6455                                 sp_rp->sp_backlog[i].spcp_sid = NULL;
6456                                 sp_rp->sp_backlog[i].spcp_req_len = 0;
6457                                 mutex_exit(&sp_rp->sp_lock);
6458                                 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV,
6459                                     spcp_sidp,
6460                                     IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
6461                                 if (status != IBT_SUCCESS) {
6462                                         DERR("sp_destroy: proceed failed %d\n",
6463                                             status);
6464                                 }
6465                         }
6466                 }
6467                 if (cnt > 0) {
6468                         DERR("sp_destroy: found %d pending "
6469                             "connections\n", cnt);
6470                 }
6471         }
6472 
6473         if (sp_rp->sp_srv_hdl != NULL && sp_rp->sp_bind_hdl != NULL) {
6474                 status = ibt_unbind_service(sp_rp->sp_srv_hdl,
6475                     sp_rp->sp_bind_hdl);
6476                 if (status != IBT_SUCCESS) {
6477                         DERR("sp_destroy: ibt_unbind_service "
6478                             "failed: %d\n", status);
6479                 }
6480         }
6481 
6482         if (sp_rp->sp_srv_hdl != NULL) {
6483                 status = ibt_deregister_service(daplka_dev->daplka_clnt_hdl,
6484                     sp_rp->sp_srv_hdl);
6485                 if (status != IBT_SUCCESS) {
6486                         DERR("sp_destroy: ibt_deregister_service "
6487                             "failed: %d\n", status);
6488                 }
6489         }
6490         if (sp_rp->sp_backlog != NULL) {
6491                 kmem_free(sp_rp->sp_backlog,
6492                     sp_rp->sp_backlog_size * sizeof (daplka_sp_conn_pend_t));
6493                 sp_rp->sp_backlog = NULL;
6494                 sp_rp->sp_backlog_size = 0;
6495         }
6496 
6497         /*
6498          * release reference to evd
6499          */
6500         if (sp_rp->sp_evd_res != NULL) {
6501                 DAPLKA_RS_UNREF(sp_rp->sp_evd_res);
6502         }
6503         sp_rp->sp_bind_hdl = NULL;
6504         sp_rp->sp_srv_hdl = NULL;
6505         DAPLKA_RS_FINI(sp_rp);
6506         kmem_free(sp_rp, sizeof (*sp_rp));
6507         D3("sp_destroy: exiting, sp_rp %p\n", sp_rp);
6508         return (0);
6509 }
6510 
6511 /*
6512  * this function is called by daplka_hash_destroy for
6513  * freeing SP resource objects
6514  */
6515 static void
6516 daplka_hash_sp_free(void *obj)
6517 {
6518         daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6519         daplka_sp_resource_t *g_sp_rp;
6520         int retval;
6521 
6522         ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6523 
6524         retval = daplka_hash_remove(&daplka_global_sp_htbl,
6525             sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6526         if (retval != 0 || g_sp_rp == NULL) {
6527                 DERR("sp_free: cannot find sp resource\n");
6528         }
6529         if (g_sp_rp == sp_rp) {
6530                 DAPLKA_RS_UNREF(g_sp_rp);
6531         }
6532 
6533         DAPLKA_RS_UNREF(sp_rp);
6534 }
6535 
6536 static void
6537 daplka_hash_sp_unref(void *obj)
6538 {
6539         daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6540 
6541         ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6542         DAPLKA_RS_UNREF(sp_rp);
6543 }
6544 
6545 /*
6546  * Passive side CM handlers
6547  */
6548 
6549 /*
6550  * processes the REQ_RCV event
6551  */
6552 /* ARGSUSED */
6553 static ibt_cm_status_t
6554 daplka_cm_service_req(daplka_sp_resource_t *spp, ibt_cm_event_t *event,
6555     ibt_cm_return_args_t *ret_args, void *pr_data, ibt_priv_data_len_t pr_len)
6556 {
6557         daplka_sp_conn_pend_t   *conn = NULL;
6558         daplka_evd_event_t      *cr_ev = NULL;
6559         ibt_cm_status_t         cm_status = IBT_CM_DEFAULT;
6560         uint16_t                bkl_index;
6561         ibt_status_t            status;
6562 
6563         /*
6564          * acquire a slot in the connection backlog of this service point
6565          */
6566         mutex_enter(&spp->sp_lock);
6567         for (bkl_index = 0; bkl_index < spp->sp_backlog_size; bkl_index++) {
6568                 if (spp->sp_backlog[bkl_index].spcp_state == DAPLKA_SPCP_INIT) {
6569                         conn = &spp->sp_backlog[bkl_index];
6570                         ASSERT(conn->spcp_sid == NULL);
6571                         conn->spcp_state = DAPLKA_SPCP_PENDING;
6572                         conn->spcp_sid = event->cm_session_id;
6573                         break;
6574                 }
6575         }
6576         mutex_exit(&spp->sp_lock);
6577 
6578         /*
6579          * too many pending connections
6580          */
6581         if (bkl_index == spp->sp_backlog_size) {
6582                 DERR("service_req: connection pending exceeded %d limit\n",
6583                     spp->sp_backlog_size);
6584                 return (IBT_CM_NO_RESOURCE);
6585         }
6586         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*conn))
6587 
6588         /*
6589          * save data for cr_handoff
6590          */
6591         if (pr_data != NULL && pr_len > 0) {
6592                 int trunc_len = pr_len;
6593 
6594                 if (trunc_len > DAPL_MAX_PRIVATE_DATA_SIZE) {
6595                         DERR("service_req: private data truncated\n");
6596                         trunc_len = DAPL_MAX_PRIVATE_DATA_SIZE;
6597                 }
6598                 conn->spcp_req_len = trunc_len;
6599                 bcopy(pr_data, conn->spcp_req_data, trunc_len);
6600         } else {
6601                 conn->spcp_req_len = 0;
6602         }
6603         conn->spcp_rdma_ra_in = event->cm_event.req.req_rdma_ra_in;
6604         conn->spcp_rdma_ra_out = event->cm_event.req.req_rdma_ra_out;
6605 
6606         /*
6607          * create a CR event
6608          */
6609         cr_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6610         if (cr_ev == NULL) {
6611                 DERR("service_req: could not alloc cr_ev\n");
6612                 cm_status = IBT_CM_NO_RESOURCE;
6613                 goto cleanup;
6614         }
6615 
6616         cr_ev->ee_next = NULL;
6617         cr_ev->ee_cmev.ec_cm_cookie = spp->sp_cookie;
6618         cr_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6619         cr_ev->ee_cmev.ec_cm_psep_cookie = DAPLKA_CREATE_PSEP_COOKIE(bkl_index);
6620         /*
6621          * save the requestor gid
6622          * daplka_event_poll needs this if this is a third party REQ_RCV
6623          */
6624         cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix =
6625             event->cm_event.req.req_prim_addr.av_dgid.gid_prefix;
6626         cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid =
6627             event->cm_event.req.req_prim_addr.av_dgid.gid_guid;
6628 
6629         /*
6630          * set event type
6631          */
6632         if (pr_len == 0) {
6633                 cr_ev->ee_cmev.ec_cm_ev_type =
6634                     DAPL_IB_CME_CONNECTION_REQUEST_PENDING;
6635         } else {
6636                 cr_ev->ee_cmev.ec_cm_ev_priv_data =
6637                     kmem_zalloc(pr_len, KM_NOSLEEP);
6638                 if (cr_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
6639                         DERR("service_req: could not alloc priv\n");
6640                         cm_status = IBT_CM_NO_RESOURCE;
6641                         goto cleanup;
6642                 }
6643                 bcopy(pr_data, cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6644                 cr_ev->ee_cmev.ec_cm_ev_type =
6645                     DAPL_IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA;
6646         }
6647         cr_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
6648 
6649         /*
6650          * tell the active side to expect the processing time to be
6651          * at most equal to daplka_cm_delay
6652          */
6653         status = ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
6654             daplka_cm_delay, NULL, 0);
6655         if (status != IBT_SUCCESS) {
6656                 DERR("service_req: ibt_cm_delay failed %d\n", status);
6657                 cm_status = IBT_CM_NO_RESOURCE;
6658                 goto cleanup;
6659         }
6660 
6661         /*
6662          * enqueue cr_ev onto the cr_events list of the EVD
6663          * corresponding to the SP
6664          */
6665         D2("service_req: enqueue event(%p) evdp(%p) priv_data(%p) "
6666             "priv_len(%d) psep(0x%llx)\n", cr_ev, spp->sp_evd_res,
6667             cr_ev->ee_cmev.ec_cm_ev_priv_data,
6668             (int)cr_ev->ee_cmev.ec_cm_ev_priv_data_len,
6669             (longlong_t)cr_ev->ee_cmev.ec_cm_psep_cookie);
6670 
6671         daplka_evd_wakeup(spp->sp_evd_res,
6672             &spp->sp_evd_res->evd_cr_events, cr_ev);
6673 
6674         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*conn))
6675         return (IBT_CM_DEFER);
6676 
6677 cleanup:;
6678         /*
6679          * free the cr event
6680          */
6681         if (cr_ev != NULL) {
6682                 if (cr_ev->ee_cmev.ec_cm_ev_priv_data != NULL) {
6683                         kmem_free(cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6684                         cr_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6685                         cr_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6686                 }
6687                 kmem_free(cr_ev, sizeof (daplka_evd_event_t));
6688         }
6689         /*
6690          * release our slot in the backlog array
6691          */
6692         if (conn != NULL) {
6693                 mutex_enter(&spp->sp_lock);
6694                 ASSERT(conn->spcp_state == DAPLKA_SPCP_PENDING);
6695                 ASSERT(conn->spcp_sid == event->cm_session_id);
6696                 conn->spcp_state = DAPLKA_SPCP_INIT;
6697                 conn->spcp_req_len = 0;
6698                 conn->spcp_sid = NULL;
6699                 mutex_exit(&spp->sp_lock);
6700         }
6701         return (cm_status);
6702 }
6703 
6704 /*
6705  * processes the CONN_CLOSED event
6706  */
6707 /* ARGSUSED */
6708 static ibt_cm_status_t
6709 daplka_cm_service_conn_closed(daplka_sp_resource_t *sp_rp,
6710     ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args,
6711     void *priv_data, ibt_priv_data_len_t len)
6712 {
6713         daplka_ep_resource_t    *ep_rp;
6714         daplka_evd_event_t      *disc_ev;
6715         uint32_t                old_state, new_state;
6716 
6717         ep_rp = (daplka_ep_resource_t *)
6718             ibt_get_chan_private(event->cm_channel);
6719         if (ep_rp == NULL) {
6720                 DERR("service_conn_closed: ep_rp == NULL\n");
6721                 return (IBT_CM_ACCEPT);
6722         }
6723 
6724         /*
6725          * verify that the ep_state is either CONNECTED or
6726          * DISCONNECTING. if it is not in either states return
6727          * without generating an event.
6728          */
6729         new_state = old_state = daplka_ep_get_state(ep_rp);
6730         if (old_state != DAPLKA_EP_STATE_CONNECTED &&
6731             old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6732                 /*
6733                  * we can get here if the connection is being aborted
6734                  */
6735                 D2("service_conn_closed: conn aborted, state = %d, "
6736                     "closed = %d\n", old_state, (int)event->cm_event.closed);
6737                 daplka_ep_set_state(ep_rp, old_state, new_state);
6738                 return (IBT_CM_ACCEPT);
6739         }
6740 
6741         /*
6742          * create a DAPL_IB_CME_DISCONNECTED event
6743          */
6744         disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6745         if (disc_ev == NULL) {
6746                 DERR("service_conn_closed: cannot alloc disc_ev\n");
6747                 daplka_ep_set_state(ep_rp, old_state, new_state);
6748                 return (IBT_CM_ACCEPT);
6749         }
6750 
6751         disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
6752         disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6753         disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6754         disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6755         disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6756         disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6757 
6758         D2("service_conn_closed: enqueue event(%p) evdp(%p) psep(0x%llx)\n",
6759             disc_ev, sp_rp->sp_evd_res, (longlong_t)ep_rp->ep_psep_cookie);
6760 
6761         /*
6762          * transition ep_state to DISCONNECTED
6763          */
6764         new_state = DAPLKA_EP_STATE_DISCONNECTED;
6765         daplka_ep_set_state(ep_rp, old_state, new_state);
6766 
6767         /*
6768          * enqueue event onto the conn_evd owned by ep_rp
6769          */
6770         daplka_evd_wakeup(ep_rp->ep_conn_evd,
6771             &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6772 
6773         return (IBT_CM_ACCEPT);
6774 }
6775 
6776 /*
6777  * processes the CONN_EST event
6778  */
6779 /* ARGSUSED */
6780 static ibt_cm_status_t
6781 daplka_cm_service_conn_est(daplka_sp_resource_t *sp_rp, ibt_cm_event_t *event,
6782     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6783 {
6784         daplka_ep_resource_t    *ep_rp;
6785         daplka_evd_event_t      *conn_ev;
6786         void                    *pr_data = event->cm_priv_data;
6787         ibt_priv_data_len_t     pr_len = event->cm_priv_data_len;
6788         uint32_t                old_state, new_state;
6789 
6790         ep_rp = (daplka_ep_resource_t *)
6791             ibt_get_chan_private(event->cm_channel);
6792         if (ep_rp == NULL) {
6793                 DERR("service_conn_est: ep_rp == NULL\n");
6794                 return (IBT_CM_ACCEPT);
6795         }
6796 
6797         /*
6798          * verify that ep_state is ACCEPTING. if it is not in this
6799          * state, return without generating an event.
6800          */
6801         new_state = old_state = daplka_ep_get_state(ep_rp);
6802         if (old_state != DAPLKA_EP_STATE_ACCEPTING) {
6803                 /*
6804                  * we can get here if the connection is being aborted
6805                  */
6806                 DERR("service_conn_est: conn aborted, state = %d\n",
6807                     old_state);
6808                 daplka_ep_set_state(ep_rp, old_state, new_state);
6809                 return (IBT_CM_ACCEPT);
6810         }
6811 
6812         /*
6813          * create a DAPL_IB_CME_CONNECTED event
6814          */
6815         conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6816         if (conn_ev == NULL) {
6817                 DERR("service_conn_est: conn_ev alloc failed\n");
6818                 daplka_ep_set_state(ep_rp, old_state, new_state);
6819                 return (IBT_CM_ACCEPT);
6820         }
6821 
6822         conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
6823         conn_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6824         conn_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6825         conn_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6826 
6827         /*
6828          * copy private data into event
6829          */
6830         if (pr_len > 0) {
6831                 conn_ev->ee_cmev.ec_cm_ev_priv_data =
6832                     kmem_zalloc(pr_len, KM_NOSLEEP);
6833                 if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
6834                         DERR("service_conn_est: pr_data alloc failed\n");
6835                         daplka_ep_set_state(ep_rp, old_state, new_state);
6836                         kmem_free(conn_ev, sizeof (daplka_evd_event_t));
6837                         return (IBT_CM_ACCEPT);
6838                 }
6839                 bcopy(pr_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6840         }
6841         conn_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
6842 
6843         D2("service_conn_est: enqueue event(%p) evdp(%p)\n",
6844             conn_ev, ep_rp->ep_conn_evd);
6845 
6846         /*
6847          * transition ep_state to CONNECTED
6848          */
6849         new_state = DAPLKA_EP_STATE_CONNECTED;
6850         daplka_ep_set_state(ep_rp, old_state, new_state);
6851 
6852         /*
6853          * enqueue event onto the conn_evd owned by ep_rp
6854          */
6855         daplka_evd_wakeup(ep_rp->ep_conn_evd,
6856             &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);
6857 
6858         return (IBT_CM_ACCEPT);
6859 }
6860 
6861 /*
6862  * processes the FAILURE event
6863  */
6864 /* ARGSUSED */
6865 static ibt_cm_status_t
6866 daplka_cm_service_event_failure(daplka_sp_resource_t *sp_rp,
6867     ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, void *priv_data,
6868     ibt_priv_data_len_t len)
6869 {
6870         daplka_evd_event_t      *disc_ev;
6871         daplka_ep_resource_t    *ep_rp;
6872         uint32_t                old_state, new_state;
6873         ibt_rc_chan_query_attr_t chan_attrs;
6874         ibt_status_t            status;
6875 
6876         /*
6877          * check that we still have a valid cm_channel before continuing
6878          */
6879         if (event->cm_channel == NULL) {
6880                 DERR("serice_event_failure: event->cm_channel == NULL\n");
6881                 return (IBT_CM_ACCEPT);
6882         }
6883         ep_rp = (daplka_ep_resource_t *)
6884             ibt_get_chan_private(event->cm_channel);
6885         if (ep_rp == NULL) {
6886                 DERR("service_event_failure: ep_rp == NULL\n");
6887                 return (IBT_CM_ACCEPT);
6888         }
6889 
6890         /*
6891          * verify that ep_state is ACCEPTING or DISCONNECTING. if it
6892          * is not in either state, return without generating an event.
6893          */
6894         new_state = old_state = daplka_ep_get_state(ep_rp);
6895         if (old_state != DAPLKA_EP_STATE_ACCEPTING &&
6896             old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6897                 /*
6898                  * we can get here if the connection is being aborted
6899                  */
6900                 DERR("service_event_failure: conn aborted, state = %d, "
6901                     "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
6902                     (int)event->cm_event.failed.cf_code,
6903                     (int)event->cm_event.failed.cf_msg,
6904                     (int)event->cm_event.failed.cf_reason);
6905 
6906                 daplka_ep_set_state(ep_rp, old_state, new_state);
6907                 return (IBT_CM_ACCEPT);
6908         }
6909 
6910         bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
6911         status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
6912 
6913         if ((status == IBT_SUCCESS) &&
6914             (chan_attrs.rc_state != IBT_STATE_ERROR)) {
6915                 DERR("service_event_failure: conn abort qpn %d state %d\n",
6916                     chan_attrs.rc_qpn, chan_attrs.rc_state);
6917 
6918                 /* explicit transition the QP to ERROR state */
6919                 status = ibt_flush_channel(ep_rp->ep_chan_hdl);
6920         }
6921 
6922         /*
6923          * create an event
6924          */
6925         disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6926         if (disc_ev == NULL) {
6927                 DERR("service_event_failure: cannot alloc disc_ev\n");
6928                 daplka_ep_set_state(ep_rp, old_state, new_state);
6929                 return (IBT_CM_ACCEPT);
6930         }
6931 
6932         /*
6933          * fill in the appropriate event type
6934          */
6935         if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
6936                 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
6937         } else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
6938                 switch (event->cm_event.failed.cf_reason) {
6939                 case IBT_CM_INVALID_CID:
6940                         disc_ev->ee_cmev.ec_cm_ev_type =
6941                             DAPL_IB_CME_DESTINATION_REJECT;
6942                         break;
6943                 default:
6944                         disc_ev->ee_cmev.ec_cm_ev_type =
6945                             DAPL_IB_CME_LOCAL_FAILURE;
6946                         break;
6947                 }
6948         } else {
6949                 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
6950         }
6951         disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6952         disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6953         disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6954         disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6955         disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6956 
6957         D2("service_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
6958             "cf_msg(%d) cf_reason(%d) psep(0x%llx)\n", disc_ev,
6959             ep_rp->ep_conn_evd, (int)event->cm_event.failed.cf_code,
6960             (int)event->cm_event.failed.cf_msg,
6961             (int)event->cm_event.failed.cf_reason,
6962             (longlong_t)ep_rp->ep_psep_cookie);
6963 
6964         /*
6965          * transition ep_state to DISCONNECTED
6966          */
6967         new_state = DAPLKA_EP_STATE_DISCONNECTED;
6968         daplka_ep_set_state(ep_rp, old_state, new_state);
6969 
6970         /*
6971          * enqueue event onto the conn_evd owned by ep_rp
6972          */
6973         daplka_evd_wakeup(ep_rp->ep_conn_evd,
6974             &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6975 
6976         return (IBT_CM_ACCEPT);
6977 }
6978 
6979 /*
6980  * this is the passive side CM handler. it gets registered
6981  * when an SP resource is created in daplka_service_register.
6982  */
6983 static ibt_cm_status_t
6984 daplka_cm_service_handler(void *cm_private, ibt_cm_event_t *event,
6985 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6986 {
6987         daplka_sp_resource_t    *sp_rp = (daplka_sp_resource_t *)cm_private;
6988 
6989         if (sp_rp == NULL) {
6990                 DERR("service_handler: sp_rp == NULL\n");
6991                 return (IBT_CM_NO_RESOURCE);
6992         }
6993         /*
6994          * default is not to return priv data
6995          */
6996         if (ret_args != NULL) {
6997                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
6998                 ret_args->cm_ret_len = 0;
6999         }
7000 
7001         switch (event->cm_type) {
7002         case IBT_CM_EVENT_REQ_RCV:
7003                 D2("service_handler: IBT_CM_EVENT_REQ_RCV\n");
7004                 return (daplka_cm_service_req(sp_rp, event, ret_args,
7005                     event->cm_priv_data, event->cm_priv_data_len));
7006 
7007         case IBT_CM_EVENT_REP_RCV:
7008                 /* passive side should not receive this event */
7009                 D2("service_handler: IBT_CM_EVENT_REP_RCV\n");
7010                 return (IBT_CM_DEFAULT);
7011 
7012         case IBT_CM_EVENT_CONN_CLOSED:
7013                 D2("service_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
7014                     event->cm_event.closed);
7015                 return (daplka_cm_service_conn_closed(sp_rp, event, ret_args,
7016                     priv_data, len));
7017 
7018         case IBT_CM_EVENT_MRA_RCV:
7019                 /* passive side does default processing MRA event */
7020                 D2("service_handler: IBT_CM_EVENT_MRA_RCV\n");
7021                 return (IBT_CM_DEFAULT);
7022 
7023         case IBT_CM_EVENT_CONN_EST:
7024                 D2("service_handler: IBT_CM_EVENT_CONN_EST\n");
7025                 return (daplka_cm_service_conn_est(sp_rp, event, ret_args,
7026                     priv_data, len));
7027 
7028         case IBT_CM_EVENT_FAILURE:
7029                 D2("service_handler: IBT_CM_EVENT_FAILURE\n");
7030                 return (daplka_cm_service_event_failure(sp_rp, event, ret_args,
7031                     priv_data, len));
7032         case IBT_CM_EVENT_LAP_RCV:
7033                 /* active side had initiated a path migration operation */
7034                 D2("service_handler: IBT_CM_EVENT_LAP_RCV\n");
7035                 return (IBT_CM_ACCEPT);
7036         default:
7037                 DERR("service_handler: invalid event %d\n", event->cm_type);
7038                 break;
7039         }
7040         return (IBT_CM_DEFAULT);
7041 }
7042 
7043 /*
7044  * Active side CM handlers
7045  */
7046 
7047 /*
7048  * Processes the REP_RCV event. When the passive side accepts the
7049  * connection, this handler is called. We make a copy of the private
7050  * data into the ep so that it can be passed back to userland in when
7051  * the CONN_EST event occurs.
7052  */
7053 /* ARGSUSED */
7054 static ibt_cm_status_t
7055 daplka_cm_rc_rep_rcv(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7056     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7057 {
7058         void                    *pr_data = event->cm_priv_data;
7059         ibt_priv_data_len_t     pr_len = event->cm_priv_data_len;
7060         uint32_t                old_state, new_state;
7061 
7062         D2("rc_rep_rcv: pr_data(0x%p), pr_len(%d)\n", pr_data,
7063             (int)pr_len);
7064 
7065         ASSERT(ep_rp != NULL);
7066         new_state = old_state = daplka_ep_get_state(ep_rp);
7067         if (old_state != DAPLKA_EP_STATE_CONNECTING) {
7068                 /*
7069                  * we can get here if the connection is being aborted
7070                  */
7071                 DERR("rc_rep_rcv: conn aborted, state = %d\n", old_state);
7072                 daplka_ep_set_state(ep_rp, old_state, new_state);
7073                 return (IBT_CM_NO_CHANNEL);
7074         }
7075 
7076         /*
7077          * we do not cancel the timer here because the connection
7078          * handshake is still in progress.
7079          */
7080 
7081         /*
7082          * save the private data. it will be passed up when
7083          * the connection is established.
7084          */
7085         if (pr_len > 0) {
7086                 ep_rp->ep_priv_len = pr_len;
7087                 bcopy(pr_data, ep_rp->ep_priv_data, (size_t)pr_len);
7088         }
7089 
7090         /*
7091          * we do not actually transition to a different state.
7092          * the state will change when we get a conn_est, failure,
7093          * closed, or timeout event.
7094          */
7095         daplka_ep_set_state(ep_rp, old_state, new_state);
7096         return (IBT_CM_ACCEPT);
7097 }
7098 
7099 /*
7100  * Processes the CONN_CLOSED event. This gets called when either
7101  * the active or passive side closes the rc channel.
7102  */
7103 /* ARGSUSED */
7104 static ibt_cm_status_t
7105 daplka_cm_rc_conn_closed(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7106     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7107 {
7108         daplka_evd_event_t      *disc_ev;
7109         uint32_t                old_state, new_state;
7110 
7111         ASSERT(ep_rp != NULL);
7112         old_state = new_state = daplka_ep_get_state(ep_rp);
7113         if (old_state != DAPLKA_EP_STATE_CONNECTED &&
7114             old_state != DAPLKA_EP_STATE_DISCONNECTING) {
7115                 /*
7116                  * we can get here if the connection is being aborted
7117                  */
7118                 D2("rc_conn_closed: conn aborted, state = %d, "
7119                     "closed = %d\n", old_state, (int)event->cm_event.closed);
7120                 daplka_ep_set_state(ep_rp, old_state, new_state);
7121                 return (IBT_CM_ACCEPT);
7122         }
7123 
7124         /*
7125          * it's ok for the timer to fire at this point. the
7126          * taskq thread that processes the timer will just wait
7127          * until we are done with our state transition.
7128          */
7129         if (daplka_cancel_timer(ep_rp) != 0) {
7130                 /*
7131                  * daplka_cancel_timer returns -1 if the timer is
7132                  * being processed and 0 for all other cases.
7133                  * we need to reset ep_state to allow timer processing
7134                  * to continue.
7135                  */
7136                 DERR("rc_conn_closed: timer is being processed\n");
7137                 daplka_ep_set_state(ep_rp, old_state, new_state);
7138                 return (IBT_CM_ACCEPT);
7139         }
7140 
7141         /*
7142          * create a DAPL_IB_CME_DISCONNECTED event
7143          */
7144         disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7145         if (disc_ev == NULL) {
7146                 DERR("rc_conn_closed: could not alloc ev\n");
7147                 daplka_ep_set_state(ep_rp, old_state, new_state);
7148                 return (IBT_CM_ACCEPT);
7149         }
7150 
7151         disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
7152         disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7153         disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7154         disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
7155         disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
7156         disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
7157 
7158         D2("rc_conn_closed: enqueue event(%p) evdp(%p) closed(%d)\n",
7159             disc_ev, ep_rp->ep_conn_evd, (int)event->cm_event.closed);
7160 
7161         /*
7162          * transition ep_state to DISCONNECTED
7163          */
7164         new_state = DAPLKA_EP_STATE_DISCONNECTED;
7165         daplka_ep_set_state(ep_rp, old_state, new_state);
7166 
7167         /*
7168          * enqueue event onto the conn_evd owned by ep_rp
7169          */
7170         daplka_evd_wakeup(ep_rp->ep_conn_evd,
7171             &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
7172 
7173         return (IBT_CM_ACCEPT);
7174 }
7175 
7176 /*
7177  * processes the CONN_EST event
7178  */
7179 /* ARGSUSED */
7180 static ibt_cm_status_t
7181 daplka_cm_rc_conn_est(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7182     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7183 {
7184         daplka_evd_event_t      *conn_ev;
7185         uint32_t                old_state, new_state;
7186 
7187         ASSERT(ep_rp != NULL);
7188         old_state = new_state = daplka_ep_get_state(ep_rp);
7189         if (old_state != DAPLKA_EP_STATE_CONNECTING) {
7190                 /*
7191                  * we can get here if the connection is being aborted
7192                  */
7193                 DERR("rc_conn_est: conn aborted, state = %d\n", old_state);
7194                 daplka_ep_set_state(ep_rp, old_state, new_state);
7195                 return (IBT_CM_ACCEPT);
7196         }
7197 
7198         /*
7199          * it's ok for the timer to fire at this point. the
7200          * taskq thread that processes the timer will just wait
7201          * until we are done with our state transition.
7202          */
7203         if (daplka_cancel_timer(ep_rp) != 0) {
7204                 /*
7205                  * daplka_cancel_timer returns -1 if the timer is
7206                  * being processed and 0 for all other cases.
7207                  * we need to reset ep_state to allow timer processing
7208                  * to continue.
7209                  */
7210                 DERR("rc_conn_est: timer is being processed\n");
7211                 daplka_ep_set_state(ep_rp, old_state, new_state);
7212                 return (IBT_CM_ACCEPT);
7213         }
7214 
7215         /*
7216          * create a DAPL_IB_CME_CONNECTED event
7217          */
7218         conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7219         if (conn_ev == NULL) {
7220                 DERR("rc_conn_est: could not alloc ev\n");
7221                 daplka_ep_set_state(ep_rp, old_state, new_state);
7222                 return (IBT_CM_ACCEPT);
7223         }
7224 
7225         conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
7226         conn_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7227         conn_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7228         conn_ev->ee_cmev.ec_cm_psep_cookie = 0;
7229 
7230         /*
7231          * The private data passed back in the connection established
7232          * event is what was recvd in the daplka_cm_rc_rep_rcv handler and
7233          * saved in ep resource structure.
7234          */
7235         if (ep_rp->ep_priv_len > 0) {
7236                 conn_ev->ee_cmev.ec_cm_ev_priv_data =
7237                     kmem_zalloc(ep_rp->ep_priv_len, KM_NOSLEEP);
7238 
7239                 if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
7240                         DERR("rc_conn_est: could not alloc pr_data\n");
7241                         kmem_free(conn_ev, sizeof (daplka_evd_event_t));
7242                         daplka_ep_set_state(ep_rp, old_state, new_state);
7243                         return (IBT_CM_ACCEPT);
7244                 }
7245                 bcopy(ep_rp->ep_priv_data, conn_ev->ee_cmev.ec_cm_ev_priv_data,
7246                     ep_rp->ep_priv_len);
7247         }
7248         conn_ev->ee_cmev.ec_cm_ev_priv_data_len = ep_rp->ep_priv_len;
7249 
7250         D2("rc_conn_est: enqueue event(%p) evdp(%p) pr_data(0x%p), "
7251             "pr_len(%d)\n", conn_ev, ep_rp->ep_conn_evd,
7252             conn_ev->ee_cmev.ec_cm_ev_priv_data,
7253             (int)conn_ev->ee_cmev.ec_cm_ev_priv_data_len);
7254 
7255         /*
7256          * transition ep_state to CONNECTED
7257          */
7258         new_state = DAPLKA_EP_STATE_CONNECTED;
7259         daplka_ep_set_state(ep_rp, old_state, new_state);
7260 
7261         /*
7262          * enqueue event onto the conn_evd owned by ep_rp
7263          */
7264         daplka_evd_wakeup(ep_rp->ep_conn_evd,
7265             &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);
7266 
7267         return (IBT_CM_ACCEPT);
7268 }
7269 
7270 /*
7271  * processes the FAILURE event
7272  */
7273 /* ARGSUSED */
7274 static ibt_cm_status_t
7275 daplka_cm_rc_event_failure(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7276     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7277 {
7278         daplka_evd_event_t      *disc_ev;
7279         ibt_priv_data_len_t     pr_len = event->cm_priv_data_len;
7280         void                    *pr_data = event->cm_priv_data;
7281         uint32_t                old_state, new_state;
7282         ibt_rc_chan_query_attr_t chan_attrs;
7283         ibt_status_t            status;
7284 
7285         ASSERT(ep_rp != NULL);
7286         old_state = new_state = daplka_ep_get_state(ep_rp);
7287         if (old_state != DAPLKA_EP_STATE_CONNECTING &&
7288             old_state != DAPLKA_EP_STATE_DISCONNECTING) {
7289                 /*
7290                  * we can get here if the connection is being aborted
7291                  */
7292                 DERR("rc_event_failure: conn aborted, state = %d, "
7293                     "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
7294                     (int)event->cm_event.failed.cf_code,
7295                     (int)event->cm_event.failed.cf_msg,
7296                     (int)event->cm_event.failed.cf_reason);
7297 
7298                 daplka_ep_set_state(ep_rp, old_state, new_state);
7299                 return (IBT_CM_ACCEPT);
7300         }
7301 
7302         /*
7303          * it's ok for the timer to fire at this point. the
7304          * taskq thread that processes the timer will just wait
7305          * until we are done with our state transition.
7306          */
7307         if (daplka_cancel_timer(ep_rp) != 0) {
7308                 /*
7309                  * daplka_cancel_timer returns -1 if the timer is
7310                  * being processed and 0 for all other cases.
7311                  * we need to reset ep_state to allow timer processing
7312                  * to continue.
7313                  */
7314                 DERR("rc_event_failure: timer is being processed\n");
7315                 daplka_ep_set_state(ep_rp, old_state, new_state);
7316                 return (IBT_CM_ACCEPT);
7317         }
7318 
7319         bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
7320         status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
7321 
7322         if ((status == IBT_SUCCESS) &&
7323             (chan_attrs.rc_state != IBT_STATE_ERROR)) {
7324                 DERR("rc_event_failure: conn abort qpn %d state %d\n",
7325                     chan_attrs.rc_qpn, chan_attrs.rc_state);
7326 
7327                 /* explicit transition the QP to ERROR state */
7328                 status = ibt_flush_channel(ep_rp->ep_chan_hdl);
7329         }
7330 
7331         /*
7332          * create an event
7333          */
7334         disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7335         if (disc_ev == NULL) {
7336                 DERR("rc_event_failure: cannot alloc disc_ev\n");
7337                 daplka_ep_set_state(ep_rp, old_state, new_state);
7338                 return (IBT_CM_ACCEPT);
7339         }
7340 
7341         /*
7342          * copy private data into event
7343          */
7344         if (pr_len > 0) {
7345                 disc_ev->ee_cmev.ec_cm_ev_priv_data =
7346                     kmem_zalloc(pr_len, KM_NOSLEEP);
7347 
7348                 if (disc_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
7349                         DERR("rc_event_failure: cannot alloc pr data\n");
7350                         kmem_free(disc_ev, sizeof (daplka_evd_event_t));
7351                         daplka_ep_set_state(ep_rp, old_state, new_state);
7352                         return (IBT_CM_ACCEPT);
7353                 }
7354                 bcopy(pr_data, disc_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
7355         }
7356         disc_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
7357 
7358         /*
7359          * fill in the appropriate event type
7360          */
7361         if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
7362                 switch (event->cm_event.failed.cf_reason) {
7363                 case IBT_CM_CONSUMER:
7364                         disc_ev->ee_cmev.ec_cm_ev_type =
7365                             DAPL_IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
7366                         break;
7367                 case IBT_CM_NO_CHAN:
7368                 case IBT_CM_NO_RESC:
7369                         disc_ev->ee_cmev.ec_cm_ev_type =
7370                             DAPL_IB_CME_DESTINATION_REJECT;
7371                         break;
7372                 default:
7373                         disc_ev->ee_cmev.ec_cm_ev_type =
7374                             DAPL_IB_CME_DESTINATION_REJECT;
7375                         break;
7376                 }
7377         } else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
7378                 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
7379         } else {
7380                 /* others we'll mark as local failure */
7381                 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
7382         }
7383         disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7384         disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7385         disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
7386 
7387         D2("rc_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
7388             "cf_msg(%d) cf_reason(%d)\n", disc_ev, ep_rp->ep_conn_evd,
7389             (int)event->cm_event.failed.cf_code,
7390             (int)event->cm_event.failed.cf_msg,
7391             (int)event->cm_event.failed.cf_reason);
7392 
7393         /*
7394          * transition ep_state to DISCONNECTED
7395          */
7396         new_state = DAPLKA_EP_STATE_DISCONNECTED;
7397         daplka_ep_set_state(ep_rp, old_state, new_state);
7398 
7399         /*
7400          * enqueue event onto the conn_evd owned by ep_rp
7401          */
7402         daplka_evd_wakeup(ep_rp->ep_conn_evd,
7403             &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
7404 
7405         return (IBT_CM_ACCEPT);
7406 }
7407 
7408 /*
7409  * This is the active side CM handler. It gets registered when
7410  * ibt_open_rc_channel is called.
7411  */
7412 static ibt_cm_status_t
7413 daplka_cm_rc_handler(void *cm_private, ibt_cm_event_t *event,
7414     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7415 {
7416         daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)cm_private;
7417 
7418         if (ep_rp == NULL) {
7419                 DERR("rc_handler: ep_rp == NULL\n");
7420                 return (IBT_CM_NO_CHANNEL);
7421         }
7422         /*
7423          * default is not to return priv data
7424          */
7425         if (ret_args != NULL) {
7426                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
7427                 ret_args->cm_ret_len = 0;
7428         }
7429 
7430         switch (event->cm_type) {
7431         case IBT_CM_EVENT_REQ_RCV:
7432                 /* active side should not receive this event */
7433                 D2("rc_handler: IBT_CM_EVENT_REQ_RCV\n");
7434                 break;
7435 
7436         case IBT_CM_EVENT_REP_RCV:
7437                 /* connection accepted by passive side */
7438                 D2("rc_handler: IBT_CM_EVENT_REP_RCV\n");
7439                 return (daplka_cm_rc_rep_rcv(ep_rp, event, ret_args,
7440                     priv_data, len));
7441 
7442         case IBT_CM_EVENT_CONN_CLOSED:
7443                 D2("rc_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
7444                     event->cm_event.closed);
7445                 return (daplka_cm_rc_conn_closed(ep_rp, event, ret_args,
7446                     priv_data, len));
7447 
7448         case IBT_CM_EVENT_MRA_RCV:
7449                 /* passive side does default processing MRA event */
7450                 D2("rc_handler: IBT_CM_EVENT_MRA_RCV\n");
7451                 return (IBT_CM_DEFAULT);
7452 
7453         case IBT_CM_EVENT_CONN_EST:
7454                 D2("rc_handler: IBT_CM_EVENT_CONN_EST\n");
7455                 return (daplka_cm_rc_conn_est(ep_rp, event, ret_args,
7456                     priv_data, len));
7457 
7458         case IBT_CM_EVENT_FAILURE:
7459                 D2("rc_handler: IBT_CM_EVENT_FAILURE\n");
7460                 return (daplka_cm_rc_event_failure(ep_rp, event, ret_args,
7461                     priv_data, len));
7462 
7463         default:
7464                 D2("rc_handler: invalid event %d\n", event->cm_type);
7465                 break;
7466         }
7467         return (IBT_CM_DEFAULT);
7468 }
7469 
7470 /*
7471  * creates an IA resource and inserts it into the global resource table.
7472  */
7473 /* ARGSUSED */
7474 static int
7475 daplka_ia_create(minor_t rnum, intptr_t arg, int mode,
7476         cred_t *cred, int *rvalp)
7477 {
7478         daplka_ia_resource_t    *ia_rp, *tmp_rp;
7479         boolean_t               inserted = B_FALSE;
7480         dapl_ia_create_t        args;
7481         ibt_hca_hdl_t           hca_hdl;
7482         ibt_status_t            status;
7483         ib_gid_t                sgid;
7484         int                     retval;
7485         ibt_hca_portinfo_t      *pinfop;
7486         uint_t                  pinfon;
7487         uint_t                  size;
7488         ibt_ar_t                ar_s;
7489         daplka_hca_t            *hca;
7490 
7491         retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ia_create_t),
7492             mode);
7493         if (retval != 0) {
7494                 DERR("ia_create: copyin error %d\n", retval);
7495                 return (EFAULT);
7496         }
7497         if (args.ia_version != DAPL_IF_VERSION) {
7498                 DERR("ia_create: invalid version %d, expected version %d\n",
7499                     args.ia_version, DAPL_IF_VERSION);
7500                 return (EINVAL);
7501         }
7502 
7503         /*
7504          * find the hca with the matching guid
7505          */
7506         mutex_enter(&daplka_dev->daplka_mutex);
7507         for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
7508             hca = hca->hca_next) {
7509                 if (hca->hca_guid == args.ia_guid) {
7510                         DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);
7511                         break;
7512                 }
7513         }
7514         mutex_exit(&daplka_dev->daplka_mutex);
7515 
7516         if (hca == NULL) {
7517                 DERR("ia_create: guid 0x%016llx not found\n",
7518                     (longlong_t)args.ia_guid);
7519                 return (EINVAL);
7520         }
7521 
7522         /*
7523          * check whether port number is valid and whether it is up
7524          */
7525         if (args.ia_port > hca->hca_nports) {
7526                 DERR("ia_create: invalid hca_port %d\n", args.ia_port);
7527                 DAPLKA_RELE_HCA(daplka_dev, hca);
7528                 return (EINVAL);
7529         }
7530         hca_hdl = hca->hca_hdl;
7531         if (hca_hdl == NULL) {
7532                 DERR("ia_create: hca_hdl == NULL\n");
7533                 DAPLKA_RELE_HCA(daplka_dev, hca);
7534                 return (EINVAL);
7535         }
7536         status = ibt_query_hca_ports(hca_hdl, (uint8_t)args.ia_port,
7537             &pinfop, &pinfon, &size);
7538         if (status != IBT_SUCCESS) {
7539                 DERR("ia_create: ibt_query_hca_ports returned %d\n", status);
7540                 *rvalp = (int)status;
7541                 DAPLKA_RELE_HCA(daplka_dev, hca);
7542                 return (0);
7543         }
7544         sgid = pinfop->p_sgid_tbl[0];
7545         ibt_free_portinfo(pinfop, size);
7546 
7547         ia_rp = kmem_zalloc(sizeof (daplka_ia_resource_t), daplka_km_flags);
7548         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
7549         DAPLKA_RS_INIT(ia_rp, DAPL_TYPE_IA, rnum, daplka_ia_destroy);
7550 
7551         mutex_init(&ia_rp->ia_lock, NULL, MUTEX_DRIVER, NULL);
7552         cv_init(&ia_rp->ia_cv, NULL, CV_DRIVER, NULL);
7553         ia_rp->ia_hca_hdl = hca_hdl;
7554         ia_rp->ia_hca_sgid = sgid;
7555         ia_rp->ia_hca = hca;
7556         ia_rp->ia_port_num = args.ia_port;
7557         ia_rp->ia_port_pkey = args.ia_pkey;
7558         ia_rp->ia_pid = ddi_get_pid();
7559         ia_rp->ia_async_evd_hkeys = NULL;
7560         ia_rp->ia_ar_registered = B_FALSE;
7561         bcopy(args.ia_sadata, ia_rp->ia_sadata, DAPL_ATS_NBYTES);
7562 
7563         /* register Address Record */
7564         ar_s.ar_gid = ia_rp->ia_hca_sgid;
7565         ar_s.ar_pkey = ia_rp->ia_port_pkey;
7566         bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
7567 #define UC(b) ar_s.ar_data[(b)]
7568         D3("daplka_ia_create: SA[8] %d.%d.%d.%d\n",
7569             UC(8), UC(9), UC(10), UC(11));
7570         D3("daplka_ia_create: SA[12] %d.%d.%d.%d\n",
7571             UC(12), UC(13), UC(14), UC(15));
7572         retval = ibt_register_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
7573         if (retval != IBT_SUCCESS) {
7574                 DERR("ia_create: failed to register Address Record.\n");
7575                 retval = EINVAL;
7576                 goto cleanup;
7577         }
7578         ia_rp->ia_ar_registered = B_TRUE;
7579 
7580         /*
7581          * create hash tables for all object types
7582          */
7583         retval = daplka_hash_create(&ia_rp->ia_ep_htbl, DAPLKA_EP_HTBL_SZ,
7584             daplka_hash_ep_free, daplka_hash_generic_lookup);
7585         if (retval != 0) {
7586                 DERR("ia_create: cannot create ep hash table\n");
7587                 goto cleanup;
7588         }
7589         retval = daplka_hash_create(&ia_rp->ia_mr_htbl, DAPLKA_MR_HTBL_SZ,
7590             daplka_hash_mr_free, daplka_hash_generic_lookup);
7591         if (retval != 0) {
7592                 DERR("ia_create: cannot create mr hash table\n");
7593                 goto cleanup;
7594         }
7595         retval = daplka_hash_create(&ia_rp->ia_mw_htbl, DAPLKA_MW_HTBL_SZ,
7596             daplka_hash_mw_free, daplka_hash_generic_lookup);
7597         if (retval != 0) {
7598                 DERR("ia_create: cannot create mw hash table\n");
7599                 goto cleanup;
7600         }
7601         retval = daplka_hash_create(&ia_rp->ia_pd_htbl, DAPLKA_PD_HTBL_SZ,
7602             daplka_hash_pd_free, daplka_hash_generic_lookup);
7603         if (retval != 0) {
7604                 DERR("ia_create: cannot create pd hash table\n");
7605                 goto cleanup;
7606         }
7607         retval = daplka_hash_create(&ia_rp->ia_evd_htbl, DAPLKA_EVD_HTBL_SZ,
7608             daplka_hash_evd_free, daplka_hash_generic_lookup);
7609         if (retval != 0) {
7610                 DERR("ia_create: cannot create evd hash table\n");
7611                 goto cleanup;
7612         }
7613         retval = daplka_hash_create(&ia_rp->ia_cno_htbl, DAPLKA_CNO_HTBL_SZ,
7614             daplka_hash_cno_free, daplka_hash_generic_lookup);
7615         if (retval != 0) {
7616                 DERR("ia_create: cannot create cno hash table\n");
7617                 goto cleanup;
7618         }
7619         retval = daplka_hash_create(&ia_rp->ia_sp_htbl, DAPLKA_SP_HTBL_SZ,
7620             daplka_hash_sp_free, daplka_hash_generic_lookup);
7621         if (retval != 0) {
7622                 DERR("ia_create: cannot create sp hash table\n");
7623                 goto cleanup;
7624         }
7625         retval = daplka_hash_create(&ia_rp->ia_srq_htbl, DAPLKA_SRQ_HTBL_SZ,
7626             daplka_hash_srq_free, daplka_hash_generic_lookup);
7627         if (retval != 0) {
7628                 DERR("ia_create: cannot create srq hash table\n");
7629                 goto cleanup;
7630         }
7631         /*
7632          * insert ia_rp into the global resource table
7633          */
7634         retval = daplka_resource_insert(rnum, (daplka_resource_t *)ia_rp);
7635         if (retval != 0) {
7636                 DERR("ia_create: cannot insert resource\n");
7637                 goto cleanup;
7638         }
7639         inserted = B_TRUE;
7640         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ia_rp))
7641 
7642         args.ia_resnum = rnum;
7643         retval = copyout(&args, (void *)arg, sizeof (dapl_ia_create_t));
7644         if (retval != 0) {
7645                 DERR("ia_create: copyout error %d\n", retval);
7646                 retval = EFAULT;
7647                 goto cleanup;
7648         }
7649         return (0);
7650 
7651 cleanup:;
7652         if (inserted) {
7653                 tmp_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
7654                 if (tmp_rp != ia_rp) {
7655                         /*
7656                          * we can return here because another thread must
7657                          * have freed up the resource
7658                          */
7659                         DERR("ia_create: cannot remove resource\n");
7660                         return (retval);
7661                 }
7662         }
7663         DAPLKA_RS_UNREF(ia_rp);
7664         return (retval);
7665 }
7666 
7667 /*
7668  * destroys an IA resource
7669  */
7670 static int
7671 daplka_ia_destroy(daplka_resource_t *gen_rp)
7672 {
7673         daplka_ia_resource_t    *ia_rp = (daplka_ia_resource_t *)gen_rp;
7674         daplka_async_evd_hkey_t *hkp;
7675         int                     cnt;
7676         ibt_ar_t                ar_s;
7677 
7678         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
7679         D3("ia_destroy: entering, ia_rp 0x%p\n", ia_rp);
7680 
7681         /* deregister Address Record */
7682         if (ia_rp->ia_ar_registered) {
7683                 ar_s.ar_gid = ia_rp->ia_hca_sgid;
7684                 ar_s.ar_pkey = ia_rp->ia_port_pkey;
7685                 bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
7686                 (void) ibt_deregister_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
7687                 ia_rp->ia_ar_registered = B_FALSE;
7688         }
7689 
7690         /*
7691          * destroy hash tables. make sure resources are
7692          * destroyed in the correct order.
7693          */
7694         daplka_hash_destroy(&ia_rp->ia_mw_htbl);
7695         daplka_hash_destroy(&ia_rp->ia_mr_htbl);
7696         daplka_hash_destroy(&ia_rp->ia_ep_htbl);
7697         daplka_hash_destroy(&ia_rp->ia_srq_htbl);
7698         daplka_hash_destroy(&ia_rp->ia_evd_htbl);
7699         daplka_hash_destroy(&ia_rp->ia_cno_htbl);
7700         daplka_hash_destroy(&ia_rp->ia_pd_htbl);
7701         daplka_hash_destroy(&ia_rp->ia_sp_htbl);
7702 
7703         /*
7704          * free the async evd list
7705          */
7706         cnt = 0;
7707         hkp = ia_rp->ia_async_evd_hkeys;
7708         while (hkp != NULL) {
7709                 daplka_async_evd_hkey_t *free_hkp;
7710 
7711                 cnt++;
7712                 free_hkp = hkp;
7713                 hkp = hkp->aeh_next;
7714                 kmem_free(free_hkp, sizeof (*free_hkp));
7715         }
7716         if (cnt > 0) {
7717                 D3("ia_destroy: freed %d hkeys\n", cnt);
7718         }
7719         mutex_destroy(&ia_rp->ia_lock);
7720         cv_destroy(&ia_rp->ia_cv);
7721         ia_rp->ia_hca_hdl = NULL;
7722 
7723         DAPLKA_RS_FINI(ia_rp);
7724 
7725         if (ia_rp->ia_hca)
7726                 DAPLKA_RELE_HCA(daplka_dev, ia_rp->ia_hca);
7727 
7728         kmem_free(ia_rp, sizeof (daplka_ia_resource_t));
7729         D3("ia_destroy: exiting, ia_rp 0x%p\n", ia_rp);
7730         return (0);
7731 }
7732 
7733 static void
7734 daplka_async_event_create(ibt_async_code_t code, ibt_async_event_t *event,
7735     uint64_t cookie, daplka_ia_resource_t *ia_rp)
7736 {
7737         daplka_evd_event_t      *evp;
7738         daplka_evd_resource_t   *async_evd;
7739         daplka_async_evd_hkey_t *curr;
7740 
7741         mutex_enter(&ia_rp->ia_lock);
7742         curr = ia_rp->ia_async_evd_hkeys;
7743         while (curr != NULL) {
7744                 /*
7745                  * Note: this allocation does not zero out the buffer
7746                  * since we init all the fields.
7747                  */
7748                 evp = kmem_alloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7749                 if (evp == NULL) {
7750                         DERR("async_event_enqueue: event alloc failed"
7751                             "!found\n", ia_rp, curr->aeh_evd_hkey);
7752                         curr = curr->aeh_next;
7753                         continue;
7754                 }
7755                 evp->ee_next = NULL;
7756                 evp->ee_aev.ibae_type = code;
7757                 evp->ee_aev.ibae_hca_guid = event->ev_hca_guid;
7758                 evp->ee_aev.ibae_cookie = cookie;
7759                 evp->ee_aev.ibae_port = event->ev_port;
7760 
7761                 /*
7762                  * Lookup the async evd corresponding to this ia and enqueue
7763                  * evp and wakeup any waiter.
7764                  */
7765                 async_evd = (daplka_evd_resource_t *)
7766                     daplka_hash_lookup(&ia_rp->ia_evd_htbl, curr->aeh_evd_hkey);
7767                 if (async_evd == NULL) { /* async evd is being freed */
7768                         DERR("async_event_enqueue: ia_rp(%p) asycn_evd %llx "
7769                             "!found\n", ia_rp, (longlong_t)curr->aeh_evd_hkey);
7770                         kmem_free(evp, sizeof (daplka_evd_event_t));
7771                         curr = curr->aeh_next;
7772                         continue;
7773                 }
7774                 daplka_evd_wakeup(async_evd, &async_evd->evd_async_events, evp);
7775 
7776                 /* decrement refcnt on async_evd */
7777                 DAPLKA_RS_UNREF(async_evd);
7778                 curr = curr->aeh_next;
7779         }
7780         mutex_exit(&ia_rp->ia_lock);
7781 }
7782 /*
7783  * This routine is called in kernel context
7784  */
7785 
7786 /* ARGSUSED */
7787 static void
7788 daplka_rc_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7789     ibt_async_code_t code, ibt_async_event_t *event)
7790 {
7791         daplka_ep_resource_t            *epp;
7792         daplka_ia_resource_t            *ia_rp;
7793         minor_t                         ia_rnum;
7794 
7795         if (event->ev_chan_hdl == NULL) {
7796                 DERR("daplka_rc_async_handler: ev_chan_hdl is NULL\n");
7797                 return;
7798         }
7799 
7800         mutex_enter(&daplka_dev->daplka_mutex);
7801         epp = ibt_get_chan_private(event->ev_chan_hdl);
7802         if (epp == NULL) {
7803                 mutex_exit(&daplka_dev->daplka_mutex);
7804                 DERR("daplka_rc_async_handler: chan_private is NULL\n");
7805                 return;
7806         }
7807 
7808         /* grab a reference to this ep */
7809         DAPLKA_RS_REF(epp);
7810         mutex_exit(&daplka_dev->daplka_mutex);
7811 
7812         /*
7813          * The endpoint resource has the resource number corresponding to
7814          * the IA resource. Use that to lookup the ia resource entry
7815          */
7816         ia_rnum = DAPLKA_RS_RNUM(epp);
7817         ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
7818         if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
7819                 D2("daplka_rc_async_handler: resource (%d) not found\n",
7820                     ia_rnum);
7821                 DAPLKA_RS_UNREF(epp);
7822                 return;
7823         }
7824 
7825         /*
7826          * Create an async event and chain it to the async evd
7827          */
7828         daplka_async_event_create(code, event, epp->ep_cookie, ia_rp);
7829 
7830         DAPLKA_RS_UNREF(ia_rp);
7831         DAPLKA_RS_UNREF(epp);
7832 }
7833 
7834 /*
7835  * This routine is called in kernel context
7836  */
7837 
7838 /* ARGSUSED */
7839 static void
7840 daplka_cq_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7841     ibt_async_code_t code, ibt_async_event_t *event)
7842 {
7843         daplka_evd_resource_t           *evdp;
7844         daplka_ia_resource_t            *ia_rp;
7845         minor_t                         ia_rnum;
7846 
7847         if (event->ev_cq_hdl == NULL)
7848                 return;
7849 
7850         mutex_enter(&daplka_dev->daplka_mutex);
7851         evdp = ibt_get_cq_private(event->ev_cq_hdl);
7852         if (evdp == NULL) {
7853                 mutex_exit(&daplka_dev->daplka_mutex);
7854                 DERR("daplka_cq_async_handler: get cq private(%p) failed\n",
7855                     event->ev_cq_hdl);
7856                 return;
7857         }
7858         /* grab a reference to this evd resource */
7859         DAPLKA_RS_REF(evdp);
7860         mutex_exit(&daplka_dev->daplka_mutex);
7861 
7862         /*
7863          * The endpoint resource has the resource number corresponding to
7864          * the IA resource. Use that to lookup the ia resource entry
7865          */
7866         ia_rnum = DAPLKA_RS_RNUM(evdp);
7867         ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
7868         if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
7869                 DERR("daplka_cq_async_handler: resource (%d) not found\n",
7870                     ia_rnum);
7871                 DAPLKA_RS_UNREF(evdp);
7872                 return;
7873         }
7874 
7875         /*
7876          * Create an async event and chain it to the async evd
7877          */
7878         daplka_async_event_create(code, event, evdp->evd_cookie, ia_rp);
7879 
7880         /* release all the refcount that were acquired */
7881         DAPLKA_RS_UNREF(ia_rp);
7882         DAPLKA_RS_UNREF(evdp);
7883 }
7884 
7885 /*
7886  * This routine is called in kernel context, handles unaffiliated async errors
7887  */
7888 
7889 /* ARGSUSED */
7890 static void
7891 daplka_un_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7892     ibt_async_code_t code, ibt_async_event_t *event)
7893 {
7894         int                     i, j;
7895         daplka_resource_blk_t   *blk;
7896         daplka_resource_t       *rp;
7897         daplka_ia_resource_t    *ia_rp;
7898 
7899         /*
7900          * Walk the resource table looking for an ia that matches the
7901          * hca_hdl.
7902          */
7903         rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
7904         for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
7905                 blk = daplka_resource.daplka_rc_root[i];
7906                 if (blk == NULL)
7907                         continue;
7908                 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
7909                         rp = blk->daplka_rcblk_blks[j];
7910                         if ((rp == NULL) ||
7911                             ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
7912                             (rp->rs_type != DAPL_TYPE_IA)) {
7913                                 continue;
7914                         }
7915                         /*
7916                          * rp is an IA resource check if it belongs
7917                          * to the hca/port for which we got the event
7918                          */
7919                         ia_rp = (daplka_ia_resource_t *)rp;
7920                         DAPLKA_RS_REF(ia_rp);
7921                         if ((hca_hdl == ia_rp->ia_hca_hdl) &&
7922                             (event->ev_port == ia_rp->ia_port_num)) {
7923                                 /*
7924                                  * walk the ep hash table. Acquire a
7925                                  * reader lock. NULL dgid indicates
7926                                  * local port up event.
7927                                  */
7928                                 daplka_hash_walk(&ia_rp->ia_ep_htbl,
7929                                     daplka_ep_failback, NULL, RW_READER);
7930                         }
7931                         DAPLKA_RS_UNREF(ia_rp);
7932                 }
7933         }
7934         rw_exit(&daplka_resource.daplka_rct_lock);
7935 }
7936 
7937 static int
7938 daplka_handle_hca_detach_event(ibt_async_event_t *event)
7939 {
7940         daplka_hca_t    *hca;
7941 
7942         /*
7943          * find the hca with the matching guid
7944          */
7945         mutex_enter(&daplka_dev->daplka_mutex);
7946         for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
7947             hca = hca->hca_next) {
7948                 if (hca->hca_guid == event->ev_hca_guid) {
7949                         if (DAPLKA_HCA_BUSY(hca)) {
7950                                 mutex_exit(&daplka_dev->daplka_mutex);
7951                                 return (IBT_HCA_RESOURCES_NOT_FREED);
7952                         }
7953                         daplka_dequeue_hca(daplka_dev, hca);
7954                         break;
7955                 }
7956         }
7957         mutex_exit(&daplka_dev->daplka_mutex);
7958 
7959         if (hca == NULL)
7960                 return (IBT_FAILURE);
7961 
7962         return (daplka_fini_hca(daplka_dev, hca));
7963 }
7964 
7965 /*
7966  * This routine is called in kernel context
7967  */
7968 static void
7969 daplka_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7970     ibt_async_code_t code, ibt_async_event_t *event)
7971 {
7972         switch (code) {
7973         case IBT_ERROR_CATASTROPHIC_CHAN:
7974         case IBT_ERROR_INVALID_REQUEST_CHAN:
7975         case IBT_ERROR_ACCESS_VIOLATION_CHAN:
7976         case IBT_ERROR_PATH_MIGRATE_REQ:
7977                 D2("daplka_async_handler(): Channel affiliated=0x%x\n", code);
7978                 /* These events are affiliated with a the RC channel */
7979                 daplka_rc_async_handler(clnt_private, hca_hdl, code, event);
7980                 break;
7981         case IBT_ERROR_CQ:
7982                 /* This event is affiliated with a the CQ */
7983                 D2("daplka_async_handler(): IBT_ERROR_CQ\n");
7984                 daplka_cq_async_handler(clnt_private, hca_hdl, code, event);
7985                 break;
7986         case IBT_ERROR_PORT_DOWN:
7987                 D2("daplka_async_handler(): IBT_PORT_DOWN\n");
7988                 break;
7989         case IBT_EVENT_PORT_UP:
7990                 D2("daplka_async_handler(): IBT_PORT_UP\n");
7991                 if (daplka_apm) {
7992                         daplka_un_async_handler(clnt_private, hca_hdl, code,
7993                             event);
7994                 }
7995                 break;
7996         case IBT_HCA_ATTACH_EVENT:
7997                 /*
7998                  * NOTE: In some error recovery paths, it is possible to
7999                  * receive IBT_HCA_ATTACH_EVENTs on already known HCAs.
8000                  */
8001                 D2("daplka_async_handler(): IBT_HCA_ATTACH\n");
8002                 (void) daplka_init_hca(daplka_dev, event->ev_hca_guid);
8003                 break;
8004         case IBT_HCA_DETACH_EVENT:
8005                 D2("daplka_async_handler(): IBT_HCA_DETACH\n");
8006                 /* Free all hca resources and close the HCA. */
8007                 (void) daplka_handle_hca_detach_event(event);
8008                 break;
8009         case IBT_EVENT_PATH_MIGRATED:
8010                 /* This event is affiliated with APM */
8011                 D2("daplka_async_handler(): IBT_PATH_MIGRATED.\n");
8012                 break;
8013         default:
8014                 D2("daplka_async_handler(): unhandled code = 0x%x\n", code);
8015                 break;
8016         }
8017 }
8018 
8019 /*
8020  * This routine is called in kernel context related to Subnet events
8021  */
8022 /*ARGSUSED*/
8023 static void
8024 daplka_sm_notice_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
8025         ibt_subnet_event_t *event)
8026 {
8027         ib_gid_t *sgid = &gid;
8028         ib_gid_t *dgid;
8029 
8030         dgid = &event->sm_notice_gid;
8031         switch (code) {
8032         case IBT_SM_EVENT_GID_AVAIL:
8033                 /* This event is affiliated with remote port up */
8034                 D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_AVAIL\n");
8035                 if (daplka_apm)
8036                         daplka_sm_gid_avail(sgid, dgid);
8037                 return;
8038         case IBT_SM_EVENT_GID_UNAVAIL:
8039                 /* This event is affiliated with remote port down */
8040                 D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_UNAVAIL\n");
8041                 return;
8042         default:
8043                 D2("daplka_sm_notice_handler(): unhandled IBT_SM_EVENT_[%d]\n",
8044                     code);
8045                 return;
8046         }
8047 }
8048 
8049 /*
8050  * This routine is called in kernel context, handles Subnet GID avail events
8051  * which correspond to remote port up. Setting up alternate path or path
8052  * migration (failback) has to be initiated from the active side of the
8053  * original connect.
8054  */
8055 static void
8056 daplka_sm_gid_avail(ib_gid_t *sgid, ib_gid_t *dgid)
8057 {
8058         int                     i, j;
8059         daplka_resource_blk_t   *blk;
8060         daplka_resource_t       *rp;
8061         daplka_ia_resource_t    *ia_rp;
8062 
8063         D2("daplka_sm_gid_avail: sgid=%llx:%llx dgid=%llx:%llx\n",
8064             (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid,
8065             (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
8066 
8067         /*
8068          * Walk the resource table looking for an ia that matches the sgid
8069          */
8070         rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
8071         for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
8072                 blk = daplka_resource.daplka_rc_root[i];
8073                 if (blk == NULL)
8074                         continue;
8075                 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
8076                         rp = blk->daplka_rcblk_blks[j];
8077                         if ((rp == NULL) ||
8078                             ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
8079                             (rp->rs_type != DAPL_TYPE_IA)) {
8080                                 continue;
8081                         }
8082                         /*
8083                          * rp is an IA resource check if its gid
8084                          * matches with the calling sgid
8085                          */
8086                         ia_rp = (daplka_ia_resource_t *)rp;
8087                         DAPLKA_RS_REF(ia_rp);
8088                         if ((sgid->gid_prefix ==
8089                             ia_rp->ia_hca_sgid.gid_prefix) &&
8090                             (sgid->gid_guid == ia_rp->ia_hca_sgid.gid_guid)) {
8091                                 /*
8092                                  * walk the ep hash table. Acquire a
8093                                  * reader lock.
8094                                  */
8095                                 daplka_hash_walk(&ia_rp->ia_ep_htbl,
8096                                     daplka_ep_failback,
8097                                     (void *)dgid, RW_READER);
8098                         }
8099                         DAPLKA_RS_UNREF(ia_rp);
8100                 }
8101         }
8102         rw_exit(&daplka_resource.daplka_rct_lock);
8103 }
8104 
8105 /*
8106  * This routine is called in kernel context to get and set an alternate path
8107  */
8108 static int
8109 daplka_ep_altpath(daplka_ep_resource_t *ep_rp, ib_gid_t *dgid)
8110 {
8111         ibt_alt_path_info_t path_info;
8112         ibt_alt_path_attr_t path_attr;
8113         ibt_ap_returns_t ap_rets;
8114         ibt_status_t status;
8115 
8116         D2("daplka_ep_altpath : ibt_get_alt_path()\n");
8117         bzero(&path_info, sizeof (ibt_alt_path_info_t));
8118         bzero(&path_attr, sizeof (ibt_alt_path_attr_t));
8119         if (dgid != NULL) {
8120                 path_attr.apa_sgid = ep_rp->ep_sgid;
8121                 path_attr.apa_dgid = *dgid;
8122         }
8123         status = ibt_get_alt_path(ep_rp->ep_chan_hdl, IBT_PATH_AVAIL,
8124             &path_attr, &path_info);
8125         if (status != IBT_SUCCESS) {
8126                 DERR("daplka_ep_altpath : ibt_get_alt_path failed %d\n",
8127                     status);
8128                 return (1);
8129         }
8130 
8131         D2("daplka_ep_altpath : ibt_set_alt_path()\n");
8132         bzero(&ap_rets, sizeof (ibt_ap_returns_t));
8133         status = ibt_set_alt_path(ep_rp->ep_chan_hdl, IBT_BLOCKING,
8134             &path_info, NULL, 0, &ap_rets);
8135         if ((status != IBT_SUCCESS) ||
8136             (ap_rets.ap_status != IBT_CM_AP_LOADED)) {
8137                 DERR("daplka_ep_altpath : ibt_set_alt_path failed "
8138                     "status %d ap_status %d\n", status, ap_rets.ap_status);
8139                 return (1);
8140         }
8141         return (0);
8142 }
8143 
8144 /*
8145  * This routine is called in kernel context to failback to the original path
8146  */
8147 static int
8148 daplka_ep_failback(void *objp, void *arg)
8149 {
8150         daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)objp;
8151         ib_gid_t *dgid;
8152         ibt_status_t status;
8153         ibt_rc_chan_query_attr_t chan_attrs;
8154         int i;
8155 
8156         ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
8157         D2("daplka_ep_failback ep : sgid=%llx:%llx dgid=%llx:%llx\n",
8158             (longlong_t)ep_rp->ep_sgid.gid_prefix,
8159             (longlong_t)ep_rp->ep_sgid.gid_guid,
8160             (longlong_t)ep_rp->ep_dgid.gid_prefix,
8161             (longlong_t)ep_rp->ep_dgid.gid_guid);
8162 
8163         /*
8164          * daplka_ep_failback is called from daplka_hash_walk
8165          * which holds the read lock on hash table to protect
8166          * the endpoint resource from removal
8167          */
8168         mutex_enter(&ep_rp->ep_lock);
8169         /* check for unconnected endpoints */
8170         /* first check for ep state */
8171         if (ep_rp->ep_state != DAPLKA_EP_STATE_CONNECTED) {
8172                 mutex_exit(&ep_rp->ep_lock);
8173                 D2("daplka_ep_failback : endpoints not connected\n");
8174                 return (0);
8175         }
8176 
8177         /* second check for gids */
8178         if (((ep_rp->ep_sgid.gid_prefix == 0) &&
8179             (ep_rp->ep_sgid.gid_guid == 0)) ||
8180             ((ep_rp->ep_dgid.gid_prefix == 0) &&
8181             (ep_rp->ep_dgid.gid_guid == 0))) {
8182                 mutex_exit(&ep_rp->ep_lock);
8183                 D2("daplka_ep_failback : skip unconnected endpoints\n");
8184                 return (0);
8185         }
8186 
8187         /*
8188          * matching destination ep
8189          * when dgid is NULL, the async event is a local port up.
8190          * dgid becomes wild card, i.e. all endpoints match
8191          */
8192         dgid = (ib_gid_t *)arg;
8193         if (dgid == NULL) {
8194                 /* ignore loopback ep */
8195                 if ((ep_rp->ep_sgid.gid_prefix == ep_rp->ep_dgid.gid_prefix) &&
8196                     (ep_rp->ep_sgid.gid_guid == ep_rp->ep_dgid.gid_guid)) {
8197                         mutex_exit(&ep_rp->ep_lock);
8198                         D2("daplka_ep_failback : skip loopback endpoints\n");
8199                         return (0);
8200                 }
8201         } else {
8202                 /* matching remote ep */
8203                 if ((ep_rp->ep_dgid.gid_prefix != dgid->gid_prefix) ||
8204                     (ep_rp->ep_dgid.gid_guid != dgid->gid_guid)) {
8205                         mutex_exit(&ep_rp->ep_lock);
8206                         D2("daplka_ep_failback : unrelated endpoints\n");
8207                         return (0);
8208                 }
8209         }
8210 
8211         /* call get and set altpath with original dgid used in ep_connect */
8212         if (daplka_ep_altpath(ep_rp, &ep_rp->ep_dgid)) {
8213                 mutex_exit(&ep_rp->ep_lock);
8214                 return (0);
8215         }
8216 
8217         /*
8218          * wait for migration state to be ARMed
8219          * e.g. a post_send msg will transit mig_state from REARM to ARM
8220          */
8221         for (i = 0; i < daplka_query_aft_setaltpath; i++) {
8222                 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
8223                 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
8224                 if (status != IBT_SUCCESS) {
8225                         mutex_exit(&ep_rp->ep_lock);
8226                         DERR("daplka_ep_altpath : ibt_query_rc_channel err\n");
8227                         return (0);
8228                 }
8229                 if (chan_attrs.rc_mig_state == IBT_STATE_ARMED)
8230                         break;
8231         }
8232 
8233         D2("daplka_ep_altpath : query[%d] mig_st=%d\n",
8234             i, chan_attrs.rc_mig_state);
8235         D2("daplka_ep_altpath : P sgid=%llx:%llx dgid=%llx:%llx\n",
8236             (longlong_t)
8237             chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_prefix,
8238             (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_guid,
8239             (longlong_t)
8240             chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_prefix,
8241             (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_guid);
8242         D2("daplka_ep_altpath : A sgid=%llx:%llx dgid=%llx:%llx\n",
8243             (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_prefix,
8244             (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_guid,
8245             (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_prefix,
8246             (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_guid);
8247 
8248         /* skip failback on ARMed state not reached or env override */
8249         if ((i >= daplka_query_aft_setaltpath) || (daplka_failback == 0)) {
8250                 mutex_exit(&ep_rp->ep_lock);
8251                 DERR("daplka_ep_altpath : ARMed state not reached\n");
8252                 return (0);
8253         }
8254 
8255         D2("daplka_ep_failback : ibt_migrate_path() to original ep\n");
8256         status = ibt_migrate_path(ep_rp->ep_chan_hdl);
8257         if (status != IBT_SUCCESS) {
8258                 mutex_exit(&ep_rp->ep_lock);
8259                 DERR("daplka_ep_failback : migration failed "
8260                     "status %d\n", status);
8261                 return (0);
8262         }
8263 
8264         /* call get and altpath with NULL dgid to indicate unspecified dgid */
8265         (void) daplka_ep_altpath(ep_rp, NULL);
8266         mutex_exit(&ep_rp->ep_lock);
8267         return (0);
8268 }
8269 
8270 /*
8271  * IBTF wrappers used for resource accounting
8272  */
8273 static ibt_status_t
8274 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *ep_rp, ibt_hca_hdl_t hca_hdl,
8275     ibt_chan_alloc_flags_t flags, ibt_rc_chan_alloc_args_t *args,
8276     ibt_channel_hdl_t *chan_hdl_p, ibt_chan_sizes_t *sizes)
8277 {
8278         daplka_hca_t    *hca_p;
8279         uint32_t        max_qps;
8280         boolean_t       acct_enabled;
8281         ibt_status_t    status;
8282 
8283         acct_enabled = daplka_accounting_enabled;
8284         hca_p = ep_rp->ep_hca;
8285         max_qps = daplka_max_qp_percent * hca_p->hca_attr.hca_max_chans / 100;
8286 
8287         if (acct_enabled) {
8288                 if (daplka_max_qp_percent != 0 &&
8289                     max_qps <= hca_p->hca_qp_count) {
8290                         DERR("ibt_alloc_rc_channel: resource limit exceeded "
8291                             "(limit %d, count %d)\n", max_qps,
8292                             hca_p->hca_qp_count);
8293                         return (IBT_INSUFF_RESOURCE);
8294                 }
8295                 DAPLKA_RS_ACCT_INC(ep_rp, 1);
8296                 atomic_inc_32(&hca_p->hca_qp_count);
8297         }
8298         status = ibt_alloc_rc_channel(hca_hdl, flags, args, chan_hdl_p, sizes);
8299 
8300         if (status != IBT_SUCCESS && acct_enabled) {
8301                 DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8302                 atomic_dec_32(&hca_p->hca_qp_count);
8303         }
8304         return (status);
8305 }
8306 
8307 static ibt_status_t
8308 daplka_ibt_free_channel(daplka_ep_resource_t *ep_rp, ibt_channel_hdl_t chan_hdl)
8309 {
8310         daplka_hca_t    *hca_p;
8311         ibt_status_t    status;
8312 
8313         hca_p = ep_rp->ep_hca;
8314 
8315         status = ibt_free_channel(chan_hdl);
8316         if (status != IBT_SUCCESS) {
8317                 return (status);
8318         }
8319         if (DAPLKA_RS_ACCT_CHARGED(ep_rp) > 0) {
8320                 DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8321                 atomic_dec_32(&hca_p->hca_qp_count);
8322         }
8323         return (status);
8324 }
8325 
8326 static ibt_status_t
8327 daplka_ibt_alloc_cq(daplka_evd_resource_t *evd_rp, ibt_hca_hdl_t hca_hdl,
8328     ibt_cq_attr_t *cq_attr, ibt_cq_hdl_t *ibt_cq_p, uint32_t *real_size)
8329 {
8330         daplka_hca_t    *hca_p;
8331         uint32_t        max_cqs;
8332         boolean_t       acct_enabled;
8333         ibt_status_t    status;
8334 
8335         acct_enabled = daplka_accounting_enabled;
8336         hca_p = evd_rp->evd_hca;
8337         max_cqs = daplka_max_cq_percent * hca_p->hca_attr.hca_max_cq / 100;
8338 
8339         if (acct_enabled) {
8340                 if (daplka_max_cq_percent != 0 &&
8341                     max_cqs <= hca_p->hca_cq_count) {
8342                         DERR("ibt_alloc_cq: resource limit exceeded "
8343                             "(limit %d, count %d)\n", max_cqs,
8344                             hca_p->hca_cq_count);
8345                         return (IBT_INSUFF_RESOURCE);
8346                 }
8347                 DAPLKA_RS_ACCT_INC(evd_rp, 1);
8348                 atomic_inc_32(&hca_p->hca_cq_count);
8349         }
8350         status = ibt_alloc_cq(hca_hdl, cq_attr, ibt_cq_p, real_size);
8351 
8352         if (status != IBT_SUCCESS && acct_enabled) {
8353                 DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8354                 atomic_dec_32(&hca_p->hca_cq_count);
8355         }
8356         return (status);
8357 }
8358 
8359 static ibt_status_t
8360 daplka_ibt_free_cq(daplka_evd_resource_t *evd_rp, ibt_cq_hdl_t cq_hdl)
8361 {
8362         daplka_hca_t    *hca_p;
8363         ibt_status_t    status;
8364 
8365         hca_p = evd_rp->evd_hca;
8366 
8367         status = ibt_free_cq(cq_hdl);
8368         if (status != IBT_SUCCESS) {
8369                 return (status);
8370         }
8371         if (DAPLKA_RS_ACCT_CHARGED(evd_rp) > 0) {
8372                 DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8373                 atomic_dec_32(&hca_p->hca_cq_count);
8374         }
8375         return (status);
8376 }
8377 
8378 static ibt_status_t
8379 daplka_ibt_alloc_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8380     ibt_pd_flags_t flags, ibt_pd_hdl_t *pd_hdl_p)
8381 {
8382         daplka_hca_t    *hca_p;
8383         uint32_t        max_pds;
8384         boolean_t       acct_enabled;
8385         ibt_status_t    status;
8386 
8387         acct_enabled = daplka_accounting_enabled;
8388         hca_p = pd_rp->pd_hca;
8389         max_pds = daplka_max_pd_percent * hca_p->hca_attr.hca_max_pd / 100;
8390 
8391         if (acct_enabled) {
8392                 if (daplka_max_pd_percent != 0 &&
8393                     max_pds <= hca_p->hca_pd_count) {
8394                         DERR("ibt_alloc_pd: resource limit exceeded "
8395                             "(limit %d, count %d)\n", max_pds,
8396                             hca_p->hca_pd_count);
8397                         return (IBT_INSUFF_RESOURCE);
8398                 }
8399                 DAPLKA_RS_ACCT_INC(pd_rp, 1);
8400                 atomic_inc_32(&hca_p->hca_pd_count);
8401         }
8402         status = ibt_alloc_pd(hca_hdl, flags, pd_hdl_p);
8403 
8404         if (status != IBT_SUCCESS && acct_enabled) {
8405                 DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8406                 atomic_dec_32(&hca_p->hca_pd_count);
8407         }
8408         return (status);
8409 }
8410 
8411 static ibt_status_t
8412 daplka_ibt_free_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8413     ibt_pd_hdl_t pd_hdl)
8414 {
8415         daplka_hca_t    *hca_p;
8416         ibt_status_t    status;
8417 
8418         hca_p = pd_rp->pd_hca;
8419 
8420         status = ibt_free_pd(hca_hdl, pd_hdl);
8421         if (status != IBT_SUCCESS) {
8422                 return (status);
8423         }
8424         if (DAPLKA_RS_ACCT_CHARGED(pd_rp) > 0) {
8425                 DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8426                 atomic_dec_32(&hca_p->hca_pd_count);
8427         }
8428         return (status);
8429 }
8430 
8431 static ibt_status_t
8432 daplka_ibt_alloc_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8433     ibt_pd_hdl_t pd_hdl, ibt_mw_flags_t flags, ibt_mw_hdl_t *mw_hdl_p,
8434     ibt_rkey_t *rkey_p)
8435 {
8436         daplka_hca_t    *hca_p;
8437         uint32_t        max_mws;
8438         boolean_t       acct_enabled;
8439         ibt_status_t    status;
8440 
8441         acct_enabled = daplka_accounting_enabled;
8442         hca_p = mw_rp->mw_hca;
8443         max_mws = daplka_max_mw_percent * hca_p->hca_attr.hca_max_mem_win / 100;
8444 
8445         if (acct_enabled) {
8446                 if (daplka_max_mw_percent != 0 &&
8447                     max_mws <= hca_p->hca_mw_count) {
8448                         DERR("ibt_alloc_mw: resource limit exceeded "
8449                             "(limit %d, count %d)\n", max_mws,
8450                             hca_p->hca_mw_count);
8451                         return (IBT_INSUFF_RESOURCE);
8452                 }
8453                 DAPLKA_RS_ACCT_INC(mw_rp, 1);
8454                 atomic_inc_32(&hca_p->hca_mw_count);
8455         }
8456         status = ibt_alloc_mw(hca_hdl, pd_hdl, flags, mw_hdl_p, rkey_p);
8457 
8458         if (status != IBT_SUCCESS && acct_enabled) {
8459                 DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8460                 atomic_dec_32(&hca_p->hca_mw_count);
8461         }
8462         return (status);
8463 }
8464 
8465 static ibt_status_t
8466 daplka_ibt_free_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8467     ibt_mw_hdl_t mw_hdl)
8468 {
8469         daplka_hca_t    *hca_p;
8470         ibt_status_t    status;
8471 
8472         hca_p = mw_rp->mw_hca;
8473 
8474         status = ibt_free_mw(hca_hdl, mw_hdl);
8475         if (status != IBT_SUCCESS) {
8476                 return (status);
8477         }
8478         if (DAPLKA_RS_ACCT_CHARGED(mw_rp) > 0) {
8479                 DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8480                 atomic_dec_32(&hca_p->hca_mw_count);
8481         }
8482         return (status);
8483 }
8484 
8485 static ibt_status_t
8486 daplka_ibt_register_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8487     ibt_pd_hdl_t pd_hdl, ibt_mr_attr_t *mr_attr, ibt_mr_hdl_t *mr_hdl_p,
8488     ibt_mr_desc_t *mr_desc_p)
8489 {
8490         daplka_hca_t    *hca_p;
8491         uint32_t        max_mrs;
8492         boolean_t       acct_enabled;
8493         ibt_status_t    status;
8494 
8495         acct_enabled = daplka_accounting_enabled;
8496         hca_p = mr_rp->mr_hca;
8497         max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
8498 
8499         if (acct_enabled) {
8500                 if (daplka_max_mr_percent != 0 &&
8501                     max_mrs <= hca_p->hca_mr_count) {
8502                         DERR("ibt_register_mr: resource limit exceeded "
8503                             "(limit %d, count %d)\n", max_mrs,
8504                             hca_p->hca_mr_count);
8505                         return (IBT_INSUFF_RESOURCE);
8506                 }
8507                 DAPLKA_RS_ACCT_INC(mr_rp, 1);
8508                 atomic_inc_32(&hca_p->hca_mr_count);
8509         }
8510         status = ibt_register_mr(hca_hdl, pd_hdl, mr_attr, mr_hdl_p, mr_desc_p);
8511 
8512         if (status != IBT_SUCCESS && acct_enabled) {
8513                 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8514                 atomic_dec_32(&hca_p->hca_mr_count);
8515         }
8516         return (status);
8517 }
8518 
8519 static ibt_status_t
8520 daplka_ibt_register_shared_mr(daplka_mr_resource_t *mr_rp,
8521     ibt_hca_hdl_t hca_hdl, ibt_mr_hdl_t mr_hdl, ibt_pd_hdl_t pd_hdl,
8522     ibt_smr_attr_t *smr_attr_p, ibt_mr_hdl_t *mr_hdl_p,
8523     ibt_mr_desc_t *mr_desc_p)
8524 {
8525         daplka_hca_t    *hca_p;
8526         uint32_t        max_mrs;
8527         boolean_t       acct_enabled;
8528         ibt_status_t    status;
8529 
8530         acct_enabled = daplka_accounting_enabled;
8531         hca_p = mr_rp->mr_hca;
8532         max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
8533 
8534         if (acct_enabled) {
8535                 if (daplka_max_mr_percent != 0 &&
8536                     max_mrs <= hca_p->hca_mr_count) {
8537                         DERR("ibt_register_shared_mr: resource limit exceeded "
8538                             "(limit %d, count %d)\n", max_mrs,
8539                             hca_p->hca_mr_count);
8540                         return (IBT_INSUFF_RESOURCE);
8541                 }
8542                 DAPLKA_RS_ACCT_INC(mr_rp, 1);
8543                 atomic_inc_32(&hca_p->hca_mr_count);
8544         }
8545         status = ibt_register_shared_mr(hca_hdl, mr_hdl, pd_hdl,
8546             smr_attr_p, mr_hdl_p, mr_desc_p);
8547 
8548         if (status != IBT_SUCCESS && acct_enabled) {
8549                 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8550                 atomic_dec_32(&hca_p->hca_mr_count);
8551         }
8552         return (status);
8553 }
8554 
8555 static ibt_status_t
8556 daplka_ibt_deregister_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8557     ibt_mr_hdl_t mr_hdl)
8558 {
8559         daplka_hca_t    *hca_p;
8560         ibt_status_t    status;
8561 
8562         hca_p = mr_rp->mr_hca;
8563 
8564         status = ibt_deregister_mr(hca_hdl, mr_hdl);
8565         if (status != IBT_SUCCESS) {
8566                 return (status);
8567         }
8568         if (DAPLKA_RS_ACCT_CHARGED(mr_rp) > 0) {
8569                 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8570                 atomic_dec_32(&hca_p->hca_mr_count);
8571         }
8572         return (status);
8573 }
8574 
8575 static ibt_status_t
8576 daplka_ibt_alloc_srq(daplka_srq_resource_t *srq_rp, ibt_hca_hdl_t hca_hdl,
8577     ibt_srq_flags_t flags, ibt_pd_hdl_t pd, ibt_srq_sizes_t *reqsz,
8578     ibt_srq_hdl_t *srq_hdl_p, ibt_srq_sizes_t *realsz)
8579 {
8580         daplka_hca_t    *hca_p;
8581         uint32_t        max_srqs;
8582         boolean_t       acct_enabled;
8583         ibt_status_t    status;
8584 
8585         acct_enabled = daplka_accounting_enabled;
8586         hca_p = srq_rp->srq_hca;
8587         max_srqs = daplka_max_srq_percent * hca_p->hca_attr.hca_max_srqs / 100;
8588 
8589         if (acct_enabled) {
8590                 if (daplka_max_srq_percent != 0 &&
8591                     max_srqs <= hca_p->hca_srq_count) {
8592                         DERR("ibt_alloc_srq: resource limit exceeded "
8593                             "(limit %d, count %d)\n", max_srqs,
8594                             hca_p->hca_srq_count);
8595                         return (IBT_INSUFF_RESOURCE);
8596                 }
8597                 DAPLKA_RS_ACCT_INC(srq_rp, 1);
8598                 atomic_inc_32(&hca_p->hca_srq_count);
8599         }
8600         status = ibt_alloc_srq(hca_hdl, flags, pd, reqsz, srq_hdl_p, realsz);
8601 
8602         if (status != IBT_SUCCESS && acct_enabled) {
8603                 DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8604                 atomic_dec_32(&hca_p->hca_srq_count);
8605         }
8606         return (status);
8607 }
8608 
8609 static ibt_status_t
8610 daplka_ibt_free_srq(daplka_srq_resource_t *srq_rp, ibt_srq_hdl_t srq_hdl)
8611 {
8612         daplka_hca_t    *hca_p;
8613         ibt_status_t    status;
8614 
8615         hca_p = srq_rp->srq_hca;
8616 
8617         D3("ibt_free_srq: %p %p\n", srq_rp, srq_hdl);
8618 
8619         status = ibt_free_srq(srq_hdl);
8620         if (status != IBT_SUCCESS) {
8621                 return (status);
8622         }
8623         if (DAPLKA_RS_ACCT_CHARGED(srq_rp) > 0) {
8624                 DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8625                 atomic_dec_32(&hca_p->hca_srq_count);
8626         }
8627         return (status);
8628 }
8629 
8630 
8631 static int
8632 daplka_common_ioctl(int cmd, minor_t rnum, intptr_t arg, int mode,
8633         cred_t *cred, int *rvalp)
8634 {
8635         int error;
8636 
8637         switch (cmd) {
8638         case DAPL_IA_CREATE:
8639                 error = daplka_ia_create(rnum, arg, mode, cred, rvalp);
8640                 break;
8641 
8642         /* can potentially add other commands here */
8643 
8644         default:
8645                 DERR("daplka_common_ioctl: cmd not supported\n");
8646                 error = DDI_FAILURE;
8647         }
8648         return (error);
8649 }
8650 
8651 static int
8652 daplka_evd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8653         cred_t *cred, int *rvalp)
8654 {
8655         int error;
8656 
8657         switch (cmd) {
8658         case DAPL_EVD_CREATE:
8659                 error = daplka_evd_create(rp, arg, mode, cred, rvalp);
8660                 break;
8661 
8662         case DAPL_CQ_RESIZE:
8663                 error = daplka_cq_resize(rp, arg, mode, cred, rvalp);
8664                 break;
8665 
8666         case DAPL_EVENT_POLL:
8667                 error = daplka_event_poll(rp, arg, mode, cred, rvalp);
8668                 break;
8669 
8670         case DAPL_EVENT_WAKEUP:
8671                 error = daplka_event_wakeup(rp, arg, mode, cred, rvalp);
8672                 break;
8673 
8674         case DAPL_EVD_MODIFY_CNO:
8675                 error = daplka_evd_modify_cno(rp, arg, mode, cred, rvalp);
8676                 break;
8677 
8678         case DAPL_EVD_FREE:
8679                 error = daplka_evd_free(rp, arg, mode, cred, rvalp);
8680                 break;
8681 
8682         default:
8683                 DERR("daplka_evd_ioctl: cmd not supported\n");
8684                 error = DDI_FAILURE;
8685         }
8686         return (error);
8687 }
8688 
8689 static int
8690 daplka_ep_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8691         cred_t *cred, int *rvalp)
8692 {
8693         int error;
8694 
8695         switch (cmd) {
8696         case DAPL_EP_MODIFY:
8697                 error = daplka_ep_modify(rp, arg, mode, cred, rvalp);
8698                 break;
8699 
8700         case DAPL_EP_FREE:
8701                 error = daplka_ep_free(rp, arg, mode, cred, rvalp);
8702                 break;
8703 
8704         case DAPL_EP_CONNECT:
8705                 error = daplka_ep_connect(rp, arg, mode, cred, rvalp);
8706                 break;
8707 
8708         case DAPL_EP_DISCONNECT:
8709                 error = daplka_ep_disconnect(rp, arg, mode, cred, rvalp);
8710                 break;
8711 
8712         case DAPL_EP_REINIT:
8713                 error = daplka_ep_reinit(rp, arg, mode, cred, rvalp);
8714                 break;
8715 
8716         case DAPL_EP_CREATE:
8717                 error = daplka_ep_create(rp, arg, mode, cred, rvalp);
8718                 break;
8719 
8720         default:
8721                 DERR("daplka_ep_ioctl: cmd not supported\n");
8722                 error = DDI_FAILURE;
8723         }
8724         return (error);
8725 }
8726 
8727 static int
8728 daplka_mr_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8729         cred_t *cred, int *rvalp)
8730 {
8731         int error;
8732 
8733         switch (cmd) {
8734         case DAPL_MR_REGISTER:
8735                 error = daplka_mr_register(rp, arg, mode, cred, rvalp);
8736                 break;
8737 
8738         case DAPL_MR_REGISTER_LMR:
8739                 error = daplka_mr_register_lmr(rp, arg, mode, cred, rvalp);
8740                 break;
8741 
8742         case DAPL_MR_REGISTER_SHARED:
8743                 error = daplka_mr_register_shared(rp, arg, mode, cred, rvalp);
8744                 break;
8745 
8746         case DAPL_MR_DEREGISTER:
8747                 error = daplka_mr_deregister(rp, arg, mode, cred, rvalp);
8748                 break;
8749 
8750         case DAPL_MR_SYNC:
8751                 error = daplka_mr_sync(rp, arg, mode, cred, rvalp);
8752                 break;
8753 
8754         default:
8755                 DERR("daplka_mr_ioctl: cmd not supported\n");
8756                 error = DDI_FAILURE;
8757         }
8758         return (error);
8759 }
8760 
8761 static int
8762 daplka_mw_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8763         cred_t *cred, int *rvalp)
8764 {
8765         int error;
8766 
8767         switch (cmd) {
8768         case DAPL_MW_ALLOC:
8769                 error = daplka_mw_alloc(rp, arg, mode, cred, rvalp);
8770                 break;
8771 
8772         case DAPL_MW_FREE:
8773                 error = daplka_mw_free(rp, arg, mode, cred, rvalp);
8774                 break;
8775 
8776         default:
8777                 DERR("daplka_mw_ioctl: cmd not supported\n");
8778                 error = DDI_FAILURE;
8779         }
8780         return (error);
8781 }
8782 
8783 static int
8784 daplka_cno_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8785         cred_t *cred, int *rvalp)
8786 {
8787         int error;
8788 
8789         switch (cmd) {
8790         case DAPL_CNO_ALLOC:
8791                 error = daplka_cno_alloc(rp, arg, mode, cred, rvalp);
8792                 break;
8793 
8794         case DAPL_CNO_FREE:
8795                 error = daplka_cno_free(rp, arg, mode, cred, rvalp);
8796                 break;
8797 
8798         case DAPL_CNO_WAIT:
8799                 error = daplka_cno_wait(rp, arg, mode, cred, rvalp);
8800                 break;
8801 
8802         default:
8803                 DERR("daplka_cno_ioctl: cmd not supported\n");
8804                 error = DDI_FAILURE;
8805         }
8806         return (error);
8807 }
8808 
8809 static int
8810 daplka_pd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8811         cred_t *cred, int *rvalp)
8812 {
8813         int error;
8814 
8815         switch (cmd) {
8816         case DAPL_PD_ALLOC:
8817                 error = daplka_pd_alloc(rp, arg, mode, cred, rvalp);
8818                 break;
8819 
8820         case DAPL_PD_FREE:
8821                 error = daplka_pd_free(rp, arg, mode, cred, rvalp);
8822                 break;
8823 
8824         default:
8825                 DERR("daplka_pd_ioctl: cmd not supported\n");
8826                 error = DDI_FAILURE;
8827         }
8828         return (error);
8829 }
8830 
8831 static int
8832 daplka_sp_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8833         cred_t *cred, int *rvalp)
8834 {
8835         int error;
8836 
8837         switch (cmd) {
8838         case DAPL_SERVICE_REGISTER:
8839                 error = daplka_service_register(rp, arg, mode, cred, rvalp);
8840                 break;
8841 
8842         case DAPL_SERVICE_DEREGISTER:
8843                 error = daplka_service_deregister(rp, arg, mode, cred, rvalp);
8844                 break;
8845 
8846         default:
8847                 DERR("daplka_sp_ioctl: cmd not supported\n");
8848                 error = DDI_FAILURE;
8849         }
8850         return (error);
8851 }
8852 
8853 static int
8854 daplka_srq_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8855         cred_t *cred, int *rvalp)
8856 {
8857         int error;
8858 
8859         switch (cmd) {
8860         case DAPL_SRQ_CREATE:
8861                 error = daplka_srq_create(rp, arg, mode, cred, rvalp);
8862                 break;
8863 
8864         case DAPL_SRQ_RESIZE:
8865                 error = daplka_srq_resize(rp, arg, mode, cred, rvalp);
8866                 break;
8867 
8868         case DAPL_SRQ_FREE:
8869                 error = daplka_srq_free(rp, arg, mode, cred, rvalp);
8870                 break;
8871 
8872         default:
8873                 DERR("daplka_srq_ioctl: cmd(%d) not supported\n", cmd);
8874                 error = DDI_FAILURE;
8875                 break;
8876         }
8877         return (error);
8878 }
8879 
8880 static int
8881 daplka_misc_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8882         cred_t *cred, int *rvalp)
8883 {
8884         int error;
8885 
8886         switch (cmd) {
8887         case DAPL_CR_ACCEPT:
8888                 error = daplka_cr_accept(rp, arg, mode, cred, rvalp);
8889                 break;
8890 
8891         case DAPL_CR_REJECT:
8892                 error = daplka_cr_reject(rp, arg, mode, cred, rvalp);
8893                 break;
8894 
8895         case DAPL_IA_QUERY:
8896                 error = daplka_ia_query(rp, arg, mode, cred, rvalp);
8897                 break;
8898 
8899         case DAPL_CR_HANDOFF:
8900                 error = daplka_cr_handoff(rp, arg, mode, cred, rvalp);
8901                 break;
8902 
8903         default:
8904                 DERR("daplka_misc_ioctl: cmd not supported\n");
8905                 error = DDI_FAILURE;
8906         }
8907         return (error);
8908 }
8909 
8910 /*ARGSUSED*/
8911 static int
8912 daplka_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
8913         int *rvalp)
8914 {
8915         daplka_ia_resource_t    *ia_rp;
8916         minor_t                 rnum;
8917         int                     error = 0;
8918 
8919         rnum = getminor(dev);
8920         ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
8921         if (ia_rp == NULL) {
8922                 DERR("ioctl: resource not found, rnum %d\n", rnum);
8923                 return (ENXIO);
8924         }
8925 
8926         D4("ioctl: rnum = %d, cmd = 0x%x\n", rnum, cmd);
8927         if (DAPLKA_RS_RESERVED(ia_rp)) {
8928                 error = daplka_common_ioctl(cmd, rnum, arg, mode, cred, rvalp);
8929                 return (error);
8930         }
8931         if (DAPLKA_RS_TYPE(ia_rp) != DAPL_TYPE_IA) {
8932                 DERR("ioctl: invalid type %d\n", DAPLKA_RS_TYPE(ia_rp));
8933                 error = EINVAL;
8934                 goto cleanup;
8935         }
8936         if (ia_rp->ia_pid != ddi_get_pid()) {
8937                 DERR("ioctl: ia_pid %d != pid %d\n",
8938                     ia_rp->ia_pid, ddi_get_pid());
8939                 error = EINVAL;
8940                 goto cleanup;
8941         }
8942 
8943         switch (cmd & DAPL_TYPE_MASK) {
8944         case DAPL_TYPE_EVD:
8945                 error = daplka_evd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8946                 break;
8947 
8948         case DAPL_TYPE_EP:
8949                 error = daplka_ep_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8950                 break;
8951 
8952         case DAPL_TYPE_MR:
8953                 error = daplka_mr_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8954                 break;
8955 
8956         case DAPL_TYPE_MW:
8957                 error = daplka_mw_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8958                 break;
8959 
8960         case DAPL_TYPE_PD:
8961                 error = daplka_pd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8962                 break;
8963 
8964         case DAPL_TYPE_SP:
8965                 error = daplka_sp_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8966                 break;
8967 
8968         case DAPL_TYPE_CNO:
8969                 error = daplka_cno_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8970                 break;
8971 
8972         case DAPL_TYPE_MISC:
8973                 error = daplka_misc_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8974                 break;
8975 
8976         case DAPL_TYPE_SRQ:
8977                 error = daplka_srq_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8978                 break;
8979 
8980         default:
8981                 DERR("ioctl: invalid dapl type = %d\n", DAPLKA_RS_TYPE(ia_rp));
8982                 error = DDI_FAILURE;
8983         }
8984 
8985 cleanup:;
8986         DAPLKA_RS_UNREF(ia_rp);
8987         return (error);
8988 }
8989 
8990 /* ARGSUSED */
8991 static int
8992 daplka_open(dev_t *devp, int flag, int otyp, struct cred *cred)
8993 {
8994         minor_t rnum;
8995 
8996         /*
8997          * Char only
8998          */
8999         if (otyp != OTYP_CHR) {
9000                 return (EINVAL);
9001         }
9002 
9003         /*
9004          * Only zero can be opened, clones are used for resources.
9005          */
9006         if (getminor(*devp) != DAPLKA_DRIVER_MINOR) {
9007                 DERR("daplka_open: bad minor %d\n", getminor(*devp));
9008                 return (ENODEV);
9009         }
9010 
9011         /*
9012          * - allocate new minor number
9013          * - update devp argument to new device
9014          */
9015         if (daplka_resource_reserve(&rnum) == 0) {
9016                 *devp = makedevice(getmajor(*devp), rnum);
9017         } else {
9018                 return (ENOMEM);
9019         }
9020 
9021         return (DDI_SUCCESS);
9022 }
9023 
9024 /* ARGSUSED */
9025 static int
9026 daplka_close(dev_t dev, int flag, int otyp, struct cred *cred)
9027 {
9028         daplka_ia_resource_t    *ia_rp;
9029         minor_t                 rnum = getminor(dev);
9030 
9031         /*
9032          * Char only
9033          */
9034         if (otyp != OTYP_CHR) {
9035                 return (EINVAL);
9036         }
9037         D2("daplka_close: closing rnum = %d\n", rnum);
9038         atomic_inc_32(&daplka_pending_close);
9039 
9040         /*
9041          * remove from resource table.
9042          */
9043         ia_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
9044 
9045         /*
9046          * remove the initial reference
9047          */
9048         if (ia_rp != NULL) {
9049                 DAPLKA_RS_UNREF(ia_rp);
9050         }
9051         atomic_dec_32(&daplka_pending_close);
9052         return (DDI_SUCCESS);
9053 }
9054 
9055 
9056 /*
9057  * Resource management routines
9058  *
 * We start with no resource array. Each time we run out of slots, we
 * allocate a new, larger array and copy the existing block pointers into
 * it; a new resource blk is then allocated and added to the array.
9062  *
9063  * The resource control block contains:
 *      root    - array of pointers to resource blks
 *      sz      - current size of array.
 *      len     - number of array entries in use (valid indices are
 *                0 through len - 1).
9067  *
9068  * A search operation based on a resource number is as follows:
9069  *      index = rnum / RESOURCE_BLKSZ;
9070  *      ASSERT(index < resource_block.len);
9071  *      ASSERT(index < resource_block.sz);
9072  *      offset = rnum % RESOURCE_BLKSZ;
9073  *      ASSERT(offset >= resource_block.root[index]->base);
9074  *      ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
9075  *      return resource_block.root[index]->blks[offset];
9076  *
9077  * A resource blk is freed when its used count reaches zero.
9078  */
9079 
9080 /*
9081  * initializes the global resource table
9082  */
9083 static void
9084 daplka_resource_init(void)
9085 {
9086         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(daplka_resource))
9087         rw_init(&daplka_resource.daplka_rct_lock, NULL, RW_DRIVER, NULL);
9088         daplka_resource.daplka_rc_len = 0;
9089         daplka_resource.daplka_rc_sz = 0;
9090         daplka_resource.daplka_rc_cnt = 0;
9091         daplka_resource.daplka_rc_flag = 0;
9092         daplka_resource.daplka_rc_root = NULL;
9093         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(daplka_resource))
9094 }
9095 
9096 /*
9097  * destroys the global resource table
9098  */
9099 static void
9100 daplka_resource_fini(void)
9101 {
9102         int     i;
9103 
9104         rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9105         for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
9106                 daplka_resource_blk_t   *blk;
9107                 int                     j;
9108 
9109                 blk = daplka_resource.daplka_rc_root[i];
9110                 if (blk == NULL) {
9111                         continue;
9112                 }
9113                 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
9114                         if (blk->daplka_rcblk_blks[j] != NULL) {
9115                                 DERR("resource_fini: non-null slot %d, %p\n",
9116                                     j, blk->daplka_rcblk_blks[j]);
9117                         }
9118                 }
9119                 kmem_free(blk, sizeof (*blk));
9120                 daplka_resource.daplka_rc_root[i] = NULL;
9121         }
9122         if (daplka_resource.daplka_rc_root != NULL) {
9123                 uint_t  sz;
9124 
9125                 sz = daplka_resource.daplka_rc_sz *
9126                     sizeof (daplka_resource_blk_t *);
9127                 kmem_free(daplka_resource.daplka_rc_root, (uint_t)sz);
9128                 daplka_resource.daplka_rc_root = NULL;
9129                 daplka_resource.daplka_rc_len = 0;
9130                 daplka_resource.daplka_rc_sz = 0;
9131         }
9132         rw_exit(&daplka_resource.daplka_rct_lock);
9133         rw_destroy(&daplka_resource.daplka_rct_lock);
9134 }
9135 
9136 /*
9137  * reserves a slot in the global resource table.
9138  * this is called by the open() syscall. it is needed because
9139  * at open() time, we do not have sufficient information to
9140  * create an IA resource. the library needs to subsequently
9141  * call daplka_ia_create to insert an IA resource into this
9142  * reserved slot.
9143  */
9144 static int
9145 daplka_resource_reserve(minor_t *rnum)
9146 {
9147         int i, j, empty = -1;
9148         daplka_resource_blk_t *blk;
9149 
9150         rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9151         /*
9152          * Try to find an empty slot
9153          */
9154         for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
9155                 blk = daplka_resource.daplka_rc_root[i];
9156                 if (blk != NULL && blk->daplka_rcblk_avail > 0) {
9157 
9158                         D3("resource_alloc: available blks %d\n",
9159                             blk->daplka_rcblk_avail);
9160 
9161                         /*
9162                          * found an empty slot in this blk
9163                          */
9164                         for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
9165                                 if (blk->daplka_rcblk_blks[j] == NULL) {
9166                                         *rnum = (minor_t)
9167                                             (j + (i * DAPLKA_RC_BLKSZ));
9168                                         blk->daplka_rcblk_blks[j] =
9169                                             (daplka_resource_t *)
9170                                             DAPLKA_RC_RESERVED;
9171                                         blk->daplka_rcblk_avail--;
9172                                         daplka_resource.daplka_rc_cnt++;
9173                                         rw_exit(&daplka_resource.
9174                                             daplka_rct_lock);
9175                                         return (0);
9176                                 }
9177                         }
9178                 } else if (blk == NULL && empty < 0) {
9179                         /*
9180                          * remember first empty slot
9181                          */
9182                         empty = i;
9183                 }
9184         }
9185 
9186         /*
9187          * Couldn't find anything, allocate a new blk
9188          * Do we need to reallocate the root array
9189          */
9190         if (empty < 0) {
9191                 if (daplka_resource.daplka_rc_len ==
9192                     daplka_resource.daplka_rc_sz) {
9193                         /*
9194                          * Allocate new array and copy current stuff into it
9195                          */
9196                         daplka_resource_blk_t   **p;
9197                         uint_t newsz = (uint_t)daplka_resource.daplka_rc_sz +
9198                             DAPLKA_RC_BLKSZ;
9199 
9200                         D3("resource_alloc: increasing no. of buckets to %d\n",
9201                             newsz);
9202 
9203                         p = kmem_zalloc(newsz * sizeof (*p), daplka_km_flags);
9204 
9205                         if (daplka_resource.daplka_rc_root) {
9206                                 uint_t oldsz;
9207 
9208                                 oldsz = (uint_t)(daplka_resource.daplka_rc_sz *
9209                                     (int)sizeof (*p));
9210 
9211                                 /*
9212                                  * Copy old data into new space and
9213                                  * free old stuff
9214                                  */
9215                                 bcopy(daplka_resource.daplka_rc_root, p, oldsz);
9216                                 kmem_free(daplka_resource.daplka_rc_root,
9217                                     oldsz);
9218                         }
9219 
9220                         daplka_resource.daplka_rc_root = p;
9221                         daplka_resource.daplka_rc_sz = (int)newsz;
9222                 }
9223 
9224                 empty = daplka_resource.daplka_rc_len;
9225                 daplka_resource.daplka_rc_len++;
9226 
9227                 D3("resource_alloc: daplka_rc_len %d\n",
9228                     daplka_resource.daplka_rc_len);
9229         }
9230 
9231         /*
9232          * Allocate a new blk
9233          */
9234         blk = kmem_zalloc(sizeof (*blk), daplka_km_flags);
9235         ASSERT(daplka_resource.daplka_rc_root[empty] == NULL);
9236         daplka_resource.daplka_rc_root[empty] = blk;
9237         blk->daplka_rcblk_avail = DAPLKA_RC_BLKSZ - 1;
9238 
9239         /*
9240          * Allocate slot
9241          */
9242         *rnum = (minor_t)(empty * DAPLKA_RC_BLKSZ);
9243         blk->daplka_rcblk_blks[0] = (daplka_resource_t *)DAPLKA_RC_RESERVED;
9244         daplka_resource.daplka_rc_cnt++;
9245         rw_exit(&daplka_resource.daplka_rct_lock);
9246 
9247         return (0);
9248 }
9249 
9250 /*
9251  * removes resource from global resource table
9252  */
9253 static daplka_resource_t *
9254 daplka_resource_remove(minor_t rnum)
9255 {
9256         int i, j;
9257         daplka_resource_blk_t *blk;
9258         daplka_resource_t *p;
9259 
9260         i = (int)(rnum / DAPLKA_RC_BLKSZ);
9261         j = (int)(rnum % DAPLKA_RC_BLKSZ);
9262 
9263         rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9264         if (i >= daplka_resource.daplka_rc_len) {
9265                 rw_exit(&daplka_resource.daplka_rct_lock);
9266                 DERR("resource_remove: invalid rnum %d\n", rnum);
9267                 return (NULL);
9268         }
9269 
9270         ASSERT(daplka_resource.daplka_rc_root);
9271         ASSERT(i < daplka_resource.daplka_rc_len);
9272         ASSERT(i < daplka_resource.daplka_rc_sz);
9273         blk = daplka_resource.daplka_rc_root[i];
9274         if (blk == NULL) {
9275                 rw_exit(&daplka_resource.daplka_rct_lock);
9276                 DERR("resource_remove: invalid rnum %d\n", rnum);
9277                 return (NULL);
9278         }
9279 
9280         if (blk->daplka_rcblk_blks[j] == NULL) {
9281                 rw_exit(&daplka_resource.daplka_rct_lock);
9282                 DERR("resource_remove: blk->daplka_rcblk_blks[j] == NULL\n");
9283                 return (NULL);
9284         }
9285         p = blk->daplka_rcblk_blks[j];
9286         blk->daplka_rcblk_blks[j] = NULL;
9287         blk->daplka_rcblk_avail++;
9288         if (blk->daplka_rcblk_avail == DAPLKA_RC_BLKSZ) {
9289                 /*
9290                  * free this blk
9291                  */
9292                 kmem_free(blk, sizeof (*blk));
9293                 daplka_resource.daplka_rc_root[i] = NULL;
9294         }
9295         daplka_resource.daplka_rc_cnt--;
9296         rw_exit(&daplka_resource.daplka_rct_lock);
9297 
9298         if ((intptr_t)p == DAPLKA_RC_RESERVED) {
9299                 return (NULL);
9300         } else {
9301                 return (p);
9302         }
9303 }
9304 
9305 /*
9306  * inserts resource into the slot designated by rnum
9307  */
9308 static int
9309 daplka_resource_insert(minor_t rnum, daplka_resource_t *rp)
9310 {
9311         int i, j, error = -1;
9312         daplka_resource_blk_t *blk;
9313 
9314         /*
9315          * Find resource and lock it in WRITER mode
9316          * search for available resource slot
9317          */
9318 
9319         i = (int)(rnum / DAPLKA_RC_BLKSZ);
9320         j = (int)(rnum % DAPLKA_RC_BLKSZ);
9321 
9322         rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9323         if (i >= daplka_resource.daplka_rc_len) {
9324                 rw_exit(&daplka_resource.daplka_rct_lock);
9325                 DERR("resource_insert: resource %d not found\n", rnum);
9326                 return (-1);
9327         }
9328 
9329         blk = daplka_resource.daplka_rc_root[i];
9330         if (blk != NULL) {
9331                 ASSERT(i < daplka_resource.daplka_rc_len);
9332                 ASSERT(i < daplka_resource.daplka_rc_sz);
9333 
9334                 if ((intptr_t)blk->daplka_rcblk_blks[j] == DAPLKA_RC_RESERVED) {
9335                         blk->daplka_rcblk_blks[j] = rp;
9336                         error = 0;
9337                 } else {
9338                         DERR("resource_insert: %d not reserved, blk = %p\n",
9339                             rnum, blk->daplka_rcblk_blks[j]);
9340                 }
9341         } else {
9342                 DERR("resource_insert: resource %d not found\n", rnum);
9343         }
9344         rw_exit(&daplka_resource.daplka_rct_lock);
9345         return (error);
9346 }
9347 
9348 /*
9349  * finds resource using minor device number
9350  */
9351 static daplka_resource_t *
9352 daplka_resource_lookup(minor_t rnum)
9353 {
9354         int i, j;
9355         daplka_resource_blk_t *blk;
9356         daplka_resource_t *rp;
9357 
9358         /*
9359          * Find resource and lock it in READER mode
9360          * search for available resource slot
9361          */
9362 
9363         i = (int)(rnum / DAPLKA_RC_BLKSZ);
9364         j = (int)(rnum % DAPLKA_RC_BLKSZ);
9365 
9366         rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
9367         if (i >= daplka_resource.daplka_rc_len) {
9368                 rw_exit(&daplka_resource.daplka_rct_lock);
9369                 DERR("resource_lookup: resource %d not found\n", rnum);
9370                 return (NULL);
9371         }
9372 
9373         blk = daplka_resource.daplka_rc_root[i];
9374         if (blk != NULL) {
9375                 ASSERT(i < daplka_resource.daplka_rc_len);
9376                 ASSERT(i < daplka_resource.daplka_rc_sz);
9377 
9378                 rp = blk->daplka_rcblk_blks[j];
9379                 if (rp == NULL || (intptr_t)rp == DAPLKA_RC_RESERVED) {
9380                         D3("resource_lookup: %d not found, blk = %p\n",
9381                             rnum, blk->daplka_rcblk_blks[j]);
9382                 } else {
9383                         DAPLKA_RS_REF((daplka_ia_resource_t *)rp);
9384                 }
9385         } else {
9386                 DERR("resource_lookup: resource %d not found\n", rnum);
9387                 rp = NULL;
9388         }
9389         rw_exit(&daplka_resource.daplka_rct_lock);
9390         return (rp);
9391 }
9392 
9393 /*
9394  * generic hash table implementation
9395  */
9396 
9397 /*
9398  * daplka_hash_create:
9399  *      initializes a hash table with the specified parameters
9400  *
9401  * input:
9402  *      htblp                   pointer to hash table
9403  *
9404  *      nbuckets                number of buckets (must be power of 2)
9405  *
9406  *      free_func               this function is called on each hash
9407  *                              table element when daplka_hash_destroy
9408  *                              is called
9409  *
9410  *      lookup_func             if daplka_hash_lookup is able to find
9411  *                              the desired object, this function is
9412  *                              applied on the object before
9413  *                              daplka_hash_lookup returns
9414  * output:
9415  *      none
9416  *
9417  * return value(s):
9418  *      EINVAL                  nbuckets is not a power of 2
9419  *      ENOMEM                  cannot allocate buckets
9420  *      0                       success
9421  */
9422 static int
9423 daplka_hash_create(daplka_hash_table_t *htblp, uint_t nbuckets,
9424         void (*free_func)(void *), void (*lookup_func)(void *))
9425 {
9426         int i;
9427 
9428         if ((nbuckets & ~(nbuckets - 1)) != nbuckets) {
9429                 DERR("hash_create: nbuckets not power of 2\n");
9430                 return (EINVAL);
9431         }
9432         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*htblp))
9433 
9434         htblp->ht_buckets =
9435             kmem_zalloc(sizeof (daplka_hash_bucket_t) * nbuckets,
9436             daplka_km_flags);
9437         if (htblp->ht_buckets == NULL) {
9438                 DERR("hash_create: cannot allocate buckets\n");
9439                 return (ENOMEM);
9440         }
9441         for (i = 0; i < nbuckets; i++) {
9442                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
9443                 htblp->ht_buckets[i].hb_count = 0;
9444                 htblp->ht_buckets[i].hb_entries = NULL;
9445                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
9446         }
9447         rw_init(&htblp->ht_table_lock, NULL, RW_DRIVER, NULL);
9448         mutex_init(&htblp->ht_key_lock, NULL, MUTEX_DRIVER, NULL);
9449 
9450         htblp->ht_count = 0;
9451         htblp->ht_next_hkey = (uint64_t)gethrtime();
9452         htblp->ht_nbuckets = nbuckets;
9453         htblp->ht_free_func = free_func;
9454         htblp->ht_lookup_func = lookup_func;
9455         htblp->ht_initialized = B_TRUE;
9456         D3("hash_create: done, buckets = %d\n", nbuckets);
9457         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*htblp))
9458         return (0);
9459 }
9460 
9461 /*
9462  * daplka_hash_insert:
9463  *      inserts an object into a hash table
9464  *
9465  * input:
9466  *      htblp                   pointer to hash table
9467  *
9468  *      hkeyp                   pointer to hash key.
9469  *                              *hkeyp being non-zero means that the caller
9470  *                              has generated its own hkey. if *hkeyp is zero,
9471  *                              this function will generate an hkey for the
9472  *                              caller. it is recommended that the caller
9473  *                              leave the hkey generation to this function
9474  *                              because the hkey is more likely to be evenly
9475  *                              distributed.
9476  *
9477  *      objp                    pointer to object to be inserted into
9478  *                              hash table
9479  *
9480  * output:
9481  *      hkeyp                   the generated hkey is returned via this pointer
9482  *
9483  * return value(s):
9484  *      EINVAL                  invalid parameter
9485  *      ENOMEM                  cannot allocate hash entry
9486  *      0                       successful
9487  */
9488 static int
9489 daplka_hash_insert(daplka_hash_table_t *htblp, uint64_t *hkeyp, void *objp)
9490 {
9491         daplka_hash_entry_t *hep, *curr_hep;
9492         daplka_hash_bucket_t *hbp;
9493         uint32_t bucket;
9494         uint64_t hkey;
9495 
9496         if (hkeyp == NULL) {
9497                 DERR("hash_insert: hkeyp == NULL\n");
9498                 return (EINVAL);
9499         }
9500         hep = kmem_zalloc(sizeof (*hep), daplka_km_flags);
9501         if (hep == NULL) {
9502                 DERR("hash_insert: cannot alloc hash_entry\n");
9503                 return (ENOMEM);
9504         }
9505         if (*hkeyp == 0) {
9506                 /* generate a new key */
9507                 mutex_enter(&htblp->ht_key_lock);
9508                 hkey = ++htblp->ht_next_hkey;
9509                 if (hkey == 0) {
9510                         hkey = htblp->ht_next_hkey = (uint64_t)gethrtime();
9511                 }
9512                 mutex_exit(&htblp->ht_key_lock);
9513         } else {
9514                 /* use user generated key */
9515                 hkey = *hkeyp;
9516         }
9517 
9518         /* only works if ht_nbuckets is a power of 2 */
9519         bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9520         ASSERT(objp != NULL);
9521         ASSERT(bucket < htblp->ht_nbuckets);
9522 
9523         rw_enter(&htblp->ht_table_lock, RW_WRITER);
9524         hep->he_hkey = hkey;
9525         hep->he_objp = objp;
9526 
9527         /* look for duplicate entries */
9528         hbp = &htblp->ht_buckets[bucket];
9529         curr_hep = hbp->hb_entries;
9530         while (curr_hep != NULL) {
9531                 if (curr_hep->he_hkey == hep->he_hkey) {
9532                         break;
9533                 }
9534                 curr_hep = curr_hep->he_next;
9535         }
9536         if (curr_hep != NULL) {
9537                 DERR("hash_insert: found duplicate hash entry: "
9538                     "bucket %d, hkey 0x%016llx\n",
9539                     bucket, (longlong_t)hep->he_hkey);
9540                 kmem_free(hep, sizeof (*hep));
9541                 rw_exit(&htblp->ht_table_lock);
9542                 return (EINVAL);
9543         }
9544         hep->he_next = hbp->hb_entries;
9545         hbp->hb_entries = hep;
9546         hbp->hb_count++;
9547         htblp->ht_count++;
9548         rw_exit(&htblp->ht_table_lock);
9549 
9550         if (*hkeyp == 0) {
9551                 *hkeyp = hkey;
9552                 ASSERT(*hkeyp != 0);
9553         }
9554         D3("hash_insert: htblp 0x%p, hkey = 0x%016llx, bucket = %d\n",
9555             htblp, (longlong_t)*hkeyp, bucket);
9556         return (0);
9557 }
9558 
9559 /*
9560  * daplka_hash_remove:
9561  *      removes object identified by hkey from hash table
9562  *
9563  * input:
9564  *      htblp                   pointer to hash table
9565  *
9566  *      hkey                    hkey that identifies the object to be removed
9567  *
9568  * output:
9569  *      objpp                   pointer to pointer to object.
9570  *                              if remove is successful, the removed object
9571  *                              will be returned via *objpp.
9572  *
9573  * return value(s):
9574  *      EINVAL                  cannot find hash entry
9575  *      0                       successful
9576  */
9577 static int
9578 daplka_hash_remove(daplka_hash_table_t *htblp, uint64_t hkey, void **objpp)
9579 {
9580         daplka_hash_entry_t     *free_hep, **curr_hepp;
9581         daplka_hash_bucket_t    *hbp;
9582         uint32_t                bucket;
9583 
9584         bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9585 
9586         rw_enter(&htblp->ht_table_lock, RW_WRITER);
9587         hbp = &htblp->ht_buckets[bucket];
9588 
9589         curr_hepp = &hbp->hb_entries;
9590         while (*curr_hepp != NULL) {
9591                 if ((*curr_hepp)->he_hkey == hkey) {
9592                         break;
9593                 }
9594                 curr_hepp = &(*curr_hepp)->he_next;
9595         }
9596         if (*curr_hepp == NULL) {
9597                 DERR("hash_remove: cannot find hash entry: "
9598                     "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9599                 rw_exit(&htblp->ht_table_lock);
9600                 return (EINVAL);
9601         } else {
9602                 if (objpp != NULL) {
9603                         *objpp = (*curr_hepp)->he_objp;
9604                 }
9605                 free_hep = *curr_hepp;
9606                 *curr_hepp = (*curr_hepp)->he_next;
9607                 kmem_free(free_hep, sizeof (*free_hep));
9608         }
9609         hbp->hb_count--;
9610         htblp->ht_count--;
9611         D3("hash_remove: removed entry, hkey 0x%016llx, bucket %d, "
9612             "hb_count %d, hb_count %d\n",
9613             (longlong_t)hkey, bucket, hbp->hb_count, htblp->ht_count);
9614         rw_exit(&htblp->ht_table_lock);
9615         return (0);
9616 }
9617 
9618 /*
9619  * daplka_hash_walk:
9620  *      walks through the entire hash table. applying func on each of
9621  *      the inserted objects. stops walking if func returns non-zero.
9622  *
9623  * input:
9624  *      htblp                   pointer to hash table
9625  *
9626  *      func                    function to be applied on each object
9627  *
9628  *      farg                    second argument to func
9629  *
9630  *      lockmode                can be RW_WRITER or RW_READER. this
9631  *                              allows the caller to choose what type
9632  *                              of lock to acquire before walking the
9633  *                              table.
9634  *
9635  * output:
9636  *      none
9637  *
9638  * return value(s):
9639  *      none
9640  */
9641 static void
9642 daplka_hash_walk(daplka_hash_table_t *htblp, int (*func)(void *, void *),
9643         void *farg, krw_t lockmode)
9644 {
9645         daplka_hash_entry_t *curr_hep;
9646         daplka_hash_bucket_t *hbp;
9647         uint32_t bucket, retval = 0;
9648 
9649         ASSERT(lockmode == RW_WRITER || lockmode == RW_READER);
9650 
9651         /* needed for warlock */
9652         if (lockmode == RW_WRITER) {
9653                 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9654         } else {
9655                 rw_enter(&htblp->ht_table_lock, RW_READER);
9656         }
9657         for (bucket = 0; bucket < htblp->ht_nbuckets && retval == 0; bucket++) {
9658                 hbp = &htblp->ht_buckets[bucket];
9659                 curr_hep = hbp->hb_entries;
9660                 while (curr_hep != NULL) {
9661                         retval = (*func)(curr_hep->he_objp, farg);
9662                         if (retval != 0) {
9663                                 break;
9664                         }
9665                         curr_hep = curr_hep->he_next;
9666                 }
9667         }
9668         rw_exit(&htblp->ht_table_lock);
9669 }
9670 
9671 /*
9672  * daplka_hash_lookup:
9673  *      finds object from hkey
9674  *
9675  * input:
9676  *      htblp                   pointer to hash table
9677  *
9678  *      hkey                    hkey that identifies the object to be looked up
9679  *
9680  * output:
9681  *      none
9682  *
9683  * return value(s):
9684  *      NULL                    if not found
9685  *      object pointer          if found
9686  */
9687 static void *
9688 daplka_hash_lookup(daplka_hash_table_t *htblp, uint64_t hkey)
9689 {
9690         daplka_hash_entry_t *curr_hep;
9691         uint32_t bucket;
9692         void *objp;
9693 
9694         bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9695 
9696         rw_enter(&htblp->ht_table_lock, RW_READER);
9697         curr_hep = htblp->ht_buckets[bucket].hb_entries;
9698         while (curr_hep != NULL) {
9699                 if (curr_hep->he_hkey == hkey) {
9700                         break;
9701                 }
9702                 curr_hep = curr_hep->he_next;
9703         }
9704         if (curr_hep == NULL) {
9705                 DERR("hash_lookup: cannot find hash entry: "
9706                     "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9707                 rw_exit(&htblp->ht_table_lock);
9708                 return (NULL);
9709         }
9710         objp = curr_hep->he_objp;
9711         ASSERT(objp != NULL);
9712         if (htblp->ht_lookup_func != NULL) {
9713                 (*htblp->ht_lookup_func)(objp);
9714         }
9715         rw_exit(&htblp->ht_table_lock);
9716         return (objp);
9717 }
9718 
9719 /*
9720  * daplka_hash_destroy:
9721  *      destroys hash table. applies free_func on all inserted objects.
9722  *
9723  * input:
9724  *      htblp                   pointer to hash table
9725  *
9726  * output:
9727  *      none
9728  *
9729  * return value(s):
9730  *      none
9731  */
9732 static void
9733 daplka_hash_destroy(daplka_hash_table_t *htblp)
9734 {
9735         daplka_hash_entry_t *curr_hep, *free_hep;
9736         daplka_hash_entry_t *free_list = NULL;
9737         daplka_hash_bucket_t *hbp;
9738         uint32_t bucket, cnt, total = 0;
9739 
9740         if (!htblp->ht_initialized) {
9741                 DERR("hash_destroy: not initialized\n");
9742                 return;
9743         }
9744         /* free all elements from hash table */
9745         rw_enter(&htblp->ht_table_lock, RW_WRITER);
9746         for (bucket = 0; bucket < htblp->ht_nbuckets; bucket++) {
9747                 hbp = &htblp->ht_buckets[bucket];
9748 
9749                 /* build list of elements to be freed */
9750                 curr_hep = hbp->hb_entries;
9751                 cnt = 0;
9752                 while (curr_hep != NULL) {
9753                         cnt++;
9754                         free_hep = curr_hep;
9755                         curr_hep = curr_hep->he_next;
9756 
9757                         free_hep->he_next = free_list;
9758                         free_list = free_hep;
9759                 }
9760                 ASSERT(cnt == hbp->hb_count);
9761                 total += cnt;
9762                 hbp->hb_count = 0;
9763                 hbp->hb_entries = NULL;
9764         }
9765         ASSERT(total == htblp->ht_count);
9766         D3("hash_destroy: htblp 0x%p, nbuckets %d, freed %d hash entries\n",
9767             htblp, htblp->ht_nbuckets, total);
9768         rw_exit(&htblp->ht_table_lock);
9769 
9770         /* free all objects, now without holding the hash table lock */
9771         cnt = 0;
9772         while (free_list != NULL) {
9773                 cnt++;
9774                 free_hep = free_list;
9775                 free_list = free_list->he_next;
9776                 if (htblp->ht_free_func != NULL) {
9777                         (*htblp->ht_free_func)(free_hep->he_objp);
9778                 }
9779                 kmem_free(free_hep, sizeof (*free_hep));
9780         }
9781         ASSERT(total == cnt);
9782 
9783         /* free hash buckets and destroy locks */
9784         kmem_free(htblp->ht_buckets,
9785             sizeof (daplka_hash_bucket_t) * htblp->ht_nbuckets);
9786 
9787         rw_enter(&htblp->ht_table_lock, RW_WRITER);
9788         htblp->ht_buckets = NULL;
9789         htblp->ht_count = 0;
9790         htblp->ht_nbuckets = 0;
9791         htblp->ht_free_func = NULL;
9792         htblp->ht_lookup_func = NULL;
9793         htblp->ht_initialized = B_FALSE;
9794         rw_exit(&htblp->ht_table_lock);
9795 
9796         mutex_destroy(&htblp->ht_key_lock);
9797         rw_destroy(&htblp->ht_table_lock);
9798 }
9799 
9800 /*
9801  * daplka_hash_getsize:
9802  *      return the number of objects in hash table
9803  *
9804  * input:
9805  *      htblp                   pointer to hash table
9806  *
9807  * output:
9808  *      none
9809  *
9810  * return value(s):
9811  *      number of objects in hash table
9812  */
9813 static uint32_t
9814 daplka_hash_getsize(daplka_hash_table_t *htblp)
9815 {
9816         uint32_t sz;
9817 
9818         rw_enter(&htblp->ht_table_lock, RW_READER);
9819         sz = htblp->ht_count;
9820         rw_exit(&htblp->ht_table_lock);
9821 
9822         return (sz);
9823 }
9824 
9825 /*
9826  * this function is used as ht_lookup_func above when lookup is called.
9827  * other types of objs may use a more elaborate lookup_func.
9828  */
9829 static void
9830 daplka_hash_generic_lookup(void *obj)
9831 {
9832         daplka_resource_t       *rp = (daplka_resource_t *)obj;
9833 
9834         mutex_enter(&rp->rs_reflock);
9835         rp->rs_refcnt++;
9836         ASSERT(rp->rs_refcnt != 0);
9837         mutex_exit(&rp->rs_reflock);
9838 }
9839 
9840 /*
9841  * Generates a non-zero 32 bit hash key used for the timer hash table.
9842  */
9843 static uint32_t
9844 daplka_timer_hkey_gen()
9845 {
9846         uint32_t new_hkey;
9847 
9848         do {
9849                 new_hkey = atomic_inc_32_nv(&daplka_timer_hkey);
9850         } while (new_hkey == 0);
9851 
9852         return (new_hkey);
9853 }
9854 
9855 
9856 /*
9857  * The DAPL KA debug logging routines
9858  */
9859 
9860 /*
9861  * Add the string str to the end of the debug log, followed by a newline.
9862  */
9863 static void
9864 daplka_dbglog(char *str)
9865 {
9866         size_t  length;
9867         size_t  remlen;
9868 
9869         /*
9870          * If this is the first time we've written to the log, initialize it.
9871          */
9872         if (!daplka_dbginit) {
9873                 return;
9874         }
9875         mutex_enter(&daplka_dbglock);
9876         /*
9877          * Note the log is circular; if this string would run over the end,
9878          * we copy the first piece to the end and then the last piece to
9879          * the beginning of the log.
9880          */
9881         length = strlen(str);
9882 
9883         remlen = (size_t)sizeof (daplka_dbgbuf) - daplka_dbgnext - 1;
9884 
9885         if (length > remlen) {
9886                 if (remlen)
9887                         bcopy(str, daplka_dbgbuf + daplka_dbgnext, remlen);
9888                 daplka_dbgbuf[sizeof (daplka_dbgbuf) - 1] = (char)NULL;
9889                 str += remlen;
9890                 length -= remlen;
9891                 daplka_dbgnext = 0;
9892         }
9893         bcopy(str, daplka_dbgbuf + daplka_dbgnext, length);
9894         daplka_dbgnext += length;
9895 
9896         if (daplka_dbgnext >= sizeof (daplka_dbgbuf))
9897                 daplka_dbgnext = 0;
9898         mutex_exit(&daplka_dbglock);
9899 }
9900 
9901 
9902 /*
9903  * Add a printf-style message to whichever debug logs we're currently using.
9904  */
9905 static void
9906 daplka_debug(const char *fmt, ...)
9907 {
9908         char    buff[512];
9909         va_list ap;
9910         /*
9911          * The system prepends the thread id and high resolution time
9912          * (nanoseconds are dropped and so are the upper digits)
9913          * to the specified string.
9914          * The unit for timestamp is 10 microseconds.
9915          * It wraps around every 10000 seconds.
9916          * Ex: gethrtime() = X ns = X/1000 us = X/10000 10 micro sec.
9917          */
9918         int     micro_time = (int)((gethrtime() / 10000) % 1000000000);
9919         (void) sprintf(buff, "th %p tm %9d: ", (void *)curthread, micro_time);
9920 
9921         va_start(ap, fmt);
9922         (void) vsprintf(buff+strlen(buff), fmt, ap);
9923         va_end(ap);
9924 
9925         daplka_dbglog(buff);
9926 }
9927 
9928 static void
9929 daplka_console(const char *fmt, ...)
9930 {
9931         char buff[512];
9932         va_list ap;
9933 
9934         va_start(ap, fmt);
9935         (void) vsprintf(buff, fmt, ap);
9936         va_end(ap);
9937 
9938         cmn_err(CE_CONT, "%s", buff);
9939 }