/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 *
 * UDAPL kernel agent
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/kstat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/taskq.h>
#include <sys/open.h>
#include <sys/uio.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/esunddi.h>
#include <sys/avl.h>
#include <sys/cred.h>
#include <sys/note.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <daplt_if.h>
#include <daplt.h>

/*
 * The following variables support the debug log buffer scheme.
 */
#ifdef DEBUG
static char daplka_dbgbuf[0x80000];
#else /* DEBUG */
static char daplka_dbgbuf[0x4000];
#endif /* DEBUG */
static int daplka_dbgsize = sizeof (daplka_dbgbuf);
static size_t daplka_dbgnext;
static int daplka_dbginit = 0;
static kmutex_t daplka_dbglock;

static int daplka_dbg = 0x0103;
static void daplka_console(const char *, ...);
static void daplka_debug(const char *, ...);
static int daplka_apm = 0x1;		/* default enable */
static int daplka_failback = 0x1;	/* default enable */
static int daplka_query_aft_setaltpath = 10;
#define	DERR \
	if (daplka_dbg & 0x100) \
		daplka_debug

#ifdef DEBUG

#define	DINFO \
	daplka_console

#define	D1 \
	if (daplka_dbg & 0x01) \
		daplka_debug
#define	D2 \
	if (daplka_dbg & 0x02) \
		daplka_debug
#define	D3 \
	if (daplka_dbg & 0x04) \
		daplka_debug
#define	D4 \
	if (daplka_dbg & 0x08) \
		daplka_debug

#else /* DEBUG */

#define	DINFO	if (0) printf
#define	D1	if (0) printf
#define	D2	if (0) printf
#define	D3	if (0) printf
#define	D4	if (0) printf

#endif /* DEBUG */
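
/*
 * Usage note (illustrative): the macros above expand to a guarded
 * call, so they are invoked exactly like printf, e.g.
 *
 *	D2("ep_connect: enter, cookie 0x%llx\n", (longlong_t)cookie);
 *
 * becomes "if (daplka_dbg & 0x02) daplka_debug(...)". DERR (bit
 * 0x100) is active in both DEBUG and non-DEBUG builds, while D1-D4
 * compile away entirely in non-DEBUG builds. The default daplka_dbg
 * value of 0x0103 enables DERR, D1 and D2.
 */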

/*
 * driver entry points
 */
static int daplka_open(dev_t *, int, int, struct cred *);
static int daplka_close(dev_t, int, int, struct cred *);
static int daplka_attach(dev_info_t *, ddi_attach_cmd_t);
static int daplka_detach(dev_info_t *, ddi_detach_cmd_t);
static int daplka_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int daplka_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * types of ioctls
 */
static int daplka_common_ioctl(int, minor_t, intptr_t, int, cred_t *, int *);
static int daplka_misc_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_sp_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);

/*
 * common ioctls and supporting functions
 */
static int daplka_ia_create(minor_t, intptr_t, int, cred_t *, int *);
static int daplka_ia_destroy(daplka_resource_t *);

/*
 * EP ioctls and supporting functions
 */
static int daplka_ep_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_modify(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_connect(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_disconnect(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_reinit(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ep_destroy(daplka_resource_t *);
static void daplka_hash_ep_free(void *);
static int daplka_ep_failback(void *objp, void *arg);
static int daplka_ep_altpath(daplka_ep_resource_t *, ib_gid_t *);

static uint32_t daplka_ep_get_state(daplka_ep_resource_t *);
static void daplka_ep_set_state(daplka_ep_resource_t *, uint32_t, uint32_t);
static boolean_t daplka_ep_transition_is_valid(uint32_t, uint32_t);
static daplka_timer_info_t *daplka_timer_info_alloc(daplka_ep_resource_t *);
static void daplka_timer_info_free(daplka_timer_info_t *);
static void daplka_timer_handler(void *);
static void daplka_timer_dispatch(void *);
static void daplka_timer_thread(void *);
static int daplka_cancel_timer(daplka_ep_resource_t *);
static void daplka_hash_timer_free(void *);

/*
 * EVD ioctls and supporting functions
 */
static int daplka_evd_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cq_resize(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_event_poll(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_evd_destroy(daplka_resource_t *);
static void daplka_cq_handler(ibt_cq_hdl_t, void *);
static void daplka_evd_wakeup(daplka_evd_resource_t *,
    daplka_evd_event_list_t *, daplka_evd_event_t *);
static void daplka_evd_event_enqueue(daplka_evd_event_list_t *,
    daplka_evd_event_t *);
static daplka_evd_event_t *daplka_evd_event_dequeue(daplka_evd_event_list_t *);
static void daplka_hash_evd_free(void *);


/*
 * SRQ ioctls and supporting functions
 */
static int daplka_srq_create(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_resize(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_srq_destroy(daplka_resource_t *);
static void daplka_hash_srq_free(void *);

/*
 * Miscellaneous ioctls
 */
static int daplka_cr_accept(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cr_reject(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cr_handoff(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_ia_query(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);

/*
 * PD ioctls and supporting functions
 */
static int daplka_pd_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_pd_destroy(daplka_resource_t *);
static void daplka_hash_pd_free(void *);

/*
 * SP ioctls and supporting functions
 */
static int daplka_service_register(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_service_deregister(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_sp_destroy(daplka_resource_t *);
static void daplka_hash_sp_free(void *);
static void daplka_hash_sp_unref(void *);

/*
 * MR ioctls and supporting functions
 */
static int daplka_mr_register(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_register_lmr(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_register_shared(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_deregister(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_sync(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mr_destroy(daplka_resource_t *);
static void daplka_hash_mr_free(void *);
static void daplka_shared_mr_free(daplka_mr_resource_t *);

/*
 * MW ioctls and supporting functions
 */
static int daplka_mw_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mw_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_mw_destroy(daplka_resource_t *);
static void daplka_hash_mw_free(void *);

/*
 * CNO ioctls and supporting functions
 */
static int daplka_cno_alloc(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_free(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_wait(daplka_ia_resource_t *, intptr_t, int,
    cred_t *, int *);
static int daplka_cno_destroy(daplka_resource_t *);
static void daplka_hash_cno_free(void *);

/*
 * CM handlers
 */
static ibt_cm_status_t daplka_cm_rc_handler(void *, ibt_cm_event_t *,
    ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

static ibt_cm_status_t daplka_cm_service_handler(void *, ibt_cm_event_t *,
    ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

static ibt_cm_status_t daplka_cm_service_req(daplka_sp_resource_t *,
    ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);

/*
 * resource management routines
 */
static int daplka_resource_reserve(minor_t *);
static int daplka_resource_insert(minor_t, daplka_resource_t *);
static daplka_resource_t *daplka_resource_remove(minor_t rnum);
static daplka_resource_t *daplka_resource_lookup(minor_t);
static void daplka_resource_init(void);
static void daplka_resource_fini(void);
static struct daplka_resource_table daplka_resource;

/*
 * hash table routines
 */
static int daplka_hash_insert(daplka_hash_table_t *, uint64_t *, void *);
static int daplka_hash_remove(daplka_hash_table_t *, uint64_t, void **);
static void daplka_hash_walk(daplka_hash_table_t *, int (*)(void *, void *),
    void *, krw_t);
static void *daplka_hash_lookup(daplka_hash_table_t *, uint64_t);
static int daplka_hash_create(daplka_hash_table_t *, uint_t,
    void (*)(void *), void (*)(void *));
static void daplka_hash_destroy(daplka_hash_table_t *);
static uint32_t daplka_hash_getsize(daplka_hash_table_t *);
static void daplka_hash_generic_lookup(void *);
static uint32_t daplka_timer_hkey_gen(void);

/*
 * async event handlers
 */
static void daplka_async_event_create(ibt_async_code_t, ibt_async_event_t *,
    uint64_t, daplka_ia_resource_t *);
static void daplka_rc_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_cq_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_un_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
    ibt_async_event_t *);
static void daplka_sm_notice_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
    ibt_subnet_event_t *event);
static void daplka_sm_gid_avail(ib_gid_t *, ib_gid_t *);

/*
 * IBTF wrappers and default limits used for resource accounting
 */
static boolean_t daplka_accounting_enabled = B_TRUE;
static uint32_t daplka_max_qp_percent = 100;
static uint32_t daplka_max_cq_percent = 100;
static uint32_t daplka_max_pd_percent = 100;
static uint32_t daplka_max_mw_percent = 100;
static uint32_t daplka_max_mr_percent = 100;
static uint32_t daplka_max_srq_percent = 100;

static ibt_status_t
daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *, ibt_hca_hdl_t,
    ibt_chan_alloc_flags_t, ibt_rc_chan_alloc_args_t *,
    ibt_channel_hdl_t *, ibt_chan_sizes_t *);

static ibt_status_t
daplka_ibt_free_channel(daplka_ep_resource_t *, ibt_channel_hdl_t);

static ibt_status_t
daplka_ibt_alloc_cq(daplka_evd_resource_t *, ibt_hca_hdl_t,
    ibt_cq_attr_t *, ibt_cq_hdl_t *, uint_t *);

static ibt_status_t
daplka_ibt_free_cq(daplka_evd_resource_t *, ibt_cq_hdl_t);

static ibt_status_t
daplka_ibt_alloc_pd(daplka_pd_resource_t *, ibt_hca_hdl_t,
    ibt_pd_flags_t, ibt_pd_hdl_t *);

static ibt_status_t
daplka_ibt_free_pd(daplka_pd_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t);

static ibt_status_t
daplka_ibt_alloc_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
    ibt_mw_flags_t, ibt_mw_hdl_t *, ibt_rkey_t *);

static ibt_status_t
daplka_ibt_free_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_mw_hdl_t);

static ibt_status_t
daplka_ibt_register_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
    ibt_mr_attr_t *, ibt_mr_hdl_t *, ibt_mr_desc_t *);

static ibt_status_t
daplka_ibt_register_shared_mr(daplka_mr_resource_t *, ibt_hca_hdl_t,
    ibt_mr_hdl_t, ibt_pd_hdl_t, ibt_smr_attr_t *, ibt_mr_hdl_t *,
    ibt_mr_desc_t *);

static ibt_status_t
daplka_ibt_deregister_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_mr_hdl_t);

static ibt_status_t
daplka_ibt_alloc_srq(daplka_srq_resource_t *, ibt_hca_hdl_t, ibt_srq_flags_t,
    ibt_pd_hdl_t, ibt_srq_sizes_t *, ibt_srq_hdl_t *, ibt_srq_sizes_t *);

static ibt_status_t
daplka_ibt_free_srq(daplka_srq_resource_t *, ibt_srq_hdl_t);

/*
 * macros for manipulating resource objects.
 * these macros can be used on objects that begin with a
 * daplka_resource_t header.
 */
#define	DAPLKA_RS_REFCNT(rp) ((rp)->header.rs_refcnt)

#define	DAPLKA_RS_REF(rp) {					\
	mutex_enter(&(rp)->header.rs_reflock);			\
	(rp)->header.rs_refcnt++;				\
	ASSERT((rp)->header.rs_refcnt != 0);			\
	mutex_exit(&(rp)->header.rs_reflock);			\
}

#define	DAPLKA_RS_UNREF(rp) {					\
	mutex_enter(&(rp)->header.rs_reflock);			\
	ASSERT((rp)->header.rs_refcnt != 0);			\
	if (--(rp)->header.rs_refcnt == 0) {			\
		ASSERT((rp)->header.rs_free != NULL);		\
		mutex_exit(&(rp)->header.rs_reflock);		\
		(rp)->header.rs_free((daplka_resource_t *)rp);	\
	} else {						\
		mutex_exit(&(rp)->header.rs_reflock);		\
	}							\
}

#define	DAPLKA_RS_INIT(rp, type, rnum, free_func) {		\
	(rp)->header.rs_refcnt = 1;				\
	(rp)->header.rs_type = (type);				\
	(rp)->header.rs_rnum = (rnum);				\
	(rp)->header.rs_charged = 0;				\
	(rp)->header.rs_free = (free_func);			\
	mutex_init(&(rp)->header.rs_reflock, NULL,		\
	    MUTEX_DRIVER, NULL);				\
}

#define	DAPLKA_RS_FINI(rp) {					\
	mutex_destroy(&(rp)->header.rs_reflock);		\
}

#define	DAPLKA_RS_ACCT_INC(rp, cnt) {				\
	atomic_add_32(&(rp)->header.rs_charged, (cnt));		\
}
#define	DAPLKA_RS_ACCT_DEC(rp, cnt) {				\
	atomic_add_32(&(rp)->header.rs_charged, -(cnt));	\
}
#define	DAPLKA_RS_ACCT_CHARGED(rp) ((rp)->header.rs_charged)

#define	DAPLKA_RS_RNUM(rp) ((rp)->header.rs_rnum)
#define	DAPLKA_RS_TYPE(rp) ((rp)->header.rs_type)
#define	DAPLKA_RS_RESERVED(rp) ((intptr_t)(rp) == DAPLKA_RC_RESERVED)

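/*
 * Reference counting sketch (illustrative): a resource starts with a
 * refcnt of 1 from DAPLKA_RS_INIT. Lookups through hash tables that
 * register daplka_hash_generic_lookup take an extra hold, which the
 * caller drops when done; the final DAPLKA_RS_UNREF invokes the
 * rs_free function supplied at init time:
 *
 *	DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP, rnum, daplka_ep_destroy);
 *	...
 *	DAPLKA_RS_REF(ep_rp);	hold taken on behalf of a lookup
 *	DAPLKA_RS_UNREF(ep_rp);	lookup hold dropped
 *	DAPLKA_RS_UNREF(ep_rp);	refcnt hits 0; daplka_ep_destroy runs
 *
 * daplka_ep_free below follows exactly this double-unref pattern.
 */
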
/*
 * depending on the timeout value, does either a cv_wait_sig or a
 * cv_timedwait_sig
 */
#define	DAPLKA_EVD_WAIT(cvp, mp, timeout)			\
	((timeout) == LONG_MAX) ? cv_wait_sig((cvp), (mp)) :	\
	cv_timedwait_sig((cvp), (mp), (timeout))
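
/*
 * For example (illustrative; the field names are hypothetical), an
 * event wait loop might do:
 *
 *	rval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv, &evd_rp->evd_lock, tmo);
 *
 * Per cv_wait_sig(9F) and cv_timedwait_sig(9F), rval is 0 if the wait
 * was interrupted by a signal, -1 if the timeout expired and positive
 * otherwise. A timeout of LONG_MAX means block without a time bound.
 */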

#define	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt++)
#define	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt--)

#define	DAPLKA_HOLD_HCA(dp, hca) {			\
	mutex_enter(&(dp)->daplka_mutex);		\
	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);		\
	mutex_exit(&(dp)->daplka_mutex);		\
}

#define	DAPLKA_RELE_HCA(dp, hca) {			\
	mutex_enter(&(dp)->daplka_mutex);		\
	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca);		\
	mutex_exit(&(dp)->daplka_mutex);		\
}

#define	DAPLKA_HCA_BUSY(hca)				\
	((hca)->hca_ref_cnt != 0 ||			\
	(hca)->hca_qp_count != 0 ||			\
	(hca)->hca_cq_count != 0 ||			\
	(hca)->hca_pd_count != 0 ||			\
	(hca)->hca_mw_count != 0 ||			\
	(hca)->hca_mr_count != 0)


static struct cb_ops daplka_cb_ops = {
	daplka_open,		/* cb_open */
	daplka_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	daplka_ioctl,		/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP,		/* cb_flag */
	CB_REV,			/* rev */
	nodev,			/* int (*cb_aread)() */
	nodev			/* int (*cb_awrite)() */
};

static struct dev_ops daplka_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	daplka_info,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	daplka_attach,		/* devo_attach */
	daplka_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&daplka_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	nulldev,		/* power */
	ddi_quiesce_not_needed,	/* devo_quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"uDAPL Service Driver",
	&daplka_ops,
};

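/*
 * note: the number of NULL slots below differs between the 64-bit
 * and 32-bit kernels because struct modlinkage sizes its ml_linkage
 * array as 7 entries under _LP64 and 4 entries otherwise.
 */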
static struct modlinkage modlinkage = {
#ifdef _LP64
	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL, NULL, NULL, NULL }
#else
	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL }
#endif
};

/*
 * daplka_dev holds global driver state and a list of HCAs
 */
static daplka_t *daplka_dev = NULL;
static void *daplka_state = NULL;

/*
 * global SP hash table
 */
static daplka_hash_table_t daplka_global_sp_htbl;

/*
 * timer_info hash table
 */
static daplka_hash_table_t daplka_timer_info_htbl;
static uint32_t daplka_timer_hkey = 0;

/*
 * shared MR avl tree
 */
static avl_tree_t daplka_shared_mr_tree;
static kmutex_t daplka_shared_mr_lock;
static int daplka_shared_mr_cmp(const void *, const void *);

/*
 * default kmem flags used by this driver
 */
static int daplka_km_flags = KM_SLEEP;

/*
 * taskq used for handling background tasks
 */
static taskq_t *daplka_taskq = NULL;

/*
 * daplka_cm_delay is the length of time the active
 * side needs to wait before timing out on the REP message.
 */
static clock_t daplka_cm_delay = 60000000;

/*
 * modunload will fail if pending_close is non-zero
 */
static uint32_t daplka_pending_close = 0;

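/*
 * registration info handed to ibt_attach() below: the IBTI interface
 * version, the client class, the async event handler invoked for HCA
 * events, a reserved field (NULL) and the name under which IBTF
 * knows this client.
 */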
static struct ibt_clnt_modinfo_s daplka_clnt_modinfo = {
	IBTI_V_CURR,
	IBT_USER,
	daplka_async_handler,
	NULL,
	DAPLKA_DRV_NAME
};

/*
 * Module Installation
 */
int
_init(void)
{
	int status;

	status = ddi_soft_state_init(&daplka_state, sizeof (daplka_t), 1);
	if (status != 0) {
		return (status);
	}

	mutex_init(&daplka_dbglock, NULL, MUTEX_DRIVER, NULL);
	bzero(daplka_dbgbuf, sizeof (daplka_dbgbuf));
	daplka_dbgnext = 0;
	daplka_dbginit = 1;

	daplka_resource_init();

	status = mod_install(&modlinkage);
	if (status != DDI_SUCCESS) {
		/* undo inits done before mod_install */
		daplka_resource_fini();
		mutex_destroy(&daplka_dbglock);
		ddi_soft_state_fini(&daplka_state);
	}
	return (status);
}

/*
 * Module Removal
 */
int
_fini(void)
{
	int status;

	/*
	 * mod_remove causes detach to be called
	 */
	if ((status = mod_remove(&modlinkage)) != 0) {
		DERR("fini: mod_remove failed: 0x%x\n", status);
		return (status);
	}

	daplka_resource_fini();
	mutex_destroy(&daplka_dbglock);
	ddi_soft_state_fini(&daplka_state);

	return (status);
}

/*
 * Return Module Info.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

static void
daplka_enqueue_hca(daplka_t *dp, daplka_hca_t *hca)
{
	daplka_hca_t *h;

	ASSERT(mutex_owned(&dp->daplka_mutex));

	if (dp->daplka_hca_list_head == NULL) {
		dp->daplka_hca_list_head = hca;
	} else {
		h = dp->daplka_hca_list_head;
		while (h->hca_next != NULL)
			h = h->hca_next;

		h->hca_next = hca;
	}
}

static void
daplka_dequeue_hca(daplka_t *dp, daplka_hca_t *hca)
{
	daplka_hca_t *h;

	ASSERT(mutex_owned(&dp->daplka_mutex));

	if (dp->daplka_hca_list_head == hca)
		dp->daplka_hca_list_head = hca->hca_next;
	else {
		h = dp->daplka_hca_list_head;
		while (h->hca_next != hca)
			h = h->hca_next;
		h->hca_next = hca->hca_next;
	}
}

static int
daplka_init_hca(daplka_t *dp, ib_guid_t hca_guid)
{
	daplka_hca_t *hca;
	ibt_hca_portinfo_t *pinfop;
	uint_t size;
	int j;
	ibt_status_t status;

	hca = kmem_zalloc(sizeof (daplka_hca_t), KM_SLEEP);

	hca->hca_guid = hca_guid;

	/*
	 * open the HCA for use
	 */
	status = ibt_open_hca(dp->daplka_clnt_hdl, hca_guid, &hca->hca_hdl);
	if (status != IBT_SUCCESS) {
		if (status == IBT_HCA_IN_USE) {
			DERR("ibt_open_hca() returned IBT_HCA_IN_USE\n");
		} else {
			DERR("ibt_open_hca() returned %d\n", status);
		}
		kmem_free(hca, sizeof (daplka_hca_t));
		return (status);
	}

	/*
	 * query HCA to get its info
	 */
	status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
	if (status != IBT_SUCCESS) {
		DERR("ibt_query_hca returned %d (hca_guid 0x%llx)\n",
		    status, (longlong_t)hca_guid);
		goto out;
	}

	/*
	 * query HCA to get info of all ports
	 */
	status = ibt_query_hca_ports(hca->hca_hdl,
	    0, &pinfop, &hca->hca_nports, &size);
	if (status != IBT_SUCCESS) {
		DERR("ibt_query_hca_ports returned %d "
		    "(hca_guid 0x%llx)\n", status,
		    (longlong_t)hca_guid);
		goto out;
	}
	hca->hca_ports = pinfop;
	hca->hca_pinfosz = size;

	DERR("hca guid 0x%llx, nports %d\n",
	    (longlong_t)hca_guid, hca->hca_nports);
	for (j = 0; j < hca->hca_nports; j++) {
		DERR("port %d: state %d prefix 0x%016llx "
		    "guid %016llx\n",
		    pinfop[j].p_port_num, pinfop[j].p_linkstate,
		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_prefix,
		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_guid);
	}

	mutex_enter(&dp->daplka_mutex);
	daplka_enqueue_hca(dp, hca);
	mutex_exit(&dp->daplka_mutex);

	return (IBT_SUCCESS);

out:
	(void) ibt_close_hca(hca->hca_hdl);
	kmem_free(hca, sizeof (daplka_hca_t));
	return (status);
}

/*
 * this function obtains the list of HCAs from IBTF.
 * the HCAs are then opened and the returned handles
 * and attributes are stored into the global daplka_dev
 * structure.
 */
static int
daplka_init_hcas(daplka_t *dp)
{
	int i;
	ib_guid_t *hca_guids;
	uint32_t hca_count;

	/*
	 * get the num & list of HCAs present
	 */
	hca_count = ibt_get_hca_list(&hca_guids);
	DERR("No. of HCAs present %d\n", hca_count);

	if (hca_count != 0) {
		/*
		 * get the info for each available HCA
		 */
		for (i = 0; i < hca_count; i++)
			(void) daplka_init_hca(dp, hca_guids[i]);

		ibt_free_hca_list(hca_guids, hca_count);
	}

	if (dp->daplka_hca_list_head != NULL)
		return (IBT_SUCCESS);
	else
		return (IBT_FAILURE);
}

static int
daplka_fini_hca(daplka_t *dp, daplka_hca_t *hca)
{
	ibt_status_t status;

	if (hca->hca_hdl != NULL) {
		status = ibt_close_hca(hca->hca_hdl);
		if (status != IBT_SUCCESS) {
			DERR("ibt_close_hca returned %d"
			    " (hca_guid 0x%llx)\n", status,
			    (longlong_t)hca->hca_guid);

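			/* requeue the HCA so a later detach can retry */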
			mutex_enter(&dp->daplka_mutex);
			daplka_enqueue_hca(dp, hca);
			mutex_exit(&dp->daplka_mutex);

			return (status);
		}
	}

	if (hca->hca_ports != NULL)
		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);

	kmem_free(hca, sizeof (daplka_hca_t));
	return (IBT_SUCCESS);
}

/*
 * closes all HCAs and frees up the HCA list
 */
static int
daplka_fini_hcas(daplka_t *dp)
{
	ibt_status_t status;
	daplka_hca_t *hca;

	mutex_enter(&daplka_dev->daplka_mutex);
	while ((hca = dp->daplka_hca_list_head) != NULL) {
		if (DAPLKA_HCA_BUSY(hca)) {
			mutex_exit(&daplka_dev->daplka_mutex);
			return (IBT_HCA_RESOURCES_NOT_FREED);
		}
		daplka_dequeue_hca(daplka_dev, hca);
		mutex_exit(&daplka_dev->daplka_mutex);

		if ((status = daplka_fini_hca(dp, hca)) != IBT_SUCCESS)
			return (status);

		mutex_enter(&daplka_dev->daplka_mutex);
	}
	mutex_exit(&daplka_dev->daplka_mutex);

	DERR("dapl kernel agent unloaded\n");
	return (IBT_SUCCESS);
}


/*
 * Attach the device, create and fill in daplka_dev
 */
static int
daplka_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	daplka_t *dp;
	int instance, retval, err;
	boolean_t sp_htbl_allocated = B_FALSE;
	boolean_t timer_htbl_allocated = B_FALSE;
	boolean_t shared_mr_tree_allocated = B_FALSE;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	/*
	 * Allocate soft data structure
	 */
	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(daplka_state, instance) != DDI_SUCCESS) {
		DERR("attach: bad state zalloc\n");
		return (DDI_FAILURE);
	}

	dp = ddi_get_soft_state(daplka_state, instance);
	if (dp == NULL) {
		ddi_soft_state_free(daplka_state, instance);
		DERR("attach: cannot get soft state\n");
		return (DDI_FAILURE);
	}
	/*
	 * Stuff private info into dip.
	 */
	dp->daplka_dip = dip;
	ddi_set_driver_private(dip, dp);
	daplka_dev = dp;
	mutex_init(&dp->daplka_mutex, NULL, MUTEX_DRIVER, NULL);

	/*
	 * Register driver with IBTF
	 */
	retval = ibt_attach(&daplka_clnt_modinfo, dip, dp,
	    &dp->daplka_clnt_hdl);
	if (retval != IBT_SUCCESS) {
		DERR("attach: ibt_attach failed: error = %d\n", retval);
		retval = DDI_FAILURE;
		goto error;
	}
	/* Register to receive SM events */
	ibt_register_subnet_notices(dp->daplka_clnt_hdl,
	    daplka_sm_notice_handler, NULL);

	retval = daplka_init_hcas(dp);
	if (retval != IBT_SUCCESS) {
		DERR("attach: hca_init failed: error = %d\n", retval);
		retval = DDI_FAILURE;
		goto error;
	}
	/*
	 * this table is used by cr_handoff
	 */
	retval = daplka_hash_create(&daplka_global_sp_htbl,
	    DAPLKA_G_SP_HTBL_SZ, daplka_hash_sp_unref,
	    daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("attach: cannot create sp hash table\n");
		retval = DDI_FAILURE;
		goto error;
	}
	sp_htbl_allocated = B_TRUE;

	/*
	 * this table stores per EP timer information.
	 * timer_info_t objects are inserted into this table whenever
	 * an EP timer is set. timers get removed when they expire
	 * or when they get cancelled.
	 */
	retval = daplka_hash_create(&daplka_timer_info_htbl,
	    DAPLKA_TIMER_HTBL_SZ, daplka_hash_timer_free, NULL);
	if (retval != 0) {
		DERR("attach: cannot create timer hash table\n");
		retval = DDI_FAILURE;
		goto error;
	}
	timer_htbl_allocated = B_TRUE;

	/*
	 * this taskq is currently only used for processing timers.
	 * other processing may also use this taskq in the future.
	 */
	daplka_taskq = taskq_create(DAPLKA_DRV_NAME, DAPLKA_TQ_NTHREADS,
	    maxclsyspri, 1, DAPLKA_TQ_NTHREADS, TASKQ_DYNAMIC);
	if (daplka_taskq == NULL) {
		DERR("attach: cannot create daplka_taskq\n");
		retval = DDI_FAILURE;
		goto error;
	}

	/*
	 * daplka_shared_mr_tree holds daplka_shared_mr_t objects that
	 * get retrieved or created when daplka_mr_register_shared is
	 * called.
	 */
	mutex_init(&daplka_shared_mr_lock, NULL, MUTEX_DRIVER, NULL);

	avl_create(&daplka_shared_mr_tree, daplka_shared_mr_cmp,
	    sizeof (daplka_shared_mr_t),
	    offsetof(daplka_shared_mr_t, smr_node));
	shared_mr_tree_allocated = B_TRUE;

	/*
	 * Create the filesystem device node.
	 */
	if (ddi_create_minor_node(dip, DAPLKA_MINOR_NAME, S_IFCHR,
	    0, DDI_PSEUDO, NULL) != DDI_SUCCESS) {
		DERR("attach: bad create_minor_node\n");
		retval = DDI_FAILURE;
		goto error;
	}
	dp->daplka_status = DAPLKA_STATE_ATTACHED;
	ddi_report_dev(dip);
	return (DDI_SUCCESS);

error:
	if (shared_mr_tree_allocated) {
		avl_destroy(&daplka_shared_mr_tree);
		mutex_destroy(&daplka_shared_mr_lock);
	}

	if (daplka_taskq) {
		taskq_destroy(daplka_taskq);
		daplka_taskq = NULL;
	}

	if (timer_htbl_allocated) {
		daplka_hash_destroy(&daplka_timer_info_htbl);
	}

	if (sp_htbl_allocated) {
		daplka_hash_destroy(&daplka_global_sp_htbl);
	}

	err = daplka_fini_hcas(dp);
	if (err != IBT_SUCCESS) {
		DERR("attach: hca_fini returned %d\n", err);
	}

	if (dp->daplka_clnt_hdl != NULL) {
		/* unregister SM event notification */
		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
		    (ibt_sm_notice_handler_t)NULL, NULL);
		err = ibt_detach(dp->daplka_clnt_hdl);

		if (err != IBT_SUCCESS) {
			DERR("attach: ibt_detach returned %d\n", err);
		}
	}
	mutex_destroy(&dp->daplka_mutex);

	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
		ddi_remove_minor_node(dip, NULL);
	}
	ddi_soft_state_free(daplka_state, instance);
	return (retval);
}

/*
 * Detach - Free resources allocated in attach
 */
/* ARGSUSED */
static int
daplka_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance, err;
	void *cookie = NULL;
	daplka_t *dp;

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}
	if (daplka_resource.daplka_rc_cnt > 0 ||
	    daplka_pending_close > 0) {
		DERR("detach: driver in use\n");
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	dp = ddi_get_soft_state(daplka_state, instance);
	if (dp == NULL) {
		DERR("detach: cannot get soft state\n");
		return (DDI_FAILURE);
	}
	err = daplka_fini_hcas(dp);
	if (err != IBT_SUCCESS) {
		DERR("detach: hca_fini returned %d\n", err);
		return (DDI_FAILURE);
	}
	if (dp->daplka_clnt_hdl != NULL) {
		/* unregister SM event notification */
		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
		    (ibt_sm_notice_handler_t)NULL, NULL);
		err = ibt_detach(dp->daplka_clnt_hdl);
		if (err != IBT_SUCCESS) {
			DERR("detach: ibt_detach returned %d\n", err);
			return (DDI_FAILURE);
		}
		dp->daplka_clnt_hdl = NULL;
	}
	mutex_destroy(&dp->daplka_mutex);
	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
		ddi_remove_minor_node(dip, NULL);
	}
	dp->daplka_status = DAPLKA_STATE_DETACHED;
	ddi_soft_state_free(daplka_state, instance);
	daplka_dev = NULL;

	/*
	 * by the time we get here, all clients of dapl should
	 * have exited and completed their cleanup properly.
	 * we can assert that all global data structures are now
	 * empty.
	 */
	ASSERT(avl_destroy_nodes(&daplka_shared_mr_tree, &cookie) == NULL);
	avl_destroy(&daplka_shared_mr_tree);
	mutex_destroy(&daplka_shared_mr_lock);

	ASSERT(daplka_hash_getsize(&daplka_timer_info_htbl) == 0);
	daplka_hash_destroy(&daplka_timer_info_htbl);

	ASSERT(daplka_hash_getsize(&daplka_global_sp_htbl) == 0);
	daplka_hash_destroy(&daplka_global_sp_htbl);

	taskq_destroy(daplka_taskq);

	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
daplka_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if (daplka_dev != NULL) {
			*result = daplka_dev->daplka_dip;
			return (DDI_SUCCESS);
		} else {
			return (DDI_FAILURE);
		}

	case DDI_INFO_DEVT2INSTANCE:
		*result = 0;
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}
}

/*
 * creates an EP resource.
 * An EP resource contains an RC channel. An EP resource holds a
 * reference to a send_evd (for the send CQ), recv_evd (for the
 * recv CQ), a connection evd and a PD. These references ensure
 * that the referenced resources are not freed until the EP itself
 * gets freed.
 */
/* ARGSUSED */
static int
daplka_ep_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp;
	daplka_pd_resource_t *pd_rp;
	dapl_ep_create_t args;
	ibt_rc_chan_alloc_args_t chan_args;
	ibt_chan_alloc_flags_t achan_flags;
	ibt_chan_sizes_t chan_real_sizes;
	ibt_hca_attr_t *hca_attrp;
	uint64_t ep_hkey = 0;
	boolean_t inserted = B_FALSE;
	uint32_t old_state, new_state;
	int retval;
	ibt_status_t status;

	D3("ep_create: enter\n");
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = kmem_zalloc(sizeof (daplka_ep_resource_t), daplka_km_flags);
	if (ep_rp == NULL) {
		DERR("ep_create: cannot allocate ep_rp\n");
		return (ENOMEM);
	}
	DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP,
	    DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy);

	mutex_init(&ep_rp->ep_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ep_rp->ep_cv, NULL, CV_DRIVER, NULL);
	ep_rp->ep_hca = ia_rp->ia_hca;
	ep_rp->ep_cookie = args.ep_cookie;
	ep_rp->ep_timer_hkey = 0;

	/*
	 * we don't have to use ep_get_state here because ep_rp is not in
	 * ep_htbl yet. refer to the description of daplka_ep_set_state
	 * for details about the EP state machine.
	 */
	ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
	new_state = old_state = DAPLKA_EP_STATE_CLOSED;

	/* get reference to send evd and get cq handle */
	ep_rp->ep_snd_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_snd_evd_hkey);
	if (ep_rp->ep_snd_evd == NULL) {
		DERR("ep_create: ep_snd_evd %llx not found\n",
		    args.ep_snd_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_scq = ep_rp->ep_snd_evd->evd_cq_hdl;
	if (chan_args.rc_scq == NULL) {
		DERR("ep_create: ep_snd_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to recv evd and get cq handle */
	ep_rp->ep_rcv_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_rcv_evd_hkey);
	if (ep_rp->ep_rcv_evd == NULL) {
		DERR("ep_create: ep_rcv_evd %llx not found\n",
		    args.ep_rcv_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_rcq = ep_rp->ep_rcv_evd->evd_cq_hdl;
	if (chan_args.rc_rcq == NULL) {
		DERR("ep_create: ep_rcv_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to conn evd */
	ep_rp->ep_conn_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_conn_evd_hkey);
	if (ep_rp->ep_conn_evd == NULL) {
		DERR("ep_create: ep_conn_evd %llx not found\n",
		    args.ep_conn_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to SRQ if needed */
	if (args.ep_srq_attached) {
		ep_rp->ep_srq_res = (daplka_srq_resource_t *)daplka_hash_lookup(
		    &ia_rp->ia_srq_htbl, args.ep_srq_hkey);
		if (ep_rp->ep_srq_res == NULL) {
			DERR("ep_create: ep_srq %llx not found\n",
			    (longlong_t)args.ep_srq_hkey);
			retval = EINVAL;
			goto cleanup;
		}
		ASSERT(DAPLKA_RS_TYPE(ep_rp->ep_srq_res) == DAPL_TYPE_SRQ);
		D3("ep_create: ep_srq %p %llx\n", ep_rp->ep_srq_res,
		    (longlong_t)args.ep_srq_hkey);
	} else {
		ep_rp->ep_srq_res = NULL;
	}

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.ep_pd_hkey);
	if (pd_rp == NULL) {
		DERR("ep_create: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	ep_rp->ep_pd_res = pd_rp;
	chan_args.rc_pd = pd_rp->pd_hdl;


	/*
	 * these checks ensure that the requested channel sizes
	 * are within the limits supported by the chosen HCA.
	 */
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.ep_ch_sizes.dcs_sq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_sq_sgl %d\n",
		    args.ep_ch_sizes.dcs_sq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_rq_sgl %d\n",
		    args.ep_ch_sizes.dcs_rq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_sq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_sq %d\n",
		    args.ep_ch_sizes.dcs_sq);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_rq %d\n",
		    args.ep_ch_sizes.dcs_rq);
		retval = EINVAL;
		goto cleanup;
	}

	chan_args.rc_sizes.cs_sq_sgl = args.ep_ch_sizes.dcs_sq_sgl;
	chan_args.rc_sizes.cs_rq_sgl = args.ep_ch_sizes.dcs_rq_sgl;
	chan_args.rc_sizes.cs_sq = args.ep_ch_sizes.dcs_sq;
	chan_args.rc_sizes.cs_rq = args.ep_ch_sizes.dcs_rq;
	chan_args.rc_flags = IBT_WR_SIGNALED;
	chan_args.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
	chan_args.rc_hca_port_num = ia_rp->ia_port_num;
	chan_args.rc_clone_chan = NULL;
	if (args.ep_srq_attached) {
		chan_args.rc_srq = ep_rp->ep_srq_res->srq_hdl;
	} else {
		chan_args.rc_srq = NULL;
	}

	D3("ep_create: sq_sgl %d, rq_sgl %d, sq %d, rq %d, "
	    "sig_type 0x%x, control 0x%x, portnum %d, clone_chan 0x%p\n",
	    args.ep_ch_sizes.dcs_sq_sgl, args.ep_ch_sizes.dcs_rq_sgl,
	    args.ep_ch_sizes.dcs_sq, args.ep_ch_sizes.dcs_rq,
	    chan_args.rc_flags, chan_args.rc_control,
	    chan_args.rc_hca_port_num, chan_args.rc_clone_chan);

	if (args.ep_srq_attached) {
		achan_flags = IBT_ACHAN_USER_MAP | IBT_ACHAN_USES_SRQ;
	} else {
		achan_flags = IBT_ACHAN_USER_MAP;
	}
	/* create rc channel */
	status = daplka_ibt_alloc_rc_channel(ep_rp, ia_rp->ia_hca_hdl,
	    achan_flags, &chan_args, &ep_rp->ep_chan_hdl,
	    &chan_real_sizes);
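	/*
	 * note the error-reporting convention used here and in the
	 * other resource ioctls: an IBT failure is returned to the
	 * library via *rvalp while the ioctl itself returns 0, so
	 * the library can tell ibt_status codes apart from errno
	 * values.
	 */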
	if (status != IBT_SUCCESS) {
		DERR("ep_create: alloc_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	args.ep_ch_real_sizes.dcs_sq = chan_real_sizes.cs_sq;
	args.ep_ch_real_sizes.dcs_rq = chan_real_sizes.cs_rq;
	args.ep_ch_real_sizes.dcs_sq_sgl = chan_real_sizes.cs_sq_sgl;
	args.ep_ch_real_sizes.dcs_rq_sgl = chan_real_sizes.cs_rq_sgl;

	/*
	 * store ep ptr with chan_hdl.
	 * this ep_ptr is used by the CM handlers (both active and
	 * passive)
	 * mutex is only needed for race of "destroy" and "async"
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	ibt_set_chan_private(ep_rp->ep_chan_hdl, (void *)ep_rp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_CHANNEL, (void *)ep_rp->ep_chan_hdl,
	    &args.ep_qp_data_out, sizeof (args.ep_qp_data_out));

	if (status != IBT_SUCCESS) {
		DERR("ep_create: ibt_ci_data_out error(%d)\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into ep hash table */
	retval = daplka_hash_insert(&ia_rp->ia_ep_htbl,
	    &ep_hkey, (void *)ep_rp);
	if (retval != 0) {
		DERR("ep_create: cannot insert ep resource into ep_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	/*
	 * at this point, the ep_rp can be looked up by other threads
	 * if they manage to guess the correct hkey. but they are not
	 * permitted to operate on ep_rp until we transition to the
	 * CLOSED state.
	 */

	/* return hkey to library */
	args.ep_hkey = ep_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

	daplka_ep_set_state(ep_rp, old_state, new_state);
	D3("ep_create: exit\n");
	return (0);

cleanup:
	if (inserted) {
		daplka_ep_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_ep_htbl, ep_hkey,
		    (void **)&free_rp);
		if (free_rp != ep_rp) {
			/*
			 * this case is impossible because ep_free will
			 * wait until our state transition is complete.
			 */
			DERR("ep_create: cannot remove ep from hash table\n");
			ASSERT(B_FALSE);
			return (retval);
		}
	}
	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * daplka_ep_get_state retrieves the current state of the EP and
 * sets the state to TRANSITIONING. if the current state is already
 * TRANSITIONING, this function will wait until the state becomes one
 * of the other EP states. Most of the EP related ioctls follow the
 * call sequence:
 *
 *	new_state = old_state = daplka_ep_get_state(ep_rp);
 *	...
 *	...some code that affects the EP
 *	...
 *	new_state = <NEW_STATE>;
 *	daplka_ep_set_state(ep_rp, old_state, new_state);
 *
 * this call sequence ensures that only one thread may access the EP
 * during the time ep_state is in TRANSITIONING. daplka_ep_set_state
 * transitions ep_state to new_state and wakes up any waiters blocking
 * on ep_cv.
 *
 */
static uint32_t
daplka_ep_get_state(daplka_ep_resource_t *ep_rp)
{
	uint32_t old_state = 0;

	mutex_enter(&ep_rp->ep_lock);
	while (ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING) {
		D2("get_state: wait for state transition to complete\n");
		cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
		D2("get_state: done, curr state = %d\n", ep_rp->ep_state);
	}
	ASSERT(ep_rp->ep_state != DAPLKA_EP_STATE_TRANSITIONING);
	old_state = ep_rp->ep_state;

	/*
	 * an ep that is in the FREED state cannot transition
	 * back to any of the regular states
	 */
	if (old_state != DAPLKA_EP_STATE_FREED) {
		ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
	}
	mutex_exit(&ep_rp->ep_lock);
	return (old_state);
}

/*
 * EP state transitions (loopback transitions and transitions to the
 * FREED state are not shown; see the comments in
 * daplka_ep_transition_is_valid below for the rationale behind each
 * edge):
 *
 *	CLOSED        -> CONNECTING, ACCEPTING
 *	CONNECTING    -> CONNECTED, DISCONNECTING, DISCONNECTED, ABORTING
 *	ACCEPTING     -> CONNECTED, DISCONNECTING, DISCONNECTED
 *	CONNECTED     -> DISCONNECTING, DISCONNECTED
 *	DISCONNECTING -> DISCONNECTED
 *	ABORTING      -> DISCONNECTED
 *	DISCONNECTED  -> CLOSED
 */
static boolean_t
daplka_ep_transition_is_valid(uint32_t old_state, uint32_t new_state)
{
	boolean_t valid = B_FALSE;

	/*
	 * resetting to the same state is a no-op and is always
	 * permitted. transitioning to the FREED state indicates
	 * that the ep is about to be freed and no further operation
	 * is allowed on it. to support abrupt close, the ep is
	 * permitted to transition to the FREED state from any state.
	 */
	if (old_state == new_state ||
	    new_state == DAPLKA_EP_STATE_FREED) {
		return (B_TRUE);
	}

	switch (old_state) {
	case DAPLKA_EP_STATE_CLOSED:
		/*
		 * this is the initial ep_state.
		 * a transition to CONNECTING or ACCEPTING may occur
		 * upon calling daplka_ep_connect or daplka_cr_accept,
		 * respectively.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTING ||
		    new_state == DAPLKA_EP_STATE_ACCEPTING) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_CONNECTING:
		/*
		 * we transition to this state if daplka_ep_connect
		 * is successful. from this state, we can transition
		 * to CONNECTED if daplka_cm_rc_conn_est gets called;
		 * or to DISCONNECTED if daplka_cm_rc_conn_closed or
		 * daplka_cm_rc_event_failure gets called. If the
		 * client calls daplka_ep_disconnect, we transition
		 * to DISCONNECTING. If a timer was set at ep_connect
		 * time and if the timer expires prior to any of the
		 * CM callbacks, we transition to ABORTING and then
		 * to DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTED ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED ||
		    new_state == DAPLKA_EP_STATE_ABORTING) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_ACCEPTING:
		/*
		 * we transition to this state if daplka_cr_accept
		 * is successful. from this state, we can transition
		 * to CONNECTED if daplka_cm_service_conn_est gets called;
		 * or to DISCONNECTED if daplka_cm_service_conn_closed or
		 * daplka_cm_service_event_failure gets called. If the
		 * client calls daplka_ep_disconnect, we transition to
		 * DISCONNECTING.
		 */
		if (new_state == DAPLKA_EP_STATE_CONNECTED ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_CONNECTED:
		/*
		 * we transition to this state if an active or passive
		 * connection gets established. if the client calls
		 * daplka_ep_disconnect, we transition to the
		 * DISCONNECTING state. subsequent CM callbacks will
		 * cause ep_state to be set to DISCONNECTED. If the
		 * remote peer terminates the connection before we do,
		 * it is possible for us to transition directly from
		 * CONNECTED to DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTING ||
		    new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_DISCONNECTING:
		/*
		 * we transition to this state if the client calls
		 * daplka_ep_disconnect.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_ABORTING:
		/*
		 * we transition to this state if the active side
		 * EP timer has expired. this is only a transient
		 * state that is set during timer processing. when
		 * timer processing completes, ep_state will become
		 * DISCONNECTED.
		 */
		if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
			valid = B_TRUE;
		}
		break;
	case DAPLKA_EP_STATE_DISCONNECTED:
		/*
		 * we transition to this state if we get a closed
		 * or event_failure CM callback. an expired timer
		 * can also cause us to be in this state. this
		 * is the only state in which we permit the
		 * ep_reinit operation.
		 */
		if (new_state == DAPLKA_EP_STATE_CLOSED) {
			valid = B_TRUE;
		}
		break;
	default:
		break;
	}

	if (!valid) {
		DERR("ep_transition: invalid state change %d -> %d\n",
		    old_state, new_state);
	}
	return (valid);
}

/*
 * first check if the transition is valid. then set ep_state
 * to new_state and wake up all waiters.
 */
static void
daplka_ep_set_state(daplka_ep_resource_t *ep_rp, uint32_t old_state,
    uint32_t new_state)
{
	boolean_t valid;

	ASSERT(new_state != DAPLKA_EP_STATE_TRANSITIONING);

	valid = daplka_ep_transition_is_valid(old_state, new_state);
	mutex_enter(&ep_rp->ep_lock);
	if (ep_rp->ep_state != DAPLKA_EP_STATE_FREED) {
		if (valid) {
			ep_rp->ep_state = new_state;
		} else {
			/*
			 * this case is impossible.
			 * we have a serious problem if we get here.
			 * instead of panicking, we reset the state to
			 * old_state. doing this would at least prevent
			 * threads from hanging due to ep_state being
			 * stuck in TRANSITIONING.
			 */
			ep_rp->ep_state = old_state;
			ASSERT(B_FALSE);
		}
	}
	cv_broadcast(&ep_rp->ep_cv);
	mutex_exit(&ep_rp->ep_lock);
}

/*
 * modifies RC channel attributes.
 * currently, only the rdma_in and rdma_out attributes may
 * be modified. the channel must be in a quiescent state when
 * this function is called.
 */
/* ARGSUSED */
static int
daplka_ep_modify(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp = NULL;
	ibt_cep_modify_flags_t good_flags;
	ibt_rc_chan_modify_attr_t rcm_attr;
	ibt_hca_attr_t *hca_attrp;
	dapl_ep_modify_t args;
	ibt_status_t status;
	uint32_t old_state, new_state;
	int retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_modify_t),
	    mode);
	if (retval != 0) {
		DERR("ep_modify: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epm_hkey);
	if (ep_rp == NULL) {
		DERR("ep_modify: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	new_state = old_state = daplka_ep_get_state(ep_rp);

	if (old_state != DAPLKA_EP_STATE_CLOSED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
		DERR("ep_modify: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	good_flags = IBT_CEP_SET_RDMARA_OUT | IBT_CEP_SET_RDMARA_IN;
	if ((args.epm_flags & ~good_flags) != 0) {
		DERR("ep_modify: invalid flags 0x%x\n", args.epm_flags);
		retval = EINVAL;
		goto cleanup;
	}

	hca_attrp = &ia_rp->ia_hca->hca_attr;

	bzero(&rcm_attr, sizeof (ibt_rc_chan_modify_attr_t));
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_OUT) != 0) {
		if (args.epm_rdma_ra_out > hca_attrp->hca_max_rdma_out_chan) {
			DERR("ep_modify: invalid epm_rdma_ra_out %d\n",
			    args.epm_rdma_ra_out);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_out = args.epm_rdma_ra_out;
	}
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_IN) != 0) {
		if (args.epm_rdma_ra_in > hca_attrp->hca_max_rdma_in_chan) {
			DERR("ep_modify: epm_rdma_ra_in %d\n",
			    args.epm_rdma_ra_in);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_in = args.epm_rdma_ra_in;
	}
	status = ibt_modify_rc_channel(ep_rp->ep_chan_hdl, args.epm_flags,
	    &rcm_attr, NULL);
	if (status != IBT_SUCCESS) {
		DERR("ep_modify: modify_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/*
	 * ep_modify does not change ep_state
	 */
cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * Frees an EP resource.
 * An EP may only be freed when it is in the CLOSED or
 * DISCONNECTED state.
 */
/* ARGSUSED */
static int
daplka_ep_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp = NULL;
	dapl_ep_free_t args;
	uint32_t old_state, new_state;
	int retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_free_t), mode);
	if (retval != 0) {
		DERR("ep_free: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epf_hkey);
	if (ep_rp == NULL) {
		DERR("ep_free: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	new_state = old_state = daplka_ep_get_state(ep_rp);

	/*
	 * ep cannot be freed if it is in an invalid state.
	 */
	if (old_state != DAPLKA_EP_STATE_CLOSED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
		DERR("ep_free: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}
	ep_rp = NULL;
	retval = daplka_hash_remove(&ia_rp->ia_ep_htbl,
	    args.epf_hkey, (void **)&ep_rp);
	if (retval != 0 || ep_rp == NULL) {
		/*
		 * this is only possible if we have two threads
		 * calling ep_free in parallel.
		 */
		DERR("ep_free: cannot find ep resource\n");
		goto cleanup;
	}
	/* there should not be any outstanding timers */
	ASSERT(ep_rp->ep_timer_hkey == 0);

	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/* remove reference obtained by lookup */
	DAPLKA_RS_UNREF(ep_rp);

	/* UNREF calls the actual free function when refcnt is zero */
	DAPLKA_RS_UNREF(ep_rp);
	return (0);

cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/* remove reference obtained by lookup */
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}

/*
 * The following routines support the timeout feature of ep_connect.
 * Refer to the description of ep_connect for details.
 */
1790
1791 /*
1792 * this is the timer processing thread.
1793 */
1794 static void
1795 daplka_timer_thread(void *arg)
1796 {
1797 daplka_timer_info_t *timerp = (daplka_timer_info_t *)arg;
1798 daplka_ep_resource_t *ep_rp;
1799 daplka_evd_event_t *disc_ev = NULL;
1800 ibt_status_t status;
1801 int old_state, new_state;
1802
1803 ep_rp = timerp->ti_ep_res;
1804 ASSERT(ep_rp != NULL);
1805 ASSERT(timerp->ti_tmo_id != 0);
1806 timerp->ti_tmo_id = 0;
1807
1808 new_state = old_state = daplka_ep_get_state(ep_rp);
1809 if (old_state != DAPLKA_EP_STATE_CONNECTING) {
1810 /* unblock hash_ep_free */
1811 mutex_enter(&ep_rp->ep_lock);
1812 ASSERT(ep_rp->ep_timer_hkey != 0);
1813 ep_rp->ep_timer_hkey = 0;
1814 cv_broadcast(&ep_rp->ep_cv);
1815 mutex_exit(&ep_rp->ep_lock);
1816
1817 /* reset state to original state */
1818 daplka_ep_set_state(ep_rp, old_state, new_state);
1819
1820 /* this function will also unref ep_rp */
1821 daplka_timer_info_free(timerp);
1822 return;
1823 }
1824
1825 ASSERT(ep_rp->ep_timer_hkey != 0);
1826 ep_rp->ep_timer_hkey = 0;
1827
1828 /*
1829 * we cannot keep ep_state in TRANSITIONING if we call
1830 * ibt_close_rc_channel in blocking mode. this would cause
1831 * a deadlock because the cm callbacks will be blocked and
1832 * will not be able to wake us up.
1833 */
1834 new_state = DAPLKA_EP_STATE_ABORTING;
1835 daplka_ep_set_state(ep_rp, old_state, new_state);
1836
1837 /*
1838 * when we return from close_rc_channel, all callbacks should have
1839 * completed. we can also be certain that these callbacks did not
1840 * enqueue any events to conn_evd.
1841 */
1842 status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
1843 NULL, 0, NULL, NULL, NULL);
1844 if (status != IBT_SUCCESS) {
1845 DERR("timer_thread: ibt_close_rc_channel returned %d\n",
1846 status);
1847 }
1848 old_state = daplka_ep_get_state(ep_rp);
1849
1850 /*
1851 * this is the only thread that can transition ep_state out
1852 * of ABORTING. all other ep operations would fail when
1853 * ep_state is in ABORTING.
1854 */
1855 ASSERT(old_state == DAPLKA_EP_STATE_ABORTING);
1856
1857 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_SLEEP);
1858 ASSERT(disc_ev != NULL);
1859
1860 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
1861 disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
1862 disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
1863 disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
1864 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
1865 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
1866
1867 D2("timer_thread: enqueue event(%p) evdp(%p)\n",
1868 disc_ev, ep_rp->ep_conn_evd);
1869
1870 new_state = DAPLKA_EP_STATE_DISCONNECTED;
1871 daplka_ep_set_state(ep_rp, old_state, new_state);
1872
1873 daplka_evd_wakeup(ep_rp->ep_conn_evd,
1874 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
1875
1876 /* this function will also unref ep_rp */
1877 daplka_timer_info_free(timerp);
1878 }
1879
1880 /*
1881 * dispatches a thread to continue with timer processing.
1882 */
1883 static void
1884 daplka_timer_dispatch(void *arg)
1885 {
1886 /*
1887 * keep rescheduling this function until
1888 * taskq_dispatch succeeds.
1889 */
1890 if (taskq_dispatch(daplka_taskq,
1891 daplka_timer_thread, arg, TQ_NOSLEEP) == 0) {
1892 DERR("timer_dispatch: taskq_dispatch failed, retrying...\n");
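		/* retry the dispatch after 10 clock ticks */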
1893 (void) timeout(daplka_timer_dispatch, arg, 10);
1894 }
1895 }
1896
1897 /*
1898 * this function is called by the kernel's callout thread.
1899 * we first attempt to remove the timer object from the
1900 * global timer table. if it is found, we dispatch a thread
1901 * to continue processing the timer object. if it is not
1902 * found, that means the timer has been cancelled by someone
1903 * else.
1904 */
1905 static void
1906 daplka_timer_handler(void *arg)
1907 {
1908 uint64_t timer_hkey = (uintptr_t)arg;
1909 daplka_timer_info_t *timerp = NULL;
1910
1911 D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)timer_hkey);
1912
1913 (void) daplka_hash_remove(&daplka_timer_info_htbl,
1914 timer_hkey, (void **)&timerp);
1915 if (timerp == NULL) {
1916 D2("timer_handler: timer already cancelled\n");
1917 return;
1918 }
1919 daplka_timer_dispatch((void *)timerp);
1920 }
1921
1922 /*
1923 * allocates a timer_info object.
 * a reference to an EP is held by this object. this ensures
 * that the EP stays valid while a timer is outstanding.
1926 */
1927 static daplka_timer_info_t *
1928 daplka_timer_info_alloc(daplka_ep_resource_t *ep_rp)
1929 {
1930 daplka_timer_info_t *timerp;
1931
1932 timerp = kmem_zalloc(sizeof (*timerp), daplka_km_flags);
1933 if (timerp == NULL) {
1934 DERR("timer_info_alloc: cannot allocate timer info\n");
1935 return (NULL);
1936 }
1937 timerp->ti_ep_res = ep_rp;
1938 timerp->ti_tmo_id = 0;
1939
1940 return (timerp);
1941 }
1942
1943 /*
1944 * Frees the timer_info object.
1945 * we release the EP reference before freeing the object.
1946 */
1947 static void
1948 daplka_timer_info_free(daplka_timer_info_t *timerp)
1949 {
1950 ASSERT(timerp->ti_ep_res != NULL);
1951 DAPLKA_RS_UNREF(timerp->ti_ep_res);
1952 timerp->ti_ep_res = NULL;
1953 ASSERT(timerp->ti_tmo_id == 0);
1954 kmem_free(timerp, sizeof (*timerp));
1955 }
1956
1957 /*
1958 * cancels the timer set by ep_connect.
1959 * returns -1 if timer handling is in progress
1960 * and 0 otherwise.
1961 */
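
/*
 * note on races: this function and daplka_timer_handler both try to
 * claim the timer_info by removing its hkey from
 * daplka_timer_info_htbl; whoever wins owns the object. if the
 * callout won, the taskq thread (daplka_timer_thread) is already in
 * progress and this function returns -1 so the caller can back off.
 */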
1962 static int
1963 daplka_cancel_timer(daplka_ep_resource_t *ep_rp)
1964 {
1965 /*
1966 * this function can only be called when ep_state
1967 * is frozen.
1968 */
1969 ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING);
1970 if (ep_rp->ep_timer_hkey != 0) {
1971 daplka_timer_info_t *timerp = NULL;
1972
1973 (void) daplka_hash_remove(&daplka_timer_info_htbl,
1974 ep_rp->ep_timer_hkey, (void **)&timerp);
1975 if (timerp == NULL) {
1976 /*
1977 * this is possible if the timer_handler has
1978 * removed the timerp but the taskq thread has
1979 * not transitioned the ep_state to DISCONNECTED.
1980 * we need to reset the ep_state to allow the
1981 * taskq thread to continue with its work. the
1982 * taskq thread will set the ep_timer_hkey to 0
1983 * so we don't have to do it here.
1984 */
1985 DERR("cancel_timer: timer is being processed\n");
1986 return (-1);
1987 }
1988 /*
1989 * we got the timer object. if the handler fires at
1990 * this point, it will not be able to find the object
1991 * and will return immediately. normally, ti_tmo_id gets
1992 * cleared when the handler fires.
1993 */
1994 ASSERT(timerp->ti_tmo_id != 0);
1995
1996 /*
1997 * note that untimeout can possibly call the handler.
1998 * we are safe because the handler will be a no-op.
1999 */
2000 (void) untimeout(timerp->ti_tmo_id);
2001 timerp->ti_tmo_id = 0;
2002 daplka_timer_info_free(timerp);
2003 ep_rp->ep_timer_hkey = 0;
2004 }
2005 return (0);
2006 }
2007
2008 /*
2009 * this function is called by daplka_hash_destroy for
2010 * freeing timer_info objects
2011 */
2012 static void
2013 daplka_hash_timer_free(void *obj)
2014 {
2015 daplka_timer_info_free((daplka_timer_info_t *)obj);
2016 }
2017
2018 /* ARGSUSED */
2019 static uint16_t
2020 daplka_hellomsg_cksum(DAPL_PRIVATE *dp)
2021 {
2022 uint8_t *bp;
2023 int i;
2024 uint16_t cksum = 0;
2025
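	/*
	 * the checksum is a plain 16-bit sum over the raw bytes of the
	 * message; callers zero the embedded checksum field before
	 * computing or verifying the sum.
	 */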
2026 bp = (uint8_t *)dp;
2027 for (i = 0; i < sizeof (DAPL_PRIVATE); i++) {
2028 cksum += bp[i];
2029 }
2030 return (cksum);
2031 }
2032
2033 /*
2034 * ep_connect is called by the client to initiate a connection to a
2035 * remote service point. It is a non-blocking call. If a non-zero
2036 * timeout is specified by the client, a timer will be set just before
2037 * returning from ep_connect. Upon a successful return from ep_connect,
2038 * the client will call evd_wait to wait for the connection to complete.
2039 * If the connection is rejected or has failed due to an error, the
2040 * client will be notified with an event containing the appropriate error
2041 * code. If the connection is accepted, the client will be notified with
2042 * the CONN_ESTABLISHED event. If the timer expires before either of the
2043 * above events (error or established), a TIMED_OUT event will be delivered
2044 * to the client.
2045 *
2046 * the complicated part of the timer logic is the handling of race
2047 * conditions with CM callbacks. we need to ensure that either the CM or
2048 * the timer thread gets to deliver an event, but not both. when the
2049 * CM callback is about to deliver an event, it always tries to cancel
 * the outstanding timer. if cancel_timer indicates that the timer is
 * already being processed, the CM callback simply returns without
 * delivering an event. when the timer thread executes, it checks
 * whether the EP is still in CONNECTING state (timers only work on
 * the active side). if the EP is not in this state, the timer thread
 * returns without delivering an event.
2056 */
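
/*
 * Illustrative call sequence from the client's point of view (a
 * sketch based on the args handled below; the userland library
 * specifics are assumptions):
 *
 *	fill in dapl_ep_connect_t: epc_hkey, epc_sid, epc_timeout,
 *	    epc_priv/epc_priv_sz, and either epc_dgid or, when the
 *	    dgid is unknown, epc_raddr_sadata for ATS resolution;
 *	issue the ep_connect ioctl;
 *	wait on the conn EVD for CONN_ESTABLISHED, a reject/error
 *	    event, or DAPL_IB_CME_TIMED_OUT if the timer fires first.
 */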
2057 /* ARGSUSED */
2058 static int
2059 daplka_ep_connect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2060 cred_t *cred, int *rvalp)
2061 {
2062 daplka_ep_resource_t *ep_rp = NULL;
2063 dapl_ep_connect_t args;
2064 daplka_timer_info_t *timerp = NULL;
2065 uint32_t old_state, new_state;
2066 boolean_t timer_inserted = B_FALSE;
2067 uint64_t timer_hkey = 0;
2068 ibt_path_info_t path_info;
2069 ibt_path_attr_t path_attr;
2070 ibt_hca_attr_t *hca_attrp;
2071 ibt_chan_open_args_t chan_args;
2072 ibt_status_t status = IBT_SUCCESS;
2073 uint8_t num_paths;
2074 void *priv_data;
2075 DAPL_PRIVATE *dp;
2076 int retval = 0;
2077 ib_gid_t *sgid;
2078 ib_gid_t *dgid;
2079 uint64_t dgid_ored;
2080 ibt_ar_t ar_query_s;
2081 ibt_ar_t ar_result_s;
2082 ibt_path_flags_t pathflags;
2083
2084 D3("ep_connect: enter\n");
2085 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_connect_t),
2086 mode);
2087 if (retval != 0) {
2088 DERR("ep_connect: copyin error %d\n", retval);
2089 return (EFAULT);
2090 }
2091 ep_rp = (daplka_ep_resource_t *)
2092 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epc_hkey);
2093 if (ep_rp == NULL) {
2094 DERR("ep_connect: cannot find ep resource\n");
2095 return (EINVAL);
2096 }
2097 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2098
2099 new_state = old_state = daplka_ep_get_state(ep_rp);
2100 if (old_state != DAPLKA_EP_STATE_CLOSED) {
2101 DERR("ep_connect: invalid state %d\n", old_state);
2102 retval = EINVAL;
2103 goto cleanup;
2104 }
2105 if (args.epc_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
2106 DERR("ep_connect: private data len (%d) exceeded "
2107 "max size %d\n", args.epc_priv_sz,
2108 DAPL_MAX_PRIVATE_DATA_SIZE);
2109 retval = EINVAL;
2110 goto cleanup;
2111 }
2112
2113 /*
	 * check whether the remote IP address must be resolved to a
	 * dgid; an unidentified dgid requires an ATS query
2115 */
2116 dgid = &args.epc_dgid;
2117 dgid_ored = dgid->gid_guid | dgid->gid_prefix;
2118 #if defined(DAPLKA_DEBUG_FORCE_ATS)
2119 dgid_ored = 0ULL;
2120 #endif /* DAPLKA_DEBUG_FORCE_ATS */
2121 /* check for unidentified dgid */
2122 if (dgid_ored == 0ULL) {
2123 /*
2124 * setup for ibt_query_ar()
2125 */
2126 sgid = &ia_rp->ia_hca_sgid;
2127 ar_query_s.ar_gid.gid_guid = 0ULL;
2128 ar_query_s.ar_gid.gid_prefix = 0ULL;
2129 ar_query_s.ar_pkey = 0;
2130 bcopy(args.epc_raddr_sadata.iad_sadata,
2131 ar_query_s.ar_data, DAPL_ATS_NBYTES);
2132 #define UR(b) ar_query_s.ar_data[(b)]
2133 D3("daplka_ep_connect: SA[8] %d.%d.%d.%d\n",
2134 UR(8), UR(9), UR(10), UR(11));
2135 D3("daplka_ep_connect: SA[12] %d.%d.%d.%d\n",
2136 UR(12), UR(13), UR(14), UR(15));
2137 status = ibt_query_ar(sgid, &ar_query_s, &ar_result_s);
2138 if (status != IBT_SUCCESS) {
2139 DERR("ep_connect: ibt_query_ar returned %d\n", status);
2140 *rvalp = (int)status;
2141 retval = 0;
2142 goto cleanup;
2143 }
2144 /*
2145 * dgid identified from SA record
2146 */
2147 dgid = &ar_result_s.ar_gid;
2148 D2("daplka_ep_connect: ATS dgid=%llx:%llx\n",
2149 (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
2150 }
2151
2152 bzero(&path_info, sizeof (ibt_path_info_t));
2153 bzero(&path_attr, sizeof (ibt_path_attr_t));
2154 bzero(&chan_args, sizeof (ibt_chan_open_args_t));
2155
2156 path_attr.pa_dgids = dgid;
2157 path_attr.pa_num_dgids = 1;
2158 /*
	 * we don't set the sid in path_attr; this saves one SA query
	 * and also keeps the server side from having to write the
	 * service record
2161 */
2162 path_attr.pa_sgid = ia_rp->ia_hca_sgid;
2163 path_attr.pa_pkey = ia_rp->ia_port_pkey;
2164
2165 /* save the connection ep - struct copy */
2166 ep_rp->ep_sgid = ia_rp->ia_hca_sgid;
2167 ep_rp->ep_dgid = *dgid;
2168
2169 num_paths = 0;
2170 pathflags = IBT_PATH_PKEY;
	/* enable APM for remote ports, but not for the loopback case */
2172 if (daplka_apm && ((dgid->gid_prefix != path_attr.pa_sgid.gid_prefix) ||
2173 (dgid->gid_guid != path_attr.pa_sgid.gid_guid))) {
2174 pathflags |= IBT_PATH_APM;
2175 }
2176 status = ibt_get_paths(daplka_dev->daplka_clnt_hdl,
2177 pathflags, &path_attr, 1, &path_info, &num_paths);
2178
2179 if (status != IBT_SUCCESS && status != IBT_INSUFF_DATA) {
2180 DERR("ep_connect: ibt_get_paths returned %d paths %d\n",
2181 status, num_paths);
2182 *rvalp = (int)status;
2183 retval = 0;
2184 goto cleanup;
2185 }
2186 /* fill in the sid directly to path_info */
2187 path_info.pi_sid = args.epc_sid;
2188 hca_attrp = &ia_rp->ia_hca->hca_attr;
2189
2190 /* fill in open channel args */
2191 chan_args.oc_path = &path_info;
2192 chan_args.oc_cm_handler = daplka_cm_rc_handler;
2193 chan_args.oc_cm_clnt_private = (void *)ep_rp;
2194 chan_args.oc_rdma_ra_out = hca_attrp->hca_max_rdma_out_chan;
2195 chan_args.oc_rdma_ra_in = hca_attrp->hca_max_rdma_in_chan;
2196 chan_args.oc_path_retry_cnt = 7; /* 3-bit field */
2197 chan_args.oc_path_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
2198
2199 ASSERT(args.epc_priv_sz > 0);
2200 priv_data = (void *)args.epc_priv;
2201
2202 chan_args.oc_priv_data_len = args.epc_priv_sz;
2203 chan_args.oc_priv_data = priv_data;
2204
2205 /*
	 * calculate the checksum of the hello message and put the
	 * message in network byte order. the checksum field is zeroed
	 * first so the peer can verify by recomputing the same sum.
2208 */
2209 dp = (DAPL_PRIVATE *)priv_data;
2210 dp->hello_msg.hi_port = htons(dp->hello_msg.hi_port);
2211 dp->hello_msg.hi_checksum = 0;
2212 dp->hello_msg.hi_checksum = htons(daplka_hellomsg_cksum(dp));
2213
2214 if (args.epc_timeout > 0) {
2215 /*
2216 * increment refcnt before passing reference to
2217 * timer_info_alloc.
2218 */
2219 DAPLKA_RS_REF(ep_rp);
2220 timerp = daplka_timer_info_alloc(ep_rp);
2221 if (timerp == NULL) {
2222 DERR("ep_connect: cannot allocate timer\n");
2223 /*
2224 * we need to remove the reference if
2225 * allocation failed.
2226 */
2227 DAPLKA_RS_UNREF(ep_rp);
2228 retval = ENOMEM;
2229 goto cleanup;
2230 }
2231 /*
		 * We generate our own hkeys so that timer_hkey can fit
		 * into a pointer and be passed as an arg to timeout()
2234 */
2235 timer_hkey = (uint64_t)daplka_timer_hkey_gen();
2236 retval = daplka_hash_insert(&daplka_timer_info_htbl,
2237 &timer_hkey, (void *)timerp);
2238 if (retval != 0) {
2239 DERR("ep_connect: cannot insert timer info\n");
2240 goto cleanup;
2241 }
2242 ASSERT(ep_rp->ep_timer_hkey == 0);
2243 ep_rp->ep_timer_hkey = timer_hkey;
2244 timer_inserted = B_TRUE;
2245 D2("ep_connect: timer_hkey = 0x%llx\n",
2246 (longlong_t)timer_hkey);
2247 }
2248 status = ibt_open_rc_channel(ep_rp->ep_chan_hdl, IBT_OCHAN_NO_FLAGS,
2249 IBT_NONBLOCKING, &chan_args, NULL);
2250
2251 if (status != IBT_SUCCESS) {
2252 DERR("ep_connect: ibt_open_rc_channel returned %d\n", status);
2253 *rvalp = (int)status;
2254 retval = 0;
2255 goto cleanup;
2256 }
2257 /*
2258 * if a cm callback gets called at this point, it'll have to wait until
2259 * ep_state becomes connecting (or some other state if another thread
2260 * manages to get ahead of the callback). this guarantees that the
2261 * callback will not touch the timer until it gets set.
2262 */
2263 if (timerp != NULL) {
2264 clock_t tmo;
2265
2266 tmo = drv_usectohz((clock_t)args.epc_timeout);
2267 /*
2268 * We generate our own 32 bit timer_hkey so that it can fit
2269 * into a pointer
2270 */
2271 ASSERT(timer_hkey != 0);
2272 timerp->ti_tmo_id = timeout(daplka_timer_handler,
2273 (void *)(uintptr_t)timer_hkey, tmo);
2274 }
2275 new_state = DAPLKA_EP_STATE_CONNECTING;
2276
2277 cleanup:;
2278 if (timerp != NULL && (retval != 0 || status != IBT_SUCCESS)) {
2279 /*
2280 * if ibt_open_rc_channel failed, the timerp must still
2281 * be in daplka_timer_info_htbl because neither the cm
2282 * callback nor the timer_handler will be called.
2283 */
2284 if (timer_inserted) {
2285 daplka_timer_info_t *new_timerp = NULL;
2286
2287 ASSERT(timer_hkey != 0);
2288 (void) daplka_hash_remove(&daplka_timer_info_htbl,
2289 timer_hkey, (void **)&new_timerp);
2290 ASSERT(new_timerp == timerp);
2291 ep_rp->ep_timer_hkey = 0;
2292 }
2293 daplka_timer_info_free(timerp);
2294 }
2295 daplka_ep_set_state(ep_rp, old_state, new_state);
2296 DAPLKA_RS_UNREF(ep_rp);
2297 D3("ep_connect: exit\n");
2298 return (retval);
2299 }
2300
2301 /*
2302 * ep_disconnect closes a connection with a remote peer.
2303 * if a connection has not been established, ep_disconnect
2304 * will instead flush all recv bufs posted to this channel.
2305 * if the EP state is CONNECTED, CONNECTING or ACCEPTING upon
2306 * entry to ep_disconnect, the EP state will transition to
2307 * DISCONNECTING upon exit. the CM callbacks triggered by
2308 * ibt_close_rc_channel will cause EP state to become
2309 * DISCONNECTED. This function is a no-op if EP state is
2310 * DISCONNECTED.
2311 */
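
/*
 * state transitions performed below (summary):
 *
 *	CONNECTED/CONNECTING/ACCEPTING -> DISCONNECTING
 *	    (the CM callbacks later move the EP to DISCONNECTED)
 *	CLOSED -> CLOSED (channel is flushed, state is unchanged)
 *	DISCONNECTED/DISCONNECTING -> no-op
 */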
2312 /* ARGSUSED */
2313 static int
2314 daplka_ep_disconnect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2315 cred_t *cred, int *rvalp)
2316 {
2317 daplka_ep_resource_t *ep_rp = NULL;
2318 dapl_ep_disconnect_t args;
2319 ibt_status_t status;
2320 uint32_t old_state, new_state;
2321 int retval = 0;
2322
2323 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_disconnect_t),
2324 mode);
2325 if (retval != 0) {
2326 DERR("ep_disconnect: copyin error %d\n", retval);
2327 return (EFAULT);
2328 }
2329 ep_rp = (daplka_ep_resource_t *)
2330 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epd_hkey);
2331 if (ep_rp == NULL) {
2332 DERR("ep_disconnect: cannot find ep resource\n");
2333 return (EINVAL);
2334 }
2335 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2336
2337 new_state = old_state = daplka_ep_get_state(ep_rp);
2338 if (old_state != DAPLKA_EP_STATE_CONNECTED &&
2339 old_state != DAPLKA_EP_STATE_CONNECTING &&
2340 old_state != DAPLKA_EP_STATE_ACCEPTING &&
2341 old_state != DAPLKA_EP_STATE_DISCONNECTED &&
2342 old_state != DAPLKA_EP_STATE_DISCONNECTING &&
2343 old_state != DAPLKA_EP_STATE_CLOSED) {
2344 DERR("ep_disconnect: invalid state %d\n", old_state);
2345 retval = EINVAL;
2346 goto cleanup;
2347 }
2348
2349 if ((old_state == DAPLKA_EP_STATE_DISCONNECTED) ||
2350 (old_state == DAPLKA_EP_STATE_DISCONNECTING)) {
2351 D2("ep_disconnect: ep already disconnected\n");
2352 retval = 0;
2353 /* we leave the state as DISCONNECTED */
2354 goto cleanup;
2355 }
2356 if (old_state == DAPLKA_EP_STATE_CONNECTING ||
2357 old_state == DAPLKA_EP_STATE_ACCEPTING) {
2358 D2("ep_disconnect: aborting, old_state = %d\n", old_state);
2359 }
2360
2361 /*
2362 * according to the udapl spec, ep_disconnect should
2363 * flush the channel if the channel is not CONNECTED.
2364 */
2365 if (old_state == DAPLKA_EP_STATE_CLOSED) {
2366 status = ibt_flush_channel(ep_rp->ep_chan_hdl);
2367 if (status != IBT_SUCCESS) {
2368 DERR("ep_disconnect: ibt_flush_channel failed %d\n",
2369 status);
2370 *rvalp = (int)status;
2371 }
2372 retval = 0;
2373 /* we leave the state as CLOSED */
2374 goto cleanup;
2375 }
2376
2377 new_state = DAPLKA_EP_STATE_DISCONNECTING;
2378 daplka_ep_set_state(ep_rp, old_state, new_state);
2379 status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_NONBLOCKING,
2380 NULL, 0, NULL, NULL, NULL);
2381
2382 if (status == IBT_SUCCESS) {
2383 DAPLKA_RS_UNREF(ep_rp);
2384 return (retval);
2385 } else {
2386 DERR("ep_disconnect: ibt_close_rc_channel returned %d\n",
2387 status);
2388 *rvalp = (int)status;
2389 retval = 0;
2390 new_state = old_state;
2391 }
2392
2393 cleanup:;
2394 daplka_ep_set_state(ep_rp, old_state, new_state);
2395 DAPLKA_RS_UNREF(ep_rp);
2396 return (retval);
2397 }
2398
2399 /*
 * this function resets the EP to a usable state (i.e. from
 * DISCONNECTED to CLOSED). this is done with the ibt_recycle_rc
 * interface, which reinitializes the existing RC channel for reuse
 * instead of tearing the channel down and creating a new one.
2405 */
2406 /* ARGSUSED */
2407 static int
2408 daplka_ep_reinit(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2409 cred_t *cred, int *rvalp)
2410 {
2411 daplka_ep_resource_t *ep_rp = NULL;
2412 dapl_ep_reinit_t args;
2413 ibt_status_t status;
2414 uint32_t old_state, new_state;
2415 int retval = 0;
2416
2417 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_reinit_t),
2418 mode);
2419 if (retval != 0) {
2420 DERR("reinit: copyin error %d\n", retval);
2421 return (EFAULT);
2422 }
2423 ep_rp = (daplka_ep_resource_t *)
2424 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epri_hkey);
2425 if (ep_rp == NULL) {
2426 DERR("reinit: cannot find ep resource\n");
2427 return (EINVAL);
2428 }
2429 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2430 new_state = old_state = daplka_ep_get_state(ep_rp);
2431 if ((old_state != DAPLKA_EP_STATE_CLOSED) &&
2432 (old_state != DAPLKA_EP_STATE_DISCONNECTED)) {
2433 DERR("reinit: invalid state %d\n", old_state);
2434 retval = EINVAL;
2435 goto cleanup;
2436 }
2437
2438 status = ibt_recycle_rc(ep_rp->ep_chan_hdl,
2439 IBT_CEP_RDMA_RD|IBT_CEP_RDMA_WR,
2440 ia_rp->ia_port_num, NULL, NULL);
2441 if (status != IBT_SUCCESS) {
		DERR("reinit: unable to recycle channel\n");
2443 *rvalp = (int)status;
2444 retval = 0;
2445 goto cleanup;
2446 }
2447 new_state = DAPLKA_EP_STATE_CLOSED;
2448
2449 cleanup:;
2450 daplka_ep_set_state(ep_rp, old_state, new_state);
2451 DAPLKA_RS_UNREF(ep_rp);
2452 return (retval);
2453 }
2454
2455 /*
 * destroys an EP resource.
2457 * called when refcnt drops to zero.
2458 */
2459 static int
2460 daplka_ep_destroy(daplka_resource_t *gen_rp)
2461 {
2462 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)gen_rp;
2463 ibt_status_t status;
2464
2465 ASSERT(DAPLKA_RS_REFCNT(ep_rp) == 0);
2466 ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_FREED);
2467
2468 /*
2469 * by the time we get here, we can be sure that
2470 * there is no outstanding timer.
2471 */
2472 ASSERT(ep_rp->ep_timer_hkey == 0);
2473
2474 D3("ep_destroy: entering, ep_rp 0x%p, rnum %d\n",
2475 ep_rp, DAPLKA_RS_RNUM(ep_rp));
2476 /*
2477 * free rc channel
2478 */
2479 if (ep_rp->ep_chan_hdl != NULL) {
2480 mutex_enter(&daplka_dev->daplka_mutex);
2481 ibt_set_chan_private(ep_rp->ep_chan_hdl, NULL);
2482 mutex_exit(&daplka_dev->daplka_mutex);
2483 status = daplka_ibt_free_channel(ep_rp, ep_rp->ep_chan_hdl);
2484 if (status != IBT_SUCCESS) {
2485 DERR("ep_free: ibt_free_channel returned %d\n",
2486 status);
2487 }
2488 ep_rp->ep_chan_hdl = NULL;
2489 D3("ep_destroy: qp freed, rnum %d\n", DAPLKA_RS_RNUM(ep_rp));
2490 }
2491 /*
2492 * release all references
2493 */
2494 if (ep_rp->ep_snd_evd != NULL) {
2495 DAPLKA_RS_UNREF(ep_rp->ep_snd_evd);
2496 ep_rp->ep_snd_evd = NULL;
2497 }
2498 if (ep_rp->ep_rcv_evd != NULL) {
2499 DAPLKA_RS_UNREF(ep_rp->ep_rcv_evd);
2500 ep_rp->ep_rcv_evd = NULL;
2501 }
2502 if (ep_rp->ep_conn_evd != NULL) {
2503 DAPLKA_RS_UNREF(ep_rp->ep_conn_evd);
2504 ep_rp->ep_conn_evd = NULL;
2505 }
2506 if (ep_rp->ep_srq_res != NULL) {
2507 DAPLKA_RS_UNREF(ep_rp->ep_srq_res);
2508 ep_rp->ep_srq_res = NULL;
2509 }
2510 if (ep_rp->ep_pd_res != NULL) {
2511 DAPLKA_RS_UNREF(ep_rp->ep_pd_res);
2512 ep_rp->ep_pd_res = NULL;
2513 }
2514 cv_destroy(&ep_rp->ep_cv);
2515 mutex_destroy(&ep_rp->ep_lock);
2516
2517 DAPLKA_RS_FINI(ep_rp);
2518 kmem_free(ep_rp, sizeof (daplka_ep_resource_t));
2519 D3("ep_destroy: exiting, ep_rp 0x%p\n", ep_rp);
2520 return (0);
2521 }
2522
2523 /*
2524 * this function is called by daplka_hash_destroy for
2525 * freeing EP resource objects
2526 */
2527 static void
2528 daplka_hash_ep_free(void *obj)
2529 {
2530 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)obj;
2531 ibt_status_t status;
2532 uint32_t old_state, new_state;
2533 int retval;
2534
2535 old_state = daplka_ep_get_state(ep_rp);
2536 retval = daplka_cancel_timer(ep_rp);
2537 new_state = DAPLKA_EP_STATE_FREED;
2538 daplka_ep_set_state(ep_rp, old_state, new_state);
2539
2540 if (retval != 0) {
2541 D2("hash_ep_free: ep_rp 0x%p "
2542 "timer is still being processed\n", ep_rp);
2543 mutex_enter(&ep_rp->ep_lock);
		while (ep_rp->ep_timer_hkey != 0) {
2545 D2("hash_ep_free: ep_rp 0x%p "
2546 "waiting for timer_hkey to be 0\n", ep_rp);
2547 cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
2548 }
2549 mutex_exit(&ep_rp->ep_lock);
2550 }
2551
2552 /* call ibt_close_rc_channel regardless of what state we are in */
2553 status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
2554 NULL, 0, NULL, NULL, NULL);
2555 if (status != IBT_SUCCESS) {
2556 if (old_state == DAPLKA_EP_STATE_CONNECTED ||
2557 old_state == DAPLKA_EP_STATE_CONNECTING ||
2558 old_state == DAPLKA_EP_STATE_ACCEPTING) {
2559 DERR("hash_ep_free: ep_rp 0x%p state %d "
2560 "unexpected error %d from close_rc_channel\n",
2561 ep_rp, old_state, status);
2562 }
2563 D2("hash_ep_free: close_rc_channel, status %d\n", status);
2564 }
2565
2566 DAPLKA_RS_UNREF(ep_rp);
2567 }
2568
2569 /*
 * creates an EVD resource.
 * an EVD is used by the client to wait for events from one
2572 * or more sources.
2573 */
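
/*
 * an EVD aggregates up to three kernel event streams (CR, connection
 * and async events) and, for EVDs created with the DTO or RMR-bind
 * flags, a CQ that is allocated with IBT_CQ_USER_MAP so the library
 * can mmap it and reap completions without a syscall. evd_flags
 * selects which of these apply.
 */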
2574 /* ARGSUSED */
2575 static int
2576 daplka_evd_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2577 cred_t *cred, int *rvalp)
2578 {
2579 daplka_evd_resource_t *evd_rp = NULL;
2580 daplka_async_evd_hkey_t *async_evd;
2581 ibt_hca_attr_t *hca_attrp;
2582 ibt_cq_attr_t cq_attr;
2583 dapl_evd_create_t args;
2584 uint64_t evd_hkey = 0;
2585 boolean_t inserted = B_FALSE;
2586 int retval = 0;
2587 ibt_status_t status;
2588
2589 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_create_t),
2590 mode);
2591 if (retval != 0) {
2592 DERR("evd_create: copyin error %d", retval);
2593 return (EFAULT);
2594 }
2595 if ((args.evd_flags &
2596 ~(DAT_EVD_DEFAULT_FLAG | DAT_EVD_SOFTWARE_FLAG)) != 0) {
2597 DERR("evd_create: invalid flags 0x%x\n", args.evd_flags);
2598 return (EINVAL);
2599 }
2600
	evd_rp = kmem_zalloc(sizeof (daplka_evd_resource_t), daplka_km_flags);
	if (evd_rp == NULL) {
		DERR("evd_create: cannot allocate evd resource\n");
		return (ENOMEM);
	}
2602 DAPLKA_RS_INIT(evd_rp, DAPL_TYPE_EVD,
2603 DAPLKA_RS_RNUM(ia_rp), daplka_evd_destroy);
2604
2605 mutex_init(&evd_rp->evd_lock, NULL, MUTEX_DRIVER, NULL);
2606 cv_init(&evd_rp->evd_cv, NULL, CV_DRIVER, NULL);
2607 evd_rp->evd_hca = ia_rp->ia_hca;
2608 evd_rp->evd_flags = args.evd_flags;
2609 evd_rp->evd_hca_hdl = ia_rp->ia_hca_hdl;
2610 evd_rp->evd_cookie = args.evd_cookie;
2611 evd_rp->evd_cno_res = NULL;
2612 evd_rp->evd_cr_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2613 evd_rp->evd_conn_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2614 evd_rp->evd_async_events.eel_event_type = DAPLKA_EVD_ASYNC_EVENTS;
2615
2616 /*
2617 * if the client specified a non-zero cno_hkey, we
2618 * lookup the cno and save the reference for later use.
2619 */
2620 if (args.evd_cno_hkey > 0) {
2621 daplka_cno_resource_t *cno_rp;
2622
2623 cno_rp = (daplka_cno_resource_t *)
2624 daplka_hash_lookup(&ia_rp->ia_cno_htbl,
2625 args.evd_cno_hkey);
		if (cno_rp == NULL) {
			DERR("evd_create: cannot find cno resource\n");
			retval = EINVAL;
			goto cleanup;
		}
2630 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
2631 evd_rp->evd_cno_res = cno_rp;
2632 }
2633 hca_attrp = &ia_rp->ia_hca->hca_attr;
2634 if ((evd_rp->evd_flags &
2635 (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) != 0) {
2636 if (args.evd_cq_size > hca_attrp->hca_max_cq_sz) {
2637 DERR("evd_create: invalid cq size %d",
2638 args.evd_cq_size);
2639 retval = EINVAL;
2640 goto cleanup;
2641 }
2642 cq_attr.cq_size = args.evd_cq_size;
2643 cq_attr.cq_sched = NULL;
2644 cq_attr.cq_flags = IBT_CQ_USER_MAP;
2645
2646 status = daplka_ibt_alloc_cq(evd_rp, evd_rp->evd_hca_hdl,
2647 &cq_attr, &evd_rp->evd_cq_hdl, &evd_rp->evd_cq_real_size);
2648
2649 if (status != IBT_SUCCESS) {
2650 DERR("evd_create: ibt_alloc_cq returned %d", status);
2651 *rvalp = (int)status;
2652 retval = 0;
2653 goto cleanup;
2654 }
2655
2656 /*
2657 * store evd ptr with cq_hdl
2658 * mutex is only needed for race of "destroy" and "async"
2659 */
2660 mutex_enter(&daplka_dev->daplka_mutex);
2661 ibt_set_cq_private(evd_rp->evd_cq_hdl, (void *)evd_rp);
2662 mutex_exit(&daplka_dev->daplka_mutex);
2663
2664 /* Get HCA-specific data_out info */
2665 status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2666 IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2667 &args.evd_cq_data_out, sizeof (args.evd_cq_data_out));
2668
2669 if (status != IBT_SUCCESS) {
2670 DERR("evd_create: ibt_ci_data_out error(%d)", status);
2671 *rvalp = (int)status;
2672 retval = 0;
2673 goto cleanup;
2674 }
2675
2676 args.evd_cq_real_size = evd_rp->evd_cq_real_size;
2677
2678 ibt_set_cq_handler(evd_rp->evd_cq_hdl, daplka_cq_handler,
2679 (void *)evd_rp);
2680 }
2681
2682 retval = daplka_hash_insert(&ia_rp->ia_evd_htbl,
2683 &evd_hkey, (void *)evd_rp);
2684 if (retval != 0) {
		DERR("evd_create: cannot insert evd %d\n", retval);
2686 goto cleanup;
2687 }
2688 inserted = B_TRUE;
2689
2690 /*
2691 * If this evd handles async events need to add to the IA resource
2692 * async evd list
2693 */
2694 if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
		async_evd = kmem_zalloc(sizeof (daplka_async_evd_hkey_t),
		    daplka_km_flags);
		if (async_evd == NULL) {
			DERR("evd_create: cannot allocate async evd hkey\n");
			retval = ENOMEM;
			goto cleanup;
		}
2697 /* add the evd to the head of the list */
2698 mutex_enter(&ia_rp->ia_lock);
2699 async_evd->aeh_evd_hkey = evd_hkey;
2700 async_evd->aeh_next = ia_rp->ia_async_evd_hkeys;
2701 ia_rp->ia_async_evd_hkeys = async_evd;
2702 mutex_exit(&ia_rp->ia_lock);
2703 }
2704
2705 args.evd_hkey = evd_hkey;
	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_evd_create_t),
	    mode);
2707 if (retval != 0) {
2708 DERR("evd_create: copyout error %d\n", retval);
2709 retval = EFAULT;
2710 goto cleanup;
2711 }
2712 return (0);
2713
2714 cleanup:;
2715 if (inserted) {
2716 daplka_evd_resource_t *free_rp = NULL;
2717
2718 (void) daplka_hash_remove(&ia_rp->ia_evd_htbl, evd_hkey,
2719 (void **)&free_rp);
2720 if (free_rp != evd_rp) {
2721 DERR("evd_create: cannot remove evd\n");
2722 /*
2723 * we can only get here if another thread
2724 * has completed the cleanup in evd_free
2725 */
2726 return (retval);
2727 }
2728 }
2729 DAPLKA_RS_UNREF(evd_rp);
2730 return (retval);
2731 }
2732
2733 /*
2734 * resizes CQ and returns new mapping info to library.
2735 */
2736 /* ARGSUSED */
2737 static int
2738 daplka_cq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2739 cred_t *cred, int *rvalp)
2740 {
2741 daplka_evd_resource_t *evd_rp = NULL;
2742 ibt_hca_attr_t *hca_attrp;
2743 dapl_cq_resize_t args;
2744 ibt_status_t status;
2745 int retval = 0;
2746
2747 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cq_resize_t),
2748 mode);
2749 if (retval != 0) {
2750 DERR("cq_resize: copyin error %d\n", retval);
2751 return (EFAULT);
2752 }
2753
2754 /* get evd resource */
2755 evd_rp = (daplka_evd_resource_t *)
2756 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.cqr_evd_hkey);
2757 if (evd_rp == NULL) {
2758 DERR("cq_resize: cannot find evd resource\n");
2759 return (EINVAL);
2760 }
2761 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
2762
2763 hca_attrp = &ia_rp->ia_hca->hca_attr;
2764 if (args.cqr_cq_new_size > hca_attrp->hca_max_cq_sz) {
2765 DERR("cq_resize: invalid cq size %d", args.cqr_cq_new_size);
2766 retval = EINVAL;
2767 goto cleanup;
2768 }
2769 /*
	 * If ibt_resize_cq fails, it is primarily due to resource
	 * shortage. Per the IB spec, a resize never loses events and
	 * a resize error leaves the CQ intact. Therefore, even if the
	 * resize request fails, we proceed and get the mapping data
	 * from the CQ so that the library can mmap it.
2775 */
2776 status = ibt_resize_cq(evd_rp->evd_cq_hdl, args.cqr_cq_new_size,
2777 &args.cqr_cq_real_size);
2778 if (status != IBT_SUCCESS) {
2779 /* we return the size of the old CQ if resize fails */
2780 args.cqr_cq_real_size = evd_rp->evd_cq_real_size;
2781 ASSERT(status != IBT_CQ_HDL_INVALID);
2782 DERR("cq_resize: ibt_resize_cq failed:%d\n", status);
2783 } else {
2784 mutex_enter(&evd_rp->evd_lock);
2785 evd_rp->evd_cq_real_size = args.cqr_cq_real_size;
2786 mutex_exit(&evd_rp->evd_lock);
2787 }
2788
2789 D2("cq_resize(%d): done new_sz(%u) real_sz(%u)\n",
2790 DAPLKA_RS_RNUM(evd_rp),
2791 args.cqr_cq_new_size, args.cqr_cq_real_size);
2792
2793 /* Get HCA-specific data_out info */
2794 status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2795 IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2796 &args.cqr_cq_data_out, sizeof (args.cqr_cq_data_out));
2797 if (status != IBT_SUCCESS) {
2798 DERR("cq_resize: ibt_ci_data_out error(%d)\n", status);
2799 /* return ibt_ci_data_out status */
2800 *rvalp = (int)status;
2801 retval = 0;
2802 goto cleanup;
2803 }
2804
2805 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cq_resize_t),
2806 mode);
2807 if (retval != 0) {
2808 DERR("cq_resize: copyout error %d\n", retval);
2809 retval = EFAULT;
2810 goto cleanup;
2811 }
2812
2813 cleanup:;
2814 if (evd_rp != NULL) {
2815 DAPLKA_RS_UNREF(evd_rp);
2816 }
2817 return (retval);
2818 }
2819
2820 /*
2821 * Routine to copyin the event poll message so that 32 bit libraries
2822 * can be safely supported
2823 */
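
/*
 * the ILP32 variant differs only in evp_ep, which is a caddr32_t in
 * dapl_event_poll32_t but a full 64-bit pointer in dapl_event_poll_t;
 * the fields are therefore copied member by member.
 */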
2824 int
2825 daplka_event_poll_copyin(intptr_t inarg, dapl_event_poll_t *outarg, int mode)
2826 {
2827 int retval;
2828
2829 #ifdef _MULTI_DATAMODEL
2830 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2831 dapl_event_poll32_t args32;
2832
2833 retval = ddi_copyin((void *)inarg, &args32,
2834 sizeof (dapl_event_poll32_t), mode);
2835 if (retval != 0) {
2836 DERR("event_poll_copyin: 32bit error %d\n", retval);
2837 return (EFAULT);
2838 }
2839
2840 outarg->evp_evd_hkey = args32.evp_evd_hkey;
2841 outarg->evp_threshold = args32.evp_threshold;
2842 outarg->evp_timeout = args32.evp_timeout;
2843 outarg->evp_ep = (dapl_ib_event_t *)(uintptr_t)args32.evp_ep;
2844 outarg->evp_num_ev = args32.evp_num_ev;
2845 outarg->evp_num_polled = args32.evp_num_polled;
2846 return (0);
2847 }
2848 #endif
2849 retval = ddi_copyin((void *)inarg, outarg, sizeof (dapl_event_poll_t),
2850 mode);
2851 if (retval != 0) {
2852 DERR("event_poll: copyin error %d\n", retval);
2853 return (EFAULT);
2854 }
2855
2856 return (0);
2857 }
2858
2859 /*
2860 * Routine to copyout the event poll message so that 32 bit libraries
2861 * can be safely supported
2862 */
2863 int
2864 daplka_event_poll_copyout(dapl_event_poll_t *inarg, intptr_t outarg, int mode)
2865 {
2866 int retval;
2867
2868 #ifdef _MULTI_DATAMODEL
2869 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2870 dapl_event_poll32_t args32;
2871
2872 args32.evp_evd_hkey = inarg->evp_evd_hkey;
2873 args32.evp_threshold = inarg->evp_threshold;
2874 args32.evp_timeout = inarg->evp_timeout;
2875 args32.evp_ep = (caddr32_t)(uintptr_t)inarg->evp_ep;
2876 args32.evp_num_ev = inarg->evp_num_ev;
2877 args32.evp_num_polled = inarg->evp_num_polled;
2878
2879 retval = ddi_copyout((void *)&args32, (void *)outarg,
2880 sizeof (dapl_event_poll32_t), mode);
2881 if (retval != 0) {
2882 DERR("event_poll_copyout: 32bit error %d\n", retval);
2883 return (EFAULT);
2884 }
2885 return (0);
2886 }
2887 #endif
2888 retval = ddi_copyout((void *)inarg, (void *)outarg,
2889 sizeof (dapl_event_poll_t), mode);
2890 if (retval != 0) {
2891 DERR("event_poll_copyout: error %d\n", retval);
2892 return (EFAULT);
2893 }
2894
2895 return (0);
2896 }
2897
2898 /*
 * function to handle CM REQ RCV private data from Solaris or third parties
2900 */
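
/*
 * disposition of the private data (summary of the code below):
 * - Solaris peer: the data carries a DAPL_HELLO_MSG whose version and
 *   checksum verify; it is passed through with hi_port restored to
 *   host byte order.
 * - third party: the raw data is preserved (truncated to
 *   DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE if needed) and a hello
 *   message is synthesized, recovering the source address through a
 *   reverse ATS lookup when possible.
 */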
2901 /* ARGSUSED */
2902 static void
2903 daplka_crevent_privdata_post(daplka_ia_resource_t *ia_rp,
2904 dapl_ib_event_t *evd_rp, daplka_evd_event_t *cr_ev)
2905 {
2906 DAPL_PRIVATE *dp;
2907 ib_gid_t *lgid;
2908 ibt_ar_t ar_query_s;
2909 ibt_ar_t ar_result_s;
2910 DAPL_HELLO_MSG *hip;
2911 uint32_t ipaddr_ord;
2912 ibt_priv_data_len_t clen;
2913 ibt_priv_data_len_t olen;
2914 ibt_status_t status;
2915 uint16_t cksum;
2916
2917 /*
2918 * get private data and len
2919 */
2920 dp = (DAPL_PRIVATE *)cr_ev->ee_cmev.ec_cm_ev_priv_data;
2921 clen = cr_ev->ee_cmev.ec_cm_ev_priv_data_len;
2922 #if defined(DAPLKA_DEBUG_FORCE_ATS)
	/* skip the DAPL_PRIVATE checksum check */
2924 #else
2925 /* for remote connects */
2926 /* look up hello message in the CM private data area */
2927 if (clen >= sizeof (DAPL_PRIVATE) &&
2928 (dp->hello_msg.hi_vers == DAPL_HELLO_MSG_VERS)) {
2929 cksum = ntohs(dp->hello_msg.hi_checksum);
2930 dp->hello_msg.hi_checksum = 0;
2931 if (daplka_hellomsg_cksum(dp) == cksum) {
2932 D2("daplka_crevent_privdata_post: Solaris msg\n");
2933 evd_rp->ibe_ce.ibce_priv_data_size = clen;
2934 dp->hello_msg.hi_checksum = DAPL_CHECKSUM;
2935 dp->hello_msg.hi_port = ntohs(dp->hello_msg.hi_port);
2936 bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
2937 kmem_free(dp, clen);
2938 return;
2939 }
2940 }
2941 #endif /* DAPLKA_DEBUG_FORCE_ATS */
2942
2943 D2("daplka_crevent_privdata_post: 3rd party msg\n");
2944 /* transpose CM private data into hello message */
2945 if (clen) {
2946 olen = clen;
2947 if (clen > DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE) {
2948 clen = DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE;
2949 }
2950 bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
2951 kmem_free(dp, olen);
2952 } else {
2953 bzero(evd_rp->ibe_ce.ibce_priv_data_ptr,
2954 DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE);
2955 }
2956 evd_rp->ibe_ce.ibce_priv_data_size = sizeof (DAPL_PRIVATE);
2957 dp = (DAPL_PRIVATE *)evd_rp->ibe_ce.ibce_priv_data_ptr;
2958 /*
2959 * fill in hello message
2960 */
2961 hip = &dp->hello_msg;
2962 hip->hi_checksum = DAPL_CHECKSUM;
2963 hip->hi_clen = clen;
2964 hip->hi_mid = 0;
2965 hip->hi_vers = DAPL_HELLO_MSG_VERS;
2966 hip->hi_port = 0;
2967
2968 /* assign sgid and dgid */
2969 lgid = &ia_rp->ia_hca_sgid;
2970 ar_query_s.ar_gid.gid_prefix =
2971 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix;
2972 ar_query_s.ar_gid.gid_guid =
2973 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid;
2974 ar_query_s.ar_pkey = ia_rp->ia_port_pkey;
2975 bzero(ar_query_s.ar_data, DAPL_ATS_NBYTES);
2976
2977 /* reverse ip address lookup through ATS */
2978 status = ibt_query_ar(lgid, &ar_query_s, &ar_result_s);
2979 if (status == IBT_SUCCESS) {
2980 bcopy(ar_result_s.ar_data, hip->hi_saaddr, DAPL_ATS_NBYTES);
2981 /* determine the address families */
2982 ipaddr_ord = hip->hi_v4pad[0] | hip->hi_v4pad[1] |
2983 hip->hi_v4pad[2];
2984 if (ipaddr_ord == 0) {
2985 hip->hi_ipv = AF_INET;
2986 } else {
2987 hip->hi_ipv = AF_INET6;
2988 }
2989
2990 #define UL(b) ar_result_s.ar_data[(b)]
2991 D3("daplka_privdata_post: family=%d :SA[8] %d.%d.%d.%d\n",
2992 hip->hi_ipv, UL(8), UL(9), UL(10), UL(11));
2993 D3("daplka_privdata_post: SA[12] %d.%d.%d.%d\n",
2994 UL(12), UL(13), UL(14), UL(15));
2995 } else {
2996 /* non-conformed third parties */
2997 hip->hi_ipv = AF_UNSPEC;
2998 bzero(hip->hi_saaddr, DAPL_ATS_NBYTES);
2999 }
3000 }
3001
3002 /*
3003 * this function is called by evd_wait and evd_dequeue to wait for
3004 * connection events and CQ notifications. typically this function
3005 * is called when the userland CQ is empty and the client has
3006 * specified a non-zero timeout to evd_wait. if the client is
3007 * interested in CQ events, the CQ must be armed in userland prior
3008 * to calling this function.
3009 */
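
/*
 * return conventions, as implemented below: 0 with evp_num_polled
 * updated on success, EINTR if the wait was interrupted by a signal,
 * ETIME if the timeout expired before "threshold" events arrived.
 */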
3010 /* ARGSUSED */
3011 static int
3012 daplka_event_poll(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3013 cred_t *cred, int *rvalp)
3014 {
3015 daplka_evd_resource_t *evd_rp = NULL;
3016 dapl_event_poll_t args;
3017 daplka_evd_event_t *head;
3018 dapl_ib_event_t evp_arr[NUM_EVENTS_PER_POLL];
3019 dapl_ib_event_t *evp;
3020 dapl_ib_event_t *evp_start;
3021 size_t evp_size;
3022 int threshold;
3023 clock_t timeout;
3024 uint32_t max_events;
3025 uint32_t num_events = 0;
3026 void *pd;
3027 ibt_priv_data_len_t n;
3028 int retval = 0;
3029 int rc;
3030
3031 retval = daplka_event_poll_copyin(arg, &args, mode);
3032 if (retval != 0) {
3033 return (EFAULT);
3034 }
3035
3036 if ((args.evp_num_ev > 0) && (args.evp_ep == NULL)) {
		DERR("event_poll: evp_ep cannot be NULL if evp_num_ev=%d\n",
		    args.evp_num_ev);
3039 return (EINVAL);
3040 }
3041 /*
3042 * Note: dequeue requests have a threshold = 0, timeout = 0
3043 */
3044 threshold = args.evp_threshold;
3045
3046 max_events = args.evp_num_ev;
3047 /* ensure library is passing sensible values */
3048 if (max_events < threshold) {
3049 DERR("event_poll: max_events(%d) < threshold(%d)\n",
3050 max_events, threshold);
3051 return (EINVAL);
3052 }
3053 /* Do a sanity check to avoid excessive memory allocation */
3054 if (max_events > DAPL_EVD_MAX_EVENTS) {
3055 DERR("event_poll: max_events(%d) > %d",
3056 max_events, DAPL_EVD_MAX_EVENTS);
3057 return (EINVAL);
3058 }
3059 D4("event_poll: threshold(%d) timeout(0x%llx) max_events(%d)\n",
3060 threshold, (longlong_t)args.evp_timeout, max_events);
3061
3062 /* get evd resource */
3063 evd_rp = (daplka_evd_resource_t *)
3064 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evp_evd_hkey);
3065 if (evd_rp == NULL) {
3066 DERR("event_poll: cannot find evd resource\n");
3067 return (EINVAL);
3068 }
3069 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3070
3071 /*
3072 * Use event array on the stack if possible
3073 */
3074 if (max_events <= NUM_EVENTS_PER_POLL) {
3075 evp_start = evp = &evp_arr[0];
3076 } else {
3077 evp_size = max_events * sizeof (dapl_ib_event_t);
3078 evp_start = evp = kmem_zalloc(evp_size, daplka_km_flags);
3079 if (evp == NULL) {
3080 DERR("event_poll: kmem_zalloc failed, evp_size %d",
3081 evp_size);
3082 retval = ENOMEM;
3083 goto cleanup;
3084 }
3085 }
3086
3087 /*
	 * The event poll algorithm is as follows -
	 * The library passes a buffer big enough to hold "max_events"
	 * events. max_events is >= threshold. If at any stage we get
	 * max_events number of events we bail. The events are polled in
	 * the following order -
	 * 1) Check for CR events in the evd_cr_events list
	 * 2) Check for Connection events in the evd_conn_events list
	 * 3) Check for Async events in the evd_async_events list
	 *
	 * If after the above steps we don't have enough (>= threshold)
	 * events, we block for CQ notification and sleep. Upon being
	 * woken up we start at step 1 again.
3099 */
3100
3101 /*
	 * Note: this could be 0, infinite, or any other value in microseconds
3103 */
3104 if (args.evp_timeout > 0) {
3105 if (args.evp_timeout >= LONG_MAX) {
3106 timeout = LONG_MAX;
3107 } else {
3108 clock_t curr_time = ddi_get_lbolt();
3109
3110 timeout = curr_time +
3111 drv_usectohz((clock_t)args.evp_timeout);
3112 /*
3113 * use the max value if we wrapped around
3114 */
3115 if (timeout <= curr_time) {
3116 timeout = LONG_MAX;
3117 }
3118 }
3119 } else {
3120 timeout = 0;
3121 }
3122
3123 mutex_enter(&evd_rp->evd_lock);
3124 for (;;) {
3125 /*
3126 * If this evd is waiting for CM events check that now.
3127 */
3128 if ((evd_rp->evd_flags & DAT_EVD_CR_FLAG) &&
3129 (evd_rp->evd_cr_events.eel_num_elements > 0)) {
3130 /* dequeue events from evd_cr_events list */
			while ((head = daplka_evd_event_dequeue(
			    &evd_rp->evd_cr_events)) != NULL) {
3133 /*
3134 * populate the evp array
3135 */
3136 evp[num_events].ibe_ev_family = DAPL_CR_EVENTS;
3137 evp[num_events].ibe_ce.ibce_event =
3138 head->ee_cmev.ec_cm_ev_type;
3139 evp[num_events].ibe_ce.ibce_cookie =
3140 (uint64_t)head->ee_cmev.ec_cm_cookie;
3141 evp[num_events].ibe_ce.ibce_psep_cookie =
3142 head->ee_cmev.ec_cm_psep_cookie;
3143 daplka_crevent_privdata_post(ia_rp,
3144 &evp[num_events], head);
3145 kmem_free(head, sizeof (daplka_evd_event_t));
3146
3147 if (++num_events == max_events) {
3148 mutex_exit(&evd_rp->evd_lock);
3149 goto maxevent_reached;
3150 }
3151 }
3152 }
3153
3154 if ((evd_rp->evd_flags & DAT_EVD_CONNECTION_FLAG) &&
3155 (evd_rp->evd_conn_events.eel_num_elements > 0)) {
3156 /* dequeue events from evd_connection_events list */
			while ((head = daplka_evd_event_dequeue(
			    &evd_rp->evd_conn_events)) != NULL) {
3159 /*
3160 * populate the evp array -
3161 *
3162 */
3163 if (head->ee_cmev.ec_cm_is_passive) {
3164 evp[num_events].ibe_ev_family =
3165 DAPL_PASSIVE_CONNECTION_EVENTS;
3166 } else {
3167 evp[num_events].ibe_ev_family =
3168 DAPL_ACTIVE_CONNECTION_EVENTS;
3169 }
3170 evp[num_events].ibe_ce.ibce_event =
3171 head->ee_cmev.ec_cm_ev_type;
3172 evp[num_events].ibe_ce.ibce_cookie =
3173 (uint64_t)head->ee_cmev.ec_cm_cookie;
3174 evp[num_events].ibe_ce.ibce_psep_cookie =
3175 head->ee_cmev.ec_cm_psep_cookie;
3176
3177 if (head->ee_cmev.ec_cm_ev_priv_data_len > 0) {
3178 pd = head->ee_cmev.ec_cm_ev_priv_data;
3179 n = head->
3180 ee_cmev.ec_cm_ev_priv_data_len;
3181 bcopy(pd, (void *)evp[num_events].
3182 ibe_ce.ibce_priv_data_ptr, n);
3183 evp[num_events].ibe_ce.
3184 ibce_priv_data_size = n;
3185 kmem_free(pd, n);
3186 }
3187
3188 kmem_free(head, sizeof (daplka_evd_event_t));
3189
3190 if (++num_events == max_events) {
3191 mutex_exit(&evd_rp->evd_lock);
3192 goto maxevent_reached;
3193 }
3194 }
3195 }
3196
3197 if ((evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) &&
3198 (evd_rp->evd_async_events.eel_num_elements > 0)) {
3199 /* dequeue events from evd_async_events list */
			while ((head = daplka_evd_event_dequeue(
			    &evd_rp->evd_async_events)) != NULL) {
3202 /*
3203 * populate the evp array
3204 */
3205 evp[num_events].ibe_ev_family =
3206 DAPL_ASYNC_EVENTS;
3207 evp[num_events].ibe_async.ibae_type =
3208 head->ee_aev.ibae_type;
3209 evp[num_events].ibe_async.ibae_hca_guid =
3210 head->ee_aev.ibae_hca_guid;
3211 evp[num_events].ibe_async.ibae_cookie =
3212 head->ee_aev.ibae_cookie;
3213 evp[num_events].ibe_async.ibae_port =
3214 head->ee_aev.ibae_port;
3215
3216 kmem_free(head, sizeof (daplka_evd_event_t));
3217
3218 if (++num_events == max_events) {
3219 break;
3220 }
3221 }
3222 }
3223
3224 /*
3225 * We have sufficient events for this call so no need to wait
3226 */
3227 if ((threshold > 0) && (num_events >= threshold)) {
3228 mutex_exit(&evd_rp->evd_lock);
3229 break;
3230 }
3231
3232 evd_rp->evd_waiters++;
3233 /*
3234 * There are no new events and a timeout was specified.
3235 * Note: for CQ events threshold is 0 but timeout is
3236 * not necessarily 0.
3237 */
3238 while ((evd_rp->evd_newevents == DAPLKA_EVD_NO_EVENTS) &&
3239 timeout) {
3240 retval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv,
3241 &evd_rp->evd_lock, timeout);
3242 if (retval == 0) {
3243 retval = EINTR;
3244 break;
3245 } else if (retval == -1) {
3246 retval = ETIME;
3247 break;
3248 } else {
3249 retval = 0;
3250 continue;
3251 }
3252 }
3253 evd_rp->evd_waiters--;
3254 if (evd_rp->evd_newevents != DAPLKA_EVD_NO_EVENTS) {
3255 /*
			 * we were woken up either by the CQ handler
			 * (completions arrived and the CQ must be polled
			 * from userland) or by a S/W event.
3259 */
3260
3261 /* check for userland events only */
3262 if (!(evd_rp->evd_newevents &
3263 ~DAPLKA_EVD_ULAND_EVENTS)) {
3264 evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
3265 mutex_exit(&evd_rp->evd_lock);
3266 break;
3267 }
3268 /*
			 * Clear newevents since we are going to loop
			 * back and check for both CM and CQ events
3271 */
3272 evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
3273 } else { /* error */
3274 mutex_exit(&evd_rp->evd_lock);
3275 break;
3276 }
3277 }
3278
3279 maxevent_reached:
3280 args.evp_num_polled = num_events;
3281
3282 /*
	 * At this point retval might hold a value that we want to return
	 * to the user, so the copyouts must not clobber retval.
3285 */
3286 if (args.evp_num_polled > 0) { /* copyout the events */
3287 rc = ddi_copyout(evp, args.evp_ep, args.evp_num_polled *
3288 sizeof (dapl_ib_event_t), mode);
3289 if (rc != 0) { /* XXX: we are losing events here */
3290 DERR("event_poll: event array copyout error %d", rc);
3291 retval = EFAULT;
3292 goto cleanup;
3293 }
3294 rc = daplka_event_poll_copyout(&args, arg, mode);
3295 if (rc != 0) { /* XXX: we are losing events here */
3296 DERR("event_poll: copyout error %d\n", rc);
3297 retval = EFAULT;
3298 goto cleanup;
3299 }
3300 }
3301
3302 cleanup:;
3303 if ((max_events > NUM_EVENTS_PER_POLL) && (evp_start != NULL)) {
3304 kmem_free(evp_start, evp_size);
3305 }
3306
3307 if (evd_rp != NULL) {
3308 DAPLKA_RS_UNREF(evd_rp);
3309 }
3310 return (retval);
3311 }
3312
3313 /* ARGSUSED */
3314 static int
3315 daplka_event_wakeup(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3316 cred_t *cred, int *rvalp)
3317 {
3318 dapl_event_wakeup_t args;
3319 daplka_evd_resource_t *evd_rp;
3320 int retval;
3321
3322 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_event_wakeup_t),
3323 mode);
3324 if (retval != 0) {
3325 DERR("event_wakeup: copyin error %d\n", retval);
3326 return (EFAULT);
3327 }
3328
3329 /* get evd resource */
3330 evd_rp = (daplka_evd_resource_t *)
3331 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evw_hkey);
3332 if (evd_rp == NULL) {
3333 DERR("event_wakeup: cannot find evd resource\n");
3334 return (EINVAL);
3335 }
3336 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3337
3338 daplka_evd_wakeup(evd_rp, NULL, NULL);
3339
3340 DAPLKA_RS_UNREF(evd_rp);
3341
3342 return (retval);
3343 }
3344
3345 /* ARGSUSED */
3346 static int
3347 daplka_evd_modify_cno(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3348 cred_t *cred, int *rvalp)
3349 {
3350 dapl_evd_modify_cno_t args;
3351 daplka_evd_resource_t *evd_rp;
3352 daplka_cno_resource_t *cno_rp;
3353 daplka_cno_resource_t *old_cno_rp;
3354 int retval;
3355
3356 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_modify_cno_t),
3357 mode);
3358 if (retval != 0) {
3359 DERR("evd_modify_cno: copyin error %d\n", retval);
3360 return (EFAULT);
3361 }
3362
3363 /* get evd resource */
3364 evd_rp = (daplka_evd_resource_t *)
3365 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evmc_hkey);
3366 if (evd_rp == NULL) {
3367 DERR("evd_modify_cno: cannot find evd resource\n");
3368 retval = EINVAL;
3369 goto cleanup;
3370 }
3371 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3372
3373 if (args.evmc_cno_hkey > 0) {
3374 /* get cno resource corresponding to the new CNO */
3375 cno_rp = (daplka_cno_resource_t *)
3376 daplka_hash_lookup(&ia_rp->ia_cno_htbl,
3377 args.evmc_cno_hkey);
3378 if (cno_rp == NULL) {
3379 DERR("evd_modify_cno: cannot find CNO resource\n");
3380 retval = EINVAL;
3381 goto cleanup;
3382 }
3383 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3384 } else {
3385 cno_rp = NULL;
3386 }
3387
3388 mutex_enter(&evd_rp->evd_lock);
3389 old_cno_rp = evd_rp->evd_cno_res;
3390 evd_rp->evd_cno_res = cno_rp;
3391 mutex_exit(&evd_rp->evd_lock);
3392
3393 /*
3394 * drop the refcnt on the old CNO, the refcnt on the new CNO is
3395 * retained since the evd holds a reference to it.
3396 */
3397 if (old_cno_rp) {
3398 DAPLKA_RS_UNREF(old_cno_rp);
3399 }
3400
3401 cleanup:
3402 if (evd_rp) {
3403 DAPLKA_RS_UNREF(evd_rp);
3404 }
3405
3406 return (retval);
3407 }
3408
3409 /*
3410 * Frees the EVD and associated resources.
 * If there are other threads still using this EVD, the destruction
 * is deferred until the EVD's refcnt drops to zero.
3413 */
3414 /* ARGSUSED */
3415 static int
3416 daplka_evd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3417 cred_t *cred, int *rvalp)
3418 {
3419 daplka_evd_resource_t *evd_rp = NULL;
3420 daplka_async_evd_hkey_t *curr;
3421 daplka_async_evd_hkey_t *prev;
3422 dapl_evd_free_t args;
3423 int retval = 0;
3424
3425 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_free_t), mode);
3426 if (retval != 0) {
3427 DERR("evd_free: copyin error %d\n", retval);
3428 return (EFAULT);
3429 }
3430 retval = daplka_hash_remove(&ia_rp->ia_evd_htbl, args.evf_hkey,
3431 (void **)&evd_rp);
3432 if (retval != 0 || evd_rp == NULL) {
3433 DERR("evd_free: cannot find evd resource\n");
3434 return (EINVAL);
3435 }
3436 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3437
3438 /* If this is an async evd remove it from the IA's async evd list */
3439 if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
3440 mutex_enter(&ia_rp->ia_lock);
3441 curr = prev = ia_rp->ia_async_evd_hkeys;
3442 while (curr != NULL) {
3443 if (curr->aeh_evd_hkey == args.evf_hkey) {
3444 /* unlink curr from the list */
3445 if (curr == prev) {
3446 /*
3447 * if first element in the list update
3448 * the list head
3449 */
3450 ia_rp->ia_async_evd_hkeys =
3451 curr->aeh_next;
3452 } else {
3453 prev->aeh_next = curr->aeh_next;
3454 }
3455 break;
3456 }
3457 prev = curr;
3458 curr = curr->aeh_next;
3459 }
3460 mutex_exit(&ia_rp->ia_lock);
		/* free the curr entry if it was found and unlinked */
		if (curr != NULL) {
			kmem_free(curr, sizeof (daplka_async_evd_hkey_t));
		}
3463 }
3464
3465 /* UNREF calls the actual free function when refcnt is zero */
3466 DAPLKA_RS_UNREF(evd_rp);
3467 return (0);
3468 }
3469
3470 /*
3471 * destroys EVD resource.
3472 * called when refcnt drops to zero.
3473 */
3474 static int
3475 daplka_evd_destroy(daplka_resource_t *gen_rp)
3476 {
3477 daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)gen_rp;
3478 ibt_status_t status;
3479 daplka_evd_event_t *evt;
3480 ibt_priv_data_len_t len;
3481
3482 D3("evd_destroy: entering, evd_rp 0x%p, rnum %d\n",
3483 evd_rp, DAPLKA_RS_RNUM(evd_rp));
3484 /*
3485 * free CQ
3486 */
3487 if (evd_rp->evd_cq_hdl) {
3488 ibt_set_cq_handler(evd_rp->evd_cq_hdl, NULL, NULL);
3489 mutex_enter(&daplka_dev->daplka_mutex);
3490 ibt_set_cq_private(evd_rp->evd_cq_hdl, NULL);
3491 mutex_exit(&daplka_dev->daplka_mutex);
3492
3493 status = daplka_ibt_free_cq(evd_rp, evd_rp->evd_cq_hdl);
3494 if (status != IBT_SUCCESS) {
3495 DERR("evd_destroy: ibt_free_cq returned %d\n", status);
3496 }
3497 evd_rp->evd_cq_hdl = NULL;
3498 D2("evd_destroy: cq freed, rnum %d\n", DAPLKA_RS_RNUM(evd_rp));
3499 }
3500
3501 /*
3502 * release reference on CNO
3503 */
3504 if (evd_rp->evd_cno_res != NULL) {
3505 mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3506 if (evd_rp->evd_cno_res->cno_evd_cookie ==
3507 evd_rp->evd_cookie) {
3508 evd_rp->evd_cno_res->cno_evd_cookie = 0;
3509 }
3510 mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3511 DAPLKA_RS_UNREF(evd_rp->evd_cno_res);
3512 evd_rp->evd_cno_res = NULL;
3513 }
3514
3515 /*
3516 * discard all remaining events
3517 */
3518 mutex_enter(&evd_rp->evd_lock);
3519 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_cr_events))) {
3520 D2("evd_destroy: discarding CR event: %d\n",
3521 evt->ee_cmev.ec_cm_ev_type);
3522 len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3523 if (len > 0) {
3524 kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3525 evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3526 evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3527 }
3528 kmem_free(evt, sizeof (*evt));
3529 }
3530 ASSERT(evd_rp->evd_cr_events.eel_num_elements == 0);
3531
3532 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_conn_events))) {
3533 D2("evd_destroy: discarding CONN event: %d\n",
3534 evt->ee_cmev.ec_cm_ev_type);
3535 len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3536 if (len > 0) {
3537 kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3538 evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3539 evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3540 }
3541 kmem_free(evt, sizeof (*evt));
3542 }
3543 ASSERT(evd_rp->evd_conn_events.eel_num_elements == 0);
3544
3545 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_async_events))) {
3546 DERR("evd_destroy: discarding ASYNC event: %d\n",
3547 evt->ee_aev.ibae_type);
3548 kmem_free(evt, sizeof (*evt));
3549 }
3550 ASSERT(evd_rp->evd_async_events.eel_num_elements == 0);
3551 mutex_exit(&evd_rp->evd_lock);
3552
3553 mutex_destroy(&evd_rp->evd_lock);
3554 DAPLKA_RS_FINI(evd_rp);
3555 kmem_free(evd_rp, sizeof (daplka_evd_resource_t));
3556 D3("evd_destroy: exiting, evd_rp 0x%p\n", evd_rp);
3557 return (0);
3558 }
3559
3560 static void
3561 daplka_hash_evd_free(void *obj)
3562 {
3563 daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)obj;
3564
3565 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3566 DAPLKA_RS_UNREF(evd_rp);
3567 }
3568
3569 /*
3570 * this handler fires when new completions arrive.
3571 */
3572 /* ARGSUSED */
3573 static void
3574 daplka_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
3575 {
3576 D3("cq_handler: fired setting evd_newevents\n");
3577 daplka_evd_wakeup((daplka_evd_resource_t *)arg, NULL, NULL);
3578 }
3579
3580 /*
3581 * this routine wakes up a client from evd_wait. if evtq and evt
3582 * are non-null, the event evt will be enqueued prior to waking
3583 * up the client. if the evd is associated with a CNO and if there
3584 * are no waiters on the evd, the CNO will be notified.
3585 */
3586 static void
3587 daplka_evd_wakeup(daplka_evd_resource_t *evd_rp, daplka_evd_event_list_t *evtq,
3588 daplka_evd_event_t *evt)
3589 {
3590 uint32_t waiters = 0;
3591
3592 mutex_enter(&evd_rp->evd_lock);
3593 if (evtq != NULL && evt != NULL) {
3594 ASSERT(evtq == &evd_rp->evd_cr_events ||
3595 evtq == &evd_rp->evd_conn_events ||
3596 evtq == &evd_rp->evd_async_events);
3597 daplka_evd_event_enqueue(evtq, evt);
3598 ASSERT((evtq->eel_event_type == DAPLKA_EVD_CM_EVENTS) ||
3599 (evtq->eel_event_type == DAPLKA_EVD_ASYNC_EVENTS));
3600 evd_rp->evd_newevents |= evtq->eel_event_type;
3601 } else {
3602 evd_rp->evd_newevents |= DAPLKA_EVD_ULAND_EVENTS;
3603 }
3604 waiters = evd_rp->evd_waiters;
3605 cv_broadcast(&evd_rp->evd_cv);
3606 mutex_exit(&evd_rp->evd_lock);
3607
3608 /*
3609 * only wakeup the CNO if there are no waiters on this evd.
3610 */
3611 if (evd_rp->evd_cno_res != NULL && waiters == 0) {
3612 mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3613 evd_rp->evd_cno_res->cno_evd_cookie = evd_rp->evd_cookie;
3614 cv_broadcast(&evd_rp->evd_cno_res->cno_cv);
3615 mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3616 }
3617 }
3618
3619 /*
3620 * daplka_evd_event_enqueue adds elem to the end of the event list
3621 * The caller is expected to acquire appropriate locks before
3622 * calling enqueue
3623 */
3624 static void
3625 daplka_evd_event_enqueue(daplka_evd_event_list_t *evlist,
3626 daplka_evd_event_t *elem)
3627 {
3628 if (evlist->eel_tail) {
3629 evlist->eel_tail->ee_next = elem;
3630 evlist->eel_tail = elem;
3631 } else {
3632 /* list is empty */
3633 ASSERT(evlist->eel_head == NULL);
3634 evlist->eel_head = elem;
3635 evlist->eel_tail = elem;
3636 }
3637 evlist->eel_num_elements++;
3638 }
3639
3640 /*
3641 * daplka_evd_event_dequeue removes and returns the first element of event
3642 * list. NULL is returned if the list is empty. The caller is expected to
 * acquire appropriate locks before calling dequeue.
3644 */
3645 static daplka_evd_event_t *
3646 daplka_evd_event_dequeue(daplka_evd_event_list_t *evlist)
3647 {
3648 daplka_evd_event_t *head;
3649
3650 head = evlist->eel_head;
3651 if (head == NULL) {
3652 return (NULL);
3653 }
3654
3655 evlist->eel_head = head->ee_next;
3656 evlist->eel_num_elements--;
3657 /* if it was the last element update the tail pointer too */
3658 if (evlist->eel_head == NULL) {
3659 ASSERT(evlist->eel_num_elements == 0);
3660 evlist->eel_tail = NULL;
3661 }
3662 return (head);
3663 }
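
/*
 * The block below is an editor-added illustrative sketch, not part of
 * the driver: it exercises the enqueue/dequeue pair above and the
 * head/tail invariants they maintain. The DAPLKA_EXAMPLES guard and
 * the function name are hypothetical; the events are assumed to be
 * zero-filled (ee_next == NULL), as they are throughout this file.
 */
#ifdef DAPLKA_EXAMPLES
static void
daplka_evd_event_list_sketch(daplka_evd_event_list_t *evlist,
    daplka_evd_event_t *e1, daplka_evd_event_t *e2)
{
	/* caller holds the lock protecting evlist (e.g. evd_lock) */
	daplka_evd_event_enqueue(evlist, e1);	/* head == tail == e1 */
	daplka_evd_event_enqueue(evlist, e2);	/* head == e1, tail == e2 */
	ASSERT(evlist->eel_num_elements == 2);

	/* dequeue returns events in FIFO order */
	ASSERT(daplka_evd_event_dequeue(evlist) == e1);
	ASSERT(daplka_evd_event_dequeue(evlist) == e2);

	/* the list is now empty: both head and tail are reset */
	ASSERT(evlist->eel_head == NULL && evlist->eel_tail == NULL);
	ASSERT(daplka_evd_event_dequeue(evlist) == NULL);
}
#endif /* DAPLKA_EXAMPLES */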
3664
3665 /*
3666 * A CNO allows the client to wait for notifications from multiple EVDs.
3667 * To use a CNO, the client needs to follow the procedure below:
3668 * 1. allocate a CNO. this returns a cno_hkey that identifies the CNO.
3669 * 2. create one or more EVDs using the returned cno_hkey.
 * 3. call cno_wait. when one of the associated EVDs gets notified, the
 *    CNO will also get notified. cno_wait will then return with an
 *    evd_cookie identifying the EVD that triggered the event.
 *
 * A note about cno_wait:
 * - unlike an EVD, a CNO does not maintain a queue of notifications. For
3676 * example, suppose multiple EVDs triggered a CNO before the client calls
3677 * cno_wait; when the client calls cno_wait, it will return with the
3678 * evd_cookie that identifies the *last* EVD that triggered the CNO. It
3679 * is the responsibility of the client, upon returning from cno_wait, to
3680 * check on all EVDs that can potentially trigger the CNO. the returned
3681 * evd_cookie is only meant to be a hint. there is no guarantee that the
3682 * EVD identified by the evd_cookie still contains an event or still
3683 * exists by the time cno_wait returns.
3684 */
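
/*
 * The block below is an editor-added illustrative sketch of the
 * procedure described above, written from the userland library's
 * point of view; it is not part of the driver and would not compile
 * in it. The ioctl command names DAPL_CNO_ALLOC and DAPL_CNO_WAIT are
 * placeholders for the real commands in the interface header; only
 * the argument structures and the hkey/cookie flow are taken from
 * this file.
 */
#ifdef DAPLKA_EXAMPLES
static int
daplka_cno_usage_sketch(int ia_fd)
{
	dapl_cno_alloc_t alloc_args;
	dapl_cno_wait_t wait_args;

	/* step 1: allocate a CNO; the driver returns cno_hkey */
	if (ioctl(ia_fd, DAPL_CNO_ALLOC, &alloc_args) != 0)
		return (-1);

	/* step 2: create one or more EVDs passing cno_hkey (not shown) */

	/* step 3: block until one of the associated EVDs fires */
	wait_args.cnw_hkey = alloc_args.cno_hkey;
	wait_args.cnw_timeout = 1000000;	/* one second, in usec */
	if (ioctl(ia_fd, DAPL_CNO_WAIT, &wait_args) != 0)
		return (-1);

	/*
	 * cnw_evd_cookie is only a hint; check every EVD that can
	 * trigger this CNO before going back to sleep.
	 */
	return (0);
}
#endif /* DAPLKA_EXAMPLES */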
3685
3686 /*
3687 * allocates a CNO.
3688 * the returned cno_hkey may subsequently be used in evd_create.
3689 */
3690 /* ARGSUSED */
3691 static int
3692 daplka_cno_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3693 cred_t *cred, int *rvalp)
3694 {
3695 dapl_cno_alloc_t args;
3696 daplka_cno_resource_t *cno_rp = NULL;
3697 uint64_t cno_hkey = 0;
3698 boolean_t inserted = B_FALSE;
3699 int retval = 0;
3700
3701 cno_rp = kmem_zalloc(sizeof (*cno_rp), daplka_km_flags);
3702 if (cno_rp == NULL) {
3703 DERR("cno_alloc: cannot allocate cno resource\n");
3704 return (ENOMEM);
3705 }
3706 DAPLKA_RS_INIT(cno_rp, DAPL_TYPE_CNO,
3707 DAPLKA_RS_RNUM(ia_rp), daplka_cno_destroy);
3708
3709 mutex_init(&cno_rp->cno_lock, NULL, MUTEX_DRIVER, NULL);
3710 cv_init(&cno_rp->cno_cv, NULL, CV_DRIVER, NULL);
3711 cno_rp->cno_evd_cookie = 0;
3712
3713 /* insert into cno hash table */
3714 retval = daplka_hash_insert(&ia_rp->ia_cno_htbl,
3715 &cno_hkey, (void *)cno_rp);
3716 if (retval != 0) {
3717 DERR("cno_alloc: cannot insert cno resource\n");
3718 goto cleanup;
3719 }
3720 inserted = B_TRUE;
3721
3722 /* return hkey to library */
3723 args.cno_hkey = cno_hkey;
3724
3725 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cno_alloc_t),
3726 mode);
3727 if (retval != 0) {
3728 DERR("cno_alloc: copyout error %d\n", retval);
3729 retval = EFAULT;
3730 goto cleanup;
3731 }
3732 return (0);
3733
3734 cleanup:;
3735 if (inserted) {
3736 daplka_cno_resource_t *free_rp = NULL;
3737
3738 (void) daplka_hash_remove(&ia_rp->ia_cno_htbl, cno_hkey,
3739 (void **)&free_rp);
3740 if (free_rp != cno_rp) {
3741 DERR("cno_alloc: cannot remove cno\n");
3742 /*
3743 * we can only get here if another thread
3744 * has completed the cleanup in cno_free
3745 */
3746 return (retval);
3747 }
3748 }
3749 DAPLKA_RS_UNREF(cno_rp);
3750 return (retval);
3751 }
3752
3753 /*
3754 * destroys a CNO.
3755 * this gets called when a CNO resource's refcnt drops to zero.
3756 */
3757 static int
3758 daplka_cno_destroy(daplka_resource_t *gen_rp)
3759 {
3760 daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)gen_rp;
3761
3762 ASSERT(DAPLKA_RS_REFCNT(cno_rp) == 0);
3763 D2("cno_destroy: entering, cno_rp %p, rnum %d\n",
3764 cno_rp, DAPLKA_RS_RNUM(cno_rp));
3765
3766 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3767 cv_destroy(&cno_rp->cno_cv);
3768 mutex_destroy(&cno_rp->cno_lock);
3769
3770 DAPLKA_RS_FINI(cno_rp);
3771 kmem_free(cno_rp, sizeof (daplka_cno_resource_t));
3772 D2("cno_destroy: exiting, cno_rp %p\n", cno_rp);
3773 return (0);
3774 }
3775
3776 static void
3777 daplka_hash_cno_free(void *obj)
3778 {
3779 daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)obj;
3780
3781 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3782 DAPLKA_RS_UNREF(cno_rp);
3783 }
3784
3785 /*
3786 * removes the CNO from the cno hash table and frees the CNO
3787 * if there are no references to it. if there are references to
3788 * it, the CNO will be destroyed when the last of the references
3789 * is released. once the CNO is removed from the cno hash table,
3790 * the client will no longer be able to call cno_wait on the CNO.
3791 */
3792 /* ARGSUSED */
3793 static int
3794 daplka_cno_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3795 cred_t *cred, int *rvalp)
3796 {
3797 daplka_cno_resource_t *cno_rp = NULL;
3798 dapl_cno_free_t args;
3799 int retval = 0;
3800
3801 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_free_t), mode);
3802 if (retval != 0) {
3803 DERR("cno_free: copyin error %d\n", retval);
3804 return (EINVAL);
3805 }
3806
3807 retval = daplka_hash_remove(&ia_rp->ia_cno_htbl,
3808 args.cnf_hkey, (void **)&cno_rp);
3809 if (retval != 0 || cno_rp == NULL) {
3810 DERR("cno_free: cannot find cno resource\n");
3811 return (EINVAL);
3812 }
3813 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3814
3815 /* UNREF calls the actual free function when refcnt is zero */
3816 DAPLKA_RS_UNREF(cno_rp);
3817 return (0);
3818 }
3819
3820 /*
3821 * wait for a notification from one of the associated EVDs.
3822 */
3823 /* ARGSUSED */
3824 static int
3825 daplka_cno_wait(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3826 cred_t *cred, int *rvalp)
3827 {
3828 daplka_cno_resource_t *cno_rp = NULL;
3829 dapl_cno_wait_t args;
3830 int retval = 0;
3831 uint64_t evd_cookie = 0;
3832 clock_t timeout, curr_time;
3833
3834 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_wait_t), mode);
3835 if (retval != 0) {
3836 DERR("cno_wait: copyin error %d\n", retval);
3837 return (EINVAL);
3838 }
3839 /* get cno resource */
3840 cno_rp = (daplka_cno_resource_t *)
3841 daplka_hash_lookup(&ia_rp->ia_cno_htbl, args.cnw_hkey);
3842 if (cno_rp == NULL) {
3843 DERR("cno_wait: cannot find cno resource\n");
3844 return (EINVAL);
3845 }
3846 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3847
3848 curr_time = ddi_get_lbolt();
3849 timeout = curr_time + drv_usectohz(args.cnw_timeout);
3850
3851 /*
3852 * use the max value if we wrapped around
3853 */
3854 if (args.cnw_timeout > 0 && timeout <= curr_time) {
3855 /*
		 * clock_t (a long) differs in size between 32-bit and
		 * 64-bit kernels
3857 */
3858 timeout = LONG_MAX >> 4;
3859 }
3860 mutex_enter(&cno_rp->cno_lock);
3861 while (cno_rp->cno_evd_cookie == 0) {
3862 int rval = 0;
3863
3864 rval = cv_timedwait_sig(&cno_rp->cno_cv,
3865 &cno_rp->cno_lock, timeout);
3866 if (rval == 0) {
3867 DERR("cno_wait: interrupted\n");
3868 mutex_exit(&cno_rp->cno_lock);
3869 retval = EINTR;
3870 goto cleanup;
3871 } else if (rval == -1) {
3872 DERR("cno_wait: timed out\n");
3873 mutex_exit(&cno_rp->cno_lock);
3874 retval = ETIME;
3875 goto cleanup;
3876 }
3877 }
3878 evd_cookie = cno_rp->cno_evd_cookie;
3879 cno_rp->cno_evd_cookie = 0;
3880 mutex_exit(&cno_rp->cno_lock);
3881
3882 ASSERT(evd_cookie != 0);
3883 D2("cno_wait: returning evd_cookie 0x%p\n",
3884 (void *)(uintptr_t)evd_cookie);
3885 args.cnw_evd_cookie = evd_cookie;
3886 retval = ddi_copyout((void *)&args, (void *)arg,
3887 sizeof (dapl_cno_wait_t), mode);
3888 if (retval != 0) {
3889 DERR("cno_wait: copyout error %d\n", retval);
3890 retval = EFAULT;
3891 goto cleanup;
3892 }
3893
3894 cleanup:;
3895 if (cno_rp != NULL) {
3896 DAPLKA_RS_UNREF(cno_rp);
3897 }
3898 return (retval);
3899 }
3900
3901 /*
 * this function is called by the client when it decides to
 * accept a connection request. a connection request is generated
 * when the active side sends a REQ MAD to a service point on
 * the destination node. this causes the CM service handler
 * (daplka_cm_service_req) on the passive side to be called. This
 * handler will then enqueue the connection request onto the backlog
 * array of the service point. A connection event containing the
 * backlog array index and connection request private data is passed
 * to the client's service point EVD (sp_evd_res). once the event
 * is passed up to userland, the client may examine the request
 * to decide whether to call daplka_cr_accept or daplka_cr_reject.
 * (a distilled sketch of the backlog-slot claim pattern follows
 * this function.)
3913 */
3914 /* ARGSUSED */
3915 static int
3916 daplka_cr_accept(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3917 cred_t *cred, int *rvalp)
3918 {
3919 daplka_ep_resource_t *ep_rp = NULL;
3920 daplka_sp_resource_t *sp_rp = NULL;
3921 dapl_cr_accept_t args;
3922 daplka_sp_conn_pend_t *conn;
3923 ibt_cm_proceed_reply_t proc_reply;
3924 ibt_status_t status;
3925 uint16_t bkl_index;
3926 uint32_t old_state, new_state;
3927 int retval = 0;
3928 void *priv_data = NULL, *sid;
3929
3930 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_accept_t),
3931 mode);
3932 if (retval != 0) {
3933 DERR("cr_accept: copyin error %d\n", retval);
3934 return (EFAULT);
3935 }
3936 if (args.cra_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
3937 DERR("cr_accept: private data len (%d) exceeded "
3938 "max size %d\n", args.cra_priv_sz,
3939 DAPL_MAX_PRIVATE_DATA_SIZE);
3940 return (EINVAL);
3941 }
3942 priv_data = (args.cra_priv_sz > 0) ? (void *)args.cra_priv : NULL;
3943
3944 D2("cr_accept: priv(0x%p) priv_len(%u) psep(0x%llx)\n", priv_data,
3945 args.cra_priv_sz, (longlong_t)args.cra_bkl_cookie);
3946
3947 /* get sp resource */
3948 sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
3949 args.cra_sp_hkey);
3950 if (sp_rp == NULL) {
3951 DERR("cr_accept: cannot find sp resource\n");
3952 return (EINVAL);
3953 }
3954 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
3955
3956 /* get ep resource */
3957 ep_rp = (daplka_ep_resource_t *)daplka_hash_lookup(&ia_rp->ia_ep_htbl,
3958 args.cra_ep_hkey);
3959 if (ep_rp == NULL) {
3960 DERR("cr_accept: cannot find ep resource\n");
3961 retval = EINVAL;
3962 goto cleanup;
3963 }
3964 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
3965
3966 /*
3967 * accept is only allowed if ep_state is CLOSED.
3968 * note that after this point, the ep_state is frozen
3969 * (i.e. TRANSITIONING) until we transition ep_state
3970 * to ACCEPTING or back to CLOSED if we get an error.
3971 */
3972 new_state = old_state = daplka_ep_get_state(ep_rp);
3973 if (old_state != DAPLKA_EP_STATE_CLOSED) {
3974 DERR("cr_accept: invalid ep state %d\n", old_state);
3975 retval = EINVAL;
3976 goto cleanup;
3977 }
3978
3979 mutex_enter(&sp_rp->sp_lock);
3980 bkl_index = DAPLKA_GET_PSEP_INDEX(args.cra_bkl_cookie);
3981 /*
3982 * make sure the backlog index is not bogus.
3983 */
3984 if (bkl_index >= sp_rp->sp_backlog_size) {
3985 DERR("cr_accept: invalid backlog index 0x%llx %d\n",
3986 (longlong_t)args.cra_bkl_cookie, bkl_index);
3987 mutex_exit(&sp_rp->sp_lock);
3988 retval = EINVAL;
3989 goto cleanup;
3990 }
3991 /*
3992 * make sure the backlog index indeed refers
3993 * to a pending connection.
3994 */
3995 conn = &sp_rp->sp_backlog[bkl_index];
3996 if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
3997 DERR("cr_accept: invalid conn state %d\n",
3998 conn->spcp_state);
3999 mutex_exit(&sp_rp->sp_lock);
4000 retval = EINVAL;
4001 goto cleanup;
4002 }
4003 if (conn->spcp_sid == NULL) {
4004 DERR("cr_accept: sid == NULL\n");
4005 mutex_exit(&sp_rp->sp_lock);
4006 retval = EINVAL;
4007 goto cleanup;
4008 }
4009 if (ep_rp->ep_chan_hdl == NULL) {
4010 /*
		 * an ep_rp with a NULL chan_hdl is impossible.
4012 */
4013 DERR("cr_accept: ep_chan_hdl == NULL\n");
4014 mutex_exit(&sp_rp->sp_lock);
4015 ASSERT(B_FALSE);
4016 retval = EINVAL;
4017 goto cleanup;
4018 }
4019 proc_reply.rep.cm_channel = ep_rp->ep_chan_hdl;
4020 proc_reply.rep.cm_rdma_ra_out = conn->spcp_rdma_ra_out;
4021 proc_reply.rep.cm_rdma_ra_in = conn->spcp_rdma_ra_in;
4022 proc_reply.rep.cm_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
4023 sid = conn->spcp_sid;
4024
4025 /*
4026 * this clears our slot in the backlog array.
4027 * this slot may now be used by other pending connections.
4028 */
4029 conn->spcp_sid = NULL;
4030 conn->spcp_state = DAPLKA_SPCP_INIT;
4031 conn->spcp_req_len = 0;
4032 mutex_exit(&sp_rp->sp_lock);
4033
4034 /*
	 * Set the unique cookie corresponding to the CR on this EP
	 * so that it can be used in passive side CM callbacks
4037 */
4038 ep_rp->ep_psep_cookie = args.cra_bkl_cookie;
4039
4040 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, IBT_CM_ACCEPT,
4041 &proc_reply, priv_data, (ibt_priv_data_len_t)args.cra_priv_sz);
4042
4043 if (status != IBT_SUCCESS) {
4044 DERR("cr_accept: ibt_cm_proceed returned %d\n", status);
4045 *rvalp = (int)status;
4046 retval = 0;
4047 }
4048 /*
4049 * note that the CM handler may actually be called at this
4050 * point. but since ep_state is still in TRANSITIONING, the
4051 * handler will wait until we transition to ACCEPTING. this
4052 * prevents the case where we set ep_state to ACCEPTING after
4053 * daplka_service_conn_est sets ep_state to CONNECTED.
4054 */
4055 new_state = DAPLKA_EP_STATE_ACCEPTING;
4056
4057 cleanup:;
4058 if (sp_rp != NULL) {
4059 DAPLKA_RS_UNREF(sp_rp);
4060 }
4061 if (ep_rp != NULL) {
4062 daplka_ep_set_state(ep_rp, old_state, new_state);
4063 DAPLKA_RS_UNREF(ep_rp);
4064 }
4065 return (retval);
4066 }
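
/*
 * The block below is an editor-added illustrative sketch, not part of
 * the driver: it distills the backlog-slot claim pattern shared by
 * cr_accept, cr_reject and cr_handoff. The slot is validated, its CM
 * session id taken, and the slot recycled, all under sp_lock, so the
 * slot can immediately serve the next incoming REQ. The guard macro
 * and function name are hypothetical.
 */
#ifdef DAPLKA_EXAMPLES
static void *
daplka_backlog_claim_sketch(daplka_sp_resource_t *sp_rp, uint16_t bkl_index)
{
	daplka_sp_conn_pend_t *conn;
	void *sid = NULL;

	mutex_enter(&sp_rp->sp_lock);
	if (bkl_index < sp_rp->sp_backlog_size) {
		conn = &sp_rp->sp_backlog[bkl_index];
		if (conn->spcp_state == DAPLKA_SPCP_PENDING &&
		    conn->spcp_sid != NULL) {
			/* take ownership of the CM session id ... */
			sid = conn->spcp_sid;
			/* ... and recycle the slot for new requests */
			conn->spcp_sid = NULL;
			conn->spcp_state = DAPLKA_SPCP_INIT;
			conn->spcp_req_len = 0;
		}
	}
	mutex_exit(&sp_rp->sp_lock);
	return (sid);
}
#endif /* DAPLKA_EXAMPLES */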
4067
4068 /*
4069 * this function is called by the client to reject a
4070 * connection request.
4071 */
4072 /* ARGSUSED */
4073 static int
4074 daplka_cr_reject(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4075 cred_t *cred, int *rvalp)
4076 {
4077 dapl_cr_reject_t args;
4078 daplka_sp_resource_t *sp_rp = NULL;
4079 daplka_sp_conn_pend_t *conn;
4080 ibt_cm_proceed_reply_t proc_reply;
4081 ibt_cm_status_t proc_status;
4082 ibt_status_t status;
4083 uint16_t bkl_index;
4084 int retval = 0;
4085 void *sid;
4086
4087 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_reject_t),
4088 mode);
4089 if (retval != 0) {
4090 DERR("cr_reject: copyin error %d\n", retval);
4091 return (EFAULT);
4092 }
4093 /* get sp resource */
4094 sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
4095 args.crr_sp_hkey);
4096 if (sp_rp == NULL) {
4097 DERR("cr_reject: cannot find sp resource\n");
4098 return (EINVAL);
4099 }
4100 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4101
4102 D2("cr_reject: psep(0x%llx)\n", (longlong_t)args.crr_bkl_cookie);
4103
4104 mutex_enter(&sp_rp->sp_lock);
4105 bkl_index = DAPLKA_GET_PSEP_INDEX(args.crr_bkl_cookie);
4106 /*
4107 * make sure the backlog index is not bogus.
4108 */
4109 if (bkl_index >= sp_rp->sp_backlog_size) {
4110 DERR("cr_reject: invalid backlog index 0x%llx %d\n",
4111 (longlong_t)args.crr_bkl_cookie, bkl_index);
4112 mutex_exit(&sp_rp->sp_lock);
4113 retval = EINVAL;
4114 goto cleanup;
4115 }
4116 /*
4117 * make sure the backlog index indeed refers
4118 * to a pending connection.
4119 */
4120 conn = &sp_rp->sp_backlog[bkl_index];
4121 if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4122 DERR("cr_reject: invalid conn state %d\n",
4123 conn->spcp_state);
4124 mutex_exit(&sp_rp->sp_lock);
4125 retval = EINVAL;
4126 goto cleanup;
4127 }
4128 if (conn->spcp_sid == NULL) {
4129 DERR("cr_reject: sid == NULL\n");
4130 mutex_exit(&sp_rp->sp_lock);
4131 retval = EINVAL;
4132 goto cleanup;
4133 }
4134 bzero(&proc_reply, sizeof (proc_reply));
4135 sid = conn->spcp_sid;
4136
4137 /*
4138 * this clears our slot in the backlog array.
4139 * this slot may now be used by other pending connections.
4140 */
4141 conn->spcp_sid = NULL;
4142 conn->spcp_state = DAPLKA_SPCP_INIT;
4143 conn->spcp_req_len = 0;
4144
4145 switch (args.crr_reason) {
4146 case DAPL_IB_CM_REJ_REASON_CONSUMER_REJ:
4147 /* results in IBT_CM_CONSUMER as the reason for reject */
4148 proc_status = IBT_CM_REJECT;
4149 break;
4150 case DAPL_IB_CME_LOCAL_FAILURE:
4151 /*FALLTHRU*/
4152 case DAPL_IB_CME_DESTINATION_UNREACHABLE:
4153 /* results in IBT_CM_NO_RESC as the reason for reject */
4154 proc_status = IBT_CM_NO_RESOURCE;
4155 break;
4156 default:
		/* unexpected reason code */
4158 ASSERT(!"unexpected reject reason code");
4159 proc_status = IBT_CM_NO_RESOURCE;
4160 break;
4161 }
4162
4163 mutex_exit(&sp_rp->sp_lock);
4164
4165 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, proc_status,
4166 &proc_reply, NULL, 0);
4167
4168 if (status != IBT_SUCCESS) {
4169 DERR("cr_reject: ibt_cm_proceed returned %d\n", status);
4170 *rvalp = (int)status;
4171 retval = 0;
4172 }
4173
4174 cleanup:;
4175 if (sp_rp != NULL) {
4176 DAPLKA_RS_UNREF(sp_rp);
4177 }
4178 return (retval);
4179 }
4180
4181
4182 /*
4183 * daplka_sp_match is used by daplka_hash_walk for finding SPs
4184 */
4185 typedef struct daplka_sp_match_s {
4186 uint64_t spm_conn_qual;
4187 daplka_sp_resource_t *spm_sp_rp;
4188 } daplka_sp_match_t;
4189
4190 static int
4191 daplka_sp_match(void *objp, void *arg)
4192 {
4193 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)objp;
4194
4195 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4196 if (sp_rp->sp_conn_qual ==
4197 ((daplka_sp_match_t *)arg)->spm_conn_qual) {
4198 ((daplka_sp_match_t *)arg)->spm_sp_rp = sp_rp;
4199 D2("daplka_sp_match: found sp, conn_qual %016llu\n",
4200 (longlong_t)((daplka_sp_match_t *)arg)->spm_conn_qual);
4201 DAPLKA_RS_REF(sp_rp);
4202 return (1);
4203 }
4204 return (0);
4205 }
4206
4207 /*
 * cr_handoff allows the client to hand off a connection request from
4209 * one service point to another.
4210 */
4211 /* ARGSUSED */
4212 static int
4213 daplka_cr_handoff(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4214 cred_t *cred, int *rvalp)
4215 {
4216 dapl_cr_handoff_t args;
4217 daplka_sp_resource_t *sp_rp = NULL, *new_sp_rp = NULL;
4218 daplka_sp_conn_pend_t *conn;
4219 daplka_sp_match_t sp_match;
4220 ibt_cm_event_t fake_event;
4221 ibt_cm_status_t cm_status;
4222 ibt_status_t status;
4223 uint16_t bkl_index;
4224 void *sid, *priv = NULL;
4225 int retval = 0, priv_len = 0;
4226
4227 D3("cr_handoff: entering\n");
4228 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_handoff_t),
4229 mode);
4230 if (retval != 0) {
4231 DERR("cr_handoff: copyin error %d\n", retval);
4232 return (EFAULT);
4233 }
4234 /* get sp resource */
4235 sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
4236 args.crh_sp_hkey);
4237 if (sp_rp == NULL) {
4238 DERR("cr_handoff: cannot find sp resource\n");
4239 return (EINVAL);
4240 }
4241 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4242
4243 /*
4244 * find the destination service point.
4245 */
4246 sp_match.spm_conn_qual = args.crh_conn_qual;
4247 sp_match.spm_sp_rp = NULL;
4248 daplka_hash_walk(&daplka_global_sp_htbl, daplka_sp_match,
4249 (void *)&sp_match, RW_READER);
4250
4251 /*
4252 * return if we cannot find the service point
4253 */
4254 if (sp_match.spm_sp_rp == NULL) {
4255 DERR("cr_handoff: new sp not found, conn qual = %llu\n",
4256 (longlong_t)args.crh_conn_qual);
4257 retval = EINVAL;
4258 goto cleanup;
4259 }
4260 new_sp_rp = sp_match.spm_sp_rp;
4261
4262 /*
4263 * the spec does not discuss the security implications of this
4264 * function. to be safe, we currently only allow processes
	 * owned by the same user to hand off connection requests
4266 * to each other.
4267 */
4268 if (crgetruid(cred) != new_sp_rp->sp_ruid) {
4269 DERR("cr_handoff: permission denied\n");
4270 retval = EPERM;
4271 goto cleanup;
4272 }
4273
4274 D2("cr_handoff: psep(0x%llx)\n", (longlong_t)args.crh_bkl_cookie);
4275
4276 mutex_enter(&sp_rp->sp_lock);
4277 bkl_index = DAPLKA_GET_PSEP_INDEX(args.crh_bkl_cookie);
4278 /*
4279 * make sure the backlog index is not bogus.
4280 */
4281 if (bkl_index >= sp_rp->sp_backlog_size) {
4282 DERR("cr_handoff: invalid backlog index 0x%llx %d\n",
4283 (longlong_t)args.crh_bkl_cookie, bkl_index);
4284 mutex_exit(&sp_rp->sp_lock);
4285 retval = EINVAL;
4286 goto cleanup;
4287 }
4288 /*
4289 * make sure the backlog index indeed refers
4290 * to a pending connection.
4291 */
4292 conn = &sp_rp->sp_backlog[bkl_index];
4293 if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4294 DERR("cr_handoff: invalid conn state %d\n",
4295 conn->spcp_state);
4296 mutex_exit(&sp_rp->sp_lock);
4297 retval = EINVAL;
4298 goto cleanup;
4299 }
4300 if (conn->spcp_sid == NULL) {
4301 DERR("cr_handoff: sid == NULL\n");
4302 mutex_exit(&sp_rp->sp_lock);
4303 retval = EINVAL;
4304 goto cleanup;
4305 }
4306 sid = conn->spcp_sid;
4307 priv = NULL;
4308 priv_len = conn->spcp_req_len;
4309 if (priv_len > 0) {
4310 priv = kmem_zalloc(priv_len, daplka_km_flags);
4311 if (priv == NULL) {
4312 mutex_exit(&sp_rp->sp_lock);
4313 retval = ENOMEM;
4314 goto cleanup;
4315 }
4316 bcopy(conn->spcp_req_data, priv, priv_len);
4317 }
4318 /*
4319 * this clears our slot in the backlog array.
4320 * this slot may now be used by other pending connections.
4321 */
4322 conn->spcp_sid = NULL;
4323 conn->spcp_state = DAPLKA_SPCP_INIT;
4324 conn->spcp_req_len = 0;
4325 mutex_exit(&sp_rp->sp_lock);
4326
4327 /* fill fake_event and call service_req handler */
4328 bzero(&fake_event, sizeof (fake_event));
4329 fake_event.cm_type = IBT_CM_EVENT_REQ_RCV;
4330 fake_event.cm_session_id = sid;
4331 fake_event.cm_priv_data_len = priv_len;
4332 fake_event.cm_priv_data = priv;
4333
4334 cm_status = daplka_cm_service_req(new_sp_rp,
4335 &fake_event, NULL, priv, (ibt_priv_data_len_t)priv_len);
4336 if (cm_status != IBT_CM_DEFER) {
4337 ibt_cm_proceed_reply_t proc_reply;
4338
4339 DERR("cr_handoff: service_req returned %d\n", cm_status);
4340 /*
4341 * if for some reason cm_service_req failed, we
4342 * reject the connection.
4343 */
4344 bzero(&proc_reply, sizeof (proc_reply));
4345
4346 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid,
4347 IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
4348 if (status != IBT_SUCCESS) {
4349 DERR("cr_handoff: ibt_cm_proceed returned %d\n",
4350 status);
4351 }
4352 *rvalp = (int)status;
4353 retval = 0;
4354 }
4355
4356 cleanup:;
4357 if (priv_len > 0 && priv != NULL) {
4358 kmem_free(priv, priv_len);
4359 }
4360 if (new_sp_rp != NULL) {
4361 DAPLKA_RS_UNREF(new_sp_rp);
4362 }
4363 if (sp_rp != NULL) {
4364 DAPLKA_RS_UNREF(sp_rp);
4365 }
4366 D3("cr_handoff: exiting\n");
4367 return (retval);
4368 }
4369
4370 /*
4371 * returns a list of hca attributes
4372 */
4373 /* ARGSUSED */
4374 static int
4375 daplka_ia_query(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4376 cred_t *cred, int *rvalp)
4377 {
4378 dapl_ia_query_t args;
4379 int retval;
4380 ibt_hca_attr_t *hcap;
4381
4382 hcap = &ia_rp->ia_hca->hca_attr;
4383
4384 /*
	 * Take the ibt_hca_attr_t fields and stuff them into dapl_hca_attr_t
4386 */
4387 args.hca_attr.dhca_vendor_id = hcap->hca_vendor_id;
4388 args.hca_attr.dhca_device_id = hcap->hca_device_id;
4389 args.hca_attr.dhca_version_id = hcap->hca_version_id;
4390 args.hca_attr.dhca_max_chans = hcap->hca_max_chans;
4391 args.hca_attr.dhca_max_chan_sz = hcap->hca_max_chan_sz;
4392 args.hca_attr.dhca_max_sgl = hcap->hca_max_sgl;
4393 args.hca_attr.dhca_max_cq = hcap->hca_max_cq;
4394 args.hca_attr.dhca_max_cq_sz = hcap->hca_max_cq_sz;
4395 args.hca_attr.dhca_max_memr = hcap->hca_max_memr;
4396 args.hca_attr.dhca_max_memr_len = hcap->hca_max_memr_len;
4397 args.hca_attr.dhca_max_mem_win = hcap->hca_max_mem_win;
4398 args.hca_attr.dhca_max_rdma_in_chan = hcap->hca_max_rdma_in_chan;
4399 args.hca_attr.dhca_max_rdma_out_chan = hcap->hca_max_rdma_out_chan;
4400 args.hca_attr.dhca_max_partitions = hcap->hca_max_partitions;
4401 args.hca_attr.dhca_nports = hcap->hca_nports;
4402 args.hca_attr.dhca_node_guid = hcap->hca_node_guid;
4403 args.hca_attr.dhca_max_pd = hcap->hca_max_pd;
4404 args.hca_attr.dhca_max_srqs = hcap->hca_max_srqs;
4405 args.hca_attr.dhca_max_srqs_sz = hcap->hca_max_srqs_sz;
4406 args.hca_attr.dhca_max_srq_sgl = hcap->hca_max_srq_sgl;
4407
4408 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ia_query_t),
4409 mode);
4410 if (retval != 0) {
4411 DERR("ia_query: copyout error %d\n", retval);
4412 return (EFAULT);
4413 }
4414 return (0);
4415 }
4416
4417 /*
 * This routine is passed to the hash walk in daplka_pre_mr_cleanup_callback;
 * it frees the mw embedded in the mw resource object.
4420 */
4421
4422 /* ARGSUSED */
4423 static int
4424 daplka_mr_cb_freemw(void *objp, void *arg)
4425 {
4426 daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)objp;
4427 ibt_mw_hdl_t mw_hdl;
4428 ibt_status_t status;
4429
4430 D3("mr_cb_freemw: entering, mw_rp 0x%p\n", mw_rp);
4431 DAPLKA_RS_REF(mw_rp);
4432
4433 mutex_enter(&mw_rp->mw_lock);
4434 mw_hdl = mw_rp->mw_hdl;
4435 /*
4436 * we set mw_hdl to NULL so it won't get freed again
4437 */
4438 mw_rp->mw_hdl = NULL;
4439 mutex_exit(&mw_rp->mw_lock);
4440
4441 if (mw_hdl != NULL) {
4442 status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, mw_hdl);
4443 if (status != IBT_SUCCESS) {
4444 DERR("mr_cb_freemw: ibt_free_mw returned %d\n", status);
4445 }
4446 D3("mr_cb_freemw: mw freed\n");
4447 }
4448
4449 DAPLKA_RS_UNREF(mw_rp);
4450 return (0);
4451 }
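
/*
 * The block below is an editor-added illustrative sketch, not part of
 * the driver: it restates the handle-stealing idiom used in
 * daplka_mr_cb_freemw above. The handle is detached while the lock is
 * held, so no other thread can free it a second time, and the
 * potentially blocking free call runs with the lock dropped. The
 * guard macro and function name are hypothetical.
 */
#ifdef DAPLKA_EXAMPLES
static void
daplka_mw_steal_and_free_sketch(daplka_mw_resource_t *mw_rp)
{
	ibt_mw_hdl_t mw_hdl;

	mutex_enter(&mw_rp->mw_lock);
	mw_hdl = mw_rp->mw_hdl;		/* take ownership ... */
	mw_rp->mw_hdl = NULL;		/* ... so nobody frees it twice */
	mutex_exit(&mw_rp->mw_lock);

	if (mw_hdl != NULL) {
		/* blocking call, deliberately made outside mw_lock */
		(void) daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, mw_hdl);
	}
}
#endif /* DAPLKA_EXAMPLES */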
4452
4453 /*
4454 * This routine is called from HCA driver's umem lock undo callback
4455 * when the memory associated with an MR is being unmapped. In this callback
 * we free all the MWs associated with the IA and post an unaffiliated
4457 * async event to tell the app that there was a catastrophic event.
4458 * This allows the HCA to deregister the MR in its callback processing.
4459 */
4460 static void
4461 daplka_pre_mr_cleanup_callback(void *arg1, void *arg2 /*ARGSUSED*/)
4462 {
4463 daplka_mr_resource_t *mr_rp;
4464 daplka_ia_resource_t *ia_rp;
4465 #ifdef _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
4466 ibt_async_event_t event;
4467 ibt_hca_attr_t *hca_attrp;
4468 #endif
4469 minor_t rnum;
4470
4471 mr_rp = (daplka_mr_resource_t *)arg1;
4472 rnum = DAPLKA_RS_RNUM(mr_rp);
4473 daplka_shared_mr_free(mr_rp);
4474
4475 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
4476 if (ia_rp == NULL) {
4477 DERR("daplka_mr_unlock_callback: resource not found, rnum %d\n",
4478 rnum);
4479 return;
4480 }
4481
4482 DERR("daplka_mr_unlock_callback: resource(%p) rnum(%d)\n", ia_rp, rnum);
4483
4484 mutex_enter(&ia_rp->ia_lock);
4485 /*
	 * an MW is being allocated OR an MW freeze has already begun.
	 * In both these cases we wait for that to complete before
	 * continuing.
4489 */
4490 while ((ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) ||
4491 (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)) {
4492 cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
4493 }
4494
4495 switch (ia_rp->ia_state) {
4496 case DAPLKA_IA_INIT:
4497 ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS;
4498 mutex_exit(&ia_rp->ia_lock);
4499 break;
4500 case DAPLKA_IA_MW_FROZEN:
		/* the mws on this ia have been freed */
4502 D2("daplka_mr_unlock_callback: ia_state %d nothing to do\n",
4503 ia_rp->ia_state);
4504 mutex_exit(&ia_rp->ia_lock);
4505 goto cleanup;
4506 default:
4507 ASSERT(!"daplka_mr_unlock_callback: IA state invalid");
4508 DERR("daplka_mr_unlock_callback: invalid ia_state %d\n",
4509 ia_rp->ia_state);
4510 mutex_exit(&ia_rp->ia_lock);
4511 goto cleanup;
4512 }
4513
4514 /*
4515 * Walk the mw hash table and free the mws. Acquire a writer
4516 * lock since we don't want anyone else traversing this tree
	 * while we are freeing the MWs.
4518 */
4519 daplka_hash_walk(&ia_rp->ia_mw_htbl, daplka_mr_cb_freemw, NULL,
4520 RW_WRITER);
4521
4522 mutex_enter(&ia_rp->ia_lock);
4523 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS);
4524 ia_rp->ia_state = DAPLKA_IA_MW_FROZEN;
4525 cv_broadcast(&ia_rp->ia_cv);
4526 mutex_exit(&ia_rp->ia_lock);
4527
4528 /*
4529 * Currently commented out because Oracle skgxp is incapable
4530 * of handling async events correctly.
4531 */
4532 #ifdef _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
4533 /*
 * Enqueue an unaffiliated async error event to indicate that this
 * IA has encountered a problem that caused the MWs to be freed
4536 */
4537
4538 /* Create a fake event, only relevant field is the hca_guid */
4539 bzero(&event, sizeof (ibt_async_event_t));
4540 hca_attrp = &ia_rp->ia_hca->hca_attr;
4541 event.ev_hca_guid = hca_attrp->hca_node_guid;
4542
4543 daplka_async_event_create(IBT_ERROR_LOCAL_CATASTROPHIC, &event, 0,
4544 ia_rp);
4545 #endif /* _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB */
4546
4547 cleanup:;
4548 D2("daplka_mr_unlock_callback: resource(%p) done\n", ia_rp);
4549 DAPLKA_RS_UNREF(ia_rp);
4550 }
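
/*
 * The block below is an editor-added illustrative sketch, not part of
 * the driver: it isolates the wait-then-claim pattern used by the
 * callback above. A thread blocks on ia_cv while some other thread
 * holds an *_IN_PROGRESS state, then claims the transitional state
 * itself before dropping ia_lock, guaranteeing at most one freezer at
 * a time. The guard macro and function name are hypothetical.
 */
#ifdef DAPLKA_EXAMPLES
static boolean_t
daplka_ia_begin_mw_freeze_sketch(daplka_ia_resource_t *ia_rp)
{
	mutex_enter(&ia_rp->ia_lock);
	while ((ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) ||
	    (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)) {
		cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
	}
	if (ia_rp->ia_state != DAPLKA_IA_INIT) {
		/* already frozen or unexpected; nothing to do */
		mutex_exit(&ia_rp->ia_lock);
		return (B_FALSE);
	}
	ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS;
	mutex_exit(&ia_rp->ia_lock);
	return (B_TRUE);
}
#endif /* DAPLKA_EXAMPLES */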
4551
4552 /*
4553 * registers a memory region.
4554 * memory locking will be done by the HCA driver.
4555 */
4556 /* ARGSUSED */
4557 static int
4558 daplka_mr_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4559 cred_t *cred, int *rvalp)
4560 {
4561 boolean_t inserted = B_FALSE;
4562 daplka_mr_resource_t *mr_rp;
4563 daplka_pd_resource_t *pd_rp;
4564 dapl_mr_register_t args;
4565 ibt_mr_data_in_t mr_cb_data_in;
4566 uint64_t mr_hkey = 0;
4567 ibt_status_t status;
4568 int retval;
4569
4570 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_register_t),
4571 mode);
4572 if (retval != 0) {
4573 DERR("mr_register: copyin error %d\n", retval);
4574 return (EINVAL);
4575 }
4576 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
4577 if (mr_rp == NULL) {
4578 DERR("mr_register: cannot allocate mr resource\n");
4579 return (ENOMEM);
4580 }
4581 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
4582 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
4583
4584 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
4585 mr_rp->mr_hca = ia_rp->ia_hca;
4586 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
4587 mr_rp->mr_next = NULL;
4588 mr_rp->mr_shared_mr = NULL;
4589
4590 /* get pd handle */
4591 pd_rp = (daplka_pd_resource_t *)
4592 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mr_pd_hkey);
4593 if (pd_rp == NULL) {
4594 DERR("mr_register: cannot find pd resource\n");
4595 retval = EINVAL;
4596 goto cleanup;
4597 }
4598 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
4599 mr_rp->mr_pd_res = pd_rp;
4600
4601 mr_rp->mr_attr.mr_vaddr = args.mr_vaddr;
4602 mr_rp->mr_attr.mr_len = args.mr_len;
4603 mr_rp->mr_attr.mr_as = curproc->p_as;
4604 mr_rp->mr_attr.mr_flags = args.mr_flags | IBT_MR_NOSLEEP;
4605
4606 D3("mr_register: mr_vaddr %p, mr_len %llu, mr_flags 0x%x\n",
4607 (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
4608 (longlong_t)mr_rp->mr_attr.mr_len,
4609 mr_rp->mr_attr.mr_flags);
4610
4611 status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
4612 mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, &mr_rp->mr_hdl,
4613 &mr_rp->mr_desc);
4614
4615 if (status != IBT_SUCCESS) {
4616 DERR("mr_register: ibt_register_mr error %d\n", status);
4617 *rvalp = (int)status;
4618 retval = 0;
4619 goto cleanup;
4620 }
4621
4622 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
4623 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
4624 mr_cb_data_in.mr_arg1 = (void *)mr_rp;
4625 mr_cb_data_in.mr_arg2 = NULL;
4626
4627 /* Pass the service driver mr cleanup handler to the hca driver */
4628 status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
4629 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
4630 &mr_cb_data_in, sizeof (mr_cb_data_in));
4631
4632 if (status != IBT_SUCCESS) {
4633 DERR("mr_register: ibt_ci_data_in error(%d) ver(%d)",
4634 status, mr_cb_data_in.mr_rev);
4635 *rvalp = (int)status;
4636 retval = 0;
4637 goto cleanup;
4638 }
4639
4640 /* insert into mr hash table */
4641 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
4642 &mr_hkey, (void *)mr_rp);
4643 if (retval != 0) {
4644 DERR("mr_register: cannot insert mr resource into mr_htbl\n");
4645 goto cleanup;
4646 }
4647 inserted = B_TRUE;
4648
4649 args.mr_lkey = mr_rp->mr_desc.md_lkey;
4650 args.mr_rkey = mr_rp->mr_desc.md_rkey;
4651 args.mr_hkey = mr_hkey;
4652
4653 retval = ddi_copyout((void *)&args, (void *)arg,
4654 sizeof (dapl_mr_register_t), mode);
4655 if (retval != 0) {
4656 DERR("mr_register: copyout error %d\n", retval);
4657 retval = EFAULT;
4658 goto cleanup;
4659 }
4660 return (0);
4661
4662 cleanup:;
4663 if (inserted) {
4664 daplka_mr_resource_t *free_rp = NULL;
4665
4666 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
4667 (void **)&free_rp);
4668 if (free_rp != mr_rp) {
4669 DERR("mr_register: cannot remove mr from hash table\n");
4670 /*
4671 * we can only get here if another thread
4672 * has completed the cleanup in mr_deregister
4673 */
4674 return (retval);
4675 }
4676 }
4677 DAPLKA_RS_UNREF(mr_rp);
4678 return (retval);
4679 }
4680
4681 /*
4682 * registers a shared memory region.
4683 * the client calls this function with the intention to share the memory
4684 * region with other clients. it is assumed that, prior to calling this
4685 * function, the client(s) are already sharing parts of their address
4686 * space using a mechanism such as SYSV shared memory. the first client
4687 * that calls this function will create and insert a daplka_shared_mr_t
4688 * object into the global daplka_shared_mr_tree. this shared mr object
4689 * will be identified by a unique 40-byte key and will maintain a list
4690 * of mr resources. every time this function gets called with the same
4691 * 40-byte key, a new mr resource (containing a new mr handle generated
4692 * by ibt_register_mr or ibt_register_shared_mr) is created and inserted
4693 * into this list. similarly, every time a shared mr gets deregistered
4694 * or invalidated by a callback, the mr resource gets removed from this
4695 * list. the shared mr object has a reference count. when it drops to
4696 * zero, the shared mr object will be removed from the global avl tree
4697 * and be freed.
4698 */
4699 /* ARGSUSED */
4700 static int
4701 daplka_mr_register_shared(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4702 cred_t *cred, int *rvalp)
4703 {
4704 dapl_mr_register_shared_t args;
4705 daplka_shared_mr_t *smrp = NULL;
4706 daplka_shared_mr_t tmp_smr;
4707 ibt_mr_data_in_t mr_cb_data_in;
4708 avl_index_t where;
4709 boolean_t inserted = B_FALSE;
4710 daplka_mr_resource_t *mr_rp = NULL;
4711 daplka_pd_resource_t *pd_rp;
4712 uint64_t mr_hkey = 0;
4713 ibt_status_t status;
4714 int retval;
4715
4716 retval = ddi_copyin((void *)arg, &args,
4717 sizeof (dapl_mr_register_shared_t), mode);
4718 if (retval != 0) {
4719 DERR("mr_register_shared: copyin error %d\n", retval);
4720 return (EINVAL);
4721 }
4722
4723 mutex_enter(&daplka_shared_mr_lock);
4724 /*
4725 * find smrp from the global avl tree.
4726 * the 40-byte key is used as the lookup key.
4727 */
4728 tmp_smr.smr_cookie = args.mrs_shm_cookie;
4729 smrp = (daplka_shared_mr_t *)
4730 avl_find(&daplka_shared_mr_tree, &tmp_smr, &where);
4731 if (smrp != NULL) {
4732 D2("mr_register_shared: smrp 0x%p, found cookie:\n"
4733 "0x%016llx%016llx%016llx%016llx%016llx\n", smrp,
4734 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4735 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4736 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4737 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4738 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4739
4740 /*
4741 * if the smrp exists, other threads could still be
4742 * accessing it. we wait until they are done before
4743 * we continue.
4744 */
4745 smrp->smr_refcnt++;
4746 while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
4747 D2("mr_register_shared: smrp 0x%p, "
4748 "waiting in transitioning state, refcnt %d\n",
4749 smrp, smrp->smr_refcnt);
4750 cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
4751 }
4752 ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
4753 D2("mr_register_shared: smrp 0x%p, refcnt %d, ready\n",
4754 smrp, smrp->smr_refcnt);
4755
4756 /*
4757 * we set smr_state to TRANSITIONING to temporarily
4758 * prevent other threads from trying to access smrp.
4759 */
4760 smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4761 } else {
4762 D2("mr_register_shared: cannot find cookie:\n"
4763 "0x%016llx%016llx%016llx%016llx%016llx\n",
4764 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4765 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4766 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4767 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4768 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4769
4770 /*
4771 * if we cannot find smrp, we need to create and
4772 * insert one into daplka_shared_mr_tree
4773 */
4774 smrp = kmem_zalloc(sizeof (daplka_shared_mr_t),
4775 daplka_km_flags);
4776 if (smrp == NULL) {
4777 retval = ENOMEM;
4778 mutex_exit(&daplka_shared_mr_lock);
4779 goto cleanup;
4780 }
4781 smrp->smr_refcnt = 1;
4782 smrp->smr_cookie = args.mrs_shm_cookie;
4783 smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4784 smrp->smr_mr_list = NULL;
4785 cv_init(&smrp->smr_cv, NULL, CV_DRIVER, NULL);
4786 avl_insert(&daplka_shared_mr_tree, smrp, where);
4787 }
4788 mutex_exit(&daplka_shared_mr_lock);
4789
4790 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
	if (mr_rp == NULL) {
		DERR("mr_register_shared: cannot allocate mr resource\n");
		retval = ENOMEM;
		goto cleanup;
	}
4795 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
4796 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
4797
4798 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
4799 mr_rp->mr_hca = ia_rp->ia_hca;
4800 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
4801 mr_rp->mr_next = NULL;
4802 mr_rp->mr_shared_mr = NULL;
4803
4804 /* get pd handle */
4805 pd_rp = (daplka_pd_resource_t *)
4806 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mrs_pd_hkey);
4807 if (pd_rp == NULL) {
4808 DERR("mr_register_shared: cannot find pd resource\n");
4809 retval = EINVAL;
4810 goto cleanup;
4811 }
4812 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
4813 mr_rp->mr_pd_res = pd_rp;
4814
4815 mr_rp->mr_attr.mr_vaddr = args.mrs_vaddr;
4816 mr_rp->mr_attr.mr_len = args.mrs_len;
4817 mr_rp->mr_attr.mr_flags = args.mrs_flags | IBT_MR_NOSLEEP;
4818 mr_rp->mr_attr.mr_as = curproc->p_as;
4819
4820 D2("mr_register_shared: mr_vaddr 0x%p, mr_len %llu, "
4821 "mr_flags 0x%x, mr_as 0x%p, mr_exists %d, smrp 0x%p\n",
4822 (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
4823 (longlong_t)mr_rp->mr_attr.mr_len,
4824 mr_rp->mr_attr.mr_flags, mr_rp->mr_attr.mr_as,
4825 (int)(smrp->smr_mr_list != NULL), smrp);
4826
4827 /*
4828 * since we are in TRANSITIONING state, we are guaranteed
4829 * that we have exclusive access to smr_mr_list.
4830 */
4831 if (smrp->smr_mr_list != NULL) {
4832 ibt_smr_attr_t mem_sattr;
4833
4834 /*
4835 * a non-null smr_mr_list indicates that someone
4836 * else has already inserted an mr_resource into
4837 * smr_mr_list. we use the mr_handle from the first
4838 * element as an arg to ibt_register_shared_mr.
4839 */
4840 mem_sattr.mr_vaddr = smrp->smr_mr_list->mr_desc.md_vaddr;
4841 mem_sattr.mr_flags = mr_rp->mr_attr.mr_flags;
4842
4843 D2("mr_register_shared: mem_sattr vaddr 0x%p flags 0x%x\n",
4844 (void *)(uintptr_t)mem_sattr.mr_vaddr, mem_sattr.mr_flags);
4845 status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
4846 smrp->smr_mr_list->mr_hdl, mr_rp->mr_pd_res->pd_hdl,
4847 &mem_sattr, &mr_rp->mr_hdl, &mr_rp->mr_desc);
4848
4849 if (status != IBT_SUCCESS) {
4850 DERR("mr_register_shared: "
4851 "ibt_register_shared_mr error %d\n", status);
4852 *rvalp = (int)status;
4853 retval = 0;
4854 goto cleanup;
4855 }
4856 } else {
4857 /*
4858 * an mr does not exist yet. we need to create one
4859 * using ibt_register_mr.
4860 */
4861 status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
4862 mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr,
4863 &mr_rp->mr_hdl, &mr_rp->mr_desc);
4864
4865 if (status != IBT_SUCCESS) {
4866 DERR("mr_register_shared: "
4867 "ibt_register_mr error %d\n", status);
4868 *rvalp = (int)status;
4869 retval = 0;
4870 goto cleanup;
4871 }
4872 }
4873
4874 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
4875 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
4876 mr_cb_data_in.mr_arg1 = (void *)mr_rp;
4877 mr_cb_data_in.mr_arg2 = NULL;
4878
4879 /* Pass the service driver mr cleanup handler to the hca driver */
4880 status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
4881 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
4882 &mr_cb_data_in, sizeof (mr_cb_data_in));
4883
4884 if (status != IBT_SUCCESS) {
4885 DERR("mr_register_shared: ibt_ci_data_in error(%d) ver(%d)",
4886 status, mr_cb_data_in.mr_rev);
4887 *rvalp = (int)status;
4888 retval = 0;
4889 goto cleanup;
4890 }
4891
4892 /*
	 * we bump the reference count of mr_rp and enqueue it onto smrp.
4894 */
4895 DAPLKA_RS_REF(mr_rp);
4896 mr_rp->mr_next = smrp->smr_mr_list;
4897 smrp->smr_mr_list = mr_rp;
4898 mr_rp->mr_shared_mr = smrp;
4899
4900 /* insert into mr hash table */
4901 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
4902 &mr_hkey, (void *)mr_rp);
4903 if (retval != 0) {
4904 DERR("mr_register_shared: cannot insert mr resource\n");
4905 goto cleanup;
4906 }
4907 inserted = B_TRUE;
4908
4909 /*
4910 * at this point, there are two references to our mr resource.
4911 * one is kept in ia_mr_htbl. the other is kept in the list
4912 * within this shared mr object (smrp). when we deregister this
4913 * mr or when a callback invalidates this mr, the reference kept
4914 * by this shared mr object will be removed.
4915 */
4916
4917 args.mrs_lkey = mr_rp->mr_desc.md_lkey;
4918 args.mrs_rkey = mr_rp->mr_desc.md_rkey;
4919 args.mrs_hkey = mr_hkey;
4920
4921 retval = ddi_copyout((void *)&args, (void *)arg,
4922 sizeof (dapl_mr_register_shared_t), mode);
4923 if (retval != 0) {
4924 DERR("mr_register_shared: copyout error %d\n", retval);
4925 retval = EFAULT;
4926 goto cleanup;
4927 }
4928
4929 /*
4930 * set the state to READY to allow others to continue
4931 */
4932 mutex_enter(&daplka_shared_mr_lock);
4933 smrp->smr_state = DAPLKA_SMR_READY;
4934 cv_broadcast(&smrp->smr_cv);
4935 mutex_exit(&daplka_shared_mr_lock);
4936 return (0);
4937
4938 cleanup:;
4939 if (inserted) {
4940 daplka_mr_resource_t *free_rp = NULL;
4941
4942 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
4943 (void **)&free_rp);
4944 if (free_rp != mr_rp) {
4945 DERR("mr_register_shared: "
4946 "cannot remove mr from hash table\n");
4947 /*
4948 * we can only get here if another thread
4949 * has completed the cleanup in mr_deregister
4950 */
4951 return (retval);
4952 }
4953 }
4954 if (smrp != NULL) {
4955 mutex_enter(&daplka_shared_mr_lock);
4956 ASSERT(smrp->smr_refcnt > 0);
4957 smrp->smr_refcnt--;
4958
4959 if (smrp->smr_refcnt == 0) {
4960 DERR("mr_register_shared: freeing smrp 0x%p\n", smrp);
4961 avl_remove(&daplka_shared_mr_tree, smrp);
4962 if (smrp->smr_mr_list != NULL) {
4963 /*
4964 * the refcnt is 0. if there is anything
4965 * left on the list, it must be ours.
4966 */
4967 ASSERT(smrp->smr_mr_list == mr_rp);
4968 DAPLKA_RS_UNREF(mr_rp);
4969 smrp->smr_mr_list = NULL;
4970 ASSERT(mr_rp->mr_shared_mr == smrp);
4971 mr_rp->mr_shared_mr = NULL;
4972 ASSERT(mr_rp->mr_next == NULL);
4973 }
4974 smrp->smr_state = DAPLKA_SMR_FREED;
4975 cv_destroy(&smrp->smr_cv);
4976 kmem_free(smrp, sizeof (daplka_shared_mr_t));
4977 } else {
4978 DERR("mr_register_shared: resetting smr_state "
4979 "smrp 0x%p, %d waiters remain\n", smrp,
4980 smrp->smr_refcnt);
4981 ASSERT(smrp->smr_state == DAPLKA_SMR_TRANSITIONING);
4982 if (smrp->smr_mr_list != NULL && mr_rp != NULL) {
4983 daplka_mr_resource_t **mpp;
4984
4985 /*
4986 * search and remove mr_rp from smr_mr_list
4987 */
4988 mpp = &smrp->smr_mr_list;
4989 while (*mpp != NULL) {
4990 if (*mpp == mr_rp) {
4991 *mpp = (*mpp)->mr_next;
4992 DAPLKA_RS_UNREF(mr_rp);
4993 ASSERT(mr_rp->mr_shared_mr ==
4994 smrp);
4995 mr_rp->mr_shared_mr = NULL;
4996 mr_rp->mr_next = NULL;
4997 break;
4998 }
4999 mpp = &(*mpp)->mr_next;
5000 }
5001 }
5002 /*
5003 * note that smr_state == READY does not necessarily
			 * mean that smr_mr_list is non-empty. for this case,
5005 * we are doing cleanup because of a failure. we set
5006 * the state to READY to allow other threads to
5007 * continue.
5008 */
5009 smrp->smr_state = DAPLKA_SMR_READY;
5010 cv_broadcast(&smrp->smr_cv);
5011 }
5012 mutex_exit(&daplka_shared_mr_lock);
5013 }
5014 if (mr_rp != NULL) {
5015 DAPLKA_RS_UNREF(mr_rp);
5016 }
5017 return (retval);
5018 }
5019
5020 /*
5021 * registers a memory region using the attributes of an
5022 * existing region.
5023 */
5024 /* ARGSUSED */
5025 static int
5026 daplka_mr_register_lmr(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5027 cred_t *cred, int *rvalp)
5028 {
5029 boolean_t inserted = B_FALSE;
5030 dapl_mr_register_lmr_t args;
5031 ibt_mr_data_in_t mr_cb_data_in;
5032 daplka_mr_resource_t *orig_mr_rp = NULL;
5033 daplka_mr_resource_t *mr_rp;
5034 ibt_smr_attr_t mem_sattr;
5035 uint64_t mr_hkey = 0;
5036 ibt_status_t status;
5037 int retval;
5038
5039 retval = ddi_copyin((void *)arg, &args,
5040 sizeof (dapl_mr_register_lmr_t), mode);
5041 if (retval != 0) {
5042 DERR("mr_register_lmr: copyin error %d\n", retval);
5043 return (EINVAL);
5044 }
5045 orig_mr_rp = (daplka_mr_resource_t *)
5046 daplka_hash_lookup(&ia_rp->ia_mr_htbl, args.mrl_orig_hkey);
5047 if (orig_mr_rp == NULL) {
5048 DERR("mr_register_lmr: cannot find mr resource\n");
5049 return (EINVAL);
5050 }
5051 ASSERT(DAPLKA_RS_TYPE(orig_mr_rp) == DAPL_TYPE_MR);
5052
5053 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
5054 if (mr_rp == NULL) {
5055 DERR("mr_register_lmr: cannot allocate mr resource\n");
5056 retval = ENOMEM;
5057 goto cleanup;
5058 }
5059 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
5060 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
5061
5062 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
5063 mr_rp->mr_hca = ia_rp->ia_hca;
5064 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
5065 mr_rp->mr_next = NULL;
5066 mr_rp->mr_shared_mr = NULL;
5067
5068 DAPLKA_RS_REF(orig_mr_rp->mr_pd_res);
5069 mr_rp->mr_pd_res = orig_mr_rp->mr_pd_res;
5070 mr_rp->mr_attr = orig_mr_rp->mr_attr;
5071
5072 /* Pass the IO addr that was returned while allocating the orig MR */
5073 mem_sattr.mr_vaddr = orig_mr_rp->mr_desc.md_vaddr;
5074 mem_sattr.mr_flags = args.mrl_flags | IBT_MR_NOSLEEP;
5075
5076 status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
5077 orig_mr_rp->mr_hdl, mr_rp->mr_pd_res->pd_hdl, &mem_sattr,
5078 &mr_rp->mr_hdl, &mr_rp->mr_desc);
5079
5080 if (status != IBT_SUCCESS) {
5081 DERR("mr_register_lmr: ibt_register_shared_mr error %d\n",
5082 status);
5083 *rvalp = (int)status;
5084 retval = 0;
5085 goto cleanup;
5086 }
5087
5088 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
5089 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
5090 mr_cb_data_in.mr_arg1 = (void *)mr_rp;
5091 mr_cb_data_in.mr_arg2 = NULL;
5092
5093 /* Pass the service driver mr cleanup handler to the hca driver */
5094 status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
5095 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
5096 &mr_cb_data_in, sizeof (mr_cb_data_in));
5097
5098 if (status != IBT_SUCCESS) {
5099 DERR("mr_register_lmr: ibt_ci_data_in error(%d) ver(%d)",
5100 status, mr_cb_data_in.mr_rev);
5101 *rvalp = (int)status;
5102 retval = 0;
5103 goto cleanup;
5104 }
5105 mr_rp->mr_attr.mr_len = orig_mr_rp->mr_attr.mr_len;
5106 mr_rp->mr_attr.mr_flags = mem_sattr.mr_flags;
5107
5108 /* insert into mr hash table */
5109 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, &mr_hkey,
5110 (void *)mr_rp);
5111 if (retval != 0) {
		DERR("mr_register_lmr: cannot insert mr resource into "
		    "mr_htbl\n");
5113 goto cleanup;
5114 }
5115 inserted = B_TRUE;
5116
5117 args.mrl_lkey = mr_rp->mr_desc.md_lkey;
5118 args.mrl_rkey = mr_rp->mr_desc.md_rkey;
5119 args.mrl_hkey = mr_hkey;
5120
5121 retval = ddi_copyout((void *)&args, (void *)arg,
5122 sizeof (dapl_mr_register_lmr_t), mode);
5123 if (retval != 0) {
5124 DERR("mr_register_lmr: copyout error %d\n", retval);
5125 retval = EFAULT;
5126 goto cleanup;
5127 }
5128 if (orig_mr_rp != NULL) {
5129 DAPLKA_RS_UNREF(orig_mr_rp);
5130 }
5131 return (0);
5132
5133 cleanup:;
5134 if (inserted) {
5135 daplka_mr_resource_t *free_rp = NULL;
5136
5137 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
5138 (void **)&free_rp);
5139 if (free_rp != mr_rp) {
			DERR("mr_register_lmr: cannot remove mr from "
			    "hash table\n");
5141 /*
5142 * we can only get here if another thread
5143 * has completed the cleanup in mr_deregister
5144 */
5145 return (retval);
5146 }
5147 }
5148 if (orig_mr_rp != NULL) {
5149 DAPLKA_RS_UNREF(orig_mr_rp);
5150 }
5151 if (mr_rp != NULL) {
5152 DAPLKA_RS_UNREF(mr_rp);
5153 }
5154 return (retval);
5155 }
5156
5157 /*
5158 * this function is called by mr_deregister and mr_cleanup_callback to
 * remove an mr resource from the shared mr object mr_rp->mr_shared_mr.
5160 * if mr_shared_mr is already NULL, that means the region being
5161 * deregistered or invalidated is not a shared mr region and we can
5162 * return immediately.
5163 */
5164 static void
5165 daplka_shared_mr_free(daplka_mr_resource_t *mr_rp)
5166 {
5167 daplka_shared_mr_t *smrp;
5168
5169 /*
5170 * we need a lock because mr_callback also checks this field.
	 * for the rare case that mr_deregister and mr_cleanup_callback
	 * get called simultaneously, we are guaranteed that smrp won't
5173 * be dereferenced twice because either function will find
5174 * mr_shared_mr to be NULL.
5175 */
5176 mutex_enter(&mr_rp->mr_lock);
5177 smrp = mr_rp->mr_shared_mr;
5178 mr_rp->mr_shared_mr = NULL;
5179 mutex_exit(&mr_rp->mr_lock);
5180
5181 if (smrp != NULL) {
5182 daplka_mr_resource_t **mpp;
5183 boolean_t mr_found = B_FALSE;
5184
5185 mutex_enter(&daplka_shared_mr_lock);
5186 ASSERT(smrp->smr_refcnt > 0);
5187 while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
5188 cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
5189 }
5190 ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
5191 smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
5192 smrp->smr_refcnt--;
5193
5194 /*
5195 * search and remove mr_rp from smr_mr_list.
5196 * also UNREF mr_rp because it is no longer
5197 * on the list.
5198 */
5199 mpp = &smrp->smr_mr_list;
5200 while (*mpp != NULL) {
5201 if (*mpp == mr_rp) {
5202 *mpp = (*mpp)->mr_next;
5203 DAPLKA_RS_UNREF(mr_rp);
5204 mr_rp->mr_next = NULL;
5205 mr_found = B_TRUE;
5206 break;
5207 }
5208 mpp = &(*mpp)->mr_next;
5209 }
5210 /*
		 * since mr_cleanup_callback may not touch smr_mr_list
5212 * at this time (due to smr_state), we can be sure
5213 * that we can find and remove mr_rp from smr_mr_list
5214 */
5215 ASSERT(mr_found);
5216 if (smrp->smr_refcnt == 0) {
5217 D3("shared_mr_free: freeing smrp 0x%p\n", smrp);
5218 avl_remove(&daplka_shared_mr_tree, smrp);
5219 ASSERT(smrp->smr_mr_list == NULL);
5220 smrp->smr_state = DAPLKA_SMR_FREED;
5221 cv_destroy(&smrp->smr_cv);
5222 kmem_free(smrp, sizeof (daplka_shared_mr_t));
5223 } else {
5224 D3("shared_mr_free: smrp 0x%p, refcnt %d\n",
5225 smrp, smrp->smr_refcnt);
5226 smrp->smr_state = DAPLKA_SMR_READY;
5227 cv_broadcast(&smrp->smr_cv);
5228 }
5229 mutex_exit(&daplka_shared_mr_lock);
5230 }
5231 }
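
/*
 * The block below is an editor-added illustrative sketch, not part of
 * the driver: it isolates the pointer-to-pointer removal idiom used
 * twice above. Walking the list through the address of each link lets
 * the head and interior cases share one code path, with no "previous
 * element" bookkeeping. The guard macro and function name are
 * hypothetical; locking and UNREF are omitted for clarity.
 */
#ifdef DAPLKA_EXAMPLES
static boolean_t
daplka_mr_list_remove_sketch(daplka_mr_resource_t **head,
    daplka_mr_resource_t *target)
{
	daplka_mr_resource_t **mpp;

	for (mpp = head; *mpp != NULL; mpp = &(*mpp)->mr_next) {
		if (*mpp == target) {
			*mpp = target->mr_next;	/* unlink in place */
			target->mr_next = NULL;
			return (B_TRUE);
		}
	}
	return (B_FALSE);
}
#endif /* DAPLKA_EXAMPLES */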
5232
5233 /*
5234 * deregisters a memory region.
5235 * if mr is shared, remove reference from global shared mr object.
5236 * release the initial reference to the mr. if the mr's refcnt is
5237 * zero, call mr_destroy to free mr.
5238 */
5239 /* ARGSUSED */
5240 static int
5241 daplka_mr_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5242 cred_t *cred, int *rvalp)
5243 {
5244 daplka_mr_resource_t *mr_rp;
5245 dapl_mr_deregister_t args;
5246 int retval;
5247
5248 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_deregister_t),
5249 mode);
5250 if (retval != 0) {
5251 DERR("mr_deregister: copyin error %d\n", retval);
5252 return (EINVAL);
5253 }
5254 retval = daplka_hash_remove(&ia_rp->ia_mr_htbl,
5255 args.mrd_hkey, (void **)&mr_rp);
5256 if (retval != 0 || mr_rp == NULL) {
5257 DERR("mr_deregister: cannot find mr resource\n");
5258 return (EINVAL);
5259 }
5260 ASSERT(DAPLKA_RS_TYPE(mr_rp) == DAPL_TYPE_MR);
5261
5262 daplka_shared_mr_free(mr_rp);
5263 DAPLKA_RS_UNREF(mr_rp);
5264 return (0);
5265 }
5266
5267 /*
5268 * sync local memory regions on RDMA read or write.
5269 */
5270 /* ARGSUSED */
5271 static int
5272 daplka_mr_sync(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5273 cred_t *cred, int *rvalp)
5274 {
5275 dapl_mr_sync_t args;
5276 daplka_mr_resource_t *mr_rp[DAPL_MR_PER_SYNC];
5277 ibt_mr_sync_t mrs[DAPL_MR_PER_SYNC];
5278 uint32_t sync_direction_flags;
5279 ibt_status_t status;
5280 int i, j;
5281 int retval;
5282
5283 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_sync_t), mode);
5284 if (retval != 0) {
5285 DERR("mr_sync: copyin error %d\n", retval);
5286 return (EFAULT);
5287 }
5288
	/* bounds check on the number of segments */
5290 if (args.mrs_numseg > DAPL_MR_PER_SYNC) {
5291 DERR("mr_sync: number of segments too large\n");
5292 return (EINVAL);
5293 }
5294
5295 /* translate MR sync direction flag */
5296 if (args.mrs_flags == DAPL_MR_SYNC_RDMA_RD) {
5297 sync_direction_flags = IBT_SYNC_READ;
5298 } else if (args.mrs_flags == DAPL_MR_SYNC_RDMA_WR) {
5299 sync_direction_flags = IBT_SYNC_WRITE;
5300 } else {
5301 DERR("mr_sync: unknown flags\n");
5302 return (EINVAL);
5303 }
5304
5305 /*
5306 * all the segments are going to be sync'd by ibtl together
5307 */
5308 for (i = 0; i < args.mrs_numseg; i++) {
5309 mr_rp[i] = (daplka_mr_resource_t *)daplka_hash_lookup(
5310 &ia_rp->ia_mr_htbl, args.mrs_vec[i].mrsv_hkey);
5311 if (mr_rp[i] == NULL) {
5312 for (j = 0; j < i; j++) {
5313 DAPLKA_RS_UNREF(mr_rp[j]);
5314 }
5315 DERR("mr_sync: lookup error\n");
5316 return (EINVAL);
5317 }
5318 ASSERT(DAPLKA_RS_TYPE(mr_rp[i]) == DAPL_TYPE_MR);
5319 mrs[i].ms_handle = mr_rp[i]->mr_hdl;
5320 mrs[i].ms_vaddr = args.mrs_vec[i].mrsv_va;
5321 mrs[i].ms_len = args.mrs_vec[i].mrsv_len;
5322 mrs[i].ms_flags = sync_direction_flags;
5323 }
5324
5325 status = ibt_sync_mr(ia_rp->ia_hca_hdl, mrs, args.mrs_numseg);
5326 if (status != IBT_SUCCESS) {
5327 DERR("mr_sync: ibt_sync_mr error %d\n", status);
5328 *rvalp = (int)status;
5329 }
5330 for (i = 0; i < args.mrs_numseg; i++) {
5331 DAPLKA_RS_UNREF(mr_rp[i]);
5332 }
5333 return (0);
5334 }
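
/*
 * The block below is an editor-added illustrative sketch of how the
 * userland library would build a request for the sync handler above;
 * it is not part of the driver and would not compile in it. The ioctl
 * command name DAPL_MR_SYNC is a placeholder for the real command in
 * the interface header; the structure fields and direction flag are
 * the ones handled above.
 */
#ifdef DAPLKA_EXAMPLES
static int
daplka_mr_sync_usage_sketch(int ia_fd, uint64_t mr_hkey, uint64_t va,
    uint64_t len)
{
	dapl_mr_sync_t args;

	args.mrs_flags = DAPL_MR_SYNC_RDMA_RD;	/* about to RDMA read */
	args.mrs_numseg = 1;		/* must be <= DAPL_MR_PER_SYNC */
	args.mrs_vec[0].mrsv_hkey = mr_hkey;
	args.mrs_vec[0].mrsv_va = va;
	args.mrs_vec[0].mrsv_len = len;
	return (ioctl(ia_fd, DAPL_MR_SYNC, &args));
}
#endif /* DAPLKA_EXAMPLES */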
5335
5336 /*
5337 * destroys a memory region.
5338 * called when refcnt drops to zero.
5339 */
5340 static int
5341 daplka_mr_destroy(daplka_resource_t *gen_rp)
5342 {
5343 daplka_mr_resource_t *mr_rp = (daplka_mr_resource_t *)gen_rp;
5344 ibt_status_t status;
5345
5346 ASSERT(DAPLKA_RS_REFCNT(mr_rp) == 0);
5347 ASSERT(mr_rp->mr_shared_mr == NULL);
5348 D3("mr_destroy: entering, mr_rp 0x%p, rnum %d\n",
5349 mr_rp, DAPLKA_RS_RNUM(mr_rp));
5350
5351 /*
5352 * deregister mr
5353 */
5354 if (mr_rp->mr_hdl) {
5355 status = daplka_ibt_deregister_mr(mr_rp, mr_rp->mr_hca_hdl,
5356 mr_rp->mr_hdl);
5357 if (status != IBT_SUCCESS) {
5358 DERR("mr_destroy: ibt_deregister_mr returned %d\n",
5359 status);
5360 }
5361 mr_rp->mr_hdl = NULL;
5362 D3("mr_destroy: mr deregistered\n");
5363 }
	mr_rp->mr_attr.mr_vaddr = 0;
5365
5366 /*
5367 * release reference on PD
5368 */
5369 if (mr_rp->mr_pd_res != NULL) {
5370 DAPLKA_RS_UNREF(mr_rp->mr_pd_res);
5371 mr_rp->mr_pd_res = NULL;
5372 }
5373 mutex_destroy(&mr_rp->mr_lock);
5374 DAPLKA_RS_FINI(mr_rp);
5375 kmem_free(mr_rp, sizeof (daplka_mr_resource_t));
5376 D3("mr_destroy: exiting, mr_rp 0x%p\n", mr_rp);
5377 return (0);
5378 }
5379
5380 /*
5381 * this function is called by daplka_hash_destroy for
5382 * freeing MR resource objects
5383 */
5384 static void
5385 daplka_hash_mr_free(void *obj)
5386 {
5387 daplka_mr_resource_t *mr_rp = (daplka_mr_resource_t *)obj;
5388
5389 daplka_shared_mr_free(mr_rp);
5390 DAPLKA_RS_UNREF(mr_rp);
5391 }
5392
5393 /*
5394 * comparison function used for finding a shared mr object
5395 * from the global shared mr avl tree.
5396 */
5397 static int
5398 daplka_shared_mr_cmp(const void *smr1, const void *smr2)
5399 {
5400 daplka_shared_mr_t *s1 = (daplka_shared_mr_t *)smr1;
5401 daplka_shared_mr_t *s2 = (daplka_shared_mr_t *)smr2;
5402 int i;
5403
5404 for (i = 4; i >= 0; i--) {
5405 if (s1->smr_cookie.mc_uint_arr[i] <
5406 s2->smr_cookie.mc_uint_arr[i]) {
5407 return (-1);
5408 }
5409 if (s1->smr_cookie.mc_uint_arr[i] >
5410 s2->smr_cookie.mc_uint_arr[i]) {
5411 return (1);
5412 }
5413 }
5414 return (0);
5415 }
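
/*
 * The block below is an editor-added illustrative sketch, not part of
 * the driver: it shows how a tree keyed by the comparator above would
 * be initialized with the standard Solaris avl_create() interface.
 * The smr_node linkage field name is an assumption (the real AVL node
 * field is declared in daplt.h); the guard macro and function name
 * are hypothetical.
 */
#ifdef DAPLKA_EXAMPLES
static void
daplka_shared_mr_tree_init_sketch(avl_tree_t *tree)
{
	/* cookies compare most-significant word first, see above */
	avl_create(tree, daplka_shared_mr_cmp,
	    sizeof (daplka_shared_mr_t),
	    offsetof(daplka_shared_mr_t, smr_node));
}
#endif /* DAPLKA_EXAMPLES */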
5416
5417 /*
5418 * allocates a protection domain.
5419 */
5420 /* ARGSUSED */
5421 static int
5422 daplka_pd_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5423 cred_t *cred, int *rvalp)
5424 {
5425 dapl_pd_alloc_t args;
5426 daplka_pd_resource_t *pd_rp;
5427 ibt_status_t status;
5428 uint64_t pd_hkey = 0;
5429 boolean_t inserted = B_FALSE;
5430 int retval;
5431
5432 pd_rp = kmem_zalloc(sizeof (*pd_rp), daplka_km_flags);
5433 if (pd_rp == NULL) {
5434 DERR("pd_alloc: cannot allocate pd resource\n");
5435 return (ENOMEM);
5436 }
5437 DAPLKA_RS_INIT(pd_rp, DAPL_TYPE_PD,
5438 DAPLKA_RS_RNUM(ia_rp), daplka_pd_destroy);
5439
5440 pd_rp->pd_hca = ia_rp->ia_hca;
5441 pd_rp->pd_hca_hdl = ia_rp->ia_hca_hdl;
5442 status = daplka_ibt_alloc_pd(pd_rp, pd_rp->pd_hca_hdl,
5443 IBT_PD_NO_FLAGS, &pd_rp->pd_hdl);
5444 if (status != IBT_SUCCESS) {
5445 DERR("pd_alloc: ibt_alloc_pd returned %d\n", status);
5446 *rvalp = (int)status;
5447 retval = 0;
5448 goto cleanup;
5449 }
5450
5451 /* insert into pd hash table */
5452 retval = daplka_hash_insert(&ia_rp->ia_pd_htbl,
5453 &pd_hkey, (void *)pd_rp);
5454 if (retval != 0) {
5455 DERR("pd_alloc: cannot insert pd resource into pd_htbl\n");
5456 goto cleanup;
5457 }
5458 inserted = B_TRUE;
5459
5460 /* return hkey to library */
5461 args.pda_hkey = pd_hkey;
5462
5463 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_pd_alloc_t),
5464 mode);
5465 if (retval != 0) {
5466 DERR("pd_alloc: copyout error %d\n", retval);
5467 retval = EFAULT;
5468 goto cleanup;
5469 }
5470 return (0);
5471
5472 cleanup:;
5473 if (inserted) {
5474 daplka_pd_resource_t *free_rp = NULL;
5475
5476 (void) daplka_hash_remove(&ia_rp->ia_pd_htbl, pd_hkey,
5477 (void **)&free_rp);
5478 if (free_rp != pd_rp) {
5479 DERR("pd_alloc: cannot remove pd from hash table\n");
5480 /*
5481 * we can only get here if another thread
5482 * has completed the cleanup in pd_free
5483 */
5484 return (retval);
5485 }
5486 }
5487 DAPLKA_RS_UNREF(pd_rp);
5488 return (retval);
5489 }
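
/*
 * Illustrative sketch, not part of the original source: from the
 * library's point of view, pd_alloc is a plain ioctl round-trip. The
 * command name DAPL_PD_ALLOC and the open device fd are assumptions
 * for this sketch; the dapl_pd_alloc_t layout and pda_hkey come from
 * the handler above. Guarded out of the build on purpose.
 */
#ifdef DAPLKA_EXAMPLE_SKETCHES
/* userland-style code, shown in C for consistency with this file */
static int
example_pd_alloc(int daplka_fd, uint64_t *pd_hkey_out)
{
	dapl_pd_alloc_t args;

	/* on success the driver copies pda_hkey back out */
	if (ioctl(daplka_fd, DAPL_PD_ALLOC, &args) != 0)
		return (-1);
	*pd_hkey_out = args.pda_hkey;
	return (0);
}
#endif /* DAPLKA_EXAMPLE_SKETCHES */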
5490
5491 /*
5492 * destroys a protection domain.
5493 * called when refcnt drops to zero.
5494 */
5495 static int
5496 daplka_pd_destroy(daplka_resource_t *gen_rp)
5497 {
5498 daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)gen_rp;
5499 ibt_status_t status;
5500
5501 ASSERT(DAPLKA_RS_REFCNT(pd_rp) == 0);
5502 D3("pd_destroy: entering, pd_rp %p, rnum %d\n",
5503 pd_rp, DAPLKA_RS_RNUM(pd_rp));
5504
5505 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5506 if (pd_rp->pd_hdl != NULL) {
5507 status = daplka_ibt_free_pd(pd_rp, pd_rp->pd_hca_hdl,
5508 pd_rp->pd_hdl);
5509 if (status != IBT_SUCCESS) {
5510 DERR("pd_destroy: ibt_free_pd returned %d\n", status);
5511 }
5512 }
5513 DAPLKA_RS_FINI(pd_rp);
5514 kmem_free(pd_rp, sizeof (daplka_pd_resource_t));
5515 D3("pd_destroy: exiting, pd_rp %p\n", pd_rp);
5516 return (0);
5517 }
5518
5519 static void
5520 daplka_hash_pd_free(void *obj)
5521 {
5522 daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)obj;
5523
5524 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5525 DAPLKA_RS_UNREF(pd_rp);
5526 }
5527
5528 /*
5529 * removes the pd reference from ia_pd_htbl and releases the
5530 * initial reference to the pd. also destroys the pd if the refcnt
5531 * is zero.
5532 */
5533 /* ARGSUSED */
5534 static int
5535 daplka_pd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5536 cred_t *cred, int *rvalp)
5537 {
5538 daplka_pd_resource_t *pd_rp;
5539 dapl_pd_free_t args;
5540 int retval;
5541
5542 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_pd_free_t), mode);
5543 if (retval != 0) {
5544 DERR("pd_free: copyin error %d\n", retval);
5545 return (EINVAL);
5546 }
5547
5548 retval = daplka_hash_remove(&ia_rp->ia_pd_htbl,
5549 args.pdf_hkey, (void **)&pd_rp);
5550 if (retval != 0 || pd_rp == NULL) {
5551 DERR("pd_free: cannot find pd resource\n");
5552 return (EINVAL);
5553 }
5554 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5555
5556 /* UNREF calls the actual free function when refcnt is zero */
5557 DAPLKA_RS_UNREF(pd_rp);
5558 return (0);
5559 }
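
/*
 * Illustrative sketch, not part of the original source: the REF/UNREF
 * discipline that pd_free (and every other *_free path in this file)
 * relies on. These are not the driver's actual macro definitions, and
 * the rs_refcnt/rs_free field names are assumptions; the sketch only
 * shows the intended semantics: the final UNREF invokes the destructor
 * registered by DAPLKA_RS_INIT, e.g. daplka_pd_destroy.
 */
#ifdef DAPLKA_EXAMPLE_SKETCHES
#define	EXAMPLE_RS_REF(rp)	atomic_inc_32(&(rp)->rs_refcnt)
#define	EXAMPLE_RS_UNREF(rp)						\
	{								\
		if (atomic_dec_32_nv(&(rp)->rs_refcnt) == 0)		\
			(void) (rp)->rs_free((daplka_resource_t *)(rp)); \
	}
#endif /* DAPLKA_EXAMPLE_SKETCHES */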
5560
5561 /*
5562 * allocates a memory window
5563 */
5564 /* ARGSUSED */
5565 static int
5566 daplka_mw_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5567 cred_t *cred, int *rvalp)
5568 {
5569 daplka_pd_resource_t *pd_rp;
5570 daplka_mw_resource_t *mw_rp;
5571 dapl_mw_alloc_t args;
5572 ibt_status_t status;
5573 boolean_t inserted = B_FALSE;
5574 uint64_t mw_hkey;
5575 ibt_rkey_t mw_rkey;
5576 int retval;
5577
5578 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_alloc_t), mode);
5579 if (retval != 0) {
5580 DERR("mw_alloc: copyin error %d\n", retval);
5581 return (EFAULT);
5582 }
5583
5584 /*
5585 * Allocate and initialize a MW resource
5586 */
5587 mw_rp = kmem_zalloc(sizeof (daplka_mw_resource_t), daplka_km_flags);
5588 if (mw_rp == NULL) {
5589 DERR("mw_alloc: cannot allocate mw resource\n");
5590 return (ENOMEM);
5591 }
5592 DAPLKA_RS_INIT(mw_rp, DAPL_TYPE_MW,
5593 DAPLKA_RS_RNUM(ia_rp), daplka_mw_destroy);
5594
5595 mutex_init(&mw_rp->mw_lock, NULL, MUTEX_DRIVER, NULL);
5596 mw_rp->mw_hca = ia_rp->ia_hca;
5597 mw_rp->mw_hca_hdl = ia_rp->ia_hca_hdl;
5598
5599 /* get pd handle */
5600 pd_rp = (daplka_pd_resource_t *)
5601 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mw_pd_hkey);
	if (pd_rp == NULL) {
		DERR("mw_alloc: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
5606 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5607
5608 mw_rp->mw_pd_res = pd_rp;
5609
5610 status = daplka_ibt_alloc_mw(mw_rp, mw_rp->mw_hca_hdl,
5611 pd_rp->pd_hdl, IBT_MW_NOSLEEP, &mw_rp->mw_hdl, &mw_rkey);
5612
5613 if (status != IBT_SUCCESS) {
5614 DERR("mw_alloc: ibt_alloc_mw returned %d\n", status);
5615 *rvalp = (int)status;
5616 retval = 0;
5617 goto cleanup;
5618 }
5619
5620 mutex_enter(&ia_rp->ia_lock);
5621 switch (ia_rp->ia_state) {
5622 case DAPLKA_IA_INIT:
5623 ia_rp->ia_state = DAPLKA_IA_MW_ALLOC_IN_PROGRESS;
5624 ia_rp->ia_mw_alloccnt++;
5625 retval = 0;
5626 break;
5627 case DAPLKA_IA_MW_ALLOC_IN_PROGRESS:
		/* another mw_alloc is already in progress; increase count */
5629 ia_rp->ia_mw_alloccnt++;
5630 retval = 0;
5631 break;
5632 case DAPLKA_IA_MW_FREEZE_IN_PROGRESS:
5633 /* FALLTHRU */
5634 case DAPLKA_IA_MW_FROZEN:
		/*
		 * IA is being frozen or is already frozen; don't allow
		 * more MWs to be allocated.
		 */
5639 DERR("mw_alloc: IA is freezing MWs (state=%d)\n",
5640 ia_rp->ia_state);
5641 retval = EINVAL;
5642 break;
5643 default:
5644 ASSERT(!"Invalid IA state in mw_alloc");
5645 DERR("mw_alloc: IA state=%d invalid\n", ia_rp->ia_state);
5646 retval = EINVAL;
5647 break;
5648 }
5649 mutex_exit(&ia_rp->ia_lock);
5650 /* retval is 0 when ia_mw_alloccnt is incremented */
5651 if (retval != 0) {
5652 goto cleanup;
5653 }
5654
5655 /* insert into mw hash table */
5656 mw_hkey = 0;
5657 retval = daplka_hash_insert(&ia_rp->ia_mw_htbl, &mw_hkey,
5658 (void *)mw_rp);
5659 if (retval != 0) {
5660 DERR("mw_alloc: cannot insert mw resource into mw_htbl\n");
5661 mutex_enter(&ia_rp->ia_lock);
5662 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5663 ia_rp->ia_mw_alloccnt--;
5664 if (ia_rp->ia_mw_alloccnt == 0) {
5665 ia_rp->ia_state = DAPLKA_IA_INIT;
5666 cv_broadcast(&ia_rp->ia_cv);
5667 }
5668 mutex_exit(&ia_rp->ia_lock);
5669 goto cleanup;
5670 }
5671 inserted = B_TRUE;
5672
5673 D3("mw_alloc: ibt_alloc_mw mw_hdl(%p) mw_rkey(0x%llx)\n",
5674 mw_rp->mw_hdl, (longlong_t)mw_rkey);
5675
5676 mutex_enter(&ia_rp->ia_lock);
	/*
	 * We are done with this mw_alloc. If it was the last one in
	 * progress, change the state back to DAPLKA_IA_INIT and wake
	 * up the waiters, specifically the unlock callback.
	 */
5682 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5683 ia_rp->ia_mw_alloccnt--;
5684 if (ia_rp->ia_mw_alloccnt == 0) {
5685 ia_rp->ia_state = DAPLKA_IA_INIT;
5686 cv_broadcast(&ia_rp->ia_cv);
5687 }
5688 mutex_exit(&ia_rp->ia_lock);
5689
5690 args.mw_hkey = mw_hkey;
5691 args.mw_rkey = mw_rkey;
5692
5693 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_mw_alloc_t),
5694 mode);
5695 if (retval != 0) {
5696 DERR("mw_alloc: copyout error %d\n", retval);
5697 retval = EFAULT;
5698 goto cleanup;
5699 }
5700 return (0);
5701
5702 cleanup:;
5703 if (inserted) {
5704 daplka_mw_resource_t *free_rp = NULL;
5705
5706 (void) daplka_hash_remove(&ia_rp->ia_mw_htbl, mw_hkey,
5707 (void **)&free_rp);
5708 if (free_rp != mw_rp) {
5709 DERR("mw_alloc: cannot remove mw from hash table\n");
5710 /*
5711 * we can only get here if another thread
5712 * has completed the cleanup in mw_free
5713 */
5714 return (retval);
5715 }
5716 }
5717 DAPLKA_RS_UNREF(mw_rp);
5718 return (retval);
5719 }
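
/*
 * Illustrative sketch, not part of the original source: the other half
 * of the ia_mw_alloccnt accounting in mw_alloc above. A freeze routine
 * (the "unlock callback" mentioned in the comment above) would wait on
 * ia_cv until every in-flight mw_alloc has dropped its count, roughly
 * as follows. Guarded out of the build on purpose.
 */
#ifdef DAPLKA_EXAMPLE_SKETCHES
static void
example_mw_freeze_wait(daplka_ia_resource_t *ia_rp)
{
	mutex_enter(&ia_rp->ia_lock);
	while (ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) {
		/* woken by the cv_broadcast in mw_alloc */
		cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
	}
	ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS;
	mutex_exit(&ia_rp->ia_lock);
}
#endif /* DAPLKA_EXAMPLE_SKETCHES */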
5720
5721 /*
5722 * removes the mw reference from ia_mw_htbl and releases the
5723 * initial reference to the mw. also destroys the mw if the refcnt
5724 * is zero.
5725 */
5726 /* ARGSUSED */
5727 static int
5728 daplka_mw_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5729 cred_t *cred, int *rvalp)
5730 {
5731 daplka_mw_resource_t *mw_rp = NULL;
5732 dapl_mw_free_t args;
5733 int retval = 0;
5734
5735 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_free_t), mode);
5736 if (retval != 0) {
5737 DERR("mw_free: copyin error %d\n", retval);
5738 return (EFAULT);
5739 }
5740
5741 retval = daplka_hash_remove(&ia_rp->ia_mw_htbl, args.mw_hkey,
5742 (void **)&mw_rp);
5743 if (retval != 0 || mw_rp == NULL) {
		DERR("mw_free: cannot find mw resource (0x%llx)\n",
5745 (longlong_t)args.mw_hkey);
5746 return (EINVAL);
5747 }
5748
5749 ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5750
5751 /* UNREF calls the actual free function when refcnt is zero */
5752 DAPLKA_RS_UNREF(mw_rp);
5753 return (retval);
5754 }
5755
5756 /*
5757 * destroys the memory window.
5758 * called when refcnt drops to zero.
5759 */
5760 static int
5761 daplka_mw_destroy(daplka_resource_t *gen_rp)
5762 {
5763 daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)gen_rp;
5764 ibt_status_t status;
5765
5766 ASSERT(DAPLKA_RS_REFCNT(mw_rp) == 0);
5767 D3("mw_destroy: entering, mw_rp 0x%p, rnum %d\n",
5768 mw_rp, DAPLKA_RS_RNUM(mw_rp));
5769
5770 /*
5771 * free memory window
5772 */
5773 if (mw_rp->mw_hdl) {
5774 status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl,
5775 mw_rp->mw_hdl);
5776 if (status != IBT_SUCCESS) {
5777 DERR("mw_destroy: ibt_free_mw returned %d\n", status);
5778 }
5779 mw_rp->mw_hdl = NULL;
5780 D3("mw_destroy: mw freed\n");
5781 }
5782
5783 /*
5784 * release reference on PD
5785 */
5786 if (mw_rp->mw_pd_res != NULL) {
5787 DAPLKA_RS_UNREF(mw_rp->mw_pd_res);
5788 mw_rp->mw_pd_res = NULL;
5789 }
5790 mutex_destroy(&mw_rp->mw_lock);
5791 DAPLKA_RS_FINI(mw_rp);
5792 kmem_free(mw_rp, sizeof (daplka_mw_resource_t));
5793 D3("mw_destroy: exiting, mw_rp 0x%p\n", mw_rp);
5794 return (0);
5795 }
5796
5797 static void
5798 daplka_hash_mw_free(void *obj)
5799 {
5800 daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)obj;
5801
5802 ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5803 DAPLKA_RS_UNREF(mw_rp);
5804 }
5805
5806 /*
5807 * SRQ ioctls and supporting functions
5808 */
5809 /* ARGSUSED */
5810 static int
5811 daplka_srq_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5812 cred_t *cred, int *rvalp)
5813 {
5814 daplka_srq_resource_t *srq_rp;
5815 daplka_pd_resource_t *pd_rp;
5816 dapl_srq_create_t args;
5817 ibt_srq_sizes_t srq_sizes;
5818 ibt_srq_sizes_t srq_real_sizes;
5819 ibt_hca_attr_t *hca_attrp;
5820 uint64_t srq_hkey = 0;
5821 boolean_t inserted = B_FALSE;
5822 int retval;
5823 ibt_status_t status;
5824
5825 D3("srq_create: enter\n");
5826 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_create_t),
5827 mode);
5828 if (retval != 0) {
5829 DERR("srq_create: copyin error %d\n", retval);
5830 return (EFAULT);
5831 }
5832 srq_rp = kmem_zalloc(sizeof (daplka_srq_resource_t), daplka_km_flags);
5833 if (srq_rp == NULL) {
		DERR("srq_create: cannot allocate srq_rp\n");
5835 return (ENOMEM);
5836 }
5837 DAPLKA_RS_INIT(srq_rp, DAPL_TYPE_SRQ,
5838 DAPLKA_RS_RNUM(ia_rp), daplka_srq_destroy);
5839
5840 srq_rp->srq_hca = ia_rp->ia_hca;
5841 srq_rp->srq_hca_hdl = ia_rp->ia_hca_hdl;
5842 mutex_init(&srq_rp->srq_lock, NULL, MUTEX_DRIVER, NULL);
5843
5844 /* get pd handle */
5845 pd_rp = (daplka_pd_resource_t *)
5846 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.srqc_pd_hkey);
5847 if (pd_rp == NULL) {
5848 DERR("srq_create: cannot find pd resource\n");
5849 retval = EINVAL;
5850 goto cleanup;
5851 }
5852 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5853 srq_rp->srq_pd_res = pd_rp;
5854
5855 /*
5856 * these checks ensure that the requested SRQ sizes
5857 * are within the limits supported by the chosen HCA.
5858 */
5859 hca_attrp = &ia_rp->ia_hca->hca_attr;
5860 if (args.srqc_sizes.srqs_sz > hca_attrp->hca_max_srqs_sz) {
5861 DERR("srq_create: invalid srqs_sz %d\n",
5862 args.srqc_sizes.srqs_sz);
5863 retval = EINVAL;
5864 goto cleanup;
5865 }
5866 if (args.srqc_sizes.srqs_sgl > hca_attrp->hca_max_srq_sgl) {
5867 DERR("srq_create: invalid srqs_sgl %d\n",
5868 args.srqc_sizes.srqs_sgl);
5869 retval = EINVAL;
5870 goto cleanup;
5871 }
5872
5873 D3("srq_create: srq_sgl %d, srq_sz %d\n",
5874 args.srqc_sizes.srqs_sgl, args.srqc_sizes.srqs_sz);
5875
5876 srq_sizes.srq_wr_sz = args.srqc_sizes.srqs_sz;
5877 srq_sizes.srq_sgl_sz = args.srqc_sizes.srqs_sgl;
5878
5879 /* create srq */
5880 status = daplka_ibt_alloc_srq(srq_rp, ia_rp->ia_hca_hdl,
5881 IBT_SRQ_USER_MAP, pd_rp->pd_hdl, &srq_sizes, &srq_rp->srq_hdl,
5882 &srq_real_sizes);
5883 if (status != IBT_SUCCESS) {
5884 DERR("srq_create: alloc_srq returned %d\n", status);
5885 *rvalp = (int)status;
5886 retval = 0;
5887 goto cleanup;
5888 }
5889
5890 args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5891 args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5892
5893 /* Get HCA-specific data_out info */
5894 status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
5895 IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
5896 &args.srqc_data_out, sizeof (args.srqc_data_out));
5897
5898 if (status != IBT_SUCCESS) {
5899 DERR("srq_create: ibt_ci_data_out error(%d)\n", status);
5900 *rvalp = (int)status;
5901 retval = 0;
5902 goto cleanup;
5903 }
5904
5905 srq_rp->srq_real_size = srq_real_sizes.srq_wr_sz;
5906
5907 /* preparing to copyout map_data back to the library */
5908 args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5909 args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5910
5911 /* insert into srq hash table */
5912 retval = daplka_hash_insert(&ia_rp->ia_srq_htbl,
5913 &srq_hkey, (void *)srq_rp);
5914 if (retval != 0) {
5915 DERR("srq_create: cannot insert srq resource into srq_htbl\n");
5916 goto cleanup;
5917 }
5918 inserted = B_TRUE;
5919
5920 /* return hkey to library */
5921 args.srqc_hkey = srq_hkey;
5922
5923 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_create_t),
5924 mode);
5925 if (retval != 0) {
5926 DERR("srq_create: copyout error %d\n", retval);
5927 retval = EFAULT;
5928 goto cleanup;
5929 }
5930
5931 D3("srq_create: %p, 0x%llx\n", srq_rp->srq_hdl, (longlong_t)srq_hkey);
5932 D3(" sz(%d) sgl(%d)\n",
5933 args.srqc_real_sizes.srqs_sz, args.srqc_real_sizes.srqs_sgl);
5934 D3("srq_create: exit\n");
5935 return (0);
5936
5937 cleanup:
5938 if (inserted) {
5939 daplka_srq_resource_t *free_rp = NULL;
5940
5941 (void) daplka_hash_remove(&ia_rp->ia_srq_htbl, srq_hkey,
5942 (void **)&free_rp);
5943 if (free_rp != srq_rp) {
5944 /*
			 * this case is impossible because srq_free will
5946 * wait until our state transition is complete.
5947 */
5948 DERR("srq_create: cannot remove srq from hash table\n");
5949 ASSERT(B_FALSE);
5950 return (retval);
5951 }
5952 }
5953 DAPLKA_RS_UNREF(srq_rp);
5954 return (retval);
5955 }
5956
5957 /*
5958 * Resize an existing SRQ
5959 */
5960 /* ARGSUSED */
5961 static int
5962 daplka_srq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5963 cred_t *cred, int *rvalp)
5964 {
5965 daplka_srq_resource_t *srq_rp = NULL;
5966 ibt_hca_attr_t *hca_attrp;
5967 dapl_srq_resize_t args;
5968 ibt_status_t status;
5969 int retval = 0;
5970
5971 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_resize_t),
5972 mode);
5973 if (retval != 0) {
5974 DERR("srq_resize: copyin error %d\n", retval);
5975 return (EFAULT);
5976 }
5977
5978 /* get srq resource */
5979 srq_rp = (daplka_srq_resource_t *)
5980 daplka_hash_lookup(&ia_rp->ia_srq_htbl, args.srqr_hkey);
5981 if (srq_rp == NULL) {
5982 DERR("srq_resize: cannot find srq resource\n");
5983 return (EINVAL);
5984 }
5985 ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
5986
5987 hca_attrp = &ia_rp->ia_hca->hca_attr;
5988 if (args.srqr_new_size > hca_attrp->hca_max_srqs_sz) {
		DERR("srq_resize: invalid srq size %d\n", args.srqr_new_size);
5990 retval = EINVAL;
5991 goto cleanup;
5992 }
5993
5994 mutex_enter(&srq_rp->srq_lock);
5995 /*
	 * If ibt_modify_srq fails, it is primarily due to resource
	 * shortage. Per the IB spec, a resize will never lose events
	 * and a resize error leaves the SRQ intact. Therefore, even if
	 * the resize request fails, we proceed and get the mapping data
	 * from the SRQ so that the library can mmap it.
6001 */
6002 status = ibt_modify_srq(srq_rp->srq_hdl, IBT_SRQ_SET_SIZE,
6003 args.srqr_new_size, 0, &args.srqr_real_size);
6004 if (status != IBT_SUCCESS) {
		/* we return the size of the old SRQ if the resize fails */
6006 args.srqr_real_size = srq_rp->srq_real_size;
6007 ASSERT(status != IBT_SRQ_HDL_INVALID);
6008 DERR("srq_resize: ibt_modify_srq failed:%d\n", status);
6009 } else {
6010 srq_rp->srq_real_size = args.srqr_real_size;
6011 }
6012 mutex_exit(&srq_rp->srq_lock);
6015 D2("srq_resize(%d): done new_sz(%u) real_sz(%u)\n",
6016 DAPLKA_RS_RNUM(srq_rp), args.srqr_new_size, args.srqr_real_size);
6017
6018 /* Get HCA-specific data_out info */
6019 status = ibt_ci_data_out(srq_rp->srq_hca_hdl,
6020 IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
6021 &args.srqr_data_out, sizeof (args.srqr_data_out));
6022 if (status != IBT_SUCCESS) {
6023 DERR("srq_resize: ibt_ci_data_out error(%d)\n", status);
6024 /* return ibt_ci_data_out status */
6025 *rvalp = (int)status;
6026 retval = 0;
6027 goto cleanup;
6028 }
6029
6030 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_resize_t),
6031 mode);
6032 if (retval != 0) {
6033 DERR("srq_resize: copyout error %d\n", retval);
6034 retval = EFAULT;
6035 goto cleanup;
6036 }
6037
6038 cleanup:;
6039 if (srq_rp != NULL) {
6040 DAPLKA_RS_UNREF(srq_rp);
6041 }
6042 return (retval);
6043 }
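
/*
 * Illustrative sketch, not part of the original source: the library's
 * side of the resize contract described above. Because a failed resize
 * leaves the SRQ intact, the caller consumes srqr_real_size and
 * srqr_data_out whether or not the resize itself succeeded. The command
 * name DAPL_SRQ_RESIZE and the example_mmap_srq helper are assumptions
 * for this sketch. Guarded out of the build on purpose.
 */
#ifdef DAPLKA_EXAMPLE_SKETCHES
/* userland-style code, shown in C for consistency with this file */
static int
example_srq_resize(int daplka_fd, uint64_t srq_hkey, uint32_t new_size)
{
	dapl_srq_resize_t args;

	args.srqr_hkey = srq_hkey;
	args.srqr_new_size = new_size;
	if (ioctl(daplka_fd, DAPL_SRQ_RESIZE, &args) != 0)
		return (-1);

	/* remap using whatever size the SRQ actually has now */
	return (example_mmap_srq(daplka_fd, &args.srqr_data_out,
	    args.srqr_real_size));
}
#endif /* DAPLKA_EXAMPLE_SKETCHES */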
6044
6045 /*
6046 * Frees an SRQ resource.
6047 */
6048 /* ARGSUSED */
6049 static int
6050 daplka_srq_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6051 cred_t *cred, int *rvalp)
6052 {
6053 daplka_srq_resource_t *srq_rp = NULL;
6054 dapl_srq_free_t args;
6055 int retval;
6056
6057 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_free_t), mode);
6058 if (retval != 0) {
6059 DERR("srq_free: copyin error %d\n", retval);
6060 return (EFAULT);
6061 }
6062
6063 retval = daplka_hash_remove(&ia_rp->ia_srq_htbl,
6064 args.srqf_hkey, (void **)&srq_rp);
6065 if (retval != 0 || srq_rp == NULL) {
		/*
		 * this is only possible if we have two threads
		 * calling srq_free in parallel.
		 */
		DERR("srq_free: cannot find srq resource retval(%d) 0x%llx\n",
		    retval, (longlong_t)args.srqf_hkey);
6072 return (EINVAL);
6073 }
6074
6075 /* UNREF calls the actual free function when refcnt is zero */
6076 DAPLKA_RS_UNREF(srq_rp);
6077 return (0);
6078 }
6079
6080 /*
6081 * destroys a SRQ resource.
6082 * called when refcnt drops to zero.
6083 */
6084 static int
6085 daplka_srq_destroy(daplka_resource_t *gen_rp)
6086 {
6087 daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)gen_rp;
6088 ibt_status_t status;
6089
6090 ASSERT(DAPLKA_RS_REFCNT(srq_rp) == 0);
6091
6092 D3("srq_destroy: entering, srq_rp 0x%p, rnum %d\n",
6093 srq_rp, DAPLKA_RS_RNUM(srq_rp));
6094 /*
6095 * destroy the srq
6096 */
6097 if (srq_rp->srq_hdl != NULL) {
6098 status = daplka_ibt_free_srq(srq_rp, srq_rp->srq_hdl);
6099 if (status != IBT_SUCCESS) {
6100 DERR("srq_destroy: ibt_free_srq returned %d\n",
6101 status);
6102 }
6103 srq_rp->srq_hdl = NULL;
6104 D3("srq_destroy: srq freed, rnum %d\n", DAPLKA_RS_RNUM(srq_rp));
6105 }
6106 /*
6107 * release all references
6108 */
6109 if (srq_rp->srq_pd_res != NULL) {
6110 DAPLKA_RS_UNREF(srq_rp->srq_pd_res);
6111 srq_rp->srq_pd_res = NULL;
6112 }
6113
6114 mutex_destroy(&srq_rp->srq_lock);
6115 DAPLKA_RS_FINI(srq_rp);
6116 kmem_free(srq_rp, sizeof (daplka_srq_resource_t));
6117 D3("srq_destroy: exiting, srq_rp 0x%p\n", srq_rp);
6118 return (0);
6119 }
6120
6121 static void
6122 daplka_hash_srq_free(void *obj)
6123 {
6124 daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)obj;
6125
6126 ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
6127 DAPLKA_RS_UNREF(srq_rp);
6128 }
6129
6130 /*
6131 * This function tells the CM to start listening on a service id.
6132 * It must be called by the passive side client before the client
6133 * can receive connection requests from remote endpoints. If the
6134 * client specifies a non-zero service id (connection qualifier in
6135 * dapl terms), this function will attempt to bind to this service
6136 * id and return an error if the id is already in use. If the client
6137 * specifies zero as the service id, this function will try to find
6138 * the next available service id and return it back to the client.
6139 * To support the cr_handoff function, this function will, in addition
6140 * to creating and inserting an SP resource into the per-IA SP hash
6141 * table, insert the SP resource into a global SP table. This table
6142 * maintains all active service points created by all dapl clients.
6143 * CR handoff locates the target SP by iterating through this global
6144 * table.
6145 */
6146 /* ARGSUSED */
6147 static int
6148 daplka_service_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6149 cred_t *cred, int *rvalp)
6150 {
6151 daplka_evd_resource_t *evd_rp = NULL;
6152 daplka_sp_resource_t *sp_rp = NULL;
6153 dapl_service_register_t args;
6154 ibt_srv_desc_t sd_args;
6155 ibt_srv_bind_t sb_args;
6156 ibt_status_t status;
6157 ib_svc_id_t retsid = 0;
6158 uint64_t sp_hkey = 0;
6159 boolean_t bumped = B_FALSE;
6160 int backlog_size;
6161 int retval = 0;
6162
6163 retval = ddi_copyin((void *)arg, &args,
6164 sizeof (dapl_service_register_t), mode);
6165 if (retval != 0) {
6166 DERR("service_register: copyin error %d\n", retval);
6167 return (EINVAL);
6168 }
6169
6170 sp_rp = kmem_zalloc(sizeof (*sp_rp), daplka_km_flags);
6171 if (sp_rp == NULL) {
6172 DERR("service_register: cannot allocate sp resource\n");
6173 return (ENOMEM);
6174 }
6175 DAPLKA_RS_INIT(sp_rp, DAPL_TYPE_SP,
6176 DAPLKA_RS_RNUM(ia_rp), daplka_sp_destroy);
6177
6178 /* check if evd exists */
6179 evd_rp = (daplka_evd_resource_t *)
6180 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.sr_evd_hkey);
6181 if (evd_rp == NULL) {
6182 DERR("service_register: evd resource not found\n");
6183 retval = EINVAL;
6184 goto cleanup;
6185 }
6186 /*
6187 * initialize backlog size
6188 */
6189 if (evd_rp && evd_rp->evd_cq_real_size > 0) {
6190 backlog_size = evd_rp->evd_cq_real_size + 1;
6191 } else {
6192 backlog_size = DAPLKA_DEFAULT_SP_BACKLOG;
6193 }
6194 D2("service_register: args.sr_sid = %llu\n", (longlong_t)args.sr_sid);
6195
6196 /* save the userland sp ptr */
6197 sp_rp->sp_cookie = args.sr_sp_cookie;
6198 sp_rp->sp_backlog_size = backlog_size;
6199 D3("service_register: backlog set to %d\n", sp_rp->sp_backlog_size);
6200 sp_rp->sp_backlog = kmem_zalloc(sp_rp->sp_backlog_size *
6201 sizeof (daplka_sp_conn_pend_t), daplka_km_flags);
6202
6203 /* save evd resource pointer */
6204 sp_rp->sp_evd_res = evd_rp;
6205
6206 /*
6207 * save ruid here so that we can do a comparison later
6208 * when someone does cr_handoff. the check will prevent
6209 * a malicious app from passing a CR to us.
6210 */
6211 sp_rp->sp_ruid = crgetruid(cred);
6212
6213 /* fill in args for register_service */
6214 sd_args.sd_ud_handler = NULL;
6215 sd_args.sd_handler = daplka_cm_service_handler;
6216 sd_args.sd_flags = IBT_SRV_NO_FLAGS;
6217
6218 status = ibt_register_service(daplka_dev->daplka_clnt_hdl,
6219 &sd_args, args.sr_sid, 1, &sp_rp->sp_srv_hdl, &retsid);
6220
6221 if (status != IBT_SUCCESS) {
6222 DERR("service_register: ibt_register_service returned %d\n",
6223 status);
6224 *rvalp = (int)status;
6225 retval = 0;
6226 goto cleanup;
6227 }
6228 /* save returned sid */
6229 sp_rp->sp_conn_qual = retsid;
6230 args.sr_retsid = retsid;
6231
6232 /* fill in args for bind_service */
6233 sb_args.sb_pkey = ia_rp->ia_port_pkey;
6234 sb_args.sb_lease = 0xffffffff;
6235 sb_args.sb_key[0] = 0x1234;
6236 sb_args.sb_key[1] = 0x5678;
6237 sb_args.sb_name = DAPLKA_DRV_NAME;
6238
6239 D2("service_register: bind(0x%llx:0x%llx)\n",
6240 (longlong_t)ia_rp->ia_hca_sgid.gid_prefix,
6241 (longlong_t)ia_rp->ia_hca_sgid.gid_guid);
6242
6243 status = ibt_bind_service(sp_rp->sp_srv_hdl, ia_rp->ia_hca_sgid,
6244 &sb_args, (void *)sp_rp, &sp_rp->sp_bind_hdl);
6245 if (status != IBT_SUCCESS) {
6246 DERR("service_register: ibt_bind_service returned %d\n",
6247 status);
6248 *rvalp = (int)status;
6249 retval = 0;
6250 goto cleanup;
6251 }
6252
6253 /*
6254 * need to bump refcnt because the global hash table will
6255 * have a reference to sp_rp
6256 */
6257 DAPLKA_RS_REF(sp_rp);
6258 bumped = B_TRUE;
6259
6260 /* insert into global sp hash table */
6261 sp_rp->sp_global_hkey = 0;
6262 retval = daplka_hash_insert(&daplka_global_sp_htbl,
6263 &sp_rp->sp_global_hkey, (void *)sp_rp);
6264 if (retval != 0) {
6265 DERR("service_register: cannot insert sp resource\n");
6266 goto cleanup;
6267 }
6268
6269 /* insert into per-IA sp hash table */
6270 retval = daplka_hash_insert(&ia_rp->ia_sp_htbl,
6271 &sp_hkey, (void *)sp_rp);
6272 if (retval != 0) {
6273 DERR("service_register: cannot insert sp resource\n");
6274 goto cleanup;
6275 }
6276
6277 /* pass index to application */
6278 args.sr_sp_hkey = sp_hkey;
6279 retval = ddi_copyout(&args, (void *)arg,
6280 sizeof (dapl_service_register_t), mode);
6281 if (retval != 0) {
6282 DERR("service_register: copyout error %d\n", retval);
6283 retval = EFAULT;
6284 goto cleanup;
6285 }
6286 return (0);
6287
6288 cleanup:;
6289 ASSERT(sp_rp != NULL);
6290 /* remove from ia table */
6291 if (sp_hkey != 0) {
6292 daplka_sp_resource_t *free_rp = NULL;
6293
6294 (void) daplka_hash_remove(&ia_rp->ia_sp_htbl,
6295 sp_hkey, (void **)&free_rp);
6296 if (free_rp != sp_rp) {
6297 DERR("service_register: cannot remove sp\n");
6298 /*
6299 * we can only get here if another thread
6300 * has completed the cleanup in svc_deregister
6301 */
6302 return (retval);
6303 }
6304 }
6305
6306 /* remove from global table */
6307 if (sp_rp->sp_global_hkey != 0) {
6308 daplka_sp_resource_t *free_rp = NULL;
6309
		/*
		 * we get here if either the hash_insert into
		 * ia_sp_htbl failed or the ddi_copyout failed.
		 * hash_insert failure implies that we are the
		 * only thread with a reference to sp. ddi_copyout
		 * failure implies that svc_deregister could have
		 * picked up the sp and destroyed it. but since
		 * we got to this point, we must have removed the
		 * sp ourselves in the hash_remove above, so the
		 * sp can safely be destroyed by us.
		 */
6321 (void) daplka_hash_remove(&daplka_global_sp_htbl,
6322 sp_rp->sp_global_hkey, (void **)&free_rp);
6323 if (free_rp != sp_rp) {
6324 DERR("service_register: cannot remove sp\n");
6325 /*
6326 * this case is impossible. see explanation above.
6327 */
6328 ASSERT(B_FALSE);
6329 return (retval);
6330 }
6331 sp_rp->sp_global_hkey = 0;
6332 }
6333 /* unreference sp */
6334 if (bumped) {
6335 DAPLKA_RS_UNREF(sp_rp);
6336 }
6337
6338 /* destroy sp resource */
6339 DAPLKA_RS_UNREF(sp_rp);
6340 return (retval);
6341 }
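
/*
 * Illustrative sketch, not part of the original source: the service id
 * selection described before daplka_service_register comes down to how
 * ibt_register_service is invoked. With a nonzero sid the registration
 * either claims that exact id or fails; with sid == 0 the IBTF picks
 * the next available id and returns it through the last argument, and
 * that is the value handed back to the client as sr_retsid.
 */
#ifdef DAPLKA_EXAMPLE_SKETCHES
static ibt_status_t
example_register_sid(ibt_srv_desc_t *sd_argsp, ib_svc_id_t requested,
    ibt_srv_hdl_t *srv_hdlp, ib_svc_id_t *assignedp)
{
	/* requested == 0 asks the framework for an unused service id */
	return (ibt_register_service(daplka_dev->daplka_clnt_hdl,
	    sd_argsp, requested, 1, srv_hdlp, assignedp));
}
#endif /* DAPLKA_EXAMPLE_SKETCHES */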
6342
6343 /*
6344 * deregisters the service and removes SP from the global table.
6345 */
6346 /* ARGSUSED */
6347 static int
6348 daplka_service_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6349 cred_t *cred, int *rvalp)
6350 {
6351 dapl_service_deregister_t args;
6352 daplka_sp_resource_t *sp_rp = NULL, *g_sp_rp = NULL;
6353 int retval;
6354
6355 retval = ddi_copyin((void *)arg, &args,
6356 sizeof (dapl_service_deregister_t), mode);
6357
6358 if (retval != 0) {
6359 DERR("service_deregister: copyin error %d\n", retval);
6360 return (EINVAL);
6361 }
6362
6363 retval = daplka_hash_remove(&ia_rp->ia_sp_htbl,
6364 args.sdr_sp_hkey, (void **)&sp_rp);
6365 if (retval != 0 || sp_rp == NULL) {
6366 DERR("service_deregister: cannot find sp resource\n");
6367 return (EINVAL);
6368 }
6369
6370 retval = daplka_hash_remove(&daplka_global_sp_htbl,
6371 sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6372 if (retval != 0 || g_sp_rp == NULL) {
6373 DERR("service_deregister: cannot find sp resource\n");
6374 }
6375
6376 /* remove the global reference */
6377 if (g_sp_rp == sp_rp) {
6378 DAPLKA_RS_UNREF(g_sp_rp);
6379 }
6380
6381 DAPLKA_RS_UNREF(sp_rp);
6382 return (0);
6383 }
6384
6385 /*
6386 * destroys a service point.
6387 * called when the refcnt drops to zero.
6388 */
6389 static int
6390 daplka_sp_destroy(daplka_resource_t *gen_rp)
6391 {
6392 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)gen_rp;
6393 ibt_status_t status;
6394
6395 ASSERT(DAPLKA_RS_REFCNT(sp_rp) == 0);
6396 D3("sp_destroy: entering, sp_rp %p, rnum %d\n",
6397 sp_rp, DAPLKA_RS_RNUM(sp_rp));
6398
6399 /*
6400 * it is possible for pending connections to remain
6401 * on an SP. We need to clean them up here.
6402 */
6403 if (sp_rp->sp_backlog != NULL) {
6404 ibt_cm_proceed_reply_t proc_reply;
6405 int i, cnt = 0;
6406 void *spcp_sidp;
6407
6408 for (i = 0; i < sp_rp->sp_backlog_size; i++) {
6409 if (sp_rp->sp_backlog[i].spcp_state ==
6410 DAPLKA_SPCP_PENDING) {
6411 cnt++;
6412 if (sp_rp->sp_backlog[i].spcp_sid == NULL) {
6413 DERR("sp_destroy: "
6414 "spcp_sid == NULL!\n");
6415 continue;
6416 }
6417 mutex_enter(&sp_rp->sp_lock);
6418 spcp_sidp = sp_rp->sp_backlog[i].spcp_sid;
6419 sp_rp->sp_backlog[i].spcp_state =
6420 DAPLKA_SPCP_INIT;
6421 sp_rp->sp_backlog[i].spcp_sid = NULL;
6422 sp_rp->sp_backlog[i].spcp_req_len = 0;
6423 mutex_exit(&sp_rp->sp_lock);
6424 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV,
6425 spcp_sidp,
6426 IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
6427 if (status != IBT_SUCCESS) {
6428 DERR("sp_destroy: proceed failed %d\n",
6429 status);
6430 }
6431 }
6432 }
6433 if (cnt > 0) {
6434 DERR("sp_destroy: found %d pending "
6435 "connections\n", cnt);
6436 }
6437 }
6438
6439 if (sp_rp->sp_srv_hdl != NULL && sp_rp->sp_bind_hdl != NULL) {
6440 status = ibt_unbind_service(sp_rp->sp_srv_hdl,
6441 sp_rp->sp_bind_hdl);
6442 if (status != IBT_SUCCESS) {
6443 DERR("sp_destroy: ibt_unbind_service "
6444 "failed: %d\n", status);
6445 }
6446 }
6447
6448 if (sp_rp->sp_srv_hdl != NULL) {
6449 status = ibt_deregister_service(daplka_dev->daplka_clnt_hdl,
6450 sp_rp->sp_srv_hdl);
6451 if (status != IBT_SUCCESS) {
6452 DERR("sp_destroy: ibt_deregister_service "
6453 "failed: %d\n", status);
6454 }
6455 }
6456 if (sp_rp->sp_backlog != NULL) {
6457 kmem_free(sp_rp->sp_backlog,
6458 sp_rp->sp_backlog_size * sizeof (daplka_sp_conn_pend_t));
6459 sp_rp->sp_backlog = NULL;
6460 sp_rp->sp_backlog_size = 0;
6461 }
6462
6463 /*
6464 * release reference to evd
6465 */
6466 if (sp_rp->sp_evd_res != NULL) {
6467 DAPLKA_RS_UNREF(sp_rp->sp_evd_res);
6468 }
6469 sp_rp->sp_bind_hdl = NULL;
6470 sp_rp->sp_srv_hdl = NULL;
6471 DAPLKA_RS_FINI(sp_rp);
6472 kmem_free(sp_rp, sizeof (*sp_rp));
6473 D3("sp_destroy: exiting, sp_rp %p\n", sp_rp);
6474 return (0);
6475 }
6476
6477 /*
6478 * this function is called by daplka_hash_destroy for
6479 * freeing SP resource objects
6480 */
6481 static void
6482 daplka_hash_sp_free(void *obj)
6483 {
6484 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6485 daplka_sp_resource_t *g_sp_rp;
6486 int retval;
6487
6488 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6489
6490 retval = daplka_hash_remove(&daplka_global_sp_htbl,
6491 sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6492 if (retval != 0 || g_sp_rp == NULL) {
6493 DERR("sp_free: cannot find sp resource\n");
6494 }
6495 if (g_sp_rp == sp_rp) {
6496 DAPLKA_RS_UNREF(g_sp_rp);
6497 }
6498
6499 DAPLKA_RS_UNREF(sp_rp);
6500 }
6501
6502 static void
6503 daplka_hash_sp_unref(void *obj)
6504 {
6505 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6506
6507 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6508 DAPLKA_RS_UNREF(sp_rp);
6509 }
6510
6511 /*
6512 * Passive side CM handlers
6513 */
6514
6515 /*
6516 * processes the REQ_RCV event
6517 */
6518 /* ARGSUSED */
6519 static ibt_cm_status_t
6520 daplka_cm_service_req(daplka_sp_resource_t *spp, ibt_cm_event_t *event,
6521 ibt_cm_return_args_t *ret_args, void *pr_data, ibt_priv_data_len_t pr_len)
6522 {
6523 daplka_sp_conn_pend_t *conn = NULL;
6524 daplka_evd_event_t *cr_ev = NULL;
6525 ibt_cm_status_t cm_status = IBT_CM_DEFAULT;
6526 uint16_t bkl_index;
6527 ibt_status_t status;
6528
6529 /*
6530 * acquire a slot in the connection backlog of this service point
6531 */
6532 mutex_enter(&spp->sp_lock);
6533 for (bkl_index = 0; bkl_index < spp->sp_backlog_size; bkl_index++) {
6534 if (spp->sp_backlog[bkl_index].spcp_state == DAPLKA_SPCP_INIT) {
6535 conn = &spp->sp_backlog[bkl_index];
6536 ASSERT(conn->spcp_sid == NULL);
6537 conn->spcp_state = DAPLKA_SPCP_PENDING;
6538 conn->spcp_sid = event->cm_session_id;
6539 break;
6540 }
6541 }
6542 mutex_exit(&spp->sp_lock);
6543
6544 /*
6545 * too many pending connections
6546 */
6547 if (bkl_index == spp->sp_backlog_size) {
		DERR("service_req: pending connections exceeded backlog "
		    "limit %d\n", spp->sp_backlog_size);
6550 return (IBT_CM_NO_RESOURCE);
6551 }
6552
6553 /*
6554 * save data for cr_handoff
6555 */
6556 if (pr_data != NULL && pr_len > 0) {
6557 int trunc_len = pr_len;
6558
6559 if (trunc_len > DAPL_MAX_PRIVATE_DATA_SIZE) {
6560 DERR("service_req: private data truncated\n");
6561 trunc_len = DAPL_MAX_PRIVATE_DATA_SIZE;
6562 }
6563 conn->spcp_req_len = trunc_len;
6564 bcopy(pr_data, conn->spcp_req_data, trunc_len);
6565 } else {
6566 conn->spcp_req_len = 0;
6567 }
6568 conn->spcp_rdma_ra_in = event->cm_event.req.req_rdma_ra_in;
6569 conn->spcp_rdma_ra_out = event->cm_event.req.req_rdma_ra_out;
6570
6571 /*
6572 * create a CR event
6573 */
6574 cr_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6575 if (cr_ev == NULL) {
6576 DERR("service_req: could not alloc cr_ev\n");
6577 cm_status = IBT_CM_NO_RESOURCE;
6578 goto cleanup;
6579 }
6580
6581 cr_ev->ee_next = NULL;
6582 cr_ev->ee_cmev.ec_cm_cookie = spp->sp_cookie;
6583 cr_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6584 cr_ev->ee_cmev.ec_cm_psep_cookie = DAPLKA_CREATE_PSEP_COOKIE(bkl_index);
6585 /*
6586 * save the requestor gid
6587 * daplka_event_poll needs this if this is a third party REQ_RCV
6588 */
6589 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix =
6590 event->cm_event.req.req_prim_addr.av_dgid.gid_prefix;
6591 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid =
6592 event->cm_event.req.req_prim_addr.av_dgid.gid_guid;
6593
6594 /*
6595 * set event type
6596 */
6597 if (pr_len == 0) {
6598 cr_ev->ee_cmev.ec_cm_ev_type =
6599 DAPL_IB_CME_CONNECTION_REQUEST_PENDING;
6600 } else {
6601 cr_ev->ee_cmev.ec_cm_ev_priv_data =
6602 kmem_zalloc(pr_len, KM_NOSLEEP);
6603 if (cr_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
6604 DERR("service_req: could not alloc priv\n");
6605 cm_status = IBT_CM_NO_RESOURCE;
6606 goto cleanup;
6607 }
6608 bcopy(pr_data, cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6609 cr_ev->ee_cmev.ec_cm_ev_type =
6610 DAPL_IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA;
6611 }
6612 cr_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
6613
6614 /*
6615 * tell the active side to expect the processing time to be
6616 * at most equal to daplka_cm_delay
6617 */
6618 status = ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
6619 daplka_cm_delay, NULL, 0);
6620 if (status != IBT_SUCCESS) {
6621 DERR("service_req: ibt_cm_delay failed %d\n", status);
6622 cm_status = IBT_CM_NO_RESOURCE;
6623 goto cleanup;
6624 }
6625
6626 /*
6627 * enqueue cr_ev onto the cr_events list of the EVD
6628 * corresponding to the SP
6629 */
6630 D2("service_req: enqueue event(%p) evdp(%p) priv_data(%p) "
6631 "priv_len(%d) psep(0x%llx)\n", cr_ev, spp->sp_evd_res,
6632 cr_ev->ee_cmev.ec_cm_ev_priv_data,
6633 (int)cr_ev->ee_cmev.ec_cm_ev_priv_data_len,
6634 (longlong_t)cr_ev->ee_cmev.ec_cm_psep_cookie);
6635
6636 daplka_evd_wakeup(spp->sp_evd_res,
6637 &spp->sp_evd_res->evd_cr_events, cr_ev);
6638
6639 return (IBT_CM_DEFER);
6640
6641 cleanup:;
6642 /*
6643 * free the cr event
6644 */
6645 if (cr_ev != NULL) {
6646 if (cr_ev->ee_cmev.ec_cm_ev_priv_data != NULL) {
6647 kmem_free(cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6648 cr_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6649 cr_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6650 }
6651 kmem_free(cr_ev, sizeof (daplka_evd_event_t));
6652 }
6653 /*
6654 * release our slot in the backlog array
6655 */
6656 if (conn != NULL) {
6657 mutex_enter(&spp->sp_lock);
6658 ASSERT(conn->spcp_state == DAPLKA_SPCP_PENDING);
6659 ASSERT(conn->spcp_sid == event->cm_session_id);
6660 conn->spcp_state = DAPLKA_SPCP_INIT;
6661 conn->spcp_req_len = 0;
6662 conn->spcp_sid = NULL;
6663 mutex_exit(&spp->sp_lock);
6664 }
6665 return (cm_status);
6666 }
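
/*
 * Illustrative sketch, not part of the original source: the psep cookie
 * created in service_req above must round-trip through userland and
 * come back as a backlog index when the CR is later accepted, rejected
 * or handed off. One plausible encoding (not necessarily the driver's
 * actual DAPLKA_CREATE_PSEP_COOKIE definition) and the corresponding
 * slot lookup are shown below. Guarded out of the build on purpose.
 */
#ifdef DAPLKA_EXAMPLE_SKETCHES
/* bias by one so that a zero cookie can mean "no pending CR" */
#define	EXAMPLE_CREATE_PSEP_COOKIE(bkl_index)	((uint64_t)(bkl_index) + 1)
#define	EXAMPLE_PSEP_INDEX(cookie)		((uint16_t)((cookie) - 1))

static daplka_sp_conn_pend_t *
example_lookup_pending(daplka_sp_resource_t *spp, uint64_t psep_cookie)
{
	uint16_t bkl_index = EXAMPLE_PSEP_INDEX(psep_cookie);

	if (psep_cookie == 0 || bkl_index >= spp->sp_backlog_size)
		return (NULL);
	/* caller must hold sp_lock and check for DAPLKA_SPCP_PENDING */
	return (&spp->sp_backlog[bkl_index]);
}
#endif /* DAPLKA_EXAMPLE_SKETCHES */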
6667
6668 /*
6669 * processes the CONN_CLOSED event
6670 */
6671 /* ARGSUSED */
6672 static ibt_cm_status_t
6673 daplka_cm_service_conn_closed(daplka_sp_resource_t *sp_rp,
6674 ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args,
6675 void *priv_data, ibt_priv_data_len_t len)
6676 {
6677 daplka_ep_resource_t *ep_rp;
6678 daplka_evd_event_t *disc_ev;
6679 uint32_t old_state, new_state;
6680
6681 ep_rp = (daplka_ep_resource_t *)
6682 ibt_get_chan_private(event->cm_channel);
6683 if (ep_rp == NULL) {
6684 DERR("service_conn_closed: ep_rp == NULL\n");
6685 return (IBT_CM_ACCEPT);
6686 }
6687
6688 /*
6689 * verify that the ep_state is either CONNECTED or
	 * DISCONNECTING. if it is not in either state, return
6691 * without generating an event.
6692 */
6693 new_state = old_state = daplka_ep_get_state(ep_rp);
6694 if (old_state != DAPLKA_EP_STATE_CONNECTED &&
6695 old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6696 /*
6697 * we can get here if the connection is being aborted
6698 */
6699 D2("service_conn_closed: conn aborted, state = %d, "
6700 "closed = %d\n", old_state, (int)event->cm_event.closed);
6701 daplka_ep_set_state(ep_rp, old_state, new_state);
6702 return (IBT_CM_ACCEPT);
6703 }
6704
6705 /*
6706 * create a DAPL_IB_CME_DISCONNECTED event
6707 */
6708 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6709 if (disc_ev == NULL) {
6710 DERR("service_conn_closed: cannot alloc disc_ev\n");
6711 daplka_ep_set_state(ep_rp, old_state, new_state);
6712 return (IBT_CM_ACCEPT);
6713 }
6714
6715 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
6716 disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6717 disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6718 disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6719 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6720 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6721
6722 D2("service_conn_closed: enqueue event(%p) evdp(%p) psep(0x%llx)\n",
6723 disc_ev, sp_rp->sp_evd_res, (longlong_t)ep_rp->ep_psep_cookie);
6724
6725 /*
6726 * transition ep_state to DISCONNECTED
6727 */
6728 new_state = DAPLKA_EP_STATE_DISCONNECTED;
6729 daplka_ep_set_state(ep_rp, old_state, new_state);
6730
6731 /*
6732 * enqueue event onto the conn_evd owned by ep_rp
6733 */
6734 daplka_evd_wakeup(ep_rp->ep_conn_evd,
6735 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6736
6737 return (IBT_CM_ACCEPT);
6738 }
6739
6740 /*
6741 * processes the CONN_EST event
6742 */
6743 /* ARGSUSED */
6744 static ibt_cm_status_t
6745 daplka_cm_service_conn_est(daplka_sp_resource_t *sp_rp, ibt_cm_event_t *event,
6746 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6747 {
6748 daplka_ep_resource_t *ep_rp;
6749 daplka_evd_event_t *conn_ev;
6750 void *pr_data = event->cm_priv_data;
6751 ibt_priv_data_len_t pr_len = event->cm_priv_data_len;
6752 uint32_t old_state, new_state;
6753
6754 ep_rp = (daplka_ep_resource_t *)
6755 ibt_get_chan_private(event->cm_channel);
6756 if (ep_rp == NULL) {
6757 DERR("service_conn_est: ep_rp == NULL\n");
6758 return (IBT_CM_ACCEPT);
6759 }
6760
6761 /*
6762 * verify that ep_state is ACCEPTING. if it is not in this
6763 * state, return without generating an event.
6764 */
6765 new_state = old_state = daplka_ep_get_state(ep_rp);
6766 if (old_state != DAPLKA_EP_STATE_ACCEPTING) {
6767 /*
6768 * we can get here if the connection is being aborted
6769 */
6770 DERR("service_conn_est: conn aborted, state = %d\n",
6771 old_state);
6772 daplka_ep_set_state(ep_rp, old_state, new_state);
6773 return (IBT_CM_ACCEPT);
6774 }
6775
6776 /*
6777 * create a DAPL_IB_CME_CONNECTED event
6778 */
6779 conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6780 if (conn_ev == NULL) {
6781 DERR("service_conn_est: conn_ev alloc failed\n");
6782 daplka_ep_set_state(ep_rp, old_state, new_state);
6783 return (IBT_CM_ACCEPT);
6784 }
6785
6786 conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
6787 conn_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6788 conn_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6789 conn_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6790
6791 /*
6792 * copy private data into event
6793 */
6794 if (pr_len > 0) {
6795 conn_ev->ee_cmev.ec_cm_ev_priv_data =
6796 kmem_zalloc(pr_len, KM_NOSLEEP);
6797 if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
6798 DERR("service_conn_est: pr_data alloc failed\n");
6799 daplka_ep_set_state(ep_rp, old_state, new_state);
6800 kmem_free(conn_ev, sizeof (daplka_evd_event_t));
6801 return (IBT_CM_ACCEPT);
6802 }
6803 bcopy(pr_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6804 }
6805 conn_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
6806
6807 D2("service_conn_est: enqueue event(%p) evdp(%p)\n",
6808 conn_ev, ep_rp->ep_conn_evd);
6809
6810 /*
6811 * transition ep_state to CONNECTED
6812 */
6813 new_state = DAPLKA_EP_STATE_CONNECTED;
6814 daplka_ep_set_state(ep_rp, old_state, new_state);
6815
6816 /*
6817 * enqueue event onto the conn_evd owned by ep_rp
6818 */
6819 daplka_evd_wakeup(ep_rp->ep_conn_evd,
6820 &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);
6821
6822 return (IBT_CM_ACCEPT);
6823 }
6824
6825 /*
6826 * processes the FAILURE event
6827 */
6828 /* ARGSUSED */
6829 static ibt_cm_status_t
6830 daplka_cm_service_event_failure(daplka_sp_resource_t *sp_rp,
6831 ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, void *priv_data,
6832 ibt_priv_data_len_t len)
6833 {
6834 daplka_evd_event_t *disc_ev;
6835 daplka_ep_resource_t *ep_rp;
6836 uint32_t old_state, new_state;
6837 ibt_rc_chan_query_attr_t chan_attrs;
6838 ibt_status_t status;
6839
6840 /*
6841 * check that we still have a valid cm_channel before continuing
6842 */
6843 if (event->cm_channel == NULL) {
		DERR("service_event_failure: event->cm_channel == NULL\n");
6845 return (IBT_CM_ACCEPT);
6846 }
6847 ep_rp = (daplka_ep_resource_t *)
6848 ibt_get_chan_private(event->cm_channel);
6849 if (ep_rp == NULL) {
6850 DERR("service_event_failure: ep_rp == NULL\n");
6851 return (IBT_CM_ACCEPT);
6852 }
6853
6854 /*
6855 * verify that ep_state is ACCEPTING or DISCONNECTING. if it
6856 * is not in either state, return without generating an event.
6857 */
6858 new_state = old_state = daplka_ep_get_state(ep_rp);
6859 if (old_state != DAPLKA_EP_STATE_ACCEPTING &&
6860 old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6861 /*
6862 * we can get here if the connection is being aborted
6863 */
6864 DERR("service_event_failure: conn aborted, state = %d, "
6865 "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
6866 (int)event->cm_event.failed.cf_code,
6867 (int)event->cm_event.failed.cf_msg,
6868 (int)event->cm_event.failed.cf_reason);
6869
6870 daplka_ep_set_state(ep_rp, old_state, new_state);
6871 return (IBT_CM_ACCEPT);
6872 }
6873
6874 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
6875 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
6876
6877 if ((status == IBT_SUCCESS) &&
6878 (chan_attrs.rc_state != IBT_STATE_ERROR)) {
6879 DERR("service_event_failure: conn abort qpn %d state %d\n",
6880 chan_attrs.rc_qpn, chan_attrs.rc_state);
6881
		/* explicitly transition the QP to the ERROR state */
6883 status = ibt_flush_channel(ep_rp->ep_chan_hdl);
6884 }
6885
6886 /*
6887 * create an event
6888 */
6889 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6890 if (disc_ev == NULL) {
6891 DERR("service_event_failure: cannot alloc disc_ev\n");
6892 daplka_ep_set_state(ep_rp, old_state, new_state);
6893 return (IBT_CM_ACCEPT);
6894 }
6895
6896 /*
6897 * fill in the appropriate event type
6898 */
6899 if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
6900 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
6901 } else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
6902 switch (event->cm_event.failed.cf_reason) {
6903 case IBT_CM_INVALID_CID:
6904 disc_ev->ee_cmev.ec_cm_ev_type =
6905 DAPL_IB_CME_DESTINATION_REJECT;
6906 break;
6907 default:
6908 disc_ev->ee_cmev.ec_cm_ev_type =
6909 DAPL_IB_CME_LOCAL_FAILURE;
6910 break;
6911 }
6912 } else {
6913 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
6914 }
6915 disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6916 disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6917 disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6918 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6919 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6920
6921 D2("service_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
6922 "cf_msg(%d) cf_reason(%d) psep(0x%llx)\n", disc_ev,
6923 ep_rp->ep_conn_evd, (int)event->cm_event.failed.cf_code,
6924 (int)event->cm_event.failed.cf_msg,
6925 (int)event->cm_event.failed.cf_reason,
6926 (longlong_t)ep_rp->ep_psep_cookie);
6927
6928 /*
6929 * transition ep_state to DISCONNECTED
6930 */
6931 new_state = DAPLKA_EP_STATE_DISCONNECTED;
6932 daplka_ep_set_state(ep_rp, old_state, new_state);
6933
6934 /*
6935 * enqueue event onto the conn_evd owned by ep_rp
6936 */
6937 daplka_evd_wakeup(ep_rp->ep_conn_evd,
6938 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6939
6940 return (IBT_CM_ACCEPT);
6941 }
6942
6943 /*
6944 * this is the passive side CM handler. it gets registered
6945 * when an SP resource is created in daplka_service_register.
6946 */
6947 static ibt_cm_status_t
6948 daplka_cm_service_handler(void *cm_private, ibt_cm_event_t *event,
6949 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6950 {
6951 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)cm_private;
6952
6953 if (sp_rp == NULL) {
6954 DERR("service_handler: sp_rp == NULL\n");
6955 return (IBT_CM_NO_RESOURCE);
6956 }
6957 /*
6958 * default is not to return priv data
6959 */
6960 if (ret_args != NULL) {
6961 ret_args->cm_ret_len = 0;
6962 }
6963
6964 switch (event->cm_type) {
6965 case IBT_CM_EVENT_REQ_RCV:
6966 D2("service_handler: IBT_CM_EVENT_REQ_RCV\n");
6967 return (daplka_cm_service_req(sp_rp, event, ret_args,
6968 event->cm_priv_data, event->cm_priv_data_len));
6969
6970 case IBT_CM_EVENT_REP_RCV:
6971 /* passive side should not receive this event */
6972 D2("service_handler: IBT_CM_EVENT_REP_RCV\n");
6973 return (IBT_CM_DEFAULT);
6974
6975 case IBT_CM_EVENT_CONN_CLOSED:
6976 D2("service_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
6977 event->cm_event.closed);
6978 return (daplka_cm_service_conn_closed(sp_rp, event, ret_args,
6979 priv_data, len));
6980
6981 case IBT_CM_EVENT_MRA_RCV:
		/* passive side does default processing of the MRA event */
6983 D2("service_handler: IBT_CM_EVENT_MRA_RCV\n");
6984 return (IBT_CM_DEFAULT);
6985
6986 case IBT_CM_EVENT_CONN_EST:
6987 D2("service_handler: IBT_CM_EVENT_CONN_EST\n");
6988 return (daplka_cm_service_conn_est(sp_rp, event, ret_args,
6989 priv_data, len));
6990
6991 case IBT_CM_EVENT_FAILURE:
6992 D2("service_handler: IBT_CM_EVENT_FAILURE\n");
6993 return (daplka_cm_service_event_failure(sp_rp, event, ret_args,
6994 priv_data, len));
6995 case IBT_CM_EVENT_LAP_RCV:
		/* active side has initiated a path migration operation */
6997 D2("service_handler: IBT_CM_EVENT_LAP_RCV\n");
6998 return (IBT_CM_ACCEPT);
6999 default:
7000 DERR("service_handler: invalid event %d\n", event->cm_type);
7001 break;
7002 }
7003 return (IBT_CM_DEFAULT);
7004 }
7005
7006 /*
7007 * Active side CM handlers
7008 */
7009
7010 /*
7011 * Processes the REP_RCV event. When the passive side accepts the
 * connection, this handler is called. We make a copy of the private
 * data into the ep so that it can be passed back to userland when
 * the CONN_EST event occurs.
7015 */
7016 /* ARGSUSED */
7017 static ibt_cm_status_t
7018 daplka_cm_rc_rep_rcv(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7019 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7020 {
7021 void *pr_data = event->cm_priv_data;
7022 ibt_priv_data_len_t pr_len = event->cm_priv_data_len;
7023 uint32_t old_state, new_state;
7024
7025 D2("rc_rep_rcv: pr_data(0x%p), pr_len(%d)\n", pr_data,
7026 (int)pr_len);
7027
7028 ASSERT(ep_rp != NULL);
7029 new_state = old_state = daplka_ep_get_state(ep_rp);
7030 if (old_state != DAPLKA_EP_STATE_CONNECTING) {
7031 /*
7032 * we can get here if the connection is being aborted
7033 */
7034 DERR("rc_rep_rcv: conn aborted, state = %d\n", old_state);
7035 daplka_ep_set_state(ep_rp, old_state, new_state);
7036 return (IBT_CM_NO_CHANNEL);
7037 }
7038
7039 /*
7040 * we do not cancel the timer here because the connection
7041 * handshake is still in progress.
7042 */
7043
7044 /*
7045 * save the private data. it will be passed up when
7046 * the connection is established.
7047 */
7048 if (pr_len > 0) {
7049 ep_rp->ep_priv_len = pr_len;
7050 bcopy(pr_data, ep_rp->ep_priv_data, (size_t)pr_len);
7051 }
7052
7053 /*
7054 * we do not actually transition to a different state.
7055 * the state will change when we get a conn_est, failure,
7056 * closed, or timeout event.
7057 */
7058 daplka_ep_set_state(ep_rp, old_state, new_state);
7059 return (IBT_CM_ACCEPT);
7060 }
7061
7062 /*
7063 * Processes the CONN_CLOSED event. This gets called when either
7064 * the active or passive side closes the rc channel.
7065 */
7066 /* ARGSUSED */
7067 static ibt_cm_status_t
7068 daplka_cm_rc_conn_closed(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7069 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7070 {
7071 daplka_evd_event_t *disc_ev;
7072 uint32_t old_state, new_state;
7073
7074 ASSERT(ep_rp != NULL);
7075 old_state = new_state = daplka_ep_get_state(ep_rp);
7076 if (old_state != DAPLKA_EP_STATE_CONNECTED &&
7077 old_state != DAPLKA_EP_STATE_DISCONNECTING) {
7078 /*
7079 * we can get here if the connection is being aborted
7080 */
7081 D2("rc_conn_closed: conn aborted, state = %d, "
7082 "closed = %d\n", old_state, (int)event->cm_event.closed);
7083 daplka_ep_set_state(ep_rp, old_state, new_state);
7084 return (IBT_CM_ACCEPT);
7085 }
7086
7087 /*
7088 * it's ok for the timer to fire at this point. the
7089 * taskq thread that processes the timer will just wait
7090 * until we are done with our state transition.
7091 */
7092 if (daplka_cancel_timer(ep_rp) != 0) {
7093 /*
7094 * daplka_cancel_timer returns -1 if the timer is
7095 * being processed and 0 for all other cases.
7096 * we need to reset ep_state to allow timer processing
7097 * to continue.
7098 */
7099 DERR("rc_conn_closed: timer is being processed\n");
7100 daplka_ep_set_state(ep_rp, old_state, new_state);
7101 return (IBT_CM_ACCEPT);
7102 }
7103
7104 /*
7105 * create a DAPL_IB_CME_DISCONNECTED event
7106 */
7107 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7108 if (disc_ev == NULL) {
7109 DERR("rc_conn_closed: could not alloc ev\n");
7110 daplka_ep_set_state(ep_rp, old_state, new_state);
7111 return (IBT_CM_ACCEPT);
7112 }
7113
7114 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
7115 disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7116 disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7117 disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
7118 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
7119 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
7120
7121 D2("rc_conn_closed: enqueue event(%p) evdp(%p) closed(%d)\n",
7122 disc_ev, ep_rp->ep_conn_evd, (int)event->cm_event.closed);
7123
7124 /*
7125 * transition ep_state to DISCONNECTED
7126 */
7127 new_state = DAPLKA_EP_STATE_DISCONNECTED;
7128 daplka_ep_set_state(ep_rp, old_state, new_state);
7129
7130 /*
7131 * enqueue event onto the conn_evd owned by ep_rp
7132 */
7133 daplka_evd_wakeup(ep_rp->ep_conn_evd,
7134 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
7135
7136 return (IBT_CM_ACCEPT);
7137 }
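
/*
 * Illustrative sketch, not part of the original source: the
 * daplka_cancel_timer contract that rc_conn_closed above (and the
 * handlers below) depend on. The ep_timer_id and ep_timer_running
 * fields are hypothetical; the sketch only shows how a -1 "timer is
 * being processed" result could come from a flag that the timeout
 * callback sets once it has started running.
 */
#ifdef DAPLKA_EXAMPLE_SKETCHES
static int
example_cancel_timer(daplka_ep_resource_t *ep_rp)
{
	if (ep_rp->ep_timer_running)
		return (-1);	/* caller backs off; the taskq finishes */
	if (ep_rp->ep_timer_id != 0) {
		(void) untimeout(ep_rp->ep_timer_id);
		ep_rp->ep_timer_id = 0;
	}
	return (0);
}
#endif /* DAPLKA_EXAMPLE_SKETCHES */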
7138
7139 /*
7140 * processes the CONN_EST event
7141 */
7142 /* ARGSUSED */
7143 static ibt_cm_status_t
7144 daplka_cm_rc_conn_est(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7145 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7146 {
7147 daplka_evd_event_t *conn_ev;
7148 uint32_t old_state, new_state;
7149
7150 ASSERT(ep_rp != NULL);
7151 old_state = new_state = daplka_ep_get_state(ep_rp);
7152 if (old_state != DAPLKA_EP_STATE_CONNECTING) {
7153 /*
7154 * we can get here if the connection is being aborted
7155 */
7156 DERR("rc_conn_est: conn aborted, state = %d\n", old_state);
7157 daplka_ep_set_state(ep_rp, old_state, new_state);
7158 return (IBT_CM_ACCEPT);
7159 }
7160
7161 /*
7162 * it's ok for the timer to fire at this point. the
7163 * taskq thread that processes the timer will just wait
7164 * until we are done with our state transition.
7165 */
7166 if (daplka_cancel_timer(ep_rp) != 0) {
7167 /*
7168 * daplka_cancel_timer returns -1 if the timer is
7169 * being processed and 0 for all other cases.
7170 * we need to reset ep_state to allow timer processing
7171 * to continue.
7172 */
7173 DERR("rc_conn_est: timer is being processed\n");
7174 daplka_ep_set_state(ep_rp, old_state, new_state);
7175 return (IBT_CM_ACCEPT);
7176 }
7177
7178 /*
7179 * create a DAPL_IB_CME_CONNECTED event
7180 */
7181 conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7182 if (conn_ev == NULL) {
7183 DERR("rc_conn_est: could not alloc ev\n");
7184 daplka_ep_set_state(ep_rp, old_state, new_state);
7185 return (IBT_CM_ACCEPT);
7186 }
7187
7188 conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
7189 conn_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7190 conn_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7191 conn_ev->ee_cmev.ec_cm_psep_cookie = 0;
7192
7193 /*
7194 * The private data passed back in the connection established
	 * event is what was received in the daplka_cm_rc_rep_rcv handler
	 * and saved in the ep resource structure.
7197 */
7198 if (ep_rp->ep_priv_len > 0) {
7199 conn_ev->ee_cmev.ec_cm_ev_priv_data =
7200 kmem_zalloc(ep_rp->ep_priv_len, KM_NOSLEEP);
7201
7202 if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
7203 DERR("rc_conn_est: could not alloc pr_data\n");
7204 kmem_free(conn_ev, sizeof (daplka_evd_event_t));
7205 daplka_ep_set_state(ep_rp, old_state, new_state);
7206 return (IBT_CM_ACCEPT);
7207 }
7208 bcopy(ep_rp->ep_priv_data, conn_ev->ee_cmev.ec_cm_ev_priv_data,
7209 ep_rp->ep_priv_len);
7210 }
7211 conn_ev->ee_cmev.ec_cm_ev_priv_data_len = ep_rp->ep_priv_len;
7212
7213 D2("rc_conn_est: enqueue event(%p) evdp(%p) pr_data(0x%p), "
7214 "pr_len(%d)\n", conn_ev, ep_rp->ep_conn_evd,
7215 conn_ev->ee_cmev.ec_cm_ev_priv_data,
7216 (int)conn_ev->ee_cmev.ec_cm_ev_priv_data_len);
7217
7218 /*
7219 * transition ep_state to CONNECTED
7220 */
7221 new_state = DAPLKA_EP_STATE_CONNECTED;
7222 daplka_ep_set_state(ep_rp, old_state, new_state);
7223
7224 /*
7225 * enqueue event onto the conn_evd owned by ep_rp
7226 */
7227 daplka_evd_wakeup(ep_rp->ep_conn_evd,
7228 &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);
7229
7230 return (IBT_CM_ACCEPT);
7231 }
7232
7233 /*
7234 * processes the FAILURE event
7235 */
7236 /* ARGSUSED */
7237 static ibt_cm_status_t
7238 daplka_cm_rc_event_failure(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7239 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7240 {
7241 daplka_evd_event_t *disc_ev;
7242 ibt_priv_data_len_t pr_len = event->cm_priv_data_len;
7243 void *pr_data = event->cm_priv_data;
7244 uint32_t old_state, new_state;
7245 ibt_rc_chan_query_attr_t chan_attrs;
7246 ibt_status_t status;
7247
7248 ASSERT(ep_rp != NULL);
7249 old_state = new_state = daplka_ep_get_state(ep_rp);
7250 if (old_state != DAPLKA_EP_STATE_CONNECTING &&
7251 old_state != DAPLKA_EP_STATE_DISCONNECTING) {
7252 /*
7253 * we can get here if the connection is being aborted
7254 */
7255 DERR("rc_event_failure: conn aborted, state = %d, "
7256 "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
7257 (int)event->cm_event.failed.cf_code,
7258 (int)event->cm_event.failed.cf_msg,
7259 (int)event->cm_event.failed.cf_reason);
7260
7261 daplka_ep_set_state(ep_rp, old_state, new_state);
7262 return (IBT_CM_ACCEPT);
7263 }
7264
7265 /*
7266 * it's ok for the timer to fire at this point. the
7267 * taskq thread that processes the timer will just wait
7268 * until we are done with our state transition.
7269 */
7270 if (daplka_cancel_timer(ep_rp) != 0) {
7271 /*
7272 * daplka_cancel_timer returns -1 if the timer is
7273 * being processed and 0 for all other cases.
7274 * we need to reset ep_state to allow timer processing
7275 * to continue.
7276 */
7277 DERR("rc_event_failure: timer is being processed\n");
7278 daplka_ep_set_state(ep_rp, old_state, new_state);
7279 return (IBT_CM_ACCEPT);
7280 }
7281
7282 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
7283 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
7284
7285 if ((status == IBT_SUCCESS) &&
7286 (chan_attrs.rc_state != IBT_STATE_ERROR)) {
7287 DERR("rc_event_failure: conn abort qpn %d state %d\n",
7288 chan_attrs.rc_qpn, chan_attrs.rc_state);
7289
		/* explicitly transition the QP to the ERROR state */
7291 status = ibt_flush_channel(ep_rp->ep_chan_hdl);
7292 }
7293
7294 /*
7295 * create an event
7296 */
7297 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7298 if (disc_ev == NULL) {
7299 DERR("rc_event_failure: cannot alloc disc_ev\n");
7300 daplka_ep_set_state(ep_rp, old_state, new_state);
7301 return (IBT_CM_ACCEPT);
7302 }
7303
7304 /*
7305 * copy private data into event
7306 */
7307 if (pr_len > 0) {
7308 disc_ev->ee_cmev.ec_cm_ev_priv_data =
7309 kmem_zalloc(pr_len, KM_NOSLEEP);
7310
7311 if (disc_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
7312 DERR("rc_event_failure: cannot alloc pr data\n");
7313 kmem_free(disc_ev, sizeof (daplka_evd_event_t));
7314 daplka_ep_set_state(ep_rp, old_state, new_state);
7315 return (IBT_CM_ACCEPT);
7316 }
7317 bcopy(pr_data, disc_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
7318 }
7319 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
7320
7321 /*
7322 * fill in the appropriate event type
7323 */
7324 if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
7325 switch (event->cm_event.failed.cf_reason) {
7326 case IBT_CM_CONSUMER:
7327 disc_ev->ee_cmev.ec_cm_ev_type =
7328 DAPL_IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
7329 break;
7330 case IBT_CM_NO_CHAN:
7331 case IBT_CM_NO_RESC:
7332 disc_ev->ee_cmev.ec_cm_ev_type =
7333 DAPL_IB_CME_DESTINATION_REJECT;
7334 break;
7335 default:
7336 disc_ev->ee_cmev.ec_cm_ev_type =
7337 DAPL_IB_CME_DESTINATION_REJECT;
7338 break;
7339 }
7340 } else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
7341 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
7342 } else {
7343 /* others we'll mark as local failure */
7344 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
7345 }
7346 disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7347 disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7348 disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
7349
7350 D2("rc_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
7351 "cf_msg(%d) cf_reason(%d)\n", disc_ev, ep_rp->ep_conn_evd,
7352 (int)event->cm_event.failed.cf_code,
7353 (int)event->cm_event.failed.cf_msg,
7354 (int)event->cm_event.failed.cf_reason);
7355
7356 /*
7357 * transition ep_state to DISCONNECTED
7358 */
7359 new_state = DAPLKA_EP_STATE_DISCONNECTED;
7360 daplka_ep_set_state(ep_rp, old_state, new_state);
7361
7362 /*
7363 * enqueue event onto the conn_evd owned by ep_rp
7364 */
7365 daplka_evd_wakeup(ep_rp->ep_conn_evd,
7366 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
7367
7368 return (IBT_CM_ACCEPT);
7369 }
7370
7371 /*
7372 * This is the active side CM handler. It gets registered when
7373 * ibt_open_rc_channel is called.
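 * Events that are not handled explicitly below are returned to IBTF
 * with IBT_CM_DEFAULT so that the framework applies its default
 * processing.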
7374 */
7375 static ibt_cm_status_t
7376 daplka_cm_rc_handler(void *cm_private, ibt_cm_event_t *event,
7377 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7378 {
7379 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)cm_private;
7380
7381 if (ep_rp == NULL) {
7382 DERR("rc_handler: ep_rp == NULL\n");
7383 return (IBT_CM_NO_CHANNEL);
7384 }
7385 /*
7386 * default is not to return priv data
7387 */
7388 if (ret_args != NULL) {
7389 ret_args->cm_ret_len = 0;
7390 }
7391
7392 switch (event->cm_type) {
7393 case IBT_CM_EVENT_REQ_RCV:
7394 /* active side should not receive this event */
7395 D2("rc_handler: IBT_CM_EVENT_REQ_RCV\n");
7396 break;
7397
7398 case IBT_CM_EVENT_REP_RCV:
7399 /* connection accepted by passive side */
7400 D2("rc_handler: IBT_CM_EVENT_REP_RCV\n");
7401 return (daplka_cm_rc_rep_rcv(ep_rp, event, ret_args,
7402 priv_data, len));
7403
7404 case IBT_CM_EVENT_CONN_CLOSED:
7405 D2("rc_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
7406 event->cm_event.closed);
7407 return (daplka_cm_rc_conn_closed(ep_rp, event, ret_args,
7408 priv_data, len));
7409
7410 case IBT_CM_EVENT_MRA_RCV:
		/* an MRA from the passive side gets default processing */
7412 D2("rc_handler: IBT_CM_EVENT_MRA_RCV\n");
7413 return (IBT_CM_DEFAULT);
7414
7415 case IBT_CM_EVENT_CONN_EST:
7416 D2("rc_handler: IBT_CM_EVENT_CONN_EST\n");
7417 return (daplka_cm_rc_conn_est(ep_rp, event, ret_args,
7418 priv_data, len));
7419
7420 case IBT_CM_EVENT_FAILURE:
7421 D2("rc_handler: IBT_CM_EVENT_FAILURE\n");
7422 return (daplka_cm_rc_event_failure(ep_rp, event, ret_args,
7423 priv_data, len));
7424
7425 default:
7426 D2("rc_handler: invalid event %d\n", event->cm_type);
7427 break;
7428 }
7429 return (IBT_CM_DEFAULT);
7430 }
7431
7432 /*
7433 * creates an IA resource and inserts it into the global resource table.
7434 */
7435 /* ARGSUSED */
7436 static int
7437 daplka_ia_create(minor_t rnum, intptr_t arg, int mode,
7438 cred_t *cred, int *rvalp)
7439 {
7440 daplka_ia_resource_t *ia_rp, *tmp_rp;
7441 boolean_t inserted = B_FALSE;
7442 dapl_ia_create_t args;
7443 ibt_hca_hdl_t hca_hdl;
7444 ibt_status_t status;
7445 ib_gid_t sgid;
7446 int retval;
7447 ibt_hca_portinfo_t *pinfop;
7448 uint_t pinfon;
7449 uint_t size;
7450 ibt_ar_t ar_s;
7451 daplka_hca_t *hca;
7452
7453 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ia_create_t),
7454 mode);
7455 if (retval != 0) {
7456 DERR("ia_create: copyin error %d\n", retval);
7457 return (EFAULT);
7458 }
7459 if (args.ia_version != DAPL_IF_VERSION) {
7460 DERR("ia_create: invalid version %d, expected version %d\n",
7461 args.ia_version, DAPL_IF_VERSION);
7462 return (EINVAL);
7463 }
7464
7465 /*
7466 * find the hca with the matching guid
7467 */
7468 mutex_enter(&daplka_dev->daplka_mutex);
7469 for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
7470 hca = hca->hca_next) {
7471 if (hca->hca_guid == args.ia_guid) {
7472 DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);
7473 break;
7474 }
7475 }
7476 mutex_exit(&daplka_dev->daplka_mutex);
7477
7478 if (hca == NULL) {
7479 DERR("ia_create: guid 0x%016llx not found\n",
7480 (longlong_t)args.ia_guid);
7481 return (EINVAL);
7482 }
7483
	/*
	 * check whether the port number is valid
	 */
7487 if (args.ia_port > hca->hca_nports) {
7488 DERR("ia_create: invalid hca_port %d\n", args.ia_port);
7489 DAPLKA_RELE_HCA(daplka_dev, hca);
7490 return (EINVAL);
7491 }
7492 hca_hdl = hca->hca_hdl;
7493 if (hca_hdl == NULL) {
7494 DERR("ia_create: hca_hdl == NULL\n");
7495 DAPLKA_RELE_HCA(daplka_dev, hca);
7496 return (EINVAL);
7497 }
7498 status = ibt_query_hca_ports(hca_hdl, (uint8_t)args.ia_port,
7499 &pinfop, &pinfon, &size);
7500 if (status != IBT_SUCCESS) {
7501 DERR("ia_create: ibt_query_hca_ports returned %d\n", status);
7502 *rvalp = (int)status;
7503 DAPLKA_RELE_HCA(daplka_dev, hca);
7504 return (0);
7505 }
7506 sgid = pinfop->p_sgid_tbl[0];
7507 ibt_free_portinfo(pinfop, size);
7508
7509 ia_rp = kmem_zalloc(sizeof (daplka_ia_resource_t), daplka_km_flags);
7510 DAPLKA_RS_INIT(ia_rp, DAPL_TYPE_IA, rnum, daplka_ia_destroy);
7511
7512 mutex_init(&ia_rp->ia_lock, NULL, MUTEX_DRIVER, NULL);
7513 cv_init(&ia_rp->ia_cv, NULL, CV_DRIVER, NULL);
7514 ia_rp->ia_hca_hdl = hca_hdl;
7515 ia_rp->ia_hca_sgid = sgid;
7516 ia_rp->ia_hca = hca;
7517 ia_rp->ia_port_num = args.ia_port;
7518 ia_rp->ia_port_pkey = args.ia_pkey;
7519 ia_rp->ia_pid = ddi_get_pid();
7520 ia_rp->ia_async_evd_hkeys = NULL;
7521 ia_rp->ia_ar_registered = B_FALSE;
7522 bcopy(args.ia_sadata, ia_rp->ia_sadata, DAPL_ATS_NBYTES);
7523
7524 /* register Address Record */
7525 ar_s.ar_gid = ia_rp->ia_hca_sgid;
7526 ar_s.ar_pkey = ia_rp->ia_port_pkey;
7527 bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
7528 #define UC(b) ar_s.ar_data[(b)]
7529 D3("daplka_ia_create: SA[8] %d.%d.%d.%d\n",
7530 UC(8), UC(9), UC(10), UC(11));
7531 D3("daplka_ia_create: SA[12] %d.%d.%d.%d\n",
7532 UC(12), UC(13), UC(14), UC(15));
7533 retval = ibt_register_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
7534 if (retval != IBT_SUCCESS) {
7535 DERR("ia_create: failed to register Address Record.\n");
7536 retval = EINVAL;
7537 goto cleanup;
7538 }
7539 ia_rp->ia_ar_registered = B_TRUE;
7540
7541 /*
7542 * create hash tables for all object types
7543 */
7544 retval = daplka_hash_create(&ia_rp->ia_ep_htbl, DAPLKA_EP_HTBL_SZ,
7545 daplka_hash_ep_free, daplka_hash_generic_lookup);
7546 if (retval != 0) {
7547 DERR("ia_create: cannot create ep hash table\n");
7548 goto cleanup;
7549 }
7550 retval = daplka_hash_create(&ia_rp->ia_mr_htbl, DAPLKA_MR_HTBL_SZ,
7551 daplka_hash_mr_free, daplka_hash_generic_lookup);
7552 if (retval != 0) {
7553 DERR("ia_create: cannot create mr hash table\n");
7554 goto cleanup;
7555 }
7556 retval = daplka_hash_create(&ia_rp->ia_mw_htbl, DAPLKA_MW_HTBL_SZ,
7557 daplka_hash_mw_free, daplka_hash_generic_lookup);
7558 if (retval != 0) {
7559 DERR("ia_create: cannot create mw hash table\n");
7560 goto cleanup;
7561 }
7562 retval = daplka_hash_create(&ia_rp->ia_pd_htbl, DAPLKA_PD_HTBL_SZ,
7563 daplka_hash_pd_free, daplka_hash_generic_lookup);
7564 if (retval != 0) {
7565 DERR("ia_create: cannot create pd hash table\n");
7566 goto cleanup;
7567 }
7568 retval = daplka_hash_create(&ia_rp->ia_evd_htbl, DAPLKA_EVD_HTBL_SZ,
7569 daplka_hash_evd_free, daplka_hash_generic_lookup);
7570 if (retval != 0) {
7571 DERR("ia_create: cannot create evd hash table\n");
7572 goto cleanup;
7573 }
7574 retval = daplka_hash_create(&ia_rp->ia_cno_htbl, DAPLKA_CNO_HTBL_SZ,
7575 daplka_hash_cno_free, daplka_hash_generic_lookup);
7576 if (retval != 0) {
7577 DERR("ia_create: cannot create cno hash table\n");
7578 goto cleanup;
7579 }
7580 retval = daplka_hash_create(&ia_rp->ia_sp_htbl, DAPLKA_SP_HTBL_SZ,
7581 daplka_hash_sp_free, daplka_hash_generic_lookup);
7582 if (retval != 0) {
7583 DERR("ia_create: cannot create sp hash table\n");
7584 goto cleanup;
7585 }
7586 retval = daplka_hash_create(&ia_rp->ia_srq_htbl, DAPLKA_SRQ_HTBL_SZ,
7587 daplka_hash_srq_free, daplka_hash_generic_lookup);
7588 if (retval != 0) {
7589 DERR("ia_create: cannot create srq hash table\n");
7590 goto cleanup;
7591 }
7592 /*
7593 * insert ia_rp into the global resource table
7594 */
7595 retval = daplka_resource_insert(rnum, (daplka_resource_t *)ia_rp);
7596 if (retval != 0) {
7597 DERR("ia_create: cannot insert resource\n");
7598 goto cleanup;
7599 }
7600 inserted = B_TRUE;
7601
7602 args.ia_resnum = rnum;
7603 retval = copyout(&args, (void *)arg, sizeof (dapl_ia_create_t));
7604 if (retval != 0) {
7605 DERR("ia_create: copyout error %d\n", retval);
7606 retval = EFAULT;
7607 goto cleanup;
7608 }
7609 return (0);
7610
7611 cleanup:;
7612 if (inserted) {
7613 tmp_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
7614 if (tmp_rp != ia_rp) {
7615 /*
7616 * we can return here because another thread must
7617 * have freed up the resource
7618 */
7619 DERR("ia_create: cannot remove resource\n");
7620 return (retval);
7621 }
7622 }
7623 DAPLKA_RS_UNREF(ia_rp);
7624 return (retval);
7625 }
7626
7627 /*
7628 * destroys an IA resource
7629 */
7630 static int
7631 daplka_ia_destroy(daplka_resource_t *gen_rp)
7632 {
7633 daplka_ia_resource_t *ia_rp = (daplka_ia_resource_t *)gen_rp;
7634 daplka_async_evd_hkey_t *hkp;
7635 int cnt;
7636 ibt_ar_t ar_s;
7637
7638 D3("ia_destroy: entering, ia_rp 0x%p\n", ia_rp);
7639
7640 /* deregister Address Record */
7641 if (ia_rp->ia_ar_registered) {
7642 ar_s.ar_gid = ia_rp->ia_hca_sgid;
7643 ar_s.ar_pkey = ia_rp->ia_port_pkey;
7644 bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
7645 (void) ibt_deregister_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
7646 ia_rp->ia_ar_registered = B_FALSE;
7647 }
7648
7649 /*
7650 * destroy hash tables. make sure resources are
7651 * destroyed in the correct order.
7652 */
7653 daplka_hash_destroy(&ia_rp->ia_mw_htbl);
7654 daplka_hash_destroy(&ia_rp->ia_mr_htbl);
7655 daplka_hash_destroy(&ia_rp->ia_ep_htbl);
7656 daplka_hash_destroy(&ia_rp->ia_srq_htbl);
7657 daplka_hash_destroy(&ia_rp->ia_evd_htbl);
7658 daplka_hash_destroy(&ia_rp->ia_cno_htbl);
7659 daplka_hash_destroy(&ia_rp->ia_pd_htbl);
7660 daplka_hash_destroy(&ia_rp->ia_sp_htbl);
7661
7662 /*
7663 * free the async evd list
7664 */
7665 cnt = 0;
7666 hkp = ia_rp->ia_async_evd_hkeys;
7667 while (hkp != NULL) {
7668 daplka_async_evd_hkey_t *free_hkp;
7669
7670 cnt++;
7671 free_hkp = hkp;
7672 hkp = hkp->aeh_next;
7673 kmem_free(free_hkp, sizeof (*free_hkp));
7674 }
7675 if (cnt > 0) {
7676 D3("ia_destroy: freed %d hkeys\n", cnt);
7677 }
7678 mutex_destroy(&ia_rp->ia_lock);
7679 cv_destroy(&ia_rp->ia_cv);
7680 ia_rp->ia_hca_hdl = NULL;
7681
7682 DAPLKA_RS_FINI(ia_rp);
7683
7684 if (ia_rp->ia_hca)
7685 DAPLKA_RELE_HCA(daplka_dev, ia_rp->ia_hca);
7686
7687 kmem_free(ia_rp, sizeof (daplka_ia_resource_t));
7688 D3("ia_destroy: exiting, ia_rp 0x%p\n", ia_rp);
7689 return (0);
7690 }
7691
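/*
 * Allocates one copy of the async event for each async EVD registered
 * with this IA and enqueues it with daplka_evd_wakeup. EVDs that fail
 * the hash lookup are in the process of being freed and are skipped.
 * ia_lock is held across the walk of the ia_async_evd_hkeys list.
 */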
7692 static void
7693 daplka_async_event_create(ibt_async_code_t code, ibt_async_event_t *event,
7694 uint64_t cookie, daplka_ia_resource_t *ia_rp)
7695 {
7696 daplka_evd_event_t *evp;
7697 daplka_evd_resource_t *async_evd;
7698 daplka_async_evd_hkey_t *curr;
7699
7700 mutex_enter(&ia_rp->ia_lock);
7701 curr = ia_rp->ia_async_evd_hkeys;
7702 while (curr != NULL) {
7703 /*
7704 * Note: this allocation does not zero out the buffer
7705 * since we init all the fields.
7706 */
7707 evp = kmem_alloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7708 if (evp == NULL) {
			DERR("async_event_enqueue: ia_rp(%p) evd %llx "
			    "event alloc failed\n", ia_rp,
			    (longlong_t)curr->aeh_evd_hkey);
7711 curr = curr->aeh_next;
7712 continue;
7713 }
7714 evp->ee_next = NULL;
7715 evp->ee_aev.ibae_type = code;
7716 evp->ee_aev.ibae_hca_guid = event->ev_hca_guid;
7717 evp->ee_aev.ibae_cookie = cookie;
7718 evp->ee_aev.ibae_port = event->ev_port;
7719
7720 /*
7721 * Lookup the async evd corresponding to this ia and enqueue
7722 * evp and wakeup any waiter.
7723 */
7724 async_evd = (daplka_evd_resource_t *)
7725 daplka_hash_lookup(&ia_rp->ia_evd_htbl, curr->aeh_evd_hkey);
7726 if (async_evd == NULL) { /* async evd is being freed */
			DERR("async_event_enqueue: ia_rp(%p) async_evd %llx "
7728 "!found\n", ia_rp, (longlong_t)curr->aeh_evd_hkey);
7729 kmem_free(evp, sizeof (daplka_evd_event_t));
7730 curr = curr->aeh_next;
7731 continue;
7732 }
7733 daplka_evd_wakeup(async_evd, &async_evd->evd_async_events, evp);
7734
7735 /* decrement refcnt on async_evd */
7736 DAPLKA_RS_UNREF(async_evd);
7737 curr = curr->aeh_next;
7738 }
7739 mutex_exit(&ia_rp->ia_lock);
7740 }
7741 /*
7742 * This routine is called in kernel context
7743 */
7744
7745 /* ARGSUSED */
7746 static void
7747 daplka_rc_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7748 ibt_async_code_t code, ibt_async_event_t *event)
7749 {
7750 daplka_ep_resource_t *epp;
7751 daplka_ia_resource_t *ia_rp;
7752 minor_t ia_rnum;
7753
7754 if (event->ev_chan_hdl == NULL) {
7755 DERR("daplka_rc_async_handler: ev_chan_hdl is NULL\n");
7756 return;
7757 }
7758
7759 mutex_enter(&daplka_dev->daplka_mutex);
7760 epp = ibt_get_chan_private(event->ev_chan_hdl);
7761 if (epp == NULL) {
7762 mutex_exit(&daplka_dev->daplka_mutex);
7763 DERR("daplka_rc_async_handler: chan_private is NULL\n");
7764 return;
7765 }
7766
7767 /* grab a reference to this ep */
7768 DAPLKA_RS_REF(epp);
7769 mutex_exit(&daplka_dev->daplka_mutex);
7770
	/*
	 * The endpoint resource has the resource number corresponding to
	 * the IA resource. Use that to look up the IA resource entry.
	 */
7775 ia_rnum = DAPLKA_RS_RNUM(epp);
7776 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
7777 if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
7778 D2("daplka_rc_async_handler: resource (%d) not found\n",
7779 ia_rnum);
7780 DAPLKA_RS_UNREF(epp);
7781 return;
7782 }
7783
7784 /*
7785 * Create an async event and chain it to the async evd
7786 */
7787 daplka_async_event_create(code, event, epp->ep_cookie, ia_rp);
7788
7789 DAPLKA_RS_UNREF(ia_rp);
7790 DAPLKA_RS_UNREF(epp);
7791 }
7792
7793 /*
7794 * This routine is called in kernel context
7795 */
7796
7797 /* ARGSUSED */
7798 static void
7799 daplka_cq_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7800 ibt_async_code_t code, ibt_async_event_t *event)
7801 {
7802 daplka_evd_resource_t *evdp;
7803 daplka_ia_resource_t *ia_rp;
7804 minor_t ia_rnum;
7805
7806 if (event->ev_cq_hdl == NULL)
7807 return;
7808
7809 mutex_enter(&daplka_dev->daplka_mutex);
7810 evdp = ibt_get_cq_private(event->ev_cq_hdl);
7811 if (evdp == NULL) {
7812 mutex_exit(&daplka_dev->daplka_mutex);
7813 DERR("daplka_cq_async_handler: get cq private(%p) failed\n",
7814 event->ev_cq_hdl);
7815 return;
7816 }
7817 /* grab a reference to this evd resource */
7818 DAPLKA_RS_REF(evdp);
7819 mutex_exit(&daplka_dev->daplka_mutex);
7820
	/*
	 * The EVD resource has the resource number corresponding to
	 * the IA resource. Use that to look up the IA resource entry.
	 */
7825 ia_rnum = DAPLKA_RS_RNUM(evdp);
7826 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
7827 if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
7828 DERR("daplka_cq_async_handler: resource (%d) not found\n",
7829 ia_rnum);
7830 DAPLKA_RS_UNREF(evdp);
7831 return;
7832 }
7833
7834 /*
7835 * Create an async event and chain it to the async evd
7836 */
7837 daplka_async_event_create(code, event, evdp->evd_cookie, ia_rp);
7838
7839 /* release all the refcount that were acquired */
7840 DAPLKA_RS_UNREF(ia_rp);
7841 DAPLKA_RS_UNREF(evdp);
7842 }
7843
7844 /*
7845 * This routine is called in kernel context, handles unaffiliated async errors
7846 */
7847
7848 /* ARGSUSED */
7849 static void
7850 daplka_un_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7851 ibt_async_code_t code, ibt_async_event_t *event)
7852 {
7853 int i, j;
7854 daplka_resource_blk_t *blk;
7855 daplka_resource_t *rp;
7856 daplka_ia_resource_t *ia_rp;
7857
7858 /*
7859 * Walk the resource table looking for an ia that matches the
7860 * hca_hdl.
7861 */
7862 rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
7863 for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
7864 blk = daplka_resource.daplka_rc_root[i];
7865 if (blk == NULL)
7866 continue;
7867 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
7868 rp = blk->daplka_rcblk_blks[j];
7869 if ((rp == NULL) ||
7870 ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
7871 (rp->rs_type != DAPL_TYPE_IA)) {
7872 continue;
7873 }
7874 /*
			 * rp is an IA resource; check if it belongs
7876 * to the hca/port for which we got the event
7877 */
7878 ia_rp = (daplka_ia_resource_t *)rp;
7879 DAPLKA_RS_REF(ia_rp);
7880 if ((hca_hdl == ia_rp->ia_hca_hdl) &&
7881 (event->ev_port == ia_rp->ia_port_num)) {
7882 /*
7883 * walk the ep hash table. Acquire a
7884 * reader lock. NULL dgid indicates
7885 * local port up event.
7886 */
7887 daplka_hash_walk(&ia_rp->ia_ep_htbl,
7888 daplka_ep_failback, NULL, RW_READER);
7889 }
7890 DAPLKA_RS_UNREF(ia_rp);
7891 }
7892 }
7893 rw_exit(&daplka_resource.daplka_rct_lock);
7894 }
7895
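/*
 * Handles an HCA detach event by removing the matching HCA from the
 * driver's HCA list and freeing its resources. Returns
 * IBT_HCA_RESOURCES_NOT_FREED if the HCA is still busy and IBT_FAILURE
 * if no HCA matches the event's guid.
 */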
7896 static int
7897 daplka_handle_hca_detach_event(ibt_async_event_t *event)
7898 {
7899 daplka_hca_t *hca;
7900
7901 /*
7902 * find the hca with the matching guid
7903 */
7904 mutex_enter(&daplka_dev->daplka_mutex);
7905 for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
7906 hca = hca->hca_next) {
7907 if (hca->hca_guid == event->ev_hca_guid) {
7908 if (DAPLKA_HCA_BUSY(hca)) {
7909 mutex_exit(&daplka_dev->daplka_mutex);
7910 return (IBT_HCA_RESOURCES_NOT_FREED);
7911 }
7912 daplka_dequeue_hca(daplka_dev, hca);
7913 break;
7914 }
7915 }
7916 mutex_exit(&daplka_dev->daplka_mutex);
7917
7918 if (hca == NULL)
7919 return (IBT_FAILURE);
7920
7921 return (daplka_fini_hca(daplka_dev, hca));
7922 }
7923
7924 /*
7925 * This routine is called in kernel context
7926 */
7927 static void
7928 daplka_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7929 ibt_async_code_t code, ibt_async_event_t *event)
7930 {
7931 switch (code) {
7932 case IBT_ERROR_CATASTROPHIC_CHAN:
7933 case IBT_ERROR_INVALID_REQUEST_CHAN:
7934 case IBT_ERROR_ACCESS_VIOLATION_CHAN:
7935 case IBT_ERROR_PATH_MIGRATE_REQ:
7936 D2("daplka_async_handler(): Channel affiliated=0x%x\n", code);
		/* These events are affiliated with the RC channel */
7938 daplka_rc_async_handler(clnt_private, hca_hdl, code, event);
7939 break;
7940 case IBT_ERROR_CQ:
		/* This event is affiliated with the CQ */
7942 D2("daplka_async_handler(): IBT_ERROR_CQ\n");
7943 daplka_cq_async_handler(clnt_private, hca_hdl, code, event);
7944 break;
7945 case IBT_ERROR_PORT_DOWN:
7946 D2("daplka_async_handler(): IBT_PORT_DOWN\n");
7947 break;
7948 case IBT_EVENT_PORT_UP:
7949 D2("daplka_async_handler(): IBT_PORT_UP\n");
7950 if (daplka_apm) {
7951 daplka_un_async_handler(clnt_private, hca_hdl, code,
7952 event);
7953 }
7954 break;
7955 case IBT_HCA_ATTACH_EVENT:
7956 /*
7957 * NOTE: In some error recovery paths, it is possible to
7958 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs.
7959 */
7960 D2("daplka_async_handler(): IBT_HCA_ATTACH\n");
7961 (void) daplka_init_hca(daplka_dev, event->ev_hca_guid);
7962 break;
7963 case IBT_HCA_DETACH_EVENT:
7964 D2("daplka_async_handler(): IBT_HCA_DETACH\n");
7965 /* Free all hca resources and close the HCA. */
7966 (void) daplka_handle_hca_detach_event(event);
7967 break;
7968 case IBT_EVENT_PATH_MIGRATED:
7969 /* This event is affiliated with APM */
7970 D2("daplka_async_handler(): IBT_PATH_MIGRATED.\n");
7971 break;
7972 default:
7973 D2("daplka_async_handler(): unhandled code = 0x%x\n", code);
7974 break;
7975 }
7976 }
7977
7978 /*
7979 * This routine is called in kernel context related to Subnet events
7980 */
7981 /*ARGSUSED*/
7982 static void
7983 daplka_sm_notice_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
7984 ibt_subnet_event_t *event)
7985 {
7986 ib_gid_t *sgid = &gid;
7987 ib_gid_t *dgid;
7988
7989 dgid = &event->sm_notice_gid;
7990 switch (code) {
7991 case IBT_SM_EVENT_GID_AVAIL:
7992 /* This event is affiliated with remote port up */
7993 D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_AVAIL\n");
7994 if (daplka_apm)
7995 daplka_sm_gid_avail(sgid, dgid);
7996 return;
7997 case IBT_SM_EVENT_GID_UNAVAIL:
7998 /* This event is affiliated with remote port down */
7999 D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_UNAVAIL\n");
8000 return;
8001 default:
8002 D2("daplka_sm_notice_handler(): unhandled IBT_SM_EVENT_[%d]\n",
8003 code);
8004 return;
8005 }
8006 }
8007
8008 /*
8009 * This routine is called in kernel context, handles Subnet GID avail events
8010 * which correspond to remote port up. Setting up alternate path or path
8011 * migration (failback) has to be initiated from the active side of the
8012 * original connect.
8013 */
8014 static void
8015 daplka_sm_gid_avail(ib_gid_t *sgid, ib_gid_t *dgid)
8016 {
8017 int i, j;
8018 daplka_resource_blk_t *blk;
8019 daplka_resource_t *rp;
8020 daplka_ia_resource_t *ia_rp;
8021
8022 D2("daplka_sm_gid_avail: sgid=%llx:%llx dgid=%llx:%llx\n",
8023 (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid,
8024 (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
8025
8026 /*
8027 * Walk the resource table looking for an ia that matches the sgid
8028 */
8029 rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
8030 for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
8031 blk = daplka_resource.daplka_rc_root[i];
8032 if (blk == NULL)
8033 continue;
8034 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
8035 rp = blk->daplka_rcblk_blks[j];
8036 if ((rp == NULL) ||
8037 ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
8038 (rp->rs_type != DAPL_TYPE_IA)) {
8039 continue;
8040 }
8041 /*
			 * rp is an IA resource; check if its gid
			 * matches the calling sgid
8044 */
8045 ia_rp = (daplka_ia_resource_t *)rp;
8046 DAPLKA_RS_REF(ia_rp);
8047 if ((sgid->gid_prefix ==
8048 ia_rp->ia_hca_sgid.gid_prefix) &&
8049 (sgid->gid_guid == ia_rp->ia_hca_sgid.gid_guid)) {
8050 /*
8051 * walk the ep hash table. Acquire a
8052 * reader lock.
8053 */
8054 daplka_hash_walk(&ia_rp->ia_ep_htbl,
8055 daplka_ep_failback,
8056 (void *)dgid, RW_READER);
8057 }
8058 DAPLKA_RS_UNREF(ia_rp);
8059 }
8060 }
8061 rw_exit(&daplka_resource.daplka_rct_lock);
8062 }
8063
8064 /*
8065 * This routine is called in kernel context to get and set an alternate path
8066 */
8067 static int
8068 daplka_ep_altpath(daplka_ep_resource_t *ep_rp, ib_gid_t *dgid)
8069 {
8070 ibt_alt_path_info_t path_info;
8071 ibt_alt_path_attr_t path_attr;
8072 ibt_ap_returns_t ap_rets;
8073 ibt_status_t status;
8074
8075 D2("daplka_ep_altpath : ibt_get_alt_path()\n");
8076 bzero(&path_info, sizeof (ibt_alt_path_info_t));
8077 bzero(&path_attr, sizeof (ibt_alt_path_attr_t));
8078 if (dgid != NULL) {
8079 path_attr.apa_sgid = ep_rp->ep_sgid;
8080 path_attr.apa_dgid = *dgid;
8081 }
8082 status = ibt_get_alt_path(ep_rp->ep_chan_hdl, IBT_PATH_AVAIL,
8083 &path_attr, &path_info);
8084 if (status != IBT_SUCCESS) {
8085 DERR("daplka_ep_altpath : ibt_get_alt_path failed %d\n",
8086 status);
8087 return (1);
8088 }
8089
8090 D2("daplka_ep_altpath : ibt_set_alt_path()\n");
8091 bzero(&ap_rets, sizeof (ibt_ap_returns_t));
8092 status = ibt_set_alt_path(ep_rp->ep_chan_hdl, IBT_BLOCKING,
8093 &path_info, NULL, 0, &ap_rets);
8094 if ((status != IBT_SUCCESS) ||
8095 (ap_rets.ap_status != IBT_CM_AP_LOADED)) {
8096 DERR("daplka_ep_altpath : ibt_set_alt_path failed "
8097 "status %d ap_status %d\n", status, ap_rets.ap_status);
8098 return (1);
8099 }
8100 return (0);
8101 }
8102
8103 /*
8104 * This routine is called in kernel context to failback to the original path
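 * It is invoked on each endpoint by daplka_hash_walk and always
 * returns 0 so that the walk continues across all endpoints.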
8105 */
8106 static int
8107 daplka_ep_failback(void *objp, void *arg)
8108 {
8109 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)objp;
8110 ib_gid_t *dgid;
8111 ibt_status_t status;
8112 ibt_rc_chan_query_attr_t chan_attrs;
8113 int i;
8114
8115 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
8116 D2("daplka_ep_failback ep : sgid=%llx:%llx dgid=%llx:%llx\n",
8117 (longlong_t)ep_rp->ep_sgid.gid_prefix,
8118 (longlong_t)ep_rp->ep_sgid.gid_guid,
8119 (longlong_t)ep_rp->ep_dgid.gid_prefix,
8120 (longlong_t)ep_rp->ep_dgid.gid_guid);
8121
8122 /*
8123 * daplka_ep_failback is called from daplka_hash_walk
8124 * which holds the read lock on hash table to protect
8125 * the endpoint resource from removal
8126 */
8127 mutex_enter(&ep_rp->ep_lock);
8128 /* check for unconnected endpoints */
8129 /* first check for ep state */
8130 if (ep_rp->ep_state != DAPLKA_EP_STATE_CONNECTED) {
8131 mutex_exit(&ep_rp->ep_lock);
8132 D2("daplka_ep_failback : endpoints not connected\n");
8133 return (0);
8134 }
8135
8136 /* second check for gids */
8137 if (((ep_rp->ep_sgid.gid_prefix == 0) &&
8138 (ep_rp->ep_sgid.gid_guid == 0)) ||
8139 ((ep_rp->ep_dgid.gid_prefix == 0) &&
8140 (ep_rp->ep_dgid.gid_guid == 0))) {
8141 mutex_exit(&ep_rp->ep_lock);
8142 D2("daplka_ep_failback : skip unconnected endpoints\n");
8143 return (0);
8144 }
8145
8146 /*
8147 * matching destination ep
8148 * when dgid is NULL, the async event is a local port up.
	 * dgid becomes a wild card, i.e. all endpoints match
8150 */
8151 dgid = (ib_gid_t *)arg;
8152 if (dgid == NULL) {
8153 /* ignore loopback ep */
8154 if ((ep_rp->ep_sgid.gid_prefix == ep_rp->ep_dgid.gid_prefix) &&
8155 (ep_rp->ep_sgid.gid_guid == ep_rp->ep_dgid.gid_guid)) {
8156 mutex_exit(&ep_rp->ep_lock);
8157 D2("daplka_ep_failback : skip loopback endpoints\n");
8158 return (0);
8159 }
8160 } else {
8161 /* matching remote ep */
8162 if ((ep_rp->ep_dgid.gid_prefix != dgid->gid_prefix) ||
8163 (ep_rp->ep_dgid.gid_guid != dgid->gid_guid)) {
8164 mutex_exit(&ep_rp->ep_lock);
8165 D2("daplka_ep_failback : unrelated endpoints\n");
8166 return (0);
8167 }
8168 }
8169
8170 /* call get and set altpath with original dgid used in ep_connect */
8171 if (daplka_ep_altpath(ep_rp, &ep_rp->ep_dgid)) {
8172 mutex_exit(&ep_rp->ep_lock);
8173 return (0);
8174 }
8175
8176 /*
8177 * wait for migration state to be ARMed
	 * e.g. a post_send msg will transition mig_state from REARM to ARM
8179 */
8180 for (i = 0; i < daplka_query_aft_setaltpath; i++) {
8181 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
8182 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
8183 if (status != IBT_SUCCESS) {
8184 mutex_exit(&ep_rp->ep_lock);
			DERR("daplka_ep_failback : "
			    "ibt_query_rc_channel err\n");
8186 return (0);
8187 }
8188 if (chan_attrs.rc_mig_state == IBT_STATE_ARMED)
8189 break;
8190 }
8191
	D2("daplka_ep_failback : query[%d] mig_st=%d\n",
8193 i, chan_attrs.rc_mig_state);
	D2("daplka_ep_failback : P sgid=%llx:%llx dgid=%llx:%llx\n",
8195 (longlong_t)
8196 chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_prefix,
8197 (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_guid,
8198 (longlong_t)
8199 chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_prefix,
8200 (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_guid);
	D2("daplka_ep_failback : A sgid=%llx:%llx dgid=%llx:%llx\n",
8202 (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_prefix,
8203 (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_guid,
8204 (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_prefix,
8205 (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_guid);
8206
	/* skip failback if ARMed state was not reached or failback disabled */
8208 if ((i >= daplka_query_aft_setaltpath) || (daplka_failback == 0)) {
8209 mutex_exit(&ep_rp->ep_lock);
		DERR("daplka_ep_failback : ARMed state not reached\n");
8211 return (0);
8212 }
8213
8214 D2("daplka_ep_failback : ibt_migrate_path() to original ep\n");
8215 status = ibt_migrate_path(ep_rp->ep_chan_hdl);
8216 if (status != IBT_SUCCESS) {
8217 mutex_exit(&ep_rp->ep_lock);
8218 DERR("daplka_ep_failback : migration failed "
8219 "status %d\n", status);
8220 return (0);
8221 }
8222
	/* get and set altpath with NULL dgid to indicate unspecified dgid */
8224 (void) daplka_ep_altpath(ep_rp, NULL);
8225 mutex_exit(&ep_rp->ep_lock);
8226 return (0);
8227 }
8228
8229 /*
8230 * IBTF wrappers used for resource accounting
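 *
 * Each alloc wrapper follows the same pattern: check the per-HCA count
 * against the configured percentage limit, tentatively charge the
 * resource, call into IBTF, and roll the charge back if the call
 * fails. Each free wrapper credits a charge back only if one was
 * actually made (DAPLKA_RS_ACCT_CHARGED > 0), since accounting may
 * have been disabled when the resource was allocated.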
8231 */
8232 static ibt_status_t
8233 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *ep_rp, ibt_hca_hdl_t hca_hdl,
8234 ibt_chan_alloc_flags_t flags, ibt_rc_chan_alloc_args_t *args,
8235 ibt_channel_hdl_t *chan_hdl_p, ibt_chan_sizes_t *sizes)
8236 {
8237 daplka_hca_t *hca_p;
8238 uint32_t max_qps;
8239 boolean_t acct_enabled;
8240 ibt_status_t status;
8241
8242 acct_enabled = daplka_accounting_enabled;
8243 hca_p = ep_rp->ep_hca;
8244 max_qps = daplka_max_qp_percent * hca_p->hca_attr.hca_max_chans / 100;
8245
8246 if (acct_enabled) {
8247 if (daplka_max_qp_percent != 0 &&
8248 max_qps <= hca_p->hca_qp_count) {
8249 DERR("ibt_alloc_rc_channel: resource limit exceeded "
8250 "(limit %d, count %d)\n", max_qps,
8251 hca_p->hca_qp_count);
8252 return (IBT_INSUFF_RESOURCE);
8253 }
8254 DAPLKA_RS_ACCT_INC(ep_rp, 1);
8255 atomic_inc_32(&hca_p->hca_qp_count);
8256 }
8257 status = ibt_alloc_rc_channel(hca_hdl, flags, args, chan_hdl_p, sizes);
8258
8259 if (status != IBT_SUCCESS && acct_enabled) {
8260 DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8261 atomic_dec_32(&hca_p->hca_qp_count);
8262 }
8263 return (status);
8264 }
8265
8266 static ibt_status_t
8267 daplka_ibt_free_channel(daplka_ep_resource_t *ep_rp, ibt_channel_hdl_t chan_hdl)
8268 {
8269 daplka_hca_t *hca_p;
8270 ibt_status_t status;
8271
8272 hca_p = ep_rp->ep_hca;
8273
8274 status = ibt_free_channel(chan_hdl);
8275 if (status != IBT_SUCCESS) {
8276 return (status);
8277 }
8278 if (DAPLKA_RS_ACCT_CHARGED(ep_rp) > 0) {
8279 DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8280 atomic_dec_32(&hca_p->hca_qp_count);
8281 }
8282 return (status);
8283 }
8284
8285 static ibt_status_t
8286 daplka_ibt_alloc_cq(daplka_evd_resource_t *evd_rp, ibt_hca_hdl_t hca_hdl,
8287 ibt_cq_attr_t *cq_attr, ibt_cq_hdl_t *ibt_cq_p, uint32_t *real_size)
8288 {
8289 daplka_hca_t *hca_p;
8290 uint32_t max_cqs;
8291 boolean_t acct_enabled;
8292 ibt_status_t status;
8293
8294 acct_enabled = daplka_accounting_enabled;
8295 hca_p = evd_rp->evd_hca;
8296 max_cqs = daplka_max_cq_percent * hca_p->hca_attr.hca_max_cq / 100;
8297
8298 if (acct_enabled) {
8299 if (daplka_max_cq_percent != 0 &&
8300 max_cqs <= hca_p->hca_cq_count) {
8301 DERR("ibt_alloc_cq: resource limit exceeded "
8302 "(limit %d, count %d)\n", max_cqs,
8303 hca_p->hca_cq_count);
8304 return (IBT_INSUFF_RESOURCE);
8305 }
8306 DAPLKA_RS_ACCT_INC(evd_rp, 1);
8307 atomic_inc_32(&hca_p->hca_cq_count);
8308 }
8309 status = ibt_alloc_cq(hca_hdl, cq_attr, ibt_cq_p, real_size);
8310
8311 if (status != IBT_SUCCESS && acct_enabled) {
8312 DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8313 atomic_dec_32(&hca_p->hca_cq_count);
8314 }
8315 return (status);
8316 }
8317
8318 static ibt_status_t
8319 daplka_ibt_free_cq(daplka_evd_resource_t *evd_rp, ibt_cq_hdl_t cq_hdl)
8320 {
8321 daplka_hca_t *hca_p;
8322 ibt_status_t status;
8323
8324 hca_p = evd_rp->evd_hca;
8325
8326 status = ibt_free_cq(cq_hdl);
8327 if (status != IBT_SUCCESS) {
8328 return (status);
8329 }
8330 if (DAPLKA_RS_ACCT_CHARGED(evd_rp) > 0) {
8331 DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8332 atomic_dec_32(&hca_p->hca_cq_count);
8333 }
8334 return (status);
8335 }
8336
8337 static ibt_status_t
8338 daplka_ibt_alloc_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8339 ibt_pd_flags_t flags, ibt_pd_hdl_t *pd_hdl_p)
8340 {
8341 daplka_hca_t *hca_p;
8342 uint32_t max_pds;
8343 boolean_t acct_enabled;
8344 ibt_status_t status;
8345
8346 acct_enabled = daplka_accounting_enabled;
8347 hca_p = pd_rp->pd_hca;
8348 max_pds = daplka_max_pd_percent * hca_p->hca_attr.hca_max_pd / 100;
8349
8350 if (acct_enabled) {
8351 if (daplka_max_pd_percent != 0 &&
8352 max_pds <= hca_p->hca_pd_count) {
8353 DERR("ibt_alloc_pd: resource limit exceeded "
8354 "(limit %d, count %d)\n", max_pds,
8355 hca_p->hca_pd_count);
8356 return (IBT_INSUFF_RESOURCE);
8357 }
8358 DAPLKA_RS_ACCT_INC(pd_rp, 1);
8359 atomic_inc_32(&hca_p->hca_pd_count);
8360 }
8361 status = ibt_alloc_pd(hca_hdl, flags, pd_hdl_p);
8362
8363 if (status != IBT_SUCCESS && acct_enabled) {
8364 DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8365 atomic_dec_32(&hca_p->hca_pd_count);
8366 }
8367 return (status);
8368 }
8369
8370 static ibt_status_t
8371 daplka_ibt_free_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8372 ibt_pd_hdl_t pd_hdl)
8373 {
8374 daplka_hca_t *hca_p;
8375 ibt_status_t status;
8376
8377 hca_p = pd_rp->pd_hca;
8378
8379 status = ibt_free_pd(hca_hdl, pd_hdl);
8380 if (status != IBT_SUCCESS) {
8381 return (status);
8382 }
8383 if (DAPLKA_RS_ACCT_CHARGED(pd_rp) > 0) {
8384 DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8385 atomic_dec_32(&hca_p->hca_pd_count);
8386 }
8387 return (status);
8388 }
8389
8390 static ibt_status_t
8391 daplka_ibt_alloc_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8392 ibt_pd_hdl_t pd_hdl, ibt_mw_flags_t flags, ibt_mw_hdl_t *mw_hdl_p,
8393 ibt_rkey_t *rkey_p)
8394 {
8395 daplka_hca_t *hca_p;
8396 uint32_t max_mws;
8397 boolean_t acct_enabled;
8398 ibt_status_t status;
8399
8400 acct_enabled = daplka_accounting_enabled;
8401 hca_p = mw_rp->mw_hca;
8402 max_mws = daplka_max_mw_percent * hca_p->hca_attr.hca_max_mem_win / 100;
8403
8404 if (acct_enabled) {
8405 if (daplka_max_mw_percent != 0 &&
8406 max_mws <= hca_p->hca_mw_count) {
8407 DERR("ibt_alloc_mw: resource limit exceeded "
8408 "(limit %d, count %d)\n", max_mws,
8409 hca_p->hca_mw_count);
8410 return (IBT_INSUFF_RESOURCE);
8411 }
8412 DAPLKA_RS_ACCT_INC(mw_rp, 1);
8413 atomic_inc_32(&hca_p->hca_mw_count);
8414 }
8415 status = ibt_alloc_mw(hca_hdl, pd_hdl, flags, mw_hdl_p, rkey_p);
8416
8417 if (status != IBT_SUCCESS && acct_enabled) {
8418 DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8419 atomic_dec_32(&hca_p->hca_mw_count);
8420 }
8421 return (status);
8422 }
8423
8424 static ibt_status_t
8425 daplka_ibt_free_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8426 ibt_mw_hdl_t mw_hdl)
8427 {
8428 daplka_hca_t *hca_p;
8429 ibt_status_t status;
8430
8431 hca_p = mw_rp->mw_hca;
8432
8433 status = ibt_free_mw(hca_hdl, mw_hdl);
8434 if (status != IBT_SUCCESS) {
8435 return (status);
8436 }
8437 if (DAPLKA_RS_ACCT_CHARGED(mw_rp) > 0) {
8438 DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8439 atomic_dec_32(&hca_p->hca_mw_count);
8440 }
8441 return (status);
8442 }
8443
8444 static ibt_status_t
8445 daplka_ibt_register_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8446 ibt_pd_hdl_t pd_hdl, ibt_mr_attr_t *mr_attr, ibt_mr_hdl_t *mr_hdl_p,
8447 ibt_mr_desc_t *mr_desc_p)
8448 {
8449 daplka_hca_t *hca_p;
8450 uint32_t max_mrs;
8451 boolean_t acct_enabled;
8452 ibt_status_t status;
8453
8454 acct_enabled = daplka_accounting_enabled;
8455 hca_p = mr_rp->mr_hca;
8456 max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
8457
8458 if (acct_enabled) {
8459 if (daplka_max_mr_percent != 0 &&
8460 max_mrs <= hca_p->hca_mr_count) {
8461 DERR("ibt_register_mr: resource limit exceeded "
8462 "(limit %d, count %d)\n", max_mrs,
8463 hca_p->hca_mr_count);
8464 return (IBT_INSUFF_RESOURCE);
8465 }
8466 DAPLKA_RS_ACCT_INC(mr_rp, 1);
8467 atomic_inc_32(&hca_p->hca_mr_count);
8468 }
8469 status = ibt_register_mr(hca_hdl, pd_hdl, mr_attr, mr_hdl_p, mr_desc_p);
8470
8471 if (status != IBT_SUCCESS && acct_enabled) {
8472 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8473 atomic_dec_32(&hca_p->hca_mr_count);
8474 }
8475 return (status);
8476 }
8477
8478 static ibt_status_t
8479 daplka_ibt_register_shared_mr(daplka_mr_resource_t *mr_rp,
8480 ibt_hca_hdl_t hca_hdl, ibt_mr_hdl_t mr_hdl, ibt_pd_hdl_t pd_hdl,
8481 ibt_smr_attr_t *smr_attr_p, ibt_mr_hdl_t *mr_hdl_p,
8482 ibt_mr_desc_t *mr_desc_p)
8483 {
8484 daplka_hca_t *hca_p;
8485 uint32_t max_mrs;
8486 boolean_t acct_enabled;
8487 ibt_status_t status;
8488
8489 acct_enabled = daplka_accounting_enabled;
8490 hca_p = mr_rp->mr_hca;
8491 max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
8492
8493 if (acct_enabled) {
8494 if (daplka_max_mr_percent != 0 &&
8495 max_mrs <= hca_p->hca_mr_count) {
8496 DERR("ibt_register_shared_mr: resource limit exceeded "
8497 "(limit %d, count %d)\n", max_mrs,
8498 hca_p->hca_mr_count);
8499 return (IBT_INSUFF_RESOURCE);
8500 }
8501 DAPLKA_RS_ACCT_INC(mr_rp, 1);
8502 atomic_inc_32(&hca_p->hca_mr_count);
8503 }
8504 status = ibt_register_shared_mr(hca_hdl, mr_hdl, pd_hdl,
8505 smr_attr_p, mr_hdl_p, mr_desc_p);
8506
8507 if (status != IBT_SUCCESS && acct_enabled) {
8508 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8509 atomic_dec_32(&hca_p->hca_mr_count);
8510 }
8511 return (status);
8512 }
8513
8514 static ibt_status_t
8515 daplka_ibt_deregister_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8516 ibt_mr_hdl_t mr_hdl)
8517 {
8518 daplka_hca_t *hca_p;
8519 ibt_status_t status;
8520
8521 hca_p = mr_rp->mr_hca;
8522
8523 status = ibt_deregister_mr(hca_hdl, mr_hdl);
8524 if (status != IBT_SUCCESS) {
8525 return (status);
8526 }
8527 if (DAPLKA_RS_ACCT_CHARGED(mr_rp) > 0) {
8528 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8529 atomic_dec_32(&hca_p->hca_mr_count);
8530 }
8531 return (status);
8532 }
8533
8534 static ibt_status_t
8535 daplka_ibt_alloc_srq(daplka_srq_resource_t *srq_rp, ibt_hca_hdl_t hca_hdl,
8536 ibt_srq_flags_t flags, ibt_pd_hdl_t pd, ibt_srq_sizes_t *reqsz,
8537 ibt_srq_hdl_t *srq_hdl_p, ibt_srq_sizes_t *realsz)
8538 {
8539 daplka_hca_t *hca_p;
8540 uint32_t max_srqs;
8541 boolean_t acct_enabled;
8542 ibt_status_t status;
8543
8544 acct_enabled = daplka_accounting_enabled;
8545 hca_p = srq_rp->srq_hca;
8546 max_srqs = daplka_max_srq_percent * hca_p->hca_attr.hca_max_srqs / 100;
8547
8548 if (acct_enabled) {
8549 if (daplka_max_srq_percent != 0 &&
8550 max_srqs <= hca_p->hca_srq_count) {
8551 DERR("ibt_alloc_srq: resource limit exceeded "
8552 "(limit %d, count %d)\n", max_srqs,
8553 hca_p->hca_srq_count);
8554 return (IBT_INSUFF_RESOURCE);
8555 }
8556 DAPLKA_RS_ACCT_INC(srq_rp, 1);
8557 atomic_inc_32(&hca_p->hca_srq_count);
8558 }
8559 status = ibt_alloc_srq(hca_hdl, flags, pd, reqsz, srq_hdl_p, realsz);
8560
8561 if (status != IBT_SUCCESS && acct_enabled) {
8562 DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8563 atomic_dec_32(&hca_p->hca_srq_count);
8564 }
8565 return (status);
8566 }
8567
8568 static ibt_status_t
8569 daplka_ibt_free_srq(daplka_srq_resource_t *srq_rp, ibt_srq_hdl_t srq_hdl)
8570 {
8571 daplka_hca_t *hca_p;
8572 ibt_status_t status;
8573
8574 hca_p = srq_rp->srq_hca;
8575
8576 D3("ibt_free_srq: %p %p\n", srq_rp, srq_hdl);
8577
8578 status = ibt_free_srq(srq_hdl);
8579 if (status != IBT_SUCCESS) {
8580 return (status);
8581 }
8582 if (DAPLKA_RS_ACCT_CHARGED(srq_rp) > 0) {
8583 DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8584 atomic_dec_32(&hca_p->hca_srq_count);
8585 }
8586 return (status);
8587 }
8588
8589
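/*
 * handles the ioctls that are valid on a minor node that is still in
 * the reserved state, i.e. before an IA resource has been created
 */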
8590 static int
8591 daplka_common_ioctl(int cmd, minor_t rnum, intptr_t arg, int mode,
8592 cred_t *cred, int *rvalp)
8593 {
8594 int error;
8595
8596 switch (cmd) {
8597 case DAPL_IA_CREATE:
8598 error = daplka_ia_create(rnum, arg, mode, cred, rvalp);
8599 break;
8600
8601 /* can potentially add other commands here */
8602
8603 default:
8604 DERR("daplka_common_ioctl: cmd not supported\n");
8605 error = DDI_FAILURE;
8606 }
8607 return (error);
8608 }
8609
8610 static int
8611 daplka_evd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8612 cred_t *cred, int *rvalp)
8613 {
8614 int error;
8615
8616 switch (cmd) {
8617 case DAPL_EVD_CREATE:
8618 error = daplka_evd_create(rp, arg, mode, cred, rvalp);
8619 break;
8620
8621 case DAPL_CQ_RESIZE:
8622 error = daplka_cq_resize(rp, arg, mode, cred, rvalp);
8623 break;
8624
8625 case DAPL_EVENT_POLL:
8626 error = daplka_event_poll(rp, arg, mode, cred, rvalp);
8627 break;
8628
8629 case DAPL_EVENT_WAKEUP:
8630 error = daplka_event_wakeup(rp, arg, mode, cred, rvalp);
8631 break;
8632
8633 case DAPL_EVD_MODIFY_CNO:
8634 error = daplka_evd_modify_cno(rp, arg, mode, cred, rvalp);
8635 break;
8636
8637 case DAPL_EVD_FREE:
8638 error = daplka_evd_free(rp, arg, mode, cred, rvalp);
8639 break;
8640
8641 default:
8642 DERR("daplka_evd_ioctl: cmd not supported\n");
8643 error = DDI_FAILURE;
8644 }
8645 return (error);
8646 }
8647
8648 static int
8649 daplka_ep_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8650 cred_t *cred, int *rvalp)
8651 {
8652 int error;
8653
8654 switch (cmd) {
8655 case DAPL_EP_MODIFY:
8656 error = daplka_ep_modify(rp, arg, mode, cred, rvalp);
8657 break;
8658
8659 case DAPL_EP_FREE:
8660 error = daplka_ep_free(rp, arg, mode, cred, rvalp);
8661 break;
8662
8663 case DAPL_EP_CONNECT:
8664 error = daplka_ep_connect(rp, arg, mode, cred, rvalp);
8665 break;
8666
8667 case DAPL_EP_DISCONNECT:
8668 error = daplka_ep_disconnect(rp, arg, mode, cred, rvalp);
8669 break;
8670
8671 case DAPL_EP_REINIT:
8672 error = daplka_ep_reinit(rp, arg, mode, cred, rvalp);
8673 break;
8674
8675 case DAPL_EP_CREATE:
8676 error = daplka_ep_create(rp, arg, mode, cred, rvalp);
8677 break;
8678
8679 default:
8680 DERR("daplka_ep_ioctl: cmd not supported\n");
8681 error = DDI_FAILURE;
8682 }
8683 return (error);
8684 }
8685
8686 static int
8687 daplka_mr_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8688 cred_t *cred, int *rvalp)
8689 {
8690 int error;
8691
8692 switch (cmd) {
8693 case DAPL_MR_REGISTER:
8694 error = daplka_mr_register(rp, arg, mode, cred, rvalp);
8695 break;
8696
8697 case DAPL_MR_REGISTER_LMR:
8698 error = daplka_mr_register_lmr(rp, arg, mode, cred, rvalp);
8699 break;
8700
8701 case DAPL_MR_REGISTER_SHARED:
8702 error = daplka_mr_register_shared(rp, arg, mode, cred, rvalp);
8703 break;
8704
8705 case DAPL_MR_DEREGISTER:
8706 error = daplka_mr_deregister(rp, arg, mode, cred, rvalp);
8707 break;
8708
8709 case DAPL_MR_SYNC:
8710 error = daplka_mr_sync(rp, arg, mode, cred, rvalp);
8711 break;
8712
8713 default:
8714 DERR("daplka_mr_ioctl: cmd not supported\n");
8715 error = DDI_FAILURE;
8716 }
8717 return (error);
8718 }
8719
8720 static int
8721 daplka_mw_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8722 cred_t *cred, int *rvalp)
8723 {
8724 int error;
8725
8726 switch (cmd) {
8727 case DAPL_MW_ALLOC:
8728 error = daplka_mw_alloc(rp, arg, mode, cred, rvalp);
8729 break;
8730
8731 case DAPL_MW_FREE:
8732 error = daplka_mw_free(rp, arg, mode, cred, rvalp);
8733 break;
8734
8735 default:
8736 DERR("daplka_mw_ioctl: cmd not supported\n");
8737 error = DDI_FAILURE;
8738 }
8739 return (error);
8740 }
8741
8742 static int
8743 daplka_cno_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8744 cred_t *cred, int *rvalp)
8745 {
8746 int error;
8747
8748 switch (cmd) {
8749 case DAPL_CNO_ALLOC:
8750 error = daplka_cno_alloc(rp, arg, mode, cred, rvalp);
8751 break;
8752
8753 case DAPL_CNO_FREE:
8754 error = daplka_cno_free(rp, arg, mode, cred, rvalp);
8755 break;
8756
8757 case DAPL_CNO_WAIT:
8758 error = daplka_cno_wait(rp, arg, mode, cred, rvalp);
8759 break;
8760
8761 default:
8762 DERR("daplka_cno_ioctl: cmd not supported\n");
8763 error = DDI_FAILURE;
8764 }
8765 return (error);
8766 }
8767
8768 static int
8769 daplka_pd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8770 cred_t *cred, int *rvalp)
8771 {
8772 int error;
8773
8774 switch (cmd) {
8775 case DAPL_PD_ALLOC:
8776 error = daplka_pd_alloc(rp, arg, mode, cred, rvalp);
8777 break;
8778
8779 case DAPL_PD_FREE:
8780 error = daplka_pd_free(rp, arg, mode, cred, rvalp);
8781 break;
8782
8783 default:
8784 DERR("daplka_pd_ioctl: cmd not supported\n");
8785 error = DDI_FAILURE;
8786 }
8787 return (error);
8788 }
8789
8790 static int
8791 daplka_sp_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8792 cred_t *cred, int *rvalp)
8793 {
8794 int error;
8795
8796 switch (cmd) {
8797 case DAPL_SERVICE_REGISTER:
8798 error = daplka_service_register(rp, arg, mode, cred, rvalp);
8799 break;
8800
8801 case DAPL_SERVICE_DEREGISTER:
8802 error = daplka_service_deregister(rp, arg, mode, cred, rvalp);
8803 break;
8804
8805 default:
8806 DERR("daplka_sp_ioctl: cmd not supported\n");
8807 error = DDI_FAILURE;
8808 }
8809 return (error);
8810 }
8811
8812 static int
8813 daplka_srq_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8814 cred_t *cred, int *rvalp)
8815 {
8816 int error;
8817
8818 switch (cmd) {
8819 case DAPL_SRQ_CREATE:
8820 error = daplka_srq_create(rp, arg, mode, cred, rvalp);
8821 break;
8822
8823 case DAPL_SRQ_RESIZE:
8824 error = daplka_srq_resize(rp, arg, mode, cred, rvalp);
8825 break;
8826
8827 case DAPL_SRQ_FREE:
8828 error = daplka_srq_free(rp, arg, mode, cred, rvalp);
8829 break;
8830
8831 default:
8832 DERR("daplka_srq_ioctl: cmd(%d) not supported\n", cmd);
8833 error = DDI_FAILURE;
8834 break;
8835 }
8836 return (error);
8837 }
8838
8839 static int
8840 daplka_misc_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8841 cred_t *cred, int *rvalp)
8842 {
8843 int error;
8844
8845 switch (cmd) {
8846 case DAPL_CR_ACCEPT:
8847 error = daplka_cr_accept(rp, arg, mode, cred, rvalp);
8848 break;
8849
8850 case DAPL_CR_REJECT:
8851 error = daplka_cr_reject(rp, arg, mode, cred, rvalp);
8852 break;
8853
8854 case DAPL_IA_QUERY:
8855 error = daplka_ia_query(rp, arg, mode, cred, rvalp);
8856 break;
8857
8858 case DAPL_CR_HANDOFF:
8859 error = daplka_cr_handoff(rp, arg, mode, cred, rvalp);
8860 break;
8861
8862 default:
8863 DERR("daplka_misc_ioctl: cmd not supported\n");
8864 error = DDI_FAILURE;
8865 }
8866 return (error);
8867 }
8868
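/*
 * main ioctl entry point. the type bits of the command
 * (DAPL_TYPE_MASK) select the per-type dispatcher. commands on a
 * reserved slot are routed to daplka_common_ioctl; all other commands
 * require a valid IA resource owned by the calling process.
 */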
8869 /*ARGSUSED*/
8870 static int
8871 daplka_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
8872 int *rvalp)
8873 {
8874 daplka_ia_resource_t *ia_rp;
8875 minor_t rnum;
8876 int error = 0;
8877
8878 rnum = getminor(dev);
8879 ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
8880 if (ia_rp == NULL) {
8881 DERR("ioctl: resource not found, rnum %d\n", rnum);
8882 return (ENXIO);
8883 }
8884
8885 D4("ioctl: rnum = %d, cmd = 0x%x\n", rnum, cmd);
8886 if (DAPLKA_RS_RESERVED(ia_rp)) {
8887 error = daplka_common_ioctl(cmd, rnum, arg, mode, cred, rvalp);
8888 return (error);
8889 }
8890 if (DAPLKA_RS_TYPE(ia_rp) != DAPL_TYPE_IA) {
8891 DERR("ioctl: invalid type %d\n", DAPLKA_RS_TYPE(ia_rp));
8892 error = EINVAL;
8893 goto cleanup;
8894 }
8895 if (ia_rp->ia_pid != ddi_get_pid()) {
8896 DERR("ioctl: ia_pid %d != pid %d\n",
8897 ia_rp->ia_pid, ddi_get_pid());
8898 error = EINVAL;
8899 goto cleanup;
8900 }
8901
8902 switch (cmd & DAPL_TYPE_MASK) {
8903 case DAPL_TYPE_EVD:
8904 error = daplka_evd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8905 break;
8906
8907 case DAPL_TYPE_EP:
8908 error = daplka_ep_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8909 break;
8910
8911 case DAPL_TYPE_MR:
8912 error = daplka_mr_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8913 break;
8914
8915 case DAPL_TYPE_MW:
8916 error = daplka_mw_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8917 break;
8918
8919 case DAPL_TYPE_PD:
8920 error = daplka_pd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8921 break;
8922
8923 case DAPL_TYPE_SP:
8924 error = daplka_sp_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8925 break;
8926
8927 case DAPL_TYPE_CNO:
8928 error = daplka_cno_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8929 break;
8930
8931 case DAPL_TYPE_MISC:
8932 error = daplka_misc_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8933 break;
8934
8935 case DAPL_TYPE_SRQ:
8936 error = daplka_srq_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8937 break;
8938
8939 default:
8940 DERR("ioctl: invalid dapl type = %d\n", DAPLKA_RS_TYPE(ia_rp));
8941 error = DDI_FAILURE;
8942 }
8943
8944 cleanup:;
8945 DAPLKA_RS_UNREF(ia_rp);
8946 return (error);
8947 }
8948
8949 /* ARGSUSED */
8950 static int
8951 daplka_open(dev_t *devp, int flag, int otyp, struct cred *cred)
8952 {
8953 minor_t rnum;
8954
8955 /*
8956 * Char only
8957 */
8958 if (otyp != OTYP_CHR) {
8959 return (EINVAL);
8960 }
8961
8962 /*
	 * Only minor 0 can be opened; clones are used for resources.
8964 */
8965 if (getminor(*devp) != DAPLKA_DRIVER_MINOR) {
8966 DERR("daplka_open: bad minor %d\n", getminor(*devp));
8967 return (ENODEV);
8968 }
8969
8970 /*
8971 * - allocate new minor number
8972 * - update devp argument to new device
8973 */
8974 if (daplka_resource_reserve(&rnum) == 0) {
8975 *devp = makedevice(getmajor(*devp), rnum);
8976 } else {
8977 return (ENOMEM);
8978 }
8979
8980 return (DDI_SUCCESS);
8981 }
8982
8983 /* ARGSUSED */
8984 static int
8985 daplka_close(dev_t dev, int flag, int otyp, struct cred *cred)
8986 {
8987 daplka_ia_resource_t *ia_rp;
8988 minor_t rnum = getminor(dev);
8989
8990 /*
8991 * Char only
8992 */
8993 if (otyp != OTYP_CHR) {
8994 return (EINVAL);
8995 }
8996 D2("daplka_close: closing rnum = %d\n", rnum);
8997 atomic_inc_32(&daplka_pending_close);
8998
8999 /*
9000 * remove from resource table.
9001 */
9002 ia_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
9003
9004 /*
9005 * remove the initial reference
9006 */
9007 if (ia_rp != NULL) {
9008 DAPLKA_RS_UNREF(ia_rp);
9009 }
9010 atomic_dec_32(&daplka_pending_close);
9011 return (DDI_SUCCESS);
9012 }
9013
9014
9015 /*
9016 * Resource management routines
9017 *
 * We start with no resource array. Each time we run out of slots, we
 * reallocate a larger array, copy the existing block pointers into it,
 * and allocate a new resource blk that is added to the root array.
9021 *
9022 * The resource control block contains:
9023 * root - array of pointer of resource blks
9024 * sz - current size of array.
9025 * len - last valid entry in array.
9026 *
 * A search operation based on a resource number is as follows:
 *	index = rnum / RESOURCE_BLKSZ;
 *	offset = rnum % RESOURCE_BLKSZ;
 *	ASSERT(index < resource_block.len);
 *	ASSERT(index < resource_block.sz);
 *	return resource_block.root[index]->blks[offset];
9035 *
9036 * A resource blk is freed when its used count reaches zero.
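 *
 * Typical lifecycle: open(9E) calls daplka_resource_reserve() to pick
 * a minor number and mark its slot DAPLKA_RC_RESERVED;
 * daplka_ia_create() replaces the sentinel with the real IA resource
 * via daplka_resource_insert(); each ioctl maps its minor number back
 * to the resource with daplka_resource_lookup(); and close(9E)
 * removes it with daplka_resource_remove().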
9037 */
9038
9039 /*
9040 * initializes the global resource table
9041 */
9042 static void
9043 daplka_resource_init(void)
9044 {
9045 rw_init(&daplka_resource.daplka_rct_lock, NULL, RW_DRIVER, NULL);
9046 daplka_resource.daplka_rc_len = 0;
9047 daplka_resource.daplka_rc_sz = 0;
9048 daplka_resource.daplka_rc_cnt = 0;
9049 daplka_resource.daplka_rc_flag = 0;
9050 daplka_resource.daplka_rc_root = NULL;
9051 }
9052
9053 /*
9054 * destroys the global resource table
9055 */
9056 static void
9057 daplka_resource_fini(void)
9058 {
9059 int i;
9060
9061 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9062 for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
9063 daplka_resource_blk_t *blk;
9064 int j;
9065
9066 blk = daplka_resource.daplka_rc_root[i];
9067 if (blk == NULL) {
9068 continue;
9069 }
9070 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
9071 if (blk->daplka_rcblk_blks[j] != NULL) {
9072 DERR("resource_fini: non-null slot %d, %p\n",
9073 j, blk->daplka_rcblk_blks[j]);
9074 }
9075 }
9076 kmem_free(blk, sizeof (*blk));
9077 daplka_resource.daplka_rc_root[i] = NULL;
9078 }
9079 if (daplka_resource.daplka_rc_root != NULL) {
9080 uint_t sz;
9081
9082 sz = daplka_resource.daplka_rc_sz *
9083 sizeof (daplka_resource_blk_t *);
9084 kmem_free(daplka_resource.daplka_rc_root, (uint_t)sz);
9085 daplka_resource.daplka_rc_root = NULL;
9086 daplka_resource.daplka_rc_len = 0;
9087 daplka_resource.daplka_rc_sz = 0;
9088 }
9089 rw_exit(&daplka_resource.daplka_rct_lock);
9090 rw_destroy(&daplka_resource.daplka_rct_lock);
9091 }
9092
9093 /*
9094 * reserves a slot in the global resource table.
9095 * this is called by the open() syscall. it is needed because
9096 * at open() time, we do not have sufficient information to
9097 * create an IA resource. the library needs to subsequently
9098 * call daplka_ia_create to insert an IA resource into this
9099 * reserved slot.
9100 */
9101 static int
9102 daplka_resource_reserve(minor_t *rnum)
9103 {
9104 int i, j, empty = -1;
9105 daplka_resource_blk_t *blk;
9106
9107 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9108 /*
9109 * Try to find an empty slot
9110 */
9111 for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
9112 blk = daplka_resource.daplka_rc_root[i];
9113 if (blk != NULL && blk->daplka_rcblk_avail > 0) {
9114
9115 D3("resource_alloc: available blks %d\n",
9116 blk->daplka_rcblk_avail);
9117
9118 /*
9119 * found an empty slot in this blk
9120 */
9121 for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
9122 if (blk->daplka_rcblk_blks[j] == NULL) {
9123 *rnum = (minor_t)
9124 (j + (i * DAPLKA_RC_BLKSZ));
9125 blk->daplka_rcblk_blks[j] =
9126 (daplka_resource_t *)
9127 DAPLKA_RC_RESERVED;
9128 blk->daplka_rcblk_avail--;
9129 daplka_resource.daplka_rc_cnt++;
9130 rw_exit(&daplka_resource.
9131 daplka_rct_lock);
9132 return (0);
9133 }
9134 }
9135 } else if (blk == NULL && empty < 0) {
9136 /*
9137 * remember first empty slot
9138 */
9139 empty = i;
9140 }
9141 }
9142
9143 /*
9144 * Couldn't find anything, allocate a new blk
	 * Do we need to reallocate the root array?
9146 */
9147 if (empty < 0) {
9148 if (daplka_resource.daplka_rc_len ==
9149 daplka_resource.daplka_rc_sz) {
9150 /*
9151 * Allocate new array and copy current stuff into it
9152 */
9153 daplka_resource_blk_t **p;
9154 uint_t newsz = (uint_t)daplka_resource.daplka_rc_sz +
9155 DAPLKA_RC_BLKSZ;
9156
9157 D3("resource_alloc: increasing no. of buckets to %d\n",
9158 newsz);
9159
9160 p = kmem_zalloc(newsz * sizeof (*p), daplka_km_flags);
9161
9162 if (daplka_resource.daplka_rc_root) {
9163 uint_t oldsz;
9164
9165 oldsz = (uint_t)(daplka_resource.daplka_rc_sz *
9166 (int)sizeof (*p));
9167
9168 /*
9169 * Copy old data into new space and
9170 * free old stuff
9171 */
9172 bcopy(daplka_resource.daplka_rc_root, p, oldsz);
9173 kmem_free(daplka_resource.daplka_rc_root,
9174 oldsz);
9175 }
9176
9177 daplka_resource.daplka_rc_root = p;
9178 daplka_resource.daplka_rc_sz = (int)newsz;
9179 }
9180
9181 empty = daplka_resource.daplka_rc_len;
9182 daplka_resource.daplka_rc_len++;
9183
9184 D3("resource_alloc: daplka_rc_len %d\n",
9185 daplka_resource.daplka_rc_len);
9186 }
9187
9188 /*
9189 * Allocate a new blk
9190 */
9191 blk = kmem_zalloc(sizeof (*blk), daplka_km_flags);
9192 ASSERT(daplka_resource.daplka_rc_root[empty] == NULL);
9193 daplka_resource.daplka_rc_root[empty] = blk;
9194 blk->daplka_rcblk_avail = DAPLKA_RC_BLKSZ - 1;
9195
9196 /*
9197 * Allocate slot
9198 */
9199 *rnum = (minor_t)(empty * DAPLKA_RC_BLKSZ);
9200 blk->daplka_rcblk_blks[0] = (daplka_resource_t *)DAPLKA_RC_RESERVED;
9201 daplka_resource.daplka_rc_cnt++;
9202 rw_exit(&daplka_resource.daplka_rct_lock);
9203
9204 return (0);
9205 }
9206
9207 /*
9208 * removes resource from global resource table
9209 */
9210 static daplka_resource_t *
9211 daplka_resource_remove(minor_t rnum)
9212 {
9213 int i, j;
9214 daplka_resource_blk_t *blk;
9215 daplka_resource_t *p;
9216
9217 i = (int)(rnum / DAPLKA_RC_BLKSZ);
9218 j = (int)(rnum % DAPLKA_RC_BLKSZ);
9219
9220 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9221 if (i >= daplka_resource.daplka_rc_len) {
9222 rw_exit(&daplka_resource.daplka_rct_lock);
9223 DERR("resource_remove: invalid rnum %d\n", rnum);
9224 return (NULL);
9225 }
9226
9227 ASSERT(daplka_resource.daplka_rc_root);
9228 ASSERT(i < daplka_resource.daplka_rc_len);
9229 ASSERT(i < daplka_resource.daplka_rc_sz);
9230 blk = daplka_resource.daplka_rc_root[i];
9231 if (blk == NULL) {
9232 rw_exit(&daplka_resource.daplka_rct_lock);
9233 DERR("resource_remove: invalid rnum %d\n", rnum);
9234 return (NULL);
9235 }
9236
9237 if (blk->daplka_rcblk_blks[j] == NULL) {
9238 rw_exit(&daplka_resource.daplka_rct_lock);
9239 DERR("resource_remove: blk->daplka_rcblk_blks[j] == NULL\n");
9240 return (NULL);
9241 }
9242 p = blk->daplka_rcblk_blks[j];
9243 blk->daplka_rcblk_blks[j] = NULL;
9244 blk->daplka_rcblk_avail++;
9245 if (blk->daplka_rcblk_avail == DAPLKA_RC_BLKSZ) {
9246 /*
9247 * free this blk
9248 */
9249 kmem_free(blk, sizeof (*blk));
9250 daplka_resource.daplka_rc_root[i] = NULL;
9251 }
9252 daplka_resource.daplka_rc_cnt--;
9253 rw_exit(&daplka_resource.daplka_rct_lock);
9254
9255 if ((intptr_t)p == DAPLKA_RC_RESERVED) {
9256 return (NULL);
9257 } else {
9258 return (p);
9259 }
9260 }
9261
9262 /*
9263 * inserts resource into the slot designated by rnum
9264 */
9265 static int
9266 daplka_resource_insert(minor_t rnum, daplka_resource_t *rp)
9267 {
9268 int i, j, error = -1;
9269 daplka_resource_blk_t *blk;
9270
	/*
	 * locate the slot designated by rnum and lock the
	 * table in WRITER mode
	 */
9275
9276 i = (int)(rnum / DAPLKA_RC_BLKSZ);
9277 j = (int)(rnum % DAPLKA_RC_BLKSZ);
9278
9279 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9280 if (i >= daplka_resource.daplka_rc_len) {
9281 rw_exit(&daplka_resource.daplka_rct_lock);
9282 DERR("resource_insert: resource %d not found\n", rnum);
9283 return (-1);
9284 }
9285
9286 blk = daplka_resource.daplka_rc_root[i];
9287 if (blk != NULL) {
9288 ASSERT(i < daplka_resource.daplka_rc_len);
9289 ASSERT(i < daplka_resource.daplka_rc_sz);
9290
9291 if ((intptr_t)blk->daplka_rcblk_blks[j] == DAPLKA_RC_RESERVED) {
9292 blk->daplka_rcblk_blks[j] = rp;
9293 error = 0;
9294 } else {
9295 DERR("resource_insert: %d not reserved, blk = %p\n",
9296 rnum, blk->daplka_rcblk_blks[j]);
9297 }
9298 } else {
9299 DERR("resource_insert: resource %d not found\n", rnum);
9300 }
9301 rw_exit(&daplka_resource.daplka_rct_lock);
9302 return (error);
9303 }
9304
9305 /*
9306 * finds resource using minor device number
9307 */
9308 static daplka_resource_t *
9309 daplka_resource_lookup(minor_t rnum)
9310 {
9311 int i, j;
9312 daplka_resource_blk_t *blk;
9313 daplka_resource_t *rp;
9314
	/*
	 * locate the slot designated by rnum and lock the
	 * table in READER mode
	 */
9319
9320 i = (int)(rnum / DAPLKA_RC_BLKSZ);
9321 j = (int)(rnum % DAPLKA_RC_BLKSZ);
9322
9323 rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
9324 if (i >= daplka_resource.daplka_rc_len) {
9325 rw_exit(&daplka_resource.daplka_rct_lock);
9326 DERR("resource_lookup: resource %d not found\n", rnum);
9327 return (NULL);
9328 }
9329
9330 blk = daplka_resource.daplka_rc_root[i];
9331 if (blk != NULL) {
9332 ASSERT(i < daplka_resource.daplka_rc_len);
9333 ASSERT(i < daplka_resource.daplka_rc_sz);
9334
9335 rp = blk->daplka_rcblk_blks[j];
9336 if (rp == NULL || (intptr_t)rp == DAPLKA_RC_RESERVED) {
9337 D3("resource_lookup: %d not found, blk = %p\n",
9338 rnum, blk->daplka_rcblk_blks[j]);
9339 } else {
9340 DAPLKA_RS_REF((daplka_ia_resource_t *)rp);
9341 }
9342 } else {
9343 DERR("resource_lookup: resource %d not found\n", rnum);
9344 rp = NULL;
9345 }
9346 rw_exit(&daplka_resource.daplka_rct_lock);
9347 return (rp);
9348 }
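
/*
 * The following sketch is ours, not part of the driver: the
 * DAPLKA_EXAMPLES guard and every daplka_example_* name are
 * hypothetical, added only to illustrate the reserve/insert protocol
 * described above daplka_resource_reserve. open(9E) reserves a minor
 * number, and a later daplka_ia_create call fills the slot with the
 * real IA resource.
 */
#ifdef DAPLKA_EXAMPLES
static int
daplka_example_open_flow(daplka_resource_t *rp)
{
	minor_t rnum;

	/* open(9E) time: grab a slot; it now holds DAPLKA_RC_RESERVED */
	if (daplka_resource_reserve(&rnum) != 0) {
		return (ENOMEM);
	}
	/* daplka_ia_create time: swap the reservation for the real IA */
	if (daplka_resource_insert(rnum, rp) != 0) {
		/* undo the reservation; remove returns NULL for it */
		(void) daplka_resource_remove(rnum);
		return (EINVAL);
	}
	return (0);
}
#endif /* DAPLKA_EXAMPLES */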
9349
9350 /*
9351 * generic hash table implementation
9352 */
9353
9354 /*
9355 * daplka_hash_create:
9356 * initializes a hash table with the specified parameters
9357 *
9358 * input:
9359 * htblp pointer to hash table
9360 *
9361 * nbuckets number of buckets (must be power of 2)
9362 *
9363 * free_func this function is called on each hash
9364 * table element when daplka_hash_destroy
9365 * is called
9366 *
9367 * lookup_func if daplka_hash_lookup is able to find
9368 * the desired object, this function is
9369 * applied on the object before
9370 * daplka_hash_lookup returns
9371 * output:
9372 * none
9373 *
9374 * return value(s):
 *	EINVAL		nbuckets is not a non-zero power of 2
9376 * ENOMEM cannot allocate buckets
9377 * 0 success
9378 */
9379 static int
9380 daplka_hash_create(daplka_hash_table_t *htblp, uint_t nbuckets,
9381 void (*free_func)(void *), void (*lookup_func)(void *))
9382 {
9383 int i;
9384
	/* nbuckets must be a non-zero power of 2 */
	if (nbuckets == 0 || (nbuckets & (nbuckets - 1)) != 0) {
		DERR("hash_create: nbuckets not a non-zero power of 2\n");
		return (EINVAL);
	}
9389
9390 htblp->ht_buckets =
9391 kmem_zalloc(sizeof (daplka_hash_bucket_t) * nbuckets,
9392 daplka_km_flags);
9393 if (htblp->ht_buckets == NULL) {
9394 DERR("hash_create: cannot allocate buckets\n");
9395 return (ENOMEM);
9396 }
9397 for (i = 0; i < nbuckets; i++) {
9398 htblp->ht_buckets[i].hb_count = 0;
9399 htblp->ht_buckets[i].hb_entries = NULL;
9400 }
9401 rw_init(&htblp->ht_table_lock, NULL, RW_DRIVER, NULL);
9402 mutex_init(&htblp->ht_key_lock, NULL, MUTEX_DRIVER, NULL);
9403
9404 htblp->ht_count = 0;
9405 htblp->ht_next_hkey = (uint64_t)gethrtime();
9406 htblp->ht_nbuckets = nbuckets;
9407 htblp->ht_free_func = free_func;
9408 htblp->ht_lookup_func = lookup_func;
9409 htblp->ht_initialized = B_TRUE;
9410 D3("hash_create: done, buckets = %d\n", nbuckets);
9411 return (0);
9412 }
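
/*
 * Table-setup sketch (illustrative; DAPLKA_EXAMPLES and the
 * daplka_example_* names are ours): a 64-bucket table whose leftover
 * objects are freed with kmem_free when daplka_hash_destroy runs.
 * 64 satisfies the power-of-2 requirement checked above.
 */
#ifdef DAPLKA_EXAMPLES
typedef struct daplka_example_obj {
	uint64_t	eo_hkey;	/* key returned by hash_insert */
	int		eo_value;
} daplka_example_obj_t;

static void
daplka_example_free(void *objp)
{
	kmem_free(objp, sizeof (daplka_example_obj_t));
}

static int
daplka_example_tbl_init(daplka_hash_table_t *htblp)
{
	/* NULL lookup_func: lookups take no hold on the objects */
	return (daplka_hash_create(htblp, 64, daplka_example_free, NULL));
}
#endif /* DAPLKA_EXAMPLES */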
9413
9414 /*
9415 * daplka_hash_insert:
9416 * inserts an object into a hash table
9417 *
9418 * input:
9419 * htblp pointer to hash table
9420 *
9421 * hkeyp pointer to hash key.
9422 * *hkeyp being non-zero means that the caller
9423 * has generated its own hkey. if *hkeyp is zero,
9424 * this function will generate an hkey for the
9425 * caller. it is recommended that the caller
9426 * leave the hkey generation to this function
9427 * because the hkey is more likely to be evenly
9428 * distributed.
9429 *
9430 * objp pointer to object to be inserted into
9431 * hash table
9432 *
9433 * output:
9434 * hkeyp the generated hkey is returned via this pointer
9435 *
9436 * return value(s):
9437 * EINVAL invalid parameter
9438 * ENOMEM cannot allocate hash entry
9439 * 0 successful
9440 */
9441 static int
9442 daplka_hash_insert(daplka_hash_table_t *htblp, uint64_t *hkeyp, void *objp)
9443 {
9444 daplka_hash_entry_t *hep, *curr_hep;
9445 daplka_hash_bucket_t *hbp;
9446 uint32_t bucket;
9447 uint64_t hkey;
9448
9449 if (hkeyp == NULL) {
9450 DERR("hash_insert: hkeyp == NULL\n");
9451 return (EINVAL);
9452 }
9453 hep = kmem_zalloc(sizeof (*hep), daplka_km_flags);
9454 if (hep == NULL) {
9455 DERR("hash_insert: cannot alloc hash_entry\n");
9456 return (ENOMEM);
9457 }
9458 if (*hkeyp == 0) {
9459 /* generate a new key */
9460 mutex_enter(&htblp->ht_key_lock);
9461 hkey = ++htblp->ht_next_hkey;
9462 if (hkey == 0) {
9463 hkey = htblp->ht_next_hkey = (uint64_t)gethrtime();
9464 }
9465 mutex_exit(&htblp->ht_key_lock);
9466 } else {
9467 /* use user generated key */
9468 hkey = *hkeyp;
9469 }
9470
9471 /* only works if ht_nbuckets is a power of 2 */
9472 bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9473 ASSERT(objp != NULL);
9474 ASSERT(bucket < htblp->ht_nbuckets);
9475
9476 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9477 hep->he_hkey = hkey;
9478 hep->he_objp = objp;
9479
9480 /* look for duplicate entries */
9481 hbp = &htblp->ht_buckets[bucket];
9482 curr_hep = hbp->hb_entries;
9483 while (curr_hep != NULL) {
9484 if (curr_hep->he_hkey == hep->he_hkey) {
9485 break;
9486 }
9487 curr_hep = curr_hep->he_next;
9488 }
9489 if (curr_hep != NULL) {
9490 DERR("hash_insert: found duplicate hash entry: "
9491 "bucket %d, hkey 0x%016llx\n",
9492 bucket, (longlong_t)hep->he_hkey);
9493 kmem_free(hep, sizeof (*hep));
9494 rw_exit(&htblp->ht_table_lock);
9495 return (EINVAL);
9496 }
9497 hep->he_next = hbp->hb_entries;
9498 hbp->hb_entries = hep;
9499 hbp->hb_count++;
9500 htblp->ht_count++;
9501 rw_exit(&htblp->ht_table_lock);
9502
9503 if (*hkeyp == 0) {
9504 *hkeyp = hkey;
9505 ASSERT(*hkeyp != 0);
9506 }
9507 D3("hash_insert: htblp 0x%p, hkey = 0x%016llx, bucket = %d\n",
9508 htblp, (longlong_t)*hkeyp, bucket);
9509 return (0);
9510 }
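
/*
 * Insert sketch (illustrative): passing *hkeyp == 0 lets the table
 * generate an evenly distributed key, which the caller records for
 * later lookups.
 */
#ifdef DAPLKA_EXAMPLES
static int
daplka_example_add(daplka_hash_table_t *htblp, daplka_example_obj_t *eop)
{
	uint64_t hkey = 0;
	int retval;

	retval = daplka_hash_insert(htblp, &hkey, (void *)eop);
	if (retval == 0) {
		eop->eo_hkey = hkey;	/* non-zero, generated for us */
	}
	return (retval);
}
#endif /* DAPLKA_EXAMPLES */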
9511
9512 /*
9513 * daplka_hash_remove:
9514 * removes object identified by hkey from hash table
9515 *
9516 * input:
9517 * htblp pointer to hash table
9518 *
9519 * hkey hkey that identifies the object to be removed
9520 *
9521 * output:
9522 * objpp pointer to pointer to object.
9523 * if remove is successful, the removed object
9524 * will be returned via *objpp.
9525 *
9526 * return value(s):
9527 * EINVAL cannot find hash entry
9528 * 0 successful
9529 */
9530 static int
9531 daplka_hash_remove(daplka_hash_table_t *htblp, uint64_t hkey, void **objpp)
9532 {
9533 daplka_hash_entry_t *free_hep, **curr_hepp;
9534 daplka_hash_bucket_t *hbp;
9535 uint32_t bucket;
9536
9537 bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9538
9539 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9540 hbp = &htblp->ht_buckets[bucket];
9541
9542 curr_hepp = &hbp->hb_entries;
9543 while (*curr_hepp != NULL) {
9544 if ((*curr_hepp)->he_hkey == hkey) {
9545 break;
9546 }
9547 curr_hepp = &(*curr_hepp)->he_next;
9548 }
9549 if (*curr_hepp == NULL) {
9550 DERR("hash_remove: cannot find hash entry: "
9551 "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9552 rw_exit(&htblp->ht_table_lock);
9553 return (EINVAL);
9554 } else {
9555 if (objpp != NULL) {
9556 *objpp = (*curr_hepp)->he_objp;
9557 }
9558 free_hep = *curr_hepp;
9559 *curr_hepp = (*curr_hepp)->he_next;
9560 kmem_free(free_hep, sizeof (*free_hep));
9561 }
9562 hbp->hb_count--;
9563 htblp->ht_count--;
	D3("hash_remove: removed entry, hkey 0x%016llx, bucket %d, "
	    "hb_count %d, ht_count %d\n",
	    (longlong_t)hkey, bucket, hbp->hb_count, htblp->ht_count);
9567 rw_exit(&htblp->ht_table_lock);
9568 return (0);
9569 }
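
/*
 * Remove sketch (illustrative): the removed object comes back via
 * objpp, so the caller, not the table, decides how to free it.
 */
#ifdef DAPLKA_EXAMPLES
static void
daplka_example_del(daplka_hash_table_t *htblp, uint64_t hkey)
{
	void *objp = NULL;

	if (daplka_hash_remove(htblp, hkey, &objp) == 0 && objp != NULL) {
		daplka_example_free(objp);
	}
}
#endif /* DAPLKA_EXAMPLES */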
9570
9571 /*
9572 * daplka_hash_walk:
 *	walks through the entire hash table, applying func to each of
 *	the inserted objects. stops walking if func returns non-zero.
9575 *
9576 * input:
9577 * htblp pointer to hash table
9578 *
9579 * func function to be applied on each object
9580 *
9581 * farg second argument to func
9582 *
9583 * lockmode can be RW_WRITER or RW_READER. this
9584 * allows the caller to choose what type
9585 * of lock to acquire before walking the
9586 * table.
9587 *
9588 * output:
9589 * none
9590 *
9591 * return value(s):
9592 * none
9593 */
9594 static void
9595 daplka_hash_walk(daplka_hash_table_t *htblp, int (*func)(void *, void *),
9596 void *farg, krw_t lockmode)
9597 {
9598 daplka_hash_entry_t *curr_hep;
9599 daplka_hash_bucket_t *hbp;
9600 uint32_t bucket, retval = 0;
9601
9602 ASSERT(lockmode == RW_WRITER || lockmode == RW_READER);
9603
9604 if (lockmode == RW_WRITER) {
9605 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9606 } else {
9607 rw_enter(&htblp->ht_table_lock, RW_READER);
9608 }
9609 for (bucket = 0; bucket < htblp->ht_nbuckets && retval == 0; bucket++) {
9610 hbp = &htblp->ht_buckets[bucket];
9611 curr_hep = hbp->hb_entries;
9612 while (curr_hep != NULL) {
9613 retval = (*func)(curr_hep->he_objp, farg);
9614 if (retval != 0) {
9615 break;
9616 }
9617 curr_hep = curr_hep->he_next;
9618 }
9619 }
9620 rw_exit(&htblp->ht_table_lock);
9621 }
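
/*
 * Walk sketch (illustrative): a callback that sums eo_value over all
 * inserted objects. returning non-zero from the callback would stop
 * the walk early; RW_READER suffices because nothing is modified.
 */
#ifdef DAPLKA_EXAMPLES
static int
daplka_example_sum_cb(void *objp, void *farg)
{
	int *sump = (int *)farg;

	*sump += ((daplka_example_obj_t *)objp)->eo_value;
	return (0);
}

static int
daplka_example_sum(daplka_hash_table_t *htblp)
{
	int sum = 0;

	daplka_hash_walk(htblp, daplka_example_sum_cb, &sum, RW_READER);
	return (sum);
}
#endif /* DAPLKA_EXAMPLES */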
9622
9623 /*
9624 * daplka_hash_lookup:
9625 * finds object from hkey
9626 *
9627 * input:
9628 * htblp pointer to hash table
9629 *
9630 * hkey hkey that identifies the object to be looked up
9631 *
9632 * output:
9633 * none
9634 *
9635 * return value(s):
9636 * NULL if not found
9637 * object pointer if found
9638 */
9639 static void *
9640 daplka_hash_lookup(daplka_hash_table_t *htblp, uint64_t hkey)
9641 {
9642 daplka_hash_entry_t *curr_hep;
9643 uint32_t bucket;
9644 void *objp;
9645
9646 bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9647
9648 rw_enter(&htblp->ht_table_lock, RW_READER);
9649 curr_hep = htblp->ht_buckets[bucket].hb_entries;
9650 while (curr_hep != NULL) {
9651 if (curr_hep->he_hkey == hkey) {
9652 break;
9653 }
9654 curr_hep = curr_hep->he_next;
9655 }
9656 if (curr_hep == NULL) {
9657 DERR("hash_lookup: cannot find hash entry: "
9658 "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9659 rw_exit(&htblp->ht_table_lock);
9660 return (NULL);
9661 }
9662 objp = curr_hep->he_objp;
9663 ASSERT(objp != NULL);
9664 if (htblp->ht_lookup_func != NULL) {
9665 (*htblp->ht_lookup_func)(objp);
9666 }
9667 rw_exit(&htblp->ht_table_lock);
9668 return (objp);
9669 }
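
/*
 * Lookup sketch (illustrative): with the NULL ht_lookup_func used in
 * daplka_example_tbl_init no hold is taken, so the object must not be
 * freed while the caller uses it. tables created with
 * daplka_hash_generic_lookup (below) must drop the hold it takes.
 */
#ifdef DAPLKA_EXAMPLES
static int
daplka_example_get(daplka_hash_table_t *htblp, uint64_t hkey)
{
	daplka_example_obj_t *eop;

	eop = (daplka_example_obj_t *)daplka_hash_lookup(htblp, hkey);
	return ((eop != NULL) ? eop->eo_value : -1);
}
#endif /* DAPLKA_EXAMPLES */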
9670
9671 /*
9672 * daplka_hash_destroy:
9673 * destroys hash table. applies free_func on all inserted objects.
9674 *
9675 * input:
9676 * htblp pointer to hash table
9677 *
9678 * output:
9679 * none
9680 *
9681 * return value(s):
9682 * none
9683 */
9684 static void
9685 daplka_hash_destroy(daplka_hash_table_t *htblp)
9686 {
9687 daplka_hash_entry_t *curr_hep, *free_hep;
9688 daplka_hash_entry_t *free_list = NULL;
9689 daplka_hash_bucket_t *hbp;
9690 uint32_t bucket, cnt, total = 0;
9691
9692 if (!htblp->ht_initialized) {
9693 DERR("hash_destroy: not initialized\n");
9694 return;
9695 }
9696 /* free all elements from hash table */
9697 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9698 for (bucket = 0; bucket < htblp->ht_nbuckets; bucket++) {
9699 hbp = &htblp->ht_buckets[bucket];
9700
9701 /* build list of elements to be freed */
9702 curr_hep = hbp->hb_entries;
9703 cnt = 0;
9704 while (curr_hep != NULL) {
9705 cnt++;
9706 free_hep = curr_hep;
9707 curr_hep = curr_hep->he_next;
9708
9709 free_hep->he_next = free_list;
9710 free_list = free_hep;
9711 }
9712 ASSERT(cnt == hbp->hb_count);
9713 total += cnt;
9714 hbp->hb_count = 0;
9715 hbp->hb_entries = NULL;
9716 }
9717 ASSERT(total == htblp->ht_count);
9718 D3("hash_destroy: htblp 0x%p, nbuckets %d, freed %d hash entries\n",
9719 htblp, htblp->ht_nbuckets, total);
9720 rw_exit(&htblp->ht_table_lock);
9721
9722 /* free all objects, now without holding the hash table lock */
9723 cnt = 0;
9724 while (free_list != NULL) {
9725 cnt++;
9726 free_hep = free_list;
9727 free_list = free_list->he_next;
9728 if (htblp->ht_free_func != NULL) {
9729 (*htblp->ht_free_func)(free_hep->he_objp);
9730 }
9731 kmem_free(free_hep, sizeof (*free_hep));
9732 }
9733 ASSERT(total == cnt);
9734
9735 /* free hash buckets and destroy locks */
9736 kmem_free(htblp->ht_buckets,
9737 sizeof (daplka_hash_bucket_t) * htblp->ht_nbuckets);
9738
9739 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9740 htblp->ht_buckets = NULL;
9741 htblp->ht_count = 0;
9742 htblp->ht_nbuckets = 0;
9743 htblp->ht_free_func = NULL;
9744 htblp->ht_lookup_func = NULL;
9745 htblp->ht_initialized = B_FALSE;
9746 rw_exit(&htblp->ht_table_lock);
9747
9748 mutex_destroy(&htblp->ht_key_lock);
9749 rw_destroy(&htblp->ht_table_lock);
9750 }
9751
9752 /*
9753 * daplka_hash_getsize:
9754 * return the number of objects in hash table
9755 *
9756 * input:
9757 * htblp pointer to hash table
9758 *
9759 * output:
9760 * none
9761 *
9762 * return value(s):
9763 * number of objects in hash table
9764 */
9765 static uint32_t
9766 daplka_hash_getsize(daplka_hash_table_t *htblp)
9767 {
9768 uint32_t sz;
9769
9770 rw_enter(&htblp->ht_table_lock, RW_READER);
9771 sz = htblp->ht_count;
9772 rw_exit(&htblp->ht_table_lock);
9773
9774 return (sz);
9775 }
9776
9777 /*
 * this function is used as the default ht_lookup_func: it takes a
 * hold on the resource before daplka_hash_lookup returns it. other
 * object types may use a more elaborate lookup_func.
9780 */
9781 static void
9782 daplka_hash_generic_lookup(void *obj)
9783 {
9784 daplka_resource_t *rp = (daplka_resource_t *)obj;
9785
9786 mutex_enter(&rp->rs_reflock);
9787 rp->rs_refcnt++;
9788 ASSERT(rp->rs_refcnt != 0);
9789 mutex_exit(&rp->rs_reflock);
9790 }
9791
9792 /*
 * Generates a non-zero 32-bit hash key for the timer hash table.
9794 */
9795 static uint32_t
9796 daplka_timer_hkey_gen()
9797 {
9798 uint32_t new_hkey;
9799
9800 do {
9801 new_hkey = atomic_inc_32_nv(&daplka_timer_hkey);
9802 } while (new_hkey == 0);
9803
9804 return (new_hkey);
9805 }
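
/*
 * Timer-key sketch (illustrative): timer hkeys are caller-generated,
 * so daplka_hash_insert sees a non-zero *hkeyp and uses it as-is
 * instead of generating one.
 */
#ifdef DAPLKA_EXAMPLES
static int
daplka_example_timer_insert(daplka_hash_table_t *htblp, void *timerp)
{
	uint64_t hkey = (uint64_t)daplka_timer_hkey_gen();

	return (daplka_hash_insert(htblp, &hkey, timerp));
}
#endif /* DAPLKA_EXAMPLES */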
9806
9807
9808 /*
9809 * The DAPL KA debug logging routines
9810 */
9811
9812 /*
 * Append the string str to the circular debug log. no newline is
 * added; callers include one in str when needed.
9814 */
9815 static void
9816 daplka_dbglog(char *str)
9817 {
9818 size_t length;
9819 size_t remlen;
9820
9821 /*
 * Do nothing if the log has not been initialized yet.
9823 */
9824 if (!daplka_dbginit) {
9825 return;
9826 }
9827 mutex_enter(&daplka_dbglock);
9828 /*
9829 * Note the log is circular; if this string would run over the end,
9830 * we copy the first piece to the end and then the last piece to
9831 * the beginning of the log.
9832 */
9833 length = strlen(str);
9834
9835 remlen = (size_t)sizeof (daplka_dbgbuf) - daplka_dbgnext - 1;
9836
9837 if (length > remlen) {
9838 if (remlen)
9839 bcopy(str, daplka_dbgbuf + daplka_dbgnext, remlen);
		daplka_dbgbuf[sizeof (daplka_dbgbuf) - 1] = '\0';
9841 str += remlen;
9842 length -= remlen;
9843 daplka_dbgnext = 0;
9844 }
9845 bcopy(str, daplka_dbgbuf + daplka_dbgnext, length);
9846 daplka_dbgnext += length;
9847
9848 if (daplka_dbgnext >= sizeof (daplka_dbgbuf))
9849 daplka_dbgnext = 0;
9850 mutex_exit(&daplka_dbglock);
9851 }
9852
9853
9854 /*
9855 * Add a printf-style message to whichever debug logs we're currently using.
9856 */
9857 static void
9858 daplka_debug(const char *fmt, ...)
9859 {
9860 char buff[512];
9861 va_list ap;
	/*
	 * Prepend the thread id and a timestamp to the message.
	 * The timestamp is gethrtime() converted to units of 10
	 * microseconds (X ns = X/1000 us = X/10000 units of 10 us),
	 * truncated so that it wraps around every 10000 seconds.
	 */
	int micro_time = (int)((gethrtime() / 10000) % 1000000000);
	(void) snprintf(buff, sizeof (buff), "th %p tm %9d: ",
	    (void *)curthread, micro_time);

	va_start(ap, fmt);
	(void) vsnprintf(buff + strlen(buff), sizeof (buff) - strlen(buff),
	    fmt, ap);
	va_end(ap);
9876
9877 daplka_dbglog(buff);
9878 }
9879
9880 static void
9881 daplka_console(const char *fmt, ...)
9882 {
9883 char buff[512];
9884 va_list ap;
9885
	va_start(ap, fmt);
	(void) vsnprintf(buff, sizeof (buff), fmt, ap);
	va_end(ap);
9889
9890 cmn_err(CE_CONT, "%s", buff);
9891 }