Print this page
8368 remove warlock leftovers from usr/src/uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/ib/clients/daplt/daplt.c
+++ new/usr/src/uts/common/io/ib/clients/daplt/daplt.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 *
26 26 * UDAPL kernel agent
27 27 */
28 28
29 29 #include <sys/types.h>
30 30 #include <sys/errno.h>
31 31 #include <sys/debug.h>
32 32 #include <sys/stropts.h>
33 33 #include <sys/stream.h>
34 34 #include <sys/strlog.h>
35 35 #include <sys/cmn_err.h>
36 36 #include <sys/kmem.h>
37 37 #include <sys/conf.h>
38 38 #include <sys/stat.h>
39 39 #include <sys/modctl.h>
40 40 #include <sys/kstat.h>
41 41 #include <sys/ddi.h>
42 42 #include <sys/sunddi.h>
43 43 #include <sys/strsun.h>
44 44 #include <sys/taskq.h>
45 45 #include <sys/open.h>
46 46 #include <sys/uio.h>
47 47 #include <sys/cpuvar.h>
48 48 #include <sys/atomic.h>
49 49 #include <sys/sysmacros.h>
50 50 #include <sys/esunddi.h>
51 51 #include <sys/avl.h>
52 52 #include <sys/cred.h>
53 53 #include <sys/note.h>
54 54 #include <sys/ib/ibtl/ibti.h>
55 55 #include <sys/socket.h>
56 56 #include <netinet/in.h>
57 57 #include <daplt_if.h>
58 58 #include <daplt.h>
59 59
60 60 /*
61 61 * The following variables support the debug log buffer scheme.
↓ open down ↓ |
61 lines elided |
↑ open up ↑ |
62 62 */
#ifdef DEBUG
/* DEBUG kernels get a larger in-core debug log buffer */
static char daplka_dbgbuf[0x80000];
#else /* DEBUG */
static char daplka_dbgbuf[0x4000];
#endif /* DEBUG */
static int daplka_dbgsize = sizeof (daplka_dbgbuf);
static size_t daplka_dbgnext;	/* next write offset into daplka_dbgbuf */
static int daplka_dbginit = 0;	/* nonzero once _init() set up the buffer */
static kmutex_t daplka_dbglock;	/* protects daplka_dbgbuf and daplka_dbgnext */

/* bitmask of enabled debug levels; 0x100 enables DERR output */
static int daplka_dbg = 0x0103;
static void daplka_console(const char *, ...);
static void daplka_debug(const char *, ...);
static int daplka_apm = 0x1; /* default enable */
static int daplka_failback = 0x1; /* default enable */
static int daplka_query_aft_setaltpath = 10;

/*
 * NOTE(review): these logging macros expand to a braceless "if", so the
 * macro name is used like a printf-style function:  DERR("...", ...);
 * Using one inside an if/else without braces would mis-bind the else;
 * retained as-is because all existing callers follow the convention.
 */
#define	DERR \
	if (daplka_dbg & 0x100) \
	    daplka_debug

#ifdef DEBUG

#define	DINFO \
	daplka_console

#define	D1 \
	if (daplka_dbg & 0x01) \
	    daplka_debug
#define	D2 \
	if (daplka_dbg & 0x02) \
	    daplka_debug
#define	D3 \
	if (daplka_dbg & 0x04) \
	    daplka_debug
#define	D4 \
	if (daplka_dbg & 0x08) \
	    daplka_debug

#else /* DEBUG */

/* non-DEBUG kernels compile the debug levels away entirely */
#define	DINFO	if (0) printf
#define	D1	if (0) printf
#define	D2	if (0) printf
#define	D3	if (0) printf
#define	D4	if (0) printf

#endif /* DEBUG */
114 111
115 112 /*
116 113 * driver entry points
117 114 */
118 115 static int daplka_open(dev_t *, int, int, struct cred *);
119 116 static int daplka_close(dev_t, int, int, struct cred *);
120 117 static int daplka_attach(dev_info_t *, ddi_attach_cmd_t);
121 118 static int daplka_detach(dev_info_t *, ddi_detach_cmd_t);
122 119 static int daplka_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
123 120 static int daplka_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
124 121
125 122 /*
126 123 * types of ioctls
127 124 */
128 125 static int daplka_common_ioctl(int, minor_t, intptr_t, int, cred_t *, int *);
129 126 static int daplka_misc_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
130 127 cred_t *, int *);
131 128 static int daplka_ep_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
132 129 cred_t *, int *);
133 130 static int daplka_evd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
134 131 cred_t *, int *);
135 132 static int daplka_mr_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
136 133 cred_t *, int *);
137 134 static int daplka_cno_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
138 135 cred_t *, int *);
139 136 static int daplka_pd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
140 137 cred_t *, int *);
141 138 static int daplka_sp_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
142 139 cred_t *, int *);
143 140 static int daplka_srq_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
144 141 cred_t *, int *);
145 142
146 143 /*
147 144 * common ioctls and supporting functions
148 145 */
149 146 static int daplka_ia_create(minor_t, intptr_t, int, cred_t *, int *);
150 147 static int daplka_ia_destroy(daplka_resource_t *);
151 148
152 149 /*
153 150 * EP ioctls and supporting functions
154 151 */
155 152 static int daplka_ep_create(daplka_ia_resource_t *, intptr_t, int,
156 153 cred_t *, int *);
157 154 static int daplka_ep_modify(daplka_ia_resource_t *, intptr_t, int,
158 155 cred_t *, int *);
159 156 static int daplka_ep_free(daplka_ia_resource_t *, intptr_t, int,
160 157 cred_t *, int *);
161 158 static int daplka_ep_connect(daplka_ia_resource_t *, intptr_t, int,
162 159 cred_t *, int *);
163 160 static int daplka_ep_disconnect(daplka_ia_resource_t *, intptr_t, int,
164 161 cred_t *, int *);
165 162 static int daplka_ep_reinit(daplka_ia_resource_t *, intptr_t, int,
166 163 cred_t *, int *);
167 164 static int daplka_ep_destroy(daplka_resource_t *);
168 165 static void daplka_hash_ep_free(void *);
169 166 static int daplka_ep_failback(void *objp, void *arg);
170 167 static int daplka_ep_altpath(daplka_ep_resource_t *, ib_gid_t *);
171 168
172 169 static uint32_t daplka_ep_get_state(daplka_ep_resource_t *);
173 170 static void daplka_ep_set_state(daplka_ep_resource_t *, uint32_t, uint32_t);
174 171 static boolean_t daplka_ep_transition_is_valid(uint32_t, uint32_t);
175 172 static daplka_timer_info_t *daplka_timer_info_alloc(daplka_ep_resource_t *);
176 173 static void daplka_timer_info_free(daplka_timer_info_t *);
177 174 static void daplka_timer_handler(void *);
178 175 static void daplka_timer_dispatch(void *);
179 176 static void daplka_timer_thread(void *);
180 177 static int daplka_cancel_timer(daplka_ep_resource_t *);
181 178 static void daplka_hash_timer_free(void *);
182 179
183 180 /*
184 181 * EVD ioctls and supporting functions
185 182 */
186 183 static int daplka_evd_create(daplka_ia_resource_t *, intptr_t, int,
187 184 cred_t *, int *);
188 185 static int daplka_cq_resize(daplka_ia_resource_t *, intptr_t, int,
189 186 cred_t *, int *);
190 187 static int daplka_evd_free(daplka_ia_resource_t *, intptr_t, int,
191 188 cred_t *, int *);
192 189 static int daplka_event_poll(daplka_ia_resource_t *, intptr_t, int,
193 190 cred_t *, int *);
194 191 static int daplka_evd_destroy(daplka_resource_t *);
195 192 static void daplka_cq_handler(ibt_cq_hdl_t, void *);
196 193 static void daplka_evd_wakeup(daplka_evd_resource_t *,
197 194 daplka_evd_event_list_t *, daplka_evd_event_t *);
198 195 static void daplka_evd_event_enqueue(daplka_evd_event_list_t *,
199 196 daplka_evd_event_t *);
200 197 static daplka_evd_event_t *daplka_evd_event_dequeue(daplka_evd_event_list_t *);
201 198 static void daplka_hash_evd_free(void *);
202 199
203 200
204 201 /*
205 202 * SRQ ioctls and supporting functions
206 203 */
207 204 static int daplka_srq_create(daplka_ia_resource_t *, intptr_t, int,
208 205 cred_t *, int *);
209 206 static int daplka_srq_resize(daplka_ia_resource_t *, intptr_t, int,
210 207 cred_t *, int *);
211 208 static int daplka_srq_free(daplka_ia_resource_t *, intptr_t, int,
212 209 cred_t *, int *);
213 210 static int daplka_srq_destroy(daplka_resource_t *);
214 211 static void daplka_hash_srq_free(void *);
215 212
216 213 /*
217 214 * Miscellaneous ioctls
218 215 */
219 216 static int daplka_cr_accept(daplka_ia_resource_t *, intptr_t, int,
220 217 cred_t *, int *);
221 218 static int daplka_cr_reject(daplka_ia_resource_t *, intptr_t, int,
222 219 cred_t *, int *);
223 220 static int daplka_cr_handoff(daplka_ia_resource_t *, intptr_t, int,
224 221 cred_t *, int *);
225 222 static int daplka_ia_query(daplka_ia_resource_t *, intptr_t, int,
226 223 cred_t *, int *);
227 224
228 225 /*
229 226 * PD ioctls and supporting functions
230 227 */
231 228 static int daplka_pd_alloc(daplka_ia_resource_t *, intptr_t, int,
232 229 cred_t *, int *);
233 230 static int daplka_pd_free(daplka_ia_resource_t *, intptr_t, int,
234 231 cred_t *, int *);
235 232 static int daplka_pd_destroy(daplka_resource_t *);
236 233 static void daplka_hash_pd_free(void *);
237 234
238 235 /*
239 236 * SP ioctls and supporting functions
240 237 */
241 238 static int daplka_service_register(daplka_ia_resource_t *, intptr_t, int,
242 239 cred_t *, int *);
243 240 static int daplka_service_deregister(daplka_ia_resource_t *, intptr_t, int,
244 241 cred_t *, int *);
245 242 static int daplka_sp_destroy(daplka_resource_t *);
246 243 static void daplka_hash_sp_free(void *);
247 244 static void daplka_hash_sp_unref(void *);
248 245
249 246 /*
250 247 * MR ioctls and supporting functions
251 248 */
252 249 static int daplka_mr_register(daplka_ia_resource_t *, intptr_t, int,
253 250 cred_t *, int *);
254 251 static int daplka_mr_register_lmr(daplka_ia_resource_t *, intptr_t, int,
255 252 cred_t *, int *);
256 253 static int daplka_mr_register_shared(daplka_ia_resource_t *, intptr_t, int,
257 254 cred_t *, int *);
258 255 static int daplka_mr_deregister(daplka_ia_resource_t *, intptr_t, int,
259 256 cred_t *, int *);
260 257 static int daplka_mr_sync(daplka_ia_resource_t *, intptr_t, int,
261 258 cred_t *, int *);
262 259 static int daplka_mr_destroy(daplka_resource_t *);
263 260 static void daplka_hash_mr_free(void *);
264 261 static void daplka_shared_mr_free(daplka_mr_resource_t *);
265 262
266 263 /*
267 264 * MW ioctls and supporting functions
268 265 */
269 266 static int daplka_mw_alloc(daplka_ia_resource_t *, intptr_t, int,
270 267 cred_t *, int *);
271 268 static int daplka_mw_free(daplka_ia_resource_t *, intptr_t, int,
272 269 cred_t *, int *);
273 270 static int daplka_mw_destroy(daplka_resource_t *);
274 271 static void daplka_hash_mw_free(void *);
275 272
276 273 /*
277 274 * CNO ioctls and supporting functions
278 275 */
279 276 static int daplka_cno_alloc(daplka_ia_resource_t *, intptr_t, int,
280 277 cred_t *, int *);
281 278 static int daplka_cno_free(daplka_ia_resource_t *, intptr_t, int,
282 279 cred_t *, int *);
283 280 static int daplka_cno_wait(daplka_ia_resource_t *, intptr_t, int,
284 281 cred_t *, int *);
285 282 static int daplka_cno_destroy(daplka_resource_t *);
286 283 static void daplka_hash_cno_free(void *);
287 284
288 285 /*
289 286 * CM handlers
290 287 */
291 288 static ibt_cm_status_t daplka_cm_rc_handler(void *, ibt_cm_event_t *,
292 289 ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
293 290
294 291 static ibt_cm_status_t daplka_cm_service_handler(void *, ibt_cm_event_t *,
295 292 ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
296 293
297 294 static ibt_cm_status_t daplka_cm_service_req(daplka_sp_resource_t *,
298 295 ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
299 296
300 297 /*
301 298 * resource management routines
302 299 */
303 300 static int daplka_resource_reserve(minor_t *);
304 301 static int daplka_resource_insert(minor_t, daplka_resource_t *);
305 302 static daplka_resource_t *daplka_resource_remove(minor_t rnum);
306 303 static daplka_resource_t *daplka_resource_lookup(minor_t);
307 304 static void daplka_resource_init(void);
308 305 static void daplka_resource_fini(void);
309 306 static struct daplka_resource_table daplka_resource;
310 307
311 308 /*
312 309 * hash table routines
313 310 */
314 311 static int daplka_hash_insert(daplka_hash_table_t *, uint64_t *, void *);
315 312 static int daplka_hash_remove(daplka_hash_table_t *, uint64_t, void **);
316 313 static void daplka_hash_walk(daplka_hash_table_t *, int (*)(void *, void *),
317 314 void *, krw_t);
318 315 static void *daplka_hash_lookup(daplka_hash_table_t *, uint64_t);
319 316 static int daplka_hash_create(daplka_hash_table_t *, uint_t,
320 317 void (*)(void *), void (*)(void *));
321 318 static void daplka_hash_destroy(daplka_hash_table_t *);
322 319 static uint32_t daplka_hash_getsize(daplka_hash_table_t *);
323 320 static void daplka_hash_generic_lookup(void *);
324 321
325 322 static uint32_t daplka_timer_hkey_gen();
326 323
327 324 /*
328 325 * async event handlers
329 326 */
330 327 static void daplka_async_event_create(ibt_async_code_t, ibt_async_event_t *,
331 328 uint64_t, daplka_ia_resource_t *);
332 329 static void daplka_rc_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
333 330 ibt_async_event_t *);
334 331 static void daplka_cq_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
335 332 ibt_async_event_t *);
336 333 static void daplka_un_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
337 334 ibt_async_event_t *);
338 335 static void daplka_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
339 336 ibt_async_event_t *);
340 337 static void daplka_sm_notice_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
341 338 ibt_subnet_event_t *event);
342 339 static void daplka_sm_gid_avail(ib_gid_t *, ib_gid_t *);
343 340
344 341 /*
345 342 * IBTF wrappers and default limits used for resource accounting
346 343 */
347 344 static boolean_t daplka_accounting_enabled = B_TRUE;
348 345 static uint32_t daplka_max_qp_percent = 100;
349 346 static uint32_t daplka_max_cq_percent = 100;
350 347 static uint32_t daplka_max_pd_percent = 100;
351 348 static uint32_t daplka_max_mw_percent = 100;
352 349 static uint32_t daplka_max_mr_percent = 100;
353 350 static uint32_t daplka_max_srq_percent = 100;
354 351
355 352 static ibt_status_t
356 353 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *, ibt_hca_hdl_t,
357 354 ibt_chan_alloc_flags_t, ibt_rc_chan_alloc_args_t *,
358 355 ibt_channel_hdl_t *, ibt_chan_sizes_t *);
359 356
360 357 static ibt_status_t
361 358 daplka_ibt_free_channel(daplka_ep_resource_t *, ibt_channel_hdl_t);
362 359
363 360 static ibt_status_t
364 361 daplka_ibt_alloc_cq(daplka_evd_resource_t *, ibt_hca_hdl_t,
365 362 ibt_cq_attr_t *, ibt_cq_hdl_t *, uint_t *);
366 363
367 364 static ibt_status_t
368 365 daplka_ibt_free_cq(daplka_evd_resource_t *, ibt_cq_hdl_t);
369 366
370 367 static ibt_status_t
371 368 daplka_ibt_alloc_pd(daplka_pd_resource_t *, ibt_hca_hdl_t,
372 369 ibt_pd_flags_t, ibt_pd_hdl_t *);
373 370
374 371 static ibt_status_t
375 372 daplka_ibt_free_pd(daplka_pd_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t);
376 373
377 374 static ibt_status_t
378 375 daplka_ibt_alloc_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
379 376 ibt_mw_flags_t, ibt_mw_hdl_t *, ibt_rkey_t *);
380 377
381 378 static ibt_status_t
382 379 daplka_ibt_free_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_mw_hdl_t);
383 380
384 381 static ibt_status_t
385 382 daplka_ibt_register_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
386 383 ibt_mr_attr_t *, ibt_mr_hdl_t *, ibt_mr_desc_t *);
387 384
388 385 static ibt_status_t
389 386 daplka_ibt_register_shared_mr(daplka_mr_resource_t *, ibt_hca_hdl_t,
390 387 ibt_mr_hdl_t, ibt_pd_hdl_t, ibt_smr_attr_t *, ibt_mr_hdl_t *,
391 388 ibt_mr_desc_t *);
392 389
393 390 static ibt_status_t
394 391 daplka_ibt_deregister_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_mr_hdl_t);
395 392
396 393 static ibt_status_t
397 394 daplka_ibt_alloc_srq(daplka_srq_resource_t *, ibt_hca_hdl_t, ibt_srq_flags_t,
398 395 ibt_pd_hdl_t, ibt_srq_sizes_t *, ibt_srq_hdl_t *, ibt_srq_sizes_t *);
399 396
400 397 static ibt_status_t
401 398 daplka_ibt_free_srq(daplka_srq_resource_t *, ibt_srq_hdl_t);
402 399
/*
 * macros for manipulating resource objects.
 * these macros can be used on objects that begin with a
 * daplka_resource_t header.
 */
#define	DAPLKA_RS_REFCNT(rp) ((rp)->header.rs_refcnt)

/* take a hold on a resource; the count must not wrap to zero */
#define	DAPLKA_RS_REF(rp) {			\
	mutex_enter(&(rp)->header.rs_reflock);	\
	(rp)->header.rs_refcnt++;		\
	ASSERT((rp)->header.rs_refcnt != 0);	\
	mutex_exit(&(rp)->header.rs_reflock);	\
}

/*
 * drop a hold; the last hold invokes the resource's rs_free callback.
 * rs_reflock is released before rs_free runs so the callback may
 * destroy the lock.
 */
#define	DAPLKA_RS_UNREF(rp) {				\
	mutex_enter(&(rp)->header.rs_reflock);		\
	ASSERT((rp)->header.rs_refcnt != 0);		\
	if (--(rp)->header.rs_refcnt == 0) {		\
		ASSERT((rp)->header.rs_free != NULL);	\
		mutex_exit(&(rp)->header.rs_reflock);	\
		(rp)->header.rs_free((daplka_resource_t *)rp); \
	} else {					\
		mutex_exit(&(rp)->header.rs_reflock);	\
	}						\
}

/* initialize a resource header; starts with one hold (the creator's) */
#define	DAPLKA_RS_INIT(rp, type, rnum, free_func) {	\
	(rp)->header.rs_refcnt = 1;			\
	(rp)->header.rs_type = (type);			\
	(rp)->header.rs_rnum = (rnum);			\
	(rp)->header.rs_charged = 0;			\
	(rp)->header.rs_free = (free_func);		\
	mutex_init(&(rp)->header.rs_reflock, NULL,	\
	    MUTEX_DRIVER, NULL);			\
}

#define	DAPLKA_RS_FINI(rp) {				\
	mutex_destroy(&(rp)->header.rs_reflock);	\
}

/* lock-free resource accounting via atomics on rs_charged */
#define	DAPLKA_RS_ACCT_INC(rp, cnt) {				\
	atomic_add_32(&(rp)->header.rs_charged, (cnt));		\
}
#define	DAPLKA_RS_ACCT_DEC(rp, cnt) {				\
	atomic_add_32(&(rp)->header.rs_charged, -(cnt));	\
}
#define	DAPLKA_RS_ACCT_CHARGED(rp) ((rp)->header.rs_charged)

#define	DAPLKA_RS_RNUM(rp) ((rp)->header.rs_rnum)
#define	DAPLKA_RS_TYPE(rp) ((rp)->header.rs_type)
#define	DAPLKA_RS_RESERVED(rp) ((intptr_t)(rp) == DAPLKA_RC_RESERVED)

/*
 * depending on the timeout value does a cv_wait_sig or cv_timedwait_sig
 */
#define	DAPLKA_EVD_WAIT(cvp, mp, timeout)			\
	((timeout) == LONG_MAX) ? cv_wait_sig((cvp), (mp)) :	\
	cv_timedwait_sig((cvp), (mp), (timeout))

/* callers of the WITHOUT_LOCK forms must hold dp->daplka_mutex */
#define	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt++)
#define	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt--)

#define	DAPLKA_HOLD_HCA(dp, hca) {		\
	mutex_enter(&(dp)->daplka_mutex);	\
	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);	\
	mutex_exit(&(dp)->daplka_mutex);	\
}

#define	DAPLKA_RELE_HCA(dp, hca) {		\
	mutex_enter(&(dp)->daplka_mutex);	\
	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca);	\
	mutex_exit(&(dp)->daplka_mutex);	\
}

/* true while any object count still references the HCA */
#define	DAPLKA_HCA_BUSY(hca)			\
	((hca)->hca_ref_cnt != 0 ||		\
	(hca)->hca_qp_count != 0 ||		\
	(hca)->hca_cq_count != 0 ||		\
	(hca)->hca_pd_count != 0 ||		\
	(hca)->hca_mw_count != 0 ||		\
	(hca)->hca_mr_count != 0)
484 481
485 482
/* character device entry points (only open/close/ioctl are implemented) */
static struct cb_ops daplka_cb_ops = {
	daplka_open,		/* cb_open */
	daplka_close,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	daplka_ioctl,		/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_NEW | D_MP,		/* cb_flag */
	CB_REV,			/* rev */
	nodev,			/* int (*cb_aread)() */
	nodev			/* int (*cb_awrite)() */
};

/* autoconfiguration entry points */
static struct dev_ops daplka_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	daplka_info,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	daplka_attach,		/* devo_attach */
	daplka_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&daplka_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	nulldev,		/* power */
	ddi_quiesce_not_needed,	/* devo_quiesce */
};

/*
 * Module linkage information for the kernel.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"uDAPL Service Driver",
	&daplka_ops,
};

static struct modlinkage modlinkage = {
#ifdef _LP64
	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL, NULL, NULL, NULL }
#else
	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL }
#endif
};
538 535
/*
 * daplka_dev holds global driver state and a list of HCAs
 */
static daplka_t *daplka_dev = NULL;
static void *daplka_state = NULL;	/* ddi_soft_state handle */

/*
 * global SP hash table
 */
static daplka_hash_table_t daplka_global_sp_htbl;

/*
 * timer_info hash table
 */
static daplka_hash_table_t daplka_timer_info_htbl;
static uint32_t daplka_timer_hkey = 0;	/* last timer hash key handed out */

/*
 * shared MR avl tree
 */
static avl_tree_t daplka_shared_mr_tree;
static kmutex_t daplka_shared_mr_lock;	/* protects daplka_shared_mr_tree */
static int daplka_shared_mr_cmp(const void *, const void *);

/*
 * default kmem flags used by this driver
 */
static int daplka_km_flags = KM_SLEEP;

/*
 * taskq used for handling background tasks
 */
static taskq_t *daplka_taskq = NULL;

/*
 * daplka_cm_delay is the length of time the active
 * side needs to wait before timing out on the REP message.
 */
static clock_t daplka_cm_delay = 60000000;

/*
 * modunload will fail if pending_close is non-zero
 */
static uint32_t daplka_pending_close = 0;

/* IBTF client registration information for this driver */
static struct ibt_clnt_modinfo_s daplka_clnt_modinfo = {
	IBTI_V_CURR,
	IBT_USER,
	daplka_async_handler,
	NULL,
	DAPLKA_DRV_NAME
};
593 588
594 589 /*
595 590 * Module Installation
596 591 */
597 592 int
598 593 _init(void)
599 594 {
600 595 int status;
601 596
602 597 status = ddi_soft_state_init(&daplka_state, sizeof (daplka_t), 1);
603 598 if (status != 0) {
604 599 return (status);
605 600 }
606 601
607 602 mutex_init(&daplka_dbglock, NULL, MUTEX_DRIVER, NULL);
608 603 bzero(daplka_dbgbuf, sizeof (daplka_dbgbuf));
609 604 daplka_dbgnext = 0;
610 605 daplka_dbginit = 1;
611 606
612 607 daplka_resource_init();
613 608
614 609 status = mod_install(&modlinkage);
615 610 if (status != DDI_SUCCESS) {
616 611 /* undo inits done before mod_install */
617 612 daplka_resource_fini();
618 613 mutex_destroy(&daplka_dbglock);
619 614 ddi_soft_state_fini(&daplka_state);
620 615 }
621 616 return (status);
622 617 }
623 618
624 619 /*
625 620 * Module Removal
626 621 */
627 622 int
628 623 _fini(void)
629 624 {
630 625 int status;
631 626
632 627 /*
633 628 * mod_remove causes detach to be called
634 629 */
635 630 if ((status = mod_remove(&modlinkage)) != 0) {
636 631 DERR("fini: mod_remove failed: 0x%x\n", status);
637 632 return (status);
638 633 }
639 634
640 635 daplka_resource_fini();
641 636 mutex_destroy(&daplka_dbglock);
642 637 ddi_soft_state_fini(&daplka_state);
643 638
644 639 return (status);
645 640 }
646 641
647 642 /*
648 643 * Return Module Info.
649 644 */
650 645 int
651 646 _info(struct modinfo *modinfop)
652 647 {
653 648 return (mod_info(&modlinkage, modinfop));
654 649 }
655 650
656 651 static void
657 652 daplka_enqueue_hca(daplka_t *dp, daplka_hca_t *hca)
658 653 {
659 654 daplka_hca_t *h;
660 655
661 656 ASSERT(mutex_owned(&dp->daplka_mutex));
662 657
663 658 if (dp->daplka_hca_list_head == NULL) {
664 659 dp->daplka_hca_list_head = hca;
665 660 } else {
666 661 h = dp->daplka_hca_list_head;
667 662 while (h->hca_next != NULL)
668 663 h = h->hca_next;
669 664
670 665 h->hca_next = hca;
671 666 }
672 667 }
673 668
674 669 static void
675 670 daplka_dequeue_hca(daplka_t *dp, daplka_hca_t *hca)
676 671 {
677 672 daplka_hca_t *h;
678 673
679 674 ASSERT(mutex_owned(&dp->daplka_mutex));
680 675
681 676 if (dp->daplka_hca_list_head == hca)
682 677 dp->daplka_hca_list_head = hca->hca_next;
683 678 else {
684 679 h = dp->daplka_hca_list_head;
685 680 while (h->hca_next != hca)
686 681 h = h->hca_next;
687 682 h->hca_next = hca->hca_next;
688 683 }
689 684 }
690 685
691 686 static int
692 687 daplka_init_hca(daplka_t *dp, ib_guid_t hca_guid)
693 688 {
694 689 daplka_hca_t *hca;
695 690 ibt_hca_portinfo_t *pinfop;
696 691 uint_t size;
697 692 int j;
698 693 ibt_status_t status;
699 694
700 695 hca = kmem_zalloc(sizeof (daplka_hca_t), KM_SLEEP);
701 696
702 697 hca->hca_guid = hca_guid;
703 698
704 699 /*
705 700 * open the HCA for use
706 701 */
707 702 status = ibt_open_hca(dp->daplka_clnt_hdl, hca_guid, &hca->hca_hdl);
708 703 if (status != IBT_SUCCESS) {
709 704 if (status == IBT_HCA_IN_USE) {
710 705 DERR("ibt_open_hca() returned IBT_HCA_IN_USE\n");
711 706 } else {
712 707 DERR("ibt_open_hca() returned %d\n", status);
713 708 }
714 709 kmem_free(hca, sizeof (daplka_hca_t));
715 710 return (status);
716 711 }
717 712
718 713 /*
719 714 * query HCA to get its info
720 715 */
721 716 status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
722 717 if (status != IBT_SUCCESS) {
723 718 DERR("ibt_query_hca returned %d (hca_guid 0x%llx)\n",
724 719 status, (longlong_t)hca_guid);
725 720 goto out;
726 721 }
727 722
728 723 /*
729 724 * query HCA to get info of all ports
730 725 */
731 726 status = ibt_query_hca_ports(hca->hca_hdl,
732 727 0, &pinfop, &hca->hca_nports, &size);
733 728 if (status != IBT_SUCCESS) {
734 729 DERR("ibt_query_all_ports returned %d "
735 730 "(hca_guid 0x%llx)\n", status,
736 731 (longlong_t)hca_guid);
737 732 goto out;
738 733 }
739 734 hca->hca_ports = pinfop;
740 735 hca->hca_pinfosz = size;
741 736
742 737 DERR("hca guid 0x%llx, nports %d\n",
743 738 (longlong_t)hca_guid, hca->hca_nports);
744 739 for (j = 0; j < hca->hca_nports; j++) {
745 740 DERR("port %d: state %d prefix 0x%016llx "
746 741 "guid %016llx\n",
747 742 pinfop[j].p_port_num, pinfop[j].p_linkstate,
748 743 (longlong_t)pinfop[j].p_sgid_tbl[0].gid_prefix,
749 744 (longlong_t)pinfop[j].p_sgid_tbl[0].gid_guid);
750 745 }
751 746
752 747 mutex_enter(&dp->daplka_mutex);
753 748 daplka_enqueue_hca(dp, hca);
754 749 mutex_exit(&dp->daplka_mutex);
755 750
756 751 return (IBT_SUCCESS);
757 752
758 753 out:
759 754 (void) ibt_close_hca(hca->hca_hdl);
760 755 kmem_free(hca, sizeof (daplka_hca_t));
761 756 return (status);
762 757 }
763 758
764 759 /*
765 760 * this function obtains the list of HCAs from IBTF.
766 761 * the HCAs are then opened and the returned handles
767 762 * and attributes are stored into the global daplka_dev
768 763 * structure.
769 764 */
770 765 static int
771 766 daplka_init_hcas(daplka_t *dp)
772 767 {
773 768 int i;
774 769 ib_guid_t *hca_guids;
775 770 uint32_t hca_count;
776 771
777 772 /*
778 773 * get the num & list of HCAs present
779 774 */
780 775 hca_count = ibt_get_hca_list(&hca_guids);
781 776 DERR("No. of HCAs present %d\n", hca_count);
782 777
783 778 if (hca_count != 0) {
784 779 /*
785 780 * get the info for each available HCA
786 781 */
787 782 for (i = 0; i < hca_count; i++)
788 783 (void) daplka_init_hca(dp, hca_guids[i]);
789 784
790 785 ibt_free_hca_list(hca_guids, hca_count);
791 786 }
792 787
793 788 if (dp->daplka_hca_list_head != NULL)
794 789 return (IBT_SUCCESS);
795 790 else
796 791 return (IBT_FAILURE);
797 792 }
798 793
799 794 static int
800 795 daplka_fini_hca(daplka_t *dp, daplka_hca_t *hca)
801 796 {
802 797 ibt_status_t status;
803 798
804 799 if (hca->hca_hdl != NULL) {
805 800 status = ibt_close_hca(hca->hca_hdl);
806 801 if (status != IBT_SUCCESS) {
807 802 DERR("ibt_close_hca returned %d"
808 803 " (hca_guid 0x%llx)\n", status,
809 804 (longlong_t)hca->hca_guid);
810 805
811 806 mutex_enter(&dp->daplka_mutex);
812 807 daplka_enqueue_hca(dp, hca);
813 808 mutex_exit(&dp->daplka_mutex);
814 809
815 810 return (status);
816 811 }
817 812 }
818 813
819 814 if (hca->hca_ports != NULL)
820 815 ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
821 816
822 817 kmem_free(hca, sizeof (daplka_hca_t));
823 818 return (IBT_SUCCESS);
824 819 }
825 820
826 821 /*
827 822 * closes all HCAs and frees up the HCA list
828 823 */
829 824 static int
830 825 daplka_fini_hcas(daplka_t *dp)
831 826 {
832 827 ibt_status_t status;
833 828 daplka_hca_t *hca;
834 829
835 830 mutex_enter(&daplka_dev->daplka_mutex);
836 831 while ((hca = dp->daplka_hca_list_head) != NULL) {
837 832 if (DAPLKA_HCA_BUSY(hca)) {
838 833 mutex_exit(&daplka_dev->daplka_mutex);
839 834 return (IBT_HCA_RESOURCES_NOT_FREED);
840 835 }
841 836 daplka_dequeue_hca(daplka_dev, hca);
842 837 mutex_exit(&daplka_dev->daplka_mutex);
843 838
844 839 if ((status = daplka_fini_hca(dp, hca)) != IBT_SUCCESS)
845 840 return (status);
846 841
847 842 mutex_enter(&daplka_dev->daplka_mutex);
848 843 }
849 844 mutex_exit(&daplka_dev->daplka_mutex);
850 845
851 846 DERR("dapl kernel agent unloaded\n");
852 847 return (IBT_SUCCESS);
853 848 }
854 849
855 850
856 851 /*
857 852 * Attach the device, create and fill in daplka_dev
858 853 */
859 854 static int
860 855 daplka_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
861 856 {
862 857 daplka_t *dp;
863 858 int instance, retval, err;
864 859 boolean_t sp_htbl_allocated = B_FALSE;
865 860 boolean_t timer_htbl_allocated = B_FALSE;
866 861 boolean_t shared_mr_tree_allocated = B_FALSE;
867 862
868 863 switch (cmd) {
869 864 case DDI_ATTACH:
870 865 break;
871 866 case DDI_RESUME:
872 867 return (DDI_SUCCESS);
873 868 default:
874 869 return (DDI_FAILURE);
875 870 }
876 871
877 872 /*
878 873 * Allocate soft data structure
879 874 */
880 875 instance = ddi_get_instance(dip);
881 876 if (ddi_soft_state_zalloc(daplka_state, instance) != DDI_SUCCESS) {
882 877 DERR("attach: bad state zalloc\n");
883 878 return (DDI_FAILURE);
884 879 }
885 880
886 881 dp = ddi_get_soft_state(daplka_state, instance);
887 882 if (dp == NULL) {
888 883 ddi_soft_state_free(daplka_state, instance);
889 884 DERR("attach: cannot get soft state\n");
890 885 return (DDI_FAILURE);
891 886 }
892 887 /*
893 888 * Stuff private info into dip.
894 889 */
895 890 dp->daplka_dip = dip;
896 891 ddi_set_driver_private(dip, dp);
897 892 daplka_dev = dp;
898 893 mutex_init(&dp->daplka_mutex, NULL, MUTEX_DRIVER, NULL);
899 894
900 895 /*
901 896 * Register driver with IBTF
902 897 */
903 898 retval = ibt_attach(&daplka_clnt_modinfo, dip, dp,
904 899 &dp->daplka_clnt_hdl);
905 900 if (retval != IBT_SUCCESS) {
906 901 DERR("attach: ibt_attach failed: error = %d\n", retval);
907 902 retval = DDI_FAILURE;
908 903 goto error;
909 904 }
910 905 /* Register to receive SM events */
911 906 ibt_register_subnet_notices(dp->daplka_clnt_hdl,
912 907 daplka_sm_notice_handler, NULL);
913 908
914 909 retval = daplka_init_hcas(dp);
915 910 if (retval != IBT_SUCCESS) {
916 911 DERR("attach: hca_init failed: error = %d\n", retval);
917 912 retval = DDI_FAILURE;
918 913 goto error;
919 914 }
920 915 /*
921 916 * this table is used by cr_handoff
922 917 */
923 918 retval = daplka_hash_create(&daplka_global_sp_htbl,
924 919 DAPLKA_G_SP_HTBL_SZ, daplka_hash_sp_unref,
925 920 daplka_hash_generic_lookup);
926 921 if (retval != 0) {
927 922 DERR("attach: cannot create sp hash table\n");
928 923 retval = DDI_FAILURE;
929 924 goto error;
930 925 }
931 926 sp_htbl_allocated = B_TRUE;
932 927
933 928 /*
934 929 * this table stores per EP timer information.
935 930 * timer_info_t objects are inserted into this table whenever
936 931 * a EP timer is set. timers get removed when they expire
937 932 * or when they get cancelled.
938 933 */
939 934 retval = daplka_hash_create(&daplka_timer_info_htbl,
940 935 DAPLKA_TIMER_HTBL_SZ, daplka_hash_timer_free, NULL);
941 936 if (retval != 0) {
942 937 DERR("attach: cannot create timer hash table\n");
943 938 retval = DDI_FAILURE;
944 939 goto error;
945 940 }
946 941 timer_htbl_allocated = B_TRUE;
947 942
948 943 /*
949 944 * this taskq is currently only used for processing timers.
950 945 * other processing may also use this taskq in the future.
951 946 */
952 947 daplka_taskq = taskq_create(DAPLKA_DRV_NAME, DAPLKA_TQ_NTHREADS,
953 948 maxclsyspri, 1, DAPLKA_TQ_NTHREADS, TASKQ_DYNAMIC);
954 949 if (daplka_taskq == NULL) {
955 950 DERR("attach: cannot create daplka_taskq\n");
956 951 retval = DDI_FAILURE;
957 952 goto error;
958 953 }
959 954
960 955 /*
961 956 * daplka_shared_mr_tree holds daplka_shared_mr_t objects that
962 957 * gets retrieved or created when daplka_mr_register_shared is
963 958 * called.
964 959 */
965 960 mutex_init(&daplka_shared_mr_lock, NULL, MUTEX_DRIVER, NULL);
966 961
967 962 avl_create(&daplka_shared_mr_tree, daplka_shared_mr_cmp,
968 963 sizeof (daplka_shared_mr_t),
969 964 offsetof(daplka_shared_mr_t, smr_node));
970 965 shared_mr_tree_allocated = B_TRUE;
971 966
972 967 /*
973 968 * Create the filesystem device node.
974 969 */
975 970 if (ddi_create_minor_node(dip, DAPLKA_MINOR_NAME, S_IFCHR,
976 971 0, DDI_PSEUDO, NULL) != DDI_SUCCESS) {
977 972 DERR("attach: bad create_minor_node\n");
978 973 retval = DDI_FAILURE;
979 974 goto error;
980 975 }
981 976 dp->daplka_status = DAPLKA_STATE_ATTACHED;
982 977 ddi_report_dev(dip);
983 978 return (DDI_SUCCESS);
984 979
985 980 error:
986 981 if (shared_mr_tree_allocated) {
987 982 avl_destroy(&daplka_shared_mr_tree);
988 983 mutex_destroy(&daplka_shared_mr_lock);
989 984 }
990 985
991 986 if (daplka_taskq) {
992 987 taskq_destroy(daplka_taskq);
993 988 daplka_taskq = NULL;
994 989 }
995 990
996 991 if (timer_htbl_allocated) {
997 992 daplka_hash_destroy(&daplka_timer_info_htbl);
998 993 }
999 994
1000 995 if (sp_htbl_allocated) {
1001 996 daplka_hash_destroy(&daplka_global_sp_htbl);
1002 997 }
1003 998
1004 999 err = daplka_fini_hcas(dp);
1005 1000 if (err != IBT_SUCCESS) {
1006 1001 DERR("attach: hca_fini returned %d\n", err);
1007 1002 }
1008 1003
1009 1004 if (dp->daplka_clnt_hdl != NULL) {
1010 1005 /* unregister SM event notification */
1011 1006 ibt_register_subnet_notices(dp->daplka_clnt_hdl,
1012 1007 (ibt_sm_notice_handler_t)NULL, NULL);
1013 1008 err = ibt_detach(dp->daplka_clnt_hdl);
1014 1009
1015 1010 if (err != IBT_SUCCESS) {
1016 1011 DERR("attach: ibt_detach returned %d\n", err);
1017 1012 }
1018 1013 }
1019 1014 mutex_destroy(&dp->daplka_mutex);
1020 1015
1021 1016 if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
1022 1017 ddi_remove_minor_node(dip, NULL);
1023 1018 }
1024 1019 ddi_soft_state_free(daplka_state, instance);
1025 1020 return (retval);
1026 1021 }
1027 1022
1028 1023 /*
1029 1024 * Detach - Free resources allocated in attach
1030 1025 */
/* ARGSUSED */
static int
daplka_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance, err;
	void *cookie = NULL;
	daplka_t *dp;

	if (cmd != DDI_DETACH) {
		/* DDI_SUSPEND (and anything else) is not supported */
		return (DDI_FAILURE);
	}
	/*
	 * refuse to detach while any client resources or pending
	 * close operations are still outstanding
	 */
	if (daplka_resource.daplka_rc_cnt > 0 ||
	    daplka_pending_close > 0) {
		DERR("detach: driver in use\n");
		return (DDI_FAILURE);
	}

	instance = ddi_get_instance(dip);
	dp = ddi_get_soft_state(daplka_state, instance);
	if (dp == NULL) {
		DERR("detach: cannot get soft state\n");
		return (DDI_FAILURE);
	}
	/* tear down per-HCA state before detaching from IBTF */
	err = daplka_fini_hcas(dp);
	if (err != IBT_SUCCESS) {
		DERR("detach: hca_fini returned %d\n", err);
		return (DDI_FAILURE);
	}
	if (dp->daplka_clnt_hdl != NULL) {
		/* unregister SM event notification */
		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
		    (ibt_sm_notice_handler_t)NULL, NULL);
		err = ibt_detach(dp->daplka_clnt_hdl);
		if (err != IBT_SUCCESS) {
			DERR("detach: ibt_detach returned %d\n", err);
			return (DDI_FAILURE);
		}
		dp->daplka_clnt_hdl = NULL;
	}
	mutex_destroy(&dp->daplka_mutex);
	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
		ddi_remove_minor_node(dip, NULL);
	}
	dp->daplka_status = DAPLKA_STATE_DETACHED;
	ddi_soft_state_free(daplka_state, instance);
	daplka_dev = NULL;

	/*
	 * by the time we get here, all clients of dapl should
	 * have exited and completed their cleanup properly.
	 * we can assert that all global data structures are now
	 * empty.
	 *
	 * NOTE(review): avl_destroy_nodes() is called with a side
	 * effect inside ASSERT(), so it is compiled out on non-DEBUG
	 * builds. This appears benign only because the tree is
	 * expected to be empty here, making avl_destroy() alone
	 * sufficient -- confirm.
	 */
	ASSERT(avl_destroy_nodes(&daplka_shared_mr_tree, &cookie) == NULL);
	avl_destroy(&daplka_shared_mr_tree);
	mutex_destroy(&daplka_shared_mr_lock);

	ASSERT(daplka_hash_getsize(&daplka_timer_info_htbl) == 0);
	daplka_hash_destroy(&daplka_timer_info_htbl);

	ASSERT(daplka_hash_getsize(&daplka_global_sp_htbl) == 0);
	daplka_hash_destroy(&daplka_global_sp_htbl);

	taskq_destroy(daplka_taskq);

	return (DDI_SUCCESS);
}
1098 1093
1099 1094 /* ARGSUSED */
1100 1095 static int
1101 1096 daplka_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1102 1097 {
1103 1098 switch (infocmd) {
1104 1099 case DDI_INFO_DEVT2DEVINFO:
1105 1100 if (daplka_dev != NULL) {
1106 1101 *result = daplka_dev->daplka_dip;
1107 1102 return (DDI_SUCCESS);
1108 1103 } else {
1109 1104 return (DDI_FAILURE);
1110 1105 }
1111 1106
1112 1107 case DDI_INFO_DEVT2INSTANCE:
1113 1108 *result = 0;
1114 1109 return (DDI_SUCCESS);
1115 1110
1116 1111 default:
1117 1112 return (DDI_FAILURE);
1118 1113 }
1119 1114 }
1120 1115
/*
 * creates an EP resource.
 * An EP resource contains an RC channel. An EP resource holds a
 * reference to a send_evd (for the send CQ), recv_evd (for the
 * recv CQ), a connection evd and a PD. These references ensure
 * that the referenced resources are not freed until the EP itself
 * gets freed.
 */
/* ARGSUSED */
static int
daplka_ep_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp;
	daplka_pd_resource_t *pd_rp;
	dapl_ep_create_t args;
	ibt_rc_chan_alloc_args_t chan_args;
	ibt_chan_alloc_flags_t achan_flags;
	ibt_chan_sizes_t chan_real_sizes;
	ibt_hca_attr_t *hca_attrp;
	uint64_t ep_hkey = 0;
	boolean_t inserted = B_FALSE;
	uint32_t old_state, new_state;
	int retval;
	ibt_status_t status;

	D3("ep_create: enter\n");
	/* copy in the creation arguments from the user library */
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyin error %d\n", retval);
		return (EFAULT);
	}
	ep_rp = kmem_zalloc(sizeof (daplka_ep_resource_t), daplka_km_flags);
	if (ep_rp == NULL) {
		DERR("ep_create: cannot allocate ep_rp\n");
		return (ENOMEM);
	}
	/* initial refcount; daplka_ep_destroy runs when it drops to zero */
	DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP,
	    DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy);

	mutex_init(&ep_rp->ep_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ep_rp->ep_cv, NULL, CV_DRIVER, NULL);
	ep_rp->ep_hca = ia_rp->ia_hca;
	ep_rp->ep_cookie = args.ep_cookie;
	ep_rp->ep_timer_hkey = 0;

	/*
	 * we don't have to use ep_get_state here because ep_rp is not in
	 * ep_htbl yet. refer to the description of daplka_ep_set_state
	 * for details about the EP state machine.
	 */
	ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
	new_state = old_state = DAPLKA_EP_STATE_CLOSED;

	/* get reference to send evd and get cq handle */
	ep_rp->ep_snd_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_snd_evd_hkey);
	if (ep_rp->ep_snd_evd == NULL) {
		DERR("ep_create: ep_snd_evd %llx not found\n",
		    args.ep_snd_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_scq = ep_rp->ep_snd_evd->evd_cq_hdl;
	if (chan_args.rc_scq == NULL) {
		DERR("ep_create: ep_snd_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to recv evd and get cq handle */
	ep_rp->ep_rcv_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_rcv_evd_hkey);
	if (ep_rp->ep_rcv_evd == NULL) {
		DERR("ep_create: ep_rcv_evd %llx not found\n",
		    args.ep_rcv_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}
	chan_args.rc_rcq = ep_rp->ep_rcv_evd->evd_cq_hdl;
	if (chan_args.rc_rcq == NULL) {
		DERR("ep_create: ep_rcv_evd cq invalid\n");
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to conn evd */
	ep_rp->ep_conn_evd = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_conn_evd_hkey);
	if (ep_rp->ep_conn_evd == NULL) {
		DERR("ep_create: ep_conn_evd %llx not found\n",
		    args.ep_conn_evd_hkey);
		retval = EINVAL;
		goto cleanup;
	}

	/* get reference to SRQ if needed */
	if (args.ep_srq_attached) {
		ep_rp->ep_srq_res = (daplka_srq_resource_t *)daplka_hash_lookup(
		    &ia_rp->ia_srq_htbl, args.ep_srq_hkey);
		if (ep_rp->ep_srq_res == NULL) {
			DERR("ep_create: ep_srq %llx not found\n",
			    (longlong_t)args.ep_srq_hkey);
			retval = EINVAL;
			goto cleanup;
		}
		ASSERT(DAPLKA_RS_TYPE(ep_rp->ep_srq_res) == DAPL_TYPE_SRQ);
		D3("ep_create: ep_srq %p %llx\n", ep_rp->ep_srq_res,
		    (longlong_t)args.ep_srq_hkey);
	} else {
		ep_rp->ep_srq_res = NULL;
	}

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.ep_pd_hkey);
	if (pd_rp == NULL) {
		DERR("ep_create: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	ep_rp->ep_pd_res = pd_rp;
	chan_args.rc_pd = pd_rp->pd_hdl;


	/*
	 * these checks ensure that the requested channel sizes
	 * are within the limits supported by the chosen HCA.
	 */
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.ep_ch_sizes.dcs_sq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_sq_sgl %d\n",
		    args.ep_ch_sizes.dcs_sq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq_sgl > hca_attrp->hca_max_sgl) {
		DERR("ep_create: invalid cs_rq_sgl %d\n",
		    args.ep_ch_sizes.dcs_rq_sgl);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_sq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_sq %d\n",
		    args.ep_ch_sizes.dcs_sq);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.ep_ch_sizes.dcs_rq > hca_attrp->hca_max_chan_sz) {
		DERR("ep_create: invalid cs_rq %d\n",
		    args.ep_ch_sizes.dcs_rq);
		retval = EINVAL;
		goto cleanup;
	}

	/* fill in the remaining channel allocation attributes */
	chan_args.rc_sizes.cs_sq_sgl = args.ep_ch_sizes.dcs_sq_sgl;
	chan_args.rc_sizes.cs_rq_sgl = args.ep_ch_sizes.dcs_rq_sgl;
	chan_args.rc_sizes.cs_sq = args.ep_ch_sizes.dcs_sq;
	chan_args.rc_sizes.cs_rq = args.ep_ch_sizes.dcs_rq;
	chan_args.rc_flags = IBT_WR_SIGNALED;
	chan_args.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
	chan_args.rc_hca_port_num = ia_rp->ia_port_num;
	chan_args.rc_clone_chan = NULL;
	if (args.ep_srq_attached) {
		chan_args.rc_srq = ep_rp->ep_srq_res->srq_hdl;
	} else {
		chan_args.rc_srq = NULL;
	}

	D3("ep_create: sq_sgl %d, rq_sgl %d, sq %d, rq %d, "
	    "sig_type 0x%x, control 0x%x, portnum %d, clone_chan 0x%p\n",
	    args.ep_ch_sizes.dcs_sq_sgl, args.ep_ch_sizes.dcs_rq_sgl,
	    args.ep_ch_sizes.dcs_sq, args.ep_ch_sizes.dcs_rq,
	    chan_args.rc_flags, chan_args.rc_control,
	    chan_args.rc_hca_port_num, chan_args.rc_clone_chan);

	if (args.ep_srq_attached) {
		achan_flags = IBT_ACHAN_USER_MAP | IBT_ACHAN_USES_SRQ;
	} else {
		achan_flags = IBT_ACHAN_USER_MAP;
	}
	/* create rc channel */
	status = daplka_ibt_alloc_rc_channel(ep_rp, ia_rp->ia_hca_hdl,
	    achan_flags, &chan_args, &ep_rp->ep_chan_hdl,
	    &chan_real_sizes);
	if (status != IBT_SUCCESS) {
		/* IBT errors are returned out-of-band via *rvalp */
		DERR("ep_create: alloc_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* report the actual (possibly rounded-up) sizes back to the user */
	args.ep_ch_real_sizes.dcs_sq = chan_real_sizes.cs_sq;
	args.ep_ch_real_sizes.dcs_rq = chan_real_sizes.cs_rq;
	args.ep_ch_real_sizes.dcs_sq_sgl = chan_real_sizes.cs_sq_sgl;
	args.ep_ch_real_sizes.dcs_rq_sgl = chan_real_sizes.cs_rq_sgl;

	/*
	 * store ep ptr with chan_hdl.
	 * this ep_ptr is used by the CM handlers (both active and
	 * passive)
	 * mutex is only needed for race of "destroy" and "async"
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	ibt_set_chan_private(ep_rp->ep_chan_hdl, (void *)ep_rp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_CHANNEL, (void *)ep_rp->ep_chan_hdl,
	    &args.ep_qp_data_out, sizeof (args.ep_qp_data_out));

	if (status != IBT_SUCCESS) {
		DERR("ep_create: ibt_ci_data_out error(%d)\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into ep hash table */
	retval = daplka_hash_insert(&ia_rp->ia_ep_htbl,
	    &ep_hkey, (void *)ep_rp);
	if (retval != 0) {
		DERR("ep_create: cannot insert ep resource into ep_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	/*
	 * at this point, the ep_rp can be looked up by other threads
	 * if they manage to guess the correct hkey. but they are not
	 * permitted to operate on ep_rp until we transition to the
	 * CLOSED state.
	 */

	/* return hkey to library */
	args.ep_hkey = ep_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ep_create_t),
	    mode);
	if (retval != 0) {
		DERR("ep_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

	/* release the TRANSITIONING hold; EP becomes usable (CLOSED) */
	daplka_ep_set_state(ep_rp, old_state, new_state);
	D3("ep_create: exit\n");
	return (0);

cleanup:
	if (inserted) {
		daplka_ep_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_ep_htbl, ep_hkey,
		    (void **)&free_rp);
		if (free_rp != ep_rp) {
			/*
			 * this case is impossible because ep_free will
			 * wait until our state transition is complete.
			 */
			DERR("ep_create: cannot remove ep from hash table\n");
			ASSERT(B_FALSE);
			return (retval);
		}
	}
	/* dropping the last reference triggers daplka_ep_destroy */
	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}
1395 1390
1396 1391 /*
1397 1392 * daplka_ep_get_state retrieves the current state of the EP and
1398 1393 * sets the state to TRANSITIONING. if the current state is already
1399 1394 * TRANSITIONING, this function will wait until the state becomes one
1400 1395 * of the other EP states. Most of the EP related ioctls follow the
1401 1396 * call sequence:
1402 1397 *
1403 1398 * new_state = old_state = daplka_ep_get_state(ep_rp);
1404 1399 * ...
1405 1400 * ...some code that affects the EP
1406 1401 * ...
1407 1402 * new_state = <NEW_STATE>;
1408 1403 * daplka_ep_set_state(ep_rp, old_state, new_state);
1409 1404 *
1410 1405 * this call sequence ensures that only one thread may access the EP
1411 1406 * during the time ep_state is in TRANSITIONING. daplka_ep_set_state
1412 1407 * transitions ep_state to new_state and wakes up any waiters blocking
1413 1408 * on ep_cv.
1414 1409 *
1415 1410 */
1416 1411 static uint32_t
1417 1412 daplka_ep_get_state(daplka_ep_resource_t *ep_rp)
1418 1413 {
1419 1414 uint32_t old_state = 0;
1420 1415
1421 1416 mutex_enter(&ep_rp->ep_lock);
1422 1417 while (ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING) {
1423 1418 D2("get_state: wait for state transition to complete\n");
1424 1419 cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
1425 1420 D2("get_state: done, curr state = %d\n", ep_rp->ep_state);
1426 1421 }
1427 1422 ASSERT(ep_rp->ep_state != DAPLKA_EP_STATE_TRANSITIONING);
1428 1423 old_state = ep_rp->ep_state;
1429 1424
1430 1425 /*
1431 1426 * an ep that is in the FREED state cannot transition
1432 1427 * back to any of the regular states
1433 1428 */
1434 1429 if (old_state != DAPLKA_EP_STATE_FREED) {
1435 1430 ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
1436 1431 }
1437 1432 mutex_exit(&ep_rp->ep_lock);
1438 1433 return (old_state);
1439 1434 }
1440 1435
1441 1436 /*
1442 1437 * EP state transition diagram
1443 1438 *
1444 1439 * CLOSED<-------------------
1445 1440 * | |
1446 1441 * | |
1447 1442 * ------------------------ |
1448 1443 * | | |
1449 1444 * | | |
1450 1445 * v v |
1451 1446 * CONNECTING ACCEPTING |
1452 1447 * | | | | | |
1453 1448 * | | | | | |
1454 1449 * | | | | | |
1455 1450 * | | |_______|_______| |
1456 1451 * | | | | | |
1457 1452 * | |___________| | | |
1458 1453 * | | | | |
1459 1454 * | v | |---->DISCONNECTED
1460 1455 * | CONNECTED | ^
1461 1456 * v | | |
1462 1457 * ABORTING |---------|--------------|
1463 1458 * | | | |
1464 1459 * | | v |
1465 1460 * | |-------->DISCONNECTING--|
1466 1461 * | |
1467 1462 * |---------------------------------|
1468 1463 *
1469 1464 * *not shown in this diagram:
1470 1465 * -loopback transitions
1471 1466 * -transitions to the FREED state
1472 1467 */
1473 1468 static boolean_t
1474 1469 daplka_ep_transition_is_valid(uint32_t old_state, uint32_t new_state)
1475 1470 {
1476 1471 boolean_t valid = B_FALSE;
1477 1472
1478 1473 /*
1479 1474 * reseting to the same state is a no-op and is always
1480 1475 * permitted. transitioning to the FREED state indicates
1481 1476 * that the ep is about to be freed and no further operation
1482 1477 * is allowed on it. to support abrupt close, the ep is
1483 1478 * permitted to transition to the FREED state from any state.
1484 1479 */
1485 1480 if (old_state == new_state ||
1486 1481 new_state == DAPLKA_EP_STATE_FREED) {
1487 1482 return (B_TRUE);
1488 1483 }
1489 1484
1490 1485 switch (old_state) {
1491 1486 case DAPLKA_EP_STATE_CLOSED:
1492 1487 /*
1493 1488 * this is the initial ep_state.
1494 1489 * a transition to CONNECTING or ACCEPTING may occur
1495 1490 * upon calling daplka_ep_connect or daplka_cr_accept,
1496 1491 * respectively.
1497 1492 */
1498 1493 if (new_state == DAPLKA_EP_STATE_CONNECTING ||
1499 1494 new_state == DAPLKA_EP_STATE_ACCEPTING) {
1500 1495 valid = B_TRUE;
1501 1496 }
1502 1497 break;
1503 1498 case DAPLKA_EP_STATE_CONNECTING:
1504 1499 /*
1505 1500 * we transition to this state if daplka_ep_connect
1506 1501 * is successful. from this state, we can transition
1507 1502 * to CONNECTED if daplka_cm_rc_conn_est gets called;
1508 1503 * or to DISCONNECTED if daplka_cm_rc_conn_closed or
1509 1504 * daplka_cm_rc_event_failure gets called. If the
1510 1505 * client calls daplka_ep_disconnect, we transition
1511 1506 * to DISCONNECTING. If a timer was set at ep_connect
1512 1507 * time and if the timer expires prior to any of the
1513 1508 * CM callbacks, we transition to ABORTING and then
1514 1509 * to DISCONNECTED.
1515 1510 */
1516 1511 if (new_state == DAPLKA_EP_STATE_CONNECTED ||
1517 1512 new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1518 1513 new_state == DAPLKA_EP_STATE_DISCONNECTED ||
1519 1514 new_state == DAPLKA_EP_STATE_ABORTING) {
1520 1515 valid = B_TRUE;
1521 1516 }
1522 1517 break;
1523 1518 case DAPLKA_EP_STATE_ACCEPTING:
1524 1519 /*
1525 1520 * we transition to this state if daplka_cr_accept
1526 1521 * is successful. from this state, we can transition
1527 1522 * to CONNECTED if daplka_cm_service_conn_est gets called;
1528 1523 * or to DISCONNECTED if daplka_cm_service_conn_closed or
1529 1524 * daplka_cm_service_event_failure gets called. If the
1530 1525 * client calls daplka_ep_disconnect, we transition to
1531 1526 * DISCONNECTING.
1532 1527 */
1533 1528 if (new_state == DAPLKA_EP_STATE_CONNECTED ||
1534 1529 new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1535 1530 new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1536 1531 valid = B_TRUE;
1537 1532 }
1538 1533 break;
1539 1534 case DAPLKA_EP_STATE_CONNECTED:
1540 1535 /*
1541 1536 * we transition to this state if a active or passive
1542 1537 * connection gets established. if the client calls
1543 1538 * daplka_ep_disconnect, we transition to the
1544 1539 * DISCONNECTING state. subsequent CM callbacks will
1545 1540 * cause ep_state to be set to DISCONNECTED. If the
1546 1541 * remote peer terminates the connection before we do,
1547 1542 * it is possible for us to transition directly from
1548 1543 * CONNECTED to DISCONNECTED.
1549 1544 */
1550 1545 if (new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1551 1546 new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1552 1547 valid = B_TRUE;
1553 1548 }
1554 1549 break;
1555 1550 case DAPLKA_EP_STATE_DISCONNECTING:
1556 1551 /*
1557 1552 * we transition to this state if the client calls
1558 1553 * daplka_ep_disconnect.
1559 1554 */
1560 1555 if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1561 1556 valid = B_TRUE;
1562 1557 }
1563 1558 break;
1564 1559 case DAPLKA_EP_STATE_ABORTING:
1565 1560 /*
1566 1561 * we transition to this state if the active side
1567 1562 * EP timer has expired. this is only a transient
1568 1563 * state that is set during timer processing. when
1569 1564 * timer processing completes, ep_state will become
1570 1565 * DISCONNECTED.
1571 1566 */
1572 1567 if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1573 1568 valid = B_TRUE;
1574 1569 }
1575 1570 break;
1576 1571 case DAPLKA_EP_STATE_DISCONNECTED:
1577 1572 /*
1578 1573 * we transition to this state if we get a closed
1579 1574 * or event_failure CM callback. an expired timer
1580 1575 * can also cause us to be in this state. this
1581 1576 * is the only state in which we permit the
1582 1577 * ep_reinit operation.
1583 1578 */
1584 1579 if (new_state == DAPLKA_EP_STATE_CLOSED) {
1585 1580 valid = B_TRUE;
1586 1581 }
1587 1582 break;
1588 1583 default:
1589 1584 break;
1590 1585 }
1591 1586
1592 1587 if (!valid) {
1593 1588 DERR("ep_transition: invalid state change %d -> %d\n",
1594 1589 old_state, new_state);
1595 1590 }
1596 1591 return (valid);
1597 1592 }
1598 1593
1599 1594 /*
1600 1595 * first check if the transition is valid. then set ep_state
1601 1596 * to new_state and wake up all waiters.
1602 1597 */
1603 1598 static void
1604 1599 daplka_ep_set_state(daplka_ep_resource_t *ep_rp, uint32_t old_state,
1605 1600 uint32_t new_state)
1606 1601 {
1607 1602 boolean_t valid;
1608 1603
1609 1604 ASSERT(new_state != DAPLKA_EP_STATE_TRANSITIONING);
1610 1605
1611 1606 valid = daplka_ep_transition_is_valid(old_state, new_state);
1612 1607 mutex_enter(&ep_rp->ep_lock);
1613 1608 if (ep_rp->ep_state != DAPLKA_EP_STATE_FREED) {
1614 1609 if (valid) {
1615 1610 ep_rp->ep_state = new_state;
1616 1611 } else {
1617 1612 /*
1618 1613 * this case is impossible.
1619 1614 * we have a serious problem if we get here.
1620 1615 * instead of panicing, we reset the state to
1621 1616 * old_state. doing this would at least prevent
1622 1617 * threads from hanging due to ep_state being
1623 1618 * stuck in TRANSITIONING.
1624 1619 */
1625 1620 ep_rp->ep_state = old_state;
1626 1621 ASSERT(B_FALSE);
1627 1622 }
1628 1623 }
1629 1624 cv_broadcast(&ep_rp->ep_cv);
1630 1625 mutex_exit(&ep_rp->ep_lock);
1631 1626 }
1632 1627
1633 1628 /*
1634 1629 * modifies RC channel attributes.
1635 1630 * currently, only the rdma_in and rdma_out attributes may
1636 1631 * be modified. the channel must be in quiescent state when
1637 1632 * this function is called.
1638 1633 */
/* ARGSUSED */
static int
daplka_ep_modify(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp = NULL;
	ibt_cep_modify_flags_t good_flags;
	ibt_rc_chan_modify_attr_t rcm_attr;
	ibt_hca_attr_t *hca_attrp;
	dapl_ep_modify_t args;
	ibt_status_t status;
	uint32_t old_state, new_state;
	int retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_modify_t),
	    mode);
	if (retval != 0) {
		DERR("ep_modify: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* lookup adds a hold on ep_rp; dropped before returning */
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epm_hkey);
	if (ep_rp == NULL) {
		DERR("ep_modify: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	/* gain exclusive access; EP is now in TRANSITIONING */
	new_state = old_state = daplka_ep_get_state(ep_rp);

	/* the channel must be quiescent to be modified */
	if (old_state != DAPLKA_EP_STATE_CLOSED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
		DERR("ep_modify: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	/* only the rdma_in/rdma_out resource counts may be changed */
	good_flags = IBT_CEP_SET_RDMARA_OUT | IBT_CEP_SET_RDMARA_IN;
	if ((args.epm_flags & ~good_flags) != 0) {
		DERR("ep_modify: invalid flags 0x%x\n", args.epm_flags);
		retval = EINVAL;
		goto cleanup;
	}

	hca_attrp = &ia_rp->ia_hca->hca_attr;

	bzero(&rcm_attr, sizeof (ibt_rc_chan_modify_attr_t));
	/* validate each requested value against the HCA's limits */
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_OUT) != 0) {
		if (args.epm_rdma_ra_out > hca_attrp->hca_max_rdma_out_chan) {
			DERR("ep_modify: invalid epm_rdma_ra_out %d\n",
			    args.epm_rdma_ra_out);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_out = args.epm_rdma_ra_out;
	}
	if ((args.epm_flags & IBT_CEP_SET_RDMARA_IN) != 0) {
		if (args.epm_rdma_ra_in > hca_attrp->hca_max_rdma_in_chan) {
			DERR("ep_modify: epm_rdma_ra_in %d\n",
			    args.epm_rdma_ra_in);
			retval = EINVAL;
			goto cleanup;
		}
		rcm_attr.rc_rdma_ra_in = args.epm_rdma_ra_in;
	}
	status = ibt_modify_rc_channel(ep_rp->ep_chan_hdl, args.epm_flags,
	    &rcm_attr, NULL);
	if (status != IBT_SUCCESS) {
		/* IBT errors are returned out-of-band via *rvalp */
		DERR("ep_modify: modify_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/*
	 * ep_modify does not change ep_state
	 */
cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}
1720 1715
/*
 * Frees an EP resource.
 * An EP may only be freed when it is in the CLOSED or
 * DISCONNECTED state.
 */
1726 1721 /* ARGSUSED */
1727 1722 static int
1728 1723 daplka_ep_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
1729 1724 cred_t *cred, int *rvalp)
1730 1725 {
1731 1726 daplka_ep_resource_t *ep_rp = NULL;
1732 1727 dapl_ep_free_t args;
1733 1728 uint32_t old_state, new_state;
1734 1729 int retval;
1735 1730
1736 1731 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_free_t), mode);
1737 1732 if (retval != 0) {
1738 1733 DERR("ep_free: copyin error %d\n", retval);
1739 1734 return (EFAULT);
1740 1735 }
1741 1736 ep_rp = (daplka_ep_resource_t *)
1742 1737 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epf_hkey);
1743 1738 if (ep_rp == NULL) {
1744 1739 DERR("ep_free: cannot find ep resource\n");
1745 1740 return (EINVAL);
1746 1741 }
1747 1742 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
1748 1743 new_state = old_state = daplka_ep_get_state(ep_rp);
1749 1744
1750 1745 /*
1751 1746 * ep cannot be freed if it is in an invalid state.
1752 1747 */
1753 1748 if (old_state != DAPLKA_EP_STATE_CLOSED &&
1754 1749 old_state != DAPLKA_EP_STATE_DISCONNECTED) {
1755 1750 DERR("ep_free: invalid state %d\n", old_state);
1756 1751 retval = EINVAL;
1757 1752 goto cleanup;
1758 1753 }
1759 1754 ep_rp = NULL;
1760 1755 retval = daplka_hash_remove(&ia_rp->ia_ep_htbl,
1761 1756 args.epf_hkey, (void **)&ep_rp);
1762 1757 if (retval != 0 || ep_rp == NULL) {
1763 1758 /*
1764 1759 * this is only possible if we have two threads
1765 1760 * calling ep_free in parallel.
1766 1761 */
1767 1762 DERR("ep_free: cannot find ep resource\n");
1768 1763 goto cleanup;
1769 1764 }
1770 1765 /* there should not be any outstanding timers */
1771 1766 ASSERT(ep_rp->ep_timer_hkey == 0);
1772 1767
1773 1768 new_state = DAPLKA_EP_STATE_FREED;
1774 1769 daplka_ep_set_state(ep_rp, old_state, new_state);
1775 1770
1776 1771 /* remove reference obtained by lookup */
1777 1772 DAPLKA_RS_UNREF(ep_rp);
1778 1773
1779 1774 /* UNREF calls the actual free function when refcnt is zero */
1780 1775 DAPLKA_RS_UNREF(ep_rp);
1781 1776 return (0);
1782 1777
1783 1778 cleanup:;
1784 1779 daplka_ep_set_state(ep_rp, old_state, new_state);
1785 1780
1786 1781 /* remove reference obtained by lookup */
1787 1782 DAPLKA_RS_UNREF(ep_rp);
1788 1783 return (retval);
1789 1784 }
1790 1785
/*
 * The following routines support the timeout feature of ep_connect.
 * Refer to the description of ep_connect for details.
 */
1795 1790
1796 1791 /*
1797 1792 * this is the timer processing thread.
1798 1793 */
/*
 * Timer processing thread, run from the taskq. Takes ownership of the
 * daplka_timer_info_t passed in "arg" and frees it (which also drops the
 * EP reference held by the timer) before returning. If the EP is still
 * in CONNECTING state, aborts the connection attempt and delivers a
 * DAPL_IB_CME_TIMED_OUT event to the EP's connection EVD.
 */
static void
daplka_timer_thread(void *arg)
{
	daplka_timer_info_t *timerp = (daplka_timer_info_t *)arg;
	daplka_ep_resource_t *ep_rp;
	daplka_evd_event_t *disc_ev = NULL;
	ibt_status_t status;
	int old_state, new_state;

	ep_rp = timerp->ti_ep_res;
	ASSERT(ep_rp != NULL);
	/* the callout has fired; clear the id so cancel won't untimeout */
	ASSERT(timerp->ti_tmo_id != 0);
	timerp->ti_tmo_id = 0;

	/* get_state freezes ep_state at TRANSITIONING until set_state */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/*
		 * a CM callback or user operation got here first; the
		 * timeout is moot. just clear ep_timer_hkey and wake any
		 * hash_ep_free waiting on it.
		 */
		/* unblock hash_ep_free */
		mutex_enter(&ep_rp->ep_lock);
		ASSERT(ep_rp->ep_timer_hkey != 0);
		ep_rp->ep_timer_hkey = 0;
		cv_broadcast(&ep_rp->ep_cv);
		mutex_exit(&ep_rp->ep_lock);

		/* reset state to original state */
		daplka_ep_set_state(ep_rp, old_state, new_state);

		/* this function will also unref ep_rp */
		daplka_timer_info_free(timerp);
		return;
	}

	ASSERT(ep_rp->ep_timer_hkey != 0);
	ep_rp->ep_timer_hkey = 0;

	/*
	 * we cannot keep ep_state in TRANSITIONING if we call
	 * ibt_close_rc_channel in blocking mode. this would cause
	 * a deadlock because the cm callbacks will be blocked and
	 * will not be able to wake us up.
	 */
	new_state = DAPLKA_EP_STATE_ABORTING;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * when we return from close_rc_channel, all callbacks should have
	 * completed. we can also be certain that these callbacks did not
	 * enqueue any events to conn_evd.
	 */
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    NULL, 0, NULL, NULL, NULL);
	if (status != IBT_SUCCESS) {
		DERR("timer_thread: ibt_close_rc_channel returned %d\n",
		    status);
	}
	old_state = daplka_ep_get_state(ep_rp);

	/*
	 * this is the only thread that can transition ep_state out
	 * of ABORTING. all other ep operations would fail when
	 * ep_state is in ABORTING.
	 */
	ASSERT(old_state == DAPLKA_EP_STATE_ABORTING);

	/* KM_SLEEP alloc cannot fail; build the TIMED_OUT CM event */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_SLEEP);
	ASSERT(disc_ev != NULL);

	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;

	D2("timer_thread: enqueue event(%p) evdp(%p)\n",
	    disc_ev, ep_rp->ep_conn_evd);

	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/* deliver the event and wake any waiter on the conn EVD */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	/* this function will also unref ep_rp */
	daplka_timer_info_free(timerp);
}
1884 1879
1885 1880 /*
1886 1881 * dispatches a thread to continue with timer processing.
1887 1882 */
1888 1883 static void
1889 1884 daplka_timer_dispatch(void *arg)
1890 1885 {
1891 1886 /*
1892 1887 * keep rescheduling this function until
1893 1888 * taskq_dispatch succeeds.
1894 1889 */
1895 1890 if (taskq_dispatch(daplka_taskq,
1896 1891 daplka_timer_thread, arg, TQ_NOSLEEP) == 0) {
1897 1892 DERR("timer_dispatch: taskq_dispatch failed, retrying...\n");
1898 1893 (void) timeout(daplka_timer_dispatch, arg, 10);
1899 1894 }
1900 1895 }
1901 1896
1902 1897 /*
1903 1898 * this function is called by the kernel's callout thread.
1904 1899 * we first attempt to remove the timer object from the
1905 1900 * global timer table. if it is found, we dispatch a thread
1906 1901 * to continue processing the timer object. if it is not
1907 1902 * found, that means the timer has been cancelled by someone
1908 1903 * else.
1909 1904 */
1910 1905 static void
1911 1906 daplka_timer_handler(void *arg)
1912 1907 {
1913 1908 uint64_t timer_hkey = (uintptr_t)arg;
1914 1909 daplka_timer_info_t *timerp = NULL;
1915 1910
1916 1911 D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)timer_hkey);
1917 1912
1918 1913 (void) daplka_hash_remove(&daplka_timer_info_htbl,
1919 1914 timer_hkey, (void **)&timerp);
1920 1915 if (timerp == NULL) {
1921 1916 D2("timer_handler: timer already cancelled\n");
1922 1917 return;
1923 1918 }
1924 1919 daplka_timer_dispatch((void *)timerp);
1925 1920 }
1926 1921
1927 1922 /*
1928 1923 * allocates a timer_info object.
1929 1924 * a reference to a EP is held by this object. this ensures
1930 1925 * that the EP stays valid when a timer is outstanding.
1931 1926 */
1932 1927 static daplka_timer_info_t *
1933 1928 daplka_timer_info_alloc(daplka_ep_resource_t *ep_rp)
1934 1929 {
1935 1930 daplka_timer_info_t *timerp;
1936 1931
1937 1932 timerp = kmem_zalloc(sizeof (*timerp), daplka_km_flags);
1938 1933 if (timerp == NULL) {
1939 1934 DERR("timer_info_alloc: cannot allocate timer info\n");
1940 1935 return (NULL);
1941 1936 }
1942 1937 timerp->ti_ep_res = ep_rp;
1943 1938 timerp->ti_tmo_id = 0;
1944 1939
1945 1940 return (timerp);
1946 1941 }
1947 1942
1948 1943 /*
1949 1944 * Frees the timer_info object.
1950 1945 * we release the EP reference before freeing the object.
1951 1946 */
1952 1947 static void
1953 1948 daplka_timer_info_free(daplka_timer_info_t *timerp)
1954 1949 {
1955 1950 ASSERT(timerp->ti_ep_res != NULL);
1956 1951 DAPLKA_RS_UNREF(timerp->ti_ep_res);
1957 1952 timerp->ti_ep_res = NULL;
1958 1953 ASSERT(timerp->ti_tmo_id == 0);
1959 1954 kmem_free(timerp, sizeof (*timerp));
1960 1955 }
1961 1956
1962 1957 /*
1963 1958 * cancels the timer set by ep_connect.
1964 1959 * returns -1 if timer handling is in progress
1965 1960 * and 0 otherwise.
1966 1961 */
/*
 * cancels the timer set by ep_connect.
 * returns -1 if timer handling is in progress
 * and 0 otherwise. on a 0 return, ep_timer_hkey is 0 and the timer
 * object (if any) has been freed; on a -1 return the caller must wait
 * for the taskq thread to clear ep_timer_hkey (see hash_ep_free).
 */
static int
daplka_cancel_timer(daplka_ep_resource_t *ep_rp)
{
	/*
	 * this function can only be called when ep_state
	 * is frozen.
	 */
	ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING);
	if (ep_rp->ep_timer_hkey != 0) {
		daplka_timer_info_t *timerp = NULL;

		/* try to claim the timer object before the handler does */
		(void) daplka_hash_remove(&daplka_timer_info_htbl,
		    ep_rp->ep_timer_hkey, (void **)&timerp);
		if (timerp == NULL) {
			/*
			 * this is possible if the timer_handler has
			 * removed the timerp but the taskq thread has
			 * not transitioned the ep_state to DISCONNECTED.
			 * we need to reset the ep_state to allow the
			 * taskq thread to continue with its work. the
			 * taskq thread will set the ep_timer_hkey to 0
			 * so we don't have to do it here.
			 */
			DERR("cancel_timer: timer is being processed\n");
			return (-1);
		}
		/*
		 * we got the timer object. if the handler fires at
		 * this point, it will not be able to find the object
		 * and will return immediately. normally, ti_tmo_id gets
		 * cleared when the handler fires.
		 */
		ASSERT(timerp->ti_tmo_id != 0);

		/*
		 * note that untimeout can possibly call the handler.
		 * we are safe because the handler will be a no-op.
		 */
		(void) untimeout(timerp->ti_tmo_id);
		timerp->ti_tmo_id = 0;
		/* drops the EP reference held by the timer */
		daplka_timer_info_free(timerp);
		ep_rp->ep_timer_hkey = 0;
	}
	return (0);
}
2012 2007
2013 2008 /*
2014 2009 * this function is called by daplka_hash_destroy for
2015 2010 * freeing timer_info objects
2016 2011 */
2017 2012 static void
2018 2013 daplka_hash_timer_free(void *obj)
2019 2014 {
2020 2015 daplka_timer_info_free((daplka_timer_info_t *)obj);
2021 2016 }
2022 2017
2023 2018 /* ARGSUSED */
2024 2019 static uint16_t
2025 2020 daplka_hellomsg_cksum(DAPL_PRIVATE *dp)
2026 2021 {
2027 2022 uint8_t *bp;
2028 2023 int i;
2029 2024 uint16_t cksum = 0;
2030 2025
2031 2026 bp = (uint8_t *)dp;
2032 2027 for (i = 0; i < sizeof (DAPL_PRIVATE); i++) {
2033 2028 cksum += bp[i];
2034 2029 }
2035 2030 return (cksum);
2036 2031 }
2037 2032
2038 2033 /*
2039 2034 * ep_connect is called by the client to initiate a connection to a
2040 2035 * remote service point. It is a non-blocking call. If a non-zero
2041 2036 * timeout is specified by the client, a timer will be set just before
2042 2037 * returning from ep_connect. Upon a successful return from ep_connect,
2043 2038 * the client will call evd_wait to wait for the connection to complete.
2044 2039 * If the connection is rejected or has failed due to an error, the
2045 2040 * client will be notified with an event containing the appropriate error
2046 2041 * code. If the connection is accepted, the client will be notified with
2047 2042 * the CONN_ESTABLISHED event. If the timer expires before either of the
2048 2043 * above events (error or established), a TIMED_OUT event will be delivered
2049 2044 * to the client.
2050 2045 *
2051 2046 * the complicated part of the timer logic is the handling of race
2052 2047 * conditions with CM callbacks. we need to ensure that either the CM or
2053 2048 * the timer thread gets to deliver an event, but not both. when the
2054 2049 * CM callback is about to deliver an event, it always tries to cancel
2055 2050 * the outstanding timer. if cancel_timer indicates a that the timer is
2056 2051 * already being processed, the CM callback will simply return without
2057 2052 * delivering an event. when the timer thread executes, it tries to check
2058 2053 * if the EP is still in CONNECTING state (timers only work on the active
2059 2054 * side). if the EP is not in this state, the timer thread will return
2060 2055 * without delivering an event.
2061 2056 */
/* ARGSUSED */
static int
daplka_ep_connect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp = NULL;
	dapl_ep_connect_t args;
	daplka_timer_info_t *timerp = NULL;
	uint32_t old_state, new_state;
	boolean_t timer_inserted = B_FALSE;
	uint64_t timer_hkey = 0;
	ibt_path_info_t path_info;
	ibt_path_attr_t path_attr;
	ibt_hca_attr_t *hca_attrp;
	ibt_chan_open_args_t chan_args;
	ibt_status_t status = IBT_SUCCESS;
	uint8_t num_paths;
	void *priv_data;
	DAPL_PRIVATE *dp;
	int retval = 0;
	ib_gid_t *sgid;
	ib_gid_t *dgid;
	uint64_t dgid_ored;
	ibt_ar_t ar_query_s;
	ibt_ar_t ar_result_s;
	ibt_path_flags_t pathflags;

	D3("ep_connect: enter\n");
	/* copy in the ioctl arguments from userland */
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_connect_t),
	    mode);
	if (retval != 0) {
		DERR("ep_connect: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* lookup adds a reference; released via DAPLKA_RS_UNREF below */
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epc_hkey);
	if (ep_rp == NULL) {
		DERR("ep_connect: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	/* freeze ep_state (TRANSITIONING) until daplka_ep_set_state */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("ep_connect: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}
	if (args.epc_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
		DERR("ep_connect: private data len (%d) exceeded "
		    "max size %d\n", args.epc_priv_sz,
		    DAPL_MAX_PRIVATE_DATA_SIZE);
		retval = EINVAL;
		goto cleanup;
	}

	/*
	 * check for remote ipaddress to dgid resolution needs ATS
	 */
	dgid = &args.epc_dgid;
	dgid_ored = dgid->gid_guid | dgid->gid_prefix;
#if defined(DAPLKA_DEBUG_FORCE_ATS)
	dgid_ored = 0ULL;
#endif /* DAPLKA_DEBUG_FORCE_ATS */
	/* check for unidentified dgid */
	if (dgid_ored == 0ULL) {
		/*
		 * setup for ibt_query_ar()
		 */
		sgid = &ia_rp->ia_hca_sgid;
		ar_query_s.ar_gid.gid_guid = 0ULL;
		ar_query_s.ar_gid.gid_prefix = 0ULL;
		ar_query_s.ar_pkey = 0;
		bcopy(args.epc_raddr_sadata.iad_sadata,
		    ar_query_s.ar_data, DAPL_ATS_NBYTES);
		/* UR: shorthand to print the SA data bytes as an address */
#define	UR(b) ar_query_s.ar_data[(b)]
		D3("daplka_ep_connect: SA[8] %d.%d.%d.%d\n",
		    UR(8), UR(9), UR(10), UR(11));
		D3("daplka_ep_connect: SA[12] %d.%d.%d.%d\n",
		    UR(12), UR(13), UR(14), UR(15));
		status = ibt_query_ar(sgid, &ar_query_s, &ar_result_s);
		if (status != IBT_SUCCESS) {
			DERR("ep_connect: ibt_query_ar returned %d\n", status);
			/* IBT errors are reported via *rvalp, retval 0 */
			*rvalp = (int)status;
			retval = 0;
			goto cleanup;
		}
		/*
		 * dgid identified from SA record
		 */
		dgid = &ar_result_s.ar_gid;
		D2("daplka_ep_connect: ATS dgid=%llx:%llx\n",
		    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
	}

	bzero(&path_info, sizeof (ibt_path_info_t));
	bzero(&path_attr, sizeof (ibt_path_attr_t));
	bzero(&chan_args, sizeof (ibt_chan_open_args_t));

	path_attr.pa_dgids = dgid;
	path_attr.pa_num_dgids = 1;
	/*
	 * don't set sid in path_attr saves 1 SA query
	 * Also makes server side not to write the service record
	 */
	path_attr.pa_sgid = ia_rp->ia_hca_sgid;
	path_attr.pa_pkey = ia_rp->ia_port_pkey;

	/* save the connection ep - struct copy */
	ep_rp->ep_sgid = ia_rp->ia_hca_sgid;
	ep_rp->ep_dgid = *dgid;

	num_paths = 0;
	pathflags = IBT_PATH_PKEY;
	/* enable APM on remote port but not on loopback case */
	if (daplka_apm && ((dgid->gid_prefix != path_attr.pa_sgid.gid_prefix) ||
	    (dgid->gid_guid != path_attr.pa_sgid.gid_guid))) {
		pathflags |= IBT_PATH_APM;
	}
	status = ibt_get_paths(daplka_dev->daplka_clnt_hdl,
	    pathflags, &path_attr, 1, &path_info, &num_paths);

	/* IBT_INSUFF_DATA means fewer paths than asked for; one is enough */
	if (status != IBT_SUCCESS && status != IBT_INSUFF_DATA) {
		DERR("ep_connect: ibt_get_paths returned %d paths %d\n",
		    status, num_paths);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/* fill in the sid directly to path_info */
	path_info.pi_sid = args.epc_sid;
	hca_attrp = &ia_rp->ia_hca->hca_attr;

	/* fill in open channel args */
	chan_args.oc_path = &path_info;
	chan_args.oc_cm_handler = daplka_cm_rc_handler;
	chan_args.oc_cm_clnt_private = (void *)ep_rp;
	chan_args.oc_rdma_ra_out = hca_attrp->hca_max_rdma_out_chan;
	chan_args.oc_rdma_ra_in = hca_attrp->hca_max_rdma_in_chan;
	chan_args.oc_path_retry_cnt = 7; /* 3-bit field */
	chan_args.oc_path_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;

	ASSERT(args.epc_priv_sz > 0);
	priv_data = (void *)args.epc_priv;

	chan_args.oc_priv_data_len = args.epc_priv_sz;
	chan_args.oc_priv_data = priv_data;

	/*
	 * calculate checksum value of hello message and
	 * put hello message in networking byte order
	 */
	dp = (DAPL_PRIVATE *)priv_data;
	dp->hello_msg.hi_port = htons(dp->hello_msg.hi_port);
	dp->hello_msg.hi_checksum = 0;
	dp->hello_msg.hi_checksum = htons(daplka_hellomsg_cksum(dp));

	if (args.epc_timeout > 0) {
		/*
		 * increment refcnt before passing reference to
		 * timer_info_alloc.
		 */
		DAPLKA_RS_REF(ep_rp);
		timerp = daplka_timer_info_alloc(ep_rp);
		if (timerp == NULL) {
			DERR("ep_connect: cannot allocate timer\n");
			/*
			 * we need to remove the reference if
			 * allocation failed.
			 */
			DAPLKA_RS_UNREF(ep_rp);
			retval = ENOMEM;
			goto cleanup;
		}
		/*
		 * We generate our own hkeys so that timer_hkey can fit
		 * into a pointer and passed as an arg to timeout()
		 */
		timer_hkey = (uint64_t)daplka_timer_hkey_gen();
		retval = daplka_hash_insert(&daplka_timer_info_htbl,
		    &timer_hkey, (void *)timerp);
		if (retval != 0) {
			DERR("ep_connect: cannot insert timer info\n");
			goto cleanup;
		}
		ASSERT(ep_rp->ep_timer_hkey == 0);
		ep_rp->ep_timer_hkey = timer_hkey;
		timer_inserted = B_TRUE;
		D2("ep_connect: timer_hkey = 0x%llx\n",
		    (longlong_t)timer_hkey);
	}
	status = ibt_open_rc_channel(ep_rp->ep_chan_hdl, IBT_OCHAN_NO_FLAGS,
	    IBT_NONBLOCKING, &chan_args, NULL);

	if (status != IBT_SUCCESS) {
		DERR("ep_connect: ibt_open_rc_channel returned %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}
	/*
	 * if a cm callback gets called at this point, it'll have to wait until
	 * ep_state becomes connecting (or some other state if another thread
	 * manages to get ahead of the callback). this guarantees that the
	 * callback will not touch the timer until it gets set.
	 */
	if (timerp != NULL) {
		clock_t tmo;

		/* epc_timeout is in microseconds; convert to ticks */
		tmo = drv_usectohz((clock_t)args.epc_timeout);
		/*
		 * We generate our own 32 bit timer_hkey so that it can fit
		 * into a pointer
		 */
		ASSERT(timer_hkey != 0);
		timerp->ti_tmo_id = timeout(daplka_timer_handler,
		    (void *)(uintptr_t)timer_hkey, tmo);
	}
	new_state = DAPLKA_EP_STATE_CONNECTING;

cleanup:;
	if (timerp != NULL && (retval != 0 || status != IBT_SUCCESS)) {
		/*
		 * if ibt_open_rc_channel failed, the timerp must still
		 * be in daplka_timer_info_htbl because neither the cm
		 * callback nor the timer_handler will be called.
		 */
		if (timer_inserted) {
			daplka_timer_info_t *new_timerp = NULL;

			ASSERT(timer_hkey != 0);
			(void) daplka_hash_remove(&daplka_timer_info_htbl,
			    timer_hkey, (void **)&new_timerp);
			ASSERT(new_timerp == timerp);
			ep_rp->ep_timer_hkey = 0;
		}
		daplka_timer_info_free(timerp);
	}
	/* unfreeze ep_state and drop the lookup reference */
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	D3("ep_connect: exit\n");
	return (retval);
}
2307 2300
2308 2301 /*
2309 2302 * ep_disconnect closes a connection with a remote peer.
2310 2303 * if a connection has not been established, ep_disconnect
2311 2304 * will instead flush all recv bufs posted to this channel.
2312 2305 * if the EP state is CONNECTED, CONNECTING or ACCEPTING upon
2313 2306 * entry to ep_disconnect, the EP state will transition to
2314 2307 * DISCONNECTING upon exit. the CM callbacks triggered by
2315 2308 * ibt_close_rc_channel will cause EP state to become
2316 2309 * DISCONNECTED. This function is a no-op if EP state is
2317 2310 * DISCONNECTED.
2318 2311 */
/* ARGSUSED */
static int
daplka_ep_disconnect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ep_resource_t *ep_rp = NULL;
	dapl_ep_disconnect_t args;
	ibt_status_t status;
	uint32_t old_state, new_state;
	int retval = 0;

	/* copy in the ioctl arguments from userland */
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_disconnect_t),
	    mode);
	if (retval != 0) {
		DERR("ep_disconnect: copyin error %d\n", retval);
		return (EFAULT);
	}
	/* lookup adds a reference; released via DAPLKA_RS_UNREF below */
	ep_rp = (daplka_ep_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epd_hkey);
	if (ep_rp == NULL) {
		DERR("ep_disconnect: cannot find ep resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);

	/* freeze ep_state (TRANSITIONING) until daplka_ep_set_state */
	new_state = old_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
	    old_state != DAPLKA_EP_STATE_CONNECTING &&
	    old_state != DAPLKA_EP_STATE_ACCEPTING &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING &&
	    old_state != DAPLKA_EP_STATE_CLOSED) {
		DERR("ep_disconnect: invalid state %d\n", old_state);
		retval = EINVAL;
		goto cleanup;
	}

	if ((old_state == DAPLKA_EP_STATE_DISCONNECTED) ||
	    (old_state == DAPLKA_EP_STATE_DISCONNECTING)) {
		D2("ep_disconnect: ep already disconnected\n");
		retval = 0;
		/* we leave the state as DISCONNECTED */
		goto cleanup;
	}
	if (old_state == DAPLKA_EP_STATE_CONNECTING ||
	    old_state == DAPLKA_EP_STATE_ACCEPTING) {
		D2("ep_disconnect: aborting, old_state = %d\n", old_state);
	}

	/*
	 * according to the udapl spec, ep_disconnect should
	 * flush the channel if the channel is not CONNECTED.
	 */
	if (old_state == DAPLKA_EP_STATE_CLOSED) {
		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
		if (status != IBT_SUCCESS) {
			DERR("ep_disconnect: ibt_flush_channel failed %d\n",
			    status);
			/* IBT errors are reported via *rvalp, retval 0 */
			*rvalp = (int)status;
		}
		retval = 0;
		/* we leave the state as CLOSED */
		goto cleanup;
	}

	new_state = DAPLKA_EP_STATE_DISCONNECTING;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_NONBLOCKING,
	    NULL, 0, NULL, NULL, NULL);

	if (status == IBT_SUCCESS) {
		/*
		 * close initiated; the CM callbacks will move ep_state
		 * to DISCONNECTED, so return here without resetting it.
		 */
		DAPLKA_RS_UNREF(ep_rp);
		return (retval);
	} else {
		DERR("ep_disconnect: ibt_close_rc_channel returned %d\n",
		    status);
		*rvalp = (int)status;
		retval = 0;
		/* close failed: roll ep_state back to what it was */
		new_state = old_state;
	}

cleanup:;
	daplka_ep_set_state(ep_rp, old_state, new_state);
	DAPLKA_RS_UNREF(ep_rp);
	return (retval);
}
2405 2398
2406 2399 /*
2407 2400 * this function resets the EP to a usable state (ie. from
2408 2401 * DISCONNECTED to CLOSED). this function is best implemented using
2409 2402 * the ibt_recycle_channel interface. until that is available, we will
2410 2403 * instead clone and tear down the existing channel and replace the
2411 2404 * existing channel with the cloned one.
2412 2405 */
2413 2406 /* ARGSUSED */
2414 2407 static int
2415 2408 daplka_ep_reinit(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2416 2409 cred_t *cred, int *rvalp)
2417 2410 {
2418 2411 daplka_ep_resource_t *ep_rp = NULL;
2419 2412 dapl_ep_reinit_t args;
2420 2413 ibt_status_t status;
2421 2414 uint32_t old_state, new_state;
2422 2415 int retval = 0;
2423 2416
2424 2417 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_reinit_t),
2425 2418 mode);
2426 2419 if (retval != 0) {
2427 2420 DERR("reinit: copyin error %d\n", retval);
2428 2421 return (EFAULT);
2429 2422 }
2430 2423 ep_rp = (daplka_ep_resource_t *)
2431 2424 daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epri_hkey);
2432 2425 if (ep_rp == NULL) {
2433 2426 DERR("reinit: cannot find ep resource\n");
2434 2427 return (EINVAL);
2435 2428 }
2436 2429 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2437 2430 new_state = old_state = daplka_ep_get_state(ep_rp);
2438 2431 if ((old_state != DAPLKA_EP_STATE_CLOSED) &&
2439 2432 (old_state != DAPLKA_EP_STATE_DISCONNECTED)) {
2440 2433 DERR("reinit: invalid state %d\n", old_state);
2441 2434 retval = EINVAL;
2442 2435 goto cleanup;
2443 2436 }
2444 2437
2445 2438 status = ibt_recycle_rc(ep_rp->ep_chan_hdl,
2446 2439 IBT_CEP_RDMA_RD|IBT_CEP_RDMA_WR,
2447 2440 ia_rp->ia_port_num, NULL, NULL);
2448 2441 if (status != IBT_SUCCESS) {
2449 2442 DERR("reinit: unable to clone channel\n");
2450 2443 *rvalp = (int)status;
2451 2444 retval = 0;
2452 2445 goto cleanup;
2453 2446 }
2454 2447 new_state = DAPLKA_EP_STATE_CLOSED;
2455 2448
2456 2449 cleanup:;
2457 2450 daplka_ep_set_state(ep_rp, old_state, new_state);
2458 2451 DAPLKA_RS_UNREF(ep_rp);
2459 2452 return (retval);
2460 2453 }
2461 2454
2462 2455 /*
2463 2456 * destroys a EP resource.
2464 2457 * called when refcnt drops to zero.
2465 2458 */
2466 2459 static int
2467 2460 daplka_ep_destroy(daplka_resource_t *gen_rp)
2468 2461 {
2469 2462 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)gen_rp;
2470 2463 ibt_status_t status;
2471 2464
2472 2465 ASSERT(DAPLKA_RS_REFCNT(ep_rp) == 0);
2473 2466 ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_FREED);
2474 2467
2475 2468 /*
2476 2469 * by the time we get here, we can be sure that
2477 2470 * there is no outstanding timer.
2478 2471 */
2479 2472 ASSERT(ep_rp->ep_timer_hkey == 0);
2480 2473
2481 2474 D3("ep_destroy: entering, ep_rp 0x%p, rnum %d\n",
2482 2475 ep_rp, DAPLKA_RS_RNUM(ep_rp));
2483 2476 /*
2484 2477 * free rc channel
2485 2478 */
2486 2479 if (ep_rp->ep_chan_hdl != NULL) {
2487 2480 mutex_enter(&daplka_dev->daplka_mutex);
2488 2481 ibt_set_chan_private(ep_rp->ep_chan_hdl, NULL);
2489 2482 mutex_exit(&daplka_dev->daplka_mutex);
2490 2483 status = daplka_ibt_free_channel(ep_rp, ep_rp->ep_chan_hdl);
2491 2484 if (status != IBT_SUCCESS) {
2492 2485 DERR("ep_free: ibt_free_channel returned %d\n",
2493 2486 status);
2494 2487 }
2495 2488 ep_rp->ep_chan_hdl = NULL;
2496 2489 D3("ep_destroy: qp freed, rnum %d\n", DAPLKA_RS_RNUM(ep_rp));
2497 2490 }
2498 2491 /*
2499 2492 * release all references
2500 2493 */
2501 2494 if (ep_rp->ep_snd_evd != NULL) {
2502 2495 DAPLKA_RS_UNREF(ep_rp->ep_snd_evd);
2503 2496 ep_rp->ep_snd_evd = NULL;
2504 2497 }
2505 2498 if (ep_rp->ep_rcv_evd != NULL) {
2506 2499 DAPLKA_RS_UNREF(ep_rp->ep_rcv_evd);
2507 2500 ep_rp->ep_rcv_evd = NULL;
2508 2501 }
2509 2502 if (ep_rp->ep_conn_evd != NULL) {
2510 2503 DAPLKA_RS_UNREF(ep_rp->ep_conn_evd);
2511 2504 ep_rp->ep_conn_evd = NULL;
2512 2505 }
2513 2506 if (ep_rp->ep_srq_res != NULL) {
2514 2507 DAPLKA_RS_UNREF(ep_rp->ep_srq_res);
2515 2508 ep_rp->ep_srq_res = NULL;
2516 2509 }
2517 2510 if (ep_rp->ep_pd_res != NULL) {
2518 2511 DAPLKA_RS_UNREF(ep_rp->ep_pd_res);
2519 2512 ep_rp->ep_pd_res = NULL;
2520 2513 }
2521 2514 cv_destroy(&ep_rp->ep_cv);
2522 2515 mutex_destroy(&ep_rp->ep_lock);
2523 2516
2524 2517 DAPLKA_RS_FINI(ep_rp);
2525 2518 kmem_free(ep_rp, sizeof (daplka_ep_resource_t));
2526 2519 D3("ep_destroy: exiting, ep_rp 0x%p\n", ep_rp);
2527 2520 return (0);
2528 2521 }
2529 2522
2530 2523 /*
2531 2524 * this function is called by daplka_hash_destroy for
2532 2525 * freeing EP resource objects
2533 2526 */
/*
 * Hash-table destructor callback: invoked by daplka_hash_destroy for
 * each EP resource left in the table. Cancels any outstanding connect
 * timer (waiting for an in-flight timer thread if necessary), closes
 * the RC channel, and drops the table's reference on the EP.
 */
static void
daplka_hash_ep_free(void *obj)
{
	daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)obj;
	ibt_status_t status;
	uint32_t old_state, new_state;
	int retval;

	/* get_state freezes ep_state; set_state below moves it to FREED */
	old_state = daplka_ep_get_state(ep_rp);
	retval = daplka_cancel_timer(ep_rp);
	new_state = DAPLKA_EP_STATE_FREED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	if (retval != 0) {
		/*
		 * the timer taskq thread claimed the timer first; wait
		 * for it to clear ep_timer_hkey and signal ep_cv (see
		 * daplka_timer_thread).
		 */
		D2("hash_ep_free: ep_rp 0x%p "
		    "timer is still being processed\n", ep_rp);
		mutex_enter(&ep_rp->ep_lock);
		if (ep_rp->ep_timer_hkey != 0) {
			D2("hash_ep_free: ep_rp 0x%p "
			    "waiting for timer_hkey to be 0\n", ep_rp);
			cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
		}
		mutex_exit(&ep_rp->ep_lock);
	}

	/* call ibt_close_rc_channel regardless of what state we are in */
	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    NULL, 0, NULL, NULL, NULL);
	if (status != IBT_SUCCESS) {
		/* only unexpected if we thought a connection was active */
		if (old_state == DAPLKA_EP_STATE_CONNECTED ||
		    old_state == DAPLKA_EP_STATE_CONNECTING ||
		    old_state == DAPLKA_EP_STATE_ACCEPTING) {
			DERR("hash_ep_free: ep_rp 0x%p state %d "
			    "unexpected error %d from close_rc_channel\n",
			    ep_rp, old_state, status);
		}
		D2("hash_ep_free: close_rc_channel, status %d\n", status);
	}

	/* drop the hash table's reference; may trigger daplka_ep_destroy */
	DAPLKA_RS_UNREF(ep_rp);
}
2575 2568
2576 2569 /*
2577 2570 * creates a EVD resource.
2578 2571 * a EVD is used by the client to wait for events from one
2579 2572 * or more sources.
2580 2573 */
2581 2574 /* ARGSUSED */
2582 2575 static int
2583 2576 daplka_evd_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2584 2577 cred_t *cred, int *rvalp)
2585 2578 {
2586 2579 daplka_evd_resource_t *evd_rp = NULL;
2587 2580 daplka_async_evd_hkey_t *async_evd;
2588 2581 ibt_hca_attr_t *hca_attrp;
2589 2582 ibt_cq_attr_t cq_attr;
2590 2583 dapl_evd_create_t args;
2591 2584 uint64_t evd_hkey = 0;
2592 2585 boolean_t inserted = B_FALSE;
2593 2586 int retval = 0;
2594 2587 ibt_status_t status;
2595 2588
2596 2589 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_create_t),
2597 2590 mode);
2598 2591 if (retval != 0) {
↓ open down ↓ |
369 lines elided |
↑ open up ↑ |
2599 2592 DERR("evd_create: copyin error %d", retval);
2600 2593 return (EFAULT);
2601 2594 }
2602 2595 if ((args.evd_flags &
2603 2596 ~(DAT_EVD_DEFAULT_FLAG | DAT_EVD_SOFTWARE_FLAG)) != 0) {
2604 2597 DERR("evd_create: invalid flags 0x%x\n", args.evd_flags);
2605 2598 return (EINVAL);
2606 2599 }
2607 2600
2608 2601 evd_rp = kmem_zalloc(sizeof (daplka_evd_resource_t), daplka_km_flags);
2609 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
2610 2602 DAPLKA_RS_INIT(evd_rp, DAPL_TYPE_EVD,
2611 2603 DAPLKA_RS_RNUM(ia_rp), daplka_evd_destroy);
2612 2604
2613 2605 mutex_init(&evd_rp->evd_lock, NULL, MUTEX_DRIVER, NULL);
2614 2606 cv_init(&evd_rp->evd_cv, NULL, CV_DRIVER, NULL);
2615 2607 evd_rp->evd_hca = ia_rp->ia_hca;
2616 2608 evd_rp->evd_flags = args.evd_flags;
2617 2609 evd_rp->evd_hca_hdl = ia_rp->ia_hca_hdl;
2618 2610 evd_rp->evd_cookie = args.evd_cookie;
2619 2611 evd_rp->evd_cno_res = NULL;
2620 2612 evd_rp->evd_cr_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2621 2613 evd_rp->evd_conn_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2622 2614 evd_rp->evd_async_events.eel_event_type = DAPLKA_EVD_ASYNC_EVENTS;
2623 2615
2624 2616 /*
2625 2617 * if the client specified a non-zero cno_hkey, we
2626 2618 * lookup the cno and save the reference for later use.
2627 2619 */
2628 2620 if (args.evd_cno_hkey > 0) {
2629 2621 daplka_cno_resource_t *cno_rp;
2630 2622
2631 2623 cno_rp = (daplka_cno_resource_t *)
2632 2624 daplka_hash_lookup(&ia_rp->ia_cno_htbl,
2633 2625 args.evd_cno_hkey);
2634 2626 if (cno_rp == NULL) {
2635 2627 DERR("evd_create: cannot find cno resource\n");
2636 2628 goto cleanup;
2637 2629 }
2638 2630 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
2639 2631 evd_rp->evd_cno_res = cno_rp;
2640 2632 }
2641 2633 hca_attrp = &ia_rp->ia_hca->hca_attr;
2642 2634 if ((evd_rp->evd_flags &
2643 2635 (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) != 0) {
2644 2636 if (args.evd_cq_size > hca_attrp->hca_max_cq_sz) {
2645 2637 DERR("evd_create: invalid cq size %d",
2646 2638 args.evd_cq_size);
2647 2639 retval = EINVAL;
2648 2640 goto cleanup;
2649 2641 }
2650 2642 cq_attr.cq_size = args.evd_cq_size;
2651 2643 cq_attr.cq_sched = NULL;
2652 2644 cq_attr.cq_flags = IBT_CQ_USER_MAP;
2653 2645
2654 2646 status = daplka_ibt_alloc_cq(evd_rp, evd_rp->evd_hca_hdl,
2655 2647 &cq_attr, &evd_rp->evd_cq_hdl, &evd_rp->evd_cq_real_size);
2656 2648
2657 2649 if (status != IBT_SUCCESS) {
2658 2650 DERR("evd_create: ibt_alloc_cq returned %d", status);
2659 2651 *rvalp = (int)status;
2660 2652 retval = 0;
2661 2653 goto cleanup;
2662 2654 }
2663 2655
2664 2656 /*
2665 2657 * store evd ptr with cq_hdl
2666 2658 * mutex is only needed for race of "destroy" and "async"
2667 2659 */
2668 2660 mutex_enter(&daplka_dev->daplka_mutex);
2669 2661 ibt_set_cq_private(evd_rp->evd_cq_hdl, (void *)evd_rp);
2670 2662 mutex_exit(&daplka_dev->daplka_mutex);
2671 2663
2672 2664 /* Get HCA-specific data_out info */
2673 2665 status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2674 2666 IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2675 2667 &args.evd_cq_data_out, sizeof (args.evd_cq_data_out));
2676 2668
2677 2669 if (status != IBT_SUCCESS) {
2678 2670 DERR("evd_create: ibt_ci_data_out error(%d)", status);
2679 2671 *rvalp = (int)status;
2680 2672 retval = 0;
2681 2673 goto cleanup;
2682 2674 }
2683 2675
2684 2676 args.evd_cq_real_size = evd_rp->evd_cq_real_size;
2685 2677
2686 2678 ibt_set_cq_handler(evd_rp->evd_cq_hdl, daplka_cq_handler,
↓ open down ↓ |
67 lines elided |
↑ open up ↑ |
2687 2679 (void *)evd_rp);
2688 2680 }
2689 2681
2690 2682 retval = daplka_hash_insert(&ia_rp->ia_evd_htbl,
2691 2683 &evd_hkey, (void *)evd_rp);
2692 2684 if (retval != 0) {
2693 2685 DERR("evd_ceate: cannot insert evd %d\n", retval);
2694 2686 goto cleanup;
2695 2687 }
2696 2688 inserted = B_TRUE;
2697 - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*evd_rp))
2698 2689
2699 2690 /*
2700 2691 * If this evd handles async events need to add to the IA resource
2701 2692 * async evd list
2702 2693 */
2703 2694 if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
2704 2695 async_evd = kmem_zalloc(sizeof (daplka_async_evd_hkey_t),
2705 2696 daplka_km_flags);
2706 2697 /* add the evd to the head of the list */
2707 2698 mutex_enter(&ia_rp->ia_lock);
2708 2699 async_evd->aeh_evd_hkey = evd_hkey;
2709 2700 async_evd->aeh_next = ia_rp->ia_async_evd_hkeys;
2710 2701 ia_rp->ia_async_evd_hkeys = async_evd;
2711 2702 mutex_exit(&ia_rp->ia_lock);
2712 2703 }
2713 2704
2714 2705 args.evd_hkey = evd_hkey;
2715 2706 retval = copyout(&args, (void *)arg, sizeof (dapl_evd_create_t));
2716 2707 if (retval != 0) {
2717 2708 DERR("evd_create: copyout error %d\n", retval);
2718 2709 retval = EFAULT;
2719 2710 goto cleanup;
2720 2711 }
2721 2712 return (0);
2722 2713
2723 2714 cleanup:;
2724 2715 if (inserted) {
2725 2716 daplka_evd_resource_t *free_rp = NULL;
2726 2717
2727 2718 (void) daplka_hash_remove(&ia_rp->ia_evd_htbl, evd_hkey,
2728 2719 (void **)&free_rp);
2729 2720 if (free_rp != evd_rp) {
2730 2721 DERR("evd_create: cannot remove evd\n");
2731 2722 /*
2732 2723 * we can only get here if another thread
2733 2724 * has completed the cleanup in evd_free
2734 2725 */
2735 2726 return (retval);
2736 2727 }
2737 2728 }
2738 2729 DAPLKA_RS_UNREF(evd_rp);
2739 2730 return (retval);
2740 2731 }
2741 2732
/*
 * resizes CQ and returns new mapping info to library.
 *
 * ia_rp	IA resource owning the EVD hash table
 * arg		user address of a dapl_cq_resize_t
 * mode		ioctl data model flags for ddi_copyin/ddi_copyout
 * rvalp	set to the raw ibt_ci_data_out() status on failure
 *
 * returns 0 on success (or on IBTF failure, with *rvalp carrying the
 * ibt status); EFAULT on copyin/copyout errors; EINVAL if the EVD is
 * not found or the requested size exceeds the HCA limit.
 */
/* ARGSUSED */
static int
daplka_cq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_evd_resource_t	*evd_rp = NULL;
	ibt_hca_attr_t		*hca_attrp;
	dapl_cq_resize_t	args;
	ibt_status_t		status;
	int			retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("cq_resize: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get evd resource; lookup adds a hold dropped in cleanup */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.cqr_evd_hkey);
	if (evd_rp == NULL) {
		DERR("cq_resize: cannot find evd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	/* reject sizes beyond what the HCA can support */
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.cqr_cq_new_size > hca_attrp->hca_max_cq_sz) {
		DERR("cq_resize: invalid cq size %d", args.cqr_cq_new_size);
		retval = EINVAL;
		goto cleanup;
	}
	/*
	 * If ibt_resize_cq fails that it is primarily due to resource
	 * shortage. Per IB spec resize will never loose events and
	 * a resize error leaves the CQ intact. Therefore even if the
	 * resize request fails we proceed and get the mapping data
	 * from the CQ so that the library can mmap it.
	 */
	status = ibt_resize_cq(evd_rp->evd_cq_hdl, args.cqr_cq_new_size,
	    &args.cqr_cq_real_size);
	if (status != IBT_SUCCESS) {
		/* we return the size of the old CQ if resize fails */
		args.cqr_cq_real_size = evd_rp->evd_cq_real_size;
		ASSERT(status != IBT_CQ_HDL_INVALID);
		DERR("cq_resize: ibt_resize_cq failed:%d\n", status);
	} else {
		/* evd_lock serializes updates to the cached CQ size */
		mutex_enter(&evd_rp->evd_lock);
		evd_rp->evd_cq_real_size = args.cqr_cq_real_size;
		mutex_exit(&evd_rp->evd_lock);
	}

	D2("cq_resize(%d): done new_sz(%u) real_sz(%u)\n",
	    DAPLKA_RS_RNUM(evd_rp),
	    args.cqr_cq_new_size, args.cqr_cq_real_size);

	/* Get HCA-specific data_out info (mmap data for the library) */
	status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
	    &args.cqr_cq_data_out, sizeof (args.cqr_cq_data_out));
	if (status != IBT_SUCCESS) {
		DERR("cq_resize: ibt_ci_data_out error(%d)\n", status);
		/* return ibt_ci_data_out status */
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("cq_resize: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

cleanup:;
	if (evd_rp != NULL) {
		DAPLKA_RS_UNREF(evd_rp);
	}
	return (retval);
}
2828 2819
2829 2820 /*
2830 2821 * Routine to copyin the event poll message so that 32 bit libraries
2831 2822 * can be safely supported
2832 2823 */
2833 2824 int
2834 2825 daplka_event_poll_copyin(intptr_t inarg, dapl_event_poll_t *outarg, int mode)
2835 2826 {
2836 2827 int retval;
2837 2828
2838 2829 #ifdef _MULTI_DATAMODEL
2839 2830 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2840 2831 dapl_event_poll32_t args32;
2841 2832
2842 2833 retval = ddi_copyin((void *)inarg, &args32,
2843 2834 sizeof (dapl_event_poll32_t), mode);
2844 2835 if (retval != 0) {
2845 2836 DERR("event_poll_copyin: 32bit error %d\n", retval);
2846 2837 return (EFAULT);
2847 2838 }
2848 2839
2849 2840 outarg->evp_evd_hkey = args32.evp_evd_hkey;
2850 2841 outarg->evp_threshold = args32.evp_threshold;
2851 2842 outarg->evp_timeout = args32.evp_timeout;
2852 2843 outarg->evp_ep = (dapl_ib_event_t *)(uintptr_t)args32.evp_ep;
2853 2844 outarg->evp_num_ev = args32.evp_num_ev;
2854 2845 outarg->evp_num_polled = args32.evp_num_polled;
2855 2846 return (0);
2856 2847 }
2857 2848 #endif
2858 2849 retval = ddi_copyin((void *)inarg, outarg, sizeof (dapl_event_poll_t),
2859 2850 mode);
2860 2851 if (retval != 0) {
2861 2852 DERR("event_poll: copyin error %d\n", retval);
2862 2853 return (EFAULT);
2863 2854 }
2864 2855
2865 2856 return (0);
2866 2857 }
2867 2858
2868 2859 /*
2869 2860 * Routine to copyout the event poll message so that 32 bit libraries
2870 2861 * can be safely supported
2871 2862 */
2872 2863 int
2873 2864 daplka_event_poll_copyout(dapl_event_poll_t *inarg, intptr_t outarg, int mode)
2874 2865 {
2875 2866 int retval;
2876 2867
2877 2868 #ifdef _MULTI_DATAMODEL
2878 2869 if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2879 2870 dapl_event_poll32_t args32;
2880 2871
2881 2872 args32.evp_evd_hkey = inarg->evp_evd_hkey;
2882 2873 args32.evp_threshold = inarg->evp_threshold;
2883 2874 args32.evp_timeout = inarg->evp_timeout;
2884 2875 args32.evp_ep = (caddr32_t)(uintptr_t)inarg->evp_ep;
2885 2876 args32.evp_num_ev = inarg->evp_num_ev;
2886 2877 args32.evp_num_polled = inarg->evp_num_polled;
2887 2878
2888 2879 retval = ddi_copyout((void *)&args32, (void *)outarg,
2889 2880 sizeof (dapl_event_poll32_t), mode);
2890 2881 if (retval != 0) {
2891 2882 DERR("event_poll_copyout: 32bit error %d\n", retval);
2892 2883 return (EFAULT);
2893 2884 }
2894 2885 return (0);
2895 2886 }
2896 2887 #endif
2897 2888 retval = ddi_copyout((void *)inarg, (void *)outarg,
2898 2889 sizeof (dapl_event_poll_t), mode);
2899 2890 if (retval != 0) {
2900 2891 DERR("event_poll_copyout: error %d\n", retval);
2901 2892 return (EFAULT);
2902 2893 }
2903 2894
2904 2895 return (0);
2905 2896 }
2906 2897
/*
 * function to handle CM REQ RCV private data from Solaris or third parties.
 *
 * If the private data carries a valid Solaris DAPL hello message
 * (version and checksum match), it is copied through verbatim.
 * Otherwise the raw third-party private data is repackaged into a
 * synthesized hello message, with the source IP address recovered
 * via a reverse ATS (ibt_query_ar) lookup where possible.
 *
 * NOTE(review): despite its name, the evd_rp parameter points at the
 * dapl_ib_event_t being populated (see its type), not an EVD resource.
 * Ownership of cr_ev's private-data buffer transfers here: it is
 * consumed and kmem_free'd on every path where it is non-empty.
 */
/* ARGSUSED */
static void
daplka_crevent_privdata_post(daplka_ia_resource_t *ia_rp,
    dapl_ib_event_t *evd_rp, daplka_evd_event_t *cr_ev)
{
	DAPL_PRIVATE		*dp;
	ib_gid_t		*lgid;
	ibt_ar_t		ar_query_s;
	ibt_ar_t		ar_result_s;
	DAPL_HELLO_MSG		*hip;
	uint32_t		ipaddr_ord;
	ibt_priv_data_len_t	clen;
	ibt_priv_data_len_t	olen;
	ibt_status_t		status;
	uint16_t		cksum;

	/*
	 * get private data and len
	 */
	dp = (DAPL_PRIVATE *)cr_ev->ee_cmev.ec_cm_ev_priv_data;
	clen = cr_ev->ee_cmev.ec_cm_ev_priv_data_len;
#if defined(DAPLKA_DEBUG_FORCE_ATS)
	/* skip the DAPL_PRIVATE checksum check */
#else
	/* for remote connects */
	/* look up hello message in the CM private data area */
	if (clen >= sizeof (DAPL_PRIVATE) &&
	    (dp->hello_msg.hi_vers == DAPL_HELLO_MSG_VERS)) {
		cksum = ntohs(dp->hello_msg.hi_checksum);
		/* zero the checksum field before recomputing over the msg */
		dp->hello_msg.hi_checksum = 0;
		if (daplka_hellomsg_cksum(dp) == cksum) {
			D2("daplka_crevent_privdata_post: Solaris msg\n");
			evd_rp->ibe_ce.ibce_priv_data_size = clen;
			dp->hello_msg.hi_checksum = DAPL_CHECKSUM;
			dp->hello_msg.hi_port = ntohs(dp->hello_msg.hi_port);
			bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
			kmem_free(dp, clen);
			return;
		}
	}
#endif /* DAPLKA_DEBUG_FORCE_ATS */

	D2("daplka_crevent_privdata_post: 3rd party msg\n");
	/* transpose CM private data into hello message */
	if (clen) {
		/* olen remembers the allocation size; clen may be clamped */
		olen = clen;
		if (clen > DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE) {
			clen = DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE;
		}
		bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
		kmem_free(dp, olen);
	} else {
		bzero(evd_rp->ibe_ce.ibce_priv_data_ptr,
		    DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE);
	}
	evd_rp->ibe_ce.ibce_priv_data_size = sizeof (DAPL_PRIVATE);
	dp = (DAPL_PRIVATE *)evd_rp->ibe_ce.ibce_priv_data_ptr;
	/*
	 * fill in hello message
	 */
	hip = &dp->hello_msg;
	hip->hi_checksum = DAPL_CHECKSUM;
	hip->hi_clen = clen;
	hip->hi_mid = 0;
	hip->hi_vers = DAPL_HELLO_MSG_VERS;
	hip->hi_port = 0;

	/* assign sgid and dgid */
	lgid = &ia_rp->ia_hca_sgid;
	ar_query_s.ar_gid.gid_prefix =
	    cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix;
	ar_query_s.ar_gid.gid_guid =
	    cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid;
	ar_query_s.ar_pkey = ia_rp->ia_port_pkey;
	bzero(ar_query_s.ar_data, DAPL_ATS_NBYTES);

	/* reverse ip address lookup through ATS */
	status = ibt_query_ar(lgid, &ar_query_s, &ar_result_s);
	if (status == IBT_SUCCESS) {
		bcopy(ar_result_s.ar_data, hip->hi_saaddr, DAPL_ATS_NBYTES);
		/*
		 * determine the address families: an all-zero v4 pad
		 * indicates an IPv4-mapped address
		 */
		ipaddr_ord = hip->hi_v4pad[0] | hip->hi_v4pad[1] |
		    hip->hi_v4pad[2];
		if (ipaddr_ord == 0) {
			hip->hi_ipv = AF_INET;
		} else {
			hip->hi_ipv = AF_INET6;
		}

#define	UL(b) ar_result_s.ar_data[(b)]
		D3("daplka_privdata_post: family=%d :SA[8] %d.%d.%d.%d\n",
		    hip->hi_ipv, UL(8), UL(9), UL(10), UL(11));
		D3("daplka_privdata_post: SA[12] %d.%d.%d.%d\n",
		    UL(12), UL(13), UL(14), UL(15));
	} else {
		/* non-conformed third parties */
		hip->hi_ipv = AF_UNSPEC;
		bzero(hip->hi_saaddr, DAPL_ATS_NBYTES);
	}
}
3010 3001
/*
 * this function is called by evd_wait and evd_dequeue to wait for
 * connection events and CQ notifications. typically this function
 * is called when the userland CQ is empty and the client has
 * specified a non-zero timeout to evd_wait. if the client is
 * interested in CQ events, the CQ must be armed in userland prior
 * to calling this function.
 *
 * ia_rp	IA resource whose EVD hash table is searched
 * arg		user address of a dapl_event_poll_t (32/64-bit layouts
 *		handled by the copyin/copyout helpers)
 * mode		ioctl data model flags
 *
 * returns 0 on success; EFAULT on copy errors; EINVAL on bad
 * arguments; ENOMEM if the event array cannot be allocated; EINTR if
 * the wait was interrupted; ETIME if the timeout expired.
 */
/* ARGSUSED */
static int
daplka_event_poll(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_evd_resource_t	*evd_rp = NULL;
	dapl_event_poll_t	args;
	daplka_evd_event_t	*head;
	dapl_ib_event_t		evp_arr[NUM_EVENTS_PER_POLL];
	dapl_ib_event_t		*evp;
	dapl_ib_event_t		*evp_start;
	size_t			evp_size;	/* valid only if heap-allocated */
	int			threshold;
	clock_t			timeout;
	uint32_t		max_events;
	uint32_t		num_events = 0;
	void			*pd;
	ibt_priv_data_len_t	n;
	int			retval = 0;
	int			rc;

	retval = daplka_event_poll_copyin(arg, &args, mode);
	if (retval != 0) {
		return (EFAULT);
	}

	if ((args.evp_num_ev > 0) && (args.evp_ep == NULL)) {
		DERR("event_poll: evp_ep cannot be NULL if num_wc=%d",
		    args.evp_num_ev);
		return (EINVAL);
	}
	/*
	 * Note: dequeue requests have a threshold = 0, timeout = 0
	 */
	threshold = args.evp_threshold;

	max_events = args.evp_num_ev;
	/* ensure library is passing sensible values */
	if (max_events < threshold) {
		DERR("event_poll: max_events(%d) < threshold(%d)\n",
		    max_events, threshold);
		return (EINVAL);
	}
	/* Do a sanity check to avoid excessive memory allocation */
	if (max_events > DAPL_EVD_MAX_EVENTS) {
		DERR("event_poll: max_events(%d) > %d",
		    max_events, DAPL_EVD_MAX_EVENTS);
		return (EINVAL);
	}
	D4("event_poll: threshold(%d) timeout(0x%llx) max_events(%d)\n",
	    threshold, (longlong_t)args.evp_timeout, max_events);

	/* get evd resource; lookup adds a hold dropped in cleanup */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evp_evd_hkey);
	if (evd_rp == NULL) {
		DERR("event_poll: cannot find evd resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	/*
	 * Use event array on the stack if possible
	 */
	if (max_events <= NUM_EVENTS_PER_POLL) {
		evp_start = evp = &evp_arr[0];
	} else {
		evp_size = max_events * sizeof (dapl_ib_event_t);
		evp_start = evp = kmem_zalloc(evp_size, daplka_km_flags);
		if (evp == NULL) {
			DERR("event_poll: kmem_zalloc failed, evp_size %d",
			    evp_size);
			retval = ENOMEM;
			goto cleanup;
		}
	}

	/*
	 * The Event poll algorithm is as follows -
	 * The library passes a buffer big enough to hold "max_events"
	 * events. max_events is >= threshold. If at any stage we get
	 * max_events no. of events we bail. The events are polled in
	 * the following order -
	 * 1) Check for CR events in the evd_cr_events list
	 * 2) Check for Connection events in the evd_connection_events list
	 *
	 * If after the above 2 steps we don't have enough(>= threshold) events
	 * we block for CQ notification and sleep. Upon being woken up we start
	 * at step 1 again.
	 */

	/*
	 * Note: this could be 0 or INFINITE or anyother value in microsec
	 */
	if (args.evp_timeout > 0) {
		if (args.evp_timeout >= LONG_MAX) {
			timeout = LONG_MAX;
		} else {
			clock_t	curr_time = ddi_get_lbolt();

			/* convert the microsecond timeout to absolute ticks */
			timeout = curr_time +
			    drv_usectohz((clock_t)args.evp_timeout);
			/*
			 * use the max value if we wrapped around
			 */
			if (timeout <= curr_time) {
				timeout = LONG_MAX;
			}
		}
	} else {
		timeout = 0;
	}

	mutex_enter(&evd_rp->evd_lock);
	for (;;) {
		/*
		 * If this evd is waiting for CM events check that now.
		 */
		if ((evd_rp->evd_flags & DAT_EVD_CR_FLAG) &&
		    (evd_rp->evd_cr_events.eel_num_elements > 0)) {
			/* dequeue events from evd_cr_events list */
			while (head = daplka_evd_event_dequeue(
			    &evd_rp->evd_cr_events)) {
				/*
				 * populate the evp array
				 */
				evp[num_events].ibe_ev_family = DAPL_CR_EVENTS;
				evp[num_events].ibe_ce.ibce_event =
				    head->ee_cmev.ec_cm_ev_type;
				evp[num_events].ibe_ce.ibce_cookie =
				    (uint64_t)head->ee_cmev.ec_cm_cookie;
				evp[num_events].ibe_ce.ibce_psep_cookie =
				    head->ee_cmev.ec_cm_psep_cookie;
				/* consumes head's private data buffer */
				daplka_crevent_privdata_post(ia_rp,
				    &evp[num_events], head);
				kmem_free(head, sizeof (daplka_evd_event_t));

				if (++num_events == max_events) {
					mutex_exit(&evd_rp->evd_lock);
					goto maxevent_reached;
				}
			}
		}

		if ((evd_rp->evd_flags & DAT_EVD_CONNECTION_FLAG) &&
		    (evd_rp->evd_conn_events.eel_num_elements > 0)) {
			/* dequeue events from evd_connection_events list */
			while ((head = daplka_evd_event_dequeue
			    (&evd_rp->evd_conn_events))) {
				/*
				 * populate the evp array -
				 *
				 */
				if (head->ee_cmev.ec_cm_is_passive) {
					evp[num_events].ibe_ev_family =
					    DAPL_PASSIVE_CONNECTION_EVENTS;
				} else {
					evp[num_events].ibe_ev_family =
					    DAPL_ACTIVE_CONNECTION_EVENTS;
				}
				evp[num_events].ibe_ce.ibce_event =
				    head->ee_cmev.ec_cm_ev_type;
				evp[num_events].ibe_ce.ibce_cookie =
				    (uint64_t)head->ee_cmev.ec_cm_cookie;
				evp[num_events].ibe_ce.ibce_psep_cookie =
				    head->ee_cmev.ec_cm_psep_cookie;

				if (head->ee_cmev.ec_cm_ev_priv_data_len > 0) {
					pd = head->ee_cmev.ec_cm_ev_priv_data;
					n = head->
					    ee_cmev.ec_cm_ev_priv_data_len;
					bcopy(pd, (void *)evp[num_events].
					    ibe_ce.ibce_priv_data_ptr, n);
					evp[num_events].ibe_ce.
					    ibce_priv_data_size = n;
					kmem_free(pd, n);
				}

				kmem_free(head, sizeof (daplka_evd_event_t));

				if (++num_events == max_events) {
					mutex_exit(&evd_rp->evd_lock);
					goto maxevent_reached;
				}
			}
		}

		if ((evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) &&
		    (evd_rp->evd_async_events.eel_num_elements > 0)) {
			/* dequeue events from evd_async_events list */
			while (head = daplka_evd_event_dequeue(
			    &evd_rp->evd_async_events)) {
				/*
				 * populate the evp array
				 */
				evp[num_events].ibe_ev_family =
				    DAPL_ASYNC_EVENTS;
				evp[num_events].ibe_async.ibae_type =
				    head->ee_aev.ibae_type;
				evp[num_events].ibe_async.ibae_hca_guid =
				    head->ee_aev.ibae_hca_guid;
				evp[num_events].ibe_async.ibae_cookie =
				    head->ee_aev.ibae_cookie;
				evp[num_events].ibe_async.ibae_port =
				    head->ee_aev.ibae_port;

				kmem_free(head, sizeof (daplka_evd_event_t));

				if (++num_events == max_events) {
					break;
				}
			}
		}

		/*
		 * We have sufficient events for this call so no need to wait
		 */
		if ((threshold > 0) && (num_events >= threshold)) {
			mutex_exit(&evd_rp->evd_lock);
			break;
		}

		evd_rp->evd_waiters++;
		/*
		 * There are no new events and a timeout was specified.
		 * Note: for CQ events threshold is 0 but timeout is
		 * not necessarily 0.
		 */
		while ((evd_rp->evd_newevents == DAPLKA_EVD_NO_EVENTS) &&
		    timeout) {
			/*
			 * wait return convention (cv_timedwait_sig style):
			 * 0 = interrupted by a signal, -1 = timed out,
			 * >0 = woken up normally, re-check for events
			 */
			retval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv,
			    &evd_rp->evd_lock, timeout);
			if (retval == 0) {
				retval = EINTR;
				break;
			} else if (retval == -1) {
				retval = ETIME;
				break;
			} else {
				retval = 0;
				continue;
			}
		}
		evd_rp->evd_waiters--;
		if (evd_rp->evd_newevents != DAPLKA_EVD_NO_EVENTS) {
			/*
			 * If we got woken up by the CQ handler due to events
			 * in the CQ. Need to go to userland to check for
			 * CQ events. Or if we were woken up due to S/W events
			 */

			/* check for userland events only */
			if (!(evd_rp->evd_newevents &
			    ~DAPLKA_EVD_ULAND_EVENTS)) {
				evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
				mutex_exit(&evd_rp->evd_lock);
				break;
			}
			/*
			 * Clear newevents since we are going to loopback
			 * back and check for both CM and CQ events
			 */
			evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
		} else { /* error */
			mutex_exit(&evd_rp->evd_lock);
			break;
		}
	}

maxevent_reached:
	args.evp_num_polled = num_events;

	/*
	 * At this point retval might have a value that we want to return
	 * back to the user. So the copyouts shouldn't tamper retval.
	 */
	if (args.evp_num_polled > 0) { /* copyout the events */
		rc = ddi_copyout(evp, args.evp_ep, args.evp_num_polled *
		    sizeof (dapl_ib_event_t), mode);
		if (rc != 0) { /* XXX: we are losing events here */
			DERR("event_poll: event array copyout error %d", rc);
			retval = EFAULT;
			goto cleanup;
		}
		rc = daplka_event_poll_copyout(&args, arg, mode);
		if (rc != 0) { /* XXX: we are losing events here */
			DERR("event_poll: copyout error %d\n", rc);
			retval = EFAULT;
			goto cleanup;
		}
	}

cleanup:;
	/* evp_size is only meaningful when the array was heap-allocated */
	if ((max_events > NUM_EVENTS_PER_POLL) && (evp_start != NULL)) {
		kmem_free(evp_start, evp_size);
	}

	if (evd_rp != NULL) {
		DAPLKA_RS_UNREF(evd_rp);
	}
	return (retval);
}
3321 3312
3322 3313 /* ARGSUSED */
3323 3314 static int
3324 3315 daplka_event_wakeup(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3325 3316 cred_t *cred, int *rvalp)
3326 3317 {
3327 3318 dapl_event_wakeup_t args;
3328 3319 daplka_evd_resource_t *evd_rp;
3329 3320 int retval;
3330 3321
3331 3322 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_event_wakeup_t),
3332 3323 mode);
3333 3324 if (retval != 0) {
3334 3325 DERR("event_wakeup: copyin error %d\n", retval);
3335 3326 return (EFAULT);
3336 3327 }
3337 3328
3338 3329 /* get evd resource */
3339 3330 evd_rp = (daplka_evd_resource_t *)
3340 3331 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evw_hkey);
3341 3332 if (evd_rp == NULL) {
3342 3333 DERR("event_wakeup: cannot find evd resource\n");
3343 3334 return (EINVAL);
3344 3335 }
3345 3336 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3346 3337
3347 3338 daplka_evd_wakeup(evd_rp, NULL, NULL);
3348 3339
3349 3340 DAPLKA_RS_UNREF(evd_rp);
3350 3341
3351 3342 return (retval);
3352 3343 }
3353 3344
/*
 * associates an EVD with a new CNO, or dissociates it from any CNO
 * when evmc_cno_hkey is 0. the reference taken on the new CNO by the
 * hash lookup is kept (the EVD now owns it); the reference on the
 * previously attached CNO, if any, is dropped.
 *
 * returns 0 on success; EFAULT on copyin failure; EINVAL if the EVD
 * or the requested CNO cannot be found.
 */
/* ARGSUSED */
static int
daplka_evd_modify_cno(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_evd_modify_cno_t	args;
	daplka_evd_resource_t	*evd_rp;
	daplka_cno_resource_t	*cno_rp;
	daplka_cno_resource_t	*old_cno_rp;
	int			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_modify_cno_t),
	    mode);
	if (retval != 0) {
		DERR("evd_modify_cno: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get evd resource; lookup adds a hold dropped in cleanup */
	evd_rp = (daplka_evd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evmc_hkey);
	if (evd_rp == NULL) {
		DERR("evd_modify_cno: cannot find evd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);

	if (args.evmc_cno_hkey > 0) {
		/* get cno resource corresponding to the new CNO */
		cno_rp = (daplka_cno_resource_t *)
		    daplka_hash_lookup(&ia_rp->ia_cno_htbl,
		    args.evmc_cno_hkey);
		if (cno_rp == NULL) {
			DERR("evd_modify_cno: cannot find CNO resource\n");
			retval = EINVAL;
			goto cleanup;
		}
		ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
	} else {
		/* zero hkey means detach from any CNO */
		cno_rp = NULL;
	}

	/* swap the CNO pointer under evd_lock */
	mutex_enter(&evd_rp->evd_lock);
	old_cno_rp = evd_rp->evd_cno_res;
	evd_rp->evd_cno_res = cno_rp;
	mutex_exit(&evd_rp->evd_lock);

	/*
	 * drop the refcnt on the old CNO, the refcnt on the new CNO is
	 * retained since the evd holds a reference to it.
	 */
	if (old_cno_rp) {
		DAPLKA_RS_UNREF(old_cno_rp);
	}

cleanup:
	if (evd_rp) {
		DAPLKA_RS_UNREF(evd_rp);
	}

	return (retval);
}
3417 3408
3418 3409 /*
3419 3410 * Frees the EVD and associated resources.
3420 3411 * If there are other threads still using this EVD, the destruction
3421 3412 * will defer until the EVD's refcnt drops to zero.
3422 3413 */
3423 3414 /* ARGSUSED */
3424 3415 static int
3425 3416 daplka_evd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3426 3417 cred_t *cred, int *rvalp)
3427 3418 {
3428 3419 daplka_evd_resource_t *evd_rp = NULL;
3429 3420 daplka_async_evd_hkey_t *curr;
3430 3421 daplka_async_evd_hkey_t *prev;
3431 3422 dapl_evd_free_t args;
3432 3423 int retval = 0;
3433 3424
3434 3425 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_free_t), mode);
3435 3426 if (retval != 0) {
3436 3427 DERR("evd_free: copyin error %d\n", retval);
3437 3428 return (EFAULT);
3438 3429 }
3439 3430 retval = daplka_hash_remove(&ia_rp->ia_evd_htbl, args.evf_hkey,
3440 3431 (void **)&evd_rp);
3441 3432 if (retval != 0 || evd_rp == NULL) {
3442 3433 DERR("evd_free: cannot find evd resource\n");
3443 3434 return (EINVAL);
3444 3435 }
3445 3436 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3446 3437
3447 3438 /* If this is an async evd remove it from the IA's async evd list */
3448 3439 if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
3449 3440 mutex_enter(&ia_rp->ia_lock);
3450 3441 curr = prev = ia_rp->ia_async_evd_hkeys;
3451 3442 while (curr != NULL) {
3452 3443 if (curr->aeh_evd_hkey == args.evf_hkey) {
3453 3444 /* unlink curr from the list */
3454 3445 if (curr == prev) {
3455 3446 /*
3456 3447 * if first element in the list update
3457 3448 * the list head
3458 3449 */
3459 3450 ia_rp->ia_async_evd_hkeys =
3460 3451 curr->aeh_next;
3461 3452 } else {
3462 3453 prev->aeh_next = curr->aeh_next;
3463 3454 }
3464 3455 break;
3465 3456 }
3466 3457 prev = curr;
3467 3458 curr = curr->aeh_next;
3468 3459 }
3469 3460 mutex_exit(&ia_rp->ia_lock);
3470 3461 /* free the curr entry */
3471 3462 kmem_free(curr, sizeof (daplka_async_evd_hkey_t));
3472 3463 }
3473 3464
3474 3465 /* UNREF calls the actual free function when refcnt is zero */
3475 3466 DAPLKA_RS_UNREF(evd_rp);
3476 3467 return (0);
3477 3468 }
3478 3469
3479 3470 /*
3480 3471 * destroys EVD resource.
↓ open down ↓ |
773 lines elided |
↑ open up ↑ |
3481 3472 * called when refcnt drops to zero.
3482 3473 */
3483 3474 static int
3484 3475 daplka_evd_destroy(daplka_resource_t *gen_rp)
3485 3476 {
3486 3477 daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)gen_rp;
3487 3478 ibt_status_t status;
3488 3479 daplka_evd_event_t *evt;
3489 3480 ibt_priv_data_len_t len;
3490 3481
3491 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
3492 3482 D3("evd_destroy: entering, evd_rp 0x%p, rnum %d\n",
3493 3483 evd_rp, DAPLKA_RS_RNUM(evd_rp));
3494 3484 /*
3495 3485 * free CQ
3496 3486 */
3497 3487 if (evd_rp->evd_cq_hdl) {
3498 3488 ibt_set_cq_handler(evd_rp->evd_cq_hdl, NULL, NULL);
3499 3489 mutex_enter(&daplka_dev->daplka_mutex);
3500 3490 ibt_set_cq_private(evd_rp->evd_cq_hdl, NULL);
3501 3491 mutex_exit(&daplka_dev->daplka_mutex);
3502 3492
3503 3493 status = daplka_ibt_free_cq(evd_rp, evd_rp->evd_cq_hdl);
3504 3494 if (status != IBT_SUCCESS) {
3505 3495 DERR("evd_destroy: ibt_free_cq returned %d\n", status);
3506 3496 }
3507 3497 evd_rp->evd_cq_hdl = NULL;
3508 3498 D2("evd_destroy: cq freed, rnum %d\n", DAPLKA_RS_RNUM(evd_rp));
3509 3499 }
3510 3500
3511 3501 /*
3512 3502 * release reference on CNO
3513 3503 */
3514 3504 if (evd_rp->evd_cno_res != NULL) {
3515 3505 mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3516 3506 if (evd_rp->evd_cno_res->cno_evd_cookie ==
3517 3507 evd_rp->evd_cookie) {
3518 3508 evd_rp->evd_cno_res->cno_evd_cookie = 0;
3519 3509 }
3520 3510 mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3521 3511 DAPLKA_RS_UNREF(evd_rp->evd_cno_res);
3522 3512 evd_rp->evd_cno_res = NULL;
3523 3513 }
3524 3514
3525 3515 /*
3526 3516 * discard all remaining events
3527 3517 */
3528 3518 mutex_enter(&evd_rp->evd_lock);
3529 3519 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_cr_events))) {
3530 3520 D2("evd_destroy: discarding CR event: %d\n",
3531 3521 evt->ee_cmev.ec_cm_ev_type);
3532 3522 len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3533 3523 if (len > 0) {
3534 3524 kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3535 3525 evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3536 3526 evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3537 3527 }
3538 3528 kmem_free(evt, sizeof (*evt));
3539 3529 }
3540 3530 ASSERT(evd_rp->evd_cr_events.eel_num_elements == 0);
3541 3531
3542 3532 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_conn_events))) {
3543 3533 D2("evd_destroy: discarding CONN event: %d\n",
3544 3534 evt->ee_cmev.ec_cm_ev_type);
3545 3535 len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3546 3536 if (len > 0) {
3547 3537 kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3548 3538 evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3549 3539 evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3550 3540 }
3551 3541 kmem_free(evt, sizeof (*evt));
3552 3542 }
3553 3543 ASSERT(evd_rp->evd_conn_events.eel_num_elements == 0);
3554 3544
3555 3545 while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_async_events))) {
3556 3546 DERR("evd_destroy: discarding ASYNC event: %d\n",
3557 3547 evt->ee_aev.ibae_type);
3558 3548 kmem_free(evt, sizeof (*evt));
3559 3549 }
3560 3550 ASSERT(evd_rp->evd_async_events.eel_num_elements == 0);
3561 3551 mutex_exit(&evd_rp->evd_lock);
3562 3552
3563 3553 mutex_destroy(&evd_rp->evd_lock);
3564 3554 DAPLKA_RS_FINI(evd_rp);
3565 3555 kmem_free(evd_rp, sizeof (daplka_evd_resource_t));
3566 3556 D3("evd_destroy: exiting, evd_rp 0x%p\n", evd_rp);
3567 3557 return (0);
3568 3558 }
3569 3559
3570 3560 static void
3571 3561 daplka_hash_evd_free(void *obj)
3572 3562 {
3573 3563 daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)obj;
3574 3564
3575 3565 ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3576 3566 DAPLKA_RS_UNREF(evd_rp);
3577 3567 }
3578 3568
3579 3569 /*
3580 3570 * this handler fires when new completions arrive.
3581 3571 */
3582 3572 /* ARGSUSED */
3583 3573 static void
3584 3574 daplka_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
3585 3575 {
3586 3576 D3("cq_handler: fired setting evd_newevents\n");
3587 3577 daplka_evd_wakeup((daplka_evd_resource_t *)arg, NULL, NULL);
3588 3578 }
3589 3579
3590 3580 /*
3591 3581 * this routine wakes up a client from evd_wait. if evtq and evt
3592 3582 * are non-null, the event evt will be enqueued prior to waking
3593 3583 * up the client. if the evd is associated with a CNO and if there
3594 3584 * are no waiters on the evd, the CNO will be notified.
3595 3585 */
3596 3586 static void
3597 3587 daplka_evd_wakeup(daplka_evd_resource_t *evd_rp, daplka_evd_event_list_t *evtq,
3598 3588 daplka_evd_event_t *evt)
3599 3589 {
3600 3590 uint32_t waiters = 0;
3601 3591
3602 3592 mutex_enter(&evd_rp->evd_lock);
3603 3593 if (evtq != NULL && evt != NULL) {
3604 3594 ASSERT(evtq == &evd_rp->evd_cr_events ||
3605 3595 evtq == &evd_rp->evd_conn_events ||
3606 3596 evtq == &evd_rp->evd_async_events);
3607 3597 daplka_evd_event_enqueue(evtq, evt);
3608 3598 ASSERT((evtq->eel_event_type == DAPLKA_EVD_CM_EVENTS) ||
3609 3599 (evtq->eel_event_type == DAPLKA_EVD_ASYNC_EVENTS));
3610 3600 evd_rp->evd_newevents |= evtq->eel_event_type;
3611 3601 } else {
3612 3602 evd_rp->evd_newevents |= DAPLKA_EVD_ULAND_EVENTS;
3613 3603 }
3614 3604 waiters = evd_rp->evd_waiters;
3615 3605 cv_broadcast(&evd_rp->evd_cv);
3616 3606 mutex_exit(&evd_rp->evd_lock);
3617 3607
3618 3608 /*
3619 3609 * only wakeup the CNO if there are no waiters on this evd.
3620 3610 */
3621 3611 if (evd_rp->evd_cno_res != NULL && waiters == 0) {
3622 3612 mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3623 3613 evd_rp->evd_cno_res->cno_evd_cookie = evd_rp->evd_cookie;
3624 3614 cv_broadcast(&evd_rp->evd_cno_res->cno_cv);
3625 3615 mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3626 3616 }
3627 3617 }
3628 3618
3629 3619 /*
3630 3620 * daplka_evd_event_enqueue adds elem to the end of the event list
3631 3621 * The caller is expected to acquire appropriate locks before
3632 3622 * calling enqueue
3633 3623 */
3634 3624 static void
3635 3625 daplka_evd_event_enqueue(daplka_evd_event_list_t *evlist,
3636 3626 daplka_evd_event_t *elem)
3637 3627 {
3638 3628 if (evlist->eel_tail) {
3639 3629 evlist->eel_tail->ee_next = elem;
3640 3630 evlist->eel_tail = elem;
3641 3631 } else {
3642 3632 /* list is empty */
3643 3633 ASSERT(evlist->eel_head == NULL);
3644 3634 evlist->eel_head = elem;
3645 3635 evlist->eel_tail = elem;
3646 3636 }
3647 3637 evlist->eel_num_elements++;
3648 3638 }
3649 3639
3650 3640 /*
3651 3641 * daplka_evd_event_dequeue removes and returns the first element of event
3652 3642 * list. NULL is returned if the list is empty. The caller is expected to
3653 3643 * acquire appropriate locks before calling enqueue.
3654 3644 */
3655 3645 static daplka_evd_event_t *
3656 3646 daplka_evd_event_dequeue(daplka_evd_event_list_t *evlist)
3657 3647 {
3658 3648 daplka_evd_event_t *head;
3659 3649
3660 3650 head = evlist->eel_head;
3661 3651 if (head == NULL) {
3662 3652 return (NULL);
3663 3653 }
3664 3654
3665 3655 evlist->eel_head = head->ee_next;
3666 3656 evlist->eel_num_elements--;
3667 3657 /* if it was the last element update the tail pointer too */
3668 3658 if (evlist->eel_head == NULL) {
3669 3659 ASSERT(evlist->eel_num_elements == 0);
3670 3660 evlist->eel_tail = NULL;
3671 3661 }
3672 3662 return (head);
3673 3663 }
3674 3664
3675 3665 /*
3676 3666 * A CNO allows the client to wait for notifications from multiple EVDs.
3677 3667 * To use a CNO, the client needs to follow the procedure below:
3678 3668 * 1. allocate a CNO. this returns a cno_hkey that identifies the CNO.
3679 3669 * 2. create one or more EVDs using the returned cno_hkey.
3680 3670 * 3. call cno_wait. when one of the associated EVDs get notified, the
3681 3671 * CNO will also get notified. cno_wait will then return with a
3682 3672 * evd_cookie identifying the EVD that triggered the event.
3683 3673 *
3684 3674 * A note about cno_wait:
3685 3675 * -unlike a EVD, a CNO does not maintain a queue of notifications. For
3686 3676 * example, suppose multiple EVDs triggered a CNO before the client calls
3687 3677 * cno_wait; when the client calls cno_wait, it will return with the
3688 3678 * evd_cookie that identifies the *last* EVD that triggered the CNO. It
3689 3679 * is the responsibility of the client, upon returning from cno_wait, to
3690 3680 * check on all EVDs that can potentially trigger the CNO. the returned
3691 3681 * evd_cookie is only meant to be a hint. there is no guarantee that the
3692 3682 * EVD identified by the evd_cookie still contains an event or still
3693 3683 * exists by the time cno_wait returns.
3694 3684 */
3695 3685
3696 3686 /*
3697 3687 * allocates a CNO.
3698 3688 * the returned cno_hkey may subsequently be used in evd_create.
3699 3689 */
3700 3690 /* ARGSUSED */
3701 3691 static int
3702 3692 daplka_cno_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3703 3693 cred_t *cred, int *rvalp)
3704 3694 {
3705 3695 dapl_cno_alloc_t args;
↓ open down ↓ |
204 lines elided |
↑ open up ↑ |
3706 3696 daplka_cno_resource_t *cno_rp = NULL;
3707 3697 uint64_t cno_hkey = 0;
3708 3698 boolean_t inserted = B_FALSE;
3709 3699 int retval = 0;
3710 3700
3711 3701 cno_rp = kmem_zalloc(sizeof (*cno_rp), daplka_km_flags);
3712 3702 if (cno_rp == NULL) {
3713 3703 DERR("cno_alloc: cannot allocate cno resource\n");
3714 3704 return (ENOMEM);
3715 3705 }
3716 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cno_rp))
3717 3706 DAPLKA_RS_INIT(cno_rp, DAPL_TYPE_CNO,
3718 3707 DAPLKA_RS_RNUM(ia_rp), daplka_cno_destroy);
3719 3708
3720 3709 mutex_init(&cno_rp->cno_lock, NULL, MUTEX_DRIVER, NULL);
3721 3710 cv_init(&cno_rp->cno_cv, NULL, CV_DRIVER, NULL);
3722 3711 cno_rp->cno_evd_cookie = 0;
3723 3712
3724 3713 /* insert into cno hash table */
3725 3714 retval = daplka_hash_insert(&ia_rp->ia_cno_htbl,
3726 3715 &cno_hkey, (void *)cno_rp);
3727 3716 if (retval != 0) {
3728 3717 DERR("cno_alloc: cannot insert cno resource\n");
3729 3718 goto cleanup;
3730 3719 }
3731 3720 inserted = B_TRUE;
3732 - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cno_rp))
3733 3721
3734 3722 /* return hkey to library */
3735 3723 args.cno_hkey = cno_hkey;
3736 3724
3737 3725 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cno_alloc_t),
3738 3726 mode);
3739 3727 if (retval != 0) {
3740 3728 DERR("cno_alloc: copyout error %d\n", retval);
3741 3729 retval = EFAULT;
3742 3730 goto cleanup;
3743 3731 }
3744 3732 return (0);
3745 3733
3746 3734 cleanup:;
3747 3735 if (inserted) {
3748 3736 daplka_cno_resource_t *free_rp = NULL;
3749 3737
3750 3738 (void) daplka_hash_remove(&ia_rp->ia_cno_htbl, cno_hkey,
3751 3739 (void **)&free_rp);
3752 3740 if (free_rp != cno_rp) {
3753 3741 DERR("cno_alloc: cannot remove cno\n");
3754 3742 /*
3755 3743 * we can only get here if another thread
3756 3744 * has completed the cleanup in cno_free
3757 3745 */
3758 3746 return (retval);
3759 3747 }
3760 3748 }
3761 3749 DAPLKA_RS_UNREF(cno_rp);
3762 3750 return (retval);
3763 3751 }
3764 3752
3765 3753 /*
3766 3754 * destroys a CNO.
3767 3755 * this gets called when a CNO resource's refcnt drops to zero.
3768 3756 */
3769 3757 static int
3770 3758 daplka_cno_destroy(daplka_resource_t *gen_rp)
3771 3759 {
3772 3760 daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)gen_rp;
3773 3761
3774 3762 ASSERT(DAPLKA_RS_REFCNT(cno_rp) == 0);
3775 3763 D2("cno_destroy: entering, cno_rp %p, rnum %d\n",
3776 3764 cno_rp, DAPLKA_RS_RNUM(cno_rp));
3777 3765
3778 3766 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3779 3767 cv_destroy(&cno_rp->cno_cv);
3780 3768 mutex_destroy(&cno_rp->cno_lock);
3781 3769
3782 3770 DAPLKA_RS_FINI(cno_rp);
3783 3771 kmem_free(cno_rp, sizeof (daplka_cno_resource_t));
3784 3772 D2("cno_destroy: exiting, cno_rp %p\n", cno_rp);
3785 3773 return (0);
3786 3774 }
3787 3775
3788 3776 static void
3789 3777 daplka_hash_cno_free(void *obj)
3790 3778 {
3791 3779 daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)obj;
3792 3780
3793 3781 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3794 3782 DAPLKA_RS_UNREF(cno_rp);
3795 3783 }
3796 3784
3797 3785 /*
3798 3786 * removes the CNO from the cno hash table and frees the CNO
3799 3787 * if there are no references to it. if there are references to
3800 3788 * it, the CNO will be destroyed when the last of the references
3801 3789 * is released. once the CNO is removed from the cno hash table,
3802 3790 * the client will no longer be able to call cno_wait on the CNO.
3803 3791 */
3804 3792 /* ARGSUSED */
3805 3793 static int
3806 3794 daplka_cno_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3807 3795 cred_t *cred, int *rvalp)
3808 3796 {
3809 3797 daplka_cno_resource_t *cno_rp = NULL;
3810 3798 dapl_cno_free_t args;
3811 3799 int retval = 0;
3812 3800
3813 3801 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_free_t), mode);
3814 3802 if (retval != 0) {
3815 3803 DERR("cno_free: copyin error %d\n", retval);
3816 3804 return (EINVAL);
3817 3805 }
3818 3806
3819 3807 retval = daplka_hash_remove(&ia_rp->ia_cno_htbl,
3820 3808 args.cnf_hkey, (void **)&cno_rp);
3821 3809 if (retval != 0 || cno_rp == NULL) {
3822 3810 DERR("cno_free: cannot find cno resource\n");
3823 3811 return (EINVAL);
3824 3812 }
3825 3813 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3826 3814
3827 3815 /* UNREF calls the actual free function when refcnt is zero */
3828 3816 DAPLKA_RS_UNREF(cno_rp);
3829 3817 return (0);
3830 3818 }
3831 3819
3832 3820 /*
3833 3821 * wait for a notification from one of the associated EVDs.
3834 3822 */
3835 3823 /* ARGSUSED */
3836 3824 static int
3837 3825 daplka_cno_wait(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3838 3826 cred_t *cred, int *rvalp)
3839 3827 {
3840 3828 daplka_cno_resource_t *cno_rp = NULL;
3841 3829 dapl_cno_wait_t args;
3842 3830 int retval = 0;
3843 3831 uint64_t evd_cookie = 0;
3844 3832 clock_t timeout, curr_time;
3845 3833
3846 3834 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_wait_t), mode);
3847 3835 if (retval != 0) {
3848 3836 DERR("cno_wait: copyin error %d\n", retval);
3849 3837 return (EINVAL);
3850 3838 }
3851 3839 /* get cno resource */
3852 3840 cno_rp = (daplka_cno_resource_t *)
3853 3841 daplka_hash_lookup(&ia_rp->ia_cno_htbl, args.cnw_hkey);
3854 3842 if (cno_rp == NULL) {
3855 3843 DERR("cno_wait: cannot find cno resource\n");
3856 3844 return (EINVAL);
3857 3845 }
3858 3846 ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3859 3847
3860 3848 curr_time = ddi_get_lbolt();
3861 3849 timeout = curr_time + drv_usectohz(args.cnw_timeout);
3862 3850
3863 3851 /*
3864 3852 * use the max value if we wrapped around
3865 3853 */
3866 3854 if (args.cnw_timeout > 0 && timeout <= curr_time) {
3867 3855 /*
3868 3856 * clock_t (size long) changes between 32 and 64-bit kernels
3869 3857 */
3870 3858 timeout = LONG_MAX >> 4;
3871 3859 }
3872 3860 mutex_enter(&cno_rp->cno_lock);
3873 3861 while (cno_rp->cno_evd_cookie == 0) {
3874 3862 int rval = 0;
3875 3863
3876 3864 rval = cv_timedwait_sig(&cno_rp->cno_cv,
3877 3865 &cno_rp->cno_lock, timeout);
3878 3866 if (rval == 0) {
3879 3867 DERR("cno_wait: interrupted\n");
3880 3868 mutex_exit(&cno_rp->cno_lock);
3881 3869 retval = EINTR;
3882 3870 goto cleanup;
3883 3871 } else if (rval == -1) {
3884 3872 DERR("cno_wait: timed out\n");
3885 3873 mutex_exit(&cno_rp->cno_lock);
3886 3874 retval = ETIME;
3887 3875 goto cleanup;
3888 3876 }
3889 3877 }
3890 3878 evd_cookie = cno_rp->cno_evd_cookie;
3891 3879 cno_rp->cno_evd_cookie = 0;
3892 3880 mutex_exit(&cno_rp->cno_lock);
3893 3881
3894 3882 ASSERT(evd_cookie != 0);
3895 3883 D2("cno_wait: returning evd_cookie 0x%p\n",
3896 3884 (void *)(uintptr_t)evd_cookie);
3897 3885 args.cnw_evd_cookie = evd_cookie;
3898 3886 retval = ddi_copyout((void *)&args, (void *)arg,
3899 3887 sizeof (dapl_cno_wait_t), mode);
3900 3888 if (retval != 0) {
3901 3889 DERR("cno_wait: copyout error %d\n", retval);
3902 3890 retval = EFAULT;
3903 3891 goto cleanup;
3904 3892 }
3905 3893
3906 3894 cleanup:;
3907 3895 if (cno_rp != NULL) {
3908 3896 DAPLKA_RS_UNREF(cno_rp);
3909 3897 }
3910 3898 return (retval);
3911 3899 }
3912 3900
3913 3901 /*
3914 3902 * this function is called by the client when it decides to
3915 3903 * accept a connection request. a connection request is generated
3916 3904 * when the active side generates REQ MAD to a service point on
3917 3905 * the destination node. this causes the CM service handler
3918 3906 * (daplka_cm_service_req) on the passive side to be callee. This
3919 3907 * handler will then enqueue this connection request to the backlog
3920 3908 * array of the service point. A connection event containing the
3921 3909 * backlog array index and connection request private data is passed
3922 3910 * to the client's service point EVD (sp_evd_res). once the event
3923 3911 * is passed up to the userland, the client may examine the request
3924 3912 * to decide whether to call daplka_cr_accept or dapka_cr_reject.
3925 3913 */
3926 3914 /* ARGSUSED */
3927 3915 static int
3928 3916 daplka_cr_accept(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3929 3917 cred_t *cred, int *rvalp)
3930 3918 {
3931 3919 daplka_ep_resource_t *ep_rp = NULL;
3932 3920 daplka_sp_resource_t *sp_rp = NULL;
3933 3921 dapl_cr_accept_t args;
3934 3922 daplka_sp_conn_pend_t *conn;
3935 3923 ibt_cm_proceed_reply_t proc_reply;
3936 3924 ibt_status_t status;
3937 3925 uint16_t bkl_index;
3938 3926 uint32_t old_state, new_state;
3939 3927 int retval = 0;
3940 3928 void *priv_data = NULL, *sid;
3941 3929
3942 3930 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_accept_t),
3943 3931 mode);
3944 3932 if (retval != 0) {
3945 3933 DERR("cr_accept: copyin error %d\n", retval);
3946 3934 return (EFAULT);
3947 3935 }
3948 3936 if (args.cra_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
3949 3937 DERR("cr_accept: private data len (%d) exceeded "
3950 3938 "max size %d\n", args.cra_priv_sz,
3951 3939 DAPL_MAX_PRIVATE_DATA_SIZE);
3952 3940 return (EINVAL);
3953 3941 }
3954 3942 priv_data = (args.cra_priv_sz > 0) ? (void *)args.cra_priv : NULL;
3955 3943
3956 3944 D2("cr_accept: priv(0x%p) priv_len(%u) psep(0x%llx)\n", priv_data,
3957 3945 args.cra_priv_sz, (longlong_t)args.cra_bkl_cookie);
3958 3946
3959 3947 /* get sp resource */
3960 3948 sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
3961 3949 args.cra_sp_hkey);
3962 3950 if (sp_rp == NULL) {
3963 3951 DERR("cr_accept: cannot find sp resource\n");
3964 3952 return (EINVAL);
3965 3953 }
3966 3954 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
3967 3955
3968 3956 /* get ep resource */
3969 3957 ep_rp = (daplka_ep_resource_t *)daplka_hash_lookup(&ia_rp->ia_ep_htbl,
3970 3958 args.cra_ep_hkey);
3971 3959 if (ep_rp == NULL) {
3972 3960 DERR("cr_accept: cannot find ep resource\n");
3973 3961 retval = EINVAL;
3974 3962 goto cleanup;
3975 3963 }
3976 3964 ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
3977 3965
3978 3966 /*
3979 3967 * accept is only allowed if ep_state is CLOSED.
3980 3968 * note that after this point, the ep_state is frozen
3981 3969 * (i.e. TRANSITIONING) until we transition ep_state
3982 3970 * to ACCEPTING or back to CLOSED if we get an error.
3983 3971 */
3984 3972 new_state = old_state = daplka_ep_get_state(ep_rp);
3985 3973 if (old_state != DAPLKA_EP_STATE_CLOSED) {
3986 3974 DERR("cr_accept: invalid ep state %d\n", old_state);
3987 3975 retval = EINVAL;
3988 3976 goto cleanup;
3989 3977 }
3990 3978
3991 3979 mutex_enter(&sp_rp->sp_lock);
3992 3980 bkl_index = DAPLKA_GET_PSEP_INDEX(args.cra_bkl_cookie);
3993 3981 /*
3994 3982 * make sure the backlog index is not bogus.
3995 3983 */
3996 3984 if (bkl_index >= sp_rp->sp_backlog_size) {
3997 3985 DERR("cr_accept: invalid backlog index 0x%llx %d\n",
3998 3986 (longlong_t)args.cra_bkl_cookie, bkl_index);
3999 3987 mutex_exit(&sp_rp->sp_lock);
4000 3988 retval = EINVAL;
4001 3989 goto cleanup;
4002 3990 }
4003 3991 /*
4004 3992 * make sure the backlog index indeed refers
4005 3993 * to a pending connection.
4006 3994 */
4007 3995 conn = &sp_rp->sp_backlog[bkl_index];
4008 3996 if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4009 3997 DERR("cr_accept: invalid conn state %d\n",
4010 3998 conn->spcp_state);
4011 3999 mutex_exit(&sp_rp->sp_lock);
4012 4000 retval = EINVAL;
4013 4001 goto cleanup;
4014 4002 }
4015 4003 if (conn->spcp_sid == NULL) {
4016 4004 DERR("cr_accept: sid == NULL\n");
4017 4005 mutex_exit(&sp_rp->sp_lock);
4018 4006 retval = EINVAL;
4019 4007 goto cleanup;
4020 4008 }
4021 4009 if (ep_rp->ep_chan_hdl == NULL) {
4022 4010 /*
4023 4011 * a ep_rp with a NULL chan_hdl is impossible.
4024 4012 */
4025 4013 DERR("cr_accept: ep_chan_hdl == NULL\n");
4026 4014 mutex_exit(&sp_rp->sp_lock);
4027 4015 ASSERT(B_FALSE);
4028 4016 retval = EINVAL;
4029 4017 goto cleanup;
4030 4018 }
4031 4019 proc_reply.rep.cm_channel = ep_rp->ep_chan_hdl;
4032 4020 proc_reply.rep.cm_rdma_ra_out = conn->spcp_rdma_ra_out;
4033 4021 proc_reply.rep.cm_rdma_ra_in = conn->spcp_rdma_ra_in;
4034 4022 proc_reply.rep.cm_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
4035 4023 sid = conn->spcp_sid;
4036 4024
4037 4025 /*
4038 4026 * this clears our slot in the backlog array.
4039 4027 * this slot may now be used by other pending connections.
4040 4028 */
4041 4029 conn->spcp_sid = NULL;
4042 4030 conn->spcp_state = DAPLKA_SPCP_INIT;
4043 4031 conn->spcp_req_len = 0;
4044 4032 mutex_exit(&sp_rp->sp_lock);
4045 4033
4046 4034 /*
4047 4035 * Set the unique cookie corresponding to the CR to this EP
4048 4036 * so that is can be used in passive side CM callbacks
4049 4037 */
4050 4038 ep_rp->ep_psep_cookie = args.cra_bkl_cookie;
4051 4039
4052 4040 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, IBT_CM_ACCEPT,
4053 4041 &proc_reply, priv_data, (ibt_priv_data_len_t)args.cra_priv_sz);
4054 4042
4055 4043 if (status != IBT_SUCCESS) {
4056 4044 DERR("cr_accept: ibt_cm_proceed returned %d\n", status);
4057 4045 *rvalp = (int)status;
4058 4046 retval = 0;
4059 4047 }
4060 4048 /*
4061 4049 * note that the CM handler may actually be called at this
4062 4050 * point. but since ep_state is still in TRANSITIONING, the
4063 4051 * handler will wait until we transition to ACCEPTING. this
4064 4052 * prevents the case where we set ep_state to ACCEPTING after
4065 4053 * daplka_service_conn_est sets ep_state to CONNECTED.
4066 4054 */
4067 4055 new_state = DAPLKA_EP_STATE_ACCEPTING;
4068 4056
4069 4057 cleanup:;
4070 4058 if (sp_rp != NULL) {
4071 4059 DAPLKA_RS_UNREF(sp_rp);
4072 4060 }
4073 4061 if (ep_rp != NULL) {
4074 4062 daplka_ep_set_state(ep_rp, old_state, new_state);
4075 4063 DAPLKA_RS_UNREF(ep_rp);
4076 4064 }
4077 4065 return (retval);
4078 4066 }
4079 4067
4080 4068 /*
4081 4069 * this function is called by the client to reject a
4082 4070 * connection request.
4083 4071 */
4084 4072 /* ARGSUSED */
4085 4073 static int
4086 4074 daplka_cr_reject(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4087 4075 cred_t *cred, int *rvalp)
4088 4076 {
4089 4077 dapl_cr_reject_t args;
4090 4078 daplka_sp_resource_t *sp_rp = NULL;
4091 4079 daplka_sp_conn_pend_t *conn;
4092 4080 ibt_cm_proceed_reply_t proc_reply;
4093 4081 ibt_cm_status_t proc_status;
4094 4082 ibt_status_t status;
4095 4083 uint16_t bkl_index;
4096 4084 int retval = 0;
4097 4085 void *sid;
4098 4086
4099 4087 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_reject_t),
4100 4088 mode);
4101 4089 if (retval != 0) {
4102 4090 DERR("cr_reject: copyin error %d\n", retval);
4103 4091 return (EFAULT);
4104 4092 }
4105 4093 /* get sp resource */
4106 4094 sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
4107 4095 args.crr_sp_hkey);
4108 4096 if (sp_rp == NULL) {
4109 4097 DERR("cr_reject: cannot find sp resource\n");
4110 4098 return (EINVAL);
4111 4099 }
4112 4100 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4113 4101
4114 4102 D2("cr_reject: psep(0x%llx)\n", (longlong_t)args.crr_bkl_cookie);
4115 4103
4116 4104 mutex_enter(&sp_rp->sp_lock);
4117 4105 bkl_index = DAPLKA_GET_PSEP_INDEX(args.crr_bkl_cookie);
4118 4106 /*
4119 4107 * make sure the backlog index is not bogus.
4120 4108 */
4121 4109 if (bkl_index >= sp_rp->sp_backlog_size) {
4122 4110 DERR("cr_reject: invalid backlog index 0x%llx %d\n",
4123 4111 (longlong_t)args.crr_bkl_cookie, bkl_index);
4124 4112 mutex_exit(&sp_rp->sp_lock);
4125 4113 retval = EINVAL;
4126 4114 goto cleanup;
4127 4115 }
4128 4116 /*
4129 4117 * make sure the backlog index indeed refers
4130 4118 * to a pending connection.
4131 4119 */
4132 4120 conn = &sp_rp->sp_backlog[bkl_index];
4133 4121 if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4134 4122 DERR("cr_reject: invalid conn state %d\n",
4135 4123 conn->spcp_state);
4136 4124 mutex_exit(&sp_rp->sp_lock);
4137 4125 retval = EINVAL;
4138 4126 goto cleanup;
4139 4127 }
4140 4128 if (conn->spcp_sid == NULL) {
4141 4129 DERR("cr_reject: sid == NULL\n");
4142 4130 mutex_exit(&sp_rp->sp_lock);
4143 4131 retval = EINVAL;
4144 4132 goto cleanup;
4145 4133 }
4146 4134 bzero(&proc_reply, sizeof (proc_reply));
4147 4135 sid = conn->spcp_sid;
4148 4136
4149 4137 /*
4150 4138 * this clears our slot in the backlog array.
4151 4139 * this slot may now be used by other pending connections.
4152 4140 */
4153 4141 conn->spcp_sid = NULL;
4154 4142 conn->spcp_state = DAPLKA_SPCP_INIT;
4155 4143 conn->spcp_req_len = 0;
4156 4144
4157 4145 switch (args.crr_reason) {
4158 4146 case DAPL_IB_CM_REJ_REASON_CONSUMER_REJ:
4159 4147 /* results in IBT_CM_CONSUMER as the reason for reject */
4160 4148 proc_status = IBT_CM_REJECT;
4161 4149 break;
4162 4150 case DAPL_IB_CME_LOCAL_FAILURE:
4163 4151 /*FALLTHRU*/
4164 4152 case DAPL_IB_CME_DESTINATION_UNREACHABLE:
4165 4153 /* results in IBT_CM_NO_RESC as the reason for reject */
4166 4154 proc_status = IBT_CM_NO_RESOURCE;
4167 4155 break;
4168 4156 default:
4169 4157 /* unexpect reason code */
4170 4158 ASSERT(!"unexpected reject reason code");
4171 4159 proc_status = IBT_CM_NO_RESOURCE;
4172 4160 break;
4173 4161 }
4174 4162
4175 4163 mutex_exit(&sp_rp->sp_lock);
4176 4164
4177 4165 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, proc_status,
4178 4166 &proc_reply, NULL, 0);
4179 4167
4180 4168 if (status != IBT_SUCCESS) {
4181 4169 DERR("cr_reject: ibt_cm_proceed returned %d\n", status);
4182 4170 *rvalp = (int)status;
4183 4171 retval = 0;
4184 4172 }
4185 4173
4186 4174 cleanup:;
4187 4175 if (sp_rp != NULL) {
4188 4176 DAPLKA_RS_UNREF(sp_rp);
4189 4177 }
4190 4178 return (retval);
↓ open down ↓ |
448 lines elided |
↑ open up ↑ |
4191 4179 }
4192 4180
4193 4181
4194 4182 /*
4195 4183 * daplka_sp_match is used by daplka_hash_walk for finding SPs
4196 4184 */
4197 4185 typedef struct daplka_sp_match_s {
4198 4186 uint64_t spm_conn_qual;
4199 4187 daplka_sp_resource_t *spm_sp_rp;
4200 4188 } daplka_sp_match_t;
4201 -_NOTE(SCHEME_PROTECTS_DATA("daplka", daplka_sp_match_s::spm_sp_rp))
4202 4189
4203 4190 static int
4204 4191 daplka_sp_match(void *objp, void *arg)
4205 4192 {
4206 4193 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)objp;
4207 4194
4208 4195 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4209 4196 if (sp_rp->sp_conn_qual ==
4210 4197 ((daplka_sp_match_t *)arg)->spm_conn_qual) {
4211 4198 ((daplka_sp_match_t *)arg)->spm_sp_rp = sp_rp;
4212 4199 D2("daplka_sp_match: found sp, conn_qual %016llu\n",
4213 4200 (longlong_t)((daplka_sp_match_t *)arg)->spm_conn_qual);
4214 4201 DAPLKA_RS_REF(sp_rp);
4215 4202 return (1);
4216 4203 }
4217 4204 return (0);
4218 4205 }
4219 4206
4220 4207 /*
4221 4208 * cr_handoff allows the client to handoff a connection request from
4222 4209 * one service point to another.
4223 4210 */
4224 4211 /* ARGSUSED */
4225 4212 static int
4226 4213 daplka_cr_handoff(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4227 4214 cred_t *cred, int *rvalp)
4228 4215 {
4229 4216 dapl_cr_handoff_t args;
4230 4217 daplka_sp_resource_t *sp_rp = NULL, *new_sp_rp = NULL;
4231 4218 daplka_sp_conn_pend_t *conn;
4232 4219 daplka_sp_match_t sp_match;
4233 4220 ibt_cm_event_t fake_event;
4234 4221 ibt_cm_status_t cm_status;
4235 4222 ibt_status_t status;
4236 4223 uint16_t bkl_index;
4237 4224 void *sid, *priv = NULL;
4238 4225 int retval = 0, priv_len = 0;
4239 4226
4240 4227 D3("cr_handoff: entering\n");
4241 4228 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_handoff_t),
4242 4229 mode);
4243 4230 if (retval != 0) {
4244 4231 DERR("cr_handoff: copyin error %d\n", retval);
4245 4232 return (EFAULT);
4246 4233 }
4247 4234 /* get sp resource */
4248 4235 sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
4249 4236 args.crh_sp_hkey);
4250 4237 if (sp_rp == NULL) {
4251 4238 DERR("cr_handoff: cannot find sp resource\n");
4252 4239 return (EINVAL);
4253 4240 }
4254 4241 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4255 4242
4256 4243 /*
4257 4244 * find the destination service point.
4258 4245 */
4259 4246 sp_match.spm_conn_qual = args.crh_conn_qual;
4260 4247 sp_match.spm_sp_rp = NULL;
4261 4248 daplka_hash_walk(&daplka_global_sp_htbl, daplka_sp_match,
4262 4249 (void *)&sp_match, RW_READER);
4263 4250
4264 4251 /*
4265 4252 * return if we cannot find the service point
4266 4253 */
4267 4254 if (sp_match.spm_sp_rp == NULL) {
4268 4255 DERR("cr_handoff: new sp not found, conn qual = %llu\n",
4269 4256 (longlong_t)args.crh_conn_qual);
4270 4257 retval = EINVAL;
4271 4258 goto cleanup;
4272 4259 }
4273 4260 new_sp_rp = sp_match.spm_sp_rp;
4274 4261
4275 4262 /*
4276 4263 * the spec does not discuss the security implications of this
4277 4264 * function. to be safe, we currently only allow processes
4278 4265 * owned by the same user to handoff connection requests
4279 4266 * to each other.
4280 4267 */
4281 4268 if (crgetruid(cred) != new_sp_rp->sp_ruid) {
4282 4269 DERR("cr_handoff: permission denied\n");
4283 4270 retval = EPERM;
4284 4271 goto cleanup;
4285 4272 }
4286 4273
4287 4274 D2("cr_handoff: psep(0x%llx)\n", (longlong_t)args.crh_bkl_cookie);
4288 4275
4289 4276 mutex_enter(&sp_rp->sp_lock);
4290 4277 bkl_index = DAPLKA_GET_PSEP_INDEX(args.crh_bkl_cookie);
4291 4278 /*
4292 4279 * make sure the backlog index is not bogus.
4293 4280 */
4294 4281 if (bkl_index >= sp_rp->sp_backlog_size) {
4295 4282 DERR("cr_handoff: invalid backlog index 0x%llx %d\n",
4296 4283 (longlong_t)args.crh_bkl_cookie, bkl_index);
4297 4284 mutex_exit(&sp_rp->sp_lock);
4298 4285 retval = EINVAL;
4299 4286 goto cleanup;
4300 4287 }
4301 4288 /*
4302 4289 * make sure the backlog index indeed refers
4303 4290 * to a pending connection.
4304 4291 */
4305 4292 conn = &sp_rp->sp_backlog[bkl_index];
4306 4293 if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4307 4294 DERR("cr_handoff: invalid conn state %d\n",
4308 4295 conn->spcp_state);
4309 4296 mutex_exit(&sp_rp->sp_lock);
4310 4297 retval = EINVAL;
4311 4298 goto cleanup;
4312 4299 }
4313 4300 if (conn->spcp_sid == NULL) {
4314 4301 DERR("cr_handoff: sid == NULL\n");
4315 4302 mutex_exit(&sp_rp->sp_lock);
4316 4303 retval = EINVAL;
4317 4304 goto cleanup;
4318 4305 }
4319 4306 sid = conn->spcp_sid;
4320 4307 priv = NULL;
4321 4308 priv_len = conn->spcp_req_len;
4322 4309 if (priv_len > 0) {
4323 4310 priv = kmem_zalloc(priv_len, daplka_km_flags);
4324 4311 if (priv == NULL) {
4325 4312 mutex_exit(&sp_rp->sp_lock);
4326 4313 retval = ENOMEM;
4327 4314 goto cleanup;
4328 4315 }
4329 4316 bcopy(conn->spcp_req_data, priv, priv_len);
4330 4317 }
4331 4318 /*
4332 4319 * this clears our slot in the backlog array.
4333 4320 * this slot may now be used by other pending connections.
4334 4321 */
4335 4322 conn->spcp_sid = NULL;
4336 4323 conn->spcp_state = DAPLKA_SPCP_INIT;
4337 4324 conn->spcp_req_len = 0;
4338 4325 mutex_exit(&sp_rp->sp_lock);
4339 4326
4340 4327 /* fill fake_event and call service_req handler */
4341 4328 bzero(&fake_event, sizeof (fake_event));
4342 4329 fake_event.cm_type = IBT_CM_EVENT_REQ_RCV;
4343 4330 fake_event.cm_session_id = sid;
4344 4331 fake_event.cm_priv_data_len = priv_len;
4345 4332 fake_event.cm_priv_data = priv;
4346 4333
4347 4334 cm_status = daplka_cm_service_req(new_sp_rp,
4348 4335 &fake_event, NULL, priv, (ibt_priv_data_len_t)priv_len);
4349 4336 if (cm_status != IBT_CM_DEFER) {
4350 4337 ibt_cm_proceed_reply_t proc_reply;
4351 4338
4352 4339 DERR("cr_handoff: service_req returned %d\n", cm_status);
4353 4340 /*
4354 4341 * if for some reason cm_service_req failed, we
4355 4342 * reject the connection.
4356 4343 */
4357 4344 bzero(&proc_reply, sizeof (proc_reply));
4358 4345
4359 4346 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid,
4360 4347 IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
4361 4348 if (status != IBT_SUCCESS) {
4362 4349 DERR("cr_handoff: ibt_cm_proceed returned %d\n",
4363 4350 status);
4364 4351 }
4365 4352 *rvalp = (int)status;
4366 4353 retval = 0;
4367 4354 }
4368 4355
4369 4356 cleanup:;
4370 4357 if (priv_len > 0 && priv != NULL) {
4371 4358 kmem_free(priv, priv_len);
4372 4359 }
4373 4360 if (new_sp_rp != NULL) {
4374 4361 DAPLKA_RS_UNREF(new_sp_rp);
4375 4362 }
4376 4363 if (sp_rp != NULL) {
4377 4364 DAPLKA_RS_UNREF(sp_rp);
4378 4365 }
4379 4366 D3("cr_handoff: exiting\n");
4380 4367 return (retval);
4381 4368 }
4382 4369
4383 4370 /*
4384 4371 * returns a list of hca attributes
4385 4372 */
4386 4373 /* ARGSUSED */
4387 4374 static int
4388 4375 daplka_ia_query(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4389 4376 cred_t *cred, int *rvalp)
4390 4377 {
4391 4378 dapl_ia_query_t args;
4392 4379 int retval;
4393 4380 ibt_hca_attr_t *hcap;
4394 4381
4395 4382 hcap = &ia_rp->ia_hca->hca_attr;
4396 4383
4397 4384 /*
4398 4385 * Take the ibt_hca_attr_t and stuff them into dapl_hca_attr_t
4399 4386 */
4400 4387 args.hca_attr.dhca_vendor_id = hcap->hca_vendor_id;
4401 4388 args.hca_attr.dhca_device_id = hcap->hca_device_id;
4402 4389 args.hca_attr.dhca_version_id = hcap->hca_version_id;
4403 4390 args.hca_attr.dhca_max_chans = hcap->hca_max_chans;
4404 4391 args.hca_attr.dhca_max_chan_sz = hcap->hca_max_chan_sz;
4405 4392 args.hca_attr.dhca_max_sgl = hcap->hca_max_sgl;
4406 4393 args.hca_attr.dhca_max_cq = hcap->hca_max_cq;
4407 4394 args.hca_attr.dhca_max_cq_sz = hcap->hca_max_cq_sz;
4408 4395 args.hca_attr.dhca_max_memr = hcap->hca_max_memr;
4409 4396 args.hca_attr.dhca_max_memr_len = hcap->hca_max_memr_len;
4410 4397 args.hca_attr.dhca_max_mem_win = hcap->hca_max_mem_win;
4411 4398 args.hca_attr.dhca_max_rdma_in_chan = hcap->hca_max_rdma_in_chan;
4412 4399 args.hca_attr.dhca_max_rdma_out_chan = hcap->hca_max_rdma_out_chan;
4413 4400 args.hca_attr.dhca_max_partitions = hcap->hca_max_partitions;
4414 4401 args.hca_attr.dhca_nports = hcap->hca_nports;
4415 4402 args.hca_attr.dhca_node_guid = hcap->hca_node_guid;
4416 4403 args.hca_attr.dhca_max_pd = hcap->hca_max_pd;
4417 4404 args.hca_attr.dhca_max_srqs = hcap->hca_max_srqs;
4418 4405 args.hca_attr.dhca_max_srqs_sz = hcap->hca_max_srqs_sz;
4419 4406 args.hca_attr.dhca_max_srq_sgl = hcap->hca_max_srq_sgl;
4420 4407
4421 4408 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ia_query_t),
4422 4409 mode);
4423 4410 if (retval != 0) {
4424 4411 DERR("ia_query: copyout error %d\n", retval);
4425 4412 return (EFAULT);
4426 4413 }
4427 4414 return (0);
4428 4415 }
4429 4416
4430 4417 /*
4431 4418 * This routine is passed to hash walk in the daplka_pre_mr_cleanup_callback,
4432 4419 * it frees the mw embedded in the mw resource object.
4433 4420 */
4434 4421
4435 4422 /* ARGSUSED */
4436 4423 static int
4437 4424 daplka_mr_cb_freemw(void *objp, void *arg)
4438 4425 {
4439 4426 daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)objp;
4440 4427 ibt_mw_hdl_t mw_hdl;
4441 4428 ibt_status_t status;
4442 4429
4443 4430 D3("mr_cb_freemw: entering, mw_rp 0x%p\n", mw_rp);
4444 4431 DAPLKA_RS_REF(mw_rp);
4445 4432
4446 4433 mutex_enter(&mw_rp->mw_lock);
4447 4434 mw_hdl = mw_rp->mw_hdl;
4448 4435 /*
4449 4436 * we set mw_hdl to NULL so it won't get freed again
4450 4437 */
4451 4438 mw_rp->mw_hdl = NULL;
4452 4439 mutex_exit(&mw_rp->mw_lock);
4453 4440
4454 4441 if (mw_hdl != NULL) {
4455 4442 status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, mw_hdl);
4456 4443 if (status != IBT_SUCCESS) {
4457 4444 DERR("mr_cb_freemw: ibt_free_mw returned %d\n", status);
4458 4445 }
4459 4446 D3("mr_cb_freemw: mw freed\n");
4460 4447 }
4461 4448
4462 4449 DAPLKA_RS_UNREF(mw_rp);
4463 4450 return (0);
4464 4451 }
4465 4452
/*
 * This routine is called from HCA driver's umem lock undo callback
 * when the memory associated with an MR is being unmapped. In this callback
 * we free all the MW associated with the IA and post an unaffiliated
 * async event to tell the app that there was a catastrophic event.
 * This allows the HCA to deregister the MR in its callback processing.
 *
 * arg1 is the daplka_mr_resource_t being invalidated; arg2 is unused.
 * The IA state machine (DAPLKA_IA_*) serializes MW allocation against
 * this freeze: only one thread at a time may move the IA out of INIT.
 */
static void
daplka_pre_mr_cleanup_callback(void *arg1, void *arg2 /*ARGSUSED*/)
{
	daplka_mr_resource_t	*mr_rp;
	daplka_ia_resource_t	*ia_rp;
#ifdef	_THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
	ibt_async_event_t	event;
	ibt_hca_attr_t		*hca_attrp;
#endif
	minor_t			rnum;

	mr_rp = (daplka_mr_resource_t *)arg1;
	rnum = DAPLKA_RS_RNUM(mr_rp);
	/* detach this MR from its shared-MR object (no-op if not shared) */
	daplka_shared_mr_free(mr_rp);

	/*
	 * NOTE(review): daplka_resource_lookup appears to return the IA
	 * with a hold taken; it is dropped by the UNREF at the end --
	 * confirm against the lookup implementation.
	 */
	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
	if (ia_rp == NULL) {
		DERR("daplka_mr_unlock_callback: resource not found, rnum %d\n",
		    rnum);
		return;
	}

	DERR("daplka_mr_unlock_callback: resource(%p) rnum(%d)\n", ia_rp, rnum);

	mutex_enter(&ia_rp->ia_lock);
	/*
	 * MW is being alloced OR MW freeze has already begun. In
	 * both these cases we wait for that to complete before
	 * continuing.
	 */
	while ((ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) ||
	    (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)) {
		cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
	}

	switch (ia_rp->ia_state) {
	case DAPLKA_IA_INIT:
		/* we own the freeze; drop ia_lock while walking the table */
		ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS;
		mutex_exit(&ia_rp->ia_lock);
		break;
	case DAPLKA_IA_MW_FROZEN:
		/* the mw on this ia have been freed */
		D2("daplka_mr_unlock_callback: ia_state %d nothing to do\n",
		    ia_rp->ia_state);
		mutex_exit(&ia_rp->ia_lock);
		goto cleanup;
	default:
		ASSERT(!"daplka_mr_unlock_callback: IA state invalid");
		DERR("daplka_mr_unlock_callback: invalid ia_state %d\n",
		    ia_rp->ia_state);
		mutex_exit(&ia_rp->ia_lock);
		goto cleanup;
	}

	/*
	 * Walk the mw hash table and free the mws. Acquire a writer
	 * lock since we don't want anyone else traversing this tree
	 * while we are freeing the MW.
	 */
	daplka_hash_walk(&ia_rp->ia_mw_htbl, daplka_mr_cb_freemw, NULL,
	    RW_WRITER);

	mutex_enter(&ia_rp->ia_lock);
	ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS);
	ia_rp->ia_state = DAPLKA_IA_MW_FROZEN;
	/* wake any thread blocked above waiting for the freeze to finish */
	cv_broadcast(&ia_rp->ia_cv);
	mutex_exit(&ia_rp->ia_lock);

	/*
	 * Currently commented out because Oracle skgxp is incapable
	 * of handling async events correctly.
	 */
#ifdef _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
	/*
	 * Enqueue an unaffiliated async error event to indicate this
	 * IA has encountered a problem that caused the MW to freed up
	 */

	/* Create a fake event, only relevant field is the hca_guid */
	bzero(&event, sizeof (ibt_async_event_t));
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	event.ev_hca_guid = hca_attrp->hca_node_guid;

	daplka_async_event_create(IBT_ERROR_LOCAL_CATASTROPHIC, &event, 0,
	    ia_rp);
#endif /* _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB */

cleanup:;
	D2("daplka_mr_unlock_callback: resource(%p) done\n", ia_rp);
	DAPLKA_RS_UNREF(ia_rp);
}
4564 4551
/*
 * registers a memory region.
 * memory locking will be done by the HCA driver.
 *
 * arg points to a dapl_mr_register_t in userland.  On success the
 * lkey/rkey/hkey fields are copied back out and 0 is returned.  When an
 * IBTF call fails, the ibt_status_t is returned to userland through
 * *rvalp and the ioctl itself returns 0 -- the caller distinguishes
 * the two cases.
 */
/* ARGSUSED */
static int
daplka_mr_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	boolean_t		inserted = B_FALSE;
	daplka_mr_resource_t	*mr_rp;
	daplka_pd_resource_t	*pd_rp;
	dapl_mr_register_t	args;
	ibt_mr_data_in_t	mr_cb_data_in;
	uint64_t		mr_hkey = 0;
	ibt_status_t		status;
	int			retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_register_t),
	    mode);
	if (retval != 0) {
		DERR("mr_register: copyin error %d\n", retval);
		return (EINVAL);
	}
	mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
	if (mr_rp == NULL) {
		DERR("mr_register: cannot allocate mr resource\n");
		return (ENOMEM);
	}
	/*
	 * After RS_INIT, all error paths must go through cleanup so the
	 * final UNREF can run daplka_mr_destroy to undo partial setup.
	 */
	DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
	    DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);

	mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
	mr_rp->mr_hca = ia_rp->ia_hca;
	mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
	mr_rp->mr_next = NULL;
	mr_rp->mr_shared_mr = NULL;

	/* get pd handle */
	pd_rp = (daplka_pd_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mr_pd_hkey);
	if (pd_rp == NULL) {
		DERR("mr_register: cannot find pd resource\n");
		retval = EINVAL;
		goto cleanup;
	}
	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	mr_rp->mr_pd_res = pd_rp;

	mr_rp->mr_attr.mr_vaddr = args.mr_vaddr;
	mr_rp->mr_attr.mr_len = args.mr_len;
	mr_rp->mr_attr.mr_as = curproc->p_as;
	mr_rp->mr_attr.mr_flags = args.mr_flags | IBT_MR_NOSLEEP;

	D3("mr_register: mr_vaddr %p, mr_len %llu, mr_flags 0x%x\n",
	    (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
	    (longlong_t)mr_rp->mr_attr.mr_len,
	    mr_rp->mr_attr.mr_flags);

	status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
	    mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, &mr_rp->mr_hdl,
	    &mr_rp->mr_desc);

	if (status != IBT_SUCCESS) {
		DERR("mr_register: ibt_register_mr error %d\n", status);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
	mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
	mr_cb_data_in.mr_arg1 = (void *)mr_rp;
	mr_cb_data_in.mr_arg2 = NULL;

	/* Pass the service driver mr cleanup handler to the hca driver */
	status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
	    &mr_cb_data_in, sizeof (mr_cb_data_in));

	if (status != IBT_SUCCESS) {
		DERR("mr_register: ibt_ci_data_in error(%d) ver(%d)",
		    status, mr_cb_data_in.mr_rev);
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into mr hash table */
	retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
	    &mr_hkey, (void *)mr_rp);
	if (retval != 0) {
		DERR("mr_register: cannot insert mr resource into mr_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	args.mr_lkey = mr_rp->mr_desc.md_lkey;
	args.mr_rkey = mr_rp->mr_desc.md_rkey;
	args.mr_hkey = mr_hkey;

	retval = ddi_copyout((void *)&args, (void *)arg,
	    sizeof (dapl_mr_register_t), mode);
	if (retval != 0) {
		DERR("mr_register: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_mr_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
		    (void **)&free_rp);
		if (free_rp != mr_rp) {
			DERR("mr_register: cannot remove mr from hash table\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in mr_deregister
			 */
			return (retval);
		}
	}
	/* drop the RS_INIT hold; destroys the MR if this was the last one */
	DAPLKA_RS_UNREF(mr_rp);
	return (retval);
}
4695 4680
4696 4681 /*
4697 4682 * registers a shared memory region.
4698 4683 * the client calls this function with the intention to share the memory
4699 4684 * region with other clients. it is assumed that, prior to calling this
4700 4685 * function, the client(s) are already sharing parts of their address
4701 4686 * space using a mechanism such as SYSV shared memory. the first client
4702 4687 * that calls this function will create and insert a daplka_shared_mr_t
4703 4688 * object into the global daplka_shared_mr_tree. this shared mr object
4704 4689 * will be identified by a unique 40-byte key and will maintain a list
4705 4690 * of mr resources. every time this function gets called with the same
4706 4691 * 40-byte key, a new mr resource (containing a new mr handle generated
4707 4692 * by ibt_register_mr or ibt_register_shared_mr) is created and inserted
4708 4693 * into this list. similarly, every time a shared mr gets deregistered
4709 4694 * or invalidated by a callback, the mr resource gets removed from this
4710 4695 * list. the shared mr object has a reference count. when it drops to
4711 4696 * zero, the shared mr object will be removed from the global avl tree
4712 4697 * and be freed.
4713 4698 */
4714 4699 /* ARGSUSED */
4715 4700 static int
4716 4701 daplka_mr_register_shared(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4717 4702 cred_t *cred, int *rvalp)
4718 4703 {
4719 4704 dapl_mr_register_shared_t args;
4720 4705 daplka_shared_mr_t *smrp = NULL;
4721 4706 daplka_shared_mr_t tmp_smr;
4722 4707 ibt_mr_data_in_t mr_cb_data_in;
4723 4708 avl_index_t where;
4724 4709 boolean_t inserted = B_FALSE;
4725 4710 daplka_mr_resource_t *mr_rp = NULL;
4726 4711 daplka_pd_resource_t *pd_rp;
4727 4712 uint64_t mr_hkey = 0;
4728 4713 ibt_status_t status;
4729 4714 int retval;
4730 4715
4731 4716 retval = ddi_copyin((void *)arg, &args,
4732 4717 sizeof (dapl_mr_register_shared_t), mode);
4733 4718 if (retval != 0) {
4734 4719 DERR("mr_register_shared: copyin error %d\n", retval);
4735 4720 return (EINVAL);
4736 4721 }
4737 4722
4738 4723 mutex_enter(&daplka_shared_mr_lock);
4739 4724 /*
4740 4725 * find smrp from the global avl tree.
4741 4726 * the 40-byte key is used as the lookup key.
4742 4727 */
4743 4728 tmp_smr.smr_cookie = args.mrs_shm_cookie;
4744 4729 smrp = (daplka_shared_mr_t *)
4745 4730 avl_find(&daplka_shared_mr_tree, &tmp_smr, &where);
4746 4731 if (smrp != NULL) {
4747 4732 D2("mr_register_shared: smrp 0x%p, found cookie:\n"
4748 4733 "0x%016llx%016llx%016llx%016llx%016llx\n", smrp,
4749 4734 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4750 4735 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4751 4736 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4752 4737 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4753 4738 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4754 4739
4755 4740 /*
4756 4741 * if the smrp exists, other threads could still be
4757 4742 * accessing it. we wait until they are done before
4758 4743 * we continue.
4759 4744 */
4760 4745 smrp->smr_refcnt++;
4761 4746 while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
4762 4747 D2("mr_register_shared: smrp 0x%p, "
4763 4748 "waiting in transitioning state, refcnt %d\n",
4764 4749 smrp, smrp->smr_refcnt);
4765 4750 cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
4766 4751 }
4767 4752 ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
4768 4753 D2("mr_register_shared: smrp 0x%p, refcnt %d, ready\n",
4769 4754 smrp, smrp->smr_refcnt);
4770 4755
4771 4756 /*
4772 4757 * we set smr_state to TRANSITIONING to temporarily
4773 4758 * prevent other threads from trying to access smrp.
4774 4759 */
4775 4760 smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4776 4761 } else {
4777 4762 D2("mr_register_shared: cannot find cookie:\n"
4778 4763 "0x%016llx%016llx%016llx%016llx%016llx\n",
4779 4764 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4780 4765 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4781 4766 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4782 4767 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4783 4768 (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4784 4769
4785 4770 /*
↓ open down ↓ |
113 lines elided |
↑ open up ↑ |
4786 4771 * if we cannot find smrp, we need to create and
4787 4772 * insert one into daplka_shared_mr_tree
4788 4773 */
4789 4774 smrp = kmem_zalloc(sizeof (daplka_shared_mr_t),
4790 4775 daplka_km_flags);
4791 4776 if (smrp == NULL) {
4792 4777 retval = ENOMEM;
4793 4778 mutex_exit(&daplka_shared_mr_lock);
4794 4779 goto cleanup;
4795 4780 }
4796 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
4797 4781 smrp->smr_refcnt = 1;
4798 4782 smrp->smr_cookie = args.mrs_shm_cookie;
4799 4783 smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4800 4784 smrp->smr_mr_list = NULL;
4801 4785 cv_init(&smrp->smr_cv, NULL, CV_DRIVER, NULL);
4802 4786 avl_insert(&daplka_shared_mr_tree, smrp, where);
4803 - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*smrp))
4804 4787 }
4805 4788 mutex_exit(&daplka_shared_mr_lock);
4806 4789
4807 4790 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
4808 4791 if (mr_rp == NULL) {
4809 4792 DERR("mr_register_shared: cannot allocate mr resource\n");
4810 4793 goto cleanup;
4811 4794 }
4812 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4813 4795 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
4814 4796 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
4815 4797
4816 4798 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
4817 4799 mr_rp->mr_hca = ia_rp->ia_hca;
4818 4800 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
4819 4801 mr_rp->mr_next = NULL;
4820 4802 mr_rp->mr_shared_mr = NULL;
4821 4803
4822 4804 /* get pd handle */
4823 4805 pd_rp = (daplka_pd_resource_t *)
4824 4806 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mrs_pd_hkey);
4825 4807 if (pd_rp == NULL) {
4826 4808 DERR("mr_register_shared: cannot find pd resource\n");
4827 4809 retval = EINVAL;
4828 4810 goto cleanup;
4829 4811 }
4830 4812 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
4831 4813 mr_rp->mr_pd_res = pd_rp;
4832 4814
4833 4815 mr_rp->mr_attr.mr_vaddr = args.mrs_vaddr;
4834 4816 mr_rp->mr_attr.mr_len = args.mrs_len;
4835 4817 mr_rp->mr_attr.mr_flags = args.mrs_flags | IBT_MR_NOSLEEP;
4836 4818 mr_rp->mr_attr.mr_as = curproc->p_as;
4837 4819
4838 4820 D2("mr_register_shared: mr_vaddr 0x%p, mr_len %llu, "
4839 4821 "mr_flags 0x%x, mr_as 0x%p, mr_exists %d, smrp 0x%p\n",
4840 4822 (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
4841 4823 (longlong_t)mr_rp->mr_attr.mr_len,
4842 4824 mr_rp->mr_attr.mr_flags, mr_rp->mr_attr.mr_as,
4843 4825 (int)(smrp->smr_mr_list != NULL), smrp);
4844 4826
4845 4827 /*
4846 4828 * since we are in TRANSITIONING state, we are guaranteed
4847 4829 * that we have exclusive access to smr_mr_list.
4848 4830 */
4849 4831 if (smrp->smr_mr_list != NULL) {
4850 4832 ibt_smr_attr_t mem_sattr;
4851 4833
4852 4834 /*
4853 4835 * a non-null smr_mr_list indicates that someone
4854 4836 * else has already inserted an mr_resource into
4855 4837 * smr_mr_list. we use the mr_handle from the first
4856 4838 * element as an arg to ibt_register_shared_mr.
4857 4839 */
4858 4840 mem_sattr.mr_vaddr = smrp->smr_mr_list->mr_desc.md_vaddr;
4859 4841 mem_sattr.mr_flags = mr_rp->mr_attr.mr_flags;
4860 4842
4861 4843 D2("mr_register_shared: mem_sattr vaddr 0x%p flags 0x%x\n",
4862 4844 (void *)(uintptr_t)mem_sattr.mr_vaddr, mem_sattr.mr_flags);
4863 4845 status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
4864 4846 smrp->smr_mr_list->mr_hdl, mr_rp->mr_pd_res->pd_hdl,
4865 4847 &mem_sattr, &mr_rp->mr_hdl, &mr_rp->mr_desc);
4866 4848
4867 4849 if (status != IBT_SUCCESS) {
4868 4850 DERR("mr_register_shared: "
4869 4851 "ibt_register_shared_mr error %d\n", status);
4870 4852 *rvalp = (int)status;
4871 4853 retval = 0;
4872 4854 goto cleanup;
4873 4855 }
4874 4856 } else {
4875 4857 /*
4876 4858 * an mr does not exist yet. we need to create one
4877 4859 * using ibt_register_mr.
4878 4860 */
4879 4861 status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
4880 4862 mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr,
4881 4863 &mr_rp->mr_hdl, &mr_rp->mr_desc);
4882 4864
4883 4865 if (status != IBT_SUCCESS) {
4884 4866 DERR("mr_register_shared: "
4885 4867 "ibt_register_mr error %d\n", status);
4886 4868 *rvalp = (int)status;
4887 4869 retval = 0;
4888 4870 goto cleanup;
4889 4871 }
4890 4872 }
4891 4873
4892 4874 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
4893 4875 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
4894 4876 mr_cb_data_in.mr_arg1 = (void *)mr_rp;
4895 4877 mr_cb_data_in.mr_arg2 = NULL;
4896 4878
4897 4879 /* Pass the service driver mr cleanup handler to the hca driver */
4898 4880 status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
4899 4881 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
4900 4882 &mr_cb_data_in, sizeof (mr_cb_data_in));
4901 4883
4902 4884 if (status != IBT_SUCCESS) {
4903 4885 DERR("mr_register_shared: ibt_ci_data_in error(%d) ver(%d)",
4904 4886 status, mr_cb_data_in.mr_rev);
4905 4887 *rvalp = (int)status;
4906 4888 retval = 0;
4907 4889 goto cleanup;
4908 4890 }
4909 4891
4910 4892 /*
4911 4893 * we bump reference of mr_rp and enqueue it onto smrp.
4912 4894 */
4913 4895 DAPLKA_RS_REF(mr_rp);
4914 4896 mr_rp->mr_next = smrp->smr_mr_list;
4915 4897 smrp->smr_mr_list = mr_rp;
↓ open down ↓ |
93 lines elided |
↑ open up ↑ |
4916 4898 mr_rp->mr_shared_mr = smrp;
4917 4899
4918 4900 /* insert into mr hash table */
4919 4901 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
4920 4902 &mr_hkey, (void *)mr_rp);
4921 4903 if (retval != 0) {
4922 4904 DERR("mr_register_shared: cannot insert mr resource\n");
4923 4905 goto cleanup;
4924 4906 }
4925 4907 inserted = B_TRUE;
4926 - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
4927 4908
4928 4909 /*
4929 4910 * at this point, there are two references to our mr resource.
4930 4911 * one is kept in ia_mr_htbl. the other is kept in the list
4931 4912 * within this shared mr object (smrp). when we deregister this
4932 4913 * mr or when a callback invalidates this mr, the reference kept
4933 4914 * by this shared mr object will be removed.
4934 4915 */
4935 4916
4936 4917 args.mrs_lkey = mr_rp->mr_desc.md_lkey;
4937 4918 args.mrs_rkey = mr_rp->mr_desc.md_rkey;
4938 4919 args.mrs_hkey = mr_hkey;
4939 4920
4940 4921 retval = ddi_copyout((void *)&args, (void *)arg,
4941 4922 sizeof (dapl_mr_register_shared_t), mode);
4942 4923 if (retval != 0) {
4943 4924 DERR("mr_register_shared: copyout error %d\n", retval);
4944 4925 retval = EFAULT;
4945 4926 goto cleanup;
4946 4927 }
4947 4928
4948 4929 /*
4949 4930 * set the state to READY to allow others to continue
4950 4931 */
4951 4932 mutex_enter(&daplka_shared_mr_lock);
4952 4933 smrp->smr_state = DAPLKA_SMR_READY;
4953 4934 cv_broadcast(&smrp->smr_cv);
4954 4935 mutex_exit(&daplka_shared_mr_lock);
4955 4936 return (0);
4956 4937
4957 4938 cleanup:;
4958 4939 if (inserted) {
4959 4940 daplka_mr_resource_t *free_rp = NULL;
4960 4941
4961 4942 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
4962 4943 (void **)&free_rp);
4963 4944 if (free_rp != mr_rp) {
4964 4945 DERR("mr_register_shared: "
4965 4946 "cannot remove mr from hash table\n");
4966 4947 /*
4967 4948 * we can only get here if another thread
4968 4949 * has completed the cleanup in mr_deregister
4969 4950 */
4970 4951 return (retval);
↓ open down ↓ |
34 lines elided |
↑ open up ↑ |
4971 4952 }
4972 4953 }
4973 4954 if (smrp != NULL) {
4974 4955 mutex_enter(&daplka_shared_mr_lock);
4975 4956 ASSERT(smrp->smr_refcnt > 0);
4976 4957 smrp->smr_refcnt--;
4977 4958
4978 4959 if (smrp->smr_refcnt == 0) {
4979 4960 DERR("mr_register_shared: freeing smrp 0x%p\n", smrp);
4980 4961 avl_remove(&daplka_shared_mr_tree, smrp);
4981 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
4982 4962 if (smrp->smr_mr_list != NULL) {
4983 4963 /*
4984 4964 * the refcnt is 0. if there is anything
4985 4965 * left on the list, it must be ours.
4986 4966 */
4987 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4988 4967 ASSERT(smrp->smr_mr_list == mr_rp);
4989 4968 DAPLKA_RS_UNREF(mr_rp);
4990 4969 smrp->smr_mr_list = NULL;
4991 4970 ASSERT(mr_rp->mr_shared_mr == smrp);
4992 4971 mr_rp->mr_shared_mr = NULL;
4993 4972 ASSERT(mr_rp->mr_next == NULL);
4994 4973 }
4995 4974 smrp->smr_state = DAPLKA_SMR_FREED;
4996 4975 cv_destroy(&smrp->smr_cv);
4997 4976 kmem_free(smrp, sizeof (daplka_shared_mr_t));
4998 4977 } else {
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
4999 4978 DERR("mr_register_shared: resetting smr_state "
5000 4979 "smrp 0x%p, %d waiters remain\n", smrp,
5001 4980 smrp->smr_refcnt);
5002 4981 ASSERT(smrp->smr_state == DAPLKA_SMR_TRANSITIONING);
5003 4982 if (smrp->smr_mr_list != NULL && mr_rp != NULL) {
5004 4983 daplka_mr_resource_t **mpp;
5005 4984
5006 4985 /*
5007 4986 * search and remove mr_rp from smr_mr_list
5008 4987 */
5009 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5010 4988 mpp = &smrp->smr_mr_list;
5011 4989 while (*mpp != NULL) {
5012 4990 if (*mpp == mr_rp) {
5013 4991 *mpp = (*mpp)->mr_next;
5014 4992 DAPLKA_RS_UNREF(mr_rp);
5015 4993 ASSERT(mr_rp->mr_shared_mr ==
5016 4994 smrp);
5017 4995 mr_rp->mr_shared_mr = NULL;
5018 4996 mr_rp->mr_next = NULL;
5019 4997 break;
5020 4998 }
5021 4999 mpp = &(*mpp)->mr_next;
5022 5000 }
5023 5001 }
5024 5002 /*
5025 5003 * note that smr_state == READY does not necessarily
5026 5004 * mean that smr_mr_list is non empty. for this case,
5027 5005 * we are doing cleanup because of a failure. we set
5028 5006 * the state to READY to allow other threads to
5029 5007 * continue.
5030 5008 */
5031 5009 smrp->smr_state = DAPLKA_SMR_READY;
5032 5010 cv_broadcast(&smrp->smr_cv);
5033 5011 }
5034 5012 mutex_exit(&daplka_shared_mr_lock);
5035 5013 }
5036 5014 if (mr_rp != NULL) {
5037 5015 DAPLKA_RS_UNREF(mr_rp);
5038 5016 }
5039 5017 return (retval);
5040 5018 }
5041 5019
5042 5020 /*
5043 5021 * registers a memory region using the attributes of an
5044 5022 * existing region.
5045 5023 */
5046 5024 /* ARGSUSED */
5047 5025 static int
5048 5026 daplka_mr_register_lmr(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5049 5027 cred_t *cred, int *rvalp)
5050 5028 {
5051 5029 boolean_t inserted = B_FALSE;
5052 5030 dapl_mr_register_lmr_t args;
5053 5031 ibt_mr_data_in_t mr_cb_data_in;
5054 5032 daplka_mr_resource_t *orig_mr_rp = NULL;
5055 5033 daplka_mr_resource_t *mr_rp;
5056 5034 ibt_smr_attr_t mem_sattr;
5057 5035 uint64_t mr_hkey = 0;
5058 5036 ibt_status_t status;
5059 5037 int retval;
5060 5038
5061 5039 retval = ddi_copyin((void *)arg, &args,
5062 5040 sizeof (dapl_mr_register_lmr_t), mode);
5063 5041 if (retval != 0) {
5064 5042 DERR("mr_register_lmr: copyin error %d\n", retval);
5065 5043 return (EINVAL);
5066 5044 }
5067 5045 orig_mr_rp = (daplka_mr_resource_t *)
5068 5046 daplka_hash_lookup(&ia_rp->ia_mr_htbl, args.mrl_orig_hkey);
5069 5047 if (orig_mr_rp == NULL) {
5070 5048 DERR("mr_register_lmr: cannot find mr resource\n");
↓ open down ↓ |
51 lines elided |
↑ open up ↑ |
5071 5049 return (EINVAL);
5072 5050 }
5073 5051 ASSERT(DAPLKA_RS_TYPE(orig_mr_rp) == DAPL_TYPE_MR);
5074 5052
5075 5053 mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
5076 5054 if (mr_rp == NULL) {
5077 5055 DERR("mr_register_lmr: cannot allocate mr resource\n");
5078 5056 retval = ENOMEM;
5079 5057 goto cleanup;
5080 5058 }
5081 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5082 5059 DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
5083 5060 DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
5084 5061
5085 5062 mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
5086 5063 mr_rp->mr_hca = ia_rp->ia_hca;
5087 5064 mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
5088 5065 mr_rp->mr_next = NULL;
5089 5066 mr_rp->mr_shared_mr = NULL;
5090 5067
5091 5068 DAPLKA_RS_REF(orig_mr_rp->mr_pd_res);
5092 5069 mr_rp->mr_pd_res = orig_mr_rp->mr_pd_res;
5093 5070 mr_rp->mr_attr = orig_mr_rp->mr_attr;
5094 5071
5095 5072 /* Pass the IO addr that was returned while allocating the orig MR */
5096 5073 mem_sattr.mr_vaddr = orig_mr_rp->mr_desc.md_vaddr;
5097 5074 mem_sattr.mr_flags = args.mrl_flags | IBT_MR_NOSLEEP;
5098 5075
5099 5076 status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
5100 5077 orig_mr_rp->mr_hdl, mr_rp->mr_pd_res->pd_hdl, &mem_sattr,
5101 5078 &mr_rp->mr_hdl, &mr_rp->mr_desc);
5102 5079
5103 5080 if (status != IBT_SUCCESS) {
5104 5081 DERR("mr_register_lmr: ibt_register_shared_mr error %d\n",
5105 5082 status);
5106 5083 *rvalp = (int)status;
5107 5084 retval = 0;
5108 5085 goto cleanup;
5109 5086 }
5110 5087
5111 5088 mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
5112 5089 mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
5113 5090 mr_cb_data_in.mr_arg1 = (void *)mr_rp;
5114 5091 mr_cb_data_in.mr_arg2 = NULL;
5115 5092
5116 5093 /* Pass the service driver mr cleanup handler to the hca driver */
5117 5094 status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
5118 5095 IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
5119 5096 &mr_cb_data_in, sizeof (mr_cb_data_in));
5120 5097
5121 5098 if (status != IBT_SUCCESS) {
5122 5099 DERR("mr_register_lmr: ibt_ci_data_in error(%d) ver(%d)",
5123 5100 status, mr_cb_data_in.mr_rev);
5124 5101 *rvalp = (int)status;
5125 5102 retval = 0;
5126 5103 goto cleanup;
5127 5104 }
5128 5105 mr_rp->mr_attr.mr_len = orig_mr_rp->mr_attr.mr_len;
↓ open down ↓ |
37 lines elided |
↑ open up ↑ |
5129 5106 mr_rp->mr_attr.mr_flags = mem_sattr.mr_flags;
5130 5107
5131 5108 /* insert into mr hash table */
5132 5109 retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, &mr_hkey,
5133 5110 (void *)mr_rp);
5134 5111 if (retval != 0) {
5135 5112 DERR("mr_register: cannot insert mr resource into mr_htbl\n");
5136 5113 goto cleanup;
5137 5114 }
5138 5115 inserted = B_TRUE;
5139 - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
5140 5116
5141 5117 args.mrl_lkey = mr_rp->mr_desc.md_lkey;
5142 5118 args.mrl_rkey = mr_rp->mr_desc.md_rkey;
5143 5119 args.mrl_hkey = mr_hkey;
5144 5120
5145 5121 retval = ddi_copyout((void *)&args, (void *)arg,
5146 5122 sizeof (dapl_mr_register_lmr_t), mode);
5147 5123 if (retval != 0) {
5148 5124 DERR("mr_register_lmr: copyout error %d\n", retval);
5149 5125 retval = EFAULT;
5150 5126 goto cleanup;
5151 5127 }
5152 5128 if (orig_mr_rp != NULL) {
5153 5129 DAPLKA_RS_UNREF(orig_mr_rp);
5154 5130 }
5155 5131 return (0);
5156 5132
5157 5133 cleanup:;
5158 5134 if (inserted) {
5159 5135 daplka_mr_resource_t *free_rp = NULL;
5160 5136
5161 5137 (void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
5162 5138 (void **)&free_rp);
5163 5139 if (free_rp != mr_rp) {
5164 5140 DERR("mr_register: cannot remove mr from hash table\n");
5165 5141 /*
5166 5142 * we can only get here if another thread
5167 5143 * has completed the cleanup in mr_deregister
5168 5144 */
5169 5145 return (retval);
5170 5146 }
5171 5147 }
5172 5148 if (orig_mr_rp != NULL) {
5173 5149 DAPLKA_RS_UNREF(orig_mr_rp);
5174 5150 }
5175 5151 if (mr_rp != NULL) {
5176 5152 DAPLKA_RS_UNREF(mr_rp);
5177 5153 }
5178 5154 return (retval);
5179 5155 }
5180 5156
/*
 * this function is called by mr_deregister and mr_cleanup_callback to
 * remove a mr resource from the shared mr object mr_rp->mr_shared_mr.
 * if mr_shared_mr is already NULL, that means the region being
 * deregistered or invalidated is not a shared mr region and we can
 * return immediately.
 *
 * Side effects: drops the list's reference on mr_rp and, when the
 * shared-mr refcount reaches zero, removes the smr object from
 * daplka_shared_mr_tree and frees it.
 */
static void
daplka_shared_mr_free(daplka_mr_resource_t *mr_rp)
{
	daplka_shared_mr_t	*smrp;

	/*
	 * we need a lock because mr_callback also checks this field.
	 * for the rare case that mr_deregister and mr_cleanup_callback
	 * gets called simultaneously, we are guaranteed that smrp won't
	 * be dereferenced twice because either function will find
	 * mr_shared_mr to be NULL.
	 */
	mutex_enter(&mr_rp->mr_lock);
	smrp = mr_rp->mr_shared_mr;
	mr_rp->mr_shared_mr = NULL;
	mutex_exit(&mr_rp->mr_lock);

	if (smrp != NULL) {
		daplka_mr_resource_t	**mpp;
		boolean_t		mr_found = B_FALSE;

		mutex_enter(&daplka_shared_mr_lock);
		ASSERT(smrp->smr_refcnt > 0);
		/* wait out any thread that holds the smr in TRANSITIONING */
		while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
			cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
		}
		ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
		smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
		smrp->smr_refcnt--;

		/*
		 * search and remove mr_rp from smr_mr_list.
		 * also UNREF mr_rp because it is no longer
		 * on the list.
		 */
		mpp = &smrp->smr_mr_list;
		while (*mpp != NULL) {
			if (*mpp == mr_rp) {
				*mpp = (*mpp)->mr_next;
				DAPLKA_RS_UNREF(mr_rp);
				mr_rp->mr_next = NULL;
				mr_found = B_TRUE;
				break;
			}
			mpp = &(*mpp)->mr_next;
		}
		/*
		 * since mr_clean_callback may not touch smr_mr_list
		 * at this time (due to smr_state), we can be sure
		 * that we can find and remove mr_rp from smr_mr_list
		 */
		ASSERT(mr_found);
		if (smrp->smr_refcnt == 0) {
			/* last user: tear down the shared-mr object */
			D3("shared_mr_free: freeing smrp 0x%p\n", smrp);
			avl_remove(&daplka_shared_mr_tree, smrp);
			ASSERT(smrp->smr_mr_list == NULL);
			smrp->smr_state = DAPLKA_SMR_FREED;
			cv_destroy(&smrp->smr_cv);
			kmem_free(smrp, sizeof (daplka_shared_mr_t));
		} else {
			D3("shared_mr_free: smrp 0x%p, refcnt %d\n",
			    smrp, smrp->smr_refcnt);
			/* let waiters blocked on TRANSITIONING proceed */
			smrp->smr_state = DAPLKA_SMR_READY;
			cv_broadcast(&smrp->smr_cv);
		}
		mutex_exit(&daplka_shared_mr_lock);
	}
}
5256 5232
/*
 * deregisters a memory region.
 * if mr is shared, remove reference from global shared mr object.
 * release the initial reference to the mr. if the mr's refcnt is
 * zero, call mr_destroy to free mr.
 *
 * ia_rp	- per-process IA resource owning the mr hash table
 * arg		- userland pointer to a dapl_mr_deregister_t
 * mode		- ioctl data model flags, passed to ddi_copyin
 * returns 0 on success, EINVAL on bad args or unknown hkey
 */
/* ARGSUSED */
static int
daplka_mr_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_mr_resource_t *mr_rp;
	dapl_mr_deregister_t args;
	int retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_deregister_t),
	    mode);
	if (retval != 0) {
		DERR("mr_deregister: copyin error %d\n", retval);
		return (EINVAL);
	}
	/* unhook the mr from the per-IA hash table by its hkey */
	retval = daplka_hash_remove(&ia_rp->ia_mr_htbl,
	    args.mrd_hkey, (void **)&mr_rp);
	if (retval != 0 || mr_rp == NULL) {
		DERR("mr_deregister: cannot find mr resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(mr_rp) == DAPL_TYPE_MR);

	/* detach from the global shared mr object, if this mr is shared */
	daplka_shared_mr_free(mr_rp);
	/* drop the initial ref; mr_destroy runs when refcnt reaches zero */
	DAPLKA_RS_UNREF(mr_rp);
	return (0);
}
5290 5266
/*
 * sync local memory regions on RDMA read or write.
 * looks up every mr named in the request, builds an ibt_mr_sync_t
 * array and hands the whole batch to ibt_sync_mr in one call.
 * returns 0 unless the request itself is malformed; an ibt failure
 * is reported to the library through *rvalp instead.
 */
/* ARGSUSED */
static int
daplka_mr_sync(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_mr_sync_t args;
	daplka_mr_resource_t *mr_rp[DAPL_MR_PER_SYNC];
	ibt_mr_sync_t mrs[DAPL_MR_PER_SYNC];
	uint32_t sync_direction_flags;
	ibt_status_t status;
	int i, j;
	int retval;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_sync_t), mode);
	if (retval != 0) {
		DERR("mr_sync: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* number of segments bound check */
	if (args.mrs_numseg > DAPL_MR_PER_SYNC) {
		DERR("mr_sync: number of segments too large\n");
		return (EINVAL);
	}

	/* translate MR sync direction flag */
	if (args.mrs_flags == DAPL_MR_SYNC_RDMA_RD) {
		sync_direction_flags = IBT_SYNC_READ;
	} else if (args.mrs_flags == DAPL_MR_SYNC_RDMA_WR) {
		sync_direction_flags = IBT_SYNC_WRITE;
	} else {
		DERR("mr_sync: unknown flags\n");
		return (EINVAL);
	}

	/*
	 * all the segments are going to be sync'd by ibtl together
	 */
	for (i = 0; i < args.mrs_numseg; i++) {
		/* lookup takes a reference on each mr; held until the end */
		mr_rp[i] = (daplka_mr_resource_t *)daplka_hash_lookup(
		    &ia_rp->ia_mr_htbl, args.mrs_vec[i].mrsv_hkey);
		if (mr_rp[i] == NULL) {
			/* unwind the references acquired so far */
			for (j = 0; j < i; j++) {
				DAPLKA_RS_UNREF(mr_rp[j]);
			}
			DERR("mr_sync: lookup error\n");
			return (EINVAL);
		}
		ASSERT(DAPLKA_RS_TYPE(mr_rp[i]) == DAPL_TYPE_MR);
		mrs[i].ms_handle = mr_rp[i]->mr_hdl;
		mrs[i].ms_vaddr = args.mrs_vec[i].mrsv_va;
		mrs[i].ms_len = args.mrs_vec[i].mrsv_len;
		mrs[i].ms_flags = sync_direction_flags;
	}

	status = ibt_sync_mr(ia_rp->ia_hca_hdl, mrs, args.mrs_numseg);
	if (status != IBT_SUCCESS) {
		/* ibt status goes back via *rvalp; ioctl still returns 0 */
		DERR("mr_sync: ibt_sync_mr error %d\n", status);
		*rvalp = (int)status;
	}
	for (i = 0; i < args.mrs_numseg; i++) {
		DAPLKA_RS_UNREF(mr_rp[i]);
	}
	return (0);
}
5359 5335
↓ open down ↓ |
210 lines elided |
↑ open up ↑ |
/*
 * destroys a memory region.
 * called when refcnt drops to zero.
 * deregisters the mr with ibtf, drops the reference on the owning
 * PD and frees the resource structure. always returns 0.
 */
static int
daplka_mr_destroy(daplka_resource_t *gen_rp)
{
	daplka_mr_resource_t *mr_rp = (daplka_mr_resource_t *)gen_rp;
	ibt_status_t status;

	ASSERT(DAPLKA_RS_REFCNT(mr_rp) == 0);
	/* shared mr linkage must already be severed by shared_mr_free */
	ASSERT(mr_rp->mr_shared_mr == NULL);
	D3("mr_destroy: entering, mr_rp 0x%p, rnum %d\n",
	    mr_rp, DAPLKA_RS_RNUM(mr_rp));

	/*
	 * deregister mr
	 */
	if (mr_rp->mr_hdl) {
		status = daplka_ibt_deregister_mr(mr_rp, mr_rp->mr_hca_hdl,
		    mr_rp->mr_hdl);
		if (status != IBT_SUCCESS) {
			/* log only; teardown continues regardless */
			DERR("mr_destroy: ibt_deregister_mr returned %d\n",
			    status);
		}
		mr_rp->mr_hdl = NULL;
		D3("mr_destroy: mr deregistered\n");
	}
	mr_rp->mr_attr.mr_vaddr = NULL;

	/*
	 * release reference on PD
	 */
	if (mr_rp->mr_pd_res != NULL) {
		DAPLKA_RS_UNREF(mr_rp->mr_pd_res);
		mr_rp->mr_pd_res = NULL;
	}
	mutex_destroy(&mr_rp->mr_lock);
	DAPLKA_RS_FINI(mr_rp);
	kmem_free(mr_rp, sizeof (daplka_mr_resource_t));
	D3("mr_destroy: exiting, mr_rp 0x%p\n", mr_rp);
	return (0);
}
5404 5379
5405 5380 /*
5406 5381 * this function is called by daplka_hash_destroy for
5407 5382 * freeing MR resource objects
5408 5383 */
5409 5384 static void
5410 5385 daplka_hash_mr_free(void *obj)
5411 5386 {
5412 5387 daplka_mr_resource_t *mr_rp = (daplka_mr_resource_t *)obj;
5413 5388
5414 5389 daplka_shared_mr_free(mr_rp);
5415 5390 DAPLKA_RS_UNREF(mr_rp);
5416 5391 }
5417 5392
5418 5393 /*
5419 5394 * comparison function used for finding a shared mr object
5420 5395 * from the global shared mr avl tree.
5421 5396 */
5422 5397 static int
5423 5398 daplka_shared_mr_cmp(const void *smr1, const void *smr2)
5424 5399 {
5425 5400 daplka_shared_mr_t *s1 = (daplka_shared_mr_t *)smr1;
5426 5401 daplka_shared_mr_t *s2 = (daplka_shared_mr_t *)smr2;
5427 5402 int i;
5428 5403
5429 5404 for (i = 4; i >= 0; i--) {
5430 5405 if (s1->smr_cookie.mc_uint_arr[i] <
5431 5406 s2->smr_cookie.mc_uint_arr[i]) {
5432 5407 return (-1);
5433 5408 }
5434 5409 if (s1->smr_cookie.mc_uint_arr[i] >
5435 5410 s2->smr_cookie.mc_uint_arr[i]) {
5436 5411 return (1);
5437 5412 }
5438 5413 }
5439 5414 return (0);
5440 5415 }
5441 5416
/*
 * allocates a protection domain.
 * allocates a PD with ibtf, publishes it in the per-IA pd hash
 * table and copies the resulting hkey back to the library.
 * on any failure after insertion the entry is removed again and
 * the initial reference is dropped, destroying the PD.
 */
/* ARGSUSED */
static int
daplka_pd_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	dapl_pd_alloc_t args;
	daplka_pd_resource_t *pd_rp;
	ibt_status_t status;
	uint64_t pd_hkey = 0;
	boolean_t inserted = B_FALSE;
	int retval;

	pd_rp = kmem_zalloc(sizeof (*pd_rp), daplka_km_flags);
	if (pd_rp == NULL) {
		DERR("pd_alloc: cannot allocate pd resource\n");
		return (ENOMEM);
	}
	/* starts with refcnt 1; daplka_pd_destroy runs when it hits 0 */
	DAPLKA_RS_INIT(pd_rp, DAPL_TYPE_PD,
	    DAPLKA_RS_RNUM(ia_rp), daplka_pd_destroy);

	pd_rp->pd_hca = ia_rp->ia_hca;
	pd_rp->pd_hca_hdl = ia_rp->ia_hca_hdl;
	status = daplka_ibt_alloc_pd(pd_rp, pd_rp->pd_hca_hdl,
	    IBT_PD_NO_FLAGS, &pd_rp->pd_hdl);
	if (status != IBT_SUCCESS) {
		DERR("pd_alloc: ibt_alloc_pd returned %d\n", status);
		/* ibt failure is reported via *rvalp; ioctl returns 0 */
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	/* insert into pd hash table */
	retval = daplka_hash_insert(&ia_rp->ia_pd_htbl,
	    &pd_hkey, (void *)pd_rp);
	if (retval != 0) {
		DERR("pd_alloc: cannot insert pd resource into pd_htbl\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	/* return hkey to library */
	args.pda_hkey = pd_hkey;

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_pd_alloc_t),
	    mode);
	if (retval != 0) {
		DERR("pd_alloc: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		daplka_pd_resource_t *free_rp = NULL;

		(void) daplka_hash_remove(&ia_rp->ia_pd_htbl, pd_hkey,
		    (void **)&free_rp);
		if (free_rp != pd_rp) {
			DERR("pd_alloc: cannot remove pd from hash table\n");
			/*
			 * we can only get here if another thread
			 * has completed the cleanup in pd_free
			 */
			return (retval);
		}
	}
	DAPLKA_RS_UNREF(pd_rp);
	return (retval);
}
5517 5490
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
/*
 * destroys a protection domain.
 * called when refcnt drops to zero.
 * frees the ibtf PD handle (if any) and releases the resource
 * structure. always returns 0.
 */
static int
daplka_pd_destroy(daplka_resource_t *gen_rp)
{
	daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)gen_rp;
	ibt_status_t status;

	ASSERT(DAPLKA_RS_REFCNT(pd_rp) == 0);
	D3("pd_destroy: entering, pd_rp %p, rnum %d\n",
	    pd_rp, DAPLKA_RS_RNUM(pd_rp));

	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
	if (pd_rp->pd_hdl != NULL) {
		status = daplka_ibt_free_pd(pd_rp, pd_rp->pd_hca_hdl,
		    pd_rp->pd_hdl);
		if (status != IBT_SUCCESS) {
			/* log only; teardown continues regardless */
			DERR("pd_destroy: ibt_free_pd returned %d\n", status);
		}
	}
	DAPLKA_RS_FINI(pd_rp);
	kmem_free(pd_rp, sizeof (daplka_pd_resource_t));
	D3("pd_destroy: exiting, pd_rp %p\n", pd_rp);
	return (0);
}
5546 5518
5547 5519 static void
5548 5520 daplka_hash_pd_free(void *obj)
5549 5521 {
5550 5522 daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)obj;
5551 5523
5552 5524 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5553 5525 DAPLKA_RS_UNREF(pd_rp);
5554 5526 }
5555 5527
5556 5528 /*
5557 5529 * removes the pd reference from ia_pd_htbl and releases the
5558 5530 * initial reference to the pd. also destroys the pd if the refcnt
5559 5531 * is zero.
5560 5532 */
5561 5533 /* ARGSUSED */
5562 5534 static int
5563 5535 daplka_pd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5564 5536 cred_t *cred, int *rvalp)
5565 5537 {
5566 5538 daplka_pd_resource_t *pd_rp;
5567 5539 dapl_pd_free_t args;
5568 5540 int retval;
5569 5541
5570 5542 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_pd_free_t), mode);
5571 5543 if (retval != 0) {
5572 5544 DERR("pd_free: copyin error %d\n", retval);
5573 5545 return (EINVAL);
5574 5546 }
5575 5547
5576 5548 retval = daplka_hash_remove(&ia_rp->ia_pd_htbl,
5577 5549 args.pdf_hkey, (void **)&pd_rp);
5578 5550 if (retval != 0 || pd_rp == NULL) {
5579 5551 DERR("pd_free: cannot find pd resource\n");
5580 5552 return (EINVAL);
5581 5553 }
5582 5554 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5583 5555
5584 5556 /* UNREF calls the actual free function when refcnt is zero */
5585 5557 DAPLKA_RS_UNREF(pd_rp);
5586 5558 return (0);
5587 5559 }
5588 5560
5589 5561 /*
5590 5562 * allocates a memory window
5591 5563 */
5592 5564 /* ARGSUSED */
5593 5565 static int
5594 5566 daplka_mw_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5595 5567 cred_t *cred, int *rvalp)
5596 5568 {
5597 5569 daplka_pd_resource_t *pd_rp;
5598 5570 daplka_mw_resource_t *mw_rp;
5599 5571 dapl_mw_alloc_t args;
5600 5572 ibt_status_t status;
5601 5573 boolean_t inserted = B_FALSE;
5602 5574 uint64_t mw_hkey;
5603 5575 ibt_rkey_t mw_rkey;
5604 5576 int retval;
5605 5577
5606 5578 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_alloc_t), mode);
5607 5579 if (retval != 0) {
5608 5580 DERR("mw_alloc: copyin error %d\n", retval);
5609 5581 return (EFAULT);
↓ open down ↓ |
71 lines elided |
↑ open up ↑ |
5610 5582 }
5611 5583
5612 5584 /*
5613 5585 * Allocate and initialize a MW resource
5614 5586 */
5615 5587 mw_rp = kmem_zalloc(sizeof (daplka_mw_resource_t), daplka_km_flags);
5616 5588 if (mw_rp == NULL) {
5617 5589 DERR("mw_alloc: cannot allocate mw resource\n");
5618 5590 return (ENOMEM);
5619 5591 }
5620 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
5621 5592 DAPLKA_RS_INIT(mw_rp, DAPL_TYPE_MW,
5622 5593 DAPLKA_RS_RNUM(ia_rp), daplka_mw_destroy);
5623 5594
5624 5595 mutex_init(&mw_rp->mw_lock, NULL, MUTEX_DRIVER, NULL);
5625 5596 mw_rp->mw_hca = ia_rp->ia_hca;
5626 5597 mw_rp->mw_hca_hdl = ia_rp->ia_hca_hdl;
5627 5598
5628 5599 /* get pd handle */
5629 5600 pd_rp = (daplka_pd_resource_t *)
5630 5601 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mw_pd_hkey);
5631 5602 if (pd_rp == NULL) {
5632 5603 DERR("mw_alloc: cannot find pd resource\n");
5633 5604 goto cleanup;
5634 5605 }
5635 5606 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5636 5607
5637 5608 mw_rp->mw_pd_res = pd_rp;
5638 5609
5639 5610 status = daplka_ibt_alloc_mw(mw_rp, mw_rp->mw_hca_hdl,
5640 5611 pd_rp->pd_hdl, IBT_MW_NOSLEEP, &mw_rp->mw_hdl, &mw_rkey);
5641 5612
5642 5613 if (status != IBT_SUCCESS) {
5643 5614 DERR("mw_alloc: ibt_alloc_mw returned %d\n", status);
5644 5615 *rvalp = (int)status;
5645 5616 retval = 0;
5646 5617 goto cleanup;
5647 5618 }
5648 5619
5649 5620 mutex_enter(&ia_rp->ia_lock);
5650 5621 switch (ia_rp->ia_state) {
5651 5622 case DAPLKA_IA_INIT:
5652 5623 ia_rp->ia_state = DAPLKA_IA_MW_ALLOC_IN_PROGRESS;
5653 5624 ia_rp->ia_mw_alloccnt++;
5654 5625 retval = 0;
5655 5626 break;
5656 5627 case DAPLKA_IA_MW_ALLOC_IN_PROGRESS:
5657 5628 /* another mw_alloc is already in progress increase cnt */
5658 5629 ia_rp->ia_mw_alloccnt++;
5659 5630 retval = 0;
5660 5631 break;
5661 5632 case DAPLKA_IA_MW_FREEZE_IN_PROGRESS:
5662 5633 /* FALLTHRU */
5663 5634 case DAPLKA_IA_MW_FROZEN:
5664 5635 /*
5665 5636 * IA is being or already frozen don't allow more MWs to be
5666 5637 * allocated.
5667 5638 */
5668 5639 DERR("mw_alloc: IA is freezing MWs (state=%d)\n",
5669 5640 ia_rp->ia_state);
5670 5641 retval = EINVAL;
5671 5642 break;
5672 5643 default:
5673 5644 ASSERT(!"Invalid IA state in mw_alloc");
5674 5645 DERR("mw_alloc: IA state=%d invalid\n", ia_rp->ia_state);
5675 5646 retval = EINVAL;
5676 5647 break;
5677 5648 }
5678 5649 mutex_exit(&ia_rp->ia_lock);
5679 5650 /* retval is 0 when ia_mw_alloccnt is incremented */
5680 5651 if (retval != 0) {
5681 5652 goto cleanup;
5682 5653 }
5683 5654
5684 5655 /* insert into mw hash table */
5685 5656 mw_hkey = 0;
5686 5657 retval = daplka_hash_insert(&ia_rp->ia_mw_htbl, &mw_hkey,
5687 5658 (void *)mw_rp);
5688 5659 if (retval != 0) {
5689 5660 DERR("mw_alloc: cannot insert mw resource into mw_htbl\n");
5690 5661 mutex_enter(&ia_rp->ia_lock);
↓ open down ↓ |
60 lines elided |
↑ open up ↑ |
5691 5662 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5692 5663 ia_rp->ia_mw_alloccnt--;
5693 5664 if (ia_rp->ia_mw_alloccnt == 0) {
5694 5665 ia_rp->ia_state = DAPLKA_IA_INIT;
5695 5666 cv_broadcast(&ia_rp->ia_cv);
5696 5667 }
5697 5668 mutex_exit(&ia_rp->ia_lock);
5698 5669 goto cleanup;
5699 5670 }
5700 5671 inserted = B_TRUE;
5701 - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mw_rp))
5702 5672
5703 5673 D3("mw_alloc: ibt_alloc_mw mw_hdl(%p) mw_rkey(0x%llx)\n",
5704 5674 mw_rp->mw_hdl, (longlong_t)mw_rkey);
5705 5675
5706 5676 mutex_enter(&ia_rp->ia_lock);
5707 5677 /*
5708 5678 * We are done with mw_alloc if this was the last mw_alloc
5709 5679 * change state back to DAPLKA_IA_INIT and wake up waiters
5710 5680 * specifically the unlock callback.
5711 5681 */
5712 5682 ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5713 5683 ia_rp->ia_mw_alloccnt--;
5714 5684 if (ia_rp->ia_mw_alloccnt == 0) {
5715 5685 ia_rp->ia_state = DAPLKA_IA_INIT;
5716 5686 cv_broadcast(&ia_rp->ia_cv);
5717 5687 }
5718 5688 mutex_exit(&ia_rp->ia_lock);
5719 5689
5720 5690 args.mw_hkey = mw_hkey;
5721 5691 args.mw_rkey = mw_rkey;
5722 5692
5723 5693 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_mw_alloc_t),
5724 5694 mode);
5725 5695 if (retval != 0) {
5726 5696 DERR("mw_alloc: copyout error %d\n", retval);
5727 5697 retval = EFAULT;
5728 5698 goto cleanup;
5729 5699 }
5730 5700 return (0);
5731 5701
5732 5702 cleanup:;
5733 5703 if (inserted) {
5734 5704 daplka_mw_resource_t *free_rp = NULL;
5735 5705
5736 5706 (void) daplka_hash_remove(&ia_rp->ia_mw_htbl, mw_hkey,
5737 5707 (void **)&free_rp);
5738 5708 if (free_rp != mw_rp) {
5739 5709 DERR("mw_alloc: cannot remove mw from hash table\n");
5740 5710 /*
5741 5711 * we can only get here if another thread
5742 5712 * has completed the cleanup in mw_free
5743 5713 */
5744 5714 return (retval);
5745 5715 }
5746 5716 }
5747 5717 DAPLKA_RS_UNREF(mw_rp);
5748 5718 return (retval);
5749 5719 }
5750 5720
5751 5721 /*
5752 5722 * removes the mw reference from ia_mw_htbl and releases the
5753 5723 * initial reference to the mw. also destroys the mw if the refcnt
5754 5724 * is zero.
5755 5725 */
5756 5726 /* ARGSUSED */
5757 5727 static int
5758 5728 daplka_mw_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5759 5729 cred_t *cred, int *rvalp)
5760 5730 {
5761 5731 daplka_mw_resource_t *mw_rp = NULL;
5762 5732 dapl_mw_free_t args;
5763 5733 int retval = 0;
5764 5734
5765 5735 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_free_t), mode);
5766 5736 if (retval != 0) {
5767 5737 DERR("mw_free: copyin error %d\n", retval);
5768 5738 return (EFAULT);
5769 5739 }
5770 5740
5771 5741 retval = daplka_hash_remove(&ia_rp->ia_mw_htbl, args.mw_hkey,
5772 5742 (void **)&mw_rp);
5773 5743 if (retval != 0 || mw_rp == NULL) {
5774 5744 DERR("mw_free: cannot find mw resrc (0x%llx)\n",
5775 5745 (longlong_t)args.mw_hkey);
5776 5746 return (EINVAL);
5777 5747 }
5778 5748
5779 5749 ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5780 5750
5781 5751 /* UNREF calls the actual free function when refcnt is zero */
5782 5752 DAPLKA_RS_UNREF(mw_rp);
5783 5753 return (retval);
5784 5754 }
5785 5755
↓ open down ↓ |
74 lines elided |
↑ open up ↑ |
/*
 * destroys the memory window.
 * called when refcnt drops to zero.
 * frees the ibtf MW handle (if any), drops the reference on the
 * owning PD and releases the resource structure. always returns 0.
 */
static int
daplka_mw_destroy(daplka_resource_t *gen_rp)
{
	daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)gen_rp;
	ibt_status_t status;

	ASSERT(DAPLKA_RS_REFCNT(mw_rp) == 0);
	D3("mw_destroy: entering, mw_rp 0x%p, rnum %d\n",
	    mw_rp, DAPLKA_RS_RNUM(mw_rp));

	/*
	 * free memory window
	 */
	if (mw_rp->mw_hdl) {
		status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl,
		    mw_rp->mw_hdl);
		if (status != IBT_SUCCESS) {
			/* log only; teardown continues regardless */
			DERR("mw_destroy: ibt_free_mw returned %d\n", status);
		}
		mw_rp->mw_hdl = NULL;
		D3("mw_destroy: mw freed\n");
	}

	/*
	 * release reference on PD
	 */
	if (mw_rp->mw_pd_res != NULL) {
		DAPLKA_RS_UNREF(mw_rp->mw_pd_res);
		mw_rp->mw_pd_res = NULL;
	}
	mutex_destroy(&mw_rp->mw_lock);
	DAPLKA_RS_FINI(mw_rp);
	kmem_free(mw_rp, sizeof (daplka_mw_resource_t));
	D3("mw_destroy: exiting, mw_rp 0x%p\n", mw_rp);
	return (0);
}
5827 5796
5828 5797 static void
5829 5798 daplka_hash_mw_free(void *obj)
5830 5799 {
5831 5800 daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)obj;
5832 5801
5833 5802 ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5834 5803 DAPLKA_RS_UNREF(mw_rp);
5835 5804 }
5836 5805
5837 5806 /*
5838 5807 * SRQ ioctls and supporting functions
5839 5808 */
5840 5809 /* ARGSUSED */
5841 5810 static int
5842 5811 daplka_srq_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5843 5812 cred_t *cred, int *rvalp)
5844 5813 {
5845 5814 daplka_srq_resource_t *srq_rp;
5846 5815 daplka_pd_resource_t *pd_rp;
5847 5816 dapl_srq_create_t args;
5848 5817 ibt_srq_sizes_t srq_sizes;
5849 5818 ibt_srq_sizes_t srq_real_sizes;
5850 5819 ibt_hca_attr_t *hca_attrp;
5851 5820 uint64_t srq_hkey = 0;
5852 5821 boolean_t inserted = B_FALSE;
5853 5822 int retval;
5854 5823 ibt_status_t status;
5855 5824
5856 5825 D3("srq_create: enter\n");
5857 5826 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_create_t),
5858 5827 mode);
5859 5828 if (retval != 0) {
5860 5829 DERR("srq_create: copyin error %d\n", retval);
5861 5830 return (EFAULT);
5862 5831 }
5863 5832 srq_rp = kmem_zalloc(sizeof (daplka_srq_resource_t), daplka_km_flags);
5864 5833 if (srq_rp == NULL) {
5865 5834 DERR("srq_create: cannot allocate ep_rp\n");
5866 5835 return (ENOMEM);
5867 5836 }
5868 5837 DAPLKA_RS_INIT(srq_rp, DAPL_TYPE_SRQ,
5869 5838 DAPLKA_RS_RNUM(ia_rp), daplka_srq_destroy);
5870 5839
5871 5840 srq_rp->srq_hca = ia_rp->ia_hca;
5872 5841 srq_rp->srq_hca_hdl = ia_rp->ia_hca_hdl;
5873 5842 mutex_init(&srq_rp->srq_lock, NULL, MUTEX_DRIVER, NULL);
5874 5843
5875 5844 /* get pd handle */
5876 5845 pd_rp = (daplka_pd_resource_t *)
5877 5846 daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.srqc_pd_hkey);
5878 5847 if (pd_rp == NULL) {
5879 5848 DERR("srq_create: cannot find pd resource\n");
5880 5849 retval = EINVAL;
5881 5850 goto cleanup;
5882 5851 }
5883 5852 ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5884 5853 srq_rp->srq_pd_res = pd_rp;
5885 5854
5886 5855 /*
5887 5856 * these checks ensure that the requested SRQ sizes
5888 5857 * are within the limits supported by the chosen HCA.
5889 5858 */
5890 5859 hca_attrp = &ia_rp->ia_hca->hca_attr;
5891 5860 if (args.srqc_sizes.srqs_sz > hca_attrp->hca_max_srqs_sz) {
5892 5861 DERR("srq_create: invalid srqs_sz %d\n",
5893 5862 args.srqc_sizes.srqs_sz);
5894 5863 retval = EINVAL;
5895 5864 goto cleanup;
5896 5865 }
5897 5866 if (args.srqc_sizes.srqs_sgl > hca_attrp->hca_max_srq_sgl) {
5898 5867 DERR("srq_create: invalid srqs_sgl %d\n",
5899 5868 args.srqc_sizes.srqs_sgl);
5900 5869 retval = EINVAL;
5901 5870 goto cleanup;
5902 5871 }
5903 5872
5904 5873 D3("srq_create: srq_sgl %d, srq_sz %d\n",
5905 5874 args.srqc_sizes.srqs_sgl, args.srqc_sizes.srqs_sz);
5906 5875
5907 5876 srq_sizes.srq_wr_sz = args.srqc_sizes.srqs_sz;
5908 5877 srq_sizes.srq_sgl_sz = args.srqc_sizes.srqs_sgl;
5909 5878
5910 5879 /* create srq */
5911 5880 status = daplka_ibt_alloc_srq(srq_rp, ia_rp->ia_hca_hdl,
5912 5881 IBT_SRQ_USER_MAP, pd_rp->pd_hdl, &srq_sizes, &srq_rp->srq_hdl,
5913 5882 &srq_real_sizes);
5914 5883 if (status != IBT_SUCCESS) {
5915 5884 DERR("srq_create: alloc_srq returned %d\n", status);
5916 5885 *rvalp = (int)status;
5917 5886 retval = 0;
5918 5887 goto cleanup;
5919 5888 }
5920 5889
5921 5890 args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5922 5891 args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5923 5892
5924 5893 /* Get HCA-specific data_out info */
5925 5894 status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
5926 5895 IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
5927 5896 &args.srqc_data_out, sizeof (args.srqc_data_out));
5928 5897
5929 5898 if (status != IBT_SUCCESS) {
5930 5899 DERR("srq_create: ibt_ci_data_out error(%d)\n", status);
5931 5900 *rvalp = (int)status;
5932 5901 retval = 0;
5933 5902 goto cleanup;
5934 5903 }
5935 5904
5936 5905 srq_rp->srq_real_size = srq_real_sizes.srq_wr_sz;
5937 5906
5938 5907 /* preparing to copyout map_data back to the library */
5939 5908 args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5940 5909 args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5941 5910
5942 5911 /* insert into srq hash table */
5943 5912 retval = daplka_hash_insert(&ia_rp->ia_srq_htbl,
5944 5913 &srq_hkey, (void *)srq_rp);
5945 5914 if (retval != 0) {
5946 5915 DERR("srq_create: cannot insert srq resource into srq_htbl\n");
5947 5916 goto cleanup;
5948 5917 }
5949 5918 inserted = B_TRUE;
5950 5919
5951 5920 /* return hkey to library */
5952 5921 args.srqc_hkey = srq_hkey;
5953 5922
5954 5923 retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_create_t),
5955 5924 mode);
5956 5925 if (retval != 0) {
5957 5926 DERR("srq_create: copyout error %d\n", retval);
5958 5927 retval = EFAULT;
5959 5928 goto cleanup;
5960 5929 }
5961 5930
5962 5931 D3("srq_create: %p, 0x%llx\n", srq_rp->srq_hdl, (longlong_t)srq_hkey);
5963 5932 D3(" sz(%d) sgl(%d)\n",
5964 5933 args.srqc_real_sizes.srqs_sz, args.srqc_real_sizes.srqs_sgl);
5965 5934 D3("srq_create: exit\n");
5966 5935 return (0);
5967 5936
5968 5937 cleanup:
5969 5938 if (inserted) {
5970 5939 daplka_srq_resource_t *free_rp = NULL;
5971 5940
5972 5941 (void) daplka_hash_remove(&ia_rp->ia_srq_htbl, srq_hkey,
5973 5942 (void **)&free_rp);
5974 5943 if (free_rp != srq_rp) {
5975 5944 /*
5976 5945 * this case is impossible because ep_free will
5977 5946 * wait until our state transition is complete.
5978 5947 */
5979 5948 DERR("srq_create: cannot remove srq from hash table\n");
5980 5949 ASSERT(B_FALSE);
5981 5950 return (retval);
5982 5951 }
5983 5952 }
5984 5953 DAPLKA_RS_UNREF(srq_rp);
5985 5954 return (retval);
5986 5955 }
5987 5956
/*
 * Resize an existing SRQ
 * looks up the SRQ by hkey, applies the new size via
 * ibt_modify_srq and copies the real size plus the HCA-specific
 * mmap data back to the library.
 */
/* ARGSUSED */
static int
daplka_srq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_srq_resource_t *srq_rp = NULL;
	ibt_hca_attr_t *hca_attrp;
	dapl_srq_resize_t args;
	ibt_status_t status;
	int retval = 0;

	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("srq_resize: copyin error %d\n", retval);
		return (EFAULT);
	}

	/* get srq resource (takes a reference, dropped at cleanup) */
	srq_rp = (daplka_srq_resource_t *)
	    daplka_hash_lookup(&ia_rp->ia_srq_htbl, args.srqr_hkey);
	if (srq_rp == NULL) {
		DERR("srq_resize: cannot find srq resource\n");
		return (EINVAL);
	}
	ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);

	/* reject sizes beyond what the HCA supports */
	hca_attrp = &ia_rp->ia_hca->hca_attr;
	if (args.srqr_new_size > hca_attrp->hca_max_srqs_sz) {
		DERR("srq_resize: invalid srq size %d", args.srqr_new_size);
		retval = EINVAL;
		goto cleanup;
	}

	mutex_enter(&srq_rp->srq_lock);
	/*
	 * If ibt_modify_srq fails it is primarily due to resource
	 * shortage. Per IB spec a resize will never lose events and
	 * a resize error leaves the SRQ intact. Therefore even if the
	 * resize request fails we proceed and get the mapping data
	 * from the SRQ so that the library can mmap it.
	 */
	status = ibt_modify_srq(srq_rp->srq_hdl, IBT_SRQ_SET_SIZE,
	    args.srqr_new_size, 0, &args.srqr_real_size);
	if (status != IBT_SUCCESS) {
		/* we return the size of the old SRQ if resize fails */
		args.srqr_real_size = srq_rp->srq_real_size;
		ASSERT(status != IBT_SRQ_HDL_INVALID);
		DERR("srq_resize: ibt_modify_srq failed:%d\n", status);
	} else {
		srq_rp->srq_real_size = args.srqr_real_size;
	}
	mutex_exit(&srq_rp->srq_lock);

	D2("srq_resize(%d): done new_sz(%u) real_sz(%u)\n",
	    DAPLKA_RS_RNUM(srq_rp), args.srqr_new_size, args.srqr_real_size);

	/* Get HCA-specific data_out info */
	status = ibt_ci_data_out(srq_rp->srq_hca_hdl,
	    IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
	    &args.srqr_data_out, sizeof (args.srqr_data_out));
	if (status != IBT_SUCCESS) {
		DERR("srq_resize: ibt_ci_data_out error(%d)\n", status);
		/* return ibt_ci_data_out status */
		*rvalp = (int)status;
		retval = 0;
		goto cleanup;
	}

	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_resize_t),
	    mode);
	if (retval != 0) {
		DERR("srq_resize: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}

cleanup:;
	if (srq_rp != NULL) {
		DAPLKA_RS_UNREF(srq_rp);
	}
	return (retval);
}
6075 6044
6076 6045 /*
6077 6046 * Frees an SRQ resource.
6078 6047 */
6079 6048 /* ARGSUSED */
6080 6049 static int
6081 6050 daplka_srq_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6082 6051 cred_t *cred, int *rvalp)
6083 6052 {
6084 6053 daplka_srq_resource_t *srq_rp = NULL;
6085 6054 dapl_srq_free_t args;
6086 6055 int retval;
6087 6056
6088 6057 retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_free_t), mode);
6089 6058 if (retval != 0) {
6090 6059 DERR("srq_free: copyin error %d\n", retval);
6091 6060 return (EFAULT);
6092 6061 }
6093 6062
6094 6063 retval = daplka_hash_remove(&ia_rp->ia_srq_htbl,
6095 6064 args.srqf_hkey, (void **)&srq_rp);
6096 6065 if (retval != 0 || srq_rp == NULL) {
6097 6066 /*
6098 6067 * this is only possible if we have two threads
6099 6068 * calling ep_free in parallel.
6100 6069 */
6101 6070 DERR("srq_free: cannot find resource retval(%d) 0x%llx\n",
6102 6071 retval, args.srqf_hkey);
6103 6072 return (EINVAL);
6104 6073 }
6105 6074
6106 6075 /* UNREF calls the actual free function when refcnt is zero */
6107 6076 DAPLKA_RS_UNREF(srq_rp);
6108 6077 return (0);
6109 6078 }
6110 6079
/*
 * destroys a SRQ resource.
 * called when refcnt drops to zero.
 * frees the ibtf SRQ handle (if any), drops the reference on the
 * owning PD and releases the resource structure. always returns 0.
 */
static int
daplka_srq_destroy(daplka_resource_t *gen_rp)
{
	daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)gen_rp;
	ibt_status_t status;

	ASSERT(DAPLKA_RS_REFCNT(srq_rp) == 0);

	D3("srq_destroy: entering, srq_rp 0x%p, rnum %d\n",
	    srq_rp, DAPLKA_RS_RNUM(srq_rp));
	/*
	 * destroy the srq
	 */
	if (srq_rp->srq_hdl != NULL) {
		status = daplka_ibt_free_srq(srq_rp, srq_rp->srq_hdl);
		if (status != IBT_SUCCESS) {
			/* log only; teardown continues regardless */
			DERR("srq_destroy: ibt_free_srq returned %d\n",
			    status);
		}
		srq_rp->srq_hdl = NULL;
		D3("srq_destroy: srq freed, rnum %d\n", DAPLKA_RS_RNUM(srq_rp));
	}
	/*
	 * release all references
	 */
	if (srq_rp->srq_pd_res != NULL) {
		DAPLKA_RS_UNREF(srq_rp->srq_pd_res);
		srq_rp->srq_pd_res = NULL;
	}

	mutex_destroy(&srq_rp->srq_lock);
	DAPLKA_RS_FINI(srq_rp);
	kmem_free(srq_rp, sizeof (daplka_srq_resource_t));
	D3("srq_destroy: exiting, srq_rp 0x%p\n", srq_rp);
	return (0);
}
6151 6120
6152 6121 static void
6153 6122 daplka_hash_srq_free(void *obj)
6154 6123 {
6155 6124 daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)obj;
6156 6125
6157 6126 ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
6158 6127 DAPLKA_RS_UNREF(srq_rp);
6159 6128 }
6160 6129
6161 6130 /*
6162 6131 * This function tells the CM to start listening on a service id.
6163 6132 * It must be called by the passive side client before the client
6164 6133 * can receive connection requests from remote endpoints. If the
6165 6134 * client specifies a non-zero service id (connection qualifier in
6166 6135 * dapl terms), this function will attempt to bind to this service
6167 6136 * id and return an error if the id is already in use. If the client
6168 6137 * specifies zero as the service id, this function will try to find
6169 6138 * the next available service id and return it back to the client.
6170 6139 * To support the cr_handoff function, this function will, in addition
6171 6140 * to creating and inserting an SP resource into the per-IA SP hash
6172 6141 * table, insert the SP resource into a global SP table. This table
6173 6142 * maintains all active service points created by all dapl clients.
6174 6143 * CR handoff locates the target SP by iterating through this global
6175 6144 * table.
6176 6145 */
6177 6146 /* ARGSUSED */
6178 6147 static int
6179 6148 daplka_service_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6180 6149 cred_t *cred, int *rvalp)
6181 6150 {
6182 6151 daplka_evd_resource_t *evd_rp = NULL;
6183 6152 daplka_sp_resource_t *sp_rp = NULL;
6184 6153 dapl_service_register_t args;
6185 6154 ibt_srv_desc_t sd_args;
6186 6155 ibt_srv_bind_t sb_args;
6187 6156 ibt_status_t status;
6188 6157 ib_svc_id_t retsid = 0;
6189 6158 uint64_t sp_hkey = 0;
6190 6159 boolean_t bumped = B_FALSE;
6191 6160 int backlog_size;
6192 6161 int retval = 0;
6193 6162
6194 6163 retval = ddi_copyin((void *)arg, &args,
6195 6164 sizeof (dapl_service_register_t), mode);
↓ open down ↓ |
389 lines elided |
↑ open up ↑ |
6196 6165 if (retval != 0) {
6197 6166 DERR("service_register: copyin error %d\n", retval);
6198 6167 return (EINVAL);
6199 6168 }
6200 6169
6201 6170 sp_rp = kmem_zalloc(sizeof (*sp_rp), daplka_km_flags);
6202 6171 if (sp_rp == NULL) {
6203 6172 DERR("service_register: cannot allocate sp resource\n");
6204 6173 return (ENOMEM);
6205 6174 }
6206 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
6207 6175 DAPLKA_RS_INIT(sp_rp, DAPL_TYPE_SP,
6208 6176 DAPLKA_RS_RNUM(ia_rp), daplka_sp_destroy);
6209 6177
6210 6178 /* check if evd exists */
6211 6179 evd_rp = (daplka_evd_resource_t *)
6212 6180 daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.sr_evd_hkey);
6213 6181 if (evd_rp == NULL) {
6214 6182 DERR("service_register: evd resource not found\n");
6215 6183 retval = EINVAL;
6216 6184 goto cleanup;
6217 6185 }
6218 6186 /*
6219 6187 * initialize backlog size
6220 6188 */
6221 6189 if (evd_rp && evd_rp->evd_cq_real_size > 0) {
6222 6190 backlog_size = evd_rp->evd_cq_real_size + 1;
6223 6191 } else {
6224 6192 backlog_size = DAPLKA_DEFAULT_SP_BACKLOG;
6225 6193 }
6226 6194 D2("service_register: args.sr_sid = %llu\n", (longlong_t)args.sr_sid);
6227 6195
6228 6196 /* save the userland sp ptr */
6229 6197 sp_rp->sp_cookie = args.sr_sp_cookie;
6230 6198 sp_rp->sp_backlog_size = backlog_size;
6231 6199 D3("service_register: backlog set to %d\n", sp_rp->sp_backlog_size);
6232 6200 sp_rp->sp_backlog = kmem_zalloc(sp_rp->sp_backlog_size *
6233 6201 sizeof (daplka_sp_conn_pend_t), daplka_km_flags);
6234 6202
6235 6203 /* save evd resource pointer */
6236 6204 sp_rp->sp_evd_res = evd_rp;
6237 6205
6238 6206 /*
6239 6207 * save ruid here so that we can do a comparison later
6240 6208 * when someone does cr_handoff. the check will prevent
6241 6209 * a malicious app from passing a CR to us.
6242 6210 */
6243 6211 sp_rp->sp_ruid = crgetruid(cred);
6244 6212
6245 6213 /* fill in args for register_service */
6246 6214 sd_args.sd_ud_handler = NULL;
6247 6215 sd_args.sd_handler = daplka_cm_service_handler;
6248 6216 sd_args.sd_flags = IBT_SRV_NO_FLAGS;
6249 6217
6250 6218 status = ibt_register_service(daplka_dev->daplka_clnt_hdl,
6251 6219 &sd_args, args.sr_sid, 1, &sp_rp->sp_srv_hdl, &retsid);
6252 6220
6253 6221 if (status != IBT_SUCCESS) {
6254 6222 DERR("service_register: ibt_register_service returned %d\n",
6255 6223 status);
6256 6224 *rvalp = (int)status;
6257 6225 retval = 0;
6258 6226 goto cleanup;
6259 6227 }
6260 6228 /* save returned sid */
6261 6229 sp_rp->sp_conn_qual = retsid;
6262 6230 args.sr_retsid = retsid;
6263 6231
6264 6232 /* fill in args for bind_service */
6265 6233 sb_args.sb_pkey = ia_rp->ia_port_pkey;
6266 6234 sb_args.sb_lease = 0xffffffff;
6267 6235 sb_args.sb_key[0] = 0x1234;
6268 6236 sb_args.sb_key[1] = 0x5678;
6269 6237 sb_args.sb_name = DAPLKA_DRV_NAME;
6270 6238
6271 6239 D2("service_register: bind(0x%llx:0x%llx)\n",
6272 6240 (longlong_t)ia_rp->ia_hca_sgid.gid_prefix,
6273 6241 (longlong_t)ia_rp->ia_hca_sgid.gid_guid);
6274 6242
6275 6243 status = ibt_bind_service(sp_rp->sp_srv_hdl, ia_rp->ia_hca_sgid,
6276 6244 &sb_args, (void *)sp_rp, &sp_rp->sp_bind_hdl);
6277 6245 if (status != IBT_SUCCESS) {
6278 6246 DERR("service_register: ibt_bind_service returned %d\n",
6279 6247 status);
6280 6248 *rvalp = (int)status;
6281 6249 retval = 0;
6282 6250 goto cleanup;
6283 6251 }
6284 6252
6285 6253 /*
6286 6254 * need to bump refcnt because the global hash table will
6287 6255 * have a reference to sp_rp
6288 6256 */
6289 6257 DAPLKA_RS_REF(sp_rp);
↓ open down ↓ |
73 lines elided |
↑ open up ↑ |
6290 6258 bumped = B_TRUE;
6291 6259
6292 6260 /* insert into global sp hash table */
6293 6261 sp_rp->sp_global_hkey = 0;
6294 6262 retval = daplka_hash_insert(&daplka_global_sp_htbl,
6295 6263 &sp_rp->sp_global_hkey, (void *)sp_rp);
6296 6264 if (retval != 0) {
6297 6265 DERR("service_register: cannot insert sp resource\n");
6298 6266 goto cleanup;
6299 6267 }
6300 - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sp_rp))
6301 6268
6302 6269 /* insert into per-IA sp hash table */
6303 6270 retval = daplka_hash_insert(&ia_rp->ia_sp_htbl,
6304 6271 &sp_hkey, (void *)sp_rp);
6305 6272 if (retval != 0) {
6306 6273 DERR("service_register: cannot insert sp resource\n");
6307 6274 goto cleanup;
6308 6275 }
6309 6276
6310 6277 /* pass index to application */
6311 6278 args.sr_sp_hkey = sp_hkey;
6312 6279 retval = ddi_copyout(&args, (void *)arg,
6313 6280 sizeof (dapl_service_register_t), mode);
6314 6281 if (retval != 0) {
6315 6282 DERR("service_register: copyout error %d\n", retval);
6316 6283 retval = EFAULT;
6317 6284 goto cleanup;
6318 6285 }
6319 6286 return (0);
6320 6287
6321 6288 cleanup:;
6322 6289 ASSERT(sp_rp != NULL);
6323 6290 /* remove from ia table */
6324 6291 if (sp_hkey != 0) {
6325 6292 daplka_sp_resource_t *free_rp = NULL;
6326 6293
6327 6294 (void) daplka_hash_remove(&ia_rp->ia_sp_htbl,
6328 6295 sp_hkey, (void **)&free_rp);
6329 6296 if (free_rp != sp_rp) {
6330 6297 DERR("service_register: cannot remove sp\n");
6331 6298 /*
6332 6299 * we can only get here if another thread
6333 6300 * has completed the cleanup in svc_deregister
6334 6301 */
6335 6302 return (retval);
6336 6303 }
6337 6304 }
6338 6305
6339 6306 /* remove from global table */
6340 6307 if (sp_rp->sp_global_hkey != 0) {
6341 6308 daplka_sp_resource_t *free_rp = NULL;
6342 6309
6343 6310 /*
6344 6311 * we get here if either the hash_insert into
6345 6312 * ia_sp_htbl failed or the ddi_copyout failed.
6346 6313 * hash_insert failure implies that we are the
6347 6314 * only thread with a reference to sp. ddi_copyout
6348 6315 * failure implies that svc_deregister could have
6349 6316 * picked up the sp and destroyed it. but since
6350 6317 * we got to this point, we must have removed
6351 6318 * the sp ourselves in hash_remove above and
6352 6319 * that the sp can be destroyed by us.
6353 6320 */
6354 6321 (void) daplka_hash_remove(&daplka_global_sp_htbl,
6355 6322 sp_rp->sp_global_hkey, (void **)&free_rp);
6356 6323 if (free_rp != sp_rp) {
6357 6324 DERR("service_register: cannot remove sp\n");
6358 6325 /*
6359 6326 * this case is impossible. see explanation above.
6360 6327 */
6361 6328 ASSERT(B_FALSE);
6362 6329 return (retval);
6363 6330 }
6364 6331 sp_rp->sp_global_hkey = 0;
6365 6332 }
6366 6333 /* unreference sp */
6367 6334 if (bumped) {
6368 6335 DAPLKA_RS_UNREF(sp_rp);
6369 6336 }
6370 6337
6371 6338 /* destroy sp resource */
6372 6339 DAPLKA_RS_UNREF(sp_rp);
6373 6340 return (retval);
6374 6341 }
6375 6342
6376 6343 /*
6377 6344 * deregisters the service and removes SP from the global table.
6378 6345 */
6379 6346 /* ARGSUSED */
6380 6347 static int
6381 6348 daplka_service_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6382 6349 cred_t *cred, int *rvalp)
6383 6350 {
6384 6351 dapl_service_deregister_t args;
6385 6352 daplka_sp_resource_t *sp_rp = NULL, *g_sp_rp = NULL;
6386 6353 int retval;
6387 6354
6388 6355 retval = ddi_copyin((void *)arg, &args,
6389 6356 sizeof (dapl_service_deregister_t), mode);
6390 6357
6391 6358 if (retval != 0) {
6392 6359 DERR("service_deregister: copyin error %d\n", retval);
6393 6360 return (EINVAL);
6394 6361 }
6395 6362
6396 6363 retval = daplka_hash_remove(&ia_rp->ia_sp_htbl,
6397 6364 args.sdr_sp_hkey, (void **)&sp_rp);
6398 6365 if (retval != 0 || sp_rp == NULL) {
6399 6366 DERR("service_deregister: cannot find sp resource\n");
6400 6367 return (EINVAL);
6401 6368 }
6402 6369
6403 6370 retval = daplka_hash_remove(&daplka_global_sp_htbl,
6404 6371 sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6405 6372 if (retval != 0 || g_sp_rp == NULL) {
6406 6373 DERR("service_deregister: cannot find sp resource\n");
6407 6374 }
6408 6375
6409 6376 /* remove the global reference */
6410 6377 if (g_sp_rp == sp_rp) {
6411 6378 DAPLKA_RS_UNREF(g_sp_rp);
6412 6379 }
6413 6380
6414 6381 DAPLKA_RS_UNREF(sp_rp);
6415 6382 return (0);
6416 6383 }
6417 6384
↓ open down ↓ |
107 lines elided |
↑ open up ↑ |
6418 6385 /*
6419 6386 * destroys a service point.
6420 6387 * called when the refcnt drops to zero.
6421 6388 */
6422 6389 static int
6423 6390 daplka_sp_destroy(daplka_resource_t *gen_rp)
6424 6391 {
6425 6392 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)gen_rp;
6426 6393 ibt_status_t status;
6427 6394
6428 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
6429 6395 ASSERT(DAPLKA_RS_REFCNT(sp_rp) == 0);
6430 6396 D3("sp_destroy: entering, sp_rp %p, rnum %d\n",
6431 6397 sp_rp, DAPLKA_RS_RNUM(sp_rp));
6432 6398
6433 6399 /*
6434 6400 * it is possible for pending connections to remain
6435 6401 * on an SP. We need to clean them up here.
6436 6402 */
6437 6403 if (sp_rp->sp_backlog != NULL) {
6438 6404 ibt_cm_proceed_reply_t proc_reply;
6439 6405 int i, cnt = 0;
6440 6406 void *spcp_sidp;
6441 6407
6442 6408 for (i = 0; i < sp_rp->sp_backlog_size; i++) {
6443 6409 if (sp_rp->sp_backlog[i].spcp_state ==
6444 6410 DAPLKA_SPCP_PENDING) {
6445 6411 cnt++;
6446 6412 if (sp_rp->sp_backlog[i].spcp_sid == NULL) {
6447 6413 DERR("sp_destroy: "
6448 6414 "spcp_sid == NULL!\n");
6449 6415 continue;
6450 6416 }
6451 6417 mutex_enter(&sp_rp->sp_lock);
6452 6418 spcp_sidp = sp_rp->sp_backlog[i].spcp_sid;
6453 6419 sp_rp->sp_backlog[i].spcp_state =
6454 6420 DAPLKA_SPCP_INIT;
6455 6421 sp_rp->sp_backlog[i].spcp_sid = NULL;
6456 6422 sp_rp->sp_backlog[i].spcp_req_len = 0;
6457 6423 mutex_exit(&sp_rp->sp_lock);
6458 6424 status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV,
6459 6425 spcp_sidp,
6460 6426 IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
6461 6427 if (status != IBT_SUCCESS) {
6462 6428 DERR("sp_destroy: proceed failed %d\n",
6463 6429 status);
6464 6430 }
6465 6431 }
6466 6432 }
6467 6433 if (cnt > 0) {
6468 6434 DERR("sp_destroy: found %d pending "
6469 6435 "connections\n", cnt);
6470 6436 }
6471 6437 }
6472 6438
6473 6439 if (sp_rp->sp_srv_hdl != NULL && sp_rp->sp_bind_hdl != NULL) {
6474 6440 status = ibt_unbind_service(sp_rp->sp_srv_hdl,
6475 6441 sp_rp->sp_bind_hdl);
6476 6442 if (status != IBT_SUCCESS) {
6477 6443 DERR("sp_destroy: ibt_unbind_service "
6478 6444 "failed: %d\n", status);
6479 6445 }
6480 6446 }
6481 6447
6482 6448 if (sp_rp->sp_srv_hdl != NULL) {
6483 6449 status = ibt_deregister_service(daplka_dev->daplka_clnt_hdl,
6484 6450 sp_rp->sp_srv_hdl);
6485 6451 if (status != IBT_SUCCESS) {
6486 6452 DERR("sp_destroy: ibt_deregister_service "
6487 6453 "failed: %d\n", status);
6488 6454 }
6489 6455 }
6490 6456 if (sp_rp->sp_backlog != NULL) {
6491 6457 kmem_free(sp_rp->sp_backlog,
6492 6458 sp_rp->sp_backlog_size * sizeof (daplka_sp_conn_pend_t));
6493 6459 sp_rp->sp_backlog = NULL;
6494 6460 sp_rp->sp_backlog_size = 0;
6495 6461 }
6496 6462
6497 6463 /*
6498 6464 * release reference to evd
6499 6465 */
6500 6466 if (sp_rp->sp_evd_res != NULL) {
6501 6467 DAPLKA_RS_UNREF(sp_rp->sp_evd_res);
6502 6468 }
6503 6469 sp_rp->sp_bind_hdl = NULL;
6504 6470 sp_rp->sp_srv_hdl = NULL;
6505 6471 DAPLKA_RS_FINI(sp_rp);
6506 6472 kmem_free(sp_rp, sizeof (*sp_rp));
6507 6473 D3("sp_destroy: exiting, sp_rp %p\n", sp_rp);
6508 6474 return (0);
6509 6475 }
6510 6476
6511 6477 /*
6512 6478 * this function is called by daplka_hash_destroy for
6513 6479 * freeing SP resource objects
6514 6480 */
6515 6481 static void
6516 6482 daplka_hash_sp_free(void *obj)
6517 6483 {
6518 6484 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6519 6485 daplka_sp_resource_t *g_sp_rp;
6520 6486 int retval;
6521 6487
6522 6488 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6523 6489
6524 6490 retval = daplka_hash_remove(&daplka_global_sp_htbl,
6525 6491 sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6526 6492 if (retval != 0 || g_sp_rp == NULL) {
6527 6493 DERR("sp_free: cannot find sp resource\n");
6528 6494 }
6529 6495 if (g_sp_rp == sp_rp) {
6530 6496 DAPLKA_RS_UNREF(g_sp_rp);
6531 6497 }
6532 6498
6533 6499 DAPLKA_RS_UNREF(sp_rp);
6534 6500 }
6535 6501
6536 6502 static void
6537 6503 daplka_hash_sp_unref(void *obj)
6538 6504 {
6539 6505 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6540 6506
6541 6507 ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6542 6508 DAPLKA_RS_UNREF(sp_rp);
6543 6509 }
6544 6510
6545 6511 /*
6546 6512 * Passive side CM handlers
6547 6513 */
6548 6514
6549 6515 /*
6550 6516 * processes the REQ_RCV event
6551 6517 */
6552 6518 /* ARGSUSED */
6553 6519 static ibt_cm_status_t
6554 6520 daplka_cm_service_req(daplka_sp_resource_t *spp, ibt_cm_event_t *event,
6555 6521 ibt_cm_return_args_t *ret_args, void *pr_data, ibt_priv_data_len_t pr_len)
6556 6522 {
6557 6523 daplka_sp_conn_pend_t *conn = NULL;
6558 6524 daplka_evd_event_t *cr_ev = NULL;
6559 6525 ibt_cm_status_t cm_status = IBT_CM_DEFAULT;
6560 6526 uint16_t bkl_index;
6561 6527 ibt_status_t status;
6562 6528
6563 6529 /*
6564 6530 * acquire a slot in the connection backlog of this service point
6565 6531 */
6566 6532 mutex_enter(&spp->sp_lock);
6567 6533 for (bkl_index = 0; bkl_index < spp->sp_backlog_size; bkl_index++) {
6568 6534 if (spp->sp_backlog[bkl_index].spcp_state == DAPLKA_SPCP_INIT) {
6569 6535 conn = &spp->sp_backlog[bkl_index];
6570 6536 ASSERT(conn->spcp_sid == NULL);
6571 6537 conn->spcp_state = DAPLKA_SPCP_PENDING;
6572 6538 conn->spcp_sid = event->cm_session_id;
6573 6539 break;
6574 6540 }
6575 6541 }
↓ open down ↓ |
137 lines elided |
↑ open up ↑ |
6576 6542 mutex_exit(&spp->sp_lock);
6577 6543
6578 6544 /*
6579 6545 * too many pending connections
6580 6546 */
6581 6547 if (bkl_index == spp->sp_backlog_size) {
6582 6548 DERR("service_req: connection pending exceeded %d limit\n",
6583 6549 spp->sp_backlog_size);
6584 6550 return (IBT_CM_NO_RESOURCE);
6585 6551 }
6586 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*conn))
6587 6552
6588 6553 /*
6589 6554 * save data for cr_handoff
6590 6555 */
6591 6556 if (pr_data != NULL && pr_len > 0) {
6592 6557 int trunc_len = pr_len;
6593 6558
6594 6559 if (trunc_len > DAPL_MAX_PRIVATE_DATA_SIZE) {
6595 6560 DERR("service_req: private data truncated\n");
6596 6561 trunc_len = DAPL_MAX_PRIVATE_DATA_SIZE;
6597 6562 }
6598 6563 conn->spcp_req_len = trunc_len;
6599 6564 bcopy(pr_data, conn->spcp_req_data, trunc_len);
6600 6565 } else {
6601 6566 conn->spcp_req_len = 0;
6602 6567 }
6603 6568 conn->spcp_rdma_ra_in = event->cm_event.req.req_rdma_ra_in;
6604 6569 conn->spcp_rdma_ra_out = event->cm_event.req.req_rdma_ra_out;
6605 6570
6606 6571 /*
6607 6572 * create a CR event
6608 6573 */
6609 6574 cr_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6610 6575 if (cr_ev == NULL) {
6611 6576 DERR("service_req: could not alloc cr_ev\n");
6612 6577 cm_status = IBT_CM_NO_RESOURCE;
6613 6578 goto cleanup;
6614 6579 }
6615 6580
6616 6581 cr_ev->ee_next = NULL;
6617 6582 cr_ev->ee_cmev.ec_cm_cookie = spp->sp_cookie;
6618 6583 cr_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6619 6584 cr_ev->ee_cmev.ec_cm_psep_cookie = DAPLKA_CREATE_PSEP_COOKIE(bkl_index);
6620 6585 /*
6621 6586 * save the requestor gid
6622 6587 * daplka_event_poll needs this if this is a third party REQ_RCV
6623 6588 */
6624 6589 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix =
6625 6590 event->cm_event.req.req_prim_addr.av_dgid.gid_prefix;
6626 6591 cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid =
6627 6592 event->cm_event.req.req_prim_addr.av_dgid.gid_guid;
6628 6593
6629 6594 /*
6630 6595 * set event type
6631 6596 */
6632 6597 if (pr_len == 0) {
6633 6598 cr_ev->ee_cmev.ec_cm_ev_type =
6634 6599 DAPL_IB_CME_CONNECTION_REQUEST_PENDING;
6635 6600 } else {
6636 6601 cr_ev->ee_cmev.ec_cm_ev_priv_data =
6637 6602 kmem_zalloc(pr_len, KM_NOSLEEP);
6638 6603 if (cr_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
6639 6604 DERR("service_req: could not alloc priv\n");
6640 6605 cm_status = IBT_CM_NO_RESOURCE;
6641 6606 goto cleanup;
6642 6607 }
6643 6608 bcopy(pr_data, cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6644 6609 cr_ev->ee_cmev.ec_cm_ev_type =
6645 6610 DAPL_IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA;
6646 6611 }
6647 6612 cr_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
6648 6613
6649 6614 /*
6650 6615 * tell the active side to expect the processing time to be
6651 6616 * at most equal to daplka_cm_delay
6652 6617 */
6653 6618 status = ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
6654 6619 daplka_cm_delay, NULL, 0);
6655 6620 if (status != IBT_SUCCESS) {
6656 6621 DERR("service_req: ibt_cm_delay failed %d\n", status);
6657 6622 cm_status = IBT_CM_NO_RESOURCE;
6658 6623 goto cleanup;
6659 6624 }
6660 6625
6661 6626 /*
6662 6627 * enqueue cr_ev onto the cr_events list of the EVD
6663 6628 * corresponding to the SP
↓ open down ↓ |
67 lines elided |
↑ open up ↑ |
6664 6629 */
6665 6630 D2("service_req: enqueue event(%p) evdp(%p) priv_data(%p) "
6666 6631 "priv_len(%d) psep(0x%llx)\n", cr_ev, spp->sp_evd_res,
6667 6632 cr_ev->ee_cmev.ec_cm_ev_priv_data,
6668 6633 (int)cr_ev->ee_cmev.ec_cm_ev_priv_data_len,
6669 6634 (longlong_t)cr_ev->ee_cmev.ec_cm_psep_cookie);
6670 6635
6671 6636 daplka_evd_wakeup(spp->sp_evd_res,
6672 6637 &spp->sp_evd_res->evd_cr_events, cr_ev);
6673 6638
6674 - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*conn))
6675 6639 return (IBT_CM_DEFER);
6676 6640
6677 6641 cleanup:;
6678 6642 /*
6679 6643 * free the cr event
6680 6644 */
6681 6645 if (cr_ev != NULL) {
6682 6646 if (cr_ev->ee_cmev.ec_cm_ev_priv_data != NULL) {
6683 6647 kmem_free(cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6684 6648 cr_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6685 6649 cr_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6686 6650 }
6687 6651 kmem_free(cr_ev, sizeof (daplka_evd_event_t));
6688 6652 }
6689 6653 /*
6690 6654 * release our slot in the backlog array
6691 6655 */
6692 6656 if (conn != NULL) {
6693 6657 mutex_enter(&spp->sp_lock);
6694 6658 ASSERT(conn->spcp_state == DAPLKA_SPCP_PENDING);
6695 6659 ASSERT(conn->spcp_sid == event->cm_session_id);
6696 6660 conn->spcp_state = DAPLKA_SPCP_INIT;
6697 6661 conn->spcp_req_len = 0;
6698 6662 conn->spcp_sid = NULL;
6699 6663 mutex_exit(&spp->sp_lock);
6700 6664 }
6701 6665 return (cm_status);
6702 6666 }
6703 6667
6704 6668 /*
6705 6669 * processes the CONN_CLOSED event
6706 6670 */
6707 6671 /* ARGSUSED */
6708 6672 static ibt_cm_status_t
6709 6673 daplka_cm_service_conn_closed(daplka_sp_resource_t *sp_rp,
6710 6674 ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args,
6711 6675 void *priv_data, ibt_priv_data_len_t len)
6712 6676 {
6713 6677 daplka_ep_resource_t *ep_rp;
6714 6678 daplka_evd_event_t *disc_ev;
6715 6679 uint32_t old_state, new_state;
6716 6680
6717 6681 ep_rp = (daplka_ep_resource_t *)
6718 6682 ibt_get_chan_private(event->cm_channel);
6719 6683 if (ep_rp == NULL) {
6720 6684 DERR("service_conn_closed: ep_rp == NULL\n");
6721 6685 return (IBT_CM_ACCEPT);
6722 6686 }
6723 6687
6724 6688 /*
6725 6689 * verify that the ep_state is either CONNECTED or
6726 6690 * DISCONNECTING. if it is not in either states return
6727 6691 * without generating an event.
6728 6692 */
6729 6693 new_state = old_state = daplka_ep_get_state(ep_rp);
6730 6694 if (old_state != DAPLKA_EP_STATE_CONNECTED &&
6731 6695 old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6732 6696 /*
6733 6697 * we can get here if the connection is being aborted
6734 6698 */
6735 6699 D2("service_conn_closed: conn aborted, state = %d, "
6736 6700 "closed = %d\n", old_state, (int)event->cm_event.closed);
6737 6701 daplka_ep_set_state(ep_rp, old_state, new_state);
6738 6702 return (IBT_CM_ACCEPT);
6739 6703 }
6740 6704
6741 6705 /*
6742 6706 * create a DAPL_IB_CME_DISCONNECTED event
6743 6707 */
6744 6708 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6745 6709 if (disc_ev == NULL) {
6746 6710 DERR("service_conn_closed: cannot alloc disc_ev\n");
6747 6711 daplka_ep_set_state(ep_rp, old_state, new_state);
6748 6712 return (IBT_CM_ACCEPT);
6749 6713 }
6750 6714
6751 6715 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
6752 6716 disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6753 6717 disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6754 6718 disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6755 6719 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6756 6720 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6757 6721
6758 6722 D2("service_conn_closed: enqueue event(%p) evdp(%p) psep(0x%llx)\n",
6759 6723 disc_ev, sp_rp->sp_evd_res, (longlong_t)ep_rp->ep_psep_cookie);
6760 6724
6761 6725 /*
6762 6726 * transition ep_state to DISCONNECTED
6763 6727 */
6764 6728 new_state = DAPLKA_EP_STATE_DISCONNECTED;
6765 6729 daplka_ep_set_state(ep_rp, old_state, new_state);
6766 6730
6767 6731 /*
6768 6732 * enqueue event onto the conn_evd owned by ep_rp
6769 6733 */
6770 6734 daplka_evd_wakeup(ep_rp->ep_conn_evd,
6771 6735 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6772 6736
6773 6737 return (IBT_CM_ACCEPT);
6774 6738 }
6775 6739
6776 6740 /*
6777 6741 * processes the CONN_EST event
6778 6742 */
6779 6743 /* ARGSUSED */
6780 6744 static ibt_cm_status_t
6781 6745 daplka_cm_service_conn_est(daplka_sp_resource_t *sp_rp, ibt_cm_event_t *event,
6782 6746 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6783 6747 {
6784 6748 daplka_ep_resource_t *ep_rp;
6785 6749 daplka_evd_event_t *conn_ev;
6786 6750 void *pr_data = event->cm_priv_data;
6787 6751 ibt_priv_data_len_t pr_len = event->cm_priv_data_len;
6788 6752 uint32_t old_state, new_state;
6789 6753
6790 6754 ep_rp = (daplka_ep_resource_t *)
6791 6755 ibt_get_chan_private(event->cm_channel);
6792 6756 if (ep_rp == NULL) {
6793 6757 DERR("service_conn_est: ep_rp == NULL\n");
6794 6758 return (IBT_CM_ACCEPT);
6795 6759 }
6796 6760
6797 6761 /*
6798 6762 * verify that ep_state is ACCEPTING. if it is not in this
6799 6763 * state, return without generating an event.
6800 6764 */
6801 6765 new_state = old_state = daplka_ep_get_state(ep_rp);
6802 6766 if (old_state != DAPLKA_EP_STATE_ACCEPTING) {
6803 6767 /*
6804 6768 * we can get here if the connection is being aborted
6805 6769 */
6806 6770 DERR("service_conn_est: conn aborted, state = %d\n",
6807 6771 old_state);
6808 6772 daplka_ep_set_state(ep_rp, old_state, new_state);
6809 6773 return (IBT_CM_ACCEPT);
6810 6774 }
6811 6775
6812 6776 /*
6813 6777 * create a DAPL_IB_CME_CONNECTED event
6814 6778 */
6815 6779 conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6816 6780 if (conn_ev == NULL) {
6817 6781 DERR("service_conn_est: conn_ev alloc failed\n");
6818 6782 daplka_ep_set_state(ep_rp, old_state, new_state);
6819 6783 return (IBT_CM_ACCEPT);
6820 6784 }
6821 6785
6822 6786 conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
6823 6787 conn_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6824 6788 conn_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6825 6789 conn_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6826 6790
6827 6791 /*
6828 6792 * copy private data into event
6829 6793 */
6830 6794 if (pr_len > 0) {
6831 6795 conn_ev->ee_cmev.ec_cm_ev_priv_data =
6832 6796 kmem_zalloc(pr_len, KM_NOSLEEP);
6833 6797 if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
6834 6798 DERR("service_conn_est: pr_data alloc failed\n");
6835 6799 daplka_ep_set_state(ep_rp, old_state, new_state);
6836 6800 kmem_free(conn_ev, sizeof (daplka_evd_event_t));
6837 6801 return (IBT_CM_ACCEPT);
6838 6802 }
6839 6803 bcopy(pr_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6840 6804 }
6841 6805 conn_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
6842 6806
6843 6807 D2("service_conn_est: enqueue event(%p) evdp(%p)\n",
6844 6808 conn_ev, ep_rp->ep_conn_evd);
6845 6809
6846 6810 /*
6847 6811 * transition ep_state to CONNECTED
6848 6812 */
6849 6813 new_state = DAPLKA_EP_STATE_CONNECTED;
6850 6814 daplka_ep_set_state(ep_rp, old_state, new_state);
6851 6815
6852 6816 /*
6853 6817 * enqueue event onto the conn_evd owned by ep_rp
6854 6818 */
6855 6819 daplka_evd_wakeup(ep_rp->ep_conn_evd,
6856 6820 &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);
6857 6821
6858 6822 return (IBT_CM_ACCEPT);
6859 6823 }
6860 6824
6861 6825 /*
6862 6826 * processes the FAILURE event
6863 6827 */
6864 6828 /* ARGSUSED */
6865 6829 static ibt_cm_status_t
6866 6830 daplka_cm_service_event_failure(daplka_sp_resource_t *sp_rp,
6867 6831 ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, void *priv_data,
6868 6832 ibt_priv_data_len_t len)
6869 6833 {
6870 6834 daplka_evd_event_t *disc_ev;
6871 6835 daplka_ep_resource_t *ep_rp;
6872 6836 uint32_t old_state, new_state;
6873 6837 ibt_rc_chan_query_attr_t chan_attrs;
6874 6838 ibt_status_t status;
6875 6839
6876 6840 /*
6877 6841 * check that we still have a valid cm_channel before continuing
6878 6842 */
6879 6843 if (event->cm_channel == NULL) {
6880 6844 DERR("serice_event_failure: event->cm_channel == NULL\n");
6881 6845 return (IBT_CM_ACCEPT);
6882 6846 }
6883 6847 ep_rp = (daplka_ep_resource_t *)
6884 6848 ibt_get_chan_private(event->cm_channel);
6885 6849 if (ep_rp == NULL) {
6886 6850 DERR("service_event_failure: ep_rp == NULL\n");
6887 6851 return (IBT_CM_ACCEPT);
6888 6852 }
6889 6853
6890 6854 /*
6891 6855 * verify that ep_state is ACCEPTING or DISCONNECTING. if it
6892 6856 * is not in either state, return without generating an event.
6893 6857 */
6894 6858 new_state = old_state = daplka_ep_get_state(ep_rp);
6895 6859 if (old_state != DAPLKA_EP_STATE_ACCEPTING &&
6896 6860 old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6897 6861 /*
6898 6862 * we can get here if the connection is being aborted
6899 6863 */
6900 6864 DERR("service_event_failure: conn aborted, state = %d, "
6901 6865 "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
6902 6866 (int)event->cm_event.failed.cf_code,
6903 6867 (int)event->cm_event.failed.cf_msg,
6904 6868 (int)event->cm_event.failed.cf_reason);
6905 6869
6906 6870 daplka_ep_set_state(ep_rp, old_state, new_state);
6907 6871 return (IBT_CM_ACCEPT);
6908 6872 }
6909 6873
6910 6874 bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
6911 6875 status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
6912 6876
6913 6877 if ((status == IBT_SUCCESS) &&
6914 6878 (chan_attrs.rc_state != IBT_STATE_ERROR)) {
6915 6879 DERR("service_event_failure: conn abort qpn %d state %d\n",
6916 6880 chan_attrs.rc_qpn, chan_attrs.rc_state);
6917 6881
6918 6882 /* explicit transition the QP to ERROR state */
6919 6883 status = ibt_flush_channel(ep_rp->ep_chan_hdl);
6920 6884 }
6921 6885
6922 6886 /*
6923 6887 * create an event
6924 6888 */
6925 6889 disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6926 6890 if (disc_ev == NULL) {
6927 6891 DERR("service_event_failure: cannot alloc disc_ev\n");
6928 6892 daplka_ep_set_state(ep_rp, old_state, new_state);
6929 6893 return (IBT_CM_ACCEPT);
6930 6894 }
6931 6895
6932 6896 /*
6933 6897 * fill in the appropriate event type
6934 6898 */
6935 6899 if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
6936 6900 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
6937 6901 } else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
6938 6902 switch (event->cm_event.failed.cf_reason) {
6939 6903 case IBT_CM_INVALID_CID:
6940 6904 disc_ev->ee_cmev.ec_cm_ev_type =
6941 6905 DAPL_IB_CME_DESTINATION_REJECT;
6942 6906 break;
6943 6907 default:
6944 6908 disc_ev->ee_cmev.ec_cm_ev_type =
6945 6909 DAPL_IB_CME_LOCAL_FAILURE;
6946 6910 break;
6947 6911 }
6948 6912 } else {
6949 6913 disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
6950 6914 }
6951 6915 disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6952 6916 disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6953 6917 disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6954 6918 disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6955 6919 disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6956 6920
6957 6921 D2("service_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
6958 6922 "cf_msg(%d) cf_reason(%d) psep(0x%llx)\n", disc_ev,
6959 6923 ep_rp->ep_conn_evd, (int)event->cm_event.failed.cf_code,
6960 6924 (int)event->cm_event.failed.cf_msg,
6961 6925 (int)event->cm_event.failed.cf_reason,
6962 6926 (longlong_t)ep_rp->ep_psep_cookie);
6963 6927
6964 6928 /*
6965 6929 * transition ep_state to DISCONNECTED
6966 6930 */
6967 6931 new_state = DAPLKA_EP_STATE_DISCONNECTED;
6968 6932 daplka_ep_set_state(ep_rp, old_state, new_state);
6969 6933
6970 6934 /*
6971 6935 * enqueue event onto the conn_evd owned by ep_rp
6972 6936 */
6973 6937 daplka_evd_wakeup(ep_rp->ep_conn_evd,
6974 6938 &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6975 6939
6976 6940 return (IBT_CM_ACCEPT);
6977 6941 }
6978 6942
6979 6943 /*
6980 6944 * this is the passive side CM handler. it gets registered
6981 6945 * when an SP resource is created in daplka_service_register.
6982 6946 */
6983 6947 static ibt_cm_status_t
6984 6948 daplka_cm_service_handler(void *cm_private, ibt_cm_event_t *event,
6985 6949 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6986 6950 {
↓ open down ↓ |
302 lines elided |
↑ open up ↑ |
6987 6951 daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)cm_private;
6988 6952
6989 6953 if (sp_rp == NULL) {
6990 6954 DERR("service_handler: sp_rp == NULL\n");
6991 6955 return (IBT_CM_NO_RESOURCE);
6992 6956 }
6993 6957 /*
6994 6958 * default is not to return priv data
6995 6959 */
6996 6960 if (ret_args != NULL) {
6997 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
6998 6961 ret_args->cm_ret_len = 0;
6999 6962 }
7000 6963
7001 6964 switch (event->cm_type) {
7002 6965 case IBT_CM_EVENT_REQ_RCV:
7003 6966 D2("service_handler: IBT_CM_EVENT_REQ_RCV\n");
7004 6967 return (daplka_cm_service_req(sp_rp, event, ret_args,
7005 6968 event->cm_priv_data, event->cm_priv_data_len));
7006 6969
7007 6970 case IBT_CM_EVENT_REP_RCV:
7008 6971 /* passive side should not receive this event */
7009 6972 D2("service_handler: IBT_CM_EVENT_REP_RCV\n");
7010 6973 return (IBT_CM_DEFAULT);
7011 6974
7012 6975 case IBT_CM_EVENT_CONN_CLOSED:
7013 6976 D2("service_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
7014 6977 event->cm_event.closed);
7015 6978 return (daplka_cm_service_conn_closed(sp_rp, event, ret_args,
7016 6979 priv_data, len));
7017 6980
7018 6981 case IBT_CM_EVENT_MRA_RCV:
7019 6982 /* passive side does default processing MRA event */
7020 6983 D2("service_handler: IBT_CM_EVENT_MRA_RCV\n");
7021 6984 return (IBT_CM_DEFAULT);
7022 6985
7023 6986 case IBT_CM_EVENT_CONN_EST:
7024 6987 D2("service_handler: IBT_CM_EVENT_CONN_EST\n");
7025 6988 return (daplka_cm_service_conn_est(sp_rp, event, ret_args,
7026 6989 priv_data, len));
7027 6990
7028 6991 case IBT_CM_EVENT_FAILURE:
7029 6992 D2("service_handler: IBT_CM_EVENT_FAILURE\n");
7030 6993 return (daplka_cm_service_event_failure(sp_rp, event, ret_args,
7031 6994 priv_data, len));
7032 6995 case IBT_CM_EVENT_LAP_RCV:
7033 6996 /* active side had initiated a path migration operation */
7034 6997 D2("service_handler: IBT_CM_EVENT_LAP_RCV\n");
7035 6998 return (IBT_CM_ACCEPT);
7036 6999 default:
7037 7000 DERR("service_handler: invalid event %d\n", event->cm_type);
7038 7001 break;
7039 7002 }
7040 7003 return (IBT_CM_DEFAULT);
7041 7004 }
7042 7005
7043 7006 /*
7044 7007 * Active side CM handlers
7045 7008 */
7046 7009
7047 7010 /*
7048 7011 * Processes the REP_RCV event. When the passive side accepts the
7049 7012 * connection, this handler is called. We make a copy of the private
7050 7013 * data into the ep so that it can be passed back to userland in when
7051 7014 * the CONN_EST event occurs.
7052 7015 */
7053 7016 /* ARGSUSED */
7054 7017 static ibt_cm_status_t
7055 7018 daplka_cm_rc_rep_rcv(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7056 7019 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7057 7020 {
7058 7021 void *pr_data = event->cm_priv_data;
7059 7022 ibt_priv_data_len_t pr_len = event->cm_priv_data_len;
7060 7023 uint32_t old_state, new_state;
7061 7024
7062 7025 D2("rc_rep_rcv: pr_data(0x%p), pr_len(%d)\n", pr_data,
7063 7026 (int)pr_len);
7064 7027
7065 7028 ASSERT(ep_rp != NULL);
7066 7029 new_state = old_state = daplka_ep_get_state(ep_rp);
7067 7030 if (old_state != DAPLKA_EP_STATE_CONNECTING) {
7068 7031 /*
7069 7032 * we can get here if the connection is being aborted
7070 7033 */
7071 7034 DERR("rc_rep_rcv: conn aborted, state = %d\n", old_state);
7072 7035 daplka_ep_set_state(ep_rp, old_state, new_state);
7073 7036 return (IBT_CM_NO_CHANNEL);
7074 7037 }
7075 7038
7076 7039 /*
7077 7040 * we do not cancel the timer here because the connection
7078 7041 * handshake is still in progress.
7079 7042 */
7080 7043
7081 7044 /*
7082 7045 * save the private data. it will be passed up when
7083 7046 * the connection is established.
7084 7047 */
7085 7048 if (pr_len > 0) {
7086 7049 ep_rp->ep_priv_len = pr_len;
7087 7050 bcopy(pr_data, ep_rp->ep_priv_data, (size_t)pr_len);
7088 7051 }
7089 7052
7090 7053 /*
7091 7054 * we do not actually transition to a different state.
7092 7055 * the state will change when we get a conn_est, failure,
7093 7056 * closed, or timeout event.
7094 7057 */
7095 7058 daplka_ep_set_state(ep_rp, old_state, new_state);
7096 7059 return (IBT_CM_ACCEPT);
7097 7060 }
7098 7061
/*
 * Processes the CONN_CLOSED event. This gets called when either
 * the active or passive side closes the rc channel.  Cancels the
 * connection timer, queues a DAPL_IB_CME_DISCONNECTED event on the
 * ep's connection EVD and transitions the ep to DISCONNECTED.
 * Always returns IBT_CM_ACCEPT - the close itself cannot be refused.
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_conn_closed(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*disc_ev;
	uint32_t		old_state, new_state;

	ASSERT(ep_rp != NULL);
	old_state = new_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		D2("rc_conn_closed: conn aborted, state = %d, "
		    "closed = %d\n", old_state, (int)event->cm_event.closed);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * it's ok for the timer to fire at this point. the
	 * taskq thread that processes the timer will just wait
	 * until we are done with our state transition.
	 */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/*
		 * daplka_cancel_timer returns -1 if the timer is
		 * being processed and 0 for all other cases.
		 * we need to reset ep_state to allow timer processing
		 * to continue.
		 */
		DERR("rc_conn_closed: timer is being processed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_DISCONNECTED event
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		/* no memory: drop the event but restore the ep state */
		DERR("rc_conn_closed: could not alloc ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;	/* active-side handler */
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;

	D2("rc_conn_closed: enqueue event(%p) evdp(%p) closed(%d)\n",
	    disc_ev, ep_rp->ep_conn_evd, (int)event->cm_event.closed);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}
7175 7138
/*
 * processes the CONN_EST event (active side).  Cancels the connection
 * timer, builds a DAPL_IB_CME_CONNECTED event carrying the private
 * data previously saved by daplka_cm_rc_rep_rcv(), queues it on the
 * ep's connection EVD and transitions the ep to CONNECTED.
 * Always returns IBT_CM_ACCEPT.
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_conn_est(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*conn_ev;
	uint32_t		old_state, new_state;

	ASSERT(ep_rp != NULL);
	old_state = new_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("rc_conn_est: conn aborted, state = %d\n", old_state);
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * it's ok for the timer to fire at this point. the
	 * taskq thread that processes the timer will just wait
	 * until we are done with our state transition.
	 */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/*
		 * daplka_cancel_timer returns -1 if the timer is
		 * being processed and 0 for all other cases.
		 * we need to reset ep_state to allow timer processing
		 * to continue.
		 */
		DERR("rc_conn_est: timer is being processed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * create a DAPL_IB_CME_CONNECTED event
	 */
	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (conn_ev == NULL) {
		/* no memory: drop the event but restore the ep state */
		DERR("rc_conn_est: could not alloc ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
	conn_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	conn_ev->ee_cmev.ec_cm_is_passive = B_FALSE;	/* active-side handler */
	conn_ev->ee_cmev.ec_cm_psep_cookie = 0;

	/*
	 * The private data passed back in the connection established
	 * event is what was recvd in the daplka_cm_rc_rep_rcv handler and
	 * saved in ep resource structure.
	 */
	if (ep_rp->ep_priv_len > 0) {
		conn_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(ep_rp->ep_priv_len, KM_NOSLEEP);

		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			/* can't deliver without priv data; drop the event */
			DERR("rc_conn_est: could not alloc pr_data\n");
			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
			daplka_ep_set_state(ep_rp, old_state, new_state);
			return (IBT_CM_ACCEPT);
		}
		bcopy(ep_rp->ep_priv_data, conn_ev->ee_cmev.ec_cm_ev_priv_data,
		    ep_rp->ep_priv_len);
	}
	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = ep_rp->ep_priv_len;

	D2("rc_conn_est: enqueue event(%p) evdp(%p) pr_data(0x%p), "
	    "pr_len(%d)\n", conn_ev, ep_rp->ep_conn_evd,
	    conn_ev->ee_cmev.ec_cm_ev_priv_data,
	    (int)conn_ev->ee_cmev.ec_cm_ev_priv_data_len);

	/*
	 * transition ep_state to CONNECTED
	 */
	new_state = DAPLKA_EP_STATE_CONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);

	return (IBT_CM_ACCEPT);
}
7269 7232
/*
 * processes the FAILURE event (active side).  Covers REJ received,
 * CM timeouts and other failures.  Forces the QP into the ERROR state
 * if the CM has not already done so, maps the failure code/reason to
 * a DAPL CME event type, queues that event on the ep's connection EVD
 * and transitions the ep to DISCONNECTED.  Always returns
 * IBT_CM_ACCEPT.
 */
/* ARGSUSED */
static ibt_cm_status_t
daplka_cm_rc_event_failure(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
    ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
{
	daplka_evd_event_t	*disc_ev;
	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
	void			*pr_data = event->cm_priv_data;
	uint32_t		old_state, new_state;
	ibt_rc_chan_query_attr_t chan_attrs;
	ibt_status_t		status;

	ASSERT(ep_rp != NULL);
	old_state = new_state = daplka_ep_get_state(ep_rp);
	if (old_state != DAPLKA_EP_STATE_CONNECTING &&
	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
		/*
		 * we can get here if the connection is being aborted
		 */
		DERR("rc_event_failure: conn aborted, state = %d, "
		    "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
		    (int)event->cm_event.failed.cf_code,
		    (int)event->cm_event.failed.cf_msg,
		    (int)event->cm_event.failed.cf_reason);

		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * it's ok for the timer to fire at this point. the
	 * taskq thread that processes the timer will just wait
	 * until we are done with our state transition.
	 */
	if (daplka_cancel_timer(ep_rp) != 0) {
		/*
		 * daplka_cancel_timer returns -1 if the timer is
		 * being processed and 0 for all other cases.
		 * we need to reset ep_state to allow timer processing
		 * to continue.
		 */
		DERR("rc_event_failure: timer is being processed\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/* check whether the channel has already been moved to ERROR */
	bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
	status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);

	if ((status == IBT_SUCCESS) &&
	    (chan_attrs.rc_state != IBT_STATE_ERROR)) {
		DERR("rc_event_failure: conn abort qpn %d state %d\n",
		    chan_attrs.rc_qpn, chan_attrs.rc_state);

		/* explicit transition the QP to ERROR state */
		/* best effort; the flush result is not examined */
		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
	}

	/*
	 * create an event
	 */
	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
	if (disc_ev == NULL) {
		/* no memory: drop the event but restore the ep state */
		DERR("rc_event_failure: cannot alloc disc_ev\n");
		daplka_ep_set_state(ep_rp, old_state, new_state);
		return (IBT_CM_ACCEPT);
	}

	/*
	 * copy private data into event
	 */
	if (pr_len > 0) {
		disc_ev->ee_cmev.ec_cm_ev_priv_data =
		    kmem_zalloc(pr_len, KM_NOSLEEP);

		if (disc_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
			DERR("rc_event_failure: cannot alloc pr data\n");
			kmem_free(disc_ev, sizeof (daplka_evd_event_t));
			daplka_ep_set_state(ep_rp, old_state, new_state);
			return (IBT_CM_ACCEPT);
		}
		bcopy(pr_data, disc_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
	}
	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;

	/*
	 * fill in the appropriate event type
	 */
	if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
		switch (event->cm_event.failed.cf_reason) {
		case IBT_CM_CONSUMER:
			/* peer consumer rejected; its reject data follows */
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
			break;
		case IBT_CM_NO_CHAN:
		case IBT_CM_NO_RESC:
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT;
			break;
		default:
			/* all other REJ reasons map to a plain reject */
			disc_ev->ee_cmev.ec_cm_ev_type =
			    DAPL_IB_CME_DESTINATION_REJECT;
			break;
		}
	} else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
	} else {
		/* others we'll mark as local failure */
		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
	}
	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;	/* active-side handler */
	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;

	D2("rc_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
	    "cf_msg(%d) cf_reason(%d)\n", disc_ev, ep_rp->ep_conn_evd,
	    (int)event->cm_event.failed.cf_code,
	    (int)event->cm_event.failed.cf_msg,
	    (int)event->cm_event.failed.cf_reason);

	/*
	 * transition ep_state to DISCONNECTED
	 */
	new_state = DAPLKA_EP_STATE_DISCONNECTED;
	daplka_ep_set_state(ep_rp, old_state, new_state);

	/*
	 * enqueue event onto the conn_evd owned by ep_rp
	 */
	daplka_evd_wakeup(ep_rp->ep_conn_evd,
	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);

	return (IBT_CM_ACCEPT);
}
7407 7370
7408 7371 /*
7409 7372 * This is the active side CM handler. It gets registered when
7410 7373 * ibt_open_rc_channel is called.
7411 7374 */
7412 7375 static ibt_cm_status_t
7413 7376 daplka_cm_rc_handler(void *cm_private, ibt_cm_event_t *event,
7414 7377 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7415 7378 {
↓ open down ↓ |
408 lines elided |
↑ open up ↑ |
7416 7379 daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)cm_private;
7417 7380
7418 7381 if (ep_rp == NULL) {
7419 7382 DERR("rc_handler: ep_rp == NULL\n");
7420 7383 return (IBT_CM_NO_CHANNEL);
7421 7384 }
7422 7385 /*
7423 7386 * default is not to return priv data
7424 7387 */
7425 7388 if (ret_args != NULL) {
7426 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
7427 7389 ret_args->cm_ret_len = 0;
7428 7390 }
7429 7391
7430 7392 switch (event->cm_type) {
7431 7393 case IBT_CM_EVENT_REQ_RCV:
7432 7394 /* active side should not receive this event */
7433 7395 D2("rc_handler: IBT_CM_EVENT_REQ_RCV\n");
7434 7396 break;
7435 7397
7436 7398 case IBT_CM_EVENT_REP_RCV:
7437 7399 /* connection accepted by passive side */
7438 7400 D2("rc_handler: IBT_CM_EVENT_REP_RCV\n");
7439 7401 return (daplka_cm_rc_rep_rcv(ep_rp, event, ret_args,
7440 7402 priv_data, len));
7441 7403
7442 7404 case IBT_CM_EVENT_CONN_CLOSED:
7443 7405 D2("rc_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
7444 7406 event->cm_event.closed);
7445 7407 return (daplka_cm_rc_conn_closed(ep_rp, event, ret_args,
7446 7408 priv_data, len));
7447 7409
7448 7410 case IBT_CM_EVENT_MRA_RCV:
7449 7411 /* passive side does default processing MRA event */
7450 7412 D2("rc_handler: IBT_CM_EVENT_MRA_RCV\n");
7451 7413 return (IBT_CM_DEFAULT);
7452 7414
7453 7415 case IBT_CM_EVENT_CONN_EST:
7454 7416 D2("rc_handler: IBT_CM_EVENT_CONN_EST\n");
7455 7417 return (daplka_cm_rc_conn_est(ep_rp, event, ret_args,
7456 7418 priv_data, len));
7457 7419
7458 7420 case IBT_CM_EVENT_FAILURE:
7459 7421 D2("rc_handler: IBT_CM_EVENT_FAILURE\n");
7460 7422 return (daplka_cm_rc_event_failure(ep_rp, event, ret_args,
7461 7423 priv_data, len));
7462 7424
7463 7425 default:
7464 7426 D2("rc_handler: invalid event %d\n", event->cm_type);
7465 7427 break;
7466 7428 }
7467 7429 return (IBT_CM_DEFAULT);
7468 7430 }
7469 7431
/*
 * creates an IA resource and inserts it into the global resource table.
 * Called for the DAPL_IA_CREATE ioctl.  Copies in the dapl_ia_create_t
 * arguments, locates the HCA matching the requested GUID (taking a
 * hold on it), validates the port, registers the ATS address record,
 * creates the per-IA hash tables and finally publishes the IA in the
 * global resource table, returning its resource number to userland.
 * On any failure after allocation, cleanup is driven through the
 * resource refcount (DAPLKA_RS_UNREF -> daplka_ia_destroy).
 * Returns 0 on success or an errno; IBT errors from the port query
 * are passed back through *rvalp with a 0 return.
 */
/* ARGSUSED */
static int
daplka_ia_create(minor_t rnum, intptr_t arg, int mode,
    cred_t *cred, int *rvalp)
{
	daplka_ia_resource_t	*ia_rp, *tmp_rp;
	boolean_t		inserted = B_FALSE;
	dapl_ia_create_t	args;
	ibt_hca_hdl_t		hca_hdl;
	ibt_status_t		status;
	ib_gid_t		sgid;
	int			retval;
	ibt_hca_portinfo_t	*pinfop;
	uint_t			pinfon;
	uint_t			size;
	ibt_ar_t		ar_s;
	daplka_hca_t		*hca;

	/* copy in and validate the ioctl arguments */
	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ia_create_t),
	    mode);
	if (retval != 0) {
		DERR("ia_create: copyin error %d\n", retval);
		return (EFAULT);
	}
	if (args.ia_version != DAPL_IF_VERSION) {
		DERR("ia_create: invalid version %d, expected version %d\n",
		    args.ia_version, DAPL_IF_VERSION);
		return (EINVAL);
	}

	/*
	 * find the hca with the matching guid
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
	    hca = hca->hca_next) {
		if (hca->hca_guid == args.ia_guid) {
			/* the hold keeps the hca from being detached */
			DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);
			break;
		}
	}
	mutex_exit(&daplka_dev->daplka_mutex);

	if (hca == NULL) {
		DERR("ia_create: guid 0x%016llx not found\n",
		    (longlong_t)args.ia_guid);
		return (EINVAL);
	}

	/*
	 * check whether port number is valid and whether it is up
	 */
	if (args.ia_port > hca->hca_nports) {
		DERR("ia_create: invalid hca_port %d\n", args.ia_port);
		DAPLKA_RELE_HCA(daplka_dev, hca);
		return (EINVAL);
	}
	hca_hdl = hca->hca_hdl;
	if (hca_hdl == NULL) {
		DERR("ia_create: hca_hdl == NULL\n");
		DAPLKA_RELE_HCA(daplka_dev, hca);
		return (EINVAL);
	}
	status = ibt_query_hca_ports(hca_hdl, (uint8_t)args.ia_port,
	    &pinfop, &pinfon, &size);
	if (status != IBT_SUCCESS) {
		DERR("ia_create: ibt_query_hca_ports returned %d\n", status);
		/* IBT status goes back to userland via *rvalp */
		*rvalp = (int)status;
		DAPLKA_RELE_HCA(daplka_dev, hca);
		return (0);
	}
	/* the source gid is the first entry in the port's sgid table */
	sgid = pinfop->p_sgid_tbl[0];
	ibt_free_portinfo(pinfop, size);

	ia_rp = kmem_zalloc(sizeof (daplka_ia_resource_t), daplka_km_flags);
	DAPLKA_RS_INIT(ia_rp, DAPL_TYPE_IA, rnum, daplka_ia_destroy);

	mutex_init(&ia_rp->ia_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ia_rp->ia_cv, NULL, CV_DRIVER, NULL);
	ia_rp->ia_hca_hdl = hca_hdl;
	ia_rp->ia_hca_sgid = sgid;
	ia_rp->ia_hca = hca;
	ia_rp->ia_port_num = args.ia_port;
	ia_rp->ia_port_pkey = args.ia_pkey;
	ia_rp->ia_pid = ddi_get_pid();
	ia_rp->ia_async_evd_hkeys = NULL;
	ia_rp->ia_ar_registered = B_FALSE;
	bcopy(args.ia_sadata, ia_rp->ia_sadata, DAPL_ATS_NBYTES);

	/* register Address Record */
	ar_s.ar_gid = ia_rp->ia_hca_sgid;
	ar_s.ar_pkey = ia_rp->ia_port_pkey;
	bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
#define	UC(b) ar_s.ar_data[(b)]
	D3("daplka_ia_create: SA[8] %d.%d.%d.%d\n",
	    UC(8), UC(9), UC(10), UC(11));
	D3("daplka_ia_create: SA[12] %d.%d.%d.%d\n",
	    UC(12), UC(13), UC(14), UC(15));
	retval = ibt_register_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
	if (retval != IBT_SUCCESS) {
		DERR("ia_create: failed to register Address Record.\n");
		retval = EINVAL;
		goto cleanup;
	}
	/* remembered so daplka_ia_destroy knows to deregister it */
	ia_rp->ia_ar_registered = B_TRUE;

	/*
	 * create hash tables for all object types
	 */
	retval = daplka_hash_create(&ia_rp->ia_ep_htbl, DAPLKA_EP_HTBL_SZ,
	    daplka_hash_ep_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create ep hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_mr_htbl, DAPLKA_MR_HTBL_SZ,
	    daplka_hash_mr_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create mr hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_mw_htbl, DAPLKA_MW_HTBL_SZ,
	    daplka_hash_mw_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create mw hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_pd_htbl, DAPLKA_PD_HTBL_SZ,
	    daplka_hash_pd_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create pd hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_evd_htbl, DAPLKA_EVD_HTBL_SZ,
	    daplka_hash_evd_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create evd hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_cno_htbl, DAPLKA_CNO_HTBL_SZ,
	    daplka_hash_cno_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create cno hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_sp_htbl, DAPLKA_SP_HTBL_SZ,
	    daplka_hash_sp_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create sp hash table\n");
		goto cleanup;
	}
	retval = daplka_hash_create(&ia_rp->ia_srq_htbl, DAPLKA_SRQ_HTBL_SZ,
	    daplka_hash_srq_free, daplka_hash_generic_lookup);
	if (retval != 0) {
		DERR("ia_create: cannot create srq hash table\n");
		goto cleanup;
	}
	/*
	 * insert ia_rp into the global resource table
	 */
	retval = daplka_resource_insert(rnum, (daplka_resource_t *)ia_rp);
	if (retval != 0) {
		DERR("ia_create: cannot insert resource\n");
		goto cleanup;
	}
	inserted = B_TRUE;

	/* hand the resource number back to userland */
	args.ia_resnum = rnum;
	retval = copyout(&args, (void *)arg, sizeof (dapl_ia_create_t));
	if (retval != 0) {
		DERR("ia_create: copyout error %d\n", retval);
		retval = EFAULT;
		goto cleanup;
	}
	return (0);

cleanup:;
	if (inserted) {
		tmp_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
		if (tmp_rp != ia_rp) {
			/*
			 * we can return here because another thread must
			 * have freed up the resource
			 */
			DERR("ia_create: cannot remove resource\n");
			return (retval);
		}
	}
	/* drops the initial ref; this triggers daplka_ia_destroy */
	DAPLKA_RS_UNREF(ia_rp);
	return (retval);
}
7666 7626
7667 7627 /*
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
7668 7628 * destroys an IA resource
7669 7629 */
7670 7630 static int
7671 7631 daplka_ia_destroy(daplka_resource_t *gen_rp)
7672 7632 {
7673 7633 daplka_ia_resource_t *ia_rp = (daplka_ia_resource_t *)gen_rp;
7674 7634 daplka_async_evd_hkey_t *hkp;
7675 7635 int cnt;
7676 7636 ibt_ar_t ar_s;
7677 7637
7678 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
7679 7638 D3("ia_destroy: entering, ia_rp 0x%p\n", ia_rp);
7680 7639
7681 7640 /* deregister Address Record */
7682 7641 if (ia_rp->ia_ar_registered) {
7683 7642 ar_s.ar_gid = ia_rp->ia_hca_sgid;
7684 7643 ar_s.ar_pkey = ia_rp->ia_port_pkey;
7685 7644 bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
7686 7645 (void) ibt_deregister_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
7687 7646 ia_rp->ia_ar_registered = B_FALSE;
7688 7647 }
7689 7648
7690 7649 /*
7691 7650 * destroy hash tables. make sure resources are
7692 7651 * destroyed in the correct order.
7693 7652 */
7694 7653 daplka_hash_destroy(&ia_rp->ia_mw_htbl);
7695 7654 daplka_hash_destroy(&ia_rp->ia_mr_htbl);
7696 7655 daplka_hash_destroy(&ia_rp->ia_ep_htbl);
7697 7656 daplka_hash_destroy(&ia_rp->ia_srq_htbl);
7698 7657 daplka_hash_destroy(&ia_rp->ia_evd_htbl);
7699 7658 daplka_hash_destroy(&ia_rp->ia_cno_htbl);
7700 7659 daplka_hash_destroy(&ia_rp->ia_pd_htbl);
7701 7660 daplka_hash_destroy(&ia_rp->ia_sp_htbl);
7702 7661
7703 7662 /*
7704 7663 * free the async evd list
7705 7664 */
7706 7665 cnt = 0;
7707 7666 hkp = ia_rp->ia_async_evd_hkeys;
7708 7667 while (hkp != NULL) {
7709 7668 daplka_async_evd_hkey_t *free_hkp;
7710 7669
7711 7670 cnt++;
7712 7671 free_hkp = hkp;
7713 7672 hkp = hkp->aeh_next;
7714 7673 kmem_free(free_hkp, sizeof (*free_hkp));
7715 7674 }
7716 7675 if (cnt > 0) {
7717 7676 D3("ia_destroy: freed %d hkeys\n", cnt);
7718 7677 }
7719 7678 mutex_destroy(&ia_rp->ia_lock);
7720 7679 cv_destroy(&ia_rp->ia_cv);
7721 7680 ia_rp->ia_hca_hdl = NULL;
7722 7681
7723 7682 DAPLKA_RS_FINI(ia_rp);
7724 7683
7725 7684 if (ia_rp->ia_hca)
7726 7685 DAPLKA_RELE_HCA(daplka_dev, ia_rp->ia_hca);
7727 7686
7728 7687 kmem_free(ia_rp, sizeof (daplka_ia_resource_t));
7729 7688 D3("ia_destroy: exiting, ia_rp 0x%p\n", ia_rp);
7730 7689 return (0);
7731 7690 }
7732 7691
7733 7692 static void
7734 7693 daplka_async_event_create(ibt_async_code_t code, ibt_async_event_t *event,
7735 7694 uint64_t cookie, daplka_ia_resource_t *ia_rp)
7736 7695 {
7737 7696 daplka_evd_event_t *evp;
7738 7697 daplka_evd_resource_t *async_evd;
7739 7698 daplka_async_evd_hkey_t *curr;
7740 7699
7741 7700 mutex_enter(&ia_rp->ia_lock);
7742 7701 curr = ia_rp->ia_async_evd_hkeys;
7743 7702 while (curr != NULL) {
7744 7703 /*
7745 7704 * Note: this allocation does not zero out the buffer
7746 7705 * since we init all the fields.
7747 7706 */
7748 7707 evp = kmem_alloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7749 7708 if (evp == NULL) {
7750 7709 DERR("async_event_enqueue: event alloc failed"
7751 7710 "!found\n", ia_rp, curr->aeh_evd_hkey);
7752 7711 curr = curr->aeh_next;
7753 7712 continue;
7754 7713 }
7755 7714 evp->ee_next = NULL;
7756 7715 evp->ee_aev.ibae_type = code;
7757 7716 evp->ee_aev.ibae_hca_guid = event->ev_hca_guid;
7758 7717 evp->ee_aev.ibae_cookie = cookie;
7759 7718 evp->ee_aev.ibae_port = event->ev_port;
7760 7719
7761 7720 /*
7762 7721 * Lookup the async evd corresponding to this ia and enqueue
7763 7722 * evp and wakeup any waiter.
7764 7723 */
7765 7724 async_evd = (daplka_evd_resource_t *)
7766 7725 daplka_hash_lookup(&ia_rp->ia_evd_htbl, curr->aeh_evd_hkey);
7767 7726 if (async_evd == NULL) { /* async evd is being freed */
7768 7727 DERR("async_event_enqueue: ia_rp(%p) asycn_evd %llx "
7769 7728 "!found\n", ia_rp, (longlong_t)curr->aeh_evd_hkey);
7770 7729 kmem_free(evp, sizeof (daplka_evd_event_t));
7771 7730 curr = curr->aeh_next;
7772 7731 continue;
7773 7732 }
7774 7733 daplka_evd_wakeup(async_evd, &async_evd->evd_async_events, evp);
7775 7734
7776 7735 /* decrement refcnt on async_evd */
7777 7736 DAPLKA_RS_UNREF(async_evd);
7778 7737 curr = curr->aeh_next;
7779 7738 }
7780 7739 mutex_exit(&ia_rp->ia_lock);
7781 7740 }
/*
 * This routine is called in kernel context.  IBTF async handler for
 * RC-channel-affiliated events.  Recovers the ep resource from the
 * channel private data, uses the ep's resource number to locate the
 * owning IA, and fans the event out to the IA's async EVDs.
 */

/* ARGSUSED */
static void
daplka_rc_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	daplka_ep_resource_t	*epp;
	daplka_ia_resource_t	*ia_rp;
	minor_t			ia_rnum;

	if (event->ev_chan_hdl == NULL) {
		DERR("daplka_rc_async_handler: ev_chan_hdl is NULL\n");
		return;
	}

	/* daplka_mutex keeps the chan private stable while we take a ref */
	mutex_enter(&daplka_dev->daplka_mutex);
	epp = ibt_get_chan_private(event->ev_chan_hdl);
	if (epp == NULL) {
		mutex_exit(&daplka_dev->daplka_mutex);
		DERR("daplka_rc_async_handler: chan_private is NULL\n");
		return;
	}

	/* grab a reference to this ep */
	DAPLKA_RS_REF(epp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/*
	 * The endpoint resource has the resource number corresponding to
	 * the IA resource. Use that to lookup the ia resource entry
	 */
	ia_rnum = DAPLKA_RS_RNUM(epp);
	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
	if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
		D2("daplka_rc_async_handler: resource (%d) not found\n",
		    ia_rnum);
		DAPLKA_RS_UNREF(epp);
		return;
	}

	/*
	 * Create an async event and chain it to the async evd
	 */
	daplka_async_event_create(code, event, epp->ep_cookie, ia_rp);

	/* release all the refcounts that were acquired above */
	DAPLKA_RS_UNREF(ia_rp);
	DAPLKA_RS_UNREF(epp);
}
7833 7792
/*
 * This routine is called in kernel context.  IBTF async handler for
 * CQ-affiliated events.  Mirrors daplka_rc_async_handler: recovers
 * the evd resource from the CQ private data, locates the owning IA
 * by resource number, and fans the event out to the IA's async EVDs.
 */

/* ARGSUSED */
static void
daplka_cq_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	daplka_evd_resource_t	*evdp;
	daplka_ia_resource_t	*ia_rp;
	minor_t			ia_rnum;

	if (event->ev_cq_hdl == NULL)
		return;

	/* daplka_mutex keeps the cq private stable while we take a ref */
	mutex_enter(&daplka_dev->daplka_mutex);
	evdp = ibt_get_cq_private(event->ev_cq_hdl);
	if (evdp == NULL) {
		mutex_exit(&daplka_dev->daplka_mutex);
		DERR("daplka_cq_async_handler: get cq private(%p) failed\n",
		    event->ev_cq_hdl);
		return;
	}
	/* grab a reference to this evd resource */
	DAPLKA_RS_REF(evdp);
	mutex_exit(&daplka_dev->daplka_mutex);

	/*
	 * The evd resource has the resource number corresponding to
	 * the IA resource. Use that to lookup the ia resource entry
	 */
	ia_rnum = DAPLKA_RS_RNUM(evdp);
	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
	if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
		DERR("daplka_cq_async_handler: resource (%d) not found\n",
		    ia_rnum);
		DAPLKA_RS_UNREF(evdp);
		return;
	}

	/*
	 * Create an async event and chain it to the async evd
	 */
	daplka_async_event_create(code, event, evdp->evd_cookie, ia_rp);

	/* release all the refcount that were acquired */
	DAPLKA_RS_UNREF(ia_rp);
	DAPLKA_RS_UNREF(evdp);
}
7884 7843
/*
 * This routine is called in kernel context, handles unaffiliated async
 * errors (e.g. port state changes).  Scans the entire global resource
 * table for IA resources bound to the affected hca/port and, for each
 * match, walks that IA's ep hash table attempting failback on every
 * endpoint.  A NULL dgid passed to daplka_ep_failback indicates a
 * local port-up event.
 */

/* ARGSUSED */
static void
daplka_un_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	int			i, j;
	daplka_resource_blk_t	*blk;
	daplka_resource_t	*rp;
	daplka_ia_resource_t	*ia_rp;

	/*
	 * Walk the resource table looking for an ia that matches the
	 * hca_hdl.
	 */
	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
		blk = daplka_resource.daplka_rc_root[i];
		if (blk == NULL)
			continue;
		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
			rp = blk->daplka_rcblk_blks[j];
			if ((rp == NULL) ||
			    ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
			    (rp->rs_type != DAPL_TYPE_IA)) {
				/* empty, reserved, or not an IA slot */
				continue;
			}
			/*
			 * rp is an IA resource check if it belongs
			 * to the hca/port for which we got the event
			 */
			ia_rp = (daplka_ia_resource_t *)rp;
			DAPLKA_RS_REF(ia_rp);
			if ((hca_hdl == ia_rp->ia_hca_hdl) &&
			    (event->ev_port == ia_rp->ia_port_num)) {
				/*
				 * walk the ep hash table. Acquire a
				 * reader lock. NULL dgid indicates
				 * local port up event.
				 */
				daplka_hash_walk(&ia_rp->ia_ep_htbl,
				    daplka_ep_failback, NULL, RW_READER);
			}
			DAPLKA_RS_UNREF(ia_rp);
		}
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
}
7936 7895
/*
 * Handles an IBT_HCA_DETACH_EVENT: locate the HCA with the guid named
 * in the event, unlink it from the driver's HCA list and tear it down.
 * Returns IBT_HCA_RESOURCES_NOT_FREED if the HCA is still in use,
 * IBT_FAILURE if no matching HCA is found, otherwise the status of
 * daplka_fini_hca().
 */
static int
daplka_handle_hca_detach_event(ibt_async_event_t *event)
{
	daplka_hca_t	*hca;

	/*
	 * find the hca with the matching guid
	 */
	mutex_enter(&daplka_dev->daplka_mutex);
	for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
	    hca = hca->hca_next) {
		if (hca->hca_guid == event->ev_hca_guid) {
			/* refuse the detach while the HCA is busy */
			if (DAPLKA_HCA_BUSY(hca)) {
				mutex_exit(&daplka_dev->daplka_mutex);
				return (IBT_HCA_RESOURCES_NOT_FREED);
			}
			daplka_dequeue_hca(daplka_dev, hca);
			break;
		}
	}
	mutex_exit(&daplka_dev->daplka_mutex);

	/* hca is NULL iff the loop ran off the end without a guid match */
	if (hca == NULL)
		return (IBT_FAILURE);

	return (daplka_fini_hca(daplka_dev, hca));
}
7964 7923
/*
 * This routine is called in kernel context
 *
 * Central IBTF async event dispatcher: routes channel-affiliated,
 * CQ-affiliated, port and HCA attach/detach events to their specific
 * handlers.  NOTE(review): presumably registered with IBTF at attach
 * time (registration not visible in this chunk) — confirm.
 */
static void
daplka_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	switch (code) {
	case IBT_ERROR_CATASTROPHIC_CHAN:
	case IBT_ERROR_INVALID_REQUEST_CHAN:
	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
	case IBT_ERROR_PATH_MIGRATE_REQ:
		D2("daplka_async_handler(): Channel affiliated=0x%x\n", code);
		/* These events are affiliated with a the RC channel */
		daplka_rc_async_handler(clnt_private, hca_hdl, code, event);
		break;
	case IBT_ERROR_CQ:
		/* This event is affiliated with a the CQ */
		D2("daplka_async_handler(): IBT_ERROR_CQ\n");
		daplka_cq_async_handler(clnt_private, hca_hdl, code, event);
		break;
	case IBT_ERROR_PORT_DOWN:
		/* only logged; no recovery action is taken here */
		D2("daplka_async_handler(): IBT_PORT_DOWN\n");
		break;
	case IBT_EVENT_PORT_UP:
		D2("daplka_async_handler(): IBT_PORT_UP\n");
		/* failback is attempted only when APM is enabled */
		if (daplka_apm) {
			daplka_un_async_handler(clnt_private, hca_hdl, code,
			    event);
		}
		break;
	case IBT_HCA_ATTACH_EVENT:
		/*
		 * NOTE: In some error recovery paths, it is possible to
		 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs.
		 */
		D2("daplka_async_handler(): IBT_HCA_ATTACH\n");
		(void) daplka_init_hca(daplka_dev, event->ev_hca_guid);
		break;
	case IBT_HCA_DETACH_EVENT:
		D2("daplka_async_handler(): IBT_HCA_DETACH\n");
		/* Free all hca resources and close the HCA. */
		(void) daplka_handle_hca_detach_event(event);
		break;
	case IBT_EVENT_PATH_MIGRATED:
		/* This event is affiliated with APM */
		D2("daplka_async_handler(): IBT_PATH_MIGRATED.\n");
		break;
	default:
		D2("daplka_async_handler(): unhandled code = 0x%x\n", code);
		break;
	}
}
8018 7977
/*
 * This routine is called in kernel context related to Subnet events
 *
 * Only IBT_SM_EVENT_GID_AVAIL (remote port up) triggers work, and only
 * when the daplka_apm switch is set; all other subnet events are logged
 * and ignored.
 */
/*ARGSUSED*/
static void
daplka_sm_notice_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
    ibt_subnet_event_t *event)
{
	ib_gid_t *sgid = &gid;	/* local (source) gid from the notice */
	ib_gid_t *dgid;		/* remote gid named by the event */

	dgid = &event->sm_notice_gid;
	switch (code) {
	case IBT_SM_EVENT_GID_AVAIL:
		/* This event is affiliated with remote port up */
		D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_AVAIL\n");
		if (daplka_apm)
			daplka_sm_gid_avail(sgid, dgid);
		return;
	case IBT_SM_EVENT_GID_UNAVAIL:
		/* This event is affiliated with remote port down */
		D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_UNAVAIL\n");
		return;
	default:
		D2("daplka_sm_notice_handler(): unhandled IBT_SM_EVENT_[%d]\n",
		    code);
		return;
	}
}
8048 8007
/*
 * This routine is called in kernel context, handles Subnet GID avail events
 * which correspond to remote port up. Setting up alternate path or path
 * migration (failback) has to be initiated from the active side of the
 * original connect.
 */
static void
daplka_sm_gid_avail(ib_gid_t *sgid, ib_gid_t *dgid)
{
	int			i, j;
	daplka_resource_blk_t	*blk;
	daplka_resource_t	*rp;
	daplka_ia_resource_t	*ia_rp;

	D2("daplka_sm_gid_avail: sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid,
	    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);

	/*
	 * Walk the resource table looking for an ia that matches the sgid.
	 * Same walk/ref protocol as daplka_un_async_handler(): table held
	 * as reader, each candidate IA ref-held while inspected.
	 */
	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
		blk = daplka_resource.daplka_rc_root[i];
		if (blk == NULL)
			continue;
		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
			rp = blk->daplka_rcblk_blks[j];
			/* skip empty, reserved and non-IA slots */
			if ((rp == NULL) ||
			    ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
			    (rp->rs_type != DAPL_TYPE_IA)) {
				continue;
			}
			/*
			 * rp is an IA resource check if its gid
			 * matches with the calling sgid
			 */
			ia_rp = (daplka_ia_resource_t *)rp;
			DAPLKA_RS_REF(ia_rp);
			if ((sgid->gid_prefix ==
			    ia_rp->ia_hca_sgid.gid_prefix) &&
			    (sgid->gid_guid == ia_rp->ia_hca_sgid.gid_guid)) {
				/*
				 * walk the ep hash table. Acquire a
				 * reader lock.  Non-NULL dgid restricts
				 * failback to endpoints of that peer.
				 */
				daplka_hash_walk(&ia_rp->ia_ep_htbl,
				    daplka_ep_failback,
				    (void *)dgid, RW_READER);
			}
			DAPLKA_RS_UNREF(ia_rp);
		}
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
}
8104 8063
/*
 * This routine is called in kernel context to get and set an alternate path
 *
 * When dgid is non-NULL the alternate path is constrained to the given
 * sgid/dgid pair; a NULL dgid leaves the path attributes unspecified.
 * Returns 0 on success, 1 if either ibt_get_alt_path() or
 * ibt_set_alt_path() fails.
 */
static int
daplka_ep_altpath(daplka_ep_resource_t *ep_rp, ib_gid_t *dgid)
{
	ibt_alt_path_info_t path_info;
	ibt_alt_path_attr_t path_attr;
	ibt_ap_returns_t ap_rets;
	ibt_status_t status;

	D2("daplka_ep_altpath : ibt_get_alt_path()\n");
	bzero(&path_info, sizeof (ibt_alt_path_info_t));
	bzero(&path_attr, sizeof (ibt_alt_path_attr_t));
	if (dgid != NULL) {
		path_attr.apa_sgid = ep_rp->ep_sgid;
		path_attr.apa_dgid = *dgid;
	}
	status = ibt_get_alt_path(ep_rp->ep_chan_hdl, IBT_PATH_AVAIL,
	    &path_attr, &path_info);
	if (status != IBT_SUCCESS) {
		DERR("daplka_ep_altpath : ibt_get_alt_path failed %d\n",
		    status);
		return (1);
	}

	/* load the discovered path onto the channel (blocking call) */
	D2("daplka_ep_altpath : ibt_set_alt_path()\n");
	bzero(&ap_rets, sizeof (ibt_ap_returns_t));
	status = ibt_set_alt_path(ep_rp->ep_chan_hdl, IBT_BLOCKING,
	    &path_info, NULL, 0, &ap_rets);
	if ((status != IBT_SUCCESS) ||
	    (ap_rets.ap_status != IBT_CM_AP_LOADED)) {
		DERR("daplka_ep_altpath : ibt_set_alt_path failed "
		    "status %d ap_status %d\n", status, ap_rets.ap_status);
		return (1);
	}
	return (0);
}
8143 8102
/*
 * This routine is called in kernel context to failback to the original path
 *
 * Hash-walk callback (see daplka_hash_walk callers): objp is the EP
 * resource, arg is the remote dgid to match or NULL to match any
 * non-loopback endpoint (local port-up case).  Always returns 0 so the
 * walk continues over the remaining endpoints.
 */
static int
daplka_ep_failback(void *objp, void *arg)
{
	daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)objp;
	ib_gid_t *dgid;
	ibt_status_t status;
	ibt_rc_chan_query_attr_t chan_attrs;
	int i;

	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
	D2("daplka_ep_failback ep : sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)ep_rp->ep_sgid.gid_prefix,
	    (longlong_t)ep_rp->ep_sgid.gid_guid,
	    (longlong_t)ep_rp->ep_dgid.gid_prefix,
	    (longlong_t)ep_rp->ep_dgid.gid_guid);

	/*
	 * daplka_ep_failback is called from daplka_hash_walk
	 * which holds the read lock on hash table to protect
	 * the endpoint resource from removal
	 */
	mutex_enter(&ep_rp->ep_lock);
	/* check for unconnected endpoints */
	/* first check for ep state */
	if (ep_rp->ep_state != DAPLKA_EP_STATE_CONNECTED) {
		mutex_exit(&ep_rp->ep_lock);
		D2("daplka_ep_failback : endpoints not connected\n");
		return (0);
	}

	/* second check for gids */
	if (((ep_rp->ep_sgid.gid_prefix == 0) &&
	    (ep_rp->ep_sgid.gid_guid == 0)) ||
	    ((ep_rp->ep_dgid.gid_prefix == 0) &&
	    (ep_rp->ep_dgid.gid_guid == 0))) {
		mutex_exit(&ep_rp->ep_lock);
		D2("daplka_ep_failback : skip unconnected endpoints\n");
		return (0);
	}

	/*
	 * matching destination ep
	 * when dgid is NULL, the async event is a local port up.
	 * dgid becomes wild card, i.e. all endpoints match
	 */
	dgid = (ib_gid_t *)arg;
	if (dgid == NULL) {
		/* ignore loopback ep */
		if ((ep_rp->ep_sgid.gid_prefix == ep_rp->ep_dgid.gid_prefix) &&
		    (ep_rp->ep_sgid.gid_guid == ep_rp->ep_dgid.gid_guid)) {
			mutex_exit(&ep_rp->ep_lock);
			D2("daplka_ep_failback : skip loopback endpoints\n");
			return (0);
		}
	} else {
		/* matching remote ep */
		if ((ep_rp->ep_dgid.gid_prefix != dgid->gid_prefix) ||
		    (ep_rp->ep_dgid.gid_guid != dgid->gid_guid)) {
			mutex_exit(&ep_rp->ep_lock);
			D2("daplka_ep_failback : unrelated endpoints\n");
			return (0);
		}
	}

	/* call get and set altpath with original dgid used in ep_connect */
	if (daplka_ep_altpath(ep_rp, &ep_rp->ep_dgid)) {
		mutex_exit(&ep_rp->ep_lock);
		return (0);
	}

	/*
	 * wait for migration state to be ARMed
	 * e.g. a post_send msg will transit mig_state from REARM to ARM
	 *
	 * NOTE(review): if the daplka_query_aft_setaltpath tunable is 0
	 * the loop body never runs and chan_attrs is read uninitialized
	 * by the D2 trace below (debug output only) — confirm tunable
	 * is always nonzero.
	 */
	for (i = 0; i < daplka_query_aft_setaltpath; i++) {
		bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
		status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
		if (status != IBT_SUCCESS) {
			mutex_exit(&ep_rp->ep_lock);
			DERR("daplka_ep_altpath : ibt_query_rc_channel err\n");
			return (0);
		}
		if (chan_attrs.rc_mig_state == IBT_STATE_ARMED)
			break;
	}

	D2("daplka_ep_altpath : query[%d] mig_st=%d\n",
	    i, chan_attrs.rc_mig_state);
	D2("daplka_ep_altpath : P sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)
	    chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_guid,
	    (longlong_t)
	    chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_guid);
	D2("daplka_ep_altpath : A sgid=%llx:%llx dgid=%llx:%llx\n",
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_guid,
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_prefix,
	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_guid);

	/* skip failback on ARMed state not reached or env override */
	if ((i >= daplka_query_aft_setaltpath) || (daplka_failback == 0)) {
		mutex_exit(&ep_rp->ep_lock);
		DERR("daplka_ep_altpath : ARMed state not reached\n");
		return (0);
	}

	D2("daplka_ep_failback : ibt_migrate_path() to original ep\n");
	status = ibt_migrate_path(ep_rp->ep_chan_hdl);
	if (status != IBT_SUCCESS) {
		mutex_exit(&ep_rp->ep_lock);
		DERR("daplka_ep_failback : migration failed "
		    "status %d\n", status);
		return (0);
	}

	/* call get and altpath with NULL dgid to indicate unspecified dgid */
	(void) daplka_ep_altpath(ep_rp, NULL);
	mutex_exit(&ep_rp->ep_lock);
	return (0);
}
8269 8228
8270 8229 /*
8271 8230 * IBTF wrappers used for resource accounting
8272 8231 */
8273 8232 static ibt_status_t
8274 8233 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *ep_rp, ibt_hca_hdl_t hca_hdl,
8275 8234 ibt_chan_alloc_flags_t flags, ibt_rc_chan_alloc_args_t *args,
8276 8235 ibt_channel_hdl_t *chan_hdl_p, ibt_chan_sizes_t *sizes)
8277 8236 {
8278 8237 daplka_hca_t *hca_p;
8279 8238 uint32_t max_qps;
8280 8239 boolean_t acct_enabled;
8281 8240 ibt_status_t status;
8282 8241
8283 8242 acct_enabled = daplka_accounting_enabled;
8284 8243 hca_p = ep_rp->ep_hca;
8285 8244 max_qps = daplka_max_qp_percent * hca_p->hca_attr.hca_max_chans / 100;
8286 8245
8287 8246 if (acct_enabled) {
8288 8247 if (daplka_max_qp_percent != 0 &&
8289 8248 max_qps <= hca_p->hca_qp_count) {
8290 8249 DERR("ibt_alloc_rc_channel: resource limit exceeded "
8291 8250 "(limit %d, count %d)\n", max_qps,
8292 8251 hca_p->hca_qp_count);
8293 8252 return (IBT_INSUFF_RESOURCE);
8294 8253 }
8295 8254 DAPLKA_RS_ACCT_INC(ep_rp, 1);
8296 8255 atomic_inc_32(&hca_p->hca_qp_count);
8297 8256 }
8298 8257 status = ibt_alloc_rc_channel(hca_hdl, flags, args, chan_hdl_p, sizes);
8299 8258
8300 8259 if (status != IBT_SUCCESS && acct_enabled) {
8301 8260 DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8302 8261 atomic_dec_32(&hca_p->hca_qp_count);
8303 8262 }
8304 8263 return (status);
8305 8264 }
8306 8265
8307 8266 static ibt_status_t
8308 8267 daplka_ibt_free_channel(daplka_ep_resource_t *ep_rp, ibt_channel_hdl_t chan_hdl)
8309 8268 {
8310 8269 daplka_hca_t *hca_p;
8311 8270 ibt_status_t status;
8312 8271
8313 8272 hca_p = ep_rp->ep_hca;
8314 8273
8315 8274 status = ibt_free_channel(chan_hdl);
8316 8275 if (status != IBT_SUCCESS) {
8317 8276 return (status);
8318 8277 }
8319 8278 if (DAPLKA_RS_ACCT_CHARGED(ep_rp) > 0) {
8320 8279 DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8321 8280 atomic_dec_32(&hca_p->hca_qp_count);
8322 8281 }
8323 8282 return (status);
8324 8283 }
8325 8284
8326 8285 static ibt_status_t
8327 8286 daplka_ibt_alloc_cq(daplka_evd_resource_t *evd_rp, ibt_hca_hdl_t hca_hdl,
8328 8287 ibt_cq_attr_t *cq_attr, ibt_cq_hdl_t *ibt_cq_p, uint32_t *real_size)
8329 8288 {
8330 8289 daplka_hca_t *hca_p;
8331 8290 uint32_t max_cqs;
8332 8291 boolean_t acct_enabled;
8333 8292 ibt_status_t status;
8334 8293
8335 8294 acct_enabled = daplka_accounting_enabled;
8336 8295 hca_p = evd_rp->evd_hca;
8337 8296 max_cqs = daplka_max_cq_percent * hca_p->hca_attr.hca_max_cq / 100;
8338 8297
8339 8298 if (acct_enabled) {
8340 8299 if (daplka_max_cq_percent != 0 &&
8341 8300 max_cqs <= hca_p->hca_cq_count) {
8342 8301 DERR("ibt_alloc_cq: resource limit exceeded "
8343 8302 "(limit %d, count %d)\n", max_cqs,
8344 8303 hca_p->hca_cq_count);
8345 8304 return (IBT_INSUFF_RESOURCE);
8346 8305 }
8347 8306 DAPLKA_RS_ACCT_INC(evd_rp, 1);
8348 8307 atomic_inc_32(&hca_p->hca_cq_count);
8349 8308 }
8350 8309 status = ibt_alloc_cq(hca_hdl, cq_attr, ibt_cq_p, real_size);
8351 8310
8352 8311 if (status != IBT_SUCCESS && acct_enabled) {
8353 8312 DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8354 8313 atomic_dec_32(&hca_p->hca_cq_count);
8355 8314 }
8356 8315 return (status);
8357 8316 }
8358 8317
8359 8318 static ibt_status_t
8360 8319 daplka_ibt_free_cq(daplka_evd_resource_t *evd_rp, ibt_cq_hdl_t cq_hdl)
8361 8320 {
8362 8321 daplka_hca_t *hca_p;
8363 8322 ibt_status_t status;
8364 8323
8365 8324 hca_p = evd_rp->evd_hca;
8366 8325
8367 8326 status = ibt_free_cq(cq_hdl);
8368 8327 if (status != IBT_SUCCESS) {
8369 8328 return (status);
8370 8329 }
8371 8330 if (DAPLKA_RS_ACCT_CHARGED(evd_rp) > 0) {
8372 8331 DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8373 8332 atomic_dec_32(&hca_p->hca_cq_count);
8374 8333 }
8375 8334 return (status);
8376 8335 }
8377 8336
8378 8337 static ibt_status_t
8379 8338 daplka_ibt_alloc_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8380 8339 ibt_pd_flags_t flags, ibt_pd_hdl_t *pd_hdl_p)
8381 8340 {
8382 8341 daplka_hca_t *hca_p;
8383 8342 uint32_t max_pds;
8384 8343 boolean_t acct_enabled;
8385 8344 ibt_status_t status;
8386 8345
8387 8346 acct_enabled = daplka_accounting_enabled;
8388 8347 hca_p = pd_rp->pd_hca;
8389 8348 max_pds = daplka_max_pd_percent * hca_p->hca_attr.hca_max_pd / 100;
8390 8349
8391 8350 if (acct_enabled) {
8392 8351 if (daplka_max_pd_percent != 0 &&
8393 8352 max_pds <= hca_p->hca_pd_count) {
8394 8353 DERR("ibt_alloc_pd: resource limit exceeded "
8395 8354 "(limit %d, count %d)\n", max_pds,
8396 8355 hca_p->hca_pd_count);
8397 8356 return (IBT_INSUFF_RESOURCE);
8398 8357 }
8399 8358 DAPLKA_RS_ACCT_INC(pd_rp, 1);
8400 8359 atomic_inc_32(&hca_p->hca_pd_count);
8401 8360 }
8402 8361 status = ibt_alloc_pd(hca_hdl, flags, pd_hdl_p);
8403 8362
8404 8363 if (status != IBT_SUCCESS && acct_enabled) {
8405 8364 DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8406 8365 atomic_dec_32(&hca_p->hca_pd_count);
8407 8366 }
8408 8367 return (status);
8409 8368 }
8410 8369
8411 8370 static ibt_status_t
8412 8371 daplka_ibt_free_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8413 8372 ibt_pd_hdl_t pd_hdl)
8414 8373 {
8415 8374 daplka_hca_t *hca_p;
8416 8375 ibt_status_t status;
8417 8376
8418 8377 hca_p = pd_rp->pd_hca;
8419 8378
8420 8379 status = ibt_free_pd(hca_hdl, pd_hdl);
8421 8380 if (status != IBT_SUCCESS) {
8422 8381 return (status);
8423 8382 }
8424 8383 if (DAPLKA_RS_ACCT_CHARGED(pd_rp) > 0) {
8425 8384 DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8426 8385 atomic_dec_32(&hca_p->hca_pd_count);
8427 8386 }
8428 8387 return (status);
8429 8388 }
8430 8389
8431 8390 static ibt_status_t
8432 8391 daplka_ibt_alloc_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8433 8392 ibt_pd_hdl_t pd_hdl, ibt_mw_flags_t flags, ibt_mw_hdl_t *mw_hdl_p,
8434 8393 ibt_rkey_t *rkey_p)
8435 8394 {
8436 8395 daplka_hca_t *hca_p;
8437 8396 uint32_t max_mws;
8438 8397 boolean_t acct_enabled;
8439 8398 ibt_status_t status;
8440 8399
8441 8400 acct_enabled = daplka_accounting_enabled;
8442 8401 hca_p = mw_rp->mw_hca;
8443 8402 max_mws = daplka_max_mw_percent * hca_p->hca_attr.hca_max_mem_win / 100;
8444 8403
8445 8404 if (acct_enabled) {
8446 8405 if (daplka_max_mw_percent != 0 &&
8447 8406 max_mws <= hca_p->hca_mw_count) {
8448 8407 DERR("ibt_alloc_mw: resource limit exceeded "
8449 8408 "(limit %d, count %d)\n", max_mws,
8450 8409 hca_p->hca_mw_count);
8451 8410 return (IBT_INSUFF_RESOURCE);
8452 8411 }
8453 8412 DAPLKA_RS_ACCT_INC(mw_rp, 1);
8454 8413 atomic_inc_32(&hca_p->hca_mw_count);
8455 8414 }
8456 8415 status = ibt_alloc_mw(hca_hdl, pd_hdl, flags, mw_hdl_p, rkey_p);
8457 8416
8458 8417 if (status != IBT_SUCCESS && acct_enabled) {
8459 8418 DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8460 8419 atomic_dec_32(&hca_p->hca_mw_count);
8461 8420 }
8462 8421 return (status);
8463 8422 }
8464 8423
8465 8424 static ibt_status_t
8466 8425 daplka_ibt_free_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8467 8426 ibt_mw_hdl_t mw_hdl)
8468 8427 {
8469 8428 daplka_hca_t *hca_p;
8470 8429 ibt_status_t status;
8471 8430
8472 8431 hca_p = mw_rp->mw_hca;
8473 8432
8474 8433 status = ibt_free_mw(hca_hdl, mw_hdl);
8475 8434 if (status != IBT_SUCCESS) {
8476 8435 return (status);
8477 8436 }
8478 8437 if (DAPLKA_RS_ACCT_CHARGED(mw_rp) > 0) {
8479 8438 DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8480 8439 atomic_dec_32(&hca_p->hca_mw_count);
8481 8440 }
8482 8441 return (status);
8483 8442 }
8484 8443
8485 8444 static ibt_status_t
8486 8445 daplka_ibt_register_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8487 8446 ibt_pd_hdl_t pd_hdl, ibt_mr_attr_t *mr_attr, ibt_mr_hdl_t *mr_hdl_p,
8488 8447 ibt_mr_desc_t *mr_desc_p)
8489 8448 {
8490 8449 daplka_hca_t *hca_p;
8491 8450 uint32_t max_mrs;
8492 8451 boolean_t acct_enabled;
8493 8452 ibt_status_t status;
8494 8453
8495 8454 acct_enabled = daplka_accounting_enabled;
8496 8455 hca_p = mr_rp->mr_hca;
8497 8456 max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
8498 8457
8499 8458 if (acct_enabled) {
8500 8459 if (daplka_max_mr_percent != 0 &&
8501 8460 max_mrs <= hca_p->hca_mr_count) {
8502 8461 DERR("ibt_register_mr: resource limit exceeded "
8503 8462 "(limit %d, count %d)\n", max_mrs,
8504 8463 hca_p->hca_mr_count);
8505 8464 return (IBT_INSUFF_RESOURCE);
8506 8465 }
8507 8466 DAPLKA_RS_ACCT_INC(mr_rp, 1);
8508 8467 atomic_inc_32(&hca_p->hca_mr_count);
8509 8468 }
8510 8469 status = ibt_register_mr(hca_hdl, pd_hdl, mr_attr, mr_hdl_p, mr_desc_p);
8511 8470
8512 8471 if (status != IBT_SUCCESS && acct_enabled) {
8513 8472 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8514 8473 atomic_dec_32(&hca_p->hca_mr_count);
8515 8474 }
8516 8475 return (status);
8517 8476 }
8518 8477
8519 8478 static ibt_status_t
8520 8479 daplka_ibt_register_shared_mr(daplka_mr_resource_t *mr_rp,
8521 8480 ibt_hca_hdl_t hca_hdl, ibt_mr_hdl_t mr_hdl, ibt_pd_hdl_t pd_hdl,
8522 8481 ibt_smr_attr_t *smr_attr_p, ibt_mr_hdl_t *mr_hdl_p,
8523 8482 ibt_mr_desc_t *mr_desc_p)
8524 8483 {
8525 8484 daplka_hca_t *hca_p;
8526 8485 uint32_t max_mrs;
8527 8486 boolean_t acct_enabled;
8528 8487 ibt_status_t status;
8529 8488
8530 8489 acct_enabled = daplka_accounting_enabled;
8531 8490 hca_p = mr_rp->mr_hca;
8532 8491 max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
8533 8492
8534 8493 if (acct_enabled) {
8535 8494 if (daplka_max_mr_percent != 0 &&
8536 8495 max_mrs <= hca_p->hca_mr_count) {
8537 8496 DERR("ibt_register_shared_mr: resource limit exceeded "
8538 8497 "(limit %d, count %d)\n", max_mrs,
8539 8498 hca_p->hca_mr_count);
8540 8499 return (IBT_INSUFF_RESOURCE);
8541 8500 }
8542 8501 DAPLKA_RS_ACCT_INC(mr_rp, 1);
8543 8502 atomic_inc_32(&hca_p->hca_mr_count);
8544 8503 }
8545 8504 status = ibt_register_shared_mr(hca_hdl, mr_hdl, pd_hdl,
8546 8505 smr_attr_p, mr_hdl_p, mr_desc_p);
8547 8506
8548 8507 if (status != IBT_SUCCESS && acct_enabled) {
8549 8508 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8550 8509 atomic_dec_32(&hca_p->hca_mr_count);
8551 8510 }
8552 8511 return (status);
8553 8512 }
8554 8513
8555 8514 static ibt_status_t
8556 8515 daplka_ibt_deregister_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8557 8516 ibt_mr_hdl_t mr_hdl)
8558 8517 {
8559 8518 daplka_hca_t *hca_p;
8560 8519 ibt_status_t status;
8561 8520
8562 8521 hca_p = mr_rp->mr_hca;
8563 8522
8564 8523 status = ibt_deregister_mr(hca_hdl, mr_hdl);
8565 8524 if (status != IBT_SUCCESS) {
8566 8525 return (status);
8567 8526 }
8568 8527 if (DAPLKA_RS_ACCT_CHARGED(mr_rp) > 0) {
8569 8528 DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8570 8529 atomic_dec_32(&hca_p->hca_mr_count);
8571 8530 }
8572 8531 return (status);
8573 8532 }
8574 8533
8575 8534 static ibt_status_t
8576 8535 daplka_ibt_alloc_srq(daplka_srq_resource_t *srq_rp, ibt_hca_hdl_t hca_hdl,
8577 8536 ibt_srq_flags_t flags, ibt_pd_hdl_t pd, ibt_srq_sizes_t *reqsz,
8578 8537 ibt_srq_hdl_t *srq_hdl_p, ibt_srq_sizes_t *realsz)
8579 8538 {
8580 8539 daplka_hca_t *hca_p;
8581 8540 uint32_t max_srqs;
8582 8541 boolean_t acct_enabled;
8583 8542 ibt_status_t status;
8584 8543
8585 8544 acct_enabled = daplka_accounting_enabled;
8586 8545 hca_p = srq_rp->srq_hca;
8587 8546 max_srqs = daplka_max_srq_percent * hca_p->hca_attr.hca_max_srqs / 100;
8588 8547
8589 8548 if (acct_enabled) {
8590 8549 if (daplka_max_srq_percent != 0 &&
8591 8550 max_srqs <= hca_p->hca_srq_count) {
8592 8551 DERR("ibt_alloc_srq: resource limit exceeded "
8593 8552 "(limit %d, count %d)\n", max_srqs,
8594 8553 hca_p->hca_srq_count);
8595 8554 return (IBT_INSUFF_RESOURCE);
8596 8555 }
8597 8556 DAPLKA_RS_ACCT_INC(srq_rp, 1);
8598 8557 atomic_inc_32(&hca_p->hca_srq_count);
8599 8558 }
8600 8559 status = ibt_alloc_srq(hca_hdl, flags, pd, reqsz, srq_hdl_p, realsz);
8601 8560
8602 8561 if (status != IBT_SUCCESS && acct_enabled) {
8603 8562 DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8604 8563 atomic_dec_32(&hca_p->hca_srq_count);
8605 8564 }
8606 8565 return (status);
8607 8566 }
8608 8567
8609 8568 static ibt_status_t
8610 8569 daplka_ibt_free_srq(daplka_srq_resource_t *srq_rp, ibt_srq_hdl_t srq_hdl)
8611 8570 {
8612 8571 daplka_hca_t *hca_p;
8613 8572 ibt_status_t status;
8614 8573
8615 8574 hca_p = srq_rp->srq_hca;
8616 8575
8617 8576 D3("ibt_free_srq: %p %p\n", srq_rp, srq_hdl);
8618 8577
8619 8578 status = ibt_free_srq(srq_hdl);
8620 8579 if (status != IBT_SUCCESS) {
8621 8580 return (status);
8622 8581 }
8623 8582 if (DAPLKA_RS_ACCT_CHARGED(srq_rp) > 0) {
8624 8583 DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8625 8584 atomic_dec_32(&hca_p->hca_srq_count);
8626 8585 }
8627 8586 return (status);
8628 8587 }
8629 8588
8630 8589
8631 8590 static int
8632 8591 daplka_common_ioctl(int cmd, minor_t rnum, intptr_t arg, int mode,
8633 8592 cred_t *cred, int *rvalp)
8634 8593 {
8635 8594 int error;
8636 8595
8637 8596 switch (cmd) {
8638 8597 case DAPL_IA_CREATE:
8639 8598 error = daplka_ia_create(rnum, arg, mode, cred, rvalp);
8640 8599 break;
8641 8600
8642 8601 /* can potentially add other commands here */
8643 8602
8644 8603 default:
8645 8604 DERR("daplka_common_ioctl: cmd not supported\n");
8646 8605 error = DDI_FAILURE;
8647 8606 }
8648 8607 return (error);
8649 8608 }
8650 8609
8651 8610 static int
8652 8611 daplka_evd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8653 8612 cred_t *cred, int *rvalp)
8654 8613 {
8655 8614 int error;
8656 8615
8657 8616 switch (cmd) {
8658 8617 case DAPL_EVD_CREATE:
8659 8618 error = daplka_evd_create(rp, arg, mode, cred, rvalp);
8660 8619 break;
8661 8620
8662 8621 case DAPL_CQ_RESIZE:
8663 8622 error = daplka_cq_resize(rp, arg, mode, cred, rvalp);
8664 8623 break;
8665 8624
8666 8625 case DAPL_EVENT_POLL:
8667 8626 error = daplka_event_poll(rp, arg, mode, cred, rvalp);
8668 8627 break;
8669 8628
8670 8629 case DAPL_EVENT_WAKEUP:
8671 8630 error = daplka_event_wakeup(rp, arg, mode, cred, rvalp);
8672 8631 break;
8673 8632
8674 8633 case DAPL_EVD_MODIFY_CNO:
8675 8634 error = daplka_evd_modify_cno(rp, arg, mode, cred, rvalp);
8676 8635 break;
8677 8636
8678 8637 case DAPL_EVD_FREE:
8679 8638 error = daplka_evd_free(rp, arg, mode, cred, rvalp);
8680 8639 break;
8681 8640
8682 8641 default:
8683 8642 DERR("daplka_evd_ioctl: cmd not supported\n");
8684 8643 error = DDI_FAILURE;
8685 8644 }
8686 8645 return (error);
8687 8646 }
8688 8647
8689 8648 static int
8690 8649 daplka_ep_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8691 8650 cred_t *cred, int *rvalp)
8692 8651 {
8693 8652 int error;
8694 8653
8695 8654 switch (cmd) {
8696 8655 case DAPL_EP_MODIFY:
8697 8656 error = daplka_ep_modify(rp, arg, mode, cred, rvalp);
8698 8657 break;
8699 8658
8700 8659 case DAPL_EP_FREE:
8701 8660 error = daplka_ep_free(rp, arg, mode, cred, rvalp);
8702 8661 break;
8703 8662
8704 8663 case DAPL_EP_CONNECT:
8705 8664 error = daplka_ep_connect(rp, arg, mode, cred, rvalp);
8706 8665 break;
8707 8666
8708 8667 case DAPL_EP_DISCONNECT:
8709 8668 error = daplka_ep_disconnect(rp, arg, mode, cred, rvalp);
8710 8669 break;
8711 8670
8712 8671 case DAPL_EP_REINIT:
8713 8672 error = daplka_ep_reinit(rp, arg, mode, cred, rvalp);
8714 8673 break;
8715 8674
8716 8675 case DAPL_EP_CREATE:
8717 8676 error = daplka_ep_create(rp, arg, mode, cred, rvalp);
8718 8677 break;
8719 8678
8720 8679 default:
8721 8680 DERR("daplka_ep_ioctl: cmd not supported\n");
8722 8681 error = DDI_FAILURE;
8723 8682 }
8724 8683 return (error);
8725 8684 }
8726 8685
8727 8686 static int
8728 8687 daplka_mr_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8729 8688 cred_t *cred, int *rvalp)
8730 8689 {
8731 8690 int error;
8732 8691
8733 8692 switch (cmd) {
8734 8693 case DAPL_MR_REGISTER:
8735 8694 error = daplka_mr_register(rp, arg, mode, cred, rvalp);
8736 8695 break;
8737 8696
8738 8697 case DAPL_MR_REGISTER_LMR:
8739 8698 error = daplka_mr_register_lmr(rp, arg, mode, cred, rvalp);
8740 8699 break;
8741 8700
8742 8701 case DAPL_MR_REGISTER_SHARED:
8743 8702 error = daplka_mr_register_shared(rp, arg, mode, cred, rvalp);
8744 8703 break;
8745 8704
8746 8705 case DAPL_MR_DEREGISTER:
8747 8706 error = daplka_mr_deregister(rp, arg, mode, cred, rvalp);
8748 8707 break;
8749 8708
8750 8709 case DAPL_MR_SYNC:
8751 8710 error = daplka_mr_sync(rp, arg, mode, cred, rvalp);
8752 8711 break;
8753 8712
8754 8713 default:
8755 8714 DERR("daplka_mr_ioctl: cmd not supported\n");
8756 8715 error = DDI_FAILURE;
8757 8716 }
8758 8717 return (error);
8759 8718 }
8760 8719
8761 8720 static int
8762 8721 daplka_mw_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8763 8722 cred_t *cred, int *rvalp)
8764 8723 {
8765 8724 int error;
8766 8725
8767 8726 switch (cmd) {
8768 8727 case DAPL_MW_ALLOC:
8769 8728 error = daplka_mw_alloc(rp, arg, mode, cred, rvalp);
8770 8729 break;
8771 8730
8772 8731 case DAPL_MW_FREE:
8773 8732 error = daplka_mw_free(rp, arg, mode, cred, rvalp);
8774 8733 break;
8775 8734
8776 8735 default:
8777 8736 DERR("daplka_mw_ioctl: cmd not supported\n");
8778 8737 error = DDI_FAILURE;
8779 8738 }
8780 8739 return (error);
8781 8740 }
8782 8741
8783 8742 static int
8784 8743 daplka_cno_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8785 8744 cred_t *cred, int *rvalp)
8786 8745 {
8787 8746 int error;
8788 8747
8789 8748 switch (cmd) {
8790 8749 case DAPL_CNO_ALLOC:
8791 8750 error = daplka_cno_alloc(rp, arg, mode, cred, rvalp);
8792 8751 break;
8793 8752
8794 8753 case DAPL_CNO_FREE:
8795 8754 error = daplka_cno_free(rp, arg, mode, cred, rvalp);
8796 8755 break;
8797 8756
8798 8757 case DAPL_CNO_WAIT:
8799 8758 error = daplka_cno_wait(rp, arg, mode, cred, rvalp);
8800 8759 break;
8801 8760
8802 8761 default:
8803 8762 DERR("daplka_cno_ioctl: cmd not supported\n");
8804 8763 error = DDI_FAILURE;
8805 8764 }
8806 8765 return (error);
8807 8766 }
8808 8767
8809 8768 static int
8810 8769 daplka_pd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8811 8770 cred_t *cred, int *rvalp)
8812 8771 {
8813 8772 int error;
8814 8773
8815 8774 switch (cmd) {
8816 8775 case DAPL_PD_ALLOC:
8817 8776 error = daplka_pd_alloc(rp, arg, mode, cred, rvalp);
8818 8777 break;
8819 8778
8820 8779 case DAPL_PD_FREE:
8821 8780 error = daplka_pd_free(rp, arg, mode, cred, rvalp);
8822 8781 break;
8823 8782
8824 8783 default:
8825 8784 DERR("daplka_pd_ioctl: cmd not supported\n");
8826 8785 error = DDI_FAILURE;
8827 8786 }
8828 8787 return (error);
8829 8788 }
8830 8789
8831 8790 static int
8832 8791 daplka_sp_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8833 8792 cred_t *cred, int *rvalp)
8834 8793 {
8835 8794 int error;
8836 8795
8837 8796 switch (cmd) {
8838 8797 case DAPL_SERVICE_REGISTER:
8839 8798 error = daplka_service_register(rp, arg, mode, cred, rvalp);
8840 8799 break;
8841 8800
8842 8801 case DAPL_SERVICE_DEREGISTER:
8843 8802 error = daplka_service_deregister(rp, arg, mode, cred, rvalp);
8844 8803 break;
8845 8804
8846 8805 default:
8847 8806 DERR("daplka_sp_ioctl: cmd not supported\n");
8848 8807 error = DDI_FAILURE;
8849 8808 }
8850 8809 return (error);
8851 8810 }
8852 8811
8853 8812 static int
8854 8813 daplka_srq_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8855 8814 cred_t *cred, int *rvalp)
8856 8815 {
8857 8816 int error;
8858 8817
8859 8818 switch (cmd) {
8860 8819 case DAPL_SRQ_CREATE:
8861 8820 error = daplka_srq_create(rp, arg, mode, cred, rvalp);
8862 8821 break;
8863 8822
8864 8823 case DAPL_SRQ_RESIZE:
8865 8824 error = daplka_srq_resize(rp, arg, mode, cred, rvalp);
8866 8825 break;
8867 8826
8868 8827 case DAPL_SRQ_FREE:
8869 8828 error = daplka_srq_free(rp, arg, mode, cred, rvalp);
8870 8829 break;
8871 8830
8872 8831 default:
8873 8832 DERR("daplka_srq_ioctl: cmd(%d) not supported\n", cmd);
8874 8833 error = DDI_FAILURE;
8875 8834 break;
8876 8835 }
8877 8836 return (error);
8878 8837 }
8879 8838
8880 8839 static int
8881 8840 daplka_misc_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8882 8841 cred_t *cred, int *rvalp)
8883 8842 {
8884 8843 int error;
8885 8844
8886 8845 switch (cmd) {
8887 8846 case DAPL_CR_ACCEPT:
8888 8847 error = daplka_cr_accept(rp, arg, mode, cred, rvalp);
8889 8848 break;
8890 8849
8891 8850 case DAPL_CR_REJECT:
8892 8851 error = daplka_cr_reject(rp, arg, mode, cred, rvalp);
8893 8852 break;
8894 8853
8895 8854 case DAPL_IA_QUERY:
8896 8855 error = daplka_ia_query(rp, arg, mode, cred, rvalp);
8897 8856 break;
8898 8857
8899 8858 case DAPL_CR_HANDOFF:
8900 8859 error = daplka_cr_handoff(rp, arg, mode, cred, rvalp);
8901 8860 break;
8902 8861
8903 8862 default:
8904 8863 DERR("daplka_misc_ioctl: cmd not supported\n");
8905 8864 error = DDI_FAILURE;
8906 8865 }
8907 8866 return (error);
8908 8867 }
8909 8868
/*ARGSUSED*/
/*
 * daplka_ioctl:
 *	ioctl(9E) entry point. looks up the resource bound to the
 *	device's minor number and routes the command to the per-type
 *	dispatcher selected by the DAPL_TYPE_MASK bits of cmd.
 *
 *	returns ENXIO if no resource exists for the minor, EINVAL if
 *	the resource is not an IA or belongs to another process,
 *	otherwise the dispatcher's status.
 */
static int
daplka_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rvalp)
{
	daplka_ia_resource_t	*ia_rp;
	minor_t			rnum;
	int			error = 0;

	/*
	 * the lookup takes a reference on the resource (unless the
	 * slot is merely reserved); it must be released before return.
	 */
	rnum = getminor(dev);
	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
	if (ia_rp == NULL) {
		DERR("ioctl: resource not found, rnum %d\n", rnum);
		return (ENXIO);
	}

	D4("ioctl: rnum = %d, cmd = 0x%x\n", rnum, cmd);
	if (DAPLKA_RS_RESERVED(ia_rp)) {
		/*
		 * reserved slot: no IA has been created yet, so only the
		 * common (IA-independent) commands are valid. no reference
		 * was taken on a reserved slot, hence no UNREF here.
		 */
		error = daplka_common_ioctl(cmd, rnum, arg, mode, cred, rvalp);
		return (error);
	}
	if (DAPLKA_RS_TYPE(ia_rp) != DAPL_TYPE_IA) {
		DERR("ioctl: invalid type %d\n", DAPLKA_RS_TYPE(ia_rp));
		error = EINVAL;
		goto cleanup;
	}
	/* only the process that created the IA may operate on it */
	if (ia_rp->ia_pid != ddi_get_pid()) {
		DERR("ioctl: ia_pid %d != pid %d\n",
		    ia_rp->ia_pid, ddi_get_pid());
		error = EINVAL;
		goto cleanup;
	}

	/* route on the resource-type bits encoded in the command */
	switch (cmd & DAPL_TYPE_MASK) {
	case DAPL_TYPE_EVD:
		error = daplka_evd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_EP:
		error = daplka_ep_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_MR:
		error = daplka_mr_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_MW:
		error = daplka_mw_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_PD:
		error = daplka_pd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_SP:
		error = daplka_sp_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_CNO:
		error = daplka_cno_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_MISC:
		error = daplka_misc_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	case DAPL_TYPE_SRQ:
		error = daplka_srq_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
		break;

	default:
		DERR("ioctl: invalid dapl type = %d\n", DAPLKA_RS_TYPE(ia_rp));
		error = DDI_FAILURE;
	}

cleanup:;
	/* drop the reference taken by daplka_resource_lookup */
	DAPLKA_RS_UNREF(ia_rp);
	return (error);
}
8989 8948
8990 8949 /* ARGSUSED */
8991 8950 static int
8992 8951 daplka_open(dev_t *devp, int flag, int otyp, struct cred *cred)
8993 8952 {
8994 8953 minor_t rnum;
8995 8954
8996 8955 /*
8997 8956 * Char only
8998 8957 */
8999 8958 if (otyp != OTYP_CHR) {
9000 8959 return (EINVAL);
9001 8960 }
9002 8961
9003 8962 /*
9004 8963 * Only zero can be opened, clones are used for resources.
9005 8964 */
9006 8965 if (getminor(*devp) != DAPLKA_DRIVER_MINOR) {
9007 8966 DERR("daplka_open: bad minor %d\n", getminor(*devp));
9008 8967 return (ENODEV);
9009 8968 }
9010 8969
9011 8970 /*
9012 8971 * - allocate new minor number
9013 8972 * - update devp argument to new device
9014 8973 */
9015 8974 if (daplka_resource_reserve(&rnum) == 0) {
9016 8975 *devp = makedevice(getmajor(*devp), rnum);
9017 8976 } else {
9018 8977 return (ENOMEM);
9019 8978 }
9020 8979
9021 8980 return (DDI_SUCCESS);
9022 8981 }
9023 8982
/* ARGSUSED */
/*
 * daplka_close:
 *	close(9E) entry point. detaches the resource bound to this
 *	minor from the resource table and drops the initial reference
 *	that was taken when the resource was created; the resource is
 *	destroyed once its last reference goes away.
 */
static int
daplka_close(dev_t dev, int flag, int otyp, struct cred *cred)
{
	daplka_ia_resource_t	*ia_rp;
	minor_t			rnum = getminor(dev);

	/*
	 * Char only
	 */
	if (otyp != OTYP_CHR) {
		return (EINVAL);
	}
	D2("daplka_close: closing rnum = %d\n", rnum);
	/*
	 * advertise an in-progress close before touching the table so
	 * other code that inspects daplka_pending_close sees this close;
	 * decremented again once the teardown below is complete.
	 */
	atomic_inc_32(&daplka_pending_close);

	/*
	 * remove from resource table.
	 */
	ia_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);

	/*
	 * remove the initial reference
	 */
	if (ia_rp != NULL) {
		/* NULL means the slot was only reserved - nothing to free */
		DAPLKA_RS_UNREF(ia_rp);
	}
	atomic_dec_32(&daplka_pending_close);
	return (DDI_SUCCESS);
}
9054 9013
9055 9014
9056 9015 /*
9057 9016 * Resource management routines
9058 9017 *
9059 9018 * We start with no resource array. Each time we run out of slots, we
9060 9019 * reallocate a new larger array and copy the pointer to the new array and
9061 9020 * a new resource blk is allocated and added to the hash table.
9062 9021 *
9063 9022 * The resource control block contains:
9064 9023 * root - array of pointer of resource blks
9065 9024 * sz - current size of array.
9066 9025 * len - last valid entry in array.
9067 9026 *
9068 9027 * A search operation based on a resource number is as follows:
9069 9028 * index = rnum / RESOURCE_BLKSZ;
9070 9029 * ASSERT(index < resource_block.len);
9071 9030 * ASSERT(index < resource_block.sz);
9072 9031 * offset = rnum % RESOURCE_BLKSZ;
9073 9032 * ASSERT(offset >= resource_block.root[index]->base);
9074 9033 * ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
9075 9034 * return resource_block.root[index]->blks[offset];
↓ open down ↓ |
1387 lines elided |
↑ open up ↑ |
9076 9035 *
9077 9036 * A resource blk is freed when its used count reaches zero.
9078 9037 */
9079 9038
9080 9039 /*
9081 9040 * initializes the global resource table
9082 9041 */
9083 9042 static void
9084 9043 daplka_resource_init(void)
9085 9044 {
9086 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(daplka_resource))
9087 9045 rw_init(&daplka_resource.daplka_rct_lock, NULL, RW_DRIVER, NULL);
9088 9046 daplka_resource.daplka_rc_len = 0;
9089 9047 daplka_resource.daplka_rc_sz = 0;
9090 9048 daplka_resource.daplka_rc_cnt = 0;
9091 9049 daplka_resource.daplka_rc_flag = 0;
9092 9050 daplka_resource.daplka_rc_root = NULL;
9093 - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(daplka_resource))
9094 9051 }
9095 9052
/*
 * destroys the global resource table.
 * frees every remaining resource block and the root pointer array,
 * then destroys the table lock. all resources are expected to have
 * been removed already; any still-occupied slot is only reported.
 */
static void
daplka_resource_fini(void)
{
	int	i;

	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
		daplka_resource_blk_t	*blk;
		int			j;

		blk = daplka_resource.daplka_rc_root[i];
		if (blk == NULL) {
			continue;
		}
		/* slots should all be empty by now; warn about leftovers */
		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
			if (blk->daplka_rcblk_blks[j] != NULL) {
				DERR("resource_fini: non-null slot %d, %p\n",
				    j, blk->daplka_rcblk_blks[j]);
			}
		}
		kmem_free(blk, sizeof (*blk));
		daplka_resource.daplka_rc_root[i] = NULL;
	}
	/* release the root array and reset the bookkeeping fields */
	if (daplka_resource.daplka_rc_root != NULL) {
		uint_t	sz;

		sz = daplka_resource.daplka_rc_sz *
		    sizeof (daplka_resource_blk_t *);
		kmem_free(daplka_resource.daplka_rc_root, (uint_t)sz);
		daplka_resource.daplka_rc_root = NULL;
		daplka_resource.daplka_rc_len = 0;
		daplka_resource.daplka_rc_sz = 0;
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
	rw_destroy(&daplka_resource.daplka_rct_lock);
}
9135 9092
/*
 * reserves a slot in the global resource table.
 * this is called by the open() syscall. it is needed because
 * at open() time, we do not have sufficient information to
 * create an IA resource. the library needs to subsequently
 * call daplka_ia_create to insert an IA resource into this
 * reserved slot.
 *
 * on success the reserved minor number is returned via *rnum and
 * the slot holds the DAPLKA_RC_RESERVED sentinel until the insert.
 * always returns 0 in the current implementation.
 */
static int
daplka_resource_reserve(minor_t *rnum)
{
	int i, j, empty = -1;
	daplka_resource_blk_t *blk;

	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
	/*
	 * Try to find an empty slot
	 */
	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
		blk = daplka_resource.daplka_rc_root[i];
		if (blk != NULL && blk->daplka_rcblk_avail > 0) {

			D3("resource_alloc: available blks %d\n",
			    blk->daplka_rcblk_avail);

			/*
			 * found an empty slot in this blk
			 */
			for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
				if (blk->daplka_rcblk_blks[j] == NULL) {
					/*
					 * minor number = block index *
					 * block size + slot offset
					 */
					*rnum = (minor_t)
					    (j + (i * DAPLKA_RC_BLKSZ));
					blk->daplka_rcblk_blks[j] =
					    (daplka_resource_t *)
					    DAPLKA_RC_RESERVED;
					blk->daplka_rcblk_avail--;
					daplka_resource.daplka_rc_cnt++;
					rw_exit(&daplka_resource.
					    daplka_rct_lock);
					return (0);
				}
			}
		} else if (blk == NULL && empty < 0) {
			/*
			 * remember first empty slot
			 */
			empty = i;
		}
	}

	/*
	 * Couldn't find anything, allocate a new blk
	 * Do we need to reallocate the root array
	 */
	if (empty < 0) {
		if (daplka_resource.daplka_rc_len ==
		    daplka_resource.daplka_rc_sz) {
			/*
			 * Allocate new array and copy current stuff into it
			 */
			daplka_resource_blk_t **p;
			uint_t newsz = (uint_t)daplka_resource.daplka_rc_sz +
			    DAPLKA_RC_BLKSZ;

			D3("resource_alloc: increasing no. of buckets to %d\n",
			    newsz);

			p = kmem_zalloc(newsz * sizeof (*p), daplka_km_flags);

			if (daplka_resource.daplka_rc_root) {
				uint_t oldsz;

				oldsz = (uint_t)(daplka_resource.daplka_rc_sz *
				    (int)sizeof (*p));

				/*
				 * Copy old data into new space and
				 * free old stuff
				 */
				bcopy(daplka_resource.daplka_rc_root, p, oldsz);
				kmem_free(daplka_resource.daplka_rc_root,
				    oldsz);
			}

			daplka_resource.daplka_rc_root = p;
			daplka_resource.daplka_rc_sz = (int)newsz;
		}

		/* grow the table by one (still unallocated) block */
		empty = daplka_resource.daplka_rc_len;
		daplka_resource.daplka_rc_len++;

		D3("resource_alloc: daplka_rc_len %d\n",
		    daplka_resource.daplka_rc_len);
	}

	/*
	 * Allocate a new blk
	 */
	blk = kmem_zalloc(sizeof (*blk), daplka_km_flags);
	ASSERT(daplka_resource.daplka_rc_root[empty] == NULL);
	daplka_resource.daplka_rc_root[empty] = blk;
	blk->daplka_rcblk_avail = DAPLKA_RC_BLKSZ - 1;

	/*
	 * Allocate slot
	 */
	/* slot 0 of the brand-new block is handed out immediately */
	*rnum = (minor_t)(empty * DAPLKA_RC_BLKSZ);
	blk->daplka_rcblk_blks[0] = (daplka_resource_t *)DAPLKA_RC_RESERVED;
	daplka_resource.daplka_rc_cnt++;
	rw_exit(&daplka_resource.daplka_rct_lock);

	return (0);
}
9249 9206
/*
 * removes resource from global resource table.
 * returns the removed resource pointer, or NULL if the slot was
 * invalid, already empty, or held only the RESERVED sentinel.
 * the containing block is freed once its last slot empties.
 */
static daplka_resource_t *
daplka_resource_remove(minor_t rnum)
{
	int i, j;
	daplka_resource_blk_t *blk;
	daplka_resource_t *p;

	/* split the minor into a root-array index and a slot offset */
	i = (int)(rnum / DAPLKA_RC_BLKSZ);
	j = (int)(rnum % DAPLKA_RC_BLKSZ);

	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
	if (i >= daplka_resource.daplka_rc_len) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_remove: invalid rnum %d\n", rnum);
		return (NULL);
	}

	ASSERT(daplka_resource.daplka_rc_root);
	ASSERT(i < daplka_resource.daplka_rc_len);
	ASSERT(i < daplka_resource.daplka_rc_sz);
	blk = daplka_resource.daplka_rc_root[i];
	if (blk == NULL) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_remove: invalid rnum %d\n", rnum);
		return (NULL);
	}

	if (blk->daplka_rcblk_blks[j] == NULL) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_remove: blk->daplka_rcblk_blks[j] == NULL\n");
		return (NULL);
	}
	/* detach the resource from its slot */
	p = blk->daplka_rcblk_blks[j];
	blk->daplka_rcblk_blks[j] = NULL;
	blk->daplka_rcblk_avail++;
	if (blk->daplka_rcblk_avail == DAPLKA_RC_BLKSZ) {
		/*
		 * free this blk
		 */
		kmem_free(blk, sizeof (*blk));
		daplka_resource.daplka_rc_root[i] = NULL;
	}
	daplka_resource.daplka_rc_cnt--;
	rw_exit(&daplka_resource.daplka_rct_lock);

	/* a reserved-but-never-filled slot yields no resource */
	if ((intptr_t)p == DAPLKA_RC_RESERVED) {
		return (NULL);
	} else {
		return (p);
	}
}
9304 9261
9305 9262 /*
9306 9263 * inserts resource into the slot designated by rnum
9307 9264 */
9308 9265 static int
9309 9266 daplka_resource_insert(minor_t rnum, daplka_resource_t *rp)
9310 9267 {
9311 9268 int i, j, error = -1;
9312 9269 daplka_resource_blk_t *blk;
9313 9270
9314 9271 /*
9315 9272 * Find resource and lock it in WRITER mode
9316 9273 * search for available resource slot
9317 9274 */
9318 9275
9319 9276 i = (int)(rnum / DAPLKA_RC_BLKSZ);
9320 9277 j = (int)(rnum % DAPLKA_RC_BLKSZ);
9321 9278
9322 9279 rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9323 9280 if (i >= daplka_resource.daplka_rc_len) {
9324 9281 rw_exit(&daplka_resource.daplka_rct_lock);
9325 9282 DERR("resource_insert: resource %d not found\n", rnum);
9326 9283 return (-1);
9327 9284 }
9328 9285
9329 9286 blk = daplka_resource.daplka_rc_root[i];
9330 9287 if (blk != NULL) {
9331 9288 ASSERT(i < daplka_resource.daplka_rc_len);
9332 9289 ASSERT(i < daplka_resource.daplka_rc_sz);
9333 9290
9334 9291 if ((intptr_t)blk->daplka_rcblk_blks[j] == DAPLKA_RC_RESERVED) {
9335 9292 blk->daplka_rcblk_blks[j] = rp;
9336 9293 error = 0;
9337 9294 } else {
9338 9295 DERR("resource_insert: %d not reserved, blk = %p\n",
9339 9296 rnum, blk->daplka_rcblk_blks[j]);
9340 9297 }
9341 9298 } else {
9342 9299 DERR("resource_insert: resource %d not found\n", rnum);
9343 9300 }
9344 9301 rw_exit(&daplka_resource.daplka_rct_lock);
9345 9302 return (error);
9346 9303 }
9347 9304
/*
 * finds resource using minor device number.
 * on success a reference is taken on the resource via DAPLKA_RS_REF;
 * the caller is responsible for the matching DAPLKA_RS_UNREF.
 * note that a slot holding the RESERVED sentinel is returned as-is
 * (without a reference); callers test it with DAPLKA_RS_RESERVED.
 */
static daplka_resource_t *
daplka_resource_lookup(minor_t rnum)
{
	int i, j;
	daplka_resource_blk_t *blk;
	daplka_resource_t *rp;

	/*
	 * Find resource and lock it in READER mode
	 * search for available resource slot
	 */

	/* split the minor into a root-array index and a slot offset */
	i = (int)(rnum / DAPLKA_RC_BLKSZ);
	j = (int)(rnum % DAPLKA_RC_BLKSZ);

	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
	if (i >= daplka_resource.daplka_rc_len) {
		rw_exit(&daplka_resource.daplka_rct_lock);
		DERR("resource_lookup: resource %d not found\n", rnum);
		return (NULL);
	}

	blk = daplka_resource.daplka_rc_root[i];
	if (blk != NULL) {
		ASSERT(i < daplka_resource.daplka_rc_len);
		ASSERT(i < daplka_resource.daplka_rc_sz);

		rp = blk->daplka_rcblk_blks[j];
		if (rp == NULL || (intptr_t)rp == DAPLKA_RC_RESERVED) {
			D3("resource_lookup: %d not found, blk = %p\n",
			    rnum, blk->daplka_rcblk_blks[j]);
		} else {
			/* hold the resource for the caller */
			DAPLKA_RS_REF((daplka_ia_resource_t *)rp);
		}
	} else {
		DERR("resource_lookup: resource %d not found\n", rnum);
		rp = NULL;
	}
	rw_exit(&daplka_resource.daplka_rct_lock);
	return (rp);
}
9392 9349
9393 9350 /*
9394 9351 * generic hash table implementation
9395 9352 */
9396 9353
9397 9354 /*
9398 9355 * daplka_hash_create:
9399 9356 * initializes a hash table with the specified parameters
9400 9357 *
9401 9358 * input:
9402 9359 * htblp pointer to hash table
9403 9360 *
9404 9361 * nbuckets number of buckets (must be power of 2)
9405 9362 *
9406 9363 * free_func this function is called on each hash
9407 9364 * table element when daplka_hash_destroy
9408 9365 * is called
9409 9366 *
9410 9367 * lookup_func if daplka_hash_lookup is able to find
9411 9368 * the desired object, this function is
9412 9369 * applied on the object before
9413 9370 * daplka_hash_lookup returns
9414 9371 * output:
9415 9372 * none
9416 9373 *
9417 9374 * return value(s):
9418 9375 * EINVAL nbuckets is not a power of 2
9419 9376 * ENOMEM cannot allocate buckets
9420 9377 * 0 success
9421 9378 */
↓ open down ↓ |
318 lines elided |
↑ open up ↑ |
9422 9379 static int
9423 9380 daplka_hash_create(daplka_hash_table_t *htblp, uint_t nbuckets,
9424 9381 void (*free_func)(void *), void (*lookup_func)(void *))
9425 9382 {
9426 9383 int i;
9427 9384
9428 9385 if ((nbuckets & ~(nbuckets - 1)) != nbuckets) {
9429 9386 DERR("hash_create: nbuckets not power of 2\n");
9430 9387 return (EINVAL);
9431 9388 }
9432 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*htblp))
9433 9389
9434 9390 htblp->ht_buckets =
9435 9391 kmem_zalloc(sizeof (daplka_hash_bucket_t) * nbuckets,
9436 9392 daplka_km_flags);
9437 9393 if (htblp->ht_buckets == NULL) {
9438 9394 DERR("hash_create: cannot allocate buckets\n");
9439 9395 return (ENOMEM);
9440 9396 }
9441 9397 for (i = 0; i < nbuckets; i++) {
9442 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
9443 9398 htblp->ht_buckets[i].hb_count = 0;
9444 9399 htblp->ht_buckets[i].hb_entries = NULL;
9445 - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
9446 9400 }
9447 9401 rw_init(&htblp->ht_table_lock, NULL, RW_DRIVER, NULL);
9448 9402 mutex_init(&htblp->ht_key_lock, NULL, MUTEX_DRIVER, NULL);
9449 9403
9450 9404 htblp->ht_count = 0;
9451 9405 htblp->ht_next_hkey = (uint64_t)gethrtime();
9452 9406 htblp->ht_nbuckets = nbuckets;
9453 9407 htblp->ht_free_func = free_func;
9454 9408 htblp->ht_lookup_func = lookup_func;
9455 9409 htblp->ht_initialized = B_TRUE;
9456 9410 D3("hash_create: done, buckets = %d\n", nbuckets);
9457 - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*htblp))
9458 9411 return (0);
9459 9412 }
9460 9413
/*
 * daplka_hash_insert:
 *	inserts an object into a hash table
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	hkeyp			pointer to hash key.
 *				*hkeyp being non-zero means that the caller
 *				has generated its own hkey. if *hkeyp is zero,
 *				this function will generate an hkey for the
 *				caller. it is recommended that the caller
 *				leave the hkey generation to this function
 *				because the hkey is more likely to be evenly
 *				distributed.
 *
 *	objp			pointer to object to be inserted into
 *				hash table
 *
 * output:
 *	hkeyp			the generated hkey is returned via this pointer
 *
 * return value(s):
 *	EINVAL			invalid parameter, or duplicate key
 *	ENOMEM			cannot allocate hash entry
 *	0			successful
 */
static int
daplka_hash_insert(daplka_hash_table_t *htblp, uint64_t *hkeyp, void *objp)
{
	daplka_hash_entry_t *hep, *curr_hep;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket;
	uint64_t hkey;

	if (hkeyp == NULL) {
		DERR("hash_insert: hkeyp == NULL\n");
		return (EINVAL);
	}
	hep = kmem_zalloc(sizeof (*hep), daplka_km_flags);
	if (hep == NULL) {
		DERR("hash_insert: cannot alloc hash_entry\n");
		return (ENOMEM);
	}
	if (*hkeyp == 0) {
		/* generate a new key */
		mutex_enter(&htblp->ht_key_lock);
		hkey = ++htblp->ht_next_hkey;
		if (hkey == 0) {
			/* key space wrapped; reseed (0 is never valid) */
			hkey = htblp->ht_next_hkey = (uint64_t)gethrtime();
		}
		mutex_exit(&htblp->ht_key_lock);
	} else {
		/* use user generated key */
		hkey = *hkeyp;
	}

	/* only works if ht_nbuckets is a power of 2 */
	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
	ASSERT(objp != NULL);
	ASSERT(bucket < htblp->ht_nbuckets);

	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	hep->he_hkey = hkey;
	hep->he_objp = objp;

	/* look for duplicate entries */
	hbp = &htblp->ht_buckets[bucket];
	curr_hep = hbp->hb_entries;
	while (curr_hep != NULL) {
		if (curr_hep->he_hkey == hep->he_hkey) {
			break;
		}
		curr_hep = curr_hep->he_next;
	}
	if (curr_hep != NULL) {
		/* duplicate key: back out and free the new entry */
		DERR("hash_insert: found duplicate hash entry: "
		    "bucket %d, hkey 0x%016llx\n",
		    bucket, (longlong_t)hep->he_hkey);
		kmem_free(hep, sizeof (*hep));
		rw_exit(&htblp->ht_table_lock);
		return (EINVAL);
	}
	/* prepend the new entry to the bucket chain */
	hep->he_next = hbp->hb_entries;
	hbp->hb_entries = hep;
	hbp->hb_count++;
	htblp->ht_count++;
	rw_exit(&htblp->ht_table_lock);

	/* hand a generated key back to the caller */
	if (*hkeyp == 0) {
		*hkeyp = hkey;
		ASSERT(*hkeyp != 0);
	}
	D3("hash_insert: htblp 0x%p, hkey = 0x%016llx, bucket = %d\n",
	    htblp, (longlong_t)*hkeyp, bucket);
	return (0);
}
9558 9511
9559 9512 /*
9560 9513 * daplka_hash_remove:
9561 9514 * removes object identified by hkey from hash table
9562 9515 *
9563 9516 * input:
9564 9517 * htblp pointer to hash table
9565 9518 *
9566 9519 * hkey hkey that identifies the object to be removed
9567 9520 *
9568 9521 * output:
9569 9522 * objpp pointer to pointer to object.
9570 9523 * if remove is successful, the removed object
9571 9524 * will be returned via *objpp.
9572 9525 *
9573 9526 * return value(s):
9574 9527 * EINVAL cannot find hash entry
9575 9528 * 0 successful
9576 9529 */
9577 9530 static int
9578 9531 daplka_hash_remove(daplka_hash_table_t *htblp, uint64_t hkey, void **objpp)
9579 9532 {
9580 9533 daplka_hash_entry_t *free_hep, **curr_hepp;
9581 9534 daplka_hash_bucket_t *hbp;
9582 9535 uint32_t bucket;
9583 9536
9584 9537 bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9585 9538
9586 9539 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9587 9540 hbp = &htblp->ht_buckets[bucket];
9588 9541
9589 9542 curr_hepp = &hbp->hb_entries;
9590 9543 while (*curr_hepp != NULL) {
9591 9544 if ((*curr_hepp)->he_hkey == hkey) {
9592 9545 break;
9593 9546 }
9594 9547 curr_hepp = &(*curr_hepp)->he_next;
9595 9548 }
9596 9549 if (*curr_hepp == NULL) {
9597 9550 DERR("hash_remove: cannot find hash entry: "
9598 9551 "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9599 9552 rw_exit(&htblp->ht_table_lock);
9600 9553 return (EINVAL);
9601 9554 } else {
9602 9555 if (objpp != NULL) {
9603 9556 *objpp = (*curr_hepp)->he_objp;
9604 9557 }
9605 9558 free_hep = *curr_hepp;
9606 9559 *curr_hepp = (*curr_hepp)->he_next;
9607 9560 kmem_free(free_hep, sizeof (*free_hep));
9608 9561 }
9609 9562 hbp->hb_count--;
9610 9563 htblp->ht_count--;
9611 9564 D3("hash_remove: removed entry, hkey 0x%016llx, bucket %d, "
9612 9565 "hb_count %d, hb_count %d\n",
9613 9566 (longlong_t)hkey, bucket, hbp->hb_count, htblp->ht_count);
9614 9567 rw_exit(&htblp->ht_table_lock);
9615 9568 return (0);
9616 9569 }
9617 9570
9618 9571 /*
9619 9572 * daplka_hash_walk:
9620 9573 * walks through the entire hash table. applying func on each of
9621 9574 * the inserted objects. stops walking if func returns non-zero.
9622 9575 *
9623 9576 * input:
9624 9577 * htblp pointer to hash table
9625 9578 *
9626 9579 * func function to be applied on each object
9627 9580 *
9628 9581 * farg second argument to func
9629 9582 *
9630 9583 * lockmode can be RW_WRITER or RW_READER. this
9631 9584 * allows the caller to choose what type
9632 9585 * of lock to acquire before walking the
9633 9586 * table.
9634 9587 *
9635 9588 * output:
9636 9589 * none
9637 9590 *
9638 9591 * return value(s):
9639 9592 * none
9640 9593 */
↓ open down ↓ |
173 lines elided |
↑ open up ↑ |
9641 9594 static void
9642 9595 daplka_hash_walk(daplka_hash_table_t *htblp, int (*func)(void *, void *),
9643 9596 void *farg, krw_t lockmode)
9644 9597 {
9645 9598 daplka_hash_entry_t *curr_hep;
9646 9599 daplka_hash_bucket_t *hbp;
9647 9600 uint32_t bucket, retval = 0;
9648 9601
9649 9602 ASSERT(lockmode == RW_WRITER || lockmode == RW_READER);
9650 9603
9651 - /* needed for warlock */
9652 9604 if (lockmode == RW_WRITER) {
9653 9605 rw_enter(&htblp->ht_table_lock, RW_WRITER);
9654 9606 } else {
9655 9607 rw_enter(&htblp->ht_table_lock, RW_READER);
9656 9608 }
9657 9609 for (bucket = 0; bucket < htblp->ht_nbuckets && retval == 0; bucket++) {
9658 9610 hbp = &htblp->ht_buckets[bucket];
9659 9611 curr_hep = hbp->hb_entries;
9660 9612 while (curr_hep != NULL) {
9661 9613 retval = (*func)(curr_hep->he_objp, farg);
9662 9614 if (retval != 0) {
9663 9615 break;
9664 9616 }
9665 9617 curr_hep = curr_hep->he_next;
9666 9618 }
9667 9619 }
9668 9620 rw_exit(&htblp->ht_table_lock);
9669 9621 }
9670 9622
/*
 * daplka_hash_lookup:
 *	finds object from hkey
 *
 * input:
 *	htblp			pointer to hash table
 *
 *	hkey			hkey that identifies the object to be looked up
 *
 * output:
 *	none
 *
 * return value(s):
 *	NULL			if not found
 *	object pointer		if found; the table's ht_lookup_func (if any)
 *				has been applied to it, typically taking a
 *				reference on the caller's behalf
 */
static void *
daplka_hash_lookup(daplka_hash_table_t *htblp, uint64_t hkey)
{
	daplka_hash_entry_t *curr_hep;
	uint32_t bucket;
	void *objp;

	/* only works if ht_nbuckets is a power of 2 */
	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));

	rw_enter(&htblp->ht_table_lock, RW_READER);
	/* scan the bucket chain for a matching key */
	curr_hep = htblp->ht_buckets[bucket].hb_entries;
	while (curr_hep != NULL) {
		if (curr_hep->he_hkey == hkey) {
			break;
		}
		curr_hep = curr_hep->he_next;
	}
	if (curr_hep == NULL) {
		DERR("hash_lookup: cannot find hash entry: "
		    "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
		rw_exit(&htblp->ht_table_lock);
		return (NULL);
	}
	objp = curr_hep->he_objp;
	ASSERT(objp != NULL);
	/* apply the per-table lookup hook while still holding the lock */
	if (htblp->ht_lookup_func != NULL) {
		(*htblp->ht_lookup_func)(objp);
	}
	rw_exit(&htblp->ht_table_lock);
	return (objp);
}
9718 9670
/*
 * daplka_hash_destroy:
 *	destroys hash table. applies free_func on all inserted objects.
 *
 * input:
 *	htblp			pointer to hash table
 *
 * output:
 *	none
 *
 * return value(s):
 *	none
 */
static void
daplka_hash_destroy(daplka_hash_table_t *htblp)
{
	daplka_hash_entry_t *curr_hep, *free_hep;
	daplka_hash_entry_t *free_list = NULL;
	daplka_hash_bucket_t *hbp;
	uint32_t bucket, cnt, total = 0;

	if (!htblp->ht_initialized) {
		DERR("hash_destroy: not initialized\n");
		return;
	}
	/*
	 * phase 1: detach all entries onto a private list while holding
	 * the lock; the objects are freed later without the lock held,
	 * since free_func may block or re-enter other locks.
	 */
	/* free all elements from hash table */
	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	for (bucket = 0; bucket < htblp->ht_nbuckets; bucket++) {
		hbp = &htblp->ht_buckets[bucket];

		/* build list of elements to be freed */
		curr_hep = hbp->hb_entries;
		cnt = 0;
		while (curr_hep != NULL) {
			cnt++;
			free_hep = curr_hep;
			curr_hep = curr_hep->he_next;

			free_hep->he_next = free_list;
			free_list = free_hep;
		}
		ASSERT(cnt == hbp->hb_count);
		total += cnt;
		hbp->hb_count = 0;
		hbp->hb_entries = NULL;
	}
	ASSERT(total == htblp->ht_count);
	D3("hash_destroy: htblp 0x%p, nbuckets %d, freed %d hash entries\n",
	    htblp, htblp->ht_nbuckets, total);
	rw_exit(&htblp->ht_table_lock);

	/* phase 2: free all objects, now without holding the hash table lock */
	cnt = 0;
	while (free_list != NULL) {
		cnt++;
		free_hep = free_list;
		free_list = free_list->he_next;
		if (htblp->ht_free_func != NULL) {
			(*htblp->ht_free_func)(free_hep->he_objp);
		}
		kmem_free(free_hep, sizeof (*free_hep));
	}
	ASSERT(total == cnt);

	/* free hash buckets and destroy locks */
	kmem_free(htblp->ht_buckets,
	    sizeof (daplka_hash_bucket_t) * htblp->ht_nbuckets);

	rw_enter(&htblp->ht_table_lock, RW_WRITER);
	htblp->ht_buckets = NULL;
	htblp->ht_count = 0;
	htblp->ht_nbuckets = 0;
	htblp->ht_free_func = NULL;
	htblp->ht_lookup_func = NULL;
	htblp->ht_initialized = B_FALSE;
	rw_exit(&htblp->ht_table_lock);

	mutex_destroy(&htblp->ht_key_lock);
	rw_destroy(&htblp->ht_table_lock);
}
9799 9751
9800 9752 /*
9801 9753 * daplka_hash_getsize:
9802 9754 * return the number of objects in hash table
9803 9755 *
9804 9756 * input:
9805 9757 * htblp pointer to hash table
9806 9758 *
9807 9759 * output:
9808 9760 * none
9809 9761 *
9810 9762 * return value(s):
9811 9763 * number of objects in hash table
9812 9764 */
9813 9765 static uint32_t
9814 9766 daplka_hash_getsize(daplka_hash_table_t *htblp)
9815 9767 {
9816 9768 uint32_t sz;
9817 9769
9818 9770 rw_enter(&htblp->ht_table_lock, RW_READER);
9819 9771 sz = htblp->ht_count;
9820 9772 rw_exit(&htblp->ht_table_lock);
9821 9773
9822 9774 return (sz);
9823 9775 }
9824 9776
/*
 * this function is used as ht_lookup_func above when lookup is called.
 * other types of objs may use a more elaborate lookup_func.
 * takes a reference on the resource on behalf of the caller of
 * daplka_hash_lookup; released via the resource's UNREF path.
 */
static void
daplka_hash_generic_lookup(void *obj)
{
	daplka_resource_t	*rp = (daplka_resource_t *)obj;

	mutex_enter(&rp->rs_reflock);
	rp->rs_refcnt++;
	/* overflow of the reference count would be a driver bug */
	ASSERT(rp->rs_refcnt != 0);
	mutex_exit(&rp->rs_reflock);
}
9839 9791
9840 9792 /*
9841 9793 * Generates a non-zero 32 bit hash key used for the timer hash table.
9842 9794 */
9843 9795 static uint32_t
9844 9796 daplka_timer_hkey_gen()
9845 9797 {
9846 9798 uint32_t new_hkey;
9847 9799
9848 9800 do {
9849 9801 new_hkey = atomic_inc_32_nv(&daplka_timer_hkey);
9850 9802 } while (new_hkey == 0);
9851 9803
9852 9804 return (new_hkey);
9853 9805 }
9854 9806
9855 9807
9856 9808 /*
9857 9809 * The DAPL KA debug logging routines
9858 9810 */
9859 9811
9860 9812 /*
9861 9813 * Add the string str to the end of the debug log, followed by a newline.
9862 9814 */
9863 9815 static void
9864 9816 daplka_dbglog(char *str)
9865 9817 {
9866 9818 size_t length;
9867 9819 size_t remlen;
9868 9820
9869 9821 /*
9870 9822 * If this is the first time we've written to the log, initialize it.
9871 9823 */
9872 9824 if (!daplka_dbginit) {
9873 9825 return;
9874 9826 }
9875 9827 mutex_enter(&daplka_dbglock);
9876 9828 /*
9877 9829 * Note the log is circular; if this string would run over the end,
9878 9830 * we copy the first piece to the end and then the last piece to
9879 9831 * the beginning of the log.
9880 9832 */
9881 9833 length = strlen(str);
9882 9834
9883 9835 remlen = (size_t)sizeof (daplka_dbgbuf) - daplka_dbgnext - 1;
9884 9836
9885 9837 if (length > remlen) {
9886 9838 if (remlen)
9887 9839 bcopy(str, daplka_dbgbuf + daplka_dbgnext, remlen);
9888 9840 daplka_dbgbuf[sizeof (daplka_dbgbuf) - 1] = (char)NULL;
9889 9841 str += remlen;
9890 9842 length -= remlen;
9891 9843 daplka_dbgnext = 0;
9892 9844 }
9893 9845 bcopy(str, daplka_dbgbuf + daplka_dbgnext, length);
9894 9846 daplka_dbgnext += length;
9895 9847
9896 9848 if (daplka_dbgnext >= sizeof (daplka_dbgbuf))
9897 9849 daplka_dbgnext = 0;
9898 9850 mutex_exit(&daplka_dbglock);
9899 9851 }
9900 9852
9901 9853
9902 9854 /*
9903 9855 * Add a printf-style message to whichever debug logs we're currently using.
9904 9856 */
9905 9857 static void
9906 9858 daplka_debug(const char *fmt, ...)
9907 9859 {
9908 9860 char buff[512];
9909 9861 va_list ap;
9910 9862 /*
9911 9863 * The system prepends the thread id and high resolution time
9912 9864 * (nanoseconds are dropped and so are the upper digits)
9913 9865 * to the specified string.
9914 9866 * The unit for timestamp is 10 microseconds.
9915 9867 * It wraps around every 10000 seconds.
9916 9868 * Ex: gethrtime() = X ns = X/1000 us = X/10000 10 micro sec.
9917 9869 */
9918 9870 int micro_time = (int)((gethrtime() / 10000) % 1000000000);
9919 9871 (void) sprintf(buff, "th %p tm %9d: ", (void *)curthread, micro_time);
9920 9872
9921 9873 va_start(ap, fmt);
9922 9874 (void) vsprintf(buff+strlen(buff), fmt, ap);
9923 9875 va_end(ap);
9924 9876
9925 9877 daplka_dbglog(buff);
9926 9878 }
9927 9879
9928 9880 static void
9929 9881 daplka_console(const char *fmt, ...)
9930 9882 {
9931 9883 char buff[512];
9932 9884 va_list ap;
9933 9885
9934 9886 va_start(ap, fmt);
9935 9887 (void) vsprintf(buff, fmt, ap);
9936 9888 va_end(ap);
9937 9889
9938 9890 cmn_err(CE_CONT, "%s", buff);
9939 9891 }
↓ open down ↓ |
278 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX