1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * tavor_umap.c
29 * Tavor Userland Mapping Routines
30 *
31 * Implements all the routines necessary for enabling direct userland
32 * access to the Tavor hardware. This includes all routines necessary for
33 * maintaining the "userland resources database" and all the support routines
34 * for the devmap calls.
35 */
36
37 #include <sys/types.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/modctl.h>
42 #include <sys/file.h>
43 #include <sys/avl.h>
44 #include <sys/sysmacros.h>
45
46 #include <sys/ib/adapters/tavor/tavor.h>
47
48 /* Tavor HCA state pointer (extern) */
49 extern void *tavor_statep;
50
51 /* Tavor HCA Userland Resource Database (extern) */
52 extern tavor_umap_db_t tavor_userland_rsrc_db;
53
54 static int tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
55 tavor_rsrc_t *rsrcp, size_t *maplen, int *err);
56 static int tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
57 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
58 static int tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
59 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
60 static int tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
61 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
62 static int tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
63 offset_t off, size_t len, void **pvtp);
64 static int tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp,
65 devmap_cookie_t new_dhp, void **new_pvtp);
66 static void tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp,
67 offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
68 devmap_cookie_t new_dhp2, void **pvtp2);
69 static int tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
70 offset_t off, size_t len, void **pvtp);
71 static int tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
72 devmap_cookie_t new_dhp, void **new_pvtp);
73 static void tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp,
74 offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
75 devmap_cookie_t new_dhp2, void **pvtp2);
76 static ibt_status_t tavor_umap_mr_data_in(tavor_mrhdl_t mr,
77 ibt_mr_data_in_t *data, size_t data_sz);
78 static ibt_status_t tavor_umap_cq_data_out(tavor_cqhdl_t cq,
79 mlnx_umap_cq_data_out_t *data, size_t data_sz);
80 static ibt_status_t tavor_umap_qp_data_out(tavor_qphdl_t qp,
81 mlnx_umap_qp_data_out_t *data, size_t data_sz);
82 static ibt_status_t tavor_umap_srq_data_out(tavor_srqhdl_t srq,
83 mlnx_umap_srq_data_out_t *data, size_t data_sz);
84 static int tavor_umap_db_compare(const void *query, const void *entry);
85 static ibt_status_t tavor_umap_pd_data_out(tavor_pdhdl_t pd,
86 mlnx_umap_pd_data_out_t *data, size_t data_sz);
87
88
89 /*
90 * These callbacks are passed to devmap_umem_setup() and devmap_devmem_setup(),
91 * respectively. They are used to handle (among other things) partial
92 * unmappings and to provide a method for invalidating mappings inherited
93 * as a result of a fork(2) system call.
94 */
/* Callback vector handed to devmap_umem_setup() for CQ/QP/SRQ memory */
static struct devmap_callback_ctl tavor_devmap_umem_cbops = {
	DEVMAP_OPS_REV,			/* callback structure revision */
	tavor_devmap_umem_map,		/* called when a mapping is set up */
	NULL,				/* no access callback is provided */
	tavor_devmap_umem_dup,		/* called when a mapping is duplicated */
	tavor_devmap_umem_unmap		/* called on (partial) unmapping */
};
/* Callback vector handed to devmap_devmem_setup() for UAR pages */
static struct devmap_callback_ctl tavor_devmap_devmem_cbops = {
	DEVMAP_OPS_REV,			/* callback structure revision */
	tavor_devmap_devmem_map,	/* called when a mapping is set up */
	NULL,				/* no access callback is provided */
	tavor_devmap_devmem_dup,	/* called when a mapping is duplicated */
	tavor_devmap_devmem_unmap	/* called when the page is unmapped */
};
109
110 /*
111 * tavor_devmap()
112 * Context: Can be called from user context.
113 */
114 /* ARGSUSED */
115 int
116 tavor_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
117 size_t *maplen, uint_t model)
118 {
119 tavor_state_t *state;
120 tavor_rsrc_t *rsrcp;
121 minor_t instance;
122 uint64_t key, value;
123 uint_t type;
124 int err, status;
125
126 TAVOR_TNF_ENTER(tavor_devmap);
127
128 /* Get Tavor softstate structure from instance */
129 instance = TAVOR_DEV_INSTANCE(dev);
130 state = ddi_get_soft_state(tavor_statep, instance);
131 if (state == NULL) {
132 TNF_PROBE_0(tavor_devmap_gss_fail, TAVOR_TNF_ERROR, "");
133 TAVOR_TNF_EXIT(tavor_devmap);
134 return (ENXIO);
135 }
136
137 /*
138 * Access to Tavor devmap interface is not allowed in
139 * "maintenance mode".
140 */
141 if (state->ts_operational_mode == TAVOR_MAINTENANCE_MODE) {
142 TNF_PROBE_0(tavor_devmap_maintenance_mode_fail,
143 TAVOR_TNF_ERROR, "");
144 TAVOR_TNF_EXIT(tavor_devmap);
145 return (EFAULT);
146 }
147
148 /*
149 * The bottom bits of "offset" are undefined (number depends on
150 * system PAGESIZE). Shifting these off leaves us with a "key".
151 * The "key" is actually a combination of both a real key value
152 * (for the purpose of database lookup) and a "type" value. We
153 * extract this information before doing the database lookup.
154 */
155 key = off >> PAGESHIFT;
156 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
157 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
158 status = tavor_umap_db_find(instance, key, type, &value, 0, NULL);
159 if (status == DDI_SUCCESS) {
160 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
161
162 switch (type) {
163 case MLNX_UMAP_UARPG_RSRC:
164 /*
165 * Double check that process who open()'d Tavor is
166 * same process attempting to mmap() UAR page.
167 */
168 if (key != ddi_get_pid()) {
169 TNF_PROBE_0(tavor_devmap_uarpg_invpid_fail,
170 TAVOR_TNF_ERROR, "");
171 TAVOR_TNF_EXIT(tavor_devmap);
172 return (EINVAL);
173 }
174
175 /* Map the UAR page out for userland access */
176 status = tavor_umap_uarpg(state, dhp, rsrcp, maplen,
177 &err);
178 if (status != DDI_SUCCESS) {
179 TNF_PROBE_0(tavor_devmap_uarpg_map_fail,
180 TAVOR_TNF_ERROR, "");
181 TAVOR_TNF_EXIT(tavor_devmap);
182 return (err);
183 }
184 break;
185
186 case MLNX_UMAP_CQMEM_RSRC:
187 /* Map the CQ memory out for userland access */
188 status = tavor_umap_cqmem(state, dhp, rsrcp, off,
189 maplen, &err);
190 if (status != DDI_SUCCESS) {
191 TNF_PROBE_0(tavor_devmap_cqmem_map_fail,
192 TAVOR_TNF_ERROR, "");
193 TAVOR_TNF_EXIT(tavor_devmap);
194 return (err);
195 }
196 break;
197
198 case MLNX_UMAP_QPMEM_RSRC:
199 /* Map the QP memory out for userland access */
200 status = tavor_umap_qpmem(state, dhp, rsrcp, off,
201 maplen, &err);
202 if (status != DDI_SUCCESS) {
203 TNF_PROBE_0(tavor_devmap_qpmem_map_fail,
204 TAVOR_TNF_ERROR, "");
205 TAVOR_TNF_EXIT(tavor_devmap);
206 return (err);
207 }
208 break;
209
210 case MLNX_UMAP_SRQMEM_RSRC:
211 /* Map the SRQ memory out for userland access */
212 status = tavor_umap_srqmem(state, dhp, rsrcp, off,
213 maplen, &err);
214 if (status != DDI_SUCCESS) {
215 TNF_PROBE_0(tavor_devmap_srqmem_map_fail,
216 TAVOR_TNF_ERROR, "");
217 TAVOR_TNF_EXIT(tavor_devmap);
218 return (err);
219 }
220 break;
221
222 default:
223 TAVOR_WARNING(state, "unexpected rsrc type in devmap");
224 TNF_PROBE_0(tavor_devmap_invrsrc_fail,
225 TAVOR_TNF_ERROR, "");
226 TAVOR_TNF_EXIT(tavor_devmap);
227 return (EINVAL);
228 }
229 } else {
230 TNF_PROBE_0(tavor_devmap_umap_lookup_fail, TAVOR_TNF_ERROR, "");
231 TAVOR_TNF_EXIT(tavor_devmap);
232 return (EINVAL);
233 }
234
235 TAVOR_TNF_EXIT(tavor_devmap);
236 return (0);
237 }
238
239
240 /*
241 * tavor_umap_uarpg()
242 * Context: Can be called from user context.
243 */
244 static int
245 tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
246 tavor_rsrc_t *rsrcp, size_t *maplen, int *err)
247 {
248 int status;
249 uint_t maxprot;
250
251 TAVOR_TNF_ENTER(tavor_umap_uarpg);
252
253 /* Map out the UAR page (doorbell page) */
254 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
255 status = devmap_devmem_setup(dhp, state->ts_dip,
256 &tavor_devmap_devmem_cbops, TAVOR_UAR_BAR, (rsrcp->tr_indx <<
257 PAGESHIFT), PAGESIZE, maxprot, DEVMAP_ALLOW_REMAP,
258 &state->ts_reg_accattr);
259 if (status < 0) {
260 *err = status;
261 TNF_PROBE_0(tavor_umap_uarpg_devmap_fail, TAVOR_TNF_ERROR, "");
262 TAVOR_TNF_EXIT(tavor_umap_uarpg);
263 return (DDI_FAILURE);
264 }
265
266 *maplen = PAGESIZE;
267 TAVOR_TNF_EXIT(tavor_umap_uarpg);
268 return (DDI_SUCCESS);
269 }
270
271
272 /*
273 * tavor_umap_cqmem()
274 * Context: Can be called from user context.
275 */
276 /* ARGSUSED */
277 static int
278 tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
279 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
280 {
281 tavor_cqhdl_t cq;
282 size_t size;
283 uint_t maxprot;
284 int status;
285
286 TAVOR_TNF_ENTER(tavor_umap_cqmem);
287
288 /* Extract the Tavor CQ handle pointer from the tavor_rsrc_t */
289 cq = (tavor_cqhdl_t)rsrcp->tr_addr;
290
291 /* Round-up the CQ size to system page size */
292 size = ptob(btopr(cq->cq_cqinfo.qa_size));
293
294 /* Map out the CQ memory */
295 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
296 status = devmap_umem_setup(dhp, state->ts_dip,
297 &tavor_devmap_umem_cbops, cq->cq_cqinfo.qa_umemcookie, 0, size,
298 maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
299 if (status < 0) {
300 *err = status;
301 TNF_PROBE_0(tavor_umap_cqmem_devmap_fail, TAVOR_TNF_ERROR, "");
302 TAVOR_TNF_EXIT(tavor_umap_cqmem);
303 return (DDI_FAILURE);
304 }
305 *maplen = size;
306
307 TAVOR_TNF_EXIT(tavor_umap_cqmem);
308 return (DDI_SUCCESS);
309 }
310
311
312 /*
313 * tavor_umap_qpmem()
314 * Context: Can be called from user context.
315 */
316 /* ARGSUSED */
317 static int
318 tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
319 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
320 {
321 tavor_qphdl_t qp;
322 offset_t offset;
323 size_t size;
324 uint_t maxprot;
325 int status;
326
327 TAVOR_TNF_ENTER(tavor_umap_qpmem);
328
329 /* Extract the Tavor QP handle pointer from the tavor_rsrc_t */
330 qp = (tavor_qphdl_t)rsrcp->tr_addr;
331
332 /*
333 * Calculate the offset of the first work queue (send or recv) into
334 * the memory (ddi_umem_alloc()) allocated previously for the QP.
335 */
336 offset = (offset_t)((uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
337 (uintptr_t)qp->qp_wqinfo.qa_buf_real);
338
339 /* Round-up the QP work queue sizes to system page size */
340 size = ptob(btopr(qp->qp_wqinfo.qa_size));
341
342 /* Map out the QP memory */
343 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
344 status = devmap_umem_setup(dhp, state->ts_dip,
345 &tavor_devmap_umem_cbops, qp->qp_wqinfo.qa_umemcookie, offset,
346 size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
347 if (status < 0) {
348 *err = status;
349 TNF_PROBE_0(tavor_umap_qpmem_devmap_fail, TAVOR_TNF_ERROR, "");
350 TAVOR_TNF_EXIT(tavor_umap_qpmem);
351 return (DDI_FAILURE);
352 }
353 *maplen = size;
354
355 TAVOR_TNF_EXIT(tavor_umap_qpmem);
356 return (DDI_SUCCESS);
357 }
358
359
360 /*
361 * tavor_umap_srqmem()
362 * Context: Can be called from user context.
363 */
364 /* ARGSUSED */
365 static int
366 tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
367 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
368 {
369 tavor_srqhdl_t srq;
370 offset_t offset;
371 size_t size;
372 uint_t maxprot;
373 int status;
374
375 TAVOR_TNF_ENTER(tavor_umap_srqmem);
376
377 /* Extract the Tavor SRQ handle pointer from the tavor_rsrc_t */
378 srq = (tavor_srqhdl_t)rsrcp->tr_addr;
379
380 /*
381 * Calculate the offset of the first shared recv queue into the memory
382 * (ddi_umem_alloc()) allocated previously for the SRQ.
383 */
384 offset = (offset_t)((uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
385 (uintptr_t)srq->srq_wqinfo.qa_buf_real);
386
387 /* Round-up the SRQ work queue sizes to system page size */
388 size = ptob(btopr(srq->srq_wqinfo.qa_size));
389
390 /* Map out the QP memory */
391 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
392 status = devmap_umem_setup(dhp, state->ts_dip,
393 &tavor_devmap_umem_cbops, srq->srq_wqinfo.qa_umemcookie, offset,
394 size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
395 if (status < 0) {
396 *err = status;
397 TNF_PROBE_0(tavor_umap_srqmem_devmap_fail, TAVOR_TNF_ERROR, "");
398 TAVOR_TNF_EXIT(tavor_umap_srqmem);
399 return (DDI_FAILURE);
400 }
401 *maplen = size;
402
403 TAVOR_TNF_EXIT(tavor_umap_srqmem);
404 return (DDI_SUCCESS);
405 }
406
407
408 /*
409 * tavor_devmap_umem_map()
410 * Context: Can be called from kernel context.
411 */
412 /* ARGSUSED */
413 static int
414 tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
415 offset_t off, size_t len, void **pvtp)
416 {
417 tavor_state_t *state;
418 tavor_devmap_track_t *dvm_track;
419 tavor_cqhdl_t cq;
420 tavor_qphdl_t qp;
421 tavor_srqhdl_t srq;
422 minor_t instance;
423 uint64_t key;
424 uint_t type;
425
426 TAVOR_TNF_ENTER(tavor_devmap_umem_map);
427
428 /* Get Tavor softstate structure from instance */
429 instance = TAVOR_DEV_INSTANCE(dev);
430 state = ddi_get_soft_state(tavor_statep, instance);
431 if (state == NULL) {
432 TNF_PROBE_0(tavor_devmap_umem_map_gss_fail, TAVOR_TNF_ERROR,
433 "");
434 TAVOR_TNF_EXIT(tavor_devmap_umem_map);
435 return (ENXIO);
436 }
437
438 /*
439 * The bottom bits of "offset" are undefined (number depends on
440 * system PAGESIZE). Shifting these off leaves us with a "key".
441 * The "key" is actually a combination of both a real key value
442 * (for the purpose of database lookup) and a "type" value. Although
443 * we are not going to do any database lookup per se, we do want
444 * to extract the "key" and the "type" (to enable faster lookup of
445 * the appropriate CQ or QP handle).
446 */
447 key = off >> PAGESHIFT;
448 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
449 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
450
451 /*
452 * Allocate an entry to track the mapping and unmapping (specifically,
453 * partial unmapping) of this resource.
454 */
455 dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
456 sizeof (tavor_devmap_track_t), KM_SLEEP);
457 dvm_track->tdt_offset = off;
458 dvm_track->tdt_state = state;
459 dvm_track->tdt_refcnt = 1;
460 mutex_init(&dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
461 DDI_INTR_PRI(state->ts_intrmsi_pri));
462
463 /*
464 * Depending of the type of resource that has been mapped out, we
465 * need to update the QP or CQ handle to reflect that it has, in
466 * fact, been mapped. This allows the driver code which frees a QP
467 * or a CQ to know whether it is appropriate to do a
468 * devmap_devmem_remap() to invalidate the userland mapping for the
469 * corresponding queue's memory.
470 */
471 if (type == MLNX_UMAP_CQMEM_RSRC) {
472
473 /* Use "key" (CQ number) to do fast lookup of CQ handle */
474 cq = tavor_cqhdl_from_cqnum(state, key);
475
476 /*
477 * Update the handle to the userland mapping. Note: If
478 * the CQ already has a valid userland mapping, then stop
479 * and return failure.
480 */
481 mutex_enter(&cq->cq_lock);
482 if (cq->cq_umap_dhp == NULL) {
483 cq->cq_umap_dhp = dhp;
484 dvm_track->tdt_size = cq->cq_cqinfo.qa_size;
485 mutex_exit(&cq->cq_lock);
486 } else {
487 mutex_exit(&cq->cq_lock);
488 goto umem_map_fail;
489 }
490
491 } else if (type == MLNX_UMAP_QPMEM_RSRC) {
492
493 /* Use "key" (QP number) to do fast lookup of QP handle */
494 qp = tavor_qphdl_from_qpnum(state, key);
495
496 /*
497 * Update the handle to the userland mapping. Note: If
498 * the CQ already has a valid userland mapping, then stop
499 * and return failure.
500 */
501 mutex_enter(&qp->qp_lock);
502 if (qp->qp_umap_dhp == NULL) {
503 qp->qp_umap_dhp = dhp;
504 dvm_track->tdt_size = qp->qp_wqinfo.qa_size;
505 mutex_exit(&qp->qp_lock);
506 } else {
507 mutex_exit(&qp->qp_lock);
508 goto umem_map_fail;
509 }
510
511 } else if (type == MLNX_UMAP_SRQMEM_RSRC) {
512
513 /* Use "key" (SRQ number) to do fast lookup on SRQ handle */
514 srq = tavor_srqhdl_from_srqnum(state, key);
515
516 /*
517 * Update the handle to the userland mapping. Note: If the
518 * SRQ already has a valid userland mapping, then stop and
519 * return failure.
520 */
521 mutex_enter(&srq->srq_lock);
522 if (srq->srq_umap_dhp == NULL) {
523 srq->srq_umap_dhp = dhp;
524 dvm_track->tdt_size = srq->srq_wqinfo.qa_size;
525 mutex_exit(&srq->srq_lock);
526 } else {
527 mutex_exit(&srq->srq_lock);
528 goto umem_map_fail;
529 }
530 }
531
532 /*
533 * Pass the private "Tavor devmap tracking structure" back. This
534 * pointer will be returned in subsequent "unmap" callbacks.
535 */
536 *pvtp = dvm_track;
537
538 TAVOR_TNF_EXIT(tavor_devmap_umem_map);
539 return (DDI_SUCCESS);
540
541 umem_map_fail:
542 mutex_destroy(&dvm_track->tdt_lock);
543 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
544 TAVOR_TNF_EXIT(tavor_devmap_umem_map);
545 return (DDI_FAILURE);
546 }
547
548
549 /*
550 * tavor_devmap_umem_dup()
551 * Context: Can be called from kernel context.
552 */
553 /* ARGSUSED */
554 static int
555 tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp,
556 void **new_pvtp)
557 {
558 tavor_state_t *state;
559 tavor_devmap_track_t *dvm_track, *new_dvm_track;
560 uint_t maxprot;
561 int status;
562
563 TAVOR_TNF_ENTER(tavor_devmap_umem_dup);
564
565 /*
566 * Extract the Tavor softstate pointer from "Tavor devmap tracking
567 * structure" (in "pvtp").
568 */
569 dvm_track = (tavor_devmap_track_t *)pvtp;
570 state = dvm_track->tdt_state;
571
572 /*
573 * Since this devmap_dup() entry point is generally called
574 * when a process does fork(2), it is incumbent upon the driver
575 * to insure that the child does not inherit a valid copy of
576 * the parent's QP or CQ resource. This is accomplished by using
577 * devmap_devmem_remap() to invalidate the child's mapping to the
578 * kernel memory.
579 */
580 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
581 status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
582 dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
583 if (status != DDI_SUCCESS) {
584 TAVOR_WARNING(state, "failed in tavor_devmap_umem_dup()");
585 TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
586 return (status);
587 }
588
589 /*
590 * Allocate a new entry to track the subsequent unmapping
591 * (specifically, all partial unmappings) of the child's newly
592 * invalidated resource. Note: Setting the "tdt_size" field to
593 * zero here is an indication to the devmap_unmap() entry point
594 * that this mapping is invalid, and that its subsequent unmapping
595 * should not affect any of the parent's CQ or QP resources.
596 */
597 new_dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
598 sizeof (tavor_devmap_track_t), KM_SLEEP);
599 new_dvm_track->tdt_offset = 0;
600 new_dvm_track->tdt_state = state;
601 new_dvm_track->tdt_refcnt = 1;
602 new_dvm_track->tdt_size = 0;
603 mutex_init(&new_dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
604 DDI_INTR_PRI(state->ts_intrmsi_pri));
605 *new_pvtp = new_dvm_track;
606
607 TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
608 return (DDI_SUCCESS);
609 }
610
611
612 /*
613 * tavor_devmap_umem_unmap()
614 * Context: Can be called from kernel context.
615 */
616 /* ARGSUSED */
617 static void
618 tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
619 size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
620 devmap_cookie_t new_dhp2, void **pvtp2)
621 {
622 tavor_state_t *state;
623 tavor_rsrc_t *rsrcp;
624 tavor_devmap_track_t *dvm_track;
625 tavor_cqhdl_t cq;
626 tavor_qphdl_t qp;
627 tavor_srqhdl_t srq;
628 uint64_t key, value;
629 uint_t type;
630 uint_t size;
631 int status;
632
633 TAVOR_TNF_ENTER(tavor_devmap_umem_unmap);
634
635 /*
636 * Extract the Tavor softstate pointer from "Tavor devmap tracking
637 * structure" (in "pvtp").
638 */
639 dvm_track = (tavor_devmap_track_t *)pvtp;
640 state = dvm_track->tdt_state;
641
642 /*
643 * Extract the "offset" from the "Tavor devmap tracking structure".
644 * Note: The input argument "off" is ignored here because the
645 * Tavor mapping interfaces define a very specific meaning to
646 * each "logical offset". Also extract the "key" and "type" encoded
647 * in the logical offset.
648 */
649 key = dvm_track->tdt_offset >> PAGESHIFT;
650 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
651 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
652
653 /*
654 * Extract the "size" of the mapping. If this size is determined
655 * to be zero, then it is an indication of a previously invalidated
656 * mapping, and no CQ or QP resources should be affected.
657 */
658 size = dvm_track->tdt_size;
659
660 /*
661 * If only the "middle portion of a given mapping is being unmapped,
662 * then we are effectively creating one new piece of mapped memory.
663 * (Original region is divided into three pieces of which the middle
664 * piece is being removed. This leaves two pieces. Since we started
665 * with one piece and now have two pieces, we need to increment the
666 * counter in the "Tavor devmap tracking structure".
667 *
668 * If, however, the whole mapped region is being unmapped, then we
669 * have started with one region which we are completely removing.
670 * In this case, we need to decrement the counter in the "Tavor
671 * devmap tracking structure".
672 *
673 * In each of the remaining cases, we will have started with one
674 * mapped region and ended with one (different) region. So no counter
675 * modification is necessary.
676 */
677 mutex_enter(&dvm_track->tdt_lock);
678 if ((new_dhp1 == NULL) && (new_dhp2 == NULL)) {
679 dvm_track->tdt_refcnt--;
680 } else if ((new_dhp1 != NULL) && (new_dhp2 != NULL)) {
681 dvm_track->tdt_refcnt++;
682 }
683 mutex_exit(&dvm_track->tdt_lock);
684
685 /*
686 * For each of the cases where the region is being divided, then we
687 * need to pass back the "Tavor devmap tracking structure". This way
688 * we get it back when each of the remaining pieces is subsequently
689 * unmapped.
690 */
691 if (new_dhp1 != NULL) {
692 *pvtp1 = pvtp;
693 }
694 if (new_dhp2 != NULL) {
695 *pvtp2 = pvtp;
696 }
697
698 /*
699 * If the "Tavor devmap tracking structure" is no longer being
700 * referenced, then free it up. Otherwise, return.
701 */
702 if (dvm_track->tdt_refcnt == 0) {
703 mutex_destroy(&dvm_track->tdt_lock);
704 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
705
706 /*
707 * If the mapping was invalid (see explanation above), then
708 * no further processing is necessary.
709 */
710 if (size == 0) {
711 TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
712 return;
713 }
714 } else {
715 TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
716 return;
717 }
718
719 /*
720 * Now that we can guarantee that the user memory is fully unmapped,
721 * we can use the "key" and "type" values to try to find the entry
722 * in the "userland resources database". If it's found, then it
723 * indicates that the queue memory (CQ or QP) has not yet been freed.
724 * In this case, we update the corresponding CQ or QP handle to
725 * indicate that the "devmap_devmem_remap()" call will be unnecessary.
726 * If it's _not_ found, then it indicates that the CQ or QP memory
727 * was, in fact, freed before it was unmapped (thus requiring a
728 * previous invalidation by remapping - which will already have
729 * been done in the free routine).
730 */
731 status = tavor_umap_db_find(state->ts_instance, key, type, &value,
732 0, NULL);
733 if (status == DDI_SUCCESS) {
734 /*
735 * Depending on the type of the mapped resource (CQ or QP),
736 * update handle to indicate that no invalidation remapping
737 * will be necessary.
738 */
739 if (type == MLNX_UMAP_CQMEM_RSRC) {
740
741 /* Use "value" to convert to CQ handle */
742 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
743 cq = (tavor_cqhdl_t)rsrcp->tr_addr;
744
745 /*
746 * Invalidate the handle to the userland mapping.
747 * Note: We must ensure that the mapping being
748 * unmapped here is the current one for the CQ. It
749 * is possible that it might not be if this CQ has
750 * been resized and the previous CQ memory has not
751 * yet been unmapped. But in that case, because of
752 * the devmap_devmem_remap(), there is no longer any
753 * association between the mapping and the real CQ
754 * kernel memory.
755 */
756 mutex_enter(&cq->cq_lock);
757 if (cq->cq_umap_dhp == dhp) {
758 cq->cq_umap_dhp = (devmap_cookie_t)NULL;
759 }
760 mutex_exit(&cq->cq_lock);
761
762 } else if (type == MLNX_UMAP_QPMEM_RSRC) {
763
764 /* Use "value" to convert to QP handle */
765 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
766 qp = (tavor_qphdl_t)rsrcp->tr_addr;
767
768 /*
769 * Invalidate the handle to the userland mapping.
770 * Note: we ensure that the mapping being unmapped
771 * here is the current one for the QP. This is
772 * more of a sanity check here since, unlike CQs
773 * (above) we do not support resize of QPs.
774 */
775 mutex_enter(&qp->qp_lock);
776 if (qp->qp_umap_dhp == dhp) {
777 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
778 }
779 mutex_exit(&qp->qp_lock);
780
781 } else if (type == MLNX_UMAP_SRQMEM_RSRC) {
782
783 /* Use "value" to convert to SRQ handle */
784 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
785 srq = (tavor_srqhdl_t)rsrcp->tr_addr;
786
787 /*
788 * Invalidate the handle to the userland mapping.
789 * Note: we ensure that the mapping being unmapped
790 * here is the current one for the QP. This is
791 * more of a sanity check here since, unlike CQs
792 * (above) we do not support resize of QPs.
793 */
794 mutex_enter(&srq->srq_lock);
795 if (srq->srq_umap_dhp == dhp) {
796 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
797 }
798 mutex_exit(&srq->srq_lock);
799 }
800 }
801
802 TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
803 }
804
805
806 /*
807 * tavor_devmap_devmem_map()
808 * Context: Can be called from kernel context.
809 */
810 /* ARGSUSED */
811 static int
812 tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
813 offset_t off, size_t len, void **pvtp)
814 {
815 tavor_state_t *state;
816 tavor_devmap_track_t *dvm_track;
817 minor_t instance;
818
819 TAVOR_TNF_ENTER(tavor_devmap_devmem_map);
820
821 /* Get Tavor softstate structure from instance */
822 instance = TAVOR_DEV_INSTANCE(dev);
823 state = ddi_get_soft_state(tavor_statep, instance);
824 if (state == NULL) {
825 TNF_PROBE_0(tavor_devmap_devmem_map_gss_fail, TAVOR_TNF_ERROR,
826 "");
827 TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
828 return (ENXIO);
829 }
830
831 /*
832 * Allocate an entry to track the mapping and unmapping of this
833 * resource. Note: We don't need to initialize the "refcnt" or
834 * "offset" fields here, nor do we need to initialize the mutex
835 * used with the "refcnt". Since UAR pages are single pages, they
836 * are not subject to "partial" unmappings. This makes these other
837 * fields unnecessary.
838 */
839 dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
840 sizeof (tavor_devmap_track_t), KM_SLEEP);
841 dvm_track->tdt_state = state;
842 dvm_track->tdt_size = PAGESIZE;
843
844 /*
845 * Pass the private "Tavor devmap tracking structure" back. This
846 * pointer will be returned in a subsequent "unmap" callback.
847 */
848 *pvtp = dvm_track;
849
850 TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
851 return (DDI_SUCCESS);
852 }
853
854
855 /*
856 * tavor_devmap_devmem_dup()
857 * Context: Can be called from kernel context.
858 */
859 /* ARGSUSED */
860 static int
861 tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
862 devmap_cookie_t new_dhp, void **new_pvtp)
863 {
864 tavor_state_t *state;
865 tavor_devmap_track_t *dvm_track;
866 uint_t maxprot;
867 int status;
868
869 TAVOR_TNF_ENTER(tavor_devmap_devmem_dup);
870
871 /*
872 * Extract the Tavor softstate pointer from "Tavor devmap tracking
873 * structure" (in "pvtp"). Note: If the tracking structure is NULL
874 * here, it means that the mapping corresponds to an invalid mapping.
875 * In this case, it can be safely ignored ("new_pvtp" set to NULL).
876 */
877 dvm_track = (tavor_devmap_track_t *)pvtp;
878 if (dvm_track == NULL) {
879 *new_pvtp = NULL;
880 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
881 return (DDI_SUCCESS);
882 }
883
884 state = dvm_track->tdt_state;
885
886 /*
887 * Since this devmap_dup() entry point is generally called
888 * when a process does fork(2), it is incumbent upon the driver
889 * to insure that the child does not inherit a valid copy of
890 * the parent's resource. This is accomplished by using
891 * devmap_devmem_remap() to invalidate the child's mapping to the
892 * kernel memory.
893 */
894 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
895 status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
896 dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
897 if (status != DDI_SUCCESS) {
898 TAVOR_WARNING(state, "failed in tavor_devmap_devmem_dup()");
899 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
900 return (status);
901 }
902
903 /*
904 * Since the region is invalid, there is no need for us to
905 * allocate and continue to track an additional "Tavor devmap
906 * tracking structure". Instead we return NULL here, which is an
907 * indication to the devmap_unmap() entry point that this entry
908 * can be safely ignored.
909 */
910 *new_pvtp = NULL;
911
912 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
913 return (DDI_SUCCESS);
914 }
915
916
917 /*
918 * tavor_devmap_devmem_unmap()
919 * Context: Can be called from kernel context.
920 */
921 /* ARGSUSED */
922 static void
923 tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
924 size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
925 devmap_cookie_t new_dhp2, void **pvtp2)
926 {
927 tavor_devmap_track_t *dvm_track;
928
929 TAVOR_TNF_ENTER(tavor_devmap_devmem_unmap);
930
931 /*
932 * Free up the "Tavor devmap tracking structure" (in "pvtp").
933 * There cannot be "partial" unmappings here because all UAR pages
934 * are single pages. Note: If the tracking structure is NULL here,
935 * it means that the mapping corresponds to an invalid mapping. In
936 * this case, it can be safely ignored.
937 */
938 dvm_track = (tavor_devmap_track_t *)pvtp;
939 if (dvm_track == NULL) {
940 TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
941 return;
942 }
943
944 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
945 TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
946 }
947
948
949 /*
950 * tavor_umap_ci_data_in()
951 * Context: Can be called from user or kernel context.
952 */
953 /* ARGSUSED */
954 ibt_status_t
955 tavor_umap_ci_data_in(tavor_state_t *state, ibt_ci_data_flags_t flags,
956 ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
957 {
958 int status;
959
960 TAVOR_TNF_ENTER(tavor_umap_ci_data_in);
961
962 /*
963 * Depending on the type of object about which additional information
964 * is being provided (currently only MR is supported), we call the
965 * appropriate resource-specific function.
966 */
967 switch (object) {
968 case IBT_HDL_MR:
969 status = tavor_umap_mr_data_in((tavor_mrhdl_t)hdl,
970 (ibt_mr_data_in_t *)data_p, data_sz);
971 if (status != DDI_SUCCESS) {
972 TNF_PROBE_0(tavor_umap_mr_data_in_fail,
973 TAVOR_TNF_ERROR, "");
974 TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
975 return (status);
976 }
977 break;
978
979 /*
980 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
981 * since the Tavor driver does not support these.
982 */
983 case IBT_HDL_HCA:
984 case IBT_HDL_QP:
985 case IBT_HDL_CQ:
986 case IBT_HDL_PD:
987 case IBT_HDL_MW:
988 case IBT_HDL_AH:
989 case IBT_HDL_SCHED:
990 case IBT_HDL_EEC:
991 case IBT_HDL_RDD:
992 case IBT_HDL_SRQ:
993 TNF_PROBE_0(tavor_umap_ci_data_in_unsupp_type,
994 TAVOR_TNF_ERROR, "");
995 TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
996 return (IBT_NOT_SUPPORTED);
997
998 /*
999 * Any other types are invalid.
1000 */
1001 default:
1002 TNF_PROBE_0(tavor_umap_ci_data_in_invtype_fail,
1003 TAVOR_TNF_ERROR, "");
1004 TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1005 return (IBT_INVALID_PARAM);
1006 }
1007
1008 TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1009 return (DDI_SUCCESS);
1010 }
1011
1012
1013 /*
1014 * tavor_umap_mr_data_in()
1015 * Context: Can be called from user or kernel context.
1016 */
1017 static ibt_status_t
1018 tavor_umap_mr_data_in(tavor_mrhdl_t mr, ibt_mr_data_in_t *data,
1019 size_t data_sz)
1020 {
1021 TAVOR_TNF_ENTER(tavor_umap_mr_data_in);
1022
1023 if (data->mr_rev != IBT_MR_DATA_IN_IF_VERSION) {
1024 TNF_PROBE_0(tavor_umap_mr_data_in_ver_fail,
1025 TAVOR_TNF_ERROR, "");
1026 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1027 return (IBT_NOT_SUPPORTED);
1028 }
1029
1030 /* Check for valid MR handle pointer */
1031 if (mr == NULL) {
1032 TNF_PROBE_0(tavor_umap_mr_data_in_invmrhdl_fail,
1033 TAVOR_TNF_ERROR, "");
1034 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1035 return (IBT_MR_HDL_INVALID);
1036 }
1037
1038 /* Check for valid MR input structure size */
1039 if (data_sz < sizeof (ibt_mr_data_in_t)) {
1040 TNF_PROBE_0(tavor_umap_mr_data_in_invdatasz_fail,
1041 TAVOR_TNF_ERROR, "");
1042 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1043 return (IBT_INSUFF_RESOURCE);
1044 }
1045
1046 /*
1047 * Ensure that the MR corresponds to userland memory and that it is
1048 * a currently valid memory region as well.
1049 */
1050 mutex_enter(&mr->mr_lock);
1051 if ((mr->mr_is_umem == 0) || (mr->mr_umemcookie == NULL)) {
1052 mutex_exit(&mr->mr_lock);
1053 TNF_PROBE_0(tavor_umap_mr_data_in_invumem_fail,
1054 TAVOR_TNF_ERROR, "");
1055 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1056 return (IBT_MR_HDL_INVALID);
1057 }
1058
1059 /*
1060 * If it has passed all the above checks, then extract the callback
1061 * function and argument from the input structure. Copy them into
1062 * the MR handle. This function will be called only if the memory
1063 * corresponding to the MR handle gets a umem_lockmemory() callback.
1064 */
1065 mr->mr_umem_cbfunc = data->mr_func;
1066 mr->mr_umem_cbarg1 = data->mr_arg1;
1067 mr->mr_umem_cbarg2 = data->mr_arg2;
1068 mutex_exit(&mr->mr_lock);
1069
1070 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1071 return (DDI_SUCCESS);
1072 }
1073
1074
1075 /*
1076 * tavor_umap_ci_data_out()
1077 * Context: Can be called from user or kernel context.
1078 */
1079 /* ARGSUSED */
1080 ibt_status_t
1081 tavor_umap_ci_data_out(tavor_state_t *state, ibt_ci_data_flags_t flags,
1082 ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
1083 {
1084 int status;
1085
1086 TAVOR_TNF_ENTER(tavor_umap_ci_data_out);
1087
1088 /*
1089 * Depending on the type of object about which additional information
1090 * is being requested (CQ or QP), we call the appropriate resource-
1091 * specific mapping function.
1092 */
1093 switch (object) {
1094 case IBT_HDL_CQ:
1095 status = tavor_umap_cq_data_out((tavor_cqhdl_t)hdl,
1096 (mlnx_umap_cq_data_out_t *)data_p, data_sz);
1097 if (status != DDI_SUCCESS) {
1098 TNF_PROBE_0(tavor_umap_cq_data_out_fail,
1099 TAVOR_TNF_ERROR, "");
1100 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1101 return (status);
1102 }
1103 break;
1104
1105 case IBT_HDL_QP:
1106 status = tavor_umap_qp_data_out((tavor_qphdl_t)hdl,
1107 (mlnx_umap_qp_data_out_t *)data_p, data_sz);
1108 if (status != DDI_SUCCESS) {
1109 TNF_PROBE_0(tavor_umap_qp_data_out_fail,
1110 TAVOR_TNF_ERROR, "");
1111 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1112 return (status);
1113 }
1114 break;
1115
1116 case IBT_HDL_SRQ:
1117 status = tavor_umap_srq_data_out((tavor_srqhdl_t)hdl,
1118 (mlnx_umap_srq_data_out_t *)data_p, data_sz);
1119 if (status != DDI_SUCCESS) {
1120 TNF_PROBE_0(tavor_umap_srq_data_out_fail,
1121 TAVOR_TNF_ERROR, "");
1122 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1123 return (status);
1124 }
1125 break;
1126
1127 /*
1128 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
1129 * since the Tavor driver does not support these.
1130 */
1131 case IBT_HDL_PD:
1132 status = tavor_umap_pd_data_out((tavor_pdhdl_t)hdl,
1133 (mlnx_umap_pd_data_out_t *)data_p, data_sz);
1134 if (status != DDI_SUCCESS) {
1135 TNF_PROBE_0(tavor_umap_pd_data_out_fail,
1136 TAVOR_TNF_ERROR, "");
1137 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1138 return (status);
1139 }
1140 break;
1141
1142 case IBT_HDL_HCA:
1143 case IBT_HDL_MR:
1144 case IBT_HDL_MW:
1145 case IBT_HDL_AH:
1146 case IBT_HDL_SCHED:
1147 case IBT_HDL_EEC:
1148 case IBT_HDL_RDD:
1149 TNF_PROBE_0(tavor_umap_ci_data_out_unsupp_type,
1150 TAVOR_TNF_ERROR, "");
1151 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1152 return (IBT_NOT_SUPPORTED);
1153
1154 /*
1155 * Any other types are invalid.
1156 */
1157 default:
1158 TNF_PROBE_0(tavor_umap_ci_data_out_invtype_fail,
1159 TAVOR_TNF_ERROR, "");
1160 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1161 return (IBT_INVALID_PARAM);
1162 }
1163
1164 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1165 return (DDI_SUCCESS);
1166 }
1167
1168
1169 /*
1170 * tavor_umap_cq_data_out()
1171 * Context: Can be called from user or kernel context.
1172 */
1173 static ibt_status_t
1174 tavor_umap_cq_data_out(tavor_cqhdl_t cq, mlnx_umap_cq_data_out_t *data,
1175 size_t data_sz)
1176 {
1177 TAVOR_TNF_ENTER(tavor_umap_cq_data_out);
1178
1179 /* Check for valid CQ handle pointer */
1180 if (cq == NULL) {
1181 TNF_PROBE_0(tavor_umap_cq_data_out_invcqhdl_fail,
1182 TAVOR_TNF_ERROR, "");
1183 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1184 return (IBT_CQ_HDL_INVALID);
1185 }
1186
1187 /* Check for valid CQ mapping structure size */
1188 if (data_sz < sizeof (mlnx_umap_cq_data_out_t)) {
1189 TNF_PROBE_0(tavor_umap_cq_data_out_invdatasz_fail,
1190 TAVOR_TNF_ERROR, "");
1191 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1192 return (IBT_INSUFF_RESOURCE);
1193 }
1194
1195 /*
1196 * If it has passed all the above checks, then fill in all the useful
1197 * mapping information (including the mapping offset that will be
1198 * passed back to the devmap() interface during a subsequent mmap()
1199 * call.
1200 *
1201 * The "offset" for CQ mmap()'s looks like this:
1202 * +----------------------------------------+--------+--------------+
1203 * | CQ Number | 0x33 | Reserved (0) |
1204 * +----------------------------------------+--------+--------------+
1205 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1206 *
1207 * This returns information about the mapping offset, the length of
1208 * the CQ memory, the CQ number (for use in later CQ doorbells), the
1209 * number of CQEs the CQ memory can hold, and the size of each CQE.
1210 */
1211 data->mcq_rev = MLNX_UMAP_IF_VERSION;
1212 data->mcq_mapoffset = ((((uint64_t)cq->cq_cqnum <<
1213 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC) << PAGESHIFT);
1214 data->mcq_maplen = cq->cq_cqinfo.qa_size;
1215 data->mcq_cqnum = cq->cq_cqnum;
1216 data->mcq_numcqe = cq->cq_bufsz;
1217 data->mcq_cqesz = sizeof (tavor_hw_cqe_t);
1218
1219 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1220 return (DDI_SUCCESS);
1221 }
1222
1223
1224 /*
1225 * tavor_umap_qp_data_out()
1226 * Context: Can be called from user or kernel context.
1227 */
1228 static ibt_status_t
1229 tavor_umap_qp_data_out(tavor_qphdl_t qp, mlnx_umap_qp_data_out_t *data,
1230 size_t data_sz)
1231 {
1232 TAVOR_TNF_ENTER(tavor_umap_qp_data_out);
1233
1234 /* Check for valid QP handle pointer */
1235 if (qp == NULL) {
1236 TNF_PROBE_0(tavor_umap_qp_data_out_invqphdl_fail,
1237 TAVOR_TNF_ERROR, "");
1238 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1239 return (IBT_QP_HDL_INVALID);
1240 }
1241
1242 /* Check for valid QP mapping structure size */
1243 if (data_sz < sizeof (mlnx_umap_qp_data_out_t)) {
1244 TNF_PROBE_0(tavor_umap_qp_data_out_invdatasz_fail,
1245 TAVOR_TNF_ERROR, "");
1246 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1247 return (IBT_INSUFF_RESOURCE);
1248 }
1249
1250 /*
1251 * If it has passed all the checks, then fill in all the useful
1252 * mapping information (including the mapping offset that will be
1253 * passed back to the devmap() interface during a subsequent mmap()
1254 * call.
1255 *
1256 * The "offset" for QP mmap()'s looks like this:
1257 * +----------------------------------------+--------+--------------+
1258 * | QP Number | 0x44 | Reserved (0) |
1259 * +----------------------------------------+--------+--------------+
1260 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1261 *
1262 * This returns information about the mapping offset, the length of
1263 * the QP memory, and the QP number (for use in later send and recv
1264 * doorbells). It also returns the following information for both
1265 * the receive work queue and the send work queue, respectively: the
1266 * offset (from the base mapped address) of the start of the given
1267 * work queue, the 64-bit IB virtual address that corresponds to
1268 * the base mapped address (needed for posting WQEs though the
1269 * QP doorbells), the number of WQEs the given work queue can hold,
1270 * and the size of each WQE for the given work queue.
1271 */
1272 data->mqp_rev = MLNX_UMAP_IF_VERSION;
1273 data->mqp_mapoffset = ((((uint64_t)qp->qp_qpnum <<
1274 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_QPMEM_RSRC) << PAGESHIFT);
1275 data->mqp_maplen = qp->qp_wqinfo.qa_size;
1276 data->mqp_qpnum = qp->qp_qpnum;
1277
1278 /*
1279 * If this QP is associated with a shared receive queue (SRQ),
1280 * then return invalid RecvQ parameters. Otherwise, return
1281 * the proper parameter values.
1282 */
1283 if (qp->qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
1284 data->mqp_rq_off = (uint32_t)qp->qp_wqinfo.qa_size;
1285 data->mqp_rq_desc_addr = (uint32_t)qp->qp_wqinfo.qa_size;
1286 data->mqp_rq_numwqe = 0;
1287 data->mqp_rq_wqesz = 0;
1288 } else {
1289 data->mqp_rq_off = (uintptr_t)qp->qp_rq_buf -
1290 (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1291 data->mqp_rq_desc_addr = (uint32_t)((uintptr_t)qp->qp_rq_buf -
1292 qp->qp_desc_off);
1293 data->mqp_rq_numwqe = qp->qp_rq_bufsz;
1294 data->mqp_rq_wqesz = (1 << qp->qp_rq_log_wqesz);
1295 }
1296 data->mqp_sq_off = (uintptr_t)qp->qp_sq_buf -
1297 (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1298 data->mqp_sq_desc_addr = (uint32_t)((uintptr_t)qp->qp_sq_buf -
1299 qp->qp_desc_off);
1300 data->mqp_sq_numwqe = qp->qp_sq_bufsz;
1301 data->mqp_sq_wqesz = (1 << qp->qp_sq_log_wqesz);
1302
1303 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1304 return (DDI_SUCCESS);
1305 }
1306
1307
1308 /*
1309 * tavor_umap_srq_data_out()
1310 * Context: Can be called from user or kernel context.
1311 */
1312 static ibt_status_t
1313 tavor_umap_srq_data_out(tavor_srqhdl_t srq, mlnx_umap_srq_data_out_t *data,
1314 size_t data_sz)
1315 {
1316 TAVOR_TNF_ENTER(tavor_umap_srq_data_out);
1317
1318 /* Check for valid SRQ handle pointer */
1319 if (srq == NULL) {
1320 TNF_PROBE_0(tavor_umap_srq_data_out_invsrqhdl_fail,
1321 TAVOR_TNF_ERROR, "");
1322 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1323 return (IBT_SRQ_HDL_INVALID);
1324 }
1325
1326 /* Check for valid SRQ mapping structure size */
1327 if (data_sz < sizeof (mlnx_umap_srq_data_out_t)) {
1328 TNF_PROBE_0(tavor_umap_srq_data_out_invdatasz_fail,
1329 TAVOR_TNF_ERROR, "");
1330 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1331 return (IBT_INSUFF_RESOURCE);
1332 }
1333
1334 /*
1335 * If it has passed all the checks, then fill in all the useful
1336 * mapping information (including the mapping offset that will be
1337 * passed back to the devmap() interface during a subsequent mmap()
1338 * call.
1339 *
1340 * The "offset" for SRQ mmap()'s looks like this:
1341 * +----------------------------------------+--------+--------------+
1342 * | SRQ Number | 0x66 | Reserved (0) |
1343 * +----------------------------------------+--------+--------------+
1344 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1345 *
1346 * This returns information about the mapping offset, the length of the
1347 * SRQ memory, and the SRQ number (for use in later send and recv
1348 * doorbells). It also returns the following information for the
1349 * shared receive queue: the offset (from the base mapped address) of
1350 * the start of the given work queue, the 64-bit IB virtual address
1351 * that corresponds to the base mapped address (needed for posting WQEs
1352 * though the QP doorbells), the number of WQEs the given work queue
1353 * can hold, and the size of each WQE for the given work queue.
1354 */
1355 data->msrq_rev = MLNX_UMAP_IF_VERSION;
1356 data->msrq_mapoffset = ((((uint64_t)srq->srq_srqnum <<
1357 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_SRQMEM_RSRC) << PAGESHIFT);
1358 data->msrq_maplen = srq->srq_wqinfo.qa_size;
1359 data->msrq_srqnum = srq->srq_srqnum;
1360
1361 data->msrq_desc_addr = (uint32_t)((uintptr_t)srq->srq_wq_buf -
1362 srq->srq_desc_off);
1363 data->msrq_numwqe = srq->srq_wq_bufsz;
1364 data->msrq_wqesz = (1 << srq->srq_wq_log_wqesz);
1365
1366 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1367 return (DDI_SUCCESS);
1368 }
1369
1370 /*
1371 * tavor_umap_pd_data_out()
1372 * Context: Can be called from user or kernel context.
1373 */
1374 static ibt_status_t
1375 tavor_umap_pd_data_out(tavor_pdhdl_t pd, mlnx_umap_pd_data_out_t *data,
1376 size_t data_sz)
1377 {
1378 TAVOR_TNF_ENTER(tavor_umap_pd_data_out);
1379
1380 /* Check for valid PD handle pointer */
1381 if (pd == NULL) {
1382 TNF_PROBE_0(tavor_umap_pd_data_out_invpdhdl_fail,
1383 TAVOR_TNF_ERROR, "");
1384 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1385 return (IBT_PD_HDL_INVALID);
1386 }
1387
1388 /* Check for valid PD mapping structure size */
1389 if (data_sz < sizeof (mlnx_umap_pd_data_out_t)) {
1390 TNF_PROBE_0(tavor_umap_pd_data_out_invdatasz_fail,
1391 TAVOR_TNF_ERROR, "");
1392 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1393 return (IBT_INSUFF_RESOURCE);
1394 }
1395
1396 /*
1397 * If it has passed all the checks, then fill the PD table index
1398 * (the PD table allocated index for the PD pd_pdnum)
1399 */
1400 data->mpd_rev = MLNX_UMAP_IF_VERSION;
1401 data->mpd_pdnum = pd->pd_pdnum;
1402
1403 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1404 return (DDI_SUCCESS);
1405 }
1406
1407 /*
1408 * tavor_umap_db_init()
1409 * Context: Only called from attach() path context
1410 */
1411 void
1412 tavor_umap_db_init(void)
1413 {
1414 TAVOR_TNF_ENTER(tavor_umap_db_init);
1415
1416 /*
1417 * Initialize the lock used by the Tavor "userland resources database"
1418 * This is used to ensure atomic access to add, remove, and find
1419 * entries in the database.
1420 */
1421 mutex_init(&tavor_userland_rsrc_db.tdl_umapdb_lock, NULL,
1422 MUTEX_DRIVER, NULL);
1423
1424 /*
1425 * Initialize the AVL tree used for the "userland resources
1426 * database". Using an AVL tree here provides the ability to
1427 * scale the database size to large numbers of resources. The
1428 * entries in the tree are "tavor_umap_db_entry_t".
1429 * The tree is searched with the help of the
1430 * tavor_umap_db_compare() routine.
1431 */
1432 avl_create(&tavor_userland_rsrc_db.tdl_umapdb_avl,
1433 tavor_umap_db_compare, sizeof (tavor_umap_db_entry_t),
1434 offsetof(tavor_umap_db_entry_t, tdbe_avlnode));
1435
1436 TAVOR_TNF_EXIT(tavor_umap_db_init);
1437 }
1438
1439
1440 /*
1441 * tavor_umap_db_fini()
1442 * Context: Only called from attach() and/or detach() path contexts
1443 */
1444 void
1445 tavor_umap_db_fini(void)
1446 {
1447 TAVOR_TNF_ENTER(tavor_umap_db_fini);
1448
1449 /* Destroy the AVL tree for the "userland resources database" */
1450 avl_destroy(&tavor_userland_rsrc_db.tdl_umapdb_avl);
1451
1452 /* Destroy the lock for the "userland resources database" */
1453 mutex_destroy(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1454
1455 TAVOR_TNF_EXIT(tavor_umap_db_fini);
1456 }
1457
1458
1459 /*
1460 * tavor_umap_db_alloc()
1461 * Context: Can be called from user or kernel context.
1462 */
1463 tavor_umap_db_entry_t *
1464 tavor_umap_db_alloc(uint_t instance, uint64_t key, uint_t type, uint64_t value)
1465 {
1466 tavor_umap_db_entry_t *umapdb;
1467
1468 TAVOR_TNF_ENTER(tavor_umap_db_alloc);
1469
1470 /* Allocate an entry to add to the "userland resources database" */
1471 umapdb = kmem_zalloc(sizeof (tavor_umap_db_entry_t), KM_NOSLEEP);
1472 if (umapdb == NULL) {
1473 TNF_PROBE_0(tavor_umap_db_alloc_kmz_fail, TAVOR_TNF_ERROR, "");
1474 TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1475 return (NULL);
1476 }
1477
1478 /* Fill in the fields in the database entry */
1479 umapdb->tdbe_common.tdb_instance = instance;
1480 umapdb->tdbe_common.tdb_type = type;
1481 umapdb->tdbe_common.tdb_key = key;
1482 umapdb->tdbe_common.tdb_value = value;
1483
1484 TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1485 return (umapdb);
1486 }
1487
1488
1489 /*
1490 * tavor_umap_db_free()
1491 * Context: Can be called from user or kernel context.
1492 */
1493 void
1494 tavor_umap_db_free(tavor_umap_db_entry_t *umapdb)
1495 {
1496 TAVOR_TNF_ENTER(tavor_umap_db_free);
1497
1498 /* Free the database entry */
1499 kmem_free(umapdb, sizeof (tavor_umap_db_entry_t));
1500
1501 TAVOR_TNF_EXIT(tavor_umap_db_free);
1502 }
1503
1504
1505 /*
1506 * tavor_umap_db_add()
1507 * Context: Can be called from user or kernel context.
1508 */
1509 void
1510 tavor_umap_db_add(tavor_umap_db_entry_t *umapdb)
1511 {
1512 TAVOR_TNF_ENTER(tavor_umap_db_add);
1513
1514 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1515 tavor_umap_db_add_nolock(umapdb);
1516 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1517
1518 TAVOR_TNF_EXIT(tavor_umap_db_add);
1519 }
1520
1521
1522 /*
1523 * tavor_umap_db_add_nolock()
1524 * Context: Can be called from user or kernel context.
1525 */
1526 void
1527 tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb)
1528 {
1529 tavor_umap_db_query_t query;
1530 avl_index_t where;
1531
1532 TAVOR_TNF_ENTER(tavor_umap_db_add_nolock);
1533
1534 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1535
1536 /*
1537 * Copy the common portion of the "to-be-added" database entry
1538 * into the "tavor_umap_db_query_t" structure. We use this structure
1539 * (with no flags set) to find the appropriate location in the
1540 * "userland resources database" for the new entry to be added.
1541 *
1542 * Note: we expect that this entry should not be found in the
1543 * database (unless something bad has happened).
1544 */
1545 query.tqdb_common = umapdb->tdbe_common;
1546 query.tqdb_flags = 0;
1547 (void) avl_find(&tavor_userland_rsrc_db.tdl_umapdb_avl, &query,
1548 &where);
1549
1550 /*
1551 * Now, using the "where" field from the avl_find() operation
1552 * above, we will insert the new database entry ("umapdb").
1553 */
1554 avl_insert(&tavor_userland_rsrc_db.tdl_umapdb_avl, umapdb,
1555 where);
1556
1557 TAVOR_TNF_EXIT(tavor_umap_db_add_nolock);
1558 }
1559
1560
1561 /*
1562 * tavor_umap_db_find()
1563 * Context: Can be called from user or kernel context.
1564 */
1565 int
1566 tavor_umap_db_find(uint_t instance, uint64_t key, uint_t type,
1567 uint64_t *value, uint_t flag, tavor_umap_db_entry_t **umapdb)
1568 {
1569 int status;
1570
1571 TAVOR_TNF_ENTER(tavor_umap_db_find);
1572
1573 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1574 status = tavor_umap_db_find_nolock(instance, key, type, value, flag,
1575 umapdb);
1576 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1577
1578 TAVOR_TNF_EXIT(tavor_umap_db_find);
1579 return (status);
1580 }
1581
1582
1583 /*
1584 * tavor_umap_db_find_nolock()
1585 * Context: Can be called from user or kernel context.
1586 */
1587 int
1588 tavor_umap_db_find_nolock(uint_t instance, uint64_t key, uint_t type,
1589 uint64_t *value, uint_t flags, tavor_umap_db_entry_t **umapdb)
1590 {
1591 tavor_umap_db_query_t query;
1592 tavor_umap_db_entry_t *entry;
1593 avl_index_t where;
1594
1595 TAVOR_TNF_ENTER(tavor_umap_db_find_nolock);
1596
1597 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1598
1599 /*
1600 * Fill in key, type, instance, and flags values of the
1601 * tavor_umap_db_query_t in preparation for the database
1602 * lookup.
1603 */
1604 query.tqdb_flags = flags;
1605 query.tqdb_common.tdb_key = key;
1606 query.tqdb_common.tdb_type = type;
1607 query.tqdb_common.tdb_instance = instance;
1608
1609 /*
1610 * Perform the database query. If no entry is found, then
1611 * return failure, else continue.
1612 */
1613 entry = (tavor_umap_db_entry_t *)avl_find(
1614 &tavor_userland_rsrc_db.tdl_umapdb_avl, &query, &where);
1615 if (entry == NULL) {
1616 TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1617 return (DDI_FAILURE);
1618 }
1619
1620 /*
1621 * If the flags argument specifies that the entry should
1622 * be removed if found, then call avl_remove() to remove
1623 * the entry from the database.
1624 */
1625 if (flags & TAVOR_UMAP_DB_REMOVE) {
1626
1627 avl_remove(&tavor_userland_rsrc_db.tdl_umapdb_avl, entry);
1628
1629 /*
1630 * The database entry is returned with the expectation
1631 * that the caller will use tavor_umap_db_free() to
1632 * free the entry's memory. ASSERT that this is non-NULL.
1633 * NULL pointer should never be passed for the
1634 * TAVOR_UMAP_DB_REMOVE case.
1635 */
1636 ASSERT(umapdb != NULL);
1637 }
1638
1639 /*
1640 * If the caller would like visibility to the database entry
1641 * (indicated through the use of a non-NULL "umapdb" argument),
1642 * then fill it in.
1643 */
1644 if (umapdb != NULL) {
1645 *umapdb = entry;
1646 }
1647
1648 /* Extract value field from database entry and return success */
1649 *value = entry->tdbe_common.tdb_value;
1650
1651 TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1652 return (DDI_SUCCESS);
1653 }
1654
1655
1656 /*
1657 * tavor_umap_umemlock_cb()
1658 * Context: Can be called from callback context.
1659 */
1660 void
1661 tavor_umap_umemlock_cb(ddi_umem_cookie_t *umem_cookie)
1662 {
1663 tavor_umap_db_entry_t *umapdb;
1664 tavor_state_t *state;
1665 tavor_rsrc_t *rsrcp;
1666 tavor_mrhdl_t mr;
1667 uint64_t value;
1668 uint_t instance;
1669 int status;
1670 void (*mr_callback)(void *, void *);
1671 void *mr_cbarg1, *mr_cbarg2;
1672
1673 TAVOR_TNF_ENTER(tavor_umap_umemlock_cb);
1674
1675 /*
1676 * If this was userland memory, then we need to remove its entry
1677 * from the "userland resources database". Note: We use the
1678 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag here because we don't know
1679 * which instance was used when the entry was added (but we want
1680 * to know after the entry is found using the other search criteria).
1681 */
1682 status = tavor_umap_db_find(0, (uint64_t)(uintptr_t)umem_cookie,
1683 MLNX_UMAP_MRMEM_RSRC, &value, (TAVOR_UMAP_DB_REMOVE |
1684 TAVOR_UMAP_DB_IGNORE_INSTANCE), &umapdb);
1685 if (status == DDI_SUCCESS) {
1686 instance = umapdb->tdbe_common.tdb_instance;
1687 state = ddi_get_soft_state(tavor_statep, instance);
1688 if (state == NULL) {
1689 cmn_err(CE_WARN, "Unable to match Tavor instance\n");
1690 TNF_PROBE_0(tavor_umap_umemlock_cb_gss_fail,
1691 TAVOR_TNF_ERROR, "");
1692 TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1693 return;
1694 }
1695
1696 /* Free the database entry */
1697 tavor_umap_db_free(umapdb);
1698
1699 /* Use "value" to convert to an MR handle */
1700 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
1701 mr = (tavor_mrhdl_t)rsrcp->tr_addr;
1702
1703 /*
1704 * If a callback has been provided, call it first. This
1705 * callback is expected to do any cleanup necessary to
1706 * guarantee that the subsequent MR deregister (below)
1707 * will succeed. Specifically, this means freeing up memory
1708 * windows which might have been associated with the MR.
1709 */
1710 mutex_enter(&mr->mr_lock);
1711 mr_callback = mr->mr_umem_cbfunc;
1712 mr_cbarg1 = mr->mr_umem_cbarg1;
1713 mr_cbarg2 = mr->mr_umem_cbarg2;
1714 mutex_exit(&mr->mr_lock);
1715 if (mr_callback != NULL) {
1716 mr_callback(mr_cbarg1, mr_cbarg2);
1717 }
1718
1719 /*
1720 * Then call tavor_mr_deregister() to release the resources
1721 * associated with the MR handle. Note: Because this routine
1722 * will also check for whether the ddi_umem_cookie_t is in the
1723 * database, it will take responsibility for disabling the
1724 * memory region and calling ddi_umem_unlock().
1725 */
1726 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
1727 TAVOR_SLEEP);
1728 if (status != DDI_SUCCESS) {
1729 TAVOR_WARNING(state, "Unexpected failure in "
1730 "deregister from callback\n");
1731 TNF_PROBE_0(tavor_umap_umemlock_cb_dereg_fail,
1732 TAVOR_TNF_ERROR, "");
1733 TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1734 }
1735 }
1736
1737 TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1738 }
1739
1740
1741 /*
1742 * tavor_umap_db_compare()
1743 * Context: Can be called from user or kernel context.
1744 */
1745 static int
1746 tavor_umap_db_compare(const void *q, const void *e)
1747 {
1748 tavor_umap_db_common_t *entry_common, *query_common;
1749 uint_t query_flags;
1750
1751 TAVOR_TNF_ENTER(tavor_umap_db_compare);
1752
1753 entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common;
1754 query_common = &((tavor_umap_db_query_t *)q)->tqdb_common;
1755 query_flags = ((tavor_umap_db_query_t *)q)->tqdb_flags;
1756
1757 /*
1758 * The first comparison is done on the "key" value in "query"
1759 * and "entry". If they are not equal, then the appropriate
1760 * search direction is returned. Else, we continue by
1761 * comparing "type".
1762 */
1763 if (query_common->tdb_key < entry_common->tdb_key) {
1764 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1765 return (-1);
1766 } else if (query_common->tdb_key > entry_common->tdb_key) {
1767 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1768 return (+1);
1769 }
1770
1771 /*
1772 * If the search reaches this point, then "query" and "entry"
1773 * have equal key values. So we continue be comparing their
1774 * "type" values. Again, if they are not equal, then the
1775 * appropriate search direction is returned. Else, we continue
1776 * by comparing "instance".
1777 */
1778 if (query_common->tdb_type < entry_common->tdb_type) {
1779 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1780 return (-1);
1781 } else if (query_common->tdb_type > entry_common->tdb_type) {
1782 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1783 return (+1);
1784 }
1785
1786 /*
1787 * If the search reaches this point, then "query" and "entry"
1788 * have exactly the same key and type values. Now we consult
1789 * the "flags" field in the query to determine whether the
1790 * "instance" is relevant to the search. If the
1791 * TAVOR_UMAP_DB_IGNORE_INSTANCE flags is set, then return
1792 * success (0) here. Otherwise, continue the search by comparing
1793 * instance values and returning the appropriate search direction.
1794 */
1795 if (query_flags & TAVOR_UMAP_DB_IGNORE_INSTANCE) {
1796 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1797 return (0);
1798 }
1799
1800 /*
1801 * If the search has reached this point, then "query" and "entry"
1802 * can only be differentiated by their instance values. If these
1803 * are not equal, then return the appropriate search direction.
1804 * Else, we return success (0).
1805 */
1806 if (query_common->tdb_instance < entry_common->tdb_instance) {
1807 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1808 return (-1);
1809 } else if (query_common->tdb_instance > entry_common->tdb_instance) {
1810 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1811 return (+1);
1812 }
1813
1814 /* Everything matches... so return success */
1815 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1816 return (0);
1817 }
1818
1819
1820 /*
1821 * tavor_umap_db_set_onclose_cb()
1822 * Context: Can be called from user or kernel context.
1823 */
1824 int
1825 tavor_umap_db_set_onclose_cb(dev_t dev, uint64_t flag,
1826 void (*callback)(void *), void *arg)
1827 {
1828 tavor_umap_db_priv_t *priv;
1829 tavor_umap_db_entry_t *umapdb;
1830 minor_t instance;
1831 uint64_t value;
1832 int status;
1833
1834 TAVOR_TNF_ENTER(tavor_umap_db_set_onclose_cb);
1835
1836 instance = TAVOR_DEV_INSTANCE(dev);
1837 if (instance == -1) {
1838 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_inst_fail,
1839 TAVOR_TNF_ERROR, "");
1840 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1841 return (DDI_FAILURE);
1842 }
1843
1844 if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1845 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_invflag_fail,
1846 TAVOR_TNF_ERROR, "");
1847 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1848 return (DDI_FAILURE);
1849 }
1850
1851 /*
1852 * Grab the lock for the "userland resources database" and find
1853 * the entry corresponding to this minor number. Once it's found,
1854 * allocate (if necessary) and add an entry (in the "tdb_priv"
1855 * field) to indicate that further processing may be needed during
1856 * Tavor's close() handling.
1857 */
1858 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1859 status = tavor_umap_db_find_nolock(instance, dev,
1860 MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1861 if (status != DDI_SUCCESS) {
1862 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_find_fail,
1863 TAVOR_TNF_ERROR, "");
1864 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1865 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1866 return (DDI_FAILURE);
1867 }
1868
1869 priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1870 if (priv == NULL) {
1871 priv = (tavor_umap_db_priv_t *)kmem_zalloc(
1872 sizeof (tavor_umap_db_priv_t), KM_NOSLEEP);
1873 if (priv == NULL) {
1874 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_kmz_fail,
1875 TAVOR_TNF_ERROR, "");
1876 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1877 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1878 return (DDI_FAILURE);
1879 }
1880 }
1881
1882 /*
1883 * Save away the callback and argument to be used during Tavor's
1884 * close() processing.
1885 */
1886 priv->tdp_cb = callback;
1887 priv->tdp_arg = arg;
1888
1889 umapdb->tdbe_common.tdb_priv = (void *)priv;
1890 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1891
1892 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1893 return (DDI_SUCCESS);
1894 }
1895
1896
1897 /*
1898 * tavor_umap_db_clear_onclose_cb()
1899 * Context: Can be called from user or kernel context.
1900 */
1901 int
1902 tavor_umap_db_clear_onclose_cb(dev_t dev, uint64_t flag)
1903 {
1904 tavor_umap_db_priv_t *priv;
1905 tavor_umap_db_entry_t *umapdb;
1906 minor_t instance;
1907 uint64_t value;
1908 int status;
1909
1910 TAVOR_TNF_ENTER(tavor_umap_db_set_onclose_cb);
1911
1912 instance = TAVOR_DEV_INSTANCE(dev);
1913 if (instance == -1) {
1914 TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_inst_fail,
1915 TAVOR_TNF_ERROR, "");
1916 TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1917 return (DDI_FAILURE);
1918 }
1919
1920 if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1921 TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_invflag_fail,
1922 TAVOR_TNF_ERROR, "");
1923 TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1924 return (DDI_FAILURE);
1925 }
1926
1927 /*
1928 * Grab the lock for the "userland resources database" and find
1929 * the entry corresponding to this minor number. Once it's found,
1930 * remove the entry (in the "tdb_priv" field) that indicated the
1931 * need for further processing during Tavor's close(). Free the
1932 * entry, if appropriate.
1933 */
1934 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1935 status = tavor_umap_db_find_nolock(instance, dev,
1936 MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1937 if (status != DDI_SUCCESS) {
1938 TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_find_fail,
1939 TAVOR_TNF_ERROR, "");
1940 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1941 TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1942 return (DDI_FAILURE);
1943 }
1944
1945 priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1946 if (priv != NULL) {
1947 kmem_free(priv, sizeof (tavor_umap_db_priv_t));
1948 priv = NULL;
1949 }
1950
1951 umapdb->tdbe_common.tdb_priv = (void *)priv;
1952 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1953 return (DDI_SUCCESS);
1954 }
1955
1956
1957 /*
1958 * tavor_umap_db_clear_onclose_cb()
1959 * Context: Can be called from user or kernel context.
1960 */
1961 void
1962 tavor_umap_db_handle_onclose_cb(tavor_umap_db_priv_t *priv)
1963 {
1964 void (*callback)(void *);
1965
1966 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1967
1968 /*
1969 * Call the callback.
1970 * Note: Currently there is only one callback (in "tdp_cb"), but
1971 * in the future there may be more, depending on what other types
1972 * of interaction there are between userland processes and the
1973 * driver.
1974 */
1975 callback = priv->tdp_cb;
1976 callback(priv->tdp_arg);
1977 }