1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * tavor_umap.c
  29  *    Tavor Userland Mapping Routines
  30  *
  31  *    Implements all the routines necessary for enabling direct userland
  32  *    access to the Tavor hardware.  This includes all routines necessary for
  33  *    maintaining the "userland resources database" and all the support routines
  34  *    for the devmap calls.
  35  */
  36 
  37 #include <sys/types.h>
  38 #include <sys/conf.h>
  39 #include <sys/ddi.h>
  40 #include <sys/sunddi.h>
  41 #include <sys/modctl.h>
  42 #include <sys/file.h>
  43 #include <sys/avl.h>
  44 #include <sys/sysmacros.h>
  45 
  46 #include <sys/ib/adapters/tavor/tavor.h>
  47 
  48 /* Tavor HCA state pointer (extern) */
  49 extern void *tavor_statep;
  50 
  51 /* Tavor HCA Userland Resource Database (extern) */
  52 extern tavor_umap_db_t tavor_userland_rsrc_db;
  53 
  54 static int tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
  55     tavor_rsrc_t *rsrcp, size_t *maplen, int *err);
  56 static int tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
  57     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
  58 static int tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
  59     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
  60 static int tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
  61     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
  62 static int tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
  63     offset_t off, size_t len, void **pvtp);
  64 static int tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp,
  65     devmap_cookie_t new_dhp, void **new_pvtp);
  66 static void tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp,
  67     offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
  68     devmap_cookie_t new_dhp2, void **pvtp2);
  69 static int tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
  70     offset_t off, size_t len, void **pvtp);
  71 static int tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
  72     devmap_cookie_t new_dhp, void **new_pvtp);
  73 static void tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp,
  74     offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
  75     devmap_cookie_t new_dhp2, void **pvtp2);
  76 static ibt_status_t tavor_umap_mr_data_in(tavor_mrhdl_t mr,
  77     ibt_mr_data_in_t *data, size_t data_sz);
  78 static ibt_status_t tavor_umap_cq_data_out(tavor_cqhdl_t cq,
  79     mlnx_umap_cq_data_out_t *data, size_t data_sz);
  80 static ibt_status_t tavor_umap_qp_data_out(tavor_qphdl_t qp,
  81     mlnx_umap_qp_data_out_t *data, size_t data_sz);
  82 static ibt_status_t tavor_umap_srq_data_out(tavor_srqhdl_t srq,
  83     mlnx_umap_srq_data_out_t *data, size_t data_sz);
  84 static int tavor_umap_db_compare(const void *query, const void *entry);
  85 static ibt_status_t tavor_umap_pd_data_out(tavor_pdhdl_t pd,
  86     mlnx_umap_pd_data_out_t *data, size_t data_sz);
  87 
  88 
  89 /*
  90  * These callbacks are passed to devmap_umem_setup() and devmap_devmem_setup(),
  91  * respectively.  They are used to handle (among other things) partial
  92  * unmappings and to provide a method for invalidating mappings inherited
  93  * as a result of a fork(2) system call.
  94  */
  95 static struct devmap_callback_ctl tavor_devmap_umem_cbops = {
  96         DEVMAP_OPS_REV,
  97         tavor_devmap_umem_map,
  98         NULL,
  99         tavor_devmap_umem_dup,
 100         tavor_devmap_umem_unmap
 101 };
 102 static struct devmap_callback_ctl tavor_devmap_devmem_cbops = {
 103         DEVMAP_OPS_REV,
 104         tavor_devmap_devmem_map,
 105         NULL,
 106         tavor_devmap_devmem_dup,
 107         tavor_devmap_devmem_unmap
 108 };
 109 
 110 /*
 111  * tavor_devmap()
 112  *    Context: Can be called from user context.
 113  */
 114 /* ARGSUSED */
 115 int
 116 tavor_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
 117     size_t *maplen, uint_t model)
 118 {
 119         tavor_state_t   *state;
 120         tavor_rsrc_t    *rsrcp;
 121         minor_t         instance;
 122         uint64_t        key, value;
 123         uint_t          type;
 124         int             err, status;
 125 
 126         TAVOR_TNF_ENTER(tavor_devmap);
 127 
 128         /* Get Tavor softstate structure from instance */
 129         instance = TAVOR_DEV_INSTANCE(dev);
 130         state = ddi_get_soft_state(tavor_statep, instance);
 131         if (state == NULL) {
 132                 TNF_PROBE_0(tavor_devmap_gss_fail, TAVOR_TNF_ERROR, "");
 133                 TAVOR_TNF_EXIT(tavor_devmap);
 134                 return (ENXIO);
 135         }
 136 
 137         /*
 138          * Access to Tavor devmap interface is not allowed in
 139          * "maintenance mode".
 140          */
 141         if (state->ts_operational_mode == TAVOR_MAINTENANCE_MODE) {
 142                 TNF_PROBE_0(tavor_devmap_maintenance_mode_fail,
 143                     TAVOR_TNF_ERROR, "");
 144                 TAVOR_TNF_EXIT(tavor_devmap);
 145                 return (EFAULT);
 146         }
 147 
 148         /*
 149          * The bottom bits of "offset" are undefined (number depends on
 150          * system PAGESIZE).  Shifting these off leaves us with a "key".
 151          * The "key" is actually a combination of both a real key value
 152          * (for the purpose of database lookup) and a "type" value.  We
 153          * extract this information before doing the database lookup.
 154          */
 155         key  = off >> PAGESHIFT;
 156         type = key & MLNX_UMAP_RSRC_TYPE_MASK;
 157         key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
 158         status = tavor_umap_db_find(instance, key, type, &value, 0, NULL);
 159         if (status == DDI_SUCCESS) {
 160                 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
 161 
 162                 switch (type) {
 163                 case MLNX_UMAP_UARPG_RSRC:
 164                         /*
 165                          * Double check that process who open()'d Tavor is
 166                          * same process attempting to mmap() UAR page.
 167                          */
 168                         if (key != ddi_get_pid()) {
 169                                 TNF_PROBE_0(tavor_devmap_uarpg_invpid_fail,
 170                                     TAVOR_TNF_ERROR, "");
 171                                 TAVOR_TNF_EXIT(tavor_devmap);
 172                                 return (EINVAL);
 173                         }
 174 
 175                         /* Map the UAR page out for userland access */
 176                         status = tavor_umap_uarpg(state, dhp, rsrcp, maplen,
 177                             &err);
 178                         if (status != DDI_SUCCESS) {
 179                                 TNF_PROBE_0(tavor_devmap_uarpg_map_fail,
 180                                     TAVOR_TNF_ERROR, "");
 181                                 TAVOR_TNF_EXIT(tavor_devmap);
 182                                 return (err);
 183                         }
 184                         break;
 185 
 186                 case MLNX_UMAP_CQMEM_RSRC:
 187                         /* Map the CQ memory out for userland access */
 188                         status = tavor_umap_cqmem(state, dhp, rsrcp, off,
 189                             maplen, &err);
 190                         if (status != DDI_SUCCESS) {
 191                                 TNF_PROBE_0(tavor_devmap_cqmem_map_fail,
 192                                     TAVOR_TNF_ERROR, "");
 193                                 TAVOR_TNF_EXIT(tavor_devmap);
 194                                 return (err);
 195                         }
 196                         break;
 197 
 198                 case MLNX_UMAP_QPMEM_RSRC:
 199                         /* Map the QP memory out for userland access */
 200                         status = tavor_umap_qpmem(state, dhp, rsrcp, off,
 201                             maplen, &err);
 202                         if (status != DDI_SUCCESS) {
 203                                 TNF_PROBE_0(tavor_devmap_qpmem_map_fail,
 204                                     TAVOR_TNF_ERROR, "");
 205                                 TAVOR_TNF_EXIT(tavor_devmap);
 206                                 return (err);
 207                         }
 208                         break;
 209 
 210                 case MLNX_UMAP_SRQMEM_RSRC:
 211                         /* Map the SRQ memory out for userland access */
 212                         status = tavor_umap_srqmem(state, dhp, rsrcp, off,
 213                             maplen, &err);
 214                         if (status != DDI_SUCCESS) {
 215                                 TNF_PROBE_0(tavor_devmap_srqmem_map_fail,
 216                                     TAVOR_TNF_ERROR, "");
 217                                 TAVOR_TNF_EXIT(tavor_devmap);
 218                                 return (err);
 219                         }
 220                         break;
 221 
 222                 default:
 223                         TAVOR_WARNING(state, "unexpected rsrc type in devmap");
 224                         TNF_PROBE_0(tavor_devmap_invrsrc_fail,
 225                             TAVOR_TNF_ERROR, "");
 226                         TAVOR_TNF_EXIT(tavor_devmap);
 227                         return (EINVAL);
 228                 }
 229         } else {
 230                 TNF_PROBE_0(tavor_devmap_umap_lookup_fail, TAVOR_TNF_ERROR, "");
 231                 TAVOR_TNF_EXIT(tavor_devmap);
 232                 return (EINVAL);
 233         }
 234 
 235         TAVOR_TNF_EXIT(tavor_devmap);
 236         return (0);
 237 }
 238 
 239 
 240 /*
 241  * tavor_umap_uarpg()
 242  *    Context: Can be called from user context.
 243  */
 244 static int
 245 tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
 246     tavor_rsrc_t *rsrcp, size_t *maplen, int *err)
 247 {
 248         int             status;
 249         uint_t          maxprot;
 250 
 251         TAVOR_TNF_ENTER(tavor_umap_uarpg);
 252 
 253         /* Map out the UAR page (doorbell page) */
 254         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 255         status = devmap_devmem_setup(dhp, state->ts_dip,
 256             &tavor_devmap_devmem_cbops, TAVOR_UAR_BAR, (rsrcp->tr_indx <<
 257             PAGESHIFT), PAGESIZE, maxprot, DEVMAP_ALLOW_REMAP,
 258             &state->ts_reg_accattr);
 259         if (status < 0) {
 260                 *err = status;
 261                 TNF_PROBE_0(tavor_umap_uarpg_devmap_fail, TAVOR_TNF_ERROR, "");
 262                 TAVOR_TNF_EXIT(tavor_umap_uarpg);
 263                 return (DDI_FAILURE);
 264         }
 265 
 266         *maplen = PAGESIZE;
 267         TAVOR_TNF_EXIT(tavor_umap_uarpg);
 268         return (DDI_SUCCESS);
 269 }
 270 
 271 
 272 /*
 273  * tavor_umap_cqmem()
 274  *    Context: Can be called from user context.
 275  */
 276 /* ARGSUSED */
 277 static int
 278 tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
 279     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
 280 {
 281         tavor_cqhdl_t   cq;
 282         size_t          size;
 283         uint_t          maxprot;
 284         int             status;
 285 
 286         TAVOR_TNF_ENTER(tavor_umap_cqmem);
 287 
 288         /* Extract the Tavor CQ handle pointer from the tavor_rsrc_t */
 289         cq = (tavor_cqhdl_t)rsrcp->tr_addr;
 290 
 291         /* Round-up the CQ size to system page size */
 292         size = ptob(btopr(cq->cq_cqinfo.qa_size));
 293 
 294         /* Map out the CQ memory */
 295         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 296         status = devmap_umem_setup(dhp, state->ts_dip,
 297             &tavor_devmap_umem_cbops, cq->cq_cqinfo.qa_umemcookie, 0, size,
 298             maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
 299         if (status < 0) {
 300                 *err = status;
 301                 TNF_PROBE_0(tavor_umap_cqmem_devmap_fail, TAVOR_TNF_ERROR, "");
 302                 TAVOR_TNF_EXIT(tavor_umap_cqmem);
 303                 return (DDI_FAILURE);
 304         }
 305         *maplen = size;
 306 
 307         TAVOR_TNF_EXIT(tavor_umap_cqmem);
 308         return (DDI_SUCCESS);
 309 }
 310 
 311 
 312 /*
 313  * tavor_umap_qpmem()
 314  *    Context: Can be called from user context.
 315  */
 316 /* ARGSUSED */
 317 static int
 318 tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
 319     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
 320 {
 321         tavor_qphdl_t   qp;
 322         offset_t        offset;
 323         size_t          size;
 324         uint_t          maxprot;
 325         int             status;
 326 
 327         TAVOR_TNF_ENTER(tavor_umap_qpmem);
 328 
 329         /* Extract the Tavor QP handle pointer from the tavor_rsrc_t */
 330         qp = (tavor_qphdl_t)rsrcp->tr_addr;
 331 
 332         /*
 333          * Calculate the offset of the first work queue (send or recv) into
 334          * the memory (ddi_umem_alloc()) allocated previously for the QP.
 335          */
 336         offset = (offset_t)((uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
 337             (uintptr_t)qp->qp_wqinfo.qa_buf_real);
 338 
 339         /* Round-up the QP work queue sizes to system page size */
 340         size = ptob(btopr(qp->qp_wqinfo.qa_size));
 341 
 342         /* Map out the QP memory */
 343         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 344         status = devmap_umem_setup(dhp, state->ts_dip,
 345             &tavor_devmap_umem_cbops, qp->qp_wqinfo.qa_umemcookie, offset,
 346             size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
 347         if (status < 0) {
 348                 *err = status;
 349                 TNF_PROBE_0(tavor_umap_qpmem_devmap_fail, TAVOR_TNF_ERROR, "");
 350                 TAVOR_TNF_EXIT(tavor_umap_qpmem);
 351                 return (DDI_FAILURE);
 352         }
 353         *maplen = size;
 354 
 355         TAVOR_TNF_EXIT(tavor_umap_qpmem);
 356         return (DDI_SUCCESS);
 357 }
 358 
 359 
 360 /*
 361  * tavor_umap_srqmem()
 362  *    Context: Can be called from user context.
 363  */
 364 /* ARGSUSED */
 365 static int
 366 tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
 367     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
 368 {
 369         tavor_srqhdl_t  srq;
 370         offset_t        offset;
 371         size_t          size;
 372         uint_t          maxprot;
 373         int             status;
 374 
 375         TAVOR_TNF_ENTER(tavor_umap_srqmem);
 376 
 377         /* Extract the Tavor SRQ handle pointer from the tavor_rsrc_t */
 378         srq = (tavor_srqhdl_t)rsrcp->tr_addr;
 379 
 380         /*
 381          * Calculate the offset of the first shared recv queue into the memory
 382          * (ddi_umem_alloc()) allocated previously for the SRQ.
 383          */
 384         offset = (offset_t)((uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
 385             (uintptr_t)srq->srq_wqinfo.qa_buf_real);
 386 
 387         /* Round-up the SRQ work queue sizes to system page size */
 388         size = ptob(btopr(srq->srq_wqinfo.qa_size));
 389 
 390         /* Map out the QP memory */
 391         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 392         status = devmap_umem_setup(dhp, state->ts_dip,
 393             &tavor_devmap_umem_cbops, srq->srq_wqinfo.qa_umemcookie, offset,
 394             size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
 395         if (status < 0) {
 396                 *err = status;
 397                 TNF_PROBE_0(tavor_umap_srqmem_devmap_fail, TAVOR_TNF_ERROR, "");
 398                 TAVOR_TNF_EXIT(tavor_umap_srqmem);
 399                 return (DDI_FAILURE);
 400         }
 401         *maplen = size;
 402 
 403         TAVOR_TNF_EXIT(tavor_umap_srqmem);
 404         return (DDI_SUCCESS);
 405 }
 406 
 407 
 408 /*
 409  * tavor_devmap_umem_map()
 410  *    Context: Can be called from kernel context.
 411  */
 412 /* ARGSUSED */
 413 static int
 414 tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
 415     offset_t off, size_t len, void **pvtp)
 416 {
 417         tavor_state_t           *state;
 418         tavor_devmap_track_t    *dvm_track;
 419         tavor_cqhdl_t           cq;
 420         tavor_qphdl_t           qp;
 421         tavor_srqhdl_t          srq;
 422         minor_t                 instance;
 423         uint64_t                key;
 424         uint_t                  type;
 425 
 426         TAVOR_TNF_ENTER(tavor_devmap_umem_map);
 427 
 428         /* Get Tavor softstate structure from instance */
 429         instance = TAVOR_DEV_INSTANCE(dev);
 430         state = ddi_get_soft_state(tavor_statep, instance);
 431         if (state == NULL) {
 432                 TNF_PROBE_0(tavor_devmap_umem_map_gss_fail, TAVOR_TNF_ERROR,
 433                     "");
 434                 TAVOR_TNF_EXIT(tavor_devmap_umem_map);
 435                 return (ENXIO);
 436         }
 437 
 438         /*
 439          * The bottom bits of "offset" are undefined (number depends on
 440          * system PAGESIZE).  Shifting these off leaves us with a "key".
 441          * The "key" is actually a combination of both a real key value
 442          * (for the purpose of database lookup) and a "type" value.  Although
 443          * we are not going to do any database lookup per se, we do want
 444          * to extract the "key" and the "type" (to enable faster lookup of
 445          * the appropriate CQ or QP handle).
 446          */
 447         key  = off >> PAGESHIFT;
 448         type = key & MLNX_UMAP_RSRC_TYPE_MASK;
 449         key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
 450 
 451         /*
 452          * Allocate an entry to track the mapping and unmapping (specifically,
 453          * partial unmapping) of this resource.
 454          */
 455         dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
 456             sizeof (tavor_devmap_track_t), KM_SLEEP);
 457         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
 458         dvm_track->tdt_offset = off;
 459         dvm_track->tdt_state  = state;
 460         dvm_track->tdt_refcnt = 1;
 461         mutex_init(&dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
 462             DDI_INTR_PRI(state->ts_intrmsi_pri));
 463 
 464         /*
 465          * Depending of the type of resource that has been mapped out, we
 466          * need to update the QP or CQ handle to reflect that it has, in
 467          * fact, been mapped.  This allows the driver code which frees a QP
 468          * or a CQ to know whether it is appropriate to do a
 469          * devmap_devmem_remap() to invalidate the userland mapping for the
 470          * corresponding queue's memory.
 471          */
 472         if (type == MLNX_UMAP_CQMEM_RSRC) {
 473 
 474                 /* Use "key" (CQ number) to do fast lookup of CQ handle */
 475                 cq = tavor_cqhdl_from_cqnum(state, key);
 476 
 477                 /*
 478                  * Update the handle to the userland mapping.  Note:  If
 479                  * the CQ already has a valid userland mapping, then stop
 480                  * and return failure.
 481                  */
 482                 mutex_enter(&cq->cq_lock);
 483                 if (cq->cq_umap_dhp == NULL) {
 484                         cq->cq_umap_dhp = dhp;
 485                         dvm_track->tdt_size = cq->cq_cqinfo.qa_size;
 486                         mutex_exit(&cq->cq_lock);
 487                 } else {
 488                         mutex_exit(&cq->cq_lock);
 489                         goto umem_map_fail;
 490                 }
 491 
 492         } else if (type == MLNX_UMAP_QPMEM_RSRC) {
 493 
 494                 /* Use "key" (QP number) to do fast lookup of QP handle */
 495                 qp = tavor_qphdl_from_qpnum(state, key);
 496 
 497                 /*
 498                  * Update the handle to the userland mapping.  Note:  If
 499                  * the CQ already has a valid userland mapping, then stop
 500                  * and return failure.
 501                  */
 502                 mutex_enter(&qp->qp_lock);
 503                 if (qp->qp_umap_dhp == NULL) {
 504                         qp->qp_umap_dhp = dhp;
 505                         dvm_track->tdt_size = qp->qp_wqinfo.qa_size;
 506                         mutex_exit(&qp->qp_lock);
 507                 } else {
 508                         mutex_exit(&qp->qp_lock);
 509                         goto umem_map_fail;
 510                 }
 511 
 512         } else if (type == MLNX_UMAP_SRQMEM_RSRC) {
 513 
 514                 /* Use "key" (SRQ number) to do fast lookup on SRQ handle */
 515                 srq = tavor_srqhdl_from_srqnum(state, key);
 516 
 517                 /*
 518                  * Update the handle to the userland mapping.  Note:  If the
 519                  * SRQ already has a valid userland mapping, then stop and
 520                  * return failure.
 521                  */
 522                 mutex_enter(&srq->srq_lock);
 523                 if (srq->srq_umap_dhp == NULL) {
 524                         srq->srq_umap_dhp = dhp;
 525                         dvm_track->tdt_size = srq->srq_wqinfo.qa_size;
 526                         mutex_exit(&srq->srq_lock);
 527                 } else {
 528                         mutex_exit(&srq->srq_lock);
 529                         goto umem_map_fail;
 530                 }
 531         }
 532 
 533         /*
 534          * Pass the private "Tavor devmap tracking structure" back.  This
 535          * pointer will be returned in subsequent "unmap" callbacks.
 536          */
 537         *pvtp = dvm_track;
 538 
 539         TAVOR_TNF_EXIT(tavor_devmap_umem_map);
 540         return (DDI_SUCCESS);
 541 
 542 umem_map_fail:
 543         mutex_destroy(&dvm_track->tdt_lock);
 544         kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
 545         TAVOR_TNF_EXIT(tavor_devmap_umem_map);
 546         return (DDI_FAILURE);
 547 }
 548 
 549 
 550 /*
 551  * tavor_devmap_umem_dup()
 552  *    Context: Can be called from kernel context.
 553  */
 554 /* ARGSUSED */
 555 static int
 556 tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp,
 557     void **new_pvtp)
 558 {
 559         tavor_state_t           *state;
 560         tavor_devmap_track_t    *dvm_track, *new_dvm_track;
 561         uint_t                  maxprot;
 562         int                     status;
 563 
 564         TAVOR_TNF_ENTER(tavor_devmap_umem_dup);
 565 
 566         /*
 567          * Extract the Tavor softstate pointer from "Tavor devmap tracking
 568          * structure" (in "pvtp").
 569          */
 570         dvm_track = (tavor_devmap_track_t *)pvtp;
 571         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
 572         state = dvm_track->tdt_state;
 573 
 574         /*
 575          * Since this devmap_dup() entry point is generally called
 576          * when a process does fork(2), it is incumbent upon the driver
 577          * to insure that the child does not inherit a valid copy of
 578          * the parent's QP or CQ resource.  This is accomplished by using
 579          * devmap_devmem_remap() to invalidate the child's mapping to the
 580          * kernel memory.
 581          */
 582         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 583         status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
 584             dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
 585         if (status != DDI_SUCCESS) {
 586                 TAVOR_WARNING(state, "failed in tavor_devmap_umem_dup()");
 587                 TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
 588                 return (status);
 589         }
 590 
 591         /*
 592          * Allocate a new entry to track the subsequent unmapping
 593          * (specifically, all partial unmappings) of the child's newly
 594          * invalidated resource.  Note: Setting the "tdt_size" field to
 595          * zero here is an indication to the devmap_unmap() entry point
 596          * that this mapping is invalid, and that its subsequent unmapping
 597          * should not affect any of the parent's CQ or QP resources.
 598          */
 599         new_dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
 600             sizeof (tavor_devmap_track_t), KM_SLEEP);
 601         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*new_dvm_track))
 602         new_dvm_track->tdt_offset = 0;
 603         new_dvm_track->tdt_state  = state;
 604         new_dvm_track->tdt_refcnt = 1;
 605         new_dvm_track->tdt_size        = 0;
 606         mutex_init(&new_dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
 607             DDI_INTR_PRI(state->ts_intrmsi_pri));
 608         *new_pvtp = new_dvm_track;
 609 
 610         TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
 611         return (DDI_SUCCESS);
 612 }
 613 
 614 
 615 /*
 616  * tavor_devmap_umem_unmap()
 617  *    Context: Can be called from kernel context.
 618  */
 619 /* ARGSUSED */
 620 static void
 621 tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
 622     size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
 623     devmap_cookie_t new_dhp2, void **pvtp2)
 624 {
 625         tavor_state_t           *state;
 626         tavor_rsrc_t            *rsrcp;
 627         tavor_devmap_track_t    *dvm_track;
 628         tavor_cqhdl_t           cq;
 629         tavor_qphdl_t           qp;
 630         tavor_srqhdl_t          srq;
 631         uint64_t                key, value;
 632         uint_t                  type;
 633         uint_t                  size;
 634         int                     status;
 635 
 636         TAVOR_TNF_ENTER(tavor_devmap_umem_unmap);
 637 
 638         /*
 639          * Extract the Tavor softstate pointer from "Tavor devmap tracking
 640          * structure" (in "pvtp").
 641          */
 642         dvm_track = (tavor_devmap_track_t *)pvtp;
 643         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
 644         state     = dvm_track->tdt_state;
 645 
 646         /*
 647          * Extract the "offset" from the "Tavor devmap tracking structure".
 648          * Note: The input argument "off" is ignored here because the
 649          * Tavor mapping interfaces define a very specific meaning to
 650          * each "logical offset".  Also extract the "key" and "type" encoded
 651          * in the logical offset.
 652          */
 653         key  = dvm_track->tdt_offset >> PAGESHIFT;
 654         type = key & MLNX_UMAP_RSRC_TYPE_MASK;
 655         key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
 656 
 657         /*
 658          * Extract the "size" of the mapping.  If this size is determined
 659          * to be zero, then it is an indication of a previously invalidated
 660          * mapping, and no CQ or QP resources should be affected.
 661          */
 662         size = dvm_track->tdt_size;
 663 
 664         /*
         * If only the "middle" portion of a given mapping is being unmapped,
         * then we are effectively creating one new piece of mapped memory.
         * (The original region is divided into three pieces, of which the
         * middle piece is being removed.  This leaves two pieces.)  Since we
         * started with one piece and now have two pieces, we need to increment
         * the counter in the "Tavor devmap tracking structure".
 671          *
 672          * If, however, the whole mapped region is being unmapped, then we
 673          * have started with one region which we are completely removing.
 674          * In this case, we need to decrement the counter in the "Tavor
 675          * devmap tracking structure".
 676          *
 677          * In each of the remaining cases, we will have started with one
 678          * mapped region and ended with one (different) region.  So no counter
 679          * modification is necessary.
 680          */
 681         mutex_enter(&dvm_track->tdt_lock);
 682         if ((new_dhp1 == NULL) && (new_dhp2 == NULL)) {
 683                 dvm_track->tdt_refcnt--;
 684         } else if ((new_dhp1 != NULL) && (new_dhp2 != NULL)) {
 685                 dvm_track->tdt_refcnt++;
 686         }
 687         mutex_exit(&dvm_track->tdt_lock);
 688 
 689         /*
 690          * For each of the cases where the region is being divided, then we
 691          * need to pass back the "Tavor devmap tracking structure".  This way
 692          * we get it back when each of the remaining pieces is subsequently
 693          * unmapped.
 694          */
 695         if (new_dhp1 != NULL) {
 696                 *pvtp1 = pvtp;
 697         }
 698         if (new_dhp2 != NULL) {
 699                 *pvtp2 = pvtp;
 700         }
 701 
 702         /*
 703          * If the "Tavor devmap tracking structure" is no longer being
 704          * referenced, then free it up.  Otherwise, return.
 705          */
 706         if (dvm_track->tdt_refcnt == 0) {
 707                 mutex_destroy(&dvm_track->tdt_lock);
 708                 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
 709 
 710                 /*
 711                  * If the mapping was invalid (see explanation above), then
 712                  * no further processing is necessary.
 713                  */
 714                 if (size == 0) {
 715                         TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
 716                         return;
 717                 }
 718         } else {
 719                 TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
 720                 return;
 721         }
 722 
 723         /*
 724          * Now that we can guarantee that the user memory is fully unmapped,
 725          * we can use the "key" and "type" values to try to find the entry
 726          * in the "userland resources database".  If it's found, then it
 727          * indicates that the queue memory (CQ or QP) has not yet been freed.
 728          * In this case, we update the corresponding CQ or QP handle to
 729          * indicate that the "devmap_devmem_remap()" call will be unnecessary.
 730          * If it's _not_ found, then it indicates that the CQ or QP memory
 731          * was, in fact, freed before it was unmapped (thus requiring a
 732          * previous invalidation by remapping - which will already have
 733          * been done in the free routine).
 734          */
 735         status = tavor_umap_db_find(state->ts_instance, key, type, &value,
 736             0, NULL);
 737         if (status == DDI_SUCCESS) {
 738                 /*
 739                  * Depending on the type of the mapped resource (CQ or QP),
 740                  * update handle to indicate that no invalidation remapping
 741                  * will be necessary.
 742                  */
 743                 if (type == MLNX_UMAP_CQMEM_RSRC) {
 744 
 745                         /* Use "value" to convert to CQ handle */
 746                         rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
 747                         cq = (tavor_cqhdl_t)rsrcp->tr_addr;
 748 
 749                         /*
 750                          * Invalidate the handle to the userland mapping.
 751                          * Note: We must ensure that the mapping being
 752                          * unmapped here is the current one for the CQ.  It
 753                          * is possible that it might not be if this CQ has
 754                          * been resized and the previous CQ memory has not
 755                          * yet been unmapped.  But in that case, because of
 756                          * the devmap_devmem_remap(), there is no longer any
 757                          * association between the mapping and the real CQ
 758                          * kernel memory.
 759                          */
 760                         mutex_enter(&cq->cq_lock);
 761                         if (cq->cq_umap_dhp == dhp) {
 762                                 cq->cq_umap_dhp = (devmap_cookie_t)NULL;
 763                         }
 764                         mutex_exit(&cq->cq_lock);
 765 
 766                 } else if (type == MLNX_UMAP_QPMEM_RSRC) {
 767 
 768                         /* Use "value" to convert to QP handle */
 769                         rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
 770                         qp = (tavor_qphdl_t)rsrcp->tr_addr;
 771 
 772                         /*
 773                          * Invalidate the handle to the userland mapping.
 774                          * Note: we ensure that the mapping being unmapped
 775                          * here is the current one for the QP.  This is
 776                          * more of a sanity check here since, unlike CQs
 777                          * (above) we do not support resize of QPs.
 778                          */
 779                         mutex_enter(&qp->qp_lock);
 780                         if (qp->qp_umap_dhp == dhp) {
 781                                 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
 782                         }
 783                         mutex_exit(&qp->qp_lock);
 784 
 785                 } else if (type == MLNX_UMAP_SRQMEM_RSRC) {
 786 
 787                         /* Use "value" to convert to SRQ handle */
 788                         rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
 789                         srq = (tavor_srqhdl_t)rsrcp->tr_addr;
 790 
 791                         /*
 792                          * Invalidate the handle to the userland mapping.
 793                          * Note: we ensure that the mapping being unmapped
 794                          * here is the current one for the QP.  This is
 795                          * more of a sanity check here since, unlike CQs
 796                          * (above) we do not support resize of QPs.
 797                          */
 798                         mutex_enter(&srq->srq_lock);
 799                         if (srq->srq_umap_dhp == dhp) {
 800                                 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
 801                         }
 802                         mutex_exit(&srq->srq_lock);
 803                 }
 804         }
 805 
 806         TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
 807 }
 808 
 809 
 810 /*
 811  * tavor_devmap_devmem_map()
 812  *    Context: Can be called from kernel context.
 813  */
 814 /* ARGSUSED */
 815 static int
 816 tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
 817     offset_t off, size_t len, void **pvtp)
 818 {
 819         tavor_state_t           *state;
 820         tavor_devmap_track_t    *dvm_track;
 821         minor_t                 instance;
 822 
 823         TAVOR_TNF_ENTER(tavor_devmap_devmem_map);
 824 
 825         /* Get Tavor softstate structure from instance */
 826         instance = TAVOR_DEV_INSTANCE(dev);
 827         state = ddi_get_soft_state(tavor_statep, instance);
 828         if (state == NULL) {
 829                 TNF_PROBE_0(tavor_devmap_devmem_map_gss_fail, TAVOR_TNF_ERROR,
 830                     "");
 831                 TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
 832                 return (ENXIO);
 833         }
 834 
 835         /*
 836          * Allocate an entry to track the mapping and unmapping of this
 837          * resource.  Note:  We don't need to initialize the "refcnt" or
 838          * "offset" fields here, nor do we need to initialize the mutex
 839          * used with the "refcnt".  Since UAR pages are single pages, they
 840          * are not subject to "partial" unmappings.  This makes these other
 841          * fields unnecessary.
 842          */
 843         dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
 844             sizeof (tavor_devmap_track_t), KM_SLEEP);
 845         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
 846         dvm_track->tdt_state  = state;
 847         dvm_track->tdt_size   = PAGESIZE;
 848 
 849         /*
 850          * Pass the private "Tavor devmap tracking structure" back.  This
 851          * pointer will be returned in a subsequent "unmap" callback.
 852          */
 853         *pvtp = dvm_track;
 854 
 855         TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
 856         return (DDI_SUCCESS);
 857 }
 858 
 859 
 860 /*
 861  * tavor_devmap_devmem_dup()
 862  *    Context: Can be called from kernel context.
 863  */
 864 /* ARGSUSED */
 865 static int
 866 tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
 867     devmap_cookie_t new_dhp, void **new_pvtp)
 868 {
 869         tavor_state_t           *state;
 870         tavor_devmap_track_t    *dvm_track;
 871         uint_t                  maxprot;
 872         int                     status;
 873 
 874         TAVOR_TNF_ENTER(tavor_devmap_devmem_dup);
 875 
 876         /*
 877          * Extract the Tavor softstate pointer from "Tavor devmap tracking
 878          * structure" (in "pvtp").  Note: If the tracking structure is NULL
 879          * here, it means that the mapping corresponds to an invalid mapping.
 880          * In this case, it can be safely ignored ("new_pvtp" set to NULL).
 881          */
 882         dvm_track = (tavor_devmap_track_t *)pvtp;
 883         if (dvm_track == NULL) {
 884                 *new_pvtp = NULL;
 885                 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
 886                 return (DDI_SUCCESS);
 887         }
 888 
 889         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
 890         state = dvm_track->tdt_state;
 891 
 892         /*
 893          * Since this devmap_dup() entry point is generally called
 894          * when a process does fork(2), it is incumbent upon the driver
 895          * to insure that the child does not inherit a valid copy of
 896          * the parent's resource.  This is accomplished by using
 897          * devmap_devmem_remap() to invalidate the child's mapping to the
 898          * kernel memory.
 899          */
 900         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 901         status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
 902             dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
 903         if (status != DDI_SUCCESS) {
 904                 TAVOR_WARNING(state, "failed in tavor_devmap_devmem_dup()");
 905                 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
 906                 return (status);
 907         }
 908 
 909         /*
 910          * Since the region is invalid, there is no need for us to
 911          * allocate and continue to track an additional "Tavor devmap
 912          * tracking structure".  Instead we return NULL here, which is an
 913          * indication to the devmap_unmap() entry point that this entry
 914          * can be safely ignored.
 915          */
 916         *new_pvtp = NULL;
 917 
 918         TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
 919         return (DDI_SUCCESS);
 920 }
 921 
 922 
 923 /*
 924  * tavor_devmap_devmem_unmap()
 925  *    Context: Can be called from kernel context.
 926  */
 927 /* ARGSUSED */
 928 static void
 929 tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
 930     size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
 931     devmap_cookie_t new_dhp2, void **pvtp2)
 932 {
 933         tavor_devmap_track_t    *dvm_track;
 934 
 935         TAVOR_TNF_ENTER(tavor_devmap_devmem_unmap);
 936 
 937         /*
 938          * Free up the "Tavor devmap tracking structure" (in "pvtp").
 939          * There cannot be "partial" unmappings here because all UAR pages
 940          * are single pages.  Note: If the tracking structure is NULL here,
 941          * it means that the mapping corresponds to an invalid mapping.  In
 942          * this case, it can be safely ignored.
 943          */
 944         dvm_track = (tavor_devmap_track_t *)pvtp;
 945         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
 946         if (dvm_track == NULL) {
 947                 TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
 948                 return;
 949         }
 950 
 951         kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
 952         TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
 953 }
 954 
 955 
 956 /*
 957  * tavor_umap_ci_data_in()
 958  *    Context: Can be called from user or kernel context.
 959  */
 960 /* ARGSUSED */
 961 ibt_status_t
 962 tavor_umap_ci_data_in(tavor_state_t *state, ibt_ci_data_flags_t flags,
 963     ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
 964 {
 965         int     status;
 966 
 967         TAVOR_TNF_ENTER(tavor_umap_ci_data_in);
 968 
 969         /*
 970          * Depending on the type of object about which additional information
 971          * is being provided (currently only MR is supported), we call the
 972          * appropriate resource-specific function.
 973          */
 974         switch (object) {
 975         case IBT_HDL_MR:
 976                 status = tavor_umap_mr_data_in((tavor_mrhdl_t)hdl,
 977                     (ibt_mr_data_in_t *)data_p, data_sz);
 978                 if (status != DDI_SUCCESS) {
 979                         TNF_PROBE_0(tavor_umap_mr_data_in_fail,
 980                             TAVOR_TNF_ERROR, "");
 981                         TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
 982                         return (status);
 983                 }
 984                 break;
 985 
 986         /*
 987          * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
 988          * since the Tavor driver does not support these.
 989          */
 990         case IBT_HDL_HCA:
 991         case IBT_HDL_QP:
 992         case IBT_HDL_CQ:
 993         case IBT_HDL_PD:
 994         case IBT_HDL_MW:
 995         case IBT_HDL_AH:
 996         case IBT_HDL_SCHED:
 997         case IBT_HDL_EEC:
 998         case IBT_HDL_RDD:
 999         case IBT_HDL_SRQ:
1000                 TNF_PROBE_0(tavor_umap_ci_data_in_unsupp_type,
1001                     TAVOR_TNF_ERROR, "");
1002                 TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1003                 return (IBT_NOT_SUPPORTED);
1004 
1005         /*
1006          * Any other types are invalid.
1007          */
1008         default:
1009                 TNF_PROBE_0(tavor_umap_ci_data_in_invtype_fail,
1010                     TAVOR_TNF_ERROR, "");
1011                 TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1012                 return (IBT_INVALID_PARAM);
1013         }
1014 
1015         TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1016         return (DDI_SUCCESS);
1017 }
1018 
1019 
1020 /*
1021  * tavor_umap_mr_data_in()
1022  *    Context: Can be called from user or kernel context.
1023  */
1024 static ibt_status_t
1025 tavor_umap_mr_data_in(tavor_mrhdl_t mr, ibt_mr_data_in_t *data,
1026     size_t data_sz)
1027 {
1028         TAVOR_TNF_ENTER(tavor_umap_mr_data_in);
1029 
1030         if (data->mr_rev != IBT_MR_DATA_IN_IF_VERSION) {
1031                 TNF_PROBE_0(tavor_umap_mr_data_in_ver_fail,
1032                     TAVOR_TNF_ERROR, "");
1033                 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1034                 return (IBT_NOT_SUPPORTED);
1035         }
1036 
1037         /* Check for valid MR handle pointer */
1038         if (mr == NULL) {
1039                 TNF_PROBE_0(tavor_umap_mr_data_in_invmrhdl_fail,
1040                     TAVOR_TNF_ERROR, "");
1041                 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1042                 return (IBT_MR_HDL_INVALID);
1043         }
1044 
1045         /* Check for valid MR input structure size */
1046         if (data_sz < sizeof (ibt_mr_data_in_t)) {
1047                 TNF_PROBE_0(tavor_umap_mr_data_in_invdatasz_fail,
1048                     TAVOR_TNF_ERROR, "");
1049                 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1050                 return (IBT_INSUFF_RESOURCE);
1051         }
1052         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1053 
1054         /*
1055          * Ensure that the MR corresponds to userland memory and that it is
1056          * a currently valid memory region as well.
1057          */
1058         mutex_enter(&mr->mr_lock);
1059         if ((mr->mr_is_umem == 0) || (mr->mr_umemcookie == NULL)) {
1060                 mutex_exit(&mr->mr_lock);
1061                 TNF_PROBE_0(tavor_umap_mr_data_in_invumem_fail,
1062                     TAVOR_TNF_ERROR, "");
1063                 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1064                 return (IBT_MR_HDL_INVALID);
1065         }
1066 
1067         /*
1068          * If it has passed all the above checks, then extract the callback
1069          * function and argument from the input structure.  Copy them into
1070          * the MR handle.  This function will be called only if the memory
1071          * corresponding to the MR handle gets a umem_lockmemory() callback.
1072          */
1073         mr->mr_umem_cbfunc = data->mr_func;
1074         mr->mr_umem_cbarg1 = data->mr_arg1;
1075         mr->mr_umem_cbarg2 = data->mr_arg2;
1076         mutex_exit(&mr->mr_lock);
1077 
1078         TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1079         return (DDI_SUCCESS);
1080 }
1081 
1082 
1083 /*
1084  * tavor_umap_ci_data_out()
1085  *    Context: Can be called from user or kernel context.
1086  */
1087 /* ARGSUSED */
1088 ibt_status_t
1089 tavor_umap_ci_data_out(tavor_state_t *state, ibt_ci_data_flags_t flags,
1090     ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
1091 {
1092         int     status;
1093 
1094         TAVOR_TNF_ENTER(tavor_umap_ci_data_out);
1095 
1096         /*
1097          * Depending on the type of object about which additional information
1098          * is being requested (CQ or QP), we call the appropriate resource-
1099          * specific mapping function.
1100          */
1101         switch (object) {
1102         case IBT_HDL_CQ:
1103                 status = tavor_umap_cq_data_out((tavor_cqhdl_t)hdl,
1104                     (mlnx_umap_cq_data_out_t *)data_p, data_sz);
1105                 if (status != DDI_SUCCESS) {
1106                         TNF_PROBE_0(tavor_umap_cq_data_out_fail,
1107                             TAVOR_TNF_ERROR, "");
1108                         TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1109                         return (status);
1110                 }
1111                 break;
1112 
1113         case IBT_HDL_QP:
1114                 status = tavor_umap_qp_data_out((tavor_qphdl_t)hdl,
1115                     (mlnx_umap_qp_data_out_t *)data_p, data_sz);
1116                 if (status != DDI_SUCCESS) {
1117                         TNF_PROBE_0(tavor_umap_qp_data_out_fail,
1118                             TAVOR_TNF_ERROR, "");
1119                         TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1120                         return (status);
1121                 }
1122                 break;
1123 
1124         case IBT_HDL_SRQ:
1125                 status = tavor_umap_srq_data_out((tavor_srqhdl_t)hdl,
1126                     (mlnx_umap_srq_data_out_t *)data_p, data_sz);
1127                 if (status != DDI_SUCCESS) {
1128                         TNF_PROBE_0(tavor_umap_srq_data_out_fail,
1129                             TAVOR_TNF_ERROR, "");
1130                         TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1131                         return (status);
1132                 }
1133                 break;
1134 
1135         /*
1136          * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
1137          * since the Tavor driver does not support these.
1138          */
1139         case IBT_HDL_PD:
1140                 status = tavor_umap_pd_data_out((tavor_pdhdl_t)hdl,
1141                     (mlnx_umap_pd_data_out_t *)data_p, data_sz);
1142                 if (status != DDI_SUCCESS) {
1143                         TNF_PROBE_0(tavor_umap_pd_data_out_fail,
1144                             TAVOR_TNF_ERROR, "");
1145                         TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1146                         return (status);
1147                 }
1148                 break;
1149 
1150         case IBT_HDL_HCA:
1151         case IBT_HDL_MR:
1152         case IBT_HDL_MW:
1153         case IBT_HDL_AH:
1154         case IBT_HDL_SCHED:
1155         case IBT_HDL_EEC:
1156         case IBT_HDL_RDD:
1157                 TNF_PROBE_0(tavor_umap_ci_data_out_unsupp_type,
1158                     TAVOR_TNF_ERROR, "");
1159                 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1160                 return (IBT_NOT_SUPPORTED);
1161 
1162         /*
1163          * Any other types are invalid.
1164          */
1165         default:
1166                 TNF_PROBE_0(tavor_umap_ci_data_out_invtype_fail,
1167                     TAVOR_TNF_ERROR, "");
1168                 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1169                 return (IBT_INVALID_PARAM);
1170         }
1171 
1172         TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1173         return (DDI_SUCCESS);
1174 }
1175 
1176 
1177 /*
1178  * tavor_umap_cq_data_out()
1179  *    Context: Can be called from user or kernel context.
1180  */
1181 static ibt_status_t
1182 tavor_umap_cq_data_out(tavor_cqhdl_t cq, mlnx_umap_cq_data_out_t *data,
1183     size_t data_sz)
1184 {
1185         TAVOR_TNF_ENTER(tavor_umap_cq_data_out);
1186 
1187         /* Check for valid CQ handle pointer */
1188         if (cq == NULL) {
1189                 TNF_PROBE_0(tavor_umap_cq_data_out_invcqhdl_fail,
1190                     TAVOR_TNF_ERROR, "");
1191                 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1192                 return (IBT_CQ_HDL_INVALID);
1193         }
1194 
1195         /* Check for valid CQ mapping structure size */
1196         if (data_sz < sizeof (mlnx_umap_cq_data_out_t)) {
1197                 TNF_PROBE_0(tavor_umap_cq_data_out_invdatasz_fail,
1198                     TAVOR_TNF_ERROR, "");
1199                 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1200                 return (IBT_INSUFF_RESOURCE);
1201         }
1202         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1203 
1204         /*
1205          * If it has passed all the above checks, then fill in all the useful
1206          * mapping information (including the mapping offset that will be
1207          * passed back to the devmap() interface during a subsequent mmap()
1208          * call.
1209          *
1210          * The "offset" for CQ mmap()'s looks like this:
1211          * +----------------------------------------+--------+--------------+
1212          * |               CQ Number                |  0x33  | Reserved (0) |
1213          * +----------------------------------------+--------+--------------+
1214          *         (64 - 8 - PAGESHIFT) bits        8 bits      PAGESHIFT bits
1215          *
1216          * This returns information about the mapping offset, the length of
1217          * the CQ memory, the CQ number (for use in later CQ doorbells), the
1218          * number of CQEs the CQ memory can hold, and the size of each CQE.
1219          */
1220         data->mcq_rev                = MLNX_UMAP_IF_VERSION;
1221         data->mcq_mapoffset  = ((((uint64_t)cq->cq_cqnum <<
1222             MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC) << PAGESHIFT);
1223         data->mcq_maplen     = cq->cq_cqinfo.qa_size;
1224         data->mcq_cqnum              = cq->cq_cqnum;
1225         data->mcq_numcqe     = cq->cq_bufsz;
1226         data->mcq_cqesz              = sizeof (tavor_hw_cqe_t);
1227 
1228         TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1229         return (DDI_SUCCESS);
1230 }
1231 
1232 
1233 /*
1234  * tavor_umap_qp_data_out()
1235  *    Context: Can be called from user or kernel context.
1236  */
1237 static ibt_status_t
1238 tavor_umap_qp_data_out(tavor_qphdl_t qp, mlnx_umap_qp_data_out_t *data,
1239     size_t data_sz)
1240 {
1241         TAVOR_TNF_ENTER(tavor_umap_qp_data_out);
1242 
1243         /* Check for valid QP handle pointer */
1244         if (qp == NULL) {
1245                 TNF_PROBE_0(tavor_umap_qp_data_out_invqphdl_fail,
1246                     TAVOR_TNF_ERROR, "");
1247                 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1248                 return (IBT_QP_HDL_INVALID);
1249         }
1250 
1251         /* Check for valid QP mapping structure size */
1252         if (data_sz < sizeof (mlnx_umap_qp_data_out_t)) {
1253                 TNF_PROBE_0(tavor_umap_qp_data_out_invdatasz_fail,
1254                     TAVOR_TNF_ERROR, "");
1255                 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1256                 return (IBT_INSUFF_RESOURCE);
1257         }
1258         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1259 
1260         /*
1261          * If it has passed all the checks, then fill in all the useful
1262          * mapping information (including the mapping offset that will be
1263          * passed back to the devmap() interface during a subsequent mmap()
1264          * call.
1265          *
1266          * The "offset" for QP mmap()'s looks like this:
1267          * +----------------------------------------+--------+--------------+
1268          * |               QP Number                |  0x44  | Reserved (0) |
1269          * +----------------------------------------+--------+--------------+
1270          *         (64 - 8 - PAGESHIFT) bits        8 bits      PAGESHIFT bits
1271          *
1272          * This returns information about the mapping offset, the length of
1273          * the QP memory, and the QP number (for use in later send and recv
1274          * doorbells).  It also returns the following information for both
1275          * the receive work queue and the send work queue, respectively:  the
1276          * offset (from the base mapped address) of the start of the given
1277          * work queue, the 64-bit IB virtual address that corresponds to
1278          * the base mapped address (needed for posting WQEs though the
1279          * QP doorbells), the number of WQEs the given work queue can hold,
1280          * and the size of each WQE for the given work queue.
1281          */
1282         data->mqp_rev                = MLNX_UMAP_IF_VERSION;
1283         data->mqp_mapoffset  = ((((uint64_t)qp->qp_qpnum <<
1284             MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_QPMEM_RSRC) << PAGESHIFT);
1285         data->mqp_maplen     = qp->qp_wqinfo.qa_size;
1286         data->mqp_qpnum              = qp->qp_qpnum;
1287 
1288         /*
1289          * If this QP is associated with a shared receive queue (SRQ),
1290          * then return invalid RecvQ parameters.  Otherwise, return
1291          * the proper parameter values.
1292          */
1293         if (qp->qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
1294                 data->mqp_rq_off     = (uint32_t)qp->qp_wqinfo.qa_size;
1295                 data->mqp_rq_desc_addr       = (uint32_t)qp->qp_wqinfo.qa_size;
1296                 data->mqp_rq_numwqe  = 0;
1297                 data->mqp_rq_wqesz   = 0;
1298         } else {
1299                 data->mqp_rq_off     = (uintptr_t)qp->qp_rq_buf -
1300                     (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1301                 data->mqp_rq_desc_addr       = (uint32_t)((uintptr_t)qp->qp_rq_buf -
1302                     qp->qp_desc_off);
1303                 data->mqp_rq_numwqe  = qp->qp_rq_bufsz;
1304                 data->mqp_rq_wqesz   = (1 << qp->qp_rq_log_wqesz);
1305         }
1306         data->mqp_sq_off     = (uintptr_t)qp->qp_sq_buf -
1307             (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1308         data->mqp_sq_desc_addr       = (uint32_t)((uintptr_t)qp->qp_sq_buf -
1309             qp->qp_desc_off);
1310         data->mqp_sq_numwqe  = qp->qp_sq_bufsz;
1311         data->mqp_sq_wqesz   = (1 << qp->qp_sq_log_wqesz);
1312 
1313         TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1314         return (DDI_SUCCESS);
1315 }
1316 
1317 
1318 /*
1319  * tavor_umap_srq_data_out()
1320  *    Context: Can be called from user or kernel context.
1321  */
1322 static ibt_status_t
1323 tavor_umap_srq_data_out(tavor_srqhdl_t srq, mlnx_umap_srq_data_out_t *data,
1324     size_t data_sz)
1325 {
1326         TAVOR_TNF_ENTER(tavor_umap_srq_data_out);
1327 
1328         /* Check for valid SRQ handle pointer */
1329         if (srq == NULL) {
1330                 TNF_PROBE_0(tavor_umap_srq_data_out_invsrqhdl_fail,
1331                     TAVOR_TNF_ERROR, "");
1332                 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1333                 return (IBT_SRQ_HDL_INVALID);
1334         }
1335 
1336         /* Check for valid SRQ mapping structure size */
1337         if (data_sz < sizeof (mlnx_umap_srq_data_out_t)) {
1338                 TNF_PROBE_0(tavor_umap_srq_data_out_invdatasz_fail,
1339                     TAVOR_TNF_ERROR, "");
1340                 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1341                 return (IBT_INSUFF_RESOURCE);
1342         }
1343         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1344 
1345         /*
1346          * If it has passed all the checks, then fill in all the useful
1347          * mapping information (including the mapping offset that will be
1348          * passed back to the devmap() interface during a subsequent mmap()
1349          * call.
1350          *
1351          * The "offset" for SRQ mmap()'s looks like this:
1352          * +----------------------------------------+--------+--------------+
1353          * |               SRQ Number               |  0x66  | Reserved (0) |
1354          * +----------------------------------------+--------+--------------+
1355          *         (64 - 8 - PAGESHIFT) bits        8 bits      PAGESHIFT bits
1356          *
1357          * This returns information about the mapping offset, the length of the
1358          * SRQ memory, and the SRQ number (for use in later send and recv
1359          * doorbells).  It also returns the following information for the
1360          * shared receive queue: the offset (from the base mapped address) of
1361          * the start of the given work queue, the 64-bit IB virtual address
1362          * that corresponds to the base mapped address (needed for posting WQEs
1363          * though the QP doorbells), the number of WQEs the given work queue
1364          * can hold, and the size of each WQE for the given work queue.
1365          */
1366         data->msrq_rev               = MLNX_UMAP_IF_VERSION;
1367         data->msrq_mapoffset = ((((uint64_t)srq->srq_srqnum <<
1368             MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_SRQMEM_RSRC) << PAGESHIFT);
1369         data->msrq_maplen    = srq->srq_wqinfo.qa_size;
1370         data->msrq_srqnum    = srq->srq_srqnum;
1371 
1372         data->msrq_desc_addr = (uint32_t)((uintptr_t)srq->srq_wq_buf -
1373             srq->srq_desc_off);
1374         data->msrq_numwqe    = srq->srq_wq_bufsz;
1375         data->msrq_wqesz     = (1 << srq->srq_wq_log_wqesz);
1376 
1377         TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1378         return (DDI_SUCCESS);
1379 }
1380 
1381 /*
1382  * tavor_umap_pd_data_out()
1383  *    Context: Can be called from user or kernel context.
1384  */
1385 static ibt_status_t
1386 tavor_umap_pd_data_out(tavor_pdhdl_t pd, mlnx_umap_pd_data_out_t *data,
1387     size_t data_sz)
1388 {
1389         TAVOR_TNF_ENTER(tavor_umap_pd_data_out);
1390 
1391         /* Check for valid PD handle pointer */
1392         if (pd == NULL) {
1393                 TNF_PROBE_0(tavor_umap_pd_data_out_invpdhdl_fail,
1394                     TAVOR_TNF_ERROR, "");
1395                 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1396                 return (IBT_PD_HDL_INVALID);
1397         }
1398 
1399         /* Check for valid PD mapping structure size */
1400         if (data_sz < sizeof (mlnx_umap_pd_data_out_t)) {
1401                 TNF_PROBE_0(tavor_umap_pd_data_out_invdatasz_fail,
1402                     TAVOR_TNF_ERROR, "");
1403                 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1404                 return (IBT_INSUFF_RESOURCE);
1405         }
1406         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1407 
1408         /*
1409          * If it has passed all the checks, then fill the PD table index
1410          * (the PD table allocated index for the PD pd_pdnum)
1411          */
1412         data->mpd_rev        = MLNX_UMAP_IF_VERSION;
1413         data->mpd_pdnum      = pd->pd_pdnum;
1414 
1415         TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1416         return (DDI_SUCCESS);
1417 }
1418 
1419 /*
1420  * tavor_umap_db_init()
1421  *    Context: Only called from attach() path context
1422  */
1423 void
1424 tavor_umap_db_init(void)
1425 {
1426         TAVOR_TNF_ENTER(tavor_umap_db_init);
1427 
1428         /*
1429          * Initialize the lock used by the Tavor "userland resources database"
1430          * This is used to ensure atomic access to add, remove, and find
1431          * entries in the database.
1432          */
1433         mutex_init(&tavor_userland_rsrc_db.tdl_umapdb_lock, NULL,
1434             MUTEX_DRIVER, NULL);
1435 
1436         /*
1437          * Initialize the AVL tree used for the "userland resources
1438          * database".  Using an AVL tree here provides the ability to
1439          * scale the database size to large numbers of resources.  The
1440          * entries in the tree are "tavor_umap_db_entry_t".
1441          * The tree is searched with the help of the
1442          * tavor_umap_db_compare() routine.
1443          */
1444         avl_create(&tavor_userland_rsrc_db.tdl_umapdb_avl,
1445             tavor_umap_db_compare, sizeof (tavor_umap_db_entry_t),
1446             offsetof(tavor_umap_db_entry_t, tdbe_avlnode));
1447 
1448         TAVOR_TNF_EXIT(tavor_umap_db_init);
1449 }
1450 
1451 
1452 /*
1453  * tavor_umap_db_fini()
1454  *    Context: Only called from attach() and/or detach() path contexts
1455  */
1456 void
1457 tavor_umap_db_fini(void)
1458 {
1459         TAVOR_TNF_ENTER(tavor_umap_db_fini);
1460 
1461         /* Destroy the AVL tree for the "userland resources database" */
1462         avl_destroy(&tavor_userland_rsrc_db.tdl_umapdb_avl);
1463 
1464         /* Destroy the lock for the "userland resources database" */
1465         mutex_destroy(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1466 
1467         TAVOR_TNF_EXIT(tavor_umap_db_fini);
1468 }
1469 
1470 
1471 /*
1472  * tavor_umap_db_alloc()
1473  *    Context: Can be called from user or kernel context.
1474  */
1475 tavor_umap_db_entry_t *
1476 tavor_umap_db_alloc(uint_t instance, uint64_t key, uint_t type, uint64_t value)
1477 {
1478         tavor_umap_db_entry_t   *umapdb;
1479 
1480         TAVOR_TNF_ENTER(tavor_umap_db_alloc);
1481 
1482         /* Allocate an entry to add to the "userland resources database" */
1483         umapdb = kmem_zalloc(sizeof (tavor_umap_db_entry_t), KM_NOSLEEP);
1484         if (umapdb == NULL) {
1485                 TNF_PROBE_0(tavor_umap_db_alloc_kmz_fail, TAVOR_TNF_ERROR, "");
1486                 TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1487                 return (NULL);
1488         }
1489         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1490 
1491         /* Fill in the fields in the database entry */
1492         umapdb->tdbe_common.tdb_instance  = instance;
1493         umapdb->tdbe_common.tdb_type   = type;
1494         umapdb->tdbe_common.tdb_key    = key;
1495         umapdb->tdbe_common.tdb_value          = value;
1496 
1497         TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1498         return (umapdb);
1499 }
1500 
1501 
1502 /*
1503  * tavor_umap_db_free()
1504  *    Context: Can be called from user or kernel context.
1505  */
1506 void
1507 tavor_umap_db_free(tavor_umap_db_entry_t *umapdb)
1508 {
1509         TAVOR_TNF_ENTER(tavor_umap_db_free);
1510 
1511         /* Free the database entry */
1512         kmem_free(umapdb, sizeof (tavor_umap_db_entry_t));
1513 
1514         TAVOR_TNF_EXIT(tavor_umap_db_free);
1515 }
1516 
1517 
1518 /*
1519  * tavor_umap_db_add()
1520  *    Context: Can be called from user or kernel context.
1521  */
1522 void
1523 tavor_umap_db_add(tavor_umap_db_entry_t *umapdb)
1524 {
1525         TAVOR_TNF_ENTER(tavor_umap_db_add);
1526 
1527         mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1528         tavor_umap_db_add_nolock(umapdb);
1529         mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1530 
1531         TAVOR_TNF_EXIT(tavor_umap_db_add);
1532 }
1533 
1534 
1535 /*
1536  * tavor_umap_db_add_nolock()
1537  *    Context: Can be called from user or kernel context.
1538  */
1539 void
1540 tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb)
1541 {
1542         tavor_umap_db_query_t   query;
1543         avl_index_t             where;
1544 
1545         TAVOR_TNF_ENTER(tavor_umap_db_add_nolock);
1546 
1547         ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1548 
1549         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1550 
1551         /*
1552          * Copy the common portion of the "to-be-added" database entry
1553          * into the "tavor_umap_db_query_t" structure.  We use this structure
1554          * (with no flags set) to find the appropriate location in the
1555          * "userland resources database" for the new entry to be added.
1556          *
1557          * Note: we expect that this entry should not be found in the
1558          * database (unless something bad has happened).
1559          */
1560         query.tqdb_common = umapdb->tdbe_common;
1561         query.tqdb_flags  = 0;
1562         (void) avl_find(&tavor_userland_rsrc_db.tdl_umapdb_avl, &query,
1563             &where);
1564 
1565         /*
1566          * Now, using the "where" field from the avl_find() operation
1567          * above, we will insert the new database entry ("umapdb").
1568          */
1569         avl_insert(&tavor_userland_rsrc_db.tdl_umapdb_avl, umapdb,
1570             where);
1571 
1572         TAVOR_TNF_EXIT(tavor_umap_db_add_nolock);
1573 }
1574 
1575 
1576 /*
1577  * tavor_umap_db_find()
1578  *    Context: Can be called from user or kernel context.
1579  */
1580 int
1581 tavor_umap_db_find(uint_t instance, uint64_t key, uint_t type,
1582     uint64_t *value, uint_t flag, tavor_umap_db_entry_t **umapdb)
1583 {
1584         int     status;
1585 
1586         TAVOR_TNF_ENTER(tavor_umap_db_find);
1587 
1588         mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1589         status = tavor_umap_db_find_nolock(instance, key, type, value, flag,
1590             umapdb);
1591         mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1592 
1593         TAVOR_TNF_EXIT(tavor_umap_db_find);
1594         return (status);
1595 }
1596 
1597 
1598 /*
1599  * tavor_umap_db_find_nolock()
1600  *    Context: Can be called from user or kernel context.
1601  */
1602 int
1603 tavor_umap_db_find_nolock(uint_t instance, uint64_t key, uint_t type,
1604     uint64_t *value, uint_t flags, tavor_umap_db_entry_t **umapdb)
1605 {
1606         tavor_umap_db_query_t   query;
1607         tavor_umap_db_entry_t   *entry;
1608         avl_index_t             where;
1609 
1610         TAVOR_TNF_ENTER(tavor_umap_db_find_nolock);
1611 
1612         ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1613 
1614         /*
1615          * Fill in key, type, instance, and flags values of the
1616          * tavor_umap_db_query_t in preparation for the database
1617          * lookup.
1618          */
1619         query.tqdb_flags                = flags;
1620         query.tqdb_common.tdb_key       = key;
1621         query.tqdb_common.tdb_type      = type;
1622         query.tqdb_common.tdb_instance  = instance;
1623 
1624         /*
1625          * Perform the database query.  If no entry is found, then
1626          * return failure, else continue.
1627          */
1628         entry = (tavor_umap_db_entry_t *)avl_find(
1629             &tavor_userland_rsrc_db.tdl_umapdb_avl, &query, &where);
1630         if (entry == NULL) {
1631                 TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1632                 return (DDI_FAILURE);
1633         }
1634         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))
1635 
1636         /*
1637          * If the flags argument specifies that the entry should
1638          * be removed if found, then call avl_remove() to remove
1639          * the entry from the database.
1640          */
1641         if (flags & TAVOR_UMAP_DB_REMOVE) {
1642 
1643                 avl_remove(&tavor_userland_rsrc_db.tdl_umapdb_avl, entry);
1644 
1645                 /*
1646                  * The database entry is returned with the expectation
1647                  * that the caller will use tavor_umap_db_free() to
1648                  * free the entry's memory.  ASSERT that this is non-NULL.
1649                  * NULL pointer should never be passed for the
1650                  * TAVOR_UMAP_DB_REMOVE case.
1651                  */
1652                 ASSERT(umapdb != NULL);
1653         }
1654 
1655         /*
1656          * If the caller would like visibility to the database entry
1657          * (indicated through the use of a non-NULL "umapdb" argument),
1658          * then fill it in.
1659          */
1660         if (umapdb != NULL) {
1661                 *umapdb = entry;
1662         }
1663 
1664         /* Extract value field from database entry and return success */
1665         *value = entry->tdbe_common.tdb_value;
1666 
1667         TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1668         return (DDI_SUCCESS);
1669 }
1670 
1671 
1672 /*
1673  * tavor_umap_umemlock_cb()
1674  *    Context: Can be called from callback context.
1675  */
1676 void
1677 tavor_umap_umemlock_cb(ddi_umem_cookie_t *umem_cookie)
1678 {
1679         tavor_umap_db_entry_t   *umapdb;
1680         tavor_state_t           *state;
1681         tavor_rsrc_t            *rsrcp;
1682         tavor_mrhdl_t           mr;
1683         uint64_t                value;
1684         uint_t                  instance;
1685         int                     status;
1686         void                    (*mr_callback)(void *, void *);
1687         void                    *mr_cbarg1, *mr_cbarg2;
1688 
1689         TAVOR_TNF_ENTER(tavor_umap_umemlock_cb);
1690 
1691         /*
1692          * If this was userland memory, then we need to remove its entry
1693          * from the "userland resources database".  Note:  We use the
1694          * TAVOR_UMAP_DB_IGNORE_INSTANCE flag here because we don't know
1695          * which instance was used when the entry was added (but we want
1696          * to know after the entry is found using the other search criteria).
1697          */
1698         status = tavor_umap_db_find(0, (uint64_t)(uintptr_t)umem_cookie,
1699             MLNX_UMAP_MRMEM_RSRC, &value, (TAVOR_UMAP_DB_REMOVE |
1700             TAVOR_UMAP_DB_IGNORE_INSTANCE), &umapdb);
1701         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1702         if (status == DDI_SUCCESS) {
1703                 instance = umapdb->tdbe_common.tdb_instance;
1704                 state = ddi_get_soft_state(tavor_statep, instance);
1705                 if (state == NULL) {
1706                         cmn_err(CE_WARN, "Unable to match Tavor instance\n");
1707                         TNF_PROBE_0(tavor_umap_umemlock_cb_gss_fail,
1708                             TAVOR_TNF_ERROR, "");
1709                         TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1710                         return;
1711                 }
1712 
1713                 /* Free the database entry */
1714                 tavor_umap_db_free(umapdb);
1715 
1716                 /* Use "value" to convert to an MR handle */
1717                 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
1718                 mr = (tavor_mrhdl_t)rsrcp->tr_addr;
1719 
1720                 /*
1721                  * If a callback has been provided, call it first.  This
1722                  * callback is expected to do any cleanup necessary to
1723                  * guarantee that the subsequent MR deregister (below)
1724                  * will succeed.  Specifically, this means freeing up memory
1725                  * windows which might have been associated with the MR.
1726                  */
1727                 mutex_enter(&mr->mr_lock);
1728                 mr_callback = mr->mr_umem_cbfunc;
1729                 mr_cbarg1   = mr->mr_umem_cbarg1;
1730                 mr_cbarg2   = mr->mr_umem_cbarg2;
1731                 mutex_exit(&mr->mr_lock);
1732                 if (mr_callback != NULL) {
1733                         mr_callback(mr_cbarg1, mr_cbarg2);
1734                 }
1735 
1736                 /*
1737                  * Then call tavor_mr_deregister() to release the resources
1738                  * associated with the MR handle.  Note: Because this routine
1739                  * will also check for whether the ddi_umem_cookie_t is in the
1740                  * database, it will take responsibility for disabling the
1741                  * memory region and calling ddi_umem_unlock().
1742                  */
1743                 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
1744                     TAVOR_SLEEP);
1745                 if (status != DDI_SUCCESS) {
1746                         TAVOR_WARNING(state, "Unexpected failure in "
1747                             "deregister from callback\n");
1748                         TNF_PROBE_0(tavor_umap_umemlock_cb_dereg_fail,
1749                             TAVOR_TNF_ERROR, "");
1750                         TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1751                 }
1752         }
1753 
1754         TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1755 }
1756 
1757 
1758 /*
1759  * tavor_umap_db_compare()
1760  *    Context: Can be called from user or kernel context.
1761  */
1762 static int
1763 tavor_umap_db_compare(const void *q, const void *e)
1764 {
1765         tavor_umap_db_common_t  *entry_common, *query_common;
1766         uint_t                  query_flags;
1767 
1768         TAVOR_TNF_ENTER(tavor_umap_db_compare);
1769 
1770         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*((tavor_umap_db_query_t *)q)))
1771 
1772         entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common;
1773         query_common = &((tavor_umap_db_query_t *)q)->tqdb_common;
1774         query_flags  = ((tavor_umap_db_query_t *)q)->tqdb_flags;
1775 
1776         /*
1777          * The first comparison is done on the "key" value in "query"
1778          * and "entry".  If they are not equal, then the appropriate
1779          * search direction is returned.  Else, we continue by
1780          * comparing "type".
1781          */
1782         if (query_common->tdb_key < entry_common->tdb_key) {
1783                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1784                 return (-1);
1785         } else if (query_common->tdb_key > entry_common->tdb_key) {
1786                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1787                 return (+1);
1788         }
1789 
1790         /*
1791          * If the search reaches this point, then "query" and "entry"
1792          * have equal key values.  So we continue be comparing their
1793          * "type" values.  Again, if they are not equal, then the
1794          * appropriate search direction is returned.  Else, we continue
1795          * by comparing "instance".
1796          */
1797         if (query_common->tdb_type < entry_common->tdb_type) {
1798                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1799                 return (-1);
1800         } else if (query_common->tdb_type > entry_common->tdb_type) {
1801                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1802                 return (+1);
1803         }
1804 
1805         /*
1806          * If the search reaches this point, then "query" and "entry"
1807          * have exactly the same key and type values.  Now we consult
1808          * the "flags" field in the query to determine whether the
1809          * "instance" is relevant to the search.  If the
1810          * TAVOR_UMAP_DB_IGNORE_INSTANCE flags is set, then return
1811          * success (0) here.  Otherwise, continue the search by comparing
1812          * instance values and returning the appropriate search direction.
1813          */
1814         if (query_flags & TAVOR_UMAP_DB_IGNORE_INSTANCE) {
1815                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1816                 return (0);
1817         }
1818 
1819         /*
1820          * If the search has reached this point, then "query" and "entry"
1821          * can only be differentiated by their instance values.  If these
1822          * are not equal, then return the appropriate search direction.
1823          * Else, we return success (0).
1824          */
1825         if (query_common->tdb_instance < entry_common->tdb_instance) {
1826                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1827                 return (-1);
1828         } else if (query_common->tdb_instance > entry_common->tdb_instance) {
1829                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1830                 return (+1);
1831         }
1832 
1833         /* Everything matches... so return success */
1834         TAVOR_TNF_EXIT(tavor_umap_db_compare);
1835         return (0);
1836 }
1837 
1838 
1839 /*
1840  * tavor_umap_db_set_onclose_cb()
1841  *    Context: Can be called from user or kernel context.
1842  */
1843 int
1844 tavor_umap_db_set_onclose_cb(dev_t dev, uint64_t flag,
1845     void (*callback)(void *), void *arg)
1846 {
1847         tavor_umap_db_priv_t    *priv;
1848         tavor_umap_db_entry_t   *umapdb;
1849         minor_t                 instance;
1850         uint64_t                value;
1851         int                     status;
1852 
1853         TAVOR_TNF_ENTER(tavor_umap_db_set_onclose_cb);
1854 
1855         instance = TAVOR_DEV_INSTANCE(dev);
1856         if (instance == -1) {
1857                 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_inst_fail,
1858                     TAVOR_TNF_ERROR, "");
1859                 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1860                 return (DDI_FAILURE);
1861         }
1862 
1863         if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1864                 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_invflag_fail,
1865                     TAVOR_TNF_ERROR, "");
1866                 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1867                 return (DDI_FAILURE);
1868         }
1869 
1870         /*
1871          * Grab the lock for the "userland resources database" and find
1872          * the entry corresponding to this minor number.  Once it's found,
1873          * allocate (if necessary) and add an entry (in the "tdb_priv"
1874          * field) to indicate that further processing may be needed during
1875          * Tavor's close() handling.
1876          */
1877         mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1878         status = tavor_umap_db_find_nolock(instance, dev,
1879             MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1880         if (status != DDI_SUCCESS) {
1881                 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_find_fail,
1882                     TAVOR_TNF_ERROR, "");
1883                 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1884                 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1885                 return (DDI_FAILURE);
1886         }
1887 
1888         priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1889         if (priv == NULL) {
1890                 priv = (tavor_umap_db_priv_t *)kmem_zalloc(
1891                     sizeof (tavor_umap_db_priv_t), KM_NOSLEEP);
1892                 if (priv == NULL) {
1893                         TNF_PROBE_0(tavor_umap_db_set_onclose_cb_kmz_fail,
1894                             TAVOR_TNF_ERROR, "");
1895                         mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1896                         TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1897                         return (DDI_FAILURE);
1898                 }
1899         }
1900 
1901         /*
1902          * Save away the callback and argument to be used during Tavor's
1903          * close() processing.
1904          */
1905         priv->tdp_cb = callback;
1906         priv->tdp_arg        = arg;
1907 
1908         umapdb->tdbe_common.tdb_priv = (void *)priv;
1909         mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1910 
1911         TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1912         return (DDI_SUCCESS);
1913 }
1914 
1915 
1916 /*
1917  * tavor_umap_db_clear_onclose_cb()
1918  *    Context: Can be called from user or kernel context.
1919  */
1920 int
1921 tavor_umap_db_clear_onclose_cb(dev_t dev, uint64_t flag)
1922 {
1923         tavor_umap_db_priv_t    *priv;
1924         tavor_umap_db_entry_t   *umapdb;
1925         minor_t                 instance;
1926         uint64_t                value;
1927         int                     status;
1928 
1929         TAVOR_TNF_ENTER(tavor_umap_db_set_onclose_cb);
1930 
1931         instance = TAVOR_DEV_INSTANCE(dev);
1932         if (instance == -1) {
1933                 TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_inst_fail,
1934                     TAVOR_TNF_ERROR, "");
1935                 TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1936                 return (DDI_FAILURE);
1937         }
1938 
1939         if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1940                 TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_invflag_fail,
1941                     TAVOR_TNF_ERROR, "");
1942                 TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1943                 return (DDI_FAILURE);
1944         }
1945 
1946         /*
1947          * Grab the lock for the "userland resources database" and find
1948          * the entry corresponding to this minor number.  Once it's found,
1949          * remove the entry (in the "tdb_priv" field) that indicated the
1950          * need for further processing during Tavor's close().  Free the
1951          * entry, if appropriate.
1952          */
1953         mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1954         status = tavor_umap_db_find_nolock(instance, dev,
1955             MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1956         if (status != DDI_SUCCESS) {
1957                 TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_find_fail,
1958                     TAVOR_TNF_ERROR, "");
1959                 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1960                 TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1961                 return (DDI_FAILURE);
1962         }
1963 
1964         priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1965         if (priv != NULL) {
1966                 kmem_free(priv, sizeof (tavor_umap_db_priv_t));
1967                 priv = NULL;
1968         }
1969 
1970         umapdb->tdbe_common.tdb_priv = (void *)priv;
1971         mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1972         return (DDI_SUCCESS);
1973 }
1974 
1975 
1976 /*
1977  * tavor_umap_db_clear_onclose_cb()
1978  *    Context: Can be called from user or kernel context.
1979  */
1980 void
1981 tavor_umap_db_handle_onclose_cb(tavor_umap_db_priv_t *priv)
1982 {
1983         void    (*callback)(void *);
1984 
1985         ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1986 
1987         /*
1988          * Call the callback.
1989          *    Note: Currently there is only one callback (in "tdp_cb"), but
1990          *    in the future there may be more, depending on what other types
1991          *    of interaction there are between userland processes and the
1992          *    driver.
1993          */
1994         callback = priv->tdp_cb;
1995         callback(priv->tdp_arg);
1996 }