1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * tavor_umap.c
  29  *    Tavor Userland Mapping Routines
  30  *
  31  *    Implements all the routines necessary for enabling direct userland
  32  *    access to the Tavor hardware.  This includes all routines necessary for
  33  *    maintaining the "userland resources database" and all the support routines
  34  *    for the devmap calls.
  35  */
  36 
  37 #include <sys/types.h>
  38 #include <sys/conf.h>
  39 #include <sys/ddi.h>
  40 #include <sys/sunddi.h>
  41 #include <sys/modctl.h>
  42 #include <sys/file.h>
  43 #include <sys/avl.h>
  44 #include <sys/sysmacros.h>
  45 
  46 #include <sys/ib/adapters/tavor/tavor.h>
  47 
  48 /* Tavor HCA state pointer (extern) */
  49 extern void *tavor_statep;
  50 
  51 /* Tavor HCA Userland Resource Database (extern) */
  52 extern tavor_umap_db_t tavor_userland_rsrc_db;
  53 
  54 static int tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
  55     tavor_rsrc_t *rsrcp, size_t *maplen, int *err);
  56 static int tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
  57     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
  58 static int tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
  59     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
  60 static int tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
  61     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
  62 static int tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
  63     offset_t off, size_t len, void **pvtp);
  64 static int tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp,
  65     devmap_cookie_t new_dhp, void **new_pvtp);
  66 static void tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp,
  67     offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
  68     devmap_cookie_t new_dhp2, void **pvtp2);
  69 static int tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
  70     offset_t off, size_t len, void **pvtp);
  71 static int tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
  72     devmap_cookie_t new_dhp, void **new_pvtp);
  73 static void tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp,
  74     offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
  75     devmap_cookie_t new_dhp2, void **pvtp2);
  76 static ibt_status_t tavor_umap_mr_data_in(tavor_mrhdl_t mr,
  77     ibt_mr_data_in_t *data, size_t data_sz);
  78 static ibt_status_t tavor_umap_cq_data_out(tavor_cqhdl_t cq,
  79     mlnx_umap_cq_data_out_t *data, size_t data_sz);
  80 static ibt_status_t tavor_umap_qp_data_out(tavor_qphdl_t qp,
  81     mlnx_umap_qp_data_out_t *data, size_t data_sz);
  82 static ibt_status_t tavor_umap_srq_data_out(tavor_srqhdl_t srq,
  83     mlnx_umap_srq_data_out_t *data, size_t data_sz);
  84 static int tavor_umap_db_compare(const void *query, const void *entry);
  85 static ibt_status_t tavor_umap_pd_data_out(tavor_pdhdl_t pd,
  86     mlnx_umap_pd_data_out_t *data, size_t data_sz);
  87 
  88 
/*
 * These callbacks are passed to devmap_umem_setup() and devmap_devmem_setup(),
 * respectively.  They are used to handle (among other things) partial
 * unmappings and to provide a method for invalidating mappings inherited
 * as a result of a fork(2) system call.
 *
 * tavor_devmap_umem_cbops is the set handed to devmap_umem_setup() when CQ,
 * QP, or SRQ queue memory is exported to userland.
 */
static struct devmap_callback_ctl tavor_devmap_umem_cbops = {
        DEVMAP_OPS_REV,                 /* callback structure revision */
        tavor_devmap_umem_map,          /* a new mapping has been set up */
        NULL,                           /* no callback supplied for this slot */
        tavor_devmap_umem_dup,          /* mapping duplicated, e.g. fork(2) */
        tavor_devmap_umem_unmap         /* full or partial unmapping */
};
/*
 * Callback set handed to devmap_devmem_setup() when a UAR page is exported
 * to userland (see tavor_umap_uarpg()).
 */
static struct devmap_callback_ctl tavor_devmap_devmem_cbops = {
        DEVMAP_OPS_REV,                 /* callback structure revision */
        tavor_devmap_devmem_map,        /* a new mapping has been set up */
        NULL,                           /* no callback supplied for this slot */
        tavor_devmap_devmem_dup,        /* mapping duplicated, e.g. fork(2) */
        tavor_devmap_devmem_unmap       /* full or partial unmapping */
};
 109 
 110 /*
 111  * tavor_devmap()
 112  *    Context: Can be called from user context.
 113  */
 114 /* ARGSUSED */
 115 int
 116 tavor_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
 117     size_t *maplen, uint_t model)
 118 {
 119         tavor_state_t   *state;
 120         tavor_rsrc_t    *rsrcp;
 121         minor_t         instance;
 122         uint64_t        key, value;
 123         uint_t          type;
 124         int             err, status;
 125 
 126         TAVOR_TNF_ENTER(tavor_devmap);
 127 
 128         /* Get Tavor softstate structure from instance */
 129         instance = TAVOR_DEV_INSTANCE(dev);
 130         state = ddi_get_soft_state(tavor_statep, instance);
 131         if (state == NULL) {
 132                 TNF_PROBE_0(tavor_devmap_gss_fail, TAVOR_TNF_ERROR, "");
 133                 TAVOR_TNF_EXIT(tavor_devmap);
 134                 return (ENXIO);
 135         }
 136 
 137         /*
 138          * Access to Tavor devmap interface is not allowed in
 139          * "maintenance mode".
 140          */
 141         if (state->ts_operational_mode == TAVOR_MAINTENANCE_MODE) {
 142                 TNF_PROBE_0(tavor_devmap_maintenance_mode_fail,
 143                     TAVOR_TNF_ERROR, "");
 144                 TAVOR_TNF_EXIT(tavor_devmap);
 145                 return (EFAULT);
 146         }
 147 
 148         /*
 149          * The bottom bits of "offset" are undefined (number depends on
 150          * system PAGESIZE).  Shifting these off leaves us with a "key".
 151          * The "key" is actually a combination of both a real key value
 152          * (for the purpose of database lookup) and a "type" value.  We
 153          * extract this information before doing the database lookup.
 154          */
 155         key  = off >> PAGESHIFT;
 156         type = key & MLNX_UMAP_RSRC_TYPE_MASK;
 157         key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
 158         status = tavor_umap_db_find(instance, key, type, &value, 0, NULL);
 159         if (status == DDI_SUCCESS) {
 160                 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
 161 
 162                 switch (type) {
 163                 case MLNX_UMAP_UARPG_RSRC:
 164                         /*
 165                          * Double check that process who open()'d Tavor is
 166                          * same process attempting to mmap() UAR page.
 167                          */
 168                         if (key != ddi_get_pid()) {
 169                                 TNF_PROBE_0(tavor_devmap_uarpg_invpid_fail,
 170                                     TAVOR_TNF_ERROR, "");
 171                                 TAVOR_TNF_EXIT(tavor_devmap);
 172                                 return (EINVAL);
 173                         }
 174 
 175                         /* Map the UAR page out for userland access */
 176                         status = tavor_umap_uarpg(state, dhp, rsrcp, maplen,
 177                             &err);
 178                         if (status != DDI_SUCCESS) {
 179                                 TNF_PROBE_0(tavor_devmap_uarpg_map_fail,
 180                                     TAVOR_TNF_ERROR, "");
 181                                 TAVOR_TNF_EXIT(tavor_devmap);
 182                                 return (err);
 183                         }
 184                         break;
 185 
 186                 case MLNX_UMAP_CQMEM_RSRC:
 187                         /* Map the CQ memory out for userland access */
 188                         status = tavor_umap_cqmem(state, dhp, rsrcp, off,
 189                             maplen, &err);
 190                         if (status != DDI_SUCCESS) {
 191                                 TNF_PROBE_0(tavor_devmap_cqmem_map_fail,
 192                                     TAVOR_TNF_ERROR, "");
 193                                 TAVOR_TNF_EXIT(tavor_devmap);
 194                                 return (err);
 195                         }
 196                         break;
 197 
 198                 case MLNX_UMAP_QPMEM_RSRC:
 199                         /* Map the QP memory out for userland access */
 200                         status = tavor_umap_qpmem(state, dhp, rsrcp, off,
 201                             maplen, &err);
 202                         if (status != DDI_SUCCESS) {
 203                                 TNF_PROBE_0(tavor_devmap_qpmem_map_fail,
 204                                     TAVOR_TNF_ERROR, "");
 205                                 TAVOR_TNF_EXIT(tavor_devmap);
 206                                 return (err);
 207                         }
 208                         break;
 209 
 210                 case MLNX_UMAP_SRQMEM_RSRC:
 211                         /* Map the SRQ memory out for userland access */
 212                         status = tavor_umap_srqmem(state, dhp, rsrcp, off,
 213                             maplen, &err);
 214                         if (status != DDI_SUCCESS) {
 215                                 TNF_PROBE_0(tavor_devmap_srqmem_map_fail,
 216                                     TAVOR_TNF_ERROR, "");
 217                                 TAVOR_TNF_EXIT(tavor_devmap);
 218                                 return (err);
 219                         }
 220                         break;
 221 
 222                 default:
 223                         TAVOR_WARNING(state, "unexpected rsrc type in devmap");
 224                         TNF_PROBE_0(tavor_devmap_invrsrc_fail,
 225                             TAVOR_TNF_ERROR, "");
 226                         TAVOR_TNF_EXIT(tavor_devmap);
 227                         return (EINVAL);
 228                 }
 229         } else {
 230                 TNF_PROBE_0(tavor_devmap_umap_lookup_fail, TAVOR_TNF_ERROR, "");
 231                 TAVOR_TNF_EXIT(tavor_devmap);
 232                 return (EINVAL);
 233         }
 234 
 235         TAVOR_TNF_EXIT(tavor_devmap);
 236         return (0);
 237 }
 238 
 239 
 240 /*
 241  * tavor_umap_uarpg()
 242  *    Context: Can be called from user context.
 243  */
 244 static int
 245 tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
 246     tavor_rsrc_t *rsrcp, size_t *maplen, int *err)
 247 {
 248         int             status;
 249         uint_t          maxprot;
 250 
 251         TAVOR_TNF_ENTER(tavor_umap_uarpg);
 252 
 253         /* Map out the UAR page (doorbell page) */
 254         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 255         status = devmap_devmem_setup(dhp, state->ts_dip,
 256             &tavor_devmap_devmem_cbops, TAVOR_UAR_BAR, (rsrcp->tr_indx <<
 257             PAGESHIFT), PAGESIZE, maxprot, DEVMAP_ALLOW_REMAP,
 258             &state->ts_reg_accattr);
 259         if (status < 0) {
 260                 *err = status;
 261                 TNF_PROBE_0(tavor_umap_uarpg_devmap_fail, TAVOR_TNF_ERROR, "");
 262                 TAVOR_TNF_EXIT(tavor_umap_uarpg);
 263                 return (DDI_FAILURE);
 264         }
 265 
 266         *maplen = PAGESIZE;
 267         TAVOR_TNF_EXIT(tavor_umap_uarpg);
 268         return (DDI_SUCCESS);
 269 }
 270 
 271 
 272 /*
 273  * tavor_umap_cqmem()
 274  *    Context: Can be called from user context.
 275  */
 276 /* ARGSUSED */
 277 static int
 278 tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
 279     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
 280 {
 281         tavor_cqhdl_t   cq;
 282         size_t          size;
 283         uint_t          maxprot;
 284         int             status;
 285 
 286         TAVOR_TNF_ENTER(tavor_umap_cqmem);
 287 
 288         /* Extract the Tavor CQ handle pointer from the tavor_rsrc_t */
 289         cq = (tavor_cqhdl_t)rsrcp->tr_addr;
 290 
 291         /* Round-up the CQ size to system page size */
 292         size = ptob(btopr(cq->cq_cqinfo.qa_size));
 293 
 294         /* Map out the CQ memory */
 295         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 296         status = devmap_umem_setup(dhp, state->ts_dip,
 297             &tavor_devmap_umem_cbops, cq->cq_cqinfo.qa_umemcookie, 0, size,
 298             maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
 299         if (status < 0) {
 300                 *err = status;
 301                 TNF_PROBE_0(tavor_umap_cqmem_devmap_fail, TAVOR_TNF_ERROR, "");
 302                 TAVOR_TNF_EXIT(tavor_umap_cqmem);
 303                 return (DDI_FAILURE);
 304         }
 305         *maplen = size;
 306 
 307         TAVOR_TNF_EXIT(tavor_umap_cqmem);
 308         return (DDI_SUCCESS);
 309 }
 310 
 311 
 312 /*
 313  * tavor_umap_qpmem()
 314  *    Context: Can be called from user context.
 315  */
 316 /* ARGSUSED */
 317 static int
 318 tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
 319     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
 320 {
 321         tavor_qphdl_t   qp;
 322         offset_t        offset;
 323         size_t          size;
 324         uint_t          maxprot;
 325         int             status;
 326 
 327         TAVOR_TNF_ENTER(tavor_umap_qpmem);
 328 
 329         /* Extract the Tavor QP handle pointer from the tavor_rsrc_t */
 330         qp = (tavor_qphdl_t)rsrcp->tr_addr;
 331 
 332         /*
 333          * Calculate the offset of the first work queue (send or recv) into
 334          * the memory (ddi_umem_alloc()) allocated previously for the QP.
 335          */
 336         offset = (offset_t)((uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
 337             (uintptr_t)qp->qp_wqinfo.qa_buf_real);
 338 
 339         /* Round-up the QP work queue sizes to system page size */
 340         size = ptob(btopr(qp->qp_wqinfo.qa_size));
 341 
 342         /* Map out the QP memory */
 343         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 344         status = devmap_umem_setup(dhp, state->ts_dip,
 345             &tavor_devmap_umem_cbops, qp->qp_wqinfo.qa_umemcookie, offset,
 346             size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
 347         if (status < 0) {
 348                 *err = status;
 349                 TNF_PROBE_0(tavor_umap_qpmem_devmap_fail, TAVOR_TNF_ERROR, "");
 350                 TAVOR_TNF_EXIT(tavor_umap_qpmem);
 351                 return (DDI_FAILURE);
 352         }
 353         *maplen = size;
 354 
 355         TAVOR_TNF_EXIT(tavor_umap_qpmem);
 356         return (DDI_SUCCESS);
 357 }
 358 
 359 
 360 /*
 361  * tavor_umap_srqmem()
 362  *    Context: Can be called from user context.
 363  */
 364 /* ARGSUSED */
 365 static int
 366 tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
 367     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
 368 {
 369         tavor_srqhdl_t  srq;
 370         offset_t        offset;
 371         size_t          size;
 372         uint_t          maxprot;
 373         int             status;
 374 
 375         TAVOR_TNF_ENTER(tavor_umap_srqmem);
 376 
 377         /* Extract the Tavor SRQ handle pointer from the tavor_rsrc_t */
 378         srq = (tavor_srqhdl_t)rsrcp->tr_addr;
 379 
 380         /*
 381          * Calculate the offset of the first shared recv queue into the memory
 382          * (ddi_umem_alloc()) allocated previously for the SRQ.
 383          */
 384         offset = (offset_t)((uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
 385             (uintptr_t)srq->srq_wqinfo.qa_buf_real);
 386 
 387         /* Round-up the SRQ work queue sizes to system page size */
 388         size = ptob(btopr(srq->srq_wqinfo.qa_size));
 389 
 390         /* Map out the QP memory */
 391         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 392         status = devmap_umem_setup(dhp, state->ts_dip,
 393             &tavor_devmap_umem_cbops, srq->srq_wqinfo.qa_umemcookie, offset,
 394             size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
 395         if (status < 0) {
 396                 *err = status;
 397                 TNF_PROBE_0(tavor_umap_srqmem_devmap_fail, TAVOR_TNF_ERROR, "");
 398                 TAVOR_TNF_EXIT(tavor_umap_srqmem);
 399                 return (DDI_FAILURE);
 400         }
 401         *maplen = size;
 402 
 403         TAVOR_TNF_EXIT(tavor_umap_srqmem);
 404         return (DDI_SUCCESS);
 405 }
 406 
 407 
 408 /*
 409  * tavor_devmap_umem_map()
 410  *    Context: Can be called from kernel context.
 411  */
 412 /* ARGSUSED */
 413 static int
 414 tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
 415     offset_t off, size_t len, void **pvtp)
 416 {
 417         tavor_state_t           *state;
 418         tavor_devmap_track_t    *dvm_track;
 419         tavor_cqhdl_t           cq;
 420         tavor_qphdl_t           qp;
 421         tavor_srqhdl_t          srq;
 422         minor_t                 instance;
 423         uint64_t                key;
 424         uint_t                  type;
 425 
 426         TAVOR_TNF_ENTER(tavor_devmap_umem_map);
 427 
 428         /* Get Tavor softstate structure from instance */
 429         instance = TAVOR_DEV_INSTANCE(dev);
 430         state = ddi_get_soft_state(tavor_statep, instance);
 431         if (state == NULL) {
 432                 TNF_PROBE_0(tavor_devmap_umem_map_gss_fail, TAVOR_TNF_ERROR,
 433                     "");
 434                 TAVOR_TNF_EXIT(tavor_devmap_umem_map);
 435                 return (ENXIO);
 436         }
 437 
 438         /*
 439          * The bottom bits of "offset" are undefined (number depends on
 440          * system PAGESIZE).  Shifting these off leaves us with a "key".
 441          * The "key" is actually a combination of both a real key value
 442          * (for the purpose of database lookup) and a "type" value.  Although
 443          * we are not going to do any database lookup per se, we do want
 444          * to extract the "key" and the "type" (to enable faster lookup of
 445          * the appropriate CQ or QP handle).
 446          */
 447         key  = off >> PAGESHIFT;
 448         type = key & MLNX_UMAP_RSRC_TYPE_MASK;
 449         key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
 450 
 451         /*
 452          * Allocate an entry to track the mapping and unmapping (specifically,
 453          * partial unmapping) of this resource.
 454          */
 455         dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
 456             sizeof (tavor_devmap_track_t), KM_SLEEP);
 457         dvm_track->tdt_offset = off;
 458         dvm_track->tdt_state  = state;
 459         dvm_track->tdt_refcnt = 1;
 460         mutex_init(&dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
 461             DDI_INTR_PRI(state->ts_intrmsi_pri));
 462 
 463         /*
 464          * Depending of the type of resource that has been mapped out, we
 465          * need to update the QP or CQ handle to reflect that it has, in
 466          * fact, been mapped.  This allows the driver code which frees a QP
 467          * or a CQ to know whether it is appropriate to do a
 468          * devmap_devmem_remap() to invalidate the userland mapping for the
 469          * corresponding queue's memory.
 470          */
 471         if (type == MLNX_UMAP_CQMEM_RSRC) {
 472 
 473                 /* Use "key" (CQ number) to do fast lookup of CQ handle */
 474                 cq = tavor_cqhdl_from_cqnum(state, key);
 475 
 476                 /*
 477                  * Update the handle to the userland mapping.  Note:  If
 478                  * the CQ already has a valid userland mapping, then stop
 479                  * and return failure.
 480                  */
 481                 mutex_enter(&cq->cq_lock);
 482                 if (cq->cq_umap_dhp == NULL) {
 483                         cq->cq_umap_dhp = dhp;
 484                         dvm_track->tdt_size = cq->cq_cqinfo.qa_size;
 485                         mutex_exit(&cq->cq_lock);
 486                 } else {
 487                         mutex_exit(&cq->cq_lock);
 488                         goto umem_map_fail;
 489                 }
 490 
 491         } else if (type == MLNX_UMAP_QPMEM_RSRC) {
 492 
 493                 /* Use "key" (QP number) to do fast lookup of QP handle */
 494                 qp = tavor_qphdl_from_qpnum(state, key);
 495 
 496                 /*
 497                  * Update the handle to the userland mapping.  Note:  If
 498                  * the CQ already has a valid userland mapping, then stop
 499                  * and return failure.
 500                  */
 501                 mutex_enter(&qp->qp_lock);
 502                 if (qp->qp_umap_dhp == NULL) {
 503                         qp->qp_umap_dhp = dhp;
 504                         dvm_track->tdt_size = qp->qp_wqinfo.qa_size;
 505                         mutex_exit(&qp->qp_lock);
 506                 } else {
 507                         mutex_exit(&qp->qp_lock);
 508                         goto umem_map_fail;
 509                 }
 510 
 511         } else if (type == MLNX_UMAP_SRQMEM_RSRC) {
 512 
 513                 /* Use "key" (SRQ number) to do fast lookup on SRQ handle */
 514                 srq = tavor_srqhdl_from_srqnum(state, key);
 515 
 516                 /*
 517                  * Update the handle to the userland mapping.  Note:  If the
 518                  * SRQ already has a valid userland mapping, then stop and
 519                  * return failure.
 520                  */
 521                 mutex_enter(&srq->srq_lock);
 522                 if (srq->srq_umap_dhp == NULL) {
 523                         srq->srq_umap_dhp = dhp;
 524                         dvm_track->tdt_size = srq->srq_wqinfo.qa_size;
 525                         mutex_exit(&srq->srq_lock);
 526                 } else {
 527                         mutex_exit(&srq->srq_lock);
 528                         goto umem_map_fail;
 529                 }
 530         }
 531 
 532         /*
 533          * Pass the private "Tavor devmap tracking structure" back.  This
 534          * pointer will be returned in subsequent "unmap" callbacks.
 535          */
 536         *pvtp = dvm_track;
 537 
 538         TAVOR_TNF_EXIT(tavor_devmap_umem_map);
 539         return (DDI_SUCCESS);
 540 
 541 umem_map_fail:
 542         mutex_destroy(&dvm_track->tdt_lock);
 543         kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
 544         TAVOR_TNF_EXIT(tavor_devmap_umem_map);
 545         return (DDI_FAILURE);
 546 }
 547 
 548 
 549 /*
 550  * tavor_devmap_umem_dup()
 551  *    Context: Can be called from kernel context.
 552  */
 553 /* ARGSUSED */
 554 static int
 555 tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp,
 556     void **new_pvtp)
 557 {
 558         tavor_state_t           *state;
 559         tavor_devmap_track_t    *dvm_track, *new_dvm_track;
 560         uint_t                  maxprot;
 561         int                     status;
 562 
 563         TAVOR_TNF_ENTER(tavor_devmap_umem_dup);
 564 
 565         /*
 566          * Extract the Tavor softstate pointer from "Tavor devmap tracking
 567          * structure" (in "pvtp").
 568          */
 569         dvm_track = (tavor_devmap_track_t *)pvtp;
 570         state = dvm_track->tdt_state;
 571 
 572         /*
 573          * Since this devmap_dup() entry point is generally called
 574          * when a process does fork(2), it is incumbent upon the driver
 575          * to insure that the child does not inherit a valid copy of
 576          * the parent's QP or CQ resource.  This is accomplished by using
 577          * devmap_devmem_remap() to invalidate the child's mapping to the
 578          * kernel memory.
 579          */
 580         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 581         status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
 582             dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
 583         if (status != DDI_SUCCESS) {
 584                 TAVOR_WARNING(state, "failed in tavor_devmap_umem_dup()");
 585                 TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
 586                 return (status);
 587         }
 588 
 589         /*
 590          * Allocate a new entry to track the subsequent unmapping
 591          * (specifically, all partial unmappings) of the child's newly
 592          * invalidated resource.  Note: Setting the "tdt_size" field to
 593          * zero here is an indication to the devmap_unmap() entry point
 594          * that this mapping is invalid, and that its subsequent unmapping
 595          * should not affect any of the parent's CQ or QP resources.
 596          */
 597         new_dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
 598             sizeof (tavor_devmap_track_t), KM_SLEEP);
 599         new_dvm_track->tdt_offset = 0;
 600         new_dvm_track->tdt_state  = state;
 601         new_dvm_track->tdt_refcnt = 1;
 602         new_dvm_track->tdt_size        = 0;
 603         mutex_init(&new_dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
 604             DDI_INTR_PRI(state->ts_intrmsi_pri));
 605         *new_pvtp = new_dvm_track;
 606 
 607         TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
 608         return (DDI_SUCCESS);
 609 }
 610 
 611 
 612 /*
 613  * tavor_devmap_umem_unmap()
 614  *    Context: Can be called from kernel context.
 615  */
 616 /* ARGSUSED */
 617 static void
 618 tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
 619     size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
 620     devmap_cookie_t new_dhp2, void **pvtp2)
 621 {
 622         tavor_state_t           *state;
 623         tavor_rsrc_t            *rsrcp;
 624         tavor_devmap_track_t    *dvm_track;
 625         tavor_cqhdl_t           cq;
 626         tavor_qphdl_t           qp;
 627         tavor_srqhdl_t          srq;
 628         uint64_t                key, value;
 629         uint_t                  type;
 630         uint_t                  size;
 631         int                     status;
 632 
 633         TAVOR_TNF_ENTER(tavor_devmap_umem_unmap);
 634 
 635         /*
 636          * Extract the Tavor softstate pointer from "Tavor devmap tracking
 637          * structure" (in "pvtp").
 638          */
 639         dvm_track = (tavor_devmap_track_t *)pvtp;
 640         state     = dvm_track->tdt_state;
 641 
 642         /*
 643          * Extract the "offset" from the "Tavor devmap tracking structure".
 644          * Note: The input argument "off" is ignored here because the
 645          * Tavor mapping interfaces define a very specific meaning to
 646          * each "logical offset".  Also extract the "key" and "type" encoded
 647          * in the logical offset.
 648          */
 649         key  = dvm_track->tdt_offset >> PAGESHIFT;
 650         type = key & MLNX_UMAP_RSRC_TYPE_MASK;
 651         key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
 652 
 653         /*
 654          * Extract the "size" of the mapping.  If this size is determined
 655          * to be zero, then it is an indication of a previously invalidated
 656          * mapping, and no CQ or QP resources should be affected.
 657          */
 658         size = dvm_track->tdt_size;
 659 
 660         /*
 661          * If only the "middle portion of a given mapping is being unmapped,
 662          * then we are effectively creating one new piece of mapped memory.
 663          * (Original region is divided into three pieces of which the middle
 664          * piece is being removed.  This leaves two pieces.  Since we started
 665          * with one piece and now have two pieces, we need to increment the
 666          * counter in the "Tavor devmap tracking structure".
 667          *
 668          * If, however, the whole mapped region is being unmapped, then we
 669          * have started with one region which we are completely removing.
 670          * In this case, we need to decrement the counter in the "Tavor
 671          * devmap tracking structure".
 672          *
 673          * In each of the remaining cases, we will have started with one
 674          * mapped region and ended with one (different) region.  So no counter
 675          * modification is necessary.
 676          */
 677         mutex_enter(&dvm_track->tdt_lock);
 678         if ((new_dhp1 == NULL) && (new_dhp2 == NULL)) {
 679                 dvm_track->tdt_refcnt--;
 680         } else if ((new_dhp1 != NULL) && (new_dhp2 != NULL)) {
 681                 dvm_track->tdt_refcnt++;
 682         }
 683         mutex_exit(&dvm_track->tdt_lock);
 684 
 685         /*
 686          * For each of the cases where the region is being divided, then we
 687          * need to pass back the "Tavor devmap tracking structure".  This way
 688          * we get it back when each of the remaining pieces is subsequently
 689          * unmapped.
 690          */
 691         if (new_dhp1 != NULL) {
 692                 *pvtp1 = pvtp;
 693         }
 694         if (new_dhp2 != NULL) {
 695                 *pvtp2 = pvtp;
 696         }
 697 
 698         /*
 699          * If the "Tavor devmap tracking structure" is no longer being
 700          * referenced, then free it up.  Otherwise, return.
 701          */
 702         if (dvm_track->tdt_refcnt == 0) {
 703                 mutex_destroy(&dvm_track->tdt_lock);
 704                 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
 705 
 706                 /*
 707                  * If the mapping was invalid (see explanation above), then
 708                  * no further processing is necessary.
 709                  */
 710                 if (size == 0) {
 711                         TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
 712                         return;
 713                 }
 714         } else {
 715                 TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
 716                 return;
 717         }
 718 
 719         /*
 720          * Now that we can guarantee that the user memory is fully unmapped,
 721          * we can use the "key" and "type" values to try to find the entry
 722          * in the "userland resources database".  If it's found, then it
 723          * indicates that the queue memory (CQ or QP) has not yet been freed.
 724          * In this case, we update the corresponding CQ or QP handle to
 725          * indicate that the "devmap_devmem_remap()" call will be unnecessary.
 726          * If it's _not_ found, then it indicates that the CQ or QP memory
 727          * was, in fact, freed before it was unmapped (thus requiring a
 728          * previous invalidation by remapping - which will already have
 729          * been done in the free routine).
 730          */
 731         status = tavor_umap_db_find(state->ts_instance, key, type, &value,
 732             0, NULL);
 733         if (status == DDI_SUCCESS) {
 734                 /*
 735                  * Depending on the type of the mapped resource (CQ or QP),
 736                  * update handle to indicate that no invalidation remapping
 737                  * will be necessary.
 738                  */
 739                 if (type == MLNX_UMAP_CQMEM_RSRC) {
 740 
 741                         /* Use "value" to convert to CQ handle */
 742                         rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
 743                         cq = (tavor_cqhdl_t)rsrcp->tr_addr;
 744 
 745                         /*
 746                          * Invalidate the handle to the userland mapping.
 747                          * Note: We must ensure that the mapping being
 748                          * unmapped here is the current one for the CQ.  It
 749                          * is possible that it might not be if this CQ has
 750                          * been resized and the previous CQ memory has not
 751                          * yet been unmapped.  But in that case, because of
 752                          * the devmap_devmem_remap(), there is no longer any
 753                          * association between the mapping and the real CQ
 754                          * kernel memory.
 755                          */
 756                         mutex_enter(&cq->cq_lock);
 757                         if (cq->cq_umap_dhp == dhp) {
 758                                 cq->cq_umap_dhp = (devmap_cookie_t)NULL;
 759                         }
 760                         mutex_exit(&cq->cq_lock);
 761 
 762                 } else if (type == MLNX_UMAP_QPMEM_RSRC) {
 763 
 764                         /* Use "value" to convert to QP handle */
 765                         rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
 766                         qp = (tavor_qphdl_t)rsrcp->tr_addr;
 767 
 768                         /*
 769                          * Invalidate the handle to the userland mapping.
 770                          * Note: we ensure that the mapping being unmapped
 771                          * here is the current one for the QP.  This is
 772                          * more of a sanity check here since, unlike CQs
 773                          * (above) we do not support resize of QPs.
 774                          */
 775                         mutex_enter(&qp->qp_lock);
 776                         if (qp->qp_umap_dhp == dhp) {
 777                                 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
 778                         }
 779                         mutex_exit(&qp->qp_lock);
 780 
 781                 } else if (type == MLNX_UMAP_SRQMEM_RSRC) {
 782 
 783                         /* Use "value" to convert to SRQ handle */
 784                         rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
 785                         srq = (tavor_srqhdl_t)rsrcp->tr_addr;
 786 
 787                         /*
 788                          * Invalidate the handle to the userland mapping.
 789                          * Note: we ensure that the mapping being unmapped
 790                          * here is the current one for the QP.  This is
 791                          * more of a sanity check here since, unlike CQs
 792                          * (above) we do not support resize of QPs.
 793                          */
 794                         mutex_enter(&srq->srq_lock);
 795                         if (srq->srq_umap_dhp == dhp) {
 796                                 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
 797                         }
 798                         mutex_exit(&srq->srq_lock);
 799                 }
 800         }
 801 
 802         TAVOR_TNF_EXIT(tavor_devmap_umem_unmap);
 803 }
 804 
 805 
 806 /*
 807  * tavor_devmap_devmem_map()
 808  *    Context: Can be called from kernel context.
 809  */
 810 /* ARGSUSED */
 811 static int
 812 tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
 813     offset_t off, size_t len, void **pvtp)
 814 {
 815         tavor_state_t           *state;
 816         tavor_devmap_track_t    *dvm_track;
 817         minor_t                 instance;
 818 
 819         TAVOR_TNF_ENTER(tavor_devmap_devmem_map);
 820 
 821         /* Get Tavor softstate structure from instance */
 822         instance = TAVOR_DEV_INSTANCE(dev);
 823         state = ddi_get_soft_state(tavor_statep, instance);
 824         if (state == NULL) {
 825                 TNF_PROBE_0(tavor_devmap_devmem_map_gss_fail, TAVOR_TNF_ERROR,
 826                     "");
 827                 TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
 828                 return (ENXIO);
 829         }
 830 
 831         /*
 832          * Allocate an entry to track the mapping and unmapping of this
 833          * resource.  Note:  We don't need to initialize the "refcnt" or
 834          * "offset" fields here, nor do we need to initialize the mutex
 835          * used with the "refcnt".  Since UAR pages are single pages, they
 836          * are not subject to "partial" unmappings.  This makes these other
 837          * fields unnecessary.
 838          */
 839         dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
 840             sizeof (tavor_devmap_track_t), KM_SLEEP);
 841         dvm_track->tdt_state  = state;
 842         dvm_track->tdt_size   = PAGESIZE;
 843 
 844         /*
 845          * Pass the private "Tavor devmap tracking structure" back.  This
 846          * pointer will be returned in a subsequent "unmap" callback.
 847          */
 848         *pvtp = dvm_track;
 849 
 850         TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
 851         return (DDI_SUCCESS);
 852 }
 853 
 854 
 855 /*
 856  * tavor_devmap_devmem_dup()
 857  *    Context: Can be called from kernel context.
 858  */
 859 /* ARGSUSED */
 860 static int
 861 tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
 862     devmap_cookie_t new_dhp, void **new_pvtp)
 863 {
 864         tavor_state_t           *state;
 865         tavor_devmap_track_t    *dvm_track;
 866         uint_t                  maxprot;
 867         int                     status;
 868 
 869         TAVOR_TNF_ENTER(tavor_devmap_devmem_dup);
 870 
 871         /*
 872          * Extract the Tavor softstate pointer from "Tavor devmap tracking
 873          * structure" (in "pvtp").  Note: If the tracking structure is NULL
 874          * here, it means that the mapping corresponds to an invalid mapping.
 875          * In this case, it can be safely ignored ("new_pvtp" set to NULL).
 876          */
 877         dvm_track = (tavor_devmap_track_t *)pvtp;
 878         if (dvm_track == NULL) {
 879                 *new_pvtp = NULL;
 880                 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
 881                 return (DDI_SUCCESS);
 882         }
 883 
 884         state = dvm_track->tdt_state;
 885 
 886         /*
 887          * Since this devmap_dup() entry point is generally called
 888          * when a process does fork(2), it is incumbent upon the driver
 889          * to insure that the child does not inherit a valid copy of
 890          * the parent's resource.  This is accomplished by using
 891          * devmap_devmem_remap() to invalidate the child's mapping to the
 892          * kernel memory.
 893          */
 894         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 895         status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
 896             dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
 897         if (status != DDI_SUCCESS) {
 898                 TAVOR_WARNING(state, "failed in tavor_devmap_devmem_dup()");
 899                 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
 900                 return (status);
 901         }
 902 
 903         /*
 904          * Since the region is invalid, there is no need for us to
 905          * allocate and continue to track an additional "Tavor devmap
 906          * tracking structure".  Instead we return NULL here, which is an
 907          * indication to the devmap_unmap() entry point that this entry
 908          * can be safely ignored.
 909          */
 910         *new_pvtp = NULL;
 911 
 912         TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
 913         return (DDI_SUCCESS);
 914 }
 915 
 916 
 917 /*
 918  * tavor_devmap_devmem_unmap()
 919  *    Context: Can be called from kernel context.
 920  */
 921 /* ARGSUSED */
 922 static void
 923 tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
 924     size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
 925     devmap_cookie_t new_dhp2, void **pvtp2)
 926 {
 927         tavor_devmap_track_t    *dvm_track;
 928 
 929         TAVOR_TNF_ENTER(tavor_devmap_devmem_unmap);
 930 
 931         /*
 932          * Free up the "Tavor devmap tracking structure" (in "pvtp").
 933          * There cannot be "partial" unmappings here because all UAR pages
 934          * are single pages.  Note: If the tracking structure is NULL here,
 935          * it means that the mapping corresponds to an invalid mapping.  In
 936          * this case, it can be safely ignored.
 937          */
 938         dvm_track = (tavor_devmap_track_t *)pvtp;
 939         if (dvm_track == NULL) {
 940                 TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
 941                 return;
 942         }
 943 
 944         kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
 945         TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
 946 }
 947 
 948 
 949 /*
 950  * tavor_umap_ci_data_in()
 951  *    Context: Can be called from user or kernel context.
 952  */
 953 /* ARGSUSED */
 954 ibt_status_t
 955 tavor_umap_ci_data_in(tavor_state_t *state, ibt_ci_data_flags_t flags,
 956     ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
 957 {
 958         int     status;
 959 
 960         TAVOR_TNF_ENTER(tavor_umap_ci_data_in);
 961 
 962         /*
 963          * Depending on the type of object about which additional information
 964          * is being provided (currently only MR is supported), we call the
 965          * appropriate resource-specific function.
 966          */
 967         switch (object) {
 968         case IBT_HDL_MR:
 969                 status = tavor_umap_mr_data_in((tavor_mrhdl_t)hdl,
 970                     (ibt_mr_data_in_t *)data_p, data_sz);
 971                 if (status != DDI_SUCCESS) {
 972                         TNF_PROBE_0(tavor_umap_mr_data_in_fail,
 973                             TAVOR_TNF_ERROR, "");
 974                         TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
 975                         return (status);
 976                 }
 977                 break;
 978 
 979         /*
 980          * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
 981          * since the Tavor driver does not support these.
 982          */
 983         case IBT_HDL_HCA:
 984         case IBT_HDL_QP:
 985         case IBT_HDL_CQ:
 986         case IBT_HDL_PD:
 987         case IBT_HDL_MW:
 988         case IBT_HDL_AH:
 989         case IBT_HDL_SCHED:
 990         case IBT_HDL_EEC:
 991         case IBT_HDL_RDD:
 992         case IBT_HDL_SRQ:
 993                 TNF_PROBE_0(tavor_umap_ci_data_in_unsupp_type,
 994                     TAVOR_TNF_ERROR, "");
 995                 TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
 996                 return (IBT_NOT_SUPPORTED);
 997 
 998         /*
 999          * Any other types are invalid.
1000          */
1001         default:
1002                 TNF_PROBE_0(tavor_umap_ci_data_in_invtype_fail,
1003                     TAVOR_TNF_ERROR, "");
1004                 TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1005                 return (IBT_INVALID_PARAM);
1006         }
1007 
1008         TAVOR_TNF_EXIT(tavor_umap_ci_data_in);
1009         return (DDI_SUCCESS);
1010 }
1011 
1012 
1013 /*
1014  * tavor_umap_mr_data_in()
1015  *    Context: Can be called from user or kernel context.
1016  */
1017 static ibt_status_t
1018 tavor_umap_mr_data_in(tavor_mrhdl_t mr, ibt_mr_data_in_t *data,
1019     size_t data_sz)
1020 {
1021         TAVOR_TNF_ENTER(tavor_umap_mr_data_in);
1022 
1023         if (data->mr_rev != IBT_MR_DATA_IN_IF_VERSION) {
1024                 TNF_PROBE_0(tavor_umap_mr_data_in_ver_fail,
1025                     TAVOR_TNF_ERROR, "");
1026                 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1027                 return (IBT_NOT_SUPPORTED);
1028         }
1029 
1030         /* Check for valid MR handle pointer */
1031         if (mr == NULL) {
1032                 TNF_PROBE_0(tavor_umap_mr_data_in_invmrhdl_fail,
1033                     TAVOR_TNF_ERROR, "");
1034                 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1035                 return (IBT_MR_HDL_INVALID);
1036         }
1037 
1038         /* Check for valid MR input structure size */
1039         if (data_sz < sizeof (ibt_mr_data_in_t)) {
1040                 TNF_PROBE_0(tavor_umap_mr_data_in_invdatasz_fail,
1041                     TAVOR_TNF_ERROR, "");
1042                 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1043                 return (IBT_INSUFF_RESOURCE);
1044         }
1045 
1046         /*
1047          * Ensure that the MR corresponds to userland memory and that it is
1048          * a currently valid memory region as well.
1049          */
1050         mutex_enter(&mr->mr_lock);
1051         if ((mr->mr_is_umem == 0) || (mr->mr_umemcookie == NULL)) {
1052                 mutex_exit(&mr->mr_lock);
1053                 TNF_PROBE_0(tavor_umap_mr_data_in_invumem_fail,
1054                     TAVOR_TNF_ERROR, "");
1055                 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1056                 return (IBT_MR_HDL_INVALID);
1057         }
1058 
1059         /*
1060          * If it has passed all the above checks, then extract the callback
1061          * function and argument from the input structure.  Copy them into
1062          * the MR handle.  This function will be called only if the memory
1063          * corresponding to the MR handle gets a umem_lockmemory() callback.
1064          */
1065         mr->mr_umem_cbfunc = data->mr_func;
1066         mr->mr_umem_cbarg1 = data->mr_arg1;
1067         mr->mr_umem_cbarg2 = data->mr_arg2;
1068         mutex_exit(&mr->mr_lock);
1069 
1070         TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1071         return (DDI_SUCCESS);
1072 }
1073 
1074 
1075 /*
1076  * tavor_umap_ci_data_out()
1077  *    Context: Can be called from user or kernel context.
1078  */
1079 /* ARGSUSED */
1080 ibt_status_t
1081 tavor_umap_ci_data_out(tavor_state_t *state, ibt_ci_data_flags_t flags,
1082     ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
1083 {
1084         int     status;
1085 
1086         TAVOR_TNF_ENTER(tavor_umap_ci_data_out);
1087 
1088         /*
1089          * Depending on the type of object about which additional information
1090          * is being requested (CQ or QP), we call the appropriate resource-
1091          * specific mapping function.
1092          */
1093         switch (object) {
1094         case IBT_HDL_CQ:
1095                 status = tavor_umap_cq_data_out((tavor_cqhdl_t)hdl,
1096                     (mlnx_umap_cq_data_out_t *)data_p, data_sz);
1097                 if (status != DDI_SUCCESS) {
1098                         TNF_PROBE_0(tavor_umap_cq_data_out_fail,
1099                             TAVOR_TNF_ERROR, "");
1100                         TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1101                         return (status);
1102                 }
1103                 break;
1104 
1105         case IBT_HDL_QP:
1106                 status = tavor_umap_qp_data_out((tavor_qphdl_t)hdl,
1107                     (mlnx_umap_qp_data_out_t *)data_p, data_sz);
1108                 if (status != DDI_SUCCESS) {
1109                         TNF_PROBE_0(tavor_umap_qp_data_out_fail,
1110                             TAVOR_TNF_ERROR, "");
1111                         TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1112                         return (status);
1113                 }
1114                 break;
1115 
1116         case IBT_HDL_SRQ:
1117                 status = tavor_umap_srq_data_out((tavor_srqhdl_t)hdl,
1118                     (mlnx_umap_srq_data_out_t *)data_p, data_sz);
1119                 if (status != DDI_SUCCESS) {
1120                         TNF_PROBE_0(tavor_umap_srq_data_out_fail,
1121                             TAVOR_TNF_ERROR, "");
1122                         TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1123                         return (status);
1124                 }
1125                 break;
1126 
1127         /*
1128          * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
1129          * since the Tavor driver does not support these.
1130          */
1131         case IBT_HDL_PD:
1132                 status = tavor_umap_pd_data_out((tavor_pdhdl_t)hdl,
1133                     (mlnx_umap_pd_data_out_t *)data_p, data_sz);
1134                 if (status != DDI_SUCCESS) {
1135                         TNF_PROBE_0(tavor_umap_pd_data_out_fail,
1136                             TAVOR_TNF_ERROR, "");
1137                         TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1138                         return (status);
1139                 }
1140                 break;
1141 
1142         case IBT_HDL_HCA:
1143         case IBT_HDL_MR:
1144         case IBT_HDL_MW:
1145         case IBT_HDL_AH:
1146         case IBT_HDL_SCHED:
1147         case IBT_HDL_EEC:
1148         case IBT_HDL_RDD:
1149                 TNF_PROBE_0(tavor_umap_ci_data_out_unsupp_type,
1150                     TAVOR_TNF_ERROR, "");
1151                 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1152                 return (IBT_NOT_SUPPORTED);
1153 
1154         /*
1155          * Any other types are invalid.
1156          */
1157         default:
1158                 TNF_PROBE_0(tavor_umap_ci_data_out_invtype_fail,
1159                     TAVOR_TNF_ERROR, "");
1160                 TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1161                 return (IBT_INVALID_PARAM);
1162         }
1163 
1164         TAVOR_TNF_EXIT(tavor_umap_ci_data_out);
1165         return (DDI_SUCCESS);
1166 }
1167 
1168 
1169 /*
1170  * tavor_umap_cq_data_out()
1171  *    Context: Can be called from user or kernel context.
1172  */
1173 static ibt_status_t
1174 tavor_umap_cq_data_out(tavor_cqhdl_t cq, mlnx_umap_cq_data_out_t *data,
1175     size_t data_sz)
1176 {
1177         TAVOR_TNF_ENTER(tavor_umap_cq_data_out);
1178 
1179         /* Check for valid CQ handle pointer */
1180         if (cq == NULL) {
1181                 TNF_PROBE_0(tavor_umap_cq_data_out_invcqhdl_fail,
1182                     TAVOR_TNF_ERROR, "");
1183                 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1184                 return (IBT_CQ_HDL_INVALID);
1185         }
1186 
1187         /* Check for valid CQ mapping structure size */
1188         if (data_sz < sizeof (mlnx_umap_cq_data_out_t)) {
1189                 TNF_PROBE_0(tavor_umap_cq_data_out_invdatasz_fail,
1190                     TAVOR_TNF_ERROR, "");
1191                 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1192                 return (IBT_INSUFF_RESOURCE);
1193         }
1194 
1195         /*
1196          * If it has passed all the above checks, then fill in all the useful
1197          * mapping information (including the mapping offset that will be
1198          * passed back to the devmap() interface during a subsequent mmap()
1199          * call.
1200          *
1201          * The "offset" for CQ mmap()'s looks like this:
1202          * +----------------------------------------+--------+--------------+
1203          * |               CQ Number                |  0x33  | Reserved (0) |
1204          * +----------------------------------------+--------+--------------+
1205          *         (64 - 8 - PAGESHIFT) bits        8 bits      PAGESHIFT bits
1206          *
1207          * This returns information about the mapping offset, the length of
1208          * the CQ memory, the CQ number (for use in later CQ doorbells), the
1209          * number of CQEs the CQ memory can hold, and the size of each CQE.
1210          */
1211         data->mcq_rev                = MLNX_UMAP_IF_VERSION;
1212         data->mcq_mapoffset  = ((((uint64_t)cq->cq_cqnum <<
1213             MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC) << PAGESHIFT);
1214         data->mcq_maplen     = cq->cq_cqinfo.qa_size;
1215         data->mcq_cqnum              = cq->cq_cqnum;
1216         data->mcq_numcqe     = cq->cq_bufsz;
1217         data->mcq_cqesz              = sizeof (tavor_hw_cqe_t);
1218 
1219         TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1220         return (DDI_SUCCESS);
1221 }
1222 
1223 
1224 /*
1225  * tavor_umap_qp_data_out()
1226  *    Context: Can be called from user or kernel context.
1227  */
1228 static ibt_status_t
1229 tavor_umap_qp_data_out(tavor_qphdl_t qp, mlnx_umap_qp_data_out_t *data,
1230     size_t data_sz)
1231 {
1232         TAVOR_TNF_ENTER(tavor_umap_qp_data_out);
1233 
1234         /* Check for valid QP handle pointer */
1235         if (qp == NULL) {
1236                 TNF_PROBE_0(tavor_umap_qp_data_out_invqphdl_fail,
1237                     TAVOR_TNF_ERROR, "");
1238                 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1239                 return (IBT_QP_HDL_INVALID);
1240         }
1241 
1242         /* Check for valid QP mapping structure size */
1243         if (data_sz < sizeof (mlnx_umap_qp_data_out_t)) {
1244                 TNF_PROBE_0(tavor_umap_qp_data_out_invdatasz_fail,
1245                     TAVOR_TNF_ERROR, "");
1246                 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1247                 return (IBT_INSUFF_RESOURCE);
1248         }
1249 
1250         /*
1251          * If it has passed all the checks, then fill in all the useful
1252          * mapping information (including the mapping offset that will be
1253          * passed back to the devmap() interface during a subsequent mmap()
1254          * call.
1255          *
1256          * The "offset" for QP mmap()'s looks like this:
1257          * +----------------------------------------+--------+--------------+
1258          * |               QP Number                |  0x44  | Reserved (0) |
1259          * +----------------------------------------+--------+--------------+
1260          *         (64 - 8 - PAGESHIFT) bits        8 bits      PAGESHIFT bits
1261          *
1262          * This returns information about the mapping offset, the length of
1263          * the QP memory, and the QP number (for use in later send and recv
1264          * doorbells).  It also returns the following information for both
1265          * the receive work queue and the send work queue, respectively:  the
1266          * offset (from the base mapped address) of the start of the given
1267          * work queue, the 64-bit IB virtual address that corresponds to
1268          * the base mapped address (needed for posting WQEs though the
1269          * QP doorbells), the number of WQEs the given work queue can hold,
1270          * and the size of each WQE for the given work queue.
1271          */
1272         data->mqp_rev                = MLNX_UMAP_IF_VERSION;
1273         data->mqp_mapoffset  = ((((uint64_t)qp->qp_qpnum <<
1274             MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_QPMEM_RSRC) << PAGESHIFT);
1275         data->mqp_maplen     = qp->qp_wqinfo.qa_size;
1276         data->mqp_qpnum              = qp->qp_qpnum;
1277 
1278         /*
1279          * If this QP is associated with a shared receive queue (SRQ),
1280          * then return invalid RecvQ parameters.  Otherwise, return
1281          * the proper parameter values.
1282          */
1283         if (qp->qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
1284                 data->mqp_rq_off     = (uint32_t)qp->qp_wqinfo.qa_size;
1285                 data->mqp_rq_desc_addr       = (uint32_t)qp->qp_wqinfo.qa_size;
1286                 data->mqp_rq_numwqe  = 0;
1287                 data->mqp_rq_wqesz   = 0;
1288         } else {
1289                 data->mqp_rq_off     = (uintptr_t)qp->qp_rq_buf -
1290                     (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1291                 data->mqp_rq_desc_addr       = (uint32_t)((uintptr_t)qp->qp_rq_buf -
1292                     qp->qp_desc_off);
1293                 data->mqp_rq_numwqe  = qp->qp_rq_bufsz;
1294                 data->mqp_rq_wqesz   = (1 << qp->qp_rq_log_wqesz);
1295         }
1296         data->mqp_sq_off     = (uintptr_t)qp->qp_sq_buf -
1297             (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1298         data->mqp_sq_desc_addr       = (uint32_t)((uintptr_t)qp->qp_sq_buf -
1299             qp->qp_desc_off);
1300         data->mqp_sq_numwqe  = qp->qp_sq_bufsz;
1301         data->mqp_sq_wqesz   = (1 << qp->qp_sq_log_wqesz);
1302 
1303         TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1304         return (DDI_SUCCESS);
1305 }
1306 
1307 
1308 /*
1309  * tavor_umap_srq_data_out()
1310  *    Context: Can be called from user or kernel context.
1311  */
1312 static ibt_status_t
1313 tavor_umap_srq_data_out(tavor_srqhdl_t srq, mlnx_umap_srq_data_out_t *data,
1314     size_t data_sz)
1315 {
1316         TAVOR_TNF_ENTER(tavor_umap_srq_data_out);
1317 
1318         /* Check for valid SRQ handle pointer */
1319         if (srq == NULL) {
1320                 TNF_PROBE_0(tavor_umap_srq_data_out_invsrqhdl_fail,
1321                     TAVOR_TNF_ERROR, "");
1322                 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1323                 return (IBT_SRQ_HDL_INVALID);
1324         }
1325 
1326         /* Check for valid SRQ mapping structure size */
1327         if (data_sz < sizeof (mlnx_umap_srq_data_out_t)) {
1328                 TNF_PROBE_0(tavor_umap_srq_data_out_invdatasz_fail,
1329                     TAVOR_TNF_ERROR, "");
1330                 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1331                 return (IBT_INSUFF_RESOURCE);
1332         }
1333 
1334         /*
1335          * If it has passed all the checks, then fill in all the useful
1336          * mapping information (including the mapping offset that will be
1337          * passed back to the devmap() interface during a subsequent mmap()
1338          * call.
1339          *
1340          * The "offset" for SRQ mmap()'s looks like this:
1341          * +----------------------------------------+--------+--------------+
1342          * |               SRQ Number               |  0x66  | Reserved (0) |
1343          * +----------------------------------------+--------+--------------+
1344          *         (64 - 8 - PAGESHIFT) bits        8 bits      PAGESHIFT bits
1345          *
1346          * This returns information about the mapping offset, the length of the
1347          * SRQ memory, and the SRQ number (for use in later send and recv
1348          * doorbells).  It also returns the following information for the
1349          * shared receive queue: the offset (from the base mapped address) of
1350          * the start of the given work queue, the 64-bit IB virtual address
1351          * that corresponds to the base mapped address (needed for posting WQEs
1352          * though the QP doorbells), the number of WQEs the given work queue
1353          * can hold, and the size of each WQE for the given work queue.
1354          */
1355         data->msrq_rev               = MLNX_UMAP_IF_VERSION;
1356         data->msrq_mapoffset = ((((uint64_t)srq->srq_srqnum <<
1357             MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_SRQMEM_RSRC) << PAGESHIFT);
1358         data->msrq_maplen    = srq->srq_wqinfo.qa_size;
1359         data->msrq_srqnum    = srq->srq_srqnum;
1360 
1361         data->msrq_desc_addr = (uint32_t)((uintptr_t)srq->srq_wq_buf -
1362             srq->srq_desc_off);
1363         data->msrq_numwqe    = srq->srq_wq_bufsz;
1364         data->msrq_wqesz     = (1 << srq->srq_wq_log_wqesz);
1365 
1366         TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1367         return (DDI_SUCCESS);
1368 }
1369 
1370 /*
1371  * tavor_umap_pd_data_out()
1372  *    Context: Can be called from user or kernel context.
1373  */
1374 static ibt_status_t
1375 tavor_umap_pd_data_out(tavor_pdhdl_t pd, mlnx_umap_pd_data_out_t *data,
1376     size_t data_sz)
1377 {
1378         TAVOR_TNF_ENTER(tavor_umap_pd_data_out);
1379 
1380         /* Check for valid PD handle pointer */
1381         if (pd == NULL) {
1382                 TNF_PROBE_0(tavor_umap_pd_data_out_invpdhdl_fail,
1383                     TAVOR_TNF_ERROR, "");
1384                 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1385                 return (IBT_PD_HDL_INVALID);
1386         }
1387 
1388         /* Check for valid PD mapping structure size */
1389         if (data_sz < sizeof (mlnx_umap_pd_data_out_t)) {
1390                 TNF_PROBE_0(tavor_umap_pd_data_out_invdatasz_fail,
1391                     TAVOR_TNF_ERROR, "");
1392                 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1393                 return (IBT_INSUFF_RESOURCE);
1394         }
1395 
1396         /*
1397          * If it has passed all the checks, then fill the PD table index
1398          * (the PD table allocated index for the PD pd_pdnum)
1399          */
1400         data->mpd_rev        = MLNX_UMAP_IF_VERSION;
1401         data->mpd_pdnum      = pd->pd_pdnum;
1402 
1403         TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1404         return (DDI_SUCCESS);
1405 }
1406 
1407 /*
1408  * tavor_umap_db_init()
1409  *    Context: Only called from attach() path context
1410  */
1411 void
1412 tavor_umap_db_init(void)
1413 {
1414         TAVOR_TNF_ENTER(tavor_umap_db_init);
1415 
1416         /*
1417          * Initialize the lock used by the Tavor "userland resources database"
1418          * This is used to ensure atomic access to add, remove, and find
1419          * entries in the database.
1420          */
1421         mutex_init(&tavor_userland_rsrc_db.tdl_umapdb_lock, NULL,
1422             MUTEX_DRIVER, NULL);
1423 
1424         /*
1425          * Initialize the AVL tree used for the "userland resources
1426          * database".  Using an AVL tree here provides the ability to
1427          * scale the database size to large numbers of resources.  The
1428          * entries in the tree are "tavor_umap_db_entry_t".
1429          * The tree is searched with the help of the
1430          * tavor_umap_db_compare() routine.
1431          */
1432         avl_create(&tavor_userland_rsrc_db.tdl_umapdb_avl,
1433             tavor_umap_db_compare, sizeof (tavor_umap_db_entry_t),
1434             offsetof(tavor_umap_db_entry_t, tdbe_avlnode));
1435 
1436         TAVOR_TNF_EXIT(tavor_umap_db_init);
1437 }
1438 
1439 
1440 /*
1441  * tavor_umap_db_fini()
1442  *    Context: Only called from attach() and/or detach() path contexts
1443  */
1444 void
1445 tavor_umap_db_fini(void)
1446 {
1447         TAVOR_TNF_ENTER(tavor_umap_db_fini);
1448 
1449         /* Destroy the AVL tree for the "userland resources database" */
1450         avl_destroy(&tavor_userland_rsrc_db.tdl_umapdb_avl);
1451 
1452         /* Destroy the lock for the "userland resources database" */
1453         mutex_destroy(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1454 
1455         TAVOR_TNF_EXIT(tavor_umap_db_fini);
1456 }
1457 
1458 
1459 /*
1460  * tavor_umap_db_alloc()
1461  *    Context: Can be called from user or kernel context.
1462  */
1463 tavor_umap_db_entry_t *
1464 tavor_umap_db_alloc(uint_t instance, uint64_t key, uint_t type, uint64_t value)
1465 {
1466         tavor_umap_db_entry_t   *umapdb;
1467 
1468         TAVOR_TNF_ENTER(tavor_umap_db_alloc);
1469 
1470         /* Allocate an entry to add to the "userland resources database" */
1471         umapdb = kmem_zalloc(sizeof (tavor_umap_db_entry_t), KM_NOSLEEP);
1472         if (umapdb == NULL) {
1473                 TNF_PROBE_0(tavor_umap_db_alloc_kmz_fail, TAVOR_TNF_ERROR, "");
1474                 TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1475                 return (NULL);
1476         }
1477 
1478         /* Fill in the fields in the database entry */
1479         umapdb->tdbe_common.tdb_instance  = instance;
1480         umapdb->tdbe_common.tdb_type   = type;
1481         umapdb->tdbe_common.tdb_key    = key;
1482         umapdb->tdbe_common.tdb_value          = value;
1483 
1484         TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1485         return (umapdb);
1486 }
1487 
1488 
1489 /*
1490  * tavor_umap_db_free()
1491  *    Context: Can be called from user or kernel context.
1492  */
1493 void
1494 tavor_umap_db_free(tavor_umap_db_entry_t *umapdb)
1495 {
1496         TAVOR_TNF_ENTER(tavor_umap_db_free);
1497 
1498         /* Free the database entry */
1499         kmem_free(umapdb, sizeof (tavor_umap_db_entry_t));
1500 
1501         TAVOR_TNF_EXIT(tavor_umap_db_free);
1502 }
1503 
1504 
1505 /*
1506  * tavor_umap_db_add()
1507  *    Context: Can be called from user or kernel context.
1508  */
1509 void
1510 tavor_umap_db_add(tavor_umap_db_entry_t *umapdb)
1511 {
1512         TAVOR_TNF_ENTER(tavor_umap_db_add);
1513 
1514         mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1515         tavor_umap_db_add_nolock(umapdb);
1516         mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1517 
1518         TAVOR_TNF_EXIT(tavor_umap_db_add);
1519 }
1520 
1521 
1522 /*
1523  * tavor_umap_db_add_nolock()
1524  *    Context: Can be called from user or kernel context.
1525  */
1526 void
1527 tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb)
1528 {
1529         tavor_umap_db_query_t   query;
1530         avl_index_t             where;
1531 
1532         TAVOR_TNF_ENTER(tavor_umap_db_add_nolock);
1533 
1534         ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1535 
1536         /*
1537          * Copy the common portion of the "to-be-added" database entry
1538          * into the "tavor_umap_db_query_t" structure.  We use this structure
1539          * (with no flags set) to find the appropriate location in the
1540          * "userland resources database" for the new entry to be added.
1541          *
1542          * Note: we expect that this entry should not be found in the
1543          * database (unless something bad has happened).
1544          */
1545         query.tqdb_common = umapdb->tdbe_common;
1546         query.tqdb_flags  = 0;
1547         (void) avl_find(&tavor_userland_rsrc_db.tdl_umapdb_avl, &query,
1548             &where);
1549 
1550         /*
1551          * Now, using the "where" field from the avl_find() operation
1552          * above, we will insert the new database entry ("umapdb").
1553          */
1554         avl_insert(&tavor_userland_rsrc_db.tdl_umapdb_avl, umapdb,
1555             where);
1556 
1557         TAVOR_TNF_EXIT(tavor_umap_db_add_nolock);
1558 }
1559 
1560 
1561 /*
1562  * tavor_umap_db_find()
1563  *    Context: Can be called from user or kernel context.
1564  */
1565 int
1566 tavor_umap_db_find(uint_t instance, uint64_t key, uint_t type,
1567     uint64_t *value, uint_t flag, tavor_umap_db_entry_t **umapdb)
1568 {
1569         int     status;
1570 
1571         TAVOR_TNF_ENTER(tavor_umap_db_find);
1572 
1573         mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1574         status = tavor_umap_db_find_nolock(instance, key, type, value, flag,
1575             umapdb);
1576         mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1577 
1578         TAVOR_TNF_EXIT(tavor_umap_db_find);
1579         return (status);
1580 }
1581 
1582 
1583 /*
1584  * tavor_umap_db_find_nolock()
1585  *    Context: Can be called from user or kernel context.
1586  */
1587 int
1588 tavor_umap_db_find_nolock(uint_t instance, uint64_t key, uint_t type,
1589     uint64_t *value, uint_t flags, tavor_umap_db_entry_t **umapdb)
1590 {
1591         tavor_umap_db_query_t   query;
1592         tavor_umap_db_entry_t   *entry;
1593         avl_index_t             where;
1594 
1595         TAVOR_TNF_ENTER(tavor_umap_db_find_nolock);
1596 
1597         ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1598 
1599         /*
1600          * Fill in key, type, instance, and flags values of the
1601          * tavor_umap_db_query_t in preparation for the database
1602          * lookup.
1603          */
1604         query.tqdb_flags                = flags;
1605         query.tqdb_common.tdb_key       = key;
1606         query.tqdb_common.tdb_type      = type;
1607         query.tqdb_common.tdb_instance  = instance;
1608 
1609         /*
1610          * Perform the database query.  If no entry is found, then
1611          * return failure, else continue.
1612          */
1613         entry = (tavor_umap_db_entry_t *)avl_find(
1614             &tavor_userland_rsrc_db.tdl_umapdb_avl, &query, &where);
1615         if (entry == NULL) {
1616                 TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1617                 return (DDI_FAILURE);
1618         }
1619 
1620         /*
1621          * If the flags argument specifies that the entry should
1622          * be removed if found, then call avl_remove() to remove
1623          * the entry from the database.
1624          */
1625         if (flags & TAVOR_UMAP_DB_REMOVE) {
1626 
1627                 avl_remove(&tavor_userland_rsrc_db.tdl_umapdb_avl, entry);
1628 
1629                 /*
1630                  * The database entry is returned with the expectation
1631                  * that the caller will use tavor_umap_db_free() to
1632                  * free the entry's memory.  ASSERT that this is non-NULL.
1633                  * NULL pointer should never be passed for the
1634                  * TAVOR_UMAP_DB_REMOVE case.
1635                  */
1636                 ASSERT(umapdb != NULL);
1637         }
1638 
1639         /*
1640          * If the caller would like visibility to the database entry
1641          * (indicated through the use of a non-NULL "umapdb" argument),
1642          * then fill it in.
1643          */
1644         if (umapdb != NULL) {
1645                 *umapdb = entry;
1646         }
1647 
1648         /* Extract value field from database entry and return success */
1649         *value = entry->tdbe_common.tdb_value;
1650 
1651         TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1652         return (DDI_SUCCESS);
1653 }
1654 
1655 
1656 /*
1657  * tavor_umap_umemlock_cb()
1658  *    Context: Can be called from callback context.
1659  */
1660 void
1661 tavor_umap_umemlock_cb(ddi_umem_cookie_t *umem_cookie)
1662 {
1663         tavor_umap_db_entry_t   *umapdb;
1664         tavor_state_t           *state;
1665         tavor_rsrc_t            *rsrcp;
1666         tavor_mrhdl_t           mr;
1667         uint64_t                value;
1668         uint_t                  instance;
1669         int                     status;
1670         void                    (*mr_callback)(void *, void *);
1671         void                    *mr_cbarg1, *mr_cbarg2;
1672 
1673         TAVOR_TNF_ENTER(tavor_umap_umemlock_cb);
1674 
1675         /*
1676          * If this was userland memory, then we need to remove its entry
1677          * from the "userland resources database".  Note:  We use the
1678          * TAVOR_UMAP_DB_IGNORE_INSTANCE flag here because we don't know
1679          * which instance was used when the entry was added (but we want
1680          * to know after the entry is found using the other search criteria).
1681          */
1682         status = tavor_umap_db_find(0, (uint64_t)(uintptr_t)umem_cookie,
1683             MLNX_UMAP_MRMEM_RSRC, &value, (TAVOR_UMAP_DB_REMOVE |
1684             TAVOR_UMAP_DB_IGNORE_INSTANCE), &umapdb);
1685         if (status == DDI_SUCCESS) {
1686                 instance = umapdb->tdbe_common.tdb_instance;
1687                 state = ddi_get_soft_state(tavor_statep, instance);
1688                 if (state == NULL) {
1689                         cmn_err(CE_WARN, "Unable to match Tavor instance\n");
1690                         TNF_PROBE_0(tavor_umap_umemlock_cb_gss_fail,
1691                             TAVOR_TNF_ERROR, "");
1692                         TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1693                         return;
1694                 }
1695 
1696                 /* Free the database entry */
1697                 tavor_umap_db_free(umapdb);
1698 
1699                 /* Use "value" to convert to an MR handle */
1700                 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
1701                 mr = (tavor_mrhdl_t)rsrcp->tr_addr;
1702 
1703                 /*
1704                  * If a callback has been provided, call it first.  This
1705                  * callback is expected to do any cleanup necessary to
1706                  * guarantee that the subsequent MR deregister (below)
1707                  * will succeed.  Specifically, this means freeing up memory
1708                  * windows which might have been associated with the MR.
1709                  */
1710                 mutex_enter(&mr->mr_lock);
1711                 mr_callback = mr->mr_umem_cbfunc;
1712                 mr_cbarg1   = mr->mr_umem_cbarg1;
1713                 mr_cbarg2   = mr->mr_umem_cbarg2;
1714                 mutex_exit(&mr->mr_lock);
1715                 if (mr_callback != NULL) {
1716                         mr_callback(mr_cbarg1, mr_cbarg2);
1717                 }
1718 
1719                 /*
1720                  * Then call tavor_mr_deregister() to release the resources
1721                  * associated with the MR handle.  Note: Because this routine
1722                  * will also check for whether the ddi_umem_cookie_t is in the
1723                  * database, it will take responsibility for disabling the
1724                  * memory region and calling ddi_umem_unlock().
1725                  */
1726                 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
1727                     TAVOR_SLEEP);
1728                 if (status != DDI_SUCCESS) {
1729                         TAVOR_WARNING(state, "Unexpected failure in "
1730                             "deregister from callback\n");
1731                         TNF_PROBE_0(tavor_umap_umemlock_cb_dereg_fail,
1732                             TAVOR_TNF_ERROR, "");
1733                         TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1734                 }
1735         }
1736 
1737         TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1738 }
1739 
1740 
1741 /*
1742  * tavor_umap_db_compare()
1743  *    Context: Can be called from user or kernel context.
1744  */
1745 static int
1746 tavor_umap_db_compare(const void *q, const void *e)
1747 {
1748         tavor_umap_db_common_t  *entry_common, *query_common;
1749         uint_t                  query_flags;
1750 
1751         TAVOR_TNF_ENTER(tavor_umap_db_compare);
1752 
1753         entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common;
1754         query_common = &((tavor_umap_db_query_t *)q)->tqdb_common;
1755         query_flags  = ((tavor_umap_db_query_t *)q)->tqdb_flags;
1756 
1757         /*
1758          * The first comparison is done on the "key" value in "query"
1759          * and "entry".  If they are not equal, then the appropriate
1760          * search direction is returned.  Else, we continue by
1761          * comparing "type".
1762          */
1763         if (query_common->tdb_key < entry_common->tdb_key) {
1764                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1765                 return (-1);
1766         } else if (query_common->tdb_key > entry_common->tdb_key) {
1767                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1768                 return (+1);
1769         }
1770 
1771         /*
1772          * If the search reaches this point, then "query" and "entry"
1773          * have equal key values.  So we continue be comparing their
1774          * "type" values.  Again, if they are not equal, then the
1775          * appropriate search direction is returned.  Else, we continue
1776          * by comparing "instance".
1777          */
1778         if (query_common->tdb_type < entry_common->tdb_type) {
1779                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1780                 return (-1);
1781         } else if (query_common->tdb_type > entry_common->tdb_type) {
1782                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1783                 return (+1);
1784         }
1785 
1786         /*
1787          * If the search reaches this point, then "query" and "entry"
1788          * have exactly the same key and type values.  Now we consult
1789          * the "flags" field in the query to determine whether the
1790          * "instance" is relevant to the search.  If the
1791          * TAVOR_UMAP_DB_IGNORE_INSTANCE flags is set, then return
1792          * success (0) here.  Otherwise, continue the search by comparing
1793          * instance values and returning the appropriate search direction.
1794          */
1795         if (query_flags & TAVOR_UMAP_DB_IGNORE_INSTANCE) {
1796                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1797                 return (0);
1798         }
1799 
1800         /*
1801          * If the search has reached this point, then "query" and "entry"
1802          * can only be differentiated by their instance values.  If these
1803          * are not equal, then return the appropriate search direction.
1804          * Else, we return success (0).
1805          */
1806         if (query_common->tdb_instance < entry_common->tdb_instance) {
1807                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1808                 return (-1);
1809         } else if (query_common->tdb_instance > entry_common->tdb_instance) {
1810                 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1811                 return (+1);
1812         }
1813 
1814         /* Everything matches... so return success */
1815         TAVOR_TNF_EXIT(tavor_umap_db_compare);
1816         return (0);
1817 }
1818 
1819 
1820 /*
1821  * tavor_umap_db_set_onclose_cb()
1822  *    Context: Can be called from user or kernel context.
1823  */
1824 int
1825 tavor_umap_db_set_onclose_cb(dev_t dev, uint64_t flag,
1826     void (*callback)(void *), void *arg)
1827 {
1828         tavor_umap_db_priv_t    *priv;
1829         tavor_umap_db_entry_t   *umapdb;
1830         minor_t                 instance;
1831         uint64_t                value;
1832         int                     status;
1833 
1834         TAVOR_TNF_ENTER(tavor_umap_db_set_onclose_cb);
1835 
1836         instance = TAVOR_DEV_INSTANCE(dev);
1837         if (instance == -1) {
1838                 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_inst_fail,
1839                     TAVOR_TNF_ERROR, "");
1840                 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1841                 return (DDI_FAILURE);
1842         }
1843 
1844         if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1845                 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_invflag_fail,
1846                     TAVOR_TNF_ERROR, "");
1847                 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1848                 return (DDI_FAILURE);
1849         }
1850 
1851         /*
1852          * Grab the lock for the "userland resources database" and find
1853          * the entry corresponding to this minor number.  Once it's found,
1854          * allocate (if necessary) and add an entry (in the "tdb_priv"
1855          * field) to indicate that further processing may be needed during
1856          * Tavor's close() handling.
1857          */
1858         mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1859         status = tavor_umap_db_find_nolock(instance, dev,
1860             MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1861         if (status != DDI_SUCCESS) {
1862                 TNF_PROBE_0(tavor_umap_db_set_onclose_cb_find_fail,
1863                     TAVOR_TNF_ERROR, "");
1864                 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1865                 TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1866                 return (DDI_FAILURE);
1867         }
1868 
1869         priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1870         if (priv == NULL) {
1871                 priv = (tavor_umap_db_priv_t *)kmem_zalloc(
1872                     sizeof (tavor_umap_db_priv_t), KM_NOSLEEP);
1873                 if (priv == NULL) {
1874                         TNF_PROBE_0(tavor_umap_db_set_onclose_cb_kmz_fail,
1875                             TAVOR_TNF_ERROR, "");
1876                         mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1877                         TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1878                         return (DDI_FAILURE);
1879                 }
1880         }
1881 
1882         /*
1883          * Save away the callback and argument to be used during Tavor's
1884          * close() processing.
1885          */
1886         priv->tdp_cb = callback;
1887         priv->tdp_arg        = arg;
1888 
1889         umapdb->tdbe_common.tdb_priv = (void *)priv;
1890         mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1891 
1892         TAVOR_TNF_EXIT(tavor_umap_db_set_onclose_cb);
1893         return (DDI_SUCCESS);
1894 }
1895 
1896 
1897 /*
1898  * tavor_umap_db_clear_onclose_cb()
1899  *    Context: Can be called from user or kernel context.
1900  */
1901 int
1902 tavor_umap_db_clear_onclose_cb(dev_t dev, uint64_t flag)
1903 {
1904         tavor_umap_db_priv_t    *priv;
1905         tavor_umap_db_entry_t   *umapdb;
1906         minor_t                 instance;
1907         uint64_t                value;
1908         int                     status;
1909 
1910         TAVOR_TNF_ENTER(tavor_umap_db_set_onclose_cb);
1911 
1912         instance = TAVOR_DEV_INSTANCE(dev);
1913         if (instance == -1) {
1914                 TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_inst_fail,
1915                     TAVOR_TNF_ERROR, "");
1916                 TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1917                 return (DDI_FAILURE);
1918         }
1919 
1920         if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1921                 TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_invflag_fail,
1922                     TAVOR_TNF_ERROR, "");
1923                 TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1924                 return (DDI_FAILURE);
1925         }
1926 
1927         /*
1928          * Grab the lock for the "userland resources database" and find
1929          * the entry corresponding to this minor number.  Once it's found,
1930          * remove the entry (in the "tdb_priv" field) that indicated the
1931          * need for further processing during Tavor's close().  Free the
1932          * entry, if appropriate.
1933          */
1934         mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1935         status = tavor_umap_db_find_nolock(instance, dev,
1936             MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1937         if (status != DDI_SUCCESS) {
1938                 TNF_PROBE_0(tavor_umap_db_clear_onclose_cb_find_fail,
1939                     TAVOR_TNF_ERROR, "");
1940                 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1941                 TAVOR_TNF_EXIT(tavor_umap_db_clear_onclose_cb);
1942                 return (DDI_FAILURE);
1943         }
1944 
1945         priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1946         if (priv != NULL) {
1947                 kmem_free(priv, sizeof (tavor_umap_db_priv_t));
1948                 priv = NULL;
1949         }
1950 
1951         umapdb->tdbe_common.tdb_priv = (void *)priv;
1952         mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1953         return (DDI_SUCCESS);
1954 }
1955 
1956 
1957 /*
1958  * tavor_umap_db_clear_onclose_cb()
1959  *    Context: Can be called from user or kernel context.
1960  */
1961 void
1962 tavor_umap_db_handle_onclose_cb(tavor_umap_db_priv_t *priv)
1963 {
1964         void    (*callback)(void *);
1965 
1966         ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1967 
1968         /*
1969          * Call the callback.
1970          *    Note: Currently there is only one callback (in "tdp_cb"), but
1971          *    in the future there may be more, depending on what other types
1972          *    of interaction there are between userland processes and the
1973          *    driver.
1974          */
1975         callback = priv->tdp_cb;
1976         callback(priv->tdp_arg);
1977 }