1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * hermon_mr.c
  28  *    Hermon Memory Region/Window Routines
  29  *
  30  *    Implements all the routines necessary to provide the requisite memory
  31  *    registration verbs.  These include operations like RegisterMemRegion(),
  32  *    DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion,
  33  *    etc., that affect Memory Regions.  It also includes the verbs that
  34  *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
  35  *    and QueryMemWindow().
  36  */
  37 
  38 #include <sys/types.h>
  39 #include <sys/conf.h>
  40 #include <sys/ddi.h>
  41 #include <sys/sunddi.h>
  42 #include <sys/modctl.h>
  43 #include <sys/esunddi.h>
  44 
  45 #include <sys/ib/adapters/hermon/hermon.h>
  46 
  47 extern uint32_t hermon_kernel_data_ro;
  48 extern uint32_t hermon_user_data_ro;
  49 extern int hermon_rdma_debug;
  50 
  51 /*
  52  * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion
  53  * of Hermon memory keys (LKeys and RKeys)
  54  */
  55 static  uint_t hermon_memkey_cnt = 0x00;
  56 #define HERMON_MEMKEY_SHIFT     24
  57 
  58 /* initial state of an MPT */
  59 #define HERMON_MPT_SW_OWNERSHIP 0xF     /* memory regions */
  60 #define HERMON_MPT_FREE         0x3     /* allocate lkey */
  61 
  62 static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
  63     hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
  64     hermon_mpt_rsrc_type_t mpt_type);
  65 static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
  66     hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
  67     hermon_mr_options_t *op);
  68 static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
  69     hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
  70     uint_t sleep, uint_t *dereg_level);
  71 static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state,
  72     hermon_bind_info_t *bind, uint_t *mtt_pgsize);
  73 static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
  74     ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer);
  75 static void hermon_mr_mem_unbind(hermon_state_t *state,
  76     hermon_bind_info_t *bind);
  77 static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
  78     hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits);
  79 static int hermon_mr_fast_mtt_write_fmr(hermon_state_t *state,
  80     hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);
  81 static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc);
  82 static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc);
  83 
  84 
  85 /*
  86  * The Hermon umem_lockmemory() callback ops.  When userland memory is
  87  * registered, these callback ops are specified.  The hermon_umap_umemlock_cb()
  88  * callback will be called whenever the memory for the corresponding
  89  * ddi_umem_cookie_t is being freed.
  90  */
static struct umem_callback_ops hermon_umem_cbops = {
	UMEM_CALLBACK_VERSION,
	hermon_umap_umemlock_cb,	/* called when the locked umem for a
					 * registered region is being freed
					 * (see block comment above) */
};
  95 
  96 
  97 
  98 /*
  99  * hermon_mr_register()
 100  *    Context: Can be called from interrupt or base context.
 101  */
 102 int
 103 hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
 104     ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
 105     hermon_mpt_rsrc_type_t mpt_type)
 106 {
 107         hermon_bind_info_t      bind;
 108         int                     status;
 109 
 110         /*
 111          * Fill in the "bind" struct.  This struct provides the majority
 112          * of the information that will be used to distinguish between an
 113          * "addr" binding (as is the case here) and a "buf" binding (see
 114          * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
 115          * which does most of the "heavy lifting" for the Hermon memory
 116          * registration routines.
 117          */
 118         bind.bi_type  = HERMON_BINDHDL_VADDR;
 119         bind.bi_addr  = mr_attr->mr_vaddr;
 120         bind.bi_len   = mr_attr->mr_len;
 121         bind.bi_as    = mr_attr->mr_as;
 122         bind.bi_flags = mr_attr->mr_flags;
 123         status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op,
 124             mpt_type);
 125         return (status);
 126 }
 127 
 128 
 129 /*
 130  * hermon_mr_register_buf()
 131  *    Context: Can be called from interrupt or base context.
 132  */
 133 int
 134 hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd,
 135     ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl,
 136     hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type)
 137 {
 138         hermon_bind_info_t      bind;
 139         int                     status;
 140 
 141         /*
 142          * Fill in the "bind" struct.  This struct provides the majority
 143          * of the information that will be used to distinguish between an
 144          * "addr" binding (see above) and a "buf" binding (as is the case
 145          * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
 146          * which does most of the "heavy lifting" for the Hermon memory
 147          * registration routines.  Note: We have chosen to provide
 148          * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
 149          * not set).  It is not critical what value we choose here as it need
 150          * only be unique for the given RKey (which will happen by default),
 151          * so the choice here is somewhat arbitrary.
 152          */
 153         bind.bi_type  = HERMON_BINDHDL_BUF;
 154         bind.bi_buf   = buf;
 155         if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
 156                 bind.bi_addr  = mr_attr->mr_vaddr;
 157         } else {
 158                 bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
 159         }
 160         bind.bi_as    = NULL;
 161         bind.bi_len   = (uint64_t)buf->b_bcount;
 162         bind.bi_flags = mr_attr->mr_flags;
 163         status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type);
 164         return (status);
 165 }
 166 
 167 
 168 /*
 169  * hermon_mr_register_shared()
 170  *    Context: Can be called from interrupt or base context.
 171  */
int
hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc;
	hermon_umap_db_entry_t	*umapdb;
	hermon_hw_dmpt_t	mpt_entry;
	hermon_mrhdl_t		mr;
	hermon_bind_info_t	*bind;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, pgsize_msk;
	uint_t			sleep, mr_is_umem;
	int			status, umem_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP :
	    HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrshared_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the shared memory region.
	 * Specifically, it will be made to reference the currently existing
	 * MTT entries and ownership of the MPT will be passed to the hardware
	 * in the last step below.  If we fail here, we must undo the
	 * protection domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail1;
	}

	/*
	 * Allocate the software structure for tracking the shared memory
	 * region (i.e. the Hermon Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail2;
	}
	mr = (hermon_mrhdl_t)rsrc->hr_addr;

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

	/*
	 * Grab the MR lock for the current memory region.  Note this lock is
	 * held (intentionally) across the umem_lockmemory() and umap db
	 * allocation calls below so the original region cannot be torn down
	 * underneath us while we copy its bind/MTT state.
	 */
	mutex_enter(&mrhdl->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
		mutex_exit(&mrhdl->mr_lock);
		status = IBT_MR_HDL_INVALID;
		goto mrshared_fail3;
	}

	/*
	 * Determine if the original memory was from userland and, if so, pin
	 * the pages (again) with umem_lockmemory().  This will guarantee a
	 * separate callback for each of this shared region's MR handles.
	 * If this is userland memory, then allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo all the above setup.
	 */
	mr_is_umem = mrhdl->mr_is_umem;
	if (mr_is_umem) {
		/* Page-align the original bind range before re-locking it */
		umem_len   = ptob(btopr(mrhdl->mr_bindinfo.bi_len));
		umem_addr  = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
		    ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &hermon_umem_cbops, NULL);
		if (status != 0) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail3;
		}

		umapdb = hermon_umap_db_alloc(state->hs_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail4;
		}
	}

	/*
	 * Copy the MTT resource pointer (and additional parameters) from
	 * the original Hermon Memory Region handle.  Note: this is normally
	 * where the hermon_mr_mem_bind() routine would be called, but because
	 * we already have bound and filled-in MTT entries it is simply a
	 * matter here of managing the MTT reference count and grabbing the
	 * address of the MTT table entries (for filling in the shared region's
	 * MPT entry).
	 */
	mr->mr_mttrsrcp	       = mrhdl->mr_mttrsrcp;
	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
	mr->mr_bindinfo	       = mrhdl->mr_bindinfo;
	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
	mutex_exit(&mrhdl->mr_lock);
	bind = &mr->mr_bindinfo;
	mtt = mr->mr_mttrsrcp;

	/*
	 * Increment the MTT reference count (to reflect the fact that
	 * the MTT is now shared)
	 */
	(void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp);

	/*
	 * Update the new "bind" virtual address.  Do some extra work here
	 * to ensure proper alignment.  That is, make sure that the page
	 * offset for the beginning of the old range is the same as the
	 * offset for this new mapping
	 */
	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
	    (mr->mr_bindinfo.bi_addr & pgsize_msk));

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr      = 1;	/* local read is always enabled */
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.start_addr	= bind->bi_addr;
	mpt_entry.reg_win_len	= bind->bi_len;
	/*
	 * The MTT address is split across two MPT fields; the low word is
	 * stored shifted by 3 (NOTE(review): presumably because the hardware
	 * field records the address in 8-byte units — confirm against the
	 * hermon_hw_dmpt_t definition).
	 */
	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
	mpt_entry.mtt_addr_h = mtt_addr >> 32;
	mpt_entry.mtt_addr_l = mtt_addr >> 3;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mrshared_fail5;
	}

	/*
	 * Fill in the rest of the Hermon Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	       = mpt;
	mr->mr_mttrsrcp	       = mtt;
	mr->mr_mpt_type	       = HERMON_MPT_DMPT;
	mr->mr_pdhdl   = pd;
	mr->mr_rsrcp   = rsrc;
	mr->mr_is_umem	       = mr_is_umem;
	mr->mr_is_fmr	       = 0;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;
	/* Swap the keys into the byte order expected by consumers/hardware */
	mr->mr_lkey	= hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey	= hermon_mr_key_swap(mr->mr_rkey);

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the hermon_umap_umemlock_cb()
	 * callback and hermon_mr_deregister().
	 */
	if (mr_is_umem) {
		hermon_umap_db_add(umapdb);
	}

	*mrhdl_new = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine.
 * Each label undoes everything done before the point at which the
 * corresponding "goto" was issued (later labels fall through to earlier
 * ones, unwinding in reverse order of acquisition).
 */
mrshared_fail5:
	(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
	if (mr_is_umem) {
		hermon_umap_db_free(umapdb);
	}
mrshared_fail4:
	if (mr_is_umem) {
		ddi_umem_unlock(umem_cookie);
	}
mrshared_fail3:
	hermon_rsrc_free(state, &rsrc);
mrshared_fail2:
	hermon_rsrc_free(state, &mpt);
mrshared_fail1:
	hermon_pd_refcnt_dec(pd);
mrshared_fail:
	return (status);
}
 435 
 436 /*
 437  * hermon_mr_alloc_fmr()
 438  *    Context: Can be called from interrupt or base context.
 439  */
 440 int
 441 hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd,
 442     hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl)
 443 {
 444         hermon_rsrc_t           *mpt, *mtt, *rsrc;
 445         hermon_hw_dmpt_t        mpt_entry;
 446         hermon_mrhdl_t          mr;
 447         hermon_bind_info_t      bind;
 448         uint64_t                mtt_addr;
 449         uint64_t                nummtt;
 450         uint_t                  sleep, mtt_pgsize_bits;
 451         int                     status;
 452         offset_t                i;
 453         hermon_icm_table_t      *icm_table;
 454         hermon_dma_info_t       *dma_info;
 455         uint32_t                index1, index2, rindx;
 456 
 457         /*
 458          * Check the sleep flag.  Ensure that it is consistent with the
 459          * current thread context (i.e. if we are currently in the interrupt
 460          * context, then we shouldn't be attempting to sleep).
 461          */
 462         sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
 463             HERMON_NOSLEEP;
 464         if ((sleep == HERMON_SLEEP) &&
 465             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
 466                 return (IBT_INVALID_PARAM);
 467         }
 468 
 469         /* Increment the reference count on the protection domain (PD) */
 470         hermon_pd_refcnt_inc(pd);
 471 
 472         /*
 473          * Allocate an MPT entry.  This will be filled in with all the
 474          * necessary parameters to define the FMR.  Specifically, it will be
 475          * made to reference the currently existing MTT entries and ownership
 476          * of the MPT will be passed to the hardware in the last step below.
 477          * If we fail here, we must undo the protection domain reference count.
 478          */
 479 
 480         status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
 481         if (status != DDI_SUCCESS) {
 482                 status = IBT_INSUFF_RESOURCE;
 483                 goto fmralloc_fail1;
 484         }
 485 
 486         /*
 487          * Allocate the software structure for tracking the fmr memory
 488          * region (i.e. the Hermon Memory Region handle).  If we fail here, we
 489          * must undo the protection domain reference count and the previous
 490          * resource allocation.
 491          */
 492         status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
 493         if (status != DDI_SUCCESS) {
 494                 status = IBT_INSUFF_RESOURCE;
 495                 goto fmralloc_fail2;
 496         }
 497         mr = (hermon_mrhdl_t)rsrc->hr_addr;
 498 
 499         /*
 500          * Setup and validate the memory region access flags.  This means
 501          * translating the IBTF's enable flags into the access flags that
 502          * will be used in later operations.
 503          */
 504         mr->mr_accflag = 0;
 505         if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
 506                 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
 507         if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ)
 508                 mr->mr_accflag |= IBT_MR_REMOTE_READ;
 509         if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
 510                 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
 511         if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
 512                 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
 513 
 514         /*
 515          * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
 516          * from a certain number of "constrained" bits (the least significant
 517          * bits) and some number of "unconstrained" bits.  The constrained
 518          * bits must be set to the index of the entry in the MPT table, but
 519          * the unconstrained bits can be set to any value we wish.  Note:
 520          * if no remote access is required, then the RKey value is not filled
 521          * in.  Otherwise both Rkey and LKey are given the same value.
 522          */
 523         mr->mr_fmr_key = 1;  /* ready for the next reload */
 524         mr->mr_rkey = mr->mr_lkey = mpt->hr_indx;
 525 
 526         /*
 527          * Determine number of pages spanned.  This routine uses the
 528          * information in the "bind" struct to determine the required
 529          * number of MTT entries needed (and returns the suggested page size -
 530          * as a "power-of-2" - for each MTT entry).
 531          */
 532         /* Assume address will be page aligned later */
 533         bind.bi_addr = 0;
 534         /* Calculate size based on given max pages */
 535         bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT;
 536         nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits);
 537 
 538         /*
 539          * Allocate the MTT entries.  Use the calculations performed above to
 540          * allocate the required number of MTT entries.  If we fail here, we
 541          * must not only undo all the previous resource allocation (and PD
 542          * reference count), but we must also unbind the memory.
 543          */
 544         status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
 545         if (status != DDI_SUCCESS) {
 546                 IBTF_DPRINTF_L2("FMR", "FATAL: too few MTTs");
 547                 status = IBT_INSUFF_RESOURCE;
 548                 goto fmralloc_fail3;
 549         }
 550         mr->mr_logmttpgsz = mtt_pgsize_bits;
 551 
 552         /*
 553          * Fill in the MPT entry.  This is the final step before passing
 554          * ownership of the MPT entry to the Hermon hardware.  We use all of
 555          * the information collected/calculated above to fill in the
 556          * requisite portions of the MPT.
 557          */
 558         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
 559         mpt_entry.en_bind = 0;
 560         mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
 561         mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
 562         mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
 563         mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
 564         mpt_entry.lr      = 1;
 565         mpt_entry.reg_win = HERMON_MPT_IS_REGION;
 566         mpt_entry.pd            = pd->pd_pdnum;
 567 
 568         mpt_entry.entity_sz     = mr->mr_logmttpgsz;
 569         mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
 570         mpt_entry.fast_reg_en = 1;
 571         mpt_entry.mtt_size = (uint_t)nummtt;
 572         mpt_entry.mtt_addr_h = mtt_addr >> 32;
 573         mpt_entry.mtt_addr_l = mtt_addr >> 3;
 574         mpt_entry.mem_key = mr->mr_lkey;
 575 
 576         /*
 577          * FMR sets these to 0 for now.  Later during actual fmr registration
 578          * these values are filled in.
 579          */
 580         mpt_entry.start_addr    = 0;
 581         mpt_entry.reg_win_len   = 0;
 582 
 583         /*
 584          * Write the MPT entry to hardware.  Lastly, we pass ownership of
 585          * the entry to the hardware.  Note: in general, this operation
 586          * shouldn't fail.  But if it does, we have to undo everything we've
 587          * done above before returning error.
 588          */
 589         status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
 590             sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
 591         if (status != HERMON_CMD_SUCCESS) {
 592                 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
 593                     status);
 594                 if (status == HERMON_CMD_INVALID_STATUS) {
 595                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
 596                 }
 597                 status = ibc_get_ci_failure(0);
 598                 goto fmralloc_fail4;
 599         }
 600 
 601         /*
 602          * Fill in the rest of the Hermon Memory Region handle.  Having
 603          * successfully transferred ownership of the MPT, we can update the
 604          * following fields for use in further operations on the MR.  Also, set
 605          * that this is an FMR region.
 606          */
 607         mr->mr_mptrsrcp        = mpt;
 608         mr->mr_mttrsrcp        = mtt;
 609 
 610         mr->mr_mpt_type   = HERMON_MPT_DMPT;
 611         mr->mr_pdhdl   = pd;
 612         mr->mr_rsrcp   = rsrc;
 613         mr->mr_is_fmr          = 1;
 614         mr->mr_lkey     = hermon_mr_key_swap(mr->mr_lkey);
 615         mr->mr_rkey     = hermon_mr_key_swap(mr->mr_rkey);
 616         mr->mr_mttaddr          = mtt_addr;
 617         (void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t));
 618 
 619         /* initialize hr_addr for use during register/deregister/invalidate */
 620         icm_table = &state->hs_icm[HERMON_DMPT];
 621         rindx = mpt->hr_indx;
 622         hermon_index(index1, index2, rindx, icm_table, i);
 623         dma_info = icm_table->icm_dma[index1] + index2;
 624         mpt->hr_addr = (void *)((uintptr_t)(dma_info->vaddr + i * mpt->hr_len));
 625 
 626         *mrhdl = mr;
 627 
 628         return (DDI_SUCCESS);
 629 
 630 /*
 631  * The following is cleanup for all possible failure cases in this routine
 632  */
 633 fmralloc_fail4:
 634         kmem_free(mtt, sizeof (hermon_rsrc_t) * nummtt);
 635 fmralloc_fail3:
 636         hermon_rsrc_free(state, &rsrc);
 637 fmralloc_fail2:
 638         hermon_rsrc_free(state, &mpt);
 639 fmralloc_fail1:
 640         hermon_pd_refcnt_dec(pd);
 641 fmralloc_fail:
 642         return (status);
 643 }
 644 
 645 
 646 /*
 647  * hermon_mr_register_physical_fmr()
 648  *    Context: Can be called from interrupt or base context.
 649  */
/*ARGSUSED*/
int
hermon_mr_register_physical_fmr(hermon_state_t *state,
    ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
{
	hermon_rsrc_t		*mpt;
	uint64_t		*mpt_table;
	int			status;
	uint32_t		key;

	mutex_enter(&mr->mr_lock);
	/* hr_addr was set up at FMR alloc time to point at the MPT in ICM */
	mpt = mr->mr_mptrsrcp;
	mpt_table = (uint64_t *)mpt->hr_addr;

	/*
	 * Write MPT status to SW bit.  This gives software ownership of the
	 * MPT while its fields are being updated below (0xF0: NOTE(review) -
	 * presumably the SW-ownership value in the status byte; compare
	 * HERMON_MPT_SW_OWNERSHIP (0xF) defined at the top of this file).
	 */
	*(uint8_t *)mpt_table = 0xF0;

	/* Ensure the ownership byte lands before the field updates below */
	membar_producer();

	/*
	 * Write the mapped addresses into the MTT entries.  FMR needs to do
	 * this a little differently, so we call the fmr specific fast mtt
	 * write here.
	 */
	status = hermon_mr_fast_mtt_write_fmr(state, mr->mr_mttrsrcp,
	    mem_pattr_p, mr->mr_logmttpgsz);
	if (status != DDI_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		status = ibc_get_ci_failure(0);
		goto fmr_reg_fail1;
	}

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 *
	 * mr_fmr_key is post-incremented so each FMR reload produces a new
	 * key value in the unconstrained (upper) bits.
	 */
	key = mpt->hr_indx | (mr->mr_fmr_key++ << HERMON_MEMKEY_SHIFT);
	mr->mr_lkey = mr->mr_rkey = hermon_mr_key_swap(key);

	/*
	 * Patch the individual MPT fields in place (big-endian, hence the
	 * htonl/htonll conversions).  NOTE(review): the word offsets below
	 * (1 = mem_key, 2 = start addr, 3 = length, 4 = lkey) are assumed to
	 * match the hermon_hw_dmpt_t layout - confirm against that header.
	 */
	/* write mem key value */
	*(uint32_t *)&mpt_table[1] = htonl(key);

	/* write length value */
	mpt_table[3] = htonll(mem_pattr_p->pmr_len);

	/* write start addr value */
	mpt_table[2] = htonll(mem_pattr_p->pmr_iova);

	/* write lkey value */
	*(uint32_t *)&mpt_table[4] = htonl(key);

	/* Ensure all field updates are visible before handing back to HW */
	membar_producer();

	/* Write MPT status to HW bit (0x00 returns ownership to hardware) */
	*(uint8_t *)mpt_table = 0x00;

	/* Fill in return parameters */
	mem_desc_p->pmd_lkey = mr->mr_lkey;
	mem_desc_p->pmd_rkey = mr->mr_rkey;
	mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
	mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

	/* Fill in MR bindinfo struct for later sync or query operations */
	mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
	mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);

fmr_reg_fail1:
	/*
	 * Note, we fail here, and purposely leave the memory ownership in
	 * software.  The memory tables may be corrupt, so we leave the region
	 * unregistered.
	 */
	return (status);
}
 733 
 734 
 735 /*
 736  * hermon_mr_deregister()
 737  *    Context: Can be called from interrupt or base context.
 738  */
 739 /* ARGSUSED */
int
hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	hermon_umap_db_entry_t	*umapdb;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;
	hermon_bind_info_t	*bind;
	uint64_t		value;
	int			status;
	uint_t			shared_mtt;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		return (status);
	}

	/*
	 * Pull all the necessary information from the Hermon Memory Region
	 * handle.  This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of this
	 * deregistration
	 */
	mr	= *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt	= mr->mr_mptrsrcp;
	mtt	= mr->mr_mttrsrcp;
	mtt_refcnt = mr->mr_mttrefcntp;
	rsrc	= mr->mr_rsrcp;
	pd	= mr->mr_pdhdl;
	bind	= &mr->mr_bindinfo;

	/*
	 * Check here if the memory region is really an FMR.  If so, this is a
	 * bad thing and we shouldn't be here.  Return failure.  (FMRs are
	 * torn down through hermon_mr_dealloc_fmr() instead.)
	 */
	if (mr->mr_is_fmr) {
		mutex_exit(&mr->mr_lock);
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of the hermon_umap_umemlock_cb() callback.
	 * If so, then jump to the end and free the remaining resources.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		goto mrdereg_finish_cleanup;
	}
	if (hermon_rdma_debug & 0x4)
		IBTF_DPRINTF_L2("mr", "dereg: mr %p  key %x",
		    mr, mr->mr_rkey);

	/*
	 * We must drop the "mr_lock" here to ensure that both SLEEP and
	 * NOSLEEP calls into the firmware work as expected.  Also, if two
	 * threads are attempting to access this MR (via de-register,
	 * re-register, or otherwise), then we allow the firmware to enforce
	 * the checking, that only one deregister is valid.
	 */
	mutex_exit(&mr->mr_lock);

	/*
	 * Reclaim MPT entry from hardware (if necessary).  Since the
	 * hermon_mr_deregister() routine is used in the memory region
	 * reregistration process as well, it is possible that we will
	 * not always wish to reclaim ownership of the MPT.  Check the
	 * "level" arg and, if necessary, attempt to reclaim it.  If
	 * the ownership transfer fails for any reason, we check to see
	 * what command status was returned from the hardware.  The only
	 * "expected" error status is the one that indicates an attempt to
	 * deregister a memory region that has memory windows bound to it
	 */
	if (level >= HERMON_MR_DEREG_ALL) {
		/*
		 * NOTE(review): ">=" is assumed to cover every MPT type from
		 * HERMON_MPT_DMPT upward that is hardware-owned -- confirm
		 * against the hermon_mpt_rsrc_type_t definition.
		 */
		if (mr->mr_mpt_type >= HERMON_MPT_DMPT) {
			status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
			    NULL, 0, mpt->hr_indx, sleep);
			if (status != HERMON_CMD_SUCCESS) {
				if (status == HERMON_CMD_REG_BOUND) {
					return (IBT_MR_IN_USE);
				} else {
					cmn_err(CE_CONT, "Hermon: HW2SW_MPT "
					    "command failed: %08x\n", status);
					if (status ==
					    HERMON_CMD_INVALID_STATUS) {
						hermon_fm_ereport(state,
						    HCA_SYS_ERR,
						    DDI_SERVICE_LOST);
					}
					return (IBT_INVALID_PARAM);
				}
			}
		}
	}

	/*
	 * Re-grab the mr_lock here.  Since further access to the protected
	 * 'mr' structure is needed, and we would have returned previously for
	 * the multiple deregistration case, we can safely grab the lock here.
	 */
	mutex_enter(&mr->mr_lock);

	/*
	 * If the memory had come from userland, then we do a lookup in the
	 * "userland resources database".  On success, we free the entry, call
	 * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
	 * an indication that the umem_lockmemory() callback has called
	 * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate
	 * the "mr_umemcookie" field in the MR handle (this will be used
	 * later to detect that only partial cleanup still remains to be done
	 * on the MR handle).
	 */
	if (mr->mr_is_umem) {
		status = hermon_umap_db_find(state->hs_instance,
		    (uint64_t)(uintptr_t)mr->mr_umemcookie,
		    MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
		    &umapdb);
		if (status == DDI_SUCCESS) {
			hermon_umap_db_free(umapdb);
			ddi_umem_unlock(mr->mr_umemcookie);
		} else {
			ddi_umem_unlock(mr->mr_umemcookie);
			mr->mr_umemcookie = NULL;
		}
	}

	/* mtt_refcnt is NULL in the case of hermon_dma_mr_register() */
	if (mtt_refcnt != NULL) {
		/*
		 * Decrement the MTT reference count.  Since the MTT resource
		 * may be shared between multiple memory regions (as a result
		 * of a "RegisterSharedMR" verb) it is important that we not
		 * free up or unbind resources prematurely.  If it's not shared
		 * (as indicated by the return status), then free the resource.
		 */
		shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt);
		if (!shared_mtt) {
			hermon_rsrc_free(state, &mtt_refcnt);
		}

		/*
		 * Free up the MTT entries and unbind the memory.  Here,
		 * as above, we attempt to free these resources only if
		 * it is appropriate to do so.
		 * Note, 'bind' is NULL in the alloc_lkey case.
		 */
		if (!shared_mtt) {
			if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) {
				hermon_mr_mem_unbind(state, bind);
			}
			hermon_rsrc_free(state, &mtt);
		}
	}

	/*
	 * If the MR handle has been invalidated, then drop the
	 * lock and return success.  Note: This only happens because
	 * the umem_lockmemory() callback has been triggered.  The
	 * cleanup here is partial, and further cleanup (in a
	 * subsequent hermon_mr_deregister() call) will be necessary.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (DDI_SUCCESS);
	}

mrdereg_finish_cleanup:
	mutex_exit(&mr->mr_lock);

	/* Free the Hermon Memory Region handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	if (mpt != NULL)
		hermon_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the mrhdl pointer to NULL and return success */
	*mrhdl = NULL;

	return (DDI_SUCCESS);
}
 931 
 932 /*
 933  * hermon_mr_dealloc_fmr()
 934  *    Context: Can be called from interrupt or base context.
 935  */
 936 /* ARGSUSED */
 937 int
 938 hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
 939 {
 940         hermon_rsrc_t           *mpt, *mtt, *rsrc;
 941         hermon_pdhdl_t          pd;
 942         hermon_mrhdl_t          mr;
 943 
 944         /*
 945          * Pull all the necessary information from the Hermon Memory Region
 946          * handle.  This is necessary here because the resource for the
 947          * MR handle is going to be freed up as part of the this
 948          * deregistration
 949          */
 950         mr      = *mrhdl;
 951         mutex_enter(&mr->mr_lock);
 952         mpt     = mr->mr_mptrsrcp;
 953         mtt     = mr->mr_mttrsrcp;
 954         rsrc    = mr->mr_rsrcp;
 955         pd      = mr->mr_pdhdl;
 956         mutex_exit(&mr->mr_lock);
 957 
 958         /* Free the MTT entries */
 959         hermon_rsrc_free(state, &mtt);
 960 
 961         /* Free the Hermon Memory Region handle */
 962         hermon_rsrc_free(state, &rsrc);
 963 
 964         /* Free up the MPT entry resource */
 965         hermon_rsrc_free(state, &mpt);
 966 
 967         /* Decrement the reference count on the protection domain (PD) */
 968         hermon_pd_refcnt_dec(pd);
 969 
 970         /* Set the mrhdl pointer to NULL and return success */
 971         *mrhdl = NULL;
 972 
 973         return (DDI_SUCCESS);
 974 }
 975 
 976 
 977 /*
 978  * hermon_mr_query()
 979  *    Context: Can be called from interrupt or base context.
 980  */
 981 /* ARGSUSED */
int
hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
	int			status;
	hermon_hw_dmpt_t	mpt_entry;
	uint32_t		lkey;

	mutex_enter(&mr->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/*
	 * Query the current MPT contents from the hardware.  The driver's
	 * (swapped) lkey is (index[23:0], key[7:0]), so ">> 8" extracts
	 * the MPT index for the command.  NOSLEEP because this routine can
	 * be called from interrupt context.
	 */
	status = hermon_cmn_query_cmd_post(state, QUERY_MPT, 0,
	    mr->mr_lkey >> 8, &mpt_entry, sizeof (hermon_hw_dmpt_t),
	    HERMON_NOSLEEP);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: QUERY_MPT failed: status %x", status);
		mutex_exit(&mr->mr_lock);
		return (ibc_get_ci_failure(0));
	}

	/* Update the mr sw struct from the hw struct. */

	/*
	 * Rotate the hardware-format key right by 8 bits -- the inverse of
	 * hermon_mr_key_swap() -- to recover the driver-format key.
	 */
	lkey = mpt_entry.mem_key;
	mr->mr_lkey = mr->mr_rkey = (lkey >> 8) | (lkey << 24);
	mr->mr_bindinfo.bi_addr = mpt_entry.start_addr;
	mr->mr_bindinfo.bi_len = mpt_entry.reg_win_len;

	/* Rebuild the access flags, preserving only the RO-disabled bit */
	mr->mr_accflag = (mr->mr_accflag & IBT_MR_RO_DISABLED) |
	    (mpt_entry.lw ? IBT_MR_LOCAL_WRITE : 0) |
	    (mpt_entry.rr ? IBT_MR_REMOTE_READ : 0) |
	    (mpt_entry.rw ? IBT_MR_REMOTE_WRITE : 0) |
	    (mpt_entry.atomic ? IBT_MR_REMOTE_ATOMIC : 0) |
	    (mpt_entry.en_bind ? IBT_MR_WINDOW_BIND : 0);

	/* The low MTT address field stores (addr >> 3), hence the "<< 3" */
	mr->mr_mttaddr = ((uint64_t)mpt_entry.mtt_addr_h << 32) |
	    (mpt_entry.mtt_addr_l << 3);
	mr->mr_logmttpgsz = mpt_entry.entity_sz;

	/* Fill in the queried attributes */
	attr->mr_lkey_state =
	    (mpt_entry.status == HERMON_MPT_FREE) ? IBT_KEY_FREE :
	    (mpt_entry.status == HERMON_MPT_SW_OWNERSHIP) ? IBT_KEY_INVALID :
	    IBT_KEY_VALID;
	attr->mr_phys_buf_list_sz = mpt_entry.mtt_size;
	attr->mr_attr_flags = mr->mr_accflag;
	attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

	/* Fill in the "local" attributes */
	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
	attr->mr_lbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;

	/*
	 * Fill in the "remote" attributes (if necessary).  Note: the
	 * remote attributes are only valid if the memory region has one
	 * or more of the remote access flags set.
	 */
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
		attr->mr_rbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;
	}

	/*
	 * If region is mapped for streaming (i.e. noncoherent), then set sync
	 * is required
	 */
	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

	mutex_exit(&mr->mr_lock);
	return (DDI_SUCCESS);
}
1063 
1064 
1065 /*
1066  * hermon_mr_reregister()
1067  *    Context: Can be called from interrupt or base context.
1068  */
1069 int
1070 hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
1071     hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
1072     hermon_mr_options_t *op)
1073 {
1074         hermon_bind_info_t      bind;
1075         int                     status;
1076 
1077         /*
1078          * Fill in the "bind" struct.  This struct provides the majority
1079          * of the information that will be used to distinguish between an
1080          * "addr" binding (as is the case here) and a "buf" binding (see
1081          * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
1082          * which does most of the "heavy lifting" for the Hermon memory
1083          * registration (and reregistration) routines.
1084          */
1085         bind.bi_type  = HERMON_BINDHDL_VADDR;
1086         bind.bi_addr  = mr_attr->mr_vaddr;
1087         bind.bi_len   = mr_attr->mr_len;
1088         bind.bi_as    = mr_attr->mr_as;
1089         bind.bi_flags = mr_attr->mr_flags;
1090         status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
1091         return (status);
1092 }
1093 
1094 
1095 /*
1096  * hermon_mr_reregister_buf()
1097  *    Context: Can be called from interrupt or base context.
1098  */
1099 int
1100 hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
1101     hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
1102     hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
1103 {
1104         hermon_bind_info_t      bind;
1105         int                     status;
1106 
1107         /*
1108          * Fill in the "bind" struct.  This struct provides the majority
1109          * of the information that will be used to distinguish between an
1110          * "addr" binding (see above) and a "buf" binding (as is the case
1111          * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
1112          * which does most of the "heavy lifting" for the Hermon memory
1113          * registration routines.  Note: We have chosen to provide
1114          * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
1115          * not set).  It is not critical what value we choose here as it need
1116          * only be unique for the given RKey (which will happen by default),
1117          * so the choice here is somewhat arbitrary.
1118          */
1119         bind.bi_type  = HERMON_BINDHDL_BUF;
1120         bind.bi_buf   = buf;
1121         if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
1122                 bind.bi_addr  = mr_attr->mr_vaddr;
1123         } else {
1124                 bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
1125         }
1126         bind.bi_len   = (uint64_t)buf->b_bcount;
1127         bind.bi_flags = mr_attr->mr_flags;
1128         bind.bi_as    = NULL;
1129         status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
1130         return (status);
1131 }
1132 
1133 
1134 /*
1135  * hermon_mr_sync()
1136  *    Context: Can be called from interrupt or base context.
1137  */
1138 /* ARGSUSED */
1139 int
1140 hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
1141 {
1142         hermon_mrhdl_t          mrhdl;
1143         uint64_t                seg_vaddr, seg_len, seg_end;
1144         uint64_t                mr_start, mr_end;
1145         uint_t                  type;
1146         int                     status, i;
1147 
1148         /* Process each of the ibt_mr_sync_t's */
1149         for (i = 0; i < num_segs; i++) {
1150                 mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;
1151 
1152                 /* Check for valid memory region handle */
1153                 if (mrhdl == NULL) {
1154                         status = IBT_MR_HDL_INVALID;
1155                         goto mrsync_fail;
1156                 }
1157 
1158                 mutex_enter(&mrhdl->mr_lock);
1159 
1160                 /*
1161                  * Check here to see if the memory region has already been
1162                  * partially deregistered as a result of a
1163                  * hermon_umap_umemlock_cb() callback.  If so, this is an
1164                  * error, return failure.
1165                  */
1166                 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
1167                         mutex_exit(&mrhdl->mr_lock);
1168                         status = IBT_MR_HDL_INVALID;
1169                         goto mrsync_fail;
1170                 }
1171 
1172                 /* Check for valid bounds on sync request */
1173                 seg_vaddr = mr_segs[i].ms_vaddr;
1174                 seg_len   = mr_segs[i].ms_len;
1175                 seg_end   = seg_vaddr + seg_len - 1;
1176                 mr_start  = mrhdl->mr_bindinfo.bi_addr;
1177                 mr_end    = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
1178                 if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
1179                         mutex_exit(&mrhdl->mr_lock);
1180                         status = IBT_MR_VA_INVALID;
1181                         goto mrsync_fail;
1182                 }
1183                 if ((seg_end < mr_start) || (seg_end > mr_end)) {
1184                         mutex_exit(&mrhdl->mr_lock);
1185                         status = IBT_MR_LEN_INVALID;
1186                         goto mrsync_fail;
1187                 }
1188 
1189                 /* Determine what type (i.e. direction) for sync */
1190                 if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
1191                         type = DDI_DMA_SYNC_FORDEV;
1192                 } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
1193                         type = DDI_DMA_SYNC_FORCPU;
1194                 } else {
1195                         mutex_exit(&mrhdl->mr_lock);
1196                         status = IBT_INVALID_PARAM;
1197                         goto mrsync_fail;
1198                 }
1199 
1200                 (void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
1201                     (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
1202 
1203                 mutex_exit(&mrhdl->mr_lock);
1204         }
1205 
1206         return (DDI_SUCCESS);
1207 
1208 mrsync_fail:
1209         return (status);
1210 }
1211 
1212 
1213 /*
1214  * hermon_mw_alloc()
1215  *    Context: Can be called from interrupt or base context.
1216  */
int
hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags,
    hermon_mwhdl_t *mwhdl)
{
	hermon_rsrc_t		*mpt, *rsrc;
	hermon_hw_dmpt_t		mpt_entry;
	hermon_mwhdl_t		mw;
	uint_t			sleep;
	int			status;

	/*
	 * NOTE: Memory window allocation is deliberately disabled.  The
	 * test below succeeds for every real call (callers always pass a
	 * non-NULL state), so this function always returns
	 * IBT_INSUFF_RESOURCE and everything after this point is
	 * currently unreachable.
	 */
	if (state != NULL)	/* XXX - bogus test that is always TRUE */
		return (IBT_INSUFF_RESOURCE);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mwalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry (for use as a memory window).  Since the
	 * Hermon hardware uses the MPT entry for memory regions and for
	 * memory windows, we will fill in this MPT with all the necessary
	 * parameters for the memory window.  And then (just as we do for
	 * memory regions) ownership will be passed to the hardware in the
	 * final step below.  If we fail here, we must undo the protection
	 * domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory window (i.e.
	 * the Hermon Memory Window handle).  Note: This is actually the same
	 * software structure used for tracking memory regions, but since many
	 * of the same properties are needed, only a single structure is
	 * necessary.  If we fail here, we must undo the protection domain
	 * reference count and the previous resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail2;
	}
	mw = (hermon_mwhdl_t)rsrc->hr_addr;

	/*
	 * Calculate an "unbound" RKey from MPT index.  In much the same way
	 * as we do for memory regions (above), this key is constructed from
	 * a "constrained" (which depends on the MPT index) and an
	 * "unconstrained" portion (which may be arbitrarily chosen).
	 */
	mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Note: fewer entries in the MPT
	 * entry are necessary to allocate a memory window.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.reg_win	= HERMON_MPT_IS_WINDOW;
	mpt_entry.mem_key	= mw->mr_rkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.lr		= 1;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mwalloc_fail3;
	}

	/*
	 * Fill in the rest of the Hermon Memory Window handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MW.
	 */
	mw->mr_mptrsrcp	     = mpt;
	mw->mr_pdhdl = pd;
	mw->mr_rsrcp = rsrc;
	mw->mr_rkey  = hermon_mr_key_swap(mw->mr_rkey);
	*mwhdl = mw;

	return (DDI_SUCCESS);

mwalloc_fail3:
	hermon_rsrc_free(state, &rsrc);
mwalloc_fail2:
	hermon_rsrc_free(state, &mpt);
mwalloc_fail1:
	hermon_pd_refcnt_dec(pd);
mwalloc_fail:
	return (status);
}
1336 
1337 
1338 /*
1339  * hermon_mw_free()
1340  *    Context: Can be called from interrupt or base context.
1341  */
1342 int
1343 hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
1344 {
1345         hermon_rsrc_t           *mpt, *rsrc;
1346         hermon_mwhdl_t          mw;
1347         int                     status;
1348         hermon_pdhdl_t          pd;
1349 
1350         /*
1351          * Check the sleep flag.  Ensure that it is consistent with the
1352          * current thread context (i.e. if we are currently in the interrupt
1353          * context, then we shouldn't be attempting to sleep).
1354          */
1355         if ((sleep == HERMON_SLEEP) &&
1356             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1357                 status = IBT_INVALID_PARAM;
1358                 return (status);
1359         }
1360 
1361         /*
1362          * Pull all the necessary information from the Hermon Memory Window
1363          * handle.  This is necessary here because the resource for the
1364          * MW handle is going to be freed up as part of the this operation.
1365          */
1366         mw      = *mwhdl;
1367         mutex_enter(&mw->mr_lock);
1368         mpt     = mw->mr_mptrsrcp;
1369         rsrc    = mw->mr_rsrcp;
1370         pd      = mw->mr_pdhdl;
1371         mutex_exit(&mw->mr_lock);
1372 
1373         /*
1374          * Reclaim the MPT entry from hardware.  Note: in general, it is
1375          * unexpected for this operation to return an error.
1376          */
1377         status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
1378             0, mpt->hr_indx, sleep);
1379         if (status != HERMON_CMD_SUCCESS) {
1380                 cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
1381                     status);
1382                 if (status == HERMON_CMD_INVALID_STATUS) {
1383                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1384                 }
1385                 return (ibc_get_ci_failure(0));
1386         }
1387 
1388         /* Free the Hermon Memory Window handle */
1389         hermon_rsrc_free(state, &rsrc);
1390 
1391         /* Free up the MPT entry resource */
1392         hermon_rsrc_free(state, &mpt);
1393 
1394         /* Decrement the reference count on the protection domain (PD) */
1395         hermon_pd_refcnt_dec(pd);
1396 
1397         /* Set the mwhdl pointer to NULL and return success */
1398         *mwhdl = NULL;
1399 
1400         return (DDI_SUCCESS);
1401 }
1402 
1403 
1404 /*
1405  * hermon_mr_keycalc()
1406  *    Context: Can be called from interrupt or base context.
1407  *    NOTE:  Produces a key in the form of
1408  *              KKKKKKKK IIIIIIII IIIIIIII IIIIIIIII
1409  *    where K == the arbitrary bits and I == the index
1410  */
1411 uint32_t
1412 hermon_mr_keycalc(uint32_t indx)
1413 {
1414         uint32_t tmp_key, tmp_indx;
1415 
1416         /*
1417          * Generate a simple key from counter.  Note:  We increment this
1418          * static variable _intentionally_ without any kind of mutex around
1419          * it.  First, single-threading all operations through a single lock
1420          * would be a bad idea (from a performance point-of-view).  Second,
1421          * the upper "unconstrained" bits don't really have to be unique
1422          * because the lower bits are guaranteed to be (although we do make a
1423          * best effort to ensure that they are).  Third, the window for the
1424          * race (where both threads read and update the counter at the same
1425          * time) is incredibly small.
1426          * And, lastly, we'd like to make this into a "random" key
1427          */
1428         tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
1429         tmp_indx = indx & 0xffffff;
1430         return (tmp_key | tmp_indx);
1431 }
1432 
1433 
1434 /*
1435  * hermon_mr_key_swap()
1436  *    Context: Can be called from interrupt or base context.
1437  *    NOTE:  Produces a key in the form of
1438  *              IIIIIIII IIIIIIII IIIIIIIII KKKKKKKK
1439  *    where K == the arbitrary bits and I == the index
1440  */
1441 uint32_t
1442 hermon_mr_key_swap(uint32_t indx)
1443 {
1444         /*
1445          * The memory key format to pass down to the hardware is
1446          * (key[7:0],index[23:0]), which defines the index to the
1447          * hardware resource. When the driver passes this as a memory
1448          * key, (i.e. to retrieve a resource) the format is
1449          * (index[23:0],key[7:0]).
1450          */
1451         return (((indx >> 24) & 0x000000ff) | ((indx << 8) & 0xffffff00));
1452 }
1453 
1454 /*
1455  * hermon_mr_common_reg()
1456  *    Context: Can be called from interrupt or base context.
1457  */
1458 static int
1459 hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
1460     hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
1461     hermon_mpt_rsrc_type_t mpt_type)
1462 {
1463         hermon_rsrc_t           *mpt, *mtt, *rsrc, *mtt_refcnt;
1464         hermon_umap_db_entry_t  *umapdb;
1465         hermon_sw_refcnt_t      *swrc_tmp;
1466         hermon_hw_dmpt_t        mpt_entry;
1467         hermon_mrhdl_t          mr;
1468         ibt_mr_flags_t          flags;
1469         hermon_bind_info_t      *bh;
1470         ddi_dma_handle_t        bind_dmahdl;
1471         ddi_umem_cookie_t       umem_cookie;
1472         size_t                  umem_len;
1473         caddr_t                 umem_addr;
1474         uint64_t                mtt_addr, max_sz;
1475         uint_t                  sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
1476         int                     status, umem_flags, bind_override_addr;
1477 
1478         /*
1479          * Check the "options" flag.  Currently this flag tells the driver
1480          * whether or not the region should be bound normally (i.e. with
1481          * entries written into the PCI IOMMU), whether it should be
1482          * registered to bypass the IOMMU, and whether or not the resulting
1483          * address should be "zero-based" (to aid the alignment restrictions
1484          * for QPs).
1485          */
1486         if (op == NULL) {
1487                 bind_type   = HERMON_BINDMEM_NORMAL;
1488                 bind_dmahdl = NULL;
1489                 bind_override_addr = 0;
1490         } else {
1491                 bind_type          = op->mro_bind_type;
1492                 bind_dmahdl        = op->mro_bind_dmahdl;
1493                 bind_override_addr = op->mro_bind_override_addr;
1494         }
1495 
1496         /* check what kind of mpt to use */
1497 
1498         /* Extract the flags field from the hermon_bind_info_t */
1499         flags = bind->bi_flags;
1500 
1501         /*
1502          * Check for invalid length.  Check is the length is zero or if the
1503          * length is larger than the maximum configured value.  Return error
1504          * if it is.
1505          */
1506         max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
1507         if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
1508                 status = IBT_MR_LEN_INVALID;
1509                 goto mrcommon_fail;
1510         }
1511 
1512         /*
1513          * Check the sleep flag.  Ensure that it is consistent with the
1514          * current thread context (i.e. if we are currently in the interrupt
1515          * context, then we shouldn't be attempting to sleep).
1516          */
1517         sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1518         if ((sleep == HERMON_SLEEP) &&
1519             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1520                 status = IBT_INVALID_PARAM;
1521                 goto mrcommon_fail;
1522         }
1523 
1524         /* Increment the reference count on the protection domain (PD) */
1525         hermon_pd_refcnt_inc(pd);
1526 
1527         /*
1528          * Allocate an MPT entry.  This will be filled in with all the
1529          * necessary parameters to define the memory region.  And then
1530          * ownership will be passed to the hardware in the final step
1531          * below.  If we fail here, we must undo the protection domain
1532          * reference count.
1533          */
1534         if (mpt_type == HERMON_MPT_DMPT) {
1535                 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1536                 if (status != DDI_SUCCESS) {
1537                         status = IBT_INSUFF_RESOURCE;
1538                         goto mrcommon_fail1;
1539                 }
1540         } else {
1541                 mpt = NULL;
1542         }
1543 
1544         /*
1545          * Allocate the software structure for tracking the memory region (i.e.
1546          * the Hermon Memory Region handle).  If we fail here, we must undo
1547          * the protection domain reference count and the previous resource
1548          * allocation.
1549          */
1550         status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1551         if (status != DDI_SUCCESS) {
1552                 status = IBT_INSUFF_RESOURCE;
1553                 goto mrcommon_fail2;
1554         }
1555         mr = (hermon_mrhdl_t)rsrc->hr_addr;
1556 
1557         /*
1558          * Setup and validate the memory region access flags.  This means
1559          * translating the IBTF's enable flags into the access flags that
1560          * will be used in later operations.
1561          */
1562         mr->mr_accflag = 0;
1563         if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1564                 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1565         if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1566                 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1567         if (flags & IBT_MR_ENABLE_REMOTE_READ)
1568                 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1569         if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1570                 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1571         if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1572                 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1573 
1574         /*
1575          * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
1576          * from a certain number of "constrained" bits (the least significant
1577          * bits) and some number of "unconstrained" bits.  The constrained
1578          * bits must be set to the index of the entry in the MPT table, but
1579          * the unconstrained bits can be set to any value we wish.  Note:
1580          * if no remote access is required, then the RKey value is not filled
1581          * in.  Otherwise both Rkey and LKey are given the same value.
1582          */
1583         if (mpt)
1584                 mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1585 
1586         /*
1587          * Determine if the memory is from userland and pin the pages
1588          * with umem_lockmemory() if necessary.
1589          * Then, if this is userland memory, allocate an entry in the
1590          * "userland resources database".  This will later be added to
1591          * the database (after all further memory registration operations are
1592          * successful).  If we fail here, we must undo the reference counts
1593          * and the previous resource allocations.
1594          */
1595         mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
1596         if (mr_is_umem) {
1597                 umem_len   = ptob(btopr(bind->bi_len +
1598                     ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
1599                 umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
1600                 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
1601                     DDI_UMEMLOCK_LONGTERM);
1602                 status = umem_lockmemory(umem_addr, umem_len, umem_flags,
1603                     &umem_cookie, &hermon_umem_cbops, NULL);
1604                 if (status != 0) {
1605                         status = IBT_INSUFF_RESOURCE;
1606                         goto mrcommon_fail3;
1607                 }
1608 
1609                 bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
1610                     B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
1611                 if (bind->bi_buf == NULL) {
1612                         status = IBT_INSUFF_RESOURCE;
1613                         goto mrcommon_fail3;
1614                 }
1615                 bind->bi_type = HERMON_BINDHDL_UBUF;
1616                 bind->bi_buf->b_flags |= B_READ;
1617 
1618                 umapdb = hermon_umap_db_alloc(state->hs_instance,
1619                     (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
1620                     (uint64_t)(uintptr_t)rsrc);
1621                 if (umapdb == NULL) {
1622                         status = IBT_INSUFF_RESOURCE;
1623                         goto mrcommon_fail4;
1624                 }
1625         }
1626 
1627         /*
1628          * Setup the bindinfo for the mtt bind call
1629          */
1630         bh = &mr->mr_bindinfo;
1631         bcopy(bind, bh, sizeof (hermon_bind_info_t));
1632         bh->bi_bypass = bind_type;
1633         status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
1634             &mtt_pgsize_bits, mpt != NULL);
1635         if (status != DDI_SUCCESS) {
1636                 /*
1637                  * When mtt_bind fails, freerbuf has already been done,
1638                  * so make sure not to call it again.
1639                  */
1640                 bind->bi_type = bh->bi_type;
1641                 goto mrcommon_fail5;
1642         }
1643         mr->mr_logmttpgsz = mtt_pgsize_bits;
1644 
1645         /*
1646          * Allocate MTT reference count (to track shared memory regions).
1647          * This reference count resource may never be used on the given
1648          * memory region, but if it is ever later registered as "shared"
1649          * memory region then this resource will be necessary.  If we fail
1650          * here, we do pretty much the same as above to clean up.
1651          */
1652         status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
1653             &mtt_refcnt);
1654         if (status != DDI_SUCCESS) {
1655                 status = IBT_INSUFF_RESOURCE;
1656                 goto mrcommon_fail6;
1657         }
1658         mr->mr_mttrefcntp = mtt_refcnt;
1659         swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
1660         HERMON_MTT_REFCNT_INIT(swrc_tmp);
1661 
1662         mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
1663 
1664         /*
1665          * Fill in the MPT entry.  This is the final step before passing
1666          * ownership of the MPT entry to the Hermon hardware.  We use all of
1667          * the information collected/calculated above to fill in the
1668          * requisite portions of the MPT.  Do this ONLY for DMPTs.
1669          */
1670         if (mpt == NULL)
1671                 goto no_passown;
1672 
1673         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1674 
1675         mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
1676         mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1677         mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1678         mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1679         mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1680         mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1681         mpt_entry.lr      = 1;
1682         mpt_entry.phys_addr = 0;
1683         mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1684 
1685         mpt_entry.entity_sz     = mr->mr_logmttpgsz;
1686         mpt_entry.mem_key       = mr->mr_lkey;
1687         mpt_entry.pd            = pd->pd_pdnum;
1688         mpt_entry.rem_acc_en = 0;
1689         mpt_entry.fast_reg_en = 0;
1690         mpt_entry.en_inval = 0;
1691         mpt_entry.lkey = 0;
1692         mpt_entry.win_cnt = 0;
1693 
1694         if (bind_override_addr == 0) {
1695                 mpt_entry.start_addr = bh->bi_addr;
1696         } else {
1697                 bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
1698                 mpt_entry.start_addr = bh->bi_addr;
1699         }
1700         mpt_entry.reg_win_len   = bh->bi_len;
1701 
1702         mpt_entry.mtt_addr_h = mtt_addr >> 32;  /* only 8 more bits */
1703         mpt_entry.mtt_addr_l = mtt_addr >> 3;     /* only 29 bits */
1704 
1705         /*
1706          * Write the MPT entry to hardware.  Lastly, we pass ownership of
1707          * the entry to the hardware if needed.  Note: in general, this
1708          * operation shouldn't fail.  But if it does, we have to undo
1709          * everything we've done above before returning error.
1710          *
1711          * For Hermon, this routine (which is common to the contexts) will only
1712          * set the ownership if needed - the process of passing the context
1713          * itself to HW will take care of setting up the MPT (based on type
1714          * and index).
1715          */
1716 
1717         mpt_entry.bnd_qp = 0;   /* dMPT for a qp, check for window */
1718         status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1719             sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1720         if (status != HERMON_CMD_SUCCESS) {
1721                 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1722                     status);
1723                 if (status == HERMON_CMD_INVALID_STATUS) {
1724                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1725                 }
1726                 status = ibc_get_ci_failure(0);
1727                 goto mrcommon_fail7;
1728         }
1729         if (hermon_rdma_debug & 0x4)
1730                 IBTF_DPRINTF_L2("mr", "  reg: mr %p  key %x",
1731                     mr, hermon_mr_key_swap(mr->mr_rkey));
1732 no_passown:
1733 
1734         /*
1735          * Fill in the rest of the Hermon Memory Region handle.  Having
1736          * successfully transferred ownership of the MPT, we can update the
1737          * following fields for use in further operations on the MR.
1738          */
1739         mr->mr_mttaddr          = mtt_addr;
1740 
1741         mr->mr_log2_pgsz   = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
1742         mr->mr_mptrsrcp         = mpt;
1743         mr->mr_mttrsrcp         = mtt;
1744         mr->mr_pdhdl    = pd;
1745         mr->mr_rsrcp    = rsrc;
1746         mr->mr_is_umem          = mr_is_umem;
1747         mr->mr_is_fmr           = 0;
1748         mr->mr_umemcookie  = (mr_is_umem != 0) ? umem_cookie : NULL;
1749         mr->mr_umem_cbfunc = NULL;
1750         mr->mr_umem_cbarg1 = NULL;
1751         mr->mr_umem_cbarg2 = NULL;
1752         mr->mr_lkey     = hermon_mr_key_swap(mr->mr_lkey);
1753         mr->mr_rkey     = hermon_mr_key_swap(mr->mr_rkey);
1754         mr->mr_mpt_type         = mpt_type;
1755 
1756         /*
1757          * If this is userland memory, then we need to insert the previously
1758          * allocated entry into the "userland resources database".  This will
1759          * allow for later coordination between the hermon_umap_umemlock_cb()
1760          * callback and hermon_mr_deregister().
1761          */
1762         if (mr_is_umem) {
1763                 hermon_umap_db_add(umapdb);
1764         }
1765 
1766         *mrhdl = mr;
1767 
1768         return (DDI_SUCCESS);
1769 
1770 /*
1771  * The following is cleanup for all possible failure cases in this routine
1772  */
1773 mrcommon_fail7:
1774         hermon_rsrc_free(state, &mtt_refcnt);
1775 mrcommon_fail6:
1776         hermon_mr_mem_unbind(state, bh);
1777         bind->bi_type = bh->bi_type;
1778 mrcommon_fail5:
1779         if (mr_is_umem) {
1780                 hermon_umap_db_free(umapdb);
1781         }
1782 mrcommon_fail4:
1783         if (mr_is_umem) {
1784                 /*
1785                  * Free up the memory ddi_umem_iosetup() allocates
1786                  * internally.
1787                  */
1788                 if (bind->bi_type == HERMON_BINDHDL_UBUF) {
1789                         freerbuf(bind->bi_buf);
1790                         bind->bi_type = HERMON_BINDHDL_NONE;
1791                 }
1792                 ddi_umem_unlock(umem_cookie);
1793         }
1794 mrcommon_fail3:
1795         hermon_rsrc_free(state, &rsrc);
1796 mrcommon_fail2:
1797         if (mpt != NULL)
1798                 hermon_rsrc_free(state, &mpt);
1799 mrcommon_fail1:
1800         hermon_pd_refcnt_dec(pd);
1801 mrcommon_fail:
1802         return (status);
1803 }
1804 
1805 /*
1806  * hermon_dma_mr_register()
1807  *    Context: Can be called from base context.
1808  */
1809 int
1810 hermon_dma_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
1811     ibt_dmr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl)
1812 {
1813         hermon_rsrc_t           *mpt, *rsrc;
1814         hermon_hw_dmpt_t        mpt_entry;
1815         hermon_mrhdl_t          mr;
1816         ibt_mr_flags_t          flags;
1817         uint_t                  sleep;
1818         int                     status;
1819 
1820         /* Extract the flags field */
1821         flags = mr_attr->dmr_flags;
1822 
1823         /*
1824          * Check the sleep flag.  Ensure that it is consistent with the
1825          * current thread context (i.e. if we are currently in the interrupt
1826          * context, then we shouldn't be attempting to sleep).
1827          */
1828         sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1829         if ((sleep == HERMON_SLEEP) &&
1830             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1831                 status = IBT_INVALID_PARAM;
1832                 goto mrcommon_fail;
1833         }
1834 
1835         /* Increment the reference count on the protection domain (PD) */
1836         hermon_pd_refcnt_inc(pd);
1837 
1838         /*
1839          * Allocate an MPT entry.  This will be filled in with all the
1840          * necessary parameters to define the memory region.  And then
1841          * ownership will be passed to the hardware in the final step
1842          * below.  If we fail here, we must undo the protection domain
1843          * reference count.
1844          */
1845         status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1846         if (status != DDI_SUCCESS) {
1847                 status = IBT_INSUFF_RESOURCE;
1848                 goto mrcommon_fail1;
1849         }
1850 
1851         /*
1852          * Allocate the software structure for tracking the memory region (i.e.
1853          * the Hermon Memory Region handle).  If we fail here, we must undo
1854          * the protection domain reference count and the previous resource
1855          * allocation.
1856          */
1857         status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1858         if (status != DDI_SUCCESS) {
1859                 status = IBT_INSUFF_RESOURCE;
1860                 goto mrcommon_fail2;
1861         }
1862         mr = (hermon_mrhdl_t)rsrc->hr_addr;
1863         bzero(mr, sizeof (*mr));
1864 
1865         /*
1866          * Setup and validate the memory region access flags.  This means
1867          * translating the IBTF's enable flags into the access flags that
1868          * will be used in later operations.
1869          */
1870         mr->mr_accflag = 0;
1871         if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1872                 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1873         if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1874                 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1875         if (flags & IBT_MR_ENABLE_REMOTE_READ)
1876                 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1877         if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1878                 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1879         if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1880                 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1881 
1882         /*
1883          * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
1884          * from a certain number of "constrained" bits (the least significant
1885          * bits) and some number of "unconstrained" bits.  The constrained
1886          * bits must be set to the index of the entry in the MPT table, but
1887          * the unconstrained bits can be set to any value we wish.  Note:
1888          * if no remote access is required, then the RKey value is not filled
1889          * in.  Otherwise both Rkey and LKey are given the same value.
1890          */
1891         if (mpt)
1892                 mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1893 
1894         /*
1895          * Fill in the MPT entry.  This is the final step before passing
1896          * ownership of the MPT entry to the Hermon hardware.  We use all of
1897          * the information collected/calculated above to fill in the
1898          * requisite portions of the MPT.  Do this ONLY for DMPTs.
1899          */
1900         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1901 
1902         mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
1903         mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1904         mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1905         mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1906         mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1907         mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1908         mpt_entry.lr      = 1;
1909         mpt_entry.phys_addr = 1;        /* critical bit for this */
1910         mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1911 
1912         mpt_entry.entity_sz     = mr->mr_logmttpgsz;
1913         mpt_entry.mem_key       = mr->mr_lkey;
1914         mpt_entry.pd            = pd->pd_pdnum;
1915         mpt_entry.rem_acc_en = 0;
1916         mpt_entry.fast_reg_en = 0;
1917         mpt_entry.en_inval = 0;
1918         mpt_entry.lkey = 0;
1919         mpt_entry.win_cnt = 0;
1920 
1921         mpt_entry.start_addr = mr_attr->dmr_paddr;
1922         mpt_entry.reg_win_len = mr_attr->dmr_len;
1923         if (mr_attr->dmr_len == 0)
1924                 mpt_entry.len_b64 = 1;  /* needed for 2^^64 length */
1925 
1926         mpt_entry.mtt_addr_h = 0;
1927         mpt_entry.mtt_addr_l = 0;
1928 
1929         /*
1930          * Write the MPT entry to hardware.  Lastly, we pass ownership of
1931          * the entry to the hardware if needed.  Note: in general, this
1932          * operation shouldn't fail.  But if it does, we have to undo
1933          * everything we've done above before returning error.
1934          *
1935          * For Hermon, this routine (which is common to the contexts) will only
1936          * set the ownership if needed - the process of passing the context
1937          * itself to HW will take care of setting up the MPT (based on type
1938          * and index).
1939          */
1940 
1941         mpt_entry.bnd_qp = 0;   /* dMPT for a qp, check for window */
1942         status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1943             sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1944         if (status != HERMON_CMD_SUCCESS) {
1945                 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1946                     status);
1947                 if (status == HERMON_CMD_INVALID_STATUS) {
1948                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1949                 }
1950                 status = ibc_get_ci_failure(0);
1951                 goto mrcommon_fail7;
1952         }
1953 
1954         /*
1955          * Fill in the rest of the Hermon Memory Region handle.  Having
1956          * successfully transferred ownership of the MPT, we can update the
1957          * following fields for use in further operations on the MR.
1958          */
1959         mr->mr_mttaddr          = 0;
1960 
1961         mr->mr_log2_pgsz   = 0;
1962         mr->mr_mptrsrcp         = mpt;
1963         mr->mr_mttrsrcp         = NULL;
1964         mr->mr_pdhdl    = pd;
1965         mr->mr_rsrcp    = rsrc;
1966         mr->mr_is_umem          = 0;
1967         mr->mr_is_fmr           = 0;
1968         mr->mr_umemcookie  = NULL;
1969         mr->mr_umem_cbfunc = NULL;
1970         mr->mr_umem_cbarg1 = NULL;
1971         mr->mr_umem_cbarg2 = NULL;
1972         mr->mr_lkey     = hermon_mr_key_swap(mr->mr_lkey);
1973         mr->mr_rkey     = hermon_mr_key_swap(mr->mr_rkey);
1974         mr->mr_mpt_type         = HERMON_MPT_DMPT;
1975 
1976         *mrhdl = mr;
1977 
1978         return (DDI_SUCCESS);
1979 
1980 /*
1981  * The following is cleanup for all possible failure cases in this routine
1982  */
1983 mrcommon_fail7:
1984         hermon_rsrc_free(state, &rsrc);
1985 mrcommon_fail2:
1986         hermon_rsrc_free(state, &mpt);
1987 mrcommon_fail1:
1988         hermon_pd_refcnt_dec(pd);
1989 mrcommon_fail:
1990         return (status);
1991 }
1992 
1993 /*
1994  * hermon_mr_alloc_lkey()
1995  *    Context: Can be called from base context.
1996  */
1997 int
1998 hermon_mr_alloc_lkey(hermon_state_t *state, hermon_pdhdl_t pd,
1999     ibt_lkey_flags_t flags, uint_t nummtt, hermon_mrhdl_t *mrhdl)
2000 {
2001         hermon_rsrc_t           *mpt, *mtt, *rsrc, *mtt_refcnt;
2002         hermon_sw_refcnt_t      *swrc_tmp;
2003         hermon_hw_dmpt_t        mpt_entry;
2004         hermon_mrhdl_t          mr;
2005         uint64_t                mtt_addr;
2006         uint_t                  sleep;
2007         int                     status;
2008 
2009         /* Increment the reference count on the protection domain (PD) */
2010         hermon_pd_refcnt_inc(pd);
2011 
2012         sleep = (flags & IBT_KEY_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
2013 
2014         /*
2015          * Allocate an MPT entry.  This will be filled in with "some" of the
2016          * necessary parameters to define the memory region.  And then
2017          * ownership will be passed to the hardware in the final step
2018          * below.  If we fail here, we must undo the protection domain
2019          * reference count.
2020          *
2021          * The MTTs will get filled in when the FRWR is processed.
2022          */
2023         status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
2024         if (status != DDI_SUCCESS) {
2025                 status = IBT_INSUFF_RESOURCE;
2026                 goto alloclkey_fail1;
2027         }
2028 
2029         /*
2030          * Allocate the software structure for tracking the memory region (i.e.
2031          * the Hermon Memory Region handle).  If we fail here, we must undo
2032          * the protection domain reference count and the previous resource
2033          * allocation.
2034          */
2035         status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
2036         if (status != DDI_SUCCESS) {
2037                 status = IBT_INSUFF_RESOURCE;
2038                 goto alloclkey_fail2;
2039         }
2040         mr = (hermon_mrhdl_t)rsrc->hr_addr;
2041         bzero(mr, sizeof (*mr));
2042         mr->mr_bindinfo.bi_type = HERMON_BINDHDL_LKEY;
2043 
2044         mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2045 
2046         status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
2047         if (status != DDI_SUCCESS) {
2048                 status = IBT_INSUFF_RESOURCE;
2049                 goto alloclkey_fail3;
2050         }
2051         mr->mr_logmttpgsz = PAGESHIFT;
2052 
2053         /*
2054          * Allocate MTT reference count (to track shared memory regions).
2055          * This reference count resource may never be used on the given
2056          * memory region, but if it is ever later registered as "shared"
2057          * memory region then this resource will be necessary.  If we fail
2058          * here, we do pretty much the same as above to clean up.
2059          */
2060         status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
2061             &mtt_refcnt);
2062         if (status != DDI_SUCCESS) {
2063                 status = IBT_INSUFF_RESOURCE;
2064                 goto alloclkey_fail4;
2065         }
2066         mr->mr_mttrefcntp = mtt_refcnt;
2067         swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2068         HERMON_MTT_REFCNT_INIT(swrc_tmp);
2069 
2070         mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
2071 
2072         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2073         mpt_entry.status = HERMON_MPT_FREE;
2074         mpt_entry.lw = 1;
2075         mpt_entry.lr = 1;
2076         mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2077         mpt_entry.entity_sz = mr->mr_logmttpgsz;
2078         mpt_entry.mem_key = mr->mr_lkey;
2079         mpt_entry.pd = pd->pd_pdnum;
2080         mpt_entry.fast_reg_en = 1;
2081         mpt_entry.rem_acc_en = 1;
2082         mpt_entry.en_inval = 1;
2083         if (flags & IBT_KEY_REMOTE) {
2084                 mpt_entry.ren_inval = 1;
2085         }
2086         mpt_entry.mtt_size = nummtt;
2087         mpt_entry.mtt_addr_h = mtt_addr >> 32;    /* only 8 more bits */
2088         mpt_entry.mtt_addr_l = mtt_addr >> 3;     /* only 29 bits */
2089 
2090         /*
2091          * Write the MPT entry to hardware.  Lastly, we pass ownership of
2092          * the entry to the hardware if needed.  Note: in general, this
2093          * operation shouldn't fail.  But if it does, we have to undo
2094          * everything we've done above before returning error.
2095          *
2096          * For Hermon, this routine (which is common to the contexts) will only
2097          * set the ownership if needed - the process of passing the context
2098          * itself to HW will take care of setting up the MPT (based on type
2099          * and index).
2100          */
2101         status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2102             sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
2103         if (status != HERMON_CMD_SUCCESS) {
2104                 cmn_err(CE_CONT, "Hermon: alloc_lkey: SW2HW_MPT command "
2105                     "failed: %08x\n", status);
2106                 if (status == HERMON_CMD_INVALID_STATUS) {
2107                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2108                 }
2109                 status = ibc_get_ci_failure(0);
2110                 goto alloclkey_fail5;
2111         }
2112 
2113         /*
2114          * Fill in the rest of the Hermon Memory Region handle.  Having
2115          * successfully transferred ownership of the MPT, we can update the
2116          * following fields for use in further operations on the MR.
2117          */
2118         mr->mr_accflag = IBT_MR_LOCAL_WRITE;
2119         mr->mr_mttaddr = mtt_addr;
2120         mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
2121         mr->mr_mptrsrcp = mpt;
2122         mr->mr_mttrsrcp = mtt;
2123         mr->mr_pdhdl = pd;
2124         mr->mr_rsrcp = rsrc;
2125         mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
2126         mr->mr_rkey = mr->mr_lkey;
2127         mr->mr_mpt_type = HERMON_MPT_DMPT;
2128 
2129         *mrhdl = mr;
2130         return (DDI_SUCCESS);
2131 
2132 alloclkey_fail5:
2133         hermon_rsrc_free(state, &mtt_refcnt);
2134 alloclkey_fail4:
2135         hermon_rsrc_free(state, &mtt);
2136 alloclkey_fail3:
2137         hermon_rsrc_free(state, &rsrc);
2138 alloclkey_fail2:
2139         hermon_rsrc_free(state, &mpt);
2140 alloclkey_fail1:
2141         hermon_pd_refcnt_dec(pd);
2142         return (status);
2143 }
2144 
2145 /*
2146  * hermon_mr_fexch_mpt_init()
2147  *    Context: Can be called from base context.
2148  *
2149  * This is the same as alloc_lkey, but not returning an mrhdl.
2150  */
2151 int
2152 hermon_mr_fexch_mpt_init(hermon_state_t *state, hermon_pdhdl_t pd,
2153     uint32_t mpt_indx, uint_t nummtt, uint64_t mtt_addr, uint_t sleep)
2154 {
2155         hermon_hw_dmpt_t        mpt_entry;
2156         int                     status;
2157 
2158         /*
2159          * The MTTs will get filled in when the FRWR is processed.
2160          */
2161 
2162         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2163         mpt_entry.status = HERMON_MPT_FREE;
2164         mpt_entry.lw = 1;
2165         mpt_entry.lr = 1;
2166         mpt_entry.rw = 1;
2167         mpt_entry.rr = 1;
2168         mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2169         mpt_entry.entity_sz = PAGESHIFT;
2170         mpt_entry.mem_key = mpt_indx;
2171         mpt_entry.pd = pd->pd_pdnum;
2172         mpt_entry.fast_reg_en = 1;
2173         mpt_entry.rem_acc_en = 1;
2174         mpt_entry.en_inval = 1;
2175         mpt_entry.ren_inval = 1;
2176         mpt_entry.mtt_size = nummtt;
2177         mpt_entry.mtt_addr_h = mtt_addr >> 32;    /* only 8 more bits */
2178         mpt_entry.mtt_addr_l = mtt_addr >> 3;     /* only 29 bits */
2179 
2180         /*
2181          * Write the MPT entry to hardware.  Lastly, we pass ownership of
2182          * the entry to the hardware if needed.  Note: in general, this
2183          * operation shouldn't fail.  But if it does, we have to undo
2184          * everything we've done above before returning error.
2185          *
2186          * For Hermon, this routine (which is common to the contexts) will only
2187          * set the ownership if needed - the process of passing the context
2188          * itself to HW will take care of setting up the MPT (based on type
2189          * and index).
2190          */
2191         status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2192             sizeof (hermon_hw_dmpt_t), mpt_indx, sleep);
2193         if (status != HERMON_CMD_SUCCESS) {
2194                 cmn_err(CE_CONT, "Hermon: fexch_mpt_init: SW2HW_MPT command "
2195                     "failed: %08x\n", status);
2196                 if (status == HERMON_CMD_INVALID_STATUS) {
2197                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2198                 }
2199                 status = ibc_get_ci_failure(0);
2200                 return (status);
2201         }
2202         /* Increment the reference count on the protection domain (PD) */
2203         hermon_pd_refcnt_inc(pd);
2204 
2205         return (DDI_SUCCESS);
2206 }
2207 
2208 /*
2209  * hermon_mr_fexch_mpt_fini()
2210  *    Context: Can be called from base context.
2211  *
2212  * This is the same as deregister_mr, without an mrhdl.
2213  */
2214 int
2215 hermon_mr_fexch_mpt_fini(hermon_state_t *state, hermon_pdhdl_t pd,
2216     uint32_t mpt_indx, uint_t sleep)
2217 {
2218         int                     status;
2219 
2220         status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
2221             NULL, 0, mpt_indx, sleep);
2222         if (status != DDI_SUCCESS) {
2223                 cmn_err(CE_CONT, "Hermon: fexch_mpt_fini: HW2SW_MPT command "
2224                     "failed: %08x\n", status);
2225                 if (status == HERMON_CMD_INVALID_STATUS) {
2226                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2227                 }
2228                 status = ibc_get_ci_failure(0);
2229                 return (status);
2230         }
2231 
2232         /* Decrement the reference count on the protection domain (PD) */
2233         hermon_pd_refcnt_dec(pd);
2234 
2235         return (DDI_SUCCESS);
2236 }
2237 
2238 /*
2239  * hermon_mr_mtt_bind()
2240  *    Context: Can be called from interrupt or base context.
2241  */
2242 int
2243 hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind,
2244     ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits,
2245     uint_t is_buffer)
2246 {
2247         uint64_t                nummtt;
2248         uint_t                  sleep;
2249         int                     status;
2250 
2251         /*
2252          * Check the sleep flag.  Ensure that it is consistent with the
2253          * current thread context (i.e. if we are currently in the interrupt
2254          * context, then we shouldn't be attempting to sleep).
2255          */
2256         sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ?
2257             HERMON_NOSLEEP : HERMON_SLEEP;
2258         if ((sleep == HERMON_SLEEP) &&
2259             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2260                 status = IBT_INVALID_PARAM;
2261                 goto mrmttbind_fail;
2262         }
2263 
2264         /*
2265          * Bind the memory and determine the mapped addresses.  This is
2266          * the first of two routines that do all the "heavy lifting" for
2267          * the Hermon memory registration routines.  The hermon_mr_mem_bind()
2268          * routine takes the "bind" struct with all its fields filled
2269          * in and returns a list of DMA cookies (for the PCI mapped addresses
2270          * corresponding to the specified address region) which are used by
2271          * the hermon_mr_fast_mtt_write() routine below.  If we fail here, we
2272          * must undo all the previous resource allocation (and PD reference
2273          * count).
2274          */
2275         status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer);
2276         if (status != DDI_SUCCESS) {
2277                 status = IBT_INSUFF_RESOURCE;
2278                 goto mrmttbind_fail;
2279         }
2280 
2281         /*
2282          * Determine number of pages spanned.  This routine uses the
2283          * information in the "bind" struct to determine the required
2284          * number of MTT entries needed (and returns the suggested page size -
2285          * as a "power-of-2" - for each MTT entry).
2286          */
2287         nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits);
2288 
2289         /*
2290          * Allocate the MTT entries.  Use the calculations performed above to
2291          * allocate the required number of MTT entries. If we fail here, we
2292          * must not only undo all the previous resource allocation (and PD
2293          * reference count), but we must also unbind the memory.
2294          */
2295         status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt);
2296         if (status != DDI_SUCCESS) {
2297                 status = IBT_INSUFF_RESOURCE;
2298                 goto mrmttbind_fail2;
2299         }
2300 
2301         /*
2302          * Write the mapped addresses into the MTT entries.  This is part two
2303          * of the "heavy lifting" routines that we talked about above.  Note:
2304          * we pass the suggested page size from the earlier operation here.
2305          * And if we fail here, we again do pretty much the same huge clean up.
2306          */
2307         status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits);
2308         if (status != DDI_SUCCESS) {
2309                 /*
2310                  * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2311                  * only if it detects a HW error during DMA.
2312                  */
2313                 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2314                 status = ibc_get_ci_failure(0);
2315                 goto mrmttbind_fail3;
2316         }
2317         return (DDI_SUCCESS);
2318 
2319 /*
2320  * The following is cleanup for all possible failure cases in this routine
2321  */
2322 mrmttbind_fail3:
2323         hermon_rsrc_free(state, mtt);
2324 mrmttbind_fail2:
2325         hermon_mr_mem_unbind(state, bind);
2326 mrmttbind_fail:
2327         return (status);
2328 }
2329 
2330 
2331 /*
2332  * hermon_mr_mtt_unbind()
2333  *    Context: Can be called from interrupt or base context.
2334  */
2335 int
2336 hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind,
2337     hermon_rsrc_t *mtt)
2338 {
2339         /*
2340          * Free up the MTT entries and unbind the memory.  Here, as above, we
2341          * attempt to free these resources only if it is appropriate to do so.
2342          */
2343         hermon_mr_mem_unbind(state, bind);
2344         hermon_rsrc_free(state, &mtt);
2345 
2346         return (DDI_SUCCESS);
2347 }
2348 
2349 
2350 /*
2351  * hermon_mr_common_rereg()
2352  *    Context: Can be called from interrupt or base context.
2353  */
static int
hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
	hermon_rsrc_t		*mpt;
	ibt_mr_attr_flags_t	acc_flags_to_use;
	ibt_mr_flags_t		flags;
	hermon_pdhdl_t		pd_to_use;
	hermon_hw_dmpt_t	mpt_entry;
	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
	uint_t			sleep, dereg_level;
	int			status;

	/*
	 * Check here to see if the memory region corresponds to a userland
	 * mapping.  Reregistration of userland memory regions is not
	 * currently supported.  Return failure.
	 *
	 * NOTE(review): "mr_is_umem" is read here without holding
	 * "mr_lock"; presumably it is immutable after registration --
	 * confirm against the registration paths.
	 */
	if (mr->mr_is_umem) {
		status = IBT_MR_HDL_INVALID;
		goto mrrereg_fail;
	}

	mutex_enter(&mr->mr_lock);

	/* Pull MPT resource pointer from the Hermon Memory Region handle */
	mpt = mr->mr_mptrsrcp;

	/* Extract the flags field from the hermon_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		mutex_exit(&mr->mr_lock);
		status = IBT_INVALID_PARAM;
		goto mrrereg_fail;
	}

	/*
	 * First step is to temporarily invalidate the MPT entry.  This
	 * regains ownership from the hardware, and gives us the opportunity
	 * to modify the entry.  Note: The HW2SW_MPT command returns the
	 * current MPT entry contents.  These are saved away here because
	 * they will be reused in a later step below.  If the region has
	 * bound memory windows, then we fail, returning an "in use" error
	 * code.  Otherwise, this is an unexpected error and we deregister
	 * the memory region and return error.
	 *
	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		if (status == HERMON_CMD_REG_BOUND) {
			return (IBT_MR_IN_USE);
		} else {
			cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			/*
			 * Call deregister and ensure that all current
			 * resources get freed up
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * If we're changing the protection domain, then validate the new one
	 */
	if (flags & IBT_MR_CHANGE_PD) {

		/* Check for valid PD handle pointer */
		if (pd == NULL) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up. Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			status = IBT_PD_HDL_INVALID;
			goto mrrereg_fail;
		}

		/* Use the new PD handle in all operations below */
		pd_to_use = pd;

	} else {
		/* Use the current PD handle in all operations below */
		pd_to_use = mr->mr_pdhdl;
	}

	/*
	 * If we're changing access permissions, then validate the new ones
	 */
	if (flags & IBT_MR_CHANGE_ACCESS) {
		/*
		 * Validate the access flags.  Both remote write and remote
		 * atomic require the local write flag to be set
		 */
		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up. Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (hermon_mr_deregister(state, &mr,
			    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			status = IBT_MR_ACCESS_REQ_INVALID;
			goto mrrereg_fail;
		}

		/*
		 * Setup and validate the memory region access flags.  This
		 * means translating the IBTF's enable flags into the access
		 * flags that will be used in later operations.
		 */
		acc_flags_to_use = 0;
		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_READ)
			acc_flags_to_use |= IBT_MR_REMOTE_READ;
		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;

	} else {
		/* Not changing access: keep the region's current flags */
		acc_flags_to_use = mr->mr_accflag;
	}

	/*
	 * If we're modifying the translation, then figure out whether
	 * we can reuse the current MTT resources.  This means calling
	 * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting
	 * for the reregistration.  If the current memory region contains
	 * sufficient MTT entries for the new regions, then it will be
	 * reused and filled in.  Otherwise, new entries will be allocated,
	 * the old ones will be freed, and the new entries will be filled
	 * in.  Note:  If we're not modifying the translation, then we
	 * should already have all the information we need to update the MPT.
	 * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return
	 * a "dereg_level" which is the level of cleanup that needs to be
	 * passed to hermon_mr_deregister() to finish the cleanup.
	 */
	if (flags & IBT_MR_CHANGE_TRANSLATION) {
		status = hermon_mr_rereg_xlat_helper(state, mr, bind, op,
		    &mtt_addr_to_use, sleep, &dereg_level);
		if (status != DDI_SUCCESS) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all resources get
			 * properly freed up.
			 */
			if (hermon_mr_deregister(state, &mr, dereg_level,
			    sleep) != DDI_SUCCESS) {
				HERMON_WARNING(state, "failed to deregister "
				    "memory region");
			}
			goto mrrereg_fail;
		}
		/* The helper updated mr_bindinfo for the new mapping */
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use   = mr->mr_bindinfo.bi_len;
	} else {
		mtt_addr_to_use = mr->mr_mttaddr;
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use   = mr->mr_bindinfo.bi_len;
	}

	/*
	 * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
	 * when the region was first registered, each key is formed from
	 * "constrained" bits and "unconstrained" bits.  Note:  If no remote
	 * access is required, then the RKey value is not filled in.  Otherwise
	 * both Rkey and LKey are given the same value.
	 */
	mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	} else
		mr->mr_rkey = 0;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));

	mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw      = (acc_flags_to_use & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr      = (acc_flags_to_use & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw      = (acc_flags_to_use & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr      = 1;
	mpt_entry.phys_addr = 0;
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;

	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd_to_use->pd_pdnum;

	mpt_entry.start_addr	= vaddr_to_use;
	mpt_entry.reg_win_len	= len_to_use;
	mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32;
	mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3;

	/*
	 * Write the updated MPT entry to hardware
	 *
	 * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		/*
		 * Call deregister and ensure that all current resources get
		 * properly freed up. Unnecessary here to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above.
		 */
		if (hermon_mr_deregister(state, &mr,
		    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister memory "
			    "region");
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * If we're changing PD, then update their reference counts now.
	 * This means decrementing the reference count on the old PD and
	 * incrementing the reference count on the new PD.
	 */
	if (flags & IBT_MR_CHANGE_PD) {
		hermon_pd_refcnt_dec(mr->mr_pdhdl);
		hermon_pd_refcnt_inc(pd);
	}

	/*
	 * Update the contents of the Hermon Memory Region handle to reflect
	 * what has been changed.  Note: the keys computed above are swapped
	 * into their externally-visible byte order here.
	 */
	mr->mr_pdhdl   = pd_to_use;
	mr->mr_accflag	       = acc_flags_to_use;
	mr->mr_is_umem	       = 0;
	mr->mr_is_fmr	       = 0;
	mr->mr_umemcookie = NULL;
	mr->mr_lkey    = hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey    = hermon_mr_key_swap(mr->mr_rkey);

	/* New MR handle is same as the old */
	*mrhdl_new = mr;
	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);

mrrereg_fail:
	/*
	 * Note: on the paths that reach here after a deregister call, "mr"
	 * has already been torn down; only "status" is returned.
	 */
	return (status);
}
2661 
2662 
2663 /*
2664  * hermon_mr_rereg_xlat_helper
2665  *    Context: Can be called from interrupt or base context.
2666  *    Note: This routine expects the "mr_lock" to be held when it
2667  *    is called.  Upon returning failure, this routine passes information
2668  *    about what "dereg_level" should be passed to hermon_mr_deregister().
2669  */
2670 static int
2671 hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
2672     hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
2673     uint_t sleep, uint_t *dereg_level)
2674 {
2675         hermon_rsrc_t           *mtt, *mtt_refcnt;
2676         hermon_sw_refcnt_t      *swrc_old, *swrc_new;
2677         ddi_dma_handle_t        dmahdl;
2678         uint64_t                nummtt_needed, nummtt_in_currrsrc, max_sz;
2679         uint_t                  mtt_pgsize_bits, bind_type, reuse_dmahdl;
2680         int                     status;
2681 
2682         ASSERT(MUTEX_HELD(&mr->mr_lock));
2683 
2684         /*
2685          * Check the "options" flag.  Currently this flag tells the driver
2686          * whether or not the region should be bound normally (i.e. with
2687          * entries written into the PCI IOMMU) or whether it should be
2688          * registered to bypass the IOMMU.
2689          */
2690         if (op == NULL) {
2691                 bind_type = HERMON_BINDMEM_NORMAL;
2692         } else {
2693                 bind_type = op->mro_bind_type;
2694         }
2695 
2696         /*
2697          * Check for invalid length.  Check is the length is zero or if the
2698          * length is larger than the maximum configured value.  Return error
2699          * if it is.
2700          */
2701         max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
2702         if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
2703                 /*
2704                  * Deregister will be called upon returning failure from this
2705                  * routine. This will ensure that all current resources get
2706                  * properly freed up. Unnecessary to attempt to regain
2707                  * software ownership of the MPT entry as that has already
2708                  * been done above (in hermon_mr_reregister())
2709                  */
2710                 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT;
2711 
2712                 status = IBT_MR_LEN_INVALID;
2713                 goto mrrereghelp_fail;
2714         }
2715 
2716         /*
2717          * Determine the number of pages necessary for new region and the
2718          * number of pages supported by the current MTT resources
2719          */
2720         nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
2721         nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT;
2722 
2723         /*
2724          * Depending on whether we have enough pages or not, the next step is
2725          * to fill in a set of MTT entries that reflect the new mapping.  In
2726          * the first case below, we already have enough entries.  This means
2727          * we need to unbind the memory from the previous mapping, bind the
2728          * memory for the new mapping, write the new MTT entries, and update
2729          * the mr to reflect the changes.
2730          * In the second case below, we do not have enough entries in the
2731          * current mapping.  So, in this case, we need not only to unbind the
2732          * current mapping, but we need to free up the MTT resources associated
2733          * with that mapping.  After we've successfully done that, we continue
2734          * by binding the new memory, allocating new MTT entries, writing the
2735          * new MTT entries, and updating the mr to reflect the changes.
2736          */
2737 
2738         /*
2739          * If this region is being shared (i.e. MTT refcount != 1), then we
2740          * can't reuse the current MTT resources regardless of their size.
2741          * Instead we'll need to alloc new ones (below) just as if there
2742          * hadn't been enough room in the current entries.
2743          */
2744         swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr;
2745         if (HERMON_MTT_IS_NOT_SHARED(swrc_old) &&
2746             (nummtt_needed <= nummtt_in_currrsrc)) {
2747 
2748                 /*
2749                  * Unbind the old mapping for this memory region, but retain
2750                  * the ddi_dma_handle_t (if possible) for reuse in the bind
2751                  * operation below.  Note:  If original memory region was
2752                  * bound for IOMMU bypass and the new region can not use
2753                  * bypass, then a new DMA handle will be necessary.
2754                  */
2755                 if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2756                         mr->mr_bindinfo.bi_free_dmahdl = 0;
2757                         hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2758                         dmahdl = mr->mr_bindinfo.bi_dmahdl;
2759                         reuse_dmahdl = 1;
2760                 } else {
2761                         hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2762                         dmahdl = NULL;
2763                         reuse_dmahdl = 0;
2764                 }
2765 
2766                 /*
2767                  * Bind the new memory and determine the mapped addresses.
2768                  * As described, this routine and hermon_mr_fast_mtt_write()
2769                  * do the majority of the work for the memory registration
2770                  * operations.  Note:  When we successfully finish the binding,
2771                  * we will set the "bi_free_dmahdl" flag to indicate that
2772                  * even though we may have reused the ddi_dma_handle_t we do
2773                  * wish it to be freed up at some later time.  Note also that
2774                  * if we fail, we may need to cleanup the ddi_dma_handle_t.
2775                  */
2776                 bind->bi_bypass      = bind_type;
2777                 status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
2778                 if (status != DDI_SUCCESS) {
2779                         if (reuse_dmahdl) {
2780                                 ddi_dma_free_handle(&dmahdl);
2781                         }
2782 
2783                         /*
2784                          * Deregister will be called upon returning failure
2785                          * from this routine. This will ensure that all
2786                          * current resources get properly freed up.
2787                          * Unnecessary to attempt to regain software ownership
2788                          * of the MPT entry as that has already been done
2789                          * above (in hermon_mr_reregister()).  Also unnecessary
2790                          * to attempt to unbind the memory.
2791                          */
2792                         *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2793 
2794                         status = IBT_INSUFF_RESOURCE;
2795                         goto mrrereghelp_fail;
2796                 }
2797                 if (reuse_dmahdl) {
2798                         bind->bi_free_dmahdl = 1;
2799                 }
2800 
2801                 /*
2802                  * Using the new mapping, but reusing the current MTT
2803                  * resources, write the updated entries to MTT
2804                  */
2805                 mtt    = mr->mr_mttrsrcp;
2806                 status = hermon_mr_fast_mtt_write(state, mtt, bind,
2807                     mtt_pgsize_bits);
2808                 if (status != DDI_SUCCESS) {
2809                         /*
2810                          * Deregister will be called upon returning failure
2811                          * from this routine. This will ensure that all
2812                          * current resources get properly freed up.
2813                          * Unnecessary to attempt to regain software ownership
2814                          * of the MPT entry as that has already been done
2815                          * above (in hermon_mr_reregister()).  Also unnecessary
2816                          * to attempt to unbind the memory.
2817                          *
2818                          * But we do need to unbind the newly bound memory
2819                          * before returning.
2820                          */
2821                         hermon_mr_mem_unbind(state, bind);
2822                         *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2823 
2824                         /*
2825                          * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2826                          * only if it detects a HW error during DMA.
2827                          */
2828                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2829                         status = ibc_get_ci_failure(0);
2830                         goto mrrereghelp_fail;
2831                 }
2832 
2833                 /* Put the updated information into the Mem Region handle */
2834                 mr->mr_bindinfo        = *bind;
2835                 mr->mr_logmttpgsz = mtt_pgsize_bits;
2836 
2837         } else {
2838                 /*
2839                  * Check if the memory region MTT is shared by any other MRs.
2840                  * Since the resource may be shared between multiple memory
2841                  * regions (as a result of a "RegisterSharedMR()" verb) it is
2842                  * important that we not unbind any resources prematurely.
2843                  */
2844                 if (!HERMON_MTT_IS_SHARED(swrc_old)) {
2845                         /*
2846                          * Unbind the old mapping for this memory region, but
2847                          * retain the ddi_dma_handle_t for reuse in the bind
2848                          * operation below. Note: This can only be done here
2849                          * because the region being reregistered is not
2850                          * currently shared.  Also if original memory region
2851                          * was bound for IOMMU bypass and the new region can
2852                          * not use bypass, then a new DMA handle will be
2853                          * necessary.
2854                          */
2855                         if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2856                                 mr->mr_bindinfo.bi_free_dmahdl = 0;
2857                                 hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2858                                 dmahdl = mr->mr_bindinfo.bi_dmahdl;
2859                                 reuse_dmahdl = 1;
2860                         } else {
2861                                 hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2862                                 dmahdl = NULL;
2863                                 reuse_dmahdl = 0;
2864                         }
2865                 } else {
2866                         dmahdl = NULL;
2867                         reuse_dmahdl = 0;
2868                 }
2869 
2870                 /*
2871                  * Bind the new memory and determine the mapped addresses.
2872                  * As described, this routine and hermon_mr_fast_mtt_write()
2873                  * do the majority of the work for the memory registration
2874                  * operations.  Note:  When we successfully finish the binding,
2875                  * we will set the "bi_free_dmahdl" flag to indicate that
2876                  * even though we may have reused the ddi_dma_handle_t we do
2877                  * wish it to be freed up at some later time.  Note also that
2878                  * if we fail, we may need to cleanup the ddi_dma_handle_t.
2879                  */
2880                 bind->bi_bypass      = bind_type;
2881                 status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
2882                 if (status != DDI_SUCCESS) {
2883                         if (reuse_dmahdl) {
2884                                 ddi_dma_free_handle(&dmahdl);
2885                         }
2886 
2887                         /*
2888                          * Deregister will be called upon returning failure
2889                          * from this routine. This will ensure that all
2890                          * current resources get properly freed up.
2891                          * Unnecessary to attempt to regain software ownership
2892                          * of the MPT entry as that has already been done
2893                          * above (in hermon_mr_reregister()).  Also unnecessary
2894                          * to attempt to unbind the memory.
2895                          */
2896                         *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2897 
2898                         status = IBT_INSUFF_RESOURCE;
2899                         goto mrrereghelp_fail;
2900                 }
2901                 if (reuse_dmahdl) {
2902                         bind->bi_free_dmahdl = 1;
2903                 }
2904 
2905                 /*
2906                  * Allocate the new MTT entries resource
2907                  */
2908                 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed,
2909                     sleep, &mtt);
2910                 if (status != DDI_SUCCESS) {
2911                         /*
2912                          * Deregister will be called upon returning failure
2913                          * from this routine. This will ensure that all
2914                          * current resources get properly freed up.
2915                          * Unnecessary to attempt to regain software ownership
2916                          * of the MPT entry as that has already been done
2917                          * above (in hermon_mr_reregister()).  Also unnecessary
2918                          * to attempt to unbind the memory.
2919                          *
2920                          * But we do need to unbind the newly bound memory
2921                          * before returning.
2922                          */
2923                         hermon_mr_mem_unbind(state, bind);
2924                         *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2925 
2926                         status = IBT_INSUFF_RESOURCE;
2927                         goto mrrereghelp_fail;
2928                 }
2929 
2930                 /*
2931                  * Allocate MTT reference count (to track shared memory
2932                  * regions).  As mentioned elsewhere above, this reference
2933                  * count resource may never be used on the given memory region,
2934                  * but if it is ever later registered as a "shared" memory
2935                  * region then this resource will be necessary.  Note:  This
2936                  * is only necessary here if the existing memory region is
2937                  * already being shared (because otherwise we already have
2938                  * a useable reference count resource).
2939                  */
2940                 if (HERMON_MTT_IS_SHARED(swrc_old)) {
2941                         status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1,
2942                             sleep, &mtt_refcnt);
2943                         if (status != DDI_SUCCESS) {
2944                                 /*
2945                                  * Deregister will be called upon returning
2946                                  * failure from this routine. This will ensure
2947                                  * that all current resources get properly
2948                                  * freed up.  Unnecessary to attempt to regain
2949                                  * software ownership of the MPT entry as that
2950                                  * has already been done above (in
2951                                  * hermon_mr_reregister()).  Also unnecessary
2952                                  * to attempt to unbind the memory.
2953                                  *
2954                                  * But we need to unbind the newly bound
2955                                  * memory and free up the newly allocated MTT
2956                                  * entries before returning.
2957                                  */
2958                                 hermon_mr_mem_unbind(state, bind);
2959                                 hermon_rsrc_free(state, &mtt);
2960                                 *dereg_level =
2961                                     HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2962 
2963                                 status = IBT_INSUFF_RESOURCE;
2964                                 goto mrrereghelp_fail;
2965                         }
2966                         swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2967                         HERMON_MTT_REFCNT_INIT(swrc_new);
2968                 } else {
2969                         mtt_refcnt = mr->mr_mttrefcntp;
2970                 }
2971 
2972                 /*
2973                  * Using the new mapping and the new MTT resources, write the
2974                  * updated entries to MTT
2975                  */
2976                 status = hermon_mr_fast_mtt_write(state, mtt, bind,
2977                     mtt_pgsize_bits);
2978                 if (status != DDI_SUCCESS) {
2979                         /*
2980                          * Deregister will be called upon returning failure
2981                          * from this routine. This will ensure that all
2982                          * current resources get properly freed up.
2983                          * Unnecessary to attempt to regain software ownership
2984                          * of the MPT entry as that has already been done
2985                          * above (in hermon_mr_reregister()).  Also unnecessary
2986                          * to attempt to unbind the memory.
2987                          *
2988                          * But we need to unbind the newly bound memory,
2989                          * free up the newly allocated MTT entries, and
2990                          * (possibly) free the new MTT reference count
2991                          * resource before returning.
2992                          */
2993                         if (HERMON_MTT_IS_SHARED(swrc_old)) {
2994                                 hermon_rsrc_free(state, &mtt_refcnt);
2995                         }
2996                         hermon_mr_mem_unbind(state, bind);
2997                         hermon_rsrc_free(state, &mtt);
2998                         *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2999 
3000                         status = IBT_INSUFF_RESOURCE;
3001                         goto mrrereghelp_fail;
3002                 }
3003 
3004                 /*
3005                  * Check if the memory region MTT is shared by any other MRs.
3006                  * Since the resource may be shared between multiple memory
3007                  * regions (as a result of a "RegisterSharedMR()" verb) it is
3008                  * important that we not free up any resources prematurely.
3009                  */
3010                 if (HERMON_MTT_IS_SHARED(swrc_old)) {
3011                         /* Decrement MTT reference count for "old" region */
3012                         (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
3013                 } else {
3014                         /* Free up the old MTT entries resource */
3015                         hermon_rsrc_free(state, &mr->mr_mttrsrcp);
3016                 }
3017 
3018                 /* Put the updated information into the mrhdl */
3019                 mr->mr_bindinfo        = *bind;
3020                 mr->mr_logmttpgsz = mtt_pgsize_bits;
3021                 mr->mr_mttrsrcp   = mtt;
3022                 mr->mr_mttrefcntp = mtt_refcnt;
3023         }
3024 
3025         /*
3026          * Calculate and return the updated MTT address (in the DDR address
3027          * space).  This will be used by the caller (hermon_mr_reregister) in
3028          * the updated MPT entry
3029          */
3030         *mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT;
3031 
3032         return (DDI_SUCCESS);
3033 
3034 mrrereghelp_fail:
3035         return (status);
3036 }
3037 
3038 
3039 /*
3040  * hermon_mr_nummtt_needed()
3041  *    Context: Can be called from interrupt or base context.
3042  */
3043 /* ARGSUSED */
3044 static uint64_t
3045 hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind,
3046     uint_t *mtt_pgsize_bits)
3047 {
3048         uint64_t        pg_offset_mask;
3049         uint64_t        pg_offset, tmp_length;
3050 
3051         /*
3052          * For now we specify the page size as 8Kb (the default page size for
3053          * the sun4u architecture), or 4Kb for x86.  Figure out optimal page
3054          * size by examining the dmacookies
3055          */
3056         *mtt_pgsize_bits = PAGESHIFT;
3057 
3058         pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
3059         pg_offset = bind->bi_addr & pg_offset_mask;
3060         tmp_length = pg_offset + (bind->bi_len - 1);
3061         return ((tmp_length >> *mtt_pgsize_bits) + 1);
3062 }
3063 
3064 
3065 /*
3066  * hermon_mr_mem_bind()
3067  *    Context: Can be called from interrupt or base context.
3068  */
3069 static int
3070 hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
3071     ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
3072 {
3073         ddi_dma_attr_t  dma_attr;
3074         int             (*callback)(caddr_t);
3075         int             status;
3076 
3077         /* bi_type must be set to a meaningful value to get a bind handle */
3078         ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
3079             bind->bi_type == HERMON_BINDHDL_BUF ||
3080             bind->bi_type == HERMON_BINDHDL_UBUF);
3081 
3082         /* Set the callback flag appropriately */
3083         callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
3084 
3085         /*
3086          * Initialize many of the default DMA attributes.  Then, if we're
3087          * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
3088          */
3089         if (dmahdl == NULL) {
3090                 hermon_dma_attr_init(state, &dma_attr);
3091 #ifdef  __sparc
3092                 if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
3093                         dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
3094                 }
3095 #endif
3096 
3097                 /* set RO if needed - tunable set and 'is_buffer' is non-0 */
3098                 if (is_buffer) {
3099                         if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) {
3100                                 if ((bind->bi_type != HERMON_BINDHDL_UBUF) &&
3101                                     (hermon_kernel_data_ro ==
3102                                     HERMON_RO_ENABLED)) {
3103                                         dma_attr.dma_attr_flags |=
3104                                             DDI_DMA_RELAXED_ORDERING;
3105                                 }
3106                                 if (((bind->bi_type == HERMON_BINDHDL_UBUF) &&
3107                                     (hermon_user_data_ro ==
3108                                     HERMON_RO_ENABLED))) {
3109                                         dma_attr.dma_attr_flags |=
3110                                             DDI_DMA_RELAXED_ORDERING;
3111                                 }
3112                         }
3113                 }
3114 
3115                 /* Allocate a DMA handle for the binding */
3116                 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
3117                     callback, NULL, &bind->bi_dmahdl);
3118                 if (status != DDI_SUCCESS) {
3119                         return (status);
3120                 }
3121                 bind->bi_free_dmahdl = 1;
3122 
3123         } else  {
3124                 bind->bi_dmahdl = dmahdl;
3125                 bind->bi_free_dmahdl = 0;
3126         }
3127 
3128 
3129         /*
3130          * Bind the memory to get the PCI mapped addresses.  The decision
3131          * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
3132          * is determined by the "bi_type" flag.  Note: if the bind operation
3133          * fails then we have to free up the DMA handle and return error.
3134          */
3135         if (bind->bi_type == HERMON_BINDHDL_VADDR) {
3136                 status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
3137                     (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
3138                     (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
3139                     &bind->bi_dmacookie, &bind->bi_cookiecnt);
3140 
3141         } else {  /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */
3142 
3143                 status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
3144                     bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback,
3145                     NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
3146         }
3147         if (status != DDI_DMA_MAPPED) {
3148                 if (bind->bi_free_dmahdl != 0) {
3149                         ddi_dma_free_handle(&bind->bi_dmahdl);
3150                 }
3151                 return (status);
3152         }
3153 
3154         return (DDI_SUCCESS);
3155 }
3156 
3157 
3158 /*
3159  * hermon_mr_mem_unbind()
3160  *    Context: Can be called from interrupt or base context.
3161  */
3162 static void
3163 hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
3164 {
3165         int     status;
3166 
3167         /* there is nothing to unbind for alloc_lkey */
3168         if (bind->bi_type == HERMON_BINDHDL_LKEY)
3169                 return;
3170 
3171         /*
3172          * In case of HERMON_BINDHDL_UBUF, the memory bi_buf points to
3173          * is actually allocated by ddi_umem_iosetup() internally, then
3174          * it's required to free it here. Reset bi_type to HERMON_BINDHDL_NONE
3175          * not to free it again later.
3176          */
3177         if (bind->bi_type == HERMON_BINDHDL_UBUF) {
3178                 freerbuf(bind->bi_buf);
3179                 bind->bi_type = HERMON_BINDHDL_NONE;
3180         }
3181 
3182         /*
3183          * Unbind the DMA memory for the region
3184          *
3185          * Note: The only way ddi_dma_unbind_handle() currently
3186          * can return an error is if the handle passed in is invalid.
3187          * Since this should never happen, we choose to return void
3188          * from this function!  If this does return an error, however,
3189          * then we print a warning message to the console.
3190          */
3191         status = ddi_dma_unbind_handle(bind->bi_dmahdl);
3192         if (status != DDI_SUCCESS) {
3193                 HERMON_WARNING(state, "failed to unbind DMA mapping");
3194                 return;
3195         }
3196 
3197         /* Free up the DMA handle */
3198         if (bind->bi_free_dmahdl != 0) {
3199                 ddi_dma_free_handle(&bind->bi_dmahdl);
3200         }
3201 }
3202 
3203 
3204 /*
3205  * hermon_mr_fast_mtt_write()
3206  *    Context: Can be called from interrupt or base context.
3207  */
3208 static int
3209 hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
3210     hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits)
3211 {
3212         hermon_icm_table_t      *icm_table;
3213         hermon_dma_info_t       *dma_info;
3214         uint32_t                index1, index2, rindx;
3215         ddi_dma_cookie_t        dmacookie;
3216         uint_t                  cookie_cnt;
3217         uint64_t                *mtt_table;
3218         uint64_t                mtt_entry;
3219         uint64_t                addr, endaddr;
3220         uint64_t                pagesize;
3221         offset_t                i, start;
3222         uint_t                  per_span;
3223         int                     sync_needed;
3224 
3225         /*
3226          * XXX According to the PRM, we are to use the WRITE_MTT
3227          * command to write out MTTs. Tavor does not do this,
3228          * instead taking advantage of direct access to the MTTs,
3229          * and knowledge that Mellanox FMR relies on our ability
3230          * to write directly to the MTTs without any further
3231          * notification to the firmware. Likewise, we will choose
3232          * to not use the WRITE_MTT command, but to simply write
3233          * out the MTTs.
3234          */
3235 
3236         /* Calculate page size from the suggested value passed in */
3237         pagesize = ((uint64_t)1 << mtt_pgsize_bits);
3238 
3239         /* Walk the "cookie list" and fill in the MTT table entries */
3240         dmacookie  = bind->bi_dmacookie;
3241         cookie_cnt = bind->bi_cookiecnt;
3242 
3243         icm_table = &state->hs_icm[HERMON_MTT];
3244         rindx = mtt->hr_indx;
3245         hermon_index(index1, index2, rindx, icm_table, i);
3246         start = i;
3247 
3248         per_span   = icm_table->span;
3249         dma_info   = icm_table->icm_dma[index1] + index2;
3250         mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;
3251 
3252         sync_needed = 0;
3253         while (cookie_cnt-- > 0) {
3254                 addr    = dmacookie.dmac_laddress;
3255                 endaddr = addr + (dmacookie.dmac_size - 1);
3256                 addr    = addr & ~((uint64_t)pagesize - 1);
3257 
3258                 while (addr <= endaddr) {
3259 
3260                         /*
3261                          * Fill in the mapped addresses (calculated above) and
3262                          * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry.
3263                          */
3264                         mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT;
3265                         mtt_table[i] = htonll(mtt_entry);
3266                         i++;
3267                         rindx++;
3268 
3269                         if (i == per_span) {
3270 
3271                                 (void) ddi_dma_sync(dma_info->dma_hdl,
3272                                     start * sizeof (hermon_hw_mtt_t),
3273                                     (i - start) * sizeof (hermon_hw_mtt_t),
3274                                     DDI_DMA_SYNC_FORDEV);
3275 
3276                                 if ((addr + pagesize > endaddr) &&
3277                                     (cookie_cnt == 0))
3278                                         return (DDI_SUCCESS);
3279 
3280                                 hermon_index(index1, index2, rindx, icm_table,
3281                                     i);
3282                                 start = i * sizeof (hermon_hw_mtt_t);
3283                                 dma_info = icm_table->icm_dma[index1] + index2;
3284                                 mtt_table =
3285                                     (uint64_t *)(uintptr_t)dma_info->vaddr;
3286 
3287                                 sync_needed = 0;
3288                         } else {
3289                                 sync_needed = 1;
3290                         }
3291 
3292                         addr += pagesize;
3293                         if (addr == 0) {
3294                                 static int do_once = 1;
3295                                 if (do_once) {
3296                                         do_once = 0;
3297                                         cmn_err(CE_NOTE, "probable error in "
3298                                             "dma_cookie address from caller\n");
3299                                 }
3300                                 break;
3301                         }
3302                 }
3303 
3304                 /*
3305                  * When we've reached the end of the current DMA cookie,
3306                  * jump to the next cookie (if there are more)
3307                  */
3308                 if (cookie_cnt != 0) {
3309                         ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
3310                 }
3311         }
3312 
3313         /* done all the cookies, now sync the memory for the device */
3314         if (sync_needed)
3315                 (void) ddi_dma_sync(dma_info->dma_hdl,
3316                     start * sizeof (hermon_hw_mtt_t),
3317                     (i - start) * sizeof (hermon_hw_mtt_t),
3318                     DDI_DMA_SYNC_FORDEV);
3319 
3320         return (DDI_SUCCESS);
3321 }
3322 
3323 /*
3324  * hermon_mr_fast_mtt_write_fmr()
3325  *    Context: Can be called from interrupt or base context.
3326  */
3327 /* ARGSUSED */
3328 static int
3329 hermon_mr_fast_mtt_write_fmr(hermon_state_t *state, hermon_rsrc_t *mtt,
3330     ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits)
3331 {
3332         hermon_icm_table_t      *icm_table;
3333         hermon_dma_info_t       *dma_info;
3334         uint32_t                index1, index2, rindx;
3335         uint64_t                *mtt_table;
3336         offset_t                i, j;
3337         uint_t                  per_span;
3338 
3339         icm_table = &state->hs_icm[HERMON_MTT];
3340         rindx = mtt->hr_indx;
3341         hermon_index(index1, index2, rindx, icm_table, i);
3342         per_span   = icm_table->span;
3343         dma_info   = icm_table->icm_dma[index1] + index2;
3344         mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;
3345 
3346         /*
3347          * Fill in the MTT table entries
3348          */
3349         for (j = 0; j < mem_pattr->pmr_num_buf; j++) {
3350                 mtt_table[i] = mem_pattr->pmr_addr_list[j].p_laddr;
3351                 i++;
3352                 rindx++;
3353                 if (i == per_span) {
3354                         hermon_index(index1, index2, rindx, icm_table, i);
3355                         dma_info = icm_table->icm_dma[index1] + index2;
3356                         mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr;
3357                 }
3358         }
3359 
3360         return (DDI_SUCCESS);
3361 }
3362 
3363 
3364 /*
3365  * hermon_mtt_refcnt_inc()
3366  *    Context: Can be called from interrupt or base context.
3367  */
3368 static uint_t
3369 hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc)
3370 {
3371         hermon_sw_refcnt_t *rc;
3372 
3373         rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3374         return (atomic_inc_uint_nv(&rc->swrc_refcnt));
3375 }
3376 
3377 
3378 /*
3379  * hermon_mtt_refcnt_dec()
3380  *    Context: Can be called from interrupt or base context.
3381  */
3382 static uint_t
3383 hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc)
3384 {
3385         hermon_sw_refcnt_t *rc;
3386 
3387         rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3388         return (atomic_dec_uint_nv(&rc->swrc_refcnt));
3389 }