1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * hermon_mr.c
  28  *    Hermon Memory Region/Window Routines
  29  *
  30  *    Implements all the routines necessary to provide the requisite memory
  31  *    registration verbs.  These include operations like RegisterMemRegion(),
  32  *    DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion,
  33  *    etc., that affect Memory Regions.  It also includes the verbs that
  34  *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
  35  *    and QueryMemWindow().
  36  */
  37 
  38 #include <sys/types.h>
  39 #include <sys/conf.h>
  40 #include <sys/ddi.h>
  41 #include <sys/sunddi.h>
  42 #include <sys/modctl.h>
  43 #include <sys/esunddi.h>
  44 
  45 #include <sys/ib/adapters/hermon/hermon.h>
  46 
  47 extern uint32_t hermon_kernel_data_ro;
  48 extern uint32_t hermon_user_data_ro;
  49 extern int hermon_rdma_debug;
  50 
  51 /*
  52  * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion
  53  * of Hermon memory keys (LKeys and RKeys)
  54  */
  55 static  uint_t hermon_memkey_cnt = 0x00;
  56 #define HERMON_MEMKEY_SHIFT     24
  57 
  58 /* initial state of an MPT */
  59 #define HERMON_MPT_SW_OWNERSHIP 0xF     /* memory regions */
  60 #define HERMON_MPT_FREE         0x3     /* allocate lkey */
  61 
  62 static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
  63     hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
  64     hermon_mpt_rsrc_type_t mpt_type);
  65 static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
  66     hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
  67     hermon_mr_options_t *op);
  68 static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
  69     hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
  70     uint_t sleep, uint_t *dereg_level);
  71 static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state,
  72     hermon_bind_info_t *bind, uint_t *mtt_pgsize);
  73 static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
  74     ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer);
  75 static void hermon_mr_mem_unbind(hermon_state_t *state,
  76     hermon_bind_info_t *bind);
  77 static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
  78     hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits);
  79 static int hermon_mr_fast_mtt_write_fmr(hermon_state_t *state,
  80     hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);
  81 static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc);
  82 static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc);
  83 
  84 
/*
 * The Hermon umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The hermon_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
 */
static struct umem_callback_ops hermon_umem_cbops = {
	UMEM_CALLBACK_VERSION,		/* interface version for the DDI */
	hermon_umap_umemlock_cb,	/* invoked when the pinned pages go away */
};
  95 
  96 
  97 
  98 /*
  99  * hermon_mr_register()
 100  *    Context: Can be called from interrupt or base context.
 101  */
 102 int
 103 hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
 104     ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
 105     hermon_mpt_rsrc_type_t mpt_type)
 106 {
 107         hermon_bind_info_t      bind;
 108         int                     status;
 109 
 110         /*
 111          * Fill in the "bind" struct.  This struct provides the majority
 112          * of the information that will be used to distinguish between an
 113          * "addr" binding (as is the case here) and a "buf" binding (see
 114          * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
 115          * which does most of the "heavy lifting" for the Hermon memory
 116          * registration routines.
 117          */
 118         bind.bi_type  = HERMON_BINDHDL_VADDR;
 119         bind.bi_addr  = mr_attr->mr_vaddr;
 120         bind.bi_len   = mr_attr->mr_len;
 121         bind.bi_as    = mr_attr->mr_as;
 122         bind.bi_flags = mr_attr->mr_flags;
 123         status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op,
 124             mpt_type);
 125         return (status);
 126 }
 127 
 128 
 129 /*
 130  * hermon_mr_register_buf()
 131  *    Context: Can be called from interrupt or base context.
 132  */
 133 int
 134 hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd,
 135     ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl,
 136     hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type)
 137 {
 138         hermon_bind_info_t      bind;
 139         int                     status;
 140 
 141         /*
 142          * Fill in the "bind" struct.  This struct provides the majority
 143          * of the information that will be used to distinguish between an
 144          * "addr" binding (see above) and a "buf" binding (as is the case
 145          * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
 146          * which does most of the "heavy lifting" for the Hermon memory
 147          * registration routines.  Note: We have chosen to provide
 148          * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
 149          * not set).  It is not critical what value we choose here as it need
 150          * only be unique for the given RKey (which will happen by default),
 151          * so the choice here is somewhat arbitrary.
 152          */
 153         bind.bi_type  = HERMON_BINDHDL_BUF;
 154         bind.bi_buf   = buf;
 155         if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
 156                 bind.bi_addr  = mr_attr->mr_vaddr;
 157         } else {
 158                 bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
 159         }
 160         bind.bi_as    = NULL;
 161         bind.bi_len   = (uint64_t)buf->b_bcount;
 162         bind.bi_flags = mr_attr->mr_flags;
 163         status = hermon_mr_common_reg(state, pd, &bind, mrhdl, op, mpt_type);
 164         return (status);
 165 }
 166 
 167 
/*
 * hermon_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 *
 *    Register a new memory region that shares the MTT entries (the page
 *    translations) of the already-registered region "mrhdl".  A new MPT
 *    entry and new Lkey/Rkey are allocated for the shared region, the MTT
 *    reference count is incremented, and ownership of the new MPT is
 *    passed to the hardware.  On success the new handle is returned
 *    through "mrhdl_new" and DDI_SUCCESS is returned; on failure an
 *    IBT_* error or CI failure code is returned and all partially
 *    constructed state is unwound via the mrshared_fail* labels.
 */
int
hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new)
{
	hermon_rsrc_t		*mpt, *mtt, *rsrc;
	hermon_umap_db_entry_t	*umapdb;
	hermon_hw_dmpt_t	mpt_entry;
	hermon_mrhdl_t		mr;
	hermon_bind_info_t	*bind;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, pgsize_msk;
	uint_t			sleep, mr_is_umem;
	int			status, umem_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP :
	    HERMON_SLEEP;
	if ((sleep == HERMON_SLEEP) &&
	    (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrshared_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the shared memory region.
	 * Specifically, it will be made to reference the currently existing
	 * MTT entries and ownership of the MPT will be passed to the hardware
	 * in the last step below.  If we fail here, we must undo the
	 * protection domain reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail1;
	}

	/*
	 * Allocate the software structure for tracking the shared memory
	 * region (i.e. the Hermon Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail2;
	}
	mr = (hermon_mrhdl_t)rsrc->hr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

	/* Grab the MR lock for the current memory region */
	mutex_enter(&mrhdl->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
		mutex_exit(&mrhdl->mr_lock);
		status = IBT_MR_HDL_INVALID;
		goto mrshared_fail3;
	}

	/*
	 * Determine if the original memory was from userland and, if so, pin
	 * the pages (again) with umem_lockmemory().  This will guarantee a
	 * separate callback for each of this shared region's MR handles.
	 * If this is userland memory, then allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo all the above setup.
	 */
	mr_is_umem = mrhdl->mr_is_umem;
	if (mr_is_umem) {
		/* Round the pinned range out to whole pages */
		umem_len   = ptob(btopr(mrhdl->mr_bindinfo.bi_len));
		umem_addr  = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
		    ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &hermon_umem_cbops, NULL);
		if (status != 0) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail3;
		}

		umapdb = hermon_umap_db_alloc(state->hs_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail4;
		}
	}

	/*
	 * Copy the MTT resource pointer (and additional parameters) from
	 * the original Hermon Memory Region handle.  Note: this is normally
	 * where the hermon_mr_mem_bind() routine would be called, but because
	 * we already have bound and filled-in MTT entries it is simply a
	 * matter here of managing the MTT reference count and grabbing the
	 * address of the MTT table entries (for filling in the shared region's
	 * MPT entry).
	 */
	mr->mr_mttrsrcp   = mrhdl->mr_mttrsrcp;
	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
	mr->mr_bindinfo   = mrhdl->mr_bindinfo;
	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
	mutex_exit(&mrhdl->mr_lock);
	bind = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	mtt = mr->mr_mttrsrcp;

	/*
	 * Increment the MTT reference count (to reflect the fact that
	 * the MTT is now shared)
	 */
	(void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp);

	/*
	 * Update the new "bind" virtual address.  Do some extra work here
	 * to ensure proper alignment.  That is, make sure that the page
	 * offset for the beginning of the old range is the same as the
	 * offset for this new mapping
	 */
	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
	    (mr->mr_bindinfo.bi_addr & pgsize_msk));

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr      = 1;
	mpt_entry.reg_win = HERMON_MPT_IS_REGION;
	mpt_entry.entity_sz	= mr->mr_logmttpgsz;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.start_addr	= bind->bi_addr;
	mpt_entry.reg_win_len	= bind->bi_len;
	/*
	 * The MTT base address is split across two MPT fields: the upper
	 * 32 bits, and the low-order bits shifted out by 3 (i.e. expressed
	 * in 8-byte units).
	 */
	mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
	mpt_entry.mtt_addr_h = mtt_addr >> 32;
	mpt_entry.mtt_addr_l = mtt_addr >> 3;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto mrshared_fail5;
	}

	/*
	 * Fill in the rest of the Hermon Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	= mpt;
	mr->mr_mttrsrcp	= mtt;
	mr->mr_mpt_type	= HERMON_MPT_DMPT;
	mr->mr_pdhdl	= pd;
	mr->mr_rsrcp	= rsrc;
	mr->mr_is_umem	= mr_is_umem;
	mr->mr_is_fmr	= 0;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;
	/* Store the keys in their swapped form in the handle */
	mr->mr_lkey	= hermon_mr_key_swap(mr->mr_lkey);
	mr->mr_rkey	= hermon_mr_key_swap(mr->mr_rkey);

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the hermon_umap_umemlock_cb()
	 * callback and hermon_mr_deregister().
	 */
	if (mr_is_umem) {
		hermon_umap_db_add(umapdb);
	}

	*mrhdl_new = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrshared_fail5:
	/* Undo the MTT refcnt increment taken for the shared mapping */
	(void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
	if (mr_is_umem) {
		hermon_umap_db_free(umapdb);
	}
mrshared_fail4:
	/* Release the additional userland pin taken above */
	if (mr_is_umem) {
		ddi_umem_unlock(umem_cookie);
	}
mrshared_fail3:
	hermon_rsrc_free(state, &rsrc);
mrshared_fail2:
	hermon_rsrc_free(state, &mpt);
mrshared_fail1:
	hermon_pd_refcnt_dec(pd);
mrshared_fail:
	return (status);
}
 437 
 438 /*
 439  * hermon_mr_alloc_fmr()
 440  *    Context: Can be called from interrupt or base context.
 441  */
 442 int
 443 hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd,
 444     hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl)
 445 {
 446         hermon_rsrc_t           *mpt, *mtt, *rsrc;
 447         hermon_hw_dmpt_t        mpt_entry;
 448         hermon_mrhdl_t          mr;
 449         hermon_bind_info_t      bind;
 450         uint64_t                mtt_addr;
 451         uint64_t                nummtt;
 452         uint_t                  sleep, mtt_pgsize_bits;
 453         int                     status;
 454         offset_t                i;
 455         hermon_icm_table_t      *icm_table;
 456         hermon_dma_info_t       *dma_info;
 457         uint32_t                index1, index2, rindx;
 458 
 459         /*
 460          * Check the sleep flag.  Ensure that it is consistent with the
 461          * current thread context (i.e. if we are currently in the interrupt
 462          * context, then we shouldn't be attempting to sleep).
 463          */
 464         sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
 465             HERMON_NOSLEEP;
 466         if ((sleep == HERMON_SLEEP) &&
 467             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
 468                 return (IBT_INVALID_PARAM);
 469         }
 470 
 471         /* Increment the reference count on the protection domain (PD) */
 472         hermon_pd_refcnt_inc(pd);
 473 
 474         /*
 475          * Allocate an MPT entry.  This will be filled in with all the
 476          * necessary parameters to define the FMR.  Specifically, it will be
 477          * made to reference the currently existing MTT entries and ownership
 478          * of the MPT will be passed to the hardware in the last step below.
 479          * If we fail here, we must undo the protection domain reference count.
 480          */
 481 
 482         status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
 483         if (status != DDI_SUCCESS) {
 484                 status = IBT_INSUFF_RESOURCE;
 485                 goto fmralloc_fail1;
 486         }
 487 
 488         /*
 489          * Allocate the software structure for tracking the fmr memory
 490          * region (i.e. the Hermon Memory Region handle).  If we fail here, we
 491          * must undo the protection domain reference count and the previous
 492          * resource allocation.
 493          */
 494         status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
 495         if (status != DDI_SUCCESS) {
 496                 status = IBT_INSUFF_RESOURCE;
 497                 goto fmralloc_fail2;
 498         }
 499         mr = (hermon_mrhdl_t)rsrc->hr_addr;
 500         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
 501 
 502         /*
 503          * Setup and validate the memory region access flags.  This means
 504          * translating the IBTF's enable flags into the access flags that
 505          * will be used in later operations.
 506          */
 507         mr->mr_accflag = 0;
 508         if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
 509                 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
 510         if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ)
 511                 mr->mr_accflag |= IBT_MR_REMOTE_READ;
 512         if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
 513                 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
 514         if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
 515                 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
 516 
 517         /*
 518          * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
 519          * from a certain number of "constrained" bits (the least significant
 520          * bits) and some number of "unconstrained" bits.  The constrained
 521          * bits must be set to the index of the entry in the MPT table, but
 522          * the unconstrained bits can be set to any value we wish.  Note:
 523          * if no remote access is required, then the RKey value is not filled
 524          * in.  Otherwise both Rkey and LKey are given the same value.
 525          */
 526         mr->mr_fmr_key = 1;  /* ready for the next reload */
 527         mr->mr_rkey = mr->mr_lkey = mpt->hr_indx;
 528 
 529         /*
 530          * Determine number of pages spanned.  This routine uses the
 531          * information in the "bind" struct to determine the required
 532          * number of MTT entries needed (and returns the suggested page size -
 533          * as a "power-of-2" - for each MTT entry).
 534          */
 535         /* Assume address will be page aligned later */
 536         bind.bi_addr = 0;
 537         /* Calculate size based on given max pages */
 538         bind.bi_len = fmr_pool->fmr_max_pages << PAGESHIFT;
 539         nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits);
 540 
 541         /*
 542          * Allocate the MTT entries.  Use the calculations performed above to
 543          * allocate the required number of MTT entries.  If we fail here, we
 544          * must not only undo all the previous resource allocation (and PD
 545          * reference count), but we must also unbind the memory.
 546          */
 547         status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
 548         if (status != DDI_SUCCESS) {
 549                 IBTF_DPRINTF_L2("FMR", "FATAL: too few MTTs");
 550                 status = IBT_INSUFF_RESOURCE;
 551                 goto fmralloc_fail3;
 552         }
 553         mr->mr_logmttpgsz = mtt_pgsize_bits;
 554 
 555         /*
 556          * Fill in the MPT entry.  This is the final step before passing
 557          * ownership of the MPT entry to the Hermon hardware.  We use all of
 558          * the information collected/calculated above to fill in the
 559          * requisite portions of the MPT.
 560          */
 561         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
 562         mpt_entry.en_bind = 0;
 563         mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
 564         mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
 565         mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
 566         mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
 567         mpt_entry.lr      = 1;
 568         mpt_entry.reg_win = HERMON_MPT_IS_REGION;
 569         mpt_entry.pd            = pd->pd_pdnum;
 570 
 571         mpt_entry.entity_sz     = mr->mr_logmttpgsz;
 572         mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
 573         mpt_entry.fast_reg_en = 1;
 574         mpt_entry.mtt_size = (uint_t)nummtt;
 575         mpt_entry.mtt_addr_h = mtt_addr >> 32;
 576         mpt_entry.mtt_addr_l = mtt_addr >> 3;
 577         mpt_entry.mem_key = mr->mr_lkey;
 578 
 579         /*
 580          * FMR sets these to 0 for now.  Later during actual fmr registration
 581          * these values are filled in.
 582          */
 583         mpt_entry.start_addr    = 0;
 584         mpt_entry.reg_win_len   = 0;
 585 
 586         /*
 587          * Write the MPT entry to hardware.  Lastly, we pass ownership of
 588          * the entry to the hardware.  Note: in general, this operation
 589          * shouldn't fail.  But if it does, we have to undo everything we've
 590          * done above before returning error.
 591          */
 592         status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
 593             sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
 594         if (status != HERMON_CMD_SUCCESS) {
 595                 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
 596                     status);
 597                 if (status == HERMON_CMD_INVALID_STATUS) {
 598                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
 599                 }
 600                 status = ibc_get_ci_failure(0);
 601                 goto fmralloc_fail4;
 602         }
 603 
 604         /*
 605          * Fill in the rest of the Hermon Memory Region handle.  Having
 606          * successfully transferred ownership of the MPT, we can update the
 607          * following fields for use in further operations on the MR.  Also, set
 608          * that this is an FMR region.
 609          */
 610         mr->mr_mptrsrcp        = mpt;
 611         mr->mr_mttrsrcp        = mtt;
 612 
 613         mr->mr_mpt_type   = HERMON_MPT_DMPT;
 614         mr->mr_pdhdl   = pd;
 615         mr->mr_rsrcp   = rsrc;
 616         mr->mr_is_fmr          = 1;
 617         mr->mr_lkey     = hermon_mr_key_swap(mr->mr_lkey);
 618         mr->mr_rkey     = hermon_mr_key_swap(mr->mr_rkey);
 619         mr->mr_mttaddr          = mtt_addr;
 620         (void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t));
 621 
 622         /* initialize hr_addr for use during register/deregister/invalidate */
 623         icm_table = &state->hs_icm[HERMON_DMPT];
 624         rindx = mpt->hr_indx;
 625         hermon_index(index1, index2, rindx, icm_table, i);
 626         dma_info = icm_table->icm_dma[index1] + index2;
 627         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mpt))
 628         mpt->hr_addr = (void *)((uintptr_t)(dma_info->vaddr + i * mpt->hr_len));
 629 
 630         *mrhdl = mr;
 631 
 632         return (DDI_SUCCESS);
 633 
 634 /*
 635  * The following is cleanup for all possible failure cases in this routine
 636  */
 637 fmralloc_fail4:
 638         kmem_free(mtt, sizeof (hermon_rsrc_t) * nummtt);
 639 fmralloc_fail3:
 640         hermon_rsrc_free(state, &rsrc);
 641 fmralloc_fail2:
 642         hermon_rsrc_free(state, &mpt);
 643 fmralloc_fail1:
 644         hermon_pd_refcnt_dec(pd);
 645 fmralloc_fail:
 646         return (status);
 647 }
 648 
 649 
/*
 * hermon_mr_register_physical_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 *    "Fast" (re)registration path for an FMR previously set up by
 *    hermon_mr_alloc_fmr().  The dMPT entry for "mr" is rewritten in
 *    place through mpt->hr_addr (no mailbox command): the entry is
 *    temporarily marked software-owned, the MTTs are rewritten from
 *    "mem_pattr_p", a fresh key is derived from the per-MR FMR key
 *    counter, and the entry is handed back to hardware.  Returns
 *    DDI_SUCCESS, or a CI failure code if the MTT write fails (in which
 *    case the region is deliberately left in software ownership).
 */
/*ARGSUSED*/
int
hermon_mr_register_physical_fmr(hermon_state_t *state,
    ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
{
	hermon_rsrc_t		*mpt;
	uint64_t		*mpt_table;
	int			status;
	uint32_t		key;

	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	/* hr_addr points at the raw dMPT entry, viewed as 64-bit words */
	mpt_table = (uint64_t *)mpt->hr_addr;

	/*
	 * Write MPT status to SW bit.  NOTE(review): 0xF0 appears to place
	 * HERMON_MPT_SW_OWNERSHIP (0xF) in the upper nibble of the status
	 * byte, taking the entry away from hardware while it is rewritten --
	 * confirm against the hardware PRM.
	 */
	*(uint8_t *)mpt_table = 0xF0;

	/* Ensure the ownership change is visible before rewriting the MTTs */
	membar_producer();

	/*
	 * Write the mapped addresses into the MTT entries.  FMR needs to do
	 * this a little differently, so we call the fmr specific fast mtt
	 * write here.
	 */
	status = hermon_mr_fast_mtt_write_fmr(state, mr->mr_mttrsrcp,
	    mem_pattr_p, mr->mr_logmttpgsz);
	if (status != DDI_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		status = ibc_get_ci_failure(0);
		goto fmr_reg_fail1;
	}

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	key = mpt->hr_indx | (mr->mr_fmr_key++ << HERMON_MEMKEY_SHIFT);
	mr->mr_lkey = mr->mr_rkey = hermon_mr_key_swap(key);

	/* write mem key value (first 32 bits of word 1) */
	*(uint32_t *)&mpt_table[1] = htonl(key);

	/* write length value (word 3) */
	mpt_table[3] = htonll(mem_pattr_p->pmr_len);

	/* write start addr value (word 2) */
	mpt_table[2] = htonll(mem_pattr_p->pmr_iova);

	/* write lkey value (first 32 bits of word 4) */
	*(uint32_t *)&mpt_table[4] = htonl(key);

	/* All payload writes must land before ownership returns to HW */
	membar_producer();

	/* Write MPT status to HW bit (entry valid to the hardware again) */
	*(uint8_t *)mpt_table = 0x00;

	/* Fill in return parameters */
	mem_desc_p->pmd_lkey = mr->mr_lkey;
	mem_desc_p->pmd_rkey = mr->mr_rkey;
	mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
	mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

	/* Fill in MR bindinfo struct for later sync or query operations */
	mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
	mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

	mutex_exit(&mr->mr_lock);

	return (DDI_SUCCESS);

fmr_reg_fail1:
	/*
	 * Note, we fail here, and purposely leave the memory ownership in
	 * software.  The memory tables may be corrupt, so we leave the region
	 * unregistered.
	 */
	return (status);
}
 737 
 738 
 739 /*
 740  * hermon_mr_deregister()
 741  *    Context: Can be called from interrupt or base context.
 742  */
 743 /* ARGSUSED */
 744 int
 745 hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level,
 746     uint_t sleep)
 747 {
 748         hermon_rsrc_t           *mpt, *mtt, *rsrc, *mtt_refcnt;
 749         hermon_umap_db_entry_t  *umapdb;
 750         hermon_pdhdl_t          pd;
 751         hermon_mrhdl_t          mr;
 752         hermon_bind_info_t      *bind;
 753         uint64_t                value;
 754         int                     status;
 755         uint_t                  shared_mtt;
 756 
 757         /*
 758          * Check the sleep flag.  Ensure that it is consistent with the
 759          * current thread context (i.e. if we are currently in the interrupt
 760          * context, then we shouldn't be attempting to sleep).
 761          */
 762         if ((sleep == HERMON_SLEEP) &&
 763             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
 764                 status = IBT_INVALID_PARAM;
 765                 return (status);
 766         }
 767 
 768         /*
 769          * Pull all the necessary information from the Hermon Memory Region
 770          * handle.  This is necessary here because the resource for the
 771          * MR handle is going to be freed up as part of the this
 772          * deregistration
 773          */
 774         mr      = *mrhdl;
 775         mutex_enter(&mr->mr_lock);
 776         mpt     = mr->mr_mptrsrcp;
 777         mtt     = mr->mr_mttrsrcp;
 778         mtt_refcnt = mr->mr_mttrefcntp;
 779         rsrc    = mr->mr_rsrcp;
 780         pd      = mr->mr_pdhdl;
 781         bind    = &mr->mr_bindinfo;
 782 
 783         /*
 784          * Check here if the memory region is really an FMR.  If so, this is a
 785          * bad thing and we shouldn't be here.  Return failure.
 786          */
 787         if (mr->mr_is_fmr) {
 788                 mutex_exit(&mr->mr_lock);
 789                 return (IBT_INVALID_PARAM);
 790         }
 791 
 792         /*
 793          * Check here to see if the memory region has already been partially
 794          * deregistered as a result of the hermon_umap_umemlock_cb() callback.
 795          * If so, then jump to the end and free the remaining resources.
 796          */
 797         if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
 798                 goto mrdereg_finish_cleanup;
 799         }
 800         if (hermon_rdma_debug & 0x4)
 801                 IBTF_DPRINTF_L2("mr", "dereg: mr %p  key %x",
 802                     mr, mr->mr_rkey);
 803 
 804         /*
 805          * We must drop the "mr_lock" here to ensure that both SLEEP and
 806          * NOSLEEP calls into the firmware work as expected.  Also, if two
 807          * threads are attemping to access this MR (via de-register,
 808          * re-register, or otherwise), then we allow the firmware to enforce
 809          * the checking, that only one deregister is valid.
 810          */
 811         mutex_exit(&mr->mr_lock);
 812 
 813         /*
 814          * Reclaim MPT entry from hardware (if necessary).  Since the
 815          * hermon_mr_deregister() routine is used in the memory region
 816          * reregistration process as well, it is possible that we will
 817          * not always wish to reclaim ownership of the MPT.  Check the
 818          * "level" arg and, if necessary, attempt to reclaim it.  If
 819          * the ownership transfer fails for any reason, we check to see
 820          * what command status was returned from the hardware.  The only
 821          * "expected" error status is the one that indicates an attempt to
 822          * deregister a memory region that has memory windows bound to it
 823          */
 824         if (level >= HERMON_MR_DEREG_ALL) {
 825                 if (mr->mr_mpt_type >= HERMON_MPT_DMPT) {
 826                         status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
 827                             NULL, 0, mpt->hr_indx, sleep);
 828                         if (status != HERMON_CMD_SUCCESS) {
 829                                 if (status == HERMON_CMD_REG_BOUND) {
 830                                         return (IBT_MR_IN_USE);
 831                                 } else {
 832                                         cmn_err(CE_CONT, "Hermon: HW2SW_MPT "
 833                                             "command failed: %08x\n", status);
 834                                         if (status ==
 835                                             HERMON_CMD_INVALID_STATUS) {
 836                                                 hermon_fm_ereport(state,
 837                                                     HCA_SYS_ERR,
 838                                                     DDI_SERVICE_LOST);
 839                                         }
 840                                         return (IBT_INVALID_PARAM);
 841                                 }
 842                         }
 843                 }
 844         }
 845 
 846         /*
 847          * Re-grab the mr_lock here.  Since further access to the protected
 848          * 'mr' structure is needed, and we would have returned previously for
 849          * the multiple deregistration case, we can safely grab the lock here.
 850          */
 851         mutex_enter(&mr->mr_lock);
 852 
 853         /*
 854          * If the memory had come from userland, then we do a lookup in the
 855          * "userland resources database".  On success, we free the entry, call
 856          * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
 857          * an indication that the umem_lockmemory() callback has called
 858          * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate
 859          * the "mr_umemcookie" field in the MR handle (this will be used
 860          * later to detect that only partial cleaup still remains to be done
 861          * on the MR handle).
 862          */
 863         if (mr->mr_is_umem) {
 864                 status = hermon_umap_db_find(state->hs_instance,
 865                     (uint64_t)(uintptr_t)mr->mr_umemcookie,
 866                     MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
 867                     &umapdb);
 868                 if (status == DDI_SUCCESS) {
 869                         hermon_umap_db_free(umapdb);
 870                         ddi_umem_unlock(mr->mr_umemcookie);
 871                 } else {
 872                         ddi_umem_unlock(mr->mr_umemcookie);
 873                         mr->mr_umemcookie = NULL;
 874                 }
 875         }
 876 
 877         /* mtt_refcnt is NULL in the case of hermon_dma_mr_register() */
 878         if (mtt_refcnt != NULL) {
 879                 /*
 880                  * Decrement the MTT reference count.  Since the MTT resource
 881                  * may be shared between multiple memory regions (as a result
 882                  * of a "RegisterSharedMR" verb) it is important that we not
 883                  * free up or unbind resources prematurely.  If it's not shared
 884                  * (as indicated by the return status), then free the resource.
 885                  */
 886                 shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt);
 887                 if (!shared_mtt) {
 888                         hermon_rsrc_free(state, &mtt_refcnt);
 889                 }
 890 
 891                 /*
 892                  * Free up the MTT entries and unbind the memory.  Here,
 893                  * as above, we attempt to free these resources only if
 894                  * it is appropriate to do so.
 895                  * Note, 'bind' is NULL in the alloc_lkey case.
 896                  */
 897                 if (!shared_mtt) {
 898                         if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) {
 899                                 hermon_mr_mem_unbind(state, bind);
 900                         }
 901                         hermon_rsrc_free(state, &mtt);
 902                 }
 903         }
 904 
 905         /*
 906          * If the MR handle has been invalidated, then drop the
 907          * lock and return success.  Note: This only happens because
 908          * the umem_lockmemory() callback has been triggered.  The
 909          * cleanup here is partial, and further cleanup (in a
 910          * subsequent hermon_mr_deregister() call) will be necessary.
 911          */
 912         if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
 913                 mutex_exit(&mr->mr_lock);
 914                 return (DDI_SUCCESS);
 915         }
 916 
 917 mrdereg_finish_cleanup:
 918         mutex_exit(&mr->mr_lock);
 919 
 920         /* Free the Hermon Memory Region handle */
 921         hermon_rsrc_free(state, &rsrc);
 922 
 923         /* Free up the MPT entry resource */
 924         if (mpt != NULL)
 925                 hermon_rsrc_free(state, &mpt);
 926 
 927         /* Decrement the reference count on the protection domain (PD) */
 928         hermon_pd_refcnt_dec(pd);
 929 
 930         /* Set the mrhdl pointer to NULL and return success */
 931         *mrhdl = NULL;
 932 
 933         return (DDI_SUCCESS);
 934 }
 935 
 936 /*
 937  * hermon_mr_dealloc_fmr()
 938  *    Context: Can be called from interrupt or base context.
 939  */
 940 /* ARGSUSED */
 941 int
 942 hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
 943 {
 944         hermon_rsrc_t           *mpt, *mtt, *rsrc;
 945         hermon_pdhdl_t          pd;
 946         hermon_mrhdl_t          mr;
 947 
 948         /*
 949          * Pull all the necessary information from the Hermon Memory Region
 950          * handle.  This is necessary here because the resource for the
 951          * MR handle is going to be freed up as part of the this
 952          * deregistration
 953          */
 954         mr      = *mrhdl;
 955         mutex_enter(&mr->mr_lock);
 956         mpt     = mr->mr_mptrsrcp;
 957         mtt     = mr->mr_mttrsrcp;
 958         rsrc    = mr->mr_rsrcp;
 959         pd      = mr->mr_pdhdl;
 960         mutex_exit(&mr->mr_lock);
 961 
 962         /* Free the MTT entries */
 963         hermon_rsrc_free(state, &mtt);
 964 
 965         /* Free the Hermon Memory Region handle */
 966         hermon_rsrc_free(state, &rsrc);
 967 
 968         /* Free up the MPT entry resource */
 969         hermon_rsrc_free(state, &mpt);
 970 
 971         /* Decrement the reference count on the protection domain (PD) */
 972         hermon_pd_refcnt_dec(pd);
 973 
 974         /* Set the mrhdl pointer to NULL and return success */
 975         *mrhdl = NULL;
 976 
 977         return (DDI_SUCCESS);
 978 }
 979 
 980 
 981 /*
 982  * hermon_mr_query()
 983  *    Context: Can be called from interrupt or base context.
 984  */
 985 /* ARGSUSED */
 986 int
 987 hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
 988     ibt_mr_query_attr_t *attr)
 989 {
 990         int                     status;
 991         hermon_hw_dmpt_t        mpt_entry;
 992         uint32_t                lkey;
 993 
 994         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))
 995 
 996         mutex_enter(&mr->mr_lock);
 997 
 998         /*
 999          * Check here to see if the memory region has already been partially
1000          * deregistered as a result of a hermon_umap_umemlock_cb() callback.
1001          * If so, this is an error, return failure.
1002          */
1003         if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
1004                 mutex_exit(&mr->mr_lock);
1005                 return (IBT_MR_HDL_INVALID);
1006         }
1007 
1008         status = hermon_cmn_query_cmd_post(state, QUERY_MPT, 0,
1009             mr->mr_lkey >> 8, &mpt_entry, sizeof (hermon_hw_dmpt_t),
1010             HERMON_NOSLEEP);
1011         if (status != HERMON_CMD_SUCCESS) {
1012                 cmn_err(CE_CONT, "Hermon: QUERY_MPT failed: status %x", status);
1013                 mutex_exit(&mr->mr_lock);
1014                 return (ibc_get_ci_failure(0));
1015         }
1016 
1017         /* Update the mr sw struct from the hw struct. */
1018         lkey = mpt_entry.mem_key;
1019         mr->mr_lkey = mr->mr_rkey = (lkey >> 8) | (lkey << 24);
1020         mr->mr_bindinfo.bi_addr = mpt_entry.start_addr;
1021         mr->mr_bindinfo.bi_len = mpt_entry.reg_win_len;
1022         mr->mr_accflag = (mr->mr_accflag & IBT_MR_RO_DISABLED) |
1023             (mpt_entry.lw ? IBT_MR_LOCAL_WRITE : 0) |
1024             (mpt_entry.rr ? IBT_MR_REMOTE_READ : 0) |
1025             (mpt_entry.rw ? IBT_MR_REMOTE_WRITE : 0) |
1026             (mpt_entry.atomic ? IBT_MR_REMOTE_ATOMIC : 0) |
1027             (mpt_entry.en_bind ? IBT_MR_WINDOW_BIND : 0);
1028         mr->mr_mttaddr = ((uint64_t)mpt_entry.mtt_addr_h << 32) |
1029             (mpt_entry.mtt_addr_l << 3);
1030         mr->mr_logmttpgsz = mpt_entry.entity_sz;
1031 
1032         /* Fill in the queried attributes */
1033         attr->mr_lkey_state =
1034             (mpt_entry.status == HERMON_MPT_FREE) ? IBT_KEY_FREE :
1035             (mpt_entry.status == HERMON_MPT_SW_OWNERSHIP) ? IBT_KEY_INVALID :
1036             IBT_KEY_VALID;
1037         attr->mr_phys_buf_list_sz = mpt_entry.mtt_size;
1038         attr->mr_attr_flags = mr->mr_accflag;
1039         attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;
1040 
1041         /* Fill in the "local" attributes */
1042         attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
1043         attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
1044         attr->mr_lbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;
1045 
1046         /*
1047          * Fill in the "remote" attributes (if necessary).  Note: the
1048          * remote attributes are only valid if the memory region has one
1049          * or more of the remote access flags set.
1050          */
1051         if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
1052             (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
1053             (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
1054                 attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
1055                 attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
1056                 attr->mr_rbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;
1057         }
1058 
1059         /*
1060          * If region is mapped for streaming (i.e. noncoherent), then set sync
1061          * is required
1062          */
1063         attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
1064             IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;
1065 
1066         mutex_exit(&mr->mr_lock);
1067         return (DDI_SUCCESS);
1068 }
1069 
1070 
1071 /*
1072  * hermon_mr_reregister()
1073  *    Context: Can be called from interrupt or base context.
1074  */
1075 int
1076 hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
1077     hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
1078     hermon_mr_options_t *op)
1079 {
1080         hermon_bind_info_t      bind;
1081         int                     status;
1082 
1083         /*
1084          * Fill in the "bind" struct.  This struct provides the majority
1085          * of the information that will be used to distinguish between an
1086          * "addr" binding (as is the case here) and a "buf" binding (see
1087          * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
1088          * which does most of the "heavy lifting" for the Hermon memory
1089          * registration (and reregistration) routines.
1090          */
1091         bind.bi_type  = HERMON_BINDHDL_VADDR;
1092         bind.bi_addr  = mr_attr->mr_vaddr;
1093         bind.bi_len   = mr_attr->mr_len;
1094         bind.bi_as    = mr_attr->mr_as;
1095         bind.bi_flags = mr_attr->mr_flags;
1096         status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
1097         return (status);
1098 }
1099 
1100 
1101 /*
1102  * hermon_mr_reregister_buf()
1103  *    Context: Can be called from interrupt or base context.
1104  */
1105 int
1106 hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
1107     hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
1108     hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
1109 {
1110         hermon_bind_info_t      bind;
1111         int                     status;
1112 
1113         /*
1114          * Fill in the "bind" struct.  This struct provides the majority
1115          * of the information that will be used to distinguish between an
1116          * "addr" binding (see above) and a "buf" binding (as is the case
1117          * here).  The "bind" struct is later passed to hermon_mr_mem_bind()
1118          * which does most of the "heavy lifting" for the Hermon memory
1119          * registration routines.  Note: We have chosen to provide
1120          * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
1121          * not set).  It is not critical what value we choose here as it need
1122          * only be unique for the given RKey (which will happen by default),
1123          * so the choice here is somewhat arbitrary.
1124          */
1125         bind.bi_type  = HERMON_BINDHDL_BUF;
1126         bind.bi_buf   = buf;
1127         if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
1128                 bind.bi_addr  = mr_attr->mr_vaddr;
1129         } else {
1130                 bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
1131         }
1132         bind.bi_len   = (uint64_t)buf->b_bcount;
1133         bind.bi_flags = mr_attr->mr_flags;
1134         bind.bi_as    = NULL;
1135         status = hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
1136         return (status);
1137 }
1138 
1139 
1140 /*
1141  * hermon_mr_sync()
1142  *    Context: Can be called from interrupt or base context.
1143  */
1144 /* ARGSUSED */
1145 int
1146 hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
1147 {
1148         hermon_mrhdl_t          mrhdl;
1149         uint64_t                seg_vaddr, seg_len, seg_end;
1150         uint64_t                mr_start, mr_end;
1151         uint_t                  type;
1152         int                     status, i;
1153 
1154         /* Process each of the ibt_mr_sync_t's */
1155         for (i = 0; i < num_segs; i++) {
1156                 mrhdl = (hermon_mrhdl_t)mr_segs[i].ms_handle;
1157 
1158                 /* Check for valid memory region handle */
1159                 if (mrhdl == NULL) {
1160                         status = IBT_MR_HDL_INVALID;
1161                         goto mrsync_fail;
1162                 }
1163 
1164                 mutex_enter(&mrhdl->mr_lock);
1165 
1166                 /*
1167                  * Check here to see if the memory region has already been
1168                  * partially deregistered as a result of a
1169                  * hermon_umap_umemlock_cb() callback.  If so, this is an
1170                  * error, return failure.
1171                  */
1172                 if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
1173                         mutex_exit(&mrhdl->mr_lock);
1174                         status = IBT_MR_HDL_INVALID;
1175                         goto mrsync_fail;
1176                 }
1177 
1178                 /* Check for valid bounds on sync request */
1179                 seg_vaddr = mr_segs[i].ms_vaddr;
1180                 seg_len   = mr_segs[i].ms_len;
1181                 seg_end   = seg_vaddr + seg_len - 1;
1182                 mr_start  = mrhdl->mr_bindinfo.bi_addr;
1183                 mr_end    = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
1184                 if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
1185                         mutex_exit(&mrhdl->mr_lock);
1186                         status = IBT_MR_VA_INVALID;
1187                         goto mrsync_fail;
1188                 }
1189                 if ((seg_end < mr_start) || (seg_end > mr_end)) {
1190                         mutex_exit(&mrhdl->mr_lock);
1191                         status = IBT_MR_LEN_INVALID;
1192                         goto mrsync_fail;
1193                 }
1194 
1195                 /* Determine what type (i.e. direction) for sync */
1196                 if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
1197                         type = DDI_DMA_SYNC_FORDEV;
1198                 } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
1199                         type = DDI_DMA_SYNC_FORCPU;
1200                 } else {
1201                         mutex_exit(&mrhdl->mr_lock);
1202                         status = IBT_INVALID_PARAM;
1203                         goto mrsync_fail;
1204                 }
1205 
1206                 (void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
1207                     (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
1208 
1209                 mutex_exit(&mrhdl->mr_lock);
1210         }
1211 
1212         return (DDI_SUCCESS);
1213 
1214 mrsync_fail:
1215         return (status);
1216 }
1217 
1218 
1219 /*
1220  * hermon_mw_alloc()
1221  *    Context: Can be called from interrupt or base context.
1222  */
1223 int
1224 hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags,
1225     hermon_mwhdl_t *mwhdl)
1226 {
1227         hermon_rsrc_t           *mpt, *rsrc;
1228         hermon_hw_dmpt_t                mpt_entry;
1229         hermon_mwhdl_t          mw;
1230         uint_t                  sleep;
1231         int                     status;
1232 
1233         if (state != NULL)      /* XXX - bogus test that is always TRUE */
1234                 return (IBT_INSUFF_RESOURCE);
1235 
1236         /*
1237          * Check the sleep flag.  Ensure that it is consistent with the
1238          * current thread context (i.e. if we are currently in the interrupt
1239          * context, then we shouldn't be attempting to sleep).
1240          */
1241         sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
1242         if ((sleep == HERMON_SLEEP) &&
1243             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1244                 status = IBT_INVALID_PARAM;
1245                 goto mwalloc_fail;
1246         }
1247 
1248         /* Increment the reference count on the protection domain (PD) */
1249         hermon_pd_refcnt_inc(pd);
1250 
1251         /*
1252          * Allocate an MPT entry (for use as a memory window).  Since the
1253          * Hermon hardware uses the MPT entry for memory regions and for
1254          * memory windows, we will fill in this MPT with all the necessary
1255          * parameters for the memory window.  And then (just as we do for
1256          * memory regions) ownership will be passed to the hardware in the
1257          * final step below.  If we fail here, we must undo the protection
1258          * domain reference count.
1259          */
1260         status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1261         if (status != DDI_SUCCESS) {
1262                 status = IBT_INSUFF_RESOURCE;
1263                 goto mwalloc_fail1;
1264         }
1265 
1266         /*
1267          * Allocate the software structure for tracking the memory window (i.e.
1268          * the Hermon Memory Window handle).  Note: This is actually the same
1269          * software structure used for tracking memory regions, but since many
1270          * of the same properties are needed, only a single structure is
1271          * necessary.  If we fail here, we must undo the protection domain
1272          * reference count and the previous resource allocation.
1273          */
1274         status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1275         if (status != DDI_SUCCESS) {
1276                 status = IBT_INSUFF_RESOURCE;
1277                 goto mwalloc_fail2;
1278         }
1279         mw = (hermon_mwhdl_t)rsrc->hr_addr;
1280         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
1281 
1282         /*
1283          * Calculate an "unbound" RKey from MPT index.  In much the same way
1284          * as we do for memory regions (above), this key is constructed from
1285          * a "constrained" (which depends on the MPT index) and an
1286          * "unconstrained" portion (which may be arbitrarily chosen).
1287          */
1288         mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);
1289 
1290         /*
1291          * Fill in the MPT entry.  This is the final step before passing
1292          * ownership of the MPT entry to the Hermon hardware.  We use all of
1293          * the information collected/calculated above to fill in the
1294          * requisite portions of the MPT.  Note: fewer entries in the MPT
1295          * entry are necessary to allocate a memory window.
1296          */
1297         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1298         mpt_entry.reg_win       = HERMON_MPT_IS_WINDOW;
1299         mpt_entry.mem_key       = mw->mr_rkey;
1300         mpt_entry.pd            = pd->pd_pdnum;
1301         mpt_entry.lr            = 1;
1302 
1303         /*
1304          * Write the MPT entry to hardware.  Lastly, we pass ownership of
1305          * the entry to the hardware.  Note: in general, this operation
1306          * shouldn't fail.  But if it does, we have to undo everything we've
1307          * done above before returning error.
1308          */
1309         status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1310             sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1311         if (status != HERMON_CMD_SUCCESS) {
1312                 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1313                     status);
1314                 if (status == HERMON_CMD_INVALID_STATUS) {
1315                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1316                 }
1317                 status = ibc_get_ci_failure(0);
1318                 goto mwalloc_fail3;
1319         }
1320 
1321         /*
1322          * Fill in the rest of the Hermon Memory Window handle.  Having
1323          * successfully transferred ownership of the MPT, we can update the
1324          * following fields for use in further operations on the MW.
1325          */
1326         mw->mr_mptrsrcp      = mpt;
1327         mw->mr_pdhdl = pd;
1328         mw->mr_rsrcp = rsrc;
1329         mw->mr_rkey  = hermon_mr_key_swap(mw->mr_rkey);
1330         *mwhdl = mw;
1331 
1332         return (DDI_SUCCESS);
1333 
1334 mwalloc_fail3:
1335         hermon_rsrc_free(state, &rsrc);
1336 mwalloc_fail2:
1337         hermon_rsrc_free(state, &mpt);
1338 mwalloc_fail1:
1339         hermon_pd_refcnt_dec(pd);
1340 mwalloc_fail:
1341         return (status);
1342 }
1343 
1344 
1345 /*
1346  * hermon_mw_free()
1347  *    Context: Can be called from interrupt or base context.
1348  */
1349 int
1350 hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
1351 {
1352         hermon_rsrc_t           *mpt, *rsrc;
1353         hermon_mwhdl_t          mw;
1354         int                     status;
1355         hermon_pdhdl_t          pd;
1356 
1357         /*
1358          * Check the sleep flag.  Ensure that it is consistent with the
1359          * current thread context (i.e. if we are currently in the interrupt
1360          * context, then we shouldn't be attempting to sleep).
1361          */
1362         if ((sleep == HERMON_SLEEP) &&
1363             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1364                 status = IBT_INVALID_PARAM;
1365                 return (status);
1366         }
1367 
1368         /*
1369          * Pull all the necessary information from the Hermon Memory Window
1370          * handle.  This is necessary here because the resource for the
1371          * MW handle is going to be freed up as part of the this operation.
1372          */
1373         mw      = *mwhdl;
1374         mutex_enter(&mw->mr_lock);
1375         mpt     = mw->mr_mptrsrcp;
1376         rsrc    = mw->mr_rsrcp;
1377         pd      = mw->mr_pdhdl;
1378         mutex_exit(&mw->mr_lock);
1379         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
1380 
1381         /*
1382          * Reclaim the MPT entry from hardware.  Note: in general, it is
1383          * unexpected for this operation to return an error.
1384          */
1385         status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
1386             0, mpt->hr_indx, sleep);
1387         if (status != HERMON_CMD_SUCCESS) {
1388                 cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
1389                     status);
1390                 if (status == HERMON_CMD_INVALID_STATUS) {
1391                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1392                 }
1393                 return (ibc_get_ci_failure(0));
1394         }
1395 
1396         /* Free the Hermon Memory Window handle */
1397         hermon_rsrc_free(state, &rsrc);
1398 
1399         /* Free up the MPT entry resource */
1400         hermon_rsrc_free(state, &mpt);
1401 
1402         /* Decrement the reference count on the protection domain (PD) */
1403         hermon_pd_refcnt_dec(pd);
1404 
1405         /* Set the mwhdl pointer to NULL and return success */
1406         *mwhdl = NULL;
1407 
1408         return (DDI_SUCCESS);
1409 }
1410 
1411 
1412 /*
1413  * hermon_mr_keycalc()
1414  *    Context: Can be called from interrupt or base context.
1415  *    NOTE:  Produces a key in the form of
1416  *              KKKKKKKK IIIIIIII IIIIIIII IIIIIIIII
1417  *    where K == the arbitrary bits and I == the index
1418  */
1419 uint32_t
1420 hermon_mr_keycalc(uint32_t indx)
1421 {
1422         uint32_t tmp_key, tmp_indx;
1423 
1424         /*
1425          * Generate a simple key from counter.  Note:  We increment this
1426          * static variable _intentionally_ without any kind of mutex around
1427          * it.  First, single-threading all operations through a single lock
1428          * would be a bad idea (from a performance point-of-view).  Second,
1429          * the upper "unconstrained" bits don't really have to be unique
1430          * because the lower bits are guaranteed to be (although we do make a
1431          * best effort to ensure that they are).  Third, the window for the
1432          * race (where both threads read and update the counter at the same
1433          * time) is incredibly small.
1434          * And, lastly, we'd like to make this into a "random" key
1435          */
1436         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt))
1437         tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
1438         tmp_indx = indx & 0xffffff;
1439         return (tmp_key | tmp_indx);
1440 }
1441 
1442 
1443 /*
1444  * hermon_mr_key_swap()
1445  *    Context: Can be called from interrupt or base context.
1446  *    NOTE:  Produces a key in the form of
1447  *              IIIIIIII IIIIIIII IIIIIIIII KKKKKKKK
1448  *    where K == the arbitrary bits and I == the index
1449  */
1450 uint32_t
1451 hermon_mr_key_swap(uint32_t indx)
1452 {
1453         /*
1454          * The memory key format to pass down to the hardware is
1455          * (key[7:0],index[23:0]), which defines the index to the
1456          * hardware resource. When the driver passes this as a memory
1457          * key, (i.e. to retrieve a resource) the format is
1458          * (index[23:0],key[7:0]).
1459          */
1460         return (((indx >> 24) & 0x000000ff) | ((indx << 8) & 0xffffff00));
1461 }
1462 
1463 /*
1464  * hermon_mr_common_reg()
1465  *    Context: Can be called from interrupt or base context.
1466  */
1467 static int
1468 hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
1469     hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
1470     hermon_mpt_rsrc_type_t mpt_type)
1471 {
1472         hermon_rsrc_t           *mpt, *mtt, *rsrc, *mtt_refcnt;
1473         hermon_umap_db_entry_t  *umapdb;
1474         hermon_sw_refcnt_t      *swrc_tmp;
1475         hermon_hw_dmpt_t        mpt_entry;
1476         hermon_mrhdl_t          mr;
1477         ibt_mr_flags_t          flags;
1478         hermon_bind_info_t      *bh;
1479         ddi_dma_handle_t        bind_dmahdl;
1480         ddi_umem_cookie_t       umem_cookie;
1481         size_t                  umem_len;
1482         caddr_t                 umem_addr;
1483         uint64_t                mtt_addr, max_sz;
1484         uint_t                  sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
1485         int                     status, umem_flags, bind_override_addr;
1486 
1487         /*
1488          * Check the "options" flag.  Currently this flag tells the driver
1489          * whether or not the region should be bound normally (i.e. with
1490          * entries written into the PCI IOMMU), whether it should be
1491          * registered to bypass the IOMMU, and whether or not the resulting
1492          * address should be "zero-based" (to aid the alignment restrictions
1493          * for QPs).
1494          */
1495         if (op == NULL) {
1496                 bind_type   = HERMON_BINDMEM_NORMAL;
1497                 bind_dmahdl = NULL;
1498                 bind_override_addr = 0;
1499         } else {
1500                 bind_type          = op->mro_bind_type;
1501                 bind_dmahdl        = op->mro_bind_dmahdl;
1502                 bind_override_addr = op->mro_bind_override_addr;
1503         }
1504 
1505         /* check what kind of mpt to use */
1506 
1507         /* Extract the flags field from the hermon_bind_info_t */
1508         flags = bind->bi_flags;
1509 
1510         /*
1511          * Check for invalid length.  Check is the length is zero or if the
1512          * length is larger than the maximum configured value.  Return error
1513          * if it is.
1514          */
1515         max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
1516         if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
1517                 status = IBT_MR_LEN_INVALID;
1518                 goto mrcommon_fail;
1519         }
1520 
1521         /*
1522          * Check the sleep flag.  Ensure that it is consistent with the
1523          * current thread context (i.e. if we are currently in the interrupt
1524          * context, then we shouldn't be attempting to sleep).
1525          */
1526         sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1527         if ((sleep == HERMON_SLEEP) &&
1528             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1529                 status = IBT_INVALID_PARAM;
1530                 goto mrcommon_fail;
1531         }
1532 
1533         /* Increment the reference count on the protection domain (PD) */
1534         hermon_pd_refcnt_inc(pd);
1535 
1536         /*
1537          * Allocate an MPT entry.  This will be filled in with all the
1538          * necessary parameters to define the memory region.  And then
1539          * ownership will be passed to the hardware in the final step
1540          * below.  If we fail here, we must undo the protection domain
1541          * reference count.
1542          */
1543         if (mpt_type == HERMON_MPT_DMPT) {
1544                 status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1545                 if (status != DDI_SUCCESS) {
1546                         status = IBT_INSUFF_RESOURCE;
1547                         goto mrcommon_fail1;
1548                 }
1549         } else {
1550                 mpt = NULL;
1551         }
1552 
1553         /*
1554          * Allocate the software structure for tracking the memory region (i.e.
1555          * the Hermon Memory Region handle).  If we fail here, we must undo
1556          * the protection domain reference count and the previous resource
1557          * allocation.
1558          */
1559         status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1560         if (status != DDI_SUCCESS) {
1561                 status = IBT_INSUFF_RESOURCE;
1562                 goto mrcommon_fail2;
1563         }
1564         mr = (hermon_mrhdl_t)rsrc->hr_addr;
1565         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1566 
1567         /*
1568          * Setup and validate the memory region access flags.  This means
1569          * translating the IBTF's enable flags into the access flags that
1570          * will be used in later operations.
1571          */
1572         mr->mr_accflag = 0;
1573         if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1574                 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1575         if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1576                 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1577         if (flags & IBT_MR_ENABLE_REMOTE_READ)
1578                 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1579         if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1580                 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1581         if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1582                 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1583 
1584         /*
1585          * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
1586          * from a certain number of "constrained" bits (the least significant
1587          * bits) and some number of "unconstrained" bits.  The constrained
1588          * bits must be set to the index of the entry in the MPT table, but
1589          * the unconstrained bits can be set to any value we wish.  Note:
1590          * if no remote access is required, then the RKey value is not filled
1591          * in.  Otherwise both Rkey and LKey are given the same value.
1592          */
1593         if (mpt)
1594                 mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1595 
1596         /*
1597          * Determine if the memory is from userland and pin the pages
1598          * with umem_lockmemory() if necessary.
1599          * Then, if this is userland memory, allocate an entry in the
1600          * "userland resources database".  This will later be added to
1601          * the database (after all further memory registration operations are
1602          * successful).  If we fail here, we must undo the reference counts
1603          * and the previous resource allocations.
1604          */
1605         mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
1606         if (mr_is_umem) {
1607                 umem_len   = ptob(btopr(bind->bi_len +
1608                     ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
1609                 umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
1610                 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
1611                     DDI_UMEMLOCK_LONGTERM);
1612                 status = umem_lockmemory(umem_addr, umem_len, umem_flags,
1613                     &umem_cookie, &hermon_umem_cbops, NULL);
1614                 if (status != 0) {
1615                         status = IBT_INSUFF_RESOURCE;
1616                         goto mrcommon_fail3;
1617                 }
1618 
1619                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1620                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1621 
1622                 bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
1623                     B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
1624                 if (bind->bi_buf == NULL) {
1625                         status = IBT_INSUFF_RESOURCE;
1626                         goto mrcommon_fail3;
1627                 }
1628                 bind->bi_type = HERMON_BINDHDL_UBUF;
1629                 bind->bi_buf->b_flags |= B_READ;
1630 
1631                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1632                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1633 
1634                 umapdb = hermon_umap_db_alloc(state->hs_instance,
1635                     (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
1636                     (uint64_t)(uintptr_t)rsrc);
1637                 if (umapdb == NULL) {
1638                         status = IBT_INSUFF_RESOURCE;
1639                         goto mrcommon_fail4;
1640                 }
1641         }
1642 
1643         /*
1644          * Setup the bindinfo for the mtt bind call
1645          */
1646         bh = &mr->mr_bindinfo;
1647         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
1648         bcopy(bind, bh, sizeof (hermon_bind_info_t));
1649         bh->bi_bypass = bind_type;
1650         status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
1651             &mtt_pgsize_bits, mpt != NULL);
1652         if (status != DDI_SUCCESS) {
1653                 /*
1654                  * When mtt_bind fails, freerbuf has already been done,
1655                  * so make sure not to call it again.
1656                  */
1657                 bind->bi_type = bh->bi_type;
1658                 goto mrcommon_fail5;
1659         }
1660         mr->mr_logmttpgsz = mtt_pgsize_bits;
1661 
1662         /*
1663          * Allocate MTT reference count (to track shared memory regions).
1664          * This reference count resource may never be used on the given
1665          * memory region, but if it is ever later registered as "shared"
1666          * memory region then this resource will be necessary.  If we fail
1667          * here, we do pretty much the same as above to clean up.
1668          */
1669         status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
1670             &mtt_refcnt);
1671         if (status != DDI_SUCCESS) {
1672                 status = IBT_INSUFF_RESOURCE;
1673                 goto mrcommon_fail6;
1674         }
1675         mr->mr_mttrefcntp = mtt_refcnt;
1676         swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
1677         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
1678         HERMON_MTT_REFCNT_INIT(swrc_tmp);
1679 
1680         mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
1681 
1682         /*
1683          * Fill in the MPT entry.  This is the final step before passing
1684          * ownership of the MPT entry to the Hermon hardware.  We use all of
1685          * the information collected/calculated above to fill in the
1686          * requisite portions of the MPT.  Do this ONLY for DMPTs.
1687          */
1688         if (mpt == NULL)
1689                 goto no_passown;
1690 
1691         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1692 
1693         mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
1694         mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1695         mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1696         mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1697         mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1698         mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1699         mpt_entry.lr      = 1;
1700         mpt_entry.phys_addr = 0;
1701         mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1702 
1703         mpt_entry.entity_sz     = mr->mr_logmttpgsz;
1704         mpt_entry.mem_key       = mr->mr_lkey;
1705         mpt_entry.pd            = pd->pd_pdnum;
1706         mpt_entry.rem_acc_en = 0;
1707         mpt_entry.fast_reg_en = 0;
1708         mpt_entry.en_inval = 0;
1709         mpt_entry.lkey = 0;
1710         mpt_entry.win_cnt = 0;
1711 
1712         if (bind_override_addr == 0) {
1713                 mpt_entry.start_addr = bh->bi_addr;
1714         } else {
1715                 bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
1716                 mpt_entry.start_addr = bh->bi_addr;
1717         }
1718         mpt_entry.reg_win_len   = bh->bi_len;
1719 
1720         mpt_entry.mtt_addr_h = mtt_addr >> 32;  /* only 8 more bits */
1721         mpt_entry.mtt_addr_l = mtt_addr >> 3;     /* only 29 bits */
1722 
1723         /*
1724          * Write the MPT entry to hardware.  Lastly, we pass ownership of
1725          * the entry to the hardware if needed.  Note: in general, this
1726          * operation shouldn't fail.  But if it does, we have to undo
1727          * everything we've done above before returning error.
1728          *
1729          * For Hermon, this routine (which is common to the contexts) will only
1730          * set the ownership if needed - the process of passing the context
1731          * itself to HW will take care of setting up the MPT (based on type
1732          * and index).
1733          */
1734 
1735         mpt_entry.bnd_qp = 0;   /* dMPT for a qp, check for window */
1736         status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1737             sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1738         if (status != HERMON_CMD_SUCCESS) {
1739                 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1740                     status);
1741                 if (status == HERMON_CMD_INVALID_STATUS) {
1742                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1743                 }
1744                 status = ibc_get_ci_failure(0);
1745                 goto mrcommon_fail7;
1746         }
1747         if (hermon_rdma_debug & 0x4)
1748                 IBTF_DPRINTF_L2("mr", "  reg: mr %p  key %x",
1749                     mr, hermon_mr_key_swap(mr->mr_rkey));
1750 no_passown:
1751 
1752         /*
1753          * Fill in the rest of the Hermon Memory Region handle.  Having
1754          * successfully transferred ownership of the MPT, we can update the
1755          * following fields for use in further operations on the MR.
1756          */
1757         mr->mr_mttaddr          = mtt_addr;
1758 
1759         mr->mr_log2_pgsz   = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
1760         mr->mr_mptrsrcp         = mpt;
1761         mr->mr_mttrsrcp         = mtt;
1762         mr->mr_pdhdl    = pd;
1763         mr->mr_rsrcp    = rsrc;
1764         mr->mr_is_umem          = mr_is_umem;
1765         mr->mr_is_fmr           = 0;
1766         mr->mr_umemcookie  = (mr_is_umem != 0) ? umem_cookie : NULL;
1767         mr->mr_umem_cbfunc = NULL;
1768         mr->mr_umem_cbarg1 = NULL;
1769         mr->mr_umem_cbarg2 = NULL;
1770         mr->mr_lkey     = hermon_mr_key_swap(mr->mr_lkey);
1771         mr->mr_rkey     = hermon_mr_key_swap(mr->mr_rkey);
1772         mr->mr_mpt_type         = mpt_type;
1773 
1774         /*
1775          * If this is userland memory, then we need to insert the previously
1776          * allocated entry into the "userland resources database".  This will
1777          * allow for later coordination between the hermon_umap_umemlock_cb()
1778          * callback and hermon_mr_deregister().
1779          */
1780         if (mr_is_umem) {
1781                 hermon_umap_db_add(umapdb);
1782         }
1783 
1784         *mrhdl = mr;
1785 
1786         return (DDI_SUCCESS);
1787 
1788 /*
1789  * The following is cleanup for all possible failure cases in this routine
1790  */
1791 mrcommon_fail7:
1792         hermon_rsrc_free(state, &mtt_refcnt);
1793 mrcommon_fail6:
1794         hermon_mr_mem_unbind(state, bh);
1795         bind->bi_type = bh->bi_type;
1796 mrcommon_fail5:
1797         if (mr_is_umem) {
1798                 hermon_umap_db_free(umapdb);
1799         }
1800 mrcommon_fail4:
1801         if (mr_is_umem) {
1802                 /*
1803                  * Free up the memory ddi_umem_iosetup() allocates
1804                  * internally.
1805                  */
1806                 if (bind->bi_type == HERMON_BINDHDL_UBUF) {
1807                         freerbuf(bind->bi_buf);
1808                         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1809                         bind->bi_type = HERMON_BINDHDL_NONE;
1810                         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1811                 }
1812                 ddi_umem_unlock(umem_cookie);
1813         }
1814 mrcommon_fail3:
1815         hermon_rsrc_free(state, &rsrc);
1816 mrcommon_fail2:
1817         if (mpt != NULL)
1818                 hermon_rsrc_free(state, &mpt);
1819 mrcommon_fail1:
1820         hermon_pd_refcnt_dec(pd);
1821 mrcommon_fail:
1822         return (status);
1823 }
1824 
1825 /*
1826  * hermon_dma_mr_register()
1827  *    Context: Can be called from base context.
1828  */
1829 int
1830 hermon_dma_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
1831     ibt_dmr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl)
1832 {
1833         hermon_rsrc_t           *mpt, *rsrc;
1834         hermon_hw_dmpt_t        mpt_entry;
1835         hermon_mrhdl_t          mr;
1836         ibt_mr_flags_t          flags;
1837         uint_t                  sleep;
1838         int                     status;
1839 
1840         /* Extract the flags field */
1841         flags = mr_attr->dmr_flags;
1842 
1843         /*
1844          * Check the sleep flag.  Ensure that it is consistent with the
1845          * current thread context (i.e. if we are currently in the interrupt
1846          * context, then we shouldn't be attempting to sleep).
1847          */
1848         sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
1849         if ((sleep == HERMON_SLEEP) &&
1850             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
1851                 status = IBT_INVALID_PARAM;
1852                 goto mrcommon_fail;
1853         }
1854 
1855         /* Increment the reference count on the protection domain (PD) */
1856         hermon_pd_refcnt_inc(pd);
1857 
1858         /*
1859          * Allocate an MPT entry.  This will be filled in with all the
1860          * necessary parameters to define the memory region.  And then
1861          * ownership will be passed to the hardware in the final step
1862          * below.  If we fail here, we must undo the protection domain
1863          * reference count.
1864          */
1865         status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
1866         if (status != DDI_SUCCESS) {
1867                 status = IBT_INSUFF_RESOURCE;
1868                 goto mrcommon_fail1;
1869         }
1870 
1871         /*
1872          * Allocate the software structure for tracking the memory region (i.e.
1873          * the Hermon Memory Region handle).  If we fail here, we must undo
1874          * the protection domain reference count and the previous resource
1875          * allocation.
1876          */
1877         status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
1878         if (status != DDI_SUCCESS) {
1879                 status = IBT_INSUFF_RESOURCE;
1880                 goto mrcommon_fail2;
1881         }
1882         mr = (hermon_mrhdl_t)rsrc->hr_addr;
1883         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1884         bzero(mr, sizeof (*mr));
1885 
1886         /*
1887          * Setup and validate the memory region access flags.  This means
1888          * translating the IBTF's enable flags into the access flags that
1889          * will be used in later operations.
1890          */
1891         mr->mr_accflag = 0;
1892         if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1893                 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1894         if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1895                 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1896         if (flags & IBT_MR_ENABLE_REMOTE_READ)
1897                 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1898         if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1899                 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1900         if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1901                 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1902 
1903         /*
1904          * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
1905          * from a certain number of "constrained" bits (the least significant
1906          * bits) and some number of "unconstrained" bits.  The constrained
1907          * bits must be set to the index of the entry in the MPT table, but
1908          * the unconstrained bits can be set to any value we wish.  Note:
1909          * if no remote access is required, then the RKey value is not filled
1910          * in.  Otherwise both Rkey and LKey are given the same value.
1911          */
1912         if (mpt)
1913                 mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
1914 
1915         /*
1916          * Fill in the MPT entry.  This is the final step before passing
1917          * ownership of the MPT entry to the Hermon hardware.  We use all of
1918          * the information collected/calculated above to fill in the
1919          * requisite portions of the MPT.  Do this ONLY for DMPTs.
1920          */
1921         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
1922 
1923         mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
1924         mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1925         mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1926         mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1927         mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1928         mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1929         mpt_entry.lr      = 1;
1930         mpt_entry.phys_addr = 1;        /* critical bit for this */
1931         mpt_entry.reg_win = HERMON_MPT_IS_REGION;
1932 
1933         mpt_entry.entity_sz     = mr->mr_logmttpgsz;
1934         mpt_entry.mem_key       = mr->mr_lkey;
1935         mpt_entry.pd            = pd->pd_pdnum;
1936         mpt_entry.rem_acc_en = 0;
1937         mpt_entry.fast_reg_en = 0;
1938         mpt_entry.en_inval = 0;
1939         mpt_entry.lkey = 0;
1940         mpt_entry.win_cnt = 0;
1941 
1942         mpt_entry.start_addr = mr_attr->dmr_paddr;
1943         mpt_entry.reg_win_len = mr_attr->dmr_len;
1944         if (mr_attr->dmr_len == 0)
1945                 mpt_entry.len_b64 = 1;  /* needed for 2^^64 length */
1946 
1947         mpt_entry.mtt_addr_h = 0;
1948         mpt_entry.mtt_addr_l = 0;
1949 
1950         /*
1951          * Write the MPT entry to hardware.  Lastly, we pass ownership of
1952          * the entry to the hardware if needed.  Note: in general, this
1953          * operation shouldn't fail.  But if it does, we have to undo
1954          * everything we've done above before returning error.
1955          *
1956          * For Hermon, this routine (which is common to the contexts) will only
1957          * set the ownership if needed - the process of passing the context
1958          * itself to HW will take care of setting up the MPT (based on type
1959          * and index).
1960          */
1961 
1962         mpt_entry.bnd_qp = 0;   /* dMPT for a qp, check for window */
1963         status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1964             sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
1965         if (status != HERMON_CMD_SUCCESS) {
1966                 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
1967                     status);
1968                 if (status == HERMON_CMD_INVALID_STATUS) {
1969                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1970                 }
1971                 status = ibc_get_ci_failure(0);
1972                 goto mrcommon_fail7;
1973         }
1974 
1975         /*
1976          * Fill in the rest of the Hermon Memory Region handle.  Having
1977          * successfully transferred ownership of the MPT, we can update the
1978          * following fields for use in further operations on the MR.
1979          */
1980         mr->mr_mttaddr          = 0;
1981 
1982         mr->mr_log2_pgsz   = 0;
1983         mr->mr_mptrsrcp         = mpt;
1984         mr->mr_mttrsrcp         = NULL;
1985         mr->mr_pdhdl    = pd;
1986         mr->mr_rsrcp    = rsrc;
1987         mr->mr_is_umem          = 0;
1988         mr->mr_is_fmr           = 0;
1989         mr->mr_umemcookie  = NULL;
1990         mr->mr_umem_cbfunc = NULL;
1991         mr->mr_umem_cbarg1 = NULL;
1992         mr->mr_umem_cbarg2 = NULL;
1993         mr->mr_lkey     = hermon_mr_key_swap(mr->mr_lkey);
1994         mr->mr_rkey     = hermon_mr_key_swap(mr->mr_rkey);
1995         mr->mr_mpt_type         = HERMON_MPT_DMPT;
1996 
1997         *mrhdl = mr;
1998 
1999         return (DDI_SUCCESS);
2000 
2001 /*
2002  * The following is cleanup for all possible failure cases in this routine
2003  */
2004 mrcommon_fail7:
2005         hermon_rsrc_free(state, &rsrc);
2006 mrcommon_fail2:
2007         hermon_rsrc_free(state, &mpt);
2008 mrcommon_fail1:
2009         hermon_pd_refcnt_dec(pd);
2010 mrcommon_fail:
2011         return (status);
2012 }
2013 
2014 /*
2015  * hermon_mr_alloc_lkey()
2016  *    Context: Can be called from base context.
2017  */
2018 int
2019 hermon_mr_alloc_lkey(hermon_state_t *state, hermon_pdhdl_t pd,
2020     ibt_lkey_flags_t flags, uint_t nummtt, hermon_mrhdl_t *mrhdl)
2021 {
2022         hermon_rsrc_t           *mpt, *mtt, *rsrc, *mtt_refcnt;
2023         hermon_sw_refcnt_t      *swrc_tmp;
2024         hermon_hw_dmpt_t        mpt_entry;
2025         hermon_mrhdl_t          mr;
2026         uint64_t                mtt_addr;
2027         uint_t                  sleep;
2028         int                     status;
2029 
2030         /* Increment the reference count on the protection domain (PD) */
2031         hermon_pd_refcnt_inc(pd);
2032 
2033         sleep = (flags & IBT_KEY_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
2034 
2035         /*
2036          * Allocate an MPT entry.  This will be filled in with "some" of the
2037          * necessary parameters to define the memory region.  And then
2038          * ownership will be passed to the hardware in the final step
2039          * below.  If we fail here, we must undo the protection domain
2040          * reference count.
2041          *
2042          * The MTTs will get filled in when the FRWR is processed.
2043          */
2044         status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
2045         if (status != DDI_SUCCESS) {
2046                 status = IBT_INSUFF_RESOURCE;
2047                 goto alloclkey_fail1;
2048         }
2049 
2050         /*
2051          * Allocate the software structure for tracking the memory region (i.e.
2052          * the Hermon Memory Region handle).  If we fail here, we must undo
2053          * the protection domain reference count and the previous resource
2054          * allocation.
2055          */
2056         status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
2057         if (status != DDI_SUCCESS) {
2058                 status = IBT_INSUFF_RESOURCE;
2059                 goto alloclkey_fail2;
2060         }
2061         mr = (hermon_mrhdl_t)rsrc->hr_addr;
2062         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
2063         bzero(mr, sizeof (*mr));
2064         mr->mr_bindinfo.bi_type = HERMON_BINDHDL_LKEY;
2065 
2066         mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2067 
2068         status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
2069         if (status != DDI_SUCCESS) {
2070                 status = IBT_INSUFF_RESOURCE;
2071                 goto alloclkey_fail3;
2072         }
2073         mr->mr_logmttpgsz = PAGESHIFT;
2074 
2075         /*
2076          * Allocate MTT reference count (to track shared memory regions).
2077          * This reference count resource may never be used on the given
2078          * memory region, but if it is ever later registered as "shared"
2079          * memory region then this resource will be necessary.  If we fail
2080          * here, we do pretty much the same as above to clean up.
2081          */
2082         status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
2083             &mtt_refcnt);
2084         if (status != DDI_SUCCESS) {
2085                 status = IBT_INSUFF_RESOURCE;
2086                 goto alloclkey_fail4;
2087         }
2088         mr->mr_mttrefcntp = mtt_refcnt;
2089         swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2090         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
2091         HERMON_MTT_REFCNT_INIT(swrc_tmp);
2092 
2093         mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
2094 
2095         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2096         mpt_entry.status = HERMON_MPT_FREE;
2097         mpt_entry.lw = 1;
2098         mpt_entry.lr = 1;
2099         mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2100         mpt_entry.entity_sz = mr->mr_logmttpgsz;
2101         mpt_entry.mem_key = mr->mr_lkey;
2102         mpt_entry.pd = pd->pd_pdnum;
2103         mpt_entry.fast_reg_en = 1;
2104         mpt_entry.rem_acc_en = 1;
2105         mpt_entry.en_inval = 1;
2106         if (flags & IBT_KEY_REMOTE) {
2107                 mpt_entry.ren_inval = 1;
2108         }
2109         mpt_entry.mtt_size = nummtt;
2110         mpt_entry.mtt_addr_h = mtt_addr >> 32;    /* only 8 more bits */
2111         mpt_entry.mtt_addr_l = mtt_addr >> 3;     /* only 29 bits */
2112 
2113         /*
2114          * Write the MPT entry to hardware.  Lastly, we pass ownership of
2115          * the entry to the hardware if needed.  Note: in general, this
2116          * operation shouldn't fail.  But if it does, we have to undo
2117          * everything we've done above before returning error.
2118          *
2119          * For Hermon, this routine (which is common to the contexts) will only
2120          * set the ownership if needed - the process of passing the context
2121          * itself to HW will take care of setting up the MPT (based on type
2122          * and index).
2123          */
2124         status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2125             sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
2126         if (status != HERMON_CMD_SUCCESS) {
2127                 cmn_err(CE_CONT, "Hermon: alloc_lkey: SW2HW_MPT command "
2128                     "failed: %08x\n", status);
2129                 if (status == HERMON_CMD_INVALID_STATUS) {
2130                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2131                 }
2132                 status = ibc_get_ci_failure(0);
2133                 goto alloclkey_fail5;
2134         }
2135 
2136         /*
2137          * Fill in the rest of the Hermon Memory Region handle.  Having
2138          * successfully transferred ownership of the MPT, we can update the
2139          * following fields for use in further operations on the MR.
2140          */
2141         mr->mr_accflag = IBT_MR_LOCAL_WRITE;
2142         mr->mr_mttaddr = mtt_addr;
2143         mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
2144         mr->mr_mptrsrcp = mpt;
2145         mr->mr_mttrsrcp = mtt;
2146         mr->mr_pdhdl = pd;
2147         mr->mr_rsrcp = rsrc;
2148         mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
2149         mr->mr_rkey = mr->mr_lkey;
2150         mr->mr_mpt_type = HERMON_MPT_DMPT;
2151 
2152         *mrhdl = mr;
2153         return (DDI_SUCCESS);
2154 
2155 alloclkey_fail5:
2156         hermon_rsrc_free(state, &mtt_refcnt);
2157 alloclkey_fail4:
2158         hermon_rsrc_free(state, &mtt);
2159 alloclkey_fail3:
2160         hermon_rsrc_free(state, &rsrc);
2161 alloclkey_fail2:
2162         hermon_rsrc_free(state, &mpt);
2163 alloclkey_fail1:
2164         hermon_pd_refcnt_dec(pd);
2165         return (status);
2166 }
2167 
2168 /*
2169  * hermon_mr_fexch_mpt_init()
2170  *    Context: Can be called from base context.
2171  *
2172  * This is the same as alloc_lkey, but not returning an mrhdl.
2173  */
2174 int
2175 hermon_mr_fexch_mpt_init(hermon_state_t *state, hermon_pdhdl_t pd,
2176     uint32_t mpt_indx, uint_t nummtt, uint64_t mtt_addr, uint_t sleep)
2177 {
2178         hermon_hw_dmpt_t        mpt_entry;
2179         int                     status;
2180 
2181         /*
2182          * The MTTs will get filled in when the FRWR is processed.
2183          */
2184 
2185         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2186         mpt_entry.status = HERMON_MPT_FREE;
2187         mpt_entry.lw = 1;
2188         mpt_entry.lr = 1;
2189         mpt_entry.rw = 1;
2190         mpt_entry.rr = 1;
2191         mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2192         mpt_entry.entity_sz = PAGESHIFT;
2193         mpt_entry.mem_key = mpt_indx;
2194         mpt_entry.pd = pd->pd_pdnum;
2195         mpt_entry.fast_reg_en = 1;
2196         mpt_entry.rem_acc_en = 1;
2197         mpt_entry.en_inval = 1;
2198         mpt_entry.ren_inval = 1;
2199         mpt_entry.mtt_size = nummtt;
2200         mpt_entry.mtt_addr_h = mtt_addr >> 32;    /* only 8 more bits */
2201         mpt_entry.mtt_addr_l = mtt_addr >> 3;     /* only 29 bits */
2202 
2203         /*
2204          * Write the MPT entry to hardware.  Lastly, we pass ownership of
2205          * the entry to the hardware if needed.  Note: in general, this
2206          * operation shouldn't fail.  But if it does, we have to undo
2207          * everything we've done above before returning error.
2208          *
2209          * For Hermon, this routine (which is common to the contexts) will only
2210          * set the ownership if needed - the process of passing the context
2211          * itself to HW will take care of setting up the MPT (based on type
2212          * and index).
2213          */
2214         status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2215             sizeof (hermon_hw_dmpt_t), mpt_indx, sleep);
2216         if (status != HERMON_CMD_SUCCESS) {
2217                 cmn_err(CE_CONT, "Hermon: fexch_mpt_init: SW2HW_MPT command "
2218                     "failed: %08x\n", status);
2219                 if (status == HERMON_CMD_INVALID_STATUS) {
2220                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2221                 }
2222                 status = ibc_get_ci_failure(0);
2223                 return (status);
2224         }
2225         /* Increment the reference count on the protection domain (PD) */
2226         hermon_pd_refcnt_inc(pd);
2227 
2228         return (DDI_SUCCESS);
2229 }
2230 
2231 /*
2232  * hermon_mr_fexch_mpt_fini()
2233  *    Context: Can be called from base context.
2234  *
2235  * This is the same as deregister_mr, without an mrhdl.
2236  */
2237 int
2238 hermon_mr_fexch_mpt_fini(hermon_state_t *state, hermon_pdhdl_t pd,
2239     uint32_t mpt_indx, uint_t sleep)
2240 {
2241         int                     status;
2242 
2243         status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
2244             NULL, 0, mpt_indx, sleep);
2245         if (status != DDI_SUCCESS) {
2246                 cmn_err(CE_CONT, "Hermon: fexch_mpt_fini: HW2SW_MPT command "
2247                     "failed: %08x\n", status);
2248                 if (status == HERMON_CMD_INVALID_STATUS) {
2249                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2250                 }
2251                 status = ibc_get_ci_failure(0);
2252                 return (status);
2253         }
2254 
2255         /* Decrement the reference count on the protection domain (PD) */
2256         hermon_pd_refcnt_dec(pd);
2257 
2258         return (DDI_SUCCESS);
2259 }
2260 
2261 /*
2262  * hermon_mr_mtt_bind()
2263  *    Context: Can be called from interrupt or base context.
2264  */
2265 int
2266 hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind,
2267     ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits,
2268     uint_t is_buffer)
2269 {
2270         uint64_t                nummtt;
2271         uint_t                  sleep;
2272         int                     status;
2273 
2274         /*
2275          * Check the sleep flag.  Ensure that it is consistent with the
2276          * current thread context (i.e. if we are currently in the interrupt
2277          * context, then we shouldn't be attempting to sleep).
2278          */
2279         sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ?
2280             HERMON_NOSLEEP : HERMON_SLEEP;
2281         if ((sleep == HERMON_SLEEP) &&
2282             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2283                 status = IBT_INVALID_PARAM;
2284                 goto mrmttbind_fail;
2285         }
2286 
2287         /*
2288          * Bind the memory and determine the mapped addresses.  This is
2289          * the first of two routines that do all the "heavy lifting" for
2290          * the Hermon memory registration routines.  The hermon_mr_mem_bind()
2291          * routine takes the "bind" struct with all its fields filled
2292          * in and returns a list of DMA cookies (for the PCI mapped addresses
2293          * corresponding to the specified address region) which are used by
2294          * the hermon_mr_fast_mtt_write() routine below.  If we fail here, we
2295          * must undo all the previous resource allocation (and PD reference
2296          * count).
2297          */
2298         status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer);
2299         if (status != DDI_SUCCESS) {
2300                 status = IBT_INSUFF_RESOURCE;
2301                 goto mrmttbind_fail;
2302         }
2303 
2304         /*
2305          * Determine number of pages spanned.  This routine uses the
2306          * information in the "bind" struct to determine the required
2307          * number of MTT entries needed (and returns the suggested page size -
2308          * as a "power-of-2" - for each MTT entry).
2309          */
2310         nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits);
2311 
2312         /*
2313          * Allocate the MTT entries.  Use the calculations performed above to
2314          * allocate the required number of MTT entries. If we fail here, we
2315          * must not only undo all the previous resource allocation (and PD
2316          * reference count), but we must also unbind the memory.
2317          */
2318         status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt);
2319         if (status != DDI_SUCCESS) {
2320                 status = IBT_INSUFF_RESOURCE;
2321                 goto mrmttbind_fail2;
2322         }
2323 
2324         /*
2325          * Write the mapped addresses into the MTT entries.  This is part two
2326          * of the "heavy lifting" routines that we talked about above.  Note:
2327          * we pass the suggested page size from the earlier operation here.
2328          * And if we fail here, we again do pretty much the same huge clean up.
2329          */
2330         status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits);
2331         if (status != DDI_SUCCESS) {
2332                 /*
2333                  * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2334                  * only if it detects a HW error during DMA.
2335                  */
2336                 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2337                 status = ibc_get_ci_failure(0);
2338                 goto mrmttbind_fail3;
2339         }
2340         return (DDI_SUCCESS);
2341 
2342 /*
2343  * The following is cleanup for all possible failure cases in this routine
2344  */
2345 mrmttbind_fail3:
2346         hermon_rsrc_free(state, mtt);
2347 mrmttbind_fail2:
2348         hermon_mr_mem_unbind(state, bind);
2349 mrmttbind_fail:
2350         return (status);
2351 }
2352 
2353 
2354 /*
2355  * hermon_mr_mtt_unbind()
2356  *    Context: Can be called from interrupt or base context.
2357  */
2358 int
2359 hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind,
2360     hermon_rsrc_t *mtt)
2361 {
2362         /*
2363          * Free up the MTT entries and unbind the memory.  Here, as above, we
2364          * attempt to free these resources only if it is appropriate to do so.
2365          */
2366         hermon_mr_mem_unbind(state, bind);
2367         hermon_rsrc_free(state, &mtt);
2368 
2369         return (DDI_SUCCESS);
2370 }
2371 
2372 
2373 /*
2374  * hermon_mr_common_rereg()
2375  *    Context: Can be called from interrupt or base context.
2376  */
2377 static int
2378 hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
2379     hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
2380     hermon_mr_options_t *op)
2381 {
2382         hermon_rsrc_t           *mpt;
2383         ibt_mr_attr_flags_t     acc_flags_to_use;
2384         ibt_mr_flags_t          flags;
2385         hermon_pdhdl_t          pd_to_use;
2386         hermon_hw_dmpt_t        mpt_entry;
2387         uint64_t                mtt_addr_to_use, vaddr_to_use, len_to_use;
2388         uint_t                  sleep, dereg_level;
2389         int                     status;
2390 
2391         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2392 
2393         /*
2394          * Check here to see if the memory region corresponds to a userland
2395          * mapping.  Reregistration of userland memory regions is not
2396          * currently supported.  Return failure.
2397          */
2398         if (mr->mr_is_umem) {
2399                 status = IBT_MR_HDL_INVALID;
2400                 goto mrrereg_fail;
2401         }
2402 
2403         mutex_enter(&mr->mr_lock);
2404 
2405         /* Pull MPT resource pointer from the Hermon Memory Region handle */
2406         mpt = mr->mr_mptrsrcp;
2407 
2408         /* Extract the flags field from the hermon_bind_info_t */
2409         flags = bind->bi_flags;
2410 
2411         /*
2412          * Check the sleep flag.  Ensure that it is consistent with the
2413          * current thread context (i.e. if we are currently in the interrupt
2414          * context, then we shouldn't be attempting to sleep).
2415          */
2416         sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
2417         if ((sleep == HERMON_SLEEP) &&
2418             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2419                 mutex_exit(&mr->mr_lock);
2420                 status = IBT_INVALID_PARAM;
2421                 goto mrrereg_fail;
2422         }
2423 
2424         /*
2425          * First step is to temporarily invalidate the MPT entry.  This
2426          * regains ownership from the hardware, and gives us the opportunity
2427          * to modify the entry.  Note: The HW2SW_MPT command returns the
2428          * current MPT entry contents.  These are saved away here because
2429          * they will be reused in a later step below.  If the region has
2430          * bound memory windows that we fail returning an "in use" error code.
2431          * Otherwise, this is an unexpected error and we deregister the
2432          * memory region and return error.
2433          *
2434          * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
2435          * against holding the lock around this rereg call in all contexts.
2436          */
2437         status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
2438             sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
2439         if (status != HERMON_CMD_SUCCESS) {
2440                 mutex_exit(&mr->mr_lock);
2441                 if (status == HERMON_CMD_REG_BOUND) {
2442                         return (IBT_MR_IN_USE);
2443                 } else {
2444                         cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
2445                             "%08x\n", status);
2446                         if (status == HERMON_CMD_INVALID_STATUS) {
2447                                 hermon_fm_ereport(state, HCA_SYS_ERR,
2448                                     HCA_ERR_SRV_LOST);
2449                         }
2450                         /*
2451                          * Call deregister and ensure that all current
2452                          * resources get freed up
2453                          */
2454                         if (hermon_mr_deregister(state, &mr,
2455                             HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
2456                                 HERMON_WARNING(state, "failed to deregister "
2457                                     "memory region");
2458                         }
2459                         return (ibc_get_ci_failure(0));
2460                 }
2461         }
2462 
2463         /*
2464          * If we're changing the protection domain, then validate the new one
2465          */
2466         if (flags & IBT_MR_CHANGE_PD) {
2467 
2468                 /* Check for valid PD handle pointer */
2469                 if (pd == NULL) {
2470                         mutex_exit(&mr->mr_lock);
2471                         /*
2472                          * Call deregister and ensure that all current
2473                          * resources get properly freed up. Unnecessary
2474                          * here to attempt to regain software ownership
2475                          * of the MPT entry as that has already been
2476                          * done above.
2477                          */
2478                         if (hermon_mr_deregister(state, &mr,
2479                             HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
2480                             DDI_SUCCESS) {
2481                                 HERMON_WARNING(state, "failed to deregister "
2482                                     "memory region");
2483                         }
2484                         status = IBT_PD_HDL_INVALID;
2485                         goto mrrereg_fail;
2486                 }
2487 
2488                 /* Use the new PD handle in all operations below */
2489                 pd_to_use = pd;
2490 
2491         } else {
2492                 /* Use the current PD handle in all operations below */
2493                 pd_to_use = mr->mr_pdhdl;
2494         }
2495 
2496         /*
2497          * If we're changing access permissions, then validate the new ones
2498          */
2499         if (flags & IBT_MR_CHANGE_ACCESS) {
2500                 /*
2501                  * Validate the access flags.  Both remote write and remote
2502                  * atomic require the local write flag to be set
2503                  */
2504                 if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
2505                     (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
2506                     !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
2507                         mutex_exit(&mr->mr_lock);
2508                         /*
2509                          * Call deregister and ensure that all current
2510                          * resources get properly freed up. Unnecessary
2511                          * here to attempt to regain software ownership
2512                          * of the MPT entry as that has already been
2513                          * done above.
2514                          */
2515                         if (hermon_mr_deregister(state, &mr,
2516                             HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
2517                             DDI_SUCCESS) {
2518                                 HERMON_WARNING(state, "failed to deregister "
2519                                     "memory region");
2520                         }
2521                         status = IBT_MR_ACCESS_REQ_INVALID;
2522                         goto mrrereg_fail;
2523                 }
2524 
2525                 /*
2526                  * Setup and validate the memory region access flags.  This
2527                  * means translating the IBTF's enable flags into the access
2528                  * flags that will be used in later operations.
2529                  */
2530                 acc_flags_to_use = 0;
2531                 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
2532                         acc_flags_to_use |= IBT_MR_WINDOW_BIND;
2533                 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
2534                         acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
2535                 if (flags & IBT_MR_ENABLE_REMOTE_READ)
2536                         acc_flags_to_use |= IBT_MR_REMOTE_READ;
2537                 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
2538                         acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
2539                 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
2540                         acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;
2541 
2542         } else {
2543                 acc_flags_to_use = mr->mr_accflag;
2544         }
2545 
2546         /*
2547          * If we're modifying the translation, then figure out whether
2548          * we can reuse the current MTT resources.  This means calling
2549          * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting
2550          * for the reregistration.  If the current memory region contains
2551          * sufficient MTT entries for the new regions, then it will be
2552          * reused and filled in.  Otherwise, new entries will be allocated,
2553          * the old ones will be freed, and the new entries will be filled
2554          * in.  Note:  If we're not modifying the translation, then we
2555          * should already have all the information we need to update the MPT.
2556          * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return
2557          * a "dereg_level" which is the level of cleanup that needs to be
2558          * passed to hermon_mr_deregister() to finish the cleanup.
2559          */
2560         if (flags & IBT_MR_CHANGE_TRANSLATION) {
2561                 status = hermon_mr_rereg_xlat_helper(state, mr, bind, op,
2562                     &mtt_addr_to_use, sleep, &dereg_level);
2563                 if (status != DDI_SUCCESS) {
2564                         mutex_exit(&mr->mr_lock);
2565                         /*
2566                          * Call deregister and ensure that all resources get
2567                          * properly freed up.
2568                          */
2569                         if (hermon_mr_deregister(state, &mr, dereg_level,
2570                             sleep) != DDI_SUCCESS) {
2571                                 HERMON_WARNING(state, "failed to deregister "
2572                                     "memory region");
2573                         }
2574                         goto mrrereg_fail;
2575                 }
2576                 vaddr_to_use = mr->mr_bindinfo.bi_addr;
2577                 len_to_use   = mr->mr_bindinfo.bi_len;
2578         } else {
2579                 mtt_addr_to_use = mr->mr_mttaddr;
2580                 vaddr_to_use = mr->mr_bindinfo.bi_addr;
2581                 len_to_use   = mr->mr_bindinfo.bi_len;
2582         }
2583 
2584         /*
2585          * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
2586          * when the region was first registered, each key is formed from
2587          * "constrained" bits and "unconstrained" bits.  Note:  If no remote
2588          * access is required, then the RKey value is not filled in.  Otherwise
2589          * both Rkey and LKey are given the same value.
2590          */
2591         mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
2592         if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
2593             (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
2594             (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
2595                 mr->mr_rkey = mr->mr_lkey;
2596         } else
2597                 mr->mr_rkey = 0;
2598 
2599         /*
2600          * Fill in the MPT entry.  This is the final step before passing
2601          * ownership of the MPT entry to the Hermon hardware.  We use all of
2602          * the information collected/calculated above to fill in the
2603          * requisite portions of the MPT.
2604          */
2605         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
2606 
2607         mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
2608         mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND)   ? 1 : 0;
2609         mpt_entry.atomic  = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
2610         mpt_entry.rw      = (acc_flags_to_use & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
2611         mpt_entry.rr      = (acc_flags_to_use & IBT_MR_REMOTE_READ)   ? 1 : 0;
2612         mpt_entry.lw      = (acc_flags_to_use & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
2613         mpt_entry.lr      = 1;
2614         mpt_entry.phys_addr = 0;
2615         mpt_entry.reg_win = HERMON_MPT_IS_REGION;
2616 
2617         mpt_entry.entity_sz     = mr->mr_logmttpgsz;
2618         mpt_entry.mem_key       = mr->mr_lkey;
2619         mpt_entry.pd            = pd_to_use->pd_pdnum;
2620 
2621         mpt_entry.start_addr    = vaddr_to_use;
2622         mpt_entry.reg_win_len   = len_to_use;
2623         mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32;
2624         mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3;
2625 
2626         /*
2627          * Write the updated MPT entry to hardware
2628          *
2629          * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
2630          * against holding the lock around this rereg call in all contexts.
2631          */
2632         status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
2633             sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
2634         if (status != HERMON_CMD_SUCCESS) {
2635                 mutex_exit(&mr->mr_lock);
2636                 cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
2637                     status);
2638                 if (status == HERMON_CMD_INVALID_STATUS) {
2639                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2640                 }
2641                 /*
2642                  * Call deregister and ensure that all current resources get
2643                  * properly freed up. Unnecessary here to attempt to regain
2644                  * software ownership of the MPT entry as that has already
2645                  * been done above.
2646                  */
2647                 if (hermon_mr_deregister(state, &mr,
2648                     HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
2649                         HERMON_WARNING(state, "failed to deregister memory "
2650                             "region");
2651                 }
2652                 return (ibc_get_ci_failure(0));
2653         }
2654 
2655         /*
2656          * If we're changing PD, then update their reference counts now.
2657          * This means decrementing the reference count on the old PD and
2658          * incrementing the reference count on the new PD.
2659          */
2660         if (flags & IBT_MR_CHANGE_PD) {
2661                 hermon_pd_refcnt_dec(mr->mr_pdhdl);
2662                 hermon_pd_refcnt_inc(pd);
2663         }
2664 
2665         /*
2666          * Update the contents of the Hermon Memory Region handle to reflect
2667          * what has been changed.
2668          */
2669         mr->mr_pdhdl   = pd_to_use;
2670         mr->mr_accflag         = acc_flags_to_use;
2671         mr->mr_is_umem         = 0;
2672         mr->mr_is_fmr          = 0;
2673         mr->mr_umemcookie = NULL;
2674         mr->mr_lkey    = hermon_mr_key_swap(mr->mr_lkey);
2675         mr->mr_rkey    = hermon_mr_key_swap(mr->mr_rkey);
2676 
2677         /* New MR handle is same as the old */
2678         *mrhdl_new = mr;
2679         mutex_exit(&mr->mr_lock);
2680 
2681         return (DDI_SUCCESS);
2682 
2683 mrrereg_fail:
2684         return (status);
2685 }
2686 
2687 
2688 /*
2689  * hermon_mr_rereg_xlat_helper
2690  *    Context: Can be called from interrupt or base context.
2691  *    Note: This routine expects the "mr_lock" to be held when it
2692  *    is called.  Upon returning failure, this routine passes information
2693  *    about what "dereg_level" should be passed to hermon_mr_deregister().
2694  */
2695 static int
2696 hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
2697     hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
2698     uint_t sleep, uint_t *dereg_level)
2699 {
2700         hermon_rsrc_t           *mtt, *mtt_refcnt;
2701         hermon_sw_refcnt_t      *swrc_old, *swrc_new;
2702         ddi_dma_handle_t        dmahdl;
2703         uint64_t                nummtt_needed, nummtt_in_currrsrc, max_sz;
2704         uint_t                  mtt_pgsize_bits, bind_type, reuse_dmahdl;
2705         int                     status;
2706 
2707         ASSERT(MUTEX_HELD(&mr->mr_lock));
2708 
2709         /*
2710          * Check the "options" flag.  Currently this flag tells the driver
2711          * whether or not the region should be bound normally (i.e. with
2712          * entries written into the PCI IOMMU) or whether it should be
2713          * registered to bypass the IOMMU.
2714          */
2715         if (op == NULL) {
2716                 bind_type = HERMON_BINDMEM_NORMAL;
2717         } else {
2718                 bind_type = op->mro_bind_type;
2719         }
2720 
2721         /*
2722          * Check for invalid length.  Check is the length is zero or if the
2723          * length is larger than the maximum configured value.  Return error
2724          * if it is.
2725          */
2726         max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
2727         if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
2728                 /*
2729                  * Deregister will be called upon returning failure from this
2730                  * routine. This will ensure that all current resources get
2731                  * properly freed up. Unnecessary to attempt to regain
2732                  * software ownership of the MPT entry as that has already
2733                  * been done above (in hermon_mr_reregister())
2734                  */
2735                 *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT;
2736 
2737                 status = IBT_MR_LEN_INVALID;
2738                 goto mrrereghelp_fail;
2739         }
2740 
2741         /*
2742          * Determine the number of pages necessary for new region and the
2743          * number of pages supported by the current MTT resources
2744          */
2745         nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
2746         nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT;
2747 
2748         /*
2749          * Depending on whether we have enough pages or not, the next step is
2750          * to fill in a set of MTT entries that reflect the new mapping.  In
2751          * the first case below, we already have enough entries.  This means
2752          * we need to unbind the memory from the previous mapping, bind the
2753          * memory for the new mapping, write the new MTT entries, and update
2754          * the mr to reflect the changes.
2755          * In the second case below, we do not have enough entries in the
2756          * current mapping.  So, in this case, we need not only to unbind the
2757          * current mapping, but we need to free up the MTT resources associated
2758          * with that mapping.  After we've successfully done that, we continue
2759          * by binding the new memory, allocating new MTT entries, writing the
2760          * new MTT entries, and updating the mr to reflect the changes.
2761          */
2762 
2763         /*
2764          * If this region is being shared (i.e. MTT refcount != 1), then we
2765          * can't reuse the current MTT resources regardless of their size.
2766          * Instead we'll need to alloc new ones (below) just as if there
2767          * hadn't been enough room in the current entries.
2768          */
2769         swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr;
2770         if (HERMON_MTT_IS_NOT_SHARED(swrc_old) &&
2771             (nummtt_needed <= nummtt_in_currrsrc)) {
2772 
2773                 /*
2774                  * Unbind the old mapping for this memory region, but retain
2775                  * the ddi_dma_handle_t (if possible) for reuse in the bind
2776                  * operation below.  Note:  If original memory region was
2777                  * bound for IOMMU bypass and the new region can not use
2778                  * bypass, then a new DMA handle will be necessary.
2779                  */
2780                 if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2781                         mr->mr_bindinfo.bi_free_dmahdl = 0;
2782                         hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2783                         dmahdl = mr->mr_bindinfo.bi_dmahdl;
2784                         reuse_dmahdl = 1;
2785                 } else {
2786                         hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2787                         dmahdl = NULL;
2788                         reuse_dmahdl = 0;
2789                 }
2790 
2791                 /*
2792                  * Bind the new memory and determine the mapped addresses.
2793                  * As described, this routine and hermon_mr_fast_mtt_write()
2794                  * do the majority of the work for the memory registration
2795                  * operations.  Note:  When we successfully finish the binding,
2796                  * we will set the "bi_free_dmahdl" flag to indicate that
2797                  * even though we may have reused the ddi_dma_handle_t we do
2798                  * wish it to be freed up at some later time.  Note also that
2799                  * if we fail, we may need to cleanup the ddi_dma_handle_t.
2800                  */
2801                 bind->bi_bypass      = bind_type;
2802                 status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
2803                 if (status != DDI_SUCCESS) {
2804                         if (reuse_dmahdl) {
2805                                 ddi_dma_free_handle(&dmahdl);
2806                         }
2807 
2808                         /*
2809                          * Deregister will be called upon returning failure
2810                          * from this routine. This will ensure that all
2811                          * current resources get properly freed up.
2812                          * Unnecessary to attempt to regain software ownership
2813                          * of the MPT entry as that has already been done
2814                          * above (in hermon_mr_reregister()).  Also unnecessary
2815                          * to attempt to unbind the memory.
2816                          */
2817                         *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2818 
2819                         status = IBT_INSUFF_RESOURCE;
2820                         goto mrrereghelp_fail;
2821                 }
2822                 if (reuse_dmahdl) {
2823                         bind->bi_free_dmahdl = 1;
2824                 }
2825 
2826                 /*
2827                  * Using the new mapping, but reusing the current MTT
2828                  * resources, write the updated entries to MTT
2829                  */
2830                 mtt    = mr->mr_mttrsrcp;
2831                 status = hermon_mr_fast_mtt_write(state, mtt, bind,
2832                     mtt_pgsize_bits);
2833                 if (status != DDI_SUCCESS) {
2834                         /*
2835                          * Deregister will be called upon returning failure
2836                          * from this routine. This will ensure that all
2837                          * current resources get properly freed up.
2838                          * Unnecessary to attempt to regain software ownership
2839                          * of the MPT entry as that has already been done
2840                          * above (in hermon_mr_reregister()).  Also unnecessary
2841                          * to attempt to unbind the memory.
2842                          *
2843                          * But we do need to unbind the newly bound memory
2844                          * before returning.
2845                          */
2846                         hermon_mr_mem_unbind(state, bind);
2847                         *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2848 
2849                         /*
2850                          * hermon_mr_fast_mtt_write() returns DDI_FAILURE
2851                          * only if it detects a HW error during DMA.
2852                          */
2853                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2854                         status = ibc_get_ci_failure(0);
2855                         goto mrrereghelp_fail;
2856                 }
2857 
2858                 /* Put the updated information into the Mem Region handle */
2859                 mr->mr_bindinfo        = *bind;
2860                 mr->mr_logmttpgsz = mtt_pgsize_bits;
2861 
2862         } else {
2863                 /*
2864                  * Check if the memory region MTT is shared by any other MRs.
2865                  * Since the resource may be shared between multiple memory
2866                  * regions (as a result of a "RegisterSharedMR()" verb) it is
2867                  * important that we not unbind any resources prematurely.
2868                  */
2869                 if (!HERMON_MTT_IS_SHARED(swrc_old)) {
2870                         /*
2871                          * Unbind the old mapping for this memory region, but
2872                          * retain the ddi_dma_handle_t for reuse in the bind
2873                          * operation below. Note: This can only be done here
2874                          * because the region being reregistered is not
2875                          * currently shared.  Also if original memory region
2876                          * was bound for IOMMU bypass and the new region can
2877                          * not use bypass, then a new DMA handle will be
2878                          * necessary.
2879                          */
2880                         if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2881                                 mr->mr_bindinfo.bi_free_dmahdl = 0;
2882                                 hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2883                                 dmahdl = mr->mr_bindinfo.bi_dmahdl;
2884                                 reuse_dmahdl = 1;
2885                         } else {
2886                                 hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
2887                                 dmahdl = NULL;
2888                                 reuse_dmahdl = 0;
2889                         }
2890                 } else {
2891                         dmahdl = NULL;
2892                         reuse_dmahdl = 0;
2893                 }
2894 
2895                 /*
2896                  * Bind the new memory and determine the mapped addresses.
2897                  * As described, this routine and hermon_mr_fast_mtt_write()
2898                  * do the majority of the work for the memory registration
2899                  * operations.  Note:  When we successfully finish the binding,
2900                  * we will set the "bi_free_dmahdl" flag to indicate that
2901                  * even though we may have reused the ddi_dma_handle_t we do
2902                  * wish it to be freed up at some later time.  Note also that
2903                  * if we fail, we may need to cleanup the ddi_dma_handle_t.
2904                  */
2905                 bind->bi_bypass      = bind_type;
2906                 status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
2907                 if (status != DDI_SUCCESS) {
2908                         if (reuse_dmahdl) {
2909                                 ddi_dma_free_handle(&dmahdl);
2910                         }
2911 
2912                         /*
2913                          * Deregister will be called upon returning failure
2914                          * from this routine. This will ensure that all
2915                          * current resources get properly freed up.
2916                          * Unnecessary to attempt to regain software ownership
2917                          * of the MPT entry as that has already been done
2918                          * above (in hermon_mr_reregister()).  Also unnecessary
2919                          * to attempt to unbind the memory.
2920                          */
2921                         *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2922 
2923                         status = IBT_INSUFF_RESOURCE;
2924                         goto mrrereghelp_fail;
2925                 }
2926                 if (reuse_dmahdl) {
2927                         bind->bi_free_dmahdl = 1;
2928                 }
2929 
2930                 /*
2931                  * Allocate the new MTT entries resource
2932                  */
2933                 status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed,
2934                     sleep, &mtt);
2935                 if (status != DDI_SUCCESS) {
2936                         /*
2937                          * Deregister will be called upon returning failure
2938                          * from this routine. This will ensure that all
2939                          * current resources get properly freed up.
2940                          * Unnecessary to attempt to regain software ownership
2941                          * of the MPT entry as that has already been done
2942                          * above (in hermon_mr_reregister()).  Also unnecessary
2943                          * to attempt to unbind the memory.
2944                          *
2945                          * But we do need to unbind the newly bound memory
2946                          * before returning.
2947                          */
2948                         hermon_mr_mem_unbind(state, bind);
2949                         *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2950 
2951                         status = IBT_INSUFF_RESOURCE;
2952                         goto mrrereghelp_fail;
2953                 }
2954 
2955                 /*
2956                  * Allocate MTT reference count (to track shared memory
2957                  * regions).  As mentioned elsewhere above, this reference
2958                  * count resource may never be used on the given memory region,
2959                  * but if it is ever later registered as a "shared" memory
2960                  * region then this resource will be necessary.  Note:  This
2961                  * is only necessary here if the existing memory region is
2962                  * already being shared (because otherwise we already have
2963                  * a useable reference count resource).
2964                  */
2965                 if (HERMON_MTT_IS_SHARED(swrc_old)) {
2966                         status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1,
2967                             sleep, &mtt_refcnt);
2968                         if (status != DDI_SUCCESS) {
2969                                 /*
2970                                  * Deregister will be called upon returning
2971                                  * failure from this routine. This will ensure
2972                                  * that all current resources get properly
2973                                  * freed up.  Unnecessary to attempt to regain
2974                                  * software ownership of the MPT entry as that
2975                                  * has already been done above (in
2976                                  * hermon_mr_reregister()).  Also unnecessary
2977                                  * to attempt to unbind the memory.
2978                                  *
2979                                  * But we need to unbind the newly bound
2980                                  * memory and free up the newly allocated MTT
2981                                  * entries before returning.
2982                                  */
2983                                 hermon_mr_mem_unbind(state, bind);
2984                                 hermon_rsrc_free(state, &mtt);
2985                                 *dereg_level =
2986                                     HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2987 
2988                                 status = IBT_INSUFF_RESOURCE;
2989                                 goto mrrereghelp_fail;
2990                         }
2991                         swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
2992                         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
2993                         HERMON_MTT_REFCNT_INIT(swrc_new);
2994                 } else {
2995                         mtt_refcnt = mr->mr_mttrefcntp;
2996                 }
2997 
2998                 /*
2999                  * Using the new mapping and the new MTT resources, write the
3000                  * updated entries to MTT
3001                  */
3002                 status = hermon_mr_fast_mtt_write(state, mtt, bind,
3003                     mtt_pgsize_bits);
3004                 if (status != DDI_SUCCESS) {
3005                         /*
3006                          * Deregister will be called upon returning failure
3007                          * from this routine. This will ensure that all
3008                          * current resources get properly freed up.
3009                          * Unnecessary to attempt to regain software ownership
3010                          * of the MPT entry as that has already been done
3011                          * above (in hermon_mr_reregister()).  Also unnecessary
3012                          * to attempt to unbind the memory.
3013                          *
3014                          * But we need to unbind the newly bound memory,
3015                          * free up the newly allocated MTT entries, and
3016                          * (possibly) free the new MTT reference count
3017                          * resource before returning.
3018                          */
3019                         if (HERMON_MTT_IS_SHARED(swrc_old)) {
3020                                 hermon_rsrc_free(state, &mtt_refcnt);
3021                         }
3022                         hermon_mr_mem_unbind(state, bind);
3023                         hermon_rsrc_free(state, &mtt);
3024                         *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
3025 
3026                         status = IBT_INSUFF_RESOURCE;
3027                         goto mrrereghelp_fail;
3028                 }
3029 
3030                 /*
3031                  * Check if the memory region MTT is shared by any other MRs.
3032                  * Since the resource may be shared between multiple memory
3033                  * regions (as a result of a "RegisterSharedMR()" verb) it is
3034                  * important that we not free up any resources prematurely.
3035                  */
3036                 if (HERMON_MTT_IS_SHARED(swrc_old)) {
3037                         /* Decrement MTT reference count for "old" region */
3038                         (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
3039                 } else {
3040                         /* Free up the old MTT entries resource */
3041                         hermon_rsrc_free(state, &mr->mr_mttrsrcp);
3042                 }
3043 
3044                 /* Put the updated information into the mrhdl */
3045                 mr->mr_bindinfo        = *bind;
3046                 mr->mr_logmttpgsz = mtt_pgsize_bits;
3047                 mr->mr_mttrsrcp   = mtt;
3048                 mr->mr_mttrefcntp = mtt_refcnt;
3049         }
3050 
3051         /*
3052          * Calculate and return the updated MTT address (in the DDR address
3053          * space).  This will be used by the caller (hermon_mr_reregister) in
3054          * the updated MPT entry
3055          */
3056         *mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT;
3057 
3058         return (DDI_SUCCESS);
3059 
3060 mrrereghelp_fail:
3061         return (status);
3062 }
3063 
3064 
3065 /*
3066  * hermon_mr_nummtt_needed()
3067  *    Context: Can be called from interrupt or base context.
3068  */
3069 /* ARGSUSED */
3070 static uint64_t
3071 hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind,
3072     uint_t *mtt_pgsize_bits)
3073 {
3074         uint64_t        pg_offset_mask;
3075         uint64_t        pg_offset, tmp_length;
3076 
3077         /*
3078          * For now we specify the page size as 8Kb (the default page size for
3079          * the sun4u architecture), or 4Kb for x86.  Figure out optimal page
3080          * size by examining the dmacookies
3081          */
3082         *mtt_pgsize_bits = PAGESHIFT;
3083 
3084         pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
3085         pg_offset = bind->bi_addr & pg_offset_mask;
3086         tmp_length = pg_offset + (bind->bi_len - 1);
3087         return ((tmp_length >> *mtt_pgsize_bits) + 1);
3088 }
3089 
3090 
3091 /*
3092  * hermon_mr_mem_bind()
3093  *    Context: Can be called from interrupt or base context.
3094  */
3095 static int
3096 hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
3097     ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
3098 {
3099         ddi_dma_attr_t  dma_attr;
3100         int             (*callback)(caddr_t);
3101         int             status;
3102 
3103         /* bi_type must be set to a meaningful value to get a bind handle */
3104         ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
3105             bind->bi_type == HERMON_BINDHDL_BUF ||
3106             bind->bi_type == HERMON_BINDHDL_UBUF);
3107 
3108         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3109 
3110         /* Set the callback flag appropriately */
3111         callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
3112 
3113         /*
3114          * Initialize many of the default DMA attributes.  Then, if we're
3115          * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
3116          */
3117         if (dmahdl == NULL) {
3118                 hermon_dma_attr_init(state, &dma_attr);
3119 #ifdef  __sparc
3120                 if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
3121                         dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
3122                 }
3123 #endif
3124 
3125                 /* set RO if needed - tunable set and 'is_buffer' is non-0 */
3126                 if (is_buffer) {
3127                         if (! (bind->bi_flags & IBT_MR_DISABLE_RO)) {
3128                                 if ((bind->bi_type != HERMON_BINDHDL_UBUF) &&
3129                                     (hermon_kernel_data_ro ==
3130                                     HERMON_RO_ENABLED)) {
3131                                         dma_attr.dma_attr_flags |=
3132                                             DDI_DMA_RELAXED_ORDERING;
3133                                 }
3134                                 if (((bind->bi_type == HERMON_BINDHDL_UBUF) &&
3135                                     (hermon_user_data_ro ==
3136                                     HERMON_RO_ENABLED))) {
3137                                         dma_attr.dma_attr_flags |=
3138                                             DDI_DMA_RELAXED_ORDERING;
3139                                 }
3140                         }
3141                 }
3142 
3143                 /* Allocate a DMA handle for the binding */
3144                 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
3145                     callback, NULL, &bind->bi_dmahdl);
3146                 if (status != DDI_SUCCESS) {
3147                         return (status);
3148                 }
3149                 bind->bi_free_dmahdl = 1;
3150 
3151         } else  {
3152                 bind->bi_dmahdl = dmahdl;
3153                 bind->bi_free_dmahdl = 0;
3154         }
3155 
3156 
3157         /*
3158          * Bind the memory to get the PCI mapped addresses.  The decision
3159          * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
3160          * is determined by the "bi_type" flag.  Note: if the bind operation
3161          * fails then we have to free up the DMA handle and return error.
3162          */
3163         if (bind->bi_type == HERMON_BINDHDL_VADDR) {
3164                 status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
3165                     (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
3166                     (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
3167                     &bind->bi_dmacookie, &bind->bi_cookiecnt);
3168 
3169         } else {  /* HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF */
3170 
3171                 status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
3172                     bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback,
3173                     NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
3174         }
3175         if (status != DDI_DMA_MAPPED) {
3176                 if (bind->bi_free_dmahdl != 0) {
3177                         ddi_dma_free_handle(&bind->bi_dmahdl);
3178                 }
3179                 return (status);
3180         }
3181 
3182         return (DDI_SUCCESS);
3183 }
3184 
3185 
3186 /*
3187  * hermon_mr_mem_unbind()
3188  *    Context: Can be called from interrupt or base context.
3189  */
3190 static void
3191 hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
3192 {
3193         int     status;
3194 
3195         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3196         /* there is nothing to unbind for alloc_lkey */
3197         if (bind->bi_type == HERMON_BINDHDL_LKEY)
3198                 return;
3199 
3200         /*
3201          * In case of HERMON_BINDHDL_UBUF, the memory bi_buf points to
3202          * is actually allocated by ddi_umem_iosetup() internally, then
3203          * it's required to free it here. Reset bi_type to HERMON_BINDHDL_NONE
3204          * not to free it again later.
3205          */
3206         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
3207         if (bind->bi_type == HERMON_BINDHDL_UBUF) {
3208                 freerbuf(bind->bi_buf);
3209                 bind->bi_type = HERMON_BINDHDL_NONE;
3210         }
3211         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
3212 
3213         /*
3214          * Unbind the DMA memory for the region
3215          *
3216          * Note: The only way ddi_dma_unbind_handle() currently
3217          * can return an error is if the handle passed in is invalid.
3218          * Since this should never happen, we choose to return void
3219          * from this function!  If this does return an error, however,
3220          * then we print a warning message to the console.
3221          */
3222         status = ddi_dma_unbind_handle(bind->bi_dmahdl);
3223         if (status != DDI_SUCCESS) {
3224                 HERMON_WARNING(state, "failed to unbind DMA mapping");
3225                 return;
3226         }
3227 
3228         /* Free up the DMA handle */
3229         if (bind->bi_free_dmahdl != 0) {
3230                 ddi_dma_free_handle(&bind->bi_dmahdl);
3231         }
3232 }
3233 
3234 
3235 /*
3236  * hermon_mr_fast_mtt_write()
3237  *    Context: Can be called from interrupt or base context.
3238  */
3239 static int
3240 hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
3241     hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits)
3242 {
3243         hermon_icm_table_t      *icm_table;
3244         hermon_dma_info_t       *dma_info;
3245         uint32_t                index1, index2, rindx;
3246         ddi_dma_cookie_t        dmacookie;
3247         uint_t                  cookie_cnt;
3248         uint64_t                *mtt_table;
3249         uint64_t                mtt_entry;
3250         uint64_t                addr, endaddr;
3251         uint64_t                pagesize;
3252         offset_t                i, start;
3253         uint_t                  per_span;
3254         int                     sync_needed;
3255 
3256         /*
3257          * XXX According to the PRM, we are to use the WRITE_MTT
3258          * command to write out MTTs. Tavor does not do this,
3259          * instead taking advantage of direct access to the MTTs,
3260          * and knowledge that Mellanox FMR relies on our ability
3261          * to write directly to the MTTs without any further
3262          * notification to the firmware. Likewise, we will choose
3263          * to not use the WRITE_MTT command, but to simply write
3264          * out the MTTs.
3265          */
3266 
3267         /* Calculate page size from the suggested value passed in */
3268         pagesize = ((uint64_t)1 << mtt_pgsize_bits);
3269 
3270         /* Walk the "cookie list" and fill in the MTT table entries */
3271         dmacookie  = bind->bi_dmacookie;
3272         cookie_cnt = bind->bi_cookiecnt;
3273 
3274         icm_table = &state->hs_icm[HERMON_MTT];
3275         rindx = mtt->hr_indx;
3276         hermon_index(index1, index2, rindx, icm_table, i);
3277         start = i;
3278 
3279         per_span   = icm_table->span;
3280         dma_info   = icm_table->icm_dma[index1] + index2;
3281         mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;
3282 
3283         sync_needed = 0;
3284         while (cookie_cnt-- > 0) {
3285                 addr    = dmacookie.dmac_laddress;
3286                 endaddr = addr + (dmacookie.dmac_size - 1);
3287                 addr    = addr & ~((uint64_t)pagesize - 1);
3288 
3289                 while (addr <= endaddr) {
3290 
3291                         /*
3292                          * Fill in the mapped addresses (calculated above) and
3293                          * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry.
3294                          */
3295                         mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT;
3296                         mtt_table[i] = htonll(mtt_entry);
3297                         i++;
3298                         rindx++;
3299 
3300                         if (i == per_span) {
3301 
3302                                 (void) ddi_dma_sync(dma_info->dma_hdl,
3303                                     start * sizeof (hermon_hw_mtt_t),
3304                                     (i - start) * sizeof (hermon_hw_mtt_t),
3305                                     DDI_DMA_SYNC_FORDEV);
3306 
3307                                 if ((addr + pagesize > endaddr) &&
3308                                     (cookie_cnt == 0))
3309                                         return (DDI_SUCCESS);
3310 
3311                                 hermon_index(index1, index2, rindx, icm_table,
3312                                     i);
3313                                 start = i * sizeof (hermon_hw_mtt_t);
3314                                 dma_info = icm_table->icm_dma[index1] + index2;
3315                                 mtt_table =
3316                                     (uint64_t *)(uintptr_t)dma_info->vaddr;
3317 
3318                                 sync_needed = 0;
3319                         } else {
3320                                 sync_needed = 1;
3321                         }
3322 
3323                         addr += pagesize;
3324                         if (addr == 0) {
3325                                 static int do_once = 1;
3326                                 _NOTE(SCHEME_PROTECTS_DATA("safe sharing",
3327                                     do_once))
3328                                 if (do_once) {
3329                                         do_once = 0;
3330                                         cmn_err(CE_NOTE, "probable error in "
3331                                             "dma_cookie address from caller\n");
3332                                 }
3333                                 break;
3334                         }
3335                 }
3336 
3337                 /*
3338                  * When we've reached the end of the current DMA cookie,
3339                  * jump to the next cookie (if there are more)
3340                  */
3341                 if (cookie_cnt != 0) {
3342                         ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
3343                 }
3344         }
3345 
3346         /* done all the cookies, now sync the memory for the device */
3347         if (sync_needed)
3348                 (void) ddi_dma_sync(dma_info->dma_hdl,
3349                     start * sizeof (hermon_hw_mtt_t),
3350                     (i - start) * sizeof (hermon_hw_mtt_t),
3351                     DDI_DMA_SYNC_FORDEV);
3352 
3353         return (DDI_SUCCESS);
3354 }
3355 
3356 /*
3357  * hermon_mr_fast_mtt_write_fmr()
3358  *    Context: Can be called from interrupt or base context.
3359  */
3360 /* ARGSUSED */
3361 static int
3362 hermon_mr_fast_mtt_write_fmr(hermon_state_t *state, hermon_rsrc_t *mtt,
3363     ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits)
3364 {
3365         hermon_icm_table_t      *icm_table;
3366         hermon_dma_info_t       *dma_info;
3367         uint32_t                index1, index2, rindx;
3368         uint64_t                *mtt_table;
3369         offset_t                i, j;
3370         uint_t                  per_span;
3371 
3372         icm_table = &state->hs_icm[HERMON_MTT];
3373         rindx = mtt->hr_indx;
3374         hermon_index(index1, index2, rindx, icm_table, i);
3375         per_span   = icm_table->span;
3376         dma_info   = icm_table->icm_dma[index1] + index2;
3377         mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;
3378 
3379         /*
3380          * Fill in the MTT table entries
3381          */
3382         for (j = 0; j < mem_pattr->pmr_num_buf; j++) {
3383                 mtt_table[i] = mem_pattr->pmr_addr_list[j].p_laddr;
3384                 i++;
3385                 rindx++;
3386                 if (i == per_span) {
3387                         hermon_index(index1, index2, rindx, icm_table, i);
3388                         dma_info = icm_table->icm_dma[index1] + index2;
3389                         mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr;
3390                 }
3391         }
3392 
3393         return (DDI_SUCCESS);
3394 }
3395 
3396 
3397 /*
3398  * hermon_mtt_refcnt_inc()
3399  *    Context: Can be called from interrupt or base context.
3400  */
3401 static uint_t
3402 hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc)
3403 {
3404         hermon_sw_refcnt_t *rc;
3405 
3406         rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3407         return (atomic_inc_uint_nv(&rc->swrc_refcnt));
3408 }
3409 
3410 
3411 /*
3412  * hermon_mtt_refcnt_dec()
3413  *    Context: Can be called from interrupt or base context.
3414  */
3415 static uint_t
3416 hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc)
3417 {
3418         hermon_sw_refcnt_t *rc;
3419 
3420         rc = (hermon_sw_refcnt_t *)rsrc->hr_addr;
3421         return (atomic_dec_uint_nv(&rc->swrc_refcnt));
3422 }