1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * hermon_misc.c
  28  *    Hermon Miscellaneous routines - Address Handle, Multicast, Protection
  29  *    Domain, and port-related operations
  30  *
  31  *    Implements all the routines necessary for allocating, freeing, querying
  32  *    and modifying Address Handles and Protection Domains.  Also implements
  33  *    all the routines necessary for adding and removing Queue Pairs to/from
  34  *    Multicast Groups.  Lastly, it implements the routines necessary for
  35  *    port-related query and modify operations.
  36  */
  37 
  38 #include <sys/types.h>
  39 #include <sys/conf.h>
  40 #include <sys/ddi.h>
  41 #include <sys/sunddi.h>
  42 #include <sys/modctl.h>
  43 #include <sys/bitmap.h>
  44 #include <sys/sysmacros.h>
  45 
  46 #include <sys/ib/adapters/hermon/hermon.h>
  47 
  48 extern int hermon_rdma_debug;
  49 int hermon_fmr_verbose = 0;
  50 
  51 static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
  52     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found);
  53 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
  54     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp);
  55 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp);
  56 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp);
  57 static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state,
  58     uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx);
  59 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg,
  60     hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc);
  61 static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
  62     uint_t prev_indx, hermon_hw_mcg_t *mcg_entry);
  63 static int hermon_mcg_entry_invalidate(hermon_state_t *state,
  64     hermon_hw_mcg_t *mcg_entry, uint_t indx);
  65 static int hermon_mgid_is_valid(ib_gid_t gid);
  66 static int hermon_mlid_is_valid(ib_lid_t lid);
  67 static void hermon_fmr_cleanup(hermon_fmrhdl_t pool);
  68 
  69 
  70 #define HERMON_MAX_DBR_PAGES_PER_USER   64
  71 #define HERMON_DBR_KEY(index, page) \
  72         (((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page))
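/*
 * Illustrative note: with HERMON_MAX_DBR_PAGES_PER_USER at 64, the key
 * space is simply "index * 64 + page"; e.g. (hypothetical values) UAR
 * index 3, page 2 gives HERMON_DBR_KEY(3, 2) == 194.  The key is used
 * below both as the umap database key (hermon_umap_db_alloc()) and as
 * part of the mmap offset built in hermon_user_dbr_alloc().
 */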
  73 
  74 static hermon_udbr_page_t *
  75 hermon_dbr_new_user_page(hermon_state_t *state, uint_t index,
  76     uint_t page)
  77 {
  78         hermon_udbr_page_t *pagep;
  79         ddi_dma_attr_t dma_attr;
  80         uint_t cookiecnt;
  81         int status;
  82         hermon_umap_db_entry_t *umapdb;
  83         ulong_t pagesize = PAGESIZE;
  84 
  85         pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP);
  86         pagep->upg_index = page;
  87         pagep->upg_nfree = pagesize / sizeof (hermon_dbr_t);
  88 
  89         /* Allocate 1 bit per dbr for free/alloc management (0 => "free") */
  90         pagep->upg_free = kmem_zalloc(pagesize / sizeof (hermon_dbr_t) / 8,
  91             KM_SLEEP);
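        /*
         * Sizing note (illustrative, assuming an 8-byte hermon_dbr_t and
         * a 4 KB page): the bitmap just allocated is 4096 / 8 / 8 = 64
         * bytes, one bit for each of the 512 doorbell records on the page.
         */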
  92         pagep->upg_kvaddr = ddi_umem_alloc(pagesize, DDI_UMEM_SLEEP,
  93             &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */
  94 
  95         pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0,
  96             pagesize, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
  97 
  98         hermon_dma_attr_init(state, &dma_attr);
  99 #ifdef  __sparc
 100         if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS)
 101                 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
 102 #endif
 103         status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
 104             DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl);
 105         if (status != DDI_SUCCESS) {
 106                 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: "
 107                     "ddi_dma_alloc_handle failed: %d", status);
 108                 return (NULL);
 109         }
 110         status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl,
 111             pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
 112             DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt);
 113         if (status != DDI_SUCCESS) {
 114                 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: "
 115                     "ddi_dma_buf_bind_handle failed: %d", status);
 116                 ddi_dma_free_handle(&pagep->upg_dmahdl);
 117                 return (NULL);
 118         }
 119         ASSERT(cookiecnt == 1);
 120 
 121         /* create db entry for mmap */
 122         umapdb = hermon_umap_db_alloc(state->hs_instance,
 123             HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC,
 124             (uint64_t)(uintptr_t)pagep);
 125         hermon_umap_db_add(umapdb);
 126         return (pagep);
 127 }
 128 
 129 
 130 /*ARGSUSED*/
 131 static int
 132 hermon_user_dbr_alloc(hermon_state_t *state, uint_t index,
 133     ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t *pdbr,
 134     uint64_t *mapoffset)
 135 {
 136         hermon_user_dbr_t *udbr;
 137         hermon_udbr_page_t *pagep;
 138         uint_t next_page;
 139         int dbr_index;
 140         int i1, i2, i3, last;
 141         uint64_t u64, mask;
 142 
 143         mutex_enter(&state->hs_dbr_lock);
 144         for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
 145                 if (udbr->udbr_index == index)
 146                         break;
 147         if (udbr == NULL) {
 148                 udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP);
 149                 udbr->udbr_link = state->hs_user_dbr;
 150                 state->hs_user_dbr = udbr;
 151                 udbr->udbr_index = index;
 152                 udbr->udbr_pagep = NULL;
 153         }
 154         pagep = udbr->udbr_pagep;
 155         next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1);
 156         while (pagep != NULL)
 157                 if (pagep->upg_nfree > 0)
 158                         break;
 159                 else
 160                         pagep = pagep->upg_link;
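        /*
         * If no partially-free page was found above, grow this UAR index
         * by one more doorbell page.  Note that next_page was computed
         * from the list head, which is the most recently added page, so
         * page indices simply count upward.
         */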
 161         if (pagep == NULL) {
 162                 pagep = hermon_dbr_new_user_page(state, index, next_page);
 163                 if (pagep == NULL) {
 164                         mutex_exit(&state->hs_dbr_lock);
 165                         return (DDI_FAILURE);
 166                 }
 167                 pagep->upg_link = udbr->udbr_pagep;
 168                 udbr->udbr_pagep = pagep;
 169         }
 170 
 171         /* Since nfree > 0, we're assured the loops below will succeed */
 172 
 173         /* First, find a 64-bit word (not all ~0) that has a free dbr */
 174         last = PAGESIZE / sizeof (uint64_t) / 64;
 175         mask = ~0ull;
 176         for (i1 = 0; i1 < last; i1++)
 177                 if ((pagep->upg_free[i1] & mask) != mask)
 178                         break;
 179         u64 = pagep->upg_free[i1];
 180 
 181         /* Second, find a byte (not 0xff) that has a free dbr */
 182         last = sizeof (uint64_t) / sizeof (uint8_t);
 183         for (i2 = 0, mask = 0xff; i2 < last; i2++, mask <<= 8)
 184                 if ((u64 & mask) != mask)
 185                         break;
 186 
 187         /* Third, find a bit that is free (0) */
 188         for (i3 = 0; i3 < sizeof (uint64_t) / sizeof (uint8_t); i3++)
 189                 if ((u64 & (1ul << (i3 + 8 * i2))) == 0)
 190                         break;
 191 
 192         /* Mark it as allocated */
 193         pagep->upg_free[i1] |= (1ul << (i3 + 8 * i2));
 194 
 195         dbr_index = ((i1 * sizeof (uint64_t)) + i2) * sizeof (uint64_t) + i3;
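        /*
         * Worked example for the computation above (illustrative values):
         * i1 indexes 64-bit words of the free bitmap, i2 bytes within
         * that word, and i3 bits within that byte; since sizeof (uint64_t)
         * is 8, i1 = 2, i2 = 3, i3 = 5 gives
         * dbr_index = 2*64 + 3*8 + 5 = 157.
         */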
 196         pagep->upg_nfree--;
 197         ((uint64_t *)(void *)pagep->upg_kvaddr)[dbr_index] = 0;      /* clear dbr */
 198         *mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) <<
 199             MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT;
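        /*
         * The mapoffset returned to the caller encodes, from low bits to
         * high: the resource type (MLNX_UMAP_DBRMEM_RSRC), then the
         * HERMON_DBR_KEY(index, page) above MLNX_UMAP_RSRC_TYPE_SHIFT,
         * all shifted up to a page boundary.  Presumably the devmap/umap
         * path uses this offset to recover the key and find the page
         * registered via hermon_umap_db_alloc() above.
         */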
 200         *vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr +
 201             dbr_index);
 202         *pdbr = pagep->upg_dmacookie.dmac_laddress + dbr_index *
 203             sizeof (uint64_t);
 204 
 205         mutex_exit(&state->hs_dbr_lock);
 206         return (DDI_SUCCESS);
 207 }
 208 
 209 static void
 210 hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record)
 211 {
 212         hermon_user_dbr_t       *udbr;
 213         hermon_udbr_page_t      *pagep;
 214         caddr_t                 kvaddr;
 215         uint_t                  dbr_index;
 216         uint_t                  max_free = PAGESIZE / sizeof (hermon_dbr_t);
 217         int                     i1, i2;
 218 
 219         dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */
 220         kvaddr = (caddr_t)record - dbr_index;
 221         dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */
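        /*
         * Illustrative arithmetic for the above (assuming an 8-byte
         * hermon_dbr_t): a record at page offset 0x218 gives
         * kvaddr = record - 0x218 and dbr_index = 0x218 / 8 = 0x43.
         */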
 222 
 223         mutex_enter(&state->hs_dbr_lock);
 224         for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
 225                 if (udbr->udbr_index == index)
 226                         break;
 227         if (udbr == NULL) {
 228                 IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not "
 229                     "found for index %x", index);
 230                 mutex_exit(&state->hs_dbr_lock);
 231                 return;
 232         }
 233         for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link)
 234                 if (pagep->upg_kvaddr == kvaddr)
 235                         break;
 236         if (pagep == NULL) {
 237                 IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not"
 238                     " found for index %x, kvaddr %p, DBR index %x",
 239                     index, kvaddr, dbr_index);
 240                 mutex_exit(&state->hs_dbr_lock);
 241                 return;
 242         }
 243         if (pagep->upg_nfree >= max_free) {
 244                 IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: "
 245                     "UCE index %x, DBR index %x", index, dbr_index);
 246                 mutex_exit(&state->hs_dbr_lock);
 247                 return;
 248         }
 249         ASSERT(dbr_index < max_free);
 250         i1 = dbr_index / 64;
 251         i2 = dbr_index % 64;
 252         ASSERT((pagep->upg_free[i1] & (1ul << i2)) == (1ul << i2));
 253         pagep->upg_free[i1] &= ~(1ul << i2);
 254         pagep->upg_nfree++;
 255         mutex_exit(&state->hs_dbr_lock);
 256 }
 257 
 258 /*
 259  * hermon_dbr_page_alloc()
 260  *      first page allocation - called from attach or open
 261  *      in this case, we want exactly one page per call, and aligned on a
 262  *      page - and may need to be mapped to the user for access
 263  */
 264 int
 265 hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo)
 266 {
 267         int                     status;
 268         ddi_dma_handle_t        dma_hdl;
 269         ddi_acc_handle_t        acc_hdl;
 270         ddi_dma_attr_t          dma_attr;
 271         ddi_dma_cookie_t        cookie;
 272         uint_t                  cookie_cnt;
 273         int                     i;
 274         hermon_dbr_info_t       *info;
 275         caddr_t                 dmaaddr;
 276         uint64_t                dmalen;
 277         ulong_t                 pagesize = PAGESIZE;
 278 
 279         info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP);
 280 
 281         /*
 282          * Initialize many of the default DMA attributes.  Then set additional
 283          * alignment restrictions if necessary for the dbr memory, meaning
 284          * page aligned.  Also use the configured value for IOMMU bypass
 285          */
 286         hermon_dma_attr_init(state, &dma_attr);
 287         dma_attr.dma_attr_align = pagesize;
 288         dma_attr.dma_attr_sgllen = 1;   /* make sure only one cookie */
 289 #ifdef  __sparc
 290         if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS)
 291                 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
 292 #endif
 293 
 294         status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
 295             DDI_DMA_SLEEP, NULL, &dma_hdl);
 296         if (status != DDI_SUCCESS) {
 297                 kmem_free((void *)info, sizeof (hermon_dbr_info_t));
 298                 cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n");
 299                 return (DDI_FAILURE);
 300         }
 301 
 302         status = ddi_dma_mem_alloc(dma_hdl, pagesize,
 303             &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
 304             NULL, &dmaaddr, (size_t *)&dmalen, &acc_hdl);
 305         if (status != DDI_SUCCESS)      {
 306                 ddi_dma_free_handle(&dma_hdl);
 307                 cmn_err(CE_CONT, "dbr DMA mem alloc failed (status %d)", status);
 308                 kmem_free((void *)info, sizeof (hermon_dbr_info_t));
 309                 return (DDI_FAILURE);
 310         }
 311 
 312         /* this memory won't be IB registered, so do the bind here */
 313         status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
 314             dmaaddr, (size_t)dmalen, DDI_DMA_RDWR |
 315             DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt);
 316         if (status != DDI_SUCCESS) {
 317                 ddi_dma_mem_free(&acc_hdl);
 318                 ddi_dma_free_handle(&dma_hdl);
 319                 kmem_free((void *)info, sizeof (hermon_dbr_info_t));
 320                 cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)",
 321                     status);
 322                 return (DDI_FAILURE);
 323         }
 324         *dinfo = info;          /* Pass back the pointer */
 325 
 326         /* init the info structure with returned info */
 327         info->dbr_dmahdl = dma_hdl;
 328         info->dbr_acchdl = acc_hdl;
 329         info->dbr_page   = (hermon_dbr_t *)(void *)dmaaddr;
 330         info->dbr_link = NULL;
 331         /* extract the phys addr from the cookie */
 332         info->dbr_paddr = cookie.dmac_laddress;
 333         info->dbr_firstfree = 0;
 334         info->dbr_nfree = HERMON_NUM_DBR_PER_PAGE;
 335         /* link all DBrs onto the free list */
 336         for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) {
 337                 info->dbr_page[i] = i + 1;
 338         }
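        /*
         * Note on the encoding set up above: the page itself doubles as
         * the free list.  Each free record holds the index of the next
         * free record and dbr_firstfree is the head; the final entry ends
         * up pointing one past the end, but it is never followed because
         * dbr_nfree reaches zero first.
         */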
 339 
 340         return (DDI_SUCCESS);
 341 }
 342 
 343 
 344 /*
 345  * hermon_dbr_alloc()
 346  *      DBr record allocation - called from alloc cq/qp/srq
 347  *      will check for available dbrs in current
 348  *      page - if needed it will allocate another and link them
 349  */
 350 
 351 int
 352 hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl,
 353     hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset)
 354 {
 355         hermon_dbr_t            *record = NULL;
 356         hermon_dbr_info_t       *info = NULL;
 357         uint32_t                idx;
 358         int                     status;
 359 
 360         if (index != state->hs_kernel_uar_index)
 361                 return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr,
 362                     mapoffset));
 363 
 364         mutex_enter(&state->hs_dbr_lock);
 365         for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
 366                 if (info->dbr_nfree != 0)
 367                         break;          /* found a page w/ one available */
 368 
 369         if (info == NULL) {     /* did NOT find a page with one available */
 370                 status = hermon_dbr_page_alloc(state, &info);
 371                 if (status != DDI_SUCCESS) {
 372                         /* do error handling */
 373                         mutex_exit(&state->hs_dbr_lock);
 374                         return (DDI_FAILURE);
 375                 }
 376                 /* got a new page, so link it in. */
 377                 info->dbr_link = state->hs_kern_dbr;
 378                 state->hs_kern_dbr = info;
 379         }
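        /*
         * Pop the head of the page's embedded free list: dbr_firstfree is
         * the index of the first free record, and that record holds the
         * index of the next free one.  The record is zeroed before being
         * handed back to the caller.
         */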
 380         idx = info->dbr_firstfree;
 381         record = info->dbr_page + idx;
 382         info->dbr_firstfree = *record;
 383         info->dbr_nfree--;
 384         *record = 0;
 385 
 386         *acchdl = info->dbr_acchdl;
 387         *vdbr = record;
 388         *pdbr = info->dbr_paddr + idx * sizeof (hermon_dbr_t);
 389         mutex_exit(&state->hs_dbr_lock);
 390         return (DDI_SUCCESS);
 391 }
 392 
 393 /*
 394  * hermon_dbr_free()
 395  *      DBr record deallocation - called from free cq/qp
 396  *      will update the counter in the header, and invalidate
 397  *      the dbr, but will NEVER free pages of dbrs - a small
 398  *      price to pay, and userland-mapped pages could never be freed anyway
 399  */
 400 void
 401 hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record)
 402 {
 403         hermon_dbr_t            *page;
 404         hermon_dbr_info_t       *info;
 405 
 406         if (indx != state->hs_kernel_uar_index) {
 407                 hermon_user_dbr_free(state, indx, record);
 408                 return;
 409         }
 410         page = (hermon_dbr_t *)(uintptr_t)((uintptr_t)record & PAGEMASK);
 411         mutex_enter(&state->hs_dbr_lock);
 412         for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
 413                 if (info->dbr_page == page)
 414                         break;
 415         ASSERT(info != NULL);
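        /*
         * Push the record back onto the page's embedded free list: the
         * record is overwritten with the old head index and its own
         * index becomes the new dbr_firstfree.
         */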
 416         *record = info->dbr_firstfree;
 417         info->dbr_firstfree = record - info->dbr_page;
 418         info->dbr_nfree++;
 419         mutex_exit(&state->hs_dbr_lock);
 420 }
 421 
 422 /*
 423  * hermon_dbr_kern_free()
 424  *    Context: Can be called only from detach context.
 425  *
 426  *      Free all kernel dbr pages.  This includes the freeing of all the dma
 427  *      resources acquired during the allocation of the pages.
 428  *
 429  *      Also, free all the user dbr pages.
 430  */
 431 void
 432 hermon_dbr_kern_free(hermon_state_t *state)
 433 {
 434         hermon_dbr_info_t       *info, *link;
 435         hermon_user_dbr_t       *udbr, *next;
 436         hermon_udbr_page_t      *pagep, *nextp;
 437         hermon_umap_db_entry_t  *umapdb;
 438         int                     instance, status;
 439         uint64_t                value;
 440         extern                  hermon_umap_db_t hermon_userland_rsrc_db;
 441 
 442         mutex_enter(&state->hs_dbr_lock);
 443         for (info = state->hs_kern_dbr; info != NULL; info = link) {
 444                 (void) ddi_dma_unbind_handle(info->dbr_dmahdl);
 445                 ddi_dma_mem_free(&info->dbr_acchdl);     /* free page */
 446                 ddi_dma_free_handle(&info->dbr_dmahdl);
 447                 link = info->dbr_link;
 448                 kmem_free(info, sizeof (hermon_dbr_info_t));
 449         }
 450 
 451         udbr = state->hs_user_dbr;
 452         instance = state->hs_instance;
 453         mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
 454         while (udbr != NULL) {
 455                 pagep = udbr->udbr_pagep;
 456                 while (pagep != NULL) {
 457                         /* probably need to remove "db" */
 458                         (void) ddi_dma_unbind_handle(pagep->upg_dmahdl);
 459                         ddi_dma_free_handle(&pagep->upg_dmahdl);
 460                         freerbuf(pagep->upg_buf);
 461                         ddi_umem_free(pagep->upg_umemcookie);
 462                         status = hermon_umap_db_find_nolock(instance,
 463                             HERMON_DBR_KEY(udbr->udbr_index,
 464                             pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC,
 465                             &value, HERMON_UMAP_DB_REMOVE, &umapdb);
 466                         if (status == DDI_SUCCESS)
 467                                 hermon_umap_db_free(umapdb);
 468                         kmem_free(pagep->upg_free,
 469                             PAGESIZE / sizeof (hermon_dbr_t) / 8);
 470                         nextp = pagep->upg_link;
 471                         kmem_free(pagep, sizeof (*pagep));
 472                         pagep = nextp;
 473                 }
 474                 next = udbr->udbr_link;
 475                 kmem_free(udbr, sizeof (*udbr));
 476                 udbr = next;
 477         }
 478         mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
 479         mutex_exit(&state->hs_dbr_lock);
 480 }
 481 
 482 /*
 483  * hermon_ah_alloc()
 484  *    Context: Can be called only from user or kernel context.
 485  */
 486 int
 487 hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd,
 488     ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
 489 {
 490         hermon_rsrc_t           *rsrc;
 491         hermon_hw_udav_t        *udav;
 492         hermon_ahhdl_t          ah;
 493         int                     status;
 494 
 495         /*
 496          * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
 497          * indicate that we wish to allocate an "invalid" (i.e. empty)
 498          * address handle XXX
 499          */
 500 
 501         /* Validate that specified port number is legal */
 502         if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
 503                 return (IBT_HCA_PORT_INVALID);
 504         }
 505 
 506         /*
 507          * Allocate the software structure for tracking the address handle
 508          * (i.e. the Hermon Address Handle struct).
 509          */
 510         status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc);
 511         if (status != DDI_SUCCESS) {
 512                 return (IBT_INSUFF_RESOURCE);
 513         }
 514         ah = (hermon_ahhdl_t)rsrc->hr_addr;
 515 
 516         /* Increment the reference count on the protection domain (PD) */
 517         hermon_pd_refcnt_inc(pd);
 518 
 519         udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t),
 520             KM_SLEEP);
 521 
 522         /*
 523          * Fill in the UDAV data. We first zero out the UDAV, then populate
 524          * it by calling hermon_set_addr_path() to fill in the common
 525          * portions that can be pulled from the "ibt_adds_vect_t" passed in
 526          */
 527         status = hermon_set_addr_path(state, attr_p,
 528             (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV);
 529         if (status != DDI_SUCCESS) {
 530                 hermon_pd_refcnt_dec(pd);
 531                 hermon_rsrc_free(state, &rsrc);
 532                 return (status);
 533         }
 534         udav->pd     = pd->pd_pdnum;
 535         udav->sl     = attr_p->av_srvl;
 536 
 537         /*
 538          * Fill in the rest of the Hermon Address Handle struct.
 539          *
 540          * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
 541          * here because we may need to return it later to the IBTF (as a
 542          * result of a subsequent query operation).  Unlike the other UDAV
 543          * parameters, the value of "av_dgid.gid_guid" is not always preserved.
 544          * The reason for this is described in hermon_set_addr_path().
 545          */
 546         ah->ah_rsrcp  = rsrc;
 547         ah->ah_pdhdl  = pd;
 548         ah->ah_udav   = udav;
 549         ah->ah_save_guid = attr_p->av_dgid.gid_guid;
 550         *ahhdl = ah;
 551 
 552         return (DDI_SUCCESS);
 553 }
 554 
 555 
 556 /*
 557  * hermon_ah_free()
 558  *    Context: Can be called only from user or kernel context.
 559  */
 560 /* ARGSUSED */
 561 int
 562 hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
 563 {
 564         hermon_rsrc_t           *rsrc;
 565         hermon_pdhdl_t          pd;
 566         hermon_ahhdl_t          ah;
 567 
 568         /*
 569          * Pull all the necessary information from the Hermon Address Handle
 570          * struct.  This is necessary here because the resource for the
 571          * AH is going to be freed up as part of this operation.
 572          */
 573         ah    = *ahhdl;
 574         mutex_enter(&ah->ah_lock);
 575         rsrc  = ah->ah_rsrcp;
 576         pd    = ah->ah_pdhdl;
 577         mutex_exit(&ah->ah_lock);
 578 
 579         /* Free the UDAV memory */
 580         kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t));
 581 
 582         /* Decrement the reference count on the protection domain (PD) */
 583         hermon_pd_refcnt_dec(pd);
 584 
 585         /* Free the Hermon Address Handle structure */
 586         hermon_rsrc_free(state, &rsrc);
 587 
 588         /* Set the ahhdl pointer to NULL and return success */
 589         *ahhdl = NULL;
 590 
 591         return (DDI_SUCCESS);
 592 }
 593 
 594 
 595 /*
 596  * hermon_ah_query()
 597  *    Context: Can be called from interrupt or base context.
 598  */
 599 /* ARGSUSED */
 600 int
 601 hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd,
 602     ibt_adds_vect_t *attr_p)
 603 {
 604         mutex_enter(&ah->ah_lock);
 605 
 606         /*
 607          * Pull the PD and UDAV from the Hermon Address Handle structure
 608          */
 609         *pd = ah->ah_pdhdl;
 610 
 611         /*
 612          * Fill in "ibt_adds_vect_t".  We call hermon_get_addr_path() to fill
 613          * the common portions that can be pulled from the UDAV we pass in.
 614          *
 615          * NOTE: We will also fill the "av_dgid.gid_guid" field from the
 616          * "ah_save_guid" field we have previously saved away.  The reason
 617          * for this is described in hermon_ah_alloc() and hermon_ah_modify().
 618          */
 619         hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav,
 620             attr_p, HERMON_ADDRPATH_UDAV);
 621 
 622         attr_p->av_dgid.gid_guid = ah->ah_save_guid;
 623 
 624         mutex_exit(&ah->ah_lock);
 625         return (DDI_SUCCESS);
 626 }
 627 
 628 
 629 /*
 630  * hermon_ah_modify()
 631  *    Context: Can be called from interrupt or base context.
 632  */
 633 /* ARGSUSED */
 634 int
 635 hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah,
 636     ibt_adds_vect_t *attr_p)
 637 {
 638         hermon_hw_udav_t        old_udav;
 639         uint64_t                data_old;
 640         int                     status, size, i;
 641 
 642         /* Validate that specified port number is legal */
 643         if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
 644                 return (IBT_HCA_PORT_INVALID);
 645         }
 646 
 647         mutex_enter(&ah->ah_lock);
 648 
 649         /* Save a copy of the current UDAV data in old_udav. */
 650         bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t));
 651 
 652         /*
 653          * Fill in the new UDAV with the caller's data, passed in via the
 654          * "ibt_adds_vect_t" structure.
 655          *
 656          * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
 657          * field here (just as we did during hermon_ah_alloc()) because we
 658          * may need to return it later to the IBTF (as a result of a
 659          * subsequent query operation).  As explained in hermon_ah_alloc(),
 660          * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
 661          * is not always preserved. The reason for this is described in
 662          * hermon_set_addr_path().
 663          */
 664         status = hermon_set_addr_path(state, attr_p,
 665             (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV);
 666         if (status != DDI_SUCCESS) {
 667                 mutex_exit(&ah->ah_lock);
 668                 return (status);
 669         }
 670         ah->ah_save_guid = attr_p->av_dgid.gid_guid;
 671         ah->ah_udav->sl  = attr_p->av_srvl;
 672 
 673         /*
 674          * Copy changes into the new UDAV.
 675          *    Note:  We copy in 64-bit chunks.  For the first two of these
 676          *    chunks it is necessary to read the current contents of the
 677          *    UDAV, mask off the modifiable portions (maintaining the
 678          *    "reserved" portions), and then OR the result into the new data.
 679          */
 680         size = sizeof (hermon_hw_udav_t) >> 3;
 681         for (i = 0; i < size; i++) {
 682                 data_old = ((uint64_t *)&old_udav)[i];
 683 
 684                 /*
 685                  * Apply mask to change only the relevant values.
 686                  */
 687                 if (i == 0) {
 688                         data_old = data_old & HERMON_UDAV_MODIFY_MASK0;
 689                 } else if (i == 1) {
 690                         data_old = data_old & HERMON_UDAV_MODIFY_MASK1;
 691                 } else {
 692                         data_old = 0;
 693                 }
 694 
 695                 /* Store the updated values to the UDAV */
 696                 ((uint64_t *)ah->ah_udav)[i] |= data_old;
 697         }
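        /*
         * In effect, for the first two quadwords the loop above leaves
         * new[i] |= (old[i] & HERMON_UDAV_MODIFY_MASK0/1), i.e. the
         * non-modifiable ("reserved") bits of the old UDAV are preserved
         * on top of what hermon_set_addr_path() just wrote; the remaining
         * quadwords are left exactly as hermon_set_addr_path() filled them.
         */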
 698 
 699         /*
 700          * Put the valid PD number back into the UDAV entry, as it
 701          * might have been clobbered above.
 702          */
 703         ah->ah_udav->pd = old_udav.pd;
 704 
 705 
 706         mutex_exit(&ah->ah_lock);
 707         return (DDI_SUCCESS);
 708 }
 709 
 710 /*
 711  * hermon_mcg_attach()
 712  *    Context: Can be called only from user or kernel context.
 713  */
 714 int
 715 hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
 716     ib_lid_t lid)
 717 {
 718         hermon_rsrc_t           *rsrc;
 719         hermon_hw_mcg_t         *mcg_entry;
 720         hermon_hw_mcg_qp_list_t *mcg_entry_qplist;
 721         hermon_mcghdl_t         mcg, newmcg;
 722         uint64_t                mgid_hash;
 723         uint32_t                end_indx;
 724         int                     status;
 725         uint_t                  qp_found;
 726 
 727         /*
 728          * Only UD queue pairs are allowed to attach to an MCG.  Verify
 729          * that the intended QP is of the appropriate transport type
 730          */
 731         if (qp->qp_serv_type != HERMON_QP_UD) {
 732                 return (IBT_QP_SRV_TYPE_INVALID);
 733         }
 734 
 735         /*
 736          * Check for invalid Multicast DLID.  Specifically, all Multicast
 737          * LIDs should be within a well defined range.  If the specified LID
 738          * is outside of that range, then return an error.
 739          */
 740         if (hermon_mlid_is_valid(lid) == 0) {
 741                 return (IBT_MC_MLID_INVALID);
 742         }
 743         /*
 744          * Check for invalid Multicast GID.  All Multicast GIDs should have
 745          * a well-defined pattern of bits and flags that are allowable.  If
 746          * the specified GID does not meet the criteria, then return an error.
 747          */
 748         if (hermon_mgid_is_valid(gid) == 0) {
 749                 return (IBT_MC_MGID_INVALID);
 750         }
 751 
 752         /*
 753          * Compute the MGID hash value.  Since the MCG table is arranged as
 754          * a number of separate hash chains, this operation converts the
 755          * specified MGID into the starting index of an entry in the hash
 756          * table (i.e. the index for the start of the appropriate hash chain).
 757          * Subsequent operations below will walk the chain searching for the
 758          * right place to add this new QP.
 759          */
 760         status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
 761             &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
 762         if (status != HERMON_CMD_SUCCESS) {
 763                 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
 764                     status);
 765                 if (status == HERMON_CMD_INVALID_STATUS) {
 766                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
 767                 }
 768                 return (ibc_get_ci_failure(0));
 769         }
 770 
 771         /*
 772          * Grab the multicast group mutex.  Then grab the pre-allocated
 773          * temporary buffer used for holding and/or modifying MCG entries.
 774          * Zero out the temporary MCG entry before we begin.
 775          */
 776         mutex_enter(&state->hs_mcglock);
 777         mcg_entry = state->hs_mcgtmp;
 778         mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);
 779         bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
 780 
 781         /*
 782          * Walk through the array of MCG entries starting at "mgid_hash".
 783          * Try to find the appropriate place for this new QP to be added.
 784          * The walk can end when the first entry of the chain has MGID == 0
 785          * (which means that the hash chain is empty), when we find an
 786          * entry with the same MGID (in which case we'll add the QP to
 787          * that MCG), or when we come to the end of the chain (in which
 788          * case this is the first QP being added to the multicast group that
 789          * corresponds to the MGID).  The hermon_mcg_walk_mgid_hash() routine
 790          * walks the list and returns an index into the MCG table.  The entry
 791          * at this index is then checked to determine which case we have
 792          * fallen into (see below).  Note:  We are using the "shadow" MCG
 793          * list (of hermon_mcg_t structs) for this lookup because the real
 794          * MCG entries are in hardware (and the lookup process would be much
 795          * more time consuming).
 796          */
 797         end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL);
 798         mcg      = &state->hs_mcghdl[end_indx];
 799 
 800         /*
 801          * If MGID == 0, then the hash chain is empty.  Just fill in the
 802          * current entry.  Note:  No need to allocate an MCG table entry
 803          * as all the hash chain "heads" are already preallocated.
 804          */
 805         if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) {
 806 
 807                 /* Fill in the current entry in the "shadow" MCG list */
 808                 hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL);
 809 
 810                 /*
 811                  * Try to add the new QP number to the list.  This (and the
 812                  * above) routine fills in a temporary MCG.  The "mcg_entry"
 813                  * and "mcg_entry_qplist" pointers simply point to different
 814                  * offsets within the same temporary copy of the MCG (for
 815                  * convenience).  Note:  If this fails, we need to invalidate
 816                  * the entries we've already put into the "shadow" list entry
 817                  * above.
 818                  */
 819                 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
 820                     &qp_found);
 821                 if (status != DDI_SUCCESS) {
 822                         bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
 823                         mutex_exit(&state->hs_mcglock);
 824                         return (status);
 825                 }
 826                 if (!qp_found)
 827                         mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
 828                             /* set the member count */
 829 
 830                 /*
 831                  * Once the temporary MCG has been filled in, write the entry
 832                  * into the appropriate location in the Hermon MCG entry table.
 833                  * If it's successful, then drop the lock and return success.
 834                  * Note: In general, this operation shouldn't fail.  If it
 835                  * does, then it is an indication that something (probably in
 836                  * HW, but maybe in SW) has gone seriously wrong.  We still
 837                  * want to zero out the entries that we've filled in above
 838                  * (in the hermon_mcg_setup_new_hdr() routine).
 839                  */
 840                 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
 841                     HERMON_CMD_NOSLEEP_SPIN);
 842                 if (status != HERMON_CMD_SUCCESS) {
 843                         bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
 844                         mutex_exit(&state->hs_mcglock);
 845                         HERMON_WARNING(state, "failed to write MCG entry");
 846                         cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
 847                             "%08x\n", status);
 848                         if (status == HERMON_CMD_INVALID_STATUS) {
 849                                 hermon_fm_ereport(state, HCA_SYS_ERR,
 850                                     HCA_ERR_SRV_LOST);
 851                         }
 852                         return (ibc_get_ci_failure(0));
 853                 }
 854 
 855                 /*
 856                  * Now that we know all the Hermon firmware accesses have been
 857                  * successful, we update the "shadow" MCG entry by incrementing
 858                  * the "number of attached QPs" count.
 859                  *
 860                  * We increment only if the QP is not already part of the
 861                  * MCG by checking the 'qp_found' flag returned from the
 862                  * qplist_add above.
 863                  */
 864                 if (!qp_found) {
 865                         mcg->mcg_num_qps++;
 866 
 867                         /*
 868                          * Increment the refcnt for this QP.  Because the QP
 869                          * was added to this MCG, the refcnt must be
 870                          * incremented.
 871                          */
 872                         hermon_qp_mcg_refcnt_inc(qp);
 873                 }
 874 
 875                 /*
 876                  * We drop the lock and return success.
 877                  */
 878                 mutex_exit(&state->hs_mcglock);
 879                 return (DDI_SUCCESS);
 880         }
 881 
 882         /*
 883          * If the specified MGID matches the MGID in the current entry, then
 884          * we need to try to add the QP to the current MCG entry.  In this
 885          * case, it means that we need to read the existing MCG entry (into
 886          * the temporary MCG), add the new QP number to the temporary entry
 887          * (using the same method we used above), and write the entry back
 888          * to the hardware (same as above).
 889          */
 890         if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
 891             (mcg->mcg_mgid_l == gid.gid_guid)) {
 892 
 893                 /*
 894                  * Read the current MCG entry into the temporary MCG.  Note:
 895                  * In general, this operation shouldn't fail.  If it does,
 896                  * then it is an indication that something (probably in HW,
 897                  * but maybe in SW) has gone seriously wrong.
 898                  */
 899                 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
 900                     HERMON_CMD_NOSLEEP_SPIN);
 901                 if (status != HERMON_CMD_SUCCESS) {
 902                         mutex_exit(&state->hs_mcglock);
 903                         HERMON_WARNING(state, "failed to read MCG entry");
 904                         cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
 905                             "%08x\n", status);
 906                         if (status == HERMON_CMD_INVALID_STATUS) {
 907                                 hermon_fm_ereport(state, HCA_SYS_ERR,
 908                                     HCA_ERR_SRV_LOST);
 909                         }
 910                         return (ibc_get_ci_failure(0));
 911                 }
 912 
 913                 /*
 914                  * Try to add the new QP number to the list.  This routine
 915                  * fills in the necessary pieces of the temporary MCG.  The
 916                  * "mcg_entry_qplist" pointer is used to point to the portion
 917                  * of the temporary MCG that holds the QP numbers.
 918                  *
 919                  * Note: hermon_mcg_qplist_add() returns SUCCESS if it
 920                  * already found the QP in the list.  In this case, the QP is
 921                  * not added to the list again.  Check the 'qp_found' flag if
 922                  * the caller needs to know whether it was already present.
 923                  *
 924                  */
 925                 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
 926                     &qp_found);
 927                 if (status != DDI_SUCCESS) {
 928                         mutex_exit(&state->hs_mcglock);
 929                         return (status);
 930                 }
 931                 if (!qp_found)
 932                         mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
 933                             /* set the member count */
 934 
 935                 /*
 936                  * Once the temporary MCG has been updated, write the entry
 937                  * into the appropriate location in the Hermon MCG entry table.
 938                  * If it's successful, then drop the lock and return success.
 939                  * Note: In general, this operation shouldn't fail.  If it
 940                  * does, then it is an indication that something (probably in
 941                  * HW, but maybe in SW) has gone seriously wrong.
 942                  */
 943                 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
 944                     HERMON_CMD_NOSLEEP_SPIN);
 945                 if (status != HERMON_CMD_SUCCESS) {
 946                         mutex_exit(&state->hs_mcglock);
 947                         HERMON_WARNING(state, "failed to write MCG entry");
 948                         cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
 949                             "%08x\n", status);
 950                         if (status == HERMON_CMD_INVALID_STATUS) {
 951                                 hermon_fm_ereport(state, HCA_SYS_ERR,
 952                                     HCA_ERR_SRV_LOST);
 953                         }
 954                         return (ibc_get_ci_failure(0));
 955                 }
 956 
 957                 /*
 958                  * Now that we know all the Hermon firmware accesses have been
 959                  * successful, we update the current "shadow" MCG entry by
 960                  * incrementing the "number of attached QPs" count.
 961                  *
 962                  * We increment only if the QP is not already part of the
 963                  * MCG, as indicated by the 'qp_found' flag returned from
 964                  * the qplist_add above.
 965                  */
 966                 if (!qp_found) {
 967                         mcg->mcg_num_qps++;
 968 
 969                         /*
 970                          * Increment the refcnt for this QP.  Because the QP
 971                          * was added to this MCG, the refcnt must be
 972                          * incremented.
 973                          */
 974                         hermon_qp_mcg_refcnt_inc(qp);
 975                 }
 976 
 977                 /*
 978                  * We drop the lock and return success.
 979                  */
 980                 mutex_exit(&state->hs_mcglock);
 981                 return (DDI_SUCCESS);
 982         }
 983 
 984         /*
 985          * If we've reached here, then we're at the end of the hash chain.
 986          * We need to allocate a new MCG entry, fill it in, write it to Hermon,
 987          * and update the previous entry to link the new one to the end of the
 988          * chain.
 989          */
 990 
 991         /*
 992          * Allocate an MCG table entry.  This will be filled in with all
 993          * the necessary parameters to define the multicast group.  Then it
 994          * will be written to the hardware in the next-to-last step below.
 995          */
 996         status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc);
 997         if (status != DDI_SUCCESS) {
 998                 mutex_exit(&state->hs_mcglock);
 999                 return (IBT_INSUFF_RESOURCE);
1000         }
1001 
1002         /*
1003          * Fill in the new entry in the "shadow" MCG list.  Note:  Just as
1004          * it does above, hermon_mcg_setup_new_hdr() also fills in a portion
1005          * of the temporary MCG entry (the rest of which will be filled in by
1006          * hermon_mcg_qplist_add() below)
1007          */
1008         newmcg = &state->hs_mcghdl[rsrc->hr_indx];
1009         hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);
1010 
1011         /*
1012          * Try to add the new QP number to the list.  This routine fills in
1013          * the final necessary pieces of the temporary MCG.  The
1014          * "mcg_entry_qplist" pointer is used to point to the portion of the
1015          * temporary MCG that holds the QP numbers.  If we fail here, we
1016          * must undo the previous resource allocation.
1017          *
1018          * Note: hermon_mcg_qplist_add() can return SUCCESS if it already
1019          * found the QP in the list.  In this case, the QP is not added to
1020          * the list again.  Check the 'qp_found' flag if the caller needs to
1021          * know whether the QP was already present.
1022          */
1023         status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
1024             &qp_found);
1025         if (status != DDI_SUCCESS) {
1026                 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1027                 hermon_rsrc_free(state, &rsrc);
1028                 mutex_exit(&state->hs_mcglock);
1029                 return (status);
1030         }
1031         mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1);
1032             /* set the member count */
1033 
1034         /*
1035          * Once the temporary MCG has been updated, write the entry into the
1036          * appropriate location in the Hermon MCG entry table.  If this is
1037          * successful, then we need to chain the previous entry to this one.
1038          * Note: In general, this operation shouldn't fail.  If it does, then
1039          * it is an indication that something (probably in HW, but maybe in
1040          * SW) has gone seriously wrong.
1041          */
1042         status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx,
1043             HERMON_CMD_NOSLEEP_SPIN);
1044         if (status != HERMON_CMD_SUCCESS) {
1045                 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1046                 hermon_rsrc_free(state, &rsrc);
1047                 mutex_exit(&state->hs_mcglock);
1048                 HERMON_WARNING(state, "failed to write MCG entry");
1049                 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1050                     status);
1051                 if (status == HERMON_CMD_INVALID_STATUS) {
1052                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1053                 }
1054                 return (ibc_get_ci_failure(0));
1055         }
1056 
1057         /*
1058          * Now read the current MCG entry (the one previously at the end of
1059          * hash chain) into the temporary MCG.  We are going to update its
1060          * "next_gid_indx" now and write the entry back to the MCG table.
1061          * Note:  In general, this operation shouldn't fail.  If it does, then
1062          * it is an indication that something (probably in HW, but maybe in SW)
1063          * has gone seriously wrong.  We will free up the MCG entry resource,
1064          * but we will not undo the previously written MCG entry in the HW.
1065          * This is OK, though, because the MCG entry is not currently attached
1066          * to any hash chain.
1067          */
1068         status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
1069             HERMON_CMD_NOSLEEP_SPIN);
1070         if (status != HERMON_CMD_SUCCESS) {
1071                 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1072                 hermon_rsrc_free(state, &rsrc);
1073                 mutex_exit(&state->hs_mcglock);
1074                 HERMON_WARNING(state, "failed to read MCG entry");
1075                 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1076                     status);
1077                 if (status == HERMON_CMD_INVALID_STATUS) {
1078                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1079                 }
1080                 return (ibc_get_ci_failure(0));
1081         }
1082 
1083         /*
1084          * Finally, we update the "next_gid_indx" field in the temporary MCG
1085          * and attempt to write the entry back into the Hermon MCG table.  If
1086          * this succeeds, then we update the "shadow" list to reflect the
1087          * change, drop the lock, and return success.  Note:  In general, this
1088          * operation shouldn't fail.  If it does, then it is an indication
1089          * that something (probably in HW, but maybe in SW) has gone seriously
1090          * wrong.  Just as we do above, we will free up the MCG entry resource,
1091          * but we will not try to undo the previously written MCG entry.  This
1092          * is OK, though, because (since we failed here to update the end of
1093          * the chain) that other entry is not currently attached to any chain.
1094          */
1095         mcg_entry->next_gid_indx = rsrc->hr_indx;
1096         status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
1097             HERMON_CMD_NOSLEEP_SPIN);
1098         if (status != HERMON_CMD_SUCCESS) {
1099                 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1100                 hermon_rsrc_free(state, &rsrc);
1101                 mutex_exit(&state->hs_mcglock);
1102                 HERMON_WARNING(state, "failed to write MCG entry");
1103                 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1104                     status);
1105                 if (status == HERMON_CMD_INVALID_STATUS) {
1106                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1107                 }
1108                 return (ibc_get_ci_failure(0));
1109         }
1110         mcg = &state->hs_mcghdl[end_indx];
1111         mcg->mcg_next_indx = rsrc->hr_indx;
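        /*
         * Both the hardware hash chain and the "shadow" list now link the
         * old tail (end_indx) to the newly allocated entry (rsrc->hr_indx).
         */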
1112 
1113         /*
1114          * Now that we know all the Hermon firmware accesses have been
1115          * successful, we update the new "shadow" MCG entry by incrementing
1116          * the "number of attached QPs" count.  Then we drop the lock and
1117          * return success.
1118          */
1119         newmcg->mcg_num_qps++;
1120 
1121         /*
1122          * Increment the refcnt for this QP.  Because the QP
1123          * was added to this MCG, the refcnt must be
1124          * incremented.
1125          */
1126         hermon_qp_mcg_refcnt_inc(qp);
1127 
1128         mutex_exit(&state->hs_mcglock);
1129         return (DDI_SUCCESS);
1130 }
1131 
1132 
1133 /*
1134  * hermon_mcg_detach()
1135  *    Context: Can be called only from user or kernel context.
1136  */
1137 int
1138 hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
1139     ib_lid_t lid)
1140 {
1141         hermon_hw_mcg_t         *mcg_entry;
1142         hermon_hw_mcg_qp_list_t *mcg_entry_qplist;
1143         hermon_mcghdl_t         mcg;
1144         uint64_t                mgid_hash;
1145         uint32_t                end_indx, prev_indx;
1146         int                     status;
1147 
1148         /*
1149          * Check for invalid Multicast DLID.  Specifically, all Multicast
1150          * LIDs should be within a well defined range.  If the specified LID
1151          * is outside of that range, then return an error.
1152          */
1153         if (hermon_mlid_is_valid(lid) == 0) {
1154                 return (IBT_MC_MLID_INVALID);
1155         }
1156 
1157         /*
1158          * Compute the MGID hash value.  As described above, the MCG table is
1159          * arranged as a number of separate hash chains.  This operation
1160          * converts the specified MGID into the starting index of an entry in
1161          * the hash table (i.e. the index for the start of the appropriate
1162          * hash chain).  Subsequent operations below will walk the chain
1163          * searching for a matching entry from which to attempt to remove
1164          * the specified QP.
1165          */
1166         status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
1167             &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
1168         if (status != HERMON_CMD_SUCCESS) {
1169                 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
1170                     status);
1171                 if (status == HERMON_CMD_INVALID_STATUS) {
1172                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1173                 }
1174                 return (ibc_get_ci_failure(0));
1175         }
1176 
1177         /*
1178          * Grab the multicast group mutex.  Then grab the pre-allocated
1179          * temporary buffer used for holding and/or modifying MCG entries.
1180          */
1181         mutex_enter(&state->hs_mcglock);
1182         mcg_entry = state->hs_mcgtmp;
1183         mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);
1184 
1185         /*
1186          * Walk through the array of MCG entries starting at "mgid_hash".
1187          * Try to find an MCG entry with a matching MGID.  The
1188          * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
1189          * index into the MCG table.  The entry at this index is checked to
1190          * determine whether it is a match or not.  If it is a match, then
1191          * we continue on to attempt to remove the QP from the MCG.  If it
1192          * is not a match (or not a valid MCG entry), then we return an error.
1193          */
1194         end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
1195         mcg      = &state->hs_mcghdl[end_indx];
1196 
1197         /*
1198          * If MGID == 0 (the hash chain is empty) or if the specified MGID
1199          * does not match the MGID in the current entry, then return
1200          * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
1201          * valid).
1202          */
1203         if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
1204             ((mcg->mcg_mgid_h != gid.gid_prefix) ||
1205             (mcg->mcg_mgid_l != gid.gid_guid))) {
1206                 mutex_exit(&state->hs_mcglock);
1207                 return (IBT_MC_MGID_INVALID);
1208         }
1209 
1210         /*
1211          * Read the current MCG entry into the temporary MCG.  Note: In
1212          * general, this operation shouldn't fail.  If it does, then it is
1213          * an indication that something (probably in HW, but maybe in SW)
1214          * has gone seriously wrong.
1215          */
1216         status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
1217             HERMON_CMD_NOSLEEP_SPIN);
1218         if (status != HERMON_CMD_SUCCESS) {
1219                 mutex_exit(&state->hs_mcglock);
1220                 HERMON_WARNING(state, "failed to read MCG entry");
1221                 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1222                     status);
1223                 if (status == HERMON_CMD_INVALID_STATUS) {
1224                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1225                 }
1226                 return (ibc_get_ci_failure(0));
1227         }
1228 
1229         /*
1230          * Search the QP number list for a match.  If a match is found, then
1231          * remove the entry from the QP list.  Otherwise, if no match is found,
1232          * return an error.
1233          */
1234         status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
1235         if (status != DDI_SUCCESS) {
1236                 mutex_exit(&state->hs_mcglock);
1237                 return (status);
1238         }
1239 
1240         /*
1241          * Decrement the MCG count for this QP.  When the 'qp_mcg'
1242          * field becomes 0, then this QP is no longer a member of any
1243          * MCG.
1244          */
1245         hermon_qp_mcg_refcnt_dec(qp);
1246 
1247         /*
1248          * If the current MCG's QP number list is about to be made empty
1249          * ("mcg_num_qps" == 1), then remove the entry itself from the hash
1250          * chain.  Otherwise, just write the updated MCG entry back to the
1251          * hardware.  In either case, once we successfully update the hardware
1252          * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
1253          * count (or zero out the entire "shadow" list entry) before returning
1254          * success.  Note:  Zeroing out the "shadow" list entry is done
1255          * inside of hermon_mcg_hash_list_remove().
1256          */
1257         if (mcg->mcg_num_qps == 1) {
1258 
1259                 /* Remove an MCG entry from the hash chain */
1260                 status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx,
1261                     mcg_entry);
1262                 if (status != DDI_SUCCESS) {
1263                         mutex_exit(&state->hs_mcglock);
1264                         return (status);
1265                 }
1266 
1267         } else {
1268                 /*
1269                  * Write the updated MCG entry back to the Hermon MCG table.
1270                  * If this succeeds, then we update the "shadow" list to
1271                  * reflect the change (i.e. decrement the "mcg_num_qps"),
1272                  * drop the lock, and return success.  Note:  In general,
1273                  * this operation shouldn't fail.  If it does, then it is an
1274                  * indication that something (probably in HW, but maybe in SW)
1275                  * has gone seriously wrong.
1276                  */
1277                 mcg_entry->member_cnt = (mcg->mcg_num_qps - 1);
1278                 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
1279                     HERMON_CMD_NOSLEEP_SPIN);
1280                 if (status != HERMON_CMD_SUCCESS) {
1281                         mutex_exit(&state->hs_mcglock);
1282                         HERMON_WARNING(state, "failed to write MCG entry");
1283                         cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
1284                             "%08x\n", status);
1285                         if (status == HERMON_CMD_INVALID_STATUS) {
1286                                 hermon_fm_ereport(state, HCA_SYS_ERR,
1287                                     HCA_ERR_SRV_LOST);
1288                         }
1289                         return (ibc_get_ci_failure(0));
1290                 }
1291                 mcg->mcg_num_qps--;
1292         }
1293 
1294         mutex_exit(&state->hs_mcglock);
1295         return (DDI_SUCCESS);
1296 }
1297 
1298 /*
1299  * hermon_qp_mcg_refcnt_inc()
1300  *    Context: Can be called from interrupt or base context.
1301  */
1302 static void
1303 hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)
1304 {
1305         /* Increment the QP's MCG reference count */
1306         mutex_enter(&qp->qp_lock);
1307         qp->qp_mcg_refcnt++;
1308         mutex_exit(&qp->qp_lock);
1309 }
1310 
1311 
1312 /*
1313  * hermon_qp_mcg_refcnt_dec()
1314  *    Context: Can be called from interrupt or base context.
1315  */
1316 static void
1317 hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)
1318 {
1319         /* Decrement the QP's MCG reference count */
1320         mutex_enter(&qp->qp_lock);
1321         qp->qp_mcg_refcnt--;
1322         mutex_exit(&qp->qp_lock);
1323 }
1324 
1325 
1326 /*
1327  * hermon_mcg_qplist_add()
1328  *    Context: Can be called from interrupt or base context.
1329  */
1330 static int
1331 hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
1332     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp,
1333     uint_t *qp_found)
1334 {
1335         uint_t          qplist_indx;
1336 
1337         ASSERT(MUTEX_HELD(&state->hs_mcglock));
1338 
1339         qplist_indx = mcg->mcg_num_qps;
1340 
1341         /*
1342          * Determine if we have exceeded the maximum number of QPs per
1343          * multicast group.  If we have, then return an error.
1344          */
1345         if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) {
1346                 return (IBT_HCA_MCG_QP_EXCEEDED);
1347         }
1348 
1349         /*
1350          * Determine if the QP is already attached to this MCG table.  If it
1351          * is, then we break out and treat this operation as a NO-OP.
1352          */
1353         for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
1354             qplist_indx++) {
1355                 if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
1356                         break;
1357                 }
1358         }
1359 
1360         /*
1361          * If the QP was already on the list, set 'qp_found' to TRUE.  We still
1362          * return SUCCESS in this case, but the qplist will not have been
1363          * updated because the QP was already on the list.
1364          */
1365         if (qplist_indx < mcg->mcg_num_qps) {
1366                 *qp_found = 1;
1367         } else {
1368                 /*
1369                  * Otherwise, append the new QP number to the end of the
1370                  * current QP list.  Note: We will increment the "mcg_num_qps"
1371                  * field on the "shadow" MCG list entry later (after we know
1372                  * that all necessary Hermon firmware accesses have been
1373                  * successful).
1374                  *
1375                  * Set 'qp_found' to 0 so we know the QP was actually
1376                  * added to the list.
1377                  */
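                     /*
                      * Note: the QPN is written with HERMON_MCG_QPN_BLOCK_LB
                      * OR'd in which, as the macro name suggests, sets the
                      * bit that blocks multicast loopback for this QP.
                      */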
1378                 mcg_qplist[qplist_indx].qpn =
1379                     (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB);
1380                 *qp_found = 0;
1381         }
1382 
1383         return (DDI_SUCCESS);
1384 }
1385 
1386 
1387 
1388 /*
1389  * hermon_mcg_qplist_remove()
1390  *    Context: Can be called from interrupt or base context.
1391  */
1392 static int
1393 hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
1394     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp)
1395 {
1396         uint_t          i, qplist_indx;
1397 
1398         /*
1399          * Search the MCG QP list for a matching QPN.  When
1400          * it's found, we swap the last entry into the matching
1401          * slot, zero out the (old) last entry, and return
1402          * success (the caller decrements the QP count).  If no
1403          * match is found, then it's an error.
1404          */
1405         qplist_indx = mcg->mcg_num_qps;
1406         for (i = 0; i < qplist_indx; i++) {
1407                 if (mcg_qplist[i].qpn == qp->qp_qpnum) {
1408                         mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
1409                         mcg_qplist[qplist_indx - 1].qpn = 0;
1410 
1411                         return (DDI_SUCCESS);
1412                 }
1413         }
1414 
1415         return (IBT_QP_HDL_INVALID);
1416 }
1417 
1418 
1419 /*
1420  * hermon_mcg_walk_mgid_hash()
1421  *    Context: Can be called from interrupt or base context.
1422  */
1423 static uint_t
1424 hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx,
1425     ib_gid_t mgid, uint_t *p_indx)
1426 {
1427         hermon_mcghdl_t curr_mcghdl;
1428         uint_t          curr_indx, prev_indx;
1429 
1430         ASSERT(MUTEX_HELD(&state->hs_mcglock));
1431 
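             /*
              * Note that the index returned below is either that of a
              * matching entry or that of the last entry examined on the
              * chain; callers re-check the MGID at the returned index to
              * distinguish the two cases.
              */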
1432         /* Start at the head of the hash chain */
1433         curr_indx   = (uint_t)start_indx;
1434         prev_indx   = curr_indx;
1435         curr_mcghdl = &state->hs_mcghdl[curr_indx];
1436 
1437         /* If the first entry in the chain has MGID == 0, then stop */
1438         if ((curr_mcghdl->mcg_mgid_h == 0) &&
1439             (curr_mcghdl->mcg_mgid_l == 0)) {
1440                 goto end_mgid_hash_walk;
1441         }
1442 
1443         /* If the first entry in the chain matches the MGID, then stop */
1444         if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1445             (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1446                 goto end_mgid_hash_walk;
1447         }
1448 
1449         /* Otherwise, walk the hash chain looking for a match */
1450         while (curr_mcghdl->mcg_next_indx != 0) {
1451                 prev_indx = curr_indx;
1452                 curr_indx = curr_mcghdl->mcg_next_indx;
1453                 curr_mcghdl = &state->hs_mcghdl[curr_indx];
1454 
1455                 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1456                     (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1457                         break;
1458                 }
1459         }
1460 
1461 end_mgid_hash_walk:
1462         /*
1463          * If necessary, return the index of the previous entry too.  This
1464          * is primarily used for detaching a QP from a multicast group.  It
1465          * may be necessary, in that case, to delete an MCG entry from the
1466          * hash chain, and having the index of the previous entry is helpful.
1467          */
1468         if (p_indx != NULL) {
1469                 *p_indx = prev_indx;
1470         }
1471         return (curr_indx);
1472 }
1473 
1474 
1475 /*
1476  * hermon_mcg_setup_new_hdr()
1477  *    Context: Can be called from interrupt or base context.
1478  */
1479 static void
1480 hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr,
1481     ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc)
1482 {
1483         /*
1484          * Fill in the fields of the "shadow" entry used by software
1485          * to track the MCG hardware entry.
1486          */
1487         mcg->mcg_mgid_h         = mgid.gid_prefix;
1488         mcg->mcg_mgid_l         = mgid.gid_guid;
1489         mcg->mcg_rsrcp          = mcg_rsrc;
1490         mcg->mcg_next_indx = 0;
1491         mcg->mcg_num_qps   = 0;
1492 
1493         /*
1494          * Fill the header fields of the MCG entry (in the temporary copy)
1495          */
1496         mcg_hdr->mgid_h              = mgid.gid_prefix;
1497         mcg_hdr->mgid_l              = mgid.gid_guid;
1498         mcg_hdr->next_gid_indx       = 0;
1499 }
1500 
1501 
1502 /*
1503  * hermon_mcg_hash_list_remove()
1504  *    Context: Can be called only from user or kernel context.
1505  */
1506 static int
1507 hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
1508     uint_t prev_indx, hermon_hw_mcg_t *mcg_entry)
1509 {
1510         hermon_mcghdl_t         curr_mcg, prev_mcg, next_mcg;
1511         uint_t                  next_indx;
1512         int                     status;
1513 
1514         /* Get the pointer to "shadow" list for current entry */
1515         curr_mcg = &state->hs_mcghdl[curr_indx];
1516 
1517         /*
1518          * If this is the first entry on a hash chain, then attempt to replace
1519          * the entry with the next entry on the chain.  If there are no
1520          * subsequent entries on the chain, then this is the only entry and
1521          * should be invalidated.
1522          */
1523         if (curr_indx == prev_indx) {
1524 
1525                 /*
1526                  * If this is the only entry on the chain, then invalidate it.
1527                  * Note:  Invalidating an MCG entry means writing all zeros
1528                  * to the entry.  This is only necessary for those MCG
1529                  * entries that are the "head" entries of the individual hash
1530                  * chains.  Regardless of whether this operation returns
1531                  * success or failure, return that result to the caller.
1532                  */
1533                 next_indx = curr_mcg->mcg_next_indx;
1534                 if (next_indx == 0) {
1535                         status = hermon_mcg_entry_invalidate(state, mcg_entry,
1536                             curr_indx);
1537                         bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1538                         return (status);
1539                 }
1540 
1541                 /*
1542                  * Otherwise, this is just the first entry on the chain, so
1543                  * grab the next one
1544                  */
1545                 next_mcg = &state->hs_mcghdl[next_indx];
1546 
1547                 /*
1548                  * Read the next MCG entry into the temporary MCG.  Note:
1549                  * In general, this operation shouldn't fail.  If it does,
1550                  * then it is an indication that something (probably in HW,
1551                  * but maybe in SW) has gone seriously wrong.
1552                  */
1553                 status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx,
1554                     HERMON_CMD_NOSLEEP_SPIN);
1555                 if (status != HERMON_CMD_SUCCESS) {
1556                         HERMON_WARNING(state, "failed to read MCG entry");
1557                         cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
1558                             "%08x\n", status);
1559                         if (status == HERMON_CMD_INVALID_STATUS) {
1560                                 hermon_fm_ereport(state, HCA_SYS_ERR,
1561                                     HCA_ERR_SRV_LOST);
1562                         }
1563                         return (ibc_get_ci_failure(0));
1564                 }
1565 
1566                 /*
1567                  * Copy/Write the temporary MCG back to the hardware MCG list
1568                  * using the current index.  This essentially removes the
1569                  * current MCG entry from the list by writing over it with
1570                  * the next one.  If this is successful, then we can do the
1571                  * same operation for the "shadow" list.  And we can also
1572                  * free up the Hermon MCG entry resource that was associated
1573                  * with the (old) next entry.  Note:  In general, this
1574                  * operation shouldn't fail.  If it does, then it is an
1575                  * indication that something (probably in HW, but maybe in SW)
1576                  * has gone seriously wrong.
1577                  */
1578                 status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx,
1579                     HERMON_CMD_NOSLEEP_SPIN);
1580                 if (status != HERMON_CMD_SUCCESS) {
1581                         HERMON_WARNING(state, "failed to write MCG entry");
1582                         cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
1583                             "%08x\n", status);
1584                         if (status == HERMON_CMD_INVALID_STATUS) {
1585                                 hermon_fm_ereport(state, HCA_SYS_ERR,
1586                                     HCA_ERR_SRV_LOST);
1587                         }
1588                         return (ibc_get_ci_failure(0));
1589                 }
1590 
1591                 /*
1592                  * Copy all the software tracking information from the next
1593                  * entry on the "shadow" MCG list into the current entry on
1594                  * the list.  Then invalidate (zero out) the other "shadow"
1595                  * list entry.
1596                  */
1597                 bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1598                 bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));
1599 
1600                 /*
1601                  * Free up the Hermon MCG entry resource used by the "next"
1602                  * MCG entry.  That resource is no longer needed because the
1603                  * "next" entry has just become the head of its hash chain,
1604                  * and head entries do not require a separate MCG resource.
1605                  */
1606                 hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1607 
1608                 return (DDI_SUCCESS);
1609         }
1610 
1611         /*
1612          * Else if this is the last entry on the hash chain (or a middle
1613          * entry), then we update the previous entry's "next_gid_indx" field
1614          * to make it point instead to the next entry on the chain.  By
1615          * skipping over the removed entry in this way, we can then free up
1616          * any resources associated with the current entry.  Note:  We don't
1617          * need to invalidate the "skipped over" hardware entry because it
1618          * will no longer be connected to any hash chains, and if/when it is
1619          * finally re-used, it will be written with entirely new values.
1620          */
1621 
1622         /*
1623          * Read the next MCG entry into the temporary MCG.  Note:  In general,
1624          * this operation shouldn't fail.  If it does, then it is an
1625          * indication that something (probably in HW, but maybe in SW) has
1626          * gone seriously wrong.
1627          */
1628         status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx,
1629             HERMON_CMD_NOSLEEP_SPIN);
1630         if (status != HERMON_CMD_SUCCESS) {
1631                 HERMON_WARNING(state, "failed to read MCG entry");
1632                 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1633                     status);
1634                 if (status == HERMON_CMD_INVALID_STATUS) {
1635                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1636                 }
1637                 return (ibc_get_ci_failure(0));
1638         }
1639 
1640         /*
1641          * Finally, we update the "next_gid_indx" field in the temporary MCG
1642          * and attempt to write the entry back into the Hermon MCG table.  If
1643          * this succeeds, then we update the "shadow" list to reflect the
1644          * change, free up the Hermon MCG entry resource that was associated
1645          * with the current entry, and return success.  Note:  In general,
1646          * this operation shouldn't fail.  If it does, then it is an indication
1647          * that something (probably in HW, but maybe in SW) has gone seriously
1648          * wrong.
1649          */
1650         mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
1651         status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx,
1652             HERMON_CMD_NOSLEEP_SPIN);
1653         if (status != HERMON_CMD_SUCCESS) {
1654                 HERMON_WARNING(state, "failed to write MCG entry");
1655                 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1656                     status);
1657                 if (status == HERMON_CMD_INVALID_STATUS) {
1658                         hermon_fm_ereport(state, HCA_SYS_ERR,
1659                             HCA_ERR_SRV_LOST);
1660                 }
1661                 return (ibc_get_ci_failure(0));
1662         }
1663 
1664         /*
1665          * Get the pointer to the "shadow" MCG list entry for the previous
1666          * MCG.  Update its "mcg_next_indx" to point to the entry after
1667          * the current one (i.e. skipping the removed entry).  Note:  This
1668          * next index may be zero, indicating the end of the list.
1669          */
1670         prev_mcg = &state->hs_mcghdl[prev_indx];
1671         prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;
1672 
1673         /*
1674          * Free up the Hermon MCG entry resource used by the current entry.
1675          * This resource is no longer needed because the chain now skips over
1676          * the current entry.  Then invalidate (zero out) the current "shadow"
1677          * list entry.
1678          */
1679         hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1680         bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1681 
1682         return (DDI_SUCCESS);
1683 }
1684 
1685 
1686 /*
1687  * hermon_mcg_entry_invalidate()
1688  *    Context: Can be called only from user or kernel context.
1689  */
1690 static int
1691 hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry,
1692     uint_t indx)
1693 {
1694         int             status;
1695 
1696         /*
1697          * Invalidate the hardware MCG entry by zeroing out this temporary
1698          * MCG and writing it to the hardware.  Note: In general, this
1699          * operation shouldn't fail.  If it does, then it is an indication
1700          * that something (probably in HW, but maybe in SW) has gone seriously
1701          * wrong.
1702          */
1703         bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
1704         status = hermon_write_mgm_cmd_post(state, mcg_entry, indx,
1705             HERMON_CMD_NOSLEEP_SPIN);
1706         if (status != HERMON_CMD_SUCCESS) {
1707                 HERMON_WARNING(state, "failed to write MCG entry");
1708                 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1709                     status);
1710                 if (status == HERMON_CMD_INVALID_STATUS) {
1711                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1712                 }
1713                 return (ibc_get_ci_failure(0));
1714         }
1715 
1716         return (DDI_SUCCESS);
1717 }
1718 
1719 
1720 /*
1721  * hermon_mgid_is_valid()
1722  *    Context: Can be called from interrupt or base context.
1723  */
1724 static int
1725 hermon_mgid_is_valid(ib_gid_t gid)
1726 {
1727         uint_t          topbits, flags, scope;
1728 
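             /*
              * For reference, the 64-bit GID prefix examined below follows
              * the IBA multicast GID format: an 8-bit 0xFF signature in the
              * most significant bits, then a 4-bit "flags" field, a 4-bit
              * "scope" field, and the remainder of the prefix.
              */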
1729         /*
1730          * According to the IBA 1.1 specification (section 4.1.1), a valid
1731          * "multicast GID" must have its top eight bits set to all ones.
1732          */
1733         topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) &
1734             HERMON_MCG_TOPBITS_MASK;
1735         if (topbits != HERMON_MCG_TOPBITS) {
1736                 return (0);
1737         }
1738 
1739         /*
1740          * The next 4 bits are the "flag" bits.  These are valid only
1741          * if they are "0" (which correspond to permanently assigned/
1742          * "well-known" multicast GIDs) or "1" (for so-called "transient"
1743          * multicast GIDs).  All other values are reserved.
1744          */
1745         flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) &
1746             HERMON_MCG_FLAGS_MASK;
1747         if (!((flags == HERMON_MCG_FLAGS_PERM) ||
1748             (flags == HERMON_MCG_FLAGS_NONPERM))) {
1749                 return (0);
1750         }
1751 
1752         /*
1753          * The next 4 bits are the "scope" bits.  These are valid only
1754          * if they are "2" (Link-local), "5" (Site-local), "8"
1755          * (Organization-local) or "E" (Global).  All other values
1756          * are reserved (or currently unassigned).
1757          */
1758         scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) &
1759             HERMON_MCG_SCOPE_MASK;
1760         if (!((scope == HERMON_MCG_SCOPE_LINKLOC) ||
1761             (scope == HERMON_MCG_SCOPE_SITELOC)  ||
1762             (scope == HERMON_MCG_SCOPE_ORGLOC)   ||
1763             (scope == HERMON_MCG_SCOPE_GLOBAL))) {
1764                 return (0);
1765         }
1766 
1767         /*
1768          * If it passes all of the above checks, then we will consider it
1769          * a valid multicast GID.
1770          */
1771         return (1);
1772 }
1773 
1774 
1775 /*
1776  * hermon_mlid_is_valid()
1777  *    Context: Can be called from interrupt or base context.
1778  */
1779 static int
1780 hermon_mlid_is_valid(ib_lid_t lid)
1781 {
1782         /*
1783          * According to the IBA 1.1 specification (section 4.1.1), a valid
1784          * "multicast DLID" must be between 0xC000 and 0xFFFE.
1785          */
1786         if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
1787                 return (0);
1788         }
1789 
1790         return (1);
1791 }
1792 
1793 
1794 /*
1795  * hermon_pd_alloc()
1796  *    Context: Can be called only from user or kernel context.
1797  */
1798 int
1799 hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag)
1800 {
1801         hermon_rsrc_t   *rsrc;
1802         hermon_pdhdl_t  pd;
1803         int             status;
1804 
1805         /*
1806          * Allocate the software structure for tracking the protection domain
1807          * (i.e. the Hermon Protection Domain handle).  By default each PD
1808          * structure will have a unique PD number assigned to it.  All that
1809          * is necessary is for software to initialize the PD reference count
1810          * (to zero) and return success.
1811          */
1812         status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc);
1813         if (status != DDI_SUCCESS) {
1814                 return (IBT_INSUFF_RESOURCE);
1815         }
1816         pd = (hermon_pdhdl_t)rsrc->hr_addr;
1817 
1818         pd->pd_refcnt = 0;
1819         *pdhdl = pd;
1820 
1821         return (DDI_SUCCESS);
1822 }
1823 
1824 
1825 /*
1826  * hermon_pd_free()
1827  *    Context: Can be called only from user or kernel context.
1828  */
1829 int
1830 hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl)
1831 {
1832         hermon_rsrc_t   *rsrc;
1833         hermon_pdhdl_t  pd;
1834 
1835         /*
1836          * Pull all the necessary information from the Hermon Protection Domain
1837          * handle.  This is necessary here because the resource for the
1838          * PD is going to be freed up as part of this operation.
1839          */
1840         pd   = *pdhdl;
1841         rsrc = pd->pd_rsrcp;
1842 
1843         /*
1844          * Check the PD reference count.  If the reference count is non-zero,
1845          * then it means that this protection domain is still referenced by
1846          * some memory region, queue pair, address handle, or other IB
1847          * object.  If it is non-zero, then return an error.  Otherwise,
1848          * free the Hermon resource and return success.
1849          */
1850         if (pd->pd_refcnt != 0) {
1851                 return (IBT_PD_IN_USE);
1852         }
1853 
1854         /* Free the Hermon Protection Domain handle */
1855         hermon_rsrc_free(state, &rsrc);
1856 
1857         /* Set the pdhdl pointer to NULL and return success */
1858         *pdhdl = (hermon_pdhdl_t)NULL;
1859 
1860         return (DDI_SUCCESS);
1861 }
1862 
1863 
1864 /*
1865  * hermon_pd_refcnt_inc()
1866  *    Context: Can be called from interrupt or base context.
1867  */
1868 void
1869 hermon_pd_refcnt_inc(hermon_pdhdl_t pd)
1870 {
1871         /* Increment the protection domain's reference count */
1872         atomic_inc_32(&pd->pd_refcnt);
1873 }
1874 
1875 
1876 /*
1877  * hermon_pd_refcnt_dec()
1878  *    Context: Can be called from interrupt or base context.
1879  */
1880 void
1881 hermon_pd_refcnt_dec(hermon_pdhdl_t pd)
1882 {
1883         /* Decrement the protection domain's reference count */
1884         atomic_dec_32(&pd->pd_refcnt);
1885 }
1886 
1887 
1888 /*
1889  * hermon_port_query()
1890  *    Context: Can be called only from user or kernel context.
1891  */
1892 int
1893 hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
1894 {
1895         sm_portinfo_t           portinfo;
1896         sm_guidinfo_t           guidinfo;
1897         sm_pkey_table_t         pkeytable;
1898         ib_gid_t                *sgid;
1899         uint_t                  sgid_max, pkey_max, tbl_size;
1900         int                     i, j, indx, status;
1901         ib_pkey_t               *pkeyp;
1902         ib_guid_t               *guidp;
1903 
1904         /* Validate that specified port number is legal */
1905         if (!hermon_portnum_is_valid(state, port)) {
1906                 return (IBT_HCA_PORT_INVALID);
1907         }
1908         pkeyp = state->hs_pkey[port - 1];
1909         guidp = state->hs_guid[port - 1];
1910 
1911         /*
1912          * We use the Hermon MAD_IFC command to post a GetPortInfo MAD
1913          * to the firmware (for the specified port number).  This returns
1914          * a full PortInfo MAD (in "portinfo") which we subsequently
1915          * parse to fill in the "ibt_hca_portinfo_t" structure returned
1916          * to the IBTF.
1917          */
1918         status = hermon_getportinfo_cmd_post(state, port,
1919             HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
1920         if (status != HERMON_CMD_SUCCESS) {
1921                 cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command "
1922                     "failed: %08x\n", port, status);
1923                 if (status == HERMON_CMD_INVALID_STATUS) {
1924                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1925                 }
1926                 return (ibc_get_ci_failure(0));
1927         }
1928 
1929         /*
1930          * Parse the PortInfo MAD and fill in the IBTF structure
1931          */
1932         pi->p_base_lid               = portinfo.LID;
1933         pi->p_qkey_violations        = portinfo.Q_KeyViolations;
1934         pi->p_pkey_violations        = portinfo.P_KeyViolations;
1935         pi->p_sm_sl          = portinfo.MasterSMSL;
1936         pi->p_sm_lid         = portinfo.MasterSMLID;
1937         pi->p_linkstate              = portinfo.PortState;
1938         pi->p_port_num               = portinfo.LocalPortNum;
1939         pi->p_phys_state     = portinfo.PortPhysicalState;
1940         pi->p_width_supported        = portinfo.LinkWidthSupported;
1941         pi->p_width_enabled  = portinfo.LinkWidthEnabled;
1942         pi->p_width_active   = portinfo.LinkWidthActive;
1943         pi->p_speed_supported        = portinfo.LinkSpeedSupported;
1944         pi->p_speed_enabled  = portinfo.LinkSpeedEnabled;
1945         pi->p_speed_active   = portinfo.LinkSpeedActive;
1946         pi->p_mtu            = portinfo.MTUCap;
1947         pi->p_lmc            = portinfo.LMC;
1948         pi->p_max_vl         = portinfo.VLCap;
1949         pi->p_subnet_timeout = portinfo.SubnetTimeOut;
1950         pi->p_msg_sz         = ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ);
1951         tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl;
1952         pi->p_sgid_tbl_sz    = (1 << tbl_size);
1953         tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl;
1954         pi->p_pkey_tbl_sz    = (1 << tbl_size);
1955         state->hs_sn_prefix[port - 1] = portinfo.GidPrefix;
1956 
1957         /*
1958          * Convert InfiniBand-defined port capability flags to the format
1959          * specified by the IBTF
1960          */
1961         if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
1962                 pi->p_capabilities |= IBT_PORT_CAP_SM;
1963         if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
1964                 pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
1965         if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
1966                 pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
1967         if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
1968                 pi->p_capabilities |= IBT_PORT_CAP_DM;
1969         if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
1970                 pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
1971         if (portinfo.CapabilityMask & SM_CAP_MASK_IS_CLNT_REREG_SUPPD)
1972                 pi->p_capabilities |= IBT_PORT_CAP_CLNT_REREG;
1973 
1974         /*
1975          * Fill in the SGID table.  Since the only access to the Hermon
1976          * GID tables is through the firmware's MAD_IFC interface, we
1977          * post as many GetGUIDInfo MADs as necessary to read in the entire
1978          * contents of the SGID table (for the specified port).  Note:  The
1979          * GetGUIDInfo command only gets eight GUIDs per operation.  These
1980          * GUIDs are then appended to the GID prefix for the port (from the
1981          * GetPortInfo above) to form the entire SGID table.
1982          */
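             /* Each pass fetches one 8-GUID block; (i >> 3) is the block index */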
1983         for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
1984                 status = hermon_getguidinfo_cmd_post(state, port, i >> 3,
1985                     HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
1986                 if (status != HERMON_CMD_SUCCESS) {
1987                         cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) "
1988                             "command failed: %08x\n", port, status);
1989                         if (status == HERMON_CMD_INVALID_STATUS) {
1990                                 hermon_fm_ereport(state, HCA_SYS_ERR,
1991                                     HCA_ERR_SRV_LOST);
1992                         }
1993                         return (ibc_get_ci_failure(0));
1994                 }
1995 
1996                 /* Figure out how many of the entries are valid */
1997                 sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
1998                 for (j = 0; j < sgid_max; j++) {
1999                         indx = (i + j);
2000                         sgid = &pi->p_sgid_tbl[indx];
2001                         sgid->gid_prefix = portinfo.GidPrefix;
2002                         guidp[indx] = sgid->gid_guid =
2003                             guidinfo.GUIDBlocks[j];
2004                 }
2005         }
2006 
2007         /*
2008          * Fill in the PKey table.  Just as for the GID tables above, the
2009          * only access to the Hermon PKey tables is through the firmware's
2010          * MAD_IFC interface.  We post as many GetPKeyTable MADs as necessary
2011          * to read in the entire contents of the PKey table (for the specified
2012          * port).  Note:  The GetPKeyTable command only gets 32 PKeys per
2013          * operation.
2014          */
2015         for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
2016                 status = hermon_getpkeytable_cmd_post(state, port, i,
2017                     HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
2018                 if (status != HERMON_CMD_SUCCESS) {
2019                         cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) "
2020                             "command failed: %08x\n", port, status);
2021                         if (status == HERMON_CMD_INVALID_STATUS) {
2022                                 hermon_fm_ereport(state, HCA_SYS_ERR,
2023                                     HCA_ERR_SRV_LOST);
2024                         }
2025                         return (ibc_get_ci_failure(0));
2026                 }
2027 
2028                 /* Figure out how many of the entries are valid */
2029                 pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
2030                 for (j = 0; j < pkey_max; j++) {
2031                         indx = (i + j);
2032                         pkeyp[indx] = pi->p_pkey_tbl[indx] =
2033                             pkeytable.P_KeyTableBlocks[j];
2034                 }
2035         }
2036 
2037         return (DDI_SUCCESS);
2038 }
2039 
2040 
2041 /*
2042  * hermon_port_modify()
2043  *    Context: Can be called only from user or kernel context.
2044  */
2045 /* ARGSUSED */
2046 int
2047 hermon_port_modify(hermon_state_t *state, uint8_t port,
2048     ibt_port_modify_flags_t flags, uint8_t init_type)
2049 {
2050         sm_portinfo_t           portinfo;
2051         uint32_t                capmask;
2052         int                     status;
2053         hermon_hw_set_port_t    set_port;
2054 
2055         /*
2056          * Return an error if either of the unsupported flags is set.
2057          */
2058         if ((flags & IBT_PORT_SHUTDOWN) ||
2059             (flags & IBT_PORT_SET_INIT_TYPE)) {
2060                 return (IBT_NOT_SUPPORTED);
2061         }
2062 
2063         bzero(&set_port, sizeof (set_port));
2064 
2065         /*
2066          * Determine whether we are trying to reset the QKey counter
2067          */
2068         if (flags & IBT_PORT_RESET_QKEY)
2069                 set_port.rqk = 1;
2070 
2071         /* Validate that specified port number is legal */
2072         if (!hermon_portnum_is_valid(state, port)) {
2073                 return (IBT_HCA_PORT_INVALID);
2074         }
2075 
2076         /*
2077          * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the
2078          * firmware (for the specified port number).  This returns a full
2079          * PortInfo MAD (in "portinfo") from which we pull the current
2080          * capability mask.  We then modify the capability mask as directed
2081          * by the "pmod_flags" field, and write the updated capability mask
2082          * using the Hermon SET_IB command (below).
2083          */
2084         status = hermon_getportinfo_cmd_post(state, port,
2085             HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
2086         if (status != HERMON_CMD_SUCCESS) {
2087                 if (status == HERMON_CMD_INVALID_STATUS) {
2088                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2089                 }
2090                 return (ibc_get_ci_failure(0));
2091         }
2092 
2093         /*
2094          * Convert InfiniBand-defined port capability flags to the format
2095          * specified by the IBTF.  Specifically, we modify the capability
2096          * mask based on the specified values.
2097          */
2098         capmask = portinfo.CapabilityMask;
2099 
2100         if (flags & IBT_PORT_RESET_SM)
2101                 capmask &= ~SM_CAP_MASK_IS_SM;
2102         else if (flags & IBT_PORT_SET_SM)
2103                 capmask |= SM_CAP_MASK_IS_SM;
2104 
2105         if (flags & IBT_PORT_RESET_SNMP)
2106                 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
2107         else if (flags & IBT_PORT_SET_SNMP)
2108                 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;
2109 
2110         if (flags & IBT_PORT_RESET_DEVMGT)
2111                 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
2112         else if (flags & IBT_PORT_SET_DEVMGT)
2113                 capmask |= SM_CAP_MASK_IS_DM_SUPPD;
2114 
2115         if (flags & IBT_PORT_RESET_VENDOR)
2116                 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
2117         else if (flags & IBT_PORT_SET_VENDOR)
2118                 capmask |= SM_CAP_MASK_IS_VM_SUPPD;
2119 
2120         set_port.cap_mask = capmask;
2121 
2122         /*
2123          * Use the Hermon SET_PORT command to update the capability mask and
2124          * (possibly) reset the QKey violation counter for the specified port.
2125          * Note: In general, this operation shouldn't fail.  If it does, then
2126          * it is an indication that something (probably in HW, but maybe in
2127          * SW) has gone seriously wrong.
2128          */
2129         status = hermon_set_port_cmd_post(state, &set_port, port,
2130             HERMON_SLEEPFLAG_FOR_CONTEXT());
2131         if (status != HERMON_CMD_SUCCESS) {
2132                 HERMON_WARNING(state, "failed to modify port capabilities");
2133                 cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command failed: "
2134                     "%08x\n", port, status);
2135                 if (status == HERMON_CMD_INVALID_STATUS) {
2136                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2137                 }
2138                 return (ibc_get_ci_failure(0));
2139         }
2140 
2141         return (DDI_SUCCESS);
2142 }
2143 
2144 
2145 /*
2146  * hermon_set_addr_path()
2147  *    Context: Can be called from interrupt or base context.
2148  *
2149  * Note: This routine is used for two purposes.  It is used to fill in the
2150  * Hermon UDAV fields, and it is used to fill in the address path information
2151  * for QPs.  Because the two Hermon structures are similar, common fields can
2152  * be filled in here.  Because they also differ, however, we pass an
2153  * additional flag to indicate which type is being filled in and handle
2154  * each one uniquely.
2155  */
2156 
2157 int hermon_srate_override = -1; /* allows ease of testing */
2158 
2159 int
2160 hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av,
2161     hermon_hw_addr_path_t *path, uint_t type)
2162 {
2163         uint_t          gidtbl_sz;
2164         hermon_hw_udav_t *udav;
2165 
2166         udav = (hermon_hw_udav_t *)(void *)path;
2167         path->mlid   = av->av_src_path;
2168         path->rlid   = av->av_dlid;
2169 
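             /*
              * Translate the IBTF static rate into the encoding used by the
              * hardware's "max_stat_rate" field.
              */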
2170         switch (av->av_srate) {
2171         case IBT_SRATE_2:       /* 1xSDR-2.5Gb/s injection rate */
2172                 path->max_stat_rate = 7; break;
2173         case IBT_SRATE_10:      /* 4xSDR-10.0Gb/s injection rate */
2174                 path->max_stat_rate = 8; break;
2175         case IBT_SRATE_30:      /* 12xSDR-30Gb/s injection rate */
2176                 path->max_stat_rate = 9; break;
2177         case IBT_SRATE_5:       /* 1xDDR-5Gb/s injection rate */
2178                 path->max_stat_rate = 10; break;
2179         case IBT_SRATE_20:      /* 4xDDR-20Gb/s injection rate */
2180                 path->max_stat_rate = 11; break;
2181         case IBT_SRATE_40:      /* 4xQDR-40Gb/s injection rate */
2182                 path->max_stat_rate = 12; break;
2183         case IBT_SRATE_60:      /* 12xDDR-60Gb/s injection rate */
2184                 path->max_stat_rate = 13; break;
2185         case IBT_SRATE_80:      /* 8xQDR-80Gb/s injection rate */
2186                 path->max_stat_rate = 14; break;
2187         case IBT_SRATE_120:     /* 12xQDR-120Gb/s injection rate */
2188                 path->max_stat_rate = 15; break;
2189         case IBT_SRATE_NOT_SPECIFIED:   /* Max */
2190                 path->max_stat_rate = 0; break;
2191         default:
2192                 return (IBT_STATIC_RATE_INVALID);
2193         }
2194         if (hermon_srate_override != -1) /* for evaluating HCA firmware */
2195                 path->max_stat_rate = hermon_srate_override;
2196 
2197         /* If "grh" flag is set, then check for valid SGID index too */
2198         gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
2199         if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
2200                 return (IBT_SGID_INVALID);
2201         }
2202 
2203         /*
2204          * Fill in all "global" values regardless of the value in the GRH
2205          * flag.  Because "grh" is not set unless "av_send_grh" is set, the
2206          * hardware will ignore the other "global" values as necessary.  Note:
2207          * SW does this here to enable later query operations to return
2208          * exactly the same params that were passed when the addr path was
2209          * last written.
2210          */
2211         path->grh = av->av_send_grh;
2212         if (type == HERMON_ADDRPATH_QP) {
2213                 path->mgid_index = av->av_sgid_ix;
2214         } else {
2215                 /*
2216                  * For Hermon UDAV, the "mgid_index" field is the index into
2217                  * a combined table (not a per-port table) that has a section
2218                  * for each port, so some extra calculation is necessary.
2219                  */
2220 
2221                 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
2222                     av->av_sgid_ix;
2223 
2224                 udav->portnum = av->av_port_num;
2225         }
2226 
2227         /*
2228          * According to Hermon PRM, the (31:0) part of rgid_l must be set to
2229          * "0x2" if the 'grh' or 'g' bit is cleared.  It also says that we
2230          * only need to do it for UDAV's.  So we enforce that here.
2231          *
2232          * NOTE: The entire 64 bits worth of GUID info is actually being
2233          * preserved (for UDAVs) by the callers of this function
2234          * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the
2235          * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
2236          * "don't care".
2237          */
2238         if ((path->grh) || (type == HERMON_ADDRPATH_QP)) {
2239                 path->flow_label = av->av_flow;
2240                 path->tclass  = av->av_tclass;
2241                 path->hop_limit       = av->av_hop;
2242                 bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h),
2243                     sizeof (uint64_t));
2244                 bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l),
2245                     sizeof (uint64_t));
2246         } else {
2247                 path->rgid_l  = 0x2;
2248                 path->flow_label = 0;
2249                 path->tclass  = 0;
2250                 path->hop_limit       = 0;
2251                 path->rgid_h  = 0;
2252         }
2253         /* extract the default service level */
2254         udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2;
2255 
2256         return (DDI_SUCCESS);
2257 }
2258 
2259 
2260 /*
2261  * hermon_get_addr_path()
2262  *    Context: Can be called from interrupt or base context.
2263  *
2264  * Note: Just like hermon_set_addr_path() above, this routine is used for two
2265  * purposes.  It is used to read in the Hermon UDAV fields, and it is used to
2266  * read in the address path information for QPs.  Because the two Hermon
2267  * structures are similar, common fields can be read in here.  But because
2268  * they are slightly different, we pass an additional flag to indicate which
2269  * type is being read.
2270  */
2271 void
2272 hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path,
2273     ibt_adds_vect_t *av, uint_t type)
2274 {
2275         uint_t          gidtbl_sz;
2276 
2277         av->av_src_path      = path->mlid;
2278         av->av_dlid  = path->rlid;
2279 
2280         /* Set "av_srate" value from max_stat_rate */
2281         switch (path->max_stat_rate) {
2282         case 7:                         /* 1xSDR-2.5Gb/s injection rate */
2283                 av->av_srate = IBT_SRATE_2; break;
2284         case 8:                         /* 4xSDR-10.0Gb/s injection rate */
2285                 av->av_srate = IBT_SRATE_10; break;
2286         case 9:                         /* 12xSDR-30Gb/s injection rate */
2287                 av->av_srate = IBT_SRATE_30; break;
2288         case 10:                        /* 1xDDR-5Gb/s injection rate */
2289                 av->av_srate = IBT_SRATE_5; break;
2290         case 11:                        /* 4xDDR-20Gb/s injection rate */
2291                 av->av_srate = IBT_SRATE_20; break;
2292         case 12:                        /* 4xQDR-40Gb/s injection rate */
2293                 av->av_srate = IBT_SRATE_40; break;
2294         case 13:                        /* 12xDDR-60Gb/s injection rate */
2295                 av->av_srate = IBT_SRATE_60; break;
2296         case 14:                        /* 8xQDR-80Gb/s injection rate */
2297                 av->av_srate = IBT_SRATE_80; break;
2298         case 15:                        /* 12xQDR-120Gb/s injection rate */
2299                 av->av_srate = IBT_SRATE_120; break;
2300         case 0:                         /* max */
2301                 av->av_srate = IBT_SRATE_NOT_SPECIFIED; break;
2302         default:                        /* 1x injection rate */
2303                 av->av_srate = IBT_SRATE_1X;
2304         }
2305 
2306         /*
2307          * Extract all "global" values regardless of the value in the GRH
2308          * flag.  Because "av_send_grh" is set only if "grh" is set, software
2309          * knows to ignore the other "global" values as necessary.  Note: SW
2310          * does it this way to enable these query operations to return exactly
2311          * the same params that were passed when the addr path was last written.
2312          */
2313         av->av_send_grh              = path->grh;
2314         if (type == HERMON_ADDRPATH_QP) {
2315                 av->av_sgid_ix  = path->mgid_index;
2316         } else {
2317                 /*
2318                  * For Hermon UDAV, the "mgid_index" field is the index into
2319                  * a combined table (not a per-port table).
2320                  */
2321                 gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
2322                 av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
2323                     gidtbl_sz);
2324 
2325                 av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum;
2326         }
2327         av->av_flow          = path->flow_label;
2328         av->av_tclass                = path->tclass;
2329         av->av_hop           = path->hop_limit;
2330         /* bcopy() avoids an alignment issue w/ the Hermon addr path struct */
2331         bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t));
2332         bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t));
2333 }
2334 
2335 
2336 /*
2337  * hermon_portnum_is_valid()
2338  *    Context: Can be called from interrupt or base context.
2339  */
2340 int
2341 hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum)
2342 {
2343         uint_t  max_port;
2344 
2345         max_port = state->hs_cfg_profile->cp_num_ports;
2346         if ((portnum <= max_port) && (portnum != 0)) {
2347                 return (1);
2348         } else {
2349                 return (0);
2350         }
2351 }
2352 
2353 
2354 /*
2355  * hermon_pkeyindex_is_valid()
2356  *    Context: Can be called from interrupt or base context.
2357  */
2358 int
2359 hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx)
2360 {
2361         uint_t  max_pkeyindx;
2362 
2363         max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl;
2364         if (pkeyindx < max_pkeyindx) {
2365                 return (1);
2366         } else {
2367                 return (0);
2368         }
2369 }
2370 
2371 
2372 /*
2373  * hermon_queue_alloc()
2374  *    Context: Can be called from interrupt or base context.
2375  */
2376 int
2377 hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info,
2378     uint_t sleepflag)
2379 {
2380         ddi_dma_attr_t          dma_attr;
2381         int                     (*callback)(caddr_t);
2382         uint64_t                realsize, alloc_mask;
2383         int                     flag, status;
2384 
2385         /* Set the callback flag appropriately */
2386         callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP :
2387             DDI_DMA_DONTWAIT;
2388 
2389         /*
2390          * Initialize many of the default DMA attributes.  Then set additional
2391          * alignment restrictions as necessary for the queue memory.  Also
2392          * respect the configured value for IOMMU bypass
2393          */
2394         hermon_dma_attr_init(state, &dma_attr);
2395         dma_attr.dma_attr_align = qa_info->qa_bind_align;
2396 #ifdef  __sparc
2397         if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS) {
2398                 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
2399         }
2400 #endif
2401 
2402         /* Allocate a DMA handle */
2403         status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL,
2404             &qa_info->qa_dmahdl);
2405         if (status != DDI_SUCCESS) {
2406                 return (DDI_FAILURE);
2407         }
2408 
2409         /*
2410          * Determine the amount of memory to allocate, depending on the values
2411          * in "qa_bind_align" and "qa_alloc_align".  The problem we are trying
2412          * to solve here is that allocating a DMA handle with IOMMU bypass
2413          * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
2414          * that are less restrictive than the page size.  Since we may need
2415          * stricter alignments on the memory allocated by ddi_dma_mem_alloc()
2416          * (e.g. in Hermon QP work queue memory allocation), we use the
2417          * following method to calculate how much additional memory to request,
2418          * and we enforce our own alignment on the allocated result.
2419          */
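             /*
              * For example, if "qa_alloc_align" were 0x1000 and "qa_bind_align"
              * differed from it, we would request qa_size + 0xFFF bytes here
              * and later round "qa_buf_aligned" up to a 0x1000 boundary.
              */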
2420         alloc_mask = qa_info->qa_alloc_align - 1;
2421         if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
2422                 realsize = qa_info->qa_size;
2423         } else {
2424                 realsize = qa_info->qa_size + alloc_mask;
2425         }
2426 
2427         /*
2428          * If we are to allocate the queue from system memory, then use
2429          * ddi_dma_mem_alloc() to find the space.  Otherwise, the queue must
2430          * be mappable into userland, so use ddi_umem_alloc().  In either
2431          * case, return a pointer to the memory range allocated (including
2432          * any necessary alignment adjustments), the "real" memory pointer,
2433          * the "real" size, and (for system memory) a ddi_acc_handle_t to
2434          * use when reading from/writing to the memory.
2435          */
2436         if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
2437                 /* Allocate system memory for the queue */
2438                 status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
2439                     &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL,
2440                     (caddr_t *)&qa_info->qa_buf_real,
2441                     (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
2442                 if (status != DDI_SUCCESS) {
2443                         ddi_dma_free_handle(&qa_info->qa_dmahdl);
2444                         return (DDI_FAILURE);
2445                 }
2446 
2447                 /*
2448                  * Save temporary copy of the real pointer.  (This may be
2449                  * modified in the last step below).
2450                  */
2451                 qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2452 
2453                 bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz);
2454 
2455         } else { /* HERMON_QUEUE_LOCATION_USERLAND */
2456 
2457                 /* Allocate userland mappable memory for the queue */
2458                 flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP :
2459                     DDI_UMEM_NOSLEEP;
2460                 qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
2461                     &qa_info->qa_umemcookie);
2462                 if (qa_info->qa_buf_real == NULL) {
2463                         ddi_dma_free_handle(&qa_info->qa_dmahdl);
2464                         return (DDI_FAILURE);
2465                 }
2466 
2467                 /*
2468                  * Save temporary copy of the real pointer.  (This may be
2469                  * modified in the last step below).
2470                  */
2471                 qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2472 
2473         }
2474 
2475         /*
2476          * The next to last step is to ensure that the final address
2477          * ("qa_buf_aligned") has the appropriate "alloc" alignment
2478          * restriction applied to it (if necessary).
2479          */
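             /*
              * The (addr + alloc_mask) & ~alloc_mask idiom below rounds the
              * address up to the next "qa_alloc_align" boundary (it is a
              * no-op if the address is already suitably aligned).
              */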
2480         if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
2481                 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2482                     qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
2483         }
2484         /*
2485          * The last step is to figure out the offset of the start relative
2486          * to the first page of the region - will be used in the eqc/cqc
2487          * passed to the HW
2488          */
2489         qa_info->qa_pgoffs = (uint_t)((uintptr_t)
2490             qa_info->qa_buf_aligned & HERMON_PAGEOFFSET);
2491 
2492         return (DDI_SUCCESS);
2493 }
2494 
2495 
2496 /*
2497  * hermon_queue_free()
2498  *    Context: Can be called from interrupt or base context.
2499  */
2500 void
2501 hermon_queue_free(hermon_qalloc_info_t *qa_info)
2502 {
2503         /*
2504          * Depending on how (i.e. from where) we allocated the memory for
2505          * this queue, we choose the appropriate method for releasing the
2506          * resources.
2507          */
2508         if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
2509 
2510                 ddi_dma_mem_free(&qa_info->qa_acchdl);
2511 
2512         } else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) {
2513 
2514                 ddi_umem_free(qa_info->qa_umemcookie);
2515 
2516         }
2517 
2518         /* Always free the dma handle */
2519         ddi_dma_free_handle(&qa_info->qa_dmahdl);
2520 }
2521 
2522 /*
2523  * hermon_create_fmr_pool()
2524  * Create a pool of FMRs.
2525  *     Context: Can be called from kernel context only.
2526  */
2527 int
2528 hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd,
2529     ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp)
2530 {
2531         hermon_fmrhdl_t fmrpool;
2532         hermon_fmr_list_t *fmr, *fmr_next;
2533         hermon_mrhdl_t   mr;
2534         int             status;
2535         int             sleep;
2536         int             i;
2537 
2538         sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
2539             HERMON_NOSLEEP;
2540         if ((sleep == HERMON_SLEEP) &&
2541             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2542                 return (IBT_INVALID_PARAM);
2543         }
2544 
2545         fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep);
2546         if (fmrpool == NULL) {
2547                 status = IBT_INSUFF_RESOURCE;
2548                 goto fail;
2549         }
2550 
2551         mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER,
2552             DDI_INTR_PRI(state->hs_intrmsi_pri));
2553         mutex_init(&fmrpool->remap_lock, NULL, MUTEX_DRIVER,
2554             DDI_INTR_PRI(state->hs_intrmsi_pri));
2555         mutex_init(&fmrpool->dirty_lock, NULL, MUTEX_DRIVER,
2556             DDI_INTR_PRI(state->hs_intrmsi_pri));
2557 
        fmrpool->fmr_state = state;
        fmrpool->fmr_flush_function = fmr_attr->fmr_func_hdlr;
        fmrpool->fmr_flush_arg = fmr_attr->fmr_func_arg;
        fmrpool->fmr_pool_size = 0;
        fmrpool->fmr_max_pages = fmr_attr->fmr_max_pages_per_fmr;
        fmrpool->fmr_page_sz = fmr_attr->fmr_page_sz;
        fmrpool->fmr_dirty_watermark = fmr_attr->fmr_pool_size / 4;
        fmrpool->fmr_dirty_len = 0;
        fmrpool->fmr_remap_watermark = fmr_attr->fmr_pool_size / 32;
        fmrpool->fmr_remap_len = 0;
        fmrpool->fmr_flags = fmr_attr->fmr_flags;
        fmrpool->fmr_stat_register = 0;
        fmrpool->fmr_max_remaps = state->hs_cfg_profile->cp_fmr_max_remaps;
        fmrpool->fmr_remap_gen = 1;
2572 
2573         fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list;
2574         fmrpool->fmr_dirty_list = NULL;
2575         fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list;
2576         fmrpool->fmr_remap_list = NULL;
2577         fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
2578         fmrpool->fmr_pool_size = fmrpool->fmr_free_len =
2579             fmr_attr->fmr_pool_size;
2580 
2581         for (i = 0; i < fmr_attr->fmr_pool_size; i++) {
2582                 status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr);
2583                 if (status != DDI_SUCCESS) {
2584                         goto fail2;
2585                 }
2586 
                fmr = (hermon_fmr_list_t *)kmem_zalloc(
                    sizeof (hermon_fmr_list_t), sleep);
                if (fmr == NULL) {
                        /* can fail in the no-sleep case */
                        (void) hermon_mr_dealloc_fmr(state, &mr);
                        status = IBT_INSUFF_RESOURCE;
                        goto fail2;
                }

                fmr->fmr = mr;
                fmr->fmr_remaps = 0;
                fmr->fmr_remap_gen = fmrpool->fmr_remap_gen;
                fmr->fmr_pool = fmrpool;
                mr->mr_fmr = fmr;

                /*
                 * Entries are pushed onto the head of the free list, so
                 * the first entry allocated ends up at the tail; record
                 * the address of its link field as the list tail.
                 */
                if (i == 0)
                        fmrpool->fmr_free_list_tail = &fmr->fmr_next;
                fmr->fmr_next = fmrpool->fmr_free_list;
                fmrpool->fmr_free_list = fmr;
2600         }
2601 
2602         /* Set to return pool */
2603         *fmrpoolp = fmrpool;
2604 
2605         IBTF_DPRINTF_L2("fmr", "create_fmr_pool SUCCESS");
2606         return (IBT_SUCCESS);
fail2:
        for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
                fmr_next = fmr->fmr_next;
                (void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
                kmem_free(fmr, sizeof (hermon_fmr_list_t));
        }
        mutex_destroy(&fmrpool->fmr_lock);
        mutex_destroy(&fmrpool->dirty_lock);
        mutex_destroy(&fmrpool->remap_lock);
        kmem_free(fmrpool, sizeof (*fmrpool));
2614 fail:
2615         *fmrpoolp = NULL;
2616         IBTF_DPRINTF_L2("fmr", "create_fmr_pool FAILED");
2617         if (status == DDI_FAILURE) {
2618                 return (ibc_get_ci_failure(0));
2619         } else {
2620                 return (status);
2621         }
2622 }
2623 
2624 /*
2625  * hermon_destroy_fmr_pool()
2626  * Destroy an FMR pool and free all associated resources.
2627  *     Context: Can be called from kernel context only.
2628  */
2629 int
2630 hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2631 {
2632         hermon_fmr_list_t       *fmr, *fmr_next;
2633 
2634         mutex_enter(&fmrpool->fmr_lock);
2635         hermon_fmr_cleanup(fmrpool);
2636 
2637         for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
2638                 fmr_next = fmr->fmr_next;
2639 
2640                 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
2641                 kmem_free(fmr, sizeof (hermon_fmr_list_t));
2642 
2643                 --fmrpool->fmr_pool_size;
2644         }
2645         ASSERT(fmrpool->fmr_pool_size == 0);
2646         mutex_exit(&fmrpool->fmr_lock);
2647 
2648         mutex_destroy(&fmrpool->fmr_lock);
2649         mutex_destroy(&fmrpool->dirty_lock);
2650         mutex_destroy(&fmrpool->remap_lock);
2651 
2652         kmem_free(fmrpool, sizeof (*fmrpool));
2653         IBTF_DPRINTF_L2("fmr", "destroy_fmr_pool SUCCESS");
2654         return (DDI_SUCCESS);
2655 }
2656 
2657 /*
2658  * hermon_flush_fmr_pool()
2659  * Ensure that all unmapped FMRs are fully invalidated.
2660  *     Context: Can be called from kernel context only.
2661  */
2662 /* ARGSUSED */
2663 int
2664 hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2665 {
2666         /*
2667          * Force the unmapping of all entries on the dirty list, regardless of
2668          * whether the watermark has been hit yet.
2669          */
2670         /* grab the pool lock */
2671         mutex_enter(&fmrpool->fmr_lock);
2672         hermon_fmr_cleanup(fmrpool);
2673         mutex_exit(&fmrpool->fmr_lock);
2674         return (DDI_SUCCESS);
2675 }
2676 
2677 /*
2678  * hermon_register_physical_fmr()
2679  * Map memory into FMR
2680  *    Context: Can be called from interrupt or base context.
2681  */
2682 int
2683 hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool,
2684     ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr,
2685     ibt_pmr_desc_t *mem_desc_p)
2686 {
2687         hermon_fmr_list_t       *fmr;
2688         int                     status;
2689 
        /* Check that the length and buffer count are within pool limits */
2691         if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf >
2692             fmrpool->fmr_max_pages)) {
2693                 return (IBT_MR_LEN_INVALID);
2694         }
2695 
2696         mutex_enter(&fmrpool->fmr_lock);
2697         if (fmrpool->fmr_free_list == NULL) {
2698                 if (hermon_fmr_verbose & 2)
2699                         IBTF_DPRINTF_L2("fmr", "register needs remap");
2700                 mutex_enter(&fmrpool->remap_lock);
2701                 if (fmrpool->fmr_remap_list) {
2702                         /* add to free list */
2703                         *(fmrpool->fmr_free_list_tail) =
2704                             fmrpool->fmr_remap_list;
2705                         fmrpool->fmr_remap_list = NULL;
2706                         fmrpool->fmr_free_list_tail =
2707                             fmrpool->fmr_remap_list_tail;
2708 
2709                         /* reset list */
2710                         fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
2711                         fmrpool->fmr_free_len += fmrpool->fmr_remap_len;
2712                         fmrpool->fmr_remap_len = 0;
2713                 }
2714                 mutex_exit(&fmrpool->remap_lock);
2715         }
2716         if (fmrpool->fmr_free_list == NULL) {
2717                 if (hermon_fmr_verbose & 2)
2718                         IBTF_DPRINTF_L2("fmr", "register needs cleanup");
2719                 hermon_fmr_cleanup(fmrpool);
2720         }
2721 
2722         /* grab next free entry */
2723         fmr = fmrpool->fmr_free_list;
2724         if (fmr == NULL) {
2725                 IBTF_DPRINTF_L2("fmr", "WARNING: no free fmr resource");
2726                 cmn_err(CE_CONT, "no free fmr resource\n");
2727                 mutex_exit(&fmrpool->fmr_lock);
2728                 return (IBT_INSUFF_RESOURCE);
2729         }
2730 
2731         if ((fmrpool->fmr_free_list = fmr->fmr_next) == NULL)
2732                 fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list;
        fmr->fmr_next = NULL;
        fmrpool->fmr_stat_register++;
        mutex_exit(&fmrpool->fmr_lock);

        status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr,
            mem_desc_p);
        if (status != DDI_SUCCESS) {
                /* Put the unused entry back on the free list */
                mutex_enter(&fmrpool->fmr_lock);
                if (fmrpool->fmr_free_list == NULL)
                        fmrpool->fmr_free_list_tail = &fmr->fmr_next;
                fmr->fmr_next = fmrpool->fmr_free_list;
                fmrpool->fmr_free_list = fmr;
                mutex_exit(&fmrpool->fmr_lock);
                return (status);
        }
2742         if (hermon_rdma_debug & 0x4)
2743                 IBTF_DPRINTF_L2("fmr", "  reg: mr %p  key %x",
2744                     fmr->fmr, fmr->fmr->mr_rkey);
2745         if (fmr->fmr_remap_gen != fmrpool->fmr_remap_gen) {
2746                 fmr->fmr_remap_gen = fmrpool->fmr_remap_gen;
2747                 fmr->fmr_remaps = 0;
2748         }
2749 
2750         fmr->fmr_remaps++;
2751 
2752         *mr = (hermon_mrhdl_t)fmr->fmr;
2753 
2754         return (DDI_SUCCESS);
2755 }
2756 
2757 /*
2758  * hermon_deregister_fmr()
2759  * Unmap FMR
2760  *    Context: Can be called from kernel context only.
2761  */
2762 int
2763 hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
2764 {
2765         hermon_fmrhdl_t         fmrpool;
2766         hermon_fmr_list_t       *fmr, **fmrlast;
2767         int                     len;
2768 
2769         fmr = mr->mr_fmr;
2770         fmrpool = fmr->fmr_pool;
2771 
2772         /* mark as owned by software */
2773         *(uint8_t *)(fmr->fmr->mr_mptrsrcp->hr_addr) = 0xF0;
2774 
2775         if (fmr->fmr_remaps <
2776             state->hs_cfg_profile->cp_fmr_max_remaps) {
2777                 /* add to remap list */
2778                 if (hermon_rdma_debug & 0x4)
2779                         IBTF_DPRINTF_L2("fmr", "dereg: mr %p  key %x",
2780                             fmr->fmr, fmr->fmr->mr_rkey);
2781                 mutex_enter(&fmrpool->remap_lock);
2782                 fmr->fmr_next = NULL;
2783                 *(fmrpool->fmr_remap_list_tail) = fmr;
2784                 fmrpool->fmr_remap_list_tail = &fmr->fmr_next;
2785                 fmrpool->fmr_remap_len++;
2786 
2787                 /* conditionally add remap list back to free list */
2788                 fmrlast = NULL;
2789                 if (fmrpool->fmr_remap_len >=
2790                     fmrpool->fmr_remap_watermark) {
2791                         fmr = fmrpool->fmr_remap_list;
2792                         fmrlast = fmrpool->fmr_remap_list_tail;
2793                         len = fmrpool->fmr_remap_len;
2794                         fmrpool->fmr_remap_len = 0;
2795                         fmrpool->fmr_remap_list = NULL;
2796                         fmrpool->fmr_remap_list_tail =
2797                             &fmrpool->fmr_remap_list;
2798                 }
2799                 mutex_exit(&fmrpool->remap_lock);
2800                 if (fmrlast) {
2801                         mutex_enter(&fmrpool->fmr_lock);
2802                         *(fmrpool->fmr_free_list_tail) = fmr;
2803                         fmrpool->fmr_free_list_tail = fmrlast;
2804                         fmrpool->fmr_free_len += len;
2805                         mutex_exit(&fmrpool->fmr_lock);
2806                 }
2807         } else {
2808                 /* add to dirty list */
2809                 if (hermon_rdma_debug & 0x4)
2810                         IBTF_DPRINTF_L2("fmr", "dirty: mr %p  key %x",
2811                             fmr->fmr, fmr->fmr->mr_rkey);
2812 
2813                 mutex_enter(&fmrpool->dirty_lock);
2814                 fmr->fmr_next = NULL;
2815                 *(fmrpool->fmr_dirty_list_tail) = fmr;
2816                 fmrpool->fmr_dirty_list_tail = &fmr->fmr_next;
2817                 fmrpool->fmr_dirty_len++;
2818 
2819                 if (fmrpool->fmr_dirty_len >=
2820                     fmrpool->fmr_dirty_watermark) {
2821                         mutex_exit(&fmrpool->dirty_lock);
2822                         mutex_enter(&fmrpool->fmr_lock);
2823                         hermon_fmr_cleanup(fmrpool);
2824                         mutex_exit(&fmrpool->fmr_lock);
                } else {
                        mutex_exit(&fmrpool->dirty_lock);
                }
2827         }
2828         return (DDI_SUCCESS);
2829 }
2830 
2831 /*
2832  * hermon_fmr_cleanup()
2833  *     Context: Called from any context.
2834  */
2835 static void
2836 hermon_fmr_cleanup(hermon_fmrhdl_t fmrpool)
2837 {
2838         int                     status;
2839 
2840         ASSERT(MUTEX_HELD(&fmrpool->fmr_lock));
2841 
2842         if (fmrpool->fmr_stat_register == 0)
2843                 return;
2844 
2845         fmrpool->fmr_stat_register = 0;
2846         membar_producer();
2847 
2848         if (hermon_fmr_verbose)
2849                 IBTF_DPRINTF_L2("fmr", "TPT_SYNC");
2850         status = hermon_sync_tpt_cmd_post(fmrpool->fmr_state,
2851             HERMON_CMD_NOSLEEP_SPIN);
2852         if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_WARN, "fmr: SYNC_TPT command failed (status %x)",
                    status);
2854         }
2855         fmrpool->fmr_remap_gen++;
2856 
2857         /* add everything back to the free list */
2858         mutex_enter(&fmrpool->dirty_lock);
2859         if (fmrpool->fmr_dirty_list) {
2860                 /* add to free list */
2861                 *(fmrpool->fmr_free_list_tail) = fmrpool->fmr_dirty_list;
2862                 fmrpool->fmr_dirty_list = NULL;
2863                 fmrpool->fmr_free_list_tail = fmrpool->fmr_dirty_list_tail;
2864 
2865                 /* reset list */
2866                 fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list;
2867                 fmrpool->fmr_free_len += fmrpool->fmr_dirty_len;
2868                 fmrpool->fmr_dirty_len = 0;
2869         }
2870         mutex_exit(&fmrpool->dirty_lock);
2871 
2872         mutex_enter(&fmrpool->remap_lock);
2873         if (fmrpool->fmr_remap_list) {
2874                 /* add to free list */
2875                 *(fmrpool->fmr_free_list_tail) = fmrpool->fmr_remap_list;
2876                 fmrpool->fmr_remap_list = NULL;
2877                 fmrpool->fmr_free_list_tail = fmrpool->fmr_remap_list_tail;
2878 
2879                 /* reset list */
2880                 fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
2881                 fmrpool->fmr_free_len += fmrpool->fmr_remap_len;
2882                 fmrpool->fmr_remap_len = 0;
2883         }
2884         mutex_exit(&fmrpool->remap_lock);
2885 
2886         if (fmrpool->fmr_flush_function != NULL) {
2887                 (void) fmrpool->fmr_flush_function(
2888                     (ibc_fmr_pool_hdl_t)fmrpool,
2889                     fmrpool->fmr_flush_arg);
2890         }
2891 }