1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * hermon_misc.c
  28  *    Hermon Miscellaneous routines - Address Handle, Multicast, Protection
  29  *    Domain, and port-related operations
  30  *
  31  *    Implements all the routines necessary for allocating, freeing, querying
  32  *    and modifying Address Handles and Protection Domains.  Also implements
  33  *    all the routines necessary for adding and removing Queue Pairs to/from
  34  *    Multicast Groups.  Lastly, it implements the routines necessary for
  35  *    port-related query and modify operations.
  36  */
  37 
  38 #include <sys/types.h>
  39 #include <sys/conf.h>
  40 #include <sys/ddi.h>
  41 #include <sys/sunddi.h>
  42 #include <sys/modctl.h>
  43 #include <sys/bitmap.h>
  44 #include <sys/sysmacros.h>
  45 
  46 #include <sys/ib/adapters/hermon/hermon.h>
  47 
  48 extern int hermon_rdma_debug;
  49 int hermon_fmr_verbose = 0;
  50 
  51 static int hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
  52     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp, uint_t *qp_found);
  53 static int hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
  54     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp);
  55 static void hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp);
  56 static void hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp);
  57 static uint_t hermon_mcg_walk_mgid_hash(hermon_state_t *state,
  58     uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx);
  59 static void hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg,
  60     hermon_hw_mcg_t *mcg_hdr, ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc);
  61 static int hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
  62     uint_t prev_indx, hermon_hw_mcg_t *mcg_entry);
  63 static int hermon_mcg_entry_invalidate(hermon_state_t *state,
  64     hermon_hw_mcg_t *mcg_entry, uint_t indx);
  65 static int hermon_mgid_is_valid(ib_gid_t gid);
  66 static int hermon_mlid_is_valid(ib_lid_t lid);
  67 static void hermon_fmr_cleanup(hermon_fmrhdl_t pool);
  68 
  69 
  70 #define HERMON_MAX_DBR_PAGES_PER_USER   64
  71 #define HERMON_DBR_KEY(index, page) \
  72         (((uint64_t)index) * HERMON_MAX_DBR_PAGES_PER_USER + (page))
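     /*
      * Illustrative note: HERMON_DBR_KEY() folds the UAR index and the
      * per-user DBR page number into a single flat key for the userland
      * mapping database (e.g. index 3, page 2 gives 3 * 64 + 2 = 194).
      * The key is purely a software handle; it is not a hardware value.
      */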
  73 
  74 static hermon_udbr_page_t *
  75 hermon_dbr_new_user_page(hermon_state_t *state, uint_t index,
  76     uint_t page)
  77 {
  78         hermon_udbr_page_t *pagep;
  79         ddi_dma_attr_t dma_attr;
  80         uint_t cookiecnt;
  81         int status;
  82         hermon_umap_db_entry_t *umapdb;
  83         ulong_t pagesize = PAGESIZE;
  84 
  85         pagep = kmem_alloc(sizeof (*pagep), KM_SLEEP);
  86         pagep->upg_index = page;
  87         pagep->upg_nfree = pagesize / sizeof (hermon_dbr_t);
  88 
  89         /* Allocate 1 bit per dbr for free/alloc management (0 => "free") */
  90         pagep->upg_free = kmem_zalloc(pagesize / sizeof (hermon_dbr_t) / 8,
  91             KM_SLEEP);
  92         pagep->upg_kvaddr = ddi_umem_alloc(pagesize, DDI_UMEM_SLEEP,
  93             &pagep->upg_umemcookie); /* not HERMON_PAGESIZE here */
  94 
  95         pagep->upg_buf = ddi_umem_iosetup(pagep->upg_umemcookie, 0,
  96             pagesize, B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
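             /*
              * The page is allocated with ddi_umem_alloc() so that it can
              * later be exported to userland via the umem cookie, and
              * ddi_umem_iosetup() wraps it in a buf(9S) suitable for the
              * DMA bind below.
              */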
  97 
  98         hermon_dma_attr_init(state, &dma_attr);
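             /* On sparc, optionally bypass the IOMMU, per the config profile */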
  99 #ifdef  __sparc
 100         if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS)
 101                 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
 102 #endif
 103         status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
 104             DDI_DMA_SLEEP, NULL, &pagep->upg_dmahdl);
 105         if (status != DDI_SUCCESS) {
 106                 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: "
 107                     "ddi_dma_alloc_handle failed: %d", status);
 108                 return (NULL);
 109         }
 110         status = ddi_dma_buf_bind_handle(pagep->upg_dmahdl,
 111             pagep->upg_buf, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
 112             DDI_DMA_SLEEP, NULL, &pagep->upg_dmacookie, &cookiecnt);
 113         if (status != DDI_SUCCESS) {
 114                 IBTF_DPRINTF_L2("hermon", "hermon_dbr_new_user_page: "
 115                     "ddi_dma_buf_bind_handle failed: %d", status);
 116                 ddi_dma_free_handle(&pagep->upg_dmahdl);
 117                 return (NULL);
 118         }
 119         ASSERT(cookiecnt == 1);
 120 
 121         /* create db entry for mmap */
 122         umapdb = hermon_umap_db_alloc(state->hs_instance,
 123             HERMON_DBR_KEY(index, page), MLNX_UMAP_DBRMEM_RSRC,
 124             (uint64_t)(uintptr_t)pagep);
 125         hermon_umap_db_add(umapdb);
 126         return (pagep);
 127 }
 128 
 129 
 130 /*ARGSUSED*/
 131 static int
 132 hermon_user_dbr_alloc(hermon_state_t *state, uint_t index,
 133     ddi_acc_handle_t *acchdl, hermon_dbr_t **vdbr, uint64_t *pdbr,
 134     uint64_t *mapoffset)
 135 {
 136         hermon_user_dbr_t *udbr;
 137         hermon_udbr_page_t *pagep;
 138         uint_t next_page;
 139         int dbr_index;
 140         int i1, i2, i3, last;
 141         uint64_t u64, mask;
 142 
 143         mutex_enter(&state->hs_dbr_lock);
 144         for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
 145                 if (udbr->udbr_index == index)
 146                         break;
 147         if (udbr == NULL) {
 148                 udbr = kmem_alloc(sizeof (*udbr), KM_SLEEP);
 149                 udbr->udbr_link = state->hs_user_dbr;
 150                 state->hs_user_dbr = udbr;
 151                 udbr->udbr_index = index;
 152                 udbr->udbr_pagep = NULL;
 153         }
 154         pagep = udbr->udbr_pagep;
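             /*
              * Pages are pushed onto the head of the per-UAR list, so the
              * head's upg_index is the highest page number allocated so far;
              * a new page (if one is needed below) simply takes the next one.
              */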
 155         next_page = (pagep == NULL) ? 0 : (pagep->upg_index + 1);
 156         while (pagep != NULL)
 157                 if (pagep->upg_nfree > 0)
 158                         break;
 159                 else
 160                         pagep = pagep->upg_link;
 161         if (pagep == NULL) {
 162                 pagep = hermon_dbr_new_user_page(state, index, next_page);
 163                 if (pagep == NULL) {
 164                         mutex_exit(&state->hs_dbr_lock);
 165                         return (DDI_FAILURE);
 166                 }
 167                 pagep->upg_link = udbr->udbr_pagep;
 168                 udbr->udbr_pagep = pagep;
 169         }
 170 
 171         /* Since nfree > 0, we're assured the loops below will succeed */
 172 
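             /*
              * The free map is scanned in three steps: i1 picks a 64-bit
              * word of the bitmap, i2 a byte within that word, and i3 a bit
              * within that byte.  Both sizeof (uint64_t) factors below
              * evaluate to 8, so the resulting index is
              * i1 * 64 + i2 * 8 + i3.
              */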
 173         /* First, find a 64-bit word (not all ~0) that has a free dbr */
 174         last = PAGESIZE / sizeof (uint64_t) / 64;
 175         mask = ~0ull;
 176         for (i1 = 0; i1 < last; i1++)
 177                 if ((pagep->upg_free[i1] & mask) != mask)
 178                         break;
 179         u64 = pagep->upg_free[i1];
 180 
 181         /* Second, find a byte (not 0xff) that has a free dbr */
 182         last = sizeof (uint64_t) / sizeof (uint8_t);
 183         for (i2 = 0, mask = 0xff; i2 < last; i2++, mask <<= 8)
 184                 if ((u64 & mask) != mask)
 185                         break;
 186 
 187         /* Third, find a bit that is free (0) */
 188         for (i3 = 0; i3 < sizeof (uint64_t) / sizeof (uint8_t); i3++)
 189                 if ((u64 & (1ul << (i3 + 8 * i2))) == 0)
 190                         break;
 191 
 192         /* Mark it as allocated */
 193         pagep->upg_free[i1] |= (1ul << (i3 + 8 * i2));
 194 
 195         dbr_index = ((i1 * sizeof (uint64_t)) + i2) * sizeof (uint64_t) + i3;
 196         pagep->upg_nfree--;
 197         ((uint64_t *)(void *)pagep->upg_kvaddr)[dbr_index] = 0;      /* clear dbr */
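             /*
              * Encode the mmap(2) offset that userland will use to map this
              * DBR page: the DBR key and resource type occupy the bits above
              * PAGESHIFT, so the offset is always page aligned.
              */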
 198         *mapoffset = ((HERMON_DBR_KEY(index, pagep->upg_index) <<
 199             MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_DBRMEM_RSRC) << PAGESHIFT;
 200         *vdbr = (hermon_dbr_t *)((uint64_t *)(void *)pagep->upg_kvaddr +
 201             dbr_index);
 202         *pdbr = pagep->upg_dmacookie.dmac_laddress + dbr_index *
 203             sizeof (uint64_t);
 204 
 205         mutex_exit(&state->hs_dbr_lock);
 206         return (DDI_SUCCESS);
 207 }
 208 
 209 static void
 210 hermon_user_dbr_free(hermon_state_t *state, uint_t index, hermon_dbr_t *record)
 211 {
 212         hermon_user_dbr_t       *udbr;
 213         hermon_udbr_page_t      *pagep;
 214         caddr_t                 kvaddr;
 215         uint_t                  dbr_index;
 216         uint_t                  max_free = PAGESIZE / sizeof (hermon_dbr_t);
 217         int                     i1, i2;
 218 
 219         dbr_index = (uintptr_t)record & PAGEOFFSET; /* offset (not yet index) */
 220         kvaddr = (caddr_t)record - dbr_index;
 221         dbr_index /= sizeof (hermon_dbr_t); /* now it's the index */
 222 
 223         mutex_enter(&state->hs_dbr_lock);
 224         for (udbr = state->hs_user_dbr; udbr != NULL; udbr = udbr->udbr_link)
 225                 if (udbr->udbr_index == index)
 226                         break;
 227         if (udbr == NULL) {
 228                 IBTF_DPRINTF_L2("hermon", "free user dbr: udbr struct not "
 229                     "found for index %x", index);
 230                 mutex_exit(&state->hs_dbr_lock);
 231                 return;
 232         }
 233         for (pagep = udbr->udbr_pagep; pagep != NULL; pagep = pagep->upg_link)
 234                 if (pagep->upg_kvaddr == kvaddr)
 235                         break;
 236         if (pagep == NULL) {
 237                 IBTF_DPRINTF_L2("hermon", "free user dbr: pagep struct not"
 238                     " found for index %x, kvaddr %p, DBR index %x",
 239                     index, kvaddr, dbr_index);
 240                 mutex_exit(&state->hs_dbr_lock);
 241                 return;
 242         }
 243         if (pagep->upg_nfree >= max_free) {
 244                 IBTF_DPRINTF_L2("hermon", "free user dbr: overflow: "
 245                     "UCE index %x, DBR index %x", index, dbr_index);
 246                 mutex_exit(&state->hs_dbr_lock);
 247                 return;
 248         }
 249         ASSERT(dbr_index < max_free);
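             /* i1 is the 64-bit word in the free bitmap, i2 the bit within it */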
 250         i1 = dbr_index / 64;
 251         i2 = dbr_index % 64;
 252         ASSERT((pagep->upg_free[i1] & (1ul << i2)) == (1ul << i2));
 253         pagep->upg_free[i1] &= ~(1ul << i2);
 254         pagep->upg_nfree++;
 255         mutex_exit(&state->hs_dbr_lock);
 256 }
 257 
 258 /*
 259  * hermon_dbr_page_alloc()
 260  *      first page allocation - called from attach or open
 261  *      in this case, we want exactly one page per call, and aligned on a
 262  *      page - and may need to be mapped to the user for access
 263  */
 264 int
 265 hermon_dbr_page_alloc(hermon_state_t *state, hermon_dbr_info_t **dinfo)
 266 {
 267         int                     status;
 268         ddi_dma_handle_t        dma_hdl;
 269         ddi_acc_handle_t        acc_hdl;
 270         ddi_dma_attr_t          dma_attr;
 271         ddi_dma_cookie_t        cookie;
 272         uint_t                  cookie_cnt;
 273         int                     i;
 274         hermon_dbr_info_t       *info;
 275         caddr_t                 dmaaddr;
 276         uint64_t                dmalen;
 277         ulong_t                 pagesize = PAGESIZE;
 278 
 279         info = kmem_zalloc(sizeof (hermon_dbr_info_t), KM_SLEEP);
 280 
 281         /*
 282          * Initialize many of the default DMA attributes.  Then set additional
 283          * alignment restrictions if necessary for the dbr memory, meaning
 284          * page aligned.  Also use the configured value for IOMMU bypass
 285          */
 286         hermon_dma_attr_init(state, &dma_attr);
 287         dma_attr.dma_attr_align = pagesize;
 288         dma_attr.dma_attr_sgllen = 1;   /* make sure only one cookie */
 289 #ifdef  __sparc
 290         if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS)
 291                 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
 292 #endif
 293 
 294         status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
 295             DDI_DMA_SLEEP, NULL, &dma_hdl);
 296         if (status != DDI_SUCCESS) {
 297                 kmem_free((void *)info, sizeof (hermon_dbr_info_t));
 298                 cmn_err(CE_NOTE, "dbr DMA handle alloc failed\n");
 299                 return (DDI_FAILURE);
 300         }
 301 
 302         status = ddi_dma_mem_alloc(dma_hdl, pagesize,
 303             &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
 304             NULL, &dmaaddr, (size_t *)&dmalen, &acc_hdl);
 305         if (status != DDI_SUCCESS) {
 306                 ddi_dma_free_handle(&dma_hdl);
 307                 cmn_err(CE_CONT, "dbr DMA mem alloc failed (status %d)", status);
 308                 kmem_free((void *)info, sizeof (hermon_dbr_info_t));
 309                 return (DDI_FAILURE);
 310         }
 311 
 312         /* this memory won't be IB registered, so do the bind here */
 313         status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
 314             dmaaddr, (size_t)dmalen, DDI_DMA_RDWR |
 315             DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &cookie_cnt);
 316         if (status != DDI_SUCCESS) {
 317                 ddi_dma_mem_free(&acc_hdl);
 318                 ddi_dma_free_handle(&dma_hdl);
 319                 kmem_free((void *)info, sizeof (hermon_dbr_info_t));
 320                 cmn_err(CE_CONT, "dbr DMA bind handle failed (status %d)",
 321                     status);
 322                 return (DDI_FAILURE);
 323         }
 324         *dinfo = info;          /* Pass back the pointer */
 325 
 326         /* init the info structure with returned info */
 327         info->dbr_dmahdl = dma_hdl;
 328         info->dbr_acchdl = acc_hdl;
 329         info->dbr_page   = (hermon_dbr_t *)(void *)dmaaddr;
 330         info->dbr_link = NULL;
 331         /* extract the phys addr from the cookie */
 332         info->dbr_paddr = cookie.dmac_laddress;
 333         info->dbr_firstfree = 0;
 334         info->dbr_nfree = HERMON_NUM_DBR_PER_PAGE;
 335         /* link all DBrs onto the free list */
 336         for (i = 0; i < HERMON_NUM_DBR_PER_PAGE; i++) {
 337                 info->dbr_page[i] = i + 1;
 338         }
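             /*
              * Each free DBR now holds the index of the next free DBR,
              * forming an embedded free list; dbr_firstfree is its head and
              * dbr_nfree keeps the allocator from walking past the last one.
              */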
 339 
 340         return (DDI_SUCCESS);
 341 }
 342 
 343 
 344 /*
 345  * hermon_dbr_alloc()
 346  *      DBr record allocation - called from alloc cq/qp/srq
 347  *      will check for available dbrs in current
 348  *      page - if needed it will allocate another and link them
 349  */
 350 
 351 int
 352 hermon_dbr_alloc(hermon_state_t *state, uint_t index, ddi_acc_handle_t *acchdl,
 353     hermon_dbr_t **vdbr, uint64_t *pdbr, uint64_t *mapoffset)
 354 {
 355         hermon_dbr_t            *record = NULL;
 356         hermon_dbr_info_t       *info = NULL;
 357         uint32_t                idx;
 358         int                     status;
 359 
 360         if (index != state->hs_kernel_uar_index)
 361                 return (hermon_user_dbr_alloc(state, index, acchdl, vdbr, pdbr,
 362                     mapoffset));
 363 
 364         mutex_enter(&state->hs_dbr_lock);
 365         for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
 366                 if (info->dbr_nfree != 0)
 367                         break;          /* found a page w/ one available */
 368 
 369         if (info == NULL) {     /* did NOT find a page with one available */
 370                 status = hermon_dbr_page_alloc(state, &info);
 371                 if (status != DDI_SUCCESS) {
 372                         /* do error handling */
 373                         mutex_exit(&state->hs_dbr_lock);
 374                         return (DDI_FAILURE);
 375                 }
 376                 /* got a new page, so link it in. */
 377                 info->dbr_link = state->hs_kern_dbr;
 378                 state->hs_kern_dbr = info;
 379         }
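             /* Pop the first free DBR off this page's embedded free list */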
 380         idx = info->dbr_firstfree;
 381         record = info->dbr_page + idx;
 382         info->dbr_firstfree = *record;
 383         info->dbr_nfree--;
 384         *record = 0;
 385 
 386         *acchdl = info->dbr_acchdl;
 387         *vdbr = record;
 388         *pdbr = info->dbr_paddr + idx * sizeof (hermon_dbr_t);
 389         mutex_exit(&state->hs_dbr_lock);
 390         return (DDI_SUCCESS);
 391 }
 392 
 393 /*
 394  * hermon_dbr_free()
 395  *      DBr record deallocation - called from free cq/qp
 396  *      will update the counter in the header, and invalidate
 397  *      the dbr, but will NEVER free pages of dbrs - a small
 398  *      price to pay, since the pages are only reclaimed at detach anyway
 399  */
 400 void
 401 hermon_dbr_free(hermon_state_t *state, uint_t indx, hermon_dbr_t *record)
 402 {
 403         hermon_dbr_t            *page;
 404         hermon_dbr_info_t       *info;
 405 
 406         if (indx != state->hs_kernel_uar_index) {
 407                 hermon_user_dbr_free(state, indx, record);
 408                 return;
 409         }
 410         page = (hermon_dbr_t *)(uintptr_t)((uintptr_t)record & PAGEMASK);
 411         mutex_enter(&state->hs_dbr_lock);
 412         for (info = state->hs_kern_dbr; info != NULL; info = info->dbr_link)
 413                 if (info->dbr_page == page)
 414                         break;
 415         ASSERT(info != NULL);
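             /* Push this DBR back onto the head of the page's free list */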
 416         *record = info->dbr_firstfree;
 417         info->dbr_firstfree = record - info->dbr_page;
 418         info->dbr_nfree++;
 419         mutex_exit(&state->hs_dbr_lock);
 420 }
 421 
 422 /*
 423  * hermon_dbr_kern_free()
 424  *    Context: Can be called only from detach context.
 425  *
 426  *      Free all kernel dbr pages.  This includes the freeing of all the dma
 427  *      resources acquired during the allocation of the pages.
 428  *
 429  *      Also, free all the user dbr pages.
 430  */
 431 void
 432 hermon_dbr_kern_free(hermon_state_t *state)
 433 {
 434         hermon_dbr_info_t       *info, *link;
 435         hermon_user_dbr_t       *udbr, *next;
 436         hermon_udbr_page_t      *pagep, *nextp;
 437         hermon_umap_db_entry_t  *umapdb;
 438         int                     instance, status;
 439         uint64_t                value;
 440         extern                  hermon_umap_db_t hermon_userland_rsrc_db;
 441 
 442         mutex_enter(&state->hs_dbr_lock);
 443         for (info = state->hs_kern_dbr; info != NULL; info = link) {
 444                 (void) ddi_dma_unbind_handle(info->dbr_dmahdl);
 445                 ddi_dma_mem_free(&info->dbr_acchdl);     /* free page */
 446                 ddi_dma_free_handle(&info->dbr_dmahdl);
 447                 link = info->dbr_link;
 448                 kmem_free(info, sizeof (hermon_dbr_info_t));
 449         }
 450 
 451         udbr = state->hs_user_dbr;
 452         instance = state->hs_instance;
 453         mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
 454         while (udbr != NULL) {
 455                 pagep = udbr->udbr_pagep;
 456                 while (pagep != NULL) {
 457                         /* remove the umap db entry for this page, if present */
 458                         (void) ddi_dma_unbind_handle(pagep->upg_dmahdl);
 459                         ddi_dma_free_handle(&pagep->upg_dmahdl);
 460                         freerbuf(pagep->upg_buf);
 461                         ddi_umem_free(pagep->upg_umemcookie);
 462                         status = hermon_umap_db_find_nolock(instance,
 463                             HERMON_DBR_KEY(udbr->udbr_index,
 464                             pagep->upg_index), MLNX_UMAP_DBRMEM_RSRC,
 465                             &value, HERMON_UMAP_DB_REMOVE, &umapdb);
 466                         if (status == DDI_SUCCESS)
 467                                 hermon_umap_db_free(umapdb);
 468                         kmem_free(pagep->upg_free,
 469                             PAGESIZE / sizeof (hermon_dbr_t) / 8);
 470                         nextp = pagep->upg_link;
 471                         kmem_free(pagep, sizeof (*pagep));
 472                         pagep = nextp;
 473                 }
 474                 next = udbr->udbr_link;
 475                 kmem_free(udbr, sizeof (*udbr));
 476                 udbr = next;
 477         }
 478         mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
 479         mutex_exit(&state->hs_dbr_lock);
 480 }
 481 
 482 /*
 483  * hermon_ah_alloc()
 484  *    Context: Can be called only from user or kernel context.
 485  */
 486 int
 487 hermon_ah_alloc(hermon_state_t *state, hermon_pdhdl_t pd,
 488     ibt_adds_vect_t *attr_p, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
 489 {
 490         hermon_rsrc_t           *rsrc;
 491         hermon_hw_udav_t        *udav;
 492         hermon_ahhdl_t          ah;
 493         int                     status;
 494 
 495         /*
 496          * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
 497          * indicate that we wish to allocate an "invalid" (i.e. empty)
 498          * address handle XXX
 499          */
 500 
 501         /* Validate that specified port number is legal */
 502         if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
 503                 return (IBT_HCA_PORT_INVALID);
 504         }
 505 
 506         /*
 507          * Allocate the software structure for tracking the address handle
 508          * (i.e. the Hermon Address Handle struct).
 509          */
 510         status = hermon_rsrc_alloc(state, HERMON_AHHDL, 1, sleepflag, &rsrc);
 511         if (status != DDI_SUCCESS) {
 512                 return (IBT_INSUFF_RESOURCE);
 513         }
 514         ah = (hermon_ahhdl_t)rsrc->hr_addr;
 515         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
 516 
 517         /* Increment the reference count on the protection domain (PD) */
 518         hermon_pd_refcnt_inc(pd);
 519 
 520         udav = (hermon_hw_udav_t *)kmem_zalloc(sizeof (hermon_hw_udav_t),
 521             KM_SLEEP);
 522         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
 523 
 524         /*
 525          * Fill in the UDAV data. We first zero out the UDAV, then populate
 526          * it by calling hermon_set_addr_path() to fill in the common
 527          * portions that can be pulled from the "ibt_adds_vect_t" passed in
 528          */
 529         status = hermon_set_addr_path(state, attr_p,
 530             (hermon_hw_addr_path_t *)udav, HERMON_ADDRPATH_UDAV);
 531         if (status != DDI_SUCCESS) {
 532                 hermon_pd_refcnt_dec(pd);
 533                 hermon_rsrc_free(state, &rsrc);
 534                 return (status);
 535         }
 536         udav->pd     = pd->pd_pdnum;
 537         udav->sl     = attr_p->av_srvl;
 538 
 539         /*
 540          * Fill in the rest of the Hermon Address Handle struct.
 541          *
 542          * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
 543          * here because we may need to return it later to the IBTF (as a
 544          * result of a subsequent query operation).  Unlike the other UDAV
 545          * parameters, the value of "av_dgid.gid_guid" is not always preserved.
 546          * The reason for this is described in hermon_set_addr_path().
 547          */
 548         ah->ah_rsrcp  = rsrc;
 549         ah->ah_pdhdl  = pd;
 550         ah->ah_udav   = udav;
 551         ah->ah_save_guid = attr_p->av_dgid.gid_guid;
 552         *ahhdl = ah;
 553 
 554         return (DDI_SUCCESS);
 555 }
 556 
 557 
 558 /*
 559  * hermon_ah_free()
 560  *    Context: Can be called only from user or kernel context.
 561  */
 562 /* ARGSUSED */
 563 int
 564 hermon_ah_free(hermon_state_t *state, hermon_ahhdl_t *ahhdl, uint_t sleepflag)
 565 {
 566         hermon_rsrc_t           *rsrc;
 567         hermon_pdhdl_t          pd;
 568         hermon_ahhdl_t          ah;
 569 
 570         /*
 571          * Pull all the necessary information from the Hermon Address Handle
 572          * struct.  This is necessary here because the resource for the
 573          * AH is going to be freed up as part of this operation.
 574          */
 575         ah    = *ahhdl;
 576         mutex_enter(&ah->ah_lock);
 577         rsrc  = ah->ah_rsrcp;
 578         pd    = ah->ah_pdhdl;
 579         mutex_exit(&ah->ah_lock);
 580         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
 581 
 582         /* Free the UDAV memory */
 583         kmem_free(ah->ah_udav, sizeof (hermon_hw_udav_t));
 584 
 585         /* Decrement the reference count on the protection domain (PD) */
 586         hermon_pd_refcnt_dec(pd);
 587 
 588         /* Free the Hermon Address Handle structure */
 589         hermon_rsrc_free(state, &rsrc);
 590 
 591         /* Set the ahhdl pointer to NULL and return success */
 592         *ahhdl = NULL;
 593 
 594         return (DDI_SUCCESS);
 595 }
 596 
 597 
 598 /*
 599  * hermon_ah_query()
 600  *    Context: Can be called from interrupt or base context.
 601  */
 602 /* ARGSUSED */
 603 int
 604 hermon_ah_query(hermon_state_t *state, hermon_ahhdl_t ah, hermon_pdhdl_t *pd,
 605     ibt_adds_vect_t *attr_p)
 606 {
 607         mutex_enter(&ah->ah_lock);
 608         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p))
 609 
 610         /*
 611          * Pull the PD and UDAV from the Hermon Address Handle structure
 612          */
 613         *pd = ah->ah_pdhdl;
 614 
 615         /*
 616          * Fill in "ibt_adds_vect_t".  We call hermon_get_addr_path() to fill
 617          * the common portions that can be pulled from the UDAV we pass in.
 618          *
 619          * NOTE: We will also fill the "av_dgid.gid_guid" field from the
 620          * "ah_save_guid" field we have previously saved away.  The reason
 621          * for this is described in hermon_ah_alloc() and hermon_ah_modify().
 622          */
 623         hermon_get_addr_path(state, (hermon_hw_addr_path_t *)ah->ah_udav,
 624             attr_p, HERMON_ADDRPATH_UDAV);
 625 
 626         attr_p->av_dgid.gid_guid = ah->ah_save_guid;
 627 
 628         mutex_exit(&ah->ah_lock);
 629         return (DDI_SUCCESS);
 630 }
 631 
 632 
 633 /*
 634  * hermon_ah_modify()
 635  *    Context: Can be called from interrupt or base context.
 636  */
 637 /* ARGSUSED */
 638 int
 639 hermon_ah_modify(hermon_state_t *state, hermon_ahhdl_t ah,
 640     ibt_adds_vect_t *attr_p)
 641 {
 642         hermon_hw_udav_t        old_udav;
 643         uint64_t                data_old;
 644         int                     status, size, i;
 645 
 646         /* Validate that specified port number is legal */
 647         if (!hermon_portnum_is_valid(state, attr_p->av_port_num)) {
 648                 return (IBT_HCA_PORT_INVALID);
 649         }
 650 
 651         mutex_enter(&ah->ah_lock);
 652 
 653         /* Save a copy of the current UDAV data in old_udav. */
 654         bcopy(ah->ah_udav, &old_udav, sizeof (hermon_hw_udav_t));
 655 
 656         /*
 657          * Fill in the new UDAV with the caller's data, passed in via the
 658          * "ibt_adds_vect_t" structure.
 659          *
 660          * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
 661          * field here (just as we did during hermon_ah_alloc()) because we
 662          * may need to return it later to the IBTF (as a result of a
 663          * subsequent query operation).  As explained in hermon_ah_alloc(),
 664          * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
 665          * is not always preserved. The reason for this is described in
 666          * hermon_set_addr_path().
 667          */
 668         status = hermon_set_addr_path(state, attr_p,
 669             (hermon_hw_addr_path_t *)ah->ah_udav, HERMON_ADDRPATH_UDAV);
 670         if (status != DDI_SUCCESS) {
 671                 mutex_exit(&ah->ah_lock);
 672                 return (status);
 673         }
 674         ah->ah_save_guid = attr_p->av_dgid.gid_guid;
 675         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(ah->ah_udav)))
 676         ah->ah_udav->sl  = attr_p->av_srvl;
 677 
 678         /*
 679          * Copy changes into the new UDAV.
 680          *    Note:  We copy in 64-bit chunks.  For the first two of these
 681          *    chunks it is necessary to read the previous contents of the
 682          *    UDAV, mask off the modifiable portions (preserving any
 683          *    "reserved" portions), and then OR the result into the new data.
 684          */
 685         size = sizeof (hermon_hw_udav_t) >> 3;
 686         for (i = 0; i < size; i++) {
 687                 data_old = ((uint64_t *)&old_udav)[i];
 688 
 689                 /*
 690                  * Apply mask to change only the relevant values.
 691                  */
 692                 if (i == 0) {
 693                         data_old = data_old & HERMON_UDAV_MODIFY_MASK0;
 694                 } else if (i == 1) {
 695                         data_old = data_old & HERMON_UDAV_MODIFY_MASK1;
 696                 } else {
 697                         data_old = 0;
 698                 }
 699 
 700                 /* Store the updated values to the UDAV */
 701                 ((uint64_t *)ah->ah_udav)[i] |= data_old;
 702         }
 703 
 704         /*
 705          * Put the valid PD number back into the UDAV entry, as it
 706          * might have been clobbered above.
 707          */
 708         ah->ah_udav->pd = old_udav.pd;
 709 
 710 
 711         mutex_exit(&ah->ah_lock);
 712         return (DDI_SUCCESS);
 713 }
 714 
 715 /*
 716  * hermon_mcg_attach()
 717  *    Context: Can be called only from user or kernel context.
 718  */
 719 int
 720 hermon_mcg_attach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
 721     ib_lid_t lid)
 722 {
 723         hermon_rsrc_t           *rsrc;
 724         hermon_hw_mcg_t         *mcg_entry;
 725         hermon_hw_mcg_qp_list_t *mcg_entry_qplist;
 726         hermon_mcghdl_t         mcg, newmcg;
 727         uint64_t                mgid_hash;
 728         uint32_t                end_indx;
 729         int                     status;
 730         uint_t                  qp_found;
 731 
 732         /*
 733          * Only UD queue pairs may be attached to multicast groups.  Verify
 734          * that the intended QP is of the appropriate transport type
 735          */
 736         if (qp->qp_serv_type != HERMON_QP_UD) {
 737                 return (IBT_QP_SRV_TYPE_INVALID);
 738         }
 739 
 740         /*
 741          * Check for invalid Multicast DLID.  Specifically, all Multicast
 742          * LIDs should be within a well defined range.  If the specified LID
 743          * is outside of that range, then return an error.
 744          */
 745         if (hermon_mlid_is_valid(lid) == 0) {
 746                 return (IBT_MC_MLID_INVALID);
 747         }
 748         /*
 749          * Check for invalid Multicast GID.  All Multicast GIDs should have
 750          * a well-defined pattern of bits and flags that are allowable.  If
 751          * the specified GID does not meet the criteria, then return an error.
 752          */
 753         if (hermon_mgid_is_valid(gid) == 0) {
 754                 return (IBT_MC_MGID_INVALID);
 755         }
 756 
 757         /*
 758          * Compute the MGID hash value.  Since the MCG table is arranged as
 759          * a number of separate hash chains, this operation converts the
 760          * specified MGID into the starting index of an entry in the hash
 761          * table (i.e. the index for the start of the appropriate hash chain).
 762          * Subsequent operations below will walk the chain searching for the
 763          * right place to add this new QP.
 764          */
 765         status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
 766             &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
 767         if (status != HERMON_CMD_SUCCESS) {
 768                 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
 769                     status);
 770                 if (status == HERMON_CMD_INVALID_STATUS) {
 771                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
 772                 }
 773                 return (ibc_get_ci_failure(0));
 774         }
 775 
 776         /*
 777          * Grab the multicast group mutex.  Then grab the pre-allocated
 778          * temporary buffer used for holding and/or modifying MCG entries.
 779          * Zero out the temporary MCG entry before we begin.
 780          */
 781         mutex_enter(&state->hs_mcglock);
 782         mcg_entry = state->hs_mcgtmp;
 783         mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);
 784         bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
 785 
 786         /*
 787          * Walk through the array of MCG entries starting at "mgid_hash".
 788          * Try to find the appropriate place for this new QP to be added.
 789          * This could happen when the first entry of the chain has MGID == 0
 790          * (which means that the hash chain is empty), or because we find
 791          * an entry with the same MGID (in which case we'll add the QP to
 792          * that MCG), or because we come to the end of the chain (in which
 793          * case this is the first QP being added to the multicast group that
 794          * corresponds to the MGID).  The hermon_mcg_walk_mgid_hash() routine
 795          * walks the list and returns an index into the MCG table.  The entry
 796          * at this index is then checked to determine which case we have
 797          * fallen into (see below).  Note:  We are using the "shadow" MCG
 798          * list (of hermon_mcg_t structs) for this lookup because the real
 799          * MCG entries are in hardware (and the lookup process would be much
 800          * more time consuming).
 801          */
 802         end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL);
 803         mcg      = &state->hs_mcghdl[end_indx];
 804 
 805         /*
 806          * If MGID == 0, then the hash chain is empty.  Just fill in the
 807          * current entry.  Note:  No need to allocate an MCG table entry
 808          * as all the hash chain "heads" are already preallocated.
 809          */
 810         if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) {
 811 
 812                 /* Fill in the current entry in the "shadow" MCG list */
 813                 hermon_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL);
 814 
 815                 /*
 816                  * Try to add the new QP number to the list.  This (and the
 817                  * above) routine fills in a temporary MCG.  The "mcg_entry"
 818                  * and "mcg_entry_qplist" pointers simply point to different
 819                  * offsets within the same temporary copy of the MCG (for
 820                  * convenience).  Note:  If this fails, we need to invalidate
 821                  * the entries we've already put into the "shadow" list entry
 822                  * above.
 823                  */
 824                 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
 825                     &qp_found);
 826                 if (status != DDI_SUCCESS) {
 827                         bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
 828                         mutex_exit(&state->hs_mcglock);
 829                         return (status);
 830                 }
 831                 if (!qp_found)
 832                         mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
 833                             /* set the member count */
 834 
 835                 /*
 836                  * Once the temporary MCG has been filled in, write the entry
 837                  * into the appropriate location in the Hermon MCG entry table.
 838                  * If it's successful, then drop the lock and return success.
 839                  * Note: In general, this operation shouldn't fail.  If it
 840                  * does, then it is an indication that something (probably in
 841                  * HW, but maybe in SW) has gone seriously wrong.  We still
 842                  * want to zero out the entries that we've filled in above
 843                  * (in the hermon_mcg_setup_new_hdr() routine).
 844                  */
 845                 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
 846                     HERMON_CMD_NOSLEEP_SPIN);
 847                 if (status != HERMON_CMD_SUCCESS) {
 848                         bzero(mcg, sizeof (struct hermon_sw_mcg_list_s));
 849                         mutex_exit(&state->hs_mcglock);
 850                         HERMON_WARNING(state, "failed to write MCG entry");
 851                         cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
 852                             "%08x\n", status);
 853                         if (status == HERMON_CMD_INVALID_STATUS) {
 854                                 hermon_fm_ereport(state, HCA_SYS_ERR,
 855                                     HCA_ERR_SRV_LOST);
 856                         }
 857                         return (ibc_get_ci_failure(0));
 858                 }
 859 
 860                 /*
 861                  * Now that we know all the Hermon firmware accesses have been
 862                  * successful, we update the "shadow" MCG entry by incrementing
 863                  * the "number of attached QPs" count.
 864                  *
 865                  * We increment only if the QP is not already part of the
 866                  * MCG by checking the 'qp_found' flag returned from the
 867                  * qplist_add above.
 868                  */
 869                 if (!qp_found) {
 870                         mcg->mcg_num_qps++;
 871 
 872                         /*
 873                          * Increment the refcnt for this QP.  Because the QP
 874                          * was added to this MCG, the refcnt must be
 875                          * incremented.
 876                          */
 877                         hermon_qp_mcg_refcnt_inc(qp);
 878                 }
 879 
 880                 /*
 881                  * We drop the lock and return success.
 882                  */
 883                 mutex_exit(&state->hs_mcglock);
 884                 return (DDI_SUCCESS);
 885         }
 886 
 887         /*
 888          * If the specified MGID matches the MGID in the current entry, then
 889          * we need to try to add the QP to the current MCG entry.  In this
 890          * case, it means that we need to read the existing MCG entry (into
 891          * the temporary MCG), add the new QP number to the temporary entry
 892          * (using the same method we used above), and write the entry back
 893          * to the hardware (same as above).
 894          */
 895         if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
 896             (mcg->mcg_mgid_l == gid.gid_guid)) {
 897 
 898                 /*
 899                  * Read the current MCG entry into the temporary MCG.  Note:
 900                  * In general, this operation shouldn't fail.  If it does,
 901                  * then it is an indication that something (probably in HW,
 902                  * but maybe in SW) has gone seriously wrong.
 903                  */
 904                 status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
 905                     HERMON_CMD_NOSLEEP_SPIN);
 906                 if (status != HERMON_CMD_SUCCESS) {
 907                         mutex_exit(&state->hs_mcglock);
 908                         HERMON_WARNING(state, "failed to read MCG entry");
 909                         cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
 910                             "%08x\n", status);
 911                         if (status == HERMON_CMD_INVALID_STATUS) {
 912                                 hermon_fm_ereport(state, HCA_SYS_ERR,
 913                                     HCA_ERR_SRV_LOST);
 914                         }
 915                         return (ibc_get_ci_failure(0));
 916                 }
 917 
 918                 /*
 919                  * Try to add the new QP number to the list.  This routine
 920                  * fills in the necessary pieces of the temporary MCG.  The
 921                  * "mcg_entry_qplist" pointer is used to point to the portion
 922                  * of the temporary MCG that holds the QP numbers.
 923                  *
 924                  * Note: hermon_mcg_qplist_add() returns SUCCESS if it
 925                  * already found the QP in the list.  In this case, the QP is
 926                  * not added on to the list again.  Check the flag 'qp_found'
 927                  * if this value is needed to be known.
 928                  *
 929                  */
 930                 status = hermon_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
 931                     &qp_found);
 932                 if (status != DDI_SUCCESS) {
 933                         mutex_exit(&state->hs_mcglock);
 934                         return (status);
 935                 }
 936                 if (!qp_found)
 937                         mcg_entry->member_cnt = (mcg->mcg_num_qps + 1);
 938                             /* set the member count */
 939 
 940                 /*
 941                  * Once the temporary MCG has been updated, write the entry
 942                  * into the appropriate location in the Hermon MCG entry table.
 943                  * If it's successful, then drop the lock and return success.
 944                  * Note: In general, this operation shouldn't fail.  If it
 945                  * does, then it is an indication that something (probably in
 946                  * HW, but maybe in SW) has gone seriously wrong.
 947                  */
 948                 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
 949                     HERMON_CMD_NOSLEEP_SPIN);
 950                 if (status != HERMON_CMD_SUCCESS) {
 951                         mutex_exit(&state->hs_mcglock);
 952                         HERMON_WARNING(state, "failed to write MCG entry");
 953                         cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
 954                             "%08x\n", status);
 955                         if (status == HERMON_CMD_INVALID_STATUS) {
 956                                 hermon_fm_ereport(state, HCA_SYS_ERR,
 957                                     HCA_ERR_SRV_LOST);
 958                         }
 959                         return (ibc_get_ci_failure(0));
 960                 }
 961 
 962                 /*
 963                  * Now that we know all the Hermon firmware accesses have been
 964                  * successful, we update the current "shadow" MCG entry by
 965                  * incrementing the "number of attached QPs" count.
 966                  *
 967                  * We increment only if the QP is not already part of the
 968                  * MCG by checking the 'qp_found' flag returned from
 969                  * the qplist_add above.
 970                  */
 971                 if (!qp_found) {
 972                         mcg->mcg_num_qps++;
 973 
 974                         /*
 975                          * Increment the refcnt for this QP.  Because the QP
 976                          * was added to this MCG, the refcnt must be
 977                          * incremented.
 978                          */
 979                         hermon_qp_mcg_refcnt_inc(qp);
 980                 }
 981 
 982                 /*
 983                  * We drop the lock and return success.
 984                  */
 985                 mutex_exit(&state->hs_mcglock);
 986                 return (DDI_SUCCESS);
 987         }
 988 
 989         /*
 990          * If we've reached here, then we're at the end of the hash chain.
 991          * We need to allocate a new MCG entry, fill it in, write it to Hermon,
 992          * and update the previous entry to link the new one to the end of the
 993          * chain.
 994          */
 995 
 996         /*
 997          * Allocate an MCG table entry.  This will be filled in with all
 998          * the necessary parameters to define the multicast group.  Then it
 999          * will be written to the hardware in the next-to-last step below.
1000          */
1001         status = hermon_rsrc_alloc(state, HERMON_MCG, 1, HERMON_NOSLEEP, &rsrc);
1002         if (status != DDI_SUCCESS) {
1003                 mutex_exit(&state->hs_mcglock);
1004                 return (IBT_INSUFF_RESOURCE);
1005         }
1006 
1007         /*
1008          * Fill in the new entry in the "shadow" MCG list.  Note:  Just as
1009          * it does above, hermon_mcg_setup_new_hdr() also fills in a portion
1010          * of the temporary MCG entry (the rest of which will be filled in by
1011          * hermon_mcg_qplist_add() below)
1012          */
1013         newmcg = &state->hs_mcghdl[rsrc->hr_indx];
1014         hermon_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);
1015 
1016         /*
1017          * Try to add the new QP number to the list.  This routine fills in
1018          * the final necessary pieces of the temporary MCG.  The
1019          * "mcg_entry_qplist" pointer is used to point to the portion of the
1020          * temporary MCG that holds the QP numbers.  If we fail here, we
1021          * must undo the previous resource allocation.
1022          *
1023          * Note: hermon_mcg_qplist_add() can return SUCCESS if it already
1024          * found the QP in the list.  In this case, the QP is not added on to
1025          * the list again.  Check the flag 'qp_found' if this value is needed
1026          * to be known.
1027          */
1028         status = hermon_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
1029             &qp_found);
1030         if (status != DDI_SUCCESS) {
1031                 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1032                 hermon_rsrc_free(state, &rsrc);
1033                 mutex_exit(&state->hs_mcglock);
1034                 return (status);
1035         }
1036         mcg_entry->member_cnt = (newmcg->mcg_num_qps + 1);
1037             /* set the member count */
1038 
1039         /*
1040          * Once the temporary MCG has been updated, write the entry into the
1041          * appropriate location in the Hermon MCG entry table.  If this is
1042          * successful, then we need to chain the previous entry to this one.
1043          * Note: In general, this operation shouldn't fail.  If it does, then
1044          * it is an indication that something (probably in HW, but maybe in
1045          * SW) has gone seriously wrong.
1046          */
1047         status = hermon_write_mgm_cmd_post(state, mcg_entry, rsrc->hr_indx,
1048             HERMON_CMD_NOSLEEP_SPIN);
1049         if (status != HERMON_CMD_SUCCESS) {
1050                 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1051                 hermon_rsrc_free(state, &rsrc);
1052                 mutex_exit(&state->hs_mcglock);
1053                 HERMON_WARNING(state, "failed to write MCG entry");
1054                 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1055                     status);
1056                 if (status == HERMON_CMD_INVALID_STATUS) {
1057                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1058                 }
1059                 return (ibc_get_ci_failure(0));
1060         }
1061 
1062         /*
1063          * Now read the current MCG entry (the one previously at the end of
1064          * hash chain) into the temporary MCG.  We are going to update its
1065          * "next_gid_indx" now and write the entry back to the MCG table.
1066          * Note:  In general, this operation shouldn't fail.  If it does, then
1067          * it is an indication that something (probably in HW, but maybe in SW)
1068          * has gone seriously wrong.  We will free up the MCG entry resource,
1069          * but we will not undo the previously written MCG entry in the HW.
1070          * This is OK, though, because the MCG entry is not currently attached
1071          * to any hash chain.
1072          */
1073         status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
1074             HERMON_CMD_NOSLEEP_SPIN);
1075         if (status != HERMON_CMD_SUCCESS) {
1076                 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1077                 hermon_rsrc_free(state, &rsrc);
1078                 mutex_exit(&state->hs_mcglock);
1079                 HERMON_WARNING(state, "failed to read MCG entry");
1080                 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1081                     status);
1082                 if (status == HERMON_CMD_INVALID_STATUS) {
1083                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1084                 }
1085                 return (ibc_get_ci_failure(0));
1086         }
1087 
1088         /*
1089          * Finally, we update the "next_gid_indx" field in the temporary MCG
1090          * and attempt to write the entry back into the Hermon MCG table.  If
1091          * this succeeds, then we update the "shadow" list to reflect the
1092          * change, drop the lock, and return success.  Note:  In general, this
1093          * operation shouldn't fail.  If it does, then it is an indication
1094          * that something (probably in HW, but maybe in SW) has gone seriously
1095          * wrong.  Just as we do above, we will free up the MCG entry resource,
1096          * but we will not try to undo the previously written MCG entry.  This
1097          * is OK, though, because (since we failed here to update the end of
1098          * the chain) that other entry is not currently attached to any chain.
1099          */
1100         mcg_entry->next_gid_indx = rsrc->hr_indx;
1101         status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
1102             HERMON_CMD_NOSLEEP_SPIN);
1103         if (status != HERMON_CMD_SUCCESS) {
1104                 bzero(newmcg, sizeof (struct hermon_sw_mcg_list_s));
1105                 hermon_rsrc_free(state, &rsrc);
1106                 mutex_exit(&state->hs_mcglock);
1107                 HERMON_WARNING(state, "failed to write MCG entry");
1108                 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1109                     status);
1110                 if (status == HERMON_CMD_INVALID_STATUS) {
1111                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1112                 }
1113                 return (ibc_get_ci_failure(0));
1114         }
1115         mcg = &state->hs_mcghdl[end_indx];
1116         mcg->mcg_next_indx = rsrc->hr_indx;
1117 
1118         /*
1119          * Now that we know all the Hermon firmware accesses have been
1120          * successful, we update the new "shadow" MCG entry by incrementing
1121          * the "number of attached QPs" count.  Then we drop the lock and
1122          * return success.
1123          */
1124         newmcg->mcg_num_qps++;
1125 
1126         /*
1127          * Increment the refcnt for this QP.  Because the QP
1128          * was added to this MCG, the refcnt must be
1129          * incremented.
1130          */
1131         hermon_qp_mcg_refcnt_inc(qp);
1132 
1133         mutex_exit(&state->hs_mcglock);
1134         return (DDI_SUCCESS);
1135 }
1136 
1137 
1138 /*
1139  * hermon_mcg_detach()
1140  *    Context: Can be called only from user or kernel context.
1141  */
1142 int
1143 hermon_mcg_detach(hermon_state_t *state, hermon_qphdl_t qp, ib_gid_t gid,
1144     ib_lid_t lid)
1145 {
1146         hermon_hw_mcg_t         *mcg_entry;
1147         hermon_hw_mcg_qp_list_t *mcg_entry_qplist;
1148         hermon_mcghdl_t         mcg;
1149         uint64_t                mgid_hash;
1150         uint32_t                end_indx, prev_indx;
1151         int                     status;
1152 
1153         /*
1154          * Check for invalid Multicast DLID.  Specifically, all Multicast
1155          * LIDs should be within a well defined range.  If the specified LID
1156          * is outside of that range, then return an error.
1157          */
1158         if (hermon_mlid_is_valid(lid) == 0) {
1159                 return (IBT_MC_MLID_INVALID);
1160         }
1161 
1162         /*
1163          * Compute the MGID hash value.  As described above, the MCG table is
1164          * arranged as a number of separate hash chains.  This operation
1165          * converts the specified MGID into the starting index of an entry in
1166          * the hash table (i.e. the index for the start of the appropriate
1167          * hash chain).  Subsequent operations below will walk the chain
1168          * searching for a matching entry from which to attempt to remove
1169          * the specified QP.
1170          */
1171         status = hermon_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
1172             &mgid_hash, HERMON_SLEEPFLAG_FOR_CONTEXT());
1173         if (status != HERMON_CMD_SUCCESS) {
1174                 cmn_err(CE_CONT, "Hermon: MGID_HASH command failed: %08x\n",
1175                     status);
1176                 if (status == HERMON_CMD_INVALID_STATUS) {
1177                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1178                 }
1179                 return (ibc_get_ci_failure(0));
1180         }
1181 
1182         /*
1183          * Grab the multicast group mutex.  Then grab the pre-allocated
1184          * temporary buffer used for holding and/or modifying MCG entries.
1185          */
1186         mutex_enter(&state->hs_mcglock);
1187         mcg_entry = state->hs_mcgtmp;
1188         mcg_entry_qplist = HERMON_MCG_GET_QPLIST_PTR(mcg_entry);
1189 
1190         /*
1191          * Walk through the array of MCG entries starting at "mgid_hash".
1192          * Try to find an MCG entry with a matching MGID.  The
1193          * hermon_mcg_walk_mgid_hash() routine walks the list and returns an
1194          * index into the MCG table.  The entry at this index is checked to
1195          * determine whether it is a match or not.  If it is a match, then
1196          * we continue on to attempt to remove the QP from the MCG.  If it
1197          * is not a match (or not a valid MCG entry), then we return an error.
1198          */
1199         end_indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
1200         mcg      = &state->hs_mcghdl[end_indx];
1201 
1202         /*
1203          * If MGID == 0 (the hash chain is empty) or if the specified MGID
1204          * does not match the MGID in the current entry, then return
1205          * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
1206          * valid).
1207          */
1208         if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
1209             ((mcg->mcg_mgid_h != gid.gid_prefix) ||
1210             (mcg->mcg_mgid_l != gid.gid_guid))) {
1211                 mutex_exit(&state->hs_mcglock);
1212                 return (IBT_MC_MGID_INVALID);
1213         }
1214 
1215         /*
1216          * Read the current MCG entry into the temporary MCG.  Note: In
1217          * general, this operation shouldn't fail.  If it does, then it is
1218          * an indication that something (probably in HW, but maybe in SW)
1219          * has gone seriously wrong.
1220          */
1221         status = hermon_read_mgm_cmd_post(state, mcg_entry, end_indx,
1222             HERMON_CMD_NOSLEEP_SPIN);
1223         if (status != HERMON_CMD_SUCCESS) {
1224                 mutex_exit(&state->hs_mcglock);
1225                 HERMON_WARNING(state, "failed to read MCG entry");
1226                 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1227                     status);
1228                 if (status == HERMON_CMD_INVALID_STATUS) {
1229                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1230                 }
1231                 return (ibc_get_ci_failure(0));
1232         }
1233 
1234         /*
1235          * Search the QP number list for a match.  If a match is found, then
1236          * remove the entry from the QP list.  Otherwise, if no match is found,
1237          * return an error.
1238          */
1239         status = hermon_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
1240         if (status != DDI_SUCCESS) {
1241                 mutex_exit(&state->hs_mcglock);
1242                 return (status);
1243         }
1244 
1245         /*
1246          * Decrement the MCG count for this QP.  When the 'qp_mcg'
1247          * field becomes 0, then this QP is no longer a member of any
1248          * MCG.
1249          */
1250         hermon_qp_mcg_refcnt_dec(qp);
1251 
1252         /*
1253          * If the current MCG's QP number list is about to be made empty
1254          * ("mcg_num_qps" == 1), then remove the entry itself from the hash
1255          * chain.  Otherwise, just write the updated MCG entry back to the
1256          * hardware.  In either case, once we successfully update the hardware
1257          * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
1258          * count (or zero out the entire "shadow" list entry) before returning
1259          * success.  Note:  Zeroing out the "shadow" list entry is done
1260          * inside of hermon_mcg_hash_list_remove().
1261          */
1262         if (mcg->mcg_num_qps == 1) {
1263 
1264                 /* Remove an MCG entry from the hash chain */
1265                 status = hermon_mcg_hash_list_remove(state, end_indx, prev_indx,
1266                     mcg_entry);
1267                 if (status != DDI_SUCCESS) {
1268                         mutex_exit(&state->hs_mcglock);
1269                         return (status);
1270                 }
1271 
1272         } else {
1273                 /*
1274                  * Write the updated MCG entry back to the Hermon MCG table.
1275                  * If this succeeds, then we update the "shadow" list to
1276                  * reflect the change (i.e. decrement the "mcg_num_qps"),
1277                  * drop the lock, and return success.  Note:  In general,
1278                  * this operation shouldn't fail.  If it does, then it is an
1279                  * indication that something (probably in HW, but maybe in SW)
1280                  * has gone seriously wrong.
1281                  */
1282                 mcg_entry->member_cnt = (mcg->mcg_num_qps - 1);
1283                 status = hermon_write_mgm_cmd_post(state, mcg_entry, end_indx,
1284                     HERMON_CMD_NOSLEEP_SPIN);
1285                 if (status != HERMON_CMD_SUCCESS) {
1286                         mutex_exit(&state->hs_mcglock);
1287                         HERMON_WARNING(state, "failed to write MCG entry");
1288                         cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
1289                             "%08x\n", status);
1290                         if (status == HERMON_CMD_INVALID_STATUS) {
1291                                 hermon_fm_ereport(state, HCA_SYS_ERR,
1292                                     HCA_ERR_SRV_LOST);
1293                         }
1294                         return (ibc_get_ci_failure(0));
1295                 }
1296                 mcg->mcg_num_qps--;
1297         }
1298 
1299         mutex_exit(&state->hs_mcglock);
1300         return (DDI_SUCCESS);
1301 }
1302 
1303 /*
1304  * hermon_qp_mcg_refcnt_inc()
1305  *    Context: Can be called from interrupt or base context.
1306  */
1307 static void
1308 hermon_qp_mcg_refcnt_inc(hermon_qphdl_t qp)
1309 {
1310         /* Increment the QP's MCG reference count */
1311         mutex_enter(&qp->qp_lock);
1312         qp->qp_mcg_refcnt++;
1313         mutex_exit(&qp->qp_lock);
1314 }
1315 
1316 
1317 /*
1318  * hermon_qp_mcg_refcnt_dec()
1319  *    Context: Can be called from interrupt or base context.
1320  */
1321 static void
1322 hermon_qp_mcg_refcnt_dec(hermon_qphdl_t qp)
1323 {
1324         /* Decrement the QP's MCG reference count */
1325         mutex_enter(&qp->qp_lock);
1326         qp->qp_mcg_refcnt--;
1327         mutex_exit(&qp->qp_lock);
1328 }
1329 
1330 
1331 /*
1332  * hermon_mcg_qplist_add()
1333  *    Context: Can be called from interrupt or base context.
1334  */
1335 static int
1336 hermon_mcg_qplist_add(hermon_state_t *state, hermon_mcghdl_t mcg,
1337     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp,
1338     uint_t *qp_found)
1339 {
1340         uint_t          qplist_indx;
1341 
1342         ASSERT(MUTEX_HELD(&state->hs_mcglock));
1343 
1344         qplist_indx = mcg->mcg_num_qps;
1345 
1346         /*
1347          * Determine if we have exceeded the maximum number of QPs per
1348          * multicast group.  If we have, then return an error.
1349          */
1350         if (qplist_indx >= state->hs_cfg_profile->cp_num_qp_per_mcg) {
1351                 return (IBT_HCA_MCG_QP_EXCEEDED);
1352         }
1353 
1354         /*
1355          * Determine if the QP is already attached to this MCG table.  If it
1356          * is, then we break out and treat this operation as a NO-OP
1357          */
1358         for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
1359             qplist_indx++) {
1360                 if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
1361                         break;
1362                 }
1363         }
1364 
1365         /*
1366          * If the QP was already on the list, set 'qp_found' to TRUE.  We still
1367          * return SUCCESS in this case, but the qplist will not have been
1368          * updated because the QP was already on the list.
1369          */
1370         if (qplist_indx < mcg->mcg_num_qps) {
1371                 *qp_found = 1;
1372         } else {
1373                 /*
1374                  * Otherwise, append the new QP number to the end of the
1375                  * current QP list.  Note: We will increment the "mcg_num_qps"
1376                  * field on the "shadow" MCG list entry later (after we know
1377                  * that all necessary Hermon firmware accesses have been
1378                  * successful).
1379                  *
1380                  * Set 'qp_found' to 0 so we know the QP was added to the
1381                  * list for sure.
1382                  */
1383                 mcg_qplist[qplist_indx].qpn =
1384                     (qp->qp_qpnum | HERMON_MCG_QPN_BLOCK_LB);
1385                 *qp_found = 0;
1386         }
1387 
1388         return (DDI_SUCCESS);
1389 }
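
/*
 * Illustrative sketch (not part of the driver): the duplicate-check-then-
 * append pattern used by hermon_mcg_qplist_add() above, shown on a plain
 * array of QP numbers.  The "qpn_list", "num_qps", "max_qps" and "found"
 * names are hypothetical stand-ins for the MCG QP list state; the real
 * routine leaves "mcg_num_qps" for the caller to update after the firmware
 * write succeeds.
 */
static int
hermon_example_qplist_add(uint32_t *qpn_list, uint_t *num_qps, uint_t max_qps,
    uint32_t qpn, uint_t *found)
{
        uint_t  i;

        /* Reject the add if the list is already full */
        if (*num_qps >= max_qps)
                return (DDI_FAILURE);

        /* If the QP number is already present, treat the add as a no-op */
        for (i = 0; i < *num_qps; i++) {
                if (qpn_list[i] == qpn) {
                        *found = 1;
                        return (DDI_SUCCESS);
                }
        }

        /* Otherwise, append it to the end of the list */
        qpn_list[(*num_qps)++] = qpn;
        *found = 0;
        return (DDI_SUCCESS);
}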
1390 
1391 
1392 
1393 /*
1394  * hermon_mcg_qplist_remove()
1395  *    Context: Can be called from interrupt or base context.
1396  */
1397 static int
1398 hermon_mcg_qplist_remove(hermon_mcghdl_t mcg,
1399     hermon_hw_mcg_qp_list_t *mcg_qplist, hermon_qphdl_t qp)
1400 {
1401         uint_t          i, qplist_indx;
1402 
1403         /*
1404          * Search the MCG QP list for a matching QPN.  When it's
1405          * found, we swap the last entry into the matching slot,
1406          * zero out the (old) last entry, and return success.  (The
1407          * caller decrements "mcg_num_qps" once the hardware update
1408          * succeeds.)  If no match is found, return an error.
1409          */
1410         qplist_indx = mcg->mcg_num_qps;
1411         for (i = 0; i < qplist_indx; i++) {
1412                 if (mcg_qplist[i].qpn == qp->qp_qpnum) {
1413                         mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
1414                         mcg_qplist[qplist_indx - 1].qpn = 0;
1415 
1416                         return (DDI_SUCCESS);
1417                 }
1418         }
1419 
1420         return (IBT_QP_HDL_INVALID);
1421 }
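
/*
 * Illustrative sketch (not part of the driver): the "swap with the last
 * entry" removal used by hermon_mcg_qplist_remove() above, shown on a plain
 * array of QP numbers.  Because the QP list is unordered, removal is an
 * O(n) search followed by an O(1) compaction.  The names below are
 * hypothetical stand-ins for the MCG QP list state.
 */
static int
hermon_example_qplist_remove(uint32_t *qpn_list, uint_t *num_qps, uint32_t qpn)
{
        uint_t  i, last;

        last = *num_qps;
        for (i = 0; i < last; i++) {
                if (qpn_list[i] == qpn) {
                        /* Move the last entry into the vacated slot */
                        qpn_list[i] = qpn_list[last - 1];
                        qpn_list[last - 1] = 0;
                        (*num_qps)--;
                        return (DDI_SUCCESS);
                }
        }

        return (DDI_FAILURE);
}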
1422 
1423 
1424 /*
1425  * hermon_mcg_walk_mgid_hash()
1426  *    Context: Can be called from interrupt or base context.
1427  */
1428 static uint_t
1429 hermon_mcg_walk_mgid_hash(hermon_state_t *state, uint64_t start_indx,
1430     ib_gid_t mgid, uint_t *p_indx)
1431 {
1432         hermon_mcghdl_t curr_mcghdl;
1433         uint_t          curr_indx, prev_indx;
1434 
1435         ASSERT(MUTEX_HELD(&state->hs_mcglock));
1436 
1437         /* Start at the head of the hash chain */
1438         curr_indx   = (uint_t)start_indx;
1439         prev_indx   = curr_indx;
1440         curr_mcghdl = &state->hs_mcghdl[curr_indx];
1441 
1442         /* If the first entry in the chain has MGID == 0, then stop */
1443         if ((curr_mcghdl->mcg_mgid_h == 0) &&
1444             (curr_mcghdl->mcg_mgid_l == 0)) {
1445                 goto end_mgid_hash_walk;
1446         }
1447 
1448         /* If the first entry in the chain matches the MGID, then stop */
1449         if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1450             (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1451                 goto end_mgid_hash_walk;
1452         }
1453 
1454         /* Otherwise, walk the hash chain looking for a match */
1455         while (curr_mcghdl->mcg_next_indx != 0) {
1456                 prev_indx = curr_indx;
1457                 curr_indx = curr_mcghdl->mcg_next_indx;
1458                 curr_mcghdl = &state->hs_mcghdl[curr_indx];
1459 
1460                 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1461                     (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1462                         break;
1463                 }
1464         }
1465 
1466 end_mgid_hash_walk:
1467         /*
1468          * If necessary, return the index of the previous entry too.  This
1469          * is primarily used for detaching a QP from a multicast group.  It
1470          * may be necessary, in that case, to delete an MCG entry from the
1471          * hash chain and having the index of the previous entry is helpful.
1472          */
1473         if (p_indx != NULL) {
1474                 *p_indx = prev_indx;
1475         }
1476         return (curr_indx);
1477 }
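
/*
 * Illustrative sketch (not part of the driver): how a caller typically uses
 * hermon_mcg_walk_mgid_hash().  The returned index always refers to a real
 * table slot, so the caller must still compare the MGID stored there with
 * the one being searched for (an empty chain head has MGID == 0).  The
 * "mgid_hash" value is assumed to come from the MGID_HASH command, as in
 * the attach/detach paths above; the helper name is hypothetical.
 */
static int
hermon_example_find_mcg(hermon_state_t *state, uint64_t mgid_hash,
    ib_gid_t mgid, uint_t *indx, uint_t *prev_indx)
{
        hermon_mcghdl_t mcg;

        ASSERT(MUTEX_HELD(&state->hs_mcglock));

        *indx = hermon_mcg_walk_mgid_hash(state, mgid_hash, mgid, prev_indx);
        mcg   = &state->hs_mcghdl[*indx];

        /* An all-zero MGID means the chain was empty; otherwise compare */
        if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
            (mcg->mcg_mgid_h != mgid.gid_prefix) ||
            (mcg->mcg_mgid_l != mgid.gid_guid)) {
                return (DDI_FAILURE);
        }

        return (DDI_SUCCESS);
}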
1478 
1479 
1480 /*
1481  * hermon_mcg_setup_new_hdr()
1482  *    Context: Can be called from interrupt or base context.
1483  */
1484 static void
1485 hermon_mcg_setup_new_hdr(hermon_mcghdl_t mcg, hermon_hw_mcg_t *mcg_hdr,
1486     ib_gid_t mgid, hermon_rsrc_t *mcg_rsrc)
1487 {
1488         /*
1489          * Fill in the fields of the "shadow" entry used by software
1490          * to track the MCG hardware entry
1491          */
1492         mcg->mcg_mgid_h         = mgid.gid_prefix;
1493         mcg->mcg_mgid_l         = mgid.gid_guid;
1494         mcg->mcg_rsrcp          = mcg_rsrc;
1495         mcg->mcg_next_indx = 0;
1496         mcg->mcg_num_qps   = 0;
1497 
1498         /*
1499          * Fill the header fields of the MCG entry (in the temporary copy)
1500          */
1501         mcg_hdr->mgid_h              = mgid.gid_prefix;
1502         mcg_hdr->mgid_l              = mgid.gid_guid;
1503         mcg_hdr->next_gid_indx       = 0;
1504 }
1505 
1506 
1507 /*
1508  * hermon_mcg_hash_list_remove()
1509  *    Context: Can be called only from user or kernel context.
1510  */
1511 static int
1512 hermon_mcg_hash_list_remove(hermon_state_t *state, uint_t curr_indx,
1513     uint_t prev_indx, hermon_hw_mcg_t *mcg_entry)
1514 {
1515         hermon_mcghdl_t         curr_mcg, prev_mcg, next_mcg;
1516         uint_t                  next_indx;
1517         int                     status;
1518 
1519         /* Get the pointer to "shadow" list for current entry */
1520         curr_mcg = &state->hs_mcghdl[curr_indx];
1521 
1522         /*
1523          * If this is the first entry on a hash chain, then attempt to replace
1524          * the entry with the next entry on the chain.  If there are no
1525          * subsequent entries on the chain, then this is the only entry and
1526          * should be invalidated.
1527          */
1528         if (curr_indx == prev_indx) {
1529 
1530                 /*
1531                  * If this is the only entry on the chain, then invalidate it.
1532                  * Note:  Invalidating an MCG entry means writing all zeros
1533                  * to the entry.  This is only necessary for those MCG
1534                  * entries that are the "head" entries of the individual hash
1535                  * chains.  Regardless of whether this operation returns
1536                  * success or failure, return that result to the caller.
1537                  */
1538                 next_indx = curr_mcg->mcg_next_indx;
1539                 if (next_indx == 0) {
1540                         status = hermon_mcg_entry_invalidate(state, mcg_entry,
1541                             curr_indx);
1542                         bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1543                         return (status);
1544                 }
1545 
1546                 /*
1547                  * Otherwise, this is just the first entry on the chain, so
1548                  * grab the next one
1549                  */
1550                 next_mcg = &state->hs_mcghdl[next_indx];
1551 
1552                 /*
1553                  * Read the next MCG entry into the temporary MCG.  Note:
1554                  * In general, this operation shouldn't fail.  If it does,
1555                  * then it is an indication that something (probably in HW,
1556                  * but maybe in SW) has gone seriously wrong.
1557                  */
1558                 status = hermon_read_mgm_cmd_post(state, mcg_entry, next_indx,
1559                     HERMON_CMD_NOSLEEP_SPIN);
1560                 if (status != HERMON_CMD_SUCCESS) {
1561                         HERMON_WARNING(state, "failed to read MCG entry");
1562                         cmn_err(CE_CONT, "Hermon: READ_MGM command failed: "
1563                             "%08x\n", status);
1564                         if (status == HERMON_CMD_INVALID_STATUS) {
1565                                 hermon_fm_ereport(state, HCA_SYS_ERR,
1566                                     HCA_ERR_SRV_LOST);
1567                         }
1568                         return (ibc_get_ci_failure(0));
1569                 }
1570 
1571                 /*
1572                  * Copy/Write the temporary MCG back to the hardware MCG list
1573                  * using the current index.  This essentially removes the
1574                  * current MCG entry from the list by writing over it with
1575                  * the next one.  If this is successful, then we can do the
1576                  * same operation for the "shadow" list.  And we can also
1577                  * free up the Hermon MCG entry resource that was associated
1578                  * with the (old) next entry.  Note:  In general, this
1579                  * operation shouldn't fail.  If it does, then it is an
1580                  * indication that something (probably in HW, but maybe in SW)
1581                  * has gone seriously wrong.
1582                  */
1583                 status = hermon_write_mgm_cmd_post(state, mcg_entry, curr_indx,
1584                     HERMON_CMD_NOSLEEP_SPIN);
1585                 if (status != HERMON_CMD_SUCCESS) {
1586                         HERMON_WARNING(state, "failed to write MCG entry");
1587                         cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: "
1588                             "%08x\n", status);
1589                         if (status == HERMON_CMD_INVALID_STATUS) {
1590                                 hermon_fm_ereport(state, HCA_SYS_ERR,
1591                                     HCA_ERR_SRV_LOST);
1592                         }
1593                         return (ibc_get_ci_failure(0));
1594                 }
1595 
1596                 /*
1597                  * Copy all the software tracking information from the next
1598                  * entry on the "shadow" MCG list into the current entry on
1599                  * the list.  Then invalidate (zero out) the other "shadow"
1600                  * list entry.
1601                  */
1602                 bcopy(next_mcg, curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1603                 bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));
1604 
1605                 /*
1606                  * Free up the Hermon MCG entry resource used by the "next"
1607                  * MCG entry.  That resource is no longer needed by any
1608                  * MCG entry which is first on a hash chain (like the "next"
1609                  * entry has just become).
1610                  */
1611                 hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1612 
1613                 return (DDI_SUCCESS);
1614         }
1615 
1616         /*
1617          * Else, if this is the last entry on the hash chain (or a middle
1618          * entry), then we update the previous entry's "next_gid_indx" field
1619          * to make it point instead to the next entry on the chain.  By
1620          * skipping over the removed entry in this way, we can then free up
1621          * any resources associated with the current entry.  Note:  We don't
1622          * need to invalidate the "skipped over" hardware entry because it
1623          * will no longer be connected to any hash chain, and if/when it is
1624          * finally re-used, it will be written with entirely new values.
1625          */
1626 
1627         /*
1628          * Read the next MCG entry into the temporary MCG.  Note:  In general,
1629          * this operation shouldn't fail.  If it does, then it is an
1630          * indication that something (probably in HW, but maybe in SW) has
1631          * gone seriously wrong.
1632          */
1633         status = hermon_read_mgm_cmd_post(state, mcg_entry, prev_indx,
1634             HERMON_CMD_NOSLEEP_SPIN);
1635         if (status != HERMON_CMD_SUCCESS) {
1636                 HERMON_WARNING(state, "failed to read MCG entry");
1637                 cmn_err(CE_CONT, "Hermon: READ_MGM command failed: %08x\n",
1638                     status);
1639                 if (status == HERMON_CMD_INVALID_STATUS) {
1640                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1641                 }
1642                 return (ibc_get_ci_failure(0));
1643         }
1644 
1645         /*
1646          * Finally, we update the "next_gid_indx" field in the temporary MCG
1647          * and attempt to write the entry back into the Hermon MCG table.  If
1648          * this succeeds, then we update the "shadow" list to reflect the
1649          * change, free up the Hermon MCG entry resource that was associated
1650          * with the current entry, and return success.  Note:  In general,
1651          * this operation shouldn't fail.  If it does, then it is an indication
1652          * that something (probably in HW, but maybe in SW) has gone seriously
1653          * wrong.
1654          */
1655         mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
1656         status = hermon_write_mgm_cmd_post(state, mcg_entry, prev_indx,
1657             HERMON_CMD_NOSLEEP_SPIN);
1658         if (status != HERMON_CMD_SUCCESS) {
1659                 HERMON_WARNING(state, "failed to write MCG entry");
1660                 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1661                     status);
1662                 if (status == HERMON_CMD_INVALID_STATUS) {
1663                         hermon_fm_ereport(state, HCA_SYS_ERR,
1664                             HCA_ERR_SRV_LOST);
1665                 }
1666                 return (ibc_get_ci_failure(0));
1667         }
1668 
1669         /*
1670          * Get the pointer to the "shadow" MCG list entry for the previous
1671          * MCG.  Update its "mcg_next_indx" to point to the next entry,
1672          * i.e. the one after the current entry.  Note:  This next index may be
1673          * zero, indicating the end of the list.
1674          */
1675         prev_mcg = &state->hs_mcghdl[prev_indx];
1676         prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;
1677 
1678         /*
1679          * Free up the Hermon MCG entry resource used by the current entry.
1680          * This resource is no longer needed because the chain now skips over
1681          * the current entry.  Then invalidate (zero out) the current "shadow"
1682          * list entry.
1683          */
1684         hermon_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1685         bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
1686 
1687         return (DDI_SUCCESS);
1688 }
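
/*
 * Illustrative sketch (not part of the driver): the two unlink cases handled
 * by hermon_mcg_hash_list_remove() above, expressed only in terms of the
 * "shadow" software list.  The real routine must also keep the hardware MCG
 * table in sync (via READ_MGM/WRITE_MGM) and free the MCG entry resource;
 * this hypothetical helper shows just the index-linked list manipulation.
 */
static void
hermon_example_shadow_unlink(hermon_state_t *state, uint_t curr_indx,
    uint_t prev_indx)
{
        hermon_mcghdl_t curr_mcg, prev_mcg, next_mcg;
        uint_t          next_indx;

        curr_mcg = &state->hs_mcghdl[curr_indx];

        if (curr_indx == prev_indx) {
                /* Head of a chain: pull the next entry (if any) into it */
                next_indx = curr_mcg->mcg_next_indx;
                if (next_indx != 0) {
                        next_mcg = &state->hs_mcghdl[next_indx];
                        bcopy(next_mcg, curr_mcg,
                            sizeof (struct hermon_sw_mcg_list_s));
                        bzero(next_mcg, sizeof (struct hermon_sw_mcg_list_s));
                } else {
                        /* Only entry on the chain: simply invalidate it */
                        bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
                }
        } else {
                /* Middle or tail entry: point the previous entry past it */
                prev_mcg = &state->hs_mcghdl[prev_indx];
                prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;
                bzero(curr_mcg, sizeof (struct hermon_sw_mcg_list_s));
        }
}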
1689 
1690 
1691 /*
1692  * hermon_mcg_entry_invalidate()
1693  *    Context: Can be called only from user or kernel context.
1694  */
1695 static int
1696 hermon_mcg_entry_invalidate(hermon_state_t *state, hermon_hw_mcg_t *mcg_entry,
1697     uint_t indx)
1698 {
1699         int             status;
1700 
1701         /*
1702          * Invalidate the hardware MCG entry by zeroing out this temporary
1703          * MCG and writing it to the hardware.  Note: In general, this
1704          * operation shouldn't fail.  If it does, then it is an indication
1705          * that something (probably in HW, but maybe in SW) has gone seriously
1706          * wrong.
1707          */
1708         bzero(mcg_entry, HERMON_MCGMEM_SZ(state));
1709         status = hermon_write_mgm_cmd_post(state, mcg_entry, indx,
1710             HERMON_CMD_NOSLEEP_SPIN);
1711         if (status != HERMON_CMD_SUCCESS) {
1712                 HERMON_WARNING(state, "failed to write MCG entry");
1713                 cmn_err(CE_CONT, "Hermon: WRITE_MGM command failed: %08x\n",
1714                     status);
1715                 if (status == HERMON_CMD_INVALID_STATUS) {
1716                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1717                 }
1718                 return (ibc_get_ci_failure(0));
1719         }
1720 
1721         return (DDI_SUCCESS);
1722 }
1723 
1724 
1725 /*
1726  * hermon_mgid_is_valid()
1727  *    Context: Can be called from interrupt or base context.
1728  */
1729 static int
1730 hermon_mgid_is_valid(ib_gid_t gid)
1731 {
1732         uint_t          topbits, flags, scope;
1733 
1734         /*
1735          * According to IBA 1.1 specification (section 4.1.1) a valid
1736          * "multicast GID" must have its top eight bits set to all ones
1737          */
1738         topbits = (gid.gid_prefix >> HERMON_MCG_TOPBITS_SHIFT) &
1739             HERMON_MCG_TOPBITS_MASK;
1740         if (topbits != HERMON_MCG_TOPBITS) {
1741                 return (0);
1742         }
1743 
1744         /*
1745          * The next 4 bits are the "flag" bits.  These are valid only
1746          * if they are "0" (which correspond to permanently assigned/
1747          * "well-known" multicast GIDs) or "1" (for so-called "transient"
1748          * multicast GIDs).  All other values are reserved.
1749          */
1750         flags = (gid.gid_prefix >> HERMON_MCG_FLAGS_SHIFT) &
1751             HERMON_MCG_FLAGS_MASK;
1752         if (!((flags == HERMON_MCG_FLAGS_PERM) ||
1753             (flags == HERMON_MCG_FLAGS_NONPERM))) {
1754                 return (0);
1755         }
1756 
1757         /*
1758          * The next 4 bits are the "scope" bits.  These are valid only
1759          * if they are "2" (Link-local), "5" (Site-local), "8"
1760          * (Organization-local) or "E" (Global).  All other values
1761          * are reserved (or currently unassigned).
1762          */
1763         scope = (gid.gid_prefix >> HERMON_MCG_SCOPE_SHIFT) &
1764             HERMON_MCG_SCOPE_MASK;
1765         if (!((scope == HERMON_MCG_SCOPE_LINKLOC) ||
1766             (scope == HERMON_MCG_SCOPE_SITELOC)  ||
1767             (scope == HERMON_MCG_SCOPE_ORGLOC)   ||
1768             (scope == HERMON_MCG_SCOPE_GLOBAL))) {
1769                 return (0);
1770         }
1771 
1772         /*
1773          * If it passes all of the above checks, then we will consider it
1774          * a valid multicast GID.
1775          */
1776         return (1);
1777 }
1778 
1779 
1780 /*
1781  * hermon_mlid_is_valid()
1782  *    Context: Can be called from interrupt or base context.
1783  */
1784 static int
1785 hermon_mlid_is_valid(ib_lid_t lid)
1786 {
1787         /*
1788          * According to IBA 1.1 specification (section 4.1.1) a valid
1789          * "multicast DLID" must be between 0xC000 and 0xFFFE.
1790          */
1791         if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
1792                 return (0);
1793         }
1794 
1795         return (1);
1796 }
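
/*
 * Illustrative sketch (not part of the driver): validating a multicast GID
 * and multicast LID with the two helpers above.  The example MGID is the
 * well-known IPoIB broadcast group (ff12:401b:<pkey>::ffff:ffff), shown
 * here with the default partition key; the values are purely illustrative
 * and the helper name is hypothetical.
 */
static int
hermon_example_mcg_args_valid(void)
{
        ib_gid_t        mgid;
        ib_lid_t        mlid;

        /* Top 8 bits = 0xFF, flags = 1 (transient), scope = 2 (link-local) */
        mgid.gid_prefix = 0xFF12401BFFFF0000ULL;
        mgid.gid_guid   = 0x00000000FFFFFFFFULL;
        mlid            = IB_LID_MC_FIRST;      /* first valid MLID, 0xC000 */

        if (hermon_mgid_is_valid(mgid) == 0)
                return (IBT_MC_MGID_INVALID);
        if (hermon_mlid_is_valid(mlid) == 0)
                return (IBT_MC_MLID_INVALID);

        return (DDI_SUCCESS);
}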
1797 
1798 
1799 /*
1800  * hermon_pd_alloc()
1801  *    Context: Can be called only from user or kernel context.
1802  */
1803 int
1804 hermon_pd_alloc(hermon_state_t *state, hermon_pdhdl_t *pdhdl, uint_t sleepflag)
1805 {
1806         hermon_rsrc_t   *rsrc;
1807         hermon_pdhdl_t  pd;
1808         int             status;
1809 
1810         /*
1811          * Allocate the software structure for tracking the protection domain
1812          * (i.e. the Hermon Protection Domain handle).  By default each PD
1813          * structure will have a unique PD number assigned to it.  All that
1814          * is necessary is for software to initialize the PD reference count
1815          * (to zero) and return success.
1816          */
1817         status = hermon_rsrc_alloc(state, HERMON_PDHDL, 1, sleepflag, &rsrc);
1818         if (status != DDI_SUCCESS) {
1819                 return (IBT_INSUFF_RESOURCE);
1820         }
1821         pd = (hermon_pdhdl_t)rsrc->hr_addr;
1822         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1823 
1824         pd->pd_refcnt = 0;
1825         *pdhdl = pd;
1826 
1827         return (DDI_SUCCESS);
1828 }
1829 
1830 
1831 /*
1832  * hermon_pd_free()
1833  *    Context: Can be called only from user or kernel context.
1834  */
1835 int
1836 hermon_pd_free(hermon_state_t *state, hermon_pdhdl_t *pdhdl)
1837 {
1838         hermon_rsrc_t   *rsrc;
1839         hermon_pdhdl_t  pd;
1840 
1841         /*
1842          * Pull all the necessary information from the Hermon Protection Domain
1843          * handle.  This is necessary here because the resource for the
1844          * PD is going to be freed up as part of this operation.
1845          */
1846         pd   = *pdhdl;
1847         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1848         rsrc = pd->pd_rsrcp;
1849 
1850         /*
1851          * Check the PD reference count.  If the reference count is non-zero,
1852          * then it means that this protection domain is still referenced by
1853          * some memory region, queue pair, address handle, or other IB object.
1854          * If it is non-zero, then return an error.  Otherwise, free the
1855          * Hermon resource and return success.
1856          */
1857         if (pd->pd_refcnt != 0) {
1858                 return (IBT_PD_IN_USE);
1859         }
1860 
1861         /* Free the Hermon Protection Domain handle */
1862         hermon_rsrc_free(state, &rsrc);
1863 
1864         /* Set the pdhdl pointer to NULL and return success */
1865         *pdhdl = (hermon_pdhdl_t)NULL;
1866 
1867         return (DDI_SUCCESS);
1868 }
1869 
1870 
1871 /*
1872  * hermon_pd_refcnt_inc()
1873  *    Context: Can be called from interrupt or base context.
1874  */
1875 void
1876 hermon_pd_refcnt_inc(hermon_pdhdl_t pd)
1877 {
1878         /* Increment the protection domain's reference count */
1879         atomic_inc_32(&pd->pd_refcnt);
1880 }
1881 
1882 
1883 /*
1884  * hermon_pd_refcnt_dec()
1885  *    Context: Can be called from interrupt or base context.
1886  */
1887 void
1888 hermon_pd_refcnt_dec(hermon_pdhdl_t pd)
1889 {
1890         /* Decrement the protection domain's reference count */
1891         atomic_dec_32(&pd->pd_refcnt);
1892 }
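
/*
 * Illustrative sketch (not part of the driver): the life cycle of a
 * Protection Domain using the routines above.  A PD may only be freed once
 * every consumer (memory region, queue pair, address handle, etc.) has
 * dropped its reference.  The helper name is hypothetical and the flow is
 * shown for base context (HERMON_SLEEP).
 */
static int
hermon_example_pd_lifecycle(hermon_state_t *state)
{
        hermon_pdhdl_t  pd;
        int             status;

        /* Allocate a PD; each PD comes with a unique PD number */
        status = hermon_pd_alloc(state, &pd, HERMON_SLEEP);
        if (status != DDI_SUCCESS)
                return (status);

        /* A consumer object takes a reference on the PD when it is created */
        hermon_pd_refcnt_inc(pd);

        /* ... and drops that reference when the object is destroyed */
        hermon_pd_refcnt_dec(pd);

        /* With the reference count back at zero, the PD can be freed */
        return (hermon_pd_free(state, &pd));
}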
1893 
1894 
1895 /*
1896  * hermon_port_query()
1897  *    Context: Can be called only from user or kernel context.
1898  */
1899 int
1900 hermon_port_query(hermon_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
1901 {
1902         sm_portinfo_t           portinfo;
1903         sm_guidinfo_t           guidinfo;
1904         sm_pkey_table_t         pkeytable;
1905         ib_gid_t                *sgid;
1906         uint_t                  sgid_max, pkey_max, tbl_size;
1907         int                     i, j, indx, status;
1908         ib_pkey_t               *pkeyp;
1909         ib_guid_t               *guidp;
1910 
1911         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
1912         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))
1913 
1914         /* Validate that specified port number is legal */
1915         if (!hermon_portnum_is_valid(state, port)) {
1916                 return (IBT_HCA_PORT_INVALID);
1917         }
1918         pkeyp = state->hs_pkey[port - 1];
1919         guidp = state->hs_guid[port - 1];
1920 
1921         /*
1922          * We use the Hermon MAD_IFC command to post a GetPortInfo MAD
1923          * to the firmware (for the specified port number).  This returns
1924          * a full PortInfo MAD (in "portinfo") which we subsequently
1925          * parse to fill in the "ibt_hca_portinfo_t" structure returned
1926          * to the IBTF.
1927          */
1928         status = hermon_getportinfo_cmd_post(state, port,
1929             HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
1930         if (status != HERMON_CMD_SUCCESS) {
1931                 cmn_err(CE_CONT, "Hermon: GetPortInfo (port %02d) command "
1932                     "failed: %08x\n", port, status);
1933                 if (status == HERMON_CMD_INVALID_STATUS) {
1934                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1935                 }
1936                 return (ibc_get_ci_failure(0));
1937         }
1938 
1939         /*
1940          * Parse the PortInfo MAD and fill in the IBTF structure
1941          */
1942         pi->p_base_lid               = portinfo.LID;
1943         pi->p_qkey_violations        = portinfo.Q_KeyViolations;
1944         pi->p_pkey_violations        = portinfo.P_KeyViolations;
1945         pi->p_sm_sl          = portinfo.MasterSMSL;
1946         pi->p_sm_lid         = portinfo.MasterSMLID;
1947         pi->p_linkstate              = portinfo.PortState;
1948         pi->p_port_num               = portinfo.LocalPortNum;
1949         pi->p_phys_state     = portinfo.PortPhysicalState;
1950         pi->p_width_supported        = portinfo.LinkWidthSupported;
1951         pi->p_width_enabled  = portinfo.LinkWidthEnabled;
1952         pi->p_width_active   = portinfo.LinkWidthActive;
1953         pi->p_speed_supported        = portinfo.LinkSpeedSupported;
1954         pi->p_speed_enabled  = portinfo.LinkSpeedEnabled;
1955         pi->p_speed_active   = portinfo.LinkSpeedActive;
1956         pi->p_mtu            = portinfo.MTUCap;
1957         pi->p_lmc            = portinfo.LMC;
1958         pi->p_max_vl         = portinfo.VLCap;
1959         pi->p_subnet_timeout = portinfo.SubnetTimeOut;
1960         pi->p_msg_sz         = ((uint32_t)1 << HERMON_QP_LOG_MAX_MSGSZ);
1961         tbl_size = state->hs_cfg_profile->cp_log_max_gidtbl;
1962         pi->p_sgid_tbl_sz    = (1 << tbl_size);
1963         tbl_size = state->hs_cfg_profile->cp_log_max_pkeytbl;
1964         pi->p_pkey_tbl_sz    = (1 << tbl_size);
1965         state->hs_sn_prefix[port - 1] = portinfo.GidPrefix;
1966 
1967         /*
1968          * Convert InfiniBand-defined port capability flags to the format
1969          * specified by the IBTF
1970          */
1971         if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
1972                 pi->p_capabilities |= IBT_PORT_CAP_SM;
1973         if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
1974                 pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
1975         if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
1976                 pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
1977         if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
1978                 pi->p_capabilities |= IBT_PORT_CAP_DM;
1979         if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
1980                 pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
1981         if (portinfo.CapabilityMask & SM_CAP_MASK_IS_CLNT_REREG_SUPPD)
1982                 pi->p_capabilities |= IBT_PORT_CAP_CLNT_REREG;
1983 
1984         /*
1985          * Fill in the SGID table.  Since the only access to the Hermon
1986          * GID tables is through the firmware's MAD_IFC interface, we
1987          * post as many GetGUIDInfo MADs as necessary to read in the entire
1988          * contents of the SGID table (for the specified port).  Note:  The
1989          * GetGUIDInfo command only gets eight GUIDs per operation.  These
1990          * GUIDs are then appended to the GID prefix for the port (from the
1991          * GetPortInfo above) to form the entire SGID table.
1992          */
1993         for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
1994                 status = hermon_getguidinfo_cmd_post(state, port, i >> 3,
1995                     HERMON_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
1996                 if (status != HERMON_CMD_SUCCESS) {
1997                         cmn_err(CE_CONT, "Hermon: GetGUIDInfo (port %02d) "
1998                             "command failed: %08x\n", port, status);
1999                         if (status == HERMON_CMD_INVALID_STATUS) {
2000                                 hermon_fm_ereport(state, HCA_SYS_ERR,
2001                                     HCA_ERR_SRV_LOST);
2002                         }
2003                         return (ibc_get_ci_failure(0));
2004                 }
2005 
2006                 /* Figure out how many of the entries are valid */
2007                 sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
2008                 for (j = 0; j < sgid_max; j++) {
2009                         indx = (i + j);
2010                         sgid = &pi->p_sgid_tbl[indx];
2011                         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid))
2012                         sgid->gid_prefix = portinfo.GidPrefix;
2013                         guidp[indx] = sgid->gid_guid =
2014                             guidinfo.GUIDBlocks[j];
2015                 }
2016         }
2017 
2018         /*
2019          * Fill in the PKey table.  Just as for the GID tables above, the
2020          * only access to the Hermon PKey tables is through the firmware's
2021          * MAD_IFC interface.  We post as many GetPKeyTable MADs as necessary
2022          * to read in the entire contents of the PKey table (for the specified
2023          * port).  Note:  The GetPKeyTable command only gets 32 PKeys per
2024          * operation.
2025          */
2026         for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
2027                 status = hermon_getpkeytable_cmd_post(state, port, i,
2028                     HERMON_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
2029                 if (status != HERMON_CMD_SUCCESS) {
2030                         cmn_err(CE_CONT, "Hermon: GetPKeyTable (port %02d) "
2031                             "command failed: %08x\n", port, status);
2032                         if (status == HERMON_CMD_INVALID_STATUS) {
2033                                 hermon_fm_ereport(state, HCA_SYS_ERR,
2034                                     HCA_ERR_SRV_LOST);
2035                         }
2036                         return (ibc_get_ci_failure(0));
2037                 }
2038 
2039                 /* Figure out how many of the entries are valid */
2040                 pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
2041                 for (j = 0; j < pkey_max; j++) {
2042                         indx = (i + j);
2043                         pkeyp[indx] = pi->p_pkey_tbl[indx] =
2044                             pkeytable.P_KeyTableBlocks[j];
2045                 }
2046         }
2047 
2048         return (DDI_SUCCESS);
2049 }
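
/*
 * Illustrative sketch (not part of the driver): the chunked-read arithmetic
 * used by hermon_port_query() above.  A table of "tbl_sz" entries is fetched
 * "chunk" entries at a time (8 for GetGUIDInfo, 32 for GetPKeyTable), and
 * the final chunk may be only partially valid, hence the min().  The
 * "fetch_chunk" callback is hypothetical and stands in for the MAD_IFC
 * command posts.
 */
static int
hermon_example_read_table(uint_t tbl_sz, uint_t chunk,
    int (*fetch_chunk)(uint_t first_indx, uint_t nvalid))
{
        uint_t  i, nvalid;
        int     status;

        for (i = 0; i < tbl_sz; i += chunk) {
                /* Number of valid entries in this chunk */
                nvalid = min(tbl_sz - i, chunk);
                status = fetch_chunk(i, nvalid);
                if (status != DDI_SUCCESS)
                        return (status);
        }

        return (DDI_SUCCESS);
}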
2050 
2051 
2052 /*
2053  * hermon_port_modify()
2054  *    Context: Can be called only from user or kernel context.
2055  */
2056 /* ARGSUSED */
2057 int
2058 hermon_port_modify(hermon_state_t *state, uint8_t port,
2059     ibt_port_modify_flags_t flags, uint8_t init_type)
2060 {
2061         sm_portinfo_t           portinfo;
2062         uint32_t                capmask;
2063         int                     status;
2064         hermon_hw_set_port_t    set_port;
2065 
2066         /*
2067          * Return an error if either of the unsupported flags are set
2068          */
2069         if ((flags & IBT_PORT_SHUTDOWN) ||
2070             (flags & IBT_PORT_SET_INIT_TYPE)) {
2071                 return (IBT_NOT_SUPPORTED);
2072         }
2073 
2074         bzero(&set_port, sizeof (set_port));
2075 
2076         /*
2077          * Determine whether we are trying to reset the QKey violation counter
2078          */
2079         if (flags & IBT_PORT_RESET_QKEY)
2080                 set_port.rqk = 1;
2081 
2082         /* Validate that specified port number is legal */
2083         if (!hermon_portnum_is_valid(state, port)) {
2084                 return (IBT_HCA_PORT_INVALID);
2085         }
2086 
2087         /*
2088          * Use the Hermon MAD_IFC command to post a GetPortInfo MAD to the
2089          * firmware (for the specified port number).  This returns a full
2090          * PortInfo MAD (in "portinfo") from which we pull the current
2091          * capability mask.  We then modify the capability mask as directed
2092          * by the "pmod_flags" field, and write the updated capability mask
2093          * using the Hermon SET_IB command (below).
2094          */
2095         status = hermon_getportinfo_cmd_post(state, port,
2096             HERMON_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
2097         if (status != HERMON_CMD_SUCCESS) {
2098                 if (status == HERMON_CMD_INVALID_STATUS) {
2099                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2100                 }
2101                 return (ibc_get_ci_failure(0));
2102         }
2103 
2104         /*
2105          * Convert InfiniBand-defined port capability flags to the format
2106          * specified by the IBTF.  Specifically, we modify the capability
2107          * mask based on the specified values.
2108          */
2109         capmask = portinfo.CapabilityMask;
2110 
2111         if (flags & IBT_PORT_RESET_SM)
2112                 capmask &= ~SM_CAP_MASK_IS_SM;
2113         else if (flags & IBT_PORT_SET_SM)
2114                 capmask |= SM_CAP_MASK_IS_SM;
2115 
2116         if (flags & IBT_PORT_RESET_SNMP)
2117                 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
2118         else if (flags & IBT_PORT_SET_SNMP)
2119                 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;
2120 
2121         if (flags & IBT_PORT_RESET_DEVMGT)
2122                 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
2123         else if (flags & IBT_PORT_SET_DEVMGT)
2124                 capmask |= SM_CAP_MASK_IS_DM_SUPPD;
2125 
2126         if (flags & IBT_PORT_RESET_VENDOR)
2127                 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
2128         else if (flags & IBT_PORT_SET_VENDOR)
2129                 capmask |= SM_CAP_MASK_IS_VM_SUPPD;
2130 
2131         set_port.cap_mask = capmask;
2132 
2133         /*
2134          * Use the Hermon SET_PORT command to update the capability mask and
2135          * (possibly) reset the QKey violation counter for the specified port.
2136          * Note: In general, this operation shouldn't fail.  If it does, then
2137          * it is an indication that something (probably in HW, but maybe in
2138          * SW) has gone seriously wrong.
2139          */
2140         status = hermon_set_port_cmd_post(state, &set_port, port,
2141             HERMON_SLEEPFLAG_FOR_CONTEXT());
2142         if (status != HERMON_CMD_SUCCESS) {
2143                 HERMON_WARNING(state, "failed to modify port capabilities");
2144                 cmn_err(CE_CONT, "Hermon: SET_IB (port %02d) command failed: "
2145                     "%08x\n", port, status);
2146                 if (status == HERMON_CMD_INVALID_STATUS) {
2147                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2148                 }
2149                 return (ibc_get_ci_failure(0));
2150         }
2151 
2152         return (DDI_SUCCESS);
2153 }
2154 
2155 
2156 /*
2157  * hermon_set_addr_path()
2158  *    Context: Can be called from interrupt or base context.
2159  *
2160  * Note: This routine is used for two purposes.  It is used to fill in the
2161  * Hermon UDAV fields, and it is used to fill in the address path information
2162  * for QPs.  Because the two Hermon structures are similar, common fields can
2163  * be filled in here.  Because they are different, however, we pass
2164  * be filled in here.  Because they differ slightly, however, we pass
2165  * an additional flag to indicate which type is being filled in, and we
2166  * handle each one accordingly.
2167 
2168 int hermon_srate_override = -1; /* allows ease of testing */
2169 
2170 int
2171 hermon_set_addr_path(hermon_state_t *state, ibt_adds_vect_t *av,
2172     hermon_hw_addr_path_t *path, uint_t type)
2173 {
2174         uint_t          gidtbl_sz;
2175         hermon_hw_udav_t *udav;
2176 
2177         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2178         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2179 
2180         udav = (hermon_hw_udav_t *)(void *)path;
2181         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*udav))
2182         path->mlid   = av->av_src_path;
2183         path->rlid   = av->av_dlid;
2184 
2185         switch (av->av_srate) {
2186         case IBT_SRATE_2:       /* 1xSDR-2.5Gb/s injection rate */
2187                 path->max_stat_rate = 7; break;
2188         case IBT_SRATE_10:      /* 4xSDR-10.0Gb/s injection rate */
2189                 path->max_stat_rate = 8; break;
2190         case IBT_SRATE_30:      /* 12xSDR-30Gb/s injection rate */
2191                 path->max_stat_rate = 9; break;
2192         case IBT_SRATE_5:       /* 1xDDR-5Gb/s injection rate */
2193                 path->max_stat_rate = 10; break;
2194         case IBT_SRATE_20:      /* 4xDDR-20Gb/s injection rate */
2195                 path->max_stat_rate = 11; break;
2196         case IBT_SRATE_40:      /* 4xQDR-40Gb/s injection rate */
2197                 path->max_stat_rate = 12; break;
2198         case IBT_SRATE_60:      /* 12xDDR-60Gb/s injection rate */
2199                 path->max_stat_rate = 13; break;
2200         case IBT_SRATE_80:      /* 8xQDR-80Gb/s injection rate */
2201                 path->max_stat_rate = 14; break;
2202         case IBT_SRATE_120:     /* 12xQDR-120Gb/s injection rate */
2203                 path->max_stat_rate = 15; break;
2204         case IBT_SRATE_NOT_SPECIFIED:   /* Max */
2205                 path->max_stat_rate = 0; break;
2206         default:
2207                 return (IBT_STATIC_RATE_INVALID);
2208         }
2209         if (hermon_srate_override != -1) /* for evaluating HCA firmware */
2210                 path->max_stat_rate = hermon_srate_override;
2211 
2212         /* If "grh" flag is set, then check for valid SGID index too */
2213         gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
2214         if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
2215                 return (IBT_SGID_INVALID);
2216         }
2217 
2218         /*
2219          * Fill in all "global" values regardless of the value in the GRH
2220          * flag.  Because "grh" is not set unless "av_send_grh" is set, the
2221          * hardware will ignore the other "global" values as necessary.  Note:
2222          * SW does this here to enable later query operations to return
2223          * exactly the same params that were passed when the addr path was
2224          * last written.
2225          */
2226         path->grh = av->av_send_grh;
2227         if (type == HERMON_ADDRPATH_QP) {
2228                 path->mgid_index = av->av_sgid_ix;
2229         } else {
2230                 /*
2231                  * For Hermon UDAV, the "mgid_index" field is the index into
2232                  * a combined table (not a per-port table) with a separate
2233                  * section for each port, so some extra calculation is necessary.
2234                  */
2235 
2236                 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
2237                     av->av_sgid_ix;
2238 
2239                 udav->portnum = av->av_port_num;
2240         }
2241 
2242         /*
2243          * According to Hermon PRM, the (31:0) part of rgid_l must be set to
2244          * "0x2" if the 'grh' or 'g' bit is cleared.  It also says that we
2245          * only need to do it for UDAV's.  So we enforce that here.
2246          *
2247          * NOTE: The entire 64 bits worth of GUID info is actually being
2248          * preserved (for UDAVs) by the callers of this function
2249          * (hermon_ah_alloc() and hermon_ah_modify()) and as long as the
2250          * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
2251          * "don't care".
2252          */
2253         if ((path->grh) || (type == HERMON_ADDRPATH_QP)) {
2254                 path->flow_label = av->av_flow;
2255                 path->tclass  = av->av_tclass;
2256                 path->hop_limit       = av->av_hop;
2257                 bcopy(&(av->av_dgid.gid_prefix), &(path->rgid_h),
2258                     sizeof (uint64_t));
2259                 bcopy(&(av->av_dgid.gid_guid), &(path->rgid_l),
2260                     sizeof (uint64_t));
2261         } else {
2262                 path->rgid_l  = 0x2;
2263                 path->flow_label = 0;
2264                 path->tclass  = 0;
2265                 path->hop_limit       = 0;
2266                 path->rgid_h  = 0;
2267         }
2268         /* extract the default service level */
2269         udav->sl = (HERMON_DEF_SCHED_SELECTION & 0x3C) >> 2;
2270 
2271         return (DDI_SUCCESS);
2272 }
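
/*
 * Illustrative sketch (not part of the driver): the UDAV SGID index mapping
 * used by hermon_set_addr_path() above and reversed in hermon_get_addr_path()
 * below.  For UDAVs, the hardware "mgid_index" addresses one combined GID
 * table containing a "gidtbl_sz"-entry section per port, so the per-port
 * SGID index and port number convert to and from the combined index as
 * shown.  The helper names are hypothetical.
 */
static uint_t
hermon_example_udav_gid_index(uint_t port_num, uint_t sgid_ix,
    uint_t gidtbl_sz)
{
        /* per-port SGID index -> combined-table index */
        return (((port_num - 1) * gidtbl_sz) + sgid_ix);
}

static uint_t
hermon_example_udav_sgid_ix(uint_t port_num, uint_t mgid_index,
    uint_t gidtbl_sz)
{
        /* combined-table index -> per-port SGID index */
        return (mgid_index - ((port_num - 1) * gidtbl_sz));
}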
2273 
2274 
2275 /*
2276  * hermon_get_addr_path()
2277  *    Context: Can be called from interrupt or base context.
2278  *
2279  * Note: Just like hermon_set_addr_path() above, this routine is used for two
2280  * purposes.  It is used to read in the Hermon UDAV fields, and it is used to
2281  * read in the address path information for QPs.  Because the two Hermon
2282  * structures are similar, common fields can be read in here.  But because
2283  * they are slightly different, we pass an additional flag to indicate which
2284  * type is being read.
2285  */
2286 void
2287 hermon_get_addr_path(hermon_state_t *state, hermon_hw_addr_path_t *path,
2288     ibt_adds_vect_t *av, uint_t type)
2289 {
2290         uint_t          gidtbl_sz;
2291 
2292         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2293         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2294 
2295         av->av_src_path      = path->mlid;
2296         av->av_dlid  = path->rlid;
2297 
2298         /* Set "av_srate" value from max_stat_rate */
2299         switch (path->max_stat_rate) {
2300         case 7:                         /* 1xSDR-2.5Gb/s injection rate */
2301                 av->av_srate = IBT_SRATE_2; break;
2302         case 8:                         /* 4xSDR-10.0Gb/s injection rate */
2303                 av->av_srate = IBT_SRATE_10; break;
2304         case 9:                         /* 12xSDR-30Gb/s injection rate */
2305                 av->av_srate = IBT_SRATE_30; break;
2306         case 10:                        /* 1xDDR-5Gb/s injection rate */
2307                 av->av_srate = IBT_SRATE_5; break;
2308         case 11:                        /* 4xDDR-20Gb/s injection rate */
2309                 av->av_srate = IBT_SRATE_20; break;
2310         case 12:                        /* 4xQDR-40Gb/s injection rate */
2311                 av->av_srate = IBT_SRATE_40; break;
2312         case 13:                        /* 12xDDR-60Gb/s injection rate */
2313                 av->av_srate = IBT_SRATE_60; break;
2314         case 14:                        /* 8xQDR-80Gb/s injection rate */
2315                 av->av_srate = IBT_SRATE_80; break;
2316         case 15:                        /* 12xQDR-120Gb/s injection rate */
2317                 av->av_srate = IBT_SRATE_120; break;
2318         case 0:                         /* max */
2319                 av->av_srate = IBT_SRATE_NOT_SPECIFIED; break;
2320         default:                        /* 1x injection rate */
2321                 av->av_srate = IBT_SRATE_1X;
2322         }
2323 
2324         /*
2325          * Extract all "global" values regardless of the value in the GRH
2326          * flag.  Because "av_send_grh" is set only if "grh" is set, software
2327          * knows to ignore the other "global" values as necessary.  Note: SW
2328          * does it this way to enable these query operations to return exactly
2329          * the same params that were passed when the addr path was last written.
2330          */
2331         av->av_send_grh              = path->grh;
2332         if (type == HERMON_ADDRPATH_QP) {
2333                 av->av_sgid_ix  = path->mgid_index;
2334         } else {
2335                 /*
2336                  * For Hermon UDAV, the "mgid_index" field is the index into
2337                  * a combined table (not a per-port table).
2338                  */
2339                 gidtbl_sz = (1 << state->hs_queryport.log_max_gid);
2340                 av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
2341                     gidtbl_sz);
2342 
2343                 av->av_port_num = ((hermon_hw_udav_t *)(void *)path)->portnum;
2344         }
2345         av->av_flow          = path->flow_label;
2346         av->av_tclass                = path->tclass;
2347         av->av_hop           = path->hop_limit;
2348         /* this is for alignment issue w/ the addr path struct in Hermon */
2349         bcopy(&(path->rgid_h), &(av->av_dgid.gid_prefix), sizeof (uint64_t));
2350         bcopy(&(path->rgid_l), &(av->av_dgid.gid_guid), sizeof (uint64_t));
2351 }
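
/*
 * Illustrative sketch (not part of the driver): a round trip through
 * hermon_set_addr_path() and hermon_get_addr_path() for a QP address path.
 * Because both routines read and write every "global" field regardless of
 * the GRH flag, the address vector read back matches the one written.  The
 * field values are arbitrary example inputs and the helper name is
 * hypothetical; a union of the address-path and UDAV layouts is used as
 * scratch space because hermon_set_addr_path() also stores the default SL
 * through the UDAV view of the buffer.
 */
static int
hermon_example_addr_path_roundtrip(hermon_state_t *state)
{
        ibt_adds_vect_t av_in, av_out;
        union {
                hermon_hw_addr_path_t   path;
                hermon_hw_udav_t        udav;
        } scratch;
        int             status;

        bzero(&av_in, sizeof (av_in));
        bzero(&av_out, sizeof (av_out));
        bzero(&scratch, sizeof (scratch));

        av_in.av_dlid     = 0x1234;             /* example DLID */
        av_in.av_srate    = IBT_SRATE_10;       /* 4xSDR static rate */
        av_in.av_send_grh = 0;                  /* no GRH */

        status = hermon_set_addr_path(state, &av_in, &scratch.path,
            HERMON_ADDRPATH_QP);
        if (status != DDI_SUCCESS)
                return (status);

        hermon_get_addr_path(state, &scratch.path, &av_out,
            HERMON_ADDRPATH_QP);

        /* av_out now reflects the DLID, static rate, etc. written above */
        return (DDI_SUCCESS);
}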
2352 
2353 
2354 /*
2355  * hermon_portnum_is_valid()
2356  *    Context: Can be called from interrupt or base context.
2357  */
2358 int
2359 hermon_portnum_is_valid(hermon_state_t *state, uint_t portnum)
2360 {
2361         uint_t  max_port;
2362 
2363         max_port = state->hs_cfg_profile->cp_num_ports;
2364         if ((portnum <= max_port) && (portnum != 0)) {
2365                 return (1);
2366         } else {
2367                 return (0);
2368         }
2369 }
2370 
2371 
2372 /*
2373  * hermon_pkeyindex_is_valid()
2374  *    Context: Can be called from interrupt or base context.
2375  */
2376 int
2377 hermon_pkeyindex_is_valid(hermon_state_t *state, uint_t pkeyindx)
2378 {
2379         uint_t  max_pkeyindx;
2380 
2381         max_pkeyindx = 1 << state->hs_cfg_profile->cp_log_max_pkeytbl;
2382         if (pkeyindx < max_pkeyindx) {
2383                 return (1);
2384         } else {
2385                 return (0);
2386         }
2387 }
2388 
2389 
2390 /*
2391  * hermon_queue_alloc()
2392  *    Context: Can be called from interrupt or base context.
2393  */
2394 int
2395 hermon_queue_alloc(hermon_state_t *state, hermon_qalloc_info_t *qa_info,
2396     uint_t sleepflag)
2397 {
2398         ddi_dma_attr_t          dma_attr;
2399         int                     (*callback)(caddr_t);
2400         uint64_t                realsize, alloc_mask;
2401         int                     flag, status;
2402 
2403         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2404 
2405         /* Set the callback flag appropriately */
2406         callback = (sleepflag == HERMON_SLEEP) ? DDI_DMA_SLEEP :
2407             DDI_DMA_DONTWAIT;
2408 
2409         /*
2410          * Initialize many of the default DMA attributes.  Then set additional
2411          * alignment restrictions as necessary for the queue memory.  Also
2412          * respect the configured value for IOMMU bypass
2413          */
2414         hermon_dma_attr_init(state, &dma_attr);
2415         dma_attr.dma_attr_align = qa_info->qa_bind_align;
2416 #ifdef  __sparc
2417         if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS) {
2418                 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
2419         }
2420 #endif
2421 
2422         /* Allocate a DMA handle */
2423         status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, callback, NULL,
2424             &qa_info->qa_dmahdl);
2425         if (status != DDI_SUCCESS) {
2426                 return (DDI_FAILURE);
2427         }
2428 
2429         /*
2430          * Determine the amount of memory to allocate, depending on the values
2431          * in "qa_bind_align" and "qa_alloc_align".  The problem we are trying
2432          * to solve here is that allocating a DMA handle with IOMMU bypass
2433          * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
2434          * that are less restrictive than the page size.  Since we may need
2435          * stricter alignments on the memory allocated by ddi_dma_mem_alloc()
2436          * (e.g. in Hermon QP work queue memory allocation), we use the
2437          * following method to calculate how much additional memory to request,
2438          * and we enforce our own alignment on the allocated result.
2439          */
2440         alloc_mask = qa_info->qa_alloc_align - 1;
2441         if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
2442                 realsize = qa_info->qa_size;
2443         } else {
2444                 realsize = qa_info->qa_size + alloc_mask;
2445         }
2446 
2447         /*
2448          * If we are to allocate the queue from system memory, then use
2449          * ddi_dma_mem_alloc() to find the space.  Otherwise, the memory
2450          * must be userland mappable, so use ddi_umem_alloc(). In either case,
2451          * return a pointer to the memory range allocated (including any
2452          * necessary alignment adjustments), the "real" memory pointer,
2453          * the "real" size, and a ddi_acc_handle_t to use when reading
2454          * from/writing to the memory.
2455          */
2456         if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
2457                 /* Allocate system memory for the queue */
2458                 status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
2459                     &state->hs_reg_accattr, DDI_DMA_CONSISTENT, callback, NULL,
2460                     (caddr_t *)&qa_info->qa_buf_real,
2461                     (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
2462                 if (status != DDI_SUCCESS) {
2463                         ddi_dma_free_handle(&qa_info->qa_dmahdl);
2464                         return (DDI_FAILURE);
2465                 }
2466 
2467                 /*
2468                  * Save temporary copy of the real pointer.  (This may be
2469                  * modified in the last step below).
2470                  */
2471                 qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2472 
2473                 bzero(qa_info->qa_buf_real, qa_info->qa_buf_realsz);
2474 
2475         } else { /* HERMON_QUEUE_LOCATION_USERLAND */
2476 
2477                 /* Allocate userland mappable memory for the queue */
2478                 flag = (sleepflag == HERMON_SLEEP) ? DDI_UMEM_SLEEP :
2479                     DDI_UMEM_NOSLEEP;
2480                 qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
2481                     &qa_info->qa_umemcookie);
2482                 if (qa_info->qa_buf_real == NULL) {
2483                         ddi_dma_free_handle(&qa_info->qa_dmahdl);
2484                         return (DDI_FAILURE);
2485                 }
2486 
2487                 /*
2488                  * Save temporary copy of the real pointer.  (This may be
2489                  * modified in the last step below).
2490                  */
2491                 qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2492 
2493         }
2494 
2495         /*
2496          * The next to last step is to ensure that the final address
2497          * ("qa_buf_aligned") has the appropriate "alloc" alignment
2498          * restriction applied to it (if necessary).
2499          */
2500         if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
2501                 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2502                     qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
2503         }
2504         /*
2505          * The last step is to figure out the offset of the start relative
2506          * to the first page of the region - will be used in the eqc/cqc
2507          * passed to the HW
2508          */
2509         qa_info->qa_pgoffs = (uint_t)((uintptr_t)
2510             qa_info->qa_buf_aligned & HERMON_PAGEOFFSET);
2511 
2512         return (DDI_SUCCESS);
2513 }
2514 
2515 
2516 /*
2517  * hermon_queue_free()
2518  *    Context: Can be called from interrupt or base context.
2519  */
2520 void
2521 hermon_queue_free(hermon_qalloc_info_t *qa_info)
2522 {
2523         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2524 
2525         /*
2526          * Depending on how (i.e. from where) we allocated the memory for
2527          * this queue, we choose the appropriate method for releasing the
2528          * resources.
2529          */
2530         if (qa_info->qa_location == HERMON_QUEUE_LOCATION_NORMAL) {
2531 
2532                 ddi_dma_mem_free(&qa_info->qa_acchdl);
2533 
2534         } else if (qa_info->qa_location == HERMON_QUEUE_LOCATION_USERLAND) {
2535 
2536                 ddi_umem_free(qa_info->qa_umemcookie);
2537 
2538         }
2539 
2540         /* Always free the dma handle */
2541         ddi_dma_free_handle(&qa_info->qa_dmahdl);
2542 }
2543 
2544 /*
2545  * hermon_create_fmr_pool()
2546  * Create a pool of FMRs.
2547  *     Context: Can be called from kernel context only.
2548  */
2549 int
2550 hermon_create_fmr_pool(hermon_state_t *state, hermon_pdhdl_t pd,
2551     ibt_fmr_pool_attr_t *fmr_attr, hermon_fmrhdl_t *fmrpoolp)
2552 {
2553         hermon_fmrhdl_t fmrpool;
2554         hermon_fmr_list_t *fmr, *fmr_next;
2555         hermon_mrhdl_t   mr;
2556         int             status;
2557         int             sleep;
2558         int             i;
2559 
2560         sleep = (fmr_attr->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
2561             HERMON_NOSLEEP;
2562         if ((sleep == HERMON_SLEEP) &&
2563             (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
2564                 return (IBT_INVALID_PARAM);
2565         }
2566 
2567         fmrpool = (hermon_fmrhdl_t)kmem_zalloc(sizeof (*fmrpool), sleep);
2568         if (fmrpool == NULL) {
2569                 status = IBT_INSUFF_RESOURCE;
2570                 goto fail;
2571         }
2572         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmrpool))
2573 
2574         mutex_init(&fmrpool->fmr_lock, NULL, MUTEX_DRIVER,
2575             DDI_INTR_PRI(state->hs_intrmsi_pri));
2576         mutex_init(&fmrpool->remap_lock, NULL, MUTEX_DRIVER,
2577             DDI_INTR_PRI(state->hs_intrmsi_pri));
2578         mutex_init(&fmrpool->dirty_lock, NULL, MUTEX_DRIVER,
2579             DDI_INTR_PRI(state->hs_intrmsi_pri));
2580 
        fmrpool->fmr_state              = state;
        fmrpool->fmr_flush_function     = fmr_attr->fmr_func_hdlr;
        fmrpool->fmr_flush_arg          = fmr_attr->fmr_func_arg;
        fmrpool->fmr_pool_size          = 0;
        fmrpool->fmr_max_pages          = fmr_attr->fmr_max_pages_per_fmr;
        fmrpool->fmr_page_sz            = fmr_attr->fmr_page_sz;
        fmrpool->fmr_dirty_watermark    = fmr_attr->fmr_pool_size / 4;
        fmrpool->fmr_dirty_len          = 0;
        fmrpool->fmr_remap_watermark    = fmr_attr->fmr_pool_size / 32;
        fmrpool->fmr_remap_len          = 0;
        fmrpool->fmr_flags              = fmr_attr->fmr_flags;
        fmrpool->fmr_stat_register      = 0;
        fmrpool->fmr_max_remaps         =
            state->hs_cfg_profile->cp_fmr_max_remaps;
        fmrpool->fmr_remap_gen          = 1;

        fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list;
        fmrpool->fmr_dirty_list = NULL;
        fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list;
        fmrpool->fmr_remap_list = NULL;
        fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
        fmrpool->fmr_pool_size = fmrpool->fmr_free_len =
            fmr_attr->fmr_pool_size;
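
        /*
         * Note on the watermarks initialized above: entries that have
         * exhausted their remap budget are parked on the "dirty" list and,
         * once roughly a quarter of the pool (fmr_pool_size / 4) is dirty,
         * a SYNC_TPT cleanup is triggered.  Entries with remaps remaining
         * are parked on the "remap" list and are batched back onto the
         * free list once about one thirty-second of the pool
         * (fmr_pool_size / 32) has accumulated.  See hermon_deregister_fmr()
         * and hermon_fmr_cleanup() below.
         */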
2603 
        for (i = 0; i < fmr_attr->fmr_pool_size; i++) {
                status = hermon_mr_alloc_fmr(state, pd, fmrpool, &mr);
                if (status != DDI_SUCCESS) {
                        goto fail2;
                }

                fmr = (hermon_fmr_list_t *)kmem_zalloc(
                    sizeof (hermon_fmr_list_t), sleep);
                if (fmr == NULL) {
                        /* release the FMR just allocated, then tear down */
                        (void) hermon_mr_dealloc_fmr(state, &mr);
                        status = IBT_INSUFF_RESOURCE;
                        goto fail2;
                }
                _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2613 
2614                 fmr->fmr = mr;
2615                 fmr->fmr_remaps = 0;
2616                 fmr->fmr_remap_gen = fmrpool->fmr_remap_gen;
2617                 fmr->fmr_pool = fmrpool;
2618                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
2619                 mr->mr_fmr = fmr;
2620 
                if (i == 0)     /* first entry ends up last; its link is the tail */
                        fmrpool->fmr_free_list_tail = &fmr->fmr_next;
2623                 fmr->fmr_next = fmrpool->fmr_free_list;
2624                 fmrpool->fmr_free_list = fmr;
2625         }
2626 
2627         /* Set to return pool */
2628         *fmrpoolp = fmrpool;
2629 
2630         IBTF_DPRINTF_L2("fmr", "create_fmr_pool SUCCESS");
2631         return (IBT_SUCCESS);
2632 fail2:
2633         for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
2634                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2635                 fmr_next = fmr->fmr_next;
2636                 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
2637                 kmem_free(fmr, sizeof (hermon_fmr_list_t));
2638         }
2639         kmem_free(fmrpool, sizeof (*fmrpool));
2640 fail:
2641         *fmrpoolp = NULL;
2642         IBTF_DPRINTF_L2("fmr", "create_fmr_pool FAILED");
2643         if (status == DDI_FAILURE) {
2644                 return (ibc_get_ci_failure(0));
2645         } else {
2646                 return (status);
2647         }
2648 }
2649 
2650 /*
2651  * hermon_destroy_fmr_pool()
2652  * Destroy an FMR pool and free all associated resources.
2653  *     Context: Can be called from kernel context only.
2654  */
2655 int
2656 hermon_destroy_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2657 {
2658         hermon_fmr_list_t       *fmr, *fmr_next;
2659 
2660         mutex_enter(&fmrpool->fmr_lock);
2661         hermon_fmr_cleanup(fmrpool);
2662 
2663         for (fmr = fmrpool->fmr_free_list; fmr != NULL; fmr = fmr_next) {
2664                 fmr_next = fmr->fmr_next;
2665 
2666                 (void) hermon_mr_dealloc_fmr(state, &fmr->fmr);
2667                 kmem_free(fmr, sizeof (hermon_fmr_list_t));
2668 
2669                 --fmrpool->fmr_pool_size;
2670         }
2671         ASSERT(fmrpool->fmr_pool_size == 0);
2672         mutex_exit(&fmrpool->fmr_lock);
2673 
2674         mutex_destroy(&fmrpool->fmr_lock);
2675         mutex_destroy(&fmrpool->dirty_lock);
2676         mutex_destroy(&fmrpool->remap_lock);
2677 
2678         kmem_free(fmrpool, sizeof (*fmrpool));
2679         IBTF_DPRINTF_L2("fmr", "destroy_fmr_pool SUCCESS");
2680         return (DDI_SUCCESS);
2681 }
2682 
2683 /*
2684  * hermon_flush_fmr_pool()
2685  * Ensure that all unmapped FMRs are fully invalidated.
2686  *     Context: Can be called from kernel context only.
2687  */
2688 /* ARGSUSED */
2689 int
2690 hermon_flush_fmr_pool(hermon_state_t *state, hermon_fmrhdl_t fmrpool)
2691 {
2692         /*
2693          * Force the unmapping of all entries on the dirty list, regardless of
2694          * whether the watermark has been hit yet.
2695          */
2696         /* grab the pool lock */
2697         mutex_enter(&fmrpool->fmr_lock);
2698         hermon_fmr_cleanup(fmrpool);
2699         mutex_exit(&fmrpool->fmr_lock);
2700         return (DDI_SUCCESS);
2701 }
2702 
2703 /*
2704  * hermon_register_physical_fmr()
2705  * Map memory into FMR
2706  *    Context: Can be called from interrupt or base context.
2707  */
2708 int
2709 hermon_register_physical_fmr(hermon_state_t *state, hermon_fmrhdl_t fmrpool,
2710     ibt_pmr_attr_t *mem_pattr, hermon_mrhdl_t *mr,
2711     ibt_pmr_desc_t *mem_desc_p)
2712 {
2713         hermon_fmr_list_t       *fmr;
2714         int                     status;
2715 
        /* Check length and page count */
2717         if (mem_pattr->pmr_len < 1 || (mem_pattr->pmr_num_buf >
2718             fmrpool->fmr_max_pages)) {
2719                 return (IBT_MR_LEN_INVALID);
2720         }
2721 
2722         mutex_enter(&fmrpool->fmr_lock);
2723         if (fmrpool->fmr_free_list == NULL) {
2724                 if (hermon_fmr_verbose & 2)
2725                         IBTF_DPRINTF_L2("fmr", "register needs remap");
2726                 mutex_enter(&fmrpool->remap_lock);
2727                 if (fmrpool->fmr_remap_list) {
2728                         /* add to free list */
2729                         *(fmrpool->fmr_free_list_tail) =
2730                             fmrpool->fmr_remap_list;
2731                         fmrpool->fmr_remap_list = NULL;
2732                         fmrpool->fmr_free_list_tail =
2733                             fmrpool->fmr_remap_list_tail;
2734 
2735                         /* reset list */
2736                         fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
2737                         fmrpool->fmr_free_len += fmrpool->fmr_remap_len;
2738                         fmrpool->fmr_remap_len = 0;
2739                 }
2740                 mutex_exit(&fmrpool->remap_lock);
2741         }
2742         if (fmrpool->fmr_free_list == NULL) {
2743                 if (hermon_fmr_verbose & 2)
2744                         IBTF_DPRINTF_L2("fmr", "register needs cleanup");
2745                 hermon_fmr_cleanup(fmrpool);
2746         }
2747 
2748         /* grab next free entry */
2749         fmr = fmrpool->fmr_free_list;
2750         if (fmr == NULL) {
2751                 IBTF_DPRINTF_L2("fmr", "WARNING: no free fmr resource");
2752                 cmn_err(CE_CONT, "no free fmr resource\n");
2753                 mutex_exit(&fmrpool->fmr_lock);
2754                 return (IBT_INSUFF_RESOURCE);
2755         }
2756 
2757         if ((fmrpool->fmr_free_list = fmr->fmr_next) == NULL)
2758                 fmrpool->fmr_free_list_tail = &fmrpool->fmr_free_list;
2759         fmr->fmr_next = NULL;
2760         fmrpool->fmr_stat_register++;
2761         mutex_exit(&fmrpool->fmr_lock);
2762 
2763         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2764         status = hermon_mr_register_physical_fmr(state, mem_pattr, fmr->fmr,
2765             mem_desc_p);
2766         if (status != DDI_SUCCESS) {
2767                 return (status);
2768         }
2769         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr->fmr))
2770         if (hermon_rdma_debug & 0x4)
2771                 IBTF_DPRINTF_L2("fmr", "  reg: mr %p  key %x",
2772                     fmr->fmr, fmr->fmr->mr_rkey);
2773         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*fmr->fmr))
2774         if (fmr->fmr_remap_gen != fmrpool->fmr_remap_gen) {
2775                 fmr->fmr_remap_gen = fmrpool->fmr_remap_gen;
2776                 fmr->fmr_remaps = 0;
2777         }
2778 
2779         fmr->fmr_remaps++;
2780 
2781         *mr = (hermon_mrhdl_t)fmr->fmr;
2782 
2783         return (DDI_SUCCESS);
2784 }
2785 
2786 /*
2787  * hermon_deregister_fmr()
2788  * Unmap FMR
2789  *    Context: Can be called from kernel context only.
2790  */
2791 int
2792 hermon_deregister_fmr(hermon_state_t *state, hermon_mrhdl_t mr)
2793 {
2794         hermon_fmrhdl_t         fmrpool;
2795         hermon_fmr_list_t       *fmr, **fmrlast;
2796         int                     len;
2797 
2798         fmr = mr->mr_fmr;
2799         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fmr))
2800         fmrpool = fmr->fmr_pool;
2801 
        /* mark the MPT entry as owned by software */
2803         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
2804         *(uint8_t *)(fmr->fmr->mr_mptrsrcp->hr_addr) = 0xF0;
2805 
2806         if (fmr->fmr_remaps <
2807             state->hs_cfg_profile->cp_fmr_max_remaps) {
2808                 /* add to remap list */
2809                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
2810                 if (hermon_rdma_debug & 0x4)
2811                         IBTF_DPRINTF_L2("fmr", "dereg: mr %p  key %x",
2812                             fmr->fmr, fmr->fmr->mr_rkey);
2813                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
2814                 mutex_enter(&fmrpool->remap_lock);
2815                 fmr->fmr_next = NULL;
2816                 *(fmrpool->fmr_remap_list_tail) = fmr;
2817                 fmrpool->fmr_remap_list_tail = &fmr->fmr_next;
2818                 fmrpool->fmr_remap_len++;
2819 
2820                 /* conditionally add remap list back to free list */
2821                 fmrlast = NULL;
2822                 if (fmrpool->fmr_remap_len >=
2823                     fmrpool->fmr_remap_watermark) {
2824                         fmr = fmrpool->fmr_remap_list;
2825                         fmrlast = fmrpool->fmr_remap_list_tail;
2826                         len = fmrpool->fmr_remap_len;
2827                         fmrpool->fmr_remap_len = 0;
2828                         fmrpool->fmr_remap_list = NULL;
2829                         fmrpool->fmr_remap_list_tail =
2830                             &fmrpool->fmr_remap_list;
2831                 }
2832                 mutex_exit(&fmrpool->remap_lock);
2833                 if (fmrlast) {
2834                         mutex_enter(&fmrpool->fmr_lock);
2835                         *(fmrpool->fmr_free_list_tail) = fmr;
2836                         fmrpool->fmr_free_list_tail = fmrlast;
2837                         fmrpool->fmr_free_len += len;
2838                         mutex_exit(&fmrpool->fmr_lock);
2839                 }
2840         } else {
2841                 /* add to dirty list */
2842                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
2843                 if (hermon_rdma_debug & 0x4)
2844                         IBTF_DPRINTF_L2("fmr", "dirty: mr %p  key %x",
2845                             fmr->fmr, fmr->fmr->mr_rkey);
2846                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*(fmr->fmr)))
2847 
2848                 mutex_enter(&fmrpool->dirty_lock);
2849                 fmr->fmr_next = NULL;
2850                 *(fmrpool->fmr_dirty_list_tail) = fmr;
2851                 fmrpool->fmr_dirty_list_tail = &fmr->fmr_next;
2852                 fmrpool->fmr_dirty_len++;
2853 
2854                 if (fmrpool->fmr_dirty_len >=
2855                     fmrpool->fmr_dirty_watermark) {
2856                         mutex_exit(&fmrpool->dirty_lock);
2857                         mutex_enter(&fmrpool->fmr_lock);
2858                         hermon_fmr_cleanup(fmrpool);
2859                         mutex_exit(&fmrpool->fmr_lock);
2860                 } else
2861                         mutex_exit(&fmrpool->dirty_lock);
2862         }
2863         return (DDI_SUCCESS);
2864 }
2865 
2866 /*
2867  * hermon_fmr_cleanup()
2868  *     Context: Called from any context.
2869  */
2870 static void
2871 hermon_fmr_cleanup(hermon_fmrhdl_t fmrpool)
2872 {
2873         int                     status;
2874 
2875         ASSERT(MUTEX_HELD(&fmrpool->fmr_lock));
2876 
2877         if (fmrpool->fmr_stat_register == 0)
2878                 return;
2879 
2880         fmrpool->fmr_stat_register = 0;
2881         membar_producer();
2882 
2883         if (hermon_fmr_verbose)
2884                 IBTF_DPRINTF_L2("fmr", "TPT_SYNC");
2885         status = hermon_sync_tpt_cmd_post(fmrpool->fmr_state,
2886             HERMON_CMD_NOSLEEP_SPIN);
2887         if (status != HERMON_CMD_SUCCESS) {
2888                 cmn_err(CE_WARN, "fmr SYNC_TPT failed(%x)\n", status);
2889         }
2890         fmrpool->fmr_remap_gen++;
2891 
2892         /* add everything back to the free list */
2893         mutex_enter(&fmrpool->dirty_lock);
2894         if (fmrpool->fmr_dirty_list) {
2895                 /* add to free list */
2896                 *(fmrpool->fmr_free_list_tail) = fmrpool->fmr_dirty_list;
2897                 fmrpool->fmr_dirty_list = NULL;
2898                 fmrpool->fmr_free_list_tail = fmrpool->fmr_dirty_list_tail;
2899 
2900                 /* reset list */
2901                 fmrpool->fmr_dirty_list_tail = &fmrpool->fmr_dirty_list;
2902                 fmrpool->fmr_free_len += fmrpool->fmr_dirty_len;
2903                 fmrpool->fmr_dirty_len = 0;
2904         }
2905         mutex_exit(&fmrpool->dirty_lock);
2906 
2907         mutex_enter(&fmrpool->remap_lock);
2908         if (fmrpool->fmr_remap_list) {
2909                 /* add to free list */
2910                 *(fmrpool->fmr_free_list_tail) = fmrpool->fmr_remap_list;
2911                 fmrpool->fmr_remap_list = NULL;
2912                 fmrpool->fmr_free_list_tail = fmrpool->fmr_remap_list_tail;
2913 
2914                 /* reset list */
2915                 fmrpool->fmr_remap_list_tail = &fmrpool->fmr_remap_list;
2916                 fmrpool->fmr_free_len += fmrpool->fmr_remap_len;
2917                 fmrpool->fmr_remap_len = 0;
2918         }
2919         mutex_exit(&fmrpool->remap_lock);
2920 
2921         if (fmrpool->fmr_flush_function != NULL) {
2922                 (void) fmrpool->fmr_flush_function(
2923                     (ibc_fmr_pool_hdl_t)fmrpool,
2924                     fmrpool->fmr_flush_arg);
2925         }
2926 }