1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * hermon_srq.c
  28  *    Hermon Shared Receive Queue Processing Routines
  29  *
  30  *    Implements all the routines necessary for allocating, freeing, querying,
  31  *    modifying and posting shared receive queues.
  32  */
  33 
  34 #include <sys/sysmacros.h>
  35 #include <sys/types.h>
  36 #include <sys/conf.h>
  37 #include <sys/ddi.h>
  38 #include <sys/sunddi.h>
  39 #include <sys/modctl.h>
  40 #include <sys/bitmap.h>
  41 
  42 #include <sys/ib/adapters/hermon/hermon.h>
  43 
  44 static void hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
  45     hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);
  46 
  47 /*
  48  * hermon_srq_alloc()
  49  *    Context: Can be called only from user or kernel context.
  50  */
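      /*
       * A minimal sketch of how a caller might drive this routine (purely
       * illustrative; the local variable names and the HERMON_NOSLEEP choice
       * are assumptions, not taken from this file):
       *
       *     hermon_srq_info_t srqinfo;
       *     hermon_srqhdl_t   srqhdl;
       *
       *     srqinfo.srqi_ibt_srqhdl  = ibt_srqhdl;
       *     srqinfo.srqi_pd          = pdhdl;
       *     srqinfo.srqi_sizes       = &req_sizes;
       *     srqinfo.srqi_real_sizes  = &real_sizes;
       *     srqinfo.srqi_srqhdl      = &srqhdl;
       *     srqinfo.srqi_flags       = flags;
       *     status = hermon_srq_alloc(state, &srqinfo, HERMON_NOSLEEP);
       */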
  51 int
  52 hermon_srq_alloc(hermon_state_t *state, hermon_srq_info_t *srqinfo,
  53     uint_t sleepflag)
  54 {
  55         ibt_srq_hdl_t           ibt_srqhdl;
  56         hermon_pdhdl_t          pd;
  57         ibt_srq_sizes_t         *sizes;
  58         ibt_srq_sizes_t         *real_sizes;
  59         hermon_srqhdl_t         *srqhdl;
  60         ibt_srq_flags_t         flags;
  61         hermon_rsrc_t           *srqc, *rsrc;
  62         hermon_hw_srqc_t        srqc_entry;
  63         uint32_t                *buf;
  64         hermon_srqhdl_t         srq;
  65         hermon_umap_db_entry_t  *umapdb;
  66         ibt_mr_attr_t           mr_attr;
  67         hermon_mr_options_t     mr_op;
  68         hermon_mrhdl_t          mr;
  69         uint64_t                value, srq_desc_off;
  70         uint32_t                log_srq_size;
  71         uint32_t                uarpg;
  72         uint_t                  srq_is_umap;
  73         int                     flag, status;
  74         uint_t                  max_sgl;
  75         uint_t                  wqesz;
  76         uint_t                  srq_wr_sz;
  77 
  78         /*
   79          * The options->wq_location field used to select the work queue
   80          * location; it is now always HERMON_QUEUE_LOCATION_NORMAL.
  81          */
  82 
  83         /*
  84          * Extract the necessary info from the hermon_srq_info_t structure
  85          */
  86         real_sizes = srqinfo->srqi_real_sizes;
  87         sizes      = srqinfo->srqi_sizes;
  88         pd         = srqinfo->srqi_pd;
  89         ibt_srqhdl = srqinfo->srqi_ibt_srqhdl;
  90         flags      = srqinfo->srqi_flags;
  91         srqhdl     = srqinfo->srqi_srqhdl;
  92 
  93         /*
  94          * Determine whether SRQ is being allocated for userland access or
  95          * whether it is being allocated for kernel access.  If the SRQ is
  96          * being allocated for userland access, then lookup the UAR doorbell
  97          * page number for the current process.  Note:  If this is not found
  98          * (e.g. if the process has not previously open()'d the Hermon driver),
  99          * then an error is returned.
 100          */
 101         srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0;
 102         if (srq_is_umap) {
 103                 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
 104                     MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
 105                 if (status != DDI_SUCCESS) {
 106                         status = IBT_INVALID_PARAM;
  107                         goto srqalloc_fail;
 108                 }
 109                 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
 110         } else {
 111                 uarpg = state->hs_kernel_uar_index;
 112         }
 113 
 114         /* Increase PD refcnt */
 115         hermon_pd_refcnt_inc(pd);
 116 
 117         /* Allocate an SRQ context entry */
 118         status = hermon_rsrc_alloc(state, HERMON_SRQC, 1, sleepflag, &srqc);
 119         if (status != DDI_SUCCESS) {
 120                 status = IBT_INSUFF_RESOURCE;
 121                 goto srqalloc_fail1;
 122         }
 123 
 124         /* Allocate the SRQ Handle entry */
 125         status = hermon_rsrc_alloc(state, HERMON_SRQHDL, 1, sleepflag, &rsrc);
 126         if (status != DDI_SUCCESS) {
 127                 status = IBT_INSUFF_RESOURCE;
 128                 goto srqalloc_fail2;
 129         }
 130 
 131         srq = (hermon_srqhdl_t)rsrc->hr_addr;
 132 
 133         bzero(srq, sizeof (struct hermon_sw_srq_s));
  134 
  135         /* Calculate the SRQ number (just use the index, implicit in Hermon) */
 137         srq->srq_srqnum = srqc->hr_indx;
 138 
 139         /*
 140          * If this will be a user-mappable SRQ, then allocate an entry for
 141          * the "userland resources database".  This will later be added to
 142          * the database (after all further SRQ operations are successful).
 143          * If we fail here, we must undo the reference counts and the
 144          * previous resource allocation.
 145          */
 146         if (srq_is_umap) {
 147                 umapdb = hermon_umap_db_alloc(state->hs_instance,
 148                     srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC,
 149                     (uint64_t)(uintptr_t)rsrc);
 150                 if (umapdb == NULL) {
 151                         status = IBT_INSUFF_RESOURCE;
 152                         goto srqalloc_fail3;
 153                 }
 154         }
 155 
 156         /*
  157          * Allocate the doorbell record.  Hermon needs just one for the
  158          * SRQ, using uarpg (above) as the UAR index.
 159          */
 160 
 161         status = hermon_dbr_alloc(state, uarpg, &srq->srq_wq_dbr_acchdl,
 162             &srq->srq_wq_vdbr, &srq->srq_wq_pdbr, &srq->srq_rdbr_mapoffset);
 163         if (status != DDI_SUCCESS) {
 164                 status = IBT_INSUFF_RESOURCE;
 165                 goto srqalloc_fail4;
 166         }
 167 
 168         /*
 169          * Calculate the appropriate size for the SRQ.
 170          * Note:  All Hermon SRQs must be a power-of-2 in size.  Also
 171          * they may not be any smaller than HERMON_SRQ_MIN_SIZE.  This step
 172          * is to round the requested size up to the next highest power-of-2
 173          */
 174         srq_wr_sz = max(sizes->srq_wr_sz + 1, HERMON_SRQ_MIN_SIZE);
 175         log_srq_size = highbit(srq_wr_sz);
 176         if (ISP2(srq_wr_sz)) {
 177                 log_srq_size = log_srq_size - 1;
 178         }
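
              /*
               * For example, a request for sizes->srq_wr_sz == 100 becomes 101
               * after the "+ 1" above (assuming HERMON_SRQ_MIN_SIZE does not
               * exceed it); highbit(101) is 7 and 101 is not a power-of-2, so
               * log_srq_size stays 7 and the SRQ is sized at 128 entries (127
               * usable work requests are later reported back in real_sizes).
               */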
 179 
 180         /*
 181          * Next we verify that the rounded-up size is valid (i.e. consistent
 182          * with the device limits and/or software-configured limits).  If not,
 183          * then obviously we have a lot of cleanup to do before returning.
 184          */
 185         if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) {
 186                 status = IBT_HCA_WR_EXCEEDED;
 187                 goto srqalloc_fail4a;
 188         }
 189 
 190         /*
 191          * Next we verify that the requested number of SGL is valid (i.e.
 192          * consistent with the device limits and/or software-configured
 193          * limits).  If not, then obviously the same cleanup needs to be done.
 194          */
 195         max_sgl = state->hs_ibtfinfo.hca_attr->hca_max_srq_sgl;
 196         if (sizes->srq_sgl_sz > max_sgl) {
 197                 status = IBT_HCA_SGL_EXCEEDED;
 198                 goto srqalloc_fail4a;
 199         }
 200 
 201         /*
 202          * Determine the SRQ's WQE sizes.  This depends on the requested
 203          * number of SGLs.  Note: This also has the side-effect of
 204          * calculating the real number of SGLs (for the calculated WQE size)
 205          */
 206         hermon_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz,
 207             HERMON_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz,
 208             &srq->srq_wq_sgl);
 209 
 210         /*
 211          * Allocate the memory for SRQ work queues.  Note:  The location from
 212          * which we will allocate these work queues is always
 213          * QUEUE_LOCATION_NORMAL.  Since Hermon work queues are not
 214          * allowed to cross a 32-bit (4GB) boundary, the alignment of the work
 215          * queue memory is very important.  We used to allocate work queues
 216          * (the combined receive and send queues) so that they would be aligned
 217          * on their combined size.  That alignment guaranteed that they would
 218          * never cross the 4GB boundary (Hermon work queues are on the order of
 219          * MBs at maximum).  Now we are able to relax this alignment constraint
 220          * by ensuring that the IB address assigned to the queue memory (as a
 221          * result of the hermon_mr_register() call) is offset from zero.
 222          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
 223          * guarantee the alignment, but when attempting to use IOMMU bypass
 224          * mode we found that we were not allowed to specify any alignment that
 225          * was more restrictive than the system page size.  So we avoided this
 226          * constraint by passing two alignment values, one for the memory
 227          * allocation itself and the other for the DMA handle (for later bind).
 228          * This used to cause more memory than necessary to be allocated (in
  229          * order to guarantee the more restrictive alignment constraint).  But
  230          * by guaranteeing the zero-based IB virtual address for the queue, we
 231          * are able to conserve this memory.
 232          *
 233          * Note: If SRQ is not user-mappable, then it may come from either
 234          * kernel system memory or from HCA-attached local DDR memory.
 235          *
 236          * Note2: We align this queue on a pagesize boundary.  This is required
 237          * to make sure that all the resulting IB addresses will start at 0, for
 238          * a zero-based queue.  By making sure we are aligned on at least a
 239          * page, any offset we use into our queue will be the same as when we
 240          * perform hermon_srq_modify() operations later.
 241          */
 242         wqesz = (1 << srq->srq_wq_log_wqesz);
 243         srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz;
 244         srq->srq_wqinfo.qa_alloc_align = PAGESIZE;
 245         srq->srq_wqinfo.qa_bind_align = PAGESIZE;
 246         if (srq_is_umap) {
 247                 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
 248         } else {
 249                 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
 250         }
 251         status = hermon_queue_alloc(state, &srq->srq_wqinfo, sleepflag);
 252         if (status != DDI_SUCCESS) {
 253                 status = IBT_INSUFF_RESOURCE;
 254                 goto srqalloc_fail4a;
 255         }
 256         buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned;
 257 
 258         /*
 259          * Register the memory for the SRQ work queues.  The memory for the SRQ
 260          * must be registered in the Hermon cMPT tables.  This gives us the LKey
 261          * to specify in the SRQ context later.  Note: If the work queue is to
 262          * be allocated from DDR memory, then only a "bypass" mapping is
 263          * appropriate.  And if the SRQ memory is user-mappable, then we force
 264          * DDI_DMA_CONSISTENT mapping.  Also, in order to meet the alignment
 265          * restriction, we pass the "mro_bind_override_addr" flag in the call
 266          * to hermon_mr_register().  This guarantees that the resulting IB vaddr
 267          * will be zero-based (modulo the offset into the first page).  If we
 268          * fail here, we still have the bunch of resource and reference count
  269          * fail here, we still have a bunch of resource and reference count
 270          */
 271         flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
 272             IBT_MR_NOSLEEP;
 273         mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
 274         mr_attr.mr_len   = srq->srq_wqinfo.qa_size;
 275         mr_attr.mr_as    = NULL;
 276         mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
 277         mr_op.mro_bind_type   = state->hs_cfg_profile->cp_iommu_bypass;
 278         mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl;
 279         mr_op.mro_bind_override_addr = 1;
 280         status = hermon_mr_register(state, pd, &mr_attr, &mr,
 281             &mr_op, HERMON_SRQ_CMPT);
 282         if (status != DDI_SUCCESS) {
 283                 status = IBT_INSUFF_RESOURCE;
 284                 goto srqalloc_fail5;
 285         }
 286 
 287         /*
 288          * Calculate the offset between the kernel virtual address space
 289          * and the IB virtual address space.  This will be used when
 290          * posting work requests to properly initialize each WQE.
 291          */
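              /*
               * Note: because "mro_bind_override_addr" was set above, the
               * registered IB virtual address (bi_addr) is zero-based, so the
               * offset computed here is essentially the kernel virtual base of
               * the work queue buffer (modulo the offset into the first page).
               */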
 292         srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
 293             (uint64_t)mr->mr_bindinfo.bi_addr;
 294 
 295         srq->srq_wq_wqhdr = hermon_wrid_wqhdr_create(1 << log_srq_size);
 296 
 297         /*
 298          * Fill in all the return arguments (if necessary).  This includes
 299          * real queue size and real SGLs.
 300          */
 301         if (real_sizes != NULL) {
 302                 real_sizes->srq_wr_sz = (1 << log_srq_size) - 1;
 303                 real_sizes->srq_sgl_sz = srq->srq_wq_sgl;
 304         }
 305 
 306         /*
 307          * Fill in the SRQC entry.  This is the final step before passing
 308          * ownership of the SRQC entry to the Hermon hardware.  We use all of
 309          * the information collected/calculated above to fill in the
 310          * requisite portions of the SRQC.  Note: If this SRQ is going to be
 311          * used for userland access, then we need to set the UAR page number
 312          * appropriately (otherwise it's a "don't care")
 313          */
 314         bzero(&srqc_entry, sizeof (hermon_hw_srqc_t));
 315         srqc_entry.state           = HERMON_SRQ_STATE_HW_OWNER;
 316         srqc_entry.log_srq_size    = log_srq_size;
 317         srqc_entry.srqn            = srq->srq_srqnum;
 318         srqc_entry.log_rq_stride   = srq->srq_wq_log_wqesz - 4;
 319                                         /* 16-byte chunks */
 320 
 321         srqc_entry.page_offs       = srq->srq_wqinfo.qa_pgoffs >> 6;
 322         srqc_entry.log2_pgsz       = mr->mr_log2_pgsz;
 323         srqc_entry.mtt_base_addrh  = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
 324         srqc_entry.mtt_base_addrl  = mr->mr_mttaddr >> 3;
 325         srqc_entry.pd              = pd->pd_pdnum;
 326         srqc_entry.dbr_addrh = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 32);
 327         srqc_entry.dbr_addrl = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 2);
 328 
 329         /*
 330          * all others - specifically, xrcd, cqn_xrc, lwm, wqe_cnt, and wqe_cntr
 331          * are zero thanks to the bzero of the structure
 332          */
 333 
 334         /*
 335          * Write the SRQC entry to hardware.  Lastly, we pass ownership of
 336          * the entry to the hardware (using the Hermon SW2HW_SRQ firmware
 337          * command).  Note: In general, this operation shouldn't fail.  But
 338          * if it does, we have to undo everything we've done above before
 339          * returning error.
 340          */
 341         status = hermon_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry,
 342             sizeof (hermon_hw_srqc_t), srq->srq_srqnum,
 343             sleepflag);
 344         if (status != HERMON_CMD_SUCCESS) {
 345                 cmn_err(CE_CONT, "Hermon: SW2HW_SRQ command failed: %08x\n",
 346                     status);
 347                 if (status == HERMON_CMD_INVALID_STATUS) {
 348                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
 349                 }
 350                 status = ibc_get_ci_failure(0);
 351                 goto srqalloc_fail8;
 352         }
 353 
 354         /*
 355          * Fill in the rest of the Hermon SRQ handle.  We can update
 356          * the following fields for use in further operations on the SRQ.
 357          */
 358         srq->srq_srqcrsrcp = srqc;
 359         srq->srq_rsrcp          = rsrc;
 360         srq->srq_mrhdl          = mr;
 361         srq->srq_refcnt         = 0;
 362         srq->srq_is_umap   = srq_is_umap;
 363         srq->srq_uarpg          = uarpg;
 364         srq->srq_umap_dhp  = (devmap_cookie_t)NULL;
 365         srq->srq_pdhdl          = pd;
 366         srq->srq_wq_bufsz  = (1 << log_srq_size);
 367         srq->srq_wq_buf         = buf;
 368         srq->srq_desc_off  = srq_desc_off;
 369         srq->srq_hdlrarg   = (void *)ibt_srqhdl;
 370         srq->srq_state          = 0;
 371         srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
 372         srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl;
 373 
 374         /*
 375          * Put SRQ handle in Hermon SRQNum-to-SRQhdl list.  Then fill in the
 376          * "srqhdl" and return success
 377          */
 378         hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, srq);
 379 
 380         /*
 381          * If this is a user-mappable SRQ, then we need to insert the
 382          * previously allocated entry into the "userland resources database".
 383          * This will allow for later lookup during devmap() (i.e. mmap())
 384          * calls.
 385          */
 386         if (srq->srq_is_umap) {
 387                 hermon_umap_db_add(umapdb);
 388         } else {        /* initialize work queue for kernel SRQs */
 389                 int i, len, last;
 390                 uint16_t *desc;
 391 
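                      /*
                       * Chain the empty WQEs into the hardware free list: the
                       * second 16-bit word of each descriptor (its next-WQE-index
                       * field) is pointed at the following entry, and the software
                       * head/tail indexes are initialized to span the whole list.
                       */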
 392                 desc = (uint16_t *)buf;
 393                 len = wqesz / sizeof (*desc);
 394                 last = srq->srq_wq_bufsz - 1;
 395                 for (i = 0; i < last; i++) {
 396                         desc[1] = htons(i + 1);
 397                         desc += len;
 398                 }
 399                 srq->srq_wq_wqhdr->wq_tail = last;
 400                 srq->srq_wq_wqhdr->wq_head = 0;
 401         }
 402 
 403         *srqhdl = srq;
 404 
 405         return (status);
 406 
 407 /*
 408  * The following is cleanup for all possible failure cases in this routine
 409  */
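      /*
       * Note that the labels are ordered so that control falls through from
       * the point of failure down through every earlier allocation step,
       * releasing only those resources that were actually acquired.
       */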
 410 srqalloc_fail8:
 411         hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr);
 412 srqalloc_fail7:
 413         if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
 414             HERMON_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) {
 415                 HERMON_WARNING(state, "failed to deregister SRQ memory");
 416         }
 417 srqalloc_fail5:
 418         hermon_queue_free(&srq->srq_wqinfo);
 419 srqalloc_fail4a:
 420         hermon_dbr_free(state, uarpg, srq->srq_wq_vdbr);
 421 srqalloc_fail4:
 422         if (srq_is_umap) {
 423                 hermon_umap_db_free(umapdb);
 424         }
 425 srqalloc_fail3:
 426         hermon_rsrc_free(state, &rsrc);
 427 srqalloc_fail2:
 428         hermon_rsrc_free(state, &srqc);
 429 srqalloc_fail1:
 430         hermon_pd_refcnt_dec(pd);
 431 srqalloc_fail:
 432         return (status);
 433 }
 434 
 435 
 436 /*
 437  * hermon_srq_free()
 438  *    Context: Can be called only from user or kernel context.
 439  */
 440 /* ARGSUSED */
 441 int
 442 hermon_srq_free(hermon_state_t *state, hermon_srqhdl_t *srqhdl,
 443     uint_t sleepflag)
 444 {
 445         hermon_rsrc_t           *srqc, *rsrc;
 446         hermon_umap_db_entry_t  *umapdb;
 447         uint64_t                value;
 448         hermon_srqhdl_t         srq;
 449         hermon_mrhdl_t          mr;
 450         hermon_pdhdl_t          pd;
 451         hermon_hw_srqc_t        srqc_entry;
 452         uint32_t                srqnum;
 453         uint_t                  maxprot;
 454         int                     status;
 455 
 456         /*
 457          * Pull all the necessary information from the Hermon Shared Receive
 458          * Queue handle.  This is necessary here because the resource for the
 459          * SRQ handle is going to be freed up as part of this operation.
 460          */
 461         srq     = *srqhdl;
 462         mutex_enter(&srq->srq_lock);
 463         srqc    = srq->srq_srqcrsrcp;
 464         rsrc    = srq->srq_rsrcp;
 465         pd      = srq->srq_pdhdl;
 466         mr      = srq->srq_mrhdl;
 467         srqnum  = srq->srq_srqnum;
 468 
 469         /*
  470          * If there are QPs still associated with the SRQ (i.e. its reference
  471          * count is non-zero), fail the free; otherwise keep holding the lock.
 472          */
 473         if (srq->srq_refcnt != 0) {
 474                 mutex_exit(&srq->srq_lock);
 475                 return (IBT_SRQ_IN_USE);
 476         }
 477 
 478         /*
 479          * If this was a user-mappable SRQ, then we need to remove its entry
 480          * from the "userland resources database".  If it is also currently
 481          * mmap()'d out to a user process, then we need to call
 482          * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping.
 483          * We also need to invalidate the SRQ tracking information for the
 484          * user mapping.
 485          */
 486         if (srq->srq_is_umap) {
 487                 status = hermon_umap_db_find(state->hs_instance,
 488                     srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC, &value,
 489                     HERMON_UMAP_DB_REMOVE, &umapdb);
 490                 if (status != DDI_SUCCESS) {
 491                         mutex_exit(&srq->srq_lock);
 492                         HERMON_WARNING(state, "failed to find in database");
 493                         return (ibc_get_ci_failure(0));
 494                 }
 495                 hermon_umap_db_free(umapdb);
 496                 if (srq->srq_umap_dhp != NULL) {
 497                         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 498                         status = devmap_devmem_remap(srq->srq_umap_dhp,
 499                             state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size,
 500                             maxprot, DEVMAP_MAPPING_INVALID, NULL);
 501                         if (status != DDI_SUCCESS) {
 502                                 mutex_exit(&srq->srq_lock);
 503                                 HERMON_WARNING(state, "failed in SRQ memory "
 504                                     "devmap_devmem_remap()");
 505                                 return (ibc_get_ci_failure(0));
 506                         }
 507                         srq->srq_umap_dhp = (devmap_cookie_t)NULL;
 508                 }
 509         }
 510 
 511         /*
 512          * Put NULL into the Hermon SRQNum-to-SRQHdl list.  This will allow any
 513          * in-progress events to detect that the SRQ corresponding to this
 514          * number has been freed.
 515          */
 516         hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, NULL);
 517 
 518         mutex_exit(&srq->srq_lock);
 519 
 520         /*
 521          * Reclaim SRQC entry from hardware (using the Hermon HW2SW_SRQ
 522          * firmware command).  If the ownership transfer fails for any reason,
 523          * then it is an indication that something (either in HW or SW) has
 524          * gone seriously wrong.
 525          */
 526         status = hermon_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry,
 527             sizeof (hermon_hw_srqc_t), srqnum, sleepflag);
 528         if (status != HERMON_CMD_SUCCESS) {
 529                 HERMON_WARNING(state, "failed to reclaim SRQC ownership");
 530                 cmn_err(CE_CONT, "Hermon: HW2SW_SRQ command failed: %08x\n",
 531                     status);
 532                 if (status == HERMON_CMD_INVALID_STATUS) {
 533                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
 534                 }
 535                 return (ibc_get_ci_failure(0));
 536         }
 537 
 538         /*
 539          * Deregister the memory for the Shared Receive Queue.  If this fails
 540          * for any reason, then it is an indication that something (either
 541          * in HW or SW) has gone seriously wrong.  So we print a warning
 542          * message and return.
 543          */
 544         status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
 545             sleepflag);
 546         if (status != DDI_SUCCESS) {
 547                 HERMON_WARNING(state, "failed to deregister SRQ memory");
 548                 return (IBT_FAILURE);
 549         }
 550 
 551         hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr);
 552 
 553         /* Free the memory for the SRQ */
 554         hermon_queue_free(&srq->srq_wqinfo);
 555 
 556         /* Free the dbr */
 557         hermon_dbr_free(state, srq->srq_uarpg, srq->srq_wq_vdbr);
 558 
 559         /* Free the Hermon SRQ Handle */
 560         hermon_rsrc_free(state, &rsrc);
 561 
 562         /* Free the SRQC entry resource */
 563         hermon_rsrc_free(state, &srqc);
 564 
 565         /* Decrement the reference count on the protection domain (PD) */
 566         hermon_pd_refcnt_dec(pd);
 567 
 568         /* Set the srqhdl pointer to NULL and return success */
 569         *srqhdl = NULL;
 570 
 571         return (DDI_SUCCESS);
 572 }
 573 
 574 
 575 /*
 576  * hermon_srq_modify()
 577  *    Context: Can be called only from user or kernel context.
 578  */
 579 int
 580 hermon_srq_modify(hermon_state_t *state, hermon_srqhdl_t srq, uint_t size,
 581     uint_t *real_size, uint_t sleepflag)
 582 {
 583         hermon_qalloc_info_t    new_srqinfo, old_srqinfo;
 584         hermon_rsrc_t           *mtt, *old_mtt;
 585         hermon_bind_info_t      bind;
 586         hermon_bind_info_t      old_bind;
 587         hermon_mrhdl_t          mr;
 588         hermon_hw_srqc_t        srqc_entry;
 589         hermon_hw_dmpt_t        mpt_entry;
 590         uint64_t                *wre_new, *wre_old;
 591         uint64_t                mtt_addr;
 592         uint64_t                srq_pgoffs;
 593         uint64_t                srq_desc_off;
 594         uint32_t                *buf, srq_old_bufsz;
 595         uint32_t                wqesz;
 596         uint_t                  max_srq_size;
 597         uint_t                  mtt_pgsize_bits;
 598         uint_t                  log_srq_size, maxprot;
 599         int                     status;
 600 
 601         if ((state->hs_devlim.mod_wr_srq == 0) ||
 602             (state->hs_cfg_profile->cp_srq_resize_enabled == 0))
 603                 return (IBT_NOT_SUPPORTED);
 604 
 605         /*
 606          * If size requested is larger than device capability, return
 607          * Insufficient Resources
 608          */
 609         max_srq_size = (1 << state->hs_cfg_profile->cp_log_max_srq_sz);
 610         if (size > max_srq_size) {
 611                 return (IBT_HCA_WR_EXCEEDED);
 612         }
 613 
 614         /*
 615          * Calculate the appropriate size for the SRQ.
 616          * Note:  All Hermon SRQs must be a power-of-2 in size.  Also
 617          * they may not be any smaller than HERMON_SRQ_MIN_SIZE.  This step
 618          * is to round the requested size up to the next highest power-of-2
 619          */
 620         size = max(size, HERMON_SRQ_MIN_SIZE);
 621         log_srq_size = highbit(size);
 622         if (ISP2(size)) {
 623                 log_srq_size = log_srq_size - 1;
 624         }
 625 
 626         /*
 627          * Next we verify that the rounded-up size is valid (i.e. consistent
 628          * with the device limits and/or software-configured limits).
 629          */
 630         if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) {
 631                 status = IBT_HCA_WR_EXCEEDED;
 632                 goto srqmodify_fail;
 633         }
 634 
 635         /*
 636          * Allocate the memory for newly resized Shared Receive Queue.
 637          *
 638          * Note: If SRQ is not user-mappable, then it may come from either
 639          * kernel system memory or from HCA-attached local DDR memory.
 640          *
 641          * Note2: We align this queue on a pagesize boundary.  This is required
 642          * to make sure that all the resulting IB addresses will start at 0,
 643          * for a zero-based queue.  By making sure we are aligned on at least a
 644          * page, any offset we use into our queue will be the same as it was
 645          * when we allocated it at hermon_srq_alloc() time.
 646          */
 647         wqesz = (1 << srq->srq_wq_log_wqesz);
 648         new_srqinfo.qa_size = (1 << log_srq_size) * wqesz;
 649         new_srqinfo.qa_alloc_align = PAGESIZE;
 650         new_srqinfo.qa_bind_align  = PAGESIZE;
 651         if (srq->srq_is_umap) {
 652                 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
 653         } else {
 654                 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
 655         }
 656         status = hermon_queue_alloc(state, &new_srqinfo, sleepflag);
 657         if (status != DDI_SUCCESS) {
 658                 status = IBT_INSUFF_RESOURCE;
 659                 goto srqmodify_fail;
 660         }
 661         buf = (uint32_t *)new_srqinfo.qa_buf_aligned;
 662 
 663         /*
 664          * Allocate the memory for the new WRE list.  This will be used later
 665          * when we resize the wridlist based on the new SRQ size.
 666          */
 667         wre_new = kmem_zalloc((1 << log_srq_size) * sizeof (uint64_t),
 668             sleepflag);
  669         if (wre_new == NULL) {
  670                 status = IBT_INSUFF_RESOURCE;
                      hermon_queue_free(&new_srqinfo);
  671                 goto srqmodify_fail;
  672         }
 673 
 674         /*
 675          * Fill in the "bind" struct.  This struct provides the majority
 676          * of the information that will be used to distinguish between an
 677          * "addr" binding (as is the case here) and a "buf" binding (see
 678          * below).  The "bind" struct is later passed to hermon_mr_mem_bind()
 679          * which does most of the "heavy lifting" for the Hermon memory
 680          * registration routines.
 681          */
 682         bzero(&bind, sizeof (hermon_bind_info_t));
 683         bind.bi_type  = HERMON_BINDHDL_VADDR;
 684         bind.bi_addr  = (uint64_t)(uintptr_t)buf;
 685         bind.bi_len   = new_srqinfo.qa_size;
 686         bind.bi_as    = NULL;
  687         bind.bi_flags = (sleepflag == HERMON_SLEEP ? IBT_MR_SLEEP :
  688             IBT_MR_NOSLEEP) | IBT_MR_ENABLE_LOCAL_WRITE;
 689         bind.bi_bypass = state->hs_cfg_profile->cp_iommu_bypass;
 690 
 691         status = hermon_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt,
 692             &mtt_pgsize_bits, 0); /* no relaxed ordering */
 693         if (status != DDI_SUCCESS) {
 695                 kmem_free(wre_new, (1 << log_srq_size) *
 696                     sizeof (uint64_t));
 697                 hermon_queue_free(&new_srqinfo);
 698                 goto srqmodify_fail;
 699         }
 700 
 701         /*
 702          * Calculate the offset between the kernel virtual address space
 703          * and the IB virtual address space.  This will be used when
 704          * posting work requests to properly initialize each WQE.
 705          *
 706          * Note: bind addr is zero-based (from alloc) so we calculate the
 707          * correct new offset here.
 708          */
 709         bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1);
 710         srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned -
 711             (uint64_t)bind.bi_addr;
 712         srq_pgoffs   = (uint_t)
 713             ((uintptr_t)new_srqinfo.qa_buf_aligned & HERMON_PAGEOFFSET);
 714 
 715         /*
 716          * Fill in the MPT entry.  This is the final step before passing
 717          * ownership of the MPT entry to the Hermon hardware.  We use all of
 718          * the information collected/calculated above to fill in the
 719          * requisite portions of the MPT.
 720          */
 721         bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
 722         mpt_entry.reg_win_len   = bind.bi_len;
 723         mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
 724         mpt_entry.mtt_addr_h = mtt_addr >> 32;
 725         mpt_entry.mtt_addr_l = mtt_addr >> 3;
 726 
 727         /*
  728          * For Hermon, we build up a new SRQC and pass that (partially filled
  729          * in) to the RESIZE_SRQ command instead of modifying the (d)MPT
  730          * directly.
  731          */
  732 
 734         /*
 735          * Now we grab the SRQ lock.  Since we will be updating the actual
 736          * SRQ location and the producer/consumer indexes, we should hold
 737          * the lock.
 738          *
 739          * We do a HERMON_NOSLEEP here (and below), though, because we are
 740          * holding the "srq_lock" and if we got raised to interrupt level
 741          * by priority inversion, we would not want to block in this routine
 742          * waiting for success.
 743          */
 744         mutex_enter(&srq->srq_lock);
 745 
 746         /*
 747          * Copy old entries to new buffer
 748          */
 749         srq_old_bufsz = srq->srq_wq_bufsz;
 750         bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz);
 751 
 752         /*
 753          * Setup MPT information for use in the MODIFY_MPT command
 754          */
 755         mr = srq->srq_mrhdl;
 756         mutex_enter(&mr->mr_lock);
 757 
 758         /*
  759          * Now set up the SRQC information needed for the resize.  Only the
  760          * fields consumed by RESIZE_SRQ are filled in; the rest is zeroed.
  761          */
  762 
              bzero(&srqc_entry, sizeof (hermon_hw_srqc_t));
  763         srqc_entry.log_srq_size   = log_srq_size;
  764         srqc_entry.page_offs      = srq_pgoffs >> 6;
  765         srqc_entry.log2_pgsz      = mr->mr_log2_pgsz;
  766         srqc_entry.mtt_base_addrh = (uint32_t)((mtt_addr >> 32) & 0xFF);
  767         srqc_entry.mtt_base_addrl = mtt_addr >> 3;
 768 
 769         /*
 770          * RESIZE_SRQ
 771          *
 772          * If this fails for any reason, then it is an indication that
 773          * something (either in HW or SW) has gone seriously wrong.  So we
 774          * print a warning message and return.
 775          */
 776         status = hermon_resize_srq_cmd_post(state, &srqc_entry,
 777             srq->srq_srqnum, sleepflag);
 778         if (status != HERMON_CMD_SUCCESS) {
 779                 cmn_err(CE_CONT, "Hermon: RESIZE_SRQ command failed: %08x\n",
 780                     status);
 781                 if (status == HERMON_CMD_INVALID_STATUS) {
 782                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
 783                 }
 784                 (void) hermon_mr_mtt_unbind(state, &bind, mtt);
 785                 kmem_free(wre_new, (1 << log_srq_size) *
 786                     sizeof (uint64_t));
 787                 hermon_queue_free(&new_srqinfo);
 788                 mutex_exit(&mr->mr_lock);
 789                 mutex_exit(&srq->srq_lock);
 790                 return (ibc_get_ci_failure(0));
 791         }
 792         /*
 793          * Update the Hermon Shared Receive Queue handle with all the new
 794          * information.  At the same time, save away all the necessary
 795          * information for freeing up the old resources
 796          */
 797         old_srqinfo        = srq->srq_wqinfo;
 798         old_mtt            = srq->srq_mrhdl->mr_mttrsrcp;
 799         bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind,
 800             sizeof (hermon_bind_info_t));
 801 
 802         /* Now set the new info */
 803         srq->srq_wqinfo         = new_srqinfo;
 804         srq->srq_wq_buf         = buf;
 805         srq->srq_wq_bufsz  = (1 << log_srq_size);
 806         bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (hermon_bind_info_t));
 807         srq->srq_mrhdl->mr_mttrsrcp = mtt;
 808         srq->srq_desc_off  = srq_desc_off;
 809         srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
 810 
 811         /* Update MR mtt pagesize */
 812         mr->mr_logmttpgsz = mtt_pgsize_bits;
 813         mutex_exit(&mr->mr_lock);
 814 
 815         /*
 816          * Initialize new wridlist, if needed.
 817          *
  818          * If a wridlist is already set up on an SRQ (i.e. the QP associated
  819          * with the SRQ has moved "from_reset"), then we must update this
  820          * wridlist based on the new SRQ size.  We allocate the new number of
  821          * Work Request ID entries, copy over the old entries to the new list,
  822          * and re-initialize the SRQ wridlist in the non-umap case.
 823          */
 824         wre_old = srq->srq_wq_wqhdr->wq_wrid;
 825 
 826         bcopy(wre_old, wre_new, srq_old_bufsz * sizeof (uint64_t));
 827 
 828         /* Setup new sizes in wre */
 829         srq->srq_wq_wqhdr->wq_wrid = wre_new;
 830 
 831         /*
 832          * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out
 833          * to a user process, then we need to call devmap_devmem_remap() to
 834          * invalidate the mapping to the SRQ memory.  We also need to
 835          * invalidate the SRQ tracking information for the user mapping.
 836          *
  837          * Note: The remap really shouldn't ever fail.  If it does, it is an
  838          * indication that something has gone seriously wrong.  So we print a
  839          * warning message and return an error (knowing, of course, that the
  840          * "old" SRQ memory will be leaked).
 841          */
 842         if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) {
 843                 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 844                 status = devmap_devmem_remap(srq->srq_umap_dhp,
 845                     state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot,
 846                     DEVMAP_MAPPING_INVALID, NULL);
 847                 if (status != DDI_SUCCESS) {
 848                         mutex_exit(&srq->srq_lock);
 849                         HERMON_WARNING(state, "failed in SRQ memory "
 850                             "devmap_devmem_remap()");
 851                         /* We can, however, free the memory for old wre */
 852                         kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t));
 853                         return (ibc_get_ci_failure(0));
 854                 }
 855                 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
 856         }
 857 
 858         /*
 859          * Drop the SRQ lock now.  The only thing left to do is to free up
 860          * the old resources.
 861          */
 862         mutex_exit(&srq->srq_lock);
 863 
 864         /*
 865          * Unbind the MTT entries.
 866          */
 867         status = hermon_mr_mtt_unbind(state, &old_bind, old_mtt);
 868         if (status != DDI_SUCCESS) {
 869                 HERMON_WARNING(state, "failed to unbind old SRQ memory");
 870                 status = ibc_get_ci_failure(0);
 871                 goto srqmodify_fail;
 872         }
 873 
 874         /* Free the memory for old wre */
 875         kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t));
 876 
 877         /* Free the memory for the old SRQ */
 878         hermon_queue_free(&old_srqinfo);
 879 
 880         /*
 881          * Fill in the return arguments (if necessary).  This includes the
  882          * real new shared receive queue size.
 883          */
 884         if (real_size != NULL) {
 885                 *real_size = (1 << log_srq_size);
 886         }
 887 
 888         return (DDI_SUCCESS);
 889 
 890 srqmodify_fail:
 891         return (status);
 892 }
 893 
 894 
 895 /*
 896  * hermon_srq_refcnt_inc()
 897  *    Context: Can be called from interrupt or base context.
 898  */
 899 void
 900 hermon_srq_refcnt_inc(hermon_srqhdl_t srq)
 901 {
 902         mutex_enter(&srq->srq_lock);
 903         srq->srq_refcnt++;
 904         mutex_exit(&srq->srq_lock);
 905 }
 906 
 907 
 908 /*
 909  * hermon_srq_refcnt_dec()
 910  *    Context: Can be called from interrupt or base context.
 911  */
 912 void
 913 hermon_srq_refcnt_dec(hermon_srqhdl_t srq)
 914 {
 915         mutex_enter(&srq->srq_lock);
 916         srq->srq_refcnt--;
 917         mutex_exit(&srq->srq_lock);
 918 }
 919 
 920 
 921 /*
 922  * hermon_srqhdl_from_srqnum()
 923  *    Context: Can be called from interrupt or base context.
 924  *
 925  *    This routine is important because changing the unconstrained
 926  *    portion of the SRQ number is critical to the detection of a
 927  *    potential race condition in the SRQ handler code (i.e. the case
 928  *    where a SRQ is freed and alloc'd again before an event for the
 929  *    "old" SRQ can be handled).
 930  *
 931  *    While this is not a perfect solution (not sure that one exists)
 932  *    it does help to mitigate the chance that this race condition will
 933  *    cause us to deliver a "stale" event to the new SRQ owner.  Note:
 934  *    this solution does not scale well because the number of constrained
 935  *    bits increases (and, hence, the number of unconstrained bits
  936  *    decreases) as the number of supported SRQs grows.  For small and
 937  *    intermediate values, it should hopefully provide sufficient
 938  *    protection.
 939  */
 940 hermon_srqhdl_t
 941 hermon_srqhdl_from_srqnum(hermon_state_t *state, uint_t srqnum)
 942 {
 943         uint_t  srqindx, srqmask;
 944 
 945         /* Calculate the SRQ table index from the srqnum */
 946         srqmask = (1 << state->hs_cfg_profile->cp_log_num_srq) - 1;
 947         srqindx = srqnum & srqmask;
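              /*
               * For example, if cp_log_num_srq were 16, the mask would be 0xFFFF
               * and an event reporting SRQ number 0x12345 would resolve to table
               * index 0x2345; any higher-order (unconstrained) bits only serve to
               * distinguish successive users of the same index.
               */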
 948         return (hermon_icm_num_to_hdl(state, HERMON_SRQC, srqindx));
 949 }
 950 
 951 
 952 /*
 953  * hermon_srq_sgl_to_logwqesz()
 954  *    Context: Can be called from interrupt or base context.
 955  */
 956 static void
 957 hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
 958     hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
 959 {
 960         uint_t  max_size, log2, actual_sgl;
 961 
 962         switch (wq_type) {
 963         case HERMON_QP_WQ_TYPE_RECVQ:
 964                 /*
 965                  * Use requested maximum SGL to calculate max descriptor size
 966                  * (while guaranteeing that the descriptor size is a
 967                  * power-of-2 cachelines).
 968                  */
 969                 max_size = (HERMON_QP_WQE_MLX_SRQ_HDRS + (num_sgl << 4));
 970                 log2 = highbit(max_size);
 971                 if (ISP2(max_size)) {
 972                         log2 = log2 - 1;
 973                 }
 974 
 975                 /* Make sure descriptor is at least the minimum size */
 976                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
 977 
 978                 /* Calculate actual number of SGL (given WQE size) */
 979                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SRQ_HDRS) >> 4;
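
                      /*
                       * Note that actual_sgl is derived from the rounded-up WQE
                       * size: e.g. a 128-byte descriptor yields
                       * (128 - HERMON_QP_WQE_MLX_SRQ_HDRS) / 16 SGL entries, which
                       * may be more than the caller originally requested.
                       */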
 980                 break;
 981 
 982         default:
 983                 HERMON_WARNING(state, "unexpected work queue type");
 984                 break;
 985         }
 986 
 987         /* Fill in the return values */
 988         *logwqesz = log2;
 989         *max_sgl  = min(state->hs_cfg_profile->cp_srq_max_sgl, actual_sgl);
 990 }