1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * hermon_qp.c
  28  *    Hermon Queue Pair Processing Routines
  29  *
  30  *    Implements all the routines necessary for allocating, freeing, and
  31  *    querying the Hermon queue pairs.
  32  */
  33 
  34 #include <sys/types.h>
  35 #include <sys/conf.h>
  36 #include <sys/ddi.h>
  37 #include <sys/sunddi.h>
  38 #include <sys/modctl.h>
  39 #include <sys/bitmap.h>
  40 #include <sys/sysmacros.h>
  41 
  42 #include <sys/ib/adapters/hermon/hermon.h>
  43 #include <sys/ib/ib_pkt_hdrs.h>
  44 
/*
 * Forward declarations for the file-local helper routines.
 */

/* Derive/track the QP number for a newly allocated QPC entry */
static int hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
    hermon_rsrc_t *qpc);
/* AVL comparison routine for the released-QPN tracking tree */
static int hermon_qpn_avl_compare(const void *q, const void *e);
/* Allocate/free the per-port resources backing special (SMI/GSI) QPs */
static int hermon_special_qp_rsrc_alloc(hermon_state_t *state,
    ibt_sqp_type_t type, uint_t port, hermon_rsrc_t **qp_rsrc);
static int hermon_special_qp_rsrc_free(hermon_state_t *state,
    ibt_sqp_type_t type, uint_t port);
/* Convert a requested SGL count into a log2 WQE stride (and real max SGL) */
static void hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
    uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
    uint_t *logwqesz, uint_t *max_sgl);
  55 
  56 /*
  57  * hermon_qp_alloc()
  58  *    Context: Can be called only from user or kernel context.
  59  */
  60 int
  61 hermon_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
  62     uint_t sleepflag)
  63 {
  64         hermon_rsrc_t                   *qpc, *rsrc;
  65         hermon_rsrc_type_t              rsrc_type;
  66         hermon_umap_db_entry_t          *umapdb;
  67         hermon_qphdl_t                  qp;
  68         ibt_qp_alloc_attr_t             *attr_p;
  69         ibt_qp_alloc_flags_t            alloc_flags;
  70         ibt_qp_type_t                   type;
  71         hermon_qp_wq_type_t             swq_type;
  72         ibtl_qp_hdl_t                   ibt_qphdl;
  73         ibt_chan_sizes_t                *queuesz_p;
  74         ib_qpn_t                        *qpn;
  75         hermon_qphdl_t                  *qphdl;
  76         ibt_mr_attr_t                   mr_attr;
  77         hermon_mr_options_t             mr_op;
  78         hermon_srqhdl_t                 srq;
  79         hermon_pdhdl_t                  pd;
  80         hermon_cqhdl_t                  sq_cq, rq_cq;
  81         hermon_mrhdl_t                  mr;
  82         uint64_t                        value, qp_desc_off;
  83         uint64_t                        *thewqe, thewqesz;
  84         uint32_t                        *sq_buf, *rq_buf;
  85         uint32_t                        log_qp_sq_size, log_qp_rq_size;
  86         uint32_t                        sq_size, rq_size;
  87         uint32_t                        sq_depth, rq_depth;
  88         uint32_t                        sq_wqe_size, rq_wqe_size, wqesz_shift;
  89         uint32_t                        max_sgl, max_recv_sgl, uarpg;
  90         uint_t                          qp_is_umap;
  91         uint_t                          qp_srq_en, i, j;
  92         int                             status, flag;
  93 
  94         /*
  95          * Extract the necessary info from the hermon_qp_info_t structure
  96          */
  97         attr_p    = qpinfo->qpi_attrp;
  98         type      = qpinfo->qpi_type;
  99         ibt_qphdl = qpinfo->qpi_ibt_qphdl;
 100         queuesz_p = qpinfo->qpi_queueszp;
 101         qpn       = qpinfo->qpi_qpn;
 102         qphdl     = &qpinfo->qpi_qphdl;
 103         alloc_flags = attr_p->qp_alloc_flags;
 104 
 105         /*
 106          * Verify correctness of alloc_flags.
 107          *
 108          * 1. FEXCH and RSS are only allocated via qp_range.
 109          */
 110         if (alloc_flags & (IBT_QP_USES_FEXCH | IBT_QP_USES_RSS)) {
 111                 return (IBT_INVALID_PARAM);
 112         }
 113         rsrc_type = HERMON_QPC;
 114         qp_is_umap = 0;
 115 
 116         /* 2. Make sure only one of these flags is set. */
 117         switch (alloc_flags &
 118             (IBT_QP_USER_MAP | IBT_QP_USES_RFCI | IBT_QP_USES_FCMD)) {
 119         case IBT_QP_USER_MAP:
 120                 qp_is_umap = 1;
 121                 break;
 122         case IBT_QP_USES_RFCI:
 123                 if (type != IBT_UD_RQP)
 124                         return (IBT_INVALID_PARAM);
 125 
 126                 switch (attr_p->qp_fc.fc_hca_port) {
 127                 case 1:
 128                         rsrc_type = HERMON_QPC_RFCI_PORT1;
 129                         break;
 130                 case 2:
 131                         rsrc_type = HERMON_QPC_RFCI_PORT2;
 132                         break;
 133                 default:
 134                         return (IBT_INVALID_PARAM);
 135                 }
 136                 break;
 137         case IBT_QP_USES_FCMD:
 138                 if (type != IBT_UD_RQP)
 139                         return (IBT_INVALID_PARAM);
 140                 break;
 141         case 0:
 142                 break;
 143         default:
 144                 return (IBT_INVALID_PARAM);     /* conflicting flags set */
 145         }
 146 
 147         /*
 148          * Determine whether QP is being allocated for userland access or
 149          * whether it is being allocated for kernel access.  If the QP is
 150          * being allocated for userland access, then lookup the UAR
 151          * page number for the current process.  Note:  If this is not found
 152          * (e.g. if the process has not previously open()'d the Hermon driver),
 153          * then an error is returned.
 154          */
 155         if (qp_is_umap) {
 156                 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
 157                     MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
 158                 if (status != DDI_SUCCESS) {
 159                         return (IBT_INVALID_PARAM);
 160                 }
 161                 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
 162         } else {
 163                 uarpg = state->hs_kernel_uar_index;
 164         }
 165 
 166         /*
 167          * Determine whether QP is being associated with an SRQ
 168          */
 169         qp_srq_en = (alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
 170         if (qp_srq_en) {
 171                 /*
 172                  * Check for valid SRQ handle pointers
 173                  */
 174                 if (attr_p->qp_ibc_srq_hdl == NULL) {
 175                         status = IBT_SRQ_HDL_INVALID;
 176                         goto qpalloc_fail;
 177                 }
 178                 srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl;
 179         }
 180 
 181         /*
 182          * Check for valid QP service type (only UD/RC/UC supported)
 183          */
 184         if (((type != IBT_UD_RQP) && (type != IBT_RC_RQP) &&
 185             (type != IBT_UC_RQP))) {
 186                 status = IBT_QP_SRV_TYPE_INVALID;
 187                 goto qpalloc_fail;
 188         }
 189 
 190 
 191         /*
 192          * Check for valid PD handle pointer
 193          */
 194         if (attr_p->qp_pd_hdl == NULL) {
 195                 status = IBT_PD_HDL_INVALID;
 196                 goto qpalloc_fail;
 197         }
 198         pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
 199 
 200         /*
 201          * If on an SRQ, check to make sure the PD is the same
 202          */
 203         if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
 204                 status = IBT_PD_HDL_INVALID;
 205                 goto qpalloc_fail;
 206         }
 207 
 208         /* Increment the reference count on the protection domain (PD) */
 209         hermon_pd_refcnt_inc(pd);
 210 
 211         /*
 212          * Check for valid CQ handle pointers
 213          *
 214          * FCMD QPs do not require a receive cq handle.
 215          */
 216         if (attr_p->qp_ibc_scq_hdl == NULL) {
 217                 status = IBT_CQ_HDL_INVALID;
 218                 goto qpalloc_fail1;
 219         }
 220         sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl;
 221         if ((attr_p->qp_ibc_rcq_hdl == NULL)) {
 222                 if ((alloc_flags & IBT_QP_USES_FCMD) == 0) {
 223                         status = IBT_CQ_HDL_INVALID;
 224                         goto qpalloc_fail1;
 225                 }
 226                 rq_cq = sq_cq;  /* just use the send cq */
 227         } else
 228                 rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
 229 
 230         /*
 231          * Increment the reference count on the CQs.  One or both of these
 232          * could return error if we determine that the given CQ is already
 233          * being used with a special (SMI/GSI) QP.
 234          */
 235         status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL);
 236         if (status != DDI_SUCCESS) {
 237                 status = IBT_CQ_HDL_INVALID;
 238                 goto qpalloc_fail1;
 239         }
 240         status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL);
 241         if (status != DDI_SUCCESS) {
 242                 status = IBT_CQ_HDL_INVALID;
 243                 goto qpalloc_fail2;
 244         }
 245 
 246         /*
 247          * Allocate an QP context entry.  This will be filled in with all
 248          * the necessary parameters to define the Queue Pair.  Unlike
 249          * other Hermon hardware resources, ownership is not immediately
 250          * given to hardware in the final step here.  Instead, we must
 251          * wait until the QP is later transitioned to the "Init" state before
 252          * passing the QP to hardware.  If we fail here, we must undo all
 253          * the reference count (CQ and PD).
 254          */
 255         status = hermon_rsrc_alloc(state, rsrc_type, 1, sleepflag, &qpc);
 256         if (status != DDI_SUCCESS) {
 257                 status = IBT_INSUFF_RESOURCE;
 258                 goto qpalloc_fail3;
 259         }
 260 
 261         /*
 262          * Allocate the software structure for tracking the queue pair
 263          * (i.e. the Hermon Queue Pair handle).  If we fail here, we must
 264          * undo the reference counts and the previous resource allocation.
 265          */
 266         status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
 267         if (status != DDI_SUCCESS) {
 268                 status = IBT_INSUFF_RESOURCE;
 269                 goto qpalloc_fail4;
 270         }
 271         qp = (hermon_qphdl_t)rsrc->hr_addr;
 272         bzero(qp, sizeof (struct hermon_sw_qp_s));
 273 
 274         qp->qp_alloc_flags = alloc_flags;
 275 
 276         /*
 277          * Calculate the QP number from QPC index.  This routine handles
 278          * all of the operations necessary to keep track of used, unused,
 279          * and released QP numbers.
 280          */
 281         if (type == IBT_UD_RQP) {
 282                 qp->qp_qpnum = qpc->hr_indx;
 283                 qp->qp_ring = qp->qp_qpnum << 8;
 284                 qp->qp_qpn_hdl = NULL;
 285         } else {
 286                 status = hermon_qp_create_qpn(state, qp, qpc);
 287                 if (status != DDI_SUCCESS) {
 288                         status = IBT_INSUFF_RESOURCE;
 289                         goto qpalloc_fail5;
 290                 }
 291         }
 292 
 293         /*
 294          * If this will be a user-mappable QP, then allocate an entry for
 295          * the "userland resources database".  This will later be added to
 296          * the database (after all further QP operations are successful).
 297          * If we fail here, we must undo the reference counts and the
 298          * previous resource allocation.
 299          */
 300         if (qp_is_umap) {
 301                 umapdb = hermon_umap_db_alloc(state->hs_instance, qp->qp_qpnum,
 302                     MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
 303                 if (umapdb == NULL) {
 304                         status = IBT_INSUFF_RESOURCE;
 305                         goto qpalloc_fail6;
 306                 }
 307         }
 308 
 309         /*
 310          * Allocate the doorbell record.  Hermon just needs one for the RQ,
 311          * if the QP is not associated with an SRQ, and use uarpg (above) as
 312          * the uar index
 313          */
 314 
 315         if (!qp_srq_en) {
 316                 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
 317                     &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
 318                 if (status != DDI_SUCCESS) {
 319                         status = IBT_INSUFF_RESOURCE;
 320                         goto qpalloc_fail6;
 321                 }
 322         }
 323 
 324         qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO);
 325 
 326         /*
 327          * We verify that the requested number of SGL is valid (i.e.
 328          * consistent with the device limits and/or software-configured
 329          * limits).  If not, then obviously the same cleanup needs to be done.
 330          */
 331         if (type == IBT_UD_RQP) {
 332                 max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz;
 333                 swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD;
 334         } else {
 335                 max_sgl = state->hs_ibtfinfo.hca_attr->hca_conn_send_sgl_sz;
 336                 swq_type = HERMON_QP_WQ_TYPE_SENDQ_CONN;
 337         }
 338         max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz;
 339         if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
 340             (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) {
 341                 status = IBT_HCA_SGL_EXCEEDED;
 342                 goto qpalloc_fail7;
 343         }
 344 
 345         /*
 346          * Determine this QP's WQE stride (for both the Send and Recv WQEs).
 347          * This will depend on the requested number of SGLs.  Note: this
 348          * has the side-effect of also calculating the real number of SGLs
 349          * (for the calculated WQE size).
 350          *
 351          * For QP's on an SRQ, we set these to 0.
 352          */
 353         if (qp_srq_en) {
 354                 qp->qp_rq_log_wqesz = 0;
 355                 qp->qp_rq_sgl = 0;
 356         } else {
 357                 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
 358                     max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ,
 359                     &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
 360         }
 361         hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
 362             max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
 363 
 364         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
 365 
 366         /* NOTE: currently policy in driver, later maybe IBTF interface */
 367         qp->qp_no_prefetch = 0;
 368 
 369         /*
 370          * for prefetching, we need to add the number of wqes in
 371          * the 2k area plus one to the number requested, but
 372          * ONLY for send queue.  If no_prefetch == 1 (prefetch off)
 373          * it's exactly TWO wqes for the headroom
 374          */
 375         if (qp->qp_no_prefetch)
 376                 qp->qp_sq_headroom = 2 * sq_wqe_size;
 377         else
 378                 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
 379         /*
 380          * hdrm wqes must be integral since both sq_wqe_size &
 381          * HERMON_QP_OH_SIZE are power of 2
 382          */
 383         qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
 384 
 385 
 386         /*
 387          * Calculate the appropriate size for the work queues.
 388          * For send queue, add in the headroom wqes to the calculation.
 389          * Note:  All Hermon QP work queues must be a power-of-2 in size.  Also
 390          * they may not be any smaller than HERMON_QP_MIN_SIZE.  This step is
 391          * to round the requested size up to the next highest power-of-2
 392          */
 393         /* first, adjust to a minimum and tell the caller the change */
 394         attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq,
 395             HERMON_QP_MIN_SIZE);
 396         attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq,
 397             HERMON_QP_MIN_SIZE);
 398         /*
 399          * now, calculate the alloc size, taking into account
 400          * the headroom for the sq
 401          */
 402         log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes);
 403         /* if the total is a power of two, reduce it */
 404         if (ISP2(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes))    {
 405                 log_qp_sq_size = log_qp_sq_size - 1;
 406         }
 407 
 408         log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
 409         if (ISP2(attr_p->qp_sizes.cs_rq)) {
 410                 log_qp_rq_size = log_qp_rq_size - 1;
 411         }
 412 
 413         /*
 414          * Next we verify that the rounded-up size is valid (i.e. consistent
 415          * with the device limits and/or software-configured limits).  If not,
 416          * then obviously we have a lot of cleanup to do before returning.
 417          *
 418          * NOTE: the first condition deals with the (test) case of cs_sq
 419          * being just less than 2^32.  In this case, the headroom addition
 420          * to the requested cs_sq will pass the test when it should not.
 421          * This test no longer lets that case slip through the check.
 422          */
 423         if ((attr_p->qp_sizes.cs_sq >
 424             (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) ||
 425             (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
 426             (!qp_srq_en && (log_qp_rq_size >
 427             state->hs_cfg_profile->cp_log_max_qp_sz))) {
 428                 status = IBT_HCA_WR_EXCEEDED;
 429                 goto qpalloc_fail7;
 430         }
 431 
 432         /*
 433          * Allocate the memory for QP work queues. Since Hermon work queues
 434          * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
 435          * the work queue memory is very important.  We used to allocate
 436          * work queues (the combined receive and send queues) so that they
 437          * would be aligned on their combined size.  That alignment guaranteed
 438          * that they would never cross the 4GB boundary (Hermon work queues
 439          * are on the order of MBs at maximum).  Now we are able to relax
 440          * this alignment constraint by ensuring that the IB address assigned
 441          * to the queue memory (as a result of the hermon_mr_register() call)
 442          * is offset from zero.
 443          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
 444          * guarantee the alignment, but when attempting to use IOMMU bypass
 445          * mode we found that we were not allowed to specify any alignment
 446          * that was more restrictive than the system page size.
 447          * So we avoided this constraint by passing two alignment values,
 448          * one for the memory allocation itself and the other for the DMA
 449          * handle (for later bind).  This used to cause more memory than
 450          * necessary to be allocated (in order to guarantee the more
 451          * restrictive alignment contraint).  But by guaranteeing the
 452          * zero-based IB virtual address for the queue, we are able to
 453          * conserve this memory.
 454          */
 455         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
 456         sq_depth    = 1 << log_qp_sq_size;
 457         sq_size     = sq_depth * sq_wqe_size;
 458 
 459         /* QP on SRQ sets these to 0 */
 460         if (qp_srq_en) {
 461                 rq_wqe_size = 0;
 462                 rq_size     = 0;
 463         } else {
 464                 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
 465                 rq_depth    = 1 << log_qp_rq_size;
 466                 rq_size     = rq_depth * rq_wqe_size;
 467         }
 468 
 469         qp->qp_wqinfo.qa_size = sq_size + rq_size;
 470 
 471         qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
 472         qp->qp_wqinfo.qa_bind_align  = PAGESIZE;
 473 
 474         if (qp_is_umap) {
 475                 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
 476         } else {
 477                 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
 478         }
 479         status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
 480         if (status != DDI_SUCCESS) {
 481                 status = IBT_INSUFF_RESOURCE;
 482                 goto qpalloc_fail7;
 483         }
 484 
 485         /*
 486          * Sort WQs in memory according to stride (*q_wqe_size), largest first
 487          * If they are equal, still put the SQ first
 488          */
 489         qp->qp_sq_baseaddr = 0;
 490         qp->qp_rq_baseaddr = 0;
 491         if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
 492                 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
 493 
 494                 /* if this QP is on an SRQ, set the rq_buf to NULL */
 495                 if (qp_srq_en) {
 496                         rq_buf = NULL;
 497                 } else {
 498                         rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
 499                         qp->qp_rq_baseaddr = sq_size;
 500                 }
 501         } else {
 502                 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
 503                 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
 504                 qp->qp_sq_baseaddr = rq_size;
 505         }
 506 
 507         if (qp_is_umap == 0) {
 508                 qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
 509                 if (qp->qp_sq_wqhdr == NULL) {
 510                         status = IBT_INSUFF_RESOURCE;
 511                         goto qpalloc_fail8;
 512                 }
 513                 if (qp_srq_en) {
 514                         qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr;
 515                         qp->qp_rq_wqavl.wqa_srq_en = 1;
 516                         qp->qp_rq_wqavl.wqa_srq = srq;
 517                 } else {
 518                         qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth);
 519                         if (qp->qp_rq_wqhdr == NULL) {
 520                                 status = IBT_INSUFF_RESOURCE;
 521                                 goto qpalloc_fail8;
 522                         }
 523                         qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
 524                 }
 525                 qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
 526                 qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
 527                 qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
 528                 qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
 529                 qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
 530         }
 531 
 532         /*
 533          * Register the memory for the QP work queues.  The memory for the
 534          * QP must be registered in the Hermon cMPT tables.  This gives us the
 535          * LKey to specify in the QP context later.  Note: The memory for
 536          * Hermon work queues (both Send and Recv) must be contiguous and
 537          * registered as a single memory region.  Note: If the QP memory is
 538          * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to
 539          * meet the alignment restriction, we pass the "mro_bind_override_addr"
 540          * flag in the call to hermon_mr_register(). This guarantees that the
 541          * resulting IB vaddr will be zero-based (modulo the offset into the
 542          * first page). If we fail here, we still have the bunch of resource
 543          * and reference count cleanup to do.
 544          */
 545         flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
 546             IBT_MR_NOSLEEP;
 547         mr_attr.mr_vaddr    = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
 548         mr_attr.mr_len      = qp->qp_wqinfo.qa_size;
 549         mr_attr.mr_as       = NULL;
 550         mr_attr.mr_flags    = flag;
 551         if (qp_is_umap) {
 552                 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
 553         } else {
 554                 /* HERMON_QUEUE_LOCATION_NORMAL */
 555                 mr_op.mro_bind_type =
 556                     state->hs_cfg_profile->cp_iommu_bypass;
 557         }
 558         mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
 559         mr_op.mro_bind_override_addr = 1;
 560         status = hermon_mr_register(state, pd, &mr_attr, &mr,
 561             &mr_op, HERMON_QP_CMPT);
 562         if (status != DDI_SUCCESS) {
 563                 status = IBT_INSUFF_RESOURCE;
 564                 goto qpalloc_fail9;
 565         }
 566 
 567         /*
 568          * Calculate the offset between the kernel virtual address space
 569          * and the IB virtual address space.  This will be used when
 570          * posting work requests to properly initialize each WQE.
 571          */
 572         qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
 573             (uint64_t)mr->mr_bindinfo.bi_addr;
 574 
 575         /*
 576          * Fill in all the return arguments (if necessary).  This includes
 577          * real work queue sizes (in wqes), real SGLs, and QP number
 578          */
 579         if (queuesz_p != NULL) {
 580                 queuesz_p->cs_sq     =
 581                     (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
 582                 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
 583 
 584                 /* if this QP is on an SRQ, set these to 0 */
 585                 if (qp_srq_en) {
 586                         queuesz_p->cs_rq     = 0;
 587                         queuesz_p->cs_rq_sgl = 0;
 588                 } else {
 589                         queuesz_p->cs_rq     = (1 << log_qp_rq_size);
 590                         queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
 591                 }
 592         }
 593         if (qpn != NULL) {
 594                 *qpn = (ib_qpn_t)qp->qp_qpnum;
 595         }
 596 
 597         /*
 598          * Fill in the rest of the Hermon Queue Pair handle.
 599          */
 600         qp->qp_qpcrsrcp              = qpc;
 601         qp->qp_rsrcp         = rsrc;
 602         qp->qp_state         = HERMON_QP_RESET;
 603         HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
 604         qp->qp_pdhdl         = pd;
 605         qp->qp_mrhdl         = mr;
 606         qp->qp_sq_sigtype    = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
 607             HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
 608         qp->qp_is_special    = 0;
 609         qp->qp_uarpg         = uarpg;
 610         qp->qp_umap_dhp              = (devmap_cookie_t)NULL;
 611         qp->qp_sq_cqhdl              = sq_cq;
 612         qp->qp_sq_bufsz              = (1 << log_qp_sq_size);
 613         qp->qp_sq_logqsz     = log_qp_sq_size;
 614         qp->qp_sq_buf                = sq_buf;
 615         qp->qp_desc_off              = qp_desc_off;
 616         qp->qp_rq_cqhdl              = rq_cq;
 617         qp->qp_rq_buf                = rq_buf;
 618         qp->qp_rlky          = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) !=
 619             0;
 620 
 621         /* if this QP is on an SRQ, set rq_bufsz to 0 */
 622         if (qp_srq_en) {
 623                 qp->qp_rq_bufsz              = 0;
 624                 qp->qp_rq_logqsz     = 0;
 625         } else {
 626                 qp->qp_rq_bufsz              = (1 << log_qp_rq_size);
 627                 qp->qp_rq_logqsz     = log_qp_rq_size;
 628         }
 629 
 630         qp->qp_forward_sqd_event  = 0;
 631         qp->qp_sqd_still_draining = 0;
 632         qp->qp_hdlrarg               = (void *)ibt_qphdl;
 633         qp->qp_mcg_refcnt    = 0;
 634 
 635         /*
 636          * If this QP is to be associated with an SRQ, set the SRQ handle
 637          */
 638         if (qp_srq_en) {
 639                 qp->qp_srqhdl = srq;
 640                 hermon_srq_refcnt_inc(qp->qp_srqhdl);
 641         } else {
 642                 qp->qp_srqhdl = NULL;
 643         }
 644 
 645         /* Determine the QP service type */
 646         qp->qp_type = type;
 647         if (type == IBT_RC_RQP) {
 648                 qp->qp_serv_type = HERMON_QP_RC;
 649         } else if (type == IBT_UD_RQP) {
 650                 if (alloc_flags & IBT_QP_USES_RFCI)
 651                         qp->qp_serv_type = HERMON_QP_RFCI;
 652                 else if (alloc_flags & IBT_QP_USES_FCMD)
 653                         qp->qp_serv_type = HERMON_QP_FCMND;
 654                 else
 655                         qp->qp_serv_type = HERMON_QP_UD;
 656         } else {
 657                 qp->qp_serv_type = HERMON_QP_UC;
 658         }
 659 
 660         /*
 661          * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
 662          */
 663 
 664         /*
 665          * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
 666          * set the quadword to all F's - high-order bit is owner (init to one)
 667          * and the rest for the headroom definition of prefetching
 668          *
 669          */
 670         wqesz_shift = qp->qp_sq_log_wqesz;
 671         thewqesz    = 1 << wqesz_shift;
 672         thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
 673         if (qp_is_umap == 0) {
 674                 for (i = 0; i < sq_depth; i++) {
 675                         /*
 676                          * for each stride, go through and every 64 bytes
 677                          * write the init value - having set the address
 678                          * once, just keep incrementing it
 679                          */
 680                         for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
 681                                 *(uint32_t *)thewqe = 0xFFFFFFFF;
 682                         }
 683                 }
 684         }
 685 
 686         /* Zero out the QP context */
 687         bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
 688 
 689         /*
 690          * Put QP handle in Hermon QPNum-to-QPHdl list.  Then fill in the
 691          * "qphdl" and return success
 692          */
 693         hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx, qp);
 694 
 695         /*
 696          * If this is a user-mappable QP, then we need to insert the previously
 697          * allocated entry into the "userland resources database".  This will
 698          * allow for later lookup during devmap() (i.e. mmap()) calls.
 699          */
 700         if (qp_is_umap) {
 701                 hermon_umap_db_add(umapdb);
 702         }
 703         mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
 704             DDI_INTR_PRI(state->hs_intrmsi_pri));
 705 
 706         *qphdl = qp;
 707 
 708         return (DDI_SUCCESS);
 709 
 710 /*
 711  * The following is cleanup for all possible failure cases in this routine
 712  */
 713 qpalloc_fail9:
 714         hermon_queue_free(&qp->qp_wqinfo);
 715 qpalloc_fail8:
 716         if (qp->qp_sq_wqhdr)
 717                 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
 718         if (qp->qp_rq_wqhdr)
 719                 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
 720 qpalloc_fail7:
 721         if (qp_is_umap) {
 722                 hermon_umap_db_free(umapdb);
 723         }
 724         if (!qp_srq_en) {
 725                 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
 726         }
 727 
 728 qpalloc_fail6:
 729         /*
 730          * Releasing the QPN will also free up the QPC context.  Update
 731          * the QPC context pointer to indicate this.
 732          */
 733         if (qp->qp_qpn_hdl) {
 734                 hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
 735                     HERMON_QPN_RELEASE);
 736         } else {
 737                 hermon_rsrc_free(state, &qpc);
 738         }
 739         qpc = NULL;
 740 qpalloc_fail5:
 741         hermon_rsrc_free(state, &rsrc);
 742 qpalloc_fail4:
 743         if (qpc) {
 744                 hermon_rsrc_free(state, &qpc);
 745         }
 746 qpalloc_fail3:
 747         hermon_cq_refcnt_dec(rq_cq);
 748 qpalloc_fail2:
 749         hermon_cq_refcnt_dec(sq_cq);
 750 qpalloc_fail1:
 751         hermon_pd_refcnt_dec(pd);
 752 qpalloc_fail:
 753         return (status);
 754 }
 755 
 756 
 757 
 758 /*
 759  * hermon_special_qp_alloc()
 760  *    Context: Can be called only from user or kernel context.
 761  */
 762 int
 763 hermon_special_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
 764     uint_t sleepflag)
 765 {
 766         hermon_rsrc_t           *qpc, *rsrc;
 767         hermon_qphdl_t          qp;
 768         ibt_qp_alloc_attr_t     *attr_p;
 769         ibt_sqp_type_t          type;
 770         uint8_t                 port;
 771         ibtl_qp_hdl_t           ibt_qphdl;
 772         ibt_chan_sizes_t        *queuesz_p;
 773         hermon_qphdl_t          *qphdl;
 774         ibt_mr_attr_t           mr_attr;
 775         hermon_mr_options_t     mr_op;
 776         hermon_pdhdl_t          pd;
 777         hermon_cqhdl_t          sq_cq, rq_cq;
 778         hermon_mrhdl_t          mr;
 779         uint64_t                qp_desc_off;
 780         uint64_t                *thewqe, thewqesz;
 781         uint32_t                *sq_buf, *rq_buf;
 782         uint32_t                log_qp_sq_size, log_qp_rq_size;
 783         uint32_t                sq_size, rq_size, max_sgl;
 784         uint32_t                uarpg;
 785         uint32_t                sq_depth;
 786         uint32_t                sq_wqe_size, rq_wqe_size, wqesz_shift;
 787         int                     status, flag, i, j;
 788 
 789         /*
 790          * Extract the necessary info from the hermon_qp_info_t structure
 791          */
 792         attr_p    = qpinfo->qpi_attrp;
 793         type      = qpinfo->qpi_type;
 794         port      = qpinfo->qpi_port;
 795         ibt_qphdl = qpinfo->qpi_ibt_qphdl;
 796         queuesz_p = qpinfo->qpi_queueszp;
 797         qphdl     = &qpinfo->qpi_qphdl;
 798 
 799         /*
 800          * Check for valid special QP type (only SMI & GSI supported)
 801          */
 802         if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) {
 803                 status = IBT_QP_SPECIAL_TYPE_INVALID;
 804                 goto spec_qpalloc_fail;
 805         }
 806 
 807         /*
 808          * Check for valid port number
 809          */
 810         if (!hermon_portnum_is_valid(state, port)) {
 811                 status = IBT_HCA_PORT_INVALID;
 812                 goto spec_qpalloc_fail;
 813         }
 814         port = port - 1;
 815 
 816         /*
 817          * Check for valid PD handle pointer
 818          */
 819         if (attr_p->qp_pd_hdl == NULL) {
 820                 status = IBT_PD_HDL_INVALID;
 821                 goto spec_qpalloc_fail;
 822         }
 823         pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
 824 
 825         /* Increment the reference count on the PD */
 826         hermon_pd_refcnt_inc(pd);
 827 
 828         /*
 829          * Check for valid CQ handle pointers
 830          */
 831         if ((attr_p->qp_ibc_scq_hdl == NULL) ||
 832             (attr_p->qp_ibc_rcq_hdl == NULL)) {
 833                 status = IBT_CQ_HDL_INVALID;
 834                 goto spec_qpalloc_fail1;
 835         }
 836         sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl;
 837         rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
 838 
 839         /*
 840          * Increment the reference count on the CQs.  One or both of these
 841          * could return error if we determine that the given CQ is already
 842          * being used with a non-special QP (i.e. a normal QP).
 843          */
 844         status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_SPECIAL);
 845         if (status != DDI_SUCCESS) {
 846                 status = IBT_CQ_HDL_INVALID;
 847                 goto spec_qpalloc_fail1;
 848         }
 849         status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_SPECIAL);
 850         if (status != DDI_SUCCESS) {
 851                 status = IBT_CQ_HDL_INVALID;
 852                 goto spec_qpalloc_fail2;
 853         }
 854 
 855         /*
 856          * Allocate the special QP resources.  Essentially, this allocation
 857          * amounts to checking if the request special QP has already been
 858          * allocated.  If successful, the QP context return is an actual
 859          * QP context that has been "aliased" to act as a special QP of the
 860          * appropriate type (and for the appropriate port).  Just as in
 861          * hermon_qp_alloc() above, ownership for this QP context is not
 862          * immediately given to hardware in the final step here.  Instead, we
 863          * wait until the QP is later transitioned to the "Init" state before
 864          * passing the QP to hardware.  If we fail here, we must undo all
 865          * the reference count (CQ and PD).
 866          */
 867         status = hermon_special_qp_rsrc_alloc(state, type, port, &qpc);
 868         if (status != DDI_SUCCESS) {
 869                 goto spec_qpalloc_fail3;
 870         }
 871 
 872         /*
 873          * Allocate the software structure for tracking the special queue
 874          * pair (i.e. the Hermon Queue Pair handle).  If we fail here, we
 875          * must undo the reference counts and the previous resource allocation.
 876          */
 877         status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
 878         if (status != DDI_SUCCESS) {
 879                 status = IBT_INSUFF_RESOURCE;
 880                 goto spec_qpalloc_fail4;
 881         }
 882         qp = (hermon_qphdl_t)rsrc->hr_addr;
 883 
 884         bzero(qp, sizeof (struct hermon_sw_qp_s));
 885 
 886         qp->qp_alloc_flags = attr_p->qp_alloc_flags;
 887 
 888         /*
 889          * Actual QP number is a combination of the index of the QPC and
 890          * the port number.  This is because the special QP contexts must
 891          * be allocated two-at-a-time.
 892          */
 893         qp->qp_qpnum = qpc->hr_indx + port;
 894         qp->qp_ring = qp->qp_qpnum << 8;
 895 
 896         uarpg = state->hs_kernel_uar_index; /* must be for spec qp */
 897         /*
 898          * Allocate the doorbell record.  Hermon uses only one for the RQ so
 899          * alloc a qp doorbell, using uarpg (above) as the uar index
 900          */
 901 
 902         status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
 903             &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
 904         if (status != DDI_SUCCESS) {
 905                 status = IBT_INSUFF_RESOURCE;
 906                 goto spec_qpalloc_fail5;
 907         }
 908         /*
 909          * Calculate the appropriate size for the work queues.
 910          * Note:  All Hermon QP work queues must be a power-of-2 in size.  Also
 911          * they may not be any smaller than HERMON_QP_MIN_SIZE.  This step is
 912          * to round the requested size up to the next highest power-of-2
 913          */
 914         attr_p->qp_sizes.cs_sq =
 915             max(attr_p->qp_sizes.cs_sq, HERMON_QP_MIN_SIZE);
 916         attr_p->qp_sizes.cs_rq =
 917             max(attr_p->qp_sizes.cs_rq, HERMON_QP_MIN_SIZE);
 918         log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
 919         if (ISP2(attr_p->qp_sizes.cs_sq)) {
 920                 log_qp_sq_size = log_qp_sq_size - 1;
 921         }
 922         log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
 923         if (ISP2(attr_p->qp_sizes.cs_rq)) {
 924                 log_qp_rq_size = log_qp_rq_size - 1;
 925         }
 926 
 927         /*
 928          * Next we verify that the rounded-up size is valid (i.e. consistent
 929          * with the device limits and/or software-configured limits).  If not,
 930          * then obviously we have a bit of cleanup to do before returning.
 931          */
 932         if ((log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
 933             (log_qp_rq_size > state->hs_cfg_profile->cp_log_max_qp_sz)) {
 934                 status = IBT_HCA_WR_EXCEEDED;
 935                 goto spec_qpalloc_fail5a;
 936         }
 937 
 938         /*
 939          * Next we verify that the requested number of SGL is valid (i.e.
 940          * consistent with the device limits and/or software-configured
 941          * limits).  If not, then obviously the same cleanup needs to be done.
 942          */
 943         max_sgl = state->hs_cfg_profile->cp_wqe_real_max_sgl;
 944         if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
 945             (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) {
 946                 status = IBT_HCA_SGL_EXCEEDED;
 947                 goto spec_qpalloc_fail5a;
 948         }
 949 
 950         /*
 951          * Determine this QP's WQE stride (for both the Send and Recv WQEs).
 952          * This will depend on the requested number of SGLs.  Note: this
 953          * has the side-effect of also calculating the real number of SGLs
 954          * (for the calculated WQE size).
 955          */
 956         hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
 957             max_sgl, HERMON_QP_WQ_TYPE_RECVQ,
 958             &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
 959         if (type == IBT_SMI_SQP) {
 960                 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
 961                     max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP0,
 962                     &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
 963         } else {
 964                 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
 965                     max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP1,
 966                     &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
 967         }
 968 
 969         /*
 970          * Allocate the memory for QP work queues. Since Hermon work queues
 971          * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
 972          * the work queue memory is very important.  We used to allocate
 973          * work queues (the combined receive and send queues) so that they
 974          * would be aligned on their combined size.  That alignment guaranteed
 975          * that they would never cross the 4GB boundary (Hermon work queues
 976          * are on the order of MBs at maximum).  Now we are able to relax
 977          * this alignment constraint by ensuring that the IB address assigned
 978          * to the queue memory (as a result of the hermon_mr_register() call)
 979          * is offset from zero.
 980          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
 981          * guarantee the alignment, but when attempting to use IOMMU bypass
 982          * mode we found that we were not allowed to specify any alignment
 983          * that was more restrictive than the system page size.
 984          * So we avoided this constraint by passing two alignment values,
 985          * one for the memory allocation itself and the other for the DMA
 986          * handle (for later bind).  This used to cause more memory than
 987          * necessary to be allocated (in order to guarantee the more
 988          * restrictive alignment contraint).  But by guaranteeing the
 989          * zero-based IB virtual address for the queue, we are able to
 990          * conserve this memory.
 991          */
 992         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
 993         sq_depth    = 1 << log_qp_sq_size;
 994         sq_size     = (1 << log_qp_sq_size) * sq_wqe_size;
 995 
 996         rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
 997         rq_size     = (1 << log_qp_rq_size) * rq_wqe_size;
 998 
 999         qp->qp_wqinfo.qa_size          = sq_size + rq_size;
1000 
1001         qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
1002         qp->qp_wqinfo.qa_bind_align  = PAGESIZE;
1003         qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
1004 
1005         status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
1006         if (status != NULL) {
1007                 status = IBT_INSUFF_RESOURCE;
1008                 goto spec_qpalloc_fail5a;
1009         }
1010 
1011         /*
1012          * Sort WQs in memory according to depth, stride (*q_wqe_size),
1013          * biggest first. If equal, the Send Queue still goes first
1014          */
1015         qp->qp_sq_baseaddr = 0;
1016         qp->qp_rq_baseaddr = 0;
1017         if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
1018                 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
1019                 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
1020                 qp->qp_rq_baseaddr = sq_size;
1021         } else {
1022                 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
1023                 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
1024                 qp->qp_sq_baseaddr = rq_size;
1025         }
1026 
1027         qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
1028         if (qp->qp_sq_wqhdr == NULL) {
1029                 status = IBT_INSUFF_RESOURCE;
1030                 goto spec_qpalloc_fail6;
1031         }
1032         qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(1 << log_qp_rq_size);
1033         if (qp->qp_rq_wqhdr == NULL) {
1034                 status = IBT_INSUFF_RESOURCE;
1035                 goto spec_qpalloc_fail6;
1036         }
1037         qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
1038         qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
1039         qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
1040         qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
1041         qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
1042         qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
1043 
1044         /*
1045          * Register the memory for the special QP work queues.  The memory for
1046          * the special QP must be registered in the Hermon cMPT tables.  This
1047          * gives us the LKey to specify in the QP context later.  Note: The
1048          * memory for Hermon work queues (both Send and Recv) must be contiguous
1049          * and registered as a single memory region. Also, in order to meet the
1050          * alignment restriction, we pass the "mro_bind_override_addr" flag in
1051          * the call to hermon_mr_register(). This guarantees that the resulting
1052          * IB vaddr will be zero-based (modulo the offset into the first page).
1053          * If we fail here, we have a bunch of resource and reference count
1054          * cleanup to do.
1055          */
1056         flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
1057             IBT_MR_NOSLEEP;
1058         mr_attr.mr_vaddr    = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1059         mr_attr.mr_len      = qp->qp_wqinfo.qa_size;
1060         mr_attr.mr_as       = NULL;
1061         mr_attr.mr_flags    = flag;
1062 
1063         mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
1064         mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
1065         mr_op.mro_bind_override_addr = 1;
1066 
1067         status = hermon_mr_register(state, pd, &mr_attr, &mr, &mr_op,
1068             HERMON_QP_CMPT);
1069         if (status != DDI_SUCCESS) {
1070                 status = IBT_INSUFF_RESOURCE;
1071                 goto spec_qpalloc_fail6;
1072         }
1073 
1074         /*
1075          * Calculate the offset between the kernel virtual address space
1076          * and the IB virtual address space.  This will be used when
1077          * posting work requests to properly initialize each WQE.
1078          */
1079         qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
1080             (uint64_t)mr->mr_bindinfo.bi_addr;
1081 
1082         /* set the prefetch - initially, not prefetching */
1083         qp->qp_no_prefetch = 1;
1084 
1085         if (qp->qp_no_prefetch)
1086                 qp->qp_sq_headroom = 2 * sq_wqe_size;
1087         else
1088                 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
1089         /*
1090          * hdrm wqes must be integral since both sq_wqe_size &
1091          * HERMON_QP_OH_SIZE are power of 2
1092          */
1093         qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
1094         /*
1095          * Fill in all the return arguments (if necessary).  This includes
1096          * real work queue sizes, real SGLs, and QP number (which will be
1097          * either zero or one, depending on the special QP type)
1098          */
1099         if (queuesz_p != NULL) {
1100                 queuesz_p->cs_sq     =
1101                     (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
1102                 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
1103                 queuesz_p->cs_rq     = (1 << log_qp_rq_size);
1104                 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
1105         }
1106 
1107         /*
1108          * Fill in the rest of the Hermon Queue Pair handle.  We can update
1109          * the following fields for use in further operations on the QP.
1110          */
1111         qp->qp_qpcrsrcp              = qpc;
1112         qp->qp_rsrcp         = rsrc;
1113         qp->qp_state         = HERMON_QP_RESET;
1114         HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1115         qp->qp_pdhdl         = pd;
1116         qp->qp_mrhdl         = mr;
1117         qp->qp_sq_sigtype    = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
1118             HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
1119         qp->qp_is_special    = (type == IBT_SMI_SQP) ?
1120             HERMON_QP_SMI : HERMON_QP_GSI;
1121         qp->qp_uarpg         = uarpg;
1122         qp->qp_umap_dhp              = (devmap_cookie_t)NULL;
1123         qp->qp_sq_cqhdl              = sq_cq;
1124         qp->qp_sq_bufsz              = (1 << log_qp_sq_size);
1125         qp->qp_sq_buf                = sq_buf;
1126         qp->qp_sq_logqsz     = log_qp_sq_size;
1127         qp->qp_desc_off              = qp_desc_off;
1128         qp->qp_rq_cqhdl              = rq_cq;
1129         qp->qp_rq_bufsz              = (1 << log_qp_rq_size);
1130         qp->qp_rq_buf                = rq_buf;
1131         qp->qp_rq_logqsz     = log_qp_rq_size;
1132         qp->qp_portnum               = port;
1133         qp->qp_pkeyindx              = 0;
1134         qp->qp_forward_sqd_event  = 0;
1135         qp->qp_sqd_still_draining = 0;
1136         qp->qp_hdlrarg               = (void *)ibt_qphdl;
1137         qp->qp_mcg_refcnt    = 0;
1138         qp->qp_srqhdl                = NULL;
1139 
1140         /* All special QPs are UD QP service type */
1141         qp->qp_type = IBT_UD_RQP;
1142         qp->qp_serv_type = HERMON_QP_UD;
1143 
1144         /*
1145          * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
1146          */
1147 
1148         /*
1149          * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
1150          * set the quadword to all F's - high-order bit is owner (init to one)
1151          * and the rest for the headroom definition of prefetching
1152          *
1153          */
1154 
1155         wqesz_shift = qp->qp_sq_log_wqesz;
1156         thewqesz    = 1 << wqesz_shift;
1157         thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
1158         for (i = 0; i < sq_depth; i++) {
1159                 /*
1160                  * for each stride, go through and every 64 bytes write the
1161                  * init value - having set the address once, just keep
1162                  * incrementing it
1163                  */
1164                 for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
1165                         *(uint32_t *)thewqe = 0xFFFFFFFF;
1166                 }
1167         }
1168 
1169 
1170         /* Zero out the QP context */
1171         bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
1172 
1173         /*
1174          * Put QP handle in Hermon QPNum-to-QPHdl list.  Then fill in the
1175          * "qphdl" and return success
1176          */
1177         hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + port, qp);
1178 
1179         mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
1180             DDI_INTR_PRI(state->hs_intrmsi_pri));
1181 
1182         *qphdl = qp;
1183 
1184         return (DDI_SUCCESS);
1185 
1186 /*
1187  * The following is cleanup for all possible failure cases in this routine
1188  */
1189 spec_qpalloc_fail6:
1190         hermon_queue_free(&qp->qp_wqinfo);
1191         if (qp->qp_sq_wqhdr)
1192                 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
1193         if (qp->qp_rq_wqhdr)
1194                 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
1195 spec_qpalloc_fail5a:
1196         hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
1197 spec_qpalloc_fail5:
1198         hermon_rsrc_free(state, &rsrc);
1199 spec_qpalloc_fail4:
1200         if (hermon_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) {
1201                 HERMON_WARNING(state, "failed to free special QP rsrc");
1202         }
1203 spec_qpalloc_fail3:
1204         hermon_cq_refcnt_dec(rq_cq);
1205 spec_qpalloc_fail2:
1206         hermon_cq_refcnt_dec(sq_cq);
1207 spec_qpalloc_fail1:
1208         hermon_pd_refcnt_dec(pd);
1209 spec_qpalloc_fail:
1210         return (status);
1211 }
1212 
1213 
1214 /*
1215  * hermon_qp_alloc_range()
1216  *    Context: Can be called only from user or kernel context.
1217  */
1218 int
1219 hermon_qp_alloc_range(hermon_state_t *state, uint_t log2,
1220     hermon_qp_info_t *qpinfo, ibtl_qp_hdl_t *ibt_qphdl,
1221     ibc_cq_hdl_t *send_cq, ibc_cq_hdl_t *recv_cq,
1222     hermon_qphdl_t *qphdl, uint_t sleepflag)
1223 {
1224         hermon_rsrc_t                   *qpc, *rsrc;
1225         hermon_rsrc_type_t              rsrc_type;
1226         hermon_qphdl_t                  qp;
1227         hermon_qp_range_t               *qp_range_p;
1228         ibt_qp_alloc_attr_t             *attr_p;
1229         ibt_qp_type_t                   type;
1230         hermon_qp_wq_type_t             swq_type;
1231         ibt_chan_sizes_t                *queuesz_p;
1232         ibt_mr_attr_t                   mr_attr;
1233         hermon_mr_options_t             mr_op;
1234         hermon_srqhdl_t                 srq;
1235         hermon_pdhdl_t                  pd;
1236         hermon_cqhdl_t                  sq_cq, rq_cq;
1237         hermon_mrhdl_t                  mr;
1238         uint64_t                        qp_desc_off;
1239         uint64_t                        *thewqe, thewqesz;
1240         uint32_t                        *sq_buf, *rq_buf;
1241         uint32_t                        log_qp_sq_size, log_qp_rq_size;
1242         uint32_t                        sq_size, rq_size;
1243         uint32_t                        sq_depth, rq_depth;
1244         uint32_t                        sq_wqe_size, rq_wqe_size, wqesz_shift;
1245         uint32_t                        max_sgl, max_recv_sgl, uarpg;
1246         uint_t                          qp_srq_en, i, j;
1247         int                             ii;     /* loop counter for range */
1248         int                             status, flag;
1249         uint_t                          serv_type;
1250 
1251         /*
1252          * Extract the necessary info from the hermon_qp_info_t structure
1253          */
1254         attr_p    = qpinfo->qpi_attrp;
1255         type      = qpinfo->qpi_type;
1256         queuesz_p = qpinfo->qpi_queueszp;
1257 
1258         if (attr_p->qp_alloc_flags & IBT_QP_USES_RSS) {
1259                 if (log2 > state->hs_ibtfinfo.hca_attr->hca_rss_max_log2_table)
1260                         return (IBT_INSUFF_RESOURCE);
1261                 rsrc_type = HERMON_QPC;
1262                 serv_type = HERMON_QP_UD;
1263         } else if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) {
1264                 if (log2 > state->hs_ibtfinfo.hca_attr->hca_fexch_max_log2_qp)
1265                         return (IBT_INSUFF_RESOURCE);
1266                 switch (attr_p->qp_fc.fc_hca_port) {
1267                 case 1:
1268                         rsrc_type = HERMON_QPC_FEXCH_PORT1;
1269                         break;
1270                 case 2:
1271                         rsrc_type = HERMON_QPC_FEXCH_PORT2;
1272                         break;
1273                 default:
1274                         return (IBT_INVALID_PARAM);
1275                 }
1276                 serv_type = HERMON_QP_FEXCH;
1277         } else
1278                 return (IBT_INVALID_PARAM);
1279 
1280         /*
1281          * Determine whether QP is being allocated for userland access or
1282          * whether it is being allocated for kernel access.  If the QP is
1283          * being allocated for userland access, fail (too complex for now).
1284          */
1285         if (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) {
1286                 return (IBT_NOT_SUPPORTED);
1287         } else {
1288                 uarpg = state->hs_kernel_uar_index;
1289         }
1290 
1291         /*
1292          * Determine whether QP is being associated with an SRQ
1293          */
1294         qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
1295         if (qp_srq_en) {
1296                 /*
1297                  * Check for valid SRQ handle pointers
1298                  */
1299                 if (attr_p->qp_ibc_srq_hdl == NULL) {
1300                         return (IBT_SRQ_HDL_INVALID);
1301                 }
1302                 srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl;
1303         }
1304 
1305         /*
1306          * Check for valid QP service type (only UD supported)
1307          */
1308         if (type != IBT_UD_RQP) {
1309                 return (IBT_QP_SRV_TYPE_INVALID);
1310         }
1311 
1312         /*
1313          * Check for valid PD handle pointer
1314          */
1315         if (attr_p->qp_pd_hdl == NULL) {
1316                 return (IBT_PD_HDL_INVALID);
1317         }
1318         pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
1319 
1320         /*
1321          * If on an SRQ, check to make sure the PD is the same
1322          */
1323         if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
1324                 return (IBT_PD_HDL_INVALID);
1325         }
1326 
1327         /* set loop variable here, for freeing resources on error */
1328         ii = 0;
1329 
1330         /*
1331          * Allocate 2^log2 contiguous/aligned QP context entries.  This will
1332          * be filled in with all the necessary parameters to define the
1333          * Queue Pairs.  Unlike other Hermon hardware resources, ownership
1334          * is not immediately given to hardware in the final step here.
1335          * Instead, we must wait until the QP is later transitioned to the
1336          * "Init" state before passing the QP to hardware.  If we fail here,
1337          * we must undo all the reference count (CQ and PD).
1338          */
1339         status = hermon_rsrc_alloc(state, rsrc_type, 1 << log2, sleepflag,
1340             &qpc);
1341         if (status != DDI_SUCCESS) {
1342                 return (IBT_INSUFF_RESOURCE);
1343         }
1344 
1345         if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH)
1346                 /*
1347                  * Need to init the MKEYs for the FEXCH QPs.
1348                  *
1349                  * For FEXCH QP subranges, we return the QPN base as
1350                  * "relative" to the full FEXCH QP range for the port.
1351                  */
1352                 *(qpinfo->qpi_qpn) = hermon_fcoib_fexch_relative_qpn(state,
1353                     attr_p->qp_fc.fc_hca_port, qpc->hr_indx);
1354         else
1355                 *(qpinfo->qpi_qpn) = (ib_qpn_t)qpc->hr_indx;
1356 
1357         qp_range_p = kmem_alloc(sizeof (*qp_range_p),
1358             (sleepflag == HERMON_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1359         if (qp_range_p == NULL) {
1360                 status = IBT_INSUFF_RESOURCE;
1361                 goto qpalloc_fail0;
1362         }
1363         mutex_init(&qp_range_p->hqpr_lock, NULL, MUTEX_DRIVER,
1364             DDI_INTR_PRI(state->hs_intrmsi_pri));
1365         mutex_enter(&qp_range_p->hqpr_lock);
1366         qp_range_p->hqpr_refcnt = 1 << log2;
1367         qp_range_p->hqpr_qpcrsrc = qpc;
1368         mutex_exit(&qp_range_p->hqpr_lock);
1369 
1370 for_each_qp:
1371 
1372         /* Increment the reference count on the protection domain (PD) */
1373         hermon_pd_refcnt_inc(pd);
1374 
1375         rq_cq = (hermon_cqhdl_t)recv_cq[ii];
1376         sq_cq = (hermon_cqhdl_t)send_cq[ii];
1377         if (sq_cq == NULL) {
1378                 if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) {
1379                         /* if no send completions, just use rq_cq */
1380                         sq_cq = rq_cq;
1381                 } else {
1382                         status = IBT_CQ_HDL_INVALID;
1383                         goto qpalloc_fail1;
1384                 }
1385         }
1386 
1387         /*
1388          * Increment the reference count on the CQs.  One or both of these
1389          * could return error if we determine that the given CQ is already
1390          * being used with a special (SMI/GSI) QP.
1391          */
1392         status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL);
1393         if (status != DDI_SUCCESS) {
1394                 status = IBT_CQ_HDL_INVALID;
1395                 goto qpalloc_fail1;
1396         }
1397         status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL);
1398         if (status != DDI_SUCCESS) {
1399                 status = IBT_CQ_HDL_INVALID;
1400                 goto qpalloc_fail2;
1401         }
1402 
1403         /*
1404          * Allocate the software structure for tracking the queue pair
1405          * (i.e. the Hermon Queue Pair handle).  If we fail here, we must
1406          * undo the reference counts and the previous resource allocation.
1407          */
1408         status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
1409         if (status != DDI_SUCCESS) {
1410                 status = IBT_INSUFF_RESOURCE;
1411                 goto qpalloc_fail4;
1412         }
1413         qp = (hermon_qphdl_t)rsrc->hr_addr;
1414         bzero(qp, sizeof (struct hermon_sw_qp_s));
1415         qp->qp_alloc_flags = attr_p->qp_alloc_flags;
1416 
1417         /*
1418          * Calculate the QP number from QPC index.  This routine handles
1419          * all of the operations necessary to keep track of used, unused,
1420          * and released QP numbers.
1421          */
1422         qp->qp_qpnum = qpc->hr_indx + ii;
1423         qp->qp_ring = qp->qp_qpnum << 8;
1424         qp->qp_qpn_hdl = NULL;
1425 
1426         /*
1427          * Allocate the doorbell record.  Hermon just needs one for the RQ,
1428          * if the QP is not associated with an SRQ, and use uarpg (above) as
1429          * the uar index
1430          */
1431 
1432         if (!qp_srq_en) {
1433                 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
1434                     &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
1435                 if (status != DDI_SUCCESS) {
1436                         status = IBT_INSUFF_RESOURCE;
1437                         goto qpalloc_fail6;
1438                 }
1439         }
1440 
1441         qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO);
1442 
1443         /*
1444          * We verify that the requested number of SGL is valid (i.e.
1445          * consistent with the device limits and/or software-configured
1446          * limits).  If not, then obviously the same cleanup needs to be done.
1447          */
1448         max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz;
1449         swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD;
1450         max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz;
1451         if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
1452             (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) {
1453                 status = IBT_HCA_SGL_EXCEEDED;
1454                 goto qpalloc_fail7;
1455         }
1456 
1457         /*
1458          * Determine this QP's WQE stride (for both the Send and Recv WQEs).
1459          * This will depend on the requested number of SGLs.  Note: this
1460          * has the side-effect of also calculating the real number of SGLs
1461          * (for the calculated WQE size).
1462          *
1463          * For QP's on an SRQ, we set these to 0.
1464          */
1465         if (qp_srq_en) {
1466                 qp->qp_rq_log_wqesz = 0;
1467                 qp->qp_rq_sgl = 0;
1468         } else {
1469                 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
1470                     max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ,
1471                     &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
1472         }
1473         hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
1474             max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
1475 
1476         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
1477 
1478         /* NOTE: currently policy in driver, later maybe IBTF interface */
1479         qp->qp_no_prefetch = 0;
1480 
1481         /*
1482          * for prefetching, we need to add the number of wqes in
1483          * the 2k area plus one to the number requested, but
1484          * ONLY for send queue.  If no_prefetch == 1 (prefetch off)
1485          * it's exactly TWO wqes for the headroom
1486          */
1487         if (qp->qp_no_prefetch)
1488                 qp->qp_sq_headroom = 2 * sq_wqe_size;
1489         else
1490                 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
1491         /*
1492          * hdrm wqes must be integral since both sq_wqe_size &
1493          * HERMON_QP_OH_SIZE are power of 2
1494          */
1495         qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
1496 
1497 
1498         /*
1499          * Calculate the appropriate size for the work queues.
1500          * For send queue, add in the headroom wqes to the calculation.
1501          * Note:  All Hermon QP work queues must be a power-of-2 in size.  Also
1502          * they may not be any smaller than HERMON_QP_MIN_SIZE.  This step is
1503          * to round the requested size up to the next highest power-of-2
1504          */
1505         /* first, adjust to a minimum and tell the caller the change */
1506         attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq,
1507             HERMON_QP_MIN_SIZE);
1508         attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq,
1509             HERMON_QP_MIN_SIZE);
1510         /*
1511          * now, calculate the alloc size, taking into account
1512          * the headroom for the sq
1513          */
1514         log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes);
1515         /* if the total is a power of two, reduce it */
1516         if (ISP2(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes))    {
1517                 log_qp_sq_size = log_qp_sq_size - 1;
1518         }
1519 
1520         log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
1521         if (ISP2(attr_p->qp_sizes.cs_rq)) {
1522                 log_qp_rq_size = log_qp_rq_size - 1;
1523         }
1524 
1525         /*
1526          * Next we verify that the rounded-up size is valid (i.e. consistent
1527          * with the device limits and/or software-configured limits).  If not,
1528          * then obviously we have a lot of cleanup to do before returning.
1529          *
1530          * NOTE: the first condition deals with the (test) case of cs_sq
1531          * being just less than 2^32.  In this case, the headroom addition
1532          * to the requested cs_sq will pass the test when it should not.
1533          * This test no longer lets that case slip through the check.
1534          */
1535         if ((attr_p->qp_sizes.cs_sq >
1536             (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) ||
1537             (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
1538             (!qp_srq_en && (log_qp_rq_size >
1539             state->hs_cfg_profile->cp_log_max_qp_sz))) {
1540                 status = IBT_HCA_WR_EXCEEDED;
1541                 goto qpalloc_fail7;
1542         }
1543 
1544         /*
1545          * Allocate the memory for QP work queues. Since Hermon work queues
1546          * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
1547          * the work queue memory is very important.  We used to allocate
1548          * work queues (the combined receive and send queues) so that they
1549          * would be aligned on their combined size.  That alignment guaranteed
1550          * that they would never cross the 4GB boundary (Hermon work queues
1551          * are on the order of MBs at maximum).  Now we are able to relax
1552          * this alignment constraint by ensuring that the IB address assigned
1553          * to the queue memory (as a result of the hermon_mr_register() call)
1554          * is offset from zero.
1555          * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
1556          * guarantee the alignment, but when attempting to use IOMMU bypass
1557          * mode we found that we were not allowed to specify any alignment
1558          * that was more restrictive than the system page size.
1559          * So we avoided this constraint by passing two alignment values,
1560          * one for the memory allocation itself and the other for the DMA
1561          * handle (for later bind).  This used to cause more memory than
1562          * necessary to be allocated (in order to guarantee the more
 * restrictive alignment constraint).  But by guaranteeing the
1564          * zero-based IB virtual address for the queue, we are able to
1565          * conserve this memory.
1566          */
1567         sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
1568         sq_depth    = 1 << log_qp_sq_size;
1569         sq_size     = sq_depth * sq_wqe_size;
1570 
1571         /* QP on SRQ sets these to 0 */
1572         if (qp_srq_en) {
1573                 rq_wqe_size = 0;
1574                 rq_size     = 0;
1575         } else {
1576                 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
1577                 rq_depth    = 1 << log_qp_rq_size;
1578                 rq_size     = rq_depth * rq_wqe_size;
1579         }
1580 
1581         qp->qp_wqinfo.qa_size = sq_size + rq_size;
1582         qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
1583         qp->qp_wqinfo.qa_bind_align  = PAGESIZE;
1584         qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
1585         status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
1586         if (status != DDI_SUCCESS) {
1587                 status = IBT_INSUFF_RESOURCE;
1588                 goto qpalloc_fail7;
1589         }
1590 
1591         /*
1592          * Sort WQs in memory according to stride (*q_wqe_size), largest first
1593          * If they are equal, still put the SQ first
1594          */
1595         qp->qp_sq_baseaddr = 0;
1596         qp->qp_rq_baseaddr = 0;
1597         if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
1598                 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
1599 
1600                 /* if this QP is on an SRQ, set the rq_buf to NULL */
1601                 if (qp_srq_en) {
1602                         rq_buf = NULL;
1603                 } else {
1604                         rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
1605                         qp->qp_rq_baseaddr = sq_size;
1606                 }
1607         } else {
1608                 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
1609                 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
1610                 qp->qp_sq_baseaddr = rq_size;
1611         }
1612 
1613         qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
1614         if (qp->qp_sq_wqhdr == NULL) {
1615                 status = IBT_INSUFF_RESOURCE;
1616                 goto qpalloc_fail8;
1617         }
1618         if (qp_srq_en) {
1619                 qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr;
1620                 qp->qp_rq_wqavl.wqa_srq_en = 1;
1621                 qp->qp_rq_wqavl.wqa_srq = srq;
1622         } else {
1623                 qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth);
1624                 if (qp->qp_rq_wqhdr == NULL) {
1625                         status = IBT_INSUFF_RESOURCE;
1626                         goto qpalloc_fail8;
1627                 }
1628                 qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
1629         }
1630         qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
1631         qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
1632         qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
1633         qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
1634         qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
1635 
1636         /*
1637          * Register the memory for the QP work queues.  The memory for the
1638          * QP must be registered in the Hermon cMPT tables.  This gives us the
1639          * LKey to specify in the QP context later.  Note: The memory for
1640          * Hermon work queues (both Send and Recv) must be contiguous and
1641          * registered as a single memory region.  Note: If the QP memory is
1642          * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to
1643          * meet the alignment restriction, we pass the "mro_bind_override_addr"
1644          * flag in the call to hermon_mr_register(). This guarantees that the
1645          * resulting IB vaddr will be zero-based (modulo the offset into the
1646          * first page). If we fail here, we still have the bunch of resource
1647          * and reference count cleanup to do.
1648          */
1649         flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
1650             IBT_MR_NOSLEEP;
1651         mr_attr.mr_vaddr    = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1652         mr_attr.mr_len      = qp->qp_wqinfo.qa_size;
1653         mr_attr.mr_as       = NULL;
1654         mr_attr.mr_flags    = flag;
1655         /* HERMON_QUEUE_LOCATION_NORMAL */
1656         mr_op.mro_bind_type =
1657             state->hs_cfg_profile->cp_iommu_bypass;
1658         mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
1659         mr_op.mro_bind_override_addr = 1;
1660         status = hermon_mr_register(state, pd, &mr_attr, &mr,
1661             &mr_op, HERMON_QP_CMPT);
1662         if (status != DDI_SUCCESS) {
1663                 status = IBT_INSUFF_RESOURCE;
1664                 goto qpalloc_fail9;
1665         }
1666 
1667         /*
1668          * Calculate the offset between the kernel virtual address space
1669          * and the IB virtual address space.  This will be used when
1670          * posting work requests to properly initialize each WQE.
1671          */
1672         qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
1673             (uint64_t)mr->mr_bindinfo.bi_addr;
1674 
1675         /*
1676          * Fill in all the return arguments (if necessary).  This includes
1677          * real work queue sizes (in wqes), real SGLs, and QP number
1678          */
1679         if (queuesz_p != NULL) {
1680                 queuesz_p->cs_sq     =
1681                     (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
1682                 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
1683 
1684                 /* if this QP is on an SRQ, set these to 0 */
1685                 if (qp_srq_en) {
1686                         queuesz_p->cs_rq     = 0;
1687                         queuesz_p->cs_rq_sgl = 0;
1688                 } else {
1689                         queuesz_p->cs_rq     = (1 << log_qp_rq_size);
1690                         queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
1691                 }
1692         }
1693 
1694         /*
1695          * Fill in the rest of the Hermon Queue Pair handle.
1696          */
1697         qp->qp_qpcrsrcp              = NULL;
1698         qp->qp_rsrcp         = rsrc;
1699         qp->qp_state         = HERMON_QP_RESET;
1700         HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1701         qp->qp_pdhdl         = pd;
1702         qp->qp_mrhdl         = mr;
1703         qp->qp_sq_sigtype    = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
1704             HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
1705         qp->qp_is_special    = 0;
1706         qp->qp_uarpg         = uarpg;
1707         qp->qp_umap_dhp              = (devmap_cookie_t)NULL;
1708         qp->qp_sq_cqhdl              = sq_cq;
1709         qp->qp_sq_bufsz              = (1 << log_qp_sq_size);
1710         qp->qp_sq_logqsz     = log_qp_sq_size;
1711         qp->qp_sq_buf                = sq_buf;
1712         qp->qp_desc_off              = qp_desc_off;
1713         qp->qp_rq_cqhdl              = rq_cq;
1714         qp->qp_rq_buf                = rq_buf;
1715         qp->qp_rlky          = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) !=
1716             0;
1717 
1718         /* if this QP is on an SRQ, set rq_bufsz to 0 */
1719         if (qp_srq_en) {
1720                 qp->qp_rq_bufsz              = 0;
1721                 qp->qp_rq_logqsz     = 0;
1722         } else {
1723                 qp->qp_rq_bufsz              = (1 << log_qp_rq_size);
1724                 qp->qp_rq_logqsz     = log_qp_rq_size;
1725         }
1726 
1727         qp->qp_forward_sqd_event  = 0;
1728         qp->qp_sqd_still_draining = 0;
1729         qp->qp_hdlrarg               = (void *)ibt_qphdl[ii];
1730         qp->qp_mcg_refcnt    = 0;
1731 
1732         /*
1733          * If this QP is to be associated with an SRQ, set the SRQ handle
1734          */
1735         if (qp_srq_en) {
1736                 qp->qp_srqhdl = srq;
1737                 hermon_srq_refcnt_inc(qp->qp_srqhdl);
1738         } else {
1739                 qp->qp_srqhdl = NULL;
1740         }
1741 
1742         qp->qp_type = IBT_UD_RQP;
1743         qp->qp_serv_type = serv_type;
1744 
1745         /*
1746          * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
1747          */
1748 
1749         /*
1750          * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
1751          * set the quadword to all F's - high-order bit is owner (init to one)
1752          * and the rest for the headroom definition of prefetching.
1753          */
1754         if ((attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) == 0) {
1755                 wqesz_shift = qp->qp_sq_log_wqesz;
1756                 thewqesz    = 1 << wqesz_shift;
1757                 thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
1758                 for (i = 0; i < sq_depth; i++) {
1759                         /*
1760                          * for each stride, go through and every 64 bytes
1761                          * write the init value - having set the address
1762                          * once, just keep incrementing it
1763                          */
1764                         for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
1765                                 *(uint32_t *)thewqe = 0xFFFFFFFF;
1766                         }
1767                 }
1768         }
1769 
1770         /* Zero out the QP context */
1771         bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
1772 
1773         /*
1774          * Put QP handle in Hermon QPNum-to-QPHdl list.  Then fill in the
1775          * "qphdl" and return success
1776          */
1777         hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + ii, qp);
1778 
1779         mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
1780             DDI_INTR_PRI(state->hs_intrmsi_pri));
1781 
1782         qp->qp_rangep = qp_range_p;
1783 
1784         qphdl[ii] = qp;
1785 
1786         if (++ii < (1 << log2))
1787                 goto for_each_qp;
1788 
1789         return (DDI_SUCCESS);
1790 
1791 /*
1792  * The following is cleanup for all possible failure cases in this routine
1793  */
1794 qpalloc_fail9:
1795         hermon_queue_free(&qp->qp_wqinfo);
1796 qpalloc_fail8:
1797         if (qp->qp_sq_wqhdr)
1798                 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
1799         if (qp->qp_rq_wqhdr)
1800                 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
1801 qpalloc_fail7:
1802         if (!qp_srq_en) {
1803                 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
1804         }
1805 
1806 qpalloc_fail6:
1807         hermon_rsrc_free(state, &rsrc);
1808 qpalloc_fail4:
1809         hermon_cq_refcnt_dec(rq_cq);
1810 qpalloc_fail2:
1811         hermon_cq_refcnt_dec(sq_cq);
1812 qpalloc_fail1:
1813         hermon_pd_refcnt_dec(pd);
1814 qpalloc_fail0:
1815         if (ii == 0) {
1816                 if (qp_range_p)
1817                         kmem_free(qp_range_p, sizeof (*qp_range_p));
1818                 hermon_rsrc_free(state, &qpc);
1819         } else {
1820                 /* qp_range_p and qpc rsrc will be freed in hermon_qp_free */
1821 
1822                 mutex_enter(&qp->qp_rangep->hqpr_lock);
1823                 qp_range_p->hqpr_refcnt = ii;
1824                 mutex_exit(&qp->qp_rangep->hqpr_lock);
1825                 while (--ii >= 0) {
1826                         ibc_qpn_hdl_t qpn_hdl;
1827                         int free_status;
1828 
1829                         free_status = hermon_qp_free(state, &qphdl[ii],
1830                             IBC_FREE_QP_AND_QPN, &qpn_hdl, sleepflag);
1831                         if (free_status != DDI_SUCCESS)
1832                                 cmn_err(CE_CONT, "!qp_range: status 0x%x: "
1833                                     "error status %x during free",
1834                                     status, free_status);
1835                 }
1836         }
1837 
1838         return (status);
1839 }
1840 
1841 
1842 /*
1843  * hermon_qp_free()
1844  *    This function frees up the QP resources.  Depending on the value
1845  *    of the "free_qp_flags", the QP number may not be released until
1846  *    a subsequent call to hermon_qp_release_qpn().
1847  *
1848  *    Context: Can be called only from user or kernel context.
1849  */
1850 /* ARGSUSED */
1851 int
1852 hermon_qp_free(hermon_state_t *state, hermon_qphdl_t *qphdl,
1853     ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh,
1854     uint_t sleepflag)
1855 {
1856         hermon_rsrc_t           *qpc, *rsrc;
1857         hermon_umap_db_entry_t  *umapdb;
1858         hermon_qpn_entry_t      *entry;
1859         hermon_pdhdl_t          pd;
1860         hermon_mrhdl_t          mr;
1861         hermon_cqhdl_t          sq_cq, rq_cq;
1862         hermon_srqhdl_t         srq;
1863         hermon_qphdl_t          qp;
1864         uint64_t                value;
1865         uint_t                  type, port;
1866         uint_t                  maxprot;
1867         uint_t                  qp_srq_en;
1868         int                     status;
1869 
1870         /*
1871          * Pull all the necessary information from the Hermon Queue Pair
1872          * handle.  This is necessary here because the resource for the
1873          * QP handle is going to be freed up as part of this operation.
1874          */
1875         qp      = *qphdl;
1876         mutex_enter(&qp->qp_lock);
1877         qpc     = qp->qp_qpcrsrcp;   /* NULL if part of a "range" */
1878         rsrc    = qp->qp_rsrcp;
1879         pd      = qp->qp_pdhdl;
1880         srq     = qp->qp_srqhdl;
1881         mr      = qp->qp_mrhdl;
1882         rq_cq   = qp->qp_rq_cqhdl;
1883         sq_cq   = qp->qp_sq_cqhdl;
1884         port    = qp->qp_portnum;
1885         qp_srq_en = qp->qp_alloc_flags & IBT_QP_USES_SRQ;
1886 
1887         /*
1888          * If the QP is part of an MCG, then we fail the qp_free
1889          */
1890         if (qp->qp_mcg_refcnt != 0) {
1891                 mutex_exit(&qp->qp_lock);
1892                 status = ibc_get_ci_failure(0);
1893                 goto qpfree_fail;
1894         }
1895 
1896         /*
1897          * If the QP is not already in "Reset" state, then transition to
1898          * "Reset".  This is necessary because software does not reclaim
1899          * ownership of the QP context until the QP is in the "Reset" state.
1900          * If the ownership transfer fails for any reason, then it is an
1901          * indication that something (either in HW or SW) has gone seriously
1902          * wrong.  So we print a warning message and return.
1903          */
1904         if (qp->qp_state != HERMON_QP_RESET) {
1905                 if (hermon_qp_to_reset(state, qp) != DDI_SUCCESS) {
1906                         mutex_exit(&qp->qp_lock);
1907                         HERMON_WARNING(state, "failed to reset QP context");
1908                         status = ibc_get_ci_failure(0);
1909                         goto qpfree_fail;
1910                 }
1911                 qp->qp_state = HERMON_QP_RESET;
1912                 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1913 
1914                 /*
1915                  * Do any additional handling necessary for the transition
1916                  * to the "Reset" state (e.g. update the WRID lists)
1917                  */
1918                 if (hermon_wrid_to_reset_handling(state, qp) != DDI_SUCCESS) {
1919                         mutex_exit(&qp->qp_lock);
1920                         HERMON_WARNING(state, "failed to reset QP WRID list");
1921                         status = ibc_get_ci_failure(0);
1922                         goto qpfree_fail;
1923                 }
1924         }
1925 
1926         /*
1927          * If this was a user-mappable QP, then we need to remove its entry
1928          * from the "userland resources database".  If it is also currently
1929          * mmap()'d out to a user process, then we need to call
1930          * devmap_devmem_remap() to remap the QP memory to an invalid mapping.
1931          * We also need to invalidate the QP tracking information for the
1932          * user mapping.
1933          */
1934         if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
1935                 status = hermon_umap_db_find(state->hs_instance, qp->qp_qpnum,
1936                     MLNX_UMAP_QPMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
1937                     &umapdb);
1938                 if (status != DDI_SUCCESS) {
1939                         mutex_exit(&qp->qp_lock);
1940                         HERMON_WARNING(state, "failed to find in database");
1941                         return (ibc_get_ci_failure(0));
1942                 }
1943                 hermon_umap_db_free(umapdb);
1944                 if (qp->qp_umap_dhp != NULL) {
1945                         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
1946                         status = devmap_devmem_remap(qp->qp_umap_dhp,
1947                             state->hs_dip, 0, 0, qp->qp_wqinfo.qa_size,
1948                             maxprot, DEVMAP_MAPPING_INVALID, NULL);
1949                         if (status != DDI_SUCCESS) {
1950                                 mutex_exit(&qp->qp_lock);
1951                                 HERMON_WARNING(state, "failed in QP memory "
1952                                     "devmap_devmem_remap()");
1953                                 return (ibc_get_ci_failure(0));
1954                         }
1955                         qp->qp_umap_dhp = (devmap_cookie_t)NULL;
1956                 }
1957         }
1958 
1959 
1960         /*
1961          * Put NULL into the Hermon QPNum-to-QPHdl list.  This will allow any
1962          * in-progress events to detect that the QP corresponding to this
1963          * number has been freed.  Note: it does depend in whether we are
1964          * freeing a special QP or not.
1965          */
1966         if (qpc == NULL) {
1967                 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1968                     qp->qp_qpnum, NULL);
1969         } else if (qp->qp_is_special) {
1970                 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1971                     qpc->hr_indx + port, NULL);
1972         } else {
1973                 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1974                     qpc->hr_indx, NULL);
1975         }
1976 
1977         /*
1978          * Drop the QP lock
1979          *    At this point the lock is no longer necessary.  We cannot
1980          *    protect from multiple simultaneous calls to free the same QP.
1981          *    In addition, since the QP lock is contained in the QP "software
1982          *    handle" resource, which we will free (see below), it is
1983          *    important that we have no further references to that memory.
1984          */
1985         mutex_exit(&qp->qp_lock);
1986 
1987         /*
1988          * Free the QP resources
1989          *    Start by deregistering and freeing the memory for work queues.
1990          *    Next free any previously allocated context information
1991          *    (depending on QP type)
1992          *    Finally, decrement the necessary reference counts.
1993          * If this fails for any reason, then it is an indication that
1994          * something (either in HW or SW) has gone seriously wrong.  So we
1995          * print a warning message and return.
1996          */
1997         status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
1998             sleepflag);
1999         if (status != DDI_SUCCESS) {
2000                 HERMON_WARNING(state, "failed to deregister QP memory");
2001                 status = ibc_get_ci_failure(0);
2002                 goto qpfree_fail;
2003         }
2004 
2005         /* Free the memory for the QP */
2006         hermon_queue_free(&qp->qp_wqinfo);
2007 
2008         if (qp->qp_sq_wqhdr)
2009                 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
2010         if (qp->qp_rq_wqhdr)
2011                 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
2012 
2013         /* Free the dbr */
2014         if (!qp_srq_en) {
2015                 hermon_dbr_free(state, qp->qp_uarpg, qp->qp_rq_vdbr);
2016         }
2017 
2018         /*
2019          * Free up the remainder of the QP resources.  Note: we have a few
2020          * different resources to free up depending on whether the QP is a
2021          * special QP or not.  As described above, if any of these fail for
2022          * any reason it is an indication that something (either in HW or SW)
2023          * has gone seriously wrong.  So we print a warning message and
2024          * return.
2025          */
2026         if (qp->qp_is_special) {
2027                 type = (qp->qp_is_special == HERMON_QP_SMI) ?
2028                     IBT_SMI_SQP : IBT_GSI_SQP;
2029 
2030                 /* Free up resources for the special QP */
2031                 status = hermon_special_qp_rsrc_free(state, type, port);
2032                 if (status != DDI_SUCCESS) {
2033                         HERMON_WARNING(state, "failed to free special QP rsrc");
2034                         status = ibc_get_ci_failure(0);
2035                         goto qpfree_fail;
2036                 }
2037 
2038         } else if (qp->qp_rangep) {
2039                 int refcnt;
2040                 mutex_enter(&qp->qp_rangep->hqpr_lock);
2041                 refcnt = --qp->qp_rangep->hqpr_refcnt;
2042                 mutex_exit(&qp->qp_rangep->hqpr_lock);
2043                 if (refcnt == 0) {
2044                         mutex_destroy(&qp->qp_rangep->hqpr_lock);
2045                         hermon_rsrc_free(state, &qp->qp_rangep->hqpr_qpcrsrc);
2046                         kmem_free(qp->qp_rangep, sizeof (*qp->qp_rangep));
2047                 }
2048                 qp->qp_rangep = NULL;
2049         } else if (qp->qp_qpn_hdl == NULL) {
2050                 hermon_rsrc_free(state, &qpc);
2051         } else {
2052                 /*
2053                  * Check the flags and determine whether to release the
2054                  * QPN or not, based on their value.
2055                  */
2056                 if (free_qp_flags == IBC_FREE_QP_ONLY) {
2057                         entry = qp->qp_qpn_hdl;
2058                         hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
2059                             HERMON_QPN_FREE_ONLY);
2060                         *qpnh = (ibc_qpn_hdl_t)entry;
2061                 } else {
2062                         hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
2063                             HERMON_QPN_RELEASE);
2064                 }
2065         }
2066 
2067         mutex_destroy(&qp->qp_sq_lock);
2068 
2069         /* Free the Hermon Queue Pair handle */
2070         hermon_rsrc_free(state, &rsrc);
2071 
2072         /* Decrement the reference counts on CQs, PD and SRQ (if needed) */
2073         hermon_cq_refcnt_dec(rq_cq);
2074         hermon_cq_refcnt_dec(sq_cq);
2075         hermon_pd_refcnt_dec(pd);
2076         if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
2077                 hermon_srq_refcnt_dec(srq);
2078         }
2079 
2080         /* Set the qphdl pointer to NULL and return success */
2081         *qphdl = NULL;
2082 
2083         return (DDI_SUCCESS);
2084 
2085 qpfree_fail:
2086         return (status);
2087 }
2088 
2089 
/*
 * hermon_qp_query()
 *    Context: Can be called from interrupt or base context.
 *
 *    Fills in "attr_p" with the current attributes of QP "qp".  The
 *    reported state comes from the software copy of the QP state (under
 *    "qp_lock"); for any state other than Reset the QP context is then
 *    refreshed from the hardware via the QUERY_QP firmware command and
 *    transport-specific attributes (UD/RC/UC) are extracted from it.
 *    Returns DDI_SUCCESS, or an IBTF failure code on error.
 */
int
hermon_qp_query(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_qp_query_attr_t *attr_p)
{
	ibt_cep_state_t		qp_state;
	ibt_qp_ud_attr_t	*ud;
	ibt_qp_rc_attr_t	*rc;
	ibt_qp_uc_attr_t	*uc;
	ibt_cep_flags_t		enable_flags;
	hermon_hw_addr_path_t	*qpc_path, *qpc_alt_path;
	ibt_cep_path_t		*path_ptr, *alt_path_ptr;
	hermon_hw_qpc_t		*qpc;
	int			status;
	uint_t			tmp_sched_q, tmp_alt_sched_q;

	mutex_enter(&qp->qp_lock);

	/*
	 * Grab the temporary QPC entry from QP software state
	 */
	qpc = &qp->qpc;

	/* Convert the current Hermon QP state to IBTF QP state */
	switch (qp->qp_state) {
	case HERMON_QP_RESET:
		qp_state = IBT_STATE_RESET;		/* "Reset" */
		break;
	case HERMON_QP_INIT:
		qp_state = IBT_STATE_INIT;		/* Initialized */
		break;
	case HERMON_QP_RTR:
		qp_state = IBT_STATE_RTR;		/* Ready to Receive */
		break;
	case HERMON_QP_RTS:
		qp_state = IBT_STATE_RTS;		/* Ready to Send */
		break;
	case HERMON_QP_SQERR:
		qp_state = IBT_STATE_SQE;		/* Send Queue Error */
		break;
	case HERMON_QP_SQD:
		/* Distinguish "draining" from fully "drained" */
		if (qp->qp_sqd_still_draining) {
			qp_state = IBT_STATE_SQDRAIN;	/* SQ Draining */
		} else {
			qp_state = IBT_STATE_SQD;	/* SQ Drained */
		}
		break;
	case HERMON_QP_ERR:
		qp_state = IBT_STATE_ERROR;		/* Error */
		break;
	default:
		/* Unknown software state: report a CI failure */
		mutex_exit(&qp->qp_lock);
		return (ibc_get_ci_failure(0));
	}
	attr_p->qp_info.qp_state = qp_state;

	/* SRQ Hook. */
	attr_p->qp_srq = NULL;

	/*
	 * The following QP information is always returned, regardless of
	 * the current QP state.  Note: Some special handling is necessary
	 * for calculating the QP number on special QP (QP0 and QP1).
	 */
	attr_p->qp_sq_cq    =
	    (qp->qp_sq_cqhdl == NULL) ? NULL : qp->qp_sq_cqhdl->cq_hdlrarg;
	attr_p->qp_rq_cq    =
	    (qp->qp_rq_cqhdl == NULL) ? NULL : qp->qp_rq_cqhdl->cq_hdlrarg;
	if (qp->qp_is_special) {
		/* Special QPs are numbered 0 (SMI) or 1 (GSI) */
		attr_p->qp_qpn = (qp->qp_is_special == HERMON_QP_SMI) ? 0 : 1;
	} else {
		attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum;
	}
	attr_p->qp_sq_sgl   = qp->qp_sq_sgl;
	attr_p->qp_rq_sgl   = qp->qp_rq_sgl;
	/* SQ size excludes the driver-internal header WQEs */
	attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz - qp->qp_sq_hdrmwqes;
	attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz;

	/*
	 * If QP is currently in the "Reset" state, then only the above are
	 * returned
	 */
	if (qp_state == IBT_STATE_RESET) {
		mutex_exit(&qp->qp_lock);
		return (DDI_SUCCESS);
	}

	/*
	 * Post QUERY_QP command to firmware
	 *
	 * We do a HERMON_NOSLEEP here because we are holding the "qp_lock".
	 * Since we may be in the interrupt context (or subsequently raised
	 * to interrupt level by priority inversion), we do not want to block
	 * in this routine waiting for success.
	 *
	 * The QUERY_QP command overwrites the qpc buffer, so save the
	 * driver's sched_q values here and restore them afterwards.
	 */
	tmp_sched_q = qpc->pri_addr_path.sched_q;
	tmp_alt_sched_q = qpc->alt_addr_path.sched_q;
	status = hermon_cmn_query_cmd_post(state, QUERY_QP, 0, qp->qp_qpnum,
	    qpc, sizeof (hermon_hw_qpc_t), HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		mutex_exit(&qp->qp_lock);
		cmn_err(CE_WARN, "hermon%d: hermon_qp_query: QUERY_QP "
		    "command failed: %08x\n", state->hs_instance, status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}
	/* Restore the saved sched_q values (clobbered by QUERY_QP above) */
	qpc->pri_addr_path.sched_q = tmp_sched_q;
	qpc->alt_addr_path.sched_q = tmp_alt_sched_q;

	/*
	 * Fill in the additional QP info based on the QP's transport type.
	 */
	if (qp->qp_type == IBT_UD_RQP) {

		/* Fill in the UD-specific info */
		ud = &attr_p->qp_info.qp_transport.ud;
		ud->ud_qkey	= (ib_qkey_t)qpc->qkey;
		ud->ud_sq_psn	= qpc->next_snd_psn;
		ud->ud_pkey_ix	= qpc->pri_addr_path.pkey_indx;
		/* port+1 for port 1/2 */
		ud->ud_port	=
		    (uint8_t)(((qpc->pri_addr_path.sched_q >> 6) & 0x01) + 1);

		attr_p->qp_info.qp_trans = IBT_UD_SRV;

		if (qp->qp_serv_type == HERMON_QP_FEXCH) {
			ibt_pmr_desc_t *pmr;
			uint64_t heart_beat;

			/*
			 * FEXCH QPs also report the physical memory
			 * region descriptor derived from the QP number
			 * (unidirectional only; the bidirectional
			 * descriptor is zeroed).
			 */
			pmr = &attr_p->qp_query_fexch.fq_uni_mem_desc;
			pmr->pmd_iova = 0;
			pmr->pmd_lkey = pmr->pmd_rkey =
			    hermon_fcoib_qpn_to_mkey(state, qp->qp_qpnum);
			pmr->pmd_phys_buf_list_sz =
			    state->hs_fcoib.hfc_mtts_per_mpt;
			pmr->pmd_sync_required = 0;

			pmr = &attr_p->qp_query_fexch.fq_bi_mem_desc;
			pmr->pmd_iova = 0;
			pmr->pmd_lkey = 0;
			pmr->pmd_rkey = 0;
			pmr->pmd_phys_buf_list_sz = 0;
			pmr->pmd_sync_required = 0;

			/* Heart-beat OK only if the query succeeds AND is 0 */
			attr_p->qp_query_fexch.fq_flags =
			    ((hermon_get_heart_beat_rq_cmd_post(state,
			    qp->qp_qpnum, &heart_beat) == HERMON_CMD_SUCCESS) &&
			    (heart_beat == 0)) ? IBT_FEXCH_HEART_BEAT_OK :
			    IBT_FEXCH_NO_FLAGS;

			ud->ud_fc = qp->qp_fc_attr;
		} else if (qp->qp_serv_type == HERMON_QP_FCMND ||
		    qp->qp_serv_type == HERMON_QP_RFCI) {
			ud->ud_fc = qp->qp_fc_attr;
		}

	} else if (qp->qp_serv_type == HERMON_QP_RC) {

		/* Fill in the RC-specific info */
		rc = &attr_p->qp_info.qp_transport.rc;
		rc->rc_sq_psn	= qpc->next_snd_psn;
		rc->rc_rq_psn	= qpc->next_rcv_psn;
		rc->rc_dst_qpn	= qpc->rem_qpn;

		/* Grab the path migration state information */
		if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) {
			rc->rc_mig_state = IBT_STATE_MIGRATED;
		} else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) {
			rc->rc_mig_state = IBT_STATE_REARMED;
		} else {
			rc->rc_mig_state = IBT_STATE_ARMED;
		}
		/* sra_max/rra_max are log2 values in the QPC */
		rc->rc_rdma_ra_out = (1 << qpc->sra_max);
		rc->rc_rdma_ra_in  = (1 << qpc->rra_max);
		rc->rc_min_rnr_nak = qpc->min_rnr_nak;
		rc->rc_path_mtu	   = qpc->mtu;
		rc->rc_retry_cnt   = qpc->retry_cnt;

		/* Get the common primary address path fields */
		qpc_path = &qpc->pri_addr_path;
		path_ptr = &rc->rc_path;
		hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
		    HERMON_ADDRPATH_QP);

		/* Fill in the additional primary address path fields */
		path_ptr->cep_pkey_ix		= qpc_path->pkey_indx;
		path_ptr->cep_hca_port_num =
		    path_ptr->cep_adds_vect.av_port_num =
		    (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1);
		path_ptr->cep_timeout		= qpc_path->ack_timeout;

		/* Get the common alternate address path fields */
		qpc_alt_path = &qpc->alt_addr_path;
		alt_path_ptr = &rc->rc_alt_path;
		hermon_get_addr_path(state, qpc_alt_path,
		    &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP);

		/* Fill in the additional alternate address path fields */
		alt_path_ptr->cep_pkey_ix	= qpc_alt_path->pkey_indx;
		alt_path_ptr->cep_hca_port_num	=
		    alt_path_ptr->cep_adds_vect.av_port_num =
		    (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1);
		alt_path_ptr->cep_timeout	= qpc_alt_path->ack_timeout;

		/* Get the RNR retry time from primary path */
		rc->rc_rnr_retry_cnt = qpc->rnr_retry;

		/* Set the enable flags based on RDMA/Atomic enable bits */
		enable_flags = IBT_CEP_NO_FLAGS;
		enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD);
		enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
		enable_flags |= ((qpc->rae == 0) ? 0 : IBT_CEP_ATOMIC);
		attr_p->qp_info.qp_flags = enable_flags;

		attr_p->qp_info.qp_trans = IBT_RC_SRV;

	} else if (qp->qp_serv_type == HERMON_QP_UC) {

		/* Fill in the UC-specific info */
		uc = &attr_p->qp_info.qp_transport.uc;
		uc->uc_sq_psn	= qpc->next_snd_psn;
		uc->uc_rq_psn	= qpc->next_rcv_psn;
		uc->uc_dst_qpn	= qpc->rem_qpn;

		/* Grab the path migration state information */
		if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) {
			uc->uc_mig_state = IBT_STATE_MIGRATED;
		} else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) {
			uc->uc_mig_state = IBT_STATE_REARMED;
		} else {
			uc->uc_mig_state = IBT_STATE_ARMED;
		}
		uc->uc_path_mtu = qpc->mtu;

		/* Get the common primary address path fields */
		qpc_path = &qpc->pri_addr_path;
		path_ptr = &uc->uc_path;
		hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
		    HERMON_ADDRPATH_QP);

		/* Fill in the additional primary address path fields */
		path_ptr->cep_pkey_ix		= qpc_path->pkey_indx;
		path_ptr->cep_hca_port_num =
		    path_ptr->cep_adds_vect.av_port_num =
		    (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1);

		/* Get the common alternate address path fields */
		qpc_alt_path = &qpc->alt_addr_path;
		alt_path_ptr = &uc->uc_alt_path;
		hermon_get_addr_path(state, qpc_alt_path,
		    &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP);

		/* Fill in the additional alternate address path fields */
		alt_path_ptr->cep_pkey_ix	= qpc_alt_path->pkey_indx;
		alt_path_ptr->cep_hca_port_num	=
		    alt_path_ptr->cep_adds_vect.av_port_num =
		    (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1);

		/*
		 * Set the enable flags based on RDMA enable bits (by
		 * definition UC doesn't support Atomic or RDMA Read)
		 */
		enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
		attr_p->qp_info.qp_flags = enable_flags;

		attr_p->qp_info.qp_trans = IBT_UC_SRV;

	} else {
		HERMON_WARNING(state, "unexpected QP transport type");
		mutex_exit(&qp->qp_lock);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Under certain circumstances it is possible for the Hermon hardware
	 * to transition to one of the error states without software directly
	 * knowing about it.  The QueryQP() call is the one place where we
	 * have an opportunity to sample and update our view of the QP state.
	 */
	if (qpc->state == HERMON_QP_SQERR) {
		attr_p->qp_info.qp_state = IBT_STATE_SQE;
		qp->qp_state = HERMON_QP_SQERR;
		HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_SQERR);
	}
	if (qpc->state == HERMON_QP_ERR) {
		attr_p->qp_info.qp_state = IBT_STATE_ERROR;
		qp->qp_state = HERMON_QP_ERR;
		HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR);
	}
	mutex_exit(&qp->qp_lock);

	return (DDI_SUCCESS);
}
2388 
2389 
2390 /*
2391  * hermon_qp_create_qpn()
2392  *    Context: Can be called from interrupt or base context.
2393  */
2394 static int
2395 hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
2396     hermon_rsrc_t *qpc)
2397 {
2398         hermon_qpn_entry_t      query;
2399         hermon_qpn_entry_t      *entry;
2400         avl_index_t             where;
2401 
2402         /*
2403          * Build a query (for the AVL tree lookup) and attempt to find
2404          * a previously added entry that has a matching QPC index.  If
2405          * no matching entry is found, then allocate, initialize, and
2406          * add an entry to the AVL tree.
2407          * If a matching entry is found, then increment its QPN counter
2408          * and reference counter.
2409          */
2410         query.qpn_indx = qpc->hr_indx;
2411         mutex_enter(&state->hs_qpn_avl_lock);
2412         entry = (hermon_qpn_entry_t *)avl_find(&state->hs_qpn_avl,
2413             &query, &where);
2414         if (entry == NULL) {
2415                 /*
2416                  * Allocate and initialize a QPN entry, then insert
2417                  * it into the AVL tree.
2418                  */
2419                 entry = (hermon_qpn_entry_t *)kmem_zalloc(
2420                     sizeof (hermon_qpn_entry_t), KM_NOSLEEP);
2421                 if (entry == NULL) {
2422                         mutex_exit(&state->hs_qpn_avl_lock);
2423                         return (DDI_FAILURE);
2424                 }
2425 
2426                 entry->qpn_indx         = qpc->hr_indx;
2427                 entry->qpn_refcnt  = 0;
2428                 entry->qpn_counter = 0;
2429 
2430                 avl_insert(&state->hs_qpn_avl, entry, where);
2431         }
2432 
2433         /*
2434          * Make the AVL tree entry point to the QP context resource that
2435          * it will be responsible for tracking
2436          */
2437         entry->qpn_qpc = qpc;
2438 
2439         /*
2440          * Setup the QP handle to point to the AVL tree entry.  Then
2441          * generate the new QP number from the entry's QPN counter value
2442          * and the hardware's QP context table index.
2443          */
2444         qp->qp_qpn_hdl       = entry;
2445         qp->qp_qpnum = ((entry->qpn_counter <<
2446             state->hs_cfg_profile->cp_log_num_qp) | qpc->hr_indx) &
2447             HERMON_QP_MAXNUMBER_MSK;
2448         qp->qp_ring = qp->qp_qpnum << 8;
2449 
2450         /*
2451          * Increment the reference counter and QPN counter.  The QPN
2452          * counter always indicates the next available number for use.
2453          */
2454         entry->qpn_counter++;
2455         entry->qpn_refcnt++;
2456 
2457         mutex_exit(&state->hs_qpn_avl_lock);
2458 
2459         return (DDI_SUCCESS);
2460 }
2461 
2462 
/*
 * hermon_qp_release_qpn()
 *    Context: Can be called only from user or kernel context.
 *
 *    Releases the QP number tracking entry "entry".  "flags" selects:
 *	HERMON_QPN_RELEASE   - drop one reference; on the last reference
 *	    free the underlying QPC resource (if still held) and, once
 *	    the reuse counter is exhausted, remove and free the entry.
 *	HERMON_QPN_FREE_ONLY - free only the QPC resource (so the index
 *	    can be reused for a different QP number) while keeping the
 *	    tracking entry and its references intact.
 */
void
hermon_qp_release_qpn(hermon_state_t *state, hermon_qpn_entry_t *entry,
    int flags)
{
	ASSERT(entry != NULL);

	mutex_enter(&state->hs_qpn_avl_lock);

	/*
	 * If we are releasing the QP number here, then we decrement the
	 * reference count and check for zero references.  If there are
	 * zero references, then we free the QPC context (if it hadn't
	 * already been freed during a HERMON_QPN_FREE_ONLY free, i.e. for
	 * reuse with another similar QP number) and remove the tracking
	 * structure from the QP number AVL tree and free the structure.
	 * If we are not releasing the QP number here, then, as long as we
	 * have not exhausted the usefulness of the QPC context (that is,
	 * re-used it too many times without the reference count having
	 * gone to zero), we free up the QPC context for use by another
	 * thread (which will use it to construct a different QP number
	 * from the same QPC table index).
	 */
	if (flags == HERMON_QPN_RELEASE) {
		entry->qpn_refcnt--;

		/*
		 * If the reference count is zero, then we free the QPC
		 * context (if it hadn't already been freed in an early
		 * step, e.g. HERMON_QPN_FREE_ONLY) and remove/free the
		 * tracking structure from the QP number AVL tree.
		 *
		 * NOTE(review): the NULL check below assumes that
		 * hermon_rsrc_free() clears "entry->qpn_qpc" through the
		 * pointer passed to it (the FREE_ONLY path below does not
		 * clear it explicitly) -- confirm against hermon_rsrc.c.
		 */
		if (entry->qpn_refcnt == 0) {
			if (entry->qpn_qpc != NULL) {
				hermon_rsrc_free(state, &entry->qpn_qpc);
			}

			/*
			 * If the current entry has served its useful
			 * purpose (i.e. been reused the maximum allowable
			 * number of times), then remove it from QP number
			 * AVL tree and free it up.  The limit is the point
			 * at which the reuse counter would overflow the
			 * unconstrained (24 - cp_log_num_qp) bits of the
			 * 24-bit QP number.
			 */
			if (entry->qpn_counter >= (1 <<
			    (24 - state->hs_cfg_profile->cp_log_num_qp))) {
				avl_remove(&state->hs_qpn_avl, entry);
				kmem_free(entry, sizeof (hermon_qpn_entry_t));
			}
		}

	} else if (flags == HERMON_QPN_FREE_ONLY) {
		/*
		 * Even if we are not freeing the QP number, that will not
		 * always prevent us from releasing the QPC context.  In fact,
		 * since the QPC context only forms part of the whole QPN,
		 * we want to free it up for use by other consumers.  But
		 * if the reference count is non-zero (which it will always
		 * be when we are doing HERMON_QPN_FREE_ONLY) and the counter
		 * has reached its maximum value, then we cannot reuse the
		 * QPC context until the reference count eventually reaches
		 * zero (in HERMON_QPN_RELEASE, above).
		 */
		if (entry->qpn_counter < (1 <<
		    (24 - state->hs_cfg_profile->cp_log_num_qp))) {
			hermon_rsrc_free(state, &entry->qpn_qpc);
		}
	}
	mutex_exit(&state->hs_qpn_avl_lock);
}
2535 
2536 
2537 /*
2538  * hermon_qpn_avl_compare()
2539  *    Context: Can be called from user or kernel context.
2540  */
2541 static int
2542 hermon_qpn_avl_compare(const void *q, const void *e)
2543 {
2544         hermon_qpn_entry_t      *entry, *query;
2545 
2546         entry = (hermon_qpn_entry_t *)e;
2547         query = (hermon_qpn_entry_t *)q;
2548 
2549         if (query->qpn_indx < entry->qpn_indx) {
2550                 return (-1);
2551         } else if (query->qpn_indx > entry->qpn_indx) {
2552                 return (+1);
2553         } else {
2554                 return (0);
2555         }
2556 }
2557 
2558 
/*
 * hermon_qpn_avl_init()
 *    Context: Only called from attach() path context
 *
 *    Initializes the mutex and AVL tree used to track QP number (QPN)
 *    reuse.  Entries are ordered by QPC table index (see
 *    hermon_qpn_avl_compare()).
 */
void
hermon_qpn_avl_init(hermon_state_t *state)
{
	/*
	 * Initialize the lock used for QP number (QPN) AVL tree access.
	 * An interrupt priority is supplied because the tree is also
	 * accessed from interrupt context (e.g. hermon_qp_create_qpn()).
	 */
	mutex_init(&state->hs_qpn_avl_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->hs_intrmsi_pri));

	/* Initialize the AVL tree for the QP number (QPN) storage */
	avl_create(&state->hs_qpn_avl, hermon_qpn_avl_compare,
	    sizeof (hermon_qpn_entry_t),
	    offsetof(hermon_qpn_entry_t, qpn_avlnode));
}
2575 
2576 
2577 /*
2578  * hermon_qpn_avl_fini()
2579  *    Context: Only called from attach() and/or detach() path contexts
2580  */
2581 void
2582 hermon_qpn_avl_fini(hermon_state_t *state)
2583 {
2584         hermon_qpn_entry_t      *entry;
2585         void                    *cookie;
2586 
2587         /*
2588          * Empty all entries (if necessary) and destroy the AVL tree
2589          * that was used for QP number (QPN) tracking.
2590          */
2591         cookie = NULL;
2592         while ((entry = (hermon_qpn_entry_t *)avl_destroy_nodes(
2593             &state->hs_qpn_avl, &cookie)) != NULL) {
2594                 kmem_free(entry, sizeof (hermon_qpn_entry_t));
2595         }
2596         avl_destroy(&state->hs_qpn_avl);
2597 
2598         /* Destroy the lock used for QP number (QPN) AVL tree access */
2599         mutex_destroy(&state->hs_qpn_avl_lock);
2600 }
2601 
2602 
2603 /*
2604  * hermon_qphdl_from_qpnum()
2605  *    Context: Can be called from interrupt or base context.
2606  *
2607  *    This routine is important because changing the unconstrained
2608  *    portion of the QP number is critical to the detection of a
2609  *    potential race condition in the QP event handler code (i.e. the case
2610  *    where a QP is freed and alloc'd again before an event for the
2611  *    "old" QP can be handled).
2612  *
2613  *    While this is not a perfect solution (not sure that one exists)
2614  *    it does help to mitigate the chance that this race condition will
2615  *    cause us to deliver a "stale" event to the new QP owner.  Note:
2616  *    this solution does not scale well because the number of constrained
2617  *    bits increases (and, hence, the number of unconstrained bits
2618  *    decreases) as the number of supported QPs grows.  For small and
2619  *    intermediate values, it should hopefully provide sufficient
2620  *    protection.
2621  */
2622 hermon_qphdl_t
2623 hermon_qphdl_from_qpnum(hermon_state_t *state, uint_t qpnum)
2624 {
2625         uint_t  qpindx, qpmask;
2626 
2627         /* Calculate the QP table index from the qpnum */
2628         qpmask = (1 << state->hs_cfg_profile->cp_log_num_qp) - 1;
2629         qpindx = qpnum & qpmask;
2630         return (hermon_icm_num_to_hdl(state, HERMON_QPC, qpindx));
2631 }
2632 
2633 
/*
 * hermon_special_qp_rsrc_alloc
 *    Context: Can be called from interrupt or base context.
 *
 *    Allocates the special QP (QP0/QP1) resource for the given port.
 *    On the first allocation of a given special QP type, a
 *    CONF_SPECIAL_QP firmware command is posted to hand the pre-staged
 *    QPC index to the hardware.  On success, returns DDI_SUCCESS and
 *    sets "*qp_rsrc" to the shared special-QP resource.  Returns
 *    IBT_QP_IN_USE if the QP is already allocated (or, for QP0, owned
 *    by firmware), or IBT_INSUFF_RESOURCE if the firmware command fails.
 */
static int
hermon_special_qp_rsrc_alloc(hermon_state_t *state, ibt_sqp_type_t type,
    uint_t port, hermon_rsrc_t **qp_rsrc)
{
	uint_t		mask, flags;
	int		status;

	mutex_enter(&state->hs_spec_qplock);
	/* Snapshot of the allocation flags; hs_spec_qplock is held */
	flags = state->hs_spec_qpflags;
	if (type == IBT_SMI_SQP) {
		/*
		 * Check here to see if the driver has been configured
		 * to instruct the Hermon firmware to handle all incoming
		 * SMP messages (i.e. messages sent to SMA).  If so,
		 * then we will treat QP0 as if it has already been
		 * allocated (for internal use).  Otherwise, if we allow
		 * the allocation to happen, it will cause unexpected
		 * behaviors (e.g. Hermon SMA becomes unresponsive).
		 */
		if (state->hs_cfg_profile->cp_qp0_agents_in_fw != 0) {
			mutex_exit(&state->hs_spec_qplock);
			return (IBT_QP_IN_USE);
		}

		/*
		 * If this is the first QP0 allocation, then post
		 * a CONF_SPECIAL_QP firmware command
		 */
		if ((flags & HERMON_SPECIAL_QP0_RSRC_MASK) == 0) {
			status = hermon_conf_special_qp_cmd_post(state,
			    state->hs_spec_qp0->hr_indx, HERMON_CMD_QP_SMI,
			    HERMON_CMD_NOSLEEP_SPIN,
			    HERMON_CMD_SPEC_QP_OPMOD(
			    state->hs_cfg_profile->cp_qp0_agents_in_fw,
			    state->hs_cfg_profile->cp_qp1_agents_in_fw));
			if (status != HERMON_CMD_SUCCESS) {
				mutex_exit(&state->hs_spec_qplock);
				cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
				    "command failed: %08x\n",
				    state->hs_instance, status);
				return (IBT_INSUFF_RESOURCE);
			}
		}

		/*
		 * Now check (and, if necessary, modify) the flags to indicate
		 * whether the allocation was successful
		 */
		mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
		if (flags & mask) {
			/* This port's QP0 is already allocated */
			mutex_exit(&state->hs_spec_qplock);
			return (IBT_QP_IN_USE);
		}
		state->hs_spec_qpflags |= mask;
		*qp_rsrc = state->hs_spec_qp0;

	} else {
		/*
		 * If this is the first QP1 allocation, then post
		 * a CONF_SPECIAL_QP firmware command
		 */
		if ((flags & HERMON_SPECIAL_QP1_RSRC_MASK) == 0) {
			status = hermon_conf_special_qp_cmd_post(state,
			    state->hs_spec_qp1->hr_indx, HERMON_CMD_QP_GSI,
			    HERMON_CMD_NOSLEEP_SPIN,
			    HERMON_CMD_SPEC_QP_OPMOD(
			    state->hs_cfg_profile->cp_qp0_agents_in_fw,
			    state->hs_cfg_profile->cp_qp1_agents_in_fw));
			if (status != HERMON_CMD_SUCCESS) {
				mutex_exit(&state->hs_spec_qplock);
				cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
				    "command failed: %08x\n",
				    state->hs_instance, status);
				return (IBT_INSUFF_RESOURCE);
			}
		}

		/*
		 * Now check (and, if necessary, modify) the flags to indicate
		 * whether the allocation was successful
		 */
		mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
		if (flags & mask) {
			/* This port's QP1 is already allocated */
			mutex_exit(&state->hs_spec_qplock);
			return (IBT_QP_IN_USE);
		}
		state->hs_spec_qpflags |= mask;
		*qp_rsrc = state->hs_spec_qp1;
	}

	mutex_exit(&state->hs_spec_qplock);
	return (DDI_SUCCESS);
}
2731 
2732 
/*
 * hermon_special_qp_rsrc_free
 *    Context: Can be called from interrupt or base context.
 *
 *    Frees the special QP (QP0/QP1) resource for the given port by
 *    clearing the corresponding allocation flag and, when indicated,
 *    posting a CONF_SPECIAL_QP firmware command.  Returns DDI_SUCCESS,
 *    or an IBTF failure code if the firmware command fails.
 */
static int
hermon_special_qp_rsrc_free(hermon_state_t *state, ibt_sqp_type_t type,
    uint_t port)
{
	uint_t		mask, flags;
	int		status;

	mutex_enter(&state->hs_spec_qplock);
	if (type == IBT_SMI_SQP) {
		/* Clear this port's QP0 allocation flag */
		mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
		state->hs_spec_qpflags &= ~mask;
		flags = state->hs_spec_qpflags;

		/*
		 * If this is the last QP0 free, then post a CONF_SPECIAL_QP
		 * NOW, If this is the last Special QP free, then post a
		 * CONF_SPECIAL_QP firmware command - it'll stop them all
		 *
		 * NOTE(review): the command below is actually posted when
		 * "flags" is still non-zero (i.e. other special QP
		 * resources remain allocated), which reads as the opposite
		 * of the "last free" wording above -- confirm the intended
		 * CONF_SPECIAL_QP firmware semantics before changing.
		 */
		if (flags) {
			status = hermon_conf_special_qp_cmd_post(state, 0,
			    HERMON_CMD_QP_SMI, HERMON_CMD_NOSLEEP_SPIN, 0);
			if (status != HERMON_CMD_SUCCESS) {
				mutex_exit(&state->hs_spec_qplock);
				cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
				    "command failed: %08x\n",
				    state->hs_instance, status);
				if (status == HERMON_CMD_INVALID_STATUS) {
					hermon_fm_ereport(state, HCA_SYS_ERR,
					    HCA_ERR_SRV_LOST);
				}
				return (ibc_get_ci_failure(0));
			}
		}
	} else {
		/* Clear this port's QP1 allocation flag */
		mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
		state->hs_spec_qpflags &= ~mask;
		flags = state->hs_spec_qpflags;

		/*
		 * If this is the last QP1 free, then post a CONF_SPECIAL_QP
		 * NOW, if this is the last special QP free, then post a
		 * CONF_SPECIAL_QP firmware command - it'll stop them all
		 * (see the NOTE(review) in the QP0 branch above; the same
		 * flags-vs-comment discrepancy applies here)
		 */
		if (flags) {
			status = hermon_conf_special_qp_cmd_post(state, 0,
			    HERMON_CMD_QP_GSI, HERMON_CMD_NOSLEEP_SPIN, 0);
			if (status != HERMON_CMD_SUCCESS) {
				mutex_exit(&state->hs_spec_qplock);
				cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
				    "command failed: %08x\n",
				    state->hs_instance, status);
				if (status == HERMON_CMD_INVALID_STATUS) {
					hermon_fm_ereport(state, HCA_SYS_ERR,
					    HCA_ERR_SRV_LOST);
				}
				return (ibc_get_ci_failure(0));
			}
		}
	}

	mutex_exit(&state->hs_spec_qplock);
	return (DDI_SUCCESS);
}
2800 
2801 
2802 /*
2803  * hermon_qp_sgl_to_logwqesz()
2804  *    Context: Can be called from interrupt or base context.
2805  */
2806 static void
2807 hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
2808     uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
2809     uint_t *logwqesz, uint_t *max_sgl)
2810 {
2811         uint_t  max_size, log2, actual_sgl;
2812 
2813         switch (wq_type) {
2814         case HERMON_QP_WQ_TYPE_SENDQ_UD:
2815                 /*
2816                  * Use requested maximum SGL to calculate max descriptor size
2817                  * (while guaranteeing that the descriptor size is a
2818                  * power-of-2 cachelines).
2819                  */
2820                 max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
2821                 log2 = highbit(max_size);
2822                 if (ISP2(max_size)) {
2823                         log2 = log2 - 1;
2824                 }
2825 
2826                 /* Make sure descriptor is at least the minimum size */
2827                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2828 
2829                 /* Calculate actual number of SGL (given WQE size) */
2830                 actual_sgl = ((1 << log2) -
2831                     sizeof (hermon_hw_snd_wqe_ctrl_t)) >> 4;
2832                 break;
2833 
2834         case HERMON_QP_WQ_TYPE_SENDQ_CONN:
2835                 /*
2836                  * Use requested maximum SGL to calculate max descriptor size
2837                  * (while guaranteeing that the descriptor size is a
2838                  * power-of-2 cachelines).
2839                  */
2840                 max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
2841                 log2 = highbit(max_size);
2842                 if (ISP2(max_size)) {
2843                         log2 = log2 - 1;
2844                 }
2845 
2846                 /* Make sure descriptor is at least the minimum size */
2847                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2848 
2849                 /* Calculate actual number of SGL (given WQE size) */
2850                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SND_HDRS) >> 4;
2851                 break;
2852 
2853         case HERMON_QP_WQ_TYPE_RECVQ:
2854                 /*
2855                  * Same as above (except for Recv WQEs)
2856                  */
2857                 max_size = (HERMON_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
2858                 log2 = highbit(max_size);
2859                 if (ISP2(max_size)) {
2860                         log2 = log2 - 1;
2861                 }
2862 
2863                 /* Make sure descriptor is at least the minimum size */
2864                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2865 
2866                 /* Calculate actual number of SGL (given WQE size) */
2867                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_RCV_HDRS) >> 4;
2868                 break;
2869 
2870         case HERMON_QP_WQ_TYPE_SENDMLX_QP0:
2871                 /*
2872                  * Same as above (except for MLX transport WQEs).  For these
2873                  * WQEs we have to account for the space consumed by the
2874                  * "inline" packet headers.  (This is smaller than for QP1
2875                  * below because QP0 is not allowed to send packets with a GRH.
2876                  */
2877                 max_size = (HERMON_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4));
2878                 log2 = highbit(max_size);
2879                 if (ISP2(max_size)) {
2880                         log2 = log2 - 1;
2881                 }
2882 
2883                 /* Make sure descriptor is at least the minimum size */
2884                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2885 
2886                 /* Calculate actual number of SGL (given WQE size) */
2887                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP0_HDRS) >> 4;
2888                 break;
2889 
2890         case HERMON_QP_WQ_TYPE_SENDMLX_QP1:
2891                 /*
2892                  * Same as above.  For these WQEs we again have to account for
2893                  * the space consumed by the "inline" packet headers.  (This
2894                  * is larger than for QP0 above because we have to account for
2895                  * the possibility of a GRH in each packet - and this
2896                  * introduces an alignment issue that causes us to consume
2897                  * an additional 8 bytes).
2898                  */
2899                 max_size = (HERMON_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4));
2900                 log2 = highbit(max_size);
2901                 if (ISP2(max_size)) {
2902                         log2 = log2 - 1;
2903                 }
2904 
2905                 /* Make sure descriptor is at least the minimum size */
2906                 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2907 
2908                 /* Calculate actual number of SGL (given WQE size) */
2909                 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP1_HDRS) >> 4;
2910                 break;
2911 
2912         default:
2913                 HERMON_WARNING(state, "unexpected work queue type");
2914                 break;
2915         }
2916 
2917         /* Fill in the return values */
2918         *logwqesz = log2;
2919         *max_sgl  = min(real_max_sgl, actual_sgl);
2920 }