/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * hermon_cq.c
 *    Hermon Completion Queue Processing Routines
 *
 *    Implements all the routines necessary for allocating, freeing, resizing,
 *    and handling the completion type events that the Hermon hardware can
 *    generate.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>

#include <sys/ib/adapters/hermon/hermon.h>

int hermon_should_panic = 0;    /* debugging aid */

#define hermon_cq_update_ci_doorbell(cq)                                \
        /* Build the doorbell record data (low 24 bits only) */         \
        HERMON_UAR_DB_RECORD_WRITE(cq->cq_arm_ci_vdbr,                  \
            cq->cq_consindx & 0x00FFFFFF)
  51 
  52 static int hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq,
  53     uint_t cmd);
  54 #pragma inline(hermon_cq_arm_doorbell)
  55 static void hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr);
  56 #pragma inline(hermon_arm_cq_dbr_init)
  57 static void hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
  58     hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
  59 static void hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
  60     hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
  61 
  62 
  63 /*
  64  * hermon_cq_alloc()
  65  *    Context: Can be called only from user or kernel context.
  66  */
  67 int
  68 hermon_cq_alloc(hermon_state_t *state, ibt_cq_hdl_t ibt_cqhdl,
  69     ibt_cq_attr_t *cq_attr, uint_t *actual_size, hermon_cqhdl_t *cqhdl,
  70     uint_t sleepflag)
  71 {
  72         hermon_rsrc_t           *cqc, *rsrc;
  73         hermon_umap_db_entry_t  *umapdb;
  74         hermon_hw_cqc_t         cqc_entry;
  75         hermon_cqhdl_t          cq;
  76         ibt_mr_attr_t           mr_attr;
  77         hermon_mr_options_t     op;
  78         hermon_pdhdl_t          pd;
  79         hermon_mrhdl_t          mr;
  80         hermon_hw_cqe_t         *buf;
  81         uint64_t                value;
  82         uint32_t                log_cq_size, uarpg;
  83         uint_t                  cq_is_umap;
  84         uint32_t                status, flag;
  85         hermon_cq_sched_t       *cq_schedp;
  86 
  87         /*
  88          * Determine whether CQ is being allocated for userland access or
  89          * whether it is being allocated for kernel access.  If the CQ is
  90          * being allocated for userland access, then lookup the UAR
  91          * page number for the current process.  Note:  If this is not found
  92          * (e.g. if the process has not previously open()'d the Hermon driver),
  93          * then an error is returned.
  94          */
  95         cq_is_umap = (cq_attr->cq_flags & IBT_CQ_USER_MAP) ? 1 : 0;
  96         if (cq_is_umap) {
  97                 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
  98                     MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
  99                 if (status != DDI_SUCCESS) {
 100                         status = IBT_INVALID_PARAM;
 101                         goto cqalloc_fail;
 102                 }
 103                 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
 104         } else {
 105                 uarpg = state->hs_kernel_uar_index;
 106         }

        /* Use the internal protection domain (PD) for setting up CQs */
        pd = state->hs_pdhdl_internal;

        /* Increment the reference count on the protection domain (PD) */
        hermon_pd_refcnt_inc(pd);

        /*
         * Allocate a CQ context entry.  This will be filled in with all
         * the necessary parameters to define the Completion Queue.  And then
         * ownership will be passed to the hardware in the final step
         * below.  If we fail here, we must undo the protection domain
         * reference count.
         */
        status = hermon_rsrc_alloc(state, HERMON_CQC, 1, sleepflag, &cqc);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto cqalloc_fail1;
        }

        /*
         * Allocate the software structure for tracking the completion queue
         * (i.e. the Hermon Completion Queue handle).  If we fail here, we must
         * undo the protection domain reference count and the previous
         * resource allocation.
         */
        status = hermon_rsrc_alloc(state, HERMON_CQHDL, 1, sleepflag, &rsrc);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto cqalloc_fail2;
        }
        cq = (hermon_cqhdl_t)rsrc->hr_addr;
        cq->cq_is_umap = cq_is_umap;
        cq->cq_cqnum = cqc->hr_indx;    /* just use index, implicit in Hermon */
        cq->cq_intmod_count = 0;
        cq->cq_intmod_usec = 0;

        /*
         * If this will be a user-mappable CQ, then allocate an entry for
         * the "userland resources database".  This will later be added to
         * the database (after all further CQ operations are successful).
         * If we fail here, we must undo the reference counts and the
         * previous resource allocation.
         */
        if (cq->cq_is_umap) {
                umapdb = hermon_umap_db_alloc(state->hs_instance, cq->cq_cqnum,
                    MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
                if (umapdb == NULL) {
                        status = IBT_INSUFF_RESOURCE;
                        goto cqalloc_fail3;
                }
        }


        /*
         * Allocate the doorbell record.  We'll need one for the CQ, handling
         * both the consumer index (SET CI) and the CQ state (CQ ARM).
         */

        status = hermon_dbr_alloc(state, uarpg, &cq->cq_arm_ci_dbr_acchdl,
            &cq->cq_arm_ci_vdbr, &cq->cq_arm_ci_pdbr, &cq->cq_dbr_mapoffset);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto cqalloc_fail4;
        }

        /*
         * Calculate the appropriate size for the completion queue.
         * Note:  All Hermon CQs must be a power-of-2 minus 1 in size.  Also,
         * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step
         * rounds the requested size up to the next highest power-of-2.
         */
        cq_attr->cq_size = max(cq_attr->cq_size, HERMON_CQ_MIN_SIZE);
        log_cq_size = highbit(cq_attr->cq_size);
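        /*
         * Note: highbit() returns the 1-based position of the highest bit
         * set, so (1 << log_cq_size) is always strictly greater than the
         * requested size.  The usable depth reported back to the caller is
         * (1 << log_cq_size) - 1, per the power-of-2 minus 1 rule above.
         */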

        /*
         * Next we verify that the rounded-up size is valid (i.e. consistent
         * with the device limits and/or software-configured limits)
         */
        if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
                status = IBT_HCA_CQ_EXCEEDED;
                goto cqalloc_fail4a;
        }

        /*
         * Allocate the memory for the Completion Queue.
         *
         * Note: Although we use the common queue allocation routine, we
         * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
         * kernel system memory) for kernel CQs because it would be
         * inefficient to have CQs located in DDR memory.  This is primarily
         * because CQs are read from (by software) more than they are written
         * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
         * user-mappable CQs for a similar reason.)
         * It is also worth noting that, unlike Hermon QP work queues,
         * completion queues do not have the same strict alignment
         * requirements.  It is sufficient for the CQ memory to be both
         * aligned to and bound to addresses which are a multiple of CQE size.
         */
        cq->cq_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);

        cq->cq_cqinfo.qa_alloc_align = PAGESIZE;
        cq->cq_cqinfo.qa_bind_align  = PAGESIZE;
        if (cq->cq_is_umap) {
                cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
        } else {
                cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
                hermon_arm_cq_dbr_init(cq->cq_arm_ci_vdbr);
        }
        status = hermon_queue_alloc(state, &cq->cq_cqinfo, sleepflag);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto cqalloc_fail4;
        }
        buf = (hermon_hw_cqe_t *)cq->cq_cqinfo.qa_buf_aligned;

        /*
         * The ownership bit of the CQEs is set by the HW during the process
         * of transferring ownership of the CQ (PRM 09.35c, 14.2.1, note D1).
         */

        /*
         * Register the memory for the CQ.  The memory for the CQ must
         * be registered in the Hermon TPT tables.  This gives us the LKey
         * to specify in the CQ context below.  Note: If this is a user-
         * mappable CQ, then we will force DDI_DMA_CONSISTENT mapping.
         */
        flag = (sleepflag == HERMON_SLEEP) ?  IBT_MR_SLEEP : IBT_MR_NOSLEEP;
        mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
        mr_attr.mr_len   = cq->cq_cqinfo.qa_size;
        mr_attr.mr_as    = NULL;
        mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
        op.mro_bind_type   = state->hs_cfg_profile->cp_iommu_bypass;
        op.mro_bind_dmahdl = cq->cq_cqinfo.qa_dmahdl;
        op.mro_bind_override_addr = 0;
        status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
            HERMON_CQ_CMPT);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto cqalloc_fail5;
        }

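        /*
         * Record the EQ used for CQ error (e.g. overrun) events.  Per the
         * macro name, HERMON_CQ_ERREQNUM_GET() is presumed to return the
         * dedicated error event queue number for this HCA instance.
         */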
        cq->cq_erreqnum = HERMON_CQ_ERREQNUM_GET(state);
        if (cq_attr->cq_flags & IBT_CQ_HID) {
                if (!HERMON_HID_VALID(state, cq_attr->cq_hid)) {
                        IBTF_DPRINTF_L2("CQalloc", "bad handler id 0x%x",
                            cq_attr->cq_hid);
                        status = IBT_INVALID_PARAM;
                        goto cqalloc_fail5;
                }
                cq->cq_eqnum = HERMON_HID_TO_EQNUM(state, cq_attr->cq_hid);
                IBTF_DPRINTF_L2("cqalloc", "hid: eqn %d", cq->cq_eqnum);
        } else {
                cq_schedp = (hermon_cq_sched_t *)cq_attr->cq_sched;
                if (cq_schedp == NULL) {
                        cq_schedp = &state->hs_cq_sched_default;
                } else if (cq_schedp != &state->hs_cq_sched_default) {
                        int i;
                        hermon_cq_sched_t *tmp;

                        tmp = state->hs_cq_sched_array;
                        for (i = 0; i < state->hs_cq_sched_array_size; i++)
                                if (cq_schedp == &tmp[i])
                                        break;  /* found it */
                        if (i >= state->hs_cq_sched_array_size) {
                                cmn_err(CE_CONT, "!Invalid cq_sched argument: "
                                    "ignored\n");
                                cq_schedp = &state->hs_cq_sched_default;
                        }
                }
                cq->cq_eqnum = HERMON_HID_TO_EQNUM(state,
                    HERMON_CQSCHED_NEXT_HID(cq_schedp));
                IBTF_DPRINTF_L2("cqalloc", "sched: first-1 %d, len %d, "
                    "eqn %d", cq_schedp->cqs_start_hid - 1,
                    cq_schedp->cqs_len, cq->cq_eqnum);
        }

        /*
         * Fill in the CQC entry.  This is the final step before passing
         * ownership of the CQC entry to the Hermon hardware.  We use all of
         * the information collected/calculated above to fill in the
         * requisite portions of the CQC.  Note: If this CQ is going to be
         * used for userland access, then we need to set the UAR page number
         * appropriately (otherwise it's a "don't care")
         */
        bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));

        cqc_entry.state         = HERMON_CQ_DISARMED;
        cqc_entry.pg_offs       = cq->cq_cqinfo.qa_pgoffs >> 5;
        cqc_entry.log_cq_sz     = log_cq_size;
        cqc_entry.usr_page      = uarpg;
        cqc_entry.c_eqn         = cq->cq_eqnum;
        cqc_entry.log2_pgsz     = mr->mr_log2_pgsz;
        cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
        cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
        cqc_entry.dbr_addrh = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 32);
        cqc_entry.dbr_addrl = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 3);
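        /*
         * Note on the shifts above: pg_offs is expressed in units of CQE
         * size (32 bytes, hence ">> 5"), while the MTT base and doorbell
         * record addresses drop their low-order three bits (the fields
         * assume 8-byte alignment).  The "addh" halves carry the remaining
         * high-order address bits.
         */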

        /*
         * Write the CQC entry to hardware - we pass ownership of
         * the entry to the hardware (using the Hermon SW2HW_CQ firmware
         * command).  Note: In general, this operation shouldn't fail.  But
         * if it does, we have to undo everything we've done above before
         * returning error.
         */
        status = hermon_cmn_ownership_cmd_post(state, SW2HW_CQ, &cqc_entry,
            sizeof (hermon_hw_cqc_t), cq->cq_cqnum, sleepflag);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Hermon: SW2HW_CQ command failed: %08x\n",
                    status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                status = ibc_get_ci_failure(0);
                goto cqalloc_fail6;
        }

        /*
         * Fill in the rest of the Hermon Completion Queue handle.  Having
         * successfully transferred ownership of the CQC, we can update the
         * following fields for use in further operations on the CQ.
         */
        cq->cq_resize_hdl = 0;
        cq->cq_cqcrsrcp        = cqc;
        cq->cq_rsrcp   = rsrc;
        cq->cq_consindx        = 0;
                /* least restrictive */
        cq->cq_buf     = buf;
        cq->cq_bufsz   = (1 << log_cq_size);
        cq->cq_log_cqsz        = log_cq_size;
        cq->cq_mrhdl   = mr;
        cq->cq_refcnt          = 0;
        cq->cq_is_special = 0;
        cq->cq_uarpg   = uarpg;
        cq->cq_umap_dhp        = (devmap_cookie_t)NULL;
        avl_create(&cq->cq_wrid_wqhdr_avl_tree, hermon_wrid_workq_compare,
            sizeof (struct hermon_workq_avl_s),
            offsetof(struct hermon_workq_avl_s, wqa_link));

        cq->cq_hdlrarg         = (void *)ibt_cqhdl;

        /*
         * Put the CQ handle in the Hermon CQNum-to-CQHdl list.  Then fill in
         * the "actual_size" and "cqhdl" arguments and return success.
         */
        hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, cq);

        /*
         * If this is a user-mappable CQ, then we need to insert the previously
         * allocated entry into the "userland resources database".  This will
         * allow for later lookup during devmap() (i.e. mmap()) calls.
         */
        if (cq->cq_is_umap) {
                hermon_umap_db_add(umapdb);
        }

        /*
         * Fill in the return arguments (if necessary).  This includes the
         * real completion queue size.
         */
        if (actual_size != NULL) {
                *actual_size = (1 << log_cq_size) - 1;
        }
        *cqhdl = cq;

        return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
cqalloc_fail6:
        if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
            sleepflag) != DDI_SUCCESS) {
                HERMON_WARNING(state, "failed to deregister CQ memory");
        }
cqalloc_fail5:
        hermon_queue_free(&cq->cq_cqinfo);
cqalloc_fail4a:
        hermon_dbr_free(state, uarpg, cq->cq_arm_ci_vdbr);
cqalloc_fail4:
        if (cq_is_umap) {
                hermon_umap_db_free(umapdb);
        }
cqalloc_fail3:
        hermon_rsrc_free(state, &rsrc);
cqalloc_fail2:
        hermon_rsrc_free(state, &cqc);
cqalloc_fail1:
        hermon_pd_refcnt_dec(pd);
cqalloc_fail:
        return (status);
}


/*
 * hermon_cq_free()
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
hermon_cq_free(hermon_state_t *state, hermon_cqhdl_t *cqhdl, uint_t sleepflag)
{
        hermon_rsrc_t           *cqc, *rsrc;
        hermon_umap_db_entry_t  *umapdb;
        hermon_hw_cqc_t         cqc_entry;
        hermon_pdhdl_t          pd;
        hermon_mrhdl_t          mr;
        hermon_cqhdl_t          cq, resize;
        uint32_t                cqnum;
        uint64_t                value;
        uint_t                  maxprot;
        int                     status;

        /*
         * Pull all the necessary information from the Hermon Completion Queue
         * handle.  This is necessary here because the resource for the
         * CQ handle is going to be freed up as part of this operation.
         */
        cq      = *cqhdl;
        mutex_enter(&cq->cq_lock);
        cqc     = cq->cq_cqcrsrcp;
        rsrc    = cq->cq_rsrcp;
        pd      = state->hs_pdhdl_internal;
        mr      = cq->cq_mrhdl;
        cqnum   = cq->cq_cqnum;

        resize = cq->cq_resize_hdl;          /* save the handle for later */

        /*
         * If there are work queues still associated with the CQ, then return
         * an error.  Otherwise, we will be holding the CQ lock.
         */
        if (cq->cq_refcnt != 0) {
                mutex_exit(&cq->cq_lock);
                return (IBT_CQ_BUSY);
        }

        /*
         * If this was a user-mappable CQ, then we need to remove its entry
         * from the "userland resources database".  If it is also currently
         * mmap()'d out to a user process, then we need to call
         * devmap_devmem_remap() to remap the CQ memory to an invalid mapping.
         * We also need to invalidate the CQ tracking information for the
         * user mapping.
         */
        if (cq->cq_is_umap) {
                status = hermon_umap_db_find(state->hs_instance, cqnum,
                    MLNX_UMAP_CQMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
                    &umapdb);
                if (status != DDI_SUCCESS) {
                        mutex_exit(&cq->cq_lock);
                        HERMON_WARNING(state, "failed to find in database");
                        return (ibc_get_ci_failure(0));
                }
                hermon_umap_db_free(umapdb);
                if (cq->cq_umap_dhp != NULL) {
                        maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
                        status = devmap_devmem_remap(cq->cq_umap_dhp,
                            state->hs_dip, 0, 0, cq->cq_cqinfo.qa_size,
                            maxprot, DEVMAP_MAPPING_INVALID, NULL);
                        if (status != DDI_SUCCESS) {
                                mutex_exit(&cq->cq_lock);
                                HERMON_WARNING(state, "failed in CQ memory "
                                    "devmap_devmem_remap()");
                                return (ibc_get_ci_failure(0));
                        }
                        cq->cq_umap_dhp = (devmap_cookie_t)NULL;
                }
        }

        /*
         * Put NULL into the Hermon CQNum-to-CQHdl list.  This will allow any
         * in-progress events to detect that the CQ corresponding to this
         * number has been freed.
         */
        hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, NULL);

        mutex_exit(&cq->cq_lock);

        /*
         * Reclaim the CQC entry from hardware (using the Hermon HW2SW_CQ
         * firmware command).  If the ownership transfer fails for any reason,
         * then it is an indication that something (either in HW or SW) has
         * gone seriously wrong.
         */
        status = hermon_cmn_ownership_cmd_post(state, HW2SW_CQ, &cqc_entry,
            sizeof (hermon_hw_cqc_t), cqnum, sleepflag);
        if (status != HERMON_CMD_SUCCESS) {
                HERMON_WARNING(state, "failed to reclaim CQC ownership");
                cmn_err(CE_CONT, "Hermon: HW2SW_CQ command failed: %08x\n",
                    status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                return (ibc_get_ci_failure(0));
        }

        /*
         * From here on, we start relinquishing resources - but check to see
         * if a resize was in progress - if so, we need to relinquish those
         * resources as well
         */


        /*
         * Deregister the memory for the Completion Queue.  If this fails
         * for any reason, then it is an indication that something (either
         * in HW or SW) has gone seriously wrong.  So we print a warning
         * message and return.
         */
        status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
            sleepflag);
        if (status != DDI_SUCCESS) {
                HERMON_WARNING(state, "failed to deregister CQ memory");
                return (ibc_get_ci_failure(0));
        }

        if (resize)     {       /* there was a pointer to a handle */
                mr = resize->cq_mrhdl;       /* reuse the pointer to the region */
                status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
                    sleepflag);
                if (status != DDI_SUCCESS) {
                        HERMON_WARNING(state, "failed to deregister resize CQ "
                            "memory");
                        return (ibc_get_ci_failure(0));
                }
        }

        /* Free the memory for the CQ */
        hermon_queue_free(&cq->cq_cqinfo);
        if (resize)     {
                hermon_queue_free(&resize->cq_cqinfo);
                /* and the temporary handle */
                kmem_free(resize, sizeof (struct hermon_sw_cq_s));
        }

        /* everything else does not matter for the resize in progress */

        /* Free the dbr */
        hermon_dbr_free(state, cq->cq_uarpg, cq->cq_arm_ci_vdbr);

        /* Free the Hermon Completion Queue handle */
        hermon_rsrc_free(state, &rsrc);

        /* Free up the CQC entry resource */
        hermon_rsrc_free(state, &cqc);

        /* Decrement the reference count on the protection domain (PD) */
        hermon_pd_refcnt_dec(pd);

        /* Set the cqhdl pointer to NULL and return success */
        *cqhdl = NULL;

        return (DDI_SUCCESS);
}


/*
 * hermon_cq_resize()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_cq_resize(hermon_state_t *state, hermon_cqhdl_t cq, uint_t req_size,
    uint_t *actual_size, uint_t sleepflag)
{
        hermon_hw_cqc_t         cqc_entry;
        hermon_cqhdl_t          resize_hdl;
        hermon_qalloc_info_t    new_cqinfo;
        ibt_mr_attr_t           mr_attr;
        hermon_mr_options_t     op;
        hermon_pdhdl_t          pd;
        hermon_mrhdl_t          mr;
        hermon_hw_cqe_t         *buf;
        uint32_t                new_prod_indx;
        uint_t                  log_cq_size;
        int                     status, flag;

        if (cq->cq_resize_hdl != 0) {        /* resize already in progress */
                status = IBT_CQ_BUSY;
                goto cqresize_fail;
        }


        /* Use the internal protection domain (PD) for CQs */
        pd = state->hs_pdhdl_internal;

        /*
         * Calculate the appropriate size for the new resized completion queue.
         * Note:  All Hermon CQs must be a power-of-2 minus 1 in size.  Also,
         * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step
         * rounds the requested size up to the next highest power-of-2.
         */
        req_size = max(req_size, HERMON_CQ_MIN_SIZE);
        log_cq_size = highbit(req_size);
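        /* See the note in hermon_cq_alloc() about highbit() rounding */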

        /*
         * Next we verify that the rounded-up size is valid (i.e. consistent
         * with the device limits and/or software-configured limits)
         */
        if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
                status = IBT_HCA_CQ_EXCEEDED;
                goto cqresize_fail;
        }

        /*
         * Allocate the memory for the newly resized Completion Queue.
         *
         * Note: Although we use the common queue allocation routine, we
         * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
         * kernel system memory) for kernel CQs because it would be
         * inefficient to have CQs located in DDR memory.  This is the same
         * choice we make when we first allocate completion queues, primarily
         * because CQs are read from (by software) more than they are written
         * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
         * user-mappable CQs for a similar reason.)
         * It is also worth noting that, unlike Hermon QP work queues,
         * completion queues do not have the same strict alignment
         * requirements.  It is sufficient for the CQ memory to be both
         * aligned to and bound to addresses which are a multiple of CQE size.
         */

        /* first, alloc the resize_handle */
        resize_hdl = kmem_zalloc(sizeof (struct hermon_sw_cq_s), KM_SLEEP);

        new_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
        new_cqinfo.qa_alloc_align = PAGESIZE;
        new_cqinfo.qa_bind_align  = PAGESIZE;
        if (cq->cq_is_umap) {
                new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
        } else {
                new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
        }
        status = hermon_queue_alloc(state, &new_cqinfo, sleepflag);
        if (status != DDI_SUCCESS) {
                /* free the resize handle */
                kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
                status = IBT_INSUFF_RESOURCE;
                goto cqresize_fail;
        }
        buf = (hermon_hw_cqe_t *)new_cqinfo.qa_buf_aligned;

        /*
         * No initialization of the CQ is needed - the command will do it
         */

        /*
         * Register the memory for the CQ.  The memory for the CQ must
         * be registered in the Hermon TPT tables.  This gives us the LKey
         * to specify in the CQ context below.
         */
        flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
        mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
        mr_attr.mr_len   = new_cqinfo.qa_size;
        mr_attr.mr_as    = NULL;
        mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
        op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
        op.mro_bind_dmahdl = new_cqinfo.qa_dmahdl;
        op.mro_bind_override_addr = 0;
        status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
            HERMON_CQ_CMPT);
        if (status != DDI_SUCCESS) {
                hermon_queue_free(&new_cqinfo);
                /* free the resize handle */
                kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
                status = IBT_INSUFF_RESOURCE;
                goto cqresize_fail;
        }

        /*
         * Now we grab the CQ lock.  Since we will be updating the actual
         * CQ location and the producer/consumer indexes, we should hold
         * the lock.
         *
         * We use HERMON_CMD_NOSLEEP_SPIN here (and below), though, because we
         * are holding the "cq_lock" and if we got raised to interrupt level
         * by priority inversion, we would not want to block in this routine
         * waiting for success.
         */
        mutex_enter(&cq->cq_lock);

        /*
         * Fill in the CQC entry.  For the resize operation this is the
         * final step before attempting the resize operation on the CQC entry.
         * We use all of the information collected/calculated above to fill
         * in the requisite portions of the CQC.
         */
        bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
        cqc_entry.log_cq_sz     = log_cq_size;
        cqc_entry.pg_offs       = new_cqinfo.qa_pgoffs >> 5;
        cqc_entry.log2_pgsz     = mr->mr_log2_pgsz;
        cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
        cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;

        /*
         * Write the CQC entry to hardware.  Lastly, we pass ownership of
         * the entry to the hardware (using the Hermon RESIZE_CQ firmware
         * command).  Note: In general, this operation shouldn't fail.  But
         * if it does, we have to undo everything we've done above before
         * returning error.  Also note that the status returned may indicate
         * the code to return to the IBTF.
         */
        status = hermon_resize_cq_cmd_post(state, &cqc_entry, cq->cq_cqnum,
            &new_prod_indx, HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
                /* Resize attempt has failed, drop CQ lock and cleanup */
                mutex_exit(&cq->cq_lock);
                if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
                    sleepflag) != DDI_SUCCESS) {
                        HERMON_WARNING(state, "failed to deregister CQ memory");
                }
                kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
                hermon_queue_free(&new_cqinfo);
                if (status == HERMON_CMD_BAD_SIZE) {
                        return (IBT_CQ_SZ_INSUFFICIENT);
                } else {
                        cmn_err(CE_CONT, "Hermon: RESIZE_CQ command failed: "
                            "%08x\n", status);
                        if (status == HERMON_CMD_INVALID_STATUS) {
                                hermon_fm_ereport(state, HCA_SYS_ERR,
                                    HCA_ERR_SRV_LOST);
                        }
                        return (ibc_get_ci_failure(0));
                }
        }

        /*
         * For Hermon, we've alloc'd another handle structure and saved off
         * the important things in it.  Then, in polling we check to see if
         * there's a "resizing handle" and if so we look for the "special
         * CQE", opcode 0x16, that indicates the transition to the new buffer.
         *
         * At that point, we'll adjust everything - including dereg and
         * freeing of the original buffer, updating all the necessary fields
         * in the cq_hdl, and setting up for the next cqe polling
         */

        resize_hdl->cq_buf   = buf;
        resize_hdl->cq_bufsz = (1 << log_cq_size);
        resize_hdl->cq_mrhdl = mr;
        resize_hdl->cq_log_cqsz = log_cq_size;

        bcopy(&new_cqinfo, &(resize_hdl->cq_cqinfo),
            sizeof (struct hermon_qalloc_info_s));

        /* now, save the address in the cq_handle */
        cq->cq_resize_hdl = resize_hdl;

        /*
         * Drop the CQ lock now.
         */

        mutex_exit(&cq->cq_lock);
        /*
         * Fill in the return arguments (if necessary).  This includes the
         * real new completion queue size.
         */
        if (actual_size != NULL) {
                *actual_size = (1 << log_cq_size) - 1;
        }

        return (DDI_SUCCESS);

cqresize_fail:
        return (status);
}


/*
 * hermon_cq_modify()
 *    Context: Can be called from base context.
 */
/* ARGSUSED */
int
hermon_cq_modify(hermon_state_t *state, hermon_cqhdl_t cq,
    uint_t count, uint_t usec, ibt_cq_handler_id_t hid, uint_t sleepflag)
{
        int     status;
        hermon_hw_cqc_t         cqc_entry;

        mutex_enter(&cq->cq_lock);
        if (count != cq->cq_intmod_count ||
            usec != cq->cq_intmod_usec) {
                bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
                cqc_entry.cq_max_cnt = count;
                cqc_entry.cq_period = usec;
                status = hermon_modify_cq_cmd_post(state, &cqc_entry,
                    cq->cq_cqnum, MODIFY_MODERATION_CQ, sleepflag);
                if (status != HERMON_CMD_SUCCESS) {
                        mutex_exit(&cq->cq_lock);
                        cmn_err(CE_CONT, "Hermon: MODIFY_MODERATION_CQ "
                            "command failed: %08x\n", status);
                        if (status == HERMON_CMD_INVALID_STATUS) {
                                hermon_fm_ereport(state, HCA_SYS_ERR,
                                    HCA_ERR_SRV_LOST);
                        }
                        return (ibc_get_ci_failure(0));
                }
                cq->cq_intmod_count = count;
                cq->cq_intmod_usec = usec;
        }
        if (hid && (hid - 1 != cq->cq_eqnum)) {
                bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
                cqc_entry.c_eqn = HERMON_HID_TO_EQNUM(state, hid);
                status = hermon_modify_cq_cmd_post(state, &cqc_entry,
                    cq->cq_cqnum, MODIFY_EQN, sleepflag);
                if (status != HERMON_CMD_SUCCESS) {
                        mutex_exit(&cq->cq_lock);
                        cmn_err(CE_CONT, "Hermon: MODIFY_EQN command failed: "
                            "%08x\n", status);
                        if (status == HERMON_CMD_INVALID_STATUS) {
                                hermon_fm_ereport(state, HCA_SYS_ERR,
                                    HCA_ERR_SRV_LOST);
                        }
                        return (ibc_get_ci_failure(0));
                }
                cq->cq_eqnum = hid - 1;
        }
        mutex_exit(&cq->cq_lock);
        return (DDI_SUCCESS);
}

/*
 * hermon_cq_notify()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_cq_notify(hermon_state_t *state, hermon_cqhdl_t cq,
    ibt_cq_notify_flags_t flags)
{
        uint_t  cmd;
        ibt_status_t status;

        /* Validate IBT flags and call doorbell routine. */
        if (flags == IBT_NEXT_COMPLETION) {
                cmd = HERMON_CQDB_NOTIFY_CQ;
        } else if (flags == IBT_NEXT_SOLICITED) {
                cmd = HERMON_CQDB_NOTIFY_CQ_SOLICIT;
        } else {
                return (IBT_CQ_NOTIFY_TYPE_INVALID);
        }

        status = hermon_cq_arm_doorbell(state, cq, cmd);
        return (status);
}


/*
 * hermon_cq_poll()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_cq_poll(hermon_state_t *state, hermon_cqhdl_t cq, ibt_wc_t *wc_p,
    uint_t num_wc, uint_t *num_polled)
{
        hermon_hw_cqe_t *cqe;
        uint_t          opcode;
        uint32_t        cons_indx, wrap_around_mask, shift, mask;
        uint32_t        polled_cnt, spec_op = 0;
        int             status;

        /*
         * Check for user-mappable CQ memory.  Note:  We do not allow kernel
         * clients to poll CQ memory that is accessible directly by the user.
         * If the CQ memory is user accessible, then return an error.
         */
        if (cq->cq_is_umap) {
                return (IBT_CQ_HDL_INVALID);
        }

        mutex_enter(&cq->cq_lock);

        /* Get the consumer index */
        cons_indx = cq->cq_consindx;
        shift = cq->cq_log_cqsz;
        mask = cq->cq_bufsz;
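        /*
         * The "shift" and "mask" values are handed to HERMON_CQE_OWNER_IS_SW()
         * so the ownership test can account for ring wrap (the hardware
         * toggles the CQE ownership bit on each pass through the power-of-2
         * sized buffer).
         */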

        /*
         * Calculate the wrap around mask.  Note: This operation only works
         * because all Hermon completion queues have power-of-2 sizes
         */
        wrap_around_mask = (cq->cq_bufsz - 1);

        /* Calculate the pointer to the first CQ entry */
        cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

        /*
         * Keep pulling entries from the CQ until we find an entry owned by
         * the hardware.  As long as the CQEs are owned by SW, process
         * each entry by calling hermon_cq_cqe_consume() and updating the CQ
         * consumer index.  Note:  We only update the consumer index if
         * hermon_cq_cqe_consume() returns HERMON_CQ_SYNC_AND_DB.  Otherwise,
         * it indicates that we are going to "recycle" the CQE (probably
         * because it is an error CQE and corresponds to more than one
         * completion).
         */
        polled_cnt = 0;
        while (HERMON_CQE_OWNER_IS_SW(cq, cqe, cons_indx, shift, mask)) {
                if (cq->cq_resize_hdl != 0) {        /* in midst of resize */
                        /* peek at the opcode */
                        opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
                        if (opcode == HERMON_CQE_RCV_RESIZE_CODE) {
                                hermon_cq_resize_helper(state, cq);

                                /* Increment the consumer index */
                                cons_indx = (cons_indx + 1);
                                spec_op = 1; /* plus one for the limiting CQE */

                                wrap_around_mask = (cq->cq_bufsz - 1);

                                /* Update the pointer to the next CQ entry */
                                cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

                                continue;
                        }
                }       /* in resizing CQ */

                /*
                 * either resizing and not the special opcode, or
                 * not resizing at all
                 */
                hermon_cq_cqe_consume(state, cq, cqe, &wc_p[polled_cnt++]);

                /* Increment the consumer index */
                cons_indx = (cons_indx + 1);

                /* Update the pointer to the next CQ entry */
                cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

                /*
                 * If we have run out of space to store work completions,
                 * then stop and return the ones we have pulled off the CQ.
                 */
                if (polled_cnt >= num_wc) {
                        break;
                }
        }

        /*
         * Now we only ring the doorbell (to update the consumer index) if
         * we've actually consumed a CQ entry.
         */
        if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
                /*
                 * Update the consumer index in both the CQ handle and the
                 * doorbell record.
                 */
                cq->cq_consindx = cons_indx;
                hermon_cq_update_ci_doorbell(cq);

        } else if (polled_cnt == 0) {
                if (spec_op != 0) {
                        /* if we got the special opcode, update the consindx */
                        cq->cq_consindx = cons_indx;
                        hermon_cq_update_ci_doorbell(cq);
                }
        }

        mutex_exit(&cq->cq_lock);

        /* Set "num_polled" (if necessary) */
        if (num_polled != NULL) {
                *num_polled = polled_cnt;
        }

        /* Set CQ_EMPTY condition if needed, otherwise return success */
        if (polled_cnt == 0) {
                status = IBT_CQ_EMPTY;
        } else {
                status = DDI_SUCCESS;
        }

        /*
         * Check if the system is currently panicking.  If it is, then call
         * the Hermon interrupt service routine.  This step is necessary here
         * because we might be in a polled I/O mode and without the call to
         * hermon_isr() - and its subsequent calls to poll and rearm each
         * event queue - we might overflow our EQs and render the system
         * unable to sync/dump.
         */
        if (ddi_in_panic() != 0) {
                (void) hermon_isr((caddr_t)state, (caddr_t)NULL);
        }
        return (status);
}

/*
 *      cmd_sn must be initialized to 1 to enable proper reenabling
 *      by hermon_arm_cq_dbr_update().
 */
static void
hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr)
{
        uint32_t *target;

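        /*
         * The arm/command word is the second 32-bit word of the doorbell
         * record; the first word holds the consumer index written by
         * hermon_cq_update_ci_doorbell().
         */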
        target = (uint32_t *)cq_arm_dbr + 1;
        *target = htonl(1 << HERMON_CQDB_CMDSN_SHIFT);
}


/*
 *      User cmd_sn needs help from this kernel function to know
 *      when it should be incremented (modulo 4).  We do an atomic
 *      update of the arm_cq dbr to communicate this fact.  We retry
 *      in the case that the user library is racing with us.  We zero
 *      out the cmd field so that the user library can use the cmd
 *      field to track the last command it issued (solicited versus any).
 */
static void
hermon_arm_cq_dbr_update(hermon_dbr_t *cq_arm_dbr)
{
        uint32_t tmp, cmp, new;
        uint32_t old_cmd_sn, new_cmd_sn;
        uint32_t *target;
        int retries = 0;

        target = (uint32_t *)cq_arm_dbr + 1;
retry:
        cmp = *target;
        tmp = htonl(cmp);
        old_cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
        new_cmd_sn = (old_cmd_sn + (0x1 << HERMON_CQDB_CMDSN_SHIFT)) &
            (0x3 << HERMON_CQDB_CMDSN_SHIFT);
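        /* Advance cmd_sn (mod 4) and clear the command field in one shot */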
        new = htonl((tmp & ~(0x37 << HERMON_CQDB_CMD_SHIFT)) | new_cmd_sn);
        tmp = atomic_cas_32(target, cmp, new);
        if (tmp != cmp) {       /* cas failed, so need to retry */
                drv_usecwait(retries & 0xff);   /* avoid race */
                if (++retries > 100000) {
                        cmn_err(CE_CONT, "cas failed in hermon\n");
                        retries = 0;
                }
                goto retry;
        }
}


/*
 * hermon_cq_handler()
 *    Context: Only called from interrupt context
 */
/* ARGSUSED */
int
hermon_cq_handler(hermon_state_t *state, hermon_eqhdl_t eq,
    hermon_hw_eqe_t *eqe)
{
        hermon_cqhdl_t          cq;
        uint_t                  cqnum;

        /* Get the CQ handle from the CQ number in the event descriptor */
        cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
        cq = hermon_cqhdl_from_cqnum(state, cqnum);

        /*
         * If the CQ handle is NULL, this is probably an indication
         * that the CQ has been freed already, in which case we
         * should not deliver this event.
         *
         * We also check that the CQ number in the handle is the
         * same as the CQ number in the event queue entry.  This
         * extra check allows us to handle the case where a CQ was
         * freed and then allocated again in the time it took to
         * handle the event queue processing.  By constantly incrementing
         * the non-constrained portion of the CQ number every time
         * a new CQ is allocated, we mitigate (somewhat) the chance
         * that a stale event could be passed to the client's CQ
         * handler.
         *
         * Lastly, we check if "hs_ibtfpriv" is NULL.  If it is then it
         * means that we have either received this event before we
         * finished attaching to the IBTF or we have received it while we
         * are in the process of detaching.
         */
        if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
            (state->hs_ibtfpriv != NULL)) {
                hermon_arm_cq_dbr_update(cq->cq_arm_ci_vdbr);
                HERMON_DO_IBTF_CQ_CALLB(state, cq);
        }

        return (DDI_SUCCESS);
}


/*
 * hermon_cq_err_handler()
 *    Context: Only called from interrupt context
 */
/* ARGSUSED */
int
hermon_cq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq,
    hermon_hw_eqe_t *eqe)
{
        hermon_cqhdl_t          cq;
        uint_t                  cqnum;
        ibc_async_event_t       event;
        ibt_async_code_t        type;

        HERMON_FMANOTE(state, HERMON_FMA_OVERRUN);
        /* Get the CQ handle from the CQ number in the event descriptor */
        cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
        cq = hermon_cqhdl_from_cqnum(state, cqnum);

        /*
         * If the CQ handle is NULL, this is probably an indication
         * that the CQ has been freed already, in which case we
         * should not deliver this event.
         *
         * We also check that the CQ number in the handle is the
         * same as the CQ number in the event queue entry.  This
         * extra check allows us to handle the case where a CQ was
         * freed and then allocated again in the time it took to
         * handle the event queue processing.  By constantly incrementing
         * the non-constrained portion of the CQ number every time
         * a new CQ is allocated, we mitigate (somewhat) the chance
         * that a stale event could be passed to the client's CQ
         * handler.
         *
         * And then we check if "hs_ibtfpriv" is NULL.  If it is then it
         * means that we have either received this event before we
         * finished attaching to the IBTF or we have received it while we
         * are in the process of detaching.
         */
        if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
            (state->hs_ibtfpriv != NULL)) {
                event.ev_cq_hdl = (ibt_cq_hdl_t)cq->cq_hdlrarg;
                type            = IBT_ERROR_CQ;
                HERMON_DO_IBTF_ASYNC_CALLB(state, type, &event);
        }

        return (DDI_SUCCESS);
}


/*
 * hermon_cq_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_cq_refcnt_inc(hermon_cqhdl_t cq, uint_t is_special)
{
        /*
         * Increment the completion queue's reference count.  Note: In order
         * to ensure compliance with IBA C11-15, we must ensure that a given
         * CQ is not used for both special (SMI/GSI) QPs and non-special QPs.
         * This is accomplished here by keeping track of how the referenced
         * CQ is being used.
         */
        mutex_enter(&cq->cq_lock);
        if (cq->cq_refcnt == 0) {
                cq->cq_is_special = is_special;
        } else {
                if (cq->cq_is_special != is_special) {
                        mutex_exit(&cq->cq_lock);
                        return (DDI_FAILURE);
                }
        }
        cq->cq_refcnt++;
        mutex_exit(&cq->cq_lock);
        return (DDI_SUCCESS);
}


/*
 * hermon_cq_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_cq_refcnt_dec(hermon_cqhdl_t cq)
{
        /* Decrement the completion queue's reference count */
        mutex_enter(&cq->cq_lock);
        cq->cq_refcnt--;
        mutex_exit(&cq->cq_lock);
}


/*
 * hermon_cq_arm_doorbell()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq, uint_t cq_cmd)
{
        uint32_t        cq_num;
        uint32_t        *target;
        uint32_t        old_cmd, cmp, new, tmp, cmd_sn;
        ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

        /* initialize the FMA retry loop */
        hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

        cq_num = cq->cq_cqnum;
        target = (uint32_t *)cq->cq_arm_ci_vdbr + 1;

        /* the FMA retry loop starts for Hermon doorbell register. */
        hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
            fm_test_num);
retry:
        cmp = *target;
        tmp = htonl(cmp);
        old_cmd = tmp & (0x7 << HERMON_CQDB_CMD_SHIFT);
        cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
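        /*
         * Only ring the doorbell when this request escalates the current arm
         * state: a NOTIFY_CQ request is skipped if the CQ is already armed
         * for any completion, and a SOLICIT request is skipped if the CQ is
         * already armed for either kind.  The doorbell record is updated
         * first (via compare-and-swap), then the UAR doorbell is written
         * with the same cmd/cmd_sn state, the CQ number, and the consumer
         * index.
         */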
        if (cq_cmd == HERMON_CQDB_NOTIFY_CQ) {
                if (old_cmd != HERMON_CQDB_NOTIFY_CQ) {
                        cmd_sn |= (HERMON_CQDB_NOTIFY_CQ <<
                            HERMON_CQDB_CMD_SHIFT);
                        new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
                        tmp = atomic_cas_32(target, cmp, new);
                        if (tmp != cmp)
                                goto retry;
                        HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
                            &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
                            32) | (cq->cq_consindx & 0xFFFFFF));
                } /* else it's already armed */
        } else {
                ASSERT(cq_cmd == HERMON_CQDB_NOTIFY_CQ_SOLICIT);
                if (old_cmd != HERMON_CQDB_NOTIFY_CQ &&
                    old_cmd != HERMON_CQDB_NOTIFY_CQ_SOLICIT) {
                        cmd_sn |= (HERMON_CQDB_NOTIFY_CQ_SOLICIT <<
                            HERMON_CQDB_CMD_SHIFT);
                        new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
                        tmp = atomic_cas_32(target, cmp, new);
                        if (tmp != cmp)
                                goto retry;
                        HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
                            &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
                            32) | (cq->cq_consindx & 0xFFFFFF));
                } /* else it's already armed */
        }

        /* the FMA retry loop ends. */
        hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
            fm_test_num);

        return (IBT_SUCCESS);

pio_error:
        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        return (ibc_get_ci_failure(0));
}


/*
 * hermon_cqhdl_from_cqnum()
 *    Context: Can be called from interrupt or base context.
 *
 *    This routine is important because changing the unconstrained
 *    portion of the CQ number is critical to the detection of a
 *    potential race condition in the CQ handler code (i.e. the case
 *    where a CQ is freed and alloc'd again before an event for the
 *    "old" CQ can be handled).
 *
 *    While this is not a perfect solution (not sure that one exists)
 *    it does help to mitigate the chance that this race condition will
 *    cause us to deliver a "stale" event to the new CQ owner.  Note:
 *    this solution does not scale well because the number of constrained
 *    bits increases (and, hence, the number of unconstrained bits
 *    decreases) as the number of supported CQs grows.  For small and
 *    intermediate values, it should hopefully provide sufficient
 *    protection.
 */
hermon_cqhdl_t
hermon_cqhdl_from_cqnum(hermon_state_t *state, uint_t cqnum)
{
        uint_t  cqindx, cqmask;

        /* Calculate the CQ table index from the cqnum */
        cqmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1;
        cqindx = cqnum & cqmask;
        return (hermon_icm_num_to_hdl(state, HERMON_CQC, cqindx));
}

1278 /*
1279  * hermon_cq_cqe_consume()
1280  *    Context: Can be called from interrupt or base context.
1281  */
1282 static void
1283 hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
1284     hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
1285 {
1286         uint_t          opcode, qpnum, qp1_indx;
1287         ibt_wc_flags_t  flags;
1288         ibt_wrc_opcode_t type;
1289 
1290         /*
1291          * Determine if this is an "error" CQE by examining "opcode".  If it
1292          * is an error CQE, then call hermon_cq_errcqe_consume() to fill in
1293          * the work completion (including its error status) and return.
1294          * Otherwise, this is a successful completion.
1295          */
1296         opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
1297         if ((opcode == HERMON_CQE_SEND_ERR_OPCODE) ||
1298             (opcode == HERMON_CQE_RECV_ERR_OPCODE)) {
1299                 hermon_cq_errcqe_consume(state, cq, cqe, wc);
1300                 return;
1301         }
1302 
1303         /*
1304          * Fetch the Work Request ID using the information in the CQE.
1305          * See hermon_wr.c for more details.
1306          */
1307         wc->wc_id = hermon_wrid_get_entry(cq, cqe);
1308 
1309         /*
1310          * Parse the CQE opcode to determine completion type.  This will set
1311          * not only the type of the completion, but also any flags that might
1312          * be associated with it (e.g. whether immediate data is present).
1313          */
1314         flags = IBT_WC_NO_FLAGS;
1315         if (HERMON_CQE_SENDRECV_GET(cq, cqe) != HERMON_COMPLETION_RECV) {
1316 
1317                 /* Send CQE */
1318                 switch (opcode) {
1319                 case HERMON_CQE_SND_RDMAWR_IMM:
1320                 case HERMON_CQE_SND_RDMAWR:
1321                         type = IBT_WRC_RDMAW;
1322                         break;
1323 
1324                 case HERMON_CQE_SND_SEND_INV:
1325                 case HERMON_CQE_SND_SEND_IMM:
1326                 case HERMON_CQE_SND_SEND:
1327                         type = IBT_WRC_SEND;
1328                         break;
1329 
1330                 case HERMON_CQE_SND_LSO:
1331                         type = IBT_WRC_SEND_LSO;
1332                         break;
1333 
1334                 case HERMON_CQE_SND_RDMARD:
1335                         type = IBT_WRC_RDMAR;
1336                         break;
1337 
1338                 case HERMON_CQE_SND_ATOMIC_CS:
1339                         type = IBT_WRC_CSWAP;
1340                         break;
1341 
1342                 case HERMON_CQE_SND_ATOMIC_FA:
1343                         type = IBT_WRC_FADD;
1344                         break;
1345 
1346                 case HERMON_CQE_SND_BIND_MW:
1347                         type = IBT_WRC_BIND;
1348                         break;
1349 
1350                 case HERMON_CQE_SND_FRWR:
1351                         type = IBT_WRC_FAST_REG_PMR;
1352                         break;
1353 
1354                 case HERMON_CQE_SND_LCL_INV:
1355                         type = IBT_WRC_LOCAL_INVALIDATE;
1356                         break;
1357 
1358                 default:
1359                         HERMON_WARNING(state, "unknown send CQE type");
1360                         wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
1361                         return;
1362                 }
1363         } else if ((state->hs_fcoib_may_be_running == B_TRUE) &&
1364             hermon_fcoib_is_fexch_qpn(state, HERMON_CQE_QPNUM_GET(cq, cqe))) {
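                     /*
                      * FEXCH (FCoIB) receive completion: fill in the FC-specific
                      * work completion fields (sequence count and ID, TX/RX byte
                      * counts, DIF error indication) directly from the CQE.
                      */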
1365                 type = IBT_WRC_RECV;
1366                 if (HERMON_CQE_FEXCH_DIFE(cq, cqe))
1367                         flags |= IBT_WC_DIF_ERROR;
1368                 wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
1369                 wc->wc_fexch_seq_cnt = HERMON_CQE_FEXCH_SEQ_CNT(cq, cqe);
1370                 wc->wc_fexch_tx_bytes_xfer = HERMON_CQE_FEXCH_TX_BYTES(cq, cqe);
1371                 wc->wc_fexch_rx_bytes_xfer = HERMON_CQE_FEXCH_RX_BYTES(cq, cqe);
1372                 wc->wc_fexch_seq_id = HERMON_CQE_FEXCH_SEQ_ID(cq, cqe);
1373                 wc->wc_detail = HERMON_CQE_FEXCH_DETAIL(cq, cqe) &
1374                     IBT_WC_DETAIL_FC_MATCH_MASK;
1375                 wc->wc_rkey = HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
1376                 flags |= IBT_WC_FEXCH_FMT | IBT_WC_RKEY_INVALIDATED;
1377         } else {
1378                 /*
1379                  * Parse the remaining contents of the CQE into the work
1380                  * completion.  This means filling in SL, QP number, SLID,
1381                  * immediate data, etc.
1382                  *
1383                  * Note: Not all of these fields are valid in a given
1384                  * completion.  Many of them depend on the actual type of
1385                  * completion.  So we fill in all of the fields and leave
1386                  * it up to the IBTF and consumer to sort out which are
1387                  * valid based on their context.
1388                  */
1389                 wc->wc_sl      = HERMON_CQE_SL_GET(cq, cqe);
1390                 wc->wc_qpn     = HERMON_CQE_DQPN_GET(cq, cqe);
1391                 wc->wc_slid    = HERMON_CQE_DLID_GET(cq, cqe);
1392                 wc->wc_immed_data =
1393                     HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
1394                 wc->wc_ethertype  = (wc->wc_immed_data & 0xFFFF);
1395                 wc->wc_pkey_ix         = (wc->wc_immed_data &
1396                     ((1 << state->hs_queryport.log_max_pkey) - 1));
1397                 /*
1398                  * Fill in "bytes transferred" as appropriate.  Also,
1399                  * if necessary, fill in the "path bits" field.
1400                  */
1401                 wc->wc_path_bits = HERMON_CQE_PATHBITS_GET(cq, cqe);
1402                 wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
1403 
1404                 /*
1405                  * Check for GRH, update the flags, then fill in "wc_flags"
1406                  * field in the work completion
1407                  */
1408                 if (HERMON_CQE_GRH_GET(cq, cqe) != 0) {
1409                         flags |= IBT_WC_GRH_PRESENT;
1410                 }
1411 
1412                 /* Receive CQE */
1413                 switch (opcode) {
1414                 case HERMON_CQE_RCV_SEND_IMM:
1415                         /*
1416                          * Note:  According to the PRM, all QP1 recv
1417                          * completions look like the result of a Send with
1418                          * Immediate.  They are not, however (MADs are Send
1419                          * Only), so we need to check the QP number and set
1420                          * the flag only if it is non-QP1.
1421                          */
1422                         qpnum    = HERMON_CQE_QPNUM_GET(cq, cqe);
1423                         qp1_indx = state->hs_spec_qp1->hr_indx;
1424                         if ((qpnum < qp1_indx) || (qpnum > qp1_indx + 1)) {
1425                                 flags |= IBT_WC_IMMED_DATA_PRESENT;
1426                         }
1427                         /* FALLTHROUGH */
1428 
1429                 case HERMON_CQE_RCV_SEND:
1430                         type = IBT_WRC_RECV;
1431                         if (HERMON_CQE_IS_IPOK(cq, cqe)) {
1432                                 wc->wc_cksum = HERMON_CQE_CKSUM(cq, cqe);
1433                                 flags |= IBT_WC_CKSUM_OK;
1434                                 wc->wc_detail = IBT_WC_DETAIL_ALL_FLAGS_MASK &
1435                                     HERMON_CQE_IPOIB_STATUS(cq, cqe);
1436                         }
1437                         break;
1438 
1439                 case HERMON_CQE_RCV_SEND_INV:
1440                         type = IBT_WRC_RECV;
1441                         flags |= IBT_WC_RKEY_INVALIDATED;
1442                         wc->wc_rkey = wc->wc_immed_data; /* same field in cqe */
1443                         break;
1444 
1445                 case HERMON_CQE_RCV_RDMAWR_IMM:
1446                         flags |= IBT_WC_IMMED_DATA_PRESENT;
1447                         type = IBT_WRC_RECV_RDMAWI;
1448                         break;
1449 
1450                 default:
1451 
1452                         HERMON_WARNING(state, "unknown recv CQE type");
1453                         wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
1454                         return;
1455                 }
1456         }
1457         wc->wc_type = type;
1458         wc->wc_flags = flags;
1459         wc->wc_status = IBT_WC_SUCCESS;
1460 }
1461 
1462 /*
1463  * hermon_cq_errcqe_consume()
1464  *    Context: Can be called from interrupt or base context.
1465  */
1466 static void
1467 hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
1468     hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
1469 {
1470         uint32_t                imm_eth_pkey_cred;
1471         uint_t                  status;
1472         ibt_wc_status_t         ibt_status;
1473 
1474         /*
1475          * Fetch the Work Request ID using the information in the CQE.
1476          * See hermon_wr.c for more details.
1477          */
1478         wc->wc_id = hermon_wrid_get_entry(cq, cqe);
1479 
1480         /*
1481          * Parse the CQE opcode to determine completion type.  We know that
1482          * the CQE is an error completion, so we extract only the completion
1483          * status/syndrome here.
1484          */
1485         imm_eth_pkey_cred = HERMON_CQE_ERROR_SYNDROME_GET(cq, cqe);
1486         status = imm_eth_pkey_cred;
1487         if (status != HERMON_CQE_WR_FLUSHED_ERR)
1488                 IBTF_DPRINTF_L2("CQE ERR", "cqe %p QPN %x indx %x status 0x%x  "
1489                     "vendor syndrome %x", cqe, HERMON_CQE_QPNUM_GET(cq, cqe),
1490                     HERMON_CQE_WQECNTR_GET(cq, cqe), status,
1491                     HERMON_CQE_ERROR_VENDOR_SYNDROME_GET(cq, cqe));
1492         switch (status) {
1493         case HERMON_CQE_LOC_LEN_ERR:
1494                 HERMON_WARNING(state, HERMON_FMA_LOCLEN);
1495                 ibt_status = IBT_WC_LOCAL_LEN_ERR;
1496                 break;
1497 
1498         case HERMON_CQE_LOC_OP_ERR:
1499                 HERMON_WARNING(state, HERMON_FMA_LOCQPOP);
1500                 ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
1501                 break;
1502 
1503         case HERMON_CQE_LOC_PROT_ERR:
1504                 HERMON_WARNING(state, HERMON_FMA_LOCPROT);
1505                 ibt_status = IBT_WC_LOCAL_PROTECT_ERR;
1506                 IBTF_DPRINTF_L2("ERRCQE", "is at %p", cqe);
1507                 if (hermon_should_panic) {
1508                         cmn_err(CE_PANIC, "Hermon intentional PANIC - "
1509                             "Local Protection Error\n");
1510                 }
1511                 break;
1512 
1513         case HERMON_CQE_WR_FLUSHED_ERR:
1514                 ibt_status = IBT_WC_WR_FLUSHED_ERR;
1515                 break;
1516 
1517         case HERMON_CQE_MW_BIND_ERR:
1518                 HERMON_WARNING(state, HERMON_FMA_MWBIND);
1519                 ibt_status = IBT_WC_MEM_WIN_BIND_ERR;
1520                 break;
1521 
1522         case HERMON_CQE_BAD_RESPONSE_ERR:
1523                 HERMON_WARNING(state, HERMON_FMA_RESP);
1524                 ibt_status = IBT_WC_BAD_RESPONSE_ERR;
1525                 break;
1526 
1527         case HERMON_CQE_LOCAL_ACCESS_ERR:
1528                 HERMON_WARNING(state, HERMON_FMA_LOCACC);
1529                 ibt_status = IBT_WC_LOCAL_ACCESS_ERR;
1530                 break;
1531 
1532         case HERMON_CQE_REM_INV_REQ_ERR:
1533                 HERMON_WARNING(state, HERMON_FMA_REMREQ);
1534                 ibt_status = IBT_WC_REMOTE_INVALID_REQ_ERR;
1535                 break;
1536 
1537         case HERMON_CQE_REM_ACC_ERR:
1538                 HERMON_WARNING(state, HERMON_FMA_REMACC);
1539                 ibt_status = IBT_WC_REMOTE_ACCESS_ERR;
1540                 break;
1541 
1542         case HERMON_CQE_REM_OP_ERR:
1543                 HERMON_WARNING(state, HERMON_FMA_REMOP);
1544                 ibt_status = IBT_WC_REMOTE_OP_ERR;
1545                 break;
1546 
1547         case HERMON_CQE_TRANS_TO_ERR:
1548                 HERMON_WARNING(state, HERMON_FMA_XPORTCNT);
1549                 ibt_status = IBT_WC_TRANS_TIMEOUT_ERR;
1550                 break;
1551 
1552         case HERMON_CQE_RNRNAK_TO_ERR:
1553                 HERMON_WARNING(state, HERMON_FMA_RNRCNT);
1554                 ibt_status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
1555                 break;
1556 
1557         /*
1558          * The following error codes are not supported in the Hermon driver
1559          * as they relate only to Reliable Datagram completion statuses:
1560          *    case HERMON_CQE_LOCAL_RDD_VIO_ERR:
1561          *    case HERMON_CQE_REM_INV_RD_REQ_ERR:
1562          *    case HERMON_CQE_EEC_REM_ABORTED_ERR:
1563          *    case HERMON_CQE_INV_EEC_NUM_ERR:
1564          *    case HERMON_CQE_INV_EEC_STATE_ERR:
1565          *    case HERMON_CQE_LOC_EEC_ERR:
1566          */
1567 
1568         default:
1569                 HERMON_WARNING(state, "unknown error CQE status");
1570                 HERMON_FMANOTE(state, HERMON_FMA_UNKN);
1571                 ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
1572                 break;
1573         }
1574 
1575         wc->wc_status = ibt_status;
1576 }
1577 
1578 
1579 /*
1580  * hermon_cq_resize_helper()
1581  *    Context: Can be called only from user or kernel context.
1582  */
1583 void
1584 hermon_cq_resize_helper(hermon_state_t *state, hermon_cqhdl_t cq)
1585 {
1586         hermon_cqhdl_t          resize_hdl;
1587         int                     status;
1588 
1589         /*
1590          * we're here because we found the special cqe opcode, so we have
1591          * to update the cq_handle, release the old resources, clear the
1592          * flag in the cq_hdl, and release the resize_hdl.  When we return
1593          * to the caller, it will take care of the rest.
1594          */
1595         ASSERT(MUTEX_HELD(&cq->cq_lock));
1596 
1597         resize_hdl = cq->cq_resize_hdl;
1598 
1599         /*
1600          * Deregister the memory for the old Completion Queue.  Note: We
1601          * really can't return an error here because we have no good way to
1602          * clean up.  Plus, the deregistration really shouldn't ever fail.
1603          * So, if it does, it is an indication that something has gone
1604          * seriously wrong.  In that case we print a warning message and
1605          * continue (knowing that the "old" CQ memory will be leaked).
1606          */
1607         status = hermon_mr_deregister(state, &cq->cq_mrhdl, HERMON_MR_DEREG_ALL,
1608             HERMON_SLEEP);
1609         if (status != DDI_SUCCESS) {
1610                 HERMON_WARNING(state, "failed to deregister old CQ memory");
1611         }
1612 
1613         /* Next, free the memory from the old CQ buffer */
1614         hermon_queue_free(&cq->cq_cqinfo);
1615 
1616         /* now we can update the cq_hdl with the new things saved */
1617 
1618         cq->cq_buf   = resize_hdl->cq_buf;
1619         cq->cq_mrhdl = resize_hdl->cq_mrhdl;
1620         cq->cq_bufsz = resize_hdl->cq_bufsz;
1621         cq->cq_log_cqsz = resize_hdl->cq_log_cqsz;
1622         cq->cq_umap_dhp = cq->cq_resize_hdl->cq_umap_dhp;
1623         cq->cq_resize_hdl = 0;
1624         bcopy(&resize_hdl->cq_cqinfo, &cq->cq_cqinfo,
1625             sizeof (struct hermon_qalloc_info_s));
1626 
1627         /* finally, release the resizing handle */
1628         kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
1629 }
1630 
1631 
1632 /*
1633  * hermon_cq_entries_flush()
1634  * Context: Can be called from interrupt or base context.
1635  */
1636 /* ARGSUSED */
1637 void
1638 hermon_cq_entries_flush(hermon_state_t *state, hermon_qphdl_t qp)
1639 {
1640         hermon_cqhdl_t          cq;
1641         hermon_hw_cqe_t         *cqe, *next_cqe;
1642         hermon_srqhdl_t         srq;
1643         hermon_workq_hdr_t      *wq;
1644         uint32_t                cons_indx, tail_cons_indx, wrap_around_mask;
1645         uint32_t                new_indx, check_indx, qpnum;
1646         uint32_t                shift, mask;
1647         int                     outstanding_cqes;
1648 
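             /*
              * Flush CQEs for this QP from its receive CQ and then, if it is
              * a different CQ, from its send CQ.  If the QP uses an SRQ, keep
              * a pointer to the SRQ work queue header so that receive WQEs
              * can be returned to the SRQ free list below.
              */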
1649         qpnum = qp->qp_qpnum;
1650         if ((srq = qp->qp_srqhdl) != NULL)
1651                 wq = qp->qp_srqhdl->srq_wq_wqhdr;
1652         else
1653                 wq = NULL;
1654         cq = qp->qp_rq_cqhdl;
1655 
1656         if (cq == NULL) {
1657                 cq = qp->qp_sq_cqhdl;
1658         }
1659 
1660 do_send_cq:     /* loop back to here if send_cq is not the same as recv_cq */
1661         if (cq == NULL)
1662                 return;
1663 
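             /*
              * The CQ buffer size is a power of two, so 'cq_bufsz - 1' serves
              * as the wrap-around mask for indexing into the CQE array.
              */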
1664         cons_indx = cq->cq_consindx;
1665         shift = cq->cq_log_cqsz;
1666         mask = cq->cq_bufsz;
1667         wrap_around_mask = mask - 1;
1668 
1669         /* Calculate the pointer to the first CQ entry */
1670         cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
1671 
1672         /*
1673          * Loop through the CQ looking for entries owned by software.  If an
1674          * entry is owned by software then we increment an 'outstanding_cqes'
1675          * count to know how many entries total we have on our CQ.  We use this
1676          * value further down to know how many entries to loop through looking
1677          * for our same QP number.
1678          */
1679         outstanding_cqes = 0;
1680         tail_cons_indx = cons_indx;
1681         while (HERMON_CQE_OWNER_IS_SW(cq, cqe, tail_cons_indx, shift, mask)) {
1682                 /* increment total cqes count */
1683                 outstanding_cqes++;
1684 
1685                 /* increment the consumer index */
1686                 tail_cons_indx++;
1687 
1688                 /* update the pointer to the next cq entry */
1689                 cqe = &cq->cq_buf[tail_cons_indx & wrap_around_mask];
1690         }
1691 
1692         /*
1693          * Using the 'tail_cons_indx' that was just set, we now know the
1694          * total number of CQEs present.  Set both 'check_indx' and
1695          * 'new_indx' to the last entry identified by 'tail_cons_indx'.
1696          */
1697         check_indx = new_indx = (tail_cons_indx - 1);
1698 
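             /*
              * Walk backward from the tail toward the original consumer index,
              * consuming CQEs that belong to this QP and compacting CQEs that
              * belong to other QPs toward the tail of the queue.
              */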
1699         while (--outstanding_cqes >= 0) {
1700                 cqe = &cq->cq_buf[check_indx & wrap_around_mask];
1701 
1702                 /*
1703                  * If the QP number is the same in the CQE as the QP, then
1704                  * we must "consume" it.  If it is for an SRQ wqe, then we
1705                  * also must free the wqe back onto the free list of the SRQ.
1706                  */
1707                 if (qpnum == HERMON_CQE_QPNUM_GET(cq, cqe)) {
1708                         if (srq && (HERMON_CQE_SENDRECV_GET(cq, cqe) ==
1709                             HERMON_COMPLETION_RECV)) {
1710                                 uint64_t *desc;
1711                                 int indx;
1712 
1713                                 /* Add wqe back to SRQ free list */
1714                                 indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) &
1715                                     wq->wq_mask;
1716                                 desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
1717                                 ((uint16_t *)desc)[1] = htons(indx);
1718                                 wq->wq_tail = indx;
1719                         }
1720                 } else {        /* CQEs for other QPNs need to remain */
1721                         if (check_indx != new_indx) {
1722                                 next_cqe =
1723                                     &cq->cq_buf[new_indx & wrap_around_mask];
1724                                 /* Copy the CQE into the "next_cqe" pointer. */
1725                                 bcopy(cqe, next_cqe, sizeof (hermon_hw_cqe_t));
1726                         }
1727                         new_indx--;     /* move index to next CQE to fill */
1728                 }
1729                 check_indx--;           /* move index to next CQE to check */
1730         }
1731 
1732         /*
1733          * Update the consumer index to be 'new_indx + 1', moving it past
1734          * all removed entries.  Because 'new_indx' points just below the
1735          * last CQE that was kept, adding 1 points cons_indx at the first
1736          * remaining SW owned entry (or the first HW owned entry if none).
1737          */
1738         cons_indx = (new_indx + 1);
1739 
1740         /*
1741          * Only ring the doorbell (to update the consumer index) if we
1742          * actually consumed a CQ entry.  If no QP number matched above,
1743          * then nothing was removed from the CQ and there is no need to
1744          * ring the doorbell.
1745          */
1746         if (cq->cq_consindx != cons_indx) {
1747                 /*
1748                  * Update the consumer index in both the CQ handle and the
1749                  * doorbell record.
1750                  */
1751                 cq->cq_consindx = cons_indx;
1752 
1753                 hermon_cq_update_ci_doorbell(cq);
1754 
1755         }
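             /* If the send CQ differs from the CQ just flushed, flush it too */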
1756         if (cq != qp->qp_sq_cqhdl) {
1757                 cq = qp->qp_sq_cqhdl;
1758                 goto do_send_cq;
1759         }
1760 }
1761 
1762 /*
1763  * hermon_get_cq_sched_list()
1764  *    Context: Only called from attach() path context
1765  *
1766  * Read properties, creating entries in hs_cq_sched_array with
1767  * information about the requested "desired" and "minimum"
1768  * number of MSI-X interrupt vectors per list entry.
1769  */
1770 static int
1771 hermon_get_cq_sched_list(hermon_state_t *state)
1772 {
1773         char **listp, ulp_prop[HERMON_CQH_MAX + 4];
1774         uint_t nlist, i, j, ndata;
1775         int *data;
1776         size_t len;
1777         hermon_cq_sched_t *cq_schedp;
1778 
1779         if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, state->hs_dip,
1780             DDI_PROP_DONTPASS, "cqh-group-list", &listp, &nlist) !=
1781             DDI_PROP_SUCCESS)
1782                 return (0);
1783 
1784         state->hs_cq_sched_array_size = nlist;
1785         state->hs_cq_sched_array = cq_schedp = kmem_zalloc(nlist *
1786             sizeof (hermon_cq_sched_t), KM_SLEEP);
1787         for (i = 0; i < nlist; i++) {
1788                 if ((len = strlen(listp[i])) >= HERMON_CQH_MAX) {
1789                         cmn_err(CE_CONT, "'cqh' property name too long\n");
1790                         goto game_over;
1791                 }
1792                 for (j = 0; j < i; j++) {
1793                         if (strcmp(listp[j], listp[i]) == 0) {
1794                                 cmn_err(CE_CONT, "Duplicate 'cqh' property\n");
1795                                 goto game_over;
1796                         }
1797                 }
1798                 (void) strncpy(cq_schedp[i].cqs_name, listp[i], HERMON_CQH_MAX);
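                     /* Build the per-ULP property name: "cqh-" + group name */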
1799                 ulp_prop[0] = 'c';
1800                 ulp_prop[1] = 'q';
1801                 ulp_prop[2] = 'h';
1802                 ulp_prop[3] = '-';
1803                 (void) strncpy(ulp_prop + 4, listp[i], len + 1);
1804                 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
1805                     DDI_PROP_DONTPASS, ulp_prop, &data, &ndata) !=
1806                     DDI_PROP_SUCCESS) {
1807                         cmn_err(CE_CONT, "property '%s' not found\n", ulp_prop);
1808                         goto game_over;
1809                 }
1810                 if (ndata != 2) {
1811                         cmn_err(CE_CONT, "property '%s' does not "
1812                             "have 2 integers\n", ulp_prop);
1813                         goto game_over_free_data;
1814                 }
1815                 cq_schedp[i].cqs_desired = data[0];
1816                 cq_schedp[i].cqs_minimum = data[1];
1817                 cq_schedp[i].cqs_refcnt = 0;
1818                 ddi_prop_free(data);
1819         }
1820         if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
1821             DDI_PROP_DONTPASS, "cqh-default", &data, &ndata) !=
1822             DDI_PROP_SUCCESS) {
1823                 cmn_err(CE_CONT, "property 'cqh-default' not found\n");
1824                 goto game_over;
1825         }
1826         if (ndata != 2) {
1827                 cmn_err(CE_CONT, "property 'cqh-default' does not "
1828                     "have 2 integers\n");
1829                 goto game_over_free_data;
1830         }
1831         cq_schedp = &state->hs_cq_sched_default;
1832         cq_schedp->cqs_desired = data[0];
1833         cq_schedp->cqs_minimum = data[1];
1834         cq_schedp->cqs_refcnt = 0;
1835         ddi_prop_free(data);
1836         ddi_prop_free(listp);
1837         return (1);             /* game on */
1838 
1839 game_over_free_data:
1840         ddi_prop_free(data);
1841 game_over:
1842         cmn_err(CE_CONT, "Error in 'cqh' properties in hermon.conf\n");
1843         cmn_err(CE_CONT, "completion handler groups not being used\n");
1844         kmem_free(cq_schedp, nlist * sizeof (hermon_cq_sched_t));
1845         state->hs_cq_sched_array_size = 0;
             state->hs_cq_sched_array = NULL;
1846         ddi_prop_free(listp);
1847         return (0);
1848 }
1849 
1850 /*
1851  * hermon_cq_sched_init()
1852  *    Context: Only called from attach() path context
1853  *
1854  * Read the hermon.conf properties looking for cq_sched info,
1855  * creating reserved pools of MSI-X interrupt ranges for the
1856  * specified ULPs.
1857  */
1858 int
1859 hermon_cq_sched_init(hermon_state_t *state)
1860 {
1861         hermon_cq_sched_t *cq_schedp, *defp;
1862         int i, desired, array_size;
1863 
1864         mutex_init(&state->hs_cq_sched_lock, NULL, MUTEX_DRIVER,
1865             DDI_INTR_PRI(state->hs_intrmsi_pri));
1866 
1867         mutex_enter(&state->hs_cq_sched_lock);
1868         state->hs_cq_sched_array = NULL;
1869 
1870         /* initialize cq_sched_default */
1871         defp = &state->hs_cq_sched_default;
1872         defp->cqs_start_hid = 1;
1873         defp->cqs_len = state->hs_intrmsi_allocd;
1874         defp->cqs_next_alloc = defp->cqs_len - 1;
1875         (void) strncpy(defp->cqs_name, "default", 8);
1876 
1877         /* Read properties to determine which ULPs use cq_sched */
1878         if (hermon_get_cq_sched_list(state) == 0)
1879                 goto done;
1880 
1881         /* Determine if we have enough vectors, or if we have to scale down */
1882         desired = defp->cqs_desired; /* default desired (from hermon.conf) */
1883         if (desired <= 0)
1884                 goto done;              /* all interrupts in the default pool */
1885         cq_schedp = state->hs_cq_sched_array;
1886         array_size = state->hs_cq_sched_array_size;
1887         for (i = 0; i < array_size; i++)
1888                 desired += cq_schedp[i].cqs_desired;
1889         if (desired > state->hs_intrmsi_allocd) {
1890                 cmn_err(CE_CONT, "#interrupts allocated (%d) is less than "
1891                     "the #interrupts desired (%d)\n",
1892                     state->hs_intrmsi_allocd, desired);
1893                 cmn_err(CE_CONT, "completion handler groups not being used\n");
1894                 goto done;              /* all interrupts in the default pool */
1895         }
1896         /* Game on.  For each cq_sched group, reserve the MSI-X range */
1897         for (i = 0; i < array_size; i++) {
1898                 desired = cq_schedp[i].cqs_desired;
1899                 cq_schedp[i].cqs_start_hid = defp->cqs_start_hid;
1900                 cq_schedp[i].cqs_len = desired;
1901                 cq_schedp[i].cqs_next_alloc = desired - 1;
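                     /* carve this group's vectors from the front of the default pool */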
1902                 defp->cqs_len -= desired;
1903                 defp->cqs_start_hid += desired;
1904         }
1905         /* reset default's start allocation seed */
1906         state->hs_cq_sched_default.cqs_next_alloc =
1907             state->hs_cq_sched_default.cqs_len - 1;
1908 
1909 done:
1910         mutex_exit(&state->hs_cq_sched_lock);
1911         return (IBT_SUCCESS);
1912 }
1913 
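     /*
      * hermon_cq_sched_fini()
      *
      *    Free the cq_sched array (if hermon_get_cq_sched_list() allocated
      *    one) and destroy the lock that protects it.
      */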
1914 void
1915 hermon_cq_sched_fini(hermon_state_t *state)
1916 {
1917         mutex_enter(&state->hs_cq_sched_lock);
1918         if (state->hs_cq_sched_array_size) {
1919                 kmem_free(state->hs_cq_sched_array, sizeof (hermon_cq_sched_t) *
1920                     state->hs_cq_sched_array_size);
1921                 state->hs_cq_sched_array_size = 0;
1922                 state->hs_cq_sched_array = NULL;
1923         }
1924         mutex_exit(&state->hs_cq_sched_lock);
1925         mutex_destroy(&state->hs_cq_sched_lock);
1926 }
1927 
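     /*
      * hermon_cq_sched_alloc()
      *
      *    If the attributes request a scheduling group, look up the named
      *    group in the cq_sched array and take a reference on it.  On return,
      *    *cq_sched_pp is either a valid group handle or NULL; failure
      *    (IBT_CQ_NO_SCHED_GROUP) is returned only if an exact group was
      *    requested but no usable group was found.
      */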
1928 int
1929 hermon_cq_sched_alloc(hermon_state_t *state, ibt_cq_sched_attr_t *attr,
1930     hermon_cq_sched_t **cq_sched_pp)
1931 {
1932         hermon_cq_sched_t       *cq_schedp;
1933         int                     i;
1934         char                    *name;
1935         ibt_cq_sched_flags_t    flags;
1936 
1937         flags = attr->cqs_flags;
1938         if ((flags & (IBT_CQS_SCHED_GROUP | IBT_CQS_EXACT_SCHED_GROUP)) == 0) {
1939                 *cq_sched_pp = NULL;
1940                 return (IBT_SUCCESS);
1941         }
1942         name = attr->cqs_pool_name;
1943 
1944         mutex_enter(&state->hs_cq_sched_lock);
1945         cq_schedp = state->hs_cq_sched_array;
1946         for (i = 0; i < state->hs_cq_sched_array_size; i++, cq_schedp++) {
1947                 if (strcmp(name, cq_schedp->cqs_name) == 0) {
1948                         if (cq_schedp->cqs_len != 0)
1949                                 cq_schedp->cqs_refcnt++;
1950                         break;  /* found it */
1951                 }
1952         }
1953         if ((i == state->hs_cq_sched_array_size) ||  /* not found, or */
1954             (cq_schedp->cqs_len == 0)) /* defined, but no dedicated intr's */
1955                 cq_schedp = NULL;
1956         mutex_exit(&state->hs_cq_sched_lock);
1957 
1958         *cq_sched_pp = cq_schedp;       /* set to valid hdl, or to NULL */
1959         if ((cq_schedp == NULL) &&
1960             (attr->cqs_flags & IBT_CQS_EXACT_SCHED_GROUP))
1961                 return (IBT_CQ_NO_SCHED_GROUP);
1962         else
1963                 return (IBT_SUCCESS);
1964 }
1965 
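     /*
      * hermon_cq_sched_free()
      *
      *    Drop the reference taken by hermon_cq_sched_alloc(), warning if
      *    the reference count would underflow.
      */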
1966 int
1967 hermon_cq_sched_free(hermon_state_t *state, hermon_cq_sched_t *cq_schedp)
1968 {
1969         if (cq_schedp != NULL) {
1970                 /* Just decrement refcnt */
1971                 mutex_enter(&state->hs_cq_sched_lock);
1972                 if (cq_schedp->cqs_refcnt == 0)
1973                         HERMON_WARNING(state, "cq_sched free underflow\n");
1974                 else
1975                         cq_schedp->cqs_refcnt--;
1976                 mutex_exit(&state->hs_cq_sched_lock);
1977         }
1978         return (IBT_SUCCESS);
1979 }