1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * hermon_cq.c
  28  *    Hermon Completion Queue Processing Routines
  29  *
  30  *    Implements all the routines necessary for allocating, freeing, resizing,
  31  *    and handling the completion type events that the Hermon hardware can
  32  *    generate.
  33  */
  34 
  35 #include <sys/types.h>
  36 #include <sys/conf.h>
  37 #include <sys/ddi.h>
  38 #include <sys/sunddi.h>
  39 #include <sys/modctl.h>
  40 #include <sys/bitmap.h>
  41 #include <sys/sysmacros.h>
  42 
  43 #include <sys/ib/adapters/hermon/hermon.h>
  44 
  45 int hermon_should_panic = 0;    /* debugging aid */
  46 
  47 #define hermon_cq_update_ci_doorbell(cq)                                \
  48         /* Build the doorbell record data (low 24 bits only) */         \
  49         HERMON_UAR_DB_RECORD_WRITE(cq->cq_arm_ci_vdbr,                       \
  50             cq->cq_consindx & 0x00FFFFFF)
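
/*
 * Added note (not part of the original source): this macro is the "SET_CI"
 * half of the CQ doorbell record.  hermon_cq_poll() below invokes it after
 * consuming entries, roughly as:
 *
 *      cq->cq_consindx = cons_indx;
 *      hermon_cq_update_ci_doorbell(cq);
 *
 * Only the low 24 bits of the consumer index are published, per the mask in
 * the macro above.
 */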
  51 
  52 static int hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq,
  53     uint_t cmd);
  54 #pragma inline(hermon_cq_arm_doorbell)
  55 static void hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr);
  56 #pragma inline(hermon_arm_cq_dbr_init)
  57 static void hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
  58     hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
  59 static void hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
  60     hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
  61 
  62 
  63 /*
  64  * hermon_cq_alloc()
  65  *    Context: Can be called only from user or kernel context.
  66  */
  67 int
  68 hermon_cq_alloc(hermon_state_t *state, ibt_cq_hdl_t ibt_cqhdl,
  69     ibt_cq_attr_t *cq_attr, uint_t *actual_size, hermon_cqhdl_t *cqhdl,
  70     uint_t sleepflag)
  71 {
  72         hermon_rsrc_t           *cqc, *rsrc;
  73         hermon_umap_db_entry_t  *umapdb;
  74         hermon_hw_cqc_t         cqc_entry;
  75         hermon_cqhdl_t          cq;
  76         ibt_mr_attr_t           mr_attr;
  77         hermon_mr_options_t     op;
  78         hermon_pdhdl_t          pd;
  79         hermon_mrhdl_t          mr;
  80         hermon_hw_cqe_t         *buf;
  81         uint64_t                value;
  82         uint32_t                log_cq_size, uarpg;
  83         uint_t                  cq_is_umap;
  84         uint32_t                status, flag;
  85         hermon_cq_sched_t       *cq_schedp;
  86 
  87         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq_attr))
  88 
  89         /*
  90          * Determine whether CQ is being allocated for userland access or
  91          * whether it is being allocated for kernel access.  If the CQ is
  92          * being allocated for userland access, then lookup the UAR
  93          * page number for the current process.  Note:  If this is not found
  94          * (e.g. if the process has not previously open()'d the Hermon driver),
  95          * then an error is returned.
  96          */
  97         cq_is_umap = (cq_attr->cq_flags & IBT_CQ_USER_MAP) ? 1 : 0;
  98         if (cq_is_umap) {
  99                 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
 100                     MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
 101                 if (status != DDI_SUCCESS) {
 102                         status = IBT_INVALID_PARAM;
 103                         goto cqalloc_fail;
 104                 }
 105                 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
 106         } else {
 107                 uarpg = state->hs_kernel_uar_index;
 108         }
 109 
 110         /* Use the internal protection domain (PD) for setting up CQs */
 111         pd = state->hs_pdhdl_internal;
 112 
 113         /* Increment the reference count on the protection domain (PD) */
 114         hermon_pd_refcnt_inc(pd);
 115 
 116         /*
 117          * Allocate a CQ context entry.  This will be filled in with all
 118          * the necessary parameters to define the Completion Queue.  And then
 119          * ownership will be passed to the hardware in the final step
 120          * below.  If we fail here, we must undo the protection domain
 121          * reference count.
 122          */
 123         status = hermon_rsrc_alloc(state, HERMON_CQC, 1, sleepflag, &cqc);
 124         if (status != DDI_SUCCESS) {
 125                 status = IBT_INSUFF_RESOURCE;
 126                 goto cqalloc_fail1;
 127         }
 128 
 129         /*
 130          * Allocate the software structure for tracking the completion queue
 131          * (i.e. the Hermon Completion Queue handle).  If we fail here, we must
 132          * undo the protection domain reference count and the previous
 133          * resource allocation.
 134          */
 135         status = hermon_rsrc_alloc(state, HERMON_CQHDL, 1, sleepflag, &rsrc);
 136         if (status != DDI_SUCCESS) {
 137                 status = IBT_INSUFF_RESOURCE;
 138                 goto cqalloc_fail2;
 139         }
 140         cq = (hermon_cqhdl_t)rsrc->hr_addr;
 141         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))
 142         cq->cq_is_umap = cq_is_umap;
 143         cq->cq_cqnum = cqc->hr_indx;      /* just use index, implicit in Hermon */
 144         cq->cq_intmod_count = 0;
 145         cq->cq_intmod_usec = 0;
 146 
 147         /*
 148          * If this will be a user-mappable CQ, then allocate an entry for
 149          * the "userland resources database".  This will later be added to
 150          * the database (after all further CQ operations are successful).
 151          * If we fail here, we must undo the reference counts and the
 152          * previous resource allocation.
 153          */
 154         if (cq->cq_is_umap) {
 155                 umapdb = hermon_umap_db_alloc(state->hs_instance, cq->cq_cqnum,
 156                     MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
 157                 if (umapdb == NULL) {
 158                         status = IBT_INSUFF_RESOURCE;
 159                         goto cqalloc_fail3;
 160                 }
 161         }
 162 
 163 
 164         /*
 165          * Allocate the doorbell record.  We'll need one for the CQ, handling
 166          * both consumer index (SET CI) and the CQ state (CQ ARM).
 167          */
 168 
 169         status = hermon_dbr_alloc(state, uarpg, &cq->cq_arm_ci_dbr_acchdl,
 170             &cq->cq_arm_ci_vdbr, &cq->cq_arm_ci_pdbr, &cq->cq_dbr_mapoffset);
 171         if (status != DDI_SUCCESS) {
 172                 status = IBT_INSUFF_RESOURCE;
 173                 goto cqalloc_fail4;
 174         }
 175 
 176         /*
 177          * Calculate the appropriate size for the completion queue.
 178          * Note:  All Hermon CQs must be a power-of-2 minus 1 in size.  Also
 179          * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step is
 180          * to round the requested size up to the next highest power-of-2
 181          */
 182         cq_attr->cq_size = max(cq_attr->cq_size, HERMON_CQ_MIN_SIZE);
 183         log_cq_size = highbit(cq_attr->cq_size);
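
        /*
         * Added worked example (comment only, not in the original source):
         * for a requested cq_attr->cq_size of 1000, highbit(1000) returns 10,
         * so log_cq_size is 10 and the queue is built with 1 << 10 = 1024 CQE
         * slots.  The usable size reported back through "actual_size" at the
         * end of this routine is (1 << 10) - 1 = 1023, consistent with the
         * power-of-2-minus-1 rule described above.
         */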
 184 
 185         /*
 186          * Next we verify that the rounded-up size is valid (i.e. consistent
 187          * with the device limits and/or software-configured limits)
 188          */
 189         if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
 190                 status = IBT_HCA_CQ_EXCEEDED;
 191                 goto cqalloc_fail4a;
 192         }
 193 
 194         /*
 195          * Allocate the memory for Completion Queue.
 196          *
 197          * Note: Although we use the common queue allocation routine, we
 198          * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
 199          * kernel system memory) for kernel CQs because it would be
 200          * inefficient to have CQs located in DDR memory.  This is primarily
 201          * because CQs are read from (by software) more than they are written
 202          * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
 203          * user-mappable CQs for a similar reason.)
 204          * It is also worth noting that, unlike Hermon QP work queues,
 205          * completion queues do not have the same strict alignment
 206          * requirements.  It is sufficient for the CQ memory to be both
 207          * aligned to and bound to addresses which are a multiple of CQE size.
 208          */
 209         cq->cq_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
 210 
 211         cq->cq_cqinfo.qa_alloc_align = PAGESIZE;
 212         cq->cq_cqinfo.qa_bind_align  = PAGESIZE;
 213         if (cq->cq_is_umap) {
 214                 cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
 215         } else {
 216                 cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
 217                 hermon_arm_cq_dbr_init(cq->cq_arm_ci_vdbr);
 218         }
 219         status = hermon_queue_alloc(state, &cq->cq_cqinfo, sleepflag);
 220         if (status != DDI_SUCCESS) {
 221                 status = IBT_INSUFF_RESOURCE;
 222                 goto cqalloc_fail4;
 223         }
 224         buf = (hermon_hw_cqe_t *)cq->cq_cqinfo.qa_buf_aligned;
 225         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
 226 
 227         /*
 228          * The ownership bit of the CQEs is set by the HW during the process
 229          * of transferring ownership of the CQ (PRM 09.35c, 14.2.1, note D1).
 230          *
 231          */
 232 
 233         /*
 234          * Register the memory for the CQ.  The memory for the CQ must
 235          * be registered in the Hermon TPT tables.  This gives us the LKey
 236          * to specify in the CQ context below.  Note: If this is a user-
 237          * mappable CQ, then we will force DDI_DMA_CONSISTENT mapping.
 238          */
 239         flag = (sleepflag == HERMON_SLEEP) ?  IBT_MR_SLEEP : IBT_MR_NOSLEEP;
 240         mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
 241         mr_attr.mr_len   = cq->cq_cqinfo.qa_size;
 242         mr_attr.mr_as    = NULL;
 243         mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
 244         op.mro_bind_type   = state->hs_cfg_profile->cp_iommu_bypass;
 245         op.mro_bind_dmahdl = cq->cq_cqinfo.qa_dmahdl;
 246         op.mro_bind_override_addr = 0;
 247         status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
 248             HERMON_CQ_CMPT);
 249         if (status != DDI_SUCCESS) {
 250                 status = IBT_INSUFF_RESOURCE;
 251                 goto cqalloc_fail5;
 252         }
 253         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
 254 
 255         cq->cq_erreqnum = HERMON_CQ_ERREQNUM_GET(state);
 256         if (cq_attr->cq_flags & IBT_CQ_HID) {
 257                 if (!HERMON_HID_VALID(state, cq_attr->cq_hid)) {
 258                         IBTF_DPRINTF_L2("CQalloc", "bad handler id 0x%x",
 259                             cq_attr->cq_hid);
 260                         status = IBT_INVALID_PARAM;
 261                         goto cqalloc_fail5;
 262                 }
 263                 cq->cq_eqnum = HERMON_HID_TO_EQNUM(state, cq_attr->cq_hid);
 264                 IBTF_DPRINTF_L2("cqalloc", "hid: eqn %d", cq->cq_eqnum);
 265         } else {
 266                 cq_schedp = (hermon_cq_sched_t *)cq_attr->cq_sched;
 267                 if (cq_schedp == NULL) {
 268                         cq_schedp = &state->hs_cq_sched_default;
 269                 } else if (cq_schedp != &state->hs_cq_sched_default) {
 270                         int i;
 271                         hermon_cq_sched_t *tmp;
 272 
 273                         tmp = state->hs_cq_sched_array;
 274                         for (i = 0; i < state->hs_cq_sched_array_size; i++)
 275                                 if (cq_schedp == &tmp[i])
 276                                         break;  /* found it */
 277                         if (i >= state->hs_cq_sched_array_size) {
 278                                 cmn_err(CE_CONT, "!Invalid cq_sched argument: "
 279                                     "ignored\n");
 280                                 cq_schedp = &state->hs_cq_sched_default;
 281                         }
 282                 }
 283                 cq->cq_eqnum = HERMON_HID_TO_EQNUM(state,
 284                     HERMON_CQSCHED_NEXT_HID(cq_schedp));
 285                 IBTF_DPRINTF_L2("cqalloc", "sched: first-1 %d, len %d, "
 286                     "eqn %d", cq_schedp->cqs_start_hid - 1,
 287                     cq_schedp->cqs_len, cq->cq_eqnum);
 288         }
 289 
 290         /*
 291          * Fill in the CQC entry.  This is the final step before passing
 292          * ownership of the CQC entry to the Hermon hardware.  We use all of
 293          * the information collected/calculated above to fill in the
 294          * requisite portions of the CQC.  Note: If this CQ is going to be
 295          * used for userland access, then we need to set the UAR page number
 296          * appropriately (otherwise it's a "don't care")
 297          */
 298         bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
 299 
 300         cqc_entry.state         = HERMON_CQ_DISARMED;
 301         cqc_entry.pg_offs       = cq->cq_cqinfo.qa_pgoffs >> 5;
 302         cqc_entry.log_cq_sz     = log_cq_size;
 303         cqc_entry.usr_page      = uarpg;
 304         cqc_entry.c_eqn         = cq->cq_eqnum;
 305         cqc_entry.log2_pgsz     = mr->mr_log2_pgsz;
 306         cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
 307         cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
 308         cqc_entry.dbr_addrh = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 32);
 309         cqc_entry.dbr_addrl = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 3);
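
        /*
         * Added explanatory note (hedged, inferred only from the shifts
         * above): the hardware fields appear to carry addresses and offsets
         * in scaled units.  pg_offs is stored in 32-byte units (hence the
         * >> 5), while the MTT base address and the doorbell record address
         * are stored in 8-byte units (hence the >> 3), with their upper bits
         * split out into the corresponding "addh" fields.
         */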
 310 
 311         /*
 312          * Write the CQC entry to hardware - we pass ownership of
 313          * the entry to the hardware (using the Hermon SW2HW_CQ firmware
 314          * command).  Note: In general, this operation shouldn't fail.  But
 315          * if it does, we have to undo everything we've done above before
 316          * returning error.
 317          */
 318         status = hermon_cmn_ownership_cmd_post(state, SW2HW_CQ, &cqc_entry,
 319             sizeof (hermon_hw_cqc_t), cq->cq_cqnum, sleepflag);
 320         if (status != HERMON_CMD_SUCCESS) {
 321                 cmn_err(CE_CONT, "Hermon: SW2HW_CQ command failed: %08x\n",
 322                     status);
 323                 if (status == HERMON_CMD_INVALID_STATUS) {
 324                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
 325                 }
 326                 status = ibc_get_ci_failure(0);
 327                 goto cqalloc_fail6;
 328         }
 329 
 330         /*
 331          * Fill in the rest of the Hermon Completion Queue handle.  Having
 332          * successfully transferred ownership of the CQC, we can update the
 333          * following fields for use in further operations on the CQ.
 334          */
 335         cq->cq_resize_hdl = 0;
 336         cq->cq_cqcrsrcp        = cqc;
 337         cq->cq_rsrcp   = rsrc;
 338         cq->cq_consindx        = 0;
 339                 /* least restrictive */
 340         cq->cq_buf     = buf;
 341         cq->cq_bufsz   = (1 << log_cq_size);
 342         cq->cq_log_cqsz        = log_cq_size;
 343         cq->cq_mrhdl   = mr;
 344         cq->cq_refcnt          = 0;
 345         cq->cq_is_special = 0;
 346         cq->cq_uarpg   = uarpg;
 347         cq->cq_umap_dhp        = (devmap_cookie_t)NULL;
 348         avl_create(&cq->cq_wrid_wqhdr_avl_tree, hermon_wrid_workq_compare,
 349             sizeof (struct hermon_workq_avl_s),
 350             offsetof(struct hermon_workq_avl_s, wqa_link));
 351 
 352         cq->cq_hdlrarg         = (void *)ibt_cqhdl;
 353 
 354         /*
 355          * Put CQ handle in Hermon CQNum-to-CQHdl list.  Then fill in the
 356          * "actual_size" and "cqhdl" and return success
 357          */
 358         hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, cq);
 359 
 360         /*
 361          * If this is a user-mappable CQ, then we need to insert the previously
 362          * allocated entry into the "userland resources database".  This will
 363          * allow for later lookup during devmap() (i.e. mmap()) calls.
 364          */
 365         if (cq->cq_is_umap) {
 366                 hermon_umap_db_add(umapdb);
 367         }
 368 
 369         /*
 370          * Fill in the return arguments (if necessary).  This includes the
 371          * real completion queue size.
 372          */
 373         if (actual_size != NULL) {
 374                 *actual_size = (1 << log_cq_size) - 1;
 375         }
 376         *cqhdl = cq;
 377 
 378         return (DDI_SUCCESS);
 379 
 380 /*
 381  * The following is cleanup for all possible failure cases in this routine
 382  */
 383 cqalloc_fail6:
 384         if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
 385             sleepflag) != DDI_SUCCESS) {
 386                 HERMON_WARNING(state, "failed to deregister CQ memory");
 387         }
 388 cqalloc_fail5:
 389         hermon_queue_free(&cq->cq_cqinfo);
 390 cqalloc_fail4a:
 391         hermon_dbr_free(state, uarpg, cq->cq_arm_ci_vdbr);
 392 cqalloc_fail4:
 393         if (cq_is_umap) {
 394                 hermon_umap_db_free(umapdb);
 395         }
 396 cqalloc_fail3:
 397         hermon_rsrc_free(state, &rsrc);
 398 cqalloc_fail2:
 399         hermon_rsrc_free(state, &cqc);
 400 cqalloc_fail1:
 401         hermon_pd_refcnt_dec(pd);
 402 cqalloc_fail:
 403         return (status);
 404 }
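
/*
 * Illustrative usage sketch (added comment, not part of the driver).  A
 * kernel caller holding a hermon_state_t and an initialized ibt_cq_attr_t
 * might drive hermon_cq_alloc()/hermon_cq_free() roughly as follows; the
 * variable names are hypothetical and error handling is abbreviated:
 *
 *      hermon_cqhdl_t  cqhdl;
 *      uint_t          real_size;
 *
 *      if (hermon_cq_alloc(state, ibt_cqhdl, &cq_attr, &real_size,
 *          &cqhdl, HERMON_SLEEP) != DDI_SUCCESS)
 *              return (IBT_INSUFF_RESOURCE);
 *      ...
 *      (void) hermon_cq_free(state, &cqhdl, HERMON_SLEEP);
 */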
 405 
 406 
 407 /*
 408  * hermon_cq_free()
 409  *    Context: Can be called only from user or kernel context.
 410  */
 411 /* ARGSUSED */
 412 int
 413 hermon_cq_free(hermon_state_t *state, hermon_cqhdl_t *cqhdl, uint_t sleepflag)
 414 {
 415         hermon_rsrc_t           *cqc, *rsrc;
 416         hermon_umap_db_entry_t  *umapdb;
 417         hermon_hw_cqc_t         cqc_entry;
 418         hermon_pdhdl_t          pd;
 419         hermon_mrhdl_t          mr;
 420         hermon_cqhdl_t          cq, resize;
 421         uint32_t                cqnum;
 422         uint64_t                value;
 423         uint_t                  maxprot;
 424         int                     status;
 425 
 426         /*
 427          * Pull all the necessary information from the Hermon Completion Queue
 428          * handle.  This is necessary here because the resource for the
 429          * CQ handle is going to be freed up as part of this operation.
 430          */
 431         cq      = *cqhdl;
 432         mutex_enter(&cq->cq_lock);
 433         cqc     = cq->cq_cqcrsrcp;
 434         rsrc    = cq->cq_rsrcp;
 435         pd      = state->hs_pdhdl_internal;
 436         mr      = cq->cq_mrhdl;
 437         cqnum   = cq->cq_cqnum;
 438 
 439         resize = cq->cq_resize_hdl;          /* save the handle for later */
 440 
 441         /*
 442          * If any work queues are still associated with the CQ, then drop the
 443          * CQ lock and return an error.  Otherwise, continue with the lock held.
 444          */
 445         if (cq->cq_refcnt != 0) {
 446                 mutex_exit(&cq->cq_lock);
 447                 return (IBT_CQ_BUSY);
 448         }
 449 
 450         /*
 451          * If this was a user-mappable CQ, then we need to remove its entry
 452          * from the "userland resources database".  If it is also currently
 453          * mmap()'d out to a user process, then we need to call
 454          * devmap_devmem_remap() to remap the CQ memory to an invalid mapping.
 455          * We also need to invalidate the CQ tracking information for the
 456          * user mapping.
 457          */
 458         if (cq->cq_is_umap) {
 459                 status = hermon_umap_db_find(state->hs_instance, cqnum,
 460                     MLNX_UMAP_CQMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
 461                     &umapdb);
 462                 if (status != DDI_SUCCESS) {
 463                         mutex_exit(&cq->cq_lock);
 464                         HERMON_WARNING(state, "failed to find in database");
 465                         return (ibc_get_ci_failure(0));
 466                 }
 467                 hermon_umap_db_free(umapdb);
 468                 if (cq->cq_umap_dhp != NULL) {
 469                         maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
 470                         status = devmap_devmem_remap(cq->cq_umap_dhp,
 471                             state->hs_dip, 0, 0, cq->cq_cqinfo.qa_size,
 472                             maxprot, DEVMAP_MAPPING_INVALID, NULL);
 473                         if (status != DDI_SUCCESS) {
 474                                 mutex_exit(&cq->cq_lock);
 475                                 HERMON_WARNING(state, "failed in CQ memory "
 476                                     "devmap_devmem_remap()");
 477                                 return (ibc_get_ci_failure(0));
 478                         }
 479                         cq->cq_umap_dhp = (devmap_cookie_t)NULL;
 480                 }
 481         }
 482 
 483         /*
 484          * Put NULL into the Hermon CQNum-to-CQHdl list.  This will allow any
 485          * in-progress events to detect that the CQ corresponding to this
 486          * number has been freed.
 487          */
 488         hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, NULL);
 489 
 490         mutex_exit(&cq->cq_lock);
 491         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))
 492 
 493         /*
 494          * Reclaim CQC entry from hardware (using the Hermon HW2SW_CQ
 495          * firmware command).  If the ownership transfer fails for any reason,
 496          * then it is an indication that something (either in HW or SW) has
 497          * gone seriously wrong.
 498          */
 499         status = hermon_cmn_ownership_cmd_post(state, HW2SW_CQ, &cqc_entry,
 500             sizeof (hermon_hw_cqc_t), cqnum, sleepflag);
 501         if (status != HERMON_CMD_SUCCESS) {
 502                 HERMON_WARNING(state, "failed to reclaim CQC ownership");
 503                 cmn_err(CE_CONT, "Hermon: HW2SW_CQ command failed: %08x\n",
 504                     status);
 505                 if (status == HERMON_CMD_INVALID_STATUS) {
 506                         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
 507                 }
 508                 return (ibc_get_ci_failure(0));
 509         }
 510 
 511         /*
 512          * From here on, we start relinquishing resources - but check to see
 513          * if a resize was in progress - if so, we need to relinquish those
 514          * resources as well.
 515          */
 516 
 517 
 518         /*
 519          * Deregister the memory for the Completion Queue.  If this fails
 520          * for any reason, then it is an indication that something (either
 521          * in HW or SW) has gone seriously wrong.  So we print a warning
 522          * message and return.
 523          */
 524         status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
 525             sleepflag);
 526         if (status != DDI_SUCCESS) {
 527                 HERMON_WARNING(state, "failed to deregister CQ memory");
 528                 return (ibc_get_ci_failure(0));
 529         }
 530 
 531         if (resize)     {       /* there was a pointer to a handle */
 532                 mr = resize->cq_mrhdl;       /* reuse the pointer to the region */
 533                 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
 534                     sleepflag);
 535                 if (status != DDI_SUCCESS) {
 536                         HERMON_WARNING(state, "failed to deregister resize CQ "
 537                             "memory");
 538                         return (ibc_get_ci_failure(0));
 539                 }
 540         }
 541 
 542         /* Free the memory for the CQ */
 543         hermon_queue_free(&cq->cq_cqinfo);
 544         if (resize)     {
 545                 hermon_queue_free(&resize->cq_cqinfo);
 546                 /* and the temporary handle */
 547                 kmem_free(resize, sizeof (struct hermon_sw_cq_s));
 548         }
 549 
 550         /* everything else does not matter for the resize in progress */
 551 
 552         /* Free the dbr */
 553         hermon_dbr_free(state, cq->cq_uarpg, cq->cq_arm_ci_vdbr);
 554 
 555         /* Free the Hermon Completion Queue handle */
 556         hermon_rsrc_free(state, &rsrc);
 557 
 558         /* Free up the CQC entry resource */
 559         hermon_rsrc_free(state, &cqc);
 560 
 561         /* Decrement the reference count on the protection domain (PD) */
 562         hermon_pd_refcnt_dec(pd);
 563 
 564         /* Set the cqhdl pointer to NULL and return success */
 565         *cqhdl = NULL;
 566 
 567         return (DDI_SUCCESS);
 568 }
 569 
 570 
 571 /*
 572  * hermon_cq_resize()
 573  *    Context: Can be called only from user or kernel context.
 574  */
 575 int
 576 hermon_cq_resize(hermon_state_t *state, hermon_cqhdl_t cq, uint_t req_size,
 577     uint_t *actual_size, uint_t sleepflag)
 578 {
 579         hermon_hw_cqc_t         cqc_entry;
 580         hermon_cqhdl_t          resize_hdl;
 581         hermon_qalloc_info_t    new_cqinfo;
 582         ibt_mr_attr_t           mr_attr;
 583         hermon_mr_options_t     op;
 584         hermon_pdhdl_t          pd;
 585         hermon_mrhdl_t          mr;
 586         hermon_hw_cqe_t         *buf;
 587         uint32_t                new_prod_indx;
 588         uint_t                  log_cq_size;
 589         int                     status, flag;
 590 
 591         if (cq->cq_resize_hdl != 0) {        /* already in process */
 592                 status = IBT_CQ_BUSY;
 593                 goto cqresize_fail;
 594         }
 595 
 596 
 597         /* Use the internal protection domain (PD) for CQs */
 598         pd = state->hs_pdhdl_internal;
 599 
 600         /*
 601          * Calculate the appropriate size for the new resized completion queue.
 602          * Note:  All Hermon CQs must be a power-of-2 minus 1 in size.  Also
 603          * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step is
 604          * to round the requested size up to the next highest power-of-2
 605          */
 606         req_size = max(req_size, HERMON_CQ_MIN_SIZE);
 607         log_cq_size = highbit(req_size);
 608 
 609         /*
 610          * Next we verify that the rounded-up size is valid (i.e. consistent
 611          * with the device limits and/or software-configured limits)
 612          */
 613         if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
 614                 status = IBT_HCA_CQ_EXCEEDED;
 615                 goto cqresize_fail;
 616         }
 617 
 618         /*
 619          * Allocate the memory for newly resized Completion Queue.
 620          *
 621          * Note: Although we use the common queue allocation routine, we
 622          * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
 623          * kernel system memory) for kernel CQs because it would be
 624          * inefficient to have CQs located in DDR memory.  This is the same
 625          * as we do when we first allocate completion queues primarily
 626          * because CQs are read from (by software) more than they are written
 627          * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
 628          * user-mappable CQs for a similar reason.)
 629          * It is also worth noting that, unlike Hermon QP work queues,
 630          * completion queues do not have the same strict alignment
 631          * requirements.  It is sufficient for the CQ memory to be both
 632          * aligned to and bound to addresses which are a multiple of CQE size.
 633          */
 634 
 635         /* first, alloc the resize_handle */
 636         resize_hdl = kmem_zalloc(sizeof (struct hermon_sw_cq_s), KM_SLEEP);
 637 
 638         new_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
 639         new_cqinfo.qa_alloc_align = PAGESIZE;
 640         new_cqinfo.qa_bind_align  = PAGESIZE;
 641         if (cq->cq_is_umap) {
 642                 new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
 643         } else {
 644                 new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
 645         }
 646         status = hermon_queue_alloc(state, &new_cqinfo, sleepflag);
 647         if (status != DDI_SUCCESS) {
 648                 /* free the resize handle */
 649                 kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
 650                 status = IBT_INSUFF_RESOURCE;
 651                 goto cqresize_fail;
 652         }
 653         buf = (hermon_hw_cqe_t *)new_cqinfo.qa_buf_aligned;
 654         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
 655 
 656         /*
 657          * No initialization of the cq is needed - the command will do it
 658          */
 659 
 660         /*
 661          * Register the memory for the CQ.  The memory for the CQ must
 662          * be registered in the Hermon TPT tables.  This gives us the LKey
 663          * to specify in the CQ context below.
 664          */
 665         flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
 666         mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
 667         mr_attr.mr_len   = new_cqinfo.qa_size;
 668         mr_attr.mr_as    = NULL;
 669         mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
 670         op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
 671         op.mro_bind_dmahdl = new_cqinfo.qa_dmahdl;
 672         op.mro_bind_override_addr = 0;
 673         status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
 674             HERMON_CQ_CMPT);
 675         if (status != DDI_SUCCESS) {
 676                 hermon_queue_free(&new_cqinfo);
 677                 /* free the resize handle */
 678                 kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
 679                 status = IBT_INSUFF_RESOURCE;
 680                 goto cqresize_fail;
 681         }
 682         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
 683 
 684         /*
 685          * Now we grab the CQ lock.  Since we will be updating the actual
 686          * CQ location and the producer/consumer indexes, we should hold
 687          * the lock.
 688          *
 689          * We use HERMON_CMD_NOSLEEP_SPIN here (and below), though, because we are
 690          * holding the "cq_lock" and if we got raised to interrupt level
 691          * by priority inversion, we would not want to block in this routine
 692          * waiting for success.
 693          */
 694         mutex_enter(&cq->cq_lock);
 695 
 696         /*
 697          * Fill in the CQC entry.  For the resize operation this is the
 698          * final step before attempting the resize operation on the CQC entry.
 699          * We use all of the information collected/calculated above to fill
 700          * in the requisite portions of the CQC.
 701          */
 702         bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
 703         cqc_entry.log_cq_sz     = log_cq_size;
 704         cqc_entry.pg_offs       = new_cqinfo.qa_pgoffs >> 5;
 705         cqc_entry.log2_pgsz     = mr->mr_log2_pgsz;
 706         cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
 707         cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
 708 
 709         /*
 710          * Write the CQC entry to hardware.  Lastly, we pass ownership of
 711          * the entry to the hardware (using the Hermon RESIZE_CQ firmware
 712          * command).  Note: In general, this operation shouldn't fail.  But
 713          * if it does, we have to undo everything we've done above before
 714          * returning error.  Also note that the status returned may indicate
 715          * the code to return to the IBTF.
 716          */
 717         status = hermon_resize_cq_cmd_post(state, &cqc_entry, cq->cq_cqnum,
 718             &new_prod_indx, HERMON_CMD_NOSLEEP_SPIN);
 719         if (status != HERMON_CMD_SUCCESS) {
 720                 /* Resize attempt has failed, drop CQ lock and cleanup */
 721                 mutex_exit(&cq->cq_lock);
 722                 if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
 723                     sleepflag) != DDI_SUCCESS) {
 724                         HERMON_WARNING(state, "failed to deregister CQ memory");
 725                 }
 726                 kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
 727                 hermon_queue_free(&new_cqinfo);
 728                 if (status == HERMON_CMD_BAD_SIZE) {
 729                         return (IBT_CQ_SZ_INSUFFICIENT);
 730                 } else {
 731                         cmn_err(CE_CONT, "Hermon: RESIZE_CQ command failed: "
 732                             "%08x\n", status);
 733                         if (status == HERMON_CMD_INVALID_STATUS) {
 734                                 hermon_fm_ereport(state, HCA_SYS_ERR,
 735                                     HCA_ERR_SRV_LOST);
 736                         }
 737                         return (ibc_get_ci_failure(0));
 738                 }
 739         }
 740 
 741         /*
 742          * For Hermon, we've alloc'd another handle structure and saved off the
 743          * important things in it.  Then, while polling, we check to see if there's
 744          * a "resizing handle" and if so we look for the "special CQE", opcode
 745          * 0x16, that indicates the transition to the new buffer.
 746          *
 747          * At that point, we'll adjust everything - including dereg and
 748          * freeing of the original buffer, updating all the necessary fields
 749          * in the cq_hdl, and setting up for the next cqe polling
 750          */
 751 
 752         resize_hdl->cq_buf   = buf;
 753         resize_hdl->cq_bufsz = (1 << log_cq_size);
 754         resize_hdl->cq_mrhdl = mr;
 755         resize_hdl->cq_log_cqsz = log_cq_size;
 756 
 757         bcopy(&new_cqinfo, &(resize_hdl->cq_cqinfo),
 758             sizeof (struct hermon_qalloc_info_s));
 759 
 760         /* now, save the address in the cq_handle */
 761         cq->cq_resize_hdl = resize_hdl;
 762 
 763         /*
 764          * Drop the CQ lock now.
 765          */
 766 
 767         mutex_exit(&cq->cq_lock);
 768         /*
 769          * Fill in the return arguments (if necessary).  This includes the
 770          * real new completion queue size.
 771          */
 772         if (actual_size != NULL) {
 773                 *actual_size = (1 << log_cq_size) - 1;
 774         }
 775 
 776         return (DDI_SUCCESS);
 777 
 778 cqresize_fail:
 779         return (status);
 780 }
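
/*
 * Added usage note (hedged sketch, not part of the original source): a
 * caller asking for a larger CQ might invoke the routine above as shown
 * below.  Note that on success the new buffer is not yet live; the switch
 * happens later in hermon_cq_poll() when the special resize CQE is seen.
 *
 *      uint_t  new_real_size;
 *
 *      if (hermon_cq_resize(state, cqhdl, 4096, &new_real_size,
 *          HERMON_SLEEP) != DDI_SUCCESS)
 *              ...handle IBT_CQ_BUSY, IBT_CQ_SZ_INSUFFICIENT, etc...
 */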
 781 
 782 
 783 /*
 784  * hermon_cq_modify()
 785  *    Context: Can be called base context.
 786  */
 787 /* ARGSUSED */
 788 int
 789 hermon_cq_modify(hermon_state_t *state, hermon_cqhdl_t cq,
 790     uint_t count, uint_t usec, ibt_cq_handler_id_t hid, uint_t sleepflag)
 791 {
 792         int     status;
 793         hermon_hw_cqc_t         cqc_entry;
 794 
 795         mutex_enter(&cq->cq_lock);
 796         if (count != cq->cq_intmod_count ||
 797             usec != cq->cq_intmod_usec) {
 798                 bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
 799                 cqc_entry.cq_max_cnt = count;
 800                 cqc_entry.cq_period = usec;
 801                 status = hermon_modify_cq_cmd_post(state, &cqc_entry,
 802                     cq->cq_cqnum, MODIFY_MODERATION_CQ, sleepflag);
 803                 if (status != HERMON_CMD_SUCCESS) {
 804                         mutex_exit(&cq->cq_lock);
 805                         cmn_err(CE_CONT, "Hermon: MODIFY_MODERATION_CQ "
 806                             "command failed: %08x\n", status);
 807                         if (status == HERMON_CMD_INVALID_STATUS) {
 808                                 hermon_fm_ereport(state, HCA_SYS_ERR,
 809                                     HCA_ERR_SRV_LOST);
 810                         }
 811                         return (ibc_get_ci_failure(0));
 812                 }
 813                 cq->cq_intmod_count = count;
 814                 cq->cq_intmod_usec = usec;
 815         }
 816         if (hid && (hid - 1 != cq->cq_eqnum)) {
 817                 bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
 818                 cqc_entry.c_eqn = HERMON_HID_TO_EQNUM(state, hid);
 819                 status = hermon_modify_cq_cmd_post(state, &cqc_entry,
 820                     cq->cq_cqnum, MODIFY_EQN, sleepflag);
 821                 if (status != HERMON_CMD_SUCCESS) {
 822                         mutex_exit(&cq->cq_lock);
 823                         cmn_err(CE_CONT, "Hermon: MODIFY_EQN command failed: "
 824                             "%08x\n", status);
 825                         if (status == HERMON_CMD_INVALID_STATUS) {
 826                                 hermon_fm_ereport(state, HCA_SYS_ERR,
 827                                     HCA_ERR_SRV_LOST);
 828                         }
 829                         return (ibc_get_ci_failure(0));
 830                 }
 831                 cq->cq_eqnum = hid - 1;
 832         }
 833         mutex_exit(&cq->cq_lock);
 834         return (DDI_SUCCESS);
 835 }
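
/*
 * Illustrative sketch (added comment, not in the original source): using the
 * routine above to request interrupt moderation for an existing CQ.  The
 * count/usec pair lands in cq_max_cnt/cq_period; passing a handler id of 0
 * leaves the CQ bound to its current EQ.  The sleepflag value below is a
 * placeholder for whatever the caller's context allows:
 *
 *      (void) hermon_cq_modify(state, cqhdl, 8, 10, 0, sleepflag);
 */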
 836 
 837 /*
 838  * hermon_cq_notify()
 839  *    Context: Can be called from interrupt or base context.
 840  */
 841 int
 842 hermon_cq_notify(hermon_state_t *state, hermon_cqhdl_t cq,
 843     ibt_cq_notify_flags_t flags)
 844 {
 845         uint_t  cmd;
 846         ibt_status_t status;
 847 
 848         /* Validate IBT flags and call doorbell routine. */
 849         if (flags == IBT_NEXT_COMPLETION) {
 850                 cmd = HERMON_CQDB_NOTIFY_CQ;
 851         } else if (flags == IBT_NEXT_SOLICITED) {
 852                 cmd = HERMON_CQDB_NOTIFY_CQ_SOLICIT;
 853         } else {
 854                 return (IBT_CQ_NOTIFY_TYPE_INVALID);
 855         }
 856 
 857         status = hermon_cq_arm_doorbell(state, cq, cmd);
 858         return (status);
 859 }
 860 
 861 
 862 /*
 863  * hermon_cq_poll()
 864  *    Context: Can be called from interrupt or base context.
 865  */
 866 int
 867 hermon_cq_poll(hermon_state_t *state, hermon_cqhdl_t cq, ibt_wc_t *wc_p,
 868     uint_t num_wc, uint_t *num_polled)
 869 {
 870         hermon_hw_cqe_t *cqe;
 871         uint_t          opcode;
 872         uint32_t        cons_indx, wrap_around_mask, shift, mask;
 873         uint32_t        polled_cnt, spec_op = 0;
 874         int             status;
 875 
 876         /*
 877          * Check for user-mappable CQ memory.  Note:  We do not allow kernel
 878          * clients to poll CQ memory that is accessible directly by the user.
 879          * If the CQ memory is user accessible, then return an error.
 880          */
 881         if (cq->cq_is_umap) {
 882                 return (IBT_CQ_HDL_INVALID);
 883         }
 884 
 885         mutex_enter(&cq->cq_lock);
 886 
 887         /* Get the consumer index */
 888         cons_indx = cq->cq_consindx;
 889         shift = cq->cq_log_cqsz;
 890         mask = cq->cq_bufsz;
 891 
 892         /*
 893          * Calculate the wrap around mask.  Note: This operation only works
 894          * because all Hermon completion queues have power-of-2 sizes
 895          */
 896         wrap_around_mask = (cq->cq_bufsz - 1);
 897 
 898         /* Calculate the pointer to the first CQ entry */
 899         cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
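
        /*
         * Added worked example (comment only): with a cq_bufsz of 1024 the
         * wrap-around mask is 0x3FF, so a monotonically increasing cons_indx
         * of 1023, 1024, 1025 selects CQE slots 1023, 0, 1.  The consumer
         * index itself keeps counting; only the low 24 bits are later written
         * to the doorbell record.
         */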
 900 
 901         /*
 902          * Keep pulling entries from the CQ until we find an entry owned by
 903          * the hardware.  As long as there are CQEs owned by software, process
 904          * each entry by calling hermon_cq_cqe_consume() and updating the CQ
 905          * consumer index.  Note:  The updated consumer index is only written
 906          * back to the doorbell record (below) once one or more entries have
 907          * actually been consumed, or once the special resize CQE has been
 908          * seen and handled.
 910          */
 911         polled_cnt = 0;
 912         while (HERMON_CQE_OWNER_IS_SW(cq, cqe, cons_indx, shift, mask)) {
 913                 if (cq->cq_resize_hdl != 0) {        /* in midst of resize */
 914                         /* peek at the opcode */
 915                         opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
 916                         if (opcode == HERMON_CQE_RCV_RESIZE_CODE) {
 917                                 hermon_cq_resize_helper(state, cq);
 918 
 919                                 /* Increment the consumer index */
 920                                 cons_indx = (cons_indx + 1);
 921                                 spec_op = 1; /* plus one for the limiting CQE */
 922 
 923                                 wrap_around_mask = (cq->cq_bufsz - 1);
 924 
 925                                 /* Update the pointer to the next CQ entry */
 926                                 cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
 927 
 928                                 continue;
 929                         }
 930                 }       /* in resizing CQ */
 931 
 932                 /*
 933                  * either resizing and not the special opcode, or
 934                  * not resizing at all
 935                  */
 936                 hermon_cq_cqe_consume(state, cq, cqe, &wc_p[polled_cnt++]);
 937 
 938                 /* Increment the consumer index */
 939                 cons_indx = (cons_indx + 1);
 940 
 941                 /* Update the pointer to the next CQ entry */
 942                 cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
 943 
 944                 /*
 945                  * If we have run out of space to store work completions,
 946                  * then stop and return the ones we have pulled off the CQ.
 947                  */
 948                 if (polled_cnt >= num_wc) {
 949                         break;
 950                 }
 951         }
 952 
 953         /*
 954          * Now we only ring the doorbell (to update the consumer index) if
 955          * we've actually consumed a CQ entry.
 956          */
 957         if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
 958                 /*
 959                  * Update the consumer index in both the CQ handle and the
 960                  * doorbell record.
 961                  */
 962                 cq->cq_consindx = cons_indx;
 963                 hermon_cq_update_ci_doorbell(cq);
 964 
 965         } else if (polled_cnt == 0) {
 966                 if (spec_op != 0) {
 967                         /* if we got the special opcode, update the consindx */
 968                         cq->cq_consindx = cons_indx;
 969                         hermon_cq_update_ci_doorbell(cq);
 970                 }
 971         }
 972 
 973         mutex_exit(&cq->cq_lock);
 974 
 975         /* Set "num_polled" (if necessary) */
 976         if (num_polled != NULL) {
 977                 *num_polled = polled_cnt;
 978         }
 979 
 980         /* Set CQ_EMPTY condition if needed, otherwise return success */
 981         if (polled_cnt == 0) {
 982                 status = IBT_CQ_EMPTY;
 983         } else {
 984                 status = DDI_SUCCESS;
 985         }
 986 
 987         /*
 988          * Check if the system is currently panicking.  If it is, then call
 989          * the Hermon interrupt service routine.  This step is necessary here
 990          * because we might be in a polled I/O mode and without the call to
 991          * hermon_isr() - and its subsequent calls to poll and rearm each
 992          * event queue - we might overflow our EQs and render the system
 993          * unable to sync/dump.
 994          */
 995         if (ddi_in_panic() != 0) {
 996                 (void) hermon_isr((caddr_t)state, (caddr_t)NULL);
 997         }
 998         return (status);
 999 }
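
/*
 * Illustrative polling sketch (added comment, not part of the driver).  The
 * usual consumer pattern with hermon_cq_poll() and hermon_cq_notify() is
 * "drain, re-arm, then drain once more" to close the race with completions
 * that arrive just before the arm doorbell; the variable names below are
 * hypothetical:
 *
 *      ibt_wc_t        wc[8];
 *      uint_t          n;
 *
 *      while (hermon_cq_poll(state, cqhdl, wc, 8, &n) == DDI_SUCCESS)
 *              ...process the n returned work completions...
 *      (void) hermon_cq_notify(state, cqhdl, IBT_NEXT_COMPLETION);
 *      while (hermon_cq_poll(state, cqhdl, wc, 8, &n) == DDI_SUCCESS)
 *              ...process the n returned work completions...
 */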
1000 
1001 /*
1002  *      cmd_sn must be initialized to 1 to enable proper reenabling
1003  *      by hermon_arm_cq_dbr_update().
1004  */
1005 static void
1006 hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr)
1007 {
1008         uint32_t *target;
1009 
1010         target = (uint32_t *)cq_arm_dbr + 1;
1011         *target = htonl(1 << HERMON_CQDB_CMDSN_SHIFT);
1012 }
1013 
1014 
1015 /*
1016  *      User cmd_sn needs help from this kernel function to know
1017  *      when it should be incremented (modulo 4).  We do an atomic
1018  *      update of the arm_cq dbr to communicate this fact.  We retry
1019  *      in the case that the user library is racing with us.  We zero
1020  *      out the cmd field so that the user library can use the cmd
1021  *      field to track the last command it issued (solicited versus any).
1022  */
1023 static void
1024 hermon_arm_cq_dbr_update(hermon_dbr_t *cq_arm_dbr)
1025 {
1026         uint32_t tmp, cmp, new;
1027         uint32_t old_cmd_sn, new_cmd_sn;
1028         uint32_t *target;
1029         int retries = 0;
1030 
1031         target = (uint32_t *)cq_arm_dbr + 1;
1032 retry:
1033         cmp = *target;
1034         tmp = htonl(cmp);
1035         old_cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
1036         new_cmd_sn = (old_cmd_sn + (0x1 << HERMON_CQDB_CMDSN_SHIFT)) &
1037             (0x3 << HERMON_CQDB_CMDSN_SHIFT);
1038         new = htonl((tmp & ~(0x37 << HERMON_CQDB_CMD_SHIFT)) | new_cmd_sn);
1039         tmp = atomic_cas_32(target, cmp, new);
1040         if (tmp != cmp) {       /* cas failed, so need to retry */
1041                 drv_usecwait(retries & 0xff);   /* avoid race */
1042                 if (++retries > 100000) {
1043                         cmn_err(CE_CONT, "cas failed in hermon\n");
1044                         retries = 0;
1045                 }
1046                 goto retry;
1047         }
1048 }
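
/*
 * Added layout note (hedged, inferred only from the code above and from
 * hermon_cq_arm_doorbell() below): the second 32-bit word of the CQ arm
 * doorbell record is treated, in network byte order, roughly as
 *
 *      [ cmd_sn (2 bits) | cmd (3 bits) | consumer index (low 24 bits) ]
 *
 * with the exact bit positions supplied by HERMON_CQDB_CMDSN_SHIFT and
 * HERMON_CQDB_CMD_SHIFT in the headers.  hermon_arm_cq_dbr_update()
 * advances cmd_sn modulo 4 and clears the cmd field so that the user
 * library can track the last arm request it issued.
 */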
1049 
1050 
1051 /*
1052  * hermon_cq_handler()
1053  *    Context: Only called from interrupt context
1054  */
1055 /* ARGSUSED */
1056 int
1057 hermon_cq_handler(hermon_state_t *state, hermon_eqhdl_t eq,
1058     hermon_hw_eqe_t *eqe)
1059 {
1060         hermon_cqhdl_t          cq;
1061         uint_t                  cqnum;
1062 
1063         /* Get the CQ handle from CQ number in event descriptor */
1064         cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
1065         cq = hermon_cqhdl_from_cqnum(state, cqnum);
1066 
1067         /*
1068          * If the CQ handle is NULL, this is probably an indication
1069          * that the CQ has been freed already.  In which case, we
1070          * should not deliver this event.
1071          *
1072          * We also check that the CQ number in the handle is the
1073          * same as the CQ number in the event queue entry.  This
1074          * extra check allows us to handle the case where a CQ was
1075          * freed and then allocated again in the time it took to
1076          * handle the event queue processing.  By constantly incrementing
1077          * the non-constrained portion of the CQ number every time
1078          * a new CQ is allocated, we mitigate (somewhat) the chance
1079          * that a stale event could be passed to the client's CQ
1080          * handler.
1081          *
1082          * Lastly, we check if "hs_ibtfpriv" is NULL.  If it is then it
1083  * means that we have either received this event before we
1084          * finished attaching to the IBTF or we've received it while we
1085          * are in the process of detaching.
1086          */
1087         if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
1088             (state->hs_ibtfpriv != NULL)) {
1089                 hermon_arm_cq_dbr_update(cq->cq_arm_ci_vdbr);
1090                 HERMON_DO_IBTF_CQ_CALLB(state, cq);
1091         }
1092 
1093         return (DDI_SUCCESS);
1094 }
1095 
1096 
1097 /*
1098  * hermon_cq_err_handler()
1099  *    Context: Only called from interrupt context
1100  */
1101 /* ARGSUSED */
1102 int
1103 hermon_cq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq,
1104     hermon_hw_eqe_t *eqe)
1105 {
1106         hermon_cqhdl_t          cq;
1107         uint_t                  cqnum;
1108         ibc_async_event_t       event;
1109         ibt_async_code_t        type;
1110 
1111         HERMON_FMANOTE(state, HERMON_FMA_OVERRUN);
1112         /* Get the CQ handle from CQ number in event descriptor */
1113         cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
1114         cq = hermon_cqhdl_from_cqnum(state, cqnum);
1115 
1116         /*
1117          * If the CQ handle is NULL, this is probably an indication
1118          * that the CQ has been freed already.  In which case, we
1119          * should not deliver this event.
1120          *
1121          * We also check that the CQ number in the handle is the
1122          * same as the CQ number in the event queue entry.  This
1123          * extra check allows us to handle the case where a CQ was
1124          * freed and then allocated again in the time it took to
1125          * handle the event queue processing.  By constantly incrementing
1126          * the non-constrained portion of the CQ number every time
1127          * a new CQ is allocated, we mitigate (somewhat) the chance
1128          * that a stale event could be passed to the client's CQ
1129          * handler.
1130          *
1131          * And then we check if "hs_ibtfpriv" is NULL.  If it is then it
1132  * means that we have either received this event before we
1133          * finished attaching to the IBTF or we've received it while we
1134          * are in the process of detaching.
1135          */
1136         if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
1137             (state->hs_ibtfpriv != NULL)) {
1138                 event.ev_cq_hdl = (ibt_cq_hdl_t)cq->cq_hdlrarg;
1139                 type            = IBT_ERROR_CQ;
1140                 HERMON_DO_IBTF_ASYNC_CALLB(state, type, &event);
1141         }
1142 
1143         return (DDI_SUCCESS);
1144 }
1145 
1146 
1147 /*
1148  * hermon_cq_refcnt_inc()
1149  *    Context: Can be called from interrupt or base context.
1150  */
1151 int
1152 hermon_cq_refcnt_inc(hermon_cqhdl_t cq, uint_t is_special)
1153 {
1154         /*
1155          * Increment the completion queue's reference count.  Note: In order
1156          * to ensure compliance with IBA C11-15, we must ensure that a given
1157          * CQ is not used for both special (SMI/GSI) QP and non-special QP.
1158          * This is accomplished here by keeping track of how the referenced
1159          * CQ is being used.
1160          */
1161         mutex_enter(&cq->cq_lock);
1162         if (cq->cq_refcnt == 0) {
1163                 cq->cq_is_special = is_special;
1164         } else {
1165                 if (cq->cq_is_special != is_special) {
1166                         mutex_exit(&cq->cq_lock);
1167                         return (DDI_FAILURE);
1168                 }
1169         }
1170         cq->cq_refcnt++;
1171         mutex_exit(&cq->cq_lock);
1172         return (DDI_SUCCESS);
1173 }
1174 
1175 
1176 /*
1177  * hermon_cq_refcnt_dec()
1178  *    Context: Can be called from interrupt or base context.
1179  */
1180 void
1181 hermon_cq_refcnt_dec(hermon_cqhdl_t cq)
1182 {
1183         /* Decrement the completion queue's reference count */
1184         mutex_enter(&cq->cq_lock);
1185         cq->cq_refcnt--;
1186         mutex_exit(&cq->cq_lock);
1187 }
1188 
1189 
1190 /*
1191  * hermon_cq_arm_doorbell()
1192  *    Context: Can be called from interrupt or base context.
1193  */
1194 static int
1195 hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq, uint_t cq_cmd)
1196 {
1197         uint32_t        cq_num;
1198         uint32_t        *target;
1199         uint32_t        old_cmd, cmp, new, tmp, cmd_sn;
1200         ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);
1201 
1202         /* initialize the FMA retry loop */
1203         hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);
1204 
1205         cq_num = cq->cq_cqnum;
1206         target = (uint32_t *)cq->cq_arm_ci_vdbr + 1;
1207 
1208         /* the FMA retry loop starts for Hermon doorbell register. */
1209         hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
1210             fm_test_num);
1211 retry:
1212         cmp = *target;
1213         tmp = htonl(cmp);
1214         old_cmd = tmp & (0x7 << HERMON_CQDB_CMD_SHIFT);
1215         cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
1216         if (cq_cmd == HERMON_CQDB_NOTIFY_CQ) {
1217                 if (old_cmd != HERMON_CQDB_NOTIFY_CQ) {
1218                         cmd_sn |= (HERMON_CQDB_NOTIFY_CQ <<
1219                             HERMON_CQDB_CMD_SHIFT);
1220                         new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
1221                         tmp = atomic_cas_32(target, cmp, new);
1222                         if (tmp != cmp)
1223                                 goto retry;
1224                         HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
1225                             &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
1226                             32) | (cq->cq_consindx & 0xFFFFFF));
1227                 } /* else it's already armed */
1228         } else {
1229                 ASSERT(cq_cmd == HERMON_CQDB_NOTIFY_CQ_SOLICIT);
1230                 if (old_cmd != HERMON_CQDB_NOTIFY_CQ &&
1231                     old_cmd != HERMON_CQDB_NOTIFY_CQ_SOLICIT) {
1232                         cmd_sn |= (HERMON_CQDB_NOTIFY_CQ_SOLICIT <<
1233                             HERMON_CQDB_CMD_SHIFT);
1234                         new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
1235                         tmp = atomic_cas_32(target, cmp, new);
1236                         if (tmp != cmp)
1237                                 goto retry;
1238                         HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
1239                             &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
1240                             32) | (cq->cq_consindx & 0xFFFFFF));
1241                 } /* else it's already armed */
1242         }
1243 
1244         /* the FMA retry loop ends. */
1245         hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
1246             fm_test_num);
1247 
1248         return (IBT_SUCCESS);
1249 
1250 pio_error:
1251         hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1252         return (ibc_get_ci_failure(0));
1253 }
1254 
1255 
1256 /*
1257  * hermon_cqhdl_from_cqnum()
1258  *    Context: Can be called from interrupt or base context.
1259  *
1260  *    This routine is important because changing the unconstrained
1261  *    portion of the CQ number is critical to the detection of a
1262  *    potential race condition in the CQ handler code (i.e. the case
1263  *    where a CQ is freed and alloc'd again before an event for the
1264  *    "old" CQ can be handled).
1265  *
1266  *    While this is not a perfect solution (not sure that one exists)
1267  *    it does help to mitigate the chance that this race condition will
1268  *    cause us to deliver a "stale" event to the new CQ owner.  Note:
1269  *    this solution does not scale well because the number of constrained
1270  *    bits increases (and, hence, the number of unconstrained bits
1271  *    decreases) as the number of supported CQs grows.  For small and
1272  *    intermediate values, it should hopefully provide sufficient
1273  *    protection.
1274  */
1275 hermon_cqhdl_t
1276 hermon_cqhdl_from_cqnum(hermon_state_t *state, uint_t cqnum)
1277 {
1278         uint_t  cqindx, cqmask;
1279 
1280         /* Calculate the CQ table index from the cqnum */
1281         cqmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1;
1282         cqindx = cqnum & cqmask;
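        /*
         * For example (hypothetical sizing): with cp_log_num_cq == 16,
         * cqmask is 0xFFFF and a cqnum of 0x12345 yields cqindx 0x2345;
         * the upper (unconstrained) bits are ignored for the table lookup.
         */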
1283         return (hermon_icm_num_to_hdl(state, HERMON_CQC, cqindx));
1284 }
1285 
1286 /*
1287  * hermon_cq_cqe_consume()
1288  *    Context: Can be called from interrupt or base context.
1289  */
1290 static void
1291 hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
1292     hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
1293 {
1294         uint_t          opcode, qpnum, qp1_indx;
1295         ibt_wc_flags_t  flags;
1296         ibt_wrc_opcode_t type;
1297 
1298         /*
1299          * Determine if this is an "error" CQE by examining "opcode".  If it
1300          * is an error CQE, then call hermon_cq_errcqe_consume() and return
1301          * whatever status it returns.  Otherwise, this is a successful
1302          * completion.
1303          */
1304         opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
1305         if ((opcode == HERMON_CQE_SEND_ERR_OPCODE) ||
1306             (opcode == HERMON_CQE_RECV_ERR_OPCODE)) {
1307                 hermon_cq_errcqe_consume(state, cq, cqe, wc);
1308                 return;
1309         }
1310 
1311         /*
1312          * Fetch the Work Request ID using the information in the CQE.
1313          * See hermon_wr.c for more details.
1314          */
1315         wc->wc_id = hermon_wrid_get_entry(cq, cqe);
1316 
1317         /*
1318          * Parse the CQE opcode to determine completion type.  This will set
1319          * not only the type of the completion, but also any flags that might
1320          * be associated with it (e.g. whether immediate data is present).
1321          */
1322         flags = IBT_WC_NO_FLAGS;
1323         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->hs_fcoib_may_be_running))
1324         if (HERMON_CQE_SENDRECV_GET(cq, cqe) != HERMON_COMPLETION_RECV) {
1325 
1326                 /* Send CQE */
1327                 switch (opcode) {
1328                 case HERMON_CQE_SND_RDMAWR_IMM:
1329                 case HERMON_CQE_SND_RDMAWR:
1330                         type = IBT_WRC_RDMAW;
1331                         break;
1332 
1333                 case HERMON_CQE_SND_SEND_INV:
1334                 case HERMON_CQE_SND_SEND_IMM:
1335                 case HERMON_CQE_SND_SEND:
1336                         type = IBT_WRC_SEND;
1337                         break;
1338 
1339                 case HERMON_CQE_SND_LSO:
1340                         type = IBT_WRC_SEND_LSO;
1341                         break;
1342 
1343                 case HERMON_CQE_SND_RDMARD:
1344                         type = IBT_WRC_RDMAR;
1345                         break;
1346 
1347                 case HERMON_CQE_SND_ATOMIC_CS:
1348                         type = IBT_WRC_CSWAP;
1349                         break;
1350 
1351                 case HERMON_CQE_SND_ATOMIC_FA:
1352                         type = IBT_WRC_FADD;
1353                         break;
1354 
1355                 case HERMON_CQE_SND_BIND_MW:
1356                         type = IBT_WRC_BIND;
1357                         break;
1358 
1359                 case HERMON_CQE_SND_FRWR:
1360                         type = IBT_WRC_FAST_REG_PMR;
1361                         break;
1362 
1363                 case HERMON_CQE_SND_LCL_INV:
1364                         type = IBT_WRC_LOCAL_INVALIDATE;
1365                         break;
1366 
1367                 default:
1368                         HERMON_WARNING(state, "unknown send CQE type");
1369                         wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
1370                         return;
1371                 }
1372         } else if ((state->hs_fcoib_may_be_running == B_TRUE) &&
1373             hermon_fcoib_is_fexch_qpn(state, HERMON_CQE_QPNUM_GET(cq, cqe))) {
1374                 type = IBT_WRC_RECV;
1375                 if (HERMON_CQE_FEXCH_DIFE(cq, cqe))
1376                         flags |= IBT_WC_DIF_ERROR;
1377                 wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
1378                 wc->wc_fexch_seq_cnt = HERMON_CQE_FEXCH_SEQ_CNT(cq, cqe);
1379                 wc->wc_fexch_tx_bytes_xfer = HERMON_CQE_FEXCH_TX_BYTES(cq, cqe);
1380                 wc->wc_fexch_rx_bytes_xfer = HERMON_CQE_FEXCH_RX_BYTES(cq, cqe);
1381                 wc->wc_fexch_seq_id = HERMON_CQE_FEXCH_SEQ_ID(cq, cqe);
1382                 wc->wc_detail = HERMON_CQE_FEXCH_DETAIL(cq, cqe) &
1383                     IBT_WC_DETAIL_FC_MATCH_MASK;
1384                 wc->wc_rkey = HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
1385                 flags |= IBT_WC_FEXCH_FMT | IBT_WC_RKEY_INVALIDATED;
1386         } else {
1387                 /*
1388                  * Parse the remaining contents of the CQE into the work
1389                  * completion.  This means filling in SL, QP number, SLID,
1390                  * immediate data, etc.
1391                  *
1392                  * Note: Not all of these fields are valid in a given
1393                  * completion.  Many of them depend on the actual type of
1394                  * completion.  So we fill in all of the fields and leave
1395                  * it up to the IBTF and consumer to sort out which are
1396                  * valid based on their context.
1397                  */
1398                 wc->wc_sl      = HERMON_CQE_SL_GET(cq, cqe);
1399                 wc->wc_qpn     = HERMON_CQE_DQPN_GET(cq, cqe);
1400                 wc->wc_slid    = HERMON_CQE_DLID_GET(cq, cqe);
1401                 wc->wc_immed_data =
1402                     HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
1403                 wc->wc_ethertype  = (wc->wc_immed_data & 0xFFFF);
1404                 wc->wc_pkey_ix         = (wc->wc_immed_data &
1405                     ((1 << state->hs_queryport.log_max_pkey) - 1));
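                /*
                 * Note: wc_immed_data, wc_ethertype, and wc_pkey_ix above
                 * are all derived from the same CQE field; only the
                 * interpretation matching this completion type is
                 * meaningful to the consumer.
                 */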
1406                 /*
1407                  * Fill in "bytes transferred" as appropriate.  Also,
1408                  * if necessary, fill in the "path bits" field.
1409                  */
1410                 wc->wc_path_bits = HERMON_CQE_PATHBITS_GET(cq, cqe);
1411                 wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
1412 
1413                 /*
1414                  * Check for GRH, update the flags, then fill in "wc_flags"
1415                  * field in the work completion
1416                  */
1417                 if (HERMON_CQE_GRH_GET(cq, cqe) != 0) {
1418                         flags |= IBT_WC_GRH_PRESENT;
1419                 }
1420 
1421                 /* Receive CQE */
1422                 switch (opcode) {
1423                 case HERMON_CQE_RCV_SEND_IMM:
1424                         /*
1425                          * Note:  According to the PRM, all QP1 recv
1426                          * completions look like the result of a Send with
1427                          * Immediate.  They are not, however, (MADs are Send
1428                          * Only) so we need to check the QP number and set
1429                          * the flag only if it is non-QP1.
1430                          */
1431                         qpnum    = HERMON_CQE_QPNUM_GET(cq, cqe);
1432                         qp1_indx = state->hs_spec_qp1->hr_indx;
1433                         if ((qpnum < qp1_indx) || (qpnum > qp1_indx + 1)) {
1434                                 flags |= IBT_WC_IMMED_DATA_PRESENT;
1435                         }
1436                         /* FALLTHROUGH */
1437 
1438                 case HERMON_CQE_RCV_SEND:
1439                         type = IBT_WRC_RECV;
1440                         if (HERMON_CQE_IS_IPOK(cq, cqe)) {
1441                                 wc->wc_cksum = HERMON_CQE_CKSUM(cq, cqe);
1442                                 flags |= IBT_WC_CKSUM_OK;
1443                                 wc->wc_detail = IBT_WC_DETAIL_ALL_FLAGS_MASK &
1444                                     HERMON_CQE_IPOIB_STATUS(cq, cqe);
1445                         }
1446                         break;
1447 
1448                 case HERMON_CQE_RCV_SEND_INV:
1449                         type = IBT_WRC_RECV;
1450                         flags |= IBT_WC_RKEY_INVALIDATED;
1451                         wc->wc_rkey = wc->wc_immed_data; /* same field in cqe */
1452                         break;
1453 
1454                 case HERMON_CQE_RCV_RDMAWR_IMM:
1455                         flags |= IBT_WC_IMMED_DATA_PRESENT;
1456                         type = IBT_WRC_RECV_RDMAWI;
1457                         break;
1458 
1459                 default:
1460 
1461                         HERMON_WARNING(state, "unknown recv CQE type");
1462                         wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
1463                         return;
1464                 }
1465         }
1466         wc->wc_type = type;
1467         wc->wc_flags = flags;
1468         wc->wc_status = IBT_WC_SUCCESS;
1469 }
1470 
1471 /*
1472  * hermon_cq_errcqe_consume()
1473  *    Context: Can be called from interrupt or base context.
1474  */
1475 static void
1476 hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
1477     hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
1478 {
1479         uint32_t                imm_eth_pkey_cred;
1480         uint_t                  status;
1481         ibt_wc_status_t         ibt_status;
1482 
1483         /*
1484          * Fetch the Work Request ID using the information in the CQE.
1485          * See hermon_wr.c for more details.
1486          */
1487         wc->wc_id = hermon_wrid_get_entry(cq, cqe);
1488 
1489         /*
1490          * Parse the CQE opcode to determine completion type.  We know that
1491          * the CQE is an error completion, so we extract only the completion
1492          * status/syndrome here.
1493          */
1494         imm_eth_pkey_cred = HERMON_CQE_ERROR_SYNDROME_GET(cq, cqe);
1495         status = imm_eth_pkey_cred;
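        /*
         * Note: despite the imm_eth_pkey_cred name, the value extracted
         * above is simply the error completion syndrome, which is decoded
         * into an IBT work completion status below.
         */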
1496         if (status != HERMON_CQE_WR_FLUSHED_ERR)
1497                 IBTF_DPRINTF_L2("CQE ERR", "cqe %p QPN %x indx %x status 0x%x  "
1498                     "vendor syndrome %x", cqe, HERMON_CQE_QPNUM_GET(cq, cqe),
1499                     HERMON_CQE_WQECNTR_GET(cq, cqe), status,
1500                     HERMON_CQE_ERROR_VENDOR_SYNDROME_GET(cq, cqe));
1501         switch (status) {
1502         case HERMON_CQE_LOC_LEN_ERR:
1503                 HERMON_WARNING(state, HERMON_FMA_LOCLEN);
1504                 ibt_status = IBT_WC_LOCAL_LEN_ERR;
1505                 break;
1506 
1507         case HERMON_CQE_LOC_OP_ERR:
1508                 HERMON_WARNING(state, HERMON_FMA_LOCQPOP);
1509                 ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
1510                 break;
1511 
1512         case HERMON_CQE_LOC_PROT_ERR:
1513                 HERMON_WARNING(state, HERMON_FMA_LOCPROT);
1514                 ibt_status = IBT_WC_LOCAL_PROTECT_ERR;
1515                 IBTF_DPRINTF_L2("ERRCQE", "is at %p", cqe);
1516                 if (hermon_should_panic) {
1517                         cmn_err(CE_PANIC, "Hermon intentional PANIC - "
1518                             "Local Protection Error\n");
1519                 }
1520                 break;
1521 
1522         case HERMON_CQE_WR_FLUSHED_ERR:
1523                 ibt_status = IBT_WC_WR_FLUSHED_ERR;
1524                 break;
1525 
1526         case HERMON_CQE_MW_BIND_ERR:
1527                 HERMON_WARNING(state, HERMON_FMA_MWBIND);
1528                 ibt_status = IBT_WC_MEM_WIN_BIND_ERR;
1529                 break;
1530 
1531         case HERMON_CQE_BAD_RESPONSE_ERR:
1532                 HERMON_WARNING(state, HERMON_FMA_RESP);
1533                 ibt_status = IBT_WC_BAD_RESPONSE_ERR;
1534                 break;
1535 
1536         case HERMON_CQE_LOCAL_ACCESS_ERR:
1537                 HERMON_WARNING(state, HERMON_FMA_LOCACC);
1538                 ibt_status = IBT_WC_LOCAL_ACCESS_ERR;
1539                 break;
1540 
1541         case HERMON_CQE_REM_INV_REQ_ERR:
1542                 HERMON_WARNING(state, HERMON_FMA_REMREQ);
1543                 ibt_status = IBT_WC_REMOTE_INVALID_REQ_ERR;
1544                 break;
1545 
1546         case HERMON_CQE_REM_ACC_ERR:
1547                 HERMON_WARNING(state, HERMON_FMA_REMACC);
1548                 ibt_status = IBT_WC_REMOTE_ACCESS_ERR;
1549                 break;
1550 
1551         case HERMON_CQE_REM_OP_ERR:
1552                 HERMON_WARNING(state, HERMON_FMA_REMOP);
1553                 ibt_status = IBT_WC_REMOTE_OP_ERR;
1554                 break;
1555 
1556         case HERMON_CQE_TRANS_TO_ERR:
1557                 HERMON_WARNING(state, HERMON_FMA_XPORTCNT);
1558                 ibt_status = IBT_WC_TRANS_TIMEOUT_ERR;
1559                 break;
1560 
1561         case HERMON_CQE_RNRNAK_TO_ERR:
1562                 HERMON_WARNING(state, HERMON_FMA_RNRCNT);
1563                 ibt_status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
1564                 break;
1565 
1566         /*
1567          * The following error codes are not supported in the Hermon driver
1568          * as they relate only to Reliable Datagram completion statuses:
1569          *    case HERMON_CQE_LOCAL_RDD_VIO_ERR:
1570          *    case HERMON_CQE_REM_INV_RD_REQ_ERR:
1571          *    case HERMON_CQE_EEC_REM_ABORTED_ERR:
1572          *    case HERMON_CQE_INV_EEC_NUM_ERR:
1573          *    case HERMON_CQE_INV_EEC_STATE_ERR:
1574          *    case HERMON_CQE_LOC_EEC_ERR:
1575          */
1576 
1577         default:
1578                 HERMON_WARNING(state, "unknown error CQE status");
1579                 HERMON_FMANOTE(state, HERMON_FMA_UNKN);
1580                 ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
1581                 break;
1582         }
1583 
1584         wc->wc_status = ibt_status;
1585 }
1586 
1587 
1588 /*
1589  * hermon_cq_resize_helper()
1590  *    Context: Can be called only from user or kernel context.
1591  */
1592 void
1593 hermon_cq_resize_helper(hermon_state_t *state, hermon_cqhdl_t cq)
1594 {
1595         hermon_cqhdl_t          resize_hdl;
1596         int                     status;
1597 
1598         /*
1599          * we're here because we found the special cqe opcode, so we have
1600          * to update the cq_handle, release the old resources, clear the
1601          * flag in the cq_hdl, and release the resize_hdl.  When we return
1602          * above, it will take care of the rest
1603          */
1604         ASSERT(MUTEX_HELD(&cq->cq_lock));
1605 
1606         resize_hdl = cq->cq_resize_hdl;
1607 
1608         /*
1609          * Deregister the memory for the old Completion Queue.  Note: We
1610          * really can't return error here because we have no good way to
1611          * cleanup.  Plus, the deregistration really shouldn't ever happen.
1612          * So, if it does, it is an indication that something has gone
1613          * seriously wrong.  So we print a warning message and return error
1614          * (knowing, of course, that the "old" CQ memory will be leaked)
1615          */
1616         status = hermon_mr_deregister(state, &cq->cq_mrhdl, HERMON_MR_DEREG_ALL,
1617             HERMON_SLEEP);
1618         if (status != DDI_SUCCESS) {
1619                 HERMON_WARNING(state, "failed to deregister old CQ memory");
1620         }
1621 
1622         /* Next, free the memory from the old CQ buffer */
1623         hermon_queue_free(&cq->cq_cqinfo);
1624 
        /* Now update the cq_hdl with the state saved in the resize handle */

        cq->cq_buf   = resize_hdl->cq_buf;
        cq->cq_mrhdl = resize_hdl->cq_mrhdl;
        cq->cq_bufsz = resize_hdl->cq_bufsz;
        cq->cq_log_cqsz = resize_hdl->cq_log_cqsz;
        cq->cq_umap_dhp = resize_hdl->cq_umap_dhp;
        cq->cq_resize_hdl = NULL;
        bcopy(&resize_hdl->cq_cqinfo, &cq->cq_cqinfo,
            sizeof (struct hermon_qalloc_info_s));
1635 
1636         /* finally, release the resizing handle */
1637         kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
1638 }
1639 
1640 
1641 /*
1642  * hermon_cq_entries_flush()
1643  * Context: Can be called from interrupt or base context.
1644  */
1645 /* ARGSUSED */
1646 void
1647 hermon_cq_entries_flush(hermon_state_t *state, hermon_qphdl_t qp)
1648 {
1649         hermon_cqhdl_t          cq;
1650         hermon_hw_cqe_t         *cqe, *next_cqe;
1651         hermon_srqhdl_t         srq;
1652         hermon_workq_hdr_t      *wq;
1653         uint32_t                cons_indx, tail_cons_indx, wrap_around_mask;
1654         uint32_t                new_indx, check_indx, qpnum;
1655         uint32_t                shift, mask;
1656         int                     outstanding_cqes;
1657 
1658         qpnum = qp->qp_qpnum;
1659         if ((srq = qp->qp_srqhdl) != NULL)
1660                 wq = qp->qp_srqhdl->srq_wq_wqhdr;
1661         else
1662                 wq = NULL;
1663         cq = qp->qp_rq_cqhdl;
1664 
1665         if (cq == NULL) {
1666                 cq = qp->qp_sq_cqhdl;
1667         }
1668 
1669 do_send_cq:     /* loop back to here if send_cq is not the same as recv_cq */
1670         if (cq == NULL)
1671                 return;
1672 
1673         cons_indx = cq->cq_consindx;
1674         shift = cq->cq_log_cqsz;
1675         mask = cq->cq_bufsz;
1676         wrap_around_mask = mask - 1;
1677 
1678         /* Calculate the pointer to the first CQ entry */
1679         cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
1680 
1681         /*
1682          * Loop through the CQ looking for entries owned by software.  If an
1683          * entry is owned by software then we increment an 'outstanding_cqes'
1684          * count to know how many entries total we have on our CQ.  We use this
1685          * value further down to know how many entries to loop through looking
1686          * for our same QP number.
1687          */
1688         outstanding_cqes = 0;
1689         tail_cons_indx = cons_indx;
1690         while (HERMON_CQE_OWNER_IS_SW(cq, cqe, tail_cons_indx, shift, mask)) {
1691                 /* increment total cqes count */
1692                 outstanding_cqes++;
1693 
1694                 /* increment the consumer index */
1695                 tail_cons_indx++;
1696 
1697                 /* update the pointer to the next cq entry */
1698                 cqe = &cq->cq_buf[tail_cons_indx & wrap_around_mask];
1699         }
1700 
1701         /*
1702          * Using the 'tail_cons_indx' that was just set, we now know how many
1703          * total CQEs possible there are.  Set the 'check_indx' and the
1704          * 'new_indx' to the last entry identified by 'tail_cons_indx'
1705          */
1706         check_indx = new_indx = (tail_cons_indx - 1);
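        /*
         * Worked example (hypothetical indices): with cons_indx == 4 and
         * tail_cons_indx == 8, entries 4..7 are software-owned.  If only
         * entry 5 belongs to this QP, the backward sweep below copies
         * entry 4 into slot 5, leaves entries 6 and 7 in place, and ends
         * with new_indx == 4, so the consumer index becomes 5.
         */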
1707 
1708         while (--outstanding_cqes >= 0) {
1709                 cqe = &cq->cq_buf[check_indx & wrap_around_mask];
1710 
1711                 /*
1712                  * If the QP number is the same in the CQE as the QP, then
1713                  * we must "consume" it.  If it is for an SRQ wqe, then we
1714                  * also must free the wqe back onto the free list of the SRQ.
1715                  */
1716                 if (qpnum == HERMON_CQE_QPNUM_GET(cq, cqe)) {
1717                         if (srq && (HERMON_CQE_SENDRECV_GET(cq, cqe) ==
1718                             HERMON_COMPLETION_RECV)) {
1719                                 uint64_t *desc;
1720                                 int indx;
1721 
1722                                 /* Add wqe back to SRQ free list */
1723                                 indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) &
1724                                     wq->wq_mask;
1725                                 desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
1726                                 ((uint16_t *)desc)[1] = htons(indx);
1727                                 wq->wq_tail = indx;
1728                         }
1729                 } else {        /* CQEs for other QPNs need to remain */
1730                         if (check_indx != new_indx) {
1731                                 next_cqe =
1732                                     &cq->cq_buf[new_indx & wrap_around_mask];
1733                                 /* Copy the CQE into the "next_cqe" pointer. */
1734                                 bcopy(cqe, next_cqe, sizeof (hermon_hw_cqe_t));
1735                         }
1736                         new_indx--;     /* move index to next CQE to fill */
1737                 }
1738                 check_indx--;           /* move index to next CQE to check */
1739         }
1740 
1741         /*
1742          * Update consumer index to be the 'new_indx'.  This moves it past all
1743          * removed entries.  Because 'new_indx' is pointing to the last
1744          * previously valid SW owned entry, we add 1 to point the cons_indx to
1745          * the first HW owned entry.
1746          */
1747         cons_indx = (new_indx + 1);
1748 
1749         /*
1750          * Now we only ring the doorbell (to update the consumer index) if
1751          * we've actually consumed a CQ entry.  If we found no QP number
1752          * matches above, then we would not have removed anything.  So only if
1753          * something was removed do we ring the doorbell.
1754          */
1755         if (cq->cq_consindx != cons_indx) {
1756                 /*
1757                  * Update the consumer index in both the CQ handle and the
1758                  * doorbell record.
1759                  */
1760                 cq->cq_consindx = cons_indx;
1761 
1762                 hermon_cq_update_ci_doorbell(cq);
1763 
1764         }
1765         if (cq != qp->qp_sq_cqhdl) {
1766                 cq = qp->qp_sq_cqhdl;
1767                 goto do_send_cq;
1768         }
1769 }
1770 
1771 /*
1772  * hermon_get_cq_sched_list()
1773  *    Context: Only called from attach() path context
1774  *
 * Read properties, creating entries in hs_cq_sched_array with
 * information about the requested "desired" and "minimum"
 * number of MSI-X interrupt vectors per list entry.
1778  */
1779 static int
1780 hermon_get_cq_sched_list(hermon_state_t *state)
1781 {
1782         char **listp, ulp_prop[HERMON_CQH_MAX + 4];
1783         uint_t nlist, i, j, ndata;
1784         int *data;
1785         size_t len;
1786         hermon_cq_sched_t *cq_schedp;
1787 
1788         if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, state->hs_dip,
1789             DDI_PROP_DONTPASS, "cqh-group-list", &listp, &nlist) !=
1790             DDI_PROP_SUCCESS)
1791                 return (0);
1792 
1793         state->hs_cq_sched_array_size = nlist;
1794         state->hs_cq_sched_array = cq_schedp = kmem_zalloc(nlist *
1795             sizeof (hermon_cq_sched_t), KM_SLEEP);
1796         for (i = 0; i < nlist; i++) {
1797                 if ((len = strlen(listp[i])) >= HERMON_CQH_MAX) {
1798                         cmn_err(CE_CONT, "'cqh' property name too long\n");
1799                         goto game_over;
1800                 }
1801                 for (j = 0; j < i; j++) {
1802                         if (strcmp(listp[j], listp[i]) == 0) {
1803                                 cmn_err(CE_CONT, "Duplicate 'cqh' property\n");
1804                                 goto game_over;
1805                         }
1806                 }
1807                 (void) strncpy(cq_schedp[i].cqs_name, listp[i], HERMON_CQH_MAX);
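                /* Build the per-group property name "cqh-<name>" */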
1808                 ulp_prop[0] = 'c';
1809                 ulp_prop[1] = 'q';
1810                 ulp_prop[2] = 'h';
1811                 ulp_prop[3] = '-';
1812                 (void) strncpy(ulp_prop + 4, listp[i], len + 1);
1813                 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
1814                     DDI_PROP_DONTPASS, ulp_prop, &data, &ndata) !=
1815                     DDI_PROP_SUCCESS) {
1816                         cmn_err(CE_CONT, "property '%s' not found\n", ulp_prop);
1817                         goto game_over;
1818                 }
1819                 if (ndata != 2) {
1820                         cmn_err(CE_CONT, "property '%s' does not "
1821                             "have 2 integers\n", ulp_prop);
1822                         goto game_over_free_data;
1823                 }
1824                 cq_schedp[i].cqs_desired = data[0];
1825                 cq_schedp[i].cqs_minimum = data[1];
1826                 cq_schedp[i].cqs_refcnt = 0;
1827                 ddi_prop_free(data);
1828         }
1829         if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
1830             DDI_PROP_DONTPASS, "cqh-default", &data, &ndata) !=
1831             DDI_PROP_SUCCESS) {
1832                 cmn_err(CE_CONT, "property 'cqh-default' not found\n");
1833                 goto game_over;
1834         }
1835         if (ndata != 2) {
1836                 cmn_err(CE_CONT, "property 'cqh-default' does not "
1837                     "have 2 integers\n");
1838                 goto game_over_free_data;
1839         }
1840         cq_schedp = &state->hs_cq_sched_default;
1841         cq_schedp->cqs_desired = data[0];
1842         cq_schedp->cqs_minimum = data[1];
1843         cq_schedp->cqs_refcnt = 0;
1844         ddi_prop_free(data);
1845         ddi_prop_free(listp);
1846         return (1);             /* game on */
1847 
1848 game_over_free_data:
1849         ddi_prop_free(data);
1850 game_over:
1851         cmn_err(CE_CONT, "Error in 'cqh' properties in hermon.conf\n");
1852         cmn_err(CE_CONT, "completion handler groups not being used\n");
        kmem_free(cq_schedp, nlist * sizeof (hermon_cq_sched_t));
        state->hs_cq_sched_array = NULL;
        state->hs_cq_sched_array_size = 0;
1855         ddi_prop_free(listp);
1856         return (0);
1857 }
1858 
1859 /*
1860  * hermon_cq_sched_init()
1861  *    Context: Only called from attach() path context
1862  *
1863  * Read the hermon.conf properties looking for cq_sched info,
1864  * creating reserved pools of MSI-X interrupt ranges for the
1865  * specified ULPs.
1866  */
1867 int
1868 hermon_cq_sched_init(hermon_state_t *state)
1869 {
1870         hermon_cq_sched_t *cq_schedp, *defp;
1871         int i, desired, array_size;
1872 
1873         mutex_init(&state->hs_cq_sched_lock, NULL, MUTEX_DRIVER,
1874             DDI_INTR_PRI(state->hs_intrmsi_pri));
1875 
1876         mutex_enter(&state->hs_cq_sched_lock);
1877         state->hs_cq_sched_array = NULL;
1878 
1879         /* initialize cq_sched_default */
1880         defp = &state->hs_cq_sched_default;
1881         defp->cqs_start_hid = 1;
1882         defp->cqs_len = state->hs_intrmsi_allocd;
1883         defp->cqs_next_alloc = defp->cqs_len - 1;
1884         (void) strncpy(defp->cqs_name, "default", 8);
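        /*
         * Note: the default pool set up above spans all allocated MSI-X
         * vectors (handler ids 1 .. hs_intrmsi_allocd); the code below
         * shrinks it as named groups claim dedicated ranges from it.
         */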
1885 
1886         /* Read properties to determine which ULPs use cq_sched */
1887         if (hermon_get_cq_sched_list(state) == 0)
1888                 goto done;
1889 
        /* Determine if we have enough vectors; if not, use the default pool */
1891         desired = defp->cqs_desired; /* default desired (from hermon.conf) */
1892         if (desired <= 0)
1893                 goto done;              /* all interrupts in the default pool */
1894         cq_schedp = state->hs_cq_sched_array;
1895         array_size = state->hs_cq_sched_array_size;
1896         for (i = 0; i < array_size; i++)
1897                 desired += cq_schedp[i].cqs_desired;
1898         if (desired > state->hs_intrmsi_allocd) {
1899                 cmn_err(CE_CONT, "#interrupts allocated (%d) is less than "
1900                     "the #interrupts desired (%d)\n",
1901                     state->hs_intrmsi_allocd, desired);
1902                 cmn_err(CE_CONT, "completion handler groups not being used\n");
1903                 goto done;              /* all interrupts in the default pool */
1904         }
1905         /* Game on.  For each cq_sched group, reserve the MSI-X range */
1906         for (i = 0; i < array_size; i++) {
1907                 desired = cq_schedp[i].cqs_desired;
1908                 cq_schedp[i].cqs_start_hid = defp->cqs_start_hid;
1909                 cq_schedp[i].cqs_len = desired;
1910                 cq_schedp[i].cqs_next_alloc = desired - 1;
1911                 defp->cqs_len -= desired;
1912                 defp->cqs_start_hid += desired;
1913         }
1914         /* reset default's start allocation seed */
1915         state->hs_cq_sched_default.cqs_next_alloc =
1916             state->hs_cq_sched_default.cqs_len - 1;
1917 
1918 done:
1919         mutex_exit(&state->hs_cq_sched_lock);
1920         return (IBT_SUCCESS);
1921 }
1922 
1923 void
1924 hermon_cq_sched_fini(hermon_state_t *state)
1925 {
1926         mutex_enter(&state->hs_cq_sched_lock);
1927         if (state->hs_cq_sched_array_size) {
1928                 kmem_free(state->hs_cq_sched_array, sizeof (hermon_cq_sched_t) *
1929                     state->hs_cq_sched_array_size);
1930                 state->hs_cq_sched_array_size = 0;
1931                 state->hs_cq_sched_array = NULL;
1932         }
1933         mutex_exit(&state->hs_cq_sched_lock);
1934         mutex_destroy(&state->hs_cq_sched_lock);
1935 }
1936 
1937 int
1938 hermon_cq_sched_alloc(hermon_state_t *state, ibt_cq_sched_attr_t *attr,
1939     hermon_cq_sched_t **cq_sched_pp)
1940 {
1941         hermon_cq_sched_t       *cq_schedp;
1942         int                     i;
1943         char                    *name;
1944         ibt_cq_sched_flags_t    flags;
1945 
1946         flags = attr->cqs_flags;
1947         if ((flags & (IBT_CQS_SCHED_GROUP | IBT_CQS_EXACT_SCHED_GROUP)) == 0) {
1948                 *cq_sched_pp = NULL;
1949                 return (IBT_SUCCESS);
1950         }
1951         name = attr->cqs_pool_name;
1952 
1953         mutex_enter(&state->hs_cq_sched_lock);
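        /*
         * Look up the named group.  A match that still has dedicated
         * interrupt vectors gets its reference count bumped; otherwise a
         * NULL handle is returned, which is an error only when the caller
         * insisted on IBT_CQS_EXACT_SCHED_GROUP.
         */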
1954         cq_schedp = state->hs_cq_sched_array;
1955         for (i = 0; i < state->hs_cq_sched_array_size; i++, cq_schedp++) {
1956                 if (strcmp(name, cq_schedp->cqs_name) == 0) {
1957                         if (cq_schedp->cqs_len != 0)
1958                                 cq_schedp->cqs_refcnt++;
1959                         break;  /* found it */
1960                 }
1961         }
1962         if ((i == state->hs_cq_sched_array_size) ||  /* not found, or */
1963             (cq_schedp->cqs_len == 0)) /* defined, but no dedicated intr's */
1964                 cq_schedp = NULL;
1965         mutex_exit(&state->hs_cq_sched_lock);
1966 
1967         *cq_sched_pp = cq_schedp;       /* set to valid hdl, or to NULL */
1968         if ((cq_schedp == NULL) &&
1969             (attr->cqs_flags & IBT_CQS_EXACT_SCHED_GROUP))
1970                 return (IBT_CQ_NO_SCHED_GROUP);
1971         else
1972                 return (IBT_SUCCESS);
1973 }
1974 
1975 int
1976 hermon_cq_sched_free(hermon_state_t *state, hermon_cq_sched_t *cq_schedp)
1977 {
1978         if (cq_schedp != NULL) {
1979                 /* Just decrement refcnt */
1980                 mutex_enter(&state->hs_cq_sched_lock);
1981                 if (cq_schedp->cqs_refcnt == 0)
1982                         HERMON_WARNING(state, "cq_sched free underflow\n");
1983                 else
1984                         cq_schedp->cqs_refcnt--;
1985                 mutex_exit(&state->hs_cq_sched_lock);
1986         }
1987         return (IBT_SUCCESS);
1988 }