/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * hermon_cq.c
 *    Hermon Completion Queue Processing Routines
 *
 *    Implements all the routines necessary for allocating, freeing, resizing,
 *    and handling the completion type events that the Hermon hardware can
 *    generate.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/bitmap.h>
#include <sys/sysmacros.h>

#include <sys/ib/adapters/hermon/hermon.h>

int hermon_should_panic = 0;	/* debugging aid */

#define	hermon_cq_update_ci_doorbell(cq)				\
	/* Build the doorbell record data (low 24 bits only) */	\
	HERMON_UAR_DB_RECORD_WRITE(cq->cq_arm_ci_vdbr,			\
	    cq->cq_consindx & 0x00FFFFFF)
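
/*
 * Illustrative note (comment only): the consumer index kept in the CQ
 * handle is a free-running 32-bit counter, but the doorbell record only
 * holds its low 24 bits (the 0x00FFFFFF mask above).  For example, a
 * cq_consindx of 0x01000002 is recorded as 0x000002; hardware compares
 * only these low-order bits, so the truncation is harmless as long as
 * software never falls 2^24 or more entries behind.
 */
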
static int hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq,
    uint_t cmd);
#pragma inline(hermon_cq_arm_doorbell)
static void hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr);
#pragma inline(hermon_arm_cq_dbr_init)
static void hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
static void hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc);


/*
 * hermon_cq_alloc()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_cq_alloc(hermon_state_t *state, ibt_cq_hdl_t ibt_cqhdl,
    ibt_cq_attr_t *cq_attr, uint_t *actual_size, hermon_cqhdl_t *cqhdl,
    uint_t sleepflag)
{
	hermon_rsrc_t		*cqc, *rsrc;
	hermon_umap_db_entry_t	*umapdb;
	hermon_hw_cqc_t		cqc_entry;
	hermon_cqhdl_t		cq;
	ibt_mr_attr_t		mr_attr;
	hermon_mr_options_t	op;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;
	hermon_hw_cqe_t		*buf;
	uint64_t		value;
	uint32_t		log_cq_size, uarpg;
	uint_t			cq_is_umap;
	uint32_t		status, flag;
	hermon_cq_sched_t	*cq_schedp;

	/*
	 * Determine whether CQ is being allocated for userland access or
	 * whether it is being allocated for kernel access.  If the CQ is
	 * being allocated for userland access, then lookup the UAR
	 * page number for the current process.  Note:  If this is not
	 * found (e.g. if the process has not previously open()'d the
	 * Hermon driver), then an error is returned.
	 */
	cq_is_umap = (cq_attr->cq_flags & IBT_CQ_USER_MAP) ? 1 : 0;
	if (cq_is_umap) {
		status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
		    MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
		if (status != DDI_SUCCESS) {
			status = IBT_INVALID_PARAM;
			goto cqalloc_fail;
		}
		uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
	} else {
		uarpg = state->hs_kernel_uar_index;
	}

	/* Use the internal protection domain (PD) for setting up CQs */
	pd = state->hs_pdhdl_internal;

	/* Increment the reference count on the protection domain (PD) */
	hermon_pd_refcnt_inc(pd);

	/*
	 * Allocate a CQ context entry.  This will be filled in with all
	 * the necessary parameters to define the Completion Queue.  And then
	 * ownership will be passed to the hardware in the final step
	 * below.  If we fail here, we must undo the protection domain
	 * reference count.
	 */
	status = hermon_rsrc_alloc(state, HERMON_CQC, 1, sleepflag, &cqc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto cqalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the completion queue
	 * (i.e. the Hermon Completion Queue handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = hermon_rsrc_alloc(state, HERMON_CQHDL, 1, sleepflag, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto cqalloc_fail2;
	}
	cq = (hermon_cqhdl_t)rsrc->hr_addr;
	cq->cq_is_umap = cq_is_umap;
	cq->cq_cqnum = cqc->hr_indx;	/* just use index, implicit in Hermon */
	cq->cq_intmod_count = 0;
	cq->cq_intmod_usec = 0;

	/*
	 * If this will be a user-mappable CQ, then allocate an entry for
	 * the "userland resources database".  This will later be added to
	 * the database (after all further CQ operations are successful).
	 * If we fail here, we must undo the reference counts and the
	 * previous resource allocation.
	 */
	if (cq->cq_is_umap) {
		umapdb = hermon_umap_db_alloc(state->hs_instance, cq->cq_cqnum,
		    MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			status = IBT_INSUFF_RESOURCE;
			goto cqalloc_fail3;
		}
	}


	/*
	 * Allocate the doorbell record.  We'll need one for the CQ, handling
	 * both consumer index (SET CI) and the CQ state (CQ ARM).
	 */

	status = hermon_dbr_alloc(state, uarpg, &cq->cq_arm_ci_dbr_acchdl,
	    &cq->cq_arm_ci_vdbr, &cq->cq_arm_ci_pdbr, &cq->cq_dbr_mapoffset);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto cqalloc_fail4;
	}

	/*
	 * Calculate the appropriate size for the completion queue.
	 * Note:  All Hermon CQs must be a power-of-2 minus 1 in size.  Also
	 * they may not be any smaller than HERMON_CQ_MIN_SIZE.  This step
	 * is to round the requested size up to the next highest power-of-2.
	 */
	cq_attr->cq_size = max(cq_attr->cq_size, HERMON_CQ_MIN_SIZE);
	log_cq_size = highbit(cq_attr->cq_size);

	/*
	 * Next we verify that the rounded-up size is valid (i.e. consistent
	 * with the device limits and/or software-configured limits)
	 */
	if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
		status = IBT_HCA_CQ_EXCEEDED;
		goto cqalloc_fail4a;
	}
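
	/*
	 * Worked example (comment only): a request for 1000 CQEs becomes
	 * max(1000, HERMON_CQ_MIN_SIZE), and (assuming the minimum is
	 * smaller) highbit(1000) = 10, so the queue is built with
	 * 1 << 10 = 1024 entries.  Since Hermon CQs hold a power-of-2
	 * minus 1 usable entries, the "actual_size" returned to the
	 * caller below is 1023.
	 */
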
	/*
	 * Allocate the memory for Completion Queue.
	 *
	 * Note: Although we use the common queue allocation routine, we
	 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
	 * kernel system memory) for kernel CQs because it would be
	 * inefficient to have CQs located in DDR memory.  This is primarily
	 * because CQs are read from (by software) more than they are written
	 * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
	 * user-mappable CQs for a similar reason.)
	 * It is also worth noting that, unlike Hermon QP work queues,
	 * completion queues do not have the same strict alignment
	 * requirements.  It is sufficient for the CQ memory to be both
	 * aligned to and bound to addresses which are a multiple of CQE
	 * size.
	 */
	cq->cq_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);

	cq->cq_cqinfo.qa_alloc_align = PAGESIZE;
	cq->cq_cqinfo.qa_bind_align = PAGESIZE;
	if (cq->cq_is_umap) {
		cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
	} else {
		cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
		hermon_arm_cq_dbr_init(cq->cq_arm_ci_vdbr);
	}
	status = hermon_queue_alloc(state, &cq->cq_cqinfo, sleepflag);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto cqalloc_fail4;
	}
	buf = (hermon_hw_cqe_t *)cq->cq_cqinfo.qa_buf_aligned;

	/*
	 * The ownership bit of the CQEs is set by the HW during the process
	 * of transferring ownership of the CQ (PRM 09.35c, 14.2.1, note D1).
	 */

	/*
	 * Register the memory for the CQ.  The memory for the CQ must
	 * be registered in the Hermon TPT tables.  This gives us the LKey
	 * to specify in the CQ context below.  Note: If this is a user-
	 * mappable CQ, then we will force DDI_DMA_CONSISTENT mapping.
	 */
	flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
	mr_attr.mr_len = cq->cq_cqinfo.qa_size;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
	op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
	op.mro_bind_dmahdl = cq->cq_cqinfo.qa_dmahdl;
	op.mro_bind_override_addr = 0;
	status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
	    HERMON_CQ_CMPT);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto cqalloc_fail5;
	}

	cq->cq_erreqnum = HERMON_CQ_ERREQNUM_GET(state);
	if (cq_attr->cq_flags & IBT_CQ_HID) {
		if (!HERMON_HID_VALID(state, cq_attr->cq_hid)) {
			IBTF_DPRINTF_L2("CQalloc", "bad handler id 0x%x",
			    cq_attr->cq_hid);
			status = IBT_INVALID_PARAM;
			goto cqalloc_fail5;
		}
		cq->cq_eqnum = HERMON_HID_TO_EQNUM(state, cq_attr->cq_hid);
		IBTF_DPRINTF_L2("cqalloc", "hid: eqn %d", cq->cq_eqnum);
	} else {
		cq_schedp = (hermon_cq_sched_t *)cq_attr->cq_sched;
		if (cq_schedp == NULL) {
			cq_schedp = &state->hs_cq_sched_default;
		} else if (cq_schedp != &state->hs_cq_sched_default) {
			int i;
			hermon_cq_sched_t *tmp;

			tmp = state->hs_cq_sched_array;
			for (i = 0; i < state->hs_cq_sched_array_size; i++)
				if (cq_schedp == &tmp[i])
					break;	/* found it */
			if (i >= state->hs_cq_sched_array_size) {
				cmn_err(CE_CONT, "!Invalid cq_sched argument: "
				    "ignored\n");
				cq_schedp = &state->hs_cq_sched_default;
			}
		}
		cq->cq_eqnum = HERMON_HID_TO_EQNUM(state,
		    HERMON_CQSCHED_NEXT_HID(cq_schedp));
		IBTF_DPRINTF_L2("cqalloc", "sched: first-1 %d, len %d, "
		    "eqn %d", cq_schedp->cqs_start_hid - 1,
		    cq_schedp->cqs_len, cq->cq_eqnum);
	}
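
	/*
	 * Illustrative note (a sketch of assumed behavior):
	 * HERMON_CQSCHED_NEXT_HID() is expected to hand out handler IDs
	 * round-robin from the group's [cqs_start_hid, cqs_start_hid +
	 * cqs_len) range, so successive CQ allocations against the same
	 * hermon_cq_sched_t spread their completion interrupts across
	 * that group's MSI-X vectors; e.g. a group with cqs_start_hid = 3
	 * and cqs_len = 2 would alternate between the EQs for HIDs 3 and 4.
	 */
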
	/*
	 * Fill in the CQC entry.  This is the final step before passing
	 * ownership of the CQC entry to the Hermon hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the CQC.  Note: If this CQ is going to be
	 * used for userland access, then we need to set the UAR page number
	 * appropriately (otherwise it's a "don't care")
	 */
	bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));

	cqc_entry.state		= HERMON_CQ_DISARMED;
	cqc_entry.pg_offs	= cq->cq_cqinfo.qa_pgoffs >> 5;
	cqc_entry.log_cq_sz	= log_cq_size;
	cqc_entry.usr_page	= uarpg;
	cqc_entry.c_eqn		= cq->cq_eqnum;
	cqc_entry.log2_pgsz	= mr->mr_log2_pgsz;
	cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
	cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
	cqc_entry.dbr_addrh = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 32);
	cqc_entry.dbr_addrl = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 3);

	/*
	 * Write the CQC entry to hardware - we pass ownership of
	 * the entry to the hardware (using the Hermon SW2HW_CQ firmware
	 * command).  Note: In general, this operation shouldn't fail.  But
	 * if it does, we have to undo everything we've done above before
	 * returning error.
	 */
	status = hermon_cmn_ownership_cmd_post(state, SW2HW_CQ, &cqc_entry,
	    sizeof (hermon_hw_cqc_t), cq->cq_cqnum, sleepflag);
	if (status != HERMON_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Hermon: SW2HW_CQ command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		status = ibc_get_ci_failure(0);
		goto cqalloc_fail6;
	}

	/*
	 * Fill in the rest of the Hermon Completion Queue handle.  Having
	 * successfully transferred ownership of the CQC, we can update the
	 * following fields for use in further operations on the CQ.
	 */
	cq->cq_resize_hdl = 0;
	cq->cq_cqcrsrcp	  = cqc;
	cq->cq_rsrcp	  = rsrc;
	cq->cq_consindx	  = 0;
		/* least restrictive */
	cq->cq_buf	  = buf;
	cq->cq_bufsz	  = (1 << log_cq_size);
	cq->cq_log_cqsz	  = log_cq_size;
	cq->cq_mrhdl	  = mr;
	cq->cq_refcnt	  = 0;
	cq->cq_is_special = 0;
	cq->cq_uarpg	  = uarpg;
	cq->cq_umap_dhp	  = (devmap_cookie_t)NULL;
	avl_create(&cq->cq_wrid_wqhdr_avl_tree, hermon_wrid_workq_compare,
	    sizeof (struct hermon_workq_avl_s),
	    offsetof(struct hermon_workq_avl_s, wqa_link));

	cq->cq_hdlrarg = (void *)ibt_cqhdl;

	/*
	 * Put CQ handle in Hermon CQNum-to-CQHdl list.  Then fill in the
	 * "actual_size" and "cqhdl" and return success
	 */
	hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, cq);

	/*
	 * If this is a user-mappable CQ, then we need to insert the
	 * previously allocated entry into the "userland resources
	 * database".  This will allow for later lookup during devmap()
	 * (i.e. mmap()) calls.
	 */
	if (cq->cq_is_umap) {
		hermon_umap_db_add(umapdb);
	}

	/*
	 * Fill in the return arguments (if necessary).  This includes the
	 * real completion queue size.
	 */
	if (actual_size != NULL) {
		*actual_size = (1 << log_cq_size) - 1;
	}
	*cqhdl = cq;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
cqalloc_fail6:
	if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
	    sleepflag) != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to deregister CQ memory");
	}
cqalloc_fail5:
	hermon_queue_free(&cq->cq_cqinfo);
cqalloc_fail4a:
	hermon_dbr_free(state, uarpg, cq->cq_arm_ci_vdbr);
cqalloc_fail4:
	if (cq_is_umap) {
		hermon_umap_db_free(umapdb);
	}
cqalloc_fail3:
	hermon_rsrc_free(state, &rsrc);
cqalloc_fail2:
	hermon_rsrc_free(state, &cqc);
cqalloc_fail1:
	hermon_pd_refcnt_dec(pd);
cqalloc_fail:
	return (status);
}


/*
 * hermon_cq_free()
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
hermon_cq_free(hermon_state_t *state, hermon_cqhdl_t *cqhdl, uint_t sleepflag)
{
	hermon_rsrc_t		*cqc, *rsrc;
	hermon_umap_db_entry_t	*umapdb;
	hermon_hw_cqc_t		cqc_entry;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;
	hermon_cqhdl_t		cq, resize;
	uint32_t		cqnum;
	uint64_t		value;
	uint_t			maxprot;
	int			status;

	/*
	 * Pull all the necessary information from the Hermon Completion
	 * Queue handle.  This is necessary here because the resource for
	 * the CQ handle is going to be freed up as part of this operation.
	 */
	cq = *cqhdl;
	mutex_enter(&cq->cq_lock);
	cqc = cq->cq_cqcrsrcp;
	rsrc = cq->cq_rsrcp;
	pd = state->hs_pdhdl_internal;
	mr = cq->cq_mrhdl;
	cqnum = cq->cq_cqnum;

	resize = cq->cq_resize_hdl;	/* save the handle for later */

	/*
	 * If there are work queues still associated with the CQ, then
	 * return an error.  Otherwise, we will be holding the CQ lock.
	 */
	if (cq->cq_refcnt != 0) {
		mutex_exit(&cq->cq_lock);
		return (IBT_CQ_BUSY);
	}

	/*
	 * If this was a user-mappable CQ, then we need to remove its entry
	 * from the "userland resources database".  If it is also currently
	 * mmap()'d out to a user process, then we need to call
	 * devmap_devmem_remap() to remap the CQ memory to an invalid
	 * mapping.  We also need to invalidate the CQ tracking information
	 * for the user mapping.
	 */
	if (cq->cq_is_umap) {
		status = hermon_umap_db_find(state->hs_instance, cqnum,
		    MLNX_UMAP_CQMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
		    &umapdb);
		if (status != DDI_SUCCESS) {
			mutex_exit(&cq->cq_lock);
			HERMON_WARNING(state, "failed to find in database");
			return (ibc_get_ci_failure(0));
		}
		hermon_umap_db_free(umapdb);
		if (cq->cq_umap_dhp != NULL) {
			maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
			status = devmap_devmem_remap(cq->cq_umap_dhp,
			    state->hs_dip, 0, 0, cq->cq_cqinfo.qa_size,
			    maxprot, DEVMAP_MAPPING_INVALID, NULL);
			if (status != DDI_SUCCESS) {
				mutex_exit(&cq->cq_lock);
				HERMON_WARNING(state, "failed in CQ memory "
				    "devmap_devmem_remap()");
				return (ibc_get_ci_failure(0));
			}
			cq->cq_umap_dhp = (devmap_cookie_t)NULL;
		}
	}

	/*
	 * Put NULL into the Hermon CQNum-to-CQHdl list.  This will allow
	 * any in-progress events to detect that the CQ corresponding to
	 * this number has been freed.
	 */
	hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, NULL);

	mutex_exit(&cq->cq_lock);

	/*
	 * Reclaim CQC entry from hardware (using the Hermon HW2SW_CQ
	 * firmware command).  If the ownership transfer fails for any
	 * reason, then it is an indication that something (either in HW
	 * or SW) has gone seriously wrong.
	 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_CQ, &cqc_entry,
	    sizeof (hermon_hw_cqc_t), cqnum, sleepflag);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to reclaim CQC ownership");
		cmn_err(CE_CONT, "Hermon: HW2SW_CQ command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * From here on, we start relinquishing resources - but check to see
	 * if a resize was in progress - if so, we need to relinquish those
	 * resources as well
	 */


	/*
	 * Deregister the memory for the Completion Queue.  If this fails
	 * for any reason, then it is an indication that something (either
	 * in HW or SW) has gone seriously wrong.  So we print a warning
	 * message and return.
	 */
	status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
	    sleepflag);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to deregister CQ memory");
		return (ibc_get_ci_failure(0));
	}

	if (resize) {	/* there was a pointer to a handle */
		mr = resize->cq_mrhdl;	/* reuse the pointer to the region */
		status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
		    sleepflag);
		if (status != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister resize CQ "
			    "memory");
			return (ibc_get_ci_failure(0));
		}
	}

	/* Free the memory for the CQ */
	hermon_queue_free(&cq->cq_cqinfo);
	if (resize) {
		hermon_queue_free(&resize->cq_cqinfo);
		/* and the temporary handle */
		kmem_free(resize, sizeof (struct hermon_sw_cq_s));
	}

	/* everything else does not matter for the resize in progress */

	/* Free the dbr */
	hermon_dbr_free(state, cq->cq_uarpg, cq->cq_arm_ci_vdbr);

	/* Free the Hermon Completion Queue handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the CQC entry resource */
	hermon_rsrc_free(state, &cqc);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the cqhdl pointer to NULL and return success */
	*cqhdl = NULL;

	return (DDI_SUCCESS);
}

/*
 * hermon_cq_resize()
 *    Context: Can be called only from user or kernel context.
 */
int
hermon_cq_resize(hermon_state_t *state, hermon_cqhdl_t cq, uint_t req_size,
    uint_t *actual_size, uint_t sleepflag)
{
	hermon_hw_cqc_t		cqc_entry;
	hermon_cqhdl_t		resize_hdl;
	hermon_qalloc_info_t	new_cqinfo;
	ibt_mr_attr_t		mr_attr;
	hermon_mr_options_t	op;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;
	hermon_hw_cqe_t		*buf;
	uint32_t		new_prod_indx;
	uint_t			log_cq_size;
	int			status, flag;

	if (cq->cq_resize_hdl != 0) {	/* already in process */
		status = IBT_CQ_BUSY;
		goto cqresize_fail;
	}


	/* Use the internal protection domain (PD) for CQs */
	pd = state->hs_pdhdl_internal;

	/*
	 * Calculate the appropriate size for the new resized completion
	 * queue.  Note:  All Hermon CQs must be a power-of-2 minus 1 in
	 * size.  Also they may not be any smaller than HERMON_CQ_MIN_SIZE.
	 * This step is to round the requested size up to the next highest
	 * power-of-2.
	 */
	req_size = max(req_size, HERMON_CQ_MIN_SIZE);
	log_cq_size = highbit(req_size);

	/*
	 * Next we verify that the rounded-up size is valid (i.e. consistent
	 * with the device limits and/or software-configured limits)
	 */
	if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
		status = IBT_HCA_CQ_EXCEEDED;
		goto cqresize_fail;
	}

	/*
	 * Allocate the memory for newly resized Completion Queue.
	 *
	 * Note: Although we use the common queue allocation routine, we
	 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
	 * kernel system memory) for kernel CQs because it would be
	 * inefficient to have CQs located in DDR memory.  This is the same
	 * as we do when we first allocate completion queues primarily
	 * because CQs are read from (by software) more than they are
	 * written to. (We always specify HERMON_QUEUE_LOCATION_USERLAND
	 * for all user-mappable CQs for a similar reason.)
	 * It is also worth noting that, unlike Hermon QP work queues,
	 * completion queues do not have the same strict alignment
	 * requirements.  It is sufficient for the CQ memory to be both
	 * aligned to and bound to addresses which are a multiple of CQE
	 * size.
	 */

	/* first, alloc the resize_handle */
	resize_hdl = kmem_zalloc(sizeof (struct hermon_sw_cq_s), KM_SLEEP);

	new_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
	new_cqinfo.qa_alloc_align = PAGESIZE;
	new_cqinfo.qa_bind_align = PAGESIZE;
	if (cq->cq_is_umap) {
		new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
	} else {
		new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
	}
	status = hermon_queue_alloc(state, &new_cqinfo, sleepflag);
	if (status != DDI_SUCCESS) {
		/* free the resize handle */
		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
		status = IBT_INSUFF_RESOURCE;
		goto cqresize_fail;
	}
	buf = (hermon_hw_cqe_t *)new_cqinfo.qa_buf_aligned;

	/*
	 * No initialization of the cq is needed - the command will do it
	 */

	/*
	 * Register the memory for the CQ.  The memory for the CQ must
	 * be registered in the Hermon TPT tables.  This gives us the LKey
	 * to specify in the CQ context below.
	 */
	flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
	mr_attr.mr_len = new_cqinfo.qa_size;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
	op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
	op.mro_bind_dmahdl = new_cqinfo.qa_dmahdl;
	op.mro_bind_override_addr = 0;
	status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
	    HERMON_CQ_CMPT);
	if (status != DDI_SUCCESS) {
		hermon_queue_free(&new_cqinfo);
		/* free the resize handle */
		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
		status = IBT_INSUFF_RESOURCE;
		goto cqresize_fail;
	}

	/*
	 * Now we grab the CQ lock.  Since we will be updating the actual
	 * CQ location and the producer/consumer indexes, we should hold
	 * the lock.
	 *
	 * We use HERMON_CMD_NOSLEEP_SPIN here (and below), though, because
	 * we are holding the "cq_lock" and if we got raised to interrupt
	 * level by priority inversion, we would not want to block in this
	 * routine waiting for success.
	 */
	mutex_enter(&cq->cq_lock);

	/*
	 * Fill in the CQC entry.  For the resize operation this is the
	 * final step before attempting the resize operation on the CQC
	 * entry.  We use all of the information collected/calculated above
	 * to fill in the requisite portions of the CQC.
	 */
	bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
	cqc_entry.log_cq_sz	= log_cq_size;
	cqc_entry.pg_offs	= new_cqinfo.qa_pgoffs >> 5;
	cqc_entry.log2_pgsz	= mr->mr_log2_pgsz;
	cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
	cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;

	/*
	 * Write the CQC entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware (using the Hermon RESIZE_CQ firmware
	 * command).  Note: In general, this operation shouldn't fail.  But
	 * if it does, we have to undo everything we've done above before
	 * returning error.  Also note that the status returned may
	 * indicate the code to return to the IBTF.
	 */
	status = hermon_resize_cq_cmd_post(state, &cqc_entry, cq->cq_cqnum,
	    &new_prod_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		/* Resize attempt has failed, drop CQ lock and cleanup */
		mutex_exit(&cq->cq_lock);
		if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
		    sleepflag) != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister CQ "
			    "memory");
		}
		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
		hermon_queue_free(&new_cqinfo);
		if (status == HERMON_CMD_BAD_SIZE) {
			return (IBT_CQ_SZ_INSUFFICIENT);
		} else {
			cmn_err(CE_CONT, "Hermon: RESIZE_CQ command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * For Hermon, we've alloc'd another handle structure and save off
	 * the important things in it.  Then, in polling we check to see if
	 * there's a "resizing handle" and if so we look for the "special
	 * CQE", opcode 0x16, that indicates the transition to the new
	 * buffer.
	 *
	 * At that point, we'll adjust everything - including dereg and
	 * freeing of the original buffer, updating all the necessary fields
	 * in the cq_hdl, and setting up for the next cqe polling
	 */

	resize_hdl->cq_buf	= buf;
	resize_hdl->cq_bufsz	= (1 << log_cq_size);
	resize_hdl->cq_mrhdl	= mr;
	resize_hdl->cq_log_cqsz	= log_cq_size;

	bcopy(&new_cqinfo, &(resize_hdl->cq_cqinfo),
	    sizeof (struct hermon_qalloc_info_s));

	/* now, save the address in the cq_handle */
	cq->cq_resize_hdl = resize_hdl;

	/*
	 * Drop the CQ lock now.
	 */

	mutex_exit(&cq->cq_lock);
	/*
	 * Fill in the return arguments (if necessary).  This includes the
	 * real new completion queue size.
	 */
	if (actual_size != NULL) {
		*actual_size = (1 << log_cq_size) - 1;
	}

	return (DDI_SUCCESS);

cqresize_fail:
	return (status);
}


/*
 * hermon_cq_modify()
 *    Context: Can be called from base context.
 */
/* ARGSUSED */
int
hermon_cq_modify(hermon_state_t *state, hermon_cqhdl_t cq,
    uint_t count, uint_t usec, ibt_cq_handler_id_t hid, uint_t sleepflag)
{
	int		status;
	hermon_hw_cqc_t	cqc_entry;

	mutex_enter(&cq->cq_lock);
	if (count != cq->cq_intmod_count ||
	    usec != cq->cq_intmod_usec) {
		bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
		cqc_entry.cq_max_cnt = count;
		cqc_entry.cq_period = usec;
		status = hermon_modify_cq_cmd_post(state, &cqc_entry,
		    cq->cq_cqnum, MODIFY_MODERATION_CQ, sleepflag);
		if (status != HERMON_CMD_SUCCESS) {
			mutex_exit(&cq->cq_lock);
			cmn_err(CE_CONT, "Hermon: MODIFY_MODERATION_CQ "
			    "command failed: %08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
		cq->cq_intmod_count = count;
		cq->cq_intmod_usec = usec;
	}
	if (hid && (hid - 1 != cq->cq_eqnum)) {
		bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
		cqc_entry.c_eqn = HERMON_HID_TO_EQNUM(state, hid);
		status = hermon_modify_cq_cmd_post(state, &cqc_entry,
		    cq->cq_cqnum, MODIFY_EQN, sleepflag);
		if (status != HERMON_CMD_SUCCESS) {
			mutex_exit(&cq->cq_lock);
			cmn_err(CE_CONT, "Hermon: MODIFY_EQN command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
		cq->cq_eqnum = hid - 1;
	}
	mutex_exit(&cq->cq_lock);
	return (DDI_SUCCESS);
}

/*
 * hermon_cq_notify()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_cq_notify(hermon_state_t *state, hermon_cqhdl_t cq,
    ibt_cq_notify_flags_t flags)
{
	uint_t		cmd;
	ibt_status_t	status;

	/* Validate IBT flags and call doorbell routine. */
	if (flags == IBT_NEXT_COMPLETION) {
		cmd = HERMON_CQDB_NOTIFY_CQ;
	} else if (flags == IBT_NEXT_SOLICITED) {
		cmd = HERMON_CQDB_NOTIFY_CQ_SOLICIT;
	} else {
		return (IBT_CQ_NOTIFY_TYPE_INVALID);
	}

	status = hermon_cq_arm_doorbell(state, cq, cmd);
	return (status);
}

/*
 * hermon_cq_poll()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_cq_poll(hermon_state_t *state, hermon_cqhdl_t cq, ibt_wc_t *wc_p,
    uint_t num_wc, uint_t *num_polled)
{
	hermon_hw_cqe_t	*cqe;
	uint_t		opcode;
	uint32_t	cons_indx, wrap_around_mask, shift, mask;
	uint32_t	polled_cnt, spec_op = 0;
	int		status;

	/*
	 * Check for user-mappable CQ memory.  Note:  We do not allow kernel
	 * clients to poll CQ memory that is accessible directly by the
	 * user.  If the CQ memory is user accessible, then return an error.
	 */
	if (cq->cq_is_umap) {
		return (IBT_CQ_HDL_INVALID);
	}

	mutex_enter(&cq->cq_lock);

	/* Get the consumer index */
	cons_indx = cq->cq_consindx;
	shift = cq->cq_log_cqsz;
	mask = cq->cq_bufsz;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Hermon completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_bufsz - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

	/*
	 * Keep pulling entries from the CQ until we find an entry owned by
	 * the hardware.  As long as there are CQEs owned by SW, process
	 * each entry by calling hermon_cq_cqe_consume() and updating the
	 * CQ consumer index.  Note:  We only update the consumer index if
	 * hermon_cq_cqe_consume() returns HERMON_CQ_SYNC_AND_DB.
	 * Otherwise, it indicates that we are going to "recycle" the CQE
	 * (probably because it is an error CQE and corresponds to more
	 * than one completion).
	 */
	polled_cnt = 0;
	while (HERMON_CQE_OWNER_IS_SW(cq, cqe, cons_indx, shift, mask)) {
		if (cq->cq_resize_hdl != 0) {	/* in midst of resize */
			/* peek at the opcode */
			opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
			if (opcode == HERMON_CQE_RCV_RESIZE_CODE) {
				hermon_cq_resize_helper(state, cq);

				/* Increment the consumer index */
				cons_indx = (cons_indx + 1);
				spec_op = 1; /* plus one for the limiting CQE */

				wrap_around_mask = (cq->cq_bufsz - 1);

				/* Update the pointer to the next CQ entry */
				cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

				continue;
			}
		}	/* in resizing CQ */

		/*
		 * either resizing and not the special opcode, or
		 * not resizing at all
		 */
		hermon_cq_cqe_consume(state, cq, cqe, &wc_p[polled_cnt++]);

		/* Increment the consumer index */
		cons_indx = (cons_indx + 1);

		/* Update the pointer to the next CQ entry */
		cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

		/*
		 * If we have run out of space to store work completions,
		 * then stop and return the ones we have pulled off the CQ.
		 */
		if (polled_cnt >= num_wc) {
			break;
		}
	}

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.
	 */
	if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;
		hermon_cq_update_ci_doorbell(cq);

	} else if (polled_cnt == 0) {
		if (spec_op != 0) {
			/* if we got the special opcode, update the consindx */
			cq->cq_consindx = cons_indx;
			hermon_cq_update_ci_doorbell(cq);
		}
	}

	mutex_exit(&cq->cq_lock);

	/* Set "num_polled" (if necessary) */
	if (num_polled != NULL) {
		*num_polled = polled_cnt;
	}

	/* Set CQ_EMPTY condition if needed, otherwise return success */
	if (polled_cnt == 0) {
		status = IBT_CQ_EMPTY;
	} else {
		status = DDI_SUCCESS;
	}

	/*
	 * Check if the system is currently panicking.  If it is, then call
	 * the Hermon interrupt service routine.  This step is necessary
	 * here because we might be in a polled I/O mode and without the
	 * call to hermon_isr() - and its subsequent calls to poll and rearm
	 * each event queue - we might overflow our EQs and render the
	 * system unable to sync/dump.
	 */
	if (ddi_in_panic() != 0) {
		(void) hermon_isr((caddr_t)state, (caddr_t)NULL);
	}
	return (status);
}

/*
 * cmd_sn must be initialized to 1 to enable proper reenabling
 * by hermon_arm_cq_dbr_update().
 */
static void
hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr)
{
	uint32_t *target;

	target = (uint32_t *)cq_arm_dbr + 1;
	*target = htonl(1 << HERMON_CQDB_CMDSN_SHIFT);
}
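
/*
 * Worked example (comment only, assuming the usual layout of two cmd_sn
 * bits at HERMON_CQDB_CMDSN_SHIFT and three cmd bits four bits below at
 * HERMON_CQDB_CMD_SHIFT): the arm doorbell's command sequence number
 * cycles 1 -> 2 -> 3 -> 0 -> 1 ... modulo 4.  Each completion event
 * advances it via the CAS loop below, so a userland re-arm carrying a
 * stale cmd_sn is ignored by hardware.  Under that layout the 0x37 mask
 * (0x30 | 0x07) clears both the cmd_sn and cmd fields in one step
 * before the new cmd_sn is OR'd back in with cmd left at zero.
 */
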
/*
 * User cmd_sn needs help from this kernel function to know
 * when it should be incremented (modulo 4).  We do an atomic
 * update of the arm_cq dbr to communicate this fact.  We retry
 * in the case that user library is racing with us.  We zero
 * out the cmd field so that the user library can use the cmd
 * field to track the last command it issued (solicited versus any).
 */
static void
hermon_arm_cq_dbr_update(hermon_dbr_t *cq_arm_dbr)
{
	uint32_t tmp, cmp, new;
	uint32_t old_cmd_sn, new_cmd_sn;
	uint32_t *target;
	int retries = 0;

	target = (uint32_t *)cq_arm_dbr + 1;
retry:
	cmp = *target;
	tmp = htonl(cmp);
	old_cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	new_cmd_sn = (old_cmd_sn + (0x1 << HERMON_CQDB_CMDSN_SHIFT)) &
	    (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	new = htonl((tmp & ~(0x37 << HERMON_CQDB_CMD_SHIFT)) | new_cmd_sn);
	tmp = atomic_cas_32(target, cmp, new);
	if (tmp != cmp) {	/* cas failed, so need to retry */
		drv_usecwait(retries & 0xff);	/* avoid race */
		if (++retries > 100000) {
			cmn_err(CE_CONT, "cas failed in hermon\n");
			retries = 0;
		}
		goto retry;
	}
}


/*
 * hermon_cq_handler()
 *    Context: Only called from interrupt context
 */
/* ARGSUSED */
int
hermon_cq_handler(hermon_state_t *state, hermon_eqhdl_t eq,
    hermon_hw_eqe_t *eqe)
{
	hermon_cqhdl_t		cq;
	uint_t			cqnum;

	/* Get the CQ handle from CQ number in event descriptor */
	cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
	cq = hermon_cqhdl_from_cqnum(state, cqnum);

	/*
	 * If the CQ handle is NULL, this is probably an indication
	 * that the CQ has been freed already.  In which case, we
	 * should not deliver this event.
	 *
	 * We also check that the CQ number in the handle is the
	 * same as the CQ number in the event queue entry.  This
	 * extra check allows us to handle the case where a CQ was
	 * freed and then allocated again in the time it took to
	 * handle the event queue processing.  By constantly incrementing
	 * the non-constrained portion of the CQ number every time
	 * a new CQ is allocated, we mitigate (somewhat) the chance
	 * that a stale event could be passed to the client's CQ
	 * handler.
	 *
	 * Lastly, we check if "hs_ibtfpriv" is NULL.  If it is then it
	 * means that we have either received this event before we
	 * finished attaching to the IBTF or we've received it while we
	 * are in the process of detaching.
	 */
	if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
	    (state->hs_ibtfpriv != NULL)) {
		hermon_arm_cq_dbr_update(cq->cq_arm_ci_vdbr);
		HERMON_DO_IBTF_CQ_CALLB(state, cq);
	}

	return (DDI_SUCCESS);
}
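
/*
 * Worked example (comment only) of the stale-event check above: with
 * cp_log_num_cq = 16, hermon_cqhdl_from_cqnum() masks the CQ number
 * down to its low 16 "constrained" bits to index the ICM table, so a
 * cqnum of 0x5ABCD maps to table index 0xABCD.  The upper bits (0x5
 * here) are varied across reallocations, which is why comparing
 * cq->cq_cqnum against the full cqnum from the EQE catches most events
 * destined for a previously freed CQ.
 */
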
/*
 * hermon_cq_err_handler()
 *    Context: Only called from interrupt context
 */
/* ARGSUSED */
int
hermon_cq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq,
    hermon_hw_eqe_t *eqe)
{
	hermon_cqhdl_t		cq;
	uint_t			cqnum;
	ibc_async_event_t	event;
	ibt_async_code_t	type;

	HERMON_FMANOTE(state, HERMON_FMA_OVERRUN);
	/* Get the CQ handle from CQ number in event descriptor */
	cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
	cq = hermon_cqhdl_from_cqnum(state, cqnum);

	/*
	 * If the CQ handle is NULL, this is probably an indication
	 * that the CQ has been freed already.  In which case, we
	 * should not deliver this event.
	 *
	 * We also check that the CQ number in the handle is the
	 * same as the CQ number in the event queue entry.  This
	 * extra check allows us to handle the case where a CQ was
	 * freed and then allocated again in the time it took to
	 * handle the event queue processing.  By constantly incrementing
	 * the non-constrained portion of the CQ number every time
	 * a new CQ is allocated, we mitigate (somewhat) the chance
	 * that a stale event could be passed to the client's CQ
	 * handler.
	 *
	 * And then we check if "hs_ibtfpriv" is NULL.  If it is then it
	 * means that we have either received this event before we
	 * finished attaching to the IBTF or we've received it while we
	 * are in the process of detaching.
	 */
	if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
	    (state->hs_ibtfpriv != NULL)) {
		event.ev_cq_hdl = (ibt_cq_hdl_t)cq->cq_hdlrarg;
		type = IBT_ERROR_CQ;
		HERMON_DO_IBTF_ASYNC_CALLB(state, type, &event);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_cq_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_cq_refcnt_inc(hermon_cqhdl_t cq, uint_t is_special)
{
	/*
	 * Increment the completion queue's reference count.  Note: In order
	 * to ensure compliance with IBA C11-15, we must ensure that a given
	 * CQ is not used for both special (SMI/GSI) QP and non-special QP.
	 * This is accomplished here by keeping track of how the referenced
	 * CQ is being used.
	 */
	mutex_enter(&cq->cq_lock);
	if (cq->cq_refcnt == 0) {
		cq->cq_is_special = is_special;
	} else {
		if (cq->cq_is_special != is_special) {
			mutex_exit(&cq->cq_lock);
			return (DDI_FAILURE);
		}
	}
	cq->cq_refcnt++;
	mutex_exit(&cq->cq_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_cq_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_cq_refcnt_dec(hermon_cqhdl_t cq)
{
	/* Decrement the completion queue's reference count */
	mutex_enter(&cq->cq_lock);
	cq->cq_refcnt--;
	mutex_exit(&cq->cq_lock);
}


/*
 * hermon_cq_arm_doorbell()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq, uint_t cq_cmd)
{
	uint32_t	cq_num;
	uint32_t	*target;
	uint32_t	old_cmd, cmp, new, tmp, cmd_sn;
	ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	cq_num = cq->cq_cqnum;
	target = (uint32_t *)cq->cq_arm_ci_vdbr + 1;

	/* the FMA retry loop starts for Hermon doorbell register. */
	hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
	    fm_test_num);
retry:
	cmp = *target;
	tmp = htonl(cmp);
	old_cmd = tmp & (0x7 << HERMON_CQDB_CMD_SHIFT);
	cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	if (cq_cmd == HERMON_CQDB_NOTIFY_CQ) {
		if (old_cmd != HERMON_CQDB_NOTIFY_CQ) {
			cmd_sn |= (HERMON_CQDB_NOTIFY_CQ <<
			    HERMON_CQDB_CMD_SHIFT);
			new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
			    &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
			    32) | (cq->cq_consindx & 0xFFFFFF));
		}	/* else it's already armed */
	} else {
		ASSERT(cq_cmd == HERMON_CQDB_NOTIFY_CQ_SOLICIT);
		if (old_cmd != HERMON_CQDB_NOTIFY_CQ &&
		    old_cmd != HERMON_CQDB_NOTIFY_CQ_SOLICIT) {
			cmd_sn |= (HERMON_CQDB_NOTIFY_CQ_SOLICIT <<
			    HERMON_CQDB_CMD_SHIFT);
			new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
			    &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
			    32) | (cq->cq_consindx & 0xFFFFFF));
		}	/* else it's already armed */
	}

	/* the FMA retry loop ends. */
	hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
	    fm_test_num);

	return (IBT_SUCCESS);

pio_error:
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}


/*
 * hermon_cqhdl_from_cqnum()
 *    Context: Can be called from interrupt or base context.
 *
 *    This routine is important because changing the unconstrained
 *    portion of the CQ number is critical to the detection of a
 *    potential race condition in the CQ handler code (i.e. the case
 *    where a CQ is freed and alloc'd again before an event for the
 *    "old" CQ can be handled).
 *
 *    While this is not a perfect solution (not sure that one exists)
 *    it does help to mitigate the chance that this race condition will
 *    cause us to deliver a "stale" event to the new CQ owner.  Note:
 *    this solution does not scale well because the number of constrained
 *    bits increases (and, hence, the number of unconstrained bits
 *    decreases) as the number of supported CQs grows.  For small and
 *    intermediate values, it should hopefully provide sufficient
 *    protection.
 */
hermon_cqhdl_t
hermon_cqhdl_from_cqnum(hermon_state_t *state, uint_t cqnum)
{
	uint_t	cqindx, cqmask;

	/* Calculate the CQ table index from the cqnum */
	cqmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1;
	cqindx = cqnum & cqmask;
	return (hermon_icm_num_to_hdl(state, HERMON_CQC, cqindx));
}

/*
 * hermon_cq_cqe_consume()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint_t		opcode, qpnum, qp1_indx;
	ibt_wc_flags_t	flags;
	ibt_wrc_opcode_t type;

	/*
	 * Determine if this is an "error" CQE by examining "opcode".  If
	 * it is an error CQE, then call hermon_cq_errcqe_consume() and
	 * return whatever status it returns.  Otherwise, this is a
	 * successful completion.
	 */
	opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
	if ((opcode == HERMON_CQE_SEND_ERR_OPCODE) ||
	    (opcode == HERMON_CQE_RECV_ERR_OPCODE)) {
		hermon_cq_errcqe_consume(state, cq, cqe, wc);
		return;
	}

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See hermon_wr.c for more details.
	 */
	wc->wc_id = hermon_wrid_get_entry(cq, cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  This will set
	 * not only the type of the completion, but also any flags that
	 * might be associated with it (e.g. whether immediate data is
	 * present).
	 */
	flags = IBT_WC_NO_FLAGS;
	if (HERMON_CQE_SENDRECV_GET(cq, cqe) != HERMON_COMPLETION_RECV) {

		/* Send CQE */
		switch (opcode) {
		case HERMON_CQE_SND_RDMAWR_IMM:
		case HERMON_CQE_SND_RDMAWR:
			type = IBT_WRC_RDMAW;
			break;

		case HERMON_CQE_SND_SEND_INV:
		case HERMON_CQE_SND_SEND_IMM:
		case HERMON_CQE_SND_SEND:
			type = IBT_WRC_SEND;
			break;

		case HERMON_CQE_SND_LSO:
			type = IBT_WRC_SEND_LSO;
			break;

		case HERMON_CQE_SND_RDMARD:
			type = IBT_WRC_RDMAR;
			break;

		case HERMON_CQE_SND_ATOMIC_CS:
			type = IBT_WRC_CSWAP;
			break;

		case HERMON_CQE_SND_ATOMIC_FA:
			type = IBT_WRC_FADD;
			break;

		case HERMON_CQE_SND_BIND_MW:
			type = IBT_WRC_BIND;
			break;

		case HERMON_CQE_SND_FRWR:
			type = IBT_WRC_FAST_REG_PMR;
			break;

		case HERMON_CQE_SND_LCL_INV:
			type = IBT_WRC_LOCAL_INVALIDATE;
			break;

		default:
			HERMON_WARNING(state, "unknown send CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			return;
		}
	} else if ((state->hs_fcoib_may_be_running == B_TRUE) &&
	    hermon_fcoib_is_fexch_qpn(state, HERMON_CQE_QPNUM_GET(cq, cqe))) {
		type = IBT_WRC_RECV;
		if (HERMON_CQE_FEXCH_DIFE(cq, cqe))
			flags |= IBT_WC_DIF_ERROR;
		wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
		wc->wc_fexch_seq_cnt = HERMON_CQE_FEXCH_SEQ_CNT(cq, cqe);
		wc->wc_fexch_tx_bytes_xfer = HERMON_CQE_FEXCH_TX_BYTES(cq, cqe);
		wc->wc_fexch_rx_bytes_xfer = HERMON_CQE_FEXCH_RX_BYTES(cq, cqe);
		wc->wc_fexch_seq_id = HERMON_CQE_FEXCH_SEQ_ID(cq, cqe);
		wc->wc_detail = HERMON_CQE_FEXCH_DETAIL(cq, cqe) &
		    IBT_WC_DETAIL_FC_MATCH_MASK;
		wc->wc_rkey = HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
		flags |= IBT_WC_FEXCH_FMT | IBT_WC_RKEY_INVALIDATED;
	} else {
		/*
		 * Parse the remaining contents of the CQE into the work
		 * completion.  This means filling in SL, QP number, SLID,
		 * immediate data, etc.
		 *
		 * Note:  Not all of these fields are valid in a given
		 * completion.  Many of them depend on the actual type of
		 * completion.  So we fill in all of the fields and leave
		 * it up to the IBTF and consumer to sort out which are
		 * valid based on their context.
		 */
		wc->wc_sl	= HERMON_CQE_SL_GET(cq, cqe);
		wc->wc_qpn	= HERMON_CQE_DQPN_GET(cq, cqe);
		wc->wc_slid	= HERMON_CQE_DLID_GET(cq, cqe);
		wc->wc_immed_data =
		    HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
		wc->wc_ethertype = (wc->wc_immed_data & 0xFFFF);
		wc->wc_pkey_ix	= (wc->wc_immed_data &
		    ((1 << state->hs_queryport.log_max_pkey) - 1));
		/*
		 * Fill in "bytes transferred" as appropriate.  Also,
		 * if necessary, fill in the "path bits" field.
		 */
		wc->wc_path_bits = HERMON_CQE_PATHBITS_GET(cq, cqe);
		wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);

		/*
		 * Check for GRH, update the flags, then fill in "wc_flags"
		 * field in the work completion
		 */
		if (HERMON_CQE_GRH_GET(cq, cqe) != 0) {
			flags |= IBT_WC_GRH_PRESENT;
		}

		/* Receive CQE */
		switch (opcode) {
		case HERMON_CQE_RCV_SEND_IMM:
			/*
			 * Note:  According to the PRM, all QP1 recv
			 * completions look like the result of a Send with
			 * Immediate.  They are not, however, (MADs are Send
			 * Only) so we need to check the QP number and set
			 * the flag only if it is non-QP1.
			 */
			qpnum = HERMON_CQE_QPNUM_GET(cq, cqe);
			qp1_indx = state->hs_spec_qp1->hr_indx;
			if ((qpnum < qp1_indx) || (qpnum > qp1_indx + 1)) {
				flags |= IBT_WC_IMMED_DATA_PRESENT;
			}
			/* FALLTHROUGH */

		case HERMON_CQE_RCV_SEND:
			type = IBT_WRC_RECV;
			if (HERMON_CQE_IS_IPOK(cq, cqe)) {
				wc->wc_cksum = HERMON_CQE_CKSUM(cq, cqe);
				flags |= IBT_WC_CKSUM_OK;
				wc->wc_detail = IBT_WC_DETAIL_ALL_FLAGS_MASK &
				    HERMON_CQE_IPOIB_STATUS(cq, cqe);
			}
			break;

		case HERMON_CQE_RCV_SEND_INV:
			type = IBT_WRC_RECV;
			flags |= IBT_WC_RKEY_INVALIDATED;
			wc->wc_rkey = wc->wc_immed_data; /* same field in cqe */
			break;

		case HERMON_CQE_RCV_RDMAWR_IMM:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			type = IBT_WRC_RECV_RDMAWI;
			break;

		default:
			HERMON_WARNING(state, "unknown recv CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			return;
		}
	}
	wc->wc_type = type;
	wc->wc_flags = flags;
	wc->wc_status = IBT_WC_SUCCESS;
}

/*
 * hermon_cq_errcqe_consume()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint32_t	imm_eth_pkey_cred;
	uint_t		status;
	ibt_wc_status_t	ibt_status;

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See hermon_wr.c for more details.
	 */
	wc->wc_id = hermon_wrid_get_entry(cq, cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  We know that
	 * the CQE is an error completion, so we extract only the completion
	 * status/syndrome here.
	 */
	imm_eth_pkey_cred = HERMON_CQE_ERROR_SYNDROME_GET(cq, cqe);
	status = imm_eth_pkey_cred;
	if (status != HERMON_CQE_WR_FLUSHED_ERR)
		IBTF_DPRINTF_L2("CQE ERR", "cqe %p QPN %x indx %x status 0x%x "
		    "vendor syndrome %x", cqe, HERMON_CQE_QPNUM_GET(cq, cqe),
		    HERMON_CQE_WQECNTR_GET(cq, cqe), status,
		    HERMON_CQE_ERROR_VENDOR_SYNDROME_GET(cq, cqe));
	switch (status) {
	case HERMON_CQE_LOC_LEN_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCLEN);
		ibt_status = IBT_WC_LOCAL_LEN_ERR;
		break;

	case HERMON_CQE_LOC_OP_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCQPOP);
		ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
		break;

	case HERMON_CQE_LOC_PROT_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCPROT);
		ibt_status = IBT_WC_LOCAL_PROTECT_ERR;
		IBTF_DPRINTF_L2("ERRCQE", "is at %p", cqe);
		if (hermon_should_panic) {
			cmn_err(CE_PANIC, "Hermon intentional PANIC - "
			    "Local Protection Error\n");
		}
		break;

	case HERMON_CQE_WR_FLUSHED_ERR:
		ibt_status = IBT_WC_WR_FLUSHED_ERR;
		break;

	case HERMON_CQE_MW_BIND_ERR:
		HERMON_WARNING(state, HERMON_FMA_MWBIND);
		ibt_status = IBT_WC_MEM_WIN_BIND_ERR;
		break;

	case HERMON_CQE_BAD_RESPONSE_ERR:
		HERMON_WARNING(state, HERMON_FMA_RESP);
		ibt_status = IBT_WC_BAD_RESPONSE_ERR;
		break;

	case HERMON_CQE_LOCAL_ACCESS_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCACC);
		ibt_status = IBT_WC_LOCAL_ACCESS_ERR;
		break;

	case HERMON_CQE_REM_INV_REQ_ERR:
		HERMON_WARNING(state, HERMON_FMA_REMREQ);
		ibt_status = IBT_WC_REMOTE_INVALID_REQ_ERR;
		break;

	case HERMON_CQE_REM_ACC_ERR:
		HERMON_WARNING(state, HERMON_FMA_REMACC);
		ibt_status = IBT_WC_REMOTE_ACCESS_ERR;
		break;

	case HERMON_CQE_REM_OP_ERR:
		HERMON_WARNING(state, HERMON_FMA_REMOP);
		ibt_status = IBT_WC_REMOTE_OP_ERR;
		break;

	case HERMON_CQE_TRANS_TO_ERR:
		HERMON_WARNING(state, HERMON_FMA_XPORTCNT);
		ibt_status = IBT_WC_TRANS_TIMEOUT_ERR;
		break;

	case HERMON_CQE_RNRNAK_TO_ERR:
		HERMON_WARNING(state, HERMON_FMA_RNRCNT);
		ibt_status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
		break;

	/*
	 * The following error codes are not supported in the Hermon driver
	 * as they relate only to Reliable Datagram completion statuses:
	 *    case HERMON_CQE_LOCAL_RDD_VIO_ERR:
	 *    case HERMON_CQE_REM_INV_RD_REQ_ERR:
	 *    case HERMON_CQE_EEC_REM_ABORTED_ERR:
	 *    case HERMON_CQE_INV_EEC_NUM_ERR:
	 *    case HERMON_CQE_INV_EEC_STATE_ERR:
	 *    case HERMON_CQE_LOC_EEC_ERR:
	 */

	default:
		HERMON_WARNING(state, "unknown error CQE status");
		HERMON_FMANOTE(state, HERMON_FMA_UNKN);
		ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
		break;
	}

	wc->wc_status = ibt_status;
}


/*
 * hermon_cq_resize_helper()
 *    Context: Can be called only from user or kernel context.
 */
void
hermon_cq_resize_helper(hermon_state_t *state, hermon_cqhdl_t cq)
{
	hermon_cqhdl_t	resize_hdl;
	int		status;

	/*
	 * we're here because we found the special cqe opcode, so we have
	 * to update the cq_handle, release the old resources, clear the
	 * flag in the cq_hdl, and release the resize_hdl.
	 * When we return, the caller (hermon_cq_poll()) takes care of
	 * the rest.
	 */
	ASSERT(MUTEX_HELD(&cq->cq_lock));

	resize_hdl = cq->cq_resize_hdl;

	/*
	 * Deregister the memory for the old Completion Queue.  Note: We
	 * really can't return error here because we have no good way to
	 * cleanup.  Plus, the deregistration really shouldn't ever fail.
	 * So, if it does, it is an indication that something has gone
	 * seriously wrong.  So we print a warning message and carry on
	 * (knowing, of course, that the "old" CQ memory will be leaked)
	 */
	status = hermon_mr_deregister(state, &cq->cq_mrhdl, HERMON_MR_DEREG_ALL,
	    HERMON_SLEEP);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to deregister old CQ memory");
	}

	/* Next, free the memory from the old CQ buffer */
	hermon_queue_free(&cq->cq_cqinfo);

	/* now we can update the cq_hdl with the new things saved */

	cq->cq_buf	 = resize_hdl->cq_buf;
	cq->cq_mrhdl	 = resize_hdl->cq_mrhdl;
	cq->cq_bufsz	 = resize_hdl->cq_bufsz;
	cq->cq_log_cqsz	 = resize_hdl->cq_log_cqsz;
	cq->cq_umap_dhp	 = cq->cq_resize_hdl->cq_umap_dhp;
	cq->cq_resize_hdl = 0;
	bcopy(&resize_hdl->cq_cqinfo, &cq->cq_cqinfo,
	    sizeof (struct hermon_qalloc_info_s));

	/* finally, release the resizing handle */
	kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
}


/*
 * hermon_cq_entries_flush()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
void
hermon_cq_entries_flush(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_cqhdl_t		cq;
	hermon_hw_cqe_t		*cqe, *next_cqe;
	hermon_srqhdl_t		srq;
	hermon_workq_hdr_t	*wq;
	uint32_t		cons_indx, tail_cons_indx, wrap_around_mask;
	uint32_t		new_indx, check_indx, qpnum;
	uint32_t		shift, mask;
	int			outstanding_cqes;

	qpnum = qp->qp_qpnum;
	if ((srq = qp->qp_srqhdl) != NULL)
		wq = qp->qp_srqhdl->srq_wq_wqhdr;
	else
		wq = NULL;
	cq = qp->qp_rq_cqhdl;

	if (cq == NULL) {
		cq = qp->qp_sq_cqhdl;
	}

do_send_cq:	/* loop back to here if send_cq is not the same as recv_cq */
	if (cq == NULL)
		return;

	cons_indx = cq->cq_consindx;
	shift = cq->cq_log_cqsz;
	mask = cq->cq_bufsz;
	wrap_around_mask = mask - 1;

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

	/*
	 * Loop through the CQ looking for entries owned by software.  If
	 * an entry is owned by software then we increment an
	 * 'outstanding_cqes' count to know how many entries total we have
	 * on our CQ.  We use this value further down to know how many
	 * entries to loop through looking for our same QP number.
	 */
	outstanding_cqes = 0;
	tail_cons_indx = cons_indx;
	while (HERMON_CQE_OWNER_IS_SW(cq, cqe, tail_cons_indx, shift, mask)) {
		/* increment total cqes count */
		outstanding_cqes++;

		/* increment the consumer index */
		tail_cons_indx++;

		/* update the pointer to the next cq entry */
		cqe = &cq->cq_buf[tail_cons_indx & wrap_around_mask];
	}
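
	/*
	 * Worked example (comment only) of the compaction pass below:
	 * suppose indices 5..8 hold SW-owned CQEs and the QP being
	 * flushed owns those at 6 and 8.  Walking from index 8 down to 5,
	 * the survivors (7, then 5) are copied toward the high end so the
	 * kept entries land contiguously at 7..8, and the consumer index
	 * becomes 7, advanced past the two discarded CQEs.
	 */
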
Set the 'check_indx' and the 1695 * 'new_indx' to the last entry identified by 'tail_cons_indx' 1696 */ 1697 check_indx = new_indx = (tail_cons_indx - 1); 1698 1699 while (--outstanding_cqes >= 0) { 1700 cqe = &cq->cq_buf[check_indx & wrap_around_mask]; 1701 1702 /* 1703 * If the QP number is the same in the CQE as the QP, then 1704 * we must "consume" it. If it is for an SRQ wqe, then we 1705 * also must free the wqe back onto the free list of the SRQ. 1706 */ 1707 if (qpnum == HERMON_CQE_QPNUM_GET(cq, cqe)) { 1708 if (srq && (HERMON_CQE_SENDRECV_GET(cq, cqe) == 1709 HERMON_COMPLETION_RECV)) { 1710 uint64_t *desc; 1711 int indx; 1712 1713 /* Add wqe back to SRQ free list */ 1714 indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) & 1715 wq->wq_mask; 1716 desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail); 1717 ((uint16_t *)desc)[1] = htons(indx); 1718 wq->wq_tail = indx; 1719 } 1720 } else { /* CQEs for other QPNs need to remain */ 1721 if (check_indx != new_indx) { 1722 next_cqe = 1723 &cq->cq_buf[new_indx & wrap_around_mask]; 1724 /* Copy the CQE into the "next_cqe" pointer. */ 1725 bcopy(cqe, next_cqe, sizeof (hermon_hw_cqe_t)); 1726 } 1727 new_indx--; /* move index to next CQE to fill */ 1728 } 1729 check_indx--; /* move index to next CQE to check */ 1730 } 1731 1732 /* 1733 * Update consumer index to be the 'new_indx'. This moves it past all 1734 * removed entries. Because 'new_indx' is pointing to the last 1735 * previously valid SW owned entry, we add 1 to point the cons_indx to 1736 * the first HW owned entry. 1737 */ 1738 cons_indx = (new_indx + 1); 1739 1740 /* 1741 * Now we only ring the doorbell (to update the consumer index) if 1742 * we've actually consumed a CQ entry. If we found no QP number 1743 * matches above, then we would not have removed anything. So only if 1744 * something was removed do we ring the doorbell. 1745 */ 1746 if (cq->cq_consindx != cons_indx) { 1747 /* 1748 * Update the consumer index in both the CQ handle and the 1749 * doorbell record. 1750 */ 1751 cq->cq_consindx = cons_indx; 1752 1753 hermon_cq_update_ci_doorbell(cq); 1754 1755 } 1756 if (cq != qp->qp_sq_cqhdl) { 1757 cq = qp->qp_sq_cqhdl; 1758 goto do_send_cq; 1759 } 1760 } 1761 1762 /* 1763 * hermon_get_cq_sched_list() 1764 * Context: Only called from attach() path context 1765 * 1766 * Read properties, creating entries in hs_cq_sched_list with 1767 * information about the requested "expected" and "minimum" 1768 * number of MSI-X interrupt vectors per list entry. 

/*
 * hermon_get_cq_sched_list()
 *    Context: Only called from attach() path context
 *
 * Read properties, creating entries in hs_cq_sched_array with
 * information about the requested "desired" and "minimum"
 * number of MSI-X interrupt vectors per list entry.
 */
static int
hermon_get_cq_sched_list(hermon_state_t *state)
{
	char **listp, ulp_prop[HERMON_CQH_MAX + 4];
	uint_t nlist, i, j, ndata;
	int *data;
	size_t len;
	hermon_cq_sched_t *cq_schedp;

	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, state->hs_dip,
	    DDI_PROP_DONTPASS, "cqh-group-list", &listp, &nlist) !=
	    DDI_PROP_SUCCESS)
		return (0);

	state->hs_cq_sched_array_size = nlist;
	state->hs_cq_sched_array = cq_schedp = kmem_zalloc(nlist *
	    sizeof (hermon_cq_sched_t), KM_SLEEP);
	for (i = 0; i < nlist; i++) {
		if ((len = strlen(listp[i])) >= HERMON_CQH_MAX) {
			cmn_err(CE_CONT, "'cqh' property name too long\n");
			goto game_over;
		}
		for (j = 0; j < i; j++) {
			if (strcmp(listp[j], listp[i]) == 0) {
				cmn_err(CE_CONT, "Duplicate 'cqh' property\n");
				goto game_over;
			}
		}
		(void) strncpy(cq_schedp[i].cqs_name, listp[i],
		    HERMON_CQH_MAX);
		ulp_prop[0] = 'c';
		ulp_prop[1] = 'q';
		ulp_prop[2] = 'h';
		ulp_prop[3] = '-';
		(void) strncpy(ulp_prop + 4, listp[i], len + 1);
		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
		    DDI_PROP_DONTPASS, ulp_prop, &data, &ndata) !=
		    DDI_PROP_SUCCESS) {
			cmn_err(CE_CONT, "property '%s' not found\n",
			    ulp_prop);
			goto game_over;
		}
		if (ndata != 2) {
			cmn_err(CE_CONT, "property '%s' does not "
			    "have 2 integers\n", ulp_prop);
			goto game_over_free_data;
		}
		cq_schedp[i].cqs_desired = data[0];
		cq_schedp[i].cqs_minimum = data[1];
		cq_schedp[i].cqs_refcnt = 0;
		ddi_prop_free(data);
	}
	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
	    DDI_PROP_DONTPASS, "cqh-default", &data, &ndata) !=
	    DDI_PROP_SUCCESS) {
		cmn_err(CE_CONT, "property 'cqh-default' not found\n");
		goto game_over;
	}
	if (ndata != 2) {
		cmn_err(CE_CONT, "property 'cqh-default' does not "
		    "have 2 integers\n");
		goto game_over_free_data;
	}
	cq_schedp = &state->hs_cq_sched_default;
	cq_schedp->cqs_desired = data[0];
	cq_schedp->cqs_minimum = data[1];
	cq_schedp->cqs_refcnt = 0;
	ddi_prop_free(data);
	ddi_prop_free(listp);
	return (1);		/* game on */

game_over_free_data:
	ddi_prop_free(data);
game_over:
	cmn_err(CE_CONT, "Error in 'cqh' properties in hermon.conf\n");
	cmn_err(CE_CONT, "completion handler groups not being used\n");
	kmem_free(state->hs_cq_sched_array,
	    nlist * sizeof (hermon_cq_sched_t));
	state->hs_cq_sched_array = NULL;
	state->hs_cq_sched_array_size = 0;
	ddi_prop_free(listp);
	return (0);
}
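
/*
 * With the hypothetical conf fragment shown above, a successful return
 * from hermon_get_cq_sched_list() leaves hs_cq_sched_array_size == 2
 * and (before hermon_cq_sched_init() below assigns handler ranges):
 *
 *	hs_cq_sched_array[0]:	cqs_name "nfs",  cqs_desired 4, cqs_minimum 2
 *	hs_cq_sched_array[1]:	cqs_name "iser", cqs_desired 2, cqs_minimum 1
 *	hs_cq_sched_default:	cqs_desired 2,   cqs_minimum 1
 */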

/*
 * hermon_cq_sched_init()
 *    Context: Only called from attach() path context
 *
 * Read the hermon.conf properties looking for cq_sched info,
 * creating reserved pools of MSI-X interrupt ranges for the
 * specified ULPs.
 */
int
hermon_cq_sched_init(hermon_state_t *state)
{
	hermon_cq_sched_t *cq_schedp, *defp;
	int i, desired, array_size;

	mutex_init(&state->hs_cq_sched_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->hs_intrmsi_pri));

	mutex_enter(&state->hs_cq_sched_lock);
	state->hs_cq_sched_array = NULL;

	/* initialize cq_sched_default */
	defp = &state->hs_cq_sched_default;
	defp->cqs_start_hid = 1;
	defp->cqs_len = state->hs_intrmsi_allocd;
	defp->cqs_next_alloc = defp->cqs_len - 1;
	(void) strncpy(defp->cqs_name, "default", 8);

	/* Read properties to determine which ULPs use cq_sched */
	if (hermon_get_cq_sched_list(state) == 0)
		goto done;

	/* Determine if we have enough vectors, or if we have to scale down */
	desired = defp->cqs_desired;	/* default desired (from hermon.conf) */
	if (desired <= 0)
		goto done;		/* all interrupts in the default pool */
	cq_schedp = state->hs_cq_sched_array;
	array_size = state->hs_cq_sched_array_size;
	for (i = 0; i < array_size; i++)
		desired += cq_schedp[i].cqs_desired;
	if (desired > state->hs_intrmsi_allocd) {
		cmn_err(CE_CONT, "#interrupts allocated (%d) is less than "
		    "the #interrupts desired (%d)\n",
		    state->hs_intrmsi_allocd, desired);
		cmn_err(CE_CONT, "completion handler groups not being used\n");
		goto done;		/* all interrupts in the default pool */
	}
	/* Game on.  For each cq_sched group, reserve the MSI-X range */
	for (i = 0; i < array_size; i++) {
		desired = cq_schedp[i].cqs_desired;
		cq_schedp[i].cqs_start_hid = defp->cqs_start_hid;
		cq_schedp[i].cqs_len = desired;
		cq_schedp[i].cqs_next_alloc = desired - 1;
		defp->cqs_len -= desired;
		defp->cqs_start_hid += desired;
	}
	/* reset default's start allocation seed */
	state->hs_cq_sched_default.cqs_next_alloc =
	    state->hs_cq_sched_default.cqs_len - 1;

done:
	mutex_exit(&state->hs_cq_sched_lock);
	return (IBT_SUCCESS);
}
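
/*
 * Worked example (editorial) of the carve-up above, continuing the
 * hypothetical "nfs"/"iser" configuration: with hs_intrmsi_allocd == 12,
 * the total desired is 2 (default) + 4 + 2 == 8 <= 12, so the reserve
 * loop runs and assigns:
 *
 *	nfs:	 cqs_start_hid = 1, cqs_len = 4	  (handlers 1..4)
 *	iser:	 cqs_start_hid = 5, cqs_len = 2	  (handlers 5..6)
 *	default: cqs_start_hid = 7, cqs_len = 6	  (handlers 7..12)
 *
 * Note that the default pool keeps everything left over (6 vectors),
 * not just the 2 it asked for, and that each pool's cqs_next_alloc
 * seed ends up at its cqs_len - 1.
 */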

void
hermon_cq_sched_fini(hermon_state_t *state)
{
	mutex_enter(&state->hs_cq_sched_lock);
	if (state->hs_cq_sched_array_size) {
		kmem_free(state->hs_cq_sched_array,
		    sizeof (hermon_cq_sched_t) *
		    state->hs_cq_sched_array_size);
		state->hs_cq_sched_array_size = 0;
		state->hs_cq_sched_array = NULL;
	}
	mutex_exit(&state->hs_cq_sched_lock);
	mutex_destroy(&state->hs_cq_sched_lock);
}

int
hermon_cq_sched_alloc(hermon_state_t *state, ibt_cq_sched_attr_t *attr,
    hermon_cq_sched_t **cq_sched_pp)
{
	hermon_cq_sched_t	*cq_schedp;
	int			i;
	char			*name;
	ibt_cq_sched_flags_t	flags;

	flags = attr->cqs_flags;
	if ((flags & (IBT_CQS_SCHED_GROUP | IBT_CQS_EXACT_SCHED_GROUP)) == 0) {
		*cq_sched_pp = NULL;
		return (IBT_SUCCESS);
	}
	name = attr->cqs_pool_name;

	mutex_enter(&state->hs_cq_sched_lock);
	cq_schedp = state->hs_cq_sched_array;
	for (i = 0; i < state->hs_cq_sched_array_size; i++, cq_schedp++) {
		if (strcmp(name, cq_schedp->cqs_name) == 0) {
			if (cq_schedp->cqs_len != 0)
				cq_schedp->cqs_refcnt++;
			break;			/* found it */
		}
	}
	if ((i == state->hs_cq_sched_array_size) ||	/* not found, or */
	    (cq_schedp->cqs_len == 0))	/* defined, but no dedicated intr's */
		cq_schedp = NULL;
	mutex_exit(&state->hs_cq_sched_lock);

	*cq_sched_pp = cq_schedp;	/* set to valid hdl, or to NULL */
	if ((cq_schedp == NULL) &&
	    (flags & IBT_CQS_EXACT_SCHED_GROUP))
		return (IBT_CQ_NO_SCHED_GROUP);
	else
		return (IBT_SUCCESS);
}

int
hermon_cq_sched_free(hermon_state_t *state, hermon_cq_sched_t *cq_schedp)
{
	if (cq_schedp != NULL) {
		/* Just decrement refcnt */
		mutex_enter(&state->hs_cq_sched_lock);
		if (cq_schedp->cqs_refcnt == 0)
			HERMON_WARNING(state, "cq_sched free underflow\n");
		else
			cq_schedp->cqs_refcnt--;
		mutex_exit(&state->hs_cq_sched_lock);
	}
	return (IBT_SUCCESS);
}
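
/*
 * Illustrative caller sketch (editorial, not part of the driver): how a
 * ULP's scheduling attributes map onto the pools above.  The pool name
 * "nfs" is the hypothetical one from the conf example earlier, and the
 * #ifdef keeps this from ever being compiled.  Only the cqs_flags and
 * cqs_pool_name attributes are examined by hermon_cq_sched_alloc().
 */
#ifdef	HERMON_CQ_SCHED_EXAMPLE
static int
hermon_cq_sched_example(hermon_state_t *state)
{
	ibt_cq_sched_attr_t	attr;
	hermon_cq_sched_t	*pool;
	int			status;

	attr.cqs_flags = IBT_CQS_EXACT_SCHED_GROUP;
	attr.cqs_pool_name = "nfs";	/* hypothetical conf-defined pool */

	/*
	 * With IBT_CQS_EXACT_SCHED_GROUP set, a missing (or vectorless)
	 * pool fails with IBT_CQ_NO_SCHED_GROUP rather than silently
	 * falling back to the default pool (a NULL handle).
	 */
	status = hermon_cq_sched_alloc(state, &attr, &pool);
	if (status != IBT_SUCCESS)
		return (status);

	/* ... CQs created against 'pool' use its reserved MSI-X range ... */

	return (hermon_cq_sched_free(state, pool));
}
#endif	/* HERMON_CQ_SCHED_EXAMPLE */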