1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * hermon_cq.c 28 * Hermon Completion Queue Processing Routines 29 * 30 * Implements all the routines necessary for allocating, freeing, resizing, 31 * and handling the completion type events that the Hermon hardware can 32 * generate. 
33 */ 34 35 #include <sys/types.h> 36 #include <sys/conf.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/modctl.h> 40 #include <sys/bitmap.h> 41 #include <sys/sysmacros.h> 42 43 #include <sys/ib/adapters/hermon/hermon.h> 44 45 int hermon_should_panic = 0; /* debugging aid */ 46 47 #define hermon_cq_update_ci_doorbell(cq) \ 48 /* Build the doorbell record data (low 24 bits only) */ \ 49 HERMON_UAR_DB_RECORD_WRITE(cq->cq_arm_ci_vdbr, \ 50 cq->cq_consindx & 0x00FFFFFF) 51 52 static int hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq, 53 uint_t cmd); 54 #pragma inline(hermon_cq_arm_doorbell) 55 static void hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr); 56 #pragma inline(hermon_arm_cq_dbr_init) 57 static void hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq, 58 hermon_hw_cqe_t *cqe, ibt_wc_t *wc); 59 static void hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq, 60 hermon_hw_cqe_t *cqe, ibt_wc_t *wc); 61 62 63 /* 64 * hermon_cq_alloc() 65 * Context: Can be called only from user or kernel context. 66 */ 67 int 68 hermon_cq_alloc(hermon_state_t *state, ibt_cq_hdl_t ibt_cqhdl, 69 ibt_cq_attr_t *cq_attr, uint_t *actual_size, hermon_cqhdl_t *cqhdl, 70 uint_t sleepflag) 71 { 72 hermon_rsrc_t *cqc, *rsrc; 73 hermon_umap_db_entry_t *umapdb; 74 hermon_hw_cqc_t cqc_entry; 75 hermon_cqhdl_t cq; 76 ibt_mr_attr_t mr_attr; 77 hermon_mr_options_t op; 78 hermon_pdhdl_t pd; 79 hermon_mrhdl_t mr; 80 hermon_hw_cqe_t *buf; 81 uint64_t value; 82 uint32_t log_cq_size, uarpg; 83 uint_t cq_is_umap; 84 uint32_t status, flag; 85 hermon_cq_sched_t *cq_schedp; 86 87 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq_attr)) 88 89 /* 90 * Determine whether CQ is being allocated for userland access or 91 * whether it is being allocated for kernel access. If the CQ is 92 * being allocated for userland access, then lookup the UAR 93 * page number for the current process. Note: If this is not found 94 * (e.g. 
if the process has not previously open()'d the Hermon driver), 95 * then an error is returned. 96 */ 97 cq_is_umap = (cq_attr->cq_flags & IBT_CQ_USER_MAP) ? 1 : 0; 98 if (cq_is_umap) { 99 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(), 100 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL); 101 if (status != DDI_SUCCESS) { 102 status = IBT_INVALID_PARAM; 103 goto cqalloc_fail; 104 } 105 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx; 106 } else { 107 uarpg = state->hs_kernel_uar_index; 108 } 109 110 /* Use the internal protection domain (PD) for setting up CQs */ 111 pd = state->hs_pdhdl_internal; 112 113 /* Increment the reference count on the protection domain (PD) */ 114 hermon_pd_refcnt_inc(pd); 115 116 /* 117 * Allocate an CQ context entry. This will be filled in with all 118 * the necessary parameters to define the Completion Queue. And then 119 * ownership will be passed to the hardware in the final step 120 * below. If we fail here, we must undo the protection domain 121 * reference count. 122 */ 123 status = hermon_rsrc_alloc(state, HERMON_CQC, 1, sleepflag, &cqc); 124 if (status != DDI_SUCCESS) { 125 status = IBT_INSUFF_RESOURCE; 126 goto cqalloc_fail1; 127 } 128 129 /* 130 * Allocate the software structure for tracking the completion queue 131 * (i.e. the Hermon Completion Queue handle). If we fail here, we must 132 * undo the protection domain reference count and the previous 133 * resource allocation. 134 */ 135 status = hermon_rsrc_alloc(state, HERMON_CQHDL, 1, sleepflag, &rsrc); 136 if (status != DDI_SUCCESS) { 137 status = IBT_INSUFF_RESOURCE; 138 goto cqalloc_fail2; 139 } 140 cq = (hermon_cqhdl_t)rsrc->hr_addr; 141 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq)) 142 cq->cq_is_umap = cq_is_umap; 143 cq->cq_cqnum = cqc->hr_indx; /* just use index, implicit in Hermon */ 144 cq->cq_intmod_count = 0; 145 cq->cq_intmod_usec = 0; 146 147 /* 148 * If this will be a user-mappable CQ, then allocate an entry for 149 * the "userland resources database". 
This will later be added to 150 * the database (after all further CQ operations are successful). 151 * If we fail here, we must undo the reference counts and the 152 * previous resource allocation. 153 */ 154 if (cq->cq_is_umap) { 155 umapdb = hermon_umap_db_alloc(state->hs_instance, cq->cq_cqnum, 156 MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrc); 157 if (umapdb == NULL) { 158 status = IBT_INSUFF_RESOURCE; 159 goto cqalloc_fail3; 160 } 161 } 162 163 164 /* 165 * Allocate the doorbell record. We'll need one for the CQ, handling 166 * both consumer index (SET CI) and the CQ state (CQ ARM). 167 */ 168 169 status = hermon_dbr_alloc(state, uarpg, &cq->cq_arm_ci_dbr_acchdl, 170 &cq->cq_arm_ci_vdbr, &cq->cq_arm_ci_pdbr, &cq->cq_dbr_mapoffset); 171 if (status != DDI_SUCCESS) { 172 status = IBT_INSUFF_RESOURCE; 173 goto cqalloc_fail4; 174 } 175 176 /* 177 * Calculate the appropriate size for the completion queue. 178 * Note: All Hermon CQs must be a power-of-2 minus 1 in size. Also 179 * they may not be any smaller than HERMON_CQ_MIN_SIZE. This step is 180 * to round the requested size up to the next highest power-of-2 181 */ 182 cq_attr->cq_size = max(cq_attr->cq_size, HERMON_CQ_MIN_SIZE); 183 log_cq_size = highbit(cq_attr->cq_size); 184 185 /* 186 * Next we verify that the rounded-up size is valid (i.e. consistent 187 * with the device limits and/or software-configured limits) 188 */ 189 if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) { 190 status = IBT_HCA_CQ_EXCEEDED; 191 goto cqalloc_fail4a; 192 } 193 194 /* 195 * Allocate the memory for Completion Queue. 196 * 197 * Note: Although we use the common queue allocation routine, we 198 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in 199 * kernel system memory) for kernel CQs because it would be 200 * inefficient to have CQs located in DDR memory. This is primarily 201 * because CQs are read from (by software) more than they are written 202 * to. 
(We always specify HERMON_QUEUE_LOCATION_USERLAND for all 203 * user-mappable CQs for a similar reason.) 204 * It is also worth noting that, unlike Hermon QP work queues, 205 * completion queues do not have the same strict alignment 206 * requirements. It is sufficient for the CQ memory to be both 207 * aligned to and bound to addresses which are a multiple of CQE size. 208 */ 209 cq->cq_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t); 210 211 cq->cq_cqinfo.qa_alloc_align = PAGESIZE; 212 cq->cq_cqinfo.qa_bind_align = PAGESIZE; 213 if (cq->cq_is_umap) { 214 cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND; 215 } else { 216 cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL; 217 hermon_arm_cq_dbr_init(cq->cq_arm_ci_vdbr); 218 } 219 status = hermon_queue_alloc(state, &cq->cq_cqinfo, sleepflag); 220 if (status != DDI_SUCCESS) { 221 status = IBT_INSUFF_RESOURCE; 222 goto cqalloc_fail4; 223 } 224 buf = (hermon_hw_cqe_t *)cq->cq_cqinfo.qa_buf_aligned; 225 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) 226 227 /* 228 * The ownership bit of the CQE's is set by the HW during the process 229 * of transferrring ownership of the CQ (PRM 09.35c, 14.2.1, note D1 230 * 231 */ 232 233 /* 234 * Register the memory for the CQ. The memory for the CQ must 235 * be registered in the Hermon TPT tables. This gives us the LKey 236 * to specify in the CQ context below. Note: If this is a user- 237 * mappable CQ, then we will force DDI_DMA_CONSISTENT mapping. 238 */ 239 flag = (sleepflag == HERMON_SLEEP) ? 
IBT_MR_SLEEP : IBT_MR_NOSLEEP; 240 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf; 241 mr_attr.mr_len = cq->cq_cqinfo.qa_size; 242 mr_attr.mr_as = NULL; 243 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE; 244 op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass; 245 op.mro_bind_dmahdl = cq->cq_cqinfo.qa_dmahdl; 246 op.mro_bind_override_addr = 0; 247 status = hermon_mr_register(state, pd, &mr_attr, &mr, &op, 248 HERMON_CQ_CMPT); 249 if (status != DDI_SUCCESS) { 250 status = IBT_INSUFF_RESOURCE; 251 goto cqalloc_fail5; 252 } 253 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 254 255 cq->cq_erreqnum = HERMON_CQ_ERREQNUM_GET(state); 256 if (cq_attr->cq_flags & IBT_CQ_HID) { 257 if (!HERMON_HID_VALID(state, cq_attr->cq_hid)) { 258 IBTF_DPRINTF_L2("CQalloc", "bad handler id 0x%x", 259 cq_attr->cq_hid); 260 status = IBT_INVALID_PARAM; 261 goto cqalloc_fail5; 262 } 263 cq->cq_eqnum = HERMON_HID_TO_EQNUM(state, cq_attr->cq_hid); 264 IBTF_DPRINTF_L2("cqalloc", "hid: eqn %d", cq->cq_eqnum); 265 } else { 266 cq_schedp = (hermon_cq_sched_t *)cq_attr->cq_sched; 267 if (cq_schedp == NULL) { 268 cq_schedp = &state->hs_cq_sched_default; 269 } else if (cq_schedp != &state->hs_cq_sched_default) { 270 int i; 271 hermon_cq_sched_t *tmp; 272 273 tmp = state->hs_cq_sched_array; 274 for (i = 0; i < state->hs_cq_sched_array_size; i++) 275 if (cq_schedp == &tmp[i]) 276 break; /* found it */ 277 if (i >= state->hs_cq_sched_array_size) { 278 cmn_err(CE_CONT, "!Invalid cq_sched argument: " 279 "ignored\n"); 280 cq_schedp = &state->hs_cq_sched_default; 281 } 282 } 283 cq->cq_eqnum = HERMON_HID_TO_EQNUM(state, 284 HERMON_CQSCHED_NEXT_HID(cq_schedp)); 285 IBTF_DPRINTF_L2("cqalloc", "sched: first-1 %d, len %d, " 286 "eqn %d", cq_schedp->cqs_start_hid - 1, 287 cq_schedp->cqs_len, cq->cq_eqnum); 288 } 289 290 /* 291 * Fill in the CQC entry. This is the final step before passing 292 * ownership of the CQC entry to the Hermon hardware. 
We use all of 293 * the information collected/calculated above to fill in the 294 * requisite portions of the CQC. Note: If this CQ is going to be 295 * used for userland access, then we need to set the UAR page number 296 * appropriately (otherwise it's a "don't care") 297 */ 298 bzero(&cqc_entry, sizeof (hermon_hw_cqc_t)); 299 300 cqc_entry.state = HERMON_CQ_DISARMED; 301 cqc_entry.pg_offs = cq->cq_cqinfo.qa_pgoffs >> 5; 302 cqc_entry.log_cq_sz = log_cq_size; 303 cqc_entry.usr_page = uarpg; 304 cqc_entry.c_eqn = cq->cq_eqnum; 305 cqc_entry.log2_pgsz = mr->mr_log2_pgsz; 306 cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF); 307 cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3; 308 cqc_entry.dbr_addrh = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 32); 309 cqc_entry.dbr_addrl = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 3); 310 311 /* 312 * Write the CQC entry to hardware - we pass ownership of 313 * the entry to the hardware (using the Hermon SW2HW_CQ firmware 314 * command). Note: In general, this operation shouldn't fail. But 315 * if it does, we have to undo everything we've done above before 316 * returning error. 317 */ 318 status = hermon_cmn_ownership_cmd_post(state, SW2HW_CQ, &cqc_entry, 319 sizeof (hermon_hw_cqc_t), cq->cq_cqnum, sleepflag); 320 if (status != HERMON_CMD_SUCCESS) { 321 cmn_err(CE_CONT, "Hermon: SW2HW_CQ command failed: %08x\n", 322 status); 323 if (status == HERMON_CMD_INVALID_STATUS) { 324 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 325 } 326 status = ibc_get_ci_failure(0); 327 goto cqalloc_fail6; 328 } 329 330 /* 331 * Fill in the rest of the Hermon Completion Queue handle. Having 332 * successfully transferred ownership of the CQC, we can update the 333 * following fields for use in further operations on the CQ. 
334 */ 335 cq->cq_resize_hdl = 0; 336 cq->cq_cqcrsrcp = cqc; 337 cq->cq_rsrcp = rsrc; 338 cq->cq_consindx = 0; 339 /* least restrictive */ 340 cq->cq_buf = buf; 341 cq->cq_bufsz = (1 << log_cq_size); 342 cq->cq_log_cqsz = log_cq_size; 343 cq->cq_mrhdl = mr; 344 cq->cq_refcnt = 0; 345 cq->cq_is_special = 0; 346 cq->cq_uarpg = uarpg; 347 cq->cq_umap_dhp = (devmap_cookie_t)NULL; 348 avl_create(&cq->cq_wrid_wqhdr_avl_tree, hermon_wrid_workq_compare, 349 sizeof (struct hermon_workq_avl_s), 350 offsetof(struct hermon_workq_avl_s, wqa_link)); 351 352 cq->cq_hdlrarg = (void *)ibt_cqhdl; 353 354 /* 355 * Put CQ handle in Hermon CQNum-to-CQHdl list. Then fill in the 356 * "actual_size" and "cqhdl" and return success 357 */ 358 hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, cq); 359 360 /* 361 * If this is a user-mappable CQ, then we need to insert the previously 362 * allocated entry into the "userland resources database". This will 363 * allow for later lookup during devmap() (i.e. mmap()) calls. 364 */ 365 if (cq->cq_is_umap) { 366 hermon_umap_db_add(umapdb); 367 } 368 369 /* 370 * Fill in the return arguments (if necessary). This includes the 371 * real completion queue size. 
372 */ 373 if (actual_size != NULL) { 374 *actual_size = (1 << log_cq_size) - 1; 375 } 376 *cqhdl = cq; 377 378 return (DDI_SUCCESS); 379 380 /* 381 * The following is cleanup for all possible failure cases in this routine 382 */ 383 cqalloc_fail6: 384 if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL, 385 sleepflag) != DDI_SUCCESS) { 386 HERMON_WARNING(state, "failed to deregister CQ memory"); 387 } 388 cqalloc_fail5: 389 hermon_queue_free(&cq->cq_cqinfo); 390 cqalloc_fail4a: 391 hermon_dbr_free(state, uarpg, cq->cq_arm_ci_vdbr); 392 cqalloc_fail4: 393 if (cq_is_umap) { 394 hermon_umap_db_free(umapdb); 395 } 396 cqalloc_fail3: 397 hermon_rsrc_free(state, &rsrc); 398 cqalloc_fail2: 399 hermon_rsrc_free(state, &cqc); 400 cqalloc_fail1: 401 hermon_pd_refcnt_dec(pd); 402 cqalloc_fail: 403 return (status); 404 } 405 406 407 /* 408 * hermon_cq_free() 409 * Context: Can be called only from user or kernel context. 410 */ 411 /* ARGSUSED */ 412 int 413 hermon_cq_free(hermon_state_t *state, hermon_cqhdl_t *cqhdl, uint_t sleepflag) 414 { 415 hermon_rsrc_t *cqc, *rsrc; 416 hermon_umap_db_entry_t *umapdb; 417 hermon_hw_cqc_t cqc_entry; 418 hermon_pdhdl_t pd; 419 hermon_mrhdl_t mr; 420 hermon_cqhdl_t cq, resize; 421 uint32_t cqnum; 422 uint64_t value; 423 uint_t maxprot; 424 int status; 425 426 /* 427 * Pull all the necessary information from the Hermon Completion Queue 428 * handle. This is necessary here because the resource for the 429 * CQ handle is going to be freed up as part of this operation. 430 */ 431 cq = *cqhdl; 432 mutex_enter(&cq->cq_lock); 433 cqc = cq->cq_cqcrsrcp; 434 rsrc = cq->cq_rsrcp; 435 pd = state->hs_pdhdl_internal; 436 mr = cq->cq_mrhdl; 437 cqnum = cq->cq_cqnum; 438 439 resize = cq->cq_resize_hdl; /* save the handle for later */ 440 441 /* 442 * If there are work queues still associated with the CQ, then return 443 * an error. Otherwise, we will be holding the CQ lock. 
	 */
	if (cq->cq_refcnt != 0) {
		mutex_exit(&cq->cq_lock);
		return (IBT_CQ_BUSY);
	}

	/*
	 * If this was a user-mappable CQ, then we need to remove its entry
	 * from the "userland resources database".  If it is also currently
	 * mmap()'d out to a user process, then we need to call
	 * devmap_devmem_remap() to remap the CQ memory to an invalid mapping.
	 * We also need to invalidate the CQ tracking information for the
	 * user mapping.
	 */
	if (cq->cq_is_umap) {
		status = hermon_umap_db_find(state->hs_instance, cqnum,
		    MLNX_UMAP_CQMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
		    &umapdb);
		if (status != DDI_SUCCESS) {
			mutex_exit(&cq->cq_lock);
			HERMON_WARNING(state, "failed to find in database");
			return (ibc_get_ci_failure(0));
		}
		hermon_umap_db_free(umapdb);
		if (cq->cq_umap_dhp != NULL) {
			maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
			status = devmap_devmem_remap(cq->cq_umap_dhp,
			    state->hs_dip, 0, 0, cq->cq_cqinfo.qa_size,
			    maxprot, DEVMAP_MAPPING_INVALID, NULL);
			if (status != DDI_SUCCESS) {
				mutex_exit(&cq->cq_lock);
				HERMON_WARNING(state, "failed in CQ memory "
				    "devmap_devmem_remap()");
				return (ibc_get_ci_failure(0));
			}
			cq->cq_umap_dhp = (devmap_cookie_t)NULL;
		}
	}

	/*
	 * Put NULL into the Hermon CQNum-to-CQHdl list.  This will allow any
	 * in-progress events to detect that the CQ corresponding to this
	 * number has been freed.  (Comment previously said "Arbel", the
	 * predecessor HCA; the mechanism is the same here.)
	 */
	hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, NULL);

	mutex_exit(&cq->cq_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq))

	/*
	 * Reclaim CQC entry from hardware (using the Hermon HW2SW_CQ
	 * firmware command).  If the ownership transfer fails for any reason,
	 * then it is an indication that something (either in HW or SW) has
	 * gone seriously wrong.
	 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_CQ, &cqc_entry,
	    sizeof (hermon_hw_cqc_t), cqnum, sleepflag);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to reclaim CQC ownership");
		cmn_err(CE_CONT, "Hermon: HW2SW_CQ command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * From here on, we start relinquishing resources - but check to see
	 * if a resize was in progress - if so, we need to relinquish those
	 * resources as well
	 */


	/*
	 * Deregister the memory for the Completion Queue.  If this fails
	 * for any reason, then it is an indication that something (either
	 * in HW or SW) has gone seriously wrong.  So we print a warning
	 * message and return.
	 */
	status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
	    sleepflag);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to deregister CQ memory");
		return (ibc_get_ci_failure(0));
	}

	if (resize) {	/* there was a pointer to a handle */
		mr = resize->cq_mrhdl;	/* reuse the pointer to the region */
		status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
		    sleepflag);
		if (status != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister resize CQ "
			    "memory");
			return (ibc_get_ci_failure(0));
		}
	}

	/* Free the memory for the CQ */
	hermon_queue_free(&cq->cq_cqinfo);
	if (resize) {
		hermon_queue_free(&resize->cq_cqinfo);
		/* and the temporary handle */
		kmem_free(resize, sizeof (struct hermon_sw_cq_s));
	}

	/* everything else does not matter for the resize in progress */

	/* Free the dbr */
	hermon_dbr_free(state, cq->cq_uarpg, cq->cq_arm_ci_vdbr);

	/* Free the Hermon Completion Queue handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free up the CQC entry resource */
	hermon_rsrc_free(state, &cqc);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the cqhdl pointer to NULL and return success */
	*cqhdl = NULL;

	return (DDI_SUCCESS);
}


/*
 * hermon_cq_resize()
 *    Context: Can be called only from user or kernel context.
 *
 *    Allocates and registers a new, larger CQ buffer and posts the
 *    RESIZE_CQ firmware command.  The switch-over to the new buffer is
 *    completed later by hermon_cq_poll() when it sees the special
 *    "resize" CQE (see hermon_cq_resize_helper()).  Only one resize may
 *    be in flight per CQ at a time.
 */
int
hermon_cq_resize(hermon_state_t *state, hermon_cqhdl_t cq, uint_t req_size,
    uint_t *actual_size, uint_t sleepflag)
{
	hermon_hw_cqc_t		cqc_entry;
	hermon_cqhdl_t		resize_hdl;
	hermon_qalloc_info_t	new_cqinfo;
	ibt_mr_attr_t		mr_attr;
	hermon_mr_options_t	op;
	hermon_pdhdl_t		pd;
	hermon_mrhdl_t		mr;
	hermon_hw_cqe_t		*buf;
	uint32_t		new_prod_indx;
	uint_t			log_cq_size;
	int			status, flag;

	if (cq->cq_resize_hdl != 0) {	/* already in process */
		status = IBT_CQ_BUSY;
		goto cqresize_fail;
	}


	/* Use the internal protection domain (PD) for CQs */
	pd = state->hs_pdhdl_internal;

	/*
	 * Calculate the appropriate size for the new resized completion
	 * queue.  Note:  All Hermon CQs must be a power-of-2 minus 1 in
	 * size.  Also they may not be any smaller than HERMON_CQ_MIN_SIZE.
	 * This step is to round the requested size up to the next highest
	 * power-of-2
	 */
	req_size = max(req_size, HERMON_CQ_MIN_SIZE);
	log_cq_size = highbit(req_size);

	/*
	 * Next we verify that the rounded-up size is valid (i.e. consistent
	 * with the device limits and/or software-configured limits)
	 */
	if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
		status = IBT_HCA_CQ_EXCEEDED;
		goto cqresize_fail;
	}

	/*
	 * Allocate the memory for newly resized Completion Queue.
	 *
	 * Note:  Although we use the common queue allocation routine, we
	 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e.
CQ located in
	 * kernel system memory) for kernel CQs because it would be
	 * inefficient to have CQs located in DDR memory.  This is the same
	 * as we do when we first allocate completion queues primarily
	 * because CQs are read from (by software) more than they are written
	 * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
	 * user-mappable CQs for a similar reason.)
	 * It is also worth noting that, unlike Hermon QP work queues,
	 * completion queues do not have the same strict alignment
	 * requirements.  It is sufficient for the CQ memory to be both
	 * aligned to and bound to addresses which are a multiple of CQE size.
	 */

	/* first, alloc the resize_handle */
	resize_hdl = kmem_zalloc(sizeof (struct hermon_sw_cq_s), KM_SLEEP);

	new_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
	new_cqinfo.qa_alloc_align = PAGESIZE;
	new_cqinfo.qa_bind_align = PAGESIZE;
	if (cq->cq_is_umap) {
		new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
	} else {
		new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
	}
	status = hermon_queue_alloc(state, &new_cqinfo, sleepflag);
	if (status != DDI_SUCCESS) {
		/* free the resize handle */
		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
		status = IBT_INSUFF_RESOURCE;
		goto cqresize_fail;
	}
	buf = (hermon_hw_cqe_t *)new_cqinfo.qa_buf_aligned;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))

	/*
	 * No initialization of the cq is needed - the command will do it
	 */

	/*
	 * Register the memory for the CQ.  The memory for the CQ must
	 * be registered in the Hermon TPT tables.  This gives us the LKey
	 * to specify in the CQ context below.
	 */
	flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
	mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
	mr_attr.mr_len = new_cqinfo.qa_size;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
	op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
	op.mro_bind_dmahdl = new_cqinfo.qa_dmahdl;
	op.mro_bind_override_addr = 0;
	status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
	    HERMON_CQ_CMPT);
	if (status != DDI_SUCCESS) {
		hermon_queue_free(&new_cqinfo);
		/* free the resize handle */
		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
		status = IBT_INSUFF_RESOURCE;
		goto cqresize_fail;
	}
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Now we grab the CQ lock.  Since we will be updating the actual
	 * CQ location and the producer/consumer indexes, we should hold
	 * the lock.
	 *
	 * We use a NOSLEEP command below (note: comment historically said
	 * "ARBEL_NOSLEEP"), though, because we are holding the "cq_lock"
	 * and if we got raised to interrupt level by priority inversion,
	 * we would not want to block in this routine waiting for success.
	 */
	mutex_enter(&cq->cq_lock);

	/*
	 * Fill in the CQC entry.  For the resize operation this is the
	 * final step before attempting the resize operation on the CQC entry.
	 * We use all of the information collected/calculated above to fill
	 * in the requisite portions of the CQC.
	 */
	bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
	cqc_entry.log_cq_sz = log_cq_size;
	cqc_entry.pg_offs = new_cqinfo.qa_pgoffs >> 5;
	cqc_entry.log2_pgsz = mr->mr_log2_pgsz;
	cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
	cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;

	/*
	 * Write the CQC entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware (using the Hermon RESIZE_CQ firmware
	 * command).  Note: In general, this operation shouldn't fail.  But
	 * if it does, we have to undo everything we've done above before
	 * returning error.  Also note that the status returned may indicate
	 * the code to return to the IBTF.
	 */
	status = hermon_resize_cq_cmd_post(state, &cqc_entry, cq->cq_cqnum,
	    &new_prod_indx, HERMON_CMD_NOSLEEP_SPIN);
	if (status != HERMON_CMD_SUCCESS) {
		/* Resize attempt has failed, drop CQ lock and cleanup */
		mutex_exit(&cq->cq_lock);
		if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
		    sleepflag) != DDI_SUCCESS) {
			HERMON_WARNING(state, "failed to deregister CQ memory");
		}
		kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
		hermon_queue_free(&new_cqinfo);
		if (status == HERMON_CMD_BAD_SIZE) {
			return (IBT_CQ_SZ_INSUFFICIENT);
		} else {
			cmn_err(CE_CONT, "Hermon: RESIZE_CQ command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * For Hermon, we've alloc'd another handle structure and save off the
	 * important things in it.  Then, in polling we check to see if
	 * there's a "resizing handle" and if so we look for the "special
	 * CQE", opcode 0x16, that indicates the transition to the new buffer.
	 *
	 * At that point, we'll adjust everything - including dereg and
	 * freeing of the original buffer, updating all the necessary fields
	 * in the cq_hdl, and setting up for the next cqe polling
	 */

	resize_hdl->cq_buf = buf;
	resize_hdl->cq_bufsz = (1 << log_cq_size);
	resize_hdl->cq_mrhdl = mr;
	resize_hdl->cq_log_cqsz = log_cq_size;

	bcopy(&new_cqinfo, &(resize_hdl->cq_cqinfo),
	    sizeof (struct hermon_qalloc_info_s));

	/* now, save the address in the cq_handle */
	cq->cq_resize_hdl = resize_hdl;

	/*
	 * Drop the CQ lock now.
	 */

	mutex_exit(&cq->cq_lock);
	/*
	 * Fill in the return arguments (if necessary).  This includes the
	 * real new completion queue size.
	 */
	if (actual_size != NULL) {
		*actual_size = (1 << log_cq_size) - 1;
	}

	return (DDI_SUCCESS);

cqresize_fail:
	return (status);
}


/*
 * hermon_cq_modify()
 *    Context: Can be called base context.
 *
 *    Updates interrupt moderation (count/usec) via MODIFY_MODERATION_CQ
 *    and/or retargets the CQ to a different EQ via MODIFY_EQN.  A zero
 *    "hid" means "leave the EQ association alone"; a nonzero hid is
 *    1-based relative to cq_eqnum (hid - 1 is compared/stored).
 */
/* ARGSUSED */
int
hermon_cq_modify(hermon_state_t *state, hermon_cqhdl_t cq,
    uint_t count, uint_t usec, ibt_cq_handler_id_t hid, uint_t sleepflag)
{
	int		status;
	hermon_hw_cqc_t	cqc_entry;

	mutex_enter(&cq->cq_lock);
	if (count != cq->cq_intmod_count ||
	    usec != cq->cq_intmod_usec) {
		bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
		cqc_entry.cq_max_cnt = count;
		cqc_entry.cq_period = usec;
		status = hermon_modify_cq_cmd_post(state, &cqc_entry,
		    cq->cq_cqnum, MODIFY_MODERATION_CQ, sleepflag);
		if (status != HERMON_CMD_SUCCESS) {
			mutex_exit(&cq->cq_lock);
			cmn_err(CE_CONT, "Hermon: MODIFY_MODERATION_CQ "
			    "command failed: %08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
		cq->cq_intmod_count = count;
		cq->cq_intmod_usec = usec;
	}
	if (hid && (hid - 1 != cq->cq_eqnum)) {
		bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
		cqc_entry.c_eqn = HERMON_HID_TO_EQNUM(state, hid);
		status = hermon_modify_cq_cmd_post(state, &cqc_entry,
		    cq->cq_cqnum, MODIFY_EQN, sleepflag);
		if (status != HERMON_CMD_SUCCESS) {
			mutex_exit(&cq->cq_lock);
			cmn_err(CE_CONT, "Hermon: MODIFY_EQN command failed: "
			    "%08x\n", status);
			if (status == HERMON_CMD_INVALID_STATUS) {
				hermon_fm_ereport(state, HCA_SYS_ERR,
				    HCA_ERR_SRV_LOST);
			}
			return (ibc_get_ci_failure(0));
		}
		cq->cq_eqnum = hid - 1;
	}
	mutex_exit(&cq->cq_lock);
	return
(DDI_SUCCESS);
}

/*
 * hermon_cq_notify()
 *    Context: Can be called from interrupt or base context.
 *
 *    Re-arms the CQ to generate a completion event: either for the next
 *    completion of any kind (IBT_NEXT_COMPLETION) or only for the next
 *    solicited completion (IBT_NEXT_SOLICITED).
 */
int
hermon_cq_notify(hermon_state_t *state, hermon_cqhdl_t cq,
    ibt_cq_notify_flags_t flags)
{
	uint_t		cmd;
	ibt_status_t	status;

	/* Validate IBT flags and call doorbell routine. */
	if (flags == IBT_NEXT_COMPLETION) {
		cmd = HERMON_CQDB_NOTIFY_CQ;
	} else if (flags == IBT_NEXT_SOLICITED) {
		cmd = HERMON_CQDB_NOTIFY_CQ_SOLICIT;
	} else {
		return (IBT_CQ_NOTIFY_TYPE_INVALID);
	}

	status = hermon_cq_arm_doorbell(state, cq, cmd);
	return (status);
}


/*
 * hermon_cq_poll()
 *    Context: Can be called from interrupt or base context.
 *
 *    Drains up to "num_wc" software-owned CQEs into "wc_p", advancing
 *    the consumer index and ringing the SET_CI doorbell when anything
 *    was consumed.  Returns IBT_CQ_EMPTY when no CQE was available.
 */
int
hermon_cq_poll(hermon_state_t *state, hermon_cqhdl_t cq, ibt_wc_t *wc_p,
    uint_t num_wc, uint_t *num_polled)
{
	hermon_hw_cqe_t	*cqe;
	uint_t		opcode;
	uint32_t	cons_indx, wrap_around_mask, shift, mask;
	uint32_t	polled_cnt, spec_op = 0;
	int		status;

	/*
	 * Check for user-mappable CQ memory.  Note:  We do not allow kernel
	 * clients to poll CQ memory that is accessible directly by the user.
	 * If the CQ memory is user accessible, then return an error.
	 */
	if (cq->cq_is_umap) {
		return (IBT_CQ_HDL_INVALID);
	}

	mutex_enter(&cq->cq_lock);

	/*
	 * Get the consumer index; "shift" and "mask" are passed to the
	 * HERMON_CQE_OWNER_IS_SW() ownership test below.
	 */
	cons_indx = cq->cq_consindx;
	shift = cq->cq_log_cqsz;
	mask = cq->cq_bufsz;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Hermon completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_bufsz - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

	/*
	 * Keep pulling entries from the CQ until we find an entry owned by
	 * the hardware.  As long as there are CQE's owned by SW, process
	 * each entry by calling hermon_cq_cqe_consume() and updating the CQ
	 * consumer index.
	 */
	polled_cnt = 0;
	while (HERMON_CQE_OWNER_IS_SW(cq, cqe, cons_indx, shift, mask)) {
		if (cq->cq_resize_hdl != 0) {	/* in midst of resize */
			/* peek at the opcode */
			opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
			if (opcode == HERMON_CQE_RCV_RESIZE_CODE) {
				/* switch over to the new resized buffer */
				hermon_cq_resize_helper(state, cq);

				/* Increment the consumer index */
				cons_indx = (cons_indx + 1);
				spec_op = 1; /* plus one for the limiting CQE */

				/* buffer size may have changed; recompute */
				wrap_around_mask = (cq->cq_bufsz - 1);

				/* Update the pointer to the next CQ entry */
				cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

				continue;
			}
		}	/* in resizing CQ */

		/*
		 * either resizing and not the special opcode, or
		 * not resizing at all
		 */
		hermon_cq_cqe_consume(state, cq, cqe, &wc_p[polled_cnt++]);

		/* Increment the consumer index */
		cons_indx = (cons_indx + 1);

		/* Update the pointer to the next CQ entry */
		cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

		/*
		 * If we have run out of space to store work completions,
		 * then stop and return the ones we have pulled off the CQ.
		 */
		if (polled_cnt >= num_wc) {
			break;
		}
	}

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.
	 */
	if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;
		hermon_cq_update_ci_doorbell(cq);

	} else if (polled_cnt == 0) {
		if (spec_op != 0) {
			/* if we got the special opcode, update the consindx */
			cq->cq_consindx = cons_indx;
			hermon_cq_update_ci_doorbell(cq);
		}
	}

	mutex_exit(&cq->cq_lock);

	/* Set "num_polled" (if necessary) */
	if (num_polled != NULL) {
		*num_polled = polled_cnt;
	}

	/* Set CQ_EMPTY condition if needed, otherwise return success */
	if (polled_cnt == 0) {
		status = IBT_CQ_EMPTY;
	} else {
		status = DDI_SUCCESS;
	}

	/*
	 * Check if the system is currently panicking.  If it is, then call
	 * the Hermon interrupt service routine.  This step is necessary here
	 * because we might be in a polled I/O mode and without the call to
	 * hermon_isr() - and its subsequent calls to poll and rearm each
	 * event queue - we might overflow our EQs and render the system
	 * unable to sync/dump.
	 */
	if (ddi_in_panic() != 0) {
		(void) hermon_isr((caddr_t)state, (caddr_t)NULL);
	}
	return (status);
}

/*
 * cmd_sn must be initialized to 1 to enable proper reenabling
 * by hermon_arm_cq_dbr_update().
 */
static void
hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr)
{
	uint32_t	*target;

	/* second 32-bit word of the doorbell record holds the arm state */
	target = (uint32_t *)cq_arm_dbr + 1;
	*target = htonl(1 << HERMON_CQDB_CMDSN_SHIFT);
}


/*
 * User cmd_sn needs help from this kernel function to know
 * when it should be incremented (modulo 4).  We do an atomic
 * update of the arm_cq dbr to communicate this fact.  We retry
 * in the case that user library is racing with us.  We zero
 * out the cmd field so that the user library can use the cmd
 * field to track the last command it issued (solicited versus any).
static void
hermon_arm_cq_dbr_update(hermon_dbr_t *cq_arm_dbr)
{
	uint32_t tmp, cmp, new;
	uint32_t old_cmd_sn, new_cmd_sn;
	uint32_t *target;
	int retries = 0;

	/* second 32-bit word of the doorbell record holds cmd/cmd_sn/ci */
	target = (uint32_t *)cq_arm_dbr + 1;
retry:
	cmp = *target;
	tmp = htonl(cmp);	/* byte-swap to host order for field math */
	old_cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	/* bump cmd_sn modulo 4 (it is a 2-bit field) */
	new_cmd_sn = (old_cmd_sn + (0x1 << HERMON_CQDB_CMDSN_SHIFT)) &
	    (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	/*
	 * NOTE(review): the 0x37 mask presumably clears both the 3-bit cmd
	 * field and the 2-bit cmd_sn field in a single mask, which relies on
	 * HERMON_CQDB_CMDSN_SHIFT being HERMON_CQDB_CMD_SHIFT + 4 -- confirm
	 * against the hardware definitions header.
	 */
	new = htonl((tmp & ~(0x37 << HERMON_CQDB_CMD_SHIFT)) | new_cmd_sn);
	tmp = atomic_cas_32(target, cmp, new);
	if (tmp != cmp) {	/* cas failed, so need to retry */
		drv_usecwait(retries & 0xff); /* avoid race */
		if (++retries > 100000) {
			cmn_err(CE_CONT, "cas failed in hermon\n");
			retries = 0;
		}
		goto retry;
	}
}


/*
 * hermon_cq_handler()
 *    Context: Only called from interrupt context
 *
 * Dispatch a CQ completion event to the IBTF client's CQ handler.
 */
/* ARGSUSED */
int
hermon_cq_handler(hermon_state_t *state, hermon_eqhdl_t eq,
    hermon_hw_eqe_t *eqe)
{
	hermon_cqhdl_t		cq;
	uint_t			cqnum;

	/* Get the CQ handle from CQ number in event descriptor */
	cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
	cq = hermon_cqhdl_from_cqnum(state, cqnum);

	/*
	 * If the CQ handle is NULL, this is probably an indication
	 * that the CQ has been freed already.  In which case, we
	 * should not deliver this event.
	 *
	 * We also check that the CQ number in the handle is the
	 * same as the CQ number in the event queue entry.  This
	 * extra check allows us to handle the case where a CQ was
	 * freed and then allocated again in the time it took to
	 * handle the event queue processing.  By constantly incrementing
	 * the non-constrained portion of the CQ number every time
	 * a new CQ is allocated, we mitigate (somewhat) the chance
	 * that a stale event could be passed to the client's CQ
	 * handler.
	 *
	 * Lastly, we check if "hs_ibtfpriv" is NULL.  If it is then it
	 * means that we've have either received this event before we
	 * finished attaching to the IBTF or we've received it while we
	 * are in the process of detaching.
	 */
	if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
	    (state->hs_ibtfpriv != NULL)) {
		/* let userland know the cmd_sn should advance */
		hermon_arm_cq_dbr_update(cq->cq_arm_ci_vdbr);
		HERMON_DO_IBTF_CQ_CALLB(state, cq);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_cq_err_handler()
 *    Context: Only called from interrupt context
 *
 * Dispatch a CQ error (overrun) event to the IBTF as an async event.
 */
/* ARGSUSED */
int
hermon_cq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq,
    hermon_hw_eqe_t *eqe)
{
	hermon_cqhdl_t		cq;
	uint_t			cqnum;
	ibc_async_event_t	event;
	ibt_async_code_t	type;

	HERMON_FMANOTE(state, HERMON_FMA_OVERRUN);
	/* Get the CQ handle from CQ number in event descriptor */
	cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
	cq = hermon_cqhdl_from_cqnum(state, cqnum);

	/*
	 * If the CQ handle is NULL, this is probably an indication
	 * that the CQ has been freed already.  In which case, we
	 * should not deliver this event.
	 *
	 * We also check that the CQ number in the handle is the
	 * same as the CQ number in the event queue entry.  This
	 * extra check allows us to handle the case where a CQ was
	 * freed and then allocated again in the time it took to
	 * handle the event queue processing.  By constantly incrementing
	 * the non-constrained portion of the CQ number every time
	 * a new CQ is allocated, we mitigate (somewhat) the chance
	 * that a stale event could be passed to the client's CQ
	 * handler.
	 *
	 * And then we check if "hs_ibtfpriv" is NULL.  If it is then it
	 * means that we've have either received this event before we
	 * finished attaching to the IBTF or we've received it while we
	 * are in the process of detaching.
	 */
	if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
	    (state->hs_ibtfpriv != NULL)) {
		event.ev_cq_hdl = (ibt_cq_hdl_t)cq->cq_hdlrarg;
		type		= IBT_ERROR_CQ;
		HERMON_DO_IBTF_ASYNC_CALLB(state, type, &event);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_cq_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 *
 * Returns DDI_FAILURE if the CQ is already in use by the other class
 * of QP (special vs. non-special); otherwise bumps the refcount.
 */
int
hermon_cq_refcnt_inc(hermon_cqhdl_t cq, uint_t is_special)
{
	/*
	 * Increment the completion queue's reference count.  Note: In order
	 * to ensure compliance with IBA C11-15, we must ensure that a given
	 * CQ is not used for both special (SMI/GSI) QP and non-special QP.
	 * This is accomplished here by keeping track of how the referenced
	 * CQ is being used.
	 */
	mutex_enter(&cq->cq_lock);
	if (cq->cq_refcnt == 0) {
		/* first reference decides the CQ's usage class */
		cq->cq_is_special = is_special;
	} else {
		if (cq->cq_is_special != is_special) {
			mutex_exit(&cq->cq_lock);
			return (DDI_FAILURE);
		}
	}
	cq->cq_refcnt++;
	mutex_exit(&cq->cq_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_cq_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
void
hermon_cq_refcnt_dec(hermon_cqhdl_t cq)
{
	/* Decrement the completion queue's reference count */
	mutex_enter(&cq->cq_lock);
	cq->cq_refcnt--;
	mutex_exit(&cq->cq_lock);
}


/*
 * hermon_cq_arm_doorbell()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq, uint_t cq_cmd)
{
	uint32_t	cq_num;
	uint32_t	*target;
	uint32_t	old_cmd, cmp, new, tmp, cmd_sn;
	ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

	/* initialize the FMA retry loop */
	hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

	cq_num = cq->cq_cqnum;
	/* second 32-bit word of the doorbell record holds cmd/cmd_sn/ci */
	target = (uint32_t *)cq->cq_arm_ci_vdbr + 1;

	/* the FMA retry loop starts for Hermon doorbell register. */
	hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
	    fm_test_num);
retry:
	/*
	 * Atomically record the new arm state in the doorbell record
	 * (shared with userland, hence the CAS retry loop) before ringing
	 * the UAR doorbell register.
	 */
	cmp = *target;
	tmp = htonl(cmp);	/* byte-swap to host order for field math */
	old_cmd = tmp & (0x7 << HERMON_CQDB_CMD_SHIFT);
	cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
	if (cq_cmd == HERMON_CQDB_NOTIFY_CQ) {
		if (old_cmd != HERMON_CQDB_NOTIFY_CQ) {
			cmd_sn |= (HERMON_CQDB_NOTIFY_CQ <<
			    HERMON_CQDB_CMD_SHIFT);
			new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
			    &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
			    32) | (cq->cq_consindx & 0xFFFFFF));
		} /* else it's already armed */
	} else {
		ASSERT(cq_cmd == HERMON_CQDB_NOTIFY_CQ_SOLICIT);
		/*
		 * NOTIFY_CQ is the stronger arming; only write the
		 * solicited arming if neither form is currently armed.
		 */
		if (old_cmd != HERMON_CQDB_NOTIFY_CQ &&
		    old_cmd != HERMON_CQDB_NOTIFY_CQ_SOLICIT) {
			cmd_sn |= (HERMON_CQDB_NOTIFY_CQ_SOLICIT <<
			    HERMON_CQDB_CMD_SHIFT);
			new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
			tmp = atomic_cas_32(target, cmp, new);
			if (tmp != cmp)
				goto retry;
			HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
			    &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
			    32) | (cq->cq_consindx & 0xFFFFFF));
		} /* else it's already armed */
	}

	/* the FMA retry loop ends. */
	hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
	    fm_test_num);

	return (IBT_SUCCESS);

pio_error:
	hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
	return (ibc_get_ci_failure(0));
}


/*
 * hermon_cqhdl_from_cqnum()
 *    Context: Can be called from interrupt or base context.
 *
 *    This routine is important because changing the unconstrained
 *    portion of the CQ number is critical to the detection of a
 *    potential race condition in the CQ handler code (i.e. the case
 *    where a CQ is freed and alloc'd again before an event for the
 *    "old" CQ can be handled).
 *
 *    While this is not a perfect solution (not sure that one exists)
 *    it does help to mitigate the chance that this race condition will
 *    cause us to deliver a "stale" event to the new CQ owner.  Note:
 *    this solution does not scale well because the number of constrained
 *    bits increases (and, hence, the number of unconstrained bits
 *    decreases) as the number of supported CQs grows.  For small and
 *    intermediate values, it should hopefully provide sufficient
 *    protection.
 */
hermon_cqhdl_t
hermon_cqhdl_from_cqnum(hermon_state_t *state, uint_t cqnum)
{
	uint_t	cqindx, cqmask;

	/* Calculate the CQ table index from the cqnum */
	cqmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1;
	cqindx = cqnum & cqmask;
	return (hermon_icm_num_to_hdl(state, HERMON_CQC, cqindx));
}

/*
 * hermon_cq_cqe_consume()
 *    Context: Can be called from interrupt or base context.
 *
 * Translate a single successful hardware CQE into the IBTF work
 * completion "wc".  Error CQEs are handed off to
 * hermon_cq_errcqe_consume().
 */
static void
hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint_t		opcode, qpnum, qp1_indx;
	ibt_wc_flags_t	flags;
	ibt_wrc_opcode_t type;

	/*
	 * Determine if this is an "error" CQE by examining "opcode".  If it
	 * is an error CQE, then call hermon_cq_errcqe_consume() and return
	 * whatever status it returns.  Otherwise, this is a successful
	 * completion.
	 */
	opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
	if ((opcode == HERMON_CQE_SEND_ERR_OPCODE) ||
	    (opcode == HERMON_CQE_RECV_ERR_OPCODE)) {
		hermon_cq_errcqe_consume(state, cq, cqe, wc);
		return;
	}

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See hermon_wr.c for more details.
	 */
	wc->wc_id = hermon_wrid_get_entry(cq, cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  This will set
	 * not only the type of the completion, but also any flags that might
	 * be associated with it (e.g. whether immediate data is present).
	 */
	flags = IBT_WC_NO_FLAGS;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->hs_fcoib_may_be_running))
	if (HERMON_CQE_SENDRECV_GET(cq, cqe) != HERMON_COMPLETION_RECV) {

		/* Send CQE */
		switch (opcode) {
		case HERMON_CQE_SND_RDMAWR_IMM:
		case HERMON_CQE_SND_RDMAWR:
			type = IBT_WRC_RDMAW;
			break;

		case HERMON_CQE_SND_SEND_INV:
		case HERMON_CQE_SND_SEND_IMM:
		case HERMON_CQE_SND_SEND:
			type = IBT_WRC_SEND;
			break;

		case HERMON_CQE_SND_LSO:
			type = IBT_WRC_SEND_LSO;
			break;

		case HERMON_CQE_SND_RDMARD:
			type = IBT_WRC_RDMAR;
			break;

		case HERMON_CQE_SND_ATOMIC_CS:
			type = IBT_WRC_CSWAP;
			break;

		case HERMON_CQE_SND_ATOMIC_FA:
			type = IBT_WRC_FADD;
			break;

		case HERMON_CQE_SND_BIND_MW:
			type = IBT_WRC_BIND;
			break;

		case HERMON_CQE_SND_FRWR:
			type = IBT_WRC_FAST_REG_PMR;
			break;

		case HERMON_CQE_SND_LCL_INV:
			type = IBT_WRC_LOCAL_INVALIDATE;
			break;

		default:
			HERMON_WARNING(state, "unknown send CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			return;
		}
	} else if ((state->hs_fcoib_may_be_running == B_TRUE) &&
	    hermon_fcoib_is_fexch_qpn(state, HERMON_CQE_QPNUM_GET(cq, cqe))) {
		/* Receive CQE on an FCoIB FEXCH QP: fill in the FC fields */
		type = IBT_WRC_RECV;
		if (HERMON_CQE_FEXCH_DIFE(cq, cqe))
			flags |= IBT_WC_DIF_ERROR;
		wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
		wc->wc_fexch_seq_cnt = HERMON_CQE_FEXCH_SEQ_CNT(cq, cqe);
		wc->wc_fexch_tx_bytes_xfer = HERMON_CQE_FEXCH_TX_BYTES(cq, cqe);
		wc->wc_fexch_rx_bytes_xfer = HERMON_CQE_FEXCH_RX_BYTES(cq, cqe);
		wc->wc_fexch_seq_id = HERMON_CQE_FEXCH_SEQ_ID(cq, cqe);
		wc->wc_detail = HERMON_CQE_FEXCH_DETAIL(cq, cqe) &
		    IBT_WC_DETAIL_FC_MATCH_MASK;
		wc->wc_rkey = HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
		flags |= IBT_WC_FEXCH_FMT | IBT_WC_RKEY_INVALIDATED;
	} else {
		/*
		 * Parse the remaining contents of the CQE into the work
		 * completion.  This means filling in SL, QP number, SLID,
		 * immediate data, etc.
		 *
		 * Note:  Not all of these fields are valid in a given
		 * completion.  Many of them depend on the actual type of
		 * completion.  So we fill in all of the fields and leave
		 * it up to the IBTF and consumer to sort out which are
		 * valid based on their context.
		 */
		wc->wc_sl	  = HERMON_CQE_SL_GET(cq, cqe);
		wc->wc_qpn	  = HERMON_CQE_DQPN_GET(cq, cqe);
		wc->wc_slid	  = HERMON_CQE_DLID_GET(cq, cqe);
		wc->wc_immed_data =
		    HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
		wc->wc_ethertype  = (wc->wc_immed_data & 0xFFFF);
		wc->wc_pkey_ix	  = (wc->wc_immed_data &
		    ((1 << state->hs_queryport.log_max_pkey) - 1));
		/*
		 * Fill in "bytes transferred" as appropriate.  Also,
		 * if necessary, fill in the "path bits" field.
		 */
		wc->wc_path_bits = HERMON_CQE_PATHBITS_GET(cq, cqe);
		wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);

		/*
		 * Check for GRH, update the flags, then fill in "wc_flags"
		 * field in the work completion
		 */
		if (HERMON_CQE_GRH_GET(cq, cqe) != 0) {
			flags |= IBT_WC_GRH_PRESENT;
		}

		/* Receive CQE */
		switch (opcode) {
		case HERMON_CQE_RCV_SEND_IMM:
			/*
			 * Note:  According to the PRM, all QP1 recv
			 * completions look like the result of a Send with
			 * Immediate.  They are not, however, (MADs are Send
			 * Only) so we need to check the QP number and set
			 * the flag only if it is non-QP1.
			 */
			qpnum	 = HERMON_CQE_QPNUM_GET(cq, cqe);
			qp1_indx = state->hs_spec_qp1->hr_indx;
			if ((qpnum < qp1_indx) || (qpnum > qp1_indx + 1)) {
				flags |= IBT_WC_IMMED_DATA_PRESENT;
			}
			/* FALLTHROUGH */

		case HERMON_CQE_RCV_SEND:
			type = IBT_WRC_RECV;
			if (HERMON_CQE_IS_IPOK(cq, cqe)) {
				wc->wc_cksum = HERMON_CQE_CKSUM(cq, cqe);
				flags |= IBT_WC_CKSUM_OK;
				wc->wc_detail = IBT_WC_DETAIL_ALL_FLAGS_MASK &
				    HERMON_CQE_IPOIB_STATUS(cq, cqe);
			}
			break;

		case HERMON_CQE_RCV_SEND_INV:
			type = IBT_WRC_RECV;
			flags |= IBT_WC_RKEY_INVALIDATED;
			wc->wc_rkey = wc->wc_immed_data; /* same field in cqe */
			break;

		case HERMON_CQE_RCV_RDMAWR_IMM:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			type = IBT_WRC_RECV_RDMAWI;
			break;

		default:

			HERMON_WARNING(state, "unknown recv CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			return;
		}
	}
	wc->wc_type = type;
	wc->wc_flags = flags;
	wc->wc_status = IBT_WC_SUCCESS;
}

/*
 * hermon_cq_errcqe_consume()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
    hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint32_t		imm_eth_pkey_cred;
	uint_t			status;
	ibt_wc_status_t		ibt_status;

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See hermon_wr.c for more details.
	 */
	wc->wc_id = hermon_wrid_get_entry(cq, cqe);

	/*
	 * Parse the CQE opcode to determine completion type.  We know that
	 * the CQE is an error completion, so we extract only the completion
	 * status/syndrome here.  (In an error CQE the immediate-data field
	 * carries the syndrome instead.)
	 */
	imm_eth_pkey_cred = HERMON_CQE_ERROR_SYNDROME_GET(cq, cqe);
	status = imm_eth_pkey_cred;
	/* flush errors are routine during QP teardown; don't log those */
	if (status != HERMON_CQE_WR_FLUSHED_ERR)
		IBTF_DPRINTF_L2("CQE ERR", "cqe %p QPN %x indx %x status 0x%x "
		    "vendor syndrome %x", cqe, HERMON_CQE_QPNUM_GET(cq, cqe),
		    HERMON_CQE_WQECNTR_GET(cq, cqe), status,
		    HERMON_CQE_ERROR_VENDOR_SYNDROME_GET(cq, cqe));

	/* Map the hardware syndrome to the corresponding IBT status code */
	switch (status) {
	case HERMON_CQE_LOC_LEN_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCLEN);
		ibt_status = IBT_WC_LOCAL_LEN_ERR;
		break;

	case HERMON_CQE_LOC_OP_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCQPOP);
		ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
		break;

	case HERMON_CQE_LOC_PROT_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCPROT);
		ibt_status = IBT_WC_LOCAL_PROTECT_ERR;
		IBTF_DPRINTF_L2("ERRCQE", "is at %p", cqe);
		/* hermon_should_panic is a debugging aid (see top of file) */
		if (hermon_should_panic) {
			cmn_err(CE_PANIC, "Hermon intentional PANIC - "
			    "Local Protection Error\n");
		}
		break;

	case HERMON_CQE_WR_FLUSHED_ERR:
		ibt_status = IBT_WC_WR_FLUSHED_ERR;
		break;

	case HERMON_CQE_MW_BIND_ERR:
		HERMON_WARNING(state, HERMON_FMA_MWBIND);
		ibt_status = IBT_WC_MEM_WIN_BIND_ERR;
		break;

	case HERMON_CQE_BAD_RESPONSE_ERR:
		HERMON_WARNING(state, HERMON_FMA_RESP);
		ibt_status = IBT_WC_BAD_RESPONSE_ERR;
		break;

	case HERMON_CQE_LOCAL_ACCESS_ERR:
		HERMON_WARNING(state, HERMON_FMA_LOCACC);
		ibt_status = IBT_WC_LOCAL_ACCESS_ERR;
		break;

	case HERMON_CQE_REM_INV_REQ_ERR:
		HERMON_WARNING(state, HERMON_FMA_REMREQ);
		ibt_status = IBT_WC_REMOTE_INVALID_REQ_ERR;
		break;

	case HERMON_CQE_REM_ACC_ERR:
		HERMON_WARNING(state, HERMON_FMA_REMACC);
		ibt_status = IBT_WC_REMOTE_ACCESS_ERR;
		break;

	case HERMON_CQE_REM_OP_ERR:
		HERMON_WARNING(state, HERMON_FMA_REMOP);
		ibt_status = IBT_WC_REMOTE_OP_ERR;
		break;

	case HERMON_CQE_TRANS_TO_ERR:
		HERMON_WARNING(state, HERMON_FMA_XPORTCNT);
		ibt_status = IBT_WC_TRANS_TIMEOUT_ERR;
		break;

	case HERMON_CQE_RNRNAK_TO_ERR:
		HERMON_WARNING(state, HERMON_FMA_RNRCNT);
		ibt_status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
		break;

	/*
	 * The following error codes are not supported in the Hermon driver
	 * as they relate only to Reliable Datagram completion statuses:
	 *    case HERMON_CQE_LOCAL_RDD_VIO_ERR:
	 *    case HERMON_CQE_REM_INV_RD_REQ_ERR:
	 *    case HERMON_CQE_EEC_REM_ABORTED_ERR:
	 *    case HERMON_CQE_INV_EEC_NUM_ERR:
	 *    case HERMON_CQE_INV_EEC_STATE_ERR:
	 *    case HERMON_CQE_LOC_EEC_ERR:
	 */

	default:
		HERMON_WARNING(state, "unknown error CQE status");
		HERMON_FMANOTE(state, HERMON_FMA_UNKN);
		ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
		break;
	}

	wc->wc_status = ibt_status;
}


/*
 * hermon_cq_resize_helper()
 *    Context: Can be called only from user or kernel context.
 *
 * Called from hermon_cq_poll() when the special "resize" CQE is seen:
 * swaps the resized buffer's resources into the live CQ handle and
 * releases the old buffer and the temporary resize handle.
 */
void
hermon_cq_resize_helper(hermon_state_t *state, hermon_cqhdl_t cq)
{
	hermon_cqhdl_t 		resize_hdl;
	int 			status;

	/*
	 * we're here because we found the special cqe opcode, so we have
	 * to update the cq_handle, release the old resources, clear the
	 * flag in the cq_hdl, and release the resize_hdl.  When we return
	 * above, it will take care of the rest
	 */
	ASSERT(MUTEX_HELD(&cq->cq_lock));

	resize_hdl = cq->cq_resize_hdl;

	/*
	 * Deregister the memory for the old Completion Queue.  Note:  We
	 * really can't return error here because we have no good way to
	 * cleanup.  Plus, the deregistration really shouldn't ever happen.
	 * So, if it does, it is an indication that something has gone
	 * seriously wrong.  So we print a warning message and return error
	 * (knowing, of course, that the "old" CQ memory will be leaked)
	 */
	status = hermon_mr_deregister(state, &cq->cq_mrhdl, HERMON_MR_DEREG_ALL,
	    HERMON_SLEEP);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to deregister old CQ memory");
	}

	/* Next, free the memory from the old CQ buffer */
	hermon_queue_free(&cq->cq_cqinfo);

	/* now we can update the cq_hdl with the new things saved */

	cq->cq_buf   = resize_hdl->cq_buf;
	cq->cq_mrhdl = resize_hdl->cq_mrhdl;
	cq->cq_bufsz = resize_hdl->cq_bufsz;
	cq->cq_log_cqsz = resize_hdl->cq_log_cqsz;
	cq->cq_umap_dhp = cq->cq_resize_hdl->cq_umap_dhp;
	cq->cq_resize_hdl = 0;	/* clear the "resize in progress" flag */
	bcopy(&resize_hdl->cq_cqinfo, &cq->cq_cqinfo,
	    sizeof (struct hermon_qalloc_info_s));

	/* finally, release the resizing handle */
	kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
}


/*
 * hermon_cq_entries_flush()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
void
hermon_cq_entries_flush(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_cqhdl_t		cq;
	hermon_hw_cqe_t		*cqe, *next_cqe;
	hermon_srqhdl_t		srq;
	hermon_workq_hdr_t	*wq;
	uint32_t		cons_indx, tail_cons_indx, wrap_around_mask;
	uint32_t		new_indx, check_indx, qpnum;
	uint32_t		shift, mask;
	int			outstanding_cqes;

	/*
	 * Remove all software-owned CQEs belonging to "qp" from its recv
	 * CQ and (if different) its send CQ, compacting the surviving CQEs
	 * of other QPs toward the tail and returning any SRQ WQEs to the
	 * SRQ free list.
	 */
	qpnum = qp->qp_qpnum;
	if ((srq = qp->qp_srqhdl) != NULL)
		wq = qp->qp_srqhdl->srq_wq_wqhdr;
	else
		wq = NULL;
	cq = qp->qp_rq_cqhdl;

	if (cq == NULL) {
		cq = qp->qp_sq_cqhdl;
	}

do_send_cq:	/* loop back to here if send_cq is not the same as recv_cq */
	if (cq == NULL)
		return;

	cons_indx = cq->cq_consindx;
	shift = cq->cq_log_cqsz;
	mask = cq->cq_bufsz;
	wrap_around_mask = mask - 1;

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx & wrap_around_mask];

	/*
	 * Loop through the CQ looking for entries owned by software.  If an
	 * entry is owned by software then we increment an 'outstanding_cqes'
	 * count to know how many entries total we have on our CQ.  We use this
	 * value further down to know how many entries to loop through looking
	 * for our same QP number.
	 */
	outstanding_cqes = 0;
	tail_cons_indx = cons_indx;
	while (HERMON_CQE_OWNER_IS_SW(cq, cqe, tail_cons_indx, shift, mask)) {
		/* increment total cqes count */
		outstanding_cqes++;

		/* increment the consumer index */
		tail_cons_indx++;

		/* update the pointer to the next cq entry */
		cqe = &cq->cq_buf[tail_cons_indx & wrap_around_mask];
	}

	/*
	 * Using the 'tail_cons_indx' that was just set, we now know how many
	 * total CQEs possible there are.  Set the 'check_indx' and the
	 * 'new_indx' to the last entry identified by 'tail_cons_indx'
	 */
	check_indx = new_indx = (tail_cons_indx - 1);

	/*
	 * Walk backward from the tail: entries for this QP are dropped
	 * (and their SRQ WQEs recycled); entries for other QPs are copied
	 * down toward the tail so no foreign completion is lost.
	 */
	while (--outstanding_cqes >= 0) {
		cqe = &cq->cq_buf[check_indx & wrap_around_mask];

		/*
		 * If the QP number is the same in the CQE as the QP, then
		 * we must "consume" it.  If it is for an SRQ wqe, then we
		 * also must free the wqe back onto the free list of the SRQ.
		 */
		if (qpnum == HERMON_CQE_QPNUM_GET(cq, cqe)) {
			if (srq && (HERMON_CQE_SENDRECV_GET(cq, cqe) ==
			    HERMON_COMPLETION_RECV)) {
				uint64_t *desc;
				int indx;

				/* Add wqe back to SRQ free list */
				indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) &
				    wq->wq_mask;
				desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
				((uint16_t *)desc)[1] = htons(indx);
				wq->wq_tail = indx;
			}
		} else {	/* CQEs for other QPNs need to remain */
			if (check_indx != new_indx) {
				next_cqe =
				    &cq->cq_buf[new_indx & wrap_around_mask];
				/* Copy the CQE into the "next_cqe" pointer. */
				bcopy(cqe, next_cqe, sizeof (hermon_hw_cqe_t));
			}
			new_indx--;	/* move index to next CQE to fill */
		}
		check_indx--;	/* move index to next CQE to check */
	}

	/*
	 * Update consumer index to be the 'new_indx'.  This moves it past all
	 * removed entries.  Because 'new_indx' is pointing to the last
	 * previously valid SW owned entry, we add 1 to point the cons_indx to
	 * the first HW owned entry.
	 */
	cons_indx = (new_indx + 1);

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we found no QP number
	 * matches above, then we would not have removed anything.  So only if
	 * something was removed do we ring the doorbell.
	 */
	if (cq->cq_consindx != cons_indx) {
		/*
		 * Update the consumer index in both the CQ handle and the
		 * doorbell record.
		 */
		cq->cq_consindx = cons_indx;

		hermon_cq_update_ci_doorbell(cq);

	}
	if (cq != qp->qp_sq_cqhdl) {
		cq = qp->qp_sq_cqhdl;
		goto do_send_cq;
	}
}

/*
 * hermon_get_cq_sched_list()
 *    Context: Only called from attach() path context
 *
 * Read properties, creating entries in hs_cq_sched_list with
 * information about the requested "expected" and "minimum"
 * number of MSI-X interrupt vectors per list entry.
 */
static int
hermon_get_cq_sched_list(hermon_state_t *state)
{
	char **listp, ulp_prop[HERMON_CQH_MAX + 4];
	uint_t nlist, i, j, ndata;
	int *data;
	size_t len;
	hermon_cq_sched_t *cq_schedp;

	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, state->hs_dip,
	    DDI_PROP_DONTPASS, "cqh-group-list", &listp, &nlist) !=
	    DDI_PROP_SUCCESS)
		return (0);	/* no list property; use default pool only */

	state->hs_cq_sched_array_size = nlist;
	state->hs_cq_sched_array = cq_schedp = kmem_zalloc(nlist *
	    sizeof (hermon_cq_sched_t), KM_SLEEP);
	for (i = 0; i < nlist; i++) {
		if ((len = strlen(listp[i])) >= HERMON_CQH_MAX) {
			cmn_err(CE_CONT, "'cqh' property name too long\n");
			goto game_over;
		}
		/* reject duplicate group names */
		for (j = 0; j < i; j++) {
			if (strcmp(listp[j], listp[i]) == 0) {
				cmn_err(CE_CONT, "Duplicate 'cqh' property\n");
				goto game_over;
			}
		}
		(void) strncpy(cq_schedp[i].cqs_name, listp[i], HERMON_CQH_MAX);
		/* per-group property is named "cqh-<groupname>" */
		ulp_prop[0] = 'c';
		ulp_prop[1] = 'q';
		ulp_prop[2] = 'h';
		ulp_prop[3] = '-';
		(void) strncpy(ulp_prop + 4, listp[i], len + 1);
		if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
		    DDI_PROP_DONTPASS, ulp_prop, &data, &ndata) !=
		    DDI_PROP_SUCCESS) {
			cmn_err(CE_CONT, "property '%s' not found\n", ulp_prop);
			goto game_over;
		}
		if (ndata != 2) {
			cmn_err(CE_CONT, "property '%s' does not "
			    "have 2 integers\n", ulp_prop);
			goto game_over_free_data;
		}
		cq_schedp[i].cqs_desired = data[0];
		cq_schedp[i].cqs_minimum = data[1];
		cq_schedp[i].cqs_refcnt = 0;
		ddi_prop_free(data);
	}
	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
	    DDI_PROP_DONTPASS, "cqh-default", &data, &ndata) !=
	    DDI_PROP_SUCCESS) {
		cmn_err(CE_CONT, "property 'cqh-default' not found\n");
		goto game_over;
	}
	if (ndata != 2) {
		cmn_err(CE_CONT, "property 'cqh-default' does not "
		    "have 2 integers\n");
		goto game_over_free_data;
	}
	cq_schedp = &state->hs_cq_sched_default;
	cq_schedp->cqs_desired = data[0];
	cq_schedp->cqs_minimum = data[1];
	cq_schedp->cqs_refcnt = 0;
	ddi_prop_free(data);
	ddi_prop_free(listp);
	return (1);		/* game on */

game_over_free_data:
	ddi_prop_free(data);
game_over:
	cmn_err(CE_CONT, "Error in 'cqh' properties in hermon.conf\n");
	cmn_err(CE_CONT, "completion handler groups not being used\n");
	kmem_free(cq_schedp, nlist * sizeof (hermon_cq_sched_t));
	/*
	 * NOTE: hs_cq_sched_array is left pointing at the freed memory
	 * here; this is harmless because hs_cq_sched_array_size is reset
	 * to 0 and hermon_cq_sched_fini() only frees when it is non-zero.
	 */
	state->hs_cq_sched_array_size = 0;
	ddi_prop_free(listp);
	return (0);
}

/*
 * hermon_cq_sched_init()
 *    Context: Only called from attach() path context
 *
 * Read the hermon.conf properties looking for cq_sched info,
 * creating reserved pools of MSI-X interrupt ranges for the
 * specified ULPs.
 */
int
hermon_cq_sched_init(hermon_state_t *state)
{
	hermon_cq_sched_t	*cq_schedp, *defp;
	int			i, desired, array_size;

	mutex_init(&state->hs_cq_sched_lock, NULL, MUTEX_DRIVER,
	    DDI_INTR_PRI(state->hs_intrmsi_pri));

	mutex_enter(&state->hs_cq_sched_lock);
	state->hs_cq_sched_array = NULL;

	/* initialize cq_sched_default: all allocated MSI-X vectors */
	defp = &state->hs_cq_sched_default;
	defp->cqs_start_hid = 1;
	defp->cqs_len = state->hs_intrmsi_allocd;
	defp->cqs_next_alloc = defp->cqs_len - 1;
	(void) strncpy(defp->cqs_name, "default", 8);

	/* Read properties to determine which ULPs use cq_sched */
	if (hermon_get_cq_sched_list(state) == 0)
		goto done;

	/* Determine if we have enough vectors, or if we have to scale down */
	desired = defp->cqs_desired;	/* default desired (from hermon.conf) */
	if (desired <= 0)
		goto done;		/* all interrupts in the default pool */
	cq_schedp = state->hs_cq_sched_array;
	array_size = state->hs_cq_sched_array_size;
	for (i = 0; i < array_size; i++)
		desired += cq_schedp[i].cqs_desired;
	if (desired > state->hs_intrmsi_allocd) {
		cmn_err(CE_CONT, "#interrupts allocated (%d) is less than "
		    "the #interrupts desired (%d)\n",
		    state->hs_intrmsi_allocd, desired);
		cmn_err(CE_CONT, "completion handler groups not being used\n");
		goto done;		/* all interrupts in the default pool */
	}
	/*
	 * Game on.  For each cq_sched group, reserve the MSI-X range;
	 * each reservation shrinks the default pool and advances its
	 * starting handler id.
	 */
	for (i = 0; i < array_size; i++) {
		desired = cq_schedp[i].cqs_desired;
		cq_schedp[i].cqs_start_hid = defp->cqs_start_hid;
		cq_schedp[i].cqs_len = desired;
		cq_schedp[i].cqs_next_alloc = desired - 1;
		defp->cqs_len -= desired;
		defp->cqs_start_hid += desired;
	}
	/* reset default's start allocation seed */
	state->hs_cq_sched_default.cqs_next_alloc =
	    state->hs_cq_sched_default.cqs_len - 1;

done:
	mutex_exit(&state->hs_cq_sched_lock);
	return (IBT_SUCCESS);
}

/*
 * hermon_cq_sched_fini()
 *    Release the cq_sched group array and destroy its lock.
 */
void
hermon_cq_sched_fini(hermon_state_t *state)
{
	mutex_enter(&state->hs_cq_sched_lock);
	if (state->hs_cq_sched_array_size) {
		kmem_free(state->hs_cq_sched_array, sizeof (hermon_cq_sched_t) *
		    state->hs_cq_sched_array_size);
		state->hs_cq_sched_array_size = 0;
		state->hs_cq_sched_array = NULL;
	}
	mutex_exit(&state->hs_cq_sched_lock);
	mutex_destroy(&state->hs_cq_sched_lock);
}

/*
 * hermon_cq_sched_alloc()
 *    Look up the named cq_sched (completion handler) group, bumping its
 *    refcount.  Sets *cq_sched_pp to the group handle, or to NULL when no
 *    group applies; returns IBT_CQ_NO_SCHED_GROUP only when the caller
 *    demanded an exact group match (IBT_CQS_EXACT_SCHED_GROUP).
 */
int
hermon_cq_sched_alloc(hermon_state_t *state, ibt_cq_sched_attr_t *attr,
    hermon_cq_sched_t **cq_sched_pp)
{
	hermon_cq_sched_t	*cq_schedp;
	int			i;
	char			*name;
	ibt_cq_sched_flags_t	flags;

	flags = attr->cqs_flags;
	if ((flags & (IBT_CQS_SCHED_GROUP | IBT_CQS_EXACT_SCHED_GROUP)) == 0) {
		*cq_sched_pp = NULL;
		return (IBT_SUCCESS);
	}
	name = attr->cqs_pool_name;

	mutex_enter(&state->hs_cq_sched_lock);
	cq_schedp = state->hs_cq_sched_array;
	for (i = 0; i < state->hs_cq_sched_array_size; i++, cq_schedp++) {
		if (strcmp(name, cq_schedp->cqs_name) == 0) {
			if (cq_schedp->cqs_len != 0)
				cq_schedp->cqs_refcnt++;
			break;	/* found it */
		}
	}
	if ((i == state->hs_cq_sched_array_size) ||	/* not found, or */
	    (cq_schedp->cqs_len == 0)) /* defined, but no dedicated intr's */
		cq_schedp = NULL;
	mutex_exit(&state->hs_cq_sched_lock);

	*cq_sched_pp = cq_schedp;	/* set to valid hdl, or to NULL */
	if ((cq_schedp == NULL) &&
	    (attr->cqs_flags & IBT_CQS_EXACT_SCHED_GROUP))
		return (IBT_CQ_NO_SCHED_GROUP);
	else
		return (IBT_SUCCESS);
}

/*
 * hermon_cq_sched_free()
 *    Drop a reference on a cq_sched group obtained from
 *    hermon_cq_sched_alloc().  A NULL handle is a no-op.
 */
int
hermon_cq_sched_free(hermon_state_t *state, hermon_cq_sched_t *cq_schedp)
{
	if (cq_schedp != NULL) {
		/* Just decrement refcnt */
		mutex_enter(&state->hs_cq_sched_lock);
		if (cq_schedp->cqs_refcnt == 0)
			HERMON_WARNING(state, "cq_sched free underflow\n");
		else
			cq_schedp->cqs_refcnt--;
		mutex_exit(&state->hs_cq_sched_lock);
	}
	return (IBT_SUCCESS);
}