1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * tavor_misc.c 29 * Tavor Miscellaneous routines - Address Handle, Multicast, Protection 30 * Domain, and port-related operations 31 * 32 * Implements all the routines necessary for allocating, freeing, querying 33 * and modifying Address Handles and Protection Domains. Also implements 34 * all the routines necessary for adding and removing Queue Pairs to/from 35 * Multicast Groups. Lastly, it implements the routines necessary for 36 * port-related query and modify operations. 37 */ 38 39 #include <sys/types.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/modctl.h> 44 #include <sys/bitmap.h> 45 #include <sys/sysmacros.h> 46 47 #include <sys/ib/adapters/tavor/tavor.h> 48 49 static void tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav, 50 uint_t flag); 51 static int tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg, 52 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp, uint_t *qp_found); 53 static int tavor_mcg_qplist_remove(tavor_mcghdl_t mcg, 54 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp); 55 static void tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp); 56 static void tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp); 57 static uint_t tavor_mcg_walk_mgid_hash(tavor_state_t *state, 58 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx); 59 static void tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg, 60 tavor_hw_mcg_t *mcg_hdr, ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc); 61 static int tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx, 62 uint_t prev_indx, tavor_hw_mcg_t *mcg_entry); 63 static int tavor_mcg_entry_invalidate(tavor_state_t *state, 64 tavor_hw_mcg_t *mcg_entry, uint_t indx); 65 static int tavor_mgid_is_valid(ib_gid_t gid); 66 static int tavor_mlid_is_valid(ib_lid_t lid); 67 68 69 /* 70 * tavor_ah_alloc() 71 * Context: Can be called only from user or kernel context. 72 */ 73 int 74 tavor_ah_alloc(tavor_state_t *state, tavor_pdhdl_t pd, 75 ibt_adds_vect_t *attr_p, tavor_ahhdl_t *ahhdl, uint_t sleepflag) 76 { 77 tavor_rsrc_t *udav, *rsrc; 78 tavor_hw_udav_t udav_entry; 79 tavor_ahhdl_t ah; 80 ibt_mr_attr_t mr_attr; 81 tavor_mr_options_t op; 82 tavor_mrhdl_t mr; 83 uint64_t data; 84 uint32_t size; 85 int status, i, flag; 86 char *errormsg; 87 88 TAVOR_TNF_ENTER(tavor_ah_alloc); 89 90 /* 91 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to 92 * indicate that we wish to allocate an "invalid" (i.e. empty) 93 * address handle XXX 94 */ 95 96 /* Validate that specified port number is legal */ 97 if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) { 98 /* Set "status" and "errormsg" and goto failure */ 99 TAVOR_TNF_FAIL(IBT_HCA_PORT_INVALID, "invalid port num"); 100 goto ahalloc_fail; 101 } 102 103 /* 104 * Allocate a UDAV entry. This will be filled in with all the 105 * necessary parameters to define the Address Handle. Unlike the 106 * other hardware resources no ownership transfer takes place as 107 * these UDAV entries are always owned by hardware. 108 */ 109 status = tavor_rsrc_alloc(state, TAVOR_UDAV, 1, sleepflag, &udav); 110 if (status != DDI_SUCCESS) { 111 /* Set "status" and "errormsg" and goto failure */ 112 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed UDAV"); 113 goto ahalloc_fail; 114 } 115 116 /* 117 * Allocate the software structure for tracking the address handle 118 * (i.e. the Tavor Address Handle struct). If we fail here, we must 119 * undo the previous resource allocation. 120 */ 121 status = tavor_rsrc_alloc(state, TAVOR_AHHDL, 1, sleepflag, &rsrc); 122 if (status != DDI_SUCCESS) { 123 /* Set "status" and "errormsg" and goto failure */ 124 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed AH handler"); 125 goto ahalloc_fail1; 126 } 127 ah = (tavor_ahhdl_t)rsrc->tr_addr; 128 129 /* Increment the reference count on the protection domain (PD) */ 130 tavor_pd_refcnt_inc(pd); 131 132 /* 133 * Fill in the UDAV entry. Note: We are only filling in a temporary 134 * copy here, which we will later copy into the actual entry in 135 * Tavor DDR memory. This starts be zeroing out the temporary copy 136 * and then calling tavor_set_addr_path() to fill in the common 137 * portions that can be pulled from the "ibt_adds_vect_t" passed in 138 */ 139 bzero(&udav_entry, sizeof (tavor_hw_udav_t)); 140 status = tavor_set_addr_path(state, attr_p, 141 (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL); 142 if (status != DDI_SUCCESS) { 143 tavor_pd_refcnt_dec(pd); 144 tavor_rsrc_free(state, &rsrc); 145 tavor_rsrc_free(state, &udav); 146 /* Set "status" and "errormsg" and goto failure */ 147 TAVOR_TNF_FAIL(status, "failed in tavor_set_addr_path"); 148 goto ahalloc_fail; 149 } 150 udav_entry.pd = pd->pd_pdnum; 151 udav_entry.msg_sz = state->ts_cfg_profile->cp_max_mtu - 1; 152 153 /* 154 * Register the memory for the UDAV. The memory for the UDAV must 155 * be registered in the Tavor TPT tables. This gives us the LKey 156 * that we will need when we later post a UD work request that 157 * uses this address handle. 158 * We might be able to pre-register all the memory for the UDAV XXX 159 */ 160 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP; 161 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)udav->tr_addr; 162 mr_attr.mr_len = udav->tr_len; 163 mr_attr.mr_as = NULL; 164 mr_attr.mr_flags = flag; 165 op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass; 166 op.mro_bind_dmahdl = NULL; 167 op.mro_bind_override_addr = 0; 168 status = tavor_mr_register(state, pd, &mr_attr, &mr, &op); 169 if (status != DDI_SUCCESS) { 170 /* Set "status" and "errormsg" and goto failure */ 171 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr"); 172 goto ahalloc_fail2; 173 } 174 175 /* 176 * Fill in the UDAV entry. Here we copy all the information from 177 * the temporary UDAV into the DDR memory for the real UDAV entry. 178 * Note that we copy everything but the first 64-bit word. This 179 * is where the PD number for the address handle resides. 180 * By filling everything except the PD and then writing the PD in 181 * a separate step below, we can ensure that the UDAV is not 182 * accessed while there are partially written values in it (something 183 * which really should not happen anyway). This is guaranteed 184 * because we take measures to ensure that the PD number is zero for 185 * all unused UDAV (and because PD#0 is reserved for Tavor). 186 */ 187 size = sizeof (tavor_hw_udav_t) >> 3; 188 for (i = 1; i < size; i++) { 189 data = ((uint64_t *)&udav_entry)[i]; 190 ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i), 191 data); 192 } 193 data = ((uint64_t *)&udav_entry)[0]; 194 ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, data); 195 196 /* 197 * Fill in the rest of the Tavor Address Handle struct. Having 198 * successfully copied the UDAV into the hardware, we update the 199 * following fields for use in further operations on the AH. 200 * 201 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field 202 * here because we may need to return it later to the IBTF (as a 203 * result of a subsequent query operation). Unlike the other UDAV 204 * parameters, the value of "av_dgid.gid_guid" is not always preserved 205 * by being written to hardware. The reason for this is described in 206 * tavor_set_addr_path(). 207 */ 208 ah->ah_udavrsrcp = udav; 209 ah->ah_rsrcp = rsrc; 210 ah->ah_pdhdl = pd; 211 ah->ah_mrhdl = mr; 212 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 213 ah->ah_save_srate = attr_p->av_srate; 214 *ahhdl = ah; 215 216 /* Determine if later ddi_dma_sync will be necessary */ 217 ah->ah_sync = TAVOR_UDAV_IS_SYNC_REQ(state); 218 219 /* Sync the UDAV for use by the hardware */ 220 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV); 221 222 TAVOR_TNF_EXIT(tavor_ah_alloc); 223 return (DDI_SUCCESS); 224 225 ahalloc_fail2: 226 tavor_pd_refcnt_dec(pd); 227 tavor_rsrc_free(state, &rsrc); 228 ahalloc_fail1: 229 tavor_rsrc_free(state, &udav); 230 ahalloc_fail: 231 TNF_PROBE_1(tavor_ah_alloc_fail, TAVOR_TNF_ERROR, "", 232 tnf_string, msg, errormsg); 233 TAVOR_TNF_EXIT(tavor_ah_alloc); 234 return (status); 235 } 236 237 238 /* 239 * tavor_ah_free() 240 * Context: Can be called only from user or kernel context. 241 */ 242 /* ARGSUSED */ 243 int 244 tavor_ah_free(tavor_state_t *state, tavor_ahhdl_t *ahhdl, uint_t sleepflag) 245 { 246 tavor_rsrc_t *udav, *rsrc; 247 tavor_pdhdl_t pd; 248 tavor_mrhdl_t mr; 249 tavor_ahhdl_t ah; 250 int status; 251 252 TAVOR_TNF_ENTER(tavor_ah_free); 253 254 /* 255 * Pull all the necessary information from the Tavor Address Handle 256 * struct. This is necessary here because the resource for the 257 * AH is going to be freed up as part of this operation. 258 */ 259 ah = *ahhdl; 260 mutex_enter(&ah->ah_lock); 261 udav = ah->ah_udavrsrcp; 262 rsrc = ah->ah_rsrcp; 263 pd = ah->ah_pdhdl; 264 mr = ah->ah_mrhdl; 265 mutex_exit(&ah->ah_lock); 266 267 /* 268 * Deregister the memory for the UDAV. If this fails for any reason, 269 * then it is an indication that something (either in HW or SW) has 270 * gone seriously wrong. So we print a warning message and return 271 * failure. 272 */ 273 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, 274 sleepflag); 275 if (status != DDI_SUCCESS) { 276 TNF_PROBE_0(tavor_ah_free_dereg_mr_fail, TAVOR_TNF_ERROR, ""); 277 TAVOR_TNF_EXIT(tavor_ah_free); 278 return (ibc_get_ci_failure(0)); 279 } 280 281 /* 282 * Write zero to the first 64-bit word in the UDAV entry. As 283 * described above (in tavor_ah_alloc), the PD number is stored in 284 * the first 64-bits of each UDAV and setting this to zero is 285 * guaranteed to invalidate the entry. 286 */ 287 ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, 0); 288 289 /* Sync the UDAV for use by the hardware */ 290 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV); 291 292 /* Decrement the reference count on the protection domain (PD) */ 293 tavor_pd_refcnt_dec(pd); 294 295 /* Free the Tavor Address Handle structure */ 296 tavor_rsrc_free(state, &rsrc); 297 298 /* Free up the UDAV entry resource */ 299 tavor_rsrc_free(state, &udav); 300 301 /* Set the ahhdl pointer to NULL and return success */ 302 *ahhdl = NULL; 303 304 TAVOR_TNF_EXIT(tavor_ah_free); 305 return (DDI_SUCCESS); 306 } 307 308 309 /* 310 * tavor_ah_query() 311 * Context: Can be called from interrupt or base context. 312 */ 313 /* ARGSUSED */ 314 int 315 tavor_ah_query(tavor_state_t *state, tavor_ahhdl_t ah, tavor_pdhdl_t *pd, 316 ibt_adds_vect_t *attr_p) 317 { 318 tavor_hw_udav_t udav_entry; 319 tavor_rsrc_t *udav; 320 uint64_t data; 321 uint32_t size; 322 int i; 323 324 TAVOR_TNF_ENTER(tavor_ah_query); 325 326 mutex_enter(&ah->ah_lock); 327 328 /* 329 * Pull all the necessary information from the Tavor Address Handle 330 * structure 331 */ 332 udav = ah->ah_udavrsrcp; 333 *pd = ah->ah_pdhdl; 334 335 /* 336 * Copy the UDAV entry into the temporary copy. Here we copy all 337 * the information from the UDAV entry in DDR memory into the 338 * temporary UDAV. Note: We don't need to sync the UDAV for 339 * reading by software because Tavor HW never modifies the entry. 340 */ 341 size = sizeof (tavor_hw_udav_t) >> 3; 342 for (i = 0; i < size; i++) { 343 data = ddi_get64(udav->tr_acchdl, 344 ((uint64_t *)udav->tr_addr + i)); 345 ((uint64_t *)&udav_entry)[i] = data; 346 } 347 348 /* 349 * Fill in "ibt_adds_vect_t". We call tavor_get_addr_path() to fill 350 * the common portions that can be pulled from the UDAV we pass in. 351 * 352 * NOTE: We will also fill the "av_dgid.gid_guid" field from the 353 * "ah_save_guid" field we have previously saved away. The reason 354 * for this is described in tavor_ah_alloc() and tavor_ah_modify(). 355 */ 356 tavor_get_addr_path(state, (tavor_hw_addr_path_t *)&udav_entry, 357 attr_p, TAVOR_ADDRPATH_UDAV, NULL); 358 359 attr_p->av_dgid.gid_guid = ah->ah_save_guid; 360 attr_p->av_srate = ah->ah_save_srate; 361 362 mutex_exit(&ah->ah_lock); 363 TAVOR_TNF_EXIT(tavor_ah_query); 364 return (DDI_SUCCESS); 365 } 366 367 368 /* 369 * tavor_ah_modify() 370 * Context: Can be called from interrupt or base context. 371 */ 372 /* ARGSUSED */ 373 int 374 tavor_ah_modify(tavor_state_t *state, tavor_ahhdl_t ah, 375 ibt_adds_vect_t *attr_p) 376 { 377 tavor_hw_udav_t udav_entry; 378 tavor_rsrc_t *udav; 379 uint64_t data_new, data_old; 380 uint32_t udav_pd, size, portnum_new; 381 int i, status; 382 383 TAVOR_TNF_ENTER(tavor_ah_modify); 384 385 /* Validate that specified port number is legal */ 386 if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) { 387 TNF_PROBE_1(tavor_ah_modify_inv_portnum, 388 TAVOR_TNF_ERROR, "", tnf_uint, port, attr_p->av_port_num); 389 TAVOR_TNF_EXIT(tavor_ah_modify); 390 return (IBT_HCA_PORT_INVALID); 391 } 392 393 mutex_enter(&ah->ah_lock); 394 395 /* 396 * Pull all the necessary information from the Tavor Address Handle 397 * structure 398 */ 399 udav = ah->ah_udavrsrcp; 400 401 /* 402 * Fill in the UDAV entry. Note: we are only filling in a temporary 403 * copy here, which we will later copy into the actual entry in 404 * Tavor DDR memory. This starts be zeroing out the temporary copy 405 * and then calling tavor_set_addr_path() to fill in the common 406 * portions that can be pulled from the "ibt_adds_vect_t" passed in 407 * 408 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid" 409 * field here (just as we did during tavor_ah_alloc()) because we 410 * may need to return it later to the IBTF (as a result of a 411 * subsequent query operation). As explained in tavor_ah_alloc(), 412 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid" 413 * is not always preserved by being written to hardware. The reason 414 * for this is described in tavor_set_addr_path(). 415 */ 416 bzero(&udav_entry, sizeof (tavor_hw_udav_t)); 417 status = tavor_set_addr_path(state, attr_p, 418 (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL); 419 if (status != DDI_SUCCESS) { 420 mutex_exit(&ah->ah_lock); 421 TNF_PROBE_0(tavor_ah_modify_setaddrpath_fail, 422 TAVOR_TNF_ERROR, ""); 423 TAVOR_TNF_EXIT(tavor_ah_modify); 424 return (status); 425 } 426 ah->ah_save_guid = attr_p->av_dgid.gid_guid; 427 ah->ah_save_srate = attr_p->av_srate; 428 429 /* 430 * Save away the current PD number for this UDAV. Then temporarily 431 * invalidate the entry (by setting the PD to zero). Note: Since 432 * the first 32 bits of the UDAV actually contain the current port 433 * number _and_ current PD number, we need to mask off some bits. 434 */ 435 udav_pd = ddi_get32(udav->tr_acchdl, (uint32_t *)udav->tr_addr); 436 udav_pd = udav_pd & 0xFFFFFF; 437 ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, 0); 438 439 /* Sync the UDAV for use by the hardware */ 440 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV); 441 442 /* 443 * Copy UDAV structure to the entry 444 * Note: We copy in 64-bit chunks. For the first two of these 445 * chunks it is necessary to read the current contents of the 446 * UDAV, mask off the modifiable portions (maintaining any 447 * of the "reserved" portions), and then mask on the new data. 448 */ 449 size = sizeof (tavor_hw_udav_t) >> 3; 450 for (i = 0; i < size; i++) { 451 data_new = ((uint64_t *)&udav_entry)[i]; 452 data_old = ddi_get64(udav->tr_acchdl, 453 ((uint64_t *)udav->tr_addr + i)); 454 455 /* 456 * Apply mask to change only the relevant values. Note: We 457 * extract the new portnum from the address handle here 458 * because the "PD" and "portnum" fields are in the same 459 * 32-bit word in the UDAV. We will use the (new) port 460 * number extracted here when we write the valid PD number 461 * in the last step below. 462 */ 463 if (i == 0) { 464 data_old = data_old & TAVOR_UDAV_MODIFY_MASK0; 465 portnum_new = data_new >> 56; 466 } else if (i == 1) { 467 data_old = data_old & TAVOR_UDAV_MODIFY_MASK1; 468 } else { 469 data_old = 0; 470 } 471 472 /* Write the updated values to the UDAV (in DDR) */ 473 data_new = data_old | data_new; 474 ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i), 475 data_new); 476 } 477 478 /* 479 * Sync the body of the UDAV for use by the hardware. After we 480 * have updated the PD number (to make the UDAV valid), we sync 481 * again to push the entire entry out for hardware access. 482 */ 483 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV); 484 485 /* 486 * Put the valid PD number back into UDAV entry. Note: Because port 487 * number and PD number are in the same word, we must mask the 488 * new port number with the old PD number before writing it back 489 * to the UDAV entry 490 */ 491 udav_pd = ((portnum_new << 24) | udav_pd); 492 ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, udav_pd); 493 494 /* Sync the rest of the UDAV for use by the hardware */ 495 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV); 496 497 mutex_exit(&ah->ah_lock); 498 TAVOR_TNF_EXIT(tavor_ah_modify); 499 return (DDI_SUCCESS); 500 } 501 502 503 /* 504 * tavor_udav_sync() 505 * Context: Can be called from interrupt or base context. 506 */ 507 /* ARGSUSED */ 508 static void 509 tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav, uint_t flag) 510 { 511 ddi_dma_handle_t dmahdl; 512 off_t offset; 513 int status; 514 515 TAVOR_TNF_ENTER(tavor_udav_sync); 516 517 /* Determine if AH needs to be synced or not */ 518 if (ah->ah_sync == 0) { 519 TAVOR_TNF_EXIT(tavor_udav_sync); 520 return; 521 } 522 523 /* Get the DMA handle from AH handle */ 524 dmahdl = ah->ah_mrhdl->mr_bindinfo.bi_dmahdl; 525 526 /* Calculate offset into address handle */ 527 offset = (off_t)0; 528 status = ddi_dma_sync(dmahdl, offset, sizeof (tavor_hw_udav_t), flag); 529 if (status != DDI_SUCCESS) { 530 TNF_PROBE_0(tavor_udav_sync_getnextentry_fail, 531 TAVOR_TNF_ERROR, ""); 532 TAVOR_TNF_EXIT(tavor_udav_sync); 533 return; 534 } 535 536 TAVOR_TNF_EXIT(tavor_udav_sync); 537 } 538 539 540 /* 541 * tavor_mcg_attach() 542 * Context: Can be called only from user or kernel context. 543 */ 544 int 545 tavor_mcg_attach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid, 546 ib_lid_t lid) 547 { 548 tavor_rsrc_t *rsrc; 549 tavor_hw_mcg_t *mcg_entry; 550 tavor_hw_mcg_qp_list_t *mcg_entry_qplist; 551 tavor_mcghdl_t mcg, newmcg; 552 uint64_t mgid_hash; 553 uint32_t end_indx; 554 int status; 555 uint_t qp_found; 556 char *errormsg; 557 558 TAVOR_TNF_ENTER(tavor_mcg_attach); 559 560 /* 561 * It is only allowed to attach MCG to UD queue pairs. Verify 562 * that the intended QP is of the appropriate transport type 563 */ 564 if (qp->qp_serv_type != TAVOR_QP_UD) { 565 /* Set "status" and "errormsg" and goto failure */ 566 TAVOR_TNF_FAIL(IBT_QP_SRV_TYPE_INVALID, "invalid service type"); 567 goto mcgattach_fail; 568 } 569 570 /* 571 * Check for invalid Multicast DLID. Specifically, all Multicast 572 * LIDs should be within a well defined range. If the specified LID 573 * is outside of that range, then return an error. 574 */ 575 if (tavor_mlid_is_valid(lid) == 0) { 576 /* Set "status" and "errormsg" and goto failure */ 577 TAVOR_TNF_FAIL(IBT_MC_MLID_INVALID, "invalid MLID"); 578 goto mcgattach_fail; 579 } 580 /* 581 * Check for invalid Multicast GID. All Multicast GIDs should have 582 * a well-defined pattern of bits and flags that are allowable. If 583 * the specified GID does not meet the criteria, then return an error. 584 */ 585 if (tavor_mgid_is_valid(gid) == 0) { 586 /* Set "status" and "errormsg" and goto failure */ 587 TAVOR_TNF_FAIL(IBT_MC_MGID_INVALID, "invalid MGID"); 588 goto mcgattach_fail; 589 } 590 591 /* 592 * Compute the MGID hash value. Since the MCG table is arranged as 593 * a number of separate hash chains, this operation converts the 594 * specified MGID into the starting index of an entry in the hash 595 * table (i.e. the index for the start of the appropriate hash chain). 596 * Subsequent operations below will walk the chain searching for the 597 * right place to add this new QP. 598 */ 599 status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid, 600 &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT()); 601 if (status != TAVOR_CMD_SUCCESS) { 602 cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n", 603 status); 604 TNF_PROBE_1(tavor_mcg_attach_mgid_hash_cmd_fail, 605 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 606 TAVOR_TNF_EXIT(tavor_mcg_attach); 607 return (ibc_get_ci_failure(0)); 608 } 609 610 /* 611 * Grab the multicast group mutex. Then grab the pre-allocated 612 * temporary buffer used for holding and/or modifying MCG entries. 613 * Zero out the temporary MCG entry before we begin. 614 */ 615 mutex_enter(&state->ts_mcglock); 616 mcg_entry = state->ts_mcgtmp; 617 mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry); 618 bzero(mcg_entry, TAVOR_MCGMEM_SZ(state)); 619 620 /* 621 * Walk through the array of MCG entries starting at "mgid_hash". 622 * Try to find the appropriate place for this new QP to be added. 623 * This could happen when the first entry of the chain has MGID == 0 624 * (which means that the hash chain is empty), or because we find 625 * an entry with the same MGID (in which case we'll add the QP to 626 * that MCG), or because we come to the end of the chain (in which 627 * case this is the first QP being added to the multicast group that 628 * corresponds to the MGID. The tavor_mcg_walk_mgid_hash() routine 629 * walks the list and returns an index into the MCG table. The entry 630 * at this index is then checked to determine which case we have 631 * fallen into (see below). Note: We are using the "shadow" MCG 632 * list (of tavor_mcg_t structs) for this lookup because the real 633 * MCG entries are in hardware (and the lookup process would be much 634 * more time consuming). 635 */ 636 end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL); 637 mcg = &state->ts_mcghdl[end_indx]; 638 639 /* 640 * If MGID == 0, then the hash chain is empty. Just fill in the 641 * current entry. Note: No need to allocate an MCG table entry 642 * as all the hash chain "heads" are already preallocated. 643 */ 644 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) { 645 646 /* Fill in the current entry in the "shadow" MCG list */ 647 tavor_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL); 648 649 /* 650 * Try to add the new QP number to the list. This (and the 651 * above) routine fills in a temporary MCG. The "mcg_entry" 652 * and "mcg_entry_qplist" pointers simply point to different 653 * offsets within the same temporary copy of the MCG (for 654 * convenience). Note: If this fails, we need to invalidate 655 * the entries we've already put into the "shadow" list entry 656 * above. 657 */ 658 status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 659 &qp_found); 660 if (status != DDI_SUCCESS) { 661 bzero(mcg, sizeof (struct tavor_sw_mcg_list_s)); 662 mutex_exit(&state->ts_mcglock); 663 /* Set "status" and "errormsg" and goto failure */ 664 TAVOR_TNF_FAIL(status, "failed qplist add"); 665 goto mcgattach_fail; 666 } 667 668 /* 669 * Once the temporary MCG has been filled in, write the entry 670 * into the appropriate location in the Tavor MCG entry table. 671 * If it's successful, then drop the lock and return success. 672 * Note: In general, this operation shouldn't fail. If it 673 * does, then it is an indication that something (probably in 674 * HW, but maybe in SW) has gone seriously wrong. We still 675 * want to zero out the entries that we've filled in above 676 * (in the tavor_mcg_setup_new_hdr() routine). 677 */ 678 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx, 679 TAVOR_CMD_NOSLEEP_SPIN); 680 if (status != TAVOR_CMD_SUCCESS) { 681 bzero(mcg, sizeof (struct tavor_sw_mcg_list_s)); 682 mutex_exit(&state->ts_mcglock); 683 TAVOR_WARNING(state, "failed to write MCG entry"); 684 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: " 685 "%08x\n", status); 686 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail, 687 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 688 tnf_uint, indx, end_indx); 689 TAVOR_TNF_EXIT(tavor_mcg_attach); 690 return (ibc_get_ci_failure(0)); 691 } 692 693 /* 694 * Now that we know all the Tavor firmware accesses have been 695 * successful, we update the "shadow" MCG entry by incrementing 696 * the "number of attached QPs" count. 697 * 698 * We increment only if the QP is not already part of the 699 * MCG by checking the 'qp_found' flag returned from the 700 * qplist_add above. 701 */ 702 if (!qp_found) { 703 mcg->mcg_num_qps++; 704 705 /* 706 * Increment the refcnt for this QP. Because the QP 707 * was added to this MCG, the refcnt must be 708 * incremented. 709 */ 710 tavor_qp_mcg_refcnt_inc(qp); 711 } 712 713 /* 714 * We drop the lock and return success. 715 */ 716 mutex_exit(&state->ts_mcglock); 717 TAVOR_TNF_EXIT(tavor_mcg_attach); 718 return (DDI_SUCCESS); 719 } 720 721 /* 722 * If the specified MGID matches the MGID in the current entry, then 723 * we need to try to add the QP to the current MCG entry. In this 724 * case, it means that we need to read the existing MCG entry (into 725 * the temporary MCG), add the new QP number to the temporary entry 726 * (using the same method we used above), and write the entry back 727 * to the hardware (same as above). 728 */ 729 if ((mcg->mcg_mgid_h == gid.gid_prefix) && 730 (mcg->mcg_mgid_l == gid.gid_guid)) { 731 732 /* 733 * Read the current MCG entry into the temporary MCG. Note: 734 * In general, this operation shouldn't fail. If it does, 735 * then it is an indication that something (probably in HW, 736 * but maybe in SW) has gone seriously wrong. 737 */ 738 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx, 739 TAVOR_CMD_NOSLEEP_SPIN); 740 if (status != TAVOR_CMD_SUCCESS) { 741 mutex_exit(&state->ts_mcglock); 742 TAVOR_WARNING(state, "failed to read MCG entry"); 743 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: " 744 "%08x\n", status); 745 TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail, 746 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 747 tnf_uint, indx, end_indx); 748 TAVOR_TNF_EXIT(tavor_mcg_attach); 749 return (ibc_get_ci_failure(0)); 750 } 751 752 /* 753 * Try to add the new QP number to the list. This routine 754 * fills in the necessary pieces of the temporary MCG. The 755 * "mcg_entry_qplist" pointer is used to point to the portion 756 * of the temporary MCG that holds the QP numbers. 757 * 758 * Note: tavor_mcg_qplist_add() returns SUCCESS if it 759 * already found the QP in the list. In this case, the QP is 760 * not added on to the list again. Check the flag 'qp_found' 761 * if this value is needed to be known. 762 * 763 */ 764 status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp, 765 &qp_found); 766 if (status != DDI_SUCCESS) { 767 mutex_exit(&state->ts_mcglock); 768 /* Set "status" and "errormsg" and goto failure */ 769 TAVOR_TNF_FAIL(status, "failed qplist add"); 770 goto mcgattach_fail; 771 } 772 773 /* 774 * Once the temporary MCG has been updated, write the entry 775 * into the appropriate location in the Tavor MCG entry table. 776 * If it's successful, then drop the lock and return success. 777 * Note: In general, this operation shouldn't fail. If it 778 * does, then it is an indication that something (probably in 779 * HW, but maybe in SW) has gone seriously wrong. 780 */ 781 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx, 782 TAVOR_CMD_NOSLEEP_SPIN); 783 if (status != TAVOR_CMD_SUCCESS) { 784 mutex_exit(&state->ts_mcglock); 785 TAVOR_WARNING(state, "failed to write MCG entry"); 786 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: " 787 "%08x\n", status); 788 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail, 789 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 790 tnf_uint, indx, end_indx); 791 TAVOR_TNF_EXIT(tavor_mcg_attach); 792 return (ibc_get_ci_failure(0)); 793 } 794 795 /* 796 * Now that we know all the Tavor firmware accesses have been 797 * successful, we update the current "shadow" MCG entry by 798 * incrementing the "number of attached QPs" count. 799 * 800 * We increment only if the QP is not already part of the 801 * MCG by checking the 'qp_found' flag returned from the 802 * qplist_add above. 803 */ 804 if (!qp_found) { 805 mcg->mcg_num_qps++; 806 807 /* 808 * Increment the refcnt for this QP. Because the QP 809 * was added to this MCG, the refcnt must be 810 * incremented. 811 */ 812 tavor_qp_mcg_refcnt_inc(qp); 813 } 814 815 /* 816 * We drop the lock and return success. 817 */ 818 mutex_exit(&state->ts_mcglock); 819 TAVOR_TNF_EXIT(tavor_mcg_attach); 820 return (DDI_SUCCESS); 821 } 822 823 /* 824 * If we've reached here, then we're at the end of the hash chain. 825 * We need to allocate a new MCG entry, fill it in, write it to Tavor, 826 * and update the previous entry to link the new one to the end of the 827 * chain. 828 */ 829 830 /* 831 * Allocate an MCG table entry. This will be filled in with all 832 * the necessary parameters to define the multicast group. Then it 833 * will be written to the hardware in the next-to-last step below. 834 */ 835 status = tavor_rsrc_alloc(state, TAVOR_MCG, 1, TAVOR_NOSLEEP, &rsrc); 836 if (status != DDI_SUCCESS) { 837 mutex_exit(&state->ts_mcglock); 838 /* Set "status" and "errormsg" and goto failure */ 839 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MCG"); 840 goto mcgattach_fail; 841 } 842 843 /* 844 * Fill in the new entry in the "shadow" MCG list. Note: Just as 845 * it does above, tavor_mcg_setup_new_hdr() also fills in a portion 846 * of the temporary MCG entry (the rest of which will be filled in by 847 * tavor_mcg_qplist_add() below) 848 */ 849 newmcg = &state->ts_mcghdl[rsrc->tr_indx]; 850 tavor_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc); 851 852 /* 853 * Try to add the new QP number to the list. This routine fills in 854 * the final necessary pieces of the temporary MCG. The 855 * "mcg_entry_qplist" pointer is used to point to the portion of the 856 * temporary MCG that holds the QP numbers. If we fail here, we 857 * must undo the previous resource allocation. 858 * 859 * Note: tavor_mcg_qplist_add() can we return SUCCESS if it already 860 * found the QP in the list. In this case, the QP is not added on to 861 * the list again. Check the flag 'qp_found' if this value is needed 862 * to be known. 863 */ 864 status = tavor_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp, 865 &qp_found); 866 if (status != DDI_SUCCESS) { 867 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s)); 868 tavor_rsrc_free(state, &rsrc); 869 mutex_exit(&state->ts_mcglock); 870 /* Set "status" and "errormsg" and goto failure */ 871 TAVOR_TNF_FAIL(status, "failed qplist add"); 872 goto mcgattach_fail; 873 } 874 875 /* 876 * Once the temporary MCG has been updated, write the entry into the 877 * appropriate location in the Tavor MCG entry table. If this is 878 * successful, then we need to chain the previous entry to this one. 879 * Note: In general, this operation shouldn't fail. If it does, then 880 * it is an indication that something (probably in HW, but maybe in 881 * SW) has gone seriously wrong. 882 */ 883 status = tavor_write_mgm_cmd_post(state, mcg_entry, rsrc->tr_indx, 884 TAVOR_CMD_NOSLEEP_SPIN); 885 if (status != TAVOR_CMD_SUCCESS) { 886 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s)); 887 tavor_rsrc_free(state, &rsrc); 888 mutex_exit(&state->ts_mcglock); 889 TAVOR_WARNING(state, "failed to write MCG entry"); 890 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n", 891 status); 892 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail, 893 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 894 tnf_uint, indx, rsrc->tr_indx); 895 TAVOR_TNF_EXIT(tavor_mcg_attach); 896 return (ibc_get_ci_failure(0)); 897 } 898 899 /* 900 * Now read the current MCG entry (the one previously at the end of 901 * hash chain) into the temporary MCG. We are going to update its 902 * "next_gid_indx" now and write the entry back to the MCG table. 903 * Note: In general, this operation shouldn't fail. If it does, then 904 * it is an indication that something (probably in HW, but maybe in SW) 905 * has gone seriously wrong. We will free up the MCG entry resource, 906 * but we will not undo the previously written MCG entry in the HW. 907 * This is OK, though, because the MCG entry is not currently attached 908 * to any hash chain. 909 */ 910 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx, 911 TAVOR_CMD_NOSLEEP_SPIN); 912 if (status != TAVOR_CMD_SUCCESS) { 913 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s)); 914 tavor_rsrc_free(state, &rsrc); 915 mutex_exit(&state->ts_mcglock); 916 TAVOR_WARNING(state, "failed to read MCG entry"); 917 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n", 918 status); 919 TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail, 920 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 921 tnf_uint, indx, end_indx); 922 TAVOR_TNF_EXIT(tavor_mcg_attach); 923 return (ibc_get_ci_failure(0)); 924 } 925 926 /* 927 * Finally, we update the "next_gid_indx" field in the temporary MCG 928 * and attempt to write the entry back into the Tavor MCG table. If 929 * this succeeds, then we update the "shadow" list to reflect the 930 * change, drop the lock, and return success. Note: In general, this 931 * operation shouldn't fail. If it does, then it is an indication 932 * that something (probably in HW, but maybe in SW) has gone seriously 933 * wrong. Just as we do above, we will free up the MCG entry resource, 934 * but we will not try to undo the previously written MCG entry. This 935 * is OK, though, because (since we failed here to update the end of 936 * the chain) that other entry is not currently attached to any chain. 937 */ 938 mcg_entry->next_gid_indx = rsrc->tr_indx; 939 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx, 940 TAVOR_CMD_NOSLEEP_SPIN); 941 if (status != TAVOR_CMD_SUCCESS) { 942 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s)); 943 tavor_rsrc_free(state, &rsrc); 944 mutex_exit(&state->ts_mcglock); 945 TAVOR_WARNING(state, "failed to write MCG entry"); 946 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n", 947 status); 948 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail, 949 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 950 tnf_uint, indx, end_indx); 951 TAVOR_TNF_EXIT(tavor_mcg_attach); 952 return (ibc_get_ci_failure(0)); 953 } 954 mcg = &state->ts_mcghdl[end_indx]; 955 mcg->mcg_next_indx = rsrc->tr_indx; 956 957 /* 958 * Now that we know all the Tavor firmware accesses have been 959 * successful, we update the new "shadow" MCG entry by incrementing 960 * the "number of attached QPs" count. Then we drop the lock and 961 * return success. 962 */ 963 newmcg->mcg_num_qps++; 964 965 /* 966 * Increment the refcnt for this QP. Because the QP 967 * was added to this MCG, the refcnt must be 968 * incremented. 969 */ 970 tavor_qp_mcg_refcnt_inc(qp); 971 972 mutex_exit(&state->ts_mcglock); 973 TAVOR_TNF_EXIT(tavor_mcg_attach); 974 return (DDI_SUCCESS); 975 976 mcgattach_fail: 977 TNF_PROBE_1(tavor_mcg_attach_fail, TAVOR_TNF_ERROR, "", tnf_string, 978 msg, errormsg); 979 TAVOR_TNF_EXIT(tavor_mcg_attach); 980 return (status); 981 } 982 983 984 /* 985 * tavor_mcg_detach() 986 * Context: Can be called only from user or kernel context. 987 */ 988 int 989 tavor_mcg_detach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid, 990 ib_lid_t lid) 991 { 992 tavor_hw_mcg_t *mcg_entry; 993 tavor_hw_mcg_qp_list_t *mcg_entry_qplist; 994 tavor_mcghdl_t mcg; 995 uint64_t mgid_hash; 996 uint32_t end_indx, prev_indx; 997 int status; 998 999 TAVOR_TNF_ENTER(tavor_mcg_detach); 1000 1001 /* 1002 * Check for invalid Multicast DLID. Specifically, all Multicast 1003 * LIDs should be within a well defined range. If the specified LID 1004 * is outside of that range, then return an error. 1005 */ 1006 if (tavor_mlid_is_valid(lid) == 0) { 1007 TNF_PROBE_0(tavor_mcg_detach_invmlid_fail, TAVOR_TNF_ERROR, ""); 1008 TAVOR_TNF_EXIT(tavor_mcg_detach); 1009 return (IBT_MC_MLID_INVALID); 1010 } 1011 1012 /* 1013 * Compute the MGID hash value. As described above, the MCG table is 1014 * arranged as a number of separate hash chains. This operation 1015 * converts the specified MGID into the starting index of an entry in 1016 * the hash table (i.e. the index for the start of the appropriate 1017 * hash chain). Subsequent operations below will walk the chain 1018 * searching for a matching entry from which to attempt to remove 1019 * the specified QP. 1020 */ 1021 status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid, 1022 &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT()); 1023 if (status != TAVOR_CMD_SUCCESS) { 1024 cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n", 1025 status); 1026 TNF_PROBE_1(tavor_mcg_detach_mgid_hash_cmd_fail, 1027 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 1028 TAVOR_TNF_EXIT(tavor_mcg_attach); 1029 return (ibc_get_ci_failure(0)); 1030 } 1031 1032 /* 1033 * Grab the multicast group mutex. Then grab the pre-allocated 1034 * temporary buffer used for holding and/or modifying MCG entries. 1035 */ 1036 mutex_enter(&state->ts_mcglock); 1037 mcg_entry = state->ts_mcgtmp; 1038 mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry); 1039 1040 /* 1041 * Walk through the array of MCG entries starting at "mgid_hash". 1042 * Try to find an MCG entry with a matching MGID. The 1043 * tavor_mcg_walk_mgid_hash() routine walks the list and returns an 1044 * index into the MCG table. The entry at this index is checked to 1045 * determine whether it is a match or not. If it is a match, then 1046 * we continue on to attempt to remove the QP from the MCG. If it 1047 * is not a match (or not a valid MCG entry), then we return an error. 1048 */ 1049 end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx); 1050 mcg = &state->ts_mcghdl[end_indx]; 1051 1052 /* 1053 * If MGID == 0 (the hash chain is empty) or if the specified MGID 1054 * does not match the MGID in the current entry, then return 1055 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not 1056 * valid). 1057 */ 1058 if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) || 1059 ((mcg->mcg_mgid_h != gid.gid_prefix) || 1060 (mcg->mcg_mgid_l != gid.gid_guid))) { 1061 mutex_exit(&state->ts_mcglock); 1062 TNF_PROBE_0(tavor_mcg_detach_invmgid_fail, TAVOR_TNF_ERROR, ""); 1063 TAVOR_TNF_EXIT(tavor_mcg_detach); 1064 return (IBT_MC_MGID_INVALID); 1065 } 1066 1067 /* 1068 * Read the current MCG entry into the temporary MCG. Note: In 1069 * general, this operation shouldn't fail. If it does, then it is 1070 * an indication that something (probably in HW, but maybe in SW) 1071 * has gone seriously wrong. 1072 */ 1073 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx, 1074 TAVOR_CMD_NOSLEEP_SPIN); 1075 if (status != TAVOR_CMD_SUCCESS) { 1076 mutex_exit(&state->ts_mcglock); 1077 TAVOR_WARNING(state, "failed to read MCG entry"); 1078 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n", 1079 status); 1080 TNF_PROBE_2(tavor_mcg_detach_read_mgm_cmd_fail, 1081 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1082 tnf_uint, indx, end_indx); 1083 TAVOR_TNF_EXIT(tavor_mcg_attach); 1084 return (ibc_get_ci_failure(0)); 1085 } 1086 1087 /* 1088 * Search the QP number list for a match. If a match is found, then 1089 * remove the entry from the QP list. Otherwise, if no match is found, 1090 * return an error. 1091 */ 1092 status = tavor_mcg_qplist_remove(mcg, mcg_entry_qplist, qp); 1093 if (status != DDI_SUCCESS) { 1094 mutex_exit(&state->ts_mcglock); 1095 TAVOR_TNF_EXIT(tavor_mcg_detach); 1096 return (status); 1097 } 1098 1099 /* 1100 * Decrement the MCG count for this QP. When the 'qp_mcg' 1101 * field becomes 0, then this QP is no longer a member of any 1102 * MCG. 1103 */ 1104 tavor_qp_mcg_refcnt_dec(qp); 1105 1106 /* 1107 * If the current MCG's QP number list is about to be made empty 1108 * ("mcg_num_qps" == 1), then remove the entry itself from the hash 1109 * chain. Otherwise, just write the updated MCG entry back to the 1110 * hardware. In either case, once we successfully update the hardware 1111 * chain, then we decrement the "shadow" list entry's "mcg_num_qps" 1112 * count (or zero out the entire "shadow" list entry) before returning 1113 * success. Note: Zeroing out the "shadow" list entry is done 1114 * inside of tavor_mcg_hash_list_remove(). 1115 */ 1116 if (mcg->mcg_num_qps == 1) { 1117 1118 /* Remove an MCG entry from the hash chain */ 1119 status = tavor_mcg_hash_list_remove(state, end_indx, prev_indx, 1120 mcg_entry); 1121 if (status != DDI_SUCCESS) { 1122 mutex_exit(&state->ts_mcglock); 1123 TAVOR_TNF_EXIT(tavor_mcg_detach); 1124 return (status); 1125 } 1126 1127 } else { 1128 /* 1129 * Write the updated MCG entry back to the Tavor MCG table. 1130 * If this succeeds, then we update the "shadow" list to 1131 * reflect the change (i.e. decrement the "mcg_num_qps"), 1132 * drop the lock, and return success. Note: In general, 1133 * this operation shouldn't fail. If it does, then it is an 1134 * indication that something (probably in HW, but maybe in SW) 1135 * has gone seriously wrong. 1136 */ 1137 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx, 1138 TAVOR_CMD_NOSLEEP_SPIN); 1139 if (status != TAVOR_CMD_SUCCESS) { 1140 mutex_exit(&state->ts_mcglock); 1141 TAVOR_WARNING(state, "failed to write MCG entry"); 1142 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: " 1143 "%08x\n", status); 1144 TNF_PROBE_2(tavor_mcg_detach_write_mgm_cmd_fail, 1145 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1146 tnf_uint, indx, end_indx); 1147 TAVOR_TNF_EXIT(tavor_mcg_detach); 1148 return (ibc_get_ci_failure(0)); 1149 } 1150 mcg->mcg_num_qps--; 1151 } 1152 1153 mutex_exit(&state->ts_mcglock); 1154 TAVOR_TNF_EXIT(tavor_mcg_detach); 1155 return (DDI_SUCCESS); 1156 } 1157 1158 /* 1159 * tavor_qp_mcg_refcnt_inc() 1160 * Context: Can be called from interrupt or base context. 1161 */ 1162 static void 1163 tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp) 1164 { 1165 /* Increment the QP's MCG reference count */ 1166 mutex_enter(&qp->qp_lock); 1167 qp->qp_mcg_refcnt++; 1168 TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_inc, TAVOR_TNF_TRACE, "", 1169 tnf_uint, refcnt, qp->qp_mcg_refcnt); 1170 mutex_exit(&qp->qp_lock); 1171 } 1172 1173 1174 /* 1175 * tavor_qp_mcg_refcnt_dec() 1176 * Context: Can be called from interrupt or base context. 1177 */ 1178 static void 1179 tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp) 1180 { 1181 /* Decrement the QP's MCG reference count */ 1182 mutex_enter(&qp->qp_lock); 1183 qp->qp_mcg_refcnt--; 1184 TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_dec, TAVOR_TNF_TRACE, "", 1185 tnf_uint, refcnt, qp->qp_mcg_refcnt); 1186 mutex_exit(&qp->qp_lock); 1187 } 1188 1189 1190 /* 1191 * tavor_mcg_qplist_add() 1192 * Context: Can be called from interrupt or base context. 1193 */ 1194 static int 1195 tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg, 1196 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp, 1197 uint_t *qp_found) 1198 { 1199 uint_t qplist_indx; 1200 1201 TAVOR_TNF_ENTER(tavor_mcg_qplist_add); 1202 1203 ASSERT(MUTEX_HELD(&state->ts_mcglock)); 1204 1205 qplist_indx = mcg->mcg_num_qps; 1206 1207 /* 1208 * Determine if we have exceeded the maximum number of QP per 1209 * multicast group. If we have, then return an error 1210 */ 1211 if (qplist_indx >= state->ts_cfg_profile->cp_num_qp_per_mcg) { 1212 TNF_PROBE_0(tavor_mcg_qplist_add_too_many_qps, 1213 TAVOR_TNF_ERROR, ""); 1214 TAVOR_TNF_EXIT(tavor_mcg_qplist_add); 1215 return (IBT_HCA_MCG_QP_EXCEEDED); 1216 } 1217 1218 /* 1219 * Determine if the QP is already attached to this MCG table. If it 1220 * is, then we break out and treat this operation as a NO-OP 1221 */ 1222 for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps; 1223 qplist_indx++) { 1224 if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) { 1225 break; 1226 } 1227 } 1228 1229 /* 1230 * If the QP was already on the list, set 'qp_found' to TRUE. We still 1231 * return SUCCESS in this case, but the qplist will not have been 1232 * updated because the QP was already on the list. 1233 */ 1234 if (qplist_indx < mcg->mcg_num_qps) { 1235 *qp_found = 1; 1236 } else { 1237 /* 1238 * Otherwise, append the new QP number to the end of the 1239 * current QP list. Note: We will increment the "mcg_num_qps" 1240 * field on the "shadow" MCG list entry later (after we know 1241 * that all necessary Tavor firmware accesses have been 1242 * successful). 1243 * 1244 * Set 'qp_found' to 0 so we know the QP was added on to the 1245 * list for sure. 1246 */ 1247 mcg_qplist[qplist_indx].q = TAVOR_MCG_QPN_VALID; 1248 mcg_qplist[qplist_indx].qpn = qp->qp_qpnum; 1249 *qp_found = 0; 1250 } 1251 1252 TAVOR_TNF_EXIT(tavor_mcg_qplist_add); 1253 return (DDI_SUCCESS); 1254 } 1255 1256 1257 1258 /* 1259 * tavor_mcg_qplist_remove() 1260 * Context: Can be called from interrupt or base context. 1261 */ 1262 static int 1263 tavor_mcg_qplist_remove(tavor_mcghdl_t mcg, tavor_hw_mcg_qp_list_t *mcg_qplist, 1264 tavor_qphdl_t qp) 1265 { 1266 uint_t i, qplist_indx; 1267 1268 TAVOR_TNF_ENTER(tavor_mcg_qplist_remove); 1269 1270 /* 1271 * Search the MCG QP list for a matching QPN. When 1272 * it's found, we swap the last entry with the current 1273 * one, set the last entry to zero, decrement the last 1274 * entry, and return. If it's not found, then it's 1275 * and error. 1276 */ 1277 qplist_indx = mcg->mcg_num_qps; 1278 for (i = 0; i < qplist_indx; i++) { 1279 if (mcg_qplist[i].qpn == qp->qp_qpnum) { 1280 mcg_qplist[i] = mcg_qplist[qplist_indx - 1]; 1281 mcg_qplist[qplist_indx - 1].q = TAVOR_MCG_QPN_INVALID; 1282 mcg_qplist[qplist_indx - 1].qpn = 0; 1283 1284 TAVOR_TNF_EXIT(tavor_mcg_qplist_remove); 1285 return (DDI_SUCCESS); 1286 } 1287 } 1288 1289 TNF_PROBE_0(tavor_mcg_qplist_remove_invqphdl_fail, TAVOR_TNF_ERROR, ""); 1290 TAVOR_TNF_EXIT(tavor_mcg_qplist_remove); 1291 return (IBT_QP_HDL_INVALID); 1292 } 1293 1294 1295 /* 1296 * tavor_mcg_walk_mgid_hash() 1297 * Context: Can be called from interrupt or base context. 1298 */ 1299 static uint_t 1300 tavor_mcg_walk_mgid_hash(tavor_state_t *state, uint64_t start_indx, 1301 ib_gid_t mgid, uint_t *p_indx) 1302 { 1303 tavor_mcghdl_t curr_mcghdl; 1304 uint_t curr_indx, prev_indx; 1305 1306 TAVOR_TNF_ENTER(tavor_mcg_walk_mgid_hash); 1307 1308 ASSERT(MUTEX_HELD(&state->ts_mcglock)); 1309 1310 /* Start at the head of the hash chain */ 1311 curr_indx = start_indx; 1312 prev_indx = curr_indx; 1313 curr_mcghdl = &state->ts_mcghdl[curr_indx]; 1314 1315 /* If the first entry in the chain has MGID == 0, then stop */ 1316 if ((curr_mcghdl->mcg_mgid_h == 0) && 1317 (curr_mcghdl->mcg_mgid_l == 0)) { 1318 goto end_mgid_hash_walk; 1319 } 1320 1321 /* If the first entry in the chain matches the MGID, then stop */ 1322 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) && 1323 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) { 1324 goto end_mgid_hash_walk; 1325 } 1326 1327 /* Otherwise, walk the hash chain looking for a match */ 1328 while (curr_mcghdl->mcg_next_indx != 0) { 1329 prev_indx = curr_indx; 1330 curr_indx = curr_mcghdl->mcg_next_indx; 1331 curr_mcghdl = &state->ts_mcghdl[curr_indx]; 1332 1333 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) && 1334 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) { 1335 break; 1336 } 1337 } 1338 1339 end_mgid_hash_walk: 1340 /* 1341 * If necessary, return the index of the previous entry too. This 1342 * is primarily used for detaching a QP from a multicast group. It 1343 * may be necessary, in that case, to delete an MCG entry from the 1344 * hash chain and having the index of the previous entry is helpful. 1345 */ 1346 if (p_indx != NULL) { 1347 *p_indx = prev_indx; 1348 } 1349 TAVOR_TNF_EXIT(tavor_mcg_walk_mgid_hash); 1350 return (curr_indx); 1351 } 1352 1353 1354 /* 1355 * tavor_mcg_setup_new_hdr() 1356 * Context: Can be called from interrupt or base context. 1357 */ 1358 static void 1359 tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg, tavor_hw_mcg_t *mcg_hdr, 1360 ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc) 1361 { 1362 TAVOR_TNF_ENTER(tavor_mcg_setup_new_hdr); 1363 1364 /* 1365 * Fill in the fields of the "shadow" entry used by software 1366 * to track MCG hardware entry 1367 */ 1368 mcg->mcg_mgid_h = mgid.gid_prefix; 1369 mcg->mcg_mgid_l = mgid.gid_guid; 1370 mcg->mcg_rsrcp = mcg_rsrc; 1371 mcg->mcg_next_indx = 0; 1372 mcg->mcg_num_qps = 0; 1373 1374 /* 1375 * Fill the header fields of the MCG entry (in the temporary copy) 1376 */ 1377 mcg_hdr->mgid_h = mgid.gid_prefix; 1378 mcg_hdr->mgid_l = mgid.gid_guid; 1379 mcg_hdr->next_gid_indx = 0; 1380 1381 TAVOR_TNF_EXIT(tavor_mcg_setup_new_hdr); 1382 } 1383 1384 1385 /* 1386 * tavor_mcg_hash_list_remove() 1387 * Context: Can be called only from user or kernel context. 1388 */ 1389 static int 1390 tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx, 1391 uint_t prev_indx, tavor_hw_mcg_t *mcg_entry) 1392 { 1393 tavor_mcghdl_t curr_mcg, prev_mcg, next_mcg; 1394 uint_t next_indx; 1395 int status; 1396 1397 /* Get the pointer to "shadow" list for current entry */ 1398 curr_mcg = &state->ts_mcghdl[curr_indx]; 1399 1400 /* 1401 * If this is the first entry on a hash chain, then attempt to replace 1402 * the entry with the next entry on the chain. If there are no 1403 * subsequent entries on the chain, then this is the only entry and 1404 * should be invalidated. 1405 */ 1406 if (curr_indx == prev_indx) { 1407 1408 /* 1409 * If this is the only entry on the chain, then invalidate it. 1410 * Note: Invalidating an MCG entry means writing all zeros 1411 * to the entry. This is only necessary for those MCG 1412 * entries that are the "head" entries of the individual hash 1413 * chains. Regardless of whether this operation returns 1414 * success or failure, return that result to the caller. 1415 */ 1416 next_indx = curr_mcg->mcg_next_indx; 1417 if (next_indx == 0) { 1418 status = tavor_mcg_entry_invalidate(state, mcg_entry, 1419 curr_indx); 1420 bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s)); 1421 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1422 return (status); 1423 } 1424 1425 /* 1426 * Otherwise, this is just the first entry on the chain, so 1427 * grab the next one 1428 */ 1429 next_mcg = &state->ts_mcghdl[next_indx]; 1430 1431 /* 1432 * Read the next MCG entry into the temporary MCG. Note: 1433 * In general, this operation shouldn't fail. If it does, 1434 * then it is an indication that something (probably in HW, 1435 * but maybe in SW) has gone seriously wrong. 1436 */ 1437 status = tavor_read_mgm_cmd_post(state, mcg_entry, next_indx, 1438 TAVOR_CMD_NOSLEEP_SPIN); 1439 if (status != TAVOR_CMD_SUCCESS) { 1440 TAVOR_WARNING(state, "failed to read MCG entry"); 1441 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: " 1442 "%08x\n", status); 1443 TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail, 1444 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1445 tnf_uint, indx, next_indx); 1446 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1447 return (ibc_get_ci_failure(0)); 1448 } 1449 1450 /* 1451 * Copy/Write the temporary MCG back to the hardware MCG list 1452 * using the current index. This essentially removes the 1453 * current MCG entry from the list by writing over it with 1454 * the next one. If this is successful, then we can do the 1455 * same operation for the "shadow" list. And we can also 1456 * free up the Tavor MCG entry resource that was associated 1457 * with the (old) next entry. Note: In general, this 1458 * operation shouldn't fail. If it does, then it is an 1459 * indication that something (probably in HW, but maybe in SW) 1460 * has gone seriously wrong. 1461 */ 1462 status = tavor_write_mgm_cmd_post(state, mcg_entry, curr_indx, 1463 TAVOR_CMD_NOSLEEP_SPIN); 1464 if (status != TAVOR_CMD_SUCCESS) { 1465 TAVOR_WARNING(state, "failed to write MCG entry"); 1466 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: " 1467 "%08x\n", status); 1468 TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail, 1469 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1470 tnf_uint, indx, curr_indx); 1471 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1472 return (ibc_get_ci_failure(0)); 1473 } 1474 1475 /* 1476 * Copy all the software tracking information from the next 1477 * entry on the "shadow" MCG list into the current entry on 1478 * the list. Then invalidate (zero out) the other "shadow" 1479 * list entry. 1480 */ 1481 bcopy(next_mcg, curr_mcg, sizeof (struct tavor_sw_mcg_list_s)); 1482 bzero(next_mcg, sizeof (struct tavor_sw_mcg_list_s)); 1483 1484 /* 1485 * Free up the Tavor MCG entry resource used by the "next" 1486 * MCG entry. That resource is no longer needed by any 1487 * MCG entry which is first on a hash chain (like the "next" 1488 * entry has just become). 1489 */ 1490 tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp); 1491 1492 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1493 return (DDI_SUCCESS); 1494 } 1495 1496 /* 1497 * Else if this is the last entry on the hash chain (or a middle 1498 * entry, then we update the previous entry's "next_gid_index" field 1499 * to make it point instead to the next entry on the chain. By 1500 * skipping over the removed entry in this way, we can then free up 1501 * any resources associated with the current entry. Note: We don't 1502 * need to invalidate the "skipped over" hardware entry because it 1503 * will no be longer connected to any hash chains, and if/when it is 1504 * finally re-used, it will be written with entirely new values. 1505 */ 1506 1507 /* 1508 * Read the next MCG entry into the temporary MCG. Note: In general, 1509 * this operation shouldn't fail. If it does, then it is an 1510 * indication that something (probably in HW, but maybe in SW) has 1511 * gone seriously wrong. 1512 */ 1513 status = tavor_read_mgm_cmd_post(state, mcg_entry, prev_indx, 1514 TAVOR_CMD_NOSLEEP_SPIN); 1515 if (status != TAVOR_CMD_SUCCESS) { 1516 TAVOR_WARNING(state, "failed to read MCG entry"); 1517 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n", 1518 status); 1519 TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail, 1520 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1521 tnf_uint, indx, prev_indx); 1522 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1523 return (ibc_get_ci_failure(0)); 1524 } 1525 1526 /* 1527 * Finally, we update the "next_gid_indx" field in the temporary MCG 1528 * and attempt to write the entry back into the Tavor MCG table. If 1529 * this succeeds, then we update the "shadow" list to reflect the 1530 * change, free up the Tavor MCG entry resource that was associated 1531 * with the current entry, and return success. Note: In general, 1532 * this operation shouldn't fail. If it does, then it is an indication 1533 * that something (probably in HW, but maybe in SW) has gone seriously 1534 * wrong. 1535 */ 1536 mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx; 1537 status = tavor_write_mgm_cmd_post(state, mcg_entry, prev_indx, 1538 TAVOR_CMD_NOSLEEP_SPIN); 1539 if (status != TAVOR_CMD_SUCCESS) { 1540 TAVOR_WARNING(state, "failed to write MCG entry"); 1541 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n", 1542 status); 1543 TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail, 1544 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1545 tnf_uint, indx, prev_indx); 1546 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1547 return (ibc_get_ci_failure(0)); 1548 } 1549 1550 /* 1551 * Get the pointer to the "shadow" MCG list entry for the previous 1552 * MCG. Update its "mcg_next_indx" to point to the next entry 1553 * the one after the current entry. Note: This next index may be 1554 * zero, indicating the end of the list. 1555 */ 1556 prev_mcg = &state->ts_mcghdl[prev_indx]; 1557 prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx; 1558 1559 /* 1560 * Free up the Tavor MCG entry resource used by the current entry. 1561 * This resource is no longer needed because the chain now skips over 1562 * the current entry. Then invalidate (zero out) the current "shadow" 1563 * list entry. 1564 */ 1565 tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp); 1566 bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s)); 1567 1568 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove); 1569 return (DDI_SUCCESS); 1570 } 1571 1572 1573 /* 1574 * tavor_mcg_entry_invalidate() 1575 * Context: Can be called only from user or kernel context. 1576 */ 1577 static int 1578 tavor_mcg_entry_invalidate(tavor_state_t *state, tavor_hw_mcg_t *mcg_entry, 1579 uint_t indx) 1580 { 1581 int status; 1582 1583 TAVOR_TNF_ENTER(tavor_mcg_entry_invalidate); 1584 1585 /* 1586 * Invalidate the hardware MCG entry by zeroing out this temporary 1587 * MCG and writing it the the hardware. Note: In general, this 1588 * operation shouldn't fail. If it does, then it is an indication 1589 * that something (probably in HW, but maybe in SW) has gone seriously 1590 * wrong. 1591 */ 1592 bzero(mcg_entry, TAVOR_MCGMEM_SZ(state)); 1593 status = tavor_write_mgm_cmd_post(state, mcg_entry, indx, 1594 TAVOR_CMD_NOSLEEP_SPIN); 1595 if (status != TAVOR_CMD_SUCCESS) { 1596 TAVOR_WARNING(state, "failed to write MCG entry"); 1597 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n", 1598 status); 1599 TNF_PROBE_2(tavor_mcg_entry_invalidate_write_mgm_cmd_fail, 1600 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status, 1601 tnf_uint, indx, indx); 1602 TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate); 1603 return (ibc_get_ci_failure(0)); 1604 } 1605 1606 TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate); 1607 return (DDI_SUCCESS); 1608 } 1609 1610 1611 /* 1612 * tavor_mgid_is_valid() 1613 * Context: Can be called from interrupt or base context. 1614 */ 1615 static int 1616 tavor_mgid_is_valid(ib_gid_t gid) 1617 { 1618 uint_t topbits, flags, scope; 1619 1620 TAVOR_TNF_ENTER(tavor_mgid_is_valid); 1621 1622 /* 1623 * According to IBA 1.1 specification (section 4.1.1) a valid 1624 * "multicast GID" must have its top eight bits set to all ones 1625 */ 1626 topbits = (gid.gid_prefix >> TAVOR_MCG_TOPBITS_SHIFT) & 1627 TAVOR_MCG_TOPBITS_MASK; 1628 if (topbits != TAVOR_MCG_TOPBITS) { 1629 TNF_PROBE_0(tavor_mgid_is_valid_invbits_fail, TAVOR_TNF_ERROR, 1630 ""); 1631 TAVOR_TNF_EXIT(tavor_mgid_is_valid); 1632 return (0); 1633 } 1634 1635 /* 1636 * The next 4 bits are the "flag" bits. These are valid only 1637 * if they are "0" (which correspond to permanently assigned/ 1638 * "well-known" multicast GIDs) or "1" (for so-called "transient" 1639 * multicast GIDs). All other values are reserved. 1640 */ 1641 flags = (gid.gid_prefix >> TAVOR_MCG_FLAGS_SHIFT) & 1642 TAVOR_MCG_FLAGS_MASK; 1643 if (!((flags == TAVOR_MCG_FLAGS_PERM) || 1644 (flags == TAVOR_MCG_FLAGS_NONPERM))) { 1645 TNF_PROBE_1(tavor_mgid_is_valid_invflags_fail, TAVOR_TNF_ERROR, 1646 "", tnf_uint, flags, flags); 1647 TAVOR_TNF_EXIT(tavor_mgid_is_valid); 1648 return (0); 1649 } 1650 1651 /* 1652 * The next 4 bits are the "scope" bits. These are valid only 1653 * if they are "2" (Link-local), "5" (Site-local), "8" 1654 * (Organization-local) or "E" (Global). All other values 1655 * are reserved (or currently unassigned). 1656 */ 1657 scope = (gid.gid_prefix >> TAVOR_MCG_SCOPE_SHIFT) & 1658 TAVOR_MCG_SCOPE_MASK; 1659 if (!((scope == TAVOR_MCG_SCOPE_LINKLOC) || 1660 (scope == TAVOR_MCG_SCOPE_SITELOC) || 1661 (scope == TAVOR_MCG_SCOPE_ORGLOC) || 1662 (scope == TAVOR_MCG_SCOPE_GLOBAL))) { 1663 TNF_PROBE_1(tavor_mgid_is_valid_invscope_fail, TAVOR_TNF_ERROR, 1664 "", tnf_uint, scope, scope); 1665 TAVOR_TNF_EXIT(tavor_mgid_is_valid); 1666 return (0); 1667 } 1668 1669 /* 1670 * If it passes all of the above checks, then we will consider it 1671 * a valid multicast GID. 1672 */ 1673 TAVOR_TNF_EXIT(tavor_mgid_is_valid); 1674 return (1); 1675 } 1676 1677 1678 /* 1679 * tavor_mlid_is_valid() 1680 * Context: Can be called from interrupt or base context. 1681 */ 1682 static int 1683 tavor_mlid_is_valid(ib_lid_t lid) 1684 { 1685 TAVOR_TNF_ENTER(tavor_mlid_is_valid); 1686 1687 /* 1688 * According to IBA 1.1 specification (section 4.1.1) a valid 1689 * "multicast DLID" must be between 0xC000 and 0xFFFE. 1690 */ 1691 if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) { 1692 TNF_PROBE_1(tavor_mlid_is_valid_invdlid_fail, TAVOR_TNF_ERROR, 1693 "", tnf_uint, mlid, lid); 1694 TAVOR_TNF_EXIT(tavor_mlid_is_valid); 1695 return (0); 1696 } 1697 1698 TAVOR_TNF_EXIT(tavor_mlid_is_valid); 1699 return (1); 1700 } 1701 1702 1703 /* 1704 * tavor_pd_alloc() 1705 * Context: Can be called only from user or kernel context. 1706 */ 1707 int 1708 tavor_pd_alloc(tavor_state_t *state, tavor_pdhdl_t *pdhdl, uint_t sleepflag) 1709 { 1710 tavor_rsrc_t *rsrc; 1711 tavor_pdhdl_t pd; 1712 int status; 1713 1714 TAVOR_TNF_ENTER(tavor_pd_alloc); 1715 1716 /* 1717 * Allocate the software structure for tracking the protection domain 1718 * (i.e. the Tavor Protection Domain handle). By default each PD 1719 * structure will have a unique PD number assigned to it. All that 1720 * is necessary is for software to initialize the PD reference count 1721 * (to zero) and return success. 1722 */ 1723 status = tavor_rsrc_alloc(state, TAVOR_PDHDL, 1, sleepflag, &rsrc); 1724 if (status != DDI_SUCCESS) { 1725 TNF_PROBE_0(tavor_pd_alloc_rsrcalloc_fail, TAVOR_TNF_ERROR, ""); 1726 TAVOR_TNF_EXIT(tavor_pd_alloc); 1727 return (IBT_INSUFF_RESOURCE); 1728 } 1729 pd = (tavor_pdhdl_t)rsrc->tr_addr; 1730 1731 pd->pd_refcnt = 0; 1732 *pdhdl = pd; 1733 1734 TAVOR_TNF_EXIT(tavor_pd_alloc); 1735 return (DDI_SUCCESS); 1736 } 1737 1738 1739 /* 1740 * tavor_pd_free() 1741 * Context: Can be called only from user or kernel context. 1742 */ 1743 int 1744 tavor_pd_free(tavor_state_t *state, tavor_pdhdl_t *pdhdl) 1745 { 1746 tavor_rsrc_t *rsrc; 1747 tavor_pdhdl_t pd; 1748 1749 TAVOR_TNF_ENTER(tavor_pd_free); 1750 1751 /* 1752 * Pull all the necessary information from the Tavor Protection Domain 1753 * handle. This is necessary here because the resource for the 1754 * PD is going to be freed up as part of this operation. 1755 */ 1756 pd = *pdhdl; 1757 rsrc = pd->pd_rsrcp; 1758 1759 /* 1760 * Check the PD reference count. If the reference count is non-zero, 1761 * then it means that this protection domain is still referenced by 1762 * some memory region, queue pair, address handle, or other IB object 1763 * If it is non-zero, then return an error. Otherwise, free the 1764 * Tavor resource and return success. 1765 */ 1766 if (pd->pd_refcnt != 0) { 1767 TNF_PROBE_1(tavor_pd_free_refcnt_fail, TAVOR_TNF_ERROR, "", 1768 tnf_int, refcnt, pd->pd_refcnt); 1769 TAVOR_TNF_EXIT(tavor_pd_free); 1770 return (IBT_PD_IN_USE); 1771 } 1772 1773 /* Free the Tavor Protection Domain handle */ 1774 tavor_rsrc_free(state, &rsrc); 1775 1776 /* Set the pdhdl pointer to NULL and return success */ 1777 *pdhdl = (tavor_pdhdl_t)NULL; 1778 1779 TAVOR_TNF_EXIT(tavor_pd_free); 1780 return (DDI_SUCCESS); 1781 } 1782 1783 1784 /* 1785 * tavor_pd_refcnt_inc() 1786 * Context: Can be called from interrupt or base context. 1787 */ 1788 void 1789 tavor_pd_refcnt_inc(tavor_pdhdl_t pd) 1790 { 1791 /* Increment the protection domain's reference count */ 1792 mutex_enter(&pd->pd_lock); 1793 TNF_PROBE_1_DEBUG(tavor_pd_refcnt_inc, TAVOR_TNF_TRACE, "", 1794 tnf_uint, refcnt, pd->pd_refcnt); 1795 pd->pd_refcnt++; 1796 mutex_exit(&pd->pd_lock); 1797 1798 } 1799 1800 1801 /* 1802 * tavor_pd_refcnt_dec() 1803 * Context: Can be called from interrupt or base context. 1804 */ 1805 void 1806 tavor_pd_refcnt_dec(tavor_pdhdl_t pd) 1807 { 1808 /* Decrement the protection domain's reference count */ 1809 mutex_enter(&pd->pd_lock); 1810 pd->pd_refcnt--; 1811 TNF_PROBE_1_DEBUG(tavor_pd_refcnt_dec, TAVOR_TNF_TRACE, "", 1812 tnf_uint, refcnt, pd->pd_refcnt); 1813 mutex_exit(&pd->pd_lock); 1814 1815 } 1816 1817 1818 /* 1819 * tavor_port_query() 1820 * Context: Can be called only from user or kernel context. 1821 */ 1822 int 1823 tavor_port_query(tavor_state_t *state, uint_t port, ibt_hca_portinfo_t *pi) 1824 { 1825 sm_portinfo_t portinfo; 1826 sm_guidinfo_t guidinfo; 1827 sm_pkey_table_t pkeytable; 1828 ib_gid_t *sgid; 1829 uint_t sgid_max, pkey_max, tbl_size; 1830 int i, j, indx, status; 1831 1832 TAVOR_TNF_ENTER(tavor_port_query); 1833 1834 /* Validate that specified port number is legal */ 1835 if (!tavor_portnum_is_valid(state, port)) { 1836 TNF_PROBE_1(tavor_port_query_inv_portnum_fail, 1837 TAVOR_TNF_ERROR, "", tnf_uint, port, port); 1838 TAVOR_TNF_EXIT(tavor_port_query); 1839 return (IBT_HCA_PORT_INVALID); 1840 } 1841 1842 /* 1843 * We use the Tavor MAD_IFC command to post a GetPortInfo MAD 1844 * to the firmware (for the specified port number). This returns 1845 * a full PortInfo MAD (in "portinfo") which we subsequently 1846 * parse to fill in the "ibt_hca_portinfo_t" structure returned 1847 * to the IBTF. 1848 */ 1849 status = tavor_getportinfo_cmd_post(state, port, 1850 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 1851 if (status != TAVOR_CMD_SUCCESS) { 1852 cmn_err(CE_CONT, "Tavor: GetPortInfo (port %02d) command " 1853 "failed: %08x\n", port, status); 1854 TNF_PROBE_1(tavor_port_query_getportinfo_cmd_fail, 1855 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 1856 TAVOR_TNF_EXIT(tavor_port_query); 1857 return (ibc_get_ci_failure(0)); 1858 } 1859 1860 /* 1861 * Parse the PortInfo MAD and fill in the IBTF structure 1862 */ 1863 pi->p_base_lid = portinfo.LID; 1864 pi->p_qkey_violations = portinfo.Q_KeyViolations; 1865 pi->p_pkey_violations = portinfo.P_KeyViolations; 1866 pi->p_sm_sl = portinfo.MasterSMSL; 1867 pi->p_sm_lid = portinfo.MasterSMLID; 1868 pi->p_linkstate = portinfo.PortState; 1869 pi->p_port_num = portinfo.LocalPortNum; 1870 pi->p_phys_state = portinfo.PortPhysicalState; 1871 pi->p_width_supported = portinfo.LinkWidthSupported; 1872 pi->p_width_enabled = portinfo.LinkWidthEnabled; 1873 pi->p_width_active = portinfo.LinkWidthActive; 1874 pi->p_speed_supported = portinfo.LinkSpeedSupported; 1875 pi->p_speed_enabled = portinfo.LinkSpeedEnabled; 1876 pi->p_speed_active = portinfo.LinkSpeedActive; 1877 pi->p_mtu = portinfo.MTUCap; 1878 pi->p_lmc = portinfo.LMC; 1879 pi->p_max_vl = portinfo.VLCap; 1880 pi->p_subnet_timeout = portinfo.SubnetTimeOut; 1881 pi->p_msg_sz = ((uint32_t)1 << TAVOR_QP_LOG_MAX_MSGSZ); 1882 tbl_size = state->ts_cfg_profile->cp_log_max_gidtbl; 1883 pi->p_sgid_tbl_sz = (1 << tbl_size); 1884 tbl_size = state->ts_cfg_profile->cp_log_max_pkeytbl; 1885 pi->p_pkey_tbl_sz = (1 << tbl_size); 1886 1887 /* 1888 * Convert InfiniBand-defined port capability flags to the format 1889 * specified by the IBTF 1890 */ 1891 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM) 1892 pi->p_capabilities |= IBT_PORT_CAP_SM; 1893 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED) 1894 pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED; 1895 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD) 1896 pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL; 1897 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD) 1898 pi->p_capabilities |= IBT_PORT_CAP_DM; 1899 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD) 1900 pi->p_capabilities |= IBT_PORT_CAP_VENDOR; 1901 1902 /* 1903 * Fill in the SGID table. Since the only access to the Tavor 1904 * GID tables is through the firmware's MAD_IFC interface, we 1905 * post as many GetGUIDInfo MADs as necessary to read in the entire 1906 * contents of the SGID table (for the specified port). Note: The 1907 * GetGUIDInfo command only gets eight GUIDs per operation. These 1908 * GUIDs are then appended to the GID prefix for the port (from the 1909 * GetPortInfo above) to form the entire SGID table. 1910 */ 1911 for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) { 1912 status = tavor_getguidinfo_cmd_post(state, port, i >> 3, 1913 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &guidinfo); 1914 if (status != TAVOR_CMD_SUCCESS) { 1915 cmn_err(CE_CONT, "Tavor: GetGUIDInfo (port %02d) " 1916 "command failed: %08x\n", port, status); 1917 TNF_PROBE_1(tavor_port_query_getguidinfo_cmd_fail, 1918 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 1919 TAVOR_TNF_EXIT(tavor_port_query); 1920 return (ibc_get_ci_failure(0)); 1921 } 1922 1923 /* Figure out how many of the entries are valid */ 1924 sgid_max = min((pi->p_sgid_tbl_sz - i), 8); 1925 for (j = 0; j < sgid_max; j++) { 1926 indx = (i + j); 1927 sgid = &pi->p_sgid_tbl[indx]; 1928 sgid->gid_prefix = portinfo.GidPrefix; 1929 sgid->gid_guid = guidinfo.GUIDBlocks[j]; 1930 } 1931 } 1932 1933 /* 1934 * Fill in the PKey table. Just as for the GID tables above, the 1935 * only access to the Tavor PKey tables is through the firmware's 1936 * MAD_IFC interface. We post as many GetPKeyTable MADs as necessary 1937 * to read in the entire contents of the PKey table (for the specified 1938 * port). Note: The GetPKeyTable command only gets 32 PKeys per 1939 * operation. 1940 */ 1941 for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) { 1942 status = tavor_getpkeytable_cmd_post(state, port, i, 1943 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &pkeytable); 1944 if (status != TAVOR_CMD_SUCCESS) { 1945 cmn_err(CE_CONT, "Tavor: GetPKeyTable (port %02d) " 1946 "command failed: %08x\n", port, status); 1947 TNF_PROBE_1(tavor_port_query_getpkeytable_cmd_fail, 1948 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 1949 TAVOR_TNF_EXIT(tavor_port_query); 1950 return (ibc_get_ci_failure(0)); 1951 } 1952 1953 /* Figure out how many of the entries are valid */ 1954 pkey_max = min((pi->p_pkey_tbl_sz - i), 32); 1955 for (j = 0; j < pkey_max; j++) { 1956 indx = (i + j); 1957 pi->p_pkey_tbl[indx] = pkeytable.P_KeyTableBlocks[j]; 1958 } 1959 } 1960 1961 TAVOR_TNF_EXIT(tavor_port_query); 1962 return (DDI_SUCCESS); 1963 } 1964 1965 1966 /* 1967 * tavor_port_modify() 1968 * Context: Can be called only from user or kernel context. 1969 */ 1970 /* ARGSUSED */ 1971 int 1972 tavor_port_modify(tavor_state_t *state, uint8_t port, 1973 ibt_port_modify_flags_t flags, uint8_t init_type) 1974 { 1975 sm_portinfo_t portinfo; 1976 uint32_t capmask, reset_qkey; 1977 int status; 1978 1979 TAVOR_TNF_ENTER(tavor_port_modify); 1980 1981 /* 1982 * Return an error if either of the unsupported flags are set 1983 */ 1984 if ((flags & IBT_PORT_SHUTDOWN) || 1985 (flags & IBT_PORT_SET_INIT_TYPE)) { 1986 TNF_PROBE_1(tavor_port_modify_inv_flags_fail, 1987 TAVOR_TNF_ERROR, "", tnf_uint, flags, flags); 1988 TAVOR_TNF_EXIT(tavor_port_modify); 1989 return (IBT_NOT_SUPPORTED); 1990 } 1991 1992 /* 1993 * Determine whether we are trying to reset the QKey counter 1994 */ 1995 reset_qkey = (flags & IBT_PORT_RESET_QKEY) ? 1 : 0; 1996 1997 /* Validate that specified port number is legal */ 1998 if (!tavor_portnum_is_valid(state, port)) { 1999 TNF_PROBE_1(tavor_port_modify_inv_portnum_fail, 2000 TAVOR_TNF_ERROR, "", tnf_uint, port, port); 2001 TAVOR_TNF_EXIT(tavor_port_modify); 2002 return (IBT_HCA_PORT_INVALID); 2003 } 2004 2005 /* 2006 * Use the Tavor MAD_IFC command to post a GetPortInfo MAD to the 2007 * firmware (for the specified port number). This returns a full 2008 * PortInfo MAD (in "portinfo") from which we pull the current 2009 * capability mask. We then modify the capability mask as directed 2010 * by the "pmod_flags" field, and write the updated capability mask 2011 * using the Tavor SET_IB command (below). 2012 */ 2013 status = tavor_getportinfo_cmd_post(state, port, 2014 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo); 2015 if (status != TAVOR_CMD_SUCCESS) { 2016 TNF_PROBE_1(tavor_port_modify_getportinfo_cmd_fail, 2017 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 2018 TAVOR_TNF_EXIT(tavor_port_modify); 2019 return (ibc_get_ci_failure(0)); 2020 } 2021 2022 /* 2023 * Convert InfiniBand-defined port capability flags to the format 2024 * specified by the IBTF. Specifically, we modify the capability 2025 * mask based on the specified values. 2026 */ 2027 capmask = portinfo.CapabilityMask; 2028 2029 if (flags & IBT_PORT_RESET_SM) 2030 capmask &= ~SM_CAP_MASK_IS_SM; 2031 else if (flags & IBT_PORT_SET_SM) 2032 capmask |= SM_CAP_MASK_IS_SM; 2033 2034 if (flags & IBT_PORT_RESET_SNMP) 2035 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD; 2036 else if (flags & IBT_PORT_SET_SNMP) 2037 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD; 2038 2039 if (flags & IBT_PORT_RESET_DEVMGT) 2040 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD; 2041 else if (flags & IBT_PORT_SET_DEVMGT) 2042 capmask |= SM_CAP_MASK_IS_DM_SUPPD; 2043 2044 if (flags & IBT_PORT_RESET_VENDOR) 2045 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD; 2046 else if (flags & IBT_PORT_SET_VENDOR) 2047 capmask |= SM_CAP_MASK_IS_VM_SUPPD; 2048 2049 /* 2050 * Use the Tavor SET_IB command to update the capability mask and 2051 * (possibly) reset the QKey violation counter for the specified port. 2052 * Note: In general, this operation shouldn't fail. If it does, then 2053 * it is an indication that something (probably in HW, but maybe in 2054 * SW) has gone seriously wrong. 2055 */ 2056 status = tavor_set_ib_cmd_post(state, capmask, port, reset_qkey, 2057 TAVOR_SLEEPFLAG_FOR_CONTEXT()); 2058 if (status != TAVOR_CMD_SUCCESS) { 2059 TAVOR_WARNING(state, "failed to modify port capabilities"); 2060 cmn_err(CE_CONT, "Tavor: SET_IB (port %02d) command failed: " 2061 "%08x\n", port, status); 2062 TNF_PROBE_1(tavor_port_modify_set_ib_cmd_fail, 2063 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status); 2064 TAVOR_TNF_EXIT(tavor_port_modify); 2065 return (ibc_get_ci_failure(0)); 2066 } 2067 2068 TAVOR_TNF_EXIT(tavor_port_modify); 2069 return (DDI_SUCCESS); 2070 } 2071 2072 2073 /* 2074 * tavor_set_addr_path() 2075 * Context: Can be called from interrupt or base context. 2076 * 2077 * Note: This routine is used for two purposes. It is used to fill in the 2078 * Tavor UDAV fields, and it is used to fill in the address path information 2079 * for QPs. Because the two Tavor structures are similar, common fields can 2080 * be filled in here. Because they are slightly different, however, we pass 2081 * an additional flag to indicate which type is being filled. 2082 */ 2083 int 2084 tavor_set_addr_path(tavor_state_t *state, ibt_adds_vect_t *av, 2085 tavor_hw_addr_path_t *path, uint_t type, tavor_qphdl_t qp) 2086 { 2087 uint_t gidtbl_sz; 2088 2089 TAVOR_TNF_ENTER(tavor_set_addr_path); 2090 2091 path->ml_path = av->av_src_path; 2092 path->rlid = av->av_dlid; 2093 path->sl = av->av_srvl; 2094 2095 /* Port number only valid (in "av_port_num") if this is a UDAV */ 2096 if (type == TAVOR_ADDRPATH_UDAV) { 2097 path->portnum = av->av_port_num; 2098 } 2099 2100 /* 2101 * Validate (and fill in) static rate. 2102 * 2103 * The stat_rate_sup is used to decide how to set the rate and 2104 * if it is zero, the driver uses the old interface. 2105 */ 2106 if (state->ts_devlim.stat_rate_sup) { 2107 if (av->av_srate == IBT_SRATE_20) { 2108 path->max_stat_rate = 0; /* 4x@DDR injection rate */ 2109 } else if (av->av_srate == IBT_SRATE_5) { 2110 path->max_stat_rate = 3; /* 1x@DDR injection rate */ 2111 } else if (av->av_srate == IBT_SRATE_10) { 2112 path->max_stat_rate = 2; /* 4x@SDR injection rate */ 2113 } else if (av->av_srate == IBT_SRATE_2) { 2114 path->max_stat_rate = 1; /* 1x@SDR injection rate */ 2115 } else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) { 2116 path->max_stat_rate = 0; /* Max */ 2117 } else { 2118 TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail, 2119 TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate); 2120 TAVOR_TNF_EXIT(tavor_set_addr_path); 2121 return (IBT_STATIC_RATE_INVALID); 2122 } 2123 } else { 2124 if (av->av_srate == IBT_SRATE_10) { 2125 path->max_stat_rate = 0; /* 4x@SDR injection rate */ 2126 } else if (av->av_srate == IBT_SRATE_2) { 2127 path->max_stat_rate = 1; /* 1x@SDR injection rate */ 2128 } else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) { 2129 path->max_stat_rate = 0; /* Max */ 2130 } else { 2131 TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail, 2132 TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate); 2133 TAVOR_TNF_EXIT(tavor_set_addr_path); 2134 return (IBT_STATIC_RATE_INVALID); 2135 } 2136 } 2137 2138 /* 2139 * If this is a QP operation save asoft copy. 2140 */ 2141 if (qp) { 2142 qp->qp_save_srate = av->av_srate; 2143 } 2144 2145 /* If "grh" flag is set, then check for valid SGID index too */ 2146 gidtbl_sz = (1 << state->ts_devlim.log_max_gid); 2147 if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) { 2148 TNF_PROBE_1(tavor_set_addr_path_inv_sgid_ix_fail, 2149 TAVOR_TNF_ERROR, "", tnf_uint, sgid_ix, av->av_sgid_ix); 2150 TAVOR_TNF_EXIT(tavor_set_addr_path); 2151 return (IBT_SGID_INVALID); 2152 } 2153 2154 /* 2155 * Fill in all "global" values regardless of the value in the GRH 2156 * flag. Because "grh" is not set unless "av_send_grh" is set, the 2157 * hardware will ignore the other "global" values as necessary. Note: 2158 * SW does this here to enable later query operations to return 2159 * exactly the same params that were passed when the addr path was 2160 * last written. 2161 */ 2162 path->grh = av->av_send_grh; 2163 if (type == TAVOR_ADDRPATH_QP) { 2164 path->mgid_index = av->av_sgid_ix; 2165 } else { 2166 /* 2167 * For Tavor UDAV, the "mgid_index" field is the index into 2168 * a combined table (not a per-port table). So some extra 2169 * calculations are necessary. 2170 */ 2171 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) + 2172 av->av_sgid_ix; 2173 } 2174 path->flow_label = av->av_flow; 2175 path->tclass = av->av_tclass; 2176 path->hop_limit = av->av_hop; 2177 path->rgid_h = av->av_dgid.gid_prefix; 2178 2179 /* 2180 * According to Tavor PRM, the (31:0) part of rgid_l must be set to 2181 * "0x2" if the 'grh' or 'g' bit is cleared. It also says that we 2182 * only need to do it for UDAV's. So we enforce that here. 2183 * 2184 * NOTE: The entire 64 bits worth of GUID info is actually being 2185 * preserved (for UDAVs) by the callers of this function 2186 * (tavor_ah_alloc() and tavor_ah_modify()) and as long as the 2187 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are 2188 * "don't care". 2189 */ 2190 if ((path->grh) || (type == TAVOR_ADDRPATH_QP)) { 2191 path->rgid_l = av->av_dgid.gid_guid; 2192 } else { 2193 path->rgid_l = 0x2; 2194 } 2195 2196 TAVOR_TNF_EXIT(tavor_set_addr_path); 2197 return (DDI_SUCCESS); 2198 } 2199 2200 2201 /* 2202 * tavor_get_addr_path() 2203 * Context: Can be called from interrupt or base context. 2204 * 2205 * Note: Just like tavor_set_addr_path() above, this routine is used for two 2206 * purposes. It is used to read in the Tavor UDAV fields, and it is used to 2207 * read in the address path information for QPs. Because the two Tavor 2208 * structures are similar, common fields can be read in here. But because 2209 * they are slightly different, we pass an additional flag to indicate which 2210 * type is being read. 2211 */ 2212 void 2213 tavor_get_addr_path(tavor_state_t *state, tavor_hw_addr_path_t *path, 2214 ibt_adds_vect_t *av, uint_t type, tavor_qphdl_t qp) 2215 { 2216 uint_t gidtbl_sz; 2217 2218 av->av_src_path = path->ml_path; 2219 av->av_port_num = path->portnum; 2220 av->av_dlid = path->rlid; 2221 av->av_srvl = path->sl; 2222 2223 /* 2224 * Set "av_ipd" value from max_stat_rate. 2225 */ 2226 if (qp) { 2227 /* 2228 * If a QP operation use the soft copy 2229 */ 2230 av->av_srate = qp->qp_save_srate; 2231 } else { 2232 /* 2233 * The stat_rate_sup is used to decide how the srate value is 2234 * set and 2235 * if it is zero, the driver uses the old interface. 2236 */ 2237 if (state->ts_devlim.stat_rate_sup) { 2238 if (path->max_stat_rate == 0) { 2239 av->av_srate = IBT_SRATE_20; /* 4x@DDR rate */ 2240 } else if (path->max_stat_rate == 1) { 2241 av->av_srate = IBT_SRATE_2; /* 1x@SDR rate */ 2242 } else if (path->max_stat_rate == 2) { 2243 av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */ 2244 } else if (path->max_stat_rate == 3) { 2245 av->av_srate = IBT_SRATE_5; /* 1xDDR rate */ 2246 } 2247 } else { 2248 if (path->max_stat_rate == 0) { 2249 av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */ 2250 } else if (path->max_stat_rate == 1) { 2251 av->av_srate = IBT_SRATE_2; /* 1x@SDR rate */ 2252 } 2253 } 2254 } 2255 2256 /* 2257 * Extract all "global" values regardless of the value in the GRH 2258 * flag. Because "av_send_grh" is set only if "grh" is set, software 2259 * knows to ignore the other "global" values as necessary. Note: SW 2260 * does it this way to enable these query operations to return exactly 2261 * the same params that were passed when the addr path was last written. 2262 */ 2263 av->av_send_grh = path->grh; 2264 if (type == TAVOR_ADDRPATH_QP) { 2265 av->av_sgid_ix = path->mgid_index; 2266 } else { 2267 /* 2268 * For Tavor UDAV, the "mgid_index" field is the index into 2269 * a combined table (not a per-port table). So some extra 2270 * calculations are necessary. 2271 */ 2272 gidtbl_sz = (1 << state->ts_devlim.log_max_gid); 2273 av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) * 2274 gidtbl_sz); 2275 } 2276 av->av_flow = path->flow_label; 2277 av->av_tclass = path->tclass; 2278 av->av_hop = path->hop_limit; 2279 av->av_dgid.gid_prefix = path->rgid_h; 2280 av->av_dgid.gid_guid = path->rgid_l; 2281 } 2282 2283 2284 /* 2285 * tavor_portnum_is_valid() 2286 * Context: Can be called from interrupt or base context. 2287 */ 2288 int 2289 tavor_portnum_is_valid(tavor_state_t *state, uint_t portnum) 2290 { 2291 uint_t max_port; 2292 2293 max_port = state->ts_cfg_profile->cp_num_ports; 2294 if ((portnum <= max_port) && (portnum != 0)) { 2295 return (1); 2296 } else { 2297 return (0); 2298 } 2299 } 2300 2301 2302 /* 2303 * tavor_pkeyindex_is_valid() 2304 * Context: Can be called from interrupt or base context. 2305 */ 2306 int 2307 tavor_pkeyindex_is_valid(tavor_state_t *state, uint_t pkeyindx) 2308 { 2309 uint_t max_pkeyindx; 2310 2311 max_pkeyindx = 1 << state->ts_cfg_profile->cp_log_max_pkeytbl; 2312 if (pkeyindx < max_pkeyindx) { 2313 return (1); 2314 } else { 2315 return (0); 2316 } 2317 } 2318 2319 2320 /* 2321 * tavor_queue_alloc() 2322 * Context: Can be called from interrupt or base context. 2323 */ 2324 int 2325 tavor_queue_alloc(tavor_state_t *state, tavor_qalloc_info_t *qa_info, 2326 uint_t sleepflag) 2327 { 2328 ddi_dma_attr_t dma_attr; 2329 int (*callback)(caddr_t); 2330 uint64_t realsize, alloc_mask; 2331 uint_t dma_xfer_mode, type; 2332 int flag, status; 2333 2334 TAVOR_TNF_ENTER(tavor_queue_alloc); 2335 2336 /* Set the callback flag appropriately */ 2337 callback = (sleepflag == TAVOR_SLEEP) ? DDI_DMA_SLEEP : 2338 DDI_DMA_DONTWAIT; 2339 2340 /* 2341 * Initialize many of the default DMA attributes. Then set additional 2342 * alignment restrictions as necessary for the queue memory. Also 2343 * respect the configured value for IOMMU bypass 2344 */ 2345 tavor_dma_attr_init(&dma_attr); 2346 dma_attr.dma_attr_align = qa_info->qa_bind_align; 2347 type = state->ts_cfg_profile->cp_iommu_bypass; 2348 if (type == TAVOR_BINDMEM_BYPASS) { 2349 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL; 2350 } 2351 2352 /* Allocate a DMA handle */ 2353 status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr, callback, NULL, 2354 &qa_info->qa_dmahdl); 2355 if (status != DDI_SUCCESS) { 2356 TNF_PROBE_0(tavor_queue_alloc_dmahdl_fail, TAVOR_TNF_ERROR, ""); 2357 TAVOR_TNF_EXIT(tavor_queue_alloc); 2358 return (DDI_FAILURE); 2359 } 2360 2361 /* 2362 * Determine the amount of memory to allocate, depending on the values 2363 * in "qa_bind_align" and "qa_alloc_align". The problem we are trying 2364 * to solve here is that allocating a DMA handle with IOMMU bypass 2365 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments 2366 * that are less than the page size. Since we may need stricter 2367 * alignments on the memory allocated by ddi_dma_mem_alloc() (e.g. in 2368 * Tavor QP work queue memory allocation), we use the following method 2369 * to calculate how much additional memory to request, and we enforce 2370 * our own alignment on the allocated result. 2371 */ 2372 alloc_mask = qa_info->qa_alloc_align - 1; 2373 if (qa_info->qa_bind_align == qa_info->qa_alloc_align) { 2374 realsize = qa_info->qa_size; 2375 } else { 2376 realsize = qa_info->qa_size + alloc_mask; 2377 } 2378 2379 /* 2380 * If we are to allocate the queue from system memory, then use 2381 * ddi_dma_mem_alloc() to find the space. Otherwise, if we are to 2382 * allocate the queue from locally-attached DDR memory, then use the 2383 * vmem allocator to find the space. In either case, return a pointer 2384 * to the memory range allocated (including any necessary alignment 2385 * adjustments), the "real" memory pointer, the "real" size, and a 2386 * ddi_acc_handle_t to use when reading from/writing to the memory. 2387 */ 2388 if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) { 2389 2390 /* 2391 * Determine whether to map STREAMING or CONSISTENT. This is 2392 * based on the value set in the configuration profile at 2393 * attach time. 2394 */ 2395 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent; 2396 2397 /* Allocate system memory for the queue */ 2398 status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize, 2399 &state->ts_reg_accattr, dma_xfer_mode, callback, NULL, 2400 (caddr_t *)&qa_info->qa_buf_real, 2401 (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl); 2402 if (status != DDI_SUCCESS) { 2403 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2404 TNF_PROBE_0(tavor_queue_alloc_dma_memalloc_fail, 2405 TAVOR_TNF_ERROR, ""); 2406 TAVOR_TNF_EXIT(tavor_queue_alloc); 2407 return (DDI_FAILURE); 2408 } 2409 2410 /* 2411 * Save temporary copy of the real pointer. (This may be 2412 * modified in the last step below). 2413 */ 2414 qa_info->qa_buf_aligned = qa_info->qa_buf_real; 2415 2416 } else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) { 2417 2418 /* Allocate userland mappable memory for the queue */ 2419 flag = (sleepflag == TAVOR_SLEEP) ? DDI_UMEM_SLEEP : 2420 DDI_UMEM_NOSLEEP; 2421 qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag, 2422 &qa_info->qa_umemcookie); 2423 if (qa_info->qa_buf_real == NULL) { 2424 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2425 TNF_PROBE_0(tavor_queue_alloc_umem_fail, 2426 TAVOR_TNF_ERROR, ""); 2427 TAVOR_TNF_EXIT(tavor_queue_alloc); 2428 return (DDI_FAILURE); 2429 } 2430 2431 /* 2432 * Save temporary copy of the real pointer. (This may be 2433 * modified in the last step below). 2434 */ 2435 qa_info->qa_buf_aligned = qa_info->qa_buf_real; 2436 2437 } else { /* TAVOR_QUEUE_LOCATION_INDDR */ 2438 2439 /* Allocate DDR memory for the queue */ 2440 flag = (sleepflag == TAVOR_SLEEP) ? VM_SLEEP : VM_NOSLEEP; 2441 qa_info->qa_buf_real = (uint32_t *)vmem_xalloc( 2442 state->ts_ddrvmem, realsize, qa_info->qa_bind_align, 0, 0, 2443 NULL, NULL, flag); 2444 if (qa_info->qa_buf_real == NULL) { 2445 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2446 TNF_PROBE_0(tavor_queue_alloc_vmxa_fail, 2447 TAVOR_TNF_ERROR, ""); 2448 TAVOR_TNF_EXIT(tavor_queue_alloc); 2449 return (DDI_FAILURE); 2450 } 2451 2452 /* 2453 * Since "qa_buf_real" will be a PCI address (the offset into 2454 * the DDR memory), we first need to do some calculations to 2455 * convert it to its kernel mapped address. (Note: This may 2456 * be modified again below, when any additional "alloc" 2457 * alignment constraint is applied). 2458 */ 2459 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t) 2460 state->ts_reg_ddr_baseaddr) + ((uintptr_t) 2461 qa_info->qa_buf_real - state->ts_ddr.ddr_baseaddr)); 2462 qa_info->qa_buf_realsz = realsize; 2463 qa_info->qa_acchdl = state->ts_reg_ddrhdl; 2464 } 2465 2466 /* 2467 * The last step is to ensure that the final address ("qa_buf_aligned") 2468 * has the appropriate "alloc" alignment restriction applied to it 2469 * (if necessary). 2470 */ 2471 if (qa_info->qa_bind_align != qa_info->qa_alloc_align) { 2472 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t) 2473 qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask); 2474 } 2475 2476 TAVOR_TNF_EXIT(tavor_queue_alloc); 2477 return (DDI_SUCCESS); 2478 } 2479 2480 2481 /* 2482 * tavor_queue_free() 2483 * Context: Can be called from interrupt or base context. 2484 */ 2485 void 2486 tavor_queue_free(tavor_state_t *state, tavor_qalloc_info_t *qa_info) 2487 { 2488 TAVOR_TNF_ENTER(tavor_queue_free); 2489 2490 /* 2491 * Depending on how (i.e. from where) we allocated the memory for 2492 * this queue, we choose the appropriate method for releasing the 2493 * resources. 2494 */ 2495 if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) { 2496 2497 ddi_dma_mem_free(&qa_info->qa_acchdl); 2498 2499 } else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) { 2500 2501 ddi_umem_free(qa_info->qa_umemcookie); 2502 2503 } else { /* TAVOR_QUEUE_LOCATION_INDDR */ 2504 2505 vmem_xfree(state->ts_ddrvmem, qa_info->qa_buf_real, 2506 qa_info->qa_buf_realsz); 2507 } 2508 2509 /* Always free the dma handle */ 2510 ddi_dma_free_handle(&qa_info->qa_dmahdl); 2511 2512 TAVOR_TNF_EXIT(tavor_queue_free); 2513 } 2514 2515 2516 /* 2517 * tavor_dmaattr_get() 2518 * Context: Can be called from interrupt or base context. 2519 */ 2520 void 2521 tavor_dma_attr_init(ddi_dma_attr_t *dma_attr) 2522 { 2523 dma_attr->dma_attr_version = DMA_ATTR_V0; 2524 dma_attr->dma_attr_addr_lo = 0; 2525 dma_attr->dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFFull; 2526 dma_attr->dma_attr_count_max = 0xFFFFFFFFFFFFFFFFull; 2527 dma_attr->dma_attr_align = 1; 2528 dma_attr->dma_attr_burstsizes = 0x3FF; 2529 dma_attr->dma_attr_minxfer = 1; 2530 dma_attr->dma_attr_maxxfer = 0xFFFFFFFFFFFFFFFFull; 2531 dma_attr->dma_attr_seg = 0xFFFFFFFFFFFFFFFFull; 2532 dma_attr->dma_attr_sgllen = 0x7FFFFFFF; 2533 dma_attr->dma_attr_granular = 1; 2534 dma_attr->dma_attr_flags = 0; 2535 }