1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * hermon_wr.c 28 * Hermon Work Request Processing Routines 29 * 30 * Implements all the routines necessary to provide the PostSend(), 31 * PostRecv() and PostSRQ() verbs. Also contains all the code 32 * necessary to implement the Hermon WRID tracking mechanism. 33 */ 34 35 #include <sys/types.h> 36 #include <sys/conf.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/modctl.h> 40 #include <sys/avl.h> 41 42 #include <sys/ib/adapters/hermon/hermon.h> 43 44 static uint32_t hermon_wr_get_immediate(ibt_send_wr_t *wr); 45 static int hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr); 46 static int hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp, 47 ibt_send_wr_t *wr, uint64_t *desc, uint_t *size); 48 static int hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp, 49 ibt_send_wr_t *wr, uint64_t *desc, uint_t *size); 50 static void hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp); 51 static int hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp, 52 ibt_recv_wr_t *wr, uint64_t *desc); 53 static int hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq, 54 ibt_recv_wr_t *wr, uint64_t *desc); 55 static hermon_workq_avl_t *hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, 56 uint_t send_or_recv); 57 static void hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl); 58 static void hermon_cq_workq_remove(hermon_cqhdl_t cq, 59 hermon_workq_avl_t *wqavl); 60 61 static ibt_wr_ds_t null_sgl = { 0, 0x00000100, 0 }; 62 63 /* 64 * Add ability to try to debug RDMA_READ/RDMA_WRITE failures. 65 * 66 * 0x1 - print rkey used during post_send 67 * 0x2 - print sgls used during post_send 68 * 0x4 - print FMR comings and goings 69 */ 70 int hermon_rdma_debug = 0x0; 71 72 static int 73 hermon_post_send_ud(hermon_state_t *state, hermon_qphdl_t qp, 74 ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted) 75 { 76 hermon_hw_snd_wqe_ud_t *ud; 77 hermon_workq_hdr_t *wq; 78 hermon_ahhdl_t ah; 79 ibt_wr_rfci_send_t *rfci; 80 ibt_wr_init_send_t *is; 81 ibt_ud_dest_t *dest; 82 uint64_t *desc; 83 uint32_t desc_sz; 84 uint32_t signaled_dbd, solicited; 85 uint32_t head, tail, next_tail, qsize_msk; 86 uint32_t hdrmwqes; 87 uint32_t nopcode, fence, immed_data = 0; 88 hermon_hw_wqe_sgl_t *ds, *old_ds; 89 ibt_wr_ds_t *sgl; 90 int nds; 91 int i, j, last_ds, num_ds, status; 92 uint32_t *wqe_start; 93 int sectperwqe; 94 uint_t posted_cnt = 0; 95 int total_len, strong_order, fc_bits, cksum; 96 97 98 /* initialize the FMA retry loop */ 99 hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num); 100 101 ASSERT(MUTEX_HELD(&qp->qp_sq_lock)); 102 103 /* Grab the lock for the WRID list */ 104 membar_consumer(); 105 106 /* Save away some initial QP state */ 107 wq = qp->qp_sq_wqhdr; 108 qsize_msk = wq->wq_mask; 109 hdrmwqes = qp->qp_sq_hdrmwqes; /* in WQEs */ 110 sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2); 111 112 tail = wq->wq_tail; 113 head = wq->wq_head; 114 status = DDI_SUCCESS; 115 116 post_next: 117 /* 118 * Check for "queue full" condition. If the queue 119 * is already full, then no more WQEs can be posted. 120 * So break out, ring a doorbell (if necessary) and 121 * return an error 122 */ 123 if (wq->wq_full != 0) { 124 status = IBT_QP_FULL; 125 goto done; 126 } 127 128 next_tail = (tail + 1) & qsize_msk; 129 if (((tail + hdrmwqes) & qsize_msk) == head) { 130 wq->wq_full = 1; 131 } 132 133 desc = HERMON_QP_SQ_ENTRY(qp, tail); 134 135 nds = wr->wr_nds; 136 sgl = wr->wr_sgl; 137 num_ds = 0; 138 strong_order = 0; 139 fc_bits = 0; 140 cksum = 0; 141 142 /* 143 * Build a Send or Send_LSO WQE 144 */ 145 switch (wr->wr_opcode) { 146 case IBT_WRC_SEND_LSO: 147 if (wr->wr_trans != IBT_UD_SRV) { 148 status = IBT_QP_SRV_TYPE_INVALID; 149 goto done; 150 } 151 nopcode = HERMON_WQE_SEND_NOPCODE_LSO; 152 if (wr->wr_flags & IBT_WR_SEND_CKSUM) 153 cksum = 0x30; 154 if (wr->wr.ud_lso.lso_hdr_sz > 60) { 155 nopcode |= (1 << 6); /* ReRead bit must be set */ 156 } 157 dest = wr->wr.ud_lso.lso_ud_dest; 158 ah = (hermon_ahhdl_t)dest->ud_ah; 159 if (ah == NULL) { 160 status = IBT_AH_HDL_INVALID; 161 goto done; 162 } 163 ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc + 164 sizeof (hermon_hw_snd_wqe_ctrl_t)); 165 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud + 166 sizeof (hermon_hw_snd_wqe_ud_t)); 167 HERMON_WQE_BUILD_UD(qp, ud, ah, dest); 168 169 total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf; 170 if ((uintptr_t)ds + total_len + (nds * 16) > 171 (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz)) { 172 status = IBT_QP_SGL_LEN_INVALID; 173 goto done; 174 } 175 old_ds = ds; 176 bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)old_ds + 1, 177 wr->wr.ud_lso.lso_hdr_sz); 178 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len); 179 i = 0; 180 break; 181 182 case IBT_WRC_SEND: 183 nopcode = HERMON_WQE_SEND_NOPCODE_SEND; 184 if (qp->qp_serv_type == HERMON_QP_UD) { 185 if (wr->wr_trans != IBT_UD_SRV) { 186 status = IBT_QP_SRV_TYPE_INVALID; 187 goto done; 188 } 189 if (wr->wr_flags & IBT_WR_SEND_CKSUM) 190 cksum = 0x30; 191 dest = wr->wr.ud.udwr_dest; 192 } else if (qp->qp_serv_type == HERMON_QP_RFCI) { 193 if (wr->wr_trans != IBT_RFCI_SRV) { 194 status = IBT_QP_SRV_TYPE_INVALID; 195 goto done; 196 } 197 rfci = &wr->wr.fc.rfci_send; 198 if ((wr->wr_flags & IBT_WR_SEND_FC_CRC) != 0) { 199 nopcode |= (rfci->rfci_eof << 16); 200 fc_bits = 0x40; /* set FCRC */ 201 } 202 dest = rfci->rfci_dest; 203 } else { 204 status = IBT_QP_OP_TYPE_INVALID; 205 goto done; 206 } 207 if (wr->wr_flags & IBT_WR_SEND_IMMED) { 208 /* "|=" changes 0xa to 0xb without touching FCEOF */ 209 nopcode |= HERMON_WQE_SEND_NOPCODE_SENDI; 210 immed_data = wr->wr.ud.udwr_immed; 211 } 212 ah = (hermon_ahhdl_t)dest->ud_ah; 213 if (ah == NULL) { 214 status = IBT_AH_HDL_INVALID; 215 goto done; 216 } 217 ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc + 218 sizeof (hermon_hw_snd_wqe_ctrl_t)); 219 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud + 220 sizeof (hermon_hw_snd_wqe_ud_t)); 221 HERMON_WQE_BUILD_UD(qp, ud, ah, dest); 222 i = 0; 223 break; 224 225 case IBT_WRC_INIT_SEND_FCMD: 226 if (qp->qp_serv_type != HERMON_QP_FCMND) { 227 status = IBT_QP_OP_TYPE_INVALID; 228 goto done; 229 } 230 if (wr->wr_trans != IBT_FCMD_SRV) { 231 status = IBT_QP_SRV_TYPE_INVALID; 232 goto done; 233 } 234 nopcode = HERMON_WQE_FCP_OPCODE_INIT_AND_SEND; 235 is = wr->wr.fc.fc_is; 236 dest = is->is_ctl.fc_dest; 237 ah = (hermon_ahhdl_t)dest->ud_ah; 238 if (ah == NULL) { 239 status = IBT_AH_HDL_INVALID; 240 goto done; 241 } 242 ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc + 243 sizeof (hermon_hw_snd_wqe_ctrl_t)); 244 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud + 245 sizeof (hermon_hw_snd_wqe_ud_t)); 246 HERMON_WQE_BUILD_UD(qp, ud, ah, dest); 247 old_ds = ds; 248 /* move ds beyond the FCP-3 Init Segment */ 249 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + 0x10); 250 i = 0; 251 break; 252 253 case IBT_WRC_FAST_REG_PMR: 254 { 255 hermon_hw_snd_wqe_frwr_t *frwr; 256 257 if (qp->qp_serv_type != HERMON_QP_FCMND) { 258 status = IBT_QP_OP_TYPE_INVALID; 259 goto done; 260 } 261 if (wr->wr_trans != IBT_FCMD_SRV) { 262 status = IBT_QP_SRV_TYPE_INVALID; 263 goto done; 264 } 265 nopcode = HERMON_WQE_SEND_NOPCODE_FRWR; 266 frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc + 267 sizeof (hermon_hw_snd_wqe_ctrl_t)); 268 HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.fc.reg_pmr); 269 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr + 270 sizeof (hermon_hw_snd_wqe_frwr_t)); 271 nds = 0; 272 strong_order = 0x80; 273 break; 274 } 275 276 #if 0 277 /* firmware does not support this */ 278 case IBT_WRC_LOCAL_INVALIDATE: 279 { 280 hermon_hw_snd_wqe_local_inv_t *li; 281 282 if (qp->qp_serv_type != HERMON_QP_FCMND) { 283 status = IBT_QP_OP_TYPE_INVALID; 284 goto done; 285 } 286 if (wr->wr_trans != IBT_FCMD_SRV) { 287 status = IBT_QP_SRV_TYPE_INVALID; 288 goto done; 289 } 290 nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV; 291 li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc + 292 sizeof (hermon_hw_snd_wqe_ctrl_t)); 293 HERMON_WQE_BUILD_LI(qp, li, wr->wr.fc.li); 294 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li + 295 sizeof (hermon_hw_snd_wqe_local_inv_t)); 296 nds = 0; 297 strong_order = 0x80; 298 break; 299 } 300 #endif 301 default: 302 status = IBT_QP_OP_TYPE_INVALID; 303 goto done; 304 } 305 306 if (nds > qp->qp_sq_sgl) { 307 status = IBT_QP_SGL_LEN_INVALID; 308 goto done; 309 } 310 for (last_ds = num_ds, j = i; j < nds; j++) { 311 if (sgl[j].ds_len != 0) 312 last_ds++; /* real last ds of wqe to fill */ 313 } 314 desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4; 315 for (j = nds; --j >= i; ) { 316 if (sgl[j].ds_len == 0) { 317 continue; 318 } 319 320 /* 321 * Fill in the Data Segment(s) for the current WQE, using the 322 * information contained in the scatter-gather list of the 323 * work request. 324 */ 325 last_ds--; 326 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]); 327 } 328 329 membar_producer(); 330 331 if (wr->wr_opcode == IBT_WRC_SEND_LSO) { 332 HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss, 333 wr->wr.ud_lso.lso_hdr_sz); 334 } else if (wr->wr_opcode == IBT_WRC_INIT_SEND_FCMD) { 335 /* This sits in the STAMP, so must be set after setting SGL */ 336 HERMON_WQE_BUILD_FCP3_INIT(old_ds, is->is_ctl.fc_frame_ctrl, 337 is->is_cs_priority, is->is_tx_seq_id, is->is_fc_mtu, 338 is->is_dest_id, is->is_op, is->is_rem_exch, 339 is->is_exch_qp_idx); 340 341 /* The following will be used in HERMON_WQE_SET_CTRL_SEGMENT */ 342 /* SIT bit in FCP-3 ctrl segment */ 343 desc_sz |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_SIT) ? 0x80 : 0; 344 /* LS bit in FCP-3 ctrl segment */ 345 fc_bits |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_LAST_SEQ) ? 346 0x10000 : 0; 347 fc_bits |= ((is->is_ctl.fc_routing_ctrl & 0xF) << 20) | 348 (is->is_ctl.fc_seq_id << 24); 349 immed_data = is->is_ctl.fc_parameter; 350 } 351 352 fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0; 353 354 signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) || 355 (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0; 356 357 solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0; 358 359 HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, 360 solicited, signaled_dbd, cksum, qp, strong_order, fc_bits); 361 362 wq->wq_wrid[tail] = wr->wr_id; 363 364 tail = next_tail; 365 366 /* Update some of the state in the QP */ 367 wq->wq_tail = tail; 368 369 membar_producer(); 370 371 /* Now set the ownership bit and opcode (first dword). */ 372 HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode); 373 374 posted_cnt++; 375 if (--num_wr > 0) { 376 /* do the invalidate of the headroom */ 377 wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 378 (tail + hdrmwqes) & qsize_msk); 379 for (i = 16; i < sectperwqe; i += 16) { 380 wqe_start[i] = 0xFFFFFFFF; 381 } 382 383 wr++; 384 goto post_next; 385 } 386 done: 387 if (posted_cnt != 0) { 388 ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state); 389 390 membar_producer(); 391 392 /* the FMA retry loop starts for Hermon doorbell register. */ 393 hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, 394 fm_status, fm_test_num); 395 396 HERMON_UAR_DOORBELL(state, uarhdl, 397 (uint64_t *)(void *)&state->hs_uar->send, 398 (uint64_t)qp->qp_ring); 399 400 /* the FMA retry loop ends. */ 401 hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, 402 fm_status, fm_test_num); 403 404 /* do the invalidate of the headroom */ 405 wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 406 (tail + hdrmwqes) & qsize_msk); 407 for (i = 16; i < sectperwqe; i += 16) { 408 wqe_start[i] = 0xFFFFFFFF; 409 } 410 } 411 if (num_posted != NULL) 412 *num_posted = posted_cnt; 413 414 mutex_exit(&qp->qp_sq_lock); 415 416 return (status); 417 418 pio_error: 419 mutex_exit(&qp->qp_sq_lock); 420 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 421 return (ibc_get_ci_failure(0)); 422 } 423 424 static int 425 hermon_post_send_rc(hermon_state_t *state, hermon_qphdl_t qp, 426 ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted) 427 { 428 uint64_t *desc; 429 hermon_workq_hdr_t *wq; 430 uint32_t desc_sz; 431 uint32_t signaled_dbd, solicited; 432 uint32_t head, tail, next_tail, qsize_msk; 433 uint32_t hdrmwqes; 434 int status; 435 uint32_t nopcode, fence, immed_data = 0; 436 hermon_hw_snd_wqe_remaddr_t *rc; 437 hermon_hw_snd_wqe_atomic_t *at; 438 hermon_hw_snd_wqe_bind_t *bn; 439 hermon_hw_snd_wqe_frwr_t *frwr; 440 hermon_hw_snd_wqe_local_inv_t *li; 441 hermon_hw_wqe_sgl_t *ds; 442 ibt_wr_ds_t *sgl; 443 int nds; 444 int i, last_ds, num_ds; 445 uint32_t *wqe_start; 446 int sectperwqe; 447 uint_t posted_cnt = 0; 448 int strong_order; 449 int print_rdma; 450 int rlen; 451 uint32_t rkey; 452 uint64_t raddr; 453 454 /* initialize the FMA retry loop */ 455 hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num); 456 457 ASSERT(MUTEX_HELD(&qp->qp_sq_lock)); 458 459 /* Save away some initial QP state */ 460 wq = qp->qp_sq_wqhdr; 461 qsize_msk = wq->wq_mask; 462 hdrmwqes = qp->qp_sq_hdrmwqes; /* in WQEs */ 463 sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2); 464 465 tail = wq->wq_tail; 466 head = wq->wq_head; 467 status = DDI_SUCCESS; 468 469 post_next: 470 print_rdma = 0; 471 rlen = 0; 472 strong_order = 0; 473 474 /* 475 * Check for "queue full" condition. If the queue 476 * is already full, then no more WQEs can be posted. 477 * So break out, ring a doorbell (if necessary) and 478 * return an error 479 */ 480 if (wq->wq_full != 0) { 481 status = IBT_QP_FULL; 482 goto done; 483 } 484 next_tail = (tail + 1) & qsize_msk; 485 if (((tail + hdrmwqes) & qsize_msk) == head) { 486 wq->wq_full = 1; 487 } 488 489 desc = HERMON_QP_SQ_ENTRY(qp, tail); 490 491 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc + 492 sizeof (hermon_hw_snd_wqe_ctrl_t)); 493 nds = wr->wr_nds; 494 sgl = wr->wr_sgl; 495 num_ds = 0; 496 if (wr->wr_trans != IBT_RC_SRV) { 497 status = IBT_QP_SRV_TYPE_INVALID; 498 goto done; 499 } 500 501 /* 502 * Validate the operation type. For RC requests, we allow 503 * "Send", "RDMA Read", "RDMA Write", various "Atomic" 504 * operations, and memory window "Bind" 505 */ 506 switch (wr->wr_opcode) { 507 default: 508 status = IBT_QP_OP_TYPE_INVALID; 509 goto done; 510 511 case IBT_WRC_SEND: 512 if (wr->wr_flags & IBT_WR_SEND_REMOTE_INVAL) { 513 nopcode = HERMON_WQE_SEND_NOPCODE_SND_INV; 514 immed_data = wr->wr.rc.rcwr.send_inval; 515 } else if (wr->wr_flags & IBT_WR_SEND_IMMED) { 516 nopcode = HERMON_WQE_SEND_NOPCODE_SENDI; 517 immed_data = wr->wr.rc.rcwr.send_immed; 518 } else { 519 nopcode = HERMON_WQE_SEND_NOPCODE_SEND; 520 } 521 break; 522 523 /* 524 * If this is an RDMA Read or RDMA Write request, then fill 525 * in the "Remote Address" header fields. 526 */ 527 case IBT_WRC_RDMAW: 528 if (wr->wr_flags & IBT_WR_SEND_IMMED) { 529 nopcode = HERMON_WQE_SEND_NOPCODE_RDMAWI; 530 immed_data = wr->wr.rc.rcwr.rdma.rdma_immed; 531 } else { 532 nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW; 533 } 534 /* FALLTHROUGH */ 535 case IBT_WRC_RDMAR: 536 if (wr->wr_opcode == IBT_WRC_RDMAR) 537 nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR; 538 rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc + 539 sizeof (hermon_hw_snd_wqe_ctrl_t)); 540 541 /* 542 * Build the Remote Address Segment for the WQE, using 543 * the information from the RC work request. 544 */ 545 HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma); 546 547 if (hermon_rdma_debug) { 548 print_rdma = hermon_rdma_debug; 549 rkey = wr->wr.rc.rcwr.rdma.rdma_rkey; 550 raddr = wr->wr.rc.rcwr.rdma.rdma_raddr; 551 } 552 553 /* Update "ds" for filling in Data Segments (below) */ 554 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc + 555 sizeof (hermon_hw_snd_wqe_remaddr_t)); 556 break; 557 558 /* 559 * If this is one of the Atomic type operations (i.e 560 * Compare-Swap or Fetch-Add), then fill in both the "Remote 561 * Address" header fields and the "Atomic" header fields. 562 */ 563 case IBT_WRC_CSWAP: 564 nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS; 565 /* FALLTHROUGH */ 566 case IBT_WRC_FADD: 567 if (wr->wr_opcode == IBT_WRC_FADD) 568 nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA; 569 rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc + 570 sizeof (hermon_hw_snd_wqe_ctrl_t)); 571 at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc + 572 sizeof (hermon_hw_snd_wqe_remaddr_t)); 573 574 /* 575 * Build the Remote Address and Atomic Segments for 576 * the WQE, using the information from the RC Atomic 577 * work request. 578 */ 579 HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr); 580 HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic); 581 582 /* Update "ds" for filling in Data Segments (below) */ 583 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at + 584 sizeof (hermon_hw_snd_wqe_atomic_t)); 585 586 /* 587 * Update "nds" and "sgl" because Atomic requests have 588 * only a single Data Segment. 589 */ 590 nds = 1; 591 sgl = wr->wr_sgl; 592 break; 593 594 /* 595 * If this is memory window Bind operation, then we call the 596 * hermon_wr_bind_check() routine to validate the request and 597 * to generate the updated RKey. If this is successful, then 598 * we fill in the WQE's "Bind" header fields. 599 */ 600 case IBT_WRC_BIND: 601 nopcode = HERMON_WQE_SEND_NOPCODE_BIND; 602 status = hermon_wr_bind_check(state, wr); 603 if (status != DDI_SUCCESS) 604 goto done; 605 606 bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc + 607 sizeof (hermon_hw_snd_wqe_ctrl_t)); 608 609 /* 610 * Build the Bind Memory Window Segments for the WQE, 611 * using the information from the RC Bind memory 612 * window work request. 613 */ 614 HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind); 615 616 /* 617 * Update the "ds" pointer. Even though the "bind" 618 * operation requires no SGLs, this is necessary to 619 * facilitate the correct descriptor size calculations 620 * (below). 621 */ 622 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn + 623 sizeof (hermon_hw_snd_wqe_bind_t)); 624 nds = 0; 625 break; 626 627 case IBT_WRC_FAST_REG_PMR: 628 nopcode = HERMON_WQE_SEND_NOPCODE_FRWR; 629 frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc + 630 sizeof (hermon_hw_snd_wqe_ctrl_t)); 631 HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.rc.rcwr.reg_pmr); 632 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr + 633 sizeof (hermon_hw_snd_wqe_frwr_t)); 634 nds = 0; 635 strong_order = 0x80; 636 break; 637 638 case IBT_WRC_LOCAL_INVALIDATE: 639 nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV; 640 li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc + 641 sizeof (hermon_hw_snd_wqe_ctrl_t)); 642 HERMON_WQE_BUILD_LI(qp, li, wr->wr.rc.rcwr.li); 643 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li + 644 sizeof (hermon_hw_snd_wqe_local_inv_t)); 645 nds = 0; 646 strong_order = 0x80; 647 break; 648 } 649 650 /* 651 * Now fill in the Data Segments (SGL) for the Send WQE based 652 * on the values setup above (i.e. "sgl", "nds", and the "ds" 653 * pointer. Start by checking for a valid number of SGL entries 654 */ 655 if (nds > qp->qp_sq_sgl) { 656 status = IBT_QP_SGL_LEN_INVALID; 657 goto done; 658 } 659 660 for (last_ds = num_ds, i = 0; i < nds; i++) { 661 if (sgl[i].ds_len != 0) 662 last_ds++; /* real last ds of wqe to fill */ 663 } 664 desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4; 665 for (i = nds; --i >= 0; ) { 666 if (sgl[i].ds_len == 0) { 667 continue; 668 } 669 rlen += sgl[i].ds_len; 670 if (print_rdma & 0x2) 671 IBTF_DPRINTF_L2("rdma", "post: [%d]: laddr %llx " 672 "llen %x", i, sgl[i].ds_va, sgl[i].ds_len); 673 674 /* 675 * Fill in the Data Segment(s) for the current WQE, using the 676 * information contained in the scatter-gather list of the 677 * work request. 678 */ 679 last_ds--; 680 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[i]); 681 } 682 /* ensure RDMA READ does not exceed HCA limit */ 683 if ((wr->wr_opcode == IBT_WRC_RDMAR) && (desc_sz > 684 state->hs_ibtfinfo.hca_attr->hca_conn_rdma_read_sgl_sz + 2)) { 685 status = IBT_QP_SGL_LEN_INVALID; 686 goto done; 687 } 688 689 if (print_rdma & 0x1) { 690 IBTF_DPRINTF_L2("rdma", "post: indx %x rkey %x raddr %llx " 691 "total len %x", tail, rkey, raddr, rlen); 692 } 693 694 fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0; 695 696 signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) || 697 (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0; 698 699 solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0; 700 701 HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited, 702 signaled_dbd, 0, qp, strong_order, 0); 703 704 wq->wq_wrid[tail] = wr->wr_id; 705 706 tail = next_tail; 707 708 /* Update some of the state in the QP */ 709 wq->wq_tail = tail; 710 711 membar_producer(); 712 713 /* Now set the ownership bit of the first one in the chain. */ 714 HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode); 715 716 posted_cnt++; 717 if (--num_wr > 0) { 718 /* do the invalidate of the headroom */ 719 wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 720 (tail + hdrmwqes) & qsize_msk); 721 for (i = 16; i < sectperwqe; i += 16) { 722 wqe_start[i] = 0xFFFFFFFF; 723 } 724 725 wr++; 726 goto post_next; 727 } 728 done: 729 730 if (posted_cnt != 0) { 731 ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state); 732 733 membar_producer(); 734 735 /* the FMA retry loop starts for Hermon doorbell register. */ 736 hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, 737 fm_status, fm_test_num); 738 739 /* Ring the doorbell */ 740 HERMON_UAR_DOORBELL(state, uarhdl, 741 (uint64_t *)(void *)&state->hs_uar->send, 742 (uint64_t)qp->qp_ring); 743 744 /* the FMA retry loop ends. */ 745 hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, 746 fm_status, fm_test_num); 747 748 /* do the invalidate of the headroom */ 749 wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 750 (tail + hdrmwqes) & qsize_msk); 751 for (i = 16; i < sectperwqe; i += 16) { 752 wqe_start[i] = 0xFFFFFFFF; 753 } 754 } 755 /* 756 * Update the "num_posted" return value (if necessary). 757 * Then drop the locks and return success. 758 */ 759 if (num_posted != NULL) { 760 *num_posted = posted_cnt; 761 } 762 763 mutex_exit(&qp->qp_sq_lock); 764 return (status); 765 766 pio_error: 767 mutex_exit(&qp->qp_sq_lock); 768 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 769 return (ibc_get_ci_failure(0)); 770 } 771 772 /* 773 * hermon_post_send() 774 * Context: Can be called from interrupt or base context. 775 */ 776 int 777 hermon_post_send(hermon_state_t *state, hermon_qphdl_t qp, 778 ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted) 779 { 780 ibt_send_wr_t *curr_wr; 781 hermon_workq_hdr_t *wq; 782 hermon_ahhdl_t ah; 783 uint64_t *desc, *prev; 784 uint32_t desc_sz; 785 uint32_t signaled_dbd, solicited; 786 uint32_t head, tail, next_tail, qsize_msk; 787 uint32_t hdrmwqes; 788 uint_t currindx, wrindx, numremain; 789 uint_t chainlen; 790 uint_t posted_cnt, maxstat; 791 uint_t total_posted; 792 int status; 793 uint32_t nopcode, fence, immed_data = 0; 794 uint32_t prev_nopcode; 795 uint_t qp_state; 796 797 /* initialize the FMA retry loop */ 798 hermon_pio_init(fm_loop_cnt, fm_status, fm_test); 799 800 /* 801 * Check for user-mappable QP memory. Note: We do not allow kernel 802 * clients to post to QP memory that is accessible directly by the 803 * user. If the QP memory is user accessible, then return an error. 804 */ 805 if (qp->qp_alloc_flags & IBT_QP_USER_MAP) { 806 return (IBT_QP_HDL_INVALID); 807 } 808 809 mutex_enter(&qp->qp_sq_lock); 810 811 /* 812 * Check QP state. Can not post Send requests from the "Reset", 813 * "Init", or "RTR" states 814 */ 815 qp_state = qp->qp_state_for_post_send; 816 if ((qp_state == HERMON_QP_RESET) || 817 (qp_state == HERMON_QP_INIT) || 818 (qp_state == HERMON_QP_RTR)) { 819 mutex_exit(&qp->qp_sq_lock); 820 return (IBT_QP_STATE_INVALID); 821 } 822 823 if (qp->qp_is_special) 824 goto post_many; 825 826 /* Use these optimized functions most of the time */ 827 if (qp->qp_type == IBT_UD_RQP) { 828 return (hermon_post_send_ud(state, qp, wr, num_wr, num_posted)); 829 } 830 831 if (qp->qp_serv_type == HERMON_QP_RC) { 832 return (hermon_post_send_rc(state, qp, wr, num_wr, num_posted)); 833 } 834 835 if (qp->qp_serv_type == HERMON_QP_UC) 836 goto post_many; 837 838 mutex_exit(&qp->qp_sq_lock); 839 return (IBT_QP_SRV_TYPE_INVALID); 840 841 post_many: 842 /* general loop for non-optimized posting */ 843 844 /* Save away some initial QP state */ 845 wq = qp->qp_sq_wqhdr; 846 qsize_msk = wq->wq_mask; 847 tail = wq->wq_tail; 848 head = wq->wq_head; 849 hdrmwqes = qp->qp_sq_hdrmwqes; /* in WQEs */ 850 851 /* Initialize posted_cnt */ 852 posted_cnt = 0; 853 total_posted = 0; 854 855 /* 856 * For each ibt_send_wr_t in the wr[] list passed in, parse the 857 * request and build a Send WQE. NOTE: Because we are potentially 858 * building a chain of WQEs to post, we want to build them all first, 859 * and set the valid (HW Ownership) bit on all but the first. 860 * However, we do not want to validate the first one until the 861 * entire chain of WQEs has been built. Then in the final 862 * we set the valid bit in the first, flush if needed, and as a last 863 * step ring the appropriate doorbell. NOTE: the doorbell ring may 864 * NOT be needed if the HCA is already processing, but the doorbell 865 * ring will be done regardless. NOTE ALSO: It is possible for 866 * more Work Requests to be posted than the HW will support at one 867 * shot. If this happens, we need to be able to post and ring 868 * several chains here until the the entire request is complete. 869 * NOTE ALSO: the term "chain" is used to differentiate it from 870 * Work Request List passed in; and because that's the terminology 871 * from the previous generations of HCA - but the WQEs are not, in fact 872 * chained together for Hermon 873 */ 874 875 wrindx = 0; 876 numremain = num_wr; 877 status = DDI_SUCCESS; 878 while ((wrindx < num_wr) && (status == DDI_SUCCESS)) { 879 /* 880 * For the first WQE on a new chain we need "prev" to point 881 * to the current descriptor. 882 */ 883 prev = HERMON_QP_SQ_ENTRY(qp, tail); 884 885 /* 886 * Break the request up into lists that are less than or 887 * equal to the maximum number of WQEs that can be posted 888 * per doorbell ring - 256 currently 889 */ 890 chainlen = (numremain > HERMON_QP_MAXDESC_PER_DB) ? 891 HERMON_QP_MAXDESC_PER_DB : numremain; 892 numremain -= chainlen; 893 894 for (currindx = 0; currindx < chainlen; currindx++, wrindx++) { 895 /* 896 * Check for "queue full" condition. If the queue 897 * is already full, then no more WQEs can be posted. 898 * So break out, ring a doorbell (if necessary) and 899 * return an error 900 */ 901 if (wq->wq_full != 0) { 902 status = IBT_QP_FULL; 903 break; 904 } 905 906 /* 907 * Increment the "tail index". Check for "queue 908 * full" condition incl. headroom. If we detect that 909 * the current work request is going to fill the work 910 * queue, then we mark this condition and continue. 911 * Don't need >=, because going one-by-one we have to 912 * hit it exactly sooner or later 913 */ 914 915 next_tail = (tail + 1) & qsize_msk; 916 if (((tail + hdrmwqes) & qsize_msk) == head) { 917 wq->wq_full = 1; 918 } 919 920 /* 921 * Get the address of the location where the next 922 * Send WQE should be built 923 */ 924 desc = HERMON_QP_SQ_ENTRY(qp, tail); 925 /* 926 * Call hermon_wqe_send_build() to build the WQE 927 * at the given address. This routine uses the 928 * information in the ibt_send_wr_t list (wr[]) and 929 * returns the size of the WQE when it returns. 930 */ 931 status = hermon_wqe_send_build(state, qp, 932 &wr[wrindx], desc, &desc_sz); 933 if (status != DDI_SUCCESS) { 934 break; 935 } 936 937 /* 938 * Now, build the Ctrl Segment based on 939 * what was just done 940 */ 941 curr_wr = &wr[wrindx]; 942 943 switch (curr_wr->wr_opcode) { 944 case IBT_WRC_RDMAW: 945 if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) { 946 nopcode = 947 HERMON_WQE_SEND_NOPCODE_RDMAWI; 948 immed_data = 949 hermon_wr_get_immediate(curr_wr); 950 } else { 951 nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW; 952 } 953 break; 954 955 case IBT_WRC_SEND: 956 if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) { 957 nopcode = HERMON_WQE_SEND_NOPCODE_SENDI; 958 immed_data = 959 hermon_wr_get_immediate(curr_wr); 960 } else { 961 nopcode = HERMON_WQE_SEND_NOPCODE_SEND; 962 } 963 break; 964 965 case IBT_WRC_SEND_LSO: 966 nopcode = HERMON_WQE_SEND_NOPCODE_LSO; 967 break; 968 969 case IBT_WRC_RDMAR: 970 nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR; 971 break; 972 973 case IBT_WRC_CSWAP: 974 nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS; 975 break; 976 977 case IBT_WRC_FADD: 978 nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA; 979 break; 980 981 case IBT_WRC_BIND: 982 nopcode = HERMON_WQE_SEND_NOPCODE_BIND; 983 break; 984 } 985 986 fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0; 987 988 /* 989 * now, build up the control segment, leaving the 990 * owner bit as it is 991 */ 992 993 if ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) || 994 (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL)) { 995 signaled_dbd = 0xC; 996 } else { 997 signaled_dbd = 0; 998 } 999 if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT) 1000 solicited = 0x2; 1001 else 1002 solicited = 0; 1003 1004 if (qp->qp_is_special) { 1005 /* Ensure correctness, set the ReRead bit */ 1006 nopcode |= (1 << 6); 1007 ah = (hermon_ahhdl_t) 1008 curr_wr->wr.ud.udwr_dest->ud_ah; 1009 mutex_enter(&ah->ah_lock); 1010 maxstat = ah->ah_udav->max_stat_rate; 1011 HERMON_WQE_SET_MLX_CTRL_SEGMENT(desc, desc_sz, 1012 signaled_dbd, maxstat, ah->ah_udav->rlid, 1013 qp, ah->ah_udav->sl); 1014 mutex_exit(&ah->ah_lock); 1015 } else { 1016 HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, 1017 fence, immed_data, solicited, 1018 signaled_dbd, 0, qp, 0, 0); 1019 } 1020 wq->wq_wrid[tail] = curr_wr->wr_id; 1021 1022 /* 1023 * If this is not the first descriptor on the current 1024 * chain, then set the ownership bit. 1025 */ 1026 if (currindx != 0) { /* not the first */ 1027 membar_producer(); 1028 HERMON_SET_SEND_WQE_OWNER(qp, 1029 (uint32_t *)desc, nopcode); 1030 } else 1031 prev_nopcode = nopcode; 1032 1033 /* 1034 * Update the current "tail index" and increment 1035 * "posted_cnt" 1036 */ 1037 tail = next_tail; 1038 posted_cnt++; 1039 } 1040 1041 /* 1042 * If we reach here and there are one or more WQEs which have 1043 * been successfully built as a chain, we have to finish up 1044 * and prepare them for writing to the HW 1045 * The steps are: 1046 * 1. do the headroom fixup 1047 * 2. add in the size of the headroom for the sync 1048 * 3. write the owner bit for the first WQE 1049 * 4. sync them 1050 * 5. fix up the structures 1051 * 6. hit the doorbell in UAR 1052 */ 1053 if (posted_cnt != 0) { 1054 ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state); 1055 1056 /* do the invalidate of the headroom */ 1057 1058 hermon_wqe_headroom(tail, qp); 1059 1060 /* Update some of the state in the QP */ 1061 wq->wq_tail = tail; 1062 total_posted += posted_cnt; 1063 posted_cnt = 0; 1064 1065 membar_producer(); 1066 1067 /* 1068 * Now set the ownership bit of the first 1069 * one in the chain 1070 */ 1071 HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)prev, 1072 prev_nopcode); 1073 1074 /* the FMA retry loop starts for Hermon doorbell. */ 1075 hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, 1076 fm_status, fm_test); 1077 1078 HERMON_UAR_DOORBELL(state, uarhdl, 1079 (uint64_t *)(void *)&state->hs_uar->send, 1080 (uint64_t)qp->qp_ring); 1081 1082 /* the FMA retry loop ends. */ 1083 hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, 1084 fm_status, fm_test); 1085 } 1086 } 1087 1088 /* 1089 * Update the "num_posted" return value (if necessary). 1090 * Then drop the locks and return success. 1091 */ 1092 if (num_posted != NULL) { 1093 *num_posted = total_posted; 1094 } 1095 mutex_exit(&qp->qp_sq_lock); 1096 return (status); 1097 1098 pio_error: 1099 mutex_exit(&qp->qp_sq_lock); 1100 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST); 1101 return (ibc_get_ci_failure(0)); 1102 } 1103 1104 1105 /* 1106 * hermon_post_recv() 1107 * Context: Can be called from interrupt or base context. 1108 */ 1109 int 1110 hermon_post_recv(hermon_state_t *state, hermon_qphdl_t qp, 1111 ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted) 1112 { 1113 uint64_t *desc; 1114 hermon_workq_hdr_t *wq; 1115 uint32_t head, tail, next_tail, qsize_msk; 1116 uint_t wrindx; 1117 uint_t posted_cnt; 1118 int status; 1119 1120 /* 1121 * Check for user-mappable QP memory. Note: We do not allow kernel 1122 * clients to post to QP memory that is accessible directly by the 1123 * user. If the QP memory is user accessible, then return an error. 1124 */ 1125 if (qp->qp_alloc_flags & IBT_QP_USER_MAP) { 1126 return (IBT_QP_HDL_INVALID); 1127 } 1128 1129 /* Initialize posted_cnt */ 1130 posted_cnt = 0; 1131 1132 mutex_enter(&qp->qp_lock); 1133 1134 /* 1135 * Check if QP is associated with an SRQ 1136 */ 1137 if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) { 1138 mutex_exit(&qp->qp_lock); 1139 return (IBT_SRQ_IN_USE); 1140 } 1141 1142 /* 1143 * Check QP state. Can not post Recv requests from the "Reset" state 1144 */ 1145 if (qp->qp_state == HERMON_QP_RESET) { 1146 mutex_exit(&qp->qp_lock); 1147 return (IBT_QP_STATE_INVALID); 1148 } 1149 1150 /* Check that work request transport type is valid */ 1151 if ((qp->qp_type != IBT_UD_RQP) && 1152 (qp->qp_serv_type != HERMON_QP_RC) && 1153 (qp->qp_serv_type != HERMON_QP_UC)) { 1154 mutex_exit(&qp->qp_lock); 1155 return (IBT_QP_SRV_TYPE_INVALID); 1156 } 1157 1158 /* 1159 * Grab the lock for the WRID list, i.e., membar_consumer(). 1160 * This is not needed because the mutex_enter() above has 1161 * the same effect. 1162 */ 1163 1164 /* Save away some initial QP state */ 1165 wq = qp->qp_rq_wqhdr; 1166 qsize_msk = wq->wq_mask; 1167 tail = wq->wq_tail; 1168 head = wq->wq_head; 1169 1170 wrindx = 0; 1171 status = DDI_SUCCESS; 1172 1173 for (wrindx = 0; wrindx < num_wr; wrindx++) { 1174 if (wq->wq_full != 0) { 1175 status = IBT_QP_FULL; 1176 break; 1177 } 1178 next_tail = (tail + 1) & qsize_msk; 1179 if (next_tail == head) { 1180 wq->wq_full = 1; 1181 } 1182 desc = HERMON_QP_RQ_ENTRY(qp, tail); 1183 status = hermon_wqe_recv_build(state, qp, &wr[wrindx], desc); 1184 if (status != DDI_SUCCESS) { 1185 break; 1186 } 1187 1188 wq->wq_wrid[tail] = wr[wrindx].wr_id; 1189 qp->qp_rq_wqecntr++; 1190 1191 tail = next_tail; 1192 posted_cnt++; 1193 } 1194 1195 if (posted_cnt != 0) { 1196 1197 wq->wq_tail = tail; 1198 1199 membar_producer(); /* ensure wrids are visible */ 1200 1201 /* Update the doorbell record w/ wqecntr */ 1202 HERMON_UAR_DB_RECORD_WRITE(qp->qp_rq_vdbr, 1203 qp->qp_rq_wqecntr & 0xFFFF); 1204 } 1205 1206 if (num_posted != NULL) { 1207 *num_posted = posted_cnt; 1208 } 1209 1210 1211 mutex_exit(&qp->qp_lock); 1212 return (status); 1213 } 1214 1215 /* 1216 * hermon_post_srq() 1217 * Context: Can be called from interrupt or base context. 1218 */ 1219 int 1220 hermon_post_srq(hermon_state_t *state, hermon_srqhdl_t srq, 1221 ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted) 1222 { 1223 uint64_t *desc; 1224 hermon_workq_hdr_t *wq; 1225 uint_t indx, wrindx; 1226 uint_t posted_cnt; 1227 int status; 1228 1229 mutex_enter(&srq->srq_lock); 1230 1231 /* 1232 * Check for user-mappable QP memory. Note: We do not allow kernel 1233 * clients to post to QP memory that is accessible directly by the 1234 * user. If the QP memory is user accessible, then return an error. 1235 */ 1236 if (srq->srq_is_umap) { 1237 mutex_exit(&srq->srq_lock); 1238 return (IBT_SRQ_HDL_INVALID); 1239 } 1240 1241 /* 1242 * Check SRQ state. Can not post Recv requests when SRQ is in error 1243 */ 1244 if (srq->srq_state == HERMON_SRQ_STATE_ERROR) { 1245 mutex_exit(&srq->srq_lock); 1246 return (IBT_QP_STATE_INVALID); 1247 } 1248 1249 status = DDI_SUCCESS; 1250 posted_cnt = 0; 1251 wq = srq->srq_wq_wqhdr; 1252 indx = wq->wq_head; 1253 1254 for (wrindx = 0; wrindx < num_wr; wrindx++) { 1255 1256 if (indx == wq->wq_tail) { 1257 status = IBT_QP_FULL; 1258 break; 1259 } 1260 desc = HERMON_SRQ_WQE_ADDR(srq, indx); 1261 1262 wq->wq_wrid[indx] = wr[wrindx].wr_id; 1263 1264 status = hermon_wqe_srq_build(state, srq, &wr[wrindx], desc); 1265 if (status != DDI_SUCCESS) { 1266 break; 1267 } 1268 1269 posted_cnt++; 1270 indx = htons(((uint16_t *)desc)[1]); 1271 wq->wq_head = indx; 1272 } 1273 1274 if (posted_cnt != 0) { 1275 1276 srq->srq_wq_wqecntr += posted_cnt; 1277 1278 membar_producer(); /* ensure wrids are visible */ 1279 1280 /* Ring the doorbell w/ wqecntr */ 1281 HERMON_UAR_DB_RECORD_WRITE(srq->srq_wq_vdbr, 1282 srq->srq_wq_wqecntr & 0xFFFF); 1283 } 1284 1285 if (num_posted != NULL) { 1286 *num_posted = posted_cnt; 1287 } 1288 1289 mutex_exit(&srq->srq_lock); 1290 return (status); 1291 } 1292 1293 1294 /* 1295 * hermon_wqe_send_build() 1296 * Context: Can be called from interrupt or base context. 1297 */ 1298 static int 1299 hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp, 1300 ibt_send_wr_t *wr, uint64_t *desc, uint_t *size) 1301 { 1302 hermon_hw_snd_wqe_ud_t *ud; 1303 hermon_hw_snd_wqe_remaddr_t *rc; 1304 hermon_hw_snd_wqe_atomic_t *at; 1305 hermon_hw_snd_wqe_remaddr_t *uc; 1306 hermon_hw_snd_wqe_bind_t *bn; 1307 hermon_hw_wqe_sgl_t *ds, *old_ds; 1308 ibt_ud_dest_t *dest; 1309 ibt_wr_ds_t *sgl; 1310 hermon_ahhdl_t ah; 1311 uint32_t nds; 1312 int i, j, last_ds, num_ds, status; 1313 int tmpsize; 1314 1315 ASSERT(MUTEX_HELD(&qp->qp_sq_lock)); 1316 1317 /* Initialize the information for the Data Segments */ 1318 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc + 1319 sizeof (hermon_hw_snd_wqe_ctrl_t)); 1320 nds = wr->wr_nds; 1321 sgl = wr->wr_sgl; 1322 num_ds = 0; 1323 i = 0; 1324 1325 /* 1326 * Build a Send WQE depends first and foremost on the transport 1327 * type of Work Request (i.e. UD, RC, or UC) 1328 */ 1329 switch (wr->wr_trans) { 1330 case IBT_UD_SRV: 1331 /* Ensure that work request transport type matches QP type */ 1332 if (qp->qp_serv_type != HERMON_QP_UD) { 1333 return (IBT_QP_SRV_TYPE_INVALID); 1334 } 1335 1336 /* 1337 * Validate the operation type. For UD requests, only the 1338 * "Send" and "Send LSO" operations are valid. 1339 */ 1340 if (wr->wr_opcode != IBT_WRC_SEND && 1341 wr->wr_opcode != IBT_WRC_SEND_LSO) { 1342 return (IBT_QP_OP_TYPE_INVALID); 1343 } 1344 1345 /* 1346 * If this is a Special QP (QP0 or QP1), then we need to 1347 * build MLX WQEs instead. So jump to hermon_wqe_mlx_build() 1348 * and return whatever status it returns 1349 */ 1350 if (qp->qp_is_special) { 1351 if (wr->wr_opcode == IBT_WRC_SEND_LSO) { 1352 return (IBT_QP_OP_TYPE_INVALID); 1353 } 1354 status = hermon_wqe_mlx_build(state, qp, 1355 wr, desc, size); 1356 return (status); 1357 } 1358 1359 /* 1360 * Otherwise, if this is a normal UD Send request, then fill 1361 * all the fields in the Hermon UD header for the WQE. Note: 1362 * to do this we'll need to extract some information from the 1363 * Address Handle passed with the work request. 1364 */ 1365 ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc + 1366 sizeof (hermon_hw_snd_wqe_ctrl_t)); 1367 if (wr->wr_opcode == IBT_WRC_SEND) { 1368 dest = wr->wr.ud.udwr_dest; 1369 } else { 1370 dest = wr->wr.ud_lso.lso_ud_dest; 1371 } 1372 ah = (hermon_ahhdl_t)dest->ud_ah; 1373 if (ah == NULL) { 1374 return (IBT_AH_HDL_INVALID); 1375 } 1376 1377 /* 1378 * Build the Unreliable Datagram Segment for the WQE, using 1379 * the information from the address handle and the work 1380 * request. 1381 */ 1382 /* mutex_enter(&ah->ah_lock); */ 1383 if (wr->wr_opcode == IBT_WRC_SEND) { 1384 HERMON_WQE_BUILD_UD(qp, ud, ah, wr->wr.ud.udwr_dest); 1385 } else { /* IBT_WRC_SEND_LSO */ 1386 HERMON_WQE_BUILD_UD(qp, ud, ah, 1387 wr->wr.ud_lso.lso_ud_dest); 1388 } 1389 /* mutex_exit(&ah->ah_lock); */ 1390 1391 /* Update "ds" for filling in Data Segments (below) */ 1392 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud + 1393 sizeof (hermon_hw_snd_wqe_ud_t)); 1394 1395 if (wr->wr_opcode == IBT_WRC_SEND_LSO) { 1396 int total_len; 1397 1398 total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf; 1399 if ((uintptr_t)ds + total_len + (nds * 16) > 1400 (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz)) 1401 return (IBT_QP_SGL_LEN_INVALID); 1402 1403 bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1, 1404 wr->wr.ud_lso.lso_hdr_sz); 1405 old_ds = ds; 1406 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len); 1407 for (; i < nds; i++) { 1408 if (sgl[i].ds_len == 0) 1409 continue; 1410 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], 1411 &sgl[i]); 1412 num_ds++; 1413 i++; 1414 break; 1415 } 1416 membar_producer(); 1417 HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss, 1418 wr->wr.ud_lso.lso_hdr_sz); 1419 } 1420 1421 break; 1422 1423 case IBT_RC_SRV: 1424 /* Ensure that work request transport type matches QP type */ 1425 if (qp->qp_serv_type != HERMON_QP_RC) { 1426 return (IBT_QP_SRV_TYPE_INVALID); 1427 } 1428 1429 /* 1430 * Validate the operation type. For RC requests, we allow 1431 * "Send", "RDMA Read", "RDMA Write", various "Atomic" 1432 * operations, and memory window "Bind" 1433 */ 1434 if ((wr->wr_opcode != IBT_WRC_SEND) && 1435 (wr->wr_opcode != IBT_WRC_RDMAR) && 1436 (wr->wr_opcode != IBT_WRC_RDMAW) && 1437 (wr->wr_opcode != IBT_WRC_CSWAP) && 1438 (wr->wr_opcode != IBT_WRC_FADD) && 1439 (wr->wr_opcode != IBT_WRC_BIND)) { 1440 return (IBT_QP_OP_TYPE_INVALID); 1441 } 1442 1443 /* 1444 * If this is a Send request, then all we need to do is break 1445 * out and here and begin the Data Segment processing below 1446 */ 1447 if (wr->wr_opcode == IBT_WRC_SEND) { 1448 break; 1449 } 1450 1451 /* 1452 * If this is an RDMA Read or RDMA Write request, then fill 1453 * in the "Remote Address" header fields. 1454 */ 1455 if ((wr->wr_opcode == IBT_WRC_RDMAR) || 1456 (wr->wr_opcode == IBT_WRC_RDMAW)) { 1457 rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc + 1458 sizeof (hermon_hw_snd_wqe_ctrl_t)); 1459 1460 /* 1461 * Build the Remote Address Segment for the WQE, using 1462 * the information from the RC work request. 1463 */ 1464 HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma); 1465 1466 /* Update "ds" for filling in Data Segments (below) */ 1467 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc + 1468 sizeof (hermon_hw_snd_wqe_remaddr_t)); 1469 break; 1470 } 1471 1472 /* 1473 * If this is one of the Atomic type operations (i.e 1474 * Compare-Swap or Fetch-Add), then fill in both the "Remote 1475 * Address" header fields and the "Atomic" header fields. 1476 */ 1477 if ((wr->wr_opcode == IBT_WRC_CSWAP) || 1478 (wr->wr_opcode == IBT_WRC_FADD)) { 1479 rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc + 1480 sizeof (hermon_hw_snd_wqe_ctrl_t)); 1481 at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc + 1482 sizeof (hermon_hw_snd_wqe_remaddr_t)); 1483 1484 /* 1485 * Build the Remote Address and Atomic Segments for 1486 * the WQE, using the information from the RC Atomic 1487 * work request. 1488 */ 1489 HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr); 1490 HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic); 1491 1492 /* Update "ds" for filling in Data Segments (below) */ 1493 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at + 1494 sizeof (hermon_hw_snd_wqe_atomic_t)); 1495 1496 /* 1497 * Update "nds" and "sgl" because Atomic requests have 1498 * only a single Data Segment (and they are encoded 1499 * somewhat differently in the work request. 1500 */ 1501 nds = 1; 1502 sgl = wr->wr_sgl; 1503 break; 1504 } 1505 1506 /* 1507 * If this is memory window Bind operation, then we call the 1508 * hermon_wr_bind_check() routine to validate the request and 1509 * to generate the updated RKey. If this is successful, then 1510 * we fill in the WQE's "Bind" header fields. 1511 */ 1512 if (wr->wr_opcode == IBT_WRC_BIND) { 1513 status = hermon_wr_bind_check(state, wr); 1514 if (status != DDI_SUCCESS) { 1515 return (status); 1516 } 1517 1518 bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc + 1519 sizeof (hermon_hw_snd_wqe_ctrl_t)); 1520 1521 /* 1522 * Build the Bind Memory Window Segments for the WQE, 1523 * using the information from the RC Bind memory 1524 * window work request. 1525 */ 1526 HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind); 1527 1528 /* 1529 * Update the "ds" pointer. Even though the "bind" 1530 * operation requires no SGLs, this is necessary to 1531 * facilitate the correct descriptor size calculations 1532 * (below). 1533 */ 1534 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn + 1535 sizeof (hermon_hw_snd_wqe_bind_t)); 1536 nds = 0; 1537 } 1538 break; 1539 1540 case IBT_UC_SRV: 1541 /* Ensure that work request transport type matches QP type */ 1542 if (qp->qp_serv_type != HERMON_QP_UC) { 1543 return (IBT_QP_SRV_TYPE_INVALID); 1544 } 1545 1546 /* 1547 * Validate the operation type. For UC requests, we only 1548 * allow "Send", "RDMA Write", and memory window "Bind". 1549 * Note: Unlike RC, UC does not allow "RDMA Read" or "Atomic" 1550 * operations 1551 */ 1552 if ((wr->wr_opcode != IBT_WRC_SEND) && 1553 (wr->wr_opcode != IBT_WRC_RDMAW) && 1554 (wr->wr_opcode != IBT_WRC_BIND)) { 1555 return (IBT_QP_OP_TYPE_INVALID); 1556 } 1557 1558 /* 1559 * If this is a Send request, then all we need to do is break 1560 * out and here and begin the Data Segment processing below 1561 */ 1562 if (wr->wr_opcode == IBT_WRC_SEND) { 1563 break; 1564 } 1565 1566 /* 1567 * If this is an RDMA Write request, then fill in the "Remote 1568 * Address" header fields. 1569 */ 1570 if (wr->wr_opcode == IBT_WRC_RDMAW) { 1571 uc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc + 1572 sizeof (hermon_hw_snd_wqe_ctrl_t)); 1573 1574 /* 1575 * Build the Remote Address Segment for the WQE, using 1576 * the information from the UC work request. 1577 */ 1578 HERMON_WQE_BUILD_REMADDR(qp, uc, &wr->wr.uc.ucwr.rdma); 1579 1580 /* Update "ds" for filling in Data Segments (below) */ 1581 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)uc + 1582 sizeof (hermon_hw_snd_wqe_remaddr_t)); 1583 break; 1584 } 1585 1586 /* 1587 * If this is memory window Bind operation, then we call the 1588 * hermon_wr_bind_check() routine to validate the request and 1589 * to generate the updated RKey. If this is successful, then 1590 * we fill in the WQE's "Bind" header fields. 1591 */ 1592 if (wr->wr_opcode == IBT_WRC_BIND) { 1593 status = hermon_wr_bind_check(state, wr); 1594 if (status != DDI_SUCCESS) { 1595 return (status); 1596 } 1597 1598 bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc + 1599 sizeof (hermon_hw_snd_wqe_ctrl_t)); 1600 1601 /* 1602 * Build the Bind Memory Window Segments for the WQE, 1603 * using the information from the UC Bind memory 1604 * window work request. 1605 */ 1606 HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.uc.ucwr.bind); 1607 1608 /* 1609 * Update the "ds" pointer. Even though the "bind" 1610 * operation requires no SGLs, this is necessary to 1611 * facilitate the correct descriptor size calculations 1612 * (below). 1613 */ 1614 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn + 1615 sizeof (hermon_hw_snd_wqe_bind_t)); 1616 nds = 0; 1617 } 1618 break; 1619 1620 default: 1621 return (IBT_QP_SRV_TYPE_INVALID); 1622 } 1623 1624 /* 1625 * Now fill in the Data Segments (SGL) for the Send WQE based on 1626 * the values setup above (i.e. "sgl", "nds", and the "ds" pointer 1627 * Start by checking for a valid number of SGL entries 1628 */ 1629 if (nds > qp->qp_sq_sgl) { 1630 return (IBT_QP_SGL_LEN_INVALID); 1631 } 1632 1633 /* 1634 * For each SGL in the Send Work Request, fill in the Send WQE's data 1635 * segments. Note: We skip any SGL with zero size because Hermon 1636 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually 1637 * the encoding for zero means a 2GB transfer. 1638 */ 1639 for (last_ds = num_ds, j = i; j < nds; j++) { 1640 if (sgl[j].ds_len != 0) 1641 last_ds++; /* real last ds of wqe to fill */ 1642 } 1643 1644 /* 1645 * Return the size of descriptor (in 16-byte chunks) 1646 * For Hermon, we want them (for now) to be on stride size 1647 * boundaries, which was implicit in Tavor/Arbel 1648 * 1649 */ 1650 tmpsize = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc); 1651 1652 *size = tmpsize >> 0x4; 1653 1654 for (j = nds; --j >= i; ) { 1655 if (sgl[j].ds_len == 0) { 1656 continue; 1657 } 1658 1659 /* 1660 * Fill in the Data Segment(s) for the current WQE, using the 1661 * information contained in the scatter-gather list of the 1662 * work request. 1663 */ 1664 last_ds--; 1665 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]); 1666 } 1667 1668 return (DDI_SUCCESS); 1669 } 1670 1671 1672 1673 /* 1674 * hermon_wqe_mlx_build() 1675 * Context: Can be called from interrupt or base context. 1676 */ 1677 static int 1678 hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp, 1679 ibt_send_wr_t *wr, uint64_t *desc, uint_t *size) 1680 { 1681 hermon_ahhdl_t ah; 1682 hermon_hw_udav_t *udav; 1683 ib_lrh_hdr_t *lrh; 1684 ib_grh_t *grh; 1685 ib_bth_hdr_t *bth; 1686 ib_deth_hdr_t *deth; 1687 hermon_hw_wqe_sgl_t *ds; 1688 ibt_wr_ds_t *sgl; 1689 uint8_t *mgmtclass, *hpoint, *hcount; 1690 uint32_t nds, offset, pktlen; 1691 uint32_t desc_sz; 1692 int i, num_ds; 1693 int tmpsize; 1694 1695 ASSERT(MUTEX_HELD(&qp->qp_sq_lock)); 1696 1697 /* Initialize the information for the Data Segments */ 1698 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc + 1699 sizeof (hermon_hw_mlx_wqe_nextctrl_t)); 1700 1701 /* 1702 * Pull the address handle from the work request. The UDAV will 1703 * be used to answer some questions about the request. 1704 */ 1705 ah = (hermon_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah; 1706 if (ah == NULL) { 1707 return (IBT_AH_HDL_INVALID); 1708 } 1709 mutex_enter(&ah->ah_lock); 1710 udav = ah->ah_udav; 1711 1712 /* 1713 * If the request is for QP1 and the destination LID is equal to 1714 * the Permissive LID, then return an error. This combination is 1715 * not allowed 1716 */ 1717 if ((udav->rlid == IB_LID_PERMISSIVE) && 1718 (qp->qp_is_special == HERMON_QP_GSI)) { 1719 mutex_exit(&ah->ah_lock); 1720 return (IBT_AH_HDL_INVALID); 1721 } 1722 1723 /* 1724 * Calculate the size of the packet headers, including the GRH 1725 * (if necessary) 1726 */ 1727 desc_sz = sizeof (ib_lrh_hdr_t) + sizeof (ib_bth_hdr_t) + 1728 sizeof (ib_deth_hdr_t); 1729 if (udav->grh) { 1730 desc_sz += sizeof (ib_grh_t); 1731 } 1732 1733 /* 1734 * Begin to build the first "inline" data segment for the packet 1735 * headers. Note: By specifying "inline" we can build the contents 1736 * of the MAD packet headers directly into the work queue (as part 1737 * descriptor). This has the advantage of both speeding things up 1738 * and of not requiring the driver to allocate/register any additional 1739 * memory for the packet headers. 1740 */ 1741 HERMON_WQE_BUILD_INLINE(qp, &ds[0], desc_sz); 1742 desc_sz += 4; 1743 1744 /* 1745 * Build Local Route Header (LRH) 1746 * We start here by building the LRH into a temporary location. 1747 * When we have finished we copy the LRH data into the descriptor. 1748 * 1749 * Notice that the VL values are hardcoded. This is not a problem 1750 * because VL15 is decided later based on the value in the MLX 1751 * transport "next/ctrl" header (see the "vl15" bit below), and it 1752 * is otherwise (meaning for QP1) chosen from the SL-to-VL table 1753 * values. This rule does not hold for loopback packets however 1754 * (all of which bypass the SL-to-VL tables) and it is the reason 1755 * that non-QP0 MADs are setup with VL hardcoded to zero below. 1756 * 1757 * Notice also that Source LID is hardcoded to the Permissive LID 1758 * (0xFFFF). This is also not a problem because if the Destination 1759 * LID is not the Permissive LID, then the "slr" value in the MLX 1760 * transport "next/ctrl" header will be set to zero and the hardware 1761 * will pull the LID from value in the port. 1762 */ 1763 lrh = (ib_lrh_hdr_t *)((uintptr_t)&ds[0] + 4); 1764 pktlen = (desc_sz + 0x100) >> 2; 1765 HERMON_WQE_BUILD_MLX_LRH(lrh, qp, udav, pktlen); 1766 1767 /* 1768 * Build Global Route Header (GRH) 1769 * This is only built if necessary as defined by the "grh" bit in 1770 * the address vector. Note: We also calculate the offset to the 1771 * next header (BTH) based on whether or not the "grh" bit is set. 1772 */ 1773 if (udav->grh) { 1774 /* 1775 * If the request is for QP0, then return an error. The 1776 * combination of global routine (GRH) and QP0 is not allowed. 1777 */ 1778 if (qp->qp_is_special == HERMON_QP_SMI) { 1779 mutex_exit(&ah->ah_lock); 1780 return (IBT_AH_HDL_INVALID); 1781 } 1782 grh = (ib_grh_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t)); 1783 HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen); 1784 1785 bth = (ib_bth_hdr_t *)((uintptr_t)grh + sizeof (ib_grh_t)); 1786 } else { 1787 bth = (ib_bth_hdr_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t)); 1788 } 1789 mutex_exit(&ah->ah_lock); 1790 1791 1792 /* 1793 * Build Base Transport Header (BTH) 1794 * Notice that the M, PadCnt, and TVer fields are all set 1795 * to zero implicitly. This is true for all Management Datagrams 1796 * MADs whether GSI are SMI. 1797 */ 1798 HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr); 1799 1800 /* 1801 * Build Datagram Extended Transport Header (DETH) 1802 */ 1803 deth = (ib_deth_hdr_t *)((uintptr_t)bth + sizeof (ib_bth_hdr_t)); 1804 HERMON_WQE_BUILD_MLX_DETH(deth, qp); 1805 1806 /* Ensure that the Data Segment is aligned on a 16-byte boundary */ 1807 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)deth + sizeof (ib_deth_hdr_t)); 1808 ds = (hermon_hw_wqe_sgl_t *)(((uintptr_t)ds + 0xF) & ~0xF); 1809 nds = wr->wr_nds; 1810 sgl = wr->wr_sgl; 1811 num_ds = 0; 1812 1813 /* 1814 * Now fill in the Data Segments (SGL) for the MLX WQE based on the 1815 * values set up above (i.e. "sgl", "nds", and the "ds" pointer 1816 * Start by checking for a valid number of SGL entries 1817 */ 1818 if (nds > qp->qp_sq_sgl) { 1819 return (IBT_QP_SGL_LEN_INVALID); 1820 } 1821 1822 /* 1823 * For each SGL in the Send Work Request, fill in the MLX WQE's data 1824 * segments. Note: We skip any SGL with zero size because Hermon 1825 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually 1826 * the encoding for zero means a 2GB transfer. Because of this special 1827 * encoding in the hardware, we mask the requested length with 1828 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as 1829 * zero.) 1830 */ 1831 mgmtclass = hpoint = hcount = NULL; 1832 offset = 0; 1833 for (i = 0; i < nds; i++) { 1834 if (sgl[i].ds_len == 0) { 1835 continue; 1836 } 1837 1838 /* 1839 * Fill in the Data Segment(s) for the MLX send WQE, using 1840 * the information contained in the scatter-gather list of 1841 * the work request. 1842 */ 1843 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]); 1844 1845 /* 1846 * Search through the contents of all MADs posted to QP0 to 1847 * initialize pointers to the places where Directed Route "hop 1848 * pointer", "hop count", and "mgmtclass" would be. Hermon 1849 * needs these updated (i.e. incremented or decremented, as 1850 * necessary) by software. 1851 */ 1852 if (qp->qp_is_special == HERMON_QP_SMI) { 1853 1854 HERMON_SPECIAL_QP_DRMAD_GET_MGMTCLASS(mgmtclass, 1855 offset, sgl[i].ds_va, sgl[i].ds_len); 1856 1857 HERMON_SPECIAL_QP_DRMAD_GET_HOPPOINTER(hpoint, 1858 offset, sgl[i].ds_va, sgl[i].ds_len); 1859 1860 HERMON_SPECIAL_QP_DRMAD_GET_HOPCOUNT(hcount, 1861 offset, sgl[i].ds_va, sgl[i].ds_len); 1862 1863 offset += sgl[i].ds_len; 1864 } 1865 num_ds++; 1866 } 1867 1868 /* 1869 * Hermon's Directed Route MADs need to have the "hop pointer" 1870 * incremented/decremented (as necessary) depending on whether it is 1871 * currently less than or greater than the "hop count" (i.e. whether 1872 * the MAD is a request or a response.) 1873 */ 1874 if (qp->qp_is_special == HERMON_QP_SMI) { 1875 HERMON_SPECIAL_QP_DRMAD_DO_HOPPOINTER_MODIFY(*mgmtclass, 1876 *hpoint, *hcount); 1877 } 1878 1879 /* 1880 * Now fill in the ICRC Data Segment. This data segment is inlined 1881 * just like the packets headers above, but it is only four bytes and 1882 * set to zero (to indicate that we wish the hardware to generate ICRC. 1883 */ 1884 HERMON_WQE_BUILD_INLINE_ICRC(qp, &ds[num_ds], 4, 0); 1885 num_ds++; 1886 1887 /* 1888 * Return the size of descriptor (in 16-byte chunks) 1889 * For Hermon, we want them (for now) to be on stride size 1890 * boundaries, which was implicit in Tavor/Arbel 1891 */ 1892 tmpsize = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc); 1893 1894 *size = tmpsize >> 0x04; 1895 1896 return (DDI_SUCCESS); 1897 } 1898 1899 1900 1901 /* 1902 * hermon_wqe_recv_build() 1903 * Context: Can be called from interrupt or base context. 1904 */ 1905 /* ARGSUSED */ 1906 static int 1907 hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp, 1908 ibt_recv_wr_t *wr, uint64_t *desc) 1909 { 1910 hermon_hw_wqe_sgl_t *ds; 1911 int i, num_ds; 1912 1913 ASSERT(MUTEX_HELD(&qp->qp_lock)); 1914 1915 /* 1916 * Fill in the Data Segments (SGL) for the Recv WQE - don't 1917 * need to have a reserved for the ctrl, there is none on the 1918 * recv queue for hermon, but will need to put an invalid 1919 * (null) scatter pointer per PRM 1920 */ 1921 ds = (hermon_hw_wqe_sgl_t *)(uintptr_t)desc; 1922 num_ds = 0; 1923 1924 /* Check for valid number of SGL entries */ 1925 if (wr->wr_nds > qp->qp_rq_sgl) { 1926 return (IBT_QP_SGL_LEN_INVALID); 1927 } 1928 1929 /* 1930 * For each SGL in the Recv Work Request, fill in the Recv WQE's data 1931 * segments. Note: We skip any SGL with zero size because Hermon 1932 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually 1933 * the encoding for zero means a 2GB transfer. Because of this special 1934 * encoding in the hardware, we mask the requested length with 1935 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as 1936 * zero.) 1937 */ 1938 for (i = 0; i < wr->wr_nds; i++) { 1939 if (wr->wr_sgl[i].ds_len == 0) { 1940 continue; 1941 } 1942 1943 /* 1944 * Fill in the Data Segment(s) for the receive WQE, using the 1945 * information contained in the scatter-gather list of the 1946 * work request. 1947 */ 1948 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]); 1949 num_ds++; 1950 } 1951 1952 /* put the null sgl pointer as well if needed */ 1953 if (num_ds < qp->qp_rq_sgl) { 1954 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl); 1955 } 1956 1957 return (DDI_SUCCESS); 1958 } 1959 1960 1961 1962 /* 1963 * hermon_wqe_srq_build() 1964 * Context: Can be called from interrupt or base context. 1965 */ 1966 /* ARGSUSED */ 1967 static int 1968 hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq, 1969 ibt_recv_wr_t *wr, uint64_t *desc) 1970 { 1971 hermon_hw_wqe_sgl_t *ds; 1972 int i, num_ds; 1973 1974 ASSERT(MUTEX_HELD(&srq->srq_lock)); 1975 1976 /* Fill in the Data Segments (SGL) for the Recv WQE */ 1977 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc + 1978 sizeof (hermon_hw_srq_wqe_next_t)); 1979 num_ds = 0; 1980 1981 /* Check for valid number of SGL entries */ 1982 if (wr->wr_nds > srq->srq_wq_sgl) { 1983 return (IBT_QP_SGL_LEN_INVALID); 1984 } 1985 1986 /* 1987 * For each SGL in the Recv Work Request, fill in the Recv WQE's data 1988 * segments. Note: We skip any SGL with zero size because Hermon 1989 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually 1990 * the encoding for zero means a 2GB transfer. Because of this special 1991 * encoding in the hardware, we mask the requested length with 1992 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as 1993 * zero.) 1994 */ 1995 for (i = 0; i < wr->wr_nds; i++) { 1996 if (wr->wr_sgl[i].ds_len == 0) { 1997 continue; 1998 } 1999 2000 /* 2001 * Fill in the Data Segment(s) for the receive WQE, using the 2002 * information contained in the scatter-gather list of the 2003 * work request. 2004 */ 2005 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]); 2006 num_ds++; 2007 } 2008 2009 /* 2010 * put in the null sgl pointer as well, if needed 2011 */ 2012 if (num_ds < srq->srq_wq_sgl) { 2013 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl); 2014 } 2015 2016 return (DDI_SUCCESS); 2017 } 2018 2019 2020 /* 2021 * hermon_wr_get_immediate() 2022 * Context: Can be called from interrupt or base context. 2023 */ 2024 static uint32_t 2025 hermon_wr_get_immediate(ibt_send_wr_t *wr) 2026 { 2027 /* 2028 * This routine extracts the "immediate data" from the appropriate 2029 * location in the IBTF work request. Because of the way the 2030 * work request structure is defined, the location for this data 2031 * depends on the actual work request operation type. 2032 */ 2033 2034 /* For RDMA Write, test if RC or UC */ 2035 if (wr->wr_opcode == IBT_WRC_RDMAW) { 2036 if (wr->wr_trans == IBT_RC_SRV) { 2037 return (wr->wr.rc.rcwr.rdma.rdma_immed); 2038 } else { /* IBT_UC_SRV */ 2039 return (wr->wr.uc.ucwr.rdma.rdma_immed); 2040 } 2041 } 2042 2043 /* For Send, test if RC, UD, or UC */ 2044 if (wr->wr_opcode == IBT_WRC_SEND) { 2045 if (wr->wr_trans == IBT_RC_SRV) { 2046 return (wr->wr.rc.rcwr.send_immed); 2047 } else if (wr->wr_trans == IBT_UD_SRV) { 2048 return (wr->wr.ud.udwr_immed); 2049 } else { /* IBT_UC_SRV */ 2050 return (wr->wr.uc.ucwr.send_immed); 2051 } 2052 } 2053 2054 /* 2055 * If any other type of request, then immediate is undefined 2056 */ 2057 return (0); 2058 } 2059 2060 /* 2061 * hermon_wqe_headroom() 2062 * Context: can be called from interrupt or base, currently only from 2063 * base context. 2064 * Routine that fills in the headroom for the Send Queue 2065 */ 2066 2067 static void 2068 hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp) 2069 { 2070 uint32_t *wqe_start, *wqe_top, *wqe_base, qsize; 2071 int hdrmwqes, wqesizebytes, sectperwqe; 2072 uint32_t invalue; 2073 int i, j; 2074 2075 qsize = qp->qp_sq_bufsz; 2076 wqesizebytes = 1 << qp->qp_sq_log_wqesz; 2077 sectperwqe = wqesizebytes >> 6; /* 64 bytes/section */ 2078 hdrmwqes = qp->qp_sq_hdrmwqes; 2079 wqe_base = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 0); 2080 wqe_top = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, qsize); 2081 wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, from); 2082 2083 for (i = 0; i < hdrmwqes; i++) { 2084 for (j = 0; j < sectperwqe; j++) { 2085 if (j == 0) { /* 1st section of wqe */ 2086 /* perserve ownership bit */ 2087 invalue = ddi_get32(qp->qp_wqinfo.qa_acchdl, 2088 wqe_start) | 0x7FFFFFFF; 2089 } else { 2090 /* or just invalidate it */ 2091 invalue = 0xFFFFFFFF; 2092 } 2093 ddi_put32(qp->qp_wqinfo.qa_acchdl, wqe_start, invalue); 2094 wqe_start += 16; /* move 64 bytes */ 2095 } 2096 if (wqe_start == wqe_top) /* hit the end of the queue */ 2097 wqe_start = wqe_base; /* wrap to start */ 2098 } 2099 } 2100 2101 /* 2102 * hermon_wr_bind_check() 2103 * Context: Can be called from interrupt or base context. 2104 */ 2105 /* ARGSUSED */ 2106 static int 2107 hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr) 2108 { 2109 ibt_bind_flags_t bind_flags; 2110 uint64_t vaddr, len; 2111 uint64_t reg_start_addr, reg_end_addr; 2112 hermon_mwhdl_t mw; 2113 hermon_mrhdl_t mr; 2114 hermon_rsrc_t *mpt; 2115 uint32_t new_rkey; 2116 2117 /* Check for a valid Memory Window handle in the WR */ 2118 mw = (hermon_mwhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mw_hdl; 2119 if (mw == NULL) { 2120 return (IBT_MW_HDL_INVALID); 2121 } 2122 2123 /* Check for a valid Memory Region handle in the WR */ 2124 mr = (hermon_mrhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mr_hdl; 2125 if (mr == NULL) { 2126 return (IBT_MR_HDL_INVALID); 2127 } 2128 2129 mutex_enter(&mr->mr_lock); 2130 mutex_enter(&mw->mr_lock); 2131 2132 /* 2133 * Check here to see if the memory region has already been partially 2134 * deregistered as a result of a hermon_umap_umemlock_cb() callback. 2135 * If so, this is an error, return failure. 2136 */ 2137 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 2138 mutex_exit(&mr->mr_lock); 2139 mutex_exit(&mw->mr_lock); 2140 return (IBT_MR_HDL_INVALID); 2141 } 2142 2143 /* Check for a valid Memory Window RKey (i.e. a matching RKey) */ 2144 if (mw->mr_rkey != wr->wr.rc.rcwr.bind->bind_rkey) { 2145 mutex_exit(&mr->mr_lock); 2146 mutex_exit(&mw->mr_lock); 2147 return (IBT_MR_RKEY_INVALID); 2148 } 2149 2150 /* Check for a valid Memory Region LKey (i.e. a matching LKey) */ 2151 if (mr->mr_lkey != wr->wr.rc.rcwr.bind->bind_lkey) { 2152 mutex_exit(&mr->mr_lock); 2153 mutex_exit(&mw->mr_lock); 2154 return (IBT_MR_LKEY_INVALID); 2155 } 2156 2157 /* 2158 * Now check for valid "vaddr" and "len". Note: We don't check the 2159 * "vaddr" range when "len == 0" (i.e. on unbind operations) 2160 */ 2161 len = wr->wr.rc.rcwr.bind->bind_len; 2162 if (len != 0) { 2163 vaddr = wr->wr.rc.rcwr.bind->bind_va; 2164 reg_start_addr = mr->mr_bindinfo.bi_addr; 2165 reg_end_addr = mr->mr_bindinfo.bi_addr + 2166 (mr->mr_bindinfo.bi_len - 1); 2167 if ((vaddr < reg_start_addr) || (vaddr > reg_end_addr)) { 2168 mutex_exit(&mr->mr_lock); 2169 mutex_exit(&mw->mr_lock); 2170 return (IBT_MR_VA_INVALID); 2171 } 2172 vaddr = (vaddr + len) - 1; 2173 if (vaddr > reg_end_addr) { 2174 mutex_exit(&mr->mr_lock); 2175 mutex_exit(&mw->mr_lock); 2176 return (IBT_MR_LEN_INVALID); 2177 } 2178 } 2179 2180 /* 2181 * Validate the bind access flags. Remote Write and Atomic access for 2182 * the Memory Window require that Local Write access be set in the 2183 * corresponding Memory Region. 2184 */ 2185 bind_flags = wr->wr.rc.rcwr.bind->bind_flags; 2186 if (((bind_flags & IBT_WR_BIND_WRITE) || 2187 (bind_flags & IBT_WR_BIND_ATOMIC)) && 2188 !(mr->mr_accflag & IBT_MR_LOCAL_WRITE)) { 2189 mutex_exit(&mr->mr_lock); 2190 mutex_exit(&mw->mr_lock); 2191 return (IBT_MR_ACCESS_REQ_INVALID); 2192 } 2193 2194 /* Calculate the new RKey for the Memory Window */ 2195 mpt = mw->mr_mptrsrcp; 2196 new_rkey = hermon_mr_keycalc(mpt->hr_indx); 2197 new_rkey = hermon_mr_key_swap(new_rkey); 2198 2199 wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey; 2200 mw->mr_rkey = new_rkey; 2201 2202 mutex_exit(&mr->mr_lock); 2203 mutex_exit(&mw->mr_lock); 2204 return (DDI_SUCCESS); 2205 } 2206 2207 2208 /* 2209 * hermon_wrid_from_reset_handling() 2210 * Context: Can be called from interrupt or base context. 2211 */ 2212 /* ARGSUSED */ 2213 int 2214 hermon_wrid_from_reset_handling(hermon_state_t *state, hermon_qphdl_t qp) 2215 { 2216 hermon_workq_hdr_t *swq, *rwq; 2217 2218 if (qp->qp_alloc_flags & IBT_QP_USER_MAP) 2219 return (DDI_SUCCESS); 2220 2221 /* grab the cq lock(s) to modify the wqavl tree */ 2222 if (qp->qp_rq_cqhdl) 2223 mutex_enter(&qp->qp_rq_cqhdl->cq_lock); 2224 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl && 2225 qp->qp_sq_cqhdl != NULL) 2226 mutex_enter(&qp->qp_sq_cqhdl->cq_lock); 2227 2228 /* Chain the newly allocated work queue header to the CQ's list */ 2229 if (qp->qp_sq_cqhdl) 2230 hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl); 2231 2232 swq = qp->qp_sq_wqhdr; 2233 swq->wq_head = 0; 2234 swq->wq_tail = 0; 2235 swq->wq_full = 0; 2236 2237 /* 2238 * Now we repeat all the above operations for the receive work queue, 2239 * or shared receive work queue. 2240 * 2241 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case. 2242 */ 2243 2244 if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) { 2245 mutex_enter(&qp->qp_srqhdl->srq_lock); 2246 } else { 2247 rwq = qp->qp_rq_wqhdr; 2248 rwq->wq_head = 0; 2249 rwq->wq_tail = 0; 2250 rwq->wq_full = 0; 2251 qp->qp_rq_wqecntr = 0; 2252 } 2253 hermon_cq_workq_add(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl); 2254 2255 if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) { 2256 mutex_exit(&qp->qp_srqhdl->srq_lock); 2257 } 2258 2259 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl && 2260 qp->qp_sq_cqhdl != NULL) 2261 mutex_exit(&qp->qp_sq_cqhdl->cq_lock); 2262 if (qp->qp_rq_cqhdl) 2263 mutex_exit(&qp->qp_rq_cqhdl->cq_lock); 2264 return (DDI_SUCCESS); 2265 } 2266 2267 2268 /* 2269 * hermon_wrid_to_reset_handling() 2270 * Context: Can be called from interrupt or base context. 2271 */ 2272 int 2273 hermon_wrid_to_reset_handling(hermon_state_t *state, hermon_qphdl_t qp) 2274 { 2275 if (qp->qp_alloc_flags & IBT_QP_USER_MAP) 2276 return (DDI_SUCCESS); 2277 2278 /* 2279 * If there are unpolled entries in these CQs, they are 2280 * polled/flushed. 2281 * Grab the CQ lock(s) before manipulating the lists. 2282 */ 2283 /* grab the cq lock(s) to modify the wqavl tree */ 2284 if (qp->qp_rq_cqhdl) 2285 mutex_enter(&qp->qp_rq_cqhdl->cq_lock); 2286 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl && 2287 qp->qp_sq_cqhdl != NULL) 2288 mutex_enter(&qp->qp_sq_cqhdl->cq_lock); 2289 2290 if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) { 2291 mutex_enter(&qp->qp_srqhdl->srq_lock); 2292 } 2293 /* 2294 * Flush the entries on the CQ for this QP's QPN. 2295 */ 2296 hermon_cq_entries_flush(state, qp); 2297 2298 if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) { 2299 mutex_exit(&qp->qp_srqhdl->srq_lock); 2300 } 2301 2302 hermon_cq_workq_remove(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl); 2303 if (qp->qp_sq_cqhdl != NULL) 2304 hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl); 2305 2306 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl && 2307 qp->qp_sq_cqhdl != NULL) 2308 mutex_exit(&qp->qp_sq_cqhdl->cq_lock); 2309 if (qp->qp_rq_cqhdl) 2310 mutex_exit(&qp->qp_rq_cqhdl->cq_lock); 2311 2312 return (IBT_SUCCESS); 2313 } 2314 2315 2316 /* 2317 * hermon_wrid_get_entry() 2318 * Context: Can be called from interrupt or base context. 2319 */ 2320 uint64_t 2321 hermon_wrid_get_entry(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe) 2322 { 2323 hermon_workq_avl_t *wqa; 2324 hermon_workq_hdr_t *wq; 2325 uint64_t wrid; 2326 uint_t send_or_recv, qpnum; 2327 uint32_t indx; 2328 2329 /* 2330 * Determine whether this CQE is a send or receive completion. 2331 */ 2332 send_or_recv = HERMON_CQE_SENDRECV_GET(cq, cqe); 2333 2334 /* Find the work queue for this QP number (send or receive side) */ 2335 qpnum = HERMON_CQE_QPNUM_GET(cq, cqe); 2336 wqa = hermon_wrid_wqavl_find(cq, qpnum, send_or_recv); 2337 wq = wqa->wqa_wq; 2338 2339 /* 2340 * Regardless of whether the completion is the result of a "success" 2341 * or a "failure", we lock the list of "containers" and attempt to 2342 * search for the the first matching completion (i.e. the first WR 2343 * with a matching WQE addr and size). Once we find it, we pull out 2344 * the "wrid" field and return it (see below). XXX Note: One possible 2345 * future enhancement would be to enable this routine to skip over 2346 * any "unsignaled" completions to go directly to the next "signaled" 2347 * entry on success. 2348 */ 2349 indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) & wq->wq_mask; 2350 wrid = wq->wq_wrid[indx]; 2351 if (wqa->wqa_srq_en) { 2352 struct hermon_sw_srq_s *srq; 2353 uint64_t *desc; 2354 2355 /* put wqe back on the srq free list */ 2356 srq = wqa->wqa_srq; 2357 mutex_enter(&srq->srq_lock); 2358 desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail); 2359 ((uint16_t *)desc)[1] = htons(indx); 2360 wq->wq_tail = indx; 2361 mutex_exit(&srq->srq_lock); 2362 } else { 2363 wq->wq_head = (indx + 1) & wq->wq_mask; 2364 wq->wq_full = 0; 2365 } 2366 2367 return (wrid); 2368 } 2369 2370 2371 int 2372 hermon_wrid_workq_compare(const void *p1, const void *p2) 2373 { 2374 hermon_workq_compare_t *cmpp; 2375 hermon_workq_avl_t *curr; 2376 2377 cmpp = (hermon_workq_compare_t *)p1; 2378 curr = (hermon_workq_avl_t *)p2; 2379 2380 if (cmpp->cmp_qpn < curr->wqa_qpn) 2381 return (-1); 2382 else if (cmpp->cmp_qpn > curr->wqa_qpn) 2383 return (+1); 2384 else if (cmpp->cmp_type < curr->wqa_type) 2385 return (-1); 2386 else if (cmpp->cmp_type > curr->wqa_type) 2387 return (+1); 2388 else 2389 return (0); 2390 } 2391 2392 2393 /* 2394 * hermon_wrid_workq_find() 2395 * Context: Can be called from interrupt or base context. 2396 */ 2397 static hermon_workq_avl_t * 2398 hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, uint_t wq_type) 2399 { 2400 hermon_workq_avl_t *curr; 2401 hermon_workq_compare_t cmp; 2402 2403 /* 2404 * Walk the CQ's work queue list, trying to find a send or recv queue 2405 * with the same QP number. We do this even if we are going to later 2406 * create a new entry because it helps us easily find the end of the 2407 * list. 2408 */ 2409 cmp.cmp_qpn = qpn; 2410 cmp.cmp_type = wq_type; 2411 curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL); 2412 2413 return (curr); 2414 } 2415 2416 2417 /* 2418 * hermon_wrid_wqhdr_create() 2419 * Context: Can be called from base context. 2420 */ 2421 /* ARGSUSED */ 2422 hermon_workq_hdr_t * 2423 hermon_wrid_wqhdr_create(int bufsz) 2424 { 2425 hermon_workq_hdr_t *wqhdr; 2426 2427 /* 2428 * Allocate space for the wqhdr, and an array to record all the wrids. 2429 */ 2430 wqhdr = (hermon_workq_hdr_t *)kmem_zalloc(sizeof (*wqhdr), KM_NOSLEEP); 2431 if (wqhdr == NULL) { 2432 return (NULL); 2433 } 2434 wqhdr->wq_wrid = kmem_zalloc(bufsz * sizeof (uint64_t), KM_NOSLEEP); 2435 if (wqhdr->wq_wrid == NULL) { 2436 kmem_free(wqhdr, sizeof (*wqhdr)); 2437 return (NULL); 2438 } 2439 wqhdr->wq_size = bufsz; 2440 wqhdr->wq_mask = bufsz - 1; 2441 2442 return (wqhdr); 2443 } 2444 2445 void 2446 hermon_wrid_wqhdr_destroy(hermon_workq_hdr_t *wqhdr) 2447 { 2448 kmem_free(wqhdr->wq_wrid, wqhdr->wq_size * sizeof (uint64_t)); 2449 kmem_free(wqhdr, sizeof (*wqhdr)); 2450 } 2451 2452 2453 /* 2454 * hermon_cq_workq_add() 2455 * Context: Can be called from interrupt or base context. 2456 */ 2457 static void 2458 hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl) 2459 { 2460 hermon_workq_compare_t cmp; 2461 avl_index_t where; 2462 2463 cmp.cmp_qpn = wqavl->wqa_qpn; 2464 cmp.cmp_type = wqavl->wqa_type; 2465 (void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where); 2466 avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqavl, where); 2467 } 2468 2469 2470 /* 2471 * hermon_cq_workq_remove() 2472 * Context: Can be called from interrupt or base context. 2473 */ 2474 static void 2475 hermon_cq_workq_remove(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl) 2476 { 2477 avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqavl); 2478 }