/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * hermon_wr.c
 *    Hermon Work Request Processing Routines
 *
 *    Implements all the routines necessary to provide the PostSend(),
 *    PostRecv() and PostSRQ() verbs.  Also contains all the code
 *    necessary to implement the Hermon WRID tracking mechanism.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/avl.h>

#include <sys/ib/adapters/hermon/hermon.h>

static uint32_t hermon_wr_get_immediate(ibt_send_wr_t *wr);
static int hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr);
static int hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static int hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
static void hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp);
static int hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc);
static int hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc);
static hermon_workq_avl_t *hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn,
    uint_t send_or_recv);
static void hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl);
static void hermon_cq_workq_remove(hermon_cqhdl_t cq,
    hermon_workq_avl_t *wqavl);

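/*
 * Dummy scatter-gather entry (zero address and length, with what appears
 * to be the hardware's invalid-lkey encoding, 0x100), used to terminate
 * receive descriptors per the PRM's "null scatter pointer" convention
 * (see hermon_wqe_recv_build() below).
 */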
static ibt_wr_ds_t null_sgl = { 0, 0x00000100, 0 };

/*
 * Debug flags to help diagnose RDMA_READ/RDMA_WRITE failures.
 *
 *      0x1 - print rkey used during post_send
 *      0x2 - print sgls used during post_send
 *      0x4 - print FMR comings and goings
 */
int hermon_rdma_debug = 0x0;
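/*
 * As a sketch of typical usage (assuming the module is named "hermon"),
 * the standard illumos mechanisms for kernel globals can set these flags:
 *
 *      set hermon:hermon_rdma_debug = 0x3              (/etc/system)
 *      echo 'hermon_rdma_debug/W 3' | mdb -kw          (live system)
 */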

static int
hermon_post_send_ud(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
    hermon_hw_snd_wqe_ud_t *ud;
    hermon_workq_hdr_t *wq;
    hermon_ahhdl_t ah;
    ibt_wr_rfci_send_t *rfci;
    ibt_wr_init_send_t *is;
    ibt_ud_dest_t *dest;
    uint64_t *desc;
    uint32_t desc_sz;
    uint32_t signaled_dbd, solicited;
    uint32_t head, tail, next_tail, qsize_msk;
    uint32_t hdrmwqes;
    uint32_t nopcode, fence, immed_data = 0;
    hermon_hw_wqe_sgl_t *ds, *old_ds;
    ibt_wr_ds_t *sgl;
    int nds;
    int i, j, last_ds, num_ds, status;
    uint32_t *wqe_start;
    int sectperwqe;
    uint_t posted_cnt = 0;
    int total_len, strong_order, fc_bits, cksum;

    /* initialize the FMA retry loop */
    hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

    ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

    /* A memory barrier suffices in lieu of a lock for the WRID list */
    membar_consumer();

    /* Save away some initial QP state */
    wq = qp->qp_sq_wqhdr;
    qsize_msk = wq->wq_mask;
    hdrmwqes = qp->qp_sq_hdrmwqes;      /* in WQEs */
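    /*
     * qp_sq_log_wqesz is log2 of the send WQE size in bytes, so this
     * shift yields the WQE size in 32-bit words; the headroom stamping
     * loops below walk that range in 64-byte (16-word) strides.
     */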
    sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

    tail = wq->wq_tail;
    head = wq->wq_head;
    status = DDI_SUCCESS;

post_next:
    /*
     * Check for "queue full" condition.  If the queue
     * is already full, then no more WQEs can be posted.
     * So break out, ring a doorbell (if necessary) and
     * return an error
     */
    if (wq->wq_full != 0) {
        status = IBT_QP_FULL;
        goto done;
    }

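    /*
     * Advance the tail, and mark the queue full once the tail comes
     * within "hdrmwqes" entries of the head; those entries are the
     * reserved headroom that gets stamped below rather than handed to
     * the hardware.
     */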
    next_tail = (tail + 1) & qsize_msk;
    if (((tail + hdrmwqes) & qsize_msk) == head) {
        wq->wq_full = 1;
    }

    desc = HERMON_QP_SQ_ENTRY(qp, tail);

    nds = wr->wr_nds;
    sgl = wr->wr_sgl;
    num_ds = 0;
    strong_order = 0;
    fc_bits = 0;
    cksum = 0;

    /*
     * Build a Send or Send_LSO WQE
     */
    switch (wr->wr_opcode) {
    case IBT_WRC_SEND_LSO:
        if (wr->wr_trans != IBT_UD_SRV) {
            status = IBT_QP_SRV_TYPE_INVALID;
            goto done;
        }
        nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
        if (wr->wr_flags & IBT_WR_SEND_CKSUM)
            cksum = 0x30;
        if (wr->wr.ud_lso.lso_hdr_sz > 60) {
            nopcode |= (1 << 6);    /* ReRead bit must be set */
        }
        dest = wr->wr.ud_lso.lso_ud_dest;
        ah = (hermon_ahhdl_t)dest->ud_ah;
        if (ah == NULL) {
            status = IBT_AH_HDL_INVALID;
            goto done;
        }
        ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
            sizeof (hermon_hw_snd_wqe_ud_t));
        HERMON_WQE_BUILD_UD(qp, ud, ah, dest);

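        /*
         * "total_len" rounds the 4-byte LSO segment header plus the
         * inline LSO headers up to the next 16-byte boundary; the
         * bounds check below ensures the LSO segment and the data
         * segments all fit within one send WQE.
         */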
        total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
        if ((uintptr_t)ds + total_len + (nds * 16) >
            (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz)) {
            status = IBT_QP_SGL_LEN_INVALID;
            goto done;
        }
        old_ds = ds;
        bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)old_ds + 1,
            wr->wr.ud_lso.lso_hdr_sz);
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
        i = 0;
        break;

    case IBT_WRC_SEND:
        nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
        if (qp->qp_serv_type == HERMON_QP_UD) {
            if (wr->wr_trans != IBT_UD_SRV) {
                status = IBT_QP_SRV_TYPE_INVALID;
                goto done;
            }
            if (wr->wr_flags & IBT_WR_SEND_CKSUM)
                cksum = 0x30;
            dest = wr->wr.ud.udwr_dest;
        } else if (qp->qp_serv_type == HERMON_QP_RFCI) {
            if (wr->wr_trans != IBT_RFCI_SRV) {
                status = IBT_QP_SRV_TYPE_INVALID;
                goto done;
            }
            rfci = &wr->wr.fc.rfci_send;
            if ((wr->wr_flags & IBT_WR_SEND_FC_CRC) != 0) {
                nopcode |= (rfci->rfci_eof << 16);
                fc_bits = 0x40; /* set FCRC */
            }
            dest = rfci->rfci_dest;
        } else {
            status = IBT_QP_OP_TYPE_INVALID;
            goto done;
        }
        if (wr->wr_flags & IBT_WR_SEND_IMMED) {
            /* "|=" changes 0xa to 0xb without touching FCEOF */
            nopcode |= HERMON_WQE_SEND_NOPCODE_SENDI;
            immed_data = wr->wr.ud.udwr_immed;
        }
        ah = (hermon_ahhdl_t)dest->ud_ah;
        if (ah == NULL) {
            status = IBT_AH_HDL_INVALID;
            goto done;
        }
        ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
            sizeof (hermon_hw_snd_wqe_ud_t));
        HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
        i = 0;
        break;

    case IBT_WRC_INIT_SEND_FCMD:
        if (qp->qp_serv_type != HERMON_QP_FCMND) {
            status = IBT_QP_OP_TYPE_INVALID;
            goto done;
        }
        if (wr->wr_trans != IBT_FCMD_SRV) {
            status = IBT_QP_SRV_TYPE_INVALID;
            goto done;
        }
        nopcode = HERMON_WQE_FCP_OPCODE_INIT_AND_SEND;
        is = wr->wr.fc.fc_is;
        dest = is->is_ctl.fc_dest;
        ah = (hermon_ahhdl_t)dest->ud_ah;
        if (ah == NULL) {
            status = IBT_AH_HDL_INVALID;
            goto done;
        }
        ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
            sizeof (hermon_hw_snd_wqe_ud_t));
        HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
        old_ds = ds;
        /* move ds beyond the FCP-3 Init Segment */
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + 0x10);
        i = 0;
        break;

    case IBT_WRC_FAST_REG_PMR:
    {
        hermon_hw_snd_wqe_frwr_t *frwr;

        if (qp->qp_serv_type != HERMON_QP_FCMND) {
            status = IBT_QP_OP_TYPE_INVALID;
            goto done;
        }
        if (wr->wr_trans != IBT_FCMD_SRV) {
            status = IBT_QP_SRV_TYPE_INVALID;
            goto done;
        }
        nopcode = HERMON_WQE_SEND_NOPCODE_FRWR;
        frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.fc.reg_pmr);
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr +
            sizeof (hermon_hw_snd_wqe_frwr_t));
        nds = 0;
        strong_order = 0x80;
        break;
    }

#if 0
    /* firmware does not support this */
    case IBT_WRC_LOCAL_INVALIDATE:
    {
        hermon_hw_snd_wqe_local_inv_t *li;

        if (qp->qp_serv_type != HERMON_QP_FCMND) {
            status = IBT_QP_OP_TYPE_INVALID;
            goto done;
        }
        if (wr->wr_trans != IBT_FCMD_SRV) {
            status = IBT_QP_SRV_TYPE_INVALID;
            goto done;
        }
        nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV;
        li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        HERMON_WQE_BUILD_LI(qp, li, wr->wr.fc.li);
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li +
            sizeof (hermon_hw_snd_wqe_local_inv_t));
        nds = 0;
        strong_order = 0x80;
        break;
    }
#endif
    default:
        status = IBT_QP_OP_TYPE_INVALID;
        goto done;
    }

    if (nds > qp->qp_sq_sgl) {
        status = IBT_QP_SGL_LEN_INVALID;
        goto done;
    }
    for (last_ds = num_ds, j = i; j < nds; j++) {
        if (sgl[j].ds_len != 0)
            last_ds++;      /* real last ds of wqe to fill */
    }
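    /*
     * "desc_sz" below is the descriptor size in 16-byte units, measured
     * from the start of the WQE to just past the last non-zero-length
     * data segment.  The segments themselves are then filled in reverse
     * order, with "last_ds" counting back down as zero-length SGL
     * entries are skipped.
     */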
    desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
    for (j = nds; --j >= i; ) {
        if (sgl[j].ds_len == 0) {
            continue;
        }

        /*
         * Fill in the Data Segment(s) for the current WQE, using the
         * information contained in the scatter-gather list of the
         * work request.
         */
        last_ds--;
        HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
    }

    membar_producer();

    if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
        HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
            wr->wr.ud_lso.lso_hdr_sz);
    } else if (wr->wr_opcode == IBT_WRC_INIT_SEND_FCMD) {
        /* This sits in the STAMP, so must be set after setting SGL */
        HERMON_WQE_BUILD_FCP3_INIT(old_ds, is->is_ctl.fc_frame_ctrl,
            is->is_cs_priority, is->is_tx_seq_id, is->is_fc_mtu,
            is->is_dest_id, is->is_op, is->is_rem_exch,
            is->is_exch_qp_idx);

        /* The following will be used in HERMON_WQE_SET_CTRL_SEGMENT */
        /* SIT bit in FCP-3 ctrl segment */
        desc_sz |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_SIT) ? 0x80 : 0;
        /* LS bit in FCP-3 ctrl segment */
        fc_bits |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_LAST_SEQ) ?
            0x10000 : 0;
        fc_bits |= ((is->is_ctl.fc_routing_ctrl & 0xF) << 20) |
            (is->is_ctl.fc_seq_id << 24);
        immed_data = is->is_ctl.fc_parameter;
    }

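    /*
     * Gather the control-segment flags.  The encodings used here (0xC
     * for the "completion requested" bits, 0x2 for the solicited-event
     * bit) are the values HERMON_WQE_SET_CTRL_SEGMENT() ORs into the
     * WQE control segment.
     */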
    fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

    signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
        (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0;

    solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0;

    HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data,
        solicited, signaled_dbd, cksum, qp, strong_order, fc_bits);

    wq->wq_wrid[tail] = wr->wr_id;

    tail = next_tail;

    /* Update some of the state in the QP */
    wq->wq_tail = tail;

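    /*
     * Publish the fully built WQE before flipping the ownership bit;
     * the barrier keeps the hardware from seeing a half-written
     * descriptor once it owns the entry.
     */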
    membar_producer();

    /* Now set the ownership bit and opcode (first dword). */
    HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

    posted_cnt++;
    if (--num_wr > 0) {
        /* do the invalidate of the headroom */
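        /*
         * Stamp 0xFFFFFFFF into the leading word of each remaining
         * 64-byte section of the headroom entry so the hardware
         * treats it as invalid rather than as a stale descriptor.
         */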
        wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
            (tail + hdrmwqes) & qsize_msk);
        for (i = 16; i < sectperwqe; i += 16) {
            wqe_start[i] = 0xFFFFFFFF;
        }

        wr++;
        goto post_next;
    }
done:
    if (posted_cnt != 0) {
        ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

        membar_producer();

        /* the FMA retry loop starts for Hermon doorbell register. */
        hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
            fm_status, fm_test_num);

        HERMON_UAR_DOORBELL(state, uarhdl,
            (uint64_t *)(void *)&state->hs_uar->send,
            (uint64_t)qp->qp_ring);

        /* the FMA retry loop ends. */
        hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
            fm_status, fm_test_num);

        /* do the invalidate of the headroom */
        wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
            (tail + hdrmwqes) & qsize_msk);
        for (i = 16; i < sectperwqe; i += 16) {
            wqe_start[i] = 0xFFFFFFFF;
        }
    }
    if (num_posted != NULL)
        *num_posted = posted_cnt;

    mutex_exit(&qp->qp_sq_lock);

    return (status);

pio_error:
    mutex_exit(&qp->qp_sq_lock);
    hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
    return (ibc_get_ci_failure(0));
}

static int
hermon_post_send_rc(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
    uint64_t *desc;
    hermon_workq_hdr_t *wq;
    uint32_t desc_sz;
    uint32_t signaled_dbd, solicited;
    uint32_t head, tail, next_tail, qsize_msk;
    uint32_t hdrmwqes;
    int status;
    uint32_t nopcode, fence, immed_data = 0;
    hermon_hw_snd_wqe_remaddr_t *rc;
    hermon_hw_snd_wqe_atomic_t *at;
    hermon_hw_snd_wqe_bind_t *bn;
    hermon_hw_snd_wqe_frwr_t *frwr;
    hermon_hw_snd_wqe_local_inv_t *li;
    hermon_hw_wqe_sgl_t *ds;
    ibt_wr_ds_t *sgl;
    int nds;
    int i, last_ds, num_ds;
    uint32_t *wqe_start;
    int sectperwqe;
    uint_t posted_cnt = 0;
    int strong_order;
    int print_rdma;
    int rlen;
    uint32_t rkey;
    uint64_t raddr;

    /* initialize the FMA retry loop */
    hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);

    ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

    /* Save away some initial QP state */
    wq = qp->qp_sq_wqhdr;
    qsize_msk = wq->wq_mask;
    hdrmwqes = qp->qp_sq_hdrmwqes;      /* in WQEs */
    sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);

    tail = wq->wq_tail;
    head = wq->wq_head;
    status = DDI_SUCCESS;

post_next:
    print_rdma = 0;
    rlen = 0;
    strong_order = 0;

    /*
     * Check for "queue full" condition.  If the queue
     * is already full, then no more WQEs can be posted.
     * So break out, ring a doorbell (if necessary) and
     * return an error
     */
    if (wq->wq_full != 0) {
        status = IBT_QP_FULL;
        goto done;
    }
    next_tail = (tail + 1) & qsize_msk;
    if (((tail + hdrmwqes) & qsize_msk) == head) {
        wq->wq_full = 1;
    }

    desc = HERMON_QP_SQ_ENTRY(qp, tail);

    ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
        sizeof (hermon_hw_snd_wqe_ctrl_t));
    nds = wr->wr_nds;
    sgl = wr->wr_sgl;
    num_ds = 0;
    if (wr->wr_trans != IBT_RC_SRV) {
        status = IBT_QP_SRV_TYPE_INVALID;
        goto done;
    }

    /*
     * Validate the operation type.  For RC requests, we allow
     * "Send", "RDMA Read", "RDMA Write", various "Atomic"
     * operations, and memory window "Bind"
     */
    switch (wr->wr_opcode) {
    default:
        status = IBT_QP_OP_TYPE_INVALID;
        goto done;

    case IBT_WRC_SEND:
        if (wr->wr_flags & IBT_WR_SEND_REMOTE_INVAL) {
            nopcode = HERMON_WQE_SEND_NOPCODE_SND_INV;
            immed_data = wr->wr.rc.rcwr.send_inval;
        } else if (wr->wr_flags & IBT_WR_SEND_IMMED) {
            nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
            immed_data = wr->wr.rc.rcwr.send_immed;
        } else {
            nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
        }
        break;

    /*
     * If this is an RDMA Read or RDMA Write request, then fill
     * in the "Remote Address" header fields.
     */
    case IBT_WRC_RDMAW:
        if (wr->wr_flags & IBT_WR_SEND_IMMED) {
            nopcode = HERMON_WQE_SEND_NOPCODE_RDMAWI;
            immed_data = wr->wr.rc.rcwr.rdma.rdma_immed;
        } else {
            nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
        }
        /* FALLTHROUGH */
    case IBT_WRC_RDMAR:
        if (wr->wr_opcode == IBT_WRC_RDMAR)
            nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
        rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));

        /*
         * Build the Remote Address Segment for the WQE, using
         * the information from the RC work request.
         */
        HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

        if (hermon_rdma_debug) {
            print_rdma = hermon_rdma_debug;
            rkey = wr->wr.rc.rcwr.rdma.rdma_rkey;
            raddr = wr->wr.rc.rcwr.rdma.rdma_raddr;
        }

        /* Update "ds" for filling in Data Segments (below) */
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
            sizeof (hermon_hw_snd_wqe_remaddr_t));
        break;

    /*
     * If this is one of the Atomic type operations (i.e.
     * Compare-Swap or Fetch-Add), then fill in both the "Remote
     * Address" header fields and the "Atomic" header fields.
     */
    case IBT_WRC_CSWAP:
        nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
        /* FALLTHROUGH */
    case IBT_WRC_FADD:
        if (wr->wr_opcode == IBT_WRC_FADD)
            nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
        rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
            sizeof (hermon_hw_snd_wqe_remaddr_t));

        /*
         * Build the Remote Address and Atomic Segments for
         * the WQE, using the information from the RC Atomic
         * work request.
         */
        HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
        HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

        /* Update "ds" for filling in Data Segments (below) */
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
            sizeof (hermon_hw_snd_wqe_atomic_t));

        /*
         * Update "nds" and "sgl" because Atomic requests have
         * only a single Data Segment.
         */
        nds = 1;
        sgl = wr->wr_sgl;
        break;

    /*
     * If this is a memory window Bind operation, then we call the
     * hermon_wr_bind_check() routine to validate the request and
     * to generate the updated RKey.  If this is successful, then
     * we fill in the WQE's "Bind" header fields.
     */
    case IBT_WRC_BIND:
        nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
        status = hermon_wr_bind_check(state, wr);
        if (status != DDI_SUCCESS)
            goto done;

        bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));

        /*
         * Build the Bind Memory Window Segments for the WQE,
         * using the information from the RC Bind memory
         * window work request.
         */
        HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

        /*
         * Update the "ds" pointer.  Even though the "bind"
         * operation requires no SGLs, this is necessary to
         * facilitate the correct descriptor size calculations
         * (below).
         */
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
            sizeof (hermon_hw_snd_wqe_bind_t));
        nds = 0;
        break;

    case IBT_WRC_FAST_REG_PMR:
        nopcode = HERMON_WQE_SEND_NOPCODE_FRWR;
        frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.rc.rcwr.reg_pmr);
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr +
            sizeof (hermon_hw_snd_wqe_frwr_t));
        nds = 0;
        strong_order = 0x80;
        break;

    case IBT_WRC_LOCAL_INVALIDATE:
        nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV;
        li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        HERMON_WQE_BUILD_LI(qp, li, wr->wr.rc.rcwr.li);
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li +
            sizeof (hermon_hw_snd_wqe_local_inv_t));
        nds = 0;
        strong_order = 0x80;
        break;
    }

    /*
     * Now fill in the Data Segments (SGL) for the Send WQE based
     * on the values set up above (i.e. "sgl", "nds", and the "ds"
     * pointer).  Start by checking for a valid number of SGL entries
     */
    if (nds > qp->qp_sq_sgl) {
        status = IBT_QP_SGL_LEN_INVALID;
        goto done;
    }

    for (last_ds = num_ds, i = 0; i < nds; i++) {
        if (sgl[i].ds_len != 0)
            last_ds++;      /* real last ds of wqe to fill */
    }
    desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
    for (i = nds; --i >= 0; ) {
        if (sgl[i].ds_len == 0) {
            continue;
        }
        rlen += sgl[i].ds_len;
        if (print_rdma & 0x2)
            IBTF_DPRINTF_L2("rdma", "post: [%d]: laddr %llx "
                "llen %x", i, sgl[i].ds_va, sgl[i].ds_len);

        /*
         * Fill in the Data Segment(s) for the current WQE, using the
         * information contained in the scatter-gather list of the
         * work request.
         */
        last_ds--;
        HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[i]);
    }
    /* ensure RDMA READ does not exceed HCA limit */
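    /*
     * desc_sz is in 16-byte units; the "+ 2" here presumably allows for
     * the control and remote-address segments, which occupy the first
     * two such units ahead of the read's scatter-gather entries.
     */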
    if ((wr->wr_opcode == IBT_WRC_RDMAR) && (desc_sz >
        state->hs_ibtfinfo.hca_attr->hca_conn_rdma_read_sgl_sz + 2)) {
        status = IBT_QP_SGL_LEN_INVALID;
        goto done;
    }

    if (print_rdma & 0x1) {
        IBTF_DPRINTF_L2("rdma", "post: indx %x rkey %x raddr %llx "
            "total len %x", tail, rkey, raddr, rlen);
    }

    fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

    signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
        (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0;

    solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0;

    HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited,
        signaled_dbd, 0, qp, strong_order, 0);

    wq->wq_wrid[tail] = wr->wr_id;

    tail = next_tail;

    /* Update some of the state in the QP */
    wq->wq_tail = tail;

    membar_producer();

    /* Now set the ownership bit of the first one in the chain. */
    HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);

    posted_cnt++;
    if (--num_wr > 0) {
        /* do the invalidate of the headroom */
        wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
            (tail + hdrmwqes) & qsize_msk);
        for (i = 16; i < sectperwqe; i += 16) {
            wqe_start[i] = 0xFFFFFFFF;
        }

        wr++;
        goto post_next;
    }
done:

    if (posted_cnt != 0) {
        ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

        membar_producer();

        /* the FMA retry loop starts for Hermon doorbell register. */
        hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
            fm_status, fm_test_num);

        /* Ring the doorbell */
        HERMON_UAR_DOORBELL(state, uarhdl,
            (uint64_t *)(void *)&state->hs_uar->send,
            (uint64_t)qp->qp_ring);

        /* the FMA retry loop ends. */
        hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
            fm_status, fm_test_num);

        /* do the invalidate of the headroom */
        wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
            (tail + hdrmwqes) & qsize_msk);
        for (i = 16; i < sectperwqe; i += 16) {
            wqe_start[i] = 0xFFFFFFFF;
        }
    }
    /*
     * Update the "num_posted" return value (if necessary).
     * Then drop the locks and return.
     */
    if (num_posted != NULL) {
        *num_posted = posted_cnt;
    }

    mutex_exit(&qp->qp_sq_lock);
    return (status);

pio_error:
    mutex_exit(&qp->qp_sq_lock);
    hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
    return (ibc_get_ci_failure(0));
}

/*
 * hermon_post_send()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_send(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
    ibt_send_wr_t *curr_wr;
    hermon_workq_hdr_t *wq;
    hermon_ahhdl_t ah;
    uint64_t *desc, *prev;
    uint32_t desc_sz;
    uint32_t signaled_dbd, solicited;
    uint32_t head, tail, next_tail, qsize_msk;
    uint32_t hdrmwqes;
    uint_t currindx, wrindx, numremain;
    uint_t chainlen;
    uint_t posted_cnt, maxstat;
    uint_t total_posted;
    int status;
    uint32_t nopcode, fence, immed_data = 0;
    uint32_t prev_nopcode;
    uint_t qp_state;

    /* initialize the FMA retry loop */
    hermon_pio_init(fm_loop_cnt, fm_status, fm_test);

    /*
     * Check for user-mappable QP memory.  Note:  We do not allow kernel
     * clients to post to QP memory that is accessible directly by the
     * user.  If the QP memory is user accessible, then return an error.
     */
    if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
        return (IBT_QP_HDL_INVALID);
    }

    mutex_enter(&qp->qp_sq_lock);

    /*
     * Check QP state.  Cannot post Send requests from the "Reset",
     * "Init", or "RTR" states
     */
    qp_state = qp->qp_state_for_post_send;
    if ((qp_state == HERMON_QP_RESET) ||
        (qp_state == HERMON_QP_INIT) ||
        (qp_state == HERMON_QP_RTR)) {
        mutex_exit(&qp->qp_sq_lock);
        return (IBT_QP_STATE_INVALID);
    }

    if (qp->qp_is_special)
        goto post_many;

    /* Use these optimized functions most of the time */
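    /* (note that both fast paths drop qp_sq_lock before returning) */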
    if (qp->qp_type == IBT_UD_RQP) {
        return (hermon_post_send_ud(state, qp, wr, num_wr, num_posted));
    }

    if (qp->qp_serv_type == HERMON_QP_RC) {
        return (hermon_post_send_rc(state, qp, wr, num_wr, num_posted));
    }

    if (qp->qp_serv_type == HERMON_QP_UC)
        goto post_many;

    mutex_exit(&qp->qp_sq_lock);
    return (IBT_QP_SRV_TYPE_INVALID);

post_many:
    /* general loop for non-optimized posting */

    /* Save away some initial QP state */
    wq = qp->qp_sq_wqhdr;
    qsize_msk = wq->wq_mask;
    tail = wq->wq_tail;
    head = wq->wq_head;
    hdrmwqes = qp->qp_sq_hdrmwqes;      /* in WQEs */

    /* Initialize posted_cnt */
    posted_cnt = 0;
    total_posted = 0;

    /*
     * For each ibt_send_wr_t in the wr[] list passed in, parse the
     * request and build a Send WQE.  NOTE:  Because we are potentially
     * building a chain of WQEs to post, we want to build them all first,
     * and set the valid (HW Ownership) bit on all but the first.
     * However, we do not want to validate the first one until the
     * entire chain of WQEs has been built.  Then, as the final step,
     * we set the valid bit in the first WQE, flush if needed, and
     * ring the appropriate doorbell.  NOTE:  the doorbell ring may
     * NOT be needed if the HCA is already processing, but the doorbell
     * ring will be done regardless.  NOTE ALSO:  It is possible for
     * more Work Requests to be posted than the HW will support at one
     * shot.  If this happens, we need to be able to post and ring
     * several chains here until the entire request is complete.
     * NOTE ALSO:  the term "chain" is used to differentiate it from
     * the Work Request List passed in; and because that's the terminology
     * from the previous generations of HCA - but the WQEs are not, in
     * fact, chained together for Hermon
     */
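    /*
     * For example, with HERMON_QP_MAXDESC_PER_DB at its current value
     * of 256, a call with num_wr == 600 is handled as three chains of
     * 256, 256, and 88 WQEs, each capped by one ownership flip on its
     * first WQE and one doorbell ring.
     */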

    wrindx = 0;
    numremain = num_wr;
    status = DDI_SUCCESS;
    while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
        /*
         * For the first WQE on a new chain we need "prev" to point
         * to the current descriptor.
         */
        prev = HERMON_QP_SQ_ENTRY(qp, tail);

        /*
         * Break the request up into lists that are less than or
         * equal to the maximum number of WQEs that can be posted
         * per doorbell ring - 256 currently
         */
        chainlen = (numremain > HERMON_QP_MAXDESC_PER_DB) ?
            HERMON_QP_MAXDESC_PER_DB : numremain;
        numremain -= chainlen;

        for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {
            /*
             * Check for "queue full" condition.  If the queue
             * is already full, then no more WQEs can be posted.
             * So break out, ring a doorbell (if necessary) and
             * return an error
             */
            if (wq->wq_full != 0) {
                status = IBT_QP_FULL;
                break;
            }

            /*
             * Increment the "tail index".  Check for "queue
             * full" condition incl. headroom.  If we detect that
             * the current work request is going to fill the work
             * queue, then we mark this condition and continue.
             * Don't need >=, because going one-by-one we have to
             * hit it exactly sooner or later
             */

            next_tail = (tail + 1) & qsize_msk;
            if (((tail + hdrmwqes) & qsize_msk) == head) {
                wq->wq_full = 1;
            }

            /*
             * Get the address of the location where the next
             * Send WQE should be built
             */
            desc = HERMON_QP_SQ_ENTRY(qp, tail);
            /*
             * Call hermon_wqe_send_build() to build the WQE
             * at the given address.  This routine uses the
             * information in the ibt_send_wr_t list (wr[]) and
             * returns the size of the WQE when it returns.
             */
            status = hermon_wqe_send_build(state, qp,
                &wr[wrindx], desc, &desc_sz);
            if (status != DDI_SUCCESS) {
                break;
            }

            /*
             * Now, build the Ctrl Segment based on
             * what was just done
             */
            curr_wr = &wr[wrindx];

            switch (curr_wr->wr_opcode) {
            case IBT_WRC_RDMAW:
                if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
                    nopcode =
                        HERMON_WQE_SEND_NOPCODE_RDMAWI;
                    immed_data =
                        hermon_wr_get_immediate(curr_wr);
                } else {
                    nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
                }
                break;

            case IBT_WRC_SEND:
                if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
                    nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
                    immed_data =
                        hermon_wr_get_immediate(curr_wr);
                } else {
                    nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
                }
                break;

            case IBT_WRC_SEND_LSO:
                nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
                break;

            case IBT_WRC_RDMAR:
                nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
                break;

            case IBT_WRC_CSWAP:
                nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
                break;

            case IBT_WRC_FADD:
                nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
                break;

            case IBT_WRC_BIND:
                nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
                break;
            }

            fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;

            /*
             * now, build up the control segment, leaving the
             * owner bit as it is
             */

            if ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
                (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL)) {
                signaled_dbd = 0xC;
            } else {
                signaled_dbd = 0;
            }
            if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT)
                solicited = 0x2;
            else
                solicited = 0;

            if (qp->qp_is_special) {
                /* Ensure correctness, set the ReRead bit */
                nopcode |= (1 << 6);
                ah = (hermon_ahhdl_t)
                    curr_wr->wr.ud.udwr_dest->ud_ah;
                mutex_enter(&ah->ah_lock);
                maxstat = ah->ah_udav->max_stat_rate;
                HERMON_WQE_SET_MLX_CTRL_SEGMENT(desc, desc_sz,
                    signaled_dbd, maxstat, ah->ah_udav->rlid,
                    qp, ah->ah_udav->sl);
                mutex_exit(&ah->ah_lock);
            } else {
                HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz,
                    fence, immed_data, solicited,
                    signaled_dbd, 0, qp, 0, 0);
            }
            wq->wq_wrid[tail] = curr_wr->wr_id;

            /*
             * If this is not the first descriptor on the current
             * chain, then set the ownership bit.
             */
            if (currindx != 0) {            /* not the first */
                membar_producer();
                HERMON_SET_SEND_WQE_OWNER(qp,
                    (uint32_t *)desc, nopcode);
            } else
                prev_nopcode = nopcode;

            /*
             * Update the current "tail index" and increment
             * "posted_cnt"
             */
            tail = next_tail;
            posted_cnt++;
        }

        /*
         * If we reach here and there are one or more WQEs which have
         * been successfully built as a chain, we have to finish up
         * and prepare them for writing to the HW.
         * The steps are:
         *      1. do the headroom fixup
         *      2. add in the size of the headroom for the sync
         *      3. write the owner bit for the first WQE
         *      4. sync them
         *      5. fix up the structures
         *      6. hit the doorbell in UAR
         */
        if (posted_cnt != 0) {
            ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);

            /* do the invalidate of the headroom */

            hermon_wqe_headroom(tail, qp);

            /* Update some of the state in the QP */
            wq->wq_tail = tail;
            total_posted += posted_cnt;
            posted_cnt = 0;

            membar_producer();

            /*
             * Now set the ownership bit of the first
             * one in the chain
             */
            HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)prev,
                prev_nopcode);

            /* the FMA retry loop starts for Hermon doorbell. */
            hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
                fm_status, fm_test);

            HERMON_UAR_DOORBELL(state, uarhdl,
                (uint64_t *)(void *)&state->hs_uar->send,
                (uint64_t)qp->qp_ring);

            /* the FMA retry loop ends. */
            hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
                fm_status, fm_test);
        }
    }

    /*
     * Update the "num_posted" return value (if necessary).
     * Then drop the locks and return.
     */
    if (num_posted != NULL) {
        *num_posted = total_posted;
    }
    mutex_exit(&qp->qp_sq_lock);
    return (status);

pio_error:
    mutex_exit(&qp->qp_sq_lock);
    hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
    return (ibc_get_ci_failure(0));
}

/*
 * hermon_post_recv()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_recv(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
    uint64_t *desc;
    hermon_workq_hdr_t *wq;
    uint32_t head, tail, next_tail, qsize_msk;
    uint_t wrindx;
    uint_t posted_cnt;
    int status;

    /*
     * Check for user-mappable QP memory.  Note:  We do not allow kernel
     * clients to post to QP memory that is accessible directly by the
     * user.  If the QP memory is user accessible, then return an error.
     */
    if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
        return (IBT_QP_HDL_INVALID);
    }

    /* Initialize posted_cnt */
    posted_cnt = 0;

    mutex_enter(&qp->qp_lock);

    /*
     * Check if QP is associated with an SRQ
     */
    if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
        mutex_exit(&qp->qp_lock);
        return (IBT_SRQ_IN_USE);
    }

    /*
     * Check QP state.  Cannot post Recv requests from the "Reset" state
     */
    if (qp->qp_state == HERMON_QP_RESET) {
        mutex_exit(&qp->qp_lock);
        return (IBT_QP_STATE_INVALID);
    }

    /* Check that work request transport type is valid */
    if ((qp->qp_type != IBT_UD_RQP) &&
        (qp->qp_serv_type != HERMON_QP_RC) &&
        (qp->qp_serv_type != HERMON_QP_UC)) {
        mutex_exit(&qp->qp_lock);
        return (IBT_QP_SRV_TYPE_INVALID);
    }

    /*
     * No need to grab the lock for the WRID list (i.e., no
     * membar_consumer()) here, because the mutex_enter() above
     * has the same effect.
     */

    /* Save away some initial QP state */
    wq = qp->qp_rq_wqhdr;
    qsize_msk = wq->wq_mask;
    tail = wq->wq_tail;
    head = wq->wq_head;

    wrindx = 0;
    status = DDI_SUCCESS;

    for (wrindx = 0; wrindx < num_wr; wrindx++) {
        if (wq->wq_full != 0) {
            status = IBT_QP_FULL;
            break;
        }
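        /*
         * Unlike the send queue, the receive queue reserves no
         * headroom: it is full as soon as the next tail would catch
         * up with the head.
         */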
        next_tail = (tail + 1) & qsize_msk;
        if (next_tail == head) {
            wq->wq_full = 1;
        }
        desc = HERMON_QP_RQ_ENTRY(qp, tail);
        status = hermon_wqe_recv_build(state, qp, &wr[wrindx], desc);
        if (status != DDI_SUCCESS) {
            break;
        }

        wq->wq_wrid[tail] = wr[wrindx].wr_id;
        qp->qp_rq_wqecntr++;

        tail = next_tail;
        posted_cnt++;
    }

    if (posted_cnt != 0) {

        wq->wq_tail = tail;

        membar_producer();      /* ensure wrids are visible */

        /* Update the doorbell record w/ wqecntr */
        HERMON_UAR_DB_RECORD_WRITE(qp->qp_rq_vdbr,
            qp->qp_rq_wqecntr & 0xFFFF);
    }

    if (num_posted != NULL) {
        *num_posted = posted_cnt;
    }

    mutex_exit(&qp->qp_lock);
    return (status);
}

/*
 * hermon_post_srq()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_post_srq(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
{
    uint64_t *desc;
    hermon_workq_hdr_t *wq;
    uint_t indx, wrindx;
    uint_t posted_cnt;
    int status;

    mutex_enter(&srq->srq_lock);

    /*
     * Check for user-mappable SRQ memory.  Note:  We do not allow kernel
     * clients to post to SRQ memory that is accessible directly by the
     * user.  If the SRQ memory is user accessible, then return an error.
     */
    if (srq->srq_is_umap) {
        mutex_exit(&srq->srq_lock);
        return (IBT_SRQ_HDL_INVALID);
    }

    /*
     * Check SRQ state.  Cannot post Recv requests when SRQ is in error
     */
    if (srq->srq_state == HERMON_SRQ_STATE_ERROR) {
        mutex_exit(&srq->srq_lock);
        return (IBT_QP_STATE_INVALID);
    }

    status = DDI_SUCCESS;
    posted_cnt = 0;
    wq = srq->srq_wq_wqhdr;
    indx = wq->wq_head;

    for (wrindx = 0; wrindx < num_wr; wrindx++) {

        if (indx == wq->wq_tail) {
            status = IBT_QP_FULL;
            break;
        }
        desc = HERMON_SRQ_WQE_ADDR(srq, indx);

        wq->wq_wrid[indx] = wr[wrindx].wr_id;

        status = hermon_wqe_srq_build(state, srq, &wr[wrindx], desc);
        if (status != DDI_SUCCESS) {
            break;
        }

        posted_cnt++;
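        /*
         * SRQ WQEs are kept on a free list: the second 16-bit word of
         * each descriptor's "next" segment holds the index of the next
         * free entry, so reading it here advances the head.
         */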
        indx = htons(((uint16_t *)desc)[1]);
        wq->wq_head = indx;
    }

    if (posted_cnt != 0) {

        srq->srq_wq_wqecntr += posted_cnt;

        membar_producer();      /* ensure wrids are visible */

        /* Ring the doorbell w/ wqecntr */
        HERMON_UAR_DB_RECORD_WRITE(srq->srq_wq_vdbr,
            srq->srq_wq_wqecntr & 0xFFFF);
    }

    if (num_posted != NULL) {
        *num_posted = posted_cnt;
    }

    mutex_exit(&srq->srq_lock);
    return (status);
}


/*
 * hermon_wqe_send_build()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
{
    hermon_hw_snd_wqe_ud_t *ud;
    hermon_hw_snd_wqe_remaddr_t *rc;
    hermon_hw_snd_wqe_atomic_t *at;
    hermon_hw_snd_wqe_remaddr_t *uc;
    hermon_hw_snd_wqe_bind_t *bn;
    hermon_hw_wqe_sgl_t *ds, *old_ds;
    ibt_ud_dest_t *dest;
    ibt_wr_ds_t *sgl;
    hermon_ahhdl_t ah;
    uint32_t nds;
    int i, j, last_ds, num_ds, status;
    int tmpsize;

    ASSERT(MUTEX_HELD(&qp->qp_sq_lock));

    /* Initialize the information for the Data Segments */
    ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
        sizeof (hermon_hw_snd_wqe_ctrl_t));
    nds = wr->wr_nds;
    sgl = wr->wr_sgl;
    num_ds = 0;
    i = 0;

    /*
     * Building a Send WQE depends first and foremost on the transport
     * type of the Work Request (i.e. UD, RC, or UC)
     */
    switch (wr->wr_trans) {
    case IBT_UD_SRV:
        /* Ensure that work request transport type matches QP type */
        if (qp->qp_serv_type != HERMON_QP_UD) {
            return (IBT_QP_SRV_TYPE_INVALID);
        }

        /*
         * Validate the operation type.  For UD requests, only the
         * "Send" and "Send LSO" operations are valid.
         */
        if (wr->wr_opcode != IBT_WRC_SEND &&
            wr->wr_opcode != IBT_WRC_SEND_LSO) {
            return (IBT_QP_OP_TYPE_INVALID);
        }

        /*
         * If this is a Special QP (QP0 or QP1), then we need to
         * build MLX WQEs instead.  So jump to hermon_wqe_mlx_build()
         * and return whatever status it returns
         */
        if (qp->qp_is_special) {
            if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
                return (IBT_QP_OP_TYPE_INVALID);
            }
            status = hermon_wqe_mlx_build(state, qp,
                wr, desc, size);
            return (status);
        }

        /*
         * Otherwise, if this is a normal UD Send request, then fill
         * all the fields in the Hermon UD header for the WQE.  Note:
         * to do this we'll need to extract some information from the
         * Address Handle passed with the work request.
         */
        ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
            sizeof (hermon_hw_snd_wqe_ctrl_t));
        if (wr->wr_opcode == IBT_WRC_SEND) {
            dest = wr->wr.ud.udwr_dest;
        } else {
            dest = wr->wr.ud_lso.lso_ud_dest;
        }
        ah = (hermon_ahhdl_t)dest->ud_ah;
        if (ah == NULL) {
            return (IBT_AH_HDL_INVALID);
        }

        /*
         * Build the Unreliable Datagram Segment for the WQE, using
         * the information from the address handle and the work
         * request.
         */
        /* mutex_enter(&ah->ah_lock); */
        if (wr->wr_opcode == IBT_WRC_SEND) {
            HERMON_WQE_BUILD_UD(qp, ud, ah, wr->wr.ud.udwr_dest);
        } else {        /* IBT_WRC_SEND_LSO */
            HERMON_WQE_BUILD_UD(qp, ud, ah,
                wr->wr.ud_lso.lso_ud_dest);
        }
        /* mutex_exit(&ah->ah_lock); */

        /* Update "ds" for filling in Data Segments (below) */
        ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
            sizeof (hermon_hw_snd_wqe_ud_t));

        if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
            int total_len;

            total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
            if ((uintptr_t)ds + total_len + (nds * 16) >
                (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz))
                return (IBT_QP_SGL_LEN_INVALID);

            bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1,
                wr->wr.ud_lso.lso_hdr_sz);
            old_ds = ds;
            ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
            for (; i < nds; i++) {
                if (sgl[i].ds_len == 0)
                    continue;
                HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds],
                    &sgl[i]);
                num_ds++;
                i++;
                break;
            }
            membar_producer();
            HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
                wr->wr.ud_lso.lso_hdr_sz);
        }

        break;

    case IBT_RC_SRV:
        /* Ensure that work request transport type matches QP type */
        if (qp->qp_serv_type != HERMON_QP_RC) {
            return (IBT_QP_SRV_TYPE_INVALID);
        }

        /*
         * Validate the operation type.  For RC requests, we allow
         * "Send", "RDMA Read", "RDMA Write", various "Atomic"
         * operations, and memory window "Bind"
         */
        if ((wr->wr_opcode != IBT_WRC_SEND) &&
            (wr->wr_opcode != IBT_WRC_RDMAR) &&
            (wr->wr_opcode != IBT_WRC_RDMAW) &&
            (wr->wr_opcode != IBT_WRC_CSWAP) &&
            (wr->wr_opcode != IBT_WRC_FADD) &&
            (wr->wr_opcode != IBT_WRC_BIND)) {
            return (IBT_QP_OP_TYPE_INVALID);
        }

        /*
         * If this is a Send request, then all we need to do is break
         * out here and begin the Data Segment processing below
         */
        if (wr->wr_opcode == IBT_WRC_SEND) {
            break;
        }

        /*
         * If this is an RDMA Read or RDMA Write request, then fill
         * in the "Remote Address" header fields.
         */
        if ((wr->wr_opcode == IBT_WRC_RDMAR) ||
            (wr->wr_opcode == IBT_WRC_RDMAW)) {
            rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
                sizeof (hermon_hw_snd_wqe_ctrl_t));

            /*
             * Build the Remote Address Segment for the WQE, using
             * the information from the RC work request.
             */
            HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);

            /* Update "ds" for filling in Data Segments (below) */
            ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
                sizeof (hermon_hw_snd_wqe_remaddr_t));
            break;
        }

        /*
         * If this is one of the Atomic type operations (i.e.
         * Compare-Swap or Fetch-Add), then fill in both the "Remote
         * Address" header fields and the "Atomic" header fields.
         */
        if ((wr->wr_opcode == IBT_WRC_CSWAP) ||
            (wr->wr_opcode == IBT_WRC_FADD)) {
            rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
                sizeof (hermon_hw_snd_wqe_ctrl_t));
            at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
                sizeof (hermon_hw_snd_wqe_remaddr_t));

            /*
             * Build the Remote Address and Atomic Segments for
             * the WQE, using the information from the RC Atomic
             * work request.
             */
            HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
            HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);

            /* Update "ds" for filling in Data Segments (below) */
            ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
                sizeof (hermon_hw_snd_wqe_atomic_t));

            /*
             * Update "nds" and "sgl" because Atomic requests have
             * only a single Data Segment (and they are encoded
             * somewhat differently in the work request).
             */
            nds = 1;
            sgl = wr->wr_sgl;
            break;
        }

        /*
         * If this is a memory window Bind operation, then we call the
         * hermon_wr_bind_check() routine to validate the request and
         * to generate the updated RKey.  If this is successful, then
         * we fill in the WQE's "Bind" header fields.
         */
        if (wr->wr_opcode == IBT_WRC_BIND) {
            status = hermon_wr_bind_check(state, wr);
            if (status != DDI_SUCCESS) {
                return (status);
            }

            bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
                sizeof (hermon_hw_snd_wqe_ctrl_t));

            /*
             * Build the Bind Memory Window Segments for the WQE,
             * using the information from the RC Bind memory
             * window work request.
             */
            HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);

            /*
             * Update the "ds" pointer.  Even though the "bind"
             * operation requires no SGLs, this is necessary to
             * facilitate the correct descriptor size calculations
             * (below).
             */
            ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
                sizeof (hermon_hw_snd_wqe_bind_t));
            nds = 0;
        }
        break;
1539
1540 case IBT_UC_SRV:
1541 /* Ensure that work request transport type matches QP type */
1542 if (qp->qp_serv_type != HERMON_QP_UC) {
1543 return (IBT_QP_SRV_TYPE_INVALID);
1544 }
1545
1546 /*
1547 * Validate the operation type. For UC requests, we only
1548 * allow "Send", "RDMA Write", and memory window "Bind".
1549 * Note: Unlike RC, UC does not allow "RDMA Read" or "Atomic"
1550 * operations
1551 */
1552 if ((wr->wr_opcode != IBT_WRC_SEND) &&
1553 (wr->wr_opcode != IBT_WRC_RDMAW) &&
1554 (wr->wr_opcode != IBT_WRC_BIND)) {
1555 return (IBT_QP_OP_TYPE_INVALID);
1556 }
1557
1558 /*
1559 * If this is a Send request, then all we need to do is break
1560 * out and here and begin the Data Segment processing below
1561 */
1562 if (wr->wr_opcode == IBT_WRC_SEND) {
1563 break;
1564 }
1565
1566 /*
1567 * If this is an RDMA Write request, then fill in the "Remote
1568 * Address" header fields.
1569 */
1570 if (wr->wr_opcode == IBT_WRC_RDMAW) {
1571 uc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
1572 sizeof (hermon_hw_snd_wqe_ctrl_t));
1573
1574 /*
1575 * Build the Remote Address Segment for the WQE, using
1576 * the information from the UC work request.
1577 */
1578 HERMON_WQE_BUILD_REMADDR(qp, uc, &wr->wr.uc.ucwr.rdma);
1579
1580 /* Update "ds" for filling in Data Segments (below) */
1581 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)uc +
1582 sizeof (hermon_hw_snd_wqe_remaddr_t));
1583 break;
1584 }
1585
1586 /*
1587 * If this is memory window Bind operation, then we call the
1588 * hermon_wr_bind_check() routine to validate the request and
1589 * to generate the updated RKey. If this is successful, then
1590 * we fill in the WQE's "Bind" header fields.
1591 */
1592 if (wr->wr_opcode == IBT_WRC_BIND) {
1593 status = hermon_wr_bind_check(state, wr);
1594 if (status != DDI_SUCCESS) {
1595 return (status);
1596 }
1597
1598 bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
1599 sizeof (hermon_hw_snd_wqe_ctrl_t));
1600
1601 /*
1602 * Build the Bind Memory Window Segments for the WQE,
1603 * using the information from the UC Bind memory
1604 * window work request.
1605 */
1606 HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.uc.ucwr.bind);
1607
1608 /*
1609 * Update the "ds" pointer. Even though the "bind"
1610 * operation requires no SGLs, this is necessary to
1611 * facilitate the correct descriptor size calculations
1612 * (below).
1613 */
1614 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
1615 sizeof (hermon_hw_snd_wqe_bind_t));
1616 nds = 0;
1617 }
1618 break;
1619
1620 default:
1621 return (IBT_QP_SRV_TYPE_INVALID);
1622 }
1623
1624 /*
1625 * Now fill in the Data Segments (SGL) for the Send WQE based on
1626 * the values setup above (i.e. "sgl", "nds", and the "ds" pointer
1627 * Start by checking for a valid number of SGL entries
1628 */
1629 if (nds > qp->qp_sq_sgl) {
1630 return (IBT_QP_SGL_LEN_INVALID);
1631 }
1632
1633 /*
1634 * For each SGL in the Send Work Request, fill in the Send WQE's data
1635 * segments. Note: We skip any SGL with zero size because Hermon
1636 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
1637 * the encoding for zero means a 2GB transfer.
1638 */
1639 for (last_ds = num_ds, j = i; j < nds; j++) {
1640 if (sgl[j].ds_len != 0)
1641 last_ds++; /* real last ds of wqe to fill */
1642 }
1643
1644 /*
1645 * Return the size of descriptor (in 16-byte chunks)
1646 * For Hermon, we want them (for now) to be on stride size
1647 * boundaries, which was implicit in Tavor/Arbel
1648 *
1649 */
1650 tmpsize = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc);
1651
1652 *size = tmpsize >> 0x4;
1653
1654 for (j = nds; --j >= i; ) {
1655 if (sgl[j].ds_len == 0) {
1656 continue;
1657 }
1658
1659 /*
1660 * Fill in the Data Segment(s) for the current WQE, using the
1661 * information contained in the scatter-gather list of the
1662 * work request.
1663 */
1664 last_ds--;
1665 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
1666 }
1667
1668 return (DDI_SUCCESS);
1669 }
1670
1671
1672
1673 /*
1674 * hermon_wqe_mlx_build()
1675 * Context: Can be called from interrupt or base context.
1676 */
1677 static int
1678 hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
1679 ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
1680 {
1681 hermon_ahhdl_t ah;
1682 hermon_hw_udav_t *udav;
1683 ib_lrh_hdr_t *lrh;
1684 ib_grh_t *grh;
1685 ib_bth_hdr_t *bth;
1686 ib_deth_hdr_t *deth;
1687 hermon_hw_wqe_sgl_t *ds;
1688 ibt_wr_ds_t *sgl;
1689 uint8_t *mgmtclass, *hpoint, *hcount;
1690 uint32_t nds, offset, pktlen;
1691 uint32_t desc_sz;
1692 int i, num_ds;
1693 int tmpsize;
1694
1695 ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
1696
1697 /* Initialize the information for the Data Segments */
1698 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
1699 sizeof (hermon_hw_mlx_wqe_nextctrl_t));
1700
1701 /*
1702 * Pull the address handle from the work request. The UDAV will
1703 * be used to answer some questions about the request.
1704 */
1705 ah = (hermon_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah;
1706 if (ah == NULL) {
1707 return (IBT_AH_HDL_INVALID);
1708 }
1709 mutex_enter(&ah->ah_lock);
1710 udav = ah->ah_udav;
1711
1712 /*
1713 * If the request is for QP1 and the destination LID is equal to
1714 * the Permissive LID, then return an error. This combination is
1715 * not allowed
1716 */
1717 if ((udav->rlid == IB_LID_PERMISSIVE) &&
1718 (qp->qp_is_special == HERMON_QP_GSI)) {
1719 mutex_exit(&ah->ah_lock);
1720 return (IBT_AH_HDL_INVALID);
1721 }
1722
1723 /*
1724 * Calculate the size of the packet headers, including the GRH
1725 * (if necessary)
1726 */
1727 desc_sz = sizeof (ib_lrh_hdr_t) + sizeof (ib_bth_hdr_t) +
1728 sizeof (ib_deth_hdr_t);
1729 if (udav->grh) {
1730 desc_sz += sizeof (ib_grh_t);
1731 }
1732
1733 /*
1734 * Begin to build the first "inline" data segment for the packet
1735 * headers. Note: By specifying "inline" we can build the contents
1736 * of the MAD packet headers directly into the work queue (as part
1737 * descriptor). This has the advantage of both speeding things up
1738 * and of not requiring the driver to allocate/register any additional
1739 * memory for the packet headers.
1740 */
1741 HERMON_WQE_BUILD_INLINE(qp, &ds[0], desc_sz);
1742 desc_sz += 4;
1743
1744 /*
1745 * Build Local Route Header (LRH)
1746 * We start here by building the LRH into a temporary location.
1747 * When we have finished we copy the LRH data into the descriptor.
1748 *
1749 * Notice that the VL values are hardcoded. This is not a problem
1750 * because VL15 is decided later based on the value in the MLX
1751 * transport "next/ctrl" header (see the "vl15" bit below), and it
1752 * is otherwise (meaning for QP1) chosen from the SL-to-VL table
1753 * values. This rule does not hold for loopback packets however
1754 * (all of which bypass the SL-to-VL tables) and it is the reason
1755 * that non-QP0 MADs are setup with VL hardcoded to zero below.
1756 *
1757 * Notice also that Source LID is hardcoded to the Permissive LID
1758 * (0xFFFF). This is also not a problem because if the Destination
1759 * LID is not the Permissive LID, then the "slr" value in the MLX
1760 * transport "next/ctrl" header will be set to zero and the hardware
1761 * will pull the LID from value in the port.
1762 */
1763 lrh = (ib_lrh_hdr_t *)((uintptr_t)&ds[0] + 4);
1764 pktlen = (desc_sz + 0x100) >> 2;
1765 HERMON_WQE_BUILD_MLX_LRH(lrh, qp, udav, pktlen);
1766
1767 /*
1768 * Build Global Route Header (GRH)
1769 * This is only built if necessary as defined by the "grh" bit in
1770 * the address vector. Note: We also calculate the offset to the
1771 * next header (BTH) based on whether or not the "grh" bit is set.
1772 */
1773 if (udav->grh) {
1774 /*
1775 * If the request is for QP0, then return an error. The
1776 * combination of global routine (GRH) and QP0 is not allowed.
1777 */
1778 if (qp->qp_is_special == HERMON_QP_SMI) {
1779 mutex_exit(&ah->ah_lock);
1780 return (IBT_AH_HDL_INVALID);
1781 }
1782 grh = (ib_grh_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
1783 HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen);
1784
1785 bth = (ib_bth_hdr_t *)((uintptr_t)grh + sizeof (ib_grh_t));
1786 } else {
1787 bth = (ib_bth_hdr_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
1788 }
1789 mutex_exit(&ah->ah_lock);
1790
1791
1792 /*
1793 * Build Base Transport Header (BTH)
1794 * Notice that the M, PadCnt, and TVer fields are all set
1795 * to zero implicitly. This is true for all Management Datagrams
1796 * MADs whether GSI are SMI.
1797 */
1798 HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr);
1799
1800 /*
1801 * Build Datagram Extended Transport Header (DETH)
1802 */
1803 deth = (ib_deth_hdr_t *)((uintptr_t)bth + sizeof (ib_bth_hdr_t));
1804 HERMON_WQE_BUILD_MLX_DETH(deth, qp);
1805
1806 /* Ensure that the Data Segment is aligned on a 16-byte boundary */
1807 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)deth + sizeof (ib_deth_hdr_t));
1808 ds = (hermon_hw_wqe_sgl_t *)(((uintptr_t)ds + 0xF) & ~0xF);
1809 nds = wr->wr_nds;
1810 sgl = wr->wr_sgl;
1811 num_ds = 0;
1812
1813 /*
1814 * Now fill in the Data Segments (SGL) for the MLX WQE based on the
1815 * values set up above (i.e. "sgl", "nds", and the "ds" pointer
1816 * Start by checking for a valid number of SGL entries
1817 */
1818 if (nds > qp->qp_sq_sgl) {
1819 return (IBT_QP_SGL_LEN_INVALID);
1820 }
1821
1822 /*
1823 * For each SGL in the Send Work Request, fill in the MLX WQE's data
1824 * segments. Note: We skip any SGL with zero size because Hermon
1825 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
1826 * the encoding for zero means a 2GB transfer. Because of this special
1827 * encoding in the hardware, we mask the requested length with
1828 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
1829 * zero.)
1830 */
1831 mgmtclass = hpoint = hcount = NULL;
1832 offset = 0;
1833 for (i = 0; i < nds; i++) {
1834 if (sgl[i].ds_len == 0) {
1835 continue;
1836 }
1837
1838 /*
1839 * Fill in the Data Segment(s) for the MLX send WQE, using
1840 * the information contained in the scatter-gather list of
1841 * the work request.
1842 */
1843 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]);
1844
1845 /*
1846 * Search through the contents of all MADs posted to QP0 to
1847 * initialize pointers to the places where Directed Route "hop
1848 * pointer", "hop count", and "mgmtclass" would be. Hermon
1849 * needs these updated (i.e. incremented or decremented, as
1850 * necessary) by software.
1851 */
1852 if (qp->qp_is_special == HERMON_QP_SMI) {
1853
1854 HERMON_SPECIAL_QP_DRMAD_GET_MGMTCLASS(mgmtclass,
1855 offset, sgl[i].ds_va, sgl[i].ds_len);
1856
1857 HERMON_SPECIAL_QP_DRMAD_GET_HOPPOINTER(hpoint,
1858 offset, sgl[i].ds_va, sgl[i].ds_len);
1859
1860 HERMON_SPECIAL_QP_DRMAD_GET_HOPCOUNT(hcount,
1861 offset, sgl[i].ds_va, sgl[i].ds_len);
1862
1863 offset += sgl[i].ds_len;
1864 }
1865 num_ds++;
1866 }
1867
1868 /*
1869 * Hermon's Directed Route MADs need to have the "hop pointer"
1870 * incremented/decremented (as necessary) depending on whether it is
1871 * currently less than or greater than the "hop count" (i.e. whether
1872 * the MAD is a request or a response.)
1873 */
1874 if (qp->qp_is_special == HERMON_QP_SMI) {
1875 HERMON_SPECIAL_QP_DRMAD_DO_HOPPOINTER_MODIFY(*mgmtclass,
1876 *hpoint, *hcount);
1877 }
1878
1879 /*
1880 * Now fill in the ICRC Data Segment. This data segment is inlined
1881 * just like the packets headers above, but it is only four bytes and
1882 * set to zero (to indicate that we wish the hardware to generate ICRC.
1883 */
1884 HERMON_WQE_BUILD_INLINE_ICRC(qp, &ds[num_ds], 4, 0);
1885 num_ds++;
1886
	/*
	 * Return the size of the descriptor (in 16-byte chunks).
	 * For Hermon, we want descriptors (for now) to be on stride-size
	 * boundaries, which was implicit in Tavor/Arbel.
	 */
	tmpsize = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc);

	*size = tmpsize >> 0x04;
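
	/*
	 * For example, if the inlined headers plus data segments span 96
	 * bytes, "tmpsize" is 0x60 and "*size" comes back as 6 chunks.
	 */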

	return (DDI_SUCCESS);
}


/*
 * hermon_wqe_recv_build()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t	*ds;
	int			i, num_ds;

	ASSERT(MUTEX_HELD(&qp->qp_lock));

	/*
	 * Fill in the Data Segments (SGL) for the Recv WQE. No space needs
	 * to be reserved for a control segment (there is none on the Hermon
	 * receive queue), but we will need to append an invalid (null)
	 * scatter pointer, per the PRM.
	 */
	ds = (hermon_hw_wqe_sgl_t *)(uintptr_t)desc;
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > qp->qp_rq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments. Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
	 * the encoding for zero means a 2GB transfer. Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/* put the null sgl pointer as well if needed */
	if (num_ds < qp->qp_rq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}
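
	/*
	 * The "null_sgl" entry appended above carries a zero byte count and
	 * an lkey the hardware treats as invalid; per the PRM note earlier,
	 * this is how the end of a short scatter list is marked when fewer
	 * than qp_rq_sgl entries are posted.
	 */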

	return (DDI_SUCCESS);
}


/*
 * hermon_wqe_srq_build()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
    ibt_recv_wr_t *wr, uint64_t *desc)
{
	hermon_hw_wqe_sgl_t	*ds;
	int			i, num_ds;

	ASSERT(MUTEX_HELD(&srq->srq_lock));

	/* Fill in the Data Segments (SGL) for the Recv WQE */
	ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
	    sizeof (hermon_hw_srq_wqe_next_t));
	num_ds = 0;

	/* Check for valid number of SGL entries */
	if (wr->wr_nds > srq->srq_wq_sgl) {
		return (IBT_QP_SGL_LEN_INVALID);
	}

	/*
	 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
	 * segments. Note: We skip any SGL with zero size because Hermon
	 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
	 * the encoding for zero means a 2GB transfer. Because of this special
	 * encoding in the hardware, we mask the requested length with
	 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
	 * zero.)
	 */
	for (i = 0; i < wr->wr_nds; i++) {
		if (wr->wr_sgl[i].ds_len == 0) {
			continue;
		}

		/*
		 * Fill in the Data Segment(s) for the receive WQE, using the
		 * information contained in the scatter-gather list of the
		 * work request.
		 */
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
		num_ds++;
	}

	/*
	 * put in the null sgl pointer as well, if needed
	 */
	if (num_ds < srq->srq_wq_sgl) {
		HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_wr_get_immediate()
 *    Context: Can be called from interrupt or base context.
 */
static uint32_t
hermon_wr_get_immediate(ibt_send_wr_t *wr)
{
	/*
	 * This routine extracts the "immediate data" from the appropriate
	 * location in the IBTF work request. Because of the way the
	 * work request structure is defined, the location for this data
	 * depends on the actual work request operation type.
	 */

	/* For RDMA Write, test if RC or UC */
	if (wr->wr_opcode == IBT_WRC_RDMAW) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.rdma.rdma_immed);
		} else { /* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.rdma.rdma_immed);
		}
	}

	/* For Send, test if RC, UD, or UC */
	if (wr->wr_opcode == IBT_WRC_SEND) {
		if (wr->wr_trans == IBT_RC_SRV) {
			return (wr->wr.rc.rcwr.send_immed);
		} else if (wr->wr_trans == IBT_UD_SRV) {
			return (wr->wr.ud.udwr_immed);
		} else { /* IBT_UC_SRV */
			return (wr->wr.uc.ucwr.send_immed);
		}
	}

	/*
	 * If any other type of request, then immediate is undefined
	 */
	return (0);
}

/*
 * hermon_wqe_headroom()
 *    Context: Can be called from interrupt or base context; currently
 *    it is only called from base context.
 *    Fills in the headroom for the Send Queue.
 */
static void
hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp)
{
	uint32_t	*wqe_start, *wqe_top, *wqe_base, qsize;
	int		hdrmwqes, wqesizebytes, sectperwqe;
	uint32_t	invalue;
	int		i, j;

	qsize = qp->qp_sq_bufsz;
	wqesizebytes = 1 << qp->qp_sq_log_wqesz;
	sectperwqe = wqesizebytes >> 6; /* 64 bytes/section */
	hdrmwqes = qp->qp_sq_hdrmwqes;
	wqe_base = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 0);
	wqe_top = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, qsize);
	wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, from);
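
	/*
	 * A worked example of the sizing above: with qp_sq_log_wqesz == 8,
	 * "wqesizebytes" is 256 and "sectperwqe" is 4, so each of the
	 * "hdrmwqes" WQEs stamped below is touched in four 64-byte sections.
	 */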

	for (i = 0; i < hdrmwqes; i++) {
		for (j = 0; j < sectperwqe; j++) {
			if (j == 0) { /* 1st section of wqe */
				/* preserve the ownership bit */
				invalue = ddi_get32(qp->qp_wqinfo.qa_acchdl,
				    wqe_start) | 0x7FFFFFFF;
			} else {
				/* or just invalidate it */
				invalue = 0xFFFFFFFF;
			}
			ddi_put32(qp->qp_wqinfo.qa_acchdl, wqe_start, invalue);
			wqe_start += 16; /* move 64 bytes */
		}
		if (wqe_start == wqe_top) /* hit the end of the queue */
			wqe_start = wqe_base; /* wrap to start */
	}
}

/*
 * hermon_wr_bind_check()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static int
hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr)
{
	ibt_bind_flags_t	bind_flags;
	uint64_t		vaddr, len;
	uint64_t		reg_start_addr, reg_end_addr;
	hermon_mwhdl_t		mw;
	hermon_mrhdl_t		mr;
	hermon_rsrc_t		*mpt;
	uint32_t		new_rkey;

	/* Check for a valid Memory Window handle in the WR */
	mw = (hermon_mwhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mw_hdl;
	if (mw == NULL) {
		return (IBT_MW_HDL_INVALID);
	}

	/* Check for a valid Memory Region handle in the WR */
	mr = (hermon_mrhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mr_hdl;
	if (mr == NULL) {
		return (IBT_MR_HDL_INVALID);
	}

	mutex_enter(&mr->mr_lock);
	mutex_enter(&mw->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/* Check for a valid Memory Window RKey (i.e. a matching RKey) */
	if (mw->mr_rkey != wr->wr.rc.rcwr.bind->bind_rkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_RKEY_INVALID);
	}

	/* Check for a valid Memory Region LKey (i.e. a matching LKey) */
	if (mr->mr_lkey != wr->wr.rc.rcwr.bind->bind_lkey) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_LKEY_INVALID);
	}

	/*
	 * Now check for valid "vaddr" and "len". Note: We don't check the
	 * "vaddr" range when "len == 0" (i.e. on unbind operations)
	 */
	len = wr->wr.rc.rcwr.bind->bind_len;
	if (len != 0) {
		vaddr = wr->wr.rc.rcwr.bind->bind_va;
		reg_start_addr = mr->mr_bindinfo.bi_addr;
		reg_end_addr = mr->mr_bindinfo.bi_addr +
		    (mr->mr_bindinfo.bi_len - 1);
		if ((vaddr < reg_start_addr) || (vaddr > reg_end_addr)) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_VA_INVALID);
		}
		vaddr = (vaddr + len) - 1;
		if (vaddr > reg_end_addr) {
			mutex_exit(&mr->mr_lock);
			mutex_exit(&mw->mr_lock);
			return (IBT_MR_LEN_INVALID);
		}
	}
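
	/*
	 * A worked example of the range checks above: for a region with
	 * bi_addr 0x1000 and bi_len 0x1000 (so reg_end_addr is 0x1FFF),
	 * a bind at va 0x1800 with len 0x900 ends at 0x20FF, which is
	 * beyond reg_end_addr and so fails with IBT_MR_LEN_INVALID.
	 */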

	/*
	 * Validate the bind access flags. Remote Write and Atomic access for
	 * the Memory Window require that Local Write access be set in the
	 * corresponding Memory Region.
	 */
	bind_flags = wr->wr.rc.rcwr.bind->bind_flags;
	if (((bind_flags & IBT_WR_BIND_WRITE) ||
	    (bind_flags & IBT_WR_BIND_ATOMIC)) &&
	    !(mr->mr_accflag & IBT_MR_LOCAL_WRITE)) {
		mutex_exit(&mr->mr_lock);
		mutex_exit(&mw->mr_lock);
		return (IBT_MR_ACCESS_REQ_INVALID);
	}

	/* Calculate the new RKey for the Memory Window */
	mpt = mw->mr_mptrsrcp;
	new_rkey = hermon_mr_keycalc(mpt->hr_indx);
	new_rkey = hermon_mr_key_swap(new_rkey);

	wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;
	mw->mr_rkey = new_rkey;

	mutex_exit(&mr->mr_lock);
	mutex_exit(&mw->mr_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_wrid_from_reset_handling()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_wrid_from_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	hermon_workq_hdr_t	*swq, *rwq;

	if (qp->qp_alloc_flags & IBT_QP_USER_MAP)
		return (DDI_SUCCESS);

	/* grab the cq lock(s) to modify the wqavl tree */
	if (qp->qp_rq_cqhdl)
		mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);

	/* Chain the newly allocated work queue header to the CQ's list */
	if (qp->qp_sq_cqhdl)
		hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

	swq = qp->qp_sq_wqhdr;
	swq->wq_head = 0;
	swq->wq_tail = 0;
	swq->wq_full = 0;

	/*
	 * Now we repeat all the above operations for the receive work queue,
	 * or shared receive work queue.
	 *
	 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
	 */

	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	} else {
		rwq = qp->qp_rq_wqhdr;
		rwq->wq_head = 0;
		rwq->wq_tail = 0;
		rwq->wq_full = 0;
		qp->qp_rq_wqecntr = 0;
	}
	hermon_cq_workq_add(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);

	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}

	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl)
		mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
	return (DDI_SUCCESS);
}


/*
 * hermon_wrid_to_reset_handling()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_wrid_to_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
{
	if (qp->qp_alloc_flags & IBT_QP_USER_MAP)
		return (DDI_SUCCESS);

	/*
	 * If there are unpolled entries in these CQs, they are
	 * polled/flushed. Grab the CQ lock(s) before modifying the
	 * wqavl tree.
	 */
	if (qp->qp_rq_cqhdl)
		mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_enter(&qp->qp_sq_cqhdl->cq_lock);

	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_enter(&qp->qp_srqhdl->srq_lock);
	}
	/*
	 * Flush the entries on the CQ for this QP's QPN.
	 */
	hermon_cq_entries_flush(state, qp);

	if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
		mutex_exit(&qp->qp_srqhdl->srq_lock);
	}

	hermon_cq_workq_remove(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);
	if (qp->qp_sq_cqhdl != NULL)
		hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);

	if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
	    qp->qp_sq_cqhdl != NULL)
		mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
	if (qp->qp_rq_cqhdl)
		mutex_exit(&qp->qp_rq_cqhdl->cq_lock);

	return (IBT_SUCCESS);
}


/*
 * hermon_wrid_get_entry()
 *    Context: Can be called from interrupt or base context.
 */
uint64_t
hermon_wrid_get_entry(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe)
{
	hermon_workq_avl_t	*wqa;
	hermon_workq_hdr_t	*wq;
	uint64_t		wrid;
	uint_t			send_or_recv, qpnum;
	uint32_t		indx;

	/*
	 * Determine whether this CQE is a send or receive completion.
	 */
	send_or_recv = HERMON_CQE_SENDRECV_GET(cq, cqe);

	/* Find the work queue for this QP number (send or receive side) */
	qpnum = HERMON_CQE_QPNUM_GET(cq, cqe);
	wqa = hermon_wrid_wqavl_find(cq, qpnum, send_or_recv);
	wq = wqa->wqa_wq;

	/*
	 * Regardless of whether the completion is the result of a "success"
	 * or a "failure", we lock the list of "containers" and attempt to
	 * search for the first matching completion (i.e. the first WR
	 * with a matching WQE addr and size). Once we find it, we pull out
	 * the "wrid" field and return it (see below). XXX Note: One possible
	 * future enhancement would be to enable this routine to skip over
	 * any "unsignaled" completions to go directly to the next "signaled"
	 * entry on success.
	 */
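	/*
	 * For example, with a 256-entry work queue (wq_mask == 0xFF), a
	 * WQEADDRSZ value of 0x305 yields an "indx" of 0x05, and the "wrid"
	 * saved in that slot at post time is what gets returned.
	 */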
	indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) & wq->wq_mask;
	wrid = wq->wq_wrid[indx];
	if (wqa->wqa_srq_en) {
		struct hermon_sw_srq_s	*srq;
		uint64_t		*desc;

		/* put wqe back on the srq free list */
		srq = wqa->wqa_srq;
		mutex_enter(&srq->srq_lock);
		desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
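		/*
		 * The big-endian store below records the freed WQE's index
		 * in the "next WQE index" field of the current tail's WQE
		 * header (the 16-bit value at byte offset 2), chaining the
		 * just-completed WQE onto the free list; the tail is then
		 * advanced to it.
		 */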
		((uint16_t *)desc)[1] = htons(indx);
		wq->wq_tail = indx;
		mutex_exit(&srq->srq_lock);
	} else {
		wq->wq_head = (indx + 1) & wq->wq_mask;
		wq->wq_full = 0;
	}

	return (wrid);
}


int
hermon_wrid_workq_compare(const void *p1, const void *p2)
{
	hermon_workq_compare_t	*cmpp;
	hermon_workq_avl_t	*curr;

	cmpp = (hermon_workq_compare_t *)p1;
	curr = (hermon_workq_avl_t *)p2;

	if (cmpp->cmp_qpn < curr->wqa_qpn)
		return (-1);
	else if (cmpp->cmp_qpn > curr->wqa_qpn)
		return (+1);
	else if (cmpp->cmp_type < curr->wqa_type)
		return (-1);
	else if (cmpp->cmp_type > curr->wqa_type)
		return (+1);
	else
		return (0);
}

/*
 * hermon_wrid_wqavl_find()
 *    Context: Can be called from interrupt or base context.
 */
static hermon_workq_avl_t *
hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, uint_t wq_type)
{
	hermon_workq_avl_t	*curr;
	hermon_workq_compare_t	cmp;

	/*
	 * Search the CQ's AVL tree of work queue entries for a send or
	 * recv queue matching the given QP number and type. avl_find()
	 * returns the matching entry, or NULL if none exists.
	 */
	cmp.cmp_qpn = qpn;
	cmp.cmp_type = wq_type;
	curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL);

	return (curr);
}


/*
 * hermon_wrid_wqhdr_create()
 *    Context: Can be called from base context.
 */
/* ARGSUSED */
hermon_workq_hdr_t *
hermon_wrid_wqhdr_create(int bufsz)
{
	hermon_workq_hdr_t	*wqhdr;

	/*
	 * Allocate space for the wqhdr, and an array to record all the wrids.
	 */
	wqhdr = (hermon_workq_hdr_t *)kmem_zalloc(sizeof (*wqhdr), KM_NOSLEEP);
	if (wqhdr == NULL) {
		return (NULL);
	}
	wqhdr->wq_wrid = kmem_zalloc(bufsz * sizeof (uint64_t), KM_NOSLEEP);
	if (wqhdr->wq_wrid == NULL) {
		kmem_free(wqhdr, sizeof (*wqhdr));
		return (NULL);
	}
	wqhdr->wq_size = bufsz;
	wqhdr->wq_mask = bufsz - 1;
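
	/*
	 * Note that "bufsz" must be a power of two for the mask above to
	 * work: e.g. a 256-entry queue yields wq_mask == 0xFF, letting
	 * wrid indices wrap with a simple AND instead of a modulo.
	 */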

	return (wqhdr);
}

void
hermon_wrid_wqhdr_destroy(hermon_workq_hdr_t *wqhdr)
{
	kmem_free(wqhdr->wq_wrid, wqhdr->wq_size * sizeof (uint64_t));
	kmem_free(wqhdr, sizeof (*wqhdr));
}


/*
 * hermon_cq_workq_add()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
{
	hermon_workq_compare_t	cmp;
	avl_index_t		where;

	cmp.cmp_qpn = wqavl->wqa_qpn;
	cmp.cmp_type = wqavl->wqa_type;
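	/*
	 * Standard Solaris AVL insertion idiom: avl_find() is called only
	 * to obtain the "where" insertion point (the entry is not expected
	 * to be in the tree already), and avl_insert() then links the new
	 * node in at that spot.
	 */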
	(void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where);
	avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqavl, where);
}


/*
 * hermon_cq_workq_remove()
 *    Context: Can be called from interrupt or base context.
 */
static void
hermon_cq_workq_remove(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
{
	avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqavl);
}