8368 remove warlock leftovers from usr/src/uts
--- old/usr/src/uts/common/io/ib/adapters/hermon/hermon_wr.c
+++ new/usr/src/uts/common/io/ib/adapters/hermon/hermon_wr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * hermon_wr.c
28 28 * Hermon Work Request Processing Routines
29 29 *
30 30 * Implements all the routines necessary to provide the PostSend(),
31 31 * PostRecv() and PostSRQ() verbs. Also contains all the code
32 32 * necessary to implement the Hermon WRID tracking mechanism.
33 33 */
34 34
35 35 #include <sys/types.h>
36 36 #include <sys/conf.h>
37 37 #include <sys/ddi.h>
38 38 #include <sys/sunddi.h>
39 39 #include <sys/modctl.h>
40 40 #include <sys/avl.h>
41 41
42 42 #include <sys/ib/adapters/hermon/hermon.h>
43 43
44 44 static uint32_t hermon_wr_get_immediate(ibt_send_wr_t *wr);
45 45 static int hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr);
46 46 static int hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
47 47 ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
48 48 static int hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
49 49 ibt_send_wr_t *wr, uint64_t *desc, uint_t *size);
50 50 static void hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp);
51 51 static int hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
52 52 ibt_recv_wr_t *wr, uint64_t *desc);
53 53 static int hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
54 54 ibt_recv_wr_t *wr, uint64_t *desc);
55 55 static hermon_workq_avl_t *hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn,
56 56 uint_t send_or_recv);
57 57 static void hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl);
58 58 static void hermon_cq_workq_remove(hermon_cqhdl_t cq,
59 59 hermon_workq_avl_t *wqavl);
60 60
61 61 static ibt_wr_ds_t null_sgl = { 0, 0x00000100, 0 };
62 62
63 63 /*
64 64 * Add ability to try to debug RDMA_READ/RDMA_WRITE failures.
65 65 *
66 66 * 0x1 - print rkey used during post_send
67 67 * 0x2 - print sgls used during post_send
68 68 * 0x4 - print FMR comings and goings
69 69 */
70 70 int hermon_rdma_debug = 0x0;
71 71
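The flag is a bitmask tested at post time; a condensed sketch of how the 0x1 and 0x2 bits gate output, taken from hermon_post_send_rc() below (all names are from that routine):

	/* bit 0x2 prints each scatter-gather entry; bit 0x1 prints the
	 * rkey and remote address once per RDMA work request */
	if (hermon_rdma_debug & 0x2)
		IBTF_DPRINTF_L2("rdma", "post: [%d]: laddr %llx "
		    "llen %x", i, sgl[i].ds_va, sgl[i].ds_len);
	if (hermon_rdma_debug & 0x1)
		IBTF_DPRINTF_L2("rdma", "post: indx %x rkey %x raddr %llx "
		    "total len %x", tail, rkey, raddr, rlen);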
72 72 static int
73 73 hermon_post_send_ud(hermon_state_t *state, hermon_qphdl_t qp,
74 74 ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
75 75 {
76 76 hermon_hw_snd_wqe_ud_t *ud;
77 77 hermon_workq_hdr_t *wq;
78 78 hermon_ahhdl_t ah;
79 79 ibt_wr_rfci_send_t *rfci;
80 80 ibt_wr_init_send_t *is;
81 81 ibt_ud_dest_t *dest;
82 82 uint64_t *desc;
83 83 uint32_t desc_sz;
84 84 uint32_t signaled_dbd, solicited;
85 85 uint32_t head, tail, next_tail, qsize_msk;
86 86 uint32_t hdrmwqes;
87 87 uint32_t nopcode, fence, immed_data = 0;
88 88 hermon_hw_wqe_sgl_t *ds, *old_ds;
89 89 ibt_wr_ds_t *sgl;
90 90 int nds;
91 91 int i, j, last_ds, num_ds, status;
92 92 uint32_t *wqe_start;
93 93 int sectperwqe;
94 94 uint_t posted_cnt = 0;
95 95 int total_len, strong_order, fc_bits, cksum;
96 96
97 97
98 98 /* initialize the FMA retry loop */
99 99 hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);
100 100
101 101 ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
102 - _NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))
103 102
104 103 /* Grab the lock for the WRID list */
105 104 membar_consumer();
106 105
107 106 /* Save away some initial QP state */
108 107 wq = qp->qp_sq_wqhdr;
109 108 qsize_msk = wq->wq_mask;
110 109 hdrmwqes = qp->qp_sq_hdrmwqes; /* in WQEs */
111 110 sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);
112 111
113 112 tail = wq->wq_tail;
114 113 head = wq->wq_head;
115 114 status = DDI_SUCCESS;
116 115
117 116 post_next:
118 117 /*
119 118 * Check for "queue full" condition. If the queue
120 119 * is already full, then no more WQEs can be posted.
121 120 * So break out, ring a doorbell (if necessary) and
122 121 * return an error
123 122 */
124 123 if (wq->wq_full != 0) {
125 124 status = IBT_QP_FULL;
126 125 goto done;
127 126 }
128 127
129 128 next_tail = (tail + 1) & qsize_msk;
130 129 if (((tail + hdrmwqes) & qsize_msk) == head) {
131 130 wq->wq_full = 1;
132 131 }
133 132
134 133 desc = HERMON_QP_SQ_ENTRY(qp, tail);
135 134
136 135 nds = wr->wr_nds;
137 136 sgl = wr->wr_sgl;
138 137 num_ds = 0;
139 138 strong_order = 0;
140 139 fc_bits = 0;
141 140 cksum = 0;
142 141
143 142 /*
144 143 * Build a Send or Send_LSO WQE
145 144 */
146 145 switch (wr->wr_opcode) {
147 146 case IBT_WRC_SEND_LSO:
148 147 if (wr->wr_trans != IBT_UD_SRV) {
149 148 status = IBT_QP_SRV_TYPE_INVALID;
150 149 goto done;
151 150 }
152 151 nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
153 152 if (wr->wr_flags & IBT_WR_SEND_CKSUM)
154 153 cksum = 0x30;
155 154 if (wr->wr.ud_lso.lso_hdr_sz > 60) {
156 155 nopcode |= (1 << 6); /* ReRead bit must be set */
157 156 }
158 157 dest = wr->wr.ud_lso.lso_ud_dest;
159 158 ah = (hermon_ahhdl_t)dest->ud_ah;
160 159 if (ah == NULL) {
161 160 status = IBT_AH_HDL_INVALID;
162 161 goto done;
163 162 }
164 163 ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
165 164 sizeof (hermon_hw_snd_wqe_ctrl_t));
166 165 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
167 166 sizeof (hermon_hw_snd_wqe_ud_t));
168 167 HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
169 168
170 169 total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
171 170 if ((uintptr_t)ds + total_len + (nds * 16) >
172 171 (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz)) {
173 172 status = IBT_QP_SGL_LEN_INVALID;
174 173 goto done;
175 174 }
176 175 old_ds = ds;
177 176 bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)old_ds + 1,
178 177 wr->wr.ud_lso.lso_hdr_sz);
179 178 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
180 179 i = 0;
181 180 break;
182 181
183 182 case IBT_WRC_SEND:
184 183 nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
185 184 if (qp->qp_serv_type == HERMON_QP_UD) {
186 185 if (wr->wr_trans != IBT_UD_SRV) {
187 186 status = IBT_QP_SRV_TYPE_INVALID;
188 187 goto done;
189 188 }
190 189 if (wr->wr_flags & IBT_WR_SEND_CKSUM)
191 190 cksum = 0x30;
192 191 dest = wr->wr.ud.udwr_dest;
193 192 } else if (qp->qp_serv_type == HERMON_QP_RFCI) {
194 193 if (wr->wr_trans != IBT_RFCI_SRV) {
195 194 status = IBT_QP_SRV_TYPE_INVALID;
196 195 goto done;
197 196 }
198 197 rfci = &wr->wr.fc.rfci_send;
199 198 if ((wr->wr_flags & IBT_WR_SEND_FC_CRC) != 0) {
200 199 nopcode |= (rfci->rfci_eof << 16);
201 200 fc_bits = 0x40; /* set FCRC */
202 201 }
203 202 dest = rfci->rfci_dest;
204 203 } else {
205 204 status = IBT_QP_OP_TYPE_INVALID;
206 205 goto done;
207 206 }
208 207 if (wr->wr_flags & IBT_WR_SEND_IMMED) {
209 208 /* "|=" changes 0xa to 0xb without touching FCEOF */
210 209 nopcode |= HERMON_WQE_SEND_NOPCODE_SENDI;
211 210 immed_data = wr->wr.ud.udwr_immed;
212 211 }
213 212 ah = (hermon_ahhdl_t)dest->ud_ah;
214 213 if (ah == NULL) {
215 214 status = IBT_AH_HDL_INVALID;
216 215 goto done;
217 216 }
218 217 ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
219 218 sizeof (hermon_hw_snd_wqe_ctrl_t));
220 219 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
221 220 sizeof (hermon_hw_snd_wqe_ud_t));
222 221 HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
223 222 i = 0;
224 223 break;
225 224
226 225 case IBT_WRC_INIT_SEND_FCMD:
227 226 if (qp->qp_serv_type != HERMON_QP_FCMND) {
228 227 status = IBT_QP_OP_TYPE_INVALID;
229 228 goto done;
230 229 }
231 230 if (wr->wr_trans != IBT_FCMD_SRV) {
232 231 status = IBT_QP_SRV_TYPE_INVALID;
233 232 goto done;
234 233 }
235 234 nopcode = HERMON_WQE_FCP_OPCODE_INIT_AND_SEND;
236 235 is = wr->wr.fc.fc_is;
237 236 dest = is->is_ctl.fc_dest;
238 237 ah = (hermon_ahhdl_t)dest->ud_ah;
239 238 if (ah == NULL) {
240 239 status = IBT_AH_HDL_INVALID;
241 240 goto done;
242 241 }
243 242 ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
244 243 sizeof (hermon_hw_snd_wqe_ctrl_t));
245 244 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
246 245 sizeof (hermon_hw_snd_wqe_ud_t));
247 246 HERMON_WQE_BUILD_UD(qp, ud, ah, dest);
248 247 old_ds = ds;
249 248 /* move ds beyond the FCP-3 Init Segment */
250 249 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + 0x10);
251 250 i = 0;
252 251 break;
253 252
254 253 case IBT_WRC_FAST_REG_PMR:
255 254 {
256 255 hermon_hw_snd_wqe_frwr_t *frwr;
257 256
258 257 if (qp->qp_serv_type != HERMON_QP_FCMND) {
259 258 status = IBT_QP_OP_TYPE_INVALID;
260 259 goto done;
261 260 }
262 261 if (wr->wr_trans != IBT_FCMD_SRV) {
263 262 status = IBT_QP_SRV_TYPE_INVALID;
264 263 goto done;
265 264 }
266 265 nopcode = HERMON_WQE_SEND_NOPCODE_FRWR;
267 266 frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc +
268 267 sizeof (hermon_hw_snd_wqe_ctrl_t));
269 268 HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.fc.reg_pmr);
270 269 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr +
271 270 sizeof (hermon_hw_snd_wqe_frwr_t));
272 271 nds = 0;
273 272 strong_order = 0x80;
274 273 break;
275 274 }
276 275
277 276 #if 0
278 277 /* firmware does not support this */
279 278 case IBT_WRC_LOCAL_INVALIDATE:
280 279 {
281 280 hermon_hw_snd_wqe_local_inv_t *li;
282 281
283 282 if (qp->qp_serv_type != HERMON_QP_FCMND) {
284 283 status = IBT_QP_OP_TYPE_INVALID;
285 284 goto done;
286 285 }
287 286 if (wr->wr_trans != IBT_FCMD_SRV) {
288 287 status = IBT_QP_SRV_TYPE_INVALID;
289 288 goto done;
290 289 }
291 290 nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV;
292 291 li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc +
293 292 sizeof (hermon_hw_snd_wqe_ctrl_t));
294 293 HERMON_WQE_BUILD_LI(qp, li, wr->wr.fc.li);
295 294 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li +
296 295 sizeof (hermon_hw_snd_wqe_local_inv_t));
297 296 nds = 0;
298 297 strong_order = 0x80;
299 298 break;
300 299 }
301 300 #endif
302 301 default:
303 302 status = IBT_QP_OP_TYPE_INVALID;
304 303 goto done;
305 304 }
306 305
307 306 if (nds > qp->qp_sq_sgl) {
308 307 status = IBT_QP_SGL_LEN_INVALID;
309 308 goto done;
310 309 }
311 310 for (last_ds = num_ds, j = i; j < nds; j++) {
312 311 if (sgl[j].ds_len != 0)
313 312 last_ds++; /* real last ds of wqe to fill */
314 313 }
315 314 desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
316 315 for (j = nds; --j >= i; ) {
317 316 if (sgl[j].ds_len == 0) {
318 317 continue;
319 318 }
320 319
321 320 /*
322 321 * Fill in the Data Segment(s) for the current WQE, using the
323 322 * information contained in the scatter-gather list of the
324 323 * work request.
325 324 */
326 325 last_ds--;
327 326 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
328 327 }
329 328
330 329 membar_producer();
331 330
332 331 if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
333 332 HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
334 333 wr->wr.ud_lso.lso_hdr_sz);
335 334 } else if (wr->wr_opcode == IBT_WRC_INIT_SEND_FCMD) {
336 335 /* This sits in the STAMP, so must be set after setting SGL */
337 336 HERMON_WQE_BUILD_FCP3_INIT(old_ds, is->is_ctl.fc_frame_ctrl,
338 337 is->is_cs_priority, is->is_tx_seq_id, is->is_fc_mtu,
339 338 is->is_dest_id, is->is_op, is->is_rem_exch,
340 339 is->is_exch_qp_idx);
341 340
342 341 /* The following will be used in HERMON_WQE_SET_CTRL_SEGMENT */
343 342 /* SIT bit in FCP-3 ctrl segment */
344 343 desc_sz |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_SIT) ? 0x80 : 0;
345 344 /* LS bit in FCP-3 ctrl segment */
346 345 fc_bits |= (is->is_ctl.fc_frame_ctrl & IBT_FCTL_LAST_SEQ) ?
347 346 0x10000 : 0;
348 347 fc_bits |= ((is->is_ctl.fc_routing_ctrl & 0xF) << 20) |
349 348 (is->is_ctl.fc_seq_id << 24);
350 349 immed_data = is->is_ctl.fc_parameter;
351 350 }
352 351
353 352 fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;
354 353
355 354 signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
356 355 (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0;
357 356
358 357 solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0;
359 358
360 359 HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data,
361 360 solicited, signaled_dbd, cksum, qp, strong_order, fc_bits);
362 361
363 362 wq->wq_wrid[tail] = wr->wr_id;
364 363
365 364 tail = next_tail;
366 365
367 366 /* Update some of the state in the QP */
368 367 wq->wq_tail = tail;
369 368
370 369 membar_producer();
371 370
372 371 /* Now set the ownership bit and opcode (first dword). */
373 372 HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);
374 373
375 374 posted_cnt++;
376 375 if (--num_wr > 0) {
377 376 /* do the invalidate of the headroom */
378 377 wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
379 378 (tail + hdrmwqes) & qsize_msk);
380 379 for (i = 16; i < sectperwqe; i += 16) {
381 380 wqe_start[i] = 0xFFFFFFFF;
382 381 }
383 382
384 383 wr++;
385 384 goto post_next;
386 385 }
387 386 done:
388 387 if (posted_cnt != 0) {
389 388 ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);
390 389
391 390 membar_producer();
392 391
393 392 /* the FMA retry loop starts for Hermon doorbell register. */
394 393 hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
395 394 fm_status, fm_test_num);
396 395
397 396 HERMON_UAR_DOORBELL(state, uarhdl,
398 397 (uint64_t *)(void *)&state->hs_uar->send,
399 398 (uint64_t)qp->qp_ring);
400 399
401 400 /* the FMA retry loop ends. */
402 401 hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
403 402 fm_status, fm_test_num);
404 403
405 404 /* do the invalidate of the headroom */
406 405 wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
407 406 (tail + hdrmwqes) & qsize_msk);
408 407 for (i = 16; i < sectperwqe; i += 16) {
409 408 wqe_start[i] = 0xFFFFFFFF;
410 409 }
411 410 }
412 411 if (num_posted != NULL)
413 412 *num_posted = posted_cnt;
414 413
415 414 mutex_exit(&qp->qp_sq_lock);
416 415
417 416 return (status);
418 417
419 418 pio_error:
420 419 mutex_exit(&qp->qp_sq_lock);
421 420 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
422 421 return (ibc_get_ci_failure(0));
423 422 }
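The ordering in hermon_post_send_ud() is the load-bearing part: the WQE body must be globally visible before the ownership/opcode dword hands the entry to the HCA, and that dword must be visible before the doorbell. A minimal sketch of the publish sequence, where build_wqe_body() is a hypothetical stand-in for the opcode switch above:

	build_wqe_body(desc, wr);	/* hypothetical: UD segment + SGLs */
	membar_producer();		/* body visible before owner bit */
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);
	membar_producer();		/* owner bit visible before doorbell */
	HERMON_UAR_DOORBELL(state, uarhdl,
	    (uint64_t *)(void *)&state->hs_uar->send, (uint64_t)qp->qp_ring);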
424 423
425 424 static int
426 425 hermon_post_send_rc(hermon_state_t *state, hermon_qphdl_t qp,
427 426 ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
428 427 {
429 428 uint64_t *desc;
430 429 hermon_workq_hdr_t *wq;
431 430 uint32_t desc_sz;
432 431 uint32_t signaled_dbd, solicited;
433 432 uint32_t head, tail, next_tail, qsize_msk;
434 433 uint32_t hdrmwqes;
435 434 int status;
436 435 uint32_t nopcode, fence, immed_data = 0;
437 436 hermon_hw_snd_wqe_remaddr_t *rc;
438 437 hermon_hw_snd_wqe_atomic_t *at;
439 438 hermon_hw_snd_wqe_bind_t *bn;
440 439 hermon_hw_snd_wqe_frwr_t *frwr;
441 440 hermon_hw_snd_wqe_local_inv_t *li;
442 441 hermon_hw_wqe_sgl_t *ds;
443 442 ibt_wr_ds_t *sgl;
444 443 int nds;
445 444 int i, last_ds, num_ds;
446 445 uint32_t *wqe_start;
447 446 int sectperwqe;
448 447 uint_t posted_cnt = 0;
449 448 int strong_order;
450 449 int print_rdma;
451 450 int rlen;
452 451 uint32_t rkey;
453 452 uint64_t raddr;
454 453
455 454 /* initialize the FMA retry loop */
456 455 hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);
457 456
458 457 ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
459 - _NOTE(LOCK_RELEASED_AS_SIDE_EFFECT(&qp->qp_sq_lock))
460 458
461 459 /* Save away some initial QP state */
462 460 wq = qp->qp_sq_wqhdr;
463 461 qsize_msk = wq->wq_mask;
464 462 hdrmwqes = qp->qp_sq_hdrmwqes; /* in WQEs */
465 463 sectperwqe = 1 << (qp->qp_sq_log_wqesz - 2);
466 464
467 465 tail = wq->wq_tail;
468 466 head = wq->wq_head;
469 467 status = DDI_SUCCESS;
470 468
471 469 post_next:
472 470 print_rdma = 0;
473 471 rlen = 0;
474 472 strong_order = 0;
475 473
476 474 /*
477 475 * Check for "queue full" condition. If the queue
478 476 * is already full, then no more WQEs can be posted.
479 477 * So break out, ring a doorbell (if necessary) and
480 478 * return an error
481 479 */
482 480 if (wq->wq_full != 0) {
483 481 status = IBT_QP_FULL;
484 482 goto done;
485 483 }
486 484 next_tail = (tail + 1) & qsize_msk;
487 485 if (((tail + hdrmwqes) & qsize_msk) == head) {
488 486 wq->wq_full = 1;
489 487 }
490 488
491 489 desc = HERMON_QP_SQ_ENTRY(qp, tail);
492 490
493 491 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
494 492 sizeof (hermon_hw_snd_wqe_ctrl_t));
495 493 nds = wr->wr_nds;
496 494 sgl = wr->wr_sgl;
497 495 num_ds = 0;
498 496 if (wr->wr_trans != IBT_RC_SRV) {
499 497 status = IBT_QP_SRV_TYPE_INVALID;
500 498 goto done;
501 499 }
502 500
503 501 /*
504 502 * Validate the operation type. For RC requests, we allow
505 503 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
506 504 * operations, and memory window "Bind"
507 505 */
508 506 switch (wr->wr_opcode) {
509 507 default:
510 508 status = IBT_QP_OP_TYPE_INVALID;
511 509 goto done;
512 510
513 511 case IBT_WRC_SEND:
514 512 if (wr->wr_flags & IBT_WR_SEND_REMOTE_INVAL) {
515 513 nopcode = HERMON_WQE_SEND_NOPCODE_SND_INV;
516 514 immed_data = wr->wr.rc.rcwr.send_inval;
517 515 } else if (wr->wr_flags & IBT_WR_SEND_IMMED) {
518 516 nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
519 517 immed_data = wr->wr.rc.rcwr.send_immed;
520 518 } else {
521 519 nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
522 520 }
523 521 break;
524 522
525 523 /*
526 524 * If this is an RDMA Read or RDMA Write request, then fill
527 525 * in the "Remote Address" header fields.
528 526 */
529 527 case IBT_WRC_RDMAW:
530 528 if (wr->wr_flags & IBT_WR_SEND_IMMED) {
531 529 nopcode = HERMON_WQE_SEND_NOPCODE_RDMAWI;
532 530 immed_data = wr->wr.rc.rcwr.rdma.rdma_immed;
533 531 } else {
534 532 nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
535 533 }
536 534 /* FALLTHROUGH */
537 535 case IBT_WRC_RDMAR:
538 536 if (wr->wr_opcode == IBT_WRC_RDMAR)
539 537 nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
540 538 rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
541 539 sizeof (hermon_hw_snd_wqe_ctrl_t));
542 540
543 541 /*
544 542 * Build the Remote Address Segment for the WQE, using
545 543 * the information from the RC work request.
546 544 */
547 545 HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);
548 546
549 547 if (hermon_rdma_debug) {
550 548 print_rdma = hermon_rdma_debug;
551 549 rkey = wr->wr.rc.rcwr.rdma.rdma_rkey;
552 550 raddr = wr->wr.rc.rcwr.rdma.rdma_raddr;
553 551 }
554 552
555 553 /* Update "ds" for filling in Data Segments (below) */
556 554 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
557 555 sizeof (hermon_hw_snd_wqe_remaddr_t));
558 556 break;
559 557
560 558 /*
561 559 * If this is one of the Atomic type operations (i.e.
562 560 * Compare-Swap or Fetch-Add), then fill in both the "Remote
563 561 * Address" header fields and the "Atomic" header fields.
564 562 */
565 563 case IBT_WRC_CSWAP:
566 564 nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
567 565 /* FALLTHROUGH */
568 566 case IBT_WRC_FADD:
569 567 if (wr->wr_opcode == IBT_WRC_FADD)
570 568 nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
571 569 rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
572 570 sizeof (hermon_hw_snd_wqe_ctrl_t));
573 571 at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
574 572 sizeof (hermon_hw_snd_wqe_remaddr_t));
575 573
576 574 /*
577 575 * Build the Remote Address and Atomic Segments for
578 576 * the WQE, using the information from the RC Atomic
579 577 * work request.
580 578 */
581 579 HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
582 580 HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);
583 581
584 582 /* Update "ds" for filling in Data Segments (below) */
585 583 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
586 584 sizeof (hermon_hw_snd_wqe_atomic_t));
587 585
588 586 /*
589 587 * Update "nds" and "sgl" because Atomic requests have
590 588 * only a single Data Segment.
591 589 */
592 590 nds = 1;
593 591 sgl = wr->wr_sgl;
594 592 break;
595 593
596 594 /*
597 595 * If this is memory window Bind operation, then we call the
598 596 * hermon_wr_bind_check() routine to validate the request and
599 597 * to generate the updated RKey. If this is successful, then
600 598 * we fill in the WQE's "Bind" header fields.
601 599 */
602 600 case IBT_WRC_BIND:
603 601 nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
604 602 status = hermon_wr_bind_check(state, wr);
605 603 if (status != DDI_SUCCESS)
606 604 goto done;
607 605
608 606 bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
609 607 sizeof (hermon_hw_snd_wqe_ctrl_t));
610 608
611 609 /*
612 610 * Build the Bind Memory Window Segments for the WQE,
613 611 * using the information from the RC Bind memory
614 612 * window work request.
615 613 */
616 614 HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);
617 615
618 616 /*
619 617 * Update the "ds" pointer. Even though the "bind"
620 618 * operation requires no SGLs, this is necessary to
621 619 * facilitate the correct descriptor size calculations
622 620 * (below).
623 621 */
624 622 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
625 623 sizeof (hermon_hw_snd_wqe_bind_t));
626 624 nds = 0;
627 625 break;
628 626
629 627 case IBT_WRC_FAST_REG_PMR:
630 628 nopcode = HERMON_WQE_SEND_NOPCODE_FRWR;
631 629 frwr = (hermon_hw_snd_wqe_frwr_t *)((uintptr_t)desc +
632 630 sizeof (hermon_hw_snd_wqe_ctrl_t));
633 631 HERMON_WQE_BUILD_FRWR(qp, frwr, wr->wr.rc.rcwr.reg_pmr);
634 632 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)frwr +
635 633 sizeof (hermon_hw_snd_wqe_frwr_t));
636 634 nds = 0;
637 635 strong_order = 0x80;
638 636 break;
639 637
640 638 case IBT_WRC_LOCAL_INVALIDATE:
641 639 nopcode = HERMON_WQE_SEND_NOPCODE_LCL_INV;
642 640 li = (hermon_hw_snd_wqe_local_inv_t *)((uintptr_t)desc +
643 641 sizeof (hermon_hw_snd_wqe_ctrl_t));
644 642 HERMON_WQE_BUILD_LI(qp, li, wr->wr.rc.rcwr.li);
645 643 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)li +
646 644 sizeof (hermon_hw_snd_wqe_local_inv_t));
647 645 nds = 0;
648 646 strong_order = 0x80;
649 647 break;
650 648 }
651 649
652 650 /*
653 651 * Now fill in the Data Segments (SGL) for the Send WQE based
654 652 * on the values set up above (i.e. "sgl", "nds", and the "ds"
655 653 * pointer). Start by checking for a valid number of SGL entries
656 654 */
657 655 if (nds > qp->qp_sq_sgl) {
658 656 status = IBT_QP_SGL_LEN_INVALID;
659 657 goto done;
660 658 }
661 659
662 660 for (last_ds = num_ds, i = 0; i < nds; i++) {
663 661 if (sgl[i].ds_len != 0)
664 662 last_ds++; /* real last ds of wqe to fill */
665 663 }
666 664 desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
667 665 for (i = nds; --i >= 0; ) {
668 666 if (sgl[i].ds_len == 0) {
669 667 continue;
670 668 }
671 669 rlen += sgl[i].ds_len;
672 670 if (print_rdma & 0x2)
673 671 IBTF_DPRINTF_L2("rdma", "post: [%d]: laddr %llx "
674 672 "llen %x", i, sgl[i].ds_va, sgl[i].ds_len);
675 673
676 674 /*
677 675 * Fill in the Data Segment(s) for the current WQE, using the
678 676 * information contained in the scatter-gather list of the
679 677 * work request.
680 678 */
681 679 last_ds--;
682 680 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[i]);
683 681 }
684 682 /* ensure RDMA READ does not exceed HCA limit */
685 683 if ((wr->wr_opcode == IBT_WRC_RDMAR) && (desc_sz >
686 684 state->hs_ibtfinfo.hca_attr->hca_conn_rdma_read_sgl_sz + 2)) {
687 685 status = IBT_QP_SGL_LEN_INVALID;
688 686 goto done;
689 687 }
690 688
691 689 if (print_rdma & 0x1) {
692 690 IBTF_DPRINTF_L2("rdma", "post: indx %x rkey %x raddr %llx "
693 691 "total len %x", tail, rkey, raddr, rlen);
694 692 }
695 693
696 694 fence = (wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;
697 695
698 696 signaled_dbd = ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
699 697 (wr->wr_flags & IBT_WR_SEND_SIGNAL)) ? 0xC : 0;
700 698
701 699 solicited = (wr->wr_flags & IBT_WR_SEND_SOLICIT) ? 0x2 : 0;
702 700
703 701 HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz, fence, immed_data, solicited,
704 702 signaled_dbd, 0, qp, strong_order, 0);
705 703
706 704 wq->wq_wrid[tail] = wr->wr_id;
707 705
708 706 tail = next_tail;
709 707
710 708 /* Update some of the state in the QP */
711 709 wq->wq_tail = tail;
712 710
713 711 membar_producer();
714 712
715 713 /* Now set the ownership bit of the first one in the chain. */
716 714 HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc, nopcode);
717 715
718 716 posted_cnt++;
719 717 if (--num_wr > 0) {
720 718 /* do the invalidate of the headroom */
721 719 wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
722 720 (tail + hdrmwqes) & qsize_msk);
723 721 for (i = 16; i < sectperwqe; i += 16) {
724 722 wqe_start[i] = 0xFFFFFFFF;
725 723 }
726 724
727 725 wr++;
728 726 goto post_next;
729 727 }
730 728 done:
731 729
732 730 if (posted_cnt != 0) {
733 731 ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);
734 732
735 733 membar_producer();
736 734
737 735 /* the FMA retry loop starts for Hermon doorbell register. */
738 736 hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
739 737 fm_status, fm_test_num);
740 738
741 739 /* Ring the doorbell */
742 740 HERMON_UAR_DOORBELL(state, uarhdl,
743 741 (uint64_t *)(void *)&state->hs_uar->send,
744 742 (uint64_t)qp->qp_ring);
745 743
746 744 /* the FMA retry loop ends. */
747 745 hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
748 746 fm_status, fm_test_num);
749 747
750 748 /* do the invalidate of the headroom */
751 749 wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp,
752 750 (tail + hdrmwqes) & qsize_msk);
753 751 for (i = 16; i < sectperwqe; i += 16) {
754 752 wqe_start[i] = 0xFFFFFFFF;
755 753 }
756 754 }
757 755 /*
758 756 * Update the "num_posted" return value (if necessary).
759 757 * Then drop the locks and return success.
760 758 */
761 759 if (num_posted != NULL) {
762 760 *num_posted = posted_cnt;
763 761 }
764 762
765 763 mutex_exit(&qp->qp_sq_lock);
766 764 return (status);
767 765
768 766 pio_error:
769 767 mutex_exit(&qp->qp_sq_lock);
770 768 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
771 769 return (ibc_get_ci_failure(0));
772 770 }
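Both send paths share the same data-segment discipline: count the non-empty SGL entries, size the descriptor, then fill backward so zero-length entries are skipped (the hardware decodes a zero byte count as a 2GB transfer, per the comment in hermon_wqe_send_build() below). The pattern in isolation:

	for (last_ds = num_ds, i = 0; i < nds; i++)
		if (sgl[i].ds_len != 0)
			last_ds++;	/* one past the last real ds */
	desc_sz = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc) >> 0x4;
	for (i = nds; --i >= 0; )
		if (sgl[i].ds_len != 0)
			HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[--last_ds], &sgl[i]);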
773 771
774 772 /*
775 773 * hermon_post_send()
776 774 * Context: Can be called from interrupt or base context.
777 775 */
778 776 int
779 777 hermon_post_send(hermon_state_t *state, hermon_qphdl_t qp,
780 778 ibt_send_wr_t *wr, uint_t num_wr, uint_t *num_posted)
781 779 {
782 780 ibt_send_wr_t *curr_wr;
783 781 hermon_workq_hdr_t *wq;
784 782 hermon_ahhdl_t ah;
785 783 uint64_t *desc, *prev;
786 784 uint32_t desc_sz;
787 785 uint32_t signaled_dbd, solicited;
788 786 uint32_t head, tail, next_tail, qsize_msk;
789 787 uint32_t hdrmwqes;
790 788 uint_t currindx, wrindx, numremain;
791 789 uint_t chainlen;
792 790 uint_t posted_cnt, maxstat;
793 791 uint_t total_posted;
794 792 int status;
795 793 uint32_t nopcode, fence, immed_data = 0;
796 794 uint32_t prev_nopcode;
797 795 uint_t qp_state;
798 796
799 797 /* initialize the FMA retry loop */
800 798 hermon_pio_init(fm_loop_cnt, fm_status, fm_test);
801 799
802 800 /*
803 801 * Check for user-mappable QP memory. Note: We do not allow kernel
804 802 * clients to post to QP memory that is accessible directly by the
805 803 * user. If the QP memory is user accessible, then return an error.
806 804 */
807 805 if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
808 806 return (IBT_QP_HDL_INVALID);
809 807 }
810 808
811 809 mutex_enter(&qp->qp_sq_lock);
812 810
813 811 /*
814 812 * Check QP state. Can not post Send requests from the "Reset",
815 813 * "Init", or "RTR" states
816 814 */
817 815 qp_state = qp->qp_state_for_post_send;
818 816 if ((qp_state == HERMON_QP_RESET) ||
819 817 (qp_state == HERMON_QP_INIT) ||
820 818 (qp_state == HERMON_QP_RTR)) {
821 819 mutex_exit(&qp->qp_sq_lock);
822 820 return (IBT_QP_STATE_INVALID);
823 821 }
824 822
825 823 if (qp->qp_is_special)
826 824 goto post_many;
827 825
828 826 /* Use these optimized functions most of the time */
829 827 if (qp->qp_type == IBT_UD_RQP) {
830 828 return (hermon_post_send_ud(state, qp, wr, num_wr, num_posted));
831 829 }
832 830
833 831 if (qp->qp_serv_type == HERMON_QP_RC) {
834 832 return (hermon_post_send_rc(state, qp, wr, num_wr, num_posted));
835 833 }
836 834
837 835 if (qp->qp_serv_type == HERMON_QP_UC)
838 836 goto post_many;
839 837
840 838 mutex_exit(&qp->qp_sq_lock);
841 839 return (IBT_QP_SRV_TYPE_INVALID);
842 840
843 841 post_many:
844 842 /* general loop for non-optimized posting */
845 843
846 844 /* Save away some initial QP state */
847 845 wq = qp->qp_sq_wqhdr;
848 846 qsize_msk = wq->wq_mask;
849 847 tail = wq->wq_tail;
850 848 head = wq->wq_head;
851 849 hdrmwqes = qp->qp_sq_hdrmwqes; /* in WQEs */
852 850
853 851 /* Initialize posted_cnt */
854 852 posted_cnt = 0;
855 853 total_posted = 0;
856 854
857 855 /*
858 856 * For each ibt_send_wr_t in the wr[] list passed in, parse the
859 857 * request and build a Send WQE. NOTE: Because we are potentially
860 858 * building a chain of WQEs to post, we want to build them all first,
861 859 * and set the valid (HW Ownership) bit on all but the first.
862 860 * However, we do not want to validate the first one until the
863 861 * entire chain of WQEs has been built. Then, to finish,
864 862 * we set the valid bit in the first, flush if needed, and as a last
865 863 * step ring the appropriate doorbell. NOTE: the doorbell ring may
866 864 * NOT be needed if the HCA is already processing, but the doorbell
867 865 * ring will be done regardless. NOTE ALSO: It is possible for
868 866 * more Work Requests to be posted than the HW will support in one
869 867 * shot. If this happens, we need to be able to post and ring
870 868 * several chains here until the entire request is complete.
871 869 * NOTE ALSO: the term "chain" is used to differentiate it from the
872 870 * Work Request List passed in, and because that's the terminology
873 871 * from the previous generations of HCA - but the WQEs are not, in fact
874 872 * chained together for Hermon
875 873 */
876 874
877 875 wrindx = 0;
878 876 numremain = num_wr;
879 877 status = DDI_SUCCESS;
880 878 while ((wrindx < num_wr) && (status == DDI_SUCCESS)) {
881 879 /*
882 880 * For the first WQE on a new chain we need "prev" to point
883 881 * to the current descriptor.
884 882 */
885 883 prev = HERMON_QP_SQ_ENTRY(qp, tail);
886 884
887 885 /*
888 886 * Break the request up into lists that are less than or
889 887 * equal to the maximum number of WQEs that can be posted
890 888 * per doorbell ring - 256 currently
891 889 */
892 890 chainlen = (numremain > HERMON_QP_MAXDESC_PER_DB) ?
893 891 HERMON_QP_MAXDESC_PER_DB : numremain;
894 892 numremain -= chainlen;
895 893
896 894 for (currindx = 0; currindx < chainlen; currindx++, wrindx++) {
897 895 /*
898 896 * Check for "queue full" condition. If the queue
899 897 * is already full, then no more WQEs can be posted.
900 898 * So break out, ring a doorbell (if necessary) and
901 899 * return an error
902 900 */
903 901 if (wq->wq_full != 0) {
904 902 status = IBT_QP_FULL;
905 903 break;
906 904 }
907 905
908 906 /*
909 907 * Increment the "tail index". Check for "queue
910 908 * full" condition incl. headroom. If we detect that
911 909 * the current work request is going to fill the work
912 910 * queue, then we mark this condition and continue.
913 911 * Don't need >=, because going one-by-one we have to
914 912 * hit it exactly sooner or later
915 913 */
916 914
917 915 next_tail = (tail + 1) & qsize_msk;
918 916 if (((tail + hdrmwqes) & qsize_msk) == head) {
919 917 wq->wq_full = 1;
920 918 }
921 919
922 920 /*
923 921 * Get the address of the location where the next
924 922 * Send WQE should be built
925 923 */
926 924 desc = HERMON_QP_SQ_ENTRY(qp, tail);
927 925 /*
928 926 * Call hermon_wqe_send_build() to build the WQE
929 927 * at the given address. This routine uses the
930 928 * information in the ibt_send_wr_t list (wr[]) and
931 929 * returns the size of the WQE when it returns.
932 930 */
933 931 status = hermon_wqe_send_build(state, qp,
934 932 &wr[wrindx], desc, &desc_sz);
935 933 if (status != DDI_SUCCESS) {
936 934 break;
937 935 }
938 936
939 937 /*
940 938 * Now, build the Ctrl Segment based on
941 939 * what was just done
942 940 */
943 941 curr_wr = &wr[wrindx];
944 942
945 943 switch (curr_wr->wr_opcode) {
946 944 case IBT_WRC_RDMAW:
947 945 if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
948 946 nopcode =
949 947 HERMON_WQE_SEND_NOPCODE_RDMAWI;
950 948 immed_data =
951 949 hermon_wr_get_immediate(curr_wr);
952 950 } else {
953 951 nopcode = HERMON_WQE_SEND_NOPCODE_RDMAW;
954 952 }
955 953 break;
956 954
957 955 case IBT_WRC_SEND:
958 956 if (curr_wr->wr_flags & IBT_WR_SEND_IMMED) {
959 957 nopcode = HERMON_WQE_SEND_NOPCODE_SENDI;
960 958 immed_data =
961 959 hermon_wr_get_immediate(curr_wr);
962 960 } else {
963 961 nopcode = HERMON_WQE_SEND_NOPCODE_SEND;
964 962 }
965 963 break;
966 964
967 965 case IBT_WRC_SEND_LSO:
968 966 nopcode = HERMON_WQE_SEND_NOPCODE_LSO;
969 967 break;
970 968
971 969 case IBT_WRC_RDMAR:
972 970 nopcode = HERMON_WQE_SEND_NOPCODE_RDMAR;
973 971 break;
974 972
975 973 case IBT_WRC_CSWAP:
976 974 nopcode = HERMON_WQE_SEND_NOPCODE_ATMCS;
977 975 break;
978 976
979 977 case IBT_WRC_FADD:
980 978 nopcode = HERMON_WQE_SEND_NOPCODE_ATMFA;
981 979 break;
982 980
983 981 case IBT_WRC_BIND:
984 982 nopcode = HERMON_WQE_SEND_NOPCODE_BIND;
985 983 break;
986 984 }
987 985
988 986 fence = (curr_wr->wr_flags & IBT_WR_SEND_FENCE) ? 1 : 0;
989 987
990 988 /*
991 989 * now, build up the control segment, leaving the
992 990 * owner bit as it is
993 991 */
994 992
995 993 if ((qp->qp_sq_sigtype == HERMON_QP_SQ_ALL_SIGNALED) ||
996 994 (curr_wr->wr_flags & IBT_WR_SEND_SIGNAL)) {
997 995 signaled_dbd = 0xC;
998 996 } else {
999 997 signaled_dbd = 0;
1000 998 }
1001 999 if (curr_wr->wr_flags & IBT_WR_SEND_SOLICIT)
1002 1000 solicited = 0x2;
1003 1001 else
1004 1002 solicited = 0;
1005 1003
1006 1004 if (qp->qp_is_special) {
1007 1005 /* Ensure correctness, set the ReRead bit */
1008 1006 nopcode |= (1 << 6);
1009 1007 ah = (hermon_ahhdl_t)
1010 1008 curr_wr->wr.ud.udwr_dest->ud_ah;
1011 1009 mutex_enter(&ah->ah_lock);
1012 1010 maxstat = ah->ah_udav->max_stat_rate;
1013 1011 HERMON_WQE_SET_MLX_CTRL_SEGMENT(desc, desc_sz,
1014 1012 signaled_dbd, maxstat, ah->ah_udav->rlid,
1015 1013 qp, ah->ah_udav->sl);
1016 1014 mutex_exit(&ah->ah_lock);
1017 1015 } else {
1018 1016 HERMON_WQE_SET_CTRL_SEGMENT(desc, desc_sz,
1019 1017 fence, immed_data, solicited,
1020 1018 signaled_dbd, 0, qp, 0, 0);
1021 1019 }
1022 1020 wq->wq_wrid[tail] = curr_wr->wr_id;
1023 1021
1024 1022 /*
1025 1023 * If this is not the first descriptor on the current
1026 1024 * chain, then set the ownership bit.
1027 1025 */
1028 1026 if (currindx != 0) { /* not the first */
1029 1027 membar_producer();
1030 1028 HERMON_SET_SEND_WQE_OWNER(qp,
1031 1029 (uint32_t *)desc, nopcode);
1032 1030 } else
1033 1031 prev_nopcode = nopcode;
1034 1032
1035 1033 /*
1036 1034 * Update the current "tail index" and increment
1037 1035 * "posted_cnt"
1038 1036 */
1039 1037 tail = next_tail;
1040 1038 posted_cnt++;
1041 1039 }
1042 1040
1043 1041 /*
1044 1042 * If we reach here and there are one or more WQEs which have
1045 1043 * been successfully built as a chain, we have to finish up
1046 1044 * and prepare them for writing to the HW
1047 1045 * The steps are:
1048 1046 * 1. do the headroom fixup
1049 1047 * 2. add in the size of the headroom for the sync
1050 1048 * 3. write the owner bit for the first WQE
1051 1049 * 4. sync them
1052 1050 * 5. fix up the structures
1053 1051 * 6. hit the doorbell in UAR
1054 1052 */
1055 1053 if (posted_cnt != 0) {
1056 1054 ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);
1057 1055
1058 1056 /* do the invalidate of the headroom */
1059 1057
1060 1058 hermon_wqe_headroom(tail, qp);
1061 1059
1062 1060 /* Update some of the state in the QP */
1063 1061 wq->wq_tail = tail;
1064 1062 total_posted += posted_cnt;
1065 1063 posted_cnt = 0;
1066 1064
1067 1065 membar_producer();
1068 1066
1069 1067 /*
1070 1068 * Now set the ownership bit of the first
1071 1069 * one in the chain
1072 1070 */
1073 1071 HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)prev,
1074 1072 prev_nopcode);
1075 1073
1076 1074 /* the FMA retry loop starts for Hermon doorbell. */
1077 1075 hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt,
1078 1076 fm_status, fm_test);
1079 1077
1080 1078 HERMON_UAR_DOORBELL(state, uarhdl,
1081 1079 (uint64_t *)(void *)&state->hs_uar->send,
1082 1080 (uint64_t)qp->qp_ring);
1083 1081
1084 1082 /* the FMA retry loop ends. */
1085 1083 hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt,
1086 1084 fm_status, fm_test);
1087 1085 }
1088 1086 }
1089 1087
1090 1088 /*
1091 1089 * Update the "num_posted" return value (if necessary).
1092 1090 * Then drop the locks and return success.
1093 1091 */
1094 1092 if (num_posted != NULL) {
1095 1093 *num_posted = total_posted;
1096 1094 }
1097 1095 mutex_exit(&qp->qp_sq_lock);
1098 1096 return (status);
1099 1097
1100 1098 pio_error:
1101 1099 mutex_exit(&qp->qp_sq_lock);
1102 1100 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1103 1101 return (ibc_get_ci_failure(0));
1104 1102 }
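Boiled down, the chaining discipline described in the long comment above is: set the owner bit on every WQE except the first as it is built, then validate the first one only after the whole chain is in place, so the HCA can never start fetching a half-built chain. A sketch, where build_wqe() is a hypothetical stand-in for hermon_wqe_send_build() plus the control-segment setup:

	for (currindx = 0; currindx < chainlen; currindx++) {
		build_wqe(desc);	/* hypothetical helper */
		if (currindx != 0) {
			membar_producer();
			HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)desc,
			    nopcode);
		} else {
			prev_nopcode = nopcode;	/* defer first owner bit */
		}
	}
	membar_producer();
	HERMON_SET_SEND_WQE_OWNER(qp, (uint32_t *)prev, prev_nopcode);
	/* ...one doorbell ring then covers the entire chain */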
1105 1103
1106 1104
1107 1105 /*
1108 1106 * hermon_post_recv()
1109 1107 * Context: Can be called from interrupt or base context.
1110 1108 */
1111 1109 int
1112 1110 hermon_post_recv(hermon_state_t *state, hermon_qphdl_t qp,
1113 1111 ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
1114 1112 {
1115 1113 uint64_t *desc;
1116 1114 hermon_workq_hdr_t *wq;
1117 1115 uint32_t head, tail, next_tail, qsize_msk;
1118 1116 uint_t wrindx;
1119 1117 uint_t posted_cnt;
1120 1118 int status;
1121 1119
1122 1120 /*
1123 1121 * Check for user-mappable QP memory. Note: We do not allow kernel
1124 1122 * clients to post to QP memory that is accessible directly by the
1125 1123 * user. If the QP memory is user accessible, then return an error.
1126 1124 */
1127 1125 if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
1128 1126 return (IBT_QP_HDL_INVALID);
1129 1127 }
1130 1128
1131 1129 /* Initialize posted_cnt */
1132 1130 posted_cnt = 0;
1133 1131
1134 1132 mutex_enter(&qp->qp_lock);
1135 1133
1136 1134 /*
1137 1135 * Check if QP is associated with an SRQ
1138 1136 */
1139 1137 if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
1140 1138 mutex_exit(&qp->qp_lock);
1141 1139 return (IBT_SRQ_IN_USE);
1142 1140 }
1143 1141
1144 1142 /*
1145 1143 * Check QP state. Can not post Recv requests from the "Reset" state
1146 1144 */
1147 1145 if (qp->qp_state == HERMON_QP_RESET) {
1148 1146 mutex_exit(&qp->qp_lock);
1149 1147 return (IBT_QP_STATE_INVALID);
1150 1148 }
1151 1149
1152 1150 /* Check that work request transport type is valid */
1153 1151 if ((qp->qp_type != IBT_UD_RQP) &&
1154 1152 (qp->qp_serv_type != HERMON_QP_RC) &&
1155 1153 (qp->qp_serv_type != HERMON_QP_UC)) {
1156 1154 mutex_exit(&qp->qp_lock);
1157 1155 return (IBT_QP_SRV_TYPE_INVALID);
1158 1156 }
1159 1157
1160 1158 /*
1161 1159 * Grab the lock for the WRID list, i.e., membar_consumer().
1162 1160 * This is not needed because the mutex_enter() above has
1163 1161 * the same effect.
1164 1162 */
1165 1163
1166 1164 /* Save away some initial QP state */
1167 1165 wq = qp->qp_rq_wqhdr;
1168 1166 qsize_msk = wq->wq_mask;
1169 1167 tail = wq->wq_tail;
1170 1168 head = wq->wq_head;
1171 1169
1172 1170 wrindx = 0;
1173 1171 status = DDI_SUCCESS;
1174 1172
1175 1173 for (wrindx = 0; wrindx < num_wr; wrindx++) {
1176 1174 if (wq->wq_full != 0) {
1177 1175 status = IBT_QP_FULL;
1178 1176 break;
1179 1177 }
1180 1178 next_tail = (tail + 1) & qsize_msk;
1181 1179 if (next_tail == head) {
1182 1180 wq->wq_full = 1;
1183 1181 }
1184 1182 desc = HERMON_QP_RQ_ENTRY(qp, tail);
1185 1183 status = hermon_wqe_recv_build(state, qp, &wr[wrindx], desc);
1186 1184 if (status != DDI_SUCCESS) {
1187 1185 break;
1188 1186 }
1189 1187
1190 1188 wq->wq_wrid[tail] = wr[wrindx].wr_id;
1191 1189 qp->qp_rq_wqecntr++;
1192 1190
1193 1191 tail = next_tail;
1194 1192 posted_cnt++;
1195 1193 }
1196 1194
1197 1195 if (posted_cnt != 0) {
1198 1196
1199 1197 wq->wq_tail = tail;
1200 1198
1201 1199 membar_producer(); /* ensure wrids are visible */
1202 1200
1203 1201 /* Update the doorbell record w/ wqecntr */
1204 1202 HERMON_UAR_DB_RECORD_WRITE(qp->qp_rq_vdbr,
1205 1203 qp->qp_rq_wqecntr & 0xFFFF);
1206 1204 }
1207 1205
1208 1206 if (num_posted != NULL) {
1209 1207 *num_posted = posted_cnt;
1210 1208 }
1211 1209
1212 1210
1213 1211 mutex_exit(&qp->qp_lock);
1214 1212 return (status);
1215 1213 }
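Note the contrast with the send side: receive posting writes no UAR send doorbell. Instead it publishes the low 16 bits of the free-running WQE counter to an in-memory doorbell record once the WRIDs are visible. The publish step, in isolation:

	membar_producer();		/* WQEs and WRIDs visible first */
	HERMON_UAR_DB_RECORD_WRITE(qp->qp_rq_vdbr,
	    qp->qp_rq_wqecntr & 0xFFFF);	/* HW reads only low 16 bits */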
1216 1214
1217 1215 /*
1218 1216 * hermon_post_srq()
1219 1217 * Context: Can be called from interrupt or base context.
1220 1218 */
1221 1219 int
1222 1220 hermon_post_srq(hermon_state_t *state, hermon_srqhdl_t srq,
1223 1221 ibt_recv_wr_t *wr, uint_t num_wr, uint_t *num_posted)
1224 1222 {
1225 1223 uint64_t *desc;
1226 1224 hermon_workq_hdr_t *wq;
1227 1225 uint_t indx, wrindx;
1228 1226 uint_t posted_cnt;
1229 1227 int status;
1230 1228
1231 1229 mutex_enter(&srq->srq_lock);
1232 1230
1233 1231 /*
1234 1232 * Check for user-mappable SRQ memory. Note: We do not allow kernel
1235 1233 * clients to post to SRQ memory that is accessible directly by the
1236 1234 * user. If the SRQ memory is user accessible, then return an error.
1237 1235 */
1238 1236 if (srq->srq_is_umap) {
1239 1237 mutex_exit(&srq->srq_lock);
1240 1238 return (IBT_SRQ_HDL_INVALID);
1241 1239 }
1242 1240
1243 1241 /*
1244 1242 * Check SRQ state. Can not post Recv requests when SRQ is in error
1245 1243 */
1246 1244 if (srq->srq_state == HERMON_SRQ_STATE_ERROR) {
1247 1245 mutex_exit(&srq->srq_lock);
1248 1246 return (IBT_QP_STATE_INVALID);
1249 1247 }
1250 1248
1251 1249 status = DDI_SUCCESS;
1252 1250 posted_cnt = 0;
1253 1251 wq = srq->srq_wq_wqhdr;
1254 1252 indx = wq->wq_head;
1255 1253
1256 1254 for (wrindx = 0; wrindx < num_wr; wrindx++) {
1257 1255
1258 1256 if (indx == wq->wq_tail) {
1259 1257 status = IBT_QP_FULL;
1260 1258 break;
1261 1259 }
1262 1260 desc = HERMON_SRQ_WQE_ADDR(srq, indx);
1263 1261
1264 1262 wq->wq_wrid[indx] = wr[wrindx].wr_id;
1265 1263
1266 1264 status = hermon_wqe_srq_build(state, srq, &wr[wrindx], desc);
1267 1265 if (status != DDI_SUCCESS) {
1268 1266 break;
1269 1267 }
1270 1268
1271 1269 posted_cnt++;
1272 1270 indx = htons(((uint16_t *)desc)[1]);
1273 1271 wq->wq_head = indx;
1274 1272 }
1275 1273
1276 1274 if (posted_cnt != 0) {
1277 1275
1278 1276 srq->srq_wq_wqecntr += posted_cnt;
1279 1277
1280 1278 membar_producer(); /* ensure wrids are visible */
1281 1279
1282 1280 /* Ring the doorbell w/ wqecntr */
1283 1281 HERMON_UAR_DB_RECORD_WRITE(srq->srq_wq_vdbr,
1284 1282 srq->srq_wq_wqecntr & 0xFFFF);
1285 1283 }
1286 1284
1287 1285 if (num_posted != NULL) {
1288 1286 *num_posted = posted_cnt;
1289 1287 }
1290 1288
1291 1289 mutex_exit(&srq->srq_lock);
1292 1290 return (status);
1293 1291 }
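The SRQ work queue is managed as a free list threaded through the WQEs themselves: hermon_wqe_srq_build() leaves the index of the next free WQE in the second 16-bit word of the descriptor, stored big-endian, and the head is advanced by reading it back:

	/* advance the head to the next free WQE; the index is stored
	 * big-endian in the descriptor, hence the byte swap */
	indx = htons(((uint16_t *)desc)[1]);
	wq->wq_head = indx;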
1294 1292
1295 1293
1296 1294 /*
1297 1295 * hermon_wqe_send_build()
1298 1296 * Context: Can be called from interrupt or base context.
1299 1297 */
1300 1298 static int
1301 1299 hermon_wqe_send_build(hermon_state_t *state, hermon_qphdl_t qp,
1302 1300 ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
1303 1301 {
1304 1302 hermon_hw_snd_wqe_ud_t *ud;
1305 1303 hermon_hw_snd_wqe_remaddr_t *rc;
1306 1304 hermon_hw_snd_wqe_atomic_t *at;
1307 1305 hermon_hw_snd_wqe_remaddr_t *uc;
1308 1306 hermon_hw_snd_wqe_bind_t *bn;
1309 1307 hermon_hw_wqe_sgl_t *ds, *old_ds;
1310 1308 ibt_ud_dest_t *dest;
1311 1309 ibt_wr_ds_t *sgl;
1312 1310 hermon_ahhdl_t ah;
1313 1311 uint32_t nds;
1314 1312 int i, j, last_ds, num_ds, status;
1315 1313 int tmpsize;
1316 1314
1317 1315 ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
1318 1316
1319 1317 /* Initialize the information for the Data Segments */
1320 1318 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
1321 1319 sizeof (hermon_hw_snd_wqe_ctrl_t));
1322 1320 nds = wr->wr_nds;
1323 1321 sgl = wr->wr_sgl;
1324 1322 num_ds = 0;
1325 1323 i = 0;
1326 1324
1327 1325 /*
1328 1326 * Building a Send WQE depends first and foremost on the transport
1329 1327 * type of the Work Request (i.e. UD, RC, or UC)
1330 1328 */
1331 1329 switch (wr->wr_trans) {
1332 1330 case IBT_UD_SRV:
1333 1331 /* Ensure that work request transport type matches QP type */
1334 1332 if (qp->qp_serv_type != HERMON_QP_UD) {
1335 1333 return (IBT_QP_SRV_TYPE_INVALID);
1336 1334 }
1337 1335
1338 1336 /*
1339 1337 * Validate the operation type. For UD requests, only the
1340 1338 * "Send" and "Send LSO" operations are valid.
1341 1339 */
1342 1340 if (wr->wr_opcode != IBT_WRC_SEND &&
1343 1341 wr->wr_opcode != IBT_WRC_SEND_LSO) {
1344 1342 return (IBT_QP_OP_TYPE_INVALID);
1345 1343 }
1346 1344
1347 1345 /*
1348 1346 * If this is a Special QP (QP0 or QP1), then we need to
1349 1347 * build MLX WQEs instead. So jump to hermon_wqe_mlx_build()
1350 1348 * and return whatever status it returns
1351 1349 */
1352 1350 if (qp->qp_is_special) {
1353 1351 if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
1354 1352 return (IBT_QP_OP_TYPE_INVALID);
1355 1353 }
1356 1354 status = hermon_wqe_mlx_build(state, qp,
1357 1355 wr, desc, size);
1358 1356 return (status);
1359 1357 }
1360 1358
1361 1359 /*
1362 1360 * Otherwise, if this is a normal UD Send request, then fill
1363 1361 * all the fields in the Hermon UD header for the WQE. Note:
1364 1362 * to do this we'll need to extract some information from the
1365 1363 * Address Handle passed with the work request.
1366 1364 */
1367 1365 ud = (hermon_hw_snd_wqe_ud_t *)((uintptr_t)desc +
1368 1366 sizeof (hermon_hw_snd_wqe_ctrl_t));
1369 1367 if (wr->wr_opcode == IBT_WRC_SEND) {
1370 1368 dest = wr->wr.ud.udwr_dest;
1371 1369 } else {
1372 1370 dest = wr->wr.ud_lso.lso_ud_dest;
1373 1371 }
1374 1372 ah = (hermon_ahhdl_t)dest->ud_ah;
1375 1373 if (ah == NULL) {
1376 1374 return (IBT_AH_HDL_INVALID);
1377 1375 }
1378 1376
1379 1377 /*
1380 1378 * Build the Unreliable Datagram Segment for the WQE, using
1381 1379 * the information from the address handle and the work
1382 1380 * request.
1383 1381 */
1384 1382 /* mutex_enter(&ah->ah_lock); */
1385 1383 if (wr->wr_opcode == IBT_WRC_SEND) {
1386 1384 HERMON_WQE_BUILD_UD(qp, ud, ah, wr->wr.ud.udwr_dest);
1387 1385 } else { /* IBT_WRC_SEND_LSO */
1388 1386 HERMON_WQE_BUILD_UD(qp, ud, ah,
1389 1387 wr->wr.ud_lso.lso_ud_dest);
1390 1388 }
1391 1389 /* mutex_exit(&ah->ah_lock); */
1392 1390
1393 1391 /* Update "ds" for filling in Data Segments (below) */
1394 1392 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ud +
1395 1393 sizeof (hermon_hw_snd_wqe_ud_t));
1396 1394
1397 1395 if (wr->wr_opcode == IBT_WRC_SEND_LSO) {
1398 1396 int total_len;
1399 1397
1400 1398 total_len = (4 + 0xf + wr->wr.ud_lso.lso_hdr_sz) & ~0xf;
1401 1399 if ((uintptr_t)ds + total_len + (nds * 16) >
1402 1400 (uintptr_t)desc + (1 << qp->qp_sq_log_wqesz))
1403 1401 return (IBT_QP_SGL_LEN_INVALID);
1404 1402
1405 1403 bcopy(wr->wr.ud_lso.lso_hdr, (uint32_t *)ds + 1,
1406 1404 wr->wr.ud_lso.lso_hdr_sz);
1407 1405 old_ds = ds;
1408 1406 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)ds + total_len);
1409 1407 for (; i < nds; i++) {
1410 1408 if (sgl[i].ds_len == 0)
1411 1409 continue;
1412 1410 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds],
1413 1411 &sgl[i]);
1414 1412 num_ds++;
1415 1413 i++;
1416 1414 break;
1417 1415 }
1418 1416 membar_producer();
1419 1417 HERMON_WQE_BUILD_LSO(qp, old_ds, wr->wr.ud_lso.lso_mss,
1420 1418 wr->wr.ud_lso.lso_hdr_sz);
1421 1419 }
1422 1420
1423 1421 break;
1424 1422
1425 1423 case IBT_RC_SRV:
1426 1424 /* Ensure that work request transport type matches QP type */
1427 1425 if (qp->qp_serv_type != HERMON_QP_RC) {
1428 1426 return (IBT_QP_SRV_TYPE_INVALID);
1429 1427 }
1430 1428
1431 1429 /*
1432 1430 * Validate the operation type. For RC requests, we allow
1433 1431 * "Send", "RDMA Read", "RDMA Write", various "Atomic"
1434 1432 * operations, and memory window "Bind"
1435 1433 */
1436 1434 if ((wr->wr_opcode != IBT_WRC_SEND) &&
1437 1435 (wr->wr_opcode != IBT_WRC_RDMAR) &&
1438 1436 (wr->wr_opcode != IBT_WRC_RDMAW) &&
1439 1437 (wr->wr_opcode != IBT_WRC_CSWAP) &&
1440 1438 (wr->wr_opcode != IBT_WRC_FADD) &&
1441 1439 (wr->wr_opcode != IBT_WRC_BIND)) {
1442 1440 return (IBT_QP_OP_TYPE_INVALID);
1443 1441 }
1444 1442
1445 1443 /*
1446 1444 * If this is a Send request, then all we need to do is break
1447 1445 * out here and begin the Data Segment processing below
1448 1446 */
1449 1447 if (wr->wr_opcode == IBT_WRC_SEND) {
1450 1448 break;
1451 1449 }
1452 1450
1453 1451 /*
1454 1452 * If this is an RDMA Read or RDMA Write request, then fill
1455 1453 * in the "Remote Address" header fields.
1456 1454 */
1457 1455 if ((wr->wr_opcode == IBT_WRC_RDMAR) ||
1458 1456 (wr->wr_opcode == IBT_WRC_RDMAW)) {
1459 1457 rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
1460 1458 sizeof (hermon_hw_snd_wqe_ctrl_t));
1461 1459
1462 1460 /*
1463 1461 * Build the Remote Address Segment for the WQE, using
1464 1462 * the information from the RC work request.
1465 1463 */
1466 1464 HERMON_WQE_BUILD_REMADDR(qp, rc, &wr->wr.rc.rcwr.rdma);
1467 1465
1468 1466 /* Update "ds" for filling in Data Segments (below) */
1469 1467 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)rc +
1470 1468 sizeof (hermon_hw_snd_wqe_remaddr_t));
1471 1469 break;
1472 1470 }
1473 1471
1474 1472 /*
1475 1473 * If this is one of the Atomic type operations (i.e.
1476 1474 * Compare-Swap or Fetch-Add), then fill in both the "Remote
1477 1475 * Address" header fields and the "Atomic" header fields.
1478 1476 */
1479 1477 if ((wr->wr_opcode == IBT_WRC_CSWAP) ||
1480 1478 (wr->wr_opcode == IBT_WRC_FADD)) {
1481 1479 rc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
1482 1480 sizeof (hermon_hw_snd_wqe_ctrl_t));
1483 1481 at = (hermon_hw_snd_wqe_atomic_t *)((uintptr_t)rc +
1484 1482 sizeof (hermon_hw_snd_wqe_remaddr_t));
1485 1483
1486 1484 /*
1487 1485 * Build the Remote Address and Atomic Segments for
1488 1486 * the WQE, using the information from the RC Atomic
1489 1487 * work request.
1490 1488 */
1491 1489 HERMON_WQE_BUILD_RC_ATOMIC_REMADDR(qp, rc, wr);
1492 1490 HERMON_WQE_BUILD_ATOMIC(qp, at, wr->wr.rc.rcwr.atomic);
1493 1491
1494 1492 /* Update "ds" for filling in Data Segments (below) */
1495 1493 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)at +
1496 1494 sizeof (hermon_hw_snd_wqe_atomic_t));
1497 1495
1498 1496 /*
1499 1497 * Update "nds" and "sgl" because Atomic requests have
1500 1498 * only a single Data Segment (and they are encoded
1501 1499 * somewhat differently in the work request).
1502 1500 */
1503 1501 nds = 1;
1504 1502 sgl = wr->wr_sgl;
1505 1503 break;
1506 1504 }
1507 1505
1508 1506 /*
1509 1507 * If this is memory window Bind operation, then we call the
1510 1508 * hermon_wr_bind_check() routine to validate the request and
1511 1509 * to generate the updated RKey. If this is successful, then
1512 1510 * we fill in the WQE's "Bind" header fields.
1513 1511 */
1514 1512 if (wr->wr_opcode == IBT_WRC_BIND) {
1515 1513 status = hermon_wr_bind_check(state, wr);
1516 1514 if (status != DDI_SUCCESS) {
1517 1515 return (status);
1518 1516 }
1519 1517
1520 1518 bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
1521 1519 sizeof (hermon_hw_snd_wqe_ctrl_t));
1522 1520
1523 1521 /*
1524 1522 * Build the Bind Memory Window Segments for the WQE,
1525 1523 * using the information from the RC Bind memory
1526 1524 * window work request.
1527 1525 */
1528 1526 HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.rc.rcwr.bind);
1529 1527
1530 1528 /*
1531 1529 * Update the "ds" pointer. Even though the "bind"
1532 1530 * operation requires no SGLs, this is necessary to
1533 1531 * facilitate the correct descriptor size calculations
1534 1532 * (below).
1535 1533 */
1536 1534 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
1537 1535 sizeof (hermon_hw_snd_wqe_bind_t));
1538 1536 nds = 0;
1539 1537 }
1540 1538 break;
1541 1539
1542 1540 case IBT_UC_SRV:
1543 1541 /* Ensure that work request transport type matches QP type */
1544 1542 if (qp->qp_serv_type != HERMON_QP_UC) {
1545 1543 return (IBT_QP_SRV_TYPE_INVALID);
1546 1544 }
1547 1545
1548 1546 /*
1549 1547 * Validate the operation type. For UC requests, we only
1550 1548 * allow "Send", "RDMA Write", and memory window "Bind".
1551 1549 * Note: Unlike RC, UC does not allow "RDMA Read" or "Atomic"
1552 1550 * operations
1553 1551 */
1554 1552 if ((wr->wr_opcode != IBT_WRC_SEND) &&
1555 1553 (wr->wr_opcode != IBT_WRC_RDMAW) &&
1556 1554 (wr->wr_opcode != IBT_WRC_BIND)) {
1557 1555 return (IBT_QP_OP_TYPE_INVALID);
1558 1556 }
1559 1557
1560 1558 /*
1561 1559 * If this is a Send request, then all we need to do is break
1562 1560 * out here and begin the Data Segment processing below
1563 1561 */
1564 1562 if (wr->wr_opcode == IBT_WRC_SEND) {
1565 1563 break;
1566 1564 }
1567 1565
1568 1566 /*
1569 1567 * If this is an RDMA Write request, then fill in the "Remote
1570 1568 * Address" header fields.
1571 1569 */
1572 1570 if (wr->wr_opcode == IBT_WRC_RDMAW) {
1573 1571 uc = (hermon_hw_snd_wqe_remaddr_t *)((uintptr_t)desc +
1574 1572 sizeof (hermon_hw_snd_wqe_ctrl_t));
1575 1573
1576 1574 /*
1577 1575 * Build the Remote Address Segment for the WQE, using
1578 1576 * the information from the UC work request.
1579 1577 */
1580 1578 HERMON_WQE_BUILD_REMADDR(qp, uc, &wr->wr.uc.ucwr.rdma);
1581 1579
1582 1580 /* Update "ds" for filling in Data Segments (below) */
1583 1581 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)uc +
1584 1582 sizeof (hermon_hw_snd_wqe_remaddr_t));
1585 1583 break;
1586 1584 }
1587 1585
1588 1586 /*
1589 1587 * If this is memory window Bind operation, then we call the
1590 1588 * hermon_wr_bind_check() routine to validate the request and
1591 1589 * to generate the updated RKey. If this is successful, then
1592 1590 * we fill in the WQE's "Bind" header fields.
1593 1591 */
1594 1592 if (wr->wr_opcode == IBT_WRC_BIND) {
1595 1593 status = hermon_wr_bind_check(state, wr);
1596 1594 if (status != DDI_SUCCESS) {
1597 1595 return (status);
1598 1596 }
1599 1597
1600 1598 bn = (hermon_hw_snd_wqe_bind_t *)((uintptr_t)desc +
1601 1599 sizeof (hermon_hw_snd_wqe_ctrl_t));
1602 1600
1603 1601 /*
1604 1602 * Build the Bind Memory Window Segments for the WQE,
1605 1603 * using the information from the UC Bind memory
1606 1604 * window work request.
1607 1605 */
1608 1606 HERMON_WQE_BUILD_BIND(qp, bn, wr->wr.uc.ucwr.bind);
1609 1607
1610 1608 /*
1611 1609 * Update the "ds" pointer. Even though the "bind"
1612 1610 * operation requires no SGLs, this is necessary to
1613 1611 * facilitate the correct descriptor size calculations
1614 1612 * (below).
1615 1613 */
1616 1614 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)bn +
1617 1615 sizeof (hermon_hw_snd_wqe_bind_t));
1618 1616 nds = 0;
1619 1617 }
1620 1618 break;
1621 1619
1622 1620 default:
1623 1621 return (IBT_QP_SRV_TYPE_INVALID);
1624 1622 }
1625 1623
1626 1624 /*
1627 1625 * Now fill in the Data Segments (SGL) for the Send WQE based on
1628 1626 * the values set up above (i.e. "sgl", "nds", and the "ds" pointer).
1629 1627 * Start by checking for a valid number of SGL entries
1630 1628 */
1631 1629 if (nds > qp->qp_sq_sgl) {
1632 1630 return (IBT_QP_SGL_LEN_INVALID);
1633 1631 }
1634 1632
1635 1633 /*
1636 1634 * For each SGL in the Send Work Request, fill in the Send WQE's data
1637 1635 * segments. Note: We skip any SGL with zero size because Hermon
1638 1636 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
1639 1637 * the encoding for zero means a 2GB transfer.
1640 1638 */
1641 1639 for (last_ds = num_ds, j = i; j < nds; j++) {
1642 1640 if (sgl[j].ds_len != 0)
1643 1641 last_ds++; /* real last ds of wqe to fill */
1644 1642 }
1645 1643
1646 1644 /*
1647 1645 * Return the size of the descriptor (in 16-byte chunks).
1648 1646 * For Hermon, we want them (for now) to be on stride size
1649 1647 * boundaries, which was implicit in Tavor/Arbel
1650 1648 *
1651 1649 */
1652 1650 tmpsize = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc);
1653 1651
1654 1652 *size = tmpsize >> 0x4;
1655 1653
1656 1654 for (j = nds; --j >= i; ) {
1657 1655 if (sgl[j].ds_len == 0) {
1658 1656 continue;
1659 1657 }
1660 1658
1661 1659 /*
1662 1660 * Fill in the Data Segment(s) for the current WQE, using the
1663 1661 * information contained in the scatter-gather list of the
1664 1662 * work request.
1665 1663 */
1666 1664 last_ds--;
1667 1665 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[last_ds], &sgl[j]);
1668 1666 }
1669 1667
1670 1668 return (DDI_SUCCESS);
1671 1669 }
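The returned size is in 16-byte units, the granularity of Hermon WQE segments. A worked example under assumed, illustrative segment sizes (not taken from the hermon headers):

	/* Illustration only: a 16-byte ctrl segment, a 48-byte UD
	 * segment, and two 16-byte data segments span 96 bytes */
	tmpsize = ((uintptr_t)&ds[last_ds] - (uintptr_t)desc);	/* 96 */
	*size = tmpsize >> 0x4;					/* 6 chunks */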
1672 1670
1673 1671
1674 1672
1675 1673 /*
1676 1674 * hermon_wqe_mlx_build()
1677 1675 * Context: Can be called from interrupt or base context.
1678 1676 */
1679 1677 static int
1680 1678 hermon_wqe_mlx_build(hermon_state_t *state, hermon_qphdl_t qp,
1681 1679 ibt_send_wr_t *wr, uint64_t *desc, uint_t *size)
1682 1680 {
1683 1681 hermon_ahhdl_t ah;
1684 1682 hermon_hw_udav_t *udav;
1685 1683 ib_lrh_hdr_t *lrh;
1686 1684 ib_grh_t *grh;
1687 1685 ib_bth_hdr_t *bth;
1688 1686 ib_deth_hdr_t *deth;
1689 1687 hermon_hw_wqe_sgl_t *ds;
1690 1688 ibt_wr_ds_t *sgl;
1691 1689 uint8_t *mgmtclass, *hpoint, *hcount;
1692 1690 uint32_t nds, offset, pktlen;
1693 1691 uint32_t desc_sz;
1694 1692 int i, num_ds;
1695 1693 int tmpsize;
1696 1694
1697 1695 ASSERT(MUTEX_HELD(&qp->qp_sq_lock));
1698 1696
1699 1697 /* Initialize the information for the Data Segments */
1700 1698 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
1701 1699 sizeof (hermon_hw_mlx_wqe_nextctrl_t));
1702 1700
1703 1701 /*
1704 1702 * Pull the address handle from the work request. The UDAV will
1705 1703 * be used to answer some questions about the request.
1706 1704 */
1707 1705 ah = (hermon_ahhdl_t)wr->wr.ud.udwr_dest->ud_ah;
1708 1706 if (ah == NULL) {
1709 1707 return (IBT_AH_HDL_INVALID);
1710 1708 }
1711 1709 mutex_enter(&ah->ah_lock);
1712 1710 udav = ah->ah_udav;
1713 1711
1714 1712 /*
1715 1713 * If the request is for QP1 and the destination LID is equal to
1716 1714 * the Permissive LID, then return an error. This combination is
1717 1715 	 * not allowed.
1718 1716 */
1719 1717 if ((udav->rlid == IB_LID_PERMISSIVE) &&
1720 1718 (qp->qp_is_special == HERMON_QP_GSI)) {
1721 1719 mutex_exit(&ah->ah_lock);
1722 1720 return (IBT_AH_HDL_INVALID);
1723 1721 }
1724 1722
1725 1723 /*
1726 1724 * Calculate the size of the packet headers, including the GRH
1727 1725 * (if necessary)
1728 1726 */
1729 1727 desc_sz = sizeof (ib_lrh_hdr_t) + sizeof (ib_bth_hdr_t) +
1730 1728 sizeof (ib_deth_hdr_t);
1731 1729 if (udav->grh) {
1732 1730 desc_sz += sizeof (ib_grh_t);
1733 1731 }
1734 1732
1735 1733 /*
1736 1734 * Begin to build the first "inline" data segment for the packet
1737 1735 * headers. Note: By specifying "inline" we can build the contents
1738 1736 * of the MAD packet headers directly into the work queue (as part
1739 1737 	 * of the MAD packet headers directly into the work queue (as part
1740 1738 	 * of the descriptor). This has the advantage of both speeding things up
1741 1739 * memory for the packet headers.
1742 1740 */
1743 1741 HERMON_WQE_BUILD_INLINE(qp, &ds[0], desc_sz);
1744 1742 desc_sz += 4;
1745 1743
1746 1744 /*
1747 1745 * Build Local Route Header (LRH)
1748 1746 * We start here by building the LRH into a temporary location.
1749 1747 * When we have finished we copy the LRH data into the descriptor.
1750 1748 *
1751 1749 * Notice that the VL values are hardcoded. This is not a problem
1752 1750 * because VL15 is decided later based on the value in the MLX
1753 1751 * transport "next/ctrl" header (see the "vl15" bit below), and it
1754 1752 * is otherwise (meaning for QP1) chosen from the SL-to-VL table
1755 1753 * values. This rule does not hold for loopback packets however
1756 1754 * (all of which bypass the SL-to-VL tables) and it is the reason
1757 1755 	 * that non-QP0 MADs are set up with VL hardcoded to zero below.
1758 1756 *
1759 1757 * Notice also that Source LID is hardcoded to the Permissive LID
1760 1758 * (0xFFFF). This is also not a problem because if the Destination
1761 1759 * LID is not the Permissive LID, then the "slr" value in the MLX
1762 1760 * transport "next/ctrl" header will be set to zero and the hardware
1763 1761 	 * will pull the LID from the value in the port.
1764 1762 */
1765 1763 lrh = (ib_lrh_hdr_t *)((uintptr_t)&ds[0] + 4);
1766 1764 pktlen = (desc_sz + 0x100) >> 2;
1767 1765 HERMON_WQE_BUILD_MLX_LRH(lrh, qp, udav, pktlen);
1768 1766
1769 1767 /*
1770 1768 * Build Global Route Header (GRH)
1771 1769 * This is only built if necessary as defined by the "grh" bit in
1772 1770 * the address vector. Note: We also calculate the offset to the
1773 1771 * next header (BTH) based on whether or not the "grh" bit is set.
1774 1772 */
1775 1773 if (udav->grh) {
1776 1774 /*
1777 1775 * If the request is for QP0, then return an error. The
1778 1776 		 * combination of global routing (GRH) and QP0 is not allowed.
1779 1777 */
1780 1778 if (qp->qp_is_special == HERMON_QP_SMI) {
1781 1779 mutex_exit(&ah->ah_lock);
1782 1780 return (IBT_AH_HDL_INVALID);
1783 1781 }
1784 1782 grh = (ib_grh_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
1785 1783 HERMON_WQE_BUILD_MLX_GRH(state, grh, qp, udav, pktlen);
1786 1784
1787 1785 bth = (ib_bth_hdr_t *)((uintptr_t)grh + sizeof (ib_grh_t));
1788 1786 } else {
1789 1787 bth = (ib_bth_hdr_t *)((uintptr_t)lrh + sizeof (ib_lrh_hdr_t));
1790 1788 }
1791 1789 mutex_exit(&ah->ah_lock);
1792 1790
1793 1791
1794 1792 /*
1795 1793 * Build Base Transport Header (BTH)
1796 1794 * Notice that the M, PadCnt, and TVer fields are all set
1797 1795 	 * to zero implicitly. This is true for all Management Datagrams
1798 1796 	 * (MADs), whether GSI or SMI.
1799 1797 */
1800 1798 HERMON_WQE_BUILD_MLX_BTH(state, bth, qp, wr);
1801 1799
1802 1800 /*
1803 1801 * Build Datagram Extended Transport Header (DETH)
1804 1802 */
1805 1803 deth = (ib_deth_hdr_t *)((uintptr_t)bth + sizeof (ib_bth_hdr_t));
1806 1804 HERMON_WQE_BUILD_MLX_DETH(deth, qp);
1807 1805
1808 1806 /* Ensure that the Data Segment is aligned on a 16-byte boundary */
1809 1807 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)deth + sizeof (ib_deth_hdr_t));
1810 1808 ds = (hermon_hw_wqe_sgl_t *)(((uintptr_t)ds + 0xF) & ~0xF);
1811 1809 nds = wr->wr_nds;
1812 1810 sgl = wr->wr_sgl;
1813 1811 num_ds = 0;
1814 1812
1815 1813 /*
1816 1814 * Now fill in the Data Segments (SGL) for the MLX WQE based on the
1817 1815 	 * values set up above (i.e. "sgl", "nds", and the "ds" pointer).
1818 1816 	 * Start by checking for a valid number of SGL entries.
1819 1817 */
1820 1818 if (nds > qp->qp_sq_sgl) {
1821 1819 return (IBT_QP_SGL_LEN_INVALID);
1822 1820 }
1823 1821
1824 1822 /*
1825 1823 * For each SGL in the Send Work Request, fill in the MLX WQE's data
1826 1824 * segments. Note: We skip any SGL with zero size because Hermon
1827 1825 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
1828 1826 * the encoding for zero means a 2GB transfer. Because of this special
1829 1827 * encoding in the hardware, we mask the requested length with
1830 1828 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
1831 1829 * zero.)
1832 1830 */
1833 1831 mgmtclass = hpoint = hcount = NULL;
1834 1832 offset = 0;
1835 1833 for (i = 0; i < nds; i++) {
1836 1834 if (sgl[i].ds_len == 0) {
1837 1835 continue;
1838 1836 }
1839 1837
1840 1838 /*
1841 1839 * Fill in the Data Segment(s) for the MLX send WQE, using
1842 1840 * the information contained in the scatter-gather list of
1843 1841 * the work request.
1844 1842 */
1845 1843 HERMON_WQE_BUILD_DATA_SEG_SEND(&ds[num_ds], &sgl[i]);
1846 1844
1847 1845 /*
1848 1846 * Search through the contents of all MADs posted to QP0 to
1849 1847 * initialize pointers to the places where Directed Route "hop
1850 1848 * pointer", "hop count", and "mgmtclass" would be. Hermon
1851 1849 * needs these updated (i.e. incremented or decremented, as
1852 1850 * necessary) by software.
1853 1851 */
1854 1852 if (qp->qp_is_special == HERMON_QP_SMI) {
1855 1853
1856 1854 HERMON_SPECIAL_QP_DRMAD_GET_MGMTCLASS(mgmtclass,
1857 1855 offset, sgl[i].ds_va, sgl[i].ds_len);
1858 1856
1859 1857 HERMON_SPECIAL_QP_DRMAD_GET_HOPPOINTER(hpoint,
1860 1858 offset, sgl[i].ds_va, sgl[i].ds_len);
1861 1859
1862 1860 HERMON_SPECIAL_QP_DRMAD_GET_HOPCOUNT(hcount,
1863 1861 offset, sgl[i].ds_va, sgl[i].ds_len);
1864 1862
1865 1863 offset += sgl[i].ds_len;
1866 1864 }
1867 1865 num_ds++;
1868 1866 }
1869 1867
1870 1868 /*
1871 1869 * Hermon's Directed Route MADs need to have the "hop pointer"
1872 1870 * incremented/decremented (as necessary) depending on whether it is
1873 1871 * currently less than or greater than the "hop count" (i.e. whether
1874 1872 * the MAD is a request or a response.)
1875 1873 */
1876 1874 if (qp->qp_is_special == HERMON_QP_SMI) {
1877 1875 HERMON_SPECIAL_QP_DRMAD_DO_HOPPOINTER_MODIFY(*mgmtclass,
1878 1876 *hpoint, *hcount);
1879 1877 }
1880 1878
1881 1879 /*
1882 1880 * Now fill in the ICRC Data Segment. This data segment is inlined
1883 1881 	 * just like the packet headers above, but it is only four bytes and
1884 1882 	 * set to zero (to indicate that we wish the hardware to generate the ICRC).
1885 1883 */
1886 1884 HERMON_WQE_BUILD_INLINE_ICRC(qp, &ds[num_ds], 4, 0);
1887 1885 num_ds++;
1888 1886
1889 1887 /*
1890 1888 * Return the size of descriptor (in 16-byte chunks)
1891 1889 * For Hermon, we want them (for now) to be on stride size
1892 1890 * boundaries, which was implicit in Tavor/Arbel
1893 1891 */
1894 1892 tmpsize = ((uintptr_t)&ds[num_ds] - (uintptr_t)desc);
1895 1893
1896 1894 *size = tmpsize >> 0x04;
1897 1895
1898 1896 return (DDI_SUCCESS);
1899 1897 }
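
Editor's note: the 16-byte alignment step above (rounding the "ds" pointer up past the DETH header) uses a standard align-up idiom. A small sketch of it, runnable in isolation:

#include <assert.h>
#include <stdint.h>

/*
 * Round a pointer-sized value up to the next 16-byte boundary,
 * exactly as "((uintptr_t)ds + 0xF) & ~0xF" does in
 * hermon_wqe_mlx_build().
 */
static uintptr_t
align16(uintptr_t p)
{
	return ((p + 0xF) & ~(uintptr_t)0xF);
}

int
main(void)
{
	assert(align16(0x1001) == 0x1010);
	assert(align16(0x1010) == 0x1010);	/* already aligned: unchanged */
	return (0);
}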
1900 1898
1901 1899
1902 1900
1903 1901 /*
1904 1902 * hermon_wqe_recv_build()
1905 1903 * Context: Can be called from interrupt or base context.
1906 1904 */
1907 1905 /* ARGSUSED */
1908 1906 static int
1909 1907 hermon_wqe_recv_build(hermon_state_t *state, hermon_qphdl_t qp,
1910 1908 ibt_recv_wr_t *wr, uint64_t *desc)
1911 1909 {
1912 1910 hermon_hw_wqe_sgl_t *ds;
1913 1911 int i, num_ds;
1914 1912
1915 1913 ASSERT(MUTEX_HELD(&qp->qp_lock));
1916 1914
1917 1915 /*
1918 1916 	 * Fill in the Data Segments (SGL) for the Recv WQE. We don't
1919 1917 	 * need to reserve space for the ctrl segment (there is none on
1920 1918 	 * the recv queue for Hermon), but we will need to append an
1921 1919 	 * invalid (null) scatter pointer, per the PRM.
1922 1920 */
1923 1921 ds = (hermon_hw_wqe_sgl_t *)(uintptr_t)desc;
1924 1922 num_ds = 0;
1925 1923
1926 1924 /* Check for valid number of SGL entries */
1927 1925 if (wr->wr_nds > qp->qp_rq_sgl) {
1928 1926 return (IBT_QP_SGL_LEN_INVALID);
1929 1927 }
1930 1928
1931 1929 /*
1932 1930 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
1933 1931 * segments. Note: We skip any SGL with zero size because Hermon
1934 1932 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
1935 1933 * the encoding for zero means a 2GB transfer. Because of this special
1936 1934 * encoding in the hardware, we mask the requested length with
1937 1935 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
1938 1936 * zero.)
1939 1937 */
1940 1938 for (i = 0; i < wr->wr_nds; i++) {
1941 1939 if (wr->wr_sgl[i].ds_len == 0) {
1942 1940 continue;
1943 1941 }
1944 1942
1945 1943 /*
1946 1944 * Fill in the Data Segment(s) for the receive WQE, using the
1947 1945 * information contained in the scatter-gather list of the
1948 1946 * work request.
1949 1947 */
1950 1948 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
1951 1949 num_ds++;
1952 1950 }
1953 1951
1954 1952 /* put the null sgl pointer as well if needed */
1955 1953 if (num_ds < qp->qp_rq_sgl) {
1956 1954 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
1957 1955 }
1958 1956
1959 1957 return (DDI_SUCCESS);
1960 1958 }
1961 1959
1962 1960
1963 1961
1964 1962 /*
1965 1963 * hermon_wqe_srq_build()
1966 1964 * Context: Can be called from interrupt or base context.
1967 1965 */
1968 1966 /* ARGSUSED */
1969 1967 static int
1970 1968 hermon_wqe_srq_build(hermon_state_t *state, hermon_srqhdl_t srq,
1971 1969 ibt_recv_wr_t *wr, uint64_t *desc)
1972 1970 {
1973 1971 hermon_hw_wqe_sgl_t *ds;
1974 1972 int i, num_ds;
1975 1973
1976 1974 ASSERT(MUTEX_HELD(&srq->srq_lock));
1977 1975
1978 1976 /* Fill in the Data Segments (SGL) for the Recv WQE */
1979 1977 ds = (hermon_hw_wqe_sgl_t *)((uintptr_t)desc +
1980 1978 sizeof (hermon_hw_srq_wqe_next_t));
1981 1979 num_ds = 0;
1982 1980
1983 1981 /* Check for valid number of SGL entries */
1984 1982 if (wr->wr_nds > srq->srq_wq_sgl) {
1985 1983 return (IBT_QP_SGL_LEN_INVALID);
1986 1984 }
1987 1985
1988 1986 /*
1989 1987 * For each SGL in the Recv Work Request, fill in the Recv WQE's data
1990 1988 * segments. Note: We skip any SGL with zero size because Hermon
1991 1989 * hardware cannot handle a zero for "byte_cnt" in the WQE. Actually
1992 1990 * the encoding for zero means a 2GB transfer. Because of this special
1993 1991 * encoding in the hardware, we mask the requested length with
1994 1992 * HERMON_WQE_SGL_BYTE_CNT_MASK (so that 2GB will end up encoded as
1995 1993 * zero.)
1996 1994 */
1997 1995 for (i = 0; i < wr->wr_nds; i++) {
1998 1996 if (wr->wr_sgl[i].ds_len == 0) {
1999 1997 continue;
2000 1998 }
2001 1999
2002 2000 /*
2003 2001 * Fill in the Data Segment(s) for the receive WQE, using the
2004 2002 * information contained in the scatter-gather list of the
2005 2003 * work request.
2006 2004 */
2007 2005 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &wr->wr_sgl[i]);
2008 2006 num_ds++;
2009 2007 }
2010 2008
2011 2009 /*
2012 2010 * put in the null sgl pointer as well, if needed
2013 2011 */
2014 2012 if (num_ds < srq->srq_wq_sgl) {
2015 2013 HERMON_WQE_BUILD_DATA_SEG_RECV(&ds[num_ds], &null_sgl);
2016 2014 }
2017 2015
2018 2016 return (DDI_SUCCESS);
2019 2017 }
2020 2018
2021 2019
2022 2020 /*
2023 2021 * hermon_wr_get_immediate()
2024 2022 * Context: Can be called from interrupt or base context.
2025 2023 */
2026 2024 static uint32_t
2027 2025 hermon_wr_get_immediate(ibt_send_wr_t *wr)
2028 2026 {
2029 2027 /*
2030 2028 * This routine extracts the "immediate data" from the appropriate
2031 2029 * location in the IBTF work request. Because of the way the
2032 2030 * work request structure is defined, the location for this data
2033 2031 * depends on the actual work request operation type.
2034 2032 */
2035 2033
2036 2034 /* For RDMA Write, test if RC or UC */
2037 2035 if (wr->wr_opcode == IBT_WRC_RDMAW) {
2038 2036 if (wr->wr_trans == IBT_RC_SRV) {
2039 2037 return (wr->wr.rc.rcwr.rdma.rdma_immed);
2040 2038 } else { /* IBT_UC_SRV */
2041 2039 return (wr->wr.uc.ucwr.rdma.rdma_immed);
2042 2040 }
2043 2041 }
2044 2042
2045 2043 /* For Send, test if RC, UD, or UC */
2046 2044 if (wr->wr_opcode == IBT_WRC_SEND) {
2047 2045 if (wr->wr_trans == IBT_RC_SRV) {
2048 2046 return (wr->wr.rc.rcwr.send_immed);
2049 2047 } else if (wr->wr_trans == IBT_UD_SRV) {
2050 2048 return (wr->wr.ud.udwr_immed);
2051 2049 } else { /* IBT_UC_SRV */
2052 2050 return (wr->wr.uc.ucwr.send_immed);
2053 2051 }
2054 2052 }
2055 2053
2056 2054 /*
2057 2055 * If any other type of request, then immediate is undefined
2058 2056 */
2059 2057 return (0);
2060 2058 }
2061 2059
2062 2060 /*
2063 2061 * hermon_wqe_headroom()
2064 2062  * Context: Can be called from interrupt or base context; currently
2065 2063  * only called from base context.
2066 2064  * Routine that fills in the headroom for the Send Queue.
2067 2065 */
2068 2066
2069 2067 static void
2070 2068 hermon_wqe_headroom(uint_t from, hermon_qphdl_t qp)
2071 2069 {
2072 2070 uint32_t *wqe_start, *wqe_top, *wqe_base, qsize;
2073 2071 int hdrmwqes, wqesizebytes, sectperwqe;
2074 2072 uint32_t invalue;
2075 2073 int i, j;
2076 2074
2077 2075 qsize = qp->qp_sq_bufsz;
2078 2076 wqesizebytes = 1 << qp->qp_sq_log_wqesz;
2079 2077 sectperwqe = wqesizebytes >> 6; /* 64 bytes/section */
2080 2078 hdrmwqes = qp->qp_sq_hdrmwqes;
2081 2079 wqe_base = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, 0);
2082 2080 wqe_top = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, qsize);
2083 2081 wqe_start = (uint32_t *)HERMON_QP_SQ_ENTRY(qp, from);
2084 2082
2085 2083 for (i = 0; i < hdrmwqes; i++) {
2086 2084 for (j = 0; j < sectperwqe; j++) {
2087 2085 if (j == 0) { /* 1st section of wqe */
2088 2086 				/* preserve ownership bit */
2089 2087 invalue = ddi_get32(qp->qp_wqinfo.qa_acchdl,
2090 2088 wqe_start) | 0x7FFFFFFF;
2091 2089 } else {
2092 2090 /* or just invalidate it */
2093 2091 invalue = 0xFFFFFFFF;
2094 2092 }
2095 2093 ddi_put32(qp->qp_wqinfo.qa_acchdl, wqe_start, invalue);
2096 2094 wqe_start += 16; /* move 64 bytes */
2097 2095 }
2098 2096 if (wqe_start == wqe_top) /* hit the end of the queue */
2099 2097 wqe_start = wqe_base; /* wrap to start */
2100 2098 }
2101 2099 }
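
Editor's note: the "preserve ownership bit" trick above deserves a closer look. OR-ing the first word with 0x7FFFFFFF keeps bit 31 (the ownership bit) at its current value while forcing every other bit to one, invalidating the rest of the word. A minimal sketch:

#include <assert.h>
#include <stdint.h>

/* Keep bit 31 (ownership) as-is; force every other bit to one. */
static uint32_t
invalidate_keep_owner(uint32_t first_word)
{
	return (first_word | 0x7FFFFFFFu);
}

int
main(void)
{
	assert(invalidate_keep_owner(0x80001234u) == 0xFFFFFFFFu);
	assert(invalidate_keep_owner(0x00001234u) == 0x7FFFFFFFu);
	return (0);
}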
2102 2100
2103 2101 /*
2104 2102 * hermon_wr_bind_check()
2105 2103 * Context: Can be called from interrupt or base context.
2106 2104 */
2107 2105 /* ARGSUSED */
2108 2106 static int
2109 2107 hermon_wr_bind_check(hermon_state_t *state, ibt_send_wr_t *wr)
2110 2108 {
2111 2109 ibt_bind_flags_t bind_flags;
2112 2110 uint64_t vaddr, len;
2113 2111 uint64_t reg_start_addr, reg_end_addr;
2114 2112 hermon_mwhdl_t mw;
2115 2113 hermon_mrhdl_t mr;
2116 2114 hermon_rsrc_t *mpt;
2117 2115 uint32_t new_rkey;
2118 2116
2119 2117 /* Check for a valid Memory Window handle in the WR */
2120 2118 mw = (hermon_mwhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mw_hdl;
2121 2119 if (mw == NULL) {
2122 2120 return (IBT_MW_HDL_INVALID);
2123 2121 }
2124 2122
2125 2123 /* Check for a valid Memory Region handle in the WR */
2126 2124 mr = (hermon_mrhdl_t)wr->wr.rc.rcwr.bind->bind_ibt_mr_hdl;
2127 2125 if (mr == NULL) {
2128 2126 return (IBT_MR_HDL_INVALID);
2129 2127 }
2130 2128
2131 2129 mutex_enter(&mr->mr_lock);
2132 2130 mutex_enter(&mw->mr_lock);
2133 2131
2134 2132 /*
2135 2133 * Check here to see if the memory region has already been partially
2136 2134 * deregistered as a result of a hermon_umap_umemlock_cb() callback.
2137 2135 * If so, this is an error, return failure.
2138 2136 */
2139 2137 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
2140 2138 mutex_exit(&mr->mr_lock);
2141 2139 mutex_exit(&mw->mr_lock);
2142 2140 return (IBT_MR_HDL_INVALID);
2143 2141 }
2144 2142
2145 2143 /* Check for a valid Memory Window RKey (i.e. a matching RKey) */
2146 2144 if (mw->mr_rkey != wr->wr.rc.rcwr.bind->bind_rkey) {
2147 2145 mutex_exit(&mr->mr_lock);
2148 2146 mutex_exit(&mw->mr_lock);
2149 2147 return (IBT_MR_RKEY_INVALID);
2150 2148 }
2151 2149
2152 2150 /* Check for a valid Memory Region LKey (i.e. a matching LKey) */
2153 2151 if (mr->mr_lkey != wr->wr.rc.rcwr.bind->bind_lkey) {
2154 2152 mutex_exit(&mr->mr_lock);
2155 2153 mutex_exit(&mw->mr_lock);
2156 2154 return (IBT_MR_LKEY_INVALID);
2157 2155 }
2158 2156
2159 2157 /*
2160 2158 * Now check for valid "vaddr" and "len". Note: We don't check the
2161 2159 	 * "vaddr" range when "len == 0" (i.e. on unbind operations).
2162 2160 */
2163 2161 len = wr->wr.rc.rcwr.bind->bind_len;
2164 2162 if (len != 0) {
2165 2163 vaddr = wr->wr.rc.rcwr.bind->bind_va;
2166 2164 reg_start_addr = mr->mr_bindinfo.bi_addr;
2167 2165 reg_end_addr = mr->mr_bindinfo.bi_addr +
2168 2166 (mr->mr_bindinfo.bi_len - 1);
2169 2167 if ((vaddr < reg_start_addr) || (vaddr > reg_end_addr)) {
2170 2168 mutex_exit(&mr->mr_lock);
2171 2169 mutex_exit(&mw->mr_lock);
2172 2170 return (IBT_MR_VA_INVALID);
2173 2171 }
2174 2172 vaddr = (vaddr + len) - 1;
2175 2173 if (vaddr > reg_end_addr) {
2176 2174 mutex_exit(&mr->mr_lock);
2177 2175 mutex_exit(&mw->mr_lock);
2178 2176 return (IBT_MR_LEN_INVALID);
2179 2177 }
2180 2178 }
2181 2179
2182 2180 /*
2183 2181 * Validate the bind access flags. Remote Write and Atomic access for
2184 2182 * the Memory Window require that Local Write access be set in the
2185 2183 * corresponding Memory Region.
2186 2184 */
2187 2185 bind_flags = wr->wr.rc.rcwr.bind->bind_flags;
2188 2186 if (((bind_flags & IBT_WR_BIND_WRITE) ||
2189 2187 (bind_flags & IBT_WR_BIND_ATOMIC)) &&
2190 2188 !(mr->mr_accflag & IBT_MR_LOCAL_WRITE)) {
2191 2189 mutex_exit(&mr->mr_lock);
2192 2190 mutex_exit(&mw->mr_lock);
2193 2191 return (IBT_MR_ACCESS_REQ_INVALID);
2194 2192 }
2195 2193
2196 2194 /* Calculate the new RKey for the Memory Window */
2197 2195 mpt = mw->mr_mptrsrcp;
2198 2196 new_rkey = hermon_mr_keycalc(mpt->hr_indx);
2199 2197 new_rkey = hermon_mr_key_swap(new_rkey);
2200 2198
2201 2199 wr->wr.rc.rcwr.bind->bind_rkey_out = new_rkey;
2202 2200 mw->mr_rkey = new_rkey;
2203 2201
2204 2202 mutex_exit(&mr->mr_lock);
2205 2203 mutex_exit(&mw->mr_lock);
2206 2204 return (DDI_SUCCESS);
2207 2205 }
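
Editor's note: the vaddr/len validation above reduces to a contained-range check using "last valid byte" arithmetic. The sketch below restates it in isolation; the function name and return convention are illustrative only.

#include <stdint.h>

/*
 * Return 1 if a bind of [vaddr, vaddr + len) lies inside the region
 * starting at reg_start with length reg_len, 0 otherwise. A zero
 * length (unbind) skips the range check, as the driver does.
 */
static int
bind_range_ok(uint64_t vaddr, uint64_t len,
    uint64_t reg_start, uint64_t reg_len)
{
	uint64_t reg_end = reg_start + (reg_len - 1);

	if (len == 0)
		return (1);
	if (vaddr < reg_start || vaddr > reg_end)
		return (0);		/* maps to IBT_MR_VA_INVALID */
	if ((vaddr + len) - 1 > reg_end)
		return (0);		/* maps to IBT_MR_LEN_INVALID */
	return (1);
}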
2208 2206
2209 2207
2210 2208 /*
2211 2209 * hermon_wrid_from_reset_handling()
2212 2210 * Context: Can be called from interrupt or base context.
2213 2211 */
2214 2212 /* ARGSUSED */
2215 2213 int
2216 2214 hermon_wrid_from_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
2217 2215 {
2218 2216 hermon_workq_hdr_t *swq, *rwq;
2219 2217
2220 2218 if (qp->qp_alloc_flags & IBT_QP_USER_MAP)
2221 2219 return (DDI_SUCCESS);
2222 2220
2223 -#ifdef __lock_lint
2224 - mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
2225 - mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
2226 -#else
2227 2221 /* grab the cq lock(s) to modify the wqavl tree */
2228 2222 if (qp->qp_rq_cqhdl)
2229 2223 mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
2230 2224 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
2231 2225 qp->qp_sq_cqhdl != NULL)
2232 2226 mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
2233 -#endif
2234 2227
2235 2228 /* Chain the newly allocated work queue header to the CQ's list */
2236 2229 if (qp->qp_sq_cqhdl)
2237 2230 hermon_cq_workq_add(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);
2238 2231
2239 2232 swq = qp->qp_sq_wqhdr;
2240 2233 swq->wq_head = 0;
2241 2234 swq->wq_tail = 0;
2242 2235 swq->wq_full = 0;
2243 2236
2244 2237 /*
2245 2238 * Now we repeat all the above operations for the receive work queue,
2246 2239 * or shared receive work queue.
2247 2240 *
2248 2241 * Note: We still use the 'qp_rq_cqhdl' even in the SRQ case.
2249 2242 */
2250 2243
2251 -#ifdef __lock_lint
2252 - mutex_enter(&qp->qp_srqhdl->srq_lock);
2253 -#else
2254 2244 if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
2255 2245 mutex_enter(&qp->qp_srqhdl->srq_lock);
2256 2246 } else {
2257 2247 rwq = qp->qp_rq_wqhdr;
2258 2248 rwq->wq_head = 0;
2259 2249 rwq->wq_tail = 0;
2260 2250 rwq->wq_full = 0;
2261 2251 qp->qp_rq_wqecntr = 0;
2262 2252 }
2263 -#endif
2264 2253 hermon_cq_workq_add(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);
2265 2254
2266 -#ifdef __lock_lint
2267 - mutex_exit(&qp->qp_srqhdl->srq_lock);
2268 -#else
2269 2255 if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
2270 2256 mutex_exit(&qp->qp_srqhdl->srq_lock);
2271 2257 }
2272 -#endif
2273 2258
2274 -#ifdef __lock_lint
2275 - mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
2276 - mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
2277 -#else
2278 2259 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
2279 2260 qp->qp_sq_cqhdl != NULL)
2280 2261 mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
2281 2262 if (qp->qp_rq_cqhdl)
2282 2263 mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
2283 -#endif
2284 2264 return (DDI_SUCCESS);
2285 2265 }
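
Editor's note: one detail worth calling out in the lock handling above is that the send-side CQ lock is taken only when it is a different CQ from the receive side, since a QP may attach both work queues to a single CQ. A sketch of the pattern (kernel mutex calls shown for shape only; names are illustrative):

#include <sys/ksynch.h>

/* Take both CQ locks, but never attempt to take the same lock twice. */
static void
cq_locks_enter(kmutex_t *rq_lock, kmutex_t *sq_lock)
{
	if (rq_lock != NULL)
		mutex_enter(rq_lock);
	if (sq_lock != NULL && sq_lock != rq_lock)
		mutex_enter(sq_lock);
}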
2286 2266
2287 2267
2288 2268 /*
2289 2269 * hermon_wrid_to_reset_handling()
2290 2270 * Context: Can be called from interrupt or base context.
2291 2271 */
2292 2272 int
2293 2273 hermon_wrid_to_reset_handling(hermon_state_t *state, hermon_qphdl_t qp)
2294 2274 {
2295 2275 if (qp->qp_alloc_flags & IBT_QP_USER_MAP)
2296 2276 return (DDI_SUCCESS);
2297 2277
2298 2278 /*
2299 2279 * If there are unpolled entries in these CQs, they are
2300 2280 * polled/flushed.
2301 2281 * Grab the CQ lock(s) before manipulating the lists.
2302 2282 */
2303 -#ifdef __lock_lint
2304 - mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
2305 - mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
2306 -#else
2307 2283 /* grab the cq lock(s) to modify the wqavl tree */
2308 2284 if (qp->qp_rq_cqhdl)
2309 2285 mutex_enter(&qp->qp_rq_cqhdl->cq_lock);
2310 2286 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
2311 2287 qp->qp_sq_cqhdl != NULL)
2312 2288 mutex_enter(&qp->qp_sq_cqhdl->cq_lock);
2313 -#endif
2314 2289
2315 -#ifdef __lock_lint
2316 - mutex_enter(&qp->qp_srqhdl->srq_lock);
2317 -#else
2318 2290 if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
2319 2291 mutex_enter(&qp->qp_srqhdl->srq_lock);
2320 2292 }
2321 -#endif
2322 2293 /*
2323 2294 * Flush the entries on the CQ for this QP's QPN.
2324 2295 */
2325 2296 hermon_cq_entries_flush(state, qp);
2326 2297
2327 -#ifdef __lock_lint
2328 - mutex_exit(&qp->qp_srqhdl->srq_lock);
2329 -#else
2330 2298 if (qp->qp_alloc_flags & IBT_QP_USES_SRQ) {
2331 2299 mutex_exit(&qp->qp_srqhdl->srq_lock);
2332 2300 }
2333 -#endif
2334 2301
2335 2302 hermon_cq_workq_remove(qp->qp_rq_cqhdl, &qp->qp_rq_wqavl);
2336 2303 if (qp->qp_sq_cqhdl != NULL)
2337 2304 hermon_cq_workq_remove(qp->qp_sq_cqhdl, &qp->qp_sq_wqavl);
2338 2305
2339 -#ifdef __lock_lint
2340 - mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
2341 - mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
2342 -#else
2343 2306 if (qp->qp_rq_cqhdl != qp->qp_sq_cqhdl &&
2344 2307 qp->qp_sq_cqhdl != NULL)
2345 2308 mutex_exit(&qp->qp_sq_cqhdl->cq_lock);
2346 2309 if (qp->qp_rq_cqhdl)
2347 2310 mutex_exit(&qp->qp_rq_cqhdl->cq_lock);
2348 -#endif
2349 2311
2350 2312 return (IBT_SUCCESS);
2351 2313 }
2352 2314
2353 2315
2354 2316 /*
2355 2317 * hermon_wrid_get_entry()
2356 2318 * Context: Can be called from interrupt or base context.
2357 2319 */
2358 2320 uint64_t
2359 2321 hermon_wrid_get_entry(hermon_cqhdl_t cq, hermon_hw_cqe_t *cqe)
2360 2322 {
2361 2323 hermon_workq_avl_t *wqa;
2362 2324 hermon_workq_hdr_t *wq;
2363 2325 uint64_t wrid;
2364 2326 uint_t send_or_recv, qpnum;
2365 2327 uint32_t indx;
2366 2328
2367 2329 /*
2368 2330 * Determine whether this CQE is a send or receive completion.
2369 2331 */
2370 2332 send_or_recv = HERMON_CQE_SENDRECV_GET(cq, cqe);
2371 2333
2372 2334 /* Find the work queue for this QP number (send or receive side) */
2373 2335 qpnum = HERMON_CQE_QPNUM_GET(cq, cqe);
2374 2336 wqa = hermon_wrid_wqavl_find(cq, qpnum, send_or_recv);
2375 2337 wq = wqa->wqa_wq;
2376 2338
2377 2339 /*
2378 2340 * Regardless of whether the completion is the result of a "success"
2379 2341 * or a "failure", we lock the list of "containers" and attempt to
2380 2342 	 * search for the first matching completion (i.e. the first WR
2381 2343 * with a matching WQE addr and size). Once we find it, we pull out
2382 2344 * the "wrid" field and return it (see below). XXX Note: One possible
2383 2345 * future enhancement would be to enable this routine to skip over
2384 2346 * any "unsignaled" completions to go directly to the next "signaled"
2385 2347 * entry on success.
2386 2348 */
2387 2349 indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) & wq->wq_mask;
2388 2350 wrid = wq->wq_wrid[indx];
2389 2351 if (wqa->wqa_srq_en) {
2390 2352 struct hermon_sw_srq_s *srq;
2391 2353 uint64_t *desc;
2392 2354
2393 2355 /* put wqe back on the srq free list */
2394 2356 srq = wqa->wqa_srq;
2395 2357 mutex_enter(&srq->srq_lock);
2396 2358 desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
2397 2359 ((uint16_t *)desc)[1] = htons(indx);
2398 2360 wq->wq_tail = indx;
2399 2361 mutex_exit(&srq->srq_lock);
2400 2362 } else {
2401 2363 wq->wq_head = (indx + 1) & wq->wq_mask;
2402 2364 wq->wq_full = 0;
2403 2365 }
2404 2366
2405 2367 return (wrid);
2406 2368 }
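
Editor's note: the index computation above ("& wq->wq_mask") works because work queue sizes are powers of two, so masking by size - 1 is a cheap modulo; wq_mask is established as (bufsz - 1) in hermon_wrid_wqhdr_create() below. A tiny sketch:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
	uint32_t bufsz = 256;			/* must be a power of two */
	uint32_t mask = bufsz - 1;		/* as wq_mask is computed */
	uint32_t wqe_counter = 257;		/* pretend CQE counter value */

	assert((wqe_counter & mask) == 1);	/* wraps past the end */
	assert(((255u + 1) & mask) == 0);	/* tail wraps to slot 0 */
	return (0);
}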
2407 2369
2408 2370
2409 2371 int
2410 2372 hermon_wrid_workq_compare(const void *p1, const void *p2)
2411 2373 {
2412 2374 hermon_workq_compare_t *cmpp;
2413 2375 hermon_workq_avl_t *curr;
2414 2376
2415 2377 cmpp = (hermon_workq_compare_t *)p1;
2416 2378 curr = (hermon_workq_avl_t *)p2;
2417 2379
2418 2380 if (cmpp->cmp_qpn < curr->wqa_qpn)
2419 2381 return (-1);
2420 2382 else if (cmpp->cmp_qpn > curr->wqa_qpn)
2421 2383 return (+1);
2422 2384 else if (cmpp->cmp_type < curr->wqa_type)
2423 2385 return (-1);
2424 2386 else if (cmpp->cmp_type > curr->wqa_type)
2425 2387 return (+1);
2426 2388 else
2427 2389 return (0);
2428 2390 }
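
Editor's note: for context, a comparator of this shape is handed to avl_create() (from <sys/avl.h>) when the tree is initialized. The sketch below is an illustration with made-up node and field names; only the avl_create() signature itself is the real interface.

#include <sys/types.h>
#include <sys/avl.h>
#include <stddef.h>

/* Illustrative node keyed by (qpn, type), like hermon_workq_avl_t. */
typedef struct demo_node {
	uint_t		dn_qpn;
	uint_t		dn_type;
	avl_node_t	dn_link;	/* embedded AVL linkage */
} demo_node_t;

static int
demo_compare(const void *p1, const void *p2)
{
	const demo_node_t *a = p1;
	const demo_node_t *b = p2;

	if (a->dn_qpn != b->dn_qpn)
		return (a->dn_qpn < b->dn_qpn ? -1 : +1);
	if (a->dn_type != b->dn_type)
		return (a->dn_type < b->dn_type ? -1 : +1);
	return (0);
}

static void
demo_tree_init(avl_tree_t *tree)
{
	avl_create(tree, demo_compare,
	    sizeof (demo_node_t), offsetof(demo_node_t, dn_link));
}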
2429 2391
2430 2392
2431 2393 /*
2432 2394  * hermon_wrid_wqavl_find()
2433 2395 * Context: Can be called from interrupt or base context.
2434 2396 */
2435 2397 static hermon_workq_avl_t *
2436 2398 hermon_wrid_wqavl_find(hermon_cqhdl_t cq, uint_t qpn, uint_t wq_type)
2437 2399 {
2438 2400 hermon_workq_avl_t *curr;
2439 2401 hermon_workq_compare_t cmp;
2440 2402
2441 2403 /*
2442 2404 * Walk the CQ's work queue list, trying to find a send or recv queue
2443 2405 * with the same QP number. We do this even if we are going to later
2444 2406 * create a new entry because it helps us easily find the end of the
2445 2407 * list.
2446 2408 */
2447 2409 cmp.cmp_qpn = qpn;
2448 2410 cmp.cmp_type = wq_type;
2449 -#ifdef __lock_lint
2450 - hermon_wrid_workq_compare(NULL, NULL);
2451 -#endif
2452 2411 curr = avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, NULL);
2453 2412
2454 2413 return (curr);
2455 2414 }
2456 2415
2457 2416
2458 2417 /*
2459 2418 * hermon_wrid_wqhdr_create()
2460 2419 * Context: Can be called from base context.
2461 2420 */
2462 2421 /* ARGSUSED */
2463 2422 hermon_workq_hdr_t *
2464 2423 hermon_wrid_wqhdr_create(int bufsz)
2465 2424 {
2466 2425 hermon_workq_hdr_t *wqhdr;
2467 2426
2468 2427 /*
2469 2428 * Allocate space for the wqhdr, and an array to record all the wrids.
2470 2429 */
2471 2430 wqhdr = (hermon_workq_hdr_t *)kmem_zalloc(sizeof (*wqhdr), KM_NOSLEEP);
2472 2431 if (wqhdr == NULL) {
2473 2432 return (NULL);
2474 2433 }
2475 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*wqhdr))
2476 2434 wqhdr->wq_wrid = kmem_zalloc(bufsz * sizeof (uint64_t), KM_NOSLEEP);
2477 2435 if (wqhdr->wq_wrid == NULL) {
2478 2436 kmem_free(wqhdr, sizeof (*wqhdr));
2479 2437 return (NULL);
2480 2438 }
2481 2439 wqhdr->wq_size = bufsz;
2482 2440 wqhdr->wq_mask = bufsz - 1;
2483 2441
2484 2442 return (wqhdr);
2485 2443 }
2486 2444
2487 2445 void
2488 2446 hermon_wrid_wqhdr_destroy(hermon_workq_hdr_t *wqhdr)
2489 2447 {
2490 2448 kmem_free(wqhdr->wq_wrid, wqhdr->wq_size * sizeof (uint64_t));
2491 2449 kmem_free(wqhdr, sizeof (*wqhdr));
2492 2450 }
2493 2451
2494 2452
2495 2453 /*
2496 2454 * hermon_cq_workq_add()
2497 2455 * Context: Can be called from interrupt or base context.
2498 2456 */
2499 2457 static void
2500 2458 hermon_cq_workq_add(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
2501 2459 {
2502 2460 hermon_workq_compare_t cmp;
2503 2461 avl_index_t where;
2504 2462
2505 2463 cmp.cmp_qpn = wqavl->wqa_qpn;
2506 2464 cmp.cmp_type = wqavl->wqa_type;
2507 -#ifdef __lock_lint
2508 - hermon_wrid_workq_compare(NULL, NULL);
2509 -#endif
2510 2465 (void) avl_find(&cq->cq_wrid_wqhdr_avl_tree, &cmp, &where);
2511 2466 avl_insert(&cq->cq_wrid_wqhdr_avl_tree, wqavl, where);
2512 2467 }
2513 2468
2514 2469
2515 2470 /*
2516 2471 * hermon_cq_workq_remove()
2517 2472 * Context: Can be called from interrupt or base context.
2518 2473 */
2519 2474 static void
2520 2475 hermon_cq_workq_remove(hermon_cqhdl_t cq, hermon_workq_avl_t *wqavl)
2521 2476 {
2522 -#ifdef __lock_lint
2523 - hermon_wrid_workq_compare(NULL, NULL);
2524 -#endif
2525 2477 avl_remove(&cq->cq_wrid_wqhdr_avl_tree, wqavl);
2526 2478 }