1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * hermon_qp.c
28 * Hermon Queue Pair Processing Routines
29 *
30 * Implements all the routines necessary for allocating, freeing, and
31 * querying the Hermon queue pairs.
32 */
33
34 #include <sys/types.h>
35 #include <sys/conf.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/modctl.h>
39 #include <sys/bitmap.h>
40 #include <sys/sysmacros.h>
41
42 #include <sys/ib/adapters/hermon/hermon.h>
43 #include <sys/ib/ib_pkt_hdrs.h>
44
45 static int hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
46 hermon_rsrc_t *qpc);
47 static int hermon_qpn_avl_compare(const void *q, const void *e);
48 static int hermon_special_qp_rsrc_alloc(hermon_state_t *state,
49 ibt_sqp_type_t type, uint_t port, hermon_rsrc_t **qp_rsrc);
50 static int hermon_special_qp_rsrc_free(hermon_state_t *state,
51 ibt_sqp_type_t type, uint_t port);
52 static void hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
53 uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
54 uint_t *logwqesz, uint_t *max_sgl);
55
56 /*
57 * hermon_qp_alloc()
58 * Context: Can be called only from user or kernel context.
59 */
60 int
61 hermon_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
62 uint_t sleepflag)
63 {
64 hermon_rsrc_t *qpc, *rsrc;
65 hermon_rsrc_type_t rsrc_type;
66 hermon_umap_db_entry_t *umapdb;
67 hermon_qphdl_t qp;
68 ibt_qp_alloc_attr_t *attr_p;
69 ibt_qp_alloc_flags_t alloc_flags;
70 ibt_qp_type_t type;
71 hermon_qp_wq_type_t swq_type;
72 ibtl_qp_hdl_t ibt_qphdl;
73 ibt_chan_sizes_t *queuesz_p;
74 ib_qpn_t *qpn;
75 hermon_qphdl_t *qphdl;
76 ibt_mr_attr_t mr_attr;
77 hermon_mr_options_t mr_op;
78 hermon_srqhdl_t srq;
79 hermon_pdhdl_t pd;
80 hermon_cqhdl_t sq_cq, rq_cq;
81 hermon_mrhdl_t mr;
82 uint64_t value, qp_desc_off;
83 uint64_t *thewqe, thewqesz;
84 uint32_t *sq_buf, *rq_buf;
85 uint32_t log_qp_sq_size, log_qp_rq_size;
86 uint32_t sq_size, rq_size;
87 uint32_t sq_depth, rq_depth;
88 uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift;
89 uint32_t max_sgl, max_recv_sgl, uarpg;
90 uint_t qp_is_umap;
91 uint_t qp_srq_en, i, j;
92 int status, flag;
93
94 /*
95 * Extract the necessary info from the hermon_qp_info_t structure
96 */
97 attr_p = qpinfo->qpi_attrp;
98 type = qpinfo->qpi_type;
99 ibt_qphdl = qpinfo->qpi_ibt_qphdl;
100 queuesz_p = qpinfo->qpi_queueszp;
101 qpn = qpinfo->qpi_qpn;
102 qphdl = &qpinfo->qpi_qphdl;
103 alloc_flags = attr_p->qp_alloc_flags;
104
105 /*
106 * Verify correctness of alloc_flags.
107 *
108 * 1. FEXCH and RSS are only allocated via qp_range.
109 */
110 if (alloc_flags & (IBT_QP_USES_FEXCH | IBT_QP_USES_RSS)) {
111 return (IBT_INVALID_PARAM);
112 }
113 rsrc_type = HERMON_QPC;
114 qp_is_umap = 0;
115
116 /* 2. Make sure only one of these flags is set. */
117 switch (alloc_flags &
118 (IBT_QP_USER_MAP | IBT_QP_USES_RFCI | IBT_QP_USES_FCMD)) {
119 case IBT_QP_USER_MAP:
120 qp_is_umap = 1;
121 break;
122 case IBT_QP_USES_RFCI:
123 if (type != IBT_UD_RQP)
124 return (IBT_INVALID_PARAM);
125
126 switch (attr_p->qp_fc.fc_hca_port) {
127 case 1:
128 rsrc_type = HERMON_QPC_RFCI_PORT1;
129 break;
130 case 2:
131 rsrc_type = HERMON_QPC_RFCI_PORT2;
132 break;
133 default:
134 return (IBT_INVALID_PARAM);
135 }
136 break;
137 case IBT_QP_USES_FCMD:
138 if (type != IBT_UD_RQP)
139 return (IBT_INVALID_PARAM);
140 break;
141 case 0:
142 break;
143 default:
144 return (IBT_INVALID_PARAM); /* conflicting flags set */
145 }
146
147 /*
148 * Determine whether QP is being allocated for userland access or
149 * whether it is being allocated for kernel access. If the QP is
150 * being allocated for userland access, then lookup the UAR
151 * page number for the current process. Note: If this is not found
152 * (e.g. if the process has not previously open()'d the Hermon driver),
153 * then an error is returned.
154 */
155 if (qp_is_umap) {
156 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
157 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
158 if (status != DDI_SUCCESS) {
159 return (IBT_INVALID_PARAM);
160 }
161 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
162 } else {
163 uarpg = state->hs_kernel_uar_index;
164 }
165
166 /*
167 * Determine whether QP is being associated with an SRQ
168 */
169 qp_srq_en = (alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
170 if (qp_srq_en) {
171 /*
172 * Check for valid SRQ handle pointers
173 */
174 if (attr_p->qp_ibc_srq_hdl == NULL) {
175 status = IBT_SRQ_HDL_INVALID;
176 goto qpalloc_fail;
177 }
178 srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl;
179 }
180
181 /*
182 * Check for valid QP service type (only UD/RC/UC supported)
183 */
184 if (((type != IBT_UD_RQP) && (type != IBT_RC_RQP) &&
185 (type != IBT_UC_RQP))) {
186 status = IBT_QP_SRV_TYPE_INVALID;
187 goto qpalloc_fail;
188 }
189
190
191 /*
192 * Check for valid PD handle pointer
193 */
194 if (attr_p->qp_pd_hdl == NULL) {
195 status = IBT_PD_HDL_INVALID;
196 goto qpalloc_fail;
197 }
198 pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
199
200 /*
201 * If on an SRQ, check to make sure the PD is the same
202 */
203 if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
204 status = IBT_PD_HDL_INVALID;
205 goto qpalloc_fail;
206 }
207
208 /* Increment the reference count on the protection domain (PD) */
209 hermon_pd_refcnt_inc(pd);
210
211 /*
212 * Check for valid CQ handle pointers
213 *
214 * FCMD QPs do not require a receive cq handle.
215 */
216 if (attr_p->qp_ibc_scq_hdl == NULL) {
217 status = IBT_CQ_HDL_INVALID;
218 goto qpalloc_fail1;
219 }
220 sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl;
221 if ((attr_p->qp_ibc_rcq_hdl == NULL)) {
222 if ((alloc_flags & IBT_QP_USES_FCMD) == 0) {
223 status = IBT_CQ_HDL_INVALID;
224 goto qpalloc_fail1;
225 }
226 rq_cq = sq_cq; /* just use the send cq */
227 } else
228 rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
229
230 /*
231 * Increment the reference count on the CQs. One or both of these
232 * could return error if we determine that the given CQ is already
233 * being used with a special (SMI/GSI) QP.
234 */
235 status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL);
236 if (status != DDI_SUCCESS) {
237 status = IBT_CQ_HDL_INVALID;
238 goto qpalloc_fail1;
239 }
240 status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL);
241 if (status != DDI_SUCCESS) {
242 status = IBT_CQ_HDL_INVALID;
243 goto qpalloc_fail2;
244 }
245
246 /*
247 * Allocate an QP context entry. This will be filled in with all
248 * the necessary parameters to define the Queue Pair. Unlike
249 * other Hermon hardware resources, ownership is not immediately
250 * given to hardware in the final step here. Instead, we must
251 * wait until the QP is later transitioned to the "Init" state before
252 * passing the QP to hardware. If we fail here, we must undo all
253 * the reference count (CQ and PD).
254 */
255 status = hermon_rsrc_alloc(state, rsrc_type, 1, sleepflag, &qpc);
256 if (status != DDI_SUCCESS) {
257 status = IBT_INSUFF_RESOURCE;
258 goto qpalloc_fail3;
259 }
260
261 /*
262 * Allocate the software structure for tracking the queue pair
263 * (i.e. the Hermon Queue Pair handle). If we fail here, we must
264 * undo the reference counts and the previous resource allocation.
265 */
266 status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
267 if (status != DDI_SUCCESS) {
268 status = IBT_INSUFF_RESOURCE;
269 goto qpalloc_fail4;
270 }
271 qp = (hermon_qphdl_t)rsrc->hr_addr;
272 bzero(qp, sizeof (struct hermon_sw_qp_s));
273
274 qp->qp_alloc_flags = alloc_flags;
275
276 /*
277 * Calculate the QP number from QPC index. This routine handles
278 * all of the operations necessary to keep track of used, unused,
279 * and released QP numbers.
280 */
281 if (type == IBT_UD_RQP) {
282 qp->qp_qpnum = qpc->hr_indx;
283 qp->qp_ring = qp->qp_qpnum << 8;
284 qp->qp_qpn_hdl = NULL;
285 } else {
286 status = hermon_qp_create_qpn(state, qp, qpc);
287 if (status != DDI_SUCCESS) {
288 status = IBT_INSUFF_RESOURCE;
289 goto qpalloc_fail5;
290 }
291 }
292
293 /*
294 * If this will be a user-mappable QP, then allocate an entry for
295 * the "userland resources database". This will later be added to
296 * the database (after all further QP operations are successful).
297 * If we fail here, we must undo the reference counts and the
298 * previous resource allocation.
299 */
300 if (qp_is_umap) {
301 umapdb = hermon_umap_db_alloc(state->hs_instance, qp->qp_qpnum,
302 MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
303 if (umapdb == NULL) {
304 status = IBT_INSUFF_RESOURCE;
305 goto qpalloc_fail6;
306 }
307 }
308
309 /*
310 * Allocate the doorbell record. Hermon just needs one for the RQ,
311 * if the QP is not associated with an SRQ, and use uarpg (above) as
312 * the uar index
313 */
314
315 if (!qp_srq_en) {
316 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
317 &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
318 if (status != DDI_SUCCESS) {
319 status = IBT_INSUFF_RESOURCE;
320 goto qpalloc_fail6;
321 }
322 }
323
324 qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO);
325
326 /*
327 * We verify that the requested number of SGL is valid (i.e.
328 * consistent with the device limits and/or software-configured
329 * limits). If not, then obviously the same cleanup needs to be done.
330 */
331 if (type == IBT_UD_RQP) {
332 max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz;
333 swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD;
334 } else {
335 max_sgl = state->hs_ibtfinfo.hca_attr->hca_conn_send_sgl_sz;
336 swq_type = HERMON_QP_WQ_TYPE_SENDQ_CONN;
337 }
338 max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz;
339 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
340 (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) {
341 status = IBT_HCA_SGL_EXCEEDED;
342 goto qpalloc_fail7;
343 }
344
345 /*
346 * Determine this QP's WQE stride (for both the Send and Recv WQEs).
347 * This will depend on the requested number of SGLs. Note: this
348 * has the side-effect of also calculating the real number of SGLs
349 * (for the calculated WQE size).
350 *
351 * For QP's on an SRQ, we set these to 0.
352 */
353 if (qp_srq_en) {
354 qp->qp_rq_log_wqesz = 0;
355 qp->qp_rq_sgl = 0;
356 } else {
357 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
358 max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ,
359 &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
360 }
361 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
362 max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
363
364 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
365
366 /* NOTE: currently policy in driver, later maybe IBTF interface */
367 qp->qp_no_prefetch = 0;
368
369 /*
370 * for prefetching, we need to add the number of wqes in
371 * the 2k area plus one to the number requested, but
372 * ONLY for send queue. If no_prefetch == 1 (prefetch off)
373 * it's exactly TWO wqes for the headroom
374 */
375 if (qp->qp_no_prefetch)
376 qp->qp_sq_headroom = 2 * sq_wqe_size;
377 else
378 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
379 /*
380 * hdrm wqes must be integral since both sq_wqe_size &
381 * HERMON_QP_OH_SIZE are power of 2
382 */
383 qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
384
385
386 /*
387 * Calculate the appropriate size for the work queues.
388 * For send queue, add in the headroom wqes to the calculation.
389 * Note: All Hermon QP work queues must be a power-of-2 in size. Also
390 * they may not be any smaller than HERMON_QP_MIN_SIZE. This step is
391 * to round the requested size up to the next highest power-of-2
392 */
393 /* first, adjust to a minimum and tell the caller the change */
394 attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq,
395 HERMON_QP_MIN_SIZE);
396 attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq,
397 HERMON_QP_MIN_SIZE);
398 /*
399 * now, calculate the alloc size, taking into account
400 * the headroom for the sq
401 */
402 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes);
403 /* if the total is a power of two, reduce it */
404 if (ISP2(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes)) {
405 log_qp_sq_size = log_qp_sq_size - 1;
406 }
407
408 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
409 if (ISP2(attr_p->qp_sizes.cs_rq)) {
410 log_qp_rq_size = log_qp_rq_size - 1;
411 }
412
413 /*
414 * Next we verify that the rounded-up size is valid (i.e. consistent
415 * with the device limits and/or software-configured limits). If not,
416 * then obviously we have a lot of cleanup to do before returning.
417 *
418 * NOTE: the first condition deals with the (test) case of cs_sq
419 * being just less than 2^32. In this case, the headroom addition
420 * to the requested cs_sq will pass the test when it should not.
421 * This test no longer lets that case slip through the check.
422 */
423 if ((attr_p->qp_sizes.cs_sq >
424 (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) ||
425 (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
426 (!qp_srq_en && (log_qp_rq_size >
427 state->hs_cfg_profile->cp_log_max_qp_sz))) {
428 status = IBT_HCA_WR_EXCEEDED;
429 goto qpalloc_fail7;
430 }
431
432 /*
433 * Allocate the memory for QP work queues. Since Hermon work queues
434 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
435 * the work queue memory is very important. We used to allocate
436 * work queues (the combined receive and send queues) so that they
437 * would be aligned on their combined size. That alignment guaranteed
438 * that they would never cross the 4GB boundary (Hermon work queues
439 * are on the order of MBs at maximum). Now we are able to relax
440 * this alignment constraint by ensuring that the IB address assigned
441 * to the queue memory (as a result of the hermon_mr_register() call)
442 * is offset from zero.
443 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
444 * guarantee the alignment, but when attempting to use IOMMU bypass
445 * mode we found that we were not allowed to specify any alignment
446 * that was more restrictive than the system page size.
447 * So we avoided this constraint by passing two alignment values,
448 * one for the memory allocation itself and the other for the DMA
449 * handle (for later bind). This used to cause more memory than
450 * necessary to be allocated (in order to guarantee the more
451 * restrictive alignment contraint). But by guaranteeing the
452 * zero-based IB virtual address for the queue, we are able to
453 * conserve this memory.
454 */
455 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
456 sq_depth = 1 << log_qp_sq_size;
457 sq_size = sq_depth * sq_wqe_size;
458
459 /* QP on SRQ sets these to 0 */
460 if (qp_srq_en) {
461 rq_wqe_size = 0;
462 rq_size = 0;
463 } else {
464 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
465 rq_depth = 1 << log_qp_rq_size;
466 rq_size = rq_depth * rq_wqe_size;
467 }
468
469 qp->qp_wqinfo.qa_size = sq_size + rq_size;
470
471 qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
472 qp->qp_wqinfo.qa_bind_align = PAGESIZE;
473
474 if (qp_is_umap) {
475 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
476 } else {
477 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
478 }
479 status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
480 if (status != DDI_SUCCESS) {
481 status = IBT_INSUFF_RESOURCE;
482 goto qpalloc_fail7;
483 }
484
485 /*
486 * Sort WQs in memory according to stride (*q_wqe_size), largest first
487 * If they are equal, still put the SQ first
488 */
489 qp->qp_sq_baseaddr = 0;
490 qp->qp_rq_baseaddr = 0;
491 if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
492 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
493
494 /* if this QP is on an SRQ, set the rq_buf to NULL */
495 if (qp_srq_en) {
496 rq_buf = NULL;
497 } else {
498 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
499 qp->qp_rq_baseaddr = sq_size;
500 }
501 } else {
502 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
503 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
504 qp->qp_sq_baseaddr = rq_size;
505 }
506
507 if (qp_is_umap == 0) {
508 qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
509 if (qp->qp_sq_wqhdr == NULL) {
510 status = IBT_INSUFF_RESOURCE;
511 goto qpalloc_fail8;
512 }
513 if (qp_srq_en) {
514 qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr;
515 qp->qp_rq_wqavl.wqa_srq_en = 1;
516 qp->qp_rq_wqavl.wqa_srq = srq;
517 } else {
518 qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth);
519 if (qp->qp_rq_wqhdr == NULL) {
520 status = IBT_INSUFF_RESOURCE;
521 goto qpalloc_fail8;
522 }
523 qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
524 }
525 qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
526 qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
527 qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
528 qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
529 qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
530 }
531
532 /*
533 * Register the memory for the QP work queues. The memory for the
534 * QP must be registered in the Hermon cMPT tables. This gives us the
535 * LKey to specify in the QP context later. Note: The memory for
536 * Hermon work queues (both Send and Recv) must be contiguous and
537 * registered as a single memory region. Note: If the QP memory is
538 * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to
539 * meet the alignment restriction, we pass the "mro_bind_override_addr"
540 * flag in the call to hermon_mr_register(). This guarantees that the
541 * resulting IB vaddr will be zero-based (modulo the offset into the
542 * first page). If we fail here, we still have the bunch of resource
543 * and reference count cleanup to do.
544 */
545 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
546 IBT_MR_NOSLEEP;
547 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
548 mr_attr.mr_len = qp->qp_wqinfo.qa_size;
549 mr_attr.mr_as = NULL;
550 mr_attr.mr_flags = flag;
551 if (qp_is_umap) {
552 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
553 } else {
554 /* HERMON_QUEUE_LOCATION_NORMAL */
555 mr_op.mro_bind_type =
556 state->hs_cfg_profile->cp_iommu_bypass;
557 }
558 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
559 mr_op.mro_bind_override_addr = 1;
560 status = hermon_mr_register(state, pd, &mr_attr, &mr,
561 &mr_op, HERMON_QP_CMPT);
562 if (status != DDI_SUCCESS) {
563 status = IBT_INSUFF_RESOURCE;
564 goto qpalloc_fail9;
565 }
566
567 /*
568 * Calculate the offset between the kernel virtual address space
569 * and the IB virtual address space. This will be used when
570 * posting work requests to properly initialize each WQE.
571 */
572 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
573 (uint64_t)mr->mr_bindinfo.bi_addr;
574
575 /*
576 * Fill in all the return arguments (if necessary). This includes
577 * real work queue sizes (in wqes), real SGLs, and QP number
578 */
579 if (queuesz_p != NULL) {
580 queuesz_p->cs_sq =
581 (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
582 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
583
584 /* if this QP is on an SRQ, set these to 0 */
585 if (qp_srq_en) {
586 queuesz_p->cs_rq = 0;
587 queuesz_p->cs_rq_sgl = 0;
588 } else {
589 queuesz_p->cs_rq = (1 << log_qp_rq_size);
590 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
591 }
592 }
593 if (qpn != NULL) {
594 *qpn = (ib_qpn_t)qp->qp_qpnum;
595 }
596
597 /*
598 * Fill in the rest of the Hermon Queue Pair handle.
599 */
600 qp->qp_qpcrsrcp = qpc;
601 qp->qp_rsrcp = rsrc;
602 qp->qp_state = HERMON_QP_RESET;
603 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
604 qp->qp_pdhdl = pd;
605 qp->qp_mrhdl = mr;
606 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
607 HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
608 qp->qp_is_special = 0;
609 qp->qp_uarpg = uarpg;
610 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
611 qp->qp_sq_cqhdl = sq_cq;
612 qp->qp_sq_bufsz = (1 << log_qp_sq_size);
613 qp->qp_sq_logqsz = log_qp_sq_size;
614 qp->qp_sq_buf = sq_buf;
615 qp->qp_desc_off = qp_desc_off;
616 qp->qp_rq_cqhdl = rq_cq;
617 qp->qp_rq_buf = rq_buf;
618 qp->qp_rlky = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) !=
619 0;
620
621 /* if this QP is on an SRQ, set rq_bufsz to 0 */
622 if (qp_srq_en) {
623 qp->qp_rq_bufsz = 0;
624 qp->qp_rq_logqsz = 0;
625 } else {
626 qp->qp_rq_bufsz = (1 << log_qp_rq_size);
627 qp->qp_rq_logqsz = log_qp_rq_size;
628 }
629
630 qp->qp_forward_sqd_event = 0;
631 qp->qp_sqd_still_draining = 0;
632 qp->qp_hdlrarg = (void *)ibt_qphdl;
633 qp->qp_mcg_refcnt = 0;
634
635 /*
636 * If this QP is to be associated with an SRQ, set the SRQ handle
637 */
638 if (qp_srq_en) {
639 qp->qp_srqhdl = srq;
640 hermon_srq_refcnt_inc(qp->qp_srqhdl);
641 } else {
642 qp->qp_srqhdl = NULL;
643 }
644
645 /* Determine the QP service type */
646 qp->qp_type = type;
647 if (type == IBT_RC_RQP) {
648 qp->qp_serv_type = HERMON_QP_RC;
649 } else if (type == IBT_UD_RQP) {
650 if (alloc_flags & IBT_QP_USES_RFCI)
651 qp->qp_serv_type = HERMON_QP_RFCI;
652 else if (alloc_flags & IBT_QP_USES_FCMD)
653 qp->qp_serv_type = HERMON_QP_FCMND;
654 else
655 qp->qp_serv_type = HERMON_QP_UD;
656 } else {
657 qp->qp_serv_type = HERMON_QP_UC;
658 }
659
660 /*
661 * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
662 */
663
664 /*
665 * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
666 * set the quadword to all F's - high-order bit is owner (init to one)
667 * and the rest for the headroom definition of prefetching
668 *
669 */
670 wqesz_shift = qp->qp_sq_log_wqesz;
671 thewqesz = 1 << wqesz_shift;
672 thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
673 if (qp_is_umap == 0) {
674 for (i = 0; i < sq_depth; i++) {
675 /*
676 * for each stride, go through and every 64 bytes
677 * write the init value - having set the address
678 * once, just keep incrementing it
679 */
680 for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
681 *(uint32_t *)thewqe = 0xFFFFFFFF;
682 }
683 }
684 }
685
686 /* Zero out the QP context */
687 bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
688
689 /*
690 * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the
691 * "qphdl" and return success
692 */
693 hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx, qp);
694
695 /*
696 * If this is a user-mappable QP, then we need to insert the previously
697 * allocated entry into the "userland resources database". This will
698 * allow for later lookup during devmap() (i.e. mmap()) calls.
699 */
700 if (qp_is_umap) {
701 hermon_umap_db_add(umapdb);
702 }
703 mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
704 DDI_INTR_PRI(state->hs_intrmsi_pri));
705
706 *qphdl = qp;
707
708 return (DDI_SUCCESS);
709
710 /*
711 * The following is cleanup for all possible failure cases in this routine
712 */
713 qpalloc_fail9:
714 hermon_queue_free(&qp->qp_wqinfo);
715 qpalloc_fail8:
716 if (qp->qp_sq_wqhdr)
717 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
718 if (qp->qp_rq_wqhdr)
719 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
720 qpalloc_fail7:
721 if (qp_is_umap) {
722 hermon_umap_db_free(umapdb);
723 }
724 if (!qp_srq_en) {
725 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
726 }
727
728 qpalloc_fail6:
729 /*
730 * Releasing the QPN will also free up the QPC context. Update
731 * the QPC context pointer to indicate this.
732 */
733 if (qp->qp_qpn_hdl) {
734 hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
735 HERMON_QPN_RELEASE);
736 } else {
737 hermon_rsrc_free(state, &qpc);
738 }
739 qpc = NULL;
740 qpalloc_fail5:
741 hermon_rsrc_free(state, &rsrc);
742 qpalloc_fail4:
743 if (qpc) {
744 hermon_rsrc_free(state, &qpc);
745 }
746 qpalloc_fail3:
747 hermon_cq_refcnt_dec(rq_cq);
748 qpalloc_fail2:
749 hermon_cq_refcnt_dec(sq_cq);
750 qpalloc_fail1:
751 hermon_pd_refcnt_dec(pd);
752 qpalloc_fail:
753 return (status);
754 }
755
756
757
758 /*
759 * hermon_special_qp_alloc()
760 * Context: Can be called only from user or kernel context.
761 */
762 int
763 hermon_special_qp_alloc(hermon_state_t *state, hermon_qp_info_t *qpinfo,
764 uint_t sleepflag)
765 {
766 hermon_rsrc_t *qpc, *rsrc;
767 hermon_qphdl_t qp;
768 ibt_qp_alloc_attr_t *attr_p;
769 ibt_sqp_type_t type;
770 uint8_t port;
771 ibtl_qp_hdl_t ibt_qphdl;
772 ibt_chan_sizes_t *queuesz_p;
773 hermon_qphdl_t *qphdl;
774 ibt_mr_attr_t mr_attr;
775 hermon_mr_options_t mr_op;
776 hermon_pdhdl_t pd;
777 hermon_cqhdl_t sq_cq, rq_cq;
778 hermon_mrhdl_t mr;
779 uint64_t qp_desc_off;
780 uint64_t *thewqe, thewqesz;
781 uint32_t *sq_buf, *rq_buf;
782 uint32_t log_qp_sq_size, log_qp_rq_size;
783 uint32_t sq_size, rq_size, max_sgl;
784 uint32_t uarpg;
785 uint32_t sq_depth;
786 uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift;
787 int status, flag, i, j;
788
789 /*
790 * Extract the necessary info from the hermon_qp_info_t structure
791 */
792 attr_p = qpinfo->qpi_attrp;
793 type = qpinfo->qpi_type;
794 port = qpinfo->qpi_port;
795 ibt_qphdl = qpinfo->qpi_ibt_qphdl;
796 queuesz_p = qpinfo->qpi_queueszp;
797 qphdl = &qpinfo->qpi_qphdl;
798
799 /*
800 * Check for valid special QP type (only SMI & GSI supported)
801 */
802 if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) {
803 status = IBT_QP_SPECIAL_TYPE_INVALID;
804 goto spec_qpalloc_fail;
805 }
806
807 /*
808 * Check for valid port number
809 */
810 if (!hermon_portnum_is_valid(state, port)) {
811 status = IBT_HCA_PORT_INVALID;
812 goto spec_qpalloc_fail;
813 }
814 port = port - 1;
815
816 /*
817 * Check for valid PD handle pointer
818 */
819 if (attr_p->qp_pd_hdl == NULL) {
820 status = IBT_PD_HDL_INVALID;
821 goto spec_qpalloc_fail;
822 }
823 pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
824
825 /* Increment the reference count on the PD */
826 hermon_pd_refcnt_inc(pd);
827
828 /*
829 * Check for valid CQ handle pointers
830 */
831 if ((attr_p->qp_ibc_scq_hdl == NULL) ||
832 (attr_p->qp_ibc_rcq_hdl == NULL)) {
833 status = IBT_CQ_HDL_INVALID;
834 goto spec_qpalloc_fail1;
835 }
836 sq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_scq_hdl;
837 rq_cq = (hermon_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
838
839 /*
840 * Increment the reference count on the CQs. One or both of these
841 * could return error if we determine that the given CQ is already
842 * being used with a non-special QP (i.e. a normal QP).
843 */
844 status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_SPECIAL);
845 if (status != DDI_SUCCESS) {
846 status = IBT_CQ_HDL_INVALID;
847 goto spec_qpalloc_fail1;
848 }
849 status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_SPECIAL);
850 if (status != DDI_SUCCESS) {
851 status = IBT_CQ_HDL_INVALID;
852 goto spec_qpalloc_fail2;
853 }
854
855 /*
856 * Allocate the special QP resources. Essentially, this allocation
857 * amounts to checking if the request special QP has already been
858 * allocated. If successful, the QP context return is an actual
859 * QP context that has been "aliased" to act as a special QP of the
860 * appropriate type (and for the appropriate port). Just as in
861 * hermon_qp_alloc() above, ownership for this QP context is not
862 * immediately given to hardware in the final step here. Instead, we
863 * wait until the QP is later transitioned to the "Init" state before
864 * passing the QP to hardware. If we fail here, we must undo all
865 * the reference count (CQ and PD).
866 */
867 status = hermon_special_qp_rsrc_alloc(state, type, port, &qpc);
868 if (status != DDI_SUCCESS) {
869 goto spec_qpalloc_fail3;
870 }
871
872 /*
873 * Allocate the software structure for tracking the special queue
874 * pair (i.e. the Hermon Queue Pair handle). If we fail here, we
875 * must undo the reference counts and the previous resource allocation.
876 */
877 status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
878 if (status != DDI_SUCCESS) {
879 status = IBT_INSUFF_RESOURCE;
880 goto spec_qpalloc_fail4;
881 }
882 qp = (hermon_qphdl_t)rsrc->hr_addr;
883
884 bzero(qp, sizeof (struct hermon_sw_qp_s));
885
886 qp->qp_alloc_flags = attr_p->qp_alloc_flags;
887
888 /*
889 * Actual QP number is a combination of the index of the QPC and
890 * the port number. This is because the special QP contexts must
891 * be allocated two-at-a-time.
892 */
893 qp->qp_qpnum = qpc->hr_indx + port;
894 qp->qp_ring = qp->qp_qpnum << 8;
895
896 uarpg = state->hs_kernel_uar_index; /* must be for spec qp */
897 /*
898 * Allocate the doorbell record. Hermon uses only one for the RQ so
899 * alloc a qp doorbell, using uarpg (above) as the uar index
900 */
901
902 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
903 &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
904 if (status != DDI_SUCCESS) {
905 status = IBT_INSUFF_RESOURCE;
906 goto spec_qpalloc_fail5;
907 }
908 /*
909 * Calculate the appropriate size for the work queues.
910 * Note: All Hermon QP work queues must be a power-of-2 in size. Also
911 * they may not be any smaller than HERMON_QP_MIN_SIZE. This step is
912 * to round the requested size up to the next highest power-of-2
913 */
914 attr_p->qp_sizes.cs_sq =
915 max(attr_p->qp_sizes.cs_sq, HERMON_QP_MIN_SIZE);
916 attr_p->qp_sizes.cs_rq =
917 max(attr_p->qp_sizes.cs_rq, HERMON_QP_MIN_SIZE);
918 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
919 if (ISP2(attr_p->qp_sizes.cs_sq)) {
920 log_qp_sq_size = log_qp_sq_size - 1;
921 }
922 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
923 if (ISP2(attr_p->qp_sizes.cs_rq)) {
924 log_qp_rq_size = log_qp_rq_size - 1;
925 }
926
927 /*
928 * Next we verify that the rounded-up size is valid (i.e. consistent
929 * with the device limits and/or software-configured limits). If not,
930 * then obviously we have a bit of cleanup to do before returning.
931 */
932 if ((log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
933 (log_qp_rq_size > state->hs_cfg_profile->cp_log_max_qp_sz)) {
934 status = IBT_HCA_WR_EXCEEDED;
935 goto spec_qpalloc_fail5a;
936 }
937
938 /*
939 * Next we verify that the requested number of SGL is valid (i.e.
940 * consistent with the device limits and/or software-configured
941 * limits). If not, then obviously the same cleanup needs to be done.
942 */
943 max_sgl = state->hs_cfg_profile->cp_wqe_real_max_sgl;
944 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
945 (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) {
946 status = IBT_HCA_SGL_EXCEEDED;
947 goto spec_qpalloc_fail5a;
948 }
949
950 /*
951 * Determine this QP's WQE stride (for both the Send and Recv WQEs).
952 * This will depend on the requested number of SGLs. Note: this
953 * has the side-effect of also calculating the real number of SGLs
954 * (for the calculated WQE size).
955 */
956 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
957 max_sgl, HERMON_QP_WQ_TYPE_RECVQ,
958 &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
959 if (type == IBT_SMI_SQP) {
960 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
961 max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP0,
962 &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
963 } else {
964 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
965 max_sgl, HERMON_QP_WQ_TYPE_SENDMLX_QP1,
966 &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
967 }
968
969 /*
970 * Allocate the memory for QP work queues. Since Hermon work queues
971 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
972 * the work queue memory is very important. We used to allocate
973 * work queues (the combined receive and send queues) so that they
974 * would be aligned on their combined size. That alignment guaranteed
975 * that they would never cross the 4GB boundary (Hermon work queues
976 * are on the order of MBs at maximum). Now we are able to relax
977 * this alignment constraint by ensuring that the IB address assigned
978 * to the queue memory (as a result of the hermon_mr_register() call)
979 * is offset from zero.
980 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
981 * guarantee the alignment, but when attempting to use IOMMU bypass
982 * mode we found that we were not allowed to specify any alignment
983 * that was more restrictive than the system page size.
984 * So we avoided this constraint by passing two alignment values,
985 * one for the memory allocation itself and the other for the DMA
986 * handle (for later bind). This used to cause more memory than
987 * necessary to be allocated (in order to guarantee the more
988 * restrictive alignment contraint). But by guaranteeing the
989 * zero-based IB virtual address for the queue, we are able to
990 * conserve this memory.
991 */
992 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
993 sq_depth = 1 << log_qp_sq_size;
994 sq_size = (1 << log_qp_sq_size) * sq_wqe_size;
995
996 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
997 rq_size = (1 << log_qp_rq_size) * rq_wqe_size;
998
999 qp->qp_wqinfo.qa_size = sq_size + rq_size;
1000
1001 qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
1002 qp->qp_wqinfo.qa_bind_align = PAGESIZE;
1003 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
1004
1005 status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
1006 if (status != NULL) {
1007 status = IBT_INSUFF_RESOURCE;
1008 goto spec_qpalloc_fail5a;
1009 }
1010
1011 /*
1012 * Sort WQs in memory according to depth, stride (*q_wqe_size),
1013 * biggest first. If equal, the Send Queue still goes first
1014 */
1015 qp->qp_sq_baseaddr = 0;
1016 qp->qp_rq_baseaddr = 0;
1017 if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
1018 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
1019 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
1020 qp->qp_rq_baseaddr = sq_size;
1021 } else {
1022 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
1023 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
1024 qp->qp_sq_baseaddr = rq_size;
1025 }
1026
1027 qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
1028 if (qp->qp_sq_wqhdr == NULL) {
1029 status = IBT_INSUFF_RESOURCE;
1030 goto spec_qpalloc_fail6;
1031 }
1032 qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(1 << log_qp_rq_size);
1033 if (qp->qp_rq_wqhdr == NULL) {
1034 status = IBT_INSUFF_RESOURCE;
1035 goto spec_qpalloc_fail6;
1036 }
1037 qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
1038 qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
1039 qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
1040 qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
1041 qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
1042 qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
1043
1044 /*
1045 * Register the memory for the special QP work queues. The memory for
1046 * the special QP must be registered in the Hermon cMPT tables. This
1047 * gives us the LKey to specify in the QP context later. Note: The
1048 * memory for Hermon work queues (both Send and Recv) must be contiguous
1049 * and registered as a single memory region. Also, in order to meet the
1050 * alignment restriction, we pass the "mro_bind_override_addr" flag in
1051 * the call to hermon_mr_register(). This guarantees that the resulting
1052 * IB vaddr will be zero-based (modulo the offset into the first page).
1053 * If we fail here, we have a bunch of resource and reference count
1054 * cleanup to do.
1055 */
1056 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
1057 IBT_MR_NOSLEEP;
1058 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1059 mr_attr.mr_len = qp->qp_wqinfo.qa_size;
1060 mr_attr.mr_as = NULL;
1061 mr_attr.mr_flags = flag;
1062
1063 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
1064 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
1065 mr_op.mro_bind_override_addr = 1;
1066
1067 status = hermon_mr_register(state, pd, &mr_attr, &mr, &mr_op,
1068 HERMON_QP_CMPT);
1069 if (status != DDI_SUCCESS) {
1070 status = IBT_INSUFF_RESOURCE;
1071 goto spec_qpalloc_fail6;
1072 }
1073
1074 /*
1075 * Calculate the offset between the kernel virtual address space
1076 * and the IB virtual address space. This will be used when
1077 * posting work requests to properly initialize each WQE.
1078 */
1079 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
1080 (uint64_t)mr->mr_bindinfo.bi_addr;
1081
1082 /* set the prefetch - initially, not prefetching */
1083 qp->qp_no_prefetch = 1;
1084
1085 if (qp->qp_no_prefetch)
1086 qp->qp_sq_headroom = 2 * sq_wqe_size;
1087 else
1088 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
1089 /*
1090 * hdrm wqes must be integral since both sq_wqe_size &
1091 * HERMON_QP_OH_SIZE are power of 2
1092 */
1093 qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
1094 /*
1095 * Fill in all the return arguments (if necessary). This includes
1096 * real work queue sizes, real SGLs, and QP number (which will be
1097 * either zero or one, depending on the special QP type)
1098 */
1099 if (queuesz_p != NULL) {
1100 queuesz_p->cs_sq =
1101 (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
1102 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
1103 queuesz_p->cs_rq = (1 << log_qp_rq_size);
1104 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
1105 }
1106
1107 /*
1108 * Fill in the rest of the Hermon Queue Pair handle. We can update
1109 * the following fields for use in further operations on the QP.
1110 */
1111 qp->qp_qpcrsrcp = qpc;
1112 qp->qp_rsrcp = rsrc;
1113 qp->qp_state = HERMON_QP_RESET;
1114 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1115 qp->qp_pdhdl = pd;
1116 qp->qp_mrhdl = mr;
1117 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
1118 HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
1119 qp->qp_is_special = (type == IBT_SMI_SQP) ?
1120 HERMON_QP_SMI : HERMON_QP_GSI;
1121 qp->qp_uarpg = uarpg;
1122 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
1123 qp->qp_sq_cqhdl = sq_cq;
1124 qp->qp_sq_bufsz = (1 << log_qp_sq_size);
1125 qp->qp_sq_buf = sq_buf;
1126 qp->qp_sq_logqsz = log_qp_sq_size;
1127 qp->qp_desc_off = qp_desc_off;
1128 qp->qp_rq_cqhdl = rq_cq;
1129 qp->qp_rq_bufsz = (1 << log_qp_rq_size);
1130 qp->qp_rq_buf = rq_buf;
1131 qp->qp_rq_logqsz = log_qp_rq_size;
1132 qp->qp_portnum = port;
1133 qp->qp_pkeyindx = 0;
1134 qp->qp_forward_sqd_event = 0;
1135 qp->qp_sqd_still_draining = 0;
1136 qp->qp_hdlrarg = (void *)ibt_qphdl;
1137 qp->qp_mcg_refcnt = 0;
1138 qp->qp_srqhdl = NULL;
1139
1140 /* All special QPs are UD QP service type */
1141 qp->qp_type = IBT_UD_RQP;
1142 qp->qp_serv_type = HERMON_QP_UD;
1143
1144 /*
1145 * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
1146 */
1147
1148 /*
1149 * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
1150 * set the quadword to all F's - high-order bit is owner (init to one)
1151 * and the rest for the headroom definition of prefetching
1152 *
1153 */
1154
1155 wqesz_shift = qp->qp_sq_log_wqesz;
1156 thewqesz = 1 << wqesz_shift;
1157 thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
1158 for (i = 0; i < sq_depth; i++) {
1159 /*
1160 * for each stride, go through and every 64 bytes write the
1161 * init value - having set the address once, just keep
1162 * incrementing it
1163 */
1164 for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
1165 *(uint32_t *)thewqe = 0xFFFFFFFF;
1166 }
1167 }
1168
1169
1170 /* Zero out the QP context */
1171 bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
1172
1173 /*
1174 * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the
1175 * "qphdl" and return success
1176 */
1177 hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + port, qp);
1178
1179 mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
1180 DDI_INTR_PRI(state->hs_intrmsi_pri));
1181
1182 *qphdl = qp;
1183
1184 return (DDI_SUCCESS);
1185
1186 /*
1187 * The following is cleanup for all possible failure cases in this routine
1188 */
1189 spec_qpalloc_fail6:
1190 hermon_queue_free(&qp->qp_wqinfo);
1191 if (qp->qp_sq_wqhdr)
1192 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
1193 if (qp->qp_rq_wqhdr)
1194 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
1195 spec_qpalloc_fail5a:
1196 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
1197 spec_qpalloc_fail5:
1198 hermon_rsrc_free(state, &rsrc);
1199 spec_qpalloc_fail4:
1200 if (hermon_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) {
1201 HERMON_WARNING(state, "failed to free special QP rsrc");
1202 }
1203 spec_qpalloc_fail3:
1204 hermon_cq_refcnt_dec(rq_cq);
1205 spec_qpalloc_fail2:
1206 hermon_cq_refcnt_dec(sq_cq);
1207 spec_qpalloc_fail1:
1208 hermon_pd_refcnt_dec(pd);
1209 spec_qpalloc_fail:
1210 return (status);
1211 }
1212
1213
1214 /*
1215 * hermon_qp_alloc_range()
1216 * Context: Can be called only from user or kernel context.
1217 */
1218 int
1219 hermon_qp_alloc_range(hermon_state_t *state, uint_t log2,
1220 hermon_qp_info_t *qpinfo, ibtl_qp_hdl_t *ibt_qphdl,
1221 ibc_cq_hdl_t *send_cq, ibc_cq_hdl_t *recv_cq,
1222 hermon_qphdl_t *qphdl, uint_t sleepflag)
1223 {
1224 hermon_rsrc_t *qpc, *rsrc;
1225 hermon_rsrc_type_t rsrc_type;
1226 hermon_qphdl_t qp;
1227 hermon_qp_range_t *qp_range_p;
1228 ibt_qp_alloc_attr_t *attr_p;
1229 ibt_qp_type_t type;
1230 hermon_qp_wq_type_t swq_type;
1231 ibt_chan_sizes_t *queuesz_p;
1232 ibt_mr_attr_t mr_attr;
1233 hermon_mr_options_t mr_op;
1234 hermon_srqhdl_t srq;
1235 hermon_pdhdl_t pd;
1236 hermon_cqhdl_t sq_cq, rq_cq;
1237 hermon_mrhdl_t mr;
1238 uint64_t qp_desc_off;
1239 uint64_t *thewqe, thewqesz;
1240 uint32_t *sq_buf, *rq_buf;
1241 uint32_t log_qp_sq_size, log_qp_rq_size;
1242 uint32_t sq_size, rq_size;
1243 uint32_t sq_depth, rq_depth;
1244 uint32_t sq_wqe_size, rq_wqe_size, wqesz_shift;
1245 uint32_t max_sgl, max_recv_sgl, uarpg;
1246 uint_t qp_srq_en, i, j;
1247 int ii; /* loop counter for range */
1248 int status, flag;
1249 uint_t serv_type;
1250
1251 /*
1252 * Extract the necessary info from the hermon_qp_info_t structure
1253 */
1254 attr_p = qpinfo->qpi_attrp;
1255 type = qpinfo->qpi_type;
1256 queuesz_p = qpinfo->qpi_queueszp;
1257
1258 if (attr_p->qp_alloc_flags & IBT_QP_USES_RSS) {
1259 if (log2 > state->hs_ibtfinfo.hca_attr->hca_rss_max_log2_table)
1260 return (IBT_INSUFF_RESOURCE);
1261 rsrc_type = HERMON_QPC;
1262 serv_type = HERMON_QP_UD;
1263 } else if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) {
1264 if (log2 > state->hs_ibtfinfo.hca_attr->hca_fexch_max_log2_qp)
1265 return (IBT_INSUFF_RESOURCE);
1266 switch (attr_p->qp_fc.fc_hca_port) {
1267 case 1:
1268 rsrc_type = HERMON_QPC_FEXCH_PORT1;
1269 break;
1270 case 2:
1271 rsrc_type = HERMON_QPC_FEXCH_PORT2;
1272 break;
1273 default:
1274 return (IBT_INVALID_PARAM);
1275 }
1276 serv_type = HERMON_QP_FEXCH;
1277 } else
1278 return (IBT_INVALID_PARAM);
1279
1280 /*
1281 * Determine whether QP is being allocated for userland access or
1282 * whether it is being allocated for kernel access. If the QP is
1283 * being allocated for userland access, fail (too complex for now).
1284 */
1285 if (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) {
1286 return (IBT_NOT_SUPPORTED);
1287 } else {
1288 uarpg = state->hs_kernel_uar_index;
1289 }
1290
1291 /*
1292 * Determine whether QP is being associated with an SRQ
1293 */
1294 qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
1295 if (qp_srq_en) {
1296 /*
1297 * Check for valid SRQ handle pointers
1298 */
1299 if (attr_p->qp_ibc_srq_hdl == NULL) {
1300 return (IBT_SRQ_HDL_INVALID);
1301 }
1302 srq = (hermon_srqhdl_t)attr_p->qp_ibc_srq_hdl;
1303 }
1304
1305 /*
1306 * Check for valid QP service type (only UD supported)
1307 */
1308 if (type != IBT_UD_RQP) {
1309 return (IBT_QP_SRV_TYPE_INVALID);
1310 }
1311
1312 /*
1313 * Check for valid PD handle pointer
1314 */
1315 if (attr_p->qp_pd_hdl == NULL) {
1316 return (IBT_PD_HDL_INVALID);
1317 }
1318 pd = (hermon_pdhdl_t)attr_p->qp_pd_hdl;
1319
1320 /*
1321 * If on an SRQ, check to make sure the PD is the same
1322 */
1323 if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
1324 return (IBT_PD_HDL_INVALID);
1325 }
1326
1327 /* set loop variable here, for freeing resources on error */
1328 ii = 0;
1329
1330 /*
1331 * Allocate 2^log2 contiguous/aligned QP context entries. This will
1332 * be filled in with all the necessary parameters to define the
1333 * Queue Pairs. Unlike other Hermon hardware resources, ownership
1334 * is not immediately given to hardware in the final step here.
1335 * Instead, we must wait until the QP is later transitioned to the
1336 * "Init" state before passing the QP to hardware. If we fail here,
1337 * we must undo all the reference count (CQ and PD).
1338 */
1339 status = hermon_rsrc_alloc(state, rsrc_type, 1 << log2, sleepflag,
1340 &qpc);
1341 if (status != DDI_SUCCESS) {
1342 return (IBT_INSUFF_RESOURCE);
1343 }
1344
1345 if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH)
1346 /*
1347 * Need to init the MKEYs for the FEXCH QPs.
1348 *
1349 * For FEXCH QP subranges, we return the QPN base as
1350 * "relative" to the full FEXCH QP range for the port.
1351 */
1352 *(qpinfo->qpi_qpn) = hermon_fcoib_fexch_relative_qpn(state,
1353 attr_p->qp_fc.fc_hca_port, qpc->hr_indx);
1354 else
1355 *(qpinfo->qpi_qpn) = (ib_qpn_t)qpc->hr_indx;
1356
1357 qp_range_p = kmem_alloc(sizeof (*qp_range_p),
1358 (sleepflag == HERMON_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1359 if (qp_range_p == NULL) {
1360 status = IBT_INSUFF_RESOURCE;
1361 goto qpalloc_fail0;
1362 }
1363 mutex_init(&qp_range_p->hqpr_lock, NULL, MUTEX_DRIVER,
1364 DDI_INTR_PRI(state->hs_intrmsi_pri));
1365 mutex_enter(&qp_range_p->hqpr_lock);
1366 qp_range_p->hqpr_refcnt = 1 << log2;
1367 qp_range_p->hqpr_qpcrsrc = qpc;
1368 mutex_exit(&qp_range_p->hqpr_lock);
1369
1370 for_each_qp:
1371
1372 /* Increment the reference count on the protection domain (PD) */
1373 hermon_pd_refcnt_inc(pd);
1374
1375 rq_cq = (hermon_cqhdl_t)recv_cq[ii];
1376 sq_cq = (hermon_cqhdl_t)send_cq[ii];
1377 if (sq_cq == NULL) {
1378 if (attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) {
1379 /* if no send completions, just use rq_cq */
1380 sq_cq = rq_cq;
1381 } else {
1382 status = IBT_CQ_HDL_INVALID;
1383 goto qpalloc_fail1;
1384 }
1385 }
1386
1387 /*
1388 * Increment the reference count on the CQs. One or both of these
1389 * could return error if we determine that the given CQ is already
1390 * being used with a special (SMI/GSI) QP.
1391 */
1392 status = hermon_cq_refcnt_inc(sq_cq, HERMON_CQ_IS_NORMAL);
1393 if (status != DDI_SUCCESS) {
1394 status = IBT_CQ_HDL_INVALID;
1395 goto qpalloc_fail1;
1396 }
1397 status = hermon_cq_refcnt_inc(rq_cq, HERMON_CQ_IS_NORMAL);
1398 if (status != DDI_SUCCESS) {
1399 status = IBT_CQ_HDL_INVALID;
1400 goto qpalloc_fail2;
1401 }
1402
1403 /*
1404 * Allocate the software structure for tracking the queue pair
1405 * (i.e. the Hermon Queue Pair handle). If we fail here, we must
1406 * undo the reference counts and the previous resource allocation.
1407 */
1408 status = hermon_rsrc_alloc(state, HERMON_QPHDL, 1, sleepflag, &rsrc);
1409 if (status != DDI_SUCCESS) {
1410 status = IBT_INSUFF_RESOURCE;
1411 goto qpalloc_fail4;
1412 }
1413 qp = (hermon_qphdl_t)rsrc->hr_addr;
1414 bzero(qp, sizeof (struct hermon_sw_qp_s));
1415 qp->qp_alloc_flags = attr_p->qp_alloc_flags;
1416
1417 /*
1418 * Calculate the QP number from QPC index. This routine handles
1419 * all of the operations necessary to keep track of used, unused,
1420 * and released QP numbers.
1421 */
1422 qp->qp_qpnum = qpc->hr_indx + ii;
1423 qp->qp_ring = qp->qp_qpnum << 8;
1424 qp->qp_qpn_hdl = NULL;
1425
1426 /*
1427 * Allocate the doorbell record. Hermon just needs one for the RQ,
1428 * if the QP is not associated with an SRQ, and use uarpg (above) as
1429 * the uar index
1430 */
1431
1432 if (!qp_srq_en) {
1433 status = hermon_dbr_alloc(state, uarpg, &qp->qp_rq_dbr_acchdl,
1434 &qp->qp_rq_vdbr, &qp->qp_rq_pdbr, &qp->qp_rdbr_mapoffset);
1435 if (status != DDI_SUCCESS) {
1436 status = IBT_INSUFF_RESOURCE;
1437 goto qpalloc_fail6;
1438 }
1439 }
1440
1441 qp->qp_uses_lso = (attr_p->qp_flags & IBT_USES_LSO);
1442
1443 /*
1444 * We verify that the requested number of SGL is valid (i.e.
1445 * consistent with the device limits and/or software-configured
1446 * limits). If not, then obviously the same cleanup needs to be done.
1447 */
1448 max_sgl = state->hs_ibtfinfo.hca_attr->hca_ud_send_sgl_sz;
1449 swq_type = HERMON_QP_WQ_TYPE_SENDQ_UD;
1450 max_recv_sgl = state->hs_ibtfinfo.hca_attr->hca_recv_sgl_sz;
1451 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
1452 (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_recv_sgl))) {
1453 status = IBT_HCA_SGL_EXCEEDED;
1454 goto qpalloc_fail7;
1455 }
1456
1457 /*
1458 * Determine this QP's WQE stride (for both the Send and Recv WQEs).
1459 * This will depend on the requested number of SGLs. Note: this
1460 * has the side-effect of also calculating the real number of SGLs
1461 * (for the calculated WQE size).
1462 *
1463 * For QP's on an SRQ, we set these to 0.
1464 */
1465 if (qp_srq_en) {
1466 qp->qp_rq_log_wqesz = 0;
1467 qp->qp_rq_sgl = 0;
1468 } else {
1469 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
1470 max_recv_sgl, HERMON_QP_WQ_TYPE_RECVQ,
1471 &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
1472 }
1473 hermon_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
1474 max_sgl, swq_type, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
1475
1476 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
1477
1478 /* NOTE: currently policy in driver, later maybe IBTF interface */
1479 qp->qp_no_prefetch = 0;
1480
1481 /*
1482 * for prefetching, we need to add the number of wqes in
1483 * the 2k area plus one to the number requested, but
1484 * ONLY for send queue. If no_prefetch == 1 (prefetch off)
1485 * it's exactly TWO wqes for the headroom
1486 */
1487 if (qp->qp_no_prefetch)
1488 qp->qp_sq_headroom = 2 * sq_wqe_size;
1489 else
1490 qp->qp_sq_headroom = sq_wqe_size + HERMON_QP_OH_SIZE;
1491 /*
1492 * hdrm wqes must be integral since both sq_wqe_size &
1493 * HERMON_QP_OH_SIZE are power of 2
1494 */
1495 qp->qp_sq_hdrmwqes = (qp->qp_sq_headroom / sq_wqe_size);
1496
1497
1498 /*
1499 * Calculate the appropriate size for the work queues.
1500 * For send queue, add in the headroom wqes to the calculation.
1501 * Note: All Hermon QP work queues must be a power-of-2 in size. Also
1502 * they may not be any smaller than HERMON_QP_MIN_SIZE. This step is
1503 * to round the requested size up to the next highest power-of-2
1504 */
1505 /* first, adjust to a minimum and tell the caller the change */
1506 attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq,
1507 HERMON_QP_MIN_SIZE);
1508 attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq,
1509 HERMON_QP_MIN_SIZE);
1510 /*
1511 * now, calculate the alloc size, taking into account
1512 * the headroom for the sq
1513 */
1514 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes);
1515 /* if the total is a power of two, reduce it */
1516 if (ISP2(attr_p->qp_sizes.cs_sq + qp->qp_sq_hdrmwqes)) {
1517 log_qp_sq_size = log_qp_sq_size - 1;
1518 }
1519
1520 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
1521 if (ISP2(attr_p->qp_sizes.cs_rq)) {
1522 log_qp_rq_size = log_qp_rq_size - 1;
1523 }
1524
1525 /*
1526 * Next we verify that the rounded-up size is valid (i.e. consistent
1527 * with the device limits and/or software-configured limits). If not,
1528 * then obviously we have a lot of cleanup to do before returning.
1529 *
1530 * NOTE: the first condition deals with the (test) case of cs_sq
1531 * being just less than 2^32. In this case, the headroom addition
1532 * to the requested cs_sq will pass the test when it should not.
1533 * This test no longer lets that case slip through the check.
1534 */
1535 if ((attr_p->qp_sizes.cs_sq >
1536 (1 << state->hs_cfg_profile->cp_log_max_qp_sz)) ||
1537 (log_qp_sq_size > state->hs_cfg_profile->cp_log_max_qp_sz) ||
1538 (!qp_srq_en && (log_qp_rq_size >
1539 state->hs_cfg_profile->cp_log_max_qp_sz))) {
1540 status = IBT_HCA_WR_EXCEEDED;
1541 goto qpalloc_fail7;
1542 }
1543
1544 /*
1545 * Allocate the memory for QP work queues. Since Hermon work queues
1546 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
1547 * the work queue memory is very important. We used to allocate
1548 * work queues (the combined receive and send queues) so that they
1549 * would be aligned on their combined size. That alignment guaranteed
1550 * that they would never cross the 4GB boundary (Hermon work queues
1551 * are on the order of MBs at maximum). Now we are able to relax
1552 * this alignment constraint by ensuring that the IB address assigned
1553 * to the queue memory (as a result of the hermon_mr_register() call)
1554 * is offset from zero.
1555 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
1556 * guarantee the alignment, but when attempting to use IOMMU bypass
1557 * mode we found that we were not allowed to specify any alignment
1558 * that was more restrictive than the system page size.
1559 * So we avoided this constraint by passing two alignment values,
1560 * one for the memory allocation itself and the other for the DMA
1561 * handle (for later bind). This used to cause more memory than
1562 * necessary to be allocated (in order to guarantee the more
1563 * restrictive alignment contraint). But by guaranteeing the
1564 * zero-based IB virtual address for the queue, we are able to
1565 * conserve this memory.
1566 */
1567 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
1568 sq_depth = 1 << log_qp_sq_size;
1569 sq_size = sq_depth * sq_wqe_size;
1570
1571 /* QP on SRQ sets these to 0 */
1572 if (qp_srq_en) {
1573 rq_wqe_size = 0;
1574 rq_size = 0;
1575 } else {
1576 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
1577 rq_depth = 1 << log_qp_rq_size;
1578 rq_size = rq_depth * rq_wqe_size;
1579 }
1580
1581 qp->qp_wqinfo.qa_size = sq_size + rq_size;
1582 qp->qp_wqinfo.qa_alloc_align = PAGESIZE;
1583 qp->qp_wqinfo.qa_bind_align = PAGESIZE;
1584 qp->qp_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
1585 status = hermon_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
1586 if (status != DDI_SUCCESS) {
1587 status = IBT_INSUFF_RESOURCE;
1588 goto qpalloc_fail7;
1589 }
1590
1591 /*
1592 * Sort WQs in memory according to stride (*q_wqe_size), largest first
1593 * If they are equal, still put the SQ first
1594 */
1595 qp->qp_sq_baseaddr = 0;
1596 qp->qp_rq_baseaddr = 0;
1597 if ((sq_wqe_size > rq_wqe_size) || (sq_wqe_size == rq_wqe_size)) {
1598 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
1599
1600 /* if this QP is on an SRQ, set the rq_buf to NULL */
1601 if (qp_srq_en) {
1602 rq_buf = NULL;
1603 } else {
1604 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
1605 qp->qp_rq_baseaddr = sq_size;
1606 }
1607 } else {
1608 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
1609 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
1610 qp->qp_sq_baseaddr = rq_size;
1611 }
1612
1613 qp->qp_sq_wqhdr = hermon_wrid_wqhdr_create(sq_depth);
1614 if (qp->qp_sq_wqhdr == NULL) {
1615 status = IBT_INSUFF_RESOURCE;
1616 goto qpalloc_fail8;
1617 }
1618 if (qp_srq_en) {
1619 qp->qp_rq_wqavl.wqa_wq = srq->srq_wq_wqhdr;
1620 qp->qp_rq_wqavl.wqa_srq_en = 1;
1621 qp->qp_rq_wqavl.wqa_srq = srq;
1622 } else {
1623 qp->qp_rq_wqhdr = hermon_wrid_wqhdr_create(rq_depth);
1624 if (qp->qp_rq_wqhdr == NULL) {
1625 status = IBT_INSUFF_RESOURCE;
1626 goto qpalloc_fail8;
1627 }
1628 qp->qp_rq_wqavl.wqa_wq = qp->qp_rq_wqhdr;
1629 }
1630 qp->qp_sq_wqavl.wqa_qpn = qp->qp_qpnum;
1631 qp->qp_sq_wqavl.wqa_type = HERMON_WR_SEND;
1632 qp->qp_sq_wqavl.wqa_wq = qp->qp_sq_wqhdr;
1633 qp->qp_rq_wqavl.wqa_qpn = qp->qp_qpnum;
1634 qp->qp_rq_wqavl.wqa_type = HERMON_WR_RECV;
1635
1636 /*
1637 * Register the memory for the QP work queues. The memory for the
1638 * QP must be registered in the Hermon cMPT tables. This gives us the
1639 * LKey to specify in the QP context later. Note: The memory for
1640 * Hermon work queues (both Send and Recv) must be contiguous and
1641 * registered as a single memory region. Note: If the QP memory is
1642 * user-mappable, force DDI_DMA_CONSISTENT mapping. Also, in order to
1643 * meet the alignment restriction, we pass the "mro_bind_override_addr"
1644 * flag in the call to hermon_mr_register(). This guarantees that the
1645 * resulting IB vaddr will be zero-based (modulo the offset into the
1646 * first page). If we fail here, we still have the bunch of resource
1647 * and reference count cleanup to do.
1648 */
1649 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
1650 IBT_MR_NOSLEEP;
1651 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1652 mr_attr.mr_len = qp->qp_wqinfo.qa_size;
1653 mr_attr.mr_as = NULL;
1654 mr_attr.mr_flags = flag;
1655 /* HERMON_QUEUE_LOCATION_NORMAL */
1656 mr_op.mro_bind_type =
1657 state->hs_cfg_profile->cp_iommu_bypass;
1658 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
1659 mr_op.mro_bind_override_addr = 1;
1660 status = hermon_mr_register(state, pd, &mr_attr, &mr,
1661 &mr_op, HERMON_QP_CMPT);
1662 if (status != DDI_SUCCESS) {
1663 status = IBT_INSUFF_RESOURCE;
1664 goto qpalloc_fail9;
1665 }
1666
1667 /*
1668 * Calculate the offset between the kernel virtual address space
1669 * and the IB virtual address space. This will be used when
1670 * posting work requests to properly initialize each WQE.
1671 */
1672 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
1673 (uint64_t)mr->mr_bindinfo.bi_addr;
1674
1675 /*
1676 * Fill in all the return arguments (if necessary). This includes
1677 * real work queue sizes (in wqes), real SGLs, and QP number
1678 */
1679 if (queuesz_p != NULL) {
1680 queuesz_p->cs_sq =
1681 (1 << log_qp_sq_size) - qp->qp_sq_hdrmwqes;
1682 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
1683
1684 /* if this QP is on an SRQ, set these to 0 */
1685 if (qp_srq_en) {
1686 queuesz_p->cs_rq = 0;
1687 queuesz_p->cs_rq_sgl = 0;
1688 } else {
1689 queuesz_p->cs_rq = (1 << log_qp_rq_size);
1690 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
1691 }
1692 }
1693
1694 /*
1695 * Fill in the rest of the Hermon Queue Pair handle.
1696 */
1697 qp->qp_qpcrsrcp = NULL;
1698 qp->qp_rsrcp = rsrc;
1699 qp->qp_state = HERMON_QP_RESET;
1700 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1701 qp->qp_pdhdl = pd;
1702 qp->qp_mrhdl = mr;
1703 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
1704 HERMON_QP_SQ_WR_SIGNALED : HERMON_QP_SQ_ALL_SIGNALED;
1705 qp->qp_is_special = 0;
1706 qp->qp_uarpg = uarpg;
1707 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
1708 qp->qp_sq_cqhdl = sq_cq;
1709 qp->qp_sq_bufsz = (1 << log_qp_sq_size);
1710 qp->qp_sq_logqsz = log_qp_sq_size;
1711 qp->qp_sq_buf = sq_buf;
1712 qp->qp_desc_off = qp_desc_off;
1713 qp->qp_rq_cqhdl = rq_cq;
1714 qp->qp_rq_buf = rq_buf;
1715 qp->qp_rlky = (attr_p->qp_flags & IBT_FAST_REG_RES_LKEY) !=
1716 0;
1717
1718 /* if this QP is on an SRQ, set rq_bufsz to 0 */
1719 if (qp_srq_en) {
1720 qp->qp_rq_bufsz = 0;
1721 qp->qp_rq_logqsz = 0;
1722 } else {
1723 qp->qp_rq_bufsz = (1 << log_qp_rq_size);
1724 qp->qp_rq_logqsz = log_qp_rq_size;
1725 }
1726
1727 qp->qp_forward_sqd_event = 0;
1728 qp->qp_sqd_still_draining = 0;
1729 qp->qp_hdlrarg = (void *)ibt_qphdl[ii];
1730 qp->qp_mcg_refcnt = 0;
1731
1732 /*
1733 * If this QP is to be associated with an SRQ, set the SRQ handle
1734 */
1735 if (qp_srq_en) {
1736 qp->qp_srqhdl = srq;
1737 hermon_srq_refcnt_inc(qp->qp_srqhdl);
1738 } else {
1739 qp->qp_srqhdl = NULL;
1740 }
1741
1742 qp->qp_type = IBT_UD_RQP;
1743 qp->qp_serv_type = serv_type;
1744
1745 /*
1746 * Initialize the RQ WQEs - unlike Arbel, no Rcv init is needed
1747 */
1748
1749 /*
1750 * Initialize the SQ WQEs - all that needs to be done is every 64 bytes
1751 * set the quadword to all F's - high-order bit is owner (init to one)
1752 * and the rest for the headroom definition of prefetching.
1753 */
1754 if ((attr_p->qp_alloc_flags & IBT_QP_USES_FEXCH) == 0) {
1755 wqesz_shift = qp->qp_sq_log_wqesz;
1756 thewqesz = 1 << wqesz_shift;
1757 thewqe = (uint64_t *)(void *)(qp->qp_sq_buf);
1758 for (i = 0; i < sq_depth; i++) {
1759 /*
1760 * for each stride, go through and every 64 bytes
1761 * write the init value - having set the address
1762 * once, just keep incrementing it
1763 */
1764 for (j = 0; j < thewqesz; j += 64, thewqe += 8) {
1765 *(uint32_t *)thewqe = 0xFFFFFFFF;
1766 }
1767 }
1768 }
1769
1770 /* Zero out the QP context */
1771 bzero(&qp->qpc, sizeof (hermon_hw_qpc_t));
1772
1773 /*
1774 * Put QP handle in Hermon QPNum-to-QPHdl list. Then fill in the
1775 * "qphdl" and return success
1776 */
1777 hermon_icm_set_num_to_hdl(state, HERMON_QPC, qpc->hr_indx + ii, qp);
1778
1779 mutex_init(&qp->qp_sq_lock, NULL, MUTEX_DRIVER,
1780 DDI_INTR_PRI(state->hs_intrmsi_pri));
1781
1782 qp->qp_rangep = qp_range_p;
1783
1784 qphdl[ii] = qp;
1785
1786 if (++ii < (1 << log2))
1787 goto for_each_qp;
1788
1789 return (DDI_SUCCESS);
1790
1791 /*
1792 * The following is cleanup for all possible failure cases in this routine
1793 */
1794 qpalloc_fail9:
1795 hermon_queue_free(&qp->qp_wqinfo);
1796 qpalloc_fail8:
1797 if (qp->qp_sq_wqhdr)
1798 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
1799 if (qp->qp_rq_wqhdr)
1800 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
1801 qpalloc_fail7:
1802 if (!qp_srq_en) {
1803 hermon_dbr_free(state, uarpg, qp->qp_rq_vdbr);
1804 }
1805
1806 qpalloc_fail6:
1807 hermon_rsrc_free(state, &rsrc);
1808 qpalloc_fail4:
1809 hermon_cq_refcnt_dec(rq_cq);
1810 qpalloc_fail2:
1811 hermon_cq_refcnt_dec(sq_cq);
1812 qpalloc_fail1:
1813 hermon_pd_refcnt_dec(pd);
1814 qpalloc_fail0:
1815 if (ii == 0) {
1816 if (qp_range_p)
1817 kmem_free(qp_range_p, sizeof (*qp_range_p));
1818 hermon_rsrc_free(state, &qpc);
1819 } else {
1820 /* qp_range_p and qpc rsrc will be freed in hermon_qp_free */
1821
1822 mutex_enter(&qp->qp_rangep->hqpr_lock);
1823 qp_range_p->hqpr_refcnt = ii;
1824 mutex_exit(&qp->qp_rangep->hqpr_lock);
1825 while (--ii >= 0) {
1826 ibc_qpn_hdl_t qpn_hdl;
1827 int free_status;
1828
1829 free_status = hermon_qp_free(state, &qphdl[ii],
1830 IBC_FREE_QP_AND_QPN, &qpn_hdl, sleepflag);
1831 if (free_status != DDI_SUCCESS)
1832 cmn_err(CE_CONT, "!qp_range: status 0x%x: "
1833 "error status %x during free",
1834 status, free_status);
1835 }
1836 }
1837
1838 return (status);
1839 }
1840
1841
1842 /*
1843 * hermon_qp_free()
1844 * This function frees up the QP resources. Depending on the value
1845 * of the "free_qp_flags", the QP number may not be released until
1846 * a subsequent call to hermon_qp_release_qpn().
1847 *
1848 * Context: Can be called only from user or kernel context.
1849 */
1850 /* ARGSUSED */
1851 int
1852 hermon_qp_free(hermon_state_t *state, hermon_qphdl_t *qphdl,
1853 ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh,
1854 uint_t sleepflag)
1855 {
1856 hermon_rsrc_t *qpc, *rsrc;
1857 hermon_umap_db_entry_t *umapdb;
1858 hermon_qpn_entry_t *entry;
1859 hermon_pdhdl_t pd;
1860 hermon_mrhdl_t mr;
1861 hermon_cqhdl_t sq_cq, rq_cq;
1862 hermon_srqhdl_t srq;
1863 hermon_qphdl_t qp;
1864 uint64_t value;
1865 uint_t type, port;
1866 uint_t maxprot;
1867 uint_t qp_srq_en;
1868 int status;
1869
1870 /*
1871 * Pull all the necessary information from the Hermon Queue Pair
1872 * handle. This is necessary here because the resource for the
1873 * QP handle is going to be freed up as part of this operation.
1874 */
1875 qp = *qphdl;
1876 mutex_enter(&qp->qp_lock);
1877 qpc = qp->qp_qpcrsrcp; /* NULL if part of a "range" */
1878 rsrc = qp->qp_rsrcp;
1879 pd = qp->qp_pdhdl;
1880 srq = qp->qp_srqhdl;
1881 mr = qp->qp_mrhdl;
1882 rq_cq = qp->qp_rq_cqhdl;
1883 sq_cq = qp->qp_sq_cqhdl;
1884 port = qp->qp_portnum;
1885 qp_srq_en = qp->qp_alloc_flags & IBT_QP_USES_SRQ;
1886
1887 /*
1888 * If the QP is part of an MCG, then we fail the qp_free
1889 */
1890 if (qp->qp_mcg_refcnt != 0) {
1891 mutex_exit(&qp->qp_lock);
1892 status = ibc_get_ci_failure(0);
1893 goto qpfree_fail;
1894 }
1895
1896 /*
1897 * If the QP is not already in "Reset" state, then transition to
1898 * "Reset". This is necessary because software does not reclaim
1899 * ownership of the QP context until the QP is in the "Reset" state.
1900 * If the ownership transfer fails for any reason, then it is an
1901 * indication that something (either in HW or SW) has gone seriously
1902 * wrong. So we print a warning message and return.
1903 */
1904 if (qp->qp_state != HERMON_QP_RESET) {
1905 if (hermon_qp_to_reset(state, qp) != DDI_SUCCESS) {
1906 mutex_exit(&qp->qp_lock);
1907 HERMON_WARNING(state, "failed to reset QP context");
1908 status = ibc_get_ci_failure(0);
1909 goto qpfree_fail;
1910 }
1911 qp->qp_state = HERMON_QP_RESET;
1912 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_RESET);
1913
1914 /*
1915 * Do any additional handling necessary for the transition
1916 * to the "Reset" state (e.g. update the WRID lists)
1917 */
1918 if (hermon_wrid_to_reset_handling(state, qp) != DDI_SUCCESS) {
1919 mutex_exit(&qp->qp_lock);
1920 HERMON_WARNING(state, "failed to reset QP WRID list");
1921 status = ibc_get_ci_failure(0);
1922 goto qpfree_fail;
1923 }
1924 }
1925
1926 /*
1927 * If this was a user-mappable QP, then we need to remove its entry
1928 * from the "userland resources database". If it is also currently
1929 * mmap()'d out to a user process, then we need to call
1930 * devmap_devmem_remap() to remap the QP memory to an invalid mapping.
1931 * We also need to invalidate the QP tracking information for the
1932 * user mapping.
1933 */
1934 if (qp->qp_alloc_flags & IBT_QP_USER_MAP) {
1935 status = hermon_umap_db_find(state->hs_instance, qp->qp_qpnum,
1936 MLNX_UMAP_QPMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
1937 &umapdb);
1938 if (status != DDI_SUCCESS) {
1939 mutex_exit(&qp->qp_lock);
1940 HERMON_WARNING(state, "failed to find in database");
1941 return (ibc_get_ci_failure(0));
1942 }
1943 hermon_umap_db_free(umapdb);
1944 if (qp->qp_umap_dhp != NULL) {
1945 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
1946 status = devmap_devmem_remap(qp->qp_umap_dhp,
1947 state->hs_dip, 0, 0, qp->qp_wqinfo.qa_size,
1948 maxprot, DEVMAP_MAPPING_INVALID, NULL);
1949 if (status != DDI_SUCCESS) {
1950 mutex_exit(&qp->qp_lock);
1951 HERMON_WARNING(state, "failed in QP memory "
1952 "devmap_devmem_remap()");
1953 return (ibc_get_ci_failure(0));
1954 }
1955 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
1956 }
1957 }
1958
1959
1960 /*
1961 * Put NULL into the Hermon QPNum-to-QPHdl list. This will allow any
1962 * in-progress events to detect that the QP corresponding to this
1963 * number has been freed. Note: it does depend in whether we are
1964 * freeing a special QP or not.
1965 */
1966 if (qpc == NULL) {
1967 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1968 qp->qp_qpnum, NULL);
1969 } else if (qp->qp_is_special) {
1970 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1971 qpc->hr_indx + port, NULL);
1972 } else {
1973 hermon_icm_set_num_to_hdl(state, HERMON_QPC,
1974 qpc->hr_indx, NULL);
1975 }
1976
1977 /*
1978 * Drop the QP lock
1979 * At this point the lock is no longer necessary. We cannot
1980 * protect from multiple simultaneous calls to free the same QP.
1981 * In addition, since the QP lock is contained in the QP "software
1982 * handle" resource, which we will free (see below), it is
1983 * important that we have no further references to that memory.
1984 */
1985 mutex_exit(&qp->qp_lock);
1986
1987 /*
1988 * Free the QP resources
1989 * Start by deregistering and freeing the memory for work queues.
1990 * Next free any previously allocated context information
1991 * (depending on QP type)
1992 * Finally, decrement the necessary reference counts.
1993 * If this fails for any reason, then it is an indication that
1994 * something (either in HW or SW) has gone seriously wrong. So we
1995 * print a warning message and return.
1996 */
1997 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
1998 sleepflag);
1999 if (status != DDI_SUCCESS) {
2000 HERMON_WARNING(state, "failed to deregister QP memory");
2001 status = ibc_get_ci_failure(0);
2002 goto qpfree_fail;
2003 }
2004
2005 /* Free the memory for the QP */
2006 hermon_queue_free(&qp->qp_wqinfo);
2007
2008 if (qp->qp_sq_wqhdr)
2009 hermon_wrid_wqhdr_destroy(qp->qp_sq_wqhdr);
2010 if (qp->qp_rq_wqhdr)
2011 hermon_wrid_wqhdr_destroy(qp->qp_rq_wqhdr);
2012
2013 /* Free the dbr */
2014 if (!qp_srq_en) {
2015 hermon_dbr_free(state, qp->qp_uarpg, qp->qp_rq_vdbr);
2016 }
2017
2018 /*
2019 * Free up the remainder of the QP resources. Note: we have a few
2020 * different resources to free up depending on whether the QP is a
2021 * special QP or not. As described above, if any of these fail for
2022 * any reason it is an indication that something (either in HW or SW)
2023 * has gone seriously wrong. So we print a warning message and
2024 * return.
2025 */
2026 if (qp->qp_is_special) {
2027 type = (qp->qp_is_special == HERMON_QP_SMI) ?
2028 IBT_SMI_SQP : IBT_GSI_SQP;
2029
2030 /* Free up resources for the special QP */
2031 status = hermon_special_qp_rsrc_free(state, type, port);
2032 if (status != DDI_SUCCESS) {
2033 HERMON_WARNING(state, "failed to free special QP rsrc");
2034 status = ibc_get_ci_failure(0);
2035 goto qpfree_fail;
2036 }
2037
2038 } else if (qp->qp_rangep) {
2039 int refcnt;
2040 mutex_enter(&qp->qp_rangep->hqpr_lock);
2041 refcnt = --qp->qp_rangep->hqpr_refcnt;
2042 mutex_exit(&qp->qp_rangep->hqpr_lock);
2043 if (refcnt == 0) {
2044 mutex_destroy(&qp->qp_rangep->hqpr_lock);
2045 hermon_rsrc_free(state, &qp->qp_rangep->hqpr_qpcrsrc);
2046 kmem_free(qp->qp_rangep, sizeof (*qp->qp_rangep));
2047 }
2048 qp->qp_rangep = NULL;
2049 } else if (qp->qp_qpn_hdl == NULL) {
2050 hermon_rsrc_free(state, &qpc);
2051 } else {
2052 /*
2053 * Check the flags and determine whether to release the
2054 * QPN or not, based on their value.
2055 */
2056 if (free_qp_flags == IBC_FREE_QP_ONLY) {
2057 entry = qp->qp_qpn_hdl;
2058 hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
2059 HERMON_QPN_FREE_ONLY);
2060 *qpnh = (ibc_qpn_hdl_t)entry;
2061 } else {
2062 hermon_qp_release_qpn(state, qp->qp_qpn_hdl,
2063 HERMON_QPN_RELEASE);
2064 }
2065 }
2066
2067 mutex_destroy(&qp->qp_sq_lock);
2068
2069 /* Free the Hermon Queue Pair handle */
2070 hermon_rsrc_free(state, &rsrc);
2071
2072 /* Decrement the reference counts on CQs, PD and SRQ (if needed) */
2073 hermon_cq_refcnt_dec(rq_cq);
2074 hermon_cq_refcnt_dec(sq_cq);
2075 hermon_pd_refcnt_dec(pd);
2076 if (qp_srq_en == HERMON_QP_SRQ_ENABLED) {
2077 hermon_srq_refcnt_dec(srq);
2078 }
2079
2080 /* Set the qphdl pointer to NULL and return success */
2081 *qphdl = NULL;
2082
2083 return (DDI_SUCCESS);
2084
2085 qpfree_fail:
2086 return (status);
2087 }
2088
2089
2090 /*
2091 * hermon_qp_query()
2092 * Context: Can be called from interrupt or base context.
2093 */
2094 int
2095 hermon_qp_query(hermon_state_t *state, hermon_qphdl_t qp,
2096 ibt_qp_query_attr_t *attr_p)
2097 {
2098 ibt_cep_state_t qp_state;
2099 ibt_qp_ud_attr_t *ud;
2100 ibt_qp_rc_attr_t *rc;
2101 ibt_qp_uc_attr_t *uc;
2102 ibt_cep_flags_t enable_flags;
2103 hermon_hw_addr_path_t *qpc_path, *qpc_alt_path;
2104 ibt_cep_path_t *path_ptr, *alt_path_ptr;
2105 hermon_hw_qpc_t *qpc;
2106 int status;
2107 uint_t tmp_sched_q, tmp_alt_sched_q;
2108
2109 mutex_enter(&qp->qp_lock);
2110
2111 /*
2112 * Grab the temporary QPC entry from QP software state
2113 */
2114 qpc = &qp->qpc;
2115
2116 /* Convert the current Hermon QP state to IBTF QP state */
2117 switch (qp->qp_state) {
2118 case HERMON_QP_RESET:
2119 qp_state = IBT_STATE_RESET; /* "Reset" */
2120 break;
2121 case HERMON_QP_INIT:
2122 qp_state = IBT_STATE_INIT; /* Initialized */
2123 break;
2124 case HERMON_QP_RTR:
2125 qp_state = IBT_STATE_RTR; /* Ready to Receive */
2126 break;
2127 case HERMON_QP_RTS:
2128 qp_state = IBT_STATE_RTS; /* Ready to Send */
2129 break;
2130 case HERMON_QP_SQERR:
2131 qp_state = IBT_STATE_SQE; /* Send Queue Error */
2132 break;
2133 case HERMON_QP_SQD:
2134 if (qp->qp_sqd_still_draining) {
2135 qp_state = IBT_STATE_SQDRAIN; /* SQ Draining */
2136 } else {
2137 qp_state = IBT_STATE_SQD; /* SQ Drained */
2138 }
2139 break;
2140 case HERMON_QP_ERR:
2141 qp_state = IBT_STATE_ERROR; /* Error */
2142 break;
2143 default:
2144 mutex_exit(&qp->qp_lock);
2145 return (ibc_get_ci_failure(0));
2146 }
2147 attr_p->qp_info.qp_state = qp_state;
2148
2149 /* SRQ Hook. */
2150 attr_p->qp_srq = NULL;
2151
2152 /*
2153 * The following QP information is always returned, regardless of
2154 * the current QP state. Note: Some special handling is necessary
2155 * for calculating the QP number on special QP (QP0 and QP1).
2156 */
2157 attr_p->qp_sq_cq =
2158 (qp->qp_sq_cqhdl == NULL) ? NULL : qp->qp_sq_cqhdl->cq_hdlrarg;
2159 attr_p->qp_rq_cq =
2160 (qp->qp_rq_cqhdl == NULL) ? NULL : qp->qp_rq_cqhdl->cq_hdlrarg;
2161 if (qp->qp_is_special) {
2162 attr_p->qp_qpn = (qp->qp_is_special == HERMON_QP_SMI) ? 0 : 1;
2163 } else {
2164 attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum;
2165 }
2166 attr_p->qp_sq_sgl = qp->qp_sq_sgl;
2167 attr_p->qp_rq_sgl = qp->qp_rq_sgl;
2168 attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz - qp->qp_sq_hdrmwqes;
2169 attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz;
2170
2171 /*
2172 * If QP is currently in the "Reset" state, then only the above are
2173 * returned
2174 */
2175 if (qp_state == IBT_STATE_RESET) {
2176 mutex_exit(&qp->qp_lock);
2177 return (DDI_SUCCESS);
2178 }
2179
2180 /*
2181 * Post QUERY_QP command to firmware
2182 *
2183 * We do a HERMON_NOSLEEP here because we are holding the "qp_lock".
2184 * Since we may be in the interrupt context (or subsequently raised
2185 * to interrupt level by priority inversion), we do not want to block
2186 * in this routine waiting for success.
2187 */
2188 tmp_sched_q = qpc->pri_addr_path.sched_q;
2189 tmp_alt_sched_q = qpc->alt_addr_path.sched_q;
2190 status = hermon_cmn_query_cmd_post(state, QUERY_QP, 0, qp->qp_qpnum,
2191 qpc, sizeof (hermon_hw_qpc_t), HERMON_CMD_NOSLEEP_SPIN);
2192 if (status != HERMON_CMD_SUCCESS) {
2193 mutex_exit(&qp->qp_lock);
2194 cmn_err(CE_WARN, "hermon%d: hermon_qp_query: QUERY_QP "
2195 "command failed: %08x\n", state->hs_instance, status);
2196 if (status == HERMON_CMD_INVALID_STATUS) {
2197 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
2198 }
2199 return (ibc_get_ci_failure(0));
2200 }
2201 qpc->pri_addr_path.sched_q = tmp_sched_q;
2202 qpc->alt_addr_path.sched_q = tmp_alt_sched_q;
2203
2204 /*
2205 * Fill in the additional QP info based on the QP's transport type.
2206 */
2207 if (qp->qp_type == IBT_UD_RQP) {
2208
2209 /* Fill in the UD-specific info */
2210 ud = &attr_p->qp_info.qp_transport.ud;
2211 ud->ud_qkey = (ib_qkey_t)qpc->qkey;
2212 ud->ud_sq_psn = qpc->next_snd_psn;
2213 ud->ud_pkey_ix = qpc->pri_addr_path.pkey_indx;
2214 /* port+1 for port 1/2 */
2215 ud->ud_port =
2216 (uint8_t)(((qpc->pri_addr_path.sched_q >> 6) & 0x01) + 1);
2217
2218 attr_p->qp_info.qp_trans = IBT_UD_SRV;
2219
2220 if (qp->qp_serv_type == HERMON_QP_FEXCH) {
2221 ibt_pmr_desc_t *pmr;
2222 uint64_t heart_beat;
2223
2224 pmr = &attr_p->qp_query_fexch.fq_uni_mem_desc;
2225 pmr->pmd_iova = 0;
2226 pmr->pmd_lkey = pmr->pmd_rkey =
2227 hermon_fcoib_qpn_to_mkey(state, qp->qp_qpnum);
2228 pmr->pmd_phys_buf_list_sz =
2229 state->hs_fcoib.hfc_mtts_per_mpt;
2230 pmr->pmd_sync_required = 0;
2231
2232 pmr = &attr_p->qp_query_fexch.fq_bi_mem_desc;
2233 pmr->pmd_iova = 0;
2234 pmr->pmd_lkey = 0;
2235 pmr->pmd_rkey = 0;
2236 pmr->pmd_phys_buf_list_sz = 0;
2237 pmr->pmd_sync_required = 0;
2238
2239 attr_p->qp_query_fexch.fq_flags =
2240 ((hermon_get_heart_beat_rq_cmd_post(state,
2241 qp->qp_qpnum, &heart_beat) == HERMON_CMD_SUCCESS) &&
2242 (heart_beat == 0)) ? IBT_FEXCH_HEART_BEAT_OK :
2243 IBT_FEXCH_NO_FLAGS;
2244
2245 ud->ud_fc = qp->qp_fc_attr;
2246 } else if (qp->qp_serv_type == HERMON_QP_FCMND ||
2247 qp->qp_serv_type == HERMON_QP_RFCI) {
2248 ud->ud_fc = qp->qp_fc_attr;
2249 }
2250
2251 } else if (qp->qp_serv_type == HERMON_QP_RC) {
2252
2253 /* Fill in the RC-specific info */
2254 rc = &attr_p->qp_info.qp_transport.rc;
2255 rc->rc_sq_psn = qpc->next_snd_psn;
2256 rc->rc_rq_psn = qpc->next_rcv_psn;
2257 rc->rc_dst_qpn = qpc->rem_qpn;
2258
2259 /* Grab the path migration state information */
2260 if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) {
2261 rc->rc_mig_state = IBT_STATE_MIGRATED;
2262 } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) {
2263 rc->rc_mig_state = IBT_STATE_REARMED;
2264 } else {
2265 rc->rc_mig_state = IBT_STATE_ARMED;
2266 }
2267 rc->rc_rdma_ra_out = (1 << qpc->sra_max);
2268 rc->rc_rdma_ra_in = (1 << qpc->rra_max);
2269 rc->rc_min_rnr_nak = qpc->min_rnr_nak;
2270 rc->rc_path_mtu = qpc->mtu;
2271 rc->rc_retry_cnt = qpc->retry_cnt;
2272
2273 /* Get the common primary address path fields */
2274 qpc_path = &qpc->pri_addr_path;
2275 path_ptr = &rc->rc_path;
2276 hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
2277 HERMON_ADDRPATH_QP);
2278
2279 /* Fill in the additional primary address path fields */
2280 path_ptr->cep_pkey_ix = qpc_path->pkey_indx;
2281 path_ptr->cep_hca_port_num =
2282 path_ptr->cep_adds_vect.av_port_num =
2283 (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1);
2284 path_ptr->cep_timeout = qpc_path->ack_timeout;
2285
2286 /* Get the common alternate address path fields */
2287 qpc_alt_path = &qpc->alt_addr_path;
2288 alt_path_ptr = &rc->rc_alt_path;
2289 hermon_get_addr_path(state, qpc_alt_path,
2290 &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP);
2291
2292 /* Fill in the additional alternate address path fields */
2293 alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx;
2294 alt_path_ptr->cep_hca_port_num =
2295 alt_path_ptr->cep_adds_vect.av_port_num =
2296 (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1);
2297 alt_path_ptr->cep_timeout = qpc_alt_path->ack_timeout;
2298
2299 /* Get the RNR retry time from primary path */
2300 rc->rc_rnr_retry_cnt = qpc->rnr_retry;
2301
2302 /* Set the enable flags based on RDMA/Atomic enable bits */
2303 enable_flags = IBT_CEP_NO_FLAGS;
2304 enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD);
2305 enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
2306 enable_flags |= ((qpc->rae == 0) ? 0 : IBT_CEP_ATOMIC);
2307 attr_p->qp_info.qp_flags = enable_flags;
2308
2309 attr_p->qp_info.qp_trans = IBT_RC_SRV;
2310
2311 } else if (qp->qp_serv_type == HERMON_QP_UC) {
2312
2313 /* Fill in the UC-specific info */
2314 uc = &attr_p->qp_info.qp_transport.uc;
2315 uc->uc_sq_psn = qpc->next_snd_psn;
2316 uc->uc_rq_psn = qpc->next_rcv_psn;
2317 uc->uc_dst_qpn = qpc->rem_qpn;
2318
2319 /* Grab the path migration state information */
2320 if (qpc->pm_state == HERMON_QP_PMSTATE_MIGRATED) {
2321 uc->uc_mig_state = IBT_STATE_MIGRATED;
2322 } else if (qpc->pm_state == HERMON_QP_PMSTATE_REARM) {
2323 uc->uc_mig_state = IBT_STATE_REARMED;
2324 } else {
2325 uc->uc_mig_state = IBT_STATE_ARMED;
2326 }
2327 uc->uc_path_mtu = qpc->mtu;
2328
2329 /* Get the common primary address path fields */
2330 qpc_path = &qpc->pri_addr_path;
2331 path_ptr = &uc->uc_path;
2332 hermon_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
2333 HERMON_ADDRPATH_QP);
2334
2335 /* Fill in the additional primary address path fields */
2336 path_ptr->cep_pkey_ix = qpc_path->pkey_indx;
2337 path_ptr->cep_hca_port_num =
2338 path_ptr->cep_adds_vect.av_port_num =
2339 (uint8_t)(((qpc_path->sched_q >> 6) & 0x01) + 1);
2340
2341 /* Get the common alternate address path fields */
2342 qpc_alt_path = &qpc->alt_addr_path;
2343 alt_path_ptr = &uc->uc_alt_path;
2344 hermon_get_addr_path(state, qpc_alt_path,
2345 &alt_path_ptr->cep_adds_vect, HERMON_ADDRPATH_QP);
2346
2347 /* Fill in the additional alternate address path fields */
2348 alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx;
2349 alt_path_ptr->cep_hca_port_num =
2350 alt_path_ptr->cep_adds_vect.av_port_num =
2351 (uint8_t)(((qpc_alt_path->sched_q >> 6) & 0x01) + 1);
2352
2353 /*
2354 * Set the enable flags based on RDMA enable bits (by
2355 * definition UC doesn't support Atomic or RDMA Read)
2356 */
2357 enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
2358 attr_p->qp_info.qp_flags = enable_flags;
2359
2360 attr_p->qp_info.qp_trans = IBT_UC_SRV;
2361
2362 } else {
2363 HERMON_WARNING(state, "unexpected QP transport type");
2364 mutex_exit(&qp->qp_lock);
2365 return (ibc_get_ci_failure(0));
2366 }
2367
2368 /*
2369 * Under certain circumstances it is possible for the Hermon hardware
2370 * to transition to one of the error states without software directly
2371 * knowing about it. The QueryQP() call is the one place where we
2372 * have an opportunity to sample and update our view of the QP state.
2373 */
2374 if (qpc->state == HERMON_QP_SQERR) {
2375 attr_p->qp_info.qp_state = IBT_STATE_SQE;
2376 qp->qp_state = HERMON_QP_SQERR;
2377 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_SQERR);
2378 }
2379 if (qpc->state == HERMON_QP_ERR) {
2380 attr_p->qp_info.qp_state = IBT_STATE_ERROR;
2381 qp->qp_state = HERMON_QP_ERR;
2382 HERMON_SET_QP_POST_SEND_STATE(qp, HERMON_QP_ERR);
2383 }
2384 mutex_exit(&qp->qp_lock);
2385
2386 return (DDI_SUCCESS);
2387 }
2388
2389
2390 /*
2391 * hermon_qp_create_qpn()
2392 * Context: Can be called from interrupt or base context.
2393 */
2394 static int
2395 hermon_qp_create_qpn(hermon_state_t *state, hermon_qphdl_t qp,
2396 hermon_rsrc_t *qpc)
2397 {
2398 hermon_qpn_entry_t query;
2399 hermon_qpn_entry_t *entry;
2400 avl_index_t where;
2401
2402 /*
2403 * Build a query (for the AVL tree lookup) and attempt to find
2404 * a previously added entry that has a matching QPC index. If
2405 * no matching entry is found, then allocate, initialize, and
2406 * add an entry to the AVL tree.
2407 * If a matching entry is found, then increment its QPN counter
2408 * and reference counter.
2409 */
2410 query.qpn_indx = qpc->hr_indx;
2411 mutex_enter(&state->hs_qpn_avl_lock);
2412 entry = (hermon_qpn_entry_t *)avl_find(&state->hs_qpn_avl,
2413 &query, &where);
2414 if (entry == NULL) {
2415 /*
2416 * Allocate and initialize a QPN entry, then insert
2417 * it into the AVL tree.
2418 */
2419 entry = (hermon_qpn_entry_t *)kmem_zalloc(
2420 sizeof (hermon_qpn_entry_t), KM_NOSLEEP);
2421 if (entry == NULL) {
2422 mutex_exit(&state->hs_qpn_avl_lock);
2423 return (DDI_FAILURE);
2424 }
2425
2426 entry->qpn_indx = qpc->hr_indx;
2427 entry->qpn_refcnt = 0;
2428 entry->qpn_counter = 0;
2429
2430 avl_insert(&state->hs_qpn_avl, entry, where);
2431 }
2432
2433 /*
2434 * Make the AVL tree entry point to the QP context resource that
2435 * it will be responsible for tracking
2436 */
2437 entry->qpn_qpc = qpc;
2438
2439 /*
2440 * Setup the QP handle to point to the AVL tree entry. Then
2441 * generate the new QP number from the entry's QPN counter value
2442 * and the hardware's QP context table index.
2443 */
2444 qp->qp_qpn_hdl = entry;
2445 qp->qp_qpnum = ((entry->qpn_counter <<
2446 state->hs_cfg_profile->cp_log_num_qp) | qpc->hr_indx) &
2447 HERMON_QP_MAXNUMBER_MSK;
2448 qp->qp_ring = qp->qp_qpnum << 8;
2449
2450 /*
2451 * Increment the reference counter and QPN counter. The QPN
2452 * counter always indicates the next available number for use.
2453 */
2454 entry->qpn_counter++;
2455 entry->qpn_refcnt++;
2456
2457 mutex_exit(&state->hs_qpn_avl_lock);
2458
2459 return (DDI_SUCCESS);
2460 }
2461
2462
2463 /*
2464 * hermon_qp_release_qpn()
2465 * Context: Can be called only from user or kernel context.
2466 */
2467 void
2468 hermon_qp_release_qpn(hermon_state_t *state, hermon_qpn_entry_t *entry,
2469 int flags)
2470 {
2471 ASSERT(entry != NULL);
2472
2473 mutex_enter(&state->hs_qpn_avl_lock);
2474
2475 /*
2476 * If we are releasing the QP number here, then we decrement the
2477 * reference count and check for zero references. If there are
2478 * zero references, then we free the QPC context (if it hadn't
2479 * already been freed during a HERMON_QPN_FREE_ONLY free, i.e. for
2480 * reuse with another similar QP number) and remove the tracking
2481 * structure from the QP number AVL tree and free the structure.
2482 * If we are not releasing the QP number here, then, as long as we
2483 * have not exhausted the usefulness of the QPC context (that is,
2484 * re-used it too many times without the reference count having
2485 * gone to zero), we free up the QPC context for use by another
2486 * thread (which will use it to construct a different QP number
2487 * from the same QPC table index).
2488 */
2489 if (flags == HERMON_QPN_RELEASE) {
2490 entry->qpn_refcnt--;
2491
2492 /*
2493 * If the reference count is zero, then we free the QPC
2494 * context (if it hadn't already been freed in an early
2495 * step, e.g. HERMON_QPN_FREE_ONLY) and remove/free the
2496 * tracking structure from the QP number AVL tree.
2497 */
2498 if (entry->qpn_refcnt == 0) {
2499 if (entry->qpn_qpc != NULL) {
2500 hermon_rsrc_free(state, &entry->qpn_qpc);
2501 }
2502
2503 /*
2504 * If the current entry has served it's useful
2505 * purpose (i.e. been reused the maximum allowable
2506 * number of times), then remove it from QP number
2507 * AVL tree and free it up.
2508 */
2509 if (entry->qpn_counter >= (1 <<
2510 (24 - state->hs_cfg_profile->cp_log_num_qp))) {
2511 avl_remove(&state->hs_qpn_avl, entry);
2512 kmem_free(entry, sizeof (hermon_qpn_entry_t));
2513 }
2514 }
2515
2516 } else if (flags == HERMON_QPN_FREE_ONLY) {
2517 /*
2518 * Even if we are not freeing the QP number, that will not
2519 * always prevent us from releasing the QPC context. In fact,
2520 * since the QPC context only forms part of the whole QPN,
2521 * we want to free it up for use by other consumers. But
2522 * if the reference count is non-zero (which it will always
2523 * be when we are doing HERMON_QPN_FREE_ONLY) and the counter
2524 * has reached its maximum value, then we cannot reuse the
2525 * QPC context until the reference count eventually reaches
2526 * zero (in HERMON_QPN_RELEASE, above).
2527 */
2528 if (entry->qpn_counter < (1 <<
2529 (24 - state->hs_cfg_profile->cp_log_num_qp))) {
2530 hermon_rsrc_free(state, &entry->qpn_qpc);
2531 }
2532 }
2533 mutex_exit(&state->hs_qpn_avl_lock);
2534 }
2535
2536
2537 /*
2538 * hermon_qpn_avl_compare()
2539 * Context: Can be called from user or kernel context.
2540 */
2541 static int
2542 hermon_qpn_avl_compare(const void *q, const void *e)
2543 {
2544 hermon_qpn_entry_t *entry, *query;
2545
2546 entry = (hermon_qpn_entry_t *)e;
2547 query = (hermon_qpn_entry_t *)q;
2548
2549 if (query->qpn_indx < entry->qpn_indx) {
2550 return (-1);
2551 } else if (query->qpn_indx > entry->qpn_indx) {
2552 return (+1);
2553 } else {
2554 return (0);
2555 }
2556 }
2557
2558
2559 /*
2560 * hermon_qpn_avl_init()
2561 * Context: Only called from attach() path context
2562 */
2563 void
2564 hermon_qpn_avl_init(hermon_state_t *state)
2565 {
2566 /* Initialize the lock used for QP number (QPN) AVL tree access */
2567 mutex_init(&state->hs_qpn_avl_lock, NULL, MUTEX_DRIVER,
2568 DDI_INTR_PRI(state->hs_intrmsi_pri));
2569
2570 /* Initialize the AVL tree for the QP number (QPN) storage */
2571 avl_create(&state->hs_qpn_avl, hermon_qpn_avl_compare,
2572 sizeof (hermon_qpn_entry_t),
2573 offsetof(hermon_qpn_entry_t, qpn_avlnode));
2574 }
2575
2576
2577 /*
2578 * hermon_qpn_avl_fini()
2579 * Context: Only called from attach() and/or detach() path contexts
2580 */
2581 void
2582 hermon_qpn_avl_fini(hermon_state_t *state)
2583 {
2584 hermon_qpn_entry_t *entry;
2585 void *cookie;
2586
2587 /*
2588 * Empty all entries (if necessary) and destroy the AVL tree
2589 * that was used for QP number (QPN) tracking.
2590 */
2591 cookie = NULL;
2592 while ((entry = (hermon_qpn_entry_t *)avl_destroy_nodes(
2593 &state->hs_qpn_avl, &cookie)) != NULL) {
2594 kmem_free(entry, sizeof (hermon_qpn_entry_t));
2595 }
2596 avl_destroy(&state->hs_qpn_avl);
2597
2598 /* Destroy the lock used for QP number (QPN) AVL tree access */
2599 mutex_destroy(&state->hs_qpn_avl_lock);
2600 }
2601
2602
2603 /*
2604 * hermon_qphdl_from_qpnum()
2605 * Context: Can be called from interrupt or base context.
2606 *
2607 * This routine is important because changing the unconstrained
2608 * portion of the QP number is critical to the detection of a
2609 * potential race condition in the QP event handler code (i.e. the case
2610 * where a QP is freed and alloc'd again before an event for the
2611 * "old" QP can be handled).
2612 *
2613 * While this is not a perfect solution (not sure that one exists)
2614 * it does help to mitigate the chance that this race condition will
2615 * cause us to deliver a "stale" event to the new QP owner. Note:
2616 * this solution does not scale well because the number of constrained
2617 * bits increases (and, hence, the number of unconstrained bits
2618 * decreases) as the number of supported QPs grows. For small and
2619 * intermediate values, it should hopefully provide sufficient
2620 * protection.
2621 */
2622 hermon_qphdl_t
2623 hermon_qphdl_from_qpnum(hermon_state_t *state, uint_t qpnum)
2624 {
2625 uint_t qpindx, qpmask;
2626
2627 /* Calculate the QP table index from the qpnum */
2628 qpmask = (1 << state->hs_cfg_profile->cp_log_num_qp) - 1;
2629 qpindx = qpnum & qpmask;
2630 return (hermon_icm_num_to_hdl(state, HERMON_QPC, qpindx));
2631 }
2632
2633
2634 /*
2635 * hermon_special_qp_rsrc_alloc
2636 * Context: Can be called from interrupt or base context.
2637 */
2638 static int
2639 hermon_special_qp_rsrc_alloc(hermon_state_t *state, ibt_sqp_type_t type,
2640 uint_t port, hermon_rsrc_t **qp_rsrc)
2641 {
2642 uint_t mask, flags;
2643 int status;
2644
2645 mutex_enter(&state->hs_spec_qplock);
2646 flags = state->hs_spec_qpflags;
2647 if (type == IBT_SMI_SQP) {
2648 /*
2649 * Check here to see if the driver has been configured
2650 * to instruct the Hermon firmware to handle all incoming
2651 * SMP messages (i.e. messages sent to SMA). If so,
2652 * then we will treat QP0 as if it has already been
2653 * allocated (for internal use). Otherwise, if we allow
2654 * the allocation to happen, it will cause unexpected
2655 * behaviors (e.g. Hermon SMA becomes unresponsive).
2656 */
2657 if (state->hs_cfg_profile->cp_qp0_agents_in_fw != 0) {
2658 mutex_exit(&state->hs_spec_qplock);
2659 return (IBT_QP_IN_USE);
2660 }
2661
2662 /*
2663 * If this is the first QP0 allocation, then post
2664 * a CONF_SPECIAL_QP firmware command
2665 */
2666 if ((flags & HERMON_SPECIAL_QP0_RSRC_MASK) == 0) {
2667 status = hermon_conf_special_qp_cmd_post(state,
2668 state->hs_spec_qp0->hr_indx, HERMON_CMD_QP_SMI,
2669 HERMON_CMD_NOSLEEP_SPIN,
2670 HERMON_CMD_SPEC_QP_OPMOD(
2671 state->hs_cfg_profile->cp_qp0_agents_in_fw,
2672 state->hs_cfg_profile->cp_qp1_agents_in_fw));
2673 if (status != HERMON_CMD_SUCCESS) {
2674 mutex_exit(&state->hs_spec_qplock);
2675 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2676 "command failed: %08x\n",
2677 state->hs_instance, status);
2678 return (IBT_INSUFF_RESOURCE);
2679 }
2680 }
2681
2682 /*
2683 * Now check (and, if necessary, modify) the flags to indicate
2684 * whether the allocation was successful
2685 */
2686 mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
2687 if (flags & mask) {
2688 mutex_exit(&state->hs_spec_qplock);
2689 return (IBT_QP_IN_USE);
2690 }
2691 state->hs_spec_qpflags |= mask;
2692 *qp_rsrc = state->hs_spec_qp0;
2693
2694 } else {
2695 /*
2696 * If this is the first QP1 allocation, then post
2697 * a CONF_SPECIAL_QP firmware command
2698 */
2699 if ((flags & HERMON_SPECIAL_QP1_RSRC_MASK) == 0) {
2700 status = hermon_conf_special_qp_cmd_post(state,
2701 state->hs_spec_qp1->hr_indx, HERMON_CMD_QP_GSI,
2702 HERMON_CMD_NOSLEEP_SPIN,
2703 HERMON_CMD_SPEC_QP_OPMOD(
2704 state->hs_cfg_profile->cp_qp0_agents_in_fw,
2705 state->hs_cfg_profile->cp_qp1_agents_in_fw));
2706 if (status != HERMON_CMD_SUCCESS) {
2707 mutex_exit(&state->hs_spec_qplock);
2708 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2709 "command failed: %08x\n",
2710 state->hs_instance, status);
2711 return (IBT_INSUFF_RESOURCE);
2712 }
2713 }
2714
2715 /*
2716 * Now check (and, if necessary, modify) the flags to indicate
2717 * whether the allocation was successful
2718 */
2719 mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
2720 if (flags & mask) {
2721 mutex_exit(&state->hs_spec_qplock);
2722 return (IBT_QP_IN_USE);
2723 }
2724 state->hs_spec_qpflags |= mask;
2725 *qp_rsrc = state->hs_spec_qp1;
2726 }
2727
2728 mutex_exit(&state->hs_spec_qplock);
2729 return (DDI_SUCCESS);
2730 }
2731
2732
2733 /*
2734 * hermon_special_qp_rsrc_free
2735 * Context: Can be called from interrupt or base context.
2736 */
2737 static int
2738 hermon_special_qp_rsrc_free(hermon_state_t *state, ibt_sqp_type_t type,
2739 uint_t port)
2740 {
2741 uint_t mask, flags;
2742 int status;
2743
2744 mutex_enter(&state->hs_spec_qplock);
2745 if (type == IBT_SMI_SQP) {
2746 mask = (1 << (HERMON_SPECIAL_QP0_RSRC + port));
2747 state->hs_spec_qpflags &= ~mask;
2748 flags = state->hs_spec_qpflags;
2749
2750 /*
2751 * If this is the last QP0 free, then post a CONF_SPECIAL_QP
2752 * NOW, If this is the last Special QP free, then post a
2753 * CONF_SPECIAL_QP firmware command - it'll stop them all
2754 */
2755 if (flags) {
2756 status = hermon_conf_special_qp_cmd_post(state, 0,
2757 HERMON_CMD_QP_SMI, HERMON_CMD_NOSLEEP_SPIN, 0);
2758 if (status != HERMON_CMD_SUCCESS) {
2759 mutex_exit(&state->hs_spec_qplock);
2760 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2761 "command failed: %08x\n",
2762 state->hs_instance, status);
2763 if (status == HERMON_CMD_INVALID_STATUS) {
2764 hermon_fm_ereport(state, HCA_SYS_ERR,
2765 HCA_ERR_SRV_LOST);
2766 }
2767 return (ibc_get_ci_failure(0));
2768 }
2769 }
2770 } else {
2771 mask = (1 << (HERMON_SPECIAL_QP1_RSRC + port));
2772 state->hs_spec_qpflags &= ~mask;
2773 flags = state->hs_spec_qpflags;
2774
2775 /*
2776 * If this is the last QP1 free, then post a CONF_SPECIAL_QP
2777 * NOW, if this is the last special QP free, then post a
2778 * CONF_SPECIAL_QP firmware command - it'll stop them all
2779 */
2780 if (flags) {
2781 status = hermon_conf_special_qp_cmd_post(state, 0,
2782 HERMON_CMD_QP_GSI, HERMON_CMD_NOSLEEP_SPIN, 0);
2783 if (status != HERMON_CMD_SUCCESS) {
2784 mutex_exit(&state->hs_spec_qplock);
2785 cmn_err(CE_NOTE, "hermon%d: CONF_SPECIAL_QP "
2786 "command failed: %08x\n",
2787 state->hs_instance, status);
2788 if (status == HERMON_CMD_INVALID_STATUS) {
2789 hermon_fm_ereport(state, HCA_SYS_ERR,
2790 HCA_ERR_SRV_LOST);
2791 }
2792 return (ibc_get_ci_failure(0));
2793 }
2794 }
2795 }
2796
2797 mutex_exit(&state->hs_spec_qplock);
2798 return (DDI_SUCCESS);
2799 }
2800
2801
2802 /*
2803 * hermon_qp_sgl_to_logwqesz()
2804 * Context: Can be called from interrupt or base context.
2805 */
2806 static void
2807 hermon_qp_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
2808 uint_t real_max_sgl, hermon_qp_wq_type_t wq_type,
2809 uint_t *logwqesz, uint_t *max_sgl)
2810 {
2811 uint_t max_size, log2, actual_sgl;
2812
2813 switch (wq_type) {
2814 case HERMON_QP_WQ_TYPE_SENDQ_UD:
2815 /*
2816 * Use requested maximum SGL to calculate max descriptor size
2817 * (while guaranteeing that the descriptor size is a
2818 * power-of-2 cachelines).
2819 */
2820 max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
2821 log2 = highbit(max_size);
2822 if (ISP2(max_size)) {
2823 log2 = log2 - 1;
2824 }
2825
2826 /* Make sure descriptor is at least the minimum size */
2827 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2828
2829 /* Calculate actual number of SGL (given WQE size) */
2830 actual_sgl = ((1 << log2) -
2831 sizeof (hermon_hw_snd_wqe_ctrl_t)) >> 4;
2832 break;
2833
2834 case HERMON_QP_WQ_TYPE_SENDQ_CONN:
2835 /*
2836 * Use requested maximum SGL to calculate max descriptor size
2837 * (while guaranteeing that the descriptor size is a
2838 * power-of-2 cachelines).
2839 */
2840 max_size = (HERMON_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
2841 log2 = highbit(max_size);
2842 if (ISP2(max_size)) {
2843 log2 = log2 - 1;
2844 }
2845
2846 /* Make sure descriptor is at least the minimum size */
2847 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2848
2849 /* Calculate actual number of SGL (given WQE size) */
2850 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SND_HDRS) >> 4;
2851 break;
2852
2853 case HERMON_QP_WQ_TYPE_RECVQ:
2854 /*
2855 * Same as above (except for Recv WQEs)
2856 */
2857 max_size = (HERMON_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
2858 log2 = highbit(max_size);
2859 if (ISP2(max_size)) {
2860 log2 = log2 - 1;
2861 }
2862
2863 /* Make sure descriptor is at least the minimum size */
2864 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2865
2866 /* Calculate actual number of SGL (given WQE size) */
2867 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_RCV_HDRS) >> 4;
2868 break;
2869
2870 case HERMON_QP_WQ_TYPE_SENDMLX_QP0:
2871 /*
2872 * Same as above (except for MLX transport WQEs). For these
2873 * WQEs we have to account for the space consumed by the
2874 * "inline" packet headers. (This is smaller than for QP1
2875 * below because QP0 is not allowed to send packets with a GRH.
2876 */
2877 max_size = (HERMON_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4));
2878 log2 = highbit(max_size);
2879 if (ISP2(max_size)) {
2880 log2 = log2 - 1;
2881 }
2882
2883 /* Make sure descriptor is at least the minimum size */
2884 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2885
2886 /* Calculate actual number of SGL (given WQE size) */
2887 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP0_HDRS) >> 4;
2888 break;
2889
2890 case HERMON_QP_WQ_TYPE_SENDMLX_QP1:
2891 /*
2892 * Same as above. For these WQEs we again have to account for
2893 * the space consumed by the "inline" packet headers. (This
2894 * is larger than for QP0 above because we have to account for
2895 * the possibility of a GRH in each packet - and this
2896 * introduces an alignment issue that causes us to consume
2897 * an additional 8 bytes).
2898 */
2899 max_size = (HERMON_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4));
2900 log2 = highbit(max_size);
2901 if (ISP2(max_size)) {
2902 log2 = log2 - 1;
2903 }
2904
2905 /* Make sure descriptor is at least the minimum size */
2906 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
2907
2908 /* Calculate actual number of SGL (given WQE size) */
2909 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_QP1_HDRS) >> 4;
2910 break;
2911
2912 default:
2913 HERMON_WARNING(state, "unexpected work queue type");
2914 break;
2915 }
2916
2917 /* Fill in the return values */
2918 *logwqesz = log2;
2919 *max_sgl = min(real_max_sgl, actual_sgl);
2920 }