Print this page
8368 remove warlock leftovers from usr/src/uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/ib/adapters/tavor/tavor_qp.c
+++ new/usr/src/uts/common/io/ib/adapters/tavor/tavor_qp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * tavor_qp.c
29 29 * Tavor Queue Pair Processing Routines
30 30 *
31 31 * Implements all the routines necessary for allocating, freeing, and
32 32 * querying the Tavor queue pairs.
33 33 */
34 34
35 35 #include <sys/types.h>
36 36 #include <sys/conf.h>
37 37 #include <sys/ddi.h>
38 38 #include <sys/sunddi.h>
39 39 #include <sys/modctl.h>
40 40 #include <sys/bitmap.h>
41 41 #include <sys/sysmacros.h>
42 42
43 43 #include <sys/ib/adapters/tavor/tavor.h>
44 44 #include <sys/ib/ib_pkt_hdrs.h>
45 45
46 46 static int tavor_qp_create_qpn(tavor_state_t *state, tavor_qphdl_t qp,
47 47 tavor_rsrc_t *qpc);
48 48 static int tavor_qpn_avl_compare(const void *q, const void *e);
49 49 static int tavor_special_qp_rsrc_alloc(tavor_state_t *state,
50 50 ibt_sqp_type_t type, uint_t port, tavor_rsrc_t **qp_rsrc);
51 51 static int tavor_special_qp_rsrc_free(tavor_state_t *state, ibt_sqp_type_t type,
52 52 uint_t port);
53 53 static void tavor_qp_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
54 54 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);
55 55
56 56 /*
57 57 * tavor_qp_alloc()
58 58 * Context: Can be called only from user or kernel context.
59 59 */
60 60 int
61 61 tavor_qp_alloc(tavor_state_t *state, tavor_qp_info_t *qpinfo,
62 62 uint_t sleepflag, tavor_qp_options_t *op)
63 63 {
64 64 tavor_rsrc_pool_info_t *rsrc_pool;
65 65 tavor_rsrc_t *qpc, *rsrc, *rdb;
66 66 tavor_umap_db_entry_t *umapdb;
67 67 tavor_qphdl_t qp;
68 68 ibt_qp_alloc_attr_t *attr_p;
69 69 ibt_qp_type_t type;
70 70 ibtl_qp_hdl_t ibt_qphdl;
71 71 ibt_chan_sizes_t *queuesz_p;
72 72 ib_qpn_t *qpn;
73 73 tavor_qphdl_t *qphdl;
74 74 ibt_mr_attr_t mr_attr;
75 75 tavor_mr_options_t mr_op;
76 76 tavor_srqhdl_t srq;
77 77 tavor_pdhdl_t pd;
78 78 tavor_cqhdl_t sq_cq, rq_cq;
79 79 tavor_mrhdl_t mr;
80 80 uint64_t value, qp_desc_off;
81 81 uint32_t *sq_buf, *rq_buf;
82 82 uint32_t log_qp_sq_size, log_qp_rq_size;
↓ open down ↓ |
82 lines elided |
↑ open up ↑ |
83 83 uint32_t sq_size, rq_size;
84 84 uint32_t sq_wqe_size, rq_wqe_size;
85 85 uint32_t max_rdb, max_sgl, uarpg;
86 86 uint_t wq_location, dma_xfer_mode, qp_is_umap;
87 87 uint_t qp_srq_en;
88 88 int status, flag;
89 89 char *errormsg;
90 90
91 91 TAVOR_TNF_ENTER(tavor_qp_alloc);
92 92
93 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p, *queuesz_p))
94 -
95 93 /*
96 94 * Check the "options" flag. Currently this flag tells the driver
 97 95 * whether or not the QP's work queues should come from normal
98 96 * system memory or whether they should be allocated from DDR memory.
99 97 */
100 98 if (op == NULL) {
101 99 wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
102 100 } else {
103 101 wq_location = op->qpo_wq_loc;
104 102 }
105 103
106 104 /*
107 105 * Extract the necessary info from the tavor_qp_info_t structure
108 106 */
109 107 attr_p = qpinfo->qpi_attrp;
110 108 type = qpinfo->qpi_type;
111 109 ibt_qphdl = qpinfo->qpi_ibt_qphdl;
112 110 queuesz_p = qpinfo->qpi_queueszp;
113 111 qpn = qpinfo->qpi_qpn;
114 112 qphdl = &qpinfo->qpi_qphdl;
115 113
116 114 /*
117 115 * Determine whether QP is being allocated for userland access or
118 116 * whether it is being allocated for kernel access. If the QP is
119 117 * being allocated for userland access, then lookup the UAR doorbell
120 118 * page number for the current process. Note: If this is not found
121 119 * (e.g. if the process has not previously open()'d the Tavor driver),
122 120 * then an error is returned.
123 121 */
124 122 qp_is_umap = (attr_p->qp_alloc_flags & IBT_QP_USER_MAP) ? 1 : 0;
125 123 if (qp_is_umap) {
126 124 status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(),
127 125 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
128 126 if (status != DDI_SUCCESS) {
129 127 /* Set "status" and "errormsg" and goto failure */
130 128 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "failed UAR page");
131 129 goto qpalloc_fail;
132 130 }
133 131 uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx;
134 132 }
135 133
136 134 /*
137 135 * Determine whether QP is being associated with an SRQ
138 136 */
139 137 qp_srq_en = (attr_p->qp_alloc_flags & IBT_QP_USES_SRQ) ? 1 : 0;
140 138 if (qp_srq_en) {
141 139 /*
142 140 * Check for valid SRQ handle pointers
143 141 */
144 142 if (attr_p->qp_ibc_srq_hdl == NULL) {
145 143 /* Set "status" and "errormsg" and goto failure */
146 144 TAVOR_TNF_FAIL(IBT_SRQ_HDL_INVALID,
147 145 "invalid SRQ handle");
148 146 goto qpalloc_fail;
149 147 }
150 148 srq = (tavor_srqhdl_t)attr_p->qp_ibc_srq_hdl;
151 149 }
152 150
153 151 /*
154 152 * Check for valid QP service type (only UD/RC/UC supported)
155 153 */
156 154 if (((type != IBT_UD_RQP) && (type != IBT_RC_RQP) &&
157 155 (type != IBT_UC_RQP))) {
158 156 /* Set "status" and "errormsg" and goto failure */
159 157 TAVOR_TNF_FAIL(IBT_QP_SRV_TYPE_INVALID, "invalid serv type");
160 158 goto qpalloc_fail;
161 159 }
162 160
163 161 /*
164 162 * Only RC is supported on an SRQ -- This is a Tavor hardware
165 163 * limitation. Arbel native mode will not have this shortcoming.
166 164 */
167 165 if (qp_srq_en && type != IBT_RC_RQP) {
168 166 /* Set "status" and "errormsg" and goto failure */
169 167 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid serv type with SRQ");
170 168 goto qpalloc_fail;
171 169 }
172 170
173 171 /*
174 172 * Check for valid PD handle pointer
175 173 */
176 174 if (attr_p->qp_pd_hdl == NULL) {
177 175 /* Set "status" and "errormsg" and goto failure */
178 176 TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
179 177 goto qpalloc_fail;
180 178 }
181 179 pd = (tavor_pdhdl_t)attr_p->qp_pd_hdl;
182 180
183 181 /*
184 182 * If on an SRQ, check to make sure the PD is the same
185 183 */
186 184 if (qp_srq_en && (pd->pd_pdnum != srq->srq_pdhdl->pd_pdnum)) {
187 185 /* Set "status" and "errormsg" and goto failure */
188 186 TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
189 187 goto qpalloc_fail;
190 188 }
191 189
192 190 /* Increment the reference count on the protection domain (PD) */
193 191 tavor_pd_refcnt_inc(pd);
194 192
195 193 /*
196 194 * Check for valid CQ handle pointers
197 195 */
198 196 if ((attr_p->qp_ibc_scq_hdl == NULL) ||
199 197 (attr_p->qp_ibc_rcq_hdl == NULL)) {
200 198 /* Set "status" and "errormsg" and goto failure */
201 199 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
202 200 goto qpalloc_fail1;
203 201 }
204 202 sq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_scq_hdl;
205 203 rq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
206 204
207 205 /*
208 206 * Increment the reference count on the CQs. One or both of these
209 207 * could return error if we determine that the given CQ is already
210 208 * being used with a special (SMI/GSI) QP.
211 209 */
212 210 status = tavor_cq_refcnt_inc(sq_cq, TAVOR_CQ_IS_NORMAL);
213 211 if (status != DDI_SUCCESS) {
214 212 /* Set "status" and "errormsg" and goto failure */
215 213 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
216 214 goto qpalloc_fail1;
217 215 }
218 216 status = tavor_cq_refcnt_inc(rq_cq, TAVOR_CQ_IS_NORMAL);
219 217 if (status != DDI_SUCCESS) {
220 218 /* Set "status" and "errormsg" and goto failure */
221 219 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
222 220 goto qpalloc_fail2;
223 221 }
224 222
225 223 /*
226 224 * Allocate an QP context entry. This will be filled in with all
227 225 * the necessary parameters to define the Queue Pair. Unlike
228 226 * other Tavor hardware resources, ownership is not immediately
229 227 * given to hardware in the final step here. Instead, we must
230 228 * wait until the QP is later transitioned to the "Init" state before
231 229 * passing the QP to hardware. If we fail here, we must undo all
232 230 * the reference count (CQ and PD).
233 231 */
234 232 status = tavor_rsrc_alloc(state, TAVOR_QPC, 1, sleepflag, &qpc);
235 233 if (status != DDI_SUCCESS) {
236 234 /* Set "status" and "errormsg" and goto failure */
237 235 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed QP context");
238 236 goto qpalloc_fail3;
239 237 }
240 238
241 239 /*
242 240 * Allocate the software structure for tracking the queue pair
↓ open down ↓ |
138 lines elided |
↑ open up ↑ |
243 241 * (i.e. the Tavor Queue Pair handle). If we fail here, we must
244 242 * undo the reference counts and the previous resource allocation.
245 243 */
246 244 status = tavor_rsrc_alloc(state, TAVOR_QPHDL, 1, sleepflag, &rsrc);
247 245 if (status != DDI_SUCCESS) {
248 246 /* Set "status" and "errormsg" and goto failure */
249 247 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed QP handle");
250 248 goto qpalloc_fail4;
251 249 }
252 250 qp = (tavor_qphdl_t)rsrc->tr_addr;
253 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
254 251
255 252 /*
256 253 * Calculate the QP number from QPC index. This routine handles
257 254 * all of the operations necessary to keep track of used, unused,
258 255 * and released QP numbers.
259 256 */
260 257 status = tavor_qp_create_qpn(state, qp, qpc);
261 258 if (status != DDI_SUCCESS) {
262 259 /* Set "status" and "errormsg" and goto failure */
263 260 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed QPN create");
264 261 goto qpalloc_fail5;
265 262 }
266 263
267 264 /*
268 265 * If this will be a user-mappable QP, then allocate an entry for
269 266 * the "userland resources database". This will later be added to
270 267 * the database (after all further QP operations are successful).
271 268 * If we fail here, we must undo the reference counts and the
272 269 * previous resource allocation.
273 270 */
274 271 if (qp_is_umap) {
275 272 umapdb = tavor_umap_db_alloc(state->ts_instance, qp->qp_qpnum,
276 273 MLNX_UMAP_QPMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
277 274 if (umapdb == NULL) {
278 275 /* Set "status" and "errormsg" and goto failure */
279 276 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
280 277 goto qpalloc_fail6;
281 278 }
282 279 }
283 280
284 281 /*
285 282 * If this is an RC QP, then pre-allocate the maximum number of RDB
286 283 * entries. This allows us to ensure that we can later cover all
287 284 * the resources needed by hardware for handling multiple incoming
288 285 * RDMA Reads. Note: These resources are obviously not always
289 286 * necessary. They are allocated here anyway. Someday maybe this
290 287 * can be modified to allocate these on-the-fly (i.e. only if RDMA
291 288 * Read or Atomic operations are enabled) XXX
292 289 * If we fail here, we have a bunch of resource and reference count
293 290 * cleanup to do.
294 291 */
295 292 if (type == IBT_RC_RQP) {
296 293 max_rdb = state->ts_cfg_profile->cp_hca_max_rdma_in_qp;
297 294 status = tavor_rsrc_alloc(state, TAVOR_RDB, max_rdb,
298 295 sleepflag, &rdb);
299 296 if (status != DDI_SUCCESS) {
300 297 /* Set "status" and "errormsg" and goto failure */
301 298 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed RDB");
302 299 goto qpalloc_fail7;
303 300 }
304 301 qp->qp_rdbrsrcp = rdb;
305 302 /* Calculate offset (into DDR memory) of RDB entries */
306 303 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_RDB];
307 304 qp->qp_rdb_ddraddr = (uintptr_t)rsrc_pool->rsrc_ddr_offset +
308 305 (rdb->tr_indx << TAVOR_RDB_SIZE_SHIFT);
309 306 }
310 307
311 308 /*
312 309 * Calculate the appropriate size for the work queues.
313 310 * Note: All Tavor QP work queues must be a power-of-2 in size. Also
314 311 * they may not be any smaller than TAVOR_QP_MIN_SIZE. This step is
315 312 * to round the requested size up to the next highest power-of-2
316 313 */
317 314 attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, TAVOR_QP_MIN_SIZE);
318 315 attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, TAVOR_QP_MIN_SIZE);
319 316 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
320 317 if (ISP2(attr_p->qp_sizes.cs_sq)) {
321 318 log_qp_sq_size = log_qp_sq_size - 1;
322 319 }
323 320 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
324 321 if (ISP2(attr_p->qp_sizes.cs_rq)) {
325 322 log_qp_rq_size = log_qp_rq_size - 1;
326 323 }
327 324
328 325 /*
329 326 * Next we verify that the rounded-up size is valid (i.e. consistent
330 327 * with the device limits and/or software-configured limits). If not,
331 328 * then obviously we have a lot of cleanup to do before returning.
332 329 */
333 330 if ((log_qp_sq_size > state->ts_cfg_profile->cp_log_max_qp_sz) ||
334 331 (!qp_srq_en && (log_qp_rq_size >
335 332 state->ts_cfg_profile->cp_log_max_qp_sz))) {
336 333 /* Set "status" and "errormsg" and goto failure */
337 334 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max QP size");
338 335 goto qpalloc_fail8;
339 336 }
340 337
341 338 /*
342 339 * Next we verify that the requested number of SGL is valid (i.e.
343 340 * consistent with the device limits and/or software-configured
344 341 * limits). If not, then obviously the same cleanup needs to be done.
345 342 */
346 343 max_sgl = state->ts_cfg_profile->cp_wqe_real_max_sgl;
347 344 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
348 345 (!qp_srq_en && (attr_p->qp_sizes.cs_rq_sgl > max_sgl))) {
349 346 /* Set "status" and "errormsg" and goto failure */
350 347 TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max QP SGL");
351 348 goto qpalloc_fail8;
352 349 }
353 350
354 351 /*
355 352 * Determine this QP's WQE sizes (for both the Send and Recv WQEs).
356 353 * This will depend on the requested number of SGLs. Note: this
357 354 * has the side-effect of also calculating the real number of SGLs
358 355 * (for the calculated WQE size).
359 356 *
360 357 * For QP's on an SRQ, we set these to 0.
361 358 */
362 359 if (qp_srq_en) {
363 360 qp->qp_rq_log_wqesz = 0;
364 361 qp->qp_rq_sgl = 0;
365 362 } else {
366 363 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
367 364 TAVOR_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz,
368 365 &qp->qp_rq_sgl);
369 366 }
370 367 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
371 368 TAVOR_QP_WQ_TYPE_SENDQ, &qp->qp_sq_log_wqesz, &qp->qp_sq_sgl);
372 369
373 370 /*
374 371 * Allocate the memory for QP work queues. Note: The location from
375 372 * which we will allocate these work queues has been passed in
376 373 * through the tavor_qp_options_t structure. Since Tavor work queues
377 374 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
378 375 * the work queue memory is very important. We used to allocate
379 376 * work queues (the combined receive and send queues) so that they
380 377 * would be aligned on their combined size. That alignment guaranteed
381 378 * that they would never cross the 4GB boundary (Tavor work queues
382 379 * are on the order of MBs at maximum). Now we are able to relax
383 380 * this alignment constraint by ensuring that the IB address assigned
384 381 * to the queue memory (as a result of the tavor_mr_register() call)
385 382 * is offset from zero.
386 383 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
387 384 * guarantee the alignment, but when attempting to use IOMMU bypass
388 385 * mode we found that we were not allowed to specify any alignment
389 386 * that was more restrictive than the system page size.
390 387 * So we avoided this constraint by passing two alignment values,
391 388 * one for the memory allocation itself and the other for the DMA
392 389 * handle (for later bind). This used to cause more memory than
393 390 * necessary to be allocated (in order to guarantee the more
 394 391 * restrictive alignment constraint). But by guaranteeing the
395 392 * zero-based IB virtual address for the queue, we are able to
396 393 * conserve this memory.
397 394 * Note: If QP is not user-mappable, then it may come from either
398 395 * kernel system memory or from HCA-attached local DDR memory.
399 396 */
400 397 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
401 398 sq_size = (1 << log_qp_sq_size) * sq_wqe_size;
402 399
403 400 /* QP on SRQ sets these to 0 */
404 401 if (qp_srq_en) {
405 402 rq_wqe_size = 0;
406 403 rq_size = 0;
407 404 } else {
408 405 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
409 406 rq_size = (1 << log_qp_rq_size) * rq_wqe_size;
410 407 }
411 408
412 409 qp->qp_wqinfo.qa_size = sq_size + rq_size;
413 410 qp->qp_wqinfo.qa_alloc_align = max(sq_wqe_size, rq_wqe_size);
414 411 qp->qp_wqinfo.qa_bind_align = max(sq_wqe_size, rq_wqe_size);
415 412 if (qp_is_umap) {
416 413 qp->qp_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
417 414 } else {
418 415 qp->qp_wqinfo.qa_location = wq_location;
419 416 }
420 417 status = tavor_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
421 418 if (status != DDI_SUCCESS) {
422 419 /* Set "status" and "errormsg" and goto failure */
423 420 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed work queue");
424 421 goto qpalloc_fail8;
425 422 }
426 423 if (sq_wqe_size > rq_wqe_size) {
427 424 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
428 425
429 426 /*
430 427 * If QP's on an SRQ, we set the rq_buf to NULL
431 428 */
432 429 if (qp_srq_en)
433 430 rq_buf = NULL;
434 431 else
435 432 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
436 433 } else {
437 434 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
438 435 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
439 436 }
440 437
441 438 /*
442 439 * Register the memory for the QP work queues. The memory for the
443 440 * QP must be registered in the Tavor TPT tables. This gives us the
444 441 * LKey to specify in the QP context later. Note: The memory for
445 442 * Tavor work queues (both Send and Recv) must be contiguous and
446 443 * registered as a single memory region. Note also: If the work
447 444 * queue is to be allocated from DDR memory, then only a "bypass"
448 445 * mapping is appropriate. And if the QP memory is user-mappable,
449 446 * then we force DDI_DMA_CONSISTENT mapping.
450 447 * Also, in order to meet the alignment restriction, we pass the
451 448 * "mro_bind_override_addr" flag in the call to tavor_mr_register().
452 449 * This guarantees that the resulting IB vaddr will be zero-based
453 450 * (modulo the offset into the first page).
454 451 * If we fail here, we still have the bunch of resource and reference
455 452 * count cleanup to do.
456 453 */
457 454 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
458 455 IBT_MR_NOSLEEP;
459 456 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
460 457 mr_attr.mr_len = qp->qp_wqinfo.qa_size;
461 458 mr_attr.mr_as = NULL;
462 459 mr_attr.mr_flags = flag;
463 460 if (qp_is_umap) {
464 461 mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
465 462 } else {
466 463 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
467 464 mr_op.mro_bind_type =
468 465 state->ts_cfg_profile->cp_iommu_bypass;
469 466 dma_xfer_mode =
470 467 state->ts_cfg_profile->cp_streaming_consistent;
471 468 if (dma_xfer_mode == DDI_DMA_STREAMING) {
472 469 mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
473 470 }
474 471 } else {
475 472 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
476 473 }
477 474 }
478 475 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
479 476 mr_op.mro_bind_override_addr = 1;
480 477 status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
481 478 if (status != DDI_SUCCESS) {
482 479 /* Set "status" and "errormsg" and goto failure */
483 480 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
484 481 goto qpalloc_fail9;
485 482 }
486 483
487 484 /*
488 485 * Calculate the offset between the kernel virtual address space
489 486 * and the IB virtual address space. This will be used when
490 487 * posting work requests to properly initialize each WQE.
491 488 */
492 489 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
493 490 (uint64_t)mr->mr_bindinfo.bi_addr;
494 491
495 492 /*
496 493 * Fill in all the return arguments (if necessary). This includes
497 494 * real work queue sizes, real SGLs, and QP number
498 495 */
499 496 if (queuesz_p != NULL) {
500 497 queuesz_p->cs_sq = (1 << log_qp_sq_size);
501 498 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
502 499
 503 500 /* QP on an SRQ sets these to 0 */
504 501 if (qp_srq_en) {
505 502 queuesz_p->cs_rq = 0;
506 503 queuesz_p->cs_rq_sgl = 0;
507 504 } else {
508 505 queuesz_p->cs_rq = (1 << log_qp_rq_size);
509 506 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
510 507 }
511 508 }
512 509 if (qpn != NULL) {
513 510 *qpn = (ib_qpn_t)qp->qp_qpnum;
514 511 }
515 512
516 513 /*
517 514 * Fill in the rest of the Tavor Queue Pair handle. We can update
518 515 * the following fields for use in further operations on the QP.
519 516 */
520 517 qp->qp_qpcrsrcp = qpc;
521 518 qp->qp_rsrcp = rsrc;
522 519 qp->qp_state = TAVOR_QP_RESET;
523 520 qp->qp_pdhdl = pd;
524 521 qp->qp_mrhdl = mr;
525 522 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
526 523 TAVOR_QP_SQ_WR_SIGNALED : TAVOR_QP_SQ_ALL_SIGNALED;
527 524 qp->qp_is_special = 0;
528 525 qp->qp_is_umap = qp_is_umap;
529 526 qp->qp_uarpg = (qp->qp_is_umap) ? uarpg : 0;
530 527 qp->qp_umap_dhp = (devmap_cookie_t)NULL;
531 528 qp->qp_sq_cqhdl = sq_cq;
532 529 qp->qp_sq_lastwqeaddr = NULL;
533 530 qp->qp_sq_bufsz = (1 << log_qp_sq_size);
534 531 qp->qp_sq_buf = sq_buf;
535 532 qp->qp_desc_off = qp_desc_off;
536 533 qp->qp_rq_cqhdl = rq_cq;
537 534 qp->qp_rq_lastwqeaddr = NULL;
538 535 qp->qp_rq_buf = rq_buf;
539 536
540 537 /* QP on an SRQ sets this to 0 */
541 538 if (qp_srq_en) {
542 539 qp->qp_rq_bufsz = 0;
543 540 } else {
544 541 qp->qp_rq_bufsz = (1 << log_qp_rq_size);
545 542 }
546 543
547 544 qp->qp_forward_sqd_event = 0;
548 545 qp->qp_sqd_still_draining = 0;
549 546 qp->qp_hdlrarg = (void *)ibt_qphdl;
550 547 qp->qp_mcg_refcnt = 0;
551 548
552 549 /*
553 550 * If this QP is to be associated with an SRQ, then set the SRQ handle
554 551 * appropriately.
555 552 */
556 553 if (qp_srq_en) {
557 554 qp->qp_srqhdl = srq;
558 555 qp->qp_srq_en = TAVOR_QP_SRQ_ENABLED;
559 556 tavor_srq_refcnt_inc(qp->qp_srqhdl);
560 557 } else {
561 558 qp->qp_srqhdl = NULL;
562 559 qp->qp_srq_en = TAVOR_QP_SRQ_DISABLED;
563 560 }
564 561
565 562 /* Determine if later ddi_dma_sync will be necessary */
566 563 qp->qp_sync = TAVOR_QP_IS_SYNC_REQ(state, qp->qp_wqinfo);
567 564
568 565 /* Determine the QP service type */
569 566 if (type == IBT_RC_RQP) {
570 567 qp->qp_serv_type = TAVOR_QP_RC;
571 568 } else if (type == IBT_UD_RQP) {
572 569 qp->qp_serv_type = TAVOR_QP_UD;
573 570 } else {
574 571 qp->qp_serv_type = TAVOR_QP_UC;
575 572 }
576 573
577 574 /* Zero out the QP context */
578 575 bzero(&qp->qpc, sizeof (tavor_hw_qpc_t));
579 576
580 577 /*
581 578 * Put QP handle in Tavor QPNum-to-QPHdl list. Then fill in the
582 579 * "qphdl" and return success
583 580 */
584 581 ASSERT(state->ts_qphdl[qpc->tr_indx] == NULL);
585 582 state->ts_qphdl[qpc->tr_indx] = qp;
586 583
587 584 /*
588 585 * If this is a user-mappable QP, then we need to insert the previously
589 586 * allocated entry into the "userland resources database". This will
590 587 * allow for later lookup during devmap() (i.e. mmap()) calls.
591 588 */
592 589 if (qp_is_umap) {
593 590 tavor_umap_db_add(umapdb);
594 591 }
595 592
596 593 *qphdl = qp;
597 594
598 595 TAVOR_TNF_EXIT(tavor_qp_alloc);
599 596 return (DDI_SUCCESS);
600 597
601 598 /*
602 599 * The following is cleanup for all possible failure cases in this routine
603 600 */
604 601 qpalloc_fail9:
605 602 tavor_queue_free(state, &qp->qp_wqinfo);
606 603 qpalloc_fail8:
607 604 if (type == IBT_RC_RQP) {
608 605 tavor_rsrc_free(state, &rdb);
609 606 }
610 607 qpalloc_fail7:
611 608 if (qp_is_umap) {
612 609 tavor_umap_db_free(umapdb);
613 610 }
614 611 qpalloc_fail6:
615 612 /*
616 613 * Releasing the QPN will also free up the QPC context. Update
617 614 * the QPC context pointer to indicate this.
618 615 */
619 616 tavor_qp_release_qpn(state, qp->qp_qpn_hdl, TAVOR_QPN_RELEASE);
620 617 qpc = NULL;
621 618 qpalloc_fail5:
622 619 tavor_rsrc_free(state, &rsrc);
623 620 qpalloc_fail4:
624 621 if (qpc) {
625 622 tavor_rsrc_free(state, &qpc);
626 623 }
627 624 qpalloc_fail3:
628 625 tavor_cq_refcnt_dec(rq_cq);
629 626 qpalloc_fail2:
630 627 tavor_cq_refcnt_dec(sq_cq);
631 628 qpalloc_fail1:
632 629 tavor_pd_refcnt_dec(pd);
633 630 qpalloc_fail:
634 631 TNF_PROBE_1(tavor_qp_alloc_fail, TAVOR_TNF_ERROR, "",
635 632 tnf_string, msg, errormsg);
636 633 TAVOR_TNF_EXIT(tavor_qp_alloc);
637 634 return (status);
638 635 }
639 636
640 637
641 638
642 639 /*
643 640 * tavor_special_qp_alloc()
644 641 * Context: Can be called only from user or kernel context.
645 642 */
646 643 int
647 644 tavor_special_qp_alloc(tavor_state_t *state, tavor_qp_info_t *qpinfo,
648 645 uint_t sleepflag, tavor_qp_options_t *op)
649 646 {
650 647 tavor_rsrc_t *qpc, *rsrc;
651 648 tavor_qphdl_t qp;
652 649 ibt_qp_alloc_attr_t *attr_p;
653 650 ibt_sqp_type_t type;
654 651 uint8_t port;
655 652 ibtl_qp_hdl_t ibt_qphdl;
656 653 ibt_chan_sizes_t *queuesz_p;
657 654 tavor_qphdl_t *qphdl;
658 655 ibt_mr_attr_t mr_attr;
659 656 tavor_mr_options_t mr_op;
660 657 tavor_pdhdl_t pd;
661 658 tavor_cqhdl_t sq_cq, rq_cq;
662 659 tavor_mrhdl_t mr;
663 660 uint64_t qp_desc_off;
664 661 uint32_t *sq_buf, *rq_buf;
665 662 uint32_t log_qp_sq_size, log_qp_rq_size;
666 663 uint32_t sq_size, rq_size, max_sgl;
667 664 uint32_t sq_wqe_size, rq_wqe_size;
668 665 uint_t wq_location, dma_xfer_mode;
669 666 int status, flag;
670 667 char *errormsg;
671 668
672 669 TAVOR_TNF_ENTER(tavor_special_qp_alloc);
673 670
674 671 /*
675 672 * Check the "options" flag. Currently this flag tells the driver
 676 673 * whether or not the QP's work queues should come from normal
677 674 * system memory or whether they should be allocated from DDR memory.
678 675 */
679 676 if (op == NULL) {
680 677 wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
681 678 } else {
682 679 wq_location = op->qpo_wq_loc;
683 680 }
684 681
685 682 /*
686 683 * Extract the necessary info from the tavor_qp_info_t structure
687 684 */
688 685 attr_p = qpinfo->qpi_attrp;
689 686 type = qpinfo->qpi_type;
690 687 port = qpinfo->qpi_port;
691 688 ibt_qphdl = qpinfo->qpi_ibt_qphdl;
692 689 queuesz_p = qpinfo->qpi_queueszp;
693 690 qphdl = &qpinfo->qpi_qphdl;
694 691
695 692 /*
696 693 * Check for valid special QP type (only SMI & GSI supported)
697 694 */
698 695 if ((type != IBT_SMI_SQP) && (type != IBT_GSI_SQP)) {
699 696 /* Set "status" and "errormsg" and goto failure */
700 697 TAVOR_TNF_FAIL(IBT_QP_SPECIAL_TYPE_INVALID, "invalid QP type");
701 698 goto spec_qpalloc_fail;
702 699 }
703 700
704 701 /*
705 702 * Check for valid port number
706 703 */
707 704 if (!tavor_portnum_is_valid(state, port)) {
708 705 /* Set "status" and "errormsg" and goto failure */
709 706 TAVOR_TNF_FAIL(IBT_HCA_PORT_INVALID, "invalid port num");
710 707 goto spec_qpalloc_fail;
711 708 }
712 709 port = port - 1;
713 710
714 711 /*
715 712 * Check for valid PD handle pointer
716 713 */
717 714 if (attr_p->qp_pd_hdl == NULL) {
718 715 /* Set "status" and "errormsg" and goto failure */
719 716 TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
720 717 goto spec_qpalloc_fail;
721 718 }
722 719 pd = (tavor_pdhdl_t)attr_p->qp_pd_hdl;
723 720
724 721 /* Increment the reference count on the PD */
725 722 tavor_pd_refcnt_inc(pd);
726 723
727 724 /*
728 725 * Check for valid CQ handle pointers
729 726 */
730 727 if ((attr_p->qp_ibc_scq_hdl == NULL) ||
731 728 (attr_p->qp_ibc_rcq_hdl == NULL)) {
732 729 /* Set "status" and "errormsg" and goto failure */
733 730 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
734 731 goto spec_qpalloc_fail1;
735 732 }
736 733 sq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_scq_hdl;
737 734 rq_cq = (tavor_cqhdl_t)attr_p->qp_ibc_rcq_hdl;
738 735
739 736 /*
740 737 * Increment the reference count on the CQs. One or both of these
741 738 * could return error if we determine that the given CQ is already
742 739 * being used with a non-special QP (i.e. a normal QP).
743 740 */
744 741 status = tavor_cq_refcnt_inc(sq_cq, TAVOR_CQ_IS_SPECIAL);
745 742 if (status != DDI_SUCCESS) {
746 743 /* Set "status" and "errormsg" and goto failure */
747 744 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
748 745 goto spec_qpalloc_fail1;
749 746 }
750 747 status = tavor_cq_refcnt_inc(rq_cq, TAVOR_CQ_IS_SPECIAL);
751 748 if (status != DDI_SUCCESS) {
752 749 /* Set "status" and "errormsg" and goto failure */
753 750 TAVOR_TNF_FAIL(IBT_CQ_HDL_INVALID, "invalid CQ handle");
754 751 goto spec_qpalloc_fail2;
755 752 }
756 753
757 754 /*
758 755 * Allocate the special QP resources. Essentially, this allocation
 759 756 * amounts to checking if the requested special QP has already been
760 757 * allocated. If successful, the QP context return is an actual
761 758 * QP context that has been "aliased" to act as a special QP of the
762 759 * appropriate type (and for the appropriate port). Just as in
763 760 * tavor_qp_alloc() above, ownership for this QP context is not
764 761 * immediately given to hardware in the final step here. Instead, we
765 762 * wait until the QP is later transitioned to the "Init" state before
766 763 * passing the QP to hardware. If we fail here, we must undo all
767 764 * the reference count (CQ and PD).
768 765 */
769 766 status = tavor_special_qp_rsrc_alloc(state, type, port, &qpc);
770 767 if (status != DDI_SUCCESS) {
771 768 /* Set "status" and "errormsg" and goto failure */
772 769 TAVOR_TNF_FAIL(status, "failed special QP rsrc");
773 770 goto spec_qpalloc_fail3;
774 771 }
775 772
776 773 /*
777 774 * Allocate the software structure for tracking the special queue
↓ open down ↓ |
514 lines elided |
↑ open up ↑ |
778 775 * pair (i.e. the Tavor Queue Pair handle). If we fail here, we
779 776 * must undo the reference counts and the previous resource allocation.
780 777 */
781 778 status = tavor_rsrc_alloc(state, TAVOR_QPHDL, 1, sleepflag, &rsrc);
782 779 if (status != DDI_SUCCESS) {
783 780 /* Set "status" and "errormsg" and goto failure */
784 781 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed QP handle");
785 782 goto spec_qpalloc_fail4;
786 783 }
787 784 qp = (tavor_qphdl_t)rsrc->tr_addr;
788 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qp))
789 785
790 786 /*
791 787 * Actual QP number is a combination of the index of the QPC and
792 788 * the port number. This is because the special QP contexts must
793 789 * be allocated two-at-a-time.
794 790 */
795 791 qp->qp_qpnum = qpc->tr_indx + port;
796 792
797 793 /*
798 794 * Calculate the appropriate size for the work queues.
799 795 * Note: All Tavor QP work queues must be a power-of-2 in size. Also
800 796 * they may not be any smaller than TAVOR_QP_MIN_SIZE. This step is
801 797 * to round the requested size up to the next highest power-of-2
802 798 */
803 799 attr_p->qp_sizes.cs_sq = max(attr_p->qp_sizes.cs_sq, TAVOR_QP_MIN_SIZE);
804 800 attr_p->qp_sizes.cs_rq = max(attr_p->qp_sizes.cs_rq, TAVOR_QP_MIN_SIZE);
805 801 log_qp_sq_size = highbit(attr_p->qp_sizes.cs_sq);
806 802 if (ISP2(attr_p->qp_sizes.cs_sq)) {
807 803 log_qp_sq_size = log_qp_sq_size - 1;
808 804 }
809 805 log_qp_rq_size = highbit(attr_p->qp_sizes.cs_rq);
810 806 if (ISP2(attr_p->qp_sizes.cs_rq)) {
811 807 log_qp_rq_size = log_qp_rq_size - 1;
812 808 }
813 809
814 810 /*
815 811 * Next we verify that the rounded-up size is valid (i.e. consistent
816 812 * with the device limits and/or software-configured limits). If not,
817 813 * then obviously we have a bit of cleanup to do before returning.
818 814 */
819 815 if ((log_qp_sq_size > state->ts_cfg_profile->cp_log_max_qp_sz) ||
820 816 (log_qp_rq_size > state->ts_cfg_profile->cp_log_max_qp_sz)) {
821 817 /* Set "status" and "errormsg" and goto failure */
822 818 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max QP size");
823 819 goto spec_qpalloc_fail5;
824 820 }
825 821
826 822 /*
827 823 * Next we verify that the requested number of SGL is valid (i.e.
828 824 * consistent with the device limits and/or software-configured
829 825 * limits). If not, then obviously the same cleanup needs to be done.
830 826 */
831 827 max_sgl = state->ts_cfg_profile->cp_wqe_real_max_sgl;
832 828 if ((attr_p->qp_sizes.cs_sq_sgl > max_sgl) ||
833 829 (attr_p->qp_sizes.cs_rq_sgl > max_sgl)) {
834 830 /* Set "status" and "errormsg" and goto failure */
835 831 TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max QP SGL");
836 832 goto spec_qpalloc_fail5;
837 833 }
838 834
839 835 /*
840 836 * Determine this QP's WQE sizes (for both the Send and Recv WQEs).
841 837 * This will depend on the requested number of SGLs. Note: this
842 838 * has the side-effect of also calculating the real number of SGLs
843 839 * (for the calculated WQE size).
844 840 */
845 841 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_rq_sgl,
846 842 TAVOR_QP_WQ_TYPE_RECVQ, &qp->qp_rq_log_wqesz, &qp->qp_rq_sgl);
847 843 if (type == IBT_SMI_SQP) {
848 844 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
849 845 TAVOR_QP_WQ_TYPE_SENDMLX_QP0, &qp->qp_sq_log_wqesz,
850 846 &qp->qp_sq_sgl);
851 847 } else {
852 848 tavor_qp_sgl_to_logwqesz(state, attr_p->qp_sizes.cs_sq_sgl,
853 849 TAVOR_QP_WQ_TYPE_SENDMLX_QP1, &qp->qp_sq_log_wqesz,
854 850 &qp->qp_sq_sgl);
855 851 }
856 852
857 853 /*
858 854 * Allocate the memory for QP work queues. Note: The location from
859 855 * which we will allocate these work queues has been passed in
860 856 * through the tavor_qp_options_t structure. Since Tavor work queues
861 857 * are not allowed to cross a 32-bit (4GB) boundary, the alignment of
862 858 * the work queue memory is very important. We used to allocate
863 859 * work queues (the combined receive and send queues) so that they
864 860 * would be aligned on their combined size. That alignment guaranteed
865 861 * that they would never cross the 4GB boundary (Tavor work queues
866 862 * are on the order of MBs at maximum). Now we are able to relax
867 863 * this alignment constraint by ensuring that the IB address assigned
868 864 * to the queue memory (as a result of the tavor_mr_register() call)
869 865 * is offset from zero.
870 866 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
871 867 * guarantee the alignment, but when attempting to use IOMMU bypass
872 868 * mode we found that we were not allowed to specify any alignment
873 869 * that was more restrictive than the system page size.
874 870 * So we avoided this constraint by passing two alignment values,
875 871 * one for the memory allocation itself and the other for the DMA
876 872 * handle (for later bind). This used to cause more memory than
877 873 * necessary to be allocated (in order to guarantee the more
 878  874 	 * restrictive alignment constraint). But by guaranteeing the
879 875 * zero-based IB virtual address for the queue, we are able to
880 876 * conserve this memory.
881 877 */
882 878 sq_wqe_size = 1 << qp->qp_sq_log_wqesz;
883 879 rq_wqe_size = 1 << qp->qp_rq_log_wqesz;
884 880 sq_size = (1 << log_qp_sq_size) * sq_wqe_size;
885 881 rq_size = (1 << log_qp_rq_size) * rq_wqe_size;
886 882 qp->qp_wqinfo.qa_size = sq_size + rq_size;
887 883 qp->qp_wqinfo.qa_alloc_align = max(sq_wqe_size, rq_wqe_size);
888 884 qp->qp_wqinfo.qa_bind_align = max(sq_wqe_size, rq_wqe_size);
889 885 qp->qp_wqinfo.qa_location = wq_location;
890 886 status = tavor_queue_alloc(state, &qp->qp_wqinfo, sleepflag);
891 887 if (status != NULL) {
892 888 /* Set "status" and "errormsg" and goto failure */
893 889 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed work queue");
894 890 goto spec_qpalloc_fail5;
895 891 }
896 892 if (sq_wqe_size > rq_wqe_size) {
897 893 sq_buf = qp->qp_wqinfo.qa_buf_aligned;
898 894 rq_buf = (uint32_t *)((uintptr_t)sq_buf + sq_size);
899 895 } else {
900 896 rq_buf = qp->qp_wqinfo.qa_buf_aligned;
901 897 sq_buf = (uint32_t *)((uintptr_t)rq_buf + rq_size);
902 898 }
903 899
904 900 /*
905 901 * Register the memory for the special QP work queues. The memory for
906 902 * the special QP must be registered in the Tavor TPT tables. This
907 903 * gives us the LKey to specify in the QP context later. Note: The
908 904 * memory for Tavor work queues (both Send and Recv) must be contiguous
909 905 * and registered as a single memory region. Note also: If the work
910 906 * queue is to be allocated from DDR memory, then only a "bypass"
911 907 * mapping is appropriate.
912 908 * Also, in order to meet the alignment restriction, we pass the
913 909 * "mro_bind_override_addr" flag in the call to tavor_mr_register().
914 910 * This guarantees that the resulting IB vaddr will be zero-based
915 911 * (modulo the offset into the first page).
916 912 * If we fail here, we have a bunch of resource and reference count
917 913 * cleanup to do.
918 914 */
919 915 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
920 916 IBT_MR_NOSLEEP;
921 917 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
922 918 mr_attr.mr_len = qp->qp_wqinfo.qa_size;
923 919 mr_attr.mr_as = NULL;
924 920 mr_attr.mr_flags = flag;
925 921 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
926 922 mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
927 923
928 924 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent;
929 925 if (dma_xfer_mode == DDI_DMA_STREAMING) {
930 926 mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
931 927 }
932 928 } else {
933 929 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
934 930 }
935 931 mr_op.mro_bind_dmahdl = qp->qp_wqinfo.qa_dmahdl;
936 932 mr_op.mro_bind_override_addr = 1;
937 933 status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
938 934 if (status != DDI_SUCCESS) {
939 935 /* Set "status" and "errormsg" and goto failure */
940 936 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
941 937 goto spec_qpalloc_fail6;
942 938 }
943 939
944 940 /*
945 941 * Calculate the offset between the kernel virtual address space
946 942 * and the IB virtual address space. This will be used when
947 943 * posting work requests to properly initialize each WQE.
948 944 */
949 945 qp_desc_off = (uint64_t)(uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
950 946 (uint64_t)mr->mr_bindinfo.bi_addr;
951 947
952 948 /*
953 949 * Fill in all the return arguments (if necessary). This includes
954 950 * real work queue sizes, real SGLs, and QP number (which will be
955 951 * either zero or one, depending on the special QP type)
956 952 */
957 953 if (queuesz_p != NULL) {
958 954 queuesz_p->cs_sq = (1 << log_qp_sq_size);
959 955 queuesz_p->cs_sq_sgl = qp->qp_sq_sgl;
960 956 queuesz_p->cs_rq = (1 << log_qp_rq_size);
961 957 queuesz_p->cs_rq_sgl = qp->qp_rq_sgl;
962 958 }
963 959
964 960 /*
965 961 * Fill in the rest of the Tavor Queue Pair handle. We can update
966 962 * the following fields for use in further operations on the QP.
967 963 */
968 964 qp->qp_qpcrsrcp = qpc;
969 965 qp->qp_rsrcp = rsrc;
970 966 qp->qp_state = TAVOR_QP_RESET;
971 967 qp->qp_pdhdl = pd;
972 968 qp->qp_mrhdl = mr;
973 969 qp->qp_sq_sigtype = (attr_p->qp_flags & IBT_WR_SIGNALED) ?
974 970 TAVOR_QP_SQ_WR_SIGNALED : TAVOR_QP_SQ_ALL_SIGNALED;
975 971 qp->qp_is_special = (type == IBT_SMI_SQP) ?
976 972 TAVOR_QP_SMI : TAVOR_QP_GSI;
977 973 qp->qp_is_umap = 0;
978 974 qp->qp_uarpg = 0;
979 975 qp->qp_sq_cqhdl = sq_cq;
980 976 qp->qp_sq_lastwqeaddr = NULL;
981 977 qp->qp_sq_bufsz = (1 << log_qp_sq_size);
982 978 qp->qp_sq_buf = sq_buf;
983 979 qp->qp_desc_off = qp_desc_off;
984 980 qp->qp_rq_cqhdl = rq_cq;
985 981 qp->qp_rq_lastwqeaddr = NULL;
986 982 qp->qp_rq_bufsz = (1 << log_qp_rq_size);
987 983 qp->qp_rq_buf = rq_buf;
988 984 qp->qp_portnum = port;
989 985 qp->qp_pkeyindx = 0;
990 986 qp->qp_hdlrarg = (void *)ibt_qphdl;
991 987 qp->qp_mcg_refcnt = 0;
992 988 qp->qp_srq_en = 0;
993 989 qp->qp_srqhdl = NULL;
994 990
995 991 /* Determine if later ddi_dma_sync will be necessary */
996 992 qp->qp_sync = TAVOR_QP_IS_SYNC_REQ(state, qp->qp_wqinfo);
997 993
998 994 /* All special QPs are UD QP service type */
999 995 qp->qp_serv_type = TAVOR_QP_UD;
1000 996
1001 997 /* Zero out the QP context */
1002 998 bzero(&qp->qpc, sizeof (tavor_hw_qpc_t));
1003 999
1004 1000 /*
1005 1001 * Put QP handle in Tavor QPNum-to-QPHdl list. Then fill in the
1006 1002 * "qphdl" and return success
1007 1003 */
1008 1004 ASSERT(state->ts_qphdl[qpc->tr_indx + port] == NULL);
1009 1005 state->ts_qphdl[qpc->tr_indx + port] = qp;
1010 1006
1011 1007 *qphdl = qp;
1012 1008
1013 1009 TAVOR_TNF_EXIT(tavor_special_qp_alloc);
1014 1010 return (DDI_SUCCESS);
1015 1011
1016 1012 /*
1017 1013 * The following is cleanup for all possible failure cases in this routine
1018 1014 */
1019 1015 spec_qpalloc_fail6:
1020 1016 tavor_queue_free(state, &qp->qp_wqinfo);
1021 1017 spec_qpalloc_fail5:
1022 1018 tavor_rsrc_free(state, &rsrc);
1023 1019 spec_qpalloc_fail4:
1024 1020 if (tavor_special_qp_rsrc_free(state, type, port) != DDI_SUCCESS) {
1025 1021 TAVOR_WARNING(state, "failed to free special QP rsrc");
1026 1022 }
1027 1023 spec_qpalloc_fail3:
1028 1024 tavor_cq_refcnt_dec(rq_cq);
1029 1025 spec_qpalloc_fail2:
1030 1026 tavor_cq_refcnt_dec(sq_cq);
1031 1027 spec_qpalloc_fail1:
1032 1028 tavor_pd_refcnt_dec(pd);
1033 1029 spec_qpalloc_fail:
1034 1030 TNF_PROBE_1(tavor_special_qp_alloc_fail, TAVOR_TNF_ERROR, "",
1035 1031 tnf_string, msg, errormsg);
1036 1032 TAVOR_TNF_EXIT(tavor_special_qp_alloc);
1037 1033 return (status);
1038 1034 }
1039 1035
1040 1036
/*
 * tavor_qp_free()
 *    This function frees up the QP resources.  Depending on the value
 *    of the "free_qp_flags", the QP number may not be released until
 *    a subsequent call to tavor_qp_release_qpn().
 *
 *    The QP handle is passed indirectly through "qphdl"; on success the
 *    pointed-to handle is set to NULL (the handle's backing resource is
 *    returned to the pool here, so the caller must not touch it again).
 *    If "free_qp_flags" is IBC_FREE_QP_ONLY, the QP number stays
 *    reserved and its tracking entry is handed back through "qpnh" for a
 *    later tavor_qp_release_qpn() call; otherwise the number is released
 *    for reuse as well.  "sleepflag" is forwarded to the memory
 *    deregistration step.
 *
 *    Returns DDI_SUCCESS, or an IBTF failure code if the QP is still
 *    attached to a multicast group or if any stage of the teardown
 *    fails.
 *
 *    Context: Can be called only from user or kernel context.
 */
/* ARGSUSED */
int
tavor_qp_free(tavor_state_t *state, tavor_qphdl_t *qphdl,
    ibc_free_qp_flags_t free_qp_flags, ibc_qpn_hdl_t *qpnh,
    uint_t sleepflag)
{
	tavor_rsrc_t		*qpc, *rdb, *rsrc;
	tavor_umap_db_entry_t	*umapdb;
	tavor_qpn_entry_t	*entry;
	tavor_pdhdl_t		pd;
	tavor_mrhdl_t		mr;
	tavor_cqhdl_t		sq_cq, rq_cq;
	tavor_srqhdl_t		srq;
	tavor_qphdl_t		qp;
	uint64_t		value;
	uint_t			type, port;
	uint_t			maxprot;
	uint_t			qp_srq_en;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_qp_free);

	/*
	 * Pull all the necessary information from the Tavor Queue Pair
	 * handle.  This is necessary here because the resource for the
	 * QP handle is going to be freed up as part of this operation.
	 * The saved local copies are what the cleanup steps below use
	 * after the handle memory itself has been returned to the pool.
	 */
	qp	= *qphdl;
	mutex_enter(&qp->qp_lock);
	qpc	= qp->qp_qpcrsrcp;
	rsrc	= qp->qp_rsrcp;
	pd	= qp->qp_pdhdl;
	srq	= qp->qp_srqhdl;
	mr	= qp->qp_mrhdl;
	rq_cq	= qp->qp_rq_cqhdl;
	sq_cq	= qp->qp_sq_cqhdl;
	rdb	= qp->qp_rdbrsrcp;
	port	= qp->qp_portnum;
	qp_srq_en = qp->qp_srq_en;

	/*
	 * If the QP is part of an MCG, then we fail the qp_free
	 */
	if (qp->qp_mcg_refcnt != 0) {
		mutex_exit(&qp->qp_lock);
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "QP part of MCG on free");
		goto qpfree_fail;
	}

	/*
	 * If the QP is not already in "Reset" state, then transition to
	 * "Reset".  This is necessary because software does not reclaim
	 * ownership of the QP context until the QP is in the "Reset" state.
	 * If the ownership transfer fails for any reason, then it is an
	 * indication that something (either in HW or SW) has gone seriously
	 * wrong.  So we print a warning message and return.
	 */
	if (qp->qp_state != TAVOR_QP_RESET) {
		if (tavor_qp_to_reset(state, qp) != DDI_SUCCESS) {
			mutex_exit(&qp->qp_lock);
			TAVOR_WARNING(state, "failed to reset QP context");
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
			    "reset QP context");
			goto qpfree_fail;
		}
		qp->qp_state = TAVOR_QP_RESET;

		/*
		 * Do any additional handling necessary for the transition
		 * to the "Reset" state (e.g. update the WRID lists)
		 */
		tavor_wrid_to_reset_handling(state, qp);
	}

	/*
	 * If this was a user-mappable QP, then we need to remove its entry
	 * from the "userland resources database".  If it is also currently
	 * mmap()'d out to a user process, then we need to call
	 * devmap_devmem_remap() to remap the QP memory to an invalid mapping.
	 * We also need to invalidate the QP tracking information for the
	 * user mapping.
	 */
	if (qp->qp_is_umap) {
		status = tavor_umap_db_find(state->ts_instance, qp->qp_qpnum,
		    MLNX_UMAP_QPMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
		    &umapdb);
		if (status != DDI_SUCCESS) {
			mutex_exit(&qp->qp_lock);
			TAVOR_WARNING(state, "failed to find in database");
			TAVOR_TNF_EXIT(tavor_qp_free);
			return (ibc_get_ci_failure(0));
		}
		tavor_umap_db_free(umapdb);
		if (qp->qp_umap_dhp != NULL) {
			maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
			status = devmap_devmem_remap(qp->qp_umap_dhp,
			    state->ts_dip, 0, 0, qp->qp_wqinfo.qa_size,
			    maxprot, DEVMAP_MAPPING_INVALID, NULL);
			if (status != DDI_SUCCESS) {
				mutex_exit(&qp->qp_lock);
				TAVOR_WARNING(state, "failed in QP memory "
				    "devmap_devmem_remap()");
				TAVOR_TNF_EXIT(tavor_qp_free);
				return (ibc_get_ci_failure(0));
			}
			qp->qp_umap_dhp = (devmap_cookie_t)NULL;
		}
	}

	/*
	 * Put NULL into the Tavor QPNum-to-QPHdl list.  This will allow any
	 * in-progress events to detect that the QP corresponding to this
	 * number has been freed.  Note: it does depend on whether we are
	 * freeing a special QP or not.
	 */
	if (qp->qp_is_special) {
		state->ts_qphdl[qpc->tr_indx + port] = NULL;
	} else {
		state->ts_qphdl[qpc->tr_indx] = NULL;
	}

	/*
	 * Drop the QP lock
	 *    At this point the lock is no longer necessary.  We cannot
	 *    protect from multiple simultaneous calls to free the same QP.
	 *    In addition, since the QP lock is contained in the QP "software
	 *    handle" resource, which we will free (see below), it is
	 *    important that we have no further references to that memory.
	 */
	mutex_exit(&qp->qp_lock);

	/*
	 * Free the QP resources
	 *    Start by deregistering and freeing the memory for work queues.
	 *    Next free any previously allocated context information
	 *    (depending on QP type)
	 *    Finally, decrement the necessary reference counts.
	 * If this fails for any reason, then it is an indication that
	 * something (either in HW or SW) has gone seriously wrong.  So we
	 * print a warning message and return.
	 */
	status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
	    sleepflag);
	if (status != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed to deregister QP memory");
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed deregister mr");
		goto qpfree_fail;
	}

	/* Free the memory for the QP */
	tavor_queue_free(state, &qp->qp_wqinfo);

	/*
	 * Free up the remainder of the QP resources.  Note: we have a few
	 * different resources to free up depending on whether the QP is a
	 * special QP or not.  As described above, if any of these fail for
	 * any reason it is an indication that something (either in HW or SW)
	 * has gone seriously wrong.  So we print a warning message and
	 * return.
	 */
	if (qp->qp_is_special) {
		type = (qp->qp_is_special == TAVOR_QP_SMI) ?
		    IBT_SMI_SQP : IBT_GSI_SQP;

		/* Free up resources for the special QP */
		status = tavor_special_qp_rsrc_free(state, type, port);
		if (status != DDI_SUCCESS) {
			TAVOR_WARNING(state, "failed to free special QP rsrc");
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
			    "failed special QP rsrc");
			goto qpfree_fail;
		}

	} else {
		type = qp->qp_serv_type;

		/* Free up the RDB entries resource */
		if (type == TAVOR_QP_RC) {
			tavor_rsrc_free(state, &rdb);
		}

		/*
		 * Check the flags and determine whether to release the
		 * QPN or not, based on their value.
		 */
		if (free_qp_flags == IBC_FREE_QP_ONLY) {
			entry = qp->qp_qpn_hdl;
			tavor_qp_release_qpn(state, qp->qp_qpn_hdl,
			    TAVOR_QPN_FREE_ONLY);
			*qpnh = (ibc_qpn_hdl_t)entry;
		} else {
			tavor_qp_release_qpn(state, qp->qp_qpn_hdl,
			    TAVOR_QPN_RELEASE);
		}
	}

	/* Free the Tavor Queue Pair handle (invalidates "qp" itself) */
	tavor_rsrc_free(state, &rsrc);

	/* Decrement the reference counts on CQs, PD and SRQ (if needed) */
	tavor_cq_refcnt_dec(rq_cq);
	tavor_cq_refcnt_dec(sq_cq);
	tavor_pd_refcnt_dec(pd);
	if (qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
		tavor_srq_refcnt_dec(srq);
	}

	/* Set the qphdl pointer to NULL and return success */
	*qphdl = NULL;

	TAVOR_TNF_EXIT(tavor_qp_free);
	return (DDI_SUCCESS);

qpfree_fail:
	TNF_PROBE_1(tavor_qp_free_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_qp_free);
	return (status);
}
1273 1268
1274 1269
1275 1270 /*
1276 1271 * tavor_qp_query()
1277 1272 * Context: Can be called from interrupt or base context.
1278 1273 */
1279 1274 int
1280 1275 tavor_qp_query(tavor_state_t *state, tavor_qphdl_t qp,
1281 1276 ibt_qp_query_attr_t *attr_p)
1282 1277 {
1283 1278 ibt_cep_state_t qp_state;
1284 1279 ibt_qp_ud_attr_t *ud;
1285 1280 ibt_qp_rc_attr_t *rc;
1286 1281 ibt_qp_uc_attr_t *uc;
1287 1282 ibt_cep_flags_t enable_flags;
1288 1283 tavor_hw_addr_path_t *qpc_path, *qpc_alt_path;
1289 1284 ibt_cep_path_t *path_ptr, *alt_path_ptr;
1290 1285 tavor_hw_qpc_t *qpc;
1291 1286 int status;
1292 1287
1293 1288 TAVOR_TNF_ENTER(tavor_qp_query);
1294 1289
1295 1290 mutex_enter(&qp->qp_lock);
1296 1291
1297 1292 /*
1298 1293 * Grab the temporary QPC entry from QP software state
1299 1294 */
1300 1295 qpc = &qp->qpc;
1301 1296
1302 1297 /* Convert the current Tavor QP state to IBTF QP state */
1303 1298 switch (qp->qp_state) {
1304 1299 case TAVOR_QP_RESET:
1305 1300 qp_state = IBT_STATE_RESET; /* "Reset" */
1306 1301 break;
1307 1302 case TAVOR_QP_INIT:
1308 1303 qp_state = IBT_STATE_INIT; /* Initialized */
1309 1304 break;
1310 1305 case TAVOR_QP_RTR:
1311 1306 qp_state = IBT_STATE_RTR; /* Ready to Receive */
1312 1307 break;
1313 1308 case TAVOR_QP_RTS:
1314 1309 qp_state = IBT_STATE_RTS; /* Ready to Send */
1315 1310 break;
1316 1311 case TAVOR_QP_SQERR:
1317 1312 qp_state = IBT_STATE_SQE; /* Send Queue Error */
1318 1313 break;
1319 1314 case TAVOR_QP_SQD:
1320 1315 if (qp->qp_sqd_still_draining) {
1321 1316 qp_state = IBT_STATE_SQDRAIN; /* SQ Draining */
1322 1317 } else {
1323 1318 qp_state = IBT_STATE_SQD; /* SQ Drained */
1324 1319 }
1325 1320 break;
1326 1321 case TAVOR_QP_ERR:
1327 1322 qp_state = IBT_STATE_ERROR; /* Error */
1328 1323 break;
1329 1324 default:
1330 1325 mutex_exit(&qp->qp_lock);
1331 1326 TNF_PROBE_1(tavor_qp_query_inv_qpstate_fail,
1332 1327 TAVOR_TNF_ERROR, "", tnf_uint, qpstate, qp->qp_state);
1333 1328 TAVOR_TNF_EXIT(tavor_qp_query);
1334 1329 return (ibc_get_ci_failure(0));
1335 1330 }
1336 1331 attr_p->qp_info.qp_state = qp_state;
1337 1332
1338 1333 /* SRQ Hook. */
1339 1334 attr_p->qp_srq = NULL;
1340 1335
1341 1336 /*
1342 1337 * The following QP information is always returned, regardless of
1343 1338 * the current QP state. Note: Some special handling is necessary
1344 1339 * for calculating the QP number on special QP (QP0 and QP1).
1345 1340 */
1346 1341 attr_p->qp_sq_cq = qp->qp_sq_cqhdl->cq_hdlrarg;
1347 1342 attr_p->qp_rq_cq = qp->qp_rq_cqhdl->cq_hdlrarg;
1348 1343 if (qp->qp_is_special) {
1349 1344 attr_p->qp_qpn = (qp->qp_is_special == TAVOR_QP_SMI) ? 0 : 1;
1350 1345 } else {
1351 1346 attr_p->qp_qpn = (ib_qpn_t)qp->qp_qpnum;
1352 1347 }
1353 1348 attr_p->qp_sq_sgl = qp->qp_sq_sgl;
1354 1349 attr_p->qp_rq_sgl = qp->qp_rq_sgl;
1355 1350 attr_p->qp_info.qp_sq_sz = qp->qp_sq_bufsz;
1356 1351 attr_p->qp_info.qp_rq_sz = qp->qp_rq_bufsz;
1357 1352
1358 1353 /*
1359 1354 * If QP is currently in the "Reset" state, then only the above are
1360 1355 * returned
1361 1356 */
1362 1357 if (qp_state == IBT_STATE_RESET) {
1363 1358 mutex_exit(&qp->qp_lock);
1364 1359 TAVOR_TNF_EXIT(tavor_qp_query);
1365 1360 return (DDI_SUCCESS);
1366 1361 }
1367 1362
1368 1363 /*
1369 1364 * Post QUERY_QP command to firmware
1370 1365 *
1371 1366 * We do a TAVOR_NOSLEEP here because we are holding the "qp_lock".
1372 1367 * Since we may be in the interrupt context (or subsequently raised
1373 1368 * to interrupt level by priority inversion), we do not want to block
1374 1369 * in this routine waiting for success.
1375 1370 */
1376 1371 status = tavor_cmn_query_cmd_post(state, QUERY_QP, qp->qp_qpnum,
1377 1372 qpc, sizeof (tavor_hw_qpc_t), TAVOR_CMD_NOSLEEP_SPIN);
1378 1373 if (status != TAVOR_CMD_SUCCESS) {
1379 1374 mutex_exit(&qp->qp_lock);
1380 1375 cmn_err(CE_CONT, "Tavor: QUERY_QP command failed: %08x\n",
1381 1376 status);
1382 1377 TNF_PROBE_1(tavor_qp_query_cmd_fail, TAVOR_TNF_ERROR, "",
1383 1378 tnf_uint, status, status);
1384 1379 TAVOR_TNF_EXIT(tavor_qp_query);
1385 1380 return (ibc_get_ci_failure(0));
1386 1381 }
1387 1382
1388 1383 /*
1389 1384 * Fill in the additional QP info based on the QP's transport type.
1390 1385 */
1391 1386 if (qp->qp_serv_type == TAVOR_QP_UD) {
1392 1387
1393 1388 /* Fill in the UD-specific info */
1394 1389 ud = &attr_p->qp_info.qp_transport.ud;
1395 1390 ud->ud_qkey = (ib_qkey_t)qpc->qkey;
1396 1391 ud->ud_sq_psn = qpc->next_snd_psn;
1397 1392 ud->ud_pkey_ix = qpc->pri_addr_path.pkey_indx;
1398 1393 ud->ud_port = qpc->pri_addr_path.portnum;
1399 1394
1400 1395 attr_p->qp_info.qp_trans = IBT_UD_SRV;
1401 1396
1402 1397 } else if (qp->qp_serv_type == TAVOR_QP_RC) {
1403 1398
1404 1399 /* Fill in the RC-specific info */
1405 1400 rc = &attr_p->qp_info.qp_transport.rc;
1406 1401 rc->rc_sq_psn = qpc->next_snd_psn;
1407 1402 rc->rc_rq_psn = qpc->next_rcv_psn;
1408 1403 rc->rc_dst_qpn = qpc->rem_qpn;
1409 1404
1410 1405 /* Grab the path migration state information */
1411 1406 if (qpc->pm_state == TAVOR_QP_PMSTATE_MIGRATED) {
1412 1407 rc->rc_mig_state = IBT_STATE_MIGRATED;
1413 1408 } else if (qpc->pm_state == TAVOR_QP_PMSTATE_REARM) {
1414 1409 rc->rc_mig_state = IBT_STATE_REARMED;
1415 1410 } else {
1416 1411 rc->rc_mig_state = IBT_STATE_ARMED;
1417 1412 }
1418 1413 rc->rc_rdma_ra_out = (1 << qpc->sra_max);
1419 1414 rc->rc_rdma_ra_in = (1 << qpc->rra_max);
1420 1415 rc->rc_min_rnr_nak = qpc->min_rnr_nak;
1421 1416 rc->rc_path_mtu = qpc->mtu;
1422 1417 rc->rc_retry_cnt = qpc->retry_cnt;
1423 1418
1424 1419 /* Get the common primary address path fields */
1425 1420 qpc_path = &qpc->pri_addr_path;
1426 1421 path_ptr = &rc->rc_path;
1427 1422 tavor_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
1428 1423 TAVOR_ADDRPATH_QP, qp);
1429 1424
1430 1425 /* Fill in the additional primary address path fields */
1431 1426 path_ptr->cep_pkey_ix = qpc_path->pkey_indx;
1432 1427 path_ptr->cep_hca_port_num = qpc_path->portnum;
1433 1428 path_ptr->cep_timeout = qpc_path->ack_timeout;
1434 1429
1435 1430 /* Get the common alternate address path fields */
1436 1431 qpc_alt_path = &qpc->alt_addr_path;
1437 1432 alt_path_ptr = &rc->rc_alt_path;
1438 1433 tavor_get_addr_path(state, qpc_alt_path,
1439 1434 &alt_path_ptr->cep_adds_vect, TAVOR_ADDRPATH_QP, qp);
1440 1435
1441 1436 /* Fill in the additional alternate address path fields */
1442 1437 alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx;
1443 1438 alt_path_ptr->cep_hca_port_num = qpc_alt_path->portnum;
1444 1439 alt_path_ptr->cep_timeout = qpc_alt_path->ack_timeout;
1445 1440
1446 1441 /* Get the RNR retry time from primary path */
1447 1442 rc->rc_rnr_retry_cnt = qpc_path->rnr_retry;
1448 1443
1449 1444 /* Set the enable flags based on RDMA/Atomic enable bits */
1450 1445 enable_flags = IBT_CEP_NO_FLAGS;
1451 1446 enable_flags |= ((qpc->rre == 0) ? 0 : IBT_CEP_RDMA_RD);
1452 1447 enable_flags |= ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
1453 1448 enable_flags |= ((qpc->rae == 0) ? 0 : IBT_CEP_ATOMIC);
1454 1449 attr_p->qp_info.qp_flags = enable_flags;
1455 1450
1456 1451 attr_p->qp_info.qp_trans = IBT_RC_SRV;
1457 1452
1458 1453 } else if (qp->qp_serv_type == TAVOR_QP_UC) {
1459 1454
1460 1455 /* Fill in the UC-specific info */
1461 1456 uc = &attr_p->qp_info.qp_transport.uc;
1462 1457 uc->uc_sq_psn = qpc->next_snd_psn;
1463 1458 uc->uc_rq_psn = qpc->next_rcv_psn;
1464 1459 uc->uc_dst_qpn = qpc->rem_qpn;
1465 1460
1466 1461 /* Grab the path migration state information */
1467 1462 if (qpc->pm_state == TAVOR_QP_PMSTATE_MIGRATED) {
1468 1463 uc->uc_mig_state = IBT_STATE_MIGRATED;
1469 1464 } else if (qpc->pm_state == TAVOR_QP_PMSTATE_REARM) {
1470 1465 uc->uc_mig_state = IBT_STATE_REARMED;
1471 1466 } else {
1472 1467 uc->uc_mig_state = IBT_STATE_ARMED;
1473 1468 }
1474 1469 uc->uc_path_mtu = qpc->mtu;
1475 1470
1476 1471 /* Get the common primary address path fields */
1477 1472 qpc_path = &qpc->pri_addr_path;
1478 1473 path_ptr = &uc->uc_path;
1479 1474 tavor_get_addr_path(state, qpc_path, &path_ptr->cep_adds_vect,
1480 1475 TAVOR_ADDRPATH_QP, qp);
1481 1476
1482 1477 /* Fill in the additional primary address path fields */
1483 1478 path_ptr->cep_pkey_ix = qpc_path->pkey_indx;
1484 1479 path_ptr->cep_hca_port_num = qpc_path->portnum;
1485 1480
1486 1481 /* Get the common alternate address path fields */
1487 1482 qpc_alt_path = &qpc->alt_addr_path;
1488 1483 alt_path_ptr = &uc->uc_alt_path;
1489 1484 tavor_get_addr_path(state, qpc_alt_path,
1490 1485 &alt_path_ptr->cep_adds_vect, TAVOR_ADDRPATH_QP, qp);
1491 1486
1492 1487 /* Fill in the additional alternate address path fields */
1493 1488 alt_path_ptr->cep_pkey_ix = qpc_alt_path->pkey_indx;
1494 1489 alt_path_ptr->cep_hca_port_num = qpc_alt_path->portnum;
1495 1490
1496 1491 /*
1497 1492 * Set the enable flags based on RDMA enable bits (by
1498 1493 * definition UC doesn't support Atomic or RDMA Read)
1499 1494 */
1500 1495 enable_flags = ((qpc->rwe == 0) ? 0 : IBT_CEP_RDMA_WR);
1501 1496 attr_p->qp_info.qp_flags = enable_flags;
1502 1497
1503 1498 attr_p->qp_info.qp_trans = IBT_UC_SRV;
1504 1499
1505 1500 } else {
1506 1501 TAVOR_WARNING(state, "unexpected QP transport type");
1507 1502 mutex_exit(&qp->qp_lock);
1508 1503 return (ibc_get_ci_failure(0));
1509 1504 }
1510 1505
1511 1506 /*
1512 1507 * Under certain circumstances it is possible for the Tavor hardware
1513 1508 * to transition to one of the error states without software directly
1514 1509 * knowing about it. The QueryQP() call is the one place where we
1515 1510 * have an opportunity to sample and update our view of the QP state.
1516 1511 */
1517 1512 if (qpc->state == TAVOR_QP_SQERR) {
1518 1513 attr_p->qp_info.qp_state = IBT_STATE_SQE;
1519 1514 qp->qp_state = TAVOR_QP_SQERR;
1520 1515 }
1521 1516 if (qpc->state == TAVOR_QP_ERR) {
1522 1517 attr_p->qp_info.qp_state = IBT_STATE_ERROR;
1523 1518 qp->qp_state = TAVOR_QP_ERR;
1524 1519 }
1525 1520 mutex_exit(&qp->qp_lock);
1526 1521
1527 1522 TAVOR_TNF_EXIT(tavor_qp_query);
1528 1523 return (DDI_SUCCESS);
1529 1524 }
1530 1525
1531 1526
1532 1527 /*
1533 1528 * tavor_qp_create_qpn()
1534 1529 * Context: Can be called from interrupt or base context.
1535 1530 */
1536 1531 static int
1537 1532 tavor_qp_create_qpn(tavor_state_t *state, tavor_qphdl_t qp, tavor_rsrc_t *qpc)
1538 1533 {
1539 1534 tavor_qpn_entry_t query;
1540 1535 tavor_qpn_entry_t *entry;
1541 1536 avl_index_t where;
1542 1537
1543 1538 TAVOR_TNF_ENTER(tavor_qp_create_qpn);
1544 1539
1545 1540 /*
1546 1541 * Build a query (for the AVL tree lookup) and attempt to find
1547 1542 * a previously added entry that has a matching QPC index. If
1548 1543 * no matching entry is found, then allocate, initialize, and
1549 1544 * add an entry to the AVL tree.
1550 1545 * If a matching entry is found, then increment its QPN counter
1551 1546 * and reference counter.
1552 1547 */
1553 1548 query.qpn_indx = qpc->tr_indx;
1554 1549 mutex_enter(&state->ts_qpn_avl_lock);
1555 1550 entry = (tavor_qpn_entry_t *)avl_find(&state->ts_qpn_avl,
1556 1551 &query, &where);
1557 1552 if (entry == NULL) {
1558 1553 /*
↓ open down ↓ |
367 lines elided |
↑ open up ↑ |
1559 1554 * Allocate and initialize a QPN entry, then insert
1560 1555 * it into the AVL tree.
1561 1556 */
1562 1557 entry = (tavor_qpn_entry_t *)kmem_zalloc(
1563 1558 sizeof (tavor_qpn_entry_t), KM_NOSLEEP);
1564 1559 if (entry == NULL) {
1565 1560 mutex_exit(&state->ts_qpn_avl_lock);
1566 1561 TAVOR_TNF_EXIT(tavor_qp_create_qpn);
1567 1562 return (DDI_FAILURE);
1568 1563 }
1569 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))
1570 1564
1571 1565 entry->qpn_indx = qpc->tr_indx;
1572 1566 entry->qpn_refcnt = 0;
1573 1567 entry->qpn_counter = 0;
1574 1568
1575 1569 avl_insert(&state->ts_qpn_avl, entry, where);
1576 1570 }
1577 1571
1578 1572 /*
1579 1573 * Make the AVL tree entry point to the QP context resource that
1580 1574 * it will be responsible for tracking
1581 1575 */
1582 1576 entry->qpn_qpc = qpc;
1583 1577
1584 1578 /*
1585 1579 * Setup the QP handle to point to the AVL tree entry. Then
1586 1580 * generate the new QP number from the entry's QPN counter value
1587 1581 * and the hardware's QP context table index.
1588 1582 */
1589 1583 qp->qp_qpn_hdl = entry;
1590 1584 qp->qp_qpnum = ((entry->qpn_counter <<
1591 1585 state->ts_cfg_profile->cp_log_num_qp) | qpc->tr_indx) &
1592 1586 TAVOR_QP_MAXNUMBER_MSK;
1593 1587
1594 1588 /*
1595 1589 * Increment the reference counter and QPN counter. The QPN
1596 1590 * counter always indicates the next available number for use.
1597 1591 */
1598 1592 entry->qpn_counter++;
1599 1593 entry->qpn_refcnt++;
1600 1594
1601 1595 mutex_exit(&state->ts_qpn_avl_lock);
1602 1596 TAVOR_TNF_EXIT(tavor_qp_create_qpn);
1603 1597 return (DDI_SUCCESS);
1604 1598 }
1605 1599
1606 1600
1607 1601 /*
1608 1602 * tavor_qp_release_qpn()
1609 1603 * Context: Can be called only from user or kernel context.
1610 1604 */
1611 1605 void
1612 1606 tavor_qp_release_qpn(tavor_state_t *state, tavor_qpn_entry_t *entry, int flags)
1613 1607 {
1614 1608 TAVOR_TNF_ENTER(tavor_qp_release_qpn);
1615 1609
1616 1610 ASSERT(entry != NULL);
1617 1611
1618 1612 mutex_enter(&state->ts_qpn_avl_lock);
1619 1613
1620 1614 /*
1621 1615 * If we are releasing the QP number here, then we decrement the
1622 1616 * reference count and check for zero references. If there are
1623 1617 * zero references, then we free the QPC context (if it hadn't
1624 1618 * already been freed during a TAVOR_QPN_FREE_ONLY free, i.e. for
1625 1619 * reuse with another similar QP number) and remove the tracking
1626 1620 * structure from the QP number AVL tree and free the structure.
1627 1621 * If we are not releasing the QP number here, then, as long as we
1628 1622 * have not exhausted the usefulness of the QPC context (that is,
1629 1623 * re-used it too many times without the reference count having
1630 1624 * gone to zero), we free up the QPC context for use by another
1631 1625 * thread (which will use it to construct a different QP number
1632 1626 * from the same QPC table index).
1633 1627 */
1634 1628 if (flags == TAVOR_QPN_RELEASE) {
1635 1629 entry->qpn_refcnt--;
1636 1630
1637 1631 /*
1638 1632 * If the reference count is zero, then we free the QPC
1639 1633 * context (if it hadn't already been freed in an early
1640 1634 * step, e.g. TAVOR_QPN_FREE_ONLY) and remove/free the
1641 1635 * tracking structure from the QP number AVL tree.
1642 1636 */
1643 1637 if (entry->qpn_refcnt == 0) {
1644 1638 if (entry->qpn_qpc != NULL) {
1645 1639 tavor_rsrc_free(state, &entry->qpn_qpc);
1646 1640 }
1647 1641
1648 1642 /*
1649 1643 * If the current entry has served it's useful
1650 1644 * purpose (i.e. been reused the maximum allowable
1651 1645 * number of times), then remove it from QP number
1652 1646 * AVL tree and free it up.
1653 1647 */
1654 1648 if (entry->qpn_counter >= (1 <<
1655 1649 (24 - state->ts_cfg_profile->cp_log_num_qp))) {
1656 1650 avl_remove(&state->ts_qpn_avl, entry);
1657 1651 kmem_free(entry, sizeof (tavor_qpn_entry_t));
1658 1652 }
1659 1653 }
1660 1654
1661 1655 } else if (flags == TAVOR_QPN_FREE_ONLY) {
1662 1656 /*
1663 1657 * Even if we are not freeing the QP number, that will not
1664 1658 * always prevent us from releasing the QPC context. In fact,
1665 1659 * since the QPC context only forms part of the whole QPN,
1666 1660 * we want to free it up for use by other consumers. But
1667 1661 * if the reference count is non-zero (which it will always
1668 1662 * be when we are doing TAVOR_QPN_FREE_ONLY) and the counter
1669 1663 * has reached its maximum value, then we cannot reuse the
1670 1664 * QPC context until the reference count eventually reaches
1671 1665 * zero (in TAVOR_QPN_RELEASE, above).
1672 1666 */
1673 1667 if (entry->qpn_counter < (1 <<
1674 1668 (24 - state->ts_cfg_profile->cp_log_num_qp))) {
1675 1669 tavor_rsrc_free(state, &entry->qpn_qpc);
1676 1670 }
1677 1671 }
1678 1672 mutex_exit(&state->ts_qpn_avl_lock);
1679 1673
1680 1674 TAVOR_TNF_EXIT(tavor_qp_release_qpn);
1681 1675 }
1682 1676
1683 1677
1684 1678 /*
1685 1679 * tavor_qpn_db_compare()
1686 1680 * Context: Can be called from user or kernel context.
1687 1681 */
1688 1682 static int
1689 1683 tavor_qpn_avl_compare(const void *q, const void *e)
1690 1684 {
1691 1685 tavor_qpn_entry_t *entry, *query;
1692 1686
1693 1687 TAVOR_TNF_ENTER(tavor_qpn_avl_compare);
1694 1688
1695 1689 entry = (tavor_qpn_entry_t *)e;
1696 1690 query = (tavor_qpn_entry_t *)q;
1697 1691
1698 1692 if (query->qpn_indx < entry->qpn_indx) {
1699 1693 TAVOR_TNF_EXIT(tavor_qpn_avl_compare);
1700 1694 return (-1);
1701 1695 } else if (query->qpn_indx > entry->qpn_indx) {
1702 1696 TAVOR_TNF_EXIT(tavor_qpn_avl_compare);
1703 1697 return (+1);
1704 1698 } else {
1705 1699 TAVOR_TNF_EXIT(tavor_qpn_avl_compare);
1706 1700 return (0);
1707 1701 }
1708 1702 }
1709 1703
1710 1704
1711 1705 /*
1712 1706 * tavor_qpn_avl_init()
1713 1707 * Context: Only called from attach() path context
1714 1708 */
1715 1709 void
1716 1710 tavor_qpn_avl_init(tavor_state_t *state)
1717 1711 {
1718 1712 TAVOR_TNF_ENTER(tavor_qpn_avl_init);
1719 1713
1720 1714 /* Initialize the lock used for QP number (QPN) AVL tree access */
1721 1715 mutex_init(&state->ts_qpn_avl_lock, NULL, MUTEX_DRIVER,
1722 1716 DDI_INTR_PRI(state->ts_intrmsi_pri));
1723 1717
1724 1718 /* Initialize the AVL tree for the QP number (QPN) storage */
1725 1719 avl_create(&state->ts_qpn_avl, tavor_qpn_avl_compare,
1726 1720 sizeof (tavor_qpn_entry_t),
1727 1721 offsetof(tavor_qpn_entry_t, qpn_avlnode));
1728 1722
1729 1723 TAVOR_TNF_EXIT(tavor_qpn_avl_init);
1730 1724 }
1731 1725
1732 1726
1733 1727 /*
1734 1728 * tavor_qpn_avl_fini()
1735 1729 * Context: Only called from attach() and/or detach() path contexts
1736 1730 */
1737 1731 void
1738 1732 tavor_qpn_avl_fini(tavor_state_t *state)
1739 1733 {
1740 1734 tavor_qpn_entry_t *entry;
1741 1735 void *cookie;
1742 1736
1743 1737 TAVOR_TNF_ENTER(tavor_qpn_avl_fini);
1744 1738
1745 1739 /*
1746 1740 * Empty all entries (if necessary) and destroy the AVL tree
1747 1741 * that was used for QP number (QPN) tracking.
1748 1742 */
1749 1743 cookie = NULL;
1750 1744 while ((entry = (tavor_qpn_entry_t *)avl_destroy_nodes(
1751 1745 &state->ts_qpn_avl, &cookie)) != NULL) {
1752 1746 kmem_free(entry, sizeof (tavor_qpn_entry_t));
1753 1747 }
1754 1748 avl_destroy(&state->ts_qpn_avl);
1755 1749
1756 1750 /* Destroy the lock used for QP number (QPN) AVL tree access */
1757 1751 mutex_destroy(&state->ts_qpn_avl_lock);
1758 1752
1759 1753 TAVOR_TNF_EXIT(tavor_qpn_avl_fini);
1760 1754 }
1761 1755
1762 1756
1763 1757 /*
1764 1758 * tavor_qphdl_from_qpnum()
1765 1759 * Context: Can be called from interrupt or base context.
1766 1760 *
1767 1761 * This routine is important because changing the unconstrained
1768 1762 * portion of the QP number is critical to the detection of a
1769 1763 * potential race condition in the QP event handler code (i.e. the case
1770 1764 * where a QP is freed and alloc'd again before an event for the
1771 1765 * "old" QP can be handled).
1772 1766 *
1773 1767 * While this is not a perfect solution (not sure that one exists)
1774 1768 * it does help to mitigate the chance that this race condition will
1775 1769 * cause us to deliver a "stale" event to the new QP owner. Note:
1776 1770 * this solution does not scale well because the number of constrained
1777 1771 * bits increases (and, hence, the number of unconstrained bits
1778 1772 * decreases) as the number of supported QPs grows. For small and
1779 1773 * intermediate values, it should hopefully provide sufficient
1780 1774 * protection.
1781 1775 */
1782 1776 tavor_qphdl_t
1783 1777 tavor_qphdl_from_qpnum(tavor_state_t *state, uint_t qpnum)
1784 1778 {
1785 1779 uint_t qpindx, qpmask;
1786 1780
1787 1781 /* Calculate the QP table index from the qpnum */
1788 1782 qpmask = (1 << state->ts_cfg_profile->cp_log_num_qp) - 1;
1789 1783 qpindx = qpnum & qpmask;
1790 1784 return (state->ts_qphdl[qpindx]);
1791 1785 }
1792 1786
1793 1787
/*
 * tavor_special_qp_rsrc_alloc
 *    Context: Can be called from interrupt or base context.
 *
 * Reserves the QP context resource for one of the special QPs - QP0 (SMI)
 * or QP1 (GSI) - on the given port, returning it through "qp_rsrc".  The
 * per-port allocation state lives in the ts_spec_qpflags bitfield
 * (protected by ts_spec_qplock); the first allocation of a given type also
 * posts a CONF_SPECIAL_QP firmware command to bind the QPC index to that
 * special QP type.
 *
 * Returns DDI_SUCCESS on success; IBT_QP_IN_USE if the special QP is
 * already allocated (or, for QP0, owned by firmware); IBT_INSUFF_RESOURCE
 * if the firmware command fails.
 */
static int
tavor_special_qp_rsrc_alloc(tavor_state_t *state, ibt_sqp_type_t type,
    uint_t port, tavor_rsrc_t **qp_rsrc)
{
	uint_t mask, flags;
	int status;

	TAVOR_TNF_ENTER(tavor_special_qp_rsrc_alloc);

	mutex_enter(&state->ts_spec_qplock);
	flags = state->ts_spec_qpflags;
	if (type == IBT_SMI_SQP) {
		/*
		 * Check here to see if the driver has been configured
		 * to instruct the Tavor firmware to handle all incoming
		 * SMP messages (i.e. messages sent to SMA).  If so,
		 * then we will treat QP0 as if it has already been
		 * allocated (for internal use).  Otherwise, if we allow
		 * the allocation to happen, it will cause unexpected
		 * behaviors (e.g. Tavor SMA becomes unresponsive).
		 */
		if (state->ts_cfg_profile->cp_qp0_agents_in_fw != 0) {
			mutex_exit(&state->ts_spec_qplock);
			TNF_PROBE_0(tavor_special_qp0_alloc_already_in_fw,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
			return (IBT_QP_IN_USE);
		}

		/*
		 * If this is the first QP0 allocation, then post
		 * a CONF_SPECIAL_QP firmware command
		 */
		if ((flags & TAVOR_SPECIAL_QP0_RSRC_MASK) == 0) {
			status = tavor_conf_special_qp_cmd_post(state,
			    state->ts_spec_qp0->tr_indx, TAVOR_CMD_QP_SMI,
			    TAVOR_CMD_NOSLEEP_SPIN);
			if (status != TAVOR_CMD_SUCCESS) {
				mutex_exit(&state->ts_spec_qplock);
				cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
				    "command failed: %08x\n", status);
				TNF_PROBE_1(tavor_conf_special_qp_cmd_fail,
				    TAVOR_TNF_ERROR, "", tnf_uint, status,
				    status);
				TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
				return (IBT_INSUFF_RESOURCE);
			}
		}

		/*
		 * Now check (and, if necessary, modify) the flags to indicate
		 * whether the allocation was successful
		 */
		mask = (1 << (TAVOR_SPECIAL_QP0_RSRC + port));
		if (flags & mask) {
			/* This port's QP0 is already in use */
			mutex_exit(&state->ts_spec_qplock);
			TNF_PROBE_1(tavor_ts_spec_qp0_alloc_already,
			    TAVOR_TNF_ERROR, "", tnf_uint, port, port);
			TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
			return (IBT_QP_IN_USE);
		}
		state->ts_spec_qpflags |= mask;
		*qp_rsrc = state->ts_spec_qp0;

	} else {
		/*
		 * If this is the first QP1 allocation, then post
		 * a CONF_SPECIAL_QP firmware command
		 */
		if ((flags & TAVOR_SPECIAL_QP1_RSRC_MASK) == 0) {
			status = tavor_conf_special_qp_cmd_post(state,
			    state->ts_spec_qp1->tr_indx, TAVOR_CMD_QP_GSI,
			    TAVOR_CMD_NOSLEEP_SPIN);
			if (status != TAVOR_CMD_SUCCESS) {
				mutex_exit(&state->ts_spec_qplock);
				cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
				    "command failed: %08x\n", status);
				TNF_PROBE_1(tavor_conf_special_qp_cmd_fail,
				    TAVOR_TNF_ERROR, "", tnf_uint, status,
				    status);
				TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
				return (IBT_INSUFF_RESOURCE);
			}
		}

		/*
		 * Now check (and, if necessary, modify) the flags to indicate
		 * whether the allocation was successful
		 */
		mask = (1 << (TAVOR_SPECIAL_QP1_RSRC + port));
		if (flags & mask) {
			/* This port's QP1 is already in use */
			mutex_exit(&state->ts_spec_qplock);
			TNF_PROBE_0(tavor_ts_spec_qp1_alloc_already,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
			return (IBT_QP_IN_USE);
		}
		state->ts_spec_qpflags |= mask;
		*qp_rsrc = state->ts_spec_qp1;
	}

	mutex_exit(&state->ts_spec_qplock);
	TAVOR_TNF_EXIT(tavor_special_qp_rsrc_alloc);
	return (DDI_SUCCESS);
}
1903 1897
1904 1898
1905 1899 /*
1906 1900 * tavor_special_qp_rsrc_free
1907 1901 * Context: Can be called from interrupt or base context.
1908 1902 */
1909 1903 static int
1910 1904 tavor_special_qp_rsrc_free(tavor_state_t *state, ibt_sqp_type_t type,
1911 1905 uint_t port)
1912 1906 {
1913 1907 uint_t mask, flags;
1914 1908 int status;
1915 1909
1916 1910 TAVOR_TNF_ENTER(tavor_special_qp_rsrc_free);
1917 1911
1918 1912 mutex_enter(&state->ts_spec_qplock);
1919 1913 if (type == IBT_SMI_SQP) {
1920 1914 mask = (1 << (TAVOR_SPECIAL_QP0_RSRC + port));
1921 1915 state->ts_spec_qpflags &= ~mask;
1922 1916 flags = state->ts_spec_qpflags;
1923 1917
1924 1918 /*
1925 1919 * If this is the last QP0 free, then post a CONF_SPECIAL_QP
1926 1920 * firmware command
1927 1921 */
1928 1922 if ((flags & TAVOR_SPECIAL_QP0_RSRC_MASK) == 0) {
1929 1923 status = tavor_conf_special_qp_cmd_post(state, 0,
1930 1924 TAVOR_CMD_QP_SMI, TAVOR_CMD_NOSLEEP_SPIN);
1931 1925 if (status != TAVOR_CMD_SUCCESS) {
1932 1926 mutex_exit(&state->ts_spec_qplock);
1933 1927 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
1934 1928 "command failed: %08x\n", status);
1935 1929 TNF_PROBE_1(tavor_conf_special_qp_cmd_fail,
1936 1930 TAVOR_TNF_ERROR, "", tnf_uint, status,
1937 1931 status);
1938 1932 TAVOR_TNF_EXIT(tavor_special_qp_rsrc_free);
1939 1933 return (ibc_get_ci_failure(0));
1940 1934 }
1941 1935 }
1942 1936 } else {
1943 1937 mask = (1 << (TAVOR_SPECIAL_QP1_RSRC + port));
1944 1938 state->ts_spec_qpflags &= ~mask;
1945 1939 flags = state->ts_spec_qpflags;
1946 1940
1947 1941 /*
1948 1942 * If this is the last QP1 free, then post a CONF_SPECIAL_QP
1949 1943 * firmware command
1950 1944 */
1951 1945 if ((flags & TAVOR_SPECIAL_QP1_RSRC_MASK) == 0) {
1952 1946 status = tavor_conf_special_qp_cmd_post(state, 0,
1953 1947 TAVOR_CMD_QP_GSI, TAVOR_CMD_NOSLEEP_SPIN);
1954 1948 if (status != TAVOR_CMD_SUCCESS) {
1955 1949 mutex_exit(&state->ts_spec_qplock);
1956 1950 cmn_err(CE_CONT, "Tavor: CONF_SPECIAL_QP "
1957 1951 "command failed: %08x\n", status);
1958 1952 TNF_PROBE_1(tavor_conf_special_qp_cmd_fail,
1959 1953 TAVOR_TNF_ERROR, "", tnf_uint, status,
1960 1954 status);
1961 1955 TAVOR_TNF_EXIT(tavor_special_qp_rsrc_free);
1962 1956 return (ibc_get_ci_failure(0));
1963 1957 }
1964 1958 }
1965 1959 }
1966 1960
1967 1961 mutex_exit(&state->ts_spec_qplock);
1968 1962 TAVOR_TNF_EXIT(tavor_special_qp_rsrc_free);
1969 1963 return (DDI_SUCCESS);
1970 1964 }
1971 1965
1972 1966
1973 1967 /*
1974 1968 * tavor_qp_sgl_to_logwqesz()
1975 1969 * Context: Can be called from interrupt or base context.
1976 1970 */
1977 1971 static void
1978 1972 tavor_qp_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
1979 1973 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
1980 1974 {
1981 1975 uint_t max_size, log2, actual_sgl;
1982 1976
1983 1977 TAVOR_TNF_ENTER(tavor_qp_sgl_to_logwqesz);
1984 1978
1985 1979 switch (wq_type) {
1986 1980 case TAVOR_QP_WQ_TYPE_SENDQ:
1987 1981 /*
1988 1982 * Use requested maximum SGL to calculate max descriptor size
1989 1983 * (while guaranteeing that the descriptor size is a
1990 1984 * power-of-2 cachelines).
1991 1985 */
1992 1986 max_size = (TAVOR_QP_WQE_MLX_SND_HDRS + (num_sgl << 4));
1993 1987 log2 = highbit(max_size);
1994 1988 if (ISP2(max_size)) {
1995 1989 log2 = log2 - 1;
1996 1990 }
1997 1991
1998 1992 /* Make sure descriptor is at least the minimum size */
1999 1993 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
2000 1994
2001 1995 /* Calculate actual number of SGL (given WQE size) */
2002 1996 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_SND_HDRS) >> 4;
2003 1997 break;
2004 1998
2005 1999 case TAVOR_QP_WQ_TYPE_RECVQ:
2006 2000 /*
2007 2001 * Same as above (except for Recv WQEs)
2008 2002 */
2009 2003 max_size = (TAVOR_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
2010 2004 log2 = highbit(max_size);
2011 2005 if (ISP2(max_size)) {
2012 2006 log2 = log2 - 1;
2013 2007 }
2014 2008
2015 2009 /* Make sure descriptor is at least the minimum size */
2016 2010 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
2017 2011
2018 2012 /* Calculate actual number of SGL (given WQE size) */
2019 2013 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_RCV_HDRS) >> 4;
2020 2014 break;
2021 2015
2022 2016 case TAVOR_QP_WQ_TYPE_SENDMLX_QP0:
2023 2017 /*
2024 2018 * Same as above (except for MLX transport WQEs). For these
2025 2019 * WQEs we have to account for the space consumed by the
2026 2020 * "inline" packet headers. (This is smaller than for QP1
2027 2021 * below because QP0 is not allowed to send packets with a GRH.
2028 2022 */
2029 2023 max_size = (TAVOR_QP_WQE_MLX_QP0_HDRS + (num_sgl << 4));
2030 2024 log2 = highbit(max_size);
2031 2025 if (ISP2(max_size)) {
2032 2026 log2 = log2 - 1;
2033 2027 }
2034 2028
2035 2029 /* Make sure descriptor is at least the minimum size */
2036 2030 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
2037 2031
2038 2032 /* Calculate actual number of SGL (given WQE size) */
2039 2033 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_QP0_HDRS) >> 4;
2040 2034 break;
2041 2035
2042 2036 case TAVOR_QP_WQ_TYPE_SENDMLX_QP1:
2043 2037 /*
2044 2038 * Same as above. For these WQEs we again have to account for
2045 2039 * the space consumed by the "inline" packet headers. (This
2046 2040 * is larger than for QP0 above because we have to account for
2047 2041 * the possibility of a GRH in each packet - and this
2048 2042 * introduces an alignment issue that causes us to consume
2049 2043 * an additional 8 bytes).
2050 2044 */
2051 2045 max_size = (TAVOR_QP_WQE_MLX_QP1_HDRS + (num_sgl << 4));
2052 2046 log2 = highbit(max_size);
2053 2047 if (ISP2(max_size)) {
2054 2048 log2 = log2 - 1;
2055 2049 }
2056 2050
2057 2051 /* Make sure descriptor is at least the minimum size */
2058 2052 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
2059 2053
2060 2054 /* Calculate actual number of SGL (given WQE size) */
2061 2055 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_QP1_HDRS) >> 4;
2062 2056 break;
2063 2057
2064 2058 default:
2065 2059 TAVOR_WARNING(state, "unexpected work queue type");
2066 2060 TNF_PROBE_0(tavor_qp_sgl_to_logwqesz_inv_wqtype_fail,
2067 2061 TAVOR_TNF_ERROR, "");
2068 2062 break;
2069 2063 }
2070 2064
2071 2065 /* Fill in the return values */
2072 2066 *logwqesz = log2;
2073 2067 *max_sgl = min(state->ts_cfg_profile->cp_wqe_real_max_sgl, actual_sgl);
2074 2068
2075 2069 TAVOR_TNF_EXIT(tavor_qp_sgl_to_logwqesz);
2076 2070 }
↓ open down ↓ |
497 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX