1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * hermon_srq.c
28 * Hermon Shared Receive Queue Processing Routines
29 *
30 * Implements all the routines necessary for allocating, freeing, querying,
31 * modifying and posting shared receive queues.
32 */
33
34 #include <sys/sysmacros.h>
35 #include <sys/types.h>
36 #include <sys/conf.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/modctl.h>
40 #include <sys/bitmap.h>
41
42 #include <sys/ib/adapters/hermon/hermon.h>
43
44 static void hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
45 hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);
46
47 /*
48 * hermon_srq_alloc()
49 * Context: Can be called only from user or kernel context.
50 */
51 int
52 hermon_srq_alloc(hermon_state_t *state, hermon_srq_info_t *srqinfo,
53 uint_t sleepflag)
54 {
55 ibt_srq_hdl_t ibt_srqhdl;
56 hermon_pdhdl_t pd;
57 ibt_srq_sizes_t *sizes;
58 ibt_srq_sizes_t *real_sizes;
59 hermon_srqhdl_t *srqhdl;
60 ibt_srq_flags_t flags;
61 hermon_rsrc_t *srqc, *rsrc;
62 hermon_hw_srqc_t srqc_entry;
63 uint32_t *buf;
64 hermon_srqhdl_t srq;
65 hermon_umap_db_entry_t *umapdb;
66 ibt_mr_attr_t mr_attr;
67 hermon_mr_options_t mr_op;
68 hermon_mrhdl_t mr;
69 uint64_t value, srq_desc_off;
70 uint32_t log_srq_size;
71 uint32_t uarpg;
72 uint_t srq_is_umap;
73 int flag, status;
74 uint_t max_sgl;
75 uint_t wqesz;
76 uint_t srq_wr_sz;
77
78 /*
79 * options-->wq_location used to be for location, now explicitly
80 * LOCATION_NORMAL
81 */
82
83 /*
84 * Extract the necessary info from the hermon_srq_info_t structure
85 */
86 real_sizes = srqinfo->srqi_real_sizes;
87 sizes = srqinfo->srqi_sizes;
88 pd = srqinfo->srqi_pd;
89 ibt_srqhdl = srqinfo->srqi_ibt_srqhdl;
90 flags = srqinfo->srqi_flags;
91 srqhdl = srqinfo->srqi_srqhdl;
92
93 /*
94 * Determine whether SRQ is being allocated for userland access or
95 * whether it is being allocated for kernel access. If the SRQ is
96 * being allocated for userland access, then lookup the UAR doorbell
97 * page number for the current process. Note: If this is not found
98 * (e.g. if the process has not previously open()'d the Hermon driver),
99 * then an error is returned.
100 */
101 srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0;
102 if (srq_is_umap) {
103 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
104 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
105 if (status != DDI_SUCCESS) {
106 status = IBT_INVALID_PARAM;
107 goto srqalloc_fail3;
108 }
109 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
110 } else {
111 uarpg = state->hs_kernel_uar_index;
112 }
113
114 /* Increase PD refcnt */
115 hermon_pd_refcnt_inc(pd);
116
117 /* Allocate an SRQ context entry */
118 status = hermon_rsrc_alloc(state, HERMON_SRQC, 1, sleepflag, &srqc);
119 if (status != DDI_SUCCESS) {
120 status = IBT_INSUFF_RESOURCE;
121 goto srqalloc_fail1;
122 }
123
124 /* Allocate the SRQ Handle entry */
125 status = hermon_rsrc_alloc(state, HERMON_SRQHDL, 1, sleepflag, &rsrc);
126 if (status != DDI_SUCCESS) {
127 status = IBT_INSUFF_RESOURCE;
128 goto srqalloc_fail2;
129 }
130
131 srq = (hermon_srqhdl_t)rsrc->hr_addr;
132
133 bzero(srq, sizeof (struct hermon_sw_srq_s));
134 /* Calculate the SRQ number */
135
136 /* just use the index, implicit in Hermon */
137 srq->srq_srqnum = srqc->hr_indx;
138
139 /*
140 * If this will be a user-mappable SRQ, then allocate an entry for
141 * the "userland resources database". This will later be added to
142 * the database (after all further SRQ operations are successful).
143 * If we fail here, we must undo the reference counts and the
144 * previous resource allocation.
145 */
146 if (srq_is_umap) {
147 umapdb = hermon_umap_db_alloc(state->hs_instance,
148 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC,
149 (uint64_t)(uintptr_t)rsrc);
150 if (umapdb == NULL) {
151 status = IBT_INSUFF_RESOURCE;
152 goto srqalloc_fail3;
153 }
154 }
155
156 /*
157 * Allocate the doorbell record. Hermon just needs one for the
158 * SRQ, and use uarpg (above) as the uar index
159 */
160
161 status = hermon_dbr_alloc(state, uarpg, &srq->srq_wq_dbr_acchdl,
162 &srq->srq_wq_vdbr, &srq->srq_wq_pdbr, &srq->srq_rdbr_mapoffset);
163 if (status != DDI_SUCCESS) {
164 status = IBT_INSUFF_RESOURCE;
165 goto srqalloc_fail4;
166 }
167
168 /*
169 * Calculate the appropriate size for the SRQ.
170 * Note: All Hermon SRQs must be a power-of-2 in size. Also
171 * they may not be any smaller than HERMON_SRQ_MIN_SIZE. This step
172 * is to round the requested size up to the next highest power-of-2
173 */
174 srq_wr_sz = max(sizes->srq_wr_sz + 1, HERMON_SRQ_MIN_SIZE);
175 log_srq_size = highbit(srq_wr_sz);
176 if (ISP2(srq_wr_sz)) {
177 log_srq_size = log_srq_size - 1;
178 }
179
180 /*
181 * Next we verify that the rounded-up size is valid (i.e. consistent
182 * with the device limits and/or software-configured limits). If not,
183 * then obviously we have a lot of cleanup to do before returning.
184 */
185 if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) {
186 status = IBT_HCA_WR_EXCEEDED;
187 goto srqalloc_fail4a;
188 }
189
190 /*
191 * Next we verify that the requested number of SGL is valid (i.e.
192 * consistent with the device limits and/or software-configured
193 * limits). If not, then obviously the same cleanup needs to be done.
194 */
195 max_sgl = state->hs_ibtfinfo.hca_attr->hca_max_srq_sgl;
196 if (sizes->srq_sgl_sz > max_sgl) {
197 status = IBT_HCA_SGL_EXCEEDED;
198 goto srqalloc_fail4a;
199 }
200
201 /*
202 * Determine the SRQ's WQE sizes. This depends on the requested
203 * number of SGLs. Note: This also has the side-effect of
204 * calculating the real number of SGLs (for the calculated WQE size)
205 */
206 hermon_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz,
207 HERMON_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz,
208 &srq->srq_wq_sgl);
209
210 /*
211 * Allocate the memory for SRQ work queues. Note: The location from
212 * which we will allocate these work queues is always
213 * QUEUE_LOCATION_NORMAL. Since Hermon work queues are not
214 * allowed to cross a 32-bit (4GB) boundary, the alignment of the work
215 * queue memory is very important. We used to allocate work queues
216 * (the combined receive and send queues) so that they would be aligned
217 * on their combined size. That alignment guaranteed that they would
218 * never cross the 4GB boundary (Hermon work queues are on the order of
219 * MBs at maximum). Now we are able to relax this alignment constraint
220 * by ensuring that the IB address assigned to the queue memory (as a
221 * result of the hermon_mr_register() call) is offset from zero.
222 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
223 * guarantee the alignment, but when attempting to use IOMMU bypass
224 * mode we found that we were not allowed to specify any alignment that
225 * was more restrictive than the system page size. So we avoided this
226 * constraint by passing two alignment values, one for the memory
227 * allocation itself and the other for the DMA handle (for later bind).
228 * This used to cause more memory than necessary to be allocated (in
229 * order to guarantee the more restrictive alignment contraint). But
230 * be guaranteeing the zero-based IB virtual address for the queue, we
231 * are able to conserve this memory.
232 *
233 * Note: If SRQ is not user-mappable, then it may come from either
234 * kernel system memory or from HCA-attached local DDR memory.
235 *
236 * Note2: We align this queue on a pagesize boundary. This is required
237 * to make sure that all the resulting IB addresses will start at 0, for
238 * a zero-based queue. By making sure we are aligned on at least a
239 * page, any offset we use into our queue will be the same as when we
240 * perform hermon_srq_modify() operations later.
241 */
242 wqesz = (1 << srq->srq_wq_log_wqesz);
243 srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz;
244 srq->srq_wqinfo.qa_alloc_align = PAGESIZE;
245 srq->srq_wqinfo.qa_bind_align = PAGESIZE;
246 if (srq_is_umap) {
247 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
248 } else {
249 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
250 }
251 status = hermon_queue_alloc(state, &srq->srq_wqinfo, sleepflag);
252 if (status != DDI_SUCCESS) {
253 status = IBT_INSUFF_RESOURCE;
254 goto srqalloc_fail4a;
255 }
256 buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned;
257
258 /*
259 * Register the memory for the SRQ work queues. The memory for the SRQ
260 * must be registered in the Hermon cMPT tables. This gives us the LKey
261 * to specify in the SRQ context later. Note: If the work queue is to
262 * be allocated from DDR memory, then only a "bypass" mapping is
263 * appropriate. And if the SRQ memory is user-mappable, then we force
264 * DDI_DMA_CONSISTENT mapping. Also, in order to meet the alignment
265 * restriction, we pass the "mro_bind_override_addr" flag in the call
266 * to hermon_mr_register(). This guarantees that the resulting IB vaddr
267 * will be zero-based (modulo the offset into the first page). If we
268 * fail here, we still have the bunch of resource and reference count
269 * cleanup to do.
270 */
271 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
272 IBT_MR_NOSLEEP;
273 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
274 mr_attr.mr_len = srq->srq_wqinfo.qa_size;
275 mr_attr.mr_as = NULL;
276 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
277 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
278 mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl;
279 mr_op.mro_bind_override_addr = 1;
280 status = hermon_mr_register(state, pd, &mr_attr, &mr,
281 &mr_op, HERMON_SRQ_CMPT);
282 if (status != DDI_SUCCESS) {
283 status = IBT_INSUFF_RESOURCE;
284 goto srqalloc_fail5;
285 }
286
287 /*
288 * Calculate the offset between the kernel virtual address space
289 * and the IB virtual address space. This will be used when
290 * posting work requests to properly initialize each WQE.
291 */
292 srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
293 (uint64_t)mr->mr_bindinfo.bi_addr;
294
295 srq->srq_wq_wqhdr = hermon_wrid_wqhdr_create(1 << log_srq_size);
296
297 /*
298 * Fill in all the return arguments (if necessary). This includes
299 * real queue size and real SGLs.
300 */
301 if (real_sizes != NULL) {
302 real_sizes->srq_wr_sz = (1 << log_srq_size) - 1;
303 real_sizes->srq_sgl_sz = srq->srq_wq_sgl;
304 }
305
306 /*
307 * Fill in the SRQC entry. This is the final step before passing
308 * ownership of the SRQC entry to the Hermon hardware. We use all of
309 * the information collected/calculated above to fill in the
310 * requisite portions of the SRQC. Note: If this SRQ is going to be
311 * used for userland access, then we need to set the UAR page number
312 * appropriately (otherwise it's a "don't care")
313 */
314 bzero(&srqc_entry, sizeof (hermon_hw_srqc_t));
315 srqc_entry.state = HERMON_SRQ_STATE_HW_OWNER;
316 srqc_entry.log_srq_size = log_srq_size;
317 srqc_entry.srqn = srq->srq_srqnum;
318 srqc_entry.log_rq_stride = srq->srq_wq_log_wqesz - 4;
319 /* 16-byte chunks */
320
321 srqc_entry.page_offs = srq->srq_wqinfo.qa_pgoffs >> 6;
322 srqc_entry.log2_pgsz = mr->mr_log2_pgsz;
323 srqc_entry.mtt_base_addrh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
324 srqc_entry.mtt_base_addrl = mr->mr_mttaddr >> 3;
325 srqc_entry.pd = pd->pd_pdnum;
326 srqc_entry.dbr_addrh = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 32);
327 srqc_entry.dbr_addrl = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 2);
328
329 /*
330 * all others - specifically, xrcd, cqn_xrc, lwm, wqe_cnt, and wqe_cntr
331 * are zero thanks to the bzero of the structure
332 */
333
334 /*
335 * Write the SRQC entry to hardware. Lastly, we pass ownership of
336 * the entry to the hardware (using the Hermon SW2HW_SRQ firmware
337 * command). Note: In general, this operation shouldn't fail. But
338 * if it does, we have to undo everything we've done above before
339 * returning error.
340 */
341 status = hermon_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry,
342 sizeof (hermon_hw_srqc_t), srq->srq_srqnum,
343 sleepflag);
344 if (status != HERMON_CMD_SUCCESS) {
345 cmn_err(CE_CONT, "Hermon: SW2HW_SRQ command failed: %08x\n",
346 status);
347 if (status == HERMON_CMD_INVALID_STATUS) {
348 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
349 }
350 status = ibc_get_ci_failure(0);
351 goto srqalloc_fail8;
352 }
353
354 /*
355 * Fill in the rest of the Hermon SRQ handle. We can update
356 * the following fields for use in further operations on the SRQ.
357 */
358 srq->srq_srqcrsrcp = srqc;
359 srq->srq_rsrcp = rsrc;
360 srq->srq_mrhdl = mr;
361 srq->srq_refcnt = 0;
362 srq->srq_is_umap = srq_is_umap;
363 srq->srq_uarpg = uarpg;
364 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
365 srq->srq_pdhdl = pd;
366 srq->srq_wq_bufsz = (1 << log_srq_size);
367 srq->srq_wq_buf = buf;
368 srq->srq_desc_off = srq_desc_off;
369 srq->srq_hdlrarg = (void *)ibt_srqhdl;
370 srq->srq_state = 0;
371 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
372 srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl;
373
374 /*
375 * Put SRQ handle in Hermon SRQNum-to-SRQhdl list. Then fill in the
376 * "srqhdl" and return success
377 */
378 hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, srq);
379
380 /*
381 * If this is a user-mappable SRQ, then we need to insert the
382 * previously allocated entry into the "userland resources database".
383 * This will allow for later lookup during devmap() (i.e. mmap())
384 * calls.
385 */
386 if (srq->srq_is_umap) {
387 hermon_umap_db_add(umapdb);
388 } else { /* initialize work queue for kernel SRQs */
389 int i, len, last;
390 uint16_t *desc;
391
392 desc = (uint16_t *)buf;
393 len = wqesz / sizeof (*desc);
394 last = srq->srq_wq_bufsz - 1;
395 for (i = 0; i < last; i++) {
396 desc[1] = htons(i + 1);
397 desc += len;
398 }
399 srq->srq_wq_wqhdr->wq_tail = last;
400 srq->srq_wq_wqhdr->wq_head = 0;
401 }
402
403 *srqhdl = srq;
404
405 return (status);
406
407 /*
408 * The following is cleanup for all possible failure cases in this routine
409 */
410 srqalloc_fail8:
411 hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr);
412 srqalloc_fail7:
413 if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
414 HERMON_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) {
415 HERMON_WARNING(state, "failed to deregister SRQ memory");
416 }
417 srqalloc_fail5:
418 hermon_queue_free(&srq->srq_wqinfo);
419 srqalloc_fail4a:
420 hermon_dbr_free(state, uarpg, srq->srq_wq_vdbr);
421 srqalloc_fail4:
422 if (srq_is_umap) {
423 hermon_umap_db_free(umapdb);
424 }
425 srqalloc_fail3:
426 hermon_rsrc_free(state, &rsrc);
427 srqalloc_fail2:
428 hermon_rsrc_free(state, &srqc);
429 srqalloc_fail1:
430 hermon_pd_refcnt_dec(pd);
431 srqalloc_fail:
432 return (status);
433 }
434
435
436 /*
437 * hermon_srq_free()
438 * Context: Can be called only from user or kernel context.
439 */
440 /* ARGSUSED */
int
hermon_srq_free(hermon_state_t *state, hermon_srqhdl_t *srqhdl,
    uint_t sleepflag)
{
	hermon_rsrc_t		*srqc, *rsrc;
	hermon_umap_db_entry_t	*umapdb;
	uint64_t		value;
	hermon_srqhdl_t		srq;
	hermon_mrhdl_t		mr;
	hermon_pdhdl_t		pd;
	hermon_hw_srqc_t	srqc_entry;
	uint32_t		srqnum;
	uint_t			maxprot;
	int			status;

	/*
	 * Pull all the necessary information from the Hermon Shared Receive
	 * Queue handle.  This is necessary here because the resource for the
	 * SRQ handle is going to be freed up as part of this operation.
	 */
	srq	= *srqhdl;
	mutex_enter(&srq->srq_lock);
	srqc	= srq->srq_srqcrsrcp;
	rsrc	= srq->srq_rsrcp;
	pd	= srq->srq_pdhdl;
	mr	= srq->srq_mrhdl;
	srqnum = srq->srq_srqnum;

	/*
	 * If there are work queues still associated with the SRQ, then return
	 * an error.  Otherwise, we will be holding the SRQ lock.
	 */
	if (srq->srq_refcnt != 0) {
		mutex_exit(&srq->srq_lock);
		return (IBT_SRQ_IN_USE);
	}

	/*
	 * If this was a user-mappable SRQ, then we need to remove its entry
	 * from the "userland resources database".  If it is also currently
	 * mmap()'d out to a user process, then we need to call
	 * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping.
	 * We also need to invalidate the SRQ tracking information for the
	 * user mapping.
	 */
	if (srq->srq_is_umap) {
		status = hermon_umap_db_find(state->hs_instance,
		    srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC, &value,
		    HERMON_UMAP_DB_REMOVE, &umapdb);
		if (status != DDI_SUCCESS) {
			mutex_exit(&srq->srq_lock);
			HERMON_WARNING(state, "failed to find in database");
			return (ibc_get_ci_failure(0));
		}
		hermon_umap_db_free(umapdb);
		if (srq->srq_umap_dhp != NULL) {
			maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
			status = devmap_devmem_remap(srq->srq_umap_dhp,
			    state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size,
			    maxprot, DEVMAP_MAPPING_INVALID, NULL);
			if (status != DDI_SUCCESS) {
				mutex_exit(&srq->srq_lock);
				HERMON_WARNING(state, "failed in SRQ memory "
				    "devmap_devmem_remap()");
				return (ibc_get_ci_failure(0));
			}
			/* Mapping invalidated; forget the devmap cookie */
			srq->srq_umap_dhp = (devmap_cookie_t)NULL;
		}
	}

	/*
	 * Put NULL into the Hermon SRQNum-to-SRQHdl list.  This will allow any
	 * in-progress events to detect that the SRQ corresponding to this
	 * number has been freed.
	 */
	hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, NULL);

	/*
	 * Drop the SRQ lock before posting firmware commands; everything we
	 * need from the handle was captured above (srqnum, srqc, rsrc, mr, pd)
	 */
	mutex_exit(&srq->srq_lock);

	/*
	 * Reclaim SRQC entry from hardware (using the Hermon HW2SW_SRQ
	 * firmware command).  If the ownership transfer fails for any reason,
	 * then it is an indication that something (either in HW or SW) has
	 * gone seriously wrong.
	 *
	 * NOTE(review): "srqc_entry" is passed in uninitialized here --
	 * presumably HW2SW_SRQ treats it purely as an output buffer for the
	 * reclaimed context; confirm against the command implementation.
	 */
	status = hermon_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry,
	    sizeof (hermon_hw_srqc_t), srqnum, sleepflag);
	if (status != HERMON_CMD_SUCCESS) {
		HERMON_WARNING(state, "failed to reclaim SRQC ownership");
		cmn_err(CE_CONT, "Hermon: HW2SW_SRQ command failed: %08x\n",
		    status);
		if (status == HERMON_CMD_INVALID_STATUS) {
			hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
		}
		return (ibc_get_ci_failure(0));
	}

	/*
	 * Deregister the memory for the Shared Receive Queue.  If this fails
	 * for any reason, then it is an indication that something (either
	 * in HW or SW) has gone seriously wrong.  So we print a warning
	 * message and return.
	 */
	status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
	    sleepflag);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to deregister SRQ memory");
		return (IBT_FAILURE);
	}

	/* Free the WRID tracking header for this SRQ's work queue */
	hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr);

	/* Free the memory for the SRQ */
	hermon_queue_free(&srq->srq_wqinfo);

	/* Free the dbr */
	hermon_dbr_free(state, srq->srq_uarpg, srq->srq_wq_vdbr);

	/* Free the Hermon SRQ Handle */
	hermon_rsrc_free(state, &rsrc);

	/* Free the SRQC entry resource */
	hermon_rsrc_free(state, &srqc);

	/* Decrement the reference count on the protection domain (PD) */
	hermon_pd_refcnt_dec(pd);

	/* Set the srqhdl pointer to NULL and return success */
	*srqhdl = NULL;

	return (DDI_SUCCESS);
}
573
574
575 /*
576 * hermon_srq_modify()
577 * Context: Can be called only from user or kernel context.
578 */
579 int
580 hermon_srq_modify(hermon_state_t *state, hermon_srqhdl_t srq, uint_t size,
581 uint_t *real_size, uint_t sleepflag)
582 {
583 hermon_qalloc_info_t new_srqinfo, old_srqinfo;
584 hermon_rsrc_t *mtt, *old_mtt;
585 hermon_bind_info_t bind;
586 hermon_bind_info_t old_bind;
587 hermon_mrhdl_t mr;
588 hermon_hw_srqc_t srqc_entry;
589 hermon_hw_dmpt_t mpt_entry;
590 uint64_t *wre_new, *wre_old;
591 uint64_t mtt_addr;
592 uint64_t srq_pgoffs;
593 uint64_t srq_desc_off;
594 uint32_t *buf, srq_old_bufsz;
595 uint32_t wqesz;
596 uint_t max_srq_size;
597 uint_t mtt_pgsize_bits;
598 uint_t log_srq_size, maxprot;
599 int status;
600
601 if ((state->hs_devlim.mod_wr_srq == 0) ||
602 (state->hs_cfg_profile->cp_srq_resize_enabled == 0))
603 return (IBT_NOT_SUPPORTED);
604
605 /*
606 * If size requested is larger than device capability, return
607 * Insufficient Resources
608 */
609 max_srq_size = (1 << state->hs_cfg_profile->cp_log_max_srq_sz);
610 if (size > max_srq_size) {
611 return (IBT_HCA_WR_EXCEEDED);
612 }
613
614 /*
615 * Calculate the appropriate size for the SRQ.
616 * Note: All Hermon SRQs must be a power-of-2 in size. Also
617 * they may not be any smaller than HERMON_SRQ_MIN_SIZE. This step
618 * is to round the requested size up to the next highest power-of-2
619 */
620 size = max(size, HERMON_SRQ_MIN_SIZE);
621 log_srq_size = highbit(size);
622 if (ISP2(size)) {
623 log_srq_size = log_srq_size - 1;
624 }
625
626 /*
627 * Next we verify that the rounded-up size is valid (i.e. consistent
628 * with the device limits and/or software-configured limits).
629 */
630 if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) {
631 status = IBT_HCA_WR_EXCEEDED;
632 goto srqmodify_fail;
633 }
634
635 /*
636 * Allocate the memory for newly resized Shared Receive Queue.
637 *
638 * Note: If SRQ is not user-mappable, then it may come from either
639 * kernel system memory or from HCA-attached local DDR memory.
640 *
641 * Note2: We align this queue on a pagesize boundary. This is required
642 * to make sure that all the resulting IB addresses will start at 0,
643 * for a zero-based queue. By making sure we are aligned on at least a
644 * page, any offset we use into our queue will be the same as it was
645 * when we allocated it at hermon_srq_alloc() time.
646 */
647 wqesz = (1 << srq->srq_wq_log_wqesz);
648 new_srqinfo.qa_size = (1 << log_srq_size) * wqesz;
649 new_srqinfo.qa_alloc_align = PAGESIZE;
650 new_srqinfo.qa_bind_align = PAGESIZE;
651 if (srq->srq_is_umap) {
652 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
653 } else {
654 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
655 }
656 status = hermon_queue_alloc(state, &new_srqinfo, sleepflag);
657 if (status != DDI_SUCCESS) {
658 status = IBT_INSUFF_RESOURCE;
659 goto srqmodify_fail;
660 }
661 buf = (uint32_t *)new_srqinfo.qa_buf_aligned;
662
663 /*
664 * Allocate the memory for the new WRE list. This will be used later
665 * when we resize the wridlist based on the new SRQ size.
666 */
667 wre_new = kmem_zalloc((1 << log_srq_size) * sizeof (uint64_t),
668 sleepflag);
669 if (wre_new == NULL) {
670 status = IBT_INSUFF_RESOURCE;
671 goto srqmodify_fail;
672 }
673
674 /*
675 * Fill in the "bind" struct. This struct provides the majority
676 * of the information that will be used to distinguish between an
677 * "addr" binding (as is the case here) and a "buf" binding (see
678 * below). The "bind" struct is later passed to hermon_mr_mem_bind()
679 * which does most of the "heavy lifting" for the Hermon memory
680 * registration routines.
681 */
682 bzero(&bind, sizeof (hermon_bind_info_t));
683 bind.bi_type = HERMON_BINDHDL_VADDR;
684 bind.bi_addr = (uint64_t)(uintptr_t)buf;
685 bind.bi_len = new_srqinfo.qa_size;
686 bind.bi_as = NULL;
687 bind.bi_flags = sleepflag == HERMON_SLEEP ? IBT_MR_SLEEP :
688 IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
689 bind.bi_bypass = state->hs_cfg_profile->cp_iommu_bypass;
690
691 status = hermon_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt,
692 &mtt_pgsize_bits, 0); /* no relaxed ordering */
693 if (status != DDI_SUCCESS) {
694 status = status;
695 kmem_free(wre_new, (1 << log_srq_size) *
696 sizeof (uint64_t));
697 hermon_queue_free(&new_srqinfo);
698 goto srqmodify_fail;
699 }
700
701 /*
702 * Calculate the offset between the kernel virtual address space
703 * and the IB virtual address space. This will be used when
704 * posting work requests to properly initialize each WQE.
705 *
706 * Note: bind addr is zero-based (from alloc) so we calculate the
707 * correct new offset here.
708 */
709 bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1);
710 srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned -
711 (uint64_t)bind.bi_addr;
712 srq_pgoffs = (uint_t)
713 ((uintptr_t)new_srqinfo.qa_buf_aligned & HERMON_PAGEOFFSET);
714
715 /*
716 * Fill in the MPT entry. This is the final step before passing
717 * ownership of the MPT entry to the Hermon hardware. We use all of
718 * the information collected/calculated above to fill in the
719 * requisite portions of the MPT.
720 */
721 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
722 mpt_entry.reg_win_len = bind.bi_len;
723 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
724 mpt_entry.mtt_addr_h = mtt_addr >> 32;
725 mpt_entry.mtt_addr_l = mtt_addr >> 3;
726
727 /*
728 * for hermon we build up a new srqc and pass that (partially filled
729 * to resize SRQ instead of modifying the (d)mpt directly
730 */
731
732
733
734 /*
735 * Now we grab the SRQ lock. Since we will be updating the actual
736 * SRQ location and the producer/consumer indexes, we should hold
737 * the lock.
738 *
739 * We do a HERMON_NOSLEEP here (and below), though, because we are
740 * holding the "srq_lock" and if we got raised to interrupt level
741 * by priority inversion, we would not want to block in this routine
742 * waiting for success.
743 */
744 mutex_enter(&srq->srq_lock);
745
746 /*
747 * Copy old entries to new buffer
748 */
749 srq_old_bufsz = srq->srq_wq_bufsz;
750 bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz);
751
752 /*
753 * Setup MPT information for use in the MODIFY_MPT command
754 */
755 mr = srq->srq_mrhdl;
756 mutex_enter(&mr->mr_lock);
757
758 /*
759 * now, setup the srqc information needed for resize - limit the
760 * values, but use the same structure as the srqc
761 */
762
763 srqc_entry.log_srq_size = log_srq_size;
764 srqc_entry.page_offs = srq_pgoffs >> 6;
765 srqc_entry.log2_pgsz = mr->mr_log2_pgsz;
766 srqc_entry.mtt_base_addrl = (uint64_t)mtt_addr >> 32;
767 srqc_entry.mtt_base_addrh = mtt_addr >> 3;
768
769 /*
770 * RESIZE_SRQ
771 *
772 * If this fails for any reason, then it is an indication that
773 * something (either in HW or SW) has gone seriously wrong. So we
774 * print a warning message and return.
775 */
776 status = hermon_resize_srq_cmd_post(state, &srqc_entry,
777 srq->srq_srqnum, sleepflag);
778 if (status != HERMON_CMD_SUCCESS) {
779 cmn_err(CE_CONT, "Hermon: RESIZE_SRQ command failed: %08x\n",
780 status);
781 if (status == HERMON_CMD_INVALID_STATUS) {
782 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
783 }
784 (void) hermon_mr_mtt_unbind(state, &bind, mtt);
785 kmem_free(wre_new, (1 << log_srq_size) *
786 sizeof (uint64_t));
787 hermon_queue_free(&new_srqinfo);
788 mutex_exit(&mr->mr_lock);
789 mutex_exit(&srq->srq_lock);
790 return (ibc_get_ci_failure(0));
791 }
792 /*
793 * Update the Hermon Shared Receive Queue handle with all the new
794 * information. At the same time, save away all the necessary
795 * information for freeing up the old resources
796 */
797 old_srqinfo = srq->srq_wqinfo;
798 old_mtt = srq->srq_mrhdl->mr_mttrsrcp;
799 bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind,
800 sizeof (hermon_bind_info_t));
801
802 /* Now set the new info */
803 srq->srq_wqinfo = new_srqinfo;
804 srq->srq_wq_buf = buf;
805 srq->srq_wq_bufsz = (1 << log_srq_size);
806 bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (hermon_bind_info_t));
807 srq->srq_mrhdl->mr_mttrsrcp = mtt;
808 srq->srq_desc_off = srq_desc_off;
809 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
810
811 /* Update MR mtt pagesize */
812 mr->mr_logmttpgsz = mtt_pgsize_bits;
813 mutex_exit(&mr->mr_lock);
814
815 /*
816 * Initialize new wridlist, if needed.
817 *
818 * If a wridlist already is setup on an SRQ (the QP associated with an
819 * SRQ has moved "from_reset") then we must update this wridlist based
820 * on the new SRQ size. We allocate the new size of Work Request ID
821 * Entries, copy over the old entries to the new list, and
822 * re-initialize the srq wridlist in non-umap case
823 */
824 wre_old = srq->srq_wq_wqhdr->wq_wrid;
825
826 bcopy(wre_old, wre_new, srq_old_bufsz * sizeof (uint64_t));
827
828 /* Setup new sizes in wre */
829 srq->srq_wq_wqhdr->wq_wrid = wre_new;
830
831 /*
832 * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out
833 * to a user process, then we need to call devmap_devmem_remap() to
834 * invalidate the mapping to the SRQ memory. We also need to
835 * invalidate the SRQ tracking information for the user mapping.
836 *
837 * Note: On failure, the remap really shouldn't ever happen. So, if it
838 * does, it is an indication that something has gone seriously wrong.
839 * So we print a warning message and return error (knowing, of course,
840 * that the "old" SRQ memory will be leaked)
841 */
842 if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) {
843 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
844 status = devmap_devmem_remap(srq->srq_umap_dhp,
845 state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot,
846 DEVMAP_MAPPING_INVALID, NULL);
847 if (status != DDI_SUCCESS) {
848 mutex_exit(&srq->srq_lock);
849 HERMON_WARNING(state, "failed in SRQ memory "
850 "devmap_devmem_remap()");
851 /* We can, however, free the memory for old wre */
852 kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t));
853 return (ibc_get_ci_failure(0));
854 }
855 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
856 }
857
858 /*
859 * Drop the SRQ lock now. The only thing left to do is to free up
860 * the old resources.
861 */
862 mutex_exit(&srq->srq_lock);
863
864 /*
865 * Unbind the MTT entries.
866 */
867 status = hermon_mr_mtt_unbind(state, &old_bind, old_mtt);
868 if (status != DDI_SUCCESS) {
869 HERMON_WARNING(state, "failed to unbind old SRQ memory");
870 status = ibc_get_ci_failure(0);
871 goto srqmodify_fail;
872 }
873
874 /* Free the memory for old wre */
875 kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t));
876
877 /* Free the memory for the old SRQ */
878 hermon_queue_free(&old_srqinfo);
879
880 /*
881 * Fill in the return arguments (if necessary). This includes the
882 * real new completion queue size.
883 */
884 if (real_size != NULL) {
885 *real_size = (1 << log_srq_size);
886 }
887
888 return (DDI_SUCCESS);
889
890 srqmodify_fail:
891 return (status);
892 }
893
894
895 /*
896 * hermon_srq_refcnt_inc()
897 * Context: Can be called from interrupt or base context.
898 */
899 void
900 hermon_srq_refcnt_inc(hermon_srqhdl_t srq)
901 {
902 mutex_enter(&srq->srq_lock);
903 srq->srq_refcnt++;
904 mutex_exit(&srq->srq_lock);
905 }
906
907
908 /*
909 * hermon_srq_refcnt_dec()
910 * Context: Can be called from interrupt or base context.
911 */
912 void
913 hermon_srq_refcnt_dec(hermon_srqhdl_t srq)
914 {
915 mutex_enter(&srq->srq_lock);
916 srq->srq_refcnt--;
917 mutex_exit(&srq->srq_lock);
918 }
919
920
921 /*
922 * hermon_srqhdl_from_srqnum()
923 * Context: Can be called from interrupt or base context.
924 *
925 * This routine is important because changing the unconstrained
926 * portion of the SRQ number is critical to the detection of a
927 * potential race condition in the SRQ handler code (i.e. the case
928 * where a SRQ is freed and alloc'd again before an event for the
929 * "old" SRQ can be handled).
930 *
931 * While this is not a perfect solution (not sure that one exists)
932 * it does help to mitigate the chance that this race condition will
933 * cause us to deliver a "stale" event to the new SRQ owner. Note:
934 * this solution does not scale well because the number of constrained
935 * bits increases (and, hence, the number of unconstrained bits
936 * decreases) as the number of supported SRQ grows. For small and
937 * intermediate values, it should hopefully provide sufficient
938 * protection.
939 */
940 hermon_srqhdl_t
941 hermon_srqhdl_from_srqnum(hermon_state_t *state, uint_t srqnum)
942 {
943 uint_t srqindx, srqmask;
944
945 /* Calculate the SRQ table index from the srqnum */
946 srqmask = (1 << state->hs_cfg_profile->cp_log_num_srq) - 1;
947 srqindx = srqnum & srqmask;
948 return (hermon_icm_num_to_hdl(state, HERMON_SRQC, srqindx));
949 }
950
951
952 /*
953 * hermon_srq_sgl_to_logwqesz()
954 * Context: Can be called from interrupt or base context.
955 */
956 static void
957 hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
958 hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
959 {
960 uint_t max_size, log2, actual_sgl;
961
962 switch (wq_type) {
963 case HERMON_QP_WQ_TYPE_RECVQ:
964 /*
965 * Use requested maximum SGL to calculate max descriptor size
966 * (while guaranteeing that the descriptor size is a
967 * power-of-2 cachelines).
968 */
969 max_size = (HERMON_QP_WQE_MLX_SRQ_HDRS + (num_sgl << 4));
970 log2 = highbit(max_size);
971 if (ISP2(max_size)) {
972 log2 = log2 - 1;
973 }
974
975 /* Make sure descriptor is at least the minimum size */
976 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
977
978 /* Calculate actual number of SGL (given WQE size) */
979 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SRQ_HDRS) >> 4;
980 break;
981
982 default:
983 HERMON_WARNING(state, "unexpected work queue type");
984 break;
985 }
986
987 /* Fill in the return values */
988 *logwqesz = log2;
989 *max_sgl = min(state->hs_cfg_profile->cp_srq_max_sgl, actual_sgl);
990 }