61 ibt_srq_flags_t flags;
62 tavor_rsrc_t *srqc, *rsrc;
63 tavor_hw_srqc_t srqc_entry;
64 uint32_t *buf;
65 tavor_srqhdl_t srq;
66 tavor_umap_db_entry_t *umapdb;
67 ibt_mr_attr_t mr_attr;
68 tavor_mr_options_t mr_op;
69 tavor_mrhdl_t mr;
70 uint64_t addr;
71 uint64_t value, srq_desc_off;
72 uint32_t lkey;
73 uint32_t log_srq_size;
74 uint32_t uarpg;
75 uint_t wq_location, dma_xfer_mode, srq_is_umap;
76 int flag, status;
77 char *errormsg;
78 uint_t max_sgl;
79 uint_t wqesz;
80
81 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes))
82
83 TAVOR_TNF_ENTER(tavor_srq_alloc);
84
85 /*
86 * Check the "options" flag. Currently this flag tells the driver
87 * whether or not the SRQ's work queues should be come from normal
88 * system memory or whether they should be allocated from DDR memory.
89 */
90 if (op == NULL) {
91 wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
92 } else {
93 wq_location = op->srqo_wq_loc;
94 }
95
96 /*
97 * Extract the necessary info from the tavor_srq_info_t structure
98 */
99 real_sizes = srqinfo->srqi_real_sizes;
100 sizes = srqinfo->srqi_sizes;
101 pd = srqinfo->srqi_pd;
102 ibt_srqhdl = srqinfo->srqi_ibt_srqhdl;
126 /* Increase PD refcnt */
127 tavor_pd_refcnt_inc(pd);
128
129 /* Allocate an SRQ context entry */
130 status = tavor_rsrc_alloc(state, TAVOR_SRQC, 1, sleepflag, &srqc);
131 if (status != DDI_SUCCESS) {
132 /* Set "status" and "errormsg" and goto failure */
133 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ context");
134 goto srqalloc_fail1;
135 }
136
137 /* Allocate the SRQ Handle entry */
138 status = tavor_rsrc_alloc(state, TAVOR_SRQHDL, 1, sleepflag, &rsrc);
139 if (status != DDI_SUCCESS) {
140 /* Set "status" and "errormsg" and goto failure */
141 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ handle");
142 goto srqalloc_fail2;
143 }
144
145 srq = (tavor_srqhdl_t)rsrc->tr_addr;
146 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq))
147
148 srq->srq_srqnum = srqc->tr_indx; /* just use index */
149
150 /*
151 * If this will be a user-mappable SRQ, then allocate an entry for
152 * the "userland resources database". This will later be added to
153 * the database (after all further SRQ operations are successful).
154 * If we fail here, we must undo the reference counts and the
155 * previous resource allocation.
156 */
/*
 * NOTE(review): srq_is_umap is assigned in code elided from this
 * excerpt (original lines ~103-125) — presumably derived from the
 * IBT_SRQ_USER_MAP bit in "flags"; confirm against the full source
 * before relying on it here.
 */
157 if (srq_is_umap) {
158 umapdb = tavor_umap_db_alloc(state->ts_instance,
159 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC,
160 (uint64_t)(uintptr_t)rsrc);
161 if (umapdb == NULL) {
162 /* Set "status" and "errormsg" and goto failure */
163 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
164 goto srqalloc_fail3;
165 }
166 }
240 * a zero-based queue. By making sure we are aligned on at least a
241 * page, any offset we use into our queue will be the same as when we
242 * perform tavor_srq_modify() operations later.
243 */
244 wqesz = (1 << srq->srq_wq_log_wqesz);
245 srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz;
246 srq->srq_wqinfo.qa_alloc_align = PAGESIZE;
247 srq->srq_wqinfo.qa_bind_align = PAGESIZE;
248 if (srq_is_umap) {
249 srq->srq_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
250 } else {
251 srq->srq_wqinfo.qa_location = wq_location;
252 }
253 status = tavor_queue_alloc(state, &srq->srq_wqinfo, sleepflag);
254 if (status != DDI_SUCCESS) {
255 /* Set "status" and "errormsg" and goto failure */
256 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq");
257 goto srqalloc_fail4;
258 }
259 buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned;
260 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
261
262 /*
263 * Register the memory for the SRQ work queues. The memory for the SRQ
264 * must be registered in the Tavor TPT tables. This gives us the LKey
265 * to specify in the SRQ context later. Note: If the work queue is to
266 * be allocated from DDR memory, then only a "bypass" mapping is
267 * appropriate. And if the SRQ memory is user-mappable, then we force
268 * DDI_DMA_CONSISTENT mapping. Also, in order to meet the alignment
269 * restriction, we pass the "mro_bind_override_addr" flag in the call
270 * to tavor_mr_register(). This guarantees that the resulting IB vaddr
271 * will be zero-based (modulo the offset into the first page). If we
272 * fail here, we still have the bunch of resource and reference count
273 * cleanup to do.
274 */
275 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
276 IBT_MR_NOSLEEP;
277 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
278 mr_attr.mr_len = srq->srq_wqinfo.qa_size;
279 mr_attr.mr_as = NULL;
280 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
284 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
285 mr_op.mro_bind_type =
286 state->ts_cfg_profile->cp_iommu_bypass;
287 dma_xfer_mode =
288 state->ts_cfg_profile->cp_streaming_consistent;
289 if (dma_xfer_mode == DDI_DMA_STREAMING) {
290 mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
291 }
292 } else {
293 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
294 }
295 }
296 mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl;
297 mr_op.mro_bind_override_addr = 1;
298 status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
299 if (status != DDI_SUCCESS) {
300 /* Set "status" and "errormsg" and goto failure */
301 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
302 goto srqalloc_fail5;
303 }
304 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
305 addr = mr->mr_bindinfo.bi_addr;
306 lkey = mr->mr_lkey;
307
308 /*
309 * Calculate the offset between the kernel virtual address space
310 * and the IB virtual address space. This will be used when
311 * posting work requests to properly initialize each WQE.
312 */
313 srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
314 (uint64_t)mr->mr_bindinfo.bi_addr;
315
316 /*
317 * Create WQL and Wridlist for use by this SRQ
318 */
319 srq->srq_wrid_wql = tavor_wrid_wql_create(state);
320 if (srq->srq_wrid_wql == NULL) {
321 /* Set "status" and "errormsg" and goto failure */
322 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wql create");
323 goto srqalloc_fail6;
324 }
325 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wrid_wql)))
326
327 srq->srq_wridlist = tavor_wrid_get_list(1 << log_srq_size);
328 if (srq->srq_wridlist == NULL) {
329 /* Set "status" and "errormsg" and goto failure */
330 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wridlist create");
331 goto srqalloc_fail7;
332 }
333 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wridlist)))
334
335 srq->srq_wridlist->wl_srq_en = 1;
336 srq->srq_wridlist->wl_free_list_indx = -1;
337
338 /*
339 * Fill in all the return arguments (if necessary). This includes
340 * real queue size and real SGLs.
341 */
342 if (real_sizes != NULL) {
343 real_sizes->srq_wr_sz = (1 << log_srq_size);
344 real_sizes->srq_sgl_sz = srq->srq_wq_sgl;
345 }
346
347 /*
348 * Fill in the SRQC entry. This is the final step before passing
349 * ownership of the SRQC entry to the Tavor hardware. We use all of
350 * the information collected/calculated above to fill in the
351 * requisite portions of the SRQC. Note: If this SRQ is going to be
352 * used for userland access, then we need to set the UAR page number
353 * appropriately (otherwise it's a "don't care")
543 maxprot, DEVMAP_MAPPING_INVALID, NULL);
544 if (status != DDI_SUCCESS) {
545 mutex_exit(&srq->srq_lock);
546 TAVOR_WARNING(state, "failed in SRQ memory "
547 "devmap_devmem_remap()");
548 TAVOR_TNF_EXIT(tavor_srq_free);
549 return (ibc_get_ci_failure(0));
550 }
551 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
552 }
553 }
554
555 /*
556 * Put NULL into the Tavor SRQNum-to-SRQHdl list. This will allow any
557 * in-progress events to detect that the SRQ corresponding to this
558 * number has been freed.
559 */
560 state->ts_srqhdl[srqc->tr_indx] = NULL;
561
562 mutex_exit(&srq->srq_lock);
563 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq));
564 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq->srq_wridlist));
565
566 /*
567 * Reclaim SRQC entry from hardware (using the Tavor HW2SW_SRQ
568 * firmware command). If the ownership transfer fails for any reason,
569 * then it is an indication that something (either in HW or SW) has
570 * gone seriously wrong.
571 */
572 status = tavor_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry,
573 sizeof (tavor_hw_srqc_t), srqnum, sleepflag);
574 if (status != TAVOR_CMD_SUCCESS) {
575 TAVOR_WARNING(state, "failed to reclaim SRQC ownership");
576 cmn_err(CE_CONT, "Tavor: HW2SW_SRQ command failed: %08x\n",
577 status);
578 TNF_PROBE_1(tavor_srq_free_hw2sw_srq_cmd_fail,
579 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
580 TAVOR_TNF_EXIT(tavor_srq_free);
581 return (IBT_FAILURE);
582 }
583
584 /*
712 * for a zero-based queue. By making sure we are aligned on at least a
713 * page, any offset we use into our queue will be the same as it was
714 * when we allocated it at tavor_srq_alloc() time.
715 */
716 wqesz = (1 << srq->srq_wq_log_wqesz);
717 new_srqinfo.qa_size = (1 << log_srq_size) * wqesz;
718 new_srqinfo.qa_alloc_align = PAGESIZE;
719 new_srqinfo.qa_bind_align = PAGESIZE;
720 if (srq->srq_is_umap) {
721 new_srqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
722 } else {
723 new_srqinfo.qa_location = wq_location;
724 }
725 status = tavor_queue_alloc(state, &new_srqinfo, sleepflag);
726 if (status != DDI_SUCCESS) {
727 /* Set "status" and "errormsg" and goto failure */
728 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq");
729 goto srqmodify_fail;
730 }
731 buf = (uint32_t *)new_srqinfo.qa_buf_aligned;
732 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
733
734 /*
735 * Allocate the memory for the new WRE list. This will be used later
736 * when we resize the wridlist based on the new SRQ size.
737 */
738 wre_new = (tavor_wrid_entry_t *)kmem_zalloc((1 << log_srq_size) *
739 sizeof (tavor_wrid_entry_t), sleepflag);
740 if (wre_new == NULL) {
741 /* Set "status" and "errormsg" and goto failure */
742 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
743 "failed wre_new alloc");
744 goto srqmodify_fail;
745 }
746
747 /*
748 * Fill in the "bind" struct. This struct provides the majority
749 * of the information that will be used to distinguish between an
750 * "addr" binding (as is the case here) and a "buf" binding (see
751 * below). The "bind" struct is later passed to tavor_mr_mem_bind()
752 * which does most of the "heavy lifting" for the Tavor memory
753 * registration routines.
754 */
755 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind))
756 bzero(&bind, sizeof (tavor_bind_info_t));
757 bind.bi_type = TAVOR_BINDHDL_VADDR;
758 bind.bi_addr = (uint64_t)(uintptr_t)buf;
759 bind.bi_len = new_srqinfo.qa_size;
760 bind.bi_as = NULL;
/*
 * NOTE(review): operator-precedence bug. Bitwise `|` binds tighter
 * than `?:`, so IBT_MR_ENABLE_LOCAL_WRITE is OR'd only into the
 * IBT_MR_NOSLEEP arm; in the TAVOR_SLEEP case bi_flags ends up as
 * IBT_MR_SLEEP alone, without local-write permission. The intended
 * expression is:
 *     ((sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP) |
 *     IBT_MR_ENABLE_LOCAL_WRITE;
 * matching the tavor_srq_alloc() path above, which computes the sleep
 * flag into "flag" first and then ORs in IBT_MR_ENABLE_LOCAL_WRITE.
 * Fix left to a change against the complete source file.
 */
761 bind.bi_flags = sleepflag == TAVOR_SLEEP ? IBT_MR_SLEEP :
762 IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
763 if (srq->srq_is_umap) {
764 bind.bi_bypass = state->ts_cfg_profile->cp_iommu_bypass;
765 } else {
766 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
767 bind.bi_bypass =
768 state->ts_cfg_profile->cp_iommu_bypass;
769 dma_xfer_mode =
770 state->ts_cfg_profile->cp_streaming_consistent;
771 if (dma_xfer_mode == DDI_DMA_STREAMING) {
772 bind.bi_flags |= IBT_MR_NONCOHERENT;
773 }
774 } else {
775 bind.bi_bypass = TAVOR_BINDMEM_BYPASS;
882 * information for freeing up the old resources
883 */
884 old_srqinfo = srq->srq_wqinfo;
885 old_mtt = srq->srq_mrhdl->mr_mttrsrcp;
886 bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind,
887 sizeof (tavor_bind_info_t));
888
889 /* Now set the new info */
890 srq->srq_wqinfo = new_srqinfo;
891 srq->srq_wq_buf = buf;
892 srq->srq_wq_bufsz = (1 << log_srq_size);
893 bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (tavor_bind_info_t));
894 srq->srq_mrhdl->mr_mttrsrcp = mtt;
895 srq->srq_desc_off = srq_desc_off;
896 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
897
898 /* Update MR mtt pagesize */
899 mr->mr_logmttpgsz = mtt_pgsize_bits;
900 mutex_exit(&mr->mr_lock);
901
902 #ifdef __lock_lint
903 mutex_enter(&srq->srq_wrid_wql->wql_lock);
904 #else
905 if (srq->srq_wrid_wql != NULL) {
906 mutex_enter(&srq->srq_wrid_wql->wql_lock);
907 }
908 #endif
909
910 /*
911 * Initialize new wridlist, if needed.
912 *
913 * If a wridlist already is setup on an SRQ (the QP associated with an
914 * SRQ has moved "from_reset") then we must update this wridlist based
915 * on the new SRQ size. We allocate the new size of Work Request ID
916 * Entries, copy over the old entries to the new list, and
917 * re-initialize the srq wridlist in non-umap case
918 */
919 wre_old = NULL;
920 if (srq->srq_wridlist != NULL) {
921 wre_old = srq->srq_wridlist->wl_wre;
922
923 bcopy(wre_old, wre_new, srq_old_bufsz *
924 sizeof (tavor_wrid_entry_t));
925
926 /* Setup new sizes in wre */
927 srq->srq_wridlist->wl_wre = wre_new;
928 srq->srq_wridlist->wl_size = srq->srq_wq_bufsz;
929
930 if (!srq->srq_is_umap) {
931 tavor_wrid_list_srq_init(srq->srq_wridlist, srq,
932 srq_old_bufsz);
933 }
934 }
935
936 #ifdef __lock_lint
937 mutex_exit(&srq->srq_wrid_wql->wql_lock);
938 #else
939 if (srq->srq_wrid_wql != NULL) {
940 mutex_exit(&srq->srq_wrid_wql->wql_lock);
941 }
942 #endif
943
944 /*
945 * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out
946 * to a user process, then we need to call devmap_devmem_remap() to
947 * invalidate the mapping to the SRQ memory. We also need to
948 * invalidate the SRQ tracking information for the user mapping.
949 *
950 * Note: On failure, the remap really shouldn't ever happen. So, if it
951 * does, it is an indication that something has gone seriously wrong.
952 * So we print a warning message and return error (knowing, of course,
953 * that the "old" SRQ memory will be leaked)
954 */
955 if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) {
956 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
957 status = devmap_devmem_remap(srq->srq_umap_dhp,
958 state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot,
959 DEVMAP_MAPPING_INVALID, NULL);
960 if (status != DDI_SUCCESS) {
961 mutex_exit(&srq->srq_lock);
962 TAVOR_WARNING(state, "failed in SRQ memory "
|
61 ibt_srq_flags_t flags;
62 tavor_rsrc_t *srqc, *rsrc;
63 tavor_hw_srqc_t srqc_entry;
64 uint32_t *buf;
65 tavor_srqhdl_t srq;
66 tavor_umap_db_entry_t *umapdb;
67 ibt_mr_attr_t mr_attr;
68 tavor_mr_options_t mr_op;
69 tavor_mrhdl_t mr;
70 uint64_t addr;
71 uint64_t value, srq_desc_off;
72 uint32_t lkey;
73 uint32_t log_srq_size;
74 uint32_t uarpg;
75 uint_t wq_location, dma_xfer_mode, srq_is_umap;
76 int flag, status;
77 char *errormsg;
78 uint_t max_sgl;
79 uint_t wqesz;
80
81 TAVOR_TNF_ENTER(tavor_srq_alloc);
82
83 /*
84 * Check the "options" flag. Currently this flag tells the driver
85 * whether or not the SRQ's work queues should be come from normal
86 * system memory or whether they should be allocated from DDR memory.
87 */
88 if (op == NULL) {
89 wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
90 } else {
91 wq_location = op->srqo_wq_loc;
92 }
93
94 /*
95 * Extract the necessary info from the tavor_srq_info_t structure
96 */
97 real_sizes = srqinfo->srqi_real_sizes;
98 sizes = srqinfo->srqi_sizes;
99 pd = srqinfo->srqi_pd;
100 ibt_srqhdl = srqinfo->srqi_ibt_srqhdl;
124 /* Increase PD refcnt */
125 tavor_pd_refcnt_inc(pd);
126
127 /* Allocate an SRQ context entry */
128 status = tavor_rsrc_alloc(state, TAVOR_SRQC, 1, sleepflag, &srqc);
129 if (status != DDI_SUCCESS) {
130 /* Set "status" and "errormsg" and goto failure */
131 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ context");
132 goto srqalloc_fail1;
133 }
134
135 /* Allocate the SRQ Handle entry */
136 status = tavor_rsrc_alloc(state, TAVOR_SRQHDL, 1, sleepflag, &rsrc);
137 if (status != DDI_SUCCESS) {
138 /* Set "status" and "errormsg" and goto failure */
139 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ handle");
140 goto srqalloc_fail2;
141 }
142
143 srq = (tavor_srqhdl_t)rsrc->tr_addr;
144
145 srq->srq_srqnum = srqc->tr_indx; /* just use index */
146
147 /*
148 * If this will be a user-mappable SRQ, then allocate an entry for
149 * the "userland resources database". This will later be added to
150 * the database (after all further SRQ operations are successful).
151 * If we fail here, we must undo the reference counts and the
152 * previous resource allocation.
153 */
154 if (srq_is_umap) {
155 umapdb = tavor_umap_db_alloc(state->ts_instance,
156 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC,
157 (uint64_t)(uintptr_t)rsrc);
158 if (umapdb == NULL) {
159 /* Set "status" and "errormsg" and goto failure */
160 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
161 goto srqalloc_fail3;
162 }
163 }
237 * a zero-based queue. By making sure we are aligned on at least a
238 * page, any offset we use into our queue will be the same as when we
239 * perform tavor_srq_modify() operations later.
240 */
241 wqesz = (1 << srq->srq_wq_log_wqesz);
242 srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz;
243 srq->srq_wqinfo.qa_alloc_align = PAGESIZE;
244 srq->srq_wqinfo.qa_bind_align = PAGESIZE;
245 if (srq_is_umap) {
246 srq->srq_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
247 } else {
248 srq->srq_wqinfo.qa_location = wq_location;
249 }
250 status = tavor_queue_alloc(state, &srq->srq_wqinfo, sleepflag);
251 if (status != DDI_SUCCESS) {
252 /* Set "status" and "errormsg" and goto failure */
253 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq");
254 goto srqalloc_fail4;
255 }
256 buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned;
257
258 /*
259 * Register the memory for the SRQ work queues. The memory for the SRQ
260 * must be registered in the Tavor TPT tables. This gives us the LKey
261 * to specify in the SRQ context later. Note: If the work queue is to
262 * be allocated from DDR memory, then only a "bypass" mapping is
263 * appropriate. And if the SRQ memory is user-mappable, then we force
264 * DDI_DMA_CONSISTENT mapping. Also, in order to meet the alignment
265 * restriction, we pass the "mro_bind_override_addr" flag in the call
266 * to tavor_mr_register(). This guarantees that the resulting IB vaddr
267 * will be zero-based (modulo the offset into the first page). If we
268 * fail here, we still have the bunch of resource and reference count
269 * cleanup to do.
270 */
271 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
272 IBT_MR_NOSLEEP;
273 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
274 mr_attr.mr_len = srq->srq_wqinfo.qa_size;
275 mr_attr.mr_as = NULL;
276 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
280 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
281 mr_op.mro_bind_type =
282 state->ts_cfg_profile->cp_iommu_bypass;
283 dma_xfer_mode =
284 state->ts_cfg_profile->cp_streaming_consistent;
285 if (dma_xfer_mode == DDI_DMA_STREAMING) {
286 mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
287 }
288 } else {
289 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
290 }
291 }
292 mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl;
293 mr_op.mro_bind_override_addr = 1;
294 status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
295 if (status != DDI_SUCCESS) {
296 /* Set "status" and "errormsg" and goto failure */
297 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
298 goto srqalloc_fail5;
299 }
300 addr = mr->mr_bindinfo.bi_addr;
301 lkey = mr->mr_lkey;
302
303 /*
304 * Calculate the offset between the kernel virtual address space
305 * and the IB virtual address space. This will be used when
306 * posting work requests to properly initialize each WQE.
307 */
308 srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
309 (uint64_t)mr->mr_bindinfo.bi_addr;
310
311 /*
312 * Create WQL and Wridlist for use by this SRQ
313 */
314 srq->srq_wrid_wql = tavor_wrid_wql_create(state);
315 if (srq->srq_wrid_wql == NULL) {
316 /* Set "status" and "errormsg" and goto failure */
317 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wql create");
318 goto srqalloc_fail6;
319 }
320
321 srq->srq_wridlist = tavor_wrid_get_list(1 << log_srq_size);
322 if (srq->srq_wridlist == NULL) {
323 /* Set "status" and "errormsg" and goto failure */
324 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wridlist create");
325 goto srqalloc_fail7;
326 }
327
328 srq->srq_wridlist->wl_srq_en = 1;
329 srq->srq_wridlist->wl_free_list_indx = -1;
330
331 /*
332 * Fill in all the return arguments (if necessary). This includes
333 * real queue size and real SGLs.
334 */
335 if (real_sizes != NULL) {
336 real_sizes->srq_wr_sz = (1 << log_srq_size);
337 real_sizes->srq_sgl_sz = srq->srq_wq_sgl;
338 }
339
340 /*
341 * Fill in the SRQC entry. This is the final step before passing
342 * ownership of the SRQC entry to the Tavor hardware. We use all of
343 * the information collected/calculated above to fill in the
344 * requisite portions of the SRQC. Note: If this SRQ is going to be
345 * used for userland access, then we need to set the UAR page number
346 * appropriately (otherwise it's a "don't care")
536 maxprot, DEVMAP_MAPPING_INVALID, NULL);
537 if (status != DDI_SUCCESS) {
538 mutex_exit(&srq->srq_lock);
539 TAVOR_WARNING(state, "failed in SRQ memory "
540 "devmap_devmem_remap()");
541 TAVOR_TNF_EXIT(tavor_srq_free);
542 return (ibc_get_ci_failure(0));
543 }
544 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
545 }
546 }
547
548 /*
549 * Put NULL into the Tavor SRQNum-to-SRQHdl list. This will allow any
550 * in-progress events to detect that the SRQ corresponding to this
551 * number has been freed.
552 */
553 state->ts_srqhdl[srqc->tr_indx] = NULL;
554
555 mutex_exit(&srq->srq_lock);
556
557 /*
558 * Reclaim SRQC entry from hardware (using the Tavor HW2SW_SRQ
559 * firmware command). If the ownership transfer fails for any reason,
560 * then it is an indication that something (either in HW or SW) has
561 * gone seriously wrong.
562 */
563 status = tavor_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry,
564 sizeof (tavor_hw_srqc_t), srqnum, sleepflag);
565 if (status != TAVOR_CMD_SUCCESS) {
566 TAVOR_WARNING(state, "failed to reclaim SRQC ownership");
567 cmn_err(CE_CONT, "Tavor: HW2SW_SRQ command failed: %08x\n",
568 status);
569 TNF_PROBE_1(tavor_srq_free_hw2sw_srq_cmd_fail,
570 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
571 TAVOR_TNF_EXIT(tavor_srq_free);
572 return (IBT_FAILURE);
573 }
574
575 /*
703 * for a zero-based queue. By making sure we are aligned on at least a
704 * page, any offset we use into our queue will be the same as it was
705 * when we allocated it at tavor_srq_alloc() time.
706 */
707 wqesz = (1 << srq->srq_wq_log_wqesz);
708 new_srqinfo.qa_size = (1 << log_srq_size) * wqesz;
709 new_srqinfo.qa_alloc_align = PAGESIZE;
710 new_srqinfo.qa_bind_align = PAGESIZE;
711 if (srq->srq_is_umap) {
712 new_srqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
713 } else {
714 new_srqinfo.qa_location = wq_location;
715 }
716 status = tavor_queue_alloc(state, &new_srqinfo, sleepflag);
717 if (status != DDI_SUCCESS) {
718 /* Set "status" and "errormsg" and goto failure */
719 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq");
720 goto srqmodify_fail;
721 }
722 buf = (uint32_t *)new_srqinfo.qa_buf_aligned;
723
724 /*
725 * Allocate the memory for the new WRE list. This will be used later
726 * when we resize the wridlist based on the new SRQ size.
727 */
728 wre_new = (tavor_wrid_entry_t *)kmem_zalloc((1 << log_srq_size) *
729 sizeof (tavor_wrid_entry_t), sleepflag);
730 if (wre_new == NULL) {
731 /* Set "status" and "errormsg" and goto failure */
732 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
733 "failed wre_new alloc");
734 goto srqmodify_fail;
735 }
736
737 /*
738 * Fill in the "bind" struct. This struct provides the majority
739 * of the information that will be used to distinguish between an
740 * "addr" binding (as is the case here) and a "buf" binding (see
741 * below). The "bind" struct is later passed to tavor_mr_mem_bind()
742 * which does most of the "heavy lifting" for the Tavor memory
743 * registration routines.
744 */
745 bzero(&bind, sizeof (tavor_bind_info_t));
746 bind.bi_type = TAVOR_BINDHDL_VADDR;
747 bind.bi_addr = (uint64_t)(uintptr_t)buf;
748 bind.bi_len = new_srqinfo.qa_size;
749 bind.bi_as = NULL;
/*
 * NOTE(review): same operator-precedence bug as in the first excerpt:
 * `|` binds tighter than `?:`, so IBT_MR_ENABLE_LOCAL_WRITE is only
 * applied in the IBT_MR_NOSLEEP case and is dropped when
 * sleepflag == TAVOR_SLEEP. Intended:
 *     ((sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP) |
 *     IBT_MR_ENABLE_LOCAL_WRITE;
 */
750 bind.bi_flags = sleepflag == TAVOR_SLEEP ? IBT_MR_SLEEP :
751 IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
752 if (srq->srq_is_umap) {
753 bind.bi_bypass = state->ts_cfg_profile->cp_iommu_bypass;
754 } else {
755 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
756 bind.bi_bypass =
757 state->ts_cfg_profile->cp_iommu_bypass;
758 dma_xfer_mode =
759 state->ts_cfg_profile->cp_streaming_consistent;
760 if (dma_xfer_mode == DDI_DMA_STREAMING) {
761 bind.bi_flags |= IBT_MR_NONCOHERENT;
762 }
763 } else {
764 bind.bi_bypass = TAVOR_BINDMEM_BYPASS;
871 * information for freeing up the old resources
872 */
873 old_srqinfo = srq->srq_wqinfo;
874 old_mtt = srq->srq_mrhdl->mr_mttrsrcp;
875 bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind,
876 sizeof (tavor_bind_info_t));
877
878 /* Now set the new info */
879 srq->srq_wqinfo = new_srqinfo;
880 srq->srq_wq_buf = buf;
881 srq->srq_wq_bufsz = (1 << log_srq_size);
882 bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (tavor_bind_info_t));
883 srq->srq_mrhdl->mr_mttrsrcp = mtt;
884 srq->srq_desc_off = srq_desc_off;
885 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
886
887 /* Update MR mtt pagesize */
888 mr->mr_logmttpgsz = mtt_pgsize_bits;
889 mutex_exit(&mr->mr_lock);
890
891 if (srq->srq_wrid_wql != NULL) {
892 mutex_enter(&srq->srq_wrid_wql->wql_lock);
893 }
894
895 /*
896 * Initialize new wridlist, if needed.
897 *
898 * If a wridlist already is setup on an SRQ (the QP associated with an
899 * SRQ has moved "from_reset") then we must update this wridlist based
900 * on the new SRQ size. We allocate the new size of Work Request ID
901 * Entries, copy over the old entries to the new list, and
902 * re-initialize the srq wridlist in non-umap case
903 */
904 wre_old = NULL;
905 if (srq->srq_wridlist != NULL) {
906 wre_old = srq->srq_wridlist->wl_wre;
907
908 bcopy(wre_old, wre_new, srq_old_bufsz *
909 sizeof (tavor_wrid_entry_t));
910
911 /* Setup new sizes in wre */
912 srq->srq_wridlist->wl_wre = wre_new;
913 srq->srq_wridlist->wl_size = srq->srq_wq_bufsz;
914
915 if (!srq->srq_is_umap) {
916 tavor_wrid_list_srq_init(srq->srq_wridlist, srq,
917 srq_old_bufsz);
918 }
919 }
920
921 if (srq->srq_wrid_wql != NULL) {
922 mutex_exit(&srq->srq_wrid_wql->wql_lock);
923 }
924
925 /*
926 * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out
927 * to a user process, then we need to call devmap_devmem_remap() to
928 * invalidate the mapping to the SRQ memory. We also need to
929 * invalidate the SRQ tracking information for the user mapping.
930 *
931 * Note: On failure, the remap really shouldn't ever happen. So, if it
932 * does, it is an indication that something has gone seriously wrong.
933 * So we print a warning message and return error (knowing, of course,
934 * that the "old" SRQ memory will be leaked)
935 */
936 if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) {
937 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
938 status = devmap_devmem_remap(srq->srq_umap_dhp,
939 state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot,
940 DEVMAP_MAPPING_INVALID, NULL);
941 if (status != DDI_SUCCESS) {
942 mutex_exit(&srq->srq_lock);
943 TAVOR_WARNING(state, "failed in SRQ memory "
|