8368 remove warlock leftovers from usr/src/uts
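Warlock was Sun's static lock-analysis tool; the _NOTE() annotations it consumed come from <sys/note.h> and expand to nothing in a normal compile, so the deletions below (both the _NOTE() lines and the #ifdef __lock_lint blocks) do not change the generated object code. A minimal sketch of the kind of annotation being removed, with hypothetical structure and function names:

	#include <sys/note.h>

	typedef struct foo {
		int	f_val;
	} foo_t;

	static void
	foo_init(foo_t *fp)
	{
		/*
		 * Annotation for the warlock lock-analysis tool: the
		 * object is still private to the allocating thread, so
		 * unlocked access is safe.  _NOTE() expands to nothing
		 * outside of lint/warlock builds.
		 */
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*fp))
		fp->f_val = 0;
	}
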
--- old/usr/src/uts/common/io/ib/adapters/tavor/tavor_srq.c
+++ new/usr/src/uts/common/io/ib/adapters/tavor/tavor_srq.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * tavor_srq.c
29 29 * Tavor Shared Receive Queue Processing Routines
30 30 *
31 31 * Implements all the routines necessary for allocating, freeing, querying,
32 32 * modifying and posting shared receive queues.
33 33 */
34 34
35 35 #include <sys/sysmacros.h>
36 36 #include <sys/types.h>
37 37 #include <sys/conf.h>
38 38 #include <sys/ddi.h>
39 39 #include <sys/sunddi.h>
40 40 #include <sys/modctl.h>
41 41 #include <sys/bitmap.h>
42 42
43 43 #include <sys/ib/adapters/tavor/tavor.h>
44 44
45 45 static void tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
46 46 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);
47 47
48 48 /*
49 49 * tavor_srq_alloc()
50 50 * Context: Can be called only from user or kernel context.
51 51 */
52 52 int
53 53 tavor_srq_alloc(tavor_state_t *state, tavor_srq_info_t *srqinfo,
54 54 uint_t sleepflag, tavor_srq_options_t *op)
55 55 {
56 56 ibt_srq_hdl_t ibt_srqhdl;
57 57 tavor_pdhdl_t pd;
58 58 ibt_srq_sizes_t *sizes;
59 59 ibt_srq_sizes_t *real_sizes;
60 60 tavor_srqhdl_t *srqhdl;
61 61 ibt_srq_flags_t flags;
62 62 tavor_rsrc_t *srqc, *rsrc;
63 63 tavor_hw_srqc_t srqc_entry;
64 64 uint32_t *buf;
65 65 tavor_srqhdl_t srq;
66 66 tavor_umap_db_entry_t *umapdb;
67 67 ibt_mr_attr_t mr_attr;
68 68 tavor_mr_options_t mr_op;
69 69 tavor_mrhdl_t mr;
70 70 uint64_t addr;
71 71 uint64_t value, srq_desc_off;
72 72 uint32_t lkey;
73 73 uint32_t log_srq_size;
74 74 uint32_t uarpg;
75 75 uint_t wq_location, dma_xfer_mode, srq_is_umap;
76 76 int flag, status;
77 77 char *errormsg;
78 78 uint_t max_sgl;
79 79 uint_t wqesz;
80 80
81 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes))
82 -
83 81 TAVOR_TNF_ENTER(tavor_srq_alloc);
84 82
85 83 /*
86 84 * Check the "options" flag. Currently this flag tells the driver
87 85 * whether or not the SRQ's work queues should come from normal
88 86 * system memory or whether they should be allocated from DDR memory.
89 87 */
90 88 if (op == NULL) {
91 89 wq_location = TAVOR_QUEUE_LOCATION_NORMAL;
92 90 } else {
93 91 wq_location = op->srqo_wq_loc;
94 92 }
95 93
96 94 /*
97 95 * Extract the necessary info from the tavor_srq_info_t structure
98 96 */
99 97 real_sizes = srqinfo->srqi_real_sizes;
100 98 sizes = srqinfo->srqi_sizes;
101 99 pd = srqinfo->srqi_pd;
102 100 ibt_srqhdl = srqinfo->srqi_ibt_srqhdl;
103 101 flags = srqinfo->srqi_flags;
104 102 srqhdl = srqinfo->srqi_srqhdl;
105 103
106 104 /*
107 105 * Determine whether SRQ is being allocated for userland access or
108 106 * whether it is being allocated for kernel access. If the SRQ is
109 107 * being allocated for userland access, then lookup the UAR doorbell
110 108 * page number for the current process. Note: If this is not found
111 109 * (e.g. if the process has not previously open()'d the Tavor driver),
112 110 * then an error is returned.
113 111 */
114 112 srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0;
115 113 if (srq_is_umap) {
116 114 status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(),
117 115 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
118 116 if (status != DDI_SUCCESS) {
119 117 /* Set "status" and "errormsg" and goto failure */
120 118 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "failed UAR page");
121 119 goto srqalloc_fail;
122 120 }
123 121 uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx;
124 122 }
125 123
126 124 /* Increase PD refcnt */
127 125 tavor_pd_refcnt_inc(pd);
128 126
129 127 /* Allocate an SRQ context entry */
130 128 status = tavor_rsrc_alloc(state, TAVOR_SRQC, 1, sleepflag, &srqc);
131 129 if (status != DDI_SUCCESS) {
132 130 /* Set "status" and "errormsg" and goto failure */
133 131 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ context");
134 132 goto srqalloc_fail1;
135 133 }
136 134
137 135 /* Allocate the SRQ Handle entry */
138 136 status = tavor_rsrc_alloc(state, TAVOR_SRQHDL, 1, sleepflag, &rsrc);
139 137 if (status != DDI_SUCCESS) {
140 138 /* Set "status" and "errormsg" and goto failure */
141 139 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed SRQ handle");
142 140 goto srqalloc_fail2;
143 141 }
144 142
145 143 srq = (tavor_srqhdl_t)rsrc->tr_addr;
146 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq))
147 144
148 145 srq->srq_srqnum = srqc->tr_indx; /* just use index */
149 146
150 147 /*
151 148 * If this will be a user-mappable SRQ, then allocate an entry for
152 149 * the "userland resources database". This will later be added to
153 150 * the database (after all further SRQ operations are successful).
154 151 * If we fail here, we must undo the reference counts and the
155 152 * previous resource allocation.
156 153 */
157 154 if (srq_is_umap) {
158 155 umapdb = tavor_umap_db_alloc(state->ts_instance,
159 156 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC,
160 157 (uint64_t)(uintptr_t)rsrc);
161 158 if (umapdb == NULL) {
162 159 /* Set "status" and "errormsg" and goto failure */
163 160 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
164 161 goto srqalloc_fail3;
165 162 }
166 163 }
167 164
168 165 /*
169 166 * Calculate the appropriate size for the SRQ.
170 167 * Note: All Tavor SRQs must be a power-of-2 in size. Also
171 168 * they may not be any smaller than TAVOR_SRQ_MIN_SIZE. This step
172 169 * is to round the requested size up to the next highest power-of-2
173 170 */
174 171 sizes->srq_wr_sz = max(sizes->srq_wr_sz, TAVOR_SRQ_MIN_SIZE);
175 172 log_srq_size = highbit(sizes->srq_wr_sz);
176 173 if (ISP2(sizes->srq_wr_sz)) {
177 174 log_srq_size = log_srq_size - 1;
178 175 }
179 176
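
The clamp-and-round step above is a standard log2 round-up: highbit() returns the 1-based index of the most significant set bit, which is one too large exactly when the value is already a power of two. A self-contained sketch of the same computation (hypothetical helper name; highbit(), ISP2() and max() come from <sys/sysmacros.h>, already included by this file):

	/*
	 * Round a requested SRQ size up to a power of two and return
	 * its log2; the real queue size is then (1 << log2).
	 */
	static uint32_t
	srq_log_size(uint32_t req, uint32_t min_sz)
	{
		uint32_t sz = max(req, min_sz);
		uint32_t log2 = highbit(sz);	/* 1-based MSB index */

		if (ISP2(sz))
			log2--;		/* exact power of two: no round-up */
		return (log2);
	}

For example, a request for 1000 work requests gives highbit(1000) == 10 and ISP2(1000) == 0, so log2 == 10 and the queue is sized at 1024 entries.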
180 177 /*
181 178 * Next we verify that the rounded-up size is valid (i.e. consistent
182 179 * with the device limits and/or software-configured limits). If not,
183 180 * then obviously we have a lot of cleanup to do before returning.
184 181 */
185 182 if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) {
186 183 /* Set "status" and "errormsg" and goto failure */
187 184 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size");
188 185 goto srqalloc_fail4;
189 186 }
190 187
191 188 /*
192 189 * Next we verify that the requested number of SGL is valid (i.e.
193 190 * consistent with the device limits and/or software-configured
194 191 * limits). If not, then obviously the same cleanup needs to be done.
195 192 */
196 193 max_sgl = state->ts_cfg_profile->cp_srq_max_sgl;
197 194 if (sizes->srq_sgl_sz > max_sgl) {
198 195 /* Set "status" and "errormsg" and goto failure */
199 196 TAVOR_TNF_FAIL(IBT_HCA_SGL_EXCEEDED, "max SRQ SGL");
200 197 goto srqalloc_fail4;
201 198 }
202 199
203 200 /*
204 201 * Determine the SRQ's WQE sizes. This depends on the requested
205 202 * number of SGLs. Note: This also has the side-effect of
206 203 * calculating the real number of SGLs (for the calculated WQE size)
207 204 */
208 205 tavor_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz,
209 206 TAVOR_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz,
210 207 &srq->srq_wq_sgl);
211 208
212 209 /*
213 210 * Allocate the memory for SRQ work queues. Note: The location from
214 211 * which we will allocate these work queues has been passed in through
215 212 * the tavor_qp_options_t structure. Since Tavor work queues are not
216 213 * allowed to cross a 32-bit (4GB) boundary, the alignment of the work
217 214 * queue memory is very important. We used to allocate work queues
218 215 * (the combined receive and send queues) so that they would be aligned
219 216 * on their combined size. That alignment guaranteed that they would
220 217 * never cross the 4GB boundary (Tavor work queues are on the order of
221 218 * MBs at maximum). Now we are able to relax this alignment constraint
222 219 * by ensuring that the IB address assigned to the queue memory (as a
223 220 * result of the tavor_mr_register() call) is offset from zero.
224 221 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
225 222 * guarantee the alignment, but when attempting to use IOMMU bypass
226 223 * mode we found that we were not allowed to specify any alignment that
227 224 * was more restrictive than the system page size. So we avoided this
228 225 * constraint by passing two alignment values, one for the memory
229 226 * allocation itself and the other for the DMA handle (for later bind).
230 227 * This used to cause more memory than necessary to be allocated (in
231 228 * order to guarantee the more restrictive alignment constraint). But
232 229 * by guaranteeing the zero-based IB virtual address for the queue, we
233 230 * are able to conserve this memory.
234 231 *
235 232 * Note: If SRQ is not user-mappable, then it may come from either
236 233 * kernel system memory or from HCA-attached local DDR memory.
237 234 *
238 235 * Note2: We align this queue on a pagesize boundary. This is required
239 236 * to make sure that all the resulting IB addresses will start at 0, for
240 237 * a zero-based queue. By making sure we are aligned on at least a
241 238 * page, any offset we use into our queue will be the same as when we
242 239 * perform tavor_srq_modify() operations later.
243 240 */
244 241 wqesz = (1 << srq->srq_wq_log_wqesz);
245 242 srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz;
246 243 srq->srq_wqinfo.qa_alloc_align = PAGESIZE;
247 244 srq->srq_wqinfo.qa_bind_align = PAGESIZE;
248 245 if (srq_is_umap) {
249 246 srq->srq_wqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
250 247 } else {
251 248 srq->srq_wqinfo.qa_location = wq_location;
252 249 }
253 250 status = tavor_queue_alloc(state, &srq->srq_wqinfo, sleepflag);
254 251 if (status != DDI_SUCCESS) {
255 252 /* Set "status" and "errormsg" and goto failure */
256 253 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq");
257 254 goto srqalloc_fail4;
258 255 }
259 256 buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned;
260 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
261 257
262 258 /*
263 259 * Register the memory for the SRQ work queues. The memory for the SRQ
264 260 * must be registered in the Tavor TPT tables. This gives us the LKey
265 261 * to specify in the SRQ context later. Note: If the work queue is to
266 262 * be allocated from DDR memory, then only a "bypass" mapping is
267 263 * appropriate. And if the SRQ memory is user-mappable, then we force
268 264 * DDI_DMA_CONSISTENT mapping. Also, in order to meet the alignment
269 265 * restriction, we pass the "mro_bind_override_addr" flag in the call
270 266 * to tavor_mr_register(). This guarantees that the resulting IB vaddr
271 267 * will be zero-based (modulo the offset into the first page). If we
272 268 * fail here, we still have the bunch of resource and reference count
273 269 * cleanup to do.
274 270 */
275 271 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
276 272 IBT_MR_NOSLEEP;
277 273 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
278 274 mr_attr.mr_len = srq->srq_wqinfo.qa_size;
279 275 mr_attr.mr_as = NULL;
280 276 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
281 277 if (srq_is_umap) {
282 278 mr_op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
283 279 } else {
284 280 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
285 281 mr_op.mro_bind_type =
286 282 state->ts_cfg_profile->cp_iommu_bypass;
287 283 dma_xfer_mode =
288 284 state->ts_cfg_profile->cp_streaming_consistent;
289 285 if (dma_xfer_mode == DDI_DMA_STREAMING) {
290 286 mr_attr.mr_flags |= IBT_MR_NONCOHERENT;
291 287 }
292 288 } else {
293 289 mr_op.mro_bind_type = TAVOR_BINDMEM_BYPASS;
294 290 }
295 291 }
296 292 mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl;
297 293 mr_op.mro_bind_override_addr = 1;
298 294 status = tavor_mr_register(state, pd, &mr_attr, &mr, &mr_op);
299 295 if (status != DDI_SUCCESS) {
300 296 /* Set "status" and "errormsg" and goto failure */
301 297 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
302 298 goto srqalloc_fail5;
303 299 }
304 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
305 300 addr = mr->mr_bindinfo.bi_addr;
306 301 lkey = mr->mr_lkey;
307 302
308 303 /*
309 304 * Calculate the offset between the kernel virtual address space
310 305 * and the IB virtual address space. This will be used when
311 306 * posting work requests to properly initialize each WQE.
312 307 */
313 308 srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
314 309 (uint64_t)mr->mr_bindinfo.bi_addr;
315 310
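
Because the memory was registered with mro_bind_override_addr set, bi_addr is zero-based, and srq_desc_off is the constant distance between the kernel mapping and the IB address space. Posting code can then turn any kernel WQE pointer back into the address the hardware expects; a hedged sketch with a hypothetical helper name:

	/*
	 * srq_desc_off == kernel_vaddr - ib_vaddr, so subtracting it
	 * from a kernel WQE pointer recovers the IB virtual address
	 * that gets written into descriptors for the hardware.
	 */
	static uint64_t
	wqe_kva_to_ibaddr(uint64_t wqe_kva, uint64_t srq_desc_off)
	{
		return (wqe_kva - srq_desc_off);
	}
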
316 311 /*
317 312 * Create WQL and Wridlist for use by this SRQ
318 313 */
319 314 srq->srq_wrid_wql = tavor_wrid_wql_create(state);
320 315 if (srq->srq_wrid_wql == NULL) {
321 316 /* Set "status" and "errormsg" and goto failure */
322 317 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wql create");
323 318 goto srqalloc_fail6;
324 319 }
325 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wrid_wql)))
326 320
327 321 srq->srq_wridlist = tavor_wrid_get_list(1 << log_srq_size);
328 322 if (srq->srq_wridlist == NULL) {
329 323 /* Set "status" and "errormsg" and goto failure */
330 324 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed wridlist create");
331 325 goto srqalloc_fail7;
332 326 }
333 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*(srq->srq_wridlist)))
334 327
335 328 srq->srq_wridlist->wl_srq_en = 1;
336 329 srq->srq_wridlist->wl_free_list_indx = -1;
337 330
338 331 /*
339 332 * Fill in all the return arguments (if necessary). This includes
340 333 * real queue size and real SGLs.
341 334 */
342 335 if (real_sizes != NULL) {
343 336 real_sizes->srq_wr_sz = (1 << log_srq_size);
344 337 real_sizes->srq_sgl_sz = srq->srq_wq_sgl;
345 338 }
346 339
347 340 /*
348 341 * Fill in the SRQC entry. This is the final step before passing
349 342 * ownership of the SRQC entry to the Tavor hardware. We use all of
350 343 * the information collected/calculated above to fill in the
351 344 * requisite portions of the SRQC. Note: If this SRQ is going to be
352 345 * used for userland access, then we need to set the UAR page number
353 346 * appropriately (otherwise it's a "don't care")
354 347 */
355 348 bzero(&srqc_entry, sizeof (tavor_hw_srqc_t));
356 349 srqc_entry.wqe_addr_h = (addr >> 32);
357 350 srqc_entry.next_wqe_addr_l = 0;
358 351 srqc_entry.ds = (wqesz >> 4);
359 352 srqc_entry.state = TAVOR_SRQ_STATE_HW_OWNER;
360 353 srqc_entry.pd = pd->pd_pdnum;
361 354 srqc_entry.lkey = lkey;
362 355 srqc_entry.wqe_cnt = 0;
363 356 if (srq_is_umap) {
364 357 srqc_entry.uar = uarpg;
365 358 } else {
366 359 srqc_entry.uar = 0;
367 360 }
368 361
369 362 /*
370 363 * Write the SRQC entry to hardware. Lastly, we pass ownership of
371 364 * the entry to the hardware (using the Tavor SW2HW_SRQ firmware
372 365 * command). Note: In general, this operation shouldn't fail. But
373 366 * if it does, we have to undo everything we've done above before
374 367 * returning error.
375 368 */
376 369 status = tavor_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry,
377 370 sizeof (tavor_hw_srqc_t), srq->srq_srqnum,
378 371 sleepflag);
379 372 if (status != TAVOR_CMD_SUCCESS) {
380 373 cmn_err(CE_CONT, "Tavor: SW2HW_SRQ command failed: %08x\n",
381 374 status);
382 375 TNF_PROBE_1(tavor_srq_alloc_sw2hw_srq_cmd_fail,
383 376 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
384 377 /* Set "status" and "errormsg" and goto failure */
385 378 TAVOR_TNF_FAIL(IBT_FAILURE, "tavor SW2HW_SRQ command");
386 379 goto srqalloc_fail8;
387 380 }
388 381
389 382 /*
390 383 * Fill in the rest of the Tavor SRQ handle. We can update
391 384 * the following fields for use in further operations on the SRQ.
392 385 */
393 386 srq->srq_srqcrsrcp = srqc;
394 387 srq->srq_rsrcp = rsrc;
395 388 srq->srq_mrhdl = mr;
396 389 srq->srq_refcnt = 0;
397 390 srq->srq_is_umap = srq_is_umap;
398 391 srq->srq_uarpg = (srq->srq_is_umap) ? uarpg : 0;
399 392 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
400 393 srq->srq_pdhdl = pd;
401 394 srq->srq_wq_lastwqeindx = -1;
402 395 srq->srq_wq_bufsz = (1 << log_srq_size);
403 396 srq->srq_wq_buf = buf;
404 397 srq->srq_desc_off = srq_desc_off;
405 398 srq->srq_hdlrarg = (void *)ibt_srqhdl;
406 399 srq->srq_state = 0;
407 400 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
408 401 srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl;
409 402
410 403 /* Determine if later ddi_dma_sync will be necessary */
411 404 srq->srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo);
412 405
413 406 /*
414 407 * Put SRQ handle in Tavor SRQNum-to-SRQhdl list. Then fill in the
415 408 * "srqhdl" and return success
416 409 */
417 410 ASSERT(state->ts_srqhdl[srqc->tr_indx] == NULL);
418 411 state->ts_srqhdl[srqc->tr_indx] = srq;
419 412
420 413 /*
421 414 * If this is a user-mappable SRQ, then we need to insert the
422 415 * previously allocated entry into the "userland resources database".
423 416 * This will allow for later lookup during devmap() (i.e. mmap())
424 417 * calls.
425 418 */
426 419 if (srq->srq_is_umap) {
427 420 tavor_umap_db_add(umapdb);
428 421 } else {
429 422 mutex_enter(&srq->srq_wrid_wql->wql_lock);
430 423 tavor_wrid_list_srq_init(srq->srq_wridlist, srq, 0);
431 424 mutex_exit(&srq->srq_wrid_wql->wql_lock);
432 425 }
433 426
434 427 *srqhdl = srq;
435 428
436 429 TAVOR_TNF_EXIT(tavor_srq_alloc);
437 430 return (status);
438 431
439 432 /*
440 433 * The following is cleanup for all possible failure cases in this routine
441 434 */
442 435 srqalloc_fail8:
443 436 kmem_free(srq->srq_wridlist->wl_wre, srq->srq_wridlist->wl_size *
444 437 sizeof (tavor_wrid_entry_t));
445 438 kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t));
446 439 srqalloc_fail7:
447 440 tavor_wql_refcnt_dec(srq->srq_wrid_wql);
448 441 srqalloc_fail6:
449 442 if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
450 443 TAVOR_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) {
451 444 TAVOR_WARNING(state, "failed to deregister SRQ memory");
452 445 }
453 446 srqalloc_fail5:
454 447 tavor_queue_free(state, &srq->srq_wqinfo);
455 448 srqalloc_fail4:
456 449 if (srq_is_umap) {
457 450 tavor_umap_db_free(umapdb);
458 451 }
459 452 srqalloc_fail3:
460 453 tavor_rsrc_free(state, &rsrc);
461 454 srqalloc_fail2:
462 455 tavor_rsrc_free(state, &srqc);
463 456 srqalloc_fail1:
464 457 tavor_pd_refcnt_dec(pd);
465 458 srqalloc_fail:
466 459 TNF_PROBE_1(tavor_srq_alloc_fail, TAVOR_TNF_ERROR, "",
467 460 tnf_string, msg, errormsg);
468 461 TAVOR_TNF_EXIT(tavor_srq_alloc);
469 462 return (status);
470 463 }
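
The srqalloc_fail* ladder above releases resources in the reverse order of their acquisition; each failure site jumps to the label that undoes exactly what has been acquired so far and then falls through the earlier labels. A generic sketch of the idiom (all names hypothetical):

	typedef struct res res_t;
	extern int res_alloc(res_t **);
	extern void res_free(res_t *);

	static int
	alloc_pair(res_t **ap, res_t **bp)
	{
		int status;

		if ((status = res_alloc(ap)) != 0)
			goto fail1;
		if ((status = res_alloc(bp)) != 0)
			goto fail2;
		return (0);

	fail2:
		res_free(*ap);		/* undo the first allocation */
	fail1:
		return (status);	/* nothing acquired before fail1 */
	}
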
471 464
472 465
473 466 /*
474 467 * tavor_srq_free()
475 468 * Context: Can be called only from user or kernel context.
476 469 */
477 470 /* ARGSUSED */
478 471 int
479 472 tavor_srq_free(tavor_state_t *state, tavor_srqhdl_t *srqhdl, uint_t sleepflag)
480 473 {
481 474 tavor_rsrc_t *srqc, *rsrc;
482 475 tavor_umap_db_entry_t *umapdb;
483 476 uint64_t value;
484 477 tavor_srqhdl_t srq;
485 478 tavor_mrhdl_t mr;
486 479 tavor_pdhdl_t pd;
487 480 tavor_hw_srqc_t srqc_entry;
488 481 uint32_t srqnum;
489 482 uint32_t size;
490 483 uint_t maxprot;
491 484 int status;
492 485
493 486 TAVOR_TNF_ENTER(tavor_srq_free);
494 487
495 488 /*
496 489 * Pull all the necessary information from the Tavor Shared Receive
497 490 * Queue handle. This is necessary here because the resource for the
498 491 * SRQ handle is going to be freed up as part of this operation.
499 492 */
500 493 srq = *srqhdl;
501 494 mutex_enter(&srq->srq_lock);
502 495 srqc = srq->srq_srqcrsrcp;
503 496 rsrc = srq->srq_rsrcp;
504 497 pd = srq->srq_pdhdl;
505 498 mr = srq->srq_mrhdl;
506 499 srqnum = srq->srq_srqnum;
507 500
508 501 /*
509 502 * If there are work queues still associated with the SRQ, then return
510 503 * an error. Otherwise, we will be holding the SRQ lock.
511 504 */
512 505 if (srq->srq_refcnt != 0) {
513 506 mutex_exit(&srq->srq_lock);
514 507 TNF_PROBE_1(tavor_srq_free_refcnt_fail, TAVOR_TNF_ERROR, "",
515 508 tnf_int, refcnt, srq->srq_refcnt);
516 509 TAVOR_TNF_EXIT(tavor_srq_free);
517 510 return (IBT_SRQ_IN_USE);
518 511 }
519 512
520 513 /*
521 514 * If this was a user-mappable SRQ, then we need to remove its entry
522 515 * from the "userland resources database". If it is also currently
523 516 * mmap()'d out to a user process, then we need to call
524 517 * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping.
525 518 * We also need to invalidate the SRQ tracking information for the
526 519 * user mapping.
527 520 */
528 521 if (srq->srq_is_umap) {
529 522 status = tavor_umap_db_find(state->ts_instance, srq->srq_srqnum,
530 523 MLNX_UMAP_SRQMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
531 524 &umapdb);
532 525 if (status != DDI_SUCCESS) {
533 526 mutex_exit(&srq->srq_lock);
534 527 TAVOR_WARNING(state, "failed to find in database");
535 528 TAVOR_TNF_EXIT(tavor_srq_free);
536 529 return (ibc_get_ci_failure(0));
537 530 }
538 531 tavor_umap_db_free(umapdb);
539 532 if (srq->srq_umap_dhp != NULL) {
540 533 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
541 534 status = devmap_devmem_remap(srq->srq_umap_dhp,
542 535 state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size,
543 536 maxprot, DEVMAP_MAPPING_INVALID, NULL);
544 537 if (status != DDI_SUCCESS) {
545 538 mutex_exit(&srq->srq_lock);
546 539 TAVOR_WARNING(state, "failed in SRQ memory "
547 540 "devmap_devmem_remap()");
548 541 TAVOR_TNF_EXIT(tavor_srq_free);
549 542 return (ibc_get_ci_failure(0));
550 543 }
551 544 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
552 545 }
553 546 }
554 547
555 548 /*
556 549 * Put NULL into the Tavor SRQNum-to-SRQHdl list. This will allow any
557 550 * in-progress events to detect that the SRQ corresponding to this
558 551 * number has been freed.
559 552 */
560 553 state->ts_srqhdl[srqc->tr_indx] = NULL;
561 554
562 555 mutex_exit(&srq->srq_lock);
563 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq));
564 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq->srq_wridlist));
565 556
566 557 /*
567 558 * Reclaim SRQC entry from hardware (using the Tavor HW2SW_SRQ
568 559 * firmware command). If the ownership transfer fails for any reason,
569 560 * then it is an indication that something (either in HW or SW) has
570 561 * gone seriously wrong.
571 562 */
572 563 status = tavor_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry,
573 564 sizeof (tavor_hw_srqc_t), srqnum, sleepflag);
574 565 if (status != TAVOR_CMD_SUCCESS) {
575 566 TAVOR_WARNING(state, "failed to reclaim SRQC ownership");
576 567 cmn_err(CE_CONT, "Tavor: HW2SW_SRQ command failed: %08x\n",
577 568 status);
578 569 TNF_PROBE_1(tavor_srq_free_hw2sw_srq_cmd_fail,
579 570 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
580 571 TAVOR_TNF_EXIT(tavor_srq_free);
581 572 return (IBT_FAILURE);
582 573 }
583 574
584 575 /*
585 576 * Deregister the memory for the Shared Receive Queue. If this fails
586 577 * for any reason, then it is an indication that something (either
587 578 * in HW or SW) has gone seriously wrong. So we print a warning
588 579 * message and return.
589 580 */
590 581 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
591 582 sleepflag);
592 583 if (status != DDI_SUCCESS) {
593 584 TAVOR_WARNING(state, "failed to deregister SRQ memory");
594 585 TNF_PROBE_0(tavor_srq_free_dereg_mr_fail, TAVOR_TNF_ERROR, "");
595 586 TAVOR_TNF_EXIT(tavor_srq_free);
596 587 return (IBT_FAILURE);
597 588 }
598 589
599 590 /* Calculate the size and free the wridlist container */
600 591 if (srq->srq_wridlist != NULL) {
601 592 size = (srq->srq_wridlist->wl_size *
602 593 sizeof (tavor_wrid_entry_t));
603 594 kmem_free(srq->srq_wridlist->wl_wre, size);
604 595 kmem_free(srq->srq_wridlist, sizeof (tavor_wrid_list_hdr_t));
605 596
606 597 /*
607 598 * Release reference to WQL; If this is the last reference,
608 599 * this call also has the side effect of freeing up the
609 600 * 'srq_wrid_wql' memory.
610 601 */
611 602 tavor_wql_refcnt_dec(srq->srq_wrid_wql);
612 603 }
613 604
614 605 /* Free the memory for the SRQ */
615 606 tavor_queue_free(state, &srq->srq_wqinfo);
616 607
617 608 /* Free the Tavor SRQ Handle */
618 609 tavor_rsrc_free(state, &rsrc);
619 610
620 611 /* Free the SRQC entry resource */
621 612 tavor_rsrc_free(state, &srqc);
622 613
623 614 /* Decrement the reference count on the protection domain (PD) */
624 615 tavor_pd_refcnt_dec(pd);
625 616
626 617 /* Set the srqhdl pointer to NULL and return success */
627 618 *srqhdl = NULL;
628 619
629 620 TAVOR_TNF_EXIT(tavor_srq_free);
630 621 return (DDI_SUCCESS);
631 622 }
632 623
633 624
634 625 /*
635 626 * tavor_srq_modify()
636 627 * Context: Can be called only from user or kernel context.
637 628 */
638 629 int
639 630 tavor_srq_modify(tavor_state_t *state, tavor_srqhdl_t srq, uint_t size,
640 631 uint_t *real_size, uint_t sleepflag)
641 632 {
642 633 tavor_qalloc_info_t new_srqinfo, old_srqinfo;
643 634 tavor_rsrc_t *mtt, *mpt, *old_mtt;
644 635 tavor_bind_info_t bind;
645 636 tavor_bind_info_t old_bind;
646 637 tavor_rsrc_pool_info_t *rsrc_pool;
647 638 tavor_mrhdl_t mr;
648 639 tavor_hw_mpt_t mpt_entry;
649 640 tavor_wrid_entry_t *wre_new, *wre_old;
650 641 uint64_t mtt_ddrbaseaddr, mtt_addr;
651 642 uint64_t srq_desc_off;
652 643 uint32_t *buf, srq_old_bufsz;
653 644 uint32_t wqesz;
654 645 uint_t max_srq_size;
655 646 uint_t dma_xfer_mode, mtt_pgsize_bits;
656 647 uint_t srq_sync, log_srq_size, maxprot;
657 648 uint_t wq_location;
658 649 int status;
659 650 char *errormsg;
660 651
661 652 TAVOR_TNF_ENTER(tavor_srq_modify);
662 653
663 654 /*
664 655 * Check the "inddr" flag. This flag tells the driver whether or not
665 656 * the SRQ's work queues should come from normal system memory or
666 657 * whether they should be allocated from DDR memory.
667 658 */
668 659 wq_location = state->ts_cfg_profile->cp_srq_wq_inddr;
669 660
670 661 /*
671 662 * If size requested is larger than device capability, return
672 663 * Insufficient Resources
673 664 */
674 665 max_srq_size = (1 << state->ts_cfg_profile->cp_log_max_srq_sz);
675 666 if (size > max_srq_size) {
676 667 TNF_PROBE_0(tavor_srq_modify_size_larger_than_maxsize,
677 668 TAVOR_TNF_ERROR, "");
678 669 TAVOR_TNF_EXIT(tavor_srq_modify);
679 670 return (IBT_HCA_WR_EXCEEDED);
680 671 }
681 672
682 673 /*
683 674 * Calculate the appropriate size for the SRQ.
684 675 * Note: All Tavor SRQs must be a power-of-2 in size. Also
685 676 * they may not be any smaller than TAVOR_SRQ_MIN_SIZE. This step
686 677 * is to round the requested size up to the next highest power-of-2
687 678 */
688 679 size = max(size, TAVOR_SRQ_MIN_SIZE);
689 680 log_srq_size = highbit(size);
690 681 if (ISP2(size)) {
691 682 log_srq_size = log_srq_size - 1;
692 683 }
693 684
694 685 /*
695 686 * Next we verify that the rounded-up size is valid (i.e. consistent
696 687 * with the device limits and/or software-configured limits).
697 688 */
698 689 if (log_srq_size > state->ts_cfg_profile->cp_log_max_srq_sz) {
699 690 /* Set "status" and "errormsg" and goto failure */
700 691 TAVOR_TNF_FAIL(IBT_HCA_WR_EXCEEDED, "max SRQ size");
701 692 goto srqmodify_fail;
702 693 }
703 694
704 695 /*
705 696 * Allocate the memory for newly resized Shared Receive Queue.
706 697 *
707 698 * Note: If SRQ is not user-mappable, then it may come from either
708 699 * kernel system memory or from HCA-attached local DDR memory.
709 700 *
710 701 * Note2: We align this queue on a pagesize boundary. This is required
711 702 * to make sure that all the resulting IB addresses will start at 0,
712 703 * for a zero-based queue. By making sure we are aligned on at least a
713 704 * page, any offset we use into our queue will be the same as it was
714 705 * when we allocated it at tavor_srq_alloc() time.
715 706 */
716 707 wqesz = (1 << srq->srq_wq_log_wqesz);
717 708 new_srqinfo.qa_size = (1 << log_srq_size) * wqesz;
718 709 new_srqinfo.qa_alloc_align = PAGESIZE;
719 710 new_srqinfo.qa_bind_align = PAGESIZE;
720 711 if (srq->srq_is_umap) {
721 712 new_srqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND;
722 713 } else {
723 714 new_srqinfo.qa_location = wq_location;
724 715 }
725 716 status = tavor_queue_alloc(state, &new_srqinfo, sleepflag);
726 717 if (status != DDI_SUCCESS) {
727 718 /* Set "status" and "errormsg" and goto failure */
728 719 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed srq");
729 720 goto srqmodify_fail;
730 721 }
731 722 buf = (uint32_t *)new_srqinfo.qa_buf_aligned;
732 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
733 723
734 724 /*
735 725 * Allocate the memory for the new WRE list. This will be used later
736 726 * when we resize the wridlist based on the new SRQ size.
737 727 */
738 728 wre_new = (tavor_wrid_entry_t *)kmem_zalloc((1 << log_srq_size) *
739 729 sizeof (tavor_wrid_entry_t), sleepflag);
740 730 if (wre_new == NULL) {
741 731 /* Set "status" and "errormsg" and goto failure */
742 732 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
743 733 "failed wre_new alloc");
744 734 goto srqmodify_fail;
745 735 }
746 736
747 737 /*
748 738 * Fill in the "bind" struct. This struct provides the majority
749 739 * of the information that will be used to distinguish between an
750 740 * "addr" binding (as is the case here) and a "buf" binding (see
751 741 * below). The "bind" struct is later passed to tavor_mr_mem_bind()
752 742 * which does most of the "heavy lifting" for the Tavor memory
753 743 * registration routines.
754 744 */
755 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind))
756 745 bzero(&bind, sizeof (tavor_bind_info_t));
757 746 bind.bi_type = TAVOR_BINDHDL_VADDR;
758 747 bind.bi_addr = (uint64_t)(uintptr_t)buf;
759 748 bind.bi_len = new_srqinfo.qa_size;
760 749 bind.bi_as = NULL;
761 750 bind.bi_flags = ((sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP :
762 751 IBT_MR_NOSLEEP) | IBT_MR_ENABLE_LOCAL_WRITE;
763 752 if (srq->srq_is_umap) {
764 753 bind.bi_bypass = state->ts_cfg_profile->cp_iommu_bypass;
765 754 } else {
766 755 if (wq_location == TAVOR_QUEUE_LOCATION_NORMAL) {
767 756 bind.bi_bypass =
768 757 state->ts_cfg_profile->cp_iommu_bypass;
769 758 dma_xfer_mode =
770 759 state->ts_cfg_profile->cp_streaming_consistent;
771 760 if (dma_xfer_mode == DDI_DMA_STREAMING) {
772 761 bind.bi_flags |= IBT_MR_NONCOHERENT;
773 762 }
774 763 } else {
775 764 bind.bi_bypass = TAVOR_BINDMEM_BYPASS;
776 765 }
777 766 }
778 767 status = tavor_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt,
779 768 &mtt_pgsize_bits);
780 769 if (status != DDI_SUCCESS) {
781 770 /* Set "status" and "errormsg" and goto failure */
782 771 TAVOR_TNF_FAIL(status, "failed mtt bind");
783 772 kmem_free(wre_new, (1 << log_srq_size) *
784 773 sizeof (tavor_wrid_entry_t));
785 774 tavor_queue_free(state, &new_srqinfo);
786 775 goto srqmodify_fail;
787 776 }
788 777
789 778 /*
790 779 * Calculate the offset between the kernel virtual address space
791 780 * and the IB virtual address space. This will be used when
792 781 * posting work requests to properly initialize each WQE.
793 782 *
794 783 * Note: bind addr is zero-based (from alloc) so we calculate the
795 784 * correct new offset here.
796 785 */
797 786 bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1);
798 787 srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned -
799 788 (uint64_t)bind.bi_addr;
800 789
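
The mask applied to bind.bi_addr above keeps only the buffer's offset within its first page, which is what makes the rebound queue zero-based again. A small sketch with a worked value (hypothetical helper name):

	/*
	 * With pgsize_bits == 12 (4 KB pages), a buffer at kernel
	 * vaddr 0x...45678 yields a zero-based IB vaddr of 0x678.
	 */
	static uint64_t
	zero_based_ibaddr(uint64_t kva, uint_t pgsize_bits)
	{
		return (kva & ((1ULL << pgsize_bits) - 1));
	}
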
801 790 /*
802 791 * Get the base address for the MTT table. This will be necessary
803 792 * below when we are modifying the MPT entry.
804 793 */
805 794 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
806 795 mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;
807 796
808 797 /*
809 798 * Fill in the MPT entry. This is the final step before passing
810 799 * ownership of the MPT entry to the Tavor hardware. We use all of
811 800 * the information collected/calculated above to fill in the
812 801 * requisite portions of the MPT.
813 802 */
814 803 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
815 804 mpt_entry.reg_win_len = bind.bi_len;
816 805 mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
817 806 mpt_entry.mttseg_addr_h = mtt_addr >> 32;
818 807 mpt_entry.mttseg_addr_l = mtt_addr >> 6;
819 808
820 809 /*
821 810 * Now we grab the SRQ lock. Since we will be updating the actual
822 811 * SRQ location and the producer/consumer indexes, we should hold
823 812 * the lock.
824 813 *
825 814 * We do a TAVOR_NOSLEEP here (and below), though, because we are
826 815 * holding the "srq_lock" and if we got raised to interrupt level
827 816 * by priority inversion, we would not want to block in this routine
828 817 * waiting for success.
829 818 */
830 819 mutex_enter(&srq->srq_lock);
831 820
832 821 /*
833 822 * Copy old entries to new buffer
834 823 */
835 824 srq_old_bufsz = srq->srq_wq_bufsz;
836 825 bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz);
837 826
838 827 /* Determine if later ddi_dma_sync will be necessary */
839 828 srq_sync = TAVOR_SRQ_IS_SYNC_REQ(state, srq->srq_wqinfo);
840 829
841 830 /* Sync entire "new" SRQ for use by hardware (if necessary) */
842 831 if (srq_sync) {
843 832 (void) ddi_dma_sync(bind.bi_dmahdl, 0,
844 833 new_srqinfo.qa_size, DDI_DMA_SYNC_FORDEV);
845 834 }
846 835
847 836 /*
848 837 * Setup MPT information for use in the MODIFY_MPT command
849 838 */
850 839 mr = srq->srq_mrhdl;
851 840 mutex_enter(&mr->mr_lock);
852 841 mpt = srq->srq_mrhdl->mr_mptrsrcp;
853 842
854 843 /*
855 844 * MODIFY_MPT
856 845 *
857 846 * If this fails for any reason, then it is an indication that
858 847 * something (either in HW or SW) has gone seriously wrong. So we
859 848 * print a warning message and return.
860 849 */
861 850 status = tavor_modify_mpt_cmd_post(state, &mpt_entry, mpt->tr_indx,
862 851 TAVOR_CMD_MODIFY_MPT_RESIZESRQ, sleepflag);
863 852 if (status != TAVOR_CMD_SUCCESS) {
864 853 cmn_err(CE_CONT, "Tavor: MODIFY_MPT command failed: %08x\n",
865 854 status);
866 855 TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail,
867 856 TAVOR_TNF_ERROR, "", tnf_uint, status, status);
868 857 TAVOR_TNF_FAIL(status, "MODIFY_MPT command failed");
869 858 (void) tavor_mr_mtt_unbind(state, &srq->srq_mrhdl->mr_bindinfo,
870 859 srq->srq_mrhdl->mr_mttrsrcp);
871 860 kmem_free(wre_new, (1 << log_srq_size) *
872 861 sizeof (tavor_wrid_entry_t));
873 862 tavor_queue_free(state, &new_srqinfo);
874 863 mutex_exit(&mr->mr_lock);
875 864 mutex_exit(&srq->srq_lock);
876 865 return (ibc_get_ci_failure(0));
877 866 }
878 867
879 868 /*
880 869 * Update the Tavor Shared Receive Queue handle with all the new
881 870 * information. At the same time, save away all the necessary
882 871 * information for freeing up the old resources
883 872 */
884 873 old_srqinfo = srq->srq_wqinfo;
885 874 old_mtt = srq->srq_mrhdl->mr_mttrsrcp;
886 875 bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind,
887 876 sizeof (tavor_bind_info_t));
888 877
889 878 /* Now set the new info */
890 879 srq->srq_wqinfo = new_srqinfo;
891 880 srq->srq_wq_buf = buf;
892 881 srq->srq_wq_bufsz = (1 << log_srq_size);
893 882 bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (tavor_bind_info_t));
894 883 srq->srq_mrhdl->mr_mttrsrcp = mtt;
895 884 srq->srq_desc_off = srq_desc_off;
896 885 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
897 886
898 887 /* Update MR mtt pagesize */
899 888 mr->mr_logmttpgsz = mtt_pgsize_bits;
900 889 mutex_exit(&mr->mr_lock);
901 890
902 -#ifdef __lock_lint
903 - mutex_enter(&srq->srq_wrid_wql->wql_lock);
904 -#else
905 891 if (srq->srq_wrid_wql != NULL) {
906 892 mutex_enter(&srq->srq_wrid_wql->wql_lock);
907 893 }
908 -#endif
909 894
910 895 /*
911 896 * Initialize new wridlist, if needed.
912 897 *
913 898 * If a wridlist already is setup on an SRQ (the QP associated with an
914 899 * SRQ has moved "from_reset") then we must update this wridlist based
915 900 * on the new SRQ size. We allocate the new size of Work Request ID
916 901 * Entries, copy over the old entries to the new list, and
917 902 * re-initialize the srq wridlist in non-umap case
918 903 */
919 904 wre_old = NULL;
920 905 if (srq->srq_wridlist != NULL) {
921 906 wre_old = srq->srq_wridlist->wl_wre;
922 907
923 908 bcopy(wre_old, wre_new, srq_old_bufsz *
924 909 sizeof (tavor_wrid_entry_t));
925 910
926 911 /* Setup new sizes in wre */
927 912 srq->srq_wridlist->wl_wre = wre_new;
928 913 srq->srq_wridlist->wl_size = srq->srq_wq_bufsz;
929 914
930 915 if (!srq->srq_is_umap) {
931 916 tavor_wrid_list_srq_init(srq->srq_wridlist, srq,
932 917 srq_old_bufsz);
933 918 }
934 919 }
935 920
936 -#ifdef __lock_lint
937 - mutex_exit(&srq->srq_wrid_wql->wql_lock);
938 -#else
939 921 if (srq->srq_wrid_wql != NULL) {
940 922 mutex_exit(&srq->srq_wrid_wql->wql_lock);
941 923 }
942 -#endif
943 924
944 925 /*
945 926 * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out
946 927 * to a user process, then we need to call devmap_devmem_remap() to
947 928 * invalidate the mapping to the SRQ memory. We also need to
948 929 * invalidate the SRQ tracking information for the user mapping.
949 930 *
950 931 * Note: On failure, the remap really shouldn't ever happen. So, if it
951 932 * does, it is an indication that something has gone seriously wrong.
952 933 * So we print a warning message and return error (knowing, of course,
953 934 * that the "old" SRQ memory will be leaked)
954 935 */
955 936 if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) {
956 937 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
957 938 status = devmap_devmem_remap(srq->srq_umap_dhp,
958 939 state->ts_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot,
959 940 DEVMAP_MAPPING_INVALID, NULL);
960 941 if (status != DDI_SUCCESS) {
961 942 mutex_exit(&srq->srq_lock);
962 943 TAVOR_WARNING(state, "failed in SRQ memory "
963 944 "devmap_devmem_remap()");
964 945 /* We can, however, free the memory for old wre */
965 946 if (wre_old != NULL) {
966 947 kmem_free(wre_old, srq_old_bufsz *
967 948 sizeof (tavor_wrid_entry_t));
968 949 }
969 950 TAVOR_TNF_EXIT(tavor_srq_modify);
970 951 return (ibc_get_ci_failure(0));
971 952 }
972 953 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
973 954 }
974 955
975 956 /*
976 957 * Drop the SRQ lock now. The only thing left to do is to free up
977 958 * the old resources.
978 959 */
979 960 mutex_exit(&srq->srq_lock);
980 961
981 962 /*
982 963 * Unbind the MTT entries.
983 964 */
984 965 status = tavor_mr_mtt_unbind(state, &old_bind, old_mtt);
985 966 if (status != DDI_SUCCESS) {
986 967 TAVOR_WARNING(state, "failed to unbind old SRQ memory");
987 968 /* Set "status" and "errormsg" and goto failure */
988 969 TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
989 970 "failed to unbind (old)");
990 971 goto srqmodify_fail;
991 972 }
992 973
993 974 /* Free the memory for old wre */
994 975 if (wre_old != NULL) {
995 976 kmem_free(wre_old, srq_old_bufsz *
996 977 sizeof (tavor_wrid_entry_t));
997 978 }
998 979
999 980 /* Free the memory for the old SRQ */
1000 981 tavor_queue_free(state, &old_srqinfo);
1001 982
1002 983 /*
1003 984 * Fill in the return arguments (if necessary). This includes the
1004 985 * real new shared receive queue size.
1005 986 */
1006 987 if (real_size != NULL) {
1007 988 *real_size = (1 << log_srq_size);
1008 989 }
1009 990
1010 991 TAVOR_TNF_EXIT(tavor_srq_modify);
1011 992 return (DDI_SUCCESS);
1012 993
1013 994 srqmodify_fail:
1014 995 TNF_PROBE_1(tavor_srq_modify_fail, TAVOR_TNF_ERROR, "",
1015 996 tnf_string, msg, errormsg);
1016 997 TAVOR_TNF_EXIT(tavor_srq_modify);
1017 998 return (status);
1018 999 }
1019 1000
1020 1001
1021 1002 /*
1022 1003 * tavor_srq_refcnt_inc()
1023 1004 * Context: Can be called from interrupt or base context.
1024 1005 */
1025 1006 void
1026 1007 tavor_srq_refcnt_inc(tavor_srqhdl_t srq)
1027 1008 {
1028 1009 mutex_enter(&srq->srq_lock);
1029 1010 TNF_PROBE_1_DEBUG(tavor_srq_refcnt_inc, TAVOR_TNF_TRACE, "",
1030 1011 tnf_uint, refcnt, srq->srq_refcnt);
1031 1012 srq->srq_refcnt++;
1032 1013 mutex_exit(&srq->srq_lock);
1033 1014 }
1034 1015
1035 1016
1036 1017 /*
1037 1018 * tavor_srq_refcnt_dec()
1038 1019 * Context: Can be called from interrupt or base context.
1039 1020 */
1040 1021 void
1041 1022 tavor_srq_refcnt_dec(tavor_srqhdl_t srq)
1042 1023 {
1043 1024 mutex_enter(&srq->srq_lock);
1044 1025 srq->srq_refcnt--;
1045 1026 TNF_PROBE_1_DEBUG(tavor_srq_refcnt_dec, TAVOR_TNF_TRACE, "",
1046 1027 tnf_uint, refcnt, srq->srq_refcnt);
1047 1028 mutex_exit(&srq->srq_lock);
1048 1029 }
1049 1030
1050 1031
1051 1032 /*
1052 1033 * tavor_srqhdl_from_srqnum()
1053 1034 * Context: Can be called from interrupt or base context.
1054 1035 *
1055 1036 * This routine is important because changing the unconstrained
1056 1037 * portion of the SRQ number is critical to the detection of a
1057 1038 * potential race condition in the SRQ handler code (i.e. the case
1058 1039 * where a SRQ is freed and alloc'd again before an event for the
1059 1040 * "old" SRQ can be handled).
1060 1041 *
1061 1042 * While this is not a perfect solution (not sure that one exists)
1062 1043 * it does help to mitigate the chance that this race condition will
1063 1044 * cause us to deliver a "stale" event to the new SRQ owner. Note:
1064 1045 * this solution does not scale well because the number of constrained
1065 1046 * bits increases (and, hence, the number of unconstrained bits
1066 1047 * decreases) as the number of supported SRQs grows. For small and
1067 1048 * intermediate values, it should hopefully provide sufficient
1068 1049 * protection.
1069 1050 */
1070 1051 tavor_srqhdl_t
1071 1052 tavor_srqhdl_from_srqnum(tavor_state_t *state, uint_t srqnum)
1072 1053 {
1073 1054 uint_t srqindx, srqmask;
1074 1055
1075 1056 /* Calculate the SRQ table index from the srqnum */
1076 1057 srqmask = (1 << state->ts_cfg_profile->cp_log_num_srq) - 1;
1077 1058 srqindx = srqnum & srqmask;
1078 1059 return (state->ts_srqhdl[srqindx]);
1079 1060 }
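
The mask above keeps only the constrained low bits of the SRQ number: with cp_log_num_srq == 10, for example, there are 1024 table slots, and the remaining high bits are free to differ between successive owners of the same slot, which is what makes a stale event detectable. A sketch of the index computation:

	/*
	 * Derive the ts_srqhdl[] index from an SRQ number, given the
	 * configured log2 of the SRQ table size.
	 */
	static uint_t
	srqnum_to_index(uint_t srqnum, uint_t log_num_srq)
	{
		uint_t mask = (1U << log_num_srq) - 1;

		return (srqnum & mask);
	}
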
1080 1061
1081 1062
1082 1063 /*
1083 1064 * tavor_srq_sgl_to_logwqesz()
1084 1065 * Context: Can be called from interrupt or base context.
1085 1066 */
1086 1067 static void
1087 1068 tavor_srq_sgl_to_logwqesz(tavor_state_t *state, uint_t num_sgl,
1088 1069 tavor_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
1089 1070 {
1090 1071 uint_t max_size, log2, actual_sgl;
1091 1072
1092 1073 TAVOR_TNF_ENTER(tavor_srq_sgl_to_logwqesz);
1093 1074
1094 1075 switch (wq_type) {
1095 1076 case TAVOR_QP_WQ_TYPE_RECVQ:
1096 1077 /*
1097 1078 * Use requested maximum SGL to calculate max descriptor size
1098 1079 * (while guaranteeing that the descriptor size is a
1099 1080 * power-of-2 cachelines).
1100 1081 */
1101 1082 max_size = (TAVOR_QP_WQE_MLX_RCV_HDRS + (num_sgl << 4));
1102 1083 log2 = highbit(max_size);
1103 1084 if (ISP2(max_size)) {
1104 1085 log2 = log2 - 1;
1105 1086 }
1106 1087
1107 1088 /* Make sure descriptor is at least the minimum size */
1108 1089 log2 = max(log2, TAVOR_QP_WQE_LOG_MINIMUM);
1109 1090
1110 1091 /* Calculate actual number of SGL (given WQE size) */
1111 1092 actual_sgl = ((1 << log2) - TAVOR_QP_WQE_MLX_RCV_HDRS) >> 4;
1112 1093 break;
1113 1094
1114 1095 default:
1115 1096 TAVOR_WARNING(state, "unexpected work queue type");
1116 1097 TNF_PROBE_0(tavor_srq_sgl_to_logwqesz_inv_wqtype_fail,
1117 1098 TAVOR_TNF_ERROR, "");
1118 1099 break;
1119 1100 }
1120 1101
1121 1102 /* Fill in the return values */
1122 1103 *logwqesz = log2;
1123 1104 *max_sgl = min(state->ts_cfg_profile->cp_srq_max_sgl, actual_sgl);
1124 1105
1125 1106 TAVOR_TNF_EXIT(tavor_srq_sgl_to_logwqesz);
1126 1107 }
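
Since each scatter-gather entry occupies 16 bytes (the << 4 above), the routine rounds the header-plus-SGL footprint up to a power-of-2 descriptor size and then recomputes how many SGL entries actually fit in it. A compact sketch of the same arithmetic (TAVOR_QP_WQE_MLX_RCV_HDRS is not visible in this file, so its value is passed in; the TAVOR_QP_WQE_LOG_MINIMUM clamp is omitted for brevity):

	static void
	sgl_to_logwqesz(uint_t num_sgl, uint_t hdrs, uint_t *logwqesz,
	    uint_t *actual_sgl)
	{
		uint_t max_size = hdrs + (num_sgl << 4);
		uint_t log2 = highbit(max_size);	/* 1-based MSB index */

		if (ISP2(max_size))
			log2--;		/* already a power of two */
		*logwqesz = log2;
		*actual_sgl = ((1U << log2) - hdrs) >> 4;
	}

For instance, with 16-byte headers and num_sgl == 4, max_size == 80 rounds up to a 128-byte descriptor (log2 == 7), and actual_sgl becomes (128 - 16) >> 4 == 7.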