Print this page
8368 remove warlock leftovers from usr/src/uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/ib/adapters/hermon/hermon_srq.c
+++ new/usr/src/uts/common/io/ib/adapters/hermon/hermon_srq.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * hermon_srq.c
28 28 * Hermon Shared Receive Queue Processing Routines
29 29 *
30 30 * Implements all the routines necessary for allocating, freeing, querying,
31 31 * modifying and posting shared receive queues.
32 32 */
33 33
34 34 #include <sys/sysmacros.h>
35 35 #include <sys/types.h>
36 36 #include <sys/conf.h>
37 37 #include <sys/ddi.h>
38 38 #include <sys/sunddi.h>
39 39 #include <sys/modctl.h>
40 40 #include <sys/bitmap.h>
41 41
42 42 #include <sys/ib/adapters/hermon/hermon.h>
43 43
44 44 static void hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
45 45 hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl);
46 46
47 47 /*
48 48 * hermon_srq_alloc()
49 49 * Context: Can be called only from user or kernel context.
50 50 */
51 51 int
52 52 hermon_srq_alloc(hermon_state_t *state, hermon_srq_info_t *srqinfo,
53 53 uint_t sleepflag)
54 54 {
55 55 ibt_srq_hdl_t ibt_srqhdl;
56 56 hermon_pdhdl_t pd;
57 57 ibt_srq_sizes_t *sizes;
58 58 ibt_srq_sizes_t *real_sizes;
59 59 hermon_srqhdl_t *srqhdl;
60 60 ibt_srq_flags_t flags;
61 61 hermon_rsrc_t *srqc, *rsrc;
62 62 hermon_hw_srqc_t srqc_entry;
63 63 uint32_t *buf;
64 64 hermon_srqhdl_t srq;
65 65 hermon_umap_db_entry_t *umapdb;
66 66 ibt_mr_attr_t mr_attr;
↓ open down ↓ |
66 lines elided |
↑ open up ↑ |
67 67 hermon_mr_options_t mr_op;
68 68 hermon_mrhdl_t mr;
69 69 uint64_t value, srq_desc_off;
70 70 uint32_t log_srq_size;
71 71 uint32_t uarpg;
72 72 uint_t srq_is_umap;
73 73 int flag, status;
74 74 uint_t max_sgl;
75 75 uint_t wqesz;
76 76 uint_t srq_wr_sz;
77 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sizes))
78 77
79 78 /*
80 79 * options-->wq_location used to be for location, now explicitly
81 80 * LOCATION_NORMAL
82 81 */
83 82
84 83 /*
85 84 * Extract the necessary info from the hermon_srq_info_t structure
86 85 */
87 86 real_sizes = srqinfo->srqi_real_sizes;
88 87 sizes = srqinfo->srqi_sizes;
89 88 pd = srqinfo->srqi_pd;
90 89 ibt_srqhdl = srqinfo->srqi_ibt_srqhdl;
91 90 flags = srqinfo->srqi_flags;
92 91 srqhdl = srqinfo->srqi_srqhdl;
93 92
94 93 /*
95 94 * Determine whether SRQ is being allocated for userland access or
96 95 * whether it is being allocated for kernel access. If the SRQ is
97 96 * being allocated for userland access, then lookup the UAR doorbell
98 97 * page number for the current process. Note: If this is not found
99 98 * (e.g. if the process has not previously open()'d the Hermon driver),
100 99 * then an error is returned.
101 100 */
102 101 srq_is_umap = (flags & IBT_SRQ_USER_MAP) ? 1 : 0;
103 102 if (srq_is_umap) {
104 103 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
105 104 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
106 105 if (status != DDI_SUCCESS) {
107 106 status = IBT_INVALID_PARAM;
108 107 goto srqalloc_fail3;
109 108 }
110 109 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
111 110 } else {
112 111 uarpg = state->hs_kernel_uar_index;
113 112 }
114 113
115 114 /* Increase PD refcnt */
116 115 hermon_pd_refcnt_inc(pd);
117 116
118 117 /* Allocate an SRQ context entry */
119 118 status = hermon_rsrc_alloc(state, HERMON_SRQC, 1, sleepflag, &srqc);
120 119 if (status != DDI_SUCCESS) {
121 120 status = IBT_INSUFF_RESOURCE;
122 121 goto srqalloc_fail1;
↓ open down ↓ |
35 lines elided |
↑ open up ↑ |
123 122 }
124 123
125 124 /* Allocate the SRQ Handle entry */
126 125 status = hermon_rsrc_alloc(state, HERMON_SRQHDL, 1, sleepflag, &rsrc);
127 126 if (status != DDI_SUCCESS) {
128 127 status = IBT_INSUFF_RESOURCE;
129 128 goto srqalloc_fail2;
130 129 }
131 130
132 131 srq = (hermon_srqhdl_t)rsrc->hr_addr;
133 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq))
134 132
135 133 bzero(srq, sizeof (struct hermon_sw_srq_s));
136 134 /* Calculate the SRQ number */
137 135
138 136 /* just use the index, implicit in Hermon */
139 137 srq->srq_srqnum = srqc->hr_indx;
140 138
141 139 /*
142 140 * If this will be a user-mappable SRQ, then allocate an entry for
143 141 * the "userland resources database". This will later be added to
144 142 * the database (after all further SRQ operations are successful).
145 143 * If we fail here, we must undo the reference counts and the
146 144 * previous resource allocation.
147 145 */
148 146 if (srq_is_umap) {
149 147 umapdb = hermon_umap_db_alloc(state->hs_instance,
150 148 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC,
151 149 (uint64_t)(uintptr_t)rsrc);
152 150 if (umapdb == NULL) {
153 151 status = IBT_INSUFF_RESOURCE;
154 152 goto srqalloc_fail3;
155 153 }
156 154 }
157 155
158 156 /*
159 157 * Allocate the doorbell record. Hermon just needs one for the
160 158 * SRQ, and use uarpg (above) as the uar index
161 159 */
162 160
163 161 status = hermon_dbr_alloc(state, uarpg, &srq->srq_wq_dbr_acchdl,
164 162 &srq->srq_wq_vdbr, &srq->srq_wq_pdbr, &srq->srq_rdbr_mapoffset);
165 163 if (status != DDI_SUCCESS) {
166 164 status = IBT_INSUFF_RESOURCE;
167 165 goto srqalloc_fail4;
168 166 }
169 167
170 168 /*
171 169 * Calculate the appropriate size for the SRQ.
172 170 * Note: All Hermon SRQs must be a power-of-2 in size. Also
173 171 * they may not be any smaller than HERMON_SRQ_MIN_SIZE. This step
174 172 * is to round the requested size up to the next highest power-of-2
175 173 */
176 174 srq_wr_sz = max(sizes->srq_wr_sz + 1, HERMON_SRQ_MIN_SIZE);
177 175 log_srq_size = highbit(srq_wr_sz);
178 176 if (ISP2(srq_wr_sz)) {
179 177 log_srq_size = log_srq_size - 1;
180 178 }
181 179
182 180 /*
183 181 * Next we verify that the rounded-up size is valid (i.e. consistent
184 182 * with the device limits and/or software-configured limits). If not,
185 183 * then obviously we have a lot of cleanup to do before returning.
186 184 */
187 185 if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) {
188 186 status = IBT_HCA_WR_EXCEEDED;
189 187 goto srqalloc_fail4a;
190 188 }
191 189
192 190 /*
193 191 * Next we verify that the requested number of SGL is valid (i.e.
194 192 * consistent with the device limits and/or software-configured
195 193 * limits). If not, then obviously the same cleanup needs to be done.
196 194 */
197 195 max_sgl = state->hs_ibtfinfo.hca_attr->hca_max_srq_sgl;
198 196 if (sizes->srq_sgl_sz > max_sgl) {
199 197 status = IBT_HCA_SGL_EXCEEDED;
200 198 goto srqalloc_fail4a;
201 199 }
202 200
203 201 /*
204 202 * Determine the SRQ's WQE sizes. This depends on the requested
205 203 * number of SGLs. Note: This also has the side-effect of
206 204 * calculating the real number of SGLs (for the calculated WQE size)
207 205 */
208 206 hermon_srq_sgl_to_logwqesz(state, sizes->srq_sgl_sz,
209 207 HERMON_QP_WQ_TYPE_RECVQ, &srq->srq_wq_log_wqesz,
210 208 &srq->srq_wq_sgl);
211 209
212 210 /*
213 211 * Allocate the memory for SRQ work queues. Note: The location from
214 212 * which we will allocate these work queues is always
215 213 * QUEUE_LOCATION_NORMAL. Since Hermon work queues are not
216 214 * allowed to cross a 32-bit (4GB) boundary, the alignment of the work
217 215 * queue memory is very important. We used to allocate work queues
218 216 * (the combined receive and send queues) so that they would be aligned
219 217 * on their combined size. That alignment guaranteed that they would
220 218 * never cross the 4GB boundary (Hermon work queues are on the order of
221 219 * MBs at maximum). Now we are able to relax this alignment constraint
222 220 * by ensuring that the IB address assigned to the queue memory (as a
223 221 * result of the hermon_mr_register() call) is offset from zero.
224 222 * Previously, we had wanted to use the ddi_dma_mem_alloc() routine to
225 223 * guarantee the alignment, but when attempting to use IOMMU bypass
226 224 * mode we found that we were not allowed to specify any alignment that
227 225 * was more restrictive than the system page size. So we avoided this
228 226 * constraint by passing two alignment values, one for the memory
229 227 * allocation itself and the other for the DMA handle (for later bind).
230 228 * This used to cause more memory than necessary to be allocated (in
 231 229 	 * order to guarantee the more restrictive alignment constraint). But
 232 230 	 * by guaranteeing the zero-based IB virtual address for the queue, we
233 231 * are able to conserve this memory.
234 232 *
235 233 * Note: If SRQ is not user-mappable, then it may come from either
236 234 * kernel system memory or from HCA-attached local DDR memory.
237 235 *
238 236 * Note2: We align this queue on a pagesize boundary. This is required
239 237 * to make sure that all the resulting IB addresses will start at 0, for
240 238 * a zero-based queue. By making sure we are aligned on at least a
241 239 * page, any offset we use into our queue will be the same as when we
242 240 * perform hermon_srq_modify() operations later.
243 241 */
244 242 wqesz = (1 << srq->srq_wq_log_wqesz);
245 243 srq->srq_wqinfo.qa_size = (1 << log_srq_size) * wqesz;
246 244 srq->srq_wqinfo.qa_alloc_align = PAGESIZE;
247 245 srq->srq_wqinfo.qa_bind_align = PAGESIZE;
248 246 if (srq_is_umap) {
↓ open down ↓ |
105 lines elided |
↑ open up ↑ |
249 247 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
250 248 } else {
251 249 srq->srq_wqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
252 250 }
253 251 status = hermon_queue_alloc(state, &srq->srq_wqinfo, sleepflag);
254 252 if (status != DDI_SUCCESS) {
255 253 status = IBT_INSUFF_RESOURCE;
256 254 goto srqalloc_fail4a;
257 255 }
258 256 buf = (uint32_t *)srq->srq_wqinfo.qa_buf_aligned;
259 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
260 257
261 258 /*
262 259 * Register the memory for the SRQ work queues. The memory for the SRQ
263 260 * must be registered in the Hermon cMPT tables. This gives us the LKey
264 261 * to specify in the SRQ context later. Note: If the work queue is to
265 262 * be allocated from DDR memory, then only a "bypass" mapping is
266 263 * appropriate. And if the SRQ memory is user-mappable, then we force
267 264 * DDI_DMA_CONSISTENT mapping. Also, in order to meet the alignment
268 265 * restriction, we pass the "mro_bind_override_addr" flag in the call
269 266 * to hermon_mr_register(). This guarantees that the resulting IB vaddr
270 267 * will be zero-based (modulo the offset into the first page). If we
271 268 * fail here, we still have the bunch of resource and reference count
272 269 * cleanup to do.
273 270 */
274 271 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP :
275 272 IBT_MR_NOSLEEP;
276 273 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
277 274 mr_attr.mr_len = srq->srq_wqinfo.qa_size;
278 275 mr_attr.mr_as = NULL;
↓ open down ↓ |
9 lines elided |
↑ open up ↑ |
279 276 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
280 277 mr_op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
281 278 mr_op.mro_bind_dmahdl = srq->srq_wqinfo.qa_dmahdl;
282 279 mr_op.mro_bind_override_addr = 1;
283 280 status = hermon_mr_register(state, pd, &mr_attr, &mr,
284 281 &mr_op, HERMON_SRQ_CMPT);
285 282 if (status != DDI_SUCCESS) {
286 283 status = IBT_INSUFF_RESOURCE;
287 284 goto srqalloc_fail5;
288 285 }
289 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
290 286
291 287 /*
292 288 * Calculate the offset between the kernel virtual address space
293 289 * and the IB virtual address space. This will be used when
294 290 * posting work requests to properly initialize each WQE.
295 291 */
296 292 srq_desc_off = (uint64_t)(uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
297 293 (uint64_t)mr->mr_bindinfo.bi_addr;
298 294
299 295 srq->srq_wq_wqhdr = hermon_wrid_wqhdr_create(1 << log_srq_size);
300 296
301 297 /*
302 298 * Fill in all the return arguments (if necessary). This includes
303 299 * real queue size and real SGLs.
304 300 */
305 301 if (real_sizes != NULL) {
306 302 real_sizes->srq_wr_sz = (1 << log_srq_size) - 1;
307 303 real_sizes->srq_sgl_sz = srq->srq_wq_sgl;
308 304 }
309 305
310 306 /*
311 307 * Fill in the SRQC entry. This is the final step before passing
312 308 * ownership of the SRQC entry to the Hermon hardware. We use all of
313 309 * the information collected/calculated above to fill in the
314 310 * requisite portions of the SRQC. Note: If this SRQ is going to be
315 311 * used for userland access, then we need to set the UAR page number
316 312 * appropriately (otherwise it's a "don't care")
317 313 */
318 314 bzero(&srqc_entry, sizeof (hermon_hw_srqc_t));
319 315 srqc_entry.state = HERMON_SRQ_STATE_HW_OWNER;
320 316 srqc_entry.log_srq_size = log_srq_size;
321 317 srqc_entry.srqn = srq->srq_srqnum;
322 318 srqc_entry.log_rq_stride = srq->srq_wq_log_wqesz - 4;
323 319 /* 16-byte chunks */
324 320
325 321 srqc_entry.page_offs = srq->srq_wqinfo.qa_pgoffs >> 6;
326 322 srqc_entry.log2_pgsz = mr->mr_log2_pgsz;
327 323 srqc_entry.mtt_base_addrh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
328 324 srqc_entry.mtt_base_addrl = mr->mr_mttaddr >> 3;
329 325 srqc_entry.pd = pd->pd_pdnum;
330 326 srqc_entry.dbr_addrh = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 32);
331 327 srqc_entry.dbr_addrl = (uint32_t)((uint64_t)srq->srq_wq_pdbr >> 2);
332 328
333 329 /*
334 330 * all others - specifically, xrcd, cqn_xrc, lwm, wqe_cnt, and wqe_cntr
335 331 * are zero thanks to the bzero of the structure
336 332 */
337 333
338 334 /*
339 335 * Write the SRQC entry to hardware. Lastly, we pass ownership of
340 336 * the entry to the hardware (using the Hermon SW2HW_SRQ firmware
341 337 * command). Note: In general, this operation shouldn't fail. But
342 338 * if it does, we have to undo everything we've done above before
343 339 * returning error.
344 340 */
345 341 status = hermon_cmn_ownership_cmd_post(state, SW2HW_SRQ, &srqc_entry,
346 342 sizeof (hermon_hw_srqc_t), srq->srq_srqnum,
347 343 sleepflag);
348 344 if (status != HERMON_CMD_SUCCESS) {
349 345 cmn_err(CE_CONT, "Hermon: SW2HW_SRQ command failed: %08x\n",
350 346 status);
351 347 if (status == HERMON_CMD_INVALID_STATUS) {
352 348 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
353 349 }
354 350 status = ibc_get_ci_failure(0);
355 351 goto srqalloc_fail8;
356 352 }
357 353
358 354 /*
359 355 * Fill in the rest of the Hermon SRQ handle. We can update
360 356 * the following fields for use in further operations on the SRQ.
361 357 */
362 358 srq->srq_srqcrsrcp = srqc;
363 359 srq->srq_rsrcp = rsrc;
364 360 srq->srq_mrhdl = mr;
365 361 srq->srq_refcnt = 0;
366 362 srq->srq_is_umap = srq_is_umap;
367 363 srq->srq_uarpg = uarpg;
368 364 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
369 365 srq->srq_pdhdl = pd;
370 366 srq->srq_wq_bufsz = (1 << log_srq_size);
371 367 srq->srq_wq_buf = buf;
372 368 srq->srq_desc_off = srq_desc_off;
373 369 srq->srq_hdlrarg = (void *)ibt_srqhdl;
374 370 srq->srq_state = 0;
375 371 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
376 372 srq->srq_real_sizes.srq_sgl_sz = srq->srq_wq_sgl;
377 373
378 374 /*
379 375 * Put SRQ handle in Hermon SRQNum-to-SRQhdl list. Then fill in the
380 376 * "srqhdl" and return success
381 377 */
382 378 hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, srq);
383 379
384 380 /*
385 381 * If this is a user-mappable SRQ, then we need to insert the
386 382 * previously allocated entry into the "userland resources database".
387 383 * This will allow for later lookup during devmap() (i.e. mmap())
388 384 * calls.
389 385 */
390 386 if (srq->srq_is_umap) {
391 387 hermon_umap_db_add(umapdb);
392 388 } else { /* initialize work queue for kernel SRQs */
393 389 int i, len, last;
394 390 uint16_t *desc;
395 391
396 392 desc = (uint16_t *)buf;
397 393 len = wqesz / sizeof (*desc);
398 394 last = srq->srq_wq_bufsz - 1;
399 395 for (i = 0; i < last; i++) {
400 396 desc[1] = htons(i + 1);
401 397 desc += len;
402 398 }
403 399 srq->srq_wq_wqhdr->wq_tail = last;
404 400 srq->srq_wq_wqhdr->wq_head = 0;
405 401 }
406 402
407 403 *srqhdl = srq;
408 404
409 405 return (status);
410 406
411 407 /*
412 408 * The following is cleanup for all possible failure cases in this routine
413 409 */
414 410 srqalloc_fail8:
415 411 hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr);
416 412 srqalloc_fail7:
417 413 if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
418 414 HERMON_SLEEPFLAG_FOR_CONTEXT()) != DDI_SUCCESS) {
419 415 HERMON_WARNING(state, "failed to deregister SRQ memory");
420 416 }
421 417 srqalloc_fail5:
422 418 hermon_queue_free(&srq->srq_wqinfo);
423 419 srqalloc_fail4a:
424 420 hermon_dbr_free(state, uarpg, srq->srq_wq_vdbr);
425 421 srqalloc_fail4:
426 422 if (srq_is_umap) {
427 423 hermon_umap_db_free(umapdb);
428 424 }
429 425 srqalloc_fail3:
430 426 hermon_rsrc_free(state, &rsrc);
431 427 srqalloc_fail2:
432 428 hermon_rsrc_free(state, &srqc);
433 429 srqalloc_fail1:
434 430 hermon_pd_refcnt_dec(pd);
435 431 srqalloc_fail:
436 432 return (status);
437 433 }
438 434
439 435
440 436 /*
441 437 * hermon_srq_free()
442 438 * Context: Can be called only from user or kernel context.
443 439 */
444 440 /* ARGSUSED */
445 441 int
446 442 hermon_srq_free(hermon_state_t *state, hermon_srqhdl_t *srqhdl,
447 443 uint_t sleepflag)
448 444 {
449 445 hermon_rsrc_t *srqc, *rsrc;
450 446 hermon_umap_db_entry_t *umapdb;
451 447 uint64_t value;
452 448 hermon_srqhdl_t srq;
453 449 hermon_mrhdl_t mr;
454 450 hermon_pdhdl_t pd;
455 451 hermon_hw_srqc_t srqc_entry;
456 452 uint32_t srqnum;
457 453 uint_t maxprot;
458 454 int status;
459 455
460 456 /*
461 457 * Pull all the necessary information from the Hermon Shared Receive
462 458 * Queue handle. This is necessary here because the resource for the
463 459 * SRQ handle is going to be freed up as part of this operation.
464 460 */
465 461 srq = *srqhdl;
466 462 mutex_enter(&srq->srq_lock);
467 463 srqc = srq->srq_srqcrsrcp;
468 464 rsrc = srq->srq_rsrcp;
469 465 pd = srq->srq_pdhdl;
470 466 mr = srq->srq_mrhdl;
471 467 srqnum = srq->srq_srqnum;
472 468
473 469 /*
474 470 * If there are work queues still associated with the SRQ, then return
475 471 * an error. Otherwise, we will be holding the SRQ lock.
476 472 */
477 473 if (srq->srq_refcnt != 0) {
478 474 mutex_exit(&srq->srq_lock);
479 475 return (IBT_SRQ_IN_USE);
480 476 }
481 477
482 478 /*
483 479 * If this was a user-mappable SRQ, then we need to remove its entry
484 480 * from the "userland resources database". If it is also currently
485 481 * mmap()'d out to a user process, then we need to call
486 482 * devmap_devmem_remap() to remap the SRQ memory to an invalid mapping.
487 483 * We also need to invalidate the SRQ tracking information for the
488 484 * user mapping.
489 485 */
490 486 if (srq->srq_is_umap) {
491 487 status = hermon_umap_db_find(state->hs_instance,
492 488 srq->srq_srqnum, MLNX_UMAP_SRQMEM_RSRC, &value,
493 489 HERMON_UMAP_DB_REMOVE, &umapdb);
494 490 if (status != DDI_SUCCESS) {
495 491 mutex_exit(&srq->srq_lock);
496 492 HERMON_WARNING(state, "failed to find in database");
497 493 return (ibc_get_ci_failure(0));
498 494 }
499 495 hermon_umap_db_free(umapdb);
500 496 if (srq->srq_umap_dhp != NULL) {
501 497 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
502 498 status = devmap_devmem_remap(srq->srq_umap_dhp,
503 499 state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size,
504 500 maxprot, DEVMAP_MAPPING_INVALID, NULL);
505 501 if (status != DDI_SUCCESS) {
506 502 mutex_exit(&srq->srq_lock);
507 503 HERMON_WARNING(state, "failed in SRQ memory "
508 504 "devmap_devmem_remap()");
509 505 return (ibc_get_ci_failure(0));
510 506 }
511 507 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
512 508 }
↓ open down ↓ |
213 lines elided |
↑ open up ↑ |
513 509 }
514 510
515 511 /*
516 512 * Put NULL into the Hermon SRQNum-to-SRQHdl list. This will allow any
517 513 * in-progress events to detect that the SRQ corresponding to this
518 514 * number has been freed.
519 515 */
520 516 hermon_icm_set_num_to_hdl(state, HERMON_SRQC, srqc->hr_indx, NULL);
521 517
522 518 mutex_exit(&srq->srq_lock);
523 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*srq));
524 519
525 520 /*
526 521 * Reclaim SRQC entry from hardware (using the Hermon HW2SW_SRQ
527 522 * firmware command). If the ownership transfer fails for any reason,
528 523 * then it is an indication that something (either in HW or SW) has
529 524 * gone seriously wrong.
530 525 */
531 526 status = hermon_cmn_ownership_cmd_post(state, HW2SW_SRQ, &srqc_entry,
532 527 sizeof (hermon_hw_srqc_t), srqnum, sleepflag);
533 528 if (status != HERMON_CMD_SUCCESS) {
534 529 HERMON_WARNING(state, "failed to reclaim SRQC ownership");
535 530 cmn_err(CE_CONT, "Hermon: HW2SW_SRQ command failed: %08x\n",
536 531 status);
537 532 if (status == HERMON_CMD_INVALID_STATUS) {
538 533 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
539 534 }
540 535 return (ibc_get_ci_failure(0));
541 536 }
542 537
543 538 /*
544 539 * Deregister the memory for the Shared Receive Queue. If this fails
545 540 * for any reason, then it is an indication that something (either
546 541 * in HW or SW) has gone seriously wrong. So we print a warning
547 542 * message and return.
548 543 */
549 544 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
550 545 sleepflag);
551 546 if (status != DDI_SUCCESS) {
552 547 HERMON_WARNING(state, "failed to deregister SRQ memory");
553 548 return (IBT_FAILURE);
554 549 }
555 550
556 551 hermon_wrid_wqhdr_destroy(srq->srq_wq_wqhdr);
557 552
558 553 /* Free the memory for the SRQ */
559 554 hermon_queue_free(&srq->srq_wqinfo);
560 555
561 556 /* Free the dbr */
562 557 hermon_dbr_free(state, srq->srq_uarpg, srq->srq_wq_vdbr);
563 558
564 559 /* Free the Hermon SRQ Handle */
565 560 hermon_rsrc_free(state, &rsrc);
566 561
567 562 /* Free the SRQC entry resource */
568 563 hermon_rsrc_free(state, &srqc);
569 564
570 565 /* Decrement the reference count on the protection domain (PD) */
571 566 hermon_pd_refcnt_dec(pd);
572 567
573 568 /* Set the srqhdl pointer to NULL and return success */
574 569 *srqhdl = NULL;
575 570
576 571 return (DDI_SUCCESS);
577 572 }
578 573
579 574
580 575 /*
581 576 * hermon_srq_modify()
582 577 * Context: Can be called only from user or kernel context.
583 578 */
584 579 int
585 580 hermon_srq_modify(hermon_state_t *state, hermon_srqhdl_t srq, uint_t size,
586 581 uint_t *real_size, uint_t sleepflag)
587 582 {
588 583 hermon_qalloc_info_t new_srqinfo, old_srqinfo;
589 584 hermon_rsrc_t *mtt, *old_mtt;
590 585 hermon_bind_info_t bind;
591 586 hermon_bind_info_t old_bind;
592 587 hermon_mrhdl_t mr;
593 588 hermon_hw_srqc_t srqc_entry;
594 589 hermon_hw_dmpt_t mpt_entry;
595 590 uint64_t *wre_new, *wre_old;
596 591 uint64_t mtt_addr;
597 592 uint64_t srq_pgoffs;
598 593 uint64_t srq_desc_off;
599 594 uint32_t *buf, srq_old_bufsz;
600 595 uint32_t wqesz;
601 596 uint_t max_srq_size;
602 597 uint_t mtt_pgsize_bits;
603 598 uint_t log_srq_size, maxprot;
604 599 int status;
605 600
606 601 if ((state->hs_devlim.mod_wr_srq == 0) ||
607 602 (state->hs_cfg_profile->cp_srq_resize_enabled == 0))
608 603 return (IBT_NOT_SUPPORTED);
609 604
610 605 /*
611 606 * If size requested is larger than device capability, return
612 607 * Insufficient Resources
613 608 */
614 609 max_srq_size = (1 << state->hs_cfg_profile->cp_log_max_srq_sz);
615 610 if (size > max_srq_size) {
616 611 return (IBT_HCA_WR_EXCEEDED);
617 612 }
618 613
619 614 /*
620 615 * Calculate the appropriate size for the SRQ.
621 616 * Note: All Hermon SRQs must be a power-of-2 in size. Also
622 617 * they may not be any smaller than HERMON_SRQ_MIN_SIZE. This step
623 618 * is to round the requested size up to the next highest power-of-2
624 619 */
625 620 size = max(size, HERMON_SRQ_MIN_SIZE);
626 621 log_srq_size = highbit(size);
627 622 if (ISP2(size)) {
628 623 log_srq_size = log_srq_size - 1;
629 624 }
630 625
631 626 /*
632 627 * Next we verify that the rounded-up size is valid (i.e. consistent
633 628 * with the device limits and/or software-configured limits).
634 629 */
635 630 if (log_srq_size > state->hs_cfg_profile->cp_log_max_srq_sz) {
636 631 status = IBT_HCA_WR_EXCEEDED;
637 632 goto srqmodify_fail;
638 633 }
639 634
640 635 /*
641 636 * Allocate the memory for newly resized Shared Receive Queue.
642 637 *
643 638 * Note: If SRQ is not user-mappable, then it may come from either
644 639 * kernel system memory or from HCA-attached local DDR memory.
645 640 *
646 641 * Note2: We align this queue on a pagesize boundary. This is required
647 642 * to make sure that all the resulting IB addresses will start at 0,
648 643 * for a zero-based queue. By making sure we are aligned on at least a
649 644 * page, any offset we use into our queue will be the same as it was
650 645 * when we allocated it at hermon_srq_alloc() time.
651 646 */
652 647 wqesz = (1 << srq->srq_wq_log_wqesz);
653 648 new_srqinfo.qa_size = (1 << log_srq_size) * wqesz;
654 649 new_srqinfo.qa_alloc_align = PAGESIZE;
655 650 new_srqinfo.qa_bind_align = PAGESIZE;
656 651 if (srq->srq_is_umap) {
↓ open down ↓ |
123 lines elided |
↑ open up ↑ |
657 652 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
658 653 } else {
659 654 new_srqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
660 655 }
661 656 status = hermon_queue_alloc(state, &new_srqinfo, sleepflag);
662 657 if (status != DDI_SUCCESS) {
663 658 status = IBT_INSUFF_RESOURCE;
664 659 goto srqmodify_fail;
665 660 }
666 661 buf = (uint32_t *)new_srqinfo.qa_buf_aligned;
667 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf))
668 662
669 663 /*
670 664 * Allocate the memory for the new WRE list. This will be used later
671 665 * when we resize the wridlist based on the new SRQ size.
672 666 */
673 667 wre_new = kmem_zalloc((1 << log_srq_size) * sizeof (uint64_t),
674 668 sleepflag);
675 669 if (wre_new == NULL) {
676 670 status = IBT_INSUFF_RESOURCE;
677 671 goto srqmodify_fail;
678 672 }
679 673
680 674 /*
681 675 * Fill in the "bind" struct. This struct provides the majority
682 676 * of the information that will be used to distinguish between an
683 677 * "addr" binding (as is the case here) and a "buf" binding (see
684 678 * below). The "bind" struct is later passed to hermon_mr_mem_bind()
685 679 * which does most of the "heavy lifting" for the Hermon memory
686 680 * registration routines.
687 681 */
688 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(bind))
689 682 bzero(&bind, sizeof (hermon_bind_info_t));
690 683 bind.bi_type = HERMON_BINDHDL_VADDR;
691 684 bind.bi_addr = (uint64_t)(uintptr_t)buf;
692 685 bind.bi_len = new_srqinfo.qa_size;
693 686 bind.bi_as = NULL;
694 687 bind.bi_flags = sleepflag == HERMON_SLEEP ? IBT_MR_SLEEP :
695 688 IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
696 689 bind.bi_bypass = state->hs_cfg_profile->cp_iommu_bypass;
697 690
698 691 status = hermon_mr_mtt_bind(state, &bind, new_srqinfo.qa_dmahdl, &mtt,
699 692 &mtt_pgsize_bits, 0); /* no relaxed ordering */
700 693 if (status != DDI_SUCCESS) {
701 694 status = status;
702 695 kmem_free(wre_new, (1 << log_srq_size) *
703 696 sizeof (uint64_t));
704 697 hermon_queue_free(&new_srqinfo);
705 698 goto srqmodify_fail;
706 699 }
707 700
708 701 /*
709 702 * Calculate the offset between the kernel virtual address space
710 703 * and the IB virtual address space. This will be used when
711 704 * posting work requests to properly initialize each WQE.
712 705 *
713 706 * Note: bind addr is zero-based (from alloc) so we calculate the
714 707 * correct new offset here.
715 708 */
716 709 bind.bi_addr = bind.bi_addr & ((1 << mtt_pgsize_bits) - 1);
717 710 srq_desc_off = (uint64_t)(uintptr_t)new_srqinfo.qa_buf_aligned -
718 711 (uint64_t)bind.bi_addr;
719 712 srq_pgoffs = (uint_t)
720 713 ((uintptr_t)new_srqinfo.qa_buf_aligned & HERMON_PAGEOFFSET);
721 714
722 715 /*
723 716 * Fill in the MPT entry. This is the final step before passing
724 717 * ownership of the MPT entry to the Hermon hardware. We use all of
725 718 * the information collected/calculated above to fill in the
726 719 * requisite portions of the MPT.
727 720 */
728 721 bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
729 722 mpt_entry.reg_win_len = bind.bi_len;
730 723 mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
731 724 mpt_entry.mtt_addr_h = mtt_addr >> 32;
732 725 mpt_entry.mtt_addr_l = mtt_addr >> 3;
733 726
734 727 /*
735 728 * for hermon we build up a new srqc and pass that (partially filled
736 729 * to resize SRQ instead of modifying the (d)mpt directly
737 730 */
738 731
739 732
740 733
741 734 /*
742 735 * Now we grab the SRQ lock. Since we will be updating the actual
743 736 * SRQ location and the producer/consumer indexes, we should hold
744 737 * the lock.
745 738 *
746 739 * We do a HERMON_NOSLEEP here (and below), though, because we are
747 740 * holding the "srq_lock" and if we got raised to interrupt level
748 741 * by priority inversion, we would not want to block in this routine
749 742 * waiting for success.
750 743 */
751 744 mutex_enter(&srq->srq_lock);
752 745
753 746 /*
754 747 * Copy old entries to new buffer
755 748 */
756 749 srq_old_bufsz = srq->srq_wq_bufsz;
757 750 bcopy(srq->srq_wq_buf, buf, srq_old_bufsz * wqesz);
758 751
759 752 /*
760 753 * Setup MPT information for use in the MODIFY_MPT command
761 754 */
762 755 mr = srq->srq_mrhdl;
763 756 mutex_enter(&mr->mr_lock);
764 757
765 758 /*
766 759 * now, setup the srqc information needed for resize - limit the
767 760 * values, but use the same structure as the srqc
768 761 */
769 762
770 763 srqc_entry.log_srq_size = log_srq_size;
771 764 srqc_entry.page_offs = srq_pgoffs >> 6;
772 765 srqc_entry.log2_pgsz = mr->mr_log2_pgsz;
773 766 srqc_entry.mtt_base_addrl = (uint64_t)mtt_addr >> 32;
774 767 srqc_entry.mtt_base_addrh = mtt_addr >> 3;
775 768
776 769 /*
777 770 * RESIZE_SRQ
778 771 *
779 772 * If this fails for any reason, then it is an indication that
780 773 * something (either in HW or SW) has gone seriously wrong. So we
781 774 * print a warning message and return.
782 775 */
783 776 status = hermon_resize_srq_cmd_post(state, &srqc_entry,
784 777 srq->srq_srqnum, sleepflag);
785 778 if (status != HERMON_CMD_SUCCESS) {
786 779 cmn_err(CE_CONT, "Hermon: RESIZE_SRQ command failed: %08x\n",
787 780 status);
788 781 if (status == HERMON_CMD_INVALID_STATUS) {
789 782 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
790 783 }
791 784 (void) hermon_mr_mtt_unbind(state, &bind, mtt);
792 785 kmem_free(wre_new, (1 << log_srq_size) *
793 786 sizeof (uint64_t));
794 787 hermon_queue_free(&new_srqinfo);
795 788 mutex_exit(&mr->mr_lock);
796 789 mutex_exit(&srq->srq_lock);
797 790 return (ibc_get_ci_failure(0));
798 791 }
799 792 /*
800 793 * Update the Hermon Shared Receive Queue handle with all the new
801 794 * information. At the same time, save away all the necessary
802 795 * information for freeing up the old resources
803 796 */
804 797 old_srqinfo = srq->srq_wqinfo;
805 798 old_mtt = srq->srq_mrhdl->mr_mttrsrcp;
806 799 bcopy(&srq->srq_mrhdl->mr_bindinfo, &old_bind,
807 800 sizeof (hermon_bind_info_t));
808 801
809 802 /* Now set the new info */
810 803 srq->srq_wqinfo = new_srqinfo;
811 804 srq->srq_wq_buf = buf;
812 805 srq->srq_wq_bufsz = (1 << log_srq_size);
813 806 bcopy(&bind, &srq->srq_mrhdl->mr_bindinfo, sizeof (hermon_bind_info_t));
814 807 srq->srq_mrhdl->mr_mttrsrcp = mtt;
815 808 srq->srq_desc_off = srq_desc_off;
816 809 srq->srq_real_sizes.srq_wr_sz = (1 << log_srq_size);
817 810
818 811 /* Update MR mtt pagesize */
819 812 mr->mr_logmttpgsz = mtt_pgsize_bits;
820 813 mutex_exit(&mr->mr_lock);
821 814
822 815 /*
823 816 * Initialize new wridlist, if needed.
824 817 *
825 818 * If a wridlist already is setup on an SRQ (the QP associated with an
826 819 * SRQ has moved "from_reset") then we must update this wridlist based
827 820 * on the new SRQ size. We allocate the new size of Work Request ID
828 821 * Entries, copy over the old entries to the new list, and
829 822 * re-initialize the srq wridlist in non-umap case
830 823 */
831 824 wre_old = srq->srq_wq_wqhdr->wq_wrid;
832 825
833 826 bcopy(wre_old, wre_new, srq_old_bufsz * sizeof (uint64_t));
834 827
835 828 /* Setup new sizes in wre */
836 829 srq->srq_wq_wqhdr->wq_wrid = wre_new;
837 830
838 831 /*
839 832 * If "old" SRQ was a user-mappable SRQ that is currently mmap()'d out
840 833 * to a user process, then we need to call devmap_devmem_remap() to
841 834 * invalidate the mapping to the SRQ memory. We also need to
842 835 * invalidate the SRQ tracking information for the user mapping.
843 836 *
844 837 * Note: On failure, the remap really shouldn't ever happen. So, if it
845 838 * does, it is an indication that something has gone seriously wrong.
846 839 * So we print a warning message and return error (knowing, of course,
847 840 * that the "old" SRQ memory will be leaked)
848 841 */
849 842 if ((srq->srq_is_umap) && (srq->srq_umap_dhp != NULL)) {
850 843 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
851 844 status = devmap_devmem_remap(srq->srq_umap_dhp,
852 845 state->hs_dip, 0, 0, srq->srq_wqinfo.qa_size, maxprot,
853 846 DEVMAP_MAPPING_INVALID, NULL);
854 847 if (status != DDI_SUCCESS) {
855 848 mutex_exit(&srq->srq_lock);
856 849 HERMON_WARNING(state, "failed in SRQ memory "
857 850 "devmap_devmem_remap()");
858 851 /* We can, however, free the memory for old wre */
859 852 kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t));
860 853 return (ibc_get_ci_failure(0));
861 854 }
862 855 srq->srq_umap_dhp = (devmap_cookie_t)NULL;
863 856 }
864 857
865 858 /*
866 859 * Drop the SRQ lock now. The only thing left to do is to free up
867 860 * the old resources.
868 861 */
869 862 mutex_exit(&srq->srq_lock);
870 863
871 864 /*
872 865 * Unbind the MTT entries.
873 866 */
874 867 status = hermon_mr_mtt_unbind(state, &old_bind, old_mtt);
875 868 if (status != DDI_SUCCESS) {
876 869 HERMON_WARNING(state, "failed to unbind old SRQ memory");
877 870 status = ibc_get_ci_failure(0);
878 871 goto srqmodify_fail;
879 872 }
880 873
881 874 /* Free the memory for old wre */
882 875 kmem_free(wre_old, srq_old_bufsz * sizeof (uint64_t));
883 876
884 877 /* Free the memory for the old SRQ */
885 878 hermon_queue_free(&old_srqinfo);
886 879
887 880 /*
888 881 * Fill in the return arguments (if necessary). This includes the
889 882 * real new completion queue size.
890 883 */
891 884 if (real_size != NULL) {
892 885 *real_size = (1 << log_srq_size);
893 886 }
894 887
895 888 return (DDI_SUCCESS);
896 889
897 890 srqmodify_fail:
898 891 return (status);
899 892 }
900 893
901 894
902 895 /*
903 896 * hermon_srq_refcnt_inc()
904 897 * Context: Can be called from interrupt or base context.
905 898 */
906 899 void
907 900 hermon_srq_refcnt_inc(hermon_srqhdl_t srq)
908 901 {
909 902 mutex_enter(&srq->srq_lock);
910 903 srq->srq_refcnt++;
911 904 mutex_exit(&srq->srq_lock);
912 905 }
913 906
914 907
915 908 /*
916 909 * hermon_srq_refcnt_dec()
917 910 * Context: Can be called from interrupt or base context.
918 911 */
919 912 void
920 913 hermon_srq_refcnt_dec(hermon_srqhdl_t srq)
921 914 {
922 915 mutex_enter(&srq->srq_lock);
923 916 srq->srq_refcnt--;
924 917 mutex_exit(&srq->srq_lock);
925 918 }
926 919
927 920
928 921 /*
929 922 * hermon_srqhdl_from_srqnum()
930 923 * Context: Can be called from interrupt or base context.
931 924 *
932 925 * This routine is important because changing the unconstrained
933 926 * portion of the SRQ number is critical to the detection of a
934 927 * potential race condition in the SRQ handler code (i.e. the case
935 928 * where a SRQ is freed and alloc'd again before an event for the
936 929 * "old" SRQ can be handled).
937 930 *
938 931 * While this is not a perfect solution (not sure that one exists)
939 932 * it does help to mitigate the chance that this race condition will
940 933 * cause us to deliver a "stale" event to the new SRQ owner. Note:
941 934 * this solution does not scale well because the number of constrained
942 935 * bits increases (and, hence, the number of unconstrained bits
943 936 * decreases) as the number of supported SRQ grows. For small and
944 937 * intermediate values, it should hopefully provide sufficient
945 938 * protection.
946 939 */
947 940 hermon_srqhdl_t
948 941 hermon_srqhdl_from_srqnum(hermon_state_t *state, uint_t srqnum)
949 942 {
950 943 uint_t srqindx, srqmask;
951 944
952 945 /* Calculate the SRQ table index from the srqnum */
953 946 srqmask = (1 << state->hs_cfg_profile->cp_log_num_srq) - 1;
954 947 srqindx = srqnum & srqmask;
955 948 return (hermon_icm_num_to_hdl(state, HERMON_SRQC, srqindx));
956 949 }
957 950
958 951
959 952 /*
960 953 * hermon_srq_sgl_to_logwqesz()
961 954 * Context: Can be called from interrupt or base context.
962 955 */
963 956 static void
964 957 hermon_srq_sgl_to_logwqesz(hermon_state_t *state, uint_t num_sgl,
965 958 hermon_qp_wq_type_t wq_type, uint_t *logwqesz, uint_t *max_sgl)
966 959 {
967 960 uint_t max_size, log2, actual_sgl;
968 961
969 962 switch (wq_type) {
970 963 case HERMON_QP_WQ_TYPE_RECVQ:
971 964 /*
972 965 * Use requested maximum SGL to calculate max descriptor size
973 966 * (while guaranteeing that the descriptor size is a
974 967 * power-of-2 cachelines).
975 968 */
976 969 max_size = (HERMON_QP_WQE_MLX_SRQ_HDRS + (num_sgl << 4));
977 970 log2 = highbit(max_size);
978 971 if (ISP2(max_size)) {
979 972 log2 = log2 - 1;
980 973 }
981 974
982 975 /* Make sure descriptor is at least the minimum size */
983 976 log2 = max(log2, HERMON_QP_WQE_LOG_MINIMUM);
984 977
985 978 /* Calculate actual number of SGL (given WQE size) */
986 979 actual_sgl = ((1 << log2) - HERMON_QP_WQE_MLX_SRQ_HDRS) >> 4;
987 980 break;
988 981
989 982 default:
990 983 HERMON_WARNING(state, "unexpected work queue type");
991 984 break;
992 985 }
993 986
994 987 /* Fill in the return values */
995 988 *logwqesz = log2;
996 989 *max_sgl = min(state->hs_cfg_profile->cp_srq_max_sgl, actual_sgl);
997 990 }
↓ open down ↓ |
299 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX