1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * hermon_cq.c
28 * Hermon Completion Queue Processing Routines
29 *
30 * Implements all the routines necessary for allocating, freeing, resizing,
31 * and handling the completion type events that the Hermon hardware can
32 * generate.
33 */
34
35 #include <sys/types.h>
36 #include <sys/conf.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/modctl.h>
40 #include <sys/bitmap.h>
41 #include <sys/sysmacros.h>
42
43 #include <sys/ib/adapters/hermon/hermon.h>
44
/*
 * Debugging aid: presumably checked elsewhere in the driver to force a
 * panic on certain failures -- only the definition is visible here;
 * settable from a debugger (e.g. mdb).
 */
int hermon_should_panic = 0; /* debugging aid */

/*
 * Write the CQ's current consumer index (low 24 bits only) into the
 * SET_CI doorbell record for the CQ.
 * NOTE(review): the 'cq' argument is expanded unparenthesized below, so
 * callers must pass a simple pointer expression (all current callers do).
 */
#define hermon_cq_update_ci_doorbell(cq) \
 /* Build the doorbell record data (low 24 bits only) */ \
 HERMON_UAR_DB_RECORD_WRITE(cq->cq_arm_ci_vdbr, \
 cq->cq_consindx & 0x00FFFFFF)

/* Ring the CQ arm doorbell with the given notify command (defined below) */
static int hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq,
 uint_t cmd);
#pragma inline(hermon_cq_arm_doorbell)
/* One-time initialization of a CQ's arm doorbell record (defined below) */
static void hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr);
#pragma inline(hermon_arm_cq_dbr_init)
/* Translate a successful CQE into an ibt_wc_t work completion */
static void hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
 hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
/* Translate an error CQE into an ibt_wc_t work completion */
static void hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
 hermon_hw_cqe_t *cqe, ibt_wc_t *wc);
61
62
63 /*
64 * hermon_cq_alloc()
65 * Context: Can be called only from user or kernel context.
66 */
67 int
68 hermon_cq_alloc(hermon_state_t *state, ibt_cq_hdl_t ibt_cqhdl,
69 ibt_cq_attr_t *cq_attr, uint_t *actual_size, hermon_cqhdl_t *cqhdl,
70 uint_t sleepflag)
71 {
72 hermon_rsrc_t *cqc, *rsrc;
73 hermon_umap_db_entry_t *umapdb;
74 hermon_hw_cqc_t cqc_entry;
75 hermon_cqhdl_t cq;
76 ibt_mr_attr_t mr_attr;
77 hermon_mr_options_t op;
78 hermon_pdhdl_t pd;
79 hermon_mrhdl_t mr;
80 hermon_hw_cqe_t *buf;
81 uint64_t value;
82 uint32_t log_cq_size, uarpg;
83 uint_t cq_is_umap;
84 uint32_t status, flag;
85 hermon_cq_sched_t *cq_schedp;
86
87 /*
88 * Determine whether CQ is being allocated for userland access or
89 * whether it is being allocated for kernel access. If the CQ is
90 * being allocated for userland access, then lookup the UAR
91 * page number for the current process. Note: If this is not found
92 * (e.g. if the process has not previously open()'d the Hermon driver),
93 * then an error is returned.
94 */
95 cq_is_umap = (cq_attr->cq_flags & IBT_CQ_USER_MAP) ? 1 : 0;
96 if (cq_is_umap) {
97 status = hermon_umap_db_find(state->hs_instance, ddi_get_pid(),
98 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
99 if (status != DDI_SUCCESS) {
100 status = IBT_INVALID_PARAM;
101 goto cqalloc_fail;
102 }
103 uarpg = ((hermon_rsrc_t *)(uintptr_t)value)->hr_indx;
104 } else {
105 uarpg = state->hs_kernel_uar_index;
106 }
107
108 /* Use the internal protection domain (PD) for setting up CQs */
109 pd = state->hs_pdhdl_internal;
110
111 /* Increment the reference count on the protection domain (PD) */
112 hermon_pd_refcnt_inc(pd);
113
114 /*
115 * Allocate an CQ context entry. This will be filled in with all
116 * the necessary parameters to define the Completion Queue. And then
117 * ownership will be passed to the hardware in the final step
118 * below. If we fail here, we must undo the protection domain
119 * reference count.
120 */
121 status = hermon_rsrc_alloc(state, HERMON_CQC, 1, sleepflag, &cqc);
122 if (status != DDI_SUCCESS) {
123 status = IBT_INSUFF_RESOURCE;
124 goto cqalloc_fail1;
125 }
126
127 /*
128 * Allocate the software structure for tracking the completion queue
129 * (i.e. the Hermon Completion Queue handle). If we fail here, we must
130 * undo the protection domain reference count and the previous
131 * resource allocation.
132 */
133 status = hermon_rsrc_alloc(state, HERMON_CQHDL, 1, sleepflag, &rsrc);
134 if (status != DDI_SUCCESS) {
135 status = IBT_INSUFF_RESOURCE;
136 goto cqalloc_fail2;
137 }
138 cq = (hermon_cqhdl_t)rsrc->hr_addr;
139 cq->cq_is_umap = cq_is_umap;
140 cq->cq_cqnum = cqc->hr_indx; /* just use index, implicit in Hermon */
141 cq->cq_intmod_count = 0;
142 cq->cq_intmod_usec = 0;
143
144 /*
145 * If this will be a user-mappable CQ, then allocate an entry for
146 * the "userland resources database". This will later be added to
147 * the database (after all further CQ operations are successful).
148 * If we fail here, we must undo the reference counts and the
149 * previous resource allocation.
150 */
151 if (cq->cq_is_umap) {
152 umapdb = hermon_umap_db_alloc(state->hs_instance, cq->cq_cqnum,
153 MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrc);
154 if (umapdb == NULL) {
155 status = IBT_INSUFF_RESOURCE;
156 goto cqalloc_fail3;
157 }
158 }
159
160
161 /*
162 * Allocate the doorbell record. We'll need one for the CQ, handling
163 * both consumer index (SET CI) and the CQ state (CQ ARM).
164 */
165
166 status = hermon_dbr_alloc(state, uarpg, &cq->cq_arm_ci_dbr_acchdl,
167 &cq->cq_arm_ci_vdbr, &cq->cq_arm_ci_pdbr, &cq->cq_dbr_mapoffset);
168 if (status != DDI_SUCCESS) {
169 status = IBT_INSUFF_RESOURCE;
170 goto cqalloc_fail4;
171 }
172
173 /*
174 * Calculate the appropriate size for the completion queue.
175 * Note: All Hermon CQs must be a power-of-2 minus 1 in size. Also
176 * they may not be any smaller than HERMON_CQ_MIN_SIZE. This step is
177 * to round the requested size up to the next highest power-of-2
178 */
179 cq_attr->cq_size = max(cq_attr->cq_size, HERMON_CQ_MIN_SIZE);
180 log_cq_size = highbit(cq_attr->cq_size);
181
182 /*
183 * Next we verify that the rounded-up size is valid (i.e. consistent
184 * with the device limits and/or software-configured limits)
185 */
186 if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
187 status = IBT_HCA_CQ_EXCEEDED;
188 goto cqalloc_fail4a;
189 }
190
191 /*
192 * Allocate the memory for Completion Queue.
193 *
194 * Note: Although we use the common queue allocation routine, we
195 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
196 * kernel system memory) for kernel CQs because it would be
197 * inefficient to have CQs located in DDR memory. This is primarily
198 * because CQs are read from (by software) more than they are written
199 * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
200 * user-mappable CQs for a similar reason.)
201 * It is also worth noting that, unlike Hermon QP work queues,
202 * completion queues do not have the same strict alignment
203 * requirements. It is sufficient for the CQ memory to be both
204 * aligned to and bound to addresses which are a multiple of CQE size.
205 */
206 cq->cq_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
207
208 cq->cq_cqinfo.qa_alloc_align = PAGESIZE;
209 cq->cq_cqinfo.qa_bind_align = PAGESIZE;
210 if (cq->cq_is_umap) {
211 cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
212 } else {
213 cq->cq_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
214 hermon_arm_cq_dbr_init(cq->cq_arm_ci_vdbr);
215 }
216 status = hermon_queue_alloc(state, &cq->cq_cqinfo, sleepflag);
217 if (status != DDI_SUCCESS) {
218 status = IBT_INSUFF_RESOURCE;
219 goto cqalloc_fail4;
220 }
221 buf = (hermon_hw_cqe_t *)cq->cq_cqinfo.qa_buf_aligned;
222
223 /*
224 * The ownership bit of the CQE's is set by the HW during the process
225 * of transferrring ownership of the CQ (PRM 09.35c, 14.2.1, note D1
226 *
227 */
228
229 /*
230 * Register the memory for the CQ. The memory for the CQ must
231 * be registered in the Hermon TPT tables. This gives us the LKey
232 * to specify in the CQ context below. Note: If this is a user-
233 * mappable CQ, then we will force DDI_DMA_CONSISTENT mapping.
234 */
235 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
236 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
237 mr_attr.mr_len = cq->cq_cqinfo.qa_size;
238 mr_attr.mr_as = NULL;
239 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
240 op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
241 op.mro_bind_dmahdl = cq->cq_cqinfo.qa_dmahdl;
242 op.mro_bind_override_addr = 0;
243 status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
244 HERMON_CQ_CMPT);
245 if (status != DDI_SUCCESS) {
246 status = IBT_INSUFF_RESOURCE;
247 goto cqalloc_fail5;
248 }
249
250 cq->cq_erreqnum = HERMON_CQ_ERREQNUM_GET(state);
251 if (cq_attr->cq_flags & IBT_CQ_HID) {
252 if (!HERMON_HID_VALID(state, cq_attr->cq_hid)) {
253 IBTF_DPRINTF_L2("CQalloc", "bad handler id 0x%x",
254 cq_attr->cq_hid);
255 status = IBT_INVALID_PARAM;
256 goto cqalloc_fail5;
257 }
258 cq->cq_eqnum = HERMON_HID_TO_EQNUM(state, cq_attr->cq_hid);
259 IBTF_DPRINTF_L2("cqalloc", "hid: eqn %d", cq->cq_eqnum);
260 } else {
261 cq_schedp = (hermon_cq_sched_t *)cq_attr->cq_sched;
262 if (cq_schedp == NULL) {
263 cq_schedp = &state->hs_cq_sched_default;
264 } else if (cq_schedp != &state->hs_cq_sched_default) {
265 int i;
266 hermon_cq_sched_t *tmp;
267
268 tmp = state->hs_cq_sched_array;
269 for (i = 0; i < state->hs_cq_sched_array_size; i++)
270 if (cq_schedp == &tmp[i])
271 break; /* found it */
272 if (i >= state->hs_cq_sched_array_size) {
273 cmn_err(CE_CONT, "!Invalid cq_sched argument: "
274 "ignored\n");
275 cq_schedp = &state->hs_cq_sched_default;
276 }
277 }
278 cq->cq_eqnum = HERMON_HID_TO_EQNUM(state,
279 HERMON_CQSCHED_NEXT_HID(cq_schedp));
280 IBTF_DPRINTF_L2("cqalloc", "sched: first-1 %d, len %d, "
281 "eqn %d", cq_schedp->cqs_start_hid - 1,
282 cq_schedp->cqs_len, cq->cq_eqnum);
283 }
284
285 /*
286 * Fill in the CQC entry. This is the final step before passing
287 * ownership of the CQC entry to the Hermon hardware. We use all of
288 * the information collected/calculated above to fill in the
289 * requisite portions of the CQC. Note: If this CQ is going to be
290 * used for userland access, then we need to set the UAR page number
291 * appropriately (otherwise it's a "don't care")
292 */
293 bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
294
295 cqc_entry.state = HERMON_CQ_DISARMED;
296 cqc_entry.pg_offs = cq->cq_cqinfo.qa_pgoffs >> 5;
297 cqc_entry.log_cq_sz = log_cq_size;
298 cqc_entry.usr_page = uarpg;
299 cqc_entry.c_eqn = cq->cq_eqnum;
300 cqc_entry.log2_pgsz = mr->mr_log2_pgsz;
301 cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
302 cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
303 cqc_entry.dbr_addrh = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 32);
304 cqc_entry.dbr_addrl = (uint32_t)((uint64_t)cq->cq_arm_ci_pdbr >> 3);
305
306 /*
307 * Write the CQC entry to hardware - we pass ownership of
308 * the entry to the hardware (using the Hermon SW2HW_CQ firmware
309 * command). Note: In general, this operation shouldn't fail. But
310 * if it does, we have to undo everything we've done above before
311 * returning error.
312 */
313 status = hermon_cmn_ownership_cmd_post(state, SW2HW_CQ, &cqc_entry,
314 sizeof (hermon_hw_cqc_t), cq->cq_cqnum, sleepflag);
315 if (status != HERMON_CMD_SUCCESS) {
316 cmn_err(CE_CONT, "Hermon: SW2HW_CQ command failed: %08x\n",
317 status);
318 if (status == HERMON_CMD_INVALID_STATUS) {
319 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
320 }
321 status = ibc_get_ci_failure(0);
322 goto cqalloc_fail6;
323 }
324
325 /*
326 * Fill in the rest of the Hermon Completion Queue handle. Having
327 * successfully transferred ownership of the CQC, we can update the
328 * following fields for use in further operations on the CQ.
329 */
330 cq->cq_resize_hdl = 0;
331 cq->cq_cqcrsrcp = cqc;
332 cq->cq_rsrcp = rsrc;
333 cq->cq_consindx = 0;
334 /* least restrictive */
335 cq->cq_buf = buf;
336 cq->cq_bufsz = (1 << log_cq_size);
337 cq->cq_log_cqsz = log_cq_size;
338 cq->cq_mrhdl = mr;
339 cq->cq_refcnt = 0;
340 cq->cq_is_special = 0;
341 cq->cq_uarpg = uarpg;
342 cq->cq_umap_dhp = (devmap_cookie_t)NULL;
343 avl_create(&cq->cq_wrid_wqhdr_avl_tree, hermon_wrid_workq_compare,
344 sizeof (struct hermon_workq_avl_s),
345 offsetof(struct hermon_workq_avl_s, wqa_link));
346
347 cq->cq_hdlrarg = (void *)ibt_cqhdl;
348
349 /*
350 * Put CQ handle in Hermon CQNum-to-CQHdl list. Then fill in the
351 * "actual_size" and "cqhdl" and return success
352 */
353 hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, cq);
354
355 /*
356 * If this is a user-mappable CQ, then we need to insert the previously
357 * allocated entry into the "userland resources database". This will
358 * allow for later lookup during devmap() (i.e. mmap()) calls.
359 */
360 if (cq->cq_is_umap) {
361 hermon_umap_db_add(umapdb);
362 }
363
364 /*
365 * Fill in the return arguments (if necessary). This includes the
366 * real completion queue size.
367 */
368 if (actual_size != NULL) {
369 *actual_size = (1 << log_cq_size) - 1;
370 }
371 *cqhdl = cq;
372
373 return (DDI_SUCCESS);
374
375 /*
376 * The following is cleanup for all possible failure cases in this routine
377 */
378 cqalloc_fail6:
379 if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
380 sleepflag) != DDI_SUCCESS) {
381 HERMON_WARNING(state, "failed to deregister CQ memory");
382 }
383 cqalloc_fail5:
384 hermon_queue_free(&cq->cq_cqinfo);
385 cqalloc_fail4a:
386 hermon_dbr_free(state, uarpg, cq->cq_arm_ci_vdbr);
387 cqalloc_fail4:
388 if (cq_is_umap) {
389 hermon_umap_db_free(umapdb);
390 }
391 cqalloc_fail3:
392 hermon_rsrc_free(state, &rsrc);
393 cqalloc_fail2:
394 hermon_rsrc_free(state, &cqc);
395 cqalloc_fail1:
396 hermon_pd_refcnt_dec(pd);
397 cqalloc_fail:
398 return (status);
399 }
400
401
402 /*
403 * hermon_cq_free()
404 * Context: Can be called only from user or kernel context.
405 */
406 /* ARGSUSED */
407 int
408 hermon_cq_free(hermon_state_t *state, hermon_cqhdl_t *cqhdl, uint_t sleepflag)
409 {
410 hermon_rsrc_t *cqc, *rsrc;
411 hermon_umap_db_entry_t *umapdb;
412 hermon_hw_cqc_t cqc_entry;
413 hermon_pdhdl_t pd;
414 hermon_mrhdl_t mr;
415 hermon_cqhdl_t cq, resize;
416 uint32_t cqnum;
417 uint64_t value;
418 uint_t maxprot;
419 int status;
420
421 /*
422 * Pull all the necessary information from the Hermon Completion Queue
423 * handle. This is necessary here because the resource for the
424 * CQ handle is going to be freed up as part of this operation.
425 */
426 cq = *cqhdl;
427 mutex_enter(&cq->cq_lock);
428 cqc = cq->cq_cqcrsrcp;
429 rsrc = cq->cq_rsrcp;
430 pd = state->hs_pdhdl_internal;
431 mr = cq->cq_mrhdl;
432 cqnum = cq->cq_cqnum;
433
434 resize = cq->cq_resize_hdl; /* save the handle for later */
435
436 /*
437 * If there are work queues still associated with the CQ, then return
438 * an error. Otherwise, we will be holding the CQ lock.
439 */
440 if (cq->cq_refcnt != 0) {
441 mutex_exit(&cq->cq_lock);
442 return (IBT_CQ_BUSY);
443 }
444
445 /*
446 * If this was a user-mappable CQ, then we need to remove its entry
447 * from the "userland resources database". If it is also currently
448 * mmap()'d out to a user process, then we need to call
449 * devmap_devmem_remap() to remap the CQ memory to an invalid mapping.
450 * We also need to invalidate the CQ tracking information for the
451 * user mapping.
452 */
453 if (cq->cq_is_umap) {
454 status = hermon_umap_db_find(state->hs_instance, cqnum,
455 MLNX_UMAP_CQMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
456 &umapdb);
457 if (status != DDI_SUCCESS) {
458 mutex_exit(&cq->cq_lock);
459 HERMON_WARNING(state, "failed to find in database");
460 return (ibc_get_ci_failure(0));
461 }
462 hermon_umap_db_free(umapdb);
463 if (cq->cq_umap_dhp != NULL) {
464 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
465 status = devmap_devmem_remap(cq->cq_umap_dhp,
466 state->hs_dip, 0, 0, cq->cq_cqinfo.qa_size,
467 maxprot, DEVMAP_MAPPING_INVALID, NULL);
468 if (status != DDI_SUCCESS) {
469 mutex_exit(&cq->cq_lock);
470 HERMON_WARNING(state, "failed in CQ memory "
471 "devmap_devmem_remap()");
472 return (ibc_get_ci_failure(0));
473 }
474 cq->cq_umap_dhp = (devmap_cookie_t)NULL;
475 }
476 }
477
478 /*
479 * Put NULL into the Arbel CQNum-to-CQHdl list. This will allow any
480 * in-progress events to detect that the CQ corresponding to this
481 * number has been freed.
482 */
483 hermon_icm_set_num_to_hdl(state, HERMON_CQC, cqc->hr_indx, NULL);
484
485 mutex_exit(&cq->cq_lock);
486
487 /*
488 * Reclaim CQC entry from hardware (using the Hermon HW2SW_CQ
489 * firmware command). If the ownership transfer fails for any reason,
490 * then it is an indication that something (either in HW or SW) has
491 * gone seriously wrong.
492 */
493 status = hermon_cmn_ownership_cmd_post(state, HW2SW_CQ, &cqc_entry,
494 sizeof (hermon_hw_cqc_t), cqnum, sleepflag);
495 if (status != HERMON_CMD_SUCCESS) {
496 HERMON_WARNING(state, "failed to reclaim CQC ownership");
497 cmn_err(CE_CONT, "Hermon: HW2SW_CQ command failed: %08x\n",
498 status);
499 if (status == HERMON_CMD_INVALID_STATUS) {
500 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
501 }
502 return (ibc_get_ci_failure(0));
503 }
504
505 /*
506 * From here on, we start reliquishing resources - but check to see
507 * if a resize was in progress - if so, we need to relinquish those
508 * resources as well
509 */
510
511
512 /*
513 * Deregister the memory for the Completion Queue. If this fails
514 * for any reason, then it is an indication that something (either
515 * in HW or SW) has gone seriously wrong. So we print a warning
516 * message and return.
517 */
518 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
519 sleepflag);
520 if (status != DDI_SUCCESS) {
521 HERMON_WARNING(state, "failed to deregister CQ memory");
522 return (ibc_get_ci_failure(0));
523 }
524
525 if (resize) { /* there was a pointer to a handle */
526 mr = resize->cq_mrhdl; /* reuse the pointer to the region */
527 status = hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
528 sleepflag);
529 if (status != DDI_SUCCESS) {
530 HERMON_WARNING(state, "failed to deregister resize CQ "
531 "memory");
532 return (ibc_get_ci_failure(0));
533 }
534 }
535
536 /* Free the memory for the CQ */
537 hermon_queue_free(&cq->cq_cqinfo);
538 if (resize) {
539 hermon_queue_free(&resize->cq_cqinfo);
540 /* and the temporary handle */
541 kmem_free(resize, sizeof (struct hermon_sw_cq_s));
542 }
543
544 /* everything else does not matter for the resize in progress */
545
546 /* Free the dbr */
547 hermon_dbr_free(state, cq->cq_uarpg, cq->cq_arm_ci_vdbr);
548
549 /* Free the Hermon Completion Queue handle */
550 hermon_rsrc_free(state, &rsrc);
551
552 /* Free up the CQC entry resource */
553 hermon_rsrc_free(state, &cqc);
554
555 /* Decrement the reference count on the protection domain (PD) */
556 hermon_pd_refcnt_dec(pd);
557
558 /* Set the cqhdl pointer to NULL and return success */
559 *cqhdl = NULL;
560
561 return (DDI_SUCCESS);
562 }
563
564
565 /*
566 * hermon_cq_resize()
567 * Context: Can be called only from user or kernel context.
568 */
569 int
570 hermon_cq_resize(hermon_state_t *state, hermon_cqhdl_t cq, uint_t req_size,
571 uint_t *actual_size, uint_t sleepflag)
572 {
573 hermon_hw_cqc_t cqc_entry;
574 hermon_cqhdl_t resize_hdl;
575 hermon_qalloc_info_t new_cqinfo;
576 ibt_mr_attr_t mr_attr;
577 hermon_mr_options_t op;
578 hermon_pdhdl_t pd;
579 hermon_mrhdl_t mr;
580 hermon_hw_cqe_t *buf;
581 uint32_t new_prod_indx;
582 uint_t log_cq_size;
583 int status, flag;
584
585 if (cq->cq_resize_hdl != 0) { /* already in process */
586 status = IBT_CQ_BUSY;
587 goto cqresize_fail;
588 }
589
590
591 /* Use the internal protection domain (PD) for CQs */
592 pd = state->hs_pdhdl_internal;
593
594 /*
595 * Calculate the appropriate size for the new resized completion queue.
596 * Note: All Hermon CQs must be a power-of-2 minus 1 in size. Also
597 * they may not be any smaller than HERMON_CQ_MIN_SIZE. This step is
598 * to round the requested size up to the next highest power-of-2
599 */
600 req_size = max(req_size, HERMON_CQ_MIN_SIZE);
601 log_cq_size = highbit(req_size);
602
603 /*
604 * Next we verify that the rounded-up size is valid (i.e. consistent
605 * with the device limits and/or software-configured limits)
606 */
607 if (log_cq_size > state->hs_cfg_profile->cp_log_max_cq_sz) {
608 status = IBT_HCA_CQ_EXCEEDED;
609 goto cqresize_fail;
610 }
611
612 /*
613 * Allocate the memory for newly resized Completion Queue.
614 *
615 * Note: Although we use the common queue allocation routine, we
616 * always specify HERMON_QUEUE_LOCATION_NORMAL (i.e. CQ located in
617 * kernel system memory) for kernel CQs because it would be
618 * inefficient to have CQs located in DDR memory. This is the same
619 * as we do when we first allocate completion queues primarily
620 * because CQs are read from (by software) more than they are written
621 * to. (We always specify HERMON_QUEUE_LOCATION_USERLAND for all
622 * user-mappable CQs for a similar reason.)
623 * It is also worth noting that, unlike Hermon QP work queues,
624 * completion queues do not have the same strict alignment
625 * requirements. It is sufficient for the CQ memory to be both
626 * aligned to and bound to addresses which are a multiple of CQE size.
627 */
628
629 /* first, alloc the resize_handle */
630 resize_hdl = kmem_zalloc(sizeof (struct hermon_sw_cq_s), KM_SLEEP);
631
632 new_cqinfo.qa_size = (1 << log_cq_size) * sizeof (hermon_hw_cqe_t);
633 new_cqinfo.qa_alloc_align = PAGESIZE;
634 new_cqinfo.qa_bind_align = PAGESIZE;
635 if (cq->cq_is_umap) {
636 new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_USERLAND;
637 } else {
638 new_cqinfo.qa_location = HERMON_QUEUE_LOCATION_NORMAL;
639 }
640 status = hermon_queue_alloc(state, &new_cqinfo, sleepflag);
641 if (status != DDI_SUCCESS) {
642 /* free the resize handle */
643 kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
644 status = IBT_INSUFF_RESOURCE;
645 goto cqresize_fail;
646 }
647 buf = (hermon_hw_cqe_t *)new_cqinfo.qa_buf_aligned;
648
649 /*
650 * No initialization of the cq is needed - the command will do it
651 */
652
653 /*
654 * Register the memory for the CQ. The memory for the CQ must
655 * be registered in the Hermon TPT tables. This gives us the LKey
656 * to specify in the CQ context below.
657 */
658 flag = (sleepflag == HERMON_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
659 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf;
660 mr_attr.mr_len = new_cqinfo.qa_size;
661 mr_attr.mr_as = NULL;
662 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE;
663 op.mro_bind_type = state->hs_cfg_profile->cp_iommu_bypass;
664 op.mro_bind_dmahdl = new_cqinfo.qa_dmahdl;
665 op.mro_bind_override_addr = 0;
666 status = hermon_mr_register(state, pd, &mr_attr, &mr, &op,
667 HERMON_CQ_CMPT);
668 if (status != DDI_SUCCESS) {
669 hermon_queue_free(&new_cqinfo);
670 /* free the resize handle */
671 kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
672 status = IBT_INSUFF_RESOURCE;
673 goto cqresize_fail;
674 }
675
676 /*
677 * Now we grab the CQ lock. Since we will be updating the actual
678 * CQ location and the producer/consumer indexes, we should hold
679 * the lock.
680 *
681 * We do a ARBEL_NOSLEEP here (and below), though, because we are
682 * holding the "cq_lock" and if we got raised to interrupt level
683 * by priority inversion, we would not want to block in this routine
684 * waiting for success.
685 */
686 mutex_enter(&cq->cq_lock);
687
688 /*
689 * Fill in the CQC entry. For the resize operation this is the
690 * final step before attempting the resize operation on the CQC entry.
691 * We use all of the information collected/calculated above to fill
692 * in the requisite portions of the CQC.
693 */
694 bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
695 cqc_entry.log_cq_sz = log_cq_size;
696 cqc_entry.pg_offs = new_cqinfo.qa_pgoffs >> 5;
697 cqc_entry.log2_pgsz = mr->mr_log2_pgsz;
698 cqc_entry.mtt_base_addh = (uint32_t)((mr->mr_mttaddr >> 32) & 0xFF);
699 cqc_entry.mtt_base_addl = mr->mr_mttaddr >> 3;
700
701 /*
702 * Write the CQC entry to hardware. Lastly, we pass ownership of
703 * the entry to the hardware (using the Hermon RESIZE_CQ firmware
704 * command). Note: In general, this operation shouldn't fail. But
705 * if it does, we have to undo everything we've done above before
706 * returning error. Also note that the status returned may indicate
707 * the code to return to the IBTF.
708 */
709 status = hermon_resize_cq_cmd_post(state, &cqc_entry, cq->cq_cqnum,
710 &new_prod_indx, HERMON_CMD_NOSLEEP_SPIN);
711 if (status != HERMON_CMD_SUCCESS) {
712 /* Resize attempt has failed, drop CQ lock and cleanup */
713 mutex_exit(&cq->cq_lock);
714 if (hermon_mr_deregister(state, &mr, HERMON_MR_DEREG_ALL,
715 sleepflag) != DDI_SUCCESS) {
716 HERMON_WARNING(state, "failed to deregister CQ memory");
717 }
718 kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
719 hermon_queue_free(&new_cqinfo);
720 if (status == HERMON_CMD_BAD_SIZE) {
721 return (IBT_CQ_SZ_INSUFFICIENT);
722 } else {
723 cmn_err(CE_CONT, "Hermon: RESIZE_CQ command failed: "
724 "%08x\n", status);
725 if (status == HERMON_CMD_INVALID_STATUS) {
726 hermon_fm_ereport(state, HCA_SYS_ERR,
727 HCA_ERR_SRV_LOST);
728 }
729 return (ibc_get_ci_failure(0));
730 }
731 }
732
733 /*
734 * For Hermon, we've alloc'd another handle structure and save off the
735 * important things in it. Then, in polling we check to see if there's
736 * a "resizing handle" and if so we look for the "special CQE", opcode
737 * 0x16, that indicates the transition to the new buffer.
738 *
739 * At that point, we'll adjust everything - including dereg and
740 * freeing of the original buffer, updating all the necessary fields
741 * in the cq_hdl, and setting up for the next cqe polling
742 */
743
744 resize_hdl->cq_buf = buf;
745 resize_hdl->cq_bufsz = (1 << log_cq_size);
746 resize_hdl->cq_mrhdl = mr;
747 resize_hdl->cq_log_cqsz = log_cq_size;
748
749 bcopy(&new_cqinfo, &(resize_hdl->cq_cqinfo),
750 sizeof (struct hermon_qalloc_info_s));
751
752 /* now, save the address in the cq_handle */
753 cq->cq_resize_hdl = resize_hdl;
754
755 /*
756 * Drop the CQ lock now.
757 */
758
759 mutex_exit(&cq->cq_lock);
760 /*
761 * Fill in the return arguments (if necessary). This includes the
762 * real new completion queue size.
763 */
764 if (actual_size != NULL) {
765 *actual_size = (1 << log_cq_size) - 1;
766 }
767
768 return (DDI_SUCCESS);
769
770 cqresize_fail:
771 return (status);
772 }
773
774
775 /*
776 * hermon_cq_modify()
777 * Context: Can be called base context.
778 */
779 /* ARGSUSED */
780 int
781 hermon_cq_modify(hermon_state_t *state, hermon_cqhdl_t cq,
782 uint_t count, uint_t usec, ibt_cq_handler_id_t hid, uint_t sleepflag)
783 {
784 int status;
785 hermon_hw_cqc_t cqc_entry;
786
787 mutex_enter(&cq->cq_lock);
788 if (count != cq->cq_intmod_count ||
789 usec != cq->cq_intmod_usec) {
790 bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
791 cqc_entry.cq_max_cnt = count;
792 cqc_entry.cq_period = usec;
793 status = hermon_modify_cq_cmd_post(state, &cqc_entry,
794 cq->cq_cqnum, MODIFY_MODERATION_CQ, sleepflag);
795 if (status != HERMON_CMD_SUCCESS) {
796 mutex_exit(&cq->cq_lock);
797 cmn_err(CE_CONT, "Hermon: MODIFY_MODERATION_CQ "
798 "command failed: %08x\n", status);
799 if (status == HERMON_CMD_INVALID_STATUS) {
800 hermon_fm_ereport(state, HCA_SYS_ERR,
801 HCA_ERR_SRV_LOST);
802 }
803 return (ibc_get_ci_failure(0));
804 }
805 cq->cq_intmod_count = count;
806 cq->cq_intmod_usec = usec;
807 }
808 if (hid && (hid - 1 != cq->cq_eqnum)) {
809 bzero(&cqc_entry, sizeof (hermon_hw_cqc_t));
810 cqc_entry.c_eqn = HERMON_HID_TO_EQNUM(state, hid);
811 status = hermon_modify_cq_cmd_post(state, &cqc_entry,
812 cq->cq_cqnum, MODIFY_EQN, sleepflag);
813 if (status != HERMON_CMD_SUCCESS) {
814 mutex_exit(&cq->cq_lock);
815 cmn_err(CE_CONT, "Hermon: MODIFY_EQN command failed: "
816 "%08x\n", status);
817 if (status == HERMON_CMD_INVALID_STATUS) {
818 hermon_fm_ereport(state, HCA_SYS_ERR,
819 HCA_ERR_SRV_LOST);
820 }
821 return (ibc_get_ci_failure(0));
822 }
823 cq->cq_eqnum = hid - 1;
824 }
825 mutex_exit(&cq->cq_lock);
826 return (DDI_SUCCESS);
827 }
828
829 /*
830 * hermon_cq_notify()
831 * Context: Can be called from interrupt or base context.
832 */
833 int
834 hermon_cq_notify(hermon_state_t *state, hermon_cqhdl_t cq,
835 ibt_cq_notify_flags_t flags)
836 {
837 uint_t cmd;
838 ibt_status_t status;
839
840 /* Validate IBT flags and call doorbell routine. */
841 if (flags == IBT_NEXT_COMPLETION) {
842 cmd = HERMON_CQDB_NOTIFY_CQ;
843 } else if (flags == IBT_NEXT_SOLICITED) {
844 cmd = HERMON_CQDB_NOTIFY_CQ_SOLICIT;
845 } else {
846 return (IBT_CQ_NOTIFY_TYPE_INVALID);
847 }
848
849 status = hermon_cq_arm_doorbell(state, cq, cmd);
850 return (status);
851 }
852
853
854 /*
855 * hermon_cq_poll()
856 * Context: Can be called from interrupt or base context.
857 */
858 int
859 hermon_cq_poll(hermon_state_t *state, hermon_cqhdl_t cq, ibt_wc_t *wc_p,
860 uint_t num_wc, uint_t *num_polled)
861 {
862 hermon_hw_cqe_t *cqe;
863 uint_t opcode;
864 uint32_t cons_indx, wrap_around_mask, shift, mask;
865 uint32_t polled_cnt, spec_op = 0;
866 int status;
867
868 /*
869 * Check for user-mappable CQ memory. Note: We do not allow kernel
870 * clients to poll CQ memory that is accessible directly by the user.
871 * If the CQ memory is user accessible, then return an error.
872 */
873 if (cq->cq_is_umap) {
874 return (IBT_CQ_HDL_INVALID);
875 }
876
877 mutex_enter(&cq->cq_lock);
878
879 /* Get the consumer index */
880 cons_indx = cq->cq_consindx;
881 shift = cq->cq_log_cqsz;
882 mask = cq->cq_bufsz;
883
884 /*
885 * Calculate the wrap around mask. Note: This operation only works
886 * because all Hermon completion queues have power-of-2 sizes
887 */
888 wrap_around_mask = (cq->cq_bufsz - 1);
889
890 /* Calculate the pointer to the first CQ entry */
891 cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
892
893 /*
894 * Keep pulling entries from the CQ until we find an entry owned by
895 * the hardware. As long as there the CQE's owned by SW, process
896 * each entry by calling hermon_cq_cqe_consume() and updating the CQ
897 * consumer index. Note: We only update the consumer index if
898 * hermon_cq_cqe_consume() returns HERMON_CQ_SYNC_AND_DB. Otherwise,
899 * it indicates that we are going to "recycle" the CQE (probably
900 * because it is a error CQE and corresponds to more than one
901 * completion).
902 */
903 polled_cnt = 0;
904 while (HERMON_CQE_OWNER_IS_SW(cq, cqe, cons_indx, shift, mask)) {
905 if (cq->cq_resize_hdl != 0) { /* in midst of resize */
906 /* peek at the opcode */
907 opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
908 if (opcode == HERMON_CQE_RCV_RESIZE_CODE) {
909 hermon_cq_resize_helper(state, cq);
910
911 /* Increment the consumer index */
912 cons_indx = (cons_indx + 1);
913 spec_op = 1; /* plus one for the limiting CQE */
914
915 wrap_around_mask = (cq->cq_bufsz - 1);
916
917 /* Update the pointer to the next CQ entry */
918 cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
919
920 continue;
921 }
922 } /* in resizing CQ */
923
924 /*
925 * either resizing and not the special opcode, or
926 * not resizing at all
927 */
928 hermon_cq_cqe_consume(state, cq, cqe, &wc_p[polled_cnt++]);
929
930 /* Increment the consumer index */
931 cons_indx = (cons_indx + 1);
932
933 /* Update the pointer to the next CQ entry */
934 cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
935
936 /*
937 * If we have run out of space to store work completions,
938 * then stop and return the ones we have pulled of the CQ.
939 */
940 if (polled_cnt >= num_wc) {
941 break;
942 }
943 }
944
945 /*
946 * Now we only ring the doorbell (to update the consumer index) if
947 * we've actually consumed a CQ entry.
948 */
949 if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
950 /*
951 * Update the consumer index in both the CQ handle and the
952 * doorbell record.
953 */
954 cq->cq_consindx = cons_indx;
955 hermon_cq_update_ci_doorbell(cq);
956
957 } else if (polled_cnt == 0) {
958 if (spec_op != 0) {
959 /* if we got the special opcode, update the consindx */
960 cq->cq_consindx = cons_indx;
961 hermon_cq_update_ci_doorbell(cq);
962 }
963 }
964
965 mutex_exit(&cq->cq_lock);
966
967 /* Set "num_polled" (if necessary) */
968 if (num_polled != NULL) {
969 *num_polled = polled_cnt;
970 }
971
972 /* Set CQ_EMPTY condition if needed, otherwise return success */
973 if (polled_cnt == 0) {
974 status = IBT_CQ_EMPTY;
975 } else {
976 status = DDI_SUCCESS;
977 }
978
979 /*
980 * Check if the system is currently panicking. If it is, then call
981 * the Hermon interrupt service routine. This step is necessary here
982 * because we might be in a polled I/O mode and without the call to
983 * hermon_isr() - and its subsequent calls to poll and rearm each
984 * event queue - we might overflow our EQs and render the system
985 * unable to sync/dump.
986 */
987 if (ddi_in_panic() != 0) {
988 (void) hermon_isr((caddr_t)state, (caddr_t)NULL);
989 }
990 return (status);
991 }
992
993 /*
994 * cmd_sn must be initialized to 1 to enable proper reenabling
995 * by hermon_arm_cq_dbr_update().
996 */
997 static void
998 hermon_arm_cq_dbr_init(hermon_dbr_t *cq_arm_dbr)
999 {
1000 uint32_t *target;
1001
1002 target = (uint32_t *)cq_arm_dbr + 1;
1003 *target = htonl(1 << HERMON_CQDB_CMDSN_SHIFT);
1004 }
1005
1006
1007 /*
1008 * User cmd_sn needs help from this kernel function to know
1009 * when it should be incremented (modulo 4). We do an atomic
1010 * update of the arm_cq dbr to communicate this fact. We retry
1011 * in the case that user library is racing with us. We zero
1012 * out the cmd field so that the user library can use the cmd
1013 * field to track the last command it issued (solicited verses any).
1014 */
1015 static void
1016 hermon_arm_cq_dbr_update(hermon_dbr_t *cq_arm_dbr)
1017 {
1018 uint32_t tmp, cmp, new;
1019 uint32_t old_cmd_sn, new_cmd_sn;
1020 uint32_t *target;
1021 int retries = 0;
1022
1023 target = (uint32_t *)cq_arm_dbr + 1;
1024 retry:
1025 cmp = *target;
1026 tmp = htonl(cmp);
1027 old_cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
1028 new_cmd_sn = (old_cmd_sn + (0x1 << HERMON_CQDB_CMDSN_SHIFT)) &
1029 (0x3 << HERMON_CQDB_CMDSN_SHIFT);
1030 new = htonl((tmp & ~(0x37 << HERMON_CQDB_CMD_SHIFT)) | new_cmd_sn);
1031 tmp = atomic_cas_32(target, cmp, new);
1032 if (tmp != cmp) { /* cas failed, so need to retry */
1033 drv_usecwait(retries & 0xff); /* avoid race */
1034 if (++retries > 100000) {
1035 cmn_err(CE_CONT, "cas failed in hermon\n");
1036 retries = 0;
1037 }
1038 goto retry;
1039 }
1040 }
1041
1042
1043 /*
1044 * hermon_cq_handler()
1045 * Context: Only called from interrupt context
1046 */
1047 /* ARGSUSED */
1048 int
1049 hermon_cq_handler(hermon_state_t *state, hermon_eqhdl_t eq,
1050 hermon_hw_eqe_t *eqe)
1051 {
1052 hermon_cqhdl_t cq;
1053 uint_t cqnum;
1054
1055 /* Get the CQ handle from CQ number in event descriptor */
1056 cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
1057 cq = hermon_cqhdl_from_cqnum(state, cqnum);
1058
1059 /*
1060 * If the CQ handle is NULL, this is probably an indication
1061 * that the CQ has been freed already. In which case, we
1062 * should not deliver this event.
1063 *
1064 * We also check that the CQ number in the handle is the
1065 * same as the CQ number in the event queue entry. This
1066 * extra check allows us to handle the case where a CQ was
1067 * freed and then allocated again in the time it took to
1068 * handle the event queue processing. By constantly incrementing
1069 * the non-constrained portion of the CQ number every time
1070 * a new CQ is allocated, we mitigate (somewhat) the chance
1071 * that a stale event could be passed to the client's CQ
1072 * handler.
1073 *
1074 * Lastly, we check if "hs_ibtfpriv" is NULL. If it is then it
1075 * means that we've have either received this event before we
1076 * finished attaching to the IBTF or we've received it while we
1077 * are in the process of detaching.
1078 */
1079 if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
1080 (state->hs_ibtfpriv != NULL)) {
1081 hermon_arm_cq_dbr_update(cq->cq_arm_ci_vdbr);
1082 HERMON_DO_IBTF_CQ_CALLB(state, cq);
1083 }
1084
1085 return (DDI_SUCCESS);
1086 }
1087
1088
1089 /*
1090 * hermon_cq_err_handler()
1091 * Context: Only called from interrupt context
1092 */
1093 /* ARGSUSED */
1094 int
1095 hermon_cq_err_handler(hermon_state_t *state, hermon_eqhdl_t eq,
1096 hermon_hw_eqe_t *eqe)
1097 {
1098 hermon_cqhdl_t cq;
1099 uint_t cqnum;
1100 ibc_async_event_t event;
1101 ibt_async_code_t type;
1102
1103 HERMON_FMANOTE(state, HERMON_FMA_OVERRUN);
1104 /* Get the CQ handle from CQ number in event descriptor */
1105 cqnum = HERMON_EQE_CQNUM_GET(eq, eqe);
1106 cq = hermon_cqhdl_from_cqnum(state, cqnum);
1107
1108 /*
1109 * If the CQ handle is NULL, this is probably an indication
1110 * that the CQ has been freed already. In which case, we
1111 * should not deliver this event.
1112 *
1113 * We also check that the CQ number in the handle is the
1114 * same as the CQ number in the event queue entry. This
1115 * extra check allows us to handle the case where a CQ was
1116 * freed and then allocated again in the time it took to
1117 * handle the event queue processing. By constantly incrementing
1118 * the non-constrained portion of the CQ number every time
1119 * a new CQ is allocated, we mitigate (somewhat) the chance
1120 * that a stale event could be passed to the client's CQ
1121 * handler.
1122 *
1123 * And then we check if "hs_ibtfpriv" is NULL. If it is then it
1124 * means that we've have either received this event before we
1125 * finished attaching to the IBTF or we've received it while we
1126 * are in the process of detaching.
1127 */
1128 if ((cq != NULL) && (cq->cq_cqnum == cqnum) &&
1129 (state->hs_ibtfpriv != NULL)) {
1130 event.ev_cq_hdl = (ibt_cq_hdl_t)cq->cq_hdlrarg;
1131 type = IBT_ERROR_CQ;
1132 HERMON_DO_IBTF_ASYNC_CALLB(state, type, &event);
1133 }
1134
1135 return (DDI_SUCCESS);
1136 }
1137
1138
1139 /*
1140 * hermon_cq_refcnt_inc()
1141 * Context: Can be called from interrupt or base context.
1142 */
1143 int
1144 hermon_cq_refcnt_inc(hermon_cqhdl_t cq, uint_t is_special)
1145 {
1146 /*
1147 * Increment the completion queue's reference count. Note: In order
1148 * to ensure compliance with IBA C11-15, we must ensure that a given
1149 * CQ is not used for both special (SMI/GSI) QP and non-special QP.
1150 * This is accomplished here by keeping track of how the referenced
1151 * CQ is being used.
1152 */
1153 mutex_enter(&cq->cq_lock);
1154 if (cq->cq_refcnt == 0) {
1155 cq->cq_is_special = is_special;
1156 } else {
1157 if (cq->cq_is_special != is_special) {
1158 mutex_exit(&cq->cq_lock);
1159 return (DDI_FAILURE);
1160 }
1161 }
1162 cq->cq_refcnt++;
1163 mutex_exit(&cq->cq_lock);
1164 return (DDI_SUCCESS);
1165 }
1166
1167
1168 /*
1169 * hermon_cq_refcnt_dec()
1170 * Context: Can be called from interrupt or base context.
1171 */
1172 void
1173 hermon_cq_refcnt_dec(hermon_cqhdl_t cq)
1174 {
1175 /* Decrement the completion queue's reference count */
1176 mutex_enter(&cq->cq_lock);
1177 cq->cq_refcnt--;
1178 mutex_exit(&cq->cq_lock);
1179 }
1180
1181
1182 /*
1183 * hermon_cq_arm_doorbell()
1184 * Context: Can be called from interrupt or base context.
1185 */
1186 static int
1187 hermon_cq_arm_doorbell(hermon_state_t *state, hermon_cqhdl_t cq, uint_t cq_cmd)
1188 {
1189 uint32_t cq_num;
1190 uint32_t *target;
1191 uint32_t old_cmd, cmp, new, tmp, cmd_sn;
1192 ddi_acc_handle_t uarhdl = hermon_get_uarhdl(state);
1193
1194 /* initialize the FMA retry loop */
1195 hermon_pio_init(fm_loop_cnt, fm_status, fm_test_num);
1196
1197 cq_num = cq->cq_cqnum;
1198 target = (uint32_t *)cq->cq_arm_ci_vdbr + 1;
1199
1200 /* the FMA retry loop starts for Hermon doorbell register. */
1201 hermon_pio_start(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
1202 fm_test_num);
1203 retry:
1204 cmp = *target;
1205 tmp = htonl(cmp);
1206 old_cmd = tmp & (0x7 << HERMON_CQDB_CMD_SHIFT);
1207 cmd_sn = tmp & (0x3 << HERMON_CQDB_CMDSN_SHIFT);
1208 if (cq_cmd == HERMON_CQDB_NOTIFY_CQ) {
1209 if (old_cmd != HERMON_CQDB_NOTIFY_CQ) {
1210 cmd_sn |= (HERMON_CQDB_NOTIFY_CQ <<
1211 HERMON_CQDB_CMD_SHIFT);
1212 new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
1213 tmp = atomic_cas_32(target, cmp, new);
1214 if (tmp != cmp)
1215 goto retry;
1216 HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
1217 &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
1218 32) | (cq->cq_consindx & 0xFFFFFF));
1219 } /* else it's already armed */
1220 } else {
1221 ASSERT(cq_cmd == HERMON_CQDB_NOTIFY_CQ_SOLICIT);
1222 if (old_cmd != HERMON_CQDB_NOTIFY_CQ &&
1223 old_cmd != HERMON_CQDB_NOTIFY_CQ_SOLICIT) {
1224 cmd_sn |= (HERMON_CQDB_NOTIFY_CQ_SOLICIT <<
1225 HERMON_CQDB_CMD_SHIFT);
1226 new = htonl(cmd_sn | (cq->cq_consindx & 0xFFFFFF));
1227 tmp = atomic_cas_32(target, cmp, new);
1228 if (tmp != cmp)
1229 goto retry;
1230 HERMON_UAR_DOORBELL(state, uarhdl, (uint64_t *)(void *)
1231 &state->hs_uar->cq, (((uint64_t)cmd_sn | cq_num) <<
1232 32) | (cq->cq_consindx & 0xFFFFFF));
1233 } /* else it's already armed */
1234 }
1235
1236 /* the FMA retry loop ends. */
1237 hermon_pio_end(state, uarhdl, pio_error, fm_loop_cnt, fm_status,
1238 fm_test_num);
1239
1240 return (IBT_SUCCESS);
1241
1242 pio_error:
1243 hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
1244 return (ibc_get_ci_failure(0));
1245 }
1246
1247
1248 /*
1249 * hermon_cqhdl_from_cqnum()
1250 * Context: Can be called from interrupt or base context.
1251 *
1252 * This routine is important because changing the unconstrained
1253 * portion of the CQ number is critical to the detection of a
1254 * potential race condition in the CQ handler code (i.e. the case
1255 * where a CQ is freed and alloc'd again before an event for the
1256 * "old" CQ can be handled).
1257 *
1258 * While this is not a perfect solution (not sure that one exists)
1259 * it does help to mitigate the chance that this race condition will
1260 * cause us to deliver a "stale" event to the new CQ owner. Note:
1261 * this solution does not scale well because the number of constrained
1262 * bits increases (and, hence, the number of unconstrained bits
1263 * decreases) as the number of supported CQs grows. For small and
1264 * intermediate values, it should hopefully provide sufficient
1265 * protection.
1266 */
1267 hermon_cqhdl_t
1268 hermon_cqhdl_from_cqnum(hermon_state_t *state, uint_t cqnum)
1269 {
1270 uint_t cqindx, cqmask;
1271
1272 /* Calculate the CQ table index from the cqnum */
1273 cqmask = (1 << state->hs_cfg_profile->cp_log_num_cq) - 1;
1274 cqindx = cqnum & cqmask;
1275 return (hermon_icm_num_to_hdl(state, HERMON_CQC, cqindx));
1276 }
1277
1278 /*
1279 * hermon_cq_cqe_consume()
1280 * Context: Can be called from interrupt or base context.
1281 */
1282 static void
1283 hermon_cq_cqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
1284 hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
1285 {
1286 uint_t opcode, qpnum, qp1_indx;
1287 ibt_wc_flags_t flags;
1288 ibt_wrc_opcode_t type;
1289
1290 /*
1291 * Determine if this is an "error" CQE by examining "opcode". If it
1292 * is an error CQE, then call hermon_cq_errcqe_consume() and return
1293 * whatever status it returns. Otherwise, this is a successful
1294 * completion.
1295 */
1296 opcode = HERMON_CQE_OPCODE_GET(cq, cqe);
1297 if ((opcode == HERMON_CQE_SEND_ERR_OPCODE) ||
1298 (opcode == HERMON_CQE_RECV_ERR_OPCODE)) {
1299 hermon_cq_errcqe_consume(state, cq, cqe, wc);
1300 return;
1301 }
1302
1303 /*
1304 * Fetch the Work Request ID using the information in the CQE.
1305 * See hermon_wr.c for more details.
1306 */
1307 wc->wc_id = hermon_wrid_get_entry(cq, cqe);
1308
1309 /*
1310 * Parse the CQE opcode to determine completion type. This will set
1311 * not only the type of the completion, but also any flags that might
1312 * be associated with it (e.g. whether immediate data is present).
1313 */
1314 flags = IBT_WC_NO_FLAGS;
1315 if (HERMON_CQE_SENDRECV_GET(cq, cqe) != HERMON_COMPLETION_RECV) {
1316
1317 /* Send CQE */
1318 switch (opcode) {
1319 case HERMON_CQE_SND_RDMAWR_IMM:
1320 case HERMON_CQE_SND_RDMAWR:
1321 type = IBT_WRC_RDMAW;
1322 break;
1323
1324 case HERMON_CQE_SND_SEND_INV:
1325 case HERMON_CQE_SND_SEND_IMM:
1326 case HERMON_CQE_SND_SEND:
1327 type = IBT_WRC_SEND;
1328 break;
1329
1330 case HERMON_CQE_SND_LSO:
1331 type = IBT_WRC_SEND_LSO;
1332 break;
1333
1334 case HERMON_CQE_SND_RDMARD:
1335 type = IBT_WRC_RDMAR;
1336 break;
1337
1338 case HERMON_CQE_SND_ATOMIC_CS:
1339 type = IBT_WRC_CSWAP;
1340 break;
1341
1342 case HERMON_CQE_SND_ATOMIC_FA:
1343 type = IBT_WRC_FADD;
1344 break;
1345
1346 case HERMON_CQE_SND_BIND_MW:
1347 type = IBT_WRC_BIND;
1348 break;
1349
1350 case HERMON_CQE_SND_FRWR:
1351 type = IBT_WRC_FAST_REG_PMR;
1352 break;
1353
1354 case HERMON_CQE_SND_LCL_INV:
1355 type = IBT_WRC_LOCAL_INVALIDATE;
1356 break;
1357
1358 default:
1359 HERMON_WARNING(state, "unknown send CQE type");
1360 wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
1361 return;
1362 }
1363 } else if ((state->hs_fcoib_may_be_running == B_TRUE) &&
1364 hermon_fcoib_is_fexch_qpn(state, HERMON_CQE_QPNUM_GET(cq, cqe))) {
1365 type = IBT_WRC_RECV;
1366 if (HERMON_CQE_FEXCH_DIFE(cq, cqe))
1367 flags |= IBT_WC_DIF_ERROR;
1368 wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
1369 wc->wc_fexch_seq_cnt = HERMON_CQE_FEXCH_SEQ_CNT(cq, cqe);
1370 wc->wc_fexch_tx_bytes_xfer = HERMON_CQE_FEXCH_TX_BYTES(cq, cqe);
1371 wc->wc_fexch_rx_bytes_xfer = HERMON_CQE_FEXCH_RX_BYTES(cq, cqe);
1372 wc->wc_fexch_seq_id = HERMON_CQE_FEXCH_SEQ_ID(cq, cqe);
1373 wc->wc_detail = HERMON_CQE_FEXCH_DETAIL(cq, cqe) &
1374 IBT_WC_DETAIL_FC_MATCH_MASK;
1375 wc->wc_rkey = HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
1376 flags |= IBT_WC_FEXCH_FMT | IBT_WC_RKEY_INVALIDATED;
1377 } else {
1378 /*
1379 * Parse the remaining contents of the CQE into the work
1380 * completion. This means filling in SL, QP number, SLID,
1381 * immediate data, etc.
1382 *
1383 * Note: Not all of these fields are valid in a given
1384 * completion. Many of them depend on the actual type of
1385 * completion. So we fill in all of the fields and leave
1386 * it up to the IBTF and consumer to sort out which are
1387 * valid based on their context.
1388 */
1389 wc->wc_sl = HERMON_CQE_SL_GET(cq, cqe);
1390 wc->wc_qpn = HERMON_CQE_DQPN_GET(cq, cqe);
1391 wc->wc_slid = HERMON_CQE_DLID_GET(cq, cqe);
1392 wc->wc_immed_data =
1393 HERMON_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
1394 wc->wc_ethertype = (wc->wc_immed_data & 0xFFFF);
1395 wc->wc_pkey_ix = (wc->wc_immed_data &
1396 ((1 << state->hs_queryport.log_max_pkey) - 1));
1397 /*
1398 * Fill in "bytes transferred" as appropriate. Also,
1399 * if necessary, fill in the "path bits" field.
1400 */
1401 wc->wc_path_bits = HERMON_CQE_PATHBITS_GET(cq, cqe);
1402 wc->wc_bytes_xfer = HERMON_CQE_BYTECNT_GET(cq, cqe);
1403
1404 /*
1405 * Check for GRH, update the flags, then fill in "wc_flags"
1406 * field in the work completion
1407 */
1408 if (HERMON_CQE_GRH_GET(cq, cqe) != 0) {
1409 flags |= IBT_WC_GRH_PRESENT;
1410 }
1411
1412 /* Receive CQE */
1413 switch (opcode) {
1414 case HERMON_CQE_RCV_SEND_IMM:
1415 /*
1416 * Note: According to the PRM, all QP1 recv
1417 * completions look like the result of a Send with
1418 * Immediate. They are not, however, (MADs are Send
1419 * Only) so we need to check the QP number and set
1420 * the flag only if it is non-QP1.
1421 */
1422 qpnum = HERMON_CQE_QPNUM_GET(cq, cqe);
1423 qp1_indx = state->hs_spec_qp1->hr_indx;
1424 if ((qpnum < qp1_indx) || (qpnum > qp1_indx + 1)) {
1425 flags |= IBT_WC_IMMED_DATA_PRESENT;
1426 }
1427 /* FALLTHROUGH */
1428
1429 case HERMON_CQE_RCV_SEND:
1430 type = IBT_WRC_RECV;
1431 if (HERMON_CQE_IS_IPOK(cq, cqe)) {
1432 wc->wc_cksum = HERMON_CQE_CKSUM(cq, cqe);
1433 flags |= IBT_WC_CKSUM_OK;
1434 wc->wc_detail = IBT_WC_DETAIL_ALL_FLAGS_MASK &
1435 HERMON_CQE_IPOIB_STATUS(cq, cqe);
1436 }
1437 break;
1438
1439 case HERMON_CQE_RCV_SEND_INV:
1440 type = IBT_WRC_RECV;
1441 flags |= IBT_WC_RKEY_INVALIDATED;
1442 wc->wc_rkey = wc->wc_immed_data; /* same field in cqe */
1443 break;
1444
1445 case HERMON_CQE_RCV_RDMAWR_IMM:
1446 flags |= IBT_WC_IMMED_DATA_PRESENT;
1447 type = IBT_WRC_RECV_RDMAWI;
1448 break;
1449
1450 default:
1451
1452 HERMON_WARNING(state, "unknown recv CQE type");
1453 wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
1454 return;
1455 }
1456 }
1457 wc->wc_type = type;
1458 wc->wc_flags = flags;
1459 wc->wc_status = IBT_WC_SUCCESS;
1460 }
1461
1462 /*
1463 * hermon_cq_errcqe_consume()
1464 * Context: Can be called from interrupt or base context.
1465 */
1466 static void
1467 hermon_cq_errcqe_consume(hermon_state_t *state, hermon_cqhdl_t cq,
1468 hermon_hw_cqe_t *cqe, ibt_wc_t *wc)
1469 {
1470 uint32_t imm_eth_pkey_cred;
1471 uint_t status;
1472 ibt_wc_status_t ibt_status;
1473
1474 /*
1475 * Fetch the Work Request ID using the information in the CQE.
1476 * See hermon_wr.c for more details.
1477 */
1478 wc->wc_id = hermon_wrid_get_entry(cq, cqe);
1479
1480 /*
1481 * Parse the CQE opcode to determine completion type. We know that
1482 * the CQE is an error completion, so we extract only the completion
1483 * status/syndrome here.
1484 */
1485 imm_eth_pkey_cred = HERMON_CQE_ERROR_SYNDROME_GET(cq, cqe);
1486 status = imm_eth_pkey_cred;
1487 if (status != HERMON_CQE_WR_FLUSHED_ERR)
1488 IBTF_DPRINTF_L2("CQE ERR", "cqe %p QPN %x indx %x status 0x%x "
1489 "vendor syndrome %x", cqe, HERMON_CQE_QPNUM_GET(cq, cqe),
1490 HERMON_CQE_WQECNTR_GET(cq, cqe), status,
1491 HERMON_CQE_ERROR_VENDOR_SYNDROME_GET(cq, cqe));
1492 switch (status) {
1493 case HERMON_CQE_LOC_LEN_ERR:
1494 HERMON_WARNING(state, HERMON_FMA_LOCLEN);
1495 ibt_status = IBT_WC_LOCAL_LEN_ERR;
1496 break;
1497
1498 case HERMON_CQE_LOC_OP_ERR:
1499 HERMON_WARNING(state, HERMON_FMA_LOCQPOP);
1500 ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
1501 break;
1502
1503 case HERMON_CQE_LOC_PROT_ERR:
1504 HERMON_WARNING(state, HERMON_FMA_LOCPROT);
1505 ibt_status = IBT_WC_LOCAL_PROTECT_ERR;
1506 IBTF_DPRINTF_L2("ERRCQE", "is at %p", cqe);
1507 if (hermon_should_panic) {
1508 cmn_err(CE_PANIC, "Hermon intentional PANIC - "
1509 "Local Protection Error\n");
1510 }
1511 break;
1512
1513 case HERMON_CQE_WR_FLUSHED_ERR:
1514 ibt_status = IBT_WC_WR_FLUSHED_ERR;
1515 break;
1516
1517 case HERMON_CQE_MW_BIND_ERR:
1518 HERMON_WARNING(state, HERMON_FMA_MWBIND);
1519 ibt_status = IBT_WC_MEM_WIN_BIND_ERR;
1520 break;
1521
1522 case HERMON_CQE_BAD_RESPONSE_ERR:
1523 HERMON_WARNING(state, HERMON_FMA_RESP);
1524 ibt_status = IBT_WC_BAD_RESPONSE_ERR;
1525 break;
1526
1527 case HERMON_CQE_LOCAL_ACCESS_ERR:
1528 HERMON_WARNING(state, HERMON_FMA_LOCACC);
1529 ibt_status = IBT_WC_LOCAL_ACCESS_ERR;
1530 break;
1531
1532 case HERMON_CQE_REM_INV_REQ_ERR:
1533 HERMON_WARNING(state, HERMON_FMA_REMREQ);
1534 ibt_status = IBT_WC_REMOTE_INVALID_REQ_ERR;
1535 break;
1536
1537 case HERMON_CQE_REM_ACC_ERR:
1538 HERMON_WARNING(state, HERMON_FMA_REMACC);
1539 ibt_status = IBT_WC_REMOTE_ACCESS_ERR;
1540 break;
1541
1542 case HERMON_CQE_REM_OP_ERR:
1543 HERMON_WARNING(state, HERMON_FMA_REMOP);
1544 ibt_status = IBT_WC_REMOTE_OP_ERR;
1545 break;
1546
1547 case HERMON_CQE_TRANS_TO_ERR:
1548 HERMON_WARNING(state, HERMON_FMA_XPORTCNT);
1549 ibt_status = IBT_WC_TRANS_TIMEOUT_ERR;
1550 break;
1551
1552 case HERMON_CQE_RNRNAK_TO_ERR:
1553 HERMON_WARNING(state, HERMON_FMA_RNRCNT);
1554 ibt_status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
1555 break;
1556
1557 /*
1558 * The following error codes are not supported in the Hermon driver
1559 * as they relate only to Reliable Datagram completion statuses:
1560 * case HERMON_CQE_LOCAL_RDD_VIO_ERR:
1561 * case HERMON_CQE_REM_INV_RD_REQ_ERR:
1562 * case HERMON_CQE_EEC_REM_ABORTED_ERR:
1563 * case HERMON_CQE_INV_EEC_NUM_ERR:
1564 * case HERMON_CQE_INV_EEC_STATE_ERR:
1565 * case HERMON_CQE_LOC_EEC_ERR:
1566 */
1567
1568 default:
1569 HERMON_WARNING(state, "unknown error CQE status");
1570 HERMON_FMANOTE(state, HERMON_FMA_UNKN);
1571 ibt_status = IBT_WC_LOCAL_QP_OP_ERR;
1572 break;
1573 }
1574
1575 wc->wc_status = ibt_status;
1576 }
1577
1578
1579 /*
1580 * hermon_cq_resize_helper()
1581 * Context: Can be called only from user or kernel context.
1582 */
1583 void
1584 hermon_cq_resize_helper(hermon_state_t *state, hermon_cqhdl_t cq)
1585 {
1586 hermon_cqhdl_t resize_hdl;
1587 int status;
1588
1589 /*
1590 * we're here because we found the special cqe opcode, so we have
1591 * to update the cq_handle, release the old resources, clear the
1592 * flag in the cq_hdl, and release the resize_hdl. When we return
1593 * above, it will take care of the rest
1594 */
1595 ASSERT(MUTEX_HELD(&cq->cq_lock));
1596
1597 resize_hdl = cq->cq_resize_hdl;
1598
1599 /*
1600 * Deregister the memory for the old Completion Queue. Note: We
1601 * really can't return error here because we have no good way to
1602 * cleanup. Plus, the deregistration really shouldn't ever happen.
1603 * So, if it does, it is an indication that something has gone
1604 * seriously wrong. So we print a warning message and return error
1605 * (knowing, of course, that the "old" CQ memory will be leaked)
1606 */
1607 status = hermon_mr_deregister(state, &cq->cq_mrhdl, HERMON_MR_DEREG_ALL,
1608 HERMON_SLEEP);
1609 if (status != DDI_SUCCESS) {
1610 HERMON_WARNING(state, "failed to deregister old CQ memory");
1611 }
1612
1613 /* Next, free the memory from the old CQ buffer */
1614 hermon_queue_free(&cq->cq_cqinfo);
1615
1616 /* now we can update the cq_hdl with the new things saved */
1617
1618 cq->cq_buf = resize_hdl->cq_buf;
1619 cq->cq_mrhdl = resize_hdl->cq_mrhdl;
1620 cq->cq_bufsz = resize_hdl->cq_bufsz;
1621 cq->cq_log_cqsz = resize_hdl->cq_log_cqsz;
1622 cq->cq_umap_dhp = cq->cq_resize_hdl->cq_umap_dhp;
1623 cq->cq_resize_hdl = 0;
1624 bcopy(&resize_hdl->cq_cqinfo, &cq->cq_cqinfo,
1625 sizeof (struct hermon_qalloc_info_s));
1626
1627 /* finally, release the resizing handle */
1628 kmem_free(resize_hdl, sizeof (struct hermon_sw_cq_s));
1629 }
1630
1631
1632 /*
1633 * hermon_cq_entries_flush()
1634 * Context: Can be called from interrupt or base context.
1635 */
1636 /* ARGSUSED */
1637 void
1638 hermon_cq_entries_flush(hermon_state_t *state, hermon_qphdl_t qp)
1639 {
1640 hermon_cqhdl_t cq;
1641 hermon_hw_cqe_t *cqe, *next_cqe;
1642 hermon_srqhdl_t srq;
1643 hermon_workq_hdr_t *wq;
1644 uint32_t cons_indx, tail_cons_indx, wrap_around_mask;
1645 uint32_t new_indx, check_indx, qpnum;
1646 uint32_t shift, mask;
1647 int outstanding_cqes;
1648
1649 qpnum = qp->qp_qpnum;
1650 if ((srq = qp->qp_srqhdl) != NULL)
1651 wq = qp->qp_srqhdl->srq_wq_wqhdr;
1652 else
1653 wq = NULL;
1654 cq = qp->qp_rq_cqhdl;
1655
1656 if (cq == NULL) {
1657 cq = qp->qp_sq_cqhdl;
1658 }
1659
1660 do_send_cq: /* loop back to here if send_cq is not the same as recv_cq */
1661 if (cq == NULL)
1662 return;
1663
1664 cons_indx = cq->cq_consindx;
1665 shift = cq->cq_log_cqsz;
1666 mask = cq->cq_bufsz;
1667 wrap_around_mask = mask - 1;
1668
1669 /* Calculate the pointer to the first CQ entry */
1670 cqe = &cq->cq_buf[cons_indx & wrap_around_mask];
1671
1672 /*
1673 * Loop through the CQ looking for entries owned by software. If an
1674 * entry is owned by software then we increment an 'outstanding_cqes'
1675 * count to know how many entries total we have on our CQ. We use this
1676 * value further down to know how many entries to loop through looking
1677 * for our same QP number.
1678 */
1679 outstanding_cqes = 0;
1680 tail_cons_indx = cons_indx;
1681 while (HERMON_CQE_OWNER_IS_SW(cq, cqe, tail_cons_indx, shift, mask)) {
1682 /* increment total cqes count */
1683 outstanding_cqes++;
1684
1685 /* increment the consumer index */
1686 tail_cons_indx++;
1687
1688 /* update the pointer to the next cq entry */
1689 cqe = &cq->cq_buf[tail_cons_indx & wrap_around_mask];
1690 }
1691
1692 /*
1693 * Using the 'tail_cons_indx' that was just set, we now know how many
1694 * total CQEs possible there are. Set the 'check_indx' and the
1695 * 'new_indx' to the last entry identified by 'tail_cons_indx'
1696 */
1697 check_indx = new_indx = (tail_cons_indx - 1);
1698
1699 while (--outstanding_cqes >= 0) {
1700 cqe = &cq->cq_buf[check_indx & wrap_around_mask];
1701
1702 /*
1703 * If the QP number is the same in the CQE as the QP, then
1704 * we must "consume" it. If it is for an SRQ wqe, then we
1705 * also must free the wqe back onto the free list of the SRQ.
1706 */
1707 if (qpnum == HERMON_CQE_QPNUM_GET(cq, cqe)) {
1708 if (srq && (HERMON_CQE_SENDRECV_GET(cq, cqe) ==
1709 HERMON_COMPLETION_RECV)) {
1710 uint64_t *desc;
1711 int indx;
1712
1713 /* Add wqe back to SRQ free list */
1714 indx = HERMON_CQE_WQEADDRSZ_GET(cq, cqe) &
1715 wq->wq_mask;
1716 desc = HERMON_SRQ_WQE_ADDR(srq, wq->wq_tail);
1717 ((uint16_t *)desc)[1] = htons(indx);
1718 wq->wq_tail = indx;
1719 }
1720 } else { /* CQEs for other QPNs need to remain */
1721 if (check_indx != new_indx) {
1722 next_cqe =
1723 &cq->cq_buf[new_indx & wrap_around_mask];
1724 /* Copy the CQE into the "next_cqe" pointer. */
1725 bcopy(cqe, next_cqe, sizeof (hermon_hw_cqe_t));
1726 }
1727 new_indx--; /* move index to next CQE to fill */
1728 }
1729 check_indx--; /* move index to next CQE to check */
1730 }
1731
1732 /*
1733 * Update consumer index to be the 'new_indx'. This moves it past all
1734 * removed entries. Because 'new_indx' is pointing to the last
1735 * previously valid SW owned entry, we add 1 to point the cons_indx to
1736 * the first HW owned entry.
1737 */
1738 cons_indx = (new_indx + 1);
1739
1740 /*
1741 * Now we only ring the doorbell (to update the consumer index) if
1742 * we've actually consumed a CQ entry. If we found no QP number
1743 * matches above, then we would not have removed anything. So only if
1744 * something was removed do we ring the doorbell.
1745 */
1746 if (cq->cq_consindx != cons_indx) {
1747 /*
1748 * Update the consumer index in both the CQ handle and the
1749 * doorbell record.
1750 */
1751 cq->cq_consindx = cons_indx;
1752
1753 hermon_cq_update_ci_doorbell(cq);
1754
1755 }
1756 if (cq != qp->qp_sq_cqhdl) {
1757 cq = qp->qp_sq_cqhdl;
1758 goto do_send_cq;
1759 }
1760 }
1761
1762 /*
1763 * hermon_get_cq_sched_list()
1764 * Context: Only called from attach() path context
1765 *
1766 * Read properties, creating entries in hs_cq_sched_list with
1767 * information about the requested "expected" and "minimum"
1768 * number of MSI-X interrupt vectors per list entry.
1769 */
1770 static int
1771 hermon_get_cq_sched_list(hermon_state_t *state)
1772 {
1773 char **listp, ulp_prop[HERMON_CQH_MAX + 4];
1774 uint_t nlist, i, j, ndata;
1775 int *data;
1776 size_t len;
1777 hermon_cq_sched_t *cq_schedp;
1778
1779 if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, state->hs_dip,
1780 DDI_PROP_DONTPASS, "cqh-group-list", &listp, &nlist) !=
1781 DDI_PROP_SUCCESS)
1782 return (0);
1783
1784 state->hs_cq_sched_array_size = nlist;
1785 state->hs_cq_sched_array = cq_schedp = kmem_zalloc(nlist *
1786 sizeof (hermon_cq_sched_t), KM_SLEEP);
1787 for (i = 0; i < nlist; i++) {
1788 if ((len = strlen(listp[i])) >= HERMON_CQH_MAX) {
1789 cmn_err(CE_CONT, "'cqh' property name too long\n");
1790 goto game_over;
1791 }
1792 for (j = 0; j < i; j++) {
1793 if (strcmp(listp[j], listp[i]) == 0) {
1794 cmn_err(CE_CONT, "Duplicate 'cqh' property\n");
1795 goto game_over;
1796 }
1797 }
1798 (void) strncpy(cq_schedp[i].cqs_name, listp[i], HERMON_CQH_MAX);
1799 ulp_prop[0] = 'c';
1800 ulp_prop[1] = 'q';
1801 ulp_prop[2] = 'h';
1802 ulp_prop[3] = '-';
1803 (void) strncpy(ulp_prop + 4, listp[i], len + 1);
1804 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
1805 DDI_PROP_DONTPASS, ulp_prop, &data, &ndata) !=
1806 DDI_PROP_SUCCESS) {
1807 cmn_err(CE_CONT, "property '%s' not found\n", ulp_prop);
1808 goto game_over;
1809 }
1810 if (ndata != 2) {
1811 cmn_err(CE_CONT, "property '%s' does not "
1812 "have 2 integers\n", ulp_prop);
1813 goto game_over_free_data;
1814 }
1815 cq_schedp[i].cqs_desired = data[0];
1816 cq_schedp[i].cqs_minimum = data[1];
1817 cq_schedp[i].cqs_refcnt = 0;
1818 ddi_prop_free(data);
1819 }
1820 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
1821 DDI_PROP_DONTPASS, "cqh-default", &data, &ndata) !=
1822 DDI_PROP_SUCCESS) {
1823 cmn_err(CE_CONT, "property 'cqh-default' not found\n");
1824 goto game_over;
1825 }
1826 if (ndata != 2) {
1827 cmn_err(CE_CONT, "property 'cqh-default' does not "
1828 "have 2 integers\n");
1829 goto game_over_free_data;
1830 }
1831 cq_schedp = &state->hs_cq_sched_default;
1832 cq_schedp->cqs_desired = data[0];
1833 cq_schedp->cqs_minimum = data[1];
1834 cq_schedp->cqs_refcnt = 0;
1835 ddi_prop_free(data);
1836 ddi_prop_free(listp);
1837 return (1); /* game on */
1838
1839 game_over_free_data:
1840 ddi_prop_free(data);
1841 game_over:
1842 cmn_err(CE_CONT, "Error in 'cqh' properties in hermon.conf\n");
1843 cmn_err(CE_CONT, "completion handler groups not being used\n");
1844 kmem_free(cq_schedp, nlist * sizeof (hermon_cq_sched_t));
1845 state->hs_cq_sched_array_size = 0;
1846 ddi_prop_free(listp);
1847 return (0);
1848 }
1849
1850 /*
1851 * hermon_cq_sched_init()
1852 * Context: Only called from attach() path context
1853 *
1854 * Read the hermon.conf properties looking for cq_sched info,
1855 * creating reserved pools of MSI-X interrupt ranges for the
1856 * specified ULPs.
1857 */
1858 int
1859 hermon_cq_sched_init(hermon_state_t *state)
1860 {
1861 hermon_cq_sched_t *cq_schedp, *defp;
1862 int i, desired, array_size;
1863
1864 mutex_init(&state->hs_cq_sched_lock, NULL, MUTEX_DRIVER,
1865 DDI_INTR_PRI(state->hs_intrmsi_pri));
1866
1867 mutex_enter(&state->hs_cq_sched_lock);
1868 state->hs_cq_sched_array = NULL;
1869
1870 /* initialize cq_sched_default */
1871 defp = &state->hs_cq_sched_default;
1872 defp->cqs_start_hid = 1;
1873 defp->cqs_len = state->hs_intrmsi_allocd;
1874 defp->cqs_next_alloc = defp->cqs_len - 1;
1875 (void) strncpy(defp->cqs_name, "default", 8);
1876
1877 /* Read properties to determine which ULPs use cq_sched */
1878 if (hermon_get_cq_sched_list(state) == 0)
1879 goto done;
1880
1881 /* Determine if we have enough vectors, or if we have to scale down */
1882 desired = defp->cqs_desired; /* default desired (from hermon.conf) */
1883 if (desired <= 0)
1884 goto done; /* all interrupts in the default pool */
1885 cq_schedp = state->hs_cq_sched_array;
1886 array_size = state->hs_cq_sched_array_size;
1887 for (i = 0; i < array_size; i++)
1888 desired += cq_schedp[i].cqs_desired;
1889 if (desired > state->hs_intrmsi_allocd) {
1890 cmn_err(CE_CONT, "#interrupts allocated (%d) is less than "
1891 "the #interrupts desired (%d)\n",
1892 state->hs_intrmsi_allocd, desired);
1893 cmn_err(CE_CONT, "completion handler groups not being used\n");
1894 goto done; /* all interrupts in the default pool */
1895 }
1896 /* Game on. For each cq_sched group, reserve the MSI-X range */
1897 for (i = 0; i < array_size; i++) {
1898 desired = cq_schedp[i].cqs_desired;
1899 cq_schedp[i].cqs_start_hid = defp->cqs_start_hid;
1900 cq_schedp[i].cqs_len = desired;
1901 cq_schedp[i].cqs_next_alloc = desired - 1;
1902 defp->cqs_len -= desired;
1903 defp->cqs_start_hid += desired;
1904 }
1905 /* reset default's start allocation seed */
1906 state->hs_cq_sched_default.cqs_next_alloc =
1907 state->hs_cq_sched_default.cqs_len - 1;
1908
1909 done:
1910 mutex_exit(&state->hs_cq_sched_lock);
1911 return (IBT_SUCCESS);
1912 }
1913
1914 void
1915 hermon_cq_sched_fini(hermon_state_t *state)
1916 {
1917 mutex_enter(&state->hs_cq_sched_lock);
1918 if (state->hs_cq_sched_array_size) {
1919 kmem_free(state->hs_cq_sched_array, sizeof (hermon_cq_sched_t) *
1920 state->hs_cq_sched_array_size);
1921 state->hs_cq_sched_array_size = 0;
1922 state->hs_cq_sched_array = NULL;
1923 }
1924 mutex_exit(&state->hs_cq_sched_lock);
1925 mutex_destroy(&state->hs_cq_sched_lock);
1926 }
1927
1928 int
1929 hermon_cq_sched_alloc(hermon_state_t *state, ibt_cq_sched_attr_t *attr,
1930 hermon_cq_sched_t **cq_sched_pp)
1931 {
1932 hermon_cq_sched_t *cq_schedp;
1933 int i;
1934 char *name;
1935 ibt_cq_sched_flags_t flags;
1936
1937 flags = attr->cqs_flags;
1938 if ((flags & (IBT_CQS_SCHED_GROUP | IBT_CQS_EXACT_SCHED_GROUP)) == 0) {
1939 *cq_sched_pp = NULL;
1940 return (IBT_SUCCESS);
1941 }
1942 name = attr->cqs_pool_name;
1943
1944 mutex_enter(&state->hs_cq_sched_lock);
1945 cq_schedp = state->hs_cq_sched_array;
1946 for (i = 0; i < state->hs_cq_sched_array_size; i++, cq_schedp++) {
1947 if (strcmp(name, cq_schedp->cqs_name) == 0) {
1948 if (cq_schedp->cqs_len != 0)
1949 cq_schedp->cqs_refcnt++;
1950 break; /* found it */
1951 }
1952 }
1953 if ((i == state->hs_cq_sched_array_size) || /* not found, or */
1954 (cq_schedp->cqs_len == 0)) /* defined, but no dedicated intr's */
1955 cq_schedp = NULL;
1956 mutex_exit(&state->hs_cq_sched_lock);
1957
1958 *cq_sched_pp = cq_schedp; /* set to valid hdl, or to NULL */
1959 if ((cq_schedp == NULL) &&
1960 (attr->cqs_flags & IBT_CQS_EXACT_SCHED_GROUP))
1961 return (IBT_CQ_NO_SCHED_GROUP);
1962 else
1963 return (IBT_SUCCESS);
1964 }
1965
1966 int
1967 hermon_cq_sched_free(hermon_state_t *state, hermon_cq_sched_t *cq_schedp)
1968 {
1969 if (cq_schedp != NULL) {
1970 /* Just decrement refcnt */
1971 mutex_enter(&state->hs_cq_sched_lock);
1972 if (cq_schedp->cqs_refcnt == 0)
1973 HERMON_WARNING(state, "cq_sched free underflow\n");
1974 else
1975 cq_schedp->cqs_refcnt--;
1976 mutex_exit(&state->hs_cq_sched_lock);
1977 }
1978 return (IBT_SUCCESS);
1979 }