1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * tavor_misc.c
29 * Tavor Miscellaneous routines - Address Handle, Multicast, Protection
30 * Domain, and port-related operations
31 *
32 * Implements all the routines necessary for allocating, freeing, querying
33 * and modifying Address Handles and Protection Domains. Also implements
34 * all the routines necessary for adding and removing Queue Pairs to/from
35 * Multicast Groups. Lastly, it implements the routines necessary for
36 * port-related query and modify operations.
37 */
38
39 #include <sys/types.h>
40 #include <sys/conf.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/modctl.h>
44 #include <sys/bitmap.h>
45 #include <sys/sysmacros.h>
46
47 #include <sys/ib/adapters/tavor/tavor.h>
48
49 static void tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav,
50 uint_t flag);
51 static int tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg,
52 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp, uint_t *qp_found);
53 static int tavor_mcg_qplist_remove(tavor_mcghdl_t mcg,
54 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp);
55 static void tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp);
56 static void tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp);
57 static uint_t tavor_mcg_walk_mgid_hash(tavor_state_t *state,
58 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx);
59 static void tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg,
60 tavor_hw_mcg_t *mcg_hdr, ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc);
61 static int tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx,
62 uint_t prev_indx, tavor_hw_mcg_t *mcg_entry);
63 static int tavor_mcg_entry_invalidate(tavor_state_t *state,
64 tavor_hw_mcg_t *mcg_entry, uint_t indx);
65 static int tavor_mgid_is_valid(ib_gid_t gid);
66 static int tavor_mlid_is_valid(ib_lid_t lid);
67
68
69 /*
70 * tavor_ah_alloc()
71 * Context: Can be called only from user or kernel context.
72 */
73 int
74 tavor_ah_alloc(tavor_state_t *state, tavor_pdhdl_t pd,
75 ibt_adds_vect_t *attr_p, tavor_ahhdl_t *ahhdl, uint_t sleepflag)
76 {
77 tavor_rsrc_t *udav, *rsrc;
78 tavor_hw_udav_t udav_entry;
79 tavor_ahhdl_t ah;
80 ibt_mr_attr_t mr_attr;
81 tavor_mr_options_t op;
82 tavor_mrhdl_t mr;
83 uint64_t data;
84 uint32_t size;
85 int status, i, flag;
86 char *errormsg;
87
88 TAVOR_TNF_ENTER(tavor_ah_alloc);
89
90 /*
91 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
92 * indicate that we wish to allocate an "invalid" (i.e. empty)
93 * address handle XXX
94 */
95
96 /* Validate that specified port number is legal */
97 if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) {
98 /* Set "status" and "errormsg" and goto failure */
99 TAVOR_TNF_FAIL(IBT_HCA_PORT_INVALID, "invalid port num");
100 goto ahalloc_fail;
101 }
102
103 /*
104 * Allocate a UDAV entry. This will be filled in with all the
105 * necessary parameters to define the Address Handle. Unlike the
106 * other hardware resources no ownership transfer takes place as
107 * these UDAV entries are always owned by hardware.
108 */
109 status = tavor_rsrc_alloc(state, TAVOR_UDAV, 1, sleepflag, &udav);
110 if (status != DDI_SUCCESS) {
111 /* Set "status" and "errormsg" and goto failure */
112 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed UDAV");
113 goto ahalloc_fail;
114 }
115
116 /*
117 * Allocate the software structure for tracking the address handle
118 * (i.e. the Tavor Address Handle struct). If we fail here, we must
119 * undo the previous resource allocation.
120 */
121 status = tavor_rsrc_alloc(state, TAVOR_AHHDL, 1, sleepflag, &rsrc);
122 if (status != DDI_SUCCESS) {
123 /* Set "status" and "errormsg" and goto failure */
124 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed AH handler");
125 goto ahalloc_fail1;
126 }
127 ah = (tavor_ahhdl_t)rsrc->tr_addr;
128
129 /* Increment the reference count on the protection domain (PD) */
130 tavor_pd_refcnt_inc(pd);
131
132 /*
133 * Fill in the UDAV entry. Note: We are only filling in a temporary
134 * copy here, which we will later copy into the actual entry in
135 * Tavor DDR memory. This starts be zeroing out the temporary copy
136 * and then calling tavor_set_addr_path() to fill in the common
137 * portions that can be pulled from the "ibt_adds_vect_t" passed in
138 */
139 bzero(&udav_entry, sizeof (tavor_hw_udav_t));
140 status = tavor_set_addr_path(state, attr_p,
141 (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL);
142 if (status != DDI_SUCCESS) {
143 tavor_pd_refcnt_dec(pd);
144 tavor_rsrc_free(state, &rsrc);
145 tavor_rsrc_free(state, &udav);
146 /* Set "status" and "errormsg" and goto failure */
147 TAVOR_TNF_FAIL(status, "failed in tavor_set_addr_path");
148 goto ahalloc_fail;
149 }
150 udav_entry.pd = pd->pd_pdnum;
151 udav_entry.msg_sz = state->ts_cfg_profile->cp_max_mtu - 1;
152
153 /*
154 * Register the memory for the UDAV. The memory for the UDAV must
155 * be registered in the Tavor TPT tables. This gives us the LKey
156 * that we will need when we later post a UD work request that
157 * uses this address handle.
158 * We might be able to pre-register all the memory for the UDAV XXX
159 */
160 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
161 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)udav->tr_addr;
162 mr_attr.mr_len = udav->tr_len;
163 mr_attr.mr_as = NULL;
164 mr_attr.mr_flags = flag;
165 op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
166 op.mro_bind_dmahdl = NULL;
167 op.mro_bind_override_addr = 0;
168 status = tavor_mr_register(state, pd, &mr_attr, &mr, &op);
169 if (status != DDI_SUCCESS) {
170 /* Set "status" and "errormsg" and goto failure */
171 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
172 goto ahalloc_fail2;
173 }
174
175 /*
176 * Fill in the UDAV entry. Here we copy all the information from
177 * the temporary UDAV into the DDR memory for the real UDAV entry.
178 * Note that we copy everything but the first 64-bit word. This
179 * is where the PD number for the address handle resides.
180 * By filling everything except the PD and then writing the PD in
181 * a separate step below, we can ensure that the UDAV is not
182 * accessed while there are partially written values in it (something
183 * which really should not happen anyway). This is guaranteed
184 * because we take measures to ensure that the PD number is zero for
185 * all unused UDAV (and because PD#0 is reserved for Tavor).
186 */
187 size = sizeof (tavor_hw_udav_t) >> 3;
188 for (i = 1; i < size; i++) {
189 data = ((uint64_t *)&udav_entry)[i];
190 ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i),
191 data);
192 }
193 data = ((uint64_t *)&udav_entry)[0];
194 ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, data);
195
196 /*
197 * Fill in the rest of the Tavor Address Handle struct. Having
198 * successfully copied the UDAV into the hardware, we update the
199 * following fields for use in further operations on the AH.
200 *
201 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
202 * here because we may need to return it later to the IBTF (as a
203 * result of a subsequent query operation). Unlike the other UDAV
204 * parameters, the value of "av_dgid.gid_guid" is not always preserved
205 * by being written to hardware. The reason for this is described in
206 * tavor_set_addr_path().
207 */
208 ah->ah_udavrsrcp = udav;
209 ah->ah_rsrcp = rsrc;
210 ah->ah_pdhdl = pd;
211 ah->ah_mrhdl = mr;
212 ah->ah_save_guid = attr_p->av_dgid.gid_guid;
213 ah->ah_save_srate = attr_p->av_srate;
214 *ahhdl = ah;
215
216 /* Determine if later ddi_dma_sync will be necessary */
217 ah->ah_sync = TAVOR_UDAV_IS_SYNC_REQ(state);
218
219 /* Sync the UDAV for use by the hardware */
220 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
221
222 TAVOR_TNF_EXIT(tavor_ah_alloc);
223 return (DDI_SUCCESS);
224
225 ahalloc_fail2:
226 tavor_pd_refcnt_dec(pd);
227 tavor_rsrc_free(state, &rsrc);
228 ahalloc_fail1:
229 tavor_rsrc_free(state, &udav);
230 ahalloc_fail:
231 TNF_PROBE_1(tavor_ah_alloc_fail, TAVOR_TNF_ERROR, "",
232 tnf_string, msg, errormsg);
233 TAVOR_TNF_EXIT(tavor_ah_alloc);
234 return (status);
235 }
236
237
238 /*
239 * tavor_ah_free()
240 * Context: Can be called only from user or kernel context.
241 */
242 /* ARGSUSED */
243 int
244 tavor_ah_free(tavor_state_t *state, tavor_ahhdl_t *ahhdl, uint_t sleepflag)
245 {
246 tavor_rsrc_t *udav, *rsrc;
247 tavor_pdhdl_t pd;
248 tavor_mrhdl_t mr;
249 tavor_ahhdl_t ah;
250 int status;
251
252 TAVOR_TNF_ENTER(tavor_ah_free);
253
254 /*
255 * Pull all the necessary information from the Tavor Address Handle
256 * struct. This is necessary here because the resource for the
257 * AH is going to be freed up as part of this operation.
258 */
259 ah = *ahhdl;
260 mutex_enter(&ah->ah_lock);
261 udav = ah->ah_udavrsrcp;
262 rsrc = ah->ah_rsrcp;
263 pd = ah->ah_pdhdl;
264 mr = ah->ah_mrhdl;
265 mutex_exit(&ah->ah_lock);
266
267 /*
268 * Deregister the memory for the UDAV. If this fails for any reason,
269 * then it is an indication that something (either in HW or SW) has
270 * gone seriously wrong. So we print a warning message and return
271 * failure.
272 */
273 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
274 sleepflag);
275 if (status != DDI_SUCCESS) {
276 TNF_PROBE_0(tavor_ah_free_dereg_mr_fail, TAVOR_TNF_ERROR, "");
277 TAVOR_TNF_EXIT(tavor_ah_free);
278 return (ibc_get_ci_failure(0));
279 }
280
281 /*
282 * Write zero to the first 64-bit word in the UDAV entry. As
283 * described above (in tavor_ah_alloc), the PD number is stored in
284 * the first 64-bits of each UDAV and setting this to zero is
285 * guaranteed to invalidate the entry.
286 */
287 ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, 0);
288
289 /* Sync the UDAV for use by the hardware */
290 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
291
292 /* Decrement the reference count on the protection domain (PD) */
293 tavor_pd_refcnt_dec(pd);
294
295 /* Free the Tavor Address Handle structure */
296 tavor_rsrc_free(state, &rsrc);
297
298 /* Free up the UDAV entry resource */
299 tavor_rsrc_free(state, &udav);
300
301 /* Set the ahhdl pointer to NULL and return success */
302 *ahhdl = NULL;
303
304 TAVOR_TNF_EXIT(tavor_ah_free);
305 return (DDI_SUCCESS);
306 }
307
308
309 /*
310 * tavor_ah_query()
311 * Context: Can be called from interrupt or base context.
312 */
313 /* ARGSUSED */
314 int
315 tavor_ah_query(tavor_state_t *state, tavor_ahhdl_t ah, tavor_pdhdl_t *pd,
316 ibt_adds_vect_t *attr_p)
317 {
318 tavor_hw_udav_t udav_entry;
319 tavor_rsrc_t *udav;
320 uint64_t data;
321 uint32_t size;
322 int i;
323
324 TAVOR_TNF_ENTER(tavor_ah_query);
325
326 mutex_enter(&ah->ah_lock);
327
328 /*
329 * Pull all the necessary information from the Tavor Address Handle
330 * structure
331 */
332 udav = ah->ah_udavrsrcp;
333 *pd = ah->ah_pdhdl;
334
335 /*
336 * Copy the UDAV entry into the temporary copy. Here we copy all
337 * the information from the UDAV entry in DDR memory into the
338 * temporary UDAV. Note: We don't need to sync the UDAV for
339 * reading by software because Tavor HW never modifies the entry.
340 */
341 size = sizeof (tavor_hw_udav_t) >> 3;
342 for (i = 0; i < size; i++) {
343 data = ddi_get64(udav->tr_acchdl,
344 ((uint64_t *)udav->tr_addr + i));
345 ((uint64_t *)&udav_entry)[i] = data;
346 }
347
348 /*
349 * Fill in "ibt_adds_vect_t". We call tavor_get_addr_path() to fill
350 * the common portions that can be pulled from the UDAV we pass in.
351 *
352 * NOTE: We will also fill the "av_dgid.gid_guid" field from the
353 * "ah_save_guid" field we have previously saved away. The reason
354 * for this is described in tavor_ah_alloc() and tavor_ah_modify().
355 */
356 tavor_get_addr_path(state, (tavor_hw_addr_path_t *)&udav_entry,
357 attr_p, TAVOR_ADDRPATH_UDAV, NULL);
358
359 attr_p->av_dgid.gid_guid = ah->ah_save_guid;
360 attr_p->av_srate = ah->ah_save_srate;
361
362 mutex_exit(&ah->ah_lock);
363 TAVOR_TNF_EXIT(tavor_ah_query);
364 return (DDI_SUCCESS);
365 }
366
367
368 /*
369 * tavor_ah_modify()
370 * Context: Can be called from interrupt or base context.
371 */
372 /* ARGSUSED */
373 int
374 tavor_ah_modify(tavor_state_t *state, tavor_ahhdl_t ah,
375 ibt_adds_vect_t *attr_p)
376 {
377 tavor_hw_udav_t udav_entry;
378 tavor_rsrc_t *udav;
379 uint64_t data_new, data_old;
380 uint32_t udav_pd, size, portnum_new;
381 int i, status;
382
383 TAVOR_TNF_ENTER(tavor_ah_modify);
384
385 /* Validate that specified port number is legal */
386 if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) {
387 TNF_PROBE_1(tavor_ah_modify_inv_portnum,
388 TAVOR_TNF_ERROR, "", tnf_uint, port, attr_p->av_port_num);
389 TAVOR_TNF_EXIT(tavor_ah_modify);
390 return (IBT_HCA_PORT_INVALID);
391 }
392
393 mutex_enter(&ah->ah_lock);
394
395 /*
396 * Pull all the necessary information from the Tavor Address Handle
397 * structure
398 */
399 udav = ah->ah_udavrsrcp;
400
401 /*
402 * Fill in the UDAV entry. Note: we are only filling in a temporary
403 * copy here, which we will later copy into the actual entry in
404 * Tavor DDR memory. This starts be zeroing out the temporary copy
405 * and then calling tavor_set_addr_path() to fill in the common
406 * portions that can be pulled from the "ibt_adds_vect_t" passed in
407 *
408 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
409 * field here (just as we did during tavor_ah_alloc()) because we
410 * may need to return it later to the IBTF (as a result of a
411 * subsequent query operation). As explained in tavor_ah_alloc(),
412 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
413 * is not always preserved by being written to hardware. The reason
414 * for this is described in tavor_set_addr_path().
415 */
416 bzero(&udav_entry, sizeof (tavor_hw_udav_t));
417 status = tavor_set_addr_path(state, attr_p,
418 (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL);
419 if (status != DDI_SUCCESS) {
420 mutex_exit(&ah->ah_lock);
421 TNF_PROBE_0(tavor_ah_modify_setaddrpath_fail,
422 TAVOR_TNF_ERROR, "");
423 TAVOR_TNF_EXIT(tavor_ah_modify);
424 return (status);
425 }
426 ah->ah_save_guid = attr_p->av_dgid.gid_guid;
427 ah->ah_save_srate = attr_p->av_srate;
428
429 /*
430 * Save away the current PD number for this UDAV. Then temporarily
431 * invalidate the entry (by setting the PD to zero). Note: Since
432 * the first 32 bits of the UDAV actually contain the current port
433 * number _and_ current PD number, we need to mask off some bits.
434 */
435 udav_pd = ddi_get32(udav->tr_acchdl, (uint32_t *)udav->tr_addr);
436 udav_pd = udav_pd & 0xFFFFFF;
437 ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, 0);
438
439 /* Sync the UDAV for use by the hardware */
440 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
441
442 /*
443 * Copy UDAV structure to the entry
444 * Note: We copy in 64-bit chunks. For the first two of these
445 * chunks it is necessary to read the current contents of the
446 * UDAV, mask off the modifiable portions (maintaining any
447 * of the "reserved" portions), and then mask on the new data.
448 */
449 size = sizeof (tavor_hw_udav_t) >> 3;
450 for (i = 0; i < size; i++) {
451 data_new = ((uint64_t *)&udav_entry)[i];
452 data_old = ddi_get64(udav->tr_acchdl,
453 ((uint64_t *)udav->tr_addr + i));
454
455 /*
456 * Apply mask to change only the relevant values. Note: We
457 * extract the new portnum from the address handle here
458 * because the "PD" and "portnum" fields are in the same
459 * 32-bit word in the UDAV. We will use the (new) port
460 * number extracted here when we write the valid PD number
461 * in the last step below.
462 */
463 if (i == 0) {
464 data_old = data_old & TAVOR_UDAV_MODIFY_MASK0;
465 portnum_new = data_new >> 56;
466 } else if (i == 1) {
467 data_old = data_old & TAVOR_UDAV_MODIFY_MASK1;
468 } else {
469 data_old = 0;
470 }
471
472 /* Write the updated values to the UDAV (in DDR) */
473 data_new = data_old | data_new;
474 ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i),
475 data_new);
476 }
477
478 /*
479 * Sync the body of the UDAV for use by the hardware. After we
480 * have updated the PD number (to make the UDAV valid), we sync
481 * again to push the entire entry out for hardware access.
482 */
483 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
484
485 /*
486 * Put the valid PD number back into UDAV entry. Note: Because port
487 * number and PD number are in the same word, we must mask the
488 * new port number with the old PD number before writing it back
489 * to the UDAV entry
490 */
491 udav_pd = ((portnum_new << 24) | udav_pd);
492 ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, udav_pd);
493
494 /* Sync the rest of the UDAV for use by the hardware */
495 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
496
497 mutex_exit(&ah->ah_lock);
498 TAVOR_TNF_EXIT(tavor_ah_modify);
499 return (DDI_SUCCESS);
500 }
501
502
503 /*
504 * tavor_udav_sync()
505 * Context: Can be called from interrupt or base context.
506 */
507 /* ARGSUSED */
508 static void
509 tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav, uint_t flag)
510 {
511 ddi_dma_handle_t dmahdl;
512 off_t offset;
513 int status;
514
515 TAVOR_TNF_ENTER(tavor_udav_sync);
516
517 /* Determine if AH needs to be synced or not */
518 if (ah->ah_sync == 0) {
519 TAVOR_TNF_EXIT(tavor_udav_sync);
520 return;
521 }
522
523 /* Get the DMA handle from AH handle */
524 dmahdl = ah->ah_mrhdl->mr_bindinfo.bi_dmahdl;
525
526 /* Calculate offset into address handle */
527 offset = (off_t)0;
528 status = ddi_dma_sync(dmahdl, offset, sizeof (tavor_hw_udav_t), flag);
529 if (status != DDI_SUCCESS) {
530 TNF_PROBE_0(tavor_udav_sync_getnextentry_fail,
531 TAVOR_TNF_ERROR, "");
532 TAVOR_TNF_EXIT(tavor_udav_sync);
533 return;
534 }
535
536 TAVOR_TNF_EXIT(tavor_udav_sync);
537 }
538
539
540 /*
541 * tavor_mcg_attach()
542 * Context: Can be called only from user or kernel context.
543 */
544 int
545 tavor_mcg_attach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid,
546 ib_lid_t lid)
547 {
548 tavor_rsrc_t *rsrc;
549 tavor_hw_mcg_t *mcg_entry;
550 tavor_hw_mcg_qp_list_t *mcg_entry_qplist;
551 tavor_mcghdl_t mcg, newmcg;
552 uint64_t mgid_hash;
553 uint32_t end_indx;
554 int status;
555 uint_t qp_found;
556 char *errormsg;
557
558 TAVOR_TNF_ENTER(tavor_mcg_attach);
559
560 /*
561 * It is only allowed to attach MCG to UD queue pairs. Verify
562 * that the intended QP is of the appropriate transport type
563 */
564 if (qp->qp_serv_type != TAVOR_QP_UD) {
565 /* Set "status" and "errormsg" and goto failure */
566 TAVOR_TNF_FAIL(IBT_QP_SRV_TYPE_INVALID, "invalid service type");
567 goto mcgattach_fail;
568 }
569
570 /*
571 * Check for invalid Multicast DLID. Specifically, all Multicast
572 * LIDs should be within a well defined range. If the specified LID
573 * is outside of that range, then return an error.
574 */
575 if (tavor_mlid_is_valid(lid) == 0) {
576 /* Set "status" and "errormsg" and goto failure */
577 TAVOR_TNF_FAIL(IBT_MC_MLID_INVALID, "invalid MLID");
578 goto mcgattach_fail;
579 }
580 /*
581 * Check for invalid Multicast GID. All Multicast GIDs should have
582 * a well-defined pattern of bits and flags that are allowable. If
583 * the specified GID does not meet the criteria, then return an error.
584 */
585 if (tavor_mgid_is_valid(gid) == 0) {
586 /* Set "status" and "errormsg" and goto failure */
587 TAVOR_TNF_FAIL(IBT_MC_MGID_INVALID, "invalid MGID");
588 goto mcgattach_fail;
589 }
590
591 /*
592 * Compute the MGID hash value. Since the MCG table is arranged as
593 * a number of separate hash chains, this operation converts the
594 * specified MGID into the starting index of an entry in the hash
595 * table (i.e. the index for the start of the appropriate hash chain).
596 * Subsequent operations below will walk the chain searching for the
597 * right place to add this new QP.
598 */
599 status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
600 &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT());
601 if (status != TAVOR_CMD_SUCCESS) {
602 cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n",
603 status);
604 TNF_PROBE_1(tavor_mcg_attach_mgid_hash_cmd_fail,
605 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
606 TAVOR_TNF_EXIT(tavor_mcg_attach);
607 return (ibc_get_ci_failure(0));
608 }
609
610 /*
611 * Grab the multicast group mutex. Then grab the pre-allocated
612 * temporary buffer used for holding and/or modifying MCG entries.
613 * Zero out the temporary MCG entry before we begin.
614 */
615 mutex_enter(&state->ts_mcglock);
616 mcg_entry = state->ts_mcgtmp;
617 mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry);
618 bzero(mcg_entry, TAVOR_MCGMEM_SZ(state));
619
620 /*
621 * Walk through the array of MCG entries starting at "mgid_hash".
622 * Try to find the appropriate place for this new QP to be added.
623 * This could happen when the first entry of the chain has MGID == 0
624 * (which means that the hash chain is empty), or because we find
625 * an entry with the same MGID (in which case we'll add the QP to
626 * that MCG), or because we come to the end of the chain (in which
627 * case this is the first QP being added to the multicast group that
	 * corresponds to the MGID).  The tavor_mcg_walk_mgid_hash() routine
629 * walks the list and returns an index into the MCG table. The entry
630 * at this index is then checked to determine which case we have
631 * fallen into (see below). Note: We are using the "shadow" MCG
632 * list (of tavor_mcg_t structs) for this lookup because the real
633 * MCG entries are in hardware (and the lookup process would be much
634 * more time consuming).
635 */
636 end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL);
637 mcg = &state->ts_mcghdl[end_indx];
638
639 /*
640 * If MGID == 0, then the hash chain is empty. Just fill in the
641 * current entry. Note: No need to allocate an MCG table entry
642 * as all the hash chain "heads" are already preallocated.
643 */
644 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) {
645
646 /* Fill in the current entry in the "shadow" MCG list */
647 tavor_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL);
648
649 /*
650 * Try to add the new QP number to the list. This (and the
651 * above) routine fills in a temporary MCG. The "mcg_entry"
652 * and "mcg_entry_qplist" pointers simply point to different
653 * offsets within the same temporary copy of the MCG (for
654 * convenience). Note: If this fails, we need to invalidate
655 * the entries we've already put into the "shadow" list entry
656 * above.
657 */
658 status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
659 &qp_found);
660 if (status != DDI_SUCCESS) {
661 bzero(mcg, sizeof (struct tavor_sw_mcg_list_s));
662 mutex_exit(&state->ts_mcglock);
663 /* Set "status" and "errormsg" and goto failure */
664 TAVOR_TNF_FAIL(status, "failed qplist add");
665 goto mcgattach_fail;
666 }
667
668 /*
669 * Once the temporary MCG has been filled in, write the entry
670 * into the appropriate location in the Tavor MCG entry table.
671 * If it's successful, then drop the lock and return success.
672 * Note: In general, this operation shouldn't fail. If it
673 * does, then it is an indication that something (probably in
674 * HW, but maybe in SW) has gone seriously wrong. We still
675 * want to zero out the entries that we've filled in above
676 * (in the tavor_mcg_setup_new_hdr() routine).
677 */
678 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
679 TAVOR_CMD_NOSLEEP_SPIN);
680 if (status != TAVOR_CMD_SUCCESS) {
681 bzero(mcg, sizeof (struct tavor_sw_mcg_list_s));
682 mutex_exit(&state->ts_mcglock);
683 TAVOR_WARNING(state, "failed to write MCG entry");
684 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
685 "%08x\n", status);
686 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
687 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
688 tnf_uint, indx, end_indx);
689 TAVOR_TNF_EXIT(tavor_mcg_attach);
690 return (ibc_get_ci_failure(0));
691 }
692
693 /*
694 * Now that we know all the Tavor firmware accesses have been
695 * successful, we update the "shadow" MCG entry by incrementing
696 * the "number of attached QPs" count.
697 *
698 * We increment only if the QP is not already part of the
699 * MCG by checking the 'qp_found' flag returned from the
700 * qplist_add above.
701 */
702 if (!qp_found) {
703 mcg->mcg_num_qps++;
704
705 /*
706 * Increment the refcnt for this QP. Because the QP
707 * was added to this MCG, the refcnt must be
708 * incremented.
709 */
710 tavor_qp_mcg_refcnt_inc(qp);
711 }
712
713 /*
714 * We drop the lock and return success.
715 */
716 mutex_exit(&state->ts_mcglock);
717 TAVOR_TNF_EXIT(tavor_mcg_attach);
718 return (DDI_SUCCESS);
719 }
720
721 /*
722 * If the specified MGID matches the MGID in the current entry, then
723 * we need to try to add the QP to the current MCG entry. In this
724 * case, it means that we need to read the existing MCG entry (into
725 * the temporary MCG), add the new QP number to the temporary entry
726 * (using the same method we used above), and write the entry back
727 * to the hardware (same as above).
728 */
729 if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
730 (mcg->mcg_mgid_l == gid.gid_guid)) {
731
732 /*
733 * Read the current MCG entry into the temporary MCG. Note:
734 * In general, this operation shouldn't fail. If it does,
735 * then it is an indication that something (probably in HW,
736 * but maybe in SW) has gone seriously wrong.
737 */
738 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
739 TAVOR_CMD_NOSLEEP_SPIN);
740 if (status != TAVOR_CMD_SUCCESS) {
741 mutex_exit(&state->ts_mcglock);
742 TAVOR_WARNING(state, "failed to read MCG entry");
743 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: "
744 "%08x\n", status);
745 TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail,
746 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
747 tnf_uint, indx, end_indx);
748 TAVOR_TNF_EXIT(tavor_mcg_attach);
749 return (ibc_get_ci_failure(0));
750 }
751
752 /*
753 * Try to add the new QP number to the list. This routine
754 * fills in the necessary pieces of the temporary MCG. The
755 * "mcg_entry_qplist" pointer is used to point to the portion
756 * of the temporary MCG that holds the QP numbers.
757 *
758 * Note: tavor_mcg_qplist_add() returns SUCCESS if it
759 * already found the QP in the list. In this case, the QP is
760 * not added on to the list again. Check the flag 'qp_found'
761 * if this value is needed to be known.
762 *
763 */
764 status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
765 &qp_found);
766 if (status != DDI_SUCCESS) {
767 mutex_exit(&state->ts_mcglock);
768 /* Set "status" and "errormsg" and goto failure */
769 TAVOR_TNF_FAIL(status, "failed qplist add");
770 goto mcgattach_fail;
771 }
772
773 /*
774 * Once the temporary MCG has been updated, write the entry
775 * into the appropriate location in the Tavor MCG entry table.
776 * If it's successful, then drop the lock and return success.
777 * Note: In general, this operation shouldn't fail. If it
778 * does, then it is an indication that something (probably in
779 * HW, but maybe in SW) has gone seriously wrong.
780 */
781 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
782 TAVOR_CMD_NOSLEEP_SPIN);
783 if (status != TAVOR_CMD_SUCCESS) {
784 mutex_exit(&state->ts_mcglock);
785 TAVOR_WARNING(state, "failed to write MCG entry");
786 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
787 "%08x\n", status);
788 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
789 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
790 tnf_uint, indx, end_indx);
791 TAVOR_TNF_EXIT(tavor_mcg_attach);
792 return (ibc_get_ci_failure(0));
793 }
794
795 /*
796 * Now that we know all the Tavor firmware accesses have been
797 * successful, we update the current "shadow" MCG entry by
798 * incrementing the "number of attached QPs" count.
799 *
800 * We increment only if the QP is not already part of the
801 * MCG by checking the 'qp_found' flag returned from the
802 * qplist_add above.
803 */
804 if (!qp_found) {
805 mcg->mcg_num_qps++;
806
807 /*
808 * Increment the refcnt for this QP. Because the QP
809 * was added to this MCG, the refcnt must be
810 * incremented.
811 */
812 tavor_qp_mcg_refcnt_inc(qp);
813 }
814
815 /*
816 * We drop the lock and return success.
817 */
818 mutex_exit(&state->ts_mcglock);
819 TAVOR_TNF_EXIT(tavor_mcg_attach);
820 return (DDI_SUCCESS);
821 }
822
823 /*
824 * If we've reached here, then we're at the end of the hash chain.
825 * We need to allocate a new MCG entry, fill it in, write it to Tavor,
826 * and update the previous entry to link the new one to the end of the
827 * chain.
828 */
829
830 /*
831 * Allocate an MCG table entry. This will be filled in with all
832 * the necessary parameters to define the multicast group. Then it
833 * will be written to the hardware in the next-to-last step below.
834 */
835 status = tavor_rsrc_alloc(state, TAVOR_MCG, 1, TAVOR_NOSLEEP, &rsrc);
836 if (status != DDI_SUCCESS) {
837 mutex_exit(&state->ts_mcglock);
838 /* Set "status" and "errormsg" and goto failure */
839 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MCG");
840 goto mcgattach_fail;
841 }
842
843 /*
844 * Fill in the new entry in the "shadow" MCG list. Note: Just as
845 * it does above, tavor_mcg_setup_new_hdr() also fills in a portion
846 * of the temporary MCG entry (the rest of which will be filled in by
847 * tavor_mcg_qplist_add() below)
848 */
849 newmcg = &state->ts_mcghdl[rsrc->tr_indx];
850 tavor_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);
851
852 /*
853 * Try to add the new QP number to the list. This routine fills in
854 * the final necessary pieces of the temporary MCG. The
855 * "mcg_entry_qplist" pointer is used to point to the portion of the
856 * temporary MCG that holds the QP numbers. If we fail here, we
857 * must undo the previous resource allocation.
858 *
	 * Note: tavor_mcg_qplist_add() can return SUCCESS if it already
860 * found the QP in the list. In this case, the QP is not added on to
861 * the list again. Check the flag 'qp_found' if this value is needed
862 * to be known.
863 */
864 status = tavor_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
865 &qp_found);
866 if (status != DDI_SUCCESS) {
867 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
868 tavor_rsrc_free(state, &rsrc);
869 mutex_exit(&state->ts_mcglock);
870 /* Set "status" and "errormsg" and goto failure */
871 TAVOR_TNF_FAIL(status, "failed qplist add");
872 goto mcgattach_fail;
873 }
874
875 /*
876 * Once the temporary MCG has been updated, write the entry into the
877 * appropriate location in the Tavor MCG entry table. If this is
878 * successful, then we need to chain the previous entry to this one.
879 * Note: In general, this operation shouldn't fail. If it does, then
880 * it is an indication that something (probably in HW, but maybe in
881 * SW) has gone seriously wrong.
882 */
883 status = tavor_write_mgm_cmd_post(state, mcg_entry, rsrc->tr_indx,
884 TAVOR_CMD_NOSLEEP_SPIN);
885 if (status != TAVOR_CMD_SUCCESS) {
886 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
887 tavor_rsrc_free(state, &rsrc);
888 mutex_exit(&state->ts_mcglock);
889 TAVOR_WARNING(state, "failed to write MCG entry");
890 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
891 status);
892 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
893 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
894 tnf_uint, indx, rsrc->tr_indx);
895 TAVOR_TNF_EXIT(tavor_mcg_attach);
896 return (ibc_get_ci_failure(0));
897 }
898
899 /*
900 * Now read the current MCG entry (the one previously at the end of
901 * hash chain) into the temporary MCG. We are going to update its
902 * "next_gid_indx" now and write the entry back to the MCG table.
903 * Note: In general, this operation shouldn't fail. If it does, then
904 * it is an indication that something (probably in HW, but maybe in SW)
905 * has gone seriously wrong. We will free up the MCG entry resource,
906 * but we will not undo the previously written MCG entry in the HW.
907 * This is OK, though, because the MCG entry is not currently attached
908 * to any hash chain.
909 */
910 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
911 TAVOR_CMD_NOSLEEP_SPIN);
912 if (status != TAVOR_CMD_SUCCESS) {
913 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
914 tavor_rsrc_free(state, &rsrc);
915 mutex_exit(&state->ts_mcglock);
916 TAVOR_WARNING(state, "failed to read MCG entry");
917 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
918 status);
919 TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail,
920 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
921 tnf_uint, indx, end_indx);
922 TAVOR_TNF_EXIT(tavor_mcg_attach);
923 return (ibc_get_ci_failure(0));
924 }
925
926 /*
927 * Finally, we update the "next_gid_indx" field in the temporary MCG
928 * and attempt to write the entry back into the Tavor MCG table. If
929 * this succeeds, then we update the "shadow" list to reflect the
930 * change, drop the lock, and return success. Note: In general, this
931 * operation shouldn't fail. If it does, then it is an indication
932 * that something (probably in HW, but maybe in SW) has gone seriously
933 * wrong. Just as we do above, we will free up the MCG entry resource,
934 * but we will not try to undo the previously written MCG entry. This
935 * is OK, though, because (since we failed here to update the end of
936 * the chain) that other entry is not currently attached to any chain.
937 */
938 mcg_entry->next_gid_indx = rsrc->tr_indx;
939 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
940 TAVOR_CMD_NOSLEEP_SPIN);
941 if (status != TAVOR_CMD_SUCCESS) {
942 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
943 tavor_rsrc_free(state, &rsrc);
944 mutex_exit(&state->ts_mcglock);
945 TAVOR_WARNING(state, "failed to write MCG entry");
946 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
947 status);
948 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
949 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
950 tnf_uint, indx, end_indx);
951 TAVOR_TNF_EXIT(tavor_mcg_attach);
952 return (ibc_get_ci_failure(0));
953 }
954 mcg = &state->ts_mcghdl[end_indx];
955 mcg->mcg_next_indx = rsrc->tr_indx;
956
957 /*
958 * Now that we know all the Tavor firmware accesses have been
959 * successful, we update the new "shadow" MCG entry by incrementing
960 * the "number of attached QPs" count. Then we drop the lock and
961 * return success.
962 */
963 newmcg->mcg_num_qps++;
964
965 /*
966 * Increment the refcnt for this QP. Because the QP
967 * was added to this MCG, the refcnt must be
968 * incremented.
969 */
970 tavor_qp_mcg_refcnt_inc(qp);
971
972 mutex_exit(&state->ts_mcglock);
973 TAVOR_TNF_EXIT(tavor_mcg_attach);
974 return (DDI_SUCCESS);
975
976 mcgattach_fail:
977 TNF_PROBE_1(tavor_mcg_attach_fail, TAVOR_TNF_ERROR, "", tnf_string,
978 msg, errormsg);
979 TAVOR_TNF_EXIT(tavor_mcg_attach);
980 return (status);
981 }
982
983
984 /*
985 * tavor_mcg_detach()
986 * Context: Can be called only from user or kernel context.
987 */
988 int
989 tavor_mcg_detach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid,
990 ib_lid_t lid)
991 {
992 tavor_hw_mcg_t *mcg_entry;
993 tavor_hw_mcg_qp_list_t *mcg_entry_qplist;
994 tavor_mcghdl_t mcg;
995 uint64_t mgid_hash;
996 uint32_t end_indx, prev_indx;
997 int status;
998
999 TAVOR_TNF_ENTER(tavor_mcg_detach);
1000
1001 /*
1002 * Check for invalid Multicast DLID. Specifically, all Multicast
1003 * LIDs should be within a well defined range. If the specified LID
1004 * is outside of that range, then return an error.
1005 */
1006 if (tavor_mlid_is_valid(lid) == 0) {
1007 TNF_PROBE_0(tavor_mcg_detach_invmlid_fail, TAVOR_TNF_ERROR, "");
1008 TAVOR_TNF_EXIT(tavor_mcg_detach);
1009 return (IBT_MC_MLID_INVALID);
1010 }
1011
1012 /*
1013 * Compute the MGID hash value. As described above, the MCG table is
1014 * arranged as a number of separate hash chains. This operation
1015 * converts the specified MGID into the starting index of an entry in
1016 * the hash table (i.e. the index for the start of the appropriate
1017 * hash chain). Subsequent operations below will walk the chain
1018 * searching for a matching entry from which to attempt to remove
1019 * the specified QP.
1020 */
1021 status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
1022 &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT());
1023 if (status != TAVOR_CMD_SUCCESS) {
1024 cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n",
1025 status);
1026 TNF_PROBE_1(tavor_mcg_detach_mgid_hash_cmd_fail,
1027 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1028 TAVOR_TNF_EXIT(tavor_mcg_attach);
1029 return (ibc_get_ci_failure(0));
1030 }
1031
1032 /*
1033 * Grab the multicast group mutex. Then grab the pre-allocated
1034 * temporary buffer used for holding and/or modifying MCG entries.
1035 */
1036 mutex_enter(&state->ts_mcglock);
1037 mcg_entry = state->ts_mcgtmp;
1038 mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry);
1039
1040 /*
1041 * Walk through the array of MCG entries starting at "mgid_hash".
1042 * Try to find an MCG entry with a matching MGID. The
1043 * tavor_mcg_walk_mgid_hash() routine walks the list and returns an
1044 * index into the MCG table. The entry at this index is checked to
1045 * determine whether it is a match or not. If it is a match, then
1046 * we continue on to attempt to remove the QP from the MCG. If it
1047 * is not a match (or not a valid MCG entry), then we return an error.
1048 */
1049 end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
1050 mcg = &state->ts_mcghdl[end_indx];
1051
1052 /*
1053 * If MGID == 0 (the hash chain is empty) or if the specified MGID
1054 * does not match the MGID in the current entry, then return
1055 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
1056 * valid).
1057 */
1058 if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
1059 ((mcg->mcg_mgid_h != gid.gid_prefix) ||
1060 (mcg->mcg_mgid_l != gid.gid_guid))) {
1061 mutex_exit(&state->ts_mcglock);
1062 TNF_PROBE_0(tavor_mcg_detach_invmgid_fail, TAVOR_TNF_ERROR, "");
1063 TAVOR_TNF_EXIT(tavor_mcg_detach);
1064 return (IBT_MC_MGID_INVALID);
1065 }
1066
1067 /*
1068 * Read the current MCG entry into the temporary MCG. Note: In
1069 * general, this operation shouldn't fail. If it does, then it is
1070 * an indication that something (probably in HW, but maybe in SW)
1071 * has gone seriously wrong.
1072 */
1073 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
1074 TAVOR_CMD_NOSLEEP_SPIN);
1075 if (status != TAVOR_CMD_SUCCESS) {
1076 mutex_exit(&state->ts_mcglock);
1077 TAVOR_WARNING(state, "failed to read MCG entry");
1078 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
1079 status);
1080 TNF_PROBE_2(tavor_mcg_detach_read_mgm_cmd_fail,
1081 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1082 tnf_uint, indx, end_indx);
1083 TAVOR_TNF_EXIT(tavor_mcg_attach);
1084 return (ibc_get_ci_failure(0));
1085 }
1086
1087 /*
1088 * Search the QP number list for a match. If a match is found, then
1089 * remove the entry from the QP list. Otherwise, if no match is found,
1090 * return an error.
1091 */
1092 status = tavor_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
1093 if (status != DDI_SUCCESS) {
1094 mutex_exit(&state->ts_mcglock);
1095 TAVOR_TNF_EXIT(tavor_mcg_detach);
1096 return (status);
1097 }
1098
1099 /*
1100 * Decrement the MCG count for this QP. When the 'qp_mcg'
1101 * field becomes 0, then this QP is no longer a member of any
1102 * MCG.
1103 */
1104 tavor_qp_mcg_refcnt_dec(qp);
1105
1106 /*
1107 * If the current MCG's QP number list is about to be made empty
1108 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
1109 * chain. Otherwise, just write the updated MCG entry back to the
1110 * hardware. In either case, once we successfully update the hardware
1111 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
1112 * count (or zero out the entire "shadow" list entry) before returning
1113 * success. Note: Zeroing out the "shadow" list entry is done
1114 * inside of tavor_mcg_hash_list_remove().
1115 */
1116 if (mcg->mcg_num_qps == 1) {
1117
1118 /* Remove an MCG entry from the hash chain */
1119 status = tavor_mcg_hash_list_remove(state, end_indx, prev_indx,
1120 mcg_entry);
1121 if (status != DDI_SUCCESS) {
1122 mutex_exit(&state->ts_mcglock);
1123 TAVOR_TNF_EXIT(tavor_mcg_detach);
1124 return (status);
1125 }
1126
1127 } else {
1128 /*
1129 * Write the updated MCG entry back to the Tavor MCG table.
1130 * If this succeeds, then we update the "shadow" list to
1131 * reflect the change (i.e. decrement the "mcg_num_qps"),
1132 * drop the lock, and return success. Note: In general,
1133 * this operation shouldn't fail. If it does, then it is an
1134 * indication that something (probably in HW, but maybe in SW)
1135 * has gone seriously wrong.
1136 */
1137 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
1138 TAVOR_CMD_NOSLEEP_SPIN);
1139 if (status != TAVOR_CMD_SUCCESS) {
1140 mutex_exit(&state->ts_mcglock);
1141 TAVOR_WARNING(state, "failed to write MCG entry");
1142 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
1143 "%08x\n", status);
1144 TNF_PROBE_2(tavor_mcg_detach_write_mgm_cmd_fail,
1145 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1146 tnf_uint, indx, end_indx);
1147 TAVOR_TNF_EXIT(tavor_mcg_detach);
1148 return (ibc_get_ci_failure(0));
1149 }
1150 mcg->mcg_num_qps--;
1151 }
1152
1153 mutex_exit(&state->ts_mcglock);
1154 TAVOR_TNF_EXIT(tavor_mcg_detach);
1155 return (DDI_SUCCESS);
1156 }
1157
1158 /*
1159 * tavor_qp_mcg_refcnt_inc()
1160 * Context: Can be called from interrupt or base context.
1161 */
1162 static void
1163 tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp)
1164 {
1165 /* Increment the QP's MCG reference count */
1166 mutex_enter(&qp->qp_lock);
1167 qp->qp_mcg_refcnt++;
1168 TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_inc, TAVOR_TNF_TRACE, "",
1169 tnf_uint, refcnt, qp->qp_mcg_refcnt);
1170 mutex_exit(&qp->qp_lock);
1171 }
1172
1173
1174 /*
1175 * tavor_qp_mcg_refcnt_dec()
1176 * Context: Can be called from interrupt or base context.
1177 */
1178 static void
1179 tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp)
1180 {
1181 /* Decrement the QP's MCG reference count */
1182 mutex_enter(&qp->qp_lock);
1183 qp->qp_mcg_refcnt--;
1184 TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_dec, TAVOR_TNF_TRACE, "",
1185 tnf_uint, refcnt, qp->qp_mcg_refcnt);
1186 mutex_exit(&qp->qp_lock);
1187 }
1188
1189
1190 /*
1191 * tavor_mcg_qplist_add()
1192 * Context: Can be called from interrupt or base context.
1193 */
1194 static int
1195 tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg,
1196 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp,
1197 uint_t *qp_found)
1198 {
1199 uint_t qplist_indx;
1200
1201 TAVOR_TNF_ENTER(tavor_mcg_qplist_add);
1202
1203 ASSERT(MUTEX_HELD(&state->ts_mcglock));
1204
1205 qplist_indx = mcg->mcg_num_qps;
1206
1207 /*
1208 * Determine if we have exceeded the maximum number of QP per
1209 * multicast group. If we have, then return an error
1210 */
1211 if (qplist_indx >= state->ts_cfg_profile->cp_num_qp_per_mcg) {
1212 TNF_PROBE_0(tavor_mcg_qplist_add_too_many_qps,
1213 TAVOR_TNF_ERROR, "");
1214 TAVOR_TNF_EXIT(tavor_mcg_qplist_add);
1215 return (IBT_HCA_MCG_QP_EXCEEDED);
1216 }
1217
1218 /*
1219 * Determine if the QP is already attached to this MCG table. If it
1220 * is, then we break out and treat this operation as a NO-OP
1221 */
1222 for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
1223 qplist_indx++) {
1224 if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
1225 break;
1226 }
1227 }
1228
1229 /*
1230 * If the QP was already on the list, set 'qp_found' to TRUE. We still
1231 * return SUCCESS in this case, but the qplist will not have been
1232 * updated because the QP was already on the list.
1233 */
1234 if (qplist_indx < mcg->mcg_num_qps) {
1235 *qp_found = 1;
1236 } else {
1237 /*
1238 * Otherwise, append the new QP number to the end of the
1239 * current QP list. Note: We will increment the "mcg_num_qps"
1240 * field on the "shadow" MCG list entry later (after we know
1241 * that all necessary Tavor firmware accesses have been
1242 * successful).
1243 *
1244 * Set 'qp_found' to 0 so we know the QP was added on to the
1245 * list for sure.
1246 */
1247 mcg_qplist[qplist_indx].q = TAVOR_MCG_QPN_VALID;
1248 mcg_qplist[qplist_indx].qpn = qp->qp_qpnum;
1249 *qp_found = 0;
1250 }
1251
1252 TAVOR_TNF_EXIT(tavor_mcg_qplist_add);
1253 return (DDI_SUCCESS);
1254 }
1255
1256
1257
1258 /*
1259 * tavor_mcg_qplist_remove()
1260 * Context: Can be called from interrupt or base context.
1261 */
1262 static int
1263 tavor_mcg_qplist_remove(tavor_mcghdl_t mcg, tavor_hw_mcg_qp_list_t *mcg_qplist,
1264 tavor_qphdl_t qp)
1265 {
1266 uint_t i, qplist_indx;
1267
1268 TAVOR_TNF_ENTER(tavor_mcg_qplist_remove);
1269
1270 /*
1271 * Search the MCG QP list for a matching QPN. When
1272 * it's found, we swap the last entry with the current
1273 * one, set the last entry to zero, decrement the last
1274 * entry, and return. If it's not found, then it's
1275 * and error.
1276 */
1277 qplist_indx = mcg->mcg_num_qps;
1278 for (i = 0; i < qplist_indx; i++) {
1279 if (mcg_qplist[i].qpn == qp->qp_qpnum) {
1280 mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
1281 mcg_qplist[qplist_indx - 1].q = TAVOR_MCG_QPN_INVALID;
1282 mcg_qplist[qplist_indx - 1].qpn = 0;
1283
1284 TAVOR_TNF_EXIT(tavor_mcg_qplist_remove);
1285 return (DDI_SUCCESS);
1286 }
1287 }
1288
1289 TNF_PROBE_0(tavor_mcg_qplist_remove_invqphdl_fail, TAVOR_TNF_ERROR, "");
1290 TAVOR_TNF_EXIT(tavor_mcg_qplist_remove);
1291 return (IBT_QP_HDL_INVALID);
1292 }
1293
1294
1295 /*
1296 * tavor_mcg_walk_mgid_hash()
1297 * Context: Can be called from interrupt or base context.
1298 */
1299 static uint_t
1300 tavor_mcg_walk_mgid_hash(tavor_state_t *state, uint64_t start_indx,
1301 ib_gid_t mgid, uint_t *p_indx)
1302 {
1303 tavor_mcghdl_t curr_mcghdl;
1304 uint_t curr_indx, prev_indx;
1305
1306 TAVOR_TNF_ENTER(tavor_mcg_walk_mgid_hash);
1307
1308 ASSERT(MUTEX_HELD(&state->ts_mcglock));
1309
1310 /* Start at the head of the hash chain */
1311 curr_indx = start_indx;
1312 prev_indx = curr_indx;
1313 curr_mcghdl = &state->ts_mcghdl[curr_indx];
1314
1315 /* If the first entry in the chain has MGID == 0, then stop */
1316 if ((curr_mcghdl->mcg_mgid_h == 0) &&
1317 (curr_mcghdl->mcg_mgid_l == 0)) {
1318 goto end_mgid_hash_walk;
1319 }
1320
1321 /* If the first entry in the chain matches the MGID, then stop */
1322 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1323 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1324 goto end_mgid_hash_walk;
1325 }
1326
1327 /* Otherwise, walk the hash chain looking for a match */
1328 while (curr_mcghdl->mcg_next_indx != 0) {
1329 prev_indx = curr_indx;
1330 curr_indx = curr_mcghdl->mcg_next_indx;
1331 curr_mcghdl = &state->ts_mcghdl[curr_indx];
1332
1333 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1334 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1335 break;
1336 }
1337 }
1338
1339 end_mgid_hash_walk:
1340 /*
1341 * If necessary, return the index of the previous entry too. This
1342 * is primarily used for detaching a QP from a multicast group. It
1343 * may be necessary, in that case, to delete an MCG entry from the
1344 * hash chain and having the index of the previous entry is helpful.
1345 */
1346 if (p_indx != NULL) {
1347 *p_indx = prev_indx;
1348 }
1349 TAVOR_TNF_EXIT(tavor_mcg_walk_mgid_hash);
1350 return (curr_indx);
1351 }
1352
1353
1354 /*
1355 * tavor_mcg_setup_new_hdr()
1356 * Context: Can be called from interrupt or base context.
1357 */
1358 static void
1359 tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg, tavor_hw_mcg_t *mcg_hdr,
1360 ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc)
1361 {
1362 TAVOR_TNF_ENTER(tavor_mcg_setup_new_hdr);
1363
1364 /*
1365 * Fill in the fields of the "shadow" entry used by software
1366 * to track MCG hardware entry
1367 */
1368 mcg->mcg_mgid_h = mgid.gid_prefix;
1369 mcg->mcg_mgid_l = mgid.gid_guid;
1370 mcg->mcg_rsrcp = mcg_rsrc;
1371 mcg->mcg_next_indx = 0;
1372 mcg->mcg_num_qps = 0;
1373
1374 /*
1375 * Fill the header fields of the MCG entry (in the temporary copy)
1376 */
1377 mcg_hdr->mgid_h = mgid.gid_prefix;
1378 mcg_hdr->mgid_l = mgid.gid_guid;
1379 mcg_hdr->next_gid_indx = 0;
1380
1381 TAVOR_TNF_EXIT(tavor_mcg_setup_new_hdr);
1382 }
1383
1384
1385 /*
1386 * tavor_mcg_hash_list_remove()
1387 * Context: Can be called only from user or kernel context.
1388 */
1389 static int
1390 tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx,
1391 uint_t prev_indx, tavor_hw_mcg_t *mcg_entry)
1392 {
1393 tavor_mcghdl_t curr_mcg, prev_mcg, next_mcg;
1394 uint_t next_indx;
1395 int status;
1396
1397 /* Get the pointer to "shadow" list for current entry */
1398 curr_mcg = &state->ts_mcghdl[curr_indx];
1399
1400 /*
1401 * If this is the first entry on a hash chain, then attempt to replace
1402 * the entry with the next entry on the chain. If there are no
1403 * subsequent entries on the chain, then this is the only entry and
1404 * should be invalidated.
1405 */
1406 if (curr_indx == prev_indx) {
1407
1408 /*
1409 * If this is the only entry on the chain, then invalidate it.
1410 * Note: Invalidating an MCG entry means writing all zeros
1411 * to the entry. This is only necessary for those MCG
1412 * entries that are the "head" entries of the individual hash
1413 * chains. Regardless of whether this operation returns
1414 * success or failure, return that result to the caller.
1415 */
1416 next_indx = curr_mcg->mcg_next_indx;
1417 if (next_indx == 0) {
1418 status = tavor_mcg_entry_invalidate(state, mcg_entry,
1419 curr_indx);
1420 bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
1421 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1422 return (status);
1423 }
1424
1425 /*
1426 * Otherwise, this is just the first entry on the chain, so
1427 * grab the next one
1428 */
1429 next_mcg = &state->ts_mcghdl[next_indx];
1430
1431 /*
1432 * Read the next MCG entry into the temporary MCG. Note:
1433 * In general, this operation shouldn't fail. If it does,
1434 * then it is an indication that something (probably in HW,
1435 * but maybe in SW) has gone seriously wrong.
1436 */
1437 status = tavor_read_mgm_cmd_post(state, mcg_entry, next_indx,
1438 TAVOR_CMD_NOSLEEP_SPIN);
1439 if (status != TAVOR_CMD_SUCCESS) {
1440 TAVOR_WARNING(state, "failed to read MCG entry");
1441 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: "
1442 "%08x\n", status);
1443 TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail,
1444 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1445 tnf_uint, indx, next_indx);
1446 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1447 return (ibc_get_ci_failure(0));
1448 }
1449
1450 /*
1451 * Copy/Write the temporary MCG back to the hardware MCG list
1452 * using the current index. This essentially removes the
1453 * current MCG entry from the list by writing over it with
1454 * the next one. If this is successful, then we can do the
1455 * same operation for the "shadow" list. And we can also
1456 * free up the Tavor MCG entry resource that was associated
1457 * with the (old) next entry. Note: In general, this
1458 * operation shouldn't fail. If it does, then it is an
1459 * indication that something (probably in HW, but maybe in SW)
1460 * has gone seriously wrong.
1461 */
1462 status = tavor_write_mgm_cmd_post(state, mcg_entry, curr_indx,
1463 TAVOR_CMD_NOSLEEP_SPIN);
1464 if (status != TAVOR_CMD_SUCCESS) {
1465 TAVOR_WARNING(state, "failed to write MCG entry");
1466 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
1467 "%08x\n", status);
1468 TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail,
1469 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1470 tnf_uint, indx, curr_indx);
1471 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1472 return (ibc_get_ci_failure(0));
1473 }
1474
1475 /*
1476 * Copy all the software tracking information from the next
1477 * entry on the "shadow" MCG list into the current entry on
1478 * the list. Then invalidate (zero out) the other "shadow"
1479 * list entry.
1480 */
1481 bcopy(next_mcg, curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
1482 bzero(next_mcg, sizeof (struct tavor_sw_mcg_list_s));
1483
1484 /*
1485 * Free up the Tavor MCG entry resource used by the "next"
1486 * MCG entry. That resource is no longer needed by any
1487 * MCG entry which is first on a hash chain (like the "next"
1488 * entry has just become).
1489 */
1490 tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1491
1492 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1493 return (DDI_SUCCESS);
1494 }
1495
1496 /*
1497 * Else if this is the last entry on the hash chain (or a middle
1498 * entry, then we update the previous entry's "next_gid_index" field
1499 * to make it point instead to the next entry on the chain. By
1500 * skipping over the removed entry in this way, we can then free up
1501 * any resources associated with the current entry. Note: We don't
1502 * need to invalidate the "skipped over" hardware entry because it
1503 * will no be longer connected to any hash chains, and if/when it is
1504 * finally re-used, it will be written with entirely new values.
1505 */
1506
1507 /*
1508 * Read the next MCG entry into the temporary MCG. Note: In general,
1509 * this operation shouldn't fail. If it does, then it is an
1510 * indication that something (probably in HW, but maybe in SW) has
1511 * gone seriously wrong.
1512 */
1513 status = tavor_read_mgm_cmd_post(state, mcg_entry, prev_indx,
1514 TAVOR_CMD_NOSLEEP_SPIN);
1515 if (status != TAVOR_CMD_SUCCESS) {
1516 TAVOR_WARNING(state, "failed to read MCG entry");
1517 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
1518 status);
1519 TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail,
1520 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1521 tnf_uint, indx, prev_indx);
1522 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1523 return (ibc_get_ci_failure(0));
1524 }
1525
1526 /*
1527 * Finally, we update the "next_gid_indx" field in the temporary MCG
1528 * and attempt to write the entry back into the Tavor MCG table. If
1529 * this succeeds, then we update the "shadow" list to reflect the
1530 * change, free up the Tavor MCG entry resource that was associated
1531 * with the current entry, and return success. Note: In general,
1532 * this operation shouldn't fail. If it does, then it is an indication
1533 * that something (probably in HW, but maybe in SW) has gone seriously
1534 * wrong.
1535 */
1536 mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
1537 status = tavor_write_mgm_cmd_post(state, mcg_entry, prev_indx,
1538 TAVOR_CMD_NOSLEEP_SPIN);
1539 if (status != TAVOR_CMD_SUCCESS) {
1540 TAVOR_WARNING(state, "failed to write MCG entry");
1541 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
1542 status);
1543 TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail,
1544 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1545 tnf_uint, indx, prev_indx);
1546 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1547 return (ibc_get_ci_failure(0));
1548 }
1549
1550 /*
1551 * Get the pointer to the "shadow" MCG list entry for the previous
1552 * MCG. Update its "mcg_next_indx" to point to the next entry
1553 * the one after the current entry. Note: This next index may be
1554 * zero, indicating the end of the list.
1555 */
1556 prev_mcg = &state->ts_mcghdl[prev_indx];
1557 prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;
1558
1559 /*
1560 * Free up the Tavor MCG entry resource used by the current entry.
1561 * This resource is no longer needed because the chain now skips over
1562 * the current entry. Then invalidate (zero out) the current "shadow"
1563 * list entry.
1564 */
1565 tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1566 bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
1567
1568 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1569 return (DDI_SUCCESS);
1570 }
1571
1572
1573 /*
1574 * tavor_mcg_entry_invalidate()
1575 * Context: Can be called only from user or kernel context.
1576 */
1577 static int
1578 tavor_mcg_entry_invalidate(tavor_state_t *state, tavor_hw_mcg_t *mcg_entry,
1579 uint_t indx)
1580 {
1581 int status;
1582
1583 TAVOR_TNF_ENTER(tavor_mcg_entry_invalidate);
1584
1585 /*
1586 * Invalidate the hardware MCG entry by zeroing out this temporary
1587 * MCG and writing it the the hardware. Note: In general, this
1588 * operation shouldn't fail. If it does, then it is an indication
1589 * that something (probably in HW, but maybe in SW) has gone seriously
1590 * wrong.
1591 */
1592 bzero(mcg_entry, TAVOR_MCGMEM_SZ(state));
1593 status = tavor_write_mgm_cmd_post(state, mcg_entry, indx,
1594 TAVOR_CMD_NOSLEEP_SPIN);
1595 if (status != TAVOR_CMD_SUCCESS) {
1596 TAVOR_WARNING(state, "failed to write MCG entry");
1597 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
1598 status);
1599 TNF_PROBE_2(tavor_mcg_entry_invalidate_write_mgm_cmd_fail,
1600 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1601 tnf_uint, indx, indx);
1602 TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate);
1603 return (ibc_get_ci_failure(0));
1604 }
1605
1606 TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate);
1607 return (DDI_SUCCESS);
1608 }
1609
1610
1611 /*
1612 * tavor_mgid_is_valid()
1613 * Context: Can be called from interrupt or base context.
1614 */
1615 static int
1616 tavor_mgid_is_valid(ib_gid_t gid)
1617 {
1618 uint_t topbits, flags, scope;
1619
1620 TAVOR_TNF_ENTER(tavor_mgid_is_valid);
1621
1622 /*
1623 * According to IBA 1.1 specification (section 4.1.1) a valid
1624 * "multicast GID" must have its top eight bits set to all ones
1625 */
1626 topbits = (gid.gid_prefix >> TAVOR_MCG_TOPBITS_SHIFT) &
1627 TAVOR_MCG_TOPBITS_MASK;
1628 if (topbits != TAVOR_MCG_TOPBITS) {
1629 TNF_PROBE_0(tavor_mgid_is_valid_invbits_fail, TAVOR_TNF_ERROR,
1630 "");
1631 TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1632 return (0);
1633 }
1634
1635 /*
1636 * The next 4 bits are the "flag" bits. These are valid only
1637 * if they are "0" (which correspond to permanently assigned/
1638 * "well-known" multicast GIDs) or "1" (for so-called "transient"
1639 * multicast GIDs). All other values are reserved.
1640 */
1641 flags = (gid.gid_prefix >> TAVOR_MCG_FLAGS_SHIFT) &
1642 TAVOR_MCG_FLAGS_MASK;
1643 if (!((flags == TAVOR_MCG_FLAGS_PERM) ||
1644 (flags == TAVOR_MCG_FLAGS_NONPERM))) {
1645 TNF_PROBE_1(tavor_mgid_is_valid_invflags_fail, TAVOR_TNF_ERROR,
1646 "", tnf_uint, flags, flags);
1647 TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1648 return (0);
1649 }
1650
1651 /*
1652 * The next 4 bits are the "scope" bits. These are valid only
1653 * if they are "2" (Link-local), "5" (Site-local), "8"
1654 * (Organization-local) or "E" (Global). All other values
1655 * are reserved (or currently unassigned).
1656 */
1657 scope = (gid.gid_prefix >> TAVOR_MCG_SCOPE_SHIFT) &
1658 TAVOR_MCG_SCOPE_MASK;
1659 if (!((scope == TAVOR_MCG_SCOPE_LINKLOC) ||
1660 (scope == TAVOR_MCG_SCOPE_SITELOC) ||
1661 (scope == TAVOR_MCG_SCOPE_ORGLOC) ||
1662 (scope == TAVOR_MCG_SCOPE_GLOBAL))) {
1663 TNF_PROBE_1(tavor_mgid_is_valid_invscope_fail, TAVOR_TNF_ERROR,
1664 "", tnf_uint, scope, scope);
1665 TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1666 return (0);
1667 }
1668
1669 /*
1670 * If it passes all of the above checks, then we will consider it
1671 * a valid multicast GID.
1672 */
1673 TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1674 return (1);
1675 }
1676
1677
1678 /*
1679 * tavor_mlid_is_valid()
1680 * Context: Can be called from interrupt or base context.
1681 */
1682 static int
1683 tavor_mlid_is_valid(ib_lid_t lid)
1684 {
1685 TAVOR_TNF_ENTER(tavor_mlid_is_valid);
1686
1687 /*
1688 * According to IBA 1.1 specification (section 4.1.1) a valid
1689 * "multicast DLID" must be between 0xC000 and 0xFFFE.
1690 */
1691 if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
1692 TNF_PROBE_1(tavor_mlid_is_valid_invdlid_fail, TAVOR_TNF_ERROR,
1693 "", tnf_uint, mlid, lid);
1694 TAVOR_TNF_EXIT(tavor_mlid_is_valid);
1695 return (0);
1696 }
1697
1698 TAVOR_TNF_EXIT(tavor_mlid_is_valid);
1699 return (1);
1700 }
1701
1702
1703 /*
1704 * tavor_pd_alloc()
1705 * Context: Can be called only from user or kernel context.
1706 */
1707 int
1708 tavor_pd_alloc(tavor_state_t *state, tavor_pdhdl_t *pdhdl, uint_t sleepflag)
1709 {
1710 tavor_rsrc_t *rsrc;
1711 tavor_pdhdl_t pd;
1712 int status;
1713
1714 TAVOR_TNF_ENTER(tavor_pd_alloc);
1715
1716 /*
1717 * Allocate the software structure for tracking the protection domain
1718 * (i.e. the Tavor Protection Domain handle). By default each PD
1719 * structure will have a unique PD number assigned to it. All that
1720 * is necessary is for software to initialize the PD reference count
1721 * (to zero) and return success.
1722 */
1723 status = tavor_rsrc_alloc(state, TAVOR_PDHDL, 1, sleepflag, &rsrc);
1724 if (status != DDI_SUCCESS) {
1725 TNF_PROBE_0(tavor_pd_alloc_rsrcalloc_fail, TAVOR_TNF_ERROR, "");
1726 TAVOR_TNF_EXIT(tavor_pd_alloc);
1727 return (IBT_INSUFF_RESOURCE);
1728 }
1729 pd = (tavor_pdhdl_t)rsrc->tr_addr;
1730
1731 pd->pd_refcnt = 0;
1732 *pdhdl = pd;
1733
1734 TAVOR_TNF_EXIT(tavor_pd_alloc);
1735 return (DDI_SUCCESS);
1736 }
1737
1738
1739 /*
1740 * tavor_pd_free()
1741 * Context: Can be called only from user or kernel context.
1742 */
1743 int
1744 tavor_pd_free(tavor_state_t *state, tavor_pdhdl_t *pdhdl)
1745 {
1746 tavor_rsrc_t *rsrc;
1747 tavor_pdhdl_t pd;
1748
1749 TAVOR_TNF_ENTER(tavor_pd_free);
1750
1751 /*
1752 * Pull all the necessary information from the Tavor Protection Domain
1753 * handle. This is necessary here because the resource for the
1754 * PD is going to be freed up as part of this operation.
1755 */
1756 pd = *pdhdl;
1757 rsrc = pd->pd_rsrcp;
1758
1759 /*
1760 * Check the PD reference count. If the reference count is non-zero,
1761 * then it means that this protection domain is still referenced by
1762 * some memory region, queue pair, address handle, or other IB object
1763 * If it is non-zero, then return an error. Otherwise, free the
1764 * Tavor resource and return success.
1765 */
1766 if (pd->pd_refcnt != 0) {
1767 TNF_PROBE_1(tavor_pd_free_refcnt_fail, TAVOR_TNF_ERROR, "",
1768 tnf_int, refcnt, pd->pd_refcnt);
1769 TAVOR_TNF_EXIT(tavor_pd_free);
1770 return (IBT_PD_IN_USE);
1771 }
1772
1773 /* Free the Tavor Protection Domain handle */
1774 tavor_rsrc_free(state, &rsrc);
1775
1776 /* Set the pdhdl pointer to NULL and return success */
1777 *pdhdl = (tavor_pdhdl_t)NULL;
1778
1779 TAVOR_TNF_EXIT(tavor_pd_free);
1780 return (DDI_SUCCESS);
1781 }
1782
1783
1784 /*
1785 * tavor_pd_refcnt_inc()
1786 * Context: Can be called from interrupt or base context.
1787 */
1788 void
1789 tavor_pd_refcnt_inc(tavor_pdhdl_t pd)
1790 {
1791 /* Increment the protection domain's reference count */
1792 mutex_enter(&pd->pd_lock);
1793 TNF_PROBE_1_DEBUG(tavor_pd_refcnt_inc, TAVOR_TNF_TRACE, "",
1794 tnf_uint, refcnt, pd->pd_refcnt);
1795 pd->pd_refcnt++;
1796 mutex_exit(&pd->pd_lock);
1797
1798 }
1799
1800
1801 /*
1802 * tavor_pd_refcnt_dec()
1803 * Context: Can be called from interrupt or base context.
1804 */
1805 void
1806 tavor_pd_refcnt_dec(tavor_pdhdl_t pd)
1807 {
1808 /* Decrement the protection domain's reference count */
1809 mutex_enter(&pd->pd_lock);
1810 pd->pd_refcnt--;
1811 TNF_PROBE_1_DEBUG(tavor_pd_refcnt_dec, TAVOR_TNF_TRACE, "",
1812 tnf_uint, refcnt, pd->pd_refcnt);
1813 mutex_exit(&pd->pd_lock);
1814
1815 }
1816
1817
1818 /*
1819 * tavor_port_query()
1820 * Context: Can be called only from user or kernel context.
1821 */
1822 int
1823 tavor_port_query(tavor_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
1824 {
1825 sm_portinfo_t portinfo;
1826 sm_guidinfo_t guidinfo;
1827 sm_pkey_table_t pkeytable;
1828 ib_gid_t *sgid;
1829 uint_t sgid_max, pkey_max, tbl_size;
1830 int i, j, indx, status;
1831
1832 TAVOR_TNF_ENTER(tavor_port_query);
1833
1834 /* Validate that specified port number is legal */
1835 if (!tavor_portnum_is_valid(state, port)) {
1836 TNF_PROBE_1(tavor_port_query_inv_portnum_fail,
1837 TAVOR_TNF_ERROR, "", tnf_uint, port, port);
1838 TAVOR_TNF_EXIT(tavor_port_query);
1839 return (IBT_HCA_PORT_INVALID);
1840 }
1841
1842 /*
1843 * We use the Tavor MAD_IFC command to post a GetPortInfo MAD
1844 * to the firmware (for the specified port number). This returns
1845 * a full PortInfo MAD (in "portinfo") which we subsequently
1846 * parse to fill in the "ibt_hca_portinfo_t" structure returned
1847 * to the IBTF.
1848 */
1849 status = tavor_getportinfo_cmd_post(state, port,
1850 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
1851 if (status != TAVOR_CMD_SUCCESS) {
1852 cmn_err(CE_CONT, "Tavor: GetPortInfo (port %02d) command "
1853 "failed: %08x\n", port, status);
1854 TNF_PROBE_1(tavor_port_query_getportinfo_cmd_fail,
1855 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1856 TAVOR_TNF_EXIT(tavor_port_query);
1857 return (ibc_get_ci_failure(0));
1858 }
1859
1860 /*
1861 * Parse the PortInfo MAD and fill in the IBTF structure
1862 */
1863 pi->p_base_lid = portinfo.LID;
1864 pi->p_qkey_violations = portinfo.Q_KeyViolations;
1865 pi->p_pkey_violations = portinfo.P_KeyViolations;
1866 pi->p_sm_sl = portinfo.MasterSMSL;
1867 pi->p_sm_lid = portinfo.MasterSMLID;
1868 pi->p_linkstate = portinfo.PortState;
1869 pi->p_port_num = portinfo.LocalPortNum;
1870 pi->p_phys_state = portinfo.PortPhysicalState;
1871 pi->p_width_supported = portinfo.LinkWidthSupported;
1872 pi->p_width_enabled = portinfo.LinkWidthEnabled;
1873 pi->p_width_active = portinfo.LinkWidthActive;
1874 pi->p_speed_supported = portinfo.LinkSpeedSupported;
1875 pi->p_speed_enabled = portinfo.LinkSpeedEnabled;
1876 pi->p_speed_active = portinfo.LinkSpeedActive;
1877 pi->p_mtu = portinfo.MTUCap;
1878 pi->p_lmc = portinfo.LMC;
1879 pi->p_max_vl = portinfo.VLCap;
1880 pi->p_subnet_timeout = portinfo.SubnetTimeOut;
1881 pi->p_msg_sz = ((uint32_t)1 << TAVOR_QP_LOG_MAX_MSGSZ);
1882 tbl_size = state->ts_cfg_profile->cp_log_max_gidtbl;
1883 pi->p_sgid_tbl_sz = (1 << tbl_size);
1884 tbl_size = state->ts_cfg_profile->cp_log_max_pkeytbl;
1885 pi->p_pkey_tbl_sz = (1 << tbl_size);
1886
1887 /*
1888 * Convert InfiniBand-defined port capability flags to the format
1889 * specified by the IBTF
1890 */
1891 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
1892 pi->p_capabilities |= IBT_PORT_CAP_SM;
1893 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
1894 pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
1895 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
1896 pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
1897 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
1898 pi->p_capabilities |= IBT_PORT_CAP_DM;
1899 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
1900 pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
1901
1902 /*
1903 * Fill in the SGID table. Since the only access to the Tavor
1904 * GID tables is through the firmware's MAD_IFC interface, we
1905 * post as many GetGUIDInfo MADs as necessary to read in the entire
1906 * contents of the SGID table (for the specified port). Note: The
1907 * GetGUIDInfo command only gets eight GUIDs per operation. These
1908 * GUIDs are then appended to the GID prefix for the port (from the
1909 * GetPortInfo above) to form the entire SGID table.
1910 */
1911 for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
1912 status = tavor_getguidinfo_cmd_post(state, port, i >> 3,
1913 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
1914 if (status != TAVOR_CMD_SUCCESS) {
1915 cmn_err(CE_CONT, "Tavor: GetGUIDInfo (port %02d) "
1916 "command failed: %08x\n", port, status);
1917 TNF_PROBE_1(tavor_port_query_getguidinfo_cmd_fail,
1918 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1919 TAVOR_TNF_EXIT(tavor_port_query);
1920 return (ibc_get_ci_failure(0));
1921 }
1922
1923 /* Figure out how many of the entries are valid */
1924 sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
1925 for (j = 0; j < sgid_max; j++) {
1926 indx = (i + j);
1927 sgid = &pi->p_sgid_tbl[indx];
1928 sgid->gid_prefix = portinfo.GidPrefix;
1929 sgid->gid_guid = guidinfo.GUIDBlocks[j];
1930 }
1931 }
1932
1933 /*
1934 * Fill in the PKey table. Just as for the GID tables above, the
1935 * only access to the Tavor PKey tables is through the firmware's
1936 * MAD_IFC interface. We post as many GetPKeyTable MADs as necessary
1937 * to read in the entire contents of the PKey table (for the specified
1938 * port). Note: The GetPKeyTable command only gets 32 PKeys per
1939 * operation.
1940 */
1941 for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
1942 status = tavor_getpkeytable_cmd_post(state, port, i,
1943 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
1944 if (status != TAVOR_CMD_SUCCESS) {
1945 cmn_err(CE_CONT, "Tavor: GetPKeyTable (port %02d) "
1946 "command failed: %08x\n", port, status);
1947 TNF_PROBE_1(tavor_port_query_getpkeytable_cmd_fail,
1948 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1949 TAVOR_TNF_EXIT(tavor_port_query);
1950 return (ibc_get_ci_failure(0));
1951 }
1952
1953 /* Figure out how many of the entries are valid */
1954 pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
1955 for (j = 0; j < pkey_max; j++) {
1956 indx = (i + j);
1957 pi->p_pkey_tbl[indx] = pkeytable.P_KeyTableBlocks[j];
1958 }
1959 }
1960
1961 TAVOR_TNF_EXIT(tavor_port_query);
1962 return (DDI_SUCCESS);
1963 }
1964
1965
1966 /*
1967 * tavor_port_modify()
1968 * Context: Can be called only from user or kernel context.
1969 */
1970 /* ARGSUSED */
1971 int
1972 tavor_port_modify(tavor_state_t *state, uint8_t port,
1973 ibt_port_modify_flags_t flags, uint8_t init_type)
1974 {
1975 sm_portinfo_t portinfo;
1976 uint32_t capmask, reset_qkey;
1977 int status;
1978
1979 TAVOR_TNF_ENTER(tavor_port_modify);
1980
1981 /*
1982 * Return an error if either of the unsupported flags are set
1983 */
1984 if ((flags & IBT_PORT_SHUTDOWN) ||
1985 (flags & IBT_PORT_SET_INIT_TYPE)) {
1986 TNF_PROBE_1(tavor_port_modify_inv_flags_fail,
1987 TAVOR_TNF_ERROR, "", tnf_uint, flags, flags);
1988 TAVOR_TNF_EXIT(tavor_port_modify);
1989 return (IBT_NOT_SUPPORTED);
1990 }
1991
1992 /*
1993 * Determine whether we are trying to reset the QKey counter
1994 */
1995 reset_qkey = (flags & IBT_PORT_RESET_QKEY) ? 1 : 0;
1996
1997 /* Validate that specified port number is legal */
1998 if (!tavor_portnum_is_valid(state, port)) {
1999 TNF_PROBE_1(tavor_port_modify_inv_portnum_fail,
2000 TAVOR_TNF_ERROR, "", tnf_uint, port, port);
2001 TAVOR_TNF_EXIT(tavor_port_modify);
2002 return (IBT_HCA_PORT_INVALID);
2003 }
2004
2005 /*
2006 * Use the Tavor MAD_IFC command to post a GetPortInfo MAD to the
2007 * firmware (for the specified port number). This returns a full
2008 * PortInfo MAD (in "portinfo") from which we pull the current
2009 * capability mask. We then modify the capability mask as directed
2010 * by the "pmod_flags" field, and write the updated capability mask
2011 * using the Tavor SET_IB command (below).
2012 */
2013 status = tavor_getportinfo_cmd_post(state, port,
2014 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
2015 if (status != TAVOR_CMD_SUCCESS) {
2016 TNF_PROBE_1(tavor_port_modify_getportinfo_cmd_fail,
2017 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
2018 TAVOR_TNF_EXIT(tavor_port_modify);
2019 return (ibc_get_ci_failure(0));
2020 }
2021
2022 /*
2023 * Convert InfiniBand-defined port capability flags to the format
2024 * specified by the IBTF. Specifically, we modify the capability
2025 * mask based on the specified values.
2026 */
2027 capmask = portinfo.CapabilityMask;
2028
2029 if (flags & IBT_PORT_RESET_SM)
2030 capmask &= ~SM_CAP_MASK_IS_SM;
2031 else if (flags & IBT_PORT_SET_SM)
2032 capmask |= SM_CAP_MASK_IS_SM;
2033
2034 if (flags & IBT_PORT_RESET_SNMP)
2035 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
2036 else if (flags & IBT_PORT_SET_SNMP)
2037 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;
2038
2039 if (flags & IBT_PORT_RESET_DEVMGT)
2040 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
2041 else if (flags & IBT_PORT_SET_DEVMGT)
2042 capmask |= SM_CAP_MASK_IS_DM_SUPPD;
2043
2044 if (flags & IBT_PORT_RESET_VENDOR)
2045 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
2046 else if (flags & IBT_PORT_SET_VENDOR)
2047 capmask |= SM_CAP_MASK_IS_VM_SUPPD;
2048
2049 /*
2050 * Use the Tavor SET_IB command to update the capability mask and
2051 * (possibly) reset the QKey violation counter for the specified port.
2052 * Note: In general, this operation shouldn't fail. If it does, then
2053 * it is an indication that something (probably in HW, but maybe in
2054 * SW) has gone seriously wrong.
2055 */
2056 status = tavor_set_ib_cmd_post(state, capmask, port, reset_qkey,
2057 TAVOR_SLEEPFLAG_FOR_CONTEXT());
2058 if (status != TAVOR_CMD_SUCCESS) {
2059 TAVOR_WARNING(state, "failed to modify port capabilities");
2060 cmn_err(CE_CONT, "Tavor: SET_IB (port %02d) command failed: "
2061 "%08x\n", port, status);
2062 TNF_PROBE_1(tavor_port_modify_set_ib_cmd_fail,
2063 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
2064 TAVOR_TNF_EXIT(tavor_port_modify);
2065 return (ibc_get_ci_failure(0));
2066 }
2067
2068 TAVOR_TNF_EXIT(tavor_port_modify);
2069 return (DDI_SUCCESS);
2070 }
2071
2072
2073 /*
2074 * tavor_set_addr_path()
2075 * Context: Can be called from interrupt or base context.
2076 *
2077 * Note: This routine is used for two purposes. It is used to fill in the
2078 * Tavor UDAV fields, and it is used to fill in the address path information
2079 * for QPs. Because the two Tavor structures are similar, common fields can
2080 * be filled in here. Because they are slightly different, however, we pass
2081 * an additional flag to indicate which type is being filled.
2082 */
2083 int
2084 tavor_set_addr_path(tavor_state_t *state, ibt_adds_vect_t *av,
2085 tavor_hw_addr_path_t *path, uint_t type, tavor_qphdl_t qp)
2086 {
2087 uint_t gidtbl_sz;
2088
2089 TAVOR_TNF_ENTER(tavor_set_addr_path);
2090
2091 path->ml_path = av->av_src_path;
2092 path->rlid = av->av_dlid;
2093 path->sl = av->av_srvl;
2094
2095 /* Port number only valid (in "av_port_num") if this is a UDAV */
2096 if (type == TAVOR_ADDRPATH_UDAV) {
2097 path->portnum = av->av_port_num;
2098 }
2099
2100 /*
2101 * Validate (and fill in) static rate.
2102 *
2103 * The stat_rate_sup is used to decide how to set the rate and
2104 * if it is zero, the driver uses the old interface.
2105 */
2106 if (state->ts_devlim.stat_rate_sup) {
2107 if (av->av_srate == IBT_SRATE_20) {
2108 path->max_stat_rate = 0; /* 4x@DDR injection rate */
2109 } else if (av->av_srate == IBT_SRATE_5) {
2110 path->max_stat_rate = 3; /* 1x@DDR injection rate */
2111 } else if (av->av_srate == IBT_SRATE_10) {
2112 path->max_stat_rate = 2; /* 4x@SDR injection rate */
2113 } else if (av->av_srate == IBT_SRATE_2) {
2114 path->max_stat_rate = 1; /* 1x@SDR injection rate */
2115 } else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) {
2116 path->max_stat_rate = 0; /* Max */
2117 } else {
2118 TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail,
2119 TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate);
2120 TAVOR_TNF_EXIT(tavor_set_addr_path);
2121 return (IBT_STATIC_RATE_INVALID);
2122 }
2123 } else {
2124 if (av->av_srate == IBT_SRATE_10) {
2125 path->max_stat_rate = 0; /* 4x@SDR injection rate */
2126 } else if (av->av_srate == IBT_SRATE_2) {
2127 path->max_stat_rate = 1; /* 1x@SDR injection rate */
2128 } else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) {
2129 path->max_stat_rate = 0; /* Max */
2130 } else {
2131 TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail,
2132 TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate);
2133 TAVOR_TNF_EXIT(tavor_set_addr_path);
2134 return (IBT_STATIC_RATE_INVALID);
2135 }
2136 }
2137
2138 /*
2139 * If this is a QP operation save asoft copy.
2140 */
2141 if (qp) {
2142 qp->qp_save_srate = av->av_srate;
2143 }
2144
2145 /* If "grh" flag is set, then check for valid SGID index too */
2146 gidtbl_sz = (1 << state->ts_devlim.log_max_gid);
2147 if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
2148 TNF_PROBE_1(tavor_set_addr_path_inv_sgid_ix_fail,
2149 TAVOR_TNF_ERROR, "", tnf_uint, sgid_ix, av->av_sgid_ix);
2150 TAVOR_TNF_EXIT(tavor_set_addr_path);
2151 return (IBT_SGID_INVALID);
2152 }
2153
2154 /*
2155 * Fill in all "global" values regardless of the value in the GRH
2156 * flag. Because "grh" is not set unless "av_send_grh" is set, the
2157 * hardware will ignore the other "global" values as necessary. Note:
2158 * SW does this here to enable later query operations to return
2159 * exactly the same params that were passed when the addr path was
2160 * last written.
2161 */
2162 path->grh = av->av_send_grh;
2163 if (type == TAVOR_ADDRPATH_QP) {
2164 path->mgid_index = av->av_sgid_ix;
2165 } else {
2166 /*
2167 * For Tavor UDAV, the "mgid_index" field is the index into
2168 * a combined table (not a per-port table). So some extra
2169 * calculations are necessary.
2170 */
2171 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
2172 av->av_sgid_ix;
2173 }
2174 path->flow_label = av->av_flow;
2175 path->tclass = av->av_tclass;
2176 path->hop_limit = av->av_hop;
2177 path->rgid_h = av->av_dgid.gid_prefix;
2178
2179 /*
2180 * According to Tavor PRM, the (31:0) part of rgid_l must be set to
2181 * "0x2" if the 'grh' or 'g' bit is cleared. It also says that we
2182 * only need to do it for UDAV's. So we enforce that here.
2183 *
2184 * NOTE: The entire 64 bits worth of GUID info is actually being
2185 * preserved (for UDAVs) by the callers of this function
2186 * (tavor_ah_alloc() and tavor_ah_modify()) and as long as the
2187 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
2188 * "don't care".
2189 */
2190 if ((path->grh) || (type == TAVOR_ADDRPATH_QP)) {
2191 path->rgid_l = av->av_dgid.gid_guid;
2192 } else {
2193 path->rgid_l = 0x2;
2194 }
2195
2196 TAVOR_TNF_EXIT(tavor_set_addr_path);
2197 return (DDI_SUCCESS);
2198 }
2199
2200
2201 /*
2202 * tavor_get_addr_path()
2203 * Context: Can be called from interrupt or base context.
2204 *
2205 * Note: Just like tavor_set_addr_path() above, this routine is used for two
2206 * purposes. It is used to read in the Tavor UDAV fields, and it is used to
2207 * read in the address path information for QPs. Because the two Tavor
2208 * structures are similar, common fields can be read in here. But because
2209 * they are slightly different, we pass an additional flag to indicate which
2210 * type is being read.
2211 */
2212 void
2213 tavor_get_addr_path(tavor_state_t *state, tavor_hw_addr_path_t *path,
2214 ibt_adds_vect_t *av, uint_t type, tavor_qphdl_t qp)
2215 {
2216 uint_t gidtbl_sz;
2217
2218 av->av_src_path = path->ml_path;
2219 av->av_port_num = path->portnum;
2220 av->av_dlid = path->rlid;
2221 av->av_srvl = path->sl;
2222
2223 /*
2224 * Set "av_ipd" value from max_stat_rate.
2225 */
2226 if (qp) {
2227 /*
2228 * If a QP operation use the soft copy
2229 */
2230 av->av_srate = qp->qp_save_srate;
2231 } else {
2232 /*
2233 * The stat_rate_sup is used to decide how the srate value is
2234 * set and
2235 * if it is zero, the driver uses the old interface.
2236 */
2237 if (state->ts_devlim.stat_rate_sup) {
2238 if (path->max_stat_rate == 0) {
2239 av->av_srate = IBT_SRATE_20; /* 4x@DDR rate */
2240 } else if (path->max_stat_rate == 1) {
2241 av->av_srate = IBT_SRATE_2; /* 1x@SDR rate */
2242 } else if (path->max_stat_rate == 2) {
2243 av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */
2244 } else if (path->max_stat_rate == 3) {
2245 av->av_srate = IBT_SRATE_5; /* 1xDDR rate */
2246 }
2247 } else {
2248 if (path->max_stat_rate == 0) {
2249 av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */
2250 } else if (path->max_stat_rate == 1) {
2251 av->av_srate = IBT_SRATE_2; /* 1x@SDR rate */
2252 }
2253 }
2254 }
2255
2256 /*
2257 * Extract all "global" values regardless of the value in the GRH
2258 * flag. Because "av_send_grh" is set only if "grh" is set, software
2259 * knows to ignore the other "global" values as necessary. Note: SW
2260 * does it this way to enable these query operations to return exactly
2261 * the same params that were passed when the addr path was last written.
2262 */
2263 av->av_send_grh = path->grh;
2264 if (type == TAVOR_ADDRPATH_QP) {
2265 av->av_sgid_ix = path->mgid_index;
2266 } else {
2267 /*
2268 * For Tavor UDAV, the "mgid_index" field is the index into
2269 * a combined table (not a per-port table). So some extra
2270 * calculations are necessary.
2271 */
2272 gidtbl_sz = (1 << state->ts_devlim.log_max_gid);
2273 av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
2274 gidtbl_sz);
2275 }
2276 av->av_flow = path->flow_label;
2277 av->av_tclass = path->tclass;
2278 av->av_hop = path->hop_limit;
2279 av->av_dgid.gid_prefix = path->rgid_h;
2280 av->av_dgid.gid_guid = path->rgid_l;
2281 }
2282
2283
2284 /*
2285 * tavor_portnum_is_valid()
2286 * Context: Can be called from interrupt or base context.
2287 */
2288 int
2289 tavor_portnum_is_valid(tavor_state_t *state, uint_t portnum)
2290 {
2291 uint_t max_port;
2292
2293 max_port = state->ts_cfg_profile->cp_num_ports;
2294 if ((portnum <= max_port) && (portnum != 0)) {
2295 return (1);
2296 } else {
2297 return (0);
2298 }
2299 }
2300
2301
2302 /*
2303 * tavor_pkeyindex_is_valid()
2304 * Context: Can be called from interrupt or base context.
2305 */
2306 int
2307 tavor_pkeyindex_is_valid(tavor_state_t *state, uint_t pkeyindx)
2308 {
2309 uint_t max_pkeyindx;
2310
2311 max_pkeyindx = 1 << state->ts_cfg_profile->cp_log_max_pkeytbl;
2312 if (pkeyindx < max_pkeyindx) {
2313 return (1);
2314 } else {
2315 return (0);
2316 }
2317 }
2318
2319
2320 /*
2321 * tavor_queue_alloc()
2322 * Context: Can be called from interrupt or base context.
2323 */
2324 int
2325 tavor_queue_alloc(tavor_state_t *state, tavor_qalloc_info_t *qa_info,
2326 uint_t sleepflag)
2327 {
2328 ddi_dma_attr_t dma_attr;
2329 int (*callback)(caddr_t);
2330 uint64_t realsize, alloc_mask;
2331 uint_t dma_xfer_mode, type;
2332 int flag, status;
2333
2334 TAVOR_TNF_ENTER(tavor_queue_alloc);
2335
2336 /* Set the callback flag appropriately */
2337 callback = (sleepflag == TAVOR_SLEEP) ? DDI_DMA_SLEEP :
2338 DDI_DMA_DONTWAIT;
2339
2340 /*
2341 * Initialize many of the default DMA attributes. Then set additional
2342 * alignment restrictions as necessary for the queue memory. Also
2343 * respect the configured value for IOMMU bypass
2344 */
2345 tavor_dma_attr_init(&dma_attr);
2346 dma_attr.dma_attr_align = qa_info->qa_bind_align;
2347 type = state->ts_cfg_profile->cp_iommu_bypass;
2348 if (type == TAVOR_BINDMEM_BYPASS) {
2349 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
2350 }
2351
2352 /* Allocate a DMA handle */
2353 status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr, callback, NULL,
2354 &qa_info->qa_dmahdl);
2355 if (status != DDI_SUCCESS) {
2356 TNF_PROBE_0(tavor_queue_alloc_dmahdl_fail, TAVOR_TNF_ERROR, "");
2357 TAVOR_TNF_EXIT(tavor_queue_alloc);
2358 return (DDI_FAILURE);
2359 }
2360
2361 /*
2362 * Determine the amount of memory to allocate, depending on the values
2363 * in "qa_bind_align" and "qa_alloc_align". The problem we are trying
2364 * to solve here is that allocating a DMA handle with IOMMU bypass
2365 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
2366 * that are less than the page size. Since we may need stricter
2367 * alignments on the memory allocated by ddi_dma_mem_alloc() (e.g. in
2368 * Tavor QP work queue memory allocation), we use the following method
2369 * to calculate how much additional memory to request, and we enforce
2370 * our own alignment on the allocated result.
2371 */
2372 alloc_mask = qa_info->qa_alloc_align - 1;
2373 if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
2374 realsize = qa_info->qa_size;
2375 } else {
2376 realsize = qa_info->qa_size + alloc_mask;
2377 }
2378
2379 /*
2380 * If we are to allocate the queue from system memory, then use
2381 * ddi_dma_mem_alloc() to find the space. Otherwise, if we are to
2382 * allocate the queue from locally-attached DDR memory, then use the
2383 * vmem allocator to find the space. In either case, return a pointer
2384 * to the memory range allocated (including any necessary alignment
2385 * adjustments), the "real" memory pointer, the "real" size, and a
2386 * ddi_acc_handle_t to use when reading from/writing to the memory.
2387 */
2388 if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) {
2389
2390 /*
2391 * Determine whether to map STREAMING or CONSISTENT. This is
2392 * based on the value set in the configuration profile at
2393 * attach time.
2394 */
2395 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent;
2396
2397 /* Allocate system memory for the queue */
2398 status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
2399 &state->ts_reg_accattr, dma_xfer_mode, callback, NULL,
2400 (caddr_t *)&qa_info->qa_buf_real,
2401 (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
2402 if (status != DDI_SUCCESS) {
2403 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2404 TNF_PROBE_0(tavor_queue_alloc_dma_memalloc_fail,
2405 TAVOR_TNF_ERROR, "");
2406 TAVOR_TNF_EXIT(tavor_queue_alloc);
2407 return (DDI_FAILURE);
2408 }
2409
2410 /*
2411 * Save temporary copy of the real pointer. (This may be
2412 * modified in the last step below).
2413 */
2414 qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2415
2416 } else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) {
2417
2418 /* Allocate userland mappable memory for the queue */
2419 flag = (sleepflag == TAVOR_SLEEP) ? DDI_UMEM_SLEEP :
2420 DDI_UMEM_NOSLEEP;
2421 qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
2422 &qa_info->qa_umemcookie);
2423 if (qa_info->qa_buf_real == NULL) {
2424 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2425 TNF_PROBE_0(tavor_queue_alloc_umem_fail,
2426 TAVOR_TNF_ERROR, "");
2427 TAVOR_TNF_EXIT(tavor_queue_alloc);
2428 return (DDI_FAILURE);
2429 }
2430
2431 /*
2432 * Save temporary copy of the real pointer. (This may be
2433 * modified in the last step below).
2434 */
2435 qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2436
2437 } else { /* TAVOR_QUEUE_LOCATION_INDDR */
2438
2439 /* Allocate DDR memory for the queue */
2440 flag = (sleepflag == TAVOR_SLEEP) ? VM_SLEEP : VM_NOSLEEP;
2441 qa_info->qa_buf_real = (uint32_t *)vmem_xalloc(
2442 state->ts_ddrvmem, realsize, qa_info->qa_bind_align, 0, 0,
2443 NULL, NULL, flag);
2444 if (qa_info->qa_buf_real == NULL) {
2445 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2446 TNF_PROBE_0(tavor_queue_alloc_vmxa_fail,
2447 TAVOR_TNF_ERROR, "");
2448 TAVOR_TNF_EXIT(tavor_queue_alloc);
2449 return (DDI_FAILURE);
2450 }
2451
2452 /*
2453 * Since "qa_buf_real" will be a PCI address (the offset into
2454 * the DDR memory), we first need to do some calculations to
2455 * convert it to its kernel mapped address. (Note: This may
2456 * be modified again below, when any additional "alloc"
2457 * alignment constraint is applied).
2458 */
2459 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2460 state->ts_reg_ddr_baseaddr) + ((uintptr_t)
2461 qa_info->qa_buf_real - state->ts_ddr.ddr_baseaddr));
2462 qa_info->qa_buf_realsz = realsize;
2463 qa_info->qa_acchdl = state->ts_reg_ddrhdl;
2464 }
2465
2466 /*
2467 * The last step is to ensure that the final address ("qa_buf_aligned")
2468 * has the appropriate "alloc" alignment restriction applied to it
2469 * (if necessary).
2470 */
2471 if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
2472 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2473 qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
2474 }
2475
2476 TAVOR_TNF_EXIT(tavor_queue_alloc);
2477 return (DDI_SUCCESS);
2478 }
2479
2480
2481 /*
2482 * tavor_queue_free()
2483 * Context: Can be called from interrupt or base context.
2484 */
2485 void
2486 tavor_queue_free(tavor_state_t *state, tavor_qalloc_info_t *qa_info)
2487 {
2488 TAVOR_TNF_ENTER(tavor_queue_free);
2489
2490 /*
2491 * Depending on how (i.e. from where) we allocated the memory for
2492 * this queue, we choose the appropriate method for releasing the
2493 * resources.
2494 */
2495 if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) {
2496
2497 ddi_dma_mem_free(&qa_info->qa_acchdl);
2498
2499 } else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) {
2500
2501 ddi_umem_free(qa_info->qa_umemcookie);
2502
2503 } else { /* TAVOR_QUEUE_LOCATION_INDDR */
2504
2505 vmem_xfree(state->ts_ddrvmem, qa_info->qa_buf_real,
2506 qa_info->qa_buf_realsz);
2507 }
2508
2509 /* Always free the dma handle */
2510 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2511
2512 TAVOR_TNF_EXIT(tavor_queue_free);
2513 }
2514
2515
2516 /*
2517 * tavor_dmaattr_get()
2518 * Context: Can be called from interrupt or base context.
2519 */
2520 void
2521 tavor_dma_attr_init(ddi_dma_attr_t *dma_attr)
2522 {
2523 dma_attr->dma_attr_version = DMA_ATTR_V0;
2524 dma_attr->dma_attr_addr_lo = 0;
2525 dma_attr->dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFFull;
2526 dma_attr->dma_attr_count_max = 0xFFFFFFFFFFFFFFFFull;
2527 dma_attr->dma_attr_align = 1;
2528 dma_attr->dma_attr_burstsizes = 0x3FF;
2529 dma_attr->dma_attr_minxfer = 1;
2530 dma_attr->dma_attr_maxxfer = 0xFFFFFFFFFFFFFFFFull;
2531 dma_attr->dma_attr_seg = 0xFFFFFFFFFFFFFFFFull;
2532 dma_attr->dma_attr_sgllen = 0x7FFFFFFF;
2533 dma_attr->dma_attr_granular = 1;
2534 dma_attr->dma_attr_flags = 0;
2535 }