Print this page
8368 remove warlock leftovers from usr/src/uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/ib/adapters/tavor/tavor_misc.c
+++ new/usr/src/uts/common/io/ib/adapters/tavor/tavor_misc.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * tavor_misc.c
29 29 * Tavor Miscellaneous routines - Address Handle, Multicast, Protection
30 30 * Domain, and port-related operations
31 31 *
32 32 * Implements all the routines necessary for allocating, freeing, querying
33 33 * and modifying Address Handles and Protection Domains. Also implements
34 34 * all the routines necessary for adding and removing Queue Pairs to/from
35 35 * Multicast Groups. Lastly, it implements the routines necessary for
36 36 * port-related query and modify operations.
37 37 */
38 38
39 39 #include <sys/types.h>
40 40 #include <sys/conf.h>
41 41 #include <sys/ddi.h>
42 42 #include <sys/sunddi.h>
43 43 #include <sys/modctl.h>
44 44 #include <sys/bitmap.h>
45 45 #include <sys/sysmacros.h>
46 46
47 47 #include <sys/ib/adapters/tavor/tavor.h>
48 48
49 49 static void tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav,
50 50 uint_t flag);
51 51 static int tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg,
52 52 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp, uint_t *qp_found);
53 53 static int tavor_mcg_qplist_remove(tavor_mcghdl_t mcg,
54 54 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp);
55 55 static void tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp);
56 56 static void tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp);
57 57 static uint_t tavor_mcg_walk_mgid_hash(tavor_state_t *state,
58 58 uint64_t start_indx, ib_gid_t mgid, uint_t *prev_indx);
59 59 static void tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg,
60 60 tavor_hw_mcg_t *mcg_hdr, ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc);
61 61 static int tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx,
62 62 uint_t prev_indx, tavor_hw_mcg_t *mcg_entry);
63 63 static int tavor_mcg_entry_invalidate(tavor_state_t *state,
64 64 tavor_hw_mcg_t *mcg_entry, uint_t indx);
65 65 static int tavor_mgid_is_valid(ib_gid_t gid);
66 66 static int tavor_mlid_is_valid(ib_lid_t lid);
67 67
68 68
69 69 /*
70 70 * tavor_ah_alloc()
71 71 * Context: Can be called only from user or kernel context.
72 72 */
73 73 int
74 74 tavor_ah_alloc(tavor_state_t *state, tavor_pdhdl_t pd,
75 75 ibt_adds_vect_t *attr_p, tavor_ahhdl_t *ahhdl, uint_t sleepflag)
76 76 {
77 77 tavor_rsrc_t *udav, *rsrc;
78 78 tavor_hw_udav_t udav_entry;
79 79 tavor_ahhdl_t ah;
80 80 ibt_mr_attr_t mr_attr;
81 81 tavor_mr_options_t op;
82 82 tavor_mrhdl_t mr;
83 83 uint64_t data;
84 84 uint32_t size;
85 85 int status, i, flag;
86 86 char *errormsg;
87 87
88 88 TAVOR_TNF_ENTER(tavor_ah_alloc);
89 89
90 90 /*
91 91 * Someday maybe the "ibt_adds_vect_t *attr_p" will be NULL to
92 92 * indicate that we wish to allocate an "invalid" (i.e. empty)
93 93 * address handle XXX
94 94 */
95 95
96 96 /* Validate that specified port number is legal */
97 97 if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) {
98 98 /* Set "status" and "errormsg" and goto failure */
99 99 TAVOR_TNF_FAIL(IBT_HCA_PORT_INVALID, "invalid port num");
100 100 goto ahalloc_fail;
101 101 }
102 102
103 103 /*
104 104 * Allocate a UDAV entry. This will be filled in with all the
105 105 * necessary parameters to define the Address Handle. Unlike the
106 106 * other hardware resources no ownership transfer takes place as
107 107 * these UDAV entries are always owned by hardware.
108 108 */
109 109 status = tavor_rsrc_alloc(state, TAVOR_UDAV, 1, sleepflag, &udav);
110 110 if (status != DDI_SUCCESS) {
111 111 /* Set "status" and "errormsg" and goto failure */
112 112 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed UDAV");
113 113 goto ahalloc_fail;
114 114 }
115 115
116 116 /*
117 117 * Allocate the software structure for tracking the address handle
↓ open down ↓ |
117 lines elided |
↑ open up ↑ |
118 118 * (i.e. the Tavor Address Handle struct). If we fail here, we must
119 119 * undo the previous resource allocation.
120 120 */
121 121 status = tavor_rsrc_alloc(state, TAVOR_AHHDL, 1, sleepflag, &rsrc);
122 122 if (status != DDI_SUCCESS) {
123 123 /* Set "status" and "errormsg" and goto failure */
124 124 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed AH handler");
125 125 goto ahalloc_fail1;
126 126 }
127 127 ah = (tavor_ahhdl_t)rsrc->tr_addr;
128 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
129 128
130 129 /* Increment the reference count on the protection domain (PD) */
131 130 tavor_pd_refcnt_inc(pd);
132 131
133 132 /*
134 133 * Fill in the UDAV entry. Note: We are only filling in a temporary
135 134 * copy here, which we will later copy into the actual entry in
136 135 * Tavor DDR memory. This starts by zeroing out the temporary copy
137 136 * and then calling tavor_set_addr_path() to fill in the common
138 137 * portions that can be pulled from the "ibt_adds_vect_t" passed in
139 138 */
140 139 bzero(&udav_entry, sizeof (tavor_hw_udav_t));
141 140 status = tavor_set_addr_path(state, attr_p,
142 141 (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL);
143 142 if (status != DDI_SUCCESS) {
144 143 tavor_pd_refcnt_dec(pd);
145 144 tavor_rsrc_free(state, &rsrc);
146 145 tavor_rsrc_free(state, &udav);
147 146 /* Set "status" and "errormsg" and goto failure */
148 147 TAVOR_TNF_FAIL(status, "failed in tavor_set_addr_path");
149 148 goto ahalloc_fail;
150 149 }
151 150 udav_entry.pd = pd->pd_pdnum;
152 151 udav_entry.msg_sz = state->ts_cfg_profile->cp_max_mtu - 1;
153 152
154 153 /*
155 154 * Register the memory for the UDAV. The memory for the UDAV must
156 155 * be registered in the Tavor TPT tables. This gives us the LKey
157 156 * that we will need when we later post a UD work request that
158 157 * uses this address handle.
159 158 * We might be able to pre-register all the memory for the UDAV XXX
160 159 */
161 160 flag = (sleepflag == TAVOR_SLEEP) ? IBT_MR_SLEEP : IBT_MR_NOSLEEP;
162 161 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)udav->tr_addr;
163 162 mr_attr.mr_len = udav->tr_len;
164 163 mr_attr.mr_as = NULL;
165 164 mr_attr.mr_flags = flag;
166 165 op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass;
167 166 op.mro_bind_dmahdl = NULL;
168 167 op.mro_bind_override_addr = 0;
169 168 status = tavor_mr_register(state, pd, &mr_attr, &mr, &op);
170 169 if (status != DDI_SUCCESS) {
171 170 /* Set "status" and "errormsg" and goto failure */
172 171 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed register mr");
173 172 goto ahalloc_fail2;
174 173 }
175 174
176 175 /*
177 176 * Fill in the UDAV entry. Here we copy all the information from
178 177 * the temporary UDAV into the DDR memory for the real UDAV entry.
179 178 * Note that we copy everything but the first 64-bit word. This
180 179 * is where the PD number for the address handle resides.
181 180 * By filling everything except the PD and then writing the PD in
182 181 * a separate step below, we can ensure that the UDAV is not
183 182 * accessed while there are partially written values in it (something
184 183 * which really should not happen anyway). This is guaranteed
185 184 * because we take measures to ensure that the PD number is zero for
186 185 * all unused UDAV (and because PD#0 is reserved for Tavor).
187 186 */
188 187 size = sizeof (tavor_hw_udav_t) >> 3;
189 188 for (i = 1; i < size; i++) {
190 189 data = ((uint64_t *)&udav_entry)[i];
191 190 ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i),
192 191 data);
193 192 }
194 193 data = ((uint64_t *)&udav_entry)[0];
195 194 ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, data);
196 195
197 196 /*
198 197 * Fill in the rest of the Tavor Address Handle struct. Having
199 198 * successfully copied the UDAV into the hardware, we update the
200 199 * following fields for use in further operations on the AH.
201 200 *
202 201 * NOTE: We are saving away a copy of the "av_dgid.gid_guid" field
203 202 * here because we may need to return it later to the IBTF (as a
204 203 * result of a subsequent query operation). Unlike the other UDAV
205 204 * parameters, the value of "av_dgid.gid_guid" is not always preserved
206 205 * by being written to hardware. The reason for this is described in
207 206 * tavor_set_addr_path().
208 207 */
209 208 ah->ah_udavrsrcp = udav;
210 209 ah->ah_rsrcp = rsrc;
211 210 ah->ah_pdhdl = pd;
212 211 ah->ah_mrhdl = mr;
213 212 ah->ah_save_guid = attr_p->av_dgid.gid_guid;
214 213 ah->ah_save_srate = attr_p->av_srate;
215 214 *ahhdl = ah;
216 215
217 216 /* Determine if later ddi_dma_sync will be necessary */
218 217 ah->ah_sync = TAVOR_UDAV_IS_SYNC_REQ(state);
219 218
220 219 /* Sync the UDAV for use by the hardware */
221 220 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
222 221
223 222 TAVOR_TNF_EXIT(tavor_ah_alloc);
224 223 return (DDI_SUCCESS);
225 224
226 225 ahalloc_fail2:
227 226 tavor_pd_refcnt_dec(pd);
228 227 tavor_rsrc_free(state, &rsrc);
229 228 ahalloc_fail1:
230 229 tavor_rsrc_free(state, &udav);
231 230 ahalloc_fail:
232 231 TNF_PROBE_1(tavor_ah_alloc_fail, TAVOR_TNF_ERROR, "",
233 232 tnf_string, msg, errormsg);
234 233 TAVOR_TNF_EXIT(tavor_ah_alloc);
235 234 return (status);
236 235 }
237 236
238 237
239 238 /*
240 239 * tavor_ah_free()
241 240 * Context: Can be called only from user or kernel context.
242 241 */
243 242 /* ARGSUSED */
244 243 int
245 244 tavor_ah_free(tavor_state_t *state, tavor_ahhdl_t *ahhdl, uint_t sleepflag)
246 245 {
247 246 tavor_rsrc_t *udav, *rsrc;
248 247 tavor_pdhdl_t pd;
249 248 tavor_mrhdl_t mr;
250 249 tavor_ahhdl_t ah;
251 250 int status;
252 251
253 252 TAVOR_TNF_ENTER(tavor_ah_free);
254 253
255 254 /*
256 255 * Pull all the necessary information from the Tavor Address Handle
↓ open down ↓ |
118 lines elided |
↑ open up ↑ |
257 256 * struct. This is necessary here because the resource for the
258 257 * AH is going to be freed up as part of this operation.
259 258 */
260 259 ah = *ahhdl;
261 260 mutex_enter(&ah->ah_lock);
262 261 udav = ah->ah_udavrsrcp;
263 262 rsrc = ah->ah_rsrcp;
264 263 pd = ah->ah_pdhdl;
265 264 mr = ah->ah_mrhdl;
266 265 mutex_exit(&ah->ah_lock);
267 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
268 266
269 267 /*
270 268 * Deregister the memory for the UDAV. If this fails for any reason,
271 269 * then it is an indication that something (either in HW or SW) has
272 270 * gone seriously wrong. So we print a warning message and return
273 271 * failure.
274 272 */
275 273 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
276 274 sleepflag);
277 275 if (status != DDI_SUCCESS) {
278 276 TNF_PROBE_0(tavor_ah_free_dereg_mr_fail, TAVOR_TNF_ERROR, "");
279 277 TAVOR_TNF_EXIT(tavor_ah_free);
280 278 return (ibc_get_ci_failure(0));
281 279 }
282 280
283 281 /*
284 282 * Write zero to the first 64-bit word in the UDAV entry. As
285 283 * described above (in tavor_ah_alloc), the PD number is stored in
286 284 * the first 64-bits of each UDAV and setting this to zero is
287 285 * guaranteed to invalidate the entry.
288 286 */
289 287 ddi_put64(udav->tr_acchdl, (uint64_t *)udav->tr_addr, 0);
290 288
291 289 /* Sync the UDAV for use by the hardware */
292 290 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
293 291
294 292 /* Decrement the reference count on the protection domain (PD) */
295 293 tavor_pd_refcnt_dec(pd);
296 294
297 295 /* Free the Tavor Address Handle structure */
298 296 tavor_rsrc_free(state, &rsrc);
299 297
300 298 /* Free up the UDAV entry resource */
301 299 tavor_rsrc_free(state, &udav);
302 300
303 301 /* Set the ahhdl pointer to NULL and return success */
304 302 *ahhdl = NULL;
305 303
306 304 TAVOR_TNF_EXIT(tavor_ah_free);
307 305 return (DDI_SUCCESS);
308 306 }
309 307
310 308
311 309 /*
312 310 * tavor_ah_query()
313 311 * Context: Can be called from interrupt or base context.
314 312 */
315 313 /* ARGSUSED */
316 314 int
317 315 tavor_ah_query(tavor_state_t *state, tavor_ahhdl_t ah, tavor_pdhdl_t *pd,
318 316 ibt_adds_vect_t *attr_p)
↓ open down ↓ |
41 lines elided |
↑ open up ↑ |
319 317 {
320 318 tavor_hw_udav_t udav_entry;
321 319 tavor_rsrc_t *udav;
322 320 uint64_t data;
323 321 uint32_t size;
324 322 int i;
325 323
326 324 TAVOR_TNF_ENTER(tavor_ah_query);
327 325
328 326 mutex_enter(&ah->ah_lock);
329 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr_p))
330 327
331 328 /*
332 329 * Pull all the necessary information from the Tavor Address Handle
333 330 * structure
334 331 */
335 332 udav = ah->ah_udavrsrcp;
336 333 *pd = ah->ah_pdhdl;
337 334
338 335 /*
339 336 * Copy the UDAV entry into the temporary copy. Here we copy all
340 337 * the information from the UDAV entry in DDR memory into the
341 338 * temporary UDAV. Note: We don't need to sync the UDAV for
342 339 * reading by software because Tavor HW never modifies the entry.
343 340 */
344 341 size = sizeof (tavor_hw_udav_t) >> 3;
345 342 for (i = 0; i < size; i++) {
346 343 data = ddi_get64(udav->tr_acchdl,
347 344 ((uint64_t *)udav->tr_addr + i));
348 345 ((uint64_t *)&udav_entry)[i] = data;
349 346 }
350 347
351 348 /*
352 349 * Fill in "ibt_adds_vect_t". We call tavor_get_addr_path() to fill
353 350 * the common portions that can be pulled from the UDAV we pass in.
354 351 *
355 352 * NOTE: We will also fill the "av_dgid.gid_guid" field from the
356 353 * "ah_save_guid" field we have previously saved away. The reason
357 354 * for this is described in tavor_ah_alloc() and tavor_ah_modify().
358 355 */
359 356 tavor_get_addr_path(state, (tavor_hw_addr_path_t *)&udav_entry,
360 357 attr_p, TAVOR_ADDRPATH_UDAV, NULL);
361 358
362 359 attr_p->av_dgid.gid_guid = ah->ah_save_guid;
363 360 attr_p->av_srate = ah->ah_save_srate;
364 361
365 362 mutex_exit(&ah->ah_lock);
366 363 TAVOR_TNF_EXIT(tavor_ah_query);
367 364 return (DDI_SUCCESS);
368 365 }
369 366
370 367
371 368 /*
372 369 * tavor_ah_modify()
373 370 * Context: Can be called from interrupt or base context.
374 371 */
375 372 /* ARGSUSED */
376 373 int
377 374 tavor_ah_modify(tavor_state_t *state, tavor_ahhdl_t ah,
378 375 ibt_adds_vect_t *attr_p)
379 376 {
380 377 tavor_hw_udav_t udav_entry;
381 378 tavor_rsrc_t *udav;
382 379 uint64_t data_new, data_old;
383 380 uint32_t udav_pd, size, portnum_new;
384 381 int i, status;
385 382
386 383 TAVOR_TNF_ENTER(tavor_ah_modify);
387 384
388 385 /* Validate that specified port number is legal */
389 386 if (!tavor_portnum_is_valid(state, attr_p->av_port_num)) {
390 387 TNF_PROBE_1(tavor_ah_modify_inv_portnum,
391 388 TAVOR_TNF_ERROR, "", tnf_uint, port, attr_p->av_port_num);
392 389 TAVOR_TNF_EXIT(tavor_ah_modify);
393 390 return (IBT_HCA_PORT_INVALID);
394 391 }
395 392
396 393 mutex_enter(&ah->ah_lock);
397 394
398 395 /*
399 396 * Pull all the necessary information from the Tavor Address Handle
400 397 * structure
401 398 */
402 399 udav = ah->ah_udavrsrcp;
403 400
404 401 /*
405 402 * Fill in the UDAV entry. Note: we are only filling in a temporary
406 403 * copy here, which we will later copy into the actual entry in
407 404 * Tavor DDR memory. This starts by zeroing out the temporary copy
408 405 * and then calling tavor_set_addr_path() to fill in the common
409 406 * portions that can be pulled from the "ibt_adds_vect_t" passed in
410 407 *
411 408 * NOTE: We also need to save away a copy of the "av_dgid.gid_guid"
412 409 * field here (just as we did during tavor_ah_alloc()) because we
413 410 * may need to return it later to the IBTF (as a result of a
414 411 * subsequent query operation). As explained in tavor_ah_alloc(),
415 412 * unlike the other UDAV parameters, the value of "av_dgid.gid_guid"
416 413 * is not always preserved by being written to hardware. The reason
417 414 * for this is described in tavor_set_addr_path().
418 415 */
419 416 bzero(&udav_entry, sizeof (tavor_hw_udav_t));
420 417 status = tavor_set_addr_path(state, attr_p,
421 418 (tavor_hw_addr_path_t *)&udav_entry, TAVOR_ADDRPATH_UDAV, NULL);
422 419 if (status != DDI_SUCCESS) {
423 420 mutex_exit(&ah->ah_lock);
424 421 TNF_PROBE_0(tavor_ah_modify_setaddrpath_fail,
425 422 TAVOR_TNF_ERROR, "");
426 423 TAVOR_TNF_EXIT(tavor_ah_modify);
427 424 return (status);
428 425 }
429 426 ah->ah_save_guid = attr_p->av_dgid.gid_guid;
430 427 ah->ah_save_srate = attr_p->av_srate;
431 428
432 429 /*
433 430 * Save away the current PD number for this UDAV. Then temporarily
434 431 * invalidate the entry (by setting the PD to zero). Note: Since
435 432 * the first 32 bits of the UDAV actually contain the current port
436 433 * number _and_ current PD number, we need to mask off some bits.
437 434 */
438 435 udav_pd = ddi_get32(udav->tr_acchdl, (uint32_t *)udav->tr_addr);
439 436 udav_pd = udav_pd & 0xFFFFFF;
440 437 ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, 0);
441 438
442 439 /* Sync the UDAV for use by the hardware */
443 440 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
444 441
445 442 /*
446 443 * Copy UDAV structure to the entry
447 444 * Note: We copy in 64-bit chunks. For the first two of these
448 445 * chunks it is necessary to read the current contents of the
449 446 * UDAV, mask off the modifiable portions (maintaining any
450 447 * of the "reserved" portions), and then mask on the new data.
451 448 */
452 449 size = sizeof (tavor_hw_udav_t) >> 3;
453 450 for (i = 0; i < size; i++) {
454 451 data_new = ((uint64_t *)&udav_entry)[i];
455 452 data_old = ddi_get64(udav->tr_acchdl,
456 453 ((uint64_t *)udav->tr_addr + i));
457 454
458 455 /*
459 456 * Apply mask to change only the relevant values. Note: We
460 457 * extract the new portnum from the address handle here
461 458 * because the "PD" and "portnum" fields are in the same
462 459 * 32-bit word in the UDAV. We will use the (new) port
463 460 * number extracted here when we write the valid PD number
464 461 * in the last step below.
465 462 */
466 463 if (i == 0) {
467 464 data_old = data_old & TAVOR_UDAV_MODIFY_MASK0;
468 465 portnum_new = data_new >> 56;
469 466 } else if (i == 1) {
470 467 data_old = data_old & TAVOR_UDAV_MODIFY_MASK1;
471 468 } else {
472 469 data_old = 0;
473 470 }
474 471
475 472 /* Write the updated values to the UDAV (in DDR) */
476 473 data_new = data_old | data_new;
477 474 ddi_put64(udav->tr_acchdl, ((uint64_t *)udav->tr_addr + i),
478 475 data_new);
479 476 }
480 477
481 478 /*
482 479 * Sync the body of the UDAV for use by the hardware. After we
483 480 * have updated the PD number (to make the UDAV valid), we sync
484 481 * again to push the entire entry out for hardware access.
485 482 */
486 483 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
487 484
488 485 /*
489 486 * Put the valid PD number back into UDAV entry. Note: Because port
490 487 * number and PD number are in the same word, we must combine the
491 488 * new port number with the old PD number before writing it back
492 489 * to the UDAV entry
493 490 */
494 491 udav_pd = ((portnum_new << 24) | udav_pd);
495 492 ddi_put32(udav->tr_acchdl, (uint32_t *)udav->tr_addr, udav_pd);
496 493
497 494 /* Sync the rest of the UDAV for use by the hardware */
498 495 tavor_udav_sync(ah, udav->tr_addr, DDI_DMA_SYNC_FORDEV);
499 496
500 497 mutex_exit(&ah->ah_lock);
501 498 TAVOR_TNF_EXIT(tavor_ah_modify);
502 499 return (DDI_SUCCESS);
503 500 }
504 501
505 502
506 503 /*
507 504 * tavor_udav_sync()
508 505 * Context: Can be called from interrupt or base context.
509 506 */
↓ open down ↓ |
170 lines elided |
↑ open up ↑ |
510 507 /* ARGSUSED */
511 508 static void
512 509 tavor_udav_sync(tavor_ahhdl_t ah, tavor_hw_udav_t *udav, uint_t flag)
513 510 {
514 511 ddi_dma_handle_t dmahdl;
515 512 off_t offset;
516 513 int status;
517 514
518 515 TAVOR_TNF_ENTER(tavor_udav_sync);
519 516
520 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ah))
521 -
522 517 /* Determine if AH needs to be synced or not */
523 518 if (ah->ah_sync == 0) {
524 519 TAVOR_TNF_EXIT(tavor_udav_sync);
525 520 return;
526 521 }
527 522
528 523 /* Get the DMA handle from AH handle */
529 524 dmahdl = ah->ah_mrhdl->mr_bindinfo.bi_dmahdl;
530 525
531 526 /* Calculate offset into address handle */
532 527 offset = (off_t)0;
533 528 status = ddi_dma_sync(dmahdl, offset, sizeof (tavor_hw_udav_t), flag);
534 529 if (status != DDI_SUCCESS) {
535 530 TNF_PROBE_0(tavor_udav_sync_getnextentry_fail,
536 531 TAVOR_TNF_ERROR, "");
537 532 TAVOR_TNF_EXIT(tavor_udav_sync);
538 533 return;
539 534 }
540 535
541 536 TAVOR_TNF_EXIT(tavor_udav_sync);
542 537 }
543 538
544 539
545 540 /*
546 541 * tavor_mcg_attach()
547 542 * Context: Can be called only from user or kernel context.
548 543 */
549 544 int
550 545 tavor_mcg_attach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid,
551 546 ib_lid_t lid)
552 547 {
553 548 tavor_rsrc_t *rsrc;
554 549 tavor_hw_mcg_t *mcg_entry;
555 550 tavor_hw_mcg_qp_list_t *mcg_entry_qplist;
556 551 tavor_mcghdl_t mcg, newmcg;
557 552 uint64_t mgid_hash;
558 553 uint32_t end_indx;
559 554 int status;
560 555 uint_t qp_found;
561 556 char *errormsg;
562 557
563 558 TAVOR_TNF_ENTER(tavor_mcg_attach);
564 559
565 560 /*
566 561 * It is only allowed to attach MCG to UD queue pairs. Verify
567 562 * that the intended QP is of the appropriate transport type
568 563 */
569 564 if (qp->qp_serv_type != TAVOR_QP_UD) {
570 565 /* Set "status" and "errormsg" and goto failure */
571 566 TAVOR_TNF_FAIL(IBT_QP_SRV_TYPE_INVALID, "invalid service type");
572 567 goto mcgattach_fail;
573 568 }
574 569
575 570 /*
576 571 * Check for invalid Multicast DLID. Specifically, all Multicast
577 572 * LIDs should be within a well defined range. If the specified LID
578 573 * is outside of that range, then return an error.
579 574 */
580 575 if (tavor_mlid_is_valid(lid) == 0) {
581 576 /* Set "status" and "errormsg" and goto failure */
582 577 TAVOR_TNF_FAIL(IBT_MC_MLID_INVALID, "invalid MLID");
583 578 goto mcgattach_fail;
584 579 }
585 580 /*
586 581 * Check for invalid Multicast GID. All Multicast GIDs should have
587 582 * a well-defined pattern of bits and flags that are allowable. If
588 583 * the specified GID does not meet the criteria, then return an error.
589 584 */
590 585 if (tavor_mgid_is_valid(gid) == 0) {
591 586 /* Set "status" and "errormsg" and goto failure */
592 587 TAVOR_TNF_FAIL(IBT_MC_MGID_INVALID, "invalid MGID");
593 588 goto mcgattach_fail;
594 589 }
595 590
596 591 /*
597 592 * Compute the MGID hash value. Since the MCG table is arranged as
598 593 * a number of separate hash chains, this operation converts the
599 594 * specified MGID into the starting index of an entry in the hash
600 595 * table (i.e. the index for the start of the appropriate hash chain).
601 596 * Subsequent operations below will walk the chain searching for the
602 597 * right place to add this new QP.
603 598 */
604 599 status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
605 600 &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT());
606 601 if (status != TAVOR_CMD_SUCCESS) {
607 602 cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n",
608 603 status);
609 604 TNF_PROBE_1(tavor_mcg_attach_mgid_hash_cmd_fail,
610 605 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
611 606 TAVOR_TNF_EXIT(tavor_mcg_attach);
612 607 return (ibc_get_ci_failure(0));
613 608 }
614 609
615 610 /*
616 611 * Grab the multicast group mutex. Then grab the pre-allocated
617 612 * temporary buffer used for holding and/or modifying MCG entries.
618 613 * Zero out the temporary MCG entry before we begin.
619 614 */
620 615 mutex_enter(&state->ts_mcglock);
621 616 mcg_entry = state->ts_mcgtmp;
622 617 mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry);
623 618 bzero(mcg_entry, TAVOR_MCGMEM_SZ(state));
624 619
625 620 /*
626 621 * Walk through the array of MCG entries starting at "mgid_hash".
627 622 * Try to find the appropriate place for this new QP to be added.
628 623 * This could happen when the first entry of the chain has MGID == 0
629 624 * (which means that the hash chain is empty), or because we find
630 625 * an entry with the same MGID (in which case we'll add the QP to
631 626 * that MCG), or because we come to the end of the chain (in which
632 627 * case this is the first QP being added to the multicast group that
633 628 * corresponds to the MGID). The tavor_mcg_walk_mgid_hash() routine
634 629 * walks the list and returns an index into the MCG table. The entry
635 630 * at this index is then checked to determine which case we have
636 631 * fallen into (see below). Note: We are using the "shadow" MCG
637 632 * list (of tavor_mcg_t structs) for this lookup because the real
638 633 * MCG entries are in hardware (and the lookup process would be much
639 634 * more time consuming).
640 635 */
641 636 end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, NULL);
642 637 mcg = &state->ts_mcghdl[end_indx];
643 638
644 639 /*
645 640 * If MGID == 0, then the hash chain is empty. Just fill in the
646 641 * current entry. Note: No need to allocate an MCG table entry
647 642 * as all the hash chain "heads" are already preallocated.
648 643 */
649 644 if ((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) {
650 645
651 646 /* Fill in the current entry in the "shadow" MCG list */
652 647 tavor_mcg_setup_new_hdr(mcg, mcg_entry, gid, NULL);
653 648
654 649 /*
655 650 * Try to add the new QP number to the list. This (and the
656 651 * above) routine fills in a temporary MCG. The "mcg_entry"
657 652 * and "mcg_entry_qplist" pointers simply point to different
658 653 * offsets within the same temporary copy of the MCG (for
659 654 * convenience). Note: If this fails, we need to invalidate
660 655 * the entries we've already put into the "shadow" list entry
661 656 * above.
662 657 */
663 658 status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
664 659 &qp_found);
665 660 if (status != DDI_SUCCESS) {
666 661 bzero(mcg, sizeof (struct tavor_sw_mcg_list_s));
667 662 mutex_exit(&state->ts_mcglock);
668 663 /* Set "status" and "errormsg" and goto failure */
669 664 TAVOR_TNF_FAIL(status, "failed qplist add");
670 665 goto mcgattach_fail;
671 666 }
672 667
673 668 /*
674 669 * Once the temporary MCG has been filled in, write the entry
675 670 * into the appropriate location in the Tavor MCG entry table.
676 671 * If it's successful, then drop the lock and return success.
677 672 * Note: In general, this operation shouldn't fail. If it
678 673 * does, then it is an indication that something (probably in
679 674 * HW, but maybe in SW) has gone seriously wrong. We still
680 675 * want to zero out the entries that we've filled in above
681 676 * (in the tavor_mcg_setup_new_hdr() routine).
682 677 */
683 678 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
684 679 TAVOR_CMD_NOSLEEP_SPIN);
685 680 if (status != TAVOR_CMD_SUCCESS) {
686 681 bzero(mcg, sizeof (struct tavor_sw_mcg_list_s));
687 682 mutex_exit(&state->ts_mcglock);
688 683 TAVOR_WARNING(state, "failed to write MCG entry");
689 684 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
690 685 "%08x\n", status);
691 686 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
692 687 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
693 688 tnf_uint, indx, end_indx);
694 689 TAVOR_TNF_EXIT(tavor_mcg_attach);
695 690 return (ibc_get_ci_failure(0));
696 691 }
697 692
698 693 /*
699 694 * Now that we know all the Tavor firmware accesses have been
700 695 * successful, we update the "shadow" MCG entry by incrementing
701 696 * the "number of attached QPs" count.
702 697 *
703 698 * We increment only if the QP is not already part of the
704 699 * MCG by checking the 'qp_found' flag returned from the
705 700 * qplist_add above.
706 701 */
707 702 if (!qp_found) {
708 703 mcg->mcg_num_qps++;
709 704
710 705 /*
711 706 * Increment the refcnt for this QP. Because the QP
712 707 * was added to this MCG, the refcnt must be
713 708 * incremented.
714 709 */
715 710 tavor_qp_mcg_refcnt_inc(qp);
716 711 }
717 712
718 713 /*
719 714 * We drop the lock and return success.
720 715 */
721 716 mutex_exit(&state->ts_mcglock);
722 717 TAVOR_TNF_EXIT(tavor_mcg_attach);
723 718 return (DDI_SUCCESS);
724 719 }
725 720
726 721 /*
727 722 * If the specified MGID matches the MGID in the current entry, then
728 723 * we need to try to add the QP to the current MCG entry. In this
729 724 * case, it means that we need to read the existing MCG entry (into
730 725 * the temporary MCG), add the new QP number to the temporary entry
731 726 * (using the same method we used above), and write the entry back
732 727 * to the hardware (same as above).
733 728 */
734 729 if ((mcg->mcg_mgid_h == gid.gid_prefix) &&
735 730 (mcg->mcg_mgid_l == gid.gid_guid)) {
736 731
737 732 /*
738 733 * Read the current MCG entry into the temporary MCG. Note:
739 734 * In general, this operation shouldn't fail. If it does,
740 735 * then it is an indication that something (probably in HW,
741 736 * but maybe in SW) has gone seriously wrong.
742 737 */
743 738 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
744 739 TAVOR_CMD_NOSLEEP_SPIN);
745 740 if (status != TAVOR_CMD_SUCCESS) {
746 741 mutex_exit(&state->ts_mcglock);
747 742 TAVOR_WARNING(state, "failed to read MCG entry");
748 743 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: "
749 744 "%08x\n", status);
750 745 TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail,
751 746 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
752 747 tnf_uint, indx, end_indx);
753 748 TAVOR_TNF_EXIT(tavor_mcg_attach);
754 749 return (ibc_get_ci_failure(0));
755 750 }
756 751
757 752 /*
758 753 * Try to add the new QP number to the list. This routine
759 754 * fills in the necessary pieces of the temporary MCG. The
760 755 * "mcg_entry_qplist" pointer is used to point to the portion
761 756 * of the temporary MCG that holds the QP numbers.
762 757 *
763 758 * Note: tavor_mcg_qplist_add() returns SUCCESS if it
764 759 * already found the QP in the list. In this case, the QP is
765 760 * not added on to the list again. Check the 'qp_found' flag
766 761 * to determine whether the QP was already present.
767 762 *
768 763 */
769 764 status = tavor_mcg_qplist_add(state, mcg, mcg_entry_qplist, qp,
770 765 &qp_found);
771 766 if (status != DDI_SUCCESS) {
772 767 mutex_exit(&state->ts_mcglock);
773 768 /* Set "status" and "errormsg" and goto failure */
774 769 TAVOR_TNF_FAIL(status, "failed qplist add");
775 770 goto mcgattach_fail;
776 771 }
777 772
778 773 /*
779 774 * Once the temporary MCG has been updated, write the entry
780 775 * into the appropriate location in the Tavor MCG entry table.
781 776 * If it's successful, then drop the lock and return success.
782 777 * Note: In general, this operation shouldn't fail. If it
783 778 * does, then it is an indication that something (probably in
784 779 * HW, but maybe in SW) has gone seriously wrong.
785 780 */
786 781 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
787 782 TAVOR_CMD_NOSLEEP_SPIN);
788 783 if (status != TAVOR_CMD_SUCCESS) {
789 784 mutex_exit(&state->ts_mcglock);
790 785 TAVOR_WARNING(state, "failed to write MCG entry");
791 786 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
792 787 "%08x\n", status);
793 788 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
794 789 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
795 790 tnf_uint, indx, end_indx);
796 791 TAVOR_TNF_EXIT(tavor_mcg_attach);
797 792 return (ibc_get_ci_failure(0));
798 793 }
799 794
800 795 /*
801 796 * Now that we know all the Tavor firmware accesses have been
802 797 * successful, we update the current "shadow" MCG entry by
803 798 * incrementing the "number of attached QPs" count.
804 799 *
805 800 * We increment only if the QP is not already part of the
806 801 * MCG by checking the 'qp_found' flag returned from the
807 802 * qplist_add above.
808 803 */
809 804 if (!qp_found) {
810 805 mcg->mcg_num_qps++;
811 806
812 807 /*
813 808 * Increment the refcnt for this QP. Because the QP
814 809 * was added to this MCG, the refcnt must be
815 810 * incremented.
816 811 */
817 812 tavor_qp_mcg_refcnt_inc(qp);
818 813 }
819 814
820 815 /*
821 816 * We drop the lock and return success.
822 817 */
823 818 mutex_exit(&state->ts_mcglock);
824 819 TAVOR_TNF_EXIT(tavor_mcg_attach);
825 820 return (DDI_SUCCESS);
826 821 }
827 822
828 823 /*
829 824 * If we've reached here, then we're at the end of the hash chain.
830 825 * We need to allocate a new MCG entry, fill it in, write it to Tavor,
831 826 * and update the previous entry to link the new one to the end of the
832 827 * chain.
833 828 */
834 829
835 830 /*
836 831 * Allocate an MCG table entry. This will be filled in with all
837 832 * the necessary parameters to define the multicast group. Then it
838 833 * will be written to the hardware in the next-to-last step below.
839 834 */
840 835 status = tavor_rsrc_alloc(state, TAVOR_MCG, 1, TAVOR_NOSLEEP, &rsrc);
841 836 if (status != DDI_SUCCESS) {
842 837 mutex_exit(&state->ts_mcglock);
843 838 /* Set "status" and "errormsg" and goto failure */
844 839 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MCG");
845 840 goto mcgattach_fail;
846 841 }
847 842
848 843 /*
849 844 * Fill in the new entry in the "shadow" MCG list. Note: Just as
850 845 * it does above, tavor_mcg_setup_new_hdr() also fills in a portion
851 846 * of the temporary MCG entry (the rest of which will be filled in by
852 847 * tavor_mcg_qplist_add() below)
853 848 */
854 849 newmcg = &state->ts_mcghdl[rsrc->tr_indx];
855 850 tavor_mcg_setup_new_hdr(newmcg, mcg_entry, gid, rsrc);
856 851
857 852 /*
858 853 * Try to add the new QP number to the list. This routine fills in
859 854 * the final necessary pieces of the temporary MCG. The
860 855 * "mcg_entry_qplist" pointer is used to point to the portion of the
861 856 * temporary MCG that holds the QP numbers. If we fail here, we
862 857 * must undo the previous resource allocation.
863 858 *
864 859 * Note: tavor_mcg_qplist_add() can return SUCCESS if it already
865 860 * found the QP in the list. In this case, the QP is not added on to
866 861 * the list again. Check the flag 'qp_found' if this value is needed
867 862 * to be known.
868 863 */
869 864 status = tavor_mcg_qplist_add(state, newmcg, mcg_entry_qplist, qp,
870 865 &qp_found);
871 866 if (status != DDI_SUCCESS) {
872 867 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
873 868 tavor_rsrc_free(state, &rsrc);
874 869 mutex_exit(&state->ts_mcglock);
875 870 /* Set "status" and "errormsg" and goto failure */
876 871 TAVOR_TNF_FAIL(status, "failed qplist add");
877 872 goto mcgattach_fail;
878 873 }
879 874
880 875 /*
881 876 * Once the temporary MCG has been updated, write the entry into the
882 877 * appropriate location in the Tavor MCG entry table. If this is
883 878 * successful, then we need to chain the previous entry to this one.
884 879 * Note: In general, this operation shouldn't fail. If it does, then
885 880 * it is an indication that something (probably in HW, but maybe in
886 881 * SW) has gone seriously wrong.
887 882 */
888 883 status = tavor_write_mgm_cmd_post(state, mcg_entry, rsrc->tr_indx,
889 884 TAVOR_CMD_NOSLEEP_SPIN);
890 885 if (status != TAVOR_CMD_SUCCESS) {
891 886 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
892 887 tavor_rsrc_free(state, &rsrc);
893 888 mutex_exit(&state->ts_mcglock);
894 889 TAVOR_WARNING(state, "failed to write MCG entry");
895 890 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
896 891 status);
897 892 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
898 893 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
899 894 tnf_uint, indx, rsrc->tr_indx);
900 895 TAVOR_TNF_EXIT(tavor_mcg_attach);
901 896 return (ibc_get_ci_failure(0));
902 897 }
903 898
904 899 /*
905 900 * Now read the current MCG entry (the one previously at the end of
906 901 * hash chain) into the temporary MCG. We are going to update its
907 902 * "next_gid_indx" now and write the entry back to the MCG table.
908 903 * Note: In general, this operation shouldn't fail. If it does, then
909 904 * it is an indication that something (probably in HW, but maybe in SW)
910 905 * has gone seriously wrong. We will free up the MCG entry resource,
911 906 * but we will not undo the previously written MCG entry in the HW.
912 907 * This is OK, though, because the MCG entry is not currently attached
913 908 * to any hash chain.
914 909 */
915 910 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
916 911 TAVOR_CMD_NOSLEEP_SPIN);
917 912 if (status != TAVOR_CMD_SUCCESS) {
918 913 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
919 914 tavor_rsrc_free(state, &rsrc);
920 915 mutex_exit(&state->ts_mcglock);
921 916 TAVOR_WARNING(state, "failed to read MCG entry");
922 917 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
923 918 status);
924 919 TNF_PROBE_2(tavor_mcg_attach_read_mgm_cmd_fail,
925 920 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
926 921 tnf_uint, indx, end_indx);
927 922 TAVOR_TNF_EXIT(tavor_mcg_attach);
928 923 return (ibc_get_ci_failure(0));
929 924 }
930 925
931 926 /*
932 927 * Finally, we update the "next_gid_indx" field in the temporary MCG
933 928 * and attempt to write the entry back into the Tavor MCG table. If
934 929 * this succeeds, then we update the "shadow" list to reflect the
935 930 * change, drop the lock, and return success. Note: In general, this
936 931 * operation shouldn't fail. If it does, then it is an indication
937 932 * that something (probably in HW, but maybe in SW) has gone seriously
938 933 * wrong. Just as we do above, we will free up the MCG entry resource,
939 934 * but we will not try to undo the previously written MCG entry. This
940 935 * is OK, though, because (since we failed here to update the end of
941 936 * the chain) that other entry is not currently attached to any chain.
942 937 */
943 938 mcg_entry->next_gid_indx = rsrc->tr_indx;
944 939 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
945 940 TAVOR_CMD_NOSLEEP_SPIN);
946 941 if (status != TAVOR_CMD_SUCCESS) {
947 942 bzero(newmcg, sizeof (struct tavor_sw_mcg_list_s));
948 943 tavor_rsrc_free(state, &rsrc);
949 944 mutex_exit(&state->ts_mcglock);
950 945 TAVOR_WARNING(state, "failed to write MCG entry");
951 946 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
952 947 status);
953 948 TNF_PROBE_2(tavor_mcg_attach_write_mgm_cmd_fail,
954 949 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
955 950 tnf_uint, indx, end_indx);
956 951 TAVOR_TNF_EXIT(tavor_mcg_attach);
957 952 return (ibc_get_ci_failure(0));
958 953 }
959 954 mcg = &state->ts_mcghdl[end_indx];
960 955 mcg->mcg_next_indx = rsrc->tr_indx;
961 956
962 957 /*
963 958 * Now that we know all the Tavor firmware accesses have been
964 959 * successful, we update the new "shadow" MCG entry by incrementing
965 960 * the "number of attached QPs" count. Then we drop the lock and
966 961 * return success.
967 962 */
968 963 newmcg->mcg_num_qps++;
969 964
970 965 /*
971 966 * Increment the refcnt for this QP. Because the QP
972 967 * was added to this MCG, the refcnt must be
973 968 * incremented.
974 969 */
975 970 tavor_qp_mcg_refcnt_inc(qp);
976 971
977 972 mutex_exit(&state->ts_mcglock);
978 973 TAVOR_TNF_EXIT(tavor_mcg_attach);
979 974 return (DDI_SUCCESS);
980 975
981 976 mcgattach_fail:
982 977 TNF_PROBE_1(tavor_mcg_attach_fail, TAVOR_TNF_ERROR, "", tnf_string,
983 978 msg, errormsg);
984 979 TAVOR_TNF_EXIT(tavor_mcg_attach);
985 980 return (status);
986 981 }
987 982
988 983
989 984 /*
990 985 * tavor_mcg_detach()
991 986 * Context: Can be called only from user or kernel context.
992 987 */
993 988 int
994 989 tavor_mcg_detach(tavor_state_t *state, tavor_qphdl_t qp, ib_gid_t gid,
995 990 ib_lid_t lid)
996 991 {
997 992 tavor_hw_mcg_t *mcg_entry;
998 993 tavor_hw_mcg_qp_list_t *mcg_entry_qplist;
999 994 tavor_mcghdl_t mcg;
1000 995 uint64_t mgid_hash;
1001 996 uint32_t end_indx, prev_indx;
1002 997 int status;
1003 998
1004 999 TAVOR_TNF_ENTER(tavor_mcg_detach);
1005 1000
1006 1001 /*
1007 1002 * Check for invalid Multicast DLID. Specifically, all Multicast
1008 1003 * LIDs should be within a well defined range. If the specified LID
1009 1004 * is outside of that range, then return an error.
1010 1005 */
1011 1006 if (tavor_mlid_is_valid(lid) == 0) {
1012 1007 TNF_PROBE_0(tavor_mcg_detach_invmlid_fail, TAVOR_TNF_ERROR, "");
1013 1008 TAVOR_TNF_EXIT(tavor_mcg_detach);
1014 1009 return (IBT_MC_MLID_INVALID);
1015 1010 }
1016 1011
1017 1012 /*
1018 1013 * Compute the MGID hash value. As described above, the MCG table is
1019 1014 * arranged as a number of separate hash chains. This operation
1020 1015 * converts the specified MGID into the starting index of an entry in
1021 1016 * the hash table (i.e. the index for the start of the appropriate
1022 1017 * hash chain). Subsequent operations below will walk the chain
1023 1018 * searching for a matching entry from which to attempt to remove
1024 1019 * the specified QP.
1025 1020 */
1026 1021 status = tavor_mgid_hash_cmd_post(state, gid.gid_prefix, gid.gid_guid,
1027 1022 &mgid_hash, TAVOR_SLEEPFLAG_FOR_CONTEXT());
1028 1023 if (status != TAVOR_CMD_SUCCESS) {
1029 1024 cmn_err(CE_CONT, "Tavor: MGID_HASH command failed: %08x\n",
1030 1025 status);
1031 1026 TNF_PROBE_1(tavor_mcg_detach_mgid_hash_cmd_fail,
1032 1027 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1033 1028 TAVOR_TNF_EXIT(tavor_mcg_attach);
1034 1029 return (ibc_get_ci_failure(0));
1035 1030 }
1036 1031
1037 1032 /*
1038 1033 * Grab the multicast group mutex. Then grab the pre-allocated
1039 1034 * temporary buffer used for holding and/or modifying MCG entries.
1040 1035 */
1041 1036 mutex_enter(&state->ts_mcglock);
1042 1037 mcg_entry = state->ts_mcgtmp;
1043 1038 mcg_entry_qplist = TAVOR_MCG_GET_QPLIST_PTR(mcg_entry);
1044 1039
1045 1040 /*
1046 1041 * Walk through the array of MCG entries starting at "mgid_hash".
1047 1042 * Try to find an MCG entry with a matching MGID. The
1048 1043 * tavor_mcg_walk_mgid_hash() routine walks the list and returns an
1049 1044 * index into the MCG table. The entry at this index is checked to
1050 1045 * determine whether it is a match or not. If it is a match, then
1051 1046 * we continue on to attempt to remove the QP from the MCG. If it
1052 1047 * is not a match (or not a valid MCG entry), then we return an error.
1053 1048 */
1054 1049 end_indx = tavor_mcg_walk_mgid_hash(state, mgid_hash, gid, &prev_indx);
1055 1050 mcg = &state->ts_mcghdl[end_indx];
1056 1051
1057 1052 /*
1058 1053 * If MGID == 0 (the hash chain is empty) or if the specified MGID
1059 1054 * does not match the MGID in the current entry, then return
1060 1055 * IBT_MC_MGID_INVALID (to indicate that the specified MGID is not
1061 1056 * valid).
1062 1057 */
1063 1058 if (((mcg->mcg_mgid_h == 0) && (mcg->mcg_mgid_l == 0)) ||
1064 1059 ((mcg->mcg_mgid_h != gid.gid_prefix) ||
1065 1060 (mcg->mcg_mgid_l != gid.gid_guid))) {
1066 1061 mutex_exit(&state->ts_mcglock);
1067 1062 TNF_PROBE_0(tavor_mcg_detach_invmgid_fail, TAVOR_TNF_ERROR, "");
1068 1063 TAVOR_TNF_EXIT(tavor_mcg_detach);
1069 1064 return (IBT_MC_MGID_INVALID);
1070 1065 }
1071 1066
1072 1067 /*
1073 1068 * Read the current MCG entry into the temporary MCG. Note: In
1074 1069 * general, this operation shouldn't fail. If it does, then it is
1075 1070 * an indication that something (probably in HW, but maybe in SW)
1076 1071 * has gone seriously wrong.
1077 1072 */
1078 1073 status = tavor_read_mgm_cmd_post(state, mcg_entry, end_indx,
1079 1074 TAVOR_CMD_NOSLEEP_SPIN);
1080 1075 if (status != TAVOR_CMD_SUCCESS) {
1081 1076 mutex_exit(&state->ts_mcglock);
1082 1077 TAVOR_WARNING(state, "failed to read MCG entry");
1083 1078 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
1084 1079 status);
1085 1080 TNF_PROBE_2(tavor_mcg_detach_read_mgm_cmd_fail,
1086 1081 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1087 1082 tnf_uint, indx, end_indx);
1088 1083 TAVOR_TNF_EXIT(tavor_mcg_attach);
1089 1084 return (ibc_get_ci_failure(0));
1090 1085 }
1091 1086
1092 1087 /*
1093 1088 * Search the QP number list for a match. If a match is found, then
1094 1089 * remove the entry from the QP list. Otherwise, if no match is found,
1095 1090 * return an error.
1096 1091 */
1097 1092 status = tavor_mcg_qplist_remove(mcg, mcg_entry_qplist, qp);
1098 1093 if (status != DDI_SUCCESS) {
1099 1094 mutex_exit(&state->ts_mcglock);
1100 1095 TAVOR_TNF_EXIT(tavor_mcg_detach);
1101 1096 return (status);
1102 1097 }
1103 1098
1104 1099 /*
1105 1100 * Decrement the MCG count for this QP. When the 'qp_mcg'
1106 1101 * field becomes 0, then this QP is no longer a member of any
1107 1102 * MCG.
1108 1103 */
1109 1104 tavor_qp_mcg_refcnt_dec(qp);
1110 1105
1111 1106 /*
1112 1107 * If the current MCG's QP number list is about to be made empty
1113 1108 * ("mcg_num_qps" == 1), then remove the entry itself from the hash
1114 1109 * chain. Otherwise, just write the updated MCG entry back to the
1115 1110 * hardware. In either case, once we successfully update the hardware
1116 1111 * chain, then we decrement the "shadow" list entry's "mcg_num_qps"
1117 1112 * count (or zero out the entire "shadow" list entry) before returning
1118 1113 * success. Note: Zeroing out the "shadow" list entry is done
1119 1114 * inside of tavor_mcg_hash_list_remove().
1120 1115 */
1121 1116 if (mcg->mcg_num_qps == 1) {
1122 1117
1123 1118 /* Remove an MCG entry from the hash chain */
1124 1119 status = tavor_mcg_hash_list_remove(state, end_indx, prev_indx,
1125 1120 mcg_entry);
1126 1121 if (status != DDI_SUCCESS) {
1127 1122 mutex_exit(&state->ts_mcglock);
1128 1123 TAVOR_TNF_EXIT(tavor_mcg_detach);
1129 1124 return (status);
1130 1125 }
1131 1126
1132 1127 } else {
1133 1128 /*
1134 1129 * Write the updated MCG entry back to the Tavor MCG table.
1135 1130 * If this succeeds, then we update the "shadow" list to
1136 1131 * reflect the change (i.e. decrement the "mcg_num_qps"),
1137 1132 * drop the lock, and return success. Note: In general,
1138 1133 * this operation shouldn't fail. If it does, then it is an
1139 1134 * indication that something (probably in HW, but maybe in SW)
1140 1135 * has gone seriously wrong.
1141 1136 */
1142 1137 status = tavor_write_mgm_cmd_post(state, mcg_entry, end_indx,
1143 1138 TAVOR_CMD_NOSLEEP_SPIN);
1144 1139 if (status != TAVOR_CMD_SUCCESS) {
1145 1140 mutex_exit(&state->ts_mcglock);
1146 1141 TAVOR_WARNING(state, "failed to write MCG entry");
1147 1142 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
1148 1143 "%08x\n", status);
1149 1144 TNF_PROBE_2(tavor_mcg_detach_write_mgm_cmd_fail,
1150 1145 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1151 1146 tnf_uint, indx, end_indx);
1152 1147 TAVOR_TNF_EXIT(tavor_mcg_detach);
1153 1148 return (ibc_get_ci_failure(0));
1154 1149 }
1155 1150 mcg->mcg_num_qps--;
1156 1151 }
1157 1152
1158 1153 mutex_exit(&state->ts_mcglock);
1159 1154 TAVOR_TNF_EXIT(tavor_mcg_detach);
1160 1155 return (DDI_SUCCESS);
1161 1156 }
1162 1157
1163 1158 /*
1164 1159 * tavor_qp_mcg_refcnt_inc()
1165 1160 * Context: Can be called from interrupt or base context.
1166 1161 */
1167 1162 static void
1168 1163 tavor_qp_mcg_refcnt_inc(tavor_qphdl_t qp)
1169 1164 {
1170 1165 /* Increment the QP's MCG reference count */
1171 1166 mutex_enter(&qp->qp_lock);
1172 1167 qp->qp_mcg_refcnt++;
1173 1168 TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_inc, TAVOR_TNF_TRACE, "",
1174 1169 tnf_uint, refcnt, qp->qp_mcg_refcnt);
1175 1170 mutex_exit(&qp->qp_lock);
1176 1171 }
1177 1172
1178 1173
1179 1174 /*
1180 1175 * tavor_qp_mcg_refcnt_dec()
1181 1176 * Context: Can be called from interrupt or base context.
1182 1177 */
1183 1178 static void
1184 1179 tavor_qp_mcg_refcnt_dec(tavor_qphdl_t qp)
1185 1180 {
1186 1181 /* Decrement the QP's MCG reference count */
1187 1182 mutex_enter(&qp->qp_lock);
1188 1183 qp->qp_mcg_refcnt--;
1189 1184 TNF_PROBE_1_DEBUG(tavor_qp_mcg_refcnt_dec, TAVOR_TNF_TRACE, "",
1190 1185 tnf_uint, refcnt, qp->qp_mcg_refcnt);
1191 1186 mutex_exit(&qp->qp_lock);
1192 1187 }
1193 1188
1194 1189
1195 1190 /*
1196 1191 * tavor_mcg_qplist_add()
1197 1192 * Context: Can be called from interrupt or base context.
1198 1193 */
1199 1194 static int
1200 1195 tavor_mcg_qplist_add(tavor_state_t *state, tavor_mcghdl_t mcg,
1201 1196 tavor_hw_mcg_qp_list_t *mcg_qplist, tavor_qphdl_t qp,
1202 1197 uint_t *qp_found)
1203 1198 {
1204 1199 uint_t qplist_indx;
1205 1200
1206 1201 TAVOR_TNF_ENTER(tavor_mcg_qplist_add);
1207 1202
1208 1203 ASSERT(MUTEX_HELD(&state->ts_mcglock));
1209 1204
1210 1205 qplist_indx = mcg->mcg_num_qps;
1211 1206
1212 1207 /*
1213 1208 * Determine if we have exceeded the maximum number of QP per
1214 1209 * multicast group. If we have, then return an error
1215 1210 */
1216 1211 if (qplist_indx >= state->ts_cfg_profile->cp_num_qp_per_mcg) {
1217 1212 TNF_PROBE_0(tavor_mcg_qplist_add_too_many_qps,
1218 1213 TAVOR_TNF_ERROR, "");
1219 1214 TAVOR_TNF_EXIT(tavor_mcg_qplist_add);
1220 1215 return (IBT_HCA_MCG_QP_EXCEEDED);
1221 1216 }
1222 1217
1223 1218 /*
1224 1219 * Determine if the QP is already attached to this MCG table. If it
1225 1220 * is, then we break out and treat this operation as a NO-OP
1226 1221 */
1227 1222 for (qplist_indx = 0; qplist_indx < mcg->mcg_num_qps;
1228 1223 qplist_indx++) {
1229 1224 if (mcg_qplist[qplist_indx].qpn == qp->qp_qpnum) {
1230 1225 break;
1231 1226 }
1232 1227 }
1233 1228
1234 1229 /*
1235 1230 * If the QP was already on the list, set 'qp_found' to TRUE. We still
1236 1231 * return SUCCESS in this case, but the qplist will not have been
1237 1232 * updated because the QP was already on the list.
1238 1233 */
1239 1234 if (qplist_indx < mcg->mcg_num_qps) {
1240 1235 *qp_found = 1;
1241 1236 } else {
1242 1237 /*
1243 1238 * Otherwise, append the new QP number to the end of the
1244 1239 * current QP list. Note: We will increment the "mcg_num_qps"
1245 1240 * field on the "shadow" MCG list entry later (after we know
1246 1241 * that all necessary Tavor firmware accesses have been
1247 1242 * successful).
1248 1243 *
1249 1244 * Set 'qp_found' to 0 so we know the QP was added on to the
1250 1245 * list for sure.
1251 1246 */
1252 1247 mcg_qplist[qplist_indx].q = TAVOR_MCG_QPN_VALID;
1253 1248 mcg_qplist[qplist_indx].qpn = qp->qp_qpnum;
1254 1249 *qp_found = 0;
1255 1250 }
1256 1251
1257 1252 TAVOR_TNF_EXIT(tavor_mcg_qplist_add);
1258 1253 return (DDI_SUCCESS);
1259 1254 }
1260 1255
1261 1256
1262 1257
1263 1258 /*
1264 1259 * tavor_mcg_qplist_remove()
1265 1260 * Context: Can be called from interrupt or base context.
1266 1261 */
1267 1262 static int
1268 1263 tavor_mcg_qplist_remove(tavor_mcghdl_t mcg, tavor_hw_mcg_qp_list_t *mcg_qplist,
1269 1264 tavor_qphdl_t qp)
1270 1265 {
1271 1266 uint_t i, qplist_indx;
1272 1267
1273 1268 TAVOR_TNF_ENTER(tavor_mcg_qplist_remove);
1274 1269
1275 1270 /*
1276 1271 * Search the MCG QP list for a matching QPN. When
1277 1272 * it's found, we swap the last entry with the current
1278 1273 * one, set the last entry to zero, decrement the last
1279 1274 * entry, and return. If it's not found, then it's
1280 1275 * and error.
1281 1276 */
1282 1277 qplist_indx = mcg->mcg_num_qps;
1283 1278 for (i = 0; i < qplist_indx; i++) {
1284 1279 if (mcg_qplist[i].qpn == qp->qp_qpnum) {
1285 1280 mcg_qplist[i] = mcg_qplist[qplist_indx - 1];
1286 1281 mcg_qplist[qplist_indx - 1].q = TAVOR_MCG_QPN_INVALID;
1287 1282 mcg_qplist[qplist_indx - 1].qpn = 0;
1288 1283
1289 1284 TAVOR_TNF_EXIT(tavor_mcg_qplist_remove);
1290 1285 return (DDI_SUCCESS);
1291 1286 }
1292 1287 }
1293 1288
1294 1289 TNF_PROBE_0(tavor_mcg_qplist_remove_invqphdl_fail, TAVOR_TNF_ERROR, "");
1295 1290 TAVOR_TNF_EXIT(tavor_mcg_qplist_remove);
1296 1291 return (IBT_QP_HDL_INVALID);
1297 1292 }
1298 1293
1299 1294
1300 1295 /*
1301 1296 * tavor_mcg_walk_mgid_hash()
1302 1297 * Context: Can be called from interrupt or base context.
1303 1298 */
1304 1299 static uint_t
1305 1300 tavor_mcg_walk_mgid_hash(tavor_state_t *state, uint64_t start_indx,
1306 1301 ib_gid_t mgid, uint_t *p_indx)
1307 1302 {
1308 1303 tavor_mcghdl_t curr_mcghdl;
1309 1304 uint_t curr_indx, prev_indx;
1310 1305
1311 1306 TAVOR_TNF_ENTER(tavor_mcg_walk_mgid_hash);
1312 1307
1313 1308 ASSERT(MUTEX_HELD(&state->ts_mcglock));
1314 1309
1315 1310 /* Start at the head of the hash chain */
1316 1311 curr_indx = start_indx;
1317 1312 prev_indx = curr_indx;
1318 1313 curr_mcghdl = &state->ts_mcghdl[curr_indx];
1319 1314
1320 1315 /* If the first entry in the chain has MGID == 0, then stop */
1321 1316 if ((curr_mcghdl->mcg_mgid_h == 0) &&
1322 1317 (curr_mcghdl->mcg_mgid_l == 0)) {
1323 1318 goto end_mgid_hash_walk;
1324 1319 }
1325 1320
1326 1321 /* If the first entry in the chain matches the MGID, then stop */
1327 1322 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1328 1323 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1329 1324 goto end_mgid_hash_walk;
1330 1325 }
1331 1326
1332 1327 /* Otherwise, walk the hash chain looking for a match */
1333 1328 while (curr_mcghdl->mcg_next_indx != 0) {
1334 1329 prev_indx = curr_indx;
1335 1330 curr_indx = curr_mcghdl->mcg_next_indx;
1336 1331 curr_mcghdl = &state->ts_mcghdl[curr_indx];
1337 1332
1338 1333 if ((curr_mcghdl->mcg_mgid_h == mgid.gid_prefix) &&
1339 1334 (curr_mcghdl->mcg_mgid_l == mgid.gid_guid)) {
1340 1335 break;
1341 1336 }
1342 1337 }
1343 1338
1344 1339 end_mgid_hash_walk:
1345 1340 /*
1346 1341 * If necessary, return the index of the previous entry too. This
1347 1342 * is primarily used for detaching a QP from a multicast group. It
1348 1343 * may be necessary, in that case, to delete an MCG entry from the
1349 1344 * hash chain and having the index of the previous entry is helpful.
1350 1345 */
1351 1346 if (p_indx != NULL) {
1352 1347 *p_indx = prev_indx;
1353 1348 }
1354 1349 TAVOR_TNF_EXIT(tavor_mcg_walk_mgid_hash);
1355 1350 return (curr_indx);
1356 1351 }
1357 1352
1358 1353
1359 1354 /*
1360 1355 * tavor_mcg_setup_new_hdr()
1361 1356 * Context: Can be called from interrupt or base context.
1362 1357 */
1363 1358 static void
1364 1359 tavor_mcg_setup_new_hdr(tavor_mcghdl_t mcg, tavor_hw_mcg_t *mcg_hdr,
1365 1360 ib_gid_t mgid, tavor_rsrc_t *mcg_rsrc)
1366 1361 {
1367 1362 TAVOR_TNF_ENTER(tavor_mcg_setup_new_hdr);
1368 1363
1369 1364 /*
1370 1365 * Fill in the fields of the "shadow" entry used by software
1371 1366 * to track MCG hardware entry
1372 1367 */
1373 1368 mcg->mcg_mgid_h = mgid.gid_prefix;
1374 1369 mcg->mcg_mgid_l = mgid.gid_guid;
1375 1370 mcg->mcg_rsrcp = mcg_rsrc;
1376 1371 mcg->mcg_next_indx = 0;
1377 1372 mcg->mcg_num_qps = 0;
1378 1373
1379 1374 /*
1380 1375 * Fill the header fields of the MCG entry (in the temporary copy)
1381 1376 */
1382 1377 mcg_hdr->mgid_h = mgid.gid_prefix;
1383 1378 mcg_hdr->mgid_l = mgid.gid_guid;
1384 1379 mcg_hdr->next_gid_indx = 0;
1385 1380
1386 1381 TAVOR_TNF_EXIT(tavor_mcg_setup_new_hdr);
1387 1382 }
1388 1383
1389 1384
1390 1385 /*
1391 1386 * tavor_mcg_hash_list_remove()
1392 1387 * Context: Can be called only from user or kernel context.
1393 1388 */
1394 1389 static int
1395 1390 tavor_mcg_hash_list_remove(tavor_state_t *state, uint_t curr_indx,
1396 1391 uint_t prev_indx, tavor_hw_mcg_t *mcg_entry)
1397 1392 {
1398 1393 tavor_mcghdl_t curr_mcg, prev_mcg, next_mcg;
1399 1394 uint_t next_indx;
1400 1395 int status;
1401 1396
1402 1397 /* Get the pointer to "shadow" list for current entry */
1403 1398 curr_mcg = &state->ts_mcghdl[curr_indx];
1404 1399
1405 1400 /*
1406 1401 * If this is the first entry on a hash chain, then attempt to replace
1407 1402 * the entry with the next entry on the chain. If there are no
1408 1403 * subsequent entries on the chain, then this is the only entry and
1409 1404 * should be invalidated.
1410 1405 */
1411 1406 if (curr_indx == prev_indx) {
1412 1407
1413 1408 /*
1414 1409 * If this is the only entry on the chain, then invalidate it.
1415 1410 * Note: Invalidating an MCG entry means writing all zeros
1416 1411 * to the entry. This is only necessary for those MCG
1417 1412 * entries that are the "head" entries of the individual hash
1418 1413 * chains. Regardless of whether this operation returns
1419 1414 * success or failure, return that result to the caller.
1420 1415 */
1421 1416 next_indx = curr_mcg->mcg_next_indx;
1422 1417 if (next_indx == 0) {
1423 1418 status = tavor_mcg_entry_invalidate(state, mcg_entry,
1424 1419 curr_indx);
1425 1420 bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
1426 1421 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1427 1422 return (status);
1428 1423 }
1429 1424
1430 1425 /*
1431 1426 * Otherwise, this is just the first entry on the chain, so
1432 1427 * grab the next one
1433 1428 */
1434 1429 next_mcg = &state->ts_mcghdl[next_indx];
1435 1430
1436 1431 /*
1437 1432 * Read the next MCG entry into the temporary MCG. Note:
1438 1433 * In general, this operation shouldn't fail. If it does,
1439 1434 * then it is an indication that something (probably in HW,
1440 1435 * but maybe in SW) has gone seriously wrong.
1441 1436 */
1442 1437 status = tavor_read_mgm_cmd_post(state, mcg_entry, next_indx,
1443 1438 TAVOR_CMD_NOSLEEP_SPIN);
1444 1439 if (status != TAVOR_CMD_SUCCESS) {
1445 1440 TAVOR_WARNING(state, "failed to read MCG entry");
1446 1441 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: "
1447 1442 "%08x\n", status);
1448 1443 TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail,
1449 1444 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1450 1445 tnf_uint, indx, next_indx);
1451 1446 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1452 1447 return (ibc_get_ci_failure(0));
1453 1448 }
1454 1449
1455 1450 /*
1456 1451 * Copy/Write the temporary MCG back to the hardware MCG list
1457 1452 * using the current index. This essentially removes the
1458 1453 * current MCG entry from the list by writing over it with
1459 1454 * the next one. If this is successful, then we can do the
1460 1455 * same operation for the "shadow" list. And we can also
1461 1456 * free up the Tavor MCG entry resource that was associated
1462 1457 * with the (old) next entry. Note: In general, this
1463 1458 * operation shouldn't fail. If it does, then it is an
1464 1459 * indication that something (probably in HW, but maybe in SW)
1465 1460 * has gone seriously wrong.
1466 1461 */
1467 1462 status = tavor_write_mgm_cmd_post(state, mcg_entry, curr_indx,
1468 1463 TAVOR_CMD_NOSLEEP_SPIN);
1469 1464 if (status != TAVOR_CMD_SUCCESS) {
1470 1465 TAVOR_WARNING(state, "failed to write MCG entry");
1471 1466 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: "
1472 1467 "%08x\n", status);
1473 1468 TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail,
1474 1469 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1475 1470 tnf_uint, indx, curr_indx);
1476 1471 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1477 1472 return (ibc_get_ci_failure(0));
1478 1473 }
1479 1474
1480 1475 /*
1481 1476 * Copy all the software tracking information from the next
1482 1477 * entry on the "shadow" MCG list into the current entry on
1483 1478 * the list. Then invalidate (zero out) the other "shadow"
1484 1479 * list entry.
1485 1480 */
1486 1481 bcopy(next_mcg, curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
1487 1482 bzero(next_mcg, sizeof (struct tavor_sw_mcg_list_s));
1488 1483
1489 1484 /*
1490 1485 * Free up the Tavor MCG entry resource used by the "next"
1491 1486 * MCG entry. That resource is no longer needed by any
1492 1487 * MCG entry which is first on a hash chain (like the "next"
1493 1488 * entry has just become).
1494 1489 */
1495 1490 tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1496 1491
1497 1492 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1498 1493 return (DDI_SUCCESS);
1499 1494 }
1500 1495
1501 1496 /*
1502 1497 * Else if this is the last entry on the hash chain (or a middle
1503 1498 * entry, then we update the previous entry's "next_gid_index" field
1504 1499 * to make it point instead to the next entry on the chain. By
1505 1500 * skipping over the removed entry in this way, we can then free up
1506 1501 * any resources associated with the current entry. Note: We don't
1507 1502 * need to invalidate the "skipped over" hardware entry because it
1508 1503 * will no be longer connected to any hash chains, and if/when it is
1509 1504 * finally re-used, it will be written with entirely new values.
1510 1505 */
1511 1506
1512 1507 /*
1513 1508 * Read the next MCG entry into the temporary MCG. Note: In general,
1514 1509 * this operation shouldn't fail. If it does, then it is an
1515 1510 * indication that something (probably in HW, but maybe in SW) has
1516 1511 * gone seriously wrong.
1517 1512 */
1518 1513 status = tavor_read_mgm_cmd_post(state, mcg_entry, prev_indx,
1519 1514 TAVOR_CMD_NOSLEEP_SPIN);
1520 1515 if (status != TAVOR_CMD_SUCCESS) {
1521 1516 TAVOR_WARNING(state, "failed to read MCG entry");
1522 1517 cmn_err(CE_CONT, "Tavor: READ_MGM command failed: %08x\n",
1523 1518 status);
1524 1519 TNF_PROBE_2(tavor_mcg_hash_list_rem_read_mgm_cmd_fail,
1525 1520 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1526 1521 tnf_uint, indx, prev_indx);
1527 1522 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1528 1523 return (ibc_get_ci_failure(0));
1529 1524 }
1530 1525
1531 1526 /*
1532 1527 * Finally, we update the "next_gid_indx" field in the temporary MCG
1533 1528 * and attempt to write the entry back into the Tavor MCG table. If
1534 1529 * this succeeds, then we update the "shadow" list to reflect the
1535 1530 * change, free up the Tavor MCG entry resource that was associated
1536 1531 * with the current entry, and return success. Note: In general,
1537 1532 * this operation shouldn't fail. If it does, then it is an indication
1538 1533 * that something (probably in HW, but maybe in SW) has gone seriously
1539 1534 * wrong.
1540 1535 */
1541 1536 mcg_entry->next_gid_indx = curr_mcg->mcg_next_indx;
1542 1537 status = tavor_write_mgm_cmd_post(state, mcg_entry, prev_indx,
1543 1538 TAVOR_CMD_NOSLEEP_SPIN);
1544 1539 if (status != TAVOR_CMD_SUCCESS) {
1545 1540 TAVOR_WARNING(state, "failed to write MCG entry");
1546 1541 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
1547 1542 status);
1548 1543 TNF_PROBE_2(tavor_mcg_hash_list_rem_write_mgm_cmd_fail,
1549 1544 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1550 1545 tnf_uint, indx, prev_indx);
1551 1546 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1552 1547 return (ibc_get_ci_failure(0));
1553 1548 }
1554 1549
1555 1550 /*
1556 1551 * Get the pointer to the "shadow" MCG list entry for the previous
1557 1552 * MCG. Update its "mcg_next_indx" to point to the next entry
1558 1553 * the one after the current entry. Note: This next index may be
1559 1554 * zero, indicating the end of the list.
1560 1555 */
1561 1556 prev_mcg = &state->ts_mcghdl[prev_indx];
1562 1557 prev_mcg->mcg_next_indx = curr_mcg->mcg_next_indx;
1563 1558
1564 1559 /*
1565 1560 * Free up the Tavor MCG entry resource used by the current entry.
1566 1561 * This resource is no longer needed because the chain now skips over
1567 1562 * the current entry. Then invalidate (zero out) the current "shadow"
1568 1563 * list entry.
1569 1564 */
1570 1565 tavor_rsrc_free(state, &curr_mcg->mcg_rsrcp);
1571 1566 bzero(curr_mcg, sizeof (struct tavor_sw_mcg_list_s));
1572 1567
1573 1568 TAVOR_TNF_EXIT(tavor_mcg_hash_list_remove);
1574 1569 return (DDI_SUCCESS);
1575 1570 }
1576 1571
1577 1572
1578 1573 /*
1579 1574 * tavor_mcg_entry_invalidate()
1580 1575 * Context: Can be called only from user or kernel context.
1581 1576 */
1582 1577 static int
1583 1578 tavor_mcg_entry_invalidate(tavor_state_t *state, tavor_hw_mcg_t *mcg_entry,
1584 1579 uint_t indx)
1585 1580 {
1586 1581 int status;
1587 1582
1588 1583 TAVOR_TNF_ENTER(tavor_mcg_entry_invalidate);
1589 1584
1590 1585 /*
1591 1586 * Invalidate the hardware MCG entry by zeroing out this temporary
1592 1587 * MCG and writing it to the hardware. Note: In general, this
1593 1588 * operation shouldn't fail. If it does, then it is an indication
1594 1589 * that something (probably in HW, but maybe in SW) has gone seriously
1595 1590 * wrong.
1596 1591 */
1597 1592 bzero(mcg_entry, TAVOR_MCGMEM_SZ(state));
1598 1593 status = tavor_write_mgm_cmd_post(state, mcg_entry, indx,
1599 1594 TAVOR_CMD_NOSLEEP_SPIN);
1600 1595 if (status != TAVOR_CMD_SUCCESS) {
1601 1596 TAVOR_WARNING(state, "failed to write MCG entry");
1602 1597 cmn_err(CE_CONT, "Tavor: WRITE_MGM command failed: %08x\n",
1603 1598 status);
1604 1599 TNF_PROBE_2(tavor_mcg_entry_invalidate_write_mgm_cmd_fail,
1605 1600 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status,
1606 1601 tnf_uint, indx, indx);
1607 1602 TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate);
1608 1603 return (ibc_get_ci_failure(0));
1609 1604 }
1610 1605
1611 1606 TAVOR_TNF_EXIT(tavor_mcg_entry_invalidate);
1612 1607 return (DDI_SUCCESS);
1613 1608 }
1614 1609
1615 1610
1616 1611 /*
1617 1612 * tavor_mgid_is_valid()
1618 1613 * Context: Can be called from interrupt or base context.
1619 1614 */
1620 1615 static int
1621 1616 tavor_mgid_is_valid(ib_gid_t gid)
1622 1617 {
1623 1618 uint_t topbits, flags, scope;
1624 1619
1625 1620 TAVOR_TNF_ENTER(tavor_mgid_is_valid);
1626 1621
1627 1622 /*
1628 1623 * According to IBA 1.1 specification (section 4.1.1) a valid
1629 1624 * "multicast GID" must have its top eight bits set to all ones
1630 1625 */
1631 1626 topbits = (gid.gid_prefix >> TAVOR_MCG_TOPBITS_SHIFT) &
1632 1627 TAVOR_MCG_TOPBITS_MASK;
1633 1628 if (topbits != TAVOR_MCG_TOPBITS) {
1634 1629 TNF_PROBE_0(tavor_mgid_is_valid_invbits_fail, TAVOR_TNF_ERROR,
1635 1630 "");
1636 1631 TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1637 1632 return (0);
1638 1633 }
1639 1634
1640 1635 /*
1641 1636 * The next 4 bits are the "flag" bits. These are valid only
1642 1637 * if they are "0" (which correspond to permanently assigned/
1643 1638 * "well-known" multicast GIDs) or "1" (for so-called "transient"
1644 1639 * multicast GIDs). All other values are reserved.
1645 1640 */
1646 1641 flags = (gid.gid_prefix >> TAVOR_MCG_FLAGS_SHIFT) &
1647 1642 TAVOR_MCG_FLAGS_MASK;
1648 1643 if (!((flags == TAVOR_MCG_FLAGS_PERM) ||
1649 1644 (flags == TAVOR_MCG_FLAGS_NONPERM))) {
1650 1645 TNF_PROBE_1(tavor_mgid_is_valid_invflags_fail, TAVOR_TNF_ERROR,
1651 1646 "", tnf_uint, flags, flags);
1652 1647 TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1653 1648 return (0);
1654 1649 }
1655 1650
1656 1651 /*
1657 1652 * The next 4 bits are the "scope" bits. These are valid only
1658 1653 * if they are "2" (Link-local), "5" (Site-local), "8"
1659 1654 * (Organization-local) or "E" (Global). All other values
1660 1655 * are reserved (or currently unassigned).
1661 1656 */
1662 1657 scope = (gid.gid_prefix >> TAVOR_MCG_SCOPE_SHIFT) &
1663 1658 TAVOR_MCG_SCOPE_MASK;
1664 1659 if (!((scope == TAVOR_MCG_SCOPE_LINKLOC) ||
1665 1660 (scope == TAVOR_MCG_SCOPE_SITELOC) ||
1666 1661 (scope == TAVOR_MCG_SCOPE_ORGLOC) ||
1667 1662 (scope == TAVOR_MCG_SCOPE_GLOBAL))) {
1668 1663 TNF_PROBE_1(tavor_mgid_is_valid_invscope_fail, TAVOR_TNF_ERROR,
1669 1664 "", tnf_uint, scope, scope);
1670 1665 TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1671 1666 return (0);
1672 1667 }
1673 1668
1674 1669 /*
1675 1670 * If it passes all of the above checks, then we will consider it
1676 1671 * a valid multicast GID.
1677 1672 */
1678 1673 TAVOR_TNF_EXIT(tavor_mgid_is_valid);
1679 1674 return (1);
1680 1675 }
1681 1676
1682 1677
1683 1678 /*
1684 1679 * tavor_mlid_is_valid()
1685 1680 * Context: Can be called from interrupt or base context.
1686 1681 */
1687 1682 static int
1688 1683 tavor_mlid_is_valid(ib_lid_t lid)
1689 1684 {
1690 1685 TAVOR_TNF_ENTER(tavor_mlid_is_valid);
1691 1686
1692 1687 /*
1693 1688 * According to IBA 1.1 specification (section 4.1.1) a valid
1694 1689 * "multicast DLID" must be between 0xC000 and 0xFFFE.
1695 1690 */
1696 1691 if ((lid < IB_LID_MC_FIRST) || (lid > IB_LID_MC_LAST)) {
1697 1692 TNF_PROBE_1(tavor_mlid_is_valid_invdlid_fail, TAVOR_TNF_ERROR,
1698 1693 "", tnf_uint, mlid, lid);
1699 1694 TAVOR_TNF_EXIT(tavor_mlid_is_valid);
1700 1695 return (0);
1701 1696 }
1702 1697
1703 1698 TAVOR_TNF_EXIT(tavor_mlid_is_valid);
1704 1699 return (1);
1705 1700 }
1706 1701
1707 1702
1708 1703 /*
1709 1704 * tavor_pd_alloc()
1710 1705 * Context: Can be called only from user or kernel context.
1711 1706 */
1712 1707 int
1713 1708 tavor_pd_alloc(tavor_state_t *state, tavor_pdhdl_t *pdhdl, uint_t sleepflag)
1714 1709 {
1715 1710 tavor_rsrc_t *rsrc;
1716 1711 tavor_pdhdl_t pd;
1717 1712 int status;
1718 1713
1719 1714 TAVOR_TNF_ENTER(tavor_pd_alloc);
1720 1715
1721 1716 /*
1722 1717 * Allocate the software structure for tracking the protection domain
1723 1718 * (i.e. the Tavor Protection Domain handle). By default each PD
1724 1719 * structure will have a unique PD number assigned to it. All that
↓ open down ↓ |
1193 lines elided |
↑ open up ↑ |
1725 1720 * is necessary is for software to initialize the PD reference count
1726 1721 * (to zero) and return success.
1727 1722 */
1728 1723 status = tavor_rsrc_alloc(state, TAVOR_PDHDL, 1, sleepflag, &rsrc);
1729 1724 if (status != DDI_SUCCESS) {
1730 1725 TNF_PROBE_0(tavor_pd_alloc_rsrcalloc_fail, TAVOR_TNF_ERROR, "");
1731 1726 TAVOR_TNF_EXIT(tavor_pd_alloc);
1732 1727 return (IBT_INSUFF_RESOURCE);
1733 1728 }
1734 1729 pd = (tavor_pdhdl_t)rsrc->tr_addr;
1735 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1736 1730
1737 1731 pd->pd_refcnt = 0;
1738 1732 *pdhdl = pd;
1739 1733
1740 1734 TAVOR_TNF_EXIT(tavor_pd_alloc);
1741 1735 return (DDI_SUCCESS);
1742 1736 }
1743 1737
1744 1738
1745 1739 /*
1746 1740 * tavor_pd_free()
1747 1741 * Context: Can be called only from user or kernel context.
1748 1742 */
1749 1743 int
1750 1744 tavor_pd_free(tavor_state_t *state, tavor_pdhdl_t *pdhdl)
1751 1745 {
1752 1746 tavor_rsrc_t *rsrc;
↓ open down ↓ |
7 lines elided |
↑ open up ↑ |
1753 1747 tavor_pdhdl_t pd;
1754 1748
1755 1749 TAVOR_TNF_ENTER(tavor_pd_free);
1756 1750
1757 1751 /*
1758 1752 * Pull all the necessary information from the Tavor Protection Domain
1759 1753 * handle. This is necessary here because the resource for the
1760 1754 * PD is going to be freed up as part of this operation.
1761 1755 */
1762 1756 pd = *pdhdl;
1763 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd))
1764 1757 rsrc = pd->pd_rsrcp;
1765 1758
1766 1759 /*
1767 1760 * Check the PD reference count. If the reference count is non-zero,
1768 1761 * then it means that this protection domain is still referenced by
1769 1762 * some memory region, queue pair, address handle, or other IB object.
1770 1763 * If it is non-zero, then return an error. Otherwise, free the
1771 1764 * Tavor resource and return success.
1772 1765 */
1773 1766 if (pd->pd_refcnt != 0) {
1774 1767 TNF_PROBE_1(tavor_pd_free_refcnt_fail, TAVOR_TNF_ERROR, "",
1775 1768 tnf_int, refcnt, pd->pd_refcnt);
1776 1769 TAVOR_TNF_EXIT(tavor_pd_free);
1777 1770 return (IBT_PD_IN_USE);
1778 1771 }
1779 1772
1780 1773 /* Free the Tavor Protection Domain handle */
1781 1774 tavor_rsrc_free(state, &rsrc);
1782 1775
1783 1776 /* Set the pdhdl pointer to NULL and return success */
1784 1777 *pdhdl = (tavor_pdhdl_t)NULL;
1785 1778
1786 1779 TAVOR_TNF_EXIT(tavor_pd_free);
1787 1780 return (DDI_SUCCESS);
1788 1781 }
1789 1782
1790 1783
1791 1784 /*
1792 1785 * tavor_pd_refcnt_inc()
1793 1786 * Context: Can be called from interrupt or base context.
1794 1787 */
1795 1788 void
1796 1789 tavor_pd_refcnt_inc(tavor_pdhdl_t pd)
1797 1790 {
1798 1791 /* Increment the protection domain's reference count */
1799 1792 mutex_enter(&pd->pd_lock);
1800 1793 TNF_PROBE_1_DEBUG(tavor_pd_refcnt_inc, TAVOR_TNF_TRACE, "",
1801 1794 tnf_uint, refcnt, pd->pd_refcnt);
1802 1795 pd->pd_refcnt++;
1803 1796 mutex_exit(&pd->pd_lock);
1804 1797
1805 1798 }
1806 1799
1807 1800
1808 1801 /*
1809 1802 * tavor_pd_refcnt_dec()
1810 1803 * Context: Can be called from interrupt or base context.
1811 1804 */
1812 1805 void
1813 1806 tavor_pd_refcnt_dec(tavor_pdhdl_t pd)
1814 1807 {
1815 1808 /* Decrement the protection domain's reference count */
1816 1809 mutex_enter(&pd->pd_lock);
1817 1810 pd->pd_refcnt--;
1818 1811 TNF_PROBE_1_DEBUG(tavor_pd_refcnt_dec, TAVOR_TNF_TRACE, "",
1819 1812 tnf_uint, refcnt, pd->pd_refcnt);
1820 1813 mutex_exit(&pd->pd_lock);
1821 1814
1822 1815 }
1823 1816
1824 1817
1825 1818 /*
1826 1819 * tavor_port_query()
1827 1820 * Context: Can be called only from user or kernel context.
1828 1821 */
1829 1822 int
1830 1823 tavor_port_query(tavor_state_t *state, uint_t port, ibt_hca_portinfo_t *pi)
↓ open down ↓ |
57 lines elided |
↑ open up ↑ |
1831 1824 {
1832 1825 sm_portinfo_t portinfo;
1833 1826 sm_guidinfo_t guidinfo;
1834 1827 sm_pkey_table_t pkeytable;
1835 1828 ib_gid_t *sgid;
1836 1829 uint_t sgid_max, pkey_max, tbl_size;
1837 1830 int i, j, indx, status;
1838 1831
1839 1832 TAVOR_TNF_ENTER(tavor_port_query);
1840 1833
1841 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pi))
1842 -
1843 1834 /* Validate that specified port number is legal */
1844 1835 if (!tavor_portnum_is_valid(state, port)) {
1845 1836 TNF_PROBE_1(tavor_port_query_inv_portnum_fail,
1846 1837 TAVOR_TNF_ERROR, "", tnf_uint, port, port);
1847 1838 TAVOR_TNF_EXIT(tavor_port_query);
1848 1839 return (IBT_HCA_PORT_INVALID);
1849 1840 }
1850 1841
1851 1842 /*
1852 1843 * We use the Tavor MAD_IFC command to post a GetPortInfo MAD
1853 1844 * to the firmware (for the specified port number). This returns
1854 1845 * a full PortInfo MAD (in "portinfo") which we subsequently
1855 1846 * parse to fill in the "ibt_hca_portinfo_t" structure returned
1856 1847 * to the IBTF.
1857 1848 */
1858 1849 status = tavor_getportinfo_cmd_post(state, port,
1859 1850 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
1860 1851 if (status != TAVOR_CMD_SUCCESS) {
1861 1852 cmn_err(CE_CONT, "Tavor: GetPortInfo (port %02d) command "
1862 1853 "failed: %08x\n", port, status);
1863 1854 TNF_PROBE_1(tavor_port_query_getportinfo_cmd_fail,
1864 1855 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1865 1856 TAVOR_TNF_EXIT(tavor_port_query);
1866 1857 return (ibc_get_ci_failure(0));
1867 1858 }
1868 1859
1869 1860 /*
1870 1861 * Parse the PortInfo MAD and fill in the IBTF structure
1871 1862 */
1872 1863 pi->p_base_lid = portinfo.LID;
1873 1864 pi->p_qkey_violations = portinfo.Q_KeyViolations;
1874 1865 pi->p_pkey_violations = portinfo.P_KeyViolations;
1875 1866 pi->p_sm_sl = portinfo.MasterSMSL;
1876 1867 pi->p_sm_lid = portinfo.MasterSMLID;
1877 1868 pi->p_linkstate = portinfo.PortState;
1878 1869 pi->p_port_num = portinfo.LocalPortNum;
1879 1870 pi->p_phys_state = portinfo.PortPhysicalState;
1880 1871 pi->p_width_supported = portinfo.LinkWidthSupported;
1881 1872 pi->p_width_enabled = portinfo.LinkWidthEnabled;
1882 1873 pi->p_width_active = portinfo.LinkWidthActive;
1883 1874 pi->p_speed_supported = portinfo.LinkSpeedSupported;
1884 1875 pi->p_speed_enabled = portinfo.LinkSpeedEnabled;
1885 1876 pi->p_speed_active = portinfo.LinkSpeedActive;
1886 1877 pi->p_mtu = portinfo.MTUCap;
1887 1878 pi->p_lmc = portinfo.LMC;
1888 1879 pi->p_max_vl = portinfo.VLCap;
1889 1880 pi->p_subnet_timeout = portinfo.SubnetTimeOut;
1890 1881 pi->p_msg_sz = ((uint32_t)1 << TAVOR_QP_LOG_MAX_MSGSZ);
1891 1882 tbl_size = state->ts_cfg_profile->cp_log_max_gidtbl;
1892 1883 pi->p_sgid_tbl_sz = (1 << tbl_size);
1893 1884 tbl_size = state->ts_cfg_profile->cp_log_max_pkeytbl;
1894 1885 pi->p_pkey_tbl_sz = (1 << tbl_size);
1895 1886
1896 1887 /*
1897 1888 * Convert InfiniBand-defined port capability flags to the format
1898 1889 * specified by the IBTF
1899 1890 */
1900 1891 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM)
1901 1892 pi->p_capabilities |= IBT_PORT_CAP_SM;
1902 1893 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SM_DISABLED)
1903 1894 pi->p_capabilities |= IBT_PORT_CAP_SM_DISABLED;
1904 1895 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_SNMP_SUPPD)
1905 1896 pi->p_capabilities |= IBT_PORT_CAP_SNMP_TUNNEL;
1906 1897 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_DM_SUPPD)
1907 1898 pi->p_capabilities |= IBT_PORT_CAP_DM;
1908 1899 if (portinfo.CapabilityMask & SM_CAP_MASK_IS_VM_SUPPD)
1909 1900 pi->p_capabilities |= IBT_PORT_CAP_VENDOR;
1910 1901
1911 1902 /*
1912 1903 * Fill in the SGID table. Since the only access to the Tavor
1913 1904 * GID tables is through the firmware's MAD_IFC interface, we
1914 1905 * post as many GetGUIDInfo MADs as necessary to read in the entire
1915 1906 * contents of the SGID table (for the specified port). Note: The
1916 1907 * GetGUIDInfo command only gets eight GUIDs per operation. These
1917 1908 * GUIDs are then appended to the GID prefix for the port (from the
1918 1909 * GetPortInfo above) to form the entire SGID table.
1919 1910 */
1920 1911 for (i = 0; i < pi->p_sgid_tbl_sz; i += 8) {
1921 1912 status = tavor_getguidinfo_cmd_post(state, port, i >> 3,
1922 1913 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &guidinfo);
1923 1914 if (status != TAVOR_CMD_SUCCESS) {
1924 1915 cmn_err(CE_CONT, "Tavor: GetGUIDInfo (port %02d) "
1925 1916 "command failed: %08x\n", port, status);
1926 1917 TNF_PROBE_1(tavor_port_query_getguidinfo_cmd_fail,
↓ open down ↓ |
74 lines elided |
↑ open up ↑ |
1927 1918 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1928 1919 TAVOR_TNF_EXIT(tavor_port_query);
1929 1920 return (ibc_get_ci_failure(0));
1930 1921 }
1931 1922
1932 1923 /* Figure out how many of the entries are valid */
1933 1924 sgid_max = min((pi->p_sgid_tbl_sz - i), 8);
1934 1925 for (j = 0; j < sgid_max; j++) {
1935 1926 indx = (i + j);
1936 1927 sgid = &pi->p_sgid_tbl[indx];
1937 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sgid))
1938 1928 sgid->gid_prefix = portinfo.GidPrefix;
1939 1929 sgid->gid_guid = guidinfo.GUIDBlocks[j];
1940 1930 }
1941 1931 }
1942 1932
1943 1933 /*
1944 1934 * Fill in the PKey table. Just as for the GID tables above, the
1945 1935 * only access to the Tavor PKey tables is through the firmware's
1946 1936 * MAD_IFC interface. We post as many GetPKeyTable MADs as necessary
1947 1937 * to read in the entire contents of the PKey table (for the specified
1948 1938 * port). Note: The GetPKeyTable command only gets 32 PKeys per
1949 1939 * operation.
1950 1940 */
1951 1941 for (i = 0; i < pi->p_pkey_tbl_sz; i += 32) {
1952 1942 status = tavor_getpkeytable_cmd_post(state, port, i,
1953 1943 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &pkeytable);
1954 1944 if (status != TAVOR_CMD_SUCCESS) {
1955 1945 cmn_err(CE_CONT, "Tavor: GetPKeyTable (port %02d) "
1956 1946 "command failed: %08x\n", port, status);
1957 1947 TNF_PROBE_1(tavor_port_query_getpkeytable_cmd_fail,
1958 1948 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
1959 1949 TAVOR_TNF_EXIT(tavor_port_query);
1960 1950 return (ibc_get_ci_failure(0));
1961 1951 }
1962 1952
1963 1953 /* Figure out how many of the entries are valid */
1964 1954 pkey_max = min((pi->p_pkey_tbl_sz - i), 32);
1965 1955 for (j = 0; j < pkey_max; j++) {
1966 1956 indx = (i + j);
1967 1957 pi->p_pkey_tbl[indx] = pkeytable.P_KeyTableBlocks[j];
1968 1958 }
1969 1959 }
1970 1960
1971 1961 TAVOR_TNF_EXIT(tavor_port_query);
1972 1962 return (DDI_SUCCESS);
1973 1963 }
1974 1964
1975 1965
1976 1966 /*
1977 1967 * tavor_port_modify()
1978 1968 * Context: Can be called only from user or kernel context.
1979 1969 */
1980 1970 /* ARGSUSED */
1981 1971 int
1982 1972 tavor_port_modify(tavor_state_t *state, uint8_t port,
1983 1973 ibt_port_modify_flags_t flags, uint8_t init_type)
1984 1974 {
1985 1975 sm_portinfo_t portinfo;
1986 1976 uint32_t capmask, reset_qkey;
1987 1977 int status;
1988 1978
1989 1979 TAVOR_TNF_ENTER(tavor_port_modify);
1990 1980
1991 1981 /*
1992 1982 * Return an error if either of the unsupported flags are set
1993 1983 */
1994 1984 if ((flags & IBT_PORT_SHUTDOWN) ||
1995 1985 (flags & IBT_PORT_SET_INIT_TYPE)) {
1996 1986 TNF_PROBE_1(tavor_port_modify_inv_flags_fail,
1997 1987 TAVOR_TNF_ERROR, "", tnf_uint, flags, flags);
1998 1988 TAVOR_TNF_EXIT(tavor_port_modify);
1999 1989 return (IBT_NOT_SUPPORTED);
2000 1990 }
2001 1991
2002 1992 /*
2003 1993 * Determine whether we are trying to reset the QKey counter
2004 1994 */
2005 1995 reset_qkey = (flags & IBT_PORT_RESET_QKEY) ? 1 : 0;
2006 1996
2007 1997 /* Validate that specified port number is legal */
2008 1998 if (!tavor_portnum_is_valid(state, port)) {
2009 1999 TNF_PROBE_1(tavor_port_modify_inv_portnum_fail,
2010 2000 TAVOR_TNF_ERROR, "", tnf_uint, port, port);
2011 2001 TAVOR_TNF_EXIT(tavor_port_modify);
2012 2002 return (IBT_HCA_PORT_INVALID);
2013 2003 }
2014 2004
2015 2005 /*
2016 2006 * Use the Tavor MAD_IFC command to post a GetPortInfo MAD to the
2017 2007 * firmware (for the specified port number). This returns a full
2018 2008 * PortInfo MAD (in "portinfo") from which we pull the current
2019 2009 * capability mask. We then modify the capability mask as directed
2020 2010 * by the "flags" argument, and write the updated capability mask
2021 2011 * using the Tavor SET_IB command (below).
2022 2012 */
2023 2013 status = tavor_getportinfo_cmd_post(state, port,
2024 2014 TAVOR_SLEEPFLAG_FOR_CONTEXT(), &portinfo);
2025 2015 if (status != TAVOR_CMD_SUCCESS) {
2026 2016 TNF_PROBE_1(tavor_port_modify_getportinfo_cmd_fail,
2027 2017 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
2028 2018 TAVOR_TNF_EXIT(tavor_port_modify);
2029 2019 return (ibc_get_ci_failure(0));
2030 2020 }
2031 2021
2032 2022 /*
2033 2023 * Convert InfiniBand-defined port capability flags to the format
2034 2024 * specified by the IBTF. Specifically, we modify the capability
2035 2025 * mask based on the specified values.
2036 2026 */
2037 2027 capmask = portinfo.CapabilityMask;
2038 2028
2039 2029 if (flags & IBT_PORT_RESET_SM)
2040 2030 capmask &= ~SM_CAP_MASK_IS_SM;
2041 2031 else if (flags & IBT_PORT_SET_SM)
2042 2032 capmask |= SM_CAP_MASK_IS_SM;
2043 2033
2044 2034 if (flags & IBT_PORT_RESET_SNMP)
2045 2035 capmask &= ~SM_CAP_MASK_IS_SNMP_SUPPD;
2046 2036 else if (flags & IBT_PORT_SET_SNMP)
2047 2037 capmask |= SM_CAP_MASK_IS_SNMP_SUPPD;
2048 2038
2049 2039 if (flags & IBT_PORT_RESET_DEVMGT)
2050 2040 capmask &= ~SM_CAP_MASK_IS_DM_SUPPD;
2051 2041 else if (flags & IBT_PORT_SET_DEVMGT)
2052 2042 capmask |= SM_CAP_MASK_IS_DM_SUPPD;
2053 2043
2054 2044 if (flags & IBT_PORT_RESET_VENDOR)
2055 2045 capmask &= ~SM_CAP_MASK_IS_VM_SUPPD;
2056 2046 else if (flags & IBT_PORT_SET_VENDOR)
2057 2047 capmask |= SM_CAP_MASK_IS_VM_SUPPD;
2058 2048
2059 2049 /*
2060 2050 * Use the Tavor SET_IB command to update the capability mask and
2061 2051 * (possibly) reset the QKey violation counter for the specified port.
2062 2052 * Note: In general, this operation shouldn't fail. If it does, then
2063 2053 * it is an indication that something (probably in HW, but maybe in
2064 2054 * SW) has gone seriously wrong.
2065 2055 */
2066 2056 status = tavor_set_ib_cmd_post(state, capmask, port, reset_qkey,
2067 2057 TAVOR_SLEEPFLAG_FOR_CONTEXT());
2068 2058 if (status != TAVOR_CMD_SUCCESS) {
2069 2059 TAVOR_WARNING(state, "failed to modify port capabilities");
2070 2060 cmn_err(CE_CONT, "Tavor: SET_IB (port %02d) command failed: "
2071 2061 "%08x\n", port, status);
2072 2062 TNF_PROBE_1(tavor_port_modify_set_ib_cmd_fail,
2073 2063 TAVOR_TNF_ERROR, "", tnf_uint, cmd_status, status);
2074 2064 TAVOR_TNF_EXIT(tavor_port_modify);
2075 2065 return (ibc_get_ci_failure(0));
2076 2066 }
2077 2067
2078 2068 TAVOR_TNF_EXIT(tavor_port_modify);
2079 2069 return (DDI_SUCCESS);
2080 2070 }
2081 2071
2082 2072
2083 2073 /*
2084 2074 * tavor_set_addr_path()
2085 2075 * Context: Can be called from interrupt or base context.
2086 2076 *
2087 2077 * Note: This routine is used for two purposes. It is used to fill in the
2088 2078 * Tavor UDAV fields, and it is used to fill in the address path information
2089 2079 * for QPs. Because the two Tavor structures are similar, common fields can
2090 2080 * be filled in here. Because they are slightly different, however, we pass
↓ open down ↓ |
143 lines elided |
↑ open up ↑ |
2091 2081 * an additional flag to indicate which type is being filled.
2092 2082 */
2093 2083 int
2094 2084 tavor_set_addr_path(tavor_state_t *state, ibt_adds_vect_t *av,
2095 2085 tavor_hw_addr_path_t *path, uint_t type, tavor_qphdl_t qp)
2096 2086 {
2097 2087 uint_t gidtbl_sz;
2098 2088
2099 2089 TAVOR_TNF_ENTER(tavor_set_addr_path);
2100 2090
2101 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2102 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2103 -
2104 2091 path->ml_path = av->av_src_path;
2105 2092 path->rlid = av->av_dlid;
2106 2093 path->sl = av->av_srvl;
2107 2094
2108 2095 /* Port number only valid (in "av_port_num") if this is a UDAV */
2109 2096 if (type == TAVOR_ADDRPATH_UDAV) {
2110 2097 path->portnum = av->av_port_num;
2111 2098 }
2112 2099
2113 2100 /*
2114 2101 * Validate (and fill in) static rate.
2115 2102 *
2116 2103 * The stat_rate_sup is used to decide how to set the rate and
2117 2104 * if it is zero, the driver uses the old interface.
2118 2105 */
2119 2106 if (state->ts_devlim.stat_rate_sup) {
2120 2107 if (av->av_srate == IBT_SRATE_20) {
2121 2108 path->max_stat_rate = 0; /* 4x@DDR injection rate */
2122 2109 } else if (av->av_srate == IBT_SRATE_5) {
2123 2110 path->max_stat_rate = 3; /* 1x@DDR injection rate */
2124 2111 } else if (av->av_srate == IBT_SRATE_10) {
2125 2112 path->max_stat_rate = 2; /* 4x@SDR injection rate */
2126 2113 } else if (av->av_srate == IBT_SRATE_2) {
2127 2114 path->max_stat_rate = 1; /* 1x@SDR injection rate */
2128 2115 } else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) {
2129 2116 path->max_stat_rate = 0; /* Max */
2130 2117 } else {
2131 2118 TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail,
2132 2119 TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate);
2133 2120 TAVOR_TNF_EXIT(tavor_set_addr_path);
2134 2121 return (IBT_STATIC_RATE_INVALID);
2135 2122 }
2136 2123 } else {
2137 2124 if (av->av_srate == IBT_SRATE_10) {
2138 2125 path->max_stat_rate = 0; /* 4x@SDR injection rate */
2139 2126 } else if (av->av_srate == IBT_SRATE_2) {
2140 2127 path->max_stat_rate = 1; /* 1x@SDR injection rate */
2141 2128 } else if (av->av_srate == IBT_SRATE_NOT_SPECIFIED) {
2142 2129 path->max_stat_rate = 0; /* Max */
2143 2130 } else {
2144 2131 TNF_PROBE_1(tavor_set_addr_path_inv_srate_fail,
↓ open down ↓ |
31 lines elided |
↑ open up ↑ |
2145 2132 TAVOR_TNF_ERROR, "", tnf_uint, srate, av->av_srate);
2146 2133 TAVOR_TNF_EXIT(tavor_set_addr_path);
2147 2134 return (IBT_STATIC_RATE_INVALID);
2148 2135 }
2149 2136 }
2150 2137
2151 2138 /*
2152 2139 * If this is a QP operation, save a soft copy.
2153 2140 */
2154 2141 if (qp) {
2155 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qp->qp_save_srate))
2156 2142 qp->qp_save_srate = av->av_srate;
2157 2143 }
2158 2144
2159 2145 /* If "grh" flag is set, then check for valid SGID index too */
2160 2146 gidtbl_sz = (1 << state->ts_devlim.log_max_gid);
2161 2147 if ((av->av_send_grh) && (av->av_sgid_ix > gidtbl_sz)) {
2162 2148 TNF_PROBE_1(tavor_set_addr_path_inv_sgid_ix_fail,
2163 2149 TAVOR_TNF_ERROR, "", tnf_uint, sgid_ix, av->av_sgid_ix);
2164 2150 TAVOR_TNF_EXIT(tavor_set_addr_path);
2165 2151 return (IBT_SGID_INVALID);
2166 2152 }
2167 2153
2168 2154 /*
2169 2155 * Fill in all "global" values regardless of the value in the GRH
2170 2156 * flag. Because "grh" is not set unless "av_send_grh" is set, the
2171 2157 * hardware will ignore the other "global" values as necessary. Note:
2172 2158 * SW does this here to enable later query operations to return
2173 2159 * exactly the same params that were passed when the addr path was
2174 2160 * last written.
2175 2161 */
2176 2162 path->grh = av->av_send_grh;
2177 2163 if (type == TAVOR_ADDRPATH_QP) {
2178 2164 path->mgid_index = av->av_sgid_ix;
2179 2165 } else {
2180 2166 /*
2181 2167 * For Tavor UDAV, the "mgid_index" field is the index into
2182 2168 * a combined table (not a per-port table). So some extra
2183 2169 * calculations are necessary.
2184 2170 */
2185 2171 path->mgid_index = ((av->av_port_num - 1) * gidtbl_sz) +
2186 2172 av->av_sgid_ix;
2187 2173 }
2188 2174 path->flow_label = av->av_flow;
2189 2175 path->tclass = av->av_tclass;
2190 2176 path->hop_limit = av->av_hop;
2191 2177 path->rgid_h = av->av_dgid.gid_prefix;
2192 2178
2193 2179 /*
2194 2180 * According to Tavor PRM, the (31:0) part of rgid_l must be set to
2195 2181 * "0x2" if the 'grh' or 'g' bit is cleared. It also says that we
2196 2182 * only need to do it for UDAV's. So we enforce that here.
2197 2183 *
2198 2184 * NOTE: The entire 64 bits worth of GUID info is actually being
2199 2185 * preserved (for UDAVs) by the callers of this function
2200 2186 * (tavor_ah_alloc() and tavor_ah_modify()) and as long as the
2201 2187 * 'grh' bit is not set, the upper 32 bits (63:32) of rgid_l are
2202 2188 * "don't care".
2203 2189 */
2204 2190 if ((path->grh) || (type == TAVOR_ADDRPATH_QP)) {
2205 2191 path->rgid_l = av->av_dgid.gid_guid;
2206 2192 } else {
2207 2193 path->rgid_l = 0x2;
2208 2194 }
2209 2195
2210 2196 TAVOR_TNF_EXIT(tavor_set_addr_path);
2211 2197 return (DDI_SUCCESS);
2212 2198 }
2213 2199
2214 2200
2215 2201 /*
2216 2202 * tavor_get_addr_path()
2217 2203 * Context: Can be called from interrupt or base context.
2218 2204 *
2219 2205 * Note: Just like tavor_set_addr_path() above, this routine is used for two
2220 2206 * purposes. It is used to read in the Tavor UDAV fields, and it is used to
2221 2207 * read in the address path information for QPs. Because the two Tavor
↓ open down ↓ |
56 lines elided |
↑ open up ↑ |
2222 2208 * structures are similar, common fields can be read in here. But because
2223 2209 * they are slightly different, we pass an additional flag to indicate which
2224 2210 * type is being read.
2225 2211 */
2226 2212 void
2227 2213 tavor_get_addr_path(tavor_state_t *state, tavor_hw_addr_path_t *path,
2228 2214 ibt_adds_vect_t *av, uint_t type, tavor_qphdl_t qp)
2229 2215 {
2230 2216 uint_t gidtbl_sz;
2231 2217
2232 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*path))
2233 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*av))
2234 -
2235 2218 av->av_src_path = path->ml_path;
2236 2219 av->av_port_num = path->portnum;
2237 2220 av->av_dlid = path->rlid;
2238 2221 av->av_srvl = path->sl;
2239 2222
2240 2223 /*
2241 2224 * Set "av_srate" value from max_stat_rate.
2242 2225 */
2243 2226 if (qp) {
2244 2227 /*
2245 2228 * If this is a QP operation, use the soft copy
2246 2229 */
2247 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(qp->qp_save_srate))
2248 2230 av->av_srate = qp->qp_save_srate;
2249 2231 } else {
2250 2232 /*
2251 2233 * The stat_rate_sup is used to decide how the srate value is
2252 2234 * set and
2253 2235 * if it is zero, the driver uses the old interface.
2254 2236 */
2255 2237 if (state->ts_devlim.stat_rate_sup) {
2256 2238 if (path->max_stat_rate == 0) {
2257 2239 av->av_srate = IBT_SRATE_20; /* 4x@DDR rate */
2258 2240 } else if (path->max_stat_rate == 1) {
2259 2241 av->av_srate = IBT_SRATE_2; /* 1x@SDR rate */
2260 2242 } else if (path->max_stat_rate == 2) {
2261 2243 av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */
2262 2244 } else if (path->max_stat_rate == 3) {
2263 2245 av->av_srate = IBT_SRATE_5; /* 1xDDR rate */
2264 2246 }
2265 2247 } else {
2266 2248 if (path->max_stat_rate == 0) {
2267 2249 av->av_srate = IBT_SRATE_10; /* 4x@SDR rate */
2268 2250 } else if (path->max_stat_rate == 1) {
2269 2251 av->av_srate = IBT_SRATE_2; /* 1x@SDR rate */
2270 2252 }
2271 2253 }
2272 2254 }
2273 2255
2274 2256 /*
2275 2257 * Extract all "global" values regardless of the value in the GRH
2276 2258 * flag. Because "av_send_grh" is set only if "grh" is set, software
2277 2259 * knows to ignore the other "global" values as necessary. Note: SW
2278 2260 * does it this way to enable these query operations to return exactly
2279 2261 * the same params that were passed when the addr path was last written.
2280 2262 */
2281 2263 av->av_send_grh = path->grh;
2282 2264 if (type == TAVOR_ADDRPATH_QP) {
2283 2265 av->av_sgid_ix = path->mgid_index;
2284 2266 } else {
2285 2267 /*
2286 2268 * For Tavor UDAV, the "mgid_index" field is the index into
2287 2269 * a combined table (not a per-port table). So some extra
2288 2270 * calculations are necessary.
2289 2271 */
2290 2272 gidtbl_sz = (1 << state->ts_devlim.log_max_gid);
2291 2273 av->av_sgid_ix = path->mgid_index - ((av->av_port_num - 1) *
2292 2274 gidtbl_sz);
2293 2275 }
2294 2276 av->av_flow = path->flow_label;
2295 2277 av->av_tclass = path->tclass;
2296 2278 av->av_hop = path->hop_limit;
2297 2279 av->av_dgid.gid_prefix = path->rgid_h;
2298 2280 av->av_dgid.gid_guid = path->rgid_l;
2299 2281 }
2300 2282
2301 2283
2302 2284 /*
2303 2285 * tavor_portnum_is_valid()
2304 2286 * Context: Can be called from interrupt or base context.
2305 2287 */
2306 2288 int
2307 2289 tavor_portnum_is_valid(tavor_state_t *state, uint_t portnum)
2308 2290 {
2309 2291 uint_t max_port;
2310 2292
2311 2293 max_port = state->ts_cfg_profile->cp_num_ports;
2312 2294 if ((portnum <= max_port) && (portnum != 0)) {
2313 2295 return (1);
2314 2296 } else {
2315 2297 return (0);
2316 2298 }
2317 2299 }
2318 2300
2319 2301
2320 2302 /*
2321 2303 * tavor_pkeyindex_is_valid()
2322 2304 * Context: Can be called from interrupt or base context.
2323 2305 */
2324 2306 int
2325 2307 tavor_pkeyindex_is_valid(tavor_state_t *state, uint_t pkeyindx)
2326 2308 {
2327 2309 uint_t max_pkeyindx;
2328 2310
2329 2311 max_pkeyindx = 1 << state->ts_cfg_profile->cp_log_max_pkeytbl;
2330 2312 if (pkeyindx < max_pkeyindx) {
2331 2313 return (1);
2332 2314 } else {
2333 2315 return (0);
2334 2316 }
2335 2317 }
2336 2318
2337 2319
2338 2320 /*
2339 2321 * tavor_queue_alloc()
2340 2322 * Context: Can be called from interrupt or base context.
2341 2323 */
2342 2324 int
2343 2325 tavor_queue_alloc(tavor_state_t *state, tavor_qalloc_info_t *qa_info,
↓ open down ↓ |
86 lines elided |
↑ open up ↑ |
2344 2326 uint_t sleepflag)
2345 2327 {
2346 2328 ddi_dma_attr_t dma_attr;
2347 2329 int (*callback)(caddr_t);
2348 2330 uint64_t realsize, alloc_mask;
2349 2331 uint_t dma_xfer_mode, type;
2350 2332 int flag, status;
2351 2333
2352 2334 TAVOR_TNF_ENTER(tavor_queue_alloc);
2353 2335
2354 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2355 -
2356 2336 /* Set the callback flag appropriately */
2357 2337 callback = (sleepflag == TAVOR_SLEEP) ? DDI_DMA_SLEEP :
2358 2338 DDI_DMA_DONTWAIT;
2359 2339
2360 2340 /*
2361 2341 * Initialize many of the default DMA attributes. Then set additional
2362 2342 * alignment restrictions as necessary for the queue memory. Also
2363 2343 * respect the configured value for IOMMU bypass
2364 2344 */
2365 2345 tavor_dma_attr_init(&dma_attr);
2366 2346 dma_attr.dma_attr_align = qa_info->qa_bind_align;
2367 2347 type = state->ts_cfg_profile->cp_iommu_bypass;
2368 2348 if (type == TAVOR_BINDMEM_BYPASS) {
2369 2349 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
2370 2350 }
2371 2351
2372 2352 /* Allocate a DMA handle */
2373 2353 status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr, callback, NULL,
2374 2354 &qa_info->qa_dmahdl);
2375 2355 if (status != DDI_SUCCESS) {
2376 2356 TNF_PROBE_0(tavor_queue_alloc_dmahdl_fail, TAVOR_TNF_ERROR, "");
2377 2357 TAVOR_TNF_EXIT(tavor_queue_alloc);
2378 2358 return (DDI_FAILURE);
2379 2359 }
2380 2360
2381 2361 /*
2382 2362 * Determine the amount of memory to allocate, depending on the values
2383 2363 * in "qa_bind_align" and "qa_alloc_align". The problem we are trying
2384 2364 * to solve here is that allocating a DMA handle with IOMMU bypass
2385 2365 * (DDI_DMA_FORCE_PHYSICAL) constrains us to only requesting alignments
2386 2366 * that are less than the page size. Since we may need stricter
2387 2367 * alignments on the memory allocated by ddi_dma_mem_alloc() (e.g. in
2388 2368 * Tavor QP work queue memory allocation), we use the following method
2389 2369 * to calculate how much additional memory to request, and we enforce
2390 2370 * our own alignment on the allocated result.
2391 2371 */
2392 2372 alloc_mask = qa_info->qa_alloc_align - 1;
2393 2373 if (qa_info->qa_bind_align == qa_info->qa_alloc_align) {
2394 2374 realsize = qa_info->qa_size;
2395 2375 } else {
2396 2376 realsize = qa_info->qa_size + alloc_mask;
2397 2377 }
2398 2378
2399 2379 /*
2400 2380 * Allocate the queue memory according to "qa_location": system memory
2401 2381 * comes from ddi_dma_mem_alloc(), userland-mappable memory from
2402 2382 * ddi_umem_alloc(), and locally-attached DDR memory from the vmem
2403 2383 * allocator. In every case, return the "real" memory pointer and a
2404 2384 * pointer to the usable range (including any necessary alignment
2405 2385 * adjustments). The system memory and DDR cases also return the "real"
2406 2386 * size and a ddi_acc_handle_t to use when reading/writing the memory.
2407 2387 */
2408 2388 if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) {
2409 2389
2410 2390 /*
2411 2391 * Determine whether to map STREAMING or CONSISTENT. This is
2412 2392 * based on the value set in the configuration profile at
2413 2393 * attach time.
2414 2394 */
2415 2395 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent;
2416 2396
2417 2397 /* Allocate system memory for the queue */
2418 2398 status = ddi_dma_mem_alloc(qa_info->qa_dmahdl, realsize,
2419 2399 &state->ts_reg_accattr, dma_xfer_mode, callback, NULL,
2420 2400 (caddr_t *)&qa_info->qa_buf_real,
2421 2401 (size_t *)&qa_info->qa_buf_realsz, &qa_info->qa_acchdl);
2422 2402 if (status != DDI_SUCCESS) {
2423 2403 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2424 2404 TNF_PROBE_0(tavor_queue_alloc_dma_memalloc_fail,
2425 2405 TAVOR_TNF_ERROR, "");
2426 2406 TAVOR_TNF_EXIT(tavor_queue_alloc);
2427 2407 return (DDI_FAILURE);
2428 2408 }
2429 2409
2430 2410 /*
2431 2411 * Save temporary copy of the real pointer. (This may be
2432 2412 * modified in the last step below).
2433 2413 */
2434 2414 qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2435 2415
2436 2416 } else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) {
2437 2417
2438 2418 /* Allocate userland mappable memory for the queue */
2439 2419 flag = (sleepflag == TAVOR_SLEEP) ? DDI_UMEM_SLEEP :
2440 2420 DDI_UMEM_NOSLEEP;
2441 2421 qa_info->qa_buf_real = ddi_umem_alloc(realsize, flag,
2442 2422 &qa_info->qa_umemcookie);
2443 2423 if (qa_info->qa_buf_real == NULL) {
2444 2424 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2445 2425 TNF_PROBE_0(tavor_queue_alloc_umem_fail,
2446 2426 TAVOR_TNF_ERROR, "");
2447 2427 TAVOR_TNF_EXIT(tavor_queue_alloc);
2448 2428 return (DDI_FAILURE);
2449 2429 }
2450 2430
2451 2431 /*
2452 2432 * Save temporary copy of the real pointer. (This may be
2453 2433 * modified in the last step below).
2454 2434 */
2455 2435 qa_info->qa_buf_aligned = qa_info->qa_buf_real;
2456 2436
2457 2437 } else { /* TAVOR_QUEUE_LOCATION_INDDR */
2458 2438
2459 2439 /* Allocate DDR memory for the queue */
2460 2440 flag = (sleepflag == TAVOR_SLEEP) ? VM_SLEEP : VM_NOSLEEP;
2461 2441 qa_info->qa_buf_real = (uint32_t *)vmem_xalloc(
2462 2442 state->ts_ddrvmem, realsize, qa_info->qa_bind_align, 0, 0,
2463 2443 NULL, NULL, flag);
2464 2444 if (qa_info->qa_buf_real == NULL) {
2465 2445 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2466 2446 TNF_PROBE_0(tavor_queue_alloc_vmxa_fail,
2467 2447 TAVOR_TNF_ERROR, "");
2468 2448 TAVOR_TNF_EXIT(tavor_queue_alloc);
2469 2449 return (DDI_FAILURE);
2470 2450 }
2471 2451
2472 2452 /*
2473 2453 * Since "qa_buf_real" will be a PCI address (the offset into
2474 2454 * the DDR memory), we first need to do some calculations to
2475 2455 * convert it to its kernel mapped address. (Note: This may
2476 2456 * be modified again below, when any additional "alloc"
2477 2457 * alignment constraint is applied).
2478 2458 */
2479 2459 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2480 2460 state->ts_reg_ddr_baseaddr) + ((uintptr_t)
2481 2461 qa_info->qa_buf_real - state->ts_ddr.ddr_baseaddr));
2482 2462 qa_info->qa_buf_realsz = realsize;
2483 2463 qa_info->qa_acchdl = state->ts_reg_ddrhdl;
2484 2464 }
2485 2465
2486 2466 /*
2487 2467 * The last step is to ensure that the final address ("qa_buf_aligned")
2488 2468 * has the appropriate "alloc" alignment restriction applied to it
2489 2469 * (if necessary).
2490 2470 */
2491 2471 if (qa_info->qa_bind_align != qa_info->qa_alloc_align) {
2492 2472 qa_info->qa_buf_aligned = (uint32_t *)(uintptr_t)(((uintptr_t)
2493 2473 qa_info->qa_buf_aligned + alloc_mask) & ~alloc_mask);
2494 2474 }
2495 2475
2496 2476 TAVOR_TNF_EXIT(tavor_queue_alloc);
2497 2477 return (DDI_SUCCESS);
2498 2478 }
2499 2479
↓ open down ↓ |
134 lines elided |
↑ open up ↑ |
2500 2480
2501 2481 /*
2502 2482 * tavor_queue_free()
2503 2483 * Context: Can be called from interrupt or base context.
2504 2484 */
2505 2485 void
2506 2486 tavor_queue_free(tavor_state_t *state, tavor_qalloc_info_t *qa_info)
2507 2487 {
2508 2488 TAVOR_TNF_ENTER(tavor_queue_free);
2509 2489
2510 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*qa_info))
2511 -
2512 2490 /*
2513 2491 * Depending on how (i.e. from where) we allocated the memory for
2514 2492 * this queue, we choose the appropriate method for releasing the
2515 2493 * resources.
2516 2494 */
2517 2495 if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_NORMAL) {
2518 2496
2519 2497 ddi_dma_mem_free(&qa_info->qa_acchdl);
2520 2498
2521 2499 } else if (qa_info->qa_location == TAVOR_QUEUE_LOCATION_USERLAND) {
2522 2500
2523 2501 ddi_umem_free(qa_info->qa_umemcookie);
2524 2502
2525 2503 } else { /* TAVOR_QUEUE_LOCATION_INDDR */
2526 2504
2527 2505 vmem_xfree(state->ts_ddrvmem, qa_info->qa_buf_real,
2528 2506 qa_info->qa_buf_realsz);
2529 2507 }
2530 2508
2531 2509 /* Always free the dma handle */
2532 2510 ddi_dma_free_handle(&qa_info->qa_dmahdl);
2533 2511
2534 2512 TAVOR_TNF_EXIT(tavor_queue_free);
↓ open down ↓ |
13 lines elided |
↑ open up ↑ |
2535 2513 }
2536 2514
2537 2515
2538 2516 /*
2539 2517 * tavor_dma_attr_init()
2540 2518 * Context: Can be called from interrupt or base context.
2541 2519 */
2542 2520 void
2543 2521 tavor_dma_attr_init(ddi_dma_attr_t *dma_attr)
2544 2522 {
2545 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_attr))
2546 -
2547 2523 dma_attr->dma_attr_version = DMA_ATTR_V0;
2548 2524 dma_attr->dma_attr_addr_lo = 0;
2549 2525 dma_attr->dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFFull;
2550 2526 dma_attr->dma_attr_count_max = 0xFFFFFFFFFFFFFFFFull;
2551 2527 dma_attr->dma_attr_align = 1;
2552 2528 dma_attr->dma_attr_burstsizes = 0x3FF;
2553 2529 dma_attr->dma_attr_minxfer = 1;
2554 2530 dma_attr->dma_attr_maxxfer = 0xFFFFFFFFFFFFFFFFull;
2555 2531 dma_attr->dma_attr_seg = 0xFFFFFFFFFFFFFFFFull;
2556 2532 dma_attr->dma_attr_sgllen = 0x7FFFFFFF;
2557 2533 dma_attr->dma_attr_granular = 1;
2558 2534 dma_attr->dma_attr_flags = 0;
2559 2535 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX