Print this page
8368 remove warlock leftovers from usr/src/uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/io/ib/adapters/tavor/tavor_mr.c
+++ new/usr/src/uts/common/io/ib/adapters/tavor/tavor_mr.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * tavor_mr.c
28 28 * Tavor Memory Region/Window Routines
29 29 *
30 30 * Implements all the routines necessary to provide the requisite memory
31 31 * registration verbs. These include operations like RegisterMemRegion(),
32 32 * DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion,
33 33 * etc., that affect Memory Regions. It also includes the verbs that
34 34 * affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
35 35 * and QueryMemWindow().
36 36 */
37 37
38 38 #include <sys/types.h>
39 39 #include <sys/conf.h>
40 40 #include <sys/ddi.h>
41 41 #include <sys/sunddi.h>
42 42 #include <sys/modctl.h>
43 43 #include <sys/esunddi.h>
44 44
45 45 #include <sys/ib/adapters/tavor/tavor.h>
46 46
47 47
48 48 /*
49 49 * Used by tavor_mr_keycalc() below to fill in the "unconstrained" portion
50 50 * of Tavor memory keys (LKeys and RKeys)
51 51 */
52 52 static uint_t tavor_debug_memkey_cnt = 0x00000000;
53 53
54 54 static int tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
55 55 tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op);
56 56 static int tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
57 57 tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
58 58 tavor_mr_options_t *op);
59 59 static int tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
60 60 tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
61 61 uint_t sleep, uint_t *dereg_level);
62 62 static uint64_t tavor_mr_nummtt_needed(tavor_state_t *state,
63 63 tavor_bind_info_t *bind, uint_t *mtt_pgsize);
64 64 static int tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
65 65 ddi_dma_handle_t dmahdl, uint_t sleep);
66 66 static void tavor_mr_mem_unbind(tavor_state_t *state,
67 67 tavor_bind_info_t *bind);
68 68 static int tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
69 69 uint32_t mtt_pgsize_bits);
70 70 static int tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc);
71 71 static int tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc);
72 72
/*
 * The Tavor umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The tavor_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
 */
static struct umem_callback_ops tavor_umem_cbops = {
	UMEM_CALLBACK_VERSION,		/* interface version */
	tavor_umap_umemlock_cb,		/* cleanup callback (see above) */
};
83 83
84 84
85 85 /*
86 86 * tavor_mr_register()
87 87 * Context: Can be called from interrupt or base context.
88 88 */
89 89 int
90 90 tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
91 91 ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
92 92 {
93 93 tavor_bind_info_t bind;
94 94 int status;
95 95
96 96 TAVOR_TNF_ENTER(tavor_mr_register);
97 97
98 98 /*
99 99 * Fill in the "bind" struct. This struct provides the majority
100 100 * of the information that will be used to distinguish between an
101 101 * "addr" binding (as is the case here) and a "buf" binding (see
102 102 * below). The "bind" struct is later passed to tavor_mr_mem_bind()
103 103 * which does most of the "heavy lifting" for the Tavor memory
104 104 * registration routines.
105 105 */
106 106 bind.bi_type = TAVOR_BINDHDL_VADDR;
107 107 bind.bi_addr = mr_attr->mr_vaddr;
108 108 bind.bi_len = mr_attr->mr_len;
109 109 bind.bi_as = mr_attr->mr_as;
110 110 bind.bi_flags = mr_attr->mr_flags;
111 111 status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
112 112 if (status != DDI_SUCCESS) {
113 113 TNF_PROBE_0(tavor_mr_register_cmnreg_fail,
114 114 TAVOR_TNF_ERROR, "");
115 115 TAVOR_TNF_EXIT(tavor_mr_register);
116 116 return (status);
117 117 }
118 118
119 119 TAVOR_TNF_EXIT(tavor_mr_register);
120 120 return (DDI_SUCCESS);
121 121 }
122 122
123 123
124 124 /*
125 125 * tavor_mr_register_buf()
126 126 * Context: Can be called from interrupt or base context.
127 127 */
128 128 int
129 129 tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pd,
130 130 ibt_smr_attr_t *mr_attr, struct buf *buf, tavor_mrhdl_t *mrhdl,
131 131 tavor_mr_options_t *op)
132 132 {
133 133 tavor_bind_info_t bind;
134 134 int status;
135 135
136 136 TAVOR_TNF_ENTER(tavor_mr_register_buf);
137 137
138 138 /*
139 139 * Fill in the "bind" struct. This struct provides the majority
140 140 * of the information that will be used to distinguish between an
141 141 * "addr" binding (see above) and a "buf" binding (as is the case
142 142 * here). The "bind" struct is later passed to tavor_mr_mem_bind()
143 143 * which does most of the "heavy lifting" for the Tavor memory
144 144 * registration routines. Note: We have chosen to provide
145 145 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
146 146 * not set). It is not critical what value we choose here as it need
147 147 * only be unique for the given RKey (which will happen by default),
148 148 * so the choice here is somewhat arbitrary.
149 149 */
150 150 bind.bi_type = TAVOR_BINDHDL_BUF;
151 151 bind.bi_buf = buf;
152 152 if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
153 153 bind.bi_addr = mr_attr->mr_vaddr;
154 154 } else {
155 155 bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
156 156 }
157 157 bind.bi_as = NULL;
158 158 bind.bi_len = (uint64_t)buf->b_bcount;
159 159 bind.bi_flags = mr_attr->mr_flags;
160 160 status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
161 161 if (status != DDI_SUCCESS) {
162 162 TNF_PROBE_0(tavor_mr_register_buf_cmnreg_fail,
163 163 TAVOR_TNF_ERROR, "");
164 164 TAVOR_TNF_EXIT(tavor_mr_register_buf);
165 165 return (status);
166 166 }
167 167
168 168 TAVOR_TNF_EXIT(tavor_mr_register_buf);
169 169 return (DDI_SUCCESS);
170 170 }
171 171
172 172
/*
 * tavor_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 *
 *    Register a new memory region that shares the MTT entries of the
 *    existing region "mrhdl".  The new region gets its own MPT entry
 *    (and its own LKey/RKey) but points at, and takes a reference on,
 *    the original region's MTT table entries.  On success the new
 *    handle is returned through "mrhdl_new".
 */
int
tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mpt, *mtt, *rsrc;
	tavor_umap_db_entry_t	*umapdb;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	tavor_bind_info_t	*bind;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, mtt_ddrbaseaddr, pgsize_msk;
	uint_t			sleep, mr_is_umem;
	int			status, umem_flags;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_register_shared);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP :
	    TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrshared_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the shared memory region.
	 * Specifically, it will be made to reference the currently existing
	 * MTT entries and ownership of the MPT will be passed to the hardware
	 * in the last step below.  If we fail here, we must undo the
	 * protection domain reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mrshared_fail1;
	}

	/*
	 * Allocate the software structure for tracking the shared memory
	 * region (i.e. the Tavor Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mrshared_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/* Grab the MR lock for the current memory region */
	mutex_enter(&mrhdl->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
		mutex_exit(&mrhdl->mr_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
		goto mrshared_fail3;
	}

	/*
	 * Determine if the original memory was from userland and, if so, pin
	 * the pages (again) with umem_lockmemory().  This will guarantee a
	 * separate callback for each of this shared region's MR handles.
	 * If this is userland memory, then allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo all the above setup.
	 */
	mr_is_umem = mrhdl->mr_is_umem;
	if (mr_is_umem) {
		/* Round the locked range out to whole pages */
		umem_len = ptob(btopr(mrhdl->mr_bindinfo.bi_len +
		    ((uintptr_t)mrhdl->mr_bindinfo.bi_addr & PAGEOFFSET)));
		umem_addr = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
		    ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &tavor_umem_cbops, NULL);
		if (status != 0) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
			goto mrshared_fail3;
		}

		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
			goto mrshared_fail4;
		}
	}

	/*
	 * Copy the MTT resource pointer (and additional parameters) from
	 * the original Tavor Memory Region handle.  Note: this is normally
	 * where the tavor_mr_mem_bind() routine would be called, but because
	 * we already have bound and filled-in MTT entries it is simply a
	 * matter here of managing the MTT reference count and grabbing the
	 * address of the MTT table entries (for filling in the shared region's
	 * MPT entry).
	 */
	mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp;
	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
	mr->mr_bindinfo = mrhdl->mr_bindinfo;
	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
	mutex_exit(&mrhdl->mr_lock);
	bind = &mr->mr_bindinfo;
	mtt = mr->mr_mttrsrcp;

	/*
	 * Increment the MTT reference count (to reflect the fact that
	 * the MTT is now shared)
	 */
	(void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp);

	/*
	 * Update the new "bind" virtual address.  Do some extra work here
	 * to ensure proper alignment.  That is, make sure that the page
	 * offset for the beginning of the old range is the same as the
	 * offset for this new mapping
	 */
	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
	    (mr->mr_bindinfo.bi_addr & pgsize_msk));

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * in the next step when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.lr = 1;
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
	/* page_sz holds log2 of the MTT page size, biased by 0xC (12) */
	mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd = pd->pd_pdnum;
	mpt_entry.start_addr = bind->bi_addr;
	mpt_entry.reg_win_len = bind->bi_len;
	mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mr_register_shared_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mrshared_fail5;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp = mpt;
	mr->mr_mttrsrcp = mtt;
	mr->mr_pdhdl = pd;
	mr->mr_rsrcp = rsrc;
	mr->mr_is_umem = mr_is_umem;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the tavor_umap_umemlock_cb()
	 * callback and tavor_mr_deregister().
	 */
	if (mr_is_umem) {
		tavor_umap_db_add(umapdb);
	}

	*mrhdl_new = mr;

	TAVOR_TNF_EXIT(tavor_mr_register_shared);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrshared_fail5:
	/* Undo the MTT refcnt bump and, for umem, free the umap DB entry */
	(void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
	if (mr_is_umem) {
		tavor_umap_db_free(umapdb);
	}
mrshared_fail4:
	/* Unpin the userland pages locked above */
	if (mr_is_umem) {
		ddi_umem_unlock(umem_cookie);
	}
mrshared_fail3:
	/* Release the MR software handle */
	tavor_rsrc_free(state, &rsrc);
mrshared_fail2:
	/* Release the MPT entry */
	tavor_rsrc_free(state, &mpt);
mrshared_fail1:
	/* Drop the PD reference taken at the top */
	tavor_pd_refcnt_dec(pd);
mrshared_fail:
	TNF_PROBE_1(tavor_mr_register_shared_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_register_shared);
	return (status);
}
468 466
469 467
/*
 * tavor_mr_deregister()
 *    Context: Can be called from interrupt or base context.
 *
 *    Tear down the memory region "*mrhdl".  The "level" argument controls
 *    how much cleanup is done: at TAVOR_MR_DEREG_ALL and above, MPT
 *    ownership is first reclaimed from the hardware (HW2SW_MPT); at
 *    TAVOR_MR_DEREG_NO_HW2SW_MPT and above, the memory is unbound.
 *    Lesser levels skip those steps (used by the reregistration path).
 *    On success, *mrhdl is set to NULL.
 */
/* ARGSUSED */
int
tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
	tavor_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	tavor_umap_db_entry_t	*umapdb;
	tavor_pdhdl_t		pd;
	tavor_mrhdl_t		mr;
	tavor_bind_info_t	*bind;
	uint64_t		value;
	int			status, shared_mtt;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_deregister);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
		TNF_PROBE_1(tavor_mr_deregister_fail, TAVOR_TNF_ERROR, "",
		    tnf_string, msg, errormsg);
		TAVOR_TNF_EXIT(tavor_mr_deregister);
		return (status);
	}

	/*
	 * Pull all the necessary information from the Tavor Memory Region
	 * handle.  This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of the this
	 * deregistration
	 */
	mr = *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt = mr->mr_mptrsrcp;
	mtt = mr->mr_mttrsrcp;
	mtt_refcnt = mr->mr_mttrefcntp;
	rsrc = mr->mr_rsrcp;
	pd = mr->mr_pdhdl;
	bind = &mr->mr_bindinfo;

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of the tavor_umap_umemlock_cb() callback.
	 * If so, then jump to the end and free the remaining resources.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		goto mrdereg_finish_cleanup;
	}

	/*
	 * We must drop the "mr_lock" here to ensure that both SLEEP and
	 * NOSLEEP calls into the firmware work as expected.  Also, if two
	 * threads are attemping to access this MR (via de-register,
	 * re-register, or otherwise), then we allow the firmware to enforce
	 * the checking, that only one deregister is valid.
	 */
	mutex_exit(&mr->mr_lock);

	/*
	 * Reclaim MPT entry from hardware (if necessary).  Since the
	 * tavor_mr_deregister() routine is used in the memory region
	 * reregistration process as well, it is possible that we will
	 * not always wish to reclaim ownership of the MPT.  Check the
	 * "level" arg and, if necessary, attempt to reclaim it.  If
	 * the ownership transfer fails for any reason, we check to see
	 * what command status was returned from the hardware.  The only
	 * "expected" error status is the one that indicates an attempt to
	 * deregister a memory region that has memory windows bound to it
	 */
	if (level >= TAVOR_MR_DEREG_ALL) {
		status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT,
		    NULL, 0, mpt->tr_indx, sleep);
		if (status != TAVOR_CMD_SUCCESS) {
			if (status == TAVOR_CMD_REG_BOUND) {
				/* Windows still bound: caller must retry */
				TAVOR_TNF_EXIT(tavor_mr_deregister);
				return (IBT_MR_IN_USE);
			} else {
				cmn_err(CE_CONT, "Tavor: HW2SW_MPT command "
				    "failed: %08x\n", status);
				TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail,
				    TAVOR_TNF_ERROR, "", tnf_uint, status,
				    status);
				TAVOR_TNF_EXIT(tavor_mr_deregister);
				return (IBT_INVALID_PARAM);
			}
		}
	}

	/*
	 * Re-grab the mr_lock here.  Since further access to the protected
	 * 'mr' structure is needed, and we would have returned previously for
	 * the multiple deregistration case, we can safely grab the lock here.
	 */
	mutex_enter(&mr->mr_lock);

	/*
	 * If the memory had come from userland, then we do a lookup in the
	 * "userland resources database".  On success, we free the entry, call
	 * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
	 * an indication that the umem_lockmemory() callback has called
	 * tavor_mr_deregister()), we call ddi_umem_unlock() and invalidate
	 * the "mr_umemcookie" field in the MR handle (this will be used
	 * later to detect that only partial cleaup still remains to be done
	 * on the MR handle).
	 */
	if (mr->mr_is_umem) {
		status = tavor_umap_db_find(state->ts_instance,
		    (uint64_t)(uintptr_t)mr->mr_umemcookie,
		    MLNX_UMAP_MRMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
		    &umapdb);
		if (status == DDI_SUCCESS) {
			tavor_umap_db_free(umapdb);
			ddi_umem_unlock(mr->mr_umemcookie);
		} else {
			ddi_umem_unlock(mr->mr_umemcookie);
			mr->mr_umemcookie = NULL;
		}
	}

	/* mtt_refcnt is NULL in the case of tavor_dma_mr_register() */
	if (mtt_refcnt != NULL) {
		/*
		 * Decrement the MTT reference count.  Since the MTT resource
		 * may be shared between multiple memory regions (as a result
		 * of a "RegisterSharedMR" verb) it is important that we not
		 * free up or unbind resources prematurely.  If it's not shared
		 * (as indicated by the return status), then free the resource.
		 */
		shared_mtt = tavor_mtt_refcnt_dec(mtt_refcnt);
		if (!shared_mtt) {
			tavor_rsrc_free(state, &mtt_refcnt);
		}

		/*
		 * Free up the MTT entries and unbind the memory.  Here,
		 * as above, we attempt to free these resources only if
		 * it is appropriate to do so.
		 */
		if (!shared_mtt) {
			if (level >= TAVOR_MR_DEREG_NO_HW2SW_MPT) {
				tavor_mr_mem_unbind(state, bind);
			}
			tavor_rsrc_free(state, &mtt);
		}
	}

	/*
	 * If the MR handle has been invalidated, then drop the
	 * lock and return success.  Note: This only happens because
	 * the umem_lockmemory() callback has been triggered.  The
	 * cleanup here is partial, and further cleanup (in a
	 * subsequent tavor_mr_deregister() call) will be necessary.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		TAVOR_TNF_EXIT(tavor_mr_deregister);
		return (DDI_SUCCESS);
	}

mrdereg_finish_cleanup:
	mutex_exit(&mr->mr_lock);

	/* Free the Tavor Memory Region handle */
	tavor_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	tavor_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Set the mrhdl pointer to NULL and return success */
	*mrhdl = NULL;

	TAVOR_TNF_EXIT(tavor_mr_deregister);
	return (DDI_SUCCESS);
}
657 655
658 656
659 657 /*
↓ open down ↓ |
305 lines elided |
↑ open up ↑ |
660 658 * tavor_mr_query()
661 659 * Context: Can be called from interrupt or base context.
662 660 */
663 661 /* ARGSUSED */
664 662 int
665 663 tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr,
666 664 ibt_mr_query_attr_t *attr)
667 665 {
668 666 TAVOR_TNF_ENTER(tavor_mr_query);
669 667
670 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))
671 -
672 668 mutex_enter(&mr->mr_lock);
673 669
674 670 /*
675 671 * Check here to see if the memory region has already been partially
676 672 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
677 673 * If so, this is an error, return failure.
678 674 */
679 675 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
680 676 mutex_exit(&mr->mr_lock);
681 677 TNF_PROBE_0(tavor_mr_query_inv_mrhdl_fail, TAVOR_TNF_ERROR, "");
682 678 TAVOR_TNF_EXIT(tavor_mr_query);
683 679 return (IBT_MR_HDL_INVALID);
684 680 }
685 681
686 682 /* Fill in the queried attributes */
687 683 attr->mr_attr_flags = mr->mr_accflag;
688 684 attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;
689 685
690 686 /* Fill in the "local" attributes */
691 687 attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
692 688 attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
693 689 attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
694 690
695 691 /*
696 692 * Fill in the "remote" attributes (if necessary). Note: the
697 693 * remote attributes are only valid if the memory region has one
698 694 * or more of the remote access flags set.
699 695 */
700 696 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
701 697 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
702 698 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
703 699 attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
704 700 attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
705 701 attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len;
706 702 }
707 703
708 704 /*
709 705 * If region is mapped for streaming (i.e. noncoherent), then set sync
710 706 * is required
711 707 */
712 708 attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
713 709 IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;
714 710
715 711 mutex_exit(&mr->mr_lock);
716 712 TAVOR_TNF_EXIT(tavor_mr_query);
717 713 return (DDI_SUCCESS);
718 714 }
719 715
720 716
721 717 /*
722 718 * tavor_mr_reregister()
723 719 * Context: Can be called from interrupt or base context.
724 720 */
725 721 int
726 722 tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mr,
727 723 tavor_pdhdl_t pd, ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new,
728 724 tavor_mr_options_t *op)
729 725 {
730 726 tavor_bind_info_t bind;
731 727 int status;
732 728
733 729 TAVOR_TNF_ENTER(tavor_mr_reregister);
734 730
735 731 /*
736 732 * Fill in the "bind" struct. This struct provides the majority
737 733 * of the information that will be used to distinguish between an
738 734 * "addr" binding (as is the case here) and a "buf" binding (see
739 735 * below). The "bind" struct is later passed to tavor_mr_mem_bind()
740 736 * which does most of the "heavy lifting" for the Tavor memory
741 737 * registration (and reregistration) routines.
742 738 */
743 739 bind.bi_type = TAVOR_BINDHDL_VADDR;
744 740 bind.bi_addr = mr_attr->mr_vaddr;
745 741 bind.bi_len = mr_attr->mr_len;
746 742 bind.bi_as = mr_attr->mr_as;
747 743 bind.bi_flags = mr_attr->mr_flags;
748 744 status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
749 745 if (status != DDI_SUCCESS) {
750 746 TNF_PROBE_0(tavor_mr_reregister_cmnreg_fail,
751 747 TAVOR_TNF_ERROR, "");
752 748 TAVOR_TNF_EXIT(tavor_mr_reregister);
753 749 return (status);
754 750 }
755 751
756 752 TAVOR_TNF_EXIT(tavor_mr_reregister);
757 753 return (DDI_SUCCESS);
758 754 }
759 755
760 756
761 757 /*
762 758 * tavor_mr_reregister_buf()
763 759 * Context: Can be called from interrupt or base context.
764 760 */
765 761 int
766 762 tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr,
767 763 tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
768 764 tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op)
769 765 {
770 766 tavor_bind_info_t bind;
771 767 int status;
772 768
773 769 TAVOR_TNF_ENTER(tavor_mr_reregister_buf);
774 770
775 771 /*
776 772 * Fill in the "bind" struct. This struct provides the majority
777 773 * of the information that will be used to distinguish between an
778 774 * "addr" binding (see above) and a "buf" binding (as is the case
779 775 * here). The "bind" struct is later passed to tavor_mr_mem_bind()
780 776 * which does most of the "heavy lifting" for the Tavor memory
781 777 * registration routines. Note: We have chosen to provide
782 778 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
783 779 * not set). It is not critical what value we choose here as it need
784 780 * only be unique for the given RKey (which will happen by default),
785 781 * so the choice here is somewhat arbitrary.
786 782 */
787 783 bind.bi_type = TAVOR_BINDHDL_BUF;
788 784 bind.bi_buf = buf;
789 785 if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
790 786 bind.bi_addr = mr_attr->mr_vaddr;
791 787 } else {
792 788 bind.bi_addr = (uint64_t)(uintptr_t)buf->b_un.b_addr;
793 789 }
794 790 bind.bi_len = (uint64_t)buf->b_bcount;
795 791 bind.bi_flags = mr_attr->mr_flags;
796 792 bind.bi_as = NULL;
797 793 status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
798 794 if (status != DDI_SUCCESS) {
799 795 TNF_PROBE_0(tavor_mr_reregister_buf_cmnreg_fail,
800 796 TAVOR_TNF_ERROR, "");
801 797 TAVOR_TNF_EXIT(tavor_mr_reregister_buf);
802 798 return (status);
803 799 }
804 800
805 801 TAVOR_TNF_EXIT(tavor_mr_reregister_buf);
806 802 return (DDI_SUCCESS);
807 803 }
808 804
809 805
/*
 * tavor_mr_sync()
 *    Context: Can be called from interrupt or base context.
 *
 *    Performs a ddi_dma_sync() on each segment in the "mr_segs" array
 *    ("num_segs" entries).  For every segment we validate the memory
 *    region handle, verify that the requested [vaddr, vaddr+len) range
 *    lies entirely within the region's bound range, map the IBT sync
 *    direction flag onto a DDI sync type, and sync the region's DMA
 *    handle at the corresponding offset.  Returns DDI_SUCCESS, or an
 *    IBT_* error code if any segment fails validation (in which case
 *    segments preceding the bad one have already been synced).
 */
/* ARGSUSED */
int
tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
	tavor_mrhdl_t		mrhdl;
	uint64_t		seg_vaddr, seg_len, seg_end;
	uint64_t		mr_start, mr_end;
	uint_t			type;
	int			status, i;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_sync);

	/*
	 * Process each of the ibt_mr_sync_t's.
	 * NOTE(review): "i" is a signed int compared against the size_t
	 * "num_segs"; fine for realistic segment counts, but worth
	 * confirming callers never pass a huge value.
	 */
	for (i = 0; i < num_segs; i++) {
		mrhdl = (tavor_mrhdl_t)mr_segs[i].ms_handle;

		/* Check for valid memory region handle */
		if (mrhdl == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
			goto mrsync_fail;
		}

		mutex_enter(&mrhdl->mr_lock);

		/*
		 * Check here to see if the memory region has already been
		 * partially deregistered as a result of a
		 * tavor_umap_umemlock_cb() callback.  If so, this is an
		 * error, return failure.
		 */
		if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl2");
			goto mrsync_fail;
		}

		/*
		 * Check for valid bounds on sync request: both the start
		 * and the (inclusive) end of the segment must fall within
		 * the region's bound [mr_start, mr_end] range.
		 */
		seg_vaddr = mr_segs[i].ms_vaddr;
		seg_len	  = mr_segs[i].ms_len;
		seg_end	  = seg_vaddr + seg_len - 1;
		mr_start  = mrhdl->mr_bindinfo.bi_addr;
		mr_end	  = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
		if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_VA_INVALID, "invalid vaddr");
			goto mrsync_fail;
		}
		if ((seg_end < mr_start) || (seg_end > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
			goto mrsync_fail;
		}

		/*
		 * Determine what type (i.e. direction) for sync.
		 * IBT_SYNC_READ maps to DDI_DMA_SYNC_FORDEV (presumably:
		 * make CPU stores visible before the device reads), and
		 * IBT_SYNC_WRITE maps to DDI_DMA_SYNC_FORCPU (make device
		 * writes visible to the CPU) -- verify against the IBTF
		 * ibt_sync_mr() contract.
		 */
		if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
			type = DDI_DMA_SYNC_FORDEV;
		} else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
			type = DDI_DMA_SYNC_FORCPU;
		} else {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sync type");
			goto mrsync_fail;
		}

		/* Sync the relevant slice of the region's DMA handle */
		(void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
		    (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
		mutex_exit(&mrhdl->mr_lock);
	}

	TAVOR_TNF_EXIT(tavor_mr_sync);
	return (DDI_SUCCESS);

mrsync_fail:
	TNF_PROBE_1(tavor_mr_sync_fail, TAVOR_TNF_ERROR, "", tnf_string, msg,
	    errormsg);
	TAVOR_TNF_EXIT(tavor_mr_sync);
	return (status);
}
898 894
899 895
/*
 * tavor_mw_alloc()
 *    Context: Can be called from interrupt or base context.
 *
 *    Allocates a memory window (MW) on protection domain "pd".  The
 *    Tavor hardware uses the same MPT entry type for memory regions and
 *    memory windows, so this allocates an MPT entry and a (shared-format)
 *    MR/MW handle, computes an "unbound" RKey, and passes ownership of
 *    the MPT to the hardware via SW2HW_MPT.  On success the new handle is
 *    returned through "mwhdl".  On failure all allocations and the PD
 *    reference count are rolled back via the goto-cleanup chain below.
 */
int
tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pd, ibt_mw_flags_t flags,
    tavor_mwhdl_t *mwhdl)
{
	tavor_rsrc_t		*mpt, *rsrc;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mwhdl_t		mw;
	uint_t			sleep;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mw_alloc);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MW_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mwalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry (for use as a memory window).  Since the
	 * Tavor hardware uses the MPT entry for memory regions and for
	 * memory windows, we will fill in this MPT with all the necessary
	 * parameters for the memory window.  And then (just as we do for
	 * memory regions) ownership will be passed to the hardware in the
	 * final step below.  If we fail here, we must undo the protection
	 * domain reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mwalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory window (i.e.
	 * the Tavor Memory Window handle).  Note: This is actually the same
	 * software structure used for tracking memory regions, but since many
	 * of the same properties are needed, only a single structure is
	 * necessary.  If we fail here, we must undo the protection domain
	 * reference count and the previous resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mwalloc_fail2;
	}
	mw = (tavor_mwhdl_t)rsrc->tr_addr;

	/*
	 * Calculate an "unbound" RKey from MPT index.  In much the same way
	 * as we do for memory regions (above), this key is constructed from
	 * a "constrained" (which depends on the MPT index) and an
	 * "unconstrained" portion (which may be arbitrarily chosen).
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Note: fewer entries in the MPT
	 * entry are necessary to allocate a memory window.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.reg_win	= TAVOR_MPT_IS_WINDOW;
	mpt_entry.mem_key	= mw->mr_rkey;
	mpt_entry.pd		= pd->pd_pdnum;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mw_alloc_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mwalloc_fail3;
	}

	/*
	 * Fill in the rest of the Tavor Memory Window handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MW.
	 */
	mw->mr_mptrsrcp	= mpt;
	mw->mr_pdhdl	= pd;
	mw->mr_rsrcp	= rsrc;
	*mwhdl = mw;

	TAVOR_TNF_EXIT(tavor_mw_alloc);
	return (DDI_SUCCESS);

/*
 * Cleanup for the failure cases above: each label undoes one more of the
 * steps performed, in reverse order of acquisition.
 */
mwalloc_fail3:
	tavor_rsrc_free(state, &rsrc);
mwalloc_fail2:
	tavor_rsrc_free(state, &mpt);
mwalloc_fail1:
	tavor_pd_refcnt_dec(pd);
mwalloc_fail:
	TNF_PROBE_1(tavor_mw_alloc_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mw_alloc);
	return (status);
}
1030 1025
1031 1026
/*
 * tavor_mw_free()
 *    Context: Can be called from interrupt or base context.
 *
 *    Frees the memory window referenced by "*mwhdl": reclaims the MPT
 *    entry from the hardware (HW2SW_MPT), frees the MW handle and MPT
 *    resources, and drops the protection domain reference.  On success
 *    "*mwhdl" is set to NULL.  "sleep" selects TAVOR_SLEEP/TAVOR_NOSLEEP
 *    allocation behavior and must match the calling context.
 */
int
tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep)
{
	tavor_rsrc_t		*mpt, *rsrc;
	tavor_mwhdl_t		mw;
	int			status;
	char			*errormsg;
	tavor_pdhdl_t		pd;

	TAVOR_TNF_ENTER(tavor_mw_free);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
		TNF_PROBE_1(tavor_mw_free_fail, TAVOR_TNF_ERROR, "",
		    tnf_string, msg, errormsg);
		TAVOR_TNF_EXIT(tavor_mw_free);
		return (status);
	}

	/*
	 * Pull all the necessary information from the Tavor Memory Window
	 * handle.  This is necessary here because the resource for the
	 * MW handle is going to be freed up as part of the this operation.
	 */
	mw = *mwhdl;
	mutex_enter(&mw->mr_lock);
	mpt	= mw->mr_mptrsrcp;
	rsrc	= mw->mr_rsrcp;
	pd	= mw->mr_pdhdl;
	mutex_exit(&mw->mr_lock);

	/*
	 * Reclaim the MPT entry from hardware.  Note: in general, it is
	 * unexpected for this operation to return an error.  If it does,
	 * nothing has been freed yet, so the MW is left intact.
	 */
	status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
	    0, mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail, TAVOR_TNF_ERROR, "",
		    tnf_uint, status, status);
		TAVOR_TNF_EXIT(tavor_mw_free);
		return (IBT_INVALID_PARAM);
	}

	/* Free the Tavor Memory Window handle */
	tavor_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	tavor_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Set the mwhdl pointer to NULL and return success */
	*mwhdl = NULL;

	TAVOR_TNF_EXIT(tavor_mw_free);
	return (DDI_SUCCESS);
}
1105 1099
1106 1100
1107 1101 /*
1108 1102 * tavor_mr_keycalc()
1109 1103 * Context: Can be called from interrupt or base context.
1110 1104 */
1111 1105 void
1112 1106 tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key)
1113 1107 {
1114 1108 uint32_t tmp, log_num_mpt;
1115 1109
1116 1110 /*
1117 1111 * Generate a simple key from counter. Note: We increment this
↓ open down ↓ |
34 lines elided |
↑ open up ↑ |
1118 1112 * static variable _intentionally_ without any kind of mutex around
1119 1113 * it. First, single-threading all operations through a single lock
1120 1114 * would be a bad idea (from a performance point-of-view). Second,
1121 1115 * the upper "unconstrained" bits don't really have to be unique
1122 1116 * because the lower bits are guaranteed to be (although we do make a
1123 1117 * best effort to ensure that they are). Third, the window for the
1124 1118 * race (where both threads read and update the counter at the same
1125 1119 * time) is incredibly small.
1126 1120 * And, lastly, we'd like to make this into a "random" key XXX
1127 1121 */
1128 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(tavor_debug_memkey_cnt))
1129 1122 log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt;
1130 1123 tmp = (tavor_debug_memkey_cnt++) << log_num_mpt;
1131 1124 *key = tmp | indx;
1132 1125 }
1133 1126
1134 1127
/*
 * tavor_mr_common_reg()
 *    Context: Can be called from interrupt or base context.
 *
 *    Common memory registration path used by both the "addr"-based and
 *    "buf"-based register entry points.  Given a protection domain "pd"
 *    and a bind descriptor "bind", this routine: validates the request,
 *    allocates an MPT entry and an MR handle, pins userland memory if
 *    needed, binds the memory to MTT entries, and finally transfers
 *    ownership of the MPT to the hardware (SW2HW_MPT).  On success the
 *    new handle is returned through "mrhdl".  "op" optionally selects
 *    IOMMU-bypass binding, a preallocated DMA handle, and "zero-based"
 *    addressing.  All failures unwind through the goto-cleanup chain at
 *    the bottom, in reverse order of acquisition.
 */
static int
tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	tavor_umap_db_entry_t	*umapdb;
	tavor_sw_refcnt_t	*swrc_tmp;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	ibt_mr_flags_t		flags;
	tavor_bind_info_t	*bh;
	ddi_dma_handle_t	bind_dmahdl;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, mtt_ddrbaseaddr, max_sz;
	uint_t			sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
	int			status, umem_flags, bind_override_addr;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_common_reg);

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU), whether it should be
	 * registered to bypass the IOMMU, and whether or not the resulting
	 * address should be "zero-based" (to aid the alignment restrictions
	 * for QPs).
	 */
	if (op == NULL) {
		bind_type	   = TAVOR_BINDMEM_NORMAL;
		bind_dmahdl	   = NULL;
		bind_override_addr = 0;
	} else {
		bind_type	   = op->mro_bind_type;
		bind_dmahdl	   = op->mro_bind_dmahdl;
		bind_override_addr = op->mro_bind_override_addr;
	}

	/* Extract the flags field from the tavor_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check for invalid length.  Check is the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
		goto mrcommon_fail;
	}

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrcommon_fail;
	}

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * below when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the memory region.  And then
	 * ownership will be passed to the hardware in the final step
	 * below.  If we fail here, we must undo the protection domain
	 * reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mrcommon_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory region (i.e.
	 * the Tavor Memory Region handle).  If we fail here, we must undo
	 * the protection domain reference count and the previous resource
	 * allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mrcommon_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Determine if the memory is from userland and pin the pages
	 * with umem_lockmemory() if necessary.
	 * Then, if this is userland memory, allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo the reference counts
	 * and the previous resource allocations.
	 */
	mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
	if (mr_is_umem) {
		/*
		 * Round the lock range out to whole pages: the length is
		 * grown by the offset of bi_addr within its page, and the
		 * start address is truncated to a page boundary.
		 */
		umem_len   = ptob(btopr(bind->bi_len +
		    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
		umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &tavor_umem_cbops, NULL);
		if (status != 0) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
			goto mrcommon_fail3;
		}

		bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
		if (bind->bi_buf == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed iosetup");
			goto mrcommon_fail3;
		}
		bind->bi_type = TAVOR_BINDHDL_UBUF;
		bind->bi_buf->b_flags |= B_READ;

		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
			goto mrcommon_fail4;
		}
	}

	/*
	 * Setup the bindinfo for the mtt bind call
	 */
	bh = &mr->mr_bindinfo;
	bcopy(bind, bh, sizeof (tavor_bind_info_t));
	bh->bi_bypass = bind_type;
	status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
	    &mtt_pgsize_bits);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(status, "failed mtt bind");
		/*
		 * When mtt_bind fails, freerbuf has already been done,
		 * so make sure not to call it again.
		 */
		bind->bi_type = bh->bi_type;
		goto mrcommon_fail5;
	}
	mr->mr_logmttpgsz = mtt_pgsize_bits;

	/*
	 * Allocate MTT reference count (to track shared memory regions).
	 * This reference count resource may never be used on the given
	 * memory region, but if it is ever later registered as "shared"
	 * memory region then this resource will be necessary.  If we fail
	 * here, we do pretty much the same as above to clean up.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep,
	    &mtt_refcnt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed reference count");
		goto mrcommon_fail6;
	}
	mr->mr_mttrefcntp = mtt_refcnt;
	swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
	TAVOR_MTT_REFCNT_INIT(swrc_tmp);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io	  = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
	mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd	  = pd->pd_pdnum;
	if (bind_override_addr == 0) {
		mpt_entry.start_addr = bh->bi_addr;
	} else {
		/*
		 * "Zero-based" address: keep only the offset below the
		 * MTT page size (the low bits of the original address).
		 */
		bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
		mpt_entry.start_addr = bh->bi_addr;
	}
	mpt_entry.reg_win_len	= bh->bi_len;
	mpt_entry.win_cnt_limit	= TAVOR_UNLIMITED_WIN_BIND;
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mrcommon_fail7;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_umem	  = mr_is_umem;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the tavor_umap_umemlock_cb()
	 * callback and tavor_mr_deregister().
	 */
	if (mr_is_umem) {
		tavor_umap_db_add(umapdb);
	}

	*mrhdl = mr;

	TAVOR_TNF_EXIT(tavor_mr_common_reg);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrcommon_fail7:
	tavor_rsrc_free(state, &mtt_refcnt);
mrcommon_fail6:
	tavor_rsrc_free(state, &mtt);
	tavor_mr_mem_unbind(state, bh);
	bind->bi_type = bh->bi_type;
mrcommon_fail5:
	if (mr_is_umem) {
		tavor_umap_db_free(umapdb);
	}
mrcommon_fail4:
	if (mr_is_umem) {
		/*
		 * Free up the memory ddi_umem_iosetup() allocates
		 * internally.
		 */
		if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
			freerbuf(bind->bi_buf);
			bind->bi_type = TAVOR_BINDHDL_NONE;
		}
		ddi_umem_unlock(umem_cookie);
	}
mrcommon_fail3:
	tavor_rsrc_free(state, &rsrc);
mrcommon_fail2:
	tavor_rsrc_free(state, &mpt);
mrcommon_fail1:
	tavor_pd_refcnt_dec(pd);
mrcommon_fail:
	TNF_PROBE_1(tavor_mr_common_reg_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_common_reg);
	return (status);
}
1488 1470
1489 1471 int
1490 1472 tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
1491 1473 ibt_dmr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl)
1492 1474 {
1493 1475 tavor_rsrc_t *mpt, *rsrc;
1494 1476 tavor_hw_mpt_t mpt_entry;
1495 1477 tavor_mrhdl_t mr;
1496 1478 ibt_mr_flags_t flags;
1497 1479 uint_t sleep;
1498 1480 int status;
1499 1481
1500 1482 /* Extract the flags field */
1501 1483 flags = mr_attr->dmr_flags;
1502 1484
1503 1485 /*
1504 1486 * Check the sleep flag. Ensure that it is consistent with the
1505 1487 * current thread context (i.e. if we are currently in the interrupt
1506 1488 * context, then we shouldn't be attempting to sleep).
1507 1489 */
1508 1490 sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
1509 1491 if ((sleep == TAVOR_SLEEP) &&
1510 1492 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
1511 1493 status = IBT_INVALID_PARAM;
1512 1494 goto mrcommon_fail;
1513 1495 }
1514 1496
1515 1497 /* Increment the reference count on the protection domain (PD) */
1516 1498 tavor_pd_refcnt_inc(pd);
1517 1499
1518 1500 /*
1519 1501 * Allocate an MPT entry. This will be filled in with all the
1520 1502 * necessary parameters to define the memory region. And then
1521 1503 * ownership will be passed to the hardware in the final step
1522 1504 * below. If we fail here, we must undo the protection domain
1523 1505 * reference count.
1524 1506 */
1525 1507 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
1526 1508 if (status != DDI_SUCCESS) {
1527 1509 status = IBT_INSUFF_RESOURCE;
1528 1510 goto mrcommon_fail1;
1529 1511 }
1530 1512
1531 1513 /*
1532 1514 * Allocate the software structure for tracking the memory region (i.e.
↓ open down ↓ |
50 lines elided |
↑ open up ↑ |
1533 1515 * the Tavor Memory Region handle). If we fail here, we must undo
1534 1516 * the protection domain reference count and the previous resource
1535 1517 * allocation.
1536 1518 */
1537 1519 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
1538 1520 if (status != DDI_SUCCESS) {
1539 1521 status = IBT_INSUFF_RESOURCE;
1540 1522 goto mrcommon_fail2;
1541 1523 }
1542 1524 mr = (tavor_mrhdl_t)rsrc->tr_addr;
1543 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1544 1525 bzero(mr, sizeof (*mr));
1545 1526
1546 1527 /*
1547 1528 * Setup and validate the memory region access flags. This means
1548 1529 * translating the IBTF's enable flags into the access flags that
1549 1530 * will be used in later operations.
1550 1531 */
1551 1532 mr->mr_accflag = 0;
1552 1533 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1553 1534 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1554 1535 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1555 1536 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1556 1537 if (flags & IBT_MR_ENABLE_REMOTE_READ)
1557 1538 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1558 1539 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1559 1540 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1560 1541 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1561 1542 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1562 1543
1563 1544 /*
1564 1545 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
1565 1546 * from a certain number of "constrained" bits (the least significant
1566 1547 * bits) and some number of "unconstrained" bits. The constrained
1567 1548 * bits must be set to the index of the entry in the MPT table, but
1568 1549 * the unconstrained bits can be set to any value we wish. Note:
1569 1550 * if no remote access is required, then the RKey value is not filled
1570 1551 * in. Otherwise both Rkey and LKey are given the same value.
1571 1552 */
1572 1553 tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
1573 1554 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
1574 1555 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
1575 1556 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
1576 1557 mr->mr_rkey = mr->mr_lkey;
1577 1558 }
1578 1559
1579 1560 /*
1580 1561 * Fill in the MPT entry. This is the final step before passing
1581 1562 * ownership of the MPT entry to the Tavor hardware. We use all of
1582 1563 * the information collected/calculated above to fill in the
1583 1564 * requisite portions of the MPT.
1584 1565 */
1585 1566 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
1586 1567
1587 1568 mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
1588 1569 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
1589 1570 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1590 1571 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
1591 1572 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
1592 1573 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
1593 1574 mpt_entry.lr = 1;
1594 1575 mpt_entry.phys_addr = 1; /* critical bit for this */
1595 1576 mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
1596 1577
1597 1578 mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
1598 1579 mpt_entry.mem_key = mr->mr_lkey;
1599 1580 mpt_entry.pd = pd->pd_pdnum;
1600 1581 mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;
1601 1582
1602 1583 mpt_entry.start_addr = mr_attr->dmr_paddr;
1603 1584 mpt_entry.reg_win_len = mr_attr->dmr_len;
1604 1585
1605 1586 mpt_entry.mttseg_addr_h = 0;
1606 1587 mpt_entry.mttseg_addr_l = 0;
1607 1588
1608 1589 /*
1609 1590 * Write the MPT entry to hardware. Lastly, we pass ownership of
1610 1591 * the entry to the hardware if needed. Note: in general, this
1611 1592 * operation shouldn't fail. But if it does, we have to undo
1612 1593 * everything we've done above before returning error.
1613 1594 *
1614 1595 * For Tavor, this routine (which is common to the contexts) will only
1615 1596 * set the ownership if needed - the process of passing the context
1616 1597 * itself to HW will take care of setting up the MPT (based on type
1617 1598 * and index).
1618 1599 */
1619 1600
1620 1601 status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1621 1602 sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
1622 1603 if (status != TAVOR_CMD_SUCCESS) {
1623 1604 cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
1624 1605 status);
1625 1606 status = ibc_get_ci_failure(0);
1626 1607 goto mrcommon_fail7;
1627 1608 }
1628 1609
1629 1610 /*
1630 1611 * Fill in the rest of the Tavor Memory Region handle. Having
1631 1612 * successfully transferred ownership of the MPT, we can update the
1632 1613 * following fields for use in further operations on the MR.
1633 1614 */
1634 1615 mr->mr_mptrsrcp = mpt;
1635 1616 mr->mr_mttrsrcp = NULL;
1636 1617 mr->mr_pdhdl = pd;
1637 1618 mr->mr_rsrcp = rsrc;
1638 1619 mr->mr_is_umem = 0;
1639 1620 mr->mr_umemcookie = NULL;
1640 1621 mr->mr_umem_cbfunc = NULL;
1641 1622 mr->mr_umem_cbarg1 = NULL;
1642 1623 mr->mr_umem_cbarg2 = NULL;
1643 1624
1644 1625 *mrhdl = mr;
1645 1626
1646 1627 return (DDI_SUCCESS);
1647 1628
1648 1629 /*
1649 1630 * The following is cleanup for all possible failure cases in this routine
1650 1631 */
1651 1632 mrcommon_fail7:
1652 1633 tavor_rsrc_free(state, &rsrc);
1653 1634 mrcommon_fail2:
1654 1635 tavor_rsrc_free(state, &mpt);
1655 1636 mrcommon_fail1:
1656 1637 tavor_pd_refcnt_dec(pd);
1657 1638 mrcommon_fail:
1658 1639 return (status);
1659 1640 }
1660 1641
1661 1642 /*
1662 1643 * tavor_mr_mtt_bind()
1663 1644 * Context: Can be called from interrupt or base context.
1664 1645 */
1665 1646 int
1666 1647 tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind,
1667 1648 ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsize_bits)
1668 1649 {
1669 1650 uint64_t nummtt;
1670 1651 uint_t sleep;
1671 1652 int status;
1672 1653 char *errormsg;
1673 1654
1674 1655 TAVOR_TNF_ENTER(tavor_mr_common_reg);
1675 1656
1676 1657 /*
1677 1658 * Check the sleep flag. Ensure that it is consistent with the
1678 1659 * current thread context (i.e. if we are currently in the interrupt
1679 1660 * context, then we shouldn't be attempting to sleep).
1680 1661 */
1681 1662 sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
1682 1663 if ((sleep == TAVOR_SLEEP) &&
1683 1664 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
1684 1665 /* Set "status" and "errormsg" and goto failure */
1685 1666 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
1686 1667 goto mrmttbind_fail;
1687 1668 }
1688 1669
1689 1670 /*
1690 1671 * Bind the memory and determine the mapped addresses. This is
1691 1672 * the first of two routines that do all the "heavy lifting" for
1692 1673 * the Tavor memory registration routines. The tavor_mr_mem_bind()
1693 1674 * routine takes the "bind" struct with all its fields filled
1694 1675 * in and returns a list of DMA cookies (for the PCI mapped addresses
1695 1676 * corresponding to the specified address region) which are used by
1696 1677 * the tavor_mr_fast_mtt_write() routine below. If we fail here, we
1697 1678 * must undo all the previous resource allocation (and PD reference
1698 1679 * count).
1699 1680 */
1700 1681 status = tavor_mr_mem_bind(state, bind, bind_dmahdl, sleep);
1701 1682 if (status != DDI_SUCCESS) {
1702 1683 /* Set "status" and "errormsg" and goto failure */
1703 1684 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
1704 1685 goto mrmttbind_fail;
1705 1686 }
1706 1687
1707 1688 /*
1708 1689 * Determine number of pages spanned. This routine uses the
1709 1690 * information in the "bind" struct to determine the required
1710 1691 * number of MTT entries needed (and returns the suggested page size -
1711 1692 * as a "power-of-2" - for each MTT entry).
1712 1693 */
1713 1694 nummtt = tavor_mr_nummtt_needed(state, bind, mtt_pgsize_bits);
1714 1695
1715 1696 /*
1716 1697 * Allocate the MTT entries. Use the calculations performed above to
1717 1698 * allocate the required number of MTT entries. Note: MTT entries are
1718 1699 * allocated in "MTT segments" which consist of complete cachelines
1719 1700 * (i.e. 8 entries, 16 entries, etc.) So the TAVOR_NUMMTT_TO_MTTSEG()
1720 1701 * macro is used to do the proper conversion. If we fail here, we
1721 1702 * must not only undo all the previous resource allocation (and PD
1722 1703 * reference count), but we must also unbind the memory.
1723 1704 */
1724 1705 status = tavor_rsrc_alloc(state, TAVOR_MTT,
1725 1706 TAVOR_NUMMTT_TO_MTTSEG(nummtt), sleep, mtt);
1726 1707 if (status != DDI_SUCCESS) {
1727 1708 /* Set "status" and "errormsg" and goto failure */
1728 1709 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT");
1729 1710 goto mrmttbind_fail2;
1730 1711 }
1731 1712
1732 1713 /*
1733 1714 * Write the mapped addresses into the MTT entries. This is part two
1734 1715 * of the "heavy lifting" routines that we talked about above. Note:
1735 1716 * we pass the suggested page size from the earlier operation here.
1736 1717 * And if we fail here, we again do pretty much the same huge clean up.
1737 1718 */
1738 1719 status = tavor_mr_fast_mtt_write(*mtt, bind, *mtt_pgsize_bits);
1739 1720 if (status != DDI_SUCCESS) {
1740 1721 /* Set "status" and "errormsg" and goto failure */
1741 1722 TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed write mtt");
1742 1723 goto mrmttbind_fail3;
1743 1724 }
1744 1725 TAVOR_TNF_EXIT(tavor_mr_mtt_bind);
1745 1726 return (DDI_SUCCESS);
1746 1727
1747 1728 /*
1748 1729 * The following is cleanup for all possible failure cases in this routine
1749 1730 */
1750 1731 mrmttbind_fail3:
1751 1732 tavor_rsrc_free(state, mtt);
1752 1733 mrmttbind_fail2:
1753 1734 tavor_mr_mem_unbind(state, bind);
1754 1735 mrmttbind_fail:
1755 1736 TNF_PROBE_1(tavor_mr_mtt_bind_fail, TAVOR_TNF_ERROR, "",
1756 1737 tnf_string, msg, errormsg);
1757 1738 TAVOR_TNF_EXIT(tavor_mr_mtt_bind);
1758 1739 return (status);
1759 1740 }
1760 1741
1761 1742
1762 1743 /*
1763 1744 * tavor_mr_mtt_unbind()
1764 1745 * Context: Can be called from interrupt or base context.
1765 1746 */
1766 1747 int
1767 1748 tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind,
1768 1749 tavor_rsrc_t *mtt)
1769 1750 {
1770 1751 TAVOR_TNF_ENTER(tavor_mr_mtt_unbind);
1771 1752
1772 1753 /*
1773 1754 * Free up the MTT entries and unbind the memory. Here, as above, we
1774 1755 * attempt to free these resources only if it is appropriate to do so.
1775 1756 */
1776 1757 tavor_mr_mem_unbind(state, bind);
1777 1758 tavor_rsrc_free(state, &mtt);
1778 1759
1779 1760 TAVOR_TNF_EXIT(tavor_mr_mtt_unbind);
1780 1761 return (DDI_SUCCESS);
1781 1762 }
1782 1763
1783 1764
/*
 * tavor_mr_common_rereg()
 *    Context: Can be called from interrupt or base context.
 *
 * Common code for reregistering an existing memory region "mr".  Depending
 * on the bits set in bind->bi_flags, this may change the region's
 * protection domain (IBT_MR_CHANGE_PD, using "pd"), its access flags
 * (IBT_MR_CHANGE_ACCESS), and/or its address translation
 * (IBT_MR_CHANGE_TRANSLATION, using "bind" and "op").  On success the
 * (unchanged) handle is returned through "mrhdl_new".  On most failures
 * the region is deregistered entirely before returning, per the IBTF
 * rereg contract.  Returns DDI_SUCCESS or an IBTF error status.
 */
static int
tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op)
{
	tavor_rsrc_t		*mpt;
	ibt_mr_attr_flags_t	acc_flags_to_use;
	ibt_mr_flags_t		flags;
	tavor_pdhdl_t		pd_to_use;
	tavor_hw_mpt_t		mpt_entry;
	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
	uint_t			sleep, dereg_level;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_common_rereg);

	/*
	 * Check here to see if the memory region corresponds to a userland
	 * mapping.  Reregistration of userland memory regions is not
	 * currently supported.  Return failure.  XXX
	 */
	if (mr->mr_is_umem) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
		goto mrrereg_fail;
	}

	/* Held across all MPT ownership transfers and handle updates below */
	mutex_enter(&mr->mr_lock);

	/* Pull MPT resource pointer from the Tavor Memory Region handle */
	mpt = mr->mr_mptrsrcp;

	/* Extract the flags field from the tavor_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		mutex_exit(&mr->mr_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrrereg_fail;
	}

	/*
	 * First step is to temporarily invalidate the MPT entry.  This
	 * regains ownership from the hardware, and gives us the opportunity
	 * to modify the entry.  Note: The HW2SW_MPT command returns the
	 * current MPT entry contents.  These are saved away here because
	 * they will be reused in a later step below.  If the region has
	 * bound memory windows, then we fail, returning an "in use" error
	 * code.  Otherwise, this is an unexpected error and we deregister
	 * the memory region and return error.
	 *
	 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		if (status == TAVOR_CMD_REG_BOUND) {
			TAVOR_TNF_EXIT(tavor_mr_common_rereg);
			return (IBT_MR_IN_USE);
		} else {
			cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: "
			    "%08x\n", status);

			/*
			 * Call deregister and ensure that all current
			 * resources get freed up
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			TNF_PROBE_1(tavor_mr_common_rereg_hw2sw_mpt_cmd_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
			TAVOR_TNF_EXIT(tavor_mr_common_rereg);
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * If we're changing the protection domain, then validate the new one
	 */
	if (flags & IBT_MR_CHANGE_PD) {

		/* Check for valid PD handle pointer */
		if (pd == NULL) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
			goto mrrereg_fail;
		}

		/* Use the new PD handle in all operations below */
		pd_to_use = pd;

	} else {
		/* Use the current PD handle in all operations below */
		pd_to_use = mr->mr_pdhdl;
	}

	/*
	 * If we're changing access permissions, then validate the new ones
	 */
	if (flags & IBT_MR_CHANGE_ACCESS) {
		/*
		 * Validate the access flags.  Both remote write and remote
		 * atomic require the local write flag to be set
		 */
		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_ACCESS_REQ_INVALID,
			    "invalid access flags");
			goto mrrereg_fail;
		}

		/*
		 * Setup and validate the memory region access flags.  This
		 * means translating the IBTF's enable flags into the access
		 * flags that will be used in later operations.
		 */
		acc_flags_to_use = 0;
		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_READ)
			acc_flags_to_use |= IBT_MR_REMOTE_READ;
		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;

	} else {
		/* Keep the region's current access flags */
		acc_flags_to_use = mr->mr_accflag;
	}

	/*
	 * If we're modifying the translation, then figure out whether
	 * we can reuse the current MTT resources.  This means calling
	 * tavor_mr_rereg_xlat_helper() which does most of the heavy lifting
	 * for the reregistration.  If the current memory region contains
	 * sufficient MTT entries for the new regions, then it will be
	 * reused and filled in.  Otherwise, new entries will be allocated,
	 * the old ones will be freed, and the new entries will be filled
	 * in.  Note: If we're not modifying the translation, then we
	 * should already have all the information we need to update the MPT.
	 * Also note: If tavor_mr_rereg_xlat_helper() fails, it will return
	 * a "dereg_level" which is the level of cleanup that needs to be
	 * passed to tavor_mr_deregister() to finish the cleanup.
	 */
	if (flags & IBT_MR_CHANGE_TRANSLATION) {
		status = tavor_mr_rereg_xlat_helper(state, mr, bind, op,
		    &mtt_addr_to_use, sleep, &dereg_level);
		if (status != DDI_SUCCESS) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all resources get
			 * properly freed up.
			 */
			if (tavor_mr_deregister(state, &mr, dereg_level,
			    sleep) != DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(status, "failed rereg helper");
			goto mrrereg_fail;
		}
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	} else {
		/*
		 * Translation unchanged: reconstruct the 64-bit MTT segment
		 * address from the high/low halves saved by HW2SW_MPT above
		 * (low half is stored shifted right by 6 bits in the MPT).
		 */
		mtt_addr_to_use = (((uint64_t)mpt_entry.mttseg_addr_h << 32) |
		    ((uint64_t)mpt_entry.mttseg_addr_l << 6));
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	}

	/*
	 * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
	 * when the region was first registered, each key is formed from
	 * "constrained" bits and "unconstrained" bits.  Note: If no remote
	 * access is required, then the RKey value is not filled in.  Otherwise
	 * both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Update the MPT entry with the new information.  Some of this
	 * information is retained from the previous operation, some of
	 * it is new based on request.
	 */
	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key = mr->mr_lkey;
	mpt_entry.pd = pd_to_use->pd_pdnum;
	mpt_entry.start_addr = vaddr_to_use;
	mpt_entry.reg_win_len = len_to_use;
	mpt_entry.mttseg_addr_h = mtt_addr_to_use >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr_to_use >> 6;

	/*
	 * Write the updated MPT entry to hardware
	 *
	 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		/*
		 * Call deregister and ensure that all current resources get
		 * properly freed up.  Unnecessary here to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above.
		 */
		if (tavor_mr_deregister(state, &mr,
		    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
			TAVOR_WARNING(state, "failed to deregister memory "
			    "region");
		}
		TNF_PROBE_1(tavor_mr_common_rereg_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		TAVOR_TNF_EXIT(tavor_mr_common_rereg);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * If we're changing PD, then update their reference counts now.
	 * This means decrementing the reference count on the old PD and
	 * incrementing the reference count on the new PD.
	 */
	if (flags & IBT_MR_CHANGE_PD) {
		tavor_pd_refcnt_dec(mr->mr_pdhdl);
		tavor_pd_refcnt_inc(pd);
	}

	/*
	 * Update the contents of the Tavor Memory Region handle to reflect
	 * what has been changed.
	 */
	mr->mr_pdhdl = pd_to_use;
	mr->mr_accflag = acc_flags_to_use;
	mr->mr_is_umem = 0;
	mr->mr_umemcookie = NULL;

	/* New MR handle is same as the old */
	*mrhdl_new = mr;
	mutex_exit(&mr->mr_lock);

	TAVOR_TNF_EXIT(tavor_mr_common_rereg);
	return (DDI_SUCCESS);

mrrereg_fail:
	TNF_PROBE_1(tavor_mr_common_rereg_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_common_rereg);
	return (status);
}
2101 2080
2102 2081
2103 2082 /*
2104 2083 * tavor_mr_rereg_xlat_helper
2105 2084 * Context: Can be called from interrupt or base context.
2106 2085 * Note: This routine expects the "mr_lock" to be held when it
2107 2086 * is called. Upon returning failure, this routine passes information
2108 2087 * about what "dereg_level" should be passed to tavor_mr_deregister().
2109 2088 */
2110 2089 static int
2111 2090 tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
2112 2091 tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
2113 2092 uint_t sleep, uint_t *dereg_level)
2114 2093 {
2115 2094 tavor_rsrc_pool_info_t *rsrc_pool;
2116 2095 tavor_rsrc_t *mtt, *mtt_refcnt;
2117 2096 tavor_sw_refcnt_t *swrc_old, *swrc_new;
2118 2097 ddi_dma_handle_t dmahdl;
2119 2098 uint64_t nummtt_needed, nummtt_in_currrsrc, max_sz;
2120 2099 uint64_t mtt_ddrbaseaddr;
2121 2100 uint_t mtt_pgsize_bits, bind_type, reuse_dmahdl;
2122 2101 int status;
2123 2102 char *errormsg;
2124 2103
2125 2104 TAVOR_TNF_ENTER(tavor_mr_rereg_xlat_helper);
2126 2105
2127 2106 ASSERT(MUTEX_HELD(&mr->mr_lock));
2128 2107
2129 2108 /*
2130 2109 * Check the "options" flag. Currently this flag tells the driver
2131 2110 * whether or not the region should be bound normally (i.e. with
2132 2111 * entries written into the PCI IOMMU) or whether it should be
2133 2112 * registered to bypass the IOMMU.
2134 2113 */
2135 2114 if (op == NULL) {
2136 2115 bind_type = TAVOR_BINDMEM_NORMAL;
2137 2116 } else {
2138 2117 bind_type = op->mro_bind_type;
2139 2118 }
2140 2119
2141 2120 /*
2142 2121 * Check for invalid length. Check is the length is zero or if the
2143 2122 * length is larger than the maximum configured value. Return error
2144 2123 * if it is.
2145 2124 */
2146 2125 max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
2147 2126 if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
2148 2127 /*
2149 2128 * Deregister will be called upon returning failure from this
2150 2129 * routine. This will ensure that all current resources get
2151 2130 * properly freed up. Unnecessary to attempt to regain
2152 2131 * software ownership of the MPT entry as that has already
2153 2132 * been done above (in tavor_mr_reregister())
2154 2133 */
2155 2134 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT;
2156 2135
2157 2136 /* Set "status" and "errormsg" and goto failure */
2158 2137 TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
2159 2138 goto mrrereghelp_fail;
2160 2139 }
2161 2140
2162 2141 /*
2163 2142 * Determine the number of pages necessary for new region and the
2164 2143 * number of pages supported by the current MTT resources
2165 2144 */
2166 2145 nummtt_needed = tavor_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
2167 2146 nummtt_in_currrsrc = mr->mr_mttrsrcp->tr_len >> TAVOR_MTT_SIZE_SHIFT;
2168 2147
2169 2148 /*
2170 2149 * Depending on whether we have enough pages or not, the next step is
2171 2150 * to fill in a set of MTT entries that reflect the new mapping. In
2172 2151 * the first case below, we already have enough entries. This means
2173 2152 * we need to unbind the memory from the previous mapping, bind the
2174 2153 * memory for the new mapping, write the new MTT entries, and update
2175 2154 * the mr to reflect the changes.
2176 2155 * In the second case below, we do not have enough entries in the
2177 2156 * current mapping. So, in this case, we need not only to unbind the
2178 2157 * current mapping, but we need to free up the MTT resources associated
2179 2158 * with that mapping. After we've successfully done that, we continue
2180 2159 * by binding the new memory, allocating new MTT entries, writing the
2181 2160 * new MTT entries, and updating the mr to reflect the changes.
2182 2161 */
2183 2162
2184 2163 /*
2185 2164 * If this region is being shared (i.e. MTT refcount != 1), then we
2186 2165 * can't reuse the current MTT resources regardless of their size.
2187 2166 * Instead we'll need to alloc new ones (below) just as if there
2188 2167 * hadn't been enough room in the current entries.
2189 2168 */
2190 2169 swrc_old = (tavor_sw_refcnt_t *)mr->mr_mttrefcntp->tr_addr;
2191 2170 if (TAVOR_MTT_IS_NOT_SHARED(swrc_old) &&
2192 2171 (nummtt_needed <= nummtt_in_currrsrc)) {
2193 2172
2194 2173 /*
2195 2174 * Unbind the old mapping for this memory region, but retain
2196 2175 * the ddi_dma_handle_t (if possible) for reuse in the bind
2197 2176 * operation below. Note: If original memory region was
2198 2177 * bound for IOMMU bypass and the new region can not use
2199 2178 * bypass, then a new DMA handle will be necessary.
2200 2179 */
2201 2180 if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2202 2181 mr->mr_bindinfo.bi_free_dmahdl = 0;
2203 2182 tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
2204 2183 dmahdl = mr->mr_bindinfo.bi_dmahdl;
2205 2184 reuse_dmahdl = 1;
2206 2185 } else {
2207 2186 tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
2208 2187 dmahdl = NULL;
2209 2188 reuse_dmahdl = 0;
2210 2189 }
2211 2190
2212 2191 /*
2213 2192 * Bind the new memory and determine the mapped addresses.
2214 2193 * As described, this routine and tavor_mr_fast_mtt_write()
2215 2194 * do the majority of the work for the memory registration
2216 2195 * operations. Note: When we successfully finish the binding,
2217 2196 * we will set the "bi_free_dmahdl" flag to indicate that
2218 2197 * even though we may have reused the ddi_dma_handle_t we do
2219 2198 * wish it to be freed up at some later time. Note also that
2220 2199 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2221 2200 */
2222 2201 bind->bi_bypass = bind_type;
2223 2202 status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
2224 2203 if (status != DDI_SUCCESS) {
2225 2204 if (reuse_dmahdl) {
2226 2205 ddi_dma_free_handle(&dmahdl);
2227 2206 }
2228 2207
2229 2208 /*
2230 2209 * Deregister will be called upon returning failure
2231 2210 * from this routine. This will ensure that all
2232 2211 * current resources get properly freed up.
2233 2212 * Unnecessary to attempt to regain software ownership
2234 2213 * of the MPT entry as that has already been done
2235 2214 * above (in tavor_mr_reregister()). Also unnecessary
2236 2215 * to attempt to unbind the memory.
2237 2216 */
2238 2217 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2239 2218
2240 2219 /* Set "status" and "errormsg" and goto failure */
2241 2220 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
2242 2221 goto mrrereghelp_fail;
2243 2222 }
2244 2223 if (reuse_dmahdl) {
2245 2224 bind->bi_free_dmahdl = 1;
2246 2225 }
2247 2226
2248 2227 /*
2249 2228 * Using the new mapping, but reusing the current MTT
2250 2229 * resources, write the updated entries to MTT
2251 2230 */
2252 2231 mtt = mr->mr_mttrsrcp;
2253 2232 status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
2254 2233 if (status != DDI_SUCCESS) {
2255 2234 /*
2256 2235 * Deregister will be called upon returning failure
2257 2236 * from this routine. This will ensure that all
2258 2237 * current resources get properly freed up.
2259 2238 * Unnecessary to attempt to regain software ownership
2260 2239 * of the MPT entry as that has already been done
2261 2240 * above (in tavor_mr_reregister()). Also unnecessary
2262 2241 * to attempt to unbind the memory.
2263 2242 *
2264 2243 * But we do need to unbind the newly bound memory
2265 2244 * before returning.
2266 2245 */
2267 2246 tavor_mr_mem_unbind(state, bind);
2268 2247 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2269 2248
2270 2249 /* Set "status" and "errormsg" and goto failure */
2271 2250 TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
2272 2251 "failed write mtt");
2273 2252 goto mrrereghelp_fail;
2274 2253 }
2275 2254
2276 2255 /* Put the updated information into the Mem Region handle */
2277 2256 mr->mr_bindinfo = *bind;
2278 2257 mr->mr_logmttpgsz = mtt_pgsize_bits;
2279 2258
2280 2259 } else {
2281 2260 /*
2282 2261 * Check if the memory region MTT is shared by any other MRs.
2283 2262 * Since the resource may be shared between multiple memory
2284 2263 * regions (as a result of a "RegisterSharedMR()" verb) it is
2285 2264 * important that we not unbind any resources prematurely.
2286 2265 */
2287 2266 if (!TAVOR_MTT_IS_SHARED(swrc_old)) {
2288 2267 /*
2289 2268 * Unbind the old mapping for this memory region, but
2290 2269 * retain the ddi_dma_handle_t for reuse in the bind
2291 2270 * operation below. Note: This can only be done here
2292 2271 * because the region being reregistered is not
2293 2272 * currently shared. Also if original memory region
2294 2273 * was bound for IOMMU bypass and the new region can
2295 2274 * not use bypass, then a new DMA handle will be
2296 2275 * necessary.
2297 2276 */
2298 2277 if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2299 2278 mr->mr_bindinfo.bi_free_dmahdl = 0;
2300 2279 tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
2301 2280 dmahdl = mr->mr_bindinfo.bi_dmahdl;
2302 2281 reuse_dmahdl = 1;
2303 2282 } else {
2304 2283 tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
2305 2284 dmahdl = NULL;
2306 2285 reuse_dmahdl = 0;
2307 2286 }
2308 2287 } else {
2309 2288 dmahdl = NULL;
2310 2289 reuse_dmahdl = 0;
2311 2290 }
2312 2291
2313 2292 /*
2314 2293 * Bind the new memory and determine the mapped addresses.
2315 2294 * As described, this routine and tavor_mr_fast_mtt_write()
2316 2295 * do the majority of the work for the memory registration
2317 2296 * operations. Note: When we successfully finish the binding,
2318 2297 * we will set the "bi_free_dmahdl" flag to indicate that
2319 2298 * even though we may have reused the ddi_dma_handle_t we do
2320 2299 * wish it to be freed up at some later time. Note also that
2321 2300 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2322 2301 */
2323 2302 bind->bi_bypass = bind_type;
2324 2303 status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
2325 2304 if (status != DDI_SUCCESS) {
2326 2305 if (reuse_dmahdl) {
2327 2306 ddi_dma_free_handle(&dmahdl);
2328 2307 }
2329 2308
2330 2309 /*
2331 2310 * Deregister will be called upon returning failure
2332 2311 * from this routine. This will ensure that all
2333 2312 * current resources get properly freed up.
2334 2313 * Unnecessary to attempt to regain software ownership
2335 2314 * of the MPT entry as that has already been done
2336 2315 * above (in tavor_mr_reregister()). Also unnecessary
2337 2316 * to attempt to unbind the memory.
2338 2317 */
2339 2318 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2340 2319
2341 2320 /* Set "status" and "errormsg" and goto failure */
2342 2321 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
2343 2322 goto mrrereghelp_fail;
2344 2323 }
2345 2324 if (reuse_dmahdl) {
2346 2325 bind->bi_free_dmahdl = 1;
2347 2326 }
2348 2327
2349 2328 /*
2350 2329 * Allocate the new MTT entries resource
2351 2330 */
2352 2331 status = tavor_rsrc_alloc(state, TAVOR_MTT,
2353 2332 TAVOR_NUMMTT_TO_MTTSEG(nummtt_needed), sleep, &mtt);
2354 2333 if (status != DDI_SUCCESS) {
2355 2334 /*
2356 2335 * Deregister will be called upon returning failure
2357 2336 * from this routine. This will ensure that all
2358 2337 * current resources get properly freed up.
2359 2338 * Unnecessary to attempt to regain software ownership
2360 2339 * of the MPT entry as that has already been done
2361 2340 * above (in tavor_mr_reregister()). Also unnecessary
2362 2341 * to attempt to unbind the memory.
2363 2342 *
2364 2343 * But we do need to unbind the newly bound memory
2365 2344 * before returning.
2366 2345 */
2367 2346 tavor_mr_mem_unbind(state, bind);
2368 2347 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2369 2348
2370 2349 /* Set "status" and "errormsg" and goto failure */
2371 2350 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT");
2372 2351 goto mrrereghelp_fail;
2373 2352 }
2374 2353
2375 2354 /*
2376 2355 * Allocate MTT reference count (to track shared memory
2377 2356 * regions). As mentioned elsewhere above, this reference
2378 2357 * count resource may never be used on the given memory region,
2379 2358 * but if it is ever later registered as a "shared" memory
2380 2359 * region then this resource will be necessary. Note: This
2381 2360 * is only necessary here if the existing memory region is
2382 2361 * already being shared (because otherwise we already have
2383 2362 * a useable reference count resource).
2384 2363 */
2385 2364 if (TAVOR_MTT_IS_SHARED(swrc_old)) {
2386 2365 status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1,
2387 2366 sleep, &mtt_refcnt);
2388 2367 if (status != DDI_SUCCESS) {
2389 2368 /*
2390 2369 * Deregister will be called upon returning
2391 2370 * failure from this routine. This will ensure
2392 2371 * that all current resources get properly
2393 2372 * freed up. Unnecessary to attempt to regain
2394 2373 * software ownership of the MPT entry as that
2395 2374 * has already been done above (in
2396 2375 * tavor_mr_reregister()). Also unnecessary
2397 2376 * to attempt to unbind the memory.
2398 2377 *
2399 2378 * But we need to unbind the newly bound
2400 2379 * memory and free up the newly allocated MTT
2401 2380 * entries before returning.
2402 2381 */
2403 2382 tavor_mr_mem_unbind(state, bind);
↓ open down ↓ |
587 lines elided |
↑ open up ↑ |
2404 2383 tavor_rsrc_free(state, &mtt);
2405 2384 *dereg_level =
2406 2385 TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2407 2386
2408 2387 /* Set "status"/"errormsg", goto failure */
2409 2388 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
2410 2389 "failed reference count");
2411 2390 goto mrrereghelp_fail;
2412 2391 }
2413 2392 swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
2414 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
2415 2393 TAVOR_MTT_REFCNT_INIT(swrc_new);
2416 2394 } else {
2417 2395 mtt_refcnt = mr->mr_mttrefcntp;
2418 2396 }
2419 2397
2420 2398 /*
2421 2399 * Using the new mapping and the new MTT resources, write the
2422 2400 * updated entries to MTT
2423 2401 */
2424 2402 status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
2425 2403 if (status != DDI_SUCCESS) {
2426 2404 /*
2427 2405 * Deregister will be called upon returning failure
2428 2406 * from this routine. This will ensure that all
2429 2407 * current resources get properly freed up.
2430 2408 * Unnecessary to attempt to regain software ownership
2431 2409 * of the MPT entry as that has already been done
2432 2410 * above (in tavor_mr_reregister()). Also unnecessary
2433 2411 * to attempt to unbind the memory.
2434 2412 *
2435 2413 * But we need to unbind the newly bound memory,
2436 2414 * free up the newly allocated MTT entries, and
2437 2415 * (possibly) free the new MTT reference count
2438 2416 * resource before returning.
2439 2417 */
2440 2418 if (TAVOR_MTT_IS_SHARED(swrc_old)) {
2441 2419 tavor_rsrc_free(state, &mtt_refcnt);
2442 2420 }
2443 2421 tavor_mr_mem_unbind(state, bind);
2444 2422 tavor_rsrc_free(state, &mtt);
2445 2423 *dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2446 2424
2447 2425 /* Set "status" and "errormsg" and goto failure */
2448 2426 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed write mtt");
2449 2427 goto mrrereghelp_fail;
2450 2428 }
2451 2429
2452 2430 /*
2453 2431 * Check if the memory region MTT is shared by any other MRs.
2454 2432 * Since the resource may be shared between multiple memory
2455 2433 * regions (as a result of a "RegisterSharedMR()" verb) it is
2456 2434 * important that we not free up any resources prematurely.
2457 2435 */
2458 2436 if (TAVOR_MTT_IS_SHARED(swrc_old)) {
2459 2437 /* Decrement MTT reference count for "old" region */
2460 2438 (void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
2461 2439 } else {
2462 2440 /* Free up the old MTT entries resource */
2463 2441 tavor_rsrc_free(state, &mr->mr_mttrsrcp);
2464 2442 }
2465 2443
2466 2444 /* Put the updated information into the mrhdl */
2467 2445 mr->mr_bindinfo = *bind;
2468 2446 mr->mr_logmttpgsz = mtt_pgsize_bits;
2469 2447 mr->mr_mttrsrcp = mtt;
2470 2448 mr->mr_mttrefcntp = mtt_refcnt;
2471 2449 }
2472 2450
2473 2451 /*
2474 2452 * Calculate and return the updated MTT address (in the DDR address
2475 2453 * space). This will be used by the caller (tavor_mr_reregister) in
2476 2454 * the updated MPT entry
2477 2455 */
2478 2456 rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
2479 2457 mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;
2480 2458 *mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx <<
2481 2459 TAVOR_MTT_SIZE_SHIFT);
2482 2460
2483 2461 TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper);
2484 2462 return (DDI_SUCCESS);
2485 2463
2486 2464 mrrereghelp_fail:
2487 2465 TNF_PROBE_1(tavor_mr_rereg_xlat_helper_fail, TAVOR_TNF_ERROR, "",
2488 2466 tnf_string, msg, errormsg);
2489 2467 TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper);
2490 2468 return (status);
2491 2469 }
2492 2470
2493 2471
2494 2472 /*
2495 2473 * tavor_mr_nummtt_needed()
2496 2474 * Context: Can be called from interrupt or base context.
2497 2475 */
2498 2476 /* ARGSUSED */
2499 2477 static uint64_t
2500 2478 tavor_mr_nummtt_needed(tavor_state_t *state, tavor_bind_info_t *bind,
2501 2479 uint_t *mtt_pgsize_bits)
2502 2480 {
2503 2481 uint64_t pg_offset_mask;
2504 2482 uint64_t pg_offset, tmp_length;
2505 2483
2506 2484 /*
2507 2485 * For now we specify the page size as 8Kb (the default page size for
2508 2486 * the sun4u architecture), or 4Kb for x86. Figure out optimal page
2509 2487 * size by examining the dmacookies XXX
2510 2488 */
2511 2489 *mtt_pgsize_bits = PAGESHIFT;
2512 2490
2513 2491 pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
2514 2492 pg_offset = bind->bi_addr & pg_offset_mask;
2515 2493 tmp_length = pg_offset + (bind->bi_len - 1);
2516 2494 return ((tmp_length >> *mtt_pgsize_bits) + 1);
2517 2495 }
2518 2496
2519 2497
/*
 * tavor_mr_mem_bind()
 *    Context: Can be called from interrupt or base context.
 *
 *    Allocates (or reuses) a DDI DMA handle and binds the memory described
 *    by "bind" to obtain its DMA cookies.  On entry "bind->bi_type" selects
 *    the bind flavor (vaddr vs. buf); if "dmahdl" is non-NULL it is reused
 *    instead of allocating a new handle.  On success the cookie list is left
 *    in bind->bi_dmacookie/bi_cookiecnt and DDI_SUCCESS is returned; on
 *    failure any handle allocated here is freed and the DDI error status is
 *    returned.  "sleep" (TAVOR_SLEEP/TAVOR_NOSLEEP) selects whether DDI
 *    allocation callbacks may block.
 */
static int
tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep)
{
	ddi_dma_attr_t	dma_attr;
	int		(*callback)(caddr_t);
	uint_t		dma_xfer_mode;
	int		status;

	/* bi_type must be set to a meaningful value to get a bind handle */
	ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR ||
	    bind->bi_type == TAVOR_BINDHDL_BUF ||
	    bind->bi_type == TAVOR_BINDHDL_UBUF);

	TAVOR_TNF_ENTER(tavor_mr_mem_bind);

	/* Set the callback flag appropriately */
	callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;

	/* Determine whether to map STREAMING or CONSISTENT */
	dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ?
	    DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;

	/*
	 * Initialize many of the default DMA attributes.  Then, if we're
	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
	 */
	if (dmahdl == NULL) {
		tavor_dma_attr_init(&dma_attr);
#ifdef	__sparc
		/*
		 * First, disable streaming and switch to consistent if
		 * configured to do so and IOMMU BYPASS is enabled.
		 */
		if (state->ts_cfg_profile->cp_disable_streaming_on_bypass &&
		    dma_xfer_mode == DDI_DMA_STREAMING &&
		    bind->bi_bypass == TAVOR_BINDMEM_BYPASS) {
			dma_xfer_mode = DDI_DMA_CONSISTENT;
		}

		/*
		 * Then, request IOMMU bypass (DDI_DMA_FORCE_PHYSICAL) only
		 * for CONSISTENT mappings; if the mapping is still STREAMING
		 * at this point, "bypass" is not allowed.
		 */
		if ((dma_xfer_mode == DDI_DMA_CONSISTENT) &&
		    (bind->bi_bypass == TAVOR_BINDMEM_BYPASS)) {
			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
		}
#endif
		/* Allocate a DMA handle for the binding */
		status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr,
		    callback, NULL, &bind->bi_dmahdl);
		if (status != DDI_SUCCESS) {
			TNF_PROBE_0(tavor_mr_mem_bind_dmahdl_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_mr_mem_bind);
			return (status);
		}
		/* Handle was allocated here, so unbind must free it later */
		bind->bi_free_dmahdl = 1;

	} else {
		/* Caller-supplied handle is reused, not freed by unbind */
		bind->bi_dmahdl = dmahdl;
		bind->bi_free_dmahdl = 0;
	}

	/*
	 * Bind the memory to get the PCI mapped addresses.  The decision
	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
	 * is determined by the "bi_type" flag.  Note: if the bind operation
	 * fails then we have to free up the DMA handle and return error.
	 */
	if (bind->bi_type == TAVOR_BINDHDL_VADDR) {
		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
		    (DDI_DMA_RDWR | dma_xfer_mode), callback, NULL,
		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
	} else {  /* TAVOR_BINDHDL_BUF || TAVOR_BINDHDL_UBUF */
		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
		    bind->bi_buf, (DDI_DMA_RDWR | dma_xfer_mode), callback,
		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
	}

	if (status != DDI_DMA_MAPPED) {
		/* Only free the handle if this routine allocated it above */
		if (bind->bi_free_dmahdl != 0) {
			ddi_dma_free_handle(&bind->bi_dmahdl);
		}
		TNF_PROBE_0(tavor_mr_mem_bind_dmabind_fail, TAVOR_TNF_ERROR,
		    "");
		TAVOR_TNF_EXIT(tavor_mr_mem_bind);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_mem_bind);
	return (DDI_SUCCESS);
}
2621 2597
2622 2598
2623 2599 /*
2624 2600 * tavor_mr_mem_unbind()
2625 2601 * Context: Can be called from interrupt or base context.
2626 2602 */
2627 2603 static void
2628 2604 tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind)
2629 2605 {
↓ open down ↓ |
78 lines elided |
↑ open up ↑ |
2630 2606 int status;
2631 2607
2632 2608 TAVOR_TNF_ENTER(tavor_mr_mem_unbind);
2633 2609
2634 2610 /*
2635 2611 * In case of TAVOR_BINDHDL_UBUF, the memory bi_buf points to
2636 2612 * is actually allocated by ddi_umem_iosetup() internally, then
2637 2613 * it's required to free it here. Reset bi_type to TAVOR_BINDHDL_NONE
2638 2614 * not to free it again later.
2639 2615 */
2640 - _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2641 2616 if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
2642 2617 freerbuf(bind->bi_buf);
2643 2618 bind->bi_type = TAVOR_BINDHDL_NONE;
2644 2619 }
2645 - _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
2646 2620
2647 2621 /*
2648 2622 * Unbind the DMA memory for the region
2649 2623 *
2650 2624 * Note: The only way ddi_dma_unbind_handle() currently
2651 2625 * can return an error is if the handle passed in is invalid.
2652 2626 * Since this should never happen, we choose to return void
2653 2627 * from this function! If this does return an error, however,
2654 2628 * then we print a warning message to the console.
2655 2629 */
2656 2630 status = ddi_dma_unbind_handle(bind->bi_dmahdl);
2657 2631 if (status != DDI_SUCCESS) {
2658 2632 TAVOR_WARNING(state, "failed to unbind DMA mapping");
2659 2633 TNF_PROBE_0(tavor_mr_mem_unbind_dmaunbind_fail,
2660 2634 TAVOR_TNF_ERROR, "");
2661 2635 TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
2662 2636 return;
2663 2637 }
2664 2638
2665 2639 /* Free up the DMA handle */
2666 2640 if (bind->bi_free_dmahdl != 0) {
2667 2641 ddi_dma_free_handle(&bind->bi_dmahdl);
2668 2642 }
2669 2643
2670 2644 TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
2671 2645 }
2672 2646
2673 2647
2674 2648 /*
2675 2649 * tavor_mr_fast_mtt_write()
2676 2650 * Context: Can be called from interrupt or base context.
2677 2651 */
2678 2652 static int
2679 2653 tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
2680 2654 uint32_t mtt_pgsize_bits)
2681 2655 {
2682 2656 ddi_dma_cookie_t dmacookie;
2683 2657 uint_t cookie_cnt;
2684 2658 uint64_t *mtt_table;
2685 2659 uint64_t mtt_entry;
2686 2660 uint64_t addr, endaddr;
2687 2661 uint64_t pagesize;
2688 2662 int i;
2689 2663
2690 2664 TAVOR_TNF_ENTER(tavor_mr_fast_mtt_write);
2691 2665
2692 2666 /* Calculate page size from the suggested value passed in */
2693 2667 pagesize = ((uint64_t)1 << mtt_pgsize_bits);
2694 2668
2695 2669 /*
2696 2670 * Walk the "cookie list" and fill in the MTT table entries
2697 2671 */
2698 2672 i = 0;
2699 2673 mtt_table = (uint64_t *)mtt->tr_addr;
2700 2674 dmacookie = bind->bi_dmacookie;
2701 2675 cookie_cnt = bind->bi_cookiecnt;
2702 2676 while (cookie_cnt-- > 0) {
2703 2677 addr = dmacookie.dmac_laddress;
2704 2678 endaddr = addr + (dmacookie.dmac_size - 1);
2705 2679 addr = addr & ~((uint64_t)pagesize - 1);
2706 2680 while (addr <= endaddr) {
2707 2681 /*
↓ open down ↓ |
52 lines elided |
↑ open up ↑ |
2708 2682 * Fill in the mapped addresses (calculated above) and
2709 2683 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
2710 2684 */
2711 2685 mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
2712 2686 ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
2713 2687 addr += pagesize;
2714 2688 i++;
2715 2689
2716 2690 if (addr == 0) {
2717 2691 static int do_once = 1;
2718 - _NOTE(SCHEME_PROTECTS_DATA("safe sharing",
2719 - do_once))
2720 2692 if (do_once) {
2721 2693 do_once = 0;
2722 2694 cmn_err(CE_NOTE, "probable error in "
2723 2695 "dma_cookie address from caller\n");
2724 2696 }
2725 2697 break;
2726 2698 }
2727 2699 }
2728 2700
2729 2701 /*
2730 2702 * When we've reached the end of the current DMA cookie,
2731 2703 * jump to the next cookie (if there are more)
2732 2704 */
2733 2705 if (cookie_cnt != 0) {
2734 2706 ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
2735 2707 }
2736 2708 }
2737 2709
2738 2710 TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write);
2739 2711 return (DDI_SUCCESS);
2740 2712 }
2741 2713
2742 2714 /*
2743 2715 * tavor_mtt_refcnt_inc()
2744 2716 * Context: Can be called from interrupt or base context.
2745 2717 */
2746 2718 static int
2747 2719 tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc)
2748 2720 {
2749 2721 tavor_sw_refcnt_t *rc;
2750 2722 uint32_t cnt;
2751 2723
2752 2724 rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;
2753 2725
2754 2726 /* Increment the MTT's reference count */
2755 2727 mutex_enter(&rc->swrc_lock);
2756 2728 TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_inc, TAVOR_TNF_TRACE, "",
2757 2729 tnf_uint, refcnt, rc->swrc_refcnt);
2758 2730 cnt = rc->swrc_refcnt++;
2759 2731 mutex_exit(&rc->swrc_lock);
2760 2732
2761 2733 return (cnt);
2762 2734 }
2763 2735
2764 2736
2765 2737 /*
2766 2738 * tavor_mtt_refcnt_dec()
2767 2739 * Context: Can be called from interrupt or base context.
2768 2740 */
2769 2741 static int
2770 2742 tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc)
2771 2743 {
2772 2744 tavor_sw_refcnt_t *rc;
2773 2745 uint32_t cnt;
2774 2746
2775 2747 rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;
2776 2748
2777 2749 /* Decrement the MTT's reference count */
2778 2750 mutex_enter(&rc->swrc_lock);
2779 2751 cnt = --rc->swrc_refcnt;
2780 2752 TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_dec, TAVOR_TNF_TRACE, "",
2781 2753 tnf_uint, refcnt, rc->swrc_refcnt);
2782 2754 mutex_exit(&rc->swrc_lock);
2783 2755
2784 2756 return (cnt);
2785 2757 }
↓ open down ↓ |
56 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX