/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * tavor_mr.c
 *    Tavor Memory Region/Window Routines
 *
 *    Implements all the routines necessary to provide the requisite memory
 *    registration verbs.  These include operations like RegisterMemRegion(),
 *    DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion,
 *    etc., that affect Memory Regions.  It also includes the verbs that
 *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
 *    and QueryMemWindow().
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/esunddi.h>

#include <sys/ib/adapters/tavor/tavor.h>


/*
 * Used by tavor_mr_keycalc() below to fill in the "unconstrained" portion
 * of Tavor memory keys (LKeys and RKeys)
 */
static uint_t tavor_debug_memkey_cnt = 0x00000000;

/*
 * Forward declarations for the file-local helpers that implement the bulk
 * of the registration/reregistration work (binding memory, sizing and
 * writing MTT entries, and managing the shared-MTT reference count).
 */
static int tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op);
static int tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op);
static int tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level);
static uint64_t tavor_mr_nummtt_needed(tavor_state_t *state,
    tavor_bind_info_t *bind, uint_t *mtt_pgsize);
static int tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep);
static void tavor_mr_mem_unbind(tavor_state_t *state,
    tavor_bind_info_t *bind);
static int tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
    uint32_t mtt_pgsize_bits);
static int tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc);
static int tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc);

/*
 * The Tavor umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The tavor_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
 */
static struct umem_callback_ops tavor_umem_cbops = {
	UMEM_CALLBACK_VERSION,
	tavor_umap_umemlock_cb,
};


/*
 * tavor_mr_register()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
	tavor_bind_info_t	bind;
	int			status;

	TAVOR_TNF_ENTER(tavor_mr_register);

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.
	 */
	bind.bi_type  = TAVOR_BINDHDL_VADDR;
	bind.bi_addr  = mr_attr->mr_vaddr;
	bind.bi_len   = mr_attr->mr_len;
	bind.bi_as    = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_register_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_register);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_register);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_register_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_smr_attr_t *mr_attr, struct buf *buf, tavor_mrhdl_t *mrhdl,
    tavor_mr_options_t *op)
{
	tavor_bind_info_t	bind;
	int			status;

	TAVOR_TNF_ENTER(tavor_mr_register_buf);

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type  = TAVOR_BINDHDL_BUF;
	bind.bi_buf   = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr  = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_as    = NULL;
	bind.bi_len   = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_register_buf_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_register_buf);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_register_buf);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mpt, *mtt, *rsrc;
	tavor_umap_db_entry_t	*umapdb;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	tavor_bind_info_t	*bind;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, mtt_ddrbaseaddr, pgsize_msk;
	uint_t			sleep, mr_is_umem;
	int			status, umem_flags;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_register_shared);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ?
TAVOR_NOSLEEP :
	    TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrshared_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the shared memory region.
	 * Specifically, it will be made to reference the currently existing
	 * MTT entries and ownership of the MPT will be passed to the hardware
	 * in the last step below.  If we fail here, we must undo the
	 * protection domain reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mrshared_fail1;
	}

	/*
	 * Allocate the software structure for tracking the shared memory
	 * region (i.e. the Tavor Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mrshared_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/* Grab the MR lock for the current memory region */
	mutex_enter(&mrhdl->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
		mutex_exit(&mrhdl->mr_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
		goto mrshared_fail3;
	}

	/*
	 * Determine if the original memory was from userland and, if so, pin
	 * the pages (again) with umem_lockmemory().  This will guarantee a
	 * separate callback for each of this shared region's MR handles.
	 * If this is userland memory, then allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo all the above setup.
	 */
	mr_is_umem = mrhdl->mr_is_umem;
	if (mr_is_umem) {
		/*
		 * Round the lock region out to whole pages: length grows by
		 * the page offset of the start address, and the start is
		 * truncated down to a page boundary.
		 */
		umem_len   = ptob(btopr(mrhdl->mr_bindinfo.bi_len +
		    ((uintptr_t)mrhdl->mr_bindinfo.bi_addr & PAGEOFFSET)));
		umem_addr  = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
		    ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &tavor_umem_cbops, NULL);
		if (status != 0) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
			goto mrshared_fail3;
		}

		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
			goto mrshared_fail4;
		}
	}

	/*
	 * Copy the MTT resource pointer (and additional parameters) from
	 * the original Tavor Memory Region handle.  Note: this is normally
	 * where the tavor_mr_mem_bind() routine would be called, but because
	 * we already have bound and filled-in MTT entries it is simply a
	 * matter here of managing the MTT reference count and grabbing the
	 * address of the MTT table entries (for filling in the shared region's
	 * MPT entry).
	 */
	mr->mr_mttrsrcp	  = mrhdl->mr_mttrsrcp;
	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
	mr->mr_bindinfo	  = mrhdl->mr_bindinfo;
	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
	mutex_exit(&mrhdl->mr_lock);
	bind = &mr->mr_bindinfo;
	mtt = mr->mr_mttrsrcp;

	/*
	 * Increment the MTT reference count (to reflect the fact that
	 * the MTT is now shared)
	 */
	(void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp);

	/*
	 * Update the new "bind" virtual address.  Do some extra work here
	 * to ensure proper alignment.  That is, make sure that the page
	 * offset for the beginning of the old range is the same as the
	 * offset for this new mapping
	 */
	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
	    (mr->mr_bindinfo.bi_addr & pgsize_msk));

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * in the next step when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io	  = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
	/* page_sz is encoded relative to 4KB (2^12), hence the 0xC bias */
	mpt_entry.page_sz	= mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.start_addr	= bind->bi_addr;
	mpt_entry.reg_win_len	= bind->bi_len;
	mpt_entry.win_cnt_limit	= TAVOR_UNLIMITED_WIN_BIND;
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mr_register_shared_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mrshared_fail5;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_umem	  = mr_is_umem;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".
	 * This will
	 * allow for later coordination between the tavor_umap_umemlock_cb()
	 * callback and tavor_mr_deregister().
	 */
	if (mr_is_umem) {
		tavor_umap_db_add(umapdb);
	}

	*mrhdl_new = mr;

	TAVOR_TNF_EXIT(tavor_mr_register_shared);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 * (unwound in strict reverse order of the setup steps above)
 */
mrshared_fail5:
	(void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
	if (mr_is_umem) {
		tavor_umap_db_free(umapdb);
	}
mrshared_fail4:
	if (mr_is_umem) {
		ddi_umem_unlock(umem_cookie);
	}
mrshared_fail3:
	tavor_rsrc_free(state, &rsrc);
mrshared_fail2:
	tavor_rsrc_free(state, &mpt);
mrshared_fail1:
	tavor_pd_refcnt_dec(pd);
mrshared_fail:
	TNF_PROBE_1(tavor_mr_register_shared_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_register_shared);
	return (status);
}


/*
 * tavor_mr_deregister()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
	tavor_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	tavor_umap_db_entry_t	*umapdb;
	tavor_pdhdl_t		pd;
	tavor_mrhdl_t		mr;
	tavor_bind_info_t	*bind;
	uint64_t		value;
	int			status, shared_mtt;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_deregister);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
		TNF_PROBE_1(tavor_mr_deregister_fail, TAVOR_TNF_ERROR, "",
		    tnf_string, msg, errormsg);
		TAVOR_TNF_EXIT(tavor_mr_deregister);
		return (status);
	}

	/*
	 * Pull all the necessary information from the Tavor Memory Region
	 * handle.  This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of the this
	 * deregistration
	 */
	mr	= *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt	= mr->mr_mptrsrcp;
	mtt	= mr->mr_mttrsrcp;
	mtt_refcnt = mr->mr_mttrefcntp;
	rsrc	= mr->mr_rsrcp;
	pd	= mr->mr_pdhdl;
	bind	= &mr->mr_bindinfo;

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of the tavor_umap_umemlock_cb() callback.
	 * If so, then jump to the end and free the remaining resources.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		goto mrdereg_finish_cleanup;
	}

	/*
	 * We must drop the "mr_lock" here to ensure that both SLEEP and
	 * NOSLEEP calls into the firmware work as expected.  Also, if two
	 * threads are attemping to access this MR (via de-register,
	 * re-register, or otherwise), then we allow the firmware to enforce
	 * the checking, that only one deregister is valid.
	 */
	mutex_exit(&mr->mr_lock);

	/*
	 * Reclaim MPT entry from hardware (if necessary).  Since the
	 * tavor_mr_deregister() routine is used in the memory region
	 * reregistration process as well, it is possible that we will
	 * not always wish to reclaim ownership of the MPT.  Check the
	 * "level" arg and, if necessary, attempt to reclaim it.  If
	 * the ownership transfer fails for any reason, we check to see
	 * what command status was returned from the hardware.  The only
	 * "expected" error status is the one that indicates an attempt to
	 * deregister a memory region that has memory windows bound to it
	 */
	if (level >= TAVOR_MR_DEREG_ALL) {
		status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT,
		    NULL, 0, mpt->tr_indx, sleep);
		if (status != TAVOR_CMD_SUCCESS) {
			if (status == TAVOR_CMD_REG_BOUND) {
				TAVOR_TNF_EXIT(tavor_mr_deregister);
				return (IBT_MR_IN_USE);
			} else {
				cmn_err(CE_CONT, "Tavor: HW2SW_MPT command "
				    "failed: %08x\n", status);
				TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail,
				    TAVOR_TNF_ERROR, "", tnf_uint, status,
				    status);
				TAVOR_TNF_EXIT(tavor_mr_deregister);
				return (IBT_INVALID_PARAM);
			}
		}
	}

	/*
	 * Re-grab the mr_lock here.  Since further access to the protected
	 * 'mr' structure is needed, and we would have returned previously for
	 * the multiple deregistration case, we can safely grab the lock here.
	 */
	mutex_enter(&mr->mr_lock);

	/*
	 * If the memory had come from userland, then we do a lookup in the
	 * "userland resources database".  On success, we free the entry, call
	 * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
	 * an indication that the umem_lockmemory() callback has called
	 * tavor_mr_deregister()), we call ddi_umem_unlock() and invalidate
	 * the "mr_umemcookie" field in the MR handle (this will be used
	 * later to detect that only partial cleaup still remains to be done
	 * on the MR handle).
	 */
	if (mr->mr_is_umem) {
		status = tavor_umap_db_find(state->ts_instance,
		    (uint64_t)(uintptr_t)mr->mr_umemcookie,
		    MLNX_UMAP_MRMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
		    &umapdb);
		if (status == DDI_SUCCESS) {
			tavor_umap_db_free(umapdb);
			ddi_umem_unlock(mr->mr_umemcookie);
		} else {
			ddi_umem_unlock(mr->mr_umemcookie);
			mr->mr_umemcookie = NULL;
		}
	}

	/* mtt_refcnt is NULL in the case of tavor_dma_mr_register() */
	if (mtt_refcnt != NULL) {
		/*
		 * Decrement the MTT reference count.  Since the MTT resource
		 * may be shared between multiple memory regions (as a result
		 * of a "RegisterSharedMR" verb) it is important that we not
		 * free up or unbind resources prematurely.  If it's not shared
		 * (as indicated by the return status), then free the resource.
		 */
		shared_mtt = tavor_mtt_refcnt_dec(mtt_refcnt);
		if (!shared_mtt) {
			tavor_rsrc_free(state, &mtt_refcnt);
		}

		/*
		 * Free up the MTT entries and unbind the memory.  Here,
		 * as above, we attempt to free these resources only if
		 * it is appropriate to do so.
		 */
		if (!shared_mtt) {
			if (level >= TAVOR_MR_DEREG_NO_HW2SW_MPT) {
				tavor_mr_mem_unbind(state, bind);
			}
			tavor_rsrc_free(state, &mtt);
		}
	}

	/*
	 * If the MR handle has been invalidated, then drop the
	 * lock and return success.  Note: This only happens because
	 * the umem_lockmemory() callback has been triggered.  The
	 * cleanup here is partial, and further cleanup (in a
	 * subsequent tavor_mr_deregister() call) will be necessary.
630 */ 631 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 632 mutex_exit(&mr->mr_lock); 633 TAVOR_TNF_EXIT(tavor_mr_deregister); 634 return (DDI_SUCCESS); 635 } 636 637 mrdereg_finish_cleanup: 638 mutex_exit(&mr->mr_lock); 639 640 /* Free the Tavor Memory Region handle */ 641 tavor_rsrc_free(state, &rsrc); 642 643 /* Free up the MPT entry resource */ 644 tavor_rsrc_free(state, &mpt); 645 646 /* Decrement the reference count on the protection domain (PD) */ 647 tavor_pd_refcnt_dec(pd); 648 649 /* Set the mrhdl pointer to NULL and return success */ 650 *mrhdl = NULL; 651 652 TAVOR_TNF_EXIT(tavor_mr_deregister); 653 return (DDI_SUCCESS); 654 } 655 656 657 /* 658 * tavor_mr_query() 659 * Context: Can be called from interrupt or base context. 660 */ 661 /* ARGSUSED */ 662 int 663 tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr, 664 ibt_mr_query_attr_t *attr) 665 { 666 TAVOR_TNF_ENTER(tavor_mr_query); 667 668 mutex_enter(&mr->mr_lock); 669 670 /* 671 * Check here to see if the memory region has already been partially 672 * deregistered as a result of a tavor_umap_umemlock_cb() callback. 673 * If so, this is an error, return failure. 674 */ 675 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) { 676 mutex_exit(&mr->mr_lock); 677 TNF_PROBE_0(tavor_mr_query_inv_mrhdl_fail, TAVOR_TNF_ERROR, ""); 678 TAVOR_TNF_EXIT(tavor_mr_query); 679 return (IBT_MR_HDL_INVALID); 680 } 681 682 /* Fill in the queried attributes */ 683 attr->mr_attr_flags = mr->mr_accflag; 684 attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl; 685 686 /* Fill in the "local" attributes */ 687 attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey; 688 attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr; 689 attr->mr_lbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len; 690 691 /* 692 * Fill in the "remote" attributes (if necessary). Note: the 693 * remote attributes are only valid if the memory region has one 694 * or more of the remote access flags set. 
695 */ 696 if ((mr->mr_accflag & IBT_MR_REMOTE_READ) || 697 (mr->mr_accflag & IBT_MR_REMOTE_WRITE) || 698 (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) { 699 attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey; 700 attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr; 701 attr->mr_rbounds.pb_len = (size_t)mr->mr_bindinfo.bi_len; 702 } 703 704 /* 705 * If region is mapped for streaming (i.e. noncoherent), then set sync 706 * is required 707 */ 708 attr->mr_sync_required = (mr->mr_bindinfo.bi_flags & 709 IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE; 710 711 mutex_exit(&mr->mr_lock); 712 TAVOR_TNF_EXIT(tavor_mr_query); 713 return (DDI_SUCCESS); 714 } 715 716 717 /* 718 * tavor_mr_reregister() 719 * Context: Can be called from interrupt or base context. 720 */ 721 int 722 tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mr, 723 tavor_pdhdl_t pd, ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new, 724 tavor_mr_options_t *op) 725 { 726 tavor_bind_info_t bind; 727 int status; 728 729 TAVOR_TNF_ENTER(tavor_mr_reregister); 730 731 /* 732 * Fill in the "bind" struct. This struct provides the majority 733 * of the information that will be used to distinguish between an 734 * "addr" binding (as is the case here) and a "buf" binding (see 735 * below). The "bind" struct is later passed to tavor_mr_mem_bind() 736 * which does most of the "heavy lifting" for the Tavor memory 737 * registration (and reregistration) routines. 
738 */ 739 bind.bi_type = TAVOR_BINDHDL_VADDR; 740 bind.bi_addr = mr_attr->mr_vaddr; 741 bind.bi_len = mr_attr->mr_len; 742 bind.bi_as = mr_attr->mr_as; 743 bind.bi_flags = mr_attr->mr_flags; 744 status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op); 745 if (status != DDI_SUCCESS) { 746 TNF_PROBE_0(tavor_mr_reregister_cmnreg_fail, 747 TAVOR_TNF_ERROR, ""); 748 TAVOR_TNF_EXIT(tavor_mr_reregister); 749 return (status); 750 } 751 752 TAVOR_TNF_EXIT(tavor_mr_reregister); 753 return (DDI_SUCCESS); 754 } 755 756 757 /* 758 * tavor_mr_reregister_buf() 759 * Context: Can be called from interrupt or base context. 760 */ 761 int 762 tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr, 763 tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf, 764 tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op) 765 { 766 tavor_bind_info_t bind; 767 int status; 768 769 TAVOR_TNF_ENTER(tavor_mr_reregister_buf); 770 771 /* 772 * Fill in the "bind" struct. This struct provides the majority 773 * of the information that will be used to distinguish between an 774 * "addr" binding (see above) and a "buf" binding (as is the case 775 * here). The "bind" struct is later passed to tavor_mr_mem_bind() 776 * which does most of the "heavy lifting" for the Tavor memory 777 * registration routines. Note: We have chosen to provide 778 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is 779 * not set). It is not critical what value we choose here as it need 780 * only be unique for the given RKey (which will happen by default), 781 * so the choice here is somewhat arbitrary. 
	 */
	bind.bi_type  = TAVOR_BINDHDL_BUF;
	bind.bi_buf   = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr  = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_len   = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	bind.bi_as = NULL;
	status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);
	if (status != DDI_SUCCESS) {
		TNF_PROBE_0(tavor_mr_reregister_buf_cmnreg_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_reregister_buf);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_reregister_buf);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_sync()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
	tavor_mrhdl_t		mrhdl;
	uint64_t		seg_vaddr, seg_len, seg_end;
	uint64_t		mr_start, mr_end;
	uint_t			type;
	/*
	 * NOTE(review): "i" is a signed int compared against the size_t
	 * "num_segs" below; presumably callers never pass a count large
	 * enough for this to matter — confirm against the IBTF callers.
	 */
	int			status, i;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_sync);

	/* Process each of the ibt_mr_sync_t's */
	for (i = 0; i < num_segs; i++) {
		mrhdl = (tavor_mrhdl_t)mr_segs[i].ms_handle;

		/* Check for valid memory region handle */
		if (mrhdl == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
			goto mrsync_fail;
		}

		mutex_enter(&mrhdl->mr_lock);

		/*
		 * Check here to see if the memory region has already been
		 * partially deregistered as a result of a
		 * tavor_umap_umemlock_cb() callback.  If so, this is an
		 * error, return failure.
		 */
		if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl2");
			goto mrsync_fail;
		}

		/* Check for valid bounds on sync request */
		seg_vaddr = mr_segs[i].ms_vaddr;
		seg_len	  = mr_segs[i].ms_len;
		seg_end	  = seg_vaddr + seg_len - 1;
		mr_start  = mrhdl->mr_bindinfo.bi_addr;
		mr_end	  = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
		if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_VA_INVALID, "invalid vaddr");
			goto mrsync_fail;
		}
		if ((seg_end < mr_start) || (seg_end > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
			goto mrsync_fail;
		}

		/* Determine what type (i.e. direction) for sync */
		if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
			type = DDI_DMA_SYNC_FORDEV;
		} else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
			type = DDI_DMA_SYNC_FORCPU;
		} else {
			mutex_exit(&mrhdl->mr_lock);
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sync type");
			goto mrsync_fail;
		}

		(void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
		    (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
		mutex_exit(&mrhdl->mr_lock);
	}

	TAVOR_TNF_EXIT(tavor_mr_sync);
	return (DDI_SUCCESS);

mrsync_fail:
	TNF_PROBE_1(tavor_mr_sync_fail, TAVOR_TNF_ERROR, "", tnf_string, msg,
	    errormsg);
	TAVOR_TNF_EXIT(tavor_mr_sync);
	return (status);
}


/*
 * tavor_mw_alloc()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pd, ibt_mw_flags_t flags,
    tavor_mwhdl_t *mwhdl)
{
	tavor_rsrc_t		*mpt, *rsrc;	/* MPT entry + MW handle rsrc */
	tavor_hw_mpt_t		mpt_entry;
	tavor_mwhdl_t		mw;
	uint_t			sleep;
	int			status;
	char			*errormsg;	/* set by TAVOR_TNF_FAIL() */

	TAVOR_TNF_ENTER(tavor_mw_alloc);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MW_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mwalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry (for use as a memory window).  Since the
	 * Tavor hardware uses the MPT entry for memory regions and for
	 * memory windows, we will fill in this MPT with all the necessary
	 * parameters for the memory window.  And then (just as we do for
	 * memory regions) ownership will be passed to the hardware in the
	 * final step below.  If we fail here, we must undo the protection
	 * domain reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mwalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory window (i.e.
	 * the Tavor Memory Window handle).  Note: This is actually the same
	 * software structure used for tracking memory regions, but since many
	 * of the same properties are needed, only a single structure is
	 * necessary.  If we fail here, we must undo the protection domain
	 * reference count and the previous resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mwalloc_fail2;
	}
	mw = (tavor_mwhdl_t)rsrc->tr_addr;

	/*
	 * Calculate an "unbound" RKey from MPT index.  In much the same way
	 * as we do for memory regions (above), this key is constructed from
	 * a "constrained" (which depends on the MPT index) and an
	 * "unconstrained" portion (which may be arbitrarily chosen).
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Note: fewer entries in the MPT
	 * entry are necessary to allocate a memory window.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.reg_win = TAVOR_MPT_IS_WINDOW;
	mpt_entry.mem_key = mw->mr_rkey;
	mpt_entry.pd = pd->pd_pdnum;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mw_alloc_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mwalloc_fail3;
	}

	/*
	 * Fill in the rest of the Tavor Memory Window handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MW.
	 */
	mw->mr_mptrsrcp = mpt;
	mw->mr_pdhdl = pd;
	mw->mr_rsrcp = rsrc;
	*mwhdl = mw;

	TAVOR_TNF_EXIT(tavor_mw_alloc);
	return (DDI_SUCCESS);

/*
 * Cleanup: labels undo the allocations above in strict reverse order of
 * acquisition (MW handle, MPT entry, PD reference).
 */
mwalloc_fail3:
	tavor_rsrc_free(state, &rsrc);
mwalloc_fail2:
	tavor_rsrc_free(state, &mpt);
mwalloc_fail1:
	tavor_pd_refcnt_dec(pd);
mwalloc_fail:
	TNF_PROBE_1(tavor_mw_alloc_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mw_alloc);
	return (status);
}


/*
 * tavor_mw_free()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep)
{
	tavor_rsrc_t		*mpt, *rsrc;
	tavor_mwhdl_t		mw;
	int			status;
	char			*errormsg;	/* set by TAVOR_TNF_FAIL() */
	tavor_pdhdl_t		pd;

	TAVOR_TNF_ENTER(tavor_mw_free);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
		TNF_PROBE_1(tavor_mw_free_fail, TAVOR_TNF_ERROR, "",
		    tnf_string, msg, errormsg);
		TAVOR_TNF_EXIT(tavor_mw_free);
		return (status);
	}

	/*
	 * Pull all the necessary information from the Tavor Memory Window
	 * handle.  This is necessary here because the resource for the
	 * MW handle is going to be freed up as part of the this operation.
	 */
	mw = *mwhdl;
	mutex_enter(&mw->mr_lock);
	mpt = mw->mr_mptrsrcp;
	rsrc = mw->mr_rsrcp;
	pd = mw->mr_pdhdl;
	mutex_exit(&mw->mr_lock);

	/*
	 * Reclaim the MPT entry from hardware.  Note: in general, it is
	 * unexpected for this operation to return an error.
	 * NOTE(review): on failure the MW handle and its resources are
	 * intentionally left intact (ownership was not regained), and
	 * IBT_INVALID_PARAM is returned to the caller -- confirm this is
	 * the intended error mapping for a failed HW2SW_MPT.
	 */
	status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
	    0, mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail, TAVOR_TNF_ERROR, "",
		    tnf_uint, status, status);
		TAVOR_TNF_EXIT(tavor_mw_free);
		return (IBT_INVALID_PARAM);
	}

	/* Free the Tavor Memory Window handle */
	tavor_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	tavor_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Set the mwhdl pointer to NULL and return success */
	*mwhdl = NULL;

	TAVOR_TNF_EXIT(tavor_mw_free);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key)
{
	uint32_t	tmp, log_num_mpt;

	/*
	 * Generate a simple key from counter.  Note:  We increment this
	 * static variable _intentionally_ without any kind of mutex around
	 * it.  First, single-threading all operations through a single lock
	 * would be a bad idea (from a performance point-of-view).  Second,
	 * the upper "unconstrained" bits don't really have to be unique
	 * because the lower bits are guaranteed to be (although we do make a
	 * best effort to ensure that they are).  Third, the window for the
	 * race (where both threads read and update the counter at the same
	 * time) is incredibly small.
	 * And, lastly, we'd like to make this into a "random" key XXX
	 */
	log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt;
	tmp = (tavor_debug_memkey_cnt++) << log_num_mpt;
	*key = tmp | indx;	/* low bits = MPT index, high bits = counter */
}


/*
 * tavor_mr_common_reg()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	tavor_umap_db_entry_t	*umapdb;
	tavor_sw_refcnt_t	*swrc_tmp;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	ibt_mr_flags_t		flags;
	tavor_bind_info_t	*bh;
	ddi_dma_handle_t	bind_dmahdl;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, mtt_ddrbaseaddr, max_sz;
	uint_t			sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
	int			status, umem_flags, bind_override_addr;
	char			*errormsg;	/* set by TAVOR_TNF_FAIL() */

	TAVOR_TNF_ENTER(tavor_mr_common_reg);

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU), whether it should be
	 * registered to bypass the IOMMU, and whether or not the resulting
	 * address should be "zero-based" (to aid the alignment restrictions
	 * for QPs).
	 */
	if (op == NULL) {
		bind_type = TAVOR_BINDMEM_NORMAL;
		bind_dmahdl = NULL;
		bind_override_addr = 0;
	} else {
		bind_type = op->mro_bind_type;
		bind_dmahdl = op->mro_bind_dmahdl;
		bind_override_addr = op->mro_bind_override_addr;
	}

	/* Extract the flags field from the tavor_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check for invalid length.  Check is the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
		goto mrcommon_fail;
	}

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrcommon_fail;
	}

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * below when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the memory region.  And then
	 * ownership will be passed to the hardware in the final step
	 * below.  If we fail here, we must undo the protection domain
	 * reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
		goto mrcommon_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory region (i.e.
	 * the Tavor Memory Region handle).  If we fail here, we must undo
	 * the protection domain reference count and the previous resource
	 * allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
		goto mrcommon_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Determine if the memory is from userland and pin the pages
	 * with umem_lockmemory() if necessary.
	 * Then, if this is userland memory, allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo the reference counts
	 * and the previous resource allocations.
	 */
	mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ?
	    1 : 0);
	if (mr_is_umem) {
		umem_len   = ptob(btopr(bind->bi_len +
		    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
		umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &tavor_umem_cbops, NULL);
		if (status != 0) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
			goto mrcommon_fail3;
		}

		bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
		if (bind->bi_buf == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed iosetup");
			/*
			 * NOTE(review): this path jumps to mrcommon_fail3,
			 * which skips the ddi_umem_unlock() performed at
			 * mrcommon_fail4 even though umem_lockmemory()
			 * succeeded above -- the pinned pages appear to be
			 * leaked here.  Confirm, and consider jumping to
			 * mrcommon_fail4 instead (freerbuf() is safely
			 * skipped there because bi_type is not yet UBUF).
			 */
			goto mrcommon_fail3;
		}
		bind->bi_type = TAVOR_BINDHDL_UBUF;
		bind->bi_buf->b_flags |= B_READ;

		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
			goto mrcommon_fail4;
		}
	}

	/*
	 * Setup the bindinfo for the mtt bind call
	 */
	bh = &mr->mr_bindinfo;
	bcopy(bind, bh, sizeof (tavor_bind_info_t));
	bh->bi_bypass = bind_type;
	status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
	    &mtt_pgsize_bits);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(status, "failed mtt bind");
		/*
		 * When mtt_bind fails, freerbuf has already been done,
		 * so make sure not to call it again.
		 */
		bind->bi_type = bh->bi_type;
		goto mrcommon_fail5;
	}
	mr->mr_logmttpgsz = mtt_pgsize_bits;

	/*
	 * Allocate MTT reference count (to track shared memory regions).
	 * This reference count resource may never be used on the given
	 * memory region, but if it is ever later registered as "shared"
	 * memory region then this resource will be necessary.  If we fail
	 * here, we do pretty much the same as above to clean up.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep,
	    &mtt_refcnt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		/* NOTE(review): "refence" [sic] in the probe string below */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed refence count");
		goto mrcommon_fail6;
	}
	mr->mr_mttrefcntp = mtt_refcnt;
	swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
	TAVOR_MTT_REFCNT_INIT(swrc_tmp);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io	  = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;		/* local read always enabled */
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
	/* page_sz is encoded relative to 4KB (2^0xC) pages */
	mpt_entry.page_sz	= mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	if (bind_override_addr == 0) {
		mpt_entry.start_addr = bh->bi_addr;
	} else {
		/* "zero-based": keep only the intra-page offset */
		bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
		mpt_entry.start_addr = bh->bi_addr;
	}
	mpt_entry.reg_win_len	= bh->bi_len;
	mpt_entry.win_cnt_limit	= TAVOR_UNLIMITED_WIN_BIND;
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	/* low portion: MTT address is 64-byte aligned, low 6 bits dropped */
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		TNF_PROBE_1(tavor_mr_common_reg_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
		    "tavor SW2HW_MPT command");
		goto mrcommon_fail7;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_umem	  = mr_is_umem;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the tavor_umap_umemlock_cb()
	 * callback and tavor_mr_deregister().
	 */
	if (mr_is_umem) {
		tavor_umap_db_add(umapdb);
	}

	*mrhdl = mr;

	TAVOR_TNF_EXIT(tavor_mr_common_reg);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrcommon_fail7:
	tavor_rsrc_free(state, &mtt_refcnt);
mrcommon_fail6:
	tavor_rsrc_free(state, &mtt);
	tavor_mr_mem_unbind(state, bh);
	bind->bi_type = bh->bi_type;
mrcommon_fail5:
	if (mr_is_umem) {
		tavor_umap_db_free(umapdb);
	}
mrcommon_fail4:
	if (mr_is_umem) {
		/*
		 * Free up the memory ddi_umem_iosetup() allocates
		 * internally.
		 */
		if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
			freerbuf(bind->bi_buf);
			bind->bi_type = TAVOR_BINDHDL_NONE;
		}
		ddi_umem_unlock(umem_cookie);
	}
mrcommon_fail3:
	tavor_rsrc_free(state, &rsrc);
mrcommon_fail2:
	tavor_rsrc_free(state, &mpt);
mrcommon_fail1:
	tavor_pd_refcnt_dec(pd);
mrcommon_fail:
	TNF_PROBE_1(tavor_mr_common_reg_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_common_reg);
	return (status);
}

/*
 * tavor_dma_mr_register()
 *    Register a memory region described directly by a physical address and
 *    length (no MTT translation -- the MPT "phys_addr" bit is set below).
 */
int
tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_dmr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl)
{
	tavor_rsrc_t		*mpt, *rsrc;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	ibt_mr_flags_t		flags;
	uint_t			sleep;
	int			status;

	/* Extract the flags field */
	flags = mr_attr->dmr_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrcommon_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the memory region.  And then
	 * ownership will be passed to the hardware in the final step
	 * below.  If we fail here, we must undo the protection domain
	 * reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory region (i.e.
	 * the Tavor Memory Region handle).  If we fail here, we must undo
	 * the protection domain reference count and the previous resource
	 * allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;
	bzero(mr, sizeof (*mr));

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));

	mpt_entry.m_io	  = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.phys_addr = 1;	/* critical bit for this */
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;

	/*
	 * NOTE(review): "mr" was bzero()ed above and mr_logmttpgsz is never
	 * assigned before this use, so this computes (0 - 0xC) in the
	 * page_sz bitfield.  Presumably mr_logmttpgsz should be initialized
	 * (e.g. to PAGESHIFT) before this point -- confirm against the
	 * Tavor PRM and the equivalent hermon code.
	 */
	mpt_entry.page_sz	= mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.win_cnt_limit	= TAVOR_UNLIMITED_WIN_BIND;

	mpt_entry.start_addr = mr_attr->dmr_paddr;
	mpt_entry.reg_win_len = mr_attr->dmr_len;

	/* No MTT translation for a physical-address region */
	mpt_entry.mttseg_addr_h = 0;
	mpt_entry.mttseg_addr_l = 0;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware if needed.  Note: in general, this
	 * operation shouldn't fail.  But if it does, we have to undo
	 * everything we've done above before returning error.
	 *
	 * For Tavor, this routine (which is common to the contexts) will only
	 * set the ownership if needed - the process of passing the context
	 * itself to HW will take care of setting up the MPT (based on type
	 * and index).
	 */

	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		status = ibc_get_ci_failure(0);
		goto mrcommon_fail7;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = NULL;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_umem	  = 0;
	mr->mr_umemcookie = NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	*mrhdl = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrcommon_fail7:
	tavor_rsrc_free(state, &rsrc);
mrcommon_fail2:
	tavor_rsrc_free(state, &mpt);
mrcommon_fail1:
	tavor_pd_refcnt_dec(pd);
mrcommon_fail:
	return (status);
}

/*
 * tavor_mr_mtt_bind()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsize_bits)
{
	uint64_t		nummtt;
	uint_t			sleep;
	int			status;
	char			*errormsg;	/* set by TAVOR_TNF_FAIL() */

	/*
	 * NOTE(review): probe name says "common_reg" -- likely a copy-paste;
	 * it presumably should be tavor_mr_mtt_bind (the exit probes below
	 * use the correct name).
	 */
	TAVOR_TNF_ENTER(tavor_mr_common_reg);

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrmttbind_fail;
	}

	/*
	 * Bind the memory and determine the mapped addresses.  This is
	 * the first of two routines that do all the "heavy lifting" for
	 * the Tavor memory registration routines.  The tavor_mr_mem_bind()
	 * routine takes the "bind" struct with all its fields filled
	 * in and returns a list of DMA cookies (for the PCI mapped addresses
	 * corresponding to the specified address region) which are used by
	 * the tavor_mr_fast_mtt_write() routine below.  If we fail here, we
	 * must undo all the previous resource allocation (and PD reference
	 * count).
	 */
	status = tavor_mr_mem_bind(state, bind, bind_dmahdl, sleep);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
		goto mrmttbind_fail;
	}

	/*
	 * Determine number of pages spanned.  This routine uses the
	 * information in the "bind" struct to determine the required
	 * number of MTT entries needed (and returns the suggested page size -
	 * as a "power-of-2" - for each MTT entry).
	 */
	nummtt = tavor_mr_nummtt_needed(state, bind, mtt_pgsize_bits);

	/*
	 * Allocate the MTT entries.  Use the calculations performed above to
	 * allocate the required number of MTT entries.  Note: MTT entries are
	 * allocated in "MTT segments" which consist of complete cachelines
	 * (i.e. 8 entries, 16 entries, etc.)  So the TAVOR_NUMMTT_TO_MTTSEG()
	 * macro is used to do the proper conversion.  If we fail here, we
	 * must not only undo all the previous resource allocation (and PD
	 * reference count), but we must also unbind the memory.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MTT,
	    TAVOR_NUMMTT_TO_MTTSEG(nummtt), sleep, mtt);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT");
		goto mrmttbind_fail2;
	}

	/*
	 * Write the mapped addresses into the MTT entries.  This is part two
	 * of the "heavy lifting" routines that we talked about above.  Note:
	 * we pass the suggested page size from the earlier operation here.
	 * And if we fail here, we again do pretty much the same huge clean up.
	 */
	status = tavor_mr_fast_mtt_write(*mtt, bind, *mtt_pgsize_bits);
	if (status != DDI_SUCCESS) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(ibc_get_ci_failure(0), "failed write mtt");
		goto mrmttbind_fail3;
	}
	TAVOR_TNF_EXIT(tavor_mr_mtt_bind);
	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrmttbind_fail3:
	tavor_rsrc_free(state, mtt);
mrmttbind_fail2:
	tavor_mr_mem_unbind(state, bind);
mrmttbind_fail:
	TNF_PROBE_1(tavor_mr_mtt_bind_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_mtt_bind);
	return (status);
}


/*
 * tavor_mr_mtt_unbind()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind,
    tavor_rsrc_t *mtt)
{
	TAVOR_TNF_ENTER(tavor_mr_mtt_unbind);

	/*
	 * Free up the MTT entries and unbind the memory.  Here, as above, we
	 * attempt to free these resources only if it is appropriate to do so.
	 */
	tavor_mr_mem_unbind(state, bind);
	tavor_rsrc_free(state, &mtt);

	TAVOR_TNF_EXIT(tavor_mr_mtt_unbind);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_common_rereg()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op)
{
	tavor_rsrc_t		*mpt;
	ibt_mr_attr_flags_t	acc_flags_to_use;
	ibt_mr_flags_t		flags;
	tavor_pdhdl_t		pd_to_use;
	tavor_hw_mpt_t		mpt_entry;
	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
	uint_t			sleep, dereg_level;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_common_rereg);

	/*
	 * Check here to see if the memory region corresponds to a userland
	 * mapping.  Reregistration of userland memory regions is not
	 * currently supported.  Return failure.  XXX
	 */
	if (mr->mr_is_umem) {
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
		goto mrrereg_fail;
	}

	mutex_enter(&mr->mr_lock);

	/* Pull MPT resource pointer from the Tavor Memory Region handle */
	mpt = mr->mr_mptrsrcp;

	/* Extract the flags field from the tavor_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		mutex_exit(&mr->mr_lock);
		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid flags");
		goto mrrereg_fail;
	}

	/*
	 * First step is to temporarily invalidate the MPT entry.  This
	 * regains ownership from the hardware, and gives us the opportunity
	 * to modify the entry.  Note: The HW2SW_MPT command returns the
	 * current MPT entry contents.  These are saved away here because
	 * they will be reused in a later step below.  If the region has
	 * bound memory windows, then we fail, returning an "in use" error
	 * code.  Otherwise, this is an unexpected error and we deregister
	 * the memory region and return error.
	 *
	 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		if (status == TAVOR_CMD_REG_BOUND) {
			TAVOR_TNF_EXIT(tavor_mr_common_rereg);
			return (IBT_MR_IN_USE);
		} else {
			cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: "
			    "%08x\n", status);

			/*
			 * Call deregister and ensure that all current
			 * resources get freed up
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			TNF_PROBE_1(tavor_mr_common_rereg_hw2sw_mpt_cmd_fail,
			    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
			TAVOR_TNF_EXIT(tavor_mr_common_rereg);
			return (ibc_get_ci_failure(0));
		}
	}

	/*
	 * If we're changing the protection domain, then validate the new one
	 */
	if (flags & IBT_MR_CHANGE_PD) {

		/* Check for valid PD handle pointer */
		if (pd == NULL) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_PD_HDL_INVALID, "invalid PD handle");
			goto mrrereg_fail;
		}

		/* Use the new PD handle in all operations below */
		pd_to_use = pd;

	} else {
		/* Use the current PD handle in all operations below */
		pd_to_use = mr->mr_pdhdl;
	}

	/*
	 * If we're changing access permissions, then validate the new ones
	 */
	if (flags & IBT_MR_CHANGE_ACCESS) {
		/*
		 * Validate the access flags.  Both remote write and remote
		 * atomic require the local write flag to be set
		 */
		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all current
			 * resources get properly freed up.  Unnecessary
			 * here to attempt to regain software ownership
			 * of the MPT entry as that has already been
			 * done above.
			 */
			if (tavor_mr_deregister(state, &mr,
			    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
			    DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}
			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_MR_ACCESS_REQ_INVALID,
			    "invalid access flags");
			goto mrrereg_fail;
		}

		/*
		 * Setup and validate the memory region access flags.  This
		 * means translating the IBTF's enable flags into the access
		 * flags that will be used in later operations.
		 */
		acc_flags_to_use = 0;
		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_READ)
			acc_flags_to_use |= IBT_MR_REMOTE_READ;
		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;

	} else {
		/* Keep the region's current access flags */
		acc_flags_to_use = mr->mr_accflag;
	}

	/*
	 * If we're modifying the translation, then figure out whether
	 * we can reuse the current MTT resources.  This means calling
	 * tavor_mr_rereg_xlat_helper() which does most of the heavy lifting
	 * for the reregistration.  If the current memory region contains
	 * sufficient MTT entries for the new regions, then it will be
	 * reused and filled in.  Otherwise, new entries will be allocated,
	 * the old ones will be freed, and the new entries will be filled
	 * in.  Note: If we're not modifying the translation, then we
	 * should already have all the information we need to update the MPT.
	 * Also note: If tavor_mr_rereg_xlat_helper() fails, it will return
	 * a "dereg_level" which is the level of cleanup that needs to be
	 * passed to tavor_mr_deregister() to finish the cleanup.
	 */
	if (flags & IBT_MR_CHANGE_TRANSLATION) {
		status = tavor_mr_rereg_xlat_helper(state, mr, bind, op,
		    &mtt_addr_to_use, sleep, &dereg_level);
		if (status != DDI_SUCCESS) {
			mutex_exit(&mr->mr_lock);
			/*
			 * Call deregister and ensure that all resources get
			 * properly freed up.
			 */
			if (tavor_mr_deregister(state, &mr, dereg_level,
			    sleep) != DDI_SUCCESS) {
				TAVOR_WARNING(state, "failed to deregister "
				    "memory region");
			}

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(status, "failed rereg helper");
			goto mrrereg_fail;
		}
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	} else {
		/*
		 * Translation unchanged: reconstruct the current MTT segment
		 * address from the saved MPT contents (high 32 bits plus a
		 * 64-byte-aligned low part) and keep the current mapping.
		 */
		mtt_addr_to_use = (((uint64_t)mpt_entry.mttseg_addr_h << 32) |
		    ((uint64_t)mpt_entry.mttseg_addr_l << 6));
		vaddr_to_use = mr->mr_bindinfo.bi_addr;
		len_to_use = mr->mr_bindinfo.bi_len;
	}

	/*
	 * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
	 * when the region was first registered, each key is formed from
	 * "constrained" bits and "unconstrained" bits.  Note: If no remote
	 * access is required, then the RKey value is not filled in.  Otherwise
	 * both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Update the MPT entry with the new information.  Some of this
	 * information is retained from the previous operation, some of
	 * it is new based on request.
	 */
	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND) ? 1 : 0;
	mpt_entry.atomic = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw = (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ? 1 : 0;
	mpt_entry.rr = (acc_flags_to_use & IBT_MR_REMOTE_READ) ? 1 : 0;
	mpt_entry.lw = (acc_flags_to_use & IBT_MR_LOCAL_WRITE) ? 1 : 0;
	mpt_entry.page_sz	= mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd_to_use->pd_pdnum;
	mpt_entry.start_addr	= vaddr_to_use;
	mpt_entry.reg_win_len	= len_to_use;
	mpt_entry.mttseg_addr_h = mtt_addr_to_use >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr_to_use >> 6;

	/*
	 * Write the updated MPT entry to hardware
	 *
	 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
	 * against holding the lock around this rereg call in all contexts.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
	if (status != TAVOR_CMD_SUCCESS) {
		mutex_exit(&mr->mr_lock);
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		/*
		 * Call deregister and ensure that all current resources get
		 * properly freed up.  Unnecessary here to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above.
		 */
		if (tavor_mr_deregister(state, &mr,
		    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
			TAVOR_WARNING(state, "failed to deregister memory "
			    "region");
		}
		TNF_PROBE_1(tavor_mr_common_rereg_sw2hw_mpt_cmd_fail,
		    TAVOR_TNF_ERROR, "", tnf_uint, status, status);
		TAVOR_TNF_EXIT(tavor_mr_common_rereg);
		return (ibc_get_ci_failure(0));
	}

	/*
	 * If we're changing PD, then update their reference counts now.
	 * This means decrementing the reference count on the old PD and
	 * incrementing the reference count on the new PD.
	 */
	if (flags & IBT_MR_CHANGE_PD) {
		tavor_pd_refcnt_dec(mr->mr_pdhdl);
		tavor_pd_refcnt_inc(pd);
	}

	/*
	 * Update the contents of the Tavor Memory Region handle to reflect
	 * what has been changed.
	 */
	mr->mr_pdhdl = pd_to_use;
	mr->mr_accflag = acc_flags_to_use;
	mr->mr_is_umem = 0;
	mr->mr_umemcookie = NULL;

	/* New MR handle is same as the old */
	*mrhdl_new = mr;
	mutex_exit(&mr->mr_lock);

	TAVOR_TNF_EXIT(tavor_mr_common_rereg);
	return (DDI_SUCCESS);

mrrereg_fail:
	TNF_PROBE_1(tavor_mr_common_rereg_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_common_rereg);
	return (status);
}


/*
 * tavor_mr_rereg_xlat_helper
 *    Context: Can be called from interrupt or base context.
 *    Note: This routine expects the "mr_lock" to be held when it
 *    is called.  Upon returning failure, this routine passes information
 *    about what "dereg_level" should be passed to tavor_mr_deregister().
 */
static int
tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mtt, *mtt_refcnt;
	tavor_sw_refcnt_t	*swrc_old, *swrc_new;
	ddi_dma_handle_t	dmahdl;
	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
	uint64_t		mtt_ddrbaseaddr;
	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
	int			status;
	char			*errormsg;

	TAVOR_TNF_ENTER(tavor_mr_rereg_xlat_helper);

	ASSERT(MUTEX_HELD(&mr->mr_lock));

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU) or whether it should be
	 * registered to bypass the IOMMU.
	 */
	if (op == NULL) {
		bind_type = TAVOR_BINDMEM_NORMAL;
	} else {
		bind_type = op->mro_bind_type;
	}

	/*
	 * Check for invalid length.  Check if the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		/*
		 * Deregister will be called upon returning failure from this
		 * routine.  This will ensure that all current resources get
		 * properly freed up.  Unnecessary to attempt to regain
		 * software ownership of the MPT entry as that has already
		 * been done above (in tavor_mr_reregister())
		 */
		*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT;

		/* Set "status" and "errormsg" and goto failure */
		TAVOR_TNF_FAIL(IBT_MR_LEN_INVALID, "invalid length");
		goto mrrereghelp_fail;
	}

	/*
	 * Determine the number of pages necessary for new region and the
	 * number of pages supported by the current MTT resources
	 */
	nummtt_needed = tavor_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
	nummtt_in_currrsrc = mr->mr_mttrsrcp->tr_len >> TAVOR_MTT_SIZE_SHIFT;

	/*
	 * Depending on whether we have enough pages or not, the next step is
	 * to fill in a set of MTT entries that reflect the new mapping.  In
	 * the first case below, we already have enough entries.  This means
	 * we need to unbind the memory from the previous mapping, bind the
	 * memory for the new mapping, write the new MTT entries, and update
	 * the mr to reflect the changes.
	 * In the second case below, we do not have enough entries in the
	 * current mapping.  So, in this case, we need not only to unbind the
	 * current mapping, but we need to free up the MTT resources associated
	 * with that mapping.  After we've successfully done that, we continue
	 * by binding the new memory, allocating new MTT entries, writing the
	 * new MTT entries, and updating the mr to reflect the changes.
	 */

	/*
	 * If this region is being shared (i.e. MTT refcount != 1), then we
	 * can't reuse the current MTT resources regardless of their size.
	 * Instead we'll need to alloc new ones (below) just as if there
	 * hadn't been enough room in the current entries.
	 */
	swrc_old = (tavor_sw_refcnt_t *)mr->mr_mttrefcntp->tr_addr;
	if (TAVOR_MTT_IS_NOT_SHARED(swrc_old) &&
	    (nummtt_needed <= nummtt_in_currrsrc)) {

		/*
		 * Unbind the old mapping for this memory region, but retain
		 * the ddi_dma_handle_t (if possible) for reuse in the bind
		 * operation below.  Note: If original memory region was
		 * bound for IOMMU bypass and the new region can not use
		 * bypass, then a new DMA handle will be necessary.
		 */
		if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
			mr->mr_bindinfo.bi_free_dmahdl = 0;
			tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = mr->mr_bindinfo.bi_dmahdl;
			reuse_dmahdl = 1;
		} else {
			tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and tavor_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass = bind_type;
		status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Using the new mapping, but reusing the current MTT
		 * resources, write the updated entries to MTT
		 */
		mtt = mr->mr_mttrsrcp;
		status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			tavor_mr_mem_unbind(state, bind);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(ibc_get_ci_failure(0),
			    "failed write mtt");
			goto mrrereghelp_fail;
		}

		/* Put the updated information into the Mem Region handle */
		mr->mr_bindinfo = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;

	} else {
		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not unbind any resources prematurely.
		 */
		if (!TAVOR_MTT_IS_SHARED(swrc_old)) {
			/*
			 * Unbind the old mapping for this memory region, but
			 * retain the ddi_dma_handle_t for reuse in the bind
			 * operation below.  Note: This can only be done here
			 * because the region being reregistered is not
			 * currently shared.  Also if original memory region
			 * was bound for IOMMU bypass and the new region can
			 * not use bypass, then a new DMA handle will be
			 * necessary.
			 */
			if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
				mr->mr_bindinfo.bi_free_dmahdl = 0;
				tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = mr->mr_bindinfo.bi_dmahdl;
				reuse_dmahdl = 1;
			} else {
				tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
				dmahdl = NULL;
				reuse_dmahdl = 0;
			}
		} else {
			dmahdl = NULL;
			reuse_dmahdl = 0;
		}

		/*
		 * Bind the new memory and determine the mapped addresses.
		 * As described, this routine and tavor_mr_fast_mtt_write()
		 * do the majority of the work for the memory registration
		 * operations.  Note: When we successfully finish the binding,
		 * we will set the "bi_free_dmahdl" flag to indicate that
		 * even though we may have reused the ddi_dma_handle_t we do
		 * wish it to be freed up at some later time.  Note also that
		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
		 */
		bind->bi_bypass = bind_type;
		status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
		if (status != DDI_SUCCESS) {
			if (reuse_dmahdl) {
				ddi_dma_free_handle(&dmahdl);
			}

			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 */
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed mem bind");
			goto mrrereghelp_fail;
		}
		if (reuse_dmahdl) {
			bind->bi_free_dmahdl = 1;
		}

		/*
		 * Allocate the new MTT entries resource
		 */
		status = tavor_rsrc_alloc(state, TAVOR_MTT,
		    TAVOR_NUMMTT_TO_MTTSEG(nummtt_needed), sleep, &mtt);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we do need to unbind the newly bound memory
			 * before returning.
			 */
			tavor_mr_mem_unbind(state, bind);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MTT");
			goto mrrereghelp_fail;
		}

		/*
		 * Allocate MTT reference count (to track shared memory
		 * regions).  As mentioned elsewhere above, this reference
		 * count resource may never be used on the given memory region,
		 * but if it is ever later registered as a "shared" memory
		 * region then this resource will be necessary.  Note: This
		 * is only necessary here if the existing memory region is
		 * already being shared (because otherwise we already have
		 * a useable reference count resource).
		 */
		if (TAVOR_MTT_IS_SHARED(swrc_old)) {
			status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1,
			    sleep, &mtt_refcnt);
			if (status != DDI_SUCCESS) {
				/*
				 * Deregister will be called upon returning
				 * failure from this routine.  This will ensure
				 * that all current resources get properly
				 * freed up.  Unnecessary to attempt to regain
				 * software ownership of the MPT entry as that
				 * has already been done above (in
				 * tavor_mr_reregister()).  Also unnecessary
				 * to attempt to unbind the memory.
				 *
				 * But we need to unbind the newly bound
				 * memory and free up the newly allocated MTT
				 * entries before returning.
				 */
				tavor_mr_mem_unbind(state, bind);
				tavor_rsrc_free(state, &mtt);
				*dereg_level =
				    TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

				/* Set "status"/"errormsg", goto failure */
				TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
				    "failed reference count");
				goto mrrereghelp_fail;
			}
			swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
			TAVOR_MTT_REFCNT_INIT(swrc_new);
		} else {
			mtt_refcnt = mr->mr_mttrefcntp;
		}

		/*
		 * Using the new mapping and the new MTT resources, write the
		 * updated entries to MTT
		 */
		status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
		if (status != DDI_SUCCESS) {
			/*
			 * Deregister will be called upon returning failure
			 * from this routine.  This will ensure that all
			 * current resources get properly freed up.
			 * Unnecessary to attempt to regain software ownership
			 * of the MPT entry as that has already been done
			 * above (in tavor_mr_reregister()).  Also unnecessary
			 * to attempt to unbind the memory.
			 *
			 * But we need to unbind the newly bound memory,
			 * free up the newly allocated MTT entries, and
			 * (possibly) free the new MTT reference count
			 * resource before returning.
			 */
			if (TAVOR_MTT_IS_SHARED(swrc_old)) {
				tavor_rsrc_free(state, &mtt_refcnt);
			}
			tavor_mr_mem_unbind(state, bind);
			tavor_rsrc_free(state, &mtt);
			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

			/* Set "status" and "errormsg" and goto failure */
			TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed write mtt");
			goto mrrereghelp_fail;
		}

		/*
		 * Check if the memory region MTT is shared by any other MRs.
		 * Since the resource may be shared between multiple memory
		 * regions (as a result of a "RegisterSharedMR()" verb) it is
		 * important that we not free up any resources prematurely.
		 */
		if (TAVOR_MTT_IS_SHARED(swrc_old)) {
			/* Decrement MTT reference count for "old" region */
			(void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
		} else {
			/* Free up the old MTT entries resource */
			tavor_rsrc_free(state, &mr->mr_mttrsrcp);
		}

		/* Put the updated information into the mrhdl */
		mr->mr_bindinfo = *bind;
		mr->mr_logmttpgsz = mtt_pgsize_bits;
		mr->mr_mttrsrcp = mtt;
		mr->mr_mttrefcntp = mtt_refcnt;
	}

	/*
	 * Calculate and return the updated MTT address (in the DDR address
	 * space).  This will be used by the caller (tavor_mr_reregister) in
	 * the updated MPT entry
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;
	*mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx <<
	    TAVOR_MTT_SIZE_SHIFT);

	TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper);
	return (DDI_SUCCESS);

mrrereghelp_fail:
	TNF_PROBE_1(tavor_mr_rereg_xlat_helper_fail, TAVOR_TNF_ERROR, "",
	    tnf_string, msg, errormsg);
	TAVOR_TNF_EXIT(tavor_mr_rereg_xlat_helper);
	return (status);
}


/*
 * tavor_mr_nummtt_needed()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
static uint64_t
tavor_mr_nummtt_needed(tavor_state_t *state, tavor_bind_info_t *bind,
    uint_t *mtt_pgsize_bits)
{
	uint64_t	pg_offset_mask;
	uint64_t	pg_offset, tmp_length;

	/*
	 * For now we specify the page size as 8Kb (the default page size for
	 * the sun4u architecture), or 4Kb for x86.  Figure out optimal page
	 * size by examining the dmacookies XXX
	 */
	*mtt_pgsize_bits = PAGESHIFT;

	/*
	 * Count the pages spanned by [bi_addr, bi_addr + bi_len - 1] after
	 * rounding the start address down to a page boundary.  One MTT
	 * entry is required per page.
	 */
	pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
	pg_offset = bind->bi_addr & pg_offset_mask;
	tmp_length = pg_offset + (bind->bi_len - 1);
	return ((tmp_length >> *mtt_pgsize_bits) + 1);
}


/*
 * tavor_mr_mem_bind()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep)
{
	ddi_dma_attr_t	dma_attr;
	int		(*callback)(caddr_t);
	uint_t		dma_xfer_mode;
	int		status;

	/* bi_type must be set to a meaningful value to get a bind handle */
	ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR ||
	    bind->bi_type == TAVOR_BINDHDL_BUF ||
	    bind->bi_type == TAVOR_BINDHDL_UBUF);

	TAVOR_TNF_ENTER(tavor_mr_mem_bind);

	/* Set the callback flag appropriately */
	callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;

	/* Determine whether to map STREAMING or CONSISTENT */
	dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ?
	    DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;

	/*
	 * Initialize many of the default DMA attributes.  Then, if we're
	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
	 */
	if (dmahdl == NULL) {
		tavor_dma_attr_init(&dma_attr);
#ifdef	__sparc
		/*
		 * First, disable streaming and switch to consistent if
		 * configured to do so and IOMMU BYPASS is enabled.
		 */
		if (state->ts_cfg_profile->cp_disable_streaming_on_bypass &&
		    dma_xfer_mode == DDI_DMA_STREAMING &&
		    bind->bi_bypass == TAVOR_BINDMEM_BYPASS) {
			dma_xfer_mode = DDI_DMA_CONSISTENT;
		}

		/*
		 * Then, if streaming is still specified, then "bypass" is not
		 * allowed: DDI_DMA_FORCE_PHYSICAL is only set for CONSISTENT
		 * mappings, so a STREAMING mapping goes through the IOMMU.
		 */
		if ((dma_xfer_mode == DDI_DMA_CONSISTENT) &&
		    (bind->bi_bypass == TAVOR_BINDMEM_BYPASS)) {
			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
		}
#endif
		/* Allocate a DMA handle for the binding */
		status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr,
		    callback, NULL, &bind->bi_dmahdl);
		if (status != DDI_SUCCESS) {
			TNF_PROBE_0(tavor_mr_mem_bind_dmahdl_fail,
			    TAVOR_TNF_ERROR, "");
			TAVOR_TNF_EXIT(tavor_mr_mem_bind);
			return (status);
		}
		bind->bi_free_dmahdl = 1;

	} else {
		/* Reuse the caller-supplied handle; don't free it later */
		bind->bi_dmahdl = dmahdl;
		bind->bi_free_dmahdl = 0;
	}

	/*
	 * Bind the memory to get the PCI mapped addresses.  The decision
	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
	 * is determined by the "bi_type" flag.  Note: if the bind operation
	 * fails then we have to free up the DMA handle and return error.
	 */
	if (bind->bi_type == TAVOR_BINDHDL_VADDR) {
		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
		    (DDI_DMA_RDWR | dma_xfer_mode), callback, NULL,
		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
	} else {  /* TAVOR_BINDHDL_BUF || TAVOR_BINDHDL_UBUF */
		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
		    bind->bi_buf, (DDI_DMA_RDWR | dma_xfer_mode), callback,
		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
	}

	if (status != DDI_DMA_MAPPED) {
		/* Only free the handle if this routine allocated it */
		if (bind->bi_free_dmahdl != 0) {
			ddi_dma_free_handle(&bind->bi_dmahdl);
		}
		TNF_PROBE_0(tavor_mr_mem_bind_dmabind_fail, TAVOR_TNF_ERROR,
		    "");
		TAVOR_TNF_EXIT(tavor_mr_mem_bind);
		return (status);
	}

	TAVOR_TNF_EXIT(tavor_mr_mem_bind);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_mem_unbind()
 *    Context: Can be called from interrupt or base context.
 */
static void
tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind)
{
	int	status;

	TAVOR_TNF_ENTER(tavor_mr_mem_unbind);

	/*
	 * In case of TAVOR_BINDHDL_UBUF, the memory bi_buf points to
	 * is actually allocated by ddi_umem_iosetup() internally, then
	 * it's required to free it here.  Reset bi_type to TAVOR_BINDHDL_NONE
	 * not to free it again later.
	 */
	if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
		freerbuf(bind->bi_buf);
		bind->bi_type = TAVOR_BINDHDL_NONE;
	}

	/*
	 * Unbind the DMA memory for the region
	 *
	 * Note: The only way ddi_dma_unbind_handle() currently
	 * can return an error is if the handle passed in is invalid.
	 * Since this should never happen, we choose to return void
	 * from this function!  If this does return an error, however,
	 * then we print a warning message to the console.
	 */
	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
	if (status != DDI_SUCCESS) {
		TAVOR_WARNING(state, "failed to unbind DMA mapping");
		TNF_PROBE_0(tavor_mr_mem_unbind_dmaunbind_fail,
		    TAVOR_TNF_ERROR, "");
		TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
		return;
	}

	/* Free up the DMA handle (only if this binding owns it) */
	if (bind->bi_free_dmahdl != 0) {
		ddi_dma_free_handle(&bind->bi_dmahdl);
	}

	TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
}


/*
 * tavor_mr_fast_mtt_write()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
    uint32_t mtt_pgsize_bits)
{
	ddi_dma_cookie_t	dmacookie;
	uint_t			cookie_cnt;
	uint64_t		*mtt_table;
	uint64_t		mtt_entry;
	uint64_t		addr, endaddr;
	uint64_t		pagesize;
	int			i;

	TAVOR_TNF_ENTER(tavor_mr_fast_mtt_write);

	/* Calculate page size from the suggested value passed in */
	pagesize = ((uint64_t)1 << mtt_pgsize_bits);

	/*
	 * Walk the "cookie list" and fill in the MTT table entries
	 */
	i = 0;
	mtt_table = (uint64_t *)mtt->tr_addr;
	dmacookie = bind->bi_dmacookie;
	cookie_cnt = bind->bi_cookiecnt;
	while (cookie_cnt-- > 0) {
		addr = dmacookie.dmac_laddress;
		endaddr = addr + (dmacookie.dmac_size - 1);
		addr = addr & ~((uint64_t)pagesize - 1);
		while (addr <= endaddr) {
			/*
			 * Fill in the mapped addresses (calculated above) and
			 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
			 */
			mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
			ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
			addr += pagesize;
			i++;

			/*
			 * "addr" wrapping to zero means the cookie address
			 * range overflowed 64 bits; warn once and stop
			 * walking this cookie rather than looping forever.
			 */
			if (addr == 0) {
				static int do_once = 1;
				if (do_once) {
					do_once = 0;
					cmn_err(CE_NOTE, "probable error in "
					    "dma_cookie address from caller\n");
				}
				break;
			}
		}

		/*
		 * When we've reached the end of the current DMA cookie,
		 * jump to the next cookie (if there are more)
		 */
		if (cookie_cnt != 0) {
			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
		}
	}

	TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write);
	return (DDI_SUCCESS);
}

/*
 * tavor_mtt_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc)
{
	tavor_sw_refcnt_t	*rc;
	uint32_t		cnt;

	rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;

	/*
	 * Increment the MTT's reference count.  Note: returns the count
	 * value *before* the increment (post-increment), unlike
	 * tavor_mtt_refcnt_dec() which returns the post-decrement value.
	 */
	mutex_enter(&rc->swrc_lock);
	TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_inc, TAVOR_TNF_TRACE, "",
	    tnf_uint, refcnt, rc->swrc_refcnt);
	cnt = rc->swrc_refcnt++;
	mutex_exit(&rc->swrc_lock);

	return (cnt);
}


/*
 * tavor_mtt_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc)
{
	tavor_sw_refcnt_t	*rc;
	uint32_t		cnt;

	rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;

	/*
	 * Decrement the MTT's reference count.  Note: returns the count
	 * value *after* the decrement (pre-decrement).
	 */
	mutex_enter(&rc->swrc_lock);
	cnt = --rc->swrc_refcnt;
	TNF_PROBE_1_DEBUG(tavor_mtt_refcnt_dec, TAVOR_TNF_TRACE, "",
	    tnf_uint, refcnt, rc->swrc_refcnt);
	mutex_exit(&rc->swrc_lock);

	return (cnt);
}