222 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
223 if (status != DDI_SUCCESS) {
224 /* Set "status" and "errormsg" and goto failure */
225 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
226 goto mrshared_fail1;
227 }
228
229 /*
230 * Allocate the software structure for tracking the shared memory
231 * region (i.e. the Tavor Memory Region handle). If we fail here, we
232 * must undo the protection domain reference count and the previous
233 * resource allocation.
234 */
235 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
236 if (status != DDI_SUCCESS) {
237 /* Set "status" and "errormsg" and goto failure */
238 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
239 goto mrshared_fail2;
240 }
241 mr = (tavor_mrhdl_t)rsrc->tr_addr;
242 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
243
244 /*
245 * Setup and validate the memory region access flags. This means
246 * translating the IBTF's enable flags into the access flags that
247 * will be used in later operations.
248 */
249 mr->mr_accflag = 0;
250 if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
251 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
252 if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
253 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
254 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
255 mr->mr_accflag |= IBT_MR_REMOTE_READ;
256 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
257 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
258 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
259 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
260
261 /*
262 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
324 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
325 goto mrshared_fail4;
326 }
327 }
328
329 /*
330 * Copy the MTT resource pointer (and additional parameters) from
331 * the original Tavor Memory Region handle. Note: this is normally
332 * where the tavor_mr_mem_bind() routine would be called, but because
333 * we already have bound and filled-in MTT entries it is simply a
334 * matter here of managing the MTT reference count and grabbing the
335 * address of the MTT table entries (for filling in the shared region's
336 * MPT entry).
337 */
338 mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp;
339 mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
340 mr->mr_bindinfo = mrhdl->mr_bindinfo;
341 mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
342 mutex_exit(&mrhdl->mr_lock);
343 bind = &mr->mr_bindinfo;
344 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
345 mtt = mr->mr_mttrsrcp;
346
347 /*
348 * Increment the MTT reference count (to reflect the fact that
349 * the MTT is now shared)
350 */
351 (void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp);
352
353 /*
354 * Update the new "bind" virtual address. Do some extra work here
355 * to ensure proper alignment. That is, make sure that the page
356 * offset for the beginning of the old range is the same as the
357 * offset for this new mapping
358 */
359 pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
360 bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
361 (mr->mr_bindinfo.bi_addr & pgsize_msk));
362
363 /*
364 * Get the base address for the MTT table. This will be necessary
650
651 /* Set the mrhdl pointer to NULL and return success */
652 *mrhdl = NULL;
653
654 TAVOR_TNF_EXIT(tavor_mr_deregister);
655 return (DDI_SUCCESS);
656 }
657
658
659 /*
660 * tavor_mr_query()
661 * Context: Can be called from interrupt or base context.
662 */
663 /* ARGSUSED */
664 int
665 tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr,
666 ibt_mr_query_attr_t *attr)
667 {
668 TAVOR_TNF_ENTER(tavor_mr_query);
669
670 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))
671
672 mutex_enter(&mr->mr_lock);
673
674 /*
675 * Check here to see if the memory region has already been partially
676 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
677 * If so, this is an error, return failure.
678 */
679 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
680 mutex_exit(&mr->mr_lock);
681 TNF_PROBE_0(tavor_mr_query_inv_mrhdl_fail, TAVOR_TNF_ERROR, "");
682 TAVOR_TNF_EXIT(tavor_mr_query);
683 return (IBT_MR_HDL_INVALID);
684 }
685
686 /* Fill in the queried attributes */
687 attr->mr_attr_flags = mr->mr_accflag;
688 attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;
689
690 /* Fill in the "local" attributes */
691 attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
944 /* Set "status" and "errormsg" and goto failure */
945 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
946 goto mwalloc_fail1;
947 }
948
949 /*
950 * Allocate the software structure for tracking the memory window (i.e.
951 * the Tavor Memory Window handle). Note: This is actually the same
952 * software structure used for tracking memory regions, but since many
953 * of the same properties are needed, only a single structure is
954 * necessary. If we fail here, we must undo the protection domain
955 * reference count and the previous resource allocation.
956 */
957 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
958 if (status != DDI_SUCCESS) {
959 /* Set "status" and "errormsg" and goto failure */
960 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
961 goto mwalloc_fail2;
962 }
963 mw = (tavor_mwhdl_t)rsrc->tr_addr;
964 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
965
966 /*
967 * Calculate an "unbound" RKey from MPT index. In much the same way
968 * as we do for memory regions (above), this key is constructed from
969 * a "constrained" (which depends on the MPT index) and an
970 * "unconstrained" portion (which may be arbitrarily chosen).
971 */
972 tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey);
973
974 /*
975 * Fill in the MPT entry. This is the final step before passing
976 * ownership of the MPT entry to the Tavor hardware. We use all of
977 * the information collected/calculated above to fill in the
978 * requisite portions of the MPT. Note: fewer entries in the MPT
979 * entry are necessary to allocate a memory window.
980 */
981 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
982 mpt_entry.reg_win = TAVOR_MPT_IS_WINDOW;
983 mpt_entry.mem_key = mw->mr_rkey;
984 mpt_entry.pd = pd->pd_pdnum;
1053 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
1054 /* Set "status" and "errormsg" and goto failure */
1055 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
1056 TNF_PROBE_1(tavor_mw_free_fail, TAVOR_TNF_ERROR, "",
1057 tnf_string, msg, errormsg);
1058 TAVOR_TNF_EXIT(tavor_mw_free);
1059 return (status);
1060 }
1061
1062 /*
1063 * Pull all the necessary information from the Tavor Memory Window
1064 * handle. This is necessary here because the resource for the
1065 * MW handle is going to be freed up as part of the this operation.
1066 */
1067 mw = *mwhdl;
1068 mutex_enter(&mw->mr_lock);
1069 mpt = mw->mr_mptrsrcp;
1070 rsrc = mw->mr_rsrcp;
1071 pd = mw->mr_pdhdl;
1072 mutex_exit(&mw->mr_lock);
1073 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
1074
1075 /*
1076 * Reclaim the MPT entry from hardware. Note: in general, it is
1077 * unexpected for this operation to return an error.
1078 */
1079 status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
1080 0, mpt->tr_indx, sleep);
1081 if (status != TAVOR_CMD_SUCCESS) {
1082 cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n",
1083 status);
1084 TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail, TAVOR_TNF_ERROR, "",
1085 tnf_uint, status, status);
1086 TAVOR_TNF_EXIT(tavor_mw_free);
1087 return (IBT_INVALID_PARAM);
1088 }
1089
1090 /* Free the Tavor Memory Window handle */
1091 tavor_rsrc_free(state, &rsrc);
1092
1093 /* Free up the MPT entry resource */
1108 * tavor_mr_keycalc()
1109 * Context: Can be called from interrupt or base context.
1110 */
1111 void
1112 tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key)
1113 {
1114 uint32_t tmp, log_num_mpt;
1115 
1116 /*
1117 * Generate a simple key from counter. Note: We increment this
1118 * static variable _intentionally_ without any kind of mutex around
1119 * it. First, single-threading all operations through a single lock
1120 * would be a bad idea (from a performance point-of-view). Second,
1121 * the upper "unconstrained" bits don't really have to be unique
1122 * because the lower bits are guaranteed to be (although we do make a
1123 * best effort to ensure that they are). Third, the window for the
1124 * race (where both threads read and update the counter at the same
1125 * time) is incredibly small.
1126 * And, lastly, we'd like to make this into a "random" key XXX
1127 */
1128 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(tavor_debug_memkey_cnt))
/*
 * Key layout: the low cp_log_num_mpt bits carry the MPT index "indx"
 * (the "constrained" portion, unique per MPT entry); all higher bits
 * carry the current counter value (the "unconstrained" portion).
 * The composed key is returned through the "key" out-parameter.
 */
1129 log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt;
1130 tmp = (tavor_debug_memkey_cnt++) << log_num_mpt;
1131 *key = tmp | indx;
1132 }
1133
1134
1135 /*
1136  * tavor_mr_common_reg()
1137  * Context: Can be called from interrupt or base context.
 *
 * Common path for registering a memory region with the Tavor HCA:
 * allocate an MPT entry and an MR software handle, translate the
 * IBTF "enable" flags into access flags, pin userland memory when
 * the bind targets a non-kernel address space, bind the memory into
 * MTT entries, and fill in the hardware MPT entry.  On any failure
 * the "mrcommon_fail*" labels below release, in reverse order,
 * exactly the resources acquired up to the point of failure.
 * (NOTE(review): this listing elides parts of the routine -- e.g.
 * original lines 1149-1224, 1266-1283 and 1388-1449 are not shown.)
1138  */
1139 static int
1140 tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
1141 tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
1142 {
1143 tavor_rsrc_pool_info_t *rsrc_pool;
1144 tavor_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
1145 tavor_umap_db_entry_t *umapdb;
1146 tavor_sw_refcnt_t *swrc_tmp;
1147 tavor_hw_mpt_t mpt_entry;
1148 tavor_mrhdl_t mr;
/* Allocate the hardware MPT entry for this new region */
1225 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
1226 if (status != DDI_SUCCESS) {
1227 /* Set "status" and "errormsg" and goto failure */
1228 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
1229 goto mrcommon_fail1;
1230 }
1231 
1232 /*
1233 * Allocate the software structure for tracking the memory region (i.e.
1234 * the Tavor Memory Region handle). If we fail here, we must undo
1235 * the protection domain reference count and the previous resource
1236 * allocation.
1237 */
1238 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
1239 if (status != DDI_SUCCESS) {
1240 /* Set "status" and "errormsg" and goto failure */
1241 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
1242 goto mrcommon_fail2;
1243 }
1244 mr = (tavor_mrhdl_t)rsrc->tr_addr;
1245 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1246 
1247 /*
1248 * Setup and validate the memory region access flags. This means
1249 * translating the IBTF's enable flags into the access flags that
1250 * will be used in later operations.
1251 */
1252 mr->mr_accflag = 0;
1253 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1254 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1255 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1256 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1257 if (flags & IBT_MR_ENABLE_REMOTE_READ)
1258 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1259 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1260 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1261 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1262 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1263 
1264 /*
1265 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
1284 * "userland resources database". This will later be added to
1285 * the database (after all further memory registration operations are
1286 * successful). If we fail here, we must undo the reference counts
1287 * and the previous resource allocations.
1288 */
/* A region is "userland" when bound against a real (non-kernel) as */
1289 mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
1290 if (mr_is_umem) {
/* Round the locked range out to whole pages covering [bi_addr, bi_addr + bi_len) */
1291 umem_len = ptob(btopr(bind->bi_len +
1292 ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
1293 umem_addr = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
1294 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
1295 DDI_UMEMLOCK_LONGTERM);
1296 status = umem_lockmemory(umem_addr, umem_len, umem_flags,
1297 &umem_cookie, &tavor_umem_cbops, NULL);
1298 if (status != 0) {
1299 /* Set "status" and "errormsg" and goto failure */
1300 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
1301 goto mrcommon_fail3;
1302 }
1303 
1304 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1305 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1306 
/* Wrap the pinned pages in a buf(9S) so they can be DMA-bound below */
1307 bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
1308 B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
1309 if (bind->bi_buf == NULL) {
1310 /* Set "status" and "errormsg" and goto failure */
1311 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed iosetup");
1312 goto mrcommon_fail3;
1313 }
1314 bind->bi_type = TAVOR_BINDHDL_UBUF;
1315 bind->bi_buf->b_flags |= B_READ;
1316 
1317 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1318 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1319 
/* Pre-build the userland-mapping DB entry; added to the DB later on success */
1320 umapdb = tavor_umap_db_alloc(state->ts_instance,
1321 (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
1322 (uint64_t)(uintptr_t)rsrc);
1323 if (umapdb == NULL) {
1324 /* Set "status" and "errormsg" and goto failure */
1325 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
1326 goto mrcommon_fail4;
1327 }
1328 }
1329 
1330 /*
1331 * Setup the bindinfo for the mtt bind call
1332 */
1333 bh = &mr->mr_bindinfo;
1334 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
1335 bcopy(bind, bh, sizeof (tavor_bind_info_t));
1336 bh->bi_bypass = bind_type;
1337 status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
1338 &mtt_pgsize_bits);
1339 if (status != DDI_SUCCESS) {
1340 /* Set "status" and "errormsg" and goto failure */
1341 TAVOR_TNF_FAIL(status, "failed mtt bind");
1342 /*
1343 * When mtt_bind fails, freerbuf has already been done,
1344 * so make sure not to call it again.
1345 */
1346 bind->bi_type = bh->bi_type;
1347 goto mrcommon_fail5;
1348 }
1349 mr->mr_logmttpgsz = mtt_pgsize_bits;
1350 
1351 /*
1352 * Allocate MTT reference count (to track shared memory regions).
1353 * This reference count resource may never be used on the given
1354 * memory region, but if it is ever later registered as "shared"
1355 * memory region then this resource will be necessary. If we fail
1356 * here, we do pretty much the same as above to clean up.
1357 */
/*
 * NOTE(review): the error string below says "refence" -- apparent typo
 * for "reference" (runtime string, deliberately left unchanged here).
 */
1358 status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep,
1359 &mtt_refcnt);
1360 if (status != DDI_SUCCESS) {
1361 /* Set "status" and "errormsg" and goto failure */
1362 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed refence count");
1363 goto mrcommon_fail6;
1364 }
1365 mr->mr_mttrefcntp = mtt_refcnt;
1366 swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
1367 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
1368 TAVOR_MTT_REFCNT_INIT(swrc_tmp);
1369 
1370 /*
1371 * Fill in the MPT entry. This is the final step before passing
1372 * ownership of the MPT entry to the Tavor hardware. We use all of
1373 * the information collected/calculated above to fill in the
1374 * requisite portions of the MPT.
1375 */
1376 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
1377 mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
1378 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
1379 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1380 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
1381 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
1382 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
1383 mpt_entry.lr = 1;
1384 mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
/* page_sz encodes log2(MTT page size) relative to 4KB (2^0xC) */
1385 mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
1386 mpt_entry.mem_key = mr->mr_lkey;
1387 mpt_entry.pd = pd->pd_pdnum;
1450 * The following is cleanup for all possible failure cases in this routine
1451 */
/* Unwind in strict reverse order of acquisition (goto-cleanup idiom) */
1452 mrcommon_fail7:
1453 tavor_rsrc_free(state, &mtt_refcnt);
1454 mrcommon_fail6:
1455 tavor_rsrc_free(state, &mtt);
1456 tavor_mr_mem_unbind(state, bh);
1457 bind->bi_type = bh->bi_type;
1458 mrcommon_fail5:
1459 if (mr_is_umem) {
1460 tavor_umap_db_free(umapdb);
1461 }
1462 mrcommon_fail4:
1463 if (mr_is_umem) {
1464 /*
1465 * Free up the memory ddi_umem_iosetup() allocates
1466 * internally.
1467 */
1468 if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
1469 freerbuf(bind->bi_buf);
1470 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1471 bind->bi_type = TAVOR_BINDHDL_NONE;
1472 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1473 }
1474 ddi_umem_unlock(umem_cookie);
1475 }
1476 mrcommon_fail3:
1477 tavor_rsrc_free(state, &rsrc);
1478 mrcommon_fail2:
1479 tavor_rsrc_free(state, &mpt);
1480 mrcommon_fail1:
1481 tavor_pd_refcnt_dec(pd);
1482 mrcommon_fail:
1483 TNF_PROBE_1(tavor_mr_common_reg_fail, TAVOR_TNF_ERROR, "",
1484 tnf_string, msg, errormsg);
1485 TAVOR_TNF_EXIT(tavor_mr_common_reg);
1486 return (status);
1487 }
1488
1489 int
1490 tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
1491 ibt_dmr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl)
1492 {
1523 * reference count.
1524 */
1525 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
1526 if (status != DDI_SUCCESS) {
1527 status = IBT_INSUFF_RESOURCE;
1528 goto mrcommon_fail1;
1529 }
1530
1531 /*
1532 * Allocate the software structure for tracking the memory region (i.e.
1533 * the Tavor Memory Region handle). If we fail here, we must undo
1534 * the protection domain reference count and the previous resource
1535 * allocation.
1536 */
1537 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
1538 if (status != DDI_SUCCESS) {
1539 status = IBT_INSUFF_RESOURCE;
1540 goto mrcommon_fail2;
1541 }
1542 mr = (tavor_mrhdl_t)rsrc->tr_addr;
1543 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1544 bzero(mr, sizeof (*mr));
1545
1546 /*
1547 * Setup and validate the memory region access flags. This means
1548 * translating the IBTF's enable flags into the access flags that
1549 * will be used in later operations.
1550 */
1551 mr->mr_accflag = 0;
1552 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1553 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1554 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1555 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1556 if (flags & IBT_MR_ENABLE_REMOTE_READ)
1557 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1558 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1559 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1560 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1561 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1562
1563 /*
1785 * tavor_mr_common_rereg()
1786 * Context: Can be called from interrupt or base context.
1787 */
1788 static int
1789 tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
1790 tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
1791 tavor_mr_options_t *op)
1792 {
1793 tavor_rsrc_t *mpt;
1794 ibt_mr_attr_flags_t acc_flags_to_use;
1795 ibt_mr_flags_t flags;
1796 tavor_pdhdl_t pd_to_use;
1797 tavor_hw_mpt_t mpt_entry;
1798 uint64_t mtt_addr_to_use, vaddr_to_use, len_to_use;
1799 uint_t sleep, dereg_level;
1800 int status;
1801 char *errormsg;
1802
1803 TAVOR_TNF_ENTER(tavor_mr_common_rereg);
1804
1805 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1806
1807 /*
1808 * Check here to see if the memory region corresponds to a userland
1809 * mapping. Reregistration of userland memory regions is not
1810 * currently supported. Return failure. XXX
1811 */
1812 if (mr->mr_is_umem) {
1813 /* Set "status" and "errormsg" and goto failure */
1814 TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
1815 goto mrrereg_fail;
1816 }
1817
1818 mutex_enter(&mr->mr_lock);
1819
1820 /* Pull MPT resource pointer from the Tavor Memory Region handle */
1821 mpt = mr->mr_mptrsrcp;
1822
1823 /* Extract the flags field from the tavor_bind_info_t */
1824 flags = bind->bi_flags;
1825
1826 /*
2394 * software ownership of the MPT entry as that
2395 * has already been done above (in
2396 * tavor_mr_reregister()). Also unnecessary
2397 * to attempt to unbind the memory.
2398 *
2399 * But we need to unbind the newly bound
2400 * memory and free up the newly allocated MTT
2401 * entries before returning.
2402 */
2403 tavor_mr_mem_unbind(state, bind);
2404 tavor_rsrc_free(state, &mtt);
2405 *dereg_level =
2406 TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2407
2408 /* Set "status"/"errormsg", goto failure */
2409 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
2410 "failed reference count");
2411 goto mrrereghelp_fail;
2412 }
2413 swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
2414 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
2415 TAVOR_MTT_REFCNT_INIT(swrc_new);
2416 } else {
2417 mtt_refcnt = mr->mr_mttrefcntp;
2418 }
2419
2420 /*
2421 * Using the new mapping and the new MTT resources, write the
2422 * updated entries to MTT
2423 */
2424 status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
2425 if (status != DDI_SUCCESS) {
2426 /*
2427 * Deregister will be called upon returning failure
2428 * from this routine. This will ensure that all
2429 * current resources get properly freed up.
2430 * Unnecessary to attempt to regain software ownership
2431 * of the MPT entry as that has already been done
2432 * above (in tavor_mr_reregister()). Also unnecessary
2433 * to attempt to unbind the memory.
2434 *
2520 /*
2521 * tavor_mr_mem_bind()
2522 * Context: Can be called from interrupt or base context.
2523 */
2524 static int
2525 tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
2526 ddi_dma_handle_t dmahdl, uint_t sleep)
2527 {
2528 ddi_dma_attr_t dma_attr;
2529 int (*callback)(caddr_t);
2530 uint_t dma_xfer_mode;
2531 int status;
2532
2533 /* bi_type must be set to a meaningful value to get a bind handle */
2534 ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR ||
2535 bind->bi_type == TAVOR_BINDHDL_BUF ||
2536 bind->bi_type == TAVOR_BINDHDL_UBUF);
2537
2538 TAVOR_TNF_ENTER(tavor_mr_mem_bind);
2539
2540 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2541
2542 /* Set the callback flag appropriately */
2543 callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
2544
2545 /* Determine whether to map STREAMING or CONSISTENT */
2546 dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ?
2547 DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;
2548
2549 /*
2550 * Initialize many of the default DMA attributes. Then, if we're
2551 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
2552 */
2553 if (dmahdl == NULL) {
2554 tavor_dma_attr_init(&dma_attr);
2555 #ifdef __sparc
2556 /*
2557 * First, disable streaming and switch to consistent if
2558 * configured to do so and IOMMU BYPASS is enabled.
2559 */
2560 if (state->ts_cfg_profile->cp_disable_streaming_on_bypass &&
2561 dma_xfer_mode == DDI_DMA_STREAMING &&
2620 }
2621
2622
2623 /*
2624 * tavor_mr_mem_unbind()
2625 * Context: Can be called from interrupt or base context.
2626 */
2627 static void
2628 tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind)
2629 {
2630 int status;
2631
2632 TAVOR_TNF_ENTER(tavor_mr_mem_unbind);
2633
2634 /*
2635 * In case of TAVOR_BINDHDL_UBUF, the memory bi_buf points to
2636 * is actually allocated by ddi_umem_iosetup() internally, then
2637 * it's required to free it here. Reset bi_type to TAVOR_BINDHDL_NONE
2638 * not to free it again later.
2639 */
2640 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2641 if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
2642 freerbuf(bind->bi_buf);
2643 bind->bi_type = TAVOR_BINDHDL_NONE;
2644 }
2645 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
2646
2647 /*
2648 * Unbind the DMA memory for the region
2649 *
2650 * Note: The only way ddi_dma_unbind_handle() currently
2651 * can return an error is if the handle passed in is invalid.
2652 * Since this should never happen, we choose to return void
2653 * from this function! If this does return an error, however,
2654 * then we print a warning message to the console.
2655 */
2656 status = ddi_dma_unbind_handle(bind->bi_dmahdl);
2657 if (status != DDI_SUCCESS) {
2658 TAVOR_WARNING(state, "failed to unbind DMA mapping");
2659 TNF_PROBE_0(tavor_mr_mem_unbind_dmaunbind_fail,
2660 TAVOR_TNF_ERROR, "");
2661 TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
2662 return;
2663 }
2664
2665 /* Free up the DMA handle */
2698 i = 0;
2699 mtt_table = (uint64_t *)mtt->tr_addr;
2700 dmacookie = bind->bi_dmacookie;
2701 cookie_cnt = bind->bi_cookiecnt;
2702 while (cookie_cnt-- > 0) {
2703 addr = dmacookie.dmac_laddress;
2704 endaddr = addr + (dmacookie.dmac_size - 1);
2705 addr = addr & ~((uint64_t)pagesize - 1);
2706 while (addr <= endaddr) {
2707 /*
2708 * Fill in the mapped addresses (calculated above) and
2709 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
2710 */
2711 mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
2712 ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
2713 addr += pagesize;
2714 i++;
2715
2716 if (addr == 0) {
2717 static int do_once = 1;
2718 _NOTE(SCHEME_PROTECTS_DATA("safe sharing",
2719 do_once))
2720 if (do_once) {
2721 do_once = 0;
2722 cmn_err(CE_NOTE, "probable error in "
2723 "dma_cookie address from caller\n");
2724 }
2725 break;
2726 }
2727 }
2728
2729 /*
2730 * When we've reached the end of the current DMA cookie,
2731 * jump to the next cookie (if there are more)
2732 */
2733 if (cookie_cnt != 0) {
2734 ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
2735 }
2736 }
2737
2738 TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write);
2739 return (DDI_SUCCESS);
|
222 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
223 if (status != DDI_SUCCESS) {
224 /* Set "status" and "errormsg" and goto failure */
225 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
226 goto mrshared_fail1;
227 }
228
229 /*
230 * Allocate the software structure for tracking the shared memory
231 * region (i.e. the Tavor Memory Region handle). If we fail here, we
232 * must undo the protection domain reference count and the previous
233 * resource allocation.
234 */
235 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
236 if (status != DDI_SUCCESS) {
237 /* Set "status" and "errormsg" and goto failure */
238 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
239 goto mrshared_fail2;
240 }
241 mr = (tavor_mrhdl_t)rsrc->tr_addr;
242
243 /*
244 * Setup and validate the memory region access flags. This means
245 * translating the IBTF's enable flags into the access flags that
246 * will be used in later operations.
247 */
248 mr->mr_accflag = 0;
249 if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
250 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
251 if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
252 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
253 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
254 mr->mr_accflag |= IBT_MR_REMOTE_READ;
255 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
256 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
257 if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
258 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
259
260 /*
261 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
323 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
324 goto mrshared_fail4;
325 }
326 }
327
328 /*
329 * Copy the MTT resource pointer (and additional parameters) from
330 * the original Tavor Memory Region handle. Note: this is normally
331 * where the tavor_mr_mem_bind() routine would be called, but because
332 * we already have bound and filled-in MTT entries it is simply a
333 * matter here of managing the MTT reference count and grabbing the
334 * address of the MTT table entries (for filling in the shared region's
335 * MPT entry).
336 */
337 mr->mr_mttrsrcp = mrhdl->mr_mttrsrcp;
338 mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
339 mr->mr_bindinfo = mrhdl->mr_bindinfo;
340 mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
341 mutex_exit(&mrhdl->mr_lock);
342 bind = &mr->mr_bindinfo;
343 mtt = mr->mr_mttrsrcp;
344
345 /*
346 * Increment the MTT reference count (to reflect the fact that
347 * the MTT is now shared)
348 */
349 (void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp);
350
351 /*
352 * Update the new "bind" virtual address. Do some extra work here
353 * to ensure proper alignment. That is, make sure that the page
354 * offset for the beginning of the old range is the same as the
355 * offset for this new mapping
356 */
357 pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
358 bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
359 (mr->mr_bindinfo.bi_addr & pgsize_msk));
360
361 /*
362 * Get the base address for the MTT table. This will be necessary
648
649 /* Set the mrhdl pointer to NULL and return success */
650 *mrhdl = NULL;
651
652 TAVOR_TNF_EXIT(tavor_mr_deregister);
653 return (DDI_SUCCESS);
654 }
655
656
657 /*
658 * tavor_mr_query()
659 * Context: Can be called from interrupt or base context.
660 */
661 /* ARGSUSED */
662 int
663 tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr,
664 ibt_mr_query_attr_t *attr)
665 {
666 TAVOR_TNF_ENTER(tavor_mr_query);
667
668 mutex_enter(&mr->mr_lock);
669
670 /*
671 * Check here to see if the memory region has already been partially
672 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
673 * If so, this is an error, return failure.
674 */
675 if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
676 mutex_exit(&mr->mr_lock);
677 TNF_PROBE_0(tavor_mr_query_inv_mrhdl_fail, TAVOR_TNF_ERROR, "");
678 TAVOR_TNF_EXIT(tavor_mr_query);
679 return (IBT_MR_HDL_INVALID);
680 }
681
682 /* Fill in the queried attributes */
683 attr->mr_attr_flags = mr->mr_accflag;
684 attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;
685
686 /* Fill in the "local" attributes */
687 attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
940 /* Set "status" and "errormsg" and goto failure */
941 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
942 goto mwalloc_fail1;
943 }
944
945 /*
946 * Allocate the software structure for tracking the memory window (i.e.
947 * the Tavor Memory Window handle). Note: This is actually the same
948 * software structure used for tracking memory regions, but since many
949 * of the same properties are needed, only a single structure is
950 * necessary. If we fail here, we must undo the protection domain
951 * reference count and the previous resource allocation.
952 */
953 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
954 if (status != DDI_SUCCESS) {
955 /* Set "status" and "errormsg" and goto failure */
956 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
957 goto mwalloc_fail2;
958 }
959 mw = (tavor_mwhdl_t)rsrc->tr_addr;
960
961 /*
962 * Calculate an "unbound" RKey from MPT index. In much the same way
963 * as we do for memory regions (above), this key is constructed from
964 * a "constrained" (which depends on the MPT index) and an
965 * "unconstrained" portion (which may be arbitrarily chosen).
966 */
967 tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey);
968
969 /*
970 * Fill in the MPT entry. This is the final step before passing
971 * ownership of the MPT entry to the Tavor hardware. We use all of
972 * the information collected/calculated above to fill in the
973 * requisite portions of the MPT. Note: fewer entries in the MPT
974 * entry are necessary to allocate a memory window.
975 */
976 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
977 mpt_entry.reg_win = TAVOR_MPT_IS_WINDOW;
978 mpt_entry.mem_key = mw->mr_rkey;
979 mpt_entry.pd = pd->pd_pdnum;
1048 (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
1049 /* Set "status" and "errormsg" and goto failure */
1050 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
1051 TNF_PROBE_1(tavor_mw_free_fail, TAVOR_TNF_ERROR, "",
1052 tnf_string, msg, errormsg);
1053 TAVOR_TNF_EXIT(tavor_mw_free);
1054 return (status);
1055 }
1056
1057 /*
1058 * Pull all the necessary information from the Tavor Memory Window
1059 * handle. This is necessary here because the resource for the
1060 * MW handle is going to be freed up as part of the this operation.
1061 */
1062 mw = *mwhdl;
1063 mutex_enter(&mw->mr_lock);
1064 mpt = mw->mr_mptrsrcp;
1065 rsrc = mw->mr_rsrcp;
1066 pd = mw->mr_pdhdl;
1067 mutex_exit(&mw->mr_lock);
1068
1069 /*
1070 * Reclaim the MPT entry from hardware. Note: in general, it is
1071 * unexpected for this operation to return an error.
1072 */
1073 status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
1074 0, mpt->tr_indx, sleep);
1075 if (status != TAVOR_CMD_SUCCESS) {
1076 cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n",
1077 status);
1078 TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail, TAVOR_TNF_ERROR, "",
1079 tnf_uint, status, status);
1080 TAVOR_TNF_EXIT(tavor_mw_free);
1081 return (IBT_INVALID_PARAM);
1082 }
1083
1084 /* Free the Tavor Memory Window handle */
1085 tavor_rsrc_free(state, &rsrc);
1086
1087 /* Free up the MPT entry resource */
1102 * tavor_mr_keycalc()
1103 * Context: Can be called from interrupt or base context.
1104 */
1105 void
1106 tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key)
1107 {
1108 uint32_t tmp, log_num_mpt;
1109 
1110 /*
1111 * Generate a simple key from counter. Note: We increment this
1112 * static variable _intentionally_ without any kind of mutex around
1113 * it. First, single-threading all operations through a single lock
1114 * would be a bad idea (from a performance point-of-view). Second,
1115 * the upper "unconstrained" bits don't really have to be unique
1116 * because the lower bits are guaranteed to be (although we do make a
1117 * best effort to ensure that they are). Third, the window for the
1118 * race (where both threads read and update the counter at the same
1119 * time) is incredibly small.
1120 * And, lastly, we'd like to make this into a "random" key XXX
1121 */
/*
 * Key layout: the low cp_log_num_mpt bits carry the MPT index "indx"
 * (the "constrained" portion, unique per MPT entry); all higher bits
 * carry the current counter value (the "unconstrained" portion).
 * The composed key is returned through the "key" out-parameter.
 */
1122 log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt;
1123 tmp = (tavor_debug_memkey_cnt++) << log_num_mpt;
1124 *key = tmp | indx;
1125 }
1126
1127
1128 /*
1129 * tavor_mr_common_reg()
1130 * Context: Can be called from interrupt or base context.
1131 */
1132 static int
1133 tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
1134 tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
1135 {
1136 tavor_rsrc_pool_info_t *rsrc_pool;
1137 tavor_rsrc_t *mpt, *mtt, *rsrc, *mtt_refcnt;
1138 tavor_umap_db_entry_t *umapdb;
1139 tavor_sw_refcnt_t *swrc_tmp;
1140 tavor_hw_mpt_t mpt_entry;
1141 tavor_mrhdl_t mr;
1218 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
1219 if (status != DDI_SUCCESS) {
1220 /* Set "status" and "errormsg" and goto failure */
1221 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
1222 goto mrcommon_fail1;
1223 }
1224
1225 /*
1226 * Allocate the software structure for tracking the memory region (i.e.
1227 * the Tavor Memory Region handle). If we fail here, we must undo
1228 * the protection domain reference count and the previous resource
1229 * allocation.
1230 */
1231 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
1232 if (status != DDI_SUCCESS) {
1233 /* Set "status" and "errormsg" and goto failure */
1234 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
1235 goto mrcommon_fail2;
1236 }
1237 mr = (tavor_mrhdl_t)rsrc->tr_addr;
1238
1239 /*
1240 * Setup and validate the memory region access flags. This means
1241 * translating the IBTF's enable flags into the access flags that
1242 * will be used in later operations.
1243 */
1244 mr->mr_accflag = 0;
1245 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1246 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1247 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1248 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1249 if (flags & IBT_MR_ENABLE_REMOTE_READ)
1250 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1251 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1252 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1253 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1254 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1255
1256 /*
1257 * Calculate keys (Lkey, Rkey) from MPT index. Each key is formed
1276 * "userland resources database". This will later be added to
1277 * the database (after all further memory registration operations are
1278 * successful). If we fail here, we must undo the reference counts
1279 * and the previous resource allocations.
1280 */
1281 mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
1282 if (mr_is_umem) {
1283 umem_len = ptob(btopr(bind->bi_len +
1284 ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
1285 umem_addr = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
1286 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
1287 DDI_UMEMLOCK_LONGTERM);
1288 status = umem_lockmemory(umem_addr, umem_len, umem_flags,
1289 &umem_cookie, &tavor_umem_cbops, NULL);
1290 if (status != 0) {
1291 /* Set "status" and "errormsg" and goto failure */
1292 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
1293 goto mrcommon_fail3;
1294 }
1295
1296 bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
1297 B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
1298 if (bind->bi_buf == NULL) {
1299 /* Set "status" and "errormsg" and goto failure */
1300 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed iosetup");
1301 goto mrcommon_fail3;
1302 }
1303 bind->bi_type = TAVOR_BINDHDL_UBUF;
1304 bind->bi_buf->b_flags |= B_READ;
1305
1306 umapdb = tavor_umap_db_alloc(state->ts_instance,
1307 (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
1308 (uint64_t)(uintptr_t)rsrc);
1309 if (umapdb == NULL) {
1310 /* Set "status" and "errormsg" and goto failure */
1311 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
1312 goto mrcommon_fail4;
1313 }
1314 }
1315
1316 /*
1317 * Setup the bindinfo for the mtt bind call
1318 */
1319 bh = &mr->mr_bindinfo;
1320 bcopy(bind, bh, sizeof (tavor_bind_info_t));
1321 bh->bi_bypass = bind_type;
1322 status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
1323 &mtt_pgsize_bits);
1324 if (status != DDI_SUCCESS) {
1325 /* Set "status" and "errormsg" and goto failure */
1326 TAVOR_TNF_FAIL(status, "failed mtt bind");
1327 /*
1328 * When mtt_bind fails, freerbuf has already been done,
1329 * so make sure not to call it again.
1330 */
1331 bind->bi_type = bh->bi_type;
1332 goto mrcommon_fail5;
1333 }
1334 mr->mr_logmttpgsz = mtt_pgsize_bits;
1335
1336 /*
1337 * Allocate MTT reference count (to track shared memory regions).
1338 * This reference count resource may never be used on the given
1339 * memory region, but if it is ever later registered as "shared"
1340 * memory region then this resource will be necessary. If we fail
1341 * here, we do pretty much the same as above to clean up.
1342 */
1343 status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep,
1344 &mtt_refcnt);
1345 if (status != DDI_SUCCESS) {
1346 /* Set "status" and "errormsg" and goto failure */
1347 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed refence count");
1348 goto mrcommon_fail6;
1349 }
1350 mr->mr_mttrefcntp = mtt_refcnt;
1351 swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
1352 TAVOR_MTT_REFCNT_INIT(swrc_tmp);
1353
1354 /*
1355 * Fill in the MPT entry. This is the final step before passing
1356 * ownership of the MPT entry to the Tavor hardware. We use all of
1357 * the information collected/calculated above to fill in the
1358 * requisite portions of the MPT.
1359 */
1360 bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
1361 mpt_entry.m_io = TAVOR_MEM_CYCLE_GENERATE;
1362 mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND) ? 1 : 0;
1363 mpt_entry.atomic = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1364 mpt_entry.rw = (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ? 1 : 0;
1365 mpt_entry.rr = (mr->mr_accflag & IBT_MR_REMOTE_READ) ? 1 : 0;
1366 mpt_entry.lw = (mr->mr_accflag & IBT_MR_LOCAL_WRITE) ? 1 : 0;
1367 mpt_entry.lr = 1;
1368 mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
1369 mpt_entry.page_sz = mr->mr_logmttpgsz - 0xC;
1370 mpt_entry.mem_key = mr->mr_lkey;
1371 mpt_entry.pd = pd->pd_pdnum;
1434 * The following is cleanup for all possible failure cases in this routine
1435 */
1436 mrcommon_fail7:
1437 tavor_rsrc_free(state, &mtt_refcnt);
1438 mrcommon_fail6:
1439 tavor_rsrc_free(state, &mtt);
1440 tavor_mr_mem_unbind(state, bh);
1441 bind->bi_type = bh->bi_type;
1442 mrcommon_fail5:
1443 if (mr_is_umem) {
1444 tavor_umap_db_free(umapdb);
1445 }
1446 mrcommon_fail4:
1447 if (mr_is_umem) {
1448 /*
1449 * Free up the memory ddi_umem_iosetup() allocates
1450 * internally.
1451 */
1452 if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
1453 freerbuf(bind->bi_buf);
1454 bind->bi_type = TAVOR_BINDHDL_NONE;
1455 }
1456 ddi_umem_unlock(umem_cookie);
1457 }
1458 mrcommon_fail3:
1459 tavor_rsrc_free(state, &rsrc);
1460 mrcommon_fail2:
1461 tavor_rsrc_free(state, &mpt);
1462 mrcommon_fail1:
1463 tavor_pd_refcnt_dec(pd);
1464 mrcommon_fail:
1465 TNF_PROBE_1(tavor_mr_common_reg_fail, TAVOR_TNF_ERROR, "",
1466 tnf_string, msg, errormsg);
1467 TAVOR_TNF_EXIT(tavor_mr_common_reg);
1468 return (status);
1469 }
1470
1471 int
1472 tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
1473 ibt_dmr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl)
1474 {
1505 * reference count.
1506 */
1507 status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
1508 if (status != DDI_SUCCESS) {
1509 status = IBT_INSUFF_RESOURCE;
1510 goto mrcommon_fail1;
1511 }
1512
1513 /*
1514 * Allocate the software structure for tracking the memory region (i.e.
1515 * the Tavor Memory Region handle). If we fail here, we must undo
1516 * the protection domain reference count and the previous resource
1517 * allocation.
1518 */
1519 status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
1520 if (status != DDI_SUCCESS) {
1521 status = IBT_INSUFF_RESOURCE;
1522 goto mrcommon_fail2;
1523 }
1524 mr = (tavor_mrhdl_t)rsrc->tr_addr;
1525 bzero(mr, sizeof (*mr));
1526
1527 /*
1528 * Setup and validate the memory region access flags. This means
1529 * translating the IBTF's enable flags into the access flags that
1530 * will be used in later operations.
1531 */
1532 mr->mr_accflag = 0;
1533 if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1534 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1535 if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1536 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1537 if (flags & IBT_MR_ENABLE_REMOTE_READ)
1538 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1539 if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1540 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1541 if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1542 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1543
1544 /*
1766 * tavor_mr_common_rereg()
1767 * Context: Can be called from interrupt or base context.
1768 */
1769 static int
1770 tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
1771 tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
1772 tavor_mr_options_t *op)
1773 {
1774 tavor_rsrc_t *mpt;
1775 ibt_mr_attr_flags_t acc_flags_to_use;
1776 ibt_mr_flags_t flags;
1777 tavor_pdhdl_t pd_to_use;
1778 tavor_hw_mpt_t mpt_entry;
1779 uint64_t mtt_addr_to_use, vaddr_to_use, len_to_use;
1780 uint_t sleep, dereg_level;
1781 int status;
1782 char *errormsg;
1783
1784 TAVOR_TNF_ENTER(tavor_mr_common_rereg);
1785
1786 /*
1787 * Check here to see if the memory region corresponds to a userland
1788 * mapping. Reregistration of userland memory regions is not
1789 * currently supported. Return failure. XXX
1790 */
1791 if (mr->mr_is_umem) {
1792 /* Set "status" and "errormsg" and goto failure */
1793 TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
1794 goto mrrereg_fail;
1795 }
1796
1797 mutex_enter(&mr->mr_lock);
1798
1799 /* Pull MPT resource pointer from the Tavor Memory Region handle */
1800 mpt = mr->mr_mptrsrcp;
1801
1802 /* Extract the flags field from the tavor_bind_info_t */
1803 flags = bind->bi_flags;
1804
1805 /*
2373 * software ownership of the MPT entry as that
2374 * has already been done above (in
2375 * tavor_mr_reregister()). Also unnecessary
2376 * to attempt to unbind the memory.
2377 *
2378 * But we need to unbind the newly bound
2379 * memory and free up the newly allocated MTT
2380 * entries before returning.
2381 */
2382 tavor_mr_mem_unbind(state, bind);
2383 tavor_rsrc_free(state, &mtt);
2384 *dereg_level =
2385 TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2386
2387 /* Set "status"/"errormsg", goto failure */
2388 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
2389 "failed reference count");
2390 goto mrrereghelp_fail;
2391 }
2392 swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
2393 TAVOR_MTT_REFCNT_INIT(swrc_new);
2394 } else {
2395 mtt_refcnt = mr->mr_mttrefcntp;
2396 }
2397
2398 /*
2399 * Using the new mapping and the new MTT resources, write the
2400 * updated entries to MTT
2401 */
2402 status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
2403 if (status != DDI_SUCCESS) {
2404 /*
2405 * Deregister will be called upon returning failure
2406 * from this routine. This will ensure that all
2407 * current resources get properly freed up.
2408 * Unnecessary to attempt to regain software ownership
2409 * of the MPT entry as that has already been done
2410 * above (in tavor_mr_reregister()). Also unnecessary
2411 * to attempt to unbind the memory.
2412 *
2498 /*
2499 * tavor_mr_mem_bind()
2500 * Context: Can be called from interrupt or base context.
2501 */
2502 static int
2503 tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
2504 ddi_dma_handle_t dmahdl, uint_t sleep)
2505 {
2506 ddi_dma_attr_t dma_attr;
2507 int (*callback)(caddr_t);
2508 uint_t dma_xfer_mode;
2509 int status;
2510
2511 /* bi_type must be set to a meaningful value to get a bind handle */
2512 ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR ||
2513 bind->bi_type == TAVOR_BINDHDL_BUF ||
2514 bind->bi_type == TAVOR_BINDHDL_UBUF);
2515
2516 TAVOR_TNF_ENTER(tavor_mr_mem_bind);
2517
2518 /* Set the callback flag appropriately */
2519 callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
2520
2521 /* Determine whether to map STREAMING or CONSISTENT */
2522 dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ?
2523 DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;
2524
2525 /*
2526 * Initialize many of the default DMA attributes. Then, if we're
2527 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
2528 */
2529 if (dmahdl == NULL) {
2530 tavor_dma_attr_init(&dma_attr);
2531 #ifdef __sparc
2532 /*
2533 * First, disable streaming and switch to consistent if
2534 * configured to do so and IOMMU BYPASS is enabled.
2535 */
2536 if (state->ts_cfg_profile->cp_disable_streaming_on_bypass &&
2537 dma_xfer_mode == DDI_DMA_STREAMING &&
2596 }
2597
2598
2599 /*
2600 * tavor_mr_mem_unbind()
2601 * Context: Can be called from interrupt or base context.
2602 */
2603 static void
2604 tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind)
2605 {
2606 int status;
2607
2608 TAVOR_TNF_ENTER(tavor_mr_mem_unbind);
2609
	/*
	 * In the TAVOR_BINDHDL_UBUF case, the memory that bi_buf points to
	 * was allocated internally by ddi_umem_iosetup(), so it must be
	 * freed here.  Reset bi_type to TAVOR_BINDHDL_NONE so that it is
	 * not freed again later.
	 */
2616 if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
2617 freerbuf(bind->bi_buf);
2618 bind->bi_type = TAVOR_BINDHDL_NONE;
2619 }
2620
2621 /*
2622 * Unbind the DMA memory for the region
2623 *
2624 * Note: The only way ddi_dma_unbind_handle() currently
2625 * can return an error is if the handle passed in is invalid.
2626 * Since this should never happen, we choose to return void
2627 * from this function! If this does return an error, however,
2628 * then we print a warning message to the console.
2629 */
2630 status = ddi_dma_unbind_handle(bind->bi_dmahdl);
2631 if (status != DDI_SUCCESS) {
2632 TAVOR_WARNING(state, "failed to unbind DMA mapping");
2633 TNF_PROBE_0(tavor_mr_mem_unbind_dmaunbind_fail,
2634 TAVOR_TNF_ERROR, "");
2635 TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
2636 return;
2637 }
2638
2639 /* Free up the DMA handle */
2672 i = 0;
2673 mtt_table = (uint64_t *)mtt->tr_addr;
2674 dmacookie = bind->bi_dmacookie;
2675 cookie_cnt = bind->bi_cookiecnt;
2676 while (cookie_cnt-- > 0) {
2677 addr = dmacookie.dmac_laddress;
2678 endaddr = addr + (dmacookie.dmac_size - 1);
2679 addr = addr & ~((uint64_t)pagesize - 1);
2680 while (addr <= endaddr) {
2681 /*
2682 * Fill in the mapped addresses (calculated above) and
2683 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
2684 */
2685 mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
2686 ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
2687 addr += pagesize;
2688 i++;
2689
2690 if (addr == 0) {
2691 static int do_once = 1;
2692 if (do_once) {
2693 do_once = 0;
2694 cmn_err(CE_NOTE, "probable error in "
2695 "dma_cookie address from caller\n");
2696 }
2697 break;
2698 }
2699 }
2700
2701 /*
2702 * When we've reached the end of the current DMA cookie,
2703 * jump to the next cookie (if there are more)
2704 */
2705 if (cookie_cnt != 0) {
2706 ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
2707 }
2708 }
2709
2710 TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write);
2711 return (DDI_SUCCESS);
|