8368 remove warlock leftovers from usr/src/uts


 222         status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
 223         if (status != DDI_SUCCESS) {
 224                 /* Set "status" and "errormsg" and goto failure */
 225                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
 226                 goto mrshared_fail1;
 227         }
 228 
 229         /*
 230          * Allocate the software structure for tracking the shared memory
 231          * region (i.e. the Tavor Memory Region handle).  If we fail here, we
 232          * must undo the protection domain reference count and the previous
 233          * resource allocation.
 234          */
 235         status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
 236         if (status != DDI_SUCCESS) {
 237                 /* Set "status" and "errormsg" and goto failure */
 238                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
 239                 goto mrshared_fail2;
 240         }
 241         mr = (tavor_mrhdl_t)rsrc->tr_addr;
 242         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
 243 
 244         /*
 245          * Setup and validate the memory region access flags.  This means
 246          * translating the IBTF's enable flags into the access flags that
 247          * will be used in later operations.
 248          */
 249         mr->mr_accflag = 0;
 250         if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
 251                 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
 252         if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
 253                 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
 254         if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
 255                 mr->mr_accflag |= IBT_MR_REMOTE_READ;
 256         if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
 257                 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
 258         if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
 259                 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
 260 
 261         /*
 262          * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed


 324                         TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
 325                         goto mrshared_fail4;
 326                 }
 327         }
 328 
 329         /*
 330          * Copy the MTT resource pointer (and additional parameters) from
 331          * the original Tavor Memory Region handle.  Note: this is normally
 332          * where the tavor_mr_mem_bind() routine would be called, but because
 333          * we already have bound and filled-in MTT entries it is simply a
 334          * matter here of managing the MTT reference count and grabbing the
 335          * address of the MTT table entries (for filling in the shared region's
 336          * MPT entry).
 337          */
 338         mr->mr_mttrsrcp        = mrhdl->mr_mttrsrcp;
 339         mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
 340         mr->mr_bindinfo        = mrhdl->mr_bindinfo;
 341         mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
 342         mutex_exit(&mrhdl->mr_lock);
 343         bind = &mr->mr_bindinfo;
 344         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
 345         mtt = mr->mr_mttrsrcp;
 346 
 347         /*
 348          * Increment the MTT reference count (to reflect the fact that
 349          * the MTT is now shared)
 350          */
 351         (void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp);
 352 
 353         /*
 354          * Update the new "bind" virtual address.  Do some extra work here
 355          * to ensure proper alignment.  That is, make sure that the page
 356          * offset for the beginning of the old range is the same as the
 357          * offset for this new mapping
 358          */
 359         pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
 360         bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
 361             (mr->mr_bindinfo.bi_addr & pgsize_msk));
 362 
 363         /*
 364          * Get the base address for the MTT table.  This will be necessary


 650 
 651         /* Set the mrhdl pointer to NULL and return success */
 652         *mrhdl = NULL;
 653 
 654         TAVOR_TNF_EXIT(tavor_mr_deregister);
 655         return (DDI_SUCCESS);
 656 }
 657 
 658 
 659 /*
 660  * tavor_mr_query()
 661  *    Context: Can be called from interrupt or base context.
 662  */
 663 /* ARGSUSED */
 664 int
 665 tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr,
 666     ibt_mr_query_attr_t *attr)
 667 {
 668         TAVOR_TNF_ENTER(tavor_mr_query);
 669 
 670         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))
 671 
 672         mutex_enter(&mr->mr_lock);
 673 
 674         /*
 675          * Check here to see if the memory region has already been partially
 676          * deregistered as a result of a tavor_umap_umemlock_cb() callback.
 677          * If so, this is an error, return failure.
 678          */
 679         if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
 680                 mutex_exit(&mr->mr_lock);
 681                 TNF_PROBE_0(tavor_mr_query_inv_mrhdl_fail, TAVOR_TNF_ERROR, "");
 682                 TAVOR_TNF_EXIT(tavor_mr_query);
 683                 return (IBT_MR_HDL_INVALID);
 684         }
 685 
 686         /* Fill in the queried attributes */
 687         attr->mr_attr_flags = mr->mr_accflag;
 688         attr->mr_pd  = (ibt_pd_hdl_t)mr->mr_pdhdl;
 689 
 690         /* Fill in the "local" attributes */
 691         attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;


 944                 /* Set "status" and "errormsg" and goto failure */
 945                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
 946                 goto mwalloc_fail1;
 947         }
 948 
 949         /*
 950          * Allocate the software structure for tracking the memory window (i.e.
 951          * the Tavor Memory Window handle).  Note: This is actually the same
 952          * software structure used for tracking memory regions, but since many
 953          * of the same properties are needed, only a single structure is
 954          * necessary.  If we fail here, we must undo the protection domain
 955          * reference count and the previous resource allocation.
 956          */
 957         status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
 958         if (status != DDI_SUCCESS) {
 959                 /* Set "status" and "errormsg" and goto failure */
 960                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
 961                 goto mwalloc_fail2;
 962         }
 963         mw = (tavor_mwhdl_t)rsrc->tr_addr;
 964         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
 965 
 966         /*
 967          * Calculate an "unbound" RKey from MPT index.  In much the same way
 968          * as we do for memory regions (above), this key is constructed from
 969          * a "constrained" (which depends on the MPT index) and an
 970          * "unconstrained" portion (which may be arbitrarily chosen).
 971          */
 972         tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey);
 973 
 974         /*
 975          * Fill in the MPT entry.  This is the final step before passing
 976          * ownership of the MPT entry to the Tavor hardware.  We use all of
 977          * the information collected/calculated above to fill in the
 978          * requisite portions of the MPT.  Note: fewer entries in the MPT
 979          * entry are necessary to allocate a memory window.
 980          */
 981         bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
 982         mpt_entry.reg_win       = TAVOR_MPT_IS_WINDOW;
 983         mpt_entry.mem_key       = mw->mr_rkey;
 984         mpt_entry.pd            = pd->pd_pdnum;


1053             (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
1054                 /* Set "status" and "errormsg" and goto failure */
1055                 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
1056                 TNF_PROBE_1(tavor_mw_free_fail, TAVOR_TNF_ERROR, "",
1057                     tnf_string, msg, errormsg);
1058                 TAVOR_TNF_EXIT(tavor_mw_free);
1059                 return (status);
1060         }
1061 
1062         /*
1063          * Pull all the necessary information from the Tavor Memory Window
1064          * handle.  This is necessary here because the resource for the
 1065          * MW handle is going to be freed up as part of this operation.
1066          */
1067         mw      = *mwhdl;
1068         mutex_enter(&mw->mr_lock);
1069         mpt     = mw->mr_mptrsrcp;
1070         rsrc    = mw->mr_rsrcp;
1071         pd      = mw->mr_pdhdl;
1072         mutex_exit(&mw->mr_lock);
1073         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))
1074 
1075         /*
1076          * Reclaim the MPT entry from hardware.  Note: in general, it is
1077          * unexpected for this operation to return an error.
1078          */
1079         status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
1080             0, mpt->tr_indx, sleep);
1081         if (status != TAVOR_CMD_SUCCESS) {
1082                 cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n",
1083                     status);
1084                 TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail, TAVOR_TNF_ERROR, "",
1085                     tnf_uint, status, status);
1086                 TAVOR_TNF_EXIT(tavor_mw_free);
1087                 return (IBT_INVALID_PARAM);
1088         }
1089 
1090         /* Free the Tavor Memory Window handle */
1091         tavor_rsrc_free(state, &rsrc);
1092 
1093         /* Free up the MPT entry resource */


1108  * tavor_mr_keycalc()
1109  *    Context: Can be called from interrupt or base context.
1110  */
1111 void
1112 tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key)
1113 {
1114         uint32_t        tmp, log_num_mpt;
1115 
1116         /*
1117          * Generate a simple key from counter.  Note:  We increment this
1118          * static variable _intentionally_ without any kind of mutex around
1119          * it.  First, single-threading all operations through a single lock
1120          * would be a bad idea (from a performance point-of-view).  Second,
1121          * the upper "unconstrained" bits don't really have to be unique
1122          * because the lower bits are guaranteed to be (although we do make a
1123          * best effort to ensure that they are).  Third, the window for the
1124          * race (where both threads read and update the counter at the same
1125          * time) is incredibly small.
1126          * And, lastly, we'd like to make this into a "random" key XXX
1127          */
1128         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(tavor_debug_memkey_cnt))
1129         log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt;
1130         tmp = (tavor_debug_memkey_cnt++) << log_num_mpt;
1131         *key = tmp | indx;
1132 }
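
As the comment above explains, tavor_mr_keycalc() packs the MPT index into the low cp_log_num_mpt bits of the key and an intentionally unlocked, incrementing counter into the bits above them, so the key stays unique in its low bits even if two threads race on the counter. A stand-alone sketch of that layout, using a hypothetical log_num_mpt of 17:

#include <stdint.h>
#include <stdio.h>

static uint32_t memkey_cnt;	/* stand-in for tavor_debug_memkey_cnt */

/*
 * Form a memory key: the low log_num_mpt bits hold the MPT index
 * (guaranteed unique), the bits above them hold the counter value.
 */
static uint32_t
keycalc(uint32_t indx, uint32_t log_num_mpt)
{
	uint32_t tmp = (memkey_cnt++) << log_num_mpt;

	return (tmp | indx);
}

int
main(void)
{
	int	i;

	/* index 0x1a3 with counter values 0, 1, 2 */
	for (i = 0; i < 3; i++)
		(void) printf("key = 0x%08x\n", keycalc(0x1a3, 17));
	return (0);
}
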
1133 
1134 
1135 /*
1136  * tavor_mr_common_reg()
1137  *    Context: Can be called from interrupt or base context.
1138  */
1139 static int
1140 tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
1141     tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
1142 {
1143         tavor_rsrc_pool_info_t  *rsrc_pool;
1144         tavor_rsrc_t            *mpt, *mtt, *rsrc, *mtt_refcnt;
1145         tavor_umap_db_entry_t   *umapdb;
1146         tavor_sw_refcnt_t       *swrc_tmp;
1147         tavor_hw_mpt_t          mpt_entry;
1148         tavor_mrhdl_t           mr;


1225         status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
1226         if (status != DDI_SUCCESS) {
1227                 /* Set "status" and "errormsg" and goto failure */
1228                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
1229                 goto mrcommon_fail1;
1230         }
1231 
1232         /*
1233          * Allocate the software structure for tracking the memory region (i.e.
1234          * the Tavor Memory Region handle).  If we fail here, we must undo
1235          * the protection domain reference count and the previous resource
1236          * allocation.
1237          */
1238         status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
1239         if (status != DDI_SUCCESS) {
1240                 /* Set "status" and "errormsg" and goto failure */
1241                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
1242                 goto mrcommon_fail2;
1243         }
1244         mr = (tavor_mrhdl_t)rsrc->tr_addr;
1245         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1246 
1247         /*
1248          * Setup and validate the memory region access flags.  This means
1249          * translating the IBTF's enable flags into the access flags that
1250          * will be used in later operations.
1251          */
1252         mr->mr_accflag = 0;
1253         if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1254                 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1255         if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1256                 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1257         if (flags & IBT_MR_ENABLE_REMOTE_READ)
1258                 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1259         if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1260                 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1261         if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1262                 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1263 
1264         /*
1265          * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed


1284          * "userland resources database".  This will later be added to
1285          * the database (after all further memory registration operations are
1286          * successful).  If we fail here, we must undo the reference counts
1287          * and the previous resource allocations.
1288          */
1289         mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
1290         if (mr_is_umem) {
1291                 umem_len   = ptob(btopr(bind->bi_len +
1292                     ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
1293                 umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
1294                 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
1295                     DDI_UMEMLOCK_LONGTERM);
1296                 status = umem_lockmemory(umem_addr, umem_len, umem_flags,
1297                     &umem_cookie, &tavor_umem_cbops, NULL);
1298                 if (status != 0) {
1299                         /* Set "status" and "errormsg" and goto failure */
1300                         TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
1301                         goto mrcommon_fail3;
1302                 }
1303 
1304                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1305                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1306 
1307                 bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
1308                     B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
1309                 if (bind->bi_buf == NULL) {
1310                         /* Set "status" and "errormsg" and goto failure */
1311                         TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed iosetup");
1312                         goto mrcommon_fail3;
1313                 }
1314                 bind->bi_type = TAVOR_BINDHDL_UBUF;
1315                 bind->bi_buf->b_flags |= B_READ;
1316 
1317                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
1318                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1319 
1320                 umapdb = tavor_umap_db_alloc(state->ts_instance,
1321                     (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
1322                     (uint64_t)(uintptr_t)rsrc);
1323                 if (umapdb == NULL) {
1324                         /* Set "status" and "errormsg" and goto failure */
1325                         TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
1326                         goto mrcommon_fail4;
1327                 }
1328         }
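
Before locking a userland buffer, the block above first rounds the caller's (address, length) out to whole pages: the start address is truncated to a page boundary and the length grows by the leading offset, rounded up. A user-space sketch of the same arithmetic; the 4 KB page size and helper name are illustrative, not the DDI ptob()/btopr() macros:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define	MY_PAGESIZE	4096UL
#define	MY_PAGEOFFSET	(MY_PAGESIZE - 1)

/*
 * Expand an arbitrary (addr, len) range to the page-aligned start and
 * page-rounded length that cover it, mirroring the computation above.
 */
static void
page_round(uintptr_t addr, size_t len, uintptr_t *lock_addr, size_t *lock_len)
{
	size_t	off = addr & MY_PAGEOFFSET;

	*lock_addr = addr & ~MY_PAGEOFFSET;
	*lock_len  = (len + off + MY_PAGEOFFSET) & ~MY_PAGEOFFSET;
}

int
main(void)
{
	uintptr_t	a;
	size_t		l;

	page_round(0x10001234UL, 100, &a, &l);
	/* prints: addr 0x10001000 len 4096 */
	(void) printf("addr 0x%lx len %lu\n", (unsigned long)a,
	    (unsigned long)l);
	return (0);
}
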
1329 
1330         /*
1331          * Setup the bindinfo for the mtt bind call
1332          */
1333         bh = &mr->mr_bindinfo;
1334         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
1335         bcopy(bind, bh, sizeof (tavor_bind_info_t));
1336         bh->bi_bypass = bind_type;
1337         status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
1338             &mtt_pgsize_bits);
1339         if (status != DDI_SUCCESS) {
1340                 /* Set "status" and "errormsg" and goto failure */
1341                 TAVOR_TNF_FAIL(status, "failed mtt bind");
1342                 /*
1343                  * When mtt_bind fails, freerbuf has already been done,
1344                  * so make sure not to call it again.
1345                  */
1346                 bind->bi_type = bh->bi_type;
1347                 goto mrcommon_fail5;
1348         }
1349         mr->mr_logmttpgsz = mtt_pgsize_bits;
1350 
1351         /*
1352          * Allocate MTT reference count (to track shared memory regions).
1353          * This reference count resource may never be used on the given
 1354          * memory region, but if it is ever later registered as a "shared"
1355          * memory region then this resource will be necessary.  If we fail
1356          * here, we do pretty much the same as above to clean up.
1357          */
1358         status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep,
1359             &mtt_refcnt);
1360         if (status != DDI_SUCCESS) {
1361                 /* Set "status" and "errormsg" and goto failure */
 1362                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed reference count");
1363                 goto mrcommon_fail6;
1364         }
1365         mr->mr_mttrefcntp = mtt_refcnt;
1366         swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
1367         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
1368         TAVOR_MTT_REFCNT_INIT(swrc_tmp);
1369 
1370         /*
1371          * Fill in the MPT entry.  This is the final step before passing
1372          * ownership of the MPT entry to the Tavor hardware.  We use all of
1373          * the information collected/calculated above to fill in the
1374          * requisite portions of the MPT.
1375          */
1376         bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
1377         mpt_entry.m_io    = TAVOR_MEM_CYCLE_GENERATE;
1378         mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1379         mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1380         mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1381         mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1382         mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1383         mpt_entry.lr      = 1;
1384         mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
1385         mpt_entry.page_sz       = mr->mr_logmttpgsz - 0xC;
1386         mpt_entry.mem_key       = mr->mr_lkey;
1387         mpt_entry.pd            = pd->pd_pdnum;


1450  * The following is cleanup for all possible failure cases in this routine
1451  */
1452 mrcommon_fail7:
1453         tavor_rsrc_free(state, &mtt_refcnt);
1454 mrcommon_fail6:
1455         tavor_rsrc_free(state, &mtt);
1456         tavor_mr_mem_unbind(state, bh);
1457         bind->bi_type = bh->bi_type;
1458 mrcommon_fail5:
1459         if (mr_is_umem) {
1460                 tavor_umap_db_free(umapdb);
1461         }
1462 mrcommon_fail4:
1463         if (mr_is_umem) {
1464                 /*
1465                  * Free up the memory ddi_umem_iosetup() allocates
1466                  * internally.
1467                  */
1468                 if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
1469                         freerbuf(bind->bi_buf);
1470                         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1471                         bind->bi_type = TAVOR_BINDHDL_NONE;
1472                         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
1473                 }
1474                 ddi_umem_unlock(umem_cookie);
1475         }
1476 mrcommon_fail3:
1477         tavor_rsrc_free(state, &rsrc);
1478 mrcommon_fail2:
1479         tavor_rsrc_free(state, &mpt);
1480 mrcommon_fail1:
1481         tavor_pd_refcnt_dec(pd);
1482 mrcommon_fail:
1483         TNF_PROBE_1(tavor_mr_common_reg_fail, TAVOR_TNF_ERROR, "",
1484             tnf_string, msg, errormsg);
1485         TAVOR_TNF_EXIT(tavor_mr_common_reg);
1486         return (status);
1487 }
1488 
1489 int
1490 tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
1491     ibt_dmr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl)
1492 {


1523          * reference count.
1524          */
1525         status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
1526         if (status != DDI_SUCCESS) {
1527                 status = IBT_INSUFF_RESOURCE;
1528                 goto mrcommon_fail1;
1529         }
1530 
1531         /*
1532          * Allocate the software structure for tracking the memory region (i.e.
1533          * the Tavor Memory Region handle).  If we fail here, we must undo
1534          * the protection domain reference count and the previous resource
1535          * allocation.
1536          */
1537         status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
1538         if (status != DDI_SUCCESS) {
1539                 status = IBT_INSUFF_RESOURCE;
1540                 goto mrcommon_fail2;
1541         }
1542         mr = (tavor_mrhdl_t)rsrc->tr_addr;
1543         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
1544         bzero(mr, sizeof (*mr));
1545 
1546         /*
1547          * Setup and validate the memory region access flags.  This means
1548          * translating the IBTF's enable flags into the access flags that
1549          * will be used in later operations.
1550          */
1551         mr->mr_accflag = 0;
1552         if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1553                 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1554         if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1555                 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1556         if (flags & IBT_MR_ENABLE_REMOTE_READ)
1557                 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1558         if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1559                 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1560         if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1561                 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1562 
1563         /*


1785  * tavor_mr_common_rereg()
1786  *    Context: Can be called from interrupt or base context.
1787  */
1788 static int
1789 tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
1790     tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
1791     tavor_mr_options_t *op)
1792 {
1793         tavor_rsrc_t            *mpt;
1794         ibt_mr_attr_flags_t     acc_flags_to_use;
1795         ibt_mr_flags_t          flags;
1796         tavor_pdhdl_t           pd_to_use;
1797         tavor_hw_mpt_t          mpt_entry;
1798         uint64_t                mtt_addr_to_use, vaddr_to_use, len_to_use;
1799         uint_t                  sleep, dereg_level;
1800         int                     status;
1801         char                    *errormsg;
1802 
1803         TAVOR_TNF_ENTER(tavor_mr_common_rereg);
1804 
1805         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1806 
1807         /*
1808          * Check here to see if the memory region corresponds to a userland
1809          * mapping.  Reregistration of userland memory regions is not
1810          * currently supported.  Return failure. XXX
1811          */
1812         if (mr->mr_is_umem) {
1813                 /* Set "status" and "errormsg" and goto failure */
1814                 TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
1815                 goto mrrereg_fail;
1816         }
1817 
1818         mutex_enter(&mr->mr_lock);
1819 
1820         /* Pull MPT resource pointer from the Tavor Memory Region handle */
1821         mpt = mr->mr_mptrsrcp;
1822 
1823         /* Extract the flags field from the tavor_bind_info_t */
1824         flags = bind->bi_flags;
1825 
1826         /*


2394                                  * software ownership of the MPT entry as that
2395                                  * has already been done above (in
2396                                  * tavor_mr_reregister()).  Also unnecessary
2397                                  * to attempt to unbind the memory.
2398                                  *
2399                                  * But we need to unbind the newly bound
2400                                  * memory and free up the newly allocated MTT
2401                                  * entries before returning.
2402                                  */
2403                                 tavor_mr_mem_unbind(state, bind);
2404                                 tavor_rsrc_free(state, &mtt);
2405                                 *dereg_level =
2406                                     TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2407 
2408                                 /* Set "status"/"errormsg", goto failure */
2409                                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
2410                                     "failed reference count");
2411                                 goto mrrereghelp_fail;
2412                         }
2413                         swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
2414                         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
2415                         TAVOR_MTT_REFCNT_INIT(swrc_new);
2416                 } else {
2417                         mtt_refcnt = mr->mr_mttrefcntp;
2418                 }
2419 
2420                 /*
2421                  * Using the new mapping and the new MTT resources, write the
2422                  * updated entries to MTT
2423                  */
2424                 status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
2425                 if (status != DDI_SUCCESS) {
2426                         /*
2427                          * Deregister will be called upon returning failure
2428                          * from this routine. This will ensure that all
2429                          * current resources get properly freed up.
2430                          * Unnecessary to attempt to regain software ownership
2431                          * of the MPT entry as that has already been done
2432                          * above (in tavor_mr_reregister()).  Also unnecessary
2433                          * to attempt to unbind the memory.
2434                          *


2520 /*
2521  * tavor_mr_mem_bind()
2522  *    Context: Can be called from interrupt or base context.
2523  */
2524 static int
2525 tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
2526     ddi_dma_handle_t dmahdl, uint_t sleep)
2527 {
2528         ddi_dma_attr_t  dma_attr;
2529         int             (*callback)(caddr_t);
2530         uint_t          dma_xfer_mode;
2531         int             status;
2532 
2533         /* bi_type must be set to a meaningful value to get a bind handle */
2534         ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR ||
2535             bind->bi_type == TAVOR_BINDHDL_BUF ||
2536             bind->bi_type == TAVOR_BINDHDL_UBUF);
2537 
2538         TAVOR_TNF_ENTER(tavor_mr_mem_bind);
2539 
2540         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2541 
2542         /* Set the callback flag appropriately */
2543         callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
2544 
2545         /* Determine whether to map STREAMING or CONSISTENT */
2546         dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ?
2547             DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;
2548 
2549         /*
2550          * Initialize many of the default DMA attributes.  Then, if we're
2551          * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
2552          */
2553         if (dmahdl == NULL) {
2554                 tavor_dma_attr_init(&dma_attr);
2555 #ifdef  __sparc
2556                 /*
2557                  * First, disable streaming and switch to consistent if
2558                  * configured to do so and IOMMU BYPASS is enabled.
2559                  */
2560                 if (state->ts_cfg_profile->cp_disable_streaming_on_bypass &&
2561                     dma_xfer_mode == DDI_DMA_STREAMING &&


2620 }
2621 
2622 
2623 /*
2624  * tavor_mr_mem_unbind()
2625  *    Context: Can be called from interrupt or base context.
2626  */
2627 static void
2628 tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind)
2629 {
2630         int     status;
2631 
2632         TAVOR_TNF_ENTER(tavor_mr_mem_unbind);
2633 
2634         /*
 2635          * In the case of TAVOR_BINDHDL_UBUF, the memory that bi_buf points
 2636          * to was actually allocated internally by ddi_umem_iosetup(), so it
 2637          * must be freed here.  Reset bi_type to TAVOR_BINDHDL_NONE so that
 2638          * it is not freed again later.
2639          */
2640         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2641         if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
2642                 freerbuf(bind->bi_buf);
2643                 bind->bi_type = TAVOR_BINDHDL_NONE;
2644         }
2645         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
2646 
2647         /*
2648          * Unbind the DMA memory for the region
2649          *
2650          * Note: The only way ddi_dma_unbind_handle() currently
2651          * can return an error is if the handle passed in is invalid.
2652          * Since this should never happen, we choose to return void
2653          * from this function!  If this does return an error, however,
2654          * then we print a warning message to the console.
2655          */
2656         status = ddi_dma_unbind_handle(bind->bi_dmahdl);
2657         if (status != DDI_SUCCESS) {
2658                 TAVOR_WARNING(state, "failed to unbind DMA mapping");
2659                 TNF_PROBE_0(tavor_mr_mem_unbind_dmaunbind_fail,
2660                     TAVOR_TNF_ERROR, "");
2661                 TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
2662                 return;
2663         }
2664 
2665         /* Free up the DMA handle */


2698         i = 0;
2699         mtt_table  = (uint64_t *)mtt->tr_addr;
2700         dmacookie  = bind->bi_dmacookie;
2701         cookie_cnt = bind->bi_cookiecnt;
2702         while (cookie_cnt-- > 0) {
2703                 addr    = dmacookie.dmac_laddress;
2704                 endaddr = addr + (dmacookie.dmac_size - 1);
2705                 addr    = addr & ~((uint64_t)pagesize - 1);
2706                 while (addr <= endaddr) {
2707                         /*
2708                          * Fill in the mapped addresses (calculated above) and
2709                          * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
2710                          */
2711                         mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
2712                         ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
2713                         addr += pagesize;
2714                         i++;
2715 
2716                         if (addr == 0) {
2717                                 static int do_once = 1;
2718                                 _NOTE(SCHEME_PROTECTS_DATA("safe sharing",
2719                                     do_once))
2720                                 if (do_once) {
2721                                         do_once = 0;
2722                                         cmn_err(CE_NOTE, "probable error in "
2723                                             "dma_cookie address from caller\n");
2724                                 }
2725                                 break;
2726                         }
2727                 }
2728 
2729                 /*
2730                  * When we've reached the end of the current DMA cookie,
2731                  * jump to the next cookie (if there are more)
2732                  */
2733                 if (cookie_cnt != 0) {
2734                         ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
2735                 }
2736         }
2737 
2738         TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write);
2739         return (DDI_SUCCESS);
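
tavor_mr_fast_mtt_write() above walks every DMA cookie of the binding, truncates the cookie's start address to the MTT page size, and writes one present-flagged entry per covered page, bailing out if an address ever wraps to zero. A simplified user-space sketch of that walk over an array of (address, size) ranges; the entry layout with bit 0 as the "present" flag is only a stand-in for TAVOR_MTT_ENTRY_PRESET:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define	ENTRY_PRESENT	0x1ULL	/* stand-in for TAVOR_MTT_ENTRY_PRESET */

struct range {
	uint64_t	addr;
	uint64_t	size;
};

/*
 * Emit one table entry per page covered by each range; returns the
 * number of entries written.  The caller must size 'table' to hold
 * every page of every range.
 */
static size_t
fill_entries(const struct range *r, size_t nranges, uint64_t pagesize,
    uint64_t *table)
{
	size_t		i = 0, c;
	uint64_t	addr, endaddr;

	for (c = 0; c < nranges; c++) {
		addr    = r[c].addr & ~(pagesize - 1);
		endaddr = r[c].addr + (r[c].size - 1);
		while (addr <= endaddr) {
			table[i++] = addr | ENTRY_PRESENT;
			addr += pagesize;
			if (addr == 0)	/* wrapped: bogus input address */
				break;
		}
	}
	return (i);
}

int
main(void)
{
	struct range	r[] = { { 0x10001200ULL, 0x2000ULL } };
	uint64_t	tbl[8];
	size_t		n, k;

	/* 0x2000 bytes starting mid-page span three 4 KB pages */
	n = fill_entries(r, 1, 4096, tbl);
	for (k = 0; k < n; k++)
		(void) printf("entry %lu: 0x%llx\n", (unsigned long)k,
		    (unsigned long long)tbl[k]);
	return (0);
}
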




 222         status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
 223         if (status != DDI_SUCCESS) {
 224                 /* Set "status" and "errormsg" and goto failure */
 225                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
 226                 goto mrshared_fail1;
 227         }
 228 
 229         /*
 230          * Allocate the software structure for tracking the shared memory
 231          * region (i.e. the Tavor Memory Region handle).  If we fail here, we
 232          * must undo the protection domain reference count and the previous
 233          * resource allocation.
 234          */
 235         status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
 236         if (status != DDI_SUCCESS) {
 237                 /* Set "status" and "errormsg" and goto failure */
 238                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
 239                 goto mrshared_fail2;
 240         }
 241         mr = (tavor_mrhdl_t)rsrc->tr_addr;

 242 
 243         /*
 244          * Setup and validate the memory region access flags.  This means
 245          * translating the IBTF's enable flags into the access flags that
 246          * will be used in later operations.
 247          */
 248         mr->mr_accflag = 0;
 249         if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
 250                 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
 251         if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
 252                 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
 253         if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
 254                 mr->mr_accflag |= IBT_MR_REMOTE_READ;
 255         if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
 256                 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
 257         if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
 258                 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
 259 
 260         /*
 261          * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed


 323                         TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
 324                         goto mrshared_fail4;
 325                 }
 326         }
 327 
 328         /*
 329          * Copy the MTT resource pointer (and additional parameters) from
 330          * the original Tavor Memory Region handle.  Note: this is normally
 331          * where the tavor_mr_mem_bind() routine would be called, but because
 332          * we already have bound and filled-in MTT entries it is simply a
 333          * matter here of managing the MTT reference count and grabbing the
 334          * address of the MTT table entries (for filling in the shared region's
 335          * MPT entry).
 336          */
 337         mr->mr_mttrsrcp        = mrhdl->mr_mttrsrcp;
 338         mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
 339         mr->mr_bindinfo        = mrhdl->mr_bindinfo;
 340         mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
 341         mutex_exit(&mrhdl->mr_lock);
 342         bind = &mr->mr_bindinfo;

 343         mtt = mr->mr_mttrsrcp;
 344 
 345         /*
 346          * Increment the MTT reference count (to reflect the fact that
 347          * the MTT is now shared)
 348          */
 349         (void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp);
 350 
 351         /*
 352          * Update the new "bind" virtual address.  Do some extra work here
 353          * to ensure proper alignment.  That is, make sure that the page
 354          * offset for the beginning of the old range is the same as the
 355          * offset for this new mapping
 356          */
 357         pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
 358         bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
 359             (mr->mr_bindinfo.bi_addr & pgsize_msk));
 360 
 361         /*
 362          * Get the base address for the MTT table.  This will be necessary


 648 
 649         /* Set the mrhdl pointer to NULL and return success */
 650         *mrhdl = NULL;
 651 
 652         TAVOR_TNF_EXIT(tavor_mr_deregister);
 653         return (DDI_SUCCESS);
 654 }
 655 
 656 
 657 /*
 658  * tavor_mr_query()
 659  *    Context: Can be called from interrupt or base context.
 660  */
 661 /* ARGSUSED */
 662 int
 663 tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr,
 664     ibt_mr_query_attr_t *attr)
 665 {
 666         TAVOR_TNF_ENTER(tavor_mr_query);
 667 


 668         mutex_enter(&mr->mr_lock);
 669 
 670         /*
 671          * Check here to see if the memory region has already been partially
 672          * deregistered as a result of a tavor_umap_umemlock_cb() callback.
 673          * If so, this is an error, return failure.
 674          */
 675         if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
 676                 mutex_exit(&mr->mr_lock);
 677                 TNF_PROBE_0(tavor_mr_query_inv_mrhdl_fail, TAVOR_TNF_ERROR, "");
 678                 TAVOR_TNF_EXIT(tavor_mr_query);
 679                 return (IBT_MR_HDL_INVALID);
 680         }
 681 
 682         /* Fill in the queried attributes */
 683         attr->mr_attr_flags = mr->mr_accflag;
 684         attr->mr_pd  = (ibt_pd_hdl_t)mr->mr_pdhdl;
 685 
 686         /* Fill in the "local" attributes */
 687         attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;


 940                 /* Set "status" and "errormsg" and goto failure */
 941                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
 942                 goto mwalloc_fail1;
 943         }
 944 
 945         /*
 946          * Allocate the software structure for tracking the memory window (i.e.
 947          * the Tavor Memory Window handle).  Note: This is actually the same
 948          * software structure used for tracking memory regions, but since many
 949          * of the same properties are needed, only a single structure is
 950          * necessary.  If we fail here, we must undo the protection domain
 951          * reference count and the previous resource allocation.
 952          */
 953         status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
 954         if (status != DDI_SUCCESS) {
 955                 /* Set "status" and "errormsg" and goto failure */
 956                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
 957                 goto mwalloc_fail2;
 958         }
 959         mw = (tavor_mwhdl_t)rsrc->tr_addr;

 960 
 961         /*
 962          * Calculate an "unbound" RKey from MPT index.  In much the same way
 963          * as we do for memory regions (above), this key is constructed from
 964          * a "constrained" (which depends on the MPT index) and an
 965          * "unconstrained" portion (which may be arbitrarily chosen).
 966          */
 967         tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey);
 968 
 969         /*
 970          * Fill in the MPT entry.  This is the final step before passing
 971          * ownership of the MPT entry to the Tavor hardware.  We use all of
 972          * the information collected/calculated above to fill in the
 973          * requisite portions of the MPT.  Note: fewer entries in the MPT
 974          * entry are necessary to allocate a memory window.
 975          */
 976         bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
 977         mpt_entry.reg_win       = TAVOR_MPT_IS_WINDOW;
 978         mpt_entry.mem_key       = mw->mr_rkey;
 979         mpt_entry.pd            = pd->pd_pdnum;


1048             (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
1049                 /* Set "status" and "errormsg" and goto failure */
1050                 TAVOR_TNF_FAIL(IBT_INVALID_PARAM, "invalid sleep flags");
1051                 TNF_PROBE_1(tavor_mw_free_fail, TAVOR_TNF_ERROR, "",
1052                     tnf_string, msg, errormsg);
1053                 TAVOR_TNF_EXIT(tavor_mw_free);
1054                 return (status);
1055         }
1056 
1057         /*
1058          * Pull all the necessary information from the Tavor Memory Window
1059          * handle.  This is necessary here because the resource for the
 1060          * MW handle is going to be freed up as part of this operation.
1061          */
1062         mw      = *mwhdl;
1063         mutex_enter(&mw->mr_lock);
1064         mpt     = mw->mr_mptrsrcp;
1065         rsrc    = mw->mr_rsrcp;
1066         pd      = mw->mr_pdhdl;
1067         mutex_exit(&mw->mr_lock);

1068 
1069         /*
1070          * Reclaim the MPT entry from hardware.  Note: in general, it is
1071          * unexpected for this operation to return an error.
1072          */
1073         status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
1074             0, mpt->tr_indx, sleep);
1075         if (status != TAVOR_CMD_SUCCESS) {
1076                 cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n",
1077                     status);
1078                 TNF_PROBE_1(tavor_hw2sw_mpt_cmd_fail, TAVOR_TNF_ERROR, "",
1079                     tnf_uint, status, status);
1080                 TAVOR_TNF_EXIT(tavor_mw_free);
1081                 return (IBT_INVALID_PARAM);
1082         }
1083 
1084         /* Free the Tavor Memory Window handle */
1085         tavor_rsrc_free(state, &rsrc);
1086 
1087         /* Free up the MPT entry resource */


1102  * tavor_mr_keycalc()
1103  *    Context: Can be called from interrupt or base context.
1104  */
1105 void
1106 tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key)
1107 {
1108         uint32_t        tmp, log_num_mpt;
1109 
1110         /*
1111          * Generate a simple key from counter.  Note:  We increment this
1112          * static variable _intentionally_ without any kind of mutex around
1113          * it.  First, single-threading all operations through a single lock
1114          * would be a bad idea (from a performance point-of-view).  Second,
1115          * the upper "unconstrained" bits don't really have to be unique
1116          * because the lower bits are guaranteed to be (although we do make a
1117          * best effort to ensure that they are).  Third, the window for the
1118          * race (where both threads read and update the counter at the same
1119          * time) is incredibly small.
1120          * And, lastly, we'd like to make this into a "random" key XXX
1121          */

1122         log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt;
1123         tmp = (tavor_debug_memkey_cnt++) << log_num_mpt;
1124         *key = tmp | indx;
1125 }
1126 
1127 
1128 /*
1129  * tavor_mr_common_reg()
1130  *    Context: Can be called from interrupt or base context.
1131  */
1132 static int
1133 tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
1134     tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
1135 {
1136         tavor_rsrc_pool_info_t  *rsrc_pool;
1137         tavor_rsrc_t            *mpt, *mtt, *rsrc, *mtt_refcnt;
1138         tavor_umap_db_entry_t   *umapdb;
1139         tavor_sw_refcnt_t       *swrc_tmp;
1140         tavor_hw_mpt_t          mpt_entry;
1141         tavor_mrhdl_t           mr;


1218         status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
1219         if (status != DDI_SUCCESS) {
1220                 /* Set "status" and "errormsg" and goto failure */
1221                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MPT");
1222                 goto mrcommon_fail1;
1223         }
1224 
1225         /*
1226          * Allocate the software structure for tracking the memory region (i.e.
1227          * the Tavor Memory Region handle).  If we fail here, we must undo
1228          * the protection domain reference count and the previous resource
1229          * allocation.
1230          */
1231         status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
1232         if (status != DDI_SUCCESS) {
1233                 /* Set "status" and "errormsg" and goto failure */
1234                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed MR handle");
1235                 goto mrcommon_fail2;
1236         }
1237         mr = (tavor_mrhdl_t)rsrc->tr_addr;

1238 
1239         /*
1240          * Setup and validate the memory region access flags.  This means
1241          * translating the IBTF's enable flags into the access flags that
1242          * will be used in later operations.
1243          */
1244         mr->mr_accflag = 0;
1245         if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1246                 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1247         if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1248                 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1249         if (flags & IBT_MR_ENABLE_REMOTE_READ)
1250                 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1251         if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1252                 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1253         if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1254                 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1255 
1256         /*
1257          * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed


1276          * "userland resources database".  This will later be added to
1277          * the database (after all further memory registration operations are
1278          * successful).  If we fail here, we must undo the reference counts
1279          * and the previous resource allocations.
1280          */
1281         mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
1282         if (mr_is_umem) {
1283                 umem_len   = ptob(btopr(bind->bi_len +
1284                     ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
1285                 umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
1286                 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
1287                     DDI_UMEMLOCK_LONGTERM);
1288                 status = umem_lockmemory(umem_addr, umem_len, umem_flags,
1289                     &umem_cookie, &tavor_umem_cbops, NULL);
1290                 if (status != 0) {
1291                         /* Set "status" and "errormsg" and goto failure */
1292                         TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umem pin");
1293                         goto mrcommon_fail3;
1294                 }
1295 



1296                 bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
1297                     B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
1298                 if (bind->bi_buf == NULL) {
1299                         /* Set "status" and "errormsg" and goto failure */
1300                         TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed iosetup");
1301                         goto mrcommon_fail3;
1302                 }
1303                 bind->bi_type = TAVOR_BINDHDL_UBUF;
1304                 bind->bi_buf->b_flags |= B_READ;
1305 



1306                 umapdb = tavor_umap_db_alloc(state->ts_instance,
1307                     (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
1308                     (uint64_t)(uintptr_t)rsrc);
1309                 if (umapdb == NULL) {
1310                         /* Set "status" and "errormsg" and goto failure */
1311                         TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed umap add");
1312                         goto mrcommon_fail4;
1313                 }
1314         }
1315 
1316         /*
1317          * Setup the bindinfo for the mtt bind call
1318          */
1319         bh = &mr->mr_bindinfo;

1320         bcopy(bind, bh, sizeof (tavor_bind_info_t));
1321         bh->bi_bypass = bind_type;
1322         status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
1323             &mtt_pgsize_bits);
1324         if (status != DDI_SUCCESS) {
1325                 /* Set "status" and "errormsg" and goto failure */
1326                 TAVOR_TNF_FAIL(status, "failed mtt bind");
1327                 /*
1328                  * When mtt_bind fails, freerbuf has already been done,
1329                  * so make sure not to call it again.
1330                  */
1331                 bind->bi_type = bh->bi_type;
1332                 goto mrcommon_fail5;
1333         }
1334         mr->mr_logmttpgsz = mtt_pgsize_bits;
1335 
1336         /*
1337          * Allocate MTT reference count (to track shared memory regions).
1338          * This reference count resource may never be used on the given
 1339          * memory region, but if it is ever later registered as a "shared"
1340          * memory region then this resource will be necessary.  If we fail
1341          * here, we do pretty much the same as above to clean up.
1342          */
1343         status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep,
1344             &mtt_refcnt);
1345         if (status != DDI_SUCCESS) {
1346                 /* Set "status" and "errormsg" and goto failure */
 1347                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE, "failed reference count");
1348                 goto mrcommon_fail6;
1349         }
1350         mr->mr_mttrefcntp = mtt_refcnt;
1351         swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;

1352         TAVOR_MTT_REFCNT_INIT(swrc_tmp);
1353 
1354         /*
1355          * Fill in the MPT entry.  This is the final step before passing
1356          * ownership of the MPT entry to the Tavor hardware.  We use all of
1357          * the information collected/calculated above to fill in the
1358          * requisite portions of the MPT.
1359          */
1360         bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
1361         mpt_entry.m_io    = TAVOR_MEM_CYCLE_GENERATE;
1362         mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1363         mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1364         mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1365         mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
1366         mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1367         mpt_entry.lr      = 1;
1368         mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
1369         mpt_entry.page_sz       = mr->mr_logmttpgsz - 0xC;
1370         mpt_entry.mem_key       = mr->mr_lkey;
1371         mpt_entry.pd            = pd->pd_pdnum;


1434  * The following is cleanup for all possible failure cases in this routine
1435  */
1436 mrcommon_fail7:
1437         tavor_rsrc_free(state, &mtt_refcnt);
1438 mrcommon_fail6:
1439         tavor_rsrc_free(state, &mtt);
1440         tavor_mr_mem_unbind(state, bh);
1441         bind->bi_type = bh->bi_type;
1442 mrcommon_fail5:
1443         if (mr_is_umem) {
1444                 tavor_umap_db_free(umapdb);
1445         }
1446 mrcommon_fail4:
1447         if (mr_is_umem) {
1448                 /*
1449                  * Free up the memory ddi_umem_iosetup() allocates
1450                  * internally.
1451                  */
1452                 if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
1453                         freerbuf(bind->bi_buf);

1454                         bind->bi_type = TAVOR_BINDHDL_NONE;

1455                 }
1456                 ddi_umem_unlock(umem_cookie);
1457         }
1458 mrcommon_fail3:
1459         tavor_rsrc_free(state, &rsrc);
1460 mrcommon_fail2:
1461         tavor_rsrc_free(state, &mpt);
1462 mrcommon_fail1:
1463         tavor_pd_refcnt_dec(pd);
1464 mrcommon_fail:
1465         TNF_PROBE_1(tavor_mr_common_reg_fail, TAVOR_TNF_ERROR, "",
1466             tnf_string, msg, errormsg);
1467         TAVOR_TNF_EXIT(tavor_mr_common_reg);
1468         return (status);
1469 }
1470 
1471 int
1472 tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
1473     ibt_dmr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl)
1474 {


1505          * reference count.
1506          */
1507         status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
1508         if (status != DDI_SUCCESS) {
1509                 status = IBT_INSUFF_RESOURCE;
1510                 goto mrcommon_fail1;
1511         }
1512 
1513         /*
1514          * Allocate the software structure for tracking the memory region (i.e.
1515          * the Tavor Memory Region handle).  If we fail here, we must undo
1516          * the protection domain reference count and the previous resource
1517          * allocation.
1518          */
1519         status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
1520         if (status != DDI_SUCCESS) {
1521                 status = IBT_INSUFF_RESOURCE;
1522                 goto mrcommon_fail2;
1523         }
1524         mr = (tavor_mrhdl_t)rsrc->tr_addr;

1525         bzero(mr, sizeof (*mr));
1526 
1527         /*
1528          * Setup and validate the memory region access flags.  This means
1529          * translating the IBTF's enable flags into the access flags that
1530          * will be used in later operations.
1531          */
1532         mr->mr_accflag = 0;
1533         if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1534                 mr->mr_accflag |= IBT_MR_WINDOW_BIND;
1535         if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1536                 mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
1537         if (flags & IBT_MR_ENABLE_REMOTE_READ)
1538                 mr->mr_accflag |= IBT_MR_REMOTE_READ;
1539         if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1540                 mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
1541         if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1542                 mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;
1543 
1544         /*


1766  * tavor_mr_common_rereg()
1767  *    Context: Can be called from interrupt or base context.
1768  */
1769 static int
1770 tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
1771     tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
1772     tavor_mr_options_t *op)
1773 {
1774         tavor_rsrc_t            *mpt;
1775         ibt_mr_attr_flags_t     acc_flags_to_use;
1776         ibt_mr_flags_t          flags;
1777         tavor_pdhdl_t           pd_to_use;
1778         tavor_hw_mpt_t          mpt_entry;
1779         uint64_t                mtt_addr_to_use, vaddr_to_use, len_to_use;
1780         uint_t                  sleep, dereg_level;
1781         int                     status;
1782         char                    *errormsg;
1783 
1784         TAVOR_TNF_ENTER(tavor_mr_common_rereg);
1785 


1786         /*
1787          * Check here to see if the memory region corresponds to a userland
1788          * mapping.  Reregistration of userland memory regions is not
1789          * currently supported.  Return failure. XXX
1790          */
1791         if (mr->mr_is_umem) {
1792                 /* Set "status" and "errormsg" and goto failure */
1793                 TAVOR_TNF_FAIL(IBT_MR_HDL_INVALID, "invalid mrhdl");
1794                 goto mrrereg_fail;
1795         }
1796 
1797         mutex_enter(&mr->mr_lock);
1798 
1799         /* Pull MPT resource pointer from the Tavor Memory Region handle */
1800         mpt = mr->mr_mptrsrcp;
1801 
1802         /* Extract the flags field from the tavor_bind_info_t */
1803         flags = bind->bi_flags;
1804 
1805         /*


2373                                  * software ownership of the MPT entry as that
2374                                  * has already been done above (in
2375                                  * tavor_mr_reregister()).  Also unnecessary
2376                                  * to attempt to unbind the memory.
2377                                  *
2378                                  * But we need to unbind the newly bound
2379                                  * memory and free up the newly allocated MTT
2380                                  * entries before returning.
2381                                  */
2382                                 tavor_mr_mem_unbind(state, bind);
2383                                 tavor_rsrc_free(state, &mtt);
2384                                 *dereg_level =
2385                                     TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2386 
2387                                 /* Set "status"/"errormsg", goto failure */
2388                                 TAVOR_TNF_FAIL(IBT_INSUFF_RESOURCE,
2389                                     "failed reference count");
2390                                 goto mrrereghelp_fail;
2391                         }
2392                         swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;

2393                         TAVOR_MTT_REFCNT_INIT(swrc_new);
2394                 } else {
2395                         mtt_refcnt = mr->mr_mttrefcntp;
2396                 }
2397 
2398                 /*
2399                  * Using the new mapping and the new MTT resources, write the
2400                  * updated entries to MTT
2401                  */
2402                 status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
2403                 if (status != DDI_SUCCESS) {
2404                         /*
2405                          * Deregister will be called upon returning failure
2406                          * from this routine. This will ensure that all
2407                          * current resources get properly freed up.
2408                          * Unnecessary to attempt to regain software ownership
2409                          * of the MPT entry as that has already been done
2410                          * above (in tavor_mr_reregister()).  Also unnecessary
2411                          * to attempt to unbind the memory.
2412                          *


2498 /*
2499  * tavor_mr_mem_bind()
2500  *    Context: Can be called from interrupt or base context.
2501  */
2502 static int
2503 tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
2504     ddi_dma_handle_t dmahdl, uint_t sleep)
2505 {
2506         ddi_dma_attr_t  dma_attr;
2507         int             (*callback)(caddr_t);
2508         uint_t          dma_xfer_mode;
2509         int             status;
2510 
2511         /* bi_type must be set to a meaningful value to get a bind handle */
2512         ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR ||
2513             bind->bi_type == TAVOR_BINDHDL_BUF ||
2514             bind->bi_type == TAVOR_BINDHDL_UBUF);
2515 
2516         TAVOR_TNF_ENTER(tavor_mr_mem_bind);
2517 


2518         /* Set the callback flag appropriately */
2519         callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
2520 
2521         /* Determine whether to map STREAMING or CONSISTENT */
2522         dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ?
2523             DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;
2524 
2525         /*
2526          * Initialize many of the default DMA attributes.  Then, if we're
2527          * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
2528          */
2529         if (dmahdl == NULL) {
2530                 tavor_dma_attr_init(&dma_attr);
2531 #ifdef  __sparc
2532                 /*
2533                  * First, disable streaming and switch to consistent if
2534                  * configured to do so and IOMMU BYPASS is enabled.
2535                  */
2536                 if (state->ts_cfg_profile->cp_disable_streaming_on_bypass &&
2537                     dma_xfer_mode == DDI_DMA_STREAMING &&


2596 }
2597 
2598 
2599 /*
2600  * tavor_mr_mem_unbind()
2601  *    Context: Can be called from interrupt or base context.
2602  */
2603 static void
2604 tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind)
2605 {
2606         int     status;
2607 
2608         TAVOR_TNF_ENTER(tavor_mr_mem_unbind);
2609 
2610         /*
 2611          * In the case of TAVOR_BINDHDL_UBUF, the memory that bi_buf points
 2612          * to was actually allocated internally by ddi_umem_iosetup(), so it
 2613          * must be freed here.  Reset bi_type to TAVOR_BINDHDL_NONE so that
 2614          * it is not freed again later.
2615          */

2616         if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
2617                 freerbuf(bind->bi_buf);
2618                 bind->bi_type = TAVOR_BINDHDL_NONE;
2619         }

2620 
2621         /*
2622          * Unbind the DMA memory for the region
2623          *
2624          * Note: The only way ddi_dma_unbind_handle() currently
2625          * can return an error is if the handle passed in is invalid.
2626          * Since this should never happen, we choose to return void
2627          * from this function!  If this does return an error, however,
2628          * then we print a warning message to the console.
2629          */
2630         status = ddi_dma_unbind_handle(bind->bi_dmahdl);
2631         if (status != DDI_SUCCESS) {
2632                 TAVOR_WARNING(state, "failed to unbind DMA mapping");
2633                 TNF_PROBE_0(tavor_mr_mem_unbind_dmaunbind_fail,
2634                     TAVOR_TNF_ERROR, "");
2635                 TAVOR_TNF_EXIT(tavor_mr_mem_unbind);
2636                 return;
2637         }
2638 
2639         /* Free up the DMA handle */


2672         i = 0;
2673         mtt_table  = (uint64_t *)mtt->tr_addr;
2674         dmacookie  = bind->bi_dmacookie;
2675         cookie_cnt = bind->bi_cookiecnt;
2676         while (cookie_cnt-- > 0) {
2677                 addr    = dmacookie.dmac_laddress;
2678                 endaddr = addr + (dmacookie.dmac_size - 1);
2679                 addr    = addr & ~((uint64_t)pagesize - 1);
2680                 while (addr <= endaddr) {
2681                         /*
2682                          * Fill in the mapped addresses (calculated above) and
2683                          * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
2684                          */
2685                         mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
2686                         ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
2687                         addr += pagesize;
2688                         i++;
2689 
2690                         if (addr == 0) {
2691                                 static int do_once = 1;


2692                                 if (do_once) {
2693                                         do_once = 0;
2694                                         cmn_err(CE_NOTE, "probable error in "
2695                                             "dma_cookie address from caller\n");
2696                                 }
2697                                 break;
2698                         }
2699                 }
2700 
2701                 /*
2702                  * When we've reached the end of the current DMA cookie,
2703                  * jump to the next cookie (if there are more)
2704                  */
2705                 if (cookie_cnt != 0) {
2706                         ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
2707                 }
2708         }
2709 
2710         TAVOR_TNF_EXIT(tavor_mr_fast_mtt_write);
2711         return (DDI_SUCCESS);