437
438 /*
439 * The bottom bits of "offset" are undefined (number depends on
440 * system PAGESIZE). Shifting these off leaves us with a "key".
441 * The "key" is actually a combination of both a real key value
442 * (for the purpose of database lookup) and a "type" value. Although
443 * we are not going to do any database lookup per se, we do want
444 * to extract the "key" and the "type" (to enable faster lookup of
445 * the appropriate CQ or QP handle).
446 */
447 key = off >> PAGESHIFT;
448 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
449 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
450
451 /*
452 * Allocate an entry to track the mapping and unmapping (specifically,
453 * partial unmapping) of this resource.
454 */
455 dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
456 sizeof (tavor_devmap_track_t), KM_SLEEP);
457 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
458 dvm_track->tdt_offset = off;
459 dvm_track->tdt_state = state;
460 dvm_track->tdt_refcnt = 1;
461 mutex_init(&dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
462 DDI_INTR_PRI(state->ts_intrmsi_pri));
463
464 /*
465 * Depending on the type of resource that has been mapped out, we
466 * need to update the QP or CQ handle to reflect that it has, in
467 * fact, been mapped. This allows the driver code which frees a QP
468 * or a CQ to know whether it is appropriate to do a
469 * devmap_devmem_remap() to invalidate the userland mapping for the
470 * corresponding queue's memory.
471 */
472 if (type == MLNX_UMAP_CQMEM_RSRC) {
473
474 /* Use "key" (CQ number) to do fast lookup of CQ handle */
475 cq = tavor_cqhdl_from_cqnum(state, key);
476
477 /*
551 * tavor_devmap_umem_dup()
552 * Context: Can be called from kernel context.
553 */
/*
 * devmap "dup" callback for Tavor umem mappings.  It marks the duplicated
 * mapping ("new_dhp") invalid with devmap_devmem_remap() and then hands
 * back, through "new_pvtp", a freshly allocated tracking structure whose
 * "tdt_size" is zero.  The "dhp" argument is not used.
 *
 * Returns DDI_SUCCESS on success.  If devmap_devmem_remap() fails,
 * TAVOR_WARNING() is invoked and the remap status is returned without
 * writing "*new_pvtp".
 *
 * NOTE(review): "pvtp" is dereferenced without a NULL check here, whereas
 * tavor_devmap_devmem_dup() tolerates a NULL tracking structure -- confirm
 * that this callback can never be reached with a NULL "pvtp".
 */
554 /* ARGSUSED */
555 static int
556 tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp,
557 void **new_pvtp)
558 {
559 tavor_state_t *state;
560 tavor_devmap_track_t *dvm_track, *new_dvm_track;
561 uint_t maxprot;
562 int status;
563
564 TAVOR_TNF_ENTER(tavor_devmap_umem_dup);
565
566 /*
567 * Extract the Tavor softstate pointer from the "Tavor devmap tracking
568 * structure" of the mapping being duplicated (in "pvtp").
569 */
570 dvm_track = (tavor_devmap_track_t *)pvtp;
/* NOTE(review): presumably a static-analysis annotation only -- confirm */
571 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
572 state = dvm_track->tdt_state;
573
574 /*
575 * Since this devmap_dup() entry point is generally called
576 * when a process does fork(2), it is incumbent upon the driver
577 * to ensure that the child does not inherit a valid copy of
578 * the parent's QP or CQ resource. This is accomplished by using
579 * devmap_devmem_remap() to invalidate the child's mapping to the
580 * kernel memory. The length passed is the size recorded in the
581 * existing tracking structure ("tdt_size").
582 */
583 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
584 status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
585 dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
586 if (status != DDI_SUCCESS) {
587 TAVOR_WARNING(state, "failed in tavor_devmap_umem_dup()");
588 TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
589 return (status);
590 }
591
592 /*
593 * Allocate a new entry to track the subsequent unmapping
594 * (specifically, all partial unmappings) of the child's newly
595 * invalidated resource. Note: Setting the "tdt_size" field to
596 * zero here is an indication to the devmap_unmap() entry point
597 * that this mapping is invalid, and that its subsequent unmapping
598 * should not affect any of the parent's CQ or QP resources.
599 */
/*
 * The allocation result is used without a NULL check; presumably a
 * KM_SLEEP allocation blocks rather than fails -- see kmem_zalloc(9F).
 */
600 new_dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
601 sizeof (tavor_devmap_track_t), KM_SLEEP);
/* NOTE(review): presumably a static-analysis annotation only -- confirm */
602 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*new_dvm_track))
603 new_dvm_track->tdt_offset = 0;
604 new_dvm_track->tdt_state = state;
605 new_dvm_track->tdt_refcnt = 1;
606 new_dvm_track->tdt_size = 0;
/* The per-entry lock is set up with the priority from ts_intrmsi_pri */
607 mutex_init(&new_dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
608 DDI_INTR_PRI(state->ts_intrmsi_pri));
/* Hand the new tracking structure back as the new mapping's private data */
609 *new_pvtp = new_dvm_track;
610
611 TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
612 return (DDI_SUCCESS);
613 }
613
614
615 /*
616 * tavor_devmap_umem_unmap()
617 * Context: Can be called from kernel context.
618 */
619 /* ARGSUSED */
620 static void
621 tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
623 devmap_cookie_t new_dhp2, void **pvtp2)
624 {
625 tavor_state_t *state;
626 tavor_rsrc_t *rsrcp;
627 tavor_devmap_track_t *dvm_track;
628 tavor_cqhdl_t cq;
629 tavor_qphdl_t qp;
630 tavor_srqhdl_t srq;
631 uint64_t key, value;
632 uint_t type;
633 uint_t size;
634 int status;
635
636 TAVOR_TNF_ENTER(tavor_devmap_umem_unmap);
637
638 /*
639 * Extract the Tavor softstate pointer from "Tavor devmap tracking
640 * structure" (in "pvtp").
641 */
642 dvm_track = (tavor_devmap_track_t *)pvtp;
643 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
644 state = dvm_track->tdt_state;
645
646 /*
647 * Extract the "offset" from the "Tavor devmap tracking structure".
648 * Note: The input argument "off" is ignored here because the
649 * Tavor mapping interfaces define a very specific meaning to
650 * each "logical offset". Also extract the "key" and "type" encoded
651 * in the logical offset.
652 */
653 key = dvm_track->tdt_offset >> PAGESHIFT;
654 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
655 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
656
657 /*
658 * Extract the "size" of the mapping. If this size is determined
659 * to be zero, then it is an indication of a previously invalidated
660 * mapping, and no CQ or QP resources should be affected.
661 */
662 size = dvm_track->tdt_size;
663
825 /* Get Tavor softstate structure from instance */
826 instance = TAVOR_DEV_INSTANCE(dev);
827 state = ddi_get_soft_state(tavor_statep, instance);
828 if (state == NULL) {
829 TNF_PROBE_0(tavor_devmap_devmem_map_gss_fail, TAVOR_TNF_ERROR,
830 "");
831 TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
832 return (ENXIO);
833 }
834
835 /*
836 * Allocate an entry to track the mapping and unmapping of this
837 * resource. Note: We don't need to initialize the "refcnt" or
838 * "offset" fields here, nor do we need to initialize the mutex
839 * used with the "refcnt". Since UAR pages are single pages, they
840 * are not subject to "partial" unmappings. This makes these other
841 * fields unnecessary.
842 */
843 dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
844 sizeof (tavor_devmap_track_t), KM_SLEEP);
845 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
846 dvm_track->tdt_state = state;
847 dvm_track->tdt_size = PAGESIZE;
848
849 /*
850 * Pass the private "Tavor devmap tracking structure" back. This
851 * pointer will be returned in a subsequent "unmap" callback.
852 */
853 *pvtp = dvm_track;
854
855 TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
856 return (DDI_SUCCESS);
857 }
858
859
860 /*
861 * tavor_devmap_devmem_dup()
862 * Context: Can be called from kernel context.
863 */
864 /* ARGSUSED */
865 static int
869 tavor_state_t *state;
870 tavor_devmap_track_t *dvm_track;
871 uint_t maxprot;
872 int status;
873
874 TAVOR_TNF_ENTER(tavor_devmap_devmem_dup);
875
876 /*
877 * Extract the Tavor softstate pointer from "Tavor devmap tracking
878 * structure" (in "pvtp"). Note: If the tracking structure is NULL
879 * here, it means that the mapping corresponds to an invalid mapping.
880 * In this case, it can be safely ignored ("new_pvtp" set to NULL).
881 */
882 dvm_track = (tavor_devmap_track_t *)pvtp;
883 if (dvm_track == NULL) {
884 *new_pvtp = NULL;
885 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
886 return (DDI_SUCCESS);
887 }
888
889 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
890 state = dvm_track->tdt_state;
891
892 /*
893 * Since this devmap_dup() entry point is generally called
894 * when a process does fork(2), it is incumbent upon the driver
895 * to ensure that the child does not inherit a valid copy of
896 * the parent's resource. This is accomplished by using
897 * devmap_devmem_remap() to invalidate the child's mapping to the
898 * kernel memory.
899 */
900 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
901 status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
902 dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
903 if (status != DDI_SUCCESS) {
904 TAVOR_WARNING(state, "failed in tavor_devmap_devmem_dup()");
905 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
906 return (status);
907 }
908
909 /*
925 * Context: Can be called from kernel context.
926 */
/*
 * devmap "unmap" callback for Tavor devmem mappings.  It frees the
 * tracking structure passed in "pvtp"; a NULL "pvtp" is ignored.  None of
 * the other arguments are used, and "pvtp1"/"pvtp2" are not written.
 */
927 /* ARGSUSED */
928 static void
929 tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
930 size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
931 devmap_cookie_t new_dhp2, void **pvtp2)
932 {
933 tavor_devmap_track_t *dvm_track;
934
935 TAVOR_TNF_ENTER(tavor_devmap_devmem_unmap);
936
937 /*
938 * Free up the "Tavor devmap tracking structure" (in "pvtp").
939 * There cannot be "partial" unmappings here because all UAR pages
940 * are single pages. Note: If the tracking structure is NULL here,
941 * it means that the mapping corresponds to an invalid mapping. In
942 * this case, it can be safely ignored.
943 */
944 dvm_track = (tavor_devmap_track_t *)pvtp;
/*
 * NOTE(review): the annotation below names *dvm_track before the NULL
 * check; presumably it is a static-analysis hint that generates no
 * code -- confirm.
 */
945 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
946 if (dvm_track == NULL) {
947 TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
948 return;
949 }
950
/* The size passed to kmem_free() is the size of the tracking structure type */
951 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
952 TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
953 }
954
955
956 /*
957 * tavor_umap_ci_data_in()
958 * Context: Can be called from user or kernel context.
959 */
960 /* ARGSUSED */
961 ibt_status_t
962 tavor_umap_ci_data_in(tavor_state_t *state, ibt_ci_data_flags_t flags,
963 ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
964 {
965 int status;
1032 TAVOR_TNF_ERROR, "");
1033 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1034 return (IBT_NOT_SUPPORTED);
1035 }
1036
1037 /* Check for valid MR handle pointer */
1038 if (mr == NULL) {
1039 TNF_PROBE_0(tavor_umap_mr_data_in_invmrhdl_fail,
1040 TAVOR_TNF_ERROR, "");
1041 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1042 return (IBT_MR_HDL_INVALID);
1043 }
1044
1045 /* Check for valid MR input structure size */
1046 if (data_sz < sizeof (ibt_mr_data_in_t)) {
1047 TNF_PROBE_0(tavor_umap_mr_data_in_invdatasz_fail,
1048 TAVOR_TNF_ERROR, "");
1049 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1050 return (IBT_INSUFF_RESOURCE);
1051 }
1052 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1053
1054 /*
1055 * Ensure that the MR corresponds to userland memory and that it is
1056 * a currently valid memory region as well.
1057 */
1058 mutex_enter(&mr->mr_lock);
1059 if ((mr->mr_is_umem == 0) || (mr->mr_umemcookie == NULL)) {
1060 mutex_exit(&mr->mr_lock);
1061 TNF_PROBE_0(tavor_umap_mr_data_in_invumem_fail,
1062 TAVOR_TNF_ERROR, "");
1063 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1064 return (IBT_MR_HDL_INVALID);
1065 }
1066
1067 /*
1068 * If it has passed all the above checks, then extract the callback
1069 * function and argument from the input structure. Copy them into
1070 * the MR handle. This function will be called only if the memory
1071 * corresponding to the MR handle gets a umem_lockmemory() callback.
1072 */
1182 tavor_umap_cq_data_out(tavor_cqhdl_t cq, mlnx_umap_cq_data_out_t *data,
1183 size_t data_sz)
1184 {
1185 TAVOR_TNF_ENTER(tavor_umap_cq_data_out);
1186
1187 /* Check for valid CQ handle pointer */
1188 if (cq == NULL) {
1189 TNF_PROBE_0(tavor_umap_cq_data_out_invcqhdl_fail,
1190 TAVOR_TNF_ERROR, "");
1191 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1192 return (IBT_CQ_HDL_INVALID);
1193 }
1194
1195 /* Check for valid CQ mapping structure size */
1196 if (data_sz < sizeof (mlnx_umap_cq_data_out_t)) {
1197 TNF_PROBE_0(tavor_umap_cq_data_out_invdatasz_fail,
1198 TAVOR_TNF_ERROR, "");
1199 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1200 return (IBT_INSUFF_RESOURCE);
1201 }
1202 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1203
1204 /*
1205 * If it has passed all the above checks, then fill in all the useful
1206 * mapping information (including the mapping offset that will be
1207 * passed back to the devmap() interface during a subsequent mmap()
1208 * call).
1209 *
1210 * The "offset" for CQ mmap()'s looks like this:
1211 * +----------------------------------------+--------+--------------+
1212 * | CQ Number | 0x33 | Reserved (0) |
1213 * +----------------------------------------+--------+--------------+
1214 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1215 *
1216 * This returns information about the mapping offset, the length of
1217 * the CQ memory, the CQ number (for use in later CQ doorbells), the
1218 * number of CQEs the CQ memory can hold, and the size of each CQE.
1219 */
1220 data->mcq_rev = MLNX_UMAP_IF_VERSION;
1221 data->mcq_mapoffset = ((((uint64_t)cq->cq_cqnum <<
1222 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC) << PAGESHIFT);
1238 tavor_umap_qp_data_out(tavor_qphdl_t qp, mlnx_umap_qp_data_out_t *data,
1239 size_t data_sz)
1240 {
1241 TAVOR_TNF_ENTER(tavor_umap_qp_data_out);
1242
1243 /* Check for valid QP handle pointer */
1244 if (qp == NULL) {
1245 TNF_PROBE_0(tavor_umap_qp_data_out_invqphdl_fail,
1246 TAVOR_TNF_ERROR, "");
1247 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1248 return (IBT_QP_HDL_INVALID);
1249 }
1250
1251 /* Check for valid QP mapping structure size */
1252 if (data_sz < sizeof (mlnx_umap_qp_data_out_t)) {
1253 TNF_PROBE_0(tavor_umap_qp_data_out_invdatasz_fail,
1254 TAVOR_TNF_ERROR, "");
1255 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1256 return (IBT_INSUFF_RESOURCE);
1257 }
1258 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1259
1260 /*
1261 * If it has passed all the checks, then fill in all the useful
1262 * mapping information (including the mapping offset that will be
1263 * passed back to the devmap() interface during a subsequent mmap()
1264 * call).
1265 *
1266 * The "offset" for QP mmap()'s looks like this:
1267 * +----------------------------------------+--------+--------------+
1268 * | QP Number | 0x44 | Reserved (0) |
1269 * +----------------------------------------+--------+--------------+
1270 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1271 *
1272 * This returns information about the mapping offset, the length of
1273 * the QP memory, and the QP number (for use in later send and recv
1274 * doorbells). It also returns the following information for both
1275 * the receive work queue and the send work queue, respectively: the
1276 * offset (from the base mapped address) of the start of the given
1277 * work queue, the 64-bit IB virtual address that corresponds to
1278 * the base mapped address (needed for posting WQEs through the
1323 tavor_umap_srq_data_out(tavor_srqhdl_t srq, mlnx_umap_srq_data_out_t *data,
1324 size_t data_sz)
1325 {
1326 TAVOR_TNF_ENTER(tavor_umap_srq_data_out);
1327
1328 /* Check for valid SRQ handle pointer */
1329 if (srq == NULL) {
1330 TNF_PROBE_0(tavor_umap_srq_data_out_invsrqhdl_fail,
1331 TAVOR_TNF_ERROR, "");
1332 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1333 return (IBT_SRQ_HDL_INVALID);
1334 }
1335
1336 /* Check for valid SRQ mapping structure size */
1337 if (data_sz < sizeof (mlnx_umap_srq_data_out_t)) {
1338 TNF_PROBE_0(tavor_umap_srq_data_out_invdatasz_fail,
1339 TAVOR_TNF_ERROR, "");
1340 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1341 return (IBT_INSUFF_RESOURCE);
1342 }
1343 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1344
1345 /*
1346 * If it has passed all the checks, then fill in all the useful
1347 * mapping information (including the mapping offset that will be
1348 * passed back to the devmap() interface during a subsequent mmap()
1349 * call).
1350 *
1351 * The "offset" for SRQ mmap()'s looks like this:
1352 * +----------------------------------------+--------+--------------+
1353 * | SRQ Number | 0x66 | Reserved (0) |
1354 * +----------------------------------------+--------+--------------+
1355 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1356 *
1357 * This returns information about the mapping offset, the length of the
1358 * SRQ memory, and the SRQ number (for use in later send and recv
1359 * doorbells). It also returns the following information for the
1360 * shared receive queue: the offset (from the base mapped address) of
1361 * the start of the given work queue, the 64-bit IB virtual address
1362 * that corresponds to the base mapped address (needed for posting WQEs
1363 * through the QP doorbells), the number of WQEs the given work queue
1386 tavor_umap_pd_data_out(tavor_pdhdl_t pd, mlnx_umap_pd_data_out_t *data,
1387 size_t data_sz)
1388 {
/*
 * Reports the PD number of "pd" to the caller through "data", together
 * with the interface revision.  "pd" must be non-NULL and "data_sz" must
 * be at least sizeof (mlnx_umap_pd_data_out_t); otherwise
 * IBT_PD_HDL_INVALID or IBT_INSUFF_RESOURCE is returned and "data" is
 * not written.
 *
 * NOTE(review): "data" itself is dereferenced without a NULL check --
 * confirm that callers always pass a valid buffer.
 */
1389 TAVOR_TNF_ENTER(tavor_umap_pd_data_out);
1390
1391 /* Check for valid PD handle pointer */
1392 if (pd == NULL) {
1393 TNF_PROBE_0(tavor_umap_pd_data_out_invpdhdl_fail,
1394 TAVOR_TNF_ERROR, "");
1395 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1396 return (IBT_PD_HDL_INVALID);
1397 }
1398
1399 /* Check that the caller's buffer can hold the PD output structure */
1400 if (data_sz < sizeof (mlnx_umap_pd_data_out_t)) {
1401 TNF_PROBE_0(tavor_umap_pd_data_out_invdatasz_fail,
1402 TAVOR_TNF_ERROR, "");
1403 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1404 return (IBT_INSUFF_RESOURCE);
1405 }
1406 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1407
1408 /*
1409 * If it has passed all the checks, then fill in the PD table index
1410 * (the PD table allocated index for the PD pd_pdnum)
1411 */
1412 data->mpd_rev = MLNX_UMAP_IF_VERSION;
1413 data->mpd_pdnum = pd->pd_pdnum;
1414
1415 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
/*
 * NOTE(review): the failure paths above return IBT_* codes, but the
 * success path returns DDI_SUCCESS -- confirm whether IBT_SUCCESS was
 * intended (the two presumably share the same value).
 */
1416 return (DDI_SUCCESS);
1417 }
1418
1419 /*
1420 * tavor_umap_db_init()
1421 * Context: Only called from attach() path context
1422 */
1423 void
1424 tavor_umap_db_init(void)
1425 {
1426 TAVOR_TNF_ENTER(tavor_umap_db_init);
1469
1470
1471 /*
1472 * tavor_umap_db_alloc()
1473 * Context: Can be called from user or kernel context.
1474 */
/*
 * Allocates a zero-filled "userland resources database" entry and records
 * "instance", "type", "key" and "value" in its common portion.
 *
 * Returns the new entry, or NULL if the KM_NOSLEEP allocation fails.  The
 * entry is only allocated and filled in here; this routine does not insert
 * it into the database.
 */
1475 tavor_umap_db_entry_t *
1476 tavor_umap_db_alloc(uint_t instance, uint64_t key, uint_t type, uint64_t value)
1477 {
1478 tavor_umap_db_entry_t *umapdb;
1479
1480 TAVOR_TNF_ENTER(tavor_umap_db_alloc);
1481
1482 /*
 * Allocate an entry to add to the "userland resources database".
 * KM_NOSLEEP is used, so the result must be checked for NULL.
 */
1483 umapdb = kmem_zalloc(sizeof (tavor_umap_db_entry_t), KM_NOSLEEP);
1484 if (umapdb == NULL) {
1485 TNF_PROBE_0(tavor_umap_db_alloc_kmz_fail, TAVOR_TNF_ERROR, "");
1486 TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1487 return (NULL);
1488 }
1489 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1490
1491 /* Fill in the fields in the common portion of the database entry */
1492 umapdb->tdbe_common.tdb_instance = instance;
1493 umapdb->tdbe_common.tdb_type = type;
1494 umapdb->tdbe_common.tdb_key = key;
1495 umapdb->tdbe_common.tdb_value = value;
1496
1497 TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1498 return (umapdb);
1499 }
1500
1501
1502 /*
1503 * tavor_umap_db_free()
1504 * Context: Can be called from user or kernel context.
1505 */
1506 void
1507 tavor_umap_db_free(tavor_umap_db_entry_t *umapdb)
1508 {
1509 TAVOR_TNF_ENTER(tavor_umap_db_free);
1529 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1530
1531 TAVOR_TNF_EXIT(tavor_umap_db_add);
1532 }
1533
1534
1535 /*
1536 * tavor_umap_db_add_nolock()
1537 * Context: Can be called from user or kernel context.
1538 */
1539 void
1540 tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb)
1541 {
1542 tavor_umap_db_query_t query;
1543 avl_index_t where;
1544
1545 TAVOR_TNF_ENTER(tavor_umap_db_add_nolock);
1546
1547 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1548
1549 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1550
1551 /*
1552 * Copy the common portion of the "to-be-added" database entry
1553 * into the "tavor_umap_db_query_t" structure. We use this structure
1554 * (with no flags set) to find the appropriate location in the
1555 * "userland resources database" for the new entry to be added.
1556 *
1557 * Note: we expect that this entry should not be found in the
1558 * database (unless something bad has happened).
1559 */
1560 query.tqdb_common = umapdb->tdbe_common;
1561 query.tqdb_flags = 0;
1562 (void) avl_find(&tavor_userland_rsrc_db.tdl_umapdb_avl, &query,
1563 &where);
1564
1565 /*
1566 * Now, using the "where" field from the avl_find() operation
1567 * above, we will insert the new database entry ("umapdb").
1568 */
1569 avl_insert(&tavor_userland_rsrc_db.tdl_umapdb_avl, umapdb,
1570 where);
1614 /*
1615 * Fill in key, type, instance, and flags values of the
1616 * tavor_umap_db_query_t in preparation for the database
1617 * lookup.
1618 */
1619 query.tqdb_flags = flags;
1620 query.tqdb_common.tdb_key = key;
1621 query.tqdb_common.tdb_type = type;
1622 query.tqdb_common.tdb_instance = instance;
1623
1624 /*
1625 * Perform the database query. If no entry is found, then
1626 * return failure, else continue.
1627 */
1628 entry = (tavor_umap_db_entry_t *)avl_find(
1629 &tavor_userland_rsrc_db.tdl_umapdb_avl, &query, &where);
1630 if (entry == NULL) {
1631 TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1632 return (DDI_FAILURE);
1633 }
1634 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))
1635
1636 /*
1637 * If the flags argument specifies that the entry should
1638 * be removed if found, then call avl_remove() to remove
1639 * the entry from the database.
1640 */
1641 if (flags & TAVOR_UMAP_DB_REMOVE) {
1642
1643 avl_remove(&tavor_userland_rsrc_db.tdl_umapdb_avl, entry);
1644
1645 /*
1646 * The database entry is returned with the expectation
1647 * that the caller will use tavor_umap_db_free() to
1648 * free the entry's memory. ASSERT that this is non-NULL.
1649 * NULL pointer should never be passed for the
1650 * TAVOR_UMAP_DB_REMOVE case.
1651 */
1652 ASSERT(umapdb != NULL);
1653 }
1654
1681 tavor_rsrc_t *rsrcp;
1682 tavor_mrhdl_t mr;
1683 uint64_t value;
1684 uint_t instance;
1685 int status;
1686 void (*mr_callback)(void *, void *);
1687 void *mr_cbarg1, *mr_cbarg2;
1688
1689 TAVOR_TNF_ENTER(tavor_umap_umemlock_cb);
1690
1691 /*
1692 * If this was userland memory, then we need to remove its entry
1693 * from the "userland resources database". Note: We use the
1694 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag here because we don't know
1695 * which instance was used when the entry was added (but we want
1696 * to know after the entry is found using the other search criteria).
1697 */
1698 status = tavor_umap_db_find(0, (uint64_t)(uintptr_t)umem_cookie,
1699 MLNX_UMAP_MRMEM_RSRC, &value, (TAVOR_UMAP_DB_REMOVE |
1700 TAVOR_UMAP_DB_IGNORE_INSTANCE), &umapdb);
1701 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1702 if (status == DDI_SUCCESS) {
1703 instance = umapdb->tdbe_common.tdb_instance;
1704 state = ddi_get_soft_state(tavor_statep, instance);
1705 if (state == NULL) {
1706 cmn_err(CE_WARN, "Unable to match Tavor instance\n");
1707 TNF_PROBE_0(tavor_umap_umemlock_cb_gss_fail,
1708 TAVOR_TNF_ERROR, "");
1709 TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1710 return;
1711 }
1712
1713 /* Free the database entry */
1714 tavor_umap_db_free(umapdb);
1715
1716 /* Use "value" to convert to an MR handle */
1717 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
1718 mr = (tavor_mrhdl_t)rsrcp->tr_addr;
1719
1720 /*
1721 * If a callback has been provided, call it first. This
1750 TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1751 }
1752 }
1753
1754 TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1755 }
1756
1757
1758 /*
1759 * tavor_umap_db_compare()
1760 * Context: Can be called from user or kernel context.
1761 */
1762 static int
1763 tavor_umap_db_compare(const void *q, const void *e)
1764 {
1765 tavor_umap_db_common_t *entry_common, *query_common;
1766 uint_t query_flags;
1767
1768 TAVOR_TNF_ENTER(tavor_umap_db_compare);
1769
1770 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*((tavor_umap_db_query_t *)q)))
1771
1772 entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common;
1773 query_common = &((tavor_umap_db_query_t *)q)->tqdb_common;
1774 query_flags = ((tavor_umap_db_query_t *)q)->tqdb_flags;
1775
1776 /*
1777 * The first comparison is done on the "key" value in "query"
1778 * and "entry". If they are not equal, then the appropriate
1779 * search direction is returned. Else, we continue by
1780 * comparing "type".
1781 */
1782 if (query_common->tdb_key < entry_common->tdb_key) {
1783 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1784 return (-1);
1785 } else if (query_common->tdb_key > entry_common->tdb_key) {
1786 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1787 return (+1);
1788 }
1789
1790 /*
1791 * If the search reaches this point, then "query" and "entry"
|
437
438 /*
439 * The bottom bits of "offset" are undefined (number depends on
440 * system PAGESIZE). Shifting these off leaves us with a "key".
441 * The "key" is actually a combination of both a real key value
442 * (for the purpose of database lookup) and a "type" value. Although
443 * we are not going to do any database lookup per se, we do want
444 * to extract the "key" and the "type" (to enable faster lookup of
445 * the appropriate CQ or QP handle).
446 */
447 key = off >> PAGESHIFT;
448 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
449 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
450
451 /*
452 * Allocate an entry to track the mapping and unmapping (specifically,
453 * partial unmapping) of this resource.
454 */
455 dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
456 sizeof (tavor_devmap_track_t), KM_SLEEP);
457 dvm_track->tdt_offset = off;
458 dvm_track->tdt_state = state;
459 dvm_track->tdt_refcnt = 1;
460 mutex_init(&dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
461 DDI_INTR_PRI(state->ts_intrmsi_pri));
462
463 /*
464 * Depending on the type of resource that has been mapped out, we
465 * need to update the QP or CQ handle to reflect that it has, in
466 * fact, been mapped. This allows the driver code which frees a QP
467 * or a CQ to know whether it is appropriate to do a
468 * devmap_devmem_remap() to invalidate the userland mapping for the
469 * corresponding queue's memory.
470 */
471 if (type == MLNX_UMAP_CQMEM_RSRC) {
472
473 /* Use "key" (CQ number) to do fast lookup of CQ handle */
474 cq = tavor_cqhdl_from_cqnum(state, key);
475
476 /*
550 * tavor_devmap_umem_dup()
551 * Context: Can be called from kernel context.
552 */
/*
 * devmap "dup" callback for Tavor umem mappings.  It marks the duplicated
 * mapping ("new_dhp") invalid with devmap_devmem_remap() and then hands
 * back, through "new_pvtp", a freshly allocated tracking structure whose
 * "tdt_size" is zero.  The "dhp" argument is not used.
 *
 * Returns DDI_SUCCESS on success.  If devmap_devmem_remap() fails,
 * TAVOR_WARNING() is invoked and the remap status is returned without
 * writing "*new_pvtp".
 *
 * NOTE(review): "pvtp" is dereferenced without a NULL check here, whereas
 * tavor_devmap_devmem_dup() tolerates a NULL tracking structure -- confirm
 * that this callback can never be reached with a NULL "pvtp".
 */
553 /* ARGSUSED */
554 static int
555 tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp,
556 void **new_pvtp)
557 {
558 tavor_state_t *state;
559 tavor_devmap_track_t *dvm_track, *new_dvm_track;
560 uint_t maxprot;
561 int status;
562
563 TAVOR_TNF_ENTER(tavor_devmap_umem_dup);
564
565 /*
566 * Extract the Tavor softstate pointer from the "Tavor devmap tracking
567 * structure" of the mapping being duplicated (in "pvtp").
568 */
569 dvm_track = (tavor_devmap_track_t *)pvtp;
570 state = dvm_track->tdt_state;
571
572 /*
573 * Since this devmap_dup() entry point is generally called
574 * when a process does fork(2), it is incumbent upon the driver
575 * to ensure that the child does not inherit a valid copy of
576 * the parent's QP or CQ resource. This is accomplished by using
577 * devmap_devmem_remap() to invalidate the child's mapping to the
578 * kernel memory. The length passed is the size recorded in the
579 * existing tracking structure ("tdt_size").
 */
580 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
581 status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
582 dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
583 if (status != DDI_SUCCESS) {
584 TAVOR_WARNING(state, "failed in tavor_devmap_umem_dup()");
585 TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
586 return (status);
587 }
588
589 /*
590 * Allocate a new entry to track the subsequent unmapping
591 * (specifically, all partial unmappings) of the child's newly
592 * invalidated resource. Note: Setting the "tdt_size" field to
593 * zero here is an indication to the devmap_unmap() entry point
594 * that this mapping is invalid, and that its subsequent unmapping
595 * should not affect any of the parent's CQ or QP resources.
596 */
/*
 * The allocation result is used without a NULL check; presumably a
 * KM_SLEEP allocation blocks rather than fails -- see kmem_zalloc(9F).
 */
597 new_dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
598 sizeof (tavor_devmap_track_t), KM_SLEEP);
599 new_dvm_track->tdt_offset = 0;
600 new_dvm_track->tdt_state = state;
601 new_dvm_track->tdt_refcnt = 1;
602 new_dvm_track->tdt_size = 0;
/* The per-entry lock is set up with the priority from ts_intrmsi_pri */
603 mutex_init(&new_dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
604 DDI_INTR_PRI(state->ts_intrmsi_pri));
/* Hand the new tracking structure back as the new mapping's private data */
605 *new_pvtp = new_dvm_track;
606
607 TAVOR_TNF_EXIT(tavor_devmap_umem_dup);
608 return (DDI_SUCCESS);
609 }
610
611
612 /*
613 * tavor_devmap_umem_unmap()
614 * Context: Can be called from kernel context.
615 */
616 /* ARGSUSED */
617 static void
618 tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
620 devmap_cookie_t new_dhp2, void **pvtp2)
621 {
622 tavor_state_t *state;
623 tavor_rsrc_t *rsrcp;
624 tavor_devmap_track_t *dvm_track;
625 tavor_cqhdl_t cq;
626 tavor_qphdl_t qp;
627 tavor_srqhdl_t srq;
628 uint64_t key, value;
629 uint_t type;
630 uint_t size;
631 int status;
632
633 TAVOR_TNF_ENTER(tavor_devmap_umem_unmap);
634
635 /*
636 * Extract the Tavor softstate pointer from "Tavor devmap tracking
637 * structure" (in "pvtp").
638 */
639 dvm_track = (tavor_devmap_track_t *)pvtp;
640 state = dvm_track->tdt_state;
641
642 /*
643 * Extract the "offset" from the "Tavor devmap tracking structure".
644 * Note: The input argument "off" is ignored here because the
645 * Tavor mapping interfaces define a very specific meaning to
646 * each "logical offset". Also extract the "key" and "type" encoded
647 * in the logical offset.
648 */
649 key = dvm_track->tdt_offset >> PAGESHIFT;
650 type = key & MLNX_UMAP_RSRC_TYPE_MASK;
651 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
652
653 /*
654 * Extract the "size" of the mapping. If this size is determined
655 * to be zero, then it is an indication of a previously invalidated
656 * mapping, and no CQ or QP resources should be affected.
657 */
658 size = dvm_track->tdt_size;
659
821 /* Get Tavor softstate structure from instance */
822 instance = TAVOR_DEV_INSTANCE(dev);
823 state = ddi_get_soft_state(tavor_statep, instance);
824 if (state == NULL) {
825 TNF_PROBE_0(tavor_devmap_devmem_map_gss_fail, TAVOR_TNF_ERROR,
826 "");
827 TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
828 return (ENXIO);
829 }
830
831 /*
832 * Allocate an entry to track the mapping and unmapping of this
833 * resource. Note: We don't need to initialize the "refcnt" or
834 * "offset" fields here, nor do we need to initialize the mutex
835 * used with the "refcnt". Since UAR pages are single pages, they
836 * are not subject to "partial" unmappings. This makes these other
837 * fields unnecessary.
838 */
839 dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
840 sizeof (tavor_devmap_track_t), KM_SLEEP);
841 dvm_track->tdt_state = state;
842 dvm_track->tdt_size = PAGESIZE;
843
844 /*
845 * Pass the private "Tavor devmap tracking structure" back. This
846 * pointer will be returned in a subsequent "unmap" callback.
847 */
848 *pvtp = dvm_track;
849
850 TAVOR_TNF_EXIT(tavor_devmap_devmem_map);
851 return (DDI_SUCCESS);
852 }
853
854
855 /*
856 * tavor_devmap_devmem_dup()
857 * Context: Can be called from kernel context.
858 */
859 /* ARGSUSED */
860 static int
864 tavor_state_t *state;
865 tavor_devmap_track_t *dvm_track;
866 uint_t maxprot;
867 int status;
868
869 TAVOR_TNF_ENTER(tavor_devmap_devmem_dup);
870
871 /*
872 * Extract the Tavor softstate pointer from "Tavor devmap tracking
873 * structure" (in "pvtp"). Note: If the tracking structure is NULL
874 * here, it means that the mapping corresponds to an invalid mapping.
875 * In this case, it can be safely ignored ("new_pvtp" set to NULL).
876 */
877 dvm_track = (tavor_devmap_track_t *)pvtp;
878 if (dvm_track == NULL) {
879 *new_pvtp = NULL;
880 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
881 return (DDI_SUCCESS);
882 }
883
884 state = dvm_track->tdt_state;
885
886 /*
887 * Since this devmap_dup() entry point is generally called
888 * when a process does fork(2), it is incumbent upon the driver
889 * to ensure that the child does not inherit a valid copy of
890 * the parent's resource. This is accomplished by using
891 * devmap_devmem_remap() to invalidate the child's mapping to the
892 * kernel memory.
893 */
894 maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
895 status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
896 dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
897 if (status != DDI_SUCCESS) {
898 TAVOR_WARNING(state, "failed in tavor_devmap_devmem_dup()");
899 TAVOR_TNF_EXIT(tavor_devmap_devmem_dup);
900 return (status);
901 }
902
903 /*
919 * Context: Can be called from kernel context.
920 */
/*
 * devmap "unmap" callback for Tavor devmem mappings.  It frees the
 * tracking structure passed in "pvtp"; a NULL "pvtp" is ignored.  None of
 * the other arguments are used, and "pvtp1"/"pvtp2" are not written.
 */
921 /* ARGSUSED */
922 static void
923 tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
924 size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
925 devmap_cookie_t new_dhp2, void **pvtp2)
926 {
927 tavor_devmap_track_t *dvm_track;
928
929 TAVOR_TNF_ENTER(tavor_devmap_devmem_unmap);
930
931 /*
932 * Free up the "Tavor devmap tracking structure" (in "pvtp").
933 * There cannot be "partial" unmappings here because all UAR pages
934 * are single pages. Note: If the tracking structure is NULL here,
935 * it means that the mapping corresponds to an invalid mapping. In
936 * this case, it can be safely ignored.
937 */
938 dvm_track = (tavor_devmap_track_t *)pvtp;
939 if (dvm_track == NULL) {
940 TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
941 return;
942 }
943
/* The size passed to kmem_free() is the size of the tracking structure type */
944 kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
945 TAVOR_TNF_EXIT(tavor_devmap_devmem_unmap);
946 }
947
948
949 /*
950 * tavor_umap_ci_data_in()
951 * Context: Can be called from user or kernel context.
952 */
953 /* ARGSUSED */
954 ibt_status_t
955 tavor_umap_ci_data_in(tavor_state_t *state, ibt_ci_data_flags_t flags,
956 ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
957 {
958 int status;
1025 TAVOR_TNF_ERROR, "");
1026 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1027 return (IBT_NOT_SUPPORTED);
1028 }
1029
1030 /* Check for valid MR handle pointer */
1031 if (mr == NULL) {
1032 TNF_PROBE_0(tavor_umap_mr_data_in_invmrhdl_fail,
1033 TAVOR_TNF_ERROR, "");
1034 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1035 return (IBT_MR_HDL_INVALID);
1036 }
1037
1038 /* Check for valid MR input structure size */
1039 if (data_sz < sizeof (ibt_mr_data_in_t)) {
1040 TNF_PROBE_0(tavor_umap_mr_data_in_invdatasz_fail,
1041 TAVOR_TNF_ERROR, "");
1042 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1043 return (IBT_INSUFF_RESOURCE);
1044 }
1045
1046 /*
1047 * Ensure that the MR corresponds to userland memory and that it is
1048 * a currently valid memory region as well.
1049 */
1050 mutex_enter(&mr->mr_lock);
1051 if ((mr->mr_is_umem == 0) || (mr->mr_umemcookie == NULL)) {
1052 mutex_exit(&mr->mr_lock);
1053 TNF_PROBE_0(tavor_umap_mr_data_in_invumem_fail,
1054 TAVOR_TNF_ERROR, "");
1055 TAVOR_TNF_EXIT(tavor_umap_mr_data_in);
1056 return (IBT_MR_HDL_INVALID);
1057 }
1058
1059 /*
1060 * If it has passed all the above checks, then extract the callback
1061 * function and argument from the input structure. Copy them into
1062 * the MR handle. This function will be called only if the memory
1063 * corresponding to the MR handle gets a umem_lockmemory() callback.
1064 */
1174 tavor_umap_cq_data_out(tavor_cqhdl_t cq, mlnx_umap_cq_data_out_t *data,
1175 size_t data_sz)
1176 {
1177 TAVOR_TNF_ENTER(tavor_umap_cq_data_out);
1178
1179 /* Check for valid CQ handle pointer */
1180 if (cq == NULL) {
1181 TNF_PROBE_0(tavor_umap_cq_data_out_invcqhdl_fail,
1182 TAVOR_TNF_ERROR, "");
1183 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1184 return (IBT_CQ_HDL_INVALID);
1185 }
1186
1187 /* Check for valid CQ mapping structure size */
1188 if (data_sz < sizeof (mlnx_umap_cq_data_out_t)) {
1189 TNF_PROBE_0(tavor_umap_cq_data_out_invdatasz_fail,
1190 TAVOR_TNF_ERROR, "");
1191 TAVOR_TNF_EXIT(tavor_umap_cq_data_out);
1192 return (IBT_INSUFF_RESOURCE);
1193 }
1194
1195 /*
1196 * If it has passed all the above checks, then fill in all the useful
1197 * mapping information (including the mapping offset that will be
1198 * passed back to the devmap() interface during a subsequent mmap()
1199 * call).
1200 *
1201 * The "offset" for CQ mmap()'s looks like this:
1202 * +----------------------------------------+--------+--------------+
1203 * | CQ Number | 0x33 | Reserved (0) |
1204 * +----------------------------------------+--------+--------------+
1205 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1206 *
1207 * This returns information about the mapping offset, the length of
1208 * the CQ memory, the CQ number (for use in later CQ doorbells), the
1209 * number of CQEs the CQ memory can hold, and the size of each CQE.
1210 */
1211 data->mcq_rev = MLNX_UMAP_IF_VERSION;
1212 data->mcq_mapoffset = ((((uint64_t)cq->cq_cqnum <<
1213 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC) << PAGESHIFT);
1229 tavor_umap_qp_data_out(tavor_qphdl_t qp, mlnx_umap_qp_data_out_t *data,
1230 size_t data_sz)
1231 {
1232 TAVOR_TNF_ENTER(tavor_umap_qp_data_out);
1233
1234 /* Check for valid QP handle pointer */
1235 if (qp == NULL) {
1236 TNF_PROBE_0(tavor_umap_qp_data_out_invqphdl_fail,
1237 TAVOR_TNF_ERROR, "");
1238 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1239 return (IBT_QP_HDL_INVALID);
1240 }
1241
1242 /* Check for valid QP mapping structure size */
1243 if (data_sz < sizeof (mlnx_umap_qp_data_out_t)) {
1244 TNF_PROBE_0(tavor_umap_qp_data_out_invdatasz_fail,
1245 TAVOR_TNF_ERROR, "");
1246 TAVOR_TNF_EXIT(tavor_umap_qp_data_out);
1247 return (IBT_INSUFF_RESOURCE);
1248 }
1249
1250 /*
1251 * If it has passed all the checks, then fill in all the useful
1252 * mapping information (including the mapping offset that will be
1253 * passed back to the devmap() interface during a subsequent mmap()
1254 * call).
1255 *
1256 * The "offset" for QP mmap()'s looks like this:
1257 * +----------------------------------------+--------+--------------+
1258 * | QP Number | 0x44 | Reserved (0) |
1259 * +----------------------------------------+--------+--------------+
1260 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1261 *
1262 * This returns information about the mapping offset, the length of
1263 * the QP memory, and the QP number (for use in later send and recv
1264 * doorbells). It also returns the following information for both
1265 * the receive work queue and the send work queue, respectively: the
1266 * offset (from the base mapped address) of the start of the given
1267 * work queue, the 64-bit IB virtual address that corresponds to
1268 * the base mapped address (needed for posting WQEs through the
1313 tavor_umap_srq_data_out(tavor_srqhdl_t srq, mlnx_umap_srq_data_out_t *data,
1314 size_t data_sz)
1315 {
1316 TAVOR_TNF_ENTER(tavor_umap_srq_data_out);
1317
1318 /* Check for valid SRQ handle pointer */
1319 if (srq == NULL) {
1320 TNF_PROBE_0(tavor_umap_srq_data_out_invsrqhdl_fail,
1321 TAVOR_TNF_ERROR, "");
1322 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1323 return (IBT_SRQ_HDL_INVALID);
1324 }
1325
1326 /* Check for valid SRQ mapping structure size */
1327 if (data_sz < sizeof (mlnx_umap_srq_data_out_t)) {
1328 TNF_PROBE_0(tavor_umap_srq_data_out_invdatasz_fail,
1329 TAVOR_TNF_ERROR, "");
1330 TAVOR_TNF_EXIT(tavor_umap_srq_data_out);
1331 return (IBT_INSUFF_RESOURCE);
1332 }
1333
1334 /*
1335 * If it has passed all the checks, then fill in all the useful
1336 * mapping information (including the mapping offset that will be
1337 * passed back to the devmap() interface during a subsequent mmap()
1338 * call).
1339 *
1340 * The "offset" for SRQ mmap()'s looks like this:
1341 * +----------------------------------------+--------+--------------+
1342 * | SRQ Number | 0x66 | Reserved (0) |
1343 * +----------------------------------------+--------+--------------+
1344 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits
1345 *
1346 * This returns information about the mapping offset, the length of the
1347 * SRQ memory, and the SRQ number (for use in later send and recv
1348 * doorbells). It also returns the following information for the
1349 * shared receive queue: the offset (from the base mapped address) of
1350 * the start of the given work queue, the 64-bit IB virtual address
1351 * that corresponds to the base mapped address (needed for posting WQEs
1352 * through the QP doorbells), the number of WQEs the given work queue
1375 tavor_umap_pd_data_out(tavor_pdhdl_t pd, mlnx_umap_pd_data_out_t *data,
1376 size_t data_sz)
1377 {
1378 TAVOR_TNF_ENTER(tavor_umap_pd_data_out);
1379
1380 /* Check for valid PD handle pointer */
1381 if (pd == NULL) {
1382 TNF_PROBE_0(tavor_umap_pd_data_out_invpdhdl_fail,
1383 TAVOR_TNF_ERROR, "");
1384 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1385 return (IBT_PD_HDL_INVALID);
1386 }
1387
1388 /* Check for valid PD mapping structure size */
1389 if (data_sz < sizeof (mlnx_umap_pd_data_out_t)) {
1390 TNF_PROBE_0(tavor_umap_pd_data_out_invdatasz_fail,
1391 TAVOR_TNF_ERROR, "");
1392 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1393 return (IBT_INSUFF_RESOURCE);
1394 }
1395
1396 /*
1397 * If it has passed all the checks, then fill in the interface
1398 * revision and the PD number (taken from the PD handle's pd_pdnum).
1399 */
1400 data->mpd_rev = MLNX_UMAP_IF_VERSION;
1401 data->mpd_pdnum = pd->pd_pdnum;
1402
1403 TAVOR_TNF_EXIT(tavor_umap_pd_data_out);
1404 return (DDI_SUCCESS);
1405 }
1406
1407 /*
1408 * tavor_umap_db_init()
1409 * Context: Only called from attach() path context
1410 */
1411 void
1412 tavor_umap_db_init(void)
1413 {
1414 TAVOR_TNF_ENTER(tavor_umap_db_init);
1457
1458
1459 /*
1460 * tavor_umap_db_alloc()
1461 * Context: Can be called from user or kernel context.
1462 */
1463 tavor_umap_db_entry_t *
1464 tavor_umap_db_alloc(uint_t instance, uint64_t key, uint_t type, uint64_t value)
1465 {
1466 tavor_umap_db_entry_t *umapdb;
1467
1468 TAVOR_TNF_ENTER(tavor_umap_db_alloc);
1469
1470 /* Allocate an entry to add to the "userland resources database" */
1471 umapdb = kmem_zalloc(sizeof (tavor_umap_db_entry_t), KM_NOSLEEP);
1472 if (umapdb == NULL) {
1473 TNF_PROBE_0(tavor_umap_db_alloc_kmz_fail, TAVOR_TNF_ERROR, "");
1474 TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1475 return (NULL);
1476 }
1477
1478 /* Fill in the fields in the database entry */
1479 umapdb->tdbe_common.tdb_instance = instance;
1480 umapdb->tdbe_common.tdb_type = type;
1481 umapdb->tdbe_common.tdb_key = key;
1482 umapdb->tdbe_common.tdb_value = value;
1483
1484 TAVOR_TNF_EXIT(tavor_umap_db_alloc);
1485 return (umapdb);
1486 }
1487
1488
1489 /*
1490 * tavor_umap_db_free()
1491 * Context: Can be called from user or kernel context.
1492 */
1493 void
1494 tavor_umap_db_free(tavor_umap_db_entry_t *umapdb)
1495 {
1496 TAVOR_TNF_ENTER(tavor_umap_db_free);
1516 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1517
1518 TAVOR_TNF_EXIT(tavor_umap_db_add);
1519 }
1520
1521
1522 /*
1523 * tavor_umap_db_add_nolock()
1524 * Context: Can be called from user or kernel context.
1525 */
1526 void
1527 tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb)
1528 {
1529 tavor_umap_db_query_t query;
1530 avl_index_t where;
1531
1532 TAVOR_TNF_ENTER(tavor_umap_db_add_nolock);
1533
1534 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1535
1536 /*
1537 * Copy the common portion of the "to-be-added" database entry
1538 * into the "tavor_umap_db_query_t" structure. We use this structure
1539 * (with no flags set) to find the appropriate location in the
1540 * "userland resources database" for the new entry to be added.
1541 *
1542 * Note: we expect that this entry should not be found in the
1543 * database (unless something bad has happened).
1544 */
1545 query.tqdb_common = umapdb->tdbe_common;
1546 query.tqdb_flags = 0;
1547 (void) avl_find(&tavor_userland_rsrc_db.tdl_umapdb_avl, &query,
1548 &where);
1549
1550 /*
1551 * Now, using the "where" field from the avl_find() operation
1552 * above, we will insert the new database entry ("umapdb").
1553 */
1554 avl_insert(&tavor_userland_rsrc_db.tdl_umapdb_avl, umapdb,
1555 where);
1599 /*
1600 * Fill in key, type, instance, and flags values of the
1601 * tavor_umap_db_query_t in preparation for the database
1602 * lookup.
1603 */
1604 query.tqdb_flags = flags;
1605 query.tqdb_common.tdb_key = key;
1606 query.tqdb_common.tdb_type = type;
1607 query.tqdb_common.tdb_instance = instance;
1608
1609 /*
1610 * Perform the database query. If no entry is found, then
1611 * return failure, else continue.
1612 */
1613 entry = (tavor_umap_db_entry_t *)avl_find(
1614 &tavor_userland_rsrc_db.tdl_umapdb_avl, &query, &where);
1615 if (entry == NULL) {
1616 TAVOR_TNF_EXIT(tavor_umap_db_find_nolock);
1617 return (DDI_FAILURE);
1618 }
1619
1620 /*
1621 * If the flags argument specifies that the entry should
1622 * be removed if found, then call avl_remove() to remove
1623 * the entry from the database.
1624 */
1625 if (flags & TAVOR_UMAP_DB_REMOVE) {
1626
1627 avl_remove(&tavor_userland_rsrc_db.tdl_umapdb_avl, entry);
1628
1629 /*
1630 * The database entry is returned with the expectation
1631 * that the caller will use tavor_umap_db_free() to
1632 * free the entry's memory. ASSERT that this is non-NULL.
1633 * NULL pointer should never be passed for the
1634 * TAVOR_UMAP_DB_REMOVE case.
1635 */
1636 ASSERT(umapdb != NULL);
1637 }
1638
1665 tavor_rsrc_t *rsrcp;
1666 tavor_mrhdl_t mr;
1667 uint64_t value;
1668 uint_t instance;
1669 int status;
1670 void (*mr_callback)(void *, void *);
1671 void *mr_cbarg1, *mr_cbarg2;
1672
1673 TAVOR_TNF_ENTER(tavor_umap_umemlock_cb);
1674
1675 /*
1676 * If this was userland memory, then we need to remove its entry
1677 * from the "userland resources database". Note: We use the
1678 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag here because we don't know
1679 * which instance was used when the entry was added (but we want
1680 * to know after the entry is found using the other search criteria).
1681 */
1682 status = tavor_umap_db_find(0, (uint64_t)(uintptr_t)umem_cookie,
1683 MLNX_UMAP_MRMEM_RSRC, &value, (TAVOR_UMAP_DB_REMOVE |
1684 TAVOR_UMAP_DB_IGNORE_INSTANCE), &umapdb);
1685 if (status == DDI_SUCCESS) {
1686 instance = umapdb->tdbe_common.tdb_instance;
1687 state = ddi_get_soft_state(tavor_statep, instance);
1688 if (state == NULL) {
1689 cmn_err(CE_WARN, "Unable to match Tavor instance\n");
1690 TNF_PROBE_0(tavor_umap_umemlock_cb_gss_fail,
1691 TAVOR_TNF_ERROR, "");
1692 TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1693 return;
1694 }
1695
1696 /* Free the database entry */
1697 tavor_umap_db_free(umapdb);
1698
1699 /* Use "value" to convert to an MR handle */
1700 rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
1701 mr = (tavor_mrhdl_t)rsrcp->tr_addr;
1702
1703 /*
1704 * If a callback has been provided, call it first. This
1733 TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1734 }
1735 }
1736
1737 TAVOR_TNF_EXIT(tavor_umap_umemlock_cb);
1738 }
1739
1740
1741 /*
1742 * tavor_umap_db_compare()
1743 * Context: Can be called from user or kernel context.
1744 */
1745 static int
1746 tavor_umap_db_compare(const void *q, const void *e)
1747 {
1748 tavor_umap_db_common_t *entry_common, *query_common;
1749 uint_t query_flags;
1750
1751 TAVOR_TNF_ENTER(tavor_umap_db_compare);
1752
1753 entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common;
1754 query_common = &((tavor_umap_db_query_t *)q)->tqdb_common;
1755 query_flags = ((tavor_umap_db_query_t *)q)->tqdb_flags;
1756
1757 /*
1758 * The first comparison is done on the "key" value in "query"
1759 * and "entry". If they are not equal, then the appropriate
1760 * search direction is returned. Else, we continue by
1761 * comparing "type".
1762 */
1763 if (query_common->tdb_key < entry_common->tdb_key) {
1764 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1765 return (-1);
1766 } else if (query_common->tdb_key > entry_common->tdb_key) {
1767 TAVOR_TNF_EXIT(tavor_umap_db_compare);
1768 return (+1);
1769 }
1770
1771 /*
1772 * If the search reaches this point, then "query" and "entry"
|