602 int len;
603 va_list ap;
604 char part_name[MAXNAMELEN];
605 datalink_id_t linkid = state->id_plinkid;
606
607 hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, state->id_dip,
608 0, "hca-guid", 0);
609 (void) dls_mgmt_get_linkinfo(linkid, part_name, NULL, NULL, NULL);
610 len = snprintf(ibd_print_buf, sizeof (ibd_print_buf),
611 "%s%d: HCA GUID %016llx port %d PKEY %02x link %s ",
612 ddi_driver_name(state->id_dip), ddi_get_instance(state->id_dip),
613 (u_longlong_t)hca_guid, state->id_port, state->id_pkey,
614 part_name);
615 va_start(ap, fmt);
616 (void) vsnprintf(ibd_print_buf + len, sizeof (ibd_print_buf) - len,
617 fmt, ap);
618 cmn_err(CE_NOTE, "!%s", ibd_print_buf);
619 va_end(ap);
620 }
621
622 /*
623 * Warlock directives
624 */
625
626 /*
627 * id_lso_lock
628 *
629 * state->id_lso->bkt_nfree may be accessed without a lock to
630 * determine the threshold at which we have to ask the nw layer
631 * to resume transmission (see ibd_resume_transmission()).
632 */
633 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_lso_lock,
634 ibd_state_t::id_lso))
635 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_state_t::id_lso))
636 _NOTE(SCHEME_PROTECTS_DATA("init", ibd_state_t::id_lso_policy))
637 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_lsobkt_t::bkt_nfree))
638
639 /*
640 * id_scq_poll_lock
641 */
642 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_scq_poll_lock,
643 ibd_state_t::id_scq_poll_busy))
644
645 /*
646 * id_txpost_lock
647 */
648 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_txpost_lock,
649 ibd_state_t::id_tx_head))
650 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_txpost_lock,
651 ibd_state_t::id_tx_busy))
652
653 /*
654 * id_acache_req_lock
655 */
656 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_acache_req_lock,
657 ibd_state_t::id_acache_req_cv))
658 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_acache_req_lock,
659 ibd_state_t::id_req_list))
660 _NOTE(SCHEME_PROTECTS_DATA("atomic",
661 ibd_acache_s::ac_ref))
662
663 /*
664 * id_ac_mutex
665 *
666 * This mutex is actually supposed to protect id_ah_op as well,
667 * but this path of the code isn't clean (see update of id_ah_op
668 * in ibd_async_acache(), immediately after the call to
669 * ibd_async_mcache()). For now, we'll skip this check by
670 * declaring that id_ah_op is protected by some internal scheme
671 * that warlock isn't aware of.
672 */
673 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
674 ibd_state_t::id_ah_active))
675 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
676 ibd_state_t::id_ah_free))
677 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
678 ibd_state_t::id_ah_addr))
679 _NOTE(SCHEME_PROTECTS_DATA("ac mutex should protect this",
680 ibd_state_t::id_ah_op))
681 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
682 ibd_state_t::id_ah_error))
683 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
684 ibd_state_t::id_ac_hot_ace))
685 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_state_t::id_ah_error))
686
687 /*
688 * id_mc_mutex
689 */
690 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_mc_mutex,
691 ibd_state_t::id_mc_full))
692 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_mc_mutex,
693 ibd_state_t::id_mc_non))
694
695 /*
696 * id_trap_lock
697 */
698 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_trap_lock,
699 ibd_state_t::id_trap_cv))
700 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_trap_lock,
701 ibd_state_t::id_trap_stop))
702 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_trap_lock,
703 ibd_state_t::id_trap_inprog))
704
705 /*
706 * id_prom_op
707 */
708 _NOTE(SCHEME_PROTECTS_DATA("only by async thread",
709 ibd_state_t::id_prom_op))
710
711 /*
712 * id_sched_lock
713 */
714 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_sched_lock,
715 ibd_state_t::id_sched_needed))
716
717 /*
718 * id_link_mutex
719 */
720 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_link_mutex,
721 ibd_state_t::id_link_state))
722 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_state_t::id_link_state))
723 _NOTE(SCHEME_PROTECTS_DATA("only async thr and ibd_m_start",
724 ibd_state_t::id_link_speed))
725 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_state_t::id_sgid))
726
727 /*
728 * id_tx_list.dl_mutex
729 */
730 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_tx_list.dl_mutex,
731 ibd_state_t::id_tx_list.dl_head))
732 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_tx_list.dl_mutex,
733 ibd_state_t::id_tx_list.dl_pending_sends))
734 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_tx_list.dl_mutex,
735 ibd_state_t::id_tx_list.dl_cnt))
736
737 /*
738 * id_rx_list.dl_mutex
739 */
740 _NOTE(SCHEME_PROTECTS_DATA("atomic or dl mutex or single thr",
741 ibd_state_t::id_rx_list.dl_bufs_outstanding))
742 _NOTE(SCHEME_PROTECTS_DATA("atomic or dl mutex or single thr",
743 ibd_state_t::id_rx_list.dl_cnt))
744
745 /*
746 * rc_timeout_lock
747 */
748 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::rc_timeout_lock,
749 ibd_state_t::rc_timeout_start))
750 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::rc_timeout_lock,
751 ibd_state_t::rc_timeout))
752
753
754 /*
755 * Items protected by atomic updates
756 */
757 _NOTE(SCHEME_PROTECTS_DATA("atomic update only",
758 ibd_state_s::id_brd_rcv
759 ibd_state_s::id_brd_xmt
760 ibd_state_s::id_multi_rcv
761 ibd_state_s::id_multi_xmt
762 ibd_state_s::id_num_intrs
763 ibd_state_s::id_rcv_bytes
764 ibd_state_s::id_rcv_pkt
765 ibd_state_s::id_rx_post_queue_index
766 ibd_state_s::id_tx_short
767 ibd_state_s::id_xmt_bytes
768 ibd_state_s::id_xmt_pkt
769 ibd_state_s::rc_rcv_trans_byte
770 ibd_state_s::rc_rcv_trans_pkt
771 ibd_state_s::rc_rcv_copy_byte
772 ibd_state_s::rc_rcv_copy_pkt
773 ibd_state_s::rc_xmt_bytes
774 ibd_state_s::rc_xmt_small_pkt
775 ibd_state_s::rc_xmt_fragmented_pkt
776 ibd_state_s::rc_xmt_map_fail_pkt
777 ibd_state_s::rc_xmt_map_succ_pkt
778 ibd_rc_chan_s::rcq_invoking))
779
780 /*
781 * Non-mutex protection schemes for data elements. Almost all of
782 * these are non-shared items.
783 */
784 _NOTE(SCHEME_PROTECTS_DATA("unshared or single-threaded",
785 callb_cpr
786 ib_gid_s
787 ib_header_info
788 ibd_acache_rq
789 ibd_acache_s::ac_mce
790 ibd_acache_s::ac_chan
791 ibd_mcache::mc_fullreap
792 ibd_mcache::mc_jstate
793 ibd_mcache::mc_req
794 ibd_rwqe_s
795 ibd_swqe_s
796 ibd_wqe_s
797 ibt_wr_ds_s::ds_va
798 ibt_wr_lso_s
799 ipoib_mac::ipoib_qpn
800 mac_capab_lso_s
801 msgb::b_next
802 msgb::b_cont
803 msgb::b_rptr
804 msgb::b_wptr
805 ibd_state_s::id_bgroup_created
806 ibd_state_s::id_mac_state
807 ibd_state_s::id_mtu
808 ibd_state_s::id_ud_num_rwqe
809 ibd_state_s::id_ud_num_swqe
810 ibd_state_s::id_qpnum
811 ibd_state_s::id_rcq_hdl
812 ibd_state_s::id_rx_buf_sz
813 ibd_state_s::id_rx_bufs
814 ibd_state_s::id_rx_mr_hdl
815 ibd_state_s::id_rx_wqes
816 ibd_state_s::id_rxwcs
817 ibd_state_s::id_rxwcs_size
818 ibd_state_s::id_rx_nqueues
819 ibd_state_s::id_rx_queues
820 ibd_state_s::id_scope
821 ibd_state_s::id_scq_hdl
822 ibd_state_s::id_tx_buf_sz
823 ibd_state_s::id_tx_bufs
824 ibd_state_s::id_tx_mr_hdl
825 ibd_state_s::id_tx_rel_list.dl_cnt
826 ibd_state_s::id_tx_wqes
827 ibd_state_s::id_txwcs
828 ibd_state_s::id_txwcs_size
829 ibd_state_s::rc_listen_hdl
830 ibd_state_s::rc_listen_hdl_OFED_interop
831 ibd_state_s::rc_srq_size
832 ibd_state_s::rc_srq_rwqes
833 ibd_state_s::rc_srq_rx_bufs
834 ibd_state_s::rc_srq_rx_mr_hdl
835 ibd_state_s::rc_tx_largebuf_desc_base
836 ibd_state_s::rc_tx_mr_bufs
837 ibd_state_s::rc_tx_mr_hdl
838 ipha_s
839 icmph_s
840 ibt_path_info_s::pi_sid
841 ibd_rc_chan_s::ace
842 ibd_rc_chan_s::chan_hdl
843 ibd_rc_chan_s::state
844 ibd_rc_chan_s::chan_state
845 ibd_rc_chan_s::is_tx_chan
846 ibd_rc_chan_s::rcq_hdl
847 ibd_rc_chan_s::rcq_size
848 ibd_rc_chan_s::scq_hdl
849 ibd_rc_chan_s::scq_size
850 ibd_rc_chan_s::rx_bufs
851 ibd_rc_chan_s::rx_mr_hdl
852 ibd_rc_chan_s::rx_rwqes
853 ibd_rc_chan_s::tx_wqes
854 ibd_rc_chan_s::tx_mr_bufs
855 ibd_rc_chan_s::tx_mr_hdl
856 ibd_rc_chan_s::tx_rel_list.dl_cnt
857 ibd_rc_chan_s::is_used
858 ibd_rc_tx_largebuf_s::lb_buf
859 ibd_rc_msg_hello_s
860 ibt_cm_return_args_s))
861
862 /*
863 * ibd_rc_chan_s::next is protected by two mutexes:
864 * 1) ibd_state_s::rc_pass_chan_list.chan_list_mutex
865 * 2) ibd_state_s::rc_obs_act_chan_list.chan_list_mutex.
866 */
867 _NOTE(SCHEME_PROTECTS_DATA("protected by two mutexes",
868 ibd_rc_chan_s::next))
869
870 /*
871 * ibd_state_s.rc_tx_large_bufs_lock
872 */
873 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_s::rc_tx_large_bufs_lock,
874 ibd_state_s::rc_tx_largebuf_free_head))
875 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_s::rc_tx_large_bufs_lock,
876 ibd_state_s::rc_tx_largebuf_nfree))
877 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_s::rc_tx_large_bufs_lock,
878 ibd_rc_tx_largebuf_s::lb_next))
879
880 /*
881 * ibd_acache_s.tx_too_big_mutex
882 */
883 _NOTE(MUTEX_PROTECTS_DATA(ibd_acache_s::tx_too_big_mutex,
884 ibd_acache_s::tx_too_big_ongoing))
885
886 /*
887 * tx_wqe_list.dl_mutex
888 */
889 _NOTE(MUTEX_PROTECTS_DATA(ibd_rc_chan_s::tx_wqe_list.dl_mutex,
890 ibd_rc_chan_s::tx_wqe_list.dl_head))
891 _NOTE(MUTEX_PROTECTS_DATA(ibd_rc_chan_s::tx_wqe_list.dl_mutex,
892 ibd_rc_chan_s::tx_wqe_list.dl_pending_sends))
893 _NOTE(MUTEX_PROTECTS_DATA(ibd_rc_chan_s::tx_wqe_list.dl_mutex,
894 ibd_rc_chan_s::tx_wqe_list.dl_cnt))
895
896 /*
897 * ibd_state_s.rc_ace_recycle_lock
898 */
899 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_s::rc_ace_recycle_lock,
900 ibd_state_s::rc_ace_recycle))
901
902 /*
903 * rc_srq_rwqe_list.dl_mutex
904 */
905 _NOTE(SCHEME_PROTECTS_DATA("atomic or dl mutex or single thr",
906 ibd_state_t::rc_srq_rwqe_list.dl_bufs_outstanding))
907 _NOTE(SCHEME_PROTECTS_DATA("atomic or dl mutex or single thr",
908 ibd_state_t::rc_srq_rwqe_list.dl_cnt))
909
910 /*
911 * Non-mutex protection schemes for data elements. They are counters
912 * for problem diagnosis. They do not need to be protected.
913 */
914 _NOTE(SCHEME_PROTECTS_DATA("counters for problem diagnosis",
915 ibd_state_s::rc_rcv_alloc_fail
916 ibd_state_s::rc_rcq_err
917 ibd_state_s::rc_ace_not_found
918 ibd_state_s::rc_xmt_drop_too_long_pkt
919 ibd_state_s::rc_xmt_icmp_too_long_pkt
920 ibd_state_s::rc_xmt_reenter_too_long_pkt
921 ibd_state_s::rc_swqe_short
922 ibd_state_s::rc_swqe_mac_update
923 ibd_state_s::rc_xmt_buf_short
924 ibd_state_s::rc_xmt_buf_mac_update
925 ibd_state_s::rc_scq_no_swqe
926 ibd_state_s::rc_scq_no_largebuf
927 ibd_state_s::rc_conn_succ
928 ibd_state_s::rc_conn_fail
929 ibd_state_s::rc_null_conn
930 ibd_state_s::rc_no_estab_conn
931 ibd_state_s::rc_act_close
932 ibd_state_s::rc_pas_close
933 ibd_state_s::rc_delay_ace_recycle
934 ibd_state_s::rc_act_close_simultaneous
935 ibd_state_s::rc_act_close_not_clean
936 ibd_state_s::rc_pas_close_rcq_invoking
937 ibd_state_s::rc_reset_cnt
938 ibd_state_s::rc_timeout_act
939 ibd_state_s::rc_timeout_pas
940 ibd_state_s::rc_stop_connect))
941
942 #ifdef DEBUG
943 /*
944 * Non-mutex protection schemes for data elements. They are counters
945 * for problem diagnosis. They do not need to be protected.
946 */
947 _NOTE(SCHEME_PROTECTS_DATA("counters for problem diagnosis",
948 ibd_state_s::rc_rwqe_short
949 ibd_rc_stat_s::rc_rcv_trans_byte
950 ibd_rc_stat_s::rc_rcv_trans_pkt
951 ibd_rc_stat_s::rc_rcv_copy_byte
952 ibd_rc_stat_s::rc_rcv_copy_pkt
953 ibd_rc_stat_s::rc_rcv_alloc_fail
954 ibd_rc_stat_s::rc_rcq_err
955 ibd_rc_stat_s::rc_rwqe_short
956 ibd_rc_stat_s::rc_xmt_bytes
957 ibd_rc_stat_s::rc_xmt_small_pkt
958 ibd_rc_stat_s::rc_xmt_fragmented_pkt
959 ibd_rc_stat_s::rc_xmt_map_fail_pkt
960 ibd_rc_stat_s::rc_xmt_map_succ_pkt
961 ibd_rc_stat_s::rc_ace_not_found
962 ibd_rc_stat_s::rc_scq_no_swqe
963 ibd_rc_stat_s::rc_scq_no_largebuf
964 ibd_rc_stat_s::rc_swqe_short
965 ibd_rc_stat_s::rc_swqe_mac_update
966 ibd_rc_stat_s::rc_xmt_buf_short
967 ibd_rc_stat_s::rc_xmt_buf_mac_update
968 ibd_rc_stat_s::rc_conn_succ
969 ibd_rc_stat_s::rc_conn_fail
970 ibd_rc_stat_s::rc_null_conn
971 ibd_rc_stat_s::rc_no_estab_conn
972 ibd_rc_stat_s::rc_act_close
973 ibd_rc_stat_s::rc_pas_close
974 ibd_rc_stat_s::rc_delay_ace_recycle
975 ibd_rc_stat_s::rc_act_close_simultaneous
976 ibd_rc_stat_s::rc_reset_cnt
977 ibd_rc_stat_s::rc_timeout_act
978 ibd_rc_stat_s::rc_timeout_pas))
979 #endif
980
981 int
982 _init()
983 {
984 int status;
985
986 status = ddi_soft_state_init(&ibd_list, max(sizeof (ibd_state_t),
987 PAGESIZE), 0);
988 if (status != 0) {
989 DPRINT(10, "_init:failed in ddi_soft_state_init()");
990 return (status);
991 }
992
993 mutex_init(&ibd_objlist_lock, NULL, MUTEX_DRIVER, NULL);
994
995 mac_init_ops(&ibd_dev_ops, "ibp");
996 status = mod_install(&ibd_modlinkage);
997 if (status != 0) {
998 DPRINT(10, "_init:failed in mod_install()");
999 ddi_soft_state_fini(&ibd_list);
1000 mac_fini_ops(&ibd_dev_ops);
1246 ptr->rq_ptr, ptr->rq_gid,
1247 IB_MC_JSTATE_FULL);
1248 /*
1249 * the req buf is contained in the mce
1250 * structure, so we do not need
1251 * to free it here.
1252 */
1253 ptr = NULL;
1254 break;
1255 case IBD_ASYNC_TRAP:
1256 ibd_async_trap(state, ptr);
1257 break;
1258 case IBD_ASYNC_SCHED:
1259 ibd_async_txsched(state);
1260 break;
1261 case IBD_ASYNC_LINK:
1262 ibd_async_link(state, ptr);
1263 break;
1264 case IBD_ASYNC_EXIT:
1265 mutex_enter(&state->id_acache_req_lock);
1266 #ifndef __lock_lint
1267 CALLB_CPR_EXIT(&cprinfo);
1268 #else
1269 mutex_exit(&state->id_acache_req_lock);
1270 #endif
1271 return;
1272 case IBD_ASYNC_RC_TOO_BIG:
1273 ibd_async_rc_process_too_big(state,
1274 ptr);
1275 break;
1276 case IBD_ASYNC_RC_CLOSE_ACT_CHAN:
1277 ibd_async_rc_close_act_chan(state, ptr);
1278 break;
1279 case IBD_ASYNC_RC_RECYCLE_ACE:
1280 ibd_async_rc_recycle_ace(state, ptr);
1281 break;
1282 case IBD_ASYNC_RC_CLOSE_PAS_CHAN:
1283 (void) ibd_rc_pas_close(ptr->rq_ptr,
1284 B_TRUE, B_TRUE);
1285 break;
1286 }
1287 free_req_and_continue:
1288 if (ptr != NULL)
1289 kmem_cache_free(state->id_req_kmc, ptr);
1290
1291 mutex_enter(&state->id_acache_req_lock);
1292 } else {
1293 #ifndef __lock_lint
1294 /*
1295 * Nothing to do: wait till new request arrives.
1296 */
1297 CALLB_CPR_SAFE_BEGIN(&cprinfo);
1298 cv_wait(&state->id_acache_req_cv,
1299 &state->id_acache_req_lock);
1300 CALLB_CPR_SAFE_END(&cprinfo,
1301 &state->id_acache_req_lock);
1302 #endif
1303 }
1304 }
1305
1306 /*NOTREACHED*/
1307 _NOTE(NOT_REACHED)
1308 }
1309
1310 /*
1311 * Return when it is safe to queue requests to the async daemon; primarily
1312 * for subnet trap and async event handling. Disallow requests before the
1313 * daemon is created, and when interface deinitialization starts.
1314 */
1315 static boolean_t
1316 ibd_async_safe(ibd_state_t *state)
1317 {
1318 mutex_enter(&state->id_trap_lock);
1319 if (state->id_trap_stop) {
1320 mutex_exit(&state->id_trap_lock);
1321 return (B_FALSE);
1322 }
1929 */
1930 static void
1931 ibd_async_link(ibd_state_t *state, ibd_req_t *req)
1932 {
1933 ibd_link_op_t opcode = (ibd_link_op_t)req->rq_ptr;
1934 link_state_t lstate = (opcode == IBD_LINK_DOWN) ? LINK_STATE_DOWN :
1935 LINK_STATE_UP;
1936 ibd_mce_t *mce, *pmce;
1937 ibd_ace_t *ace, *pace;
1938
1939 DPRINT(10, "ibd_async_link(): %d", opcode);
1940
1941 /*
1942 * On a link up, revalidate the link speed/width. No point doing
1943 * this on a link down, since we will be unable to do SA operations,
1944 * defaulting to the lowest speed. Also notice that we update our
1945 * notion of speed before calling mac_link_update(), which will do
1946 * necessary higher level notifications for speed changes.
1947 */
1948 if ((opcode == IBD_LINK_UP_ABSENT) || (opcode == IBD_LINK_UP)) {
1949 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))
1950 state->id_link_speed = ibd_get_portspeed(state);
1951 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*state))
1952 }
1953
1954 /*
1955 * Do all the work required to establish our presence on
1956 * the subnet.
1957 */
1958 if (opcode == IBD_LINK_UP_ABSENT) {
1959 /*
1960 * If in promiscuous mode ...
1961 */
1962 if (state->id_prom_op == IBD_OP_COMPLETED) {
1963 /*
1964 * Drop all nonmembership.
1965 */
1966 ibd_async_unsetprom(state);
1967
1968 /*
1969 * Then, try to regain nonmembership to all mcg's.
1970 */
1971 ibd_async_setprom(state);
2183 }
2184 ibt_free_portinfo(port_infop, port_infosz);
2185 goto link_mod_return;
2186 }
2187
2188 /*
2189 * Check the SM InitTypeReply flags. If both NoLoadReply and
2190 * PreserveContentReply are 0, we don't know anything about the
2191 * data loaded into the port attributes, so we need to verify
2192 * if gid0 and pkey are still valid.
2193 */
2194 itreply = port_infop->p_init_type_reply;
2195 if (((itreply & SM_INIT_TYPE_REPLY_NO_LOAD_REPLY) == 0) &&
2196 ((itreply & SM_INIT_TYPE_PRESERVE_CONTENT_REPLY) == 0)) {
2197 /*
2198 * Check to see if the subnet part of GID0 has changed. If
2199 * not, check the simple case first to see if the pkey
2200 * index is the same as before; finally check to see if the
2201 * pkey has been relocated to a different index in the table.
2202 */
2203 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
2204 if (bcmp(port_infop->p_sgid_tbl,
2205 &state->id_sgid, sizeof (ib_gid_t)) != 0) {
2206
2207 new_link_state = LINK_STATE_DOWN;
2208
2209 } else if (port_infop->p_pkey_tbl[state->id_pkix] ==
2210 state->id_pkey) {
2211
2212 new_link_state = LINK_STATE_UP;
2213
2214 } else if (ibd_locate_pkey(port_infop->p_pkey_tbl,
2215 port_infop->p_pkey_tbl_sz, state->id_pkey, &pkix) == 0) {
2216
2217 ibt_free_portinfo(port_infop, port_infosz);
2218 mutex_exit(&state->id_link_mutex);
2219
2220 /*
2221 * Currently a restart is required if our pkey has moved
2222 * in the pkey table. If we get the ibt_recycle_ud() to
2223 * work as documented (expected), we may be able to
2224 * avoid a complete restart. Note that we've already
2225 * marked both the start and stop 'in-progress' flags,
2226 * so it is ok to go ahead and do this restart.
2227 */
2228 (void) ibd_undo_start(state, LINK_STATE_DOWN);
2229 if ((ret = ibd_start(state)) != 0) {
2230 DPRINT(10, "ibd_restart: cannot restart, "
2231 "ret=%d", ret);
2232 }
2233
2234 goto link_mod_return;
2235 } else {
2236 new_link_state = LINK_STATE_DOWN;
2237 }
2238 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))
2239 }
2240
2241 update_link_state:
2242 if (port_infop) {
2243 ibt_free_portinfo(port_infop, port_infosz);
2244 }
2245
2246 /*
2247 * If we're reporting a link up, check InitTypeReply to see if
2248 * the SM has ensured that the port's presence in mcg, traps,
2249 * etc. is intact.
2250 */
2251 if (new_link_state == LINK_STATE_DOWN) {
2252 opcode = IBD_LINK_DOWN;
2253 } else {
2254 if ((itreply & SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY) ==
2255 SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY) {
2256 opcode = IBD_LINK_UP;
2257 } else {
2258 opcode = IBD_LINK_UP_ABSENT;
2405 /*
2406 * Register ourselves with the GLDv3 interface
2407 */
2408 if ((ret = mac_register(macp, &state->id_mh)) != 0) {
2409 mac_free(macp);
2410 DPRINT(10,
2411 "ibd_register_mac: mac_register() failed, ret=%d", ret);
2412 return (DDI_FAILURE);
2413 }
2414
2415 mac_free(macp);
2416 return (DDI_SUCCESS);
2417 }
2418
2419 static int
2420 ibd_record_capab(ibd_state_t *state)
2421 {
2422 ibt_hca_attr_t hca_attrs;
2423 ibt_status_t ibt_status;
2424
2425 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))
2426
2427 /*
2428 * Query the HCA and fetch its attributes
2429 */
2430 ibt_status = ibt_query_hca(state->id_hca_hdl, &hca_attrs);
2431 ASSERT(ibt_status == IBT_SUCCESS);
2432
2433 /*
2434 * 1. Set the Hardware Checksum capability. Currently we only consider
2435 * full checksum offload.
2436 */
2437 if (state->id_enable_rc) {
2438 state->id_hwcksum_capab = 0;
2439 } else {
2440 if ((hca_attrs.hca_flags & IBT_HCA_CKSUM_FULL)
2441 == IBT_HCA_CKSUM_FULL) {
2442 state->id_hwcksum_capab = IBT_HCA_CKSUM_FULL;
2443 }
2444 }
2445
2446 /*
2509 state->rc_max_sqseg_hiwm = (state->rc_tx_max_sqseg * 65) / 100;
2510
2511 /*
2512 * 5. Set number of recv and send wqes after checking hca maximum
2513 * channel size. Store the max channel size in the state so that it
2514 * can be referred to when the swqe/rwqe change is requested via
2515 * dladm.
2516 */
2517
2518 state->id_hca_max_chan_sz = hca_attrs.hca_max_chan_sz;
2519
2520 if (hca_attrs.hca_max_chan_sz < state->id_ud_num_rwqe)
2521 state->id_ud_num_rwqe = hca_attrs.hca_max_chan_sz;
2522
2523 state->id_rx_bufs_outstanding_limit = state->id_ud_num_rwqe -
2524 IBD_RWQE_MIN;
2525
2526 if (hca_attrs.hca_max_chan_sz < state->id_ud_num_swqe)
2527 state->id_ud_num_swqe = hca_attrs.hca_max_chan_sz;
2528
2529 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*state))
2530
2531 return (DDI_SUCCESS);
2532 }
2533
2534 static int
2535 ibd_part_busy(ibd_state_t *state)
2536 {
2537 if (atomic_add_32_nv(&state->id_rx_list.dl_bufs_outstanding, 0) != 0) {
2538 DPRINT(10, "ibd_part_busy: failed: rx bufs outstanding\n");
2539 return (DDI_FAILURE);
2540 }
2541
2542 if (state->rc_srq_rwqe_list.dl_bufs_outstanding != 0) {
2543 DPRINT(10, "ibd_part_busy: failed: srq bufs outstanding\n");
2544 return (DDI_FAILURE);
2545 }
2546
2547 /*
2548 * "state->id_ah_op == IBD_OP_ONGOING" means this IPoIB port is
2549 * connecting to a remote IPoIB port. We can't remove this port.
2550 */
3400 */
3401 static ibt_status_t
3402 ibd_find_bgroup(ibd_state_t *state)
3403 {
3404 ibt_mcg_attr_t mcg_attr;
3405 uint_t numg;
3406 uchar_t scopes[] = { IB_MC_SCOPE_SUBNET_LOCAL,
3407 IB_MC_SCOPE_SITE_LOCAL, IB_MC_SCOPE_ORG_LOCAL,
3408 IB_MC_SCOPE_GLOBAL };
3409 int i, mcgmtu;
3410 boolean_t found = B_FALSE;
3411 int ret;
3412 ibt_mcg_info_t mcg_info;
3413
3414 state->id_bgroup_created = B_FALSE;
3415 state->id_bgroup_present = B_FALSE;
3416
3417 query_bcast_grp:
3418 bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
3419 mcg_attr.mc_pkey = state->id_pkey;
3420 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_mgid))
3421 state->id_mgid.gid_guid = IB_MGID_IPV4_LOWGRP_MASK;
3422 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_mgid))
3423
3424 for (i = 0; i < sizeof (scopes)/sizeof (scopes[0]); i++) {
3425 state->id_scope = mcg_attr.mc_scope = scopes[i];
3426
3427 /*
3428 * Look for the IPoIB broadcast group.
3429 */
3430 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_mgid))
3431 state->id_mgid.gid_prefix =
3432 (((uint64_t)IB_MCGID_IPV4_PREFIX << 32) |
3433 ((uint64_t)state->id_scope << 48) |
3434 ((uint32_t)(state->id_pkey << 16)));
3435 mcg_attr.mc_mgid = state->id_mgid;
3436 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_mgid))
3437 if (ibt_query_mcg(state->id_sgid, &mcg_attr, 1,
3438 &state->id_mcinfo, &numg) == IBT_SUCCESS) {
3439 found = B_TRUE;
3440 break;
3441 }
3442 }
3443
3444 if (!found) {
3445 if (state->id_create_broadcast_group) {
3446 /*
3447 * If we created the broadcast group, but failed to
3448 * find it, we can't do anything except leave the
3449 * one we created and return failure.
3450 */
3451 if (state->id_bgroup_created) {
3452 ibd_print_warn(state, "IPoIB broadcast group "
3453 "absent. Unable to query after create.");
3454 goto find_bgroup_fail;
3455 }
3456
3457 /*
3458 * Create the ipoib broadcast group if it didn't exist
3459 */
3460 bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
3461 mcg_attr.mc_qkey = IBD_DEFAULT_QKEY;
3462 mcg_attr.mc_join_state = IB_MC_JSTATE_FULL;
3463 mcg_attr.mc_scope = IB_MC_SCOPE_SUBNET_LOCAL;
3464 mcg_attr.mc_pkey = state->id_pkey;
3465 mcg_attr.mc_flow = 0;
3466 mcg_attr.mc_sl = 0;
3467 mcg_attr.mc_tclass = 0;
3468 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_mgid))
3469 state->id_mgid.gid_prefix =
3470 (((uint64_t)IB_MCGID_IPV4_PREFIX << 32) |
3471 ((uint64_t)IB_MC_SCOPE_SUBNET_LOCAL << 48) |
3472 ((uint32_t)(state->id_pkey << 16)));
3473 mcg_attr.mc_mgid = state->id_mgid;
3474 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_mgid))
3475
3476 if ((ret = ibt_join_mcg(state->id_sgid, &mcg_attr,
3477 &mcg_info, NULL, NULL)) != IBT_SUCCESS) {
3478 ibd_print_warn(state, "IPoIB broadcast group "
3479 "absent, create failed: ret = %d\n", ret);
3480 state->id_bgroup_created = B_FALSE;
3481 return (IBT_FAILURE);
3482 }
3483 state->id_bgroup_created = B_TRUE;
3484 goto query_bcast_grp;
3485 } else {
3486 ibd_print_warn(state, "IPoIB broadcast group absent");
3487 return (IBT_FAILURE);
3488 }
3489 }
3490
3491 /*
3492 * Assert that the mcg mtu <= id_mtu. Fill in updated id_mtu.
3493 */
3494 mcgmtu = (128 << state->id_mcinfo->mc_mtu);
4336 * on a kernel thread (handling can thus block) and can be invoked
4337 * concurrently. The handler can be invoked anytime after it is
4338 * registered and before ibt_detach().
4339 */
4340 /* ARGSUSED */
4341 static void
4342 ibd_snet_notices_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
4343 ibt_subnet_event_t *event)
4344 {
4345 ibd_state_t *state = (ibd_state_t *)arg;
4346 ibd_req_t *req;
4347
4348 /*
4349 * The trap handler will get invoked once for every event for
4350 * every port. The input "gid" is the GID0 of the port the
4351 * trap came in on; we just need to act on traps that came
4352 * to our port, meaning the port on which the ipoib interface
4353 * resides. Since ipoib uses GID0 of the port, we just match
4354 * the gids to check whether we need to handle the trap.
4355 */
4356 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
4357 if (bcmp(&gid, &state->id_sgid, sizeof (ib_gid_t)) != 0)
4358 return;
4359 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))
4360
4361 DPRINT(10, "ibd_notices_handler : %d\n", code);
4362
4363 switch (code) {
4364 case IBT_SM_EVENT_UNAVAILABLE:
4365 /*
4366 * If we are in promiscuous mode or have
4367 * sendnonmembers, we need to print a warning
4368 * message right now. Else, just store the
4369 * information, print when we enter promiscuous
4370 * mode or attempt nonmember send. We might
4371 * also want to stop caching sendnonmember.
4372 */
4373 ibd_print_warn(state, "IBA multicast support "
4374 "degraded due to unavailability of multicast "
4375 "traps");
4376 break;
4377 case IBT_SM_EVENT_AVAILABLE:
4378 /*
4379 * If we printed a warning message above or
5326 ret = ibt_query_hca_ports(state->id_hca_hdl, state->id_port,
5327 &port_infop, &psize, &port_infosz);
5328 if ((ret != IBT_SUCCESS) || (psize != 1)) {
5329 mutex_exit(&state->id_link_mutex);
5330 DPRINT(10, "ibd_get_port_details: ibt_query_hca_ports() "
5331 "failed, ret=%d", ret);
5332 return (ENETDOWN);
5333 }
5334
5335 /*
5336 * If the link is active, verify the pkey
5337 */
5338 if (port_infop->p_linkstate == IBT_PORT_ACTIVE) {
5339 if ((ret = ibt_pkey2index(state->id_hca_hdl, state->id_port,
5340 state->id_pkey, &state->id_pkix)) != IBT_SUCCESS) {
5341 state->id_link_state = LINK_STATE_DOWN;
5342 } else {
5343 state->id_link_state = LINK_STATE_UP;
5344 }
5345 state->id_mtu = (128 << port_infop->p_mtu);
5346 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
5347 state->id_sgid = *port_infop->p_sgid_tbl;
5348 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))
5349 /*
5350 * Now that the port is active, record the port speed
5351 */
5352 state->id_link_speed = ibd_get_portspeed(state);
5353 } else {
5354 /* Make sure that these are handled in PORT_UP/CHANGE */
5355 state->id_mtu = 0;
5356 state->id_link_state = LINK_STATE_DOWN;
5357 state->id_link_speed = 0;
5358 }
5359 mutex_exit(&state->id_link_mutex);
5360 ibt_free_portinfo(port_infop, port_infosz);
5361
5362 return (0);
5363 }
5364
5365 static int
5366 ibd_alloc_cqs(ibd_state_t *state)
5367 {
5368 ibt_hca_attr_t hca_attrs;
|
602 int len;
603 va_list ap;
604 char part_name[MAXNAMELEN];
605 datalink_id_t linkid = state->id_plinkid;
606
607 hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, state->id_dip,
608 0, "hca-guid", 0);
609 (void) dls_mgmt_get_linkinfo(linkid, part_name, NULL, NULL, NULL);
610 len = snprintf(ibd_print_buf, sizeof (ibd_print_buf),
611 "%s%d: HCA GUID %016llx port %d PKEY %02x link %s ",
612 ddi_driver_name(state->id_dip), ddi_get_instance(state->id_dip),
613 (u_longlong_t)hca_guid, state->id_port, state->id_pkey,
614 part_name);
615 va_start(ap, fmt);
616 (void) vsnprintf(ibd_print_buf + len, sizeof (ibd_print_buf) - len,
617 fmt, ap);
618 cmn_err(CE_NOTE, "!%s", ibd_print_buf);
619 va_end(ap);
620 }
621
622 int
623 _init()
624 {
625 int status;
626
627 status = ddi_soft_state_init(&ibd_list, max(sizeof (ibd_state_t),
628 PAGESIZE), 0);
629 if (status != 0) {
630 DPRINT(10, "_init:failed in ddi_soft_state_init()");
631 return (status);
632 }
633
634 mutex_init(&ibd_objlist_lock, NULL, MUTEX_DRIVER, NULL);
635
636 mac_init_ops(&ibd_dev_ops, "ibp");
637 status = mod_install(&ibd_modlinkage);
638 if (status != 0) {
639 DPRINT(10, "_init:failed in mod_install()");
640 ddi_soft_state_fini(&ibd_list);
641 mac_fini_ops(&ibd_dev_ops);
887 ptr->rq_ptr, ptr->rq_gid,
888 IB_MC_JSTATE_FULL);
889 /*
890 * the req buf is contained in the mce
891 * structure, so we do not need
892 * to free it here.
893 */
894 ptr = NULL;
895 break;
896 case IBD_ASYNC_TRAP:
897 ibd_async_trap(state, ptr);
898 break;
899 case IBD_ASYNC_SCHED:
900 ibd_async_txsched(state);
901 break;
902 case IBD_ASYNC_LINK:
903 ibd_async_link(state, ptr);
904 break;
905 case IBD_ASYNC_EXIT:
906 mutex_enter(&state->id_acache_req_lock);
907 CALLB_CPR_EXIT(&cprinfo);
908 return;
909 case IBD_ASYNC_RC_TOO_BIG:
910 ibd_async_rc_process_too_big(state,
911 ptr);
912 break;
913 case IBD_ASYNC_RC_CLOSE_ACT_CHAN:
914 ibd_async_rc_close_act_chan(state, ptr);
915 break;
916 case IBD_ASYNC_RC_RECYCLE_ACE:
917 ibd_async_rc_recycle_ace(state, ptr);
918 break;
919 case IBD_ASYNC_RC_CLOSE_PAS_CHAN:
920 (void) ibd_rc_pas_close(ptr->rq_ptr,
921 B_TRUE, B_TRUE);
922 break;
923 }
924 free_req_and_continue:
925 if (ptr != NULL)
926 kmem_cache_free(state->id_req_kmc, ptr);
927
928 mutex_enter(&state->id_acache_req_lock);
929 } else {
930 /*
931 * Nothing to do: wait till new request arrives.
932 */
933 CALLB_CPR_SAFE_BEGIN(&cprinfo);
934 cv_wait(&state->id_acache_req_cv,
935 &state->id_acache_req_lock);
936 CALLB_CPR_SAFE_END(&cprinfo,
937 &state->id_acache_req_lock);
938 }
939 }
940
941 /*NOTREACHED*/
942 _NOTE(NOT_REACHED)
943 }
944
945 /*
946 * Return when it is safe to queue requests to the async daemon; primarily
947 * for subnet trap and async event handling. Disallow requests before the
948 * daemon is created, and when interface deinitialization starts.
949 */
950 static boolean_t
951 ibd_async_safe(ibd_state_t *state)
952 {
953 mutex_enter(&state->id_trap_lock);
954 if (state->id_trap_stop) {
955 mutex_exit(&state->id_trap_lock);
956 return (B_FALSE);
957 }
1564 */
1565 static void
1566 ibd_async_link(ibd_state_t *state, ibd_req_t *req)
1567 {
1568 ibd_link_op_t opcode = (ibd_link_op_t)req->rq_ptr;
1569 link_state_t lstate = (opcode == IBD_LINK_DOWN) ? LINK_STATE_DOWN :
1570 LINK_STATE_UP;
1571 ibd_mce_t *mce, *pmce;
1572 ibd_ace_t *ace, *pace;
1573
1574 DPRINT(10, "ibd_async_link(): %d", opcode);
1575
1576 /*
1577 * On a link up, revalidate the link speed/width. No point doing
1578 * this on a link down, since we will be unable to do SA operations,
1579 * defaulting to the lowest speed. Also notice that we update our
1580 * notion of speed before calling mac_link_update(), which will do
1581 * necessary higher level notifications for speed changes.
1582 */
1583 if ((opcode == IBD_LINK_UP_ABSENT) || (opcode == IBD_LINK_UP)) {
1584 state->id_link_speed = ibd_get_portspeed(state);
1585 }
1586
1587 /*
1588 * Do all the work required to establish our presence on
1589 * the subnet.
1590 */
1591 if (opcode == IBD_LINK_UP_ABSENT) {
1592 /*
1593 * If in promiscuous mode ...
1594 */
1595 if (state->id_prom_op == IBD_OP_COMPLETED) {
1596 /*
1597 * Drop all nonmembership.
1598 */
1599 ibd_async_unsetprom(state);
1600
1601 /*
1602 * Then, try to regain nonmembership to all mcg's.
1603 */
1604 ibd_async_setprom(state);
1816 }
1817 ibt_free_portinfo(port_infop, port_infosz);
1818 goto link_mod_return;
1819 }
1820
1821 /*
1822 * Check the SM InitTypeReply flags. If both NoLoadReply and
1823 * PreserveContentReply are 0, we don't know anything about the
1824 * data loaded into the port attributes, so we need to verify
1825 * if gid0 and pkey are still valid.
1826 */
1827 itreply = port_infop->p_init_type_reply;
1828 if (((itreply & SM_INIT_TYPE_REPLY_NO_LOAD_REPLY) == 0) &&
1829 ((itreply & SM_INIT_TYPE_PRESERVE_CONTENT_REPLY) == 0)) {
1830 /*
1831 * Check to see if the subnet part of GID0 has changed. If
1832 * not, check the simple case first to see if the pkey
1833 * index is the same as before; finally check to see if the
1834 * pkey has been relocated to a different index in the table.
1835 */
1836 if (bcmp(port_infop->p_sgid_tbl,
1837 &state->id_sgid, sizeof (ib_gid_t)) != 0) {
1838
1839 new_link_state = LINK_STATE_DOWN;
1840
1841 } else if (port_infop->p_pkey_tbl[state->id_pkix] ==
1842 state->id_pkey) {
1843
1844 new_link_state = LINK_STATE_UP;
1845
1846 } else if (ibd_locate_pkey(port_infop->p_pkey_tbl,
1847 port_infop->p_pkey_tbl_sz, state->id_pkey, &pkix) == 0) {
1848
1849 ibt_free_portinfo(port_infop, port_infosz);
1850 mutex_exit(&state->id_link_mutex);
1851
1852 /*
1853 * Currently a restart is required if our pkey has moved
1854 * in the pkey table. If we get the ibt_recycle_ud() to
1855 * work as documented (expected), we may be able to
1856 * avoid a complete restart. Note that we've already
1857 * marked both the start and stop 'in-progress' flags,
1858 * so it is ok to go ahead and do this restart.
1859 */
1860 (void) ibd_undo_start(state, LINK_STATE_DOWN);
1861 if ((ret = ibd_start(state)) != 0) {
1862 DPRINT(10, "ibd_restart: cannot restart, "
1863 "ret=%d", ret);
1864 }
1865
1866 goto link_mod_return;
1867 } else {
1868 new_link_state = LINK_STATE_DOWN;
1869 }
1870 }
1871
1872 update_link_state:
1873 if (port_infop) {
1874 ibt_free_portinfo(port_infop, port_infosz);
1875 }
1876
1877 /*
1878 * If we're reporting a link up, check InitTypeReply to see if
1879 * the SM has ensured that the port's presence in mcg, traps,
1880 * etc. is intact.
1881 */
1882 if (new_link_state == LINK_STATE_DOWN) {
1883 opcode = IBD_LINK_DOWN;
1884 } else {
1885 if ((itreply & SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY) ==
1886 SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY) {
1887 opcode = IBD_LINK_UP;
1888 } else {
1889 opcode = IBD_LINK_UP_ABSENT;
2036 /*
2037 * Register ourselves with the GLDv3 interface
2038 */
2039 if ((ret = mac_register(macp, &state->id_mh)) != 0) {
2040 mac_free(macp);
2041 DPRINT(10,
2042 "ibd_register_mac: mac_register() failed, ret=%d", ret);
2043 return (DDI_FAILURE);
2044 }
2045
2046 mac_free(macp);
2047 return (DDI_SUCCESS);
2048 }
2049
/*
 * Record HCA capabilities in the per-instance state.
 *
 * Queries the HCA attributes through state->id_hca_hdl and derives
 * settings in "state" from them. The steps visible here are the hardware
 * checksum capability (step 1) and the clamping of the UD receive/send
 * wqe counts to the HCA's maximum channel size (step 5). Steps 2-4 are
 * not shown in this excerpt and are not described here.
 *
 * Every path visible here returns DDI_SUCCESS.
 */
2050 static int
2051 ibd_record_capab(ibd_state_t *state)
2052 {
2053 ibt_hca_attr_t hca_attrs;
2054 ibt_status_t ibt_status;
2055
2056 /*
2057 * Query the HCA and fetch its attributes
2058 */
2059 ibt_status = ibt_query_hca(state->id_hca_hdl, &hca_attrs);
2060 ASSERT(ibt_status == IBT_SUCCESS);
	/*
	 * NOTE(review): the query status is checked only by the ASSERT.
	 * Presumably ASSERT compiles away in non-DEBUG builds, in which
	 * case a failed query would leave hca_attrs unvalidated for the
	 * reads below -- confirm.
	 */
2061
2062 /*
2063 * 1. Set the Hardware Checksum capability. Currently we only consider
2064 * full checksum offload.
2065 */
2066 if (state->id_enable_rc) {
2067 state->id_hwcksum_capab = 0;
2068 } else {
2069 if ((hca_attrs.hca_flags & IBT_HCA_CKSUM_FULL)
2070 == IBT_HCA_CKSUM_FULL) {
2071 state->id_hwcksum_capab = IBT_HCA_CKSUM_FULL;
2072 }
2073 }
	/*
	 * When id_enable_rc is clear and the HCA does not report
	 * IBT_HCA_CKSUM_FULL, id_hwcksum_capab is not written here and
	 * keeps whatever value it had on entry.
	 */
2074
2075 /*
2138 state->rc_max_sqseg_hiwm = (state->rc_tx_max_sqseg * 65) / 100;
2139
2140 /*
2141 * 5. Set number of recv and send wqes after checking hca maximum
2142 * channel size. Store the max channel size in the state so that it
2143 * can be referred to when the swqe/rwqe change is requested via
2144 * dladm.
2145 */
2146
2147 state->id_hca_max_chan_sz = hca_attrs.hca_max_chan_sz;
2148
	/* Cap the UD receive wqe count at the HCA's channel size. */
2149 if (hca_attrs.hca_max_chan_sz < state->id_ud_num_rwqe)
2150 state->id_ud_num_rwqe = hca_attrs.hca_max_chan_sz;
2151
	/*
	 * The outstanding-rx-buffer limit is derived from the (possibly
	 * just clamped) rwqe count, so it must be computed after the cap.
	 * NOTE(review): this assumes id_ud_num_rwqe >= IBD_RWQE_MIN;
	 * otherwise the subtraction would underflow -- confirm the bound
	 * is enforced elsewhere.
	 */
2152 state->id_rx_bufs_outstanding_limit = state->id_ud_num_rwqe -
2153 IBD_RWQE_MIN;
2154
	/* Cap the UD send wqe count the same way. */
2155 if (hca_attrs.hca_max_chan_sz < state->id_ud_num_swqe)
2156 state->id_ud_num_swqe = hca_attrs.hca_max_chan_sz;
2157
2158 return (DDI_SUCCESS);
2159 }
2160
2161 static int
2162 ibd_part_busy(ibd_state_t *state)
2163 {
2164 if (atomic_add_32_nv(&state->id_rx_list.dl_bufs_outstanding, 0) != 0) {
2165 DPRINT(10, "ibd_part_busy: failed: rx bufs outstanding\n");
2166 return (DDI_FAILURE);
2167 }
2168
2169 if (state->rc_srq_rwqe_list.dl_bufs_outstanding != 0) {
2170 DPRINT(10, "ibd_part_busy: failed: srq bufs outstanding\n");
2171 return (DDI_FAILURE);
2172 }
2173
2174 /*
2175 * "state->id_ah_op == IBD_OP_ONGOING" means this IPoIB port is
2176 * connecting to a remote IPoIB port. We can't remove this port.
2177 */
3027 */
3028 static ibt_status_t
3029 ibd_find_bgroup(ibd_state_t *state)
3030 {
3031 ibt_mcg_attr_t mcg_attr;
3032 uint_t numg;
3033 uchar_t scopes[] = { IB_MC_SCOPE_SUBNET_LOCAL,
3034 IB_MC_SCOPE_SITE_LOCAL, IB_MC_SCOPE_ORG_LOCAL,
3035 IB_MC_SCOPE_GLOBAL };
3036 int i, mcgmtu;
3037 boolean_t found = B_FALSE;
3038 int ret;
3039 ibt_mcg_info_t mcg_info;
3040
3041 state->id_bgroup_created = B_FALSE;
3042 state->id_bgroup_present = B_FALSE;
3043
3044 query_bcast_grp:
3045 bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
3046 mcg_attr.mc_pkey = state->id_pkey;
3047 state->id_mgid.gid_guid = IB_MGID_IPV4_LOWGRP_MASK;
3048
3049 for (i = 0; i < sizeof (scopes)/sizeof (scopes[0]); i++) {
3050 state->id_scope = mcg_attr.mc_scope = scopes[i];
3051
3052 /*
3053 * Look for the IPoIB broadcast group.
3054 */
3055 state->id_mgid.gid_prefix =
3056 (((uint64_t)IB_MCGID_IPV4_PREFIX << 32) |
3057 ((uint64_t)state->id_scope << 48) |
3058 ((uint32_t)(state->id_pkey << 16)));
3059 mcg_attr.mc_mgid = state->id_mgid;
3060 if (ibt_query_mcg(state->id_sgid, &mcg_attr, 1,
3061 &state->id_mcinfo, &numg) == IBT_SUCCESS) {
3062 found = B_TRUE;
3063 break;
3064 }
3065 }
3066
3067 if (!found) {
3068 if (state->id_create_broadcast_group) {
3069 /*
3070 * If we created the broadcast group, but failed to
3071 * find it, we can't do anything except leave the
3072 * one we created and return failure.
3073 */
3074 if (state->id_bgroup_created) {
3075 ibd_print_warn(state, "IPoIB broadcast group "
3076 "absent. Unable to query after create.");
3077 goto find_bgroup_fail;
3078 }
3079
3080 /*
3081 * Create the ipoib broadcast group if it didn't exist
3082 */
3083 bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
3084 mcg_attr.mc_qkey = IBD_DEFAULT_QKEY;
3085 mcg_attr.mc_join_state = IB_MC_JSTATE_FULL;
3086 mcg_attr.mc_scope = IB_MC_SCOPE_SUBNET_LOCAL;
3087 mcg_attr.mc_pkey = state->id_pkey;
3088 mcg_attr.mc_flow = 0;
3089 mcg_attr.mc_sl = 0;
3090 mcg_attr.mc_tclass = 0;
3091 state->id_mgid.gid_prefix =
3092 (((uint64_t)IB_MCGID_IPV4_PREFIX << 32) |
3093 ((uint64_t)IB_MC_SCOPE_SUBNET_LOCAL << 48) |
3094 ((uint32_t)(state->id_pkey << 16)));
3095 mcg_attr.mc_mgid = state->id_mgid;
3096
3097 if ((ret = ibt_join_mcg(state->id_sgid, &mcg_attr,
3098 &mcg_info, NULL, NULL)) != IBT_SUCCESS) {
3099 ibd_print_warn(state, "IPoIB broadcast group "
3100 "absent, create failed: ret = %d\n", ret);
3101 state->id_bgroup_created = B_FALSE;
3102 return (IBT_FAILURE);
3103 }
3104 state->id_bgroup_created = B_TRUE;
3105 goto query_bcast_grp;
3106 } else {
3107 ibd_print_warn(state, "IPoIB broadcast group absent");
3108 return (IBT_FAILURE);
3109 }
3110 }
3111
3112 /*
3113 * Assert that the mcg mtu <= id_mtu. Fill in updated id_mtu.
3114 */
3115 mcgmtu = (128 << state->id_mcinfo->mc_mtu);
3957 * on a kernel thread (handling can thus block) and can be invoked
3958 * concurrently. The handler can be invoked anytime after it is
3959 * registered and before ibt_detach().
3960 */
3961 /* ARGSUSED */
3962 static void
3963 ibd_snet_notices_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
3964 ibt_subnet_event_t *event)
3965 {
3966 ibd_state_t *state = (ibd_state_t *)arg;
3967 ibd_req_t *req;
3968
3969 /*
3970 * The trap handler will get invoked once for every event for
3971 * every port. The input "gid" is the GID0 of the port the
3972 * trap came in on; we just need to act on traps that came
3973 * to our port, meaning the port on which the ipoib interface
3974 * resides. Since ipoib uses GID0 of the port, we just match
3975 * the gids to check whether we need to handle the trap.
3976 */
3977 if (bcmp(&gid, &state->id_sgid, sizeof (ib_gid_t)) != 0)
3978 return;
3979
3980 DPRINT(10, "ibd_notices_handler : %d\n", code);
3981
3982 switch (code) {
3983 case IBT_SM_EVENT_UNAVAILABLE:
3984 /*
3985 * If we are in promiscuous mode or have
3986 * sendnonmembers, we need to print a warning
3987 * message right now. Else, just store the
3988 * information, print when we enter promiscuous
3989 * mode or attempt nonmember send. We might
3990 * also want to stop caching sendnonmember.
3991 */
3992 ibd_print_warn(state, "IBA multicast support "
3993 "degraded due to unavailability of multicast "
3994 "traps");
3995 break;
3996 case IBT_SM_EVENT_AVAILABLE:
3997 /*
3998 * If we printed a warning message above or
4945 ret = ibt_query_hca_ports(state->id_hca_hdl, state->id_port,
4946 &port_infop, &psize, &port_infosz);
4947 if ((ret != IBT_SUCCESS) || (psize != 1)) {
4948 mutex_exit(&state->id_link_mutex);
4949 DPRINT(10, "ibd_get_port_details: ibt_query_hca_ports() "
4950 "failed, ret=%d", ret);
4951 return (ENETDOWN);
4952 }
4953
4954 /*
4955 * If the link is active, verify the pkey
4956 */
4957 if (port_infop->p_linkstate == IBT_PORT_ACTIVE) {
4958 if ((ret = ibt_pkey2index(state->id_hca_hdl, state->id_port,
4959 state->id_pkey, &state->id_pkix)) != IBT_SUCCESS) {
4960 state->id_link_state = LINK_STATE_DOWN;
4961 } else {
4962 state->id_link_state = LINK_STATE_UP;
4963 }
4964 state->id_mtu = (128 << port_infop->p_mtu);
4965 state->id_sgid = *port_infop->p_sgid_tbl;
4966 /*
4967 * Now that the port is active, record the port speed
4968 */
4969 state->id_link_speed = ibd_get_portspeed(state);
4970 } else {
4971 /* Make sure that these are handled in PORT_UP/CHANGE */
4972 state->id_mtu = 0;
4973 state->id_link_state = LINK_STATE_DOWN;
4974 state->id_link_speed = 0;
4975 }
4976 mutex_exit(&state->id_link_mutex);
4977 ibt_free_portinfo(port_infop, port_infosz);
4978
4979 return (0);
4980 }
4981
4982 static int
4983 ibd_alloc_cqs(ibd_state_t *state)
4984 {
4985 ibt_hca_attr_t hca_attrs;
|