Print this page
8368 remove warlock leftovers from usr/src/uts


 602         int len;
 603         va_list ap;
 604         char part_name[MAXNAMELEN];
 605         datalink_id_t linkid = state->id_plinkid;
 606 
 607         hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, state->id_dip,
 608             0, "hca-guid", 0);
 609         (void) dls_mgmt_get_linkinfo(linkid, part_name, NULL, NULL, NULL);
 610         len = snprintf(ibd_print_buf, sizeof (ibd_print_buf),
 611             "%s%d: HCA GUID %016llx port %d PKEY %02x link %s ",
 612             ddi_driver_name(state->id_dip), ddi_get_instance(state->id_dip),
 613             (u_longlong_t)hca_guid, state->id_port, state->id_pkey,
 614             part_name);
 615         va_start(ap, fmt);
 616         (void) vsnprintf(ibd_print_buf + len, sizeof (ibd_print_buf) - len,
 617             fmt, ap);
 618         cmn_err(CE_NOTE, "!%s", ibd_print_buf);
 619         va_end(ap);
 620 }
 621 
 622 /*
 623  * Warlock directives: each _NOTE() declares how a data element is protected
 624  */
 625 
 626 /*
 627  * id_lso_lock
 628  *
 629  * state->id_lso->bkt_nfree may be accessed without a lock to
 630  * determine the threshold at which we have to ask the nw layer
 631  * to resume transmission (see ibd_resume_transmission()).
 632  */
 633 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_lso_lock,
 634     ibd_state_t::id_lso))
 635 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_state_t::id_lso))
 636 _NOTE(SCHEME_PROTECTS_DATA("init", ibd_state_t::id_lso_policy))
 637 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_lsobkt_t::bkt_nfree))
 638 
 639 /*
 640  * id_scq_poll_lock: protects id_scq_poll_busy
 641  */
 642 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_scq_poll_lock,
 643     ibd_state_t::id_scq_poll_busy))
 644 
 645 /*
 646  * id_txpost_lock: protects id_tx_head and id_tx_busy
 647  */
 648 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_txpost_lock,
 649     ibd_state_t::id_tx_head))
 650 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_txpost_lock,
 651     ibd_state_t::id_tx_busy))
 652 
 653 /*
 654  * id_acache_req_lock: protects id_acache_req_cv and id_req_list
 655  */
 656 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_acache_req_lock, 
 657     ibd_state_t::id_acache_req_cv))
 658 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_acache_req_lock, 
 659     ibd_state_t::id_req_list))
 660 _NOTE(SCHEME_PROTECTS_DATA("atomic",
 661     ibd_acache_s::ac_ref))
 662 
 663 /*
 664  * id_ac_mutex
 665  *
 666  * This mutex is actually supposed to protect id_ah_op as well,
 667  * but this path of the code isn't clean (see update of id_ah_op
 668  * in ibd_async_acache(), immediately after the call to
 669  * ibd_async_mcache()). For now, we'll skip this check by
 670  * declaring that id_ah_op is protected by some internal scheme
 671  * that warlock isn't aware of.
 672  */
 673 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
 674     ibd_state_t::id_ah_active))
 675 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
 676     ibd_state_t::id_ah_free))
 677 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
 678     ibd_state_t::id_ah_addr))
 679 _NOTE(SCHEME_PROTECTS_DATA("ac mutex should protect this",
 680     ibd_state_t::id_ah_op))
 681 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
 682     ibd_state_t::id_ah_error))
 683 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
 684     ibd_state_t::id_ac_hot_ace))
 685 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_state_t::id_ah_error))
 686 
 687 /*
 688  * id_mc_mutex: protects id_mc_full and id_mc_non
 689  */
 690 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_mc_mutex,
 691     ibd_state_t::id_mc_full))
 692 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_mc_mutex,
 693     ibd_state_t::id_mc_non))
 694 
 695 /*
 696  * id_trap_lock: protects id_trap_cv, id_trap_stop and id_trap_inprog
 697  */
 698 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_trap_lock,
 699     ibd_state_t::id_trap_cv))
 700 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_trap_lock,
 701     ibd_state_t::id_trap_stop))
 702 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_trap_lock,
 703     ibd_state_t::id_trap_inprog))
 704 
 705 /*
 706  * id_prom_op: no mutex; declared as accessed only by the async thread
 707  */
 708 _NOTE(SCHEME_PROTECTS_DATA("only by async thread",
 709     ibd_state_t::id_prom_op))
 710 
 711 /*
 712  * id_sched_lock: protects id_sched_needed
 713  */
 714 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_sched_lock,
 715     ibd_state_t::id_sched_needed))
 716 
 717 /*
 718  * id_link_mutex: protects id_link_state (also declared readable unlocked)
 719  */
 720 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_link_mutex, 
 721     ibd_state_t::id_link_state))
 722 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_state_t::id_link_state))
 723 _NOTE(SCHEME_PROTECTS_DATA("only async thr and ibd_m_start",
 724     ibd_state_t::id_link_speed))
 725 _NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_state_t::id_sgid))
 726 
 727 /*
 728  * id_tx_list.dl_mutex: protects dl_head, dl_pending_sends and dl_cnt
 729  */
 730 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_tx_list.dl_mutex,
 731     ibd_state_t::id_tx_list.dl_head))
 732 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_tx_list.dl_mutex,
 733     ibd_state_t::id_tx_list.dl_pending_sends))
 734 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_tx_list.dl_mutex,
 735     ibd_state_t::id_tx_list.dl_cnt))
 736 
 737 /*
 738  * id_rx_list.dl_mutex (fields below use a mixed scheme, not this mutex alone)
 739  */
 740 _NOTE(SCHEME_PROTECTS_DATA("atomic or dl mutex or single thr",
 741     ibd_state_t::id_rx_list.dl_bufs_outstanding))
 742 _NOTE(SCHEME_PROTECTS_DATA("atomic or dl mutex or single thr",
 743     ibd_state_t::id_rx_list.dl_cnt))
 744 
 745 /*
 746  * rc_timeout_lock: protects rc_timeout_start and rc_timeout
 747  */
 748 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::rc_timeout_lock,
 749     ibd_state_t::rc_timeout_start))
 750 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::rc_timeout_lock,
 751     ibd_state_t::rc_timeout))
 752 
 753 
 754 /*
 755  * Items declared as protected by atomic updates only
 756  */
 757 _NOTE(SCHEME_PROTECTS_DATA("atomic update only",
 758     ibd_state_s::id_brd_rcv
 759     ibd_state_s::id_brd_xmt
 760     ibd_state_s::id_multi_rcv
 761     ibd_state_s::id_multi_xmt
 762     ibd_state_s::id_num_intrs
 763     ibd_state_s::id_rcv_bytes
 764     ibd_state_s::id_rcv_pkt
 765     ibd_state_s::id_rx_post_queue_index
 766     ibd_state_s::id_tx_short
 767     ibd_state_s::id_xmt_bytes
 768     ibd_state_s::id_xmt_pkt
 769     ibd_state_s::rc_rcv_trans_byte
 770     ibd_state_s::rc_rcv_trans_pkt
 771     ibd_state_s::rc_rcv_copy_byte
 772     ibd_state_s::rc_rcv_copy_pkt
 773     ibd_state_s::rc_xmt_bytes
 774     ibd_state_s::rc_xmt_small_pkt
 775     ibd_state_s::rc_xmt_fragmented_pkt
 776     ibd_state_s::rc_xmt_map_fail_pkt
 777     ibd_state_s::rc_xmt_map_succ_pkt
 778     ibd_rc_chan_s::rcq_invoking))
 779 
 780 /*
 781  * Non-mutex protection schemes for data elements. Almost all of
 782  * these are non-shared items (declared "unshared or single-threaded").
 783  */
 784 _NOTE(SCHEME_PROTECTS_DATA("unshared or single-threaded",
 785     callb_cpr
 786     ib_gid_s
 787     ib_header_info
 788     ibd_acache_rq
 789     ibd_acache_s::ac_mce
 790     ibd_acache_s::ac_chan
 791     ibd_mcache::mc_fullreap
 792     ibd_mcache::mc_jstate
 793     ibd_mcache::mc_req
 794     ibd_rwqe_s
 795     ibd_swqe_s
 796     ibd_wqe_s
 797     ibt_wr_ds_s::ds_va
 798     ibt_wr_lso_s
 799     ipoib_mac::ipoib_qpn
 800     mac_capab_lso_s
 801     msgb::b_next
 802     msgb::b_cont
 803     msgb::b_rptr
 804     msgb::b_wptr
 805     ibd_state_s::id_bgroup_created
 806     ibd_state_s::id_mac_state
 807     ibd_state_s::id_mtu
 808     ibd_state_s::id_ud_num_rwqe
 809     ibd_state_s::id_ud_num_swqe
 810     ibd_state_s::id_qpnum
 811     ibd_state_s::id_rcq_hdl
 812     ibd_state_s::id_rx_buf_sz
 813     ibd_state_s::id_rx_bufs
 814     ibd_state_s::id_rx_mr_hdl
 815     ibd_state_s::id_rx_wqes
 816     ibd_state_s::id_rxwcs
 817     ibd_state_s::id_rxwcs_size
 818     ibd_state_s::id_rx_nqueues
 819     ibd_state_s::id_rx_queues
 820     ibd_state_s::id_scope
 821     ibd_state_s::id_scq_hdl
 822     ibd_state_s::id_tx_buf_sz
 823     ibd_state_s::id_tx_bufs
 824     ibd_state_s::id_tx_mr_hdl
 825     ibd_state_s::id_tx_rel_list.dl_cnt
 826     ibd_state_s::id_tx_wqes
 827     ibd_state_s::id_txwcs
 828     ibd_state_s::id_txwcs_size
 829     ibd_state_s::rc_listen_hdl
 830     ibd_state_s::rc_listen_hdl_OFED_interop
 831     ibd_state_s::rc_srq_size
 832     ibd_state_s::rc_srq_rwqes
 833     ibd_state_s::rc_srq_rx_bufs
 834     ibd_state_s::rc_srq_rx_mr_hdl
 835     ibd_state_s::rc_tx_largebuf_desc_base
 836     ibd_state_s::rc_tx_mr_bufs
 837     ibd_state_s::rc_tx_mr_hdl
 838     ipha_s
 839     icmph_s
 840     ibt_path_info_s::pi_sid
 841     ibd_rc_chan_s::ace
 842     ibd_rc_chan_s::chan_hdl
 843     ibd_rc_chan_s::state
 844     ibd_rc_chan_s::chan_state
 845     ibd_rc_chan_s::is_tx_chan
 846     ibd_rc_chan_s::rcq_hdl
 847     ibd_rc_chan_s::rcq_size
 848     ibd_rc_chan_s::scq_hdl
 849     ibd_rc_chan_s::scq_size
 850     ibd_rc_chan_s::rx_bufs
 851     ibd_rc_chan_s::rx_mr_hdl
 852     ibd_rc_chan_s::rx_rwqes
 853     ibd_rc_chan_s::tx_wqes
 854     ibd_rc_chan_s::tx_mr_bufs
 855     ibd_rc_chan_s::tx_mr_hdl
 856     ibd_rc_chan_s::tx_rel_list.dl_cnt
 857     ibd_rc_chan_s::is_used
 858     ibd_rc_tx_largebuf_s::lb_buf
 859     ibd_rc_msg_hello_s
 860     ibt_cm_return_args_s))
 861 
 862 /*
 863  * ibd_rc_chan_s::next is protected by two mutexes:
 864  * 1) ibd_state_s::rc_pass_chan_list.chan_list_mutex
 865  * 2) ibd_state_s::rc_obs_act_chan_list.chan_list_mutex.
 866  */
 867 _NOTE(SCHEME_PROTECTS_DATA("protected by two mutexes",
 868     ibd_rc_chan_s::next))
 869 
 870 /*
 871  * ibd_state_s.rc_tx_large_bufs_lock: protects the largebuf free head/count and lb_next
 872  */
 873 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_s::rc_tx_large_bufs_lock,
 874     ibd_state_s::rc_tx_largebuf_free_head))
 875 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_s::rc_tx_large_bufs_lock,
 876     ibd_state_s::rc_tx_largebuf_nfree))
 877 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_s::rc_tx_large_bufs_lock,
 878     ibd_rc_tx_largebuf_s::lb_next))
 879 
 880 /*
 881  * ibd_acache_s.tx_too_big_mutex: protects tx_too_big_ongoing
 882  */
 883 _NOTE(MUTEX_PROTECTS_DATA(ibd_acache_s::tx_too_big_mutex,
 884     ibd_acache_s::tx_too_big_ongoing))
 885 
 886 /*
 887  * tx_wqe_list.dl_mutex: protects dl_head, dl_pending_sends and dl_cnt
 888  */
 889 _NOTE(MUTEX_PROTECTS_DATA(ibd_rc_chan_s::tx_wqe_list.dl_mutex,
 890     ibd_rc_chan_s::tx_wqe_list.dl_head))
 891 _NOTE(MUTEX_PROTECTS_DATA(ibd_rc_chan_s::tx_wqe_list.dl_mutex,
 892     ibd_rc_chan_s::tx_wqe_list.dl_pending_sends))
 893 _NOTE(MUTEX_PROTECTS_DATA(ibd_rc_chan_s::tx_wqe_list.dl_mutex,
 894     ibd_rc_chan_s::tx_wqe_list.dl_cnt))
 895 
 896 /*
 897  * ibd_state_s.rc_ace_recycle_lock: protects rc_ace_recycle
 898  */
 899 _NOTE(MUTEX_PROTECTS_DATA(ibd_state_s::rc_ace_recycle_lock,
 900     ibd_state_s::rc_ace_recycle))
 901 
 902 /*
 903  * rc_srq_rwqe_list.dl_mutex (fields below use a mixed scheme, not this mutex alone)
 904  */
 905 _NOTE(SCHEME_PROTECTS_DATA("atomic or dl mutex or single thr",
 906     ibd_state_t::rc_srq_rwqe_list.dl_bufs_outstanding))
 907 _NOTE(SCHEME_PROTECTS_DATA("atomic or dl mutex or single thr",
 908     ibd_state_t::rc_srq_rwqe_list.dl_cnt))
 909 
 910 /*
 911  * Non-mutex protection schemes for data elements. They are counters
 912  * for problem diagnosis and do not need to be protected.
 913  */
 914 _NOTE(SCHEME_PROTECTS_DATA("counters for problem diagnosis",
 915     ibd_state_s::rc_rcv_alloc_fail
 916     ibd_state_s::rc_rcq_err
 917     ibd_state_s::rc_ace_not_found
 918     ibd_state_s::rc_xmt_drop_too_long_pkt
 919     ibd_state_s::rc_xmt_icmp_too_long_pkt
 920     ibd_state_s::rc_xmt_reenter_too_long_pkt
 921     ibd_state_s::rc_swqe_short
 922     ibd_state_s::rc_swqe_mac_update
 923     ibd_state_s::rc_xmt_buf_short
 924     ibd_state_s::rc_xmt_buf_mac_update
 925     ibd_state_s::rc_scq_no_swqe
 926     ibd_state_s::rc_scq_no_largebuf
 927     ibd_state_s::rc_conn_succ
 928     ibd_state_s::rc_conn_fail
 929     ibd_state_s::rc_null_conn
 930     ibd_state_s::rc_no_estab_conn
 931     ibd_state_s::rc_act_close
 932     ibd_state_s::rc_pas_close
 933     ibd_state_s::rc_delay_ace_recycle
 934     ibd_state_s::rc_act_close_simultaneous
 935     ibd_state_s::rc_act_close_not_clean
 936     ibd_state_s::rc_pas_close_rcq_invoking
 937     ibd_state_s::rc_reset_cnt
 938     ibd_state_s::rc_timeout_act
 939     ibd_state_s::rc_timeout_pas
 940     ibd_state_s::rc_stop_connect))
 941 
 942 #ifdef DEBUG
 943 /*
 944  * Non-mutex protection schemes for data elements. They are counters
 945  * for problem diagnosis (DEBUG builds only) and do not need to be protected.
 946  */
 947 _NOTE(SCHEME_PROTECTS_DATA("counters for problem diagnosis",
 948     ibd_state_s::rc_rwqe_short
 949     ibd_rc_stat_s::rc_rcv_trans_byte
 950     ibd_rc_stat_s::rc_rcv_trans_pkt
 951     ibd_rc_stat_s::rc_rcv_copy_byte
 952     ibd_rc_stat_s::rc_rcv_copy_pkt
 953     ibd_rc_stat_s::rc_rcv_alloc_fail
 954     ibd_rc_stat_s::rc_rcq_err 
 955     ibd_rc_stat_s::rc_rwqe_short
 956     ibd_rc_stat_s::rc_xmt_bytes
 957     ibd_rc_stat_s::rc_xmt_small_pkt
 958     ibd_rc_stat_s::rc_xmt_fragmented_pkt
 959     ibd_rc_stat_s::rc_xmt_map_fail_pkt
 960     ibd_rc_stat_s::rc_xmt_map_succ_pkt
 961     ibd_rc_stat_s::rc_ace_not_found
 962     ibd_rc_stat_s::rc_scq_no_swqe
 963     ibd_rc_stat_s::rc_scq_no_largebuf
 964     ibd_rc_stat_s::rc_swqe_short
 965     ibd_rc_stat_s::rc_swqe_mac_update
 966     ibd_rc_stat_s::rc_xmt_buf_short
 967     ibd_rc_stat_s::rc_xmt_buf_mac_update
 968     ibd_rc_stat_s::rc_conn_succ
 969     ibd_rc_stat_s::rc_conn_fail
 970     ibd_rc_stat_s::rc_null_conn
 971     ibd_rc_stat_s::rc_no_estab_conn
 972     ibd_rc_stat_s::rc_act_close
 973     ibd_rc_stat_s::rc_pas_close
 974     ibd_rc_stat_s::rc_delay_ace_recycle
 975     ibd_rc_stat_s::rc_act_close_simultaneous
 976     ibd_rc_stat_s::rc_reset_cnt
 977     ibd_rc_stat_s::rc_timeout_act
 978     ibd_rc_stat_s::rc_timeout_pas))
 979 #endif
 980 
 981 int
 982 _init()
 983 {
 984         int status;
 985 
 986         status = ddi_soft_state_init(&ibd_list, max(sizeof (ibd_state_t),
 987             PAGESIZE), 0);
 988         if (status != 0) {
 989                 DPRINT(10, "_init:failed in ddi_soft_state_init()");
 990                 return (status);
 991         }
 992 
 993         mutex_init(&ibd_objlist_lock, NULL, MUTEX_DRIVER, NULL);
 994 
 995         mac_init_ops(&ibd_dev_ops, "ibp");
 996         status = mod_install(&ibd_modlinkage);
 997         if (status != 0) {
 998                 DPRINT(10, "_init:failed in mod_install()");
 999                 ddi_soft_state_fini(&ibd_list);
1000                 mac_fini_ops(&ibd_dev_ops);


1246                                             ptr->rq_ptr, ptr->rq_gid,
1247                                             IB_MC_JSTATE_FULL);
1248                                         /*
1249                                          * the req buf is contained in the
1250                                          * mce structure, so we do not need
1251                                          * to free it here.
1252                                          */
1253                                         ptr = NULL;
1254                                         break;
1255                                 case IBD_ASYNC_TRAP:
1256                                         ibd_async_trap(state, ptr);
1257                                         break;
1258                                 case IBD_ASYNC_SCHED:
1259                                         ibd_async_txsched(state);
1260                                         break;
1261                                 case IBD_ASYNC_LINK:
1262                                         ibd_async_link(state, ptr);
1263                                         break;
1264                                 case IBD_ASYNC_EXIT:
1265                                         mutex_enter(&state->id_acache_req_lock);
1266 #ifndef __lock_lint
1267                                         CALLB_CPR_EXIT(&cprinfo);
1268 #else
1269                                         mutex_exit(&state->id_acache_req_lock);
1270 #endif
1271                                         return;
1272                                 case IBD_ASYNC_RC_TOO_BIG:
1273                                         ibd_async_rc_process_too_big(state,
1274                                             ptr);
1275                                         break;
1276                                 case IBD_ASYNC_RC_CLOSE_ACT_CHAN:
1277                                         ibd_async_rc_close_act_chan(state, ptr);
1278                                         break;
1279                                 case IBD_ASYNC_RC_RECYCLE_ACE:
1280                                         ibd_async_rc_recycle_ace(state, ptr);
1281                                         break;
1282                                 case IBD_ASYNC_RC_CLOSE_PAS_CHAN:
1283                                         (void) ibd_rc_pas_close(ptr->rq_ptr,
1284                                             B_TRUE, B_TRUE);
1285                                         break;
1286                         }
1287 free_req_and_continue:
1288                         if (ptr != NULL)
1289                                 kmem_cache_free(state->id_req_kmc, ptr);
1290 
1291                         mutex_enter(&state->id_acache_req_lock);
1292                 } else {
1293 #ifndef __lock_lint
1294                         /*
1295                          * Nothing to do: wait till new request arrives.
1296                          */
1297                         CALLB_CPR_SAFE_BEGIN(&cprinfo);
1298                         cv_wait(&state->id_acache_req_cv,
1299                             &state->id_acache_req_lock);
1300                         CALLB_CPR_SAFE_END(&cprinfo,
1301                             &state->id_acache_req_lock);
1302 #endif
1303                 }
1304         }
1305 
1306         /*NOTREACHED*/
1307         _NOTE(NOT_REACHED)
1308 }
1309 
1310 /*
1311  * Return when it is safe to queue requests to the async daemon; primarily
1312  * for subnet trap and async event handling. Disallow requests before the
1313  * daemon is created, and when interface deinitialization starts.
1314  */
1315 static boolean_t
1316 ibd_async_safe(ibd_state_t *state)
1317 {
1318         mutex_enter(&state->id_trap_lock);
1319         if (state->id_trap_stop) {
1320                 mutex_exit(&state->id_trap_lock);
1321                 return (B_FALSE);
1322         }


1929  */
1930 static void
1931 ibd_async_link(ibd_state_t *state, ibd_req_t *req)
1932 {
1933         ibd_link_op_t opcode = (ibd_link_op_t)req->rq_ptr;
1934         link_state_t lstate = (opcode == IBD_LINK_DOWN) ? LINK_STATE_DOWN :
1935             LINK_STATE_UP;
1936         ibd_mce_t *mce, *pmce;
1937         ibd_ace_t *ace, *pace;
1938 
1939         DPRINT(10, "ibd_async_link(): %d", opcode);
1940 
1941         /*
1942          * On a link up, revalidate the link speed/width. No point doing
1943          * this on a link down, since we will be unable to do SA operations,
1944          * defaulting to the lowest speed. Also notice that we update our
1945          * notion of speed before calling mac_link_update(), which will do
1946          * necessary higher level notifications for speed changes.
1947          */
1948         if ((opcode == IBD_LINK_UP_ABSENT) || (opcode == IBD_LINK_UP)) {
1949                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))
1950                 state->id_link_speed = ibd_get_portspeed(state);
1951                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*state))
1952         }
1953 
1954         /*
1955          * Do all the work required to establish our presence on
1956          * the subnet.
1957          */
1958         if (opcode == IBD_LINK_UP_ABSENT) {
1959                 /*
1960                  * If in promiscuous mode ...
1961                  */
1962                 if (state->id_prom_op == IBD_OP_COMPLETED) {
1963                         /*
1964                          * Drop all nonmembership.
1965                          */
1966                         ibd_async_unsetprom(state);
1967 
1968                         /*
1969                          * Then, try to regain nonmembership to all mcg's.
1970                          */
1971                         ibd_async_setprom(state);


2183                 }
2184                 ibt_free_portinfo(port_infop, port_infosz);
2185                 goto link_mod_return;
2186         }
2187 
2188         /*
2189          * Check the SM InitTypeReply flags. If both NoLoadReply and
2190          * PreserveContentReply are 0, we don't know anything about the
2191          * data loaded into the port attributes, so we need to verify
2192          * if gid0 and pkey are still valid.
2193          */
2194         itreply = port_infop->p_init_type_reply;
2195         if (((itreply & SM_INIT_TYPE_REPLY_NO_LOAD_REPLY) == 0) &&
2196             ((itreply & SM_INIT_TYPE_PRESERVE_CONTENT_REPLY) == 0)) {
2197                 /*
2198                  * Check to see if the subnet part of GID0 has changed. If
2199                  * not, check the simple case first to see if the pkey
2200                  * index is the same as before; finally check to see if the
2201                  * pkey has been relocated to a different index in the table.
2202                  */
2203                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
2204                 if (bcmp(port_infop->p_sgid_tbl,
2205                     &state->id_sgid, sizeof (ib_gid_t)) != 0) {
2206 
2207                         new_link_state = LINK_STATE_DOWN;
2208 
2209                 } else if (port_infop->p_pkey_tbl[state->id_pkix] ==
2210                     state->id_pkey) {
2211 
2212                         new_link_state = LINK_STATE_UP;
2213 
2214                 } else if (ibd_locate_pkey(port_infop->p_pkey_tbl,
2215                     port_infop->p_pkey_tbl_sz, state->id_pkey, &pkix) == 0) {
2216 
2217                         ibt_free_portinfo(port_infop, port_infosz);
2218                         mutex_exit(&state->id_link_mutex);
2219 
2220                         /*
2221                          * Currently a restart is required if our pkey has moved
2222                          * in the pkey table. If we get the ibt_recycle_ud() to
2223                          * work as documented (expected), we may be able to
2224                          * avoid a complete restart.  Note that we've already
2225                          * marked both the start and stop 'in-progress' flags,
2226                          * so it is ok to go ahead and do this restart.
2227                          */
2228                         (void) ibd_undo_start(state, LINK_STATE_DOWN);
2229                         if ((ret = ibd_start(state)) != 0) {
2230                                 DPRINT(10, "ibd_restart: cannot restart, "
2231                                     "ret=%d", ret);
2232                         }
2233 
2234                         goto link_mod_return;
2235                 } else {
2236                         new_link_state = LINK_STATE_DOWN;
2237                 }
2238                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))
2239         }
2240 
2241 update_link_state:
2242         if (port_infop) {
2243                 ibt_free_portinfo(port_infop, port_infosz);
2244         }
2245 
2246         /*
2247          * If we're reporting a link up, check InitTypeReply to see if
2248          * the SM has ensured that the port's presence in mcg, traps,
2249          * etc. is intact.
2250          */
2251         if (new_link_state == LINK_STATE_DOWN) {
2252                 opcode = IBD_LINK_DOWN;
2253         } else {
2254                 if ((itreply & SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY) ==
2255                     SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY) {
2256                         opcode = IBD_LINK_UP;
2257                 } else {
2258                         opcode = IBD_LINK_UP_ABSENT;


2405         /*
2406          *  Register ourselves with the GLDv3 interface
2407          */
2408         if ((ret = mac_register(macp, &state->id_mh)) != 0) {
2409                 mac_free(macp);
2410                 DPRINT(10,
2411                     "ibd_register_mac: mac_register() failed, ret=%d", ret);
2412                 return (DDI_FAILURE);
2413         }
2414 
2415         mac_free(macp);
2416         return (DDI_SUCCESS);
2417 }
2418 
2419 static int
2420 ibd_record_capab(ibd_state_t *state)
2421 {
2422         ibt_hca_attr_t hca_attrs;
2423         ibt_status_t ibt_status;
2424 
2425         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))
2426 
2427         /*
2428          * Query the HCA and fetch its attributes
2429          */
2430         ibt_status = ibt_query_hca(state->id_hca_hdl, &hca_attrs);
2431         ASSERT(ibt_status == IBT_SUCCESS);
2432 
2433         /*
2434          * 1. Set the Hardware Checksum capability. Currently we only consider
2435          *    full checksum offload.
2436          */
2437         if (state->id_enable_rc) {
2438                         state->id_hwcksum_capab = 0;
2439         } else {
2440                 if ((hca_attrs.hca_flags & IBT_HCA_CKSUM_FULL)
2441                     == IBT_HCA_CKSUM_FULL) {
2442                         state->id_hwcksum_capab = IBT_HCA_CKSUM_FULL;
2443                 }
2444         }
2445 
2446         /*


2509         state->rc_max_sqseg_hiwm = (state->rc_tx_max_sqseg * 65) / 100;
2510 
2511         /*
2512          * 5. Set number of recv and send wqes after checking hca maximum
2513          *    channel size. Store the max channel size in the state so that it
2514          *    can be referred to when the swqe/rwqe change is requested via
2515          *    dladm.
2516          */
2517 
2518         state->id_hca_max_chan_sz = hca_attrs.hca_max_chan_sz;
2519 
2520         if (hca_attrs.hca_max_chan_sz < state->id_ud_num_rwqe)
2521                 state->id_ud_num_rwqe = hca_attrs.hca_max_chan_sz;
2522 
2523         state->id_rx_bufs_outstanding_limit = state->id_ud_num_rwqe -
2524             IBD_RWQE_MIN;
2525 
2526         if (hca_attrs.hca_max_chan_sz < state->id_ud_num_swqe)
2527                 state->id_ud_num_swqe = hca_attrs.hca_max_chan_sz;
2528 
2529         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*state))
2530 
2531         return (DDI_SUCCESS);
2532 }
2533 
2534 static int
2535 ibd_part_busy(ibd_state_t *state)
2536 {
2537         if (atomic_add_32_nv(&state->id_rx_list.dl_bufs_outstanding, 0) != 0) {
2538                 DPRINT(10, "ibd_part_busy: failed: rx bufs outstanding\n");
2539                 return (DDI_FAILURE);
2540         }
2541 
2542         if (state->rc_srq_rwqe_list.dl_bufs_outstanding != 0) {
2543                 DPRINT(10, "ibd_part_busy: failed: srq bufs outstanding\n");
2544                 return (DDI_FAILURE);
2545         }
2546 
2547         /*
2548          * "state->id_ah_op == IBD_OP_ONGOING" means this IPoIB port is
2549          * connecting to a remote IPoIB port. We can't remove this port.
2550          */


3400  */
3401 static ibt_status_t
3402 ibd_find_bgroup(ibd_state_t *state)
3403 {
3404         ibt_mcg_attr_t mcg_attr;
3405         uint_t numg;
3406         uchar_t scopes[] = { IB_MC_SCOPE_SUBNET_LOCAL,
3407             IB_MC_SCOPE_SITE_LOCAL, IB_MC_SCOPE_ORG_LOCAL,
3408             IB_MC_SCOPE_GLOBAL };
3409         int i, mcgmtu;
3410         boolean_t found = B_FALSE;
3411         int ret;
3412         ibt_mcg_info_t mcg_info;
3413 
3414         state->id_bgroup_created = B_FALSE;
3415         state->id_bgroup_present = B_FALSE;
3416 
3417 query_bcast_grp:
3418         bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
3419         mcg_attr.mc_pkey = state->id_pkey;
3420         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_mgid))
3421         state->id_mgid.gid_guid = IB_MGID_IPV4_LOWGRP_MASK;
3422         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_mgid))
3423 
3424         for (i = 0; i < sizeof (scopes)/sizeof (scopes[0]); i++) {
3425                 state->id_scope = mcg_attr.mc_scope = scopes[i];
3426 
3427                 /*
3428                  * Look for the IPoIB broadcast group.
3429                  */
3430                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_mgid))
3431                 state->id_mgid.gid_prefix =
3432                     (((uint64_t)IB_MCGID_IPV4_PREFIX << 32) |
3433                     ((uint64_t)state->id_scope << 48) |
3434                     ((uint32_t)(state->id_pkey << 16)));
3435                 mcg_attr.mc_mgid = state->id_mgid;
3436                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_mgid))
3437                 if (ibt_query_mcg(state->id_sgid, &mcg_attr, 1,
3438                     &state->id_mcinfo, &numg) == IBT_SUCCESS) {
3439                         found = B_TRUE;
3440                         break;
3441                 }
3442         }
3443 
3444         if (!found) {
3445                 if (state->id_create_broadcast_group) {
3446                         /*
3447                          * If we created the broadcast group, but failed to
3448                          * find it, we can't do anything except leave the
3449                          * one we created and return failure.
3450                          */
3451                         if (state->id_bgroup_created) {
3452                                 ibd_print_warn(state, "IPoIB broadcast group "
3453                                     "absent. Unable to query after create.");
3454                                 goto find_bgroup_fail;
3455                         }
3456 
3457                         /*
3458                          * Create the ipoib broadcast group if it didn't exist
3459                          */
3460                         bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
3461                         mcg_attr.mc_qkey = IBD_DEFAULT_QKEY;
3462                         mcg_attr.mc_join_state = IB_MC_JSTATE_FULL;
3463                         mcg_attr.mc_scope = IB_MC_SCOPE_SUBNET_LOCAL;
3464                         mcg_attr.mc_pkey = state->id_pkey;
3465                         mcg_attr.mc_flow = 0;
3466                         mcg_attr.mc_sl = 0;
3467                         mcg_attr.mc_tclass = 0;
3468                         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_mgid))
3469                         state->id_mgid.gid_prefix =
3470                             (((uint64_t)IB_MCGID_IPV4_PREFIX << 32) |
3471                             ((uint64_t)IB_MC_SCOPE_SUBNET_LOCAL << 48) |
3472                             ((uint32_t)(state->id_pkey << 16)));
3473                         mcg_attr.mc_mgid = state->id_mgid;
3474                         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_mgid))
3475 
3476                         if ((ret = ibt_join_mcg(state->id_sgid, &mcg_attr,
3477                             &mcg_info, NULL, NULL)) != IBT_SUCCESS) {
3478                                 ibd_print_warn(state, "IPoIB broadcast group "
3479                                     "absent, create failed: ret = %d\n", ret);
3480                                 state->id_bgroup_created = B_FALSE;
3481                                 return (IBT_FAILURE);
3482                         }
3483                         state->id_bgroup_created = B_TRUE;
3484                         goto query_bcast_grp;
3485                 } else {
3486                         ibd_print_warn(state, "IPoIB broadcast group absent");
3487                         return (IBT_FAILURE);
3488                 }
3489         }
3490 
3491         /*
3492          * Assert that the mcg mtu <= id_mtu. Fill in updated id_mtu.
3493          */
3494         mcgmtu = (128 << state->id_mcinfo->mc_mtu);


4336  * on a kernel thread (handling can thus block) and can be invoked
4337  * concurrently. The handler can be invoked anytime after it is
4338  * registered and before ibt_detach().
4339  */
4340 /* ARGSUSED */
4341 static void
4342 ibd_snet_notices_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
4343     ibt_subnet_event_t *event)
4344 {
4345         ibd_state_t *state = (ibd_state_t *)arg;
4346         ibd_req_t *req;
4347 
4348         /*
4349          * The trap handler will get invoked once for every event for
4350          * every port. The input "gid" is the GID0 of the port the
4351          * trap came in on; we just need to act on traps that came
4352          * to our port, meaning the port on which the ipoib interface
4353          * resides. Since ipoib uses GID0 of the port, we just match
4354          * the gids to check whether we need to handle the trap.
4355          */
4356         _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
4357         if (bcmp(&gid, &state->id_sgid, sizeof (ib_gid_t)) != 0)
4358                 return;
4359         _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))
4360 
4361         DPRINT(10, "ibd_notices_handler : %d\n", code);
4362 
4363         switch (code) {
4364                 case IBT_SM_EVENT_UNAVAILABLE:
4365                         /*
4366                          * If we are in promiscuous mode or have
4367                          * sendnonmembers, we need to print a warning
4368                          * message right now. Else, just store the
4369                          * information, print when we enter promiscuous
4370                          * mode or attempt nonmember send. We might
4371                          * also want to stop caching sendnonmember.
4372                          */
4373                         ibd_print_warn(state, "IBA multicast support "
4374                             "degraded due to unavailability of multicast "
4375                             "traps");
4376                         break;
4377                 case IBT_SM_EVENT_AVAILABLE:
4378                         /*
4379                          * If we printed a warning message above or


5326         ret = ibt_query_hca_ports(state->id_hca_hdl, state->id_port,
5327             &port_infop, &psize, &port_infosz);
5328         if ((ret != IBT_SUCCESS) || (psize != 1)) {
5329                 mutex_exit(&state->id_link_mutex);
5330                 DPRINT(10, "ibd_get_port_details: ibt_query_hca_ports() "
5331                     "failed, ret=%d", ret);
5332                 return (ENETDOWN);
5333         }
5334 
5335         /*
5336          * If the link is active, verify the pkey
5337          */
5338         if (port_infop->p_linkstate == IBT_PORT_ACTIVE) {
5339                 if ((ret = ibt_pkey2index(state->id_hca_hdl, state->id_port,
5340                     state->id_pkey, &state->id_pkix)) != IBT_SUCCESS) {
5341                         state->id_link_state = LINK_STATE_DOWN;
5342                 } else {
5343                         state->id_link_state = LINK_STATE_UP;
5344                 }
5345                 state->id_mtu = (128 << port_infop->p_mtu);
5346                 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
5347                 state->id_sgid = *port_infop->p_sgid_tbl;
5348                 _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))
5349                 /*
5350                  * Now that the port is active, record the port speed
5351                  */
5352                 state->id_link_speed = ibd_get_portspeed(state);
5353         } else {
5354                 /* Make sure that these are handled in PORT_UP/CHANGE */
5355                 state->id_mtu = 0;
5356                 state->id_link_state = LINK_STATE_DOWN;
5357                 state->id_link_speed = 0;
5358         }
5359         mutex_exit(&state->id_link_mutex);
5360         ibt_free_portinfo(port_infop, port_infosz);
5361 
5362         return (0);
5363 }
5364 
5365 static int
5366 ibd_alloc_cqs(ibd_state_t *state)
5367 {
5368         ibt_hca_attr_t hca_attrs;




 602         int len;
 603         va_list ap;
 604         char part_name[MAXNAMELEN];
 605         datalink_id_t linkid = state->id_plinkid;
 606 
 607         hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, state->id_dip,
 608             0, "hca-guid", 0);
 609         (void) dls_mgmt_get_linkinfo(linkid, part_name, NULL, NULL, NULL);
 610         len = snprintf(ibd_print_buf, sizeof (ibd_print_buf),
 611             "%s%d: HCA GUID %016llx port %d PKEY %02x link %s ",
 612             ddi_driver_name(state->id_dip), ddi_get_instance(state->id_dip),
 613             (u_longlong_t)hca_guid, state->id_port, state->id_pkey,
 614             part_name);
 615         va_start(ap, fmt);
 616         (void) vsnprintf(ibd_print_buf + len, sizeof (ibd_print_buf) - len,
 617             fmt, ap);
 618         cmn_err(CE_NOTE, "!%s", ibd_print_buf);
 619         va_end(ap);
 620 }
 621 







































































































































































































































































































































































 622 int
 623 _init()
 624 {
 625         int status;
 626 
 627         status = ddi_soft_state_init(&ibd_list, max(sizeof (ibd_state_t),
 628             PAGESIZE), 0);
 629         if (status != 0) {
 630                 DPRINT(10, "_init:failed in ddi_soft_state_init()");
 631                 return (status);
 632         }
 633 
 634         mutex_init(&ibd_objlist_lock, NULL, MUTEX_DRIVER, NULL);
 635 
 636         mac_init_ops(&ibd_dev_ops, "ibp");
 637         status = mod_install(&ibd_modlinkage);
 638         if (status != 0) {
 639                 DPRINT(10, "_init:failed in mod_install()");
 640                 ddi_soft_state_fini(&ibd_list);
 641                 mac_fini_ops(&ibd_dev_ops);


 887                                             ptr->rq_ptr, ptr->rq_gid,
 888                                             IB_MC_JSTATE_FULL);
 889                                         /*
 890                                          * the req buf is contained in the mce
 891                                          * structure, so we do not need
 892                                          * to free it here.
 893                                          */
 894                                         ptr = NULL;
 895                                         break;
 896                                 case IBD_ASYNC_TRAP:
 897                                         ibd_async_trap(state, ptr);
 898                                         break;
 899                                 case IBD_ASYNC_SCHED:
 900                                         ibd_async_txsched(state);
 901                                         break;
 902                                 case IBD_ASYNC_LINK:
 903                                         ibd_async_link(state, ptr);
 904                                         break;
 905                                 case IBD_ASYNC_EXIT:
 906                                         mutex_enter(&state->id_acache_req_lock);

 907                                         CALLB_CPR_EXIT(&cprinfo);



 908                                         return;
 909                                 case IBD_ASYNC_RC_TOO_BIG:
 910                                         ibd_async_rc_process_too_big(state,
 911                                             ptr);
 912                                         break;
 913                                 case IBD_ASYNC_RC_CLOSE_ACT_CHAN:
 914                                         ibd_async_rc_close_act_chan(state, ptr);
 915                                         break;
 916                                 case IBD_ASYNC_RC_RECYCLE_ACE:
 917                                         ibd_async_rc_recycle_ace(state, ptr);
 918                                         break;
 919                                 case IBD_ASYNC_RC_CLOSE_PAS_CHAN:
 920                                         (void) ibd_rc_pas_close(ptr->rq_ptr,
 921                                             B_TRUE, B_TRUE);
 922                                         break;
 923                         }
 924 free_req_and_continue:
 925                         if (ptr != NULL)
 926                                 kmem_cache_free(state->id_req_kmc, ptr);
 927 
 928                         mutex_enter(&state->id_acache_req_lock);
 929                 } else {

 930                         /*
 931                          * Nothing to do: wait till new request arrives.
 932                          */
 933                         CALLB_CPR_SAFE_BEGIN(&cprinfo);
 934                         cv_wait(&state->id_acache_req_cv,
 935                             &state->id_acache_req_lock);
 936                         CALLB_CPR_SAFE_END(&cprinfo,
 937                             &state->id_acache_req_lock);

 938                 }
 939         }
 940 
 941         /*NOTREACHED*/
 942         _NOTE(NOT_REACHED)
 943 }
 944 
 945 /*
 946  * Return when it is safe to queue requests to the async daemon; primarily
 947  * for subnet trap and async event handling. Disallow requests before the
 948  * daemon is created, and when interface deinitialization starts.
 949  */
 950 static boolean_t
 951 ibd_async_safe(ibd_state_t *state)
 952 {
 953         mutex_enter(&state->id_trap_lock);
 954         if (state->id_trap_stop) {
 955                 mutex_exit(&state->id_trap_lock);
 956                 return (B_FALSE);
 957         }


1564  */
1565 static void
1566 ibd_async_link(ibd_state_t *state, ibd_req_t *req)
1567 {
1568         ibd_link_op_t opcode = (ibd_link_op_t)req->rq_ptr;
1569         link_state_t lstate = (opcode == IBD_LINK_DOWN) ? LINK_STATE_DOWN :
1570             LINK_STATE_UP;
1571         ibd_mce_t *mce, *pmce;
1572         ibd_ace_t *ace, *pace;
1573 
1574         DPRINT(10, "ibd_async_link(): %d", opcode);
1575 
1576         /*
1577          * On a link up, revalidate the link speed/width. No point doing
1578          * this on a link down, since we will be unable to do SA operations,
1579          * defaulting to the lowest speed. Also notice that we update our
1580          * notion of speed before calling mac_link_update(), which will do
1581          * necessary higher level notifications for speed changes.
1582          */
1583         if ((opcode == IBD_LINK_UP_ABSENT) || (opcode == IBD_LINK_UP)) {

1584                 state->id_link_speed = ibd_get_portspeed(state);

1585         }
1586 
1587         /*
1588          * Do all the work required to establish our presence on
1589          * the subnet.
1590          */
1591         if (opcode == IBD_LINK_UP_ABSENT) {
1592                 /*
1593                  * If in promiscuous mode ...
1594                  */
1595                 if (state->id_prom_op == IBD_OP_COMPLETED) {
1596                         /*
1597                          * Drop all nonmembership.
1598                          */
1599                         ibd_async_unsetprom(state);
1600 
1601                         /*
1602                          * Then, try to regain nonmembership to all mcg's.
1603                          */
1604                         ibd_async_setprom(state);


1816                 }
1817                 ibt_free_portinfo(port_infop, port_infosz);
1818                 goto link_mod_return;
1819         }
1820 
1821         /*
1822          * Check the SM InitTypeReply flags. If both NoLoadReply and
1823          * PreserveContentReply are 0, we don't know anything about the
1824          * data loaded into the port attributes, so we need to verify
1825          * if gid0 and pkey are still valid.
1826          */
1827         itreply = port_infop->p_init_type_reply;
1828         if (((itreply & SM_INIT_TYPE_REPLY_NO_LOAD_REPLY) == 0) &&
1829             ((itreply & SM_INIT_TYPE_PRESERVE_CONTENT_REPLY) == 0)) {
1830                 /*
1831                  * Check to see if the subnet part of GID0 has changed. If
1832                  * not, check the simple case first to see if the pkey
1833                  * index is the same as before; finally check to see if the
1834                  * pkey has been relocated to a different index in the table.
1835                  */

1836                 if (bcmp(port_infop->p_sgid_tbl,
1837                     &state->id_sgid, sizeof (ib_gid_t)) != 0) {
1838 
1839                         new_link_state = LINK_STATE_DOWN;
1840 
1841                 } else if (port_infop->p_pkey_tbl[state->id_pkix] ==
1842                     state->id_pkey) {
1843 
1844                         new_link_state = LINK_STATE_UP;
1845 
1846                 } else if (ibd_locate_pkey(port_infop->p_pkey_tbl,
1847                     port_infop->p_pkey_tbl_sz, state->id_pkey, &pkix) == 0) {
1848 
1849                         ibt_free_portinfo(port_infop, port_infosz);
1850                         mutex_exit(&state->id_link_mutex);
1851 
1852                         /*
1853                          * Currently a restart is required if our pkey has moved
1854                          * in the pkey table. If we get the ibt_recycle_ud() to
1855                          * work as documented (expected), we may be able to
1856                          * avoid a complete restart.  Note that we've already
1857                          * marked both the start and stop 'in-progress' flags,
1858                          * so it is ok to go ahead and do this restart.
1859                          */
1860                         (void) ibd_undo_start(state, LINK_STATE_DOWN);
1861                         if ((ret = ibd_start(state)) != 0) {
1862                                 DPRINT(10, "ibd_restart: cannot restart, "
1863                                     "ret=%d", ret);
1864                         }
1865 
1866                         goto link_mod_return;
1867                 } else {
1868                         new_link_state = LINK_STATE_DOWN;
1869                 }

1870         }
1871 
1872 update_link_state:
1873         if (port_infop) {
1874                 ibt_free_portinfo(port_infop, port_infosz);
1875         }
1876 
1877         /*
1878          * If we're reporting a link up, check InitTypeReply to see if
1879          * the SM has ensured that the port's presence in mcg, traps,
1880          * etc. is intact.
1881          */
1882         if (new_link_state == LINK_STATE_DOWN) {
1883                 opcode = IBD_LINK_DOWN;
1884         } else {
1885                 if ((itreply & SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY) ==
1886                     SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY) {
1887                         opcode = IBD_LINK_UP;
1888                 } else {
1889                         opcode = IBD_LINK_UP_ABSENT;


2036         /*
2037          *  Register ourselves with the GLDv3 interface
2038          */
2039         if ((ret = mac_register(macp, &state->id_mh)) != 0) {
2040                 mac_free(macp);
2041                 DPRINT(10,
2042                     "ibd_register_mac: mac_register() failed, ret=%d", ret);
2043                 return (DDI_FAILURE);
2044         }
2045 
2046         mac_free(macp);
2047         return (DDI_SUCCESS);
2048 }
2049 
2050 static int
2051 ibd_record_capab(ibd_state_t *state)
2052 {
2053         ibt_hca_attr_t hca_attrs;
2054         ibt_status_t ibt_status;
2055 


2056         /*
2057          * Query the HCA and fetch its attributes
2058          */
2059         ibt_status = ibt_query_hca(state->id_hca_hdl, &hca_attrs);
2060         ASSERT(ibt_status == IBT_SUCCESS);
2061 
2062         /*
2063          * 1. Set the Hardware Checksum capability. Currently we only consider
2064          *    full checksum offload.
2065          */
2066         if (state->id_enable_rc) {
2067                         state->id_hwcksum_capab = 0;
2068         } else {
2069                 if ((hca_attrs.hca_flags & IBT_HCA_CKSUM_FULL)
2070                     == IBT_HCA_CKSUM_FULL) {
2071                         state->id_hwcksum_capab = IBT_HCA_CKSUM_FULL;
2072                 }
2073         }
2074 
2075         /*


2138         state->rc_max_sqseg_hiwm = (state->rc_tx_max_sqseg * 65) / 100;
2139 
2140         /*
2141          * 5. Set number of recv and send wqes after checking hca maximum
2142          *    channel size. Store the max channel size in the state so that it
2143          *    can be referred to when the swqe/rwqe change is requested via
2144          *    dladm.
2145          */
2146 
2147         state->id_hca_max_chan_sz = hca_attrs.hca_max_chan_sz;
2148 
2149         if (hca_attrs.hca_max_chan_sz < state->id_ud_num_rwqe)
2150                 state->id_ud_num_rwqe = hca_attrs.hca_max_chan_sz;
2151 
2152         state->id_rx_bufs_outstanding_limit = state->id_ud_num_rwqe -
2153             IBD_RWQE_MIN;
2154 
2155         if (hca_attrs.hca_max_chan_sz < state->id_ud_num_swqe)
2156                 state->id_ud_num_swqe = hca_attrs.hca_max_chan_sz;
2157 


2158         return (DDI_SUCCESS);
2159 }
2160 
2161 static int
2162 ibd_part_busy(ibd_state_t *state)
2163 {
2164         if (atomic_add_32_nv(&state->id_rx_list.dl_bufs_outstanding, 0) != 0) {
2165                 DPRINT(10, "ibd_part_busy: failed: rx bufs outstanding\n");
2166                 return (DDI_FAILURE);
2167         }
2168 
2169         if (state->rc_srq_rwqe_list.dl_bufs_outstanding != 0) {
2170                 DPRINT(10, "ibd_part_busy: failed: srq bufs outstanding\n");
2171                 return (DDI_FAILURE);
2172         }
2173 
2174         /*
2175          * "state->id_ah_op == IBD_OP_ONGOING" means this IPoIB port is
2176          * connecting to a remote IPoIB port. We can't remove this port.
2177          */


3027  */
3028 static ibt_status_t
3029 ibd_find_bgroup(ibd_state_t *state)
3030 {
3031         ibt_mcg_attr_t mcg_attr;
3032         uint_t numg;
3033         uchar_t scopes[] = { IB_MC_SCOPE_SUBNET_LOCAL,
3034             IB_MC_SCOPE_SITE_LOCAL, IB_MC_SCOPE_ORG_LOCAL,
3035             IB_MC_SCOPE_GLOBAL };
3036         int i, mcgmtu;
3037         boolean_t found = B_FALSE;
3038         int ret;
3039         ibt_mcg_info_t mcg_info;
3040 
3041         state->id_bgroup_created = B_FALSE;
3042         state->id_bgroup_present = B_FALSE;
3043 
3044 query_bcast_grp:
3045         bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
3046         mcg_attr.mc_pkey = state->id_pkey;

3047         state->id_mgid.gid_guid = IB_MGID_IPV4_LOWGRP_MASK;

3048 
3049         for (i = 0; i < sizeof (scopes)/sizeof (scopes[0]); i++) {
3050                 state->id_scope = mcg_attr.mc_scope = scopes[i];
3051 
3052                 /*
3053                  * Look for the IPoIB broadcast group.
3054                  */

3055                 state->id_mgid.gid_prefix =
3056                     (((uint64_t)IB_MCGID_IPV4_PREFIX << 32) |
3057                     ((uint64_t)state->id_scope << 48) |
3058                     ((uint32_t)(state->id_pkey << 16)));
3059                 mcg_attr.mc_mgid = state->id_mgid;

3060                 if (ibt_query_mcg(state->id_sgid, &mcg_attr, 1,
3061                     &state->id_mcinfo, &numg) == IBT_SUCCESS) {
3062                         found = B_TRUE;
3063                         break;
3064                 }
3065         }
3066 
3067         if (!found) {
3068                 if (state->id_create_broadcast_group) {
3069                         /*
3070                          * If we created the broadcast group, but failed to
3071                          * find it, we can't do anything except leave the
3072                          * one we created and return failure.
3073                          */
3074                         if (state->id_bgroup_created) {
3075                                 ibd_print_warn(state, "IPoIB broadcast group "
3076                                     "absent. Unable to query after create.");
3077                                 goto find_bgroup_fail;
3078                         }
3079 
3080                         /*
3081                          * Create the ipoib broadcast group if it didn't exist
3082                          */
3083                         bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
3084                         mcg_attr.mc_qkey = IBD_DEFAULT_QKEY;
3085                         mcg_attr.mc_join_state = IB_MC_JSTATE_FULL;
3086                         mcg_attr.mc_scope = IB_MC_SCOPE_SUBNET_LOCAL;
3087                         mcg_attr.mc_pkey = state->id_pkey;
3088                         mcg_attr.mc_flow = 0;
3089                         mcg_attr.mc_sl = 0;
3090                         mcg_attr.mc_tclass = 0;

3091                         state->id_mgid.gid_prefix =
3092                             (((uint64_t)IB_MCGID_IPV4_PREFIX << 32) |
3093                             ((uint64_t)IB_MC_SCOPE_SUBNET_LOCAL << 48) |
3094                             ((uint32_t)(state->id_pkey << 16)));
3095                         mcg_attr.mc_mgid = state->id_mgid;

3096 
3097                         if ((ret = ibt_join_mcg(state->id_sgid, &mcg_attr,
3098                             &mcg_info, NULL, NULL)) != IBT_SUCCESS) {
3099                                 ibd_print_warn(state, "IPoIB broadcast group "
3100                                     "absent, create failed: ret = %d\n", ret);
3101                                 state->id_bgroup_created = B_FALSE;
3102                                 return (IBT_FAILURE);
3103                         }
3104                         state->id_bgroup_created = B_TRUE;
3105                         goto query_bcast_grp;
3106                 } else {
3107                         ibd_print_warn(state, "IPoIB broadcast group absent");
3108                         return (IBT_FAILURE);
3109                 }
3110         }
3111 
3112         /*
3113          * Assert that the mcg mtu <= id_mtu. Fill in updated id_mtu.
3114          */
3115         mcgmtu = (128 << state->id_mcinfo->mc_mtu);


3957  * on a kernel thread (handling can thus block) and can be invoked
3958  * concurrently. The handler can be invoked anytime after it is
3959  * registered and before ibt_detach().
3960  */
3961 /* ARGSUSED */
3962 static void
3963 ibd_snet_notices_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
3964     ibt_subnet_event_t *event)
3965 {
3966         ibd_state_t *state = (ibd_state_t *)arg;
3967         ibd_req_t *req;
3968 
3969         /*
3970          * The trap handler will get invoked once for every event for
3971          * every port. The input "gid" is the GID0 of the port the
3972          * trap came in on; we just need to act on traps that came
3973          * to our port, meaning the port on which the ipoib interface
3974          * resides. Since ipoib uses GID0 of the port, we just match
3975          * the gids to check whether we need to handle the trap.
3976          */

3977         if (bcmp(&gid, &state->id_sgid, sizeof (ib_gid_t)) != 0)
3978                 return;

3979 
3980         DPRINT(10, "ibd_notices_handler : %d\n", code);
3981 
3982         switch (code) {
3983                 case IBT_SM_EVENT_UNAVAILABLE:
3984                         /*
3985                          * If we are in promiscuous mode or have
3986                          * sendnonmembers, we need to print a warning
3987                          * message right now. Else, just store the
3988                          * information, print when we enter promiscuous
3989                          * mode or attempt nonmember send. We might
3990                          * also want to stop caching sendnonmember.
3991                          */
3992                         ibd_print_warn(state, "IBA multicast support "
3993                             "degraded due to unavailability of multicast "
3994                             "traps");
3995                         break;
3996                 case IBT_SM_EVENT_AVAILABLE:
3997                         /*
3998                          * If we printed a warning message above or


4945         ret = ibt_query_hca_ports(state->id_hca_hdl, state->id_port,
4946             &port_infop, &psize, &port_infosz);
4947         if ((ret != IBT_SUCCESS) || (psize != 1)) {
4948                 mutex_exit(&state->id_link_mutex);
4949                 DPRINT(10, "ibd_get_port_details: ibt_query_hca_ports() "
4950                     "failed, ret=%d", ret);
4951                 return (ENETDOWN);
4952         }
4953 
4954         /*
4955          * If the link is active, verify the pkey
4956          */
4957         if (port_infop->p_linkstate == IBT_PORT_ACTIVE) {
4958                 if ((ret = ibt_pkey2index(state->id_hca_hdl, state->id_port,
4959                     state->id_pkey, &state->id_pkix)) != IBT_SUCCESS) {
4960                         state->id_link_state = LINK_STATE_DOWN;
4961                 } else {
4962                         state->id_link_state = LINK_STATE_UP;
4963                 }
4964                 state->id_mtu = (128 << port_infop->p_mtu);

4965                 state->id_sgid = *port_infop->p_sgid_tbl;

4966                 /*
4967                  * Now that the port is active, record the port speed
4968                  */
4969                 state->id_link_speed = ibd_get_portspeed(state);
4970         } else {
4971                 /* Make sure that these are handled in PORT_UP/CHANGE */
4972                 state->id_mtu = 0;
4973                 state->id_link_state = LINK_STATE_DOWN;
4974                 state->id_link_speed = 0;
4975         }
4976         mutex_exit(&state->id_link_mutex);
4977         ibt_free_portinfo(port_infop, port_infosz);
4978 
4979         return (0);
4980 }
4981 
4982 static int
4983 ibd_alloc_cqs(ibd_state_t *state)
4984 {
4985         ibt_hca_attr_t hca_attrs;