1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * The Ethernet Over Infiniband driver
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/conf.h>
  32 #include <sys/devops.h>
  33 #include <sys/kmem.h>
  34 #include <sys/ksynch.h>
  35 #include <sys/modctl.h>
  36 #include <sys/stat.h>
  37 #include <sys/ddi.h>
  38 #include <sys/sunddi.h>
  39 
  40 #include <sys/mac_provider.h>
  41 #include <sys/mac_ether.h>
  42 
  43 #include <sys/ib/clients/eoib/eib_impl.h>
  44 
  45 /*
  46  * Driver entry point declarations
  47  */
  48 static int eib_attach(dev_info_t *, ddi_attach_cmd_t);
  49 static int eib_detach(dev_info_t *, ddi_detach_cmd_t);
  50 
  51 /*
  52  * MAC callbacks
  53  */
  54 static int eib_m_stat(void *, uint_t, uint64_t *);
  55 static int eib_m_start(void *);
  56 static void eib_m_stop(void *);
  57 static int eib_m_promisc(void *, boolean_t);
  58 static int eib_m_multicast(void *, boolean_t, const uint8_t *);
  59 static int eib_m_unicast(void *, const uint8_t *);
  60 static mblk_t *eib_m_tx(void *, mblk_t *);
  61 static boolean_t eib_m_getcapab(void *, mac_capab_t, void *);
  62 static int eib_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
  63     const void *);
  64 static int eib_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
  65 static void eib_m_propinfo(void *, const char *, mac_prop_id_t,
  66     mac_prop_info_handle_t);
  67 
  68 /*
  69  * Devops definition
  70  */
  71 DDI_DEFINE_STREAM_OPS(eib_ops, nulldev, nulldev, eib_attach, eib_detach,
  72     nodev, NULL, D_MP, NULL, ddi_quiesce_not_needed);
  73 
  74 /*
  75  * Module Driver Info
  76  */
  77 static struct modldrv eib_modldrv = {
  78         &mod_driverops,             /* Driver module */
  79         "EoIB Driver",          /* Driver name and version */
  80         &eib_ops,           /* Driver ops */
  81 };
  82 
  83 /*
  84  * Module Linkage
  85  */
  86 static struct modlinkage eib_modlinkage = {
  87         MODREV_1, (void *)&eib_modldrv, NULL
  88 };
  89 
  90 /*
  91  * GLDv3 entry points
  92  */
  93 #define EIB_M_CALLBACK_FLAGS    \
  94         (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
  95 static mac_callbacks_t eib_m_callbacks = {
  96         EIB_M_CALLBACK_FLAGS,
  97         eib_m_stat,
  98         eib_m_start,
  99         eib_m_stop,
 100         eib_m_promisc,
 101         eib_m_multicast,
 102         eib_m_unicast,
 103         eib_m_tx,
 104         NULL,
 105         NULL,
 106         eib_m_getcapab,
 107         NULL,
 108         NULL,
 109         eib_m_setprop,
 110         eib_m_getprop,
 111         eib_m_propinfo
 112 };
 113 
 114 /*
 115  * Async handler callback for ibt events
 116  */
 117 static ibt_clnt_modinfo_t eib_clnt_modinfo = {
 118         IBTI_V_CURR,
 119         IBT_NETWORK,
 120         eib_ibt_async_handler,
 121         NULL,
 122         EIB_DRV_NAME
 123 };
 124 
 125 /*
 126  * Driver State Pointer
 127  */
 128 void *eib_state;
 129 
 130 /*
 131  * Declarations private to this file
 132  */
 133 static int eib_state_init(eib_t *);
 134 static int eib_add_event_callbacks(eib_t *);
 135 static int eib_register_with_mac(eib_t *, dev_info_t *);
 136 static void eib_rb_attach(eib_t *, uint_t);
 137 static void eib_rb_state_init(eib_t *);
 138 static void eib_rb_add_event_callbacks(eib_t *);
 139 static void eib_rb_register_with_mac(eib_t *);
 140 
 141 /*
 142  * Definitions private to this file
 143  */
 144 #define EIB_ATTACH_STATE_ALLOCD         0x01
 145 #define EIB_ATTACH_PROPS_PARSED         0x02
 146 #define EIB_ATTACH_STATE_INIT_DONE      0x04
 147 #define EIB_ATTACH_IBT_ATT_DONE         0x08
 148 #define EIB_ATTACH_EV_CBS_ADDED         0x10
 149 #define EIB_ATTACH_REGISTER_MAC_DONE    0x20
 150 
 151 int
 152 _init()
 153 {
 154         int ret;
 155 
 156         if (ddi_name_to_major(EIB_DRV_NAME) == (major_t)-1)
 157                 return (ENODEV);
 158 
 159         if ((ret = ddi_soft_state_init(&eib_state, sizeof (eib_t), 0)) != 0)
 160                 return (ret);
 161 
 162         mac_init_ops(&eib_ops, EIB_DRV_NAME);
 163         if ((ret = mod_install(&eib_modlinkage)) != 0) {
 164                 mac_fini_ops(&eib_ops);
 165                 ddi_soft_state_fini(&eib_state);
 166                 return (ret);
 167         }
 168 
 169         eib_debug_init();
 170 
 171         return (ret);
 172 }
 173 
 174 int
 175 _info(struct modinfo *modinfop)
 176 {
 177         return (mod_info(&eib_modlinkage, modinfop));
 178 }
 179 
 180 int
 181 _fini()
 182 {
 183         int ret;
 184 
 185         if ((ret = mod_remove(&eib_modlinkage)) != 0)
 186                 return (ret);
 187 
 188         eib_debug_fini();
 189 
 190         mac_fini_ops(&eib_ops);
 191         ddi_soft_state_fini(&eib_state);
 192 
 193         return (ret);
 194 }
 195 
 196 static int
 197 eib_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 198 {
 199         eib_t *ss;
 200         ibt_status_t ret;
 201         int instance;
 202         uint_t progress = 0;
 203 
 204         if (cmd != DDI_ATTACH)
 205                 return (DDI_FAILURE);
 206 
 207         /*
 208          * Allocate softstate for this instance
 209          */
 210         instance = ddi_get_instance(dip);
 211         if (ddi_soft_state_zalloc(eib_state, instance) == DDI_FAILURE)
 212                 goto attach_fail;
 213 
 214         progress |= EIB_ATTACH_STATE_ALLOCD;
 215 
 216         ss = ddi_get_soft_state(eib_state, instance);
 217         ss->ei_dip = dip;
 218         ss->ei_instance = (uint_t)instance;
 219 
 220         /*
 221          * Parse the node properties and get the gateway parameters
 222          * for this instance
 223          */
 224         if (eib_get_props(ss) != EIB_E_SUCCESS) {
 225                 EIB_DPRINTF_ERR(ss->ei_instance,
 226                     "eib_attach: eib_get_props() failed");
 227                 goto attach_fail;
 228         }
 229         progress |= EIB_ATTACH_PROPS_PARSED;
 230 
 231         /*
 232          * Do per-state initialization
 233          */
 234         if (eib_state_init(ss) != EIB_E_SUCCESS) {
 235                 EIB_DPRINTF_ERR(ss->ei_instance,
 236                     "eib_attach: eib_state_init() failed");
 237                 goto attach_fail;
 238         }
 239         progress |= EIB_ATTACH_STATE_INIT_DONE;
 240 
 241         /*
 242          * Attach to IBTL
 243          */
 244         if ((ret = ibt_attach(&eib_clnt_modinfo, ss->ei_dip, ss,
 245             &ss->ei_ibt_hdl)) != IBT_SUCCESS) {
 246                 EIB_DPRINTF_ERR(ss->ei_instance,
 247                     "eib_attach: ibt_attach() failed, ret=%d", ret);
 248                 goto attach_fail;
 249         }
 250         progress |= EIB_ATTACH_IBT_ATT_DONE;
 251 
 252         /*
 253          * Register NDI event callbacks with EoIB nexus
 254          */
 255         if (eib_add_event_callbacks(ss) != EIB_E_SUCCESS) {
 256                 EIB_DPRINTF_ERR(ss->ei_instance,
 257                     "eib_attach: eib_add_event_callbacks() failed");
 258                 goto attach_fail;
 259         }
 260         progress |= EIB_ATTACH_EV_CBS_ADDED;
 261 
 262         /*
 263          * Register with mac layer
 264          */
 265         if (eib_register_with_mac(ss, dip) != EIB_E_SUCCESS) {
 266                 EIB_DPRINTF_ERR(ss->ei_instance,
 267                     "eib_attach: eib_register_with_mac() failed");
 268                 goto attach_fail;
 269         }
 270         progress |= EIB_ATTACH_REGISTER_MAC_DONE;
 271 
 272         return (DDI_SUCCESS);
 273 
 274 attach_fail:
 275         eib_rb_attach(ss, progress);
 276         return (DDI_FAILURE);
 277 }
 278 
 279 static int
 280 eib_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 281 {
 282         eib_t *ss;
 283         int instance;
 284 
 285         if (cmd != DDI_DETACH)
 286                 return (DDI_FAILURE);
 287 
 288         instance = ddi_get_instance(dip);
 289         ss = ddi_get_soft_state(eib_state, instance);
 290 
 291         /*
 292          * If we had not cleaned up rx buffers (and hca resources) during
 293          * unplumb because they were stuck with the nw layer at the time,
 294          * we can try to clean them up now before doing the detach.
 295          */
 296         eib_mac_set_nic_state(ss, EIB_NIC_STOPPING);
 297 
 298         eib_rb_rsrc_setup_bufs(ss, B_FALSE);
 299         if (ss->ei_tx || ss->ei_rx || ss->ei_lso) {
 300                 EIB_DPRINTF_WARN(ss->ei_instance,
 301                     "eib_detach: buffers still not returned "
 302                     "(tx=0x%llx, rx=0x%llx, lso=0x%llx), could "
 303                     "not detach", ss->ei_tx, ss->ei_rx, ss->ei_lso);
 304                 eib_mac_clr_nic_state(ss, EIB_NIC_STOPPING);
 305                 return (DDI_FAILURE);
 306         }
 307         if (ss->ei_hca_hdl) {
 308                 eib_rb_ibt_hca_init(ss, ~0);
 309         }
 310         eib_mac_clr_nic_state(ss, EIB_NIC_STOPPING);
 311 
 312         eib_rb_attach(ss, ~0);
 313 
 314         return (DDI_SUCCESS);
 315 }
 316 
 317 static int
 318 eib_m_stat(void *arg, uint_t stat, uint64_t *val)
 319 {
 320         eib_t *ss = arg;
 321         eib_stats_t *stats = ss->ei_stats;
 322 
 323         switch (stat) {
 324         case MAC_STAT_IFSPEED:
 325                 *val = ss->ei_props->ep_ifspeed;
 326                 break;
 327 
 328         case MAC_STAT_OBYTES:
 329                 *val = stats->st_obytes;
 330                 break;
 331 
 332         case MAC_STAT_OPACKETS:
 333                 *val = stats->st_opkts;
 334                 break;
 335 
 336         case MAC_STAT_BRDCSTXMT:
 337                 *val = stats->st_brdcstxmit;
 338                 break;
 339 
 340         case MAC_STAT_MULTIXMT:
 341                 *val = stats->st_multixmit;
 342                 break;
 343 
 344         case MAC_STAT_OERRORS:
 345                 *val = stats->st_oerrors;
 346                 break;
 347 
 348         case MAC_STAT_NOXMTBUF:
 349                 *val = stats->st_noxmitbuf;
 350                 break;
 351 
 352         case MAC_STAT_RBYTES:
 353                 *val = stats->st_rbytes;
 354                 break;
 355 
 356         case MAC_STAT_IPACKETS:
 357                 *val = stats->st_ipkts;
 358                 break;
 359 
 360         case MAC_STAT_BRDCSTRCV:
 361                 *val = stats->st_brdcstrcv;
 362                 break;
 363 
 364         case MAC_STAT_MULTIRCV:
 365                 *val = stats->st_multircv;
 366                 break;
 367 
 368         case MAC_STAT_IERRORS:
 369                 *val = stats->st_ierrors;
 370                 break;
 371 
 372         case MAC_STAT_NORCVBUF:
 373                 *val = stats->st_norcvbuf;
 374                 break;
 375 
 376         case ETHER_STAT_LINK_DUPLEX:
 377                 *val = LINK_DUPLEX_FULL;
 378                 break;
 379 
 380         default:
 381                 return (ENOTSUP);
 382         }
 383 
 384         return (0);
 385 }
 386 
 387 static int
 388 eib_m_start(void *arg)
 389 {
 390         eib_t *ss = arg;
 391         int ret = -1;
 392 
 393         eib_mac_set_nic_state(ss, EIB_NIC_STARTING);
 394 
 395         if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0)
 396                 ret = eib_mac_start(ss);
 397 
 398         if (ret == 0)
 399                 eib_mac_upd_nic_state(ss, EIB_NIC_STARTING, EIB_NIC_STARTED);
 400         else
 401                 eib_mac_clr_nic_state(ss, EIB_NIC_STARTING);
 402 
 403         return (ret);
 404 }
 405 
 406 static void
 407 eib_m_stop(void *arg)
 408 {
 409         eib_t *ss = arg;
 410 
 411         eib_mac_set_nic_state(ss, EIB_NIC_STOPPING);
 412 
 413         if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) != 0)
 414                 eib_mac_stop(ss);
 415 
 416         eib_mac_clr_nic_state(ss, EIB_NIC_STARTED|EIB_NIC_STOPPING);
 417 }
 418 
 419 static int
 420 eib_m_promisc(void *arg, boolean_t flag)
 421 {
 422         eib_t *ss = arg;
 423 
 424         if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0)
 425                 return (0);
 426 
 427         return (eib_mac_promisc(ss, flag));
 428 }
 429 
 430 static int
 431 eib_m_multicast(void *arg, boolean_t add, const uint8_t *mcast_mac)
 432 {
 433         eib_t *ss = arg;
 434 
 435         if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0)
 436                 return (0);
 437 
 438         /*
 439          * We don't have any knowledge which of the vnics built on top of
 440          * the physlink is this multicast group relevant for.  We'll join
 441          * it for vnic0 for now.
 442          *
 443          * Since the tx routine in EoIB currently piggy backs all multicast
 444          * traffic over the broadcast channel, and all vnics are joined to
 445          * the broadcast address when they're created, everyone should receive
 446          * all multicast traffic anyway.
 447          *
 448          * On the rx side, we'll check if the incoming multicast address is
 449          * either on the vnic's list of mcgs joined to (which will only be the
 450          * broadcast address) or on vnic0's list of mcgs.  If we find a match,
 451          * we let the packet come through.
 452          *
 453          * This isn't perfect, but it's the best we can do given that we don't
 454          * have any vlan information corresponding to this multicast address.
 455          *
 456          * Also, for now we'll use the synchronous multicast joins and
 457          * leaves instead of the asynchronous mechanism provided by
 458          * ibt_join_mcg() since that involves additional complexity for failed
 459          * joins and removals.
 460          */
 461         return (eib_mac_multicast(ss, add, (uint8_t *)mcast_mac));
 462 }
 463 
 464 static int
 465 eib_m_unicast(void *arg, const uint8_t *macaddr)
 466 {
 467         eib_t *ss = arg;
 468         eib_vnic_t *vnic;
 469 
 470         if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0)
 471                 return (0);
 472 
 473         mutex_enter(&ss->ei_vnic_lock);
 474 
 475         vnic = ss->ei_vnic[0];
 476         if (bcmp(macaddr, vnic->vn_login_data.ld_assigned_mac,
 477             ETHERADDRL) == 0) {
 478                 mutex_exit(&ss->ei_vnic_lock);
 479                 return (0);
 480         }
 481 
 482         mutex_exit(&ss->ei_vnic_lock);
 483 
 484         return (EINVAL);
 485 }
 486 
 487 static mblk_t *
 488 eib_m_tx(void *arg, mblk_t *mp)
 489 {
 490         eib_t *ss = arg;
 491         mblk_t *next;
 492 
 493         /*
 494          * If the nic hasn't been started, drop the message(s)
 495          */
 496         if ((ss->ei_node_state->ns_nic_state & EIB_NIC_STARTED) == 0) {
 497                 freemsgchain(mp);
 498                 return (NULL);
 499         }
 500 
 501         for (; mp != NULL; mp = next) {
 502                 /*
 503                  * Detach this message from the message chain
 504                  */
 505                 next = mp->b_next;
 506                 mp->b_next = NULL;
 507 
 508                 /*
 509                  * Attempt to send the message; if we fail (likely due
 510                  * to lack of resources), reattach this message to the
 511                  * chain and return the unsent chain back.  When we're
 512                  * ready to send again, we'll issue a mac_tx_update().
 513                  */
 514                 if (eib_mac_tx(ss, mp) != EIB_E_SUCCESS) {
 515                         mp->b_next = next;
 516                         break;
 517                 }
 518         }
 519 
 520         return (mp);
 521 }
 522 
 523 static boolean_t
 524 eib_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
 525 {
 526         eib_t *ss = arg;
 527         eib_caps_t *caps = ss->ei_caps;
 528         eib_caps_t s_caps;
 529         ibt_hca_attr_t hca_attrs;
 530         ibt_status_t ret;
 531 
 532         /*
 533          * If we haven't been plumbed yet, try getting the hca attributes
 534          * and figure out the capabilities now
 535          */
 536         if (caps == NULL) {
 537                 ASSERT(ss->ei_props != NULL);
 538 
 539                 ret = ibt_query_hca_byguid(ss->ei_props->ep_hca_guid,
 540                     &hca_attrs);
 541                 if (ret == IBT_SUCCESS) {
 542                         eib_ibt_record_capab(ss, &hca_attrs, &s_caps);
 543                         caps = &s_caps;
 544                 }
 545         }
 546 
 547         if ((caps != NULL) && (cap == MAC_CAPAB_HCKSUM)) {
 548                 uint32_t *tx_flags = cap_data;
 549 
 550                 if (caps->cp_cksum_flags == 0) {
 551                         EIB_DPRINTF_VERBOSE(ss->ei_instance,
 552                             "eib_m_getcapab: hw cksum disabled, cksum_flags=0");
 553                         return (B_FALSE);
 554                 }
 555 
 556                 *tx_flags = caps->cp_cksum_flags;
 557 
 558                 return (B_TRUE);
 559 
 560         } else if ((caps != NULL) && (cap == MAC_CAPAB_LSO)) {
 561                 mac_capab_lso_t *cap_lso = cap_data;
 562 
 563                 /*
 564                  * If the HCA supports LSO, it will advertise a non-zero
 565                  * "max lso size" parameter. Also, LSO relies on hw
 566                  * checksum being available.  Finally, if the HCA
 567                  * doesn't provide the reserved-lkey capability, LSO
 568                  * will adversely affect the performance.  So, we'll
 569                  * enable LSO only if we have a non-zero max lso size,
 570                  * support checksum offload and provide reserved lkey.
 571                  */
 572                 if (caps->cp_lso_maxlen == 0 ||
 573                     caps->cp_cksum_flags == 0 ||
 574                     caps->cp_resv_lkey_capab == 0) {
 575                         EIB_DPRINTF_VERBOSE(ss->ei_instance, "eib_m_getcapab: "
 576                             "LSO disabled, lso_maxlen=0x%lx, "
 577                             "cksum_flags=0x%lx, resv_lkey_capab=%d",
 578                             caps->cp_lso_maxlen,
 579                             caps->cp_cksum_flags,
 580                             caps->cp_resv_lkey_capab);
 581                         return (B_FALSE);
 582                 }
 583 
 584                 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
 585                 cap_lso->lso_basic_tcp_ipv4.lso_max = caps->cp_lso_maxlen - 1;
 586 
 587                 return (B_TRUE);
 588         }
 589 
 590         return (B_FALSE);
 591 }
 592 
 593 /*ARGSUSED*/
 594 static int
 595 eib_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 596     uint_t pr_valsize, const void *pr_val)
 597 {
 598         return (ENOTSUP);
 599 }
 600 
 601 static int
 602 eib_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 603     uint_t pr_valsize, void *pr_val)
 604 {
 605         eib_t *ss = arg;
 606         link_duplex_t duplex = LINK_DUPLEX_FULL;
 607         uint64_t speed = ss->ei_props->ep_ifspeed;
 608         int err = 0;
 609 
 610         switch (pr_num) {
 611         case MAC_PROP_DUPLEX:
 612                 ASSERT(pr_valsize >= sizeof (link_duplex_t));
 613                 bcopy(&duplex, pr_val, sizeof (link_duplex_t));
 614                 break;
 615 
 616         case MAC_PROP_SPEED:
 617                 ASSERT(pr_valsize >= sizeof (uint64_t));
 618                 bcopy(&speed, pr_val, sizeof (speed));
 619                 break;
 620 
 621         case MAC_PROP_PRIVATE:
 622                 if (strcmp(pr_name, EIB_DLPROP_GW_EPORT_STATE) == 0) {
 623                         if (ss->ei_gw_eport_state == FIP_EPORT_UP) {
 624                                 (void) snprintf(pr_val, pr_valsize,
 625                                     "%s", "up");
 626                         } else {
 627                                 (void) snprintf(pr_val, pr_valsize,
 628                                     "%s", "down");
 629                         }
 630                 } else if (strcmp(pr_name, EIB_DLPROP_HCA_GUID) == 0) {
 631                         (void) snprintf(pr_val, pr_valsize, "%llX",
 632                             (u_longlong_t)ss->ei_props->ep_hca_guid);
 633 
 634                 } else if (strcmp(pr_name, EIB_DLPROP_PORT_GUID) == 0) {
 635                         (void) snprintf(pr_val, pr_valsize, "%llX",
 636                             (u_longlong_t)((ss->ei_props->ep_sgid).gid_guid));
 637                 }
 638                 break;
 639 
 640         default:
 641                 err = ENOTSUP;
 642                 break;
 643         }
 644 
 645         return (err);
 646 }
 647 
 648 /*ARGSUSED*/
 649 static void
 650 eib_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 651     mac_prop_info_handle_t prh)
 652 {
 653         switch (pr_num) {
 654         case MAC_PROP_DUPLEX:
 655         case MAC_PROP_SPEED:
 656                 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
 657                 break;
 658 
 659         case MAC_PROP_MTU:
 660                 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
 661                 mac_prop_info_set_range_uint32(prh, ETHERMTU, ETHERMTU);
 662                 break;
 663 
 664         case MAC_PROP_PRIVATE:
 665                 if (strcmp(pr_name, EIB_DLPROP_GW_EPORT_STATE) == 0) {
 666                         mac_prop_info_set_default_str(prh, "up ");
 667                         mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
 668                 } else if (strcmp(pr_name, EIB_DLPROP_HCA_GUID) == 0) {
 669                         mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
 670                 } else if (strcmp(pr_name, EIB_DLPROP_PORT_GUID) == 0) {
 671                         mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
 672                 }
 673                 break;
 674         }
 675 }
 676 
 677 static int
 678 eib_state_init(eib_t *ss)
 679 {
 680         kthread_t *kt;
 681 
 682         /*
 683          * Initialize synchronization primitives
 684          */
 685         mutex_init(&ss->ei_vnic_lock, NULL, MUTEX_DRIVER, NULL);
 686         mutex_init(&ss->ei_av_lock, NULL, MUTEX_DRIVER, NULL);
 687         mutex_init(&ss->ei_ev_lock, NULL, MUTEX_DRIVER, NULL);
 688         mutex_init(&ss->ei_rxpost_lock, NULL, MUTEX_DRIVER, NULL);
 689         mutex_init(&ss->ei_vnic_req_lock, NULL, MUTEX_DRIVER, NULL);
 690         mutex_init(&ss->ei_ka_vnics_lock, NULL, MUTEX_DRIVER, NULL);
 691         cv_init(&ss->ei_vnic_cv, NULL, CV_DEFAULT, NULL);
 692         cv_init(&ss->ei_ev_cv, NULL, CV_DEFAULT, NULL);
 693         cv_init(&ss->ei_rxpost_cv, NULL, CV_DEFAULT, NULL);
 694         cv_init(&ss->ei_vnic_req_cv, NULL, CV_DEFAULT, NULL);
 695         cv_init(&ss->ei_ka_vnics_cv, NULL, CV_DEFAULT, NULL);
 696 
 697         /*
 698          * Create a node state structure and initialize
 699          */
 700         ss->ei_node_state = kmem_zalloc(sizeof (eib_node_state_t), KM_SLEEP);
 701         ss->ei_node_state->ns_link_state = LINK_STATE_UNKNOWN;
 702         mutex_init(&ss->ei_node_state->ns_lock, NULL, MUTEX_DRIVER, NULL);
 703         cv_init(&ss->ei_node_state->ns_cv, NULL, CV_DEFAULT, NULL);
 704 
 705         /*
 706          * Allocate for gathering statistics
 707          */
 708         ss->ei_stats = kmem_zalloc(sizeof (eib_stats_t), KM_SLEEP);
 709 
 710         /*
 711          * Start up service threads
 712          */
 713         kt = thread_create(NULL, 0, eib_events_handler, ss, 0,
 714             &p0, TS_RUN, minclsyspri);
 715         ss->ei_events_handler = kt->t_did;
 716 
 717         kt = thread_create(NULL, 0, eib_refill_rwqes, ss, 0,
 718             &p0, TS_RUN, minclsyspri);
 719         ss->ei_rwqes_refiller = kt->t_did;
 720 
 721         kt = thread_create(NULL, 0, eib_vnic_creator, ss, 0,
 722             &p0, TS_RUN, minclsyspri);
 723         ss->ei_vnic_creator = kt->t_did;
 724 
 725         kt = thread_create(NULL, 0, eib_manage_keepalives, ss, 0,
 726             &p0, TS_RUN, minclsyspri);
 727         ss->ei_keepalives_manager = kt->t_did;
 728 
 729         /*
 730          * Set default state of gw eport
 731          */
 732         ss->ei_gw_eport_state = FIP_EPORT_UP;
 733 
 734         /*
 735          * Do static initializations of common structures
 736          */
 737         eib_reserved_gid.gid_prefix = 0;
 738         eib_reserved_gid.gid_guid = 0;
 739 
 740         return (EIB_E_SUCCESS);
 741 }
 742 
 743 static int
 744 eib_add_event_callbacks(eib_t *ss)
 745 {
 746         int ret;
 747         ddi_eventcookie_t login_ack_evc;
 748         ddi_eventcookie_t gw_alive_evc;
 749         ddi_eventcookie_t gw_info_evc;
 750 
 751         /*
 752          * Add callback for receiving vnic login acks from the gateway
 753          */
 754         if ((ret = ddi_get_eventcookie(ss->ei_dip, EIB_NDI_EVENT_LOGIN_ACK,
 755             &login_ack_evc)) != DDI_SUCCESS) {
 756                 EIB_DPRINTF_ERR(ss->ei_instance, "eib_add_event_callbacks: "
 757                     "ddi_get_eventcookie(LOGIN_ACK) failed, ret=%d", ret);
 758                 return (EIB_E_FAILURE);
 759         }
 760         if ((ret = ddi_add_event_handler(ss->ei_dip, login_ack_evc,
 761             eib_login_ack_cb, ss, &ss->ei_login_ack_cb)) != DDI_SUCCESS) {
 762                 EIB_DPRINTF_ERR(ss->ei_instance, "eib_add_event_callbacks: "
 763                     "ddi_add_event_handler(LOGIN_ACK) failed, ret=%d", ret);
 764                 return (EIB_E_FAILURE);
 765         }
 766 
 767         /*
 768          * Add callback for receiving status on gateway transitioning from
 769          * not-available to available
 770          */
 771         if ((ret = ddi_get_eventcookie(ss->ei_dip, EIB_NDI_EVENT_GW_AVAILABLE,
 772             &gw_alive_evc)) != DDI_SUCCESS) {
 773                 EIB_DPRINTF_ERR(ss->ei_instance, "eib_add_event_callbacks: "
 774                     "ddi_get_eventcookie(GW_AVAILABLE) failed, ret=%d", ret);
 775                 (void) ddi_remove_event_handler(ss->ei_login_ack_cb);
 776                 return (EIB_E_FAILURE);
 777         }
 778         if ((ret = ddi_add_event_handler(ss->ei_dip, gw_alive_evc,
 779             eib_gw_alive_cb, ss, &ss->ei_gw_alive_cb)) != DDI_SUCCESS) {
 780                 EIB_DPRINTF_ERR(ss->ei_instance, "eib_add_event_callbacks: "
 781                     "ddi_add_event_handler(GW_AVAILABLE) failed, ret=%d", ret);
 782                 (void) ddi_remove_event_handler(ss->ei_login_ack_cb);
 783                 return (EIB_E_FAILURE);
 784         }
 785 
 786         /*
 787          * Add callback for receiving gateway info update
 788          */
 789         if ((ret = ddi_get_eventcookie(ss->ei_dip, EIB_NDI_EVENT_GW_INFO_UPDATE,
 790             &gw_info_evc)) != DDI_SUCCESS) {
 791                 EIB_DPRINTF_ERR(ss->ei_instance, "eib_add_event_callbacks: "
 792                     "ddi_get_eventcookie(GW_INFO_UPDATE) failed, ret=%d", ret);
 793                 (void) ddi_remove_event_handler(ss->ei_gw_alive_cb);
 794                 (void) ddi_remove_event_handler(ss->ei_login_ack_cb);
 795                 return (EIB_E_FAILURE);
 796         }
 797         if ((ret = ddi_add_event_handler(ss->ei_dip, gw_info_evc,
 798             eib_gw_info_cb, ss, &ss->ei_gw_info_cb)) != DDI_SUCCESS) {
 799                 EIB_DPRINTF_ERR(ss->ei_instance, "eib_add_event_callbacks: "
 800                     "ddi_add_event_handler(GW_INFO) failed, ret=%d", ret);
 801                 (void) ddi_remove_event_handler(ss->ei_gw_alive_cb);
 802                 (void) ddi_remove_event_handler(ss->ei_login_ack_cb);
 803                 return (EIB_E_FAILURE);
 804         }
 805 
 806         return (EIB_E_SUCCESS);
 807 }
 808 
 809 static int
 810 eib_register_with_mac(eib_t *ss, dev_info_t *dip)
 811 {
 812         mac_register_t *macp;
 813         int ret;
 814 
 815         if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
 816                 EIB_DPRINTF_ERR(ss->ei_instance, "eib_register_with_mac: "
 817                     "mac_alloc(MAC_VERSION=%d) failed", MAC_VERSION);
 818                 return (EIB_E_FAILURE);
 819         }
 820 
 821         /*
 822          * Note that when we register with mac during attach, we don't
 823          * have the mac address yet (we'll get that after we login into
 824          * the gateway) so we'll simply register a zero macaddr that
 825          * we'll overwrite later during plumb, in eib_m_start(). Likewise,
 826          * we'll also update the max-sdu with the correct MTU after we
 827          * figure it out when we login to the gateway during plumb.
 828          */
 829         macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
 830         macp->m_driver = ss;
 831         macp->m_dip = dip;
 832         macp->m_src_addr = eib_zero_mac;
 833         macp->m_callbacks = &eib_m_callbacks;
 834         macp->m_min_sdu = 0;
 835         macp->m_max_sdu = ETHERMTU;
 836         macp->m_margin = VLAN_TAGSZ;
 837         macp->m_priv_props = eib_pvt_props;
 838 
 839         ret = mac_register(macp, &ss->ei_mac_hdl);
 840         mac_free(macp);
 841 
 842         if (ret != 0) {
 843                 EIB_DPRINTF_ERR(ss->ei_instance, "eib_register_with_mac: "
 844                     "mac_register() failed, ret=%d", ret);
 845                 return (EIB_E_FAILURE);
 846         }
 847 
 848         return (EIB_E_SUCCESS);
 849 }
 850 
 851 static void
 852 eib_rb_attach(eib_t *ss, uint_t progress)
 853 {
 854         ibt_status_t ret;
 855         int instance;
 856 
 857         if (progress & EIB_ATTACH_REGISTER_MAC_DONE)
 858                 eib_rb_register_with_mac(ss);
 859 
 860         if (progress & EIB_ATTACH_EV_CBS_ADDED)
 861                 eib_rb_add_event_callbacks(ss);
 862 
 863         if (progress & EIB_ATTACH_IBT_ATT_DONE) {
 864                 ret = ibt_detach(ss->ei_ibt_hdl);
 865                 if (ret != IBT_SUCCESS) {
 866                         EIB_DPRINTF_WARN(ss->ei_instance, "eib_rb_attach: "
 867                             "ibt_detach() failed, ret=%d", ret);
 868                 }
 869                 ss->ei_ibt_hdl = NULL;
 870         }
 871 
 872         if (progress & EIB_ATTACH_STATE_INIT_DONE)
 873                 eib_rb_state_init(ss);
 874 
 875         if (progress & EIB_ATTACH_PROPS_PARSED)
 876                 eib_rb_get_props(ss);
 877 
 878         if (progress & EIB_ATTACH_STATE_ALLOCD) {
 879                 instance = ddi_get_instance(ss->ei_dip);
 880                 ddi_soft_state_free(eib_state, instance);
 881         }
 882 }
 883 
 884 static void
 885 eib_rb_state_init(eib_t *ss)
 886 {
 887         /*
 888          * Terminate service threads
 889          */
 890         if (ss->ei_keepalives_manager) {
 891                 eib_stop_manage_keepalives(ss);
 892                 ss->ei_keepalives_manager = 0;
 893         }
 894         if (ss->ei_vnic_creator) {
 895                 eib_stop_vnic_creator(ss);
 896                 ss->ei_vnic_creator = 0;
 897         }
 898         if (ss->ei_rwqes_refiller) {
 899                 eib_stop_refill_rwqes(ss);
 900                 ss->ei_rwqes_refiller = 0;
 901         }
 902         if (ss->ei_events_handler) {
 903                 eib_stop_events_handler(ss);
 904                 ss->ei_events_handler = 0;
 905         }
 906 
 907         /*
 908          * Remove space allocated for gathering statistics
 909          */
 910         if (ss->ei_stats) {
 911                 kmem_free(ss->ei_stats, sizeof (eib_stats_t));
 912                 ss->ei_stats = NULL;
 913         }
 914 
 915         /*
 916          * Remove space allocated for keeping node state
 917          */
 918         if (ss->ei_node_state) {
 919                 cv_destroy(&ss->ei_node_state->ns_cv);
 920                 mutex_destroy(&ss->ei_node_state->ns_lock);
 921                 kmem_free(ss->ei_node_state, sizeof (eib_node_state_t));
 922                 ss->ei_node_state = NULL;
 923         }
 924 
 925         /*
 926          * Finally, destroy all synchronization resources
 927          */
 928         cv_destroy(&ss->ei_ka_vnics_cv);
 929         cv_destroy(&ss->ei_vnic_req_cv);
 930         cv_destroy(&ss->ei_rxpost_cv);
 931         cv_destroy(&ss->ei_ev_cv);
 932         cv_destroy(&ss->ei_vnic_cv);
 933         mutex_destroy(&ss->ei_ka_vnics_lock);
 934         mutex_destroy(&ss->ei_vnic_req_lock);
 935         mutex_destroy(&ss->ei_rxpost_lock);
 936         mutex_destroy(&ss->ei_ev_lock);
 937         mutex_destroy(&ss->ei_av_lock);
 938         mutex_destroy(&ss->ei_vnic_lock);
 939 }
 940 
 941 static void
 942 eib_rb_add_event_callbacks(eib_t *ss)
 943 {
 944         ddi_eventcookie_t evc;
 945 
 946         if (ddi_get_eventcookie(ss->ei_dip, EIB_NDI_EVENT_GW_INFO_UPDATE,
 947             &evc) == DDI_SUCCESS) {
 948                 (void) ddi_remove_event_handler(ss->ei_gw_info_cb);
 949                 ss->ei_gw_info_cb = NULL;
 950         }
 951 
 952         if (ddi_get_eventcookie(ss->ei_dip, EIB_NDI_EVENT_GW_AVAILABLE,
 953             &evc) == DDI_SUCCESS) {
 954                 (void) ddi_remove_event_handler(ss->ei_gw_alive_cb);
 955                 ss->ei_gw_alive_cb = NULL;
 956         }
 957 
 958         if (ddi_get_eventcookie(ss->ei_dip, EIB_NDI_EVENT_LOGIN_ACK,
 959             &evc) == DDI_SUCCESS) {
 960                 (void) ddi_remove_event_handler(ss->ei_login_ack_cb);
 961                 ss->ei_login_ack_cb = NULL;
 962         }
 963 }
 964 
 965 static void
 966 eib_rb_register_with_mac(eib_t *ss)
 967 {
 968         int ret;
 969 
 970         if ((ret = mac_unregister(ss->ei_mac_hdl)) != 0) {
 971                 EIB_DPRINTF_WARN(ss->ei_instance,
 972                     "eib_rb_register_with_mac: "
 973                     "mac_unregister() failed, ret=%d", ret);
 974         }
 975 
 976         ss->ei_mac_hdl = NULL;
 977 }