1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012, Joyent, Inc.  All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/cred.h>
  28 #include <sys/sysmacros.h>
  29 #include <sys/conf.h>
  30 #include <sys/cmn_err.h>
  31 #include <sys/list.h>
  32 #include <sys/ksynch.h>
  33 #include <sys/kmem.h>
  34 #include <sys/stream.h>
  35 #include <sys/modctl.h>
  36 #include <sys/ddi.h>
  37 #include <sys/sunddi.h>
  38 #include <sys/atomic.h>
  39 #include <sys/stat.h>
  40 #include <sys/modhash.h>
  41 #include <sys/strsubr.h>
  42 #include <sys/strsun.h>
  43 #include <sys/dlpi.h>
  44 #include <sys/mac.h>
  45 #include <sys/mac_provider.h>
  46 #include <sys/mac_client.h>
  47 #include <sys/mac_client_priv.h>
  48 #include <sys/mac_ether.h>
  49 #include <sys/dls.h>
  50 #include <sys/pattr.h>
  51 #include <sys/time.h>
  52 #include <sys/vlan.h>
  53 #include <sys/vnic.h>
  54 #include <sys/vnic_impl.h>
  55 #include <sys/mac_flow_impl.h>
  56 #include <inet/ip_impl.h>
  57 
  58 /*
  59  * Note that for best performance, the VNIC is a passthrough design.
 * Each VNIC corresponds to a MAC client of the underlying MAC (lower MAC).
  61  * This MAC client is opened by the VNIC driver at VNIC creation,
  62  * and closed when the VNIC is deleted.
  63  * When a MAC client of the VNIC itself opens a VNIC, the MAC layer
  64  * (upper MAC) detects that the MAC being opened is a VNIC. Instead
  65  * of allocating a new MAC client, it asks the VNIC driver to return
  66  * the lower MAC client handle associated with the VNIC, and that handle
 * is returned to the upper MAC client directly. This gives upper MAC
 * clients of the VNIC direct access to the lower MAC client for both
 * the control path and the data path.
  70  *
  71  * Due to this passthrough, some of the entry points exported by the
  72  * VNIC driver are never directly invoked. These entry points include
  73  * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc.
  74  */
  75 
  76 static int vnic_m_start(void *);
  77 static void vnic_m_stop(void *);
  78 static int vnic_m_promisc(void *, boolean_t);
  79 static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
  80 static int vnic_m_unicst(void *, const uint8_t *);
  81 static int vnic_m_stat(void *, uint_t, uint64_t *);
  82 static void vnic_m_ioctl(void *, queue_t *, mblk_t *);
  83 static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
  84     const void *);
  85 static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
  86 static void vnic_m_propinfo(void *, const char *, mac_prop_id_t,
  87     mac_prop_info_handle_t);
  88 static mblk_t *vnic_m_tx(void *, mblk_t *);
  89 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
  90 static void vnic_notify_cb(void *, mac_notify_type_t);
  91 
  92 static kmem_cache_t     *vnic_cache;
  93 static krwlock_t        vnic_lock;
  94 static uint_t           vnic_count;
  95 
  96 #define ANCHOR_VNIC_MIN_MTU     576
  97 #define ANCHOR_VNIC_MAX_MTU     9000
  98 
  99 /* hash of VNICs (vnic_t's), keyed by VNIC id */
 100 static mod_hash_t       *vnic_hash;
 101 #define VNIC_HASHSZ     64
 102 #define VNIC_HASH_KEY(vnic_id)  ((mod_hash_key_t)(uintptr_t)vnic_id)
 103 
 104 #define VNIC_M_CALLBACK_FLAGS   \
 105         (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
 106 
 107 static mac_callbacks_t vnic_m_callbacks = {
 108         VNIC_M_CALLBACK_FLAGS,
 109         vnic_m_stat,
 110         vnic_m_start,
 111         vnic_m_stop,
 112         vnic_m_promisc,
 113         vnic_m_multicst,
 114         vnic_m_unicst,
 115         vnic_m_tx,
 116         NULL,
 117         vnic_m_ioctl,
 118         vnic_m_capab_get,
 119         NULL,
 120         NULL,
 121         vnic_m_setprop,
 122         vnic_m_getprop,
 123         vnic_m_propinfo
 124 };
 125 
 126 void
 127 vnic_dev_init(void)
 128 {
 129         vnic_cache = kmem_cache_create("vnic_cache",
 130             sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 131 
 132         vnic_hash = mod_hash_create_idhash("vnic_hash",
 133             VNIC_HASHSZ, mod_hash_null_valdtor);
 134 
 135         rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
 136 
 137         vnic_count = 0;
 138 }
 139 
 140 void
 141 vnic_dev_fini(void)
 142 {
 143         ASSERT(vnic_count == 0);
 144 
 145         rw_destroy(&vnic_lock);
 146         mod_hash_destroy_idhash(vnic_hash);
 147         kmem_cache_destroy(vnic_cache);
 148 }
 149 
 150 uint_t
 151 vnic_dev_count(void)
 152 {
 153         return (vnic_count);
 154 }
 155 
 156 static vnic_ioc_diag_t
 157 vnic_mac2vnic_diag(mac_diag_t diag)
 158 {
 159         switch (diag) {
 160         case MAC_DIAG_MACADDR_NIC:
 161                 return (VNIC_IOC_DIAG_MACADDR_NIC);
 162         case MAC_DIAG_MACADDR_INUSE:
 163                 return (VNIC_IOC_DIAG_MACADDR_INUSE);
 164         case MAC_DIAG_MACADDR_INVALID:
 165                 return (VNIC_IOC_DIAG_MACADDR_INVALID);
 166         case MAC_DIAG_MACADDRLEN_INVALID:
 167                 return (VNIC_IOC_DIAG_MACADDRLEN_INVALID);
 168         case MAC_DIAG_MACFACTORYSLOTINVALID:
 169                 return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID);
 170         case MAC_DIAG_MACFACTORYSLOTUSED:
 171                 return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED);
 172         case MAC_DIAG_MACFACTORYSLOTALLUSED:
 173                 return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED);
 174         case MAC_DIAG_MACFACTORYNOTSUP:
 175                 return (VNIC_IOC_DIAG_MACFACTORYNOTSUP);
 176         case MAC_DIAG_MACPREFIX_INVALID:
 177                 return (VNIC_IOC_DIAG_MACPREFIX_INVALID);
 178         case MAC_DIAG_MACPREFIXLEN_INVALID:
 179                 return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID);
 180         case MAC_DIAG_MACNO_HWRINGS:
 181                 return (VNIC_IOC_DIAG_NO_HWRINGS);
 182         default:
 183                 return (VNIC_IOC_DIAG_NONE);
 184         }
 185 }
 186 
 187 static int
 188 vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type,
 189     int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg,
 190     uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag,
 191     uint16_t vid, boolean_t req_hwgrp_flag)
 192 {
 193         mac_diag_t mac_diag;
 194         uint16_t mac_flags = 0;
 195         int err;
 196         uint_t addr_len;
 197 
 198         if (flags & VNIC_IOC_CREATE_NODUPCHECK)
 199                 mac_flags |= MAC_UNICAST_NODUPCHECK;
 200 
 201         switch (vnic_addr_type) {
 202         case VNIC_MAC_ADDR_TYPE_FIXED:
 203         case VNIC_MAC_ADDR_TYPE_VRID:
 204                 /*
 205                  * The MAC address value to assign to the VNIC
 206                  * is already provided in mac_addr_arg. addr_len_ptr_arg
 207                  * already contains the MAC address length.
 208                  */
 209                 break;
 210 
 211         case VNIC_MAC_ADDR_TYPE_RANDOM:
 212                 /*
 213                  * Random MAC address. There are two sub-cases:
 214                  *
 215                  * 1 - If mac_len == 0, a new MAC address is generated.
 216                  *      The length of the MAC address to generated depends
 217                  *      on the type of MAC used. The prefix to use for the MAC
 218                  *      address is stored in the most significant bytes
 219                  *      of the mac_addr argument, and its length is specified
 220                  *      by the mac_prefix_len argument. This prefix can
 221                  *      correspond to a IEEE OUI in the case of Ethernet,
 222                  *      for example.
 223                  *
 224                  * 2 - If mac_len > 0, the address was already picked
 225                  *      randomly, and is now passed back during VNIC
 226                  *      re-creation. The mac_addr argument contains the MAC
 227                  *      address that was generated. We distinguish this
 228                  *      case from the fixed MAC address case, since we
 229                  *      want the user consumers to know, when they query
 230                  *      the list of VNICs, that a VNIC was assigned a
 231                  *      random MAC address vs assigned a fixed address
 232                  *      specified by the user.
 233                  */
 234 
 235                 /*
 236                  * If it's a pre-generated address, we're done. mac_addr_arg
 237                  * and addr_len_ptr_arg already contain the MAC address
 238                  * value and length.
 239                  */
 240                 if (*addr_len_ptr_arg > 0)
 241                         break;
 242 
 243                 /* generate a new random MAC address */
 244                 if ((err = mac_addr_random(vnic->vn_mch,
 245                     prefix_len, mac_addr_arg, &mac_diag)) != 0) {
 246                         *diag = vnic_mac2vnic_diag(mac_diag);
 247                         return (err);
 248                 }
 249                 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
 250                 break;
 251 
 252         case VNIC_MAC_ADDR_TYPE_FACTORY:
 253                 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
 254                 if (err != 0) {
 255                         if (err == EINVAL)
 256                                 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID;
 257                         if (err == EBUSY)
 258                                 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED;
 259                         if (err == ENOSPC)
 260                                 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED;
 261                         return (err);
 262                 }
 263 
 264                 mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
 265                     mac_addr_arg, &addr_len, NULL, NULL);
 266                 *addr_len_ptr_arg = addr_len;
 267                 break;
 268 
 269         case VNIC_MAC_ADDR_TYPE_AUTO:
 270                 /* first try to allocate a factory MAC address */
 271                 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
 272                 if (err == 0) {
 273                         mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
 274                             mac_addr_arg, &addr_len, NULL, NULL);
 275                         vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY;
 276                         *addr_len_ptr_arg = addr_len;
 277                         break;
 278                 }
 279 
 280                 /*
 281                  * Allocating a factory MAC address failed, generate a
 282                  * random MAC address instead.
 283                  */
 284                 if ((err = mac_addr_random(vnic->vn_mch,
 285                     prefix_len, mac_addr_arg, &mac_diag)) != 0) {
 286                         *diag = vnic_mac2vnic_diag(mac_diag);
 287                         return (err);
 288                 }
 289                 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
 290                 vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM;
 291                 break;
 292         case VNIC_MAC_ADDR_TYPE_PRIMARY:
 293                 /*
 294                  * We get the address here since we copy it in the
 295                  * vnic's vn_addr.
 296                  * We can't ask for hardware resources since we
 297                  * don't currently support hardware classification
 298                  * for these MAC clients.
 299                  */
 300                 if (req_hwgrp_flag) {
 301                         *diag = VNIC_IOC_DIAG_NO_HWRINGS;
 302                         return (ENOTSUP);
 303                 }
 304                 mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg);
 305                 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
 306                 mac_flags |= MAC_UNICAST_VNIC_PRIMARY;
 307                 break;
 308         }
 309 
 310         vnic->vn_addr_type = vnic_addr_type;
 311 
 312         err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags,
 313             &vnic->vn_muh, vid, &mac_diag);
 314         if (err != 0) {
 315                 if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
 316                         /* release factory MAC address */
 317                         mac_addr_factory_release(vnic->vn_mch, *addr_slot);
 318                 }
 319                 *diag = vnic_mac2vnic_diag(mac_diag);
 320         }
 321 
 322         return (err);
 323 }
 324 
 325 /*
 326  * Create a new VNIC upon request from administrator.
 327  * Returns 0 on success, an errno on failure.
 328  */
 329 /* ARGSUSED */
 330 int
 331 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
 332     vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr,
 333     int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid,
 334     int af, mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag,
 335     cred_t *credp)
 336 {
 337         vnic_t *vnic;
 338         mac_register_t *mac;
 339         int err;
 340         boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0);
 341         char vnic_name[MAXNAMELEN];
 342         const mac_info_t *minfop;
 343         uint32_t req_hwgrp_flag = B_FALSE;
 344 
 345         *diag = VNIC_IOC_DIAG_NONE;
 346 
 347         rw_enter(&vnic_lock, RW_WRITER);
 348 
 349         /* does a VNIC with the same id already exist? */
 350         err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
 351             (mod_hash_val_t *)&vnic);
 352         if (err == 0) {
 353                 rw_exit(&vnic_lock);
 354                 return (EEXIST);
 355         }
 356 
 357         vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
 358         if (vnic == NULL) {
 359                 rw_exit(&vnic_lock);
 360                 return (ENOMEM);
 361         }
 362 
 363         bzero(vnic, sizeof (*vnic));
 364 
 365         vnic->vn_id = vnic_id;
 366         vnic->vn_link_id = linkid;
 367         vnic->vn_vrid = vrid;
 368         vnic->vn_af = af;
 369 
 370         if (!is_anchor) {
 371                 if (linkid == DATALINK_INVALID_LINKID) {
 372                         err = EINVAL;
 373                         goto bail;
 374                 }
 375 
 376                 /*
 377                  * Open the lower MAC and assign its initial bandwidth and
 378                  * MAC address. We do this here during VNIC creation and
 379                  * do not wait until the upper MAC client open so that we
 380                  * can validate the VNIC creation parameters (bandwidth,
 381                  * MAC address, etc) and reserve a factory MAC address if
 382                  * one was requested.
 383                  */
 384                 err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh);
 385                 if (err != 0)
 386                         goto bail;
 387 
 388                 /*
 389                  * VNIC(vlan) over VNICs(vlans) is not supported.
 390                  */
 391                 if (mac_is_vnic(vnic->vn_lower_mh)) {
 392                         err = EINVAL;
 393                         goto bail;
 394                 }
 395 
 396                 /* only ethernet support for now */
 397                 minfop = mac_info(vnic->vn_lower_mh);
 398                 if (minfop->mi_nativemedia != DL_ETHER) {
 399                         err = ENOTSUP;
 400                         goto bail;
 401                 }
 402 
 403                 (void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL,
 404                     NULL);
 405                 err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch,
 406                     vnic_name, MAC_OPEN_FLAGS_IS_VNIC);
 407                 if (err != 0)
 408                         goto bail;
 409 
 410                 if (mrp != NULL) {
 411                         if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 ||
 412                             (mrp->mrp_mask & MRP_TX_RINGS) != 0) {
 413                                 req_hwgrp_flag = B_TRUE;
 414                         }
 415                         err = mac_client_set_resources(vnic->vn_mch, mrp);
 416                         if (err != 0)
 417                                 goto bail;
 418                 }
 419                 /* assign a MAC address to the VNIC */
 420 
 421                 err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot,
 422                     mac_prefix_len, mac_len, mac_addr, flags, diag, vid,
 423                     req_hwgrp_flag);
 424                 if (err != 0) {
 425                         vnic->vn_muh = NULL;
 426                         if (diag != NULL && req_hwgrp_flag)
 427                                 *diag = VNIC_IOC_DIAG_NO_HWRINGS;
 428                         goto bail;
 429                 }
 430 
 431                 /* register to receive notification from underlying MAC */
 432                 vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb,
 433                     vnic);
 434 
 435                 *vnic_addr_type = vnic->vn_addr_type;
 436                 vnic->vn_addr_len = *mac_len;
 437                 vnic->vn_vid = vid;
 438 
 439                 bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len);
 440 
 441                 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY)
 442                         vnic->vn_slot_id = *mac_slot;
 443 
 444                 /*
 445                  * Set the initial VNIC capabilities. If the VNIC is created
 446                  * over MACs which does not support nactive vlan, disable
 447                  * VNIC's hardware checksum capability if its VID is not 0,
 448                  * since the underlying MAC would get the hardware checksum
 449                  * offset wrong in case of VLAN packets.
 450                  */
 451                 if (vid == 0 || !mac_capab_get(vnic->vn_lower_mh,
 452                     MAC_CAPAB_NO_NATIVEVLAN, NULL)) {
 453                         if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM,
 454                             &vnic->vn_hcksum_txflags))
 455                                 vnic->vn_hcksum_txflags = 0;
 456                 } else {
 457                         vnic->vn_hcksum_txflags = 0;
 458                 }
 459         }
 460 
 461         /* register with the MAC module */
 462         if ((mac = mac_alloc(MAC_VERSION)) == NULL)
 463                 goto bail;
 464 
 465         mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
 466         mac->m_driver = vnic;
 467         mac->m_dip = vnic_get_dip();
 468         mac->m_instance = (uint_t)-1;
 469         mac->m_src_addr = vnic->vn_addr;
 470         mac->m_callbacks = &vnic_m_callbacks;
 471 
 472         if (!is_anchor) {
 473                 /*
 474                  * If this is a VNIC based VLAN, then we check for the
 475                  * margin unless it has been created with the force
 476                  * flag. If we are configuring a VLAN over an etherstub,
 477                  * we don't check the margin even if force is not set.
 478                  */
 479                 if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) {
 480                         if (vid != VLAN_ID_NONE)
 481                                 vnic->vn_force = B_TRUE;
 482                         /*
 483                          * As the current margin size of the underlying mac is
 484                          * used to determine the margin size of the VNIC
 485                          * itself, request the underlying mac not to change
 486                          * to a smaller margin size.
 487                          */
 488                         err = mac_margin_add(vnic->vn_lower_mh,
 489                             &vnic->vn_margin, B_TRUE);
 490                         ASSERT(err == 0);
 491                 } else {
 492                         vnic->vn_margin = VLAN_TAGSZ;
 493                         err = mac_margin_add(vnic->vn_lower_mh,
 494                             &vnic->vn_margin, B_FALSE);
 495                         if (err != 0) {
 496                                 mac_free(mac);
 497                                 if (diag != NULL)
 498                                         *diag = VNIC_IOC_DIAG_MACMARGIN_INVALID;
 499                                 goto bail;
 500                         }
 501                 }
 502 
 503                 mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu,
 504                     &mac->m_max_sdu);
 505         } else {
 506                 vnic->vn_margin = VLAN_TAGSZ;
 507                 mac->m_min_sdu = ANCHOR_VNIC_MIN_MTU;
 508                 mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU;
 509         }
 510 
 511         mac->m_margin = vnic->vn_margin;
 512 
 513         err = mac_register(mac, &vnic->vn_mh);
 514         mac_free(mac);
 515         if (err != 0) {
 516                 VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
 517                     vnic->vn_margin) == 0);
 518                 goto bail;
 519         }
 520 
 521         /* Set the VNIC's MAC in the client */
 522         if (!is_anchor)
 523                 mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp);
 524 
 525         err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp));
 526         if (err != 0) {
 527                 VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
 528                     vnic->vn_margin) == 0);
 529                 (void) mac_unregister(vnic->vn_mh);
 530                 goto bail;
 531         }
 532 
 533         /* add new VNIC to hash table */
 534         err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
 535             (mod_hash_val_t)vnic);
 536         ASSERT(err == 0);
 537         vnic_count++;
 538 
 539         vnic->vn_enabled = B_TRUE;
 540         rw_exit(&vnic_lock);
 541 
 542         return (0);
 543 
 544 bail:
 545         rw_exit(&vnic_lock);
 546         if (!is_anchor) {
 547                 if (vnic->vn_mnh != NULL)
 548                         (void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
 549                 if (vnic->vn_muh != NULL)
 550                         (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
 551                 if (vnic->vn_mch != NULL)
 552                         mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
 553                 if (vnic->vn_lower_mh != NULL)
 554                         mac_close(vnic->vn_lower_mh);
 555         }
 556 
 557         kmem_cache_free(vnic_cache, vnic);
 558         return (err);
 559 }
 560 
 561 /*
 562  * Modify the properties of an existing VNIC.
 563  */
 564 /* ARGSUSED */
 565 int
 566 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask,
 567     vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr,
 568     uint_t mac_slot, mac_resource_props_t *mrp)
 569 {
 570         vnic_t *vnic = NULL;
 571 
 572         rw_enter(&vnic_lock, RW_WRITER);
 573 
 574         if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
 575             (mod_hash_val_t *)&vnic) != 0) {
 576                 rw_exit(&vnic_lock);
 577                 return (ENOENT);
 578         }
 579 
 580         rw_exit(&vnic_lock);
 581 
 582         return (0);
 583 }
 584 
 585 /* ARGSUSED */
 586 int
 587 vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp)
 588 {
 589         vnic_t *vnic = NULL;
 590         mod_hash_val_t val;
 591         datalink_id_t tmpid;
 592         int rc;
 593 
 594         rw_enter(&vnic_lock, RW_WRITER);
 595 
 596         if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
 597             (mod_hash_val_t *)&vnic) != 0) {
 598                 rw_exit(&vnic_lock);
 599                 return (ENOENT);
 600         }
 601 
 602         if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) {
 603                 rw_exit(&vnic_lock);
 604                 return (rc);
 605         }
 606 
 607         ASSERT(vnic_id == tmpid);
 608 
 609         /*
 610          * We cannot unregister the MAC yet. Unregistering would
 611          * free up mac_impl_t which should not happen at this time.
 612          * So disable mac_impl_t by calling mac_disable(). This will prevent
 613          * any new claims on mac_impl_t.
 614          */
 615         if ((rc = mac_disable(vnic->vn_mh)) != 0) {
 616                 (void) dls_devnet_create(vnic->vn_mh, vnic_id,
 617                     crgetzoneid(credp));
 618                 rw_exit(&vnic_lock);
 619                 return (rc);
 620         }
 621 
 622         vnic->vn_enabled = B_FALSE;
 623         (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
 624         ASSERT(vnic == (vnic_t *)val);
 625         vnic_count--;
 626         rw_exit(&vnic_lock);
 627 
 628         /*
 629          * XXX-nicolas shouldn't have a void cast here, if it's
 630          * expected that the function will never fail, then we should
 631          * have an ASSERT().
 632          */
 633         (void) mac_unregister(vnic->vn_mh);
 634 
 635         if (vnic->vn_lower_mh != NULL) {
 636                 /*
 637                  * Check if MAC address for the vnic was obtained from the
 638                  * factory MAC addresses. If yes, release it.
 639                  */
 640                 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
 641                         (void) mac_addr_factory_release(vnic->vn_mch,
 642                             vnic->vn_slot_id);
 643                 }
 644                 (void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin);
 645                 (void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
 646                 (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
 647                 mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
 648                 mac_close(vnic->vn_lower_mh);
 649         }
 650 
 651         kmem_cache_free(vnic_cache, vnic);
 652         return (0);
 653 }
 654 
 655 /* ARGSUSED */
 656 mblk_t *
 657 vnic_m_tx(void *arg, mblk_t *mp_chain)
 658 {
 659         /*
 660          * This function could be invoked for an anchor VNIC when sending
 661          * broadcast and multicast packets, and unicast packets which did
 662          * not match any local known destination.
 663          */
 664         freemsgchain(mp_chain);
 665         return (NULL);
 666 }
 667 
 668 /*ARGSUSED*/
 669 static void
 670 vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
 671 {
 672         miocnak(q, mp, 0, ENOTSUP);
 673 }
 674 
 675 /*
 676  * This entry point cannot be passed-through, since it is invoked
 677  * for the per-VNIC kstats which must be exported independently
 678  * of the existence of VNIC MAC clients.
 679  */
 680 static int
 681 vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
 682 {
 683         vnic_t *vnic = arg;
 684         int rval = 0;
 685 
 686         if (vnic->vn_lower_mh == NULL) {
 687                 /*
 688                  * It's an anchor VNIC, which does not have any
 689                  * statistics in itself.
 690                  */
 691                 return (ENOTSUP);
 692         }
 693 
 694         /*
 695          * ENOTSUP must be reported for unsupported stats, the VNIC
 696          * driver reports a subset of the stats that would
 697          * be returned by a real piece of hardware.
 698          */
 699 
 700         switch (stat) {
 701         case MAC_STAT_LINK_STATE:
 702         case MAC_STAT_LINK_UP:
 703         case MAC_STAT_PROMISC:
 704         case MAC_STAT_IFSPEED:
 705         case MAC_STAT_MULTIRCV:
 706         case MAC_STAT_MULTIXMT:
 707         case MAC_STAT_BRDCSTRCV:
 708         case MAC_STAT_BRDCSTXMT:
 709         case MAC_STAT_OPACKETS:
 710         case MAC_STAT_OBYTES:
 711         case MAC_STAT_IERRORS:
 712         case MAC_STAT_OERRORS:
 713         case MAC_STAT_RBYTES:
 714         case MAC_STAT_IPACKETS:
 715                 *val = mac_client_stat_get(vnic->vn_mch, stat);
 716                 break;
 717         default:
 718                 rval = ENOTSUP;
 719         }
 720 
 721         return (rval);
 722 }
 723 
 724 /*
 725  * Invoked by the upper MAC to retrieve the lower MAC client handle
 726  * corresponding to a VNIC. A pointer to this function is obtained
 727  * by the upper MAC via capability query.
 728  *
 729  * XXX-nicolas Note: this currently causes all VNIC MAC clients to
 730  * receive the same MAC client handle for the same VNIC. This is ok
 731  * as long as we have only one VNIC MAC client which sends and
 732  * receives data, but we don't currently enforce this at the MAC layer.
 733  */
 734 static void *
 735 vnic_mac_client_handle(void *vnic_arg)
 736 {
 737         vnic_t *vnic = vnic_arg;
 738 
 739         return (vnic->vn_mch);
 740 }
 741 
 742 
 743 /*
 744  * Return information about the specified capability.
 745  */
 746 /* ARGSUSED */
 747 static boolean_t
 748 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
 749 {
 750         vnic_t *vnic = arg;
 751 
 752         switch (cap) {
 753         case MAC_CAPAB_HCKSUM: {
 754                 uint32_t *hcksum_txflags = cap_data;
 755 
 756                 *hcksum_txflags = vnic->vn_hcksum_txflags &
 757                     (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
 758                     HCKSUM_INET_PARTIAL);
 759                 break;
 760         }
 761         case MAC_CAPAB_VNIC: {
 762                 mac_capab_vnic_t *vnic_capab = cap_data;
 763 
 764                 if (vnic->vn_lower_mh == NULL) {
 765                         /*
 766                          * It's an anchor VNIC, we don't have an underlying
 767                          * NIC and MAC client handle.
 768                          */
 769                         return (B_FALSE);
 770                 }
 771 
 772                 if (vnic_capab != NULL) {
 773                         vnic_capab->mcv_arg = vnic;
 774                         vnic_capab->mcv_mac_client_handle =
 775                             vnic_mac_client_handle;
 776                 }
 777                 break;
 778         }
 779         case MAC_CAPAB_ANCHOR_VNIC: {
 780                 /* since it's an anchor VNIC we don't have lower mac handle */
 781                 if (vnic->vn_lower_mh == NULL) {
 782                         ASSERT(vnic->vn_link_id == 0);
 783                         return (B_TRUE);
 784                 }
 785                 return (B_FALSE);
 786         }
 787         case MAC_CAPAB_NO_NATIVEVLAN:
 788                 return (B_FALSE);
 789         case MAC_CAPAB_NO_ZCOPY:
 790                 return (B_TRUE);
 791         case MAC_CAPAB_VRRP: {
 792                 mac_capab_vrrp_t *vrrp_capab = cap_data;
 793 
 794                 if (vnic->vn_vrid != 0) {
 795                         if (vrrp_capab != NULL)
 796                                 vrrp_capab->mcv_af = vnic->vn_af;
 797                         return (B_TRUE);
 798                 }
 799                 return (B_FALSE);
 800         }
 801         default:
 802                 return (B_FALSE);
 803         }
 804         return (B_TRUE);
 805 }
 806 
 807 /* ARGSUSED */
 808 static int
 809 vnic_m_start(void *arg)
 810 {
 811         return (0);
 812 }
 813 
 814 /* ARGSUSED */
 815 static void
 816 vnic_m_stop(void *arg)
 817 {
 818 }
 819 
 820 /* ARGSUSED */
 821 static int
 822 vnic_m_promisc(void *arg, boolean_t on)
 823 {
 824         return (0);
 825 }
 826 
 827 /* ARGSUSED */
 828 static int
 829 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
 830 {
 831         return (0);
 832 }
 833 
 834 static int
 835 vnic_m_unicst(void *arg, const uint8_t *macaddr)
 836 {
 837         vnic_t *vnic = arg;
 838 
 839         return (mac_vnic_unicast_set(vnic->vn_mch, macaddr));
 840 }
 841 
 842 /*
 843  * Callback functions for set/get of properties
 844  */
 845 /*ARGSUSED*/
 846 static int
 847 vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
 848     uint_t pr_valsize, const void *pr_val)
 849 {
 850         int             err = ENOTSUP;
 851         vnic_t          *vn = m_driver;
 852 
 853         switch (pr_num) {
 854         case MAC_PROP_MTU: {
 855                 uint32_t        mtu;
 856 
 857                 /* allow setting MTU only on an etherstub */
 858                 if (vn->vn_link_id != DATALINK_INVALID_LINKID)
 859                         return (err);
 860 
 861                 if (pr_valsize < sizeof (mtu)) {
 862                         err = EINVAL;
 863                         break;
 864                 }
 865                 bcopy(pr_val, &mtu, sizeof (mtu));
 866                 if (mtu < ANCHOR_VNIC_MIN_MTU || mtu > ANCHOR_VNIC_MAX_MTU) {
 867                         err = EINVAL;
 868                         break;
 869                 }
 870                 err = mac_maxsdu_update(vn->vn_mh, mtu);
 871                 break;
 872         }
 873         case MAC_PROP_VN_PROMISC_FILTERED: {
 874                 boolean_t filtered;
 875 
 876                 if (pr_valsize < sizeof (filtered)) {
 877                         err = EINVAL;
 878                         break;
 879                 }
 880 
 881                 bcopy(pr_val, &filtered, sizeof (filtered));
 882                 err = mac_set_promisc_filtered(vn->vn_mch, filtered);
 883         }
 884         default:
 885                 break;
 886         }
 887         return (err);
 888 }
 889 
 890 static int
 891 vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 892     uint_t pr_valsize, void *pr_val)
 893 {
 894         vnic_t          *vn = arg;
 895         int             ret = 0;
 896         boolean_t       out;
 897 
 898         switch (pr_num) {
 899         case MAC_PROP_VN_PROMISC_FILTERED:
 900                 out = mac_get_promisc_filtered(vn->vn_mch);
 901                 ASSERT(pr_valsize >= sizeof (boolean_t));
 902                 bcopy(&out, pr_val, sizeof (boolean_t));
 903                 break;
 904         default:
 905                 ret = EINVAL;
 906                 break;
 907         }
 908 
 909         return (ret);
 910 }
 911 
 912 /* ARGSUSED */
 913 static void vnic_m_propinfo(void *m_driver, const char *pr_name,
 914     mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
 915 {
 916         vnic_t          *vn = m_driver;
 917 
 918         /* MTU setting allowed only on an etherstub */
 919         if (vn->vn_link_id != DATALINK_INVALID_LINKID)
 920                 return;
 921 
 922         switch (pr_num) {
 923         case MAC_PROP_MTU:
 924                 mac_prop_info_set_range_uint32(prh,
 925                     ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU);
 926                 break;
 927         }
 928 }
 929 
 930 
 931 int
 932 vnic_info(vnic_info_t *info, cred_t *credp)
 933 {
 934         vnic_t          *vnic;
 935         int             err;
 936 
 937         /* Make sure that the VNIC link is visible from the caller's zone. */
 938         if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp)))
 939                 return (ENOENT);
 940 
 941         rw_enter(&vnic_lock, RW_WRITER);
 942 
 943         err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id),
 944             (mod_hash_val_t *)&vnic);
 945         if (err != 0) {
 946                 rw_exit(&vnic_lock);
 947                 return (ENOENT);
 948         }
 949 
 950         info->vn_link_id = vnic->vn_link_id;
 951         info->vn_mac_addr_type = vnic->vn_addr_type;
 952         info->vn_mac_len = vnic->vn_addr_len;
 953         bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN);
 954         info->vn_mac_slot = vnic->vn_slot_id;
 955         info->vn_mac_prefix_len = 0;
 956         info->vn_vid = vnic->vn_vid;
 957         info->vn_force = vnic->vn_force;
 958         info->vn_vrid = vnic->vn_vrid;
 959         info->vn_af = vnic->vn_af;
 960 
 961         bzero(&info->vn_resource_props, sizeof (mac_resource_props_t));
 962         if (vnic->vn_mch != NULL)
 963                 mac_resource_ctl_get(vnic->vn_mch, &info->vn_resource_props);
 964 
 965         rw_exit(&vnic_lock);
 966         return (0);
 967 }
 968 
 969 static void
 970 vnic_notify_cb(void *arg, mac_notify_type_t type)
 971 {
 972         vnic_t *vnic = arg;
 973 
 974         /*
 975          * Do not deliver notifications if the vnic is not fully initialized
 976          * or is in process of being torn down.
 977          */
 978         if (!vnic->vn_enabled)
 979                 return;
 980 
 981         switch (type) {
 982         case MAC_NOTE_UNICST:
 983                 /*
 984                  * Only the VLAN VNIC needs to be notified with primary MAC
 985                  * address change.
 986                  */
 987                 if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY)
 988                         return;
 989 
 990                 /*  the unicast MAC address value */
 991                 mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr);
 992 
 993                 /* notify its upper layer MAC about MAC address change */
 994                 mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr);
 995                 break;
 996 
 997         case MAC_NOTE_LINK:
 998                 mac_link_update(vnic->vn_mh,
 999                     mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
1000                 break;
1001 
1002         default:
1003                 break;
1004         }
1005 }