1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2012, Joyent, Inc.  All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/cred.h>
  28 #include <sys/sysmacros.h>
  29 #include <sys/conf.h>
  30 #include <sys/cmn_err.h>
  31 #include <sys/list.h>
  32 #include <sys/ksynch.h>
  33 #include <sys/kmem.h>
  34 #include <sys/stream.h>
  35 #include <sys/modctl.h>
  36 #include <sys/ddi.h>
  37 #include <sys/sunddi.h>
  38 #include <sys/atomic.h>
  39 #include <sys/stat.h>
  40 #include <sys/modhash.h>
  41 #include <sys/strsubr.h>
  42 #include <sys/strsun.h>
  43 #include <sys/dlpi.h>
  44 #include <sys/mac.h>
  45 #include <sys/mac_provider.h>
  46 #include <sys/mac_client.h>
  47 #include <sys/mac_client_priv.h>
  48 #include <sys/mac_ether.h>
  49 #include <sys/dls.h>
  50 #include <sys/pattr.h>
  51 #include <sys/time.h>
  52 #include <sys/vlan.h>
  53 #include <sys/vnic.h>
  54 #include <sys/vnic_impl.h>
  55 #include <sys/mac_impl.h>
  56 #include <sys/mac_flow_impl.h>
  57 #include <inet/ip_impl.h>
  58 
/*
 * Note that for best performance, the VNIC is a passthrough design.
 * Each VNIC corresponds to a MAC client of the underlying MAC (lower MAC).
 * This MAC client is opened by the VNIC driver at VNIC creation,
 * and closed when the VNIC is deleted.
 * When a MAC client of the VNIC itself opens a VNIC, the MAC layer
 * (upper MAC) detects that the MAC being opened is a VNIC. Instead
 * of allocating a new MAC client, it asks the VNIC driver to return
 * the lower MAC client handle associated with the VNIC, and that handle
 * is returned to the upper MAC client directly. This gives upper MAC
 * clients of the VNIC direct access to the lower MAC client for both
 * the control path and the data path.
 *
 * Due to this passthrough, some of the entry points exported by the
 * VNIC driver are never directly invoked. These entry points include
 * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc.
 */
  76 
  77 static int vnic_m_start(void *);
  78 static void vnic_m_stop(void *);
  79 static int vnic_m_promisc(void *, boolean_t);
  80 static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
  81 static int vnic_m_unicst(void *, const uint8_t *);
  82 static int vnic_m_stat(void *, uint_t, uint64_t *);
  83 static void vnic_m_ioctl(void *, queue_t *, mblk_t *);
  84 static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
  85     const void *);
  86 static int vnic_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
  87 static void vnic_m_propinfo(void *, const char *, mac_prop_id_t,
  88     mac_prop_info_handle_t);
  89 static mblk_t *vnic_m_tx(void *, mblk_t *);
  90 static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
  91 static void vnic_notify_cb(void *, mac_notify_type_t);
  92 
  93 static kmem_cache_t     *vnic_cache;
  94 static krwlock_t        vnic_lock;
  95 static uint_t           vnic_count;
  96 
  97 #define ANCHOR_VNIC_MIN_MTU     576
  98 #define ANCHOR_VNIC_MAX_MTU     9000
  99 
 100 /* hash of VNICs (vnic_t's), keyed by VNIC id */
 101 static mod_hash_t       *vnic_hash;
 102 #define VNIC_HASHSZ     64
 103 #define VNIC_HASH_KEY(vnic_id)  ((mod_hash_key_t)(uintptr_t)vnic_id)
 104 
 105 #define VNIC_M_CALLBACK_FLAGS   \
 106         (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
 107 
 108 static mac_callbacks_t vnic_m_callbacks = {
 109         VNIC_M_CALLBACK_FLAGS,
 110         vnic_m_stat,
 111         vnic_m_start,
 112         vnic_m_stop,
 113         vnic_m_promisc,
 114         vnic_m_multicst,
 115         vnic_m_unicst,
 116         vnic_m_tx,
 117         NULL,
 118         vnic_m_ioctl,
 119         vnic_m_capab_get,
 120         NULL,
 121         NULL,
 122         vnic_m_setprop,
 123         vnic_m_getprop,
 124         vnic_m_propinfo
 125 };
 126 
 127 void
 128 vnic_dev_init(void)
 129 {
 130         vnic_cache = kmem_cache_create("vnic_cache",
 131             sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
 132 
 133         vnic_hash = mod_hash_create_idhash("vnic_hash",
 134             VNIC_HASHSZ, mod_hash_null_valdtor);
 135 
 136         rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
 137 
 138         vnic_count = 0;
 139 }
 140 
 141 void
 142 vnic_dev_fini(void)
 143 {
 144         ASSERT(vnic_count == 0);
 145 
 146         rw_destroy(&vnic_lock);
 147         mod_hash_destroy_idhash(vnic_hash);
 148         kmem_cache_destroy(vnic_cache);
 149 }
 150 
 151 uint_t
 152 vnic_dev_count(void)
 153 {
 154         return (vnic_count);
 155 }
 156 
 157 static vnic_ioc_diag_t
 158 vnic_mac2vnic_diag(mac_diag_t diag)
 159 {
 160         switch (diag) {
 161         case MAC_DIAG_MACADDR_NIC:
 162                 return (VNIC_IOC_DIAG_MACADDR_NIC);
 163         case MAC_DIAG_MACADDR_INUSE:
 164                 return (VNIC_IOC_DIAG_MACADDR_INUSE);
 165         case MAC_DIAG_MACADDR_INVALID:
 166                 return (VNIC_IOC_DIAG_MACADDR_INVALID);
 167         case MAC_DIAG_MACADDRLEN_INVALID:
 168                 return (VNIC_IOC_DIAG_MACADDRLEN_INVALID);
 169         case MAC_DIAG_MACFACTORYSLOTINVALID:
 170                 return (VNIC_IOC_DIAG_MACFACTORYSLOTINVALID);
 171         case MAC_DIAG_MACFACTORYSLOTUSED:
 172                 return (VNIC_IOC_DIAG_MACFACTORYSLOTUSED);
 173         case MAC_DIAG_MACFACTORYSLOTALLUSED:
 174                 return (VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED);
 175         case MAC_DIAG_MACFACTORYNOTSUP:
 176                 return (VNIC_IOC_DIAG_MACFACTORYNOTSUP);
 177         case MAC_DIAG_MACPREFIX_INVALID:
 178                 return (VNIC_IOC_DIAG_MACPREFIX_INVALID);
 179         case MAC_DIAG_MACPREFIXLEN_INVALID:
 180                 return (VNIC_IOC_DIAG_MACPREFIXLEN_INVALID);
 181         case MAC_DIAG_MACNO_HWRINGS:
 182                 return (VNIC_IOC_DIAG_NO_HWRINGS);
 183         default:
 184                 return (VNIC_IOC_DIAG_NONE);
 185         }
 186 }
 187 
 188 static int
 189 vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type,
 190     int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg,
 191     uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag,
 192     uint16_t vid, boolean_t req_hwgrp_flag)
 193 {
 194         mac_diag_t mac_diag;
 195         uint16_t mac_flags = 0;
 196         int err;
 197         uint_t addr_len;
 198 
 199         if (flags & VNIC_IOC_CREATE_NODUPCHECK)
 200                 mac_flags |= MAC_UNICAST_NODUPCHECK;
 201 
 202         switch (vnic_addr_type) {
 203         case VNIC_MAC_ADDR_TYPE_FIXED:
 204         case VNIC_MAC_ADDR_TYPE_VRID:
 205                 /*
 206                  * The MAC address value to assign to the VNIC
 207                  * is already provided in mac_addr_arg. addr_len_ptr_arg
 208                  * already contains the MAC address length.
 209                  */
 210                 break;
 211 
 212         case VNIC_MAC_ADDR_TYPE_RANDOM:
 213                 /*
 214                  * Random MAC address. There are two sub-cases:
 215                  *
 216                  * 1 - If mac_len == 0, a new MAC address is generated.
 217                  *      The length of the MAC address to generated depends
 218                  *      on the type of MAC used. The prefix to use for the MAC
 219                  *      address is stored in the most significant bytes
 220                  *      of the mac_addr argument, and its length is specified
 221                  *      by the mac_prefix_len argument. This prefix can
 222                  *      correspond to a IEEE OUI in the case of Ethernet,
 223                  *      for example.
 224                  *
 225                  * 2 - If mac_len > 0, the address was already picked
 226                  *      randomly, and is now passed back during VNIC
 227                  *      re-creation. The mac_addr argument contains the MAC
 228                  *      address that was generated. We distinguish this
 229                  *      case from the fixed MAC address case, since we
 230                  *      want the user consumers to know, when they query
 231                  *      the list of VNICs, that a VNIC was assigned a
 232                  *      random MAC address vs assigned a fixed address
 233                  *      specified by the user.
 234                  */
 235 
 236                 /*
 237                  * If it's a pre-generated address, we're done. mac_addr_arg
 238                  * and addr_len_ptr_arg already contain the MAC address
 239                  * value and length.
 240                  */
 241                 if (*addr_len_ptr_arg > 0)
 242                         break;
 243 
 244                 /* generate a new random MAC address */
 245                 if ((err = mac_addr_random(vnic->vn_mch,
 246                     prefix_len, mac_addr_arg, &mac_diag)) != 0) {
 247                         *diag = vnic_mac2vnic_diag(mac_diag);
 248                         return (err);
 249                 }
 250                 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
 251                 break;
 252 
 253         case VNIC_MAC_ADDR_TYPE_FACTORY:
 254                 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
 255                 if (err != 0) {
 256                         if (err == EINVAL)
 257                                 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID;
 258                         if (err == EBUSY)
 259                                 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED;
 260                         if (err == ENOSPC)
 261                                 *diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED;
 262                         return (err);
 263                 }
 264 
 265                 mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
 266                     mac_addr_arg, &addr_len, NULL, NULL);
 267                 *addr_len_ptr_arg = addr_len;
 268                 break;
 269 
 270         case VNIC_MAC_ADDR_TYPE_AUTO:
 271                 /* first try to allocate a factory MAC address */
 272                 err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
 273                 if (err == 0) {
 274                         mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
 275                             mac_addr_arg, &addr_len, NULL, NULL);
 276                         vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY;
 277                         *addr_len_ptr_arg = addr_len;
 278                         break;
 279                 }
 280 
 281                 /*
 282                  * Allocating a factory MAC address failed, generate a
 283                  * random MAC address instead.
 284                  */
 285                 if ((err = mac_addr_random(vnic->vn_mch,
 286                     prefix_len, mac_addr_arg, &mac_diag)) != 0) {
 287                         *diag = vnic_mac2vnic_diag(mac_diag);
 288                         return (err);
 289                 }
 290                 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
 291                 vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM;
 292                 break;
 293         case VNIC_MAC_ADDR_TYPE_PRIMARY:
 294                 /*
 295                  * We get the address here since we copy it in the
 296                  * vnic's vn_addr.
 297                  * We can't ask for hardware resources since we
 298                  * don't currently support hardware classification
 299                  * for these MAC clients.
 300                  */
 301                 if (req_hwgrp_flag) {
 302                         *diag = VNIC_IOC_DIAG_NO_HWRINGS;
 303                         return (ENOTSUP);
 304                 }
 305                 mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg);
 306                 *addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
 307                 mac_flags |= MAC_UNICAST_VNIC_PRIMARY;
 308                 break;
 309         }
 310 
 311         vnic->vn_addr_type = vnic_addr_type;
 312 
 313         err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags,
 314             &vnic->vn_muh, vid, &mac_diag);
 315         if (err != 0) {
 316                 if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
 317                         /* release factory MAC address */
 318                         mac_addr_factory_release(vnic->vn_mch, *addr_slot);
 319                 }
 320                 *diag = vnic_mac2vnic_diag(mac_diag);
 321         }
 322 
 323         return (err);
 324 }
 325 
 326 /*
 327  * Create a new VNIC upon request from administrator.
 328  * Returns 0 on success, an errno on failure.
 329  */
 330 /* ARGSUSED */
 331 int
 332 vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
 333     vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr,
 334     int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid,
 335     int af, mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag,
 336     cred_t *credp)
 337 {
 338         vnic_t *vnic;
 339         mac_register_t *mac;
 340         int err;
 341         boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0);
 342         char vnic_name[MAXNAMELEN];
 343         const mac_info_t *minfop;
 344         uint32_t req_hwgrp_flag = B_FALSE;
 345 
 346         *diag = VNIC_IOC_DIAG_NONE;
 347 
 348         rw_enter(&vnic_lock, RW_WRITER);
 349 
 350         /* does a VNIC with the same id already exist? */
 351         err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
 352             (mod_hash_val_t *)&vnic);
 353         if (err == 0) {
 354                 rw_exit(&vnic_lock);
 355                 return (EEXIST);
 356         }
 357 
 358         vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
 359         if (vnic == NULL) {
 360                 rw_exit(&vnic_lock);
 361                 return (ENOMEM);
 362         }
 363 
 364         bzero(vnic, sizeof (*vnic));
 365 
 366         vnic->vn_id = vnic_id;
 367         vnic->vn_link_id = linkid;
 368         vnic->vn_vrid = vrid;
 369         vnic->vn_af = af;
 370 
 371         if (!is_anchor) {
 372                 if (linkid == DATALINK_INVALID_LINKID) {
 373                         err = EINVAL;
 374                         goto bail;
 375                 }
 376 
 377                 /*
 378                  * Open the lower MAC and assign its initial bandwidth and
 379                  * MAC address. We do this here during VNIC creation and
 380                  * do not wait until the upper MAC client open so that we
 381                  * can validate the VNIC creation parameters (bandwidth,
 382                  * MAC address, etc) and reserve a factory MAC address if
 383                  * one was requested.
 384                  */
 385                 err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh);
 386                 if (err != 0)
 387                         goto bail;
 388 
 389                 /*
 390                  * VNIC(vlan) over VNICs(vlans) is not supported.
 391                  */
 392                 if (mac_is_vnic(vnic->vn_lower_mh)) {
 393                         err = EINVAL;
 394                         goto bail;
 395                 }
 396 
 397                 /* only ethernet support for now */
 398                 minfop = mac_info(vnic->vn_lower_mh);
 399                 if (minfop->mi_nativemedia != DL_ETHER) {
 400                         err = ENOTSUP;
 401                         goto bail;
 402                 }
 403 
 404                 (void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL,
 405                     NULL);
 406                 err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch,
 407                     vnic_name, MAC_OPEN_FLAGS_IS_VNIC);
 408                 if (err != 0)
 409                         goto bail;
 410 
 411                 if (mrp != NULL) {
 412                         if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 ||
 413                             (mrp->mrp_mask & MRP_TX_RINGS) != 0) {
 414                                 req_hwgrp_flag = B_TRUE;
 415                         }
 416                         err = mac_client_set_resources(vnic->vn_mch, mrp);
 417                         if (err != 0)
 418                                 goto bail;
 419                 }
 420                 /* assign a MAC address to the VNIC */
 421 
 422                 err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot,
 423                     mac_prefix_len, mac_len, mac_addr, flags, diag, vid,
 424                     req_hwgrp_flag);
 425                 if (err != 0) {
 426                         vnic->vn_muh = NULL;
 427                         if (diag != NULL && req_hwgrp_flag)
 428                                 *diag = VNIC_IOC_DIAG_NO_HWRINGS;
 429                         goto bail;
 430                 }
 431 
 432                 /* register to receive notification from underlying MAC */
 433                 vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb,
 434                     vnic);
 435 
 436                 *vnic_addr_type = vnic->vn_addr_type;
 437                 vnic->vn_addr_len = *mac_len;
 438                 vnic->vn_vid = vid;
 439 
 440                 bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len);
 441 
 442                 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY)
 443                         vnic->vn_slot_id = *mac_slot;
 444 
 445                 /*
 446                  * Set the initial VNIC capabilities. If the VNIC is created
 447                  * over MACs which does not support nactive vlan, disable
 448                  * VNIC's hardware checksum capability if its VID is not 0,
 449                  * since the underlying MAC would get the hardware checksum
 450                  * offset wrong in case of VLAN packets.
 451                  */
 452                 if (vid == 0 || !mac_capab_get(vnic->vn_lower_mh,
 453                     MAC_CAPAB_NO_NATIVEVLAN, NULL)) {
 454                         if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM,
 455                             &vnic->vn_hcksum_txflags))
 456                                 vnic->vn_hcksum_txflags = 0;
 457                 } else {
 458                         vnic->vn_hcksum_txflags = 0;
 459                 }
 460         }
 461 
 462         /* register with the MAC module */
 463         if ((mac = mac_alloc(MAC_VERSION)) == NULL)
 464                 goto bail;
 465 
 466         mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
 467         mac->m_driver = vnic;
 468         mac->m_dip = vnic_get_dip();
 469         mac->m_instance = (uint_t)-1;
 470         mac->m_src_addr = vnic->vn_addr;
 471         mac->m_callbacks = &vnic_m_callbacks;
 472 
 473         if (!is_anchor) {
 474                 /*
 475                  * If this is a VNIC based VLAN, then we check for the
 476                  * margin unless it has been created with the force
 477                  * flag. If we are configuring a VLAN over an etherstub,
 478                  * we don't check the margin even if force is not set.
 479                  */
 480                 if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) {
 481                         if (vid != VLAN_ID_NONE)
 482                                 vnic->vn_force = B_TRUE;
 483                         /*
 484                          * As the current margin size of the underlying mac is
 485                          * used to determine the margin size of the VNIC
 486                          * itself, request the underlying mac not to change
 487                          * to a smaller margin size.
 488                          */
 489                         err = mac_margin_add(vnic->vn_lower_mh,
 490                             &vnic->vn_margin, B_TRUE);
 491                         ASSERT(err == 0);
 492                 } else {
 493                         vnic->vn_margin = VLAN_TAGSZ;
 494                         err = mac_margin_add(vnic->vn_lower_mh,
 495                             &vnic->vn_margin, B_FALSE);
 496                         if (err != 0) {
 497                                 mac_free(mac);
 498                                 if (diag != NULL)
 499                                         *diag = VNIC_IOC_DIAG_MACMARGIN_INVALID;
 500                                 goto bail;
 501                         }
 502                 }
 503 
 504                 mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu,
 505                     &mac->m_max_sdu);
 506         } else {
 507                 vnic->vn_margin = VLAN_TAGSZ;
 508                 mac->m_min_sdu = ANCHOR_VNIC_MIN_MTU;
 509                 mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU;
 510         }
 511 
 512         mac->m_margin = vnic->vn_margin;
 513 
 514         err = mac_register(mac, &vnic->vn_mh);
 515         mac_free(mac);
 516         if (err != 0) {
 517                 VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
 518                     vnic->vn_margin) == 0);
 519                 goto bail;
 520         }
 521 
 522         /* Set the VNIC's MAC in the client */
 523         if (!is_anchor)
 524                 mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp);
 525 
 526         err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp));
 527         if (err != 0) {
 528                 VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
 529                     vnic->vn_margin) == 0);
 530                 (void) mac_unregister(vnic->vn_mh);
 531                 goto bail;
 532         }
 533 
 534         /* add new VNIC to hash table */
 535         err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
 536             (mod_hash_val_t)vnic);
 537         ASSERT(err == 0);
 538         vnic_count++;
 539 
 540         vnic->vn_enabled = B_TRUE;
 541         rw_exit(&vnic_lock);
 542 
 543         return (0);
 544 
 545 bail:
 546         rw_exit(&vnic_lock);
 547         if (!is_anchor) {
 548                 if (vnic->vn_mnh != NULL)
 549                         (void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
 550                 if (vnic->vn_muh != NULL)
 551                         (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
 552                 if (vnic->vn_mch != NULL)
 553                         mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
 554                 if (vnic->vn_lower_mh != NULL)
 555                         mac_close(vnic->vn_lower_mh);
 556         }
 557 
 558         kmem_cache_free(vnic_cache, vnic);
 559         return (err);
 560 }
 561 
 562 /*
 563  * Modify the properties of an existing VNIC.
 564  */
 565 /* ARGSUSED */
 566 int
 567 vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask,
 568     vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr,
 569     uint_t mac_slot, mac_resource_props_t *mrp)
 570 {
 571         vnic_t *vnic = NULL;
 572 
 573         rw_enter(&vnic_lock, RW_WRITER);
 574 
 575         if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
 576             (mod_hash_val_t *)&vnic) != 0) {
 577                 rw_exit(&vnic_lock);
 578                 return (ENOENT);
 579         }
 580 
 581         rw_exit(&vnic_lock);
 582 
 583         return (0);
 584 }
 585 
 586 /* ARGSUSED */
 587 int
 588 vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp)
 589 {
 590         vnic_t *vnic = NULL;
 591         mod_hash_val_t val;
 592         datalink_id_t tmpid;
 593         int rc;
 594 
 595         rw_enter(&vnic_lock, RW_WRITER);
 596 
 597         if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
 598             (mod_hash_val_t *)&vnic) != 0) {
 599                 rw_exit(&vnic_lock);
 600                 return (ENOENT);
 601         }
 602 
 603         if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) {
 604                 rw_exit(&vnic_lock);
 605                 return (rc);
 606         }
 607 
 608         ASSERT(vnic_id == tmpid);
 609 
 610         /*
 611          * We cannot unregister the MAC yet. Unregistering would
 612          * free up mac_impl_t which should not happen at this time.
 613          * So disable mac_impl_t by calling mac_disable(). This will prevent
 614          * any new claims on mac_impl_t.
 615          */
 616         if ((rc = mac_disable(vnic->vn_mh)) != 0) {
 617                 (void) dls_devnet_create(vnic->vn_mh, vnic_id,
 618                     crgetzoneid(credp));
 619                 rw_exit(&vnic_lock);
 620                 return (rc);
 621         }
 622 
 623         vnic->vn_enabled = B_FALSE;
 624         (void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
 625         ASSERT(vnic == (vnic_t *)val);
 626         vnic_count--;
 627         rw_exit(&vnic_lock);
 628 
 629         /*
 630          * XXX-nicolas shouldn't have a void cast here, if it's
 631          * expected that the function will never fail, then we should
 632          * have an ASSERT().
 633          */
 634         (void) mac_unregister(vnic->vn_mh);
 635 
 636         if (vnic->vn_lower_mh != NULL) {
 637                 /*
 638                  * Check if MAC address for the vnic was obtained from the
 639                  * factory MAC addresses. If yes, release it.
 640                  */
 641                 if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
 642                         (void) mac_addr_factory_release(vnic->vn_mch,
 643                             vnic->vn_slot_id);
 644                 }
 645                 (void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin);
 646                 (void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
 647                 (void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
 648                 mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
 649                 mac_close(vnic->vn_lower_mh);
 650         }
 651 
 652         kmem_cache_free(vnic_cache, vnic);
 653         return (0);
 654 }
 655 
 656 /* ARGSUSED */
 657 mblk_t *
 658 vnic_m_tx(void *arg, mblk_t *mp_chain)
 659 {
 660         /*
 661          * This function could be invoked for an anchor VNIC when sending
 662          * broadcast and multicast packets, and unicast packets which did
 663          * not match any local known destination.
 664          */
 665         freemsgchain(mp_chain);
 666         return (NULL);
 667 }
 668 
 669 /*ARGSUSED*/
 670 static void
 671 vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
 672 {
 673         miocnak(q, mp, 0, ENOTSUP);
 674 }
 675 
 676 /*
 677  * This entry point cannot be passed-through, since it is invoked
 678  * for the per-VNIC kstats which must be exported independently
 679  * of the existence of VNIC MAC clients.
 680  */
 681 static int
 682 vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
 683 {
 684         vnic_t *vnic = arg;
 685         int rval = 0;
 686 
 687         if (vnic->vn_lower_mh == NULL) {
 688                 /*
 689                  * It's an anchor VNIC, which does not have any
 690                  * statistics in itself.
 691                  */
 692                 return (ENOTSUP);
 693         }
 694 
 695         /*
 696          * ENOTSUP must be reported for unsupported stats, the VNIC
 697          * driver reports a subset of the stats that would
 698          * be returned by a real piece of hardware.
 699          */
 700 
 701         switch (stat) {
 702         case MAC_STAT_LINK_STATE:
 703         case MAC_STAT_LINK_UP:
 704         case MAC_STAT_PROMISC:
 705         case MAC_STAT_IFSPEED:
 706         case MAC_STAT_MULTIRCV:
 707         case MAC_STAT_MULTIXMT:
 708         case MAC_STAT_BRDCSTRCV:
 709         case MAC_STAT_BRDCSTXMT:
 710         case MAC_STAT_OPACKETS:
 711         case MAC_STAT_OBYTES:
 712         case MAC_STAT_IERRORS:
 713         case MAC_STAT_OERRORS:
 714         case MAC_STAT_RBYTES:
 715         case MAC_STAT_IPACKETS:
 716                 *val = mac_client_stat_get(vnic->vn_mch, stat);
 717                 break;
 718         default:
 719                 rval = ENOTSUP;
 720         }
 721 
 722         return (rval);
 723 }
 724 
 725 /*
 726  * Invoked by the upper MAC to retrieve the lower MAC client handle
 727  * corresponding to a VNIC. A pointer to this function is obtained
 728  * by the upper MAC via capability query.
 729  *
 730  * XXX-nicolas Note: this currently causes all VNIC MAC clients to
 731  * receive the same MAC client handle for the same VNIC. This is ok
 732  * as long as we have only one VNIC MAC client which sends and
 733  * receives data, but we don't currently enforce this at the MAC layer.
 734  */
 735 static void *
 736 vnic_mac_client_handle(void *vnic_arg)
 737 {
 738         vnic_t *vnic = vnic_arg;
 739 
 740         return (vnic->vn_mch);
 741 }
 742 
 743 
 744 /*
 745  * Return information about the specified capability.
 746  */
 747 /* ARGSUSED */
 748 static boolean_t
 749 vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
 750 {
 751         vnic_t *vnic = arg;
 752 
 753         switch (cap) {
 754         case MAC_CAPAB_HCKSUM: {
 755                 uint32_t *hcksum_txflags = cap_data;
 756 
 757                 *hcksum_txflags = vnic->vn_hcksum_txflags &
 758                     (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
 759                     HCKSUM_INET_PARTIAL);
 760                 break;
 761         }
 762         case MAC_CAPAB_VNIC: {
 763                 mac_capab_vnic_t *vnic_capab = cap_data;
 764 
 765                 if (vnic->vn_lower_mh == NULL) {
 766                         /*
 767                          * It's an anchor VNIC, we don't have an underlying
 768                          * NIC and MAC client handle.
 769                          */
 770                         return (B_FALSE);
 771                 }
 772 
 773                 if (vnic_capab != NULL) {
 774                         vnic_capab->mcv_arg = vnic;
 775                         vnic_capab->mcv_mac_client_handle =
 776                             vnic_mac_client_handle;
 777                 }
 778                 break;
 779         }
 780         case MAC_CAPAB_ANCHOR_VNIC: {
 781                 /* since it's an anchor VNIC we don't have lower mac handle */
 782                 if (vnic->vn_lower_mh == NULL) {
 783                         ASSERT(vnic->vn_link_id == 0);
 784                         return (B_TRUE);
 785                 }
 786                 return (B_FALSE);
 787         }
 788         case MAC_CAPAB_NO_NATIVEVLAN:
 789                 return (B_FALSE);
 790         case MAC_CAPAB_NO_ZCOPY:
 791                 return (B_TRUE);
 792         case MAC_CAPAB_VRRP: {
 793                 mac_capab_vrrp_t *vrrp_capab = cap_data;
 794 
 795                 if (vnic->vn_vrid != 0) {
 796                         if (vrrp_capab != NULL)
 797                                 vrrp_capab->mcv_af = vnic->vn_af;
 798                         return (B_TRUE);
 799                 }
 800                 return (B_FALSE);
 801         }
 802         default:
 803                 return (B_FALSE);
 804         }
 805         return (B_TRUE);
 806 }
 807 
 808 /* ARGSUSED */
 809 static int
 810 vnic_m_start(void *arg)
 811 {
 812         return (0);
 813 }
 814 
 815 /* ARGSUSED */
 816 static void
 817 vnic_m_stop(void *arg)
 818 {
 819 }
 820 
 821 /* ARGSUSED */
 822 static int
 823 vnic_m_promisc(void *arg, boolean_t on)
 824 {
 825         return (0);
 826 }
 827 
 828 /* ARGSUSED */
 829 static int
 830 vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
 831 {
 832         return (0);
 833 }
 834 
 835 static int
 836 vnic_m_unicst(void *arg, const uint8_t *macaddr)
 837 {
 838         vnic_t *vnic = arg;
 839 
 840         return (mac_vnic_unicast_set(vnic->vn_mch, macaddr));
 841 }
 842 
 843 /*
 844  * Callback functions for set/get of properties
 845  */
 846 /*ARGSUSED*/
 847 static int
 848 vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
 849     uint_t pr_valsize, const void *pr_val)
 850 {
 851         int             err = 0;
 852         vnic_t          *vn = m_driver;
 853 
 854         switch (pr_num) {
 855         case MAC_PROP_MTU: {
 856                 uint32_t        mtu;
 857 
 858                 /* allow setting MTU only on an etherstub */
 859                 if (vn->vn_link_id != DATALINK_INVALID_LINKID) {
 860                         err = ENOTSUP;
 861                         break;
 862                 }
 863 
 864                 if (pr_valsize < sizeof (mtu)) {
 865                         err = EINVAL;
 866                         break;
 867                 }
 868                 bcopy(pr_val, &mtu, sizeof (mtu));
 869                 if (mtu < ANCHOR_VNIC_MIN_MTU || mtu > ANCHOR_VNIC_MAX_MTU) {
 870                         err = EINVAL;
 871                         break;
 872                 }
 873                 err = mac_maxsdu_update(vn->vn_mh, mtu);
 874                 break;
 875         }
 876         case MAC_PROP_VN_PROMISC_FILTERED: {
 877                 boolean_t filtered;
 878 
 879                 if (pr_valsize < sizeof (filtered)) {
 880                         err = EINVAL;
 881                         break;
 882                 }
 883 
 884                 bcopy(pr_val, &filtered, sizeof (filtered));
 885                 mac_set_promisc_filtered(vn->vn_mch, filtered);
 886         }
 887         default:
 888                 err = ENOTSUP;
 889                 break;
 890         }
 891         return (err);
 892 }
 893 
 894 static int
 895 vnic_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
 896     uint_t pr_valsize, void *pr_val)
 897 {
 898         vnic_t          *vn = arg;
 899         int             ret = 0;
 900         boolean_t       out;
 901 
 902         switch (pr_num) {
 903         case MAC_PROP_VN_PROMISC_FILTERED:
 904                 out = mac_get_promisc_filtered(vn->vn_mch);
 905                 ASSERT(pr_valsize >= sizeof (boolean_t));
 906                 bcopy(&out, pr_val, sizeof (boolean_t));
 907                 break;
 908         default:
 909                 ret = EINVAL;
 910                 break;
 911         }
 912 
 913         return (ret);
 914 }
 915 
 916 /* ARGSUSED */
 917 static void vnic_m_propinfo(void *m_driver, const char *pr_name,
 918     mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
 919 {
 920         vnic_t          *vn = m_driver;
 921 
 922         /* MTU setting allowed only on an etherstub */
 923         if (vn->vn_link_id != DATALINK_INVALID_LINKID)
 924                 return;
 925 
 926         switch (pr_num) {
 927         case MAC_PROP_MTU:
 928                 mac_prop_info_set_range_uint32(prh,
 929                     ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU);
 930                 break;
 931         }
 932 }
 933 
 934 
 935 int
 936 vnic_info(vnic_info_t *info, cred_t *credp)
 937 {
 938         vnic_t          *vnic;
 939         int             err;
 940 
 941         /* Make sure that the VNIC link is visible from the caller's zone. */
 942         if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp)))
 943                 return (ENOENT);
 944 
 945         rw_enter(&vnic_lock, RW_WRITER);
 946 
 947         err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id),
 948             (mod_hash_val_t *)&vnic);
 949         if (err != 0) {
 950                 rw_exit(&vnic_lock);
 951                 return (ENOENT);
 952         }
 953 
 954         info->vn_link_id = vnic->vn_link_id;
 955         info->vn_mac_addr_type = vnic->vn_addr_type;
 956         info->vn_mac_len = vnic->vn_addr_len;
 957         bcopy(vnic->vn_addr, info->vn_mac_addr, MAXMACADDRLEN);
 958         info->vn_mac_slot = vnic->vn_slot_id;
 959         info->vn_mac_prefix_len = 0;
 960         info->vn_vid = vnic->vn_vid;
 961         info->vn_force = vnic->vn_force;
 962         info->vn_vrid = vnic->vn_vrid;
 963         info->vn_af = vnic->vn_af;
 964 
 965         bzero(&info->vn_resource_props, sizeof (mac_resource_props_t));
 966         if (vnic->vn_mch != NULL)
 967                 mac_resource_ctl_get(vnic->vn_mch, &info->vn_resource_props);
 968 
 969         rw_exit(&vnic_lock);
 970         return (0);
 971 }
 972 
 973 static void
 974 vnic_notify_cb(void *arg, mac_notify_type_t type)
 975 {
 976         vnic_t *vnic = arg;
 977 
 978         /*
 979          * Do not deliver notifications if the vnic is not fully initialized
 980          * or is in process of being torn down.
 981          */
 982         if (!vnic->vn_enabled)
 983                 return;
 984 
 985         switch (type) {
 986         case MAC_NOTE_UNICST:
 987                 /*
 988                  * Only the VLAN VNIC needs to be notified with primary MAC
 989                  * address change.
 990                  */
 991                 if (vnic->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY)
 992                         return;
 993 
 994                 /*  the unicast MAC address value */
 995                 mac_unicast_primary_get(vnic->vn_lower_mh, vnic->vn_addr);
 996 
 997                 /* notify its upper layer MAC about MAC address change */
 998                 mac_unicst_update(vnic->vn_mh, (const uint8_t *)vnic->vn_addr);
 999                 break;
1000 
1001         case MAC_NOTE_LINK:
1002                 mac_link_update(vnic->vn_mh,
1003                     mac_client_stat_get(vnic->vn_mch, MAC_STAT_LINK_STATE));
1004                 break;
1005 
1006         default:
1007                 break;
1008         }
1009 }