illumos-gate New usr/src/uts/common/inet/sockmods/sockmod

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2015 Joyent, Inc. All rights reserved.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/stropts.h>
  31 #include <sys/socket.h>
  32 #include <sys/socketvar.h>
  33 #include <sys/socket_proto.h>
  34 #include <sys/sockio.h>
  35 #include <sys/strsun.h>
  36 #include <sys/kstat.h>
  37 #include <sys/modctl.h>
  38 #include <sys/policy.h>
  39 #include <sys/priv_const.h>
  40 #include <sys/tihdr.h>
  41 #include <sys/zone.h>
  42 #include <sys/time.h>
  43 #include <sys/ethernet.h>
  44 #include <sys/llc1.h>
  45 #include <fs/sockfs/sockcommon.h>
  46 #include <net/if.h>
  47 #include <inet/ip_arp.h>
  48 
  49 #include <sys/dls.h>
  50 #include <sys/mac.h>
  51 #include <sys/mac_client.h>
  52 #include <sys/mac_provider.h>
  53 #include <sys/mac_client_priv.h>
  54 
  55 #include <netpacket/packet.h>
  56 
   /* Forward declarations of the file-local pfp_* helper routines. */
   57 static void pfp_close(mac_handle_t, mac_client_handle_t);
   58 static int pfp_dl_to_arphrd(int);
   59 static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
   60     socklen_t *);
   61 static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *, int);
   62 static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *,
   63     int);
   64 static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
   65     cred_t *);
   66 static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
   67 static void pfp_release_bpf(struct pfpsock *);
   68 static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
   69 static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
   70     socklen_t);
   71 static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
   72     socklen_t);
   73 
   74 /*
   75  * PFP sockfs operations: the downcalls this module implements itself.
   76  * Operations that have no meaning for a connectionless socket are not
   77  * declared here; pfp_downcalls fills those slots with sock_*_notsupp.
   78  */
   79 static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
   80     sock_upcalls_t *, int, struct cred *);
   81 static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
   82     struct cred *);
   83 static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
   84 static void sdpfp_clr_flowctrl(sock_lower_handle_t);
   85 static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
   86     socklen_t *, struct cred *);
   87 static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
   88     struct cred *);
   89 static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
   90     struct cred *);
   91 static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
   92     socklen_t, struct cred *);
   93 
   /* Socket creation entry point (registered through sinfo). */
   94 static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
   95     uint_t *, int *, int, cred_t *);
   96 
   /* Module-private setup and teardown, driven by _init() and _fini(). */
   97 static int sockpfp_init(void);
   98 static void sockpfp_fini(void);
  99 
  100 static kstat_t *pfp_ksp;        /* kstat handle; NULL if kstat_create() failed */
  101 static pfp_kstats_t ks_stats;   /* live counters, exported as pfp_ksp->ks_data */
  102 static pfp_kstats_t pfp_kstats = {      /* name/type template for ks_stats */
  103         /*
  104          * Each one of these kstats is a different return path in handling
  105          * a packet received from the mac layer.
  106          */
  107         { "recvMacHeaderFail",  KSTAT_DATA_UINT64 },
  108         { "recvBadProtocol",    KSTAT_DATA_UINT64 },
  109         { "recvAllocbFail",     KSTAT_DATA_UINT64 },
  110         { "recvOk",             KSTAT_DATA_UINT64 },
  111         { "recvFail",           KSTAT_DATA_UINT64 },
  112         { "recvFiltered",       KSTAT_DATA_UINT64 },
  113         { "recvFlowControl",    KSTAT_DATA_UINT64 },
  114         /*
  115          * A global set of counters is maintained to track the behaviour
  116          * of the system (kernel & applications) in sending packets.
  117          */
  118         { "sendUnbound",        KSTAT_DATA_UINT64 },
  119         { "sendFailed",         KSTAT_DATA_UINT64 },
  120         { "sendTooBig",         KSTAT_DATA_UINT64 },
  121         { "sendAllocFail",      KSTAT_DATA_UINT64 },
  122         { "sendUiomoveFail",    KSTAT_DATA_UINT64 },
  123         { "sendNoMemory",       KSTAT_DATA_UINT64 },
  124         { "sendOpenFail",       KSTAT_DATA_UINT64 },
  125         { "sendWrongFamily",    KSTAT_DATA_UINT64 },
  126         { "sendShortMsg",       KSTAT_DATA_UINT64 },
  127         { "sendOk",             KSTAT_DATA_UINT64 }
  128 };
 129 
  /*
   * Downcall vector handed back to sockfs by sockpfp_create(). The entries
   * are positional; operations this module does not implement use the
   * generic sock_*_notsupp handlers.
   */
  130 sock_downcalls_t pfp_downcalls = {
  131         sdpfp_activate,
  132         sock_accept_notsupp,
  133         sdpfp_bind,
  134         sock_listen_notsupp,
  135         sock_connect_notsupp,
  136         sock_getpeername_notsupp,
  137         sock_getsockname_notsupp,
  138         sdpfp_getsockopt,
  139         sdpfp_setsockopt,
  140         sock_send_notsupp,
  141         sdpfp_senduio,
  142         NULL,   /* NOTE(review): presumably the receive-uio slot - confirm */
  143         sock_poll_notsupp,
  144         sock_shutdown_notsupp,
  145         sdpfp_clr_flowctrl,
  146         sdpfp_ioctl,
  147         sdpfp_close,
  148 };
 149 
  /*
   * Socket module registration record, referenced from modlsockmod below.
   * It names the module ("sockpfp"), states the upcall/downcall interface
   * versions it was built against and supplies the socket create function.
   */
  150 static smod_reg_t sinfo = {
  151         SOCKMOD_VERSION,
  152         "sockpfp",
  153         SOCK_UC_VERSION,
  154         SOCK_DC_VERSION,
  155         sockpfp_create,
  156         NULL    /* NOTE(review): unused optional slot of smod_reg_t - confirm */
  157 };
 158 
  /*
   * Protocol translation table consulted by sockpfp_create(): column 0 is
   * the protocol number passed to socket creation, column 1 is the value
   * stored in ps_proto for it.
   */
  159 static int accepted_protos[3][2] = {
  160         { ETH_P_ALL,    0 },
  161         { ETH_P_802_2,  LLC_SNAP_SAP },
  162         { ETH_P_803_3,  0 },
  163 };
 164 
  165 /*
  166  * This sets an upper bound on the size of the receive buffer for a PF_PACKET
  167  * socket. More properly, this should be controlled through ipadm, a la TCP,
  168  * UDP, SCTP, etc. Until that's done, this provides a hard cap of 4 MB. It is
  169  * a non-static global so that the cap can be changed, should that be needed.
  170  */
  171 int sockmod_pfp_rcvbuf_max = 1024 * 1024 * 4;
 172 
  173 /*
  174  * Module linkage information for the kernel.
   * modlsockmod describes the socket module (sinfo); modlinkage wraps it and
   * is what _init(), _fini() and _info() pass to mod_install(), mod_remove()
   * and mod_info().
  175  */
  176 static struct modlsockmod modlsockmod = {
  177         &mod_sockmodops, "PF Packet socket module", &sinfo
  178 };
  179 
  180 static struct modlinkage modlinkage = {
  181         MODREV_1,
  182         { &modlsockmod, NULL }
  183 };
 184 
 185 int
 186 _init(void)
 187 {
 188         int error;
 189 
 190         error = sockpfp_init();
 191         if (error != 0)
 192                 return (error);
 193 
 194         error = mod_install(&modlinkage);
 195         if (error != 0)
 196                 sockpfp_fini();
 197 
 198         return (error);
 199 }
 200 
 201 int
 202 _fini(void)
 203 {
 204         int error;
 205 
 206         error = mod_remove(&modlinkage);
 207         if (error == 0)
 208                 sockpfp_fini();
 209 
 210         return (error);
 211 }
 212 
 213 int
 214 _info(struct modinfo *modinfop)
 215 {
 216         return (mod_info(&modlinkage, modinfop));
 217 }
 218 
 219 /*
 220  * sockpfp_init: called as part of the initialisation of the module when
 221  * loaded into the kernel.
 222  *
 223  * Being able to create and record the kstats data in the kernel is not
 224  * considered to be vital to the operation of this kernel module, thus
 225  * its failure is tolerated.
 226  */
 227 static int
 228 sockpfp_init(void)
 229 {
 230         (void) memset(&ks_stats, 0, sizeof (ks_stats));
 231 
 232         (void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
 233 
 234         pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
 235             KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
 236             KSTAT_FLAG_VIRTUAL);
 237         if (pfp_ksp != NULL) {
 238                 pfp_ksp->ks_data = &ks_stats;
 239                 kstat_install(pfp_ksp);
 240         }
 241 
 242         return (0);
 243 }
 244 
 245 /*
 246  * sockpfp_fini: called when the operating system wants to unload the
 247  * socket module from the kernel.
 248  */
 249 static void
 250 sockpfp_fini(void)
 251 {
 252         if (pfp_ksp != NULL)
 253                 kstat_delete(pfp_ksp);
 254 }
 255 
 256 /*
 257  * Due to sockets being created read-write by default, all PF_PACKET sockets
 258  * therefore require the NET_RAWACCESS priviliege, even if the socket is only
 259  * being used for reading packets from.
 260  *
 261  * This create function enforces this module only being used with PF_PACKET
 262  * sockets and the policy that we support via the config file in sock2path.d:
 263  * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
 264  */
 265 /* ARGSUSED */
 266 static sock_lower_handle_t
 267 sockpfp_create(int family, int type, int proto,
 268     sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
 269     int sflags, cred_t *cred)
 270 {
 271         struct pfpsock *ps;
 272         int kmflags;
 273         int newproto;
 274         int i;
 275 
 276         if (secpolicy_net_rawaccess(cred) != 0) {
 277                 *errorp = EACCES;
 278                 return (NULL);
 279         }
 280 
 281         if (family != AF_PACKET) {
 282                 *errorp = EAFNOSUPPORT;
 283                 return (NULL);
 284         }
 285 
 286         if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
 287                 *errorp = ESOCKTNOSUPPORT;
 288                 return (NULL);
 289         }
 290 
 291         /*
 292          * First check to see if the protocol number passed in via the socket
 293          * creation should be mapped to a different number for internal use.
 294          */
 295         for (i = 0, newproto = -1;
 296             i < sizeof (accepted_protos)/ sizeof (accepted_protos[0]); i++) {
 297                 if (accepted_protos[i][0] == proto) {
 298                         newproto = accepted_protos[i][1];
 299                         break;
 300                 }
 301         }
 302 
 303         /*
 304          * If the mapping of the protocol that was under 0x800 failed to find
 305          * a local equivalent then fail the socket creation. If the protocol
 306          * for the socket is over 0x800 and it was not found in the mapping
 307          * table above, then use the value as is.
 308          */
 309         if (newproto == -1) {
 310                 if (proto < 0x800) {
 311                         *errorp = ENOPROTOOPT;
 312                         return (NULL);
 313                 }
 314                 newproto = proto;
 315         }
 316         proto = newproto;
 317 
 318         kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
 319         ps = kmem_zalloc(sizeof (*ps), kmflags);
 320         if (ps == NULL) {
 321                 *errorp = ENOMEM;
 322                 return (NULL);
 323         }
 324 
 325         ps->ps_type = type;
 326         ps->ps_proto = proto;
 327         rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
 328         mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
 329 
 330         *sock_downcalls = &pfp_downcalls;
 331         /*
 332          * Setting this causes bytes from a packet that do not fit into the
 333          * destination user buffer to be discarded. Thus the API is one
 334          * packet per receive and callers are required to use a buffer large
 335          * enough for the biggest packet that the interface can provide.
 336          */
 337         *smodep = SM_ATOMIC;
 338 
 339         return ((sock_lower_handle_t)ps);
 340 }
 341 
 342 /* ************************************************************************* */
 343 
 344 /*
 345  * pfp_packet is the callback function that is given to the mac layer for
 346  * PF_PACKET to receive packets with. One packet at a time is passed into
 347  * this function from the mac layer. Each packet is a private copy given
 348  * to PF_PACKET to modify or free as it wishes and does not harm the original
 349  * packet from which it was cloned.
 350  */
 351 /* ARGSUSED */
 352 static void
 353 pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
 354 {
 355         struct T_unitdata_ind *tunit;
 356         struct sockaddr_ll *sll;
 357         struct sockaddr_ll *sol;
 358         mac_header_info_t hdr;
 359         struct pfpsock *ps;
 360         size_t tusz;
 361         mblk_t *mp0;
 362         int error;
 363 
 364         if (mp == NULL)
 365                 return;
 366 
 367         ps = arg;
 368         if (ps->ps_flow_ctrld) {
 369                 ps->ps_flow_ctrl_drops++;
 370                 ps->ps_stats.tp_drops++;
 371                 ks_stats.kp_recv_flow_cntrld.value.ui64++;
 372                 freemsg(mp);
 373                 return;
 374         }
 375 
 376         if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
 377                 /*
 378                  * Can't decode the packet header information so drop it.
 379                  */
 380                 ps->ps_stats.tp_drops++;
 381                 ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
 382                 freemsg(mp);
 383                 return;
 384         }
 385 
 386         if (mac_type(ps->ps_mh) == DL_ETHER &&
 387             hdr.mhi_bindsap == ETHERTYPE_VLAN) {
 388                 struct ether_vlan_header *evhp;
 389                 struct ether_vlan_header evh;
 390 
 391                 hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
 392                 hdr.mhi_istagged = B_TRUE;
 393 
 394                 if (MBLKL(mp) >= sizeof (*evhp)) {
 395                         evhp = (struct ether_vlan_header *)mp->b_rptr;
 396                 } else {
 397                         int sz = sizeof (*evhp);
 398                         char *s = (char *)&evh;
 399                         mblk_t *tmp;
 400                         int len;
 401 
 402                         for (tmp = mp; sz > 0 && tmp != NULL;
 403                             tmp = tmp->b_cont) {
 404                                 len = min(sz, MBLKL(tmp));
 405                                 bcopy(tmp->b_rptr, s, len);
 406                                 sz -= len;
 407                         }
 408                         evhp = &evh;
 409                 }
 410                 hdr.mhi_tci = ntohs(evhp->ether_tci);
 411                 hdr.mhi_bindsap = ntohs(evhp->ether_type);
 412         }
 413 
 414         if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
 415                 /*
 416                  * The packet is not of interest to this socket so
 417                  * drop it on the floor. Here the SAP is being used
 418                  * as a very course filter.
 419                  */
 420                 ps->ps_stats.tp_drops++;
 421                 ks_stats.kp_recv_bad_proto.value.ui64++;
 422                 freemsg(mp);
 423                 return;
 424         }
 425 
 426         /*
 427          * This field is not often set, even for ethernet,
 428          * by mac_header_info, so compute it if it is 0.
 429          */
 430         if (hdr.mhi_pktsize == 0)
 431                 hdr.mhi_pktsize = msgdsize(mp);
 432 
 433         /*
 434          * If a BPF filter is present, pass the raw packet into that.
 435          * A failed match will result in zero being returned, indicating
 436          * that this socket is not interested in the packet.
 437          */
 438         if (ps->ps_bpf.bf_len != 0) {
 439                 uchar_t *buffer;
 440                 int buflen;
 441 
 442                 buflen = MBLKL(mp);
 443                 if (hdr.mhi_pktsize == buflen) {
 444                         buffer = mp->b_rptr;
 445                 } else {
 446                         buflen = 0;
 447                         buffer = (uchar_t *)mp;
 448                 }
 449                 rw_enter(&ps->ps_bpflock, RW_READER);
 450                 if (bpf_filter(ps->ps_bpf.bf_insns, buffer,
 451                     hdr.mhi_pktsize, buflen) == 0) {
 452                         rw_exit(&ps->ps_bpflock);
 453                         ps->ps_stats.tp_drops++;
 454                         ks_stats.kp_recv_filtered.value.ui64++;
 455                         freemsg(mp);
 456                         return;
 457                 }
 458                 rw_exit(&ps->ps_bpflock);
 459         }
 460 
 461         if (ps->ps_type == SOCK_DGRAM) {
 462                 /*
 463                  * SOCK_DGRAM socket expect a "layer 3" packet, so advance
 464                  * past the link layer header.
 465                  */
 466                 mp->b_rptr += hdr.mhi_hdrsize;
 467                 hdr.mhi_pktsize -= hdr.mhi_hdrsize;
 468         }
 469 
 470         tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
 471         if (ps->ps_auxdata) {
 472                 tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
 473                 tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
 474         }
 475 
 476         /*
 477          * It is tempting to think that this could be optimised by having
 478          * the base mblk_t allocated and hung off the pfpsock structure,
 479          * except that then another one would need to be allocated for the
 480          * sockaddr_ll that is included. Even creating a template to copy
 481          * from is of questionable value, as read-write from one structure
 482          * to the other is going to be slower than all of the initialisation.
 483          */
 484         mp0 = allocb(tusz, BPRI_HI);
 485         if (mp0 == NULL) {
 486                 ps->ps_stats.tp_drops++;
 487                 ks_stats.kp_recv_alloc_fail.value.ui64++;
 488                 freemsg(mp);
 489                 return;
 490         }
 491 
 492         (void) memset(mp0->b_rptr, 0, tusz);
 493 
 494         mp0->b_datap->db_type = M_PROTO;
 495         mp0->b_wptr = mp0->b_rptr + tusz;
 496 
 497         tunit = (struct T_unitdata_ind *)mp0->b_rptr;
 498         tunit->PRIM_type = T_UNITDATA_IND;
 499         tunit->SRC_length = sizeof (struct sockaddr);
 500         tunit->SRC_offset = sizeof (*tunit);
 501 
 502         sol = &ps->ps_sock;
 503         sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
 504         sll->sll_ifindex = sol->sll_ifindex;
 505         sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
 506         sll->sll_halen = sol->sll_halen;
 507         if (hdr.mhi_saddr != NULL)
 508                 (void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
 509 
 510         switch (hdr.mhi_dsttype) {
 511         case MAC_ADDRTYPE_MULTICAST :
 512                 sll->sll_pkttype = PACKET_MULTICAST;
 513                 break;
 514         case MAC_ADDRTYPE_BROADCAST :
 515                 sll->sll_pkttype = PACKET_BROADCAST;
 516                 break;
 517         case MAC_ADDRTYPE_UNICAST :
 518                 if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
 519                         sll->sll_pkttype = PACKET_HOST;
 520                 else
 521                         sll->sll_pkttype = PACKET_OTHERHOST;
 522                 break;
 523         }
 524 
 525         if (ps->ps_auxdata) {
 526                 struct tpacket_auxdata *aux;
 527                 struct T_opthdr *topt;
 528 
 529                 tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
 530                     sizeof (struct sockaddr_ll));
 531                 tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
 532                     _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
 533 
 534                 topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
 535                 aux = (struct tpacket_auxdata *)
 536                     ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
 537 
 538                 topt->len = tunit->OPT_length;
 539                 topt->level = SOL_PACKET;
 540                 topt->name = PACKET_AUXDATA;
 541                 topt->status = 0;
 542                 /*
 543                  * libpcap doesn't seem to use any other field,
 544                  * so it isn't clear how they should be filled in.
 545                  */
 546                 aux->tp_vlan_vci = hdr.mhi_tci;
 547         }
 548 
 549         linkb(mp0, mp);
 550 
 551         (void) gethrestime(&ps->ps_timestamp);
 552 
 553         ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
 554             &error, NULL);
 555 
 556         if (error == 0) {
 557                 ps->ps_stats.tp_packets++;
 558                 ks_stats.kp_recv_ok.value.ui64++;
 559         } else {
 560                 mutex_enter(&ps->ps_lock);
 561                 if (error == ENOSPC) {
 562                         ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
 563                             &error, NULL);
 564                         if (error == ENOSPC)
 565                                 ps->ps_flow_ctrld = B_TRUE;
 566                 }
 567                 mutex_exit(&ps->ps_lock);
 568                 ps->ps_stats.tp_drops++;
 569                 ks_stats.kp_recv_fail.value.ui64++;
 570         }
 571 }
 572 
 573 /*
 574  * Bind a PF_PACKET socket to a network interface.
 575  *
 576  * The default operation of this bind() is to place the socket (and thus the
 577  * network interface) into promiscuous mode. It is then up to the application
 578  * to turn that down by issuing the relevant ioctls, if desired.
 579  */
 580 static int
 581 sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
 582     socklen_t addrlen, struct cred *cred)
 583 {
 584         struct sockaddr_ll *addr_ll, *sol;
 585         mac_client_handle_t mch;
 586         struct pfpsock *ps;
 587         mac_handle_t mh;
 588         int error;
 589 
 590         ps = (struct pfpsock *)handle;
 591         if (ps->ps_bound)
 592                 return (EINVAL);
 593 
 594         if (addrlen < sizeof (struct sockaddr_ll) || addr == NULL)
 595                 return (EINVAL);
 596 
 597         addr_ll = (struct sockaddr_ll *)addr;
 598 
 599         error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
 600         if (error != 0)
 601                 return (error);
 602         /*
 603          * Ensure that each socket is only bound once.
 604          */
 605         mutex_enter(&ps->ps_lock);
 606         if (ps->ps_mh != 0) {
 607                 mutex_exit(&ps->ps_lock);
 608                 pfp_close(mh, mch);
 609                 return (EADDRINUSE);
 610         }
 611         ps->ps_mh = mh;
 612         ps->ps_mch = mch;
 613         mutex_exit(&ps->ps_lock);
 614 
 615         /*
 616          * Cache all of the information from bind so that it's in an easy
 617          * place to get at when packets are received.
 618          */
 619         sol = &ps->ps_sock;
 620         sol->sll_family = AF_PACKET;
 621         sol->sll_ifindex = addr_ll->sll_ifindex;
 622         sol->sll_protocol = addr_ll->sll_protocol;
 623         sol->sll_halen = mac_addr_len(ps->ps_mh);
 624         mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
 625         mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
 626         ps->ps_linkid = addr_ll->sll_ifindex;
 627 
 628         error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
 629             pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
 630         if (error == 0) {
 631                 ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
 632                 ps->ps_bound = B_TRUE;
 633         }
 634 
 635         return (error);
 636 }
 637 
 638 /* ARGSUSED */
 639 static void
 640 sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
 641     sock_upcalls_t *upcalls, int flags, cred_t *cred)
 642 {
 643         struct pfpsock *ps;
 644 
 645         ps = (struct pfpsock *)lower;
 646         ps->ps_upper = upper;
 647         ps->ps_upcalls = upcalls;
 648 }
 649 
 650 /*
 651  * This module only implements getting socket options for the new socket
 652  * option level (SOL_PACKET) that it introduces. All other requests are
 653  * passed back to the sockfs layer.
 654  */
 655 /* ARGSUSED */
 656 static int
 657 sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
 658     void *optval, socklen_t *optlenp, struct cred *cred)
 659 {
 660         struct pfpsock *ps;
 661         int error = 0;
 662 
 663         ps = (struct pfpsock *)handle;
 664 
 665         switch (level) {
 666         case SOL_PACKET :
 667                 error = pfp_getpacket_sockopt(handle, option_name, optval,
 668                     optlenp);
 669                 break;
 670 
 671         case SOL_SOCKET :
 672                 if (option_name == SO_RCVBUF) {
 673                         if (*optlenp < sizeof (int32_t))
 674                                 return (EINVAL);
 675                         *((int32_t *)optval) = ps->ps_rcvbuf;
 676                         *optlenp = sizeof (int32_t);
 677                 } else {
 678                         error = ENOPROTOOPT;
 679                 }
 680                 break;
 681 
 682         default :
 683                 /*
 684                  * If sockfs code receives this error in return from the
 685                  * getsockopt downcall it handles the option locally, if
 686                  * it can.
 687                  */
 688                 error = ENOPROTOOPT;
 689                 break;
 690         }
 691 
 692         return (error);
 693 }
 694 
 695 /*
 696  * PF_PACKET supports setting socket options at only two levels:
 697  * SOL_SOCKET and SOL_PACKET.
 698  */
 699 /* ARGSUSED */
 700 static int
 701 sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
 702     const void *optval, socklen_t optlen, struct cred *cred)
 703 {
 704         int error = 0;
 705 
 706         switch (level) {
 707         case SOL_SOCKET :
 708                 error = pfp_setsocket_sockopt(handle, option_name, optval,
 709                     optlen);
 710                 break;
 711         case SOL_PACKET :
 712                 error = pfp_setpacket_sockopt(handle, option_name, optval,
 713                     optlen);
 714                 break;
 715         default :
 716                 error = EINVAL;
 717                 break;
 718         }
 719 
 720         return (error);
 721 }
 722 
 723 /*
 724  * This function is incredibly inefficient for sending any packet that
 725  * comes with a msghdr asking to be sent to an interface to which the
 726  * socket has not been bound. Some possibilities here are keeping a
 727  * cache of all open mac's and mac_client's, for the purpose of sending,
 728  * and closing them after some amount of inactivity. Clearly, applications
 729  * should not be written to use one socket for multiple interfaces if
 730  * performance is desired with the code as is.
 731  */
 732 /* ARGSUSED */
 733 static int
 734 sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
 735     struct nmsghdr *msg, struct cred *cred)
 736 {
 737         struct sockaddr_ll *sol;
 738         mac_client_handle_t mch;
 739         struct pfpsock *ps;
 740         boolean_t new_open;
 741         mac_handle_t mh;
 742         size_t mpsize;
 743         uint_t maxsdu;
 744         mblk_t *mp0;
 745         mblk_t *mp;
 746         int error;
 747 
 748         mp = NULL;
 749         mp0 = NULL;
 750         new_open = B_FALSE;
 751         ps = (struct pfpsock *)handle;
 752         mh = ps->ps_mh;
 753         mch = ps->ps_mch;
 754         maxsdu = ps->ps_max_sdu;
 755 
 756         sol = (struct sockaddr_ll *)msg->msg_name;
 757         if (sol == NULL) {
 758                 /*
 759                  * If no sockaddr_ll has been provided with the send call,
 760                  * use the one constructed when the socket was bound to an
 761                  * interface and fail if it hasn't been bound.
 762                  */
 763                 if (!ps->ps_bound) {
 764                         ks_stats.kp_send_unbound.value.ui64++;
 765                         return (EPROTO);
 766                 }
 767                 sol = &ps->ps_sock;
 768         } else {
 769                 /*
 770                  * Verify the sockaddr_ll message passed down before using
 771                  * it to send a packet out with. If it refers to an interface
 772                  * that has not been bound, it is necessary to open it.
 773                  */
 774                 struct sockaddr_ll *sll;
 775 
 776                 if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
 777                         ks_stats.kp_send_short_msg.value.ui64++;
 778                         return (EINVAL);
 779                 }
 780 
 781                 if (sol->sll_family != AF_PACKET) {
 782                         ks_stats.kp_send_wrong_family.value.ui64++;
 783                         return (EAFNOSUPPORT);
 784                 }
 785 
 786                 sll = &ps->ps_sock;
 787                 if (sol->sll_ifindex != sll->sll_ifindex) {
 788                         error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
 789                             cred);
 790                         if (error != 0) {
 791                                 ks_stats.kp_send_open_fail.value.ui64++;
 792                                 return (error);
 793                         }
 794                         mac_sdu_get(mh, NULL, &maxsdu);
 795                         new_open = B_TRUE;
 796                 }
 797         }
 798 
 799         mpsize = uiop->uio_resid;
 800         if (mpsize > maxsdu) {
 801                 ks_stats.kp_send_too_big.value.ui64++;
 802                 error = EMSGSIZE;
 803                 goto done;
 804         }
 805 
 806         if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
 807                 ks_stats.kp_send_alloc_fail.value.ui64++;
 808                 error = ENOBUFS;
 809                 goto done;
 810         }
 811 
 812         mp->b_wptr = mp->b_rptr + mpsize;
 813         error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
 814         if (error != 0) {
 815                 ks_stats.kp_send_uiomove_fail.value.ui64++;
 816                 goto done;
 817         }
 818 
 819         if (ps->ps_type == SOCK_DGRAM) {
 820                 mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
 821                 if (mp0 == NULL) {
 822                         ks_stats.kp_send_no_memory.value.ui64++;
 823                         error = ENOBUFS;
 824                         goto done;
 825                 }
 826                 linkb(mp0, mp);
 827                 mp = mp0;
 828         }
 829 
 830         /*
 831          * As this is sending datagrams and no promise is made about
 832          * how or if a packet will be sent/delivered, no effort is to
 833          * be expended in recovering from a situation where the packet
 834          * cannot be sent - it is just dropped.
 835          */
 836         error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
 837         if (error == 0) {
 838                 mp = NULL;
 839                 ks_stats.kp_send_ok.value.ui64++;
 840         } else {
 841                 ks_stats.kp_send_failed.value.ui64++;
 842         }
 843 
 844 done:
 845 
 846         if (new_open) {
 847                 ASSERT(mch != ps->ps_mch);
 848                 ASSERT(mh != ps->ps_mh);
 849                 pfp_close(mh, mch);
 850         }
 851         if (mp != NULL)
 852                 freemsg(mp);
 853 
 854         return (error);
 855 
 856 }
 857 
 858 /*
 859  * There's no use of a lock here, or at the bottom of pfp_packet() where
 860  * ps_flow_ctrld is set to true, because in a situation where these two
 861  * are racing to set the flag one way or the other, the end result is
 862  * going to be ultimately determined by the scheduler anyway - which of
 863  * the two threads gets the lock first? In such an operational environment,
 864  * we've got packets arriving too fast to be delt with so packets are going
 865  * to be dropped. Grabbing a lock just makes the drop more expensive.
 866  */
 867 static void
 868 sdpfp_clr_flowctrl(sock_lower_handle_t handle)
 869 {
 870         struct pfpsock *ps;
 871 
 872         ps = (struct pfpsock *)handle;
 873 
 874         mutex_enter(&ps->ps_lock);
 875         ps->ps_flow_ctrld = B_FALSE;
 876         mutex_exit(&ps->ps_lock);
 877 }
 878 
 879 /*
 880  * The implementation of this ioctl() handler is intended to function
 881  * in the absence of a bind() being made before it is called. Thus the
 882  * function calls mac_open() itself to provide a handle
 883  * This function is structured like this:
 884  * - determine the linkid for the interface being targetted
 885  * - open the interface with said linkid
 886  * - perform ioctl
 887  * - copy results back to caller
 888  *
 889  * The ioctls that interact with interface flags have been implented below
 890  * to assume that the interface is always up and running (IFF_RUNNING) and
 891  * to use the state of this socket to determine whether or not the network
 892  * interface is in promiscuous mode. Thus an ioctl to get the interface flags
 893  * of an interface that has been put in promiscuous mode by another socket
 894  * (in the same program or different), will not report that status.
 895  */
 896 /* ARGSUSED */
 897 static int
 898 sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
 899     int32_t *rval, struct cred *cr)
 900 {
 901         struct timeval tival;
 902         mac_client_promisc_type_t mtype;
 903         struct sockaddr_dl *sock;
 904         datalink_id_t linkid;
 905         struct lifreq lifreq;
 906         struct ifreq ifreq;
 907         struct pfpsock *ps;
 908         mac_handle_t mh;
 909         int error;
 910 
 911         ps = (struct pfpsock *)handle;
 912 
 913         switch (cmd) {
 914         /*
 915          * ioctls that work on "struct lifreq"
 916          */
 917         case SIOCSLIFFLAGS :
 918         case SIOCGLIFINDEX :
 919         case SIOCGLIFFLAGS :
 920         case SIOCGLIFMTU :
 921         case SIOCGLIFHWADDR :
 922                 error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid, mod);
 923                 if (error != 0)
 924                         return (error);
 925                 break;
 926 
 927         /*
 928          * ioctls that work on "struct ifreq".
 929          * Not all of these have a "struct lifreq" partner, for example
 930          * SIOCGIFHWADDR, for the simple reason that the logical interface
 931          * does not have a hardware address.
 932          */
 933         case SIOCSIFFLAGS :
 934         case SIOCGIFINDEX :
 935         case SIOCGIFFLAGS :
 936         case SIOCGIFMTU :
 937         case SIOCGIFHWADDR :
 938                 error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid, mod);
 939                 if (error != 0)
 940                         return (error);
 941                 break;
 942 
 943         case SIOCGSTAMP :
 944                 tival.tv_sec = (time_t)ps->ps_timestamp.tv_sec;
 945                 tival.tv_usec = ps->ps_timestamp.tv_nsec / 1000;
 946                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 947                         error = ddi_copyout(&tival, (void *)arg,
 948                             sizeof (tival), mod);
 949                 }
 950 #ifdef _SYSCALL32_IMPL
 951                 else {
 952                         struct timeval32 tv32;
 953                         TIMEVAL_TO_TIMEVAL32(&tv32, &tival);
 954                         error = ddi_copyout(&tv32, (void *)arg,
 955                             sizeof (tv32), mod);
 956                 }
 957 #endif
 958                 return (error);
 959         }
 960 
 961         error =  mac_open_by_linkid(linkid, &mh);
 962         if (error != 0)
 963                 return (error);
 964 
 965         switch (cmd) {
 966         case SIOCGLIFINDEX :
 967                 lifreq.lifr_index = linkid;
 968                 break;
 969 
 970         case SIOCGIFINDEX :
 971                 ifreq.ifr_index = linkid;
 972                 break;
 973 
 974         case SIOCGIFFLAGS :
 975                 ifreq.ifr_flags = IFF_RUNNING;
 976                 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
 977                         ifreq.ifr_flags |= IFF_PROMISC;
 978                 break;
 979 
 980         case SIOCGLIFFLAGS :
 981                 lifreq.lifr_flags = IFF_RUNNING;
 982                 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
 983                         lifreq.lifr_flags |= IFF_PROMISC;
 984                 break;
 985 
 986         case SIOCSIFFLAGS :
 987                 if (linkid != ps->ps_linkid) {
 988                         error = EINVAL;
 989                 } else {
 990                         if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
 991                                 mtype = MAC_CLIENT_PROMISC_ALL;
 992                         else
 993                                 mtype = MAC_CLIENT_PROMISC_FILTERED;
 994                         error = pfp_set_promisc(ps, mtype);
 995                 }
 996                 break;
 997 
 998         case SIOCSLIFFLAGS :
 999                 if (linkid != ps->ps_linkid) {
1000                         error = EINVAL;
1001                 } else {
1002                         if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
1003                                 mtype = MAC_CLIENT_PROMISC_ALL;
1004                         else
1005                                 mtype = MAC_CLIENT_PROMISC_FILTERED;
1006                         error = pfp_set_promisc(ps, mtype);
1007                 }
1008                 break;
1009 
1010         case SIOCGIFMTU :
1011                 mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
1012                 break;
1013 
1014         case SIOCGLIFMTU :
1015                 mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
1016                 break;
1017 
1018         case SIOCGIFHWADDR :
1019                 if (mac_addr_len(mh) > sizeof (ifreq.ifr_addr.sa_data)) {
1020                         error = EPFNOSUPPORT;
1021                         break;
1022                 }
1023 
1024                 if (mac_addr_len(mh) == 0) {
1025                         (void) memset(ifreq.ifr_addr.sa_data, 0,
1026                             sizeof (ifreq.ifr_addr.sa_data));
1027                 } else {
1028                         mac_unicast_primary_get(mh,
1029                             (uint8_t *)ifreq.ifr_addr.sa_data);
1030                 }
1031 
1032                 /*
1033                  * The behaviour here in setting sa_family is consistent
1034                  * with what applications such as tcpdump would expect
1035                  * for a Linux PF_PACKET socket.
1036                  */
1037                 ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
1038                 break;
1039 
1040         case SIOCGLIFHWADDR :
1041                 lifreq.lifr_type = 0;
1042                 sock = (struct sockaddr_dl *)&lifreq.lifr_addr;
1043 
1044                 if (mac_addr_len(mh) > sizeof (sock->sdl_data)) {
1045                         error = EPFNOSUPPORT;
1046                         break;
1047                 }
1048 
1049                 /*
1050                  * Fill in the sockaddr_dl with link layer details. Of note,
1051                  * the index is returned as 0 for a couple of reasons:
1052                  * (1) there is no public API that uses or requires it
1053                  * (2) the MAC index is currently 32bits and sdl_index is 16.
1054                  */
1055                 sock->sdl_family = AF_LINK;
1056                 sock->sdl_index = 0;
1057                 sock->sdl_type = mac_type(mh);
1058                 sock->sdl_nlen = 0;
1059                 sock->sdl_alen = mac_addr_len(mh);
1060                 sock->sdl_slen = 0;
1061                 if (mac_addr_len(mh) == 0) {
1062                         (void) memset(sock->sdl_data, 0,
1063                             sizeof (sock->sdl_data));
1064                 } else {
1065                         mac_unicast_primary_get(mh, (uint8_t *)sock->sdl_data);
1066                 }
1067                 break;
1068 
1069         default :
1070                 break;
1071         }
1072 
1073         mac_close(mh);
1074 
1075         if (error == 0) {
1076                 /*
1077                  * Only the "GET" ioctls need to copy data back to userace.
1078                  */
1079                 switch (cmd) {
1080                 case SIOCGLIFINDEX :
1081                 case SIOCGLIFFLAGS :
1082                 case SIOCGLIFMTU :
1083                 case SIOCGLIFHWADDR :
1084                         error = ddi_copyout(&lifreq, (void *)arg,
1085                             sizeof (lifreq), mod);
1086                         break;
1087 
1088                 case SIOCGIFINDEX :
1089                 case SIOCGIFFLAGS :
1090                 case SIOCGIFMTU :
1091                 case SIOCGIFHWADDR :
1092                         error = ddi_copyout(&ifreq, (void *)arg,
1093                             sizeof (ifreq), mod);
1094                         break;
1095                 default :
1096                         break;
1097                 }
1098         }
1099 
1100         return (error);
1101 }
1102 
1103 /*
1104  * Closing the socket requires that all open references to network
1105  * interfaces be closed.
1106  */
1107 /* ARGSUSED */
1108 static int
1109 sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
1110 {
1111         struct pfpsock *ps = (struct pfpsock *)handle;
1112 
1113         if (ps->ps_phd != 0) {
1114                 mac_promisc_remove(ps->ps_phd);
1115                 ps->ps_phd = 0;
1116         }
1117 
1118         if (ps->ps_mch != 0) {
1119                 mac_client_close(ps->ps_mch, 0);
1120                 ps->ps_mch = 0;
1121         }
1122 
1123         if (ps->ps_mh != 0) {
1124                 mac_close(ps->ps_mh);
1125                 ps->ps_mh = 0;
1126         }
1127 
1128         kmem_free(ps, sizeof (*ps));
1129 
1130         return (0);
1131 }
1132 
1133 /* ************************************************************************* */
1134 
1135 /*
1136  * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1137  * determine the linkid for the interface name stored in that structure.
1138  * name is used as a buffer so that we can ensure a trailing \0 is appended
1139  * to the name safely.
1140  */
1141 static int
1142 pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1143     datalink_id_t *linkidp, int mode)
1144 {
1145         char name[IFNAMSIZ + 1];
1146         int error;
1147 
1148         if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), mode) != 0)
1149                 return (EFAULT);
1150 
1151         (void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1152 
1153         error = dls_mgmt_get_linkid(name, linkidp);
1154         if (error != 0)
1155                 error = dls_devnet_macname2linkid(name, linkidp);
1156 
1157         return (error);
1158 }
1159 
1160 /*
1161  * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1162  * determine the linkid for the interface name stored in that structure.
1163  * name is used as a buffer so that we can ensure a trailing \0 is appended
1164  * to the name safely.
1165  */
1166 static int
1167 pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1168     datalink_id_t *linkidp, int mode)
1169 {
1170         char name[LIFNAMSIZ + 1];
1171         int error;
1172 
1173         if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), mode) != 0)
1174                 return (EFAULT);
1175 
1176         (void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1177 
1178         error = dls_mgmt_get_linkid(name, linkidp);
1179         if (error != 0)
1180                 error = dls_devnet_macname2linkid(name, linkidp);
1181 
1182         return (error);
1183 }
1184 
1185 /*
1186  * Although there are several new SOL_PACKET options that can be set and
1187  * are specific to this implementation of PF_PACKET, the current API does
1188  * not support doing a get on them to retrieve accompanying status. Thus
1189  * it is only currently possible to use SOL_PACKET with getsockopt to
1190  * retrieve statistical information. This remains consistant with the
1191  * Linux API at the time of writing.
1192  */
1193 static int
1194 pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1195     void *optval, socklen_t *optlenp)
1196 {
1197         struct pfpsock *ps;
1198         struct tpacket_stats_short tpss;
1199         int error = 0;
1200 
1201         ps = (struct pfpsock *)handle;
1202 
1203         switch (option_name) {
1204         case PACKET_STATISTICS :
1205                 if (*optlenp < sizeof (ps->ps_stats)) {
1206                         error = EINVAL;
1207                         break;
1208                 }
1209                 *optlenp = sizeof (ps->ps_stats);
1210                 bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1211                 break;
1212         case PACKET_STATISTICS_SHORT :
1213                 if (*optlenp < sizeof (tpss)) {
1214                         error = EINVAL;
1215                         break;
1216                 }
1217                 *optlenp = sizeof (tpss);
1218                 tpss.tp_packets = ps->ps_stats.tp_packets;
1219                 tpss.tp_drops = ps->ps_stats.tp_drops;
1220                 bcopy(&tpss, optval, sizeof (tpss));
1221                 break;
1222         default :
1223                 error = EINVAL;
1224                 break;
1225         }
1226 
1227         return (error);
1228 }
1229 
1230 /*
1231  * The SOL_PACKET level for socket options supports three options,
1232  * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1233  * This function is responsible for mapping the two socket options
1234  * that manage multicast membership into the appropriate internal
1235  * function calls to bring the option into effect. Whilst direct
1236  * changes to the multicast membership (ADD/DROP) groups is handled
1237  * by calls directly into the mac module, changes to the promiscuos
1238  * mode are vectored through pfp_set_promisc() so that the logic for
1239  * managing the promiscuous mode is in one place.
1240  */
1241 /* ARGSUSED */
1242 static int
1243 pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1244     const void *optval, socklen_t optlen)
1245 {
1246         struct packet_mreq mreq;
1247         struct pfpsock *ps;
1248         int error = 0;
1249         int opt;
1250 
1251         ps = (struct pfpsock *)handle;
1252         if (!ps->ps_bound)
1253                 return (EPROTO);
1254 
1255         if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1256             (option_name == PACKET_DROP_MEMBERSHIP)) {
1257                 if (!ps->ps_bound)
1258                         return (EPROTO);
1259                 bcopy(optval, &mreq, sizeof (mreq));
1260                 if (ps->ps_linkid != mreq.mr_ifindex)
1261                         return (EINVAL);
1262         }
1263 
1264         switch (option_name) {
1265         case PACKET_ADD_MEMBERSHIP :
1266                 switch (mreq.mr_type) {
1267                 case PACKET_MR_MULTICAST :
1268                         if (mreq.mr_alen != ps->ps_sock.sll_halen)
1269                                 return (EINVAL);
1270 
1271                         error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1272                         break;
1273 
1274                 case PACKET_MR_PROMISC :
1275                         error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1276                         break;
1277 
1278                 case PACKET_MR_ALLMULTI :
1279                         error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1280                         break;
1281                 }
1282                 break;
1283 
1284         case PACKET_DROP_MEMBERSHIP :
1285                 switch (mreq.mr_type) {
1286                 case PACKET_MR_MULTICAST :
1287                         if (mreq.mr_alen != ps->ps_sock.sll_halen)
1288                                 return (EINVAL);
1289 
1290                         mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1291                         break;
1292 
1293                 case PACKET_MR_PROMISC :
1294                         if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1295                                 return (EINVAL);
1296                         error = pfp_set_promisc(ps,
1297                             MAC_CLIENT_PROMISC_FILTERED);
1298                         break;
1299 
1300                 case PACKET_MR_ALLMULTI :
1301                         if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1302                                 return (EINVAL);
1303                         error = pfp_set_promisc(ps,
1304                             MAC_CLIENT_PROMISC_FILTERED);
1305                         break;
1306                 }
1307                 break;
1308 
1309         case PACKET_AUXDATA :
1310                 if (optlen == sizeof (int)) {
1311                         opt = *(int *)optval;
1312                         ps->ps_auxdata = (opt != 0);
1313                 } else {
1314                         error = EINVAL;
1315                 }
1316                 break;
1317         default :
1318                 error = EINVAL;
1319                 break;
1320         }
1321 
1322         return (error);
1323 }
1324 
1325 /*
1326  * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1327  * SO_ATTACH_FILTER and SO_DETACH_FILTER.
1328  *
1329  * Both of these setsockopt values are candidates for being handled by the
1330  * socket layer itself in future, however this requires understanding how
1331  * they would interact with all other sockets.
1332  */
1333 static int
1334 pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1335     const void *optval, socklen_t optlen)
1336 {
1337         struct bpf_program prog;
1338         struct bpf_insn *fcode;
1339         struct pfpsock *ps;
1340         struct sock_proto_props sopp;
1341         int error = 0;
1342         int size;
1343 
1344         ps = (struct pfpsock *)handle;
1345 
1346         switch (option_name) {
1347         case SO_ATTACH_FILTER :
1348 #ifdef _LP64
1349                 if (optlen == sizeof (struct bpf_program32)) {
1350                         struct bpf_program32 prog32;
1351 
1352                         bcopy(optval, &prog32, sizeof (prog32));
1353                         prog.bf_len = prog32.bf_len;
1354                         prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1355                 } else
1356 #endif
1357                 if (optlen == sizeof (struct bpf_program)) {
1358                         bcopy(optval, &prog, sizeof (prog));
1359                 } else if (optlen != sizeof (struct bpf_program)) {
1360                         return (EINVAL);
1361                 }
1362                 if (prog.bf_len > BPF_MAXINSNS)
1363                         return (EINVAL);
1364 
1365                 size = prog.bf_len * sizeof (*prog.bf_insns);
1366                 fcode = kmem_alloc(size, KM_SLEEP);
1367                 if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1368                         kmem_free(fcode, size);
1369                         return (EFAULT);
1370                 }
1371 
1372                 if (bpf_validate(fcode, (int)prog.bf_len)) {
1373                         rw_enter(&ps->ps_bpflock, RW_WRITER);
1374                         pfp_release_bpf(ps);
1375                         ps->ps_bpf.bf_insns = fcode;
1376                         ps->ps_bpf.bf_len = size;
1377                         rw_exit(&ps->ps_bpflock);
1378 
1379                         return (0);
1380                 }
1381                 kmem_free(fcode, size);
1382                 error = EINVAL;
1383                 break;
1384 
1385         case SO_DETACH_FILTER :
1386                 pfp_release_bpf(ps);
1387                 break;
1388 
1389         case SO_RCVBUF :
1390                 size = *(int32_t *)optval;
1391                 if (size > sockmod_pfp_rcvbuf_max || size < 0)
1392                         return (ENOBUFS);
1393                 sopp.sopp_flags = SOCKOPT_RCVHIWAT;
1394                 sopp.sopp_rxhiwat = size;
1395                 ps->ps_upcalls->su_set_proto_props(ps->ps_upper, &sopp);
1396                 ps->ps_rcvbuf = size;
1397                 break;
1398 
1399         default :
1400                 error = ENOPROTOOPT;
1401                 break;
1402         }
1403 
1404         return (error);
1405 }
1406 
1407 /*
1408  * pfp_open_index is an internal function used to open a MAC device by
1409  * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1410  * because some of the interfaces provided by the mac layer require either
1411  * only the mac_handle_t or both it and mac_handle_t.
1412  *
1413  * Whilst inside the kernel we can access data structures supporting any
1414  * zone, access to interfaces from non-global zones is restricted to those
1415  * interfaces (if any) that are exclusively assigned to a zone.
1416  */
1417 static int
1418 pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1419     cred_t *cred)
1420 {
1421         mac_client_handle_t mch;
1422         zoneid_t ifzoneid;
1423         mac_handle_t mh;
1424         zoneid_t zoneid;
1425         int error;
1426 
1427         mh = 0;
1428         mch = 0;
1429         error = mac_open_by_linkid(index, &mh);
1430         if (error != 0)
1431                 goto bad_open;
1432 
1433         error = mac_client_open(mh, &mch, NULL,
1434             MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1435         if (error != 0)
1436                 goto bad_open;
1437 
1438         zoneid = crgetzoneid(cred);
1439         if (zoneid != GLOBAL_ZONEID) {
1440                 mac_perim_handle_t perim;
1441 
1442                 mac_perim_enter_by_mh(mh, &perim);
1443                 error = dls_link_getzid(mac_name(mh), &ifzoneid);
1444                 mac_perim_exit(perim);
1445                 if (error != 0)
1446                         goto bad_open;
1447                 if (ifzoneid != zoneid) {
1448                         error = EACCES;
1449                         goto bad_open;
1450                 }
1451         }
1452 
1453         *mcip = mch;
1454         *mhp = mh;
1455 
1456         return (0);
1457 bad_open:
1458         if (mch != 0)
1459                 mac_client_close(mch, 0);
1460         if (mh != 0)
1461                 mac_close(mh);
1462         return (error);
1463 }
1464 
1465 static void
1466 pfp_close(mac_handle_t mh, mac_client_handle_t mch)
1467 {
1468         mac_client_close(mch, 0);
1469         mac_close(mh);
1470 }
1471 
1472 /*
1473  * The purpose of this function is to provide a single place where we free
1474  * the loaded BPF program and reset all pointers/counters associated with
1475  * it.
1476  */
1477 static void
1478 pfp_release_bpf(struct pfpsock *ps)
1479 {
1480         if (ps->ps_bpf.bf_len != 0) {
1481                 kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1482                 ps->ps_bpf.bf_len = 0;
1483                 ps->ps_bpf.bf_insns = NULL;
1484         }
1485 }
1486 
1487 /*
1488  * Set the promiscuous mode of a network interface.
1489  * This function only calls the mac layer when there is a change to the
1490  * status of a network interface's promiscous mode. Tracking of how many
1491  * sockets have the network interface in promiscuous mode, and thus the
1492  * control over the physical device's status, is left to the mac layer.
1493  */
1494 static int
1495 pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1496 {
1497         int error = 0;
1498         int flags;
1499 
1500         /*
1501          * There are 4 combinations of turnon/ps_promisc.
1502          * This if handles 2 (both false, both true) and the if() below
1503          * handles the remaining one - when change is required.
1504          */
1505         if (turnon == ps->ps_promisc)
1506                 return (error);
1507 
1508         if (ps->ps_phd != 0) {
1509                 mac_promisc_remove(ps->ps_phd);
1510                 ps->ps_phd = 0;
1511 
1512                 /*
1513                  * ps_promisc is set here in case the call to mac_promisc_add
1514                  * fails: leaving it to indicate that the interface is still
1515                  * in some sort of promiscuous mode is false.
1516                  */
1517                 if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1518                         ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1519                         flags = MAC_PROMISC_FLAGS_NO_PHYS;
1520                 } else {
1521                         flags = 0;
1522                 }
1523                 flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1524         }
1525 
1526         error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1527             &ps->ps_phd, flags);
1528         if (error == 0)
1529                 ps->ps_promisc = turnon;
1530 
1531         return (error);
1532 }
1533 
1534 /*
1535  * This table maps the MAC types in Solaris to the ARPHRD_* values used
1536  * on Linux. This is used with the SIOCGIFHWADDR/SIOCGLIFHWADDR ioctl.
1537  *
1538  * The symbols in this table are *not* pulled in from <net/if_arp.h>,
1539  * they are pulled from <netpacket/packet.h>, thus it acts as a source
1540  * of supplementary information to the ARP table.
1541  */
1542 static uint_t arphrd_to_dl[][2] = {
1543         { ARPHRD_IEEE80211,     DL_WIFI },
1544         { ARPHRD_TUNNEL,        DL_IPV4 },
1545         { ARPHRD_TUNNEL,        DL_IPV6 },
1546         { ARPHRD_TUNNEL,        DL_6TO4 },
1547         { ARPHRD_AX25,          DL_X25 },
1548         { ARPHRD_ATM,           DL_ATM },
1549         { 0,                    0 }
1550 };
1551 
1552 static int
1553 pfp_dl_to_arphrd(int dltype)
1554 {
1555         int i;
1556 
1557         for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1558                 if (arphrd_to_dl[i][1] == dltype)
1559                         return (arphrd_to_dl[i][0]);
1560         return (arp_hw_type(dltype));
1561 }