1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2015 Joyent, Inc. All rights reserved.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/stropts.h>
  31 #include <sys/socket.h>
  32 #include <sys/socketvar.h>
  33 #include <sys/socket_proto.h>
  34 #include <sys/sockio.h>
  35 #include <sys/strsun.h>
  36 #include <sys/kstat.h>
  37 #include <sys/modctl.h>
  38 #include <sys/policy.h>
  39 #include <sys/priv_const.h>
  40 #include <sys/tihdr.h>
  41 #include <sys/zone.h>
  42 #include <sys/time.h>
  43 #include <sys/ethernet.h>
  44 #include <sys/llc1.h>
  45 #include <fs/sockfs/sockcommon.h>
  46 #include <net/if.h>
  47 #include <inet/ip_arp.h>
  48 
  49 #include <sys/dls.h>
  50 #include <sys/mac.h>
  51 #include <sys/mac_client.h>
  52 #include <sys/mac_provider.h>
  53 #include <sys/mac_client_priv.h>
  54 
  55 #include <netpacket/packet.h>
  56 
     /*
      * Forward declarations of the file-local pfp_* helpers.
      *
      * pfp_open_index() is handed an interface index and fills in a
      * mac_handle_t / mac_client_handle_t pair; pfp_close() releases such a
      * pair again.  pfp_packet() is the receive callback registered with the
      * mac layer by mac_promisc_add() when a socket is bound.  The *_sockopt
      * helpers are what sdpfp_getsockopt()/sdpfp_setsockopt() dispatch to for
      * the SOL_SOCKET and SOL_PACKET levels.
      */
  57 static void pfp_close(mac_handle_t, mac_client_handle_t);
  58 static int pfp_dl_to_arphrd(int);
  59 static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
  60     socklen_t *);
  61 static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *, int);
  62 static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *,
  63     int);
  64 static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
  65     cred_t *);
  66 static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
  67 static void pfp_release_bpf(struct pfpsock *);
  68 static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
  69 static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
  70     socklen_t);
  71 static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
  72     socklen_t);
  73 
  74 /*
  75  * PFP sockfs operations
  76  * Most are currently no-ops because they have no meaning for a connectionless
  77  * socket.
  78  */
     /* Downcall implementations; these are the entries used in pfp_downcalls. */
  79 static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
  80     sock_upcalls_t *, int, struct cred *);
  81 static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
  82     struct cred *);
  83 static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
  84 static void sdpfp_clr_flowctrl(sock_lower_handle_t);
  85 static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
  86     socklen_t *, struct cred *);
  87 static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
  88     struct cred *);
  89 static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
  90     struct cred *);
  91 static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
  92     socklen_t, struct cred *);
  93 
     /* Socket creation routine; registered with sockfs through sinfo. */
  94 static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
  95     uint_t *, int *, int, cred_t *);
  96 
     /* Module-private setup/teardown, called from _init() and _fini(). */
  97 static int sockpfp_init(void);
  98 static void sockpfp_fini(void);
  99 
     /*
      * Global kstat state.
      *
      * pfp_ksp is the handle returned by kstat_create() in sockpfp_init(); it
      * stays NULL when the kstat could not be created.  ks_stats holds the live
      * counters that the receive and send paths increment.  pfp_kstats is the
      * name/type template that sockpfp_init() copies into ks_stats.
      *
      * The initialisers below are positional.
      * NOTE(review): this assumes their order matches the kp_* member order of
      * pfp_kstats_t - confirm against that type's declaration before adding or
      * reordering entries.
      */
 100 static kstat_t *pfp_ksp;
 101 static pfp_kstats_t ks_stats;
 102 static pfp_kstats_t pfp_kstats = {
 103         /*
 104          * Each one of these kstats is a different return path in handling
 105          * a packet received from the mac layer.
 106          */
 107         { "recvMacHeaderFail",  KSTAT_DATA_UINT64 },
 108         { "recvBadProtocol",    KSTAT_DATA_UINT64 },
 109         { "recvAllocbFail",     KSTAT_DATA_UINT64 },
 110         { "recvOk",             KSTAT_DATA_UINT64 },
 111         { "recvFail",           KSTAT_DATA_UINT64 },
 112         { "recvFiltered",       KSTAT_DATA_UINT64 },
 113         { "recvFlowControl",    KSTAT_DATA_UINT64 },
 114         /*
 115          * A global set of counters is maintained to track the behaviour
 116          * of the system (kernel & applications) in sending packets.
 117          */
 118         { "sendUnbound",        KSTAT_DATA_UINT64 },
 119         { "sendFailed",         KSTAT_DATA_UINT64 },
 120         { "sendTooBig",         KSTAT_DATA_UINT64 },
 121         { "sendAllocFail",      KSTAT_DATA_UINT64 },
 122         { "sendUiomoveFail",    KSTAT_DATA_UINT64 },
 123         { "sendNoMemory",       KSTAT_DATA_UINT64 },
 124         { "sendOpenFail",       KSTAT_DATA_UINT64 },
 125         { "sendWrongFamily",    KSTAT_DATA_UINT64 },
 126         { "sendShortMsg",       KSTAT_DATA_UINT64 },
 127         { "sendOk",             KSTAT_DATA_UINT64 }
 128 };
 129 
     /*
      * Downcall vector handed back to sockfs by sockpfp_create().
      *
      * The table is positional.  Operations that this module does not provide
      * are filled with the generic sock_*_notsupp routines; everything else
      * points at the sdpfp_* implementations in this file.
      * NOTE(review): the single NULL entry is presumably the receive-uio
      * downcall slot - confirm against the sock_downcalls_t declaration.
      */
 130 sock_downcalls_t pfp_downcalls = {
 131         sdpfp_activate,
 132         sock_accept_notsupp,
 133         sdpfp_bind,
 134         sock_listen_notsupp,
 135         sock_connect_notsupp,
 136         sock_getpeername_notsupp,
 137         sock_getsockname_notsupp,
 138         sdpfp_getsockopt,
 139         sdpfp_setsockopt,
 140         sock_send_notsupp,
 141         sdpfp_senduio,
 142         NULL,
 143         sock_poll_notsupp,
 144         sock_shutdown_notsupp,
 145         sdpfp_clr_flowctrl,
 146         sdpfp_ioctl,
 147         sdpfp_close,
 148 };
 149 
     /*
      * Socket module registration record, referenced from modlsockmod below.
      * It names the module ("sockpfp"), states the upcall/downcall interface
      * versions it was built against and supplies sockpfp_create() as the
      * routine used to create a socket.
      * NOTE(review): the trailing NULL is an optional member this module does
      * not use - confirm which one against the smod_reg_t declaration.
      */
 150 static smod_reg_t sinfo = {
 151         SOCKMOD_VERSION,
 152         "sockpfp",
 153         SOCK_UC_VERSION,
 154         SOCK_DC_VERSION,
 155         sockpfp_create,
 156         NULL
 157 };
 158 
     /*
      * Protocol mapping table consulted by sockpfp_create().
      *
      * Column 0 is a protocol number as passed to socket(); column 1 is the
      * value stored in ps_proto in its place.  A stored value of 0 makes
      * pfp_packet() skip its SAP comparison, i.e. the socket accepts every
      * protocol.  Protocol numbers below 0x800 that are not listed here are
      * rejected by sockpfp_create().
      */
 159 static int accepted_protos[3][2] = {
 160         { ETH_P_ALL,    0 },
 161         { ETH_P_802_2,  LLC_SNAP_SAP },
 162         { ETH_P_803_3,  0 },
 163 };
 164 
 165 /*
 166  * This sets an upper bound on the size of the receive buffer for a PF_PACKET
 167  * socket. More properly, this should be controlled through ipadm, ala TCP, UDP,
 168  * SCTP, etc. Until that's done, this provides a hard cap of 4 MB and allows an
 169  * opportunity for it to be changed, should it be needed.
      *
      * The variable is deliberately not static or const so that the value can
      * be adjusted.
      * NOTE(review): presumably the limit is applied when SO_RCVBUF is set
      * (pfp_setsocket_sockopt()) - confirm where it is consumed.
 170  */
 171 int sockmod_pfp_rcvbuf_max = 1024 * 1024 * 4;
 172 
 173 /*
 174  * Module linkage information for the kernel.
 175  */
     /*
      * modlsockmod ties the socket-module ops vector and a description string
      * to the registration record (sinfo) above.
      */
 176 static struct modlsockmod modlsockmod = {
 177         &mod_sockmodops, "PF Packet socket module", &sinfo
 178 };
 179 
     /*
      * NULL-terminated linkage list passed to mod_install(), mod_remove() and
      * mod_info() by _init(), _fini() and _info().
      */
 180 static struct modlinkage modlinkage = {
 181         MODREV_1,
 182         &modlsockmod,
 183         NULL
 184 };
 185 
 186 int
 187 _init(void)
 188 {
 189         int error;
 190 
 191         error = sockpfp_init();
 192         if (error != 0)
 193                 return (error);
 194 
 195         error = mod_install(&modlinkage);
 196         if (error != 0)
 197                 sockpfp_fini();
 198 
 199         return (error);
 200 }
 201 
 202 int
 203 _fini(void)
 204 {
 205         int error;
 206 
 207         error = mod_remove(&modlinkage);
 208         if (error == 0)
 209                 sockpfp_fini();
 210 
 211         return (error);
 212 }
 213 
 214 int
 215 _info(struct modinfo *modinfop)
 216 {
 217         return (mod_info(&modlinkage, modinfop));
 218 }
 219 
 220 /*
 221  * sockpfp_init: called as part of the initialisation of the module when
 222  * loaded into the kernel.
 223  *
 224  * Being able to create and record the kstats data in the kernel is not
 225  * considered to be vital to the operation of this kernel module, thus
 226  * its failure is tolerated.
 227  */
 228 static int
 229 sockpfp_init(void)
 230 {
 231         (void) memset(&ks_stats, 0, sizeof (ks_stats));
 232 
 233         (void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
 234 
 235         pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
 236             KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
 237             KSTAT_FLAG_VIRTUAL);
 238         if (pfp_ksp != NULL) {
 239                 pfp_ksp->ks_data = &ks_stats;
 240                 kstat_install(pfp_ksp);
 241         }
 242 
 243         return (0);
 244 }
 245 
 246 /*
 247  * sockpfp_fini: called when the operating system wants to unload the
 248  * socket module from the kernel.
 249  */
 250 static void
 251 sockpfp_fini(void)
 252 {
 253         if (pfp_ksp != NULL)
 254                 kstat_delete(pfp_ksp);
 255 }
 256 
 257 /*
 258  * Due to sockets being created read-write by default, all PF_PACKET sockets
 259  * therefore require the NET_RAWACCESS priviliege, even if the socket is only
 260  * being used for reading packets from.
 261  *
 262  * This create function enforces this module only being used with PF_PACKET
 263  * sockets and the policy that we support via the config file in sock2path.d:
 264  * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
 265  */
 266 /* ARGSUSED */
 267 static sock_lower_handle_t
 268 sockpfp_create(int family, int type, int proto,
 269     sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
 270     int sflags, cred_t *cred)
 271 {
 272         struct pfpsock *ps;
 273         int kmflags;
 274         int newproto;
 275         int i;
 276 
 277         if (secpolicy_net_rawaccess(cred) != 0) {
 278                 *errorp = EACCES;
 279                 return (NULL);
 280         }
 281 
 282         if (family != AF_PACKET) {
 283                 *errorp = EAFNOSUPPORT;
 284                 return (NULL);
 285         }
 286 
 287         if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
 288                 *errorp = ESOCKTNOSUPPORT;
 289                 return (NULL);
 290         }
 291 
 292         /*
 293          * First check to see if the protocol number passed in via the socket
 294          * creation should be mapped to a different number for internal use.
 295          */
 296         for (i = 0, newproto = -1;
 297             i < sizeof (accepted_protos)/ sizeof (accepted_protos[0]); i++) {
 298                 if (accepted_protos[i][0] == proto) {
 299                         newproto = accepted_protos[i][1];
 300                         break;
 301                 }
 302         }
 303 
 304         /*
 305          * If the mapping of the protocol that was under 0x800 failed to find
 306          * a local equivalent then fail the socket creation. If the protocol
 307          * for the socket is over 0x800 and it was not found in the mapping
 308          * table above, then use the value as is.
 309          */
 310         if (newproto == -1) {
 311                 if (proto < 0x800) {
 312                         *errorp = ENOPROTOOPT;
 313                         return (NULL);
 314                 }
 315                 newproto = proto;
 316         }
 317         proto = newproto;
 318 
 319         kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
 320         ps = kmem_zalloc(sizeof (*ps), kmflags);
 321         if (ps == NULL) {
 322                 *errorp = ENOMEM;
 323                 return (NULL);
 324         }
 325 
 326         ps->ps_type = type;
 327         ps->ps_proto = proto;
 328         rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
 329         mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
 330 
 331         *sock_downcalls = &pfp_downcalls;
 332         /*
 333          * Setting this causes bytes from a packet that do not fit into the
 334          * destination user buffer to be discarded. Thus the API is one
 335          * packet per receive and callers are required to use a buffer large
 336          * enough for the biggest packet that the interface can provide.
 337          */
 338         *smodep = SM_ATOMIC;
 339 
 340         return ((sock_lower_handle_t)ps);
 341 }
 342 
 343 /* ************************************************************************* */
 344 
 345 /*
 346  * pfp_packet is the callback function that is given to the mac layer for
 347  * PF_PACKET to receive packets with. One packet at a time is passed into
 348  * this function from the mac layer. Each packet is a private copy given
 349  * to PF_PACKET to modify or free as it wishes and does not harm the original
 350  * packet from which it was cloned.
 351  */
 352 /* ARGSUSED */
 353 static void
 354 pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
 355 {
 356         struct T_unitdata_ind *tunit;
 357         struct sockaddr_ll *sll;
 358         struct sockaddr_ll *sol;
 359         mac_header_info_t hdr;
 360         struct pfpsock *ps;
 361         size_t tusz;
 362         mblk_t *mp0;
 363         int error;
 364 
 365         if (mp == NULL)
 366                 return;
 367 
 368         ps = arg;
 369         if (ps->ps_flow_ctrld) {
 370                 ps->ps_flow_ctrl_drops++;
 371                 ps->ps_stats.tp_drops++;
 372                 ks_stats.kp_recv_flow_cntrld.value.ui64++;
 373                 freemsg(mp);
 374                 return;
 375         }
 376 
 377         if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
 378                 /*
 379                  * Can't decode the packet header information so drop it.
 380                  */
 381                 ps->ps_stats.tp_drops++;
 382                 ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
 383                 freemsg(mp);
 384                 return;
 385         }
 386 
 387         if (mac_type(ps->ps_mh) == DL_ETHER &&
 388             hdr.mhi_bindsap == ETHERTYPE_VLAN) {
 389                 struct ether_vlan_header *evhp;
 390                 struct ether_vlan_header evh;
 391 
 392                 hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
 393                 hdr.mhi_istagged = B_TRUE;
 394 
 395                 if (MBLKL(mp) >= sizeof (*evhp)) {
 396                         evhp = (struct ether_vlan_header *)mp->b_rptr;
 397                 } else {
 398                         int sz = sizeof (*evhp);
 399                         char *s = (char *)&evh;
 400                         mblk_t *tmp;
 401                         int len;
 402 
 403                         for (tmp = mp; sz > 0 && tmp != NULL;
 404                             tmp = tmp->b_cont) {
 405                                 len = min(sz, MBLKL(tmp));
 406                                 bcopy(tmp->b_rptr, s, len);
 407                                 sz -= len;
 408                         }
 409                         evhp = &evh;
 410                 }
 411                 hdr.mhi_tci = ntohs(evhp->ether_tci);
 412                 hdr.mhi_bindsap = ntohs(evhp->ether_type);
 413         }
 414 
 415         if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
 416                 /*
 417                  * The packet is not of interest to this socket so
 418                  * drop it on the floor. Here the SAP is being used
 419                  * as a very course filter.
 420                  */
 421                 ps->ps_stats.tp_drops++;
 422                 ks_stats.kp_recv_bad_proto.value.ui64++;
 423                 freemsg(mp);
 424                 return;
 425         }
 426 
 427         /*
 428          * This field is not often set, even for ethernet,
 429          * by mac_header_info, so compute it if it is 0.
 430          */
 431         if (hdr.mhi_pktsize == 0)
 432                 hdr.mhi_pktsize = msgdsize(mp);
 433 
 434         /*
 435          * If a BPF filter is present, pass the raw packet into that.
 436          * A failed match will result in zero being returned, indicating
 437          * that this socket is not interested in the packet.
 438          */
 439         if (ps->ps_bpf.bf_len != 0) {
 440                 uchar_t *buffer;
 441                 int buflen;
 442 
 443                 buflen = MBLKL(mp);
 444                 if (hdr.mhi_pktsize == buflen) {
 445                         buffer = mp->b_rptr;
 446                 } else {
 447                         buflen = 0;
 448                         buffer = (uchar_t *)mp;
 449                 }
 450                 rw_enter(&ps->ps_bpflock, RW_READER);
 451                 if (bpf_filter(ps->ps_bpf.bf_insns, buffer,
 452                     hdr.mhi_pktsize, buflen) == 0) {
 453                         rw_exit(&ps->ps_bpflock);
 454                         ps->ps_stats.tp_drops++;
 455                         ks_stats.kp_recv_filtered.value.ui64++;
 456                         freemsg(mp);
 457                         return;
 458                 }
 459                 rw_exit(&ps->ps_bpflock);
 460         }
 461 
 462         if (ps->ps_type == SOCK_DGRAM) {
 463                 /*
 464                  * SOCK_DGRAM socket expect a "layer 3" packet, so advance
 465                  * past the link layer header.
 466                  */
 467                 mp->b_rptr += hdr.mhi_hdrsize;
 468                 hdr.mhi_pktsize -= hdr.mhi_hdrsize;
 469         }
 470 
 471         tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
 472         if (ps->ps_auxdata) {
 473                 tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
 474                 tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
 475         }
 476 
 477         /*
 478          * It is tempting to think that this could be optimised by having
 479          * the base mblk_t allocated and hung off the pfpsock structure,
 480          * except that then another one would need to be allocated for the
 481          * sockaddr_ll that is included. Even creating a template to copy
 482          * from is of questionable value, as read-write from one structure
 483          * to the other is going to be slower than all of the initialisation.
 484          */
 485         mp0 = allocb(tusz, BPRI_HI);
 486         if (mp0 == NULL) {
 487                 ps->ps_stats.tp_drops++;
 488                 ks_stats.kp_recv_alloc_fail.value.ui64++;
 489                 freemsg(mp);
 490                 return;
 491         }
 492 
 493         (void) memset(mp0->b_rptr, 0, tusz);
 494 
 495         mp0->b_datap->db_type = M_PROTO;
 496         mp0->b_wptr = mp0->b_rptr + tusz;
 497 
 498         tunit = (struct T_unitdata_ind *)mp0->b_rptr;
 499         tunit->PRIM_type = T_UNITDATA_IND;
 500         tunit->SRC_length = sizeof (struct sockaddr);
 501         tunit->SRC_offset = sizeof (*tunit);
 502 
 503         sol = &ps->ps_sock;
 504         sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
 505         sll->sll_ifindex = sol->sll_ifindex;
 506         sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
 507         sll->sll_halen = sol->sll_halen;
 508         if (hdr.mhi_saddr != NULL)
 509                 (void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
 510 
 511         switch (hdr.mhi_dsttype) {
 512         case MAC_ADDRTYPE_MULTICAST :
 513                 sll->sll_pkttype = PACKET_MULTICAST;
 514                 break;
 515         case MAC_ADDRTYPE_BROADCAST :
 516                 sll->sll_pkttype = PACKET_BROADCAST;
 517                 break;
 518         case MAC_ADDRTYPE_UNICAST :
 519                 if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
 520                         sll->sll_pkttype = PACKET_HOST;
 521                 else
 522                         sll->sll_pkttype = PACKET_OTHERHOST;
 523                 break;
 524         }
 525 
 526         if (ps->ps_auxdata) {
 527                 struct tpacket_auxdata *aux;
 528                 struct T_opthdr *topt;
 529 
 530                 tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
 531                     sizeof (struct sockaddr_ll));
 532                 tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
 533                     _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
 534 
 535                 topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
 536                 aux = (struct tpacket_auxdata *)
 537                     ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
 538 
 539                 topt->len = tunit->OPT_length;
 540                 topt->level = SOL_PACKET;
 541                 topt->name = PACKET_AUXDATA;
 542                 topt->status = 0;
 543                 /*
 544                  * libpcap doesn't seem to use any other field,
 545                  * so it isn't clear how they should be filled in.
 546                  */
 547                 aux->tp_vlan_vci = hdr.mhi_tci;
 548         }
 549 
 550         linkb(mp0, mp);
 551 
 552         (void) gethrestime(&ps->ps_timestamp);
 553 
 554         ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
 555             &error, NULL);
 556 
 557         if (error == 0) {
 558                 ps->ps_stats.tp_packets++;
 559                 ks_stats.kp_recv_ok.value.ui64++;
 560         } else {
 561                 mutex_enter(&ps->ps_lock);
 562                 if (error == ENOSPC) {
 563                         ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
 564                             &error, NULL);
 565                         if (error == ENOSPC)
 566                                 ps->ps_flow_ctrld = B_TRUE;
 567                 }
 568                 mutex_exit(&ps->ps_lock);
 569                 ps->ps_stats.tp_drops++;
 570                 ks_stats.kp_recv_fail.value.ui64++;
 571         }
 572 }
 573 
 574 /*
 575  * Bind a PF_PACKET socket to a network interface.
 576  *
 577  * The default operation of this bind() is to place the socket (and thus the
 578  * network interface) into promiscuous mode. It is then up to the application
 579  * to turn that down by issuing the relevant ioctls, if desired.
 580  */
 581 static int
 582 sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
 583     socklen_t addrlen, struct cred *cred)
 584 {
 585         struct sockaddr_ll *addr_ll, *sol;
 586         mac_client_handle_t mch;
 587         struct pfpsock *ps;
 588         mac_handle_t mh;
 589         int error;
 590 
 591         ps = (struct pfpsock *)handle;
 592         if (ps->ps_bound)
 593                 return (EINVAL);
 594 
 595         if (addrlen < sizeof (struct sockaddr_ll) || addr == NULL)
 596                 return (EINVAL);
 597 
 598         addr_ll = (struct sockaddr_ll *)addr;
 599 
 600         error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
 601         if (error != 0)
 602                 return (error);
 603         /*
 604          * Ensure that each socket is only bound once.
 605          */
 606         mutex_enter(&ps->ps_lock);
 607         if (ps->ps_mh != 0) {
 608                 mutex_exit(&ps->ps_lock);
 609                 pfp_close(mh, mch);
 610                 return (EADDRINUSE);
 611         }
 612         ps->ps_mh = mh;
 613         ps->ps_mch = mch;
 614         mutex_exit(&ps->ps_lock);
 615 
 616         /*
 617          * Cache all of the information from bind so that it's in an easy
 618          * place to get at when packets are received.
 619          */
 620         sol = &ps->ps_sock;
 621         sol->sll_family = AF_PACKET;
 622         sol->sll_ifindex = addr_ll->sll_ifindex;
 623         sol->sll_protocol = addr_ll->sll_protocol;
 624         sol->sll_halen = mac_addr_len(ps->ps_mh);
 625         mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
 626         mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
 627         ps->ps_linkid = addr_ll->sll_ifindex;
 628 
 629         error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
 630             pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
 631         if (error == 0) {
 632                 ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
 633                 ps->ps_bound = B_TRUE;
 634         }
 635 
 636         return (error);
 637 }
 638 
 639 /* ARGSUSED */
 640 static void
 641 sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
 642     sock_upcalls_t *upcalls, int flags, cred_t *cred)
 643 {
 644         struct pfpsock *ps;
 645 
 646         ps = (struct pfpsock *)lower;
 647         ps->ps_upper = upper;
 648         ps->ps_upcalls = upcalls;
 649 }
 650 
 651 /*
 652  * This module only implements getting socket options for the new socket
 653  * option level (SOL_PACKET) that it introduces. All other requests are
 654  * passed back to the sockfs layer.
 655  */
 656 /* ARGSUSED */
 657 static int
 658 sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
 659     void *optval, socklen_t *optlenp, struct cred *cred)
 660 {
 661         struct pfpsock *ps;
 662         int error = 0;
 663 
 664         ps = (struct pfpsock *)handle;
 665 
 666         switch (level) {
 667         case SOL_PACKET :
 668                 error = pfp_getpacket_sockopt(handle, option_name, optval,
 669                     optlenp);
 670                 break;
 671 
 672         case SOL_SOCKET :
 673                 if (option_name == SO_RCVBUF) {
 674                         if (*optlenp < sizeof (int32_t))
 675                                 return (EINVAL);
 676                         *((int32_t *)optval) = ps->ps_rcvbuf;
 677                         *optlenp = sizeof (int32_t);
 678                 } else {
 679                         error = ENOPROTOOPT;
 680                 }
 681                 break;
 682 
 683         default :
 684                 /*
 685                  * If sockfs code receives this error in return from the
 686                  * getsockopt downcall it handles the option locally, if
 687                  * it can.
 688                  */
 689                 error = ENOPROTOOPT;
 690                 break;
 691         }
 692 
 693         return (error);
 694 }
 695 
 696 /*
 697  * PF_PACKET supports setting socket options at only two levels:
 698  * SOL_SOCKET and SOL_PACKET.
 699  */
 700 /* ARGSUSED */
 701 static int
 702 sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
 703     const void *optval, socklen_t optlen, struct cred *cred)
 704 {
 705         int error = 0;
 706 
 707         switch (level) {
 708         case SOL_SOCKET :
 709                 error = pfp_setsocket_sockopt(handle, option_name, optval,
 710                     optlen);
 711                 break;
 712         case SOL_PACKET :
 713                 error = pfp_setpacket_sockopt(handle, option_name, optval,
 714                     optlen);
 715                 break;
 716         default :
 717                 error = EINVAL;
 718                 break;
 719         }
 720 
 721         return (error);
 722 }
 723 
 724 /*
 725  * This function is incredibly inefficient for sending any packet that
 726  * comes with a msghdr asking to be sent to an interface to which the
 727  * socket has not been bound. Some possibilities here are keeping a
 728  * cache of all open mac's and mac_client's, for the purpose of sending,
 729  * and closing them after some amount of inactivity. Clearly, applications
 730  * should not be written to use one socket for multiple interfaces if
 731  * performance is desired with the code as is.
 732  */
 733 /* ARGSUSED */
 734 static int
 735 sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
 736     struct nmsghdr *msg, struct cred *cred)
 737 {
 738         struct sockaddr_ll *sol;
 739         mac_client_handle_t mch;
 740         struct pfpsock *ps;
 741         boolean_t new_open;
 742         mac_handle_t mh;
 743         size_t mpsize;
 744         uint_t maxsdu;
 745         mblk_t *mp0;
 746         mblk_t *mp;
 747         int error;
 748 
 749         mp = NULL;
 750         mp0 = NULL;
 751         new_open = B_FALSE;
 752         ps = (struct pfpsock *)handle;
 753         mh = ps->ps_mh;
 754         mch = ps->ps_mch;
 755         maxsdu = ps->ps_max_sdu;
 756 
 757         sol = (struct sockaddr_ll *)msg->msg_name;
 758         if (sol == NULL) {
 759                 /*
 760                  * If no sockaddr_ll has been provided with the send call,
 761                  * use the one constructed when the socket was bound to an
 762                  * interface and fail if it hasn't been bound.
 763                  */
 764                 if (!ps->ps_bound) {
 765                         ks_stats.kp_send_unbound.value.ui64++;
 766                         return (EPROTO);
 767                 }
 768                 sol = &ps->ps_sock;
 769         } else {
 770                 /*
 771                  * Verify the sockaddr_ll message passed down before using
 772                  * it to send a packet out with. If it refers to an interface
 773                  * that has not been bound, it is necessary to open it.
 774                  */
 775                 struct sockaddr_ll *sll;
 776 
 777                 if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
 778                         ks_stats.kp_send_short_msg.value.ui64++;
 779                         return (EINVAL);
 780                 }
 781 
 782                 if (sol->sll_family != AF_PACKET) {
 783                         ks_stats.kp_send_wrong_family.value.ui64++;
 784                         return (EAFNOSUPPORT);
 785                 }
 786 
 787                 sll = &ps->ps_sock;
 788                 if (sol->sll_ifindex != sll->sll_ifindex) {
 789                         error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
 790                             cred);
 791                         if (error != 0) {
 792                                 ks_stats.kp_send_open_fail.value.ui64++;
 793                                 return (error);
 794                         }
 795                         mac_sdu_get(mh, NULL, &maxsdu);
 796                         new_open = B_TRUE;
 797                 }
 798         }
 799 
 800         mpsize = uiop->uio_resid;
 801         if (mpsize > maxsdu) {
 802                 ks_stats.kp_send_too_big.value.ui64++;
 803                 error = EMSGSIZE;
 804                 goto done;
 805         }
 806 
 807         if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
 808                 ks_stats.kp_send_alloc_fail.value.ui64++;
 809                 error = ENOBUFS;
 810                 goto done;
 811         }
 812 
 813         mp->b_wptr = mp->b_rptr + mpsize;
 814         error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
 815         if (error != 0) {
 816                 ks_stats.kp_send_uiomove_fail.value.ui64++;
 817                 goto done;
 818         }
 819 
 820         if (ps->ps_type == SOCK_DGRAM) {
 821                 mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
 822                 if (mp0 == NULL) {
 823                         ks_stats.kp_send_no_memory.value.ui64++;
 824                         error = ENOBUFS;
 825                         goto done;
 826                 }
 827                 linkb(mp0, mp);
 828                 mp = mp0;
 829         }
 830 
 831         /*
 832          * As this is sending datagrams and no promise is made about
 833          * how or if a packet will be sent/delivered, no effort is to
 834          * be expended in recovering from a situation where the packet
 835          * cannot be sent - it is just dropped.
 836          */
 837         error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
 838         if (error == 0) {
 839                 mp = NULL;
 840                 ks_stats.kp_send_ok.value.ui64++;
 841         } else {
 842                 ks_stats.kp_send_failed.value.ui64++;
 843         }
 844 
 845 done:
 846 
 847         if (new_open) {
 848                 ASSERT(mch != ps->ps_mch);
 849                 ASSERT(mh != ps->ps_mh);
 850                 pfp_close(mh, mch);
 851         }
 852         if (mp != NULL)
 853                 freemsg(mp);
 854 
 855         return (error);
 856 
 857 }
 858 
 859 /*
 860  * There's no use of a lock here, or at the bottom of pfp_packet() where
 861  * ps_flow_ctrld is set to true, because in a situation where these two
 862  * are racing to set the flag one way or the other, the end result is
 863  * going to be ultimately determined by the scheduler anyway - which of
 864  * the two threads gets the lock first? In such an operational environment,
 865  * we've got packets arriving too fast to be delt with so packets are going
 866  * to be dropped. Grabbing a lock just makes the drop more expensive.
 867  */
 868 static void
 869 sdpfp_clr_flowctrl(sock_lower_handle_t handle)
 870 {
 871         struct pfpsock *ps;
 872 
 873         ps = (struct pfpsock *)handle;
 874 
 875         mutex_enter(&ps->ps_lock);
 876         ps->ps_flow_ctrld = B_FALSE;
 877         mutex_exit(&ps->ps_lock);
 878 }
 879 
 880 /*
 881  * The implementation of this ioctl() handler is intended to function
 882  * in the absence of a bind() being made before it is called. Thus the
 883  * function calls mac_open() itself to provide a handle
 884  * This function is structured like this:
 885  * - determine the linkid for the interface being targetted
 886  * - open the interface with said linkid
 887  * - perform ioctl
 888  * - copy results back to caller
 889  *
 890  * The ioctls that interact with interface flags have been implented below
 891  * to assume that the interface is always up and running (IFF_RUNNING) and
 892  * to use the state of this socket to determine whether or not the network
 893  * interface is in promiscuous mode. Thus an ioctl to get the interface flags
 894  * of an interface that has been put in promiscuous mode by another socket
 895  * (in the same program or different), will not report that status.
 896  */
 897 /* ARGSUSED */
 898 static int
 899 sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
 900     int32_t *rval, struct cred *cr)
 901 {
 902         struct timeval tival;
 903         mac_client_promisc_type_t mtype;
 904         struct sockaddr_dl *sock;
 905         datalink_id_t linkid;
 906         struct lifreq lifreq;
 907         struct ifreq ifreq;
 908         struct pfpsock *ps;
 909         mac_handle_t mh;
 910         int error;
 911 
 912         ps = (struct pfpsock *)handle;
 913 
 914         switch (cmd) {
 915         /*
 916          * ioctls that work on "struct lifreq"
 917          */
 918         case SIOCSLIFFLAGS :
 919         case SIOCGLIFINDEX :
 920         case SIOCGLIFFLAGS :
 921         case SIOCGLIFMTU :
 922         case SIOCGLIFHWADDR :
 923                 error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid, mod);
 924                 if (error != 0)
 925                         return (error);
 926                 break;
 927 
 928         /*
 929          * ioctls that work on "struct ifreq".
 930          * Not all of these have a "struct lifreq" partner, for example
 931          * SIOCGIFHWADDR, for the simple reason that the logical interface
 932          * does not have a hardware address.
 933          */
 934         case SIOCSIFFLAGS :
 935         case SIOCGIFINDEX :
 936         case SIOCGIFFLAGS :
 937         case SIOCGIFMTU :
 938         case SIOCGIFHWADDR :
 939                 error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid, mod);
 940                 if (error != 0)
 941                         return (error);
 942                 break;
 943 
 944         case SIOCGSTAMP :
 945                 tival.tv_sec = (time_t)ps->ps_timestamp.tv_sec;
 946                 tival.tv_usec = ps->ps_timestamp.tv_nsec / 1000;
 947                 if (get_udatamodel() == DATAMODEL_NATIVE) {
 948                         error = ddi_copyout(&tival, (void *)arg,
 949                             sizeof (tival), mod);
 950                 }
 951 #ifdef _SYSCALL32_IMPL
 952                 else {
 953                         struct timeval32 tv32;
 954                         TIMEVAL_TO_TIMEVAL32(&tv32, &tival);
 955                         error = ddi_copyout(&tv32, (void *)arg,
 956                             sizeof (tv32), mod);
 957                 }
 958 #endif
 959                 return (error);
 960         }
 961 
 962         error =  mac_open_by_linkid(linkid, &mh);
 963         if (error != 0)
 964                 return (error);
 965 
 966         switch (cmd) {
 967         case SIOCGLIFINDEX :
 968                 lifreq.lifr_index = linkid;
 969                 break;
 970 
 971         case SIOCGIFINDEX :
 972                 ifreq.ifr_index = linkid;
 973                 break;
 974 
 975         case SIOCGIFFLAGS :
 976                 ifreq.ifr_flags = IFF_RUNNING;
 977                 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
 978                         ifreq.ifr_flags |= IFF_PROMISC;
 979                 break;
 980 
 981         case SIOCGLIFFLAGS :
 982                 lifreq.lifr_flags = IFF_RUNNING;
 983                 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
 984                         lifreq.lifr_flags |= IFF_PROMISC;
 985                 break;
 986 
 987         case SIOCSIFFLAGS :
 988                 if (linkid != ps->ps_linkid) {
 989                         error = EINVAL;
 990                 } else {
 991                         if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
 992                                 mtype = MAC_CLIENT_PROMISC_ALL;
 993                         else
 994                                 mtype = MAC_CLIENT_PROMISC_FILTERED;
 995                         error = pfp_set_promisc(ps, mtype);
 996                 }
 997                 break;
 998 
 999         case SIOCSLIFFLAGS :
1000                 if (linkid != ps->ps_linkid) {
1001                         error = EINVAL;
1002                 } else {
1003                         if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
1004                                 mtype = MAC_CLIENT_PROMISC_ALL;
1005                         else
1006                                 mtype = MAC_CLIENT_PROMISC_FILTERED;
1007                         error = pfp_set_promisc(ps, mtype);
1008                 }
1009                 break;
1010 
1011         case SIOCGIFMTU :
1012                 mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
1013                 break;
1014 
1015         case SIOCGLIFMTU :
1016                 mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
1017                 break;
1018 
1019         case SIOCGIFHWADDR :
1020                 if (mac_addr_len(mh) > sizeof (ifreq.ifr_addr.sa_data)) {
1021                         error = EPFNOSUPPORT;
1022                         break;
1023                 }
1024 
1025                 if (mac_addr_len(mh) == 0) {
1026                         (void) memset(ifreq.ifr_addr.sa_data, 0,
1027                             sizeof (ifreq.ifr_addr.sa_data));
1028                 } else {
1029                         mac_unicast_primary_get(mh,
1030                             (uint8_t *)ifreq.ifr_addr.sa_data);
1031                 }
1032 
1033                 /*
1034                  * The behaviour here in setting sa_family is consistent
1035                  * with what applications such as tcpdump would expect
1036                  * for a Linux PF_PACKET socket.
1037                  */
1038                 ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
1039                 break;
1040 
1041         case SIOCGLIFHWADDR :
1042                 lifreq.lifr_type = 0;
1043                 sock = (struct sockaddr_dl *)&lifreq.lifr_addr;
1044 
1045                 if (mac_addr_len(mh) > sizeof (sock->sdl_data)) {
1046                         error = EPFNOSUPPORT;
1047                         break;
1048                 }
1049 
1050                 /*
1051                  * Fill in the sockaddr_dl with link layer details. Of note,
1052                  * the index is returned as 0 for a couple of reasons:
1053                  * (1) there is no public API that uses or requires it
1054                  * (2) the MAC index is currently 32bits and sdl_index is 16.
1055                  */
1056                 sock->sdl_family = AF_LINK;
1057                 sock->sdl_index = 0;
1058                 sock->sdl_type = mac_type(mh);
1059                 sock->sdl_nlen = 0;
1060                 sock->sdl_alen = mac_addr_len(mh);
1061                 sock->sdl_slen = 0;
1062                 if (mac_addr_len(mh) == 0) {
1063                         (void) memset(sock->sdl_data, 0,
1064                             sizeof (sock->sdl_data));
1065                 } else {
1066                         mac_unicast_primary_get(mh, (uint8_t *)sock->sdl_data);
1067                 }
1068                 break;
1069 
1070         default :
1071                 break;
1072         }
1073 
1074         mac_close(mh);
1075 
1076         if (error == 0) {
1077                 /*
1078                  * Only the "GET" ioctls need to copy data back to userace.
1079                  */
1080                 switch (cmd) {
1081                 case SIOCGLIFINDEX :
1082                 case SIOCGLIFFLAGS :
1083                 case SIOCGLIFMTU :
1084                 case SIOCGLIFHWADDR :
1085                         error = ddi_copyout(&lifreq, (void *)arg,
1086                             sizeof (lifreq), mod);
1087                         break;
1088 
1089                 case SIOCGIFINDEX :
1090                 case SIOCGIFFLAGS :
1091                 case SIOCGIFMTU :
1092                 case SIOCGIFHWADDR :
1093                         error = ddi_copyout(&ifreq, (void *)arg,
1094                             sizeof (ifreq), mod);
1095                         break;
1096                 default :
1097                         break;
1098                 }
1099         }
1100 
1101         return (error);
1102 }
1103 
1104 /*
1105  * Closing the socket requires that all open references to network
1106  * interfaces be closed.
1107  */
1108 /* ARGSUSED */
1109 static int
1110 sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
1111 {
1112         struct pfpsock *ps = (struct pfpsock *)handle;
1113 
1114         if (ps->ps_phd != 0) {
1115                 mac_promisc_remove(ps->ps_phd);
1116                 ps->ps_phd = 0;
1117         }
1118 
1119         if (ps->ps_mch != 0) {
1120                 mac_client_close(ps->ps_mch, 0);
1121                 ps->ps_mch = 0;
1122         }
1123 
1124         if (ps->ps_mh != 0) {
1125                 mac_close(ps->ps_mh);
1126                 ps->ps_mh = 0;
1127         }
1128 
1129         kmem_free(ps, sizeof (*ps));
1130 
1131         return (0);
1132 }
1133 
1134 /* ************************************************************************* */
1135 
1136 /*
1137  * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1138  * determine the linkid for the interface name stored in that structure.
1139  * name is used as a buffer so that we can ensure a trailing \0 is appended
1140  * to the name safely.
1141  */
1142 static int
1143 pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1144     datalink_id_t *linkidp, int mode)
1145 {
1146         char name[IFNAMSIZ + 1];
1147         int error;
1148 
1149         if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), mode) != 0)
1150                 return (EFAULT);
1151 
1152         (void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1153 
1154         error = dls_mgmt_get_linkid(name, linkidp);
1155         if (error != 0)
1156                 error = dls_devnet_macname2linkid(name, linkidp);
1157 
1158         return (error);
1159 }
1160 
1161 /*
1162  * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1163  * determine the linkid for the interface name stored in that structure.
1164  * name is used as a buffer so that we can ensure a trailing \0 is appended
1165  * to the name safely.
1166  */
1167 static int
1168 pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1169     datalink_id_t *linkidp, int mode)
1170 {
1171         char name[LIFNAMSIZ + 1];
1172         int error;
1173 
1174         if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), mode) != 0)
1175                 return (EFAULT);
1176 
1177         (void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1178 
1179         error = dls_mgmt_get_linkid(name, linkidp);
1180         if (error != 0)
1181                 error = dls_devnet_macname2linkid(name, linkidp);
1182 
1183         return (error);
1184 }
1185 
1186 /*
1187  * Although there are several new SOL_PACKET options that can be set and
1188  * are specific to this implementation of PF_PACKET, the current API does
1189  * not support doing a get on them to retrieve accompanying status. Thus
1190  * it is only currently possible to use SOL_PACKET with getsockopt to
1191  * retrieve statistical information. This remains consistant with the
1192  * Linux API at the time of writing.
1193  */
1194 static int
1195 pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1196     void *optval, socklen_t *optlenp)
1197 {
1198         struct pfpsock *ps;
1199         struct tpacket_stats_short tpss;
1200         int error = 0;
1201 
1202         ps = (struct pfpsock *)handle;
1203 
1204         switch (option_name) {
1205         case PACKET_STATISTICS :
1206                 if (*optlenp < sizeof (ps->ps_stats)) {
1207                         error = EINVAL;
1208                         break;
1209                 }
1210                 *optlenp = sizeof (ps->ps_stats);
1211                 bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1212                 break;
1213         case PACKET_STATISTICS_SHORT :
1214                 if (*optlenp < sizeof (tpss)) {
1215                         error = EINVAL;
1216                         break;
1217                 }
1218                 *optlenp = sizeof (tpss);
1219                 tpss.tp_packets = ps->ps_stats.tp_packets;
1220                 tpss.tp_drops = ps->ps_stats.tp_drops;
1221                 bcopy(&tpss, optval, sizeof (tpss));
1222                 break;
1223         default :
1224                 error = EINVAL;
1225                 break;
1226         }
1227 
1228         return (error);
1229 }
1230 
1231 /*
1232  * The SOL_PACKET level for socket options supports three options,
1233  * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1234  * This function is responsible for mapping the two socket options
1235  * that manage multicast membership into the appropriate internal
1236  * function calls to bring the option into effect. Whilst direct
1237  * changes to the multicast membership (ADD/DROP) groups is handled
1238  * by calls directly into the mac module, changes to the promiscuos
1239  * mode are vectored through pfp_set_promisc() so that the logic for
1240  * managing the promiscuous mode is in one place.
1241  */
1242 /* ARGSUSED */
1243 static int
1244 pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1245     const void *optval, socklen_t optlen)
1246 {
1247         struct packet_mreq mreq;
1248         struct pfpsock *ps;
1249         int error = 0;
1250         int opt;
1251 
1252         ps = (struct pfpsock *)handle;
1253         if (!ps->ps_bound)
1254                 return (EPROTO);
1255 
1256         if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1257             (option_name == PACKET_DROP_MEMBERSHIP)) {
1258                 if (!ps->ps_bound)
1259                         return (EPROTO);
1260                 bcopy(optval, &mreq, sizeof (mreq));
1261                 if (ps->ps_linkid != mreq.mr_ifindex)
1262                         return (EINVAL);
1263         }
1264 
1265         switch (option_name) {
1266         case PACKET_ADD_MEMBERSHIP :
1267                 switch (mreq.mr_type) {
1268                 case PACKET_MR_MULTICAST :
1269                         if (mreq.mr_alen != ps->ps_sock.sll_halen)
1270                                 return (EINVAL);
1271 
1272                         error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1273                         break;
1274 
1275                 case PACKET_MR_PROMISC :
1276                         error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1277                         break;
1278 
1279                 case PACKET_MR_ALLMULTI :
1280                         error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1281                         break;
1282                 }
1283                 break;
1284 
1285         case PACKET_DROP_MEMBERSHIP :
1286                 switch (mreq.mr_type) {
1287                 case PACKET_MR_MULTICAST :
1288                         if (mreq.mr_alen != ps->ps_sock.sll_halen)
1289                                 return (EINVAL);
1290 
1291                         mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1292                         break;
1293 
1294                 case PACKET_MR_PROMISC :
1295                         if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1296                                 return (EINVAL);
1297                         error = pfp_set_promisc(ps,
1298                             MAC_CLIENT_PROMISC_FILTERED);
1299                         break;
1300 
1301                 case PACKET_MR_ALLMULTI :
1302                         if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1303                                 return (EINVAL);
1304                         error = pfp_set_promisc(ps,
1305                             MAC_CLIENT_PROMISC_FILTERED);
1306                         break;
1307                 }
1308                 break;
1309 
1310         case PACKET_AUXDATA :
1311                 if (optlen == sizeof (int)) {
1312                         opt = *(int *)optval;
1313                         ps->ps_auxdata = (opt != 0);
1314                 } else {
1315                         error = EINVAL;
1316                 }
1317                 break;
1318         default :
1319                 error = EINVAL;
1320                 break;
1321         }
1322 
1323         return (error);
1324 }
1325 
1326 /*
1327  * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1328  * SO_ATTACH_FILTER and SO_DETACH_FILTER.
1329  *
1330  * Both of these setsockopt values are candidates for being handled by the
1331  * socket layer itself in future, however this requires understanding how
1332  * they would interact with all other sockets.
1333  */
1334 static int
1335 pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1336     const void *optval, socklen_t optlen)
1337 {
1338         struct bpf_program prog;
1339         struct bpf_insn *fcode;
1340         struct pfpsock *ps;
1341         struct sock_proto_props sopp;
1342         int error = 0;
1343         int size;
1344 
1345         ps = (struct pfpsock *)handle;
1346 
1347         switch (option_name) {
1348         case SO_ATTACH_FILTER :
1349 #ifdef _LP64
1350                 if (optlen == sizeof (struct bpf_program32)) {
1351                         struct bpf_program32 prog32;
1352 
1353                         bcopy(optval, &prog32, sizeof (prog32));
1354                         prog.bf_len = prog32.bf_len;
1355                         prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1356                 } else
1357 #endif
1358                 if (optlen == sizeof (struct bpf_program)) {
1359                         bcopy(optval, &prog, sizeof (prog));
1360                 } else if (optlen != sizeof (struct bpf_program)) {
1361                         return (EINVAL);
1362                 }
1363                 if (prog.bf_len > BPF_MAXINSNS)
1364                         return (EINVAL);
1365 
1366                 size = prog.bf_len * sizeof (*prog.bf_insns);
1367                 fcode = kmem_alloc(size, KM_SLEEP);
1368                 if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1369                         kmem_free(fcode, size);
1370                         return (EFAULT);
1371                 }
1372 
1373                 if (bpf_validate(fcode, (int)prog.bf_len)) {
1374                         rw_enter(&ps->ps_bpflock, RW_WRITER);
1375                         pfp_release_bpf(ps);
1376                         ps->ps_bpf.bf_insns = fcode;
1377                         ps->ps_bpf.bf_len = size;
1378                         rw_exit(&ps->ps_bpflock);
1379 
1380                         return (0);
1381                 }
1382                 kmem_free(fcode, size);
1383                 error = EINVAL;
1384                 break;
1385 
1386         case SO_DETACH_FILTER :
1387                 pfp_release_bpf(ps);
1388                 break;
1389 
1390         case SO_RCVBUF :
1391                 size = *(int32_t *)optval;
1392                 if (size > sockmod_pfp_rcvbuf_max || size < 0)
1393                         return (ENOBUFS);
1394                 sopp.sopp_flags = SOCKOPT_RCVHIWAT;
1395                 sopp.sopp_rxhiwat = size;
1396                 ps->ps_upcalls->su_set_proto_props(ps->ps_upper, &sopp);
1397                 ps->ps_rcvbuf = size;
1398                 break;
1399 
1400         default :
1401                 error = ENOPROTOOPT;
1402                 break;
1403         }
1404 
1405         return (error);
1406 }
1407 
1408 /*
1409  * pfp_open_index is an internal function used to open a MAC device by
1410  * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1411  * because some of the interfaces provided by the mac layer require either
1412  * only the mac_handle_t or both it and mac_handle_t.
1413  *
1414  * Whilst inside the kernel we can access data structures supporting any
1415  * zone, access to interfaces from non-global zones is restricted to those
1416  * interfaces (if any) that are exclusively assigned to a zone.
1417  */
1418 static int
1419 pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1420     cred_t *cred)
1421 {
1422         mac_client_handle_t mch;
1423         zoneid_t ifzoneid;
1424         mac_handle_t mh;
1425         zoneid_t zoneid;
1426         int error;
1427 
1428         mh = 0;
1429         mch = 0;
1430         error = mac_open_by_linkid(index, &mh);
1431         if (error != 0)
1432                 goto bad_open;
1433 
1434         error = mac_client_open(mh, &mch, NULL,
1435             MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1436         if (error != 0)
1437                 goto bad_open;
1438 
1439         zoneid = crgetzoneid(cred);
1440         if (zoneid != GLOBAL_ZONEID) {
1441                 mac_perim_handle_t perim;
1442 
1443                 mac_perim_enter_by_mh(mh, &perim);
1444                 error = dls_link_getzid(mac_name(mh), &ifzoneid);
1445                 mac_perim_exit(perim);
1446                 if (error != 0)
1447                         goto bad_open;
1448                 if (ifzoneid != zoneid) {
1449                         error = EACCES;
1450                         goto bad_open;
1451                 }
1452         }
1453 
1454         *mcip = mch;
1455         *mhp = mh;
1456 
1457         return (0);
1458 bad_open:
1459         if (mch != 0)
1460                 mac_client_close(mch, 0);
1461         if (mh != 0)
1462                 mac_close(mh);
1463         return (error);
1464 }
1465 
1466 static void
1467 pfp_close(mac_handle_t mh, mac_client_handle_t mch)
1468 {
1469         mac_client_close(mch, 0);
1470         mac_close(mh);
1471 }
1472 
1473 /*
1474  * The purpose of this function is to provide a single place where we free
1475  * the loaded BPF program and reset all pointers/counters associated with
1476  * it.
1477  */
1478 static void
1479 pfp_release_bpf(struct pfpsock *ps)
1480 {
1481         if (ps->ps_bpf.bf_len != 0) {
1482                 kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1483                 ps->ps_bpf.bf_len = 0;
1484                 ps->ps_bpf.bf_insns = NULL;
1485         }
1486 }
1487 
1488 /*
1489  * Set the promiscuous mode of a network interface.
1490  * This function only calls the mac layer when there is a change to the
1491  * status of a network interface's promiscous mode. Tracking of how many
1492  * sockets have the network interface in promiscuous mode, and thus the
1493  * control over the physical device's status, is left to the mac layer.
1494  */
1495 static int
1496 pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1497 {
1498         int error = 0;
1499         int flags;
1500 
1501         /*
1502          * There are 4 combinations of turnon/ps_promisc.
1503          * This if handles 2 (both false, both true) and the if() below
1504          * handles the remaining one - when change is required.
1505          */
1506         if (turnon == ps->ps_promisc)
1507                 return (error);
1508 
1509         if (ps->ps_phd != 0) {
1510                 mac_promisc_remove(ps->ps_phd);
1511                 ps->ps_phd = 0;
1512 
1513                 /*
1514                  * ps_promisc is set here in case the call to mac_promisc_add
1515                  * fails: leaving it to indicate that the interface is still
1516                  * in some sort of promiscuous mode is false.
1517                  */
1518                 if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1519                         ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1520                         flags = MAC_PROMISC_FLAGS_NO_PHYS;
1521                 } else {
1522                         flags = 0;
1523                 }
1524                 flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1525         }
1526 
1527         error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1528             &ps->ps_phd, flags);
1529         if (error == 0)
1530                 ps->ps_promisc = turnon;
1531 
1532         return (error);
1533 }
1534 
1535 /*
1536  * This table maps the MAC types in Solaris to the ARPHRD_* values used
1537  * on Linux. This is used with the SIOCGIFHWADDR/SIOCGLIFHWADDR ioctl.
1538  *
1539  * The symbols in this table are *not* pulled in from <net/if_arp.h>,
1540  * they are pulled from <netpacket/packet.h>, thus it acts as a source
1541  * of supplementary information to the ARP table.
1542  */
1543 static uint_t arphrd_to_dl[][2] = {
1544         { ARPHRD_IEEE80211,     DL_WIFI },
1545         { ARPHRD_TUNNEL,        DL_IPV4 },
1546         { ARPHRD_TUNNEL,        DL_IPV6 },
1547         { ARPHRD_TUNNEL,        DL_6TO4 },
1548         { ARPHRD_AX25,          DL_X25 },
1549         { ARPHRD_ATM,           DL_ATM },
1550         { 0,                    0 }
1551 };
1552 
1553 static int
1554 pfp_dl_to_arphrd(int dltype)
1555 {
1556         int i;
1557 
1558         for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1559                 if (arphrd_to_dl[i][1] == dltype)
1560                         return (arphrd_to_dl[i][0]);
1561         return (arp_hw_type(dltype));
1562 }