illumos-gate Wdiff usr/src/uts/common/inet/sockmods/sockmod_pfp.c

Print this page

7127  remove -Wno-missing-braces from Makefile.uts

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/inet/sockmods/sockmod_pfp.c
          +++ new/usr/src/uts/common/inet/sockmods/sockmod_pfp.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright 2015 Joyent, Inc. All rights reserved.
  25   25   */
  26   26  
  27   27  #include <sys/types.h>
  28   28  #include <sys/param.h>
  29   29  #include <sys/systm.h>
  30   30  #include <sys/stropts.h>
  31   31  #include <sys/socket.h>
  32   32  #include <sys/socketvar.h>
  33   33  #include <sys/socket_proto.h>
  34   34  #include <sys/sockio.h>
  35   35  #include <sys/strsun.h>
  36   36  #include <sys/kstat.h>
  37   37  #include <sys/modctl.h>
  38   38  #include <sys/policy.h>
  39   39  #include <sys/priv_const.h>
  40   40  #include <sys/tihdr.h>
  41   41  #include <sys/zone.h>
  42   42  #include <sys/time.h>
  43   43  #include <sys/ethernet.h>
  44   44  #include <sys/llc1.h>
  45   45  #include <fs/sockfs/sockcommon.h>
  46   46  #include <net/if.h>
  47   47  #include <inet/ip_arp.h>
  48   48  
  49   49  #include <sys/dls.h>
  50   50  #include <sys/mac.h>
  51   51  #include <sys/mac_client.h>
  52   52  #include <sys/mac_provider.h>
  53   53  #include <sys/mac_client_priv.h>
  54   54  
  55   55  #include <netpacket/packet.h>
  56   56  
  57   57  static void pfp_close(mac_handle_t, mac_client_handle_t);
  58   58  static int pfp_dl_to_arphrd(int);
  59   59  static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
  60   60      socklen_t *);
  61   61  static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *, int);
  62   62  static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *,
  63   63      int);
  64   64  static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
  65   65      cred_t *);
  66   66  static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
  67   67  static void pfp_release_bpf(struct pfpsock *);
  68   68  static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
  69   69  static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
  70   70      socklen_t);
  71   71  static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
  72   72      socklen_t);
  73   73  
  74   74  /*
  75   75   * PFP sockfs operations
  76   76   * Most are currently no-ops because they have no meaning for a connectionless
  77   77   * socket.
  78   78   */
  79   79  static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
  80   80      sock_upcalls_t *, int, struct cred *);
  81   81  static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
  82   82      struct cred *);
  83   83  static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
  84   84  static void sdpfp_clr_flowctrl(sock_lower_handle_t);
  85   85  static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
  86   86      socklen_t *, struct cred *);
  87   87  static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
  88   88      struct cred *);
  89   89  static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
  90   90      struct cred *);
  91   91  static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
  92   92      socklen_t, struct cred *);
  93   93  
  94   94  static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
  95   95      uint_t *, int *, int, cred_t *);
  96   96  
  97   97  static int sockpfp_init(void);
  98   98  static void sockpfp_fini(void);
  99   99  
 100  100  static kstat_t *pfp_ksp;
 101  101  static pfp_kstats_t ks_stats;
 102  102  static pfp_kstats_t pfp_kstats = {
 103  103          /*
 104  104           * Each one of these kstats is a different return path in handling
 105  105           * a packet received from the mac layer.
 106  106           */
 107  107          { "recvMacHeaderFail",  KSTAT_DATA_UINT64 },
 108  108          { "recvBadProtocol",    KSTAT_DATA_UINT64 },
 109  109          { "recvAllocbFail",     KSTAT_DATA_UINT64 },
 110  110          { "recvOk",             KSTAT_DATA_UINT64 },
 111  111          { "recvFail",           KSTAT_DATA_UINT64 },
 112  112          { "recvFiltered",       KSTAT_DATA_UINT64 },
 113  113          { "recvFlowControl",    KSTAT_DATA_UINT64 },
 114  114          /*
 115  115           * A global set of counters is maintained to track the behaviour
 116  116           * of the system (kernel & applications) in sending packets.
 117  117           */
 118  118          { "sendUnbound",        KSTAT_DATA_UINT64 },
 119  119          { "sendFailed",         KSTAT_DATA_UINT64 },
 120  120          { "sendTooBig",         KSTAT_DATA_UINT64 },
 121  121          { "sendAllocFail",      KSTAT_DATA_UINT64 },
 122  122          { "sendUiomoveFail",    KSTAT_DATA_UINT64 },
 123  123          { "sendNoMemory",       KSTAT_DATA_UINT64 },
 124  124          { "sendOpenFail",       KSTAT_DATA_UINT64 },
 125  125          { "sendWrongFamily",    KSTAT_DATA_UINT64 },
 126  126          { "sendShortMsg",       KSTAT_DATA_UINT64 },
 127  127          { "sendOk",             KSTAT_DATA_UINT64 }
 128  128  };
 129  129  
 130  130  sock_downcalls_t pfp_downcalls = {
 131  131          sdpfp_activate,
 132  132          sock_accept_notsupp,
 133  133          sdpfp_bind,
 134  134          sock_listen_notsupp,
 135  135          sock_connect_notsupp,
 136  136          sock_getpeername_notsupp,
 137  137          sock_getsockname_notsupp,
 138  138          sdpfp_getsockopt,
 139  139          sdpfp_setsockopt,
 140  140          sock_send_notsupp,
 141  141          sdpfp_senduio,
 142  142          NULL,
 143  143          sock_poll_notsupp,
 144  144          sock_shutdown_notsupp,
 145  145          sdpfp_clr_flowctrl,
 146  146          sdpfp_ioctl,
 147  147          sdpfp_close,
 148  148  };
 149  149  
 150  150  static smod_reg_t sinfo = {
 151  151          SOCKMOD_VERSION,
 152  152          "sockpfp",
 153  153          SOCK_UC_VERSION,
 154  154          SOCK_DC_VERSION,
 155  155          sockpfp_create,
 156  156          NULL
 157  157  };
 158  158  
 159  159  static int accepted_protos[3][2] = {
 160  160          { ETH_P_ALL,    0 },
 161  161          { ETH_P_802_2,  LLC_SNAP_SAP },
 162  162          { ETH_P_803_3,  0 },
 163  163  };
 164  164  
 165  165  /*
 166  166   * This sets an upper bound on the size of the receive buffer for a PF_PACKET
 167  167   * socket. More properly, this should be controlled through ipadm, ala TCP, UDP,
 168  168   * SCTP, etc. Until that's done, this provides a hard cap of 4 MB and allows an
 169  169   * opportunity for it to be changed, should it be needed.
 170  170   */
 171  171  int sockmod_pfp_rcvbuf_max = 1024 * 1024 * 4;

↓ open down ↓

171 lines elided

↑ open up ↑

 172  172  
 173  173  /*
 174  174   * Module linkage information for the kernel.
 175  175   */
 176  176  static struct modlsockmod modlsockmod = {
 177  177          &mod_sockmodops, "PF Packet socket module", &sinfo
 178  178  };
 179  179  
 180  180  static struct modlinkage modlinkage = {
 181  181          MODREV_1,
 182      -        &modlsockmod,
 183      -        NULL
      182 +        { &modlsockmod, NULL }
 184  183  };
 185  184  
 186  185  int
 187  186  _init(void)
 188  187  {
 189  188          int error;
 190  189  
 191  190          error = sockpfp_init();
 192  191          if (error != 0)
 193  192                  return (error);

 194  193  
 195  194          error = mod_install(&modlinkage);
 196  195          if (error != 0)
 197  196                  sockpfp_fini();
 198  197  
 199  198          return (error);
 200  199  }
 201  200  
 202  201  int
 203  202  _fini(void)
 204  203  {
 205  204          int error;
 206  205  
 207  206          error = mod_remove(&modlinkage);
 208  207          if (error == 0)
 209  208                  sockpfp_fini();
 210  209  
 211  210          return (error);
 212  211  }
 213  212  
 214  213  int
 215  214  _info(struct modinfo *modinfop)
 216  215  {
 217  216          return (mod_info(&modlinkage, modinfop));
 218  217  }
 219  218  
 220  219  /*
 221  220   * sockpfp_init: called as part of the initialisation of the module when
 222  221   * loaded into the kernel.
 223  222   *
 224  223   * Being able to create and record the kstats data in the kernel is not
 225  224   * considered to be vital to the operation of this kernel module, thus
 226  225   * its failure is tolerated.
 227  226   */
 228  227  static int
 229  228  sockpfp_init(void)
 230  229  {
 231  230          (void) memset(&ks_stats, 0, sizeof (ks_stats));
 232  231  
 233  232          (void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
 234  233  
 235  234          pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
 236  235              KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
 237  236              KSTAT_FLAG_VIRTUAL);
 238  237          if (pfp_ksp != NULL) {
 239  238                  pfp_ksp->ks_data = &ks_stats;
 240  239                  kstat_install(pfp_ksp);
 241  240          }
 242  241  
 243  242          return (0);
 244  243  }
 245  244  
 246  245  /*
 247  246   * sockpfp_fini: called when the operating system wants to unload the
 248  247   * socket module from the kernel.
 249  248   */
 250  249  static void
 251  250  sockpfp_fini(void)
 252  251  {
 253  252          if (pfp_ksp != NULL)
 254  253                  kstat_delete(pfp_ksp);
 255  254  }
 256  255  
 257  256  /*
 258  257   * Due to sockets being created read-write by default, all PF_PACKET sockets
 259  258   * therefore require the NET_RAWACCESS privilege, even if the socket is only
 260  259   * being used for reading packets from.
 261  260   *
 262  261   * This create function enforces this module only being used with PF_PACKET
 263  262   * sockets and the policy that we support via the config file in sock2path.d:
 264  263   * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
 265  264   */
 266  265  /* ARGSUSED */
 267  266  static sock_lower_handle_t
 268  267  sockpfp_create(int family, int type, int proto,
 269  268      sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
 270  269      int sflags, cred_t *cred)
 271  270  {
 272  271          struct pfpsock *ps;
 273  272          int kmflags;
 274  273          int newproto;
 275  274          int i;
 276  275  
 277  276          if (secpolicy_net_rawaccess(cred) != 0) {
 278  277                  *errorp = EACCES;
 279  278                  return (NULL);
 280  279          }
 281  280  
 282  281          if (family != AF_PACKET) {
 283  282                  *errorp = EAFNOSUPPORT;
 284  283                  return (NULL);
 285  284          }
 286  285  
 287  286          if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
 288  287                  *errorp = ESOCKTNOSUPPORT;
 289  288                  return (NULL);
 290  289          }
 291  290  
 292  291          /*
 293  292           * First check to see if the protocol number passed in via the socket
 294  293           * creation should be mapped to a different number for internal use.
 295  294           */
 296  295          for (i = 0, newproto = -1;
 297  296              i < sizeof (accepted_protos)/ sizeof (accepted_protos[0]); i++) {
 298  297                  if (accepted_protos[i][0] == proto) {
 299  298                          newproto = accepted_protos[i][1];
 300  299                          break;
 301  300                  }
 302  301          }
 303  302  
 304  303          /*
 305  304           * If the mapping of the protocol that was under 0x800 failed to find
 306  305           * a local equivalent then fail the socket creation. If the protocol
 307  306           * for the socket is over 0x800 and it was not found in the mapping
 308  307           * table above, then use the value as is.
 309  308           */
 310  309          if (newproto == -1) {
 311  310                  if (proto < 0x800) {
 312  311                          *errorp = ENOPROTOOPT;
 313  312                          return (NULL);
 314  313                  }
 315  314                  newproto = proto;
 316  315          }
 317  316          proto = newproto;
 318  317  
 319  318          kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
 320  319          ps = kmem_zalloc(sizeof (*ps), kmflags);
 321  320          if (ps == NULL) {
 322  321                  *errorp = ENOMEM;
 323  322                  return (NULL);
 324  323          }
 325  324  
 326  325          ps->ps_type = type;
 327  326          ps->ps_proto = proto;
 328  327          rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
 329  328          mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
 330  329  
 331  330          *sock_downcalls = &pfp_downcalls;
 332  331          /*
 333  332           * Setting this causes bytes from a packet that do not fit into the
 334  333           * destination user buffer to be discarded. Thus the API is one
 335  334           * packet per receive and callers are required to use a buffer large
 336  335           * enough for the biggest packet that the interface can provide.
 337  336           */
 338  337          *smodep = SM_ATOMIC;
 339  338  
 340  339          return ((sock_lower_handle_t)ps);
 341  340  }
 342  341  
 343  342  /* ************************************************************************* */
 344  343  
 345  344  /*
 346  345   * pfp_packet is the callback function that is given to the mac layer for
 347  346   * PF_PACKET to receive packets with. One packet at a time is passed into
 348  347   * this function from the mac layer. Each packet is a private copy given
 349  348   * to PF_PACKET to modify or free as it wishes and does not harm the original
 350  349   * packet from which it was cloned.
 351  350   */
 352  351  /* ARGSUSED */
 353  352  static void
 354  353  pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
 355  354  {
 356  355          struct T_unitdata_ind *tunit;
 357  356          struct sockaddr_ll *sll;
 358  357          struct sockaddr_ll *sol;
 359  358          mac_header_info_t hdr;
 360  359          struct pfpsock *ps;
 361  360          size_t tusz;
 362  361          mblk_t *mp0;
 363  362          int error;
 364  363  
 365  364          if (mp == NULL)
 366  365                  return;
 367  366  
 368  367          ps = arg;
 369  368          if (ps->ps_flow_ctrld) {
 370  369                  ps->ps_flow_ctrl_drops++;
 371  370                  ps->ps_stats.tp_drops++;
 372  371                  ks_stats.kp_recv_flow_cntrld.value.ui64++;
 373  372                  freemsg(mp);
 374  373                  return;
 375  374          }
 376  375  
 377  376          if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
 378  377                  /*
 379  378                   * Can't decode the packet header information so drop it.
 380  379                   */
 381  380                  ps->ps_stats.tp_drops++;
 382  381                  ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
 383  382                  freemsg(mp);
 384  383                  return;
 385  384          }
 386  385  
 387  386          if (mac_type(ps->ps_mh) == DL_ETHER &&
 388  387              hdr.mhi_bindsap == ETHERTYPE_VLAN) {
 389  388                  struct ether_vlan_header *evhp;
 390  389                  struct ether_vlan_header evh;
 391  390  
 392  391                  hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
 393  392                  hdr.mhi_istagged = B_TRUE;
 394  393  
 395  394                  if (MBLKL(mp) >= sizeof (*evhp)) {
 396  395                          evhp = (struct ether_vlan_header *)mp->b_rptr;
 397  396                  } else {
 398  397                          int sz = sizeof (*evhp);
 399  398                          char *s = (char *)&evh;
 400  399                          mblk_t *tmp;
 401  400                          int len;
 402  401  
 403  402                          for (tmp = mp; sz > 0 && tmp != NULL;
 404  403                              tmp = tmp->b_cont) {
 405  404                                  len = min(sz, MBLKL(tmp));
 406  405                                  bcopy(tmp->b_rptr, s, len);
 407  406                                  sz -= len;
 408  407                          }
 409  408                          evhp = &evh;
 410  409                  }
 411  410                  hdr.mhi_tci = ntohs(evhp->ether_tci);
 412  411                  hdr.mhi_bindsap = ntohs(evhp->ether_type);
 413  412          }
 414  413  
 415  414          if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
 416  415                  /*
 417  416                   * The packet is not of interest to this socket so
 418  417                   * drop it on the floor. Here the SAP is being used
 419  418                   * as a very coarse filter.
 420  419                   */
 421  420                  ps->ps_stats.tp_drops++;
 422  421                  ks_stats.kp_recv_bad_proto.value.ui64++;
 423  422                  freemsg(mp);
 424  423                  return;
 425  424          }
 426  425  
 427  426          /*
 428  427           * This field is not often set, even for ethernet,
 429  428           * by mac_header_info, so compute it if it is 0.
 430  429           */
 431  430          if (hdr.mhi_pktsize == 0)
 432  431                  hdr.mhi_pktsize = msgdsize(mp);
 433  432  
 434  433          /*
 435  434           * If a BPF filter is present, pass the raw packet into that.
 436  435           * A failed match will result in zero being returned, indicating
 437  436           * that this socket is not interested in the packet.
 438  437           */
 439  438          if (ps->ps_bpf.bf_len != 0) {
 440  439                  uchar_t *buffer;
 441  440                  int buflen;
 442  441  
 443  442                  buflen = MBLKL(mp);
 444  443                  if (hdr.mhi_pktsize == buflen) {
 445  444                          buffer = mp->b_rptr;
 446  445                  } else {
 447  446                          buflen = 0;
 448  447                          buffer = (uchar_t *)mp;
 449  448                  }
 450  449                  rw_enter(&ps->ps_bpflock, RW_READER);
 451  450                  if (bpf_filter(ps->ps_bpf.bf_insns, buffer,
 452  451                      hdr.mhi_pktsize, buflen) == 0) {
 453  452                          rw_exit(&ps->ps_bpflock);
 454  453                          ps->ps_stats.tp_drops++;
 455  454                          ks_stats.kp_recv_filtered.value.ui64++;
 456  455                          freemsg(mp);
 457  456                          return;
 458  457                  }
 459  458                  rw_exit(&ps->ps_bpflock);
 460  459          }
 461  460  
 462  461          if (ps->ps_type == SOCK_DGRAM) {
 463  462                  /*
 464  463                   * SOCK_DGRAM sockets expect a "layer 3" packet, so advance
 465  464                   * past the link layer header.
 466  465                   */
 467  466                  mp->b_rptr += hdr.mhi_hdrsize;
 468  467                  hdr.mhi_pktsize -= hdr.mhi_hdrsize;
 469  468          }
 470  469  
 471  470          tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
 472  471          if (ps->ps_auxdata) {
 473  472                  tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
 474  473                  tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
 475  474          }
 476  475  
 477  476          /*
 478  477           * It is tempting to think that this could be optimised by having
 479  478           * the base mblk_t allocated and hung off the pfpsock structure,
 480  479           * except that then another one would need to be allocated for the
 481  480           * sockaddr_ll that is included. Even creating a template to copy
 482  481           * from is of questionable value, as read-write from one structure
 483  482           * to the other is going to be slower than all of the initialisation.
 484  483           */
 485  484          mp0 = allocb(tusz, BPRI_HI);
 486  485          if (mp0 == NULL) {
 487  486                  ps->ps_stats.tp_drops++;
 488  487                  ks_stats.kp_recv_alloc_fail.value.ui64++;
 489  488                  freemsg(mp);
 490  489                  return;
 491  490          }
 492  491  
 493  492          (void) memset(mp0->b_rptr, 0, tusz);
 494  493  
 495  494          mp0->b_datap->db_type = M_PROTO;
 496  495          mp0->b_wptr = mp0->b_rptr + tusz;
 497  496  
 498  497          tunit = (struct T_unitdata_ind *)mp0->b_rptr;
 499  498          tunit->PRIM_type = T_UNITDATA_IND;
 500  499          tunit->SRC_length = sizeof (struct sockaddr);
 501  500          tunit->SRC_offset = sizeof (*tunit);
 502  501  
 503  502          sol = &ps->ps_sock;
 504  503          sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
 505  504          sll->sll_ifindex = sol->sll_ifindex;
 506  505          sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
 507  506          sll->sll_halen = sol->sll_halen;
 508  507          if (hdr.mhi_saddr != NULL)
 509  508                  (void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
 510  509  
 511  510          switch (hdr.mhi_dsttype) {
 512  511          case MAC_ADDRTYPE_MULTICAST :
 513  512                  sll->sll_pkttype = PACKET_MULTICAST;
 514  513                  break;
 515  514          case MAC_ADDRTYPE_BROADCAST :
 516  515                  sll->sll_pkttype = PACKET_BROADCAST;
 517  516                  break;
 518  517          case MAC_ADDRTYPE_UNICAST :
 519  518                  if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
 520  519                          sll->sll_pkttype = PACKET_HOST;
 521  520                  else
 522  521                          sll->sll_pkttype = PACKET_OTHERHOST;
 523  522                  break;
 524  523          }
 525  524  
 526  525          if (ps->ps_auxdata) {
 527  526                  struct tpacket_auxdata *aux;
 528  527                  struct T_opthdr *topt;
 529  528  
 530  529                  tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
 531  530                      sizeof (struct sockaddr_ll));
 532  531                  tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
 533  532                      _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
 534  533  
 535  534                  topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
 536  535                  aux = (struct tpacket_auxdata *)
 537  536                      ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
 538  537  
 539  538                  topt->len = tunit->OPT_length;
 540  539                  topt->level = SOL_PACKET;
 541  540                  topt->name = PACKET_AUXDATA;
 542  541                  topt->status = 0;
 543  542                  /*
 544  543                   * libpcap doesn't seem to use any other field,
 545  544                   * so it isn't clear how they should be filled in.
 546  545                   */
 547  546                  aux->tp_vlan_vci = hdr.mhi_tci;
 548  547          }
 549  548  
 550  549          linkb(mp0, mp);
 551  550  
 552  551          (void) gethrestime(&ps->ps_timestamp);
 553  552  
 554  553          ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
 555  554              &error, NULL);
 556  555  
 557  556          if (error == 0) {
 558  557                  ps->ps_stats.tp_packets++;
 559  558                  ks_stats.kp_recv_ok.value.ui64++;
 560  559          } else {
 561  560                  mutex_enter(&ps->ps_lock);
 562  561                  if (error == ENOSPC) {
 563  562                          ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
 564  563                              &error, NULL);
 565  564                          if (error == ENOSPC)
 566  565                                  ps->ps_flow_ctrld = B_TRUE;
 567  566                  }
 568  567                  mutex_exit(&ps->ps_lock);
 569  568                  ps->ps_stats.tp_drops++;
 570  569                  ks_stats.kp_recv_fail.value.ui64++;
 571  570          }
 572  571  }
 573  572  
 574  573  /*
 575  574   * Bind a PF_PACKET socket to a network interface.
 576  575   *
 577  576   * The default operation of this bind() is to place the socket (and thus the
 578  577   * network interface) into promiscuous mode. It is then up to the application
 579  578   * to turn that down by issuing the relevant ioctls, if desired.
 580  579   */
 581  580  static int
 582  581  sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
 583  582      socklen_t addrlen, struct cred *cred)
 584  583  {
 585  584          struct sockaddr_ll *addr_ll, *sol;
 586  585          mac_client_handle_t mch;
 587  586          struct pfpsock *ps;
 588  587          mac_handle_t mh;
 589  588          int error;
 590  589  
 591  590          ps = (struct pfpsock *)handle;
 592  591          if (ps->ps_bound)
 593  592                  return (EINVAL);
 594  593  
 595  594          if (addrlen < sizeof (struct sockaddr_ll) || addr == NULL)
 596  595                  return (EINVAL);
 597  596  
 598  597          addr_ll = (struct sockaddr_ll *)addr;
 599  598  
 600  599          error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
 601  600          if (error != 0)
 602  601                  return (error);
 603  602          /*
 604  603           * Ensure that each socket is only bound once.
 605  604           */
 606  605          mutex_enter(&ps->ps_lock);
 607  606          if (ps->ps_mh != 0) {
 608  607                  mutex_exit(&ps->ps_lock);
 609  608                  pfp_close(mh, mch);
 610  609                  return (EADDRINUSE);
 611  610          }
 612  611          ps->ps_mh = mh;
 613  612          ps->ps_mch = mch;
 614  613          mutex_exit(&ps->ps_lock);
 615  614  
 616  615          /*
 617  616           * Cache all of the information from bind so that it's in an easy
 618  617           * place to get at when packets are received.
 619  618           */
 620  619          sol = &ps->ps_sock;
 621  620          sol->sll_family = AF_PACKET;
 622  621          sol->sll_ifindex = addr_ll->sll_ifindex;
 623  622          sol->sll_protocol = addr_ll->sll_protocol;
 624  623          sol->sll_halen = mac_addr_len(ps->ps_mh);
 625  624          mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
 626  625          mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
 627  626          ps->ps_linkid = addr_ll->sll_ifindex;
 628  627  
 629  628          error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
 630  629              pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
 631  630          if (error == 0) {
 632  631                  ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
 633  632                  ps->ps_bound = B_TRUE;
 634  633          }
 635  634  
 636  635          return (error);
 637  636  }
 638  637  
 639  638  /* ARGSUSED */
 640  639  static void
 641  640  sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
 642  641      sock_upcalls_t *upcalls, int flags, cred_t *cred)
 643  642  {
 644  643          struct pfpsock *ps;
 645  644  
 646  645          ps = (struct pfpsock *)lower;
 647  646          ps->ps_upper = upper;
 648  647          ps->ps_upcalls = upcalls;
 649  648  }
 650  649  
 651  650  /*
 652  651   * This module only implements getting socket options for the new socket
 653  652   * option level (SOL_PACKET) that it introduces. All other requests are
 654  653   * passed back to the sockfs layer.
 655  654   */
 656  655  /* ARGSUSED */
 657  656  static int
 658  657  sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
 659  658      void *optval, socklen_t *optlenp, struct cred *cred)
 660  659  {
 661  660          struct pfpsock *ps;
 662  661          int error = 0;
 663  662  
 664  663          ps = (struct pfpsock *)handle;
 665  664  
 666  665          switch (level) {
 667  666          case SOL_PACKET :
 668  667                  error = pfp_getpacket_sockopt(handle, option_name, optval,
 669  668                      optlenp);
 670  669                  break;
 671  670  
 672  671          case SOL_SOCKET :
 673  672                  if (option_name == SO_RCVBUF) {
 674  673                          if (*optlenp < sizeof (int32_t))
 675  674                                  return (EINVAL);
 676  675                          *((int32_t *)optval) = ps->ps_rcvbuf;
 677  676                          *optlenp = sizeof (int32_t);
 678  677                  } else {
 679  678                          error = ENOPROTOOPT;
 680  679                  }
 681  680                  break;
 682  681  
 683  682          default :
 684  683                  /*
 685  684                   * If sockfs code receives this error in return from the
 686  685                   * getsockopt downcall it handles the option locally, if
 687  686                   * it can.
 688  687                   */
 689  688                  error = ENOPROTOOPT;
 690  689                  break;
 691  690          }
 692  691  
 693  692          return (error);
 694  693  }
 695  694  
 696  695  /*
 697  696   * PF_PACKET supports setting socket options at only two levels:
 698  697   * SOL_SOCKET and SOL_PACKET.
 699  698   */
 700  699  /* ARGSUSED */
 701  700  static int
 702  701  sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
 703  702      const void *optval, socklen_t optlen, struct cred *cred)
 704  703  {
 705  704          int error = 0;
 706  705  
 707  706          switch (level) {
 708  707          case SOL_SOCKET :
 709  708                  error = pfp_setsocket_sockopt(handle, option_name, optval,
 710  709                      optlen);
 711  710                  break;
 712  711          case SOL_PACKET :
 713  712                  error = pfp_setpacket_sockopt(handle, option_name, optval,
 714  713                      optlen);
 715  714                  break;
 716  715          default :
 717  716                  error = EINVAL;
 718  717                  break;
 719  718          }
 720  719  
 721  720          return (error);
 722  721  }
 723  722  
 724  723  /*
 725  724   * This function is incredibly inefficient for sending any packet that
 726  725   * comes with a msghdr asking to be sent to an interface to which the
 727  726   * socket has not been bound. Some possibilities here are keeping a
 728  727   * cache of all open mac's and mac_client's, for the purpose of sending,
 729  728   * and closing them after some amount of inactivity. Clearly, applications
 730  729   * should not be written to use one socket for multiple interfaces if
 731  730   * performance is desired with the code as is.
 732  731   */
 733  732  /* ARGSUSED */
 734  733  static int
 735  734  sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
 736  735      struct nmsghdr *msg, struct cred *cred)
 737  736  {
 738  737          struct sockaddr_ll *sol;
 739  738          mac_client_handle_t mch;
 740  739          struct pfpsock *ps;
 741  740          boolean_t new_open;
 742  741          mac_handle_t mh;
 743  742          size_t mpsize;
 744  743          uint_t maxsdu;
 745  744          mblk_t *mp0;
 746  745          mblk_t *mp;
 747  746          int error;
 748  747  
 749  748          mp = NULL;
 750  749          mp0 = NULL;
 751  750          new_open = B_FALSE;
 752  751          ps = (struct pfpsock *)handle;
 753  752          mh = ps->ps_mh;
 754  753          mch = ps->ps_mch;
 755  754          maxsdu = ps->ps_max_sdu;
 756  755  
 757  756          sol = (struct sockaddr_ll *)msg->msg_name;
 758  757          if (sol == NULL) {
 759  758                  /*
 760  759                   * If no sockaddr_ll has been provided with the send call,
 761  760                   * use the one constructed when the socket was bound to an
 762  761                   * interface and fail if it hasn't been bound.
 763  762                   */
 764  763                  if (!ps->ps_bound) {
 765  764                          ks_stats.kp_send_unbound.value.ui64++;
 766  765                          return (EPROTO);
 767  766                  }
 768  767                  sol = &ps->ps_sock;
 769  768          } else {
 770  769                  /*
 771  770                   * Verify the sockaddr_ll message passed down before using
 772  771                   * it to send a packet out with. If it refers to an interface
 773  772                   * that has not been bound, it is necessary to open it.
 774  773                   */
 775  774                  struct sockaddr_ll *sll;
 776  775  
 777  776                  if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
 778  777                          ks_stats.kp_send_short_msg.value.ui64++;
 779  778                          return (EINVAL);
 780  779                  }
 781  780  
 782  781                  if (sol->sll_family != AF_PACKET) {
 783  782                          ks_stats.kp_send_wrong_family.value.ui64++;
 784  783                          return (EAFNOSUPPORT);
 785  784                  }
 786  785  
 787  786                  sll = &ps->ps_sock;
 788  787                  if (sol->sll_ifindex != sll->sll_ifindex) {
 789  788                          error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
 790  789                              cred);
 791  790                          if (error != 0) {
 792  791                                  ks_stats.kp_send_open_fail.value.ui64++;
 793  792                                  return (error);
 794  793                          }
 795  794                          mac_sdu_get(mh, NULL, &maxsdu);
 796  795                          new_open = B_TRUE;
 797  796                  }
 798  797          }
 799  798  
 800  799          mpsize = uiop->uio_resid;
 801  800          if (mpsize > maxsdu) {
 802  801                  ks_stats.kp_send_too_big.value.ui64++;
 803  802                  error = EMSGSIZE;
 804  803                  goto done;
 805  804          }
 806  805  
 807  806          if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
 808  807                  ks_stats.kp_send_alloc_fail.value.ui64++;
 809  808                  error = ENOBUFS;
 810  809                  goto done;
 811  810          }
 812  811  
 813  812          mp->b_wptr = mp->b_rptr + mpsize;
 814  813          error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
 815  814          if (error != 0) {
 816  815                  ks_stats.kp_send_uiomove_fail.value.ui64++;
 817  816                  goto done;
 818  817          }
 819  818  
 820  819          if (ps->ps_type == SOCK_DGRAM) {
 821  820                  mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
 822  821                  if (mp0 == NULL) {
 823  822                          ks_stats.kp_send_no_memory.value.ui64++;
 824  823                          error = ENOBUFS;
 825  824                          goto done;
 826  825                  }
 827  826                  linkb(mp0, mp);
 828  827                  mp = mp0;
 829  828          }
 830  829  
 831  830          /*
 832  831           * As this is sending datagrams and no promise is made about
 833  832           * how or if a packet will be sent/delivered, no effort is to
 834  833           * be expended in recovering from a situation where the packet
 835  834           * cannot be sent - it is just dropped.
 836  835           */
 837  836          error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
 838  837          if (error == 0) {
 839  838                  mp = NULL;
 840  839                  ks_stats.kp_send_ok.value.ui64++;
 841  840          } else {
 842  841                  ks_stats.kp_send_failed.value.ui64++;
 843  842          }
 844  843  
 845  844  done:
 846  845  
 847  846          if (new_open) {
 848  847                  ASSERT(mch != ps->ps_mch);
 849  848                  ASSERT(mh != ps->ps_mh);
 850  849                  pfp_close(mh, mch);
 851  850          }
 852  851          if (mp != NULL)
 853  852                  freemsg(mp);
 854  853  
 855  854          return (error);
 856  855  
 857  856  }
 858  857  
 859  858  /*
 860  859   * There's no use of a lock here, or at the bottom of pfp_packet() where
 861  860   * ps_flow_ctrld is set to true, because in a situation where these two
 862  861   * are racing to set the flag one way or the other, the end result is
 863  862   * going to be ultimately determined by the scheduler anyway - which of
 864  863   * the two threads gets the lock first? In such an operational environment,
 865  864   * we've got packets arriving too fast to be delt with so packets are going
 866  865   * to be dropped. Grabbing a lock just makes the drop more expensive.
 867  866   */
 868  867  static void
 869  868  sdpfp_clr_flowctrl(sock_lower_handle_t handle)
 870  869  {
 871  870          struct pfpsock *ps;
 872  871  
 873  872          ps = (struct pfpsock *)handle;
 874  873  
 875  874          mutex_enter(&ps->ps_lock);
 876  875          ps->ps_flow_ctrld = B_FALSE;
 877  876          mutex_exit(&ps->ps_lock);
 878  877  }
 879  878  
 880  879  /*
 881  880   * The implementation of this ioctl() handler is intended to function
 882  881   * in the absence of a bind() being made before it is called. Thus the
 883  882   * function calls mac_open() itself to provide a handle
 884  883   * This function is structured like this:
 885  884   * - determine the linkid for the interface being targetted
 886  885   * - open the interface with said linkid
 887  886   * - perform ioctl
 888  887   * - copy results back to caller
 889  888   *
 890  889   * The ioctls that interact with interface flags have been implented below
 891  890   * to assume that the interface is always up and running (IFF_RUNNING) and
 892  891   * to use the state of this socket to determine whether or not the network
 893  892   * interface is in promiscuous mode. Thus an ioctl to get the interface flags
 894  893   * of an interface that has been put in promiscuous mode by another socket
 895  894   * (in the same program or different), will not report that status.
 896  895   */
 897  896  /* ARGSUSED */
 898  897  static int
 899  898  sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
 900  899      int32_t *rval, struct cred *cr)
 901  900  {
 902  901          struct timeval tival;
 903  902          mac_client_promisc_type_t mtype;
 904  903          struct sockaddr_dl *sock;
 905  904          datalink_id_t linkid;
 906  905          struct lifreq lifreq;
 907  906          struct ifreq ifreq;
 908  907          struct pfpsock *ps;
 909  908          mac_handle_t mh;
 910  909          int error;
 911  910  
 912  911          ps = (struct pfpsock *)handle;
 913  912  
 914  913          switch (cmd) {
 915  914          /*
 916  915           * ioctls that work on "struct lifreq"
 917  916           */
 918  917          case SIOCSLIFFLAGS :
 919  918          case SIOCGLIFINDEX :
 920  919          case SIOCGLIFFLAGS :
 921  920          case SIOCGLIFMTU :
 922  921          case SIOCGLIFHWADDR :
 923  922                  error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid, mod);
 924  923                  if (error != 0)
 925  924                          return (error);
 926  925                  break;
 927  926  
 928  927          /*
 929  928           * ioctls that work on "struct ifreq".
 930  929           * Not all of these have a "struct lifreq" partner, for example
 931  930           * SIOCGIFHWADDR, for the simple reason that the logical interface
 932  931           * does not have a hardware address.
 933  932           */
 934  933          case SIOCSIFFLAGS :
 935  934          case SIOCGIFINDEX :
 936  935          case SIOCGIFFLAGS :
 937  936          case SIOCGIFMTU :
 938  937          case SIOCGIFHWADDR :
 939  938                  error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid, mod);
 940  939                  if (error != 0)
 941  940                          return (error);
 942  941                  break;
 943  942  
 944  943          case SIOCGSTAMP :
 945  944                  tival.tv_sec = (time_t)ps->ps_timestamp.tv_sec;
 946  945                  tival.tv_usec = ps->ps_timestamp.tv_nsec / 1000;
 947  946                  if (get_udatamodel() == DATAMODEL_NATIVE) {
 948  947                          error = ddi_copyout(&tival, (void *)arg,
 949  948                              sizeof (tival), mod);
 950  949                  }
 951  950  #ifdef _SYSCALL32_IMPL
 952  951                  else {
 953  952                          struct timeval32 tv32;
 954  953                          TIMEVAL_TO_TIMEVAL32(&tv32, &tival);
 955  954                          error = ddi_copyout(&tv32, (void *)arg,
 956  955                              sizeof (tv32), mod);
 957  956                  }
 958  957  #endif
 959  958                  return (error);
 960  959          }
 961  960  
 962  961          error =  mac_open_by_linkid(linkid, &mh);
 963  962          if (error != 0)
 964  963                  return (error);
 965  964  
 966  965          switch (cmd) {
 967  966          case SIOCGLIFINDEX :
 968  967                  lifreq.lifr_index = linkid;
 969  968                  break;
 970  969  
 971  970          case SIOCGIFINDEX :
 972  971                  ifreq.ifr_index = linkid;
 973  972                  break;
 974  973  
 975  974          case SIOCGIFFLAGS :
 976  975                  ifreq.ifr_flags = IFF_RUNNING;
 977  976                  if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
 978  977                          ifreq.ifr_flags |= IFF_PROMISC;
 979  978                  break;
 980  979  
 981  980          case SIOCGLIFFLAGS :
 982  981                  lifreq.lifr_flags = IFF_RUNNING;
 983  982                  if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
 984  983                          lifreq.lifr_flags |= IFF_PROMISC;
 985  984                  break;
 986  985  
 987  986          case SIOCSIFFLAGS :
 988  987                  if (linkid != ps->ps_linkid) {
 989  988                          error = EINVAL;
 990  989                  } else {
 991  990                          if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
 992  991                                  mtype = MAC_CLIENT_PROMISC_ALL;
 993  992                          else
 994  993                                  mtype = MAC_CLIENT_PROMISC_FILTERED;
 995  994                          error = pfp_set_promisc(ps, mtype);
 996  995                  }
 997  996                  break;
 998  997  
 999  998          case SIOCSLIFFLAGS :
1000  999                  if (linkid != ps->ps_linkid) {
1001 1000                          error = EINVAL;
1002 1001                  } else {
1003 1002                          if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
1004 1003                                  mtype = MAC_CLIENT_PROMISC_ALL;
1005 1004                          else
1006 1005                                  mtype = MAC_CLIENT_PROMISC_FILTERED;
1007 1006                          error = pfp_set_promisc(ps, mtype);
1008 1007                  }
1009 1008                  break;
1010 1009  
1011 1010          case SIOCGIFMTU :
1012 1011                  mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
1013 1012                  break;
1014 1013  
1015 1014          case SIOCGLIFMTU :
1016 1015                  mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
1017 1016                  break;
1018 1017  
1019 1018          case SIOCGIFHWADDR :
1020 1019                  if (mac_addr_len(mh) > sizeof (ifreq.ifr_addr.sa_data)) {
1021 1020                          error = EPFNOSUPPORT;
1022 1021                          break;
1023 1022                  }
1024 1023  
1025 1024                  if (mac_addr_len(mh) == 0) {
1026 1025                          (void) memset(ifreq.ifr_addr.sa_data, 0,
1027 1026                              sizeof (ifreq.ifr_addr.sa_data));
1028 1027                  } else {
1029 1028                          mac_unicast_primary_get(mh,
1030 1029                              (uint8_t *)ifreq.ifr_addr.sa_data);
1031 1030                  }
1032 1031  
1033 1032                  /*
1034 1033                   * The behaviour here in setting sa_family is consistent
1035 1034                   * with what applications such as tcpdump would expect
1036 1035                   * for a Linux PF_PACKET socket.
1037 1036                   */
1038 1037                  ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
1039 1038                  break;
1040 1039  
1041 1040          case SIOCGLIFHWADDR :
1042 1041                  lifreq.lifr_type = 0;
1043 1042                  sock = (struct sockaddr_dl *)&lifreq.lifr_addr;
1044 1043  
1045 1044                  if (mac_addr_len(mh) > sizeof (sock->sdl_data)) {
1046 1045                          error = EPFNOSUPPORT;
1047 1046                          break;
1048 1047                  }
1049 1048  
1050 1049                  /*
1051 1050                   * Fill in the sockaddr_dl with link layer details. Of note,
1052 1051                   * the index is returned as 0 for a couple of reasons:
1053 1052                   * (1) there is no public API that uses or requires it
1054 1053                   * (2) the MAC index is currently 32bits and sdl_index is 16.
1055 1054                   */
1056 1055                  sock->sdl_family = AF_LINK;
1057 1056                  sock->sdl_index = 0;
1058 1057                  sock->sdl_type = mac_type(mh);
1059 1058                  sock->sdl_nlen = 0;
1060 1059                  sock->sdl_alen = mac_addr_len(mh);
1061 1060                  sock->sdl_slen = 0;
1062 1061                  if (mac_addr_len(mh) == 0) {
1063 1062                          (void) memset(sock->sdl_data, 0,
1064 1063                              sizeof (sock->sdl_data));
1065 1064                  } else {
1066 1065                          mac_unicast_primary_get(mh, (uint8_t *)sock->sdl_data);
1067 1066                  }
1068 1067                  break;
1069 1068  
1070 1069          default :
1071 1070                  break;
1072 1071          }
1073 1072  
1074 1073          mac_close(mh);
1075 1074  
1076 1075          if (error == 0) {
1077 1076                  /*
1078 1077                   * Only the "GET" ioctls need to copy data back to userace.
1079 1078                   */
1080 1079                  switch (cmd) {
1081 1080                  case SIOCGLIFINDEX :
1082 1081                  case SIOCGLIFFLAGS :
1083 1082                  case SIOCGLIFMTU :
1084 1083                  case SIOCGLIFHWADDR :
1085 1084                          error = ddi_copyout(&lifreq, (void *)arg,
1086 1085                              sizeof (lifreq), mod);
1087 1086                          break;
1088 1087  
1089 1088                  case SIOCGIFINDEX :
1090 1089                  case SIOCGIFFLAGS :
1091 1090                  case SIOCGIFMTU :
1092 1091                  case SIOCGIFHWADDR :
1093 1092                          error = ddi_copyout(&ifreq, (void *)arg,
1094 1093                              sizeof (ifreq), mod);
1095 1094                          break;
1096 1095                  default :
1097 1096                          break;
1098 1097                  }
1099 1098          }
1100 1099  
1101 1100          return (error);
1102 1101  }
1103 1102  
1104 1103  /*
1105 1104   * Closing the socket requires that all open references to network
1106 1105   * interfaces be closed.
1107 1106   */
1108 1107  /* ARGSUSED */
1109 1108  static int
1110 1109  sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
1111 1110  {
1112 1111          struct pfpsock *ps = (struct pfpsock *)handle;
1113 1112  
1114 1113          if (ps->ps_phd != 0) {
1115 1114                  mac_promisc_remove(ps->ps_phd);
1116 1115                  ps->ps_phd = 0;
1117 1116          }
1118 1117  
1119 1118          if (ps->ps_mch != 0) {
1120 1119                  mac_client_close(ps->ps_mch, 0);
1121 1120                  ps->ps_mch = 0;
1122 1121          }
1123 1122  
1124 1123          if (ps->ps_mh != 0) {
1125 1124                  mac_close(ps->ps_mh);
1126 1125                  ps->ps_mh = 0;
1127 1126          }
1128 1127  
1129 1128          kmem_free(ps, sizeof (*ps));
1130 1129  
1131 1130          return (0);
1132 1131  }
1133 1132  
1134 1133  /* ************************************************************************* */
1135 1134  
1136 1135  /*
1137 1136   * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1138 1137   * determine the linkid for the interface name stored in that structure.
1139 1138   * name is used as a buffer so that we can ensure a trailing \0 is appended
1140 1139   * to the name safely.
1141 1140   */
1142 1141  static int
1143 1142  pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1144 1143      datalink_id_t *linkidp, int mode)
1145 1144  {
1146 1145          char name[IFNAMSIZ + 1];
1147 1146          int error;
1148 1147  
1149 1148          if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), mode) != 0)
1150 1149                  return (EFAULT);
1151 1150  
1152 1151          (void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1153 1152  
1154 1153          error = dls_mgmt_get_linkid(name, linkidp);
1155 1154          if (error != 0)
1156 1155                  error = dls_devnet_macname2linkid(name, linkidp);
1157 1156  
1158 1157          return (error);
1159 1158  }
1160 1159  
1161 1160  /*
1162 1161   * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1163 1162   * determine the linkid for the interface name stored in that structure.
1164 1163   * name is used as a buffer so that we can ensure a trailing \0 is appended
1165 1164   * to the name safely.
1166 1165   */
1167 1166  static int
1168 1167  pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1169 1168      datalink_id_t *linkidp, int mode)
1170 1169  {
1171 1170          char name[LIFNAMSIZ + 1];
1172 1171          int error;
1173 1172  
1174 1173          if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), mode) != 0)
1175 1174                  return (EFAULT);
1176 1175  
1177 1176          (void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1178 1177  
1179 1178          error = dls_mgmt_get_linkid(name, linkidp);
1180 1179          if (error != 0)
1181 1180                  error = dls_devnet_macname2linkid(name, linkidp);
1182 1181  
1183 1182          return (error);
1184 1183  }
1185 1184  
1186 1185  /*
1187 1186   * Although there are several new SOL_PACKET options that can be set and
1188 1187   * are specific to this implementation of PF_PACKET, the current API does
1189 1188   * not support doing a get on them to retrieve accompanying status. Thus
1190 1189   * it is only currently possible to use SOL_PACKET with getsockopt to
1191 1190   * retrieve statistical information. This remains consistant with the
1192 1191   * Linux API at the time of writing.
1193 1192   */
1194 1193  static int
1195 1194  pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1196 1195      void *optval, socklen_t *optlenp)
1197 1196  {
1198 1197          struct pfpsock *ps;
1199 1198          struct tpacket_stats_short tpss;
1200 1199          int error = 0;
1201 1200  
1202 1201          ps = (struct pfpsock *)handle;
1203 1202  
1204 1203          switch (option_name) {
1205 1204          case PACKET_STATISTICS :
1206 1205                  if (*optlenp < sizeof (ps->ps_stats)) {
1207 1206                          error = EINVAL;
1208 1207                          break;
1209 1208                  }
1210 1209                  *optlenp = sizeof (ps->ps_stats);
1211 1210                  bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1212 1211                  break;
1213 1212          case PACKET_STATISTICS_SHORT :
1214 1213                  if (*optlenp < sizeof (tpss)) {
1215 1214                          error = EINVAL;
1216 1215                          break;
1217 1216                  }
1218 1217                  *optlenp = sizeof (tpss);
1219 1218                  tpss.tp_packets = ps->ps_stats.tp_packets;
1220 1219                  tpss.tp_drops = ps->ps_stats.tp_drops;
1221 1220                  bcopy(&tpss, optval, sizeof (tpss));
1222 1221                  break;
1223 1222          default :
1224 1223                  error = EINVAL;
1225 1224                  break;
1226 1225          }
1227 1226  
1228 1227          return (error);
1229 1228  }
1230 1229  
1231 1230  /*
1232 1231   * The SOL_PACKET level for socket options supports three options,
1233 1232   * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1234 1233   * This function is responsible for mapping the two socket options
1235 1234   * that manage multicast membership into the appropriate internal
1236 1235   * function calls to bring the option into effect. Whilst direct
1237 1236   * changes to the multicast membership (ADD/DROP) groups is handled
1238 1237   * by calls directly into the mac module, changes to the promiscuos
1239 1238   * mode are vectored through pfp_set_promisc() so that the logic for
1240 1239   * managing the promiscuous mode is in one place.
1241 1240   */
1242 1241  /* ARGSUSED */
1243 1242  static int
1244 1243  pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1245 1244      const void *optval, socklen_t optlen)
1246 1245  {
1247 1246          struct packet_mreq mreq;
1248 1247          struct pfpsock *ps;
1249 1248          int error = 0;
1250 1249          int opt;
1251 1250  
1252 1251          ps = (struct pfpsock *)handle;
1253 1252          if (!ps->ps_bound)
1254 1253                  return (EPROTO);
1255 1254  
1256 1255          if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1257 1256              (option_name == PACKET_DROP_MEMBERSHIP)) {
1258 1257                  if (!ps->ps_bound)
1259 1258                          return (EPROTO);
1260 1259                  bcopy(optval, &mreq, sizeof (mreq));
1261 1260                  if (ps->ps_linkid != mreq.mr_ifindex)
1262 1261                          return (EINVAL);
1263 1262          }
1264 1263  
1265 1264          switch (option_name) {
1266 1265          case PACKET_ADD_MEMBERSHIP :
1267 1266                  switch (mreq.mr_type) {
1268 1267                  case PACKET_MR_MULTICAST :
1269 1268                          if (mreq.mr_alen != ps->ps_sock.sll_halen)
1270 1269                                  return (EINVAL);
1271 1270  
1272 1271                          error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1273 1272                          break;
1274 1273  
1275 1274                  case PACKET_MR_PROMISC :
1276 1275                          error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1277 1276                          break;
1278 1277  
1279 1278                  case PACKET_MR_ALLMULTI :
1280 1279                          error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1281 1280                          break;
1282 1281                  }
1283 1282                  break;
1284 1283  
1285 1284          case PACKET_DROP_MEMBERSHIP :
1286 1285                  switch (mreq.mr_type) {
1287 1286                  case PACKET_MR_MULTICAST :
1288 1287                          if (mreq.mr_alen != ps->ps_sock.sll_halen)
1289 1288                                  return (EINVAL);
1290 1289  
1291 1290                          mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1292 1291                          break;
1293 1292  
1294 1293                  case PACKET_MR_PROMISC :
1295 1294                          if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1296 1295                                  return (EINVAL);
1297 1296                          error = pfp_set_promisc(ps,
1298 1297                              MAC_CLIENT_PROMISC_FILTERED);
1299 1298                          break;
1300 1299  
1301 1300                  case PACKET_MR_ALLMULTI :
1302 1301                          if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1303 1302                                  return (EINVAL);
1304 1303                          error = pfp_set_promisc(ps,
1305 1304                              MAC_CLIENT_PROMISC_FILTERED);
1306 1305                          break;
1307 1306                  }
1308 1307                  break;
1309 1308  
1310 1309          case PACKET_AUXDATA :
1311 1310                  if (optlen == sizeof (int)) {
1312 1311                          opt = *(int *)optval;
1313 1312                          ps->ps_auxdata = (opt != 0);
1314 1313                  } else {
1315 1314                          error = EINVAL;
1316 1315                  }
1317 1316                  break;
1318 1317          default :
1319 1318                  error = EINVAL;
1320 1319                  break;
1321 1320          }
1322 1321  
1323 1322          return (error);
1324 1323  }
1325 1324  
1326 1325  /*
1327 1326   * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1328 1327   * SO_ATTACH_FILTER and SO_DETACH_FILTER.
1329 1328   *
1330 1329   * Both of these setsockopt values are candidates for being handled by the
1331 1330   * socket layer itself in future, however this requires understanding how
1332 1331   * they would interact with all other sockets.
1333 1332   */
1334 1333  static int
1335 1334  pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1336 1335      const void *optval, socklen_t optlen)
1337 1336  {
1338 1337          struct bpf_program prog;
1339 1338          struct bpf_insn *fcode;
1340 1339          struct pfpsock *ps;
1341 1340          struct sock_proto_props sopp;
1342 1341          int error = 0;
1343 1342          int size;
1344 1343  
1345 1344          ps = (struct pfpsock *)handle;
1346 1345  
1347 1346          switch (option_name) {
1348 1347          case SO_ATTACH_FILTER :
1349 1348  #ifdef _LP64
1350 1349                  if (optlen == sizeof (struct bpf_program32)) {
1351 1350                          struct bpf_program32 prog32;
1352 1351  
1353 1352                          bcopy(optval, &prog32, sizeof (prog32));
1354 1353                          prog.bf_len = prog32.bf_len;
1355 1354                          prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1356 1355                  } else
1357 1356  #endif
1358 1357                  if (optlen == sizeof (struct bpf_program)) {
1359 1358                          bcopy(optval, &prog, sizeof (prog));
1360 1359                  } else if (optlen != sizeof (struct bpf_program)) {
1361 1360                          return (EINVAL);
1362 1361                  }
1363 1362                  if (prog.bf_len > BPF_MAXINSNS)
1364 1363                          return (EINVAL);
1365 1364  
1366 1365                  size = prog.bf_len * sizeof (*prog.bf_insns);
1367 1366                  fcode = kmem_alloc(size, KM_SLEEP);
1368 1367                  if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1369 1368                          kmem_free(fcode, size);
1370 1369                          return (EFAULT);
1371 1370                  }
1372 1371  
1373 1372                  if (bpf_validate(fcode, (int)prog.bf_len)) {
1374 1373                          rw_enter(&ps->ps_bpflock, RW_WRITER);
1375 1374                          pfp_release_bpf(ps);
1376 1375                          ps->ps_bpf.bf_insns = fcode;
1377 1376                          ps->ps_bpf.bf_len = size;
1378 1377                          rw_exit(&ps->ps_bpflock);
1379 1378  
1380 1379                          return (0);
1381 1380                  }
1382 1381                  kmem_free(fcode, size);
1383 1382                  error = EINVAL;
1384 1383                  break;
1385 1384  
1386 1385          case SO_DETACH_FILTER :
1387 1386                  pfp_release_bpf(ps);
1388 1387                  break;
1389 1388  
1390 1389          case SO_RCVBUF :
1391 1390                  size = *(int32_t *)optval;
1392 1391                  if (size > sockmod_pfp_rcvbuf_max || size < 0)
1393 1392                          return (ENOBUFS);
1394 1393                  sopp.sopp_flags = SOCKOPT_RCVHIWAT;
1395 1394                  sopp.sopp_rxhiwat = size;
1396 1395                  ps->ps_upcalls->su_set_proto_props(ps->ps_upper, &sopp);
1397 1396                  ps->ps_rcvbuf = size;
1398 1397                  break;
1399 1398  
1400 1399          default :
1401 1400                  error = ENOPROTOOPT;
1402 1401                  break;
1403 1402          }
1404 1403  
1405 1404          return (error);
1406 1405  }
1407 1406  
1408 1407  /*
1409 1408   * pfp_open_index is an internal function used to open a MAC device by
1410 1409   * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1411 1410   * because some of the interfaces provided by the mac layer require either
1412 1411   * only the mac_handle_t or both it and mac_handle_t.
1413 1412   *
1414 1413   * Whilst inside the kernel we can access data structures supporting any
1415 1414   * zone, access to interfaces from non-global zones is restricted to those
1416 1415   * interfaces (if any) that are exclusively assigned to a zone.
1417 1416   */
1418 1417  static int
1419 1418  pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1420 1419      cred_t *cred)
1421 1420  {
1422 1421          mac_client_handle_t mch;
1423 1422          zoneid_t ifzoneid;
1424 1423          mac_handle_t mh;
1425 1424          zoneid_t zoneid;
1426 1425          int error;
1427 1426  
1428 1427          mh = 0;
1429 1428          mch = 0;
1430 1429          error = mac_open_by_linkid(index, &mh);
1431 1430          if (error != 0)
1432 1431                  goto bad_open;
1433 1432  
1434 1433          error = mac_client_open(mh, &mch, NULL,
1435 1434              MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1436 1435          if (error != 0)
1437 1436                  goto bad_open;
1438 1437  
1439 1438          zoneid = crgetzoneid(cred);
1440 1439          if (zoneid != GLOBAL_ZONEID) {
1441 1440                  mac_perim_handle_t perim;
1442 1441  
1443 1442                  mac_perim_enter_by_mh(mh, &perim);
1444 1443                  error = dls_link_getzid(mac_name(mh), &ifzoneid);
1445 1444                  mac_perim_exit(perim);
1446 1445                  if (error != 0)
1447 1446                          goto bad_open;
1448 1447                  if (ifzoneid != zoneid) {
1449 1448                          error = EACCES;
1450 1449                          goto bad_open;
1451 1450                  }
1452 1451          }
1453 1452  
1454 1453          *mcip = mch;
1455 1454          *mhp = mh;
1456 1455  
1457 1456          return (0);
1458 1457  bad_open:
1459 1458          if (mch != 0)
1460 1459                  mac_client_close(mch, 0);
1461 1460          if (mh != 0)
1462 1461                  mac_close(mh);
1463 1462          return (error);
1464 1463  }
1465 1464  
1466 1465  static void
1467 1466  pfp_close(mac_handle_t mh, mac_client_handle_t mch)
1468 1467  {
1469 1468          mac_client_close(mch, 0);
1470 1469          mac_close(mh);
1471 1470  }
1472 1471  
1473 1472  /*
1474 1473   * The purpose of this function is to provide a single place where we free
1475 1474   * the loaded BPF program and reset all pointers/counters associated with
1476 1475   * it.
1477 1476   */
1478 1477  static void
1479 1478  pfp_release_bpf(struct pfpsock *ps)
1480 1479  {
1481 1480          if (ps->ps_bpf.bf_len != 0) {
1482 1481                  kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1483 1482                  ps->ps_bpf.bf_len = 0;
1484 1483                  ps->ps_bpf.bf_insns = NULL;
1485 1484          }
1486 1485  }
1487 1486  
1488 1487  /*
1489 1488   * Set the promiscuous mode of a network interface.
1490 1489   * This function only calls the mac layer when there is a change to the
1491 1490   * status of a network interface's promiscous mode. Tracking of how many
1492 1491   * sockets have the network interface in promiscuous mode, and thus the
1493 1492   * control over the physical device's status, is left to the mac layer.
1494 1493   */
1495 1494  static int
1496 1495  pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1497 1496  {
1498 1497          int error = 0;
1499 1498          int flags;
1500 1499  
1501 1500          /*
1502 1501           * There are 4 combinations of turnon/ps_promisc.
1503 1502           * This if handles 2 (both false, both true) and the if() below
1504 1503           * handles the remaining one - when change is required.
1505 1504           */
1506 1505          if (turnon == ps->ps_promisc)
1507 1506                  return (error);
1508 1507  
1509 1508          if (ps->ps_phd != 0) {
1510 1509                  mac_promisc_remove(ps->ps_phd);
1511 1510                  ps->ps_phd = 0;
1512 1511  
1513 1512                  /*
1514 1513                   * ps_promisc is set here in case the call to mac_promisc_add
1515 1514                   * fails: leaving it to indicate that the interface is still
1516 1515                   * in some sort of promiscuous mode is false.
1517 1516                   */
1518 1517                  if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1519 1518                          ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1520 1519                          flags = MAC_PROMISC_FLAGS_NO_PHYS;
1521 1520                  } else {
1522 1521                          flags = 0;
1523 1522                  }
1524 1523                  flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1525 1524          }
1526 1525  
1527 1526          error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1528 1527              &ps->ps_phd, flags);
1529 1528          if (error == 0)
1530 1529                  ps->ps_promisc = turnon;
1531 1530  
1532 1531          return (error);
1533 1532  }
1534 1533  
1535 1534  /*
1536 1535   * This table maps the MAC types in Solaris to the ARPHRD_* values used
1537 1536   * on Linux. This is used with the SIOCGIFHWADDR/SIOCGLIFHWADDR ioctl.
1538 1537   *
1539 1538   * The symbols in this table are *not* pulled in from <net/if_arp.h>,
1540 1539   * they are pulled from <netpacket/packet.h>, thus it acts as a source
1541 1540   * of supplementary information to the ARP table.
1542 1541   */
1543 1542  static uint_t arphrd_to_dl[][2] = {
1544 1543          { ARPHRD_IEEE80211,     DL_WIFI },
1545 1544          { ARPHRD_TUNNEL,        DL_IPV4 },
1546 1545          { ARPHRD_TUNNEL,        DL_IPV6 },
1547 1546          { ARPHRD_TUNNEL,        DL_6TO4 },
1548 1547          { ARPHRD_AX25,          DL_X25 },
1549 1548          { ARPHRD_ATM,           DL_ATM },
1550 1549          { 0,                    0 }
1551 1550  };
1552 1551  
1553 1552  static int
1554 1553  pfp_dl_to_arphrd(int dltype)
1555 1554  {
1556 1555          int i;
1557 1556  
1558 1557          for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1559 1558                  if (arphrd_to_dl[i][1] == dltype)
1560 1559                          return (arphrd_to_dl[i][0]);
1561 1560          return (arp_hw_type(dltype));
1562 1561  }

↓ open down ↓

1369 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX