1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2015 Joyent, Inc. All rights reserved.
25 */
26
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/stropts.h>
31 #include <sys/socket.h>
32 #include <sys/socketvar.h>
33 #include <sys/socket_proto.h>
34 #include <sys/sockio.h>
35 #include <sys/strsun.h>
36 #include <sys/kstat.h>
37 #include <sys/modctl.h>
38 #include <sys/policy.h>
39 #include <sys/priv_const.h>
40 #include <sys/tihdr.h>
41 #include <sys/zone.h>
42 #include <sys/time.h>
43 #include <sys/ethernet.h>
44 #include <sys/llc1.h>
45 #include <fs/sockfs/sockcommon.h>
46 #include <net/if.h>
47 #include <inet/ip_arp.h>
48
49 #include <sys/dls.h>
50 #include <sys/mac.h>
51 #include <sys/mac_client.h>
52 #include <sys/mac_provider.h>
53 #include <sys/mac_client_priv.h>
54
55 #include <netpacket/packet.h>
56
57 static void pfp_close(mac_handle_t, mac_client_handle_t);
58 static int pfp_dl_to_arphrd(int);
59 static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
60 socklen_t *);
61 static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *, int);
62 static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *,
63 int);
64 static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
65 cred_t *);
66 static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
67 static void pfp_release_bpf(struct pfpsock *);
68 static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
69 static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
70 socklen_t);
71 static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
72 socklen_t);
73
74 /*
75 * PFP sockfs operations
76 * Most are currently no-ops because they have no meaning for a connectionless
77 * socket.
78 */
79 static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
80 sock_upcalls_t *, int, struct cred *);
81 static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
82 struct cred *);
83 static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
84 static void sdpfp_clr_flowctrl(sock_lower_handle_t);
85 static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
86 socklen_t *, struct cred *);
87 static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
88 struct cred *);
89 static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
90 struct cred *);
91 static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
92 socklen_t, struct cred *);
93
94 static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
95 uint_t *, int *, int, cred_t *);
96
97 static int sockpfp_init(void);
98 static void sockpfp_fini(void);
99
100 static kstat_t *pfp_ksp;
101 static pfp_kstats_t ks_stats;
102 static pfp_kstats_t pfp_kstats = {
103 /*
104 * Each one of these kstats is a different return path in handling
105 * a packet received from the mac layer.
106 */
107 { "recvMacHeaderFail", KSTAT_DATA_UINT64 },
108 { "recvBadProtocol", KSTAT_DATA_UINT64 },
109 { "recvAllocbFail", KSTAT_DATA_UINT64 },
110 { "recvOk", KSTAT_DATA_UINT64 },
111 { "recvFail", KSTAT_DATA_UINT64 },
112 { "recvFiltered", KSTAT_DATA_UINT64 },
113 { "recvFlowControl", KSTAT_DATA_UINT64 },
114 /*
115 * A global set of counters is maintained to track the behaviour
116 * of the system (kernel & applications) in sending packets.
117 */
118 { "sendUnbound", KSTAT_DATA_UINT64 },
119 { "sendFailed", KSTAT_DATA_UINT64 },
120 { "sendTooBig", KSTAT_DATA_UINT64 },
121 { "sendAllocFail", KSTAT_DATA_UINT64 },
122 { "sendUiomoveFail", KSTAT_DATA_UINT64 },
123 { "sendNoMemory", KSTAT_DATA_UINT64 },
124 { "sendOpenFail", KSTAT_DATA_UINT64 },
125 { "sendWrongFamily", KSTAT_DATA_UINT64 },
126 { "sendShortMsg", KSTAT_DATA_UINT64 },
127 { "sendOk", KSTAT_DATA_UINT64 }
128 };
129
130 sock_downcalls_t pfp_downcalls = {
131 sdpfp_activate,
132 sock_accept_notsupp,
133 sdpfp_bind,
134 sock_listen_notsupp,
135 sock_connect_notsupp,
136 sock_getpeername_notsupp,
137 sock_getsockname_notsupp,
138 sdpfp_getsockopt,
139 sdpfp_setsockopt,
140 sock_send_notsupp,
141 sdpfp_senduio,
142 NULL,
143 sock_poll_notsupp,
144 sock_shutdown_notsupp,
145 sdpfp_clr_flowctrl,
146 sdpfp_ioctl,
147 sdpfp_close,
148 };
149
150 static smod_reg_t sinfo = {
151 SOCKMOD_VERSION,
152 "sockpfp",
153 SOCK_UC_VERSION,
154 SOCK_DC_VERSION,
155 sockpfp_create,
156 NULL
157 };
158
159 static int accepted_protos[3][2] = {
160 { ETH_P_ALL, 0 },
161 { ETH_P_802_2, LLC_SNAP_SAP },
162 { ETH_P_803_3, 0 },
163 };
164
/*
 * Upper bound, in bytes, on the size of the receive buffer for a PF_PACKET
 * socket. More properly, this should be controlled through ipadm, as is
 * done for TCP, UDP, SCTP, etc. Until that's done, this variable provides a
 * hard cap of 4 MB that can still be changed should it be needed.
 */
int sockmod_pfp_rcvbuf_max = 4 * 1024 * 1024;
172
173 /*
174 * Module linkage information for the kernel.
175 */
176 static struct modlsockmod modlsockmod = {
177 &mod_sockmodops, "PF Packet socket module", &sinfo
178 };
179
180 static struct modlinkage modlinkage = {
181 MODREV_1,
182 { &modlsockmod, NULL }
183 };
184
185 int
186 _init(void)
187 {
188 int error;
189
190 error = sockpfp_init();
191 if (error != 0)
192 return (error);
193
194 error = mod_install(&modlinkage);
195 if (error != 0)
196 sockpfp_fini();
197
198 return (error);
199 }
200
201 int
202 _fini(void)
203 {
204 int error;
205
206 error = mod_remove(&modlinkage);
207 if (error == 0)
208 sockpfp_fini();
209
210 return (error);
211 }
212
213 int
214 _info(struct modinfo *modinfop)
215 {
216 return (mod_info(&modlinkage, modinfop));
217 }
218
219 /*
220 * sockpfp_init: called as part of the initialisation of the module when
221 * loaded into the kernel.
222 *
223 * Being able to create and record the kstats data in the kernel is not
224 * considered to be vital to the operation of this kernel module, thus
225 * its failure is tolerated.
226 */
227 static int
228 sockpfp_init(void)
229 {
230 (void) memset(&ks_stats, 0, sizeof (ks_stats));
231
232 (void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
233
234 pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
235 KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
236 KSTAT_FLAG_VIRTUAL);
237 if (pfp_ksp != NULL) {
238 pfp_ksp->ks_data = &ks_stats;
239 kstat_install(pfp_ksp);
240 }
241
242 return (0);
243 }
244
245 /*
246 * sockpfp_fini: called when the operating system wants to unload the
247 * socket module from the kernel.
248 */
249 static void
250 sockpfp_fini(void)
251 {
252 if (pfp_ksp != NULL)
253 kstat_delete(pfp_ksp);
254 }
255
256 /*
257 * Due to sockets being created read-write by default, all PF_PACKET sockets
258 * therefore require the NET_RAWACCESS priviliege, even if the socket is only
259 * being used for reading packets from.
260 *
261 * This create function enforces this module only being used with PF_PACKET
262 * sockets and the policy that we support via the config file in sock2path.d:
263 * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
264 */
265 /* ARGSUSED */
266 static sock_lower_handle_t
267 sockpfp_create(int family, int type, int proto,
268 sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
269 int sflags, cred_t *cred)
270 {
271 struct pfpsock *ps;
272 int kmflags;
273 int newproto;
274 int i;
275
276 if (secpolicy_net_rawaccess(cred) != 0) {
277 *errorp = EACCES;
278 return (NULL);
279 }
280
281 if (family != AF_PACKET) {
282 *errorp = EAFNOSUPPORT;
283 return (NULL);
284 }
285
286 if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
287 *errorp = ESOCKTNOSUPPORT;
288 return (NULL);
289 }
290
291 /*
292 * First check to see if the protocol number passed in via the socket
293 * creation should be mapped to a different number for internal use.
294 */
295 for (i = 0, newproto = -1;
296 i < sizeof (accepted_protos)/ sizeof (accepted_protos[0]); i++) {
297 if (accepted_protos[i][0] == proto) {
298 newproto = accepted_protos[i][1];
299 break;
300 }
301 }
302
303 /*
304 * If the mapping of the protocol that was under 0x800 failed to find
305 * a local equivalent then fail the socket creation. If the protocol
306 * for the socket is over 0x800 and it was not found in the mapping
307 * table above, then use the value as is.
308 */
309 if (newproto == -1) {
310 if (proto < 0x800) {
311 *errorp = ENOPROTOOPT;
312 return (NULL);
313 }
314 newproto = proto;
315 }
316 proto = newproto;
317
318 kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
319 ps = kmem_zalloc(sizeof (*ps), kmflags);
320 if (ps == NULL) {
321 *errorp = ENOMEM;
322 return (NULL);
323 }
324
325 ps->ps_type = type;
326 ps->ps_proto = proto;
327 rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
328 mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
329
330 *sock_downcalls = &pfp_downcalls;
331 /*
332 * Setting this causes bytes from a packet that do not fit into the
333 * destination user buffer to be discarded. Thus the API is one
334 * packet per receive and callers are required to use a buffer large
335 * enough for the biggest packet that the interface can provide.
336 */
337 *smodep = SM_ATOMIC;
338
339 return ((sock_lower_handle_t)ps);
340 }
341
342 /* ************************************************************************* */
343
344 /*
345 * pfp_packet is the callback function that is given to the mac layer for
346 * PF_PACKET to receive packets with. One packet at a time is passed into
347 * this function from the mac layer. Each packet is a private copy given
348 * to PF_PACKET to modify or free as it wishes and does not harm the original
349 * packet from which it was cloned.
350 */
351 /* ARGSUSED */
352 static void
353 pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
354 {
355 struct T_unitdata_ind *tunit;
356 struct sockaddr_ll *sll;
357 struct sockaddr_ll *sol;
358 mac_header_info_t hdr;
359 struct pfpsock *ps;
360 size_t tusz;
361 mblk_t *mp0;
362 int error;
363
364 if (mp == NULL)
365 return;
366
367 ps = arg;
368 if (ps->ps_flow_ctrld) {
369 ps->ps_flow_ctrl_drops++;
370 ps->ps_stats.tp_drops++;
371 ks_stats.kp_recv_flow_cntrld.value.ui64++;
372 freemsg(mp);
373 return;
374 }
375
376 if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
377 /*
378 * Can't decode the packet header information so drop it.
379 */
380 ps->ps_stats.tp_drops++;
381 ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
382 freemsg(mp);
383 return;
384 }
385
386 if (mac_type(ps->ps_mh) == DL_ETHER &&
387 hdr.mhi_bindsap == ETHERTYPE_VLAN) {
388 struct ether_vlan_header *evhp;
389 struct ether_vlan_header evh;
390
391 hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
392 hdr.mhi_istagged = B_TRUE;
393
394 if (MBLKL(mp) >= sizeof (*evhp)) {
395 evhp = (struct ether_vlan_header *)mp->b_rptr;
396 } else {
397 int sz = sizeof (*evhp);
398 char *s = (char *)&evh;
399 mblk_t *tmp;
400 int len;
401
402 for (tmp = mp; sz > 0 && tmp != NULL;
403 tmp = tmp->b_cont) {
404 len = min(sz, MBLKL(tmp));
405 bcopy(tmp->b_rptr, s, len);
406 sz -= len;
407 }
408 evhp = &evh;
409 }
410 hdr.mhi_tci = ntohs(evhp->ether_tci);
411 hdr.mhi_bindsap = ntohs(evhp->ether_type);
412 }
413
414 if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
415 /*
416 * The packet is not of interest to this socket so
417 * drop it on the floor. Here the SAP is being used
418 * as a very course filter.
419 */
420 ps->ps_stats.tp_drops++;
421 ks_stats.kp_recv_bad_proto.value.ui64++;
422 freemsg(mp);
423 return;
424 }
425
426 /*
427 * This field is not often set, even for ethernet,
428 * by mac_header_info, so compute it if it is 0.
429 */
430 if (hdr.mhi_pktsize == 0)
431 hdr.mhi_pktsize = msgdsize(mp);
432
433 /*
434 * If a BPF filter is present, pass the raw packet into that.
435 * A failed match will result in zero being returned, indicating
436 * that this socket is not interested in the packet.
437 */
438 if (ps->ps_bpf.bf_len != 0) {
439 uchar_t *buffer;
440 int buflen;
441
442 buflen = MBLKL(mp);
443 if (hdr.mhi_pktsize == buflen) {
444 buffer = mp->b_rptr;
445 } else {
446 buflen = 0;
447 buffer = (uchar_t *)mp;
448 }
449 rw_enter(&ps->ps_bpflock, RW_READER);
450 if (bpf_filter(ps->ps_bpf.bf_insns, buffer,
451 hdr.mhi_pktsize, buflen) == 0) {
452 rw_exit(&ps->ps_bpflock);
453 ps->ps_stats.tp_drops++;
454 ks_stats.kp_recv_filtered.value.ui64++;
455 freemsg(mp);
456 return;
457 }
458 rw_exit(&ps->ps_bpflock);
459 }
460
461 if (ps->ps_type == SOCK_DGRAM) {
462 /*
463 * SOCK_DGRAM socket expect a "layer 3" packet, so advance
464 * past the link layer header.
465 */
466 mp->b_rptr += hdr.mhi_hdrsize;
467 hdr.mhi_pktsize -= hdr.mhi_hdrsize;
468 }
469
470 tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
471 if (ps->ps_auxdata) {
472 tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
473 tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
474 }
475
476 /*
477 * It is tempting to think that this could be optimised by having
478 * the base mblk_t allocated and hung off the pfpsock structure,
479 * except that then another one would need to be allocated for the
480 * sockaddr_ll that is included. Even creating a template to copy
481 * from is of questionable value, as read-write from one structure
482 * to the other is going to be slower than all of the initialisation.
483 */
484 mp0 = allocb(tusz, BPRI_HI);
485 if (mp0 == NULL) {
486 ps->ps_stats.tp_drops++;
487 ks_stats.kp_recv_alloc_fail.value.ui64++;
488 freemsg(mp);
489 return;
490 }
491
492 (void) memset(mp0->b_rptr, 0, tusz);
493
494 mp0->b_datap->db_type = M_PROTO;
495 mp0->b_wptr = mp0->b_rptr + tusz;
496
497 tunit = (struct T_unitdata_ind *)mp0->b_rptr;
498 tunit->PRIM_type = T_UNITDATA_IND;
499 tunit->SRC_length = sizeof (struct sockaddr);
500 tunit->SRC_offset = sizeof (*tunit);
501
502 sol = &ps->ps_sock;
503 sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
504 sll->sll_ifindex = sol->sll_ifindex;
505 sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
506 sll->sll_halen = sol->sll_halen;
507 if (hdr.mhi_saddr != NULL)
508 (void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
509
510 switch (hdr.mhi_dsttype) {
511 case MAC_ADDRTYPE_MULTICAST :
512 sll->sll_pkttype = PACKET_MULTICAST;
513 break;
514 case MAC_ADDRTYPE_BROADCAST :
515 sll->sll_pkttype = PACKET_BROADCAST;
516 break;
517 case MAC_ADDRTYPE_UNICAST :
518 if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
519 sll->sll_pkttype = PACKET_HOST;
520 else
521 sll->sll_pkttype = PACKET_OTHERHOST;
522 break;
523 }
524
525 if (ps->ps_auxdata) {
526 struct tpacket_auxdata *aux;
527 struct T_opthdr *topt;
528
529 tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
530 sizeof (struct sockaddr_ll));
531 tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
532 _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
533
534 topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
535 aux = (struct tpacket_auxdata *)
536 ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
537
538 topt->len = tunit->OPT_length;
539 topt->level = SOL_PACKET;
540 topt->name = PACKET_AUXDATA;
541 topt->status = 0;
542 /*
543 * libpcap doesn't seem to use any other field,
544 * so it isn't clear how they should be filled in.
545 */
546 aux->tp_vlan_vci = hdr.mhi_tci;
547 }
548
549 linkb(mp0, mp);
550
551 (void) gethrestime(&ps->ps_timestamp);
552
553 ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
554 &error, NULL);
555
556 if (error == 0) {
557 ps->ps_stats.tp_packets++;
558 ks_stats.kp_recv_ok.value.ui64++;
559 } else {
560 mutex_enter(&ps->ps_lock);
561 if (error == ENOSPC) {
562 ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
563 &error, NULL);
564 if (error == ENOSPC)
565 ps->ps_flow_ctrld = B_TRUE;
566 }
567 mutex_exit(&ps->ps_lock);
568 ps->ps_stats.tp_drops++;
569 ks_stats.kp_recv_fail.value.ui64++;
570 }
571 }
572
573 /*
574 * Bind a PF_PACKET socket to a network interface.
575 *
576 * The default operation of this bind() is to place the socket (and thus the
577 * network interface) into promiscuous mode. It is then up to the application
578 * to turn that down by issuing the relevant ioctls, if desired.
579 */
580 static int
581 sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
582 socklen_t addrlen, struct cred *cred)
583 {
584 struct sockaddr_ll *addr_ll, *sol;
585 mac_client_handle_t mch;
586 struct pfpsock *ps;
587 mac_handle_t mh;
588 int error;
589
590 ps = (struct pfpsock *)handle;
591 if (ps->ps_bound)
592 return (EINVAL);
593
594 if (addrlen < sizeof (struct sockaddr_ll) || addr == NULL)
595 return (EINVAL);
596
597 addr_ll = (struct sockaddr_ll *)addr;
598
599 error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
600 if (error != 0)
601 return (error);
602 /*
603 * Ensure that each socket is only bound once.
604 */
605 mutex_enter(&ps->ps_lock);
606 if (ps->ps_mh != 0) {
607 mutex_exit(&ps->ps_lock);
608 pfp_close(mh, mch);
609 return (EADDRINUSE);
610 }
611 ps->ps_mh = mh;
612 ps->ps_mch = mch;
613 mutex_exit(&ps->ps_lock);
614
615 /*
616 * Cache all of the information from bind so that it's in an easy
617 * place to get at when packets are received.
618 */
619 sol = &ps->ps_sock;
620 sol->sll_family = AF_PACKET;
621 sol->sll_ifindex = addr_ll->sll_ifindex;
622 sol->sll_protocol = addr_ll->sll_protocol;
623 sol->sll_halen = mac_addr_len(ps->ps_mh);
624 mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
625 mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
626 ps->ps_linkid = addr_ll->sll_ifindex;
627
628 error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
629 pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
630 if (error == 0) {
631 ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
632 ps->ps_bound = B_TRUE;
633 }
634
635 return (error);
636 }
637
638 /* ARGSUSED */
639 static void
640 sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
641 sock_upcalls_t *upcalls, int flags, cred_t *cred)
642 {
643 struct pfpsock *ps;
644
645 ps = (struct pfpsock *)lower;
646 ps->ps_upper = upper;
647 ps->ps_upcalls = upcalls;
648 }
649
650 /*
651 * This module only implements getting socket options for the new socket
652 * option level (SOL_PACKET) that it introduces. All other requests are
653 * passed back to the sockfs layer.
654 */
655 /* ARGSUSED */
656 static int
657 sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
658 void *optval, socklen_t *optlenp, struct cred *cred)
659 {
660 struct pfpsock *ps;
661 int error = 0;
662
663 ps = (struct pfpsock *)handle;
664
665 switch (level) {
666 case SOL_PACKET :
667 error = pfp_getpacket_sockopt(handle, option_name, optval,
668 optlenp);
669 break;
670
671 case SOL_SOCKET :
672 if (option_name == SO_RCVBUF) {
673 if (*optlenp < sizeof (int32_t))
674 return (EINVAL);
675 *((int32_t *)optval) = ps->ps_rcvbuf;
676 *optlenp = sizeof (int32_t);
677 } else {
678 error = ENOPROTOOPT;
679 }
680 break;
681
682 default :
683 /*
684 * If sockfs code receives this error in return from the
685 * getsockopt downcall it handles the option locally, if
686 * it can.
687 */
688 error = ENOPROTOOPT;
689 break;
690 }
691
692 return (error);
693 }
694
695 /*
696 * PF_PACKET supports setting socket options at only two levels:
697 * SOL_SOCKET and SOL_PACKET.
698 */
699 /* ARGSUSED */
700 static int
701 sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
702 const void *optval, socklen_t optlen, struct cred *cred)
703 {
704 int error = 0;
705
706 switch (level) {
707 case SOL_SOCKET :
708 error = pfp_setsocket_sockopt(handle, option_name, optval,
709 optlen);
710 break;
711 case SOL_PACKET :
712 error = pfp_setpacket_sockopt(handle, option_name, optval,
713 optlen);
714 break;
715 default :
716 error = EINVAL;
717 break;
718 }
719
720 return (error);
721 }
722
723 /*
724 * This function is incredibly inefficient for sending any packet that
725 * comes with a msghdr asking to be sent to an interface to which the
726 * socket has not been bound. Some possibilities here are keeping a
727 * cache of all open mac's and mac_client's, for the purpose of sending,
728 * and closing them after some amount of inactivity. Clearly, applications
729 * should not be written to use one socket for multiple interfaces if
730 * performance is desired with the code as is.
731 */
732 /* ARGSUSED */
733 static int
734 sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
735 struct nmsghdr *msg, struct cred *cred)
736 {
737 struct sockaddr_ll *sol;
738 mac_client_handle_t mch;
739 struct pfpsock *ps;
740 boolean_t new_open;
741 mac_handle_t mh;
742 size_t mpsize;
743 uint_t maxsdu;
744 mblk_t *mp0;
745 mblk_t *mp;
746 int error;
747
748 mp = NULL;
749 mp0 = NULL;
750 new_open = B_FALSE;
751 ps = (struct pfpsock *)handle;
752 mh = ps->ps_mh;
753 mch = ps->ps_mch;
754 maxsdu = ps->ps_max_sdu;
755
756 sol = (struct sockaddr_ll *)msg->msg_name;
757 if (sol == NULL) {
758 /*
759 * If no sockaddr_ll has been provided with the send call,
760 * use the one constructed when the socket was bound to an
761 * interface and fail if it hasn't been bound.
762 */
763 if (!ps->ps_bound) {
764 ks_stats.kp_send_unbound.value.ui64++;
765 return (EPROTO);
766 }
767 sol = &ps->ps_sock;
768 } else {
769 /*
770 * Verify the sockaddr_ll message passed down before using
771 * it to send a packet out with. If it refers to an interface
772 * that has not been bound, it is necessary to open it.
773 */
774 struct sockaddr_ll *sll;
775
776 if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
777 ks_stats.kp_send_short_msg.value.ui64++;
778 return (EINVAL);
779 }
780
781 if (sol->sll_family != AF_PACKET) {
782 ks_stats.kp_send_wrong_family.value.ui64++;
783 return (EAFNOSUPPORT);
784 }
785
786 sll = &ps->ps_sock;
787 if (sol->sll_ifindex != sll->sll_ifindex) {
788 error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
789 cred);
790 if (error != 0) {
791 ks_stats.kp_send_open_fail.value.ui64++;
792 return (error);
793 }
794 mac_sdu_get(mh, NULL, &maxsdu);
795 new_open = B_TRUE;
796 }
797 }
798
799 mpsize = uiop->uio_resid;
800 if (mpsize > maxsdu) {
801 ks_stats.kp_send_too_big.value.ui64++;
802 error = EMSGSIZE;
803 goto done;
804 }
805
806 if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
807 ks_stats.kp_send_alloc_fail.value.ui64++;
808 error = ENOBUFS;
809 goto done;
810 }
811
812 mp->b_wptr = mp->b_rptr + mpsize;
813 error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
814 if (error != 0) {
815 ks_stats.kp_send_uiomove_fail.value.ui64++;
816 goto done;
817 }
818
819 if (ps->ps_type == SOCK_DGRAM) {
820 mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
821 if (mp0 == NULL) {
822 ks_stats.kp_send_no_memory.value.ui64++;
823 error = ENOBUFS;
824 goto done;
825 }
826 linkb(mp0, mp);
827 mp = mp0;
828 }
829
830 /*
831 * As this is sending datagrams and no promise is made about
832 * how or if a packet will be sent/delivered, no effort is to
833 * be expended in recovering from a situation where the packet
834 * cannot be sent - it is just dropped.
835 */
836 error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
837 if (error == 0) {
838 mp = NULL;
839 ks_stats.kp_send_ok.value.ui64++;
840 } else {
841 ks_stats.kp_send_failed.value.ui64++;
842 }
843
844 done:
845
846 if (new_open) {
847 ASSERT(mch != ps->ps_mch);
848 ASSERT(mh != ps->ps_mh);
849 pfp_close(mh, mch);
850 }
851 if (mp != NULL)
852 freemsg(mp);
853
854 return (error);
855
856 }
857
858 /*
859 * There's no use of a lock here, or at the bottom of pfp_packet() where
860 * ps_flow_ctrld is set to true, because in a situation where these two
861 * are racing to set the flag one way or the other, the end result is
862 * going to be ultimately determined by the scheduler anyway - which of
863 * the two threads gets the lock first? In such an operational environment,
864 * we've got packets arriving too fast to be delt with so packets are going
865 * to be dropped. Grabbing a lock just makes the drop more expensive.
866 */
867 static void
868 sdpfp_clr_flowctrl(sock_lower_handle_t handle)
869 {
870 struct pfpsock *ps;
871
872 ps = (struct pfpsock *)handle;
873
874 mutex_enter(&ps->ps_lock);
875 ps->ps_flow_ctrld = B_FALSE;
876 mutex_exit(&ps->ps_lock);
877 }
878
879 /*
880 * The implementation of this ioctl() handler is intended to function
881 * in the absence of a bind() being made before it is called. Thus the
882 * function calls mac_open() itself to provide a handle
883 * This function is structured like this:
884 * - determine the linkid for the interface being targetted
885 * - open the interface with said linkid
886 * - perform ioctl
887 * - copy results back to caller
888 *
889 * The ioctls that interact with interface flags have been implented below
890 * to assume that the interface is always up and running (IFF_RUNNING) and
891 * to use the state of this socket to determine whether or not the network
892 * interface is in promiscuous mode. Thus an ioctl to get the interface flags
893 * of an interface that has been put in promiscuous mode by another socket
894 * (in the same program or different), will not report that status.
895 */
896 /* ARGSUSED */
897 static int
898 sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
899 int32_t *rval, struct cred *cr)
900 {
901 struct timeval tival;
902 mac_client_promisc_type_t mtype;
903 struct sockaddr_dl *sock;
904 datalink_id_t linkid;
905 struct lifreq lifreq;
906 struct ifreq ifreq;
907 struct pfpsock *ps;
908 mac_handle_t mh;
909 int error;
910
911 ps = (struct pfpsock *)handle;
912
913 switch (cmd) {
914 /*
915 * ioctls that work on "struct lifreq"
916 */
917 case SIOCSLIFFLAGS :
918 case SIOCGLIFINDEX :
919 case SIOCGLIFFLAGS :
920 case SIOCGLIFMTU :
921 case SIOCGLIFHWADDR :
922 error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid, mod);
923 if (error != 0)
924 return (error);
925 break;
926
927 /*
928 * ioctls that work on "struct ifreq".
929 * Not all of these have a "struct lifreq" partner, for example
930 * SIOCGIFHWADDR, for the simple reason that the logical interface
931 * does not have a hardware address.
932 */
933 case SIOCSIFFLAGS :
934 case SIOCGIFINDEX :
935 case SIOCGIFFLAGS :
936 case SIOCGIFMTU :
937 case SIOCGIFHWADDR :
938 error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid, mod);
939 if (error != 0)
940 return (error);
941 break;
942
943 case SIOCGSTAMP :
944 tival.tv_sec = (time_t)ps->ps_timestamp.tv_sec;
945 tival.tv_usec = ps->ps_timestamp.tv_nsec / 1000;
946 if (get_udatamodel() == DATAMODEL_NATIVE) {
947 error = ddi_copyout(&tival, (void *)arg,
948 sizeof (tival), mod);
949 }
950 #ifdef _SYSCALL32_IMPL
951 else {
952 struct timeval32 tv32;
953 TIMEVAL_TO_TIMEVAL32(&tv32, &tival);
954 error = ddi_copyout(&tv32, (void *)arg,
955 sizeof (tv32), mod);
956 }
957 #endif
958 return (error);
959 }
960
961 error = mac_open_by_linkid(linkid, &mh);
962 if (error != 0)
963 return (error);
964
965 switch (cmd) {
966 case SIOCGLIFINDEX :
967 lifreq.lifr_index = linkid;
968 break;
969
970 case SIOCGIFINDEX :
971 ifreq.ifr_index = linkid;
972 break;
973
974 case SIOCGIFFLAGS :
975 ifreq.ifr_flags = IFF_RUNNING;
976 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
977 ifreq.ifr_flags |= IFF_PROMISC;
978 break;
979
980 case SIOCGLIFFLAGS :
981 lifreq.lifr_flags = IFF_RUNNING;
982 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
983 lifreq.lifr_flags |= IFF_PROMISC;
984 break;
985
986 case SIOCSIFFLAGS :
987 if (linkid != ps->ps_linkid) {
988 error = EINVAL;
989 } else {
990 if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
991 mtype = MAC_CLIENT_PROMISC_ALL;
992 else
993 mtype = MAC_CLIENT_PROMISC_FILTERED;
994 error = pfp_set_promisc(ps, mtype);
995 }
996 break;
997
998 case SIOCSLIFFLAGS :
999 if (linkid != ps->ps_linkid) {
1000 error = EINVAL;
1001 } else {
1002 if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
1003 mtype = MAC_CLIENT_PROMISC_ALL;
1004 else
1005 mtype = MAC_CLIENT_PROMISC_FILTERED;
1006 error = pfp_set_promisc(ps, mtype);
1007 }
1008 break;
1009
1010 case SIOCGIFMTU :
1011 mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
1012 break;
1013
1014 case SIOCGLIFMTU :
1015 mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
1016 break;
1017
1018 case SIOCGIFHWADDR :
1019 if (mac_addr_len(mh) > sizeof (ifreq.ifr_addr.sa_data)) {
1020 error = EPFNOSUPPORT;
1021 break;
1022 }
1023
1024 if (mac_addr_len(mh) == 0) {
1025 (void) memset(ifreq.ifr_addr.sa_data, 0,
1026 sizeof (ifreq.ifr_addr.sa_data));
1027 } else {
1028 mac_unicast_primary_get(mh,
1029 (uint8_t *)ifreq.ifr_addr.sa_data);
1030 }
1031
1032 /*
1033 * The behaviour here in setting sa_family is consistent
1034 * with what applications such as tcpdump would expect
1035 * for a Linux PF_PACKET socket.
1036 */
1037 ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
1038 break;
1039
1040 case SIOCGLIFHWADDR :
1041 lifreq.lifr_type = 0;
1042 sock = (struct sockaddr_dl *)&lifreq.lifr_addr;
1043
1044 if (mac_addr_len(mh) > sizeof (sock->sdl_data)) {
1045 error = EPFNOSUPPORT;
1046 break;
1047 }
1048
1049 /*
1050 * Fill in the sockaddr_dl with link layer details. Of note,
1051 * the index is returned as 0 for a couple of reasons:
1052 * (1) there is no public API that uses or requires it
1053 * (2) the MAC index is currently 32bits and sdl_index is 16.
1054 */
1055 sock->sdl_family = AF_LINK;
1056 sock->sdl_index = 0;
1057 sock->sdl_type = mac_type(mh);
1058 sock->sdl_nlen = 0;
1059 sock->sdl_alen = mac_addr_len(mh);
1060 sock->sdl_slen = 0;
1061 if (mac_addr_len(mh) == 0) {
1062 (void) memset(sock->sdl_data, 0,
1063 sizeof (sock->sdl_data));
1064 } else {
1065 mac_unicast_primary_get(mh, (uint8_t *)sock->sdl_data);
1066 }
1067 break;
1068
1069 default :
1070 break;
1071 }
1072
1073 mac_close(mh);
1074
1075 if (error == 0) {
1076 /*
1077 * Only the "GET" ioctls need to copy data back to userace.
1078 */
1079 switch (cmd) {
1080 case SIOCGLIFINDEX :
1081 case SIOCGLIFFLAGS :
1082 case SIOCGLIFMTU :
1083 case SIOCGLIFHWADDR :
1084 error = ddi_copyout(&lifreq, (void *)arg,
1085 sizeof (lifreq), mod);
1086 break;
1087
1088 case SIOCGIFINDEX :
1089 case SIOCGIFFLAGS :
1090 case SIOCGIFMTU :
1091 case SIOCGIFHWADDR :
1092 error = ddi_copyout(&ifreq, (void *)arg,
1093 sizeof (ifreq), mod);
1094 break;
1095 default :
1096 break;
1097 }
1098 }
1099
1100 return (error);
1101 }
1102
1103 /*
1104 * Closing the socket requires that all open references to network
1105 * interfaces be closed.
1106 */
1107 /* ARGSUSED */
1108 static int
1109 sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
1110 {
1111 struct pfpsock *ps = (struct pfpsock *)handle;
1112
1113 if (ps->ps_phd != 0) {
1114 mac_promisc_remove(ps->ps_phd);
1115 ps->ps_phd = 0;
1116 }
1117
1118 if (ps->ps_mch != 0) {
1119 mac_client_close(ps->ps_mch, 0);
1120 ps->ps_mch = 0;
1121 }
1122
1123 if (ps->ps_mh != 0) {
1124 mac_close(ps->ps_mh);
1125 ps->ps_mh = 0;
1126 }
1127
1128 kmem_free(ps, sizeof (*ps));
1129
1130 return (0);
1131 }
1132
1133 /* ************************************************************************* */
1134
1135 /*
1136 * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1137 * determine the linkid for the interface name stored in that structure.
1138 * name is used as a buffer so that we can ensure a trailing \0 is appended
1139 * to the name safely.
1140 */
1141 static int
1142 pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1143 datalink_id_t *linkidp, int mode)
1144 {
1145 char name[IFNAMSIZ + 1];
1146 int error;
1147
1148 if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), mode) != 0)
1149 return (EFAULT);
1150
1151 (void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1152
1153 error = dls_mgmt_get_linkid(name, linkidp);
1154 if (error != 0)
1155 error = dls_devnet_macname2linkid(name, linkidp);
1156
1157 return (error);
1158 }
1159
1160 /*
1161 * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1162 * determine the linkid for the interface name stored in that structure.
1163 * name is used as a buffer so that we can ensure a trailing \0 is appended
1164 * to the name safely.
1165 */
1166 static int
1167 pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1168 datalink_id_t *linkidp, int mode)
1169 {
1170 char name[LIFNAMSIZ + 1];
1171 int error;
1172
1173 if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), mode) != 0)
1174 return (EFAULT);
1175
1176 (void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1177
1178 error = dls_mgmt_get_linkid(name, linkidp);
1179 if (error != 0)
1180 error = dls_devnet_macname2linkid(name, linkidp);
1181
1182 return (error);
1183 }
1184
1185 /*
1186 * Although there are several new SOL_PACKET options that can be set and
1187 * are specific to this implementation of PF_PACKET, the current API does
1188 * not support doing a get on them to retrieve accompanying status. Thus
1189 * it is only currently possible to use SOL_PACKET with getsockopt to
1190 * retrieve statistical information. This remains consistant with the
1191 * Linux API at the time of writing.
1192 */
1193 static int
1194 pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1195 void *optval, socklen_t *optlenp)
1196 {
1197 struct pfpsock *ps;
1198 struct tpacket_stats_short tpss;
1199 int error = 0;
1200
1201 ps = (struct pfpsock *)handle;
1202
1203 switch (option_name) {
1204 case PACKET_STATISTICS :
1205 if (*optlenp < sizeof (ps->ps_stats)) {
1206 error = EINVAL;
1207 break;
1208 }
1209 *optlenp = sizeof (ps->ps_stats);
1210 bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1211 break;
1212 case PACKET_STATISTICS_SHORT :
1213 if (*optlenp < sizeof (tpss)) {
1214 error = EINVAL;
1215 break;
1216 }
1217 *optlenp = sizeof (tpss);
1218 tpss.tp_packets = ps->ps_stats.tp_packets;
1219 tpss.tp_drops = ps->ps_stats.tp_drops;
1220 bcopy(&tpss, optval, sizeof (tpss));
1221 break;
1222 default :
1223 error = EINVAL;
1224 break;
1225 }
1226
1227 return (error);
1228 }
1229
1230 /*
1231 * The SOL_PACKET level for socket options supports three options,
1232 * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1233 * This function is responsible for mapping the two socket options
1234 * that manage multicast membership into the appropriate internal
1235 * function calls to bring the option into effect. Whilst direct
1236 * changes to the multicast membership (ADD/DROP) groups is handled
1237 * by calls directly into the mac module, changes to the promiscuos
1238 * mode are vectored through pfp_set_promisc() so that the logic for
1239 * managing the promiscuous mode is in one place.
1240 */
1241 /* ARGSUSED */
1242 static int
1243 pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1244 const void *optval, socklen_t optlen)
1245 {
1246 struct packet_mreq mreq;
1247 struct pfpsock *ps;
1248 int error = 0;
1249 int opt;
1250
1251 ps = (struct pfpsock *)handle;
1252 if (!ps->ps_bound)
1253 return (EPROTO);
1254
1255 if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1256 (option_name == PACKET_DROP_MEMBERSHIP)) {
1257 if (!ps->ps_bound)
1258 return (EPROTO);
1259 bcopy(optval, &mreq, sizeof (mreq));
1260 if (ps->ps_linkid != mreq.mr_ifindex)
1261 return (EINVAL);
1262 }
1263
1264 switch (option_name) {
1265 case PACKET_ADD_MEMBERSHIP :
1266 switch (mreq.mr_type) {
1267 case PACKET_MR_MULTICAST :
1268 if (mreq.mr_alen != ps->ps_sock.sll_halen)
1269 return (EINVAL);
1270
1271 error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1272 break;
1273
1274 case PACKET_MR_PROMISC :
1275 error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1276 break;
1277
1278 case PACKET_MR_ALLMULTI :
1279 error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1280 break;
1281 }
1282 break;
1283
1284 case PACKET_DROP_MEMBERSHIP :
1285 switch (mreq.mr_type) {
1286 case PACKET_MR_MULTICAST :
1287 if (mreq.mr_alen != ps->ps_sock.sll_halen)
1288 return (EINVAL);
1289
1290 mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1291 break;
1292
1293 case PACKET_MR_PROMISC :
1294 if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1295 return (EINVAL);
1296 error = pfp_set_promisc(ps,
1297 MAC_CLIENT_PROMISC_FILTERED);
1298 break;
1299
1300 case PACKET_MR_ALLMULTI :
1301 if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1302 return (EINVAL);
1303 error = pfp_set_promisc(ps,
1304 MAC_CLIENT_PROMISC_FILTERED);
1305 break;
1306 }
1307 break;
1308
1309 case PACKET_AUXDATA :
1310 if (optlen == sizeof (int)) {
1311 opt = *(int *)optval;
1312 ps->ps_auxdata = (opt != 0);
1313 } else {
1314 error = EINVAL;
1315 }
1316 break;
1317 default :
1318 error = EINVAL;
1319 break;
1320 }
1321
1322 return (error);
1323 }
1324
1325 /*
1326 * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1327 * SO_ATTACH_FILTER and SO_DETACH_FILTER.
1328 *
1329 * Both of these setsockopt values are candidates for being handled by the
1330 * socket layer itself in future, however this requires understanding how
1331 * they would interact with all other sockets.
1332 */
1333 static int
1334 pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1335 const void *optval, socklen_t optlen)
1336 {
1337 struct bpf_program prog;
1338 struct bpf_insn *fcode;
1339 struct pfpsock *ps;
1340 struct sock_proto_props sopp;
1341 int error = 0;
1342 int size;
1343
1344 ps = (struct pfpsock *)handle;
1345
1346 switch (option_name) {
1347 case SO_ATTACH_FILTER :
1348 #ifdef _LP64
1349 if (optlen == sizeof (struct bpf_program32)) {
1350 struct bpf_program32 prog32;
1351
1352 bcopy(optval, &prog32, sizeof (prog32));
1353 prog.bf_len = prog32.bf_len;
1354 prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1355 } else
1356 #endif
1357 if (optlen == sizeof (struct bpf_program)) {
1358 bcopy(optval, &prog, sizeof (prog));
1359 } else if (optlen != sizeof (struct bpf_program)) {
1360 return (EINVAL);
1361 }
1362 if (prog.bf_len > BPF_MAXINSNS)
1363 return (EINVAL);
1364
1365 size = prog.bf_len * sizeof (*prog.bf_insns);
1366 fcode = kmem_alloc(size, KM_SLEEP);
1367 if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1368 kmem_free(fcode, size);
1369 return (EFAULT);
1370 }
1371
1372 if (bpf_validate(fcode, (int)prog.bf_len)) {
1373 rw_enter(&ps->ps_bpflock, RW_WRITER);
1374 pfp_release_bpf(ps);
1375 ps->ps_bpf.bf_insns = fcode;
1376 ps->ps_bpf.bf_len = size;
1377 rw_exit(&ps->ps_bpflock);
1378
1379 return (0);
1380 }
1381 kmem_free(fcode, size);
1382 error = EINVAL;
1383 break;
1384
1385 case SO_DETACH_FILTER :
1386 pfp_release_bpf(ps);
1387 break;
1388
1389 case SO_RCVBUF :
1390 size = *(int32_t *)optval;
1391 if (size > sockmod_pfp_rcvbuf_max || size < 0)
1392 return (ENOBUFS);
1393 sopp.sopp_flags = SOCKOPT_RCVHIWAT;
1394 sopp.sopp_rxhiwat = size;
1395 ps->ps_upcalls->su_set_proto_props(ps->ps_upper, &sopp);
1396 ps->ps_rcvbuf = size;
1397 break;
1398
1399 default :
1400 error = ENOPROTOOPT;
1401 break;
1402 }
1403
1404 return (error);
1405 }
1406
1407 /*
1408 * pfp_open_index is an internal function used to open a MAC device by
1409 * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1410 * because some of the interfaces provided by the mac layer require either
1411 * only the mac_handle_t or both it and mac_handle_t.
1412 *
1413 * Whilst inside the kernel we can access data structures supporting any
1414 * zone, access to interfaces from non-global zones is restricted to those
1415 * interfaces (if any) that are exclusively assigned to a zone.
1416 */
1417 static int
1418 pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1419 cred_t *cred)
1420 {
1421 mac_client_handle_t mch;
1422 zoneid_t ifzoneid;
1423 mac_handle_t mh;
1424 zoneid_t zoneid;
1425 int error;
1426
1427 mh = 0;
1428 mch = 0;
1429 error = mac_open_by_linkid(index, &mh);
1430 if (error != 0)
1431 goto bad_open;
1432
1433 error = mac_client_open(mh, &mch, NULL,
1434 MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1435 if (error != 0)
1436 goto bad_open;
1437
1438 zoneid = crgetzoneid(cred);
1439 if (zoneid != GLOBAL_ZONEID) {
1440 mac_perim_handle_t perim;
1441
1442 mac_perim_enter_by_mh(mh, &perim);
1443 error = dls_link_getzid(mac_name(mh), &ifzoneid);
1444 mac_perim_exit(perim);
1445 if (error != 0)
1446 goto bad_open;
1447 if (ifzoneid != zoneid) {
1448 error = EACCES;
1449 goto bad_open;
1450 }
1451 }
1452
1453 *mcip = mch;
1454 *mhp = mh;
1455
1456 return (0);
1457 bad_open:
1458 if (mch != 0)
1459 mac_client_close(mch, 0);
1460 if (mh != 0)
1461 mac_close(mh);
1462 return (error);
1463 }
1464
/*
 * Release a MAC client handle / MAC handle pair, such as the one handed
 * back by pfp_open_index(). The client handle is closed before the MAC
 * handle, the same order used by the failure path of pfp_open_index().
 */
static void
pfp_close(mac_handle_t mh, mac_client_handle_t mch)
{
	mac_client_close(mch, 0);
	mac_close(mh);
}
1471
1472 /*
1473 * The purpose of this function is to provide a single place where we free
1474 * the loaded BPF program and reset all pointers/counters associated with
1475 * it.
1476 */
1477 static void
1478 pfp_release_bpf(struct pfpsock *ps)
1479 {
1480 if (ps->ps_bpf.bf_len != 0) {
1481 kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1482 ps->ps_bpf.bf_len = 0;
1483 ps->ps_bpf.bf_insns = NULL;
1484 }
1485 }
1486
1487 /*
1488 * Set the promiscuous mode of a network interface.
1489 * This function only calls the mac layer when there is a change to the
1490 * status of a network interface's promiscous mode. Tracking of how many
1491 * sockets have the network interface in promiscuous mode, and thus the
1492 * control over the physical device's status, is left to the mac layer.
1493 */
1494 static int
1495 pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1496 {
1497 int error = 0;
1498 int flags;
1499
1500 /*
1501 * There are 4 combinations of turnon/ps_promisc.
1502 * This if handles 2 (both false, both true) and the if() below
1503 * handles the remaining one - when change is required.
1504 */
1505 if (turnon == ps->ps_promisc)
1506 return (error);
1507
1508 if (ps->ps_phd != 0) {
1509 mac_promisc_remove(ps->ps_phd);
1510 ps->ps_phd = 0;
1511
1512 /*
1513 * ps_promisc is set here in case the call to mac_promisc_add
1514 * fails: leaving it to indicate that the interface is still
1515 * in some sort of promiscuous mode is false.
1516 */
1517 if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1518 ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1519 flags = MAC_PROMISC_FLAGS_NO_PHYS;
1520 } else {
1521 flags = 0;
1522 }
1523 flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1524 }
1525
1526 error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1527 &ps->ps_phd, flags);
1528 if (error == 0)
1529 ps->ps_promisc = turnon;
1530
1531 return (error);
1532 }
1533
/*
 * This table maps the MAC types in Solaris to the ARPHRD_* values used
 * on Linux. This is used with the SIOCGIFHWADDR/SIOCGLIFHWADDR ioctl.
 *
 * The symbols in this table are *not* pulled in from <net/if_arp.h>,
 * they are pulled from <netpacket/packet.h>, thus it acts as a source
 * of supplementary information to the ARP table.
 *
 * Each row is { ARPHRD_* value, DL_* MAC type }. pfp_dl_to_arphrd()
 * scans the rows in order, matching on the second column, and stops at
 * the first row whose first column is 0, so the { 0, 0 } row must stay
 * last.
 *
 * NOTE(review): the ARPHRD_AX25 / DL_X25 pairing looks like it mixes
 * AX.25 with X.25 - confirm that this is intended.
 */
static uint_t arphrd_to_dl[][2] = {
	{ ARPHRD_IEEE80211,	DL_WIFI },
	{ ARPHRD_TUNNEL,	DL_IPV4 },
	{ ARPHRD_TUNNEL,	DL_IPV6 },
	{ ARPHRD_TUNNEL,	DL_6TO4 },
	{ ARPHRD_AX25,		DL_X25 },
	{ ARPHRD_ATM,		DL_ATM },
	{ 0,			0 }
};
1551
1552 static int
1553 pfp_dl_to_arphrd(int dltype)
1554 {
1555 int i;
1556
1557 for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1558 if (arphrd_to_dl[i][1] == dltype)
1559 return (arphrd_to_dl[i][0]);
1560 return (arp_hw_type(dltype));
1561 }