Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/sockmods/sockmod_pfp.c
+++ new/usr/src/uts/common/inet/sockmods/sockmod_pfp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 * Copyright 2015 Joyent, Inc. All rights reserved.
25 25 */
26 26
27 27 #include <sys/types.h>
28 28 #include <sys/param.h>
29 29 #include <sys/systm.h>
30 30 #include <sys/stropts.h>
31 31 #include <sys/socket.h>
32 32 #include <sys/socketvar.h>
33 33 #include <sys/socket_proto.h>
34 34 #include <sys/sockio.h>
35 35 #include <sys/strsun.h>
36 36 #include <sys/kstat.h>
37 37 #include <sys/modctl.h>
38 38 #include <sys/policy.h>
39 39 #include <sys/priv_const.h>
40 40 #include <sys/tihdr.h>
41 41 #include <sys/zone.h>
42 42 #include <sys/time.h>
43 43 #include <sys/ethernet.h>
44 44 #include <sys/llc1.h>
45 45 #include <fs/sockfs/sockcommon.h>
46 46 #include <net/if.h>
47 47 #include <inet/ip_arp.h>
48 48
49 49 #include <sys/dls.h>
50 50 #include <sys/mac.h>
51 51 #include <sys/mac_client.h>
52 52 #include <sys/mac_provider.h>
53 53 #include <sys/mac_client_priv.h>
54 54
55 55 #include <netpacket/packet.h>
56 56
57 57 static void pfp_close(mac_handle_t, mac_client_handle_t);
58 58 static int pfp_dl_to_arphrd(int);
59 59 static int pfp_getpacket_sockopt(sock_lower_handle_t, int, void *,
60 60 socklen_t *);
61 61 static int pfp_ifreq_getlinkid(intptr_t, struct ifreq *, datalink_id_t *, int);
62 62 static int pfp_lifreq_getlinkid(intptr_t, struct lifreq *, datalink_id_t *,
63 63 int);
64 64 static int pfp_open_index(int, mac_handle_t *, mac_client_handle_t *,
65 65 cred_t *);
66 66 static void pfp_packet(void *, mac_resource_handle_t, mblk_t *, boolean_t);
67 67 static void pfp_release_bpf(struct pfpsock *);
68 68 static int pfp_set_promisc(struct pfpsock *, mac_client_promisc_type_t);
69 69 static int pfp_setsocket_sockopt(sock_lower_handle_t, int, const void *,
70 70 socklen_t);
71 71 static int pfp_setpacket_sockopt(sock_lower_handle_t, int, const void *,
72 72 socklen_t);
73 73
74 74 /*
75 75 * PFP sockfs operations
76 76 * Most are currently no-ops because they have no meaning for a connectionless
77 77 * socket.
78 78 */
79 79 static void sdpfp_activate(sock_lower_handle_t, sock_upper_handle_t,
80 80 sock_upcalls_t *, int, struct cred *);
81 81 static int sdpfp_bind(sock_lower_handle_t, struct sockaddr *, socklen_t,
82 82 struct cred *);
83 83 static int sdpfp_close(sock_lower_handle_t, int, struct cred *);
84 84 static void sdpfp_clr_flowctrl(sock_lower_handle_t);
85 85 static int sdpfp_getsockopt(sock_lower_handle_t, int, int, void *,
86 86 socklen_t *, struct cred *);
87 87 static int sdpfp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
88 88 struct cred *);
89 89 static int sdpfp_senduio(sock_lower_handle_t, struct uio *, struct nmsghdr *,
90 90 struct cred *);
91 91 static int sdpfp_setsockopt(sock_lower_handle_t, int, int, const void *,
92 92 socklen_t, struct cred *);
93 93
94 94 static sock_lower_handle_t sockpfp_create(int, int, int, sock_downcalls_t **,
95 95 uint_t *, int *, int, cred_t *);
96 96
97 97 static int sockpfp_init(void);
98 98 static void sockpfp_fini(void);
99 99
100 100 static kstat_t *pfp_ksp;
101 101 static pfp_kstats_t ks_stats;
102 102 static pfp_kstats_t pfp_kstats = {
103 103 /*
104 104 * Each one of these kstats is a different return path in handling
105 105 * a packet received from the mac layer.
106 106 */
107 107 { "recvMacHeaderFail", KSTAT_DATA_UINT64 },
108 108 { "recvBadProtocol", KSTAT_DATA_UINT64 },
109 109 { "recvAllocbFail", KSTAT_DATA_UINT64 },
110 110 { "recvOk", KSTAT_DATA_UINT64 },
111 111 { "recvFail", KSTAT_DATA_UINT64 },
112 112 { "recvFiltered", KSTAT_DATA_UINT64 },
113 113 { "recvFlowControl", KSTAT_DATA_UINT64 },
114 114 /*
115 115 * A global set of counters is maintained to track the behaviour
116 116 * of the system (kernel & applications) in sending packets.
117 117 */
118 118 { "sendUnbound", KSTAT_DATA_UINT64 },
119 119 { "sendFailed", KSTAT_DATA_UINT64 },
120 120 { "sendTooBig", KSTAT_DATA_UINT64 },
121 121 { "sendAllocFail", KSTAT_DATA_UINT64 },
122 122 { "sendUiomoveFail", KSTAT_DATA_UINT64 },
123 123 { "sendNoMemory", KSTAT_DATA_UINT64 },
124 124 { "sendOpenFail", KSTAT_DATA_UINT64 },
125 125 { "sendWrongFamily", KSTAT_DATA_UINT64 },
126 126 { "sendShortMsg", KSTAT_DATA_UINT64 },
127 127 { "sendOk", KSTAT_DATA_UINT64 }
128 128 };
129 129
130 130 sock_downcalls_t pfp_downcalls = {
131 131 sdpfp_activate,
132 132 sock_accept_notsupp,
133 133 sdpfp_bind,
134 134 sock_listen_notsupp,
135 135 sock_connect_notsupp,
136 136 sock_getpeername_notsupp,
137 137 sock_getsockname_notsupp,
138 138 sdpfp_getsockopt,
139 139 sdpfp_setsockopt,
140 140 sock_send_notsupp,
141 141 sdpfp_senduio,
142 142 NULL,
143 143 sock_poll_notsupp,
144 144 sock_shutdown_notsupp,
145 145 sdpfp_clr_flowctrl,
146 146 sdpfp_ioctl,
147 147 sdpfp_close,
148 148 };
149 149
150 150 static smod_reg_t sinfo = {
151 151 SOCKMOD_VERSION,
152 152 "sockpfp",
153 153 SOCK_UC_VERSION,
154 154 SOCK_DC_VERSION,
155 155 sockpfp_create,
156 156 NULL
157 157 };
158 158
159 159 static int accepted_protos[3][2] = {
160 160 { ETH_P_ALL, 0 },
161 161 { ETH_P_802_2, LLC_SNAP_SAP },
162 162 { ETH_P_803_3, 0 },
163 163 };
164 164
/*
 * Upper bound on the size of the receive buffer for a PF_PACKET socket.
 * More properly, this should be controlled through ipadm, as is done for
 * TCP, UDP, SCTP, etc.  Until that's done, this provides a hard cap of
 * 4 MB while leaving an opportunity for it to be changed, should that be
 * needed.
 */
int sockmod_pfp_rcvbuf_max = 4 * 1024 * 1024;
↓ open down ↓ |
171 lines elided |
↑ open up ↑ |
172 172
173 173 /*
174 174 * Module linkage information for the kernel.
175 175 */
176 176 static struct modlsockmod modlsockmod = {
177 177 &mod_sockmodops, "PF Packet socket module", &sinfo
178 178 };
179 179
180 180 static struct modlinkage modlinkage = {
181 181 MODREV_1,
182 - &modlsockmod,
183 - NULL
182 + { &modlsockmod, NULL }
184 183 };
185 184
186 185 int
187 186 _init(void)
188 187 {
189 188 int error;
190 189
191 190 error = sockpfp_init();
192 191 if (error != 0)
193 192 return (error);
194 193
195 194 error = mod_install(&modlinkage);
196 195 if (error != 0)
197 196 sockpfp_fini();
198 197
199 198 return (error);
200 199 }
201 200
202 201 int
203 202 _fini(void)
204 203 {
205 204 int error;
206 205
207 206 error = mod_remove(&modlinkage);
208 207 if (error == 0)
209 208 sockpfp_fini();
210 209
211 210 return (error);
212 211 }
213 212
214 213 int
215 214 _info(struct modinfo *modinfop)
216 215 {
217 216 return (mod_info(&modlinkage, modinfop));
218 217 }
219 218
220 219 /*
221 220 * sockpfp_init: called as part of the initialisation of the module when
222 221 * loaded into the kernel.
223 222 *
224 223 * Being able to create and record the kstats data in the kernel is not
225 224 * considered to be vital to the operation of this kernel module, thus
226 225 * its failure is tolerated.
227 226 */
228 227 static int
229 228 sockpfp_init(void)
230 229 {
231 230 (void) memset(&ks_stats, 0, sizeof (ks_stats));
232 231
233 232 (void) memcpy(&ks_stats, &pfp_kstats, sizeof (pfp_kstats));
234 233
235 234 pfp_ksp = kstat_create("pfpacket", 0, "global", "misc",
236 235 KSTAT_TYPE_NAMED, sizeof (pfp_kstats) / sizeof (kstat_named_t),
237 236 KSTAT_FLAG_VIRTUAL);
238 237 if (pfp_ksp != NULL) {
239 238 pfp_ksp->ks_data = &ks_stats;
240 239 kstat_install(pfp_ksp);
241 240 }
242 241
243 242 return (0);
244 243 }
245 244
246 245 /*
247 246 * sockpfp_fini: called when the operating system wants to unload the
248 247 * socket module from the kernel.
249 248 */
250 249 static void
251 250 sockpfp_fini(void)
252 251 {
253 252 if (pfp_ksp != NULL)
254 253 kstat_delete(pfp_ksp);
255 254 }
256 255
257 256 /*
258 257 * Due to sockets being created read-write by default, all PF_PACKET sockets
259 258 * therefore require the NET_RAWACCESS priviliege, even if the socket is only
260 259 * being used for reading packets from.
261 260 *
262 261 * This create function enforces this module only being used with PF_PACKET
263 262 * sockets and the policy that we support via the config file in sock2path.d:
264 263 * PF_PACKET sockets must be either SOCK_DGRAM or SOCK_RAW.
265 264 */
266 265 /* ARGSUSED */
267 266 static sock_lower_handle_t
268 267 sockpfp_create(int family, int type, int proto,
269 268 sock_downcalls_t **sock_downcalls, uint_t *smodep, int *errorp,
270 269 int sflags, cred_t *cred)
271 270 {
272 271 struct pfpsock *ps;
273 272 int kmflags;
274 273 int newproto;
275 274 int i;
276 275
277 276 if (secpolicy_net_rawaccess(cred) != 0) {
278 277 *errorp = EACCES;
279 278 return (NULL);
280 279 }
281 280
282 281 if (family != AF_PACKET) {
283 282 *errorp = EAFNOSUPPORT;
284 283 return (NULL);
285 284 }
286 285
287 286 if ((type != SOCK_RAW) && (type != SOCK_DGRAM)) {
288 287 *errorp = ESOCKTNOSUPPORT;
289 288 return (NULL);
290 289 }
291 290
292 291 /*
293 292 * First check to see if the protocol number passed in via the socket
294 293 * creation should be mapped to a different number for internal use.
295 294 */
296 295 for (i = 0, newproto = -1;
297 296 i < sizeof (accepted_protos)/ sizeof (accepted_protos[0]); i++) {
298 297 if (accepted_protos[i][0] == proto) {
299 298 newproto = accepted_protos[i][1];
300 299 break;
301 300 }
302 301 }
303 302
304 303 /*
305 304 * If the mapping of the protocol that was under 0x800 failed to find
306 305 * a local equivalent then fail the socket creation. If the protocol
307 306 * for the socket is over 0x800 and it was not found in the mapping
308 307 * table above, then use the value as is.
309 308 */
310 309 if (newproto == -1) {
311 310 if (proto < 0x800) {
312 311 *errorp = ENOPROTOOPT;
313 312 return (NULL);
314 313 }
315 314 newproto = proto;
316 315 }
317 316 proto = newproto;
318 317
319 318 kmflags = (sflags & SOCKET_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
320 319 ps = kmem_zalloc(sizeof (*ps), kmflags);
321 320 if (ps == NULL) {
322 321 *errorp = ENOMEM;
323 322 return (NULL);
324 323 }
325 324
326 325 ps->ps_type = type;
327 326 ps->ps_proto = proto;
328 327 rw_init(&ps->ps_bpflock, NULL, RW_DRIVER, NULL);
329 328 mutex_init(&ps->ps_lock, NULL, MUTEX_DRIVER, NULL);
330 329
331 330 *sock_downcalls = &pfp_downcalls;
332 331 /*
333 332 * Setting this causes bytes from a packet that do not fit into the
334 333 * destination user buffer to be discarded. Thus the API is one
335 334 * packet per receive and callers are required to use a buffer large
336 335 * enough for the biggest packet that the interface can provide.
337 336 */
338 337 *smodep = SM_ATOMIC;
339 338
340 339 return ((sock_lower_handle_t)ps);
341 340 }
342 341
343 342 /* ************************************************************************* */
344 343
345 344 /*
346 345 * pfp_packet is the callback function that is given to the mac layer for
347 346 * PF_PACKET to receive packets with. One packet at a time is passed into
348 347 * this function from the mac layer. Each packet is a private copy given
349 348 * to PF_PACKET to modify or free as it wishes and does not harm the original
350 349 * packet from which it was cloned.
351 350 */
352 351 /* ARGSUSED */
353 352 static void
354 353 pfp_packet(void *arg, mac_resource_handle_t mrh, mblk_t *mp, boolean_t flag)
355 354 {
356 355 struct T_unitdata_ind *tunit;
357 356 struct sockaddr_ll *sll;
358 357 struct sockaddr_ll *sol;
359 358 mac_header_info_t hdr;
360 359 struct pfpsock *ps;
361 360 size_t tusz;
362 361 mblk_t *mp0;
363 362 int error;
364 363
365 364 if (mp == NULL)
366 365 return;
367 366
368 367 ps = arg;
369 368 if (ps->ps_flow_ctrld) {
370 369 ps->ps_flow_ctrl_drops++;
371 370 ps->ps_stats.tp_drops++;
372 371 ks_stats.kp_recv_flow_cntrld.value.ui64++;
373 372 freemsg(mp);
374 373 return;
375 374 }
376 375
377 376 if (mac_header_info(ps->ps_mh, mp, &hdr) != 0) {
378 377 /*
379 378 * Can't decode the packet header information so drop it.
380 379 */
381 380 ps->ps_stats.tp_drops++;
382 381 ks_stats.kp_recv_mac_hdr_fail.value.ui64++;
383 382 freemsg(mp);
384 383 return;
385 384 }
386 385
387 386 if (mac_type(ps->ps_mh) == DL_ETHER &&
388 387 hdr.mhi_bindsap == ETHERTYPE_VLAN) {
389 388 struct ether_vlan_header *evhp;
390 389 struct ether_vlan_header evh;
391 390
392 391 hdr.mhi_hdrsize = sizeof (struct ether_vlan_header);
393 392 hdr.mhi_istagged = B_TRUE;
394 393
395 394 if (MBLKL(mp) >= sizeof (*evhp)) {
396 395 evhp = (struct ether_vlan_header *)mp->b_rptr;
397 396 } else {
398 397 int sz = sizeof (*evhp);
399 398 char *s = (char *)&evh;
400 399 mblk_t *tmp;
401 400 int len;
402 401
403 402 for (tmp = mp; sz > 0 && tmp != NULL;
404 403 tmp = tmp->b_cont) {
405 404 len = min(sz, MBLKL(tmp));
406 405 bcopy(tmp->b_rptr, s, len);
407 406 sz -= len;
408 407 }
409 408 evhp = &evh;
410 409 }
411 410 hdr.mhi_tci = ntohs(evhp->ether_tci);
412 411 hdr.mhi_bindsap = ntohs(evhp->ether_type);
413 412 }
414 413
415 414 if ((ps->ps_proto != 0) && (ps->ps_proto != hdr.mhi_bindsap)) {
416 415 /*
417 416 * The packet is not of interest to this socket so
418 417 * drop it on the floor. Here the SAP is being used
419 418 * as a very course filter.
420 419 */
421 420 ps->ps_stats.tp_drops++;
422 421 ks_stats.kp_recv_bad_proto.value.ui64++;
423 422 freemsg(mp);
424 423 return;
425 424 }
426 425
427 426 /*
428 427 * This field is not often set, even for ethernet,
429 428 * by mac_header_info, so compute it if it is 0.
430 429 */
431 430 if (hdr.mhi_pktsize == 0)
432 431 hdr.mhi_pktsize = msgdsize(mp);
433 432
434 433 /*
435 434 * If a BPF filter is present, pass the raw packet into that.
436 435 * A failed match will result in zero being returned, indicating
437 436 * that this socket is not interested in the packet.
438 437 */
439 438 if (ps->ps_bpf.bf_len != 0) {
440 439 uchar_t *buffer;
441 440 int buflen;
442 441
443 442 buflen = MBLKL(mp);
444 443 if (hdr.mhi_pktsize == buflen) {
445 444 buffer = mp->b_rptr;
446 445 } else {
447 446 buflen = 0;
448 447 buffer = (uchar_t *)mp;
449 448 }
450 449 rw_enter(&ps->ps_bpflock, RW_READER);
451 450 if (bpf_filter(ps->ps_bpf.bf_insns, buffer,
452 451 hdr.mhi_pktsize, buflen) == 0) {
453 452 rw_exit(&ps->ps_bpflock);
454 453 ps->ps_stats.tp_drops++;
455 454 ks_stats.kp_recv_filtered.value.ui64++;
456 455 freemsg(mp);
457 456 return;
458 457 }
459 458 rw_exit(&ps->ps_bpflock);
460 459 }
461 460
462 461 if (ps->ps_type == SOCK_DGRAM) {
463 462 /*
464 463 * SOCK_DGRAM socket expect a "layer 3" packet, so advance
465 464 * past the link layer header.
466 465 */
467 466 mp->b_rptr += hdr.mhi_hdrsize;
468 467 hdr.mhi_pktsize -= hdr.mhi_hdrsize;
469 468 }
470 469
471 470 tusz = sizeof (struct T_unitdata_ind) + sizeof (struct sockaddr_ll);
472 471 if (ps->ps_auxdata) {
473 472 tusz += _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
474 473 tusz += _TPI_ALIGN_TOPT(sizeof (struct T_opthdr));
475 474 }
476 475
477 476 /*
478 477 * It is tempting to think that this could be optimised by having
479 478 * the base mblk_t allocated and hung off the pfpsock structure,
480 479 * except that then another one would need to be allocated for the
481 480 * sockaddr_ll that is included. Even creating a template to copy
482 481 * from is of questionable value, as read-write from one structure
483 482 * to the other is going to be slower than all of the initialisation.
484 483 */
485 484 mp0 = allocb(tusz, BPRI_HI);
486 485 if (mp0 == NULL) {
487 486 ps->ps_stats.tp_drops++;
488 487 ks_stats.kp_recv_alloc_fail.value.ui64++;
489 488 freemsg(mp);
490 489 return;
491 490 }
492 491
493 492 (void) memset(mp0->b_rptr, 0, tusz);
494 493
495 494 mp0->b_datap->db_type = M_PROTO;
496 495 mp0->b_wptr = mp0->b_rptr + tusz;
497 496
498 497 tunit = (struct T_unitdata_ind *)mp0->b_rptr;
499 498 tunit->PRIM_type = T_UNITDATA_IND;
500 499 tunit->SRC_length = sizeof (struct sockaddr);
501 500 tunit->SRC_offset = sizeof (*tunit);
502 501
503 502 sol = &ps->ps_sock;
504 503 sll = (struct sockaddr_ll *)(mp0->b_rptr + sizeof (*tunit));
505 504 sll->sll_ifindex = sol->sll_ifindex;
506 505 sll->sll_hatype = (uint16_t)hdr.mhi_origsap;
507 506 sll->sll_halen = sol->sll_halen;
508 507 if (hdr.mhi_saddr != NULL)
509 508 (void) memcpy(sll->sll_addr, hdr.mhi_saddr, sll->sll_halen);
510 509
511 510 switch (hdr.mhi_dsttype) {
512 511 case MAC_ADDRTYPE_MULTICAST :
513 512 sll->sll_pkttype = PACKET_MULTICAST;
514 513 break;
515 514 case MAC_ADDRTYPE_BROADCAST :
516 515 sll->sll_pkttype = PACKET_BROADCAST;
517 516 break;
518 517 case MAC_ADDRTYPE_UNICAST :
519 518 if (memcmp(sol->sll_addr, hdr.mhi_daddr, sol->sll_halen) == 0)
520 519 sll->sll_pkttype = PACKET_HOST;
521 520 else
522 521 sll->sll_pkttype = PACKET_OTHERHOST;
523 522 break;
524 523 }
525 524
526 525 if (ps->ps_auxdata) {
527 526 struct tpacket_auxdata *aux;
528 527 struct T_opthdr *topt;
529 528
530 529 tunit->OPT_offset = _TPI_ALIGN_TOPT(tunit->SRC_offset +
531 530 sizeof (struct sockaddr_ll));
532 531 tunit->OPT_length = _TPI_ALIGN_TOPT(sizeof (struct T_opthdr)) +
533 532 _TPI_ALIGN_TOPT(sizeof (struct tpacket_auxdata));
534 533
535 534 topt = (struct T_opthdr *)(mp0->b_rptr + tunit->OPT_offset);
536 535 aux = (struct tpacket_auxdata *)
537 536 ((char *)topt + _TPI_ALIGN_TOPT(sizeof (*topt)));
538 537
539 538 topt->len = tunit->OPT_length;
540 539 topt->level = SOL_PACKET;
541 540 topt->name = PACKET_AUXDATA;
542 541 topt->status = 0;
543 542 /*
544 543 * libpcap doesn't seem to use any other field,
545 544 * so it isn't clear how they should be filled in.
546 545 */
547 546 aux->tp_vlan_vci = hdr.mhi_tci;
548 547 }
549 548
550 549 linkb(mp0, mp);
551 550
552 551 (void) gethrestime(&ps->ps_timestamp);
553 552
554 553 ps->ps_upcalls->su_recv(ps->ps_upper, mp0, hdr.mhi_pktsize, 0,
555 554 &error, NULL);
556 555
557 556 if (error == 0) {
558 557 ps->ps_stats.tp_packets++;
559 558 ks_stats.kp_recv_ok.value.ui64++;
560 559 } else {
561 560 mutex_enter(&ps->ps_lock);
562 561 if (error == ENOSPC) {
563 562 ps->ps_upcalls->su_recv(ps->ps_upper, NULL, 0, 0,
564 563 &error, NULL);
565 564 if (error == ENOSPC)
566 565 ps->ps_flow_ctrld = B_TRUE;
567 566 }
568 567 mutex_exit(&ps->ps_lock);
569 568 ps->ps_stats.tp_drops++;
570 569 ks_stats.kp_recv_fail.value.ui64++;
571 570 }
572 571 }
573 572
574 573 /*
575 574 * Bind a PF_PACKET socket to a network interface.
576 575 *
577 576 * The default operation of this bind() is to place the socket (and thus the
578 577 * network interface) into promiscuous mode. It is then up to the application
579 578 * to turn that down by issuing the relevant ioctls, if desired.
580 579 */
581 580 static int
582 581 sdpfp_bind(sock_lower_handle_t handle, struct sockaddr *addr,
583 582 socklen_t addrlen, struct cred *cred)
584 583 {
585 584 struct sockaddr_ll *addr_ll, *sol;
586 585 mac_client_handle_t mch;
587 586 struct pfpsock *ps;
588 587 mac_handle_t mh;
589 588 int error;
590 589
591 590 ps = (struct pfpsock *)handle;
592 591 if (ps->ps_bound)
593 592 return (EINVAL);
594 593
595 594 if (addrlen < sizeof (struct sockaddr_ll) || addr == NULL)
596 595 return (EINVAL);
597 596
598 597 addr_ll = (struct sockaddr_ll *)addr;
599 598
600 599 error = pfp_open_index(addr_ll->sll_ifindex, &mh, &mch, cred);
601 600 if (error != 0)
602 601 return (error);
603 602 /*
604 603 * Ensure that each socket is only bound once.
605 604 */
606 605 mutex_enter(&ps->ps_lock);
607 606 if (ps->ps_mh != 0) {
608 607 mutex_exit(&ps->ps_lock);
609 608 pfp_close(mh, mch);
610 609 return (EADDRINUSE);
611 610 }
612 611 ps->ps_mh = mh;
613 612 ps->ps_mch = mch;
614 613 mutex_exit(&ps->ps_lock);
615 614
616 615 /*
617 616 * Cache all of the information from bind so that it's in an easy
618 617 * place to get at when packets are received.
619 618 */
620 619 sol = &ps->ps_sock;
621 620 sol->sll_family = AF_PACKET;
622 621 sol->sll_ifindex = addr_ll->sll_ifindex;
623 622 sol->sll_protocol = addr_ll->sll_protocol;
624 623 sol->sll_halen = mac_addr_len(ps->ps_mh);
625 624 mac_unicast_primary_get(ps->ps_mh, sol->sll_addr);
626 625 mac_sdu_get(ps->ps_mh, NULL, &ps->ps_max_sdu);
627 626 ps->ps_linkid = addr_ll->sll_ifindex;
628 627
629 628 error = mac_promisc_add(ps->ps_mch, MAC_CLIENT_PROMISC_ALL,
630 629 pfp_packet, ps, &ps->ps_phd, MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
631 630 if (error == 0) {
632 631 ps->ps_promisc = MAC_CLIENT_PROMISC_ALL;
633 632 ps->ps_bound = B_TRUE;
634 633 }
635 634
636 635 return (error);
637 636 }
638 637
639 638 /* ARGSUSED */
640 639 static void
641 640 sdpfp_activate(sock_lower_handle_t lower, sock_upper_handle_t upper,
642 641 sock_upcalls_t *upcalls, int flags, cred_t *cred)
643 642 {
644 643 struct pfpsock *ps;
645 644
646 645 ps = (struct pfpsock *)lower;
647 646 ps->ps_upper = upper;
648 647 ps->ps_upcalls = upcalls;
649 648 }
650 649
651 650 /*
652 651 * This module only implements getting socket options for the new socket
653 652 * option level (SOL_PACKET) that it introduces. All other requests are
654 653 * passed back to the sockfs layer.
655 654 */
656 655 /* ARGSUSED */
657 656 static int
658 657 sdpfp_getsockopt(sock_lower_handle_t handle, int level, int option_name,
659 658 void *optval, socklen_t *optlenp, struct cred *cred)
660 659 {
661 660 struct pfpsock *ps;
662 661 int error = 0;
663 662
664 663 ps = (struct pfpsock *)handle;
665 664
666 665 switch (level) {
667 666 case SOL_PACKET :
668 667 error = pfp_getpacket_sockopt(handle, option_name, optval,
669 668 optlenp);
670 669 break;
671 670
672 671 case SOL_SOCKET :
673 672 if (option_name == SO_RCVBUF) {
674 673 if (*optlenp < sizeof (int32_t))
675 674 return (EINVAL);
676 675 *((int32_t *)optval) = ps->ps_rcvbuf;
677 676 *optlenp = sizeof (int32_t);
678 677 } else {
679 678 error = ENOPROTOOPT;
680 679 }
681 680 break;
682 681
683 682 default :
684 683 /*
685 684 * If sockfs code receives this error in return from the
686 685 * getsockopt downcall it handles the option locally, if
687 686 * it can.
688 687 */
689 688 error = ENOPROTOOPT;
690 689 break;
691 690 }
692 691
693 692 return (error);
694 693 }
695 694
696 695 /*
697 696 * PF_PACKET supports setting socket options at only two levels:
698 697 * SOL_SOCKET and SOL_PACKET.
699 698 */
700 699 /* ARGSUSED */
701 700 static int
702 701 sdpfp_setsockopt(sock_lower_handle_t handle, int level, int option_name,
703 702 const void *optval, socklen_t optlen, struct cred *cred)
704 703 {
705 704 int error = 0;
706 705
707 706 switch (level) {
708 707 case SOL_SOCKET :
709 708 error = pfp_setsocket_sockopt(handle, option_name, optval,
710 709 optlen);
711 710 break;
712 711 case SOL_PACKET :
713 712 error = pfp_setpacket_sockopt(handle, option_name, optval,
714 713 optlen);
715 714 break;
716 715 default :
717 716 error = EINVAL;
718 717 break;
719 718 }
720 719
721 720 return (error);
722 721 }
723 722
724 723 /*
725 724 * This function is incredibly inefficient for sending any packet that
726 725 * comes with a msghdr asking to be sent to an interface to which the
727 726 * socket has not been bound. Some possibilities here are keeping a
728 727 * cache of all open mac's and mac_client's, for the purpose of sending,
729 728 * and closing them after some amount of inactivity. Clearly, applications
730 729 * should not be written to use one socket for multiple interfaces if
731 730 * performance is desired with the code as is.
732 731 */
733 732 /* ARGSUSED */
734 733 static int
735 734 sdpfp_senduio(sock_lower_handle_t handle, struct uio *uiop,
736 735 struct nmsghdr *msg, struct cred *cred)
737 736 {
738 737 struct sockaddr_ll *sol;
739 738 mac_client_handle_t mch;
740 739 struct pfpsock *ps;
741 740 boolean_t new_open;
742 741 mac_handle_t mh;
743 742 size_t mpsize;
744 743 uint_t maxsdu;
745 744 mblk_t *mp0;
746 745 mblk_t *mp;
747 746 int error;
748 747
749 748 mp = NULL;
750 749 mp0 = NULL;
751 750 new_open = B_FALSE;
752 751 ps = (struct pfpsock *)handle;
753 752 mh = ps->ps_mh;
754 753 mch = ps->ps_mch;
755 754 maxsdu = ps->ps_max_sdu;
756 755
757 756 sol = (struct sockaddr_ll *)msg->msg_name;
758 757 if (sol == NULL) {
759 758 /*
760 759 * If no sockaddr_ll has been provided with the send call,
761 760 * use the one constructed when the socket was bound to an
762 761 * interface and fail if it hasn't been bound.
763 762 */
764 763 if (!ps->ps_bound) {
765 764 ks_stats.kp_send_unbound.value.ui64++;
766 765 return (EPROTO);
767 766 }
768 767 sol = &ps->ps_sock;
769 768 } else {
770 769 /*
771 770 * Verify the sockaddr_ll message passed down before using
772 771 * it to send a packet out with. If it refers to an interface
773 772 * that has not been bound, it is necessary to open it.
774 773 */
775 774 struct sockaddr_ll *sll;
776 775
777 776 if (msg->msg_namelen < sizeof (struct sockaddr_ll)) {
778 777 ks_stats.kp_send_short_msg.value.ui64++;
779 778 return (EINVAL);
780 779 }
781 780
782 781 if (sol->sll_family != AF_PACKET) {
783 782 ks_stats.kp_send_wrong_family.value.ui64++;
784 783 return (EAFNOSUPPORT);
785 784 }
786 785
787 786 sll = &ps->ps_sock;
788 787 if (sol->sll_ifindex != sll->sll_ifindex) {
789 788 error = pfp_open_index(sol->sll_ifindex, &mh, &mch,
790 789 cred);
791 790 if (error != 0) {
792 791 ks_stats.kp_send_open_fail.value.ui64++;
793 792 return (error);
794 793 }
795 794 mac_sdu_get(mh, NULL, &maxsdu);
796 795 new_open = B_TRUE;
797 796 }
798 797 }
799 798
800 799 mpsize = uiop->uio_resid;
801 800 if (mpsize > maxsdu) {
802 801 ks_stats.kp_send_too_big.value.ui64++;
803 802 error = EMSGSIZE;
804 803 goto done;
805 804 }
806 805
807 806 if ((mp = allocb(mpsize, BPRI_HI)) == NULL) {
808 807 ks_stats.kp_send_alloc_fail.value.ui64++;
809 808 error = ENOBUFS;
810 809 goto done;
811 810 }
812 811
813 812 mp->b_wptr = mp->b_rptr + mpsize;
814 813 error = uiomove(mp->b_rptr, mpsize, UIO_WRITE, uiop);
815 814 if (error != 0) {
816 815 ks_stats.kp_send_uiomove_fail.value.ui64++;
817 816 goto done;
818 817 }
819 818
820 819 if (ps->ps_type == SOCK_DGRAM) {
821 820 mp0 = mac_header(mh, sol->sll_addr, sol->sll_protocol, mp, 0);
822 821 if (mp0 == NULL) {
823 822 ks_stats.kp_send_no_memory.value.ui64++;
824 823 error = ENOBUFS;
825 824 goto done;
826 825 }
827 826 linkb(mp0, mp);
828 827 mp = mp0;
829 828 }
830 829
831 830 /*
832 831 * As this is sending datagrams and no promise is made about
833 832 * how or if a packet will be sent/delivered, no effort is to
834 833 * be expended in recovering from a situation where the packet
835 834 * cannot be sent - it is just dropped.
836 835 */
837 836 error = mac_tx(mch, mp, 0, MAC_DROP_ON_NO_DESC, NULL);
838 837 if (error == 0) {
839 838 mp = NULL;
840 839 ks_stats.kp_send_ok.value.ui64++;
841 840 } else {
842 841 ks_stats.kp_send_failed.value.ui64++;
843 842 }
844 843
845 844 done:
846 845
847 846 if (new_open) {
848 847 ASSERT(mch != ps->ps_mch);
849 848 ASSERT(mh != ps->ps_mh);
850 849 pfp_close(mh, mch);
851 850 }
852 851 if (mp != NULL)
853 852 freemsg(mp);
854 853
855 854 return (error);
856 855
857 856 }
858 857
859 858 /*
860 859 * There's no use of a lock here, or at the bottom of pfp_packet() where
861 860 * ps_flow_ctrld is set to true, because in a situation where these two
862 861 * are racing to set the flag one way or the other, the end result is
863 862 * going to be ultimately determined by the scheduler anyway - which of
864 863 * the two threads gets the lock first? In such an operational environment,
865 864 * we've got packets arriving too fast to be delt with so packets are going
866 865 * to be dropped. Grabbing a lock just makes the drop more expensive.
867 866 */
868 867 static void
869 868 sdpfp_clr_flowctrl(sock_lower_handle_t handle)
870 869 {
871 870 struct pfpsock *ps;
872 871
873 872 ps = (struct pfpsock *)handle;
874 873
875 874 mutex_enter(&ps->ps_lock);
876 875 ps->ps_flow_ctrld = B_FALSE;
877 876 mutex_exit(&ps->ps_lock);
878 877 }
879 878
880 879 /*
881 880 * The implementation of this ioctl() handler is intended to function
882 881 * in the absence of a bind() being made before it is called. Thus the
883 882 * function calls mac_open() itself to provide a handle.
884 883 * This function is structured like this:
885 884 * - determine the linkid for the interface being targeted
886 885 * - open the interface with said linkid
887 886 * - perform ioctl
888 887 * - copy results back to caller
889 888 *
890 889 * The ioctls that interact with interface flags have been implemented below
891 890 * to assume that the interface is always up and running (IFF_RUNNING) and
892 891 * to use the state of this socket to determine whether or not the network
893 892 * interface is in promiscuous mode. Thus an ioctl to get the interface flags
894 893 * of an interface that has been put in promiscuous mode by another socket
895 894 * (in the same program or different), will not report that status.
896 895 */
897 896 /* ARGSUSED */
898 897 static int
899 898 sdpfp_ioctl(sock_lower_handle_t handle, int cmd, intptr_t arg, int mod,
900 899 int32_t *rval, struct cred *cr)
901 900 {
902 901 struct timeval tival;
903 902 mac_client_promisc_type_t mtype;
904 903 struct sockaddr_dl *sock;
905 904 datalink_id_t linkid;
906 905 struct lifreq lifreq;
907 906 struct ifreq ifreq;
908 907 struct pfpsock *ps;
909 908 mac_handle_t mh;
910 909 int error;
911 910
912 911 ps = (struct pfpsock *)handle;
913 912
914 913 switch (cmd) {
915 914 /*
916 915 * ioctls that work on "struct lifreq"
917 916 */
918 917 case SIOCSLIFFLAGS :
919 918 case SIOCGLIFINDEX :
920 919 case SIOCGLIFFLAGS :
921 920 case SIOCGLIFMTU :
922 921 case SIOCGLIFHWADDR :
923 922 error = pfp_lifreq_getlinkid(arg, &lifreq, &linkid, mod);
924 923 if (error != 0)
925 924 return (error);
926 925 break;
927 926
928 927 /*
929 928 * ioctls that work on "struct ifreq".
930 929 * Not all of these have a "struct lifreq" partner, for example
931 930 * SIOCGIFHWADDR, for the simple reason that the logical interface
932 931 * does not have a hardware address.
933 932 */
934 933 case SIOCSIFFLAGS :
935 934 case SIOCGIFINDEX :
936 935 case SIOCGIFFLAGS :
937 936 case SIOCGIFMTU :
938 937 case SIOCGIFHWADDR :
939 938 error = pfp_ifreq_getlinkid(arg, &ifreq, &linkid, mod);
940 939 if (error != 0)
941 940 return (error);
942 941 break;
943 942
944 943 case SIOCGSTAMP :
945 944 tival.tv_sec = (time_t)ps->ps_timestamp.tv_sec;
946 945 tival.tv_usec = ps->ps_timestamp.tv_nsec / 1000;
947 946 if (get_udatamodel() == DATAMODEL_NATIVE) {
948 947 error = ddi_copyout(&tival, (void *)arg,
949 948 sizeof (tival), mod);
950 949 }
951 950 #ifdef _SYSCALL32_IMPL
952 951 else {
953 952 struct timeval32 tv32;
954 953 TIMEVAL_TO_TIMEVAL32(&tv32, &tival);
955 954 error = ddi_copyout(&tv32, (void *)arg,
956 955 sizeof (tv32), mod);
957 956 }
958 957 #endif
959 958 return (error);
960 959 }
961 960
962 961 error = mac_open_by_linkid(linkid, &mh);
963 962 if (error != 0)
964 963 return (error);
965 964
966 965 switch (cmd) {
967 966 case SIOCGLIFINDEX :
968 967 lifreq.lifr_index = linkid;
969 968 break;
970 969
971 970 case SIOCGIFINDEX :
972 971 ifreq.ifr_index = linkid;
973 972 break;
974 973
975 974 case SIOCGIFFLAGS :
976 975 ifreq.ifr_flags = IFF_RUNNING;
977 976 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
978 977 ifreq.ifr_flags |= IFF_PROMISC;
979 978 break;
980 979
981 980 case SIOCGLIFFLAGS :
982 981 lifreq.lifr_flags = IFF_RUNNING;
983 982 if (ps->ps_promisc == MAC_CLIENT_PROMISC_ALL)
984 983 lifreq.lifr_flags |= IFF_PROMISC;
985 984 break;
986 985
987 986 case SIOCSIFFLAGS :
988 987 if (linkid != ps->ps_linkid) {
989 988 error = EINVAL;
990 989 } else {
991 990 if ((ifreq.ifr_flags & IFF_PROMISC) != 0)
992 991 mtype = MAC_CLIENT_PROMISC_ALL;
993 992 else
994 993 mtype = MAC_CLIENT_PROMISC_FILTERED;
995 994 error = pfp_set_promisc(ps, mtype);
996 995 }
997 996 break;
998 997
999 998 case SIOCSLIFFLAGS :
1000 999 if (linkid != ps->ps_linkid) {
1001 1000 error = EINVAL;
1002 1001 } else {
1003 1002 if ((lifreq.lifr_flags & IFF_PROMISC) != 0)
1004 1003 mtype = MAC_CLIENT_PROMISC_ALL;
1005 1004 else
1006 1005 mtype = MAC_CLIENT_PROMISC_FILTERED;
1007 1006 error = pfp_set_promisc(ps, mtype);
1008 1007 }
1009 1008 break;
1010 1009
1011 1010 case SIOCGIFMTU :
1012 1011 mac_sdu_get(mh, NULL, &ifreq.ifr_mtu);
1013 1012 break;
1014 1013
1015 1014 case SIOCGLIFMTU :
1016 1015 mac_sdu_get(mh, NULL, &lifreq.lifr_mtu);
1017 1016 break;
1018 1017
1019 1018 case SIOCGIFHWADDR :
1020 1019 if (mac_addr_len(mh) > sizeof (ifreq.ifr_addr.sa_data)) {
1021 1020 error = EPFNOSUPPORT;
1022 1021 break;
1023 1022 }
1024 1023
1025 1024 if (mac_addr_len(mh) == 0) {
1026 1025 (void) memset(ifreq.ifr_addr.sa_data, 0,
1027 1026 sizeof (ifreq.ifr_addr.sa_data));
1028 1027 } else {
1029 1028 mac_unicast_primary_get(mh,
1030 1029 (uint8_t *)ifreq.ifr_addr.sa_data);
1031 1030 }
1032 1031
1033 1032 /*
1034 1033 * The behaviour here in setting sa_family is consistent
1035 1034 * with what applications such as tcpdump would expect
1036 1035 * for a Linux PF_PACKET socket.
1037 1036 */
1038 1037 ifreq.ifr_addr.sa_family = pfp_dl_to_arphrd(mac_type(mh));
1039 1038 break;
1040 1039
1041 1040 case SIOCGLIFHWADDR :
1042 1041 lifreq.lifr_type = 0;
1043 1042 sock = (struct sockaddr_dl *)&lifreq.lifr_addr;
1044 1043
1045 1044 if (mac_addr_len(mh) > sizeof (sock->sdl_data)) {
1046 1045 error = EPFNOSUPPORT;
1047 1046 break;
1048 1047 }
1049 1048
1050 1049 /*
1051 1050 * Fill in the sockaddr_dl with link layer details. Of note,
1052 1051 * the index is returned as 0 for a couple of reasons:
1053 1052 * (1) there is no public API that uses or requires it
1054 1053 * (2) the MAC index is currently 32bits and sdl_index is 16.
1055 1054 */
1056 1055 sock->sdl_family = AF_LINK;
1057 1056 sock->sdl_index = 0;
1058 1057 sock->sdl_type = mac_type(mh);
1059 1058 sock->sdl_nlen = 0;
1060 1059 sock->sdl_alen = mac_addr_len(mh);
1061 1060 sock->sdl_slen = 0;
1062 1061 if (mac_addr_len(mh) == 0) {
1063 1062 (void) memset(sock->sdl_data, 0,
1064 1063 sizeof (sock->sdl_data));
1065 1064 } else {
1066 1065 mac_unicast_primary_get(mh, (uint8_t *)sock->sdl_data);
1067 1066 }
1068 1067 break;
1069 1068
1070 1069 default :
1071 1070 break;
1072 1071 }
1073 1072
1074 1073 mac_close(mh);
1075 1074
1076 1075 if (error == 0) {
1077 1076 /*
1078 1077 * Only the "GET" ioctls need to copy data back to userspace.
1079 1078 */
1080 1079 switch (cmd) {
1081 1080 case SIOCGLIFINDEX :
1082 1081 case SIOCGLIFFLAGS :
1083 1082 case SIOCGLIFMTU :
1084 1083 case SIOCGLIFHWADDR :
1085 1084 error = ddi_copyout(&lifreq, (void *)arg,
1086 1085 sizeof (lifreq), mod);
1087 1086 break;
1088 1087
1089 1088 case SIOCGIFINDEX :
1090 1089 case SIOCGIFFLAGS :
1091 1090 case SIOCGIFMTU :
1092 1091 case SIOCGIFHWADDR :
1093 1092 error = ddi_copyout(&ifreq, (void *)arg,
1094 1093 sizeof (ifreq), mod);
1095 1094 break;
1096 1095 default :
1097 1096 break;
1098 1097 }
1099 1098 }
1100 1099
1101 1100 return (error);
1102 1101 }
1103 1102
1104 1103 /*
1105 1104 * Closing the socket requires that all open references to network
1106 1105 * interfaces be closed.
1107 1106 */
1108 1107 /* ARGSUSED */
1109 1108 static int
1110 1109 sdpfp_close(sock_lower_handle_t handle, int flag, struct cred *cr)
1111 1110 {
1112 1111 struct pfpsock *ps = (struct pfpsock *)handle;
1113 1112
1114 1113 if (ps->ps_phd != 0) {
1115 1114 mac_promisc_remove(ps->ps_phd);
1116 1115 ps->ps_phd = 0;
1117 1116 }
1118 1117
1119 1118 if (ps->ps_mch != 0) {
1120 1119 mac_client_close(ps->ps_mch, 0);
1121 1120 ps->ps_mch = 0;
1122 1121 }
1123 1122
1124 1123 if (ps->ps_mh != 0) {
1125 1124 mac_close(ps->ps_mh);
1126 1125 ps->ps_mh = 0;
1127 1126 }
1128 1127
1129 1128 kmem_free(ps, sizeof (*ps));
1130 1129
1131 1130 return (0);
1132 1131 }
1133 1132
1134 1133 /* ************************************************************************* */
1135 1134
1136 1135 /*
1137 1136 * Given a pointer (arg) to a "struct ifreq" (potentially in user space),
1138 1137 * determine the linkid for the interface name stored in that structure.
1139 1138 * name is used as a buffer so that we can ensure a trailing \0 is appended
1140 1139 * to the name safely.
1141 1140 */
1142 1141 static int
1143 1142 pfp_ifreq_getlinkid(intptr_t arg, struct ifreq *ifreqp,
1144 1143 datalink_id_t *linkidp, int mode)
1145 1144 {
1146 1145 char name[IFNAMSIZ + 1];
1147 1146 int error;
1148 1147
1149 1148 if (ddi_copyin((void *)arg, ifreqp, sizeof (*ifreqp), mode) != 0)
1150 1149 return (EFAULT);
1151 1150
1152 1151 (void) strlcpy(name, ifreqp->ifr_name, sizeof (name));
1153 1152
1154 1153 error = dls_mgmt_get_linkid(name, linkidp);
1155 1154 if (error != 0)
1156 1155 error = dls_devnet_macname2linkid(name, linkidp);
1157 1156
1158 1157 return (error);
1159 1158 }
1160 1159
1161 1160 /*
1162 1161 * Given a pointer (arg) to a "struct lifreq" (potentially in user space),
1163 1162 * determine the linkid for the interface name stored in that structure.
1164 1163 * name is used as a buffer so that we can ensure a trailing \0 is appended
1165 1164 * to the name safely.
1166 1165 */
1167 1166 static int
1168 1167 pfp_lifreq_getlinkid(intptr_t arg, struct lifreq *lifreqp,
1169 1168 datalink_id_t *linkidp, int mode)
1170 1169 {
1171 1170 char name[LIFNAMSIZ + 1];
1172 1171 int error;
1173 1172
1174 1173 if (ddi_copyin((void *)arg, lifreqp, sizeof (*lifreqp), mode) != 0)
1175 1174 return (EFAULT);
1176 1175
1177 1176 (void) strlcpy(name, lifreqp->lifr_name, sizeof (name));
1178 1177
1179 1178 error = dls_mgmt_get_linkid(name, linkidp);
1180 1179 if (error != 0)
1181 1180 error = dls_devnet_macname2linkid(name, linkidp);
1182 1181
1183 1182 return (error);
1184 1183 }
1185 1184
1186 1185 /*
1187 1186 * Although there are several new SOL_PACKET options that can be set and
1188 1187 * are specific to this implementation of PF_PACKET, the current API does
1189 1188 * not support doing a get on them to retrieve accompanying status. Thus
1190 1189 * it is only currently possible to use SOL_PACKET with getsockopt to
1191 1190 * retrieve statistical information. This remains consistant with the
1192 1191 * Linux API at the time of writing.
1193 1192 */
1194 1193 static int
1195 1194 pfp_getpacket_sockopt(sock_lower_handle_t handle, int option_name,
1196 1195 void *optval, socklen_t *optlenp)
1197 1196 {
1198 1197 struct pfpsock *ps;
1199 1198 struct tpacket_stats_short tpss;
1200 1199 int error = 0;
1201 1200
1202 1201 ps = (struct pfpsock *)handle;
1203 1202
1204 1203 switch (option_name) {
1205 1204 case PACKET_STATISTICS :
1206 1205 if (*optlenp < sizeof (ps->ps_stats)) {
1207 1206 error = EINVAL;
1208 1207 break;
1209 1208 }
1210 1209 *optlenp = sizeof (ps->ps_stats);
1211 1210 bcopy(&ps->ps_stats, optval, sizeof (ps->ps_stats));
1212 1211 break;
1213 1212 case PACKET_STATISTICS_SHORT :
1214 1213 if (*optlenp < sizeof (tpss)) {
1215 1214 error = EINVAL;
1216 1215 break;
1217 1216 }
1218 1217 *optlenp = sizeof (tpss);
1219 1218 tpss.tp_packets = ps->ps_stats.tp_packets;
1220 1219 tpss.tp_drops = ps->ps_stats.tp_drops;
1221 1220 bcopy(&tpss, optval, sizeof (tpss));
1222 1221 break;
1223 1222 default :
1224 1223 error = EINVAL;
1225 1224 break;
1226 1225 }
1227 1226
1228 1227 return (error);
1229 1228 }
1230 1229
1231 1230 /*
1232 1231 * The SOL_PACKET level for socket options supports three options,
1233 1232 * PACKET_ADD_MEMBERSHIP, PACKET_DROP_MEMBERSHIP and PACKET_AUXDATA.
1234 1233 * This function is responsible for mapping the two socket options
1235 1234 * that manage multicast membership into the appropriate internal
1236 1235 * function calls to bring the option into effect. Whilst direct
1237 1236 * changes to the multicast membership (ADD/DROP) groups is handled
1238 1237 * by calls directly into the mac module, changes to the promiscuos
1239 1238 * mode are vectored through pfp_set_promisc() so that the logic for
1240 1239 * managing the promiscuous mode is in one place.
1241 1240 */
1242 1241 /* ARGSUSED */
1243 1242 static int
1244 1243 pfp_setpacket_sockopt(sock_lower_handle_t handle, int option_name,
1245 1244 const void *optval, socklen_t optlen)
1246 1245 {
1247 1246 struct packet_mreq mreq;
1248 1247 struct pfpsock *ps;
1249 1248 int error = 0;
1250 1249 int opt;
1251 1250
1252 1251 ps = (struct pfpsock *)handle;
1253 1252 if (!ps->ps_bound)
1254 1253 return (EPROTO);
1255 1254
1256 1255 if ((option_name == PACKET_ADD_MEMBERSHIP) ||
1257 1256 (option_name == PACKET_DROP_MEMBERSHIP)) {
1258 1257 if (!ps->ps_bound)
1259 1258 return (EPROTO);
1260 1259 bcopy(optval, &mreq, sizeof (mreq));
1261 1260 if (ps->ps_linkid != mreq.mr_ifindex)
1262 1261 return (EINVAL);
1263 1262 }
1264 1263
1265 1264 switch (option_name) {
1266 1265 case PACKET_ADD_MEMBERSHIP :
1267 1266 switch (mreq.mr_type) {
1268 1267 case PACKET_MR_MULTICAST :
1269 1268 if (mreq.mr_alen != ps->ps_sock.sll_halen)
1270 1269 return (EINVAL);
1271 1270
1272 1271 error = mac_multicast_add(ps->ps_mch, mreq.mr_address);
1273 1272 break;
1274 1273
1275 1274 case PACKET_MR_PROMISC :
1276 1275 error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_ALL);
1277 1276 break;
1278 1277
1279 1278 case PACKET_MR_ALLMULTI :
1280 1279 error = pfp_set_promisc(ps, MAC_CLIENT_PROMISC_MULTI);
1281 1280 break;
1282 1281 }
1283 1282 break;
1284 1283
1285 1284 case PACKET_DROP_MEMBERSHIP :
1286 1285 switch (mreq.mr_type) {
1287 1286 case PACKET_MR_MULTICAST :
1288 1287 if (mreq.mr_alen != ps->ps_sock.sll_halen)
1289 1288 return (EINVAL);
1290 1289
1291 1290 mac_multicast_remove(ps->ps_mch, mreq.mr_address);
1292 1291 break;
1293 1292
1294 1293 case PACKET_MR_PROMISC :
1295 1294 if (ps->ps_promisc != MAC_CLIENT_PROMISC_ALL)
1296 1295 return (EINVAL);
1297 1296 error = pfp_set_promisc(ps,
1298 1297 MAC_CLIENT_PROMISC_FILTERED);
1299 1298 break;
1300 1299
1301 1300 case PACKET_MR_ALLMULTI :
1302 1301 if (ps->ps_promisc != MAC_CLIENT_PROMISC_MULTI)
1303 1302 return (EINVAL);
1304 1303 error = pfp_set_promisc(ps,
1305 1304 MAC_CLIENT_PROMISC_FILTERED);
1306 1305 break;
1307 1306 }
1308 1307 break;
1309 1308
1310 1309 case PACKET_AUXDATA :
1311 1310 if (optlen == sizeof (int)) {
1312 1311 opt = *(int *)optval;
1313 1312 ps->ps_auxdata = (opt != 0);
1314 1313 } else {
1315 1314 error = EINVAL;
1316 1315 }
1317 1316 break;
1318 1317 default :
1319 1318 error = EINVAL;
1320 1319 break;
1321 1320 }
1322 1321
1323 1322 return (error);
1324 1323 }
1325 1324
1326 1325 /*
1327 1326 * There are only two special setsockopt's for SOL_SOCKET with PF_PACKET:
1328 1327 * SO_ATTACH_FILTER and SO_DETACH_FILTER.
1329 1328 *
1330 1329 * Both of these setsockopt values are candidates for being handled by the
1331 1330 * socket layer itself in future, however this requires understanding how
1332 1331 * they would interact with all other sockets.
1333 1332 */
1334 1333 static int
1335 1334 pfp_setsocket_sockopt(sock_lower_handle_t handle, int option_name,
1336 1335 const void *optval, socklen_t optlen)
1337 1336 {
1338 1337 struct bpf_program prog;
1339 1338 struct bpf_insn *fcode;
1340 1339 struct pfpsock *ps;
1341 1340 struct sock_proto_props sopp;
1342 1341 int error = 0;
1343 1342 int size;
1344 1343
1345 1344 ps = (struct pfpsock *)handle;
1346 1345
1347 1346 switch (option_name) {
1348 1347 case SO_ATTACH_FILTER :
1349 1348 #ifdef _LP64
1350 1349 if (optlen == sizeof (struct bpf_program32)) {
1351 1350 struct bpf_program32 prog32;
1352 1351
1353 1352 bcopy(optval, &prog32, sizeof (prog32));
1354 1353 prog.bf_len = prog32.bf_len;
1355 1354 prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1356 1355 } else
1357 1356 #endif
1358 1357 if (optlen == sizeof (struct bpf_program)) {
1359 1358 bcopy(optval, &prog, sizeof (prog));
1360 1359 } else if (optlen != sizeof (struct bpf_program)) {
1361 1360 return (EINVAL);
1362 1361 }
1363 1362 if (prog.bf_len > BPF_MAXINSNS)
1364 1363 return (EINVAL);
1365 1364
1366 1365 size = prog.bf_len * sizeof (*prog.bf_insns);
1367 1366 fcode = kmem_alloc(size, KM_SLEEP);
1368 1367 if (ddi_copyin(prog.bf_insns, fcode, size, 0) != 0) {
1369 1368 kmem_free(fcode, size);
1370 1369 return (EFAULT);
1371 1370 }
1372 1371
1373 1372 if (bpf_validate(fcode, (int)prog.bf_len)) {
1374 1373 rw_enter(&ps->ps_bpflock, RW_WRITER);
1375 1374 pfp_release_bpf(ps);
1376 1375 ps->ps_bpf.bf_insns = fcode;
1377 1376 ps->ps_bpf.bf_len = size;
1378 1377 rw_exit(&ps->ps_bpflock);
1379 1378
1380 1379 return (0);
1381 1380 }
1382 1381 kmem_free(fcode, size);
1383 1382 error = EINVAL;
1384 1383 break;
1385 1384
1386 1385 case SO_DETACH_FILTER :
1387 1386 pfp_release_bpf(ps);
1388 1387 break;
1389 1388
1390 1389 case SO_RCVBUF :
1391 1390 size = *(int32_t *)optval;
1392 1391 if (size > sockmod_pfp_rcvbuf_max || size < 0)
1393 1392 return (ENOBUFS);
1394 1393 sopp.sopp_flags = SOCKOPT_RCVHIWAT;
1395 1394 sopp.sopp_rxhiwat = size;
1396 1395 ps->ps_upcalls->su_set_proto_props(ps->ps_upper, &sopp);
1397 1396 ps->ps_rcvbuf = size;
1398 1397 break;
1399 1398
1400 1399 default :
1401 1400 error = ENOPROTOOPT;
1402 1401 break;
1403 1402 }
1404 1403
1405 1404 return (error);
1406 1405 }
1407 1406
1408 1407 /*
1409 1408 * pfp_open_index is an internal function used to open a MAC device by
1410 1409 * its index. Both a mac_handle_t and mac_client_handle_t are acquired
1411 1410 * because some of the interfaces provided by the mac layer require either
1412 1411 * only the mac_handle_t or both it and mac_handle_t.
1413 1412 *
1414 1413 * Whilst inside the kernel we can access data structures supporting any
1415 1414 * zone, access to interfaces from non-global zones is restricted to those
1416 1415 * interfaces (if any) that are exclusively assigned to a zone.
1417 1416 */
1418 1417 static int
1419 1418 pfp_open_index(int index, mac_handle_t *mhp, mac_client_handle_t *mcip,
1420 1419 cred_t *cred)
1421 1420 {
1422 1421 mac_client_handle_t mch;
1423 1422 zoneid_t ifzoneid;
1424 1423 mac_handle_t mh;
1425 1424 zoneid_t zoneid;
1426 1425 int error;
1427 1426
1428 1427 mh = 0;
1429 1428 mch = 0;
1430 1429 error = mac_open_by_linkid(index, &mh);
1431 1430 if (error != 0)
1432 1431 goto bad_open;
1433 1432
1434 1433 error = mac_client_open(mh, &mch, NULL,
1435 1434 MAC_OPEN_FLAGS_USE_DATALINK_NAME);
1436 1435 if (error != 0)
1437 1436 goto bad_open;
1438 1437
1439 1438 zoneid = crgetzoneid(cred);
1440 1439 if (zoneid != GLOBAL_ZONEID) {
1441 1440 mac_perim_handle_t perim;
1442 1441
1443 1442 mac_perim_enter_by_mh(mh, &perim);
1444 1443 error = dls_link_getzid(mac_name(mh), &ifzoneid);
1445 1444 mac_perim_exit(perim);
1446 1445 if (error != 0)
1447 1446 goto bad_open;
1448 1447 if (ifzoneid != zoneid) {
1449 1448 error = EACCES;
1450 1449 goto bad_open;
1451 1450 }
1452 1451 }
1453 1452
1454 1453 *mcip = mch;
1455 1454 *mhp = mh;
1456 1455
1457 1456 return (0);
1458 1457 bad_open:
1459 1458 if (mch != 0)
1460 1459 mac_client_close(mch, 0);
1461 1460 if (mh != 0)
1462 1461 mac_close(mh);
1463 1462 return (error);
1464 1463 }
1465 1464
1466 1465 static void
1467 1466 pfp_close(mac_handle_t mh, mac_client_handle_t mch)
1468 1467 {
1469 1468 mac_client_close(mch, 0);
1470 1469 mac_close(mh);
1471 1470 }
1472 1471
1473 1472 /*
1474 1473 * The purpose of this function is to provide a single place where we free
1475 1474 * the loaded BPF program and reset all pointers/counters associated with
1476 1475 * it.
1477 1476 */
1478 1477 static void
1479 1478 pfp_release_bpf(struct pfpsock *ps)
1480 1479 {
1481 1480 if (ps->ps_bpf.bf_len != 0) {
1482 1481 kmem_free(ps->ps_bpf.bf_insns, ps->ps_bpf.bf_len);
1483 1482 ps->ps_bpf.bf_len = 0;
1484 1483 ps->ps_bpf.bf_insns = NULL;
1485 1484 }
1486 1485 }
1487 1486
1488 1487 /*
1489 1488 * Set the promiscuous mode of a network interface.
1490 1489 * This function only calls the mac layer when there is a change to the
1491 1490 * status of a network interface's promiscous mode. Tracking of how many
1492 1491 * sockets have the network interface in promiscuous mode, and thus the
1493 1492 * control over the physical device's status, is left to the mac layer.
1494 1493 */
1495 1494 static int
1496 1495 pfp_set_promisc(struct pfpsock *ps, mac_client_promisc_type_t turnon)
1497 1496 {
1498 1497 int error = 0;
1499 1498 int flags;
1500 1499
1501 1500 /*
1502 1501 * There are 4 combinations of turnon/ps_promisc.
1503 1502 * This if handles 2 (both false, both true) and the if() below
1504 1503 * handles the remaining one - when change is required.
1505 1504 */
1506 1505 if (turnon == ps->ps_promisc)
1507 1506 return (error);
1508 1507
1509 1508 if (ps->ps_phd != 0) {
1510 1509 mac_promisc_remove(ps->ps_phd);
1511 1510 ps->ps_phd = 0;
1512 1511
1513 1512 /*
1514 1513 * ps_promisc is set here in case the call to mac_promisc_add
1515 1514 * fails: leaving it to indicate that the interface is still
1516 1515 * in some sort of promiscuous mode is false.
1517 1516 */
1518 1517 if (ps->ps_promisc != MAC_CLIENT_PROMISC_FILTERED) {
1519 1518 ps->ps_promisc = MAC_CLIENT_PROMISC_FILTERED;
1520 1519 flags = MAC_PROMISC_FLAGS_NO_PHYS;
1521 1520 } else {
1522 1521 flags = 0;
1523 1522 }
1524 1523 flags |= MAC_PROMISC_FLAGS_VLAN_TAG_STRIP;
1525 1524 }
1526 1525
1527 1526 error = mac_promisc_add(ps->ps_mch, turnon, pfp_packet, ps,
1528 1527 &ps->ps_phd, flags);
1529 1528 if (error == 0)
1530 1529 ps->ps_promisc = turnon;
1531 1530
1532 1531 return (error);
1533 1532 }
1534 1533
1535 1534 /*
1536 1535 * This table maps the MAC types in Solaris to the ARPHRD_* values used
1537 1536 * on Linux. This is used with the SIOCGIFHWADDR/SIOCGLIFHWADDR ioctl.
1538 1537 *
1539 1538 * The symbols in this table are *not* pulled in from <net/if_arp.h>,
1540 1539 * they are pulled from <netpacket/packet.h>, thus it acts as a source
1541 1540 * of supplementary information to the ARP table.
1542 1541 */
1543 1542 static uint_t arphrd_to_dl[][2] = {
1544 1543 { ARPHRD_IEEE80211, DL_WIFI },
1545 1544 { ARPHRD_TUNNEL, DL_IPV4 },
1546 1545 { ARPHRD_TUNNEL, DL_IPV6 },
1547 1546 { ARPHRD_TUNNEL, DL_6TO4 },
1548 1547 { ARPHRD_AX25, DL_X25 },
1549 1548 { ARPHRD_ATM, DL_ATM },
1550 1549 { 0, 0 }
1551 1550 };
1552 1551
1553 1552 static int
1554 1553 pfp_dl_to_arphrd(int dltype)
1555 1554 {
1556 1555 int i;
1557 1556
1558 1557 for (i = 0; arphrd_to_dl[i][0] != 0; i++)
1559 1558 if (arphrd_to_dl[i][1] == dltype)
1560 1559 return (arphrd_to_dl[i][0]);
1561 1560 return (arp_hw_type(dltype));
1562 1561 }
↓ open down ↓ |
1369 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX