Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/inet/ipnet/ipnet.c
+++ new/usr/src/uts/common/inet/ipnet/ipnet.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * The ipnet device defined here provides access to packets at the IP layer. To
29 29 * provide access to packets at this layer it registers a callback function in
30 30 * the ip module and when there are open instances of the device ip will pass
31 31 * packets into the device. Packets from ip are passed on the input, output and
32 32 * loopback paths. Internally the module returns to ip as soon as possible by
33 33 * deferring processing using a taskq.
34 34 *
35 35 * Management of the devices in /dev/ipnet/ is handled by the devname
36 36 * filesystem and use of the neti interfaces. This module registers for NIC
37 37 * events using the neti framework so that when IP interfaces are brought up,
38 38 * taken down etc. the ipnet module is notified and its view of the interfaces
39 39 * configured on the system adjusted. On attach, the module gets an initial
40 40 * view of the system again using the neti framework but as it has already
41 41 * registered for IP interface events, it is still up-to-date with any changes.
42 42 */
43 43
44 44 #include <sys/types.h>
45 45 #include <sys/conf.h>
46 46 #include <sys/cred.h>
47 47 #include <sys/stat.h>
48 48 #include <sys/ddi.h>
49 49 #include <sys/sunddi.h>
50 50 #include <sys/modctl.h>
51 51 #include <sys/dlpi.h>
52 52 #include <sys/strsun.h>
53 53 #include <sys/id_space.h>
54 54 #include <sys/kmem.h>
55 55 #include <sys/mkdev.h>
56 56 #include <sys/neti.h>
57 57 #include <net/if.h>
58 58 #include <sys/errno.h>
59 59 #include <sys/list.h>
60 60 #include <sys/ksynch.h>
61 61 #include <sys/hook_event.h>
62 62 #include <sys/sdt.h>
63 63 #include <sys/stropts.h>
64 64 #include <sys/sysmacros.h>
65 65 #include <inet/ip.h>
66 66 #include <inet/ip_if.h>
67 67 #include <inet/ip_multi.h>
68 68 #include <inet/ip6.h>
69 69 #include <inet/ipnet.h>
70 70 #include <net/bpf.h>
71 71 #include <net/bpfdesc.h>
72 72 #include <net/dlt.h>
73 73
74 74 static struct module_info ipnet_minfo = {
75 75 1, /* mi_idnum */
76 76 "ipnet", /* mi_idname */
77 77 0, /* mi_minpsz */
78 78 INFPSZ, /* mi_maxpsz */
79 79 2048, /* mi_hiwat */
80 80 0 /* mi_lowat */
81 81 };
82 82
83 83 /*
84 84 * List to hold static view of ipnetif_t's on the system. This is needed to
85 85 * avoid holding the lock protecting the avl tree of ipnetif's over the
86 86 * callback into the dev filesystem.
87 87 */
88 88 typedef struct ipnetif_cbdata {
89 89 char ic_ifname[LIFNAMSIZ];
90 90 dev_t ic_dev;
91 91 list_node_t ic_next;
92 92 } ipnetif_cbdata_t;
93 93
/*
 * Classification used by ipnet_accept(): how a given ipnet_addrp_t relates
 * to one ipnet_t client stream.
 */
typedef enum {
	IPNETADDR_MYADDR = 0,	/* an address on my ipnetif_t. */
	IPNETADDR_MBCAST = 1,	/* a multicast or broadcast address. */
	IPNETADDR_UNKNOWN = 2	/* none of the above. */
} ipnet_addrtype_t;
104 104
105 105 /* Argument used for the ipnet_nicevent_taskq callback. */
106 106 typedef struct ipnet_nicevent_s {
107 107 nic_event_t ipne_event;
108 108 net_handle_t ipne_protocol;
109 109 netstackid_t ipne_stackid;
110 110 uint64_t ipne_ifindex;
111 111 uint64_t ipne_lifindex;
112 112 char ipne_ifname[LIFNAMSIZ];
113 113 } ipnet_nicevent_t;
114 114
115 115 static dev_info_t *ipnet_dip;
116 116 static major_t ipnet_major;
117 117 static ddi_taskq_t *ipnet_taskq; /* taskq for packets */
118 118 static ddi_taskq_t *ipnet_nicevent_taskq; /* taskq for NIC events */
119 119 static id_space_t *ipnet_minor_space;
120 120 static const int IPNET_MINOR_LO = 1; /* minor number for /dev/lo0 */
121 121 static const int IPNET_MINOR_MIN = 2; /* start of dynamic minors */
122 122 static dl_info_ack_t ipnet_infoack = IPNET_INFO_ACK_INIT;
123 123 static ipnet_acceptfn_t ipnet_accept, ipnet_loaccept;
124 124 static bpf_itap_fn_t ipnet_itap;
125 125
126 126 static void ipnet_input(mblk_t *);
127 127 static int ipnet_wput(queue_t *, mblk_t *);
128 128 static int ipnet_rsrv(queue_t *);
129 129 static int ipnet_open(queue_t *, dev_t *, int, int, cred_t *);
130 130 static int ipnet_close(queue_t *);
131 131 static void ipnet_ioctl(queue_t *, mblk_t *);
132 132 static void ipnet_iocdata(queue_t *, mblk_t *);
133 133 static void ipnet_wputnondata(queue_t *, mblk_t *);
134 134 static int ipnet_attach(dev_info_t *, ddi_attach_cmd_t);
135 135 static int ipnet_detach(dev_info_t *, ddi_detach_cmd_t);
136 136 static int ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
137 137 static void ipnet_inforeq(queue_t *q, mblk_t *mp);
138 138 static void ipnet_bindreq(queue_t *q, mblk_t *mp);
139 139 static void ipnet_unbindreq(queue_t *q, mblk_t *mp);
140 140 static void ipnet_dlpromisconreq(queue_t *q, mblk_t *mp);
141 141 static void ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp);
142 142 static int ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *);
143 143 static void ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *);
144 144 static int ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *);
145 145 static void ipnet_nicevent_task(void *);
146 146 static ipnetif_t *ipnetif_create(const char *, uint64_t, ipnet_stack_t *,
147 147 uint64_t);
148 148 static void ipnetif_remove(ipnetif_t *, ipnet_stack_t *);
149 149 static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t);
150 150 static ipnetif_t *ipnetif_getby_index(uint64_t, ipnet_stack_t *);
151 151 static ipnetif_t *ipnetif_getby_dev(dev_t, ipnet_stack_t *);
152 152 static boolean_t ipnetif_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
153 153 static void ipnetif_zonecheck(ipnetif_t *, ipnet_stack_t *);
154 154 static int ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t);
155 155 static int ipnetif_compare_name(const void *, const void *);
156 156 static int ipnetif_compare_name_zone(const void *, const void *);
157 157 static int ipnetif_compare_index(const void *, const void *);
158 158 static void ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t);
159 159 static void ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t);
160 160 static void ipnetif_refhold(ipnetif_t *);
161 161 static void ipnetif_refrele(ipnetif_t *);
162 162 static void ipnet_walkers_inc(ipnet_stack_t *);
163 163 static void ipnet_walkers_dec(ipnet_stack_t *);
164 164 static void ipnet_register_netihook(ipnet_stack_t *);
165 165 static void *ipnet_stack_init(netstackid_t, netstack_t *);
166 166 static void ipnet_stack_fini(netstackid_t, void *);
167 167 static void ipnet_dispatch(void *);
168 168 static int ipobs_bounce_func(hook_event_token_t, hook_data_t, void *);
169 169 static int ipnet_bpf_bounce(hook_event_token_t, hook_data_t, void *);
170 170 static ipnetif_t *ipnetif_clone_create(ipnetif_t *, zoneid_t);
171 171 static void ipnetif_clone_release(ipnetif_t *);
172 172
173 173 static struct qinit ipnet_rinit = {
174 174 NULL, /* qi_putp */
175 175 ipnet_rsrv, /* qi_srvp */
176 176 ipnet_open, /* qi_qopen */
177 177 ipnet_close, /* qi_qclose */
178 178 NULL, /* qi_qadmin */
179 179 &ipnet_minfo, /* qi_minfo */
180 180 };
181 181
182 182 static struct qinit ipnet_winit = {
183 183 ipnet_wput, /* qi_putp */
184 184 NULL, /* qi_srvp */
185 185 NULL, /* qi_qopen */
186 186 NULL, /* qi_qclose */
187 187 NULL, /* qi_qadmin */
188 188 &ipnet_minfo, /* qi_minfo */
189 189 };
190 190
191 191 static struct streamtab ipnet_info = {
192 192 &ipnet_rinit, &ipnet_winit
193 193 };
194 194
195 195 DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach,
↓ open down ↓ |
195 lines elided |
↑ open up ↑ |
196 196 ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info,
197 197 ddi_quiesce_not_supported);
198 198
199 199 static struct modldrv modldrv = {
200 200 &mod_driverops,
201 201 "STREAMS ipnet driver",
202 202 &ipnet_ops
203 203 };
204 204
205 205 static struct modlinkage modlinkage = {
206 - MODREV_1, &modldrv, NULL
206 + MODREV_1, { &modldrv, NULL }
207 207 };
208 208
209 209 /*
210 210 * This structure contains the template data (names and type) that is
211 211 * copied, in bulk, into the new kstats structure created by net_kstat_create.
212 212 * No actual statistical information is stored in this instance of the
213 213 * ipnet_kstats_t structure.
214 214 */
215 215 static ipnet_kstats_t stats_template = {
216 216 { "duplicationFail", KSTAT_DATA_UINT64 },
217 217 { "dispatchOk", KSTAT_DATA_UINT64 },
218 218 { "dispatchFail", KSTAT_DATA_UINT64 },
219 219 { "dispatchHeaderDrop", KSTAT_DATA_UINT64 },
220 220 { "dispatchDupDrop", KSTAT_DATA_UINT64 },
221 221 { "dispatchDeliver", KSTAT_DATA_UINT64 },
222 222 { "acceptOk", KSTAT_DATA_UINT64 },
223 223 { "acceptFail", KSTAT_DATA_UINT64 }
224 224 };
225 225
226 226 /*
227 227 * Walk the list of physical interfaces on the machine, for each
228 228 * interface create a new ipnetif_t and add any addresses to it. We
229 229 * need to do the walk twice, once for IPv4 and once for IPv6.
230 230 *
231 231 * The interfaces are destroyed as part of ipnet_stack_fini() for each
232 232 * stack. Note that we cannot do this initialization in
233 233 * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
234 234 */
235 235 static int
236 236 ipnetif_init(void)
237 237 {
238 238 netstack_handle_t nh;
239 239 netstack_t *ns;
240 240 ipnet_stack_t *ips;
241 241 int ret = 0;
242 242
243 243 netstack_next_init(&nh);
244 244 while ((ns = netstack_next(&nh)) != NULL) {
245 245 ips = ns->netstack_ipnet;
246 246 if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0)
247 247 ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE);
248 248 netstack_rele(ns);
249 249 if (ret != 0)
250 250 break;
251 251 }
252 252 netstack_next_fini(&nh);
253 253 return (ret);
254 254 }
255 255
256 256 /*
257 257 * Standard module entry points.
258 258 */
259 259 int
260 260 _init(void)
261 261 {
262 262 int ret;
263 263 boolean_t netstack_registered = B_FALSE;
264 264
265 265 if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1)
266 266 return (ENODEV);
267 267 ipnet_minor_space = id_space_create("ipnet_minor_space",
268 268 IPNET_MINOR_MIN, MAXMIN32);
269 269
270 270 /*
271 271 * We call ddi_taskq_create() with nthread == 1 to ensure in-order
272 272 * delivery of packets to clients. Note that we need to create the
273 273 * taskqs before calling netstack_register() since ipnet_stack_init()
274 274 * registers callbacks that use 'em.
275 275 */
276 276 ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0);
277 277 ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue",
278 278 1, TASKQ_DEFAULTPRI, 0);
279 279 if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) {
280 280 ret = ENOMEM;
281 281 goto done;
282 282 }
283 283
284 284 netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini);
285 285 netstack_registered = B_TRUE;
286 286
287 287 if ((ret = ipnetif_init()) == 0)
288 288 ret = mod_install(&modlinkage);
289 289 done:
290 290 if (ret != 0) {
291 291 if (ipnet_taskq != NULL)
292 292 ddi_taskq_destroy(ipnet_taskq);
293 293 if (ipnet_nicevent_taskq != NULL)
294 294 ddi_taskq_destroy(ipnet_nicevent_taskq);
295 295 if (netstack_registered)
296 296 netstack_unregister(NS_IPNET);
297 297 id_space_destroy(ipnet_minor_space);
298 298 }
299 299 return (ret);
300 300 }
301 301
302 302 int
303 303 _fini(void)
304 304 {
305 305 int err;
306 306
307 307 if ((err = mod_remove(&modlinkage)) != 0)
308 308 return (err);
309 309
310 310 netstack_unregister(NS_IPNET);
311 311 ddi_taskq_destroy(ipnet_nicevent_taskq);
312 312 ddi_taskq_destroy(ipnet_taskq);
313 313 id_space_destroy(ipnet_minor_space);
314 314 return (0);
315 315 }
316 316
317 317 int
318 318 _info(struct modinfo *modinfop)
319 319 {
320 320 return (mod_info(&modlinkage, modinfop));
321 321 }
322 322
323 323 static void
324 324 ipnet_register_netihook(ipnet_stack_t *ips)
325 325 {
326 326 int ret;
327 327 zoneid_t zoneid;
328 328 netid_t netid;
329 329
330 330 HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents",
331 331 ips);
332 332
333 333 /*
334 334 * It is possible for an exclusive stack to be in the process of
335 335 * shutting down here, and the netid and protocol lookups could fail
336 336 * in that case.
337 337 */
338 338 zoneid = netstackid_to_zoneid(ips->ips_netstack->netstack_stackid);
339 339 if ((netid = net_zoneidtonetid(zoneid)) == -1)
340 340 return;
341 341
342 342 if ((ips->ips_ndv4 = net_protocol_lookup(netid, NHF_INET)) != NULL) {
343 343 if ((ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS,
344 344 ips->ips_nicevents)) != 0) {
345 345 VERIFY(net_protocol_release(ips->ips_ndv4) == 0);
346 346 ips->ips_ndv4 = NULL;
347 347 cmn_err(CE_WARN, "unable to register IPv4 netinfo hooks"
348 348 " in zone %d: %d", zoneid, ret);
349 349 }
350 350 }
351 351 if ((ips->ips_ndv6 = net_protocol_lookup(netid, NHF_INET6)) != NULL) {
352 352 if ((ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS,
353 353 ips->ips_nicevents)) != 0) {
354 354 VERIFY(net_protocol_release(ips->ips_ndv6) == 0);
355 355 ips->ips_ndv6 = NULL;
356 356 cmn_err(CE_WARN, "unable to register IPv6 netinfo hooks"
357 357 " in zone %d: %d", zoneid, ret);
358 358 }
359 359 }
360 360
361 361 /*
362 362 * Create a local set of kstats for each zone.
363 363 */
364 364 ips->ips_kstatp = net_kstat_create(netid, "ipnet", 0, "ipnet_stats",
365 365 "misc", KSTAT_TYPE_NAMED,
366 366 sizeof (ipnet_kstats_t) / sizeof (kstat_named_t), 0);
367 367 if (ips->ips_kstatp != NULL) {
368 368 bcopy(&stats_template, &ips->ips_stats,
369 369 sizeof (ips->ips_stats));
370 370 ips->ips_kstatp->ks_data = &ips->ips_stats;
371 371 ips->ips_kstatp->ks_private =
372 372 (void *)(uintptr_t)ips->ips_netstack->netstack_stackid;
373 373 kstat_install(ips->ips_kstatp);
374 374 } else {
375 375 cmn_err(CE_WARN, "net_kstat_create(%s,%s,%s) failed",
376 376 "ipnet", "ipnet_stats", "misc");
377 377 }
378 378 }
379 379
380 380 /*
381 381 * This function is called on attach to build an initial view of the
382 382 * interfaces on the system. It will be called once for IPv4 and once
383 383 * for IPv6, although there is only one ipnet interface for both IPv4
384 384 * and IPv6 there are separate address lists.
385 385 */
386 386 static int
387 387 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6)
388 388 {
389 389 phy_if_t phyif;
390 390 lif_if_t lif;
391 391 ipnetif_t *ipnetif;
392 392 char name[LIFNAMSIZ];
393 393 boolean_t new_if = B_FALSE;
394 394 uint64_t ifflags;
395 395 int ret = 0;
396 396
397 397 /*
398 398 * If ipnet_register_netihook() was unable to initialize this
399 399 * stack's net_handle_t, then we cannot populate any interface
400 400 * information. This usually happens when we attempted to
401 401 * grab a net_handle_t as a stack was shutting down. We don't
402 402 * want to fail the entire _init() operation because of a
403 403 * stack shutdown (other stacks will continue to work just
404 404 * fine), so we silently return success here.
405 405 */
406 406 if (nd == NULL)
407 407 return (0);
408 408
409 409 /*
410 410 * Make sure we're not processing NIC events during the
411 411 * population of our interfaces and address lists.
412 412 */
413 413 mutex_enter(&ips->ips_event_lock);
414 414
415 415 for (phyif = net_phygetnext(nd, 0); phyif != 0;
416 416 phyif = net_phygetnext(nd, phyif)) {
417 417 if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0)
418 418 continue;
419 419 ifflags = 0;
420 420 (void) net_getlifflags(nd, phyif, 0, &ifflags);
421 421 if ((ipnetif = ipnetif_getby_index(phyif, ips)) == NULL) {
422 422 ipnetif = ipnetif_create(name, phyif, ips, ifflags);
423 423 if (ipnetif == NULL) {
424 424 ret = ENOMEM;
425 425 goto done;
426 426 }
427 427 new_if = B_TRUE;
428 428 }
429 429 ipnetif->if_flags |=
430 430 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
431 431
432 432 for (lif = net_lifgetnext(nd, phyif, 0); lif != 0;
433 433 lif = net_lifgetnext(nd, phyif, lif)) {
434 434 /*
435 435 * Skip addresses that aren't up. We'll add
436 436 * them when we receive an NE_LIF_UP event.
437 437 */
438 438 if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 ||
439 439 !(ifflags & IFF_UP))
440 440 continue;
441 441 /* Don't add it if we already have it. */
442 442 if (ipnet_match_lif(ipnetif, lif, isv6) != NULL)
443 443 continue;
444 444 ipnet_add_ifaddr(lif, ipnetif, nd);
445 445 }
446 446 if (!new_if)
447 447 ipnetif_refrele(ipnetif);
448 448 }
449 449
450 450 done:
451 451 mutex_exit(&ips->ips_event_lock);
452 452 return (ret);
453 453 }
454 454
455 455 static int
456 456 ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
457 457 {
458 458 if (cmd != DDI_ATTACH)
459 459 return (DDI_FAILURE);
460 460
461 461 if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO,
462 462 DDI_PSEUDO, 0) == DDI_FAILURE)
463 463 return (DDI_FAILURE);
464 464
465 465 ipnet_dip = dip;
466 466 return (DDI_SUCCESS);
467 467 }
468 468
469 469 static int
470 470 ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
471 471 {
472 472 if (cmd != DDI_DETACH)
473 473 return (DDI_FAILURE);
474 474
475 475 ASSERT(dip == ipnet_dip);
476 476 ddi_remove_minor_node(ipnet_dip, NULL);
477 477 ipnet_dip = NULL;
478 478 return (DDI_SUCCESS);
479 479 }
480 480
481 481 /* ARGSUSED */
482 482 static int
483 483 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
484 484 {
485 485 int error = DDI_FAILURE;
486 486
487 487 switch (infocmd) {
488 488 case DDI_INFO_DEVT2INSTANCE:
489 489 *result = (void *)0;
490 490 error = DDI_SUCCESS;
491 491 break;
492 492 case DDI_INFO_DEVT2DEVINFO:
493 493 if (ipnet_dip != NULL) {
494 494 *result = ipnet_dip;
495 495 error = DDI_SUCCESS;
496 496 }
497 497 break;
498 498 }
499 499 return (error);
500 500 }
501 501
502 502 /* ARGSUSED */
503 503 static int
504 504 ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp)
505 505 {
506 506 ipnet_t *ipnet;
507 507 netstack_t *ns = NULL;
508 508 ipnet_stack_t *ips;
509 509 int err = 0;
510 510 zoneid_t zoneid = crgetzoneid(crp);
511 511
512 512 /*
513 513 * If the system is labeled, only the global zone is allowed to open
514 514 * IP observability nodes.
515 515 */
516 516 if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
517 517 return (EACCES);
518 518
519 519 /* We don't support open as a module */
520 520 if (sflag & MODOPEN)
521 521 return (ENOTSUP);
522 522
523 523 /* This driver is self-cloning, we don't support re-open. */
524 524 if (rq->q_ptr != NULL)
525 525 return (EBUSY);
526 526
527 527 if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL)
528 528 return (ENOMEM);
529 529
530 530 VERIFY((ns = netstack_find_by_cred(crp)) != NULL);
531 531 ips = ns->netstack_ipnet;
532 532
533 533 rq->q_ptr = WR(rq)->q_ptr = ipnet;
534 534 ipnet->ipnet_rq = rq;
535 535 ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space);
536 536 ipnet->ipnet_zoneid = zoneid;
537 537 ipnet->ipnet_dlstate = DL_UNBOUND;
538 538 ipnet->ipnet_ns = ns;
539 539
540 540 /*
541 541 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need
542 542 * to be processed after ipnet_if is set and the ipnet_t has been
543 543 * inserted in the ips_str_list.
544 544 */
545 545 mutex_enter(&ips->ips_event_lock);
546 546 if (getminor(*dev) == IPNET_MINOR_LO) {
547 547 ipnet->ipnet_flags |= IPNET_LOMODE;
548 548 ipnet->ipnet_acceptfn = ipnet_loaccept;
549 549 } else {
550 550 ipnet->ipnet_acceptfn = ipnet_accept;
551 551 ipnet->ipnet_if = ipnetif_getby_dev(*dev, ips);
552 552 if (ipnet->ipnet_if == NULL ||
553 553 !ipnetif_in_zone(ipnet->ipnet_if, zoneid, ips)) {
554 554 err = ENODEV;
555 555 goto done;
556 556 }
557 557 }
558 558
559 559 mutex_enter(&ips->ips_walkers_lock);
560 560 while (ips->ips_walkers_cnt != 0)
561 561 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
562 562 list_insert_head(&ips->ips_str_list, ipnet);
563 563 *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor);
564 564 qprocson(rq);
565 565
566 566 /*
567 567 * Only register our callback if we're the first open client; we call
568 568 * unregister in close() for the last open client.
569 569 */
570 570 if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list))
571 571 ips->ips_hook = ipobs_register_hook(ns, ipnet_input);
572 572 mutex_exit(&ips->ips_walkers_lock);
573 573
574 574 done:
575 575 mutex_exit(&ips->ips_event_lock);
576 576 if (err != 0) {
577 577 netstack_rele(ns);
578 578 id_free(ipnet_minor_space, ipnet->ipnet_minor);
579 579 if (ipnet->ipnet_if != NULL)
580 580 ipnetif_refrele(ipnet->ipnet_if);
581 581 kmem_free(ipnet, sizeof (*ipnet));
582 582 }
583 583 return (err);
584 584 }
585 585
586 586 static int
587 587 ipnet_close(queue_t *rq)
588 588 {
589 589 ipnet_t *ipnet = rq->q_ptr;
590 590 ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet;
591 591
592 592 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
593 593 ipnet_leave_allmulti(ipnet->ipnet_if, ips);
594 594 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
595 595 ipnet_leave_allmulti(ipnet->ipnet_if, ips);
596 596
597 597 mutex_enter(&ips->ips_walkers_lock);
598 598 while (ips->ips_walkers_cnt != 0)
599 599 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock);
600 600
601 601 qprocsoff(rq);
602 602
603 603 list_remove(&ips->ips_str_list, ipnet);
604 604 if (ipnet->ipnet_if != NULL)
605 605 ipnetif_refrele(ipnet->ipnet_if);
606 606 id_free(ipnet_minor_space, ipnet->ipnet_minor);
607 607
608 608 if (list_is_empty(&ips->ips_str_list)) {
609 609 ipobs_unregister_hook(ips->ips_netstack, ips->ips_hook);
610 610 ips->ips_hook = NULL;
611 611 }
612 612
613 613 kmem_free(ipnet, sizeof (*ipnet));
614 614
615 615 mutex_exit(&ips->ips_walkers_lock);
616 616 netstack_rele(ips->ips_netstack);
617 617 return (0);
618 618 }
619 619
620 620 static int
621 621 ipnet_wput(queue_t *q, mblk_t *mp)
622 622 {
623 623 switch (mp->b_datap->db_type) {
624 624 case M_FLUSH:
625 625 if (*mp->b_rptr & FLUSHW) {
626 626 flushq(q, FLUSHDATA);
627 627 *mp->b_rptr &= ~FLUSHW;
628 628 }
629 629 if (*mp->b_rptr & FLUSHR)
630 630 qreply(q, mp);
631 631 else
632 632 freemsg(mp);
633 633 break;
634 634 case M_PROTO:
635 635 case M_PCPROTO:
636 636 ipnet_wputnondata(q, mp);
637 637 break;
638 638 case M_IOCTL:
639 639 ipnet_ioctl(q, mp);
640 640 break;
641 641 case M_IOCDATA:
642 642 ipnet_iocdata(q, mp);
643 643 break;
644 644 default:
645 645 freemsg(mp);
646 646 break;
647 647 }
648 648 return (0);
649 649 }
650 650
651 651 static int
652 652 ipnet_rsrv(queue_t *q)
653 653 {
654 654 mblk_t *mp;
655 655
656 656 while ((mp = getq(q)) != NULL) {
657 657 ASSERT(DB_TYPE(mp) == M_DATA);
658 658 if (canputnext(q)) {
659 659 putnext(q, mp);
660 660 } else {
661 661 (void) putbq(q, mp);
662 662 break;
663 663 }
664 664 }
665 665 return (0);
666 666 }
667 667
668 668 static void
669 669 ipnet_ioctl(queue_t *q, mblk_t *mp)
670 670 {
671 671 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
672 672
673 673 switch (iocp->ioc_cmd) {
674 674 case DLIOCRAW:
675 675 miocack(q, mp, 0, 0);
676 676 break;
677 677 case DLIOCIPNETINFO:
678 678 if (iocp->ioc_count == TRANSPARENT) {
679 679 mcopyin(mp, NULL, sizeof (uint_t), NULL);
680 680 qreply(q, mp);
681 681 break;
682 682 }
683 683 /* Fallthrough, we don't support I_STR with DLIOCIPNETINFO. */
684 684 default:
685 685 miocnak(q, mp, 0, EINVAL);
686 686 break;
687 687 }
688 688 }
689 689
690 690 static void
691 691 ipnet_iocdata(queue_t *q, mblk_t *mp)
692 692 {
693 693 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
694 694 ipnet_t *ipnet = q->q_ptr;
695 695
696 696 switch (iocp->ioc_cmd) {
697 697 case DLIOCIPNETINFO:
698 698 if (*(int *)mp->b_cont->b_rptr == 1)
699 699 ipnet->ipnet_flags |= IPNET_INFO;
700 700 else if (*(int *)mp->b_cont->b_rptr == 0)
701 701 ipnet->ipnet_flags &= ~IPNET_INFO;
702 702 else
703 703 goto iocnak;
704 704 miocack(q, mp, 0, DL_IPNETINFO_VERSION);
705 705 break;
706 706 default:
707 707 iocnak:
708 708 miocnak(q, mp, 0, EINVAL);
709 709 break;
710 710 }
711 711 }
712 712
713 713 static void
714 714 ipnet_wputnondata(queue_t *q, mblk_t *mp)
715 715 {
716 716 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
717 717 t_uscalar_t prim = dlp->dl_primitive;
718 718
719 719 switch (prim) {
720 720 case DL_INFO_REQ:
721 721 ipnet_inforeq(q, mp);
722 722 break;
723 723 case DL_UNBIND_REQ:
724 724 ipnet_unbindreq(q, mp);
725 725 break;
726 726 case DL_BIND_REQ:
727 727 ipnet_bindreq(q, mp);
728 728 break;
729 729 case DL_PROMISCON_REQ:
730 730 ipnet_dlpromisconreq(q, mp);
731 731 break;
732 732 case DL_PROMISCOFF_REQ:
733 733 ipnet_dlpromiscoffreq(q, mp);
734 734 break;
735 735 case DL_UNITDATA_REQ:
736 736 case DL_DETACH_REQ:
737 737 case DL_PHYS_ADDR_REQ:
738 738 case DL_SET_PHYS_ADDR_REQ:
739 739 case DL_ENABMULTI_REQ:
740 740 case DL_DISABMULTI_REQ:
741 741 case DL_ATTACH_REQ:
742 742 dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0);
743 743 break;
744 744 default:
745 745 dlerrorack(q, mp, prim, DL_BADPRIM, 0);
746 746 break;
747 747 }
748 748 }
749 749
750 750 static void
751 751 ipnet_inforeq(queue_t *q, mblk_t *mp)
752 752 {
753 753 dl_info_ack_t *dlip;
754 754 size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t);
755 755
756 756 if (MBLKL(mp) < DL_INFO_REQ_SIZE) {
757 757 dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0);
758 758 return;
759 759 }
760 760
761 761 if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL)
762 762 return;
763 763
764 764 dlip = (dl_info_ack_t *)mp->b_rptr;
765 765 *dlip = ipnet_infoack;
766 766 qreply(q, mp);
767 767 }
768 768
769 769 static void
770 770 ipnet_bindreq(queue_t *q, mblk_t *mp)
771 771 {
772 772 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr;
773 773 ipnet_t *ipnet = q->q_ptr;
774 774
775 775 if (MBLKL(mp) < DL_BIND_REQ_SIZE) {
776 776 dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0);
777 777 return;
778 778 }
779 779
780 780 switch (dlp->bind_req.dl_sap) {
781 781 case 0 :
782 782 ipnet->ipnet_family = AF_UNSPEC;
783 783 break;
784 784 case IPV4_VERSION :
785 785 ipnet->ipnet_family = AF_INET;
786 786 break;
787 787 case IPV6_VERSION :
788 788 ipnet->ipnet_family = AF_INET6;
789 789 break;
790 790 default :
791 791 dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0);
792 792 return;
793 793 /*NOTREACHED*/
794 794 }
795 795
796 796 ipnet->ipnet_dlstate = DL_IDLE;
797 797 dlbindack(q, mp, dlp->bind_req.dl_sap, 0, 0, 0, 0);
798 798 }
799 799
800 800 static void
801 801 ipnet_unbindreq(queue_t *q, mblk_t *mp)
802 802 {
803 803 ipnet_t *ipnet = q->q_ptr;
804 804
805 805 if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) {
806 806 dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0);
807 807 return;
808 808 }
809 809
810 810 if (ipnet->ipnet_dlstate != DL_IDLE) {
811 811 dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0);
812 812 } else {
813 813 ipnet->ipnet_dlstate = DL_UNBOUND;
814 814 ipnet->ipnet_family = AF_UNSPEC;
815 815 dlokack(q, mp, DL_UNBIND_REQ);
816 816 }
817 817 }
818 818
819 819 static void
820 820 ipnet_dlpromisconreq(queue_t *q, mblk_t *mp)
821 821 {
822 822 ipnet_t *ipnet = q->q_ptr;
823 823 t_uscalar_t level;
824 824 int err;
825 825
826 826 if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) {
827 827 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
828 828 return;
829 829 }
830 830
831 831 if (ipnet->ipnet_flags & IPNET_LOMODE) {
832 832 dlokack(q, mp, DL_PROMISCON_REQ);
833 833 return;
834 834 }
835 835
836 836 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
837 837 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
838 838 if ((err = ipnet_join_allmulti(ipnet->ipnet_if,
839 839 ipnet->ipnet_ns->netstack_ipnet)) != 0) {
840 840 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err);
841 841 return;
842 842 }
843 843 }
844 844
845 845 switch (level) {
846 846 case DL_PROMISC_PHYS:
847 847 ipnet->ipnet_flags |= IPNET_PROMISC_PHYS;
848 848 break;
849 849 case DL_PROMISC_SAP:
850 850 ipnet->ipnet_flags |= IPNET_PROMISC_SAP;
851 851 break;
852 852 case DL_PROMISC_MULTI:
853 853 ipnet->ipnet_flags |= IPNET_PROMISC_MULTI;
854 854 break;
855 855 default:
856 856 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0);
857 857 return;
858 858 }
859 859
860 860 dlokack(q, mp, DL_PROMISCON_REQ);
861 861 }
862 862
863 863 static void
864 864 ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp)
865 865 {
866 866 ipnet_t *ipnet = q->q_ptr;
867 867 t_uscalar_t level;
868 868 uint16_t orig_ipnet_flags = ipnet->ipnet_flags;
869 869
870 870 if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) {
871 871 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
872 872 return;
873 873 }
874 874
875 875 if (ipnet->ipnet_flags & IPNET_LOMODE) {
876 876 dlokack(q, mp, DL_PROMISCOFF_REQ);
877 877 return;
878 878 }
879 879
880 880 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level;
881 881 switch (level) {
882 882 case DL_PROMISC_PHYS:
883 883 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS)
884 884 ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS;
885 885 break;
886 886 case DL_PROMISC_SAP:
887 887 if (ipnet->ipnet_flags & IPNET_PROMISC_SAP)
888 888 ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP;
889 889 break;
890 890 case DL_PROMISC_MULTI:
891 891 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI)
892 892 ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI;
893 893 break;
894 894 default:
895 895 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0);
896 896 return;
897 897 }
898 898
899 899 if (orig_ipnet_flags == ipnet->ipnet_flags) {
900 900 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0);
901 901 return;
902 902 }
903 903
904 904 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) {
905 905 ipnet_leave_allmulti(ipnet->ipnet_if,
906 906 ipnet->ipnet_ns->netstack_ipnet);
907 907 }
908 908
909 909 dlokack(q, mp, DL_PROMISCOFF_REQ);
910 910 }
911 911
912 912 static int
913 913 ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
914 914 {
915 915 int err = 0;
916 916 ip_stack_t *ipst = ips->ips_netstack->netstack_ip;
917 917 uint64_t index = ipnetif->if_index;
918 918
919 919 mutex_enter(&ips->ips_event_lock);
920 920 if (ipnetif->if_multicnt == 0) {
921 921 ASSERT((ipnetif->if_flags &
922 922 (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0);
923 923 if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) {
924 924 err = ip_join_allmulti(index, B_FALSE, ipst);
925 925 if (err != 0)
926 926 goto done;
927 927 ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI;
928 928 }
929 929 if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) {
930 930 err = ip_join_allmulti(index, B_TRUE, ipst);
931 931 if (err != 0 &&
932 932 (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) {
933 933 (void) ip_leave_allmulti(index, B_FALSE, ipst);
934 934 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
935 935 goto done;
936 936 }
937 937 ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI;
938 938 }
939 939 }
940 940 ipnetif->if_multicnt++;
941 941
942 942 done:
943 943 mutex_exit(&ips->ips_event_lock);
944 944 return (err);
945 945 }
946 946
947 947 static void
948 948 ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips)
949 949 {
950 950 int err;
951 951 ip_stack_t *ipst = ips->ips_netstack->netstack_ip;
952 952 uint64_t index = ipnetif->if_index;
953 953
954 954 mutex_enter(&ips->ips_event_lock);
955 955 ASSERT(ipnetif->if_multicnt != 0);
956 956 if (--ipnetif->if_multicnt == 0) {
957 957 if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) {
958 958 err = ip_leave_allmulti(index, B_FALSE, ipst);
959 959 ASSERT(err == 0 || err == ENODEV);
960 960 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI;
961 961 }
962 962 if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) {
963 963 err = ip_leave_allmulti(index, B_TRUE, ipst);
964 964 ASSERT(err == 0 || err == ENODEV);
965 965 ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI;
966 966 }
967 967 }
968 968 mutex_exit(&ips->ips_event_lock);
969 969 }
970 970
971 971 /*
972 972 * Allocate a new mblk_t and put a dl_ipnetinfo_t in it.
973 973 * The structure it copies the header information from,
974 974 * hook_pkt_observe_t, is constructed using network byte
975 975 * order in ipobs_hook(), so there is no conversion here.
976 976 */
977 977 static mblk_t *
978 978 ipnet_addheader(hook_pkt_observe_t *hdr, mblk_t *mp)
979 979 {
980 980 mblk_t *dlhdr;
981 981 dl_ipnetinfo_t *dl;
982 982
983 983 if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) {
984 984 freemsg(mp);
985 985 return (NULL);
986 986 }
987 987 dl = (dl_ipnetinfo_t *)dlhdr->b_rptr;
988 988 dl->dli_version = DL_IPNETINFO_VERSION;
989 989 dl->dli_family = hdr->hpo_family;
990 990 dl->dli_htype = hdr->hpo_htype;
991 991 dl->dli_pktlen = hdr->hpo_pktlen;
992 992 dl->dli_ifindex = hdr->hpo_ifindex;
993 993 dl->dli_grifindex = hdr->hpo_grifindex;
994 994 dl->dli_zsrc = hdr->hpo_zsrc;
995 995 dl->dli_zdst = hdr->hpo_zdst;
996 996 dlhdr->b_wptr += sizeof (*dl);
997 997 dlhdr->b_cont = mp;
998 998
999 999 return (dlhdr);
1000 1000 }
1001 1001
1002 1002 static ipnet_addrtype_t
1003 1003 ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr)
1004 1004 {
1005 1005 list_t *list;
1006 1006 ipnetif_t *ipnetif = ipnet->ipnet_if;
1007 1007 ipnetif_addr_t *ifaddr;
1008 1008 ipnet_addrtype_t addrtype = IPNETADDR_UNKNOWN;
1009 1009
1010 1010 /* First check if the address is multicast or limited broadcast. */
1011 1011 switch (addr->iap_family) {
1012 1012 case AF_INET:
1013 1013 if (CLASSD(*(addr->iap_addr4)) ||
1014 1014 *(addr->iap_addr4) == INADDR_BROADCAST)
1015 1015 return (IPNETADDR_MBCAST);
1016 1016 break;
1017 1017 case AF_INET6:
1018 1018 if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6))
1019 1019 return (IPNETADDR_MBCAST);
1020 1020 break;
1021 1021 }
1022 1022
1023 1023 /*
1024 1024 * Walk the address list to see if the address belongs to our
1025 1025 * interface or is one of our subnet broadcast addresses.
1026 1026 */
1027 1027 mutex_enter(&ipnetif->if_addr_lock);
1028 1028 list = (addr->iap_family == AF_INET) ?
1029 1029 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list;
1030 1030 for (ifaddr = list_head(list);
1031 1031 ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN;
1032 1032 ifaddr = list_next(list, ifaddr)) {
1033 1033 /*
1034 1034 * If we're not in the global zone, then only look at
1035 1035 * addresses in our zone.
1036 1036 */
1037 1037 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
1038 1038 ipnet->ipnet_zoneid != ifaddr->ifa_zone)
1039 1039 continue;
1040 1040 switch (addr->iap_family) {
1041 1041 case AF_INET:
1042 1042 if (ifaddr->ifa_ip4addr != INADDR_ANY &&
1043 1043 *(addr->iap_addr4) == ifaddr->ifa_ip4addr)
1044 1044 addrtype = IPNETADDR_MYADDR;
1045 1045 else if (ifaddr->ifa_brdaddr != INADDR_ANY &&
1046 1046 *(addr->iap_addr4) == ifaddr->ifa_brdaddr)
1047 1047 addrtype = IPNETADDR_MBCAST;
1048 1048 break;
1049 1049 case AF_INET6:
1050 1050 if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6,
1051 1051 &ifaddr->ifa_ip6addr))
1052 1052 addrtype = IPNETADDR_MYADDR;
1053 1053 break;
1054 1054 }
1055 1055 }
1056 1056 mutex_exit(&ipnetif->if_addr_lock);
1057 1057
1058 1058 return (addrtype);
1059 1059 }
1060 1060
1061 1061 /*
1062 1062 * Verify if the packet contained in hdr should be passed up to the
1063 1063 * ipnet client stream.
1064 1064 */
1065 1065 static boolean_t
1066 1066 ipnet_accept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src,
1067 1067 ipnet_addrp_t *dst)
1068 1068 {
1069 1069 boolean_t obsif;
1070 1070 uint64_t ifindex = ipnet->ipnet_if->if_index;
1071 1071 ipnet_addrtype_t srctype;
1072 1072 ipnet_addrtype_t dsttype;
1073 1073
1074 1074 srctype = ipnet_get_addrtype(ipnet, src);
1075 1075 dsttype = ipnet_get_addrtype(ipnet, dst);
1076 1076
1077 1077 /*
1078 1078 * If the packet's ifindex matches ours, or the packet's group ifindex
1079 1079 * matches ours, it's on the interface we're observing. (Thus,
1080 1080 * observing on the group ifindex matches all ifindexes in the group.)
1081 1081 */
1082 1082 obsif = (ntohl(hdr->hpo_ifindex) == ifindex ||
1083 1083 ntohl(hdr->hpo_grifindex) == ifindex);
1084 1084
1085 1085 DTRACE_PROBE5(ipnet_accept__addr,
1086 1086 ipnet_addrtype_t, srctype, ipnet_addrp_t *, src,
1087 1087 ipnet_addrtype_t, dsttype, ipnet_addrp_t *, dst,
1088 1088 boolean_t, obsif);
1089 1089
1090 1090 /*
1091 1091 * Do not allow an ipnet stream to see packets that are not from or to
1092 1092 * its zone. The exception is when zones are using the shared stack
1093 1093 * model. In this case, streams in the global zone have visibility
1094 1094 * into other shared-stack zones, and broadcast and multicast traffic
1095 1095 * is visible by all zones in the stack.
1096 1096 */
1097 1097 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID &&
1098 1098 dsttype != IPNETADDR_MBCAST) {
1099 1099 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) &&
1100 1100 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst))
1101 1101 return (B_FALSE);
1102 1102 }
1103 1103
1104 1104 /*
1105 1105 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the
1106 1106 * packet's IP version.
1107 1107 */
1108 1108 if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) &&
1109 1109 ipnet->ipnet_family != hdr->hpo_family)
1110 1110 return (B_FALSE);
1111 1111
1112 1112 /* If the destination address is ours, then accept the packet. */
1113 1113 if (dsttype == IPNETADDR_MYADDR)
1114 1114 return (B_TRUE);
1115 1115
1116 1116 /*
1117 1117 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are
1118 1118 * sent or received on the interface we're observing, or packets that
1119 1119 * have our source address (this allows us to see packets we send).
1120 1120 */
1121 1121 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) {
1122 1122 if (srctype == IPNETADDR_MYADDR || obsif)
1123 1123 return (B_TRUE);
1124 1124 }
1125 1125
1126 1126 /*
1127 1127 * We accept multicast and broadcast packets transmitted or received
1128 1128 * on the interface we're observing.
1129 1129 */
1130 1130 if (dsttype == IPNETADDR_MBCAST && obsif)
1131 1131 return (B_TRUE);
1132 1132
1133 1133 return (B_FALSE);
1134 1134 }
1135 1135
1136 1136 /*
1137 1137 * Verify if the packet contained in hdr should be passed up to the ipnet
1138 1138 * client stream that's in IPNET_LOMODE.
1139 1139 */
1140 1140 /* ARGSUSED */
1141 1141 static boolean_t
1142 1142 ipnet_loaccept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src,
1143 1143 ipnet_addrp_t *dst)
1144 1144 {
1145 1145 if (hdr->hpo_htype != htons(IPOBS_HOOK_LOCAL)) {
1146 1146 /*
1147 1147 * ipnet_if is only NULL for IPNET_MINOR_LO devices.
1148 1148 */
1149 1149 if (ipnet->ipnet_if == NULL)
1150 1150 return (B_FALSE);
1151 1151 }
1152 1152
1153 1153 /*
1154 1154 * An ipnet stream must not see packets that are not from/to its zone.
1155 1155 */
1156 1156 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) {
1157 1157 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) &&
1158 1158 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst))
1159 1159 return (B_FALSE);
1160 1160 }
1161 1161
1162 1162 return (ipnet->ipnet_family == AF_UNSPEC ||
1163 1163 ipnet->ipnet_family == hdr->hpo_family);
1164 1164 }
1165 1165
1166 1166 static void
1167 1167 ipnet_dispatch(void *arg)
1168 1168 {
1169 1169 mblk_t *mp = arg;
1170 1170 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr;
1171 1171 ipnet_t *ipnet;
1172 1172 mblk_t *netmp;
1173 1173 list_t *list;
1174 1174 ipnet_stack_t *ips;
1175 1175 ipnet_addrp_t src;
1176 1176 ipnet_addrp_t dst;
1177 1177
1178 1178 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
1179 1179
1180 1180 netmp = hdr->hpo_pkt->b_cont;
1181 1181 src.iap_family = hdr->hpo_family;
1182 1182 dst.iap_family = hdr->hpo_family;
1183 1183
1184 1184 if (hdr->hpo_family == AF_INET) {
1185 1185 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src;
1186 1186 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst;
1187 1187 } else {
1188 1188 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src;
1189 1189 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst;
1190 1190 }
1191 1191
1192 1192 ipnet_walkers_inc(ips);
1193 1193
1194 1194 list = &ips->ips_str_list;
1195 1195 for (ipnet = list_head(list); ipnet != NULL;
1196 1196 ipnet = list_next(list, ipnet)) {
1197 1197 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) {
1198 1198 IPSK_BUMP(ips, ik_acceptFail);
1199 1199 continue;
1200 1200 }
1201 1201 IPSK_BUMP(ips, ik_acceptOk);
1202 1202
1203 1203 if (list_next(list, ipnet) == NULL) {
1204 1204 netmp = hdr->hpo_pkt->b_cont;
1205 1205 hdr->hpo_pkt->b_cont = NULL;
1206 1206 } else {
1207 1207 if ((netmp = dupmsg(hdr->hpo_pkt->b_cont)) == NULL &&
1208 1208 (netmp = copymsg(hdr->hpo_pkt->b_cont)) == NULL) {
1209 1209 IPSK_BUMP(ips, ik_duplicationFail);
1210 1210 continue;
1211 1211 }
1212 1212 }
1213 1213
1214 1214 if (ipnet->ipnet_flags & IPNET_INFO) {
1215 1215 if ((netmp = ipnet_addheader(hdr, netmp)) == NULL) {
1216 1216 IPSK_BUMP(ips, ik_dispatchHeaderDrop);
1217 1217 continue;
1218 1218 }
1219 1219 }
1220 1220
1221 1221 if (ipnet->ipnet_rq->q_first == NULL &&
1222 1222 canputnext(ipnet->ipnet_rq)) {
1223 1223 putnext(ipnet->ipnet_rq, netmp);
1224 1224 IPSK_BUMP(ips, ik_dispatchDeliver);
1225 1225 } else if (canput(ipnet->ipnet_rq)) {
1226 1226 (void) putq(ipnet->ipnet_rq, netmp);
1227 1227 IPSK_BUMP(ips, ik_dispatchDeliver);
1228 1228 } else {
1229 1229 freemsg(netmp);
1230 1230 IPSK_BUMP(ips, ik_dispatchPutDrop);
1231 1231 }
1232 1232 }
1233 1233
1234 1234 ipnet_walkers_dec(ips);
1235 1235
1236 1236 freemsg(mp);
1237 1237 }
1238 1238
1239 1239 static void
1240 1240 ipnet_input(mblk_t *mp)
1241 1241 {
1242 1242 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr;
1243 1243 ipnet_stack_t *ips;
1244 1244
1245 1245 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
1246 1246
1247 1247 if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) !=
1248 1248 DDI_SUCCESS) {
1249 1249 IPSK_BUMP(ips, ik_dispatchFail);
1250 1250 freemsg(mp);
1251 1251 } else {
1252 1252 IPSK_BUMP(ips, ik_dispatchOk);
1253 1253 }
1254 1254 }
1255 1255
1256 1256 static ipnetif_t *
1257 1257 ipnet_alloc_if(ipnet_stack_t *ips)
1258 1258 {
1259 1259 ipnetif_t *ipnetif;
1260 1260
1261 1261 if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL)
1262 1262 return (NULL);
1263 1263
1264 1264 mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0);
1265 1265 list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t),
1266 1266 offsetof(ipnetif_addr_t, ifa_link));
1267 1267 list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t),
1268 1268 offsetof(ipnetif_addr_t, ifa_link));
1269 1269 mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0);
1270 1270
1271 1271 ipnetif->if_stackp = ips;
1272 1272
1273 1273 return (ipnetif);
1274 1274 }
1275 1275
1276 1276 /*
1277 1277 * Create a new ipnetif_t and new minor node for it. If creation is
1278 1278 * successful the new ipnetif_t is inserted into an avl_tree
1279 1279 * containing ipnetif's for this stack instance.
1280 1280 */
1281 1281 static ipnetif_t *
1282 1282 ipnetif_create(const char *name, uint64_t index, ipnet_stack_t *ips,
1283 1283 uint64_t ifflags)
1284 1284 {
1285 1285 ipnetif_t *ipnetif;
1286 1286 avl_index_t where = 0;
1287 1287 minor_t ifminor;
1288 1288
1289 1289 /*
1290 1290 * Because ipnetif_create() can be called from a NIC event
1291 1291 * callback, it should not block.
1292 1292 */
1293 1293 ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space);
1294 1294 if (ifminor == (minor_t)-1)
1295 1295 return (NULL);
1296 1296 if ((ipnetif = ipnet_alloc_if(ips)) == NULL) {
1297 1297 id_free(ipnet_minor_space, ifminor);
1298 1298 return (NULL);
1299 1299 }
1300 1300
1301 1301 (void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ);
1302 1302 ipnetif->if_index = (uint_t)index;
1303 1303 ipnetif->if_zoneid = netstack_get_zoneid(ips->ips_netstack);
1304 1304 ipnetif->if_dev = makedevice(ipnet_major, ifminor);
1305 1305
1306 1306 ipnetif->if_refcnt = 1;
1307 1307 if ((ifflags & IFF_LOOPBACK) != 0)
1308 1308 ipnetif->if_flags = IPNETIF_LOOPBACK;
1309 1309
1310 1310 mutex_enter(&ips->ips_avl_lock);
1311 1311 VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL);
1312 1312 avl_insert(&ips->ips_avl_by_index, ipnetif, where);
1313 1313 VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL);
1314 1314 avl_insert(&ips->ips_avl_by_name, ipnetif, where);
1315 1315 mutex_exit(&ips->ips_avl_lock);
1316 1316
1317 1317 return (ipnetif);
1318 1318 }
1319 1319
1320 1320 static void
1321 1321 ipnetif_remove(ipnetif_t *ipnetif, ipnet_stack_t *ips)
1322 1322 {
1323 1323 ipnet_t *ipnet;
1324 1324
1325 1325 ipnet_walkers_inc(ips);
1326 1326 /* Send a SIGHUP to all open streams associated with this ipnetif. */
1327 1327 for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL;
1328 1328 ipnet = list_next(&ips->ips_str_list, ipnet)) {
1329 1329 if (ipnet->ipnet_if == ipnetif)
1330 1330 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
1331 1331 }
1332 1332 ipnet_walkers_dec(ips);
1333 1333 mutex_enter(&ips->ips_avl_lock);
1334 1334 avl_remove(&ips->ips_avl_by_index, ipnetif);
1335 1335 avl_remove(&ips->ips_avl_by_name, ipnetif);
1336 1336 mutex_exit(&ips->ips_avl_lock);
1337 1337 /*
1338 1338 * Release the reference we implicitly held in ipnetif_create().
1339 1339 */
1340 1340 ipnetif_refrele(ipnetif);
1341 1341 }
1342 1342
1343 1343 static void
1344 1344 ipnet_purge_addrlist(list_t *addrlist)
1345 1345 {
1346 1346 ipnetif_addr_t *ifa;
1347 1347
1348 1348 while ((ifa = list_head(addrlist)) != NULL) {
1349 1349 list_remove(addrlist, ifa);
1350 1350 if (ifa->ifa_shared != NULL)
1351 1351 ipnetif_clone_release(ifa->ifa_shared);
1352 1352 kmem_free(ifa, sizeof (*ifa));
1353 1353 }
1354 1354 }
1355 1355
1356 1356 static void
1357 1357 ipnetif_free(ipnetif_t *ipnetif)
1358 1358 {
1359 1359 ASSERT(ipnetif->if_refcnt == 0);
1360 1360 ASSERT(ipnetif->if_sharecnt == 0);
1361 1361
1362 1362 /* Remove IPv4/v6 address lists from the ipnetif */
1363 1363 ipnet_purge_addrlist(&ipnetif->if_ip4addr_list);
1364 1364 list_destroy(&ipnetif->if_ip4addr_list);
1365 1365 ipnet_purge_addrlist(&ipnetif->if_ip6addr_list);
1366 1366 list_destroy(&ipnetif->if_ip6addr_list);
1367 1367 mutex_destroy(&ipnetif->if_addr_lock);
1368 1368 mutex_destroy(&ipnetif->if_reflock);
1369 1369 if (ipnetif->if_dev != 0)
1370 1370 id_free(ipnet_minor_space, getminor(ipnetif->if_dev));
1371 1371 kmem_free(ipnetif, sizeof (*ipnetif));
1372 1372 }
1373 1373
1374 1374 /*
1375 1375 * Create an ipnetif_addr_t with the given logical interface id (lif)
1376 1376 * and add it to the supplied ipnetif. The lif is the netinfo
1377 1377 * representation of logical interface id, and we use this id to match
1378 1378 * incoming netinfo events against our lists of addresses.
1379 1379 */
1380 1380 static void
1381 1381 ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd)
1382 1382 {
1383 1383 ipnetif_addr_t *ifaddr;
1384 1384 zoneid_t zoneid;
1385 1385 struct sockaddr_in bcast;
1386 1386 struct sockaddr_storage addr;
1387 1387 net_ifaddr_t type = NA_ADDRESS;
1388 1388 uint64_t phyif = ipnetif->if_index;
1389 1389
1390 1390 if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 ||
1391 1391 net_getlifzone(nd, phyif, lif, &zoneid) != 0)
1392 1392 return;
1393 1393
1394 1394 if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL)
1395 1395 return;
1396 1396 ifaddr->ifa_zone = zoneid;
1397 1397 ifaddr->ifa_id = lif;
1398 1398 ifaddr->ifa_shared = NULL;
1399 1399
1400 1400 switch (addr.ss_family) {
1401 1401 case AF_INET:
1402 1402 ifaddr->ifa_ip4addr =
1403 1403 ((struct sockaddr_in *)&addr)->sin_addr.s_addr;
1404 1404 /*
1405 1405 * Try and get the broadcast address. Note that it's okay for
1406 1406 * an interface to not have a broadcast address, so we don't
1407 1407 * fail the entire operation if net_getlifaddr() fails here.
1408 1408 */
1409 1409 type = NA_BROADCAST;
1410 1410 if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0)
1411 1411 ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr;
1412 1412 break;
1413 1413 case AF_INET6:
1414 1414 ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr;
1415 1415 break;
1416 1416 }
1417 1417
1418 1418 /*
1419 1419 * The zoneid stored in ipnetif_t needs to correspond to the actual
1420 1420 * zone the address is being used in. This facilitates finding the
1421 1421 * correct netstack_t pointer, amongst other things, later.
1422 1422 */
1423 1423 if (zoneid == ALL_ZONES)
1424 1424 zoneid = GLOBAL_ZONEID;
1425 1425
1426 1426 mutex_enter(&ipnetif->if_addr_lock);
1427 1427 if (zoneid != ipnetif->if_zoneid) {
1428 1428 ipnetif_t *ifp2;
1429 1429
1430 1430 ifp2 = ipnetif_clone_create(ipnetif, zoneid);
1431 1431 ifaddr->ifa_shared = ifp2;
1432 1432 }
1433 1433 list_insert_tail(addr.ss_family == AF_INET ?
1434 1434 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr);
1435 1435 mutex_exit(&ipnetif->if_addr_lock);
1436 1436 }
1437 1437
1438 1438 static void
1439 1439 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6)
1440 1440 {
1441 1441 mutex_enter(&ipnetif->if_addr_lock);
1442 1442 if (ifaddr->ifa_shared != NULL)
1443 1443 ipnetif_clone_release(ifaddr->ifa_shared);
1444 1444
1445 1445 list_remove(isv6 ?
1446 1446 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr);
1447 1447 mutex_exit(&ipnetif->if_addr_lock);
1448 1448 kmem_free(ifaddr, sizeof (*ifaddr));
1449 1449 }
1450 1450
1451 1451 static void
1452 1452 ipnet_plumb_ev(ipnet_nicevent_t *ipne, ipnet_stack_t *ips, boolean_t isv6)
1453 1453 {
1454 1454 ipnetif_t *ipnetif;
1455 1455 boolean_t refrele_needed = B_TRUE;
1456 1456 uint64_t ifflags;
1457 1457 uint64_t ifindex;
1458 1458 char *ifname;
1459 1459
1460 1460 ifflags = 0;
1461 1461 ifname = ipne->ipne_ifname;
1462 1462 ifindex = ipne->ipne_ifindex;
1463 1463
1464 1464 (void) net_getlifflags(ipne->ipne_protocol, ifindex, 0, &ifflags);
1465 1465
1466 1466 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) {
1467 1467 ipnetif = ipnetif_create(ifname, ifindex, ips, ifflags);
1468 1468 refrele_needed = B_FALSE;
1469 1469 }
1470 1470 if (ipnetif != NULL) {
1471 1471 ipnetif->if_flags |=
1472 1472 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED;
1473 1473 }
1474 1474
1475 1475 if (ipnetif->if_multicnt != 0) {
1476 1476 if (ip_join_allmulti(ifindex, isv6,
1477 1477 ips->ips_netstack->netstack_ip) == 0) {
1478 1478 ipnetif->if_flags |=
1479 1479 isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI;
1480 1480 }
1481 1481 }
1482 1482
1483 1483 if (refrele_needed)
1484 1484 ipnetif_refrele(ipnetif);
1485 1485 }
1486 1486
1487 1487 static void
1488 1488 ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6)
1489 1489 {
1490 1490 ipnetif_t *ipnetif;
1491 1491
1492 1492 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
1493 1493 return;
1494 1494
1495 1495 mutex_enter(&ipnetif->if_addr_lock);
1496 1496 ipnet_purge_addrlist(isv6 ?
1497 1497 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list);
1498 1498 mutex_exit(&ipnetif->if_addr_lock);
1499 1499
1500 1500 /*
1501 1501 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive
1502 1502 * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif
1503 1503 * if both IPv4 and IPv6 interfaces have been unplumbed.
1504 1504 */
1505 1505 ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED;
1506 1506 if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED)))
1507 1507 ipnetif_remove(ipnetif, ips);
1508 1508 ipnetif_refrele(ipnetif);
1509 1509 }
1510 1510
1511 1511 static void
1512 1512 ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd,
1513 1513 ipnet_stack_t *ips, boolean_t isv6)
1514 1514 {
1515 1515 ipnetif_t *ipnetif;
1516 1516 ipnetif_addr_t *ifaddr;
1517 1517
1518 1518 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
1519 1519 return;
1520 1520 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) {
1521 1521 /*
1522 1522 * We must have missed a NE_LIF_DOWN event. Delete this
1523 1523 * ifaddr and re-create it.
1524 1524 */
1525 1525 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
1526 1526 }
1527 1527
1528 1528 ipnet_add_ifaddr(lifindex, ipnetif, nd);
1529 1529 ipnetif_refrele(ipnetif);
1530 1530 }
1531 1531
1532 1532 static void
1533 1533 ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips,
1534 1534 boolean_t isv6)
1535 1535 {
1536 1536 ipnetif_t *ipnetif;
1537 1537 ipnetif_addr_t *ifaddr;
1538 1538
1539 1539 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL)
1540 1540 return;
1541 1541 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL)
1542 1542 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6);
1543 1543 ipnetif_refrele(ipnetif);
1544 1544 /*
1545 1545 * Make sure that open streams on this ipnetif are still allowed to
1546 1546 * have it open.
1547 1547 */
1548 1548 ipnetif_zonecheck(ipnetif, ips);
1549 1549 }
1550 1550
1551 1551 /*
1552 1552 * This callback from the NIC event framework dispatches a taskq as the event
1553 1553 * handlers may block.
1554 1554 */
1555 1555 /* ARGSUSED */
1556 1556 static int
1557 1557 ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg)
1558 1558 {
1559 1559 ipnet_stack_t *ips = arg;
1560 1560 hook_nic_event_t *hn = (hook_nic_event_t *)info;
1561 1561 ipnet_nicevent_t *ipne;
1562 1562
1563 1563 if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL)
1564 1564 return (0);
1565 1565 ipne->ipne_event = hn->hne_event;
1566 1566 ipne->ipne_protocol = hn->hne_protocol;
1567 1567 ipne->ipne_stackid = ips->ips_netstack->netstack_stackid;
1568 1568 ipne->ipne_ifindex = hn->hne_nic;
1569 1569 ipne->ipne_lifindex = hn->hne_lif;
1570 1570 if (hn->hne_datalen != 0) {
1571 1571 (void) strlcpy(ipne->ipne_ifname, hn->hne_data,
1572 1572 sizeof (ipne->ipne_ifname));
1573 1573 }
1574 1574 (void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task,
1575 1575 ipne, DDI_NOSLEEP);
1576 1576 return (0);
1577 1577 }
1578 1578
1579 1579 static void
1580 1580 ipnet_nicevent_task(void *arg)
1581 1581 {
1582 1582 ipnet_nicevent_t *ipne = arg;
1583 1583 netstack_t *ns;
1584 1584 ipnet_stack_t *ips;
1585 1585 boolean_t isv6;
1586 1586
1587 1587 if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL)
1588 1588 goto done;
1589 1589 ips = ns->netstack_ipnet;
1590 1590 isv6 = (ipne->ipne_protocol == ips->ips_ndv6);
1591 1591
1592 1592 mutex_enter(&ips->ips_event_lock);
1593 1593 switch (ipne->ipne_event) {
1594 1594 case NE_PLUMB:
1595 1595 ipnet_plumb_ev(ipne, ips, isv6);
1596 1596 break;
1597 1597 case NE_UNPLUMB:
1598 1598 ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6);
1599 1599 break;
1600 1600 case NE_LIF_UP:
1601 1601 ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex,
1602 1602 ipne->ipne_protocol, ips, isv6);
1603 1603 break;
1604 1604 case NE_LIF_DOWN:
1605 1605 ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips,
1606 1606 isv6);
1607 1607 break;
1608 1608 default:
1609 1609 break;
1610 1610 }
1611 1611 mutex_exit(&ips->ips_event_lock);
1612 1612 done:
1613 1613 if (ns != NULL)
1614 1614 netstack_rele(ns);
1615 1615 kmem_free(ipne, sizeof (ipnet_nicevent_t));
1616 1616 }
1617 1617
1618 1618 dev_t
1619 1619 ipnet_if_getdev(char *name, zoneid_t zoneid)
1620 1620 {
1621 1621 netstack_t *ns;
1622 1622 ipnet_stack_t *ips;
1623 1623 ipnetif_t *ipnetif;
1624 1624 dev_t dev = (dev_t)-1;
1625 1625
1626 1626 if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
1627 1627 return (dev);
1628 1628 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL)
1629 1629 return (dev);
1630 1630
1631 1631 ips = ns->netstack_ipnet;
1632 1632 mutex_enter(&ips->ips_avl_lock);
1633 1633 if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) {
1634 1634 if (ipnetif_in_zone(ipnetif, zoneid, ips))
1635 1635 dev = ipnetif->if_dev;
1636 1636 }
1637 1637 mutex_exit(&ips->ips_avl_lock);
1638 1638 netstack_rele(ns);
1639 1639
1640 1640 return (dev);
1641 1641 }
1642 1642
1643 1643 static ipnetif_t *
1644 1644 ipnetif_getby_index(uint64_t id, ipnet_stack_t *ips)
1645 1645 {
1646 1646 ipnetif_t *ipnetif;
1647 1647
1648 1648 mutex_enter(&ips->ips_avl_lock);
1649 1649 if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL)
1650 1650 ipnetif_refhold(ipnetif);
1651 1651 mutex_exit(&ips->ips_avl_lock);
1652 1652 return (ipnetif);
1653 1653 }
1654 1654
1655 1655 static ipnetif_t *
1656 1656 ipnetif_getby_dev(dev_t dev, ipnet_stack_t *ips)
1657 1657 {
1658 1658 ipnetif_t *ipnetif;
1659 1659 avl_tree_t *tree;
1660 1660
1661 1661 mutex_enter(&ips->ips_avl_lock);
1662 1662 tree = &ips->ips_avl_by_index;
1663 1663 for (ipnetif = avl_first(tree); ipnetif != NULL;
1664 1664 ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) {
1665 1665 if (ipnetif->if_dev == dev) {
1666 1666 ipnetif_refhold(ipnetif);
1667 1667 break;
1668 1668 }
1669 1669 }
1670 1670 mutex_exit(&ips->ips_avl_lock);
1671 1671 return (ipnetif);
1672 1672 }
1673 1673
1674 1674 static ipnetif_addr_t *
1675 1675 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6)
1676 1676 {
1677 1677 ipnetif_addr_t *ifaddr;
1678 1678 list_t *list;
1679 1679
1680 1680 mutex_enter(&ipnetif->if_addr_lock);
1681 1681 list = isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list;
1682 1682 for (ifaddr = list_head(list); ifaddr != NULL;
1683 1683 ifaddr = list_next(list, ifaddr)) {
1684 1684 if (lid == ifaddr->ifa_id)
1685 1685 break;
1686 1686 }
1687 1687 mutex_exit(&ipnetif->if_addr_lock);
1688 1688 return (ifaddr);
1689 1689 }
1690 1690
1691 1691 /* ARGSUSED */
1692 1692 static void *
1693 1693 ipnet_stack_init(netstackid_t stackid, netstack_t *ns)
1694 1694 {
1695 1695 ipnet_stack_t *ips;
1696 1696
1697 1697 ips = kmem_zalloc(sizeof (*ips), KM_SLEEP);
1698 1698 ips->ips_netstack = ns;
1699 1699 mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0);
1700 1700 avl_create(&ips->ips_avl_by_index, ipnetif_compare_index,
1701 1701 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index));
1702 1702 avl_create(&ips->ips_avl_by_name, ipnetif_compare_name,
1703 1703 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name));
1704 1704 avl_create(&ips->ips_avl_by_shared, ipnetif_compare_name_zone,
1705 1705 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_shared));
1706 1706 mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL);
1707 1707 cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL);
1708 1708 list_create(&ips->ips_str_list, sizeof (ipnet_t),
1709 1709 offsetof(ipnet_t, ipnet_next));
1710 1710 ipnet_register_netihook(ips);
1711 1711 return (ips);
1712 1712 }
1713 1713
1714 1714 /* ARGSUSED */
1715 1715 static void
1716 1716 ipnet_stack_fini(netstackid_t stackid, void *arg)
1717 1717 {
1718 1718 ipnet_stack_t *ips = arg;
1719 1719 ipnetif_t *ipnetif, *nipnetif;
1720 1720
1721 1721 if (ips->ips_kstatp != NULL) {
1722 1722 zoneid_t zoneid;
1723 1723
1724 1724 zoneid = netstackid_to_zoneid(stackid);
1725 1725 net_kstat_delete(net_zoneidtonetid(zoneid), ips->ips_kstatp);
1726 1726 }
1727 1727 if (ips->ips_ndv4 != NULL) {
1728 1728 VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS,
1729 1729 ips->ips_nicevents) == 0);
1730 1730 VERIFY(net_protocol_release(ips->ips_ndv4) == 0);
1731 1731 }
1732 1732 if (ips->ips_ndv6 != NULL) {
1733 1733 VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS,
1734 1734 ips->ips_nicevents) == 0);
1735 1735 VERIFY(net_protocol_release(ips->ips_ndv6) == 0);
1736 1736 }
1737 1737 hook_free(ips->ips_nicevents);
1738 1738
1739 1739 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
1740 1740 ipnetif = nipnetif) {
1741 1741 nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif);
1742 1742 ipnetif_remove(ipnetif, ips);
1743 1743 }
1744 1744 avl_destroy(&ips->ips_avl_by_shared);
1745 1745 avl_destroy(&ips->ips_avl_by_index);
1746 1746 avl_destroy(&ips->ips_avl_by_name);
1747 1747 mutex_destroy(&ips->ips_avl_lock);
1748 1748 mutex_destroy(&ips->ips_walkers_lock);
1749 1749 cv_destroy(&ips->ips_walkers_cv);
1750 1750 list_destroy(&ips->ips_str_list);
1751 1751 kmem_free(ips, sizeof (*ips));
1752 1752 }
1753 1753
1754 1754 /* Do any of the addresses in addrlist belong the supplied zoneid? */
1755 1755 static boolean_t
1756 1756 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid)
1757 1757 {
1758 1758 ipnetif_addr_t *ifa;
1759 1759
1760 1760 for (ifa = list_head(addrlist); ifa != NULL;
1761 1761 ifa = list_next(addrlist, ifa)) {
1762 1762 if (ifa->ifa_zone == zoneid)
1763 1763 return (B_TRUE);
1764 1764 }
1765 1765 return (B_FALSE);
1766 1766 }
1767 1767
1768 1768 /* Should the supplied ipnetif be visible from the supplied zoneid? */
1769 1769 static boolean_t
1770 1770 ipnetif_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips)
1771 1771 {
1772 1772 int ret;
1773 1773
1774 1774 /*
1775 1775 * The global zone has visibility into all interfaces in the global
1776 1776 * stack, and exclusive stack zones have visibility into all
1777 1777 * interfaces in their stack.
1778 1778 */
1779 1779 if (zoneid == GLOBAL_ZONEID ||
1780 1780 ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
1781 1781 return (B_TRUE);
1782 1782
1783 1783 /*
1784 1784 * Shared-stack zones only have visibility for interfaces that have
1785 1785 * addresses in their zone.
1786 1786 */
1787 1787 mutex_enter(&ipnetif->if_addr_lock);
1788 1788 ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) ||
1789 1789 ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid);
1790 1790 mutex_exit(&ipnetif->if_addr_lock);
1791 1791 return (ret);
1792 1792 }
1793 1793
1794 1794 /*
1795 1795 * Verify that any ipnet_t that has a reference to the supplied ipnetif should
1796 1796 * still be allowed to have it open. A given ipnet_t may no longer be allowed
1797 1797 * to have an ipnetif open if there are no longer any addresses that belong to
1798 1798 * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the
1799 1799 * case, send the ipnet_t an M_HANGUP.
1800 1800 */
1801 1801 static void
1802 1802 ipnetif_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips)
1803 1803 {
1804 1804 list_t *strlist = &ips->ips_str_list;
1805 1805 ipnet_t *ipnet;
1806 1806
1807 1807 ipnet_walkers_inc(ips);
1808 1808 for (ipnet = list_head(strlist); ipnet != NULL;
1809 1809 ipnet = list_next(strlist, ipnet)) {
1810 1810 if (ipnet->ipnet_if != ipnetif)
1811 1811 continue;
1812 1812 if (!ipnetif_in_zone(ipnetif, ipnet->ipnet_zoneid, ips))
1813 1813 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
1814 1814 }
1815 1815 ipnet_walkers_dec(ips);
1816 1816 }
1817 1817
1818 1818 void
1819 1819 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid)
1820 1820 {
1821 1821 ipnetif_t *ipnetif;
1822 1822 list_t cbdata;
1823 1823 ipnetif_cbdata_t *cbnode;
1824 1824 netstack_t *ns;
1825 1825 ipnet_stack_t *ips;
1826 1826
1827 1827 /*
1828 1828 * On labeled systems, non-global zones shouldn't see anything
1829 1829 * in /dev/ipnet.
1830 1830 */
1831 1831 if (is_system_labeled() && zoneid != GLOBAL_ZONEID)
1832 1832 return;
1833 1833
1834 1834 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL)
1835 1835 return;
1836 1836
1837 1837 ips = ns->netstack_ipnet;
1838 1838 list_create(&cbdata, sizeof (ipnetif_cbdata_t),
1839 1839 offsetof(ipnetif_cbdata_t, ic_next));
1840 1840
1841 1841 mutex_enter(&ips->ips_avl_lock);
1842 1842 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL;
1843 1843 ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) {
1844 1844 if (!ipnetif_in_zone(ipnetif, zoneid, ips))
1845 1845 continue;
1846 1846 cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP);
1847 1847 (void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ);
1848 1848 cbnode->ic_dev = ipnetif->if_dev;
1849 1849 list_insert_head(&cbdata, cbnode);
1850 1850 }
1851 1851 mutex_exit(&ips->ips_avl_lock);
1852 1852
1853 1853 while ((cbnode = list_head(&cbdata)) != NULL) {
1854 1854 cb(cbnode->ic_ifname, arg, cbnode->ic_dev);
1855 1855 list_remove(&cbdata, cbnode);
1856 1856 kmem_free(cbnode, sizeof (ipnetif_cbdata_t));
1857 1857 }
1858 1858 list_destroy(&cbdata);
1859 1859 netstack_rele(ns);
1860 1860 }
1861 1861
1862 1862 static int
1863 1863 ipnetif_compare_index(const void *index_ptr, const void *ipnetifp)
1864 1864 {
1865 1865 int64_t index1 = *((int64_t *)index_ptr);
1866 1866 int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index;
1867 1867
1868 1868 return (SIGNOF(index2 - index1));
1869 1869 }
1870 1870
1871 1871 static int
1872 1872 ipnetif_compare_name(const void *name_ptr, const void *ipnetifp)
1873 1873 {
1874 1874 int res;
1875 1875
1876 1876 res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr);
1877 1877 return (SIGNOF(res));
1878 1878 }
1879 1879
1880 1880 static int
1881 1881 ipnetif_compare_name_zone(const void *key_ptr, const void *ipnetifp)
1882 1882 {
1883 1883 const uintptr_t *ptr = key_ptr;
1884 1884 const ipnetif_t *ifp;
1885 1885 int res;
1886 1886
1887 1887 ifp = ipnetifp;
1888 1888 res = ifp->if_zoneid - ptr[0];
1889 1889 if (res != 0)
1890 1890 return (SIGNOF(res));
1891 1891 res = strcmp(ifp->if_name, (char *)ptr[1]);
1892 1892 return (SIGNOF(res));
1893 1893 }
1894 1894
1895 1895 static void
1896 1896 ipnetif_refhold(ipnetif_t *ipnetif)
1897 1897 {
1898 1898 mutex_enter(&ipnetif->if_reflock);
1899 1899 ipnetif->if_refcnt++;
1900 1900 mutex_exit(&ipnetif->if_reflock);
1901 1901 }
1902 1902
1903 1903 static void
1904 1904 ipnetif_refrele(ipnetif_t *ipnetif)
1905 1905 {
1906 1906 mutex_enter(&ipnetif->if_reflock);
1907 1907 ASSERT(ipnetif->if_refcnt > 0);
1908 1908 if (--ipnetif->if_refcnt == 0)
1909 1909 ipnetif_free(ipnetif);
1910 1910 else
1911 1911 mutex_exit(&ipnetif->if_reflock);
1912 1912 }
1913 1913
1914 1914 static void
1915 1915 ipnet_walkers_inc(ipnet_stack_t *ips)
1916 1916 {
1917 1917 mutex_enter(&ips->ips_walkers_lock);
1918 1918 ips->ips_walkers_cnt++;
1919 1919 mutex_exit(&ips->ips_walkers_lock);
1920 1920 }
1921 1921
1922 1922 static void
1923 1923 ipnet_walkers_dec(ipnet_stack_t *ips)
1924 1924 {
1925 1925 mutex_enter(&ips->ips_walkers_lock);
1926 1926 ASSERT(ips->ips_walkers_cnt != 0);
1927 1927 if (--ips->ips_walkers_cnt == 0)
1928 1928 cv_broadcast(&ips->ips_walkers_cv);
1929 1929 mutex_exit(&ips->ips_walkers_lock);
1930 1930 }
1931 1931
1932 1932 /*ARGSUSED*/
1933 1933 static int
1934 1934 ipobs_bounce_func(hook_event_token_t token, hook_data_t info, void *arg)
1935 1935 {
1936 1936 hook_pkt_observe_t *hdr;
1937 1937 pfv_t func = (pfv_t)arg;
1938 1938 mblk_t *mp;
1939 1939
1940 1940 hdr = (hook_pkt_observe_t *)info;
1941 1941 /*
1942 1942 * Code in ip_input() expects that it is the only one accessing the
1943 1943 * packet.
1944 1944 */
1945 1945 mp = copymsg(hdr->hpo_pkt);
1946 1946 if (mp == NULL) {
1947 1947 netstack_t *ns = hdr->hpo_ctx;
1948 1948 ipnet_stack_t *ips = ns->netstack_ipnet;
1949 1949
1950 1950 IPSK_BUMP(ips, ik_dispatchDupDrop);
1951 1951 return (0);
1952 1952 }
1953 1953
1954 1954 hdr = (hook_pkt_observe_t *)mp->b_rptr;
1955 1955 hdr->hpo_pkt = mp;
1956 1956
1957 1957 func(mp);
1958 1958
1959 1959 return (0);
1960 1960 }
1961 1961
1962 1962 hook_t *
1963 1963 ipobs_register_hook(netstack_t *ns, pfv_t func)
1964 1964 {
1965 1965 ip_stack_t *ipst = ns->netstack_ip;
1966 1966 char name[32];
1967 1967 hook_t *hook;
1968 1968
1969 1969 HOOK_INIT(hook, ipobs_bounce_func, "", (void *)func);
1970 1970 VERIFY(hook != NULL);
1971 1971
1972 1972 /*
1973 1973 * To register multiple hooks with he same callback function,
1974 1974 * a unique name is needed.
1975 1975 */
1976 1976 (void) snprintf(name, sizeof (name), "ipobserve_%p", (void *)hook);
1977 1977 hook->h_name = strdup(name);
1978 1978
1979 1979 (void) net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook);
1980 1980 (void) net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook);
1981 1981
1982 1982 return (hook);
1983 1983 }
1984 1984
1985 1985 void
1986 1986 ipobs_unregister_hook(netstack_t *ns, hook_t *hook)
1987 1987 {
1988 1988 ip_stack_t *ipst = ns->netstack_ip;
1989 1989
1990 1990 (void) net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook);
1991 1991
1992 1992 (void) net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook);
1993 1993
1994 1994 strfree(hook->h_name);
1995 1995
1996 1996 hook_free(hook);
1997 1997 }
1998 1998
1999 1999 /* ******************************************************************** */
2000 2000 /* BPF Functions below */
2001 2001 /* ******************************************************************** */
2002 2002
2003 2003 /*
2004 2004 * Convenience function to make mapping a zoneid to an ipnet_stack_t easy.
2005 2005 */
2006 2006 ipnet_stack_t *
2007 2007 ipnet_find_by_zoneid(zoneid_t zoneid)
2008 2008 {
2009 2009 netstack_t *ns;
2010 2010
2011 2011 VERIFY((ns = netstack_find_by_zoneid(zoneid)) != NULL);
2012 2012 return (ns->netstack_ipnet);
2013 2013 }
2014 2014
2015 2015 /*
2016 2016 * Functions, such as the above ipnet_find_by_zoneid(), will return a
2017 2017 * pointer to ipnet_stack_t by calling a netstack lookup function.
2018 2018 * The netstack_find_*() functions return a pointer after doing a "hold"
2019 2019 * on the data structure and thereby require a "release" when the caller
2020 2020 * is finished with it. We need to mirror that API here and thus a caller
2021 2021 * of ipnet_find_by_zoneid() is required to call ipnet_rele().
2022 2022 */
2023 2023 void
2024 2024 ipnet_rele(ipnet_stack_t *ips)
2025 2025 {
2026 2026 netstack_rele(ips->ips_netstack);
2027 2027 }
2028 2028
2029 2029 /*
2030 2030 */
2031 2031 void
2032 2032 ipnet_set_itap(bpf_itap_fn_t tapfunc)
2033 2033 {
2034 2034 ipnet_itap = tapfunc;
2035 2035 }
2036 2036
2037 2037 /*
2038 2038 * The list of interfaces available via ipnet is private for each zone,
2039 2039 * so the AVL tree of each zone must be searched for a given name, even
2040 2040 * if all names are unique.
2041 2041 */
2042 2042 int
2043 2043 ipnet_open_byname(const char *name, ipnetif_t **ptr, zoneid_t zoneid)
2044 2044 {
2045 2045 ipnet_stack_t *ips;
2046 2046 ipnetif_t *ipnetif;
2047 2047
2048 2048 ASSERT(ptr != NULL);
2049 2049 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
2050 2050
2051 2051 mutex_enter(&ips->ips_avl_lock);
2052 2052
2053 2053 /*
2054 2054 * Shared instance zone?
2055 2055 */
2056 2056 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) {
2057 2057 uintptr_t key[2] = { zoneid, (uintptr_t)name };
2058 2058
2059 2059 ipnetif = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL);
2060 2060 } else {
2061 2061 ipnetif = avl_find(&ips->ips_avl_by_name, (void *)name, NULL);
2062 2062 }
2063 2063 if (ipnetif != NULL)
2064 2064 ipnetif_refhold(ipnetif);
2065 2065 mutex_exit(&ips->ips_avl_lock);
2066 2066
2067 2067 *ptr = ipnetif;
2068 2068 ipnet_rele(ips);
2069 2069
2070 2070 if (ipnetif == NULL)
2071 2071 return (ESRCH);
2072 2072 return (0);
2073 2073 }
2074 2074
2075 2075 void
2076 2076 ipnet_close_byhandle(ipnetif_t *ifp)
2077 2077 {
2078 2078 ASSERT(ifp != NULL);
2079 2079 ipnetif_refrele(ifp);
2080 2080 }
2081 2081
2082 2082 const char *
2083 2083 ipnet_name(ipnetif_t *ifp)
2084 2084 {
2085 2085 ASSERT(ifp != NULL);
2086 2086 return (ifp->if_name);
2087 2087 }
2088 2088
2089 2089 /*
2090 2090 * To find the linkid for a given name, it is necessary to know which zone
2091 2091 * the interface name belongs to and to search the avl tree for that zone
2092 2092 * as there is no master list of all interfaces and which zone they belong
2093 2093 * to. It is assumed that the caller of this function is somehow already
2094 2094 * working with the ipnet interfaces and hence the ips_event_lock is held.
2095 2095 * When BPF calls into this function, it is doing so because of an event
2096 2096 * in ipnet, and thus ipnet holds the ips_event_lock. Thus the datalink id
2097 2097 * value returned has meaning without the need for grabbing a hold on the
2098 2098 * owning structure.
2099 2099 */
2100 2100 int
2101 2101 ipnet_get_linkid_byname(const char *name, uint_t *idp, zoneid_t zoneid)
2102 2102 {
2103 2103 ipnet_stack_t *ips;
2104 2104 ipnetif_t *ifp;
2105 2105
2106 2106 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL);
2107 2107 ASSERT(mutex_owned(&ips->ips_event_lock));
2108 2108
2109 2109 mutex_enter(&ips->ips_avl_lock);
2110 2110 ifp = avl_find(&ips->ips_avl_by_name, (void *)name, NULL);
2111 2111 if (ifp != NULL)
2112 2112 *idp = (uint_t)ifp->if_index;
2113 2113
2114 2114 /*
2115 2115 * Shared instance zone?
2116 2116 */
2117 2117 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) {
2118 2118 uintptr_t key[2] = { zoneid, (uintptr_t)name };
2119 2119
2120 2120 ifp = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL);
2121 2121 if (ifp != NULL)
2122 2122 *idp = (uint_t)ifp->if_index;
2123 2123 }
2124 2124
2125 2125 mutex_exit(&ips->ips_avl_lock);
2126 2126 ipnet_rele(ips);
2127 2127
2128 2128 if (ifp == NULL)
2129 2129 return (ESRCH);
2130 2130 return (0);
2131 2131 }
2132 2132
2133 2133 /*
2134 2134 * Strictly speaking, there is no such thing as a "client" in ipnet, like
2135 2135 * there is in mac. BPF only needs to have this because it is required as
2136 2136 * part of interfacing correctly with mac. The reuse of the original
2137 2137 * ipnetif_t as a client poses no danger, so long as it is done with its
2138 2138 * own ref-count'd hold that is given up on close.
2139 2139 */
2140 2140 int
2141 2141 ipnet_client_open(ipnetif_t *ptr, ipnetif_t **result)
2142 2142 {
2143 2143 ASSERT(ptr != NULL);
2144 2144 ASSERT(result != NULL);
2145 2145 ipnetif_refhold(ptr);
2146 2146 *result = ptr;
2147 2147
2148 2148 return (0);
2149 2149 }
2150 2150
2151 2151 void
2152 2152 ipnet_client_close(ipnetif_t *ptr)
2153 2153 {
2154 2154 ASSERT(ptr != NULL);
2155 2155 ipnetif_refrele(ptr);
2156 2156 }
2157 2157
2158 2158 /*
2159 2159 * This is called from BPF when it needs to start receiving packets
2160 2160 * from ipnet.
2161 2161 *
2162 2162 * The use of the ipnet_t structure here is somewhat lightweight when
2163 2163 * compared to how it is used elsewhere but it already has all of the
2164 2164 * right fields in it, so reuse here doesn't seem out of order. Its
2165 2165 * primary purpose here is to provide the means to store pointers for
2166 2166 * use when ipnet_promisc_remove() needs to be called.
2167 2167 *
2168 2168 * This should never be called for the IPNET_MINOR_LO device as it is
2169 2169 * never created via ipnetif_create.
2170 2170 */
2171 2171 /*ARGSUSED*/
2172 2172 int
2173 2173 ipnet_promisc_add(void *handle, uint_t how, void *data, uintptr_t *mhandle,
2174 2174 int flags)
2175 2175 {
2176 2176 ip_stack_t *ipst;
2177 2177 netstack_t *ns;
2178 2178 ipnetif_t *ifp;
2179 2179 ipnet_t *ipnet;
2180 2180 char name[32];
2181 2181 int error;
2182 2182
2183 2183 ifp = (ipnetif_t *)handle;
2184 2184 ns = netstack_find_by_zoneid(ifp->if_zoneid);
2185 2185
2186 2186 if ((how == DL_PROMISC_PHYS) || (how == DL_PROMISC_MULTI)) {
2187 2187 error = ipnet_join_allmulti(ifp, ns->netstack_ipnet);
2188 2188 if (error != 0)
2189 2189 return (error);
2190 2190 } else {
2191 2191 return (EINVAL);
2192 2192 }
2193 2193
2194 2194 ipnet = kmem_zalloc(sizeof (*ipnet), KM_SLEEP);
2195 2195 ipnet->ipnet_if = ifp;
2196 2196 ipnet->ipnet_ns = ns;
2197 2197 ipnet->ipnet_flags = flags;
2198 2198
2199 2199 if ((ifp->if_flags & IPNETIF_LOOPBACK) != 0) {
2200 2200 ipnet->ipnet_acceptfn = ipnet_loaccept;
2201 2201 } else {
2202 2202 ipnet->ipnet_acceptfn = ipnet_accept;
2203 2203 }
2204 2204
2205 2205 /*
2206 2206 * To register multiple hooks with the same callback function,
2207 2207 * a unique name is needed.
2208 2208 */
2209 2209 HOOK_INIT(ipnet->ipnet_hook, ipnet_bpf_bounce, "", ipnet);
2210 2210 (void) snprintf(name, sizeof (name), "ipnet_promisc_%p",
2211 2211 (void *)ipnet->ipnet_hook);
2212 2212 ipnet->ipnet_hook->h_name = strdup(name);
2213 2213 ipnet->ipnet_data = data;
2214 2214 ipnet->ipnet_zoneid = ifp->if_zoneid;
2215 2215
2216 2216 ipst = ns->netstack_ip;
2217 2217
2218 2218 error = net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE,
2219 2219 ipnet->ipnet_hook);
2220 2220 if (error != 0)
2221 2221 goto regfail;
2222 2222
2223 2223 error = net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE,
2224 2224 ipnet->ipnet_hook);
2225 2225 if (error != 0) {
2226 2226 (void) net_hook_unregister(ipst->ips_ip4_observe_pr,
2227 2227 NH_OBSERVE, ipnet->ipnet_hook);
2228 2228 goto regfail;
2229 2229 }
2230 2230
2231 2231 *mhandle = (uintptr_t)ipnet;
2232 2232 netstack_rele(ns);
2233 2233
2234 2234 return (0);
2235 2235
2236 2236 regfail:
2237 2237 cmn_err(CE_WARN, "net_hook_register failed: %d", error);
2238 2238 strfree(ipnet->ipnet_hook->h_name);
2239 2239 hook_free(ipnet->ipnet_hook);
2240 2240 netstack_rele(ns);
2241 2241 return (error);
2242 2242 }
2243 2243
2244 2244 void
2245 2245 ipnet_promisc_remove(void *data)
2246 2246 {
2247 2247 ip_stack_t *ipst;
2248 2248 ipnet_t *ipnet;
2249 2249 hook_t *hook;
2250 2250
2251 2251 ipnet = data;
2252 2252 ipst = ipnet->ipnet_ns->netstack_ip;
2253 2253 hook = ipnet->ipnet_hook;
2254 2254
2255 2255 VERIFY(net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE,
2256 2256 hook) == 0);
2257 2257
2258 2258 VERIFY(net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE,
2259 2259 hook) == 0);
2260 2260
2261 2261 strfree(hook->h_name);
2262 2262
2263 2263 hook_free(hook);
2264 2264
2265 2265 kmem_free(ipnet, sizeof (*ipnet));
2266 2266 }
2267 2267
2268 2268 /*
2269 2269 * arg here comes from the ipnet_t allocated in ipnet_promisc_add.
2270 2270 * An important field from that structure is "ipnet_data" that
2271 2271 * contains the "data" pointer passed into ipnet_promisc_add: it needs
2272 2272 * to be passed back to bpf when we call into ipnet_itap.
2273 2273 *
2274 2274 * ipnet_itap is set by ipnet_set_bpfattach, which in turn is called
2275 2275 * from BPF.
2276 2276 */
2277 2277 /*ARGSUSED*/
2278 2278 static int
2279 2279 ipnet_bpf_bounce(hook_event_token_t token, hook_data_t info, void *arg)
2280 2280 {
2281 2281 hook_pkt_observe_t *hdr;
2282 2282 ipnet_addrp_t src;
2283 2283 ipnet_addrp_t dst;
2284 2284 ipnet_stack_t *ips;
2285 2285 ipnet_t *ipnet;
2286 2286 mblk_t *netmp;
2287 2287 mblk_t *mp;
2288 2288
2289 2289 hdr = (hook_pkt_observe_t *)info;
2290 2290 mp = hdr->hpo_pkt;
2291 2291 ipnet = (ipnet_t *)arg;
2292 2292 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet;
2293 2293
2294 2294 netmp = hdr->hpo_pkt->b_cont;
2295 2295 src.iap_family = hdr->hpo_family;
2296 2296 dst.iap_family = hdr->hpo_family;
2297 2297
2298 2298 if (hdr->hpo_family == AF_INET) {
2299 2299 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src;
2300 2300 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst;
2301 2301 } else {
2302 2302 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src;
2303 2303 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst;
2304 2304 }
2305 2305
2306 2306 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) {
2307 2307 IPSK_BUMP(ips, ik_acceptFail);
2308 2308 return (0);
2309 2309 }
2310 2310 IPSK_BUMP(ips, ik_acceptOk);
2311 2311
2312 2312 ipnet_itap(ipnet->ipnet_data, mp,
2313 2313 hdr->hpo_htype == htons(IPOBS_HOOK_OUTBOUND),
2314 2314 ntohl(hdr->hpo_pktlen) + MBLKL(mp));
2315 2315
2316 2316 return (0);
2317 2317 }
2318 2318
2319 2319 /*
2320 2320 * clone'd ipnetif_t's are created when a shared IP instance zone comes
2321 2321 * to life and configures an IP address. The model that BPF uses is that
2322 2322 * each interface must have a unique pointer and each interface must be
2323 2323 * representative of what it can capture. They are limited to one DLT
2324 2324 * per interface and one zone per interface. Thus every interface that
2325 2325 * can be seen in a zone must be announced via an attach to bpf. For
2326 2326 * shared instance zones, this means the ipnet driver needs to detect
2327 2327 * when an address is added to an interface in a zone for the first
2328 2328 * time (and also when the last address is removed.)
2329 2329 */
2330 2330 static ipnetif_t *
2331 2331 ipnetif_clone_create(ipnetif_t *ifp, zoneid_t zoneid)
2332 2332 {
2333 2333 uintptr_t key[2] = { zoneid, (uintptr_t)ifp->if_name };
2334 2334 ipnet_stack_t *ips = ifp->if_stackp;
2335 2335 avl_index_t where = 0;
2336 2336 ipnetif_t *newif;
2337 2337
2338 2338 mutex_enter(&ips->ips_avl_lock);
2339 2339 newif = avl_find(&ips->ips_avl_by_shared, (void *)key, &where);
2340 2340 if (newif != NULL) {
2341 2341 ipnetif_refhold(newif);
2342 2342 newif->if_sharecnt++;
2343 2343 mutex_exit(&ips->ips_avl_lock);
2344 2344 return (newif);
2345 2345 }
2346 2346
2347 2347 newif = ipnet_alloc_if(ips);
2348 2348 if (newif == NULL) {
2349 2349 mutex_exit(&ips->ips_avl_lock);
2350 2350 return (NULL);
2351 2351 }
2352 2352
2353 2353 newif->if_refcnt = 1;
2354 2354 newif->if_sharecnt = 1;
2355 2355 newif->if_zoneid = zoneid;
2356 2356 (void) strlcpy(newif->if_name, ifp->if_name, LIFNAMSIZ);
2357 2357 newif->if_flags = ifp->if_flags & IPNETIF_LOOPBACK;
2358 2358 newif->if_index = ifp->if_index;
2359 2359
2360 2360 avl_insert(&ips->ips_avl_by_shared, newif, where);
2361 2361 mutex_exit(&ips->ips_avl_lock);
2362 2362
2363 2363 return (newif);
2364 2364 }
2365 2365
2366 2366 static void
2367 2367 ipnetif_clone_release(ipnetif_t *ipnetif)
2368 2368 {
2369 2369 boolean_t dofree = B_FALSE;
2370 2370 boolean_t doremove = B_FALSE;
2371 2371 ipnet_stack_t *ips = ipnetif->if_stackp;
2372 2372
2373 2373 mutex_enter(&ipnetif->if_reflock);
2374 2374 ASSERT(ipnetif->if_refcnt > 0);
2375 2375 if (--ipnetif->if_refcnt == 0)
2376 2376 dofree = B_TRUE;
2377 2377 ASSERT(ipnetif->if_sharecnt > 0);
2378 2378 if (--ipnetif->if_sharecnt == 0)
2379 2379 doremove = B_TRUE;
2380 2380 mutex_exit(&ipnetif->if_reflock);
2381 2381 if (doremove) {
2382 2382 mutex_enter(&ips->ips_avl_lock);
2383 2383 avl_remove(&ips->ips_avl_by_shared, ipnetif);
2384 2384 mutex_exit(&ips->ips_avl_lock);
2385 2385 }
2386 2386 if (dofree) {
2387 2387 ASSERT(ipnetif->if_sharecnt == 0);
2388 2388 ipnetif_free(ipnetif);
2389 2389 }
2390 2390 }
↓ open down ↓ |
2174 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX