5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 1990 Mentat Inc.
24 */
25
26 /*
27 * This file contains the interface control functions for IP.
28 */
29
30 #include <sys/types.h>
31 #include <sys/stream.h>
32 #include <sys/dlpi.h>
33 #include <sys/stropts.h>
34 #include <sys/strsun.h>
35 #include <sys/sysmacros.h>
36 #include <sys/strsubr.h>
37 #include <sys/strlog.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/cmn_err.h>
41 #include <sys/kstat.h>
42 #include <sys/debug.h>
43 #include <sys/zone.h>
44 #include <sys/sunldi.h>
205 static void ill_capability_zerocopy_reset_fill(ill_t *, mblk_t *);
206 static void ill_capability_dld_reset_fill(ill_t *, mblk_t *);
207 static void ill_capability_dld_ack(ill_t *, mblk_t *,
208 dl_capability_sub_t *);
209 static void ill_capability_dld_enable(ill_t *);
210 static void ill_capability_ack_thr(void *);
211 static void ill_capability_lso_enable(ill_t *);
212
213 static ill_t *ill_prev_usesrc(ill_t *);
214 static int ill_relink_usesrc_ills(ill_t *, ill_t *, uint_t);
215 static void ill_disband_usesrc_group(ill_t *);
216 static void ip_sioctl_garp_reply(mblk_t *, ill_t *, void *, int);
217
218 #ifdef DEBUG
219 static void ill_trace_cleanup(const ill_t *);
220 static void ipif_trace_cleanup(const ipif_t *);
221 #endif
222
223 static void ill_dlpi_clear_deferred(ill_t *ill);
224
/*
 * If we go over the memory footprint limit more than once within this
 * interval (in milliseconds), we'll start pruning aggressively.
 */
229 int ip_min_frag_prune_time = 0;
230
231 static ipft_t ip_ioctl_ftbl[] = {
232 { IP_IOC_IRE_DELETE, ip_ire_delete, sizeof (ipid_t), 0 },
233 { IP_IOC_IRE_DELETE_NO_REPLY, ip_ire_delete, sizeof (ipid_t),
234 IPFT_F_NO_REPLY },
235 { IP_IOC_RTS_REQUEST, ip_rts_request, 0, IPFT_F_SELF_REPLY },
236 { 0 }
237 };
238
239 /* Simple ICMP IP Header Template */
240 static ipha_t icmp_ipha = {
241 IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP
242 };
243
244 static uchar_t ip_six_byte_all_ones[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
263 ip_ib_v4_mapping, ip_ib_v6_mapping, ip_ib_v6intfid,
264 ip_nodef_v6intfid },
265 { DL_IPV4, IFT_IPV4, IPPROTO_ENCAP, IPPROTO_IPV6,
266 ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv4_v6intfid,
267 ip_ipv4_v6destintfid },
268 { DL_IPV6, IFT_IPV6, IPPROTO_ENCAP, IPPROTO_IPV6,
269 ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv6_v6intfid,
270 ip_ipv6_v6destintfid },
271 { DL_6TO4, IFT_6TO4, IPPROTO_ENCAP, IPPROTO_IPV6,
272 ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv4_v6intfid,
273 ip_nodef_v6intfid },
274 { SUNW_DL_VNI, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
275 NULL, NULL, ip_nodef_v6intfid, ip_nodef_v6intfid },
276 { SUNW_DL_IPMP, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
277 NULL, NULL, ip_ipmp_v6intfid, ip_nodef_v6intfid },
278 { DL_OTHER, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
279 ip_ether_v4_mapping, ip_ether_v6_mapping, ip_nodef_v6intfid,
280 ip_nodef_v6intfid }
281 };
282
283 static ill_t ill_null; /* Empty ILL for init. */
284 char ipif_loopback_name[] = "lo0";
285
286 /* These are used by all IP network modules. */
287 sin6_t sin6_null; /* Zero address for quick clears */
288 sin_t sin_null; /* Zero address for quick clears */
289
290 /* When set search for unused ipif_seqid */
291 static ipif_t ipif_zero;
292
/*
 * The ppa arena is created after this many
 * interfaces have been plumbed.
 */
297 uint_t ill_no_arena = 12; /* Setable in /etc/system */
298
/*
 * Allocate per-interface mibs.
 * Returns true if ok, false otherwise.
 * The ipsq may not yet be allocated (loopback case).
 */
3312 ill->ill_phyint->phyint_ipsq = ipsq;
3313 ipx = ipsq->ipsq_xop = &ipsq->ipsq_ownxop;
3314 ipx->ipx_ipsq = ipsq;
3315 ipsq->ipsq_next = ipsq;
3316 ipsq->ipsq_phyint = ill->ill_phyint;
3317 mutex_init(&ipsq->ipsq_lock, NULL, MUTEX_DEFAULT, 0);
3318 mutex_init(&ipx->ipx_lock, NULL, MUTEX_DEFAULT, 0);
3319 ipsq->ipsq_ipst = ill->ill_ipst; /* No netstack_hold */
3320 if (enter) {
3321 ipx->ipx_writer = curthread;
3322 ipx->ipx_forced = B_FALSE;
3323 ipx->ipx_reentry_cnt = 1;
3324 #ifdef DEBUG
3325 ipx->ipx_depth = getpcstack(ipx->ipx_stack, IPX_STACK_DEPTH);
3326 #endif
3327 }
3328 return (B_TRUE);
3329 }
3330
3331 /*
3332 * ill_init is called by ip_open when a device control stream is opened.
3333 * It does a few initializations, and shoots a DL_INFO_REQ message down
3334 * to the driver. The response is later picked up in ip_rput_dlpi and
3335 * used to set up default mechanisms for talking to the driver. (Always
3336 * called as writer.)
3337 *
3338 * If this function returns error, ip_open will call ip_close which in
3339 * turn will call ill_delete to clean up any memory allocated here that
3340 * is not yet freed.
3341 */
3342 int
3343 ill_init(queue_t *q, ill_t *ill)
3344 {
3345 int count;
3346 dl_info_req_t *dlir;
3347 mblk_t *info_mp;
3348 uchar_t *frag_ptr;
3349
3350 /*
3351 * The ill is initialized to zero by mi_alloc*(). In addition
3352 * some fields already contain valid values, initialized in
3353 * ip_open(), before we reach here.
3354 */
3355 mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, 0);
3356 mutex_init(&ill->ill_saved_ire_lock, NULL, MUTEX_DEFAULT, NULL);
3357 ill->ill_saved_ire_cnt = 0;
3358
3359 ill->ill_rq = q;
3360 ill->ill_wq = WR(q);
3361
3362 info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
3363 BPRI_HI);
3364 if (info_mp == NULL)
3365 return (ENOMEM);
3366
3367 /*
3368 * Allocate sufficient space to contain our fragment hash table and
3369 * the device name.
3370 */
3371 frag_ptr = (uchar_t *)mi_zalloc(ILL_FRAG_HASH_TBL_SIZE + 2 * LIFNAMSIZ);
3372 if (frag_ptr == NULL) {
3373 freemsg(info_mp);
3374 return (ENOMEM);
3375 }
3376 ill->ill_frag_ptr = frag_ptr;
3377 ill->ill_frag_free_num_pkts = 0;
3378 ill->ill_last_frag_clean_time = 0;
3379 ill->ill_frag_hash_tbl = (ipfb_t *)frag_ptr;
3380 ill->ill_name = (char *)(frag_ptr + ILL_FRAG_HASH_TBL_SIZE);
3381 for (count = 0; count < ILL_FRAG_HASH_TBL_COUNT; count++) {
3382 mutex_init(&ill->ill_frag_hash_tbl[count].ipfb_lock,
3383 NULL, MUTEX_DEFAULT, NULL);
3384 }
3385
3386 ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t));
3387 if (ill->ill_phyint == NULL) {
3388 freemsg(info_mp);
3389 mi_free(frag_ptr);
3390 return (ENOMEM);
3391 }
3392
3393 mutex_init(&ill->ill_phyint->phyint_lock, NULL, MUTEX_DEFAULT, 0);
3394 /*
3395 * For now pretend this is a v4 ill. We need to set phyint_ill*
3396 * at this point because of the following reason. If we can't
3397 * enter the ipsq at some point and cv_wait, the writer that
3398 * wakes us up tries to locate us using the list of all phyints
3399 * in an ipsq and the ills from the phyint thru the phyint_ill*.
3400 * If we don't set it now, we risk a missed wakeup.
3401 */
3402 ill->ill_phyint->phyint_illv4 = ill;
3403 ill->ill_ppa = UINT_MAX;
3404 list_create(&ill->ill_nce, sizeof (nce_t), offsetof(nce_t, nce_node));
3405
3406 ill_set_inputfn(ill);
3407
3408 if (!ipsq_init(ill, B_TRUE)) {
3409 freemsg(info_mp);
3410 mi_free(frag_ptr);
3411 mi_free(ill->ill_phyint);
3412 return (ENOMEM);
3413 }
3414
3415 ill->ill_state_flags |= ILL_LL_SUBNET_PENDING;
3416
3417 /* Frag queue limit stuff */
3418 ill->ill_frag_count = 0;
3419 ill->ill_ipf_gen = 0;
3420
3421 rw_init(&ill->ill_mcast_lock, NULL, RW_DEFAULT, NULL);
3422 mutex_init(&ill->ill_mcast_serializer, NULL, MUTEX_DEFAULT, NULL);
3423 ill->ill_global_timer = INFINITY;
3424 ill->ill_mcast_v1_time = ill->ill_mcast_v2_time = 0;
3425 ill->ill_mcast_v1_tset = ill->ill_mcast_v2_tset = 0;
3426 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
3427 ill->ill_mcast_qi = MCAST_DEF_QUERY_INTERVAL;
3428
3429 /*
3430 * Initialize IPv6 configuration variables. The IP module is always
3431 * opened as an IPv4 module. Instead tracking down the cases where
3432 * it switches to do ipv6, we'll just initialize the IPv6 configuration
3433 * here for convenience, this has no effect until the ill is set to do
3434 * IPv6.
3435 */
3436 ill->ill_reachable_time = ND_REACHABLE_TIME;
3437 ill->ill_xmit_count = ND_MAX_MULTICAST_SOLICIT;
3438 ill->ill_max_buf = ND_MAX_Q;
3439 ill->ill_refcnt = 0;
3440
3441 /* Send down the Info Request to the driver. */
3442 info_mp->b_datap->db_type = M_PCPROTO;
3443 dlir = (dl_info_req_t *)info_mp->b_rptr;
3444 info_mp->b_wptr = (uchar_t *)&dlir[1];
3445 dlir->dl_primitive = DL_INFO_REQ;
3446
3447 ill->ill_dlpi_pending = DL_PRIM_INVAL;
3448
3449 qprocson(q);
3450 ill_dlpi_send(ill, info_mp);
3451
3452 return (0);
3453 }
3454
3455 /*
3456 * ill_dls_info
3457 * creates datalink socket info from the device.
3458 */
3459 int
3460 ill_dls_info(struct sockaddr_dl *sdl, const ill_t *ill)
3668 /*
3669 * Couldn't find it. Does this happen to be a lookup for the
3670 * loopback device and are we allowed to allocate it?
3671 */
3672 if (!isloopback || !do_alloc)
3673 return (NULL);
3674
3675 rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
3676 ill = ill_find_by_name(name, isv6, ipst);
3677 if (ill != NULL) {
3678 rw_exit(&ipst->ips_ill_g_lock);
3679 return (ill);
3680 }
3681
3682 /* Create the loopback device on demand */
3683 ill = (ill_t *)(mi_alloc(sizeof (ill_t) +
3684 sizeof (ipif_loopback_name), BPRI_MED));
3685 if (ill == NULL)
3686 goto done;
3687
3688 *ill = ill_null;
3689 mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, NULL);
3690 ill->ill_ipst = ipst;
3691 list_create(&ill->ill_nce, sizeof (nce_t), offsetof(nce_t, nce_node));
3692 netstack_hold(ipst->ips_netstack);
3693 /*
3694 * For exclusive stacks we set the zoneid to zero
3695 * to make IP operate as if in the global zone.
3696 */
3697 ill->ill_zoneid = GLOBAL_ZONEID;
3698
3699 ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t));
3700 if (ill->ill_phyint == NULL)
3701 goto done;
3702
3703 if (isv6)
3704 ill->ill_phyint->phyint_illv6 = ill;
3705 else
3706 ill->ill_phyint->phyint_illv4 = ill;
3707 mutex_init(&ill->ill_phyint->phyint_lock, NULL, MUTEX_DEFAULT, 0);
3708 phyint_flags_init(ill->ill_phyint, DL_LOOP);
3709
3710 if (isv6) {
3711 ill->ill_isv6 = B_TRUE;
3712 ill->ill_max_frag = ip_loopback_mtu_v6plus;
3713 } else {
3714 ill->ill_max_frag = ip_loopback_mtuplus;
3715 }
3716 if (!ill_allocate_mibs(ill))
3717 goto done;
3718 ill->ill_current_frag = ill->ill_max_frag;
3719 ill->ill_mtu = ill->ill_max_frag; /* Initial value */
3720 ill->ill_mc_mtu = ill->ill_mtu;
	/*
	 * ipif_loopback_name can't be pointed at directly because it's used
	 * by both the ipv4 and ipv6 interfaces.  When the ill is removed
	 * from the glist, ill_glist_delete() sets the first character of
	 * ill_name to '\0'.
	 */
3727 ill->ill_name = (char *)ill + sizeof (*ill);
3728 (void) strcpy(ill->ill_name, ipif_loopback_name);
3729 ill->ill_name_length = sizeof (ipif_loopback_name);
3730 /* Set ill_dlpi_pending for ipsq_current_finish() to work properly */
3731 ill->ill_dlpi_pending = DL_PRIM_INVAL;
3732
3733 rw_init(&ill->ill_mcast_lock, NULL, RW_DEFAULT, NULL);
3734 mutex_init(&ill->ill_mcast_serializer, NULL, MUTEX_DEFAULT, NULL);
3735 ill->ill_global_timer = INFINITY;
3736 ill->ill_mcast_v1_time = ill->ill_mcast_v2_time = 0;
3737 ill->ill_mcast_v1_tset = ill->ill_mcast_v2_tset = 0;
3738 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
3739 ill->ill_mcast_qi = MCAST_DEF_QUERY_INTERVAL;
3740
3741 /* No resolver here. */
3742 ill->ill_net_type = IRE_LOOPBACK;
3743
3744 /* Initialize the ipsq */
3745 if (!ipsq_init(ill, B_FALSE))
3746 goto done;
3747
3748 ipif = ipif_allocate(ill, 0L, IRE_LOOPBACK, B_TRUE, B_TRUE, NULL);
3749 if (ipif == NULL)
3750 goto done;
3751
3752 ill->ill_flags = ILLF_MULTICAST;
3753
3754 ov6addr = ipif->ipif_v6lcl_addr;
3755 /* Set up default loopback address and mask. */
3756 if (!isv6) {
3757 ipaddr_t inaddr_loopback = htonl(INADDR_LOOPBACK);
3758
3759 IN6_IPADDR_TO_V4MAPPED(inaddr_loopback, &ipif->ipif_v6lcl_addr);
3760 V4MASK_TO_V6(htonl(IN_CLASSA_NET), ipif->ipif_v6net_mask);
3761 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask,
3762 ipif->ipif_v6subnet);
3763 ill->ill_flags |= ILLF_IPV4;
3764 } else {
3765 ipif->ipif_v6lcl_addr = ipv6_loopback;
3766 ipif->ipif_v6net_mask = ipv6_all_ones;
3767 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask,
3768 ipif->ipif_v6subnet);
3769 ill->ill_flags |= ILLF_IPV6;
3770 }
3771
3772 /*
3773 * Chain us in at the end of the ill list. hold the ill
3774 * before we make it globally visible. 1 for the lookup.
3775 */
3776 ill->ill_refcnt = 0;
3777 ill_refhold(ill);
3778
3779 ill->ill_frag_count = 0;
3780 ill->ill_frag_free_num_pkts = 0;
3781 ill->ill_last_frag_clean_time = 0;
3782
3783 ipsq = ill->ill_phyint->phyint_ipsq;
3784
3785 ill_set_inputfn(ill);
3786
3787 if (ill_glist_insert(ill, "lo", isv6) != 0)
3788 cmn_err(CE_PANIC, "cannot insert loopback interface");
3789
3790 /* Let SCTP know so that it can add this to its list */
3791 sctp_update_ill(ill, SCTP_ILL_INSERT);
3792
3793 /*
3794 * We have already assigned ipif_v6lcl_addr above, but we need to
3795 * call sctp_update_ipif_addr() after SCTP_ILL_INSERT, which
3796 * requires to be after ill_glist_insert() since we need the
3797 * ill_index set. Pass on ipv6_loopback as the old address.
3798 */
3799 sctp_update_ipif_addr(ipif, ov6addr);
3800
3801 ip_rts_newaddrmsg(RTM_CHGADDR, 0, ipif, RTSQ_DEFAULT);
3802
3803 /*
3804 * ill_glist_insert() -> ill_phyint_reinit() may have merged IPSQs.
3805 * If so, free our original one.
3806 */
|
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 1990 Mentat Inc.
24 */
25 /*
26 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
27 */
28
29 /*
30 * This file contains the interface control functions for IP.
31 */
32
33 #include <sys/types.h>
34 #include <sys/stream.h>
35 #include <sys/dlpi.h>
36 #include <sys/stropts.h>
37 #include <sys/strsun.h>
38 #include <sys/sysmacros.h>
39 #include <sys/strsubr.h>
40 #include <sys/strlog.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/cmn_err.h>
44 #include <sys/kstat.h>
45 #include <sys/debug.h>
46 #include <sys/zone.h>
47 #include <sys/sunldi.h>
208 static void ill_capability_zerocopy_reset_fill(ill_t *, mblk_t *);
209 static void ill_capability_dld_reset_fill(ill_t *, mblk_t *);
210 static void ill_capability_dld_ack(ill_t *, mblk_t *,
211 dl_capability_sub_t *);
212 static void ill_capability_dld_enable(ill_t *);
213 static void ill_capability_ack_thr(void *);
214 static void ill_capability_lso_enable(ill_t *);
215
216 static ill_t *ill_prev_usesrc(ill_t *);
217 static int ill_relink_usesrc_ills(ill_t *, ill_t *, uint_t);
218 static void ill_disband_usesrc_group(ill_t *);
219 static void ip_sioctl_garp_reply(mblk_t *, ill_t *, void *, int);
220
221 #ifdef DEBUG
222 static void ill_trace_cleanup(const ill_t *);
223 static void ipif_trace_cleanup(const ipif_t *);
224 #endif
225
226 static void ill_dlpi_clear_deferred(ill_t *ill);
227
228 static void phyint_flags_init(phyint_t *, t_uscalar_t);
229
/*
 * If we go over the memory footprint limit more than once within this
 * interval (in milliseconds), we'll start pruning aggressively.
 */
234 int ip_min_frag_prune_time = 0;
235
236 static ipft_t ip_ioctl_ftbl[] = {
237 { IP_IOC_IRE_DELETE, ip_ire_delete, sizeof (ipid_t), 0 },
238 { IP_IOC_IRE_DELETE_NO_REPLY, ip_ire_delete, sizeof (ipid_t),
239 IPFT_F_NO_REPLY },
240 { IP_IOC_RTS_REQUEST, ip_rts_request, 0, IPFT_F_SELF_REPLY },
241 { 0 }
242 };
243
244 /* Simple ICMP IP Header Template */
245 static ipha_t icmp_ipha = {
246 IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP
247 };
248
249 static uchar_t ip_six_byte_all_ones[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
268 ip_ib_v4_mapping, ip_ib_v6_mapping, ip_ib_v6intfid,
269 ip_nodef_v6intfid },
270 { DL_IPV4, IFT_IPV4, IPPROTO_ENCAP, IPPROTO_IPV6,
271 ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv4_v6intfid,
272 ip_ipv4_v6destintfid },
273 { DL_IPV6, IFT_IPV6, IPPROTO_ENCAP, IPPROTO_IPV6,
274 ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv6_v6intfid,
275 ip_ipv6_v6destintfid },
276 { DL_6TO4, IFT_6TO4, IPPROTO_ENCAP, IPPROTO_IPV6,
277 ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv4_v6intfid,
278 ip_nodef_v6intfid },
279 { SUNW_DL_VNI, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
280 NULL, NULL, ip_nodef_v6intfid, ip_nodef_v6intfid },
281 { SUNW_DL_IPMP, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
282 NULL, NULL, ip_ipmp_v6intfid, ip_nodef_v6intfid },
283 { DL_OTHER, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
284 ip_ether_v4_mapping, ip_ether_v6_mapping, ip_nodef_v6intfid,
285 ip_nodef_v6intfid }
286 };
287
288 char ipif_loopback_name[] = "lo0";
289
290 /* These are used by all IP network modules. */
291 sin6_t sin6_null; /* Zero address for quick clears */
292 sin_t sin_null; /* Zero address for quick clears */
293
294 /* When set search for unused ipif_seqid */
295 static ipif_t ipif_zero;
296
/*
 * The ppa arena is created after this many
 * interfaces have been plumbed.
 */
301 uint_t ill_no_arena = 12; /* Setable in /etc/system */
302
/*
 * Allocate per-interface mibs.
 * Returns true if ok, false otherwise.
 * The ipsq may not yet be allocated (loopback case).
 */
3316 ill->ill_phyint->phyint_ipsq = ipsq;
3317 ipx = ipsq->ipsq_xop = &ipsq->ipsq_ownxop;
3318 ipx->ipx_ipsq = ipsq;
3319 ipsq->ipsq_next = ipsq;
3320 ipsq->ipsq_phyint = ill->ill_phyint;
3321 mutex_init(&ipsq->ipsq_lock, NULL, MUTEX_DEFAULT, 0);
3322 mutex_init(&ipx->ipx_lock, NULL, MUTEX_DEFAULT, 0);
3323 ipsq->ipsq_ipst = ill->ill_ipst; /* No netstack_hold */
3324 if (enter) {
3325 ipx->ipx_writer = curthread;
3326 ipx->ipx_forced = B_FALSE;
3327 ipx->ipx_reentry_cnt = 1;
3328 #ifdef DEBUG
3329 ipx->ipx_depth = getpcstack(ipx->ipx_stack, IPX_STACK_DEPTH);
3330 #endif
3331 }
3332 return (B_TRUE);
3333 }
3334
3335 /*
3336 * Here we perform initialisation of the ill_t common to both regular
3337 * interface ILLs and the special loopback ILL created by ill_lookup_on_name.
3338 */
3339 static int
3340 ill_init_common(ill_t *ill, queue_t *q, boolean_t isv6, boolean_t is_loopback,
3341 boolean_t ipsq_enter)
3342 {
3343 int count;
3344 uchar_t *frag_ptr;
3345
3346 mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, 0);
3347 mutex_init(&ill->ill_saved_ire_lock, NULL, MUTEX_DEFAULT, NULL);
3348 ill->ill_saved_ire_cnt = 0;
3349
3350 if (is_loopback) {
3351 ill->ill_max_frag = isv6 ? ip_loopback_mtu_v6plus :
3352 ip_loopback_mtuplus;
3353 /*
3354 * No resolver here.
3355 */
3356 ill->ill_net_type = IRE_LOOPBACK;
3357 } else {
3358 ill->ill_rq = q;
3359 ill->ill_wq = WR(q);
3360 ill->ill_ppa = UINT_MAX;
3361 }
3362
3363 ill->ill_isv6 = isv6;
3364
3365 /*
3366 * Allocate sufficient space to contain our fragment hash table and
3367 * the device name.
3368 */
3369 frag_ptr = (uchar_t *)mi_zalloc(ILL_FRAG_HASH_TBL_SIZE + 2 * LIFNAMSIZ);
3370 if (frag_ptr == NULL)
3371 return (ENOMEM);
3372 ill->ill_frag_ptr = frag_ptr;
3373 ill->ill_frag_free_num_pkts = 0;
3374 ill->ill_last_frag_clean_time = 0;
3375 ill->ill_frag_hash_tbl = (ipfb_t *)frag_ptr;
3376 ill->ill_name = (char *)(frag_ptr + ILL_FRAG_HASH_TBL_SIZE);
3377 for (count = 0; count < ILL_FRAG_HASH_TBL_COUNT; count++) {
3378 mutex_init(&ill->ill_frag_hash_tbl[count].ipfb_lock,
3379 NULL, MUTEX_DEFAULT, NULL);
3380 }
3381
3382 ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t));
3383 if (ill->ill_phyint == NULL) {
3384 mi_free(frag_ptr);
3385 return (ENOMEM);
3386 }
3387
3388 mutex_init(&ill->ill_phyint->phyint_lock, NULL, MUTEX_DEFAULT, 0);
3389 if (isv6) {
3390 ill->ill_phyint->phyint_illv6 = ill;
3391 } else {
3392 ill->ill_phyint->phyint_illv4 = ill;
3393 }
3394 if (is_loopback) {
3395 phyint_flags_init(ill->ill_phyint, DL_LOOP);
3396 }
3397
3398 list_create(&ill->ill_nce, sizeof (nce_t), offsetof(nce_t, nce_node));
3399
3400 ill_set_inputfn(ill);
3401
3402 if (!ipsq_init(ill, ipsq_enter)) {
3403 mi_free(frag_ptr);
3404 mi_free(ill->ill_phyint);
3405 return (ENOMEM);
3406 }
3407
3408 /* Frag queue limit stuff */
3409 ill->ill_frag_count = 0;
3410 ill->ill_ipf_gen = 0;
3411
3412 rw_init(&ill->ill_mcast_lock, NULL, RW_DEFAULT, NULL);
3413 mutex_init(&ill->ill_mcast_serializer, NULL, MUTEX_DEFAULT, NULL);
3414 ill->ill_global_timer = INFINITY;
3415 ill->ill_mcast_v1_time = ill->ill_mcast_v2_time = 0;
3416 ill->ill_mcast_v1_tset = ill->ill_mcast_v2_tset = 0;
3417 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
3418 ill->ill_mcast_qi = MCAST_DEF_QUERY_INTERVAL;
3419
3420 /*
3421 * Initialize IPv6 configuration variables. The IP module is always
3422 * opened as an IPv4 module. Instead tracking down the cases where
3423 * it switches to do ipv6, we'll just initialize the IPv6 configuration
3424 * here for convenience, this has no effect until the ill is set to do
3425 * IPv6.
3426 */
3427 ill->ill_reachable_time = ND_REACHABLE_TIME;
3428 ill->ill_xmit_count = ND_MAX_MULTICAST_SOLICIT;
3429 ill->ill_max_buf = ND_MAX_Q;
3430 ill->ill_refcnt = 0;
3431
3432 return (0);
3433 }
3434
3435 /*
3436 * ill_init is called by ip_open when a device control stream is opened.
3437 * It does a few initializations, and shoots a DL_INFO_REQ message down
3438 * to the driver. The response is later picked up in ip_rput_dlpi and
3439 * used to set up default mechanisms for talking to the driver. (Always
3440 * called as writer.)
3441 *
3442 * If this function returns error, ip_open will call ip_close which in
3443 * turn will call ill_delete to clean up any memory allocated here that
3444 * is not yet freed.
3445 *
3446 * Note: ill_ipst and ill_zoneid must be set before calling ill_init.
3447 */
3448 int
3449 ill_init(queue_t *q, ill_t *ill)
3450 {
3451 int ret;
3452 dl_info_req_t *dlir;
3453 mblk_t *info_mp;
3454
3455 info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
3456 BPRI_HI);
3457 if (info_mp == NULL)
3458 return (ENOMEM);
3459
3460 /*
3461 * The ill is initialized to zero by mi_alloc*(). In addition
3462 * some fields already contain valid values, initialized in
3463 * ip_open(), before we reach here.
3464 *
3465 * For now pretend this is a v4 ill. We need to set phyint_ill*
3466 * at this point because of the following reason. If we can't
3467 * enter the ipsq at some point and cv_wait, the writer that
3468 * wakes us up tries to locate us using the list of all phyints
3469 * in an ipsq and the ills from the phyint thru the phyint_ill*.
3470 * If we don't set it now, we risk a missed wakeup.
3471 */
3472 if ((ret = ill_init_common(ill, q, B_FALSE, B_FALSE, B_TRUE)) != 0) {
3473 freemsg(info_mp);
3474 return (ret);
3475 }
3476
3477 ill->ill_state_flags |= ILL_LL_SUBNET_PENDING;
3478
3479 /* Send down the Info Request to the driver. */
3480 info_mp->b_datap->db_type = M_PCPROTO;
3481 dlir = (dl_info_req_t *)info_mp->b_rptr;
3482 info_mp->b_wptr = (uchar_t *)&dlir[1];
3483 dlir->dl_primitive = DL_INFO_REQ;
3484
3485 ill->ill_dlpi_pending = DL_PRIM_INVAL;
3486
3487 qprocson(q);
3488 ill_dlpi_send(ill, info_mp);
3489
3490 return (0);
3491 }
3492
3493 /*
3494 * ill_dls_info
3495 * creates datalink socket info from the device.
3496 */
3497 int
3498 ill_dls_info(struct sockaddr_dl *sdl, const ill_t *ill)
3706 /*
3707 * Couldn't find it. Does this happen to be a lookup for the
3708 * loopback device and are we allowed to allocate it?
3709 */
3710 if (!isloopback || !do_alloc)
3711 return (NULL);
3712
3713 rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
3714 ill = ill_find_by_name(name, isv6, ipst);
3715 if (ill != NULL) {
3716 rw_exit(&ipst->ips_ill_g_lock);
3717 return (ill);
3718 }
3719
3720 /* Create the loopback device on demand */
3721 ill = (ill_t *)(mi_alloc(sizeof (ill_t) +
3722 sizeof (ipif_loopback_name), BPRI_MED));
3723 if (ill == NULL)
3724 goto done;
3725
3726 bzero(ill, sizeof (*ill));
3727 ill->ill_ipst = ipst;
3728 netstack_hold(ipst->ips_netstack);
3729 /*
3730 * For exclusive stacks we set the zoneid to zero
3731 * to make IP operate as if in the global zone.
3732 */
3733 ill->ill_zoneid = GLOBAL_ZONEID;
3734
3735 if (ill_init_common(ill, NULL, isv6, B_TRUE, B_FALSE) != 0)
3736 goto done;
3737
3738 if (!ill_allocate_mibs(ill))
3739 goto done;
3740
3741 ill->ill_current_frag = ill->ill_max_frag;
3742 ill->ill_mtu = ill->ill_max_frag; /* Initial value */
3743 ill->ill_mc_mtu = ill->ill_mtu;
	/*
	 * ipif_loopback_name can't be pointed at directly because it's used
	 * by both the ipv4 and ipv6 interfaces.  When the ill is removed
	 * from the glist, ill_glist_delete() sets the first character of
	 * ill_name to '\0'.
	 */
3750 ill->ill_name = (char *)ill + sizeof (*ill);
3751 (void) strcpy(ill->ill_name, ipif_loopback_name);
3752 ill->ill_name_length = sizeof (ipif_loopback_name);
3753 /* Set ill_dlpi_pending for ipsq_current_finish() to work properly */
3754 ill->ill_dlpi_pending = DL_PRIM_INVAL;
3755
3756 ipif = ipif_allocate(ill, 0L, IRE_LOOPBACK, B_TRUE, B_TRUE, NULL);
3757 if (ipif == NULL)
3758 goto done;
3759
3760 ill->ill_flags = ILLF_MULTICAST;
3761
3762 ov6addr = ipif->ipif_v6lcl_addr;
3763 /* Set up default loopback address and mask. */
3764 if (!isv6) {
3765 ipaddr_t inaddr_loopback = htonl(INADDR_LOOPBACK);
3766
3767 IN6_IPADDR_TO_V4MAPPED(inaddr_loopback, &ipif->ipif_v6lcl_addr);
3768 V4MASK_TO_V6(htonl(IN_CLASSA_NET), ipif->ipif_v6net_mask);
3769 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask,
3770 ipif->ipif_v6subnet);
3771 ill->ill_flags |= ILLF_IPV4;
3772 } else {
3773 ipif->ipif_v6lcl_addr = ipv6_loopback;
3774 ipif->ipif_v6net_mask = ipv6_all_ones;
3775 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask,
3776 ipif->ipif_v6subnet);
3777 ill->ill_flags |= ILLF_IPV6;
3778 }
3779
3780 /*
3781 * Chain us in at the end of the ill list. hold the ill
3782 * before we make it globally visible. 1 for the lookup.
3783 */
3784 ill_refhold(ill);
3785
3786 ipsq = ill->ill_phyint->phyint_ipsq;
3787
3788 if (ill_glist_insert(ill, "lo", isv6) != 0)
3789 cmn_err(CE_PANIC, "cannot insert loopback interface");
3790
3791 /* Let SCTP know so that it can add this to its list */
3792 sctp_update_ill(ill, SCTP_ILL_INSERT);
3793
3794 /*
3795 * We have already assigned ipif_v6lcl_addr above, but we need to
3796 * call sctp_update_ipif_addr() after SCTP_ILL_INSERT, which
3797 * requires to be after ill_glist_insert() since we need the
3798 * ill_index set. Pass on ipv6_loopback as the old address.
3799 */
3800 sctp_update_ipif_addr(ipif, ov6addr);
3801
3802 ip_rts_newaddrmsg(RTM_CHGADDR, 0, ipif, RTSQ_DEFAULT);
3803
3804 /*
3805 * ill_glist_insert() -> ill_phyint_reinit() may have merged IPSQs.
3806 * If so, free our original one.
3807 */
|