Print this page
XXX function ordering, typos
XXX minor typos
XXX ill_init_common (OS-2239)


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 1990 Mentat Inc.
  24  */



  25 
  26 /*
  27  * This file contains the interface control functions for IP.
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/stream.h>
  32 #include <sys/dlpi.h>
  33 #include <sys/stropts.h>
  34 #include <sys/strsun.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/strsubr.h>
  37 #include <sys/strlog.h>
  38 #include <sys/ddi.h>
  39 #include <sys/sunddi.h>
  40 #include <sys/cmn_err.h>
  41 #include <sys/kstat.h>
  42 #include <sys/debug.h>
  43 #include <sys/zone.h>
  44 #include <sys/sunldi.h>


 205 static void ill_capability_zerocopy_reset_fill(ill_t *, mblk_t *);
 206 static void     ill_capability_dld_reset_fill(ill_t *, mblk_t *);
 207 static void     ill_capability_dld_ack(ill_t *, mblk_t *,
 208                     dl_capability_sub_t *);
 209 static void     ill_capability_dld_enable(ill_t *);
 210 static void     ill_capability_ack_thr(void *);
 211 static void     ill_capability_lso_enable(ill_t *);
 212 
 213 static ill_t    *ill_prev_usesrc(ill_t *);
 214 static int      ill_relink_usesrc_ills(ill_t *, ill_t *, uint_t);
 215 static void     ill_disband_usesrc_group(ill_t *);
 216 static void     ip_sioctl_garp_reply(mblk_t *, ill_t *, void *, int);
 217 
 218 #ifdef DEBUG
 219 static  void    ill_trace_cleanup(const ill_t *);
 220 static  void    ipif_trace_cleanup(const ipif_t *);
 221 #endif
 222 
 223 static  void    ill_dlpi_clear_deferred(ill_t *ill);
 224 


 225 /*
 226  * If we go over the memory footprint limit more than once in this msec
 227  * interval, we'll start pruning aggressively.
 228  */
 229 int ip_min_frag_prune_time = 0;
 230 
 231 static ipft_t   ip_ioctl_ftbl[] = {
 232         { IP_IOC_IRE_DELETE, ip_ire_delete, sizeof (ipid_t), 0 },
 233         { IP_IOC_IRE_DELETE_NO_REPLY, ip_ire_delete, sizeof (ipid_t),
 234                 IPFT_F_NO_REPLY },
 235         { IP_IOC_RTS_REQUEST, ip_rts_request, 0, IPFT_F_SELF_REPLY },
 236         { 0 }
 237 };
 238 
 239 /* Simple ICMP IP Header Template */
 240 static ipha_t icmp_ipha = {
 241         IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP
 242 };
 243 
 244 static uchar_t  ip_six_byte_all_ones[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };


 263             ip_ib_v4_mapping, ip_ib_v6_mapping, ip_ib_v6intfid,
 264             ip_nodef_v6intfid },
 265         { DL_IPV4, IFT_IPV4, IPPROTO_ENCAP, IPPROTO_IPV6,
 266             ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv4_v6intfid,
 267             ip_ipv4_v6destintfid },
 268         { DL_IPV6, IFT_IPV6, IPPROTO_ENCAP, IPPROTO_IPV6,
 269             ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv6_v6intfid,
 270             ip_ipv6_v6destintfid },
 271         { DL_6TO4, IFT_6TO4, IPPROTO_ENCAP, IPPROTO_IPV6,
 272             ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv4_v6intfid,
 273             ip_nodef_v6intfid },
 274         { SUNW_DL_VNI, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
 275             NULL, NULL, ip_nodef_v6intfid, ip_nodef_v6intfid },
 276         { SUNW_DL_IPMP, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
 277             NULL, NULL, ip_ipmp_v6intfid, ip_nodef_v6intfid },
 278         { DL_OTHER, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
 279             ip_ether_v4_mapping, ip_ether_v6_mapping, ip_nodef_v6intfid,
 280             ip_nodef_v6intfid }
 281 };
 282 
 283 static ill_t    ill_null;               /* Empty ILL for init. */
 284 char    ipif_loopback_name[] = "lo0";
 285 
 286 /* These are used by all IP network modules. */
 287 sin6_t  sin6_null;      /* Zero address for quick clears */
 288 sin_t   sin_null;       /* Zero address for quick clears */
 289 
 290 /* When set search for unused ipif_seqid */
 291 static ipif_t   ipif_zero;
 292 
 293 /*
 294  * The ppa arena is created after this many
 295  * interfaces have been plumbed.
 296  */
 297 uint_t  ill_no_arena = 12;      /* Settable in /etc/system */
 298 
 299 /*
 300  * Allocate per-interface mibs.
 301  * Returns true if ok. False otherwise.
 302  * ipsq may not yet be allocated (loopback case).
 303  */


3312         ill->ill_phyint->phyint_ipsq = ipsq;
3313         ipx = ipsq->ipsq_xop = &ipsq->ipsq_ownxop;
3314         ipx->ipx_ipsq = ipsq;
3315         ipsq->ipsq_next = ipsq;
3316         ipsq->ipsq_phyint = ill->ill_phyint;
3317         mutex_init(&ipsq->ipsq_lock, NULL, MUTEX_DEFAULT, 0);
3318         mutex_init(&ipx->ipx_lock, NULL, MUTEX_DEFAULT, 0);
3319         ipsq->ipsq_ipst = ill->ill_ipst;  /* No netstack_hold */
3320         if (enter) {
3321                 ipx->ipx_writer = curthread;
3322                 ipx->ipx_forced = B_FALSE;
3323                 ipx->ipx_reentry_cnt = 1;
3324 #ifdef DEBUG
3325                 ipx->ipx_depth = getpcstack(ipx->ipx_stack, IPX_STACK_DEPTH);
3326 #endif
3327         }
3328         return (B_TRUE);
3329 }
3330 
3331 /*
3332  * ill_init is called by ip_open when a device control stream is opened.
3333  * It does a few initializations, and sends a DL_INFO_REQ message down
3334  * to the driver.  The response is later picked up in ip_rput_dlpi and
3335  * used to set up default mechanisms for talking to the driver.  (Always
3336  * called as writer.)  Returns 0 on success and ENOMEM on any failure.
3337  *
3338  * If this function returns an error, ip_open will call ip_close which in
3339  * turn will call ill_delete to clean up any memory allocated here that
3340  * is not yet freed.
      *
      * NOTE(review): the failure paths below mi_free() frag_ptr and
      * ill->ill_phyint without clearing ill->ill_frag_ptr / ill->ill_phyint;
      * confirm that ill_delete() cannot free them a second time.
3341  */
3342 int
3343 ill_init(queue_t *q, ill_t *ill)

3344 {
3345         int     count;
3346         dl_info_req_t   *dlir;
3347         mblk_t  *info_mp;
3348         uchar_t *frag_ptr;
3349 
3350         /*
3351          * The ill is initialized to zero by mi_alloc*(). In addition
3352          * some fields already contain valid values, initialized in
3353          * ip_open(), before we reach here.
3354          */
3355         mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, 0);
3356         mutex_init(&ill->ill_saved_ire_lock, NULL, MUTEX_DEFAULT, NULL);
3357         ill->ill_saved_ire_cnt = 0;
3358 








3359         ill->ill_rq = q;
3360         ill->ill_wq = WR(q);


3361 
3362         info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
3363             BPRI_HI);
3364         if (info_mp == NULL)
3365                 return (ENOMEM);        /* no memory allocated here yet */
3366 
3367         /*
3368          * Allocate sufficient space to contain our fragment hash table and
3369          * the device name.
3370          */
3371         frag_ptr = (uchar_t *)mi_zalloc(ILL_FRAG_HASH_TBL_SIZE + 2 * LIFNAMSIZ);
3372         if (frag_ptr == NULL) {
3373                 freemsg(info_mp);
3374                 return (ENOMEM);
3375         }
3376         ill->ill_frag_ptr = frag_ptr;
3377         ill->ill_frag_free_num_pkts = 0;
3378         ill->ill_last_frag_clean_time = 0;
3379         ill->ill_frag_hash_tbl = (ipfb_t *)frag_ptr;
3380         ill->ill_name = (char *)(frag_ptr + ILL_FRAG_HASH_TBL_SIZE);    /* name buffer follows the hash table */
3381         for (count = 0; count < ILL_FRAG_HASH_TBL_COUNT; count++) {
3382                 mutex_init(&ill->ill_frag_hash_tbl[count].ipfb_lock,
3383                     NULL, MUTEX_DEFAULT, NULL);
3384         }
3385 
3386         ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t));
3387         if (ill->ill_phyint == NULL) {
3388                 freemsg(info_mp);
3389                 mi_free(frag_ptr);
3390                 return (ENOMEM);
3391         }
3392 
3393         mutex_init(&ill->ill_phyint->phyint_lock, NULL, MUTEX_DEFAULT, 0);
3394         /*
3395          * For now pretend this is a v4 ill. We need to set phyint_ill*
3396          * at this point because of the following reason. If we can't
3397          * enter the ipsq at some point and cv_wait, the writer that
3398          * wakes us up tries to locate us using the list of all phyints
3399          * in an ipsq and the ills from the phyint thru the phyint_ill*.
3400          * If we don't set it now, we risk a missed wakeup.
3401          */
3402         ill->ill_phyint->phyint_illv4 = ill;
3403         ill->ill_ppa = UINT_MAX;




3404         list_create(&ill->ill_nce, sizeof (nce_t), offsetof(nce_t, nce_node));
3405 
3406         ill_set_inputfn(ill);
3407 
3408         if (!ipsq_init(ill, B_TRUE)) {  /* B_TRUE presumably enters the ipsq as writer - confirm */
3409                 freemsg(info_mp);
3410                 mi_free(frag_ptr);
3411                 mi_free(ill->ill_phyint);
3412                 return (ENOMEM);
3413         }
3414 
3415         ill->ill_state_flags |= ILL_LL_SUBNET_PENDING;
3416 
3417         /* Frag queue limit stuff */
3418         ill->ill_frag_count = 0;
3419         ill->ill_ipf_gen = 0;
3420 
3421         rw_init(&ill->ill_mcast_lock, NULL, RW_DEFAULT, NULL);
3422         mutex_init(&ill->ill_mcast_serializer, NULL, MUTEX_DEFAULT, NULL);
3423         ill->ill_global_timer = INFINITY;
3424         ill->ill_mcast_v1_time = ill->ill_mcast_v2_time = 0;
3425         ill->ill_mcast_v1_tset = ill->ill_mcast_v2_tset = 0;
3426         ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
3427         ill->ill_mcast_qi = MCAST_DEF_QUERY_INTERVAL;
3428 
3429         /*
3430          * Initialize IPv6 configuration variables.  The IP module is always
3431          * opened as an IPv4 module.  Instead of tracking down the cases
3432          * where it switches to do IPv6, we just initialize the IPv6
3433          * configuration here for convenience; this has no effect until the
3434          * ill is set to do IPv6.
3435          */
3436         ill->ill_reachable_time = ND_REACHABLE_TIME;
3437         ill->ill_xmit_count = ND_MAX_MULTICAST_SOLICIT;
3438         ill->ill_max_buf = ND_MAX_Q;
3439         ill->ill_refcnt = 0;
3440 















































3441         /* Send down the Info Request to the driver. */
3442         info_mp->b_datap->db_type = M_PCPROTO;
3443         dlir = (dl_info_req_t *)info_mp->b_rptr;
3444         info_mp->b_wptr = (uchar_t *)&dlir[1];  /* payload is exactly one dl_info_req_t */
3445         dlir->dl_primitive = DL_INFO_REQ;
3446 
3447         ill->ill_dlpi_pending = DL_PRIM_INVAL;
3448 
3449         qprocson(q);
3450         ill_dlpi_send(ill, info_mp);
3451 
3452         return (0);
3453 }
3454 
3455 /*
3456  * ill_dls_info
3457  * creates datalink socket info from the device.
3458  */
3459 int
3460 ill_dls_info(struct sockaddr_dl *sdl, const ill_t *ill)


3668         /*
3669          * Couldn't find it.  Does this happen to be a lookup for the
3670          * loopback device and are we allowed to allocate it?
3671          */
3672         if (!isloopback || !do_alloc)
3673                 return (NULL);
3674 
3675         rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
3676         ill = ill_find_by_name(name, isv6, ipst);
3677         if (ill != NULL) {
3678                 rw_exit(&ipst->ips_ill_g_lock);
3679                 return (ill);
3680         }
3681 
3682         /* Create the loopback device on demand */
3683         ill = (ill_t *)(mi_alloc(sizeof (ill_t) +
3684             sizeof (ipif_loopback_name), BPRI_MED));
3685         if (ill == NULL)
3686                 goto done;
3687 
3688         *ill = ill_null;
3689         mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, NULL);
3690         ill->ill_ipst = ipst;
3691         list_create(&ill->ill_nce, sizeof (nce_t), offsetof(nce_t, nce_node));
3692         netstack_hold(ipst->ips_netstack);
3693         /*
3694          * For exclusive stacks we set the zoneid to zero
3695          * to make IP operate as if in the global zone.
3696          */
3697         ill->ill_zoneid = GLOBAL_ZONEID;
3698 
3699         ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t));
3700         if (ill->ill_phyint == NULL)
3701                 goto done;
3702 
3703         if (isv6)
3704                 ill->ill_phyint->phyint_illv6 = ill;
3705         else
3706                 ill->ill_phyint->phyint_illv4 = ill;
3707         mutex_init(&ill->ill_phyint->phyint_lock, NULL, MUTEX_DEFAULT, 0);
3708         phyint_flags_init(ill->ill_phyint, DL_LOOP);
3709 
3710         if (isv6) {
3711                 ill->ill_isv6 = B_TRUE;
3712                 ill->ill_max_frag = ip_loopback_mtu_v6plus;
3713         } else {
3714                 ill->ill_max_frag = ip_loopback_mtuplus;
3715         }
3716         if (!ill_allocate_mibs(ill))
3717                 goto done;

3718         ill->ill_current_frag = ill->ill_max_frag;
3719         ill->ill_mtu = ill->ill_max_frag; /* Initial value */
3720         ill->ill_mc_mtu = ill->ill_mtu;
3721         /*
3722          * ipif_loopback_name can't be pointed at directly because it's used
3723          * by both the ipv4 and ipv6 interfaces.  When the ill is removed
3724          * from the glist, ill_glist_delete() sets the first character of
3725          * ill_name to '\0'.
3726          */
3727         ill->ill_name = (char *)ill + sizeof (*ill);
3728         (void) strcpy(ill->ill_name, ipif_loopback_name);
3729         ill->ill_name_length = sizeof (ipif_loopback_name);
3730         /* Set ill_dlpi_pending for ipsq_current_finish() to work properly */
3731         ill->ill_dlpi_pending = DL_PRIM_INVAL;
3732 
3733         rw_init(&ill->ill_mcast_lock, NULL, RW_DEFAULT, NULL);
3734         mutex_init(&ill->ill_mcast_serializer, NULL, MUTEX_DEFAULT, NULL);
3735         ill->ill_global_timer = INFINITY;
3736         ill->ill_mcast_v1_time = ill->ill_mcast_v2_time = 0;
3737         ill->ill_mcast_v1_tset = ill->ill_mcast_v2_tset = 0;
3738         ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
3739         ill->ill_mcast_qi = MCAST_DEF_QUERY_INTERVAL;
3740 
3741         /* No resolver here. */
3742         ill->ill_net_type = IRE_LOOPBACK;
3743 
3744         /* Initialize the ipsq */
3745         if (!ipsq_init(ill, B_FALSE))
3746                 goto done;
3747 
3748         ipif = ipif_allocate(ill, 0L, IRE_LOOPBACK, B_TRUE, B_TRUE, NULL);
3749         if (ipif == NULL)
3750                 goto done;
3751 
3752         ill->ill_flags = ILLF_MULTICAST;
3753 
3754         ov6addr = ipif->ipif_v6lcl_addr;
3755         /* Set up default loopback address and mask. */
3756         if (!isv6) {
3757                 ipaddr_t inaddr_loopback = htonl(INADDR_LOOPBACK);
3758 
3759                 IN6_IPADDR_TO_V4MAPPED(inaddr_loopback, &ipif->ipif_v6lcl_addr);
3760                 V4MASK_TO_V6(htonl(IN_CLASSA_NET), ipif->ipif_v6net_mask);
3761                 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask,
3762                     ipif->ipif_v6subnet);
3763                 ill->ill_flags |= ILLF_IPV4;
3764         } else {
3765                 ipif->ipif_v6lcl_addr = ipv6_loopback;
3766                 ipif->ipif_v6net_mask = ipv6_all_ones;
3767                 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask,
3768                     ipif->ipif_v6subnet);
3769                 ill->ill_flags |= ILLF_IPV6;
3770         }
3771 
3772         /*
3773          * Chain us in at the end of the ill list. Hold the ill
3774          * before we make it globally visible. 1 for the lookup.
3775          */
3776         ill->ill_refcnt = 0;
3777         ill_refhold(ill);
3778 
3779         ill->ill_frag_count = 0;
3780         ill->ill_frag_free_num_pkts = 0;
3781         ill->ill_last_frag_clean_time = 0;
3782 
3783         ipsq = ill->ill_phyint->phyint_ipsq;
3784 
3785         ill_set_inputfn(ill);
3786 
3787         if (ill_glist_insert(ill, "lo", isv6) != 0)
3788                 cmn_err(CE_PANIC, "cannot insert loopback interface");
3789 
3790         /* Let SCTP know so that it can add this to its list */
3791         sctp_update_ill(ill, SCTP_ILL_INSERT);
3792 
3793         /*
3794          * We have already assigned ipif_v6lcl_addr above, but we need to
3795          * call sctp_update_ipif_addr() after SCTP_ILL_INSERT, which
3796          * in turn must come after ill_glist_insert() since we need the
3797          * ill_index set. Pass on ipv6_loopback as the old address.
3798          */
3799         sctp_update_ipif_addr(ipif, ov6addr);
3800 
3801         ip_rts_newaddrmsg(RTM_CHGADDR, 0, ipif, RTSQ_DEFAULT);
3802 
3803         /*
3804          * ill_glist_insert() -> ill_phyint_reinit() may have merged IPSQs.
3805          * If so, free our original one.
3806          */




   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 1990 Mentat Inc.
  24  */
  25 /*
  26  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  27  */
  28 
  29 /*
  30  * This file contains the interface control functions for IP.
  31  */
  32 
  33 #include <sys/types.h>
  34 #include <sys/stream.h>
  35 #include <sys/dlpi.h>
  36 #include <sys/stropts.h>
  37 #include <sys/strsun.h>
  38 #include <sys/sysmacros.h>
  39 #include <sys/strsubr.h>
  40 #include <sys/strlog.h>
  41 #include <sys/ddi.h>
  42 #include <sys/sunddi.h>
  43 #include <sys/cmn_err.h>
  44 #include <sys/kstat.h>
  45 #include <sys/debug.h>
  46 #include <sys/zone.h>
  47 #include <sys/sunldi.h>


 208 static void ill_capability_zerocopy_reset_fill(ill_t *, mblk_t *);
 209 static void     ill_capability_dld_reset_fill(ill_t *, mblk_t *);
 210 static void     ill_capability_dld_ack(ill_t *, mblk_t *,
 211                     dl_capability_sub_t *);
 212 static void     ill_capability_dld_enable(ill_t *);
 213 static void     ill_capability_ack_thr(void *);
 214 static void     ill_capability_lso_enable(ill_t *);
 215 
 216 static ill_t    *ill_prev_usesrc(ill_t *);
 217 static int      ill_relink_usesrc_ills(ill_t *, ill_t *, uint_t);
 218 static void     ill_disband_usesrc_group(ill_t *);
 219 static void     ip_sioctl_garp_reply(mblk_t *, ill_t *, void *, int);
 220 
 221 #ifdef DEBUG
 222 static  void    ill_trace_cleanup(const ill_t *);
 223 static  void    ipif_trace_cleanup(const ipif_t *);
 224 #endif
 225 
 226 static  void    ill_dlpi_clear_deferred(ill_t *ill);
 227 
 228 static  void    phyint_flags_init(phyint_t *, t_uscalar_t);
 229 
 230 /*
 231  * If we go over the memory footprint limit more than once in this msec
 232  * interval, we'll start pruning aggressively.
 233  */
 234 int ip_min_frag_prune_time = 0;
 235 
 236 static ipft_t   ip_ioctl_ftbl[] = {
 237         { IP_IOC_IRE_DELETE, ip_ire_delete, sizeof (ipid_t), 0 },
 238         { IP_IOC_IRE_DELETE_NO_REPLY, ip_ire_delete, sizeof (ipid_t),
 239                 IPFT_F_NO_REPLY },
 240         { IP_IOC_RTS_REQUEST, ip_rts_request, 0, IPFT_F_SELF_REPLY },
 241         { 0 }
 242 };
 243 
 244 /* Simple ICMP IP Header Template */
 245 static ipha_t icmp_ipha = {
 246         IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP
 247 };
 248 
 249 static uchar_t  ip_six_byte_all_ones[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };


 268             ip_ib_v4_mapping, ip_ib_v6_mapping, ip_ib_v6intfid,
 269             ip_nodef_v6intfid },
 270         { DL_IPV4, IFT_IPV4, IPPROTO_ENCAP, IPPROTO_IPV6,
 271             ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv4_v6intfid,
 272             ip_ipv4_v6destintfid },
 273         { DL_IPV6, IFT_IPV6, IPPROTO_ENCAP, IPPROTO_IPV6,
 274             ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv6_v6intfid,
 275             ip_ipv6_v6destintfid },
 276         { DL_6TO4, IFT_6TO4, IPPROTO_ENCAP, IPPROTO_IPV6,
 277             ip_mbcast_mapping, ip_mbcast_mapping, ip_ipv4_v6intfid,
 278             ip_nodef_v6intfid },
 279         { SUNW_DL_VNI, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
 280             NULL, NULL, ip_nodef_v6intfid, ip_nodef_v6intfid },
 281         { SUNW_DL_IPMP, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
 282             NULL, NULL, ip_ipmp_v6intfid, ip_nodef_v6intfid },
 283         { DL_OTHER, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
 284             ip_ether_v4_mapping, ip_ether_v6_mapping, ip_nodef_v6intfid,
 285             ip_nodef_v6intfid }
 286 };
 287 

 288 char    ipif_loopback_name[] = "lo0";
 289 
 290 /* These are used by all IP network modules. */
 291 sin6_t  sin6_null;      /* Zero address for quick clears */
 292 sin_t   sin_null;       /* Zero address for quick clears */
 293 
 294 /* When set search for unused ipif_seqid */
 295 static ipif_t   ipif_zero;
 296 
 297 /*
 298  * The ppa arena is created after this many
 299  * interfaces have been plumbed.
 300  */
 301 uint_t  ill_no_arena = 12;      /* Settable in /etc/system */
 302 
 303 /*
 304  * Allocate per-interface mibs.
 305  * Returns true if ok. False otherwise.
 306  * ipsq may not yet be allocated (loopback case).
 307  */


3316         ill->ill_phyint->phyint_ipsq = ipsq;
3317         ipx = ipsq->ipsq_xop = &ipsq->ipsq_ownxop;
3318         ipx->ipx_ipsq = ipsq;
3319         ipsq->ipsq_next = ipsq;
3320         ipsq->ipsq_phyint = ill->ill_phyint;
3321         mutex_init(&ipsq->ipsq_lock, NULL, MUTEX_DEFAULT, 0);
3322         mutex_init(&ipx->ipx_lock, NULL, MUTEX_DEFAULT, 0);
3323         ipsq->ipsq_ipst = ill->ill_ipst;  /* No netstack_hold */
3324         if (enter) {
3325                 ipx->ipx_writer = curthread;
3326                 ipx->ipx_forced = B_FALSE;
3327                 ipx->ipx_reentry_cnt = 1;
3328 #ifdef DEBUG
3329                 ipx->ipx_depth = getpcstack(ipx->ipx_stack, IPX_STACK_DEPTH);
3330 #endif
3331         }
3332         return (B_TRUE);
3333 }
3334 
3335 /*
3336  * Here we perform initialisation of the ill_t common to both regular
3337  * interface ILLs and the special loopback ILL created by ill_lookup_on_name.
      *
      * ill          - the ill to initialise.
      * q            - read queue; stored in ill_rq (and WR(q) in ill_wq) only
      *                when is_loopback is not set.
      * isv6         - stored in ill_isv6; selects phyint_illv6 vs phyint_illv4
      *                and, for loopback, the IPv6 vs IPv4 loopback MTU.
      * is_loopback  - when set, ill_max_frag and ill_net_type get loopback
      *                values and the phyint flags are initialised with
      *                DL_LOOP; otherwise ill_rq, ill_wq and ill_ppa are set.
      * ipsq_enter   - passed through unchanged to ipsq_init().
      *
      * Returns 0 on success, or ENOMEM if the fragment table or phyint cannot
      * be allocated or ipsq_init() fails.  The memory allocated here is freed
      * again before an error return.
      *
      * NOTE(review): those error paths mi_free() frag_ptr and ill->ill_phyint
      * without clearing ill->ill_frag_ptr, ill->ill_frag_hash_tbl,
      * ill->ill_name or ill->ill_phyint; confirm that the callers' cleanup
      * cannot touch or free them again.







3338  */
3339 static int
3340 ill_init_common(ill_t *ill, queue_t *q, boolean_t isv6, boolean_t is_loopback,
3341     boolean_t ipsq_enter)
3342 {
3343         int count;


3344         uchar_t *frag_ptr;
3345 





3346         mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, 0);
3347         mutex_init(&ill->ill_saved_ire_lock, NULL, MUTEX_DEFAULT, NULL);
3348         ill->ill_saved_ire_cnt = 0;
3349 
3350         if (is_loopback == B_TRUE) {
3351                 ill->ill_max_frag = isv6 == B_TRUE ? ip_loopback_mtu_v6plus :
3352                     ip_loopback_mtuplus;
3353                 /*
3354                  * No resolver here.
3355                  */
3356                 ill->ill_net_type = IRE_LOOPBACK;
3357         } else {
3358                 ill->ill_rq = q;
3359                 ill->ill_wq = WR(q);
3360                 ill->ill_ppa = UINT_MAX;
3361         }
3362 
3363         ill->ill_isv6 = isv6;



3364 
3365         /*
3366          * Allocate sufficient space to contain our fragment hash table and
3367          * the device name.
3368          */
3369         frag_ptr = (uchar_t *)mi_zalloc(ILL_FRAG_HASH_TBL_SIZE + 2 * LIFNAMSIZ);
3370         if (frag_ptr == NULL)

3371                 return (ENOMEM);        /* no memory allocated here yet */

3372         ill->ill_frag_ptr = frag_ptr;
3373         ill->ill_frag_free_num_pkts = 0;
3374         ill->ill_last_frag_clean_time = 0;
3375         ill->ill_frag_hash_tbl = (ipfb_t *)frag_ptr;
3376         ill->ill_name = (char *)(frag_ptr + ILL_FRAG_HASH_TBL_SIZE);    /* name buffer follows the hash table */
3377         for (count = 0; count < ILL_FRAG_HASH_TBL_COUNT; count++) {
3378                 mutex_init(&ill->ill_frag_hash_tbl[count].ipfb_lock,
3379                     NULL, MUTEX_DEFAULT, NULL);
3380         }
3381 
3382         ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t));
3383         if (ill->ill_phyint == NULL) {

3384                 mi_free(frag_ptr);
3385                 return (ENOMEM);
3386         }
3387 
3388         mutex_init(&ill->ill_phyint->phyint_lock, NULL, MUTEX_DEFAULT, 0);
3389         if (isv6 == B_TRUE) {
3390                 ill->ill_phyint->phyint_illv6 = ill;
3391         } else {





3392                 ill->ill_phyint->phyint_illv4 = ill;
3393         }
3394         if (is_loopback == B_TRUE) {
3395                 phyint_flags_init(ill->ill_phyint, DL_LOOP);
3396         }
3397 
3398         list_create(&ill->ill_nce, sizeof (nce_t), offsetof(nce_t, nce_node));
3399 
3400         ill_set_inputfn(ill);
3401 
3402         if (!ipsq_init(ill, ipsq_enter)) {

3403                 mi_free(frag_ptr);
3404                 mi_free(ill->ill_phyint);
3405                 return (ENOMEM);
3406         }
3407 


3408         /* Frag queue limit stuff */
3409         ill->ill_frag_count = 0;
3410         ill->ill_ipf_gen = 0;
3411 
3412         rw_init(&ill->ill_mcast_lock, NULL, RW_DEFAULT, NULL);
3413         mutex_init(&ill->ill_mcast_serializer, NULL, MUTEX_DEFAULT, NULL);
3414         ill->ill_global_timer = INFINITY;
3415         ill->ill_mcast_v1_time = ill->ill_mcast_v2_time = 0;
3416         ill->ill_mcast_v1_tset = ill->ill_mcast_v2_tset = 0;
3417         ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
3418         ill->ill_mcast_qi = MCAST_DEF_QUERY_INTERVAL;
3419 
3420         /*
3421          * Initialize IPv6 configuration variables.  The IP module is always
3422          * opened as an IPv4 module.  Instead of tracking down the cases
3423          * where it switches to do IPv6, we just initialize the IPv6
3424          * configuration here for convenience; this has no effect until the
3425          * ill is set to do IPv6.
3426          */
3427         ill->ill_reachable_time = ND_REACHABLE_TIME;
3428         ill->ill_xmit_count = ND_MAX_MULTICAST_SOLICIT;
3429         ill->ill_max_buf = ND_MAX_Q;
3430         ill->ill_refcnt = 0;
3431 
3432         return (0);
3433 }
3434 
3435 /*
3436  * ill_init is called by ip_open when a device control stream is opened.
3437  * It does a few initializations, and sends a DL_INFO_REQ message down
3438  * to the driver.  The response is later picked up in ip_rput_dlpi and
3439  * used to set up default mechanisms for talking to the driver.  (Always
3440  * called as writer.)  Returns 0 on success, or an errno value on failure.
3441  *
3442  * If this function returns an error, ip_open will call ip_close which in
3443  * turn will call ill_delete to clean up any memory allocated here that
3444  * is not yet freed.
3445  *
3446  * Note: ill_ipst and ill_zoneid must be set before calling ill_init.
3447  */
3448 int
3449 ill_init(queue_t *q, ill_t *ill)
3450 {
3451         int ret;
3452         dl_info_req_t   *dlir;
3453         mblk_t  *info_mp;
3454 
3455         info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
3456             BPRI_HI);
3457         if (info_mp == NULL)
3458                 return (ENOMEM);        /* nothing else to undo yet */
3459 
3460         /*
3461          * The ill is initialized to zero by mi_alloc*(). In addition
3462          * some fields already contain valid values, initialized in
3463          * ip_open(), before we reach here.
3464          *
3465          * For now pretend this is a v4 ill. We need to set phyint_ill*
3466          * at this point because of the following reason. If we can't
3467          * enter the ipsq at some point and cv_wait, the writer that
3468          * wakes us up tries to locate us using the list of all phyints
3469          * in an ipsq and the ills from the phyint thru the phyint_ill*.
3470          * If we don't set it now, we risk a missed wakeup.
              *
              * The arguments below are isv6 = B_FALSE, is_loopback = B_FALSE
              * and ipsq_enter = B_TRUE.  On failure only info_mp is released
              * here and the error from ill_init_common() is returned as-is.
3471          */
3472         if ((ret = ill_init_common(ill, q, B_FALSE, B_FALSE, B_TRUE)) != 0) {
3473                 freemsg(info_mp);
3474                 return (ret);
3475         }
3476 
3477         ill->ill_state_flags |= ILL_LL_SUBNET_PENDING;
3478 
3479         /* Send down the Info Request to the driver. */
3480         info_mp->b_datap->db_type = M_PCPROTO;
3481         dlir = (dl_info_req_t *)info_mp->b_rptr;
3482         info_mp->b_wptr = (uchar_t *)&dlir[1];  /* payload is exactly one dl_info_req_t */
3483         dlir->dl_primitive = DL_INFO_REQ;
3484 
3485         ill->ill_dlpi_pending = DL_PRIM_INVAL;
3486 
3487         qprocson(q);
3488         ill_dlpi_send(ill, info_mp);
3489 
3490         return (0);
3491 }
3492 
3493 /*
3494  * ill_dls_info
3495  * creates datalink socket info from the device.
3496  */
3497 int
3498 ill_dls_info(struct sockaddr_dl *sdl, const ill_t *ill)


3706         /*
3707          * Couldn't find it.  Does this happen to be a lookup for the
3708          * loopback device and are we allowed to allocate it?
3709          */
3710         if (!isloopback || !do_alloc)
3711                 return (NULL);
3712 
3713         rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
3714         ill = ill_find_by_name(name, isv6, ipst);
3715         if (ill != NULL) {
3716                 rw_exit(&ipst->ips_ill_g_lock);
3717                 return (ill);
3718         }
3719 
3720         /* Create the loopback device on demand */
3721         ill = (ill_t *)(mi_alloc(sizeof (ill_t) +
3722             sizeof (ipif_loopback_name), BPRI_MED));
3723         if (ill == NULL)
3724                 goto done;
3725 
3726         bzero(ill, sizeof (*ill));

3727         ill->ill_ipst = ipst;

3728         netstack_hold(ipst->ips_netstack);
3729         /*
3730          * For exclusive stacks we set the zoneid to zero
3731          * to make IP operate as if in the global zone.
3732          */
3733         ill->ill_zoneid = GLOBAL_ZONEID;
3734 
3735         if (ill_init_common(ill, NULL, isv6, B_TRUE, B_FALSE) != 0)

3736                 goto done;
3737 













3738         if (!ill_allocate_mibs(ill))
3739                 goto done;
3740 
3741         ill->ill_current_frag = ill->ill_max_frag;
3742         ill->ill_mtu = ill->ill_max_frag; /* Initial value */
3743         ill->ill_mc_mtu = ill->ill_mtu;
3744         /*
3745          * ipif_loopback_name can't be pointed at directly because it's used
3746          * by both the ipv4 and ipv6 interfaces.  When the ill is removed
3747          * from the glist, ill_glist_delete() sets the first character of
3748          * ill_name to '\0'.
3749          */
3750         ill->ill_name = (char *)ill + sizeof (*ill);
3751         (void) strcpy(ill->ill_name, ipif_loopback_name);
3752         ill->ill_name_length = sizeof (ipif_loopback_name);
3753         /* Set ill_dlpi_pending for ipsq_current_finish() to work properly */
3754         ill->ill_dlpi_pending = DL_PRIM_INVAL;
3755 















3756         ipif = ipif_allocate(ill, 0L, IRE_LOOPBACK, B_TRUE, B_TRUE, NULL);
3757         if (ipif == NULL)
3758                 goto done;
3759 
3760         ill->ill_flags = ILLF_MULTICAST;
3761 
3762         ov6addr = ipif->ipif_v6lcl_addr;
3763         /* Set up default loopback address and mask. */
3764         if (!isv6) {
3765                 ipaddr_t inaddr_loopback = htonl(INADDR_LOOPBACK);
3766 
3767                 IN6_IPADDR_TO_V4MAPPED(inaddr_loopback, &ipif->ipif_v6lcl_addr);
3768                 V4MASK_TO_V6(htonl(IN_CLASSA_NET), ipif->ipif_v6net_mask);
3769                 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask,
3770                     ipif->ipif_v6subnet);
3771                 ill->ill_flags |= ILLF_IPV4;
3772         } else {
3773                 ipif->ipif_v6lcl_addr = ipv6_loopback;
3774                 ipif->ipif_v6net_mask = ipv6_all_ones;
3775                 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask,
3776                     ipif->ipif_v6subnet);
3777                 ill->ill_flags |= ILLF_IPV6;
3778         }
3779 
3780         /*
3781          * Chain us in at the end of the ill list. Hold the ill
3782          * before we make it globally visible. 1 for the lookup.
3783          */

3784         ill_refhold(ill);
3785 




3786         ipsq = ill->ill_phyint->phyint_ipsq;
3787 


3788         if (ill_glist_insert(ill, "lo", isv6) != 0)
3789                 cmn_err(CE_PANIC, "cannot insert loopback interface");
3790 
3791         /* Let SCTP know so that it can add this to its list */
3792         sctp_update_ill(ill, SCTP_ILL_INSERT);
3793 
3794         /*
3795          * We have already assigned ipif_v6lcl_addr above, but we need to
3796          * call sctp_update_ipif_addr() after SCTP_ILL_INSERT, which
3797          * in turn must come after ill_glist_insert() since we need the
3798          * ill_index set. Pass on ipv6_loopback as the old address.
3799          */
3800         sctp_update_ipif_addr(ipif, ov6addr);
3801 
3802         ip_rts_newaddrmsg(RTM_CHGADDR, 0, ipif, RTSQ_DEFAULT);
3803 
3804         /*
3805          * ill_glist_insert() -> ill_phyint_reinit() may have merged IPSQs.
3806          * If so, free our original one.
3807          */