Print this page
XXX function ordering, typos
XXX minor typos
XXX ill_init_common (OS-2239)

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/inet/ip/ip_if.c
          +++ new/usr/src/uts/common/inet/ip/ip_if.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 1990 Mentat Inc.
  24   24   */
       25 +/*
       26 + * Copyright (c) 2013, Joyent, Inc. All rights reserved.
       27 + */
  25   28  
  26   29  /*
  27   30   * This file contains the interface control functions for IP.
  28   31   */
  29   32  
  30   33  #include <sys/types.h>
  31   34  #include <sys/stream.h>
  32   35  #include <sys/dlpi.h>
  33   36  #include <sys/stropts.h>
  34   37  #include <sys/strsun.h>
↓ open down ↓ 180 lines elided ↑ open up ↑
 215  218  static void     ill_disband_usesrc_group(ill_t *);
 216  219  static void     ip_sioctl_garp_reply(mblk_t *, ill_t *, void *, int);
 217  220  
 218  221  #ifdef DEBUG
 219  222  static  void    ill_trace_cleanup(const ill_t *);
 220  223  static  void    ipif_trace_cleanup(const ipif_t *);
 221  224  #endif
 222  225  
 223  226  static  void    ill_dlpi_clear_deferred(ill_t *ill);
 224  227  
      228 +static  void    phyint_flags_init(phyint_t *, t_uscalar_t);
      229 +
 225  230  /*
 226  231   * If we go over the memory footprint limit more than once in this msec
 227  232   * interval, we'll start pruning aggressively.
 228  233   */
 229  234  int ip_min_frag_prune_time = 0;
 230  235  
 231  236  static ipft_t   ip_ioctl_ftbl[] = {
 232  237          { IP_IOC_IRE_DELETE, ip_ire_delete, sizeof (ipid_t), 0 },
 233  238          { IP_IOC_IRE_DELETE_NO_REPLY, ip_ire_delete, sizeof (ipid_t),
 234  239                  IPFT_F_NO_REPLY },
↓ open down ↓ 38 lines elided ↑ open up ↑
 273  278              ip_nodef_v6intfid },
 274  279          { SUNW_DL_VNI, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
 275  280              NULL, NULL, ip_nodef_v6intfid, ip_nodef_v6intfid },
 276  281          { SUNW_DL_IPMP, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
 277  282              NULL, NULL, ip_ipmp_v6intfid, ip_nodef_v6intfid },
 278  283          { DL_OTHER, IFT_OTHER, ETHERTYPE_IP, ETHERTYPE_IPV6,
 279  284              ip_ether_v4_mapping, ip_ether_v6_mapping, ip_nodef_v6intfid,
 280  285              ip_nodef_v6intfid }
 281  286  };
 282  287  
 283      -static ill_t    ill_null;               /* Empty ILL for init. */
 284  288  char    ipif_loopback_name[] = "lo0";
 285  289  
 286  290  /* These are used by all IP network modules. */
 287  291  sin6_t  sin6_null;      /* Zero address for quick clears */
 288  292  sin_t   sin_null;       /* Zero address for quick clears */
 289  293  
 290  294  /* When set, search for an unused ipif_seqid */
 291  295  static ipif_t   ipif_zero;
 292  296  
 293  297  /*
↓ open down ↓ 3028 lines elided ↑ open up ↑
3322 3326                  ipx->ipx_forced = B_FALSE;
3323 3327                  ipx->ipx_reentry_cnt = 1;
3324 3328  #ifdef DEBUG
3325 3329                  ipx->ipx_depth = getpcstack(ipx->ipx_stack, IPX_STACK_DEPTH);
3326 3330  #endif
3327 3331          }
3328 3332          return (B_TRUE);
3329 3333  }
3330 3334  
3331 3335  /*
3332      - * ill_init is called by ip_open when a device control stream is opened.
3333      - * It does a few initializations, and shoots a DL_INFO_REQ message down
3334      - * to the driver.  The response is later picked up in ip_rput_dlpi and
3335      - * used to set up default mechanisms for talking to the driver.  (Always
3336      - * called as writer.)
3337      - *
3338      - * If this function returns error, ip_open will call ip_close which in
3339      - * turn will call ill_delete to clean up any memory allocated here that
3340      - * is not yet freed.
     3336 + * Here we perform initialisation of the ill_t common to both regular
     3337 + * interface ILLs and the special loopback ILL created by ill_lookup_on_name.
3341 3338   */
3342      -int
3343      -ill_init(queue_t *q, ill_t *ill)
     3339 +static int
     3340 +ill_init_common(ill_t *ill, queue_t *q, boolean_t isv6, boolean_t is_loopback,
     3341 +    boolean_t ipsq_enter)
3344 3342  {
3345      -        int     count;
3346      -        dl_info_req_t   *dlir;
3347      -        mblk_t  *info_mp;
     3343 +        int count;
3348 3344          uchar_t *frag_ptr;
3349 3345  
3350      -        /*
3351      -         * The ill is initialized to zero by mi_alloc*(). In addition
3352      -         * some fields already contain valid values, initialized in
3353      -         * ip_open(), before we reach here.
3354      -         */
3355 3346          mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, 0);
3356 3347          mutex_init(&ill->ill_saved_ire_lock, NULL, MUTEX_DEFAULT, NULL);
3357 3348          ill->ill_saved_ire_cnt = 0;
3358 3349  
3359      -        ill->ill_rq = q;
3360      -        ill->ill_wq = WR(q);
     3350 +        if (is_loopback == B_TRUE) {
     3351 +                ill->ill_max_frag = isv6 == B_TRUE ? ip_loopback_mtu_v6plus :
     3352 +                    ip_loopback_mtuplus;
     3353 +                /*
     3354 +                 * No resolver here.
     3355 +                 */
     3356 +                ill->ill_net_type = IRE_LOOPBACK;
     3357 +        } else {
     3358 +                ill->ill_rq = q;
     3359 +                ill->ill_wq = WR(q);
     3360 +                ill->ill_ppa = UINT_MAX;
     3361 +        }
3361 3362  
3362      -        info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
3363      -            BPRI_HI);
3364      -        if (info_mp == NULL)
3365      -                return (ENOMEM);
     3363 +        ill->ill_isv6 = isv6;
3366 3364  
3367 3365          /*
3368 3366           * Allocate sufficient space to contain our fragment hash table and
3369 3367           * the device name.
3370 3368           */
3371 3369          frag_ptr = (uchar_t *)mi_zalloc(ILL_FRAG_HASH_TBL_SIZE + 2 * LIFNAMSIZ);
3372      -        if (frag_ptr == NULL) {
3373      -                freemsg(info_mp);
     3370 +        if (frag_ptr == NULL)
3374 3371                  return (ENOMEM);
3375      -        }
3376 3372          ill->ill_frag_ptr = frag_ptr;
3377 3373          ill->ill_frag_free_num_pkts = 0;
3378 3374          ill->ill_last_frag_clean_time = 0;
3379 3375          ill->ill_frag_hash_tbl = (ipfb_t *)frag_ptr;
3380 3376          ill->ill_name = (char *)(frag_ptr + ILL_FRAG_HASH_TBL_SIZE);
3381 3377          for (count = 0; count < ILL_FRAG_HASH_TBL_COUNT; count++) {
3382 3378                  mutex_init(&ill->ill_frag_hash_tbl[count].ipfb_lock,
3383 3379                      NULL, MUTEX_DEFAULT, NULL);
3384 3380          }
3385 3381  
3386 3382          ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t));
3387 3383          if (ill->ill_phyint == NULL) {
3388      -                freemsg(info_mp);
3389 3384                  mi_free(frag_ptr);
3390 3385                  return (ENOMEM);
3391 3386          }
3392 3387  
3393 3388          mutex_init(&ill->ill_phyint->phyint_lock, NULL, MUTEX_DEFAULT, 0);
3394      -        /*
3395      -         * For now pretend this is a v4 ill. We need to set phyint_ill*
3396      -         * at this point because of the following reason. If we can't
3397      -         * enter the ipsq at some point and cv_wait, the writer that
3398      -         * wakes us up tries to locate us using the list of all phyints
3399      -         * in an ipsq and the ills from the phyint thru the phyint_ill*.
3400      -         * If we don't set it now, we risk a missed wakeup.
3401      -         */
3402      -        ill->ill_phyint->phyint_illv4 = ill;
3403      -        ill->ill_ppa = UINT_MAX;
     3389 +        if (isv6 == B_TRUE) {
     3390 +                ill->ill_phyint->phyint_illv6 = ill;
     3391 +        } else {
     3392 +                ill->ill_phyint->phyint_illv4 = ill;
     3393 +        }
     3394 +        if (is_loopback == B_TRUE) {
     3395 +                phyint_flags_init(ill->ill_phyint, DL_LOOP);
     3396 +        }
     3397 +
3404 3398          list_create(&ill->ill_nce, sizeof (nce_t), offsetof(nce_t, nce_node));
3405 3399  
3406 3400          ill_set_inputfn(ill);
3407 3401  
3408      -        if (!ipsq_init(ill, B_TRUE)) {
3409      -                freemsg(info_mp);
     3402 +        if (!ipsq_init(ill, ipsq_enter)) {
3410 3403                  mi_free(frag_ptr);
3411 3404                  mi_free(ill->ill_phyint);
3412 3405                  return (ENOMEM);
3413 3406          }
3414 3407  
3415      -        ill->ill_state_flags |= ILL_LL_SUBNET_PENDING;
3416      -
3417 3408          /* Frag queue limit stuff */
3418 3409          ill->ill_frag_count = 0;
3419 3410          ill->ill_ipf_gen = 0;
3420 3411  
3421 3412          rw_init(&ill->ill_mcast_lock, NULL, RW_DEFAULT, NULL);
3422 3413          mutex_init(&ill->ill_mcast_serializer, NULL, MUTEX_DEFAULT, NULL);
3423 3414          ill->ill_global_timer = INFINITY;
3424 3415          ill->ill_mcast_v1_time = ill->ill_mcast_v2_time = 0;
3425 3416          ill->ill_mcast_v1_tset = ill->ill_mcast_v2_tset = 0;
3426 3417          ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
↓ open down ↓ 4 lines elided ↑ open up ↑
 3431 3422           * opened as an IPv4 module.  Instead of tracking down the cases where
3432 3423           * it switches to do ipv6, we'll just initialize the IPv6 configuration
3433 3424           * here for convenience, this has no effect until the ill is set to do
3434 3425           * IPv6.
3435 3426           */
3436 3427          ill->ill_reachable_time = ND_REACHABLE_TIME;
3437 3428          ill->ill_xmit_count = ND_MAX_MULTICAST_SOLICIT;
3438 3429          ill->ill_max_buf = ND_MAX_Q;
3439 3430          ill->ill_refcnt = 0;
3440 3431  
     3432 +        return (0);
     3433 +}
     3434 +
     3435 +/*
     3436 + * ill_init is called by ip_open when a device control stream is opened.
     3437 + * It does a few initializations, and shoots a DL_INFO_REQ message down
     3438 + * to the driver.  The response is later picked up in ip_rput_dlpi and
     3439 + * used to set up default mechanisms for talking to the driver.  (Always
     3440 + * called as writer.)
     3441 + *
     3442 + * If this function returns error, ip_open will call ip_close which in
     3443 + * turn will call ill_delete to clean up any memory allocated here that
     3444 + * is not yet freed.
     3445 + *
     3446 + * Note: ill_ipst and ill_zoneid must be set before calling ill_init.
     3447 + */
     3448 +int
     3449 +ill_init(queue_t *q, ill_t *ill)
     3450 +{
     3451 +        int ret;
     3452 +        dl_info_req_t   *dlir;
     3453 +        mblk_t  *info_mp;
     3454 +
     3455 +        info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)),
     3456 +            BPRI_HI);
     3457 +        if (info_mp == NULL)
     3458 +                return (ENOMEM);
     3459 +
     3460 +        /*
     3461 +         * The ill is initialized to zero by mi_alloc*(). In addition
     3462 +         * some fields already contain valid values, initialized in
     3463 +         * ip_open(), before we reach here.
     3464 +         *
     3465 +         * For now pretend this is a v4 ill. We need to set phyint_ill*
     3466 +         * at this point because of the following reason. If we can't
     3467 +         * enter the ipsq at some point and cv_wait, the writer that
     3468 +         * wakes us up tries to locate us using the list of all phyints
     3469 +         * in an ipsq and the ills from the phyint thru the phyint_ill*.
     3470 +         * If we don't set it now, we risk a missed wakeup.
     3471 +         */
     3472 +        if ((ret = ill_init_common(ill, q, B_FALSE, B_FALSE, B_TRUE)) != 0) {
     3473 +                freemsg(info_mp);
     3474 +                return (ret);
     3475 +        }
     3476 +
     3477 +        ill->ill_state_flags |= ILL_LL_SUBNET_PENDING;
     3478 +
3441 3479          /* Send down the Info Request to the driver. */
3442 3480          info_mp->b_datap->db_type = M_PCPROTO;
3443 3481          dlir = (dl_info_req_t *)info_mp->b_rptr;
3444 3482          info_mp->b_wptr = (uchar_t *)&dlir[1];
3445 3483          dlir->dl_primitive = DL_INFO_REQ;
3446 3484  
3447 3485          ill->ill_dlpi_pending = DL_PRIM_INVAL;
3448 3486  
3449 3487          qprocson(q);
3450 3488          ill_dlpi_send(ill, info_mp);
↓ open down ↓ 227 lines elided ↑ open up ↑
3678 3716                  rw_exit(&ipst->ips_ill_g_lock);
3679 3717                  return (ill);
3680 3718          }
3681 3719  
3682 3720          /* Create the loopback device on demand */
3683 3721          ill = (ill_t *)(mi_alloc(sizeof (ill_t) +
3684 3722              sizeof (ipif_loopback_name), BPRI_MED));
3685 3723          if (ill == NULL)
3686 3724                  goto done;
3687 3725  
3688      -        *ill = ill_null;
3689      -        mutex_init(&ill->ill_lock, NULL, MUTEX_DEFAULT, NULL);
     3726 +        bzero(ill, sizeof (*ill));
3690 3727          ill->ill_ipst = ipst;
3691      -        list_create(&ill->ill_nce, sizeof (nce_t), offsetof(nce_t, nce_node));
3692 3728          netstack_hold(ipst->ips_netstack);
3693 3729          /*
3694 3730           * For exclusive stacks we set the zoneid to zero
3695 3731           * to make IP operate as if in the global zone.
3696 3732           */
3697 3733          ill->ill_zoneid = GLOBAL_ZONEID;
3698 3734  
3699      -        ill->ill_phyint = (phyint_t *)mi_zalloc(sizeof (phyint_t));
3700      -        if (ill->ill_phyint == NULL)
     3735 +        if (ill_init_common(ill, NULL, isv6, B_TRUE, B_FALSE) != 0)
3701 3736                  goto done;
3702 3737  
3703      -        if (isv6)
3704      -                ill->ill_phyint->phyint_illv6 = ill;
3705      -        else
3706      -                ill->ill_phyint->phyint_illv4 = ill;
3707      -        mutex_init(&ill->ill_phyint->phyint_lock, NULL, MUTEX_DEFAULT, 0);
3708      -        phyint_flags_init(ill->ill_phyint, DL_LOOP);
3709      -
3710      -        if (isv6) {
3711      -                ill->ill_isv6 = B_TRUE;
3712      -                ill->ill_max_frag = ip_loopback_mtu_v6plus;
3713      -        } else {
3714      -                ill->ill_max_frag = ip_loopback_mtuplus;
3715      -        }
3716 3738          if (!ill_allocate_mibs(ill))
3717 3739                  goto done;
     3740 +
3718 3741          ill->ill_current_frag = ill->ill_max_frag;
3719 3742          ill->ill_mtu = ill->ill_max_frag;       /* Initial value */
3720 3743          ill->ill_mc_mtu = ill->ill_mtu;
3721 3744          /*
3722 3745           * ipif_loopback_name can't be pointed at directly because it's used
3723 3746           * by both the ipv4 and ipv6 interfaces.  When the ill is removed
3724 3747           * from the glist, ill_glist_delete() sets the first character of
3725 3748           * ill_name to '\0'.
3726 3749           */
3727 3750          ill->ill_name = (char *)ill + sizeof (*ill);
3728 3751          (void) strcpy(ill->ill_name, ipif_loopback_name);
3729 3752          ill->ill_name_length = sizeof (ipif_loopback_name);
3730 3753          /* Set ill_dlpi_pending for ipsq_current_finish() to work properly */
3731 3754          ill->ill_dlpi_pending = DL_PRIM_INVAL;
3732 3755  
3733      -        rw_init(&ill->ill_mcast_lock, NULL, RW_DEFAULT, NULL);
3734      -        mutex_init(&ill->ill_mcast_serializer, NULL, MUTEX_DEFAULT, NULL);
3735      -        ill->ill_global_timer = INFINITY;
3736      -        ill->ill_mcast_v1_time = ill->ill_mcast_v2_time = 0;
3737      -        ill->ill_mcast_v1_tset = ill->ill_mcast_v2_tset = 0;
3738      -        ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
3739      -        ill->ill_mcast_qi = MCAST_DEF_QUERY_INTERVAL;
3740      -
3741      -        /* No resolver here. */
3742      -        ill->ill_net_type = IRE_LOOPBACK;
3743      -
3744      -        /* Initialize the ipsq */
3745      -        if (!ipsq_init(ill, B_FALSE))
3746      -                goto done;
3747      -
3748 3756          ipif = ipif_allocate(ill, 0L, IRE_LOOPBACK, B_TRUE, B_TRUE, NULL);
3749 3757          if (ipif == NULL)
3750 3758                  goto done;
3751 3759  
3752 3760          ill->ill_flags = ILLF_MULTICAST;
3753 3761  
3754 3762          ov6addr = ipif->ipif_v6lcl_addr;
3755 3763          /* Set up default loopback address and mask. */
3756 3764          if (!isv6) {
3757 3765                  ipaddr_t inaddr_loopback = htonl(INADDR_LOOPBACK);
↓ open down ↓ 8 lines elided ↑ open up ↑
3766 3774                  ipif->ipif_v6net_mask = ipv6_all_ones;
3767 3775                  V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask,
3768 3776                      ipif->ipif_v6subnet);
3769 3777                  ill->ill_flags |= ILLF_IPV6;
3770 3778          }
3771 3779  
3772 3780          /*
3773 3781           * Chain us in at the end of the ill list. Hold the ill
3774 3782           * before we make it globally visible. 1 for the lookup.
3775 3783           */
3776      -        ill->ill_refcnt = 0;
3777 3784          ill_refhold(ill);
3778 3785  
3779      -        ill->ill_frag_count = 0;
3780      -        ill->ill_frag_free_num_pkts = 0;
3781      -        ill->ill_last_frag_clean_time = 0;
3782      -
3783 3786          ipsq = ill->ill_phyint->phyint_ipsq;
3784 3787  
3785      -        ill_set_inputfn(ill);
3786      -
3787 3788          if (ill_glist_insert(ill, "lo", isv6) != 0)
3788 3789                  cmn_err(CE_PANIC, "cannot insert loopback interface");
3789 3790  
3790 3791          /* Let SCTP know so that it can add this to its list */
3791 3792          sctp_update_ill(ill, SCTP_ILL_INSERT);
3792 3793  
3793 3794          /*
3794 3795           * We have already assigned ipif_v6lcl_addr above, but we need to
3795 3796           * call sctp_update_ipif_addr() after SCTP_ILL_INSERT, which
3796 3797           * requires to be after ill_glist_insert() since we need the
↓ open down ↓ 15366 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX