1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 1990 Mentat Inc.
  24  */
  25 
  26 /*
  27  * This file contains the interface control functions for IPv6.
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/sysmacros.h>
  32 #include <sys/stream.h>
  33 #include <sys/dlpi.h>
  34 #include <sys/stropts.h>
  35 #include <sys/ddi.h>
  36 #include <sys/cmn_err.h>
  37 #include <sys/kstat.h>
  38 #include <sys/debug.h>
  39 #include <sys/zone.h>
  40 #include <sys/policy.h>
  41 
  42 #include <sys/systm.h>
  43 #include <sys/param.h>
  44 #include <sys/socket.h>
  45 #include <sys/isa_defs.h>
  46 #include <net/if.h>
  47 #include <net/if_dl.h>
  48 #include <net/route.h>
  49 #include <netinet/in.h>
  50 #include <netinet/igmp_var.h>
  51 #include <netinet/ip6.h>
  52 #include <netinet/icmp6.h>
  53 
  54 #include <inet/common.h>
  55 #include <inet/nd.h>
  56 #include <inet/tunables.h>
  57 #include <inet/mib2.h>
  58 #include <inet/ip.h>
  59 #include <inet/ip6.h>
  60 #include <inet/ip_multi.h>
  61 #include <inet/ip_ire.h>
  62 #include <inet/ip_rts.h>
  63 #include <inet/ip_ndp.h>
  64 #include <inet/ip_if.h>
  65 #include <inet/ip6_asp.h>
  66 #include <inet/ipclassifier.h>
  67 #include <inet/sctp_ip.h>
  68 
  69 #include <sys/tsol/tndb.h>
  70 #include <sys/tsol/tnet.h>
  71 
  72 static in6_addr_t       ipv6_ll_template =
  73                         {(uint32_t)V6_LINKLOCAL, 0x0, 0x0, 0x0};
  74 
  75 static ipif_t *
  76 ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst,
  77     ip_stack_t *ipst);
  78 
  79 static int      ipif_add_ires_v6(ipif_t *, boolean_t);
  80 
  81 /*
  82  * This function is called when an application does not specify an interface
  83  * to be used for multicast traffic.  It calls ire_lookup_multi_v6() to look
  84  * for an interface route for the specified multicast group.  Doing
  85  * this allows the administrator to add prefix routes for multicast to
  86  * indicate which interface to be used for multicast traffic in the above
  87  * scenario.  The route could be for all multicast (ff00::/8), for a single
  88  * multicast group (a /128 route) or anything in between.  If there is no
  89  * such multicast route, we just find any multicast capable interface and
  90  * return it.
  91  *
  92  * We support MULTIRT and RTF_SETSRC on the multicast routes added to the
  93  * unicast table. This is used by CGTP.
  94  */
  95 ill_t *
  96 ill_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst,
  97     boolean_t *multirtp, in6_addr_t *setsrcp)
  98 {
  99         ill_t   *ill;
 100 
 101         ill = ire_lookup_multi_ill_v6(group, zoneid, ipst, multirtp, setsrcp);
 102         if (ill != NULL)
 103                 return (ill);
 104 
 105         return (ill_lookup_multicast(ipst, zoneid, B_TRUE));
 106 }
 107 
 108 /*
 109  * Look for an ipif with the specified interface address and destination.
 110  * The destination address is used only for matching point-to-point interfaces.
 111  */
 112 static ipif_t *
 113 ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst,
 114     ip_stack_t *ipst)
 115 {
 116         ipif_t  *ipif;
 117         ill_t   *ill;
 118         ill_walk_context_t ctx;
 119 
 120         /*
 121          * First match all the point-to-point interfaces
 122          * before looking at non-point-to-point interfaces.
 123          * This is done to avoid returning non-point-to-point
 124          * ipif instead of unnumbered point-to-point ipif.
 125          */
 126         rw_enter(&ipst->ips_ill_g_lock, RW_READER);
 127         ill = ILL_START_WALK_V6(&ctx, ipst);
 128         for (; ill != NULL; ill = ill_next(&ctx, ill)) {
 129                 mutex_enter(&ill->ill_lock);
 130                 for (ipif = ill->ill_ipif; ipif != NULL;
 131                     ipif = ipif->ipif_next) {
 132                         /* Allow the ipif to be down */
 133                         if ((ipif->ipif_flags & IPIF_POINTOPOINT) &&
 134                             (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
 135                             if_addr)) &&
 136                             (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr,
 137                             dst))) {
 138                                 if (!IPIF_IS_CONDEMNED(ipif)) {
 139                                         ipif_refhold_locked(ipif);
 140                                         mutex_exit(&ill->ill_lock);
 141                                         rw_exit(&ipst->ips_ill_g_lock);
 142                                         return (ipif);
 143                                 }
 144                         }
 145                 }
 146                 mutex_exit(&ill->ill_lock);
 147         }
 148         rw_exit(&ipst->ips_ill_g_lock);
 149         /* lookup the ipif based on interface address */
 150         ipif = ipif_lookup_addr_v6(if_addr, NULL, ALL_ZONES, ipst);
 151         ASSERT(ipif == NULL || ipif->ipif_isv6);
 152         return (ipif);
 153 }
 154 
 155 /*
 156  * Common function for ipif_lookup_addr_v6() and ipif_lookup_addr_exact_v6().
 157  */
 158 static ipif_t *
 159 ipif_lookup_addr_common_v6(const in6_addr_t *addr, ill_t *match_ill,
 160     uint32_t match_flags, zoneid_t zoneid, ip_stack_t *ipst)
 161 {
 162         ipif_t  *ipif;
 163         ill_t   *ill;
 164         boolean_t  ptp = B_FALSE;
 165         ill_walk_context_t ctx;
 166         boolean_t match_illgrp = (match_flags & IPIF_MATCH_ILLGRP);
 167         boolean_t no_duplicate = (match_flags & IPIF_MATCH_NONDUP);
 168 
 169         rw_enter(&ipst->ips_ill_g_lock, RW_READER);
 170         /*
 171          * Repeat twice, first based on local addresses and
 172          * next time for pointopoint.
 173          */
 174 repeat:
 175         ill = ILL_START_WALK_V6(&ctx, ipst);
 176         for (; ill != NULL; ill = ill_next(&ctx, ill)) {
 177                 if (match_ill != NULL && ill != match_ill &&
 178                     (!match_illgrp || !IS_IN_SAME_ILLGRP(ill, match_ill))) {
 179                         continue;
 180                 }
 181                 mutex_enter(&ill->ill_lock);
 182                 for (ipif = ill->ill_ipif; ipif != NULL;
 183                     ipif = ipif->ipif_next) {
 184                         if (zoneid != ALL_ZONES &&
 185                             ipif->ipif_zoneid != zoneid &&
 186                             ipif->ipif_zoneid != ALL_ZONES)
 187                                 continue;
 188 
 189                         if (no_duplicate &&
 190                             !(ipif->ipif_flags & IPIF_UP)) {
 191                                 continue;
 192                         }
 193 
 194                         /* Allow the ipif to be down */
 195                         if ((!ptp && (IN6_ARE_ADDR_EQUAL(
 196                             &ipif->ipif_v6lcl_addr, addr) &&
 197                             (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) ||
 198                             (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) &&
 199                             IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr,
 200                             addr))) {
 201                                 if (!IPIF_IS_CONDEMNED(ipif)) {
 202                                         ipif_refhold_locked(ipif);
 203                                         mutex_exit(&ill->ill_lock);
 204                                         rw_exit(&ipst->ips_ill_g_lock);
 205                                         return (ipif);
 206                                 }
 207                         }
 208                 }
 209                 mutex_exit(&ill->ill_lock);
 210         }
 211 
 212         /* If we already did the ptp case, then we are done */
 213         if (ptp) {
 214                 rw_exit(&ipst->ips_ill_g_lock);
 215                 return (NULL);
 216         }
 217         ptp = B_TRUE;
 218         goto repeat;
 219 }
 220 
 221 /*
 222  * Lookup an ipif with the specified address.  For point-to-point links we
 223  * look for matches on either the destination address or the local address,
 224  * but we skip the local address check if IPIF_UNNUMBERED is set.  If the
 225  * `match_ill' argument is non-NULL, the lookup is restricted to that ill
 226  * (or illgrp if `match_ill' is in an IPMP group).
 227  */
 228 ipif_t *
 229 ipif_lookup_addr_v6(const in6_addr_t *addr, ill_t *match_ill, zoneid_t zoneid,
 230     ip_stack_t *ipst)
 231 {
 232         return (ipif_lookup_addr_common_v6(addr, match_ill, IPIF_MATCH_ILLGRP,
 233             zoneid, ipst));
 234 }
 235 
 236 /*
 237  * Lookup an ipif with the specified address. Similar to ipif_lookup_addr,
 238  * except that we will only return an address if it is not marked as
 239  * IPIF_DUPLICATE
 240  */
 241 ipif_t *
 242 ipif_lookup_addr_nondup_v6(const in6_addr_t *addr, ill_t *match_ill,
 243     zoneid_t zoneid, ip_stack_t *ipst)
 244 {
 245         return (ipif_lookup_addr_common_v6(addr, match_ill,
 246             (IPIF_MATCH_ILLGRP | IPIF_MATCH_NONDUP), zoneid,
 247             ipst));
 248 }
 249 
 250 /*
 251  * Special abbreviated version of ipif_lookup_addr_v6() that doesn't match
 252  * `match_ill' across the IPMP group.  This function is only needed in some
 253  * corner-cases; almost everything should use ipif_lookup_addr_v6().
 254  */
 255 ipif_t *
 256 ipif_lookup_addr_exact_v6(const in6_addr_t *addr, ill_t *match_ill,
 257     ip_stack_t *ipst)
 258 {
 259         ASSERT(match_ill != NULL);
 260         return (ipif_lookup_addr_common_v6(addr, match_ill, 0, ALL_ZONES,
 261             ipst));
 262 }
 263 
 264 /*
 265  * Look for an ipif with the specified address. For point-point links
 266  * we look for matches on either the destination address and the local
 267  * address, but we ignore the check on the local address if IPIF_UNNUMBERED
 268  * is set.
 269  * If the `match_ill' argument is non-NULL, the lookup is restricted to that
 270  * ill (or illgrp if `match_ill' is in an IPMP group).
 271  * Return the zoneid for the ipif. ALL_ZONES if none found.
 272  */
 273 zoneid_t
 274 ipif_lookup_addr_zoneid_v6(const in6_addr_t *addr, ill_t *match_ill,
 275     ip_stack_t *ipst)
 276 {
 277         ipif_t  *ipif;
 278         ill_t   *ill;
 279         boolean_t  ptp = B_FALSE;
 280         ill_walk_context_t ctx;
 281         zoneid_t        zoneid;
 282 
 283         rw_enter(&ipst->ips_ill_g_lock, RW_READER);
 284         /*
 285          * Repeat twice, first based on local addresses and
 286          * next time for pointopoint.
 287          */
 288 repeat:
 289         ill = ILL_START_WALK_V6(&ctx, ipst);
 290         for (; ill != NULL; ill = ill_next(&ctx, ill)) {
 291                 if (match_ill != NULL && ill != match_ill &&
 292                     !IS_IN_SAME_ILLGRP(ill, match_ill)) {
 293                         continue;
 294                 }
 295                 mutex_enter(&ill->ill_lock);
 296                 for (ipif = ill->ill_ipif; ipif != NULL;
 297                     ipif = ipif->ipif_next) {
 298                         /* Allow the ipif to be down */
 299                         if ((!ptp && (IN6_ARE_ADDR_EQUAL(
 300                             &ipif->ipif_v6lcl_addr, addr) &&
 301                             (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) ||
 302                             (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) &&
 303                             IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr,
 304                             addr)) &&
 305                             !(ipif->ipif_state_flags & IPIF_CONDEMNED)) {
 306                                 zoneid = ipif->ipif_zoneid;
 307                                 mutex_exit(&ill->ill_lock);
 308                                 rw_exit(&ipst->ips_ill_g_lock);
 309                                 /*
 310                                  * If ipif_zoneid was ALL_ZONES then we have
 311                                  * a trusted extensions shared IP address.
 312                                  * In that case GLOBAL_ZONEID works to send.
 313                                  */
 314                                 if (zoneid == ALL_ZONES)
 315                                         zoneid = GLOBAL_ZONEID;
 316                                 return (zoneid);
 317                         }
 318                 }
 319                 mutex_exit(&ill->ill_lock);
 320         }
 321 
 322         /* If we already did the ptp case, then we are done */
 323         if (ptp) {
 324                 rw_exit(&ipst->ips_ill_g_lock);
 325                 return (ALL_ZONES);
 326         }
 327         ptp = B_TRUE;
 328         goto repeat;
 329 }
 330 
 331 /*
 332  * Perform various checks to verify that an address would make sense as a local
 333  * interface address.  This is currently only called when an attempt is made
 334  * to set a local address.
 335  *
 336  * Does not allow a v4-mapped address, an address that equals the subnet
 337  * anycast address, ... a multicast address, ...
 338  */
 339 boolean_t
 340 ip_local_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask)
 341 {
 342         in6_addr_t subnet;
 343 
 344         if (IN6_IS_ADDR_UNSPECIFIED(addr))
 345                 return (B_TRUE);        /* Allow all zeros */
 346 
 347         /*
 348          * Don't allow all zeroes or host part, but allow
 349          * all ones netmask.
 350          */
 351         V6_MASK_COPY(*addr, *subnet_mask, subnet);
 352         if (IN6_IS_ADDR_V4MAPPED(addr) ||
 353             (IN6_ARE_ADDR_EQUAL(addr, &subnet) &&
 354             !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) ||
 355             (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr)))) ||
 356             IN6_IS_ADDR_MULTICAST(addr))
 357                 return (B_FALSE);
 358 
 359         return (B_TRUE);
 360 }
 361 
 362 /*
 363  * Perform various checks to verify that an address would make sense as a
 364  * remote/subnet interface address.
 365  */
 366 boolean_t
 367 ip_remote_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask)
 368 {
 369         in6_addr_t subnet;
 370 
 371         if (IN6_IS_ADDR_UNSPECIFIED(addr))
 372                 return (B_TRUE);        /* Allow all zeros */
 373 
 374         V6_MASK_COPY(*addr, *subnet_mask, subnet);
 375         if (IN6_IS_ADDR_V4MAPPED(addr) ||
 376             (IN6_ARE_ADDR_EQUAL(addr, &subnet) &&
 377             !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) ||
 378             IN6_IS_ADDR_MULTICAST(addr) ||
 379             (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr)))))
 380                 return (B_FALSE);
 381 
 382         return (B_TRUE);
 383 }
 384 
 385 /*
 386  * ip_rt_add_v6 is called to add an IPv6 route to the forwarding table.
 387  * ill is passed in to associate it with the correct interface
 388  * (for link-local destinations and gateways).
 389  * If ire_arg is set, then we return the held IRE in that location.
 390  */
 391 /* ARGSUSED1 */
 392 int
 393 ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask,
 394     const in6_addr_t *gw_addr, const in6_addr_t *src_addr, int flags,
 395     ill_t *ill, ire_t **ire_arg, struct rtsa_s *sp, ip_stack_t *ipst,
 396     zoneid_t zoneid)
 397 {
 398         ire_t   *ire, *nire;
 399         ire_t   *gw_ire = NULL;
 400         ipif_t  *ipif;
 401         uint_t  type;
 402         int     match_flags = MATCH_IRE_TYPE;
 403         tsol_gc_t *gc = NULL;
 404         tsol_gcgrp_t *gcgrp = NULL;
 405         boolean_t gcgrp_xtraref = B_FALSE;
 406         boolean_t unbound = B_FALSE;
 407 
 408         if (ire_arg != NULL)
 409                 *ire_arg = NULL;
 410 
 411         /*
 412          * Prevent routes with a zero gateway from being created (since
 413          * interfaces can currently be plumbed and brought up with no assigned
 414          * address).
 415          */
 416         if (IN6_IS_ADDR_UNSPECIFIED(gw_addr))
 417                 return (ENETUNREACH);
 418 
 419         /*
 420          * If this is the case of RTF_HOST being set, then we set the netmask
 421          * to all ones (regardless if one was supplied).
 422          */
 423         if (flags & RTF_HOST)
 424                 mask = &ipv6_all_ones;
 425 
 426         /*
 427          * Get the ipif, if any, corresponding to the gw_addr
 428          * If -ifp was specified we restrict ourselves to the ill, otherwise
  429          * we match on the gateway and destination to handle unnumbered pt-pt
 430          * interfaces.
 431          */
 432         if (ill != NULL)
 433                 ipif = ipif_lookup_addr_v6(gw_addr, ill, ALL_ZONES, ipst);
 434         else
 435                 ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, ipst);
 436         if (ipif != NULL) {
 437                 if (IS_VNI(ipif->ipif_ill)) {
 438                         ipif_refrele(ipif);
 439                         return (EINVAL);
 440                 }
 441         }
 442 
 443         /*
 444          * GateD will attempt to create routes with a loopback interface
 445          * address as the gateway and with RTF_GATEWAY set.  We allow
 446          * these routes to be added, but create them as interface routes
 447          * since the gateway is an interface address.
 448          */
 449         if ((ipif != NULL) && (ipif->ipif_ire_type == IRE_LOOPBACK)) {
 450                 flags &= ~RTF_GATEWAY;
 451                 if (IN6_ARE_ADDR_EQUAL(gw_addr, &ipv6_loopback) &&
 452                     IN6_ARE_ADDR_EQUAL(dst_addr, &ipv6_loopback) &&
 453                     IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) {
 454                         ire = ire_ftable_lookup_v6(dst_addr, 0, 0, IRE_LOOPBACK,
 455                             NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE, 0, ipst,
 456                             NULL);
 457                         if (ire != NULL) {
 458                                 ire_refrele(ire);
 459                                 ipif_refrele(ipif);
 460                                 return (EEXIST);
 461                         }
 462                         ip1dbg(("ip_rt_add_v6: 0x%p creating IRE 0x%x"
 463                             "for 0x%x\n", (void *)ipif,
 464                             ipif->ipif_ire_type,
 465                             ntohl(ipif->ipif_lcl_addr)));
 466                         ire = ire_create_v6(
 467                             dst_addr,
 468                             mask,
 469                             NULL,
 470                             ipif->ipif_ire_type,     /* LOOPBACK */
 471                             ipif->ipif_ill,
 472                             zoneid,
 473                             (ipif->ipif_flags & IPIF_PRIVATE) ? RTF_PRIVATE : 0,
 474                             NULL,
 475                             ipst);
 476 
 477                         if (ire == NULL) {
 478                                 ipif_refrele(ipif);
 479                                 return (ENOMEM);
 480                         }
 481                         /* src address assigned by the caller? */
 482                         if ((flags & RTF_SETSRC) &&
 483                             !IN6_IS_ADDR_UNSPECIFIED(src_addr))
 484                                 ire->ire_setsrc_addr_v6 = *src_addr;
 485 
 486                         nire = ire_add(ire);
 487                         if (nire == NULL) {
 488                                 /*
 489                                  * In the result of failure, ire_add() will have
 490                                  * already deleted the ire in question, so there
 491                                  * is no need to do that here.
 492                                  */
 493                                 ipif_refrele(ipif);
 494                                 return (ENOMEM);
 495                         }
 496                         /*
 497                          * Check if it was a duplicate entry. This handles
 498                          * the case of two racing route adds for the same route
 499                          */
 500                         if (nire != ire) {
 501                                 ASSERT(nire->ire_identical_ref > 1);
 502                                 ire_delete(nire);
 503                                 ire_refrele(nire);
 504                                 ipif_refrele(ipif);
 505                                 return (EEXIST);
 506                         }
 507                         ire = nire;
 508                         goto save_ire;
 509                 }
 510         }
 511 
 512         /*
 513          * The routes for multicast with CGTP are quite special in that
 514          * the gateway is the local interface address, yet RTF_GATEWAY
 515          * is set. We turn off RTF_GATEWAY to provide compatibility with
 516          * this undocumented and unusual use of multicast routes.
 517          */
 518         if ((flags & RTF_MULTIRT) && ipif != NULL)
 519                 flags &= ~RTF_GATEWAY;
 520 
 521         /*
 522          * Traditionally, interface routes are ones where RTF_GATEWAY isn't set
 523          * and the gateway address provided is one of the system's interface
 524          * addresses.  By using the routing socket interface and supplying an
 525          * RTA_IFP sockaddr with an interface index, an alternate method of
 526          * specifying an interface route to be created is available which uses
 527          * the interface index that specifies the outgoing interface rather than
 528          * the address of an outgoing interface (which may not be able to
 529          * uniquely identify an interface).  When coupled with the RTF_GATEWAY
 530          * flag, routes can be specified which not only specify the next-hop to
 531          * be used when routing to a certain prefix, but also which outgoing
 532          * interface should be used.
 533          *
 534          * Previously, interfaces would have unique addresses assigned to them
 535          * and so the address assigned to a particular interface could be used
 536          * to identify a particular interface.  One exception to this was the
 537          * case of an unnumbered interface (where IPIF_UNNUMBERED was set).
 538          *
 539          * With the advent of IPv6 and its link-local addresses, this
 540          * restriction was relaxed and interfaces could share addresses between
 541          * themselves.  In fact, typically all of the link-local interfaces on
 542          * an IPv6 node or router will have the same link-local address.  In
 543          * order to differentiate between these interfaces, the use of an
 544          * interface index is necessary and this index can be carried inside a
 545          * RTA_IFP sockaddr (which is actually a sockaddr_dl).  One restriction
 546          * of using the interface index, however, is that all of the ipif's that
 547          * are part of an ill have the same index and so the RTA_IFP sockaddr
 548          * cannot be used to differentiate between ipif's (or logical
 549          * interfaces) that belong to the same ill (physical interface).
 550          *
 551          * For example, in the following case involving IPv4 interfaces and
 552          * logical interfaces
 553          *
 554          *      192.0.2.32      255.255.255.224 192.0.2.33      U       if0
 555          *      192.0.2.32      255.255.255.224 192.0.2.34      U       if0
 556          *      192.0.2.32      255.255.255.224 192.0.2.35      U       if0
 557          *
 558          * the ipif's corresponding to each of these interface routes can be
 559          * uniquely identified by the "gateway" (actually interface address).
 560          *
 561          * In this case involving multiple IPv6 default routes to a particular
 562          * link-local gateway, the use of RTA_IFP is necessary to specify which
 563          * default route is of interest:
 564          *
 565          *      default         fe80::123:4567:89ab:cdef        U       if0
 566          *      default         fe80::123:4567:89ab:cdef        U       if1
 567          */
 568 
 569         /* RTF_GATEWAY not set */
 570         if (!(flags & RTF_GATEWAY)) {
 571                 if (sp != NULL) {
 572                         ip2dbg(("ip_rt_add_v6: gateway security attributes "
 573                             "cannot be set with interface route\n"));
 574                         if (ipif != NULL)
 575                                 ipif_refrele(ipif);
 576                         return (EINVAL);
 577                 }
 578 
 579                 /*
 580                  * Whether or not ill (RTA_IFP) is set, we require that
 581                  * the gateway is one of our local addresses.
 582                  */
 583                 if (ipif == NULL)
 584                         return (ENETUNREACH);
 585 
 586                 /*
 587                  * We use MATCH_IRE_ILL here. If the caller specified an
 588                  * interface (from the RTA_IFP sockaddr) we use it, otherwise
 589                  * we use the ill derived from the gateway address.
 590                  * We can always match the gateway address since we record it
 591                  * in ire_gateway_addr.
 592                  * We don't allow RTA_IFP to specify a different ill than the
 593                  * one matching the ipif to make sure we can delete the route.
 594                  */
 595                 match_flags |= MATCH_IRE_GW | MATCH_IRE_ILL;
 596                 if (ill == NULL) {
 597                         ill = ipif->ipif_ill;
 598                 } else if (ill != ipif->ipif_ill) {
 599                         ipif_refrele(ipif);
 600                         return (EINVAL);
 601                 }
 602 
 603                 /*
 604                  * We check for an existing entry at this point.
 605                  */
 606                 match_flags |= MATCH_IRE_MASK;
 607                 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr,
 608                     IRE_INTERFACE, ill, ALL_ZONES, NULL, match_flags, 0, ipst,
 609                     NULL);
 610                 if (ire != NULL) {
 611                         ire_refrele(ire);
 612                         ipif_refrele(ipif);
 613                         return (EEXIST);
 614                 }
 615 
 616                 /*
 617                  * Some software (for example, GateD and Sun Cluster) attempts
 618                  * to create (what amount to) IRE_PREFIX routes with the
 619                  * loopback address as the gateway.  This is primarily done to
 620                  * set up prefixes with the RTF_REJECT flag set (for example,
 621                  * when generating aggregate routes). We also OR in the
 622                  * RTF_BLACKHOLE flag as these interface routes, by
 623                  * definition, can only be that.
 624                  *
 625                  * If the IRE type (as defined by ill->ill_net_type) would be
 626                  * IRE_LOOPBACK, then we map the request into a
 627                  * IRE_IF_NORESOLVER.
 628                  *
 629                  * Needless to say, the real IRE_LOOPBACK is NOT created by this
 630                  * routine, but rather using ire_create_v6() directly.
 631                  */
 632                 type = ill->ill_net_type;
 633                 if (type == IRE_LOOPBACK) {
 634                         type = IRE_IF_NORESOLVER;
 635                         flags |= RTF_BLACKHOLE;
 636                 }
 637 
 638                 /*
 639                  * Create a copy of the IRE_IF_NORESOLVER or
 640                  * IRE_IF_RESOLVER with the modified address, netmask, and
 641                  * gateway.
 642                  */
 643                 ire = ire_create_v6(
 644                     dst_addr,
 645                     mask,
 646                     gw_addr,
 647                     type,
 648                     ill,
 649                     zoneid,
 650                     flags,
 651                     NULL,
 652                     ipst);
 653                 if (ire == NULL) {
 654                         ipif_refrele(ipif);
 655                         return (ENOMEM);
 656                 }
 657 
 658                 /* src address assigned by the caller? */
 659                 if ((flags & RTF_SETSRC) && !IN6_IS_ADDR_UNSPECIFIED(src_addr))
 660                         ire->ire_setsrc_addr_v6 = *src_addr;
 661 
 662                 nire = ire_add(ire);
 663                 if (nire == NULL) {
 664                         /*
 665                          * In the result of failure, ire_add() will have
 666                          * already deleted the ire in question, so there
 667                          * is no need to do that here.
 668                          */
 669                         ipif_refrele(ipif);
 670                         return (ENOMEM);
 671                 }
 672                 /*
 673                  * Check if it was a duplicate entry. This handles
 674                  * the case of two racing route adds for the same route
 675                  */
 676                 if (nire != ire) {
 677                         ASSERT(nire->ire_identical_ref > 1);
 678                         ire_delete(nire);
 679                         ire_refrele(nire);
 680                         ipif_refrele(ipif);
 681                         return (EEXIST);
 682                 }
 683                 ire = nire;
 684                 goto save_ire;
 685         }
 686 
 687         /*
 688          * Get an interface IRE for the specified gateway.
 689          * If we don't have an IRE_IF_NORESOLVER or IRE_IF_RESOLVER for the
 690          * gateway, it is currently unreachable and we fail the request
 691          * accordingly. We reject any RTF_GATEWAY routes where the gateway
 692          * is an IRE_LOCAL or IRE_LOOPBACK.
 693          * If RTA_IFP was specified we look on that particular ill.
 694          */
 695         if (ill != NULL)
 696                 match_flags |= MATCH_IRE_ILL;
 697 
 698         /* Check whether the gateway is reachable. */
 699 again:
 700         type = IRE_INTERFACE | IRE_LOCAL | IRE_LOOPBACK;
 701         if (flags & RTF_INDIRECT)
 702                 type |= IRE_OFFLINK;
 703 
 704         gw_ire = ire_ftable_lookup_v6(gw_addr, 0, 0, type, ill,
 705             ALL_ZONES, NULL, match_flags, 0, ipst, NULL);
 706         if (gw_ire == NULL) {
 707                 /*
 708                  * With IPMP, we allow host routes to influence in.mpathd's
 709                  * target selection.  However, if the test addresses are on
 710                  * their own network, the above lookup will fail since the
 711                  * underlying IRE_INTERFACEs are marked hidden.  So allow
 712                  * hidden test IREs to be found and try again.
 713                  */
 714                 if (!(match_flags & MATCH_IRE_TESTHIDDEN))  {
 715                         match_flags |= MATCH_IRE_TESTHIDDEN;
 716                         goto again;
 717                 }
 718                 if (ipif != NULL)
 719                         ipif_refrele(ipif);
 720                 return (ENETUNREACH);
 721         }
 722         if (gw_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) {
 723                 ire_refrele(gw_ire);
 724                 if (ipif != NULL)
 725                         ipif_refrele(ipif);
 726                 return (ENETUNREACH);
 727         }
 728         if (ill == NULL && !(flags & RTF_INDIRECT)) {
 729                 unbound = B_TRUE;
 730                 if (ipst->ips_ipv6_strict_src_multihoming > 0)
 731                         ill = gw_ire->ire_ill;
 732         }
 733 
 734         /*
 735          * We create one of three types of IREs as a result of this request
 736          * based on the netmask.  A netmask of all ones (which is automatically
 737          * assumed when RTF_HOST is set) results in an IRE_HOST being created.
 738          * An all zeroes netmask implies a default route so an IRE_DEFAULT is
 739          * created.  Otherwise, an IRE_PREFIX route is created for the
 740          * destination prefix.
 741          */
 742         if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones))
 743                 type = IRE_HOST;
 744         else if (IN6_IS_ADDR_UNSPECIFIED(mask))
 745                 type = IRE_DEFAULT;
 746         else
 747                 type = IRE_PREFIX;
 748 
 749         /* check for a duplicate entry */
 750         ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, ill,
 751             ALL_ZONES, NULL,
 752             match_flags | MATCH_IRE_MASK | MATCH_IRE_GW, 0, ipst, NULL);
 753         if (ire != NULL) {
 754                 if (ipif != NULL)
 755                         ipif_refrele(ipif);
 756                 ire_refrele(gw_ire);
 757                 ire_refrele(ire);
 758                 return (EEXIST);
 759         }
 760 
 761         /* Security attribute exists */
 762         if (sp != NULL) {
 763                 tsol_gcgrp_addr_t ga;
 764 
 765                 /* find or create the gateway credentials group */
 766                 ga.ga_af = AF_INET6;
 767                 ga.ga_addr = *gw_addr;
 768 
 769                 /* we hold reference to it upon success */
 770                 gcgrp = gcgrp_lookup(&ga, B_TRUE);
 771                 if (gcgrp == NULL) {
 772                         if (ipif != NULL)
 773                                 ipif_refrele(ipif);
 774                         ire_refrele(gw_ire);
 775                         return (ENOMEM);
 776                 }
 777 
 778                 /*
 779                  * Create and add the security attribute to the group; a
 780                  * reference to the group is made upon allocating a new
 781                  * entry successfully.  If it finds an already-existing
 782                  * entry for the security attribute in the group, it simply
 783                  * returns it and no new reference is made to the group.
 784                  */
 785                 gc = gc_create(sp, gcgrp, &gcgrp_xtraref);
 786                 if (gc == NULL) {
 787                         /* release reference held by gcgrp_lookup */
 788                         GCGRP_REFRELE(gcgrp);
 789                         if (ipif != NULL)
 790                                 ipif_refrele(ipif);
 791                         ire_refrele(gw_ire);
 792                         return (ENOMEM);
 793                 }
 794         }
 795 
 796         /* Create the IRE. */
 797         ire = ire_create_v6(
 798             dst_addr,                           /* dest address */
 799             mask,                               /* mask */
 800             gw_addr,                            /* gateway address */
 801             (ushort_t)type,                     /* IRE type */
 802             ill,
 803             zoneid,
 804             flags,
 805             gc,                                 /* security attribute */
 806             ipst);
 807 
 808         /*
 809          * The ire holds a reference to the 'gc' and the 'gc' holds a
 810          * reference to the 'gcgrp'. We can now release the extra reference
 811          * the 'gcgrp' acquired in the gcgrp_lookup, if it was not used.
 812          */
 813         if (gcgrp_xtraref)
 814                 GCGRP_REFRELE(gcgrp);
 815         if (ire == NULL) {
 816                 if (gc != NULL)
 817                         GC_REFRELE(gc);
 818                 if (ipif != NULL)
 819                         ipif_refrele(ipif);
 820                 ire_refrele(gw_ire);
 821                 return (ENOMEM);
 822         }
 823 
 824         /* src address assigned by the caller? */
 825         if ((flags & RTF_SETSRC) && !IN6_IS_ADDR_UNSPECIFIED(src_addr))
 826                 ire->ire_setsrc_addr_v6 = *src_addr;
 827 
 828         ire->ire_unbound = unbound;
 829 
 830         /*
 831          * POLICY: should we allow an RTF_HOST with address INADDR_ANY?
 832          * SUN/OS socket stuff does but do we really want to allow ::0 ?
 833          */
 834 
 835         /* Add the new IRE. */
 836         nire = ire_add(ire);
 837         if (nire == NULL) {
 838                 /*
 839                  * In the result of failure, ire_add() will have
 840                  * already deleted the ire in question, so there
 841                  * is no need to do that here.
 842                  */
 843                 if (ipif != NULL)
 844                         ipif_refrele(ipif);
 845                 ire_refrele(gw_ire);
 846                 return (ENOMEM);
 847         }
 848         /*
 849          * Check if it was a duplicate entry. This handles
 850          * the case of two racing route adds for the same route
 851          */
 852         if (nire != ire) {
 853                 ASSERT(nire->ire_identical_ref > 1);
 854                 ire_delete(nire);
 855                 ire_refrele(nire);
 856                 if (ipif != NULL)
 857                         ipif_refrele(ipif);
 858                 ire_refrele(gw_ire);
 859                 return (EEXIST);
 860         }
 861         ire = nire;
 862 
 863         if (flags & RTF_MULTIRT) {
 864                 /*
 865                  * Invoke the CGTP (multirouting) filtering module
 866                  * to add the dst address in the filtering database.
 867                  * Replicated inbound packets coming from that address
 868                  * will be filtered to discard the duplicates.
 869                  * It is not necessary to call the CGTP filter hook
 870                  * when the dst address is a multicast, because an
 871                  * IP source address cannot be a multicast.
 872                  */
 873                 if (ipst->ips_ip_cgtp_filter_ops != NULL &&
 874                     !IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))) {
 875                         int res;
 876                         ipif_t *src_ipif;
 877 
 878                         /* Find the source address corresponding to gw_ire */
 879                         src_ipif = ipif_lookup_addr_v6(
 880                             &gw_ire->ire_gateway_addr_v6, NULL, zoneid, ipst);
 881                         if (src_ipif != NULL) {
 882                                 res = ipst->ips_ip_cgtp_filter_ops->
 883                                     cfo_add_dest_v6(
 884                                     ipst->ips_netstack->netstack_stackid,
 885                                     &ire->ire_addr_v6,
 886                                     &ire->ire_gateway_addr_v6,
 887                                     &ire->ire_setsrc_addr_v6,
 888                                     &src_ipif->ipif_v6lcl_addr);
 889                                 ipif_refrele(src_ipif);
 890                         } else {
 891                                 res = EADDRNOTAVAIL;
 892                         }
 893                         if (res != 0) {
 894                                 if (ipif != NULL)
 895                                         ipif_refrele(ipif);
 896                                 ire_refrele(gw_ire);
 897                                 ire_delete(ire);
 898                                 ire_refrele(ire);       /* Held in ire_add */
 899                                 return (res);
 900                         }
 901                 }
 902         }
 903 
 904 save_ire:
 905         if (gw_ire != NULL) {
 906                 ire_refrele(gw_ire);
 907                 gw_ire = NULL;
 908         }
 909         if (ire->ire_ill != NULL) {
 910                 /*
 911                  * Save enough information so that we can recreate the IRE if
 912                  * the ILL goes down and then up.  The metrics associated
 913                  * with the route will be saved as well when rts_setmetrics() is
 914                  * called after the IRE has been created.  In the case where
 915                  * memory cannot be allocated, none of this information will be
 916                  * saved.
 917                  */
 918                 ill_save_ire(ire->ire_ill, ire);
 919         }
 920 
 921         if (ire_arg != NULL) {
 922                 /*
 923                  * Store the ire that was successfully added into where ire_arg
 924                  * points to so that callers don't have to look it up
 925                  * themselves (but they are responsible for ire_refrele()ing
 926                  * the ire when they are finished with it).
 927                  */
 928                 *ire_arg = ire;
 929         } else {
 930                 ire_refrele(ire);               /* Held in ire_add */
 931         }
 932         if (ipif != NULL)
 933                 ipif_refrele(ipif);
 934         return (0);
 935 }
 936 
 937 /*
 938  * ip_rt_delete_v6 is called to delete an IPv6 route.
 939  * ill is passed in to associate it with the correct interface.
 940  * (for link-local destinations and gateways).
 941  */
 942 /* ARGSUSED4 */
 943 int
 944 ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask,
 945     const in6_addr_t *gw_addr, uint_t rtm_addrs, int flags, ill_t *ill,
 946     ip_stack_t *ipst, zoneid_t zoneid)
 947 {
 948         ire_t   *ire = NULL;
 949         ipif_t  *ipif;
 950         uint_t  type;
 951         uint_t  match_flags = MATCH_IRE_TYPE;
 952         int     err = 0;
 953 
 954         /*
 955          * If this is the case of RTF_HOST being set, then we set the netmask
 956          * to all ones.  Otherwise, we use the netmask if one was supplied.
 957          */
 958         if (flags & RTF_HOST) {
 959                 mask = &ipv6_all_ones;
 960                 match_flags |= MATCH_IRE_MASK;
 961         } else if (rtm_addrs & RTA_NETMASK) {
 962                 match_flags |= MATCH_IRE_MASK;
 963         }
 964 
 965         /*
 966          * Note that RTF_GATEWAY is never set on a delete, therefore
 967          * we check if the gateway address is one of our interfaces first,
 968          * and fall back on RTF_GATEWAY routes.
 969          *
 970          * This makes it possible to delete an original
 971          * IRE_IF_NORESOLVER/IRE_IF_RESOLVER - consistent with SunOS 4.1.
 972          * However, we have RTF_KERNEL set on the ones created by ipif_up
 973          * and those can not be deleted here.
 974          *
 975          * We use MATCH_IRE_ILL if we know the interface. If the caller
 976          * specified an interface (from the RTA_IFP sockaddr) we use it,
 977          * otherwise we use the ill derived from the gateway address.
 978          * We can always match the gateway address since we record it
 979          * in ire_gateway_addr.
 980          *
 981          * For more detail on specifying routes by gateway address and by
 982          * interface index, see the comments in ip_rt_add_v6().
 983          */
 984         ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, ipst);
 985         if (ipif != NULL) {
 986                 ill_t   *ill_match;
 987 
 988                 if (ill != NULL)
 989                         ill_match = ill;
 990                 else
 991                         ill_match = ipif->ipif_ill;
 992 
 993                 match_flags |= MATCH_IRE_ILL;
 994                 if (ipif->ipif_ire_type == IRE_LOOPBACK) {
 995                         ire = ire_ftable_lookup_v6(dst_addr, mask, 0,
 996                             IRE_LOOPBACK, ill_match, ALL_ZONES, NULL,
 997                             match_flags, 0, ipst, NULL);
 998                 }
 999                 if (ire == NULL) {
1000                         match_flags |= MATCH_IRE_GW;
1001                         ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr,
1002                             IRE_INTERFACE, ill_match, ALL_ZONES, NULL,
1003                             match_flags, 0, ipst, NULL);
1004                 }
1005                 /* Avoid deleting routes created by kernel from an ipif */
1006                 if (ire != NULL && (ire->ire_flags & RTF_KERNEL)) {
1007                         ire_refrele(ire);
1008                         ire = NULL;
1009                 }
1010 
1011                 /* Restore in case we didn't find a match */
1012                 match_flags &= ~(MATCH_IRE_GW|MATCH_IRE_ILL);
1013         }
1014 
1015         if (ire == NULL) {
1016                 /*
1017                  * At this point, the gateway address is not one of our own
1018                  * addresses or a matching interface route was not found.  We
1019                  * set the IRE type to lookup based on whether
1020                  * this is a host route, a default route or just a prefix.
1021                  *
1022                  * If an ill was passed in, then the lookup is based on an
1023                  * interface index so MATCH_IRE_ILL is added to match_flags.
1024                  */
1025                 match_flags |= MATCH_IRE_GW;
1026                 if (ill != NULL)
1027                         match_flags |= MATCH_IRE_ILL;
1028                 if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones))
1029                         type = IRE_HOST;
1030                 else if (IN6_IS_ADDR_UNSPECIFIED(mask))
1031                         type = IRE_DEFAULT;
1032                 else
1033                         type = IRE_PREFIX;
1034                 ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type,
1035                     ill, ALL_ZONES, NULL, match_flags, 0, ipst, NULL);
1036         }
1037 
1038         if (ipif != NULL) {
1039                 ipif_refrele(ipif);
1040                 ipif = NULL;
1041         }
1042         if (ire == NULL)
1043                 return (ESRCH);
1044 
1045         if (ire->ire_flags & RTF_MULTIRT) {
1046                 /*
1047                  * Invoke the CGTP (multirouting) filtering module
1048                  * to remove the dst address from the filtering database.
1049                  * Packets coming from that address will no longer be
1050                  * filtered to remove duplicates.
1051                  */
1052                 if (ipst->ips_ip_cgtp_filter_ops != NULL) {
1053                         err = ipst->ips_ip_cgtp_filter_ops->cfo_del_dest_v6(
1054                             ipst->ips_netstack->netstack_stackid,
1055                             &ire->ire_addr_v6, &ire->ire_gateway_addr_v6);
1056                 }
1057         }
1058 
1059         ill = ire->ire_ill;
1060         if (ill != NULL)
1061                 ill_remove_saved_ire(ill, ire);
1062         ire_delete(ire);
1063         ire_refrele(ire);
1064         return (err);
1065 }
1066 
1067 /*
1068  * Derive an interface id from the link layer address.
1069  */
1070 void
1071 ill_setdefaulttoken(ill_t *ill)
1072 {
1073         if (!ill->ill_manual_token) {
1074                 bzero(&ill->ill_token, sizeof (ill->ill_token));
1075                 MEDIA_V6INTFID(ill->ill_media, ill, &ill->ill_token);
1076                 ill->ill_token_length = IPV6_TOKEN_LEN;
1077         }
1078 }
1079 
/*
 * Recompute the ill's destination token: clear ill_dest_token, then let the
 * MEDIA_V6DESTINTFID macro (given ill->ill_media) fill it in.
 */
void
ill_setdesttoken(ill_t *ill)
{
        bzero(&ill->ill_dest_token, sizeof (ill->ill_dest_token));
        MEDIA_V6DESTINTFID(ill->ill_media, ill, &ill->ill_dest_token);
}
1086 
1087 /*
1088  * Create a link-local address from a token.
1089  */
1090 static void
1091 ipif_get_linklocal(in6_addr_t *dest, const in6_addr_t *token)
1092 {
1093         int i;
1094 
1095         for (i = 0; i < 4; i++) {
1096                 dest->s6_addr32[i] =
1097                     token->s6_addr32[i] | ipv6_ll_template.s6_addr32[i];
1098         }
1099 }
1100 
1101 /*
1102  * Set a default IPv6 address for a 6to4 tunnel interface 2002:<tsrc>::1/16
1103  */
1104 static void
1105 ipif_set6to4addr(ipif_t *ipif)
1106 {
1107         ill_t           *ill = ipif->ipif_ill;
1108         struct in_addr  v4phys;
1109 
1110         ASSERT(ill->ill_mactype == DL_6TO4);
1111         ASSERT(ill->ill_phys_addr_length == sizeof (struct in_addr));
1112         ASSERT(ipif->ipif_isv6);
1113 
1114         if (ipif->ipif_flags & IPIF_UP)
1115                 return;
1116 
1117         (void) ip_plen_to_mask_v6(16, &ipif->ipif_v6net_mask);
1118         bcopy(ill->ill_phys_addr, &v4phys, sizeof (struct in_addr));
1119         IN6_V4ADDR_TO_6TO4(&v4phys, &ipif->ipif_v6lcl_addr);
1120         V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask,
1121             ipif->ipif_v6subnet);
1122 }
1123 
1124 /*
1125  * Is it not possible to set the link local address?
1126  * The address can be set if the token is set, and the token
1127  * isn't too long.
1128  * Return B_TRUE if the address can't be set, or B_FALSE if it can.
1129  */
1130 boolean_t
1131 ipif_cant_setlinklocal(ipif_t *ipif)
1132 {
1133         ill_t *ill = ipif->ipif_ill;
1134 
1135         if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_token) ||
1136             ill->ill_token_length > IPV6_ABITS - IPV6_LL_PREFIXLEN)
1137                 return (B_TRUE);
1138 
1139         return (B_FALSE);
1140 }
1141 
1142 /*
1143  * Generate a link-local address from the token.
1144  */
1145 void
1146 ipif_setlinklocal(ipif_t *ipif)
1147 {
1148         ill_t           *ill = ipif->ipif_ill;
1149         in6_addr_t      ov6addr;
1150 
1151         ASSERT(IAM_WRITER_ILL(ill));
1152 
1153         /*
1154          * If the interface was created with no link-local address
1155          * on it and the flag ILLF_NOLINKLOCAL was set, then we
1156          * dont want to update the link-local.
1157          */
1158         if ((ill->ill_flags & ILLF_NOLINKLOCAL) &&
1159             IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr))
1160                 return;
1161         /*
1162          * ill_manual_linklocal is set when the link-local address was
1163          * manually configured.
1164          */
1165         if (ill->ill_manual_linklocal)
1166                 return;
1167 
1168         /*
1169          * IPv6 interfaces over 6to4 tunnels are special.  They do not have
1170          * link-local addresses, but instead have a single automatically
1171          * generated global address.
1172          */
1173         if (ill->ill_mactype == DL_6TO4) {
1174                 ipif_set6to4addr(ipif);
1175                 return;
1176         }
1177 
1178         if (ipif_cant_setlinklocal(ipif))
1179                 return;
1180 
1181         ov6addr = ipif->ipif_v6lcl_addr;
1182         ipif_get_linklocal(&ipif->ipif_v6lcl_addr, &ill->ill_token);
1183         sctp_update_ipif_addr(ipif, ov6addr);
1184         (void) ip_plen_to_mask_v6(IPV6_LL_PREFIXLEN, &ipif->ipif_v6net_mask);
1185         if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6pp_dst_addr)) {
1186                 V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask,
1187                     ipif->ipif_v6subnet);
1188         }
1189 
1190         ip_rts_newaddrmsg(RTM_CHGADDR, 0, ipif, RTSQ_DEFAULT);
1191 }
1192 
1193 /*
1194  * Generate a destination link-local address for a point-to-point IPv6
1195  * interface with a destination interface id (IP tunnels are such interfaces)
1196  * based on the destination token.
1197  */
1198 void
1199 ipif_setdestlinklocal(ipif_t *ipif)
1200 {
1201         ill_t   *ill = ipif->ipif_ill;
1202 
1203         ASSERT(IAM_WRITER_ILL(ill));
1204 
1205         if (ill->ill_manual_dst_linklocal)
1206                 return;
1207 
1208         if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_dest_token))
1209                 return;
1210 
1211         ipif_get_linklocal(&ipif->ipif_v6pp_dst_addr, &ill->ill_dest_token);
1212         ipif->ipif_v6subnet = ipif->ipif_v6pp_dst_addr;
1213 }
1214 
1215 /*
1216  * Get the resolver set up for a new ipif.  (Always called as writer.)
1217  */
1218 int
1219 ipif_ndp_up(ipif_t *ipif, boolean_t initial)
1220 {
1221         ill_t           *ill = ipif->ipif_ill;
1222         int             err = 0;
1223         nce_t           *nce = NULL;
1224         boolean_t       added_ipif = B_FALSE;
1225 
1226         DTRACE_PROBE3(ipif__downup, char *, "ipif_ndp_up",
1227             ill_t *, ill, ipif_t *, ipif);
1228         ip1dbg(("ipif_ndp_up(%s:%u)\n", ill->ill_name, ipif->ipif_id));
1229 
1230         if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) ||
1231             (!(ill->ill_net_type & IRE_INTERFACE))) {
1232                 ipif->ipif_addr_ready = 1;
1233                 return (0);
1234         }
1235 
1236         if ((ipif->ipif_flags & (IPIF_UNNUMBERED|IPIF_NOLOCAL)) == 0) {
1237                 uint16_t        flags;
1238                 uint16_t        state;
1239                 uchar_t         *hw_addr;
1240                 ill_t           *bound_ill;
1241                 ipmp_illgrp_t   *illg = ill->ill_grp;
1242                 uint_t          hw_addr_len;
1243 
1244                 flags = NCE_F_MYADDR | NCE_F_NONUD | NCE_F_PUBLISH |
1245                     NCE_F_AUTHORITY;
1246                 if (ill->ill_flags & ILLF_ROUTER)
1247                         flags |= NCE_F_ISROUTER;
1248 
1249                 if (ipif->ipif_flags & IPIF_ANYCAST)
1250                         flags |= NCE_F_ANYCAST;
1251 
1252                 if (IS_IPMP(ill)) {
1253                         ASSERT(ill->ill_net_type == IRE_IF_RESOLVER);
1254                         /*
1255                          * If we're here via ipif_up(), then the ipif won't be
1256                          * bound yet -- add it to the group, which will bind
1257                          * it if possible.  (We would add it in ipif_up(), but
1258                          * deleting on failure there is gruesome.)  If we're
1259                          * here via ipmp_ill_bind_ipif(), then the ipif has
1260                          * already been added to the group and we just need to
1261                          * use the binding.
1262                          */
1263                         if ((bound_ill = ipmp_ipif_bound_ill(ipif)) == NULL) {
1264                                 bound_ill = ipmp_illgrp_add_ipif(illg, ipif);
1265                                 if (bound_ill == NULL) {
1266                                         /*
1267                                          * We couldn't bind the ipif to an ill
1268                                          * yet, so we have nothing to publish.
1269                                          * Set ipif_addr_ready so that this
1270                                          * address can be used locally for now.
1271                                          * The routing socket message will be
1272                                          * sent from ipif_up_done_v6().
1273                                          */
1274                                         ipif->ipif_addr_ready = 1;
1275                                         return (0);
1276                                 }
1277                                 added_ipif = B_TRUE;
1278                         }
1279                         hw_addr = bound_ill->ill_nd_lla;
1280                         hw_addr_len = bound_ill->ill_phys_addr_length;
1281                 } else {
1282                         bound_ill = ill;
1283                         hw_addr = ill->ill_nd_lla;
1284                         hw_addr_len = ill->ill_phys_addr_length;
1285                 }
1286 
1287                 /*
1288                  * If this is an initial bring-up (or the ipif was never
1289                  * completely brought up), do DAD.  Otherwise, we're here
1290                  * because IPMP has rebound an address to this ill: send
1291                  * unsolicited advertisements to inform others.
1292                  */
1293                 if (initial || !ipif->ipif_addr_ready) {
1294                         /* Causes Duplicate Address Detection to run */
1295                         state = ND_PROBE;
1296                 } else {
1297                         state = ND_REACHABLE;
1298                         flags |= NCE_F_UNSOL_ADV;
1299                 }
1300 
1301 retry:
1302                 err = nce_lookup_then_add_v6(ill, hw_addr, hw_addr_len,
1303                     &ipif->ipif_v6lcl_addr, flags, state, &nce);
1304                 switch (err) {
1305                 case 0:
1306                         ip1dbg(("ipif_ndp_up: NCE created for %s\n",
1307                             ill->ill_name));
1308                         ipif->ipif_addr_ready = 1;
1309                         ipif->ipif_added_nce = 1;
1310                         nce->nce_ipif_cnt++;
1311                         break;
1312                 case EINPROGRESS:
1313                         ip1dbg(("ipif_ndp_up: running DAD now for %s\n",
1314                             ill->ill_name));
1315                         ipif->ipif_added_nce = 1;
1316                         nce->nce_ipif_cnt++;
1317                         break;
1318                 case EEXIST:
1319                         ip1dbg(("ipif_ndp_up: NCE already exists for %s\n",
1320                             ill->ill_name));
1321                         if (!NCE_MYADDR(nce->nce_common)) {
1322                                 /*
1323                                  * A leftover nce from before this address
1324                                  * existed
1325                                  */
1326                                 ncec_delete(nce->nce_common);
1327                                 nce_refrele(nce);
1328                                 nce = NULL;
1329                                 goto retry;
1330                         }
1331                         if ((ipif->ipif_flags & IPIF_POINTOPOINT) == 0) {
1332                                 nce_refrele(nce);
1333                                 nce = NULL;
1334                                 ip1dbg(("ipif_ndp_up: NCE already exists "
1335                                     "for %s\n", ill->ill_name));
1336                                 goto fail;
1337                         }
1338                         /*
1339                          * Duplicate local addresses are permissible for
1340                          * IPIF_POINTOPOINT interfaces which will get marked
1341                          * IPIF_UNNUMBERED later in
1342                          * ip_addr_availability_check().
1343                          *
1344                          * The nce_ipif_cnt field tracks the number of
1345                          * ipifs that have nce_addr as their local address.
1346                          */
1347                         ipif->ipif_addr_ready = 1;
1348                         ipif->ipif_added_nce = 1;
1349                         nce->nce_ipif_cnt++;
1350                         err = 0;
1351                         break;
1352                 default:
1353                         ip1dbg(("ipif_ndp_up: NCE creation failed for %s\n",
1354                             ill->ill_name));
1355                         goto fail;
1356                 }
1357         } else {
1358                 /* No local NCE for this entry */
1359                 ipif->ipif_addr_ready = 1;
1360         }
1361         if (nce != NULL)
1362                 nce_refrele(nce);
1363         return (0);
1364 fail:
1365         if (added_ipif)
1366                 ipmp_illgrp_del_ipif(ill->ill_grp, ipif);
1367 
1368         return (err);
1369 }
1370 
/*
 * Remove all cache entries for this logical interface.  This is a thin
 * wrapper: all of the work is delegated to ipif_nce_down().
 */
void
ipif_ndp_down(ipif_t *ipif)
{
        ipif_nce_down(ipif);
}
1377 
1378 /*
1379  * Return the scope of the given IPv6 address.  If the address is an
1380  * IPv4 mapped IPv6 address, return the scope of the corresponding
1381  * IPv4 address.
1382  */
1383 in6addr_scope_t
1384 ip_addr_scope_v6(const in6_addr_t *addr)
1385 {
1386         static in6_addr_t ipv6loopback = IN6ADDR_LOOPBACK_INIT;
1387 
1388         if (IN6_IS_ADDR_V4MAPPED(addr)) {
1389                 in_addr_t v4addr_h = ntohl(V4_PART_OF_V6((*addr)));
1390                 if ((v4addr_h >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1391                     (v4addr_h & IN_AUTOCONF_MASK) == IN_AUTOCONF_NET)
1392                         return (IP6_SCOPE_LINKLOCAL);
1393                 if ((v4addr_h & IN_PRIVATE8_MASK) == IN_PRIVATE8_NET ||
1394                     (v4addr_h & IN_PRIVATE12_MASK) == IN_PRIVATE12_NET ||
1395                     (v4addr_h & IN_PRIVATE16_MASK) == IN_PRIVATE16_NET)
1396                         return (IP6_SCOPE_SITELOCAL);
1397                 return (IP6_SCOPE_GLOBAL);
1398         }
1399 
1400         if (IN6_IS_ADDR_MULTICAST(addr))
1401                 return (IN6_ADDR_MC_SCOPE(addr));
1402 
1403         /* link-local and loopback addresses are of link-local scope */
1404         if (IN6_IS_ADDR_LINKLOCAL(addr) ||
1405             IN6_ARE_ADDR_EQUAL(addr, &ipv6loopback))
1406                 return (IP6_SCOPE_LINKLOCAL);
1407         if (IN6_IS_ADDR_SITELOCAL(addr))
1408                 return (IP6_SCOPE_SITELOCAL);
1409         return (IP6_SCOPE_GLOBAL);
1410 }
1411 
1412 
1413 /*
1414  * Returns the length of the common prefix of a1 and a2, as per
1415  * CommonPrefixLen() defined in RFC 3484.
1416  */
1417 static int
1418 ip_common_prefix_v6(const in6_addr_t *a1, const in6_addr_t *a2)
1419 {
1420         int i;
1421         uint32_t a1val, a2val, mask;
1422 
1423         for (i = 0; i < 4; i++) {
1424                 if ((a1val = a1->s6_addr32[i]) != (a2val = a2->s6_addr32[i])) {
1425                         a1val ^= a2val;
1426                         i *= 32;
1427                         mask = 0x80000000u;
1428                         while (!(a1val & mask)) {
1429                                 mask >>= 1;
1430                                 i++;
1431                         }
1432                         return (i);
1433                 }
1434         }
1435         return (IPV6_ABITS);
1436 }
1437 
/*
 * An ipif qualifies as an IPv6 source address when it is IPIF_UP, is
 * neither IPIF_NOLOCAL nor IPIF_ANYCAST, and its ill is not ILLF_NOACCEPT.
 */
#define IPIF_VALID_IPV6_SOURCE(ipif) \
        ((((ipif)->ipif_flags & IPIF_UP) != 0) && \
        (((ipif)->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST)) == 0) && \
        (((ipif)->ipif_ill->ill_flags & ILLF_NOACCEPT) == 0))
1442 
/*
 * Source address candidate.
 *
 * Wraps the candidate ipif together with a set of properties of that
 * candidate.  Each property field is paired with a boolean "<name>_set"
 * field.
 * NOTE(review): the "_set" fields presumably record whether the paired
 * property has already been computed for this candidate -- confirm against
 * the rule functions that use them.
 */
typedef struct candidate {
        ipif_t          *cand_ipif;
        /* The properties of this candidate */
        boolean_t       cand_isdst;
        boolean_t       cand_isdst_set;
        in6addr_scope_t cand_scope;
        boolean_t       cand_scope_set;
        boolean_t       cand_isdeprecated;
        boolean_t       cand_isdeprecated_set;
        boolean_t       cand_ispreferred;
        boolean_t       cand_ispreferred_set;
        boolean_t       cand_matchedinterface;
        boolean_t       cand_matchedinterface_set;
        boolean_t       cand_matchedlabel;
        boolean_t       cand_matchedlabel_set;
        boolean_t       cand_istmp;
        boolean_t       cand_istmp_set;
        int             cand_common_pref;
        boolean_t       cand_common_pref_set;
        boolean_t       cand_pref_eq;
        boolean_t       cand_pref_eq_set;
        int             cand_pref_len;
        boolean_t       cand_pref_len_set;
} cand_t;
/*
 * Shorthand member names: each expands to a field reached through the
 * candidate's cand_ipif pointer.
 */
#define cand_srcaddr    cand_ipif->ipif_v6lcl_addr
#define cand_mask       cand_ipif->ipif_v6net_mask
#define cand_flags      cand_ipif->ipif_flags
#define cand_ill        cand_ipif->ipif_ill
#define cand_zoneid     cand_ipif->ipif_zoneid
1473 
1474 /* information about the destination for source address selection */
1475 typedef struct dstinfo {
1476         const in6_addr_t        *dst_addr;
1477         ill_t                   *dst_ill;
1478         uint_t                  dst_restrict_ill;
1479         boolean_t               dst_prefer_src_tmp;
1480         in6addr_scope_t         dst_scope;
1481         char                    *dst_label;
1482 } dstinfo_t;
1483 
1484 /*
1485  * The following functions are rules used to select a source address in
1486  * ipif_select_source_v6().  Each rule compares a current candidate (cc)
1487  * against the best candidate (bc).  Each rule has three possible outcomes;
1488  * the candidate is preferred over the best candidate (CAND_PREFER), the
1489  * candidate is not preferred over the best candidate (CAND_AVOID), or the
1490  * candidate is of equal value as the best candidate (CAND_TIE).
1491  *
1492  * These rules are part of a greater "Default Address Selection for IPv6"
1493  * sheme, which is standards based work coming out of the IETF ipv6 working
1494  * group.  The IETF document defines both IPv6 source address selection and
1495  * destination address ordering.  The rules defined here implement the IPv6
1496  * source address selection.  Destination address ordering is done by
1497  * libnsl, and uses a similar set of rules to implement the sorting.
1498  *
1499  * Most of the rules are defined by the RFC and are not typically altered.  The
1500  * last rule, number 8, has language that allows for local preferences.  In the
1501  * scheme below, this means that new Solaris rules should normally go between
1502  * rule_ifprefix and rule_prefix.
1503  */
1504 typedef enum {CAND_AVOID, CAND_TIE, CAND_PREFER} rule_res_t;
1505 typedef rule_res_t (*rulef_t)(cand_t *, cand_t *, const dstinfo_t *,
1506     ip_stack_t *);
1507 
1508 /* Prefer an address if it is equal to the destination address. */
1509 /* ARGSUSED3 */
1510 static rule_res_t
1511 rule_isdst(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst)
1512 {
1513         if (!bc->cand_isdst_set) {
1514                 bc->cand_isdst =
1515                     IN6_ARE_ADDR_EQUAL(&bc->cand_srcaddr, dstinfo->dst_addr);
1516                 bc->cand_isdst_set = B_TRUE;
1517         }
1518 
1519         cc->cand_isdst =
1520             IN6_ARE_ADDR_EQUAL(&cc->cand_srcaddr, dstinfo->dst_addr);
1521         cc->cand_isdst_set = B_TRUE;
1522 
1523         if (cc->cand_isdst == bc->cand_isdst)
1524                 return (CAND_TIE);
1525         else if (cc->cand_isdst)
1526                 return (CAND_PREFER);
1527         else
1528                 return (CAND_AVOID);
1529 }
1530 
1531 /*
1532  * Prefer addresses that are of closest scope to the destination.  Always
1533  * prefer addresses that are of greater scope than the destination over
1534  * those that are of lesser scope than the destination.
1535  */
1536 /* ARGSUSED3 */
1537 static rule_res_t
1538 rule_scope(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst)
1539 {
1540         if (!bc->cand_scope_set) {
1541                 bc->cand_scope = ip_addr_scope_v6(&bc->cand_srcaddr);
1542                 bc->cand_scope_set = B_TRUE;
1543         }
1544 
1545         cc->cand_scope = ip_addr_scope_v6(&cc->cand_srcaddr);
1546         cc->cand_scope_set = B_TRUE;
1547 
1548         if (cc->cand_scope < bc->cand_scope) {
1549                 if (cc->cand_scope < dstinfo->dst_scope)
1550                         return (CAND_AVOID);
1551                 else
1552                         return (CAND_PREFER);
1553         } else if (bc->cand_scope < cc->cand_scope) {
1554                 if (bc->cand_scope < dstinfo->dst_scope)
1555                         return (CAND_PREFER);
1556                 else
1557                         return (CAND_AVOID);
1558         } else {
1559                 return (CAND_TIE);
1560         }
1561 }
1562 
1563 /*
1564  * Prefer non-deprecated source addresses.
1565  */
1566 /* ARGSUSED2 */
1567 static rule_res_t
1568 rule_deprecated(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
1569     ip_stack_t *ipst)
1570 {
1571         if (!bc->cand_isdeprecated_set) {
1572                 bc->cand_isdeprecated =
1573                     ((bc->cand_flags & IPIF_DEPRECATED) != 0);
1574                 bc->cand_isdeprecated_set = B_TRUE;
1575         }
1576 
1577         cc->cand_isdeprecated = ((cc->cand_flags & IPIF_DEPRECATED) != 0);
1578         cc->cand_isdeprecated_set = B_TRUE;
1579 
1580         if (bc->cand_isdeprecated == cc->cand_isdeprecated)
1581                 return (CAND_TIE);
1582         else if (cc->cand_isdeprecated)
1583                 return (CAND_AVOID);
1584         else
1585                 return (CAND_PREFER);
1586 }
1587 
1588 /*
1589  * Prefer source addresses that have the IPIF_PREFERRED flag set.  This
1590  * rule must be before rule_interface because the flag could be set on any
1591  * interface, not just the interface being used for outgoing packets (for
1592  * example, the IFF_PREFERRED could be set on an address assigned to the
1593  * loopback interface).
1594  */
1595 /* ARGSUSED2 */
1596 static rule_res_t
1597 rule_preferred(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
1598     ip_stack_t *ipst)
1599 {
1600         if (!bc->cand_ispreferred_set) {
1601                 bc->cand_ispreferred = ((bc->cand_flags & IPIF_PREFERRED) != 0);
1602                 bc->cand_ispreferred_set = B_TRUE;
1603         }
1604 
1605         cc->cand_ispreferred = ((cc->cand_flags & IPIF_PREFERRED) != 0);
1606         cc->cand_ispreferred_set = B_TRUE;
1607 
1608         if (bc->cand_ispreferred == cc->cand_ispreferred)
1609                 return (CAND_TIE);
1610         else if (cc->cand_ispreferred)
1611                 return (CAND_PREFER);
1612         else
1613                 return (CAND_AVOID);
1614 }
1615 
1616 /*
1617  * Prefer source addresses that are assigned to the outgoing interface.
1618  */
1619 /* ARGSUSED3 */
1620 static rule_res_t
1621 rule_interface(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
1622     ip_stack_t *ipst)
1623 {
1624         ill_t *dstill = dstinfo->dst_ill;
1625 
1626         /*
1627          * If dstinfo->dst_restrict_ill is set, this rule is unnecessary
1628          * since we know all candidates will be on the same link.
1629          */
1630         if (dstinfo->dst_restrict_ill)
1631                 return (CAND_TIE);
1632 
1633         if (!bc->cand_matchedinterface_set) {
1634                 bc->cand_matchedinterface = bc->cand_ill == dstill;
1635                 bc->cand_matchedinterface_set = B_TRUE;
1636         }
1637 
1638         cc->cand_matchedinterface = cc->cand_ill == dstill;
1639         cc->cand_matchedinterface_set = B_TRUE;
1640 
1641         if (bc->cand_matchedinterface == cc->cand_matchedinterface)
1642                 return (CAND_TIE);
1643         else if (cc->cand_matchedinterface)
1644                 return (CAND_PREFER);
1645         else
1646                 return (CAND_AVOID);
1647 }
1648 
1649 /*
1650  * Prefer source addresses whose label matches the destination's label.
1651  */
1652 static rule_res_t
1653 rule_label(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst)
1654 {
1655         char *label;
1656 
1657         if (!bc->cand_matchedlabel_set) {
1658                 label = ip6_asp_lookup(&bc->cand_srcaddr, NULL, ipst);
1659                 bc->cand_matchedlabel =
1660                     ip6_asp_labelcmp(label, dstinfo->dst_label);
1661                 bc->cand_matchedlabel_set = B_TRUE;
1662         }
1663 
1664         label = ip6_asp_lookup(&cc->cand_srcaddr, NULL, ipst);
1665         cc->cand_matchedlabel = ip6_asp_labelcmp(label, dstinfo->dst_label);
1666         cc->cand_matchedlabel_set = B_TRUE;
1667 
1668         if (bc->cand_matchedlabel == cc->cand_matchedlabel)
1669                 return (CAND_TIE);
1670         else if (cc->cand_matchedlabel)
1671                 return (CAND_PREFER);
1672         else
1673                 return (CAND_AVOID);
1674 }
1675 
1676 /*
1677  * Prefer public addresses over temporary ones.  An application can reverse
1678  * the logic of this rule and prefer temporary addresses by using the
1679  * IPV6_SRC_PREFERENCES socket option.
1680  */
1681 /* ARGSUSED3 */
1682 static rule_res_t
1683 rule_temporary(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
1684     ip_stack_t *ipst)
1685 {
1686         if (!bc->cand_istmp_set) {
1687                 bc->cand_istmp = ((bc->cand_flags & IPIF_TEMPORARY) != 0);
1688                 bc->cand_istmp_set = B_TRUE;
1689         }
1690 
1691         cc->cand_istmp = ((cc->cand_flags & IPIF_TEMPORARY) != 0);
1692         cc->cand_istmp_set = B_TRUE;
1693 
1694         if (bc->cand_istmp == cc->cand_istmp)
1695                 return (CAND_TIE);
1696 
1697         if (dstinfo->dst_prefer_src_tmp && cc->cand_istmp)
1698                 return (CAND_PREFER);
1699         else if (!dstinfo->dst_prefer_src_tmp && !cc->cand_istmp)
1700                 return (CAND_PREFER);
1701         else
1702                 return (CAND_AVOID);
1703 }
1704 
1705 /*
1706  * Prefer source addresses with longer matching prefix with the destination
1707  * under the interface mask.  This gets us on the same subnet before applying
1708  * any Solaris-specific rules.
1709  */
1710 /* ARGSUSED3 */
1711 static rule_res_t
1712 rule_ifprefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
1713     ip_stack_t *ipst)
1714 {
1715         if (!bc->cand_pref_eq_set) {
1716                 bc->cand_pref_eq = V6_MASK_EQ_2(bc->cand_srcaddr,
1717                     bc->cand_mask, *dstinfo->dst_addr);
1718                 bc->cand_pref_eq_set = B_TRUE;
1719         }
1720 
1721         cc->cand_pref_eq = V6_MASK_EQ_2(cc->cand_srcaddr, cc->cand_mask,
1722             *dstinfo->dst_addr);
1723         cc->cand_pref_eq_set = B_TRUE;
1724 
1725         if (bc->cand_pref_eq) {
1726                 if (cc->cand_pref_eq) {
1727                         if (!bc->cand_pref_len_set) {
1728                                 bc->cand_pref_len =
1729                                     ip_mask_to_plen_v6(&bc->cand_mask);
1730                                 bc->cand_pref_len_set = B_TRUE;
1731                         }
1732                         cc->cand_pref_len = ip_mask_to_plen_v6(&cc->cand_mask);
1733                         cc->cand_pref_len_set = B_TRUE;
1734                         if (bc->cand_pref_len == cc->cand_pref_len)
1735                                 return (CAND_TIE);
1736                         else if (bc->cand_pref_len > cc->cand_pref_len)
1737                                 return (CAND_AVOID);
1738                         else
1739                                 return (CAND_PREFER);
1740                 } else {
1741                         return (CAND_AVOID);
1742                 }
1743         } else {
1744                 if (cc->cand_pref_eq)
1745                         return (CAND_PREFER);
1746                 else
1747                         return (CAND_TIE);
1748         }
1749 }
1750 
1751 /*
1752  * Prefer to use zone-specific addresses when possible instead of all-zones
1753  * addresses.
1754  */
1755 /* ARGSUSED2 */
1756 static rule_res_t
1757 rule_zone_specific(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
1758     ip_stack_t *ipst)
1759 {
1760         if ((bc->cand_zoneid == ALL_ZONES) ==
1761             (cc->cand_zoneid == ALL_ZONES))
1762                 return (CAND_TIE);
1763         else if (cc->cand_zoneid == ALL_ZONES)
1764                 return (CAND_AVOID);
1765         else
1766                 return (CAND_PREFER);
1767 }
1768 
1769 /*
1770  * Prefer to use DHCPv6 (first) and static addresses (second) when possible
1771  * instead of statelessly autoconfigured addresses.
1772  *
1773  * This is done after trying all other preferences (and before the final tie
1774  * breaker) so that, if all else is equal, we select addresses configured by
1775  * DHCPv6 over other addresses.  We presume that DHCPv6 addresses, unlike
1776  * stateless autoconfigured addresses, are deliberately configured by an
1777  * administrator, and thus are correctly set up in DNS and network packet
1778  * filters.
1779  */
1780 /* ARGSUSED2 */
1781 static rule_res_t
1782 rule_addr_type(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
1783     ip_stack_t *ipst)
1784 {
1785 #define ATYPE(x)        \
1786         ((x) & IPIF_DHCPRUNNING) ? 1 : ((x) & IPIF_ADDRCONF) ? 3 : 2
1787         int bcval = ATYPE(bc->cand_flags);
1788         int ccval = ATYPE(cc->cand_flags);
1789 #undef ATYPE
1790 
1791         if (bcval == ccval)
1792                 return (CAND_TIE);
1793         else if (ccval < bcval)
1794                 return (CAND_PREFER);
1795         else
1796                 return (CAND_AVOID);
1797 }
1798 
1799 /*
1800  * Prefer source addresses with longer matching prefix with the destination.
1801  * We do the longest matching prefix calculation by doing an xor of both
1802  * addresses with the destination, and pick the address with the longest string
1803  * of leading zeros, as per CommonPrefixLen() defined in RFC 3484.
1804  */
1805 /* ARGSUSED3 */
1806 static rule_res_t
1807 rule_prefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst)
1808 {
1809         if (!bc->cand_common_pref_set) {
1810                 bc->cand_common_pref = ip_common_prefix_v6(&bc->cand_srcaddr,
1811                     dstinfo->dst_addr);
1812                 bc->cand_common_pref_set = B_TRUE;
1813         }
1814 
1815         cc->cand_common_pref = ip_common_prefix_v6(&cc->cand_srcaddr,
1816             dstinfo->dst_addr);
1817         cc->cand_common_pref_set = B_TRUE;
1818 
1819         if (bc->cand_common_pref == cc->cand_common_pref)
1820                 return (CAND_TIE);
1821         else if (bc->cand_common_pref > cc->cand_common_pref)
1822                 return (CAND_AVOID);
1823         else
1824                 return (CAND_PREFER);
1825 }
1826 
1827 /*
1828  * Last rule: we must pick something, so just prefer the current best
1829  * candidate.
1830  */
1831 /* ARGSUSED */
1832 static rule_res_t
1833 rule_must_be_last(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
1834     ip_stack_t *ipst)
1835 {
1836         return (CAND_AVOID);
1837 }
1838 
1839 /*
1840  * Determine the best source address given a destination address and a
1841  * destination ill.  If no suitable source address is found, it returns
1842  * NULL. If there is a usable address pointed to by the usesrc
1843  * (i.e ill_usesrc_ifindex != 0) then return that first since it is more
1844  * fine grained (i.e per interface)
1845  *
1846  * This implementation is based on the "Default Address Selection for IPv6"
1847  * specification produced by the IETF IPv6 working group.  It has been
1848  * implemented so that the list of addresses is only traversed once (the
1849  * specification's algorithm could traverse the list of addresses once for
1850  * every rule).
1851  *
1852  * The restrict_ill argument restricts the algorithm to choose a source
1853  * address that is assigned to the destination ill.  This is used when
1854  * the destination address is a link-local or multicast address, and when
1855  * ipv6_strict_dst_multihoming is turned on.
1856  *
1857  * src_prefs is the caller's set of source address preferences.  If source
1858  * address selection is being called to determine the source address of a
1859  * connected socket (from ip_set_destination_v6()), then the preferences are
1860  * taken from conn_ixa->ixa_src_preferences.  These preferences can be set on a
1861  * per-socket basis using the IPV6_SRC_PREFERENCES socket option.  The only
1862  * preference currently implemented is for rfc3041 temporary addresses.
1863  */
1864 ipif_t *
1865 ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst,
1866     boolean_t restrict_ill, uint32_t src_prefs, zoneid_t zoneid,
1867     boolean_t allow_usesrc, boolean_t *notreadyp)
1868 {
1869         dstinfo_t       dstinfo;
1870         char            dstr[INET6_ADDRSTRLEN];
1871         char            sstr[INET6_ADDRSTRLEN];
1872         ipif_t          *ipif, *start_ipif, *next_ipif;
1873         ill_t           *ill, *usesrc_ill = NULL, *ipmp_ill = NULL;
1874         ill_walk_context_t      ctx;
1875         cand_t          best_c; /* The best candidate */
1876         cand_t          curr_c; /* The current candidate */
1877         uint_t          index;
1878         boolean_t       first_candidate = B_TRUE;
1879         rule_res_t      rule_result;
1880         tsol_tpc_t      *src_rhtp, *dst_rhtp;
1881         ip_stack_t      *ipst = dstill->ill_ipst;
1882 
1883         /*
1884          * The list of ordering rules.  They are applied in the order they
1885          * appear in the list.
1886          *
1887          * Solaris doesn't currently support Mobile IPv6, so there's no
1888          * rule_mipv6 corresponding to rule 4 in the specification.
1889          */
1890         rulef_t rules[] = {
1891                 rule_isdst,
1892                 rule_scope,
1893                 rule_deprecated,
1894                 rule_preferred,
1895                 rule_interface,
1896                 rule_label,
1897                 rule_temporary,
1898                 rule_ifprefix,                  /* local rules after this */
1899                 rule_zone_specific,
1900                 rule_addr_type,
1901                 rule_prefix,                    /* local rules before this */
1902                 rule_must_be_last,              /* must always be last */
1903                 NULL
1904         };
1905 
1906         ASSERT(dstill->ill_isv6);
1907         ASSERT(!IN6_IS_ADDR_V4MAPPED(dst));
1908 
1909         /*
1910          * Check if there is a usable src address pointed to by the
1911          * usesrc ifindex. This has higher precedence since it is
1912          * finer grained (i.e per interface) v/s being system wide.
1913          */
1914         if (dstill->ill_usesrc_ifindex != 0 && allow_usesrc) {
1915                 if ((usesrc_ill =
1916                     ill_lookup_on_ifindex(dstill->ill_usesrc_ifindex, B_TRUE,
1917                     ipst)) != NULL) {
1918                         dstinfo.dst_ill = usesrc_ill;
1919                 } else {
1920                         return (NULL);
1921                 }
1922         } else if (IS_UNDER_IPMP(dstill)) {
1923                 /*
1924                  * Test addresses should never be used for source address
1925                  * selection, so if we were passed an underlying ill, switch
1926                  * to the IPMP meta-interface.
1927                  */
1928                 if ((ipmp_ill = ipmp_ill_hold_ipmp_ill(dstill)) != NULL)
1929                         dstinfo.dst_ill = ipmp_ill;
1930                 else
1931                         return (NULL);
1932         } else {
1933                 dstinfo.dst_ill = dstill;
1934         }
1935 
1936         /*
1937          * If we're dealing with an unlabeled destination on a labeled system,
1938          * make sure that we ignore source addresses that are incompatible with
1939          * the destination's default label.  That destination's default label
1940          * must dominate the minimum label on the source address.
1941          *
1942          * (Note that this has to do with Trusted Solaris.  It's not related to
1943          * the labels described by ip6_asp_lookup.)
1944          */
1945         dst_rhtp = NULL;
1946         if (is_system_labeled()) {
1947                 dst_rhtp = find_tpc(dst, IPV6_VERSION, B_FALSE);
1948                 if (dst_rhtp == NULL)
1949                         return (NULL);
1950                 if (dst_rhtp->tpc_tp.host_type != UNLABELED) {
1951                         TPC_RELE(dst_rhtp);
1952                         dst_rhtp = NULL;
1953                 }
1954         }
1955 
1956         dstinfo.dst_addr = dst;
1957         dstinfo.dst_scope = ip_addr_scope_v6(dst);
1958         dstinfo.dst_label = ip6_asp_lookup(dst, NULL, ipst);
1959         dstinfo.dst_prefer_src_tmp = ((src_prefs & IPV6_PREFER_SRC_TMP) != 0);
1960         rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1961         /*
1962          * Section three of the I-D states that for multicast and
1963          * link-local destinations, the candidate set must be restricted to
1964          * an interface that is on the same link as the outgoing interface.
1965          * Also, when ipv6_strict_dst_multihoming is turned on, always
1966          * restrict the source address to the destination link as doing
1967          * otherwise will almost certainly cause problems.
1968          */
1969         if (IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst) ||
1970             ipst->ips_ipv6_strict_dst_multihoming || usesrc_ill != NULL) {
1971                 dstinfo.dst_restrict_ill = B_TRUE;
1972         } else {
1973                 dstinfo.dst_restrict_ill = restrict_ill;
1974         }
1975 
1976         bzero(&best_c, sizeof (cand_t));
1977 
1978         /*
1979          * Take a pass through the list of IPv6 interfaces to choose the best
1980          * possible source address.  If restrict_ill is set, just use dst_ill.
1981          */
1982         if (dstinfo.dst_restrict_ill)
1983                 ill = dstinfo.dst_ill;
1984         else
1985                 ill = ILL_START_WALK_V6(&ctx, ipst);
1986 
1987         for (; ill != NULL; ill = ill_next(&ctx, ill)) {
1988                 ASSERT(ill->ill_isv6);
1989 
1990                 /*
1991                  * Test addresses should never be used for source address
1992                  * selection, so ignore underlying ills.
1993                  */
1994                 if (IS_UNDER_IPMP(ill))
1995                         continue;
1996 
1997                 if (ill->ill_ipif == NULL)
1998                         continue;
1999                 /*
2000                  * For source address selection, we treat the ipif list as
2001                  * circular and continue until we get back to where we
2002                  * started.  This allows IPMP to vary source address selection
2003                  * (which improves inbound load spreading) by caching its last
2004                  * ending point and starting from there.  NOTE: we don't have
2005                  * to worry about ill_src_ipif changing ills since that can't
2006                  * happen on the IPMP ill.
2007                  */
2008                 start_ipif = ill->ill_ipif;
2009                 if (IS_IPMP(ill) && ill->ill_src_ipif != NULL)
2010                         start_ipif = ill->ill_src_ipif;
2011 
2012                 ipif = start_ipif;
2013                 do {
2014                         if ((next_ipif = ipif->ipif_next) == NULL)
2015                                 next_ipif = ill->ill_ipif;
2016 
2017                         if (!IPIF_VALID_IPV6_SOURCE(ipif))
2018                                 continue;
2019 
2020                         if (!ipif->ipif_addr_ready) {
2021                                 if (notreadyp != NULL)
2022                                         *notreadyp = B_TRUE;
2023                                 continue;
2024                         }
2025 
2026                         if (zoneid != ALL_ZONES &&
2027                             ipif->ipif_zoneid != zoneid &&
2028                             ipif->ipif_zoneid != ALL_ZONES)
2029                                 continue;
2030 
2031                         /*
2032                          * Check compatibility of local address for
2033                          * destination's default label if we're on a labeled
2034                          * system.  Incompatible addresses can't be used at
2035                          * all and must be skipped over.
2036                          */
2037                         if (dst_rhtp != NULL) {
2038                                 boolean_t incompat;
2039 
2040                                 src_rhtp = find_tpc(&ipif->ipif_v6lcl_addr,
2041                                     IPV6_VERSION, B_FALSE);
2042                                 if (src_rhtp == NULL)
2043                                         continue;
2044                                 incompat =
2045                                     src_rhtp->tpc_tp.host_type != SUN_CIPSO ||
2046                                     src_rhtp->tpc_tp.tp_doi !=
2047                                     dst_rhtp->tpc_tp.tp_doi ||
2048                                     (!_blinrange(&dst_rhtp->tpc_tp.tp_def_label,
2049                                     &src_rhtp->tpc_tp.tp_sl_range_cipso) &&
2050                                     !blinlset(&dst_rhtp->tpc_tp.tp_def_label,
2051                                     src_rhtp->tpc_tp.tp_sl_set_cipso));
2052                                 TPC_RELE(src_rhtp);
2053                                 if (incompat)
2054                                         continue;
2055                         }
2056 
2057                         if (first_candidate) {
2058                                 /*
2059                                  * This is first valid address in the list.
2060                                  * It is automatically the best candidate
2061                                  * so far.
2062                                  */
2063                                 best_c.cand_ipif = ipif;
2064                                 first_candidate = B_FALSE;
2065                                 continue;
2066                         }
2067 
2068                         bzero(&curr_c, sizeof (cand_t));
2069                         curr_c.cand_ipif = ipif;
2070 
2071                         /*
2072                          * Compare this current candidate (curr_c) with the
2073                          * best candidate (best_c) by applying the
2074                          * comparison rules in order until one breaks the
2075                          * tie.
2076                          */
2077                         for (index = 0; rules[index] != NULL; index++) {
2078                                 /* Apply a comparison rule. */
2079                                 rule_result = (rules[index])(&best_c, &curr_c,
2080                                     &dstinfo, ipst);
2081                                 if (rule_result == CAND_AVOID) {
2082                                         /*
2083                                          * The best candidate is still the
2084                                          * best candidate.  Forget about
2085                                          * this current candidate and go on
2086                                          * to the next one.
2087                                          */
2088                                         break;
2089                                 } else if (rule_result == CAND_PREFER) {
2090                                         /*
2091                                          * This candidate is prefered.  It
2092                                          * becomes the best candidate so
2093                                          * far.  Go on to the next address.
2094                                          */
2095                                         best_c = curr_c;
2096                                         break;
2097                                 }
2098                                 /* We have a tie, apply the next rule. */
2099                         }
2100 
2101                         /*
2102                          * The last rule must be a tie breaker rule and
2103                          * must never produce a tie.  At this point, the
2104                          * candidate should have either been rejected, or
2105                          * have been prefered as the best candidate so far.
2106                          */
2107                         ASSERT(rule_result != CAND_TIE);
2108                 } while ((ipif = next_ipif) != start_ipif);
2109 
2110                 /*
2111                  * For IPMP, update the source ipif rotor to the next ipif,
2112                  * provided we can look it up.  (We must not use it if it's
2113                  * IPIF_CONDEMNED since we may have grabbed ill_g_lock after
2114                  * ipif_free() checked ill_src_ipif.)
2115                  */
2116                 if (IS_IPMP(ill) && ipif != NULL) {
2117                         mutex_enter(&ipif->ipif_ill->ill_lock);
2118                         next_ipif = ipif->ipif_next;
2119                         if (next_ipif != NULL && !IPIF_IS_CONDEMNED(next_ipif))
2120                                 ill->ill_src_ipif = next_ipif;
2121                         else
2122                                 ill->ill_src_ipif = NULL;
2123                         mutex_exit(&ipif->ipif_ill->ill_lock);
2124                 }
2125 
2126                 /*
2127                  * Only one ill to consider if dst_restrict_ill is set.
2128                  */
2129                 if (dstinfo.dst_restrict_ill)
2130                         break;
2131         }
2132 
2133         ipif = best_c.cand_ipif;
2134         ip1dbg(("ipif_select_source_v6(%s, %s) -> %s\n",
2135             dstinfo.dst_ill->ill_name,
2136             inet_ntop(AF_INET6, dstinfo.dst_addr, dstr, sizeof (dstr)),
2137             (ipif == NULL ? "NULL" :
2138             inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, sstr, sizeof (sstr)))));
2139 
2140         if (usesrc_ill != NULL)
2141                 ill_refrele(usesrc_ill);
2142 
2143         if (ipmp_ill != NULL)
2144                 ill_refrele(ipmp_ill);
2145 
2146         if (dst_rhtp != NULL)
2147                 TPC_RELE(dst_rhtp);
2148 
2149         if (ipif == NULL) {
2150                 rw_exit(&ipst->ips_ill_g_lock);
2151                 return (NULL);
2152         }
2153 
2154         mutex_enter(&ipif->ipif_ill->ill_lock);
2155         if (!IPIF_IS_CONDEMNED(ipif)) {
2156                 ipif_refhold_locked(ipif);
2157                 mutex_exit(&ipif->ipif_ill->ill_lock);
2158                 rw_exit(&ipst->ips_ill_g_lock);
2159                 return (ipif);
2160         }
2161         mutex_exit(&ipif->ipif_ill->ill_lock);
2162         rw_exit(&ipst->ips_ill_g_lock);
2163         ip1dbg(("ipif_select_source_v6 cannot lookup ipif %p"
2164             " returning null \n", (void *)ipif));
2165 
2166         return (NULL);
2167 }
2168 
2169 /*
2170  * Pick a source address based on the destination ill and an optional setsrc
2171  * address.
2172  * The result is stored in srcp. If generation is set, then put the source
2173  * generation number there before we look for the source address (to avoid
2174  * missing changes in the set of source addresses).
2175  * If flagsp is set, then use it to pass back ipif_flags.
2176  *
2177  * If the caller wants to cache the returned source address and detect when
2178  * that might be stale, the caller should pass in a generation argument,
2179  * which the caller can later compare against ips_src_generation
2180  *
2181  * The precedence order for selecting an IPv6 source address is:
2182  *  - RTF_SETSRC on the first ire in the recursive lookup always wins.
2183  *  - If usrsrc is set, swap the ill to be the usesrc one.
2184  *  - If IPMP is used on the ill, select a random address from the most
2185  *    preferred ones below:
2186  * That is followed by the long list of IPv6 source address selection rules
2187  * starting with rule_isdst(), rule_scope(), etc.
2188  *
2189  * We have lower preference for ALL_ZONES IP addresses,
2190  * as they pose problems with unlabeled destinations.
2191  *
2192  * Note that when multiple IP addresses match e.g., with rule_scope() we pick
2193  * the first one if IPMP is not in use. With IPMP we randomize.
2194  */
2195 int
2196 ip_select_source_v6(ill_t *ill, const in6_addr_t *setsrc, const in6_addr_t *dst,
2197     zoneid_t zoneid, ip_stack_t *ipst, uint_t restrict_ill, uint32_t src_prefs,
2198     in6_addr_t *srcp, uint32_t *generation, uint64_t *flagsp)
2199 {
2200         ipif_t *ipif;
2201         boolean_t notready = B_FALSE;   /* Set if !ipif_addr_ready found */
2202 
2203         if (flagsp != NULL)
2204                 *flagsp = 0;
2205 
2206         /*
2207          * Need to grab the generation number before we check to
2208          * avoid a race with a change to the set of local addresses.
2209          * No lock needed since the thread which updates the set of local
2210          * addresses use ipif/ill locks and exit those (hence a store memory
2211          * barrier) before doing the atomic increase of ips_src_generation.
2212          */
2213         if (generation != NULL) {
2214                 *generation = ipst->ips_src_generation;
2215         }
2216 
2217         /* Was RTF_SETSRC set on the first IRE in the recursive lookup? */
2218         if (setsrc != NULL && !IN6_IS_ADDR_UNSPECIFIED(setsrc)) {
2219                 *srcp = *setsrc;
2220                 return (0);
2221         }
2222 
2223         ipif = ipif_select_source_v6(ill, dst, restrict_ill, src_prefs, zoneid,
2224             B_TRUE, &notready);
2225         if (ipif == NULL) {
2226                 if (notready)
2227                         return (ENETDOWN);
2228                 else
2229                         return (EADDRNOTAVAIL);
2230         }
2231         *srcp = ipif->ipif_v6lcl_addr;
2232         if (flagsp != NULL)
2233                 *flagsp = ipif->ipif_flags;
2234         ipif_refrele(ipif);
2235         return (0);
2236 }
2237 
2238 /*
2239  * Perform an attach and bind to get phys addr plus info_req for
2240  * the physical device.
2241  * q and mp represents an ioctl which will be queued waiting for
2242  * completion of the DLPI message exchange.
2243  * MUST be called on an ill queue.
2244  *
2245  * Returns EINPROGRESS when mp has been consumed by queueing it.
2246  * The ioctl will complete in ip_rput.
2247  */
2248 int
2249 ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q)
2250 {
2251         mblk_t  *v6token_mp = NULL;
2252         mblk_t  *v6lla_mp = NULL;
2253         mblk_t  *dest_mp = NULL;
2254         mblk_t  *phys_mp = NULL;
2255         mblk_t  *info_mp = NULL;
2256         mblk_t  *attach_mp = NULL;
2257         mblk_t  *bind_mp = NULL;
2258         mblk_t  *unbind_mp = NULL;
2259         mblk_t  *notify_mp = NULL;
2260         mblk_t  *capab_mp = NULL;
2261 
2262         ip1dbg(("ill_dl_phys(%s:%u)\n", ill->ill_name, ipif->ipif_id));
2263         ASSERT(ill->ill_dlpi_style_set);
2264         ASSERT(WR(q)->q_next != NULL);
2265 
2266         if (ill->ill_isv6) {
2267                 v6token_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) +
2268                     sizeof (t_scalar_t), DL_PHYS_ADDR_REQ);
2269                 if (v6token_mp == NULL)
2270                         goto bad;
2271                 ((dl_phys_addr_req_t *)v6token_mp->b_rptr)->dl_addr_type =
2272                     DL_IPV6_TOKEN;
2273 
2274                 v6lla_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) +
2275                     sizeof (t_scalar_t), DL_PHYS_ADDR_REQ);
2276                 if (v6lla_mp == NULL)
2277                         goto bad;
2278                 ((dl_phys_addr_req_t *)v6lla_mp->b_rptr)->dl_addr_type =
2279                     DL_IPV6_LINK_LAYER_ADDR;
2280         }
2281 
2282         if (ill->ill_mactype == DL_IPV4 || ill->ill_mactype == DL_IPV6) {
2283                 dest_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) +
2284                     sizeof (t_scalar_t), DL_PHYS_ADDR_REQ);
2285                 if (dest_mp == NULL)
2286                         goto bad;
2287                 ((dl_phys_addr_req_t *)dest_mp->b_rptr)->dl_addr_type =
2288                     DL_CURR_DEST_ADDR;
2289         }
2290 
2291         /*
2292          * Allocate a DL_NOTIFY_REQ and set the notifications we want.
2293          */
2294         notify_mp = ip_dlpi_alloc(sizeof (dl_notify_req_t) + sizeof (long),
2295             DL_NOTIFY_REQ);
2296         if (notify_mp == NULL)
2297                 goto bad;
2298         ((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications =
2299             (DL_NOTE_PHYS_ADDR | DL_NOTE_SDU_SIZE | DL_NOTE_FASTPATH_FLUSH |
2300             DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG |
2301             DL_NOTE_PROMISC_ON_PHYS | DL_NOTE_PROMISC_OFF_PHYS |
2302             DL_NOTE_REPLUMB | DL_NOTE_ALLOWED_IPS | DL_NOTE_SDU_SIZE2);
2303 
2304         phys_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) +
2305             sizeof (t_scalar_t), DL_PHYS_ADDR_REQ);
2306         if (phys_mp == NULL)
2307                 goto bad;
2308         ((dl_phys_addr_req_t *)phys_mp->b_rptr)->dl_addr_type =
2309             DL_CURR_PHYS_ADDR;
2310 
2311         info_mp = ip_dlpi_alloc(
2312             sizeof (dl_info_req_t) + sizeof (dl_info_ack_t),
2313             DL_INFO_REQ);
2314         if (info_mp == NULL)
2315                 goto bad;
2316 
2317         ASSERT(ill->ill_dlpi_capab_state == IDCS_UNKNOWN);
2318         capab_mp = ip_dlpi_alloc(sizeof (dl_capability_req_t),
2319             DL_CAPABILITY_REQ);
2320         if (capab_mp == NULL)
2321                 goto bad;
2322 
2323         bind_mp = ip_dlpi_alloc(sizeof (dl_bind_req_t) + sizeof (long),
2324             DL_BIND_REQ);
2325         if (bind_mp == NULL)
2326                 goto bad;
2327         ((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ill->ill_sap;
2328         ((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS;
2329 
2330         unbind_mp = ip_dlpi_alloc(sizeof (dl_unbind_req_t), DL_UNBIND_REQ);
2331         if (unbind_mp == NULL)
2332                 goto bad;
2333 
2334         /* If we need to attach, pre-alloc and initialize the mblk */
2335         if (ill->ill_needs_attach) {
2336                 attach_mp = ip_dlpi_alloc(sizeof (dl_attach_req_t),
2337                     DL_ATTACH_REQ);
2338                 if (attach_mp == NULL)
2339                         goto bad;
2340                 ((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = ill->ill_ppa;
2341         }
2342 
2343         /*
2344          * Here we are going to delay the ioctl ack until after
2345          * ACKs from DL_PHYS_ADDR_REQ. So need to save the
2346          * original ioctl message before sending the requests
2347          */
2348         mutex_enter(&ill->ill_lock);
2349         /* ipsq_pending_mp_add won't fail since we pass in a NULL connp */
2350         (void) ipsq_pending_mp_add(NULL, ipif, ill->ill_wq, mp, 0);
2351         /*
2352          * Set ill_phys_addr_pend to zero. It will be set to the addr_type of
2353          * the DL_PHYS_ADDR_REQ in ill_dlpi_send() and ill_dlpi_done(). It will
2354          * be used to track which DL_PHYS_ADDR_REQ is being ACK'd/NAK'd.
2355          */
2356         ill->ill_phys_addr_pend = 0;
2357         mutex_exit(&ill->ill_lock);
2358 
2359         if (attach_mp != NULL) {
2360                 ip1dbg(("ill_dl_phys: attach\n"));
2361                 ill_dlpi_send(ill, attach_mp);
2362         }
2363         ill_dlpi_send(ill, bind_mp);
2364         ill_dlpi_send(ill, info_mp);
2365 
2366         /*
2367          * Send the capability request to get the VRRP capability information.
2368          */
2369         ill_capability_send(ill, capab_mp);
2370 
2371         if (v6token_mp != NULL)
2372                 ill_dlpi_send(ill, v6token_mp);
2373         if (v6lla_mp != NULL)
2374                 ill_dlpi_send(ill, v6lla_mp);
2375         if (dest_mp != NULL)
2376                 ill_dlpi_send(ill, dest_mp);
2377         ill_dlpi_send(ill, phys_mp);
2378         ill_dlpi_send(ill, notify_mp);
2379         ill_dlpi_send(ill, unbind_mp);
2380 
2381         /*
2382          * This operation will complete in ip_rput_dlpi_writer with either
2383          * a DL_PHYS_ADDR_ACK or DL_ERROR_ACK.
2384          */
2385         return (EINPROGRESS);
2386 bad:
2387         freemsg(v6token_mp);
2388         freemsg(v6lla_mp);
2389         freemsg(dest_mp);
2390         freemsg(phys_mp);
2391         freemsg(info_mp);
2392         freemsg(attach_mp);
2393         freemsg(bind_mp);
2394         freemsg(capab_mp);
2395         freemsg(unbind_mp);
2396         freemsg(notify_mp);
2397         return (ENOMEM);
2398 }
2399 
2400 /* Add room for tcp+ip headers */
2401 uint_t ip_loopback_mtu_v6plus = IP_LOOPBACK_MTU + IPV6_HDR_LEN + 20;
2402 
2403 /*
2404  * DLPI is up.
2405  * Create all the IREs associated with an interface bring up multicast.
2406  * Set the interface flag and finish other initialization
2407  * that potentially had to be differed to after DL_BIND_ACK.
2408  */
2409 int
2410 ipif_up_done_v6(ipif_t *ipif)
2411 {
2412         ill_t   *ill = ipif->ipif_ill;
2413         int     err;
2414         boolean_t loopback = B_FALSE;
2415 
2416         ip1dbg(("ipif_up_done_v6(%s:%u)\n",
2417             ipif->ipif_ill->ill_name, ipif->ipif_id));
2418         DTRACE_PROBE3(ipif__downup, char *, "ipif_up_done_v6",
2419             ill_t *, ill, ipif_t *, ipif);
2420 
2421         /* Check if this is a loopback interface */
2422         if (ipif->ipif_ill->ill_wq == NULL)
2423                 loopback = B_TRUE;
2424 
2425         ASSERT(ipif->ipif_isv6);
2426         ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
2427 
2428         if (IS_LOOPBACK(ill) || ill->ill_net_type == IRE_IF_NORESOLVER) {
2429                 nce_t *loop_nce = NULL;
2430                 uint16_t flags = (NCE_F_MYADDR | NCE_F_NONUD | NCE_F_AUTHORITY);
2431 
2432                 /*
2433                  * lo0:1 and subsequent ipifs were marked IRE_LOCAL in
2434                  * ipif_lookup_on_name(), but in the case of zones we can have
2435                  * several loopback addresses on lo0. So all the interfaces with
2436                  * loopback addresses need to be marked IRE_LOOPBACK.
2437                  */
2438                 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, &ipv6_loopback))
2439                         ipif->ipif_ire_type = IRE_LOOPBACK;
2440                 else
2441                         ipif->ipif_ire_type = IRE_LOCAL;
2442                 if (ill->ill_net_type != IRE_LOOPBACK)
2443                         flags |= NCE_F_PUBLISH;
2444                 err = nce_lookup_then_add_v6(ill, NULL,
2445                     ill->ill_phys_addr_length,
2446                     &ipif->ipif_v6lcl_addr, flags, ND_REACHABLE, &loop_nce);
2447 
2448                 /* A shared-IP zone sees EEXIST for lo0:N */
2449                 if (err == 0 || err == EEXIST) {
2450                         ipif->ipif_added_nce = 1;
2451                         loop_nce->nce_ipif_cnt++;
2452                         nce_refrele(loop_nce);
2453                         err = 0;
2454                 } else {
2455                         ASSERT(loop_nce == NULL);
2456                         return (err);
2457                 }
2458         }
2459 
2460         err = ipif_add_ires_v6(ipif, loopback);
2461         if (err != 0) {
2462                 /*
2463                  * See comments about return value from
2464                  * ipif_addr_availability_check() in ipif_add_ires_v6().
2465                  */
2466                 if (err != EADDRINUSE) {
2467                         ipif_ndp_down(ipif);
2468                 } else {
2469                         /*
2470                          * Make IPMP aware of the deleted ipif so that
2471                          * the needed ipmp cleanup (e.g., of ipif_bound_ill)
2472                          * can be completed. Note that we do not want to
2473                          * destroy the nce that was created on the ipmp_ill
2474                          * for the active copy of the duplicate address in
2475                          * use.
2476                          */
2477                         if (IS_IPMP(ill))
2478                                 ipmp_illgrp_del_ipif(ill->ill_grp, ipif);
2479                         err = EADDRNOTAVAIL;
2480                 }
2481                 return (err);
2482         }
2483 
2484         if (ill->ill_ipif_up_count == 1 && !loopback) {
2485                 /* Recover any additional IREs entries for this ill */
2486                 (void) ill_recover_saved_ire(ill);
2487         }
2488 
2489         if (ill->ill_need_recover_multicast) {
2490                 /*
2491                  * Need to recover all multicast memberships in the driver.
2492                  * This had to be deferred until we had attached.
2493                  */
2494                 ill_recover_multicast(ill);
2495         }
2496 
2497         if (ill->ill_ipif_up_count == 1) {
2498                 /*
2499                  * Since the interface is now up, it may now be active.
2500                  */
2501                 if (IS_UNDER_IPMP(ill))
2502                         ipmp_ill_refresh_active(ill);
2503         }
2504 
2505         /* Join the allhosts multicast address and the solicited node MC */
2506         ipif_multicast_up(ipif);
2507 
2508         /* Perhaps ilgs should use this ill */
2509         update_conn_ill(NULL, ill->ill_ipst);
2510 
2511         if (ipif->ipif_addr_ready)
2512                 ipif_up_notify(ipif);
2513 
2514         return (0);
2515 }
2516 
2517 /*
2518  * Add the IREs associated with the ipif.
2519  * Those MUST be explicitly removed in ipif_delete_ires_v6.
2520  */
2521 static int
2522 ipif_add_ires_v6(ipif_t *ipif, boolean_t loopback)
2523 {
2524         ill_t           *ill = ipif->ipif_ill;
2525         ip_stack_t      *ipst = ill->ill_ipst;
2526         in6_addr_t      v6addr;
2527         in6_addr_t      route_mask;
2528         int             err;
2529         char            buf[INET6_ADDRSTRLEN];
2530         ire_t           *ire_local = NULL;      /* LOCAL or LOOPBACK */
2531         ire_t           *ire_if = NULL;
2532         in6_addr_t      *gw;
2533 
2534         if (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) &&
2535             !(ipif->ipif_flags & IPIF_NOLOCAL)) {
2536 
2537                 /*
2538                  * If we're on a labeled system then make sure that zone-
2539                  * private addresses have proper remote host database entries.
2540                  */
2541                 if (is_system_labeled() &&
2542                     ipif->ipif_ire_type != IRE_LOOPBACK) {
2543                         if (ip6opt_ls == 0) {
2544                                 cmn_err(CE_WARN, "IPv6 not enabled "
2545                                     "via /etc/system");
2546                                 return (EINVAL);
2547                         }
2548                         if (!tsol_check_interface_address(ipif))
2549                                 return (EINVAL);
2550                 }
2551 
2552                 if (loopback)
2553                         gw = &ipif->ipif_v6lcl_addr;
2554                 else
2555                         gw = NULL;
2556 
2557                 /* Register the source address for __sin6_src_id */
2558                 err = ip_srcid_insert(&ipif->ipif_v6lcl_addr,
2559                     ipif->ipif_zoneid, ipst);
2560                 if (err != 0) {
2561                         ip0dbg(("ipif_add_ires_v6: srcid_insert %d\n", err));
2562                         return (err);
2563                 }
2564                 /*
2565                  * If the interface address is set, create the LOCAL
2566                  * or LOOPBACK IRE.
2567                  */
2568                 ip1dbg(("ipif_add_ires_v6: creating IRE %d for %s\n",
2569                     ipif->ipif_ire_type,
2570                     inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
2571                     buf, sizeof (buf))));
2572 
2573                 ire_local = ire_create_v6(
2574                     &ipif->ipif_v6lcl_addr,              /* dest address */
2575                     &ipv6_all_ones,                 /* mask */
2576                     gw,                                 /* gateway */
2577                     ipif->ipif_ire_type,             /* LOCAL or LOOPBACK */
2578                     ipif->ipif_ill,                  /* interface */
2579                     ipif->ipif_zoneid,
2580                     ((ipif->ipif_flags & IPIF_PRIVATE) ?
2581                     RTF_PRIVATE : 0) | RTF_KERNEL,
2582                     NULL,
2583                     ipst);
2584                 if (ire_local == NULL) {
2585                         ip1dbg(("ipif_up_done_v6: NULL ire_local\n"));
2586                         err = ENOMEM;
2587                         goto bad;
2588                 }
2589         }
2590 
2591         /* Set up the IRE_IF_RESOLVER or IRE_IF_NORESOLVER, as appropriate. */
2592         if (!loopback && !(ipif->ipif_flags & IPIF_NOXMIT) &&
2593             !(IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6subnet) &&
2594             IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6net_mask))) {
2595                 /* ipif_v6subnet is ipif_v6pp_dst_addr for pt-pt */
2596                 v6addr = ipif->ipif_v6subnet;
2597 
2598                 if (ipif->ipif_flags & IPIF_POINTOPOINT) {
2599                         route_mask = ipv6_all_ones;
2600                 } else {
2601                         route_mask = ipif->ipif_v6net_mask;
2602                 }
2603 
2604                 ip1dbg(("ipif_add_ires_v6: creating if IRE %d for %s\n",
2605                     ill->ill_net_type,
2606                     inet_ntop(AF_INET6, &v6addr, buf, sizeof (buf))));
2607 
2608                 ire_if = ire_create_v6(
2609                     &v6addr,                        /* dest pref */
2610                     &route_mask,            /* mask */
2611                     &ipif->ipif_v6lcl_addr,      /* gateway */
2612                     ill->ill_net_type,               /* IF_[NO]RESOLVER */
2613                     ipif->ipif_ill,
2614                     ipif->ipif_zoneid,
2615                     ((ipif->ipif_flags & IPIF_PRIVATE) ?
2616                     RTF_PRIVATE : 0) | RTF_KERNEL,
2617                     NULL,
2618                     ipst);
2619                 if (ire_if == NULL) {
2620                         ip1dbg(("ipif_up_done: NULL ire_if\n"));
2621                         err = ENOMEM;
2622                         goto bad;
2623                 }
2624         }
2625 
2626         /*
2627          * Need to atomically check for IP address availability under
2628          * ip_addr_avail_lock.  ill_g_lock is held as reader to ensure no new
2629          * ills or new ipifs can be added while we are checking availability.
2630          */
2631         rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2632         mutex_enter(&ipst->ips_ip_addr_avail_lock);
2633         ill->ill_ipif_up_count++;
2634         ipif->ipif_flags |= IPIF_UP;
2635         err = ip_addr_availability_check(ipif);
2636         mutex_exit(&ipst->ips_ip_addr_avail_lock);
2637         rw_exit(&ipst->ips_ill_g_lock);
2638 
2639         if (err != 0) {
2640                 /*
2641                  * Our address may already be up on the same ill. In this case,
2642                  * the external resolver entry for our ipif replaced the one for
2643                  * the other ipif. So we don't want to delete it (otherwise the
2644                  * other ipif would be unable to send packets).
2645                  * ip_addr_availability_check() identifies this case for us and
2646                  * returns EADDRINUSE; Caller must  turn it into EADDRNOTAVAIL
2647                  * which is the expected error code.
2648                  *
2649                  * Note that ipif_ndp_down() will only delete the nce in the
2650                  * case when the nce_ipif_cnt drops to 0.
2651                  */
2652                 ill->ill_ipif_up_count--;
2653                 ipif->ipif_flags &= ~IPIF_UP;
2654                 goto bad;
2655         }
2656 
2657         /*
2658          * Add in all newly created IREs.
2659          * We add the IRE_INTERFACE before the IRE_LOCAL to ensure
2660          * that lookups find the IRE_LOCAL even if the IRE_INTERFACE is
2661          * a /128 route.
2662          */
2663         if (ire_if != NULL) {
2664                 ire_if = ire_add(ire_if);
2665                 if (ire_if == NULL) {
2666                         err = ENOMEM;
2667                         goto bad2;
2668                 }
2669 #ifdef DEBUG
2670                 ire_refhold_notr(ire_if);
2671                 ire_refrele(ire_if);
2672 #endif
2673         }
2674         if (ire_local != NULL) {
2675                 ire_local = ire_add(ire_local);
2676                 if (ire_local == NULL) {
2677                         err = ENOMEM;
2678                         goto bad2;
2679                 }
2680 #ifdef DEBUG
2681                 ire_refhold_notr(ire_local);
2682                 ire_refrele(ire_local);
2683 #endif
2684         }
2685         rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
2686         if (ire_local != NULL)
2687                 ipif->ipif_ire_local = ire_local;
2688         if (ire_if != NULL)
2689                 ipif->ipif_ire_if = ire_if;
2690         rw_exit(&ipst->ips_ill_g_lock);
2691         ire_local = NULL;
2692         ire_if = NULL;
2693 
2694         if (ipif->ipif_addr_ready)
2695                 ipif_up_notify(ipif);
2696         return (0);
2697 
2698 bad2:
2699         ill->ill_ipif_up_count--;
2700         ipif->ipif_flags &= ~IPIF_UP;
2701 
2702 bad:
2703         if (ire_local != NULL)
2704                 ire_delete(ire_local);
2705         if (ire_if != NULL)
2706                 ire_delete(ire_if);
2707 
2708         rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
2709         ire_local = ipif->ipif_ire_local;
2710         ipif->ipif_ire_local = NULL;
2711         ire_if = ipif->ipif_ire_if;
2712         ipif->ipif_ire_if = NULL;
2713         rw_exit(&ipst->ips_ill_g_lock);
2714         if (ire_local != NULL) {
2715                 ire_delete(ire_local);
2716                 ire_refrele_notr(ire_local);
2717         }
2718         if (ire_if != NULL) {
2719                 ire_delete(ire_if);
2720                 ire_refrele_notr(ire_if);
2721         }
2722         (void) ip_srcid_remove(&ipif->ipif_v6lcl_addr, ipif->ipif_zoneid, ipst);
2723 
2724         return (err);
2725 }
2726 
2727 /* Remove all the IREs created by ipif_add_ires_v6 */
2728 void
2729 ipif_delete_ires_v6(ipif_t *ipif)
2730 {
2731         ill_t           *ill = ipif->ipif_ill;
2732         ip_stack_t      *ipst = ill->ill_ipst;
2733         ire_t           *ire;
2734 
2735         rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
2736         ire = ipif->ipif_ire_local;
2737         ipif->ipif_ire_local = NULL;
2738         rw_exit(&ipst->ips_ill_g_lock);
2739         if (ire != NULL) {
2740                 /*
2741                  * Move count to ipif so we don't loose the count due to
2742                  * a down/up dance.
2743                  */
2744                 atomic_add_32(&ipif->ipif_ib_pkt_count, ire->ire_ib_pkt_count);
2745 
2746                 ire_delete(ire);
2747                 ire_refrele_notr(ire);
2748         }
2749         rw_enter(&ipst->ips_ill_g_lock, RW_WRITER);
2750         ire = ipif->ipif_ire_if;
2751         ipif->ipif_ire_if = NULL;
2752         rw_exit(&ipst->ips_ill_g_lock);
2753         if (ire != NULL) {
2754                 ire_delete(ire);
2755                 ire_refrele_notr(ire);
2756         }
2757 }
2758 
2759 /*
2760  * Delete an ND entry if it exists.
2761  */
2762 /* ARGSUSED */
2763 int
2764 ip_siocdelndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2765     ip_ioctl_cmd_t *ipip, void *dummy_ifreq)
2766 {
2767         sin6_t          *sin6;
2768         struct lifreq   *lifr;
2769         lif_nd_req_t    *lnr;
2770         ill_t           *ill = ipif->ipif_ill;
2771         nce_t           *nce;
2772 
2773         lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr;
2774         lnr = &lifr->lifr_nd;
2775         /* Only allow for logical unit zero i.e. not on "le0:17" */
2776         if (ipif->ipif_id != 0)
2777                 return (EINVAL);
2778 
2779         if (!ipif->ipif_isv6)
2780                 return (EINVAL);
2781 
2782         if (lnr->lnr_addr.ss_family != AF_INET6)
2783                 return (EAFNOSUPPORT);
2784 
2785         sin6 = (sin6_t *)&lnr->lnr_addr;
2786 
2787         /*
2788          * Since ND mappings must be consistent across an IPMP group, prohibit
2789          * deleting ND mappings on underlying interfaces.
2790          * Don't allow deletion of mappings for local addresses.
2791          */
2792         if (IS_UNDER_IPMP(ill))
2793                 return (EPERM);
2794 
2795         nce = nce_lookup_v6(ill, &sin6->sin6_addr);
2796         if (nce == NULL)
2797                 return (ESRCH);
2798 
2799         if (NCE_MYADDR(nce->nce_common)) {
2800                 nce_refrele(nce);
2801                 return (EPERM);
2802         }
2803 
2804         /*
2805          * delete the nce_common which will also delete the nces on any
2806          * under_ill in the case of ipmp.
2807          */
2808         ncec_delete(nce->nce_common);
2809         nce_refrele(nce);
2810         return (0);
2811 }
2812 
2813 /*
2814  * Return nbr cache info.
2815  */
2816 /* ARGSUSED */
2817 int
2818 ip_siocqueryndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2819     ip_ioctl_cmd_t *ipip, void *dummy_ifreq)
2820 {
2821         ill_t           *ill = ipif->ipif_ill;
2822         struct lifreq   *lifr;
2823         lif_nd_req_t    *lnr;
2824 
2825         lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr;
2826         lnr = &lifr->lifr_nd;
2827         /* Only allow for logical unit zero i.e. not on "le0:17" */
2828         if (ipif->ipif_id != 0)
2829                 return (EINVAL);
2830 
2831         if (!ipif->ipif_isv6)
2832                 return (EINVAL);
2833 
2834         if (lnr->lnr_addr.ss_family != AF_INET6)
2835                 return (EAFNOSUPPORT);
2836 
2837         if (ill->ill_phys_addr_length > sizeof (lnr->lnr_hdw_addr))
2838                 return (EINVAL);
2839 
2840         return (ndp_query(ill, lnr));
2841 }
2842 
2843 /*
2844  * Perform an update of the nd entry for the specified address.
2845  */
2846 /* ARGSUSED */
2847 int
2848 ip_siocsetndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2849     ip_ioctl_cmd_t *ipip, void *dummy_ifreq)
2850 {
2851         sin6_t          *sin6;
2852         ill_t           *ill = ipif->ipif_ill;
2853         struct  lifreq  *lifr;
2854         lif_nd_req_t    *lnr;
2855         ire_t           *ire;
2856 
2857         lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr;
2858         lnr = &lifr->lifr_nd;
2859         /* Only allow for logical unit zero i.e. not on "le0:17" */
2860         if (ipif->ipif_id != 0)
2861                 return (EINVAL);
2862 
2863         if (!ipif->ipif_isv6)
2864                 return (EINVAL);
2865 
2866         if (lnr->lnr_addr.ss_family != AF_INET6)
2867                 return (EAFNOSUPPORT);
2868 
2869         sin6 = (sin6_t *)&lnr->lnr_addr;
2870 
2871         /*
2872          * Since ND mappings must be consistent across an IPMP group, prohibit
2873          * updating ND mappings on underlying interfaces.  Also, since ND
2874          * mappings for IPMP data addresses are owned by IP itself, prohibit
2875          * updating them.
2876          */
2877         if (IS_UNDER_IPMP(ill))
2878                 return (EPERM);
2879 
2880         if (IS_IPMP(ill)) {
2881                 ire = ire_ftable_lookup_v6(&sin6->sin6_addr, NULL, NULL,
2882                     IRE_LOCAL, ill, ALL_ZONES, NULL,
2883                     MATCH_IRE_TYPE | MATCH_IRE_ILL, 0, ill->ill_ipst, NULL);
2884                 if (ire != NULL) {
2885                         ire_refrele(ire);
2886                         return (EPERM);
2887                 }
2888         }
2889 
2890         return (ndp_sioc_update(ill, lnr));
2891 }